1#!/usr/bin/perl -w
2#
3# Clean a patch file -- or directory of patch files -- of stealth whitespace.
4# WARNING: this can be a highly destructive operation.  Use with caution.
5#
6
7use bytes;
8use File::Basename;
9
10# Default options
11$max_width = 79;
12
13# Clean up space-tab sequences, either by removing spaces or
14# replacing them with tabs.
15sub clean_space_tabs($)
16{
17    no bytes;			# Tab alignment depends on characters
18
19    my($li) = @_;
20    my($lo) = '';
21    my $pos = 0;
22    my $nsp = 0;
23    my($i, $c);
24
25    for ($i = 0; $i < length($li); $i++) {
26	$c = substr($li, $i, 1);
27	if ($c eq "\t") {
28	    my $npos = ($pos+$nsp+8) & ~7;
29	    my $ntab = ($npos >> 3) - ($pos >> 3);
30	    $lo .= "\t" x $ntab;
31	    $pos = $npos;
32	    $nsp = 0;
33	} elsif ($c eq "\n" || $c eq "\r") {
34	    $lo .= " " x $nsp;
35	    $pos += $nsp;
36	    $nsp = 0;
37	    $lo .= $c;
38	    $pos = 0;
39	} elsif ($c eq " ") {
40	    $nsp++;
41	} else {
42	    $lo .= " " x $nsp;
43	    $pos += $nsp;
44	    $nsp = 0;
45	    $lo .= $c;
46	    $pos++;
47	}
48    }
49    $lo .= " " x $nsp;
50    return $lo;
51}
52
53# Compute the visual width of a string
54sub strwidth($) {
55    no bytes;			# Tab alignment depends on characters
56
57    my($li) = @_;
58    my($c, $i);
59    my $pos = 0;
60    my $mlen = 0;
61
62    for ($i = 0; $i < length($li); $i++) {
63	$c = substr($li,$i,1);
64	if ($c eq "\t") {
65	    $pos = ($pos+8) & ~7;
66	} elsif ($c eq "\n") {
67	    $mlen = $pos if ($pos > $mlen);
68	    $pos = 0;
69	} else {
70	    $pos++;
71	}
72    }
73
74    $mlen = $pos if ($pos > $mlen);
75    return $mlen;
76}
77
78$name = basename($0);
79
80@files = ();
81
82while (defined($a = shift(@ARGV))) {
83    if ($a =~ /^-/) {
84	if ($a eq '-width' || $a eq '-w') {
85	    $max_width = shift(@ARGV)+0;
86	} else {
87	    print STDERR "Usage: $name [-width #] files...\n";
88	    exit 1;
89	}
90    } else {
91	push(@files, $a);
92    }
93}
94
95foreach $f ( @files ) {
96    print STDERR "$name: $f\n";
97
98    if (! -f $f) {
99	print STDERR "$f: not a file\n";
100	next;
101    }
102
103    if (!open(FILE, '+<', $f)) {
104	print STDERR "$name: Cannot open file: $f: $!\n";
105	next;
106    }
107
108    binmode FILE;
109
110    # First, verify that it is not a binary file; consider any file
111    # with a zero byte to be a binary file.  Is there any better, or
112    # additional, heuristic that should be applied?
113    $is_binary = 0;
114
115    while (read(FILE, $data, 65536) > 0) {
116	if ($data =~ /\0/) {
117	    $is_binary = 1;
118	    last;
119	}
120    }
121
122    if ($is_binary) {
123	print STDERR "$name: $f: binary file\n";
124	next;
125    }
126
127    seek(FILE, 0, 0);
128
129    $in_bytes = 0;
130    $out_bytes = 0;
131    $lineno = 0;
132
133    @lines  = ();
134
135    $in_hunk = 0;
136    $err = 0;
137
138    while ( defined($line = <FILE>) ) {
139	$lineno++;
140	$in_bytes += length($line);
141
142	if (!$in_hunk) {
143	    if ($line =~
144		/^\@\@\s+\-([0-9]+),([0-9]+)\s+\+([0-9]+),([0-9]+)\s\@\@/) {
145		$minus_lines = $2;
146		$plus_lines = $4;
147		if ($minus_lines || $plus_lines) {
148		    $in_hunk = 1;
149		    @hunk_lines = ($line);
150		}
151	    } else {
152		push(@lines, $line);
153		$out_bytes += length($line);
154	    }
155	} else {
156	    # We're in a hunk
157
158	    if ($line =~ /^\+/) {
159		$plus_lines--;
160
161		$text = substr($line, 1);
162		$text =~ s/[ \t\r]*$//;		# Remove trailing spaces
163		$text = clean_space_tabs($text);
164
165		$l_width = strwidth($text);
166		if ($max_width && $l_width > $max_width) {
167		    print STDERR
168			"$f:$lineno: adds line exceeds $max_width ",
169			"characters ($l_width)\n";
170		}
171
172		push(@hunk_lines, '+'.$text);
173	    } elsif ($line =~ /^\-/) {
174		$minus_lines--;
175		push(@hunk_lines, $line);
176	    } elsif ($line =~ /^ /) {
177		$plus_lines--;
178		$minus_lines--;
179		push(@hunk_lines, $line);
180	    } else {
181		print STDERR "$name: $f: malformed patch\n";
182		$err = 1;
183		last;
184	    }
185
186	    if ($plus_lines < 0 || $minus_lines < 0) {
187		print STDERR "$name: $f: malformed patch\n";
188		$err = 1;
189		last;
190	    } elsif ($plus_lines == 0 && $minus_lines == 0) {
191		# End of a hunk.  Process this hunk.
192		my $i;
193		my $l;
194		my @h = ();
195		my $adj = 0;
196		my $done = 0;
197
198		for ($i = scalar(@hunk_lines)-1; $i > 0; $i--) {
199		    $l = $hunk_lines[$i];
200		    if (!$done && $l eq "+\n") {
201			$adj++; # Skip this line
202		    } elsif ($l =~ /^[ +]/) {
203			$done = 1;
204			unshift(@h, $l);
205		    } else {
206			unshift(@h, $l);
207		    }
208		}
209
210		$l = $hunk_lines[0];  # Hunk header
211		undef @hunk_lines;    # Free memory
212
213		if ($adj) {
214		    die unless
215			($l =~ /^\@\@\s+\-([0-9]+),([0-9]+)\s+\+([0-9]+),([0-9]+)\s\@\@(.*)$/);
216		    my $mstart = $1;
217		    my $mlin = $2;
218		    my $pstart = $3;
219		    my $plin = $4;
220		    my $tail = $5; # doesn't include the final newline
221
222		    $l = sprintf("@@ -%d,%d +%d,%d @@%s\n",
223				 $mstart, $mlin, $pstart, $plin-$adj,
224				 $tail);
225		}
226		unshift(@h, $l);
227
228		# Transfer to the output array
229		foreach $l (@h) {
230		    $out_bytes += length($l);
231		    push(@lines, $l);
232		}
233
234		$in_hunk = 0;
235	    }
236	}
237    }
238
239    if ($in_hunk) {
240	print STDERR "$name: $f: malformed patch\n";
241	$err = 1;
242    }
243
244    if (!$err) {
245	if ($in_bytes != $out_bytes) {
246	    # Only write to the file if changed
247	    seek(FILE, 0, 0);
248	    print FILE @lines;
249
250	    if ( !defined($where = tell(FILE)) ||
251		 !truncate(FILE, $where) ) {
252		die "$name: Failed to truncate modified file: $f: $!\n";
253	    }
254	}
255    }
256
257    close(FILE);
258}
259