1#!/usr/bin/perl -w
2
3# make_sunver.pl
4#
5# This script takes at least two arguments, a GNU style version script and
6# a list of object and archive files, and generates a corresponding Sun
7# style version script as follows:
8#
9# Each glob pattern, C++ mangled pattern or literal in the input script is
10# matched against all global symbols in the input objects, emitting those
11# that matched (or nothing if no match was found).
12# A comment with the original pattern and its type is left in the output
13# file to make it easy to understand the matches.
14#
15# It uses elfdump when present (native), GNU readelf otherwise.
16# It depends on the GNU version of c++filt, since it must understand the
17# GNU mangling style.
18
19use FileHandle;
20use IPC::Open2;
21
22# Enforce C locale.
23$ENV{'LC_ALL'} = "C";
24$ENV{'LANG'} = "C";
25
26# Input version script, GNU style.
27my $symvers = shift;
28
29##########
30# Get all the symbols from the library, match them, and add them to a hash.
31
32my %sym_hash = ();
33
34# List of objects and archives to process.
35my @OBJECTS = ();
36
37# List of shared objects to omit from processing.
38my @SHAREDOBJS = ();
39
40# Filter out those input archives that have corresponding shared objects to
41# avoid adding all symbols matched in the archive to the output map.
42foreach $file (@ARGV) {
43    if (($so = $file) =~ s/\.a$/.so/ && -e $so) {
44	printf STDERR "omitted $file -> $so\n";
45	push (@SHAREDOBJS, $so);
46    } else {
47	push (@OBJECTS, $file);
48    }
49}
50
51# We need to detect and ignore hidden symbols.  Solaris nm can only detect
52# this in the harder to parse default output format, and GNU nm not at all,
53# so use elfdump -s in the native case and GNU readelf -s otherwise.
54# GNU objdump -t cannot be used since it produces a variable number of
55# columns.
56
57# The path to elfdump.
58my $elfdump = "/usr/ccs/bin/elfdump";
59
60if (-f $elfdump) {
61    open ELFDUMP,$elfdump.' -s '.(join ' ',@OBJECTS).'|' or die $!;
62    my $skip_arsym = 0;
63
64    while (<ELFDUMP>) {
65	chomp;
66
67	# Ignore empty lines.
68	if (/^$/) {
69	    # End of archive symbol table, stop skipping.
70	    $skip_arsym = 0 if $skip_arsym;
71	    next;
72	}
73
74	# Keep skipping until end of archive symbol table.
75	next if ($skip_arsym);
76
77	# Ignore object name header for individual objects and archives.
78	next if (/:$/);
79
80	# Ignore table header lines.
81	next if (/^Symbol Table Section:/);
82	next if (/index.*value.*size/);
83
84	# Start of archive symbol table: start skipping.
85	if (/^Symbol Table: \(archive/) {
86	    $skip_arsym = 1;
87	    next;
88	}
89
90	# Split table.
91	(undef, undef, undef, undef, $bind, $oth, undef, $shndx, $name) = split;
92
93	# Error out for unknown input.
94	die "unknown input line:\n$_" unless defined($bind);
95
96	# Ignore local symbols.
97	next if ($bind eq "LOCL");
98	# Ignore hidden symbols.
99	next if ($oth eq "H");
100	# Ignore undefined symbols.
101	next if ($shndx eq "UNDEF");
102	# Error out for unhandled cases.
103	if ($bind !~ /^(GLOB|WEAK)/ or $oth ne "D") {
104	    die "unhandled symbol:\n$_";
105	}
106
107	# Remember symbol.
108	$sym_hash{$name}++;
109    }
110    close ELFDUMP or die "$elfdump error";
111} else {
112    open READELF, 'readelf -s -W '.(join ' ',@OBJECTS).'|' or die $!;
113    # Process each symbol.
114    while (<READELF>) {
115	chomp;
116
117	# Ignore empty lines.
118	next if (/^$/);
119
120	# Ignore object name header.
121	next if (/^File: .*$/);
122
123	# Ignore table header lines.
124	next if (/^Symbol table.*contains.*:/);
125	next if (/Num:.*Value.*Size/);
126
127	# Split table.
128	(undef, undef, undef, undef, $bind, $vis, $ndx, $name) = split;
129
130	# Error out for unknown input.
131	die "unknown input line:\n$_" unless defined($bind);
132
133	# Ignore local symbols.
134	next if ($bind eq "LOCAL");
135	# Ignore hidden symbols.
136	next if ($vis eq "HIDDEN");
137	# Ignore undefined symbols.
138	next if ($ndx eq "UND");
139	# Error out for unhandled cases.
140	if ($bind !~ /^(GLOBAL|WEAK)/ or $vis ne "DEFAULT") {
141	    die "unhandled symbol:\n$_";
142	}
143
144	# Remember symbol.
145	$sym_hash{$name}++;
146    }
147    close READELF or die "readelf error";
148}
149
150##########
151# The various types of glob patterns.
152#
153# A glob pattern that is to be applied to the demangled name: 'cxx'.
154# A glob patterns that applies directly to the name in the .o files: 'glob'.
155# This pattern is ignored; used for local variables (usually just '*'): 'ign'.
156
157# The type of the current pattern.
158my $glob = 'glob';
159
160# We're currently inside `extern "C++"', which Sun ld doesn't understand.
161my $in_extern = 0;
162
163# The c++filt command to use.  This *must* be GNU c++filt; the Sun Studio
164# c++filt doesn't handle the GNU mangling style.
165my $cxxfilt = $ENV{'CXXFILT'} || "c++filt";
166
167# The current version name.
168my $current_version = "";
169
170# Was there any attempt to match a symbol to this version?
171my $matches_attempted;
172
173# The number of versions which matched this symbol.
174my $matched_symbols;
175
176open F,$symvers or die $!;
177
178# Print information about generating this file
179print "# This file was generated by make_sunver.pl.  DO NOT EDIT!\n";
180print "# It was generated by:\n";
181printf "# %s %s %s\n", $0, $symvers, (join ' ',@ARGV);
182printf "# Omitted archives with corresponding shared libraries: %s\n",
183    (join ' ', @SHAREDOBJS) if $#SHAREDOBJS >= 0;
184print "#\n\n";
185
186while (<F>) {
187    # Lines of the form '};'
188    if (/^([ \t]*)(\}[ \t]*;[ \t]*)$/) {
189	$glob = 'glob';
190	if ($in_extern) {
191	    $in_extern--;
192	    print "$1##$2\n";
193	} else {
194	    print;
195	}
196	next;
197    }
198
199    # Lines of the form '} SOME_VERSION_NAME_1.0;'
200    if (/^[ \t]*\}[ \tA-Z0-9_.a-z]+;[ \t]*$/) {
201	$glob = 'glob';
202	# We tried to match symbols agains this version, but none matched.
203	# Emit dummy hidden symbol to avoid marking this version WEAK.
204	if ($matches_attempted && $matched_symbols == 0) {
205	    print "  hidden:\n";
206	    print "    .force_WEAK_off_$current_version = DATA S0x0 V0x0;\n";
207	}
208	print; next;
209    }
210
211    # Comment and blank lines
212    if (/^[ \t]*\#/) { print; next; }
213    if (/^[ \t]*$/) { print; next; }
214
215    # Lines of the form '{'
216    if (/^([ \t]*){$/) {
217	if ($in_extern) {
218	    print "$1##{\n";
219	} else {
220	    print;
221	}
222	next;
223    }
224
225    # Lines of the form 'SOME_VERSION_NAME_1.1 {'
226    if (/^([A-Z0-9_.]+)[ \t]+{$/) {
227	# Record version name.
228	$current_version = $1;
229	# Reset match attempts, #matched symbols for this version.
230	$matches_attempted = 0;
231	$matched_symbols = 0;
232	print;
233	next;
234    }
235
236    # Ignore 'global:'
237    if (/^[ \t]*global:$/) { print; next; }
238
239    # After 'local:', globs should be ignored, they won't be exported.
240    if (/^[ \t]*local:$/) {
241	$glob = 'ign';
242	print;
243	next;
244    }
245
246    # After 'extern "C++"', globs are C++ patterns
247    if (/^([ \t]*)(extern \"C\+\+\"[ \t]*)$/) {
248	$in_extern++;
249	$glob = 'cxx';
250	# Need to comment, Sun ld cannot handle this.
251	print "$1##$2\n"; next;
252    }
253
254    # Chomp newline now we're done with passing through the input file.
255    chomp;
256
257    # Catch globs.  Note that '{}' is not allowed in globs by this script,
258    # so only '*' and '[]' are available.
259    if (/^([ \t]*)([^ \t;{}#]+);?[ \t]*$/) {
260	my $ws = $1;
261	my $ptn = $2;
262	# Turn the glob into a regex by replacing '*' with '.*', '?' with '.'.
263	# Keep $ptn so we can still print the original form.
264	($pattern = $ptn) =~ s/\*/\.\*/g;
265	$pattern =~ s/\?/\./g;
266
267	if ($glob eq 'ign') {
268	    # We're in a local: * section; just continue.
269	    print "$_\n";
270	    next;
271	}
272
273	# Print the glob commented for human readers.
274	print "$ws##$ptn ($glob)\n";
275	# We tried to match a symbol to this version.
276	$matches_attempted++;
277
278	if ($glob eq 'glob') {
279	    my %ptn_syms = ();
280
281	    # Match ptn against symbols in %sym_hash.
282	    foreach my $sym (keys %sym_hash) {
283		# Maybe it matches one of the patterns based on the symbol in
284		# the .o file.
285		$ptn_syms{$sym}++ if ($sym =~ /^$pattern$/);
286	    }
287
288	    foreach my $sym (sort keys(%ptn_syms)) {
289		$matched_symbols++;
290		print "$ws$sym;\n";
291	    }
292	} elsif ($glob eq 'cxx') {
293	    my %dem_syms = ();
294
295	    # Verify that we're actually using GNU c++filt.  Other versions
296	    # most likely cannot handle GNU style symbol mangling.
297	    my $cxxout = `$cxxfilt --version 2>&1`;
298	    $cxxout =~ m/GNU/ or die "$0 requires GNU c++filt to function";
299
300	    # Talk to c++filt through a pair of file descriptors.
301	    # Need to start a fresh instance per pattern, otherwise the
302	    # process grows to 500+ MB.
303	    my $pid = open2(*FILTIN, *FILTOUT, $cxxfilt) or die $!;
304
305	    # Match ptn against symbols in %sym_hash.
306	    foreach my $sym (keys %sym_hash) {
307		# No?  Well, maybe its demangled form matches one of those
308		# patterns.
309		printf FILTOUT "%s\n",$sym;
310		my $dem = <FILTIN>;
311		chomp $dem;
312		$dem_syms{$sym}++ if ($dem =~ /^$pattern$/);
313	    }
314
315	    close FILTOUT or die "c++filt error";
316	    close FILTIN or die "c++filt error";
317	    # Need to wait for the c++filt process to avoid lots of zombies.
318	    waitpid $pid, 0;
319
320	    foreach my $sym (sort keys(%dem_syms)) {
321		$matched_symbols++;
322		print "$ws$sym;\n";
323	    }
324	} else {
325	    # No?  Well, then ignore it.
326	}
327	next;
328    }
329    # Important sanity check.  This script can't handle lots of formats
330    # that GNU ld can, so be sure to error out if one is seen!
331    die "strange line `$_'";
332}
333close F;
334