1#!/bin/ksh
2#  $OpenBSD: check_sym,v 1.11 2022/01/03 03:40:48 guenther Exp $
3#
4# Copyright (c) 2016,2019,2022 Philip Guenther <guenther@openbsd.org>
5#
6# Permission to use, copy, modify, and distribute this software for any
7# purpose with or without fee is hereby granted, provided that the above
8# copyright notice and this permission notice appear in all copies.
9#
10# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17#
18#
19#  check_sym -- compare the symbols and external function references in two
20#	versions of a shared library
21#
22#  SYNOPSIS
23#	check_sym [-chkv] [old [new]]
24#
25#  DESCRIPTION
26#	Library developers need to be aware when they have changed the
27#	ABI of a library.  To assist them, check_sym examines two versions
28#	of a shared library and reports changes to the following:
29#	 * the set of exported symbols and their strengths
30#	 * the set of undefined symbols referenced
31#	 * the set of lazily-resolved functions (PLT)
32#
33#	In each case, additions and removals are reported; for exported
34#	symbols it also reports when a symbol is weakened or strengthened.
35#
36#	The shared libraries to compare can be specified on the
37#	command-line.  Otherwise, check_sym expects to be run from the
38#	source directory of a library with a shlib_version file specifying
39#	the version being built and the new library in the obj subdirectory.
40#	If the old library to compare against wasn't specified either then
41#	check_sym will take the highest version of that library in the
42#	*current* directory, or the highest version of that library in
43#	/usr/lib if it wasn't present in the current directory.
44#
#	By default, check_sym places all its intermediate files in a
#	temporary directory and removes it on exit.  They contain useful
#	details for understanding what changed, so if the -k option is used
#	they will instead be placed in /tmp/ and left behind.  If any of
#	them cannot be created by the user, the command will fail.  The
#	files left behind by the -k option can be cleaned up by invoking
#	check_sym with the -c option.
52#
53#	The -v option enables verbose output.
54#
55#	The *basic* rules of thumb for library versions are: if you
56#	 * stop exporting a symbol, or
#	 * change the size of a data symbol, or
58#	 * start exporting a symbol that an inter-dependent library needs
59#	then you need to bump the MAJOR version of the library.
60#
61#	Otherwise, if you:
62#	 * start exporting a symbol
63#	then you need to bump the MINOR version of the library.
64#
65#  SEE ALSO
66#	readelf(1), elf(5)
67#
68#  AUTHORS
69#	Philip Guenther <guenther@openbsd.org>
70#
71#  CAVEATS
72#	The elf format is infinitely extendable, but check_sym only
73#	handles a few weirdnesses.  Running it on or against new archs
74#	may result in meaningless results.
75#
76#  BUGS
#	While the author still finds the intermediate files useful,
78#	most people won't.  By default they should be placed in a
79#	temp directory and removed.
80#
81
# get_lib_name file ...
#	Print the value assigned to LIB in the given makefile(s): for every
#	line of the form "LIB = value" (blanks around each part optional),
#	output the first blank-free word after the "=".
get_lib_name()
{
	sed -n -e 's|^[ 	]*LIB[ 	]*=[ 	]*\([^ 	]*\).*|\1|p' "$@"
}
86
# pick_highest file ...
#	Select the highest-versioned library among the arguments, which are
#	expected to be named like libfoo.so.MAJOR.MINOR.  Arguments that are
#	not regular files (including an unmatched glob pattern) are skipped,
#	as are names whose last two dot-separated components are not plain
#	decimal numbers (libfoo.so.1.0.orig, libfoo.so.7, ...).
#
#	Sets the globals old (chosen path, empty if none), omaj and omin
#	(its major and minor numbers).  Returns 0 if a library was chosen,
#	non-zero otherwise.
pick_highest()
{
	old=
	omaj=-1
	omin=0
	for i
	do
		[[ -f $i ]] || continue
		maj=${i%.*}; maj=${maj##*.}
		min=${i##*.}
		# Only compare real version numbers: a non-numeric component
		# would be evaluated as an arithmetic expression by -gt/-eq
		# and could make a stray file win.
		case $maj in
		""|*[!0-9]*)	continue;;
		esac
		case $min in
		""|*[!0-9]*)	continue;;
		esac
		if [[ $maj -gt $omaj || ( $maj -eq $omaj && $min -gt $omin ) ]]
		then
			old=$i
			omaj=$maj
			omin=$min
		fi
	done
	[[ $old != "" ]]
}
106
# usage [message ...]
#	Without arguments (the -h case): print the synopsis on stdout and
#	exit 0.  With arguments: print them as a single error message,
#	followed by the synopsis, on stderr and exit 1.
#	Never returns to the caller.
usage()
{
	usage="usage: check_sym [-chkv] [old [new]]"
	if [[ $# -gt 0 ]]
	then
		# "$*" rather than "$@": inside a longer quoted string "$@"
		# would split the message into one word per argument
		echo "check_sym: $*
$usage" >&2
		exit 1
	fi
	echo "$usage"
	exit 0
}
119
# Work directory is chosen later; make sure nothing is inherited from
# the environment.
unset odir

# Names of all the intermediate files, kept as a brace pattern.
# NOTE(review): this relies on the shell applying brace expansion to the
# result of the unquoted $file_list expansion (ksh does, bash does not);
# confirm before changing the interpreter.
file_list={D{,S,W,O},J,S,U,d,j,r,s}{1,2}

verbose=false
keep_temp=false
while getopts :chkv opt "$@"
do
	case $opt in
	h)	usage
		;;
	v)	verbose=true
		;;
	k)	keep_temp=true
		;;
	c)	# clean up the files left in /tmp and do nothing else
		rm -f /tmp/$file_list
		exit 0
		;;
	\?)	usage "unknown option -- $OPTARG"
		;;
	esac
done
shift $((OPTIND - 1))
if [[ $# -gt 2 ]]
then
	usage "too many arguments"
fi
138
# Old library?
# Sets old (path of the library to compare against) and lib (its name
# without the .so.* suffix).
if [[ $1 != ?(*/)lib*.so* ]]
then
	# Not given on the command line: take the library name from the
	# LIB setting of the Makefile in the current directory, falling
	# back to libc
	if [[ -f Makefile ]] && lib=$(get_lib_name Makefile) &&
	   [[ -n $lib ]]
	then
		lib=lib$lib
	else
		lib=libc
	fi

	# Prefer the highest numbered copy in the current directory,
	# then the highest numbered one in /usr/lib
	pick_highest $lib.so.* || pick_highest /usr/lib/$lib.so.* || {
		echo "unable to find $lib.so.*" >&2
		exit 1
	}
elif [[ -f $1 ]]
then
	old=$1
	lib=${old##*/}
	lib=${lib%%.so.*}
	shift
else
	echo "$1 doesn't exist" >&2
	exit 1
fi
169
# New library?
# Sets new to the path of the library being checked: either the next
# command-line argument or obj/<lib>.so.<major>.<minor>, with major and
# minor taken from ./shlib_version.
if [[ $1 = ?(*/)lib*.so* ]]
then
	new=$1
	shift
else
	# Dig info out of the just built library
	if [[ ! -f ./shlib_version ]]
	then
		# say so ourselves instead of letting '.' abort the script
		echo "./shlib_version doesn't exist" >&2
		exit 1
	fi
	. ./shlib_version
	if [[ -z $major || -z $minor ]]
	then
		echo "./shlib_version doesn't set major and minor" >&2
		exit 1
	fi
	new=obj/${lib}.so.${major}.${minor}
fi
if [[ ! -f $new ]]
then
	echo "$new doesn't exist" >&2
	exit 1
fi
185
# filt_symtab
#	Filter for the output of readelf -s (stdin to stdout).  Some symbols
#	carry an extra field of the form [<other>: 88]; dropping it gives
#	every line the same columns, which is easier to parse.
#	Not really arch-specific, but I've only seen it on alpha
filt_symtab()
{
	sed -e 's|\[<other>: [0-9a-f]*\]||'
}
192
# Choose the directory for the intermediate files and arrange cleanup.
# With -k they go straight into /tmp and are only removed if we are
# interrupted or a command fails; otherwise they live in a private
# temporary directory that is removed on every exit path.
if $keep_temp
then
	odir=/tmp
	files=
	# $files is deliberately unquoted: it is a space-separated list
	# of the /tmp paths created by the loop below
	trap 'ret=$?; rm -f $files; exit $ret' 1 2 15 ERR
else
	trap 'ret=$?; rm -rf "$odir"; exit $ret' 0 1 2 15 ERR
	odir=$(mktemp -dt check_sym.XXXXXXXXXX)
fi

# precreate all the files we'll use, but with noclobber set to avoid
# symlink attacks
set -C
# "$odir" is quoted so a temporary directory whose path contains blanks
# or glob characters stays one word; $file_list must stay unquoted so
# its brace pattern is expanded
for i in "$odir"/$file_list
do
	rm -f "$i"
	3>"$i"
	files="$files $i"
done
set +C
212
# Dump the raw data for both libraries into the work directory:
#	r1/r2	relocations (readelf -r) of the old/new library
#	s1/s2	symbol tables (readelf -s), normalized by filt_symtab
# The library paths come from the command line, so keep them quoted.
readelf -rW "$old" > "$odir/r1"
readelf -rW "$new" > "$odir/r2"

readelf -sW "$old" | filt_symtab > "$odir/s1"
readelf -sW "$new" | filt_symtab > "$odir/s2"


# jump_slots needs to know about the two machine types it treats
# specially; everything else is handled generically
case $(readelf -h "$new" | grep '^ *Machine:') in
*MIPS*)	cpu=mips64;;
*HPPA*)	cpu=hppa;;
*)	cpu=dontcare;;
esac

if [[ $cpu = mips64 ]]
then
	# value of the MIPS_GOTSYM entry of the dynamic section, as
	# printed by readelf
	gotsym1=$(readelf -d "$old" | awk '$2 ~ /MIPS_GOTSYM/{print $3}')
	gotsym2=$(readelf -d "$new" | awk '$2 ~ /MIPS_GOTSYM/{print $3}')
fi

# Now that we're done accessing $old and $new (which could be
# relative paths), chdir into our work directory, whatever it is
cd "$odir"
235
# jump_slots n
#	Write the sorted names of the lazily-resolved (PLT) functions of
#	library n to the file j<n> in the current directory.  Depending on
#	$cpu they are taken from the relocations in r<n> or, for mips64,
#	from the dynamic symbols in s<n> at or above index $gotsym<n>.
jump_slots()
{
	{
		if [[ $cpu = hppa ]]
		then
			awk '/IPLT/ && $5 != "" { print $5 }' "r$1"
		elif [[ $cpu = mips64 ]]
		then
			# the $((gotsym$1)) converts hex to decimal
			awk -v g=$((gotsym$1)) '
				/^Symbol table ..symtab/ { exit }
				$6 == "PROTECTED" { next }
				$4 == "FUNC" && $1+0 >= g { print $8 }
			' "s$1"
		else
			awk '/JU*MP_SL/ && $5 != "" { print $5 }' "r$1"
		fi
	} | sort -o "j$1"
}
250
# dynamic_sym n
#	Split the dynamic symbol table of library n (the part of s<n> before
#	the .symtab section) into sorted files in the current directory:
#	  U<n>	names of undefined symbols
#	  DS<n>	names of defined GLOBAL symbols
#	  DW<n>	names of defined WEAK symbols
#	  D<n>	names of all defined non-LOCAL symbols
#	  DO<n>	"name size" of defined non-LOCAL OBJECT symbols
#	  d<n>	"type bind visibility name" of every defined symbol
#	A file whose category has no symbols is not written by this function.
dynamic_sym()
{
	awk -v s="$1" '
	BEGIN {
		undef  = "sort -o U" s
		strong = "sort -o DS" s
		weak   = "sort -o DW" s
		dyn    = "sort -o D" s
		objs   = "sort -o DO" s
	}
	/^Symbol table ..symtab/ { exit }
	! /^ *[1-9]/ { next }
	$7 == "UND" { print $8 | undef; next }
	{
		if ($5 == "GLOBAL")
			print $8 | strong
		if ($5 == "WEAK")
			print $8 | weak
		if ($5 != "LOCAL") {
			print $8 | dyn
			if ($4 == "OBJECT")
				print $8, $3 | objs
		}
		print $4, $5, $6, $8
	}' "s$1" | sort -o "d$1"
}
262
# static_sym n
#	From the .symtab part of s<n>, write "type bind visibility name" for
#	every numbered symbol line that does not contain LOCAL, sorted, to
#	the file S<n> in the current directory.
static_sym()
{
	awk '
	/^Symbol table ..symtab/ { in_symtab = 1 }
	/LOCAL/ { next }
	in_symtab && /^ *[1-9]/ { print $4, $5, $6, $8 }
	' "s$1" | sort -o "S$1"
}
268
# data_sym_changes file1 file2
#	Join the two sorted "name size" lists on the name and print
#	"name old --> new" for each name present in both whose second
#	field differs.
data_sym_changes()
{
	join "$@" |
	    awk '$2 != $3 { printf "%s %s --> %s\n", $1, $2, $3 }'
}
272
# output_if_not_empty leader command [arg ...]
#	Run the command; if it prints at least one non-empty line, print
#	the leader, then the command's output with each line indented by a
#	tab, then an empty line.  Otherwise print nothing.
#	The command is run twice: once to test, once to display.
output_if_not_empty()
{
	leader=$1
	shift

	# nothing (or only empty lines) to show: stay silent
	"$@" | grep -q . || return 0

	echo "$leader"
	"$@" | sed 's:^:	:'
	echo
}
283
284
# Build the derived lists for the old (1) and new (2) library.  J<n> is
# what is in j<n> but not in U<n>.
for n in 1 2
do
	jump_slots "$n"
	dynamic_sym "$n"
	static_sym "$n"
	comm -23 "j$n" "U$n" > "J$n"
done
292
# Report the differences between the two libraries.
echo "$old --> $new"

# Exported symbols: names, binding strength and data object sizes
if ! cmp -s d1 d2 || ! cmp -s DO1 DO2
then
	printf "Dynamic export changes:\n"
	output_if_not_empty "added:" comm -13 D1 D2
	output_if_not_empty "removed:" comm -23 D1 D2
	output_if_not_empty "weakened:" comm -12 DS1 DW2
	output_if_not_empty "strengthened:" comm -12 DW1 DS2
	output_if_not_empty "data object sizes changes:" \
					data_sym_changes DO1 DO2
else
	printf "No dynamic export changes\n"
fi

# Undefined symbols referenced
if ! cmp -s U1 U2
then
	printf "External reference changes:\n"
	output_if_not_empty "added:" comm -13 U1 U2
	output_if_not_empty "removed:" comm -23 U1 U2
fi

# -v: show the lines of the relocation dumps that start with "R"
if $verbose
then
	printf "\nReloc counts:\nbefore:\n"
	grep ^R r1
	printf "\nafter:\n"
	grep ^R r2
fi

# Lazily-resolved functions
output_if_not_empty "PLT added:" comm -13 J1 J2
output_if_not_empty "PLT removed:" comm -23 J1 J2
322