1/*
2 * Copyright 2010-2011 PathScale, Inc. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are met:
6 *
7 * 1. Redistributions of source code must retain the above copyright notice,
8 *    this list of conditions and the following disclaimer.
9 *
10 * 2. Redistributions in binary form must reproduce the above copyright notice,
11 *    this list of conditions and the following disclaimer in the documentation
12 *    and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS
15 * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
16 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
18 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
21 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
22 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
23 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
24 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 */
26/**
27 * dwarf_eh.h - Defines some helper functions for parsing DWARF exception
28 * handling tables.
29 *
30 * This file contains various helper functions that are independent of the
31 * language-specific code.  It can be used in any personality function for the
32 * Itanium ABI.
33 */
34#include <assert.h>
35
36// TODO: Factor out Itanium / ARM differences.  We probably want an itanium.h
37// and arm.h that can be included by this file depending on the target ABI.
38
39// _GNU_SOURCE must be defined for unwind.h to expose some of the functions
40// that we want.  If it isn't, then we define it and undefine it to make sure
41// that it doesn't impact the rest of the program.
42#ifndef _GNU_SOURCE
43#	define _GNU_SOURCE 1
44#	include "unwind.h"
45#	undef _GNU_SOURCE
46#else
47#	include "unwind.h"
48#endif
49
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
51
/// Type used for pointers into DWARF data.  It is a byte pointer, so pointer
/// arithmetic on it advances one octet at a time.
typedef unsigned char *dw_eh_ptr_t;

/// Flag indicating a signed quantity.  The signed members of
/// enum dwarf_data_encoding are formed by OR-ing this bit into the
/// corresponding unsigned encoding.
#define DW_EH_PE_signed 0x08
/// DWARF data encoding types.  These values occupy the low four bits of an
/// encoding octet (see get_encoding()).
enum dwarf_data_encoding
{
	/// Absolute pointer value
	DW_EH_PE_absptr   = 0x00,
	/// Unsigned, little-endian, base 128-encoded (variable length).
	DW_EH_PE_uleb128 = 0x01,
	/// Unsigned 16-bit integer.
	DW_EH_PE_udata2  = 0x02,
	/// Unsigned 32-bit integer.
	DW_EH_PE_udata4  = 0x03,
	/// Unsigned 64-bit integer.
	DW_EH_PE_udata8  = 0x04,
	/// Signed, little-endian, base 128-encoded (variable length)
	DW_EH_PE_sleb128 = DW_EH_PE_uleb128 | DW_EH_PE_signed,
	/// Signed 16-bit integer.
	DW_EH_PE_sdata2  = DW_EH_PE_udata2 | DW_EH_PE_signed,
	/// Signed 32-bit integer.
	DW_EH_PE_sdata4  = DW_EH_PE_udata4 | DW_EH_PE_signed,
	/// Signed 64-bit integer.
	DW_EH_PE_sdata8  = DW_EH_PE_udata8 | DW_EH_PE_signed
};
79
80/**
81 * Returns the encoding for a DWARF EH table entry.  The encoding is stored in
82 * the low four of an octet.  The high four bits store the addressing mode.
83 */
84static inline enum dwarf_data_encoding get_encoding(unsigned char x)
85{
86	return (enum dwarf_data_encoding)(x & 0xf);
87}
88
/**
 * DWARF addressing mode constants.  When reading a pointer value from a DWARF
 * exception table, you must know how it is stored and what the addressing mode
 * is.  The low four bits tell you the encoding, allowing you to decode a
 * number.  The high four bits tell you the addressing mode, allowing you to
 * turn that number into an address in memory.
 *
 * Note that get_base() masks with 0x70, so it never returns DW_EH_PE_omit or
 * DW_EH_PE_indirect; those two are tested for separately.
 */
enum dwarf_data_relative
{
	/// Value is omitted.  This is compared against the whole encoding octet
	/// rather than against the masked addressing-mode bits.
	DW_EH_PE_omit     = 0xff,
	/// Value relative to program counter
	DW_EH_PE_pcrel    = 0x10,
	/// Value relative to the text segment
	DW_EH_PE_textrel  = 0x20,
	/// Value relative to the data segment
	DW_EH_PE_datarel  = 0x30,
	/// Value relative to the start of the function
	DW_EH_PE_funcrel  = 0x40,
	/// Aligned pointer (Not supported yet - are they actually used?)
	DW_EH_PE_aligned  = 0x50,
	/// Pointer points to address of real value.  This is a flag bit that is
	/// combined with one of the addressing modes above (see is_indirect()).
	DW_EH_PE_indirect = 0x80
};
113/**
114 * Returns the addressing mode component of this encoding.
115 */
116static inline enum dwarf_data_relative get_base(unsigned char x)
117{
118	return (enum dwarf_data_relative)(x & 0x70);
119}
120/**
121 * Returns whether an encoding represents an indirect address.
122 */
123static int is_indirect(unsigned char x)
124{
125	return ((x & DW_EH_PE_indirect) == DW_EH_PE_indirect);
126}
127
128/**
129 * Returns the size of a fixed-size encoding.  This function will abort if
130 * called with a value that is not a fixed-size encoding.
131 */
132static inline int dwarf_size_of_fixed_size_field(unsigned char type)
133{
134	switch (get_encoding(type))
135	{
136		default: abort();
137		case DW_EH_PE_sdata2:
138		case DW_EH_PE_udata2: return 2;
139		case DW_EH_PE_sdata4:
140		case DW_EH_PE_udata4: return 4;
141		case DW_EH_PE_sdata8:
142		case DW_EH_PE_udata8: return 8;
143		case DW_EH_PE_absptr: return sizeof(void*);
144	}
145}
146
147/**
148 * Read an unsigned, little-endian, base-128, DWARF value.  Updates *data to
149 * point to the end of the value.  Stores the number of bits read in the value
150 * pointed to by b, allowing you to determine the value of the highest bit, and
151 * therefore the sign of a signed value.
152 *
153 * This function is not intended to be called directly.  Use read_sleb128() or
154 * read_uleb128() for reading signed and unsigned versions, respectively.
155 */
156static uint64_t read_leb128(dw_eh_ptr_t *data, int *b)
157{
158	uint64_t uleb = 0;
159	unsigned int bit = 0;
160	unsigned char digit = 0;
161	// We have to read at least one octet, and keep reading until we get to one
162	// with the high bit unset
163	do
164	{
165		// This check is a bit too strict - we should also check the highest
166		// bit of the digit.
167		assert(bit < sizeof(uint64_t) * 8);
168		// Get the base 128 digit
169		digit = (**data) & 0x7f;
170		// Add it to the current value
171		uleb += digit << bit;
172		// Increase the shift value
173		bit += 7;
174		// Proceed to the next octet
175		(*data)++;
176		// Terminate when we reach a value that does not have the high bit set
177		// (i.e. which was not modified when we mask it with 0x7f)
178	} while ((*(*data - 1)) != digit);
179	*b = bit;
180
181	return uleb;
182}
183
184/**
185 * Reads an unsigned little-endian base-128 value starting at the address
186 * pointed to by *data.  Updates *data to point to the next byte after the end
187 * of the variable-length value.
188 */
189static int64_t read_uleb128(dw_eh_ptr_t *data)
190{
191	int b;
192	return read_leb128(data, &b);
193}
194
195/**
196 * Reads a signed little-endian base-128 value starting at the address pointed
197 * to by *data.  Updates *data to point to the next byte after the end of the
198 * variable-length value.
199 */
200static int64_t read_sleb128(dw_eh_ptr_t *data)
201{
202	int bits;
203	// Read as if it's signed
204	uint64_t uleb = read_leb128(data, &bits);
205	// If the most significant bit read is 1, then we need to sign extend it
206	if ((uleb >> (bits-1)) == 1)
207	{
208		// Sign extend by setting all bits in front of it to 1
209		uleb |= ((int64_t)-1) << bits;
210	}
211	return (int64_t)uleb;
212}
213/**
214 * Reads a value using the specified encoding from the address pointed to by
215 * *data.  Updates the value of *data to point to the next byte after the end
216 * of the data.
217 */
218static uint64_t read_value(char encoding, dw_eh_ptr_t *data)
219{
220	enum dwarf_data_encoding type = get_encoding(encoding);
221	uint64_t v;
222	switch (type)
223	{
224		// Read fixed-length types
225#define READ(dwarf, type) \
226		case dwarf:\
227			v = (uint64_t)(*(type*)(*data));\
228			*data += sizeof(type);\
229			break;
230		READ(DW_EH_PE_udata2, uint16_t)
231		READ(DW_EH_PE_udata4, uint32_t)
232		READ(DW_EH_PE_udata8, uint64_t)
233		READ(DW_EH_PE_sdata2, int16_t)
234		READ(DW_EH_PE_sdata4, int32_t)
235		READ(DW_EH_PE_sdata8, int64_t)
236		READ(DW_EH_PE_absptr, intptr_t)
237#undef READ
238		// Read variable-length types
239		case DW_EH_PE_sleb128:
240			v = read_sleb128(data);
241			break;
242		case DW_EH_PE_uleb128:
243			v = read_uleb128(data);
244			break;
245		default: abort();
246	}
247
248	return v;
249}
250
251/**
252 * Resolves an indirect value.  This expects an unwind context, an encoding, a
253 * decoded value, and the start of the region as arguments.  The returned value
254 * is a pointer to the address identified by the encoded value.
255 *
256 * If the encoding does not specify an indirect value, then this returns v.
257 */
258static uint64_t resolve_indirect_value(_Unwind_Context *c,
259                                       unsigned char encoding,
260                                       int64_t v,
261                                       dw_eh_ptr_t start)
262{
263	switch (get_base(encoding))
264	{
265		case DW_EH_PE_pcrel:
266			v += (uint64_t)start;
267			break;
268		case DW_EH_PE_textrel:
269			v += (uint64_t)_Unwind_GetTextRelBase(c);
270			break;
271		case DW_EH_PE_datarel:
272			v += (uint64_t)_Unwind_GetDataRelBase(c);
273			break;
274		case DW_EH_PE_funcrel:
275			v += (uint64_t)_Unwind_GetRegionStart(c);
276		default:
277			break;
278	}
279	// If this is an indirect value, then it is really the address of the real
280	// value
281	// TODO: Check whether this should really always be a pointer - it seems to
282	// be a GCC extensions, so not properly documented...
283	if (is_indirect(encoding))
284	{
285		v = (uint64_t)(uintptr_t)*(void**)v;
286	}
287	return v;
288}
289
290
291/**
292 * Reads an encoding and a value, updating *data to point to the next byte.
293 */
294static inline void read_value_with_encoding(_Unwind_Context *context,
295                                            dw_eh_ptr_t *data,
296                                            uint64_t *out)
297{
298	dw_eh_ptr_t start = *data;
299	unsigned char encoding = *((*data)++);
300	// If this value is omitted, skip it and don't touch the output value
301	if (encoding == DW_EH_PE_omit) { return; }
302
303	*out = read_value(encoding, data);
304	*out = resolve_indirect_value(context, encoding, *out, start);
305}
306
/**
 * Structure storing a decoded language-specific data area.  Use parse_lsda()
 * to generate an instance of this structure from the address returned by the
 * generic unwind library.
 *
 * You should not need to inspect the fields of this structure directly if you
 * are just using this header.  The structure stores the locations of the
 * various tables used for unwinding exceptions and is used by the functions
 * for reading values from these tables.
 */
struct dwarf_eh_lsda
{
	/// The start of the region.  This is a cache of the value returned by
	/// _Unwind_GetRegionStart().
	dw_eh_ptr_t region_start;
	/// The base address for landing pads.  Landing-pad offsets read from the
	/// call-site table are added to this.  It defaults to region_start when
	/// the header omits an explicit value.
	dw_eh_ptr_t landing_pads;
	/// The start of the type table, or 0 if the header has no type table.
	dw_eh_ptr_t type_table;
	/// The encoding used for entries in the type tables.
	unsigned char type_table_encoding;
	/// The location of the call-site table.
	dw_eh_ptr_t call_site_table;
	/// The location of the action table.  This is also the first byte past
	/// the end of the call-site table.
	dw_eh_ptr_t action_table;
	/// The encoding used for entries in the call-site table.
	unsigned char callsite_encoding;
};
335
336/**
337 * Parse the header on the language-specific data area and return a structure
338 * containing the addresses and encodings of the various tables.
339 */
340static inline struct dwarf_eh_lsda parse_lsda(_Unwind_Context *context,
341                                              unsigned char *data)
342{
343	struct dwarf_eh_lsda lsda;
344
345	lsda.region_start = (dw_eh_ptr_t)(uintptr_t)_Unwind_GetRegionStart(context);
346
347	// If the landing pads are relative to anything other than the start of
348	// this region, find out where.  This is @LPStart in the spec, although the
349	// encoding that GCC uses does not quite match the spec.
350	uint64_t v = (uint64_t)(uintptr_t)lsda.region_start;
351	read_value_with_encoding(context, &data, &v);
352	lsda.landing_pads = (dw_eh_ptr_t)(uintptr_t)v;
353
354	// If there is a type table, find out where it is.  This is @TTBase in the
355	// spec.  Note: we find whether there is a type table pointer by checking
356	// whether the leading byte is DW_EH_PE_omit (0xff), which is not what the
357	// spec says, but does seem to be how G++ indicates this.
358	lsda.type_table = 0;
359	lsda.type_table_encoding = *data++;
360	if (lsda.type_table_encoding != DW_EH_PE_omit)
361	{
362		v = read_uleb128(&data);
363		dw_eh_ptr_t type_table = data;
364		type_table += v;
365		lsda.type_table = type_table;
366		//lsda.type_table = (uintptr_t*)(data + v);
367	}
368#if __arm__
369	lsda.type_table_encoding = (DW_EH_PE_pcrel | DW_EH_PE_indirect);
370#endif
371
372	lsda.callsite_encoding = (enum dwarf_data_encoding)(*(data++));
373
374	// Action table is immediately after the call site table
375	lsda.action_table = data;
376	uintptr_t callsite_size = (uintptr_t)read_uleb128(&data);
377	lsda.action_table = data + callsite_size;
378	// Call site table is immediately after the header
379	lsda.call_site_table = (dw_eh_ptr_t)data;
380
381
382	return lsda;
383}
384
/**
 * Structure representing an action to be performed while unwinding.  This
 * contains the address that should be unwound to and the action record that
 * provoked this action.
 *
 * dwarf_eh_find_callsite() fills this in and sets both fields to 0 before
 * searching, so a field that is still 0 afterwards means no value was found.
 */
struct dwarf_eh_action
{
	/**
	 * The address that this action directs should be the new program counter
	 * value after unwinding.
	 */
	dw_eh_ptr_t landing_pad;
	/// The address of the action record.
	dw_eh_ptr_t action_record;
};
400
401/**
402 * Look up the landing pad that corresponds to the current invoke.
403 * Returns true if record exists.  The context is provided by the generic
404 * unwind library and the lsda should be the result of a call to parse_lsda().
405 *
406 * The action record is returned via the result parameter.
407 */
408static bool dwarf_eh_find_callsite(struct _Unwind_Context *context,
409                                   struct dwarf_eh_lsda *lsda,
410                                   struct dwarf_eh_action *result)
411{
412	result->action_record = 0;
413	result->landing_pad = 0;
414	// The current instruction pointer offset within the region
415	uint64_t ip = _Unwind_GetIP(context) - _Unwind_GetRegionStart(context);
416	unsigned char *callsite_table = (unsigned char*)lsda->call_site_table;
417
418	while (callsite_table <= lsda->action_table)
419	{
420		// Once again, the layout deviates from the spec.
421		uint64_t call_site_start, call_site_size, landing_pad, action;
422		call_site_start = read_value(lsda->callsite_encoding, &callsite_table);
423		call_site_size = read_value(lsda->callsite_encoding, &callsite_table);
424
425		// Call site entries are sorted, so if we find a call site that's after
426		// the current instruction pointer then there is no action associated
427		// with this call and we should unwind straight through this frame
428		// without doing anything.
429		if (call_site_start > ip) { break; }
430
431		// Read the address of the landing pad and the action from the call
432		// site table.
433		landing_pad = read_value(lsda->callsite_encoding, &callsite_table);
434		action = read_uleb128(&callsite_table);
435
436		// We should not include the call_site_start (beginning of the region)
437		// address in the ip range. For each call site:
438		//
439		// address1: call proc
440		// address2: next instruction
441		//
442		// The call stack contains address2 and not address1, address1 can be
443		// at the end of another EH region.
444		if (call_site_start < ip && ip <= call_site_start + call_site_size)
445		{
446			if (action)
447			{
448				// Action records are 1-biased so both no-record and zeroth
449				// record can be stored.
450				result->action_record = lsda->action_table + action - 1;
451			}
452			// No landing pad means keep unwinding.
453			if (landing_pad)
454			{
455				// Landing pad is the offset from the value in the header
456				result->landing_pad = lsda->landing_pads + landing_pad;
457			}
458			return true;
459		}
460	}
461	return false;
462}
463
/// Defines a 64-bit exception class from 8 bytes (endian independent).  The
/// first argument becomes the most significant byte.  Every argument and the
/// whole expansion are parenthesised so that the macro behaves as a single
/// operand whatever expressions are passed in or surround it.
#define EXCEPTION_CLASS(a,b,c,d,e,f,g,h) \
	((((uint64_t)(a)) << 56) +\
	 (((uint64_t)(b)) << 48) +\
	 (((uint64_t)(c)) << 40) +\
	 (((uint64_t)(d)) << 32) +\
	 (((uint64_t)(e)) << 24) +\
	 (((uint64_t)(f)) << 16) +\
	 (((uint64_t)(g)) << 8) +\
	 ((uint64_t)(h)))

/// Defines a 32-bit exception class from 4 bytes, laid out like the low four
/// bytes of EXCEPTION_CLASS.  Parenthesised in the same way.
#define GENERIC_EXCEPTION_CLASS(e,f,g,h) \
	((((uint32_t)(e)) << 24) +\
	 (((uint32_t)(f)) << 16) +\
	 (((uint32_t)(g)) << 8) +\
	 ((uint32_t)(h)))
480