1/* -*- mode: C++; c-basic-offset: 4; tab-width: 4 -*-
2 *
3 * Copyright (c) 2008-2010 Apple Inc. All rights reserved.
4 *
5 * @APPLE_LICENSE_HEADER_START@
6 *
7 * This file contains Original Code and/or Modifications of Original Code
8 * as defined in and that are subject to the Apple Public Source License
9 * Version 2.0 (the 'License'). You may not use this file except in
10 * compliance with the License. Please obtain a copy of the License at
11 * http://www.opensource.apple.com/apsl/ and read it before using this
12 * file.
13 *
14 * The Original Code and all software distributed under the License are
15 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
16 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
17 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
19 * Please see the License for the specific language governing rights and
20 * limitations under the License.
21 *
22 * @APPLE_LICENSE_HEADER_END@
23*/
24#ifndef __MACH_O_TRIE__
25#define __MACH_O_TRIE__
26
27#include <algorithm>
28
29#include "MachOFileAbstraction.hpp"
30
31
32namespace mach_o {
33namespace trie {
34
35struct Edge
36{
37					Edge(const char* s, struct Node* n) : fSubString(s), fChild(n) { }
38					~Edge() {  }
39	const char*		fSubString;
40	struct Node*	fChild;
41
42};
43
44struct Node
45{
46						Node(const char* s) : fCummulativeString(s), fAddress(0), fFlags(0),
47											fOther(0), fImportedName(NULL), fOrdered(false),
48											fHaveExportInfo(false), fTrieOffset(0) {}
49						~Node() { }
50	const char*			fCummulativeString;
51	std::vector<Edge>	fChildren;
52	uint64_t			fAddress;
53	uint64_t			fFlags;
54	uint64_t			fOther;
55	const char*			fImportedName;
56	bool				fOrdered;
57	bool				fHaveExportInfo;
58	uint32_t			fTrieOffset;
59
60	void addSymbol(const char* fullStr, uint64_t address, uint64_t flags, uint64_t other, const char* importName) {
61		const char* partialStr = &fullStr[strlen(fCummulativeString)];
62		for (std::vector<Edge>::iterator it = fChildren.begin(); it != fChildren.end(); ++it) {
63			Edge& e = *it;
64			int subStringLen = strlen(e.fSubString);
65			if ( strncmp(e.fSubString, partialStr, subStringLen) == 0 ) {
66				// already have matching edge, go down that path
67				e.fChild->addSymbol(fullStr, address, flags, other, importName);
68				return;
69			}
70			else {
71				for (int i=subStringLen-1; i > 0; --i) {
72					if ( strncmp(e.fSubString, partialStr, i) == 0 ) {
73						// found a common substring, splice in new node
74						//  was A -> C,  now A -> B -> C
75						char* bNodeCummStr = strdup(e.fChild->fCummulativeString);
76						bNodeCummStr[strlen(bNodeCummStr)+i-subStringLen] = '\0';
77						//node* aNode = this;
78						Node* bNode = new Node(bNodeCummStr);
79						Node* cNode = e.fChild;
80						char* abEdgeStr = strdup(e.fSubString);
81						abEdgeStr[i] = '\0';
82						char* bcEdgeStr = strdup(&e.fSubString[i]);
83						Edge& abEdge = e;
84						abEdge.fSubString = abEdgeStr;
85						abEdge.fChild = bNode;
86						Edge bcEdge(bcEdgeStr, cNode);
87						bNode->fChildren.push_back(bcEdge);
88						bNode->addSymbol(fullStr, address, flags, other, importName);
89						return;
90					}
91				}
92			}
93		}
94
95		// no commonality with any existing child, make a new edge that is this whole string
96		Node* newNode = new Node(strdup(fullStr));
97		Edge newEdge(strdup(partialStr), newNode);
98		fChildren.push_back(newEdge);
99		newNode->fAddress = address;
100		newNode->fFlags = flags;
101		newNode->fOther = other;
102		if ( (flags & EXPORT_SYMBOL_FLAGS_REEXPORT) && (importName != NULL) && (strcmp(fullStr,importName) != 0) )
103			newNode->fImportedName = importName;
104		else
105			newNode->fImportedName = NULL;
106		newNode->fHaveExportInfo = true;
107	}
108
109	void addOrderedNodes(const char* name, std::vector<Node*>& orderedNodes) {
110		if ( !fOrdered ) {
111			orderedNodes.push_back(this);
112			//fprintf(stderr, "ordered %p %s\n", this, fCummulativeString);
113			fOrdered = true;
114		}
115		const char* partialStr = &name[strlen(fCummulativeString)];
116		for (std::vector<Edge>::iterator it = fChildren.begin(); it != fChildren.end(); ++it) {
117			Edge& e = *it;
118			int subStringLen = strlen(e.fSubString);
119			if ( strncmp(e.fSubString, partialStr, subStringLen) == 0 ) {
120				// already have matching edge, go down that path
121				e.fChild->addOrderedNodes(name, orderedNodes);
122				return;
123			}
124		}
125	}
126
127	// byte for terminal node size in bytes, or 0x00 if not terminal node
128	// teminal node (uleb128 flags, uleb128 addr [uleb128 other])
129	// byte for child node count
130	//  each child: zero terminated substring, uleb128 node offset
131	bool updateOffset(uint32_t& offset) {
132		uint32_t nodeSize = 1; // length of export info when no export info
133		if ( fHaveExportInfo ) {
134			if ( fFlags & EXPORT_SYMBOL_FLAGS_REEXPORT ) {
135				nodeSize = uleb128_size(fFlags) + uleb128_size(fOther); // ordinal
136				if ( fImportedName != NULL )
137					nodeSize += strlen(fImportedName);
138				++nodeSize; // trailing zero in imported name
139			}
140			else {
141				nodeSize = uleb128_size(fFlags) + uleb128_size(fAddress);
142				if ( fFlags & EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER )
143					nodeSize += uleb128_size(fOther);
144			}
145			// do have export info, overall node size so far is uleb128 of export info + export info
146			nodeSize += uleb128_size(nodeSize);
147		}
148		// add children
149		++nodeSize; // byte for count of chidren
150		for (std::vector<Edge>::iterator it = fChildren.begin(); it != fChildren.end(); ++it) {
151			Edge& e = *it;
152			nodeSize += strlen(e.fSubString) + 1 + uleb128_size(e.fChild->fTrieOffset);
153		}
154		bool result = (fTrieOffset != offset);
155		fTrieOffset = offset;
156		//fprintf(stderr, "updateOffset %p %05d %s\n", this, fTrieOffset, fCummulativeString);
157		offset += nodeSize;
158		// return true if fTrieOffset was changed
159		return result;
160	}
161
162	void appendToStream(std::vector<uint8_t>& out) {
163		if ( fHaveExportInfo ) {
164			if ( fFlags & EXPORT_SYMBOL_FLAGS_REEXPORT ) {
165				if ( fImportedName != NULL ) {
166					// nodes with re-export info: size, flags, ordinal, string
167					uint32_t nodeSize = uleb128_size(fFlags) + uleb128_size(fOther) + strlen(fImportedName) + 1;
168					out.push_back(nodeSize);
169					append_uleb128(fFlags, out);
170					append_uleb128(fOther, out);
171					append_string(fImportedName, out);
172				}
173				else {
174					// nodes with re-export info: size, flags, ordinal, empty-string
175					uint32_t nodeSize = uleb128_size(fFlags) + uleb128_size(fOther) + 1;
176					out.push_back(nodeSize);
177					append_uleb128(fFlags, out);
178					append_uleb128(fOther, out);
179					out.push_back(0);
180				}
181			}
182			else if ( fFlags & EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER ) {
183				// nodes with export info: size, flags, address, other
184				uint32_t nodeSize = uleb128_size(fFlags) + uleb128_size(fAddress) + uleb128_size(fOther);
185				out.push_back(nodeSize);
186				append_uleb128(fFlags, out);
187				append_uleb128(fAddress, out);
188				append_uleb128(fOther, out);
189			}
190			else {
191				// nodes with export info: size, flags, address
192				uint32_t nodeSize = uleb128_size(fFlags) + uleb128_size(fAddress);
193				out.push_back(nodeSize);
194				append_uleb128(fFlags, out);
195				append_uleb128(fAddress, out);
196			}
197		}
198		else {
199			// no export info uleb128 of zero is one byte of zero
200			out.push_back(0);
201		}
202		// write number of children
203		out.push_back(fChildren.size());
204		// write each child
205		for (std::vector<Edge>::iterator it = fChildren.begin(); it != fChildren.end(); ++it) {
206			Edge& e = *it;
207			append_string(e.fSubString, out);
208			append_uleb128(e.fChild->fTrieOffset, out);
209		}
210	}
211
212private:
213	static void append_uleb128(uint64_t value, std::vector<uint8_t>& out) {
214		uint8_t byte;
215		do {
216			byte = value & 0x7F;
217			value &= ~0x7F;
218			if ( value != 0 )
219				byte |= 0x80;
220			out.push_back(byte);
221			value = value >> 7;
222		} while( byte >= 0x80 );
223	}
224
225	static void append_string(const char* str, std::vector<uint8_t>& out) {
226		for (const char* s = str; *s != '\0'; ++s)
227			out.push_back(*s);
228		out.push_back('\0');
229	}
230
231	static unsigned int	uleb128_size(uint64_t value) {
232		uint32_t result = 0;
233		do {
234			value = value >> 7;
235			++result;
236		} while ( value != 0 );
237		return result;
238	}
239
240
241};
242
243inline uint64_t read_uleb128(const uint8_t*& p, const uint8_t* end) {
244	uint64_t result = 0;
245	int		 bit = 0;
246	do {
247		if (p == end)
248			throw "malformed uleb128 extends beyond trie";
249
250		uint64_t slice = *p & 0x7f;
251
252		if (bit >= 64 || slice << bit >> bit != slice)
253			throw "uleb128 too big for 64-bits";
254		else {
255			result |= (slice << bit);
256			bit += 7;
257		}
258	}
259	while (*p++ & 0x80);
260	return result;
261}
262
263
264
265struct Entry
266{
267	const char*		name;
268	uint64_t		address;
269	uint64_t		flags;
270	uint64_t		other;
271	const char*		importName;
272};
273
274
275
276inline void makeTrie(const std::vector<Entry>& entries, std::vector<uint8_t>& output)
277{
278	Node start(strdup(""));
279
280	// make nodes for all exported symbols
281	for (std::vector<Entry>::const_iterator it = entries.begin(); it != entries.end(); ++it) {
282		start.addSymbol(it->name, it->address, it->flags, it->other, it->importName);
283	}
284
285	// create vector of nodes
286	std::vector<Node*> orderedNodes;
287	orderedNodes.reserve(entries.size()*2);
288	for (std::vector<Entry>::const_iterator it = entries.begin(); it != entries.end(); ++it) {
289		start.addOrderedNodes(it->name, orderedNodes);
290	}
291
292	// assign each node in the vector an offset in the trie stream, iterating until all uleb128 sizes have stabilized
293	bool more;
294	do {
295		uint32_t offset = 0;
296		more = false;
297		for (std::vector<Node*>::iterator it = orderedNodes.begin(); it != orderedNodes.end(); ++it) {
298			if ( (*it)->updateOffset(offset) )
299				more = true;
300		}
301	} while ( more );
302
303	// create trie stream
304	for (std::vector<Node*>::iterator it = orderedNodes.begin(); it != orderedNodes.end(); ++it) {
305		(*it)->appendToStream(output);
306	}
307}
308
309struct EntryWithOffset
310{
311	uintptr_t		nodeOffset;
312	Entry			entry;
313
314	bool operator<(const EntryWithOffset& other) const { return ( nodeOffset < other.nodeOffset ); }
315};
316
317
318
319static inline void processExportNode(const uint8_t* const start, const uint8_t* p, const uint8_t* const end,
320									char* cummulativeString, int curStrOffset,
321									std::vector<EntryWithOffset>& output)
322{
323	if ( p >= end )
324		throw "malformed trie, node past end";
325	const uint8_t terminalSize = read_uleb128(p, end);
326	const uint8_t* children = p + terminalSize;
327	if ( terminalSize != 0 ) {
328		EntryWithOffset e;
329		e.nodeOffset = p-start;
330		e.entry.name = strdup(cummulativeString);
331		e.entry.flags = read_uleb128(p, end);
332		if ( e.entry.flags & EXPORT_SYMBOL_FLAGS_REEXPORT ) {
333			e.entry.address = 0;
334			e.entry.other = read_uleb128(p, end); // dylib ordinal
335			e.entry.importName = (char*)p;
336		}
337		else {
338			e.entry.address = read_uleb128(p, end);
339			if ( e.entry.flags & EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER )
340				e.entry.other = read_uleb128(p, end);
341			else
342				e.entry.other = 0;
343			e.entry.importName = NULL;
344		}
345		output.push_back(e);
346	}
347	const uint8_t childrenCount = *children++;
348	const uint8_t* s = children;
349	for (uint8_t i=0; i < childrenCount; ++i) {
350		int edgeStrLen = 0;
351		while (*s != '\0') {
352			cummulativeString[curStrOffset+edgeStrLen] = *s++;
353			++edgeStrLen;
354		}
355		cummulativeString[curStrOffset+edgeStrLen] = *s++;
356		uint32_t childNodeOffet = read_uleb128(s, end);
357		processExportNode(start, start+childNodeOffet, end, cummulativeString, curStrOffset+edgeStrLen, output);
358	}
359}
360
361
362inline void parseTrie(const uint8_t* start, const uint8_t* end, std::vector<Entry>& output)
363{
364	// empty trie has no entries
365	if ( start == end )
366		return;
367	char cummulativeString[4000];
368	std::vector<EntryWithOffset> entries;
369	processExportNode(start, start, end, cummulativeString, 0, entries);
370	// to preserve tie layout order, sort by node offset
371	std::sort(entries.begin(), entries.end());
372	// copy to output
373	output.reserve(entries.size());
374	for (std::vector<EntryWithOffset>::iterator it=entries.begin(); it != entries.end(); ++it)
375		output.push_back(it->entry);
376}
377
378
379
380
381}; // namespace trie
382}; // namespace mach_o
383
384
385#endif	// __MACH_O_TRIE__
386
387
388