1/* -*- mode: C++; c-basic-offset: 4; tab-width: 4 -*- 2 * 3 * Copyright (c) 2008-2010 Apple Inc. All rights reserved. 4 * 5 * @APPLE_LICENSE_HEADER_START@ 6 * 7 * This file contains Original Code and/or Modifications of Original Code 8 * as defined in and that are subject to the Apple Public Source License 9 * Version 2.0 (the 'License'). You may not use this file except in 10 * compliance with the License. Please obtain a copy of the License at 11 * http://www.opensource.apple.com/apsl/ and read it before using this 12 * file. 13 * 14 * The Original Code and all software distributed under the License are 15 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER 16 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, 17 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 19 * Please see the License for the specific language governing rights and 20 * limitations under the License. 21 * 22 * @APPLE_LICENSE_HEADER_END@ 23*/ 24#ifndef __MACH_O_TRIE__ 25#define __MACH_O_TRIE__ 26 27#include <algorithm> 28 29#include "MachOFileAbstraction.hpp" 30 31 32namespace mach_o { 33namespace trie { 34 35struct Edge 36{ 37 Edge(const char* s, struct Node* n) : fSubString(s), fChild(n) { } 38 ~Edge() { } 39 const char* fSubString; 40 struct Node* fChild; 41 42}; 43 44struct Node 45{ 46 Node(const char* s) : fCummulativeString(s), fAddress(0), fFlags(0), 47 fOther(0), fImportedName(NULL), fOrdered(false), 48 fHaveExportInfo(false), fTrieOffset(0) {} 49 ~Node() { } 50 const char* fCummulativeString; 51 std::vector<Edge> fChildren; 52 uint64_t fAddress; 53 uint64_t fFlags; 54 uint64_t fOther; 55 const char* fImportedName; 56 bool fOrdered; 57 bool fHaveExportInfo; 58 uint32_t fTrieOffset; 59 60 void addSymbol(const char* fullStr, uint64_t address, uint64_t flags, uint64_t other, const char* importName) { 61 const char* partialStr = &fullStr[strlen(fCummulativeString)]; 62 for (std::vector<Edge>::iterator it = fChildren.begin(); it != fChildren.end(); ++it) { 63 Edge& e = *it; 64 int subStringLen = strlen(e.fSubString); 65 if ( strncmp(e.fSubString, partialStr, subStringLen) == 0 ) { 66 // already have matching edge, go down that path 67 e.fChild->addSymbol(fullStr, address, flags, other, importName); 68 return; 69 } 70 else { 71 for (int i=subStringLen-1; i > 0; --i) { 72 if ( strncmp(e.fSubString, partialStr, i) == 0 ) { 73 // found a common substring, splice in new node 74 // was A -> C, now A -> B -> C 75 char* bNodeCummStr = strdup(e.fChild->fCummulativeString); 76 bNodeCummStr[strlen(bNodeCummStr)+i-subStringLen] = '\0'; 77 //node* aNode = this; 78 Node* bNode = new Node(bNodeCummStr); 79 Node* cNode = e.fChild; 80 char* abEdgeStr = strdup(e.fSubString); 81 abEdgeStr[i] = '\0'; 82 char* bcEdgeStr = strdup(&e.fSubString[i]); 83 Edge& abEdge = e; 84 abEdge.fSubString = abEdgeStr; 85 abEdge.fChild = bNode; 86 Edge bcEdge(bcEdgeStr, cNode); 87 bNode->fChildren.push_back(bcEdge); 88 bNode->addSymbol(fullStr, address, flags, other, importName); 89 return; 90 } 91 } 92 } 93 } 94 95 // no commonality with any existing child, make a new edge that is this whole string 96 Node* newNode = new Node(strdup(fullStr)); 97 Edge newEdge(strdup(partialStr), newNode); 98 fChildren.push_back(newEdge); 99 newNode->fAddress = address; 100 newNode->fFlags = flags; 101 newNode->fOther = other; 102 if ( (flags & EXPORT_SYMBOL_FLAGS_REEXPORT) && (importName != NULL) && (strcmp(fullStr,importName) != 0) ) 103 newNode->fImportedName = importName; 104 else 105 newNode->fImportedName = NULL; 106 newNode->fHaveExportInfo = true; 107 } 108 109 void addOrderedNodes(const char* name, std::vector<Node*>& orderedNodes) { 110 if ( !fOrdered ) { 111 orderedNodes.push_back(this); 112 //fprintf(stderr, "ordered %p %s\n", this, fCummulativeString); 113 fOrdered = true; 114 } 115 const char* partialStr = &name[strlen(fCummulativeString)]; 116 for (std::vector<Edge>::iterator it = fChildren.begin(); it != fChildren.end(); ++it) { 117 Edge& e = *it; 118 int subStringLen = strlen(e.fSubString); 119 if ( strncmp(e.fSubString, partialStr, subStringLen) == 0 ) { 120 // already have matching edge, go down that path 121 e.fChild->addOrderedNodes(name, orderedNodes); 122 return; 123 } 124 } 125 } 126 127 // byte for terminal node size in bytes, or 0x00 if not terminal node 128 // teminal node (uleb128 flags, uleb128 addr [uleb128 other]) 129 // byte for child node count 130 // each child: zero terminated substring, uleb128 node offset 131 bool updateOffset(uint32_t& offset) { 132 uint32_t nodeSize = 1; // length of export info when no export info 133 if ( fHaveExportInfo ) { 134 if ( fFlags & EXPORT_SYMBOL_FLAGS_REEXPORT ) { 135 nodeSize = uleb128_size(fFlags) + uleb128_size(fOther); // ordinal 136 if ( fImportedName != NULL ) 137 nodeSize += strlen(fImportedName); 138 ++nodeSize; // trailing zero in imported name 139 } 140 else { 141 nodeSize = uleb128_size(fFlags) + uleb128_size(fAddress); 142 if ( fFlags & EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER ) 143 nodeSize += uleb128_size(fOther); 144 } 145 // do have export info, overall node size so far is uleb128 of export info + export info 146 nodeSize += uleb128_size(nodeSize); 147 } 148 // add children 149 ++nodeSize; // byte for count of chidren 150 for (std::vector<Edge>::iterator it = fChildren.begin(); it != fChildren.end(); ++it) { 151 Edge& e = *it; 152 nodeSize += strlen(e.fSubString) + 1 + uleb128_size(e.fChild->fTrieOffset); 153 } 154 bool result = (fTrieOffset != offset); 155 fTrieOffset = offset; 156 //fprintf(stderr, "updateOffset %p %05d %s\n", this, fTrieOffset, fCummulativeString); 157 offset += nodeSize; 158 // return true if fTrieOffset was changed 159 return result; 160 } 161 162 void appendToStream(std::vector<uint8_t>& out) { 163 if ( fHaveExportInfo ) { 164 if ( fFlags & EXPORT_SYMBOL_FLAGS_REEXPORT ) { 165 if ( fImportedName != NULL ) { 166 // nodes with re-export info: size, flags, ordinal, string 167 uint32_t nodeSize = uleb128_size(fFlags) + uleb128_size(fOther) + strlen(fImportedName) + 1; 168 out.push_back(nodeSize); 169 append_uleb128(fFlags, out); 170 append_uleb128(fOther, out); 171 append_string(fImportedName, out); 172 } 173 else { 174 // nodes with re-export info: size, flags, ordinal, empty-string 175 uint32_t nodeSize = uleb128_size(fFlags) + uleb128_size(fOther) + 1; 176 out.push_back(nodeSize); 177 append_uleb128(fFlags, out); 178 append_uleb128(fOther, out); 179 out.push_back(0); 180 } 181 } 182 else if ( fFlags & EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER ) { 183 // nodes with export info: size, flags, address, other 184 uint32_t nodeSize = uleb128_size(fFlags) + uleb128_size(fAddress) + uleb128_size(fOther); 185 out.push_back(nodeSize); 186 append_uleb128(fFlags, out); 187 append_uleb128(fAddress, out); 188 append_uleb128(fOther, out); 189 } 190 else { 191 // nodes with export info: size, flags, address 192 uint32_t nodeSize = uleb128_size(fFlags) + uleb128_size(fAddress); 193 out.push_back(nodeSize); 194 append_uleb128(fFlags, out); 195 append_uleb128(fAddress, out); 196 } 197 } 198 else { 199 // no export info uleb128 of zero is one byte of zero 200 out.push_back(0); 201 } 202 // write number of children 203 out.push_back(fChildren.size()); 204 // write each child 205 for (std::vector<Edge>::iterator it = fChildren.begin(); it != fChildren.end(); ++it) { 206 Edge& e = *it; 207 append_string(e.fSubString, out); 208 append_uleb128(e.fChild->fTrieOffset, out); 209 } 210 } 211 212private: 213 static void append_uleb128(uint64_t value, std::vector<uint8_t>& out) { 214 uint8_t byte; 215 do { 216 byte = value & 0x7F; 217 value &= ~0x7F; 218 if ( value != 0 ) 219 byte |= 0x80; 220 out.push_back(byte); 221 value = value >> 7; 222 } while( byte >= 0x80 ); 223 } 224 225 static void append_string(const char* str, std::vector<uint8_t>& out) { 226 for (const char* s = str; *s != '\0'; ++s) 227 out.push_back(*s); 228 out.push_back('\0'); 229 } 230 231 static unsigned int uleb128_size(uint64_t value) { 232 uint32_t result = 0; 233 do { 234 value = value >> 7; 235 ++result; 236 } while ( value != 0 ); 237 return result; 238 } 239 240 241}; 242 243inline uint64_t read_uleb128(const uint8_t*& p, const uint8_t* end) { 244 uint64_t result = 0; 245 int bit = 0; 246 do { 247 if (p == end) 248 throw "malformed uleb128 extends beyond trie"; 249 250 uint64_t slice = *p & 0x7f; 251 252 if (bit >= 64 || slice << bit >> bit != slice) 253 throw "uleb128 too big for 64-bits"; 254 else { 255 result |= (slice << bit); 256 bit += 7; 257 } 258 } 259 while (*p++ & 0x80); 260 return result; 261} 262 263 264 265struct Entry 266{ 267 const char* name; 268 uint64_t address; 269 uint64_t flags; 270 uint64_t other; 271 const char* importName; 272}; 273 274 275 276inline void makeTrie(const std::vector<Entry>& entries, std::vector<uint8_t>& output) 277{ 278 Node start(strdup("")); 279 280 // make nodes for all exported symbols 281 for (std::vector<Entry>::const_iterator it = entries.begin(); it != entries.end(); ++it) { 282 start.addSymbol(it->name, it->address, it->flags, it->other, it->importName); 283 } 284 285 // create vector of nodes 286 std::vector<Node*> orderedNodes; 287 orderedNodes.reserve(entries.size()*2); 288 for (std::vector<Entry>::const_iterator it = entries.begin(); it != entries.end(); ++it) { 289 start.addOrderedNodes(it->name, orderedNodes); 290 } 291 292 // assign each node in the vector an offset in the trie stream, iterating until all uleb128 sizes have stabilized 293 bool more; 294 do { 295 uint32_t offset = 0; 296 more = false; 297 for (std::vector<Node*>::iterator it = orderedNodes.begin(); it != orderedNodes.end(); ++it) { 298 if ( (*it)->updateOffset(offset) ) 299 more = true; 300 } 301 } while ( more ); 302 303 // create trie stream 304 for (std::vector<Node*>::iterator it = orderedNodes.begin(); it != orderedNodes.end(); ++it) { 305 (*it)->appendToStream(output); 306 } 307} 308 309struct EntryWithOffset 310{ 311 uintptr_t nodeOffset; 312 Entry entry; 313 314 bool operator<(const EntryWithOffset& other) const { return ( nodeOffset < other.nodeOffset ); } 315}; 316 317 318 319static inline void processExportNode(const uint8_t* const start, const uint8_t* p, const uint8_t* const end, 320 char* cummulativeString, int curStrOffset, 321 std::vector<EntryWithOffset>& output) 322{ 323 if ( p >= end ) 324 throw "malformed trie, node past end"; 325 const uint8_t terminalSize = read_uleb128(p, end); 326 const uint8_t* children = p + terminalSize; 327 if ( terminalSize != 0 ) { 328 EntryWithOffset e; 329 e.nodeOffset = p-start; 330 e.entry.name = strdup(cummulativeString); 331 e.entry.flags = read_uleb128(p, end); 332 if ( e.entry.flags & EXPORT_SYMBOL_FLAGS_REEXPORT ) { 333 e.entry.address = 0; 334 e.entry.other = read_uleb128(p, end); // dylib ordinal 335 e.entry.importName = (char*)p; 336 } 337 else { 338 e.entry.address = read_uleb128(p, end); 339 if ( e.entry.flags & EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER ) 340 e.entry.other = read_uleb128(p, end); 341 else 342 e.entry.other = 0; 343 e.entry.importName = NULL; 344 } 345 output.push_back(e); 346 } 347 const uint8_t childrenCount = *children++; 348 const uint8_t* s = children; 349 for (uint8_t i=0; i < childrenCount; ++i) { 350 int edgeStrLen = 0; 351 while (*s != '\0') { 352 cummulativeString[curStrOffset+edgeStrLen] = *s++; 353 ++edgeStrLen; 354 } 355 cummulativeString[curStrOffset+edgeStrLen] = *s++; 356 uint32_t childNodeOffet = read_uleb128(s, end); 357 processExportNode(start, start+childNodeOffet, end, cummulativeString, curStrOffset+edgeStrLen, output); 358 } 359} 360 361 362inline void parseTrie(const uint8_t* start, const uint8_t* end, std::vector<Entry>& output) 363{ 364 // empty trie has no entries 365 if ( start == end ) 366 return; 367 char cummulativeString[4000]; 368 std::vector<EntryWithOffset> entries; 369 processExportNode(start, start, end, cummulativeString, 0, entries); 370 // to preserve tie layout order, sort by node offset 371 std::sort(entries.begin(), entries.end()); 372 // copy to output 373 output.reserve(entries.size()); 374 for (std::vector<EntryWithOffset>::iterator it=entries.begin(); it != entries.end(); ++it) 375 output.push_back(it->entry); 376} 377 378 379 380 381}; // namespace trie 382}; // namespace mach_o 383 384 385#endif // __MACH_O_TRIE__ 386 387 388