//===-- sanitizer_procmaps_mac.cc -----------------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Information about the process mappings (Mac-specific parts).
//===----------------------------------------------------------------------===//

#include "sanitizer_platform.h"
#if SANITIZER_MAC
#include "sanitizer_common.h"
#include "sanitizer_placement_new.h"
#include "sanitizer_procmaps.h"

#include <mach-o/dyld.h>
#include <mach-o/loader.h>
#include <mach/mach.h>

// These are not available in older macOS SDKs.
#ifndef CPU_SUBTYPE_X86_64_H
#define CPU_SUBTYPE_X86_64_H  ((cpu_subtype_t)8)   /* Haswell */
#endif
#ifndef CPU_SUBTYPE_ARM_V7S
#define CPU_SUBTYPE_ARM_V7S   ((cpu_subtype_t)11)  /* Swift */
#endif
#ifndef CPU_SUBTYPE_ARM_V7K
#define CPU_SUBTYPE_ARM_V7K   ((cpu_subtype_t)12)
#endif
#ifndef CPU_TYPE_ARM64
#define CPU_TYPE_ARM64        (CPU_TYPE_ARM | CPU_ARCH_ABI64)
#endif

namespace __sanitizer {

// Contains information used to iterate through sections.
struct MemoryMappedSegmentData {
  char name[kMaxSegName];
  uptr nsects;                        // Counts down as sections are visited.
  const char *current_load_cmd_addr;  // Next section struct to visit.
  u32 lc_type;                        // LC_SEGMENT or LC_SEGMENT_64.
  uptr base_virt_addr;                // Slide (or dyld base) added to addrs.
  uptr addr_mask;                     // Mask applied to addrs before the add.
};

template <typename Section>
static void NextSectionLoad(LoadedModule *module, MemoryMappedSegmentData *data,
                            bool isWritable) {
  const Section *sc = (const Section *)data->current_load_cmd_addr;
  data->current_load_cmd_addr += sizeof(Section);

  uptr sec_start = (sc->addr & data->addr_mask) + data->base_virt_addr;
  uptr sec_end = sec_start + sc->size;
  module->addAddressRange(sec_start, sec_end, /*executable=*/false, isWritable,
                          sc->sectname);
}
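
// The section structs are laid out contiguously right after their segment
// load command, which is what lets NextSectionLoad simply walk a pointer
// forward. Illustrative sketch of the layout for a 64-bit segment:
//
//   struct segment_command_64 sc;  // sc.nsects == N
//   struct section_64 sects[N];    // one NextSectionLoad call per entry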

void MemoryMappedSegment::AddAddressRanges(LoadedModule *module) {
  // Don't iterate over sections when the caller hasn't set up the
  // data pointer, when there are no sections, or when the segment
  // is executable. Avoid iterating over executable sections because
  // it will confuse libignore, and because the extra granularity
  // of information is not needed by any sanitizers.
  if (!data_ || !data_->nsects || IsExecutable()) {
    module->addAddressRange(start, end, IsExecutable(), IsWritable(),
                            data_ ? data_->name : nullptr);
    return;
  }

  do {
    if (data_->lc_type == LC_SEGMENT) {
      NextSectionLoad<struct section>(module, data_, IsWritable());
#ifdef MH_MAGIC_64
    } else if (data_->lc_type == LC_SEGMENT_64) {
      NextSectionLoad<struct section_64>(module, data_, IsWritable());
#endif
    }
  } while (--data_->nsects);
}

MemoryMappingLayout::MemoryMappingLayout(bool cache_enabled) {
  Reset();
}

MemoryMappingLayout::~MemoryMappingLayout() {
}

// More information about Mach-O headers can be found in mach-o/loader.h.
// Each Mach-O image has a header (mach_header or mach_header_64) starting with
// a magic number, and a list of linker load commands directly following the
// header.
// A load command is at least two 32-bit words: the command type and the
// command size in bytes. We're interested only in segment load commands
// (LC_SEGMENT and LC_SEGMENT_64), which indicate that a part of the file is
// mapped into the task's address space.
// The |vmaddr|, |vmsize| and |fileoff| fields of segment_command or
// segment_command_64 correspond to the memory address, memory size and the
// file offset of the current memory segment.
// Because these fields are taken from the images as is, one needs to add
// _dyld_get_image_vmaddr_slide() to get the actual addresses at runtime.
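//
// For example, a minimal one-shot walk over the load commands of a 64-bit
// image looks like this (an illustrative sketch; the real iteration in
// Next() below also handles 32-bit headers and is resumable):
//
//   const mach_header_64 *hdr = ...;  // image base address
//   const char *p = (const char *)hdr + sizeof(mach_header_64);
//   for (u32 i = 0; i < hdr->ncmds; i++) {
//     const load_command *lc = (const load_command *)p;
//     if (lc->cmd == LC_SEGMENT_64) {
//       const segment_command_64 *sc = (const segment_command_64 *)lc;
//       // sc->vmaddr plus the image's slide is the segment's runtime start.
//     }
//     p += lc->cmdsize;
//   }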

void MemoryMappingLayout::Reset() {
  // Count down from the top.
  // TODO(glider): as per man 3 dyld, iterating over the headers with
  // _dyld_image_count is thread-unsafe. We need to register callbacks for
  // adding and removing images which will invalidate the MemoryMappingLayout
  // state.
  data_.current_image = _dyld_image_count();
  data_.current_load_cmd_count = -1;
  data_.current_load_cmd_addr = 0;
  data_.current_magic = 0;
  data_.current_filetype = 0;
  data_.current_arch = kModuleArchUnknown;
  internal_memset(data_.current_uuid, 0, kModuleUUIDSize);
}

// The dyld load address should be unchanged throughout process execution,
// and it is expensive to compute once many libraries have been loaded,
// so cache it here and do not reset.
static mach_header *dyld_hdr = 0;
static const char kDyldPath[] = "/usr/lib/dyld";
static const int kDyldImageIdx = -1;

// static
void MemoryMappingLayout::CacheMemoryMappings() {
  // No-op on Mac for now.
}

void MemoryMappingLayout::LoadFromCache() {
  // No-op on Mac for now.
}

// _dyld_get_image_header() and related APIs don't report dyld itself.
// We work around this by manually recursing through the memory map
// until we hit a Mach header matching dyld instead. These recursive
// calls are expensive, but the first memory map generation occurs
// early in the process, when dyld is one of the only images loaded,
// so the header will be found after only a few iterations.
static mach_header *get_dyld_image_header() {
  unsigned depth = 1;
  vm_size_t size = 0;
  vm_address_t address = 0;
  kern_return_t err = KERN_SUCCESS;
  mach_msg_type_number_t count = VM_REGION_SUBMAP_INFO_COUNT_64;

  while (true) {
    struct vm_region_submap_info_64 info;
    err = vm_region_recurse_64(mach_task_self(), &address, &size, &depth,
                               (vm_region_info_t)&info, &count);
    if (err != KERN_SUCCESS) return nullptr;

    if (size >= sizeof(mach_header) && info.protection & kProtectionRead) {
      mach_header *hdr = (mach_header *)address;
      if ((hdr->magic == MH_MAGIC || hdr->magic == MH_MAGIC_64) &&
          hdr->filetype == MH_DYLINKER) {
        return hdr;
      }
    }
    address += size;
  }
}

const mach_header *get_dyld_hdr() {
  if (!dyld_hdr) dyld_hdr = get_dyld_image_header();

  return dyld_hdr;
}

// Next and NextSegmentLoad were inspired by base/sysinfo.cc in
// Google Perftools, https://github.com/gperftools/gperftools.

// NextSegmentLoad scans the current image for the next segment load command
// and returns the start and end addresses and file offset of the corresponding
// segment.
// Note that the segment addresses are not necessarily sorted.
template <u32 kLCSegment, typename SegmentCommand>
static bool NextSegmentLoad(MemoryMappedSegment *segment,
                            MemoryMappedSegmentData *seg_data,
                            MemoryMappingLayoutData &layout_data) {
  const char *lc = layout_data.current_load_cmd_addr;
  layout_data.current_load_cmd_addr += ((const load_command *)lc)->cmdsize;
  if (((const load_command *)lc)->cmd == kLCSegment) {
    const SegmentCommand *sc = (const SegmentCommand *)lc;
    uptr base_virt_addr, addr_mask;
    if (layout_data.current_image == kDyldImageIdx) {
      base_virt_addr = (uptr)get_dyld_hdr();
      // vmaddr is masked with 0xfffff because on macOS versions < 10.12,
      // it contains an absolute address rather than an offset for dyld.
      // To make matters even more complicated, this absolute address
      // isn't actually the absolute segment address, but the offset portion
      // of the address is accurate when combined with the dyld base address,
      // and the mask will give just this offset.
      addr_mask = 0xfffff;
    } else {
      base_virt_addr =
          (uptr)_dyld_get_image_vmaddr_slide(layout_data.current_image);
      addr_mask = ~0;
    }

    segment->start = (sc->vmaddr & addr_mask) + base_virt_addr;
    segment->end = segment->start + sc->vmsize;
    // Most callers don't need section information, so only fill this struct
    // when required.
    if (seg_data) {
      seg_data->nsects = sc->nsects;
      seg_data->current_load_cmd_addr =
          (const char *)lc + sizeof(SegmentCommand);
      seg_data->lc_type = kLCSegment;
      seg_data->base_virt_addr = base_virt_addr;
      seg_data->addr_mask = addr_mask;
      internal_strncpy(seg_data->name, sc->segname,
                       ARRAY_SIZE(seg_data->name));
    }

    // Return the initial protection.
    segment->protection = sc->initprot;
    segment->offset = (layout_data.current_filetype ==
                       /*MH_EXECUTE*/ 0x2)
                          ? sc->vmaddr
                          : sc->fileoff;
    if (segment->filename) {
      const char *src = (layout_data.current_image == kDyldImageIdx)
                            ? kDyldPath
                            : _dyld_get_image_name(layout_data.current_image);
      internal_strncpy(segment->filename, src, segment->filename_size);
    }
    segment->arch = layout_data.current_arch;
    internal_memcpy(segment->uuid, layout_data.current_uuid, kModuleUUIDSize);
    return true;
  }
  return false;
}
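
// A worked example of the slide arithmetic above: if a segment's vmaddr in
// the file is 0x100000000 and dyld loaded the image with an ASLR slide of
// 0x4000, the segment actually starts at 0x100004000 at runtime. For dyld
// itself, base_virt_addr is the discovered header address and the 0xfffff
// mask extracts just the segment's offset portion.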

ModuleArch ModuleArchFromCpuType(cpu_type_t cputype, cpu_subtype_t cpusubtype) {
  cpusubtype = cpusubtype & ~CPU_SUBTYPE_MASK;
  switch (cputype) {
    case CPU_TYPE_I386:
      return kModuleArchI386;
    case CPU_TYPE_X86_64:
      if (cpusubtype == CPU_SUBTYPE_X86_64_ALL) return kModuleArchX86_64;
      if (cpusubtype == CPU_SUBTYPE_X86_64_H) return kModuleArchX86_64H;
      CHECK(0 && "Invalid subtype of x86_64");
      return kModuleArchUnknown;
    case CPU_TYPE_ARM:
      if (cpusubtype == CPU_SUBTYPE_ARM_V6) return kModuleArchARMV6;
      if (cpusubtype == CPU_SUBTYPE_ARM_V7) return kModuleArchARMV7;
      if (cpusubtype == CPU_SUBTYPE_ARM_V7S) return kModuleArchARMV7S;
      if (cpusubtype == CPU_SUBTYPE_ARM_V7K) return kModuleArchARMV7K;
      CHECK(0 && "Invalid subtype of ARM");
      return kModuleArchUnknown;
    case CPU_TYPE_ARM64:
      return kModuleArchARM64;
    default:
      CHECK(0 && "Invalid CPU type");
      return kModuleArchUnknown;
  }
}
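
// For example, ModuleArchFromCpuType(CPU_TYPE_X86_64, CPU_SUBTYPE_X86_64_H)
// yields kModuleArchX86_64H; capability bits in the subtype (such as
// CPU_SUBTYPE_LIB64) are stripped by the ~CPU_SUBTYPE_MASK step first.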

static const load_command *NextCommand(const load_command *lc) {
  return (const load_command *)((const char *)lc + lc->cmdsize);
}

static void FindUUID(const load_command *first_lc, u8 *uuid_output) {
  for (const load_command *lc = first_lc; lc->cmd != 0; lc = NextCommand(lc)) {
    if (lc->cmd != LC_UUID) continue;

    const uuid_command *uuid_lc = (const uuid_command *)lc;
    const uint8_t *uuid = &uuid_lc->uuid[0];
    internal_memcpy(uuid_output, uuid, kModuleUUIDSize);
    return;
  }
}

static bool IsModuleInstrumented(const load_command *first_lc) {
  for (const load_command *lc = first_lc; lc->cmd != 0; lc = NextCommand(lc)) {
    if (lc->cmd != LC_LOAD_DYLIB) continue;

    const dylib_command *dylib_lc = (const dylib_command *)lc;
    uint32_t dylib_name_offset = dylib_lc->dylib.name.offset;
    const char *dylib_name = ((const char *)dylib_lc) + dylib_name_offset;
    dylib_name = StripModuleName(dylib_name);
    if (dylib_name != 0 && (internal_strstr(dylib_name, "libclang_rt."))) {
      return true;
    }
  }
  return false;
}
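
// A sanitized image links against one of the compiler-rt runtimes, e.g. a
// path like ".../lib/darwin/libclang_rt.asan_osx_dynamic.dylib" (an
// illustrative path); after StripModuleName() the basename still contains
// "libclang_rt.", which is what the check above looks for.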

bool MemoryMappingLayout::Next(MemoryMappedSegment *segment) {
  for (; data_.current_image >= kDyldImageIdx; data_.current_image--) {
    const mach_header *hdr = (data_.current_image == kDyldImageIdx)
                                 ? get_dyld_hdr()
                                 : _dyld_get_image_header(data_.current_image);
    if (!hdr) continue;
    if (data_.current_load_cmd_count < 0) {
      // Set up for this image.
      data_.current_load_cmd_count = hdr->ncmds;
      data_.current_magic = hdr->magic;
      data_.current_filetype = hdr->filetype;
      data_.current_arch = ModuleArchFromCpuType(hdr->cputype, hdr->cpusubtype);
      switch (data_.current_magic) {
#ifdef MH_MAGIC_64
        case MH_MAGIC_64: {
          data_.current_load_cmd_addr =
              (const char *)hdr + sizeof(mach_header_64);
          break;
        }
#endif
        case MH_MAGIC: {
          data_.current_load_cmd_addr = (const char *)hdr + sizeof(mach_header);
          break;
        }
        default: {
          // Unknown magic: skip this image, and make sure the next one
          // gets set up from scratch.
          data_.current_load_cmd_count = -1;
          continue;
        }
      }
      FindUUID((const load_command *)data_.current_load_cmd_addr,
               data_.current_uuid);
      data_.current_instrumented = IsModuleInstrumented(
          (const load_command *)data_.current_load_cmd_addr);
    }

    // Visit each of the image's ncmds load commands exactly once, resuming
    // where we left off when a previous call returned a segment early.
    while (data_.current_load_cmd_count > 0) {
      data_.current_load_cmd_count--;
      switch (data_.current_magic) {
        // data_.current_magic may be only one of MH_MAGIC, MH_MAGIC_64.
#ifdef MH_MAGIC_64
        case MH_MAGIC_64: {
          if (NextSegmentLoad<LC_SEGMENT_64, struct segment_command_64>(
                  segment, segment->data_, data_))
            return true;
          break;
        }
#endif
        case MH_MAGIC: {
          if (NextSegmentLoad<LC_SEGMENT, struct segment_command>(
                  segment, segment->data_, data_))
            return true;
          break;
        }
      }
    }
    // If we get here, no more load commands in this image mention segments.
    // Reset the counter so the next image gets set up, and go on to it.
    data_.current_load_cmd_count = -1;
  }
  return false;
}
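
// Typical usage of the iterator above (a sketch; the segment's filename
// buffer is caller-provided, as in DumpListOfModules() below):
//
//   MemoryMappingLayout layout(/*cache_enabled=*/false);
//   char filename[kMaxPathLength];
//   MemoryMappedSegment segment(filename, sizeof(filename));
//   while (layout.Next(&segment)) {
//     // Inspect segment.start, segment.end, segment.protection, ...
//   }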

void MemoryMappingLayout::DumpListOfModules(
    InternalMmapVectorNoCtor<LoadedModule> *modules) {
  Reset();
  InternalScopedString module_name(kMaxPathLength);
  MemoryMappedSegment segment(module_name.data(), kMaxPathLength);
  MemoryMappedSegmentData data;
  segment.data_ = &data;
  while (Next(&segment)) {
    if (segment.filename[0] == '\0') continue;
    LoadedModule *cur_module = nullptr;
    if (!modules->empty() &&
        0 == internal_strcmp(segment.filename, modules->back().full_name())) {
      cur_module = &modules->back();
    } else {
      modules->push_back(LoadedModule());
      cur_module = &modules->back();
      cur_module->set(segment.filename, segment.start, segment.arch,
                      segment.uuid, data_.current_instrumented);
    }
    segment.AddAddressRanges(cur_module);
  }
}

}  // namespace __sanitizer

#endif  // SANITIZER_MAC