dfsan.cpp revision 360784
1//===-- dfsan.cpp ---------------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file is a part of DataFlowSanitizer.
10//
11// DataFlowSanitizer runtime.  This file defines the public interface to
12// DataFlowSanitizer as well as the definition of certain runtime functions
13// called automatically by the compiler (specifically the instrumentation pass
14// in llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp).
15//
16// The public interface is defined in include/sanitizer/dfsan_interface.h whose
17// functions are prefixed dfsan_ while the compiler interface functions are
18// prefixed __dfsan_.
19//===----------------------------------------------------------------------===//
20
21#include "sanitizer_common/sanitizer_atomic.h"
22#include "sanitizer_common/sanitizer_common.h"
23#include "sanitizer_common/sanitizer_file.h"
24#include "sanitizer_common/sanitizer_flags.h"
25#include "sanitizer_common/sanitizer_flag_parser.h"
26#include "sanitizer_common/sanitizer_libc.h"
27
28#include "dfsan/dfsan.h"
29
30using namespace __dfsan;
31
32typedef atomic_uint16_t atomic_dfsan_label;
33static const dfsan_label kInitializingLabel = -1;
34
35static const uptr kNumLabels = 1 << (sizeof(dfsan_label) * 8);
36
37static atomic_dfsan_label __dfsan_last_label;
38static dfsan_label_info __dfsan_label_info[kNumLabels];
39
40Flags __dfsan::flags_data;
41
42SANITIZER_INTERFACE_ATTRIBUTE THREADLOCAL dfsan_label __dfsan_retval_tls;
43SANITIZER_INTERFACE_ATTRIBUTE THREADLOCAL dfsan_label __dfsan_arg_tls[64];
44
45SANITIZER_INTERFACE_ATTRIBUTE uptr __dfsan_shadow_ptr_mask;
46
47// On Linux/x86_64, memory is laid out as follows:
48//
49// +--------------------+ 0x800000000000 (top of memory)
50// | application memory |
51// +--------------------+ 0x700000008000 (kAppAddr)
52// |                    |
53// |       unused       |
54// |                    |
55// +--------------------+ 0x200200000000 (kUnusedAddr)
56// |    union table     |
57// +--------------------+ 0x200000000000 (kUnionTableAddr)
58// |   shadow memory    |
59// +--------------------+ 0x000000010000 (kShadowAddr)
60// | reserved by kernel |
61// +--------------------+ 0x000000000000
62//
63// To derive a shadow memory address from an application memory address,
64// bits 44-46 are cleared to bring the address into the range
65// [0x000000008000,0x100000000000).  Then the address is shifted left by 1 to
66// account for the double byte representation of shadow labels and move the
67// address into the shadow memory range.  See the function shadow_for below.
68
69// On Linux/MIPS64, memory is laid out as follows:
70//
71// +--------------------+ 0x10000000000 (top of memory)
72// | application memory |
73// +--------------------+ 0xF000008000 (kAppAddr)
74// |                    |
75// |       unused       |
76// |                    |
77// +--------------------+ 0x2200000000 (kUnusedAddr)
78// |    union table     |
79// +--------------------+ 0x2000000000 (kUnionTableAddr)
80// |   shadow memory    |
81// +--------------------+ 0x0000010000 (kShadowAddr)
82// | reserved by kernel |
83// +--------------------+ 0x0000000000
84
// On Linux/AArch64 (39-bit VMA), memory is laid out as follows:
86//
87// +--------------------+ 0x8000000000 (top of memory)
88// | application memory |
89// +--------------------+ 0x7000008000 (kAppAddr)
90// |                    |
91// |       unused       |
92// |                    |
93// +--------------------+ 0x1200000000 (kUnusedAddr)
94// |    union table     |
95// +--------------------+ 0x1000000000 (kUnionTableAddr)
96// |   shadow memory    |
97// +--------------------+ 0x0000010000 (kShadowAddr)
98// | reserved by kernel |
99// +--------------------+ 0x0000000000
100
// On Linux/AArch64 (42-bit VMA), memory is laid out as follows:
102//
103// +--------------------+ 0x40000000000 (top of memory)
104// | application memory |
105// +--------------------+ 0x3ff00008000 (kAppAddr)
106// |                    |
107// |       unused       |
108// |                    |
// +--------------------+ 0x8200000000 (kUnusedAddr)
110// |    union table     |
111// +--------------------+ 0x8000000000 (kUnionTableAddr)
112// |   shadow memory    |
113// +--------------------+ 0x0000010000 (kShadowAddr)
114// | reserved by kernel |
115// +--------------------+ 0x0000000000
116
// On Linux/AArch64 (48-bit VMA), memory is laid out as follows:
118//
119// +--------------------+ 0x1000000000000 (top of memory)
120// | application memory |
121// +--------------------+ 0xffff00008000 (kAppAddr)
122// |       unused       |
123// +--------------------+ 0xaaaab0000000 (top of PIE address)
124// | application PIE    |
// +--------------------+ 0xaaaaa0000000 (bottom of PIE address)
126// |                    |
127// |       unused       |
128// |                    |
// +--------------------+ 0x8200000000 (kUnusedAddr)
130// |    union table     |
131// +--------------------+ 0x8000000000 (kUnionTableAddr)
132// |   shadow memory    |
133// +--------------------+ 0x0000010000 (kShadowAddr)
134// | reserved by kernel |
135// +--------------------+ 0x0000000000
136
137typedef atomic_dfsan_label dfsan_union_table_t[kNumLabels][kNumLabels];
138
139#ifdef DFSAN_RUNTIME_VMA
140// Runtime detected VMA size.
141int __dfsan::vmaSize;
142#endif
143
144static uptr UnusedAddr() {
145  return MappingArchImpl<MAPPING_UNION_TABLE_ADDR>()
146         + sizeof(dfsan_union_table_t);
147}
148
149static atomic_dfsan_label *union_table(dfsan_label l1, dfsan_label l2) {
150  return &(*(dfsan_union_table_t *) UnionTableAddr())[l1][l2];
151}
152
153// Checks we do not run out of labels.
154static void dfsan_check_label(dfsan_label label) {
155  if (label == kInitializingLabel) {
156    Report("FATAL: DataFlowSanitizer: out of labels\n");
157    Die();
158  }
159}
160
161// Resolves the union of two unequal labels.  Nonequality is a precondition for
162// this function (the instrumentation pass inlines the equality test).
163extern "C" SANITIZER_INTERFACE_ATTRIBUTE
164dfsan_label __dfsan_union(dfsan_label l1, dfsan_label l2) {
165  if (flags().fast16labels)
166    return l1 | l2;
167  DCHECK_NE(l1, l2);
168
169  if (l1 == 0)
170    return l2;
171  if (l2 == 0)
172    return l1;
173
174  if (l1 > l2)
175    Swap(l1, l2);
176
177  atomic_dfsan_label *table_ent = union_table(l1, l2);
178  // We need to deal with the case where two threads concurrently request
179  // a union of the same pair of labels.  If the table entry is uninitialized,
180  // (i.e. 0) use a compare-exchange to set the entry to kInitializingLabel
181  // (i.e. -1) to mark that we are initializing it.
182  dfsan_label label = 0;
183  if (atomic_compare_exchange_strong(table_ent, &label, kInitializingLabel,
184                                     memory_order_acquire)) {
185    // Check whether l2 subsumes l1.  We don't need to check whether l1
186    // subsumes l2 because we are guaranteed here that l1 < l2, and (at least
187    // in the cases we are interested in) a label may only subsume labels
188    // created earlier (i.e. with a lower numerical value).
189    if (__dfsan_label_info[l2].l1 == l1 ||
190        __dfsan_label_info[l2].l2 == l1) {
191      label = l2;
192    } else {
193      label =
194        atomic_fetch_add(&__dfsan_last_label, 1, memory_order_relaxed) + 1;
195      dfsan_check_label(label);
196      __dfsan_label_info[label].l1 = l1;
197      __dfsan_label_info[label].l2 = l2;
198    }
199    atomic_store(table_ent, label, memory_order_release);
200  } else if (label == kInitializingLabel) {
201    // Another thread is initializing the entry.  Wait until it is finished.
202    do {
203      internal_sched_yield();
204      label = atomic_load(table_ent, memory_order_acquire);
205    } while (label == kInitializingLabel);
206  }
207  return label;
208}
209
210extern "C" SANITIZER_INTERFACE_ATTRIBUTE
211dfsan_label __dfsan_union_load(const dfsan_label *ls, uptr n) {
212  dfsan_label label = ls[0];
213  for (uptr i = 1; i != n; ++i) {
214    dfsan_label next_label = ls[i];
215    if (label != next_label)
216      label = __dfsan_union(label, next_label);
217  }
218  return label;
219}
220
221extern "C" SANITIZER_INTERFACE_ATTRIBUTE
222void __dfsan_unimplemented(char *fname) {
223  if (flags().warn_unimplemented)
224    Report("WARNING: DataFlowSanitizer: call to uninstrumented function %s\n",
225           fname);
226}
227
228// Use '-mllvm -dfsan-debug-nonzero-labels' and break on this function
229// to try to figure out where labels are being introduced in a nominally
230// label-free program.
231extern "C" SANITIZER_INTERFACE_ATTRIBUTE void __dfsan_nonzero_label() {
232  if (flags().warn_nonzero_labels)
233    Report("WARNING: DataFlowSanitizer: saw nonzero label\n");
234}
235
236// Indirect call to an uninstrumented vararg function. We don't have a way of
237// handling these at the moment.
238extern "C" SANITIZER_INTERFACE_ATTRIBUTE void
239__dfsan_vararg_wrapper(const char *fname) {
240  Report("FATAL: DataFlowSanitizer: unsupported indirect call to vararg "
241         "function %s\n", fname);
242  Die();
243}
244
245// Like __dfsan_union, but for use from the client or custom functions.  Hence
246// the equality comparison is done here before calling __dfsan_union.
247SANITIZER_INTERFACE_ATTRIBUTE dfsan_label
248dfsan_union(dfsan_label l1, dfsan_label l2) {
249  if (l1 == l2)
250    return l1;
251  return __dfsan_union(l1, l2);
252}
253
254extern "C" SANITIZER_INTERFACE_ATTRIBUTE
255dfsan_label dfsan_create_label(const char *desc, void *userdata) {
256  dfsan_label label =
257    atomic_fetch_add(&__dfsan_last_label, 1, memory_order_relaxed) + 1;
258  dfsan_check_label(label);
259  __dfsan_label_info[label].l1 = __dfsan_label_info[label].l2 = 0;
260  __dfsan_label_info[label].desc = desc;
261  __dfsan_label_info[label].userdata = userdata;
262  return label;
263}
264
265extern "C" SANITIZER_INTERFACE_ATTRIBUTE
266void __dfsan_set_label(dfsan_label label, void *addr, uptr size) {
267  for (dfsan_label *labelp = shadow_for(addr); size != 0; --size, ++labelp) {
268    // Don't write the label if it is already the value we need it to be.
269    // In a program where most addresses are not labeled, it is common that
270    // a page of shadow memory is entirely zeroed.  The Linux copy-on-write
271    // implementation will share all of the zeroed pages, making a copy of a
272    // page when any value is written.  The un-sharing will happen even if
273    // the value written does not change the value in memory.  Avoiding the
274    // write when both |label| and |*labelp| are zero dramatically reduces
275    // the amount of real memory used by large programs.
276    if (label == *labelp)
277      continue;
278
279    *labelp = label;
280  }
281}
282
283SANITIZER_INTERFACE_ATTRIBUTE
284void dfsan_set_label(dfsan_label label, void *addr, uptr size) {
285  __dfsan_set_label(label, addr, size);
286}
287
288SANITIZER_INTERFACE_ATTRIBUTE
289void dfsan_add_label(dfsan_label label, void *addr, uptr size) {
290  for (dfsan_label *labelp = shadow_for(addr); size != 0; --size, ++labelp)
291    if (*labelp != label)
292      *labelp = __dfsan_union(*labelp, label);
293}
294
295// Unlike the other dfsan interface functions the behavior of this function
296// depends on the label of one of its arguments.  Hence it is implemented as a
297// custom function.
298extern "C" SANITIZER_INTERFACE_ATTRIBUTE dfsan_label
299__dfsw_dfsan_get_label(long data, dfsan_label data_label,
300                       dfsan_label *ret_label) {
301  *ret_label = 0;
302  return data_label;
303}
304
305SANITIZER_INTERFACE_ATTRIBUTE dfsan_label
306dfsan_read_label(const void *addr, uptr size) {
307  if (size == 0)
308    return 0;
309  return __dfsan_union_load(shadow_for(addr), size);
310}
311
312extern "C" SANITIZER_INTERFACE_ATTRIBUTE
313const struct dfsan_label_info *dfsan_get_label_info(dfsan_label label) {
314  return &__dfsan_label_info[label];
315}
316
317extern "C" SANITIZER_INTERFACE_ATTRIBUTE int
318dfsan_has_label(dfsan_label label, dfsan_label elem) {
319  if (label == elem)
320    return true;
321  const dfsan_label_info *info = dfsan_get_label_info(label);
322  if (info->l1 != 0) {
323    return dfsan_has_label(info->l1, elem) || dfsan_has_label(info->l2, elem);
324  } else {
325    return false;
326  }
327}
328
329extern "C" SANITIZER_INTERFACE_ATTRIBUTE dfsan_label
330dfsan_has_label_with_desc(dfsan_label label, const char *desc) {
331  const dfsan_label_info *info = dfsan_get_label_info(label);
332  if (info->l1 != 0) {
333    return dfsan_has_label_with_desc(info->l1, desc) ||
334           dfsan_has_label_with_desc(info->l2, desc);
335  } else {
336    return internal_strcmp(desc, info->desc) == 0;
337  }
338}
339
340extern "C" SANITIZER_INTERFACE_ATTRIBUTE uptr
341dfsan_get_label_count(void) {
342  dfsan_label max_label_allocated =
343      atomic_load(&__dfsan_last_label, memory_order_relaxed);
344
345  return static_cast<uptr>(max_label_allocated);
346}
347
348extern "C" SANITIZER_INTERFACE_ATTRIBUTE void
349dfsan_dump_labels(int fd) {
350  dfsan_label last_label =
351      atomic_load(&__dfsan_last_label, memory_order_relaxed);
352
353  for (uptr l = 1; l <= last_label; ++l) {
354    char buf[64];
355    internal_snprintf(buf, sizeof(buf), "%u %u %u ", l,
356                      __dfsan_label_info[l].l1, __dfsan_label_info[l].l2);
357    WriteToFile(fd, buf, internal_strlen(buf));
358    if (__dfsan_label_info[l].l1 == 0 && __dfsan_label_info[l].desc) {
359      WriteToFile(fd, __dfsan_label_info[l].desc,
360                  internal_strlen(__dfsan_label_info[l].desc));
361    }
362    WriteToFile(fd, "\n", 1);
363  }
364}
365
366void Flags::SetDefaults() {
367#define DFSAN_FLAG(Type, Name, DefaultValue, Description) Name = DefaultValue;
368#include "dfsan_flags.inc"
369#undef DFSAN_FLAG
370}
371
372static void RegisterDfsanFlags(FlagParser *parser, Flags *f) {
373#define DFSAN_FLAG(Type, Name, DefaultValue, Description) \
374  RegisterFlag(parser, #Name, Description, &f->Name);
375#include "dfsan_flags.inc"
376#undef DFSAN_FLAG
377}
378
379static void InitializeFlags() {
380  SetCommonFlagsDefaults();
381  flags().SetDefaults();
382
383  FlagParser parser;
384  RegisterCommonFlags(&parser);
385  RegisterDfsanFlags(&parser, &flags());
386  parser.ParseStringFromEnv("DFSAN_OPTIONS");
387  InitializeCommonFlags();
388  if (Verbosity()) ReportUnrecognizedFlags();
389  if (common_flags()->help) parser.PrintFlagDescriptions();
390}
391
392static void InitializePlatformEarly() {
393  AvoidCVE_2016_2143();
394#ifdef DFSAN_RUNTIME_VMA
395  __dfsan::vmaSize =
396    (MostSignificantSetBitIndex(GET_CURRENT_FRAME()) + 1);
397  if (__dfsan::vmaSize == 39 || __dfsan::vmaSize == 42 ||
398      __dfsan::vmaSize == 48) {
399    __dfsan_shadow_ptr_mask = ShadowMask();
400  } else {
401    Printf("FATAL: DataFlowSanitizer: unsupported VMA range\n");
402    Printf("FATAL: Found %d - Supported 39, 42, and 48\n", __dfsan::vmaSize);
403    Die();
404  }
405#endif
406}
407
408static void dfsan_fini() {
409  if (internal_strcmp(flags().dump_labels_at_exit, "") != 0) {
410    fd_t fd = OpenFile(flags().dump_labels_at_exit, WrOnly);
411    if (fd == kInvalidFd) {
412      Report("WARNING: DataFlowSanitizer: unable to open output file %s\n",
413             flags().dump_labels_at_exit);
414      return;
415    }
416
417    Report("INFO: DataFlowSanitizer: dumping labels to %s\n",
418           flags().dump_labels_at_exit);
419    dfsan_dump_labels(fd);
420    CloseFile(fd);
421  }
422}
423
424extern "C" void dfsan_flush() {
425  UnmapOrDie((void*)ShadowAddr(), UnusedAddr() - ShadowAddr());
426  if (!MmapFixedNoReserve(ShadowAddr(), UnusedAddr() - ShadowAddr()))
427    Die();
428}
429
430static void dfsan_init(int argc, char **argv, char **envp) {
431  InitializeFlags();
432
433  ::InitializePlatformEarly();
434
435  if (!MmapFixedNoReserve(ShadowAddr(), UnusedAddr() - ShadowAddr()))
436    Die();
437
438  // Protect the region of memory we don't use, to preserve the one-to-one
439  // mapping from application to shadow memory. But if ASLR is disabled, Linux
440  // will load our executable in the middle of our unused region. This mostly
441  // works so long as the program doesn't use too much memory. We support this
442  // case by disabling memory protection when ASLR is disabled.
443  uptr init_addr = (uptr)&dfsan_init;
444  if (!(init_addr >= UnusedAddr() && init_addr < AppAddr()))
445    MmapFixedNoAccess(UnusedAddr(), AppAddr() - UnusedAddr());
446
447  InitializeInterceptors();
448
449  // Register the fini callback to run when the program terminates successfully
450  // or it is killed by the runtime.
451  Atexit(dfsan_fini);
452  AddDieCallback(dfsan_fini);
453
454  __dfsan_label_info[kInitializingLabel].desc = "<init label>";
455}
456
#if SANITIZER_CAN_USE_PREINIT_ARRAY
// Places a pointer to dfsan_init in the .preinit_array section; the `used`
// attribute keeps the otherwise unreferenced pointer from being discarded.
__attribute__((section(".preinit_array"), used))
static void (*dfsan_init_ptr)(int, char **, char **) = &dfsan_init;
#endif
461