xray_x86_64.cpp revision 360784
1#include "cpuid.h"
2#include "sanitizer_common/sanitizer_common.h"
3#if !SANITIZER_FUCHSIA
4#include "sanitizer_common/sanitizer_posix.h"
5#endif
6#include "xray_defs.h"
7#include "xray_interface_internal.h"
8
9#if SANITIZER_FREEBSD || SANITIZER_NETBSD || SANITIZER_OPENBSD || SANITIZER_MAC
10#include <sys/types.h>
11#if SANITIZER_OPENBSD
12#include <sys/time.h>
13#include <machine/cpu.h>
14#endif
15#include <sys/sysctl.h>
16#elif SANITIZER_FUCHSIA
17#include <zircon/syscalls.h>
18#endif
19
20#include <atomic>
21#include <cstdint>
22#include <errno.h>
23#include <fcntl.h>
24#include <iterator>
25#include <limits>
26#include <tuple>
27#include <unistd.h>
28
29namespace __xray {
30
31#if SANITIZER_LINUX
32static std::pair<ssize_t, bool>
33retryingReadSome(int Fd, char *Begin, char *End) XRAY_NEVER_INSTRUMENT {
34  auto BytesToRead = std::distance(Begin, End);
35  ssize_t BytesRead;
36  ssize_t TotalBytesRead = 0;
37  while (BytesToRead && (BytesRead = read(Fd, Begin, BytesToRead))) {
38    if (BytesRead == -1) {
39      if (errno == EINTR)
40        continue;
41      Report("Read error; errno = %d\n", errno);
42      return std::make_pair(TotalBytesRead, false);
43    }
44
45    TotalBytesRead += BytesRead;
46    BytesToRead -= BytesRead;
47    Begin += BytesRead;
48  }
49  return std::make_pair(TotalBytesRead, true);
50}
51
52static bool readValueFromFile(const char *Filename,
53                              long long *Value) XRAY_NEVER_INSTRUMENT {
54  int Fd = open(Filename, O_RDONLY | O_CLOEXEC);
55  if (Fd == -1)
56    return false;
57  static constexpr size_t BufSize = 256;
58  char Line[BufSize] = {};
59  ssize_t BytesRead;
60  bool Success;
61  std::tie(BytesRead, Success) = retryingReadSome(Fd, Line, Line + BufSize);
62  close(Fd);
63  if (!Success)
64    return false;
65  const char *End = nullptr;
66  long long Tmp = internal_simple_strtoll(Line, &End, 10);
67  bool Result = false;
68  if (Line[0] != '\0' && (*End == '\n' || *End == '\0')) {
69    *Value = Tmp;
70    Result = true;
71  }
72  return Result;
73}
74
75uint64_t getTSCFrequency() XRAY_NEVER_INSTRUMENT {
76  long long TSCFrequency = -1;
77  if (readValueFromFile("/sys/devices/system/cpu/cpu0/tsc_freq_khz",
78                        &TSCFrequency)) {
79    TSCFrequency *= 1000;
80  } else if (readValueFromFile(
81                 "/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq",
82                 &TSCFrequency)) {
83    TSCFrequency *= 1000;
84  } else {
85    Report("Unable to determine CPU frequency for TSC accounting.\n");
86  }
87  return TSCFrequency == -1 ? 0 : static_cast<uint64_t>(TSCFrequency);
88}
89#elif SANITIZER_FREEBSD || SANITIZER_NETBSD || SANITIZER_OPENBSD || SANITIZER_MAC
90uint64_t getTSCFrequency() XRAY_NEVER_INSTRUMENT {
91    long long TSCFrequency = -1;
92    size_t tscfreqsz = sizeof(TSCFrequency);
93#if SANITIZER_OPENBSD
94    int Mib[2] = { CTL_MACHDEP, CPU_TSCFREQ };
95    if (internal_sysctl(Mib, 2, &TSCFrequency, &tscfreqsz, NULL, 0) != -1) {
96#elif SANITIZER_MAC
97    if (internal_sysctlbyname("machdep.tsc.frequency", &TSCFrequency,
98                              &tscfreqsz, NULL, 0) != -1) {
99
100#else
101    if (internal_sysctlbyname("machdep.tsc_freq", &TSCFrequency, &tscfreqsz,
102                              NULL, 0) != -1) {
103#endif
104        return static_cast<uint64_t>(TSCFrequency);
105    } else {
106      Report("Unable to determine CPU frequency for TSC accounting.\n");
107    }
108
109    return 0;
110}
111#elif !SANITIZER_FUCHSIA
112uint64_t getTSCFrequency() XRAY_NEVER_INSTRUMENT {
113    /* Not supported */
114    return 0;
115}
116#endif
117
// Single-byte opcodes written into sleds.
static constexpr uint8_t CallOpCode = 0xe8; // call rel32
static constexpr uint8_t JmpOpCode = 0xe9;  // jmp rel32
static constexpr uint8_t RetOpCode = 0xc3;  // ret

// Two-byte sequences, stored as uint16_t so they can be written with a single
// 16-bit store. On little-endian x86 the low byte lands first in memory.
static constexpr uint16_t MovR10Seq = 0xba41; // 41 ba: start of mov r10d, imm32
static constexpr uint16_t NopwSeq = 0x9066;   // 66 90: two-byte nop
static constexpr uint16_t Jmp9Seq = 0x09eb;   // eb 09: jmp +9
static constexpr uint16_t Jmp15Seq = 0x0feb;  // eb 0f: jmp +15
static constexpr uint16_t Jmp20Seq = 0x14eb;  // eb 14: jmp +20

// Inclusive bounds of a displacement that fits in a signed 32-bit field.
static constexpr int64_t MinOffset = std::numeric_limits<int32_t>::min();
static constexpr int64_t MaxOffset = std::numeric_limits<int32_t>::max();
129
130bool patchFunctionEntry(const bool Enable, const uint32_t FuncId,
131                        const XRaySledEntry &Sled,
132                        void (*Trampoline)()) XRAY_NEVER_INSTRUMENT {
133  // Here we do the dance of replacing the following sled:
134  //
135  // xray_sled_n:
136  //   jmp +9
137  //   <9 byte nop>
138  //
139  // With the following:
140  //
141  //   mov r10d, <function id>
142  //   call <relative 32bit offset to entry trampoline>
143  //
144  // We need to do this in the following order:
145  //
146  // 1. Put the function id first, 2 bytes from the start of the sled (just
147  // after the 2-byte jmp instruction).
148  // 2. Put the call opcode 6 bytes from the start of the sled.
149  // 3. Put the relative offset 7 bytes from the start of the sled.
150  // 4. Do an atomic write over the jmp instruction for the "mov r10d"
151  // opcode and first operand.
152  //
153  // Prerequisite is to compute the relative offset to the trampoline's address.
154  int64_t TrampolineOffset = reinterpret_cast<int64_t>(Trampoline) -
155                             (static_cast<int64_t>(Sled.Address) + 11);
156  if (TrampolineOffset < MinOffset || TrampolineOffset > MaxOffset) {
157    Report("XRay Entry trampoline (%p) too far from sled (%p)\n",
158           Trampoline, reinterpret_cast<void *>(Sled.Address));
159    return false;
160  }
161  if (Enable) {
162    *reinterpret_cast<uint32_t *>(Sled.Address + 2) = FuncId;
163    *reinterpret_cast<uint8_t *>(Sled.Address + 6) = CallOpCode;
164    *reinterpret_cast<uint32_t *>(Sled.Address + 7) = TrampolineOffset;
165    std::atomic_store_explicit(
166        reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), MovR10Seq,
167        std::memory_order_release);
168  } else {
169    std::atomic_store_explicit(
170        reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), Jmp9Seq,
171        std::memory_order_release);
172    // FIXME: Write out the nops still?
173  }
174  return true;
175}
176
177bool patchFunctionExit(const bool Enable, const uint32_t FuncId,
178                       const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
179  // Here we do the dance of replacing the following sled:
180  //
181  // xray_sled_n:
182  //   ret
183  //   <10 byte nop>
184  //
185  // With the following:
186  //
187  //   mov r10d, <function id>
188  //   jmp <relative 32bit offset to exit trampoline>
189  //
190  // 1. Put the function id first, 2 bytes from the start of the sled (just
191  // after the 1-byte ret instruction).
192  // 2. Put the jmp opcode 6 bytes from the start of the sled.
193  // 3. Put the relative offset 7 bytes from the start of the sled.
194  // 4. Do an atomic write over the jmp instruction for the "mov r10d"
195  // opcode and first operand.
196  //
197  // Prerequisite is to compute the relative offset fo the
198  // __xray_FunctionExit function's address.
199  int64_t TrampolineOffset = reinterpret_cast<int64_t>(__xray_FunctionExit) -
200                             (static_cast<int64_t>(Sled.Address) + 11);
201  if (TrampolineOffset < MinOffset || TrampolineOffset > MaxOffset) {
202    Report("XRay Exit trampoline (%p) too far from sled (%p)\n",
203           __xray_FunctionExit, reinterpret_cast<void *>(Sled.Address));
204    return false;
205  }
206  if (Enable) {
207    *reinterpret_cast<uint32_t *>(Sled.Address + 2) = FuncId;
208    *reinterpret_cast<uint8_t *>(Sled.Address + 6) = JmpOpCode;
209    *reinterpret_cast<uint32_t *>(Sled.Address + 7) = TrampolineOffset;
210    std::atomic_store_explicit(
211        reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), MovR10Seq,
212        std::memory_order_release);
213  } else {
214    std::atomic_store_explicit(
215        reinterpret_cast<std::atomic<uint8_t> *>(Sled.Address), RetOpCode,
216        std::memory_order_release);
217    // FIXME: Write out the nops still?
218  }
219  return true;
220}
221
222bool patchFunctionTailExit(const bool Enable, const uint32_t FuncId,
223                           const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
224  // Here we do the dance of replacing the tail call sled with a similar
225  // sequence as the entry sled, but calls the tail exit sled instead.
226  int64_t TrampolineOffset =
227      reinterpret_cast<int64_t>(__xray_FunctionTailExit) -
228      (static_cast<int64_t>(Sled.Address) + 11);
229  if (TrampolineOffset < MinOffset || TrampolineOffset > MaxOffset) {
230    Report("XRay Tail Exit trampoline (%p) too far from sled (%p)\n",
231           __xray_FunctionTailExit, reinterpret_cast<void *>(Sled.Address));
232    return false;
233  }
234  if (Enable) {
235    *reinterpret_cast<uint32_t *>(Sled.Address + 2) = FuncId;
236    *reinterpret_cast<uint8_t *>(Sled.Address + 6) = CallOpCode;
237    *reinterpret_cast<uint32_t *>(Sled.Address + 7) = TrampolineOffset;
238    std::atomic_store_explicit(
239        reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), MovR10Seq,
240        std::memory_order_release);
241  } else {
242    std::atomic_store_explicit(
243        reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), Jmp9Seq,
244        std::memory_order_release);
245    // FIXME: Write out the nops still?
246  }
247  return true;
248}
249
250bool patchCustomEvent(const bool Enable, const uint32_t FuncId,
251                      const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
252  // Here we do the dance of replacing the following sled:
253  //
254  // In Version 0:
255  //
256  // xray_sled_n:
257  //   jmp +20          // 2 bytes
258  //   ...
259  //
260  // With the following:
261  //
262  //   nopw             // 2 bytes*
263  //   ...
264  //
265  //
266  // The "unpatch" should just turn the 'nopw' back to a 'jmp +20'.
267  //
268  // ---
269  //
270  // In Version 1:
271  //
272  //   The jump offset is now 15 bytes (0x0f), so when restoring the nopw back
273  //   to a jmp, use 15 bytes instead.
274  //
275  if (Enable) {
276    std::atomic_store_explicit(
277        reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), NopwSeq,
278        std::memory_order_release);
279  } else {
280    switch (Sled.Version) {
281    case 1:
282      std::atomic_store_explicit(
283          reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), Jmp15Seq,
284          std::memory_order_release);
285      break;
286    case 0:
287    default:
288      std::atomic_store_explicit(
289          reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), Jmp20Seq,
290          std::memory_order_release);
291      break;
292    }
293    }
294  return false;
295}
296
297bool patchTypedEvent(const bool Enable, const uint32_t FuncId,
298                      const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
299  // Here we do the dance of replacing the following sled:
300  //
301  // xray_sled_n:
302  //   jmp +20          // 2 byte instruction
303  //   ...
304  //
305  // With the following:
306  //
307  //   nopw             // 2 bytes
308  //   ...
309  //
310  //
311  // The "unpatch" should just turn the 'nopw' back to a 'jmp +20'.
312  // The 20 byte sled stashes three argument registers, calls the trampoline,
313  // unstashes the registers and returns. If the arguments are already in
314  // the correct registers, the stashing and unstashing become equivalently
315  // sized nops.
316  if (Enable) {
317    std::atomic_store_explicit(
318        reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), NopwSeq,
319        std::memory_order_release);
320  } else {
321      std::atomic_store_explicit(
322          reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), Jmp20Seq,
323          std::memory_order_release);
324  }
325  return false;
326}
327
328#if !SANITIZER_FUCHSIA
329// We determine whether the CPU we're running on has the correct features we
330// need. In x86_64 this will be rdtscp support.
331bool probeRequiredCPUFeatures() XRAY_NEVER_INSTRUMENT {
332  unsigned int EAX, EBX, ECX, EDX;
333
334  // We check whether rdtscp support is enabled. According to the x86_64 manual,
335  // level should be set at 0x80000001, and we should have a look at bit 27 in
336  // EDX. That's 0x8000000 (or 1u << 27).
337  __asm__ __volatile__("cpuid" : "=a"(EAX), "=b"(EBX), "=c"(ECX), "=d"(EDX)
338    : "0"(0x80000001));
339  if (!(EDX & (1u << 27))) {
340    Report("Missing rdtscp support.\n");
341    return false;
342  }
343  // Also check whether we can determine the CPU frequency, since if we cannot,
344  // we should use the emulated TSC instead.
345  if (!getTSCFrequency()) {
346    Report("Unable to determine CPU frequency.\n");
347    return false;
348  }
349  return true;
350}
351#endif
352
353} // namespace __xray
354