Args.cpp revision 360784
1//===-- Args.cpp ------------------------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "lldb/Utility/Args.h"
10#include "lldb/Utility/ConstString.h"
11#include "lldb/Utility/FileSpec.h"
12#include "lldb/Utility/Stream.h"
13#include "lldb/Utility/StringList.h"
14#include "llvm/ADT/StringSwitch.h"
15
16using namespace lldb;
17using namespace lldb_private;
18
19// A helper function for argument parsing.
20// Parses the initial part of the first argument using normal double quote
21// rules: backslash escapes the double quote and itself. The parsed string is
22// appended to the second argument. The function returns the unparsed portion
23// of the string, starting at the closing quote.
24static llvm::StringRef ParseDoubleQuotes(llvm::StringRef quoted,
25                                         std::string &result) {
26  // Inside double quotes, '\' and '"' are special.
27  static const char *k_escapable_characters = "\"\\";
28  while (true) {
29    // Skip over over regular characters and append them.
30    size_t regular = quoted.find_first_of(k_escapable_characters);
31    result += quoted.substr(0, regular);
32    quoted = quoted.substr(regular);
33
34    // If we have reached the end of string or the closing quote, we're done.
35    if (quoted.empty() || quoted.front() == '"')
36      break;
37
38    // We have found a backslash.
39    quoted = quoted.drop_front();
40
41    if (quoted.empty()) {
42      // A lone backslash at the end of string, let's just append it.
43      result += '\\';
44      break;
45    }
46
47    // If the character after the backslash is not a whitelisted escapable
48    // character, we leave the character sequence untouched.
49    if (strchr(k_escapable_characters, quoted.front()) == nullptr)
50      result += '\\';
51
52    result += quoted.front();
53    quoted = quoted.drop_front();
54  }
55
56  return quoted;
57}
58
// Counts the entries of a null-terminated argv-style array. A null `argv`
// counts as zero entries.
static size_t ArgvToArgc(const char **argv) {
  size_t argc = 0;
  if (argv != nullptr) {
    for (const char **cursor = argv; *cursor != nullptr; ++cursor)
      ++argc;
  }
  return argc;
}
67
68// Trims all whitespace that can separate command line arguments from the left
69// side of the string.
70static llvm::StringRef ltrimForArgs(llvm::StringRef str) {
71  static const char *k_space_separators = " \t";
72  return str.ltrim(k_space_separators);
73}
74
75// A helper function for SetCommandString. Parses a single argument from the
76// command string, processing quotes and backslashes in a shell-like manner.
77// The function returns a tuple consisting of the parsed argument, the quote
78// char used, and the unparsed portion of the string starting at the first
79// unqouted, unescaped whitespace character.
80static std::tuple<std::string, char, llvm::StringRef>
81ParseSingleArgument(llvm::StringRef command) {
82  // Argument can be split into multiple discontiguous pieces, for example:
83  //  "Hello ""World"
84  // this would result in a single argument "Hello World" (without the quotes)
85  // since the quotes would be removed and there is not space between the
86  // strings.
87  std::string arg;
88
89  // Since we can have multiple quotes that form a single command in a command
90  // like: "Hello "world'!' (which will make a single argument "Hello world!")
91  // we remember the first quote character we encounter and use that for the
92  // quote character.
93  char first_quote_char = '\0';
94
95  bool arg_complete = false;
96  do {
97    // Skip over over regular characters and append them.
98    size_t regular = command.find_first_of(" \t\r\"'`\\");
99    arg += command.substr(0, regular);
100    command = command.substr(regular);
101
102    if (command.empty())
103      break;
104
105    char special = command.front();
106    command = command.drop_front();
107    switch (special) {
108    case '\\':
109      if (command.empty()) {
110        arg += '\\';
111        break;
112      }
113
114      // If the character after the backslash is not a whitelisted escapable
115      // character, we leave the character sequence untouched.
116      if (strchr(" \t\\'\"`", command.front()) == nullptr)
117        arg += '\\';
118
119      arg += command.front();
120      command = command.drop_front();
121
122      break;
123
124    case ' ':
125    case '\t':
126    case '\r':
127      // We are not inside any quotes, we just found a space after an argument.
128      // We are done.
129      arg_complete = true;
130      break;
131
132    case '"':
133    case '\'':
134    case '`':
135      // We found the start of a quote scope.
136      if (first_quote_char == '\0')
137        first_quote_char = special;
138
139      if (special == '"')
140        command = ParseDoubleQuotes(command, arg);
141      else {
142        // For single quotes, we simply skip ahead to the matching quote
143        // character (or the end of the string).
144        size_t quoted = command.find(special);
145        arg += command.substr(0, quoted);
146        command = command.substr(quoted);
147      }
148
149      // If we found a closing quote, skip it.
150      if (!command.empty())
151        command = command.drop_front();
152
153      break;
154    }
155  } while (!arg_complete);
156
157  return std::make_tuple(arg, first_quote_char, command);
158}
159
160Args::ArgEntry::ArgEntry(llvm::StringRef str, char quote) : quote(quote) {
161  size_t size = str.size();
162  ptr.reset(new char[size + 1]);
163
164  ::memcpy(data(), str.data() ? str.data() : "", size);
165  ptr[size] = 0;
166}
167
168// Args constructor
169Args::Args(llvm::StringRef command) { SetCommandString(command); }
170
171Args::Args(const Args &rhs) { *this = rhs; }
172
173Args::Args(const StringList &list) : Args() {
174  for (const std::string &arg : list)
175    AppendArgument(arg);
176}
177
178Args &Args::operator=(const Args &rhs) {
179  Clear();
180
181  m_argv.clear();
182  m_entries.clear();
183  for (auto &entry : rhs.m_entries) {
184    m_entries.emplace_back(entry.ref(), entry.quote);
185    m_argv.push_back(m_entries.back().data());
186  }
187  m_argv.push_back(nullptr);
188  return *this;
189}
190
191// Destructor
192Args::~Args() {}
193
194void Args::Dump(Stream &s, const char *label_name) const {
195  if (!label_name)
196    return;
197
198  int i = 0;
199  for (auto &entry : m_entries) {
200    s.Indent();
201    s.Format("{0}[{1}]=\"{2}\"\n", label_name, i++, entry.ref());
202  }
203  s.Format("{0}[{1}]=NULL\n", label_name, i);
204  s.EOL();
205}
206
207bool Args::GetCommandString(std::string &command) const {
208  command.clear();
209
210  for (size_t i = 0; i < m_entries.size(); ++i) {
211    if (i > 0)
212      command += ' ';
213    command += m_entries[i].ref();
214  }
215
216  return !m_entries.empty();
217}
218
219bool Args::GetQuotedCommandString(std::string &command) const {
220  command.clear();
221
222  for (size_t i = 0; i < m_entries.size(); ++i) {
223    if (i > 0)
224      command += ' ';
225
226    if (m_entries[i].quote) {
227      command += m_entries[i].quote;
228      command += m_entries[i].ref();
229      command += m_entries[i].quote;
230    } else {
231      command += m_entries[i].ref();
232    }
233  }
234
235  return !m_entries.empty();
236}
237
238void Args::SetCommandString(llvm::StringRef command) {
239  Clear();
240  m_argv.clear();
241
242  command = ltrimForArgs(command);
243  std::string arg;
244  char quote;
245  while (!command.empty()) {
246    std::tie(arg, quote, command) = ParseSingleArgument(command);
247    m_entries.emplace_back(arg, quote);
248    m_argv.push_back(m_entries.back().data());
249    command = ltrimForArgs(command);
250  }
251  m_argv.push_back(nullptr);
252}
253
254size_t Args::GetArgumentCount() const { return m_entries.size(); }
255
256const char *Args::GetArgumentAtIndex(size_t idx) const {
257  if (idx < m_argv.size())
258    return m_argv[idx];
259  return nullptr;
260}
261
262char **Args::GetArgumentVector() {
263  assert(!m_argv.empty());
264  // TODO: functions like execve and posix_spawnp exhibit undefined behavior
265  // when argv or envp is null.  So the code below is actually wrong.  However,
266  // other code in LLDB depends on it being null.  The code has been acting
267  // this way for some time, so it makes sense to leave it this way until
268  // someone has the time to come along and fix it.
269  return (m_argv.size() > 1) ? m_argv.data() : nullptr;
270}
271
272const char **Args::GetConstArgumentVector() const {
273  assert(!m_argv.empty());
274  return (m_argv.size() > 1) ? const_cast<const char **>(m_argv.data())
275                             : nullptr;
276}
277
278void Args::Shift() {
279  // Don't pop the last NULL terminator from the argv array
280  if (m_entries.empty())
281    return;
282  m_argv.erase(m_argv.begin());
283  m_entries.erase(m_entries.begin());
284}
285
286void Args::Unshift(llvm::StringRef arg_str, char quote_char) {
287  InsertArgumentAtIndex(0, arg_str, quote_char);
288}
289
290void Args::AppendArguments(const Args &rhs) {
291  assert(m_argv.size() == m_entries.size() + 1);
292  assert(m_argv.back() == nullptr);
293  m_argv.pop_back();
294  for (auto &entry : rhs.m_entries) {
295    m_entries.emplace_back(entry.ref(), entry.quote);
296    m_argv.push_back(m_entries.back().data());
297  }
298  m_argv.push_back(nullptr);
299}
300
301void Args::AppendArguments(const char **argv) {
302  size_t argc = ArgvToArgc(argv);
303
304  assert(m_argv.size() == m_entries.size() + 1);
305  assert(m_argv.back() == nullptr);
306  m_argv.pop_back();
307  for (auto arg : llvm::makeArrayRef(argv, argc)) {
308    m_entries.emplace_back(arg, '\0');
309    m_argv.push_back(m_entries.back().data());
310  }
311
312  m_argv.push_back(nullptr);
313}
314
315void Args::AppendArgument(llvm::StringRef arg_str, char quote_char) {
316  InsertArgumentAtIndex(GetArgumentCount(), arg_str, quote_char);
317}
318
319void Args::InsertArgumentAtIndex(size_t idx, llvm::StringRef arg_str,
320                                 char quote_char) {
321  assert(m_argv.size() == m_entries.size() + 1);
322  assert(m_argv.back() == nullptr);
323
324  if (idx > m_entries.size())
325    return;
326  m_entries.emplace(m_entries.begin() + idx, arg_str, quote_char);
327  m_argv.insert(m_argv.begin() + idx, m_entries[idx].data());
328}
329
330void Args::ReplaceArgumentAtIndex(size_t idx, llvm::StringRef arg_str,
331                                  char quote_char) {
332  assert(m_argv.size() == m_entries.size() + 1);
333  assert(m_argv.back() == nullptr);
334
335  if (idx >= m_entries.size())
336    return;
337
338  m_entries[idx] = ArgEntry(arg_str, quote_char);
339  m_argv[idx] = m_entries[idx].data();
340}
341
342void Args::DeleteArgumentAtIndex(size_t idx) {
343  if (idx >= m_entries.size())
344    return;
345
346  m_argv.erase(m_argv.begin() + idx);
347  m_entries.erase(m_entries.begin() + idx);
348}
349
350void Args::SetArguments(size_t argc, const char **argv) {
351  Clear();
352
353  auto args = llvm::makeArrayRef(argv, argc);
354  m_entries.resize(argc);
355  m_argv.resize(argc + 1);
356  for (size_t i = 0; i < args.size(); ++i) {
357    char quote =
358        ((args[i][0] == '\'') || (args[i][0] == '"') || (args[i][0] == '`'))
359            ? args[i][0]
360            : '\0';
361
362    m_entries[i] = ArgEntry(args[i], quote);
363    m_argv[i] = m_entries[i].data();
364  }
365}
366
367void Args::SetArguments(const char **argv) {
368  SetArguments(ArgvToArgc(argv), argv);
369}
370
371void Args::Clear() {
372  m_entries.clear();
373  m_argv.clear();
374  m_argv.push_back(nullptr);
375}
376
377const char *Args::GetShellSafeArgument(const FileSpec &shell,
378                                       const char *unsafe_arg,
379                                       std::string &safe_arg) {
380  struct ShellDescriptor {
381    ConstString m_basename;
382    const char *m_escapables;
383  };
384
385  static ShellDescriptor g_Shells[] = {{ConstString("bash"), " '\"<>()&"},
386                                       {ConstString("tcsh"), " '\"<>()&$"},
387                                       {ConstString("sh"), " '\"<>()&"}};
388
389  // safe minimal set
390  const char *escapables = " '\"";
391
392  if (auto basename = shell.GetFilename()) {
393    for (const auto &Shell : g_Shells) {
394      if (Shell.m_basename == basename) {
395        escapables = Shell.m_escapables;
396        break;
397      }
398    }
399  }
400
401  safe_arg.assign(unsafe_arg);
402  size_t prev_pos = 0;
403  while (prev_pos < safe_arg.size()) {
404    // Escape spaces and quotes
405    size_t pos = safe_arg.find_first_of(escapables, prev_pos);
406    if (pos != std::string::npos) {
407      safe_arg.insert(pos, 1, '\\');
408      prev_pos = pos + 2;
409    } else
410      break;
411  }
412  return safe_arg.c_str();
413}
414
415lldb::Encoding Args::StringToEncoding(llvm::StringRef s,
416                                      lldb::Encoding fail_value) {
417  return llvm::StringSwitch<lldb::Encoding>(s)
418      .Case("uint", eEncodingUint)
419      .Case("sint", eEncodingSint)
420      .Case("ieee754", eEncodingIEEE754)
421      .Case("vector", eEncodingVector)
422      .Default(fail_value);
423}
424
425uint32_t Args::StringToGenericRegister(llvm::StringRef s) {
426  if (s.empty())
427    return LLDB_INVALID_REGNUM;
428  uint32_t result = llvm::StringSwitch<uint32_t>(s)
429                        .Case("pc", LLDB_REGNUM_GENERIC_PC)
430                        .Case("sp", LLDB_REGNUM_GENERIC_SP)
431                        .Case("fp", LLDB_REGNUM_GENERIC_FP)
432                        .Cases("ra", "lr", LLDB_REGNUM_GENERIC_RA)
433                        .Case("flags", LLDB_REGNUM_GENERIC_FLAGS)
434                        .Case("arg1", LLDB_REGNUM_GENERIC_ARG1)
435                        .Case("arg2", LLDB_REGNUM_GENERIC_ARG2)
436                        .Case("arg3", LLDB_REGNUM_GENERIC_ARG3)
437                        .Case("arg4", LLDB_REGNUM_GENERIC_ARG4)
438                        .Case("arg5", LLDB_REGNUM_GENERIC_ARG5)
439                        .Case("arg6", LLDB_REGNUM_GENERIC_ARG6)
440                        .Case("arg7", LLDB_REGNUM_GENERIC_ARG7)
441                        .Case("arg8", LLDB_REGNUM_GENERIC_ARG8)
442                        .Default(LLDB_INVALID_REGNUM);
443  return result;
444}
445
446void Args::EncodeEscapeSequences(const char *src, std::string &dst) {
447  dst.clear();
448  if (src) {
449    for (const char *p = src; *p != '\0'; ++p) {
450      size_t non_special_chars = ::strcspn(p, "\\");
451      if (non_special_chars > 0) {
452        dst.append(p, non_special_chars);
453        p += non_special_chars;
454        if (*p == '\0')
455          break;
456      }
457
458      if (*p == '\\') {
459        ++p; // skip the slash
460        switch (*p) {
461        case 'a':
462          dst.append(1, '\a');
463          break;
464        case 'b':
465          dst.append(1, '\b');
466          break;
467        case 'f':
468          dst.append(1, '\f');
469          break;
470        case 'n':
471          dst.append(1, '\n');
472          break;
473        case 'r':
474          dst.append(1, '\r');
475          break;
476        case 't':
477          dst.append(1, '\t');
478          break;
479        case 'v':
480          dst.append(1, '\v');
481          break;
482        case '\\':
483          dst.append(1, '\\');
484          break;
485        case '\'':
486          dst.append(1, '\'');
487          break;
488        case '"':
489          dst.append(1, '"');
490          break;
491        case '0':
492          // 1 to 3 octal chars
493          {
494            // Make a string that can hold onto the initial zero char, up to 3
495            // octal digits, and a terminating NULL.
496            char oct_str[5] = {'\0', '\0', '\0', '\0', '\0'};
497
498            int i;
499            for (i = 0; (p[i] >= '0' && p[i] <= '7') && i < 4; ++i)
500              oct_str[i] = p[i];
501
502            // We don't want to consume the last octal character since the main
503            // for loop will do this for us, so we advance p by one less than i
504            // (even if i is zero)
505            p += i - 1;
506            unsigned long octal_value = ::strtoul(oct_str, nullptr, 8);
507            if (octal_value <= UINT8_MAX) {
508              dst.append(1, static_cast<char>(octal_value));
509            }
510          }
511          break;
512
513        case 'x':
514          // hex number in the format
515          if (isxdigit(p[1])) {
516            ++p; // Skip the 'x'
517
518            // Make a string that can hold onto two hex chars plus a
519            // NULL terminator
520            char hex_str[3] = {*p, '\0', '\0'};
521            if (isxdigit(p[1])) {
522              ++p; // Skip the first of the two hex chars
523              hex_str[1] = *p;
524            }
525
526            unsigned long hex_value = strtoul(hex_str, nullptr, 16);
527            if (hex_value <= UINT8_MAX)
528              dst.append(1, static_cast<char>(hex_value));
529          } else {
530            dst.append(1, 'x');
531          }
532          break;
533
534        default:
535          // Just desensitize any other character by just printing what came
536          // after the '\'
537          dst.append(1, *p);
538          break;
539        }
540      }
541    }
542  }
543}
544
545void Args::ExpandEscapedCharacters(const char *src, std::string &dst) {
546  dst.clear();
547  if (src) {
548    for (const char *p = src; *p != '\0'; ++p) {
549      if (isprint(*p))
550        dst.append(1, *p);
551      else {
552        switch (*p) {
553        case '\a':
554          dst.append("\\a");
555          break;
556        case '\b':
557          dst.append("\\b");
558          break;
559        case '\f':
560          dst.append("\\f");
561          break;
562        case '\n':
563          dst.append("\\n");
564          break;
565        case '\r':
566          dst.append("\\r");
567          break;
568        case '\t':
569          dst.append("\\t");
570          break;
571        case '\v':
572          dst.append("\\v");
573          break;
574        case '\'':
575          dst.append("\\'");
576          break;
577        case '"':
578          dst.append("\\\"");
579          break;
580        case '\\':
581          dst.append("\\\\");
582          break;
583        default: {
584          // Just encode as octal
585          dst.append("\\0");
586          char octal_str[32];
587          snprintf(octal_str, sizeof(octal_str), "%o", *p);
588          dst.append(octal_str);
589        } break;
590        }
591      }
592    }
593  }
594}
595
596std::string Args::EscapeLLDBCommandArgument(const std::string &arg,
597                                            char quote_char) {
598  const char *chars_to_escape = nullptr;
599  switch (quote_char) {
600  case '\0':
601    chars_to_escape = " \t\\'\"`";
602    break;
603  case '"':
604    chars_to_escape = "$\"`\\";
605    break;
606  case '`':
607  case '\'':
608    return arg;
609  default:
610    assert(false && "Unhandled quote character");
611    return arg;
612  }
613
614  std::string res;
615  res.reserve(arg.size());
616  for (char c : arg) {
617    if (::strchr(chars_to_escape, c))
618      res.push_back('\\');
619    res.push_back(c);
620  }
621  return res;
622}
623
624OptionsWithRaw::OptionsWithRaw(llvm::StringRef arg_string) {
625  SetFromString(arg_string);
626}
627
628void OptionsWithRaw::SetFromString(llvm::StringRef arg_string) {
629  const llvm::StringRef original_args = arg_string;
630
631  arg_string = ltrimForArgs(arg_string);
632  std::string arg;
633  char quote;
634
635  // If the string doesn't start with a dash, we just have no options and just
636  // a raw part.
637  if (!arg_string.startswith("-")) {
638    m_suffix = original_args;
639    return;
640  }
641
642  bool found_suffix = false;
643
644  while (!arg_string.empty()) {
645    // The length of the prefix before parsing.
646    std::size_t prev_prefix_length = original_args.size() - arg_string.size();
647
648    // Parse the next argument from the remaining string.
649    std::tie(arg, quote, arg_string) = ParseSingleArgument(arg_string);
650
651    // If we get an unquoted '--' argument, then we reached the suffix part
652    // of the command.
653    Args::ArgEntry entry(arg, quote);
654    if (!entry.IsQuoted() && arg == "--") {
655      // The remaining line is the raw suffix, and the line we parsed so far
656      // needs to be interpreted as arguments.
657      m_has_args = true;
658      m_suffix = arg_string;
659      found_suffix = true;
660
661      // The length of the prefix after parsing.
662      std::size_t prefix_length = original_args.size() - arg_string.size();
663
664      // Take the string we know contains all the arguments and actually parse
665      // it as proper arguments.
666      llvm::StringRef prefix = original_args.take_front(prev_prefix_length);
667      m_args = Args(prefix);
668      m_arg_string = prefix;
669
670      // We also record the part of the string that contains the arguments plus
671      // the delimiter.
672      m_arg_string_with_delimiter = original_args.take_front(prefix_length);
673
674      // As the rest of the string became the raw suffix, we are done here.
675      break;
676    }
677
678    arg_string = ltrimForArgs(arg_string);
679  }
680
681  // If we didn't find a suffix delimiter, the whole string is the raw suffix.
682  if (!found_suffix) {
683    found_suffix = true;
684    m_suffix = original_args;
685  }
686}
687