FileSpec.cpp revision 360784
1//===-- FileSpec.cpp --------------------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "lldb/Utility/FileSpec.h"
10#include "lldb/Utility/RegularExpression.h"
11#include "lldb/Utility/Stream.h"
12
13#include "llvm/ADT/SmallString.h"
14#include "llvm/ADT/SmallVector.h"
15#include "llvm/ADT/StringRef.h"
16#include "llvm/ADT/Triple.h"
17#include "llvm/ADT/Twine.h"
18#include "llvm/Support/ErrorOr.h"
19#include "llvm/Support/FileSystem.h"
20#include "llvm/Support/Program.h"
21#include "llvm/Support/raw_ostream.h"
22
23#include <algorithm>
24#include <system_error>
25#include <vector>
26
27#include <assert.h>
28#include <limits.h>
29#include <stdio.h>
30#include <string.h>
31
32using namespace lldb;
33using namespace lldb_private;
34
35namespace {
36
37static constexpr FileSpec::Style GetNativeStyle() {
38#if defined(_WIN32)
39  return FileSpec::Style::windows;
40#else
41  return FileSpec::Style::posix;
42#endif
43}
44
45bool PathStyleIsPosix(FileSpec::Style style) {
46  return (style == FileSpec::Style::posix ||
47          (style == FileSpec::Style::native &&
48           GetNativeStyle() == FileSpec::Style::posix));
49}
50
51const char *GetPathSeparators(FileSpec::Style style) {
52  return llvm::sys::path::get_separator(style).data();
53}
54
55char GetPreferredPathSeparator(FileSpec::Style style) {
56  return GetPathSeparators(style)[0];
57}
58
59void Denormalize(llvm::SmallVectorImpl<char> &path, FileSpec::Style style) {
60  if (PathStyleIsPosix(style))
61    return;
62
63  std::replace(path.begin(), path.end(), '/', '\\');
64}
65
66} // end anonymous namespace
67
68FileSpec::FileSpec() : m_style(GetNativeStyle()) {}
69
70// Default constructor that can take an optional full path to a file on disk.
71FileSpec::FileSpec(llvm::StringRef path, Style style) : m_style(style) {
72  SetFile(path, style);
73}
74
75FileSpec::FileSpec(llvm::StringRef path, const llvm::Triple &triple)
76    : FileSpec{path, triple.isOSWindows() ? Style::windows : Style::posix} {}
77
78namespace {
79/// Safely get a character at the specified index.
80///
81/// \param[in] path
82///     A full, partial, or relative path to a file.
83///
84/// \param[in] i
85///     An index into path which may or may not be valid.
86///
87/// \return
88///   The character at index \a i if the index is valid, or 0 if
89///   the index is not valid.
90inline char safeCharAtIndex(const llvm::StringRef &path, size_t i) {
91  if (i < path.size())
92    return path[i];
93  return 0;
94}
95
96/// Check if a path needs to be normalized.
97///
98/// Check if a path needs to be normalized. We currently consider a
99/// path to need normalization if any of the following are true
100///  - path contains "/./"
101///  - path contains "/../"
102///  - path contains "//"
103///  - path ends with "/"
104/// Paths that start with "./" or with "../" are not considered to
105/// need normalization since we aren't trying to resolve the path,
106/// we are just trying to remove redundant things from the path.
107///
108/// \param[in] path
109///     A full, partial, or relative path to a file.
110///
111/// \return
112///   Returns \b true if the path needs to be normalized.
113bool needsNormalization(const llvm::StringRef &path) {
114  if (path.empty())
115    return false;
116  // We strip off leading "." values so these paths need to be normalized
117  if (path[0] == '.')
118    return true;
119  for (auto i = path.find_first_of("\\/"); i != llvm::StringRef::npos;
120       i = path.find_first_of("\\/", i + 1)) {
121    const auto next = safeCharAtIndex(path, i+1);
122    switch (next) {
123      case 0:
124        // path separator char at the end of the string which should be
125        // stripped unless it is the one and only character
126        return i > 0;
127      case '/':
128      case '\\':
129        // two path separator chars in the middle of a path needs to be
130        // normalized
131        if (i > 0)
132          return true;
133        ++i;
134        break;
135
136      case '.': {
137          const auto next_next = safeCharAtIndex(path, i+2);
138          switch (next_next) {
139            default: break;
140            case 0: return true; // ends with "/."
141            case '/':
142            case '\\':
143              return true; // contains "/./"
144            case '.': {
145              const auto next_next_next = safeCharAtIndex(path, i+3);
146              switch (next_next_next) {
147                default: break;
148                case 0: return true; // ends with "/.."
149                case '/':
150                case '\\':
151                  return true; // contains "/../"
152              }
153              break;
154            }
155          }
156        }
157        break;
158
159      default:
160        break;
161    }
162  }
163  return false;
164}
165
166
167}
168
169void FileSpec::SetFile(llvm::StringRef pathname) { SetFile(pathname, m_style); }
170
171// Update the contents of this object with a new path. The path will be split
172// up into a directory and filename and stored as uniqued string values for
173// quick comparison and efficient memory usage.
174void FileSpec::SetFile(llvm::StringRef pathname, Style style) {
175  m_filename.Clear();
176  m_directory.Clear();
177  m_is_resolved = false;
178  m_style = (style == Style::native) ? GetNativeStyle() : style;
179
180  if (pathname.empty())
181    return;
182
183  llvm::SmallString<128> resolved(pathname);
184
185  // Normalize the path by removing ".", ".." and other redundant components.
186  if (needsNormalization(resolved))
187    llvm::sys::path::remove_dots(resolved, true, m_style);
188
189  // Normalize back slashes to forward slashes
190  if (m_style == Style::windows)
191    std::replace(resolved.begin(), resolved.end(), '\\', '/');
192
193  if (resolved.empty()) {
194    // If we have no path after normalization set the path to the current
195    // directory. This matches what python does and also a few other path
196    // utilities.
197    m_filename.SetString(".");
198    return;
199  }
200
201  // Split path into filename and directory. We rely on the underlying char
202  // pointer to be nullptr when the components are empty.
203  llvm::StringRef filename = llvm::sys::path::filename(resolved, m_style);
204  if(!filename.empty())
205    m_filename.SetString(filename);
206
207  llvm::StringRef directory = llvm::sys::path::parent_path(resolved, m_style);
208  if(!directory.empty())
209    m_directory.SetString(directory);
210}
211
212void FileSpec::SetFile(llvm::StringRef path, const llvm::Triple &triple) {
213  return SetFile(path, triple.isOSWindows() ? Style::windows : Style::posix);
214}
215
216// Convert to pointer operator. This allows code to check any FileSpec objects
217// to see if they contain anything valid using code such as:
218//
219//  if (file_spec)
220//  {}
221FileSpec::operator bool() const { return m_filename || m_directory; }
222
223// Logical NOT operator. This allows code to check any FileSpec objects to see
224// if they are invalid using code such as:
225//
226//  if (!file_spec)
227//  {}
228bool FileSpec::operator!() const { return !m_directory && !m_filename; }
229
230bool FileSpec::DirectoryEquals(const FileSpec &rhs) const {
231  const bool case_sensitive = IsCaseSensitive() || rhs.IsCaseSensitive();
232  return ConstString::Equals(m_directory, rhs.m_directory, case_sensitive);
233}
234
235bool FileSpec::FileEquals(const FileSpec &rhs) const {
236  const bool case_sensitive = IsCaseSensitive() || rhs.IsCaseSensitive();
237  return ConstString::Equals(m_filename, rhs.m_filename, case_sensitive);
238}
239
240// Equal to operator
241bool FileSpec::operator==(const FileSpec &rhs) const {
242  return FileEquals(rhs) && DirectoryEquals(rhs);
243}
244
245// Not equal to operator
246bool FileSpec::operator!=(const FileSpec &rhs) const { return !(*this == rhs); }
247
248// Less than operator
249bool FileSpec::operator<(const FileSpec &rhs) const {
250  return FileSpec::Compare(*this, rhs, true) < 0;
251}
252
253// Dump a FileSpec object to a stream
254Stream &lldb_private::operator<<(Stream &s, const FileSpec &f) {
255  f.Dump(s.AsRawOstream());
256  return s;
257}
258
259// Clear this object by releasing both the directory and filename string values
260// and making them both the empty string.
261void FileSpec::Clear() {
262  m_directory.Clear();
263  m_filename.Clear();
264}
265
266// Compare two FileSpec objects. If "full" is true, then both the directory and
267// the filename must match. If "full" is false, then the directory names for
268// "a" and "b" are only compared if they are both non-empty. This allows a
269// FileSpec object to only contain a filename and it can match FileSpec objects
270// that have matching filenames with different paths.
271//
272// Return -1 if the "a" is less than "b", 0 if "a" is equal to "b" and "1" if
273// "a" is greater than "b".
274int FileSpec::Compare(const FileSpec &a, const FileSpec &b, bool full) {
275  int result = 0;
276
277  // case sensitivity of compare
278  const bool case_sensitive = a.IsCaseSensitive() || b.IsCaseSensitive();
279
280  // If full is true, then we must compare both the directory and filename.
281
282  // If full is false, then if either directory is empty, then we match on the
283  // basename only, and if both directories have valid values, we still do a
284  // full compare. This allows for matching when we just have a filename in one
285  // of the FileSpec objects.
286
287  if (full || (a.m_directory && b.m_directory)) {
288    result = ConstString::Compare(a.m_directory, b.m_directory, case_sensitive);
289    if (result)
290      return result;
291  }
292  return ConstString::Compare(a.m_filename, b.m_filename, case_sensitive);
293}
294
295bool FileSpec::Equal(const FileSpec &a, const FileSpec &b, bool full) {
296  if (full || (a.GetDirectory() && b.GetDirectory()))
297    return a == b;
298
299  return a.FileEquals(b);
300}
301
302bool FileSpec::Match(const FileSpec &pattern, const FileSpec &file) {
303  if (pattern.GetDirectory())
304    return pattern == file;
305  if (pattern.GetFilename())
306    return pattern.FileEquals(file);
307  return true;
308}
309
310llvm::Optional<FileSpec::Style> FileSpec::GuessPathStyle(llvm::StringRef absolute_path) {
311  if (absolute_path.startswith("/"))
312    return Style::posix;
313  if (absolute_path.startswith(R"(\\)"))
314    return Style::windows;
315  if (absolute_path.size() > 3 && llvm::isAlpha(absolute_path[0]) &&
316      absolute_path.substr(1, 2) == R"(:\)")
317    return Style::windows;
318  return llvm::None;
319}
320
321// Dump the object to the supplied stream. If the object contains a valid
322// directory name, it will be displayed followed by a directory delimiter, and
323// the filename.
324void FileSpec::Dump(llvm::raw_ostream &s) const {
325  std::string path{GetPath(true)};
326  s << path;
327  char path_separator = GetPreferredPathSeparator(m_style);
328  if (!m_filename && !path.empty() && path.back() != path_separator)
329    s << path_separator;
330}
331
332FileSpec::Style FileSpec::GetPathStyle() const { return m_style; }
333
334// Directory string get accessor.
335ConstString &FileSpec::GetDirectory() { return m_directory; }
336
337// Directory string const get accessor.
338ConstString FileSpec::GetDirectory() const { return m_directory; }
339
340// Filename string get accessor.
341ConstString &FileSpec::GetFilename() { return m_filename; }
342
343// Filename string const get accessor.
344ConstString FileSpec::GetFilename() const { return m_filename; }
345
346// Extract the directory and path into a fixed buffer. This is needed as the
347// directory and path are stored in separate string values.
348size_t FileSpec::GetPath(char *path, size_t path_max_len,
349                         bool denormalize) const {
350  if (!path)
351    return 0;
352
353  std::string result = GetPath(denormalize);
354  ::snprintf(path, path_max_len, "%s", result.c_str());
355  return std::min(path_max_len - 1, result.length());
356}
357
358std::string FileSpec::GetPath(bool denormalize) const {
359  llvm::SmallString<64> result;
360  GetPath(result, denormalize);
361  return std::string(result.begin(), result.end());
362}
363
364const char *FileSpec::GetCString(bool denormalize) const {
365  return ConstString{GetPath(denormalize)}.AsCString(nullptr);
366}
367
368void FileSpec::GetPath(llvm::SmallVectorImpl<char> &path,
369                       bool denormalize) const {
370  path.append(m_directory.GetStringRef().begin(),
371              m_directory.GetStringRef().end());
372  // Since the path was normalized and all paths use '/' when stored in these
373  // objects, we don't need to look for the actual syntax specific path
374  // separator, we just look for and insert '/'.
375  if (m_directory && m_filename && m_directory.GetStringRef().back() != '/' &&
376      m_filename.GetStringRef().back() != '/')
377    path.insert(path.end(), '/');
378  path.append(m_filename.GetStringRef().begin(),
379              m_filename.GetStringRef().end());
380  if (denormalize && !path.empty())
381    Denormalize(path, m_style);
382}
383
384ConstString FileSpec::GetFileNameExtension() const {
385  return ConstString(
386      llvm::sys::path::extension(m_filename.GetStringRef(), m_style));
387}
388
389ConstString FileSpec::GetFileNameStrippingExtension() const {
390  return ConstString(llvm::sys::path::stem(m_filename.GetStringRef(), m_style));
391}
392
393// Return the size in bytes that this object takes in memory. This returns the
394// size in bytes of this object, not any shared string values it may refer to.
395size_t FileSpec::MemorySize() const {
396  return m_filename.MemorySize() + m_directory.MemorySize();
397}
398
399FileSpec
400FileSpec::CopyByAppendingPathComponent(llvm::StringRef component) const {
401  FileSpec ret = *this;
402  ret.AppendPathComponent(component);
403  return ret;
404}
405
406FileSpec FileSpec::CopyByRemovingLastPathComponent() const {
407  llvm::SmallString<64> current_path;
408  GetPath(current_path, false);
409  if (llvm::sys::path::has_parent_path(current_path, m_style))
410    return FileSpec(llvm::sys::path::parent_path(current_path, m_style),
411                    m_style);
412  return *this;
413}
414
415ConstString FileSpec::GetLastPathComponent() const {
416  llvm::SmallString<64> current_path;
417  GetPath(current_path, false);
418  return ConstString(llvm::sys::path::filename(current_path, m_style));
419}
420
421void FileSpec::PrependPathComponent(llvm::StringRef component) {
422  llvm::SmallString<64> new_path(component);
423  llvm::SmallString<64> current_path;
424  GetPath(current_path, false);
425  llvm::sys::path::append(new_path,
426                          llvm::sys::path::begin(current_path, m_style),
427                          llvm::sys::path::end(current_path), m_style);
428  SetFile(new_path, m_style);
429}
430
431void FileSpec::PrependPathComponent(const FileSpec &new_path) {
432  return PrependPathComponent(new_path.GetPath(false));
433}
434
435void FileSpec::AppendPathComponent(llvm::StringRef component) {
436  llvm::SmallString<64> current_path;
437  GetPath(current_path, false);
438  llvm::sys::path::append(current_path, m_style, component);
439  SetFile(current_path, m_style);
440}
441
442void FileSpec::AppendPathComponent(const FileSpec &new_path) {
443  return AppendPathComponent(new_path.GetPath(false));
444}
445
446bool FileSpec::RemoveLastPathComponent() {
447  llvm::SmallString<64> current_path;
448  GetPath(current_path, false);
449  if (llvm::sys::path::has_parent_path(current_path, m_style)) {
450    SetFile(llvm::sys::path::parent_path(current_path, m_style));
451    return true;
452  }
453  return false;
454}
455/// Returns true if the filespec represents an implementation source
456/// file (files with a ".c", ".cpp", ".m", ".mm" (many more)
457/// extension).
458///
459/// \return
460///     \b true if the filespec represents an implementation source
461///     file, \b false otherwise.
462bool FileSpec::IsSourceImplementationFile() const {
463  ConstString extension(GetFileNameExtension());
464  if (!extension)
465    return false;
466
467  static RegularExpression g_source_file_regex(llvm::StringRef(
468      "^.([cC]|[mM]|[mM][mM]|[cC][pP][pP]|[cC]\\+\\+|[cC][xX][xX]|[cC][cC]|["
469      "cC][pP]|[sS]|[aA][sS][mM]|[fF]|[fF]77|[fF]90|[fF]95|[fF]03|[fF][oO]["
470      "rR]|[fF][tT][nN]|[fF][pP][pP]|[aA][dD][aA]|[aA][dD][bB]|[aA][dD][sS])"
471      "$"));
472  return g_source_file_regex.Execute(extension.GetStringRef());
473}
474
475bool FileSpec::IsRelative() const {
476  return !IsAbsolute();
477}
478
479bool FileSpec::IsAbsolute() const {
480  llvm::SmallString<64> current_path;
481  GetPath(current_path, false);
482
483  // Early return if the path is empty.
484  if (current_path.empty())
485    return false;
486
487  // We consider paths starting with ~ to be absolute.
488  if (current_path[0] == '~')
489    return true;
490
491  return llvm::sys::path::is_absolute(current_path, m_style);
492}
493
494void FileSpec::MakeAbsolute(const FileSpec &dir) {
495  if (IsRelative())
496    PrependPathComponent(dir);
497}
498
499void llvm::format_provider<FileSpec>::format(const FileSpec &F,
500                                             raw_ostream &Stream,
501                                             StringRef Style) {
502  assert(
503      (Style.empty() || Style.equals_lower("F") || Style.equals_lower("D")) &&
504      "Invalid FileSpec style!");
505
506  StringRef dir = F.GetDirectory().GetStringRef();
507  StringRef file = F.GetFilename().GetStringRef();
508
509  if (dir.empty() && file.empty()) {
510    Stream << "(empty)";
511    return;
512  }
513
514  if (Style.equals_lower("F")) {
515    Stream << (file.empty() ? "(empty)" : file);
516    return;
517  }
518
519  // Style is either D or empty, either way we need to print the directory.
520  if (!dir.empty()) {
521    // Directory is stored in normalized form, which might be different than
522    // preferred form.  In order to handle this, we need to cut off the
523    // filename, then denormalize, then write the entire denorm'ed directory.
524    llvm::SmallString<64> denormalized_dir = dir;
525    Denormalize(denormalized_dir, F.GetPathStyle());
526    Stream << denormalized_dir;
527    Stream << GetPreferredPathSeparator(F.GetPathStyle());
528  }
529
530  if (Style.equals_lower("D")) {
531    // We only want to print the directory, so now just exit.
532    if (dir.empty())
533      Stream << "(empty)";
534    return;
535  }
536
537  if (!file.empty())
538    Stream << file;
539}
540