ClangHighlighter.cpp revision 360784
1//===-- ClangHighlighter.cpp ------------------------------------*- C++ -*-===// 2// 3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4// See https://llvm.org/LICENSE.txt for license information. 5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6// 7//===----------------------------------------------------------------------===// 8 9#include "ClangHighlighter.h" 10 11#include "lldb/Host/FileSystem.h" 12#include "lldb/Target/Language.h" 13#include "lldb/Utility/AnsiTerminal.h" 14#include "lldb/Utility/StreamString.h" 15 16#include "clang/Basic/SourceManager.h" 17#include "clang/Lex/Lexer.h" 18#include "llvm/ADT/StringSet.h" 19#include "llvm/Support/MemoryBuffer.h" 20 21using namespace lldb_private; 22 23bool ClangHighlighter::isKeyword(llvm::StringRef token) const { 24 return keywords.find(token) != keywords.end(); 25} 26 27ClangHighlighter::ClangHighlighter() { 28#define KEYWORD(X, N) keywords.insert(#X); 29#include "clang/Basic/TokenKinds.def" 30} 31 32/// Determines which style should be applied to the given token. 33/// \param highlighter 34/// The current highlighter that should use the style. 35/// \param token 36/// The current token. 37/// \param tok_str 38/// The string in the source code the token represents. 39/// \param options 40/// The style we use for coloring the source code. 41/// \param in_pp_directive 42/// If we are currently in a preprocessor directive. NOTE: This is 43/// passed by reference and will be updated if the current token starts 44/// or ends a preprocessor directive. 45/// \return 46/// The ColorStyle that should be applied to the token. 47static HighlightStyle::ColorStyle 48determineClangStyle(const ClangHighlighter &highlighter, 49 const clang::Token &token, llvm::StringRef tok_str, 50 const HighlightStyle &options, bool &in_pp_directive) { 51 using namespace clang; 52 53 if (token.is(tok::comment)) { 54 // If we were in a preprocessor directive before, we now left it. 55 in_pp_directive = false; 56 return options.comment; 57 } else if (in_pp_directive || token.getKind() == tok::hash) { 58 // Let's assume that the rest of the line is a PP directive. 59 in_pp_directive = true; 60 // Preprocessor directives are hard to match, so we have to hack this in. 61 return options.pp_directive; 62 } else if (tok::isStringLiteral(token.getKind())) 63 return options.string_literal; 64 else if (tok::isLiteral(token.getKind())) 65 return options.scalar_literal; 66 else if (highlighter.isKeyword(tok_str)) 67 return options.keyword; 68 else 69 switch (token.getKind()) { 70 case tok::raw_identifier: 71 case tok::identifier: 72 return options.identifier; 73 case tok::l_brace: 74 case tok::r_brace: 75 return options.braces; 76 case tok::l_square: 77 case tok::r_square: 78 return options.square_brackets; 79 case tok::l_paren: 80 case tok::r_paren: 81 return options.parentheses; 82 case tok::comma: 83 return options.comma; 84 case tok::coloncolon: 85 case tok::colon: 86 return options.colon; 87 88 case tok::amp: 89 case tok::ampamp: 90 case tok::ampequal: 91 case tok::star: 92 case tok::starequal: 93 case tok::plus: 94 case tok::plusplus: 95 case tok::plusequal: 96 case tok::minus: 97 case tok::arrow: 98 case tok::minusminus: 99 case tok::minusequal: 100 case tok::tilde: 101 case tok::exclaim: 102 case tok::exclaimequal: 103 case tok::slash: 104 case tok::slashequal: 105 case tok::percent: 106 case tok::percentequal: 107 case tok::less: 108 case tok::lessless: 109 case tok::lessequal: 110 case tok::lesslessequal: 111 case tok::spaceship: 112 case tok::greater: 113 case tok::greatergreater: 114 case tok::greaterequal: 115 case tok::greatergreaterequal: 116 case tok::caret: 117 case tok::caretequal: 118 case tok::pipe: 119 case tok::pipepipe: 120 case tok::pipeequal: 121 case tok::question: 122 case tok::equal: 123 case tok::equalequal: 124 return options.operators; 125 default: 126 break; 127 } 128 return HighlightStyle::ColorStyle(); 129} 130 131void ClangHighlighter::Highlight(const HighlightStyle &options, 132 llvm::StringRef line, 133 llvm::Optional<size_t> cursor_pos, 134 llvm::StringRef previous_lines, 135 Stream &result) const { 136 using namespace clang; 137 138 FileSystemOptions file_opts; 139 FileManager file_mgr(file_opts, 140 FileSystem::Instance().GetVirtualFileSystem()); 141 142 // The line might end in a backslash which would cause Clang to drop the 143 // backslash and the terminating new line. This makes sense when parsing C++, 144 // but when highlighting we care about preserving the backslash/newline. To 145 // not lose this information we remove the new line here so that Clang knows 146 // this is just a single line we are highlighting. We add back the newline 147 // after tokenizing. 148 llvm::StringRef line_ending = ""; 149 // There are a few legal line endings Clang recognizes and we need to 150 // temporarily remove from the string. 151 if (line.consume_back("\r\n")) 152 line_ending = "\r\n"; 153 else if (line.consume_back("\n")) 154 line_ending = "\n"; 155 else if (line.consume_back("\r")) 156 line_ending = "\r"; 157 158 unsigned line_number = previous_lines.count('\n') + 1U; 159 160 // Let's build the actual source code Clang needs and setup some utility 161 // objects. 162 std::string full_source = previous_lines.str() + line.str(); 163 llvm::IntrusiveRefCntPtr<DiagnosticIDs> diag_ids(new DiagnosticIDs()); 164 llvm::IntrusiveRefCntPtr<DiagnosticOptions> diags_opts( 165 new DiagnosticOptions()); 166 DiagnosticsEngine diags(diag_ids, diags_opts); 167 clang::SourceManager SM(diags, file_mgr); 168 auto buf = llvm::MemoryBuffer::getMemBuffer(full_source); 169 170 FileID FID = SM.createFileID(clang::SourceManager::Unowned, buf.get()); 171 172 // Let's just enable the latest ObjC and C++ which should get most tokens 173 // right. 174 LangOptions Opts; 175 Opts.ObjC = true; 176 // FIXME: This should probably set CPlusPlus, CPlusPlus11, ... too 177 Opts.CPlusPlus17 = true; 178 Opts.LineComment = true; 179 180 Lexer lex(FID, buf.get(), SM, Opts); 181 // The lexer should keep whitespace around. 182 lex.SetKeepWhitespaceMode(true); 183 184 // Keeps track if we have entered a PP directive. 185 bool in_pp_directive = false; 186 187 // True once we actually lexed the user provided line. 188 bool found_user_line = false; 189 190 // True if we already highlighted the token under the cursor, false otherwise. 191 bool highlighted_cursor = false; 192 Token token; 193 bool exit = false; 194 while (!exit) { 195 // Returns true if this is the last token we get from the lexer. 196 exit = lex.LexFromRawLexer(token); 197 198 bool invalid = false; 199 unsigned current_line_number = 200 SM.getSpellingLineNumber(token.getLocation(), &invalid); 201 if (current_line_number != line_number) 202 continue; 203 found_user_line = true; 204 205 // We don't need to print any tokens without a spelling line number. 206 if (invalid) 207 continue; 208 209 // Same as above but with the column number. 210 invalid = false; 211 unsigned start = SM.getSpellingColumnNumber(token.getLocation(), &invalid); 212 if (invalid) 213 continue; 214 // Column numbers start at 1, but indexes in our string start at 0. 215 --start; 216 217 // Annotations don't have a length, so let's skip them. 218 if (token.isAnnotation()) 219 continue; 220 221 // Extract the token string from our source code. 222 llvm::StringRef tok_str = line.substr(start, token.getLength()); 223 224 // If the token is just an empty string, we can skip all the work below. 225 if (tok_str.empty()) 226 continue; 227 228 // If the cursor is inside this token, we have to apply the 'selected' 229 // highlight style before applying the actual token color. 230 llvm::StringRef to_print = tok_str; 231 StreamString storage; 232 auto end = start + token.getLength(); 233 if (cursor_pos && end > *cursor_pos && !highlighted_cursor) { 234 highlighted_cursor = true; 235 options.selected.Apply(storage, tok_str); 236 to_print = storage.GetString(); 237 } 238 239 // See how we are supposed to highlight this token. 240 HighlightStyle::ColorStyle color = 241 determineClangStyle(*this, token, tok_str, options, in_pp_directive); 242 243 color.Apply(result, to_print); 244 } 245 246 // Add the line ending we trimmed before tokenizing. 247 result << line_ending; 248 249 // If we went over the whole file but couldn't find our own file, then 250 // somehow our setup was wrong. When we're in release mode we just give the 251 // user the normal line and pretend we don't know how to highlight it. In 252 // debug mode we bail out with an assert as this should never happen. 253 if (!found_user_line) { 254 result << line; 255 assert(false && "We couldn't find the user line in the input file?"); 256 } 257} 258