RewriteMacros.cpp revision 263508
1//===--- RewriteMacros.cpp - Rewrite macros into their expansions ---------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This code rewrites macro invocations into their expansions.  This gives you
11// a macro expanded file that retains comments and #includes.
12//
13//===----------------------------------------------------------------------===//
14
15#include "clang/Rewrite/Frontend/Rewriters.h"
16#include "clang/Basic/SourceManager.h"
17#include "clang/Lex/Preprocessor.h"
18#include "clang/Rewrite/Core/Rewriter.h"
19#include "llvm/ADT/OwningPtr.h"
20#include "llvm/Support/Path.h"
21#include "llvm/Support/raw_ostream.h"
22#include <cstdio>
23
24using namespace clang;
25
26/// isSameToken - Return true if the two specified tokens start have the same
27/// content.
28static bool isSameToken(Token &RawTok, Token &PPTok) {
29  // If two tokens have the same kind and the same identifier info, they are
30  // obviously the same.
31  if (PPTok.getKind() == RawTok.getKind() &&
32      PPTok.getIdentifierInfo() == RawTok.getIdentifierInfo())
33    return true;
34
35  // Otherwise, if they are different but have the same identifier info, they
36  // are also considered to be the same.  This allows keywords and raw lexed
37  // identifiers with the same name to be treated the same.
38  if (PPTok.getIdentifierInfo() &&
39      PPTok.getIdentifierInfo() == RawTok.getIdentifierInfo())
40    return true;
41
42  return false;
43}
44
45
46/// GetNextRawTok - Return the next raw token in the stream, skipping over
47/// comments if ReturnComment is false.
48static const Token &GetNextRawTok(const std::vector<Token> &RawTokens,
49                                  unsigned &CurTok, bool ReturnComment) {
50  assert(CurTok < RawTokens.size() && "Overran eof!");
51
52  // If the client doesn't want comments and we have one, skip it.
53  if (!ReturnComment && RawTokens[CurTok].is(tok::comment))
54    ++CurTok;
55
56  return RawTokens[CurTok++];
57}
58
59
60/// LexRawTokensFromMainFile - Lets all the raw tokens from the main file into
61/// the specified vector.
62static void LexRawTokensFromMainFile(Preprocessor &PP,
63                                     std::vector<Token> &RawTokens) {
64  SourceManager &SM = PP.getSourceManager();
65
66  // Create a lexer to lex all the tokens of the main file in raw mode.  Even
67  // though it is in raw mode, it will not return comments.
68  const llvm::MemoryBuffer *FromFile = SM.getBuffer(SM.getMainFileID());
69  Lexer RawLex(SM.getMainFileID(), FromFile, SM, PP.getLangOpts());
70
71  // Switch on comment lexing because we really do want them.
72  RawLex.SetCommentRetentionState(true);
73
74  Token RawTok;
75  do {
76    RawLex.LexFromRawLexer(RawTok);
77
78    // If we have an identifier with no identifier info for our raw token, look
79    // up the indentifier info.  This is important for equality comparison of
80    // identifier tokens.
81    if (RawTok.is(tok::raw_identifier))
82      PP.LookUpIdentifierInfo(RawTok);
83
84    RawTokens.push_back(RawTok);
85  } while (RawTok.isNot(tok::eof));
86}
87
88
89/// RewriteMacrosInInput - Implement -rewrite-macros mode.
90void clang::RewriteMacrosInInput(Preprocessor &PP, raw_ostream *OS) {
91  SourceManager &SM = PP.getSourceManager();
92
93  Rewriter Rewrite;
94  Rewrite.setSourceMgr(SM, PP.getLangOpts());
95  RewriteBuffer &RB = Rewrite.getEditBuffer(SM.getMainFileID());
96
97  std::vector<Token> RawTokens;
98  LexRawTokensFromMainFile(PP, RawTokens);
99  unsigned CurRawTok = 0;
100  Token RawTok = GetNextRawTok(RawTokens, CurRawTok, false);
101
102
103  // Get the first preprocessing token.
104  PP.EnterMainSourceFile();
105  Token PPTok;
106  PP.Lex(PPTok);
107
108  // Preprocess the input file in parallel with raw lexing the main file. Ignore
109  // all tokens that are preprocessed from a file other than the main file (e.g.
110  // a header).  If we see tokens that are in the preprocessed file but not the
111  // lexed file, we have a macro expansion.  If we see tokens in the lexed file
112  // that aren't in the preprocessed view, we have macros that expand to no
113  // tokens, or macro arguments etc.
114  while (RawTok.isNot(tok::eof) || PPTok.isNot(tok::eof)) {
115    SourceLocation PPLoc = SM.getExpansionLoc(PPTok.getLocation());
116
117    // If PPTok is from a different source file, ignore it.
118    if (!SM.isWrittenInMainFile(PPLoc)) {
119      PP.Lex(PPTok);
120      continue;
121    }
122
123    // If the raw file hits a preprocessor directive, they will be extra tokens
124    // in the raw file that don't exist in the preprocsesed file.  However, we
125    // choose to preserve them in the output file and otherwise handle them
126    // specially.
127    if (RawTok.is(tok::hash) && RawTok.isAtStartOfLine()) {
128      // If this is a #warning directive or #pragma mark (GNU extensions),
129      // comment the line out.
130      if (RawTokens[CurRawTok].is(tok::identifier)) {
131        const IdentifierInfo *II = RawTokens[CurRawTok].getIdentifierInfo();
132        if (II->getName() == "warning") {
133          // Comment out #warning.
134          RB.InsertTextAfter(SM.getFileOffset(RawTok.getLocation()), "//");
135        } else if (II->getName() == "pragma" &&
136                   RawTokens[CurRawTok+1].is(tok::identifier) &&
137                   (RawTokens[CurRawTok+1].getIdentifierInfo()->getName() ==
138                    "mark")) {
139          // Comment out #pragma mark.
140          RB.InsertTextAfter(SM.getFileOffset(RawTok.getLocation()), "//");
141        }
142      }
143
144      // Otherwise, if this is a #include or some other directive, just leave it
145      // in the file by skipping over the line.
146      RawTok = GetNextRawTok(RawTokens, CurRawTok, false);
147      while (!RawTok.isAtStartOfLine() && RawTok.isNot(tok::eof))
148        RawTok = GetNextRawTok(RawTokens, CurRawTok, false);
149      continue;
150    }
151
152    // Okay, both tokens are from the same file.  Get their offsets from the
153    // start of the file.
154    unsigned PPOffs = SM.getFileOffset(PPLoc);
155    unsigned RawOffs = SM.getFileOffset(RawTok.getLocation());
156
157    // If the offsets are the same and the token kind is the same, ignore them.
158    if (PPOffs == RawOffs && isSameToken(RawTok, PPTok)) {
159      RawTok = GetNextRawTok(RawTokens, CurRawTok, false);
160      PP.Lex(PPTok);
161      continue;
162    }
163
164    // If the PP token is farther along than the raw token, something was
165    // deleted.  Comment out the raw token.
166    if (RawOffs <= PPOffs) {
167      // Comment out a whole run of tokens instead of bracketing each one with
168      // comments.  Add a leading space if RawTok didn't have one.
169      bool HasSpace = RawTok.hasLeadingSpace();
170      RB.InsertTextAfter(RawOffs, &" /*"[HasSpace]);
171      unsigned EndPos;
172
173      do {
174        EndPos = RawOffs+RawTok.getLength();
175
176        RawTok = GetNextRawTok(RawTokens, CurRawTok, true);
177        RawOffs = SM.getFileOffset(RawTok.getLocation());
178
179        if (RawTok.is(tok::comment)) {
180          // Skip past the comment.
181          RawTok = GetNextRawTok(RawTokens, CurRawTok, false);
182          break;
183        }
184
185      } while (RawOffs <= PPOffs && !RawTok.isAtStartOfLine() &&
186               (PPOffs != RawOffs || !isSameToken(RawTok, PPTok)));
187
188      RB.InsertTextBefore(EndPos, "*/");
189      continue;
190    }
191
192    // Otherwise, there was a replacement an expansion.  Insert the new token
193    // in the output buffer.  Insert the whole run of new tokens at once to get
194    // them in the right order.
195    unsigned InsertPos = PPOffs;
196    std::string Expansion;
197    while (PPOffs < RawOffs) {
198      Expansion += ' ' + PP.getSpelling(PPTok);
199      PP.Lex(PPTok);
200      PPLoc = SM.getExpansionLoc(PPTok.getLocation());
201      PPOffs = SM.getFileOffset(PPLoc);
202    }
203    Expansion += ' ';
204    RB.InsertTextBefore(InsertPos, Expansion);
205  }
206
207  // Get the buffer corresponding to MainFileID.  If we haven't changed it, then
208  // we are done.
209  if (const RewriteBuffer *RewriteBuf =
210      Rewrite.getRewriteBufferFor(SM.getMainFileID())) {
211    //printf("Changed:\n");
212    *OS << std::string(RewriteBuf->begin(), RewriteBuf->end());
213  } else {
214    fprintf(stderr, "No changes\n");
215  }
216  OS->flush();
217}
218