1//===--- Format.cpp - Format C++ code -------------------------------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9///
10/// \file
11/// \brief This file implements functions declared in Format.h. This will be
12/// split into separate files as we go.
13///
14//===----------------------------------------------------------------------===//
15
16#define DEBUG_TYPE "format-formatter"
17
18#include "ContinuationIndenter.h"
19#include "TokenAnnotator.h"
20#include "UnwrappedLineParser.h"
21#include "WhitespaceManager.h"
22#include "clang/Basic/Diagnostic.h"
23#include "clang/Basic/SourceManager.h"
24#include "clang/Format/Format.h"
25#include "clang/Lex/Lexer.h"
26#include "llvm/ADT/STLExtras.h"
27#include "llvm/Support/Allocator.h"
28#include "llvm/Support/Debug.h"
29#include "llvm/Support/YAMLTraits.h"
30#include "llvm/Support/Path.h"
31#include <queue>
32#include <string>
33
34namespace llvm {
35namespace yaml {
36template <>
37struct ScalarEnumerationTraits<clang::format::FormatStyle::LanguageStandard> {
38  static void enumeration(IO &IO,
39                          clang::format::FormatStyle::LanguageStandard &Value) {
40    IO.enumCase(Value, "Cpp03", clang::format::FormatStyle::LS_Cpp03);
41    IO.enumCase(Value, "C++03", clang::format::FormatStyle::LS_Cpp03);
42    IO.enumCase(Value, "Cpp11", clang::format::FormatStyle::LS_Cpp11);
43    IO.enumCase(Value, "C++11", clang::format::FormatStyle::LS_Cpp11);
44    IO.enumCase(Value, "Auto", clang::format::FormatStyle::LS_Auto);
45  }
46};
47
48template <>
49struct ScalarEnumerationTraits<clang::format::FormatStyle::UseTabStyle> {
50  static void enumeration(IO &IO,
51                          clang::format::FormatStyle::UseTabStyle &Value) {
52    IO.enumCase(Value, "Never", clang::format::FormatStyle::UT_Never);
53    IO.enumCase(Value, "false", clang::format::FormatStyle::UT_Never);
54    IO.enumCase(Value, "Always", clang::format::FormatStyle::UT_Always);
55    IO.enumCase(Value, "true", clang::format::FormatStyle::UT_Always);
56    IO.enumCase(Value, "ForIndentation",
57                clang::format::FormatStyle::UT_ForIndentation);
58  }
59};
60
61template <>
62struct ScalarEnumerationTraits<clang::format::FormatStyle::BraceBreakingStyle> {
63  static void
64  enumeration(IO &IO, clang::format::FormatStyle::BraceBreakingStyle &Value) {
65    IO.enumCase(Value, "Attach", clang::format::FormatStyle::BS_Attach);
66    IO.enumCase(Value, "Linux", clang::format::FormatStyle::BS_Linux);
67    IO.enumCase(Value, "Stroustrup", clang::format::FormatStyle::BS_Stroustrup);
68    IO.enumCase(Value, "Allman", clang::format::FormatStyle::BS_Allman);
69  }
70};
71
72template <>
73struct ScalarEnumerationTraits<
74    clang::format::FormatStyle::NamespaceIndentationKind> {
75  static void
76  enumeration(IO &IO,
77              clang::format::FormatStyle::NamespaceIndentationKind &Value) {
78    IO.enumCase(Value, "None", clang::format::FormatStyle::NI_None);
79    IO.enumCase(Value, "Inner", clang::format::FormatStyle::NI_Inner);
80    IO.enumCase(Value, "All", clang::format::FormatStyle::NI_All);
81  }
82};
83
84template <> struct MappingTraits<clang::format::FormatStyle> {
85  static void mapping(llvm::yaml::IO &IO, clang::format::FormatStyle &Style) {
86    if (IO.outputting()) {
87      StringRef StylesArray[] = { "LLVM",    "Google", "Chromium",
88                                  "Mozilla", "WebKit" };
89      ArrayRef<StringRef> Styles(StylesArray);
90      for (size_t i = 0, e = Styles.size(); i < e; ++i) {
91        StringRef StyleName(Styles[i]);
92        clang::format::FormatStyle PredefinedStyle;
93        if (clang::format::getPredefinedStyle(StyleName, &PredefinedStyle) &&
94            Style == PredefinedStyle) {
95          IO.mapOptional("# BasedOnStyle", StyleName);
96          break;
97        }
98      }
99    } else {
100      StringRef BasedOnStyle;
101      IO.mapOptional("BasedOnStyle", BasedOnStyle);
102      if (!BasedOnStyle.empty())
103        if (!clang::format::getPredefinedStyle(BasedOnStyle, &Style)) {
104          IO.setError(Twine("Unknown value for BasedOnStyle: ", BasedOnStyle));
105          return;
106        }
107    }
108
109    IO.mapOptional("AccessModifierOffset", Style.AccessModifierOffset);
110    IO.mapOptional("ConstructorInitializerIndentWidth",
111                   Style.ConstructorInitializerIndentWidth);
112    IO.mapOptional("AlignEscapedNewlinesLeft", Style.AlignEscapedNewlinesLeft);
113    IO.mapOptional("AlignTrailingComments", Style.AlignTrailingComments);
114    IO.mapOptional("AllowAllParametersOfDeclarationOnNextLine",
115                   Style.AllowAllParametersOfDeclarationOnNextLine);
116    IO.mapOptional("AllowShortIfStatementsOnASingleLine",
117                   Style.AllowShortIfStatementsOnASingleLine);
118    IO.mapOptional("AllowShortLoopsOnASingleLine",
119                   Style.AllowShortLoopsOnASingleLine);
120    IO.mapOptional("AlwaysBreakTemplateDeclarations",
121                   Style.AlwaysBreakTemplateDeclarations);
122    IO.mapOptional("AlwaysBreakBeforeMultilineStrings",
123                   Style.AlwaysBreakBeforeMultilineStrings);
124    IO.mapOptional("BreakBeforeBinaryOperators",
125                   Style.BreakBeforeBinaryOperators);
126    IO.mapOptional("BreakBeforeTernaryOperators",
127                   Style.BreakBeforeTernaryOperators);
128    IO.mapOptional("BreakConstructorInitializersBeforeComma",
129                   Style.BreakConstructorInitializersBeforeComma);
130    IO.mapOptional("BinPackParameters", Style.BinPackParameters);
131    IO.mapOptional("ColumnLimit", Style.ColumnLimit);
132    IO.mapOptional("ConstructorInitializerAllOnOneLineOrOnePerLine",
133                   Style.ConstructorInitializerAllOnOneLineOrOnePerLine);
134    IO.mapOptional("DerivePointerBinding", Style.DerivePointerBinding);
135    IO.mapOptional("ExperimentalAutoDetectBinPacking",
136                   Style.ExperimentalAutoDetectBinPacking);
137    IO.mapOptional("IndentCaseLabels", Style.IndentCaseLabels);
138    IO.mapOptional("MaxEmptyLinesToKeep", Style.MaxEmptyLinesToKeep);
139    IO.mapOptional("NamespaceIndentation", Style.NamespaceIndentation);
140    IO.mapOptional("ObjCSpaceBeforeProtocolList",
141                   Style.ObjCSpaceBeforeProtocolList);
142    IO.mapOptional("PenaltyBreakBeforeFirstCallParameter",
143                   Style.PenaltyBreakBeforeFirstCallParameter);
144    IO.mapOptional("PenaltyBreakComment", Style.PenaltyBreakComment);
145    IO.mapOptional("PenaltyBreakString", Style.PenaltyBreakString);
146    IO.mapOptional("PenaltyBreakFirstLessLess",
147                   Style.PenaltyBreakFirstLessLess);
148    IO.mapOptional("PenaltyExcessCharacter", Style.PenaltyExcessCharacter);
149    IO.mapOptional("PenaltyReturnTypeOnItsOwnLine",
150                   Style.PenaltyReturnTypeOnItsOwnLine);
151    IO.mapOptional("PointerBindsToType", Style.PointerBindsToType);
152    IO.mapOptional("SpacesBeforeTrailingComments",
153                   Style.SpacesBeforeTrailingComments);
154    IO.mapOptional("Cpp11BracedListStyle", Style.Cpp11BracedListStyle);
155    IO.mapOptional("Standard", Style.Standard);
156    IO.mapOptional("IndentWidth", Style.IndentWidth);
157    IO.mapOptional("TabWidth", Style.TabWidth);
158    IO.mapOptional("UseTab", Style.UseTab);
159    IO.mapOptional("BreakBeforeBraces", Style.BreakBeforeBraces);
160    IO.mapOptional("IndentFunctionDeclarationAfterType",
161                   Style.IndentFunctionDeclarationAfterType);
162    IO.mapOptional("SpacesInParentheses", Style.SpacesInParentheses);
163    IO.mapOptional("SpacesInAngles", Style.SpacesInAngles);
164    IO.mapOptional("SpaceInEmptyParentheses", Style.SpaceInEmptyParentheses);
165    IO.mapOptional("SpacesInCStyleCastParentheses",
166                   Style.SpacesInCStyleCastParentheses);
167    IO.mapOptional("SpaceAfterControlStatementKeyword",
168                   Style.SpaceAfterControlStatementKeyword);
169    IO.mapOptional("SpaceBeforeAssignmentOperators",
170                   Style.SpaceBeforeAssignmentOperators);
171    IO.mapOptional("ContinuationIndentWidth", Style.ContinuationIndentWidth);
172  }
173};
174}
175}
176
177namespace clang {
178namespace format {
179
180void setDefaultPenalties(FormatStyle &Style) {
181  Style.PenaltyBreakComment = 60;
182  Style.PenaltyBreakFirstLessLess = 120;
183  Style.PenaltyBreakString = 1000;
184  Style.PenaltyExcessCharacter = 1000000;
185}
186
187FormatStyle getLLVMStyle() {
188  FormatStyle LLVMStyle;
189  LLVMStyle.AccessModifierOffset = -2;
190  LLVMStyle.AlignEscapedNewlinesLeft = false;
191  LLVMStyle.AlignTrailingComments = true;
192  LLVMStyle.AllowAllParametersOfDeclarationOnNextLine = true;
193  LLVMStyle.AllowShortIfStatementsOnASingleLine = false;
194  LLVMStyle.AllowShortLoopsOnASingleLine = false;
195  LLVMStyle.AlwaysBreakBeforeMultilineStrings = false;
196  LLVMStyle.AlwaysBreakTemplateDeclarations = false;
197  LLVMStyle.BinPackParameters = true;
198  LLVMStyle.BreakBeforeBinaryOperators = false;
199  LLVMStyle.BreakBeforeTernaryOperators = true;
200  LLVMStyle.BreakBeforeBraces = FormatStyle::BS_Attach;
201  LLVMStyle.BreakConstructorInitializersBeforeComma = false;
202  LLVMStyle.ColumnLimit = 80;
203  LLVMStyle.ConstructorInitializerAllOnOneLineOrOnePerLine = false;
204  LLVMStyle.ConstructorInitializerIndentWidth = 4;
205  LLVMStyle.Cpp11BracedListStyle = false;
206  LLVMStyle.DerivePointerBinding = false;
207  LLVMStyle.ExperimentalAutoDetectBinPacking = false;
208  LLVMStyle.IndentCaseLabels = false;
209  LLVMStyle.IndentFunctionDeclarationAfterType = false;
210  LLVMStyle.IndentWidth = 2;
211  LLVMStyle.TabWidth = 8;
212  LLVMStyle.MaxEmptyLinesToKeep = 1;
213  LLVMStyle.NamespaceIndentation = FormatStyle::NI_None;
214  LLVMStyle.ObjCSpaceBeforeProtocolList = true;
215  LLVMStyle.PointerBindsToType = false;
216  LLVMStyle.SpacesBeforeTrailingComments = 1;
217  LLVMStyle.Standard = FormatStyle::LS_Cpp03;
218  LLVMStyle.UseTab = FormatStyle::UT_Never;
219  LLVMStyle.SpacesInParentheses = false;
220  LLVMStyle.SpaceInEmptyParentheses = false;
221  LLVMStyle.SpacesInCStyleCastParentheses = false;
222  LLVMStyle.SpaceAfterControlStatementKeyword = true;
223  LLVMStyle.SpaceBeforeAssignmentOperators = true;
224  LLVMStyle.ContinuationIndentWidth = 4;
225  LLVMStyle.SpacesInAngles = false;
226
227  setDefaultPenalties(LLVMStyle);
228  LLVMStyle.PenaltyReturnTypeOnItsOwnLine = 60;
229  LLVMStyle.PenaltyBreakBeforeFirstCallParameter = 19;
230
231  return LLVMStyle;
232}
233
234FormatStyle getGoogleStyle() {
235  FormatStyle GoogleStyle;
236  GoogleStyle.AccessModifierOffset = -1;
237  GoogleStyle.AlignEscapedNewlinesLeft = true;
238  GoogleStyle.AlignTrailingComments = true;
239  GoogleStyle.AllowAllParametersOfDeclarationOnNextLine = true;
240  GoogleStyle.AllowShortIfStatementsOnASingleLine = true;
241  GoogleStyle.AllowShortLoopsOnASingleLine = true;
242  GoogleStyle.AlwaysBreakBeforeMultilineStrings = true;
243  GoogleStyle.AlwaysBreakTemplateDeclarations = true;
244  GoogleStyle.BinPackParameters = true;
245  GoogleStyle.BreakBeforeBinaryOperators = false;
246  GoogleStyle.BreakBeforeTernaryOperators = true;
247  GoogleStyle.BreakBeforeBraces = FormatStyle::BS_Attach;
248  GoogleStyle.BreakConstructorInitializersBeforeComma = false;
249  GoogleStyle.ColumnLimit = 80;
250  GoogleStyle.ConstructorInitializerAllOnOneLineOrOnePerLine = true;
251  GoogleStyle.ConstructorInitializerIndentWidth = 4;
252  GoogleStyle.Cpp11BracedListStyle = true;
253  GoogleStyle.DerivePointerBinding = true;
254  GoogleStyle.ExperimentalAutoDetectBinPacking = false;
255  GoogleStyle.IndentCaseLabels = true;
256  GoogleStyle.IndentFunctionDeclarationAfterType = true;
257  GoogleStyle.IndentWidth = 2;
258  GoogleStyle.TabWidth = 8;
259  GoogleStyle.MaxEmptyLinesToKeep = 1;
260  GoogleStyle.NamespaceIndentation = FormatStyle::NI_None;
261  GoogleStyle.ObjCSpaceBeforeProtocolList = false;
262  GoogleStyle.PointerBindsToType = true;
263  GoogleStyle.SpacesBeforeTrailingComments = 2;
264  GoogleStyle.Standard = FormatStyle::LS_Auto;
265  GoogleStyle.UseTab = FormatStyle::UT_Never;
266  GoogleStyle.SpacesInParentheses = false;
267  GoogleStyle.SpaceInEmptyParentheses = false;
268  GoogleStyle.SpacesInCStyleCastParentheses = false;
269  GoogleStyle.SpaceAfterControlStatementKeyword = true;
270  GoogleStyle.SpaceBeforeAssignmentOperators = true;
271  GoogleStyle.ContinuationIndentWidth = 4;
272  GoogleStyle.SpacesInAngles = false;
273
274  setDefaultPenalties(GoogleStyle);
275  GoogleStyle.PenaltyReturnTypeOnItsOwnLine = 200;
276  GoogleStyle.PenaltyBreakBeforeFirstCallParameter = 1;
277
278  return GoogleStyle;
279}
280
281FormatStyle getChromiumStyle() {
282  FormatStyle ChromiumStyle = getGoogleStyle();
283  ChromiumStyle.AllowAllParametersOfDeclarationOnNextLine = false;
284  ChromiumStyle.AllowShortIfStatementsOnASingleLine = false;
285  ChromiumStyle.AllowShortLoopsOnASingleLine = false;
286  ChromiumStyle.BinPackParameters = false;
287  ChromiumStyle.DerivePointerBinding = false;
288  ChromiumStyle.Standard = FormatStyle::LS_Cpp03;
289  return ChromiumStyle;
290}
291
292FormatStyle getMozillaStyle() {
293  FormatStyle MozillaStyle = getLLVMStyle();
294  MozillaStyle.AllowAllParametersOfDeclarationOnNextLine = false;
295  MozillaStyle.ConstructorInitializerAllOnOneLineOrOnePerLine = true;
296  MozillaStyle.DerivePointerBinding = true;
297  MozillaStyle.IndentCaseLabels = true;
298  MozillaStyle.ObjCSpaceBeforeProtocolList = false;
299  MozillaStyle.PenaltyReturnTypeOnItsOwnLine = 200;
300  MozillaStyle.PointerBindsToType = true;
301  return MozillaStyle;
302}
303
304FormatStyle getWebKitStyle() {
305  FormatStyle Style = getLLVMStyle();
306  Style.AccessModifierOffset = -4;
307  Style.AlignTrailingComments = false;
308  Style.BreakBeforeBinaryOperators = true;
309  Style.BreakBeforeBraces = FormatStyle::BS_Stroustrup;
310  Style.BreakConstructorInitializersBeforeComma = true;
311  Style.ColumnLimit = 0;
312  Style.IndentWidth = 4;
313  Style.NamespaceIndentation = FormatStyle::NI_Inner;
314  Style.PointerBindsToType = true;
315  return Style;
316}
317
318bool getPredefinedStyle(StringRef Name, FormatStyle *Style) {
319  if (Name.equals_lower("llvm"))
320    *Style = getLLVMStyle();
321  else if (Name.equals_lower("chromium"))
322    *Style = getChromiumStyle();
323  else if (Name.equals_lower("mozilla"))
324    *Style = getMozillaStyle();
325  else if (Name.equals_lower("google"))
326    *Style = getGoogleStyle();
327  else if (Name.equals_lower("webkit"))
328    *Style = getWebKitStyle();
329  else
330    return false;
331
332  return true;
333}
334
335llvm::error_code parseConfiguration(StringRef Text, FormatStyle *Style) {
336  if (Text.trim().empty())
337    return llvm::make_error_code(llvm::errc::invalid_argument);
338  llvm::yaml::Input Input(Text);
339  Input >> *Style;
340  return Input.error();
341}
342
343std::string configurationAsText(const FormatStyle &Style) {
344  std::string Text;
345  llvm::raw_string_ostream Stream(Text);
346  llvm::yaml::Output Output(Stream);
347  // We use the same mapping method for input and output, so we need a non-const
348  // reference here.
349  FormatStyle NonConstStyle = Style;
350  Output << NonConstStyle;
351  return Stream.str();
352}
353
354namespace {
355
356class NoColumnLimitFormatter {
357public:
358  NoColumnLimitFormatter(ContinuationIndenter *Indenter) : Indenter(Indenter) {}
359
360  /// \brief Formats the line starting at \p State, simply keeping all of the
361  /// input's line breaking decisions.
362  void format(unsigned FirstIndent, const AnnotatedLine *Line) {
363    LineState State =
364        Indenter->getInitialState(FirstIndent, Line, /*DryRun=*/false);
365    while (State.NextToken != NULL) {
366      bool Newline =
367          Indenter->mustBreak(State) ||
368          (Indenter->canBreak(State) && State.NextToken->NewlinesBefore > 0);
369      Indenter->addTokenToState(State, Newline, /*DryRun=*/false);
370    }
371  }
372
373private:
374  ContinuationIndenter *Indenter;
375};
376
377class LineJoiner {
378public:
379  LineJoiner(const FormatStyle &Style) : Style(Style) {}
380
381  /// \brief Calculates how many lines can be merged into 1 starting at \p I.
382  unsigned
383  tryFitMultipleLinesInOne(unsigned Indent,
384                           SmallVectorImpl<AnnotatedLine *>::const_iterator &I,
385                           SmallVectorImpl<AnnotatedLine *>::const_iterator E) {
386    // We can never merge stuff if there are trailing line comments.
387    AnnotatedLine *TheLine = *I;
388    if (TheLine->Last->Type == TT_LineComment)
389      return 0;
390
391    if (Indent > Style.ColumnLimit)
392      return 0;
393
394    unsigned Limit =
395        Style.ColumnLimit == 0 ? UINT_MAX : Style.ColumnLimit - Indent;
396    // If we already exceed the column limit, we set 'Limit' to 0. The different
397    // tryMerge..() functions can then decide whether to still do merging.
398    Limit = TheLine->Last->TotalLength > Limit
399                ? 0
400                : Limit - TheLine->Last->TotalLength;
401
402    if (I + 1 == E || I[1]->Type == LT_Invalid)
403      return 0;
404
405    if (TheLine->Last->is(tok::l_brace)) {
406      return tryMergeSimpleBlock(I, E, Limit);
407    } else if (Style.AllowShortIfStatementsOnASingleLine &&
408               TheLine->First->is(tok::kw_if)) {
409      return tryMergeSimpleControlStatement(I, E, Limit);
410    } else if (Style.AllowShortLoopsOnASingleLine &&
411               TheLine->First->isOneOf(tok::kw_for, tok::kw_while)) {
412      return tryMergeSimpleControlStatement(I, E, Limit);
413    } else if (TheLine->InPPDirective && (TheLine->First->HasUnescapedNewline ||
414                                          TheLine->First->IsFirst)) {
415      return tryMergeSimplePPDirective(I, E, Limit);
416    }
417    return 0;
418  }
419
420private:
421  unsigned
422  tryMergeSimplePPDirective(SmallVectorImpl<AnnotatedLine *>::const_iterator &I,
423                            SmallVectorImpl<AnnotatedLine *>::const_iterator E,
424                            unsigned Limit) {
425    if (Limit == 0)
426      return 0;
427    if (!I[1]->InPPDirective || I[1]->First->HasUnescapedNewline)
428      return 0;
429    if (I + 2 != E && I[2]->InPPDirective && !I[2]->First->HasUnescapedNewline)
430      return 0;
431    if (1 + I[1]->Last->TotalLength > Limit)
432      return 0;
433    return 1;
434  }
435
436  unsigned tryMergeSimpleControlStatement(
437      SmallVectorImpl<AnnotatedLine *>::const_iterator &I,
438      SmallVectorImpl<AnnotatedLine *>::const_iterator E, unsigned Limit) {
439    if (Limit == 0)
440      return 0;
441    if (Style.BreakBeforeBraces == FormatStyle::BS_Allman &&
442        I[1]->First->is(tok::l_brace))
443      return 0;
444    if (I[1]->InPPDirective != (*I)->InPPDirective ||
445        (I[1]->InPPDirective && I[1]->First->HasUnescapedNewline))
446      return 0;
447    AnnotatedLine &Line = **I;
448    if (Line.Last->isNot(tok::r_paren))
449      return 0;
450    if (1 + I[1]->Last->TotalLength > Limit)
451      return 0;
452    if (I[1]->First->isOneOf(tok::semi, tok::kw_if, tok::kw_for,
453                                   tok::kw_while) ||
454        I[1]->First->Type == TT_LineComment)
455      return 0;
456    // Only inline simple if's (no nested if or else).
457    if (I + 2 != E && Line.First->is(tok::kw_if) &&
458        I[2]->First->is(tok::kw_else))
459      return 0;
460    return 1;
461  }
462
463  unsigned
464  tryMergeSimpleBlock(SmallVectorImpl<AnnotatedLine *>::const_iterator &I,
465                      SmallVectorImpl<AnnotatedLine *>::const_iterator E,
466                      unsigned Limit) {
467    // No merging if the brace already is on the next line.
468    if (Style.BreakBeforeBraces != FormatStyle::BS_Attach)
469      return 0;
470
471    // First, check that the current line allows merging. This is the case if
472    // we're not in a control flow statement and the last token is an opening
473    // brace.
474    AnnotatedLine &Line = **I;
475    if (Line.First->isOneOf(tok::kw_if, tok::kw_while, tok::kw_do, tok::r_brace,
476                            tok::kw_else, tok::kw_try, tok::kw_catch,
477                            tok::kw_for,
478                            // This gets rid of all ObjC @ keywords and methods.
479                            tok::at, tok::minus, tok::plus))
480      return 0;
481
482    FormatToken *Tok = I[1]->First;
483    if (Tok->is(tok::r_brace) && !Tok->MustBreakBefore &&
484        (Tok->getNextNonComment() == NULL ||
485         Tok->getNextNonComment()->is(tok::semi))) {
486      // We merge empty blocks even if the line exceeds the column limit.
487      Tok->SpacesRequiredBefore = 0;
488      Tok->CanBreakBefore = true;
489      return 1;
490    } else if (Limit != 0 && Line.First->isNot(tok::kw_namespace)) {
491      // Check that we still have three lines and they fit into the limit.
492      if (I + 2 == E || I[2]->Type == LT_Invalid)
493        return 0;
494
495      if (!nextTwoLinesFitInto(I, Limit))
496        return 0;
497
498      // Second, check that the next line does not contain any braces - if it
499      // does, readability declines when putting it into a single line.
500      if (I[1]->Last->Type == TT_LineComment || Tok->MustBreakBefore)
501        return 0;
502      do {
503        if (Tok->isOneOf(tok::l_brace, tok::r_brace))
504          return 0;
505        Tok = Tok->Next;
506      } while (Tok != NULL);
507
508      // Last, check that the third line contains a single closing brace.
509      Tok = I[2]->First;
510      if (Tok->getNextNonComment() != NULL || Tok->isNot(tok::r_brace) ||
511          Tok->MustBreakBefore)
512        return 0;
513
514      return 2;
515    }
516    return 0;
517  }
518
519  bool nextTwoLinesFitInto(SmallVectorImpl<AnnotatedLine *>::const_iterator I,
520                           unsigned Limit) {
521    return 1 + I[1]->Last->TotalLength + 1 + I[2]->Last->TotalLength <= Limit;
522  }
523
524  const FormatStyle &Style;
525};
526
527class UnwrappedLineFormatter {
528public:
529  UnwrappedLineFormatter(SourceManager &SourceMgr,
530                         SmallVectorImpl<CharSourceRange> &Ranges,
531                         ContinuationIndenter *Indenter,
532                         WhitespaceManager *Whitespaces,
533                         const FormatStyle &Style)
534      : SourceMgr(SourceMgr), Ranges(Ranges), Indenter(Indenter),
535        Whitespaces(Whitespaces), Style(Style), Joiner(Style) {}
536
537  unsigned format(const SmallVectorImpl<AnnotatedLine *> &Lines, bool DryRun,
538                  int AdditionalIndent = 0) {
539    assert(!Lines.empty());
540    unsigned Penalty = 0;
541    std::vector<int> IndentForLevel;
542    for (unsigned i = 0, e = Lines[0]->Level; i != e; ++i)
543      IndentForLevel.push_back(Style.IndentWidth * i + AdditionalIndent);
544    bool PreviousLineWasTouched = false;
545    const AnnotatedLine *PreviousLine = NULL;
546    bool FormatPPDirective = false;
547    for (SmallVectorImpl<AnnotatedLine *>::const_iterator I = Lines.begin(),
548                                                          E = Lines.end();
549         I != E; ++I) {
550      const AnnotatedLine &TheLine = **I;
551      const FormatToken *FirstTok = TheLine.First;
552      int Offset = getIndentOffset(*FirstTok);
553
554      // Check whether this line is part of a formatted preprocessor directive.
555      if (FirstTok->HasUnescapedNewline)
556        FormatPPDirective = false;
557      if (!FormatPPDirective && TheLine.InPPDirective &&
558          (touchesLine(TheLine) || touchesPPDirective(I + 1, E)))
559        FormatPPDirective = true;
560
561      // Determine indent and try to merge multiple unwrapped lines.
562      while (IndentForLevel.size() <= TheLine.Level)
563        IndentForLevel.push_back(-1);
564      IndentForLevel.resize(TheLine.Level + 1);
565      unsigned Indent = getIndent(IndentForLevel, TheLine.Level);
566      if (static_cast<int>(Indent) + Offset >= 0)
567        Indent += Offset;
568      unsigned MergedLines = Joiner.tryFitMultipleLinesInOne(Indent, I, E);
569      if (!DryRun) {
570        for (unsigned i = 0; i < MergedLines; ++i) {
571          join(*I[i], *I[i + 1]);
572        }
573      }
574      I += MergedLines;
575
576      bool WasMoved = PreviousLineWasTouched && FirstTok->NewlinesBefore == 0;
577      if (TheLine.First->is(tok::eof)) {
578        if (PreviousLineWasTouched && !DryRun) {
579          unsigned Newlines = std::min(FirstTok->NewlinesBefore, 1u);
580          Whitespaces->replaceWhitespace(*TheLine.First, Newlines,
581                                         /*IndentLevel=*/0, /*Spaces=*/0,
582                                         /*TargetColumn=*/0);
583        }
584      } else if (TheLine.Type != LT_Invalid &&
585                 (WasMoved || FormatPPDirective || touchesLine(TheLine))) {
586        unsigned LevelIndent =
587            getIndent(IndentForLevel, TheLine.Level);
588        if (FirstTok->WhitespaceRange.isValid()) {
589          if (!DryRun)
590            formatFirstToken(*TheLine.First, PreviousLine, TheLine.Level,
591                             Indent, TheLine.InPPDirective);
592        } else {
593          Indent = LevelIndent = FirstTok->OriginalColumn;
594        }
595
596        // If everything fits on a single line, just put it there.
597        unsigned ColumnLimit = Style.ColumnLimit;
598        if (I + 1 != E) {
599          AnnotatedLine *NextLine = I[1];
600          if (NextLine->InPPDirective && !NextLine->First->HasUnescapedNewline)
601            ColumnLimit = getColumnLimit(TheLine.InPPDirective);
602        }
603
604        if (TheLine.Last->TotalLength + Indent <= ColumnLimit) {
605          LineState State = Indenter->getInitialState(Indent, &TheLine, DryRun);
606          while (State.NextToken != NULL)
607            Indenter->addTokenToState(State, /*Newline=*/false, DryRun);
608        } else if (Style.ColumnLimit == 0) {
609          NoColumnLimitFormatter Formatter(Indenter);
610          if (!DryRun)
611            Formatter.format(Indent, &TheLine);
612        } else {
613          Penalty += format(TheLine, Indent, DryRun);
614        }
615
616        IndentForLevel[TheLine.Level] = LevelIndent;
617        PreviousLineWasTouched = true;
618      } else {
619        // Format the first token if necessary, and notify the WhitespaceManager
620        // about the unchanged whitespace.
621        for (FormatToken *Tok = TheLine.First; Tok != NULL; Tok = Tok->Next) {
622          if (Tok == TheLine.First &&
623              (Tok->NewlinesBefore > 0 || Tok->IsFirst)) {
624            unsigned LevelIndent = Tok->OriginalColumn;
625            if (!DryRun) {
626              // Remove trailing whitespace of the previous line if it was
627              // touched.
628              if (PreviousLineWasTouched || touchesEmptyLineBefore(TheLine)) {
629                formatFirstToken(*Tok, PreviousLine, TheLine.Level, LevelIndent,
630                                 TheLine.InPPDirective);
631              } else {
632                Whitespaces->addUntouchableToken(*Tok, TheLine.InPPDirective);
633              }
634            }
635
636            if (static_cast<int>(LevelIndent) - Offset >= 0)
637              LevelIndent -= Offset;
638            if (Tok->isNot(tok::comment))
639              IndentForLevel[TheLine.Level] = LevelIndent;
640          } else if (!DryRun) {
641            Whitespaces->addUntouchableToken(*Tok, TheLine.InPPDirective);
642          }
643        }
644        // If we did not reformat this unwrapped line, the column at the end of
645        // the last token is unchanged - thus, we can calculate the end of the
646        // last token.
647        PreviousLineWasTouched = false;
648      }
649      if (!DryRun) {
650        for (FormatToken *Tok = TheLine.First; Tok != NULL; Tok = Tok->Next) {
651          Tok->Finalized = true;
652        }
653      }
654      PreviousLine = *I;
655    }
656    return Penalty;
657  }
658
659private:
660  /// \brief Formats an \c AnnotatedLine and returns the penalty.
661  ///
662  /// If \p DryRun is \c false, directly applies the changes.
663  unsigned format(const AnnotatedLine &Line, unsigned FirstIndent,
664                  bool DryRun) {
665    LineState State = Indenter->getInitialState(FirstIndent, &Line, DryRun);
666
667    // If the ObjC method declaration does not fit on a line, we should format
668    // it with one arg per line.
669    if (State.Line->Type == LT_ObjCMethodDecl)
670      State.Stack.back().BreakBeforeParameter = true;
671
672    // Find best solution in solution space.
673    return analyzeSolutionSpace(State, DryRun);
674  }
675
676  /// \brief An edge in the solution space from \c Previous->State to \c State,
677  /// inserting a newline dependent on the \c NewLine.
678  struct StateNode {
679    StateNode(const LineState &State, bool NewLine, StateNode *Previous)
680        : State(State), NewLine(NewLine), Previous(Previous) {}
681    LineState State;
682    bool NewLine;
683    StateNode *Previous;
684  };
685
686  /// \brief A pair of <penalty, count> that is used to prioritize the BFS on.
687  ///
688  /// In case of equal penalties, we want to prefer states that were inserted
689  /// first. During state generation we make sure that we insert states first
690  /// that break the line as late as possible.
691  typedef std::pair<unsigned, unsigned> OrderedPenalty;
692
693  /// \brief An item in the prioritized BFS search queue. The \c StateNode's
694  /// \c State has the given \c OrderedPenalty.
695  typedef std::pair<OrderedPenalty, StateNode *> QueueItem;
696
697  /// \brief The BFS queue type.
698  typedef std::priority_queue<QueueItem, std::vector<QueueItem>,
699                              std::greater<QueueItem> > QueueType;
700
701  /// \brief Get the offset of the line relatively to the level.
702  ///
703  /// For example, 'public:' labels in classes are offset by 1 or 2
704  /// characters to the left from their level.
705  int getIndentOffset(const FormatToken &RootToken) {
706    if (RootToken.isAccessSpecifier(false) || RootToken.isObjCAccessSpecifier())
707      return Style.AccessModifierOffset;
708    return 0;
709  }
710
711  /// \brief Add a new line and the required indent before the first Token
712  /// of the \c UnwrappedLine if there was no structural parsing error.
713  void formatFirstToken(FormatToken &RootToken,
714                        const AnnotatedLine *PreviousLine, unsigned IndentLevel,
715                        unsigned Indent, bool InPPDirective) {
716    unsigned Newlines =
717        std::min(RootToken.NewlinesBefore, Style.MaxEmptyLinesToKeep + 1);
718    // Remove empty lines before "}" where applicable.
719    if (RootToken.is(tok::r_brace) &&
720        (!RootToken.Next ||
721         (RootToken.Next->is(tok::semi) && !RootToken.Next->Next)))
722      Newlines = std::min(Newlines, 1u);
723    if (Newlines == 0 && !RootToken.IsFirst)
724      Newlines = 1;
725
726    // Insert extra new line before access specifiers.
727    if (PreviousLine && PreviousLine->Last->isOneOf(tok::semi, tok::r_brace) &&
728        RootToken.isAccessSpecifier() && RootToken.NewlinesBefore == 1)
729      ++Newlines;
730
731    // Remove empty lines after access specifiers.
732    if (PreviousLine && PreviousLine->First->isAccessSpecifier())
733      Newlines = std::min(1u, Newlines);
734
735    Whitespaces->replaceWhitespace(
736        RootToken, Newlines, IndentLevel, Indent, Indent,
737        InPPDirective && !RootToken.HasUnescapedNewline);
738  }
739
740  /// \brief Get the indent of \p Level from \p IndentForLevel.
741  ///
742  /// \p IndentForLevel must contain the indent for the level \c l
743  /// at \p IndentForLevel[l], or a value < 0 if the indent for
744  /// that level is unknown.
745  unsigned getIndent(const std::vector<int> IndentForLevel, unsigned Level) {
746    if (IndentForLevel[Level] != -1)
747      return IndentForLevel[Level];
748    if (Level == 0)
749      return 0;
750    return getIndent(IndentForLevel, Level - 1) + Style.IndentWidth;
751  }
752
753  void join(AnnotatedLine &A, const AnnotatedLine &B) {
754    assert(!A.Last->Next);
755    assert(!B.First->Previous);
756    A.Last->Next = B.First;
757    B.First->Previous = A.Last;
758    B.First->CanBreakBefore = true;
759    unsigned LengthA = A.Last->TotalLength + B.First->SpacesRequiredBefore;
760    for (FormatToken *Tok = B.First; Tok; Tok = Tok->Next) {
761      Tok->TotalLength += LengthA;
762      A.Last = Tok;
763    }
764  }
765
766  unsigned getColumnLimit(bool InPPDirective) const {
767    // In preprocessor directives reserve two chars for trailing " \"
768    return Style.ColumnLimit - (InPPDirective ? 2 : 0);
769  }
770
771  bool touchesRanges(const CharSourceRange &Range) {
772    for (SmallVectorImpl<CharSourceRange>::const_iterator I = Ranges.begin(),
773                                                          E = Ranges.end();
774         I != E; ++I) {
775      if (!SourceMgr.isBeforeInTranslationUnit(Range.getEnd(), I->getBegin()) &&
776          !SourceMgr.isBeforeInTranslationUnit(I->getEnd(), Range.getBegin()))
777        return true;
778    }
779    return false;
780  }
781
782  bool touchesLine(const AnnotatedLine &TheLine) {
783    const FormatToken *First = TheLine.First;
784    const FormatToken *Last = TheLine.Last;
785    CharSourceRange LineRange = CharSourceRange::getCharRange(
786        First->WhitespaceRange.getBegin().getLocWithOffset(
787            First->LastNewlineOffset),
788        Last->getStartOfNonWhitespace().getLocWithOffset(
789            Last->TokenText.size() - 1));
790    return touchesRanges(LineRange);
791  }
792
793  bool touchesPPDirective(SmallVectorImpl<AnnotatedLine *>::const_iterator I,
794                          SmallVectorImpl<AnnotatedLine *>::const_iterator E) {
795    for (; I != E; ++I) {
796      if ((*I)->First->HasUnescapedNewline)
797        return false;
798      if (touchesLine(**I))
799        return true;
800    }
801    return false;
802  }
803
804  bool touchesEmptyLineBefore(const AnnotatedLine &TheLine) {
805    const FormatToken *First = TheLine.First;
806    CharSourceRange LineRange = CharSourceRange::getCharRange(
807        First->WhitespaceRange.getBegin(),
808        First->WhitespaceRange.getBegin().getLocWithOffset(
809            First->LastNewlineOffset));
810    return touchesRanges(LineRange);
811  }
812
813  /// \brief Analyze the entire solution space starting from \p InitialState.
814  ///
815  /// This implements a variant of Dijkstra's algorithm on the graph that spans
816  /// the solution space (\c LineStates are the nodes). The algorithm tries to
817  /// find the shortest path (the one with lowest penalty) from \p InitialState
818  /// to a state where all tokens are placed. Returns the penalty.
819  ///
820  /// If \p DryRun is \c false, directly applies the changes.
821  unsigned analyzeSolutionSpace(LineState &InitialState, bool DryRun = false) {
822    std::set<LineState> Seen;
823
824    // Increasing count of \c StateNode items we have created. This is used to
825    // create a deterministic order independent of the container.
826    unsigned Count = 0;
827    QueueType Queue;
828
829    // Insert start element into queue.
830    StateNode *Node =
831        new (Allocator.Allocate()) StateNode(InitialState, false, NULL);
832    Queue.push(QueueItem(OrderedPenalty(0, Count), Node));
833    ++Count;
834
835    unsigned Penalty = 0;
836
837    // While not empty, take first element and follow edges.
838    while (!Queue.empty()) {
839      Penalty = Queue.top().first.first;
840      StateNode *Node = Queue.top().second;
841      if (Node->State.NextToken == NULL) {
842        DEBUG(llvm::dbgs() << "\n---\nPenalty for line: " << Penalty << "\n");
843        break;
844      }
845      Queue.pop();
846
847      // Cut off the analysis of certain solutions if the analysis gets too
848      // complex. See description of IgnoreStackForComparison.
849      if (Count > 10000)
850        Node->State.IgnoreStackForComparison = true;
851
852      if (!Seen.insert(Node->State).second)
853        // State already examined with lower penalty.
854        continue;
855
856      FormatDecision LastFormat = Node->State.NextToken->Decision;
857      if (LastFormat == FD_Unformatted || LastFormat == FD_Continue)
858        addNextStateToQueue(Penalty, Node, /*NewLine=*/false, &Count, &Queue);
859      if (LastFormat == FD_Unformatted || LastFormat == FD_Break)
860        addNextStateToQueue(Penalty, Node, /*NewLine=*/true, &Count, &Queue);
861    }
862
863    if (Queue.empty()) {
864      // We were unable to find a solution, do nothing.
865      // FIXME: Add diagnostic?
866      DEBUG(llvm::dbgs() << "Could not find a solution.\n");
867      return 0;
868    }
869
870    // Reconstruct the solution.
871    if (!DryRun)
872      reconstructPath(InitialState, Queue.top().second);
873
874    DEBUG(llvm::dbgs() << "Total number of analyzed states: " << Count << "\n");
875    DEBUG(llvm::dbgs() << "---\n");
876
877    return Penalty;
878  }
879
880  void reconstructPath(LineState &State, StateNode *Current) {
881    std::deque<StateNode *> Path;
882    // We do not need a break before the initial token.
883    while (Current->Previous) {
884      Path.push_front(Current);
885      Current = Current->Previous;
886    }
887    for (std::deque<StateNode *>::iterator I = Path.begin(), E = Path.end();
888         I != E; ++I) {
889      unsigned Penalty = 0;
890      formatChildren(State, (*I)->NewLine, /*DryRun=*/false, Penalty);
891      Penalty += Indenter->addTokenToState(State, (*I)->NewLine, false);
892
893      DEBUG({
894        if ((*I)->NewLine) {
895          llvm::dbgs() << "Penalty for placing "
896                       << (*I)->Previous->State.NextToken->Tok.getName() << ": "
897                       << Penalty << "\n";
898        }
899      });
900    }
901  }
902
903  /// \brief Add the following state to the analysis queue \c Queue.
904  ///
905  /// Assume the current state is \p PreviousNode and has been reached with a
906  /// penalty of \p Penalty. Insert a line break if \p NewLine is \c true.
907  void addNextStateToQueue(unsigned Penalty, StateNode *PreviousNode,
908                           bool NewLine, unsigned *Count, QueueType *Queue) {
909    if (NewLine && !Indenter->canBreak(PreviousNode->State))
910      return;
911    if (!NewLine && Indenter->mustBreak(PreviousNode->State))
912      return;
913
914    StateNode *Node = new (Allocator.Allocate())
915        StateNode(PreviousNode->State, NewLine, PreviousNode);
916    if (!formatChildren(Node->State, NewLine, /*DryRun=*/true, Penalty))
917      return;
918
919    Penalty += Indenter->addTokenToState(Node->State, NewLine, true);
920
921    Queue->push(QueueItem(OrderedPenalty(Penalty, *Count), Node));
922    ++(*Count);
923  }
924
925  /// \brief If the \p State's next token is an r_brace closing a nested block,
926  /// format the nested block before it.
927  ///
928  /// Returns \c true if all children could be placed successfully and adapts
929  /// \p Penalty as well as \p State. If \p DryRun is false, also directly
930  /// creates changes using \c Whitespaces.
931  ///
932  /// The crucial idea here is that children always get formatted upon
933  /// encountering the closing brace right after the nested block. Now, if we
934  /// are currently trying to keep the "}" on the same line (i.e. \p NewLine is
935  /// \c false), the entire block has to be kept on the same line (which is only
936  /// possible if it fits on the line, only contains a single statement, etc.
937  ///
938  /// If \p NewLine is true, we format the nested block on separate lines, i.e.
939  /// break after the "{", format all lines with correct indentation and the put
940  /// the closing "}" on yet another new line.
941  ///
942  /// This enables us to keep the simple structure of the
943  /// \c UnwrappedLineFormatter, where we only have two options for each token:
944  /// break or don't break.
945  bool formatChildren(LineState &State, bool NewLine, bool DryRun,
946                      unsigned &Penalty) {
947    FormatToken &Previous = *State.NextToken->Previous;
948    const FormatToken *LBrace = State.NextToken->getPreviousNonComment();
949    if (!LBrace || LBrace->isNot(tok::l_brace) ||
950        LBrace->BlockKind != BK_Block || Previous.Children.size() == 0)
951      // The previous token does not open a block. Nothing to do. We don't
952      // assert so that we can simply call this function for all tokens.
953      return true;
954
955    if (NewLine) {
956      int AdditionalIndent = State.Stack.back().Indent -
957                             Previous.Children[0]->Level * Style.IndentWidth;
958      Penalty += format(Previous.Children, DryRun, AdditionalIndent);
959      return true;
960    }
961
962    // Cannot merge multiple statements into a single line.
963    if (Previous.Children.size() > 1)
964      return false;
965
966    // We can't put the closing "}" on a line with a trailing comment.
967    if (Previous.Children[0]->Last->isTrailingComment())
968      return false;
969
970    if (!DryRun) {
971      Whitespaces->replaceWhitespace(
972          *Previous.Children[0]->First,
973          /*Newlines=*/0, /*IndentLevel=*/0, /*Spaces=*/1,
974          /*StartOfTokenColumn=*/State.Column, State.Line->InPPDirective);
975    }
976    Penalty += format(*Previous.Children[0], State.Column + 1, DryRun);
977
978    State.Column += 1 + Previous.Children[0]->Last->TotalLength;
979    return true;
980  }
981
982  SourceManager &SourceMgr;
983  SmallVectorImpl<CharSourceRange> &Ranges;
984  ContinuationIndenter *Indenter;
985  WhitespaceManager *Whitespaces;
986  FormatStyle Style;
987  LineJoiner Joiner;
988
989  llvm::SpecificBumpPtrAllocator<StateNode> Allocator;
990};
991
992class FormatTokenLexer {
993public:
994  FormatTokenLexer(Lexer &Lex, SourceManager &SourceMgr, FormatStyle &Style,
995                   encoding::Encoding Encoding)
996      : FormatTok(NULL), IsFirstToken(true), GreaterStashed(false), Column(0),
997        TrailingWhitespace(0), Lex(Lex), SourceMgr(SourceMgr), Style(Style),
998        IdentTable(getFormattingLangOpts()), Encoding(Encoding) {
999    Lex.SetKeepWhitespaceMode(true);
1000  }
1001
1002  ArrayRef<FormatToken *> lex() {
1003    assert(Tokens.empty());
1004    do {
1005      Tokens.push_back(getNextToken());
1006      maybeJoinPreviousTokens();
1007    } while (Tokens.back()->Tok.isNot(tok::eof));
1008    return Tokens;
1009  }
1010
1011  IdentifierTable &getIdentTable() { return IdentTable; }
1012
1013private:
1014  void maybeJoinPreviousTokens() {
1015    if (Tokens.size() < 4)
1016      return;
1017    FormatToken *Last = Tokens.back();
1018    if (!Last->is(tok::r_paren))
1019      return;
1020
1021    FormatToken *String = Tokens[Tokens.size() - 2];
1022    if (!String->is(tok::string_literal) || String->IsMultiline)
1023      return;
1024
1025    if (!Tokens[Tokens.size() - 3]->is(tok::l_paren))
1026      return;
1027
1028    FormatToken *Macro = Tokens[Tokens.size() - 4];
1029    if (Macro->TokenText != "_T")
1030      return;
1031
1032    const char *Start = Macro->TokenText.data();
1033    const char *End = Last->TokenText.data() + Last->TokenText.size();
1034    String->TokenText = StringRef(Start, End - Start);
1035    String->IsFirst = Macro->IsFirst;
1036    String->LastNewlineOffset = Macro->LastNewlineOffset;
1037    String->WhitespaceRange = Macro->WhitespaceRange;
1038    String->OriginalColumn = Macro->OriginalColumn;
1039    String->ColumnWidth = encoding::columnWidthWithTabs(
1040        String->TokenText, String->OriginalColumn, Style.TabWidth, Encoding);
1041
1042    Tokens.pop_back();
1043    Tokens.pop_back();
1044    Tokens.pop_back();
1045    Tokens.back() = String;
1046  }
1047
1048  FormatToken *getNextToken() {
1049    if (GreaterStashed) {
1050      // Create a synthesized second '>' token.
1051      // FIXME: Increment Column and set OriginalColumn.
1052      Token Greater = FormatTok->Tok;
1053      FormatTok = new (Allocator.Allocate()) FormatToken;
1054      FormatTok->Tok = Greater;
1055      SourceLocation GreaterLocation =
1056          FormatTok->Tok.getLocation().getLocWithOffset(1);
1057      FormatTok->WhitespaceRange =
1058          SourceRange(GreaterLocation, GreaterLocation);
1059      FormatTok->TokenText = ">";
1060      FormatTok->ColumnWidth = 1;
1061      GreaterStashed = false;
1062      return FormatTok;
1063    }
1064
1065    FormatTok = new (Allocator.Allocate()) FormatToken;
1066    readRawToken(*FormatTok);
1067    SourceLocation WhitespaceStart =
1068        FormatTok->Tok.getLocation().getLocWithOffset(-TrailingWhitespace);
1069    FormatTok->IsFirst = IsFirstToken;
1070    IsFirstToken = false;
1071
1072    // Consume and record whitespace until we find a significant token.
1073    unsigned WhitespaceLength = TrailingWhitespace;
1074    while (FormatTok->Tok.is(tok::unknown)) {
1075      for (int i = 0, e = FormatTok->TokenText.size(); i != e; ++i) {
1076        switch (FormatTok->TokenText[i]) {
1077        case '\n':
1078          ++FormatTok->NewlinesBefore;
1079          // FIXME: This is technically incorrect, as it could also
1080          // be a literal backslash at the end of the line.
1081          if (i == 0 || (FormatTok->TokenText[i - 1] != '\\' &&
1082                         (FormatTok->TokenText[i - 1] != '\r' || i == 1 ||
1083                          FormatTok->TokenText[i - 2] != '\\')))
1084            FormatTok->HasUnescapedNewline = true;
1085          FormatTok->LastNewlineOffset = WhitespaceLength + i + 1;
1086          Column = 0;
1087          break;
1088        case '\r':
1089        case '\f':
1090        case '\v':
1091          Column = 0;
1092          break;
1093        case ' ':
1094          ++Column;
1095          break;
1096        case '\t':
1097          Column += Style.TabWidth - Column % Style.TabWidth;
1098          break;
1099        case '\\':
1100          ++Column;
1101          if (i + 1 == e || (FormatTok->TokenText[i + 1] != '\r' &&
1102                             FormatTok->TokenText[i + 1] != '\n'))
1103            FormatTok->Type = TT_ImplicitStringLiteral;
1104          break;
1105        default:
1106          FormatTok->Type = TT_ImplicitStringLiteral;
1107          ++Column;
1108          break;
1109        }
1110      }
1111
1112      if (FormatTok->Type == TT_ImplicitStringLiteral)
1113        break;
1114      WhitespaceLength += FormatTok->Tok.getLength();
1115
1116      readRawToken(*FormatTok);
1117    }
1118
1119    // In case the token starts with escaped newlines, we want to
1120    // take them into account as whitespace - this pattern is quite frequent
1121    // in macro definitions.
1122    // FIXME: Add a more explicit test.
1123    while (FormatTok->TokenText.size() > 1 && FormatTok->TokenText[0] == '\\' &&
1124           FormatTok->TokenText[1] == '\n') {
1125      // FIXME: ++FormatTok->NewlinesBefore is missing...
1126      WhitespaceLength += 2;
1127      Column = 0;
1128      FormatTok->TokenText = FormatTok->TokenText.substr(2);
1129    }
1130
1131    FormatTok->WhitespaceRange = SourceRange(
1132        WhitespaceStart, WhitespaceStart.getLocWithOffset(WhitespaceLength));
1133
1134    FormatTok->OriginalColumn = Column;
1135
1136    TrailingWhitespace = 0;
1137    if (FormatTok->Tok.is(tok::comment)) {
1138      // FIXME: Add the trimmed whitespace to Column.
1139      StringRef UntrimmedText = FormatTok->TokenText;
1140      FormatTok->TokenText = FormatTok->TokenText.rtrim(" \t\v\f");
1141      TrailingWhitespace = UntrimmedText.size() - FormatTok->TokenText.size();
1142    } else if (FormatTok->Tok.is(tok::raw_identifier)) {
1143      IdentifierInfo &Info = IdentTable.get(FormatTok->TokenText);
1144      FormatTok->Tok.setIdentifierInfo(&Info);
1145      FormatTok->Tok.setKind(Info.getTokenID());
1146    } else if (FormatTok->Tok.is(tok::greatergreater)) {
1147      FormatTok->Tok.setKind(tok::greater);
1148      FormatTok->TokenText = FormatTok->TokenText.substr(0, 1);
1149      GreaterStashed = true;
1150    }
1151
1152    // Now FormatTok is the next non-whitespace token.
1153
1154    StringRef Text = FormatTok->TokenText;
1155    size_t FirstNewlinePos = Text.find('\n');
1156    if (FirstNewlinePos == StringRef::npos) {
1157      // FIXME: ColumnWidth actually depends on the start column, we need to
1158      // take this into account when the token is moved.
1159      FormatTok->ColumnWidth =
1160          encoding::columnWidthWithTabs(Text, Column, Style.TabWidth, Encoding);
1161      Column += FormatTok->ColumnWidth;
1162    } else {
1163      FormatTok->IsMultiline = true;
1164      // FIXME: ColumnWidth actually depends on the start column, we need to
1165      // take this into account when the token is moved.
1166      FormatTok->ColumnWidth = encoding::columnWidthWithTabs(
1167          Text.substr(0, FirstNewlinePos), Column, Style.TabWidth, Encoding);
1168
1169      // The last line of the token always starts in column 0.
1170      // Thus, the length can be precomputed even in the presence of tabs.
1171      FormatTok->LastLineColumnWidth = encoding::columnWidthWithTabs(
1172          Text.substr(Text.find_last_of('\n') + 1), 0, Style.TabWidth,
1173          Encoding);
1174      Column = FormatTok->LastLineColumnWidth;
1175    }
1176
1177    return FormatTok;
1178  }
1179
1180  FormatToken *FormatTok;
1181  bool IsFirstToken;
1182  bool GreaterStashed;
1183  unsigned Column;
1184  unsigned TrailingWhitespace;
1185  Lexer &Lex;
1186  SourceManager &SourceMgr;
1187  FormatStyle &Style;
1188  IdentifierTable IdentTable;
1189  encoding::Encoding Encoding;
1190  llvm::SpecificBumpPtrAllocator<FormatToken> Allocator;
1191  SmallVector<FormatToken *, 16> Tokens;
1192
1193  void readRawToken(FormatToken &Tok) {
1194    Lex.LexFromRawLexer(Tok.Tok);
1195    Tok.TokenText = StringRef(SourceMgr.getCharacterData(Tok.Tok.getLocation()),
1196                              Tok.Tok.getLength());
1197    // For formatting, treat unterminated string literals like normal string
1198    // literals.
1199    if (Tok.is(tok::unknown) && !Tok.TokenText.empty() &&
1200        Tok.TokenText[0] == '"') {
1201      Tok.Tok.setKind(tok::string_literal);
1202      Tok.IsUnterminatedLiteral = true;
1203    }
1204  }
1205};
1206
1207class Formatter : public UnwrappedLineConsumer {
1208public:
1209  Formatter(const FormatStyle &Style, Lexer &Lex, SourceManager &SourceMgr,
1210            const std::vector<CharSourceRange> &Ranges)
1211      : Style(Style), Lex(Lex), SourceMgr(SourceMgr),
1212        Whitespaces(SourceMgr, Style, inputUsesCRLF(Lex.getBuffer())),
1213        Ranges(Ranges.begin(), Ranges.end()), UnwrappedLines(1),
1214        Encoding(encoding::detectEncoding(Lex.getBuffer())) {
1215    DEBUG(llvm::dbgs() << "File encoding: "
1216                       << (Encoding == encoding::Encoding_UTF8 ? "UTF8"
1217                                                               : "unknown")
1218                       << "\n");
1219  }
1220
1221  tooling::Replacements format() {
1222    tooling::Replacements Result;
1223    FormatTokenLexer Tokens(Lex, SourceMgr, Style, Encoding);
1224
1225    UnwrappedLineParser Parser(Style, Tokens.lex(), *this);
1226    bool StructuralError = Parser.parse();
1227    assert(UnwrappedLines.rbegin()->empty());
1228    for (unsigned Run = 0, RunE = UnwrappedLines.size(); Run + 1 != RunE;
1229         ++Run) {
1230      DEBUG(llvm::dbgs() << "Run " << Run << "...\n");
1231      SmallVector<AnnotatedLine *, 16> AnnotatedLines;
1232      for (unsigned i = 0, e = UnwrappedLines[Run].size(); i != e; ++i) {
1233        AnnotatedLines.push_back(new AnnotatedLine(UnwrappedLines[Run][i]));
1234      }
1235      tooling::Replacements RunResult =
1236          format(AnnotatedLines, StructuralError, Tokens);
1237      DEBUG({
1238        llvm::dbgs() << "Replacements for run " << Run << ":\n";
1239        for (tooling::Replacements::iterator I = RunResult.begin(),
1240                                             E = RunResult.end();
1241             I != E; ++I) {
1242          llvm::dbgs() << I->toString() << "\n";
1243        }
1244      });
1245      for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) {
1246        delete AnnotatedLines[i];
1247      }
1248      Result.insert(RunResult.begin(), RunResult.end());
1249      Whitespaces.reset();
1250    }
1251    return Result;
1252  }
1253
1254  tooling::Replacements format(SmallVectorImpl<AnnotatedLine *> &AnnotatedLines,
1255                               bool StructuralError, FormatTokenLexer &Tokens) {
1256    TokenAnnotator Annotator(Style, Tokens.getIdentTable().get("in"));
1257    for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) {
1258      Annotator.annotate(*AnnotatedLines[i]);
1259    }
1260    deriveLocalStyle(AnnotatedLines);
1261    for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) {
1262      Annotator.calculateFormattingInformation(*AnnotatedLines[i]);
1263    }
1264
1265    Annotator.setCommentLineLevels(AnnotatedLines);
1266    ContinuationIndenter Indenter(Style, SourceMgr, Whitespaces, Encoding,
1267                                  BinPackInconclusiveFunctions);
1268    UnwrappedLineFormatter Formatter(SourceMgr, Ranges, &Indenter, &Whitespaces,
1269                                     Style);
1270    Formatter.format(AnnotatedLines, /*DryRun=*/false);
1271    return Whitespaces.generateReplacements();
1272  }
1273
1274private:
1275  static bool inputUsesCRLF(StringRef Text) {
1276    return Text.count('\r') * 2 > Text.count('\n');
1277  }
1278
1279  void
1280  deriveLocalStyle(const SmallVectorImpl<AnnotatedLine *> &AnnotatedLines) {
1281    unsigned CountBoundToVariable = 0;
1282    unsigned CountBoundToType = 0;
1283    bool HasCpp03IncompatibleFormat = false;
1284    bool HasBinPackedFunction = false;
1285    bool HasOnePerLineFunction = false;
1286    for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) {
1287      if (!AnnotatedLines[i]->First->Next)
1288        continue;
1289      FormatToken *Tok = AnnotatedLines[i]->First->Next;
1290      while (Tok->Next) {
1291        if (Tok->Type == TT_PointerOrReference) {
1292          bool SpacesBefore =
1293              Tok->WhitespaceRange.getBegin() != Tok->WhitespaceRange.getEnd();
1294          bool SpacesAfter = Tok->Next->WhitespaceRange.getBegin() !=
1295                             Tok->Next->WhitespaceRange.getEnd();
1296          if (SpacesBefore && !SpacesAfter)
1297            ++CountBoundToVariable;
1298          else if (!SpacesBefore && SpacesAfter)
1299            ++CountBoundToType;
1300        }
1301
1302        if (Tok->WhitespaceRange.getBegin() == Tok->WhitespaceRange.getEnd()) {
1303          if (Tok->is(tok::coloncolon) &&
1304              Tok->Previous->Type == TT_TemplateOpener)
1305            HasCpp03IncompatibleFormat = true;
1306          if (Tok->Type == TT_TemplateCloser &&
1307              Tok->Previous->Type == TT_TemplateCloser)
1308            HasCpp03IncompatibleFormat = true;
1309        }
1310
1311        if (Tok->PackingKind == PPK_BinPacked)
1312          HasBinPackedFunction = true;
1313        if (Tok->PackingKind == PPK_OnePerLine)
1314          HasOnePerLineFunction = true;
1315
1316        Tok = Tok->Next;
1317      }
1318    }
1319    if (Style.DerivePointerBinding) {
1320      if (CountBoundToType > CountBoundToVariable)
1321        Style.PointerBindsToType = true;
1322      else if (CountBoundToType < CountBoundToVariable)
1323        Style.PointerBindsToType = false;
1324    }
1325    if (Style.Standard == FormatStyle::LS_Auto) {
1326      Style.Standard = HasCpp03IncompatibleFormat ? FormatStyle::LS_Cpp11
1327                                                  : FormatStyle::LS_Cpp03;
1328    }
1329    BinPackInconclusiveFunctions =
1330        HasBinPackedFunction || !HasOnePerLineFunction;
1331  }
1332
1333  virtual void consumeUnwrappedLine(const UnwrappedLine &TheLine) {
1334    assert(!UnwrappedLines.empty());
1335    UnwrappedLines.back().push_back(TheLine);
1336  }
1337
1338  virtual void finishRun() {
1339    UnwrappedLines.push_back(SmallVector<UnwrappedLine, 16>());
1340  }
1341
1342  FormatStyle Style;
1343  Lexer &Lex;
1344  SourceManager &SourceMgr;
1345  WhitespaceManager Whitespaces;
1346  SmallVector<CharSourceRange, 8> Ranges;
1347  SmallVector<SmallVector<UnwrappedLine, 16>, 2> UnwrappedLines;
1348
1349  encoding::Encoding Encoding;
1350  bool BinPackInconclusiveFunctions;
1351};
1352
1353} // end anonymous namespace
1354
1355tooling::Replacements reformat(const FormatStyle &Style, Lexer &Lex,
1356                               SourceManager &SourceMgr,
1357                               std::vector<CharSourceRange> Ranges) {
1358  Formatter formatter(Style, Lex, SourceMgr, Ranges);
1359  return formatter.format();
1360}
1361
1362tooling::Replacements reformat(const FormatStyle &Style, StringRef Code,
1363                               std::vector<tooling::Range> Ranges,
1364                               StringRef FileName) {
1365  FileManager Files((FileSystemOptions()));
1366  DiagnosticsEngine Diagnostics(
1367      IntrusiveRefCntPtr<DiagnosticIDs>(new DiagnosticIDs),
1368      new DiagnosticOptions);
1369  SourceManager SourceMgr(Diagnostics, Files);
1370  llvm::MemoryBuffer *Buf = llvm::MemoryBuffer::getMemBuffer(Code, FileName);
1371  const clang::FileEntry *Entry =
1372      Files.getVirtualFile(FileName, Buf->getBufferSize(), 0);
1373  SourceMgr.overrideFileContents(Entry, Buf);
1374  FileID ID =
1375      SourceMgr.createFileID(Entry, SourceLocation(), clang::SrcMgr::C_User);
1376  Lexer Lex(ID, SourceMgr.getBuffer(ID), SourceMgr,
1377            getFormattingLangOpts(Style.Standard));
1378  SourceLocation StartOfFile = SourceMgr.getLocForStartOfFile(ID);
1379  std::vector<CharSourceRange> CharRanges;
1380  for (unsigned i = 0, e = Ranges.size(); i != e; ++i) {
1381    SourceLocation Start = StartOfFile.getLocWithOffset(Ranges[i].getOffset());
1382    SourceLocation End = Start.getLocWithOffset(Ranges[i].getLength());
1383    CharRanges.push_back(CharSourceRange::getCharRange(Start, End));
1384  }
1385  return reformat(Style, Lex, SourceMgr, CharRanges);
1386}
1387
1388LangOptions getFormattingLangOpts(FormatStyle::LanguageStandard Standard) {
1389  LangOptions LangOpts;
1390  LangOpts.CPlusPlus = 1;
1391  LangOpts.CPlusPlus11 = Standard == FormatStyle::LS_Cpp03 ? 0 : 1;
1392  LangOpts.LineComment = 1;
1393  LangOpts.Bool = 1;
1394  LangOpts.ObjC1 = 1;
1395  LangOpts.ObjC2 = 1;
1396  return LangOpts;
1397}
1398
1399const char *StyleOptionHelpDescription =
1400    "Coding style, currently supports:\n"
1401    "  LLVM, Google, Chromium, Mozilla, WebKit.\n"
1402    "Use -style=file to load style configuration from\n"
1403    ".clang-format file located in one of the parent\n"
1404    "directories of the source file (or current\n"
1405    "directory for stdin).\n"
1406    "Use -style=\"{key: value, ...}\" to set specific\n"
1407    "parameters, e.g.:\n"
1408    "  -style=\"{BasedOnStyle: llvm, IndentWidth: 8}\"";
1409
1410FormatStyle getStyle(StringRef StyleName, StringRef FileName) {
1411  // Fallback style in case the rest of this function can't determine a style.
1412  StringRef FallbackStyle = "LLVM";
1413  FormatStyle Style;
1414  getPredefinedStyle(FallbackStyle, &Style);
1415
1416  if (StyleName.startswith("{")) {
1417    // Parse YAML/JSON style from the command line.
1418    if (llvm::error_code ec = parseConfiguration(StyleName, &Style)) {
1419      llvm::errs() << "Error parsing -style: " << ec.message() << ", using "
1420                   << FallbackStyle << " style\n";
1421    }
1422    return Style;
1423  }
1424
1425  if (!StyleName.equals_lower("file")) {
1426    if (!getPredefinedStyle(StyleName, &Style))
1427      llvm::errs() << "Invalid value for -style, using " << FallbackStyle
1428                   << " style\n";
1429    return Style;
1430  }
1431
1432  SmallString<128> Path(FileName);
1433  llvm::sys::fs::make_absolute(Path);
1434  for (StringRef Directory = Path; !Directory.empty();
1435       Directory = llvm::sys::path::parent_path(Directory)) {
1436    if (!llvm::sys::fs::is_directory(Directory))
1437      continue;
1438    SmallString<128> ConfigFile(Directory);
1439
1440    llvm::sys::path::append(ConfigFile, ".clang-format");
1441    DEBUG(llvm::dbgs() << "Trying " << ConfigFile << "...\n");
1442    bool IsFile = false;
1443    // Ignore errors from is_regular_file: we only need to know if we can read
1444    // the file or not.
1445    llvm::sys::fs::is_regular_file(Twine(ConfigFile), IsFile);
1446
1447    if (!IsFile) {
1448      // Try _clang-format too, since dotfiles are not commonly used on Windows.
1449      ConfigFile = Directory;
1450      llvm::sys::path::append(ConfigFile, "_clang-format");
1451      DEBUG(llvm::dbgs() << "Trying " << ConfigFile << "...\n");
1452      llvm::sys::fs::is_regular_file(Twine(ConfigFile), IsFile);
1453    }
1454
1455    if (IsFile) {
1456      OwningPtr<llvm::MemoryBuffer> Text;
1457      if (llvm::error_code ec =
1458              llvm::MemoryBuffer::getFile(ConfigFile.c_str(), Text)) {
1459        llvm::errs() << ec.message() << "\n";
1460        continue;
1461      }
1462      if (llvm::error_code ec = parseConfiguration(Text->getBuffer(), &Style)) {
1463        llvm::errs() << "Error reading " << ConfigFile << ": " << ec.message()
1464                     << "\n";
1465        continue;
1466      }
1467      DEBUG(llvm::dbgs() << "Using configuration file " << ConfigFile << "\n");
1468      return Style;
1469    }
1470  }
1471  llvm::errs() << "Can't find usable .clang-format, using " << FallbackStyle
1472               << " style\n";
1473  return Style;
1474}
1475
1476} // namespace format
1477} // namespace clang
1478