AbstractParser.java revision 1523:c15ff29c1295
/*
 * Copyright (c) 2010, 2013, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation. Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

package jdk.nashorn.internal.parser;

import static jdk.nashorn.internal.parser.TokenType.COMMENT;
import static jdk.nashorn.internal.parser.TokenType.DIRECTIVE_COMMENT;
import static jdk.nashorn.internal.parser.TokenType.EOF;
import static jdk.nashorn.internal.parser.TokenType.EOL;
import static jdk.nashorn.internal.parser.TokenType.IDENT;
import java.util.HashMap;
import java.util.Map;
import jdk.nashorn.internal.ir.IdentNode;
import jdk.nashorn.internal.ir.LiteralNode;
import jdk.nashorn.internal.parser.Lexer.LexerToken;
import jdk.nashorn.internal.parser.Lexer.RegexToken;
import jdk.nashorn.internal.runtime.ECMAErrors;
import jdk.nashorn.internal.runtime.ErrorManager;
import jdk.nashorn.internal.runtime.JSErrorType;
import jdk.nashorn.internal.runtime.ParserException;
import jdk.nashorn.internal.runtime.Source;
import jdk.nashorn.internal.runtime.regexp.RegExpFactory;

/**
 * Base class for parsers.
 *
 * <p>Holds the cursor state over the token stream (current/previous token, their
 * positions, the current line) and offers the primitives subclasses build on:
 * token lookahead, advancing, "expect" checks, value extraction, and construction
 * of identifier/literal nodes and {@link ParserException}s.
 */
public abstract class AbstractParser {
    /** Source to parse. */
    protected final Source source;

    /** Error manager to report errors. */
    protected final ErrorManager errors;

    /** Stream of lex tokens to parse. */
    protected TokenStream stream;

    /** Index of current token. */
    protected int k;

    /** Previous token - accessible to sub classes */
    protected long previousToken;

    /** Descriptor of current token. */
    protected long token;

    /** Type of current token. */
    protected TokenType type;

    /** Type of last token. */
    protected TokenType last;

    /** Start position of current token. */
    protected int start;

    /** Finish position of previous token. */
    protected int finish;

    /** Current line number. */
    protected int line;

    /** Position of last EOL + 1. */
    protected int linePosition;

    /** Lexer used to scan source content. */
    protected Lexer lexer;

    /** Is this parser running under strict mode? */
    protected boolean isStrictMode;

    /** What should line numbers be counted from? */
    protected final int lineOffset;

    /** Identifier names seen so far, mapped to themselves, used to de-duplicate name strings. */
    private final Map<String, String> canonicalNames = new HashMap<>();

    /** Text expected right after the directive comment marker: {@code sourceURL=}. */
    private static final String SOURCE_URL_PREFIX = "sourceURL=";

    /** Number of characters taken by the directive comment marker, {@code //@\s} or {@code //#\s}. */
    private static final int DIRECTIVE_MARKER_LENGTH = 4;

    /**
     * Construct a parser.
     *
     * @param source     Source to parse.
     * @param errors     Error reporting manager.
     * @param strict     True if we are in strict mode
     * @param lineOffset Offset from which lines should be counted
     *
     * @throws RuntimeException if the source is longer than {@code Token.LENGTH_MASK}
     */
    protected AbstractParser(final Source source, final ErrorManager errors, final boolean strict, final int lineOffset) {
        if (source.getLength() > Token.LENGTH_MASK) {
            throw new RuntimeException("Source exceeds size limit of " + Token.LENGTH_MASK + " bytes");
        }
        this.source       = source;
        this.errors       = errors;
        // No token has been consumed yet; the first nextToken() call moves k to 0.
        this.k            = -1;
        // Seed the cursor with a synthetic EOL token so that type/last are never null.
        this.token        = Token.toDesc(EOL, 0, 1);
        this.type         = EOL;
        this.last         = EOL;
        this.isStrictMode = strict;
        this.lineOffset   = lineOffset;
    }

    /**
     * Get the ith token, lexing further (and growing the stream buffer when it is
     * full) until that index is available.
     *
     * @param i Index of token.
     *
     * @return the token
     */
    protected final long getToken(final int i) {
        // Make sure there are enough tokens available.
        while (i > stream.last()) {
            // If we need to buffer more for lookahead.
            if (stream.isFull()) {
                stream.grow();
            }

            // Get more tokens.
            lexer.lexify();
        }

        return stream.get(i);
    }

    /**
     * Return the tokenType of the ith token.
     *
     * @param i Index of token
     *
     * @return the token type
     */
    protected final TokenType T(final int i) {
        // Get token descriptor and extract tokenType.
        return Token.descType(getToken(i));
    }

    /**
     * Seek next token that is not an EOL or comment.
     *
     * @return tokenType of next token.
     */
    protected final TokenType next() {
        do {
            nextOrEOL();
        } while (type == EOL || type == COMMENT);

        return type;
    }

    /**
     * Seek next token or EOL (skipping comments.) Directive comments are inspected
     * via {@link #checkDirectiveComment()} before being skipped.
     *
     * @return tokenType of next token.
     */
    protected final TokenType nextOrEOL() {
        do {
            nextToken();
            if (type == DIRECTIVE_COMMENT) {
                checkDirectiveComment();
            }
        } while (type == COMMENT || type == DIRECTIVE_COMMENT);

        return type;
    }

    /**
     * Apply a directive comment to the source. Currently only
     * {@code @sourceURL=foo} (after the directive marker) is supported; the text
     * following {@code sourceURL=} becomes the source's explicit URL.
     */
    private void checkDirectiveComment() {
        // if already set, ignore this one
        if (source.getExplicitURL() != null) {
            return;
        }

        final String comment = (String) lexer.getValueOf(token, isStrictMode);
        final int len = comment.length();
        // Test the prefix in place (past the marker) instead of allocating a substring first.
        if (len > DIRECTIVE_MARKER_LENGTH && comment.startsWith(SOURCE_URL_PREFIX, DIRECTIVE_MARKER_LENGTH)) {
            source.setExplicitURL(comment.substring(DIRECTIVE_MARKER_LENGTH + SOURCE_URL_PREFIX.length()));
        }
    }

    /**
     * Seek next token. Does nothing but record {@code last} once EOF has been reached.
     *
     * @return tokenType of next token.
     */
    private TokenType nextToken() {
        // Capture last token tokenType.
        last = type;
        if (type != EOF) {

            // Set up next token.
            k++;
            final long lastToken = token;
            previousToken = token;
            token = getToken(k);
            type = Token.descType(token);

            // do this before the start is changed below
            if (last != EOL) {
                finish = start + Token.descLength(lastToken);
            }

            if (type == EOL) {
                // For EOL tokens the length and position fields are read as the
                // line number and the position where that line begins.
                line = Token.descLength(token);
                linePosition = Token.descPosition(token);
            } else {
                start = Token.descPosition(token);
            }

        }

        return type;
    }

    /**
     * Get the message string for a message ID and arguments
     *
     * @param msgId The Message ID
     * @param args  The arguments
     *
     * @return The message string
     */
    protected static String message(final String msgId, final String... args) {
        return ECMAErrors.getMessage("parser.error." + msgId, args);
    }

    /**
     * Report an error.
     *
     * @param message    Error message.
     * @param errorToken Offending token.
     * @return ParserException upon failure. Caller should throw and not ignore
     */
    protected final ParserException error(final String message, final long errorToken) {
        return error(JSErrorType.SYNTAX_ERROR, message, errorToken);
    }

    /**
     * Report an error. Line and column are looked up in the source from the
     * position of {@code errorToken}.
     *
     * @param errorType  The error type
     * @param message    Error message.
     * @param errorToken Offending token.
     * @return ParserException upon failure. Caller should throw and not ignore
     */
    protected final ParserException error(final JSErrorType errorType, final String message, final long errorToken) {
        final int position  = Token.descPosition(errorToken);
        final int lineNum   = source.getLine(position);
        final int columnNum = source.getColumn(position);
        final String formatted = ErrorManager.format(message, source, lineNum, columnNum, errorToken);
        return new ParserException(errorType, formatted, source, lineNum, columnNum, errorToken);
    }

    /**
     * Report an error.
     *
     * @param message Error message.
     * @return ParserException upon failure. Caller should throw and not ignore
     */
    protected final ParserException error(final String message) {
        return error(JSErrorType.SYNTAX_ERROR, message);
    }

    /**
     * Report an error at the current token, using the parser's own line tracking.
     *
     * @param errorType The error type
     * @param message   Error message.
     * @return ParserException upon failure. Caller should throw and not ignore
     */
    protected final ParserException error(final JSErrorType errorType, final String message) {
        // TODO - column needs to account for tabs.
        final int position = Token.descPosition(token);
        final int column   = position - linePosition;
        final String formatted = ErrorManager.format(message, source, line, column, token);
        return new ParserException(errorType, formatted, source, line, column, token);
    }

    /**
     * Report a warning to the error manager.
     *
     * @param errorType  The error type of the warning
     * @param message    Warning message.
     * @param errorToken error token
     */
    protected final void warning(final JSErrorType errorType, final String message, final long errorToken) {
        errors.warning(error(errorType, message, errorToken));
    }

    /**
     * Generate 'expected' message.
     *
     * @param expected Expected tokenType, or {@code null} to produce the
     *                 "expected statement" form of the message.
     *
     * @return the message string
     */
    protected final String expectMessage(final TokenType expected) {
        final String tokenString = Token.toString(source, token);
        String msg;

        if (expected == null) {
            msg = AbstractParser.message("expected.stmt", tokenString);
        } else {
            final String expectedName = expected.getNameOrType();
            msg = AbstractParser.message("expected", expectedName, tokenString);
        }

        return msg;
    }

    /**
     * Check current token and advance to the next token.
     *
     * @param expected Expected tokenType.
     *
     * @throws ParserException on unexpected token type
     */
    protected final void expect(final TokenType expected) throws ParserException {
        expectDontAdvance(expected);
        next();
    }

    /**
     * Check current token, but don't advance to the next token.
     *
     * @param expected Expected tokenType.
     *
     * @throws ParserException on unexpected token type
     */
    protected final void expectDontAdvance(final TokenType expected) throws ParserException {
        if (type != expected) {
            throw error(expectMessage(expected));
        }
    }

    /**
     * Check next token, get its value and advance.
     *
     * @param expected Expected tokenType.
     * @return The JavaScript value of the token
     * @throws ParserException on unexpected token type
     */
    protected final Object expectValue(final TokenType expected) throws ParserException {
        // Same check as expect(); kept in one place so the two cannot drift apart.
        expectDontAdvance(expected);

        // Read the value before advancing: next() replaces the current token.
        final Object value = getValue();

        next();

        return value;
    }

    /**
     * Get the value of the current token.
     *
     * @return JavaScript value of the token.
     */
    protected final Object getValue() {
        return getValue(token);
    }

    /**
     * Get the value of a specific token
     *
     * @param valueToken the token
     *
     * @return JavaScript value of the token, or {@code null} if the lexer raised a
     *         {@link ParserException} (which is reported to the error manager)
     */
    protected final Object getValue(final long valueToken) {
        try {
            return lexer.getValueOf(valueToken, isStrictMode);
        } catch (final ParserException e) {
            errors.error(e);
        }

        return null;
    }

    /**
     * Certain future reserved words can be used as identifiers in
     * non-strict mode. Check if the current token is one such.
     *
     * @return true if non strict mode identifier
     */
    protected final boolean isNonStrictModeIdent() {
        return !isStrictMode && type.getKind() == TokenKind.FUTURESTRICT;
    }

    /**
     * Get ident.
     *
     * @return Ident node, or {@code null} if the identifier's value could not be read.
     */
    protected final IdentNode getIdent() {
        // Capture IDENT token.
        long identToken = token;

        if (isNonStrictModeIdent()) {
            // Fake out identifier.
            identToken = Token.recast(token, IDENT);
            // Get IDENT.
            final String ident = (String)getValue(identToken);

            next();

            // Create IDENT node.
            return createIdentNode(identToken, finish, ident).setIsFutureStrictName();
        }

        // Get IDENT.
        final String ident = (String)expectValue(IDENT);
        if (ident == null) {
            return null;
        }
        // Create IDENT node.
        return createIdentNode(identToken, finish, ident);
    }

    /**
     * Creates a new {@link IdentNode} as if invoked with a {@link IdentNode#IdentNode(long, int, String)
     * constructor} but making sure that the {@code name} is deduplicated within this parse job.
     * @param identToken the token for the new {@code IdentNode}
     * @param identFinish the finish for the new {@code IdentNode}
     * @param name the name for the new {@code IdentNode}. It will be de-duplicated.
     * @return a newly constructed {@code IdentNode} with the specified token, finish, and name; the name will
     * be deduplicated.
     */
    protected IdentNode createIdentNode(final long identToken, final int identFinish, final String name) {
        // putIfAbsent returns the previously stored (canonical) instance, or null when this is the first sighting.
        final String existingName = canonicalNames.putIfAbsent(name, name);
        final String canonicalName = existingName != null ? existingName : name;
        return new IdentNode(identToken, identFinish, canonicalName);
    }

    /**
     * Check if current token is in identifier name
     *
     * @return true if current token is an identifier name
     */
    protected final boolean isIdentifierName() {
        final TokenKind kind = type.getKind();
        if (kind == TokenKind.KEYWORD || kind == TokenKind.FUTURE || kind == TokenKind.FUTURESTRICT) {
            return true;
        }

        // only literals allowed are null, false and true
        if (kind == TokenKind.LITERAL) {
            switch (type) {
                case FALSE:
                case NULL:
                case TRUE:
                    return true;
                default:
                    return false;
            }
        }

        // Fake out identifier.
        final long identToken = Token.recast(token, IDENT);
        // Get IDENT.
        final String ident = (String)getValue(identToken);
        // getValue yields null when the lexer reported an error; that is not an identifier name.
        return ident != null && !ident.isEmpty() && Character.isJavaIdentifierStart(ident.charAt(0));
    }

    /**
     * Create an IdentNode from the current token
     *
     * @return an IdentNode representing the current token
     */
    protected final IdentNode getIdentifierName() {
        if (type == IDENT) {
            return getIdent();
        } else if (isIdentifierName()) {
            // Fake out identifier.
            final long identToken = Token.recast(token, IDENT);
            // Get IDENT.
            final String ident = (String)getValue(identToken);
            next();
            // Create IDENT node.
            return createIdentNode(identToken, finish, ident);
        } else {
            // The current token is not an IDENT here, so this raises the "expected" error.
            expect(IDENT);
            return null;
        }
    }

    /**
     * Create a LiteralNode from the current token
     *
     * @return LiteralNode representing the current token
     * @throws ParserException if any literals fails to parse
     */
    protected final LiteralNode<?> getLiteral() throws ParserException {
        // Capture LITERAL token.
        final long literalToken = token;

        // Create literal node.
        final Object value = getValue();
        // Advance to have a correct finish
        next();

        LiteralNode<?> node = null;

        if (value == null) {
            node = LiteralNode.newInstance(literalToken, finish);
        } else if (value instanceof Number) {
            node = LiteralNode.newInstance(literalToken, finish, (Number)value);
        } else if (value instanceof String) {
            node = LiteralNode.newInstance(literalToken, finish, (String)value);
        } else if (value instanceof LexerToken) {
            if (value instanceof RegexToken) {
                // Validate regular expression literals eagerly so a bad pattern is a parse error.
                final RegexToken regex = (RegexToken)value;
                try {
                    RegExpFactory.validate(regex.getExpression(), regex.getOptions());
                } catch (final ParserException e) {
                    throw error(e.getMessage());
                }
            }
            node = LiteralNode.newInstance(literalToken, finish, (LexerToken)value);
        } else {
            assert false : "unknown type for LiteralNode: " + value.getClass();
        }

        return node;
    }
}