// AbstractParser.java revision 985:10c95d040380
/*
 * Copyright (c) 2010, 2013, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation. Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

package jdk.nashorn.internal.parser;

import static jdk.nashorn.internal.parser.TokenType.COMMENT;
import static jdk.nashorn.internal.parser.TokenType.DIRECTIVE_COMMENT;
import static jdk.nashorn.internal.parser.TokenType.EOF;
import static jdk.nashorn.internal.parser.TokenType.EOL;
import static jdk.nashorn.internal.parser.TokenType.IDENT;

import java.util.HashMap;
import java.util.Map;
import jdk.nashorn.internal.ir.IdentNode;
import jdk.nashorn.internal.ir.LiteralNode;
import jdk.nashorn.internal.parser.Lexer.LexerToken;
import jdk.nashorn.internal.parser.Lexer.RegexToken;
import jdk.nashorn.internal.runtime.ECMAErrors;
import jdk.nashorn.internal.runtime.ErrorManager;
import jdk.nashorn.internal.runtime.JSErrorType;
import jdk.nashorn.internal.runtime.ParserException;
import jdk.nashorn.internal.runtime.Source;
import jdk.nashorn.internal.runtime.regexp.RegExpFactory;

/**
 * Base class for parsers.
 *
 * <p>Holds the token cursor state (current/previous token, positions, line
 * bookkeeping) and the shared helpers for advancing through the token stream,
 * reading token values and building error messages.
 */
public abstract class AbstractParser {
    /** Source to parse. */
    protected final Source source;

    /** Error manager to report errors. */
    protected final ErrorManager errors;

    /** Stream of lex tokens to parse. */
    protected TokenStream stream;

    /** Index of current token. */
    protected int k;

    /** Previous token - accessible to sub classes */
    protected long previousToken;

    /** Descriptor of current token. */
    protected long token;

    /** Type of current token. */
    protected TokenType type;

    /** Type of last token. */
    protected TokenType last;

    /** Start position of current token. */
    protected int start;

    /** Finish position of previous token. */
    protected int finish;

    /** Current line number. */
    protected int line;

    /** Position of last EOL + 1. */
    protected int linePosition;

    /** Lexer used to scan source content. */
    protected Lexer lexer;

    /** Is this parser running under strict mode? */
    protected boolean isStrictMode;

    /** What should line numbers be counted from? */
    protected final int lineOffset;

    /** Names already seen in this parse job; used to hand out one shared String instance per name. */
    private final Map<String, String> canonicalNames = new HashMap<>();

    /** Text expected right after the directive comment marker: {@code sourceURL=}. */
    private static final String SOURCE_URL_PREFIX = "sourceURL=";

    /** Length of the directive comment marker, i.e. the 4 characters of {@code //@\s} or {@code //#\s}. */
    private static final int DIRECTIVE_MARKER_LENGTH = 4;

    /**
     * Construct a parser.
     *
     * @param source     Source to parse.
     * @param errors     Error reporting manager.
     * @param strict     True if we are in strict mode
     * @param lineOffset Offset from which lines should be counted
     */
    protected AbstractParser(final Source source, final ErrorManager errors, final boolean strict, final int lineOffset) {
        this.source       = source;
        this.errors       = errors;
        // No token consumed yet; the first nextToken() call moves the index to 0.
        this.k            = -1;
        this.token        = Token.toDesc(EOL, 0, 1);
        this.type         = EOL;
        this.last         = EOL;
        this.isStrictMode = strict;
        this.lineOffset   = lineOffset;
    }

    /**
     * Get the ith token, asking the lexer for more tokens until the stream
     * contains index {@code i}.
     *
     * @param i Index of token.
     *
     * @return the token
     */
    protected final long getToken(final int i) {
        // Make sure there are enough tokens available.
        while (i > stream.last()) {
            // If we need to buffer more for lookahead.
            if (stream.isFull()) {
                stream.grow();
            }

            // Get more tokens.
            lexer.lexify();
        }

        return stream.get(i);
    }

    /**
     * Return the tokenType of the ith token.
     *
     * @param i Index of token
     *
     * @return the token type
     */
    protected final TokenType T(final int i) {
        // Get token descriptor and extract tokenType.
        return Token.descType(getToken(i));
    }

    /**
     * Seek next token that is not an EOL or comment.
     *
     * @return tokenType of next token.
     */
    protected final TokenType next() {
        do {
            nextOrEOL();
        } while (type == EOL || type == COMMENT);

        return type;
    }

    /**
     * Seek next token or EOL (skipping comments.) Directive comments are
     * inspected via {@link #checkDirectiveComment()} before being skipped.
     *
     * @return tokenType of next token.
     */
    protected final TokenType nextOrEOL() {
        do {
            nextToken();
            if (type == DIRECTIVE_COMMENT) {
                checkDirectiveComment();
            }
        } while (type == COMMENT || type == DIRECTIVE_COMMENT);

        return type;
    }

    /**
     * Inspect the current directive comment token and, if it has the form
     * {@code //@ sourceURL=foo} or {@code //# sourceURL=foo}, record
     * {@code foo} as the explicit URL of the source.
     *
     * <p>Currently only the {@code sourceURL} directive is supported. The
     * first directive wins: once the source has an explicit URL, later
     * directives are ignored.
     */
    private void checkDirectiveComment() {
        // if already set, ignore this one
        if (source.getExplicitURL() != null) {
            return;
        }

        final String comment = (String) lexer.getValueOf(token, isStrictMode);
        if (comment == null) {
            // No text available for this token; nothing to inspect.
            return;
        }

        // startsWith(prefix, offset) returns false when the comment is shorter than
        // the marker, so no separate length check or temporary substring is needed.
        if (comment.startsWith(SOURCE_URL_PREFIX, DIRECTIVE_MARKER_LENGTH)) {
            source.setExplicitURL(comment.substring(DIRECTIVE_MARKER_LENGTH + SOURCE_URL_PREFIX.length()));
        }
    }

    /**
     * Seek next token. Does nothing except refresh {@link #last} once EOF
     * has been reached.
     *
     * @return tokenType of next token.
     */
    private TokenType nextToken() {
        // Capture last token tokenType.
        last = type;
        if (type != EOF) {

            // Set up next token.
            k++;
            final long lastToken = token;
            previousToken = token;
            token = getToken(k);
            type = Token.descType(token);

            // do this before the start is changed below
            if (last != EOL) {
                finish = start + Token.descLength(lastToken);
            }

            if (type == EOL) {
                // For EOL tokens the line number is read from the descriptor's
                // length field and the line start from its position field.
                line         = Token.descLength(token);
                linePosition = Token.descPosition(token);
            } else {
                start = Token.descPosition(token);
            }

        }

        return type;
    }

    /**
     * Get the message string for a message ID and arguments. The ID is
     * looked up with the {@code "parser.error."} prefix prepended.
     *
     * @param msgId The Message ID
     * @param args  The arguments
     *
     * @return The message string
     */
    protected static String message(final String msgId, final String... args) {
        return ECMAErrors.getMessage("parser.error." + msgId, args);
    }

    /**
     * Report an error.
     *
     * @param message    Error message.
     * @param errorToken Offending token.
     * @return ParserException upon failure. Caller should throw and not ignore
     */
    protected final ParserException error(final String message, final long errorToken) {
        return error(JSErrorType.SYNTAX_ERROR, message, errorToken);
    }

    /**
     * Report an error. Line and column are derived from the position of
     * {@code errorToken} within the source.
     *
     * @param errorType  The error type
     * @param message    Error message.
     * @param errorToken Offending token.
     * @return ParserException upon failure. Caller should throw and not ignore
     */
    protected final ParserException error(final JSErrorType errorType, final String message, final long errorToken) {
        final int position  = Token.descPosition(errorToken);
        final int lineNum   = source.getLine(position);
        final int columnNum = source.getColumn(position);
        final String formatted = ErrorManager.format(message, source, lineNum, columnNum, errorToken);
        return new ParserException(errorType, formatted, source, lineNum, columnNum, errorToken);
    }

    /**
     * Report an error.
     *
     * @param message Error message.
     * @return ParserException upon failure. Caller should throw and not ignore
     */
    protected final ParserException error(final String message) {
        return error(JSErrorType.SYNTAX_ERROR, message);
    }

    /**
     * Report an error at the current token, using the parser's own
     * {@link #line} and {@link #linePosition} bookkeeping for the location.
     *
     * @param errorType The error type
     * @param message   Error message.
     * @return ParserException upon failure. Caller should throw and not ignore
     */
    protected final ParserException error(final JSErrorType errorType, final String message) {
        // TODO - column needs to account for tabs.
        final int position = Token.descPosition(token);
        final int column   = position - linePosition;
        final String formatted = ErrorManager.format(message, source, line, column, token);
        return new ParserException(errorType, formatted, source, line, column, token);
    }

    /**
     * Report a warning to the error manager.
     *
     * @param errorType  The error type of the warning
     * @param message    Warning message.
     * @param errorToken error token
     */
    protected final void warning(final JSErrorType errorType, final String message, final long errorToken) {
        errors.warning(error(errorType, message, errorToken));
    }

    /**
     * Generate 'expected' message.
     *
     * @param expected Expected tokenType, or {@code null} to produce the
     *                 "expected.stmt" message instead.
     *
     * @return the message string
     */
    protected final String expectMessage(final TokenType expected) {
        final String tokenString = Token.toString(source, token);
        String msg;

        if (expected == null) {
            msg = AbstractParser.message("expected.stmt", tokenString);
        } else {
            final String expectedName = expected.getNameOrType();
            msg = AbstractParser.message("expected", expectedName, tokenString);
        }

        return msg;
    }

    /**
     * Check next token and advance.
     *
     * @param expected Expected tokenType.
     *
     * @throws ParserException on unexpected token type
     */
    protected final void expect(final TokenType expected) throws ParserException {
        if (type != expected) {
            throw error(expectMessage(expected));
        }

        next();
    }

    /**
     * Check next token, get its value and advance.
     *
     * @param expected Expected tokenType.
     * @return The JavaScript value of the token
     * @throws ParserException on unexpected token type
     */
    protected final Object expectValue(final TokenType expected) throws ParserException {
        if (type != expected) {
            throw error(expectMessage(expected));
        }

        final Object value = getValue();

        next();

        return value;
    }

    /**
     * Get the value of the current token.
     *
     * @return JavaScript value of the token.
     */
    protected final Object getValue() {
        return getValue(token);
    }

    /**
     * Get the value of a specific token
     *
     * @param valueToken the token
     *
     * @return JavaScript value of the token, or {@code null} if the lexer
     *         raised a {@link ParserException} (which is then reported to the
     *         error manager rather than rethrown)
     */
    protected final Object getValue(final long valueToken) {
        try {
            return lexer.getValueOf(valueToken, isStrictMode);
        } catch (final ParserException e) {
            errors.error(e);
        }

        return null;
    }

    /**
     * Certain future reserved words can be used as identifiers in
     * non-strict mode. Check if the current token is one such.
     *
     * @return true if non strict mode identifier
     */
    protected final boolean isNonStrictModeIdent() {
        return !isStrictMode && type.getKind() == TokenKind.FUTURESTRICT;
    }

    /**
     * Get ident.
     *
     * @return Ident node, or {@code null} if the current token is an IDENT
     *         whose value could not be obtained.
     */
    protected final IdentNode getIdent() {
        // Capture IDENT token.
        long identToken = token;

        if (isNonStrictModeIdent()) {
            // Fake out identifier.
            identToken = Token.recast(token, IDENT);
            // Get IDENT.
            final String ident = (String)getValue(identToken);

            next();

            // Create IDENT node.
            return createIdentNode(identToken, finish, ident).setIsFutureStrictName();
        }

        // Get IDENT.
        final String ident = (String)expectValue(IDENT);
        if (ident == null) {
            return null;
        }
        // Create IDENT node.
        return createIdentNode(identToken, finish, ident);
    }

    /**
     * Creates a new {@link IdentNode} as if invoked with a {@link IdentNode#IdentNode(long, int, String)
     * constructor} but making sure that the {@code name} is deduplicated within this parse job.
     * @param identToken the token for the new {@code IdentNode}
     * @param identFinish the finish for the new {@code IdentNode}
     * @param name the name for the new {@code IdentNode}. It will be de-duplicated.
     * @return a newly constructed {@code IdentNode} with the specified token, finish, and name; the name will
     * be deduplicated.
     */
    protected IdentNode createIdentNode(final long identToken, final int identFinish, final String name) {
        final String existingName = canonicalNames.putIfAbsent(name, name);
        final String canonicalName = existingName != null ? existingName : name;
        return new IdentNode(identToken, identFinish, canonicalName);
    }

    /**
     * Check if current token is in identifier name
     *
     * @return true if current token is an identifier name; false otherwise,
     *         including when the token's value could not be obtained
     */
    protected final boolean isIdentifierName() {
        final TokenKind kind = type.getKind();
        if (kind == TokenKind.KEYWORD || kind == TokenKind.FUTURE || kind == TokenKind.FUTURESTRICT) {
            return true;
        }
        // Fake out identifier.
        final long identToken = Token.recast(token, IDENT);
        // Get IDENT.
        final String ident = (String)getValue(identToken);
        // getValue() yields null after reporting a lexer error; such a token
        // is not an identifier name.
        return ident != null && !ident.isEmpty() && Character.isJavaIdentifierStart(ident.charAt(0));
    }

    /**
     * Create an IdentNode from the current token
     *
     * @return an IdentNode representing the current token
     */
    protected final IdentNode getIdentifierName() {
        if (type == IDENT) {
            return getIdent();
        } else if (isIdentifierName()) {
            // Fake out identifier.
            final long identToken = Token.recast(token, IDENT);
            // Get IDENT.
            final String ident = (String)getValue(identToken);
            next();
            // Create IDENT node.
            return createIdentNode(identToken, finish, ident);
        } else {
            // type != IDENT on this branch, so expect(IDENT) throws the
            // "expected" error; the return only satisfies the compiler.
            expect(IDENT);
            return null;
        }
    }

    /**
     * Create a LiteralNode from the current token
     *
     * @return LiteralNode representing the current token
     * @throws ParserException if any literals fails to parse
     */
    protected final LiteralNode<?> getLiteral() throws ParserException {
        // Capture LITERAL token.
        final long literalToken = token;

        // Create literal node.
        final Object value = getValue();
        // Advance to have a correct finish
        next();

        LiteralNode<?> node = null;

        if (value == null) {
            node = LiteralNode.newInstance(literalToken, finish);
        } else if (value instanceof Number) {
            node = LiteralNode.newInstance(literalToken, finish, (Number)value);
        } else if (value instanceof String) {
            node = LiteralNode.newInstance(literalToken, finish, (String)value);
        } else if (value instanceof LexerToken) {
            if (value instanceof RegexToken) {
                // Validate the regular expression up front so a malformed
                // literal is reported as a parser error.
                final RegexToken regex = (RegexToken)value;
                try {
                    RegExpFactory.validate(regex.getExpression(), regex.getOptions());
                } catch (final ParserException e) {
                    throw error(e.getMessage());
                }
            }
            node = LiteralNode.newInstance(literalToken, finish, (LexerToken)value);
        } else {
            assert false : "unknown type for LiteralNode: " + value.getClass();
        }

        return node;
    }
}