// AbstractParser.java revision 971:c93b6091b11e
1/* 2 * Copyright (c) 2010, 2013, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 
24 */ 25 26package jdk.nashorn.internal.parser; 27 28import static jdk.nashorn.internal.parser.TokenType.COMMENT; 29import static jdk.nashorn.internal.parser.TokenType.DIRECTIVE_COMMENT; 30import static jdk.nashorn.internal.parser.TokenType.EOF; 31import static jdk.nashorn.internal.parser.TokenType.EOL; 32import static jdk.nashorn.internal.parser.TokenType.IDENT; 33 34import jdk.nashorn.internal.ir.IdentNode; 35import jdk.nashorn.internal.ir.LiteralNode; 36import jdk.nashorn.internal.parser.Lexer.LexerToken; 37import jdk.nashorn.internal.parser.Lexer.RegexToken; 38import jdk.nashorn.internal.runtime.ECMAErrors; 39import jdk.nashorn.internal.runtime.ErrorManager; 40import jdk.nashorn.internal.runtime.JSErrorType; 41import jdk.nashorn.internal.runtime.ParserException; 42import jdk.nashorn.internal.runtime.Source; 43import jdk.nashorn.internal.runtime.regexp.RegExpFactory; 44 45/** 46 * Base class for parsers. 47 */ 48public abstract class AbstractParser { 49 /** Source to parse. */ 50 protected final Source source; 51 52 /** Error manager to report errors. */ 53 protected final ErrorManager errors; 54 55 /** Stream of lex tokens to parse. */ 56 protected TokenStream stream; 57 58 /** Index of current token. */ 59 protected int k; 60 61 /** Previous token - accessible to sub classes */ 62 protected long previousToken; 63 64 /** Descriptor of current token. */ 65 protected long token; 66 67 /** Type of current token. */ 68 protected TokenType type; 69 70 /** Type of last token. */ 71 protected TokenType last; 72 73 /** Start position of current token. */ 74 protected int start; 75 76 /** Finish position of previous token. */ 77 protected int finish; 78 79 /** Current line number. */ 80 protected int line; 81 82 /** Position of last EOL + 1. */ 83 protected int linePosition; 84 85 /** Lexer used to scan source content. */ 86 protected Lexer lexer; 87 88 /** Is this parser running under strict mode? 
*/ 89 protected boolean isStrictMode; 90 91 /** What should line numbers be counted from? */ 92 protected final int lineOffset; 93 94 /** 95 * Construct a parser. 96 * 97 * @param source Source to parse. 98 * @param errors Error reporting manager. 99 * @param strict True if we are in strict mode 100 * @param lineOffset Offset from which lines should be counted 101 */ 102 protected AbstractParser(final Source source, final ErrorManager errors, final boolean strict, final int lineOffset) { 103 this.source = source; 104 this.errors = errors; 105 this.k = -1; 106 this.token = Token.toDesc(EOL, 0, 1); 107 this.type = EOL; 108 this.last = EOL; 109 this.isStrictMode = strict; 110 this.lineOffset = lineOffset; 111 } 112 113 /** 114 * Get the ith token. 115 * 116 * @param i Index of token. 117 * 118 * @return the token 119 */ 120 protected final long getToken(final int i) { 121 // Make sure there are enough tokens available. 122 while (i > stream.last()) { 123 // If we need to buffer more for lookahead. 124 if (stream.isFull()) { 125 stream.grow(); 126 } 127 128 // Get more tokens. 129 lexer.lexify(); 130 } 131 132 return stream.get(i); 133 } 134 135 /** 136 * Return the tokenType of the ith token. 137 * 138 * @param i Index of token 139 * 140 * @return the token type 141 */ 142 protected final TokenType T(final int i) { 143 // Get token descriptor and extract tokenType. 144 return Token.descType(getToken(i)); 145 } 146 147 /** 148 * Seek next token that is not an EOL or comment. 149 * 150 * @return tokenType of next token. 151 */ 152 protected final TokenType next() { 153 do { 154 nextOrEOL(); 155 } while (type == EOL || type == COMMENT); 156 157 return type; 158 } 159 160 /** 161 * Seek next token or EOL (skipping comments.) 162 * 163 * @return tokenType of next token. 
164 */ 165 protected final TokenType nextOrEOL() { 166 do { 167 nextToken(); 168 if (type == DIRECTIVE_COMMENT) { 169 checkDirectiveComment(); 170 } 171 } while (type == COMMENT || type == DIRECTIVE_COMMENT); 172 173 return type; 174 } 175 176 // sourceURL= after directive comment 177 private static final String SOURCE_URL_PREFIX = "sourceURL="; 178 179 // currently only @sourceURL=foo supported 180 private void checkDirectiveComment() { 181 // if already set, ignore this one 182 if (source.getExplicitURL() != null) { 183 return; 184 } 185 186 final String comment = (String) lexer.getValueOf(token, isStrictMode); 187 final int len = comment.length(); 188 // 4 characters for directive comment marker //@\s or //#\s 189 if (len > 4 && comment.substring(4).startsWith(SOURCE_URL_PREFIX)) { 190 source.setExplicitURL(comment.substring(4 + SOURCE_URL_PREFIX.length())); 191 } 192 } 193 194 /** 195 * Seek next token. 196 * 197 * @return tokenType of next token. 198 */ 199 private TokenType nextToken() { 200 // Capture last token tokenType. 201 last = type; 202 if (type != EOF) { 203 204 // Set up next token. 205 k++; 206 final long lastToken = token; 207 previousToken = token; 208 token = getToken(k); 209 type = Token.descType(token); 210 211 // do this before the start is changed below 212 if (last != EOL) { 213 finish = start + Token.descLength(lastToken); 214 } 215 216 if (type == EOL) { 217 line = Token.descLength(token); 218 linePosition = Token.descPosition(token); 219 } else { 220 start = Token.descPosition(token); 221 } 222 223 } 224 225 return type; 226 } 227 228 /** 229 * Get the message string for a message ID and arguments 230 * 231 * @param msgId The Message ID 232 * @param args The arguments 233 * 234 * @return The message string 235 */ 236 protected static String message(final String msgId, final String... args) { 237 return ECMAErrors.getMessage("parser.error." + msgId, args); 238 } 239 240 /** 241 * Report an error. 242 * 243 * @param message Error message. 
244 * @param errorToken Offending token. 245 * @return ParserException upon failure. Caller should throw and not ignore 246 */ 247 protected final ParserException error(final String message, final long errorToken) { 248 return error(JSErrorType.SYNTAX_ERROR, message, errorToken); 249 } 250 251 /** 252 * Report an error. 253 * 254 * @param errorType The error type 255 * @param message Error message. 256 * @param errorToken Offending token. 257 * @return ParserException upon failure. Caller should throw and not ignore 258 */ 259 protected final ParserException error(final JSErrorType errorType, final String message, final long errorToken) { 260 final int position = Token.descPosition(errorToken); 261 final int lineNum = source.getLine(position); 262 final int columnNum = source.getColumn(position); 263 final String formatted = ErrorManager.format(message, source, lineNum, columnNum, errorToken); 264 return new ParserException(errorType, formatted, source, lineNum, columnNum, errorToken); 265 } 266 267 /** 268 * Report an error. 269 * 270 * @param message Error message. 271 * @return ParserException upon failure. Caller should throw and not ignore 272 */ 273 protected final ParserException error(final String message) { 274 return error(JSErrorType.SYNTAX_ERROR, message); 275 } 276 277 /** 278 * Report an error. 279 * 280 * @param errorType The error type 281 * @param message Error message. 282 * @return ParserException upon failure. Caller should throw and not ignore 283 */ 284 protected final ParserException error(final JSErrorType errorType, final String message) { 285 // TODO - column needs to account for tabs. 286 final int position = Token.descPosition(token); 287 final int column = position - linePosition; 288 final String formatted = ErrorManager.format(message, source, line, column, token); 289 return new ParserException(errorType, formatted, source, line, column, token); 290 } 291 292 /** 293 * Report a warning to the error manager. 
294 * 295 * @param errorType The error type of the warning 296 * @param message Warning message. 297 * @param errorToken error token 298 */ 299 protected final void warning(final JSErrorType errorType, final String message, final long errorToken) { 300 errors.warning(error(errorType, message, errorToken)); 301 } 302 303 /** 304 * Generate 'expected' message. 305 * 306 * @param expected Expected tokenType. 307 * 308 * @return the message string 309 */ 310 protected final String expectMessage(final TokenType expected) { 311 final String tokenString = Token.toString(source, token); 312 String msg; 313 314 if (expected == null) { 315 msg = AbstractParser.message("expected.stmt", tokenString); 316 } else { 317 final String expectedName = expected.getNameOrType(); 318 msg = AbstractParser.message("expected", expectedName, tokenString); 319 } 320 321 return msg; 322 } 323 324 /** 325 * Check next token and advance. 326 * 327 * @param expected Expected tokenType. 328 * 329 * @throws ParserException on unexpected token type 330 */ 331 protected final void expect(final TokenType expected) throws ParserException { 332 if (type != expected) { 333 throw error(expectMessage(expected)); 334 } 335 336 next(); 337 } 338 339 /** 340 * Check next token, get its value and advance. 341 * 342 * @param expected Expected tokenType. 343 * @return The JavaScript value of the token 344 * @throws ParserException on unexpected token type 345 */ 346 protected final Object expectValue(final TokenType expected) throws ParserException { 347 if (type != expected) { 348 throw error(expectMessage(expected)); 349 } 350 351 final Object value = getValue(); 352 353 next(); 354 355 return value; 356 } 357 358 /** 359 * Get the value of the current token. 360 * 361 * @return JavaScript value of the token. 
362 */ 363 protected final Object getValue() { 364 return getValue(token); 365 } 366 367 /** 368 * Get the value of a specific token 369 * 370 * @param valueToken the token 371 * 372 * @return JavaScript value of the token 373 */ 374 protected final Object getValue(final long valueToken) { 375 try { 376 return lexer.getValueOf(valueToken, isStrictMode); 377 } catch (final ParserException e) { 378 errors.error(e); 379 } 380 381 return null; 382 } 383 384 /** 385 * Certain future reserved words can be used as identifiers in 386 * non-strict mode. Check if the current token is one such. 387 * 388 * @return true if non strict mode identifier 389 */ 390 protected final boolean isNonStrictModeIdent() { 391 return !isStrictMode && type.getKind() == TokenKind.FUTURESTRICT; 392 } 393 394 /** 395 * Get ident. 396 * 397 * @return Ident node. 398 */ 399 protected final IdentNode getIdent() { 400 // Capture IDENT token. 401 long identToken = token; 402 403 if (isNonStrictModeIdent()) { 404 // Fake out identifier. 405 identToken = Token.recast(token, IDENT); 406 // Get IDENT. 407 final String ident = (String)getValue(identToken); 408 409 next(); 410 411 // Create IDENT node. 412 return new IdentNode(identToken, finish, ident).setIsFutureStrictName(); 413 } 414 415 // Get IDENT. 416 final String ident = (String)expectValue(IDENT); 417 if (ident == null) { 418 return null; 419 } 420 // Create IDENT node. 421 return new IdentNode(identToken, finish, ident); 422 } 423 424 /** 425 * Check if current token is in identifier name 426 * 427 * @return true if current token is an identifier name 428 */ 429 protected final boolean isIdentifierName() { 430 final TokenKind kind = type.getKind(); 431 if (kind == TokenKind.KEYWORD || kind == TokenKind.FUTURE || kind == TokenKind.FUTURESTRICT) { 432 return true; 433 } 434 // Fake out identifier. 435 final long identToken = Token.recast(token, IDENT); 436 // Get IDENT. 
437 final String ident = (String)getValue(identToken); 438 return !ident.isEmpty() && Character.isJavaIdentifierStart(ident.charAt(0)); 439 } 440 441 /** 442 * Create an IdentNode from the current token 443 * 444 * @return an IdentNode representing the current token 445 */ 446 protected final IdentNode getIdentifierName() { 447 if (type == IDENT) { 448 return getIdent(); 449 } else if (isIdentifierName()) { 450 // Fake out identifier. 451 final long identToken = Token.recast(token, IDENT); 452 // Get IDENT. 453 final String ident = (String)getValue(identToken); 454 next(); 455 // Create IDENT node. 456 return new IdentNode(identToken, finish, ident); 457 } else { 458 expect(IDENT); 459 return null; 460 } 461 } 462 463 /** 464 * Create a LiteralNode from the current token 465 * 466 * @return LiteralNode representing the current token 467 * @throws ParserException if any literals fails to parse 468 */ 469 protected final LiteralNode<?> getLiteral() throws ParserException { 470 // Capture LITERAL token. 471 final long literalToken = token; 472 473 // Create literal node. 
474 final Object value = getValue(); 475 // Advance to have a correct finish 476 next(); 477 478 LiteralNode<?> node = null; 479 480 if (value == null) { 481 node = LiteralNode.newInstance(literalToken, finish); 482 } else if (value instanceof Number) { 483 node = LiteralNode.newInstance(literalToken, finish, (Number)value); 484 } else if (value instanceof String) { 485 node = LiteralNode.newInstance(literalToken, finish, (String)value); 486 } else if (value instanceof LexerToken) { 487 if (value instanceof RegexToken) { 488 final RegexToken regex = (RegexToken)value; 489 try { 490 RegExpFactory.validate(regex.getExpression(), regex.getOptions()); 491 } catch (final ParserException e) { 492 throw error(e.getMessage()); 493 } 494 } 495 node = LiteralNode.newInstance(literalToken, finish, (LexerToken)value); 496 } else { 497 assert false : "unknown type for LiteralNode: " + value.getClass(); 498 } 499 500 return node; 501 } 502} 503