AbstractParser.java revision 985:10c95d040380
/*
 * Copyright (c) 2010, 2013, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.  Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */
2553541Sshin
2653541Sshinpackage jdk.nashorn.internal.parser;
2753541Sshin
2853541Sshinimport static jdk.nashorn.internal.parser.TokenType.COMMENT;
29174510Sobrienimport static jdk.nashorn.internal.parser.TokenType.DIRECTIVE_COMMENT;
3053541Sshinimport static jdk.nashorn.internal.parser.TokenType.EOF;
3153541Sshinimport static jdk.nashorn.internal.parser.TokenType.EOL;
32139826Simpimport static jdk.nashorn.internal.parser.TokenType.IDENT;
3353541Sshin
3453541Sshinimport java.util.HashMap;
3553541Sshinimport java.util.Map;
3653541Sshinimport jdk.nashorn.internal.ir.IdentNode;
3753541Sshinimport jdk.nashorn.internal.ir.LiteralNode;
3853541Sshinimport jdk.nashorn.internal.parser.Lexer.LexerToken;
3953541Sshinimport jdk.nashorn.internal.parser.Lexer.RegexToken;
4053541Sshinimport jdk.nashorn.internal.runtime.ECMAErrors;
4153541Sshinimport jdk.nashorn.internal.runtime.ErrorManager;
4253541Sshinimport jdk.nashorn.internal.runtime.JSErrorType;
4353541Sshinimport jdk.nashorn.internal.runtime.ParserException;
4453541Sshinimport jdk.nashorn.internal.runtime.Source;
4553541Sshinimport jdk.nashorn.internal.runtime.regexp.RegExpFactory;
4653541Sshin
4753541Sshin/**
4853541Sshin * Base class for parsers.
4953541Sshin */
5053541Sshinpublic abstract class AbstractParser {
5153541Sshin    /** Source to parse. */
5253541Sshin    protected final Source source;
5353541Sshin
5453541Sshin    /** Error manager to report errors. */
5553541Sshin    protected final ErrorManager errors;
5653541Sshin
5753541Sshin    /** Stream of lex tokens to parse. */
5853541Sshin    protected TokenStream stream;
5953541Sshin
6053541Sshin    /** Index of current token. */
61174510Sobrien    protected int k;
6253541Sshin
6353541Sshin    /** Previous token - accessible to sub classes */
6453541Sshin    protected long previousToken;
6553541Sshin
6653541Sshin    /** Descriptor of current token. */
6755205Speter    protected long token;
6853541Sshin
6953541Sshin    /** Type of current token. */
7053541Sshin    protected TokenType type;
7153541Sshin
72222748Srwatson    /** Type of last token. */
73222748Srwatson    protected TokenType last;
74222748Srwatson
75241916Sdelphij    /** Start position of current token. */
76222748Srwatson    protected int start;
77222748Srwatson
78222748Srwatson    /** Finish position of previous token. */
79241916Sdelphij    protected int finish;
80241916Sdelphij
81222748Srwatson    /** Current line number. */
82241916Sdelphij    protected int line;
83241916Sdelphij
84241916Sdelphij    /** Position of last EOL + 1. */
85241916Sdelphij    protected int linePosition;
86241916Sdelphij
87241916Sdelphij    /** Lexer used to scan source content. */
88241916Sdelphij    protected Lexer lexer;
89194777Sbz
9053541Sshin    /** Is this parser running under strict mode? */
91241916Sdelphij    protected boolean isStrictMode;
92180427Sbz
93241916Sdelphij    /** What should line numbers be counted from? */
9453541Sshin    protected final int lineOffset;
95241916Sdelphij
96222488Srwatson    private final Map<String, String> canonicalNames = new HashMap<>();
97241916Sdelphij
98222488Srwatson    /**
99241916Sdelphij     * Construct a parser.
100222488Srwatson     *
101241916Sdelphij     * @param source     Source to parse.
102222691Srwatson     * @param errors     Error reporting manager.
103241916Sdelphij     * @param strict     True if we are in strict mode
104222691Srwatson     * @param lineOffset Offset from which lines should be counted
105241916Sdelphij     */
106241916Sdelphij    protected AbstractParser(final Source source, final ErrorManager errors, final boolean strict, final int lineOffset) {
107125776Sume        this.source       = source;
108241916Sdelphij        this.errors       = errors;
10998211Shsu        this.k            = -1;
110241916Sdelphij        this.token        = Token.toDesc(EOL, 0, 1);
111102218Struckman        this.type         = EOL;
112241916Sdelphij        this.last         = EOL;
113102218Struckman        this.isStrictMode = strict;
114241916Sdelphij        this.lineOffset   = lineOffset;
115241916Sdelphij    }
116241916Sdelphij
117241916Sdelphij    /**
118241916Sdelphij     * Get the ith token.
119241916Sdelphij     *
120241916Sdelphij     * @param i Index of token.
121241916Sdelphij     *
12255205Speter     * @return  the token
12353541Sshin     */
12453541Sshin    protected final long getToken(final int i) {
125        // Make sure there are enough tokens available.
126        while (i > stream.last()) {
127            // If we need to buffer more for lookahead.
128            if (stream.isFull()) {
129                stream.grow();
130            }
131
132            // Get more tokens.
133            lexer.lexify();
134        }
135
136        return stream.get(i);
137    }
138
139    /**
140     * Return the tokenType of the ith token.
141     *
142     * @param i Index of token
143     *
144     * @return the token type
145     */
146    protected final TokenType T(final int i) {
147        // Get token descriptor and extract tokenType.
148        return Token.descType(getToken(i));
149    }
150
151    /**
152     * Seek next token that is not an EOL or comment.
153     *
154     * @return tokenType of next token.
155     */
156    protected final TokenType next() {
157        do {
158            nextOrEOL();
159        } while (type == EOL || type == COMMENT);
160
161        return type;
162    }
163
164    /**
165     * Seek next token or EOL (skipping comments.)
166     *
167     * @return tokenType of next token.
168     */
169    protected final TokenType nextOrEOL() {
170        do {
171            nextToken();
172            if (type == DIRECTIVE_COMMENT) {
173                checkDirectiveComment();
174            }
175        } while (type == COMMENT || type == DIRECTIVE_COMMENT);
176
177        return type;
178    }
179
180    // sourceURL= after directive comment
181    private static final String SOURCE_URL_PREFIX = "sourceURL=";
182
183    // currently only @sourceURL=foo supported
184    private void checkDirectiveComment() {
185        // if already set, ignore this one
186        if (source.getExplicitURL() != null) {
187            return;
188        }
189
190        final String comment = (String) lexer.getValueOf(token, isStrictMode);
191        final int len = comment.length();
192        // 4 characters for directive comment marker //@\s or //#\s
193        if (len > 4 && comment.substring(4).startsWith(SOURCE_URL_PREFIX)) {
194            source.setExplicitURL(comment.substring(4 + SOURCE_URL_PREFIX.length()));
195        }
196    }
197
198    /**
199     * Seek next token.
200     *
201     * @return tokenType of next token.
202     */
203    private TokenType nextToken() {
204        // Capture last token tokenType.
205        last = type;
206        if (type != EOF) {
207
208            // Set up next token.
209            k++;
210            final long lastToken = token;
211            previousToken = token;
212            token = getToken(k);
213            type = Token.descType(token);
214
215            // do this before the start is changed below
216            if (last != EOL) {
217                finish = start + Token.descLength(lastToken);
218            }
219
220            if (type == EOL) {
221                line         = Token.descLength(token);
222                linePosition = Token.descPosition(token);
223            } else {
224                start = Token.descPosition(token);
225            }
226
227        }
228
229        return type;
230    }
231
232    /**
233     * Get the message string for a message ID and arguments
234     *
235     * @param msgId The Message ID
236     * @param args  The arguments
237     *
238     * @return The message string
239     */
240    protected static String message(final String msgId, final String... args) {
241        return ECMAErrors.getMessage("parser.error." + msgId, args);
242    }
243
244    /**
245     * Report an error.
246     *
247     * @param message    Error message.
248     * @param errorToken Offending token.
249     * @return ParserException upon failure. Caller should throw and not ignore
250     */
251    protected final ParserException error(final String message, final long errorToken) {
252        return error(JSErrorType.SYNTAX_ERROR, message, errorToken);
253    }
254
255    /**
256     * Report an error.
257     *
258     * @param errorType  The error type
259     * @param message    Error message.
260     * @param errorToken Offending token.
261     * @return ParserException upon failure. Caller should throw and not ignore
262     */
263    protected final ParserException error(final JSErrorType errorType, final String message, final long errorToken) {
264        final int position  = Token.descPosition(errorToken);
265        final int lineNum   = source.getLine(position);
266        final int columnNum = source.getColumn(position);
267        final String formatted = ErrorManager.format(message, source, lineNum, columnNum, errorToken);
268        return new ParserException(errorType, formatted, source, lineNum, columnNum, errorToken);
269    }
270
271    /**
272     * Report an error.
273     *
274     * @param message Error message.
275     * @return ParserException upon failure. Caller should throw and not ignore
276     */
277    protected final ParserException error(final String message) {
278        return error(JSErrorType.SYNTAX_ERROR, message);
279    }
280
281    /**
282     * Report an error.
283     *
284     * @param errorType  The error type
285     * @param message    Error message.
286     * @return ParserException upon failure. Caller should throw and not ignore
287     */
288    protected final ParserException error(final JSErrorType errorType, final String message) {
289        // TODO - column needs to account for tabs.
290        final int position = Token.descPosition(token);
291        final int column = position - linePosition;
292        final String formatted = ErrorManager.format(message, source, line, column, token);
293        return new ParserException(errorType, formatted, source, line, column, token);
294    }
295
296    /**
297     * Report a warning to the error manager.
298     *
299     * @param errorType  The error type of the warning
300     * @param message    Warning message.
301     * @param errorToken error token
302     */
303    protected final void warning(final JSErrorType errorType, final String message, final long errorToken) {
304        errors.warning(error(errorType, message, errorToken));
305    }
306
307    /**
308     * Generate 'expected' message.
309     *
310     * @param expected Expected tokenType.
311     *
312     * @return the message string
313     */
314    protected final String expectMessage(final TokenType expected) {
315        final String tokenString = Token.toString(source, token);
316        String msg;
317
318        if (expected == null) {
319            msg = AbstractParser.message("expected.stmt", tokenString);
320        } else {
321            final String expectedName = expected.getNameOrType();
322            msg = AbstractParser.message("expected", expectedName, tokenString);
323        }
324
325        return msg;
326    }
327
328    /**
329     * Check next token and advance.
330     *
331     * @param expected Expected tokenType.
332     *
333     * @throws ParserException on unexpected token type
334     */
335    protected final void expect(final TokenType expected) throws ParserException {
336        if (type != expected) {
337            throw error(expectMessage(expected));
338        }
339
340        next();
341    }
342
343    /**
344     * Check next token, get its value and advance.
345     *
346     * @param  expected Expected tokenType.
347     * @return The JavaScript value of the token
348     * @throws ParserException on unexpected token type
349     */
350    protected final Object expectValue(final TokenType expected) throws ParserException {
351        if (type != expected) {
352            throw error(expectMessage(expected));
353        }
354
355        final Object value = getValue();
356
357        next();
358
359        return value;
360    }
361
362    /**
363     * Get the value of the current token.
364     *
365     * @return JavaScript value of the token.
366     */
367    protected final Object getValue() {
368        return getValue(token);
369    }
370
371    /**
372     * Get the value of a specific token
373     *
374     * @param valueToken the token
375     *
376     * @return JavaScript value of the token
377     */
378    protected final Object getValue(final long valueToken) {
379        try {
380            return lexer.getValueOf(valueToken, isStrictMode);
381        } catch (final ParserException e) {
382            errors.error(e);
383        }
384
385        return null;
386    }
387
388    /**
389     * Certain future reserved words can be used as identifiers in
390     * non-strict mode. Check if the current token is one such.
391     *
392     * @return true if non strict mode identifier
393     */
394    protected final boolean isNonStrictModeIdent() {
395        return !isStrictMode && type.getKind() == TokenKind.FUTURESTRICT;
396    }
397
398    /**
399     * Get ident.
400     *
401     * @return Ident node.
402     */
403    protected final IdentNode getIdent() {
404        // Capture IDENT token.
405        long identToken = token;
406
407        if (isNonStrictModeIdent()) {
408            // Fake out identifier.
409            identToken = Token.recast(token, IDENT);
410            // Get IDENT.
411            final String ident = (String)getValue(identToken);
412
413            next();
414
415            // Create IDENT node.
416            return createIdentNode(identToken, finish, ident).setIsFutureStrictName();
417        }
418
419        // Get IDENT.
420        final String ident = (String)expectValue(IDENT);
421        if (ident == null) {
422            return null;
423        }
424        // Create IDENT node.
425        return createIdentNode(identToken, finish, ident);
426    }
427
428    /**
429     * Creates a new {@link IdentNode} as if invoked with a {@link IdentNode#IdentNode(long, int, String)
430     * constructor} but making sure that the {@code name} is deduplicated within this parse job.
431     * @param identToken the token for the new {@code IdentNode}
432     * @param identFinish the finish for the new {@code IdentNode}
433     * @param name the name for the new {@code IdentNode}. It will be de-duplicated.
434     * @return a newly constructed {@code IdentNode} with the specified token, finish, and name; the name will
435     * be deduplicated.
436     */
437    protected IdentNode createIdentNode(final long identToken, final int identFinish, final String name) {
438        final String existingName = canonicalNames.putIfAbsent(name, name);
439        final String canonicalName = existingName != null ? existingName : name;
440        return new IdentNode(identToken, identFinish, canonicalName);
441    }
442
443    /**
444     * Check if current token is in identifier name
445     *
446     * @return true if current token is an identifier name
447     */
448    protected final boolean isIdentifierName() {
449        final TokenKind kind = type.getKind();
450        if (kind == TokenKind.KEYWORD || kind == TokenKind.FUTURE || kind == TokenKind.FUTURESTRICT) {
451            return true;
452        }
453        // Fake out identifier.
454        final long identToken = Token.recast(token, IDENT);
455        // Get IDENT.
456        final String ident = (String)getValue(identToken);
457        return !ident.isEmpty() && Character.isJavaIdentifierStart(ident.charAt(0));
458    }
459
460    /**
461     * Create an IdentNode from the current token
462     *
463     * @return an IdentNode representing the current token
464     */
465    protected final IdentNode getIdentifierName() {
466        if (type == IDENT) {
467            return getIdent();
468        } else if (isIdentifierName()) {
469            // Fake out identifier.
470            final long identToken = Token.recast(token, IDENT);
471            // Get IDENT.
472            final String ident = (String)getValue(identToken);
473            next();
474            // Create IDENT node.
475            return createIdentNode(identToken, finish, ident);
476        } else {
477            expect(IDENT);
478            return null;
479        }
480    }
481
482    /**
483     * Create a LiteralNode from the current token
484     *
485     * @return LiteralNode representing the current token
486     * @throws ParserException if any literals fails to parse
487     */
488    protected final LiteralNode<?> getLiteral() throws ParserException {
489        // Capture LITERAL token.
490        final long literalToken = token;
491
492        // Create literal node.
493        final Object value = getValue();
494        // Advance to have a correct finish
495        next();
496
497        LiteralNode<?> node = null;
498
499        if (value == null) {
500            node = LiteralNode.newInstance(literalToken, finish);
501        } else if (value instanceof Number) {
502            node = LiteralNode.newInstance(literalToken, finish, (Number)value);
503        } else if (value instanceof String) {
504            node = LiteralNode.newInstance(literalToken, finish, (String)value);
505        } else if (value instanceof LexerToken) {
506            if (value instanceof RegexToken) {
507                final RegexToken regex = (RegexToken)value;
508                try {
509                    RegExpFactory.validate(regex.getExpression(), regex.getOptions());
510                } catch (final ParserException e) {
511                    throw error(e.getMessage());
512                }
513            }
514            node = LiteralNode.newInstance(literalToken, finish, (LexerToken)value);
515        } else {
516            assert false : "unknown type for LiteralNode: " + value.getClass();
517        }
518
519        return node;
520    }
521}
522