AbstractParser.java revision 971:c93b6091b11e
/*
 * Copyright (c) 2010, 2013, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.  Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */
25
26package jdk.nashorn.internal.parser;
27
28import static jdk.nashorn.internal.parser.TokenType.COMMENT;
29import static jdk.nashorn.internal.parser.TokenType.DIRECTIVE_COMMENT;
30import static jdk.nashorn.internal.parser.TokenType.EOF;
31import static jdk.nashorn.internal.parser.TokenType.EOL;
32import static jdk.nashorn.internal.parser.TokenType.IDENT;
33
34import jdk.nashorn.internal.ir.IdentNode;
35import jdk.nashorn.internal.ir.LiteralNode;
36import jdk.nashorn.internal.parser.Lexer.LexerToken;
37import jdk.nashorn.internal.parser.Lexer.RegexToken;
38import jdk.nashorn.internal.runtime.ECMAErrors;
39import jdk.nashorn.internal.runtime.ErrorManager;
40import jdk.nashorn.internal.runtime.JSErrorType;
41import jdk.nashorn.internal.runtime.ParserException;
42import jdk.nashorn.internal.runtime.Source;
43import jdk.nashorn.internal.runtime.regexp.RegExpFactory;
44
/**
 * Base class for parsers.
 */
48public abstract class AbstractParser {
49    /** Source to parse. */
50    protected final Source source;
51
52    /** Error manager to report errors. */
53    protected final ErrorManager errors;
54
55    /** Stream of lex tokens to parse. */
56    protected TokenStream stream;
57
58    /** Index of current token. */
59    protected int k;
60
61    /** Previous token - accessible to sub classes */
62    protected long previousToken;
63
64    /** Descriptor of current token. */
65    protected long token;
66
67    /** Type of current token. */
68    protected TokenType type;
69
70    /** Type of last token. */
71    protected TokenType last;
72
73    /** Start position of current token. */
74    protected int start;
75
76    /** Finish position of previous token. */
77    protected int finish;
78
79    /** Current line number. */
80    protected int line;
81
82    /** Position of last EOL + 1. */
83    protected int linePosition;
84
85    /** Lexer used to scan source content. */
86    protected Lexer lexer;
87
88    /** Is this parser running under strict mode? */
89    protected boolean isStrictMode;
90
91    /** What should line numbers be counted from? */
92    protected final int lineOffset;
93
94    /**
95     * Construct a parser.
96     *
97     * @param source     Source to parse.
98     * @param errors     Error reporting manager.
99     * @param strict     True if we are in strict mode
100     * @param lineOffset Offset from which lines should be counted
101     */
102    protected AbstractParser(final Source source, final ErrorManager errors, final boolean strict, final int lineOffset) {
103        this.source       = source;
104        this.errors       = errors;
105        this.k            = -1;
106        this.token        = Token.toDesc(EOL, 0, 1);
107        this.type         = EOL;
108        this.last         = EOL;
109        this.isStrictMode = strict;
110        this.lineOffset   = lineOffset;
111    }
112
113    /**
114     * Get the ith token.
115     *
116     * @param i Index of token.
117     *
118     * @return  the token
119     */
120    protected final long getToken(final int i) {
121        // Make sure there are enough tokens available.
122        while (i > stream.last()) {
123            // If we need to buffer more for lookahead.
124            if (stream.isFull()) {
125                stream.grow();
126            }
127
128            // Get more tokens.
129            lexer.lexify();
130        }
131
132        return stream.get(i);
133    }
134
135    /**
136     * Return the tokenType of the ith token.
137     *
138     * @param i Index of token
139     *
140     * @return the token type
141     */
142    protected final TokenType T(final int i) {
143        // Get token descriptor and extract tokenType.
144        return Token.descType(getToken(i));
145    }
146
147    /**
148     * Seek next token that is not an EOL or comment.
149     *
150     * @return tokenType of next token.
151     */
152    protected final TokenType next() {
153        do {
154            nextOrEOL();
155        } while (type == EOL || type == COMMENT);
156
157        return type;
158    }
159
160    /**
161     * Seek next token or EOL (skipping comments.)
162     *
163     * @return tokenType of next token.
164     */
165    protected final TokenType nextOrEOL() {
166        do {
167            nextToken();
168            if (type == DIRECTIVE_COMMENT) {
169                checkDirectiveComment();
170            }
171        } while (type == COMMENT || type == DIRECTIVE_COMMENT);
172
173        return type;
174    }
175
176    // sourceURL= after directive comment
177    private static final String SOURCE_URL_PREFIX = "sourceURL=";
178
179    // currently only @sourceURL=foo supported
180    private void checkDirectiveComment() {
181        // if already set, ignore this one
182        if (source.getExplicitURL() != null) {
183            return;
184        }
185
186        final String comment = (String) lexer.getValueOf(token, isStrictMode);
187        final int len = comment.length();
188        // 4 characters for directive comment marker //@\s or //#\s
189        if (len > 4 && comment.substring(4).startsWith(SOURCE_URL_PREFIX)) {
190            source.setExplicitURL(comment.substring(4 + SOURCE_URL_PREFIX.length()));
191        }
192    }
193
194    /**
195     * Seek next token.
196     *
197     * @return tokenType of next token.
198     */
199    private TokenType nextToken() {
200        // Capture last token tokenType.
201        last = type;
202        if (type != EOF) {
203
204            // Set up next token.
205            k++;
206            final long lastToken = token;
207            previousToken = token;
208            token = getToken(k);
209            type = Token.descType(token);
210
211            // do this before the start is changed below
212            if (last != EOL) {
213                finish = start + Token.descLength(lastToken);
214            }
215
216            if (type == EOL) {
217                line         = Token.descLength(token);
218                linePosition = Token.descPosition(token);
219            } else {
220                start = Token.descPosition(token);
221            }
222
223        }
224
225        return type;
226    }
227
228    /**
229     * Get the message string for a message ID and arguments
230     *
231     * @param msgId The Message ID
232     * @param args  The arguments
233     *
234     * @return The message string
235     */
236    protected static String message(final String msgId, final String... args) {
237        return ECMAErrors.getMessage("parser.error." + msgId, args);
238    }
239
240    /**
241     * Report an error.
242     *
243     * @param message    Error message.
244     * @param errorToken Offending token.
245     * @return ParserException upon failure. Caller should throw and not ignore
246     */
247    protected final ParserException error(final String message, final long errorToken) {
248        return error(JSErrorType.SYNTAX_ERROR, message, errorToken);
249    }
250
251    /**
252     * Report an error.
253     *
254     * @param errorType  The error type
255     * @param message    Error message.
256     * @param errorToken Offending token.
257     * @return ParserException upon failure. Caller should throw and not ignore
258     */
259    protected final ParserException error(final JSErrorType errorType, final String message, final long errorToken) {
260        final int position  = Token.descPosition(errorToken);
261        final int lineNum   = source.getLine(position);
262        final int columnNum = source.getColumn(position);
263        final String formatted = ErrorManager.format(message, source, lineNum, columnNum, errorToken);
264        return new ParserException(errorType, formatted, source, lineNum, columnNum, errorToken);
265    }
266
267    /**
268     * Report an error.
269     *
270     * @param message Error message.
271     * @return ParserException upon failure. Caller should throw and not ignore
272     */
273    protected final ParserException error(final String message) {
274        return error(JSErrorType.SYNTAX_ERROR, message);
275    }
276
277    /**
278     * Report an error.
279     *
280     * @param errorType  The error type
281     * @param message    Error message.
282     * @return ParserException upon failure. Caller should throw and not ignore
283     */
284    protected final ParserException error(final JSErrorType errorType, final String message) {
285        // TODO - column needs to account for tabs.
286        final int position = Token.descPosition(token);
287        final int column = position - linePosition;
288        final String formatted = ErrorManager.format(message, source, line, column, token);
289        return new ParserException(errorType, formatted, source, line, column, token);
290    }
291
292    /**
293     * Report a warning to the error manager.
294     *
295     * @param errorType  The error type of the warning
296     * @param message    Warning message.
297     * @param errorToken error token
298     */
299    protected final void warning(final JSErrorType errorType, final String message, final long errorToken) {
300        errors.warning(error(errorType, message, errorToken));
301    }
302
303    /**
304     * Generate 'expected' message.
305     *
306     * @param expected Expected tokenType.
307     *
308     * @return the message string
309     */
310    protected final String expectMessage(final TokenType expected) {
311        final String tokenString = Token.toString(source, token);
312        String msg;
313
314        if (expected == null) {
315            msg = AbstractParser.message("expected.stmt", tokenString);
316        } else {
317            final String expectedName = expected.getNameOrType();
318            msg = AbstractParser.message("expected", expectedName, tokenString);
319        }
320
321        return msg;
322    }
323
324    /**
325     * Check next token and advance.
326     *
327     * @param expected Expected tokenType.
328     *
329     * @throws ParserException on unexpected token type
330     */
331    protected final void expect(final TokenType expected) throws ParserException {
332        if (type != expected) {
333            throw error(expectMessage(expected));
334        }
335
336        next();
337    }
338
339    /**
340     * Check next token, get its value and advance.
341     *
342     * @param  expected Expected tokenType.
343     * @return The JavaScript value of the token
344     * @throws ParserException on unexpected token type
345     */
346    protected final Object expectValue(final TokenType expected) throws ParserException {
347        if (type != expected) {
348            throw error(expectMessage(expected));
349        }
350
351        final Object value = getValue();
352
353        next();
354
355        return value;
356    }
357
358    /**
359     * Get the value of the current token.
360     *
361     * @return JavaScript value of the token.
362     */
363    protected final Object getValue() {
364        return getValue(token);
365    }
366
367    /**
368     * Get the value of a specific token
369     *
370     * @param valueToken the token
371     *
372     * @return JavaScript value of the token
373     */
374    protected final Object getValue(final long valueToken) {
375        try {
376            return lexer.getValueOf(valueToken, isStrictMode);
377        } catch (final ParserException e) {
378            errors.error(e);
379        }
380
381        return null;
382    }
383
384    /**
385     * Certain future reserved words can be used as identifiers in
386     * non-strict mode. Check if the current token is one such.
387     *
388     * @return true if non strict mode identifier
389     */
390    protected final boolean isNonStrictModeIdent() {
391        return !isStrictMode && type.getKind() == TokenKind.FUTURESTRICT;
392    }
393
394    /**
395     * Get ident.
396     *
397     * @return Ident node.
398     */
399    protected final IdentNode getIdent() {
400        // Capture IDENT token.
401        long identToken = token;
402
403        if (isNonStrictModeIdent()) {
404            // Fake out identifier.
405            identToken = Token.recast(token, IDENT);
406            // Get IDENT.
407            final String ident = (String)getValue(identToken);
408
409            next();
410
411            // Create IDENT node.
412            return new IdentNode(identToken, finish, ident).setIsFutureStrictName();
413        }
414
415        // Get IDENT.
416        final String ident = (String)expectValue(IDENT);
417        if (ident == null) {
418            return null;
419        }
420        // Create IDENT node.
421        return new IdentNode(identToken, finish, ident);
422    }
423
424    /**
425     * Check if current token is in identifier name
426     *
427     * @return true if current token is an identifier name
428     */
429    protected final boolean isIdentifierName() {
430        final TokenKind kind = type.getKind();
431        if (kind == TokenKind.KEYWORD || kind == TokenKind.FUTURE || kind == TokenKind.FUTURESTRICT) {
432            return true;
433        }
434        // Fake out identifier.
435        final long identToken = Token.recast(token, IDENT);
436        // Get IDENT.
437        final String ident = (String)getValue(identToken);
438        return !ident.isEmpty() && Character.isJavaIdentifierStart(ident.charAt(0));
439    }
440
441    /**
442     * Create an IdentNode from the current token
443     *
444     * @return an IdentNode representing the current token
445     */
446    protected final IdentNode getIdentifierName() {
447        if (type == IDENT) {
448            return getIdent();
449        } else if (isIdentifierName()) {
450            // Fake out identifier.
451            final long identToken = Token.recast(token, IDENT);
452            // Get IDENT.
453            final String ident = (String)getValue(identToken);
454            next();
455            // Create IDENT node.
456            return new IdentNode(identToken, finish, ident);
457        } else {
458            expect(IDENT);
459            return null;
460        }
461    }
462
463    /**
464     * Create a LiteralNode from the current token
465     *
466     * @return LiteralNode representing the current token
467     * @throws ParserException if any literals fails to parse
468     */
469    protected final LiteralNode<?> getLiteral() throws ParserException {
470        // Capture LITERAL token.
471        final long literalToken = token;
472
473        // Create literal node.
474        final Object value = getValue();
475        // Advance to have a correct finish
476        next();
477
478        LiteralNode<?> node = null;
479
480        if (value == null) {
481            node = LiteralNode.newInstance(literalToken, finish);
482        } else if (value instanceof Number) {
483            node = LiteralNode.newInstance(literalToken, finish, (Number)value);
484        } else if (value instanceof String) {
485            node = LiteralNode.newInstance(literalToken, finish, (String)value);
486        } else if (value instanceof LexerToken) {
487            if (value instanceof RegexToken) {
488                final RegexToken regex = (RegexToken)value;
489                try {
490                    RegExpFactory.validate(regex.getExpression(), regex.getOptions());
491                } catch (final ParserException e) {
492                    throw error(e.getMessage());
493                }
494            }
495            node = LiteralNode.newInstance(literalToken, finish, (LexerToken)value);
496        } else {
497            assert false : "unknown type for LiteralNode: " + value.getClass();
498        }
499
500        return node;
501    }
502}
503