AbstractParser.java revision 1523:c15ff29c1295
/*
 * Copyright (c) 2010, 2013, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.  Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */
25
26package jdk.nashorn.internal.parser;
27
28import static jdk.nashorn.internal.parser.TokenType.COMMENT;
29import static jdk.nashorn.internal.parser.TokenType.DIRECTIVE_COMMENT;
30import static jdk.nashorn.internal.parser.TokenType.EOF;
31import static jdk.nashorn.internal.parser.TokenType.EOL;
32import static jdk.nashorn.internal.parser.TokenType.IDENT;
33import java.util.HashMap;
34import java.util.Map;
35import jdk.nashorn.internal.ir.IdentNode;
36import jdk.nashorn.internal.ir.LiteralNode;
37import jdk.nashorn.internal.parser.Lexer.LexerToken;
38import jdk.nashorn.internal.parser.Lexer.RegexToken;
39import jdk.nashorn.internal.runtime.ECMAErrors;
40import jdk.nashorn.internal.runtime.ErrorManager;
41import jdk.nashorn.internal.runtime.JSErrorType;
42import jdk.nashorn.internal.runtime.ParserException;
43import jdk.nashorn.internal.runtime.Source;
44import jdk.nashorn.internal.runtime.regexp.RegExpFactory;
45
46/**
47 * Base class for parsers.
48 */
49public abstract class AbstractParser {
50    /** Source to parse. */
51    protected final Source source;
52
53    /** Error manager to report errors. */
54    protected final ErrorManager errors;
55
56    /** Stream of lex tokens to parse. */
57    protected TokenStream stream;
58
59    /** Index of current token. */
60    protected int k;
61
62    /** Previous token - accessible to sub classes */
63    protected long previousToken;
64
65    /** Descriptor of current token. */
66    protected long token;
67
68    /** Type of current token. */
69    protected TokenType type;
70
71    /** Type of last token. */
72    protected TokenType last;
73
74    /** Start position of current token. */
75    protected int start;
76
77    /** Finish position of previous token. */
78    protected int finish;
79
80    /** Current line number. */
81    protected int line;
82
83    /** Position of last EOL + 1. */
84    protected int linePosition;
85
86    /** Lexer used to scan source content. */
87    protected Lexer lexer;
88
89    /** Is this parser running under strict mode? */
90    protected boolean isStrictMode;
91
92    /** What should line numbers be counted from? */
93    protected final int lineOffset;
94
95    private final Map<String, String> canonicalNames = new HashMap<>();
96
97    /**
98     * Construct a parser.
99     *
100     * @param source     Source to parse.
101     * @param errors     Error reporting manager.
102     * @param strict     True if we are in strict mode
103     * @param lineOffset Offset from which lines should be counted
104     */
105    protected AbstractParser(final Source source, final ErrorManager errors, final boolean strict, final int lineOffset) {
106        if (source.getLength() > Token.LENGTH_MASK) {
107            throw new RuntimeException("Source exceeds size limit of " + Token.LENGTH_MASK + " bytes");
108        }
109        this.source       = source;
110        this.errors       = errors;
111        this.k            = -1;
112        this.token        = Token.toDesc(EOL, 0, 1);
113        this.type         = EOL;
114        this.last         = EOL;
115        this.isStrictMode = strict;
116        this.lineOffset   = lineOffset;
117    }
118
119    /**
120     * Get the ith token.
121     *
122     * @param i Index of token.
123     *
124     * @return  the token
125     */
126    protected final long getToken(final int i) {
127        // Make sure there are enough tokens available.
128        while (i > stream.last()) {
129            // If we need to buffer more for lookahead.
130            if (stream.isFull()) {
131                stream.grow();
132            }
133
134            // Get more tokens.
135            lexer.lexify();
136        }
137
138        return stream.get(i);
139    }
140
141    /**
142     * Return the tokenType of the ith token.
143     *
144     * @param i Index of token
145     *
146     * @return the token type
147     */
148    protected final TokenType T(final int i) {
149        // Get token descriptor and extract tokenType.
150        return Token.descType(getToken(i));
151    }
152
153    /**
154     * Seek next token that is not an EOL or comment.
155     *
156     * @return tokenType of next token.
157     */
158    protected final TokenType next() {
159        do {
160            nextOrEOL();
161        } while (type == EOL || type == COMMENT);
162
163        return type;
164    }
165
166    /**
167     * Seek next token or EOL (skipping comments.)
168     *
169     * @return tokenType of next token.
170     */
171    protected final TokenType nextOrEOL() {
172        do {
173            nextToken();
174            if (type == DIRECTIVE_COMMENT) {
175                checkDirectiveComment();
176            }
177        } while (type == COMMENT || type == DIRECTIVE_COMMENT);
178
179        return type;
180    }
181
182    // sourceURL= after directive comment
183    private static final String SOURCE_URL_PREFIX = "sourceURL=";
184
185    // currently only @sourceURL=foo supported
186    private void checkDirectiveComment() {
187        // if already set, ignore this one
188        if (source.getExplicitURL() != null) {
189            return;
190        }
191
192        final String comment = (String) lexer.getValueOf(token, isStrictMode);
193        final int len = comment.length();
194        // 4 characters for directive comment marker //@\s or //#\s
195        if (len > 4 && comment.substring(4).startsWith(SOURCE_URL_PREFIX)) {
196            source.setExplicitURL(comment.substring(4 + SOURCE_URL_PREFIX.length()));
197        }
198    }
199
200    /**
201     * Seek next token.
202     *
203     * @return tokenType of next token.
204     */
205    private TokenType nextToken() {
206        // Capture last token tokenType.
207        last = type;
208        if (type != EOF) {
209
210            // Set up next token.
211            k++;
212            final long lastToken = token;
213            previousToken = token;
214            token = getToken(k);
215            type = Token.descType(token);
216
217            // do this before the start is changed below
218            if (last != EOL) {
219                finish = start + Token.descLength(lastToken);
220            }
221
222            if (type == EOL) {
223                line         = Token.descLength(token);
224                linePosition = Token.descPosition(token);
225            } else {
226                start = Token.descPosition(token);
227            }
228
229        }
230
231        return type;
232    }
233
234    /**
235     * Get the message string for a message ID and arguments
236     *
237     * @param msgId The Message ID
238     * @param args  The arguments
239     *
240     * @return The message string
241     */
242    protected static String message(final String msgId, final String... args) {
243        return ECMAErrors.getMessage("parser.error." + msgId, args);
244    }
245
246    /**
247     * Report an error.
248     *
249     * @param message    Error message.
250     * @param errorToken Offending token.
251     * @return ParserException upon failure. Caller should throw and not ignore
252     */
253    protected final ParserException error(final String message, final long errorToken) {
254        return error(JSErrorType.SYNTAX_ERROR, message, errorToken);
255    }
256
257    /**
258     * Report an error.
259     *
260     * @param errorType  The error type
261     * @param message    Error message.
262     * @param errorToken Offending token.
263     * @return ParserException upon failure. Caller should throw and not ignore
264     */
265    protected final ParserException error(final JSErrorType errorType, final String message, final long errorToken) {
266        final int position  = Token.descPosition(errorToken);
267        final int lineNum   = source.getLine(position);
268        final int columnNum = source.getColumn(position);
269        final String formatted = ErrorManager.format(message, source, lineNum, columnNum, errorToken);
270        return new ParserException(errorType, formatted, source, lineNum, columnNum, errorToken);
271    }
272
273    /**
274     * Report an error.
275     *
276     * @param message Error message.
277     * @return ParserException upon failure. Caller should throw and not ignore
278     */
279    protected final ParserException error(final String message) {
280        return error(JSErrorType.SYNTAX_ERROR, message);
281    }
282
283    /**
284     * Report an error.
285     *
286     * @param errorType  The error type
287     * @param message    Error message.
288     * @return ParserException upon failure. Caller should throw and not ignore
289     */
290    protected final ParserException error(final JSErrorType errorType, final String message) {
291        // TODO - column needs to account for tabs.
292        final int position = Token.descPosition(token);
293        final int column = position - linePosition;
294        final String formatted = ErrorManager.format(message, source, line, column, token);
295        return new ParserException(errorType, formatted, source, line, column, token);
296    }
297
298    /**
299     * Report a warning to the error manager.
300     *
301     * @param errorType  The error type of the warning
302     * @param message    Warning message.
303     * @param errorToken error token
304     */
305    protected final void warning(final JSErrorType errorType, final String message, final long errorToken) {
306        errors.warning(error(errorType, message, errorToken));
307    }
308
309    /**
310     * Generate 'expected' message.
311     *
312     * @param expected Expected tokenType.
313     *
314     * @return the message string
315     */
316    protected final String expectMessage(final TokenType expected) {
317        final String tokenString = Token.toString(source, token);
318        String msg;
319
320        if (expected == null) {
321            msg = AbstractParser.message("expected.stmt", tokenString);
322        } else {
323            final String expectedName = expected.getNameOrType();
324            msg = AbstractParser.message("expected", expectedName, tokenString);
325        }
326
327        return msg;
328    }
329
330    /**
331     * Check current token and advance to the next token.
332     *
333     * @param expected Expected tokenType.
334     *
335     * @throws ParserException on unexpected token type
336     */
337    protected final void expect(final TokenType expected) throws ParserException {
338        expectDontAdvance(expected);
339        next();
340    }
341
342    /**
343     * Check current token, but don't advance to the next token.
344     *
345     * @param expected Expected tokenType.
346     *
347     * @throws ParserException on unexpected token type
348     */
349    protected final void expectDontAdvance(final TokenType expected) throws ParserException {
350        if (type != expected) {
351            throw error(expectMessage(expected));
352        }
353    }
354
355    /**
356     * Check next token, get its value and advance.
357     *
358     * @param  expected Expected tokenType.
359     * @return The JavaScript value of the token
360     * @throws ParserException on unexpected token type
361     */
362    protected final Object expectValue(final TokenType expected) throws ParserException {
363        if (type != expected) {
364            throw error(expectMessage(expected));
365        }
366
367        final Object value = getValue();
368
369        next();
370
371        return value;
372    }
373
374    /**
375     * Get the value of the current token.
376     *
377     * @return JavaScript value of the token.
378     */
379    protected final Object getValue() {
380        return getValue(token);
381    }
382
383    /**
384     * Get the value of a specific token
385     *
386     * @param valueToken the token
387     *
388     * @return JavaScript value of the token
389     */
390    protected final Object getValue(final long valueToken) {
391        try {
392            return lexer.getValueOf(valueToken, isStrictMode);
393        } catch (final ParserException e) {
394            errors.error(e);
395        }
396
397        return null;
398    }
399
400    /**
401     * Certain future reserved words can be used as identifiers in
402     * non-strict mode. Check if the current token is one such.
403     *
404     * @return true if non strict mode identifier
405     */
406    protected final boolean isNonStrictModeIdent() {
407        return !isStrictMode && type.getKind() == TokenKind.FUTURESTRICT;
408    }
409
410    /**
411     * Get ident.
412     *
413     * @return Ident node.
414     */
415    protected final IdentNode getIdent() {
416        // Capture IDENT token.
417        long identToken = token;
418
419        if (isNonStrictModeIdent()) {
420            // Fake out identifier.
421            identToken = Token.recast(token, IDENT);
422            // Get IDENT.
423            final String ident = (String)getValue(identToken);
424
425            next();
426
427            // Create IDENT node.
428            return createIdentNode(identToken, finish, ident).setIsFutureStrictName();
429        }
430
431        // Get IDENT.
432        final String ident = (String)expectValue(IDENT);
433        if (ident == null) {
434            return null;
435        }
436        // Create IDENT node.
437        return createIdentNode(identToken, finish, ident);
438    }
439
440    /**
441     * Creates a new {@link IdentNode} as if invoked with a {@link IdentNode#IdentNode(long, int, String)
442     * constructor} but making sure that the {@code name} is deduplicated within this parse job.
443     * @param identToken the token for the new {@code IdentNode}
444     * @param identFinish the finish for the new {@code IdentNode}
445     * @param name the name for the new {@code IdentNode}. It will be de-duplicated.
446     * @return a newly constructed {@code IdentNode} with the specified token, finish, and name; the name will
447     * be deduplicated.
448     */
449    protected IdentNode createIdentNode(final long identToken, final int identFinish, final String name) {
450        final String existingName = canonicalNames.putIfAbsent(name, name);
451        final String canonicalName = existingName != null ? existingName : name;
452        return new IdentNode(identToken, identFinish, canonicalName);
453    }
454
455    /**
456     * Check if current token is in identifier name
457     *
458     * @return true if current token is an identifier name
459     */
460    protected final boolean isIdentifierName() {
461        final TokenKind kind = type.getKind();
462        if (kind == TokenKind.KEYWORD || kind == TokenKind.FUTURE || kind == TokenKind.FUTURESTRICT) {
463            return true;
464        }
465
466        // only literals allowed are null, false and true
467        if (kind == TokenKind.LITERAL) {
468            switch (type) {
469                case FALSE:
470                case NULL:
471                case TRUE:
472                    return true;
473                default:
474                    return false;
475            }
476        }
477
478        // Fake out identifier.
479        final long identToken = Token.recast(token, IDENT);
480        // Get IDENT.
481        final String ident = (String)getValue(identToken);
482        return !ident.isEmpty() && Character.isJavaIdentifierStart(ident.charAt(0));
483    }
484
485    /**
486     * Create an IdentNode from the current token
487     *
488     * @return an IdentNode representing the current token
489     */
490    protected final IdentNode getIdentifierName() {
491        if (type == IDENT) {
492            return getIdent();
493        } else if (isIdentifierName()) {
494            // Fake out identifier.
495            final long identToken = Token.recast(token, IDENT);
496            // Get IDENT.
497            final String ident = (String)getValue(identToken);
498            next();
499            // Create IDENT node.
500            return createIdentNode(identToken, finish, ident);
501        } else {
502            expect(IDENT);
503            return null;
504        }
505    }
506
507    /**
508     * Create a LiteralNode from the current token
509     *
510     * @return LiteralNode representing the current token
511     * @throws ParserException if any literals fails to parse
512     */
513    protected final LiteralNode<?> getLiteral() throws ParserException {
514        // Capture LITERAL token.
515        final long literalToken = token;
516
517        // Create literal node.
518        final Object value = getValue();
519        // Advance to have a correct finish
520        next();
521
522        LiteralNode<?> node = null;
523
524        if (value == null) {
525            node = LiteralNode.newInstance(literalToken, finish);
526        } else if (value instanceof Number) {
527            node = LiteralNode.newInstance(literalToken, finish, (Number)value);
528        } else if (value instanceof String) {
529            node = LiteralNode.newInstance(literalToken, finish, (String)value);
530        } else if (value instanceof LexerToken) {
531            if (value instanceof RegexToken) {
532                final RegexToken regex = (RegexToken)value;
533                try {
534                    RegExpFactory.validate(regex.getExpression(), regex.getOptions());
535                } catch (final ParserException e) {
536                    throw error(e.getMessage());
537                }
538            }
539            node = LiteralNode.newInstance(literalToken, finish, (LexerToken)value);
540        } else {
541            assert false : "unknown type for LiteralNode: " + value.getClass();
542        }
543
544        return node;
545    }
546}
547