JSONParser.java revision 953:221a84ef44c0
1/*
2 * Copyright (c) 2010, 2013, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.  Oracle designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Oracle in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 */
25
26package jdk.nashorn.internal.parser;
27
28import static jdk.nashorn.internal.parser.TokenType.COLON;
29import static jdk.nashorn.internal.parser.TokenType.COMMARIGHT;
30import static jdk.nashorn.internal.parser.TokenType.EOF;
31import static jdk.nashorn.internal.parser.TokenType.ESCSTRING;
32import static jdk.nashorn.internal.parser.TokenType.RBRACE;
33import static jdk.nashorn.internal.parser.TokenType.RBRACKET;
34import static jdk.nashorn.internal.parser.TokenType.STRING;
35
36import java.util.ArrayList;
37import java.util.List;
38import jdk.nashorn.internal.ir.Expression;
39import jdk.nashorn.internal.ir.LiteralNode;
40import jdk.nashorn.internal.ir.Node;
41import jdk.nashorn.internal.ir.ObjectNode;
42import jdk.nashorn.internal.ir.PropertyNode;
43import jdk.nashorn.internal.ir.UnaryNode;
44import jdk.nashorn.internal.runtime.ErrorManager;
45import jdk.nashorn.internal.runtime.Source;
46
47/**
48 * Parses JSON text and returns the corresponding IR node. This is derived from the objectLiteral production of the main parser.
49 *
50 * See: 15.12.1.2 The JSON Syntactic Grammar
51 */
52public class JSONParser extends AbstractParser {
53
54    /**
55     * Constructor
56     * @param source  the source
57     * @param errors  the error manager
58     */
59    public JSONParser(final Source source, final ErrorManager errors) {
60        super(source, errors, false, 0);
61    }
62
63    /**
64     * Implementation of the Quote(value) operation as defined in the ECMA script spec
65     * It wraps a String value in double quotes and escapes characters within in
66     *
67     * @param value string to quote
68     *
69     * @return quoted and escaped string
70     */
71    public static String quote(final String value) {
72
73        final StringBuilder product = new StringBuilder();
74
75        product.append("\"");
76
77        for (final char ch : value.toCharArray()) {
78            // TODO: should use a table?
79            switch (ch) {
80            case '\\':
81                product.append("\\\\");
82                break;
83            case '"':
84                product.append("\\\"");
85                break;
86            case '\b':
87                product.append("\\b");
88                break;
89            case '\f':
90                product.append("\\f");
91                break;
92            case '\n':
93                product.append("\\n");
94                break;
95            case '\r':
96                product.append("\\r");
97                break;
98            case '\t':
99                product.append("\\t");
100                break;
101            default:
102                if (ch < ' ') {
103                    product.append(Lexer.unicodeEscape(ch));
104                    break;
105                }
106
107                product.append(ch);
108                break;
109            }
110        }
111
112        product.append("\"");
113
114        return product.toString();
115    }
116
117    /**
118     * Public parsed method - start lexing a new token stream for
119     * a JSON script
120     *
121     * @return the JSON literal
122     */
123    public Node parse() {
124        stream = new TokenStream();
125
126        lexer = new Lexer(source, stream) {
127
128            @Override
129            protected boolean skipComments() {
130                return false;
131            }
132
133            @Override
134            protected boolean isStringDelimiter(final char ch) {
135                return ch == '\"';
136            }
137
138            // ECMA 15.12.1.1 The JSON Lexical Grammar - JSONWhiteSpace
139            @Override
140            protected boolean isWhitespace(final char ch) {
141                return Lexer.isJsonWhitespace(ch);
142            }
143
144            @Override
145            protected boolean isEOL(final char ch) {
146                return Lexer.isJsonEOL(ch);
147            }
148
149            // ECMA 15.12.1.1 The JSON Lexical Grammar - JSONNumber
150            @Override
151            protected void scanNumber() {
152                // Record beginning of number.
153                final int startPosition = position;
154                // Assume value is a decimal.
155                TokenType valueType = TokenType.DECIMAL;
156
157                // floating point can't start with a "." with no leading digit before
158                if (ch0 == '.') {
159                    error(Lexer.message("json.invalid.number"), STRING, position, limit);
160                }
161
162                // First digit of number.
163                final int digit = convertDigit(ch0, 10);
164
165                // skip first digit
166                skip(1);
167
168                if (digit != 0) {
169                    // Skip over remaining digits.
170                    while (convertDigit(ch0, 10) != -1) {
171                        skip(1);
172                    }
173                }
174
175                if (ch0 == '.' || ch0 == 'E' || ch0 == 'e') {
176                    // Must be a double.
177                    if (ch0 == '.') {
178                        // Skip period.
179                        skip(1);
180
181                        boolean mantissa = false;
182                        // Skip mantissa.
183                        while (convertDigit(ch0, 10) != -1) {
184                            mantissa = true;
185                            skip(1);
186                        }
187
188                        if (! mantissa) {
189                            // no digit after "."
190                            error(Lexer.message("json.invalid.number"), STRING, position, limit);
191                        }
192                    }
193
194                    // Detect exponent.
195                    if (ch0 == 'E' || ch0 == 'e') {
196                        // Skip E.
197                        skip(1);
198                        // Detect and skip exponent sign.
199                        if (ch0 == '+' || ch0 == '-') {
200                            skip(1);
201                        }
202                        boolean exponent = false;
203                        // Skip exponent.
204                        while (convertDigit(ch0, 10) != -1) {
205                            exponent = true;
206                            skip(1);
207                        }
208
209                        if (! exponent) {
210                            // no digit after "E"
211                            error(Lexer.message("json.invalid.number"), STRING, position, limit);
212                        }
213                    }
214
215                    valueType = TokenType.FLOATING;
216                }
217
218                // Add number token.
219                add(valueType, startPosition);
220            }
221
222            // ECMA 15.12.1.1 The JSON Lexical Grammar - JSONEscapeCharacter
223            @Override
224            protected boolean isEscapeCharacter(final char ch) {
225                switch (ch) {
226                    case '"':
227                    case '/':
228                    case '\\':
229                    case 'b':
230                    case 'f':
231                    case 'n':
232                    case 'r':
233                    case 't':
234                    // could be unicode escape
235                    case 'u':
236                        return true;
237                    default:
238                        return false;
239                }
240            }
241        };
242
243        k = -1;
244
245        next();
246
247        final Node resultNode = jsonLiteral();
248        expect(EOF);
249
250        return resultNode;
251    }
252
253    @SuppressWarnings("fallthrough")
254    private LiteralNode<?> getStringLiteral() {
255        final LiteralNode<?> literal = getLiteral();
256        final String         str     = (String)literal.getValue();
257
258        for (int i = 0; i < str.length(); i++) {
259            final char ch = str.charAt(i);
260            switch (ch) {
261            default:
262                if (ch > 0x001f) {
263                    break;
264                }
265            case '"':
266            case '\\':
267                throw error(AbstractParser.message("unexpected.token", str));
268            }
269        }
270
271        return literal;
272    }
273
274    /**
275     * Parse a JSON literal from the token stream
276     * @return the JSON literal as a Node
277     */
278    private Expression jsonLiteral() {
279        final long literalToken = token;
280
281        switch (type) {
282        case STRING:
283            return getStringLiteral();
284        case ESCSTRING:
285        case DECIMAL:
286        case FLOATING:
287            return getLiteral();
288        case FALSE:
289            next();
290            return LiteralNode.newInstance(literalToken, finish, false);
291        case TRUE:
292            next();
293            return LiteralNode.newInstance(literalToken, finish, true);
294        case NULL:
295            next();
296            return LiteralNode.newInstance(literalToken, finish);
297        case LBRACKET:
298            return arrayLiteral();
299        case LBRACE:
300            return objectLiteral();
301        /*
302         * A.8.1 JSON Lexical Grammar
303         *
304         * JSONNumber :: See 15.12.1.1
305         *    -opt DecimalIntegerLiteral JSONFractionopt ExponentPartopt
306         */
307        case SUB:
308            next();
309
310            final long realToken = token;
311            final Object value = getValue();
312
313            if (value instanceof Number) {
314                next();
315                return new UnaryNode(literalToken, LiteralNode.newInstance(realToken, finish, (Number)value));
316            }
317
318            throw error(AbstractParser.message("expected", "number", type.getNameOrType()));
319        default:
320            break;
321        }
322
323        throw error(AbstractParser.message("expected", "json literal", type.getNameOrType()));
324    }
325
326    /**
327     * Parse an array literal from the token stream
328     * @return the array literal as a Node
329     */
330    private LiteralNode<Expression[]> arrayLiteral() {
331        // Unlike JavaScript array literals, elison is not permitted in JSON.
332
333        // Capture LBRACKET token.
334        final long arrayToken = token;
335        // LBRACKET tested in caller.
336        next();
337
338        LiteralNode<Expression[]> result = null;
339        // Prepare to accummulating elements.
340        final List<Expression> elements = new ArrayList<>();
341
342loop:
343        while (true) {
344            switch (type) {
345            case RBRACKET:
346                next();
347                result = LiteralNode.newInstance(arrayToken, finish, elements);
348                break loop;
349
350            case COMMARIGHT:
351                next();
352                // check for trailing comma - not allowed in JSON
353                if (type == RBRACKET) {
354                    throw error(AbstractParser.message("trailing.comma.in.json", type.getNameOrType()));
355                }
356                break;
357
358            default:
359                // Add expression element.
360                elements.add(jsonLiteral());
361                // Comma between array elements is mandatory in JSON.
362                if (type != COMMARIGHT && type != RBRACKET) {
363                   throw error(AbstractParser.message("expected", ", or ]", type.getNameOrType()));
364                }
365                break;
366            }
367        }
368
369        return result;
370    }
371
372    /**
373     * Parse an object literal from the token stream
374     * @return the object literal as a Node
375     */
376    private ObjectNode objectLiteral() {
377        // Capture LBRACE token.
378        final long objectToken = token;
379        // LBRACE tested in caller.
380        next();
381
382        // Prepare to accumulate elements.
383        final List<PropertyNode> elements = new ArrayList<>();
384
385        // Create a block for the object literal.
386loop:
387        while (true) {
388            switch (type) {
389            case RBRACE:
390                next();
391                break loop;
392
393            case COMMARIGHT:
394                next();
395                // check for trailing comma - not allowed in JSON
396                if (type == RBRACE) {
397                    throw error(AbstractParser.message("trailing.comma.in.json", type.getNameOrType()));
398                }
399                break;
400
401            default:
402                // Get and add the next property.
403                final PropertyNode property = propertyAssignment();
404                elements.add(property);
405
406                // Comma between property assigments is mandatory in JSON.
407                if (type != RBRACE && type != COMMARIGHT) {
408                    throw error(AbstractParser.message("expected", ", or }", type.getNameOrType()));
409                }
410                break;
411            }
412        }
413
414        // Construct new object literal.
415        return new ObjectNode(objectToken, finish, elements);
416    }
417
418    /**
419     * Parse a property assignment from the token stream
420     * @return the property assignment as a Node
421     */
422    private PropertyNode propertyAssignment() {
423        // Capture firstToken.
424        final long propertyToken = token;
425        LiteralNode<?> name = null;
426
427        if (type == STRING) {
428            name = getStringLiteral();
429        } else if (type == ESCSTRING) {
430            name = getLiteral();
431        }
432
433        if (name != null) {
434            expect(COLON);
435            final Expression value = jsonLiteral();
436            return new PropertyNode(propertyToken, value.getFinish(), name, value, null, null);
437        }
438
439        // Raise an error.
440        throw error(AbstractParser.message("expected", "string", type.getNameOrType()));
441    }
442
443}
444