// JSONParser.java revision 953:221a84ef44c0
/*
 * Copyright (c) 2010, 2013, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation. Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

package jdk.nashorn.internal.parser;

import static jdk.nashorn.internal.parser.TokenType.COLON;
import static jdk.nashorn.internal.parser.TokenType.COMMARIGHT;
import static jdk.nashorn.internal.parser.TokenType.EOF;
import static jdk.nashorn.internal.parser.TokenType.ESCSTRING;
import static jdk.nashorn.internal.parser.TokenType.RBRACE;
import static jdk.nashorn.internal.parser.TokenType.RBRACKET;
import static jdk.nashorn.internal.parser.TokenType.STRING;

import java.util.ArrayList;
import java.util.List;
import jdk.nashorn.internal.ir.Expression;
import jdk.nashorn.internal.ir.LiteralNode;
import jdk.nashorn.internal.ir.Node;
import jdk.nashorn.internal.ir.ObjectNode;
import jdk.nashorn.internal.ir.PropertyNode;
import jdk.nashorn.internal.ir.UnaryNode;
import jdk.nashorn.internal.runtime.ErrorManager;
import jdk.nashorn.internal.runtime.Source;

/**
 * Parses JSON text and returns the corresponding IR node. This is derived from the objectLiteral production of the main parser.
 *
 * See: 15.12.1.2 The JSON Syntactic Grammar
 */
public class JSONParser extends AbstractParser {

    /**
     * Constructor
     * @param source  the source
     * @param errors  the error manager
     */
    public JSONParser(final Source source, final ErrorManager errors) {
        super(source, errors, false, 0);
    }

    /**
     * Implementation of the Quote(value) operation as defined in the ECMA script spec.
     * It wraps a String value in double quotes and escapes characters within it:
     * backslash and double quote get a backslash prefix, the control characters
     * {@code \b \f \n \r \t} use their short escapes, and every other character
     * below U+0020 is emitted through {@link Lexer#unicodeEscape(char)}.
     *
     * @param value string to quote
     *
     * @return quoted and escaped string
     */
    public static String quote(final String value) {
        final int length = value.length();
        // Room for the content plus the two surrounding quotes; grows if escapes are needed.
        final StringBuilder product = new StringBuilder(length + 2);

        product.append('"');

        for (int i = 0; i < length; i++) {
            final char ch = value.charAt(i);
            // TODO: should use a table?
            switch (ch) {
            case '\\':
                product.append("\\\\");
                break;
            case '"':
                product.append("\\\"");
                break;
            case '\b':
                product.append("\\b");
                break;
            case '\f':
                product.append("\\f");
                break;
            case '\n':
                product.append("\\n");
                break;
            case '\r':
                product.append("\\r");
                break;
            case '\t':
                product.append("\\t");
                break;
            default:
                if (ch < ' ') {
                    // Remaining control characters have no short form.
                    product.append(Lexer.unicodeEscape(ch));
                } else {
                    product.append(ch);
                }
                break;
            }
        }

        product.append('"');

        return product.toString();
    }

    /**
     * Public parse method - start lexing a new token stream for
     * a JSON script. Installs a {@link Lexer} specialised for the JSON
     * lexical grammar, parses exactly one JSON value and then requires
     * end of input.
     *
     * @return the JSON literal
     */
    public Node parse() {
        stream = new TokenStream();

        lexer = new Lexer(source, stream) {

            // JSON defines no comment syntax, so the lexer is told not to skip any.
            @Override
            protected boolean skipComments() {
                return false;
            }

            // Only the double quote delimits a JSON string.
            @Override
            protected boolean isStringDelimiter(final char ch) {
                return ch == '\"';
            }

            // ECMA 15.12.1.1 The JSON Lexical Grammar - JSONWhiteSpace
            @Override
            protected boolean isWhitespace(final char ch) {
                return Lexer.isJsonWhitespace(ch);
            }

            @Override
            protected boolean isEOL(final char ch) {
                return Lexer.isJsonEOL(ch);
            }

            // ECMA 15.12.1.1 The JSON Lexical Grammar - JSONNumber
            @Override
            protected void scanNumber() {
                // Record beginning of number.
                final int startPosition = position;
                // Assume value is a decimal.
                TokenType valueType = TokenType.DECIMAL;

                // floating point can't start with a "." with no leading digit before
                if (ch0 == '.') {
                    error(Lexer.message("json.invalid.number"), STRING, position, limit);
                }

                // First digit of number.
                final int digit = convertDigit(ch0, 10);

                // skip first digit
                skip(1);

                // A leading zero stands alone; only a non-zero first digit may be followed by more digits.
                if (digit != 0) {
                    // Skip over remaining digits.
                    while (convertDigit(ch0, 10) != -1) {
                        skip(1);
                    }
                }

                if (ch0 == '.' || ch0 == 'E' || ch0 == 'e') {
                    // Must be a double.
                    if (ch0 == '.') {
                        // Skip period.
                        skip(1);

                        boolean mantissa = false;
                        // Skip mantissa.
                        while (convertDigit(ch0, 10) != -1) {
                            mantissa = true;
                            skip(1);
                        }

                        if (!mantissa) {
                            // no digit after "."
                            error(Lexer.message("json.invalid.number"), STRING, position, limit);
                        }
                    }

                    // Detect exponent.
                    if (ch0 == 'E' || ch0 == 'e') {
                        // Skip E.
                        skip(1);
                        // Detect and skip exponent sign.
                        if (ch0 == '+' || ch0 == '-') {
                            skip(1);
                        }
                        boolean exponent = false;
                        // Skip exponent.
                        while (convertDigit(ch0, 10) != -1) {
                            exponent = true;
                            skip(1);
                        }

                        if (!exponent) {
                            // no digit after "E"
                            error(Lexer.message("json.invalid.number"), STRING, position, limit);
                        }
                    }

                    valueType = TokenType.FLOATING;
                }

                // Add number token.
                add(valueType, startPosition);
            }

            // ECMA 15.12.1.1 The JSON Lexical Grammar - JSONEscapeCharacter
            @Override
            protected boolean isEscapeCharacter(final char ch) {
                switch (ch) {
                case '"':
                case '/':
                case '\\':
                case 'b':
                case 'f':
                case 'n':
                case 'r':
                case 't':
                // could be unicode escape
                case 'u':
                    return true;
                default:
                    return false;
                }
            }
        };

        // NOTE(review): k is inherited parser state, presumably the current token index - confirm in AbstractParser.
        k = -1;

        // Load the first token.
        next();

        final Node resultNode = jsonLiteral();
        // Nothing may follow the single top-level JSON value.
        expect(EOF);

        return resultNode;
    }

    /**
     * Fetch the current string literal and validate its characters.
     * A control character (U+0000 to U+001F), a double quote or a backslash
     * in the literal's value is reported as an unexpected token.
     *
     * @return the validated string literal
     */
    private LiteralNode<?> getStringLiteral() {
        final LiteralNode<?> literal = getLiteral();
        final String         str     = (String)literal.getValue();

        for (int i = 0; i < str.length(); i++) {
            final char ch = str.charAt(i);
            if (ch <= 0x001f || ch == '"' || ch == '\\') {
                throw error(AbstractParser.message("unexpected.token", str));
            }
        }

        return literal;
    }

    /**
     * Parse a JSON literal from the token stream
     * @return the JSON literal as a Node
     */
    private Expression jsonLiteral() {
        final long literalToken = token;

        switch (type) {
        case STRING:
            return getStringLiteral();
        case ESCSTRING:
        case DECIMAL:
        case FLOATING:
            return getLiteral();
        case FALSE:
            next();
            return LiteralNode.newInstance(literalToken, finish, false);
        case TRUE:
            next();
            return LiteralNode.newInstance(literalToken, finish, true);
        case NULL:
            next();
            return LiteralNode.newInstance(literalToken, finish);
        case LBRACKET:
            return arrayLiteral();
        case LBRACE:
            return objectLiteral();
        /*
         * A.8.1 JSON Lexical Grammar
         *
         * JSONNumber :: See 15.12.1.1
         *    -opt DecimalIntegerLiteral JSONFractionopt ExponentPartopt
         */
        case SUB:
            // Step over the minus sign; the number itself is the next token.
            next();

            final long   realToken = token;
            final Object value     = getValue();

            if (value instanceof Number) {
                next();
                // The sign is kept as a unary node wrapping the numeric literal.
                return new UnaryNode(literalToken, LiteralNode.newInstance(realToken, finish, (Number)value));
            }

            throw error(AbstractParser.message("expected", "number", type.getNameOrType()));
        default:
            break;
        }

        throw error(AbstractParser.message("expected", "json literal", type.getNameOrType()));
    }

    /**
     * Parse an array literal from the token stream.
     *
     * Unlike JavaScript array literals, elision is not permitted in JSON:
     * every comma must be preceded and followed by an element, so inputs
     * such as {@code [,1]}, {@code [1,,2]} and {@code [1,]} are rejected.
     *
     * @return the array literal as a Node
     */
    private LiteralNode<Expression[]> arrayLiteral() {
        // Capture LBRACKET token.
        final long arrayToken = token;
        // LBRACKET tested in caller.
        next();

        // Prepare to accumulate elements.
        final List<Expression> elements = new ArrayList<>();

        // "[]" has no elements; anything else must start with a value.
        if (type != RBRACKET) {
            while (true) {
                // An element is mandatory here; jsonLiteral() fails on a comma,
                // which rules out leading and doubled commas.
                elements.add(jsonLiteral());

                if (type == RBRACKET) {
                    break;
                }

                // Comma between array elements is mandatory in JSON.
                if (type != COMMARIGHT) {
                    throw error(AbstractParser.message("expected", ", or ]", type.getNameOrType()));
                }
                next();

                // check for trailing comma - not allowed in JSON
                if (type == RBRACKET) {
                    throw error(AbstractParser.message("trailing.comma.in.json", type.getNameOrType()));
                }
            }
        }

        // Consume RBRACKET before reading finish.
        next();

        return LiteralNode.newInstance(arrayToken, finish, elements);
    }

    /**
     * Parse an object literal from the token stream.
     *
     * Every comma must be preceded and followed by a property assignment, so
     * inputs such as <code>{,"a":1}</code>, <code>{"a":1,,"b":2}</code> and
     * <code>{"a":1,}</code> are rejected.
     *
     * @return the object literal as a Node
     */
    private ObjectNode objectLiteral() {
        // Capture LBRACE token.
        final long objectToken = token;
        // LBRACE tested in caller.
        next();

        // Prepare to accumulate elements.
        final List<PropertyNode> elements = new ArrayList<>();

        // "{}" has no members; anything else must start with a property.
        if (type != RBRACE) {
            while (true) {
                // Get and add the next property. propertyAssignment() fails on a
                // comma, which rules out leading and doubled commas.
                elements.add(propertyAssignment());

                if (type == RBRACE) {
                    break;
                }

                // Comma between property assignments is mandatory in JSON.
                if (type != COMMARIGHT) {
                    throw error(AbstractParser.message("expected", ", or }", type.getNameOrType()));
                }
                next();

                // check for trailing comma - not allowed in JSON
                if (type == RBRACE) {
                    throw error(AbstractParser.message("trailing.comma.in.json", type.getNameOrType()));
                }
            }
        }

        // Consume RBRACE before reading finish.
        next();

        // Construct new object literal.
        return new ObjectNode(objectToken, finish, elements);
    }

    /**
     * Parse a property assignment from the token stream.
     * The property name must be a string token (plain or containing escapes),
     * followed by a colon and a JSON value.
     *
     * @return the property assignment as a Node
     */
    private PropertyNode propertyAssignment() {
        // Capture firstToken.
        final long propertyToken = token;
        LiteralNode<?> name = null;

        if (type == STRING) {
            name = getStringLiteral();
        } else if (type == ESCSTRING) {
            name = getLiteral();
        }

        if (name != null) {
            expect(COLON);
            final Expression value = jsonLiteral();
            return new PropertyNode(propertyToken, value.getFinish(), name, value, null, null);
        }

        // Raise an error.
        throw error(AbstractParser.message("expected", "string", type.getNameOrType()));
    }

}