CClassNode.java revision 1088:7e62d98d4625
1/* 2 * Permission is hereby granted, free of charge, to any person obtaining a copy of 3 * this software and associated documentation files (the "Software"), to deal in 4 * the Software without restriction, including without limitation the rights to 5 * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 6 * of the Software, and to permit persons to whom the Software is furnished to do 7 * so, subject to the following conditions: 8 * 9 * The above copyright notice and this permission notice shall be included in all 10 * copies or substantial portions of the Software. 11 * 12 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 13 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 14 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 15 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 16 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 17 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 18 * SOFTWARE. 19 */ 20package jdk.nashorn.internal.runtime.regexp.joni.ast; 21 22import jdk.nashorn.internal.runtime.regexp.joni.BitSet; 23import jdk.nashorn.internal.runtime.regexp.joni.CodeRangeBuffer; 24import jdk.nashorn.internal.runtime.regexp.joni.Config; 25import jdk.nashorn.internal.runtime.regexp.joni.EncodingHelper; 26import jdk.nashorn.internal.runtime.regexp.joni.ScanEnvironment; 27import jdk.nashorn.internal.runtime.regexp.joni.Syntax; 28import jdk.nashorn.internal.runtime.regexp.joni.constants.CCSTATE; 29import jdk.nashorn.internal.runtime.regexp.joni.constants.CCVALTYPE; 30import jdk.nashorn.internal.runtime.regexp.joni.encoding.CharacterType; 31import jdk.nashorn.internal.runtime.regexp.joni.encoding.IntHolder; 32import jdk.nashorn.internal.runtime.regexp.joni.exception.ErrorMessages; 33import jdk.nashorn.internal.runtime.regexp.joni.exception.InternalException; 34import jdk.nashorn.internal.runtime.regexp.joni.exception.SyntaxException; 35import jdk.nashorn.internal.runtime.regexp.joni.exception.ValueException; 36 37@SuppressWarnings("javadoc") 38public final class CClassNode extends Node { 39 private static final int FLAG_NCCLASS_NOT = 1<<0; 40 private static final int FLAG_NCCLASS_SHARE = 1<<1; 41 42 int flags; 43 public final BitSet bs = new BitSet(); // conditional creation ? 44 public CodeRangeBuffer mbuf; /* multi-byte info or NULL */ 45 46 private int ctype; // for hashing purposes 47 48 private final static short AsciiCtypeTable[] = { 49 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 50 0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008, 51 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 52 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 53 0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 54 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 55 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 56 0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 57 0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2, 58 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 59 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 60 0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0, 61 0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2, 62 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 63 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 64 0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008, 65 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 66 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 67 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 68 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 69 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 70 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 71 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 72 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 73 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 74 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 75 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 76 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 77 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 78 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 79 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 80 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 81 }; 82 83 // node_new_cclass 84 public CClassNode() {} 85 86 public void clear() { 87 bs.clear(); 88 flags = 0; 89 mbuf = null; 90 } 91 92 @Override 93 public int getType() { 94 return CCLASS; 95 } 96 97 @Override 98 public String getName() { 99 return "Character Class"; 100 } 101 102 @Override 103 public boolean equals(final Object other) { 104 if (!(other instanceof CClassNode)) { 105 return false; 106 } 107 final CClassNode cc = (CClassNode)other; 108 return ctype == cc.ctype && isNot() == cc.isNot(); 109 } 110 111 @Override 112 public int hashCode() { 113 if (Config.USE_SHARED_CCLASS_TABLE) { 114 int hash = 0; 115 hash += ctype; 116 if (isNot()) { 117 hash++; 118 } 119 return hash + (hash >> 5); 120 } 121 return super.hashCode(); 122 } 123 124 @Override 125 public String toString(final int level) { 126 final StringBuilder value = new StringBuilder(); 127 value.append("\n flags: " + flagsToString()); 128 value.append("\n bs: " + pad(bs, level + 1)); 129 value.append("\n mbuf: " + pad(mbuf, level + 1)); 130 131 return value.toString(); 132 } 133 134 public String flagsToString() { 135 final StringBuilder f = new StringBuilder(); 136 if (isNot()) { 137 f.append("NOT "); 138 } 139 if (isShare()) { 140 f.append("SHARE "); 141 } 142 return f.toString(); 143 } 144 145 public boolean isEmpty() { 146 return mbuf == null && bs.isEmpty(); 147 } 148 149 public void addCodeRangeToBuf(final int from, final int to) { 150 mbuf = CodeRangeBuffer.addCodeRangeToBuff(mbuf, from, to); 151 } 152 153 public void addCodeRange(final ScanEnvironment env, final int from, final int to) { 154 mbuf = CodeRangeBuffer.addCodeRange(mbuf, env, from, to); 155 } 156 157 public void addAllMultiByteRange() { 158 mbuf = CodeRangeBuffer.addAllMultiByteRange(mbuf); 159 } 160 161 public void clearNotFlag() { 162 if (isNot()) { 163 bs.invert(); 164 165 mbuf = CodeRangeBuffer.notCodeRangeBuff(mbuf); 166 clearNot(); 167 } 168 } 169 170 // and_cclass 171 public void and(final CClassNode other) { 172 final boolean not1 = isNot(); 173 BitSet bsr1 = bs; 174 final CodeRangeBuffer buf1 = mbuf; 175 final boolean not2 = other.isNot(); 176 BitSet bsr2 = other.bs; 177 final CodeRangeBuffer buf2 = other.mbuf; 178 179 if (not1) { 180 final BitSet bs1 = new BitSet(); 181 bsr1.invertTo(bs1); 182 bsr1 = bs1; 183 } 184 185 if (not2) { 186 final BitSet bs2 = new BitSet(); 187 bsr2.invertTo(bs2); 188 bsr2 = bs2; 189 } 190 191 bsr1.and(bsr2); 192 193 if (bsr1 != bs) { 194 bs.copy(bsr1); 195 bsr1 = bs; 196 } 197 198 if (not1) { 199 bs.invert(); 200 } 201 202 CodeRangeBuffer pbuf = null; 203 204 if (not1 && not2) { 205 pbuf = CodeRangeBuffer.orCodeRangeBuff(buf1, false, buf2, false); 206 } else { 207 pbuf = CodeRangeBuffer.andCodeRangeBuff(buf1, not1, buf2, not2); 208 209 if (not1) { 210 pbuf = CodeRangeBuffer.notCodeRangeBuff(pbuf); 211 } 212 } 213 mbuf = pbuf; 214 215 } 216 217 // or_cclass 218 public void or(final CClassNode other) { 219 final boolean not1 = isNot(); 220 BitSet bsr1 = bs; 221 final CodeRangeBuffer buf1 = mbuf; 222 final boolean not2 = other.isNot(); 223 BitSet bsr2 = other.bs; 224 final CodeRangeBuffer buf2 = other.mbuf; 225 226 if (not1) { 227 final BitSet bs1 = new BitSet(); 228 bsr1.invertTo(bs1); 229 bsr1 = bs1; 230 } 231 232 if (not2) { 233 final BitSet bs2 = new BitSet(); 234 bsr2.invertTo(bs2); 235 bsr2 = bs2; 236 } 237 238 bsr1.or(bsr2); 239 240 if (bsr1 != bs) { 241 bs.copy(bsr1); 242 bsr1 = bs; 243 } 244 245 if (not1) { 246 bs.invert(); 247 } 248 249 CodeRangeBuffer pbuf = null; 250 if (not1 && not2) { 251 pbuf = CodeRangeBuffer.andCodeRangeBuff(buf1, false, buf2, false); 252 } else { 253 pbuf = CodeRangeBuffer.orCodeRangeBuff(buf1, not1, buf2, not2); 254 if (not1) { 255 pbuf = CodeRangeBuffer.notCodeRangeBuff(pbuf); 256 } 257 } 258 mbuf = pbuf; 259 } 260 261 // add_ctype_to_cc_by_range // Encoding out! 262 public void addCTypeByRange(final int ct, final boolean not, final int sbOut, final int mbr[]) { 263 final int n = mbr[0]; 264 265 if (!not) { 266 for (int i=0; i<n; i++) { 267 for (int j=mbr[i * 2 + 1]; j<=mbr[i * 2 + 2]; j++) { 268 if (j >= sbOut) { 269 if (Config.VANILLA) { 270 if (j == mbr[i * 2 + 2]) { 271 i++; 272 } else if (j > mbr[i * 2 + 1]) { 273 addCodeRangeToBuf(j, mbr[i * 2 + 2]); 274 i++; 275 } 276 } else { 277 if (j >= mbr[i * 2 + 1]) { 278 addCodeRangeToBuf(j, mbr[i * 2 + 2]); 279 i++; 280 } 281 } 282 // !goto sb_end!, remove duplication! 283 for (; i<n; i++) { 284 addCodeRangeToBuf(mbr[2 * i + 1], mbr[2 * i + 2]); 285 } 286 return; 287 } 288 bs.set(j); 289 } 290 } 291 // !sb_end:! 292 for (int i=0; i<n; i++) { 293 addCodeRangeToBuf(mbr[2 * i + 1], mbr[2 * i + 2]); 294 } 295 296 } else { 297 int prev = 0; 298 299 for (int i=0; i<n; i++) { 300 for (int j=prev; j < mbr[2 * i + 1]; j++) { 301 if (j >= sbOut) { 302 // !goto sb_end2!, remove duplication 303 prev = sbOut; 304 for (i=0; i<n; i++) { 305 if (prev < mbr[2 * i + 1]) { 306 addCodeRangeToBuf(prev, mbr[i * 2 + 1] - 1); 307 } 308 prev = mbr[i * 2 + 2] + 1; 309 } 310 if (prev < 0x7fffffff/*!!!*/) { 311 addCodeRangeToBuf(prev, 0x7fffffff); 312 } 313 return; 314 } 315 bs.set(j); 316 } 317 prev = mbr[2 * i + 2] + 1; 318 } 319 320 for (int j=prev; j<sbOut; j++) { 321 bs.set(j); 322 } 323 324 // !sb_end2:! 325 prev = sbOut; 326 for (int i=0; i<n; i++) { 327 if (prev < mbr[2 * i + 1]) { 328 addCodeRangeToBuf(prev, mbr[i * 2 + 1] - 1); 329 } 330 prev = mbr[i * 2 + 2] + 1; 331 } 332 if (prev < 0x7fffffff/*!!!*/) { 333 addCodeRangeToBuf(prev, 0x7fffffff); 334 } 335 } 336 } 337 338 public void addCType(final int ctp, final boolean not, final ScanEnvironment env, final IntHolder sbOut) { 339 int ct = ctp; 340 if (Config.NON_UNICODE_SDW) { 341 switch (ct) { 342 case CharacterType.D: 343 case CharacterType.S: 344 case CharacterType.W: 345 ct ^= CharacterType.SPECIAL_MASK; 346 347 if (env.syntax == Syntax.JAVASCRIPT && ct == CharacterType.SPACE) { 348 // \s in JavaScript includes unicode characters. 349 break; 350 } 351 352 if (not) { 353 for (int c = 0; c < BitSet.SINGLE_BYTE_SIZE; c++) { 354 // if (!ASCIIEncoding.INSTANCE.isCodeCType(c, ctype)) bs.set(c); 355 if ((AsciiCtypeTable[c] & (1 << ct)) == 0) { 356 bs.set(c); 357 } 358 } 359 addAllMultiByteRange(); 360 } else { 361 for (int c = 0; c < BitSet.SINGLE_BYTE_SIZE; c++) { 362 // if (ASCIIEncoding.INSTANCE.isCodeCType(c, ctype)) bs.set(c); 363 if ((AsciiCtypeTable[c] & (1 << ct)) != 0) { 364 bs.set(c); 365 } 366 } 367 } 368 return; 369 default: 370 break; 371 } 372 } 373 374 final int[] ranges = EncodingHelper.ctypeCodeRange(ct, sbOut); 375 if (ranges != null) { 376 addCTypeByRange(ct, not, sbOut.value, ranges); 377 return; 378 } 379 380 switch(ct) { 381 case CharacterType.ALPHA: 382 case CharacterType.BLANK: 383 case CharacterType.CNTRL: 384 case CharacterType.DIGIT: 385 case CharacterType.LOWER: 386 case CharacterType.PUNCT: 387 case CharacterType.SPACE: 388 case CharacterType.UPPER: 389 case CharacterType.XDIGIT: 390 case CharacterType.ASCII: 391 case CharacterType.ALNUM: 392 if (not) { 393 for (int c=0; c<BitSet.SINGLE_BYTE_SIZE; c++) { 394 if (!EncodingHelper.isCodeCType(c, ct)) { 395 bs.set(c); 396 } 397 } 398 addAllMultiByteRange(); 399 } else { 400 for (int c=0; c<BitSet.SINGLE_BYTE_SIZE; c++) { 401 if (EncodingHelper.isCodeCType(c, ct)) { 402 bs.set(c); 403 } 404 } 405 } 406 break; 407 408 case CharacterType.GRAPH: 409 case CharacterType.PRINT: 410 if (not) { 411 for (int c=0; c<BitSet.SINGLE_BYTE_SIZE; c++) { 412 if (!EncodingHelper.isCodeCType(c, ct)) { 413 bs.set(c); 414 } 415 } 416 } else { 417 for (int c=0; c<BitSet.SINGLE_BYTE_SIZE; c++) { 418 if (EncodingHelper.isCodeCType(c, ct)) { 419 bs.set(c); 420 } 421 } 422 addAllMultiByteRange(); 423 } 424 break; 425 426 case CharacterType.WORD: 427 if (!not) { 428 for (int c=0; c<BitSet.SINGLE_BYTE_SIZE; c++) { 429 if (EncodingHelper.isWord(c)) { 430 bs.set(c); 431 } 432 } 433 434 addAllMultiByteRange(); 435 } else { 436 for (int c=0; c<BitSet.SINGLE_BYTE_SIZE; c++) { 437 if (!EncodingHelper.isWord(c)) { 438 bs.set(c); 439 } 440 } 441 } 442 break; 443 444 default: 445 throw new InternalException(ErrorMessages.ERR_PARSER_BUG); 446 } // switch 447 } 448 449 public static final class CCStateArg { 450 public int v; 451 public int vs; 452 public boolean vsIsRaw; 453 public boolean vIsRaw; 454 public CCVALTYPE inType; 455 public CCVALTYPE type; 456 public CCSTATE state; 457 } 458 459 public void nextStateClass(final CCStateArg arg, final ScanEnvironment env) { 460 if (arg.state == CCSTATE.RANGE) { 461 throw new SyntaxException(ErrorMessages.ERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE); 462 } 463 464 if (arg.state == CCSTATE.VALUE && arg.type != CCVALTYPE.CLASS) { 465 if (arg.type == CCVALTYPE.SB) { 466 bs.set(arg.vs); 467 } else if (arg.type == CCVALTYPE.CODE_POINT) { 468 addCodeRange(env, arg.vs, arg.vs); 469 } 470 } 471 arg.state = CCSTATE.VALUE; 472 arg.type = CCVALTYPE.CLASS; 473 } 474 475 public void nextStateValue(final CCStateArg arg, final ScanEnvironment env) { 476 477 switch(arg.state) { 478 case VALUE: 479 if (arg.type == CCVALTYPE.SB) { 480 if (arg.vs > 0xff) { 481 throw new ValueException(ErrorMessages.ERR_INVALID_CODE_POINT_VALUE); 482 } 483 bs.set(arg.vs); 484 } else if (arg.type == CCVALTYPE.CODE_POINT) { 485 addCodeRange(env, arg.vs, arg.vs); 486 } 487 break; 488 489 case RANGE: 490 if (arg.inType == arg.type) { 491 if (arg.inType == CCVALTYPE.SB) { 492 if (arg.vs > 0xff || arg.v > 0xff) { 493 throw new ValueException(ErrorMessages.ERR_INVALID_CODE_POINT_VALUE); 494 } 495 496 if (arg.vs > arg.v) { 497 if (env.syntax.allowEmptyRangeInCC()) { 498 // goto ccs_range_end 499 arg.state = CCSTATE.COMPLETE; 500 break; 501 } 502 throw new ValueException(ErrorMessages.ERR_EMPTY_RANGE_IN_CHAR_CLASS); 503 } 504 bs.setRange(arg.vs, arg.v); 505 } else { 506 addCodeRange(env, arg.vs, arg.v); 507 } 508 } else { 509 if (arg.vs > arg.v) { 510 if (env.syntax.allowEmptyRangeInCC()) { 511 // goto ccs_range_end 512 arg.state = CCSTATE.COMPLETE; 513 break; 514 } 515 throw new ValueException(ErrorMessages.ERR_EMPTY_RANGE_IN_CHAR_CLASS); 516 } 517 bs.setRange(arg.vs, arg.v < 0xff ? arg.v : 0xff); 518 addCodeRange(env, arg.vs, arg.v); 519 } 520 // ccs_range_end: 521 arg.state = CCSTATE.COMPLETE; 522 break; 523 524 case COMPLETE: 525 case START: 526 arg.state = CCSTATE.VALUE; 527 break; 528 529 default: 530 break; 531 532 } // switch 533 534 arg.vsIsRaw = arg.vIsRaw; 535 arg.vs = arg.v; 536 arg.type = arg.inType; 537 } 538 539 // onig_is_code_in_cc_len 540 public boolean isCodeInCCLength(final int code) { 541 boolean found; 542 543 if (code > 0xff) { 544 found = mbuf != null && mbuf.isInCodeRange(code); 545 } else { 546 found = bs.at(code); 547 } 548 549 if (isNot()) { 550 return !found; 551 } 552 return found; 553 } 554 555 // onig_is_code_in_cc 556 public boolean isCodeInCC(final int code) { 557 return isCodeInCCLength(code); 558 } 559 560 public void setNot() { 561 flags |= FLAG_NCCLASS_NOT; 562 } 563 564 public void clearNot() { 565 flags &= ~FLAG_NCCLASS_NOT; 566 } 567 568 public boolean isNot() { 569 return (flags & FLAG_NCCLASS_NOT) != 0; 570 } 571 572 public void setShare() { 573 flags |= FLAG_NCCLASS_SHARE; 574 } 575 576 public void clearShare() { 577 flags &= ~FLAG_NCCLASS_SHARE; 578 } 579 580 public boolean isShare() { 581 return (flags & FLAG_NCCLASS_SHARE) != 0; 582 } 583 584} 585