// ByteCodePrinter.java revision 1088:7e62d98d4625
1/* 2 * Permission is hereby granted, free of charge, to any person obtaining a copy of 3 * this software and associated documentation files (the "Software"), to deal in 4 * the Software without restriction, including without limitation the rights to 5 * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 6 * of the Software, and to permit persons to whom the Software is furnished to do 7 * so, subject to the following conditions: 8 * 9 * The above copyright notice and this permission notice shall be included in all 10 * copies or substantial portions of the Software. 11 * 12 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 13 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 14 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 15 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 16 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 17 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 18 * SOFTWARE. 
19 */ 20package jdk.nashorn.internal.runtime.regexp.joni; 21 22import jdk.nashorn.internal.runtime.regexp.joni.ast.CClassNode; 23import jdk.nashorn.internal.runtime.regexp.joni.constants.Arguments; 24import jdk.nashorn.internal.runtime.regexp.joni.constants.OPCode; 25import jdk.nashorn.internal.runtime.regexp.joni.constants.OPSize; 26import jdk.nashorn.internal.runtime.regexp.joni.exception.InternalException; 27 28class ByteCodePrinter { 29 final int[] code; 30 final int codeLength; 31 final char[][] templates; 32 33 Object[] operands; 34 35 private final static String OpCodeNames[] = new String[] { 36 "finish", /*OP_FINISH*/ 37 "end", /*OP_END*/ 38 "exact1", /*OP_EXACT1*/ 39 "exact2", /*OP_EXACT2*/ 40 "exact3", /*OP_EXACT3*/ 41 "exact4", /*OP_EXACT4*/ 42 "exact5", /*OP_EXACT5*/ 43 "exactn", /*OP_EXACTN*/ 44 "exactmb2-n1", /*OP_EXACTMB2N1*/ 45 "exactmb2-n2", /*OP_EXACTMB2N2*/ 46 "exactmb2-n3", /*OP_EXACTMB2N3*/ 47 "exactmb2-n", /*OP_EXACTMB2N*/ 48 "exactmb3n", /*OP_EXACTMB3N*/ 49 "exactmbn", /*OP_EXACTMBN*/ 50 "exact1-ic", /*OP_EXACT1_IC*/ 51 "exactn-ic", /*OP_EXACTN_IC*/ 52 "cclass", /*OP_CCLASS*/ 53 "cclass-mb", /*OP_CCLASS_MB*/ 54 "cclass-mix", /*OP_CCLASS_MIX*/ 55 "cclass-not", /*OP_CCLASS_NOT*/ 56 "cclass-mb-not", /*OP_CCLASS_MB_NOT*/ 57 "cclass-mix-not", /*OP_CCLASS_MIX_NOT*/ 58 "cclass-node", /*OP_CCLASS_NODE*/ 59 "anychar", /*OP_ANYCHAR*/ 60 "anychar-ml", /*OP_ANYCHAR_ML*/ 61 "anychar*", /*OP_ANYCHAR_STAR*/ 62 "anychar-ml*", /*OP_ANYCHAR_ML_STAR*/ 63 "anychar*-peek-next", /*OP_ANYCHAR_STAR_PEEK_NEXT*/ 64 "anychar-ml*-peek-next", /*OP_ANYCHAR_ML_STAR_PEEK_NEXT*/ 65 "word", /*OP_WORD*/ 66 "not-word", /*OP_NOT_WORD*/ 67 "word-bound", /*OP_WORD_BOUND*/ 68 "not-word-bound", /*OP_NOT_WORD_BOUND*/ 69 "word-begin", /*OP_WORD_BEGIN*/ 70 "word-end", /*OP_WORD_END*/ 71 "begin-buf", /*OP_BEGIN_BUF*/ 72 "end-buf", /*OP_END_BUF*/ 73 "begin-line", /*OP_BEGIN_LINE*/ 74 "end-line", /*OP_END_LINE*/ 75 "semi-end-buf", /*OP_SEMI_END_BUF*/ 76 "begin-position", 
/*OP_BEGIN_POSITION*/ 77 "backref1", /*OP_BACKREF1*/ 78 "backref2", /*OP_BACKREF2*/ 79 "backrefn", /*OP_BACKREFN*/ 80 "backrefn-ic", /*OP_BACKREFN_IC*/ 81 "backref_multi", /*OP_BACKREF_MULTI*/ 82 "backref_multi-ic", /*OP_BACKREF_MULTI_IC*/ 83 "backref_at_level", /*OP_BACKREF_AT_LEVEL*/ 84 "mem-start", /*OP_MEMORY_START*/ 85 "mem-start-push", /*OP_MEMORY_START_PUSH*/ 86 "mem-end-push", /*OP_MEMORY_END_PUSH*/ 87 "mem-end-push-rec", /*OP_MEMORY_END_PUSH_REC*/ 88 "mem-end", /*OP_MEMORY_END*/ 89 "mem-end-rec", /*OP_MEMORY_END_REC*/ 90 "fail", /*OP_FAIL*/ 91 "jump", /*OP_JUMP*/ 92 "push", /*OP_PUSH*/ 93 "pop", /*OP_POP*/ 94 "push-or-jump-e1", /*OP_PUSH_OR_JUMP_EXACT1*/ 95 "push-if-peek-next", /*OP_PUSH_IF_PEEK_NEXT*/ 96 "repeat", /*OP_REPEAT*/ 97 "repeat-ng", /*OP_REPEAT_NG*/ 98 "repeat-inc", /*OP_REPEAT_INC*/ 99 "repeat-inc-ng", /*OP_REPEAT_INC_NG*/ 100 "repeat-inc-sg", /*OP_REPEAT_INC_SG*/ 101 "repeat-inc-ng-sg", /*OP_REPEAT_INC_NG_SG*/ 102 "null-check-start", /*OP_NULL_CHECK_START*/ 103 "null-check-end", /*OP_NULL_CHECK_END*/ 104 "null-check-end-memst", /*OP_NULL_CHECK_END_MEMST*/ 105 "null-check-end-memst-push", /*OP_NULL_CHECK_END_MEMST_PUSH*/ 106 "push-pos", /*OP_PUSH_POS*/ 107 "pop-pos", /*OP_POP_POS*/ 108 "push-pos-not", /*OP_PUSH_POS_NOT*/ 109 "fail-pos", /*OP_FAIL_POS*/ 110 "push-stop-bt", /*OP_PUSH_STOP_BT*/ 111 "pop-stop-bt", /*OP_POP_STOP_BT*/ 112 "look-behind", /*OP_LOOK_BEHIND*/ 113 "push-look-behind-not", /*OP_PUSH_LOOK_BEHIND_NOT*/ 114 "fail-look-behind-not", /*OP_FAIL_LOOK_BEHIND_NOT*/ 115 "call", /*OP_CALL*/ 116 "return", /*OP_RETURN*/ 117 "state-check-push", /*OP_STATE_CHECK_PUSH*/ 118 "state-check-push-or-jump", /*OP_STATE_CHECK_PUSH_OR_JUMP*/ 119 "state-check", /*OP_STATE_CHECK*/ 120 "state-check-anychar*", /*OP_STATE_CHECK_ANYCHAR_STAR*/ 121 "state-check-anychar-ml*", /*OP_STATE_CHECK_ANYCHAR_ML_STAR*/ 122 "set-option-push", /*OP_SET_OPTION_PUSH*/ 123 "set-option", /*OP_SET_OPTION*/ 124 }; 125 126 private final static int OpCodeArgTypes[] = new 
int[] { 127 Arguments.NON, /*OP_FINISH*/ 128 Arguments.NON, /*OP_END*/ 129 Arguments.SPECIAL, /*OP_EXACT1*/ 130 Arguments.SPECIAL, /*OP_EXACT2*/ 131 Arguments.SPECIAL, /*OP_EXACT3*/ 132 Arguments.SPECIAL, /*OP_EXACT4*/ 133 Arguments.SPECIAL, /*OP_EXACT5*/ 134 Arguments.SPECIAL, /*OP_EXACTN*/ 135 Arguments.SPECIAL, /*OP_EXACTMB2N1*/ 136 Arguments.SPECIAL, /*OP_EXACTMB2N2*/ 137 Arguments.SPECIAL, /*OP_EXACTMB2N3*/ 138 Arguments.SPECIAL, /*OP_EXACTMB2N*/ 139 Arguments.SPECIAL, /*OP_EXACTMB3N*/ 140 Arguments.SPECIAL, /*OP_EXACTMBN*/ 141 Arguments.SPECIAL, /*OP_EXACT1_IC*/ 142 Arguments.SPECIAL, /*OP_EXACTN_IC*/ 143 Arguments.SPECIAL, /*OP_CCLASS*/ 144 Arguments.SPECIAL, /*OP_CCLASS_MB*/ 145 Arguments.SPECIAL, /*OP_CCLASS_MIX*/ 146 Arguments.SPECIAL, /*OP_CCLASS_NOT*/ 147 Arguments.SPECIAL, /*OP_CCLASS_MB_NOT*/ 148 Arguments.SPECIAL, /*OP_CCLASS_MIX_NOT*/ 149 Arguments.SPECIAL, /*OP_CCLASS_NODE*/ 150 Arguments.NON, /*OP_ANYCHAR*/ 151 Arguments.NON, /*OP_ANYCHAR_ML*/ 152 Arguments.NON, /*OP_ANYCHAR_STAR*/ 153 Arguments.NON, /*OP_ANYCHAR_ML_STAR*/ 154 Arguments.SPECIAL, /*OP_ANYCHAR_STAR_PEEK_NEXT*/ 155 Arguments.SPECIAL, /*OP_ANYCHAR_ML_STAR_PEEK_NEXT*/ 156 Arguments.NON, /*OP_WORD*/ 157 Arguments.NON, /*OP_NOT_WORD*/ 158 Arguments.NON, /*OP_WORD_BOUND*/ 159 Arguments.NON, /*OP_NOT_WORD_BOUND*/ 160 Arguments.NON, /*OP_WORD_BEGIN*/ 161 Arguments.NON, /*OP_WORD_END*/ 162 Arguments.NON, /*OP_BEGIN_BUF*/ 163 Arguments.NON, /*OP_END_BUF*/ 164 Arguments.NON, /*OP_BEGIN_LINE*/ 165 Arguments.NON, /*OP_END_LINE*/ 166 Arguments.NON, /*OP_SEMI_END_BUF*/ 167 Arguments.NON, /*OP_BEGIN_POSITION*/ 168 Arguments.NON, /*OP_BACKREF1*/ 169 Arguments.NON, /*OP_BACKREF2*/ 170 Arguments.MEMNUM, /*OP_BACKREFN*/ 171 Arguments.SPECIAL, /*OP_BACKREFN_IC*/ 172 Arguments.SPECIAL, /*OP_BACKREF_MULTI*/ 173 Arguments.SPECIAL, /*OP_BACKREF_MULTI_IC*/ 174 Arguments.SPECIAL, /*OP_BACKREF_AT_LEVEL*/ 175 Arguments.MEMNUM, /*OP_MEMORY_START*/ 176 Arguments.MEMNUM, /*OP_MEMORY_START_PUSH*/ 177 
Arguments.MEMNUM, /*OP_MEMORY_END_PUSH*/ 178 Arguments.MEMNUM, /*OP_MEMORY_END_PUSH_REC*/ 179 Arguments.MEMNUM, /*OP_MEMORY_END*/ 180 Arguments.MEMNUM, /*OP_MEMORY_END_REC*/ 181 Arguments.NON, /*OP_FAIL*/ 182 Arguments.RELADDR, /*OP_JUMP*/ 183 Arguments.RELADDR, /*OP_PUSH*/ 184 Arguments.NON, /*OP_POP*/ 185 Arguments.SPECIAL, /*OP_PUSH_OR_JUMP_EXACT1*/ 186 Arguments.SPECIAL, /*OP_PUSH_IF_PEEK_NEXT*/ 187 Arguments.SPECIAL, /*OP_REPEAT*/ 188 Arguments.SPECIAL, /*OP_REPEAT_NG*/ 189 Arguments.MEMNUM, /*OP_REPEAT_INC*/ 190 Arguments.MEMNUM, /*OP_REPEAT_INC_NG*/ 191 Arguments.MEMNUM, /*OP_REPEAT_INC_SG*/ 192 Arguments.MEMNUM, /*OP_REPEAT_INC_NG_SG*/ 193 Arguments.MEMNUM, /*OP_NULL_CHECK_START*/ 194 Arguments.MEMNUM, /*OP_NULL_CHECK_END*/ 195 Arguments.MEMNUM, /*OP_NULL_CHECK_END_MEMST*/ 196 Arguments.MEMNUM, /*OP_NULL_CHECK_END_MEMST_PUSH*/ 197 Arguments.NON, /*OP_PUSH_POS*/ 198 Arguments.NON, /*OP_POP_POS*/ 199 Arguments.RELADDR, /*OP_PUSH_POS_NOT*/ 200 Arguments.NON, /*OP_FAIL_POS*/ 201 Arguments.NON, /*OP_PUSH_STOP_BT*/ 202 Arguments.NON, /*OP_POP_STOP_BT*/ 203 Arguments.SPECIAL, /*OP_LOOK_BEHIND*/ 204 Arguments.SPECIAL, /*OP_PUSH_LOOK_BEHIND_NOT*/ 205 Arguments.NON, /*OP_FAIL_LOOK_BEHIND_NOT*/ 206 Arguments.ABSADDR, /*OP_CALL*/ 207 Arguments.NON, /*OP_RETURN*/ 208 Arguments.SPECIAL, /*OP_STATE_CHECK_PUSH*/ 209 Arguments.SPECIAL, /*OP_STATE_CHECK_PUSH_OR_JUMP*/ 210 Arguments.STATE_CHECK, /*OP_STATE_CHECK*/ 211 Arguments.STATE_CHECK, /*OP_STATE_CHECK_ANYCHAR_STAR*/ 212 Arguments.STATE_CHECK, /*OP_STATE_CHECK_ANYCHAR_ML_STAR*/ 213 Arguments.OPTION, /*OP_SET_OPTION_PUSH*/ 214 Arguments.OPTION, /*OP_SET_OPTION*/ 215 }; 216 217 public ByteCodePrinter(final Regex regex) { 218 code = regex.code; 219 codeLength = regex.codeLength; 220 operands = regex.operands; 221 222 templates = regex.templates; 223 } 224 225 public String byteCodeListToString() { 226 return compiledByteCodeListToString(); 227 } 228 229 private void pString(final StringBuilder sb, final int len, final int 
s) { 230 sb.append(":"); 231 sb.append(new String(code, s, len)); 232 } 233 234 private void pLenString(final StringBuilder sb, final int len, final int s) { 235 sb.append(":").append(len).append(":"); 236 sb.append(new String(code, s, len)); 237 } 238 239 private static void pLenStringFromTemplate(final StringBuilder sb, final int len, final char[] tm, final int idx) { 240 sb.append(":T:").append(len).append(":"); 241 sb.append(tm, idx, len); 242 } 243 244 public int compiledByteCodeToString(final StringBuilder sb, final int bptr) { 245 int len, n, mem, addr, scn, cod; 246 BitSet bs; 247 CClassNode cc; 248 int tm, idx; 249 int bp = bptr; 250 251 sb.append("[").append(OpCodeNames[code[bp]]); 252 final int argType = OpCodeArgTypes[code[bp]]; 253 final int ip = bp; 254 if (argType != Arguments.SPECIAL) { 255 bp++; 256 switch (argType) { 257 default: 258 case Arguments.NON: 259 break; 260 261 case Arguments.RELADDR: 262 sb.append(":(").append(code[bp]).append(")"); 263 bp += OPSize.RELADDR; 264 break; 265 266 case Arguments.ABSADDR: 267 sb.append(":(").append(code[bp]).append(")"); 268 bp += OPSize.ABSADDR; 269 break; 270 271 case Arguments.LENGTH: 272 sb.append(":").append(code[bp]); 273 bp += OPSize.LENGTH; 274 break; 275 276 case Arguments.MEMNUM: 277 sb.append(":").append(code[bp]); 278 bp += OPSize.MEMNUM; 279 break; 280 281 case Arguments.OPTION: 282 sb.append(":").append(code[bp]); 283 bp += OPSize.OPTION; 284 break; 285 286 case Arguments.STATE_CHECK: 287 sb.append(":").append(code[bp]); 288 bp += OPSize.STATE_CHECK; 289 break; 290 } 291 } else { 292 switch (code[bp++]) { 293 case OPCode.EXACT1: 294 case OPCode.ANYCHAR_STAR_PEEK_NEXT: 295 case OPCode.ANYCHAR_ML_STAR_PEEK_NEXT: 296 pString(sb, 1, bp++); 297 break; 298 299 case OPCode.EXACT2: 300 pString(sb, 2, bp); 301 bp += 2; 302 break; 303 304 case OPCode.EXACT3: 305 pString(sb, 3, bp); 306 bp += 3; 307 break; 308 309 case OPCode.EXACT4: 310 pString(sb, 4, bp); 311 bp += 4; 312 break; 313 314 case 
OPCode.EXACT5: 315 pString(sb, 5, bp); 316 bp += 5; 317 break; 318 319 case OPCode.EXACTN: 320 len = code[bp]; 321 bp += OPSize.LENGTH; 322 if (Config.USE_STRING_TEMPLATES) { 323 tm = code[bp]; 324 bp += OPSize.INDEX; 325 idx = code[bp]; 326 bp += OPSize.INDEX; 327 pLenStringFromTemplate(sb, len, templates[tm], idx); 328 } else { 329 pLenString(sb, len, bp); 330 bp += len; 331 } 332 break; 333 334 case OPCode.EXACT1_IC: 335 pString(sb, 1, bp); 336 bp++; 337 break; 338 339 case OPCode.EXACTN_IC: 340 len = code[bp]; 341 bp += OPSize.LENGTH; 342 if (Config.USE_STRING_TEMPLATES) { 343 tm = code[bp]; 344 bp += OPSize.INDEX; 345 idx = code[bp]; 346 bp += OPSize.INDEX; 347 pLenStringFromTemplate(sb, len, templates[tm], idx); 348 } else { 349 pLenString(sb, len, bp); 350 bp += len; 351 } 352 break; 353 354 case OPCode.CCLASS: 355 bs = new BitSet(); 356 System.arraycopy(code, bp, bs.bits, 0, BitSet.BITSET_SIZE); 357 n = bs.numOn(); 358 bp += BitSet.BITSET_SIZE; 359 sb.append(":").append(n); 360 break; 361 362 case OPCode.CCLASS_NOT: 363 bs = new BitSet(); 364 System.arraycopy(code, bp, bs.bits, 0, BitSet.BITSET_SIZE); 365 n = bs.numOn(); 366 bp += BitSet.BITSET_SIZE; 367 sb.append(":").append(n); 368 break; 369 370 case OPCode.CCLASS_MB: 371 case OPCode.CCLASS_MB_NOT: 372 len = code[bp]; 373 bp += OPSize.LENGTH; 374 cod = code[bp]; 375 //bp += OPSize.CODE_POINT; 376 bp += len; 377 sb.append(":").append(cod).append(":").append(len); 378 break; 379 380 case OPCode.CCLASS_MIX: 381 case OPCode.CCLASS_MIX_NOT: 382 bs = new BitSet(); 383 System.arraycopy(code, bp, bs.bits, 0, BitSet.BITSET_SIZE); 384 n = bs.numOn(); 385 bp += BitSet.BITSET_SIZE; 386 len = code[bp]; 387 bp += OPSize.LENGTH; 388 cod = code[bp]; 389 //bp += OPSize.CODE_POINT; 390 bp += len; 391 sb.append(":").append(n).append(":").append(cod).append(":").append(len); 392 break; 393 394 case OPCode.CCLASS_NODE: 395 cc = (CClassNode)operands[code[bp]]; 396 bp += OPSize.POINTER; 397 n = cc.bs.numOn(); 398 
sb.append(":").append(cc).append(":").append(n); 399 break; 400 401 case OPCode.BACKREFN_IC: 402 mem = code[bp]; 403 bp += OPSize.MEMNUM; 404 sb.append(":").append(mem); 405 break; 406 407 case OPCode.BACKREF_MULTI_IC: 408 case OPCode.BACKREF_MULTI: 409 sb.append(" "); 410 len = code[bp]; 411 bp += OPSize.LENGTH; 412 for (int i=0; i<len; i++) { 413 mem = code[bp]; 414 bp += OPSize.MEMNUM; 415 if (i > 0) { 416 sb.append(", "); 417 } 418 sb.append(mem); 419 } 420 break; 421 422 case OPCode.BACKREF_WITH_LEVEL: { 423 final int option = code[bp]; 424 bp += OPSize.OPTION; 425 sb.append(":").append(option); 426 final int level = code[bp]; 427 bp += OPSize.LENGTH; 428 sb.append(":").append(level); 429 sb.append(" "); 430 len = code[bp]; 431 bp += OPSize.LENGTH; 432 for (int i=0; i<len; i++) { 433 mem = code[bp]; 434 bp += OPSize.MEMNUM; 435 if (i > 0) { 436 sb.append(", "); 437 } 438 sb.append(mem); 439 } 440 break; 441 } 442 443 case OPCode.REPEAT: 444 case OPCode.REPEAT_NG: 445 mem = code[bp]; 446 bp += OPSize.MEMNUM; 447 addr = code[bp]; 448 bp += OPSize.RELADDR; 449 sb.append(":").append(mem).append(":").append(addr); 450 break; 451 452 case OPCode.PUSH_OR_JUMP_EXACT1: 453 case OPCode.PUSH_IF_PEEK_NEXT: 454 addr = code[bp]; 455 bp += OPSize.RELADDR; 456 sb.append(":(").append(addr).append(")"); 457 pString(sb, 1, bp); 458 bp++; 459 break; 460 461 case OPCode.LOOK_BEHIND: 462 len = code[bp]; 463 bp += OPSize.LENGTH; 464 sb.append(":").append(len); 465 break; 466 467 case OPCode.PUSH_LOOK_BEHIND_NOT: 468 addr = code[bp]; 469 bp += OPSize.RELADDR; 470 len = code[bp]; 471 bp += OPSize.LENGTH; 472 sb.append(":").append(len).append(":(").append(addr).append(")"); 473 break; 474 475 case OPCode.STATE_CHECK_PUSH: 476 case OPCode.STATE_CHECK_PUSH_OR_JUMP: 477 scn = code[bp]; 478 bp += OPSize.STATE_CHECK_NUM; 479 addr = code[bp]; 480 bp += OPSize.RELADDR; 481 sb.append(":").append(scn).append(":(").append(addr).append(")"); 482 break; 483 484 default: 485 throw new 
InternalException("undefined code: " + code[--bp]); 486 } 487 } 488 489 sb.append("]"); 490 491 // @opcode_address(opcode_size) 492 if (Config.DEBUG_COMPILE_BYTE_CODE_INFO) { 493 sb.append("@").append(ip).append("(").append((bp - ip)).append(")"); 494 } 495 496 return bp; 497 } 498 499 private String compiledByteCodeListToString() { 500 final StringBuilder sb = new StringBuilder(); 501 sb.append("code length: ").append(codeLength).append("\n"); 502 503 int ncode = 0; 504 int bp = 0; 505 final int end = codeLength; 506 507 while (bp < end) { 508 ncode++; 509 510 if (bp > 0) { 511 sb.append(ncode % 5 == 0 ? "\n" : " "); 512 } 513 514 bp = compiledByteCodeToString(sb, bp); 515 } 516 sb.append("\n"); 517 return sb.toString(); 518 } 519} 520