// ByteCodePrinter.java revision 953:221a84ef44c0
1/* 2 * Permission is hereby granted, free of charge, to any person obtaining a copy of 3 * this software and associated documentation files (the "Software"), to deal in 4 * the Software without restriction, including without limitation the rights to 5 * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 6 * of the Software, and to permit persons to whom the Software is furnished to do 7 * so, subject to the following conditions: 8 * 9 * The above copyright notice and this permission notice shall be included in all 10 * copies or substantial portions of the Software. 11 * 12 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 13 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 14 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 15 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 16 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 17 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 18 * SOFTWARE. 
19 */ 20package jdk.nashorn.internal.runtime.regexp.joni; 21 22import jdk.nashorn.internal.runtime.regexp.joni.ast.CClassNode; 23import jdk.nashorn.internal.runtime.regexp.joni.constants.Arguments; 24import jdk.nashorn.internal.runtime.regexp.joni.constants.OPCode; 25import jdk.nashorn.internal.runtime.regexp.joni.constants.OPSize; 26import jdk.nashorn.internal.runtime.regexp.joni.exception.InternalException; 27 28class ByteCodePrinter { 29 final int[] code; 30 final int codeLength; 31 final char[][] templates; 32 33 Object[] operands; 34 35 private final static String OpCodeNames[] = new String[] { 36 "finish", /*OP_FINISH*/ 37 "end", /*OP_END*/ 38 "exact1", /*OP_EXACT1*/ 39 "exact2", /*OP_EXACT2*/ 40 "exact3", /*OP_EXACT3*/ 41 "exact4", /*OP_EXACT4*/ 42 "exact5", /*OP_EXACT5*/ 43 "exactn", /*OP_EXACTN*/ 44 "exactmb2-n1", /*OP_EXACTMB2N1*/ 45 "exactmb2-n2", /*OP_EXACTMB2N2*/ 46 "exactmb2-n3", /*OP_EXACTMB2N3*/ 47 "exactmb2-n", /*OP_EXACTMB2N*/ 48 "exactmb3n", /*OP_EXACTMB3N*/ 49 "exactmbn", /*OP_EXACTMBN*/ 50 "exact1-ic", /*OP_EXACT1_IC*/ 51 "exactn-ic", /*OP_EXACTN_IC*/ 52 "cclass", /*OP_CCLASS*/ 53 "cclass-mb", /*OP_CCLASS_MB*/ 54 "cclass-mix", /*OP_CCLASS_MIX*/ 55 "cclass-not", /*OP_CCLASS_NOT*/ 56 "cclass-mb-not", /*OP_CCLASS_MB_NOT*/ 57 "cclass-mix-not", /*OP_CCLASS_MIX_NOT*/ 58 "cclass-node", /*OP_CCLASS_NODE*/ 59 "anychar", /*OP_ANYCHAR*/ 60 "anychar-ml", /*OP_ANYCHAR_ML*/ 61 "anychar*", /*OP_ANYCHAR_STAR*/ 62 "anychar-ml*", /*OP_ANYCHAR_ML_STAR*/ 63 "anychar*-peek-next", /*OP_ANYCHAR_STAR_PEEK_NEXT*/ 64 "anychar-ml*-peek-next", /*OP_ANYCHAR_ML_STAR_PEEK_NEXT*/ 65 "word", /*OP_WORD*/ 66 "not-word", /*OP_NOT_WORD*/ 67 "word-bound", /*OP_WORD_BOUND*/ 68 "not-word-bound", /*OP_NOT_WORD_BOUND*/ 69 "word-begin", /*OP_WORD_BEGIN*/ 70 "word-end", /*OP_WORD_END*/ 71 "begin-buf", /*OP_BEGIN_BUF*/ 72 "end-buf", /*OP_END_BUF*/ 73 "begin-line", /*OP_BEGIN_LINE*/ 74 "end-line", /*OP_END_LINE*/ 75 "semi-end-buf", /*OP_SEMI_END_BUF*/ 76 "begin-position", 
/*OP_BEGIN_POSITION*/ 77 "backref1", /*OP_BACKREF1*/ 78 "backref2", /*OP_BACKREF2*/ 79 "backrefn", /*OP_BACKREFN*/ 80 "backrefn-ic", /*OP_BACKREFN_IC*/ 81 "backref_multi", /*OP_BACKREF_MULTI*/ 82 "backref_multi-ic", /*OP_BACKREF_MULTI_IC*/ 83 "backref_at_level", /*OP_BACKREF_AT_LEVEL*/ 84 "mem-start", /*OP_MEMORY_START*/ 85 "mem-start-push", /*OP_MEMORY_START_PUSH*/ 86 "mem-end-push", /*OP_MEMORY_END_PUSH*/ 87 "mem-end-push-rec", /*OP_MEMORY_END_PUSH_REC*/ 88 "mem-end", /*OP_MEMORY_END*/ 89 "mem-end-rec", /*OP_MEMORY_END_REC*/ 90 "fail", /*OP_FAIL*/ 91 "jump", /*OP_JUMP*/ 92 "push", /*OP_PUSH*/ 93 "pop", /*OP_POP*/ 94 "push-or-jump-e1", /*OP_PUSH_OR_JUMP_EXACT1*/ 95 "push-if-peek-next", /*OP_PUSH_IF_PEEK_NEXT*/ 96 "repeat", /*OP_REPEAT*/ 97 "repeat-ng", /*OP_REPEAT_NG*/ 98 "repeat-inc", /*OP_REPEAT_INC*/ 99 "repeat-inc-ng", /*OP_REPEAT_INC_NG*/ 100 "repeat-inc-sg", /*OP_REPEAT_INC_SG*/ 101 "repeat-inc-ng-sg", /*OP_REPEAT_INC_NG_SG*/ 102 "null-check-start", /*OP_NULL_CHECK_START*/ 103 "null-check-end", /*OP_NULL_CHECK_END*/ 104 "null-check-end-memst", /*OP_NULL_CHECK_END_MEMST*/ 105 "null-check-end-memst-push", /*OP_NULL_CHECK_END_MEMST_PUSH*/ 106 "push-pos", /*OP_PUSH_POS*/ 107 "pop-pos", /*OP_POP_POS*/ 108 "push-pos-not", /*OP_PUSH_POS_NOT*/ 109 "fail-pos", /*OP_FAIL_POS*/ 110 "push-stop-bt", /*OP_PUSH_STOP_BT*/ 111 "pop-stop-bt", /*OP_POP_STOP_BT*/ 112 "look-behind", /*OP_LOOK_BEHIND*/ 113 "push-look-behind-not", /*OP_PUSH_LOOK_BEHIND_NOT*/ 114 "fail-look-behind-not", /*OP_FAIL_LOOK_BEHIND_NOT*/ 115 "call", /*OP_CALL*/ 116 "return", /*OP_RETURN*/ 117 "state-check-push", /*OP_STATE_CHECK_PUSH*/ 118 "state-check-push-or-jump", /*OP_STATE_CHECK_PUSH_OR_JUMP*/ 119 "state-check", /*OP_STATE_CHECK*/ 120 "state-check-anychar*", /*OP_STATE_CHECK_ANYCHAR_STAR*/ 121 "state-check-anychar-ml*", /*OP_STATE_CHECK_ANYCHAR_ML_STAR*/ 122 "set-option-push", /*OP_SET_OPTION_PUSH*/ 123 "set-option", /*OP_SET_OPTION*/ 124 }; 125 126 private final static int OpCodeArgTypes[] = new 
int[] { 127 Arguments.NON, /*OP_FINISH*/ 128 Arguments.NON, /*OP_END*/ 129 Arguments.SPECIAL, /*OP_EXACT1*/ 130 Arguments.SPECIAL, /*OP_EXACT2*/ 131 Arguments.SPECIAL, /*OP_EXACT3*/ 132 Arguments.SPECIAL, /*OP_EXACT4*/ 133 Arguments.SPECIAL, /*OP_EXACT5*/ 134 Arguments.SPECIAL, /*OP_EXACTN*/ 135 Arguments.SPECIAL, /*OP_EXACTMB2N1*/ 136 Arguments.SPECIAL, /*OP_EXACTMB2N2*/ 137 Arguments.SPECIAL, /*OP_EXACTMB2N3*/ 138 Arguments.SPECIAL, /*OP_EXACTMB2N*/ 139 Arguments.SPECIAL, /*OP_EXACTMB3N*/ 140 Arguments.SPECIAL, /*OP_EXACTMBN*/ 141 Arguments.SPECIAL, /*OP_EXACT1_IC*/ 142 Arguments.SPECIAL, /*OP_EXACTN_IC*/ 143 Arguments.SPECIAL, /*OP_CCLASS*/ 144 Arguments.SPECIAL, /*OP_CCLASS_MB*/ 145 Arguments.SPECIAL, /*OP_CCLASS_MIX*/ 146 Arguments.SPECIAL, /*OP_CCLASS_NOT*/ 147 Arguments.SPECIAL, /*OP_CCLASS_MB_NOT*/ 148 Arguments.SPECIAL, /*OP_CCLASS_MIX_NOT*/ 149 Arguments.SPECIAL, /*OP_CCLASS_NODE*/ 150 Arguments.NON, /*OP_ANYCHAR*/ 151 Arguments.NON, /*OP_ANYCHAR_ML*/ 152 Arguments.NON, /*OP_ANYCHAR_STAR*/ 153 Arguments.NON, /*OP_ANYCHAR_ML_STAR*/ 154 Arguments.SPECIAL, /*OP_ANYCHAR_STAR_PEEK_NEXT*/ 155 Arguments.SPECIAL, /*OP_ANYCHAR_ML_STAR_PEEK_NEXT*/ 156 Arguments.NON, /*OP_WORD*/ 157 Arguments.NON, /*OP_NOT_WORD*/ 158 Arguments.NON, /*OP_WORD_BOUND*/ 159 Arguments.NON, /*OP_NOT_WORD_BOUND*/ 160 Arguments.NON, /*OP_WORD_BEGIN*/ 161 Arguments.NON, /*OP_WORD_END*/ 162 Arguments.NON, /*OP_BEGIN_BUF*/ 163 Arguments.NON, /*OP_END_BUF*/ 164 Arguments.NON, /*OP_BEGIN_LINE*/ 165 Arguments.NON, /*OP_END_LINE*/ 166 Arguments.NON, /*OP_SEMI_END_BUF*/ 167 Arguments.NON, /*OP_BEGIN_POSITION*/ 168 Arguments.NON, /*OP_BACKREF1*/ 169 Arguments.NON, /*OP_BACKREF2*/ 170 Arguments.MEMNUM, /*OP_BACKREFN*/ 171 Arguments.SPECIAL, /*OP_BACKREFN_IC*/ 172 Arguments.SPECIAL, /*OP_BACKREF_MULTI*/ 173 Arguments.SPECIAL, /*OP_BACKREF_MULTI_IC*/ 174 Arguments.SPECIAL, /*OP_BACKREF_AT_LEVEL*/ 175 Arguments.MEMNUM, /*OP_MEMORY_START*/ 176 Arguments.MEMNUM, /*OP_MEMORY_START_PUSH*/ 177 
Arguments.MEMNUM, /*OP_MEMORY_END_PUSH*/ 178 Arguments.MEMNUM, /*OP_MEMORY_END_PUSH_REC*/ 179 Arguments.MEMNUM, /*OP_MEMORY_END*/ 180 Arguments.MEMNUM, /*OP_MEMORY_END_REC*/ 181 Arguments.NON, /*OP_FAIL*/ 182 Arguments.RELADDR, /*OP_JUMP*/ 183 Arguments.RELADDR, /*OP_PUSH*/ 184 Arguments.NON, /*OP_POP*/ 185 Arguments.SPECIAL, /*OP_PUSH_OR_JUMP_EXACT1*/ 186 Arguments.SPECIAL, /*OP_PUSH_IF_PEEK_NEXT*/ 187 Arguments.SPECIAL, /*OP_REPEAT*/ 188 Arguments.SPECIAL, /*OP_REPEAT_NG*/ 189 Arguments.MEMNUM, /*OP_REPEAT_INC*/ 190 Arguments.MEMNUM, /*OP_REPEAT_INC_NG*/ 191 Arguments.MEMNUM, /*OP_REPEAT_INC_SG*/ 192 Arguments.MEMNUM, /*OP_REPEAT_INC_NG_SG*/ 193 Arguments.MEMNUM, /*OP_NULL_CHECK_START*/ 194 Arguments.MEMNUM, /*OP_NULL_CHECK_END*/ 195 Arguments.MEMNUM, /*OP_NULL_CHECK_END_MEMST*/ 196 Arguments.MEMNUM, /*OP_NULL_CHECK_END_MEMST_PUSH*/ 197 Arguments.NON, /*OP_PUSH_POS*/ 198 Arguments.NON, /*OP_POP_POS*/ 199 Arguments.RELADDR, /*OP_PUSH_POS_NOT*/ 200 Arguments.NON, /*OP_FAIL_POS*/ 201 Arguments.NON, /*OP_PUSH_STOP_BT*/ 202 Arguments.NON, /*OP_POP_STOP_BT*/ 203 Arguments.SPECIAL, /*OP_LOOK_BEHIND*/ 204 Arguments.SPECIAL, /*OP_PUSH_LOOK_BEHIND_NOT*/ 205 Arguments.NON, /*OP_FAIL_LOOK_BEHIND_NOT*/ 206 Arguments.ABSADDR, /*OP_CALL*/ 207 Arguments.NON, /*OP_RETURN*/ 208 Arguments.SPECIAL, /*OP_STATE_CHECK_PUSH*/ 209 Arguments.SPECIAL, /*OP_STATE_CHECK_PUSH_OR_JUMP*/ 210 Arguments.STATE_CHECK, /*OP_STATE_CHECK*/ 211 Arguments.STATE_CHECK, /*OP_STATE_CHECK_ANYCHAR_STAR*/ 212 Arguments.STATE_CHECK, /*OP_STATE_CHECK_ANYCHAR_ML_STAR*/ 213 Arguments.OPTION, /*OP_SET_OPTION_PUSH*/ 214 Arguments.OPTION, /*OP_SET_OPTION*/ 215 }; 216 217 public ByteCodePrinter(final Regex regex) { 218 code = regex.code; 219 codeLength = regex.codeLength; 220 operands = regex.operands; 221 222 templates = regex.templates; 223 } 224 225 public String byteCodeListToString() { 226 return compiledByteCodeListToString(); 227 } 228 229 private void pString(final StringBuilder sb, final int len, final int 
s) { 230 sb.append(":"); 231 sb.append(new String(code, s, len)); 232 } 233 234 private void pLenString(final StringBuilder sb, final int len, final int s) { 235 sb.append(":").append(len).append(":"); 236 sb.append(new String(code, s, len)); 237 } 238 239 private void pLenStringFromTemplate(final StringBuilder sb, final int len, final char[] tm, final int idx) { 240 sb.append(":T:").append(len).append(":"); 241 sb.append(tm, idx, len); 242 } 243 244 public int compiledByteCodeToString(final StringBuilder sb, int bp) { 245 int len, n, mem, addr, scn, cod; 246 BitSet bs; 247 CClassNode cc; 248 int tm, idx; 249 250 sb.append("[").append(OpCodeNames[code[bp]]); 251 final int argType = OpCodeArgTypes[code[bp]]; 252 final int ip = bp; 253 if (argType != Arguments.SPECIAL) { 254 bp++; 255 switch (argType) { 256 case Arguments.NON: 257 break; 258 259 case Arguments.RELADDR: 260 sb.append(":(").append(code[bp]).append(")"); 261 bp += OPSize.RELADDR; 262 break; 263 264 case Arguments.ABSADDR: 265 sb.append(":(").append(code[bp]).append(")"); 266 bp += OPSize.ABSADDR; 267 break; 268 269 case Arguments.LENGTH: 270 sb.append(":").append(code[bp]); 271 bp += OPSize.LENGTH; 272 break; 273 274 case Arguments.MEMNUM: 275 sb.append(":").append(code[bp]); 276 bp += OPSize.MEMNUM; 277 break; 278 279 case Arguments.OPTION: 280 sb.append(":").append(code[bp]); 281 bp += OPSize.OPTION; 282 break; 283 284 case Arguments.STATE_CHECK: 285 sb.append(":").append(code[bp]); 286 bp += OPSize.STATE_CHECK; 287 break; 288 } 289 } else { 290 switch (code[bp++]) { 291 case OPCode.EXACT1: 292 case OPCode.ANYCHAR_STAR_PEEK_NEXT: 293 case OPCode.ANYCHAR_ML_STAR_PEEK_NEXT: 294 pString(sb, 1, bp++); 295 break; 296 297 case OPCode.EXACT2: 298 pString(sb, 2, bp); 299 bp += 2; 300 break; 301 302 case OPCode.EXACT3: 303 pString(sb, 3, bp); 304 bp += 3; 305 break; 306 307 case OPCode.EXACT4: 308 pString(sb, 4, bp); 309 bp += 4; 310 break; 311 312 case OPCode.EXACT5: 313 pString(sb, 5, bp); 314 bp += 5; 315 
break; 316 317 case OPCode.EXACTN: 318 len = code[bp]; 319 bp += OPSize.LENGTH; 320 if (Config.USE_STRING_TEMPLATES) { 321 tm = code[bp]; 322 bp += OPSize.INDEX; 323 idx = code[bp]; 324 bp += OPSize.INDEX; 325 pLenStringFromTemplate(sb, len, templates[tm], idx); 326 } else { 327 pLenString(sb, len, bp); 328 bp += len; 329 } 330 break; 331 332 case OPCode.EXACT1_IC: 333 pString(sb, 1, bp); 334 bp++; 335 break; 336 337 case OPCode.EXACTN_IC: 338 len = code[bp]; 339 bp += OPSize.LENGTH; 340 if (Config.USE_STRING_TEMPLATES) { 341 tm = code[bp]; 342 bp += OPSize.INDEX; 343 idx = code[bp]; 344 bp += OPSize.INDEX; 345 pLenStringFromTemplate(sb, len, templates[tm], idx); 346 } else { 347 pLenString(sb, len, bp); 348 bp += len; 349 } 350 break; 351 352 case OPCode.CCLASS: 353 bs = new BitSet(); 354 System.arraycopy(code, bp, bs.bits, 0, BitSet.BITSET_SIZE); 355 n = bs.numOn(); 356 bp += BitSet.BITSET_SIZE; 357 sb.append(":").append(n); 358 break; 359 360 case OPCode.CCLASS_NOT: 361 bs = new BitSet(); 362 System.arraycopy(code, bp, bs.bits, 0, BitSet.BITSET_SIZE); 363 n = bs.numOn(); 364 bp += BitSet.BITSET_SIZE; 365 sb.append(":").append(n); 366 break; 367 368 case OPCode.CCLASS_MB: 369 case OPCode.CCLASS_MB_NOT: 370 len = code[bp]; 371 bp += OPSize.LENGTH; 372 cod = code[bp]; 373 //bp += OPSize.CODE_POINT; 374 bp += len; 375 sb.append(":").append(cod).append(":").append(len); 376 break; 377 378 case OPCode.CCLASS_MIX: 379 case OPCode.CCLASS_MIX_NOT: 380 bs = new BitSet(); 381 System.arraycopy(code, bp, bs.bits, 0, BitSet.BITSET_SIZE); 382 n = bs.numOn(); 383 bp += BitSet.BITSET_SIZE; 384 len = code[bp]; 385 bp += OPSize.LENGTH; 386 cod = code[bp]; 387 //bp += OPSize.CODE_POINT; 388 bp += len; 389 sb.append(":").append(n).append(":").append(cod).append(":").append(len); 390 break; 391 392 case OPCode.CCLASS_NODE: 393 cc = (CClassNode)operands[code[bp]]; 394 bp += OPSize.POINTER; 395 n = cc.bs.numOn(); 396 sb.append(":").append(cc).append(":").append(n); 397 break; 398 399 
case OPCode.BACKREFN_IC: 400 mem = code[bp]; 401 bp += OPSize.MEMNUM; 402 sb.append(":").append(mem); 403 break; 404 405 case OPCode.BACKREF_MULTI_IC: 406 case OPCode.BACKREF_MULTI: 407 sb.append(" "); 408 len = code[bp]; 409 bp += OPSize.LENGTH; 410 for (int i=0; i<len; i++) { 411 mem = code[bp]; 412 bp += OPSize.MEMNUM; 413 if (i > 0) sb.append(", "); 414 sb.append(mem); 415 } 416 break; 417 418 case OPCode.BACKREF_WITH_LEVEL: { 419 final int option = code[bp]; 420 bp += OPSize.OPTION; 421 sb.append(":").append(option); 422 final int level = code[bp]; 423 bp += OPSize.LENGTH; 424 sb.append(":").append(level); 425 sb.append(" "); 426 len = code[bp]; 427 bp += OPSize.LENGTH; 428 for (int i=0; i<len; i++) { 429 mem = code[bp]; 430 bp += OPSize.MEMNUM; 431 if (i > 0) sb.append(", "); 432 sb.append(mem); 433 } 434 break; 435 } 436 437 case OPCode.REPEAT: 438 case OPCode.REPEAT_NG: 439 mem = code[bp]; 440 bp += OPSize.MEMNUM; 441 addr = code[bp]; 442 bp += OPSize.RELADDR; 443 sb.append(":").append(mem).append(":").append(addr); 444 break; 445 446 case OPCode.PUSH_OR_JUMP_EXACT1: 447 case OPCode.PUSH_IF_PEEK_NEXT: 448 addr = code[bp]; 449 bp += OPSize.RELADDR; 450 sb.append(":(").append(addr).append(")"); 451 pString(sb, 1, bp); 452 bp++; 453 break; 454 455 case OPCode.LOOK_BEHIND: 456 len = code[bp]; 457 bp += OPSize.LENGTH; 458 sb.append(":").append(len); 459 break; 460 461 case OPCode.PUSH_LOOK_BEHIND_NOT: 462 addr = code[bp]; 463 bp += OPSize.RELADDR; 464 len = code[bp]; 465 bp += OPSize.LENGTH; 466 sb.append(":").append(len).append(":(").append(addr).append(")"); 467 break; 468 469 case OPCode.STATE_CHECK_PUSH: 470 case OPCode.STATE_CHECK_PUSH_OR_JUMP: 471 scn = code[bp]; 472 bp += OPSize.STATE_CHECK_NUM; 473 addr = code[bp]; 474 bp += OPSize.RELADDR; 475 sb.append(":").append(scn).append(":(").append(addr).append(")"); 476 break; 477 478 default: 479 throw new InternalException("undefined code: " + code[--bp]); 480 } 481 } 482 483 sb.append("]"); 484 485 // 
@opcode_address(opcode_size) 486 if (Config.DEBUG_COMPILE_BYTE_CODE_INFO) { 487 sb.append("@").append(ip).append("(").append((bp - ip)).append(")"); 488 } 489 490 return bp; 491 } 492 493 private String compiledByteCodeListToString() { 494 final StringBuilder sb = new StringBuilder(); 495 sb.append("code length: ").append(codeLength).append("\n"); 496 497 int ncode = 0; 498 int bp = 0; 499 final int end = codeLength; 500 501 while (bp < end) { 502 ncode++; 503 504 if (bp > 0) sb.append(ncode % 5 == 0 ? "\n" : " "); 505 506 bp = compiledByteCodeToString(sb, bp); 507 } 508 sb.append("\n"); 509 return sb.toString(); 510 } 511} 512