ByteCodePrinter.java revision 953:221a84ef44c0
1/*
2 * Permission is hereby granted, free of charge, to any person obtaining a copy of
3 * this software and associated documentation files (the "Software"), to deal in
4 * the Software without restriction, including without limitation the rights to
5 * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
6 * of the Software, and to permit persons to whom the Software is furnished to do
7 * so, subject to the following conditions:
8 *
9 * The above copyright notice and this permission notice shall be included in all
10 * copies or substantial portions of the Software.
11 *
12 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
13 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
14 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
15 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
16 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
17 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
18 * SOFTWARE.
19 */
20package jdk.nashorn.internal.runtime.regexp.joni;
21
22import jdk.nashorn.internal.runtime.regexp.joni.ast.CClassNode;
23import jdk.nashorn.internal.runtime.regexp.joni.constants.Arguments;
24import jdk.nashorn.internal.runtime.regexp.joni.constants.OPCode;
25import jdk.nashorn.internal.runtime.regexp.joni.constants.OPSize;
26import jdk.nashorn.internal.runtime.regexp.joni.exception.InternalException;
27
28class ByteCodePrinter {
29    final int[] code;
30    final int codeLength;
31    final char[][] templates;
32
33    Object[] operands;
34
35    private final static String OpCodeNames[] = new String[] {
36            "finish", /*OP_FINISH*/
37            "end", /*OP_END*/
38            "exact1", /*OP_EXACT1*/
39            "exact2", /*OP_EXACT2*/
40            "exact3", /*OP_EXACT3*/
41            "exact4", /*OP_EXACT4*/
42            "exact5", /*OP_EXACT5*/
43            "exactn", /*OP_EXACTN*/
44            "exactmb2-n1", /*OP_EXACTMB2N1*/
45            "exactmb2-n2", /*OP_EXACTMB2N2*/
46            "exactmb2-n3", /*OP_EXACTMB2N3*/
47            "exactmb2-n", /*OP_EXACTMB2N*/
48            "exactmb3n", /*OP_EXACTMB3N*/
49            "exactmbn", /*OP_EXACTMBN*/
50            "exact1-ic", /*OP_EXACT1_IC*/
51            "exactn-ic", /*OP_EXACTN_IC*/
52            "cclass", /*OP_CCLASS*/
53            "cclass-mb", /*OP_CCLASS_MB*/
54            "cclass-mix", /*OP_CCLASS_MIX*/
55            "cclass-not", /*OP_CCLASS_NOT*/
56            "cclass-mb-not", /*OP_CCLASS_MB_NOT*/
57            "cclass-mix-not", /*OP_CCLASS_MIX_NOT*/
58            "cclass-node", /*OP_CCLASS_NODE*/
59            "anychar", /*OP_ANYCHAR*/
60            "anychar-ml", /*OP_ANYCHAR_ML*/
61            "anychar*", /*OP_ANYCHAR_STAR*/
62            "anychar-ml*", /*OP_ANYCHAR_ML_STAR*/
63            "anychar*-peek-next", /*OP_ANYCHAR_STAR_PEEK_NEXT*/
64            "anychar-ml*-peek-next", /*OP_ANYCHAR_ML_STAR_PEEK_NEXT*/
65            "word", /*OP_WORD*/
66            "not-word", /*OP_NOT_WORD*/
67            "word-bound", /*OP_WORD_BOUND*/
68            "not-word-bound", /*OP_NOT_WORD_BOUND*/
69            "word-begin", /*OP_WORD_BEGIN*/
70            "word-end", /*OP_WORD_END*/
71            "begin-buf", /*OP_BEGIN_BUF*/
72            "end-buf", /*OP_END_BUF*/
73            "begin-line", /*OP_BEGIN_LINE*/
74            "end-line", /*OP_END_LINE*/
75            "semi-end-buf", /*OP_SEMI_END_BUF*/
76            "begin-position", /*OP_BEGIN_POSITION*/
77            "backref1", /*OP_BACKREF1*/
78            "backref2", /*OP_BACKREF2*/
79            "backrefn", /*OP_BACKREFN*/
80            "backrefn-ic", /*OP_BACKREFN_IC*/
81            "backref_multi", /*OP_BACKREF_MULTI*/
82            "backref_multi-ic", /*OP_BACKREF_MULTI_IC*/
83            "backref_at_level", /*OP_BACKREF_AT_LEVEL*/
84            "mem-start", /*OP_MEMORY_START*/
85            "mem-start-push", /*OP_MEMORY_START_PUSH*/
86            "mem-end-push", /*OP_MEMORY_END_PUSH*/
87            "mem-end-push-rec", /*OP_MEMORY_END_PUSH_REC*/
88            "mem-end", /*OP_MEMORY_END*/
89            "mem-end-rec", /*OP_MEMORY_END_REC*/
90            "fail", /*OP_FAIL*/
91            "jump", /*OP_JUMP*/
92            "push", /*OP_PUSH*/
93            "pop", /*OP_POP*/
94            "push-or-jump-e1", /*OP_PUSH_OR_JUMP_EXACT1*/
95            "push-if-peek-next", /*OP_PUSH_IF_PEEK_NEXT*/
96            "repeat", /*OP_REPEAT*/
97            "repeat-ng", /*OP_REPEAT_NG*/
98            "repeat-inc", /*OP_REPEAT_INC*/
99            "repeat-inc-ng", /*OP_REPEAT_INC_NG*/
100            "repeat-inc-sg", /*OP_REPEAT_INC_SG*/
101            "repeat-inc-ng-sg", /*OP_REPEAT_INC_NG_SG*/
102            "null-check-start", /*OP_NULL_CHECK_START*/
103            "null-check-end", /*OP_NULL_CHECK_END*/
104            "null-check-end-memst", /*OP_NULL_CHECK_END_MEMST*/
105            "null-check-end-memst-push", /*OP_NULL_CHECK_END_MEMST_PUSH*/
106            "push-pos", /*OP_PUSH_POS*/
107            "pop-pos", /*OP_POP_POS*/
108            "push-pos-not", /*OP_PUSH_POS_NOT*/
109            "fail-pos", /*OP_FAIL_POS*/
110            "push-stop-bt", /*OP_PUSH_STOP_BT*/
111            "pop-stop-bt", /*OP_POP_STOP_BT*/
112            "look-behind", /*OP_LOOK_BEHIND*/
113            "push-look-behind-not", /*OP_PUSH_LOOK_BEHIND_NOT*/
114            "fail-look-behind-not", /*OP_FAIL_LOOK_BEHIND_NOT*/
115            "call", /*OP_CALL*/
116            "return", /*OP_RETURN*/
117            "state-check-push", /*OP_STATE_CHECK_PUSH*/
118            "state-check-push-or-jump", /*OP_STATE_CHECK_PUSH_OR_JUMP*/
119            "state-check", /*OP_STATE_CHECK*/
120            "state-check-anychar*", /*OP_STATE_CHECK_ANYCHAR_STAR*/
121            "state-check-anychar-ml*", /*OP_STATE_CHECK_ANYCHAR_ML_STAR*/
122            "set-option-push", /*OP_SET_OPTION_PUSH*/
123            "set-option", /*OP_SET_OPTION*/
124    };
125
126    private final static int OpCodeArgTypes[] = new int[] {
127            Arguments.NON, /*OP_FINISH*/
128            Arguments.NON, /*OP_END*/
129            Arguments.SPECIAL, /*OP_EXACT1*/
130            Arguments.SPECIAL, /*OP_EXACT2*/
131            Arguments.SPECIAL, /*OP_EXACT3*/
132            Arguments.SPECIAL, /*OP_EXACT4*/
133            Arguments.SPECIAL, /*OP_EXACT5*/
134            Arguments.SPECIAL, /*OP_EXACTN*/
135            Arguments.SPECIAL, /*OP_EXACTMB2N1*/
136            Arguments.SPECIAL, /*OP_EXACTMB2N2*/
137            Arguments.SPECIAL, /*OP_EXACTMB2N3*/
138            Arguments.SPECIAL, /*OP_EXACTMB2N*/
139            Arguments.SPECIAL, /*OP_EXACTMB3N*/
140            Arguments.SPECIAL, /*OP_EXACTMBN*/
141            Arguments.SPECIAL, /*OP_EXACT1_IC*/
142            Arguments.SPECIAL, /*OP_EXACTN_IC*/
143            Arguments.SPECIAL, /*OP_CCLASS*/
144            Arguments.SPECIAL, /*OP_CCLASS_MB*/
145            Arguments.SPECIAL, /*OP_CCLASS_MIX*/
146            Arguments.SPECIAL, /*OP_CCLASS_NOT*/
147            Arguments.SPECIAL, /*OP_CCLASS_MB_NOT*/
148            Arguments.SPECIAL, /*OP_CCLASS_MIX_NOT*/
149            Arguments.SPECIAL, /*OP_CCLASS_NODE*/
150            Arguments.NON, /*OP_ANYCHAR*/
151            Arguments.NON, /*OP_ANYCHAR_ML*/
152            Arguments.NON, /*OP_ANYCHAR_STAR*/
153            Arguments.NON, /*OP_ANYCHAR_ML_STAR*/
154            Arguments.SPECIAL, /*OP_ANYCHAR_STAR_PEEK_NEXT*/
155            Arguments.SPECIAL, /*OP_ANYCHAR_ML_STAR_PEEK_NEXT*/
156            Arguments.NON, /*OP_WORD*/
157            Arguments.NON, /*OP_NOT_WORD*/
158            Arguments.NON, /*OP_WORD_BOUND*/
159            Arguments.NON, /*OP_NOT_WORD_BOUND*/
160            Arguments.NON, /*OP_WORD_BEGIN*/
161            Arguments.NON, /*OP_WORD_END*/
162            Arguments.NON, /*OP_BEGIN_BUF*/
163            Arguments.NON, /*OP_END_BUF*/
164            Arguments.NON, /*OP_BEGIN_LINE*/
165            Arguments.NON, /*OP_END_LINE*/
166            Arguments.NON, /*OP_SEMI_END_BUF*/
167            Arguments.NON, /*OP_BEGIN_POSITION*/
168            Arguments.NON, /*OP_BACKREF1*/
169            Arguments.NON, /*OP_BACKREF2*/
170            Arguments.MEMNUM, /*OP_BACKREFN*/
171            Arguments.SPECIAL, /*OP_BACKREFN_IC*/
172            Arguments.SPECIAL, /*OP_BACKREF_MULTI*/
173            Arguments.SPECIAL, /*OP_BACKREF_MULTI_IC*/
174            Arguments.SPECIAL, /*OP_BACKREF_AT_LEVEL*/
175            Arguments.MEMNUM, /*OP_MEMORY_START*/
176            Arguments.MEMNUM, /*OP_MEMORY_START_PUSH*/
177            Arguments.MEMNUM, /*OP_MEMORY_END_PUSH*/
178            Arguments.MEMNUM, /*OP_MEMORY_END_PUSH_REC*/
179            Arguments.MEMNUM, /*OP_MEMORY_END*/
180            Arguments.MEMNUM, /*OP_MEMORY_END_REC*/
181            Arguments.NON, /*OP_FAIL*/
182            Arguments.RELADDR, /*OP_JUMP*/
183            Arguments.RELADDR, /*OP_PUSH*/
184            Arguments.NON, /*OP_POP*/
185            Arguments.SPECIAL, /*OP_PUSH_OR_JUMP_EXACT1*/
186            Arguments.SPECIAL, /*OP_PUSH_IF_PEEK_NEXT*/
187            Arguments.SPECIAL, /*OP_REPEAT*/
188            Arguments.SPECIAL, /*OP_REPEAT_NG*/
189            Arguments.MEMNUM, /*OP_REPEAT_INC*/
190            Arguments.MEMNUM, /*OP_REPEAT_INC_NG*/
191            Arguments.MEMNUM, /*OP_REPEAT_INC_SG*/
192            Arguments.MEMNUM, /*OP_REPEAT_INC_NG_SG*/
193            Arguments.MEMNUM, /*OP_NULL_CHECK_START*/
194            Arguments.MEMNUM, /*OP_NULL_CHECK_END*/
195            Arguments.MEMNUM, /*OP_NULL_CHECK_END_MEMST*/
196            Arguments.MEMNUM, /*OP_NULL_CHECK_END_MEMST_PUSH*/
197            Arguments.NON, /*OP_PUSH_POS*/
198            Arguments.NON, /*OP_POP_POS*/
199            Arguments.RELADDR, /*OP_PUSH_POS_NOT*/
200            Arguments.NON, /*OP_FAIL_POS*/
201            Arguments.NON, /*OP_PUSH_STOP_BT*/
202            Arguments.NON, /*OP_POP_STOP_BT*/
203            Arguments.SPECIAL, /*OP_LOOK_BEHIND*/
204            Arguments.SPECIAL, /*OP_PUSH_LOOK_BEHIND_NOT*/
205            Arguments.NON, /*OP_FAIL_LOOK_BEHIND_NOT*/
206            Arguments.ABSADDR, /*OP_CALL*/
207            Arguments.NON, /*OP_RETURN*/
208            Arguments.SPECIAL, /*OP_STATE_CHECK_PUSH*/
209            Arguments.SPECIAL, /*OP_STATE_CHECK_PUSH_OR_JUMP*/
210            Arguments.STATE_CHECK, /*OP_STATE_CHECK*/
211            Arguments.STATE_CHECK, /*OP_STATE_CHECK_ANYCHAR_STAR*/
212            Arguments.STATE_CHECK, /*OP_STATE_CHECK_ANYCHAR_ML_STAR*/
213            Arguments.OPTION, /*OP_SET_OPTION_PUSH*/
214            Arguments.OPTION, /*OP_SET_OPTION*/
215    };
216
217    public ByteCodePrinter(final Regex regex) {
218        code = regex.code;
219        codeLength = regex.codeLength;
220        operands = regex.operands;
221
222        templates = regex.templates;
223    }
224
225    public String byteCodeListToString() {
226        return compiledByteCodeListToString();
227    }
228
229    private void pString(final StringBuilder sb, final int len, final int s) {
230        sb.append(":");
231        sb.append(new String(code, s, len));
232    }
233
234    private void pLenString(final StringBuilder sb, final int len, final int s) {
235        sb.append(":").append(len).append(":");
236        sb.append(new String(code, s, len));
237    }
238
239    private void pLenStringFromTemplate(final StringBuilder sb, final int len, final char[] tm, final int idx) {
240        sb.append(":T:").append(len).append(":");
241        sb.append(tm, idx, len);
242    }
243
244    public int compiledByteCodeToString(final StringBuilder sb, int bp) {
245        int len, n, mem, addr, scn, cod;
246        BitSet bs;
247        CClassNode cc;
248        int tm, idx;
249
250        sb.append("[").append(OpCodeNames[code[bp]]);
251        final int argType = OpCodeArgTypes[code[bp]];
252        final int ip = bp;
253        if (argType != Arguments.SPECIAL) {
254            bp++;
255            switch (argType) {
256            case Arguments.NON:
257                break;
258
259            case Arguments.RELADDR:
260                sb.append(":(").append(code[bp]).append(")");
261                bp += OPSize.RELADDR;
262                break;
263
264            case Arguments.ABSADDR:
265                sb.append(":(").append(code[bp]).append(")");
266                bp += OPSize.ABSADDR;
267                break;
268
269            case Arguments.LENGTH:
270                sb.append(":").append(code[bp]);
271                bp += OPSize.LENGTH;
272                break;
273
274            case Arguments.MEMNUM:
275                sb.append(":").append(code[bp]);
276                bp += OPSize.MEMNUM;
277                break;
278
279            case Arguments.OPTION:
280                sb.append(":").append(code[bp]);
281                bp += OPSize.OPTION;
282                break;
283
284            case Arguments.STATE_CHECK:
285                sb.append(":").append(code[bp]);
286                bp += OPSize.STATE_CHECK;
287                break;
288            }
289        } else {
290            switch (code[bp++]) {
291            case OPCode.EXACT1:
292            case OPCode.ANYCHAR_STAR_PEEK_NEXT:
293            case OPCode.ANYCHAR_ML_STAR_PEEK_NEXT:
294                pString(sb, 1, bp++);
295                break;
296
297            case OPCode.EXACT2:
298                pString(sb, 2, bp);
299                bp += 2;
300                break;
301
302            case OPCode.EXACT3:
303                pString(sb, 3, bp);
304                bp += 3;
305                break;
306
307            case OPCode.EXACT4:
308                pString(sb, 4, bp);
309                bp += 4;
310                break;
311
312            case OPCode.EXACT5:
313                pString(sb, 5, bp);
314                bp += 5;
315                break;
316
317            case OPCode.EXACTN:
318                len = code[bp];
319                bp += OPSize.LENGTH;
320                if (Config.USE_STRING_TEMPLATES) {
321                    tm = code[bp];
322                    bp += OPSize.INDEX;
323                    idx = code[bp];
324                    bp += OPSize.INDEX;
325                    pLenStringFromTemplate(sb, len, templates[tm], idx);
326                } else {
327                    pLenString(sb, len, bp);
328                    bp += len;
329                }
330                break;
331
332            case OPCode.EXACT1_IC:
333                pString(sb, 1, bp);
334                bp++;
335                break;
336
337            case OPCode.EXACTN_IC:
338                len = code[bp];
339                bp += OPSize.LENGTH;
340                if (Config.USE_STRING_TEMPLATES) {
341                    tm = code[bp];
342                    bp += OPSize.INDEX;
343                    idx = code[bp];
344                    bp += OPSize.INDEX;
345                    pLenStringFromTemplate(sb, len, templates[tm], idx);
346                } else {
347                    pLenString(sb, len, bp);
348                    bp += len;
349                }
350                break;
351
352            case OPCode.CCLASS:
353                bs = new BitSet();
354                System.arraycopy(code, bp, bs.bits, 0, BitSet.BITSET_SIZE);
355                n = bs.numOn();
356                bp += BitSet.BITSET_SIZE;
357                sb.append(":").append(n);
358                break;
359
360            case OPCode.CCLASS_NOT:
361                bs = new BitSet();
362                System.arraycopy(code, bp, bs.bits, 0, BitSet.BITSET_SIZE);
363                n = bs.numOn();
364                bp += BitSet.BITSET_SIZE;
365                sb.append(":").append(n);
366                break;
367
368            case OPCode.CCLASS_MB:
369            case OPCode.CCLASS_MB_NOT:
370                len = code[bp];
371                bp += OPSize.LENGTH;
372                cod = code[bp];
373                //bp += OPSize.CODE_POINT;
374                bp += len;
375                sb.append(":").append(cod).append(":").append(len);
376                break;
377
378            case OPCode.CCLASS_MIX:
379            case OPCode.CCLASS_MIX_NOT:
380                bs = new BitSet();
381                System.arraycopy(code, bp, bs.bits, 0, BitSet.BITSET_SIZE);
382                n = bs.numOn();
383                bp += BitSet.BITSET_SIZE;
384                len = code[bp];
385                bp += OPSize.LENGTH;
386                cod = code[bp];
387                //bp += OPSize.CODE_POINT;
388                bp += len;
389                sb.append(":").append(n).append(":").append(cod).append(":").append(len);
390                break;
391
392            case OPCode.CCLASS_NODE:
393                cc = (CClassNode)operands[code[bp]];
394                bp += OPSize.POINTER;
395                n = cc.bs.numOn();
396                sb.append(":").append(cc).append(":").append(n);
397                break;
398
399            case OPCode.BACKREFN_IC:
400                mem = code[bp];
401                bp += OPSize.MEMNUM;
402                sb.append(":").append(mem);
403                break;
404
405            case OPCode.BACKREF_MULTI_IC:
406            case OPCode.BACKREF_MULTI:
407                sb.append(" ");
408                len = code[bp];
409                bp += OPSize.LENGTH;
410                for (int i=0; i<len; i++) {
411                    mem = code[bp];
412                    bp += OPSize.MEMNUM;
413                    if (i > 0) sb.append(", ");
414                    sb.append(mem);
415                }
416                break;
417
418            case OPCode.BACKREF_WITH_LEVEL: {
419                final int option = code[bp];
420                bp += OPSize.OPTION;
421                sb.append(":").append(option);
422                final int level = code[bp];
423                bp += OPSize.LENGTH;
424                sb.append(":").append(level);
425                sb.append(" ");
426                len = code[bp];
427                bp += OPSize.LENGTH;
428                for (int i=0; i<len; i++) {
429                    mem = code[bp];
430                    bp += OPSize.MEMNUM;
431                    if (i > 0) sb.append(", ");
432                    sb.append(mem);
433                }
434                break;
435            }
436
437            case OPCode.REPEAT:
438            case OPCode.REPEAT_NG:
439                mem = code[bp];
440                bp += OPSize.MEMNUM;
441                addr = code[bp];
442                bp += OPSize.RELADDR;
443                sb.append(":").append(mem).append(":").append(addr);
444                break;
445
446            case OPCode.PUSH_OR_JUMP_EXACT1:
447            case OPCode.PUSH_IF_PEEK_NEXT:
448                addr = code[bp];
449                bp += OPSize.RELADDR;
450                sb.append(":(").append(addr).append(")");
451                pString(sb, 1, bp);
452                bp++;
453                break;
454
455            case OPCode.LOOK_BEHIND:
456                len = code[bp];
457                bp += OPSize.LENGTH;
458                sb.append(":").append(len);
459                break;
460
461            case OPCode.PUSH_LOOK_BEHIND_NOT:
462                addr = code[bp];
463                bp += OPSize.RELADDR;
464                len = code[bp];
465                bp += OPSize.LENGTH;
466                sb.append(":").append(len).append(":(").append(addr).append(")");
467                break;
468
469            case OPCode.STATE_CHECK_PUSH:
470            case OPCode.STATE_CHECK_PUSH_OR_JUMP:
471                scn = code[bp];
472                bp += OPSize.STATE_CHECK_NUM;
473                addr = code[bp];
474                bp += OPSize.RELADDR;
475                sb.append(":").append(scn).append(":(").append(addr).append(")");
476                break;
477
478            default:
479                throw new InternalException("undefined code: " + code[--bp]);
480            }
481        }
482
483        sb.append("]");
484
485        // @opcode_address(opcode_size)
486        if (Config.DEBUG_COMPILE_BYTE_CODE_INFO) {
487            sb.append("@").append(ip).append("(").append((bp - ip)).append(")");
488        }
489
490        return bp;
491    }
492
493    private String compiledByteCodeListToString() {
494        final StringBuilder sb = new StringBuilder();
495        sb.append("code length: ").append(codeLength).append("\n");
496
497        int ncode = 0;
498        int bp = 0;
499        final int end = codeLength;
500
501        while (bp < end) {
502            ncode++;
503
504            if (bp > 0) sb.append(ncode % 5 == 0 ? "\n" : " ");
505
506            bp = compiledByteCodeToString(sb, bp);
507        }
508        sb.append("\n");
509        return sb.toString();
510    }
511}
512