ByteCodePrinter.java revision 1088:7e62d98d4625
/*
 * Permission is hereby granted, free of charge, to any person obtaining a copy of
 * this software and associated documentation files (the "Software"), to deal in
 * the Software without restriction, including without limitation the rights to
 * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
 * of the Software, and to permit persons to whom the Software is furnished to do
 * so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
20package jdk.nashorn.internal.runtime.regexp.joni;
21
22import jdk.nashorn.internal.runtime.regexp.joni.ast.CClassNode;
23import jdk.nashorn.internal.runtime.regexp.joni.constants.Arguments;
24import jdk.nashorn.internal.runtime.regexp.joni.constants.OPCode;
25import jdk.nashorn.internal.runtime.regexp.joni.constants.OPSize;
26import jdk.nashorn.internal.runtime.regexp.joni.exception.InternalException;
27
28class ByteCodePrinter {
29    final int[] code;
30    final int codeLength;
31    final char[][] templates;
32
33    Object[] operands;
34
35    private final static String OpCodeNames[] = new String[] {
36            "finish", /*OP_FINISH*/
37            "end", /*OP_END*/
38            "exact1", /*OP_EXACT1*/
39            "exact2", /*OP_EXACT2*/
40            "exact3", /*OP_EXACT3*/
41            "exact4", /*OP_EXACT4*/
42            "exact5", /*OP_EXACT5*/
43            "exactn", /*OP_EXACTN*/
44            "exactmb2-n1", /*OP_EXACTMB2N1*/
45            "exactmb2-n2", /*OP_EXACTMB2N2*/
46            "exactmb2-n3", /*OP_EXACTMB2N3*/
47            "exactmb2-n", /*OP_EXACTMB2N*/
48            "exactmb3n", /*OP_EXACTMB3N*/
49            "exactmbn", /*OP_EXACTMBN*/
50            "exact1-ic", /*OP_EXACT1_IC*/
51            "exactn-ic", /*OP_EXACTN_IC*/
52            "cclass", /*OP_CCLASS*/
53            "cclass-mb", /*OP_CCLASS_MB*/
54            "cclass-mix", /*OP_CCLASS_MIX*/
55            "cclass-not", /*OP_CCLASS_NOT*/
56            "cclass-mb-not", /*OP_CCLASS_MB_NOT*/
57            "cclass-mix-not", /*OP_CCLASS_MIX_NOT*/
58            "cclass-node", /*OP_CCLASS_NODE*/
59            "anychar", /*OP_ANYCHAR*/
60            "anychar-ml", /*OP_ANYCHAR_ML*/
61            "anychar*", /*OP_ANYCHAR_STAR*/
62            "anychar-ml*", /*OP_ANYCHAR_ML_STAR*/
63            "anychar*-peek-next", /*OP_ANYCHAR_STAR_PEEK_NEXT*/
64            "anychar-ml*-peek-next", /*OP_ANYCHAR_ML_STAR_PEEK_NEXT*/
65            "word", /*OP_WORD*/
66            "not-word", /*OP_NOT_WORD*/
67            "word-bound", /*OP_WORD_BOUND*/
68            "not-word-bound", /*OP_NOT_WORD_BOUND*/
69            "word-begin", /*OP_WORD_BEGIN*/
70            "word-end", /*OP_WORD_END*/
71            "begin-buf", /*OP_BEGIN_BUF*/
72            "end-buf", /*OP_END_BUF*/
73            "begin-line", /*OP_BEGIN_LINE*/
74            "end-line", /*OP_END_LINE*/
75            "semi-end-buf", /*OP_SEMI_END_BUF*/
76            "begin-position", /*OP_BEGIN_POSITION*/
77            "backref1", /*OP_BACKREF1*/
78            "backref2", /*OP_BACKREF2*/
79            "backrefn", /*OP_BACKREFN*/
80            "backrefn-ic", /*OP_BACKREFN_IC*/
81            "backref_multi", /*OP_BACKREF_MULTI*/
82            "backref_multi-ic", /*OP_BACKREF_MULTI_IC*/
83            "backref_at_level", /*OP_BACKREF_AT_LEVEL*/
84            "mem-start", /*OP_MEMORY_START*/
85            "mem-start-push", /*OP_MEMORY_START_PUSH*/
86            "mem-end-push", /*OP_MEMORY_END_PUSH*/
87            "mem-end-push-rec", /*OP_MEMORY_END_PUSH_REC*/
88            "mem-end", /*OP_MEMORY_END*/
89            "mem-end-rec", /*OP_MEMORY_END_REC*/
90            "fail", /*OP_FAIL*/
91            "jump", /*OP_JUMP*/
92            "push", /*OP_PUSH*/
93            "pop", /*OP_POP*/
94            "push-or-jump-e1", /*OP_PUSH_OR_JUMP_EXACT1*/
95            "push-if-peek-next", /*OP_PUSH_IF_PEEK_NEXT*/
96            "repeat", /*OP_REPEAT*/
97            "repeat-ng", /*OP_REPEAT_NG*/
98            "repeat-inc", /*OP_REPEAT_INC*/
99            "repeat-inc-ng", /*OP_REPEAT_INC_NG*/
100            "repeat-inc-sg", /*OP_REPEAT_INC_SG*/
101            "repeat-inc-ng-sg", /*OP_REPEAT_INC_NG_SG*/
102            "null-check-start", /*OP_NULL_CHECK_START*/
103            "null-check-end", /*OP_NULL_CHECK_END*/
104            "null-check-end-memst", /*OP_NULL_CHECK_END_MEMST*/
105            "null-check-end-memst-push", /*OP_NULL_CHECK_END_MEMST_PUSH*/
106            "push-pos", /*OP_PUSH_POS*/
107            "pop-pos", /*OP_POP_POS*/
108            "push-pos-not", /*OP_PUSH_POS_NOT*/
109            "fail-pos", /*OP_FAIL_POS*/
110            "push-stop-bt", /*OP_PUSH_STOP_BT*/
111            "pop-stop-bt", /*OP_POP_STOP_BT*/
112            "look-behind", /*OP_LOOK_BEHIND*/
113            "push-look-behind-not", /*OP_PUSH_LOOK_BEHIND_NOT*/
114            "fail-look-behind-not", /*OP_FAIL_LOOK_BEHIND_NOT*/
115            "call", /*OP_CALL*/
116            "return", /*OP_RETURN*/
117            "state-check-push", /*OP_STATE_CHECK_PUSH*/
118            "state-check-push-or-jump", /*OP_STATE_CHECK_PUSH_OR_JUMP*/
119            "state-check", /*OP_STATE_CHECK*/
120            "state-check-anychar*", /*OP_STATE_CHECK_ANYCHAR_STAR*/
121            "state-check-anychar-ml*", /*OP_STATE_CHECK_ANYCHAR_ML_STAR*/
122            "set-option-push", /*OP_SET_OPTION_PUSH*/
123            "set-option", /*OP_SET_OPTION*/
124    };
125
126    private final static int OpCodeArgTypes[] = new int[] {
127            Arguments.NON, /*OP_FINISH*/
128            Arguments.NON, /*OP_END*/
129            Arguments.SPECIAL, /*OP_EXACT1*/
130            Arguments.SPECIAL, /*OP_EXACT2*/
131            Arguments.SPECIAL, /*OP_EXACT3*/
132            Arguments.SPECIAL, /*OP_EXACT4*/
133            Arguments.SPECIAL, /*OP_EXACT5*/
134            Arguments.SPECIAL, /*OP_EXACTN*/
135            Arguments.SPECIAL, /*OP_EXACTMB2N1*/
136            Arguments.SPECIAL, /*OP_EXACTMB2N2*/
137            Arguments.SPECIAL, /*OP_EXACTMB2N3*/
138            Arguments.SPECIAL, /*OP_EXACTMB2N*/
139            Arguments.SPECIAL, /*OP_EXACTMB3N*/
140            Arguments.SPECIAL, /*OP_EXACTMBN*/
141            Arguments.SPECIAL, /*OP_EXACT1_IC*/
142            Arguments.SPECIAL, /*OP_EXACTN_IC*/
143            Arguments.SPECIAL, /*OP_CCLASS*/
144            Arguments.SPECIAL, /*OP_CCLASS_MB*/
145            Arguments.SPECIAL, /*OP_CCLASS_MIX*/
146            Arguments.SPECIAL, /*OP_CCLASS_NOT*/
147            Arguments.SPECIAL, /*OP_CCLASS_MB_NOT*/
148            Arguments.SPECIAL, /*OP_CCLASS_MIX_NOT*/
149            Arguments.SPECIAL, /*OP_CCLASS_NODE*/
150            Arguments.NON, /*OP_ANYCHAR*/
151            Arguments.NON, /*OP_ANYCHAR_ML*/
152            Arguments.NON, /*OP_ANYCHAR_STAR*/
153            Arguments.NON, /*OP_ANYCHAR_ML_STAR*/
154            Arguments.SPECIAL, /*OP_ANYCHAR_STAR_PEEK_NEXT*/
155            Arguments.SPECIAL, /*OP_ANYCHAR_ML_STAR_PEEK_NEXT*/
156            Arguments.NON, /*OP_WORD*/
157            Arguments.NON, /*OP_NOT_WORD*/
158            Arguments.NON, /*OP_WORD_BOUND*/
159            Arguments.NON, /*OP_NOT_WORD_BOUND*/
160            Arguments.NON, /*OP_WORD_BEGIN*/
161            Arguments.NON, /*OP_WORD_END*/
162            Arguments.NON, /*OP_BEGIN_BUF*/
163            Arguments.NON, /*OP_END_BUF*/
164            Arguments.NON, /*OP_BEGIN_LINE*/
165            Arguments.NON, /*OP_END_LINE*/
166            Arguments.NON, /*OP_SEMI_END_BUF*/
167            Arguments.NON, /*OP_BEGIN_POSITION*/
168            Arguments.NON, /*OP_BACKREF1*/
169            Arguments.NON, /*OP_BACKREF2*/
170            Arguments.MEMNUM, /*OP_BACKREFN*/
171            Arguments.SPECIAL, /*OP_BACKREFN_IC*/
172            Arguments.SPECIAL, /*OP_BACKREF_MULTI*/
173            Arguments.SPECIAL, /*OP_BACKREF_MULTI_IC*/
174            Arguments.SPECIAL, /*OP_BACKREF_AT_LEVEL*/
175            Arguments.MEMNUM, /*OP_MEMORY_START*/
176            Arguments.MEMNUM, /*OP_MEMORY_START_PUSH*/
177            Arguments.MEMNUM, /*OP_MEMORY_END_PUSH*/
178            Arguments.MEMNUM, /*OP_MEMORY_END_PUSH_REC*/
179            Arguments.MEMNUM, /*OP_MEMORY_END*/
180            Arguments.MEMNUM, /*OP_MEMORY_END_REC*/
181            Arguments.NON, /*OP_FAIL*/
182            Arguments.RELADDR, /*OP_JUMP*/
183            Arguments.RELADDR, /*OP_PUSH*/
184            Arguments.NON, /*OP_POP*/
185            Arguments.SPECIAL, /*OP_PUSH_OR_JUMP_EXACT1*/
186            Arguments.SPECIAL, /*OP_PUSH_IF_PEEK_NEXT*/
187            Arguments.SPECIAL, /*OP_REPEAT*/
188            Arguments.SPECIAL, /*OP_REPEAT_NG*/
189            Arguments.MEMNUM, /*OP_REPEAT_INC*/
190            Arguments.MEMNUM, /*OP_REPEAT_INC_NG*/
191            Arguments.MEMNUM, /*OP_REPEAT_INC_SG*/
192            Arguments.MEMNUM, /*OP_REPEAT_INC_NG_SG*/
193            Arguments.MEMNUM, /*OP_NULL_CHECK_START*/
194            Arguments.MEMNUM, /*OP_NULL_CHECK_END*/
195            Arguments.MEMNUM, /*OP_NULL_CHECK_END_MEMST*/
196            Arguments.MEMNUM, /*OP_NULL_CHECK_END_MEMST_PUSH*/
197            Arguments.NON, /*OP_PUSH_POS*/
198            Arguments.NON, /*OP_POP_POS*/
199            Arguments.RELADDR, /*OP_PUSH_POS_NOT*/
200            Arguments.NON, /*OP_FAIL_POS*/
201            Arguments.NON, /*OP_PUSH_STOP_BT*/
202            Arguments.NON, /*OP_POP_STOP_BT*/
203            Arguments.SPECIAL, /*OP_LOOK_BEHIND*/
204            Arguments.SPECIAL, /*OP_PUSH_LOOK_BEHIND_NOT*/
205            Arguments.NON, /*OP_FAIL_LOOK_BEHIND_NOT*/
206            Arguments.ABSADDR, /*OP_CALL*/
207            Arguments.NON, /*OP_RETURN*/
208            Arguments.SPECIAL, /*OP_STATE_CHECK_PUSH*/
209            Arguments.SPECIAL, /*OP_STATE_CHECK_PUSH_OR_JUMP*/
210            Arguments.STATE_CHECK, /*OP_STATE_CHECK*/
211            Arguments.STATE_CHECK, /*OP_STATE_CHECK_ANYCHAR_STAR*/
212            Arguments.STATE_CHECK, /*OP_STATE_CHECK_ANYCHAR_ML_STAR*/
213            Arguments.OPTION, /*OP_SET_OPTION_PUSH*/
214            Arguments.OPTION, /*OP_SET_OPTION*/
215    };
216
217    public ByteCodePrinter(final Regex regex) {
218        code = regex.code;
219        codeLength = regex.codeLength;
220        operands = regex.operands;
221
222        templates = regex.templates;
223    }
224
225    public String byteCodeListToString() {
226        return compiledByteCodeListToString();
227    }
228
229    private void pString(final StringBuilder sb, final int len, final int s) {
230        sb.append(":");
231        sb.append(new String(code, s, len));
232    }
233
234    private void pLenString(final StringBuilder sb, final int len, final int s) {
235        sb.append(":").append(len).append(":");
236        sb.append(new String(code, s, len));
237    }
238
239    private static void pLenStringFromTemplate(final StringBuilder sb, final int len, final char[] tm, final int idx) {
240        sb.append(":T:").append(len).append(":");
241        sb.append(tm, idx, len);
242    }
243
244    public int compiledByteCodeToString(final StringBuilder sb, final int bptr) {
245        int len, n, mem, addr, scn, cod;
246        BitSet bs;
247        CClassNode cc;
248        int tm, idx;
249        int bp = bptr;
250
251        sb.append("[").append(OpCodeNames[code[bp]]);
252        final int argType = OpCodeArgTypes[code[bp]];
253        final int ip = bp;
254        if (argType != Arguments.SPECIAL) {
255            bp++;
256            switch (argType) {
257            default:
258            case Arguments.NON:
259                break;
260
261            case Arguments.RELADDR:
262                sb.append(":(").append(code[bp]).append(")");
263                bp += OPSize.RELADDR;
264                break;
265
266            case Arguments.ABSADDR:
267                sb.append(":(").append(code[bp]).append(")");
268                bp += OPSize.ABSADDR;
269                break;
270
271            case Arguments.LENGTH:
272                sb.append(":").append(code[bp]);
273                bp += OPSize.LENGTH;
274                break;
275
276            case Arguments.MEMNUM:
277                sb.append(":").append(code[bp]);
278                bp += OPSize.MEMNUM;
279                break;
280
281            case Arguments.OPTION:
282                sb.append(":").append(code[bp]);
283                bp += OPSize.OPTION;
284                break;
285
286            case Arguments.STATE_CHECK:
287                sb.append(":").append(code[bp]);
288                bp += OPSize.STATE_CHECK;
289                break;
290            }
291        } else {
292            switch (code[bp++]) {
293            case OPCode.EXACT1:
294            case OPCode.ANYCHAR_STAR_PEEK_NEXT:
295            case OPCode.ANYCHAR_ML_STAR_PEEK_NEXT:
296                pString(sb, 1, bp++);
297                break;
298
299            case OPCode.EXACT2:
300                pString(sb, 2, bp);
301                bp += 2;
302                break;
303
304            case OPCode.EXACT3:
305                pString(sb, 3, bp);
306                bp += 3;
307                break;
308
309            case OPCode.EXACT4:
310                pString(sb, 4, bp);
311                bp += 4;
312                break;
313
314            case OPCode.EXACT5:
315                pString(sb, 5, bp);
316                bp += 5;
317                break;
318
319            case OPCode.EXACTN:
320                len = code[bp];
321                bp += OPSize.LENGTH;
322                if (Config.USE_STRING_TEMPLATES) {
323                    tm = code[bp];
324                    bp += OPSize.INDEX;
325                    idx = code[bp];
326                    bp += OPSize.INDEX;
327                    pLenStringFromTemplate(sb, len, templates[tm], idx);
328                } else {
329                    pLenString(sb, len, bp);
330                    bp += len;
331                }
332                break;
333
334            case OPCode.EXACT1_IC:
335                pString(sb, 1, bp);
336                bp++;
337                break;
338
339            case OPCode.EXACTN_IC:
340                len = code[bp];
341                bp += OPSize.LENGTH;
342                if (Config.USE_STRING_TEMPLATES) {
343                    tm = code[bp];
344                    bp += OPSize.INDEX;
345                    idx = code[bp];
346                    bp += OPSize.INDEX;
347                    pLenStringFromTemplate(sb, len, templates[tm], idx);
348                } else {
349                    pLenString(sb, len, bp);
350                    bp += len;
351                }
352                break;
353
354            case OPCode.CCLASS:
355                bs = new BitSet();
356                System.arraycopy(code, bp, bs.bits, 0, BitSet.BITSET_SIZE);
357                n = bs.numOn();
358                bp += BitSet.BITSET_SIZE;
359                sb.append(":").append(n);
360                break;
361
362            case OPCode.CCLASS_NOT:
363                bs = new BitSet();
364                System.arraycopy(code, bp, bs.bits, 0, BitSet.BITSET_SIZE);
365                n = bs.numOn();
366                bp += BitSet.BITSET_SIZE;
367                sb.append(":").append(n);
368                break;
369
370            case OPCode.CCLASS_MB:
371            case OPCode.CCLASS_MB_NOT:
372                len = code[bp];
373                bp += OPSize.LENGTH;
374                cod = code[bp];
375                //bp += OPSize.CODE_POINT;
376                bp += len;
377                sb.append(":").append(cod).append(":").append(len);
378                break;
379
380            case OPCode.CCLASS_MIX:
381            case OPCode.CCLASS_MIX_NOT:
382                bs = new BitSet();
383                System.arraycopy(code, bp, bs.bits, 0, BitSet.BITSET_SIZE);
384                n = bs.numOn();
385                bp += BitSet.BITSET_SIZE;
386                len = code[bp];
387                bp += OPSize.LENGTH;
388                cod = code[bp];
389                //bp += OPSize.CODE_POINT;
390                bp += len;
391                sb.append(":").append(n).append(":").append(cod).append(":").append(len);
392                break;
393
394            case OPCode.CCLASS_NODE:
395                cc = (CClassNode)operands[code[bp]];
396                bp += OPSize.POINTER;
397                n = cc.bs.numOn();
398                sb.append(":").append(cc).append(":").append(n);
399                break;
400
401            case OPCode.BACKREFN_IC:
402                mem = code[bp];
403                bp += OPSize.MEMNUM;
404                sb.append(":").append(mem);
405                break;
406
407            case OPCode.BACKREF_MULTI_IC:
408            case OPCode.BACKREF_MULTI:
409                sb.append(" ");
410                len = code[bp];
411                bp += OPSize.LENGTH;
412                for (int i=0; i<len; i++) {
413                    mem = code[bp];
414                    bp += OPSize.MEMNUM;
415                    if (i > 0) {
416                        sb.append(", ");
417                    }
418                    sb.append(mem);
419                }
420                break;
421
422            case OPCode.BACKREF_WITH_LEVEL: {
423                final int option = code[bp];
424                bp += OPSize.OPTION;
425                sb.append(":").append(option);
426                final int level = code[bp];
427                bp += OPSize.LENGTH;
428                sb.append(":").append(level);
429                sb.append(" ");
430                len = code[bp];
431                bp += OPSize.LENGTH;
432                for (int i=0; i<len; i++) {
433                    mem = code[bp];
434                    bp += OPSize.MEMNUM;
435                    if (i > 0) {
436                        sb.append(", ");
437                    }
438                    sb.append(mem);
439                }
440                break;
441            }
442
443            case OPCode.REPEAT:
444            case OPCode.REPEAT_NG:
445                mem = code[bp];
446                bp += OPSize.MEMNUM;
447                addr = code[bp];
448                bp += OPSize.RELADDR;
449                sb.append(":").append(mem).append(":").append(addr);
450                break;
451
452            case OPCode.PUSH_OR_JUMP_EXACT1:
453            case OPCode.PUSH_IF_PEEK_NEXT:
454                addr = code[bp];
455                bp += OPSize.RELADDR;
456                sb.append(":(").append(addr).append(")");
457                pString(sb, 1, bp);
458                bp++;
459                break;
460
461            case OPCode.LOOK_BEHIND:
462                len = code[bp];
463                bp += OPSize.LENGTH;
464                sb.append(":").append(len);
465                break;
466
467            case OPCode.PUSH_LOOK_BEHIND_NOT:
468                addr = code[bp];
469                bp += OPSize.RELADDR;
470                len = code[bp];
471                bp += OPSize.LENGTH;
472                sb.append(":").append(len).append(":(").append(addr).append(")");
473                break;
474
475            case OPCode.STATE_CHECK_PUSH:
476            case OPCode.STATE_CHECK_PUSH_OR_JUMP:
477                scn = code[bp];
478                bp += OPSize.STATE_CHECK_NUM;
479                addr = code[bp];
480                bp += OPSize.RELADDR;
481                sb.append(":").append(scn).append(":(").append(addr).append(")");
482                break;
483
484            default:
485                throw new InternalException("undefined code: " + code[--bp]);
486            }
487        }
488
489        sb.append("]");
490
491        // @opcode_address(opcode_size)
492        if (Config.DEBUG_COMPILE_BYTE_CODE_INFO) {
493            sb.append("@").append(ip).append("(").append((bp - ip)).append(")");
494        }
495
496        return bp;
497    }
498
499    private String compiledByteCodeListToString() {
500        final StringBuilder sb = new StringBuilder();
501        sb.append("code length: ").append(codeLength).append("\n");
502
503        int ncode = 0;
504        int bp = 0;
505        final int end = codeLength;
506
507        while (bp < end) {
508            ncode++;
509
510            if (bp > 0) {
511                sb.append(ncode % 5 == 0 ? "\n" : " ");
512            }
513
514            bp = compiledByteCodeToString(sb, bp);
515        }
516        sb.append("\n");
517        return sb.toString();
518    }
519}
520