CClassNode.java revision 1088:7e62d98d4625
1/*
2 * Permission is hereby granted, free of charge, to any person obtaining a copy of
3 * this software and associated documentation files (the "Software"), to deal in
4 * the Software without restriction, including without limitation the rights to
5 * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
6 * of the Software, and to permit persons to whom the Software is furnished to do
7 * so, subject to the following conditions:
8 *
9 * The above copyright notice and this permission notice shall be included in all
10 * copies or substantial portions of the Software.
11 *
12 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
13 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
14 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
15 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
16 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
17 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
18 * SOFTWARE.
19 */
20package jdk.nashorn.internal.runtime.regexp.joni.ast;
21
22import jdk.nashorn.internal.runtime.regexp.joni.BitSet;
23import jdk.nashorn.internal.runtime.regexp.joni.CodeRangeBuffer;
24import jdk.nashorn.internal.runtime.regexp.joni.Config;
25import jdk.nashorn.internal.runtime.regexp.joni.EncodingHelper;
26import jdk.nashorn.internal.runtime.regexp.joni.ScanEnvironment;
27import jdk.nashorn.internal.runtime.regexp.joni.Syntax;
28import jdk.nashorn.internal.runtime.regexp.joni.constants.CCSTATE;
29import jdk.nashorn.internal.runtime.regexp.joni.constants.CCVALTYPE;
30import jdk.nashorn.internal.runtime.regexp.joni.encoding.CharacterType;
31import jdk.nashorn.internal.runtime.regexp.joni.encoding.IntHolder;
32import jdk.nashorn.internal.runtime.regexp.joni.exception.ErrorMessages;
33import jdk.nashorn.internal.runtime.regexp.joni.exception.InternalException;
34import jdk.nashorn.internal.runtime.regexp.joni.exception.SyntaxException;
35import jdk.nashorn.internal.runtime.regexp.joni.exception.ValueException;
36
37@SuppressWarnings("javadoc")
38public final class CClassNode extends Node {
39    private static final int FLAG_NCCLASS_NOT = 1<<0;
40    private static final int FLAG_NCCLASS_SHARE = 1<<1;
41
42    int flags;
43    public final BitSet bs = new BitSet();  // conditional creation ?
44    public CodeRangeBuffer mbuf;            /* multi-byte info or NULL */
45
46    private int ctype;                      // for hashing purposes
47
48    private final static short AsciiCtypeTable[] = {
49            0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
50            0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008,
51            0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
52            0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
53            0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
54            0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
55            0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0,
56            0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
57            0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2,
58            0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
59            0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
60            0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0,
61            0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2,
62            0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
63            0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
64            0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008,
65            0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
66            0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
67            0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
68            0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
69            0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
70            0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
71            0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
72            0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
73            0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
74            0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
75            0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
76            0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
77            0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
78            0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
79            0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
80            0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000
81    };
82
83    // node_new_cclass
84    public CClassNode() {}
85
86    public void clear() {
87        bs.clear();
88        flags = 0;
89        mbuf = null;
90    }
91
92    @Override
93    public int getType() {
94        return CCLASS;
95    }
96
97    @Override
98    public String getName() {
99        return "Character Class";
100    }
101
102    @Override
103    public boolean equals(final Object other) {
104        if (!(other instanceof CClassNode)) {
105            return false;
106        }
107        final CClassNode cc = (CClassNode)other;
108        return ctype == cc.ctype && isNot() == cc.isNot();
109    }
110
111    @Override
112    public int hashCode() {
113        if (Config.USE_SHARED_CCLASS_TABLE) {
114            int hash = 0;
115            hash += ctype;
116            if (isNot()) {
117                hash++;
118            }
119            return hash + (hash >> 5);
120        }
121        return super.hashCode();
122    }
123
124    @Override
125    public String toString(final int level) {
126        final StringBuilder value = new StringBuilder();
127        value.append("\n  flags: " + flagsToString());
128        value.append("\n  bs: " + pad(bs, level + 1));
129        value.append("\n  mbuf: " + pad(mbuf, level + 1));
130
131        return value.toString();
132    }
133
134    public String flagsToString() {
135        final StringBuilder f = new StringBuilder();
136        if (isNot()) {
137            f.append("NOT ");
138        }
139        if (isShare()) {
140            f.append("SHARE ");
141        }
142        return f.toString();
143    }
144
145    public boolean isEmpty() {
146        return mbuf == null && bs.isEmpty();
147    }
148
149    public void addCodeRangeToBuf(final int from, final int to) {
150        mbuf = CodeRangeBuffer.addCodeRangeToBuff(mbuf, from, to);
151    }
152
153    public void addCodeRange(final ScanEnvironment env, final int from, final int to) {
154        mbuf = CodeRangeBuffer.addCodeRange(mbuf, env, from, to);
155    }
156
157    public void addAllMultiByteRange() {
158        mbuf = CodeRangeBuffer.addAllMultiByteRange(mbuf);
159    }
160
161    public void clearNotFlag() {
162        if (isNot()) {
163            bs.invert();
164
165            mbuf = CodeRangeBuffer.notCodeRangeBuff(mbuf);
166            clearNot();
167        }
168    }
169
170    // and_cclass
171    public void and(final CClassNode other) {
172        final boolean not1 = isNot();
173        BitSet bsr1 = bs;
174        final CodeRangeBuffer buf1 = mbuf;
175        final boolean not2 = other.isNot();
176        BitSet bsr2 = other.bs;
177        final CodeRangeBuffer buf2 = other.mbuf;
178
179        if (not1) {
180            final BitSet bs1 = new BitSet();
181            bsr1.invertTo(bs1);
182            bsr1 = bs1;
183        }
184
185        if (not2) {
186            final BitSet bs2 = new BitSet();
187            bsr2.invertTo(bs2);
188            bsr2 = bs2;
189        }
190
191        bsr1.and(bsr2);
192
193        if (bsr1 != bs) {
194            bs.copy(bsr1);
195            bsr1 = bs;
196        }
197
198        if (not1) {
199            bs.invert();
200        }
201
202        CodeRangeBuffer pbuf = null;
203
204        if (not1 && not2) {
205            pbuf = CodeRangeBuffer.orCodeRangeBuff(buf1, false, buf2, false);
206        } else {
207            pbuf = CodeRangeBuffer.andCodeRangeBuff(buf1, not1, buf2, not2);
208
209            if (not1) {
210                pbuf = CodeRangeBuffer.notCodeRangeBuff(pbuf);
211            }
212        }
213        mbuf = pbuf;
214
215    }
216
217    // or_cclass
218    public void or(final CClassNode other) {
219        final boolean not1 = isNot();
220        BitSet bsr1 = bs;
221        final CodeRangeBuffer buf1 = mbuf;
222        final boolean not2 = other.isNot();
223        BitSet bsr2 = other.bs;
224        final CodeRangeBuffer buf2 = other.mbuf;
225
226        if (not1) {
227            final BitSet bs1 = new BitSet();
228            bsr1.invertTo(bs1);
229            bsr1 = bs1;
230        }
231
232        if (not2) {
233            final BitSet bs2 = new BitSet();
234            bsr2.invertTo(bs2);
235            bsr2 = bs2;
236        }
237
238        bsr1.or(bsr2);
239
240        if (bsr1 != bs) {
241            bs.copy(bsr1);
242            bsr1 = bs;
243        }
244
245        if (not1) {
246            bs.invert();
247        }
248
249        CodeRangeBuffer pbuf = null;
250        if (not1 && not2) {
251            pbuf = CodeRangeBuffer.andCodeRangeBuff(buf1, false, buf2, false);
252        } else {
253            pbuf = CodeRangeBuffer.orCodeRangeBuff(buf1, not1, buf2, not2);
254            if (not1) {
255                pbuf = CodeRangeBuffer.notCodeRangeBuff(pbuf);
256            }
257        }
258        mbuf = pbuf;
259    }
260
261    // add_ctype_to_cc_by_range // Encoding out!
262    public void addCTypeByRange(final int ct, final boolean not, final int sbOut, final int mbr[]) {
263        final int n = mbr[0];
264
265        if (!not) {
266            for (int i=0; i<n; i++) {
267                for (int j=mbr[i * 2 + 1]; j<=mbr[i * 2 + 2]; j++) {
268                    if (j >= sbOut) {
269                        if (Config.VANILLA) {
270                            if (j == mbr[i * 2 + 2]) {
271                                i++;
272                            } else if (j > mbr[i * 2 + 1]) {
273                                addCodeRangeToBuf(j, mbr[i * 2 + 2]);
274                                i++;
275                            }
276                        } else {
277                            if (j >= mbr[i * 2 + 1]) {
278                                addCodeRangeToBuf(j, mbr[i * 2 + 2]);
279                                i++;
280                            }
281                        }
282                        // !goto sb_end!, remove duplication!
283                        for (; i<n; i++) {
284                            addCodeRangeToBuf(mbr[2 * i + 1], mbr[2 * i + 2]);
285                        }
286                        return;
287                    }
288                    bs.set(j);
289                }
290            }
291            // !sb_end:!
292            for (int i=0; i<n; i++) {
293                addCodeRangeToBuf(mbr[2 * i + 1], mbr[2 * i + 2]);
294            }
295
296        } else {
297            int prev = 0;
298
299            for (int i=0; i<n; i++) {
300                for (int j=prev; j < mbr[2 * i + 1]; j++) {
301                    if (j >= sbOut) {
302                        // !goto sb_end2!, remove duplication
303                        prev = sbOut;
304                        for (i=0; i<n; i++) {
305                            if (prev < mbr[2 * i + 1]) {
306                                addCodeRangeToBuf(prev, mbr[i * 2 + 1] - 1);
307                            }
308                            prev = mbr[i * 2 + 2] + 1;
309                        }
310                        if (prev < 0x7fffffff/*!!!*/) {
311                            addCodeRangeToBuf(prev, 0x7fffffff);
312                        }
313                        return;
314                    }
315                    bs.set(j);
316                }
317                prev = mbr[2 * i + 2] + 1;
318            }
319
320            for (int j=prev; j<sbOut; j++) {
321                bs.set(j);
322            }
323
324            // !sb_end2:!
325            prev = sbOut;
326            for (int i=0; i<n; i++) {
327                if (prev < mbr[2 * i + 1]) {
328                    addCodeRangeToBuf(prev, mbr[i * 2 + 1] - 1);
329                }
330                prev = mbr[i * 2 + 2] + 1;
331            }
332            if (prev < 0x7fffffff/*!!!*/) {
333                addCodeRangeToBuf(prev, 0x7fffffff);
334            }
335        }
336    }
337
338    public void addCType(final int ctp, final boolean not, final ScanEnvironment env, final IntHolder sbOut) {
339        int ct = ctp;
340        if (Config.NON_UNICODE_SDW) {
341            switch (ct) {
342            case CharacterType.D:
343            case CharacterType.S:
344            case CharacterType.W:
345                ct ^= CharacterType.SPECIAL_MASK;
346
347                if (env.syntax == Syntax.JAVASCRIPT && ct == CharacterType.SPACE) {
348                    // \s in JavaScript includes unicode characters.
349                    break;
350                }
351
352                if (not) {
353                    for (int c = 0; c < BitSet.SINGLE_BYTE_SIZE; c++) {
354                        // if (!ASCIIEncoding.INSTANCE.isCodeCType(c, ctype)) bs.set(c);
355                        if ((AsciiCtypeTable[c] & (1 << ct)) == 0) {
356                            bs.set(c);
357                        }
358                    }
359                    addAllMultiByteRange();
360                } else {
361                    for (int c = 0; c < BitSet.SINGLE_BYTE_SIZE; c++) {
362                        // if (ASCIIEncoding.INSTANCE.isCodeCType(c, ctype)) bs.set(c);
363                        if ((AsciiCtypeTable[c] & (1 << ct)) != 0) {
364                            bs.set(c);
365                        }
366                    }
367                }
368                return;
369            default:
370                break;
371            }
372        }
373
374        final int[] ranges = EncodingHelper.ctypeCodeRange(ct, sbOut);
375        if (ranges != null) {
376            addCTypeByRange(ct, not, sbOut.value, ranges);
377            return;
378        }
379
380        switch(ct) {
381        case CharacterType.ALPHA:
382        case CharacterType.BLANK:
383        case CharacterType.CNTRL:
384        case CharacterType.DIGIT:
385        case CharacterType.LOWER:
386        case CharacterType.PUNCT:
387        case CharacterType.SPACE:
388        case CharacterType.UPPER:
389        case CharacterType.XDIGIT:
390        case CharacterType.ASCII:
391        case CharacterType.ALNUM:
392            if (not) {
393                for (int c=0; c<BitSet.SINGLE_BYTE_SIZE; c++) {
394                    if (!EncodingHelper.isCodeCType(c, ct)) {
395                        bs.set(c);
396                    }
397                }
398                addAllMultiByteRange();
399            } else {
400                for (int c=0; c<BitSet.SINGLE_BYTE_SIZE; c++) {
401                    if (EncodingHelper.isCodeCType(c, ct)) {
402                        bs.set(c);
403                    }
404                }
405            }
406            break;
407
408        case CharacterType.GRAPH:
409        case CharacterType.PRINT:
410            if (not) {
411                for (int c=0; c<BitSet.SINGLE_BYTE_SIZE; c++) {
412                    if (!EncodingHelper.isCodeCType(c, ct)) {
413                        bs.set(c);
414                    }
415                }
416            } else {
417                for (int c=0; c<BitSet.SINGLE_BYTE_SIZE; c++) {
418                    if (EncodingHelper.isCodeCType(c, ct)) {
419                        bs.set(c);
420                    }
421                }
422                addAllMultiByteRange();
423            }
424            break;
425
426        case CharacterType.WORD:
427            if (!not) {
428                for (int c=0; c<BitSet.SINGLE_BYTE_SIZE; c++) {
429                    if (EncodingHelper.isWord(c)) {
430                        bs.set(c);
431                    }
432                }
433
434                addAllMultiByteRange();
435            } else {
436                for (int c=0; c<BitSet.SINGLE_BYTE_SIZE; c++) {
437                    if (!EncodingHelper.isWord(c)) {
438                        bs.set(c);
439                    }
440                }
441            }
442            break;
443
444        default:
445            throw new InternalException(ErrorMessages.ERR_PARSER_BUG);
446        } // switch
447    }
448
449    public static final class CCStateArg {
450        public int v;
451        public int vs;
452        public boolean vsIsRaw;
453        public boolean vIsRaw;
454        public CCVALTYPE inType;
455        public CCVALTYPE type;
456        public CCSTATE state;
457    }
458
459    public void nextStateClass(final CCStateArg arg, final ScanEnvironment env) {
460        if (arg.state == CCSTATE.RANGE) {
461            throw new SyntaxException(ErrorMessages.ERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE);
462        }
463
464        if (arg.state == CCSTATE.VALUE && arg.type != CCVALTYPE.CLASS) {
465            if (arg.type == CCVALTYPE.SB) {
466                bs.set(arg.vs);
467            } else if (arg.type == CCVALTYPE.CODE_POINT) {
468                addCodeRange(env, arg.vs, arg.vs);
469            }
470        }
471        arg.state = CCSTATE.VALUE;
472        arg.type = CCVALTYPE.CLASS;
473    }
474
475    public void nextStateValue(final CCStateArg arg, final ScanEnvironment env) {
476
477        switch(arg.state) {
478        case VALUE:
479            if (arg.type == CCVALTYPE.SB) {
480                if (arg.vs > 0xff) {
481                    throw new ValueException(ErrorMessages.ERR_INVALID_CODE_POINT_VALUE);
482                }
483                bs.set(arg.vs);
484            } else if (arg.type == CCVALTYPE.CODE_POINT) {
485                addCodeRange(env, arg.vs, arg.vs);
486            }
487            break;
488
489        case RANGE:
490            if (arg.inType == arg.type) {
491                if (arg.inType == CCVALTYPE.SB) {
492                    if (arg.vs > 0xff || arg.v > 0xff) {
493                        throw new ValueException(ErrorMessages.ERR_INVALID_CODE_POINT_VALUE);
494                    }
495
496                    if (arg.vs > arg.v) {
497                        if (env.syntax.allowEmptyRangeInCC()) {
498                            // goto ccs_range_end
499                            arg.state = CCSTATE.COMPLETE;
500                            break;
501                        }
502                        throw new ValueException(ErrorMessages.ERR_EMPTY_RANGE_IN_CHAR_CLASS);
503                    }
504                    bs.setRange(arg.vs, arg.v);
505                } else {
506                    addCodeRange(env, arg.vs, arg.v);
507                }
508            } else {
509                if (arg.vs > arg.v) {
510                    if (env.syntax.allowEmptyRangeInCC()) {
511                        // goto ccs_range_end
512                        arg.state = CCSTATE.COMPLETE;
513                        break;
514                    }
515                    throw new ValueException(ErrorMessages.ERR_EMPTY_RANGE_IN_CHAR_CLASS);
516                }
517                bs.setRange(arg.vs, arg.v < 0xff ? arg.v : 0xff);
518                addCodeRange(env, arg.vs, arg.v);
519            }
520            // ccs_range_end:
521            arg.state = CCSTATE.COMPLETE;
522            break;
523
524        case COMPLETE:
525        case START:
526            arg.state = CCSTATE.VALUE;
527            break;
528
529        default:
530            break;
531
532        } // switch
533
534        arg.vsIsRaw = arg.vIsRaw;
535        arg.vs = arg.v;
536        arg.type = arg.inType;
537    }
538
539    // onig_is_code_in_cc_len
540    public boolean isCodeInCCLength(final int code) {
541        boolean found;
542
543        if (code > 0xff) {
544            found = mbuf != null && mbuf.isInCodeRange(code);
545        } else {
546            found = bs.at(code);
547        }
548
549        if (isNot()) {
550            return !found;
551        }
552        return found;
553    }
554
555    // onig_is_code_in_cc
556    public boolean isCodeInCC(final int code) {
557         return isCodeInCCLength(code);
558    }
559
560    public void setNot() {
561        flags |= FLAG_NCCLASS_NOT;
562    }
563
564    public void clearNot() {
565        flags &= ~FLAG_NCCLASS_NOT;
566    }
567
568    public boolean isNot() {
569        return (flags & FLAG_NCCLASS_NOT) != 0;
570    }
571
572    public void setShare() {
573        flags |= FLAG_NCCLASS_SHARE;
574    }
575
576    public void clearShare() {
577        flags &= ~FLAG_NCCLASS_SHARE;
578    }
579
580    public boolean isShare() {
581        return (flags & FLAG_NCCLASS_SHARE) != 0;
582    }
583
584}
585