CodeRangeBuffer.java revision 953:221a84ef44c0
1/*
2 * Permission is hereby granted, free of charge, to any person obtaining a copy of
3 * this software and associated documentation files (the "Software"), to deal in
4 * the Software without restriction, including without limitation the rights to
5 * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
6 * of the Software, and to permit persons to whom the Software is furnished to do
7 * so, subject to the following conditions:
8 *
9 * The above copyright notice and this permission notice shall be included in all
10 * copies or substantial portions of the Software.
11 *
12 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
13 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
14 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
15 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
16 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
17 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
18 * SOFTWARE.
19 */
20package jdk.nashorn.internal.runtime.regexp.joni;
21
22import jdk.nashorn.internal.runtime.regexp.joni.exception.ErrorMessages;
23import jdk.nashorn.internal.runtime.regexp.joni.exception.ValueException;
24
25public final class CodeRangeBuffer implements Cloneable {
26    private static final int INIT_MULTI_BYTE_RANGE_SIZE = 5;
27    private static final int ALL_MULTI_BYTE_RANGE = 0x7fffffff;
28
29    int[] p;
30    int used;
31
32
33    public CodeRangeBuffer() {
34        p = new int[INIT_MULTI_BYTE_RANGE_SIZE];
35        writeCodePoint(0, 0);
36    }
37
38    // CodeRange.isInCodeRange
39    public boolean isInCodeRange(final int code) {
40        int low = 0;
41        final int n = p[0];
42        int high = n;
43
44        while (low < high) {
45            final int x = (low + high) >> 1;
46            if (code > p[(x << 1) + 2]) {
47                low = x + 1;
48            } else {
49                high = x;
50            }
51        }
52        return low < n && code >= p[(low << 1) + 1];
53    }
54
55    private CodeRangeBuffer(final CodeRangeBuffer orig) {
56        p = new int[orig.p.length];
57        System.arraycopy(orig.p, 0, p, 0, p.length);
58        used = orig.used;
59    }
60
61    @Override
62    public String toString() {
63        final StringBuilder buf = new StringBuilder();
64        buf.append("CodeRange");
65        buf.append("\n  used: ").append(used);
66        buf.append("\n  code point: ").append(p[0]);
67        buf.append("\n  ranges: ");
68
69        for (int i=0; i<p[0]; i++) {
70            buf.append("[").append(rangeNumToString(p[i * 2 + 1])).append("..").append(rangeNumToString(p[i * 2 + 2])).append("]");
71            if (i > 0 && i % 6 == 0) buf.append("\n          ");
72        }
73
74        return buf.toString();
75    }
76
77    private static String rangeNumToString(final int num){
78        return "0x" + Integer.toString(num, 16);
79    }
80
81    public void expand(final int low) {
82        int length = p.length;
83        do { length <<= 1; } while (length < low);
84        final int[]tmp = new int[length];
85        System.arraycopy(p, 0, tmp, 0, used);
86        p = tmp;
87    }
88
89    public void ensureSize(final int size) {
90        int length = p.length;
91        while (length < size ) { length <<= 1; }
92        if (p.length != length) {
93            final int[]tmp = new int[length];
94            System.arraycopy(p, 0, tmp, 0, used);
95            p = tmp;
96        }
97    }
98
99    private void moveRight(final int from, final int to, final int n) {
100        if (to + n > p.length) expand(to + n);
101        System.arraycopy(p, from, p, to, n);
102        if (to + n > used) used = to + n;
103    }
104
105    protected void moveLeft(final int from, final int to, final int n) {
106        System.arraycopy(p, from, p, to, n);
107    }
108
109    private void moveLeftAndReduce(final int from, final int to) {
110        System.arraycopy(p, from, p, to, used - from);
111        used -= from - to;
112    }
113
114    public void writeCodePoint(final int pos, final int b) {
115        final int u = pos + 1;
116        if (p.length < u) expand(u);
117        p[pos] = b;
118        if (used < u) used = u;
119    }
120
121    @Override
122    public CodeRangeBuffer clone() {
123        return new CodeRangeBuffer(this);
124    }
125
126    // ugly part: these methods should be made OO
127    // add_code_range_to_buf
128    public static CodeRangeBuffer addCodeRangeToBuff(CodeRangeBuffer pbuf, int from, int to) {
129        if (from > to) {
130            final int n = from;
131            from = to;
132            to = n;
133        }
134
135        if (pbuf == null) pbuf = new CodeRangeBuffer(); // move to CClassNode
136
137        final int[]p = pbuf.p;
138        int n = p[0];
139
140        int low = 0;
141        int bound = n;
142
143        while (low < bound) {
144            final int x = (low + bound) >>> 1;
145            if (from > p[x * 2 + 2]) {
146                low = x + 1;
147            } else {
148                bound = x;
149            }
150        }
151
152        int high = low;
153        bound = n;
154
155        while (high < bound) {
156            final int x = (high + bound) >>> 1;
157            if (to >= p[x * 2 + 1] - 1) {
158                high = x + 1;
159            } else {
160                bound = x;
161            }
162        }
163
164        final int incN = low + 1 - high;
165
166        if (n + incN > Config.MAX_MULTI_BYTE_RANGES_NUM) throw new ValueException(ErrorMessages.ERR_TOO_MANY_MULTI_BYTE_RANGES);
167
168        if (incN != 1) {
169            if (from > p[low * 2 + 1]) from = p[low * 2 + 1];
170            if (to < p[(high - 1) * 2 + 2]) to = p[(high - 1) * 2 + 2];
171        }
172
173        if (incN != 0 && high < n) {
174            final int fromPos = 1 + high * 2;
175            final int toPos = 1 + (low + 1) * 2;
176            final int size = (n - high) * 2;
177
178            if (incN > 0) {
179                pbuf.moveRight(fromPos, toPos, size);
180            } else {
181                pbuf.moveLeftAndReduce(fromPos, toPos);
182            }
183        }
184
185        final int pos = 1 + low * 2;
186        // pbuf.ensureSize(pos + 2);
187        pbuf.writeCodePoint(pos, from);
188        pbuf.writeCodePoint(pos + 1, to);
189        n += incN;
190        pbuf.writeCodePoint(0, n);
191
192        return pbuf;
193    }
194
195    // add_code_range, be aware of it returning null!
196    public static CodeRangeBuffer addCodeRange(final CodeRangeBuffer pbuf, final ScanEnvironment env, final int from, final int to) {
197        if (from > to) {
198            if (env.syntax.allowEmptyRangeInCC()) {
199                return pbuf;
200            } else {
201                throw new ValueException(ErrorMessages.ERR_EMPTY_RANGE_IN_CHAR_CLASS);
202            }
203        }
204        return addCodeRangeToBuff(pbuf, from, to);
205    }
206
207    // SET_ALL_MULTI_BYTE_RANGE
208    protected static CodeRangeBuffer setAllMultiByteRange(final CodeRangeBuffer pbuf) {
209        return addCodeRangeToBuff(pbuf, EncodingHelper.mbcodeStartPosition(), ALL_MULTI_BYTE_RANGE);
210    }
211
212    // ADD_ALL_MULTI_BYTE_RANGE
213    public static CodeRangeBuffer addAllMultiByteRange(final CodeRangeBuffer pbuf) {
214        return setAllMultiByteRange(pbuf);
215    }
216
217    // not_code_range_buf
218    public static CodeRangeBuffer notCodeRangeBuff(final CodeRangeBuffer bbuf) {
219        CodeRangeBuffer pbuf = null;
220
221        if (bbuf == null) return setAllMultiByteRange(pbuf);
222
223        final int[]p = bbuf.p;
224        final int n = p[0];
225
226        if (n <= 0) return setAllMultiByteRange(pbuf);
227
228        int pre = EncodingHelper.mbcodeStartPosition();
229
230        int from;
231        int to = 0;
232        for (int i=0; i<n; i++) {
233            from = p[i * 2 + 1];
234            to = p[i * 2 + 2];
235            if (pre <= from - 1) {
236                pbuf = addCodeRangeToBuff(pbuf, pre, from - 1);
237            }
238            if (to == ALL_MULTI_BYTE_RANGE) break;
239            pre = to + 1;
240        }
241
242        if (to < ALL_MULTI_BYTE_RANGE) pbuf = addCodeRangeToBuff(pbuf, to + 1, ALL_MULTI_BYTE_RANGE);
243        return pbuf;
244    }
245
246    // or_code_range_buf
247    public static CodeRangeBuffer orCodeRangeBuff(CodeRangeBuffer bbuf1, boolean not1,
248                                                  CodeRangeBuffer bbuf2, boolean not2) {
249        CodeRangeBuffer pbuf = null;
250
251        if (bbuf1 == null && bbuf2 == null) {
252            if (not1 || not2) {
253                return setAllMultiByteRange(pbuf);
254            }
255            return null;
256        }
257
258        if (bbuf2 == null) {
259            CodeRangeBuffer tbuf;
260            boolean tnot;
261            // swap
262            tnot = not1; not1 = not2; not2 = tnot;
263            tbuf = bbuf1; bbuf1 = bbuf2; bbuf2 = tbuf;
264        }
265
266        if (bbuf1 == null) {
267            if (not1) {
268                return setAllMultiByteRange(pbuf);
269            } else {
270                if (!not2) {
271                    return bbuf2.clone();
272                } else {
273                    return notCodeRangeBuff(bbuf2);
274                }
275            }
276        }
277
278        if (not1) {
279            CodeRangeBuffer tbuf;
280            boolean tnot;
281            // swap
282            tnot = not1; not1 = not2; not2 = tnot;
283            tbuf = bbuf1; bbuf1 = bbuf2; bbuf2 = tbuf;
284        }
285
286        if (!not2 && !not1) { /* 1 OR 2 */
287            pbuf = bbuf2.clone();
288        } else if (!not1) { /* 1 OR (not 2) */
289            pbuf = notCodeRangeBuff(bbuf2);
290        }
291
292        final int[]p1 = bbuf1.p;
293        final int n1 = p1[0];
294
295        for (int i=0; i<n1; i++) {
296            final int from = p1[i * 2 + 1];
297            final int to = p1[i * 2 + 2];
298            pbuf = addCodeRangeToBuff(pbuf, from, to);
299        }
300
301        return pbuf;
302    }
303
304    // and_code_range1
305    public static CodeRangeBuffer andCodeRange1(CodeRangeBuffer pbuf, int from1, int to1, final int[]data, final int n) {
306        for (int i=0; i<n; i++) {
307            final int from2 = data[i * 2 + 1];
308            final int to2 = data[i * 2 + 2];
309            if (from2 < from1) {
310                if (to2 < from1) {
311                    continue;
312                } else {
313                    from1 = to2 + 1;
314                }
315            } else if (from2 <= to1) {
316                if (to2 < to1) {
317                    if (from1 <= from2 - 1) {
318                        pbuf = addCodeRangeToBuff(pbuf, from1, from2 - 1);
319                    }
320                    from1 = to2 + 1;
321                } else {
322                    to1 = from2 - 1;
323                }
324            } else {
325                from1 = from2;
326            }
327            if (from1 > to1) break;
328        }
329
330        if (from1 <= to1) {
331            pbuf = addCodeRangeToBuff(pbuf, from1, to1);
332        }
333
334        return pbuf;
335    }
336
337    // and_code_range_buf
338    public static CodeRangeBuffer andCodeRangeBuff(CodeRangeBuffer bbuf1, boolean not1,
339                                                   CodeRangeBuffer bbuf2, boolean not2) {
340        CodeRangeBuffer pbuf = null;
341
342        if (bbuf1 == null) {
343            if (not1 && bbuf2 != null) return bbuf2.clone(); /* not1 != 0 -> not2 == 0 */
344            return null;
345        } else if (bbuf2 == null) {
346            if (not2) return bbuf1.clone();
347            return null;
348        }
349
350        if (not1) {
351            CodeRangeBuffer tbuf;
352            boolean tnot;
353            // swap
354            tnot = not1; not1 = not2; not2 = tnot;
355            tbuf = bbuf1; bbuf1 = bbuf2; bbuf2 = tbuf;
356        }
357
358        final int[]p1 = bbuf1.p;
359        final int n1 = p1[0];
360        final int[]p2 = bbuf2.p;
361        final int n2 = p2[0];
362
363        if (!not2 && !not1) { /* 1 AND 2 */
364            for (int i=0; i<n1; i++) {
365                final int from1 = p1[i * 2 + 1];
366                final int to1 = p1[i * 2 + 2];
367
368                for (int j=0; j<n2; j++) {
369                    final int from2 = p2[j * 2 + 1];
370                    final int to2 = p2[j * 2 + 2];
371
372                    if (from2 > to1) break;
373                    if (to2 < from1) continue;
374                    final int from = from1 > from2 ? from1 : from2;
375                    final int to = to1 < to2 ? to1 : to2;
376                    pbuf = addCodeRangeToBuff(pbuf, from, to);
377                }
378            }
379        } else if (!not1) { /* 1 AND (not 2) */
380            for (int i=0; i<n1; i++) {
381                final int from1 = p1[i * 2 + 1];
382                final int to1 = p1[i * 2 + 2];
383                pbuf = andCodeRange1(pbuf, from1, to1, p2, n2);
384            }
385        }
386
387        return pbuf;
388    }
389}
390