Matcher.java revision 953:221a84ef44c0
1/*
2 * Permission is hereby granted, free of charge, to any person obtaining a copy of
3 * this software and associated documentation files (the "Software"), to deal in
4 * the Software without restriction, including without limitation the rights to
5 * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
6 * of the Software, and to permit persons to whom the Software is furnished to do
7 * so, subject to the following conditions:
8 *
9 * The above copyright notice and this permission notice shall be included in all
10 * copies or substantial portions of the Software.
11 *
12 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
13 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
14 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
15 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
16 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
17 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
18 * SOFTWARE.
19 */
20
21package jdk.nashorn.internal.runtime.regexp.joni;
22
23import static jdk.nashorn.internal.runtime.regexp.joni.Option.isFindLongest;
24
25import jdk.nashorn.internal.runtime.regexp.joni.constants.AnchorType;
26import jdk.nashorn.internal.runtime.regexp.joni.encoding.IntHolder;
27
28public abstract class Matcher extends IntHolder {
29    protected final Regex regex;
30
31    protected final char[] chars;
32    protected final int str;
33    protected final int end;
34
35    protected int msaStart;
36    protected int msaOptions;
37    protected final Region msaRegion;
38    protected int msaBestLen;
39    protected int msaBestS;
40
41    protected int msaBegin;
42    protected int msaEnd;
43
44    public Matcher(final Regex regex, final char[] chars) {
45        this(regex, chars, 0, chars.length);
46    }
47
48    public Matcher(final Regex regex, final char[] chars, final int p, final int end) {
49        this.regex = regex;
50
51        this.chars = chars;
52        this.str = p;
53        this.end = end;
54
55        this.msaRegion = regex.numMem == 0 ? null : new Region(regex.numMem + 1);
56    }
57
58    // main matching method
59    protected abstract int matchAt(int range, int sstart, int sprev);
60
61    public final Region getRegion() {
62        return msaRegion;
63    }
64
65    public final int getBegin() {
66        return msaBegin;
67    }
68
69    public final int getEnd() {
70        return msaEnd;
71    }
72
73    protected final void msaInit(final int option, final int start) {
74        msaOptions = option;
75        msaStart = start;
76        if (Config.USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE) msaBestLen = -1;
77    }
78
79    public final int match(final int at, final int range, final int option) {
80        msaInit(option, at);
81
82        final int prev = EncodingHelper.prevCharHead(str, at);
83
84        if (Config.USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE) {
85            return matchAt(end /*range*/, at, prev);
86        } else {
87            return matchAt(range /*range*/, at, prev);
88        }
89    }
90
91    int low, high; // these are the return values
92    private boolean forwardSearchRange(final char[] chars, final int str, final int end, final int s, final int range, final IntHolder lowPrev) {
93        int pprev = -1;
94        int p = s;
95
96        if (Config.DEBUG_SEARCH) {
97            Config.log.println("forward_search_range: "+
98                                "str: " + str +
99                                ", end: " + end +
100                                ", s: " + s +
101                                ", range: " + range);
102        }
103
104        if (regex.dMin > 0) {
105            p += regex.dMin;
106        }
107
108        retry:while (true) {
109            p = regex.searchAlgorithm.search(regex, chars, p, end, range);
110
111            if (p != -1 && p < range) {
112                if (p - regex.dMin < s) {
113                    // retry_gate:
114                    pprev = p;
115                    p++;
116                    continue retry;
117                }
118
119                if (regex.subAnchor != 0) {
120                    switch (regex.subAnchor) {
121                    case AnchorType.BEGIN_LINE:
122                        if (p != str) {
123                            final int prev = EncodingHelper.prevCharHead((pprev != -1) ? pprev : str, p);
124                            if (!EncodingHelper.isNewLine(chars, prev, end)) {
125                                // goto retry_gate;
126                                pprev = p;
127                                p++;
128                                continue retry;
129                            }
130                        }
131                        break;
132
133                    case AnchorType.END_LINE:
134                        if (p == end) {
135                            if (!Config.USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE) {
136                                final int prev = EncodingHelper.prevCharHead((pprev != -1) ? pprev : str, p);
137                                if (prev != -1 && EncodingHelper.isNewLine(chars, prev, end)) {
138                                    // goto retry_gate;
139                                    pprev = p;
140                                    p++;
141                                    continue retry;
142                                }
143                            }
144                        } else if (!EncodingHelper.isNewLine(chars, p, end)) {
145                            //if () break;
146                            // goto retry_gate;
147                            pprev = p;
148                            p++;
149                            continue retry;
150                        }
151                        break;
152                    } // switch
153                }
154
155                if (regex.dMax == 0) {
156                    low = p;
157                    if (lowPrev != null) { // ??? // remove null checks
158                        if (low > s) {
159                            lowPrev.value = EncodingHelper.prevCharHead(s, p);
160                        } else {
161                            lowPrev.value = EncodingHelper.prevCharHead((pprev != -1) ? pprev : str, p);
162                        }
163                    }
164                } else {
165                    if (regex.dMax != MinMaxLen.INFINITE_DISTANCE) {
166                        low = p - regex.dMax;
167
168                        if (low > s) {
169                            low = EncodingHelper.rightAdjustCharHeadWithPrev(low, lowPrev);
170                            if (lowPrev != null && lowPrev.value == -1) {
171                                lowPrev.value = EncodingHelper.prevCharHead((pprev != -1) ? pprev : s, low);
172                            }
173                        } else {
174                            if (lowPrev != null) {
175                                lowPrev.value = EncodingHelper.prevCharHead((pprev != -1) ? pprev : str, low);
176                            }
177                        }
178                    }
179                }
180                /* no needs to adjust *high, *high is used as range check only */
181                high = p - regex.dMin;
182
183                if (Config.DEBUG_SEARCH) {
184                    Config.log.println("forward_search_range success: "+
185                                        "low: " + (low - str) +
186                                        ", high: " + (high - str) +
187                                        ", dmin: " + regex.dMin +
188                                        ", dmax: " + regex.dMax);
189                }
190
191                return true;    /* success */
192            }
193
194            return false;   /* fail */
195        } //while
196    }
197
198    // low, high
199    private boolean backwardSearchRange(final char[] chars, final int str, final int end, final int s, int range, final int adjrange) {
200        range += regex.dMin;
201        int p = s;
202
203        retry:while (true) {
204            p = regex.searchAlgorithm.searchBackward(regex, chars, range, adjrange, end, p, s, range);
205
206            if (p != -1) {
207                if (regex.subAnchor != 0) {
208                    switch (regex.subAnchor) {
209                    case AnchorType.BEGIN_LINE:
210                        if (p != str) {
211                            final int prev = EncodingHelper.prevCharHead(str, p);
212                            if (!EncodingHelper.isNewLine(chars, prev, end)) {
213                                p = prev;
214                                continue retry;
215                            }
216                        }
217                        break;
218
219                    case AnchorType.END_LINE:
220                        if (p == end) {
221                            if (!Config.USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE) {
222                                final int prev = EncodingHelper.prevCharHead(adjrange, p);
223                                if (prev == -1) return false;
224                                if (EncodingHelper.isNewLine(chars, prev, end)) {
225                                    p = prev;
226                                    continue retry;
227                                }
228                            }
229                        } else if (!EncodingHelper.isNewLine(chars, p, end)) {
230                            p = EncodingHelper.prevCharHead(adjrange, p);
231                            if (p == -1) return false;
232                            continue retry;
233                        }
234                        break;
235                    } // switch
236                }
237
238                /* no needs to adjust *high, *high is used as range check only */
239                if (regex.dMax != MinMaxLen.INFINITE_DISTANCE) {
240                    low = p - regex.dMax;
241                    high = p - regex.dMin;
242                }
243
244                if (Config.DEBUG_SEARCH) {
245                    Config.log.println("backward_search_range: "+
246                                        "low: " + (low - str) +
247                                        ", high: " + (high - str));
248                }
249
250                return true;
251            }
252
253            if (Config.DEBUG_SEARCH) Config.log.println("backward_search_range: fail.");
254            return false;
255        } // while
256    }
257
258    // MATCH_AND_RETURN_CHECK
259    private boolean matchCheck(final int upperRange, final int s, final int prev) {
260        if (Config.USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE) {
261            if (Config.USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE) {
262                //range = upperRange;
263                if (matchAt(upperRange, s, prev) != -1) {
264                    if (!isFindLongest(regex.options)) return true;
265                }
266            } else {
267                //range = upperRange;
268                if (matchAt(upperRange, s, prev) != -1) return true;
269            }
270        } else {
271            if (Config.USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE) {
272                if (matchAt(end, s, prev) != -1) {
273                    //range = upperRange;
274                    if (!isFindLongest(regex.options)) return true;
275                }
276            } else {
277                //range = upperRange;
278                if (matchAt(end, s, prev) != -1) return true;
279            }
280        }
281        return false;
282    }
283
284    public final int search(int start, int range, final int option) {
285        int s, prev;
286        int origStart = start;
287        final int origRange = range;
288
289        if (Config.DEBUG_SEARCH) {
290            Config.log.println("onig_search (entry point): "+
291                    "str: " + str +
292                    ", end: " + (end - str) +
293                    ", start: " + (start - str) +
294                    ", range " + (range - str));
295        }
296
297        if (start > end || start < str) return -1;
298
299        /* anchor optimize: resume search range */
300        if (regex.anchor != 0 && str < end) {
301            int minSemiEnd, maxSemiEnd;
302
303            if ((regex.anchor & AnchorType.BEGIN_POSITION) != 0) {
304                /* search start-position only */
305                // !begin_position:!
306                if (range > start) {
307                    range = start + 1;
308                } else {
309                    range = start;
310                }
311            } else if ((regex.anchor & AnchorType.BEGIN_BUF) != 0) {
312                /* search str-position only */
313                if (range > start) {
314                    if (start != str) return -1; // mismatch_no_msa;
315                    range = str + 1;
316                } else {
317                    if (range <= str) {
318                        start = str;
319                        range = str;
320                    } else {
321                        return -1; // mismatch_no_msa;
322                    }
323                }
324            } else if ((regex.anchor & AnchorType.END_BUF) != 0) {
325                minSemiEnd = maxSemiEnd = end;
326                // !end_buf:!
327                if (endBuf(start, range, minSemiEnd, maxSemiEnd)) return -1; // mismatch_no_msa;
328            } else if ((regex.anchor & AnchorType.SEMI_END_BUF) != 0) {
329                final int preEnd = EncodingHelper.stepBack(str, end, 1);
330                maxSemiEnd = end;
331                if (EncodingHelper.isNewLine(chars, preEnd, end)) {
332                    minSemiEnd = preEnd;
333                    if (minSemiEnd > str && start <= minSemiEnd) {
334                        // !goto end_buf;!
335                        if (endBuf(start, range, minSemiEnd, maxSemiEnd)) return -1; // mismatch_no_msa;
336                    }
337                } else {
338                    minSemiEnd = end;
339                    // !goto end_buf;!
340                    if (endBuf(start, range, minSemiEnd, maxSemiEnd)) return -1; // mismatch_no_msa;
341                }
342            } else if ((regex.anchor & AnchorType.ANYCHAR_STAR_ML) != 0) {
343                // goto !begin_position;!
344                if (range > start) {
345                    range = start + 1;
346                } else {
347                    range = start;
348                }
349            }
350
351        } else if (str == end) { /* empty string */
352            // empty address ?
353            if (Config.DEBUG_SEARCH) {
354                Config.log.println("onig_search: empty string.");
355            }
356
357            if (regex.thresholdLength == 0) {
358                s = start = str;
359                prev = -1;
360                msaInit(option, start);
361
362                if (matchCheck(end, s, prev)) return match(s);
363                return mismatch();
364            }
365            return -1; // goto mismatch_no_msa;
366        }
367
368        if (Config.DEBUG_SEARCH) {
369            Config.log.println("onig_search(apply anchor): " +
370                                "end: " + (end - str) +
371                                ", start " + (start - str) +
372                                ", range " + (range - str));
373        }
374
375        msaInit(option, origStart);
376
377        s = start;
378        if (range > start) {    /* forward search */
379            if (s > str) {
380                prev = EncodingHelper.prevCharHead(str, s);
381            } else {
382                prev = 0; // -1
383            }
384
385            if (regex.searchAlgorithm != SearchAlgorithm.NONE) {
386                int schRange = range;
387                if (regex.dMax != 0) {
388                    if (regex.dMax == MinMaxLen.INFINITE_DISTANCE) {
389                        schRange = end;
390                    } else {
391                        schRange += regex.dMax;
392                        if (schRange > end) schRange = end;
393                    }
394                }
395                if ((end - start) < regex.thresholdLength) return mismatch();
396
397                if (regex.dMax != MinMaxLen.INFINITE_DISTANCE) {
398                    do {
399                        if (!forwardSearchRange(chars, str, end, s, schRange, this)) return mismatch(); // low, high, lowPrev
400                        if (s < low) {
401                            s = low;
402                            prev = value;
403                        }
404                        while (s <= high) {
405                            if (matchCheck(origRange, s, prev)) return match(s); // ???
406                            prev = s;
407                            s++;
408                        }
409                    } while (s < range);
410                    return mismatch();
411
412                } else { /* check only. */
413                    if (!forwardSearchRange(chars, str, end, s, schRange, null)) return mismatch();
414
415                    if ((regex.anchor & AnchorType.ANYCHAR_STAR) != 0) {
416                        do {
417                            if (matchCheck(origRange, s, prev)) return match(s);
418                            prev = s;
419                            s++;
420                        } while (s < range);
421                        return mismatch();
422                    }
423
424                }
425            }
426
427            do {
428                if (matchCheck(origRange, s, prev)) return match(s);
429                prev = s;
430                s++;
431            } while (s < range);
432
433            if (s == range) { /* because empty match with /$/. */
434                if (matchCheck(origRange, s, prev)) return match(s);
435            }
436        } else { /* backward search */
437            if (Config.USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE) {
438                if (origStart < end) {
439                    origStart++; // /* is upper range */
440                }
441            }
442
443            if (regex.searchAlgorithm != SearchAlgorithm.NONE) {
444                int adjrange;
445                if (range < end) {
446                    adjrange = range;
447                } else {
448                    adjrange = end;
449                }
450                if (regex.dMax != MinMaxLen.INFINITE_DISTANCE && (end - range) >= regex.thresholdLength) {
451                    do {
452                        int schStart = s + regex.dMax;
453                        if (schStart > end) schStart = end;
454                        if (!backwardSearchRange(chars, str, end, schStart, range, adjrange)) return mismatch(); // low, high
455                        if (s > high) s = high;
456                        while (s != -1 && s >= low) {
457                            prev = EncodingHelper.prevCharHead(str, s);
458                            if (matchCheck(origStart, s, prev)) return match(s);
459                            s = prev;
460                        }
461                    } while (s >= range);
462                    return mismatch();
463                } else { /* check only. */
464                    if ((end - range) < regex.thresholdLength) return mismatch();
465
466                    int schStart = s;
467                    if (regex.dMax != 0) {
468                        if (regex.dMax == MinMaxLen.INFINITE_DISTANCE) {
469                            schStart = end;
470                        } else {
471                            schStart += regex.dMax;
472                            if (schStart > end) {
473                                schStart = end;
474                            }
475                        }
476                    }
477                    if (!backwardSearchRange(chars, str, end, schStart, range, adjrange)) return mismatch();
478                }
479            }
480
481            do {
482                prev = EncodingHelper.prevCharHead(str, s);
483                if (matchCheck(origStart, s, prev)) return match(s);
484                s = prev;
485            } while (s >= range);
486
487        }
488        return mismatch();
489    }
490
491    private boolean endBuf(int start, int range, final int minSemiEnd, final int maxSemiEnd) {
492        if ((maxSemiEnd - str) < regex.anchorDmin) return true; // mismatch_no_msa;
493
494        if (range > start) {
495            if ((minSemiEnd - start) > regex.anchorDmax) {
496                start = minSemiEnd - regex.anchorDmax;
497                if (start >= end) {
498                    /* match with empty at end */
499                    start = EncodingHelper.prevCharHead(str, end);
500                }
501            }
502            if ((maxSemiEnd - (range - 1)) < regex.anchorDmin) {
503                range = maxSemiEnd - regex.anchorDmin + 1;
504            }
505            if (start >= range) return true; // mismatch_no_msa;
506        } else {
507            if ((minSemiEnd - range) > regex.anchorDmax) {
508                range = minSemiEnd - regex.anchorDmax;
509            }
510            if ((maxSemiEnd - start) < regex.anchorDmin) {
511                start = maxSemiEnd - regex.anchorDmin;
512            }
513            if (range > start) return true; // mismatch_no_msa;
514        }
515        return false;
516    }
517
518    private int match(final int s) {
519        return s - str; // sstart ???
520    }
521
522    private int mismatch() {
523        if (Config.USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE) {
524            if (msaBestLen >= 0) {
525                final int s = msaBestS;
526                return match(s);
527            }
528        }
529        // falls through finish:
530        return -1;
531    }
532}
533