Matcher.java revision 953:221a84ef44c0
1/* 2 * Permission is hereby granted, free of charge, to any person obtaining a copy of 3 * this software and associated documentation files (the "Software"), to deal in 4 * the Software without restriction, including without limitation the rights to 5 * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 6 * of the Software, and to permit persons to whom the Software is furnished to do 7 * so, subject to the following conditions: 8 * 9 * The above copyright notice and this permission notice shall be included in all 10 * copies or substantial portions of the Software. 11 * 12 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 13 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 14 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 15 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 16 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 17 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 18 * SOFTWARE. 19 */ 20 21package jdk.nashorn.internal.runtime.regexp.joni; 22 23import static jdk.nashorn.internal.runtime.regexp.joni.Option.isFindLongest; 24 25import jdk.nashorn.internal.runtime.regexp.joni.constants.AnchorType; 26import jdk.nashorn.internal.runtime.regexp.joni.encoding.IntHolder; 27 28public abstract class Matcher extends IntHolder { 29 protected final Regex regex; 30 31 protected final char[] chars; 32 protected final int str; 33 protected final int end; 34 35 protected int msaStart; 36 protected int msaOptions; 37 protected final Region msaRegion; 38 protected int msaBestLen; 39 protected int msaBestS; 40 41 protected int msaBegin; 42 protected int msaEnd; 43 44 public Matcher(final Regex regex, final char[] chars) { 45 this(regex, chars, 0, chars.length); 46 } 47 48 public Matcher(final Regex regex, final char[] chars, final int p, final int end) { 49 this.regex = regex; 50 51 this.chars = chars; 52 this.str = p; 53 this.end = end; 54 55 this.msaRegion = regex.numMem == 0 ? null : new Region(regex.numMem + 1); 56 } 57 58 // main matching method 59 protected abstract int matchAt(int range, int sstart, int sprev); 60 61 public final Region getRegion() { 62 return msaRegion; 63 } 64 65 public final int getBegin() { 66 return msaBegin; 67 } 68 69 public final int getEnd() { 70 return msaEnd; 71 } 72 73 protected final void msaInit(final int option, final int start) { 74 msaOptions = option; 75 msaStart = start; 76 if (Config.USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE) msaBestLen = -1; 77 } 78 79 public final int match(final int at, final int range, final int option) { 80 msaInit(option, at); 81 82 final int prev = EncodingHelper.prevCharHead(str, at); 83 84 if (Config.USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE) { 85 return matchAt(end /*range*/, at, prev); 86 } else { 87 return matchAt(range /*range*/, at, prev); 88 } 89 } 90 91 int low, high; // these are the return values 92 private boolean forwardSearchRange(final char[] chars, final int str, final int end, final int s, final int range, final IntHolder lowPrev) { 93 int pprev = -1; 94 int p = s; 95 96 if (Config.DEBUG_SEARCH) { 97 Config.log.println("forward_search_range: "+ 98 "str: " + str + 99 ", end: " + end + 100 ", s: " + s + 101 ", range: " + range); 102 } 103 104 if (regex.dMin > 0) { 105 p += regex.dMin; 106 } 107 108 retry:while (true) { 109 p = regex.searchAlgorithm.search(regex, chars, p, end, range); 110 111 if (p != -1 && p < range) { 112 if (p - regex.dMin < s) { 113 // retry_gate: 114 pprev = p; 115 p++; 116 continue retry; 117 } 118 119 if (regex.subAnchor != 0) { 120 switch (regex.subAnchor) { 121 case AnchorType.BEGIN_LINE: 122 if (p != str) { 123 final int prev = EncodingHelper.prevCharHead((pprev != -1) ? pprev : str, p); 124 if (!EncodingHelper.isNewLine(chars, prev, end)) { 125 // goto retry_gate; 126 pprev = p; 127 p++; 128 continue retry; 129 } 130 } 131 break; 132 133 case AnchorType.END_LINE: 134 if (p == end) { 135 if (!Config.USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE) { 136 final int prev = EncodingHelper.prevCharHead((pprev != -1) ? pprev : str, p); 137 if (prev != -1 && EncodingHelper.isNewLine(chars, prev, end)) { 138 // goto retry_gate; 139 pprev = p; 140 p++; 141 continue retry; 142 } 143 } 144 } else if (!EncodingHelper.isNewLine(chars, p, end)) { 145 //if () break; 146 // goto retry_gate; 147 pprev = p; 148 p++; 149 continue retry; 150 } 151 break; 152 } // switch 153 } 154 155 if (regex.dMax == 0) { 156 low = p; 157 if (lowPrev != null) { // ??? // remove null checks 158 if (low > s) { 159 lowPrev.value = EncodingHelper.prevCharHead(s, p); 160 } else { 161 lowPrev.value = EncodingHelper.prevCharHead((pprev != -1) ? pprev : str, p); 162 } 163 } 164 } else { 165 if (regex.dMax != MinMaxLen.INFINITE_DISTANCE) { 166 low = p - regex.dMax; 167 168 if (low > s) { 169 low = EncodingHelper.rightAdjustCharHeadWithPrev(low, lowPrev); 170 if (lowPrev != null && lowPrev.value == -1) { 171 lowPrev.value = EncodingHelper.prevCharHead((pprev != -1) ? pprev : s, low); 172 } 173 } else { 174 if (lowPrev != null) { 175 lowPrev.value = EncodingHelper.prevCharHead((pprev != -1) ? pprev : str, low); 176 } 177 } 178 } 179 } 180 /* no needs to adjust *high, *high is used as range check only */ 181 high = p - regex.dMin; 182 183 if (Config.DEBUG_SEARCH) { 184 Config.log.println("forward_search_range success: "+ 185 "low: " + (low - str) + 186 ", high: " + (high - str) + 187 ", dmin: " + regex.dMin + 188 ", dmax: " + regex.dMax); 189 } 190 191 return true; /* success */ 192 } 193 194 return false; /* fail */ 195 } //while 196 } 197 198 // low, high 199 private boolean backwardSearchRange(final char[] chars, final int str, final int end, final int s, int range, final int adjrange) { 200 range += regex.dMin; 201 int p = s; 202 203 retry:while (true) { 204 p = regex.searchAlgorithm.searchBackward(regex, chars, range, adjrange, end, p, s, range); 205 206 if (p != -1) { 207 if (regex.subAnchor != 0) { 208 switch (regex.subAnchor) { 209 case AnchorType.BEGIN_LINE: 210 if (p != str) { 211 final int prev = EncodingHelper.prevCharHead(str, p); 212 if (!EncodingHelper.isNewLine(chars, prev, end)) { 213 p = prev; 214 continue retry; 215 } 216 } 217 break; 218 219 case AnchorType.END_LINE: 220 if (p == end) { 221 if (!Config.USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE) { 222 final int prev = EncodingHelper.prevCharHead(adjrange, p); 223 if (prev == -1) return false; 224 if (EncodingHelper.isNewLine(chars, prev, end)) { 225 p = prev; 226 continue retry; 227 } 228 } 229 } else if (!EncodingHelper.isNewLine(chars, p, end)) { 230 p = EncodingHelper.prevCharHead(adjrange, p); 231 if (p == -1) return false; 232 continue retry; 233 } 234 break; 235 } // switch 236 } 237 238 /* no needs to adjust *high, *high is used as range check only */ 239 if (regex.dMax != MinMaxLen.INFINITE_DISTANCE) { 240 low = p - regex.dMax; 241 high = p - regex.dMin; 242 } 243 244 if (Config.DEBUG_SEARCH) { 245 Config.log.println("backward_search_range: "+ 246 "low: " + (low - str) + 247 ", high: " + (high - str)); 248 } 249 250 return true; 251 } 252 253 if (Config.DEBUG_SEARCH) Config.log.println("backward_search_range: fail."); 254 return false; 255 } // while 256 } 257 258 // MATCH_AND_RETURN_CHECK 259 private boolean matchCheck(final int upperRange, final int s, final int prev) { 260 if (Config.USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE) { 261 if (Config.USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE) { 262 //range = upperRange; 263 if (matchAt(upperRange, s, prev) != -1) { 264 if (!isFindLongest(regex.options)) return true; 265 } 266 } else { 267 //range = upperRange; 268 if (matchAt(upperRange, s, prev) != -1) return true; 269 } 270 } else { 271 if (Config.USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE) { 272 if (matchAt(end, s, prev) != -1) { 273 //range = upperRange; 274 if (!isFindLongest(regex.options)) return true; 275 } 276 } else { 277 //range = upperRange; 278 if (matchAt(end, s, prev) != -1) return true; 279 } 280 } 281 return false; 282 } 283 284 public final int search(int start, int range, final int option) { 285 int s, prev; 286 int origStart = start; 287 final int origRange = range; 288 289 if (Config.DEBUG_SEARCH) { 290 Config.log.println("onig_search (entry point): "+ 291 "str: " + str + 292 ", end: " + (end - str) + 293 ", start: " + (start - str) + 294 ", range " + (range - str)); 295 } 296 297 if (start > end || start < str) return -1; 298 299 /* anchor optimize: resume search range */ 300 if (regex.anchor != 0 && str < end) { 301 int minSemiEnd, maxSemiEnd; 302 303 if ((regex.anchor & AnchorType.BEGIN_POSITION) != 0) { 304 /* search start-position only */ 305 // !begin_position:! 306 if (range > start) { 307 range = start + 1; 308 } else { 309 range = start; 310 } 311 } else if ((regex.anchor & AnchorType.BEGIN_BUF) != 0) { 312 /* search str-position only */ 313 if (range > start) { 314 if (start != str) return -1; // mismatch_no_msa; 315 range = str + 1; 316 } else { 317 if (range <= str) { 318 start = str; 319 range = str; 320 } else { 321 return -1; // mismatch_no_msa; 322 } 323 } 324 } else if ((regex.anchor & AnchorType.END_BUF) != 0) { 325 minSemiEnd = maxSemiEnd = end; 326 // !end_buf:! 327 if (endBuf(start, range, minSemiEnd, maxSemiEnd)) return -1; // mismatch_no_msa; 328 } else if ((regex.anchor & AnchorType.SEMI_END_BUF) != 0) { 329 final int preEnd = EncodingHelper.stepBack(str, end, 1); 330 maxSemiEnd = end; 331 if (EncodingHelper.isNewLine(chars, preEnd, end)) { 332 minSemiEnd = preEnd; 333 if (minSemiEnd > str && start <= minSemiEnd) { 334 // !goto end_buf;! 335 if (endBuf(start, range, minSemiEnd, maxSemiEnd)) return -1; // mismatch_no_msa; 336 } 337 } else { 338 minSemiEnd = end; 339 // !goto end_buf;! 340 if (endBuf(start, range, minSemiEnd, maxSemiEnd)) return -1; // mismatch_no_msa; 341 } 342 } else if ((regex.anchor & AnchorType.ANYCHAR_STAR_ML) != 0) { 343 // goto !begin_position;! 344 if (range > start) { 345 range = start + 1; 346 } else { 347 range = start; 348 } 349 } 350 351 } else if (str == end) { /* empty string */ 352 // empty address ? 353 if (Config.DEBUG_SEARCH) { 354 Config.log.println("onig_search: empty string."); 355 } 356 357 if (regex.thresholdLength == 0) { 358 s = start = str; 359 prev = -1; 360 msaInit(option, start); 361 362 if (matchCheck(end, s, prev)) return match(s); 363 return mismatch(); 364 } 365 return -1; // goto mismatch_no_msa; 366 } 367 368 if (Config.DEBUG_SEARCH) { 369 Config.log.println("onig_search(apply anchor): " + 370 "end: " + (end - str) + 371 ", start " + (start - str) + 372 ", range " + (range - str)); 373 } 374 375 msaInit(option, origStart); 376 377 s = start; 378 if (range > start) { /* forward search */ 379 if (s > str) { 380 prev = EncodingHelper.prevCharHead(str, s); 381 } else { 382 prev = 0; // -1 383 } 384 385 if (regex.searchAlgorithm != SearchAlgorithm.NONE) { 386 int schRange = range; 387 if (regex.dMax != 0) { 388 if (regex.dMax == MinMaxLen.INFINITE_DISTANCE) { 389 schRange = end; 390 } else { 391 schRange += regex.dMax; 392 if (schRange > end) schRange = end; 393 } 394 } 395 if ((end - start) < regex.thresholdLength) return mismatch(); 396 397 if (regex.dMax != MinMaxLen.INFINITE_DISTANCE) { 398 do { 399 if (!forwardSearchRange(chars, str, end, s, schRange, this)) return mismatch(); // low, high, lowPrev 400 if (s < low) { 401 s = low; 402 prev = value; 403 } 404 while (s <= high) { 405 if (matchCheck(origRange, s, prev)) return match(s); // ??? 406 prev = s; 407 s++; 408 } 409 } while (s < range); 410 return mismatch(); 411 412 } else { /* check only. */ 413 if (!forwardSearchRange(chars, str, end, s, schRange, null)) return mismatch(); 414 415 if ((regex.anchor & AnchorType.ANYCHAR_STAR) != 0) { 416 do { 417 if (matchCheck(origRange, s, prev)) return match(s); 418 prev = s; 419 s++; 420 } while (s < range); 421 return mismatch(); 422 } 423 424 } 425 } 426 427 do { 428 if (matchCheck(origRange, s, prev)) return match(s); 429 prev = s; 430 s++; 431 } while (s < range); 432 433 if (s == range) { /* because empty match with /$/. */ 434 if (matchCheck(origRange, s, prev)) return match(s); 435 } 436 } else { /* backward search */ 437 if (Config.USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE) { 438 if (origStart < end) { 439 origStart++; // /* is upper range */ 440 } 441 } 442 443 if (regex.searchAlgorithm != SearchAlgorithm.NONE) { 444 int adjrange; 445 if (range < end) { 446 adjrange = range; 447 } else { 448 adjrange = end; 449 } 450 if (regex.dMax != MinMaxLen.INFINITE_DISTANCE && (end - range) >= regex.thresholdLength) { 451 do { 452 int schStart = s + regex.dMax; 453 if (schStart > end) schStart = end; 454 if (!backwardSearchRange(chars, str, end, schStart, range, adjrange)) return mismatch(); // low, high 455 if (s > high) s = high; 456 while (s != -1 && s >= low) { 457 prev = EncodingHelper.prevCharHead(str, s); 458 if (matchCheck(origStart, s, prev)) return match(s); 459 s = prev; 460 } 461 } while (s >= range); 462 return mismatch(); 463 } else { /* check only. */ 464 if ((end - range) < regex.thresholdLength) return mismatch(); 465 466 int schStart = s; 467 if (regex.dMax != 0) { 468 if (regex.dMax == MinMaxLen.INFINITE_DISTANCE) { 469 schStart = end; 470 } else { 471 schStart += regex.dMax; 472 if (schStart > end) { 473 schStart = end; 474 } 475 } 476 } 477 if (!backwardSearchRange(chars, str, end, schStart, range, adjrange)) return mismatch(); 478 } 479 } 480 481 do { 482 prev = EncodingHelper.prevCharHead(str, s); 483 if (matchCheck(origStart, s, prev)) return match(s); 484 s = prev; 485 } while (s >= range); 486 487 } 488 return mismatch(); 489 } 490 491 private boolean endBuf(int start, int range, final int minSemiEnd, final int maxSemiEnd) { 492 if ((maxSemiEnd - str) < regex.anchorDmin) return true; // mismatch_no_msa; 493 494 if (range > start) { 495 if ((minSemiEnd - start) > regex.anchorDmax) { 496 start = minSemiEnd - regex.anchorDmax; 497 if (start >= end) { 498 /* match with empty at end */ 499 start = EncodingHelper.prevCharHead(str, end); 500 } 501 } 502 if ((maxSemiEnd - (range - 1)) < regex.anchorDmin) { 503 range = maxSemiEnd - regex.anchorDmin + 1; 504 } 505 if (start >= range) return true; // mismatch_no_msa; 506 } else { 507 if ((minSemiEnd - range) > regex.anchorDmax) { 508 range = minSemiEnd - regex.anchorDmax; 509 } 510 if ((maxSemiEnd - start) < regex.anchorDmin) { 511 start = maxSemiEnd - regex.anchorDmin; 512 } 513 if (range > start) return true; // mismatch_no_msa; 514 } 515 return false; 516 } 517 518 private int match(final int s) { 519 return s - str; // sstart ??? 520 } 521 522 private int mismatch() { 523 if (Config.USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE) { 524 if (msaBestLen >= 0) { 525 final int s = msaBestS; 526 return match(s); 527 } 528 } 529 // falls through finish: 530 return -1; 531 } 532} 533