changeset 56596:7a026580fed5

8225198: Optimize regex tree for greedy quantifiers of type {N,} Reviewed-by: redestad, bchristi
author igerasim
date Tue, 04 Jun 2019 18:55:53 -0700
parents b78597cfcced
children 29ab1f3bd353
files src/java.base/share/classes/java/util/regex/Pattern.java src/java.base/share/classes/java/util/regex/PrintPattern.java
diffstat 2 files changed, 20 insertions(+), 48 deletions(-) [+]
line wrap: on
line diff
--- a/src/java.base/share/classes/java/util/regex/Pattern.java	Tue Jun 04 21:25:40 2019 -0400
+++ b/src/java.base/share/classes/java/util/regex/Pattern.java	Tue Jun 04 18:55:53 2019 -0700
@@ -3279,11 +3279,12 @@
                         cmin = Math.addExact(Math.multiplyExact(cmin, 10),
                                              ch - '0');
                     } while (ASCII.isDigit(ch = read()));
-                    cmax = cmin;
                     if (ch == ',') {
                         ch = read();
-                        cmax = MAX_REPS;
-                        if (ch != '}') {
+                        if (ch == '}') {
+                            unread();
+                            return curly(prev, cmin);
+                        } else {
                             cmax = 0;
                             while (ASCII.isDigit(ch)) {
                                 cmax = Math.addExact(Math.multiplyExact(cmax, 10),
@@ -3291,6 +3292,8 @@
                                 ch = read();
                             }
                         }
+                    } else {
+                        cmax = cmin;
                     }
                 } catch (ArithmeticException ae) {
                     throw error("Illegal repetition range");
@@ -3299,18 +3302,16 @@
                     throw error("Unclosed counted closure");
                 if (cmax < cmin)
                     throw error("Illegal repetition range");
-                Curly curly;
                 ch = peek();
                 if (ch == '?') {
                     next();
-                    curly = new Curly(prev, cmin, cmax, Qtype.LAZY);
+                    return new Curly(prev, cmin, cmax, Qtype.LAZY);
                 } else if (ch == '+') {
                     next();
-                    curly = new Curly(prev, cmin, cmax, Qtype.POSSESSIVE);
+                    return new Curly(prev, cmin, cmax, Qtype.POSSESSIVE);
                 } else {
-                    curly = new Curly(prev, cmin, cmax, Qtype.GREEDY);
+                    return new Curly(prev, cmin, cmax, Qtype.GREEDY);
                 }
-                return curly;
             } else {
                 throw error("Illegal repetition");
             }
@@ -4266,8 +4267,8 @@
     }
 
     /**
-     * Handles the greedy style repetition with the minimum either be
-     * 0 or 1 and the maximum be MAX_REPS, for * and + quantifier.
+     * Handles the greedy style repetition with the specified minimum
+     * and the maximum equal to MAX_REPS, for *, + and {N,} quantifiers.
      */
     static class CharPropertyGreedy extends Node {
         final CharPredicate predicate;
@@ -4277,7 +4278,7 @@
             this.predicate = cp.predicate;
             this.cmin = cmin;
         }
-        boolean match(Matcher matcher, int i,  CharSequence seq) {
+        boolean match(Matcher matcher, int i, CharSequence seq) {
             int n = 0;
             int to = matcher.to;
             // greedy, all the way down
@@ -4320,7 +4321,7 @@
             super(bcp, cmin);
         }
 
-        boolean match(Matcher matcher, int i,  CharSequence seq) {
+        boolean match(Matcher matcher, int i, CharSequence seq) {
             int n = 0;
             int to = matcher.to;
             while (i < to && predicate.is(seq.charAt(i))) {
@@ -5157,41 +5158,6 @@
         }
     }
 
-    static final class Conditional extends Node {
-        Node cond, yes, not;
-        Conditional(Node cond, Node yes, Node not) {
-            this.cond = cond;
-            this.yes = yes;
-            this.not = not;
-        }
-        boolean match(Matcher matcher, int i, CharSequence seq) {
-            if (cond.match(matcher, i, seq)) {
-                return yes.match(matcher, i, seq);
-            } else {
-                return not.match(matcher, i, seq);
-            }
-        }
-        boolean study(TreeInfo info) {
-            int minL = info.minLength;
-            int maxL = info.maxLength;
-            boolean maxV = info.maxValid;
-            info.reset();
-            yes.study(info);
-
-            int minL2 = info.minLength;
-            int maxL2 = info.maxLength;
-            boolean maxV2 = info.maxValid;
-            info.reset();
-            not.study(info);
-
-            info.minLength = minL + Math.min(minL2, info.minLength);
-            info.maxLength = maxL + Math.max(maxL2, info.maxLength);
-            info.maxValid = (maxV & maxV2 & info.maxValid);
-            info.deterministic = false;
-            return next.study(info);
-        }
-    }
-
     /**
      * Zero width positive lookahead.
      */
--- a/src/java.base/share/classes/java/util/regex/PrintPattern.java	Tue Jun 04 21:25:40 2019 -0400
+++ b/src/java.base/share/classes/java/util/regex/PrintPattern.java	Tue Jun 04 18:55:53 2019 -0700
@@ -195,7 +195,13 @@
                     pstr = gcp.predicate.toString();
                 else
                     pstr = "Single \"" + pstr + "\"";
-                str = name + " " + pstr + ((gcp.cmin == 0) ? "*" : "+");
+                str = name + " " + pstr;
+                if (gcp.cmin == 0)
+                    str += "*";
+                else if (gcp.cmin == 1)
+                    str += "+";
+                else
+                    str += "{" + gcp.cmin + ",}";
                 print(node, str, depth);
             } else if (node instanceof Pattern.BackRef) {
                 str = "GroupBackRef " + ((Pattern.BackRef)node).groupIndex / 2;