changeset 6789:b10113081c39

8027426: String.toLowerCase incorrectly increases length, if string contains \u0130 char
Reviewed-by: okutsu
author peytoia
date Wed, 30 Oct 2013 17:25:28 +0900
parents 3d28326a9e5d
children c45d0b1afbeb
files src/share/classes/java/lang/ConditionalSpecialCasing.java src/share/classes/java/lang/String.java test/java/lang/String/ToLowerCase.java
diffstat 3 files changed, 7 insertions(+), 17 deletions(-)
line wrap: on
line diff
--- a/src/share/classes/java/lang/ConditionalSpecialCasing.java	Thu Oct 24 17:14:18 2013 +0400
+++ b/src/share/classes/java/lang/ConditionalSpecialCasing.java	Wed Oct 30 17:25:28 2013 +0900
@@ -74,7 +74,6 @@
         new Entry(0x00CC, new char[]{0x0069, 0x0307, 0x0300}, new char[]{0x00CC}, "lt", 0), // # LATIN CAPITAL LETTER I WITH GRAVE
         new Entry(0x00CD, new char[]{0x0069, 0x0307, 0x0301}, new char[]{0x00CD}, "lt", 0), // # LATIN CAPITAL LETTER I WITH ACUTE
         new Entry(0x0128, new char[]{0x0069, 0x0307, 0x0303}, new char[]{0x0128}, "lt", 0), // # LATIN CAPITAL LETTER I WITH TILDE
-        new Entry(0x0130, new char[]{0x0069, 0x0307}, new char[]{0x0130}, "lt", 0), // # LATIN CAPITAL LETTER I WITH DOT ABOVE
 
         //# ================================================================================
         //# Turkish and Azeri
@@ -85,10 +84,7 @@
         new Entry(0x0049, new char[]{0x0131}, new char[]{0x0049}, "tr", NOT_BEFORE_DOT), // # LATIN CAPITAL LETTER I
         new Entry(0x0049, new char[]{0x0131}, new char[]{0x0049}, "az", NOT_BEFORE_DOT), // # LATIN CAPITAL LETTER I
         new Entry(0x0069, new char[]{0x0069}, new char[]{0x0130}, "tr", 0), // # LATIN SMALL LETTER I
-        new Entry(0x0069, new char[]{0x0069}, new char[]{0x0130}, "az", 0), // # LATIN SMALL LETTER I
-        //# ================================================================================
-        //# Other
-        new Entry(0x0130, new char[]{0x0069, 0x0307}, new char[]{0x0130}, "en", 0), // # LATIN CAPITALLETTER I WITH DOT ABOVE
+        new Entry(0x0069, new char[]{0x0069}, new char[]{0x0130}, "az", 0)  // # LATIN SMALL LETTER I
     };
 
     // A hash table that contains the above entries
--- a/src/share/classes/java/lang/String.java	Thu Oct 24 17:14:18 2013 +0400
+++ b/src/share/classes/java/lang/String.java	Wed Oct 30 17:25:28 2013 +0900
@@ -2461,21 +2461,14 @@
             }
             if (localeDependent || srcChar == '\u03A3') { // GREEK CAPITAL LETTER SIGMA
                 lowerChar = ConditionalSpecialCasing.toLowerCaseEx(this, i, locale);
-            } else if (srcChar == '\u0130') { // LATIN CAPITAL LETTER I DOT
-                lowerChar = Character.ERROR;
             } else {
                 lowerChar = Character.toLowerCase(srcChar);
             }
             if ((lowerChar == Character.ERROR)
                     || (lowerChar >= Character.MIN_SUPPLEMENTARY_CODE_POINT)) {
                 if (lowerChar == Character.ERROR) {
-                    if (!localeDependent && srcChar == '\u0130') {
-                        lowerCharArray =
-                                ConditionalSpecialCasing.toLowerCaseCharArray(this, i, Locale.ENGLISH);
-                    } else {
-                        lowerCharArray =
-                                ConditionalSpecialCasing.toLowerCaseCharArray(this, i, locale);
-                    }
+                    lowerCharArray =
+                            ConditionalSpecialCasing.toLowerCaseCharArray(this, i, locale);
                 } else if (srcCount == 2) {
                     resultOffset += Character.toChars(lowerChar, result, i + resultOffset) - srcCount;
                     continue;
--- a/test/java/lang/String/ToLowerCase.java	Thu Oct 24 17:14:18 2013 +0400
+++ b/test/java/lang/String/ToLowerCase.java	Wed Oct 30 17:25:28 2013 +0900
@@ -23,7 +23,7 @@
 
 /*
     @test
-    @bug 4217441 4533872 4900935
+    @bug 4217441 4533872 4900935 8020037
     @summary toLowerCase should lower-case Greek Sigma correctly depending
              on the context (final/non-final).  Also it should handle
              Locale specific (lt, tr, and az) lowercasings and supplementary
@@ -69,10 +69,11 @@
         test("\u00CD", Locale.US, "\u00ED");
         test("\u0128", Locale.US, "\u0129");
 
-        // I-dot tests (Turkish and Azeri)
+        // I-dot tests
         test("\u0130", turkish, "i");
         test("\u0130", az, "i");
-        test("\u0130", Locale.US, "i\u0307");
+        test("\u0130", lt, "i");
+        test("\u0130", Locale.US, "i");
 
         // Remove dot_above in the sequence I + dot_above (Turkish and Azeri)
         test("I\u0307", turkish, "i");