changeset 2569:98186c162c1e

6933322: Add methods highSurrogate(), lowSurrogate() to class Character
Summary: Add public variants of methods Surrogate.high, Surrogate.low
Reviewed-by: okutsu, sherman
Contributed-by: Ulf Zibis <ulf.zibis@gmx.de>
author martin
date Wed, 30 Jun 2010 16:11:32 -0700
parents 4f1b4e3c6d1b
children 838a21b99591
files src/share/classes/java/lang/Character.java src/share/classes/java/lang/String.java src/share/classes/sun/nio/cs/Surrogate.java src/share/classes/sun/nio/cs/UTF_32Coder.java src/share/classes/sun/nio/cs/UTF_8.java src/share/classes/sun/nio/cs/UnicodeEncoder.java src/share/classes/sun/nio/cs/ext/EUC_TW.java test/java/nio/charset/coders/BashStreams.java
diffstat 8 files changed, 83 insertions(+), 30 deletions(-) [+]
line wrap: on
line diff
--- a/src/share/classes/java/lang/Character.java	Wed Jun 30 16:11:32 2010 -0700
+++ b/src/share/classes/java/lang/Character.java	Wed Jun 30 16:11:32 2010 -0700
@@ -4398,6 +4398,63 @@
     }
 
     /**
+     * Returns the leading half (a
+     * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
+     * high surrogate code unit</a>) of the
+     * <a href="http://www.unicode.org/glossary/#surrogate_pair">
+     * surrogate pair</a>
+     * that encodes the given supplementary character (Unicode code
+     * point) in UTF-16.  The result is unspecified when the argument
+     * is not a
+     * <a href="Character.html#supplementary">supplementary character</a>;
+     * the argument is not validated and no exception is thrown.
+     *
+     * <p>Whenever
+     * {@link #isSupplementaryCodePoint isSupplementaryCodePoint(x)}
+     * holds, both
+     * {@link #isHighSurrogate isHighSurrogate}{@code (highSurrogate(x))} and
+     * {@link #toCodePoint toCodePoint}{@code (highSurrogate(x), }{@link #lowSurrogate lowSurrogate}{@code (x)) == x}
+     * hold as well.
+     *
+     * @param   codePoint the supplementary character (Unicode code point) to encode
+     * @return  the leading (high) surrogate code unit of the UTF-16
+     *          encoding of {@code codePoint}
+     * @since   1.7
+     */
+    public static char highSurrogate(int codePoint) {
+        final int bias = MIN_HIGH_SURROGATE - (MIN_SUPPLEMENTARY_CODE_POINT >>> 10);
+        return (char) (bias + (codePoint >>> 10));
+    }
+
+    /**
+     * Returns the trailing half (a
+     * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
+     * low surrogate code unit</a>) of the
+     * <a href="http://www.unicode.org/glossary/#surrogate_pair">
+     * surrogate pair</a>
+     * that encodes the given supplementary character (Unicode code
+     * point) in UTF-16.  The result is unspecified when the argument
+     * is not a
+     * <a href="Character.html#supplementary">supplementary character</a>;
+     * the argument is not validated and no exception is thrown.
+     *
+     * <p>Whenever
+     * {@link #isSupplementaryCodePoint isSupplementaryCodePoint(x)}
+     * holds, both
+     * {@link #isLowSurrogate isLowSurrogate}{@code (lowSurrogate(x))} and
+     * {@link #toCodePoint toCodePoint}{@code (}{@link #highSurrogate highSurrogate}{@code (x), lowSurrogate(x)) == x}
+     * hold as well.
+     *
+     * @param   codePoint the supplementary character (Unicode code point) to encode
+     * @return  the trailing (low) surrogate code unit of the UTF-16
+     *          encoding of {@code codePoint}
+     * @since   1.7
+     */
+    public static char lowSurrogate(int codePoint) {
+        return (char) (MIN_LOW_SURROGATE + (codePoint & 0x3ff));
+    }
+
+    /**
      * Converts the specified character (Unicode code point) to its
      * UTF-16 representation. If the specified code point is a BMP
      * (Basic Multilingual Plane or Plane 0) value, the same value is
@@ -4470,9 +4527,8 @@
 
     static void toSurrogates(int codePoint, char[] dst, int index) {
         // We write elements "backwards" to guarantee all-or-nothing
-        dst[index+1] = (char)((codePoint & 0x3ff) + MIN_LOW_SURROGATE);
-        dst[index] = (char)((codePoint >>> 10)
-            + (MIN_HIGH_SURROGATE - (MIN_SUPPLEMENTARY_CODE_POINT >>> 10)));
+        dst[index+1] = lowSurrogate(codePoint);  // trailing unit first: a too-large index fails here, before dst[index] changes
+        dst[index] = highSurrogate(codePoint);   // leading unit last
     }
 
     /**
--- a/src/share/classes/java/lang/String.java	Wed Jun 30 16:11:32 2010 -0700
+++ b/src/share/classes/java/lang/String.java	Wed Jun 30 16:11:32 2010 -0700
@@ -37,8 +37,6 @@
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 import java.util.regex.PatternSyntaxException;
-import sun.nio.cs.Surrogate;
-
 
 /**
  * The <code>String</code> class represents character strings. All
@@ -1607,8 +1605,8 @@
         if (Character.isValidCodePoint(ch)) {
             final char[] value = this.value;
             final int offset = this.offset;
-            final char hi = Surrogate.high(ch);
-            final char lo = Surrogate.low(ch);
+            final char hi = Character.highSurrogate(ch);
+            final char lo = Character.lowSurrogate(ch);
             final int max = offset + count - 1;
             for (int i = offset + fromIndex; i < max; i++) {
                 if (value[i] == hi && value[i+1] == lo) {
@@ -1705,8 +1703,8 @@
         if (Character.isValidCodePoint(ch)) {
             final char[] value = this.value;
             final int offset = this.offset;
-            char hi = Surrogate.high(ch);
-            char lo = Surrogate.low(ch);
+            char hi = Character.highSurrogate(ch);
+            char lo = Character.lowSurrogate(ch);
             int i = offset + Math.min(fromIndex, count - 2);
             for (; i >= offset; i--) {
                 if (value[i] == hi && value[i+1] == lo) {
--- a/src/share/classes/sun/nio/cs/Surrogate.java	Wed Jun 30 16:11:32 2010 -0700
+++ b/src/share/classes/sun/nio/cs/Surrogate.java	Wed Jun 30 16:11:32 2010 -0700
@@ -86,20 +86,20 @@
 
     /**
      * Returns the high UTF-16 surrogate for the given supplementary UCS-4 character.
+     * Delegates to {@link Character#highSurrogate}; calling that method directly is generally preferred.
      */
     public static char high(int uc) {
         assert Character.isSupplementaryCodePoint(uc);
-        return (char)((uc >> 10)
-                      + (Character.MIN_HIGH_SURROGATE
-                         - (Character.MIN_SUPPLEMENTARY_CODE_POINT >> 10)));
+        return Character.highSurrogate(uc);
     }
 
     /**
      * Returns the low UTF-16 surrogate for the given supplementary UCS-4 character.
+     * Delegates to {@link Character#lowSurrogate}; calling that method directly is generally preferred.
      */
     public static char low(int uc) {
         assert Character.isSupplementaryCodePoint(uc);
-        return (char)((uc & 0x3ff) + Character.MIN_LOW_SURROGATE);
+        return Character.lowSurrogate(uc);
     }
 
     /**
@@ -303,8 +303,8 @@
                     error = CoderResult.OVERFLOW;
                     return -1;
                 }
-                dst.put(Surrogate.high(uc));
-                dst.put(Surrogate.low(uc));
+                dst.put(Character.highSurrogate(uc));
+                dst.put(Character.lowSurrogate(uc));
                 error = null;
                 return 2;
             } else {
@@ -348,8 +348,8 @@
                     error = CoderResult.OVERFLOW;
                     return -1;
                 }
-                da[dp] = Surrogate.high(uc);
-                da[dp + 1] = Surrogate.low(uc);
+                da[dp] = Character.highSurrogate(uc);
+                da[dp + 1] = Character.lowSurrogate(uc);
                 error = null;
                 return 2;
             } else {
--- a/src/share/classes/sun/nio/cs/UTF_32Coder.java	Wed Jun 30 16:11:32 2010 -0700
+++ b/src/share/classes/sun/nio/cs/UTF_32Coder.java	Wed Jun 30 16:11:32 2010 -0700
@@ -97,8 +97,8 @@
                         if (dst.remaining() < 2)
                             return CoderResult.OVERFLOW;
                         mark += 4;
-                        dst.put(Surrogate.high(cp));
-                        dst.put(Surrogate.low(cp));
+                        dst.put(Character.highSurrogate(cp));
+                        dst.put(Character.lowSurrogate(cp));
                     } else {
                         return CoderResult.malformedForLength(4);
                     }
--- a/src/share/classes/sun/nio/cs/UTF_8.java	Wed Jun 30 16:11:32 2010 -0700
+++ b/src/share/classes/sun/nio/cs/UTF_8.java	Wed Jun 30 16:11:32 2010 -0700
@@ -252,8 +252,8 @@
                         !Character.isSupplementaryCodePoint(uc)) {
                         return malformed(src, sp, dst, dp, 4);
                     }
-                    da[dp++] = Surrogate.high(uc);
-                    da[dp++] = Surrogate.low(uc);
+                    da[dp++] = Character.highSurrogate(uc);
+                    da[dp++] = Character.lowSurrogate(uc);
                     sp += 4;
                 } else
                     return malformed(src, sp, dst, dp, 1);
@@ -309,8 +309,8 @@
                         !Character.isSupplementaryCodePoint(uc)) {
                         return malformed(src, mark, 4);
                     }
-                    dst.put(Surrogate.high(uc));
-                    dst.put(Surrogate.low(uc));
+                    dst.put(Character.highSurrogate(uc));
+                    dst.put(Character.lowSurrogate(uc));
                     mark += 4;
                 } else {
                     return malformed(src, mark, 1);
--- a/src/share/classes/sun/nio/cs/UnicodeEncoder.java	Wed Jun 30 16:11:32 2010 -0700
+++ b/src/share/classes/sun/nio/cs/UnicodeEncoder.java	Wed Jun 30 16:11:32 2010 -0700
@@ -93,8 +93,8 @@
                 if (dst.remaining() < 4)
                     return CoderResult.OVERFLOW;
                 mark += 2;
-                put(Surrogate.high(d), dst);
-                put(Surrogate.low(d), dst);
+                put(Character.highSurrogate(d), dst);
+                put(Character.lowSurrogate(d), dst);
             }
             return CoderResult.UNDERFLOW;
         } finally {
--- a/src/share/classes/sun/nio/cs/ext/EUC_TW.java	Wed Jun 30 16:11:32 2010 -0700
+++ b/src/share/classes/sun/nio/cs/ext/EUC_TW.java	Wed Jun 30 16:11:32 2010 -0700
@@ -34,7 +34,6 @@
 import java.nio.charset.CoderResult;
 import java.util.Arrays;
 import sun.nio.cs.HistoricallyNamedCharset;
-import sun.nio.cs.Surrogate;
 import static sun.nio.cs.CharsetMapping.*;
 
 public class EUC_TW extends Charset implements HistoricallyNamedCharset
@@ -159,8 +158,8 @@
                 c1[0] = c;
                 return c1;
             } else {
-                c2[0] = Surrogate.high(0x20000 + c);
-                c2[1] = Surrogate.low(0x20000 + c);
+                c2[0] = Character.highSurrogate(0x20000 + c);
+                c2[1] = Character.lowSurrogate(0x20000 + c);
                 return c2;
             }
         }
--- a/test/java/nio/charset/coders/BashStreams.java	Wed Jun 30 16:11:32 2010 -0700
+++ b/test/java/nio/charset/coders/BashStreams.java	Wed Jun 30 16:11:32 2010 -0700
@@ -89,8 +89,8 @@
             count++;
             if (Character.isSupplementaryCodePoint(c)) {
                 count++;
-                push(sun.nio.cs.Surrogate.low(c));
-                return sun.nio.cs.Surrogate.high(c);
+                push(Character.lowSurrogate(c));
+                return Character.highSurrogate(c);
             }
             return (char)c;
         }