--- a/src/share/classes/java/util/Locale.java Wed Oct 21 12:24:07 2009 -0700
+++ b/src/share/classes/java/util/Locale.java Wed Nov 04 17:57:23 2009 -0500
@@ -41,6 +41,7 @@ package java.util;
package java.util;
import java.io.IOException;
+import java.io.InvalidClassException;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.io.ObjectStreamField;
@@ -51,18 +52,16 @@ import java.util.spi.LocaleNameProvider;
import sun.security.action.GetPropertyAction;
import sun.util.LocaleServiceProviderPool;
+import sun.util.locale.AsciiUtil;
+import sun.util.locale.BaseLocale;
+import sun.util.locale.InternalLocaleBuilder;
+import sun.util.locale.LanguageTag;
+import sun.util.locale.LocaleExtensions;
+import sun.util.locale.LocaleObjectCache;
+import sun.util.locale.LocaleSyntaxException;
+import sun.util.locale.UnicodeLocaleExtension;
import sun.util.resources.LocaleData;
import sun.util.resources.OpenListResourceBundle;
-
-import sun.util.locale.BaseLocale;
-import sun.util.locale.BaseLocale.BaseLocaleKey;
-import sun.util.locale.AsciiUtil;
-import sun.util.locale.InternalLocaleBuilder;
-import sun.util.locale.LocaleSyntaxException;
-import sun.util.locale.LanguageTag;
-import sun.util.locale.LanguageTag.Extension;
-import sun.util.locale.LocaleExtensions;
-import sun.util.locale.LocaleObjectCache;
/**
*
@@ -73,94 +72,189 @@ import sun.util.locale.LocaleObjectCache
* is a locale-sensitive operation--the number should be formatted
* according to the customs/conventions of the user's native country,
* region, or culture.
+ *
+ * <p>
+ * The <code>Locale</code> class implements identifiers interchangeable with
+ * the <a href="http://www.rfc-editor.org/rfc/bcp/bcp47.txt">IETF BCP 47 Tags
+ * for Identifying Languages</a>, with support for the LDML (
+ * <a href="http://www.unicode.org/reports/tr35/">UTS#35 Locale Data Markup Language</a>)'s
+ * BCP 47-compatible extensions for locale data exchange.
+ *
+ * <p>
+ * A <code>Locale</code> object logically consists of the fields described below.
+ *
+ * <dl>
+ * <dt>language</dt>
+ * <dd>ISO 639 alpha-2 or alpha-3 language code, or registered language subtags
+ * up to 8 alpha letters (for future enhancements). When a language has
+ * both an alpha-2 code and an alpha-3 code, the alpha-2 code must be used.
+ * You can find a full list of valid language codes in the
+ * <a href="http://www.iana.org/assignments/language-subtag-registry">
+ * IANA Language Subtag Registry</a> (See "Type: language"). The language
+ * field is case insensitive, but <code>Locale</code> always canonicalizes
+ * to lower case.</dd>
+ * <dd>Example: "en" (English), "ja" (Japanese), "kok" (Konkani)</dd>
*
- * <P>
- * Create a <code>Locale</code> object using the constructors in this class:
+ * <dt>script</dt>
+ * <dd>ISO 15924 alpha-4 script code. You can find a full list of valid script
+ * codes in the <a href="http://www.iana.org/assignments/language-subtag-registry">
+ * IANA Language Subtag Registry</a> (See "Type: script"). The script
+ * field is case insensitive, but <code>Locale</code> always canonicalizes
+ * to title case (the first letter is upper case and the rest of the letters
+ * are lower case).</dd>
+ * <dd>Example: "Latn" (Latin), "Cyrl" (Cyrillic)</dd>
+ *
+ * <dt>country (region)</dt>
+ * <dd>ISO 3166 alpha-2 country code or UN M.49 numeric-3 area code.
+ * You can find a full list of valid country and region codes in the
+ * <a href="http://www.iana.org/assignments/language-subtag-registry">
+ * IANA Language Subtag Registry</a> (See "Type: region"). The country
+ * (region) field is case insensitive, but <code>Locale</code> always
+ * canonicalizes to upper case.</dd>
+ * <dd>Example: "US" (United States), "FR" (France), "029" (Caribbean)</dd>
+ *
+ * <dt>variant</dt>
+ * <dd>Any arbitrary value used to indicate a variation of a <code>Locale</code>.
+ * Where there are two or more variant values each indicating its own semantics,
+ * these values should be ordered by importance, with most important first,
+ * separated by underscore('_'). The variant field is case sensitive.</dd>
+ * <dd><b>Note: </b>IETF BCP 47 places syntactic restrictions on variant subtags.
+ * Also BCP 47 subtags are strictly used to indicate additional variations that
+ * define a language or its dialects that are not covered by any combinations
+ * of language, script and region subtags. However, the variant field in
+ * <code>Locale</code> has historically been used for any kind of variations,
+ * not just language variations. For example, some supported variants available
+ * in Java SE Runtime Environments indicate alternative cultural behaviors
+ * such as calendar type or number script. In BCP 47 this kind of information,
+ * which does not identify the language, is supported by extension subtags or
+ * private use subtags.</dd>
+ * <dd>Example: "polyton" (Polytonic Greek), "POSIX", "Traditional_WIN"</dd>
+ *
+ * <dt>extensions</dt>
+ * <dd>A map from single character keys to string values, indicating
+ * extensions apart from language identification. The extensions in <code>
+ * Locale</code> implement the semantics and syntax of BCP 47 extension
+ * subtags and private use subtags. A key must be a single alphanumeric
+ * character from the set [0-9a-zA-Z], and is case-insensitive. A value may
+ * consist of multiple subtags separated by hyphen('-'). When the key is "x"
+ * or "X", each subtag must be 1 to 8 alphanumeric character(s). For all other
+ * keys, each subtag must be 2 to 8 alphanumeric characters. The extensions
+ * are case insensitive, but <code>Locale</code> canonicalizes all extension
+ * keys and values to lower case.</dd>
+ * <dd>Example: key="u"/value="ca-japanese" (Japanese Calendar),
+ * key="x"/value="java-1-7"</dd>
+ * </dl>
+ *
+ * <b>Note:</b> Although the above specification requires field values to
+ * be registered in the <a href="http://www.iana.org/assignments/language-subtag-registry">
+ * IANA Language Subtag Registry</a>, the <code>Locale</code> class does not
+ * provide any validation features. The <code>Builder</code> only checks if
+ * an individual field satisfies the syntactical requirement, but does not validate
+ * the value itself. See the {@link Builder} for details.
+ *
+ * <h4>Unicode locale/language extension</h4>
+ * <p>
+ * <a href="http://www.unicode.org/reports/tr35">UTS#35 Unicode Locale Data Markup Language</a>
+ * defines optional keywords to override or refine the default behavior
+ * associated with a locale. A keyword is represented by a pair of key and type.
+ * For example, "nu-thai" indicates that Thai local digits (value:"thai") should
+ * be used for formatting numbers (key:"nu"). The keywords are mapped to a BCP 47
+ * extension value using the extension singleton key 'u'. The above example, "nu-thai",
+ * is mapped to the BCP 47 extension using a singleton extension key 'u', as the
+ * extension "u-nu-thai".
+ *
+ * <p>
+ * The key/type pairs are stored in a <code>Locale</code> object as an extension
+ * value with the key <code>UNICODE_LOCALE_EXTENSION</code> ('u').
+ * Thus, when a <code>Locale</code> object contains Unicode locale keywords,
+ * <code>getExtension(UNICODE_LOCALE_EXTENSION)</code> will return a String
+ * representing keyword(s), such as "nu-thai".
+ * The <code>Locale</code> class also provides <code>getUnicodeLocaleKeys</code>
+ * and <code>getUnicodeLocaleType</code> which allow you to access Unicode locale
+ * keywords directly.
+ *
+ * <p>
+ * The Unicode locale extension specifies optional behavior in locale-sensitive
+ * services. Although the LDML specification defines various keys and
+ * values, actual locale-sensitive service implementations in a Java
+ * Runtime Environment might not support any particular Unicode locale
+ * key/type pairs.
+ *
+ * <h4>Creating a Locale</h4>
+ * <p>
+ * There are several different ways to create a <code>Locale</code>
+ * object.
+ *
+ * <h5>Builder</h5>
+ * <p>
+ * Using {@link Builder} you can construct a <code>Locale</code> object
+ * that conforms to BCP 47 syntax.
+ *
+ * <h5>Constructors</h5>
+ * <p>
+ * The <code>Locale</code> class provides three constructors:
* <blockquote>
* <pre>
- * Locale(String language)
- * Locale(String language, String country)
- * Locale(String language, String country, String variant)
+ * Locale(String language)
+ * Locale(String language, String country)
+ * Locale(String language, String country, String variant)
* </pre>
* </blockquote>
- * The language argument is a valid <STRONG>ISO Language Code.</STRONG>
- * These codes are the lower-case, two-letter codes as defined by ISO-639.
- * You can find a full list of these codes at a number of sites, such as:
- * <BR><a href ="http://www.loc.gov/standards/iso639-2/php/English_list.php">
- * <code>http://www.loc.gov/standards/iso639-2/php/English_list.php</code></a>
- *
- * <P>
- * The country argument is a valid <STRONG>ISO Country Code.</STRONG> These
- * codes are the upper-case, two-letter codes as defined by ISO-3166.
- * You can find a full list of these codes at a number of sites, such as:
- * <BR><a href="http://www.iso.ch/iso/en/prods-services/iso3166ma/02iso-3166-code-lists/list-en1.html">
- * <code>http://www.iso.ch/iso/en/prods-services/iso3166ma/02iso-3166-code-lists/list-en1.html</code></a>
- *
- * <P>
- * The variant argument is a vendor or browser-specific code.
- * For example, use WIN for Windows, MAC for Macintosh, and POSIX for POSIX.
- * Where there are two variants, separate them with an underscore, and
- * put the most important one first. For example, a Traditional Spanish collation
- * might construct a locale with parameters for language, country and variant as:
- * "es", "ES", "Traditional_WIN".
- *
- * <P>
- * Because a <code>Locale</code> object is just an identifier for a region,
- * no validity check is performed when you construct a <code>Locale</code>.
- * If you want to see whether particular resources are available for the
- * <code>Locale</code> you construct, you must query those resources. For
- * example, ask the <code>NumberFormat</code> for the locales it supports
- * using its <code>getAvailableLocales</code> method.
- * <BR><STRONG>Note:</STRONG> When you ask for a resource for a particular
- * locale, you get back the best available match, not necessarily
- * precisely what you asked for. For more information, look at
- * {@link ResourceBundle}.
- *
- * <P>
+ * These constructors allow you to create a <code>Locale</code> object
+ * with language, country and variant, but you cannot specify
+ * script or extensions.
+ *
+ * <h5>Factory Methods</h5>
+ * <p>
+ * The method <code>forLanguageTag</code> creates a <code>Locale</code>
+ * object for a well-formed BCP 47 language tag.
+ *
+ * <h5>Locale Constants</h5>
+ * <p>
* The <code>Locale</code> class provides a number of convenient constants
* that you can use to create <code>Locale</code> objects for commonly used
* locales. For example, the following creates a <code>Locale</code> object
* for the United States:
* <blockquote>
* <pre>
- * Locale.US
+ * Locale.US
* </pre>
* </blockquote>
- *
+ *
+ * <h4>Use of Locale</h4>
* <P>
- * Once you've created a <code>Locale</code> you can query it for information about
- * itself. Use <code>getCountry</code> to get the ISO Country Code and
- * <code>getLanguage</code> to get the ISO Language Code. You can
- * use <code>getDisplayCountry</code> to get the
+ * Once you've created a <code>Locale</code> you can query it for information
+ * about itself. Use <code>getCountry</code> to get the country (or region)
+ * code and <code>getLanguage</code> to get the language code.
+ * You can use <code>getDisplayCountry</code> to get the
* name of the country suitable for displaying to the user. Similarly,
* you can use <code>getDisplayLanguage</code> to get the name of
* the language suitable for displaying to the user. Interestingly,
* the <code>getDisplayXXX</code> methods are themselves locale-sensitive
* and have two versions: one that uses the default locale and one
* that uses the locale specified as an argument.
- *
- * <P>
+ * <p>
* The Java Platform provides a number of classes that perform locale-sensitive
* operations. For example, the <code>NumberFormat</code> class formats
- * numbers, currency, or percentages in a locale-sensitive manner. Classes
- * such as <code>NumberFormat</code> have a number of convenience methods
+ * numbers, currency, and percentages in a locale-sensitive manner. Classes
+ * such as <code>NumberFormat</code> have several convenience methods
* for creating a default object of that type. For example, the
* <code>NumberFormat</code> class provides these three convenience methods
* for creating a default <code>NumberFormat</code> object:
* <blockquote>
* <pre>
- * NumberFormat.getInstance()
- * NumberFormat.getCurrencyInstance()
- * NumberFormat.getPercentInstance()
+ * NumberFormat.getInstance()
+ * NumberFormat.getCurrencyInstance()
+ * NumberFormat.getPercentInstance()
* </pre>
* </blockquote>
- * These methods have two variants; one with an explicit locale
- * and one without; the latter using the default locale.
+ * Each of these methods has two variants: one with an explicit locale
+ * and one without; the latter uses the default locale.
* <blockquote>
* <pre>
- * NumberFormat.getInstance(myLocale)
- * NumberFormat.getCurrencyInstance(myLocale)
- * NumberFormat.getPercentInstance(myLocale)
+ * NumberFormat.getInstance(myLocale)
+ * NumberFormat.getCurrencyInstance(myLocale)
+ * NumberFormat.getPercentInstance(myLocale)
* </pre>
* </blockquote>
* A <code>Locale</code> is the mechanism for identifying the kind of object
@@ -168,115 +262,96 @@ import sun.util.locale.LocaleObjectCache
* <STRONG>just</STRONG> a mechanism for identifying objects,
* <STRONG>not</STRONG> a container for the objects themselves.
*
- * <p><span style="background-color: #00ccff; font-weight: bold">New
- * API</span> As of Java 7, Locale provides support for BCP47 Language
- * Tags. These are described in <a
- * href="http://www.ietf.org/internet-drafts/draft-ietf-ltru-4646bis-21.txt">
- * RFC 4646</a> (<span style="background-color: #00ccff; font-weight:
- * bold">Currently Draft, remove or reference final version before
- * release.</span>) New APIs have been added to support conversion
- * between Locale and the format described in BCP47. Briefly,
- * Locale adds:
- *
- * <ul>
- * <li>A script field
- * <li>A set of extensions and a private use code
- * <li>API support for the locale extension
- * <li>A {@link #toLanguageTag} method to convert a Locale to a language tag
- * <li>A {@link #forLanguageTag} factory method to create a Locale from a language tag
- * <li>A {@link java.util.Locale.Builder} object to create a Locale from components
- * </ul>
- *
- * A few other miscellanious convenience APIs have also been added.
- *
- * <p><strong>Compatibility</strong>
- *
- * <p>In order to maintain compatibility with existing usage, Locale's
+ * <h4>Compatibility</h4>
+ * <!--
+ * <p>
+ * In order to maintain compatibility with existing usage, Locale's
* constructors retain their original behavior. The same is true for
* the <code>toString</code> method. Thus Locale objects can continue
* to be used as they were. In particular, clients who parse the
* output of toString into language, country, and variant fields can
* continue to do so (although this is strongly discouraged).
*
- * <p>But the existence of the additional fields, in particlar script,
- * means that the output of <code>toString<code> no longer represents
+ * <p>
+ * But the existence of the additional fields, in particular script,
+ * means that the output of <code>toString</code> no longer represents
* the full state of the locale, and so clients
* <strong>cannot</strong> rely on this string as a proxy for the
* actual locale when working with arbitrary locales.
*
- * <p>In addition, BCP47 imposes syntactical restrictions that are not
+ * <p>
+ * In addition, BCP 47 imposes syntactical restrictions that are not
* imposed by Locale's constructors. This means that conversions
- * between some Locales and BCP47 language tags cannot be made without
+ * between some Locales and BCP 47 language tags cannot be made without
* losing information. Thus <code>toLanguageTag</code> cannot
* represent the state of non-conforming locales.
*
- * <p>Because of these issues, it is recommended that clients migrate
- * away from constructing nonconforming locales and use the
- * <code>forLanguageTag<code> and <code>Builder<code> APIs instead.
+ * <p>
+ * Because of these issues, it is recommended that clients migrate
+ * away from constructing non-conforming locales and use the
+ * <code>forLanguageTag</code> and <code>Locale.Builder</code> APIs instead.
* Clients desiring a string representation of the complete locale can
- * then always rely <code>toLanguageTag<code> for this purpose.
- *
- * <p><strong>Special cases</strong>
- *
- * <p>For compatibility reasons, a few nonconforming locales are treated
- * as special cases. These are <b><tt>ja_JP_JP</tt></b>,
- * <b><tt>th_TH_TH</tt></b>, and <b><tt>no_NO_NY</tt></b>. These are
- * ill-formed in BCP47 since the variants are too short. To ease
- * migration to BCP47, these are treated specially during
- * construction.
- *
- * <p>Java has used <tt>ja_JP_JP</tt> to represent Japanese as used in
+ * then always rely on <code>toLanguageTag</code> for this purpose.
+ *
+ * <h5>Special cases</h5>
+ * -->
+ * <p>
+ * For compatibility reasons, two non-conforming locales are treated
+ * as special cases. These are <b><tt>ja_JP_JP</tt></b> and
+ * <b><tt>th_TH_TH</tt></b>. These are ill-formed in BCP 47 since the
+ * variants are too short. To ease migration to BCP 47, these are treated
+ * specially during construction.
+ *
+ * <p>
+ * Java has used <tt>ja_JP_JP</tt> to represent Japanese as used in
* Japan together with the Japanese Imperial calendar. This is now
- * representable using locale extensions, by specifying the locale
- * extension key <tt>ca</tt> (for "calendar") and value
+ * representable using a Unicode locale extension, by specifying the
+ * Unicode locale key <tt>ca</tt> (for "calendar") and type
* <tt>japanese</tt>. When the Locale constructor is called with the
- * arguments "ja", "JP', "JP", the extension is automatically added.
- *
- * <p>Java has used <tt>th_TH_TH</tt> to represent Thai as used in
- * Thailand together with thai digits. This is now representable using
- * locale extensions, by specifying the locale extension key
+ * arguments "ja", "JP", "JP", this extension is automatically added.
+ *
+ * <p>
+ * Java has used <tt>th_TH_TH</tt> to represent Thai as used in
+ * Thailand together with Thai digits. This is also now representable using
+ * a Unicode locale extension, by specifying the Unicode locale key
* <tt>nu</tt> (for "number") and value <tt>thai</tt>. When the Locale
- * constructor is called with the arguments "th", "TH", "TH", the
+ * constructor is called with the arguments "th", "TH", "TH", this
* extension is automatically added.
*
- * <p>Java has used <tt>no_NO_NY</tt> to represent Norwegian Nyorsk as
- * used in Norway. This is now representable as <tt>nn_NO</tt>, since
- * the introduction of the language codes <tt>nn</tt> and <tt>nb</tt>
- * (for Norwegian Bokmal). <span style="background-color: #00ccff;
- * font-weight: bold">Note, doing this at struction breaks
- * compatibility and contradicts the statement about Locale's
- * constructors remaining unchanged.</span>
- *
- * <p><strong>Legacy language codes</strong>
- *
- * <p>Locale's constructor has always converted three language codes to
+ *
+ * <h5>Legacy language codes</h5>
+ *
+ * <p>
+ * Locale's constructor has always converted three language codes to
* their earlier, obsoleted forms: <tt>he</tt> maps to <tt>iw</tt>,
* <tt>yi</tt> maps to <tt>ji</tt>, and <tt>id</tt> maps to
* <tt>in</tt>. This continues to be the case, in order to not break
* backwards compatibility.
*
- * <p>The new BCP47 APIs map between the old and new language codes,
+ * <p>
+ * The new BCP 47 APIs map between the old and new language codes,
* maintaining the old codes internal to Locale (so that
- * <code>getLanguage</code> and <code>toString<code> reflect the old
- * code), but using the new codes in the BCP47 language tag APIs (so
+ * <code>getLanguage</code> and <code>toString</code> reflect the old
+ * code), but using the new codes in the BCP 47 language tag APIs (so
* that <code>toLanguageTag</code> reflects the new one). This
* preserves the equivalence between Locales no matter which code or
* API is used to construct them. Java's default resource bundle
* lookup mechanism also implements this mapping, so that resources
- * can be named using either convention, see {@link
- * ResourceBundle.Control}.
- *
- * <p><strong>Three-letter and two-letter language codes</strong>
- *
- * <p>The Locale constructor has always specified that the language param
- * be two characters in length, although in practice it has accepted
- * any length. The specification has now been relaxed to allow
- * language codes of two to eight characters, and in particular,
- * three-letter language codes as specified in the <a
- * href="http://www.iana.org/assignments/language-subtag-registry">IANA
- * registry</a>. For compatibility, the implementation still does not
- * impose a length constraint
- *
+ * can be named using either convention, see {@link ResourceBundle.Control}.
+ *
+ * <h5>Three-letter language/country(region) codes</h5>
+ *
+ * <p>The Locale constructors have always specified that the language
+ * and the country param be two characters in length, although in
+ * practice they have accepted any length. The specification has now been
+ * relaxed to allow language codes of two to eight characters and country
+ * (region) codes of two to three characters, and in particular,
+ * three-letter language codes and three-letter region codes as
+ * specified in the <a href="http://www.iana.org/assignments/language-subtag-registry">
+ * IANA Language Subtag Registry</a>. For compatibility, the implementation
+ * still does not impose a length constraint.
+ *
+ * @see Builder
* @see ResourceBundle
* @see java.text.Format
* @see java.text.NumberFormat
@@ -385,9 +460,7 @@ public final class Locale implements Clo
static public final Locale ROOT = getInstance("", "", "");
/**
- * <span style="background-color: #00ccff; font-weight: bold">New API</span>
- *
- * The key for the private use extension.
+ * The key for the private use extension ('x').
*
* @see #getExtension(char)
* @see Builder#setExtension(char, String)
@@ -396,15 +469,13 @@ public final class Locale implements Clo
static public final char PRIVATE_USE_EXTENSION = 'x';
/**
- * <span style="background-color: #00ccff; font-weight: bold">New API</span>
- *
- * The key for the LDML extension.
- *
+ * The key for the Unicode locale extension ('u').
+ *
* @see #getExtension(char)
* @see Builder#setExtension(char, String)
* @since 1.7
*/
- static public final char LDML_EXTENSION = 'u';
+ static public final char UNICODE_LOCALE_EXTENSION = 'u';
/** serialization ID
*/
@@ -419,55 +490,94 @@ public final class Locale implements Clo
private static final int DISPLAY_SCRIPT = 3;
/**
- * Construct a locale from language, country, variant.
- * NOTE: ISO 639 is not a stable standard; some of the language codes it defines
- * (specifically iw, ji, and in) have changed. This constructor accepts both the
- * old codes (iw, ji, and in) and the new codes (he, yi, and id), but all other
+ * Private constructor used by getInstance method
+ */
+ private Locale(BaseLocale baseLocale, LocaleExtensions extensions) {
+ _baseLocale = baseLocale;
+ _extensions = extensions;
+ }
+
+ /**
+ * Construct a locale from language, country and variant.
+ * This constructor normalizes the language value to lowercase and
+ * the country value to uppercase.
+ * <p>
+ * <b>Note:</b>
+ * <ul>
+ * <li>ISO 639 is not a stable standard; some of the language codes it defines
+ * (specifically "iw", "ji", and "in") have changed. This constructor accepts both the
+ * old codes ("iw", "ji", and "in") and the new codes ("he", "yi", and "id"), but all other
* API on Locale will return only the OLD codes.
- * @param language lowercase two-letter ISO-639 code.
- * @param country uppercase two-letter ISO-3166 code.
- * @param variant vendor and browser specific code. See class description.
+ * <li>For backward compatibility reasons, this constructor does not make
+ * any syntactical checks on the input.
+ * </ul>
+ *
+ * @param language An ISO 639 alpha-2 or alpha-3 language code, or a language subtag
+ * up to 8 characters in length. See the <code>Locale</code> class description about
+ * valid language values.
+ * @param country An ISO 3166 alpha-2 country code or a UN M.49 numeric-3 area code.
+ * See the <code>Locale</code> class description about valid country values.
+ * @param variant Any arbitrary value used to indicate a variation of a <code>Locale</code>.
+ * See the <code>Locale</code> class description for the details.
* @exception NullPointerException thrown if any argument is null.
*/
public Locale(String language, String country, String variant) {
- this(language, "", country, variant, LocaleExtensions.EMPTY_EXTENSIONS);
- }
-
- /**
- * Construct a locale from language, country.
- * NOTE: ISO 639 is not a stable standard; some of the language codes it defines
- * (specifically iw, ji, and in) have changed. This constructor accepts both the
- * old codes (iw, ji, and in) and the new codes (he, yi, and id), but all other
+ this(language, "", country, variant);
+ }
+
+ /**
+ * Construct a locale from language and country.
+ * This constructor normalizes the language value to lowercase and
+ * the country value to uppercase.
+ * <p>
+ * <b>Note:</b>
+ * <ul>
+ * <li>ISO 639 is not a stable standard; some of the language codes it defines
+ * (specifically "iw", "ji", and "in") have changed. This constructor accepts both the
+ * old codes ("iw", "ji", and "in") and the new codes ("he", "yi", and "id"), but all other
* API on Locale will return only the OLD codes.
- * @param language lowercase two-letter ISO-639 code.
- * @param country uppercase two-letter ISO-3166 code.
+ * <li>For backward compatibility reasons, this constructor does not make
+ * any syntactical checks on the input.
+ * </ul>
+ *
+ * @param language An ISO 639 alpha-2 or alpha-3 language code, or a language subtag
+ * up to 8 characters in length. See the <code>Locale</code> class description about
+ * valid language values.
+ * @param country An ISO 3166 alpha-2 country code or a UN M.49 numeric-3 area code.
+ * See the <code>Locale</code> class description about valid country values.
* @exception NullPointerException thrown if either argument is null.
*/
public Locale(String language, String country) {
- this(language, "", country, "", LocaleExtensions.EMPTY_EXTENSIONS);
+ this(language, "", country, "");
}
/**
* Construct a locale from a language code.
- * NOTE: ISO 639 is not a stable standard; some of the language codes it defines
- * (specifically iw, ji, and in) have changed. This constructor accepts both the
- * old codes (iw, ji, and in) and the new codes (he, yi, and id), but all other
+ * This constructor normalizes the language value to lowercase.
+ * <p>
+ * <b>Note:</b>
+ * <ul>
+ * <li>ISO 639 is not a stable standard; some of the language codes it defines
+ * (specifically "iw", "ji", and "in") have changed. This constructor accepts both the
+ * old codes ("iw", "ji", and "in") and the new codes ("he", "yi", and "id"), but all other
* API on Locale will return only the OLD codes.
- * @param language lowercase two-letter ISO-639 code.
+ * <li>For backward compatibility reasons, this constructor does not make
+ * any syntactical checks on the input.
+ * </ul>
+ *
+ * @param language An ISO 639 alpha-2 or alpha-3 language code, or a language subtag
+ * up to 8 characters in length. See the <code>Locale</code> class description about
+ * valid language values.
* @exception NullPointerException thrown if argument is null.
* @since 1.4
*/
public Locale(String language) {
- this(language, "", "", "", LocaleExtensions.EMPTY_EXTENSIONS);
- }
-
- private Locale(String language, String script, String country, String variant, LocaleExtensions extensions) {
+ this(language, "", "", "");
+ }
+
+ private Locale(String language, String script, String country, String variant) {
_baseLocale = BaseLocale.getInstance(convertOldISOCodes(language), script, country, variant);
- if (extensions.equals(LocaleExtensions.EMPTY_EXTENSIONS)) {
- _extensions = getCompatibilityExtensions(language, script, country, variant);
- } else {
- _extensions = extensions;
- }
+ _extensions = getCompatibilityExtensions(language, script, country, variant);
}
/**
@@ -478,8 +588,8 @@ public final class Locale implements Clo
* returned. Otherwise, a new <code>Locale</code> instance is
* created and cached.
*
- * @param language lowercase two-letter ISO-639 code.
- * @param country uppercase two-letter ISO-3166 code.
+ * @param language lowercase language code of 2 to 8 letters.
+ * @param country uppercase two-letter ISO-3166 code or numeric-3 UN M.49 area code.
* @param variant vendor and browser specific code. See class description.
* @return the <code>Locale</code> instance requested
* @exception NullPointerException if any argument is null.
@@ -502,29 +612,28 @@ public final class Locale implements Clo
extensions = getCompatibilityExtensions(language, script, country, variant);
}
- LocaleKey key = new LocaleKey(language, script, country, variant, extensions);
+ BaseLocale baseloc = BaseLocale.getInstance(language, script, country, variant);
+ return getInstance(baseloc, extensions);
+ }
+
+ private static Locale getInstance(BaseLocale baseloc, LocaleExtensions extensions) {
+
+ LocaleKey key = new LocaleKey(baseloc, extensions);
Locale locale = LOCALECACHE.get(key);
if (locale == null) {
- locale = new Locale(language, script, country, variant, extensions);
- LOCALECACHE.put(
- new LocaleKey(
- locale.getLanguage(),
- locale.getScript(),
- locale.getCountry(),
- locale.getVariant(),
- locale._extensions),
- locale);
+ locale = new Locale(baseloc, extensions);
+ locale = LOCALECACHE.put(key, locale);
}
return locale;
}
- private static class LocaleKey implements Comparable<LocaleKey> {
- private BaseLocaleKey _baseKey;
- private String _extKey;
-
- private LocaleKey(String language, String script, String country, String variant, LocaleExtensions extensions) {
- _baseKey = new BaseLocaleKey(language, script, country, variant);
- _extKey = extensions.getCanonicalString();
+ private static class LocaleKey {
+ private BaseLocale _base;
+ private LocaleExtensions _exts;
+
+ private LocaleKey(BaseLocale baseLocale, LocaleExtensions extensions) {
+ _base = baseLocale;
+ _exts = extensions;
}
public boolean equals(Object obj) {
@@ -535,19 +644,11 @@ public final class Locale implements Clo
return false;
}
LocaleKey other = (LocaleKey)obj;
- return _baseKey.equals(other._baseKey) && _extKey.equals(other._extKey);
- }
-
- public int compareTo(LocaleKey other) {
- int res = _baseKey.compareTo(other._baseKey);
- if (res == 0) {
- res = _extKey.compareTo(other._extKey);
- }
- return res;
+ return _base.equals(other._base) && _exts.equals(other._exts);
}
public int hashCode() {
- return _baseKey.hashCode() | _extKey.hashCode();
+ return 31 * _base.hashCode() + _exts.hashCode(); // not '|': OR can only set bits, skewing hashes toward all-ones and increasing cache-key collisions
}
}
@@ -646,6 +747,11 @@ public final class Locale implements Clo
/**
* Returns a list of all 2-letter country codes defined in ISO 3166.
* Can be used to create Locales.
+ * <p>
+ * <b>Note:</b> The <code>Locale</code> class also supports other codes for
+ * country (region), such as 3-letter numeric UN M.49 area codes.
+ * Therefore, the list returned by this method does not contain ALL valid
+ * codes that can be used to create Locales.
*/
public static String[] getISOCountries() {
if (isoCountries == null) {
@@ -659,9 +765,16 @@ public final class Locale implements Clo
/**
* Returns a list of all 2-letter language codes defined in ISO 639.
* Can be used to create Locales.
- * [NOTE: ISO 639 is not a stable standard-- some languages' codes have changed.
+ * <p>
+ * <b>Note:</b>
+ * <ul>
+ * <li>ISO 639 is not a stable standard-- some languages' codes have changed.
* The list this function returns includes both the new and the old codes for the
- * languages whose codes have changed.]
+ * languages whose codes have changed.
+ * <li>The <code>Locale</code> class also supports language codes up to
+ * 8 characters in length. Therefore, the list returned by this method does
+ * not contain ALL valid codes that can be used to create Locales.
+ * </ul>
*/
public static String[] getISOLanguages() {
if (isoLanguages == null) {
@@ -682,17 +795,22 @@ public final class Locale implements Clo
}
/**
- * Returns the language code for this locale, which should either be the empty string
- * or a lowercase ISO 639 code.
- * <p>NOTE: ISO 639 is not a stable standard-- some languages' codes have changed.
+ * Returns the language code of this Locale.
+ * <p>
+ * <b>Note:</b> ISO 639 is not a stable standard-- some languages' codes have changed.
* Locale's constructor recognizes both the new and the old codes for the languages
* whose codes have changed, but this function always returns the old code. If you
- * want to check for a specific language whose code has changed, don't do <pre>
+ * want to check for a specific language whose code has changed, don't do
+ * <pre>
* if (locale.getLanguage().equals("he"))
* ...
- * </pre>Instead, do<pre>
- * if (locale.getLanguage().equals(new Locale("he", "", "").getLanguage()))
- * ...</pre>
+ * </pre>
+ * Instead, do
+ * <pre>
+ * if (locale.getLanguage().equals(new Locale("he").getLanguage()))
+ * ...
+ * </pre>
+ * @return The language code, or the empty string if none is defined.
* @see #getDisplayLanguage
*/
public String getLanguage() {
@@ -700,14 +818,12 @@ public final class Locale implements Clo
}
/**
- * <span style="background-color: #00ccff; font-weight: bold">New API</span>
- *
* Returns the script code for this locale, which should
* either be the empty string or an ISO 15924 4-letter script
* code. The first letter is uppercase and the rest are
* lowercase, for example, 'Latn', 'Cyrl'.
*
- * @return the script code, or the empty string if none is defined
+ * @return The script code, or the empty string if none is defined.
* @see #getDisplayScript
* @since 1.7
*/
@@ -717,7 +833,10 @@ public final class Locale implements Clo
/**
* Returns the country/region code for this locale, which should
- * either be the empty string or an uppercase ISO 3166 2 or 3-letter code.
+ * be the empty string, or an uppercase ISO 3166 2-letter code,
+ * or a UN M.49 3-digit code.
+ *
+ * @return The country/region code, or the empty string if none is defined.
* @see #getDisplayCountry
*/
public String getCountry() {
@@ -725,8 +844,7 @@ public final class Locale implements Clo
}
/**
- * Returns the variant code for this locale, which should either be
- * the empty string or a conforming BCP47 variant string.
+ * Returns the variant code for this locale.
* @see #getDisplayVariant
*/
public String getVariant() {
@@ -734,118 +852,178 @@ public final class Locale implements Clo
}
/**
- * <span style="background-color: #00ccff; font-weight: bold">New API</span>
- *
- * Returns the extension (or private use value) associated with
- * the specified key, or null if there is no extension associated
- * with the key. The key must be one of <code>[0-9A-Za-z]</code>.
- * Keys are case-insensitive, so for example 'z' and 'Z' represent
- * the same extension.
+ * Returns the extension (or private use) value associated with
+ * the specified singleton key, or null if there is no extension
+ * associated with the key. To be valid, the key must be one
+ * of <code>[0-9A-Za-z]</code>. Keys are case-insensitive, so
+ * for example 'z' and 'Z' represent the same extension.
*
* @param key the extension key
* @return the extension, or null if this locale defines no
* extension for the specified key
* @throws IllegalArgumentException if the key is not valid
* @see #PRIVATE_USE_EXTENSION
- * @see #LDML_EXTENSION
+ * @see #UNICODE_LOCALE_EXTENSION
* @since 1.7
*/
public String getExtension(char key) {
- if (!LocaleExtensions.isValidExtensionKey(key)) {
- throw new IllegalArgumentException("Invalid extension key: " + key);
- }
- return _extensions.getExtensionValue(Character.valueOf(key));
- }
-
- /**
- * <span style="background-color: #00ccff; font-weight: bold">New API</span>
+ String strKey = String.valueOf(key);
+ if (!LocaleExtensions.isValidKey(strKey)) {
+ throw new IllegalArgumentException("Invalid extension key: " + strKey);
+ }
+ return _extensions.getExtensionValue(key);
+ }
+
+ /**
+ * Returns the set of extension keys associated with this locale, or the
+ * empty set if it has no extensions. The returned set is unmodifiable.
*
- * Returns the set of extension keys associated with this locale, or null
- * if it has no extensions. The returned set is immutable.
- *
- * @return the set of extension keys, or null if this locale has
+ * @return the set of extension keys, or the empty set if this locale has
* no extensions
* @since 1.7
*/
public Set<Character> getExtensionKeys() {
- return _extensions.getExtensionKeys();
- }
-
- /**
- * <span style="background-color: #00ccff; font-weight: bold">New API</span>
+ return _extensions.getKeys();
+ }
+
+ /**
+ * Returns the Unicode locale type associated with the specified Unicode
+ * locale key for this locale. Unicode locale keywords are specified
+ * by the 'u' extension and consist of key/type pairs. The key must be
+ * two alphanumeric characters in length, or an IllegalArgumentException
+ * is thrown.
*
- * Returns the LDML value ('keyword type') associated with the
- * specified LDML key for this locale. The key must be two
- * alphanumeric characters in length, or an
- * IllegalArgumentException is thrown.
- *
- * @param ldmlKey the LDML key
- * @return the value ('keyword type') associated with the key, or null
- * if the locale does not define a value for the key
+ * @param key the Unicode locale key
+ * @return the Unicode locale type associated with the key, or null if the
+ * locale does not define a value for the key.
* @throws IllegalArgumentException if the key is not valid.
+ * @throws NullPointerException if <code>key</code> is null
* @since 1.7
*/
- public String getLDMLExtensionValue(String ldmlKey) {
- if (!LocaleExtensions.isValidLDMLKey(ldmlKey)) {
- throw new IllegalArgumentException("Invalid LDML key: " + ldmlKey);
- }
- return _extensions.getLDMLKeywordType(ldmlKey);
- }
-
- /**
- * <span style="background-color: #00ccff; font-weight: bold">New API</span>
- *
- * Returns the set of LDML keys defined by this locale, or null if
- * this locale has no LDML extension. The returned set is
+ public String getUnicodeLocaleType(String key) {
+ if (!UnicodeLocaleExtension.isKey(key)) {
+ throw new IllegalArgumentException("Invalid Unicode locale key: " + key);
+ }
+ return _extensions.getUnicodeLocaleType(key);
+ }
+
+ /**
+ * Returns the set of keys for Unicode locale keywords defined by this locale,
+ * or null if this locale has no locale extension. The returned set is
* immutable.
- *
- * @return The set of the LDML keys, or null
+ *
+ * @return the set of the Unicode locale keys, or null
* @since 1.7
*/
- public Set<String> getLDMLExtensionKeys() {
- return _extensions.getLDMLKeywordKeys();
- }
-
- /**
- * Getter for the programmatic name of the entire locale,
- * with the language, country and variant separated by underbars.
- * Language is always lower case, and country is always upper case.
- * If the language is missing, the string will begin with an underbar.
+ public Set<String> getUnicodeLocaleKeys() {
+ return _extensions.getUnicodeLocaleKeys();
+ }
+
+ /**
+ * Returns a string representation of this <code>Locale</code>
+ * object, consisting of language, country, variant, script,
+ * and extensions as below:
+ * <p><blockquote>
+ * language + "_" + country + "_" + variant + "_#" + script + "_" + extensions
+ * </blockquote>
+ *
+ * Language is always lower case, country is always upper case,
+ * script is always title case, and extensions is always lower case.
+ * <p>
+ * If the language is missing, the string will begin with an underscore.
* If both the language and country fields are missing, this function
- * will return the empty string, even if the variant field is filled in
- * (you can't have a locale with just a variant-- the variant must accompany
- * a valid language or country code).
- * Examples: "en", "de_DE", "_GB", "en_US_WIN", "de__POSIX", "fr__MAC"
- * <br><strong>Note</strong>: this is compatible with previous
- * versions of the JDK, and so does not include script code or
- * extensions. It is recommended that callers convert to use
- * {@link toLanguageTag}.
+ * will return the empty string, even if the variant or script or extensions
+ * field is filled in (you can't have a locale with just a variant-- the variant
+ * must accompany a valid language or country code).
+ * <p>
+ * Examples: "en", "de_DE", "_GB", "en_US_WIN", "de__POSIX", "fr__MAC", "zh_CN_#Hans",
+ * "zh_TW_#Hant_x-java", "th_TH_TH_#u-nu-thai"
+ *
* @see #getDisplayName
*/
public final String toString() {
- return _baseLocale.getJava6String();
- }
-
- /**
- * <span style="background-color: #00ccff; font-weight: bold">New API</span>
- *
- * Returns a well-formed language tag representing this locale.
+ boolean l = (_baseLocale.getLanguage().length() != 0);
+ boolean s = (_baseLocale.getScript().length() != 0);
+ boolean r = (_baseLocale.getRegion().length() != 0);
+ boolean v = (_baseLocale.getVariant().length() != 0);
+ boolean e = (_extensions.getID().length() != 0);
+
+ StringBuilder result = new StringBuilder(_baseLocale.getLanguage());
+ if (r || (l && v)) {
+ result.append('_')
+ .append(_baseLocale.getRegion()); // This may just append '_'
+ }
+ if (v && (l || r)) {
+ result.append('_')
+ .append(_baseLocale.getVariant());
+ }
+
+ if (s && (l || r)) {
+ result.append("_#")
+ .append(_baseLocale.getScript());
+ }
+
+ if (e && (l || r)) {
+ result.append('_');
+ if (!s) {
+ result.append('#');
+ }
+ result.append(_extensions.getID());
+ }
+
+ return result.toString();
+ }
+
+ /**
+ * Returns a well-formed IETF BCP 47 language tag representing
+ * this locale.
*
- * <p><b>Note</b>: If the language, country, or variant fields do
- * not satisfy BCP47 language tag syntax requirements, they are
- * omitted from the result. For example, using the constructor it
- * is possible to create a Locale instance with digits in the
- * language field, or only two characters in the variant field.
- * Since these values are not well-formed BCP47 language tag
- * syntax, they cannot be expressed in BCP47. Since such 'legacy'
- * locales lose information when converting to BCP47, it is
- * recommended that clients switch to conforming locales.
+ * <p>
+ * If this <code>Locale</code> object has language, country, or variant
+ * that does not satisfy the IETF BCP 47 language tag syntax requirements,
+ * this method handles these fields as described below:
+ * <p>
+ * <b>Language:</b> If language is empty or ill-formed (for example "a" or "e2"),
+ * it will be emitted as "und" (Undetermined).
+ * <p>
+ * <b>Country:</b> If country is ill-formed (for example "12" or "USA"), it
+ * will be omitted.
+ * <p>
+ * <b>Variant:</b> Variant is treated as consisting of subtags separated by
+ * underscore. 'Well-formed' subtags consist of either an ASCII letter followed
+ * by 4-7 ASCII characters, or an ASCII digit followed by 3-7 ASCII characters.
+ * If well-formed, the variant is emitted as each subtag in order (separated by
+ * hyphen). Otherwise:
+ * <ul>
+ * <li>if all sub-segments consist of 1 to 8 ASCII alphanumerics (for example
+ * "WIN" or "Solaris_10"), the first ill-formed sub-segment and all following
+ * will be emitted as private use subtags prefixed by the special subtag "variant"
+ * followed by the sub-segments in order (separated by hyphen). For example,
+ * "x-variant-WIN", "x-variant-Solaris-10". If this locale has a private use
+ * extension value, the special private use subtags prefixed by "variant" are
+ * appended after the locale's private use value.
+ * <li>if any subtag does not consist of 1 to 8 ASCII alphanumerics, the
+ * variant will be truncated and the problematic subtag and all following
+ * sub-segments will be omitted. If the remainder is non-empty, it will be
+ * emitted as a private use subtag as above (even if the remainder turns out
+ * to be well-formed). For example, "Solaris_isjustthecoolestthing" is emitted
+ * as "x-variant-Solaris", not as "solaris".</li>
+ * </ul>
*
- * <p><b>Note</b>: Underscores in the variant tag are normalized
- * to hyphen, and all fields, keys, and values are normalized to
- * lower case.
- *
+ * <p><b>Note:</b> Although the language tag created by this method
+ * satisfies the syntax requirements defined by the IETF BCP 47
+ * specification, it is not always a valid BCP 47 language tag.
+ * For example,
+ * <pre>
+ * new Locale("xx", "YY").toLanguageTag();
+ * </pre>
+ * will return "xx-YY", but the language subtag "xx" and the region subtag "YY"
+ * are invalid because they are not registered in the
+ * <a href="http://www.iana.org/assignments/language-subtag-registry">
+ * IANA Language Subtag Registry</a>.
+ *
* @return a BCP47 language tag representing the locale
+ * @see #forLanguageTag(String)
* @since 1.7
*/
public String toLanguageTag() {
@@ -857,87 +1035,78 @@ public final class Locale implements Clo
// Special handling for ill-formed locale no_NO_NY
baseLoc = BaseLocale.getInstance("nn", "", "NO", "");
}
- return LanguageTag.toLanguageTag(baseLoc, _extensions);
- }
-
- /**
- * <span style="background-color: #00ccff; font-weight: bold">New API</span>
- *
- * Returns a locale for the specified language tag string. If the
- * specified language tag contains any ill-formed subtags, the
- * first such subtag and all following subtags are ignored.
+ LanguageTag tag = LanguageTag.parseLocale(baseLoc, _extensions);
+ return tag.getID();
+ }
+
+ /**
+ * Returns a locale for the specified IETF BCP 47 language tag string.
+ * If the specified language tag contains any ill-formed subtags,
+ * the first such subtag and all following subtags are ignored.
*
* <p>This implements the 'Language-Tag' production of BCP47, and
* so supports grandfathered (regular and irregular) as well as
- * private use language tags. Private use tags are represented as
- * 'und-x-whatever', and grandfathered tags are converted to their
- * canonical replacements where they exist. Note that a few
- * grandfathered tags have no modern replacement; these will be
- * converted using the fallback described above so some
- * information might be lost.
+ * private use language tags. Standalone private use tags are
+ * represented as an empty language and the extension 'x-whatever',
+ * and grandfathered tags are converted to their canonical replacements
+ * where they exist. Note that a few grandfathered tags have no
+ * modern replacement; these will be converted using the fallback
+ * described above so some information might be lost.
*
- * <p>For a list of grandfathered tags, see <a
- * href="http://www.ietf.org/internet-drafts/draft-ietf-ltru-4646bis-21.txt">
- * RFC4646</a> (<span style="background-color: #00ccff;
- * font-weight: bold">Currently Draft, remove or reference final
- * version before release.</span>)
+ * <p>For a list of grandfathered tags, see the
+ * <a href="http://www.iana.org/assignments/language-subtag-registry">
+ * IANA Language Subtag Registry</a>.
+ *
+ * <p><b>Notes:</b> This method converts private use subtags prefixed
+ * by "variant" to the variant field in the result locale. For example,
+ * the code below will return "POSIX".
+ * <pre>
+ * Locale.forLanguageTag("en-US-x-variant-POSIX").getVariant();
+ * </pre>
*
* @param languageTag the language tag
* @return the locale that best represents the language tag
+ * @exception NullPointerException if <code>languageTag</code> is <code>null</code>
+ * @see #toLanguageTag()
* @since 1.7
*/
public static Locale forLanguageTag(String languageTag) {
- Locale locale = Locale.ROOT;
- LanguageTag tag = null;
- while (true) {
- try {
- tag = LanguageTag.parse(languageTag);
-
- Builder bldr = new Builder();
-
- bldr.setLanguage(tag.getLanguage()).setScript(tag.getScript())
- .setRegion(tag.getRegion()).setVariant(tag.getVariant());
-
- // setExtension may throw an exception if
- // it contains malformed LDML keys.
- Set<Extension> exts = tag.getExtensions();
- if (exts != null) {
- Iterator<Extension> itr = exts.iterator();
- while (itr.hasNext()) {
- Extension e = itr.next();
- bldr.setExtension(e.getSingleton(), e.getValue());
- }
- }
- bldr.setExtension(PRIVATE_USE_EXTENSION, tag.getPrivateUse());
- locale = bldr.create();
- break;
- } catch (LocaleSyntaxException e) {
- // this exception was thrown by LanguageTag#parse
- // - fall through
- } catch (IllformedLocaleException e) {
- // this expection was thrown by setExtension with
- // malformed LDML keys - fall through
- }
- // remove the last subtag and try it again
- int idx = languageTag.lastIndexOf('-');
- if (idx == -1) {
- // no more subtags
- break;
- }
- languageTag = languageTag.substring(0, idx);
- }
-
- return locale;
- }
-
- /**
- * Returns a three-letter abbreviation for this locale's language. If the locale
- * doesn't specify a language, this will be the empty string. Otherwise, this will
- * be a lowercase ISO 639-2/T language code.
+ LanguageTag tag = LanguageTag.parse(languageTag, true);
+
+ // Extract the base locale and the extensions parsed from the language tag
+ BaseLocale baseloc = tag.getBaseLocale();
+ LocaleExtensions extensions = tag.getLocaleExtensions();
+
+ Locale loc;
+ // Special handling for ill-formed Java 6 Locales - ja_JP_JP and th_TH_TH
+ if (baseloc.getLanguage().equals("ja") && baseloc.getScript().equals("")
+ && baseloc.getRegion().equals("JP") && baseloc.getVariant().equals("")
+ && extensions.equals(LocaleExtensions.CALENDAR_JAPANESE)) {
+ loc = getInstance("ja", "", "JP", "JP", LocaleExtensions.CALENDAR_JAPANESE);
+ } else if (baseloc.getLanguage().equals("th") && baseloc.getScript().equals("")
+ && baseloc.getRegion().equals("TH") && baseloc.getVariant().equals("")
+ && extensions.equals(LocaleExtensions.NUMBER_THAI)) {
+ loc = getInstance("th", "", "TH", "TH", LocaleExtensions.NUMBER_THAI);
+ } else {
+ // normal case
+ loc = getInstance(tag.getBaseLocale(), tag.getLocaleExtensions());
+ }
+
+ return loc;
+ }
+
+ /**
+ * Returns a three-letter abbreviation of this locale's language.
+ * If the language matches an ISO 639-1 two-letter code, the
+ * corresponding ISO 639-2/T three-letter lowercase code is returned.
* The ISO 639-2 language codes can be found on-line at
- * <a href="http://www.loc.gov/standards/iso639-2/englangn.html">
- * <code>http://www.loc.gov/standards/iso639-2/englangn.html</code>.</a>
- * @exception MissingResourceException Throws MissingResourceException if the
+ * <a href="http://www.loc.gov/standards/iso639-2/langhome.html">
+ * <code>http://www.loc.gov/standards/iso639-2/langhome.html</code></a>.
+ * If the locale specifies a three-letter language, the language is
+ * returned as is. If the locale does not specify a language the
+ * empty string is returned.
+ * @return A three-letter abbreviation of this locale's language.
+ * @exception MissingResourceException Throws MissingResourceException if the
* three-letter language abbreviation is not available for this locale.
*/
public String getISO3Language() throws MissingResourceException {
@@ -950,12 +1119,14 @@ public final class Locale implements Clo
}
/**
- * Returns a three-letter abbreviation for this locale's country. If the locale
- * doesn't specify a country, this will be the empty string. Otherwise, this will
- * be an uppercase ISO 3166 3-letter country code.
- * The ISO 3166-2 country codes can be found on-line at
- * <a href="http://www.davros.org/misc/iso3166.txt">
- * <code>http://www.davros.org/misc/iso3166.txt</code>.</a>
+ * Returns a three-letter abbreviation for this locale's country.
+ * If the country matches an ISO 3166-1 alpha-2 code, the corresponding
+ * ISO 3166-1 alpha-3 uppercase code is returned. The ISO 3166-1 codes can
+ * be found on-line at
+ * <a href="http://en.wikipedia.org/wiki/ISO_3166-1">
+ * <code>http://en.wikipedia.org/wiki/ISO_3166-1</code></a>.
+ * If the locale doesn't specify a country, this will be the empty string.
+ * @return A three-letter abbreviation of this locale's country.
* @exception MissingResourceException Throws MissingResourceException if the
* three-letter country abbreviation is not available for this locale.
*/
@@ -1025,12 +1196,10 @@ public final class Locale implements Clo
}
/**
- * <span style="background-color: #00ccff; font-weight: bold">New API</span>
- *
* Returns a name for the the locale's script code that is
* appropriate for display to the user. If possible, the name
- * will be localized for the default locale. If this locale
- * doesn't specify a script code, it returns the empty string.
+ * will be localized for the default locale. Returns the empty string
+ * if this locale doesn't specify a script code.
*
* @return the display name of the script code for the current default locale
* @since 1.7
@@ -1040,14 +1209,13 @@ public final class Locale implements Clo
}
/**
- * <span style="background-color: #00ccff; font-weight: bold">New API</span>
- *
* Returns a name for the locale's script code that is appropriate
* for display to the user. If possible, the name will be
* localized for the given locale. Returns the empty string if
* this locale doesn't specify a script code.
*
* @return the display name of the script code for the current default locale
+ * @exception NullPointerException if <code>inLocale</code> is <code>null</code>
* @since 1.7
*/
public String getDisplayScript(Locale inLocale) {
@@ -1166,16 +1334,22 @@ public final class Locale implements Clo
/**
* Returns a name for the locale that is appropriate for display to the
- * user. This will be the values returned by getDisplayLanguage(), getDisplayCountry(),
- * and getDisplayVariant() assembled into a single string. The display name will have
- * one of the following forms:<p><blockquote>
- * language (country, variant)<p>
- * language (country)<p>
- * language (variant)<p>
- * country (variant)<p>
- * language<p>
- * country<p>
- * variant<p></blockquote>
+ * user. This will be the values returned by getDisplayLanguage(),
+ * getDisplayScript(), getDisplayCountry(), and getDisplayVariant() assembled
+ * into a single string. The display name will have one of the following forms:
+ * <blockquote>
+ * language (script, country, variant)<br>
+ * language (script, country)<br>
+ * language (script, variant)<br>
+ * language (script)<br>
+ * language (country, variant)<br>
+ * language (country)<br>
+ * language (variant)<br>
+ * country (variant)<br>
+ * language<br>
+ * country<br>
+ * variant<br>
+ * </blockquote>
* depending on which fields are specified in the locale. If the language, country,
* and variant fields are all empty, this function returns the empty string.
*/
@@ -1188,9 +1362,12 @@ public final class Locale implements Clo
* user. This will be the values returned by getDisplayLanguage(), getDisplayCountry(),
* and getDisplayVariant() assembled into a single string. The display name will have
* one of the following forms:<p><blockquote>
+ * language (script, country, variant)<p>
+ * language (script, country)<p>
+ * language (script, variant)<p>
+ * language (script)<p>
* language (country, variant)<p>
* language (country)<p>
- * language (variant)<p>
* country (variant)<p>
* language<p>
* country<p>
@@ -1303,8 +1480,8 @@ public final class Locale implements Clo
/**
* Returns true if this Locale is equal to another object. A Locale is
- * deemed equal to another Locale with identical language, country,
- * and variant, and unequal to all other objects.
+ * deemed equal to another Locale with identical language, script, country,
+ * variant and extensions, and unequal to all other objects.
*
* @return true if this Locale is equal to the specified object.
*/
@@ -1325,9 +1502,6 @@ public final class Locale implements Clo
private transient LocaleExtensions _extensions;
private static Locale defaultLocale = null;
-
- private static final LocaleExtensions CALENDAR_JAPANESE = LocaleExtensions.getInstance("u-ca-japanese");
- private static final LocaleExtensions NUMBER_THAI = LocaleExtensions.getInstance("u-nu-thai");
/**
* Return an array of the display names of the variant.
@@ -1427,7 +1601,7 @@ public final class Locale implements Clo
fields.put("script", _baseLocale.getScript());
fields.put("country", _baseLocale.getRegion());
fields.put("variant", _baseLocale.getVariant());
- fields.put("extensions", _extensions.getCanonicalString());
+ fields.put("extensions", _extensions.getID());
out.writeFields();
}
@@ -1439,7 +1613,11 @@ public final class Locale implements Clo
String variant = (String)fields.get("variant", "");
String extStr = (String)fields.get("extensions", "");
_baseLocale = BaseLocale.getInstance(convertOldISOCodes(language), script, country, variant);
- _extensions = LocaleExtensions.getInstance(extStr);
+ try {
+ _extensions = LocaleExtensions.getInstance(extStr);
+ } catch (LocaleSyntaxException e) {
+ throw new InvalidClassException("Corrupted extensions: " + extStr);
+ }
}
/**
@@ -1493,13 +1671,13 @@ public final class Locale implements Clo
&& AsciiUtil.caseIgnoreMatch(country, "JP")
&& AsciiUtil.caseIgnoreMatch(variant, "JP")) {
// ja_JP_JP -> u-ca-japanese (calendar = japanese)
- extensions = CALENDAR_JAPANESE;
+ extensions = LocaleExtensions.CALENDAR_JAPANESE;
} else if (AsciiUtil.caseIgnoreMatch(language, "th")
&& script.length() == 0
&& AsciiUtil.caseIgnoreMatch(country, "TH")
&& AsciiUtil.caseIgnoreMatch(variant, "TH")) {
// th_TH_TH -> u-nu-thai (numbersystem = thai)
- extensions = NUMBER_THAI;
+ extensions = LocaleExtensions.NUMBER_THAI;
}
return extensions;
}
@@ -1536,174 +1714,169 @@ public final class Locale implements Clo
}
/**
- * <span style="background-color: #00ccff; font-weight: bold">New API</span>
- *
- * Builder is used to build instances of Locale from values
- * configured by the setter.
+ * <code>Builder</code> is used to build instances of <code>Locale</code>
+ * from values configured by the setter. Unlike the <code>Locale</code>
+ * constructors, the <code>Builder</code> checks if a value configured by a
+ * setter satisfies the syntactical requirements defined by the <code>Locale</code>
+ * class. A <code>Locale</code> object created by a <code>Builder</code> is
+ * well-formed and can be transformed to a well-formed IETF BCP 47 language tag
+ * without losing information.
*
- * <p>Builder supports the 'langtag' production of BCP47.
- * Language tags consist of the ASCII digits, upper and lower case
- * letters, and hyphen (which appears only as a field separator).
- * As a convenience, underscores are accepted and normalized to
- * hyphen. Values with any other character are ill-formed. Since
- * language tags are case-insensitive, they are normalized to
- * lower case, case distinctions are <b>not</b> preserved by the
- * builder.
+ * <p>
+ * <b>Note:</b> The <code>Locale</code> class does not provide
+ * any syntactical restrictions on variant, while BCP 47
+ * requires each variant subtag to be 5 to 8 alphanumeric characters or a single
+ * digit followed by 3 alphanumeric characters. By default,
+ * the <code>setVariant</code> method throws <code>IllformedLocaleException</code>
+ * for a variant that does not satisfy the syntax above. If it is
+ * necessary to support such a variant, you could use the constructor <code>
+ * Builder(boolean isLenientVariant)</code> passing <code>true</code> to
+ * skip the syntax validation for variant. However, you should keep in
+ * mind that a <code>Locale</code> object created this way might lose
+ * the variant information when transformed to a BCP 47 language tag.
*
- * <p>Note that since this implements 'langtag' and not
- * 'Language-Tag', grandfathered language tags are not supported
- * by the builder. Clients who need this functionality should use
- * {@link java.util.Locale#forLanguageTag} instead.
+ * <p>
+ * The following example shows how to create a <code>Locale</code> object
+ * with the <code>Builder</code>.
+ * <blockquote>
+ * <pre>
+ * Locale aLocale = new Builder().setLanguage("sr").setScript("Latn").setRegion("RS").build();
+ * </pre>
+ * </blockquote>
*
* <p>Builders can be reused; <code>clear()</code> resets all
* fields to their default values.
- *
- * @see Builder#create
- * @see Builder#clear
+ *
+ * @see Locale#toLanguageTag()
* @since 1.7
*/
public static final class Builder {
- private InternalLocaleBuilder _locbld = new InternalLocaleBuilder();
+ private InternalLocaleBuilder _locbld;
+ private boolean _isLenientVariant = false;
private static final Locale JAPANESE_CAL_JAPANESE = Locale.getInstance("ja", "JP", "JP");
private static final Locale THAI_NUM_THAI = Locale.getInstance("th", "TH", "TH");
private static final Locale NORWEGIAN_NYNORSK = Locale.getInstance("no", "NO", "NY");
/**
- * <span style="background-color: #00ccff; font-weight: bold">New API</span>
- *
* Constructs an empty Builder. The default value of all
* fields, extensions, and private use information is the
* empty string.
- *
- * @since 1.7
*/
public Builder() {
+ this(false);
}
/**
- * <span style="background-color: #00ccff; font-weight: bold">New API</span>
+ * Constructs an empty Builder with an option whether to allow
+ * <code>setVariant</code> to accept a value that does not
+ * conform to the IETF BCP 47 variant subtag's syntax requirements.
*
- * Resets the builder to match the provided locale. The
- * previous state of the builder is discarded. Fields that do
- * not conform to BCP47 syntax are ill-formed.
+ * @param isLenientVariant When true, this <code>Builder</code>
+ * will accept an ill-formed variant.
+ * @see #setVariant(String)
+ */
+ public Builder(boolean isLenientVariant) {
+ _isLenientVariant = isLenientVariant;
+ _locbld = new InternalLocaleBuilder(isLenientVariant);
+ }
+
+ /**
+ * Returns true if this <code>Builder</code> accepts a value that does
+ * not conform to the IETF BCP 47 variant subtag's syntax requirements
+ * in <code>setVariant</code>
+ *
+ * @return true if this <code>Builder</code> accepts an ill-formed variant.
+ */
+ public boolean isLenientVariant() {
+ return _isLenientVariant;
+ }
+
+ /**
+ * Resets the <code>Builder</code> to match the provided <code>locale</code>.
+ * The previous state of the builder is discarded. Fields that do
+ * not conform to the <code>Locale</code> class specification, for example,
+ * a single letter language, are ill-formed.
*
* @param locale the locale
* @return this builder
* @throws IllformedLocaleException if <code>locale</code> has
* any ill-formed fields.
- * @since 1.7
+ * @throws NullPointerException if <code>locale</code> is null.
*/
public Builder setLocale(Locale locale) {
- if (locale.equals(NORWEGIAN_NYNORSK)) {
- // Semantically map the grandfathered locale no_NO_NY,
- // which has ill-formed variant value "NY"
- clear();
- setLanguage("nn").setRegion("NO");
- return this;
- }
- if (locale.equals(JAPANESE_CAL_JAPANESE)) {
- // Ignore short variant "JP" for the grandfathered locale "ja_JP_JP"
- clear();
- setLanguage("ja").setRegion("JP").setLDMLExtensionValue("ca", "japanese");
- return this;
- }
- if (locale.equals(THAI_NUM_THAI)) {
- // Ignore short variant "TH" for the grandfathered locale "th_TH_TH"
- clear();
- setLanguage("th").setRegion("TH").setLDMLExtensionValue("nu", "thai");
- return this;
- }
-
- // Create a temporary builder to prevent the internal
- // state to be updated half way on error input
- Builder tmpBldr = new Builder();
- tmpBldr.setLanguage(locale.getLanguage())
- .setScript(locale.getScript())
- .setRegion(locale.getCountry())
- .setVariant(locale.getVariant());
-
- Set<Character> extKeys = locale.getExtensionKeys();
- if (extKeys != null) {
- Iterator<Character> itr = extKeys.iterator();
- while (itr.hasNext()) {
- char key = itr.next().charValue();
- String value = locale.getExtension(key);
- if (value != null && value.length() > 0) {
- tmpBldr.setExtension(key, value);
- }
+ if (!_isLenientVariant) {
+ if (locale.equals(NORWEGIAN_NYNORSK)) {
+ // Semantically map the grandfathered locale no_NO_NY,
+ // which has ill-formed variant value "NY"
+ clear();
+ setLanguage("nn").setRegion("NO");
+ return this;
}
- }
- // Replacing the original InternalLocaleBuilder with
- // the one which was initialized by the given Locale
- this._locbld = tmpBldr._locbld;
-
+ if (locale.equals(JAPANESE_CAL_JAPANESE)) {
+ // Ignore short variant "JP" for the grandfathered locale "ja_JP_JP"
+ clear();
+ setLanguage("ja").setRegion("JP").setUnicodeLocaleKeyword("ca", "japanese");
+ return this;
+ }
+ if (locale.equals(THAI_NUM_THAI)) {
+ // Ignore short variant "TH" for the grandfathered locale "th_TH_TH"
+ clear();
+ setLanguage("th").setRegion("TH").setUnicodeLocaleKeyword("nu", "thai");
+ return this;
+ }
+ }
+
+ try {
+ _locbld.setLocale(locale._baseLocale, locale._extensions);
+ } catch (LocaleSyntaxException e) {
+ throw new IllformedLocaleException(e.getMessage(), e.getErrorIndex());
+ }
return this;
}
/**
- * <span style="background-color: #00ccff; font-weight: bold">New API</span>
- *
- * Resets the builder to match the provided language tag. The
- * previous state of the builder is discarded.
+ * Resets the builder to match the provided IETF BCP 47 language tag.
+ * The previous state of the builder is discarded.
*
* @param languageTag the language tag
* @return this builder
* @throws IllformedLocaleException if <code>languageTag</code> is ill-formed.
+ * @throws NullPointerException if <code>languageTag</code> is null.
* @see Locale#forLanguageTag(String)
- * @since 1.7
*/
public Builder setLanguageTag(String languageTag) {
- // Create a temporary builder to prevent the internal
- // state to be updated half way on error input
- Builder tmpBldr = new Builder();
-
LanguageTag tag = null;
try {
- tag = LanguageTag.parse(languageTag);
+ tag = LanguageTag.parseStrict(languageTag, _locbld.isLenientVariant());
} catch (LocaleSyntaxException e) {
throw new IllformedLocaleException(e.getMessage(), e.getErrorIndex());
}
- // base locale fields
- tmpBldr.setLanguage(tag.getLanguage())
- .setScript(tag.getScript())
- .setRegion(tag.getRegion())
- .setVariant(tag.getVariant());
-
- // extensions
- Set<Extension> exts = tag.getExtensions();
- if (exts != null) {
- Iterator<Extension> itr = exts.iterator();
- while (itr.hasNext()) {
- Extension e = itr.next();
- tmpBldr.setExtension(e.getSingleton(), e.getValue());
- //TODO: setExtension may throw an IllformedLocaleException.
- // In this case, error index must be recalculated.
- }
- }
- tmpBldr.setExtension(PRIVATE_USE_EXTENSION, tag.getPrivateUse());
-
- // Replacing the original InternalLocaleBuilder with
- // the one which was initialized by the given language tag
- this._locbld = tmpBldr._locbld;
+ try {
+ _locbld.setLocale(tag.getBaseLocale(),tag.getLocaleExtensions());
+ } catch (LocaleSyntaxException e) {
+ throw new IllformedLocaleException(e.getMessage(), e.getErrorIndex());
+ }
return this;
}
/**
- * <span style="background-color: #00ccff; font-weight: bold">New API</span>
- *
- * Sets the language. If language is the empty string, the
- * language is defaulted. Language should be a two or
- * three-letter language code as defined in ISO639.
- * Well-formed values are any string of two to eight ASCII
- * letters.
+ * Sets the language. If <code>language</code> is the empty string,
+ * the language in this <code>Builder</code> will be removed.
+ * Typical language value is a two or three-letter language
+ * code as defined in ISO639.
+ * Well-formed values are any string of two to eight alpha
+ * letters. This method accepts upper case alpha letters
+ * [A-Z], but the language value in the <code>Locale</code>
+ * created by the <code>Builder</code> is always normalized
+ * to lower case letters.
*
* @param language the language
* @return this builder
* @throws IllformedLocaleException if <code>language</code> is ill-formed
- * @since 1.7
+ * @throws NullPointerException if <code>language</code> is null.
*/
public Builder setLanguage(String language) {
try {
@@ -1715,17 +1888,19 @@ public final class Locale implements Clo
}
/**
- * <span style="background-color: #00ccff; font-weight: bold">New API</span>
- *
- * Sets the script. If script is the empty string, the script
- * is defaulted. Scripts should be a four-letter script code
- * as defined in ISO 15924. Well-formed values are any string
- * of four ASCII letters.
+ * Sets the script. If <code>script</code> is the empty string,
+ * the script in this <code>Builder</code> is removed.
+ * Typical script value is a four-letter script code as defined by ISO 15924.
+ * Well-formed values are any string of four alpha letters.
+ * This method accepts both upper and lower case alpha letters [a-zA-Z],
+ * but the script value in the <code>Locale</code> created by the
+ * <code>Builder</code> is always normalized to title case
+ * (the first letter is upper case and the rest of letters are lower case).
*
* @param script the script
* @return this builder
* @throws IllformedLocaleException if <code>script</code> is ill-formed
- * @since 1.7
+ * @throws NullPointerException if <code>script</code> is null.
*/
public Builder setScript(String script) {
try {
@@ -1737,17 +1912,18 @@ public final class Locale implements Clo
}
/**
- * <span style="background-color: #00ccff; font-weight: bold">New API</span>
- *
* Sets the region. If region is the empty string, the region
- * is defaulted. Region should be a two-letter ISO 3166 code
- * or a three-digit M. 49 code. Well-formed values are any
- * two-letter or three-digit string.
+ * in this <code>Builder</code> is removed.
+ * Typical region value is a two-letter ISO 3166 code or a three-digit UN M.49
+ * area code. Well-formed values are any two-letter or three-digit string.
+ * This method accepts lower case letters [a-z], but the country value in
+ * the <code>Locale</code> created by the <code>Builder</code> is always
+ * normalized to upper case.
*
* @param region the region
* @return this builder
* @throws IllformedLocaleException if <code>region</code> is ill-formed
- * @since 1.7
+ * @throws NullPointerException if <code>region</code> is null.
*/
public Builder setRegion(String region) {
try {
@@ -1759,22 +1935,23 @@ public final class Locale implements Clo
}
/**
- * <span style="background-color: #00ccff; font-weight: bold">New API</span>
- *
* Sets the variant. If variant is the empty string, the
- * variant is defaulted. Variants should be registered
- * variants (see <a
- * href="http://www.iana.org/assignments/language-subtag-registry">
- * IANA Language Subtag Registry</a>) corresponding to the
- * prefix. Well-formed variants are any series of fields of
- * either four characters starting with a digit, or five to
- * eight alphanumeric characters, separated by hyphen or
- * underscore.
+ * variant in this <code>Builder</code> is removed.
+ * <p>
+ * <b>Note:</b> By default, this method checks if <code>variant</code>
+ * satisfies the IETF BCP 47 variant subtag's syntax requirements,
+ * and normalizes the value to lowercase letters. However,
+ * the <code>Locale</code> class itself does not impose any syntactical
+ * restriction on variant. Also, the variant value in
+ * <code>Locale</code> is case sensitive. When a <code>Builder</code>
+ * is created by the constructor <code>Builder(boolean isLenientVariant)</code>
+ * with <code>true</code>, this method skips the syntax check, and leaves
+ * the value unchanged.
*
* @param variant the variant
* @return this builder
* @throws IllformedLocaleException if <code>variant</code> is ill-formed
- * @since 1.7
+ * @throws NullPointerException if <code>variant</code> is null.
*/
public Builder setVariant(String variant) {
try {
@@ -1786,8 +1963,6 @@ public final class Locale implements Clo
}
/**
- * <span style="background-color: #00ccff; font-weight: bold">New API</span>
- *
* Sets the extension for the given key. If the value is the
* empty string, the extension is removed. Legal keys are
* characters in the ranges <code>[0-9A-Za-z]</code>. Keys
@@ -1796,28 +1971,28 @@ public final class Locale implements Clo
* series of fields of two to eight alphanumeric characters,
* separated by hyphen or underscore.
*
- * <p><b>Note</b>: The key {@link LDML_EXTENSION} ('u' or 'U') is
- * used for LDML Keywords. Setting a value for this key
- * replaces any existing LDML keywords with those defined in
- * the extension. To be well-formed, a value for this
- * extension must meet the additional constraint that the
- * number of fields be even (fields represent key value pairs,
- * where the value is mandatory), and that the keys and values
- * be legal LDML extension keys and values.
+ * <p><b>Note:</b> The key {@link Locale#UNICODE_LOCALE_EXTENSION
+ * UNICODE_LOCALE_EXTENSION} ('u') is used for the Unicode locale extension.
+ * Setting a value for this key replaces any existing Unicode locale key/type
+ * pairs with those defined in the extension.
+ * To be well-formed, a value for this extension must meet the additional
+ * constraints that each locale key is two alphanumeric characters,
+ * followed by at least one locale type subtag represented by
+ * three to eight alphanumeric characters, and that the keys and types
+ * be legal Unicode locale keys and values.
*
- * <p><b>Note</b>: The key {@link PRIVATE_USE_EXTENSION} ('x'
- * or 'X') is used for the private use code. To be
- * well-formed, the value for this key needs only to have
- * fields of one to eight alphanumeric characters, not two to
- * eight as in the general case.
+ * <p><b>Note:</b> The key {@link Locale#PRIVATE_USE_EXTENSION
+ * PRIVATE_USE_EXTENSION} ('x') is used for the private use code. To be
+ * well-formed, the value for this key needs only to have fields of one to
+ * eight alphanumeric characters, not two to eight as in the general case.
*
* @param key the extension key
* @param value the extension value
* @return this builder
* @throws IllformedLocaleException if <code>key</code> is illegal
* or <code>value</code> is ill-formed
- * @see #setLDMLKeyword
- * @since 1.7
+ * @throws NullPointerException if <code>value</code> is null.
+ * @see #setUnicodeLocaleKeyword(String, String)
*/
public Builder setExtension(char key, String value) {
try {
@@ -1829,32 +2004,25 @@ public final class Locale implements Clo
}
/**
- * <span style="background-color: #00ccff; font-weight: bold">New API</span>
- *
- * Sets the LDML extension value ('keyword type') for the
- * given LDML extension key. If the value is the empty
- * string, the LDML keyword is removed. Well-formed keys are
- * strings of two alphanumeric characters. Well-formed values
- * are strings of three to eight alphanumeric characters.
- *
- * <p><b>Note</b>: Keys are case-insensitive, so for example
- * the key 'ca' and the key "CA" represent the same LDML
- * extension key.
- *
- * <p><b>Note</b>: Setting the {@link LDML_EXTENSION} replaces
- * all LDML keywords with those defined in the extension.
- *
- * @param key the LDML extension key
- * @param value the LDML extension value
+ * Sets the Unicode locale keyword type for the given key. If the
+ * type is the empty string, the Unicode locale keyword is removed.
+ * Well-formed keys are strings of two alphanumeric characters.
+ * Well-formed types are one or more subtags where each of them is
+ * three to eight alphanumeric characters.
+ * <p>
+ * <b>Note:</b> Setting the 'u' extension replaces all Unicode locale
+ * keywords with those defined in the extension.
+ * @param key the Unicode locale key
+ * @param type the Unicode locale type
* @return this builder
- * @throws IllformedLocaleException if <code>key</code> or <code>value</code>
+ * @throws IllformedLocaleException if <code>key</code> or <code>type</code>
* is ill-formed
+ * @throws NullPointerException if <code>key</code> or <code>type</code> is null.
* @see #setExtension(char, String)
- * @since 1.7
*/
- public Builder setLDMLExtensionValue(String key, String value) {
+ public Builder setUnicodeLocaleKeyword(String key, String type) {
try {
- _locbld.setLDMLExtensionValue(key, value);
+ _locbld.setUnicodeLocaleExtension(key, type);
} catch (LocaleSyntaxException e) {
throw new IllformedLocaleException(e.getMessage(), e.getErrorIndex());
}
@@ -1862,12 +2030,9 @@ public final class Locale implements Clo
}
/**
- * <span style="background-color: #00ccff; font-weight: bold">New API</span>
- *
- * Resets the builder to its initial, default state.
+ * Resets the builder to its initial, empty state.
*
* @return this builder
- * @since 1.7
*/
public Builder clear() {
_locbld.clear();
@@ -1875,14 +2040,11 @@ public final class Locale implements Clo
}
/**
- * <span style="background-color: #00ccff; font-weight: bold">New API</span>
- *
- * Resets the extensions to their initial, default state.
+ * Resets the extensions to their initial, empty state.
* Language, script, region and variant are unchanged.
*
* @return this builder
* @see #setExtension(char, String)
- * @since 1.7
*/
public Builder clearExtensions() {
_locbld.removeLocaleExtensions();
@@ -1890,18 +2052,22 @@ public final class Locale implements Clo
}
/**
- * <span style="background-color: #00ccff; font-weight: bold">New API</span>
- *
* Returns an instance of Locale created from the fields set
* on this builder.
*
* @return a new Locale
- * @since 1.7
*/
- public Locale create() {
- BaseLocale base = _locbld.getBaseLocale();
- LocaleExtensions ext = _locbld.getLocaleExtensions();
- return Locale.getInstance(base.getLanguage(), base.getScript(), base.getRegion(), base.getVariant(), ext);
+ public Locale build() {
+ BaseLocale baseloc = _locbld.getBaseLocale();
+ LocaleExtensions extensions = _locbld.getLocaleExtensions();
+
+ if (_isLenientVariant) {
+ if (extensions.equals(LocaleExtensions.EMPTY_EXTENSIONS)) {
+ extensions = Locale.getCompatibilityExtensions(baseloc.getLanguage(), baseloc.getScript(), baseloc.getRegion(), baseloc.getVariant());
+ }
+ }
+
+ return Locale.getInstance(baseloc, extensions);
}
}
}
--- a/src/share/classes/java/util/ResourceBundle.java Wed Oct 21 12:24:07 2009 -0700
+++ b/src/share/classes/java/util/ResourceBundle.java Wed Nov 04 17:57:23 2009 -0500
@@ -865,23 +865,42 @@ public abstract class ResourceBundle {
* to generate a sequence of <a name="candidates"><em>candidate bundle names</em></a>.
* If the specified locale's language, country, and variant are all empty
* strings, then the base name is the only candidate bundle name.
- * Otherwise, the following sequence is generated from the attribute
- * values of the specified locale (language1, country1, and variant1)
- * and of the default locale (language2, country2, and variant2):
- * <ul>
- * <li> baseName + "_" + language1 + "_" + country1 + "_" + variant1
- * <li> baseName + "_" + language1 + "_" + country1
- * <li> baseName + "_" + language1
- * <li> baseName + "_" + language2 + "_" + country2 + "_" + variant2
- * <li> baseName + "_" + language2 + "_" + country2
- * <li> baseName + "_" + language2
- * <li> baseName
- * </ul>
+ * Otherwise, the following sequence is generated from the attribute values
+ * of the specified locale (language, script, country and variant).
+ * <pre>
+ * baseName + "_" + language + "_" + script + "_" + country + "_" + variant
+ * baseName + "_" + language + "_" + script + "_" + country
+ * baseName + "_" + language + "_" + script
+ * baseName + "_" + language + "_" + country + "_" + variant
+ * baseName + "_" + language + "_" + country
+ * baseName + "_" + language
+ * </pre>
* <p>
* Candidate bundle names where the final component is an empty string are omitted.
- * For example, if country1 is an empty string, the second candidate bundle name is omitted.
- *
- * <p>
+ * For example, if country is an empty string, the second and the fourth candidate
+ * bundle names are omitted. Also, if script is an empty string, the candidate names
+ * including script are omitted. For example, a locale with language "de" and
+ * variant "JAVA" will produce the candidate names with base name "MyResource" below.
+ * <pre>
+ * MyResource_de__JAVA
+ * MyResource_de
+ * </pre>
+ * In the case that the variant contains one or more underscores ('_'), a sequence of
+ * bundle names generated by truncating the last underscore and the part following it
+ * is inserted after a candidate bundle name with the original variant. For example,
+ * a locale with language "en", script "Latn", country "US" and variant "WINDOWS_VISTA",
+ * with bundle base name "MyResource", will produce the candidate bundle names below:
+ * <pre>
+ * MyResource_en_Latn_US_WINDOWS_VISTA
+ * MyResource_en_Latn_US_WINDOWS
+ * MyResource_en_Latn_US
+ * MyResource_en_Latn
+ * MyResource_en_US_WINDOWS_VISTA
+ * MyResource_en_US_WINDOWS
+ * MyResource_en_US
+ * MyResource_en
+ * </pre>
+ *
* <code>getBundle</code> then iterates over the candidate bundle names to find the first
* one for which it can <em>instantiate</em> an actual resource bundle. For each candidate
* bundle name, it attempts to create a resource bundle:
@@ -904,19 +923,30 @@ public abstract class ResourceBundle {
* {@link PropertyResourceBundle} instance from its contents.
* If successful, this instance becomes the <em>result resource bundle</em>.
* </ul>
- *
* <p>
- * If no result resource bundle has been found, a <code>MissingResourceException</code>
- * is thrown.
+ * If no matching resource bundle is found in the candidate bundle names generated
+ * from the specified locale, another sequence of candidate bundle names is generated
+ * from the attribute values of the default locale with the same algorithm. If no
+ * match is found in either of them, the base name alone is looked up.
+ * If no result resource bundle has been found for any of the candidate bundle names
+ * above, a <code>MissingResourceException</code> is thrown.
+ *
+ * <p><b>Note:</b> The list of candidate bundle names used by this method is generated by the default
+ * implementation of {@link ResourceBundle.Control Control}. For some <code>Locale</code>s,
+ * the list of candidate bundle names contains some extra names or the order of
+ * bundle names is slightly modified. See the description of the default implementation
+ * of {@link ResourceBundle.Control#getCandidateLocales(String, Locale) getCandidateLocales}
+ * for details.
+ *
*
* <p><a name="parent_chain"/>
* Once a result resource bundle has been found, its <em>parent chain</em> is instantiated.
- * <code>getBundle</code> iterates over the candidate bundle names that can be
- * obtained by successively removing variant, country, and language
- * (each time with the preceding "_") from the bundle name of the result resource bundle.
+ * <code>getBundle</code> iterates over the rest of the candidate bundle names generated from
+ * either the specified locale or the default locale, then the base name alone at the end.
* As above, candidate bundle names where the final component is an empty string are omitted.
* With each of the candidate bundle names it attempts to instantiate a resource bundle, as
* described above.
+ * <p>
* Whenever it succeeds, it calls the previously instantiated resource
* bundle's {@link #setParent(java.util.ResourceBundle) setParent} method
* with the new resource bundle, unless the previously instantiated resource
@@ -934,7 +964,9 @@ public abstract class ResourceBundle {
* path name (using "/") instead of a fully qualified class name (using ".").
*
* <p><a name="default_behavior_example"/>
- * <strong>Example:</strong><br>The following class and property files are provided:
+ * <strong>Example:</strong>
+ * <p>
+ * The following class and property files are provided:
* <pre>
* MyResources.class
* MyResources.properties
@@ -2183,17 +2215,112 @@ public abstract class ResourceBundle {
* returned by <code>getCandidateLocales</code>.
*
* <p>The default implementation returns a <code>List</code> containing
- * <code>Locale</code>s in the following sequence:
- * <pre>
- * Locale(language, country, variant)
- * Locale(language, country)
- * Locale(language)
- * Locale.ROOT
- * </pre>
- * where <code>language</code>, <code>country</code> and
- * <code>variant</code> are the language, country and variant values
- * of the given <code>locale</code>, respectively. Locales where the
- * final component values are empty strings are omitted.
+ * <code>Locale</code>s using the rules described below. In the description
+ * below, <em>L</em>, <em>S</em>, <em>C</em> and <em>V</em> represent non
+ * empty language, script, country, and variant. Also, [<em>L</em>, <em>C</em>]
+ * is used for representing a <code>Locale</code> that has non-empty values
+ * only for language and country. <em>L</em>("xx") represents the language value
+ * is "xx". For all cases, <code>Locale</code>s where the final component values
+ * are empty strings are omitted.
+ *
+ * <ol>
+ * <li>For an input <code>Locale</code> with an empty script value,
+ * append candidate <code>Locale</code>s by omitting the final component
+ * one by one as below:
+ * <ul>
+ * <li> [<em>L</em>, <em>C</em>, <em>V</em>]
+ * <li> [<em>L</em>, <em>C</em>]
+ * <li> [<em>L</em>]
+ * <li> <code>Locale.ROOT</code>
+ * </ul>
+ *
+ * <li>For an input <code>Locale</code> with a non-empty script
+ * value, append candidate <code>Locale</code>s by omitting the final component
+ * up to language, then append candidates generated from the <code>Locale</code>
+ * with country and variant restored:
+ * <ul>
+ * <li> [<em>L</em>, <em>S</em>, <em>C</em>, <em>V</em>]
+ * <li> [<em>L</em>, <em>S</em>, <em>C</em>]
+ * <li> [<em>L</em>, <em>S</em>]
+ * <li> [<em>L</em>, <em>C</em>, <em>V</em>]
+ * <li> [<em>L</em>, <em>C</em>]
+ * <li> [<em>L</em>]
+ * <li> <code>Locale.ROOT</code>
+ * </ul>
+ *
+ * <li>For an input <code>Locale</code> with a variant value including multiple
+ * subtags separated by underscore, generate <code>Locale</code> by omitting the
+ * variant subtags one by one, then insert them after every occurrence of <code>
+ * Locale</code>s with the full variant value in the original list. In the
+ * candidate list below, the variant consists of two subtags - <em>V1</em> and <em>V2</em>:
+ * <ul>
+ * <li> [<em>L</em>, <em>S</em>, <em>C</em>, <em>V1</em>, <em>V2</em>]
+ * <li> [<em>L</em>, <em>S</em>, <em>C</em>, <em>V1</em>]
+ * <li> [<em>L</em>, <em>S</em>, <em>C</em>]
+ * <li> [<em>L</em>, <em>S</em>]
+ * <li> [<em>L</em>, <em>C</em>, <em>V1</em>, <em>V2</em>]
+ * <li> [<em>L</em>, <em>C</em>, <em>V1</em>]
+ * <li> [<em>L</em>, <em>C</em>]
+ * <li> [<em>L</em>]
+ * <li> <code>Locale.ROOT</code>
+ * </ul>
+ *
+ * <li>Special cases for Chinese. When an input <code>Locale</code> has the
+ * language "zh" (Chinese) and an empty script value, either "Hans" (Simplified) or
+ * "Hant" (Traditional) might be supplied depending on the country.
+ * When the country is "CN" (China) or "SG" (Singapore), "Hans" is supplied.
+ * When the country is "HK" (Hong Kong SAR China) or "MO" (Macau SAR China)
+ * or "TW" (Taiwan), "Hant" is supplied. For all other countries or when the country
+ * is empty, no script is supplied. For example, for <code>Locale("zh", "CN")
+ * </code>, the candidate list will be:
+ * <ul>
+ * <li> [<em>L</em>("zh"), <em>S</em>("Hans"), <em>C</em>("CN")]
+ * <li> [<em>L</em>("zh"), <em>S</em>("Hans")]
+ * <li> [<em>L</em>("zh"), <em>C</em>("CN")]
+ * <li> [<em>L</em>("zh")]
+ * <li> <code>Locale.ROOT</code>
+ * </ul>
+ *
+ * For <code>Locale("zh", "TW")</code>, the candidate list will be:
+ * <ul>
+ * <li> [<em>L</em>("zh"), <em>S</em>("Hant"), <em>C</em>("TW")]
+ * <li> [<em>L</em>("zh"), <em>S</em>("Hant")]
+ * <li> [<em>L</em>("zh"), <em>C</em>("TW")]
+ * <li> [<em>L</em>("zh")]
+ * <li> <code>Locale.ROOT</code>
+ * </ul>
+ *
+ * <li>Special cases for Norwegian. Both <code>Locale("no", "NO", "NY")</code> and
+ * <code>Locale("nn", "NO")</code> represent Norwegian Nynorsk (Norway) and
+ * they are interpreted interchangeably. The candidate lists produced by these
+ * are identical as illustrated below:
+ * <ul>
+ * <li> [<em>L</em>("nn"), <em>C</em>("NO")]
+ * <li> [<em>L</em>("no"), <em>C</em>("NO"), <em>V</em>("NY")]
+ * <li> [<em>L</em>("nn")]
+ * <li> [<em>L</em>("no"), <em>C</em>("NO")]
+ * <li> [<em>L</em>("no")]
+ * <li> <code>Locale.ROOT</code>
+ * </ul>
+ * Also, Java treats the language "no" as a synonym of Norwegian Bokm&aring;l
+ * "nb". When an input <code>Locale</code> has language "no" or "nb", <code>Locale</code>s
+ * with language code "no" and "nb" are interleaved. For example, both
+ * <code>Locale("no", "NO", "POSIX")</code> and <code>Locale("nb", "NO", "POSIX")</code> produce
+ * the same candidate list below:
+ * <ul>
+ * <li> [<em>L</em>("nb"), <em>C</em>("NO"), <em>V</em>("POSIX")]
+ * <li> [<em>L</em>("no"), <em>C</em>("NO"), <em>V</em>("POSIX")]
+ * <li> [<em>L</em>("nb"), <em>C</em>("NO")]
+ * <li> [<em>L</em>("no"), <em>C</em>("NO")]
+ * <li> [<em>L</em>("nb")]
+ * <li> [<em>L</em>("no")]
+ * <li> <code>Locale.ROOT</code>
+ * </ul>
+ * </li>
+ *
+ * </li>
+ * </ol>
+ *
*
* <p>The default implementation uses an {@link ArrayList} that
* overriding implementations may modify before returning it to the
@@ -2606,13 +2733,14 @@ public abstract class ResourceBundle {
*
* <p>This implementation returns the following value:
* <pre>
- * baseName + "_" + language + "_" + country + "_" + variant
+ * baseName + "_" + language + "_" + script + "_" + country + "_" + variant
* </pre>
- * where <code>language</code>, <code>country</code> and
- * <code>variant</code> are the language, country and variant values
- * of <code>locale</code>, respectively. Final component values that
- * are empty Strings are omitted along with the preceding '_'. If
- * all of the values are empty strings, then <code>baseName</code>
+ * where <code>language</code>, <code>script</code>, <code>country</code>,
+ * and <code>variant</code> are the language, script, country, and variant
+ * values of <code>locale</code>, respectively. Final component values that
+ * are empty Strings are omitted along with the preceding '_'. When the
+ * script is empty, the script value is omitted along with the preceding '_'.
+ * If all of the values are empty strings, then <code>baseName</code>
* is returned.
*
* <p>For example, if <code>baseName</code> is
--- a/src/share/classes/java/util/spi/LocaleNameProvider.java Wed Oct 21 12:24:07 2009 -0700
+++ b/src/share/classes/java/util/spi/LocaleNameProvider.java Wed Nov 04 17:57:23 2009 -0500
@@ -44,15 +44,16 @@ public abstract class LocaleNameProvider
}
/**
- * Returns a localized name for the given ISO 639 language code and the
- * given locale that is appropriate for display to the user.
+ * Returns a localized name for the given <a href="http://www.rfc-editor.org/rfc/bcp/bcp47.txt">
+ * IETF BCP47</a> language code and the given locale that is appropriate for
+ * display to the user.
* For example, if <code>languageCode</code> is "fr" and <code>locale</code>
* is en_US, getDisplayLanguage() will return "French"; if <code>languageCode</code>
* is "en" and <code>locale</code> is fr_FR, getDisplayLanguage() will return "anglais".
* If the name returned cannot be localized according to <code>locale</code>,
* (say, the provider does not have a Japanese name for Croatian),
* this method returns null.
- * @param languageCode the ISO 639 language code string in the form of two or three
+ * @param languageCode the language code string in the form of two to eight
* lower-case letters between 'a' (U+0061) and 'z' (U+007A)
* @param locale the desired locale
* @return the name of the given language code for the specified locale, or null if it's not
@@ -68,16 +69,16 @@ public abstract class LocaleNameProvider
public abstract String getDisplayLanguage(String languageCode, Locale locale);
/**
- * <span style="background-color: #00ccff; font-weight: bold">New API</span>
- * Returns a localized name for the given ISO 15924 script code and the
- * given locale that is appropriate for display to the user.
+ * Returns a localized name for the given <a href="http://www.rfc-editor.org/rfc/bcp/bcp47.txt">
+ * IETF BCP47</a> script code and the given locale that is appropriate for
+ * display to the user.
* For example, if <code>scriptCode</code> is "Latn" and <code>locale</code>
* is en_US, getDisplayScript() will return "Latin"; if <code>scriptCode</code>
* is "Cyrl" and <code>locale</code> is fr_FR, getDisplayScript() will return "cyrillique".
* If the name returned cannot be localized according to <code>locale</code>,
- * (say, the provider does not have a Japanese name for Mongolian),
+ * (say, the provider does not have a Japanese name for Cyrillic),
* this method returns null.
- * @param scriptCode the ISO 15924 script code string in the form of title-case
+ * @param scriptCode the four letter script code string in the form of title-case
* letters (the first letter is upper-case character between 'A' (U+0041) and
* 'Z' (U+005A) followed by three lower-case character between 'a' (U+0061)
* and 'z' (U+007A)).
@@ -91,19 +92,21 @@ public abstract class LocaleNameProvider
* {@link java.util.spi.LocaleServiceProvider#getAvailableLocales()
* getAvailableLocales()}.
* @see java.util.Locale#getDisplayScript(java.util.Locale)
+ * @since 1.7
*/
public abstract String getDisplayScript(String scriptCode, Locale locale);
/**
- * Returns a localized name for the given ISO 3166 country code or UN M.49 area codes and the
- * given locale that is appropriate for display to the user.
+ * Returns a localized name for the given <a href="http://www.rfc-editor.org/rfc/bcp/bcp47.txt">
+ * IETF BCP47</a> region code (either ISO 3166 country code or UN M.49 area
+ * codes) and the given locale that is appropriate for display to the user.
* For example, if <code>countryCode</code> is "FR" and <code>locale</code>
* is en_US, getDisplayCountry() will return "France"; if <code>countryCode</code>
* is "US" and <code>locale</code> is fr_FR, getDisplayCountry() will return "Etats-Unis".
* If the name returned cannot be localized according to <code>locale</code>,
* (say, the provider does not have a Japanese name for Croatia),
* this method returns null.
- * @param countryCode the ISO 3166 country code string in the form of two
+ * @param countryCode the country (region) code string in the form of two
* upper-case letters between 'A' (U+0041) and 'Z' (U+005A) or the UN M.49 area code
* in the form of three digit letters between '0' (U+0030) and '9' (U+0039).
* @param locale the desired locale
--- a/src/share/classes/java/util/spi/LocaleServiceProvider.java Wed Oct 21 12:24:07 2009 -0700
+++ b/src/share/classes/java/util/spi/LocaleServiceProvider.java Wed Nov 04 17:57:23 2009 -0500
@@ -86,18 +86,19 @@ import java.util.Locale;
* Otherwise, they call the <code>getAvailableLocales()</code> methods of
* installed providers for the appropriate interface to find one that
* supports the requested locale. If such a provider is found, its other
- * methods are called to obtain the requested object or name. If neither
- * the Java runtime environment itself nor an installed provider supports
- * the requested locale, a fallback locale is constructed by replacing the
- * first of the variant, country, or language strings of the locale that's
- * not an empty string with an empty string, and the lookup process is
- * restarted. In the case that the variant contains one or more '_'s, the
- * fallback locale is constructed by replacing the variant with a new variant
- * which eliminates the last '_' and the part following it. Even if a
- * fallback occurs, methods that return requested objects or name are
- * invoked with the original locale before the fallback.The Java runtime
- * environment must support the root locale for all locale sensitive services
- * in order to guarantee that this process terminates.
+ * methods are called to obtain the requested object or name. When checking
+ * whether a locale is supported, the locale's extensions are ignored.
+ * If neither the Java runtime environment itself nor an installed provider
+ * supports the requested locale, the methods go through a list of candidate
+ * locales and repeat the availability check for each until a match is found.
+ * The algorithm used for creating a list of candidate locales is the same as
+ * the one used by <code>ResourceBundle</code> by default (see
+ * {@link java.util.ResourceBundle.Control#getCandidateLocales getCandidateLocales}
+ * for the details). Even if a locale is resolved from the candidate list,
+ * methods that return requested objects or names are invoked with the original
+ * requested locale including extensions. The Java runtime environment must
+ * support the root locale for all locale sensitive services in order to
+ * guarantee that this process terminates.
* <p>
* Providers of names (but not providers of other objects) are allowed to
* return null for some name requests even for locales that they claim to
@@ -124,6 +125,11 @@ public abstract class LocaleServiceProvi
/**
* Returns an array of all locales for which this locale service provider
* can provide localized objects or names.
+ * <p>
+ * <b>Note:</b> Extensions in a <code>Locale</code> are ignored during
+ * service provider lookup. So the array returned by this method should
+ * not include two or more <code>Locale</code> objects only differing in
+ * their extensions.
*
* @return An array of all locales for which this locale service provider
* can provide localized objects or names.
--- a/src/share/classes/sun/util/locale/AsciiUtil.java Wed Oct 21 12:24:07 2009 -0700
+++ b/src/share/classes/sun/util/locale/AsciiUtil.java Wed Nov 04 17:57:23 2009 -0500
@@ -85,6 +85,33 @@ public final class AsciiUtil {
return buf.toString();
}
+    /** Title-cases s: first char through toUpper, the rest through toLower; returns s itself when no char needs changing. */
+    public static String toTitleString(String s) {
+        if (s.length() == 0) {
+            return s;
+        }
+        int idx = 0;
+        if (!(s.charAt(0) >= 'a' && s.charAt(0) <= 'z')) { // first char needs no upper-casing
+            for (idx = 1; idx < s.length(); idx++) { // find the first upper-case letter after position 0
+                if (s.charAt(idx) >= 'A' && s.charAt(idx) <= 'Z') { // test the char at idx, not the first char again
+                    break;
+                }
+            }
+        }
+        if (idx == s.length()) {
+            return s; // nothing to change, so no new string is built
+        }
+        StringBuilder buf = new StringBuilder(s.substring(0, idx)); // prefix that is already in the right case
+        if (idx == 0) {
+            buf.append(toUpper(s.charAt(idx)));
+            idx++;
+        }
+        for (; idx < s.length(); idx++) {
+            buf.append(toLower(s.charAt(idx)));
+        }
+        return buf.toString();
+    }
+
public static boolean isAlpha(char c) {
return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
}
@@ -129,4 +156,25 @@ public final class AsciiUtil {
}
return b;
}
+
+    public static class CaseInsensitiveKey { // immutable key whose equality and hash ignore letter case
+        private final String _key;
+        private final int _hash; // hash of the lower-cased key, computed once in the constructor
+
+        public CaseInsensitiveKey(String key) {
+            _key = key;
+            _hash = AsciiUtil.toLowerString(key).hashCode();
+        }
+
+        @Override
+        public boolean equals(Object o) {
+            return (o instanceof CaseInsensitiveKey)
+                && AsciiUtil.caseIgnoreMatch(_key, ((CaseInsensitiveKey)o)._key);
+        }
+
+        @Override
+        public int hashCode() {
+            return _hash;
+        }
+    }
}
--- a/src/share/classes/sun/util/locale/BaseLocale.java Wed Oct 21 12:24:07 2009 -0700
+++ b/src/share/classes/sun/util/locale/BaseLocale.java Wed Nov 04 17:57:23 2009 -0500
@@ -1,264 +1,221 @@
-/*
- *******************************************************************************
- * Copyright (C) 2009, International Business Machines Corporation and *
- * others. All Rights Reserved. *
- *******************************************************************************
- */
-
-package sun.util.locale;
-
-
-
-public final class BaseLocale {
-
- private String _language = "";
- private String _script = "";
- private String _region = "";
- private String _variant = "";
-
- private transient String _id = "";
- private transient String _java6string = "";
- private transient BaseLocale _parent;
-
- private static final char SEPCHAR = '_';
-
- private static final LocaleObjectCache<BaseLocaleKey,BaseLocale> BASELOCALECACHE
- = new LocaleObjectCache<BaseLocaleKey,BaseLocale>();
-
- public static final BaseLocale ROOT = new BaseLocale("", "", "", "");
-
- private BaseLocale(String language, String script, String region, String variant) {
- if (language != null) {
- _language = language;
- }
- if (script != null) {
- _script = script;
- }
- if (region != null) {
- _region = region;
- }
- if (variant != null) {
- _variant = variant;
- }
- }
-
- public static BaseLocale getInstance(String language, String script, String region, String variant) {
- BaseLocaleKey key = new BaseLocaleKey(language, script, region, variant);
- BaseLocale baseLocale = BASELOCALECACHE.get(key);
- if (baseLocale == null) {
- // Create a canonical BaseLocale instance
- baseLocale = new BaseLocale(language, script, region, variant).canonicalize();
- BASELOCALECACHE.put(baseLocale.createKey(), baseLocale);
- }
- return baseLocale;
- }
-
- public boolean equals(Object obj) {
- return (this == obj) ||
- ((obj instanceof BaseLocale) && _id == (((BaseLocale)obj)._id));
- }
-
- public int hashCode() {
- return _id.hashCode();
- }
-
- public String getJava6String() {
- return _java6string;
- }
-
- public String getLanguage() {
- return _language;
- }
-
- public String getScript() {
- return _script;
- }
-
- public String getRegion() {
- return _region;
- }
-
- public String getVariant() {
- return _variant;
- }
-
- public BaseLocale getParent() {
- return _parent;
- }
-
- public String getID() {
- return _id;
- }
-
- public String toString() {
- return _id;
- }
-
- private BaseLocale canonicalize() {
-
- StringBuilder id = new StringBuilder();
-
- int languageLen = _language.length();
- int scriptLen = _script.length();
- int regionLen = _region.length();
- int variantLen = _variant.length();
-
- if (languageLen > 0) {
- // language to lower case
- _language = AsciiUtil.toLowerString(_language).intern();
-
- id.append(_language);
- }
-
- if (scriptLen > 0) {
- // script - the first letter to upper case, the rest to lower case
- StringBuilder buf = new StringBuilder();
- buf.append(AsciiUtil.toUpper(_script.charAt(0)));
- for (int i = 1; i < _script.length(); i++) {
- buf.append(AsciiUtil.toLower(_script.charAt(i)));
- }
- _script = buf.toString().intern();
-
- if (languageLen > 0) {
- id.append(SEPCHAR);
- }
- id.append(_script);
- }
-
- if (regionLen > 0) {
- // region to upper case
- _region = AsciiUtil.toUpperString(_region).intern();
-
- id.append(SEPCHAR);
- id.append(_region);
- }
-
- if (variantLen > 0) {
- // variant is case sensitive in JDK
- _variant = _variant.intern();
-
- if (regionLen == 0) {
- id.append(SEPCHAR);
- }
- id.append(SEPCHAR);
- id.append(_variant);
- }
-
- _id = id.toString().intern();
-
- // Compose legacy JDK ID string if required
- if (languageLen == 0 && regionLen == 0 && variantLen > 0) {
- _java6string = "";
- } else if (scriptLen > 0 || (regionLen == 0 && variantLen > 0)) {
- StringBuilder buf = new StringBuilder(_language);
- if (regionLen > 0) {
- buf.append(SEPCHAR);
- buf.append(_region);
- } else if (variantLen > 0) {
- buf.append(SEPCHAR);
- }
- if (variantLen > 0) {
- buf.append(SEPCHAR);
- buf.append(_variant);
- }
- _java6string = buf.toString().intern();
- } else {
- _java6string = _id;
- }
-
- // Resolve parent
- if (variantLen > 0) {
- // variant field in Java Locale may contain multiple
- // subtags
- int lastSep = _variant.lastIndexOf(SEPCHAR);
- if (lastSep == -1) {
- _parent = getInstance(_language, _script, _region, "");
- } else {
- _parent = getInstance(_language, _script, _region, _variant.substring(0, lastSep));
- }
- } else if (regionLen > 0) {
- _parent = getInstance(_language, _script, "", "");
- } else if (scriptLen > 0) {
- _parent = getInstance(_language, "", "", "");
- } else if (languageLen > 0) {
- _parent = ROOT;
- } else {
- // This is the root
- // We should never get here, because ROOT is pre-populated.
- _parent = null;
- }
- return this;
- }
-
- private BaseLocaleKey createKey() {
- return new BaseLocaleKey(_language, _script, _region, _variant);
- }
-
- public static class BaseLocaleKey implements Comparable<BaseLocaleKey> {
- private String _lang = "";
- private String _scrt = "";
- private String _regn = "";
- private String _vart = "";
-
- private int _hash; // Default to 0
-
- public BaseLocaleKey(String language, String script, String region, String variant) {
- if (language != null) {
- _lang = language;
- }
- if (script != null) {
- _scrt = script;
- }
- if (region != null) {
- _regn = region;
- }
- if (variant != null) {
- _vart = variant;
- }
- }
-
- public boolean equals(Object obj) {
- return (this == obj) ||
- (obj instanceof BaseLocaleKey)
- && AsciiUtil.caseIgnoreMatch(((BaseLocaleKey)obj)._lang, this._lang)
- && AsciiUtil.caseIgnoreMatch(((BaseLocaleKey)obj)._scrt, this._scrt)
- && AsciiUtil.caseIgnoreMatch(((BaseLocaleKey)obj)._regn, this._regn)
- && ((BaseLocaleKey)obj)._vart.equals(_vart); // variant is case sensitive in JDK!
- }
-
- public int compareTo(BaseLocaleKey other) {
- int res = AsciiUtil.caseIgnoreCompare(this._lang, other._lang);
- if (res == 0) {
- res = AsciiUtil.caseIgnoreCompare(this._scrt, other._scrt);
- if (res == 0) {
- res = AsciiUtil.caseIgnoreCompare(this._regn, other._regn);
- if (res == 0) {
- res = AsciiUtil.caseIgnoreCompare(this._vart, other._vart);
- }
- }
- }
- return res;
- }
-
- public int hashCode() {
- int h = _hash;
- if (h == 0) {
- // Generating a hash value from language, script, region and variant
- for (int i = 0; i < _lang.length(); i++) {
- h = 31*h + AsciiUtil.toLower(_lang.charAt(i));
- }
- for (int i = 0; i < _scrt.length(); i++) {
- h = 31*h + AsciiUtil.toLower(_scrt.charAt(i));
- }
- for (int i = 0; i < _regn.length(); i++) {
- h = 31*h + AsciiUtil.toLower(_regn.charAt(i));
- }
- for (int i = 0; i < _vart.length(); i++) {
- h = 31*h + AsciiUtil.toLower(_vart.charAt(i));
- }
- _hash = h;
- }
- return h;
- }
- }
-}
+/*
+ *******************************************************************************
+ * Copyright (C) 2009, International Business Machines Corporation and *
+ * others. All Rights Reserved. *
+ *******************************************************************************
+ */
+
+package sun.util.locale;
+
+
+public final class BaseLocale {
+
+ private static final boolean JDKIMPL = true;
+
+ private String _language = "";
+ private String _script = "";
+ private String _region = "";
+ private String _variant = "";
+
+ private transient volatile int _hash = 0;
+
+ private static final LocaleObjectCache<Key, BaseLocale> BASELOCALE_CACHE
+ = new LocaleObjectCache<Key, BaseLocale>();
+
+ public static final BaseLocale ROOT = BaseLocale.getInstance("", "", "", "");
+
+ private BaseLocale(String language, String script, String region, String variant) {
+ if (language != null) {
+ _language = AsciiUtil.toLowerString(language).intern();
+ }
+ if (script != null) {
+ _script = AsciiUtil.toTitleString(script).intern();
+ }
+ if (region != null) {
+ _region = AsciiUtil.toUpperString(region).intern();
+ }
+ if (variant != null) {
+ if (JDKIMPL) {
+ // preserve upper/lower cases
+ _variant = variant.intern();
+ } else {
+ _variant = AsciiUtil.toUpperString(variant).intern();
+ }
+ }
+ }
+
+ public static BaseLocale getInstance(String language, String script, String region, String variant) { // returns a cached BaseLocale for the four fields, creating and registering one on a miss
+ if (JDKIMPL) {
+ // JDK uses deprecated ISO639.1 language codes for he, yi and id
+ if (AsciiUtil.caseIgnoreMatch(language, "he")) { // NOTE(review): language reaches caseIgnoreMatch before any null check, yet the constructor and Key accept null - confirm the helper tolerates null
+ language = "iw";
+ } else if (AsciiUtil.caseIgnoreMatch(language, "yi")) {
+ language = "ji";
+ } else if (AsciiUtil.caseIgnoreMatch(language, "id")) {
+ language = "in";
+ }
+ }
+ Key key = new Key(language, script, region, variant); // lookup key built from the raw (not yet case-normalized) arguments
+ BaseLocale baseLocale = BASELOCALE_CACHE.get(key);
+ if (baseLocale == null) {
+ baseLocale = new BaseLocale(language, script, region, variant); // constructor normalizes case and interns the fields
+ baseLocale = BASELOCALE_CACHE.put(baseLocale.createKey(), baseLocale); // keep whatever put() returns - presumably the canonical instance if one was registered concurrently; confirm against LocaleObjectCache
+ }
+ return baseLocale;
+ }
+
+ public String getLanguage() {
+ return _language;
+ }
+
+ public String getScript() {
+ return _script;
+ }
+
+ public String getRegion() {
+ return _region;
+ }
+
+ public String getVariant() {
+ return _variant;
+ }
+
+ public String toString() {
+ StringBuilder buf = new StringBuilder();
+ if (_language.length() > 0) {
+ buf.append("language=");
+ buf.append(_language);
+ }
+ if (_script.length() > 0) {
+ if (buf.length() > 0) {
+ buf.append(", ");
+ }
+ buf.append("script=");
+ buf.append(_script);
+ }
+ if (_region.length() > 0) {
+ if (buf.length() > 0) {
+ buf.append(", ");
+ }
+ buf.append("region=");
+ buf.append(_region);
+ }
+ if (_variant.length() > 0) {
+ if (buf.length() > 0) {
+ buf.append(", ");
+ }
+ buf.append("variant=");
+ buf.append(_variant);
+ }
+ return buf.toString();
+ }
+
+ public int hashCode() { // NOTE(review): this class overrides hashCode() but no equals() is visible, so equality is by identity - presumably relying on getInstance() handing out cached instances; confirm
+ int h = _hash; // read the cached value once
+ if (h == 0) { // 0 means "not computed yet"; a locale whose four fields are all empty also hashes to 0 and is recomputed on each call
+ // Generating a hash value from language, script, region and variant
+ for (int i = 0; i < _language.length(); i++) {
+ h = 31*h + _language.charAt(i);
+ }
+ for (int i = 0; i < _script.length(); i++) {
+ h = 31*h + _script.charAt(i);
+ }
+ for (int i = 0; i < _region.length(); i++) {
+ h = 31*h + _region.charAt(i);
+ }
+ for (int i = 0; i < _variant.length(); i++) {
+ h = 31*h + _variant.charAt(i);
+ }
+ _hash = h; // cache for later calls
+ }
+ return h;
+ }
+
+ private Key createKey() {
+ return new Key(_language, _script, _region, _variant);
+ }
+
+ private static class Key implements Comparable<Key> { // lookup key for BASELOCALE_CACHE; holds the four fields exactly as given
+ private String _lang = "";
+ private String _scrt = "";
+ private String _regn = "";
+ private String _vart = "";
+
+ private volatile int _hash; // Default to 0
+
+ public Key(String language, String script, String region, String variant) { // null arguments leave the "" defaults in place
+ if (language != null) {
+ _lang = language;
+ }
+ if (script != null) {
+ _scrt = script;
+ }
+ if (region != null) {
+ _regn = region;
+ }
+ if (variant != null) {
+ _vart = variant;
+ }
+ }
+
+ public boolean equals(Object obj) {
+ if (JDKIMPL) { // JDK mode: language, script and region compared ignoring case, variant compared exactly
+ return (this == obj) ||
+ (obj instanceof Key)
+ && AsciiUtil.caseIgnoreMatch(((Key)obj)._lang, this._lang)
+ && AsciiUtil.caseIgnoreMatch(((Key)obj)._scrt, this._scrt)
+ && AsciiUtil.caseIgnoreMatch(((Key)obj)._regn, this._regn)
+ && ((Key)obj)._vart.equals(_vart); // variant is case sensitive in JDK!
+ }
+ return (this == obj) || // non-JDK mode: all four fields compared ignoring case
+ (obj instanceof Key)
+ && AsciiUtil.caseIgnoreMatch(((Key)obj)._lang, this._lang)
+ && AsciiUtil.caseIgnoreMatch(((Key)obj)._scrt, this._scrt)
+ && AsciiUtil.caseIgnoreMatch(((Key)obj)._regn, this._regn)
+ && AsciiUtil.caseIgnoreMatch(((Key)obj)._vart, this._vart);
+ }
+
+ public int compareTo(Key other) { // orders by language, then script, then region, then variant
+ int res = AsciiUtil.caseIgnoreCompare(this._lang, other._lang);
+ if (res == 0) {
+ res = AsciiUtil.caseIgnoreCompare(this._scrt, other._scrt);
+ if (res == 0) {
+ res = AsciiUtil.caseIgnoreCompare(this._regn, other._regn);
+ if (res == 0) {
+ if (JDKIMPL) {
+ res = this._vart.compareTo(other._vart); // exact comparison, matching the case-sensitive variant in equals()
+ } else {
+ res = AsciiUtil.caseIgnoreCompare(this._vart, other._vart);
+ }
+ }
+ }
+ }
+ return res;
+ }
+
+ public int hashCode() {
+ int h = _hash;
+ if (h == 0) { // 0 means "not computed yet"
+ // Generating a hash value from language, script, region and variant
+ for (int i = 0; i < _lang.length(); i++) {
+ h = 31*h + AsciiUtil.toLower(_lang.charAt(i)); // fields that equals() compares ignoring case are hashed through toLower
+ }
+ for (int i = 0; i < _scrt.length(); i++) {
+ h = 31*h + AsciiUtil.toLower(_scrt.charAt(i));
+ }
+ for (int i = 0; i < _regn.length(); i++) {
+ h = 31*h + AsciiUtil.toLower(_regn.charAt(i));
+ }
+ for (int i = 0; i < _vart.length(); i++) {
+ if (JDKIMPL) {
+ h = 31*h + _vart.charAt(i); // variant hashed as-is because equals() compares it exactly in JDK mode
+ } else {
+ h = 31*h + AsciiUtil.toLower(_vart.charAt(i));
+ }
+ }
+ _hash = h;
+ }
+ return h;
+ }
+ }
+}
--- a/src/share/classes/sun/util/locale/InternalLocaleBuilder.java Wed Oct 21 12:24:07 2009 -0700
+++ b/src/share/classes/sun/util/locale/InternalLocaleBuilder.java Wed Nov 04 17:57:23 2009 -0500
@@ -1,349 +1,284 @@
-/*
- *******************************************************************************
- * Copyright (C) 2009, International Business Machines Corporation and *
- * others. All Rights Reserved. *
- *******************************************************************************
- */
-package sun.util.locale;
-
-import java.util.HashMap;
-import java.util.Map;
-import java.util.Set;
-import java.util.TreeMap;
-
-public final class InternalLocaleBuilder {
-
- public static final char PRIVATEUSEKEY = 'x';
-
- private String _language = "";
- private String _script = "";
- private String _region = "";
- private String _variant = "";
-
- private FieldHandler _handler = FieldHandler.DEFAULT;
-
- private HashMap<Character, String> _extMap;
- private HashMap<String, String> _kwdMap;
-
- private static final char LDMLSINGLETON = 'u';
-
- private static final String LANGTAGSEP = "-";
- private static final String LOCALESEP = "_";
-
- private static final int DEFAULTMAPCAPACITY = 4;
-
- public InternalLocaleBuilder() {
- }
-
- public InternalLocaleBuilder(FieldHandler handler) {
- _handler = handler;
- }
-
- public InternalLocaleBuilder setLanguage(String language) throws LocaleSyntaxException {
- String newval = "";
- if (language.length() > 0) {
- newval = _handler.process(FieldType.LANGUAGE, language);
- if (newval == null) {
- throw new LocaleSyntaxException("Ill-formed language: " + language);
- }
- }
- _language = newval;
- return this;
- }
-
- public InternalLocaleBuilder setScript(String script) throws LocaleSyntaxException {
- String newval = "";
- if (script.length() > 0) {
- newval = _handler.process(FieldType.SCRIPT, script);
- if (newval == null) {
- throw new LocaleSyntaxException("Ill-formed script: " + script);
- }
- }
- _script = newval;
- return this;
- }
-
- public InternalLocaleBuilder setRegion(String region) throws LocaleSyntaxException {
- String newval = "";
- if (region.length() > 0) {
- newval = _handler.process(FieldType.REGION, region);
- if (newval == null) {
- throw new LocaleSyntaxException("Ill-formed region: " + region);
- }
- }
- _region = newval;
- return this;
- }
-
- public InternalLocaleBuilder setVariant(String variant) throws LocaleSyntaxException {
- String newval = "";
- if (variant.length() > 0) {
- newval = _handler.process(FieldType.VARIANT, variant);
- if (newval == null) {
- throw new LocaleSyntaxException("Ill-formed variant: " + variant);
- }
- }
- _variant = newval;
- return this;
- }
-
- public InternalLocaleBuilder setLDMLExtensionValue(String key, String type) throws LocaleSyntaxException {
- if (key.length() == 0) {
- throw new LocaleSyntaxException("Empty LDML extension key");
- }
- String kwdkey = _handler.process(FieldType.LDMLKEY, key);
- if (kwdkey == null) {
- throw new LocaleSyntaxException("Ill-formed LDML extension key: " + key);
- }
-
- if (type.length() == 0) {
- if (_kwdMap != null) {
- _kwdMap.remove(kwdkey);
- }
- } else {
- String kwdtype = _handler.process(FieldType.LDMLTYPE, type);
- if (kwdtype == null) {
- throw new LocaleSyntaxException("Ill-formed LDML extension value: " + type);
- }
- if (_kwdMap == null) {
- _kwdMap = new HashMap<String, String>(DEFAULTMAPCAPACITY);
- }
- _kwdMap.put(kwdkey, kwdtype);
- }
-
- return this;
- }
-
- public InternalLocaleBuilder setExtension(char singleton, String value) throws LocaleSyntaxException {
- if (!LocaleExtensions.isValidExtensionKey(singleton)) {
- throw new LocaleSyntaxException("Ill-formed extension key: " + singleton);
- }
-
- // singleton char to lower case
- singleton = AsciiUtil.toLower(singleton);
-
- if (singleton == LDMLSINGLETON) {
- // 'u' extension reserved for locale keywords
- if (_kwdMap != null) {
- // blow out the keywords currently set
- _kwdMap.clear();
- }
- // parse locale keyword extension subtags
- String[] kwdtags = (value.replaceAll(LOCALESEP, LANGTAGSEP)).split(LANGTAGSEP);
- if ((kwdtags.length % 2) != 0) {
- // number of keyword subtags must be even number
- throw new LocaleSyntaxException("Ill-formed LDML extension key/value pairs: " + value);
- }
- int idx = 0;
- while (idx < kwdtags.length) {
- String kwdkey = _handler.process(FieldType.LDMLKEY, kwdtags[idx++]);
- String kwdtype = _handler.process(FieldType.LDMLTYPE, kwdtags[idx++]);
- if (kwdkey == null || kwdkey.length() == 0
- || kwdtype == null || kwdtype.length() == 0) {
- throw new LocaleSyntaxException("Ill-formed LDML extension key/value pairs: " + value);
- }
- if (_kwdMap == null) {
- _kwdMap = new HashMap<String, String>(kwdtags.length / 2);
- }
- String prevVal = _kwdMap.put(kwdkey, kwdtype);
- if (prevVal != null) {
- throw new LocaleSyntaxException("Ill-formed LDML extension containing duplicated keys: " + value);
- }
- }
- } else {
- // other extensions including privateuse
- if (value.length() == 0) {
- if (_extMap != null) {
- _extMap.remove(Character.valueOf(singleton));
- }
- } else {
- FieldType ftype = (singleton == PRIVATEUSEKEY) ? FieldType.PRIVATEUSE : FieldType.EXTENSION;
- String extval = _handler.process(ftype, value);
- if (extval == null) {
- throw new LocaleSyntaxException("Ill-formed LDML extension value: " + value);
- }
- if (_extMap == null) {
- _extMap = new HashMap<Character, String>(DEFAULTMAPCAPACITY);
- }
- _extMap.put(Character.valueOf(singleton), extval);
- }
- }
- return this;
- }
-
- public InternalLocaleBuilder clear() {
- _language = "";
- _script = "";
- _region = "";
- _variant = "";
- removeLocaleExtensions();
- return this;
- }
-
- public InternalLocaleBuilder removeLocaleExtensions() {
- if (_extMap != null) {
- _extMap.clear();
- }
- if (_kwdMap != null) {
- _kwdMap.clear();
- }
- return this;
- }
-
- public BaseLocale getBaseLocale() {
- return BaseLocale.getInstance(_language, _script, _region, _variant);
- }
-
- public LocaleExtensions getLocaleExtensions() {
- TreeMap<Character, String> extMap = null;
- TreeMap<String, String> kwdMap = null;
-
- // process keywords
- if (_kwdMap != null && _kwdMap.size() > 0) {
- Set<Map.Entry<String, String>> kwds = _kwdMap.entrySet();
- for (Map.Entry<String, String> kwd : kwds) {
- String key = kwd.getKey();
- String type = kwd.getValue();
- if (kwdMap == null) {
- kwdMap = new TreeMap<String, String>();
- }
- kwdMap.put(key.intern(), type.intern());
- }
- }
-
- // process extensions and privateuse
- if (_extMap != null) {
- Set<Map.Entry<Character, String>> exts = _extMap.entrySet();
- for (Map.Entry<Character, String> ext : exts) {
- Character key = ext.getKey();
- String value = ext.getValue();
- if (extMap == null) {
- extMap = new TreeMap<Character, String>();
- }
- extMap.put(key, value.intern());
- }
- }
-
- // set canonical locale keyword extension string to the extension map
- if (kwdMap != null) {
- StringBuilder buf = new StringBuilder();
- LocaleExtensions.keywordsToString(kwdMap, buf);
- if (extMap == null) {
- extMap = new TreeMap<Character, String>();
- }
- extMap.put(Character.valueOf(LDMLSINGLETON), buf.toString().intern());
- }
-
- return LocaleExtensions.getInstance(extMap, kwdMap);
- }
-
- protected enum FieldType {
- LANGUAGE,
- SCRIPT,
- REGION,
- VARIANT,
- LDMLKEY,
- LDMLTYPE,
- EXTENSION,
- PRIVATEUSE
- }
-
- public static class FieldHandler {
- public static FieldHandler DEFAULT = new FieldHandler();
-
- protected FieldHandler() {
- }
-
- public String process(FieldType type, String value) {
- value = map(type, value);
- if (value.length() > 0 && !validate(type, value)) {
- return null;
- }
- return value;
- }
-
- protected String map(FieldType type, String value) {
- switch (type) {
- case LANGUAGE:
- value = AsciiUtil.toLowerString(value);
- break;
- case SCRIPT:
- if (value.length() > 0) {
- StringBuilder buf = new StringBuilder();
- buf.append(AsciiUtil.toUpper(value.charAt(0)));
- for (int i = 1; i < value.length(); i++) {
- buf.append(AsciiUtil.toLower(value.charAt(i)));
- }
- value = buf.toString();
- }
- break;
- case REGION:
- value = AsciiUtil.toUpperString(value);
- break;
- case VARIANT:
- // Java variant is case sensitive - so no case mapping here
- value = value.replaceAll(LANGTAGSEP, LOCALESEP);
- break;
- case LDMLKEY:
- case LDMLTYPE:
- case EXTENSION:
- case PRIVATEUSE:
- value = AsciiUtil.toLowerString(value).replaceAll(LOCALESEP, LANGTAGSEP);
- break;
- }
- return value;
- }
-
- protected boolean validate(FieldType type, String value) {
- boolean isValid = false;
- String[] subtags;
-
- switch (type) {
- case LANGUAGE:
- isValid = LanguageTag.isLanguageSubtag(value);
- break;
- case SCRIPT:
- isValid = LanguageTag.isScriptSubtag(value);
- break;
- case REGION:
- isValid = LanguageTag.isRegionSubtag(value);
- break;
- case VARIANT:
- // variant field could have multiple subtags
- subtags = value.split(LOCALESEP);
- for (String subtag : subtags) {
- isValid = LanguageTag.isVariantSubtag(subtag);
- if (!isValid) {
- break;
- }
- }
- break;
- case LDMLKEY:
- isValid = LocaleExtensions.isValidLDMLKey(value);
- break;
- case LDMLTYPE:
- isValid = LocaleExtensions.isValidLDMLType(value);
- break;
- case EXTENSION:
- subtags = value.split(LANGTAGSEP);
- for (String subtag : subtags) {
- isValid = LanguageTag.isExtensionSubtag(subtag);
- if (!isValid) {
- break;
- }
- }
- break;
- case PRIVATEUSE:
- subtags = value.split(LANGTAGSEP);
- for (String subtag : subtags) {
- isValid = LanguageTag.isPrivateuseValueSubtag(subtag);
- if (!isValid) {
- break;
- }
- }
- break;
- }
- return isValid;
- }
- }
-}
+/*
+ *******************************************************************************
+ * Copyright (C) 2009, International Business Machines Corporation and *
+ * others. All Rights Reserved. *
+ *******************************************************************************
+ */
+package sun.util.locale;
+
+import java.util.List;
+import java.util.Set;
+import java.util.SortedMap;
+import java.util.TreeMap;
+
+import sun.util.locale.LanguageTag.ParseStatus;
+
+public final class InternalLocaleBuilder {
+
+ private String _language = "";
+ private String _script = "";
+ private String _region = "";
+ private String _variant = "";
+ private SortedMap<Character, Extension> _extMap;
+
+ private final boolean _lenientVariant;
+
+ private static final String LOCALESEP = "_";
+
+ public InternalLocaleBuilder() {
+ this(false);
+ }
+
+ public InternalLocaleBuilder(boolean lenientVariant) {
+ _lenientVariant = lenientVariant;
+ }
+
+ public boolean isLenientVariant() {
+ return _lenientVariant;
+ }
+
+ public InternalLocaleBuilder setLanguage(String language) throws LocaleSyntaxException {
+ String newval = "";
+ if (language.length() > 0) {
+ if (!LanguageTag.isLanguage(language)) {
+ throw new LocaleSyntaxException("Ill-formed language: " + language, 0);
+ }
+ newval = LanguageTag.canonicalizeLanguage(language);
+ }
+ _language = newval;
+ return this;
+ }
+
+ public InternalLocaleBuilder setScript(String script) throws LocaleSyntaxException {
+ String newval = "";
+ if (script.length() > 0) {
+ if (!LanguageTag.isScript(script)) {
+ throw new LocaleSyntaxException("Ill-formed script: " + script, 0);
+ }
+ newval = LanguageTag.canonicalizeScript(script);
+ }
+ _script = newval;
+ return this;
+ }
+
+    public InternalLocaleBuilder setRegion(String region) throws LocaleSyntaxException { // validates and stores the region subtag
+        String newval = ""; // an empty argument resets the region
+        if (region.length() > 0) {
+            if (!LanguageTag.isRegion(region)) {
+                throw new LocaleSyntaxException("Ill-formed region: " + region, 0); // explicit error index 0, as setLanguage and setScript do
+            }
+            newval = LanguageTag.canonicalizeRegion(region);
+        }
+        _region = newval;
+        return this;
+    }
+
+ public InternalLocaleBuilder setVariant(String variant) throws LocaleSyntaxException {
+ String newval = "";
+ if (variant.length() > 0) {
+ if (_lenientVariant) {
+ newval = variant;
+ } else {
+ newval = processVariant(variant);
+ }
+ }
+ _variant = newval;
+ return this;
+ }
+
+ public InternalLocaleBuilder setUnicodeLocaleExtension(String key, String type) throws LocaleSyntaxException { // sets one key/type pair of the Unicode locale extension; an empty type removes the key
+ if (key.length() == 0) {
+ throw new LocaleSyntaxException("Empty Unicode locale extension key");
+ }
+ if (!UnicodeLocaleExtension.isKey(key)) {
+ throw new LocaleSyntaxException("Ill-formed Unicode locale extension key: " + key, 0);
+ }
+
+ key = UnicodeLocaleExtension.canonicalizeKey(key);
+
+ UnicodeLocaleExtension ulext = null; // the extension currently stored under the Unicode singleton, if any
+ if (_extMap != null) {
+ ulext = (UnicodeLocaleExtension)_extMap.get(Character.valueOf(UnicodeLocaleExtension.SINGLETON));
+ }
+
+ if (type.length() == 0) {
+ if (ulext != null) {
+ ulext.remove(key); // NOTE(review): ulext may be the very object setLocale() copied in from a LocaleExtensions; mutating it here could alter that shared extension - confirm or copy first
+ if (ulext.isEmpty()) {
+ _extMap.remove(Character.valueOf(UnicodeLocaleExtension.SINGLETON)); // drop the extension once its last key is gone
+ }
+ }
+ } else {
+ StringBuilder buf = new StringBuilder(); // canonicalized type, subtags re-joined with LanguageTag.SEP
+ StringTokenIterator sti = new StringTokenIterator(type, LanguageTag.SEP);
+ for (String subtag = sti.first(); !sti.isDone(); subtag = sti.next()) {
+ if (!UnicodeLocaleExtension.isTypeSubtag(subtag)) {
+ throw new LocaleSyntaxException("Ill-formed Unicode locale extension type: " + type, sti.currentStart());
+ }
+ if (buf.length() > 0) {
+ buf.append(LanguageTag.SEP);
+ }
+ buf.append(UnicodeLocaleExtension.canonicalizeTypeSubtag(subtag));
+ }
+ if (ulext == null) {
+ SortedMap<String, String> ktmap = new TreeMap<String, String>();
+ ktmap.put(key, buf.toString());
+ ulext = new UnicodeLocaleExtension(ktmap);
+ if (_extMap == null) {
+ _extMap = new TreeMap<Character, Extension>(); // extension map is created lazily
+ }
+ _extMap.put(Character.valueOf(UnicodeLocaleExtension.SINGLETON), ulext);
+ } else {
+ ulext.put(key, buf.toString()); // NOTE(review): same sharing concern as remove() above
+ }
+ }
+
+ return this;
+ }
+
+ public InternalLocaleBuilder setExtension(char singleton, String value) throws LocaleSyntaxException {
+ String strSingleton = String.valueOf(singleton);
+ if (!LanguageTag.isExtensionSingleton(strSingleton) && !LanguageTag.isPrivateuseSingleton(strSingleton)) {
+ throw new LocaleSyntaxException("Ill-formed extension key: " + singleton);
+ }
+
+ strSingleton = LanguageTag.canonicalizeExtensionSingleton(strSingleton);
+ Character key = Character.valueOf(strSingleton.charAt(0));
+
+ if (value.length() == 0) {
+ if (_extMap != null) {
+ _extMap.remove(key);
+ }
+ } else {
+ StringTokenIterator sti = new StringTokenIterator(value, LanguageTag.SEP);
+ ParseStatus sts = new ParseStatus();
+
+ Extension ext = Extension.create(key.charValue(), sti, sts);
+ if (sts.isError()) {
+ throw new LocaleSyntaxException(sts.errorMsg, sts.errorIndex);
+ }
+ if (sts.parseLength != value.length() || ext == null) {
+ throw new LocaleSyntaxException("Ill-formed extension value: " + value, sti.currentStart());
+ }
+ if (_extMap == null) {
+ _extMap = new TreeMap<Character, Extension>();
+ }
+ _extMap.put(key, ext);
+ }
+ return this;
+ }
+
+ public InternalLocaleBuilder setLocale(BaseLocale base, LocaleExtensions extensions) throws LocaleSyntaxException { // replaces the builder's whole state with the given base locale and extensions
+ String language = base.getLanguage();
+ String script = base.getScript();
+ String region = base.getRegion();
+ String variant = base.getVariant();
+
+ // Validate base locale fields before updating internal state.
+ // LocaleExtensions always store validated/canonicalized values,
+ // so no checks are necessary.
+ if (language.length() > 0) {
+ if (!LanguageTag.isLanguage(language)) {
+ throw new LocaleSyntaxException("Ill-formed language: " + language);
+ }
+ language = LanguageTag.canonicalizeLanguage(language);
+ }
+ if (script.length() > 0) {
+ if (!LanguageTag.isScript(script)) {
+ throw new LocaleSyntaxException("Ill-formed script: " + script);
+ }
+ script = LanguageTag.canonicalizeScript(script);
+ }
+ if (region.length() > 0) {
+ if (!LanguageTag.isRegion(region)) {
+ throw new LocaleSyntaxException("Ill-formed region: " + region);
+ }
+ region = LanguageTag.canonicalizeRegion(region);
+ }
+ if (_lenientVariant) {
+ // In lenient variant mode, parse special private use value
+ // reserved for Java Locale.
+ String privuse = extensions.getExtensionValue(Character.valueOf(LanguageTag.PRIVATEUSE.charAt(0))); // NOTE(review): extensions is dereferenced without a null check here and at getKeys() below - confirm callers never pass null
+ if (privuse != null) {
+ variant = LanguageTag.getJavaCompatibleVariant(variant, privuse);
+ }
+ } else {
+ if (variant.length() > 0) {
+ variant = processVariant(variant); // strict mode: variant must parse completely or an exception is thrown
+ }
+ }
+
+ // update builder's internal fields
+ _language = language;
+ _script = script;
+ _region = region;
+ _variant = variant;
+
+ // empty extensions
+ if (_extMap == null) {
+ _extMap = new TreeMap<Character, Extension>();
+ } else {
+ _extMap.clear();
+ }
+
+ Set<Character> extKeys = extensions.getKeys();
+ for (Character key : extKeys) {
+ Extension ext = extensions.getExtension(key);
+ if (_lenientVariant && (ext instanceof PrivateuseExtension)) {
+ String modPrivuse = LanguageTag.getJavaCompatiblePrivateuse(ext.getValue());
+ if (!modPrivuse.equals(ext.getValue())) {
+ ext = new PrivateuseExtension(modPrivuse); // replace only when the private use value actually changed
+ }
+ }
+ _extMap.put(key, ext); // stores the Extension object obtained from extensions itself, not a copy
+ }
+
+ return this;
+ }
+
+ public InternalLocaleBuilder clear() {
+ _language = "";
+ _script = "";
+ _region = "";
+ _variant = "";
+ removeLocaleExtensions();
+ return this;
+ }
+
+ public InternalLocaleBuilder removeLocaleExtensions() {
+ if (_extMap != null) {
+ _extMap.clear();
+ }
+ return this;
+ }
+
+ public BaseLocale getBaseLocale() {
+ return BaseLocale.getInstance(_language, _script, _region, _variant);
+ }
+
+ public LocaleExtensions getLocaleExtensions() { // returns the extensions set so far, or EMPTY_EXTENSIONS when there are none
+ if (_extMap != null && _extMap.size() > 0) {
+ return LocaleExtensions.getInstance(_extMap); // NOTE(review): hands over the builder's live map - confirm getInstance copies it, otherwise later builder changes would show through
+ }
+ return LocaleExtensions.EMPTY_EXTENSIONS;
+ }
+
+ private String processVariant(String variant) throws LocaleSyntaxException { // parses a '_'-separated variant string and returns the parsed subtags re-joined with '_'
+ StringTokenIterator sti = new StringTokenIterator(variant, LOCALESEP);
+ ParseStatus sts = new ParseStatus();
+
+ List<String> variants = LanguageTag.DEFAULT_PARSER.parseVariants(sti, sts);
+ if (sts.parseLength != variant.length()) { // parser did not consume the whole input, so treat it as ill-formed
+ throw new LocaleSyntaxException("Ill-formed variant: " + variant, sti.currentStart());
+ }
+
+ StringBuilder buf = new StringBuilder();
+ for (String var : variants) {
+ if (buf.length() != 0) {
+ buf.append(LOCALESEP);
+ }
+ buf.append(var); // presumably already canonicalized by parseVariants - confirm
+ }
+ return buf.toString();
+ }
+}
--- a/src/share/classes/sun/util/locale/LanguageTag.java Wed Oct 21 12:24:07 2009 -0700
+++ b/src/share/classes/sun/util/locale/LanguageTag.java Wed Nov 04 17:57:23 2009 -0500
@@ -1,515 +1,897 @@
-/*
- *******************************************************************************
- * Copyright (C) 2009, International Business Machines Corporation and *
- * others. All Rights Reserved. *
- *******************************************************************************
- */
-package sun.util.locale;
-
-import java.util.Collections;
-import java.util.HashMap;
-import java.util.Iterator;
-import java.util.Set;
-import java.util.TreeSet;
-
-public final class LanguageTag {
-
- private String _languageTag = ""; // entire language tag
- private String _grandfathered = ""; // grandfathered tag
- private String _privateuse = ""; // privateuse, not including leading "x-"
- private String _language = ""; // language subtag
- private String[] _extlang; // array of extlang subtags
- private String _script = ""; // script subtag
- private String _region = ""; // region subtag
- private TreeSet<String> _variants; // variant subtags in a single string
- private TreeSet<Extension> _extensions; // extension key/value pairs
-
- private static final int MINLEN = 2; // minimum length of a valid language tag
-
- private static final String SEP = "-";
- private static final char SEPCHAR = '-';
- private static final String PRIVATEUSE = "x";
-
- public static String UNDETERMINED = "und";
-
- // Map contains grandfathered tags and its preferred mappings from
- // http://www.ietf.org/internet-drafts/draft-ietf-ltru-4645bis-09.txt
- private static final HashMap<String,String> GRANDFATHERED = new HashMap<String,String>();
-
- static {
- final String[][] entries = {
- //{"tag", "preferred"},
- {"art-lojban", "jbo"},
- {"cel-gaulish", ""},
- {"en-GB-oed", ""},
- {"i-ami", "ami"},
- {"i-bnn", "bnn"},
- {"i-default", ""},
- {"i-enochian", ""},
- {"i-hak", "hak"},
- {"i-klingon", "tlh"},
- {"i-lux", "lb"},
- {"i-mingo", ""},
- {"i-navajo", "nv"},
- {"i-pwn", "pwn"},
- {"i-tao", "tao"},
- {"i-tay", "tay"},
- {"i-tsu", "tsu"},
- {"no-bok", "nb"},
- {"no-nyn", "nn"},
- {"sgn-BE-FR", "sfb"},
- {"sgn-BE-NL", "vgt"},
- {"sgn-CH-DE", "sgg"},
- {"zh-guoyu", "cmn"},
- {"zh-hakka", "hak"},
- {"zh-min", ""},
- {"zh-min-nan", "nan"},
- {"zh-xiang", "hsn"},
- };
- for (String[] e : entries) {
- GRANDFATHERED.put(e[0], e[1]);
- }
- }
-
- private static final String[][] DEPRECATEDLANGS = {
- // {<deprecated>, <current>},
- {"iw", "he"},
- {"ji", "yi"},
- {"in", "id"},
- };
-
- private LanguageTag(String tag) {
- _languageTag = tag;
- }
-
- // Bit flags used by the language tag parser
- private static final int LANG = 0x0001;
- private static final int EXTL = 0x0002;
- private static final int SCRT = 0x0004;
- private static final int REGN = 0x0008;
- private static final int VART = 0x0010;
- private static final int EXTS = 0x0020;
- private static final int EXTV = 0x0040;
- private static final int PRIV = 0x0080;
-
- public static LanguageTag parse(String langtag) throws LocaleSyntaxException {
- if (langtag.length() < MINLEN) {
- throw new LocaleSyntaxException("The specified tag '"
- + langtag + "' is too short");
- }
-
- if (langtag.endsWith(SEP)) {
- // This code utilizes String#split, which drops off the last empty segment.
- // We need to check if the tag ends with '-' here.
- int erridx = langtag.length() - 1;
- while (erridx - 1 >= 0 && langtag.charAt(erridx - 1) != SEPCHAR) {
- erridx--;
- }
- throw new LocaleSyntaxException("The specified tag '"
- + langtag + "' ends with " + SEP, erridx);
- }
-
- String tag = AsciiUtil.toLowerString(langtag);
- LanguageTag t = new LanguageTag(tag);
-
- // Check if the tag is grandfathered
- if (GRANDFATHERED.containsKey(tag)) {
- t._grandfathered = tag;
- // Preferred mapping
- String preferred = GRANDFATHERED.get(tag);
- if (preferred.length() > 0) {
- t._language = preferred;
- }
- return t;
- }
-
- // langtag = language
- // ["-" script]
- // ["-" region]
- // *("-" variant)
- // *("-" extension)
- // ["-" privateuse]
-
- String[] subtags = tag.split(SEP);
- int idx = 0;
- int extlangIdx = 0;
- String extSingleton = null;
- StringBuilder extBuf = null;
- int next = LANG | PRIV;
- String errorMsg = null;
-
- PARSE:
- while (true) {
- if (idx >= subtags.length) {
- break;
- }
- if ((next & LANG) != 0) {
- if (isLanguageSubtag(subtags[idx])) {
- t._language = subtags[idx++];
- next = EXTL | SCRT | REGN | VART | EXTS | PRIV;
- continue;
- }
- }
- if ((next & EXTL) != 0) {
- if (isExtlangSubtag(subtags[idx])) {
- if (extlangIdx == 0) {
- t._extlang = new String[3];
- }
- t._extlang[extlangIdx++] = subtags[idx++];
- if (extlangIdx < 3) {
- next = EXTL | SCRT | REGN | VART | EXTS | PRIV;
- } else {
- next = SCRT | REGN | VART | EXTS | PRIV;
- }
- continue;
- }
- }
- if ((next & SCRT) != 0) {
- if (isScriptSubtag(subtags[idx])) {
- t._script = subtags[idx++];
- next = REGN | VART | EXTS | PRIV;
- continue;
- }
- }
- if ((next & REGN) != 0) {
- if (isRegionSubtag(subtags[idx])) {
- t._region = subtags[idx++];
- next = VART | EXTS | PRIV;
- continue;
- }
- }
- if ((next & VART) != 0) {
- if (isVariantSubtag(subtags[idx])) {
- if (t._variants == null) {
- t._variants = new TreeSet<String>();
- }
- t._variants.add(subtags[idx++]);
- next = VART | EXTS | PRIV;
- continue;
- }
- }
- if ((next & EXTS) != 0) {
- if (isExtensionSingleton(subtags[idx])) {
- if (extSingleton != null) {
- if (extBuf == null) {
- errorMsg = "The specified tag '"
- + tag + "' contains an incomplete extension: "
- + extSingleton;
- break PARSE;
- }
- // Emit the previous extension key/value pair
- if (t._extensions == null) {
- t._extensions = new TreeSet<Extension>();
- }
- Extension e = new Extension(extSingleton.charAt(0), extBuf.toString());
- t._extensions.add(e);
- } else {
- if (t._extensions != null) {
- char extChar = subtags[idx].charAt(0);
- for (Extension e : t._extensions) {
- if (e.getSingleton() == extChar) {
- errorMsg = "The specified tag '"
- + tag + "' contains duplicated extension: "
- + extChar;
- break PARSE;
- }
- }
- }
- }
- extSingleton = subtags[idx++];
- extBuf = null; // Clear the extension value buffer
- next = EXTV;
- continue;
- }
- }
- if ((next & EXTV) != 0) {
- if (isExtensionSubtag(subtags[idx])) {
- if (extBuf == null) {
- extBuf = new StringBuilder(subtags[idx++]);
- } else {
- extBuf.append(SEP);
- extBuf.append(subtags[idx++]);
- }
- next = EXTS | EXTV | PRIV;
- continue;
- }
- }
- if ((next & PRIV) != 0) {
- if (AsciiUtil.caseIgnoreMatch(PRIVATEUSE, subtags[idx])) {
- // The rest of part will be private use value subtags
- StringBuilder puBuf = new StringBuilder();
- idx++;
- for (boolean bFirst = true ; idx < subtags.length; idx++) {
- if (!isPrivateuseValueSubtag(subtags[idx])) {
- errorMsg = "The specified tag '"
- + langtag + "' contains an illegal private use subtag: "
- + (subtags[idx].length() == 0 ? "<empty>" : subtags[idx]);
- break PARSE;
- }
- if (bFirst) {
- bFirst = false;
- } else {
- puBuf.append(SEP);
- }
- puBuf.append(subtags[idx]);
- }
- t._privateuse = puBuf.toString();
- if (t._privateuse.length() == 0) {
- // Empty privateuse value
- errorMsg = "The specified tag '"
- + langtag + "' contains an empty private use subtag";
- break PARSE;
- }
- break;
- }
- }
- // If we fell through here, it means this subtag is illegal
- errorMsg = "The specified tag '" + langtag
- + "' contains an illegal subtag: "
- + (subtags[idx].length() == 0 ? "<empty>" : subtags[idx]);
- break PARSE;
- }
-
- if (errorMsg == null) {
- if (extSingleton != null) {
- if (extBuf == null) {
- // extension singleton without following extension value
- errorMsg = "The specified tag '"
- + langtag + "' contains an incomplete extension: "
- + extSingleton;
- } else {
- // Emit the last extension key/value pair
- if (t._extensions == null) {
- t._extensions = new TreeSet<Extension>();
- }
- Extension e = new Extension(extSingleton.charAt(0), extBuf.toString());
- t._extensions.add(e);
- }
- }
- }
-
- if (errorMsg != null) {
- // restore the original string index
- int errIndex = 0;
- for (int i = 0; i < idx; i++) {
- errIndex += (subtags[i].length() + 1);
- }
- throw new LocaleSyntaxException(errorMsg, errIndex);
- }
-
- return t;
- }
-
- public String getTag() {
- return _languageTag;
- }
-
- public String getLanguage() {
- return _language;
- }
-
- public String getJDKLanguage() {
- String lang = _language;
- for (String[] langMap : DEPRECATEDLANGS) {
- if (AsciiUtil.caseIgnoreCompare(lang, langMap[1]) == 0) {
- // use the old code
- lang = langMap[0];
- break;
- }
- }
- return lang;
- }
-
- public String getExtlang(int idx) {
- if (_extlang != null && idx < _extlang.length) {
- return _extlang[idx];
- }
- return null;
- }
-
- public String getScript() {
- return _script;
- }
-
- public String getRegion() {
- return _region;
- }
-
- public String getVariant() {
- if (_variants != null) {
- StringBuilder buf = new StringBuilder();
- Iterator<String> itr = _variants.iterator();
- while (itr.hasNext()) {
- if (buf.length() > 0) {
- buf.append(SEP);
- }
- buf.append(itr.next());
- }
- return buf.toString();
- }
- return "";
- }
-
- public Set<String> getVarinats() {
- return Collections.unmodifiableSet(_variants);
- }
-
- public Set<Extension> getExtensions() {
- if (_extensions != null) {
- return Collections.unmodifiableSet(_extensions);
- }
- return null;
- }
-
- public String getPrivateUse() {
- return _privateuse;
- }
-
- public String getGrandfathered() {
- return _grandfathered;
- }
-
- public static boolean isLanguageSubtag(String s) {
- // language = 2*3ALPHA ; shortest ISO 639 code
- // ["-" extlang] ; sometimes followed by
- // ; extended language subtags
- // / 4ALPHA ; or reserved for future use
- // / 5*8ALPHA ; or registered language subtag
- return (s.length() >= 2) && (s.length() <= 8) && AsciiUtil.isAlphaString(s);
- }
-
- public static boolean isExtlangSubtag(String s) {
- // extlang = 3ALPHA ; selected ISO 639 codes
- // *2("-" 3ALPHA) ; permanently reserved
- return (s.length() == 3) && AsciiUtil.isAlphaString(s);
- }
-
- public static boolean isScriptSubtag(String s) {
- // script = 4ALPHA ; ISO 15924 code
- return (s.length() == 4) && AsciiUtil.isAlphaString(s);
- }
-
- public static boolean isRegionSubtag(String s) {
- // region = 2ALPHA ; ISO 3166-1 code
- // / 3DIGIT ; UN M.49 code
- return ((s.length() == 2) && AsciiUtil.isAlphaString(s))
- || ((s.length() == 3) && AsciiUtil.isNumericString(s));
- }
-
- public static boolean isVariantSubtag(String s) {
- // variant = 5*8alphanum ; registered variants
- // / (DIGIT 3alphanum)
- int len = s.length();
- if (len >= 5 && len <= 8) {
- return AsciiUtil.isAlphaNumericString(s);
- }
- if (len == 4) {
- return AsciiUtil.isNumeric(s.charAt(0))
- && AsciiUtil.isAlphaNumeric(s.charAt(1))
- && AsciiUtil.isAlphaNumeric(s.charAt(2))
- && AsciiUtil.isAlphaNumeric(s.charAt(3));
- }
- return false;
- }
-
- public static boolean isExtensionSingleton(String s) {
- // extension = singleton 1*("-" (2*8alphanum))
- return (s.length() == 1)
- && AsciiUtil.isAlphaString(s)
- && !AsciiUtil.caseIgnoreMatch(PRIVATEUSE, s);
- }
-
- public static boolean isExtensionSubtag(String s) {
- // extension = singleton 1*("-" (2*8alphanum))
- return (s.length() >= 2) && (s.length() <= 8) && AsciiUtil.isAlphaNumericString(s);
- }
-
- public static boolean isPrivateuseValueSubtag(String s) {
- // privateuse = "x" 1*("-" (1*8alphanum))
- return (s.length() >= 1) && (s.length() <= 8) && AsciiUtil.isAlphaNumericString(s);
- }
-
- /*
- * Language tag extension key/value container
- */
- public static class Extension implements Comparable<Extension> {
- private char _singleton;
- private String _value;
-
- public Extension(char singleton, String value) {
- _singleton = AsciiUtil.toLower(singleton);
- _value = value;
- }
-
- public char getSingleton() {
- return _singleton;
- }
-
- public String getValue() {
- return _value;
- }
-
- public int compareTo(Extension other) {
- return (int)_singleton - (int)other._singleton;
- }
- }
-
- public static String toLanguageTag(BaseLocale base, LocaleExtensions ext) {
- StringBuilder buf = new StringBuilder();
-
- // language
- String language = base.getLanguage();
- if (language.length() == 0) {
- buf.append(UNDETERMINED);
- } else {
- if (isLanguageSubtag(language)) {
- // if deprecated language code, map to the current one
- for (String[] langMap : DEPRECATEDLANGS) {
- if (AsciiUtil.caseIgnoreCompare(language, langMap[0]) == 0) {
- language = langMap[1];
- break;
- }
- }
- buf.append(language);
- } else {
- buf.append(UNDETERMINED);
- }
- }
-
- // script
- String script = base.getScript();
- if (script.length() > 0 && isScriptSubtag(script)) {
- buf.append(SEP);
- buf.append(AsciiUtil.toLowerString(script));
- }
-
- // region
- String region = base.getRegion();
- if (region.length() > 0 && isRegionSubtag(region)) {
- buf.append(SEP);
- buf.append(AsciiUtil.toLowerString(region));
- }
-
- // variant
- String variant = base.getVariant();
- if (variant.length() > 0) {
- String[] variants = variant.split("_");
- for (String var : variants) {
- if (isVariantSubtag(var)) {
- buf.append(SEP);
- buf.append(AsciiUtil.toLowerString(var));
- }
- }
- }
-
- if (ext != null) {
- String exttags = ext.getCanonicalString();
- if (exttags.length() > 0) {
- // extensions including private use
- buf.append(SEP);
- buf.append(ext.getCanonicalString());
- }
- }
- return buf.toString();
- }
-}
+/*
+ *******************************************************************************
+ * Copyright (C) 2009, International Business Machines Corporation and *
+ * others. All Rights Reserved. *
+ *******************************************************************************
+ */
+package sun.util.locale;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.SortedMap;
+import java.util.TreeMap;
+import java.util.Map.Entry;
+
+public class LanguageTag {
+
+ private static final boolean JDKIMPL = true;
+
+ //
+ // static fields
+ //
+ public static final String SEP = "-"; // BCP 47 subtag separator
+ public static final String PRIVATEUSE = "x"; // private use singleton
+ public static final String UNDETERMINED = "und"; // language code substituted when no language is available; final so callers cannot reassign it
+
+ private static final String JAVAVARIANT = "variant";
+ private static final String JAVASEP = "_";
+
+ private static final SortedMap<Character, Extension> EMPTY_EXTENSION_MAP = new TreeMap<Character, Extension>();
+
+ //
+ // Language tag parser instances
+ //
+ public static final Parser DEFAULT_PARSER = new Parser(false);
+ public static final Parser JAVA_VARIANT_COMPATIBLE_PARSER = new Parser(true);
+
+ //
+ // Language subtag fields
+ //
+ private String _grandfathered = ""; // grandfathered tag
+ private String _language = ""; // language subtag
+ private String _script = ""; // script subtag
+ private String _region = ""; // region subtag
+ private String _privateuse = ""; // privateuse, not including leading "x-"
+ private List<String> _extlangs = Collections.emptyList(); // extlang subtags
+ private List<String> _variants = Collections.emptyList(); // variant subtags
+ private SortedMap<Character, Extension> _extensions = EMPTY_EXTENSION_MAP; // extension key/value pairs
+
+ private boolean _javaCompatVariants = false;
+
+ // Map contains grandfathered tags and their preferred mappings from
+ // http://www.ietf.org/rfc/rfc5646.txt
+ private static final Map<AsciiUtil.CaseInsensitiveKey, String[]> GRANDFATHERED =
+ new HashMap<AsciiUtil.CaseInsensitiveKey, String[]>();
+
+ static {
+ // grandfathered = irregular ; non-redundant tags registered
+ // / regular ; during the RFC 3066 era
+ //
+ // irregular = "en-GB-oed" ; irregular tags do not match
+ // / "i-ami" ; the 'langtag' production and
+ // / "i-bnn" ; would not otherwise be
+ // / "i-default" ; considered 'well-formed'
+ // / "i-enochian" ; These tags are all valid,
+ // / "i-hak" ; but most are deprecated
+ // / "i-klingon" ; in favor of more modern
+ // / "i-lux" ; subtags or subtag
+ // / "i-mingo" ; combination
+ // / "i-navajo"
+ // / "i-pwn"
+ // / "i-tao"
+ // / "i-tay"
+ // / "i-tsu"
+ // / "sgn-BE-FR"
+ // / "sgn-BE-NL"
+ // / "sgn-CH-DE"
+ //
+ // regular = "art-lojban" ; these tags match the 'langtag'
+ // / "cel-gaulish" ; production, but their subtags
+ // / "no-bok" ; are not extended language
+ // / "no-nyn" ; or variant subtags: their meaning
+ // / "zh-guoyu" ; is defined by their registration
+ // / "zh-hakka" ; and all of these are deprecated
+ // / "zh-min" ; in favor of a more modern
+ // / "zh-min-nan" ; subtag or sequence of subtags
+ // / "zh-xiang"
+
+ final String[][] entries = {
+ //{"tag", "preferred"},
+ {"art-lojban", "jbo"},
+ {"cel-gaulish", "cel-gaulish"}, // gaulish is parsed as a variant
+ {"en-GB-oed", "en-GB"}, // oed (Oxford English Dictionary spelling) is ignored
+ {"i-ami", "ami"},
+ {"i-bnn", "bnn"},
+ {"i-default", UNDETERMINED}, // fallback
+ {"i-enochian", UNDETERMINED}, // fallback
+ {"i-hak", "hak"},
+ {"i-klingon", "tlh"},
+ {"i-lux", "lb"},
+ {"i-mingo", UNDETERMINED}, // fallback
+ {"i-navajo", "nv"},
+ {"i-pwn", "pwn"},
+ {"i-tao", "tao"},
+ {"i-tay", "tay"},
+ {"i-tsu", "tsu"},
+ {"no-bok", "nb"},
+ {"no-nyn", "nn"},
+ {"sgn-BE-FR", "sfb"},
+ {"sgn-BE-NL", "vgt"},
+ {"sgn-CH-DE", "sgg"},
+ {"zh-guoyu", "cmn"},
+ {"zh-hakka", "hak"},
+ {"zh-min", "zh"}, // fallback
+ {"zh-min-nan", "nan"},
+ {"zh-xiang", "hsn"},
+ };
+ for (String[] e : entries) {
+ GRANDFATHERED.put(new AsciiUtil.CaseInsensitiveKey(e[0]), e);
+ }
+ }
+
+ private LanguageTag() {
+ }
+
+ //
+ // Getter methods for language subtag fields
+ //
+
+ public String getLanguage() {
+ return _language;
+ }
+
+ public List<String> getExtlangs() {
+ return Collections.unmodifiableList(_extlangs);
+ }
+
+ public String getScript() {
+ return _script;
+ }
+
+ public String getRegion() {
+ return _region;
+ }
+
+ public List<String> getVariants() {
+ return Collections.unmodifiableList(_variants);
+ }
+
+ public SortedMap<Character, Extension> getExtensions() {
+ return Collections.unmodifiableSortedMap(_extensions);
+ }
+
+ public String getPrivateuse() {
+ return _privateuse;
+ }
+
+ public String getGrandfathered() {
+ return _grandfathered;
+ }
+
+ private String getJavaVariant() {
+ StringBuilder buf = new StringBuilder();
+ for (String var : _variants) {
+ if (buf.length() > 0) {
+ buf.append(JAVASEP);
+ }
+ buf.append(var);
+ }
+ if (_javaCompatVariants) {
+ return getJavaCompatibleVariant(buf.toString(), _privateuse);
+ }
+
+ return buf.toString();
+ }
+
+ private String getJavaPrivateuse() {
+ if (_javaCompatVariants) {
+ return getJavaCompatiblePrivateuse(_privateuse);
+ }
+ return _privateuse;
+ }
+
+ static String getJavaCompatibleVariant(String bcpVariants, String bcpPrivuse) {
+ StringBuilder buf = new StringBuilder(bcpVariants);
+ if (bcpPrivuse.length() > 0) {
+ int idx = -1;
+ if (bcpPrivuse.startsWith(JAVAVARIANT + SEP)) {
+ idx = (JAVAVARIANT + SEP).length();
+ } else {
+ idx = bcpPrivuse.indexOf(SEP + JAVAVARIANT + SEP);
+ if (idx != -1) {
+ idx += (SEP + JAVAVARIANT + SEP).length();
+ }
+ }
+ if (idx != -1) {
+ if (buf.length() != 0) {
+ buf.append(JAVASEP);
+ }
+ buf.append(bcpPrivuse.substring(idx).replace(SEP, JAVASEP));
+ }
+ }
+ return buf.toString();
+ }
+
+ static String getJavaCompatiblePrivateuse(String bcpPrivuse) {
+ if (bcpPrivuse.length() > 0) {
+ int idx = -1;
+ if (bcpPrivuse.startsWith(JAVAVARIANT + SEP)) {
+ idx = 0;
+ } else {
+ idx = bcpPrivuse.indexOf(SEP + JAVAVARIANT + SEP);
+ }
+ if (idx != -1) {
+ return bcpPrivuse.substring(0, idx);
+ }
+ }
+ return bcpPrivuse;
+ }
+
+ public BaseLocale getBaseLocale() {
+ String lang = _language;
+ if (_extlangs.size() > 0) {
+ // Extended language subtags are used for various historical
+ // and compatibility reasons. Each extended language subtag
+ // has a "Preferred-Value" that is exactly the same as the extended
+ // language subtag itself. For example,
+ //
+ // Type: extlang
+ // Subtag: aao
+ // Description: Algerian Saharan Arabic
+ // Added: 2009-07-29
+ // Preferred-Value: aao
+ // Prefix: ar
+ // Macrolanguage: ar
+ //
+ // Consequently, the language tag "ar-aao-DZ" is equivalent to
+ // "aao-DZ".
+ //
+ // Strictly speaking, the mapping requires prefix validation
+ // (e.g. the primary language must be "ar" in the example above).
+ // However, this implementation does not check the prefix
+ // and simply uses the first extlang value as the locale's language.
+ lang = _extlangs.get(0);
+ }
+ if (lang.equals(UNDETERMINED)) {
+ lang = "";
+ }
+ return BaseLocale.getInstance(lang, _script, _region, getJavaVariant());
+ }
+
+ public LocaleExtensions getLocaleExtensions() { // builds the LocaleExtensions for this tag: its extensions plus the Java-side private use value
+ String javaPrivuse = getJavaPrivateuse();
+ if ((_extensions == null || _extensions.isEmpty()) && javaPrivuse.length() == 0) { // _extensions defaults to an empty map, so test emptiness as well as null
+ return LocaleExtensions.EMPTY_EXTENSIONS;
+ }
+ SortedMap<Character, Extension> exts = new TreeMap<Character, Extension>(); // private copy; the tag's own map is not modified
+ if (_extensions != null) {
+ exts.putAll(_extensions);
+ }
+ if (javaPrivuse.length() > 0) {
+ PrivateuseExtension pext = new PrivateuseExtension(javaPrivuse);
+ exts.put(Character.valueOf(PrivateuseExtension.SINGLETON), pext);
+ }
+ return LocaleExtensions.getInstance(exts);
+ }
+
+ public String getID() {
+ if (_grandfathered.length() > 0) {
+ return _grandfathered;
+ }
+ StringBuilder buf = new StringBuilder();
+ if (_language.length() > 0) {
+ buf.append(_language);
+ if (_extlangs.size() > 0) {
+ for (String el : _extlangs) {
+ buf.append(SEP);
+ buf.append(el);
+ }
+ }
+ if (_script.length() > 0) {
+ buf.append(SEP);
+ buf.append(_script);
+ }
+ if (_region.length() > 0) {
+ buf.append(SEP);
+ buf.append(_region);
+ }
+ if (_variants.size() > 0) {
+ for (String var : _variants) {
+ buf.append(SEP);
+ buf.append(var);
+ }
+ }
+ if (_extensions.size() > 0) {
+ Set<Entry<Character, Extension>> exts = _extensions.entrySet();
+ for (Entry<Character, Extension> ext : exts) {
+ buf.append(SEP);
+ buf.append(ext.getKey());
+ buf.append(SEP);
+ buf.append(ext.getValue().getValue());
+ }
+ }
+ }
+ if (_privateuse.length() > 0) {
+ if (buf.length() > 0) {
+ buf.append(SEP);
+ }
+ buf.append(PRIVATEUSE);
+ buf.append(SEP);
+ buf.append(_privateuse);
+ }
+ return buf.toString();
+ }
+
+ public String toString() {
+ return getID();
+ }
+
+ //
+ // Language subtag syntax checking methods
+ //
+
+ public static boolean isLanguage(String s) {
+ // language = 2*3ALPHA ; shortest ISO 639 code
+ // ["-" extlang] ; sometimes followed by
+ // ; extended language subtags
+ // / 4ALPHA ; or reserved for future use
+ // / 5*8ALPHA ; or registered language subtag
+ return (s.length() >= 2) && (s.length() <= 8) && AsciiUtil.isAlphaString(s);
+ }
+
+ public static boolean isExtlang(String s) {
+ // extlang = 3ALPHA ; selected ISO 639 codes
+ // *2("-" 3ALPHA) ; permanently reserved
+ return (s.length() == 3) && AsciiUtil.isAlphaString(s);
+ }
+
+ public static boolean isScript(String s) {
+ // script = 4ALPHA ; ISO 15924 code
+ return (s.length() == 4) && AsciiUtil.isAlphaString(s);
+ }
+
+ public static boolean isRegion(String s) {
+ // region = 2ALPHA ; ISO 3166-1 code
+ // / 3DIGIT ; UN M.49 code
+ return ((s.length() == 2) && AsciiUtil.isAlphaString(s))
+ || ((s.length() == 3) && AsciiUtil.isNumericString(s));
+ }
+
+ public static boolean isVariant(String s) {
+ // variant = 5*8alphanum ; registered variants
+ // / (DIGIT 3alphanum)
+ int len = s.length();
+ if (len >= 5 && len <= 8) {
+ return AsciiUtil.isAlphaNumericString(s);
+ }
+ if (len == 4) {
+ return AsciiUtil.isNumeric(s.charAt(0))
+ && AsciiUtil.isAlphaNumeric(s.charAt(1))
+ && AsciiUtil.isAlphaNumeric(s.charAt(2))
+ && AsciiUtil.isAlphaNumeric(s.charAt(3));
+ }
+ return false;
+ }
+
+ public static boolean isExtensionSingleton(String s) {
+ // singleton = DIGIT ; 0 - 9
+ // / %x41-57 ; A - W
+ // / %x59-5A ; Y - Z
+ // / %x61-77 ; a - w
+ // / %x79-7A ; y - z
+ // Any single ASCII letter or digit except "x"/"X" (reserved for private use).
+ return (s.length() == 1)
+ && AsciiUtil.isAlphaNumericString(s)
+ && !AsciiUtil.caseIgnoreMatch(PRIVATEUSE, s);
+ }
+
+ public static boolean isExtensionSubtag(String s) {
+ // extension = singleton 1*("-" (2*8alphanum))
+ return (s.length() >= 2) && (s.length() <= 8) && AsciiUtil.isAlphaNumericString(s);
+ }
+
+ public static boolean isPrivateuseSingleton(String s) {
+ // privateuse = "x" 1*("-" (1*8alphanum))
+ return (s.length() == 1)
+ && AsciiUtil.caseIgnoreMatch(PRIVATEUSE, s);
+ }
+
+ public static boolean isPrivateuseSubtag(String s) {
+ // privateuse = "x" 1*("-" (1*8alphanum))
+ return (s.length() >= 1) && (s.length() <= 8) && AsciiUtil.isAlphaNumericString(s);
+ }
+
+ //
+ // Language subtag canonicalization methods
+ //
+
+ public static String canonicalizeLanguage(String s) {
+ return AsciiUtil.toLowerString(s);
+ }
+
+ public static String canonicalizeExtlang(String s) {
+ return AsciiUtil.toLowerString(s);
+ }
+
+ public static String canonicalizeScript(String s) {
+ return AsciiUtil.toTitleString(s);
+ }
+
+ public static String canonicalizeRegion(String s) {
+ return AsciiUtil.toUpperString(s);
+ }
+
+ public static String canonicalizeVariant(String s) {
+ return AsciiUtil.toLowerString(s);
+ }
+
+ public static String canonicalizeExtensionSingleton(String s) {
+ return AsciiUtil.toLowerString(s);
+ }
+
+ public static String canonicalizeExtensionSubtag(String s) {
+ return AsciiUtil.toLowerString(s);
+ }
+
+ public static String canonicalizePrivateuseSubtag(String s) {
+ return AsciiUtil.toLowerString(s);
+ }
+
+
+ public static LanguageTag parse(String str, boolean javaCompatVar) {
+ LanguageTag tag = new LanguageTag();
+ tag.parseString(str, javaCompatVar);
+ return tag;
+ }
+
+ public static LanguageTag parseStrict(String str, boolean javaCompatVar) throws LocaleSyntaxException {
+ LanguageTag tag = new LanguageTag();
+ ParseStatus sts = tag.parseString(str, javaCompatVar);
+ if (sts.isError()) {
+ throw new LocaleSyntaxException(sts.errorMsg, sts.errorIndex);
+ }
+ return tag;
+ }
+
+ public static LanguageTag parseLocale(BaseLocale base, LocaleExtensions locExts) {
+ LanguageTag tag = new LanguageTag();
+ tag._javaCompatVariants = true;
+
+ String language = base.getLanguage();
+ String script = base.getScript();
+ String region = base.getRegion();
+ String variant = base.getVariant();
+
+ String privuseVar = null; // store ill-formed variant subtags
+
+ if (language.length() > 0 && isLanguage(language)) {
+ // Convert a deprecated language code used by Java to
+ // a new code
+ language = canonicalizeLanguage(language);
+ if (language.equals("iw")) {
+ language = "he";
+ } else if (language.equals("ji")) {
+ language = "yi";
+ } else if (language.equals("in")) {
+ language = "id";
+ }
+ tag._language = language;
+ }
+ if (script.length() > 0 && isScript(script)) {
+ tag._script = canonicalizeScript(script);
+ }
+ if (region.length() > 0 && isRegion(region)) {
+ tag._region = canonicalizeRegion(region);
+ }
+ if (variant.length() > 0) {
+ List<String> variants = null;
+ StringTokenIterator varitr = new StringTokenIterator(variant, JAVASEP);
+ while (!varitr.isDone()) {
+ String var = varitr.current();
+ if (!isVariant(var)) {
+ break;
+ }
+ if (variants == null) {
+ variants = new ArrayList<String>();
+ }
+ if (JDKIMPL) {
+ variants.add(var); // Do not canonicalize!
+ } else {
+ variants.add(canonicalizeVariant(var));
+ }
+ varitr.next();
+ }
+ if (variants != null) {
+ tag._variants = variants;
+ }
+ if (!varitr.isDone()) {
+ // ill-formed variant subtags
+ StringBuilder buf = new StringBuilder();
+ while (!varitr.isDone()) {
+ String prvv = varitr.current();
+ if (!isPrivateuseSubtag(prvv)) {
+ // cannot use private use subtag - truncated
+ break;
+ }
+ if (buf.length() > 0) {
+ buf.append(SEP);
+ }
+ if (!JDKIMPL) {
+ prvv = AsciiUtil.toLowerString(prvv);
+ }
+ buf.append(prvv);
+ varitr.next();
+ }
+ if (buf.length() > 0) {
+ privuseVar = buf.toString();
+ }
+ }
+ }
+
+ TreeMap<Character, Extension> extensions = null;
+ String privateuse = null;
+
+ Set<Character> locextKeys = locExts.getKeys();
+ for (Character locextKey : locextKeys) {
+ Extension ext = locExts.getExtension(locextKey);
+ if (ext instanceof PrivateuseExtension) {
+ privateuse = ext.getValue();
+ } else {
+ if (extensions == null) {
+ extensions = new TreeMap<Character, Extension>();
+ }
+ extensions.put(locextKey, ext);
+ }
+ }
+
+ if (extensions != null) {
+ tag._extensions = extensions;
+ }
+
+ // append ill-formed variant subtags to private use
+ if (privuseVar != null) {
+ if (privateuse == null) {
+ privateuse = JAVAVARIANT + SEP + privuseVar;
+ } else {
+ privateuse = privateuse + SEP + JAVAVARIANT + SEP + privuseVar.replace(JAVASEP, SEP);
+ }
+ }
+
+ if (privateuse != null) {
+ tag._privateuse = privateuse;
+ } else if (tag._language.length() == 0) {
+ // use "und" if neither language nor privateuse is available
+ tag._language = UNDETERMINED;
+ }
+
+ return tag;
+ }
+
+ private ParseStatus parseString(String str, boolean javaCompatVar) {
+ // Check if the tag is grandfathered
+ String[] gfmap = GRANDFATHERED.get(new AsciiUtil.CaseInsensitiveKey(str));
+ ParseStatus sts;
+ if (gfmap != null) {
+ _grandfathered = gfmap[0];
+ sts = parseLanguageTag(gfmap[1], javaCompatVar);
+ sts.parseLength = str.length();
+ } else {
+ _grandfathered = "";
+ sts = parseLanguageTag(str, javaCompatVar);
+ }
+ return sts;
+ }
+
+ /*
+ * Parse Language-Tag, except grandfathered.
+ *
+ * BNF in RFC5646
+ *
+ * Language-Tag = langtag ; normal language tags
+ * / privateuse ; private use tag
+ * / grandfathered ; grandfathered tags
+ *
+ *
+ * langtag = language
+ * ["-" script]
+ * ["-" region]
+ * *("-" variant)
+ * *("-" extension)
+ * ["-" privateuse]
+ *
+ * language = 2*3ALPHA ; shortest ISO 639 code
+ * ["-" extlang] ; sometimes followed by
+ * ; extended language subtags
+ * / 4ALPHA ; or reserved for future use
+ * / 5*8ALPHA ; or registered language subtag
+ *
+ * extlang = 3ALPHA ; selected ISO 639 codes
+ * *2("-" 3ALPHA) ; permanently reserved
+ *
+ * script = 4ALPHA ; ISO 15924 code
+ *
+ * region = 2ALPHA ; ISO 3166-1 code
+ * / 3DIGIT ; UN M.49 code
+ *
+ * variant = 5*8alphanum ; registered variants
+ * / (DIGIT 3alphanum)
+ *
+ * extension = singleton 1*("-" (2*8alphanum))
+ *
+ * ; Single alphanumerics
+ * ; "x" reserved for private use
+ * singleton = DIGIT ; 0 - 9
+ * / %x41-57 ; A - W
+ * / %x59-5A ; Y - Z
+ * / %x61-77 ; a - w
+ * / %x79-7A ; y - z
+ *
+ * privateuse = "x" 1*("-" (1*8alphanum))
+ *
+ */
+ private ParseStatus parseLanguageTag(String langtag, boolean javaCompat) {
+ ParseStatus sts = new ParseStatus();
+ StringTokenIterator itr = new StringTokenIterator(langtag, SEP);
+ Parser parser = javaCompat ? JAVA_VARIANT_COMPATIBLE_PARSER : DEFAULT_PARSER;
+
+ _javaCompatVariants = javaCompat;
+
+ // langtag must start with either language or privateuse
+ _language = parser.parseLanguage(itr, sts);
+ if (_language.length() > 0) {
+ _extlangs = parser.parseExtlangs(itr, sts);
+ _script = parser.parseScript(itr, sts);
+ _region = parser.parseRegion(itr, sts);
+ _variants = parser.parseVariants(itr, sts);
+ _extensions = parser.parseExtensions(itr, sts);
+ }
+ _privateuse = parser.parsePrivateuse(itr, sts);
+
+ if (!itr.isDone() && !sts.isError()) {
+ String s = itr.current();
+ sts.errorIndex = itr.currentStart();
+ if (s.length() == 0) {
+ sts.errorMsg = "Empty subtag";
+ } else {
+ sts.errorMsg = "Invalid subtag: " + s;
+ }
+ }
+
+ return sts;
+ }
+
+ public static class ParseStatus {
+ int parseLength = 0;
+ int errorIndex = -1;
+ String errorMsg = null;
+
+ public void reset() {
+ parseLength = 0;
+ errorIndex = -1;
+ errorMsg = null;
+ }
+
+ boolean isError() {
+ return (errorIndex >= 0);
+ }
+ }
+
+ static class Parser { // subtag parser configured once with the Java-compatibility flag
+ private final boolean _javaCompatVar; // true: keep the input casing of variants and of privateuse subtags after JAVAVARIANT
+
+ Parser(boolean javaCompatVar) {
+ _javaCompatVar = javaCompatVar;
+ }
+
+ // Language subtag parsers. Each one returns "" or an empty collection
+ // without consuming anything when the iterator is already done or sts
+ // already holds an error; otherwise it consumes the tokens it accepts.
+
+ public String parseLanguage(StringTokenIterator itr, ParseStatus sts) { // canonicalized language, or "" if the current token is not one
+ String language = "";
+
+ if (itr.isDone() || sts.isError()) {
+ return language;
+ }
+
+ String s = itr.current();
+ if (isLanguage(s)) {
+ language = canonicalizeLanguage(s);
+ sts.parseLength = itr.currentEnd(); // remember how far the tag has been accepted
+ itr.next();
+ }
+ return language;
+ }
+
+ public List<String> parseExtlangs(StringTokenIterator itr, ParseStatus sts) { // consecutive extlang subtags, at most 3
+ List<String> extlangs = null; // allocated lazily, only when a first extlang is seen
+
+ if (itr.isDone() || sts.isError()) {
+ return Collections.emptyList();
+ }
+
+ while (!itr.isDone()) {
+ String s = itr.current();
+ if (!isExtlang(s)) {
+ break;
+ }
+ if (extlangs == null) {
+ extlangs = new ArrayList<String>(3);
+ }
+ extlangs.add(canonicalizeExtlang(s));
+ sts.parseLength = itr.currentEnd();
+ itr.next();
+
+ if (extlangs.size() == 3) {
+ // Maximum 3 extlangs
+ break;
+ }
+ }
+
+ if (extlangs == null) {
+ return Collections.emptyList();
+ }
+
+ return extlangs;
+ }
+
+ public String parseScript(StringTokenIterator itr, ParseStatus sts) { // canonicalized script, or "" if the current token is not one
+ String script = "";
+
+ if (itr.isDone() || sts.isError()) {
+ return script;
+ }
+
+ String s = itr.current();
+ if (isScript(s)) {
+ script = canonicalizeScript(s);
+ sts.parseLength = itr.currentEnd();
+ itr.next();
+ }
+
+ return script;
+ }
+
+ public String parseRegion(StringTokenIterator itr, ParseStatus sts) { // canonicalized region, or "" if the current token is not one
+ String region = "";
+
+ if (itr.isDone() || sts.isError()) {
+ return region;
+ }
+
+ String s = itr.current();
+ if (isRegion(s)) {
+ region = canonicalizeRegion(s);
+ sts.parseLength = itr.currentEnd();
+ itr.next();
+ }
+
+ return region;
+ }
+
+ public List<String> parseVariants(StringTokenIterator itr, ParseStatus sts) { // all consecutive variant subtags, in input order
+ List<String> variants = null; // allocated lazily, only when a first variant is seen
+
+ if (itr.isDone() || sts.isError()) {
+ return Collections.emptyList();
+ }
+
+ while (!itr.isDone()) {
+ String s = itr.current();
+ if (!isVariant(s)) {
+ break;
+ }
+ if (variants == null) {
+ variants = new ArrayList<String>(3);
+ }
+ if (_javaCompatVar) {
+ // preserve casing when Java compatibility option
+ // is enabled
+ variants.add(s);
+ } else {
+ variants.add(canonicalizeVariant(s));
+ }
+ sts.parseLength = itr.currentEnd();
+ itr.next();
+ }
+
+ if (variants == null) {
+ return Collections.emptyList();
+ }
+
+ return variants;
+ }
+
+ public SortedMap<Character, Extension> parseExtensions(StringTokenIterator itr, ParseStatus sts) { // singleton -> Extension; errors are reported through sts
+ SortedMap<Character, Extension> extensionMap = null; // allocated lazily, only when a first singleton is seen
+
+ if (itr.isDone() || sts.isError()) {
+ return EMPTY_EXTENSION_MAP;
+ }
+
+ while (!itr.isDone()) {
+ String s = itr.current();
+ if (!isExtensionSingleton(s)) {
+ break;
+ }
+ if (!itr.hasNext()) { // a singleton must be followed by at least one more token
+ sts.errorIndex = itr.currentStart();
+ sts.errorMsg = "Missing extension subtag for extension: " + s;
+ break;
+ }
+
+ if (extensionMap == null) {
+ extensionMap = new TreeMap<Character, Extension>();
+ }
+
+ String singletonStr = canonicalizeExtensionSingleton(s);
+ Character singleton = Character.valueOf(singletonStr.charAt(0));
+
+ if (extensionMap.containsKey(singleton)) { // the same singleton may appear only once
+ sts.errorIndex = itr.currentStart();
+ sts.errorMsg = "Duplicated extension: " + s;
+ break;
+ }
+
+ itr.next(); // step past the singleton; Extension.create reads from the following token
+ Extension ext = Extension.create(singleton.charValue(), itr, sts);
+ if (ext != null) { // a null result is simply not stored
+ extensionMap.put(singleton, ext);
+ }
+ if (sts.isError()) {
+ break;
+ }
+ }
+
+ if (extensionMap == null || extensionMap.size() == 0) {
+ return EMPTY_EXTENSION_MAP;
+ }
+
+ return extensionMap;
+ }
+
+ public String parsePrivateuse(StringTokenIterator itr, ParseStatus sts) { // privateuse subtags joined with SEP (singleton not included), or ""
+ String privateuse = "";
+
+ if (itr.isDone() || sts.isError()) {
+ return privateuse;
+ }
+
+ String s = itr.current();
+ if (isPrivateuseSingleton(s)) {
+ StringBuilder buf = new StringBuilder();
+ int singletonOffset = itr.currentStart(); // kept for the error report below
+ boolean preserveCasing = false;
+ itr.next();
+
+ while (!itr.isDone()) {
+ s = itr.current();
+ if (!isPrivateuseSubtag(s)) {
+ break;
+ }
+ if (buf.length() != 0) {
+ buf.append(SEP);
+ }
+ if (!preserveCasing) {
+ s = canonicalizePrivateuseSubtag(s);
+ }
+ buf.append(s);
+ sts.parseLength = itr.currentEnd();
+
+ if (_javaCompatVar && s.equals(JAVAVARIANT)) {
+ // preserve casing after the special
+ // java reserved private use subtag
+ // when java compatibility variant option
+ // is enabled.
+ preserveCasing = true;
+ }
+ itr.next();
+ }
+
+ if (buf.length() == 0) {
+ // need at least 1 private subtag
+ sts.errorIndex = singletonOffset;
+ sts.errorMsg = "Incomplete privateuse";
+ } else {
+ privateuse = buf.toString();
+ }
+ }
+
+ return privateuse;
+ }
+ }
+}
--- a/src/share/classes/sun/util/locale/LocaleExtensions.java Wed Oct 21 12:24:07 2009 -0700
+++ b/src/share/classes/sun/util/locale/LocaleExtensions.java Wed Nov 04 17:57:23 2009 -0500
@@ -1,266 +1,190 @@
-/*
- *******************************************************************************
- * Copyright (C) 2009, International Business Machines Corporation and *
- * others. All Rights Reserved. *
- *******************************************************************************
- */
-package sun.util.locale;
-
-import java.util.Collections;
-import java.util.Map;
-import java.util.Set;
-import java.util.TreeMap;
-
-public final class LocaleExtensions {
- public static final LocaleExtensions EMPTY_EXTENSIONS = new LocaleExtensions("");
-
- private String _extensions;
- private TreeMap<Character, String> _extMap;
- private TreeMap<String, String> _kwdMap;
-
- private static final String LOCALEEXTSEP = "-";
- private static final String LDMLSINGLETON = "u";
- private static final String PRIVUSE = "x";
- private static final int MINLEN = 3; // minimum length of string representation "x-?"
-
-
- private LocaleExtensions(String extensions) {
- _extensions = extensions == null ? "" : extensions;
- }
-
- public static LocaleExtensions getInstance(String extensions) {
- if (extensions == null || extensions.length() == 0) {
- return EMPTY_EXTENSIONS;
- }
-
- extensions = AsciiUtil.toLowerString(extensions).replaceAll("_", LOCALEEXTSEP);
-
- if (extensions.length() < MINLEN) {
- // malformed extensions - too short
- return new LocaleExtensions(extensions);
- }
-
- TreeMap<Character, String> extMap = null;
- TreeMap<String, String> kwdMap = null;
- boolean bParseFailure = false;
-
- // parse the extension subtags
- String[] subtags = extensions.split(LOCALEEXTSEP);
- String letter = null;
- extMap = new TreeMap<Character, String>();
- StringBuilder buf = new StringBuilder();
- boolean inLocaleKeywords = false;
- boolean inPrivateUse = false;
- String kwkey = null;
-
- for (int i = 0; i < subtags.length; i++) {
- if (subtags[i].length() == 0) {
- // empty subtag
- bParseFailure = true;
- break;
- }
- if (subtags[i].length() == 1 && !inPrivateUse) {
- if (letter != null) {
- // next extension singleton
- if (extMap.containsKey(subtags[i])) {
- // duplicated singleton extension letter
- bParseFailure = true;
- break;
- }
- // write out the previous extension
- if (inLocaleKeywords) {
- if (kwkey != null) {
- // no locale keyword key
- bParseFailure = true;
- break;
- }
- // creating a single string including locale keyword key/type pairs
- keywordsToString(kwdMap, buf);
- inLocaleKeywords = false;
- }
- if (buf.length() == 0) {
- // empty subtag
- bParseFailure = true;
- break;
- }
- extMap.put(Character.valueOf(letter.charAt(0)), buf.toString().intern());
- }
- // preparation for next extension
- if (subtags[i].equals(LDMLSINGLETON)) {
- kwdMap = new TreeMap<String, String>();
- inLocaleKeywords = true;
- } else if (subtags[i].equals(PRIVUSE)) {
- inPrivateUse = true;
- }
- buf.setLength(0);
- letter = subtags[i];
- continue;
- }
- if (inLocaleKeywords) {
- if (kwkey == null) {
- kwkey = subtags[i];
- } else {
- kwdMap.put(kwkey.intern(), subtags[i].intern());
- kwkey = null;
- }
- } else {
- // append an extension/prvate use subtag
- if (buf.length() > 0) {
- buf.append(LOCALEEXTSEP);
- }
- buf.append(subtags[i]);
- }
- }
- if (!bParseFailure) {
- // process the last extension
- if (inLocaleKeywords) {
- if (kwkey != null) {
- bParseFailure = true;
- } else {
- // creating a single string including locale keyword key/type pairs
- keywordsToString(kwdMap, buf);
- }
- }
- if (buf.length() == 0) {
- // empty subtag at the end
- bParseFailure = true;
- } else {
- extMap.put(Character.valueOf(letter.charAt(0)), buf.toString().intern());
- }
- }
-
- if (bParseFailure) {
- // parsing the extension string failed.
- // do not set any partial results in the result.
- return new LocaleExtensions(extensions);
- }
-
- String canonical = extensionsToCanonicalString(extMap);
- LocaleExtensions le = new LocaleExtensions(canonical);
- le._extMap = extMap;
- le._kwdMap = kwdMap;
-
- return le;
- }
-
- // This method assumes extension map and locale keyword map
- // are all in canonicalized format. This method is only used by
- // InternalLocaleBuilder.
- static LocaleExtensions getInstance(TreeMap<Character, String> extMap, TreeMap<String ,String> kwdMap) {
- if (extMap == null) {
- return EMPTY_EXTENSIONS;
- }
- String canonical = extensionsToCanonicalString(extMap);
- LocaleExtensions le = new LocaleExtensions(canonical);
- le._extMap = extMap;
- le._kwdMap = kwdMap;
-
- return le;
- }
-
- public boolean equals(Object obj) {
- return (this == obj) ||
- ((obj instanceof LocaleExtensions) && _extensions == (((LocaleExtensions)obj)._extensions));
- }
-
- public int hashCode() {
- return _extensions.hashCode();
- }
-
- public Set<Character> getExtensionKeys() {
- if (_extMap != null) {
- return Collections.unmodifiableSet(_extMap.keySet());
- }
- return null;
- }
-
- public String getExtensionValue(char key) {
- if (_extMap != null) {
- return _extMap.get(Character.valueOf(AsciiUtil.toLower(key)));
- }
- return null;
- }
-
- public Set<String> getLDMLKeywordKeys() {
- if (_kwdMap != null) {
- return Collections.unmodifiableSet(_kwdMap.keySet());
- }
- return null;
- }
-
- public String getLDMLKeywordType(String key) {
- if (key == null) {
- throw new NullPointerException("LDML key must not be null");
- }
- if (_kwdMap != null) {
- return _kwdMap.get(AsciiUtil.toLowerString(key));
- }
- return null;
- }
-
- public String getCanonicalString() {
- return _extensions;
- }
-
- public String toString() {
- return _extensions;
- }
-
- private static String extensionsToCanonicalString(TreeMap<Character, String> extMap) {
- if (extMap == null || extMap.size() == 0) {
- return "";
- }
- StringBuilder canonicalbuf = new StringBuilder();
- String privUseStr = null;
- if (extMap != null) {
- Set<Map.Entry<Character, String>> entries = extMap.entrySet();
- for (Map.Entry<Character, String> entry : entries) {
- Character key = entry.getKey();
- String value = entry.getValue();
- if (key.charValue() == PRIVUSE.charAt(0)) {
- privUseStr = value;
- continue;
- }
- if (canonicalbuf.length() > 0) {
- canonicalbuf.append(LOCALEEXTSEP);
- }
- canonicalbuf.append(key);
- canonicalbuf.append(LOCALEEXTSEP);
- canonicalbuf.append(value);
- }
- }
- if (privUseStr != null) {
- if (canonicalbuf.length() > 0) {
- canonicalbuf.append(LOCALEEXTSEP);
- }
- canonicalbuf.append(PRIVUSE);
- canonicalbuf.append(LOCALEEXTSEP);
- canonicalbuf.append(privUseStr);
- }
- return canonicalbuf.toString().intern();
- }
-
- static void keywordsToString(TreeMap<String, String> map, StringBuilder buf) {
- Set<Map.Entry<String, String>> entries = map.entrySet();
- for (Map.Entry<String, String> entry : entries) {
- if (buf.length() > 0) {
- buf.append(LOCALEEXTSEP);
- }
- buf.append(entry.getKey());
- buf.append(LOCALEEXTSEP);
- buf.append(entry.getValue());
- }
- }
-
- public static boolean isValidExtensionKey(char key) {
- return AsciiUtil.isAlphaNumeric(key);
- }
-
- public static boolean isValidLDMLKey(String key) {
- return (key.length() == 2) && AsciiUtil.isAlphaNumericString(key);
- }
-
- public static boolean isValidLDMLType(String type) {
- return (type.length() >= 3) && (type.length() <= 8) && AsciiUtil.isAlphaNumericString(type);
- }
-}
+/*
+ *******************************************************************************
+ * Copyright (C) 2009, International Business Machines Corporation and *
+ * others. All Rights Reserved. *
+ *******************************************************************************
+ */
+package sun.util.locale;
+
+import java.util.Collections;
+import java.util.Set;
+import java.util.SortedMap;
+import java.util.TreeMap;
+import java.util.Map.Entry;
+
+import sun.util.locale.LanguageTag.ParseStatus;
+
+
+public class LocaleExtensions { // set of locale extensions keyed by singleton character; instances are shared through a cache
+
+ private SortedMap<Character, Extension> _map = EMPTY_MAP; // singleton key -> extension
+ private String _id = ""; // string form produced by toID(); returned by toString() and getID(), hashed by hashCode()
+
+ private static final SortedMap<Character, Extension> EMPTY_MAP =
+ Collections.unmodifiableSortedMap(new TreeMap<Character, Extension>());
+
+ private static final LocaleObjectCache<String, LocaleExtensions> LOCALEEXTENSIONS_CACHE =
+ new LocaleObjectCache<String, LocaleExtensions>();
+
+
+ public static final LocaleExtensions EMPTY_EXTENSIONS = new LocaleExtensions(); // shared empty instance; final so it cannot be reassigned
+
+ public static final LocaleExtensions CALENDAR_JAPANESE;
+ public static final LocaleExtensions NUMBER_THAI;
+
+ static { // builds the two predefined instances and registers each in the cache under its ID
+ CALENDAR_JAPANESE = new LocaleExtensions();
+ CALENDAR_JAPANESE._id = UnicodeLocaleExtension.CA_JAPANESE.getID();
+ CALENDAR_JAPANESE._map = new TreeMap<Character, Extension>();
+ CALENDAR_JAPANESE._map.put(Character.valueOf(UnicodeLocaleExtension.CA_JAPANESE.getKey()), UnicodeLocaleExtension.CA_JAPANESE);
+ LOCALEEXTENSIONS_CACHE.put(CALENDAR_JAPANESE._id, CALENDAR_JAPANESE);
+
+ NUMBER_THAI = new LocaleExtensions();
+ NUMBER_THAI._id = UnicodeLocaleExtension.NU_THAI.getID();
+ NUMBER_THAI._map = new TreeMap<Character, Extension>();
+ NUMBER_THAI._map.put(Character.valueOf(UnicodeLocaleExtension.NU_THAI.getKey()), UnicodeLocaleExtension.NU_THAI);
+ LOCALEEXTENSIONS_CACHE.put(NUMBER_THAI._id, NUMBER_THAI);
+ }
+
+
+ private LocaleExtensions() { // instances are only created by this class (static fields and getInstance)
+ }
+
+ public static LocaleExtensions getInstance(String str) throws LocaleSyntaxException { // EMPTY_EXTENSIONS for null/empty input; throws on a malformed or duplicated extension
+ if (str == null || str.length() == 0) {
+ return EMPTY_EXTENSIONS;
+ }
+ LocaleExtensions exts = LOCALEEXTENSIONS_CACHE.get(str); // first try: the input string itself as cache key
+ if (exts == null) {
+ StringTokenIterator itr = new StringTokenIterator(str, LanguageTag.SEP);
+ ParseStatus sts = new ParseStatus();
+ TreeMap<Character, Extension> map = new TreeMap<Character, Extension>();
+
+ while (!itr.isDone()) {
+ int startOffset = itr.currentEnd(); // NOTE(review): named startOffset but reads currentEnd(); confirm currentStart() was not intended
+ Extension ext = Extension.create(itr, sts);
+ if (sts.isError()) {
+ throw new LocaleSyntaxException(sts.errorMsg, sts.errorIndex);
+ }
+ if (ext == null) {
+ throw new LocaleSyntaxException("Invalid extension subtag: " + itr.current(), startOffset);
+ }
+
+ Character keyChar = Character.valueOf(ext.getKey());
+ if (map.containsKey(keyChar)) {
+ throw new LocaleSyntaxException("Duplicated extension: " + keyChar, startOffset);
+ }
+
+ map.put(keyChar, ext);
+ }
+
+ String id = toID(map);
+ // second try: check the cache with the ID rebuilt from the parsed map
+ exts = LOCALEEXTENSIONS_CACHE.get(id);
+ if (exts == null) {
+ exts = new LocaleExtensions();
+ exts._map = map;
+ exts._id = id;
+
+ exts = LOCALEEXTENSIONS_CACHE.put(id, exts); // put() hands back whichever instance the cache holds
+ }
+ }
+ return exts;
+ }
+
+ static LocaleExtensions getInstance(SortedMap<Character, Extension> map) { // EMPTY_EXTENSIONS for a null/empty map; otherwise cached by toID(map)
+ if (map == null || map.isEmpty()) {
+ return EMPTY_EXTENSIONS;
+ }
+ String id = toID(map);
+ LocaleExtensions exts = LOCALEEXTENSIONS_CACHE.get(id);
+ if (exts == null) {
+ exts = new LocaleExtensions();
+ exts._map = new TreeMap<Character, Extension>(map); // private copy, so later changes to the caller's map are not seen
+ exts._id = id;
+
+ exts = LOCALEEXTENSIONS_CACHE.put(id, exts);
+ }
+ return exts;
+ }
+
+ private static String toID(SortedMap<Character, Extension> map) { // "key SEP value" per entry in map order, joined by SEP, with the private use entry moved last
+ StringBuilder buf = new StringBuilder();
+ Extension privuse = null;
+ if (map != null && !map.isEmpty()) {
+ Set<Entry<Character, Extension>> entries = map.entrySet();
+ for (Entry<Character, Extension> entry : entries) {
+ Character key = entry.getKey();
+ if (key.charValue() == LanguageTag.PRIVATEUSE.charAt(0)) { // hold the private use entry back until after the loop
+ privuse = entry.getValue();
+ continue;
+ }
+ if (buf.length() > 0) {
+ buf.append(LanguageTag.SEP);
+ }
+ buf.append(entry.getKey());
+ buf.append(LanguageTag.SEP);
+ buf.append(entry.getValue().getValue());
+ }
+ }
+ if (privuse != null) {
+ if (buf.length() > 0) {
+ buf.append(LanguageTag.SEP);
+ }
+ buf.append(LanguageTag.PRIVATEUSE);
+ buf.append(LanguageTag.SEP);
+ buf.append(privuse.getValue());
+ }
+ return buf.toString();
+ }
+
+ public Set<Character> getKeys() { // read-only view of the singleton keys
+ return Collections.unmodifiableSet(_map.keySet());
+ }
+
+ public Extension getExtension(Character key) { // null when no extension is stored for key
+ return _map.get(key);
+ }
+
+ public String getExtensionValue(Character key) { // "" (not null) when no extension is stored for key
+ Extension ext = _map.get(key);
+ if (ext == null) {
+ return "";
+ }
+ return ext.getValue();
+ }
+
+ public Set<String> getUnicodeLocaleKeys() { // empty set when there is no extension under UnicodeLocaleExtension.SINGLETON
+ Extension ext = _map.get(Character.valueOf(UnicodeLocaleExtension.SINGLETON));
+ if (ext == null) {
+ return Collections.emptySet();
+ }
+ assert (ext instanceof UnicodeLocaleExtension);
+ return ((UnicodeLocaleExtension)ext).getKeys();
+ }
+
+ public String getUnicodeLocaleType(String unicodeLocaleKey) { // "" when there is no extension under UnicodeLocaleExtension.SINGLETON
+ Extension ext = _map.get(Character.valueOf(UnicodeLocaleExtension.SINGLETON));
+ if (ext == null) {
+ return "";
+ }
+ assert (ext instanceof UnicodeLocaleExtension);
+ return ((UnicodeLocaleExtension)ext).getType(unicodeLocaleKey);
+ }
+
+ public String toString() {
+ return _id;
+ }
+
+ public String getID() {
+ return _id;
+ }
+
+ public int hashCode() { // NOTE(review): this class overrides hashCode() but not equals(); confirm identity comparison is intended
+ return _id.hashCode();
+ }
+
+ public static boolean isValidKey(String key) { // accepts an extension singleton or the private use singleton
+ return LanguageTag.isExtensionSingleton(key) || LanguageTag.isPrivateuseSingleton(key);
+ }
+}
--- a/src/share/classes/sun/util/locale/LocaleObjectCache.java Wed Oct 21 12:24:07 2009 -0700
+++ b/src/share/classes/sun/util/locale/LocaleObjectCache.java Wed Nov 04 17:57:23 2009 -0500
@@ -1,78 +1,78 @@
-/*
- *******************************************************************************
- * Copyright (C) 2009, International Business Machines Corporation and *
- * others. All Rights Reserved. *
- *******************************************************************************
- */
-package sun.util.locale;
-
-import java.lang.ref.Reference;
-import java.lang.ref.ReferenceQueue;
-import java.lang.ref.WeakReference;
-import java.util.concurrent.ConcurrentHashMap;
-
-public class LocaleObjectCache<K, V> {
-
- private ConcurrentHashMap<K, WeakValueRef<V>> _map = new ConcurrentHashMap<K, WeakValueRef<V>>();
- private ReferenceQueue<V> _rq = new ReferenceQueue<V>();
-
- public LocaleObjectCache() {
- }
-
- public V get(Object key) {
- expungeStaleEntries();
- WeakValueRef<V> ref = _map.get(key);
- if (ref != null) {
- return ref.get();
- }
- return null;
- }
-
- /*
- * Unlike Map#put, this method returns non-null value actually
- * in the cache, even no values for the key was not available
- * before.
- */
- public V put(K key, V value) {
- expungeStaleEntries();
- WeakValueRef<V> ref = _map.get(key);
- if (ref != null) {
- // Make sure if another thread put the new value
- V valInCache = ref.get();
- if (valInCache != null) {
- return valInCache;
- }
- }
- // We do not synchronize the internal map here.
- // In the worst case, another thread may put the new
- // value with the same contents, but it should not cause
- // any serious problem.
- _map.put(key, new WeakValueRef<V>(key, value, _rq));
- return value;
- }
-
- private void expungeStaleEntries() {
- Reference<? extends V> val;
- while ((val = _rq.poll()) != null) {
- Object key = ((WeakValueRef<?>)val).getKey();
- _map.remove(key);
- }
- }
-
- private static class WeakValueRef<V> extends WeakReference<V> {
- private Object _key;
-
- public WeakValueRef(Object key, V value, ReferenceQueue<V> rq) {
- super(value, rq);
- _key = key;
- }
-
- public V get() {
- return super.get();
- }
-
- public Object getKey() {
- return _key;
- }
- }
-}
+/*
+ *******************************************************************************
+ * Copyright (C) 2009, International Business Machines Corporation and *
+ * others. All Rights Reserved. *
+ *******************************************************************************
+ */
+package sun.util.locale;
+
+import java.lang.ref.Reference;
+import java.lang.ref.ReferenceQueue;
+import java.lang.ref.WeakReference;
+import java.util.concurrent.ConcurrentHashMap;
+
+public class LocaleObjectCache<K, V> { // key -> value cache that holds its values through weak references
+
+ private final ConcurrentHashMap<K, WeakValueRef<V>> _map = new ConcurrentHashMap<K, WeakValueRef<V>>();
+ private final ReferenceQueue<V> _rq = new ReferenceQueue<V>(); // every WeakValueRef is registered with this queue
+
+ public LocaleObjectCache() {
+ }
+
+ public V get(Object key) { // cached value, or null when the key is absent or its value is no longer referenced
+ expungeStaleEntries();
+ WeakValueRef<V> ref = _map.get(key);
+ if (ref != null) {
+ return ref.get();
+ }
+ return null;
+ }
+
+ /*
+ * Unlike Map#put, this method returns the value that is actually in
+ * the cache afterwards: the existing live value for the key if there
+ * is one (the argument is then not stored), otherwise the given value.
+ */
+ public V put(K key, V value) {
+ expungeStaleEntries();
+ WeakValueRef<V> ref = _map.get(key);
+ if (ref != null) {
+ // Keep a live value that is already cached (possibly by another thread)
+ V valInCache = ref.get();
+ if (valInCache != null) {
+ return valInCache;
+ }
+ }
+ // The get/put pair above is deliberately not atomic. In the
+ // worst case another thread stores a value with the same
+ // contents at the same time, but that should not cause
+ // any serious problem.
+ _map.put(key, new WeakValueRef<V>(key, value, _rq));
+ return value;
+ }
+
+ private void expungeStaleEntries() { // removes the map entries of all references currently in the queue
+ Reference<? extends V> val;
+ while ((val = _rq.poll()) != null) {
+ Object key = ((WeakValueRef<?>)val).getKey();
+ _map.remove(key, val); // conditional: put() may already have replaced this stale ref with a live one
+ }
+ }
+
+ private static class WeakValueRef<V> extends WeakReference<V> { // weak value that remembers the key it is stored under
+ private final Object _key;
+
+ public WeakValueRef(Object key, V value, ReferenceQueue<V> rq) {
+ super(value, rq);
+ _key = key;
+ }
+
+ public V get() {
+ return super.get();
+ }
+
+ public Object getKey() {
+ return _key;
+ }
+ }
+}
--- a/src/share/classes/sun/util/locale/LocaleSyntaxException.java Wed Oct 21 12:24:07 2009 -0700
+++ b/src/share/classes/sun/util/locale/LocaleSyntaxException.java Wed Nov 04 17:57:23 2009 -0500
@@ -1,27 +1,27 @@
-/*
- *******************************************************************************
- * Copyright (C) 2009, International Business Machines Corporation and *
- * others. All Rights Reserved. *
- *******************************************************************************
- */
-package sun.util.locale;
-
-public class LocaleSyntaxException extends Exception {
-
- private static final long serialVersionUID = 1L;
-
- private int _index = -1;
-
- public LocaleSyntaxException(String msg) {
- this(msg, 0);
- }
-
- public LocaleSyntaxException(String msg, int errorIndex) {
- super(msg);
- _index = errorIndex;
- }
-
- public int getErrorIndex() {
- return _index;
- }
-}
+/*
+ *******************************************************************************
+ * Copyright (C) 2009, International Business Machines Corporation and *
+ * others. All Rights Reserved. *
+ *******************************************************************************
+ */
+package sun.util.locale;
+
+public class LocaleSyntaxException extends Exception { // checked exception carrying a message and an error index
+
+ private static final long serialVersionUID = 1L;
+
+ private int _index = -1; // overwritten by the two-argument constructor, which both constructors go through
+
+ public LocaleSyntaxException(String msg) { // delegates with error index 0 (not the field's -1 initializer)
+ this(msg, 0);
+ }
+
+ public LocaleSyntaxException(String msg, int errorIndex) {
+ super(msg);
+ _index = errorIndex;
+ }
+
+ public int getErrorIndex() { // the index passed at construction (0 for the message-only constructor)
+ return _index;
+ }
+}
--- a/test/java/util/Locale/LocaleEnhanceTest.java Wed Oct 21 12:24:07 2009 -0700
+++ b/test/java/util/Locale/LocaleEnhanceTest.java Wed Nov 04 17:57:23 2009 -0500
@@ -21,16 +21,15 @@
* have any questions.
*/
-import java.io.*;
+import java.io.BufferedReader;
+import java.io.InputStreamReader;
+import java.util.ArrayList;
import java.util.Arrays;
-import java.util.ArrayList;
-import java.util.HashMap;
import java.util.IllformedLocaleException;
import java.util.List;
import java.util.Locale;
+import java.util.Set;
import java.util.Locale.Builder;
-import java.util.Map;
-import java.util.Set;
/**
* @test
@@ -70,25 +69,25 @@ public class LocaleEnhanceTest extends L
*/
public void testCreateLocaleCanonicalValid() {
String[] valids = {
- "en-latn-us-newyork", "en_US_NewYork",
- "en-latn-us", "en_US",
- "en-latn-newyork", "en__NewYork", // double underscore
- "en-latn", "en",
- "en-us-newyork", "en_US_NewYork",
- "en-us", "en_US",
- "en-newyork", "en__NewYork", // double underscore
+ "en-Latn-US-NewYork", "en_US_NewYork_#Latn",
+ "en-Latn-US", "en_US_#Latn",
+ "en-Latn-NewYork", "en__NewYork_#Latn", // double underscore
+ "en-Latn", "en_#Latn",
+ "en-US-NewYork", "en_US_NewYork",
+ "en-US", "en_US",
+ "en-NewYork", "en__NewYork", // double underscore
"en", "en",
- "und-latn-us-newyork", "_US_NewYork",
- "und-latn-us", "_US",
- "und-latn-newyork", "", // variant only not supported
- "und-latn", "",
- "und-us-newyork", "_US_NewYork",
- "und-us", "_US",
- "und-newyork", "", // variant only not supported
+ "und-Latn-US-NewYork", "_US_NewYork_#Latn",
+ "und-Latn-US", "_US_#Latn",
+ "und-Latn-NewYork", "", // variant only not supported
+ "und-Latn", "",
+ "und-US-NewYork", "_US_NewYork",
+ "und-US", "_US",
+ "und-NewYork", "", // variant only not supported
"und", ""
};
- Builder builder = new Builder();
+ Builder builder = new Builder(true);
for (int i = 0; i < valids.length; i += 2) {
String tag = valids[i];
@@ -107,7 +106,7 @@ public class LocaleEnhanceTest extends L
.setScript(ids)
.setRegion(idc)
.setVariant(idv)
- .create();
+ .build();
assertEquals(msg + "language", idl, l.getLanguage());
assertEquals(msg + "script", ids, l.getScript());
assertEquals(msg + "country", idc, l.getCountry());
@@ -134,17 +133,17 @@ public class LocaleEnhanceTest extends L
public void testCreateLocaleMultipleVariants() {
String[] valids = {
- "en-latn-us-newer-yorker", "en_US_Newer_Yorker",
- "en-latn-newer-yorker", "en__Newer_Yorker",
- "en-us-newer-yorker", "en_US_Newer_Yorker",
- "en-newer-yorker", "en__Newer_Yorker",
- "und-latn-us-newer-yorker", "_US_Newer_Yorker",
- "und-latn-newer-yorker", "",
- "und-us-newer-yorker", "_US_Newer_Yorker",
- "und-newer-yorker", "",
+ "en-Latn-US-Newer-Yorker", "en_US_Newer_Yorker_#Latn",
+ "en-Latn-Newer-Yorker", "en__Newer_Yorker_#Latn",
+ "en-US-Newer-Yorker", "en_US_Newer_Yorker",
+ "en-Newer-Yorker", "en__Newer_Yorker",
+ "und-Latn-US-Newer-Yorker", "_US_Newer_Yorker_#Latn",
+ "und-Latn-Newer-Yorker", "",
+ "und-US-Newer-Yorker", "_US_Newer_Yorker",
+ "und-Newer-Yorker", "",
};
- Builder builder = new Builder();
+ Builder builder = new Builder(true); // lenient variant
final String idv = "Newer_Yorker";
for (int i = 0; i < valids.length; i += 2) {
@@ -162,9 +161,7 @@ public class LocaleEnhanceTest extends L
.setScript(ids)
.setRegion(idc)
.setVariant(idv)
- .create();
-
- Locale l2 = new Locale(idl, idc, idv);
+ .build();
assertEquals(msg + " language", idl, l.getLanguage());
assertEquals(msg + " script", ids, l.getScript());
@@ -173,7 +170,6 @@ public class LocaleEnhanceTest extends L
assertEquals(msg + "tag", tag, l.toLanguageTag());
assertEquals(msg + "id", id, l.toString());
- assertEquals(msg + "locale", id, l2.toString());
}
catch (IllegalArgumentException e) {
errln(msg + e.getMessage());
@@ -230,26 +226,22 @@ public class LocaleEnhanceTest extends L
*/
public void testCurrentLocales() {
Locale[] locales = java.text.DateFormat.getAvailableLocales();
- Builder builder = new Builder();
-
- Map<Locale, String> exceptionMap = new HashMap<Locale, String>();
- exceptionMap.put(new Locale("ja", "JP", "JP"), "ja-jp-u-ca-japanese");
- exceptionMap.put(new Locale("th", "TH", "TH"), "th-th-u-nu-thai");
- exceptionMap.put(new Locale("no", "NO", "NY"), "nn-no");
+ Builder builder = new Builder(true); // lenient
+
+ Locale noNONY = new Locale("no", "NO", "NY");
for (Locale target : locales) {
String tag = target.toLanguageTag();
- String exceptionTag = exceptionMap.get(target);
- if (exceptionTag != null) {
- assertEquals("exception tag " + target, exceptionTag, tag);
+ if (target.equals(noNONY)) {
+ assertEquals("exception tag " + target, "nn-NO", tag);
} else {
// the tag recreates the original locale
Locale tagResult = Locale.forLanguageTag(tag);
assertEquals("tagResult", target, tagResult);
// the builder also recreates the original locale
- Locale builderResult = builder.setLocale(target).create();
+ Locale builderResult = builder.setLocale(target).build();
assertEquals("builderResult", target, builderResult);
}
}
@@ -318,7 +310,7 @@ public class LocaleEnhanceTest extends L
assertEquals("forLanguageTag", "Latn", locale.getScript());
// Builder normalizes case
- locale = new Builder().setScript("LATN").create();
+ locale = new Builder().setScript("LATN").build();
assertEquals("builder", "Latn", locale.getScript());
// empty string is returned, not null, if there is no script
@@ -329,14 +321,14 @@ public class LocaleEnhanceTest extends L
public void testGetExtension() {
// forLanguageTag does NOT normalize to hyphen
Locale locale = Locale.forLanguageTag("und-a-some_ex-tension");
- assertNull("some_ex-tension", locale.getExtension('a'));
-
- // Builder normalizes to hyphen
- locale = new Builder().setExtension('a', "some_ex-tension").create();
+ assertEquals("some_ex-tension", "", locale.getExtension('a'));
+
+ // regular extension
+ locale = new Builder().setExtension('a', "some-ex-tension").build();
assertEquals("builder", "some-ex-tension", locale.getExtension('a'));
- // returns null if extension is not present
- assertNull("null b", locale.getExtension('b'));
+ // returns empty string if extension is not present
+ assertEquals("empty b", "", locale.getExtension('b'));
// throws exception if extension tag is illegal
new ExpectIAE() { public void call() { Locale.forLanguageTag("").getExtension('\uD800'); }};
@@ -361,38 +353,37 @@ public class LocaleEnhanceTest extends L
// ok
}
- // returns null if no extensions
- // ??? would empty set be better?
+ // returns empty set if no extensions
locale = Locale.forLanguageTag("und");
- assertNull("null result", locale.getExtensionKeys());
- }
-
- public void testGetLDMLExtensionValue() {
+ assertTrue("empty result", locale.getExtensionKeys().isEmpty());
+ }
+
+ public void testGetUnicodeLocaleType() {
Locale locale = Locale.forLanguageTag("und-u-co-japanese-nu-thai");
- assertEquals("collation", "japanese", locale.getLDMLExtensionValue("co"));
- assertEquals("numbers", "thai", locale.getLDMLExtensionValue("nu"));
-
- // LDML extension key is case insensitive
- assertEquals("key case", "japanese", locale.getLDMLExtensionValue("Co"));
+ assertEquals("collation", "japanese", locale.getUnicodeLocaleType("co"));
+ assertEquals("numbers", "thai", locale.getUnicodeLocaleType("nu"));
+
+ // Unicode locale extension key is case insensitive
+ assertEquals("key case", "japanese", locale.getUnicodeLocaleType("Co"));
// if keyword is not present, returns null
- assertNull("locale keyword not present", locale.getLDMLExtensionValue("xx"));
+ assertEquals("locale keyword not present", "", locale.getUnicodeLocaleType("xx"));
// if no locale extension is set, returns null
- locale = locale.forLanguageTag("und");
- assertNull("locale extension not present", locale.getLDMLExtensionValue("co"));