diff jdk/src/java.base/macosx/native/libjava/java_props_macosx.c @ 45829:991121499947

8160199: Language's script should be reflected in user.script on Mac OS X Reviewed-by: bchristi, sherman
author naoto
date Mon, 03 Jul 2017 12:54:24 -0700
parents f1398789e771
children
line wrap: on
line diff
--- a/jdk/src/java.base/macosx/native/libjava/java_props_macosx.c	Thu Aug 24 16:25:08 2017 +0200
+++ b/jdk/src/java.base/macosx/native/libjava/java_props_macosx.c	Mon Jul 03 12:54:24 2017 -0700
@@ -47,6 +47,7 @@
 #define LOCALEIDLENGTH  128
 char *getMacOSXLocale(int cat) {
     const char* retVal = NULL;
+    char localeString[LOCALEIDLENGTH];
 
     switch (cat) {
     case LC_MESSAGES:
@@ -74,73 +75,114 @@
             }
             CFRelease(languages);
 
+            // Explicitly supply region, if there is none
+            char *hyphenPos = strchr(languageString, '-');
+            int langStrLen = strlen(languageString);
+
+            if (hyphenPos == NULL || // languageString contains ISO639 only, e.g., "en"
+                languageString + langStrLen - hyphenPos == 5) { // ISO639-ScriptCode, e.g., "en-Latn"
+                CFStringGetCString(CFLocaleGetIdentifier(CFLocaleCopyCurrent()),
+                               localeString, LOCALEIDLENGTH, CFStringGetSystemEncoding());
+                char *underscorePos = strrchr(localeString, '_');
+                char *region = NULL;
+
+                if (underscorePos != NULL) {
+                    region = underscorePos + 1;
+                }
+
+                if (region != NULL) {
+                    strcat(languageString, "-");
+                    strcat(languageString, region);
+                }
+            }
+
             retVal = languageString;
-
-            // Special case for Portuguese in Brazil:
-            // The language code needs the "_BR" region code (to distinguish it
-            // from Portuguese in Portugal), but this is missing when using the
-            // "Portuguese (Brazil)" language.
-            // If language is "pt" and the current locale is pt_BR, return pt_BR.
-            char localeString[LOCALEIDLENGTH];
-            if (strcmp(retVal, "pt") == 0 &&
-                    CFStringGetCString(CFLocaleGetIdentifier(CFLocaleCopyCurrent()),
-                                       localeString, LOCALEIDLENGTH, CFStringGetSystemEncoding()) &&
-                    strcmp(localeString, "pt_BR") == 0) {
-                retVal = localeString;
-            }
         }
         break;
+
     default:
         {
-            char localeString[LOCALEIDLENGTH];
             if (!CFStringGetCString(CFLocaleGetIdentifier(CFLocaleCopyCurrent()),
                                     localeString, LOCALEIDLENGTH, CFStringGetSystemEncoding())) {
                 return NULL;
             }
+
             retVal = localeString;
         }
         break;
     }
 
     if (retVal != NULL) {
-        // Language IDs use the language designators and (optional) region
-        // and script designators of BCP 47.  So possible formats are:
-        //
-        // "en"         (language designator only)
-        // "haw"        (3-letter lanuage designator)
-        // "en-GB"      (language with alpha-2 region designator)
-        // "es-419"     (language with 3-digit UN M.49 area code)
-        // "zh-Hans"    (language with ISO 15924 script designator)
-        // "zh-Hans-US"  (language with ISO 15924 script designator and region)
-        // "zh-Hans-419" (language with ISO 15924 script designator and UN M.49)
-        //
-        // In the case of region designators (alpha-2 and/or UN M.49), we convert
-        // to our locale string format by changing '-' to '_'.  That is, if
-        // the '-' is followed by fewer than 4 chars.
-        char* scriptOrRegion = strchr(retVal, '-');
-        if (scriptOrRegion != NULL) {
-            int length = strlen(scriptOrRegion);
-            if (length > 5) {
-                // Region and script both exist. Honor the script for now
-                scriptOrRegion[5] = '\0';
-            } else if (length < 5) {
-                *scriptOrRegion = '_';
+        return strdup(convertToPOSIXLocale(retVal));
+    }
 
-                assert((length == 3 &&
-                    // '-' followed by a 2 character region designator
-                      isalpha(scriptOrRegion[1]) &&
-                      isalpha(scriptOrRegion[2])) ||
-                       (length == 4 &&
-                    // '-' followed by a 3-digit UN M.49 area code
-                      isdigit(scriptOrRegion[1]) &&
-                      isdigit(scriptOrRegion[2]) &&
-                      isdigit(scriptOrRegion[3])));
-            }
+    return NULL;
+}
+
+/* Language IDs use the language designators and (optional) region
+ * and script designators of BCP 47.  So possible formats are:
+ *
+ * "en"         (language designator only)
+ * "haw"        (3-letter lanuage designator)
+ * "en-GB"      (language with alpha-2 region designator)
+ * "es-419"     (language with 3-digit UN M.49 area code)
+ * "zh-Hans"    (language with ISO 15924 script designator)
+ * "zh-Hans-US"  (language with ISO 15924 script designator and region)
+ * "zh-Hans-419" (language with ISO 15924 script designator and UN M.49)
+ *
+ * convert these tags into POSIX conforming locale string, i.e.,
+ * lang{_region}{@script}. e.g., for "zh-Hans-US" into "zh_US@Hans"
+ */
+const char * convertToPOSIXLocale(const char* src) {
+    char* scriptRegion = strchr(src, '-');
+    if (scriptRegion != NULL) {
+        int length = strlen(scriptRegion);
+        char* region = strchr(scriptRegion + 1, '-');
+        char* atMark = NULL;
+
+        if (region == NULL) {
+            // CFLocaleGetIdentifier() returns '_' before region
+            region = strchr(scriptRegion + 1, '_');
         }
 
-        return strdup(retVal);
+        *scriptRegion = '_';
+        if (length > 5) {
+            // Region and script both exist.
+            char tmpScript[4];
+            int regionLength = length - 6;
+            atMark = scriptRegion + 1 + regionLength;
+            memcpy(tmpScript, scriptRegion + 1, 4);
+            memmove(scriptRegion + 1, region + 1, regionLength);
+            memcpy(atMark + 1, tmpScript, 4);
+        } else if (length == 5) {
+            // script only
+            atMark = scriptRegion;
+        }
+
+        if (atMark != NULL) {
+            *atMark = '@';
+
+            // assert script code
+            assert(isalpha(atMark[1]) &&
+                   isalpha(atMark[2]) &&
+                   isalpha(atMark[3]) &&
+                   isalpha(atMark[4]));
+        }
+
+        assert(((length == 3 || length == 8) &&
+            // '_' followed by a 2 character region designator
+                isalpha(scriptRegion[1]) &&
+                isalpha(scriptRegion[2])) ||
+                ((length == 4 || length == 9) &&
+            // '_' followed by a 3-digit UN M.49 area code
+                isdigit(scriptRegion[1]) &&
+                isdigit(scriptRegion[2]) &&
+                isdigit(scriptRegion[3])) ||
+            // '@' followed by a 4 character script code (already validated above)
+                (length == 5));
     }
-    return NULL;
+
+    return src;
 }
 
 char *setupMacOSXLocale(int cat) {