changeset 12244:a5fdf2f30c6e

8164612: NoSuchMethodException when method name contains NULL or Latin-1 supplement character
Summary: String length needs to be updated when converting from unicode to utf8.
Reviewed-by: kvn, coleenp
author thartmann
date Wed, 26 Oct 2016 14:36:05 +0200
parents 572f4f11a35f
children 1ca21e468b1b
files src/share/vm/classfile/stringTable.cpp src/share/vm/prims/jvmtiEnv.cpp src/share/vm/utilities/utf8.cpp src/share/vm/utilities/utf8.hpp test/runtime/CompactStrings/TestMethodNames.java
diffstat 5 files changed, 82 insertions(+), 51 deletions(-) [+]
line wrap: on
line diff
--- a/src/share/vm/classfile/stringTable.cpp	Mon May 23 19:46:43 2016 +0200
+++ b/src/share/vm/classfile/stringTable.cpp	Wed Oct 26 14:36:05 2016 +0200
@@ -437,17 +437,15 @@
           st->print("%d: ", length);
         } else {
           ResourceMark rm(THREAD);
-          int utf8_length;
+          int utf8_length = length;
           char* utf8_string;
 
           if (!is_latin1) {
             jchar* chars = value->char_at_addr(0);
-            utf8_length = UNICODE::utf8_length(chars, length);
-            utf8_string = UNICODE::as_utf8(chars, length);
+            utf8_string = UNICODE::as_utf8(chars, utf8_length);
           } else {
             jbyte* bytes = value->byte_at_addr(0);
-            utf8_length = UNICODE::utf8_length(bytes, length);
-            utf8_string = UNICODE::as_utf8(bytes, length);
+            utf8_string = UNICODE::as_utf8(bytes, utf8_length);
           }
 
           st->print("%d: ", utf8_length);
--- a/src/share/vm/prims/jvmtiEnv.cpp	Mon May 23 19:46:43 2016 +0200
+++ b/src/share/vm/prims/jvmtiEnv.cpp	Wed Oct 26 14:36:05 2016 +0200
@@ -1001,7 +1001,8 @@
     if (name() != NULL) {
       n = java_lang_String::as_utf8_string(name());
     } else {
-      n = UNICODE::as_utf8((jchar*) NULL, 0);
+      int utf8_length = 0;
+      n = UNICODE::as_utf8((jchar*) NULL, utf8_length);
     }
 
     info_ptr->name = (char *) jvmtiMalloc(strlen(n)+1);
--- a/src/share/vm/utilities/utf8.cpp	Mon May 23 19:46:43 2016 +0200
+++ b/src/share/vm/utilities/utf8.cpp	Wed Oct 26 14:36:05 2016 +0200
@@ -411,61 +411,46 @@
 }
 
 int UNICODE::utf8_size(jchar c) {
-  if ((0x0001 <= c) && (c <= 0x007F)) return 1;
-  if (c <= 0x07FF) return 2;
-  return 3;
+  if ((0x0001 <= c) && (c <= 0x007F)) {
+    // ASCII character
+    return 1;
+  } else  if (c <= 0x07FF) {
+    return 2;
+  } else {
+    return 3;
+  }
 }
 
 int UNICODE::utf8_size(jbyte c) {
-  if (c >= 0x0001) return 1;
-  return 2;
+  if (c >= 0x01) {
+    // ASCII character. Check is equivalent to
+    // (0x01 <= c) && (c <= 0x7F) because c is signed.
+    return 1;
+  } else {
+    // Non-ASCII character or 0x00 which needs to be
+    // two-byte encoded as 0xC080 in modified UTF-8.
+    return 2;
+  }
 }
 
-int UNICODE::utf8_length(jchar* base, int length) {
+template<typename T>
+int UNICODE::utf8_length(T* base, int length) {
   int result = 0;
   for (int index = 0; index < length; index++) {
-    jchar c = base[index];
-    if ((0x0001 <= c) && (c <= 0x007F)) result += 1;
-    else if (c <= 0x07FF) result += 2;
-    else result += 3;
-  }
-  return result;
-}
-
-int UNICODE::utf8_length(jbyte* base, int length) {
-  int result = 0;
-  for (int index = 0; index < length; index++) {
-    jbyte c = base[index];
+    T c = base[index];
     result += utf8_size(c);
   }
   return result;
 }
 
-char* UNICODE::as_utf8(jchar* base, int length) {
+template<typename T>
+char* UNICODE::as_utf8(T* base, int& length) {
   int utf8_len = utf8_length(base, length);
   u_char* buf = NEW_RESOURCE_ARRAY(u_char, utf8_len + 1);
   char* result = as_utf8(base, length, (char*) buf, utf8_len + 1);
   assert((int) strlen(result) == utf8_len, "length prediction must be correct");
-  return result;
-}
-
-char* UNICODE::as_utf8(jbyte* base, int length) {
-  int utf8_len = utf8_length(base, length);
-  u_char* result = NEW_RESOURCE_ARRAY(u_char, utf8_len + 1);
-  u_char* p = result;
-  if (utf8_len == length) {
-    for (int index = 0; index < length; index++) {
-      *p++ = base[index];
-    }
-  } else {
-    // Unicode string contains U+0000 which should
-    // be encoded as 0xC080 in "modified" UTF8.
-    for (int index = 0; index < length; index++) {
-      p = utf8_write(p, ((jchar) base[index]) & 0xff);
-    }
-  }
-  *p = '\0';
-  assert(p == &result[utf8_len], "length prediction must be correct");
+  // Set string length to utf8 length
+  length = utf8_len;
   return (char*) result;
 }
 
@@ -490,9 +475,10 @@
     buflen -= sz;
     if (buflen <= 0) break; // string is truncated
     if (sz == 1) {
+      // Copy ASCII characters (UTF-8 is ASCII compatible)
       *p++ = c;
     } else {
-      // Unicode string contains U+0000 which should
+      // Non-ASCII character or 0x00 which should
       // be encoded as 0xC080 in "modified" UTF8.
       p = utf8_write(p, ((jchar) c) & 0xff);
     }
@@ -543,6 +529,10 @@
 }
 
 // Explicit instantiation for all supported types.
+template int UNICODE::utf8_length(jbyte* base, int length);
+template int UNICODE::utf8_length(jchar* base, int length);
+template char* UNICODE::as_utf8(jbyte* base, int& length);
+template char* UNICODE::as_utf8(jchar* base, int& length);
 template int UNICODE::quoted_ascii_length<jbyte>(jbyte* base, int length);
 template int UNICODE::quoted_ascii_length<jchar>(jchar* base, int length);
 template void UNICODE::as_quoted_ascii<jbyte>(const jbyte* base, int length, char* buf, int buflen);
--- a/src/share/vm/utilities/utf8.hpp	Mon May 23 19:46:43 2016 +0200
+++ b/src/share/vm/utilities/utf8.hpp	Wed Oct 26 14:36:05 2016 +0200
@@ -97,16 +97,15 @@
   static int utf8_size(jbyte c);
 
   // returns the utf8 length of a unicode string
-  static int utf8_length(jchar* base, int length);
-  static int utf8_length(jbyte* base, int length);
+  template<typename T> static int utf8_length(T* base, int length);
 
   // converts a unicode string to utf8 string
   static void convert_to_utf8(const jchar* base, int length, char* utf8_buffer);
 
   // converts a unicode string to a utf8 string; result is allocated
-  // in resource area unless a buffer is provided.
-  static char* as_utf8(jchar* base, int length);
-  static char* as_utf8(jbyte* base, int length);
+  // in resource area unless a buffer is provided. The unicode 'length'
+  // parameter is set to the length of the result utf8 string.
+  template<typename T> static char* as_utf8(T* base, int& length);
   static char* as_utf8(jchar* base, int length, char* buf, int buflen);
   static char* as_utf8(jbyte* base, int length, char* buf, int buflen);
 
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/runtime/CompactStrings/TestMethodNames.java	Wed Oct 26 14:36:05 2016 +0200
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+import javax.script.*;
+import java.util.function.*;
+
+/*
+ * @test
+ * @bug 8164612
+ * @summary Test method names containing Latin-1 supplement characters.
+ * @run main/othervm -XX:+CompactStrings TestMethodNames
+ * @run main/othervm -XX:-CompactStrings TestMethodNames
+ */
+public class TestMethodNames {
+    public static void main(String[] args) throws Exception { // each eval defines an object with a getter and reads that property back by name
+        ScriptEngineManager m = new ScriptEngineManager();
+        ScriptEngine e = m.getEngineByName("nashorn"); // look up the script engine registered under the name "nashorn"
+
+        e.eval("({get \"\0\"(){}})[\"\0\"]"); // property name is a single NUL: the Java escape \0 embeds U+0000 directly in the script text
+        e.eval("({get \"\\x80\"(){}})[\"\\x80\"]"); // property name is written as the script-level escape \x80 (U+0080)
+        e.eval("({get \"\\xff\"(){}})[\"\\xff\"]"); // property name is written as the script-level escape \xff (U+00FF)
+    }
+}