changeset 32581:632402f18fe6

8132081: C2 support for Adler32 on SPARC Summary: Add C2 instrinsic support for Adler32 checksum on SPARC. Reviewed-by: kvn Contributed-by: ahmed.khawaja@oracle.com
author kvn
date Thu, 03 Sep 2015 15:03:12 -0700
parents b4817ffce063
children 56619bb8bcaa
files hotspot/src/cpu/aarch64/vm/vm_version_aarch64.cpp hotspot/src/cpu/ppc/vm/vm_version_ppc.cpp hotspot/src/cpu/sparc/vm/stubGenerator_sparc.cpp hotspot/src/cpu/sparc/vm/stubRoutines_sparc.hpp hotspot/src/cpu/sparc/vm/vm_version_sparc.cpp hotspot/src/cpu/x86/vm/vm_version_x86.cpp hotspot/src/share/vm/classfile/vmSymbols.cpp hotspot/src/share/vm/classfile/vmSymbols.hpp hotspot/src/share/vm/opto/c2compiler.cpp hotspot/src/share/vm/opto/escape.cpp hotspot/src/share/vm/opto/library_call.cpp hotspot/src/share/vm/opto/runtime.cpp hotspot/src/share/vm/opto/runtime.hpp hotspot/src/share/vm/runtime/globals.hpp hotspot/src/share/vm/runtime/stubRoutines.cpp hotspot/src/share/vm/runtime/stubRoutines.hpp hotspot/test/compiler/intrinsics/adler32/TestAdler32.java
diffstat 17 files changed, 565 insertions(+), 1 deletions(-) [+]
line wrap: on
line diff
--- a/hotspot/src/cpu/aarch64/vm/vm_version_aarch64.cpp	Thu Sep 03 14:29:08 2015 -0700
+++ b/hotspot/src/cpu/aarch64/vm/vm_version_aarch64.cpp	Thu Sep 03 15:03:12 2015 -0700
@@ -177,6 +177,12 @@
   if (UseCRC32 && (auxv & HWCAP_CRC32) == 0) {
     warning("UseCRC32 specified, but not supported on this CPU");
   }
+
+  if (UseAdler32Intrinsics) {
+    warning("Adler32Intrinsics not available on this CPU.");
+    FLAG_SET_DEFAULT(UseAdler32Intrinsics, false);
+  }
+
   if (auxv & HWCAP_AES) {
     UseAES = UseAES || FLAG_IS_DEFAULT(UseAES);
     UseAESIntrinsics =
--- a/hotspot/src/cpu/ppc/vm/vm_version_ppc.cpp	Thu Sep 03 14:29:08 2015 -0700
+++ b/hotspot/src/cpu/ppc/vm/vm_version_ppc.cpp	Thu Sep 03 15:03:12 2015 -0700
@@ -200,6 +200,11 @@
     FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
   }
 
+  if (UseAdler32Intrinsics) {
+    warning("Adler32Intrinsics not available on this CPU.");
+    FLAG_SET_DEFAULT(UseAdler32Intrinsics, false);
+  }
+
   if (FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) {
     UseMultiplyToLenIntrinsic = true;
   }
--- a/hotspot/src/cpu/sparc/vm/stubGenerator_sparc.cpp	Thu Sep 03 14:29:08 2015 -0700
+++ b/hotspot/src/cpu/sparc/vm/stubGenerator_sparc.cpp	Thu Sep 03 15:03:12 2015 -0700
@@ -5110,6 +5110,188 @@
     return start;
   }
 
+#define ADLER32_NUM_TEMPS 16
+
+  /**
+   *  Arguments:
+   *
+   * Inputs:
+   *   O0   - int   adler
+   *   O1   - byte* buff
+   *   O2   - int   len
+   *
+   * Output:
+   *   O0   - int adler result
+   */
+  address generate_updateBytesAdler32() {
+    __ align(CodeEntryAlignment);
+    StubCodeMark mark(this, "StubRoutines", "updateBytesAdler32");
+    address start = __ pc();
+
+    Label L_cleanup_loop, L_cleanup_loop_check;
+    Label L_main_loop_check, L_main_loop, L_inner_loop, L_inner_loop_check;
+    Label L_nmax_check_done;
+
+    // Aliases
+    Register s1     = O0;
+    Register s2     = O3;
+    Register buff   = O1;
+    Register len    = O2;
+    Register temp[ADLER32_NUM_TEMPS] = {L0, L1, L2, L3, L4, L5, L6, L7, I0, I1, I2, I3, I4, I5, G3, I7};
+
+    // Max number of bytes we can process before having to take the mod
+    // 0x15B0 is 5552 in decimal, the largest n such that 255n(n+1)/2 + (n+1)(BASE-1) <= 2^32-1
+    unsigned long NMAX = 0x15B0;
+
+    // Zero-out the upper bits of len
+    __ clruwu(len);
+
+    // Create the mask 0xFFFF
+    __ set64(0x00FFFF, O4, O5); // O5 is the temp register
+
+    // s1 is initialized to the lower 16 bits of adler
+    // s2 is initialized to the upper 16 bits of adler
+    __ srlx(O0, 16, O5); // adler >> 16
+    __ and3(O0, O4, s1); // s1  = (adler & 0xFFFF)
+    __ and3(O5, O4, s2); // s2  = ((adler >> 16) & 0xFFFF)
+
+    // The pipelined loop needs at least 16 elements for 1 iteration
+    // It does check this, but it is more effective to skip to the cleanup loop
+    // Setup the constant for cutoff checking
+    __ mov(15, O4);
+
+    // Check if we are above the cutoff, if not go to the cleanup loop immediately
+    __ cmp_and_br_short(len, O4, Assembler::lessEqualUnsigned, Assembler::pt, L_cleanup_loop_check);
+
+    // Free up some registers for our use
+    for (int i = 0; i < ADLER32_NUM_TEMPS; i++) {
+      __ movxtod(temp[i], as_FloatRegister(2*i));
+    }
+
+    // Loop maintenance stuff is done at the end of the loop, so skip to there
+    __ ba_short(L_main_loop_check);
+
+    __ BIND(L_main_loop);
+
+    // Prologue for inner loop
+    __ ldub(buff, 0, L0);
+    __ dec(O5);
+
+    for (int i = 1; i < 8; i++) {
+      __ ldub(buff, i, temp[i]);
+    }
+
+    __ inc(buff, 8);
+
+    // Inner loop processes 16 elements at a time, might never execute if only 16 elements
+    // to be processed by the outter loop
+    __ ba_short(L_inner_loop_check);
+
+    __ BIND(L_inner_loop);
+
+    for (int i = 0; i < 8; i++) {
+      __ ldub(buff, (2*i), temp[(8+(2*i)) % ADLER32_NUM_TEMPS]);
+      __ add(s1, temp[i], s1);
+      __ ldub(buff, (2*i)+1, temp[(8+(2*i)+1) % ADLER32_NUM_TEMPS]);
+      __ add(s2, s1, s2);
+    }
+
+    // Original temp 0-7 used and new loads to temp 0-7 issued
+    // temp 8-15 ready to be consumed
+    __ add(s1, I0, s1);
+    __ dec(O5);
+    __ add(s2, s1, s2);
+    __ add(s1, I1, s1);
+    __ inc(buff, 16);
+    __ add(s2, s1, s2);
+
+    for (int i = 0; i < 6; i++) {
+      __ add(s1, temp[10+i], s1);
+      __ add(s2, s1, s2);
+    }
+
+    __ BIND(L_inner_loop_check);
+    __ nop();
+    __ cmp_and_br_short(O5, 0, Assembler::notEqual, Assembler::pt, L_inner_loop);
+
+    // Epilogue
+    for (int i = 0; i < 4; i++) {
+      __ ldub(buff, (2*i), temp[8+(2*i)]);
+      __ add(s1, temp[i], s1);
+      __ ldub(buff, (2*i)+1, temp[8+(2*i)+1]);
+      __ add(s2, s1, s2);
+    }
+
+    __ add(s1, temp[4], s1);
+    __ inc(buff, 8);
+
+    for (int i = 0; i < 11; i++) {
+      __ add(s2, s1, s2);
+      __ add(s1, temp[5+i], s1);
+    }
+
+    __ add(s2, s1, s2);
+
+    // Take the mod for s1 and s2
+    __ set64(0xFFF1, L0, L1);
+    __ udivx(s1, L0, L1);
+    __ udivx(s2, L0, L2);
+    __ mulx(L0, L1, L1);
+    __ mulx(L0, L2, L2);
+    __ sub(s1, L1, s1);
+    __ sub(s2, L2, s2);
+
+    // Make sure there is something left to process
+    __ BIND(L_main_loop_check);
+    __ set64(NMAX, L0, L1);
+    // k = len < NMAX ? len : NMAX
+    __ cmp_and_br_short(len, L0, Assembler::greaterEqualUnsigned, Assembler::pt, L_nmax_check_done);
+    __ andn(len, 0x0F, L0); // only loop a multiple of 16 times
+    __ BIND(L_nmax_check_done);
+    __ mov(L0, O5);
+    __ sub(len, L0, len); // len -= k
+
+    __ srlx(O5, 4, O5); // multiplies of 16
+    __ cmp_and_br_short(O5, 0, Assembler::notEqual, Assembler::pt, L_main_loop);
+
+    // Restore anything we used, take the mod one last time, combine and return
+    // Restore any registers we saved
+    for (int i = 0; i < ADLER32_NUM_TEMPS; i++) {
+      __ movdtox(as_FloatRegister(2*i), temp[i]);
+    }
+
+    // There might be nothing left to process
+    __ ba_short(L_cleanup_loop_check);
+
+    __ BIND(L_cleanup_loop);
+    __ ldub(buff, 0, O4); // load single byte form buffer
+    __ inc(buff); // buff++
+    __ add(s1, O4, s1); // s1 += *buff++;
+    __ dec(len); // len--
+    __ add(s1, s2, s2); // s2 += s1;
+    __ BIND(L_cleanup_loop_check);
+    __ nop();
+    __ cmp_and_br_short(len, 0, Assembler::notEqual, Assembler::pt, L_cleanup_loop);
+
+    // Take the mod one last time
+    __ set64(0xFFF1, O1, O2);
+    __ udivx(s1, O1, O2);
+    __ udivx(s2, O1, O5);
+    __ mulx(O1, O2, O2);
+    __ mulx(O1, O5, O5);
+    __ sub(s1, O2, s1);
+    __ sub(s2, O5, s2);
+
+    // Combine lower bits and higher bits
+    __ sllx(s2, 16, s2); // s2 = s2 << 16
+    __ or3(s1, s2, s1);  // adler = s2 | s1
+    // Final return value is in O0
+    __ retl();
+    __ delayed()->nop();
+
+    return start;
+  }
+
   void generate_initial() {
     // Generates all stubs and initializes the entry points
 
@@ -5206,6 +5388,11 @@
     if (UseCRC32CIntrinsics) {
       StubRoutines::_updateBytesCRC32C = generate_updateBytesCRC32C();
     }
+
+    // generate Adler32 intrinsics code
+    if (UseAdler32Intrinsics) {
+      StubRoutines::_updateBytesAdler32 = generate_updateBytesAdler32();
+    }
   }
 
 
--- a/hotspot/src/cpu/sparc/vm/stubRoutines_sparc.hpp	Thu Sep 03 14:29:08 2015 -0700
+++ b/hotspot/src/cpu/sparc/vm/stubRoutines_sparc.hpp	Thu Sep 03 15:03:12 2015 -0700
@@ -41,7 +41,7 @@
 enum /* platform_dependent_constants */ {
   // %%%%%%%% May be able to shrink this a lot
   code_size1 = 20000,           // simply increase if too small (assembler will crash if too small)
-  code_size2 = 24000            // simply increase if too small (assembler will crash if too small)
+  code_size2 = 27000            // simply increase if too small (assembler will crash if too small)
 };
 
 class Sparc {
--- a/hotspot/src/cpu/sparc/vm/vm_version_sparc.cpp	Thu Sep 03 14:29:08 2015 -0700
+++ b/hotspot/src/cpu/sparc/vm/vm_version_sparc.cpp	Thu Sep 03 15:03:12 2015 -0700
@@ -377,6 +377,15 @@
     FLAG_SET_DEFAULT(UseCRC32CIntrinsics, false);
   }
 
+  if (UseVIS > 2) {
+    if (FLAG_IS_DEFAULT(UseAdler32Intrinsics)) {
+      FLAG_SET_DEFAULT(UseAdler32Intrinsics, true);
+    }
+  } else if (UseAdler32Intrinsics) {
+    warning("SPARC Adler32 intrinsics require VIS3 instruction support. Intrinsics will be disabled.");
+    FLAG_SET_DEFAULT(UseAdler32Intrinsics, false);
+  }
+
   if (FLAG_IS_DEFAULT(ContendedPaddingWidth) &&
     (cache_line_size > ContendedPaddingWidth))
     ContendedPaddingWidth = cache_line_size;
--- a/hotspot/src/cpu/x86/vm/vm_version_x86.cpp	Thu Sep 03 14:29:08 2015 -0700
+++ b/hotspot/src/cpu/x86/vm/vm_version_x86.cpp	Thu Sep 03 15:03:12 2015 -0700
@@ -714,6 +714,11 @@
     FLAG_SET_DEFAULT(UseCRC32CIntrinsics, false);
   }
 
+  if (UseAdler32Intrinsics) {
+    warning("Adler32Intrinsics not available on this CPU.");
+    FLAG_SET_DEFAULT(UseAdler32Intrinsics, false);
+  }
+
   // Adjust RTM (Restricted Transactional Memory) flags
   if (!supports_rtm() && UseRTMLocking) {
     // Can't continue because UseRTMLocking affects UseBiasedLocking flag
--- a/hotspot/src/share/vm/classfile/vmSymbols.cpp	Thu Sep 03 14:29:08 2015 -0700
+++ b/hotspot/src/share/vm/classfile/vmSymbols.cpp	Thu Sep 03 15:03:12 2015 -0700
@@ -625,6 +625,10 @@
   case vmIntrinsics::_updateDirectByteBufferCRC32C:
     if (!UseCRC32CIntrinsics) return true;
     break;
+  case vmIntrinsics::_updateBytesAdler32:
+  case vmIntrinsics::_updateByteBufferAdler32:
+    if (!UseAdler32Intrinsics) return true;
+    break;
   case vmIntrinsics::_copyMemory:
     if (!InlineArrayCopy || !InlineUnsafeOps) return true;
     break;
--- a/hotspot/src/share/vm/classfile/vmSymbols.hpp	Thu Sep 03 14:29:08 2015 -0700
+++ b/hotspot/src/share/vm/classfile/vmSymbols.hpp	Thu Sep 03 15:03:12 2015 -0700
@@ -927,6 +927,12 @@
   do_intrinsic(_updateDirectByteBufferCRC32C, java_util_zip_CRC32C, updateDirectByteBuffer_C_name, updateByteBuffer_signature, F_S) \
    do_name(    updateDirectByteBuffer_C_name,                     "updateDirectByteBuffer")                             \
                                                                                                                         \
+   /* support for java.util.zip.Adler32 */                                                                              \
+  do_class(java_util_zip_Adler32,        "java/util/zip/Adler32")                                                       \
+  do_intrinsic(_updateBytesAdler32,       java_util_zip_Adler32,  updateBytes_C_name,  updateBytes_signature,  F_SN)    \
+  do_intrinsic(_updateByteBufferAdler32,  java_util_zip_Adler32,  updateByteBuffer_A_name,  updateByteBuffer_signature,  F_SN) \
+   do_name(     updateByteBuffer_A_name,                          "updateByteBuffer")                                   \
+                                                                                                                        \
   /* support for sun.misc.Unsafe */                                                                                     \
   do_class(sun_misc_Unsafe,               "sun/misc/Unsafe")                                                            \
                                                                                                                         \
--- a/hotspot/src/share/vm/opto/c2compiler.cpp	Thu Sep 03 14:29:08 2015 -0700
+++ b/hotspot/src/share/vm/opto/c2compiler.cpp	Thu Sep 03 15:03:12 2015 -0700
@@ -419,6 +419,8 @@
   case vmIntrinsics::_updateByteBufferCRC32:
   case vmIntrinsics::_updateBytesCRC32C:
   case vmIntrinsics::_updateDirectByteBufferCRC32C:
+  case vmIntrinsics::_updateBytesAdler32:
+  case vmIntrinsics::_updateByteBufferAdler32:
   case vmIntrinsics::_profileBoolean:
   case vmIntrinsics::_isCompileConstant:
     break;
--- a/hotspot/src/share/vm/opto/escape.cpp	Thu Sep 03 14:29:08 2015 -0700
+++ b/hotspot/src/share/vm/opto/escape.cpp	Thu Sep 03 15:03:12 2015 -0700
@@ -963,6 +963,7 @@
                   strcmp(call->as_CallLeaf()->_name, "g1_wb_post") == 0 ||
                   strcmp(call->as_CallLeaf()->_name, "updateBytesCRC32") == 0 ||
                   strcmp(call->as_CallLeaf()->_name, "updateBytesCRC32C") == 0 ||
+                  strcmp(call->as_CallLeaf()->_name, "updateBytesAdler32") == 0 ||
                   strcmp(call->as_CallLeaf()->_name, "aescrypt_encryptBlock") == 0 ||
                   strcmp(call->as_CallLeaf()->_name, "aescrypt_decryptBlock") == 0 ||
                   strcmp(call->as_CallLeaf()->_name, "cipherBlockChaining_encryptAESCrypt") == 0 ||
--- a/hotspot/src/share/vm/opto/library_call.cpp	Thu Sep 03 14:29:08 2015 -0700
+++ b/hotspot/src/share/vm/opto/library_call.cpp	Thu Sep 03 15:03:12 2015 -0700
@@ -296,6 +296,8 @@
   Node* get_table_from_crc32c_class(ciInstanceKlass *crc32c_class);
   bool inline_updateBytesCRC32C();
   bool inline_updateDirectByteBufferCRC32C();
+  bool inline_updateBytesAdler32();
+  bool inline_updateByteBufferAdler32();
   bool inline_multiplyToLen();
   bool inline_squareToLen();
   bool inline_mulAdd();
@@ -699,6 +701,11 @@
   case vmIntrinsics::_updateDirectByteBufferCRC32C:
     return inline_updateDirectByteBufferCRC32C();
 
+  case vmIntrinsics::_updateBytesAdler32:
+    return inline_updateBytesAdler32();
+  case vmIntrinsics::_updateByteBufferAdler32:
+    return inline_updateByteBufferAdler32();
+
   case vmIntrinsics::_profileBoolean:
     return inline_profileBoolean();
   case vmIntrinsics::_isCompileConstant:
@@ -5547,6 +5554,87 @@
   return true;
 }
 
+//------------------------------inline_updateBytesAdler32----------------------
+//
+// Calculate Adler32 checksum for byte[] array.
+// int java.util.zip.Adler32.updateBytes(int crc, byte[] buf, int off, int len)
+//
+bool LibraryCallKit::inline_updateBytesAdler32() {
+  assert(UseAdler32Intrinsics, "Adler32 Instrinsic support need"); // check if we actually need to check this flag or check a different one
+  assert(callee()->signature()->size() == 4, "updateBytes has 4 parameters");
+  assert(callee()->holder()->is_loaded(), "Adler32 class must be loaded");
+  // no receiver since it is static method
+  Node* crc     = argument(0); // type: int
+  Node* src     = argument(1); // type: oop
+  Node* offset  = argument(2); // type: int
+  Node* length  = argument(3); // type: int
+
+  const Type* src_type = src->Value(&_gvn);
+  const TypeAryPtr* top_src = src_type->isa_aryptr();
+  if (top_src  == NULL || top_src->klass()  == NULL) {
+    // failed array check
+    return false;
+  }
+
+  // Figure out the size and type of the elements we will be copying.
+  BasicType src_elem = src_type->isa_aryptr()->klass()->as_array_klass()->element_type()->basic_type();
+  if (src_elem != T_BYTE) {
+    return false;
+  }
+
+  // 'src_start' points to src array + scaled offset
+  Node* src_start = array_element_address(src, offset, src_elem);
+
+  // We assume that range check is done by caller.
+  // TODO: generate range check (offset+length < src.length) in debug VM.
+
+  // Call the stub.
+  address stubAddr = StubRoutines::updateBytesAdler32();
+  const char *stubName = "updateBytesAdler32";
+
+  Node* call = make_runtime_call(RC_LEAF, OptoRuntime::updateBytesAdler32_Type(),
+                                 stubAddr, stubName, TypePtr::BOTTOM,
+                                 crc, src_start, length);
+  Node* result = _gvn.transform(new ProjNode(call, TypeFunc::Parms));
+  set_result(result);
+  return true;
+}
+
+//------------------------------inline_updateByteBufferAdler32---------------
+//
+// Calculate Adler32 checksum for DirectByteBuffer.
+// int java.util.zip.Adler32.updateByteBuffer(int crc, long buf, int off, int len)
+//
+bool LibraryCallKit::inline_updateByteBufferAdler32() {
+  assert(UseAdler32Intrinsics, "Adler32 Instrinsic support need"); // check if we actually need to check this flag or check a different one
+  assert(callee()->signature()->size() == 5, "updateByteBuffer has 4 parameters and one is long");
+  assert(callee()->holder()->is_loaded(), "Adler32 class must be loaded");
+  // no receiver since it is static method
+  Node* crc     = argument(0); // type: int
+  Node* src     = argument(1); // type: long
+  Node* offset  = argument(3); // type: int
+  Node* length  = argument(4); // type: int
+
+  src = ConvL2X(src);  // adjust Java long to machine word
+  Node* base = _gvn.transform(new CastX2PNode(src));
+  offset = ConvI2X(offset);
+
+  // 'src_start' points to src array + scaled offset
+  Node* src_start = basic_plus_adr(top(), base, offset);
+
+  // Call the stub.
+  address stubAddr = StubRoutines::updateBytesAdler32();
+  const char *stubName = "updateBytesAdler32";
+
+  Node* call = make_runtime_call(RC_LEAF, OptoRuntime::updateBytesAdler32_Type(),
+                                 stubAddr, stubName, TypePtr::BOTTOM,
+                                 crc, src_start, length);
+
+  Node* result = _gvn.transform(new ProjNode(call, TypeFunc::Parms));
+  set_result(result);
+  return true;
+}
+
 //----------------------------inline_reference_get----------------------------
 // public T java.lang.ref.Reference.get();
 bool LibraryCallKit::inline_reference_get() {
--- a/hotspot/src/share/vm/opto/runtime.cpp	Thu Sep 03 14:29:08 2015 -0700
+++ b/hotspot/src/share/vm/opto/runtime.cpp	Thu Sep 03 15:03:12 2015 -0700
@@ -921,6 +921,28 @@
   return TypeFunc::make(domain, range);
 }
 
+/**
+*  int updateBytesAdler32(int adler, bytes* b, int off, int len)
+*/
+const TypeFunc* OptoRuntime::updateBytesAdler32_Type() {
+  // create input type (domain)
+  int num_args      = 3;
+  int argcnt = num_args;
+  const Type** fields = TypeTuple::fields(argcnt);
+  int argp = TypeFunc::Parms;
+  fields[argp++] = TypeInt::INT;        // crc
+  fields[argp++] = TypePtr::NOTNULL;    // src + offset
+  fields[argp++] = TypeInt::INT;        // len
+  assert(argp == TypeFunc::Parms+argcnt, "correct decoding");
+  const TypeTuple* domain = TypeTuple::make(TypeFunc::Parms+argcnt, fields);
+
+  // result type needed
+  fields = TypeTuple::fields(1);
+  fields[TypeFunc::Parms+0] = TypeInt::INT; // crc result
+  const TypeTuple* range = TypeTuple::make(TypeFunc::Parms+1, fields);
+  return TypeFunc::make(domain, range);
+}
+
 // for cipherBlockChaining calls of aescrypt encrypt/decrypt, four pointers and a length, returning int
 const TypeFunc* OptoRuntime::cipherBlockChaining_aescrypt_Type() {
   // create input type (domain)
--- a/hotspot/src/share/vm/opto/runtime.hpp	Thu Sep 03 14:29:08 2015 -0700
+++ b/hotspot/src/share/vm/opto/runtime.hpp	Thu Sep 03 15:03:12 2015 -0700
@@ -331,6 +331,8 @@
   static const TypeFunc* updateBytesCRC32_Type();
   static const TypeFunc* updateBytesCRC32C_Type();
 
+  static const TypeFunc* updateBytesAdler32_Type();
+
   // leaf on stack replacement interpreter accessor types
   static const TypeFunc* osr_end_Type();
 
--- a/hotspot/src/share/vm/runtime/globals.hpp	Thu Sep 03 14:29:08 2015 -0700
+++ b/hotspot/src/share/vm/runtime/globals.hpp	Thu Sep 03 15:03:12 2015 -0700
@@ -836,6 +836,9 @@
   product(bool, UseCRC32CIntrinsics, false,                                 \
           "use intrinsics for java.util.zip.CRC32C")                        \
                                                                             \
+  product(bool, UseAdler32Intrinsics, false,                                \
+          "use intrinsics for java.util.zip.Adler32")                       \
+                                                                            \
   diagnostic(ccstrlist, DisableIntrinsic, "",                               \
          "do not expand intrinsics whose (internal) names appear here")     \
                                                                             \
--- a/hotspot/src/share/vm/runtime/stubRoutines.cpp	Thu Sep 03 14:29:08 2015 -0700
+++ b/hotspot/src/share/vm/runtime/stubRoutines.cpp	Thu Sep 03 15:03:12 2015 -0700
@@ -139,6 +139,7 @@
 address StubRoutines::_crc_table_adr = NULL;
 
 address StubRoutines::_updateBytesCRC32C = NULL;
+address StubRoutines::_updateBytesAdler32 = NULL;
 
 address StubRoutines::_multiplyToLen = NULL;
 address StubRoutines::_squareToLen = NULL;
--- a/hotspot/src/share/vm/runtime/stubRoutines.hpp	Thu Sep 03 14:29:08 2015 -0700
+++ b/hotspot/src/share/vm/runtime/stubRoutines.hpp	Thu Sep 03 15:03:12 2015 -0700
@@ -198,6 +198,7 @@
   static address _crc_table_adr;
 
   static address _updateBytesCRC32C;
+  static address _updateBytesAdler32;
 
   static address _multiplyToLen;
   static address _squareToLen;
@@ -364,6 +365,7 @@
   static address crc_table_addr()      { return _crc_table_adr; }
 
   static address updateBytesCRC32C()   { return _updateBytesCRC32C; }
+  static address updateBytesAdler32()  { return _updateBytesAdler32; }
 
   static address multiplyToLen()       {return _multiplyToLen; }
   static address squareToLen()         {return _squareToLen; }
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/test/compiler/intrinsics/adler32/TestAdler32.java	Thu Sep 03 15:03:12 2015 -0700
@@ -0,0 +1,221 @@
+/*
+ * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/**
+ * @test
+ * @bug 8132081
+ * @summary C2 support for Adler32 on SPARC
+ *
+ * @run main/othervm/timeout=600 -Xbatch TestAdler32 -m
+ */
+
+import java.nio.ByteBuffer;
+import java.util.zip.Checksum;
+import java.util.zip.Adler32;
+
+public class TestAdler32 {
+    public static void main(String[] args) {
+        int offset = Integer.getInteger("offset", 0);
+        int msgSize = Integer.getInteger("msgSize", 512);
+        boolean multi = false;
+        int iters = 20000;
+        int warmupIters = 20000;
+
+        if (args.length > 0) {
+            if (args[0].equals("-m")) {
+                multi = true;
+            } else {
+                iters = Integer.valueOf(args[0]);
+            }
+            if (args.length > 1) {
+                warmupIters = Integer.valueOf(args[1]);
+            }
+        }
+
+        if (multi) {
+            test_multi(warmupIters);
+            return;
+        }
+
+        System.out.println(" offset = " + offset);
+        System.out.println("msgSize = " + msgSize + " bytes");
+        System.out.println("  iters = " + iters);
+
+        byte[] b = initializedBytes(msgSize, offset);
+
+        Adler32 adler0 = new Adler32();
+        Adler32 adler1 = new Adler32();
+        Adler32 adler2 = new Adler32();
+
+        adler0.update(b, offset, msgSize);
+
+        System.out.println("-------------------------------------------------------");
+
+        /* warm up */
+        for (int i = 0; i < warmupIters; i++) {
+            adler1.reset();
+            adler1.update(b, offset, msgSize);
+        }
+
+        /* measure performance */
+        long start = System.nanoTime();
+        for (int i = 0; i < iters; i++) {
+            adler1.reset();
+            adler1.update(b, offset, msgSize);
+        }
+        long end = System.nanoTime();
+        double total = (double)(end - start)/1e9;         // in seconds
+        double thruput = (double)msgSize*iters/1e6/total; // in MB/s
+        System.out.println("Adler32.update(byte[]) runtime = " + total + " seconds");
+        System.out.println("Adler32.update(byte[]) throughput = " + thruput + " MB/s");
+
+        /* check correctness */
+        for (int i = 0; i < iters; i++) {
+            adler1.reset();
+            adler1.update(b, offset, msgSize);
+            if (!check(adler0, adler1)) break;
+        }
+        report("Adlers", adler0, adler1);
+
+        System.out.println("-------------------------------------------------------");
+
+        ByteBuffer buf = ByteBuffer.allocateDirect(msgSize);
+        buf.put(b, offset, msgSize);
+        buf.flip();
+
+        /* warm up */
+        for (int i = 0; i < warmupIters; i++) {
+            adler2.reset();
+            adler2.update(buf);
+            buf.rewind();
+        }
+
+        /* measure performance */
+        start = System.nanoTime();
+        for (int i = 0; i < iters; i++) {
+            adler2.reset();
+            adler2.update(buf);
+            buf.rewind();
+        }
+        end = System.nanoTime();
+        total = (double)(end - start)/1e9;         // in seconds
+        thruput = (double)msgSize*iters/1e6/total; // in MB/s
+        System.out.println("Adler32.update(ByteBuffer) runtime = " + total + " seconds");
+        System.out.println("Adler32.update(ByteBuffer) throughput = " + thruput + " MB/s");
+
+        /* check correctness */
+        for (int i = 0; i < iters; i++) {
+            adler2.reset();
+            adler2.update(buf);
+            buf.rewind();
+            if (!check(adler0, adler2)) break;
+        }
+        report("Adlers", adler0, adler1);
+
+        System.out.println("-------------------------------------------------------");
+    }
+
+    private static void report(String s, Checksum adler0, Checksum adler1) {
+        System.out.printf("%s: adler0 = %08x, adler1 = %08x\n",
+                          s, adler0.getValue(), adler1.getValue());
+    }
+
+    private static boolean check(Checksum adler0, Checksum adler1) {
+        if (adler0.getValue() != adler1.getValue()) {
+            System.err.printf("ERROR: adler0 = %08x, adler1 = %08x\n",
+                              adler0.getValue(), adler1.getValue());
+            return false;
+        }
+        return true;
+    }
+
+    private static byte[] initializedBytes(int M, int offset) {
+        byte[] bytes = new byte[M + offset];
+        for (int i = 0; i < offset; i++) {
+            bytes[i] = (byte) i;
+        }
+        for (int i = offset; i < bytes.length; i++) {
+            bytes[i] = (byte) (i - offset);
+        }
+        return bytes;
+    }
+
+    private static void test_multi(int iters) {
+        int len1 = 8;    // the  8B/iteration loop
+        int len2 = 32;   // the 32B/iteration loop
+        int len3 = 4096; // the 4KB/iteration loop
+
+        byte[] b = initializedBytes(len3*16, 0);
+        int[] offsets = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 16, 32, 64, 128, 256, 512 };
+        int[] sizes = { 0, 1, 2, 3, 4, 5, 6, 7,
+                        len1, len1+1, len1+2, len1+3, len1+4, len1+5, len1+6, len1+7,
+                        len1*2, len1*2+1, len1*2+3, len1*2+5, len1*2+7,
+                        len2, len2+1, len2+3, len2+5, len2+7,
+                        len2*2, len2*4, len2*8, len2*16, len2*32, len2*64,
+                        len3, len3+1, len3+3, len3+5, len3+7,
+                        len3*2, len3*4, len3*8,
+                        len1+len2, len1+len2+1, len1+len2+3, len1+len2+5, len1+len2+7,
+                        len1+len3, len1+len3+1, len1+len3+3, len1+len3+5, len1+len3+7,
+                        len2+len3, len2+len3+1, len2+len3+3, len2+len3+5, len2+len3+7,
+                        len1+len2+len3, len1+len2+len3+1, len1+len2+len3+3,
+                        len1+len2+len3+5, len1+len2+len3+7,
+                        (len1+len2+len3)*2, (len1+len2+len3)*2+1, (len1+len2+len3)*2+3,
+                        (len1+len2+len3)*2+5, (len1+len2+len3)*2+7,
+                        (len1+len2+len3)*3, (len1+len2+len3)*3-1, (len1+len2+len3)*3-3,
+                        (len1+len2+len3)*3-5, (len1+len2+len3)*3-7 };
+        Adler32[] adler0 = new Adler32[offsets.length*sizes.length];
+        Adler32[] adler1 = new Adler32[offsets.length*sizes.length];
+        int i, j, k;
+
+        System.out.printf("testing %d cases ...\n", offsets.length*sizes.length);
+
+        /* set the result from interpreter as reference */
+        for (i = 0; i < offsets.length; i++) {
+            for (j = 0; j < sizes.length; j++) {
+                adler0[i*sizes.length + j] = new Adler32();
+                adler1[i*sizes.length + j] = new Adler32();
+                adler0[i*sizes.length + j].update(b, offsets[i], sizes[j]);
+            }
+        }
+
+        /* warm up the JIT compiler and get result */
+        for (k = 0; k < iters; k++) {
+            for (i = 0; i < offsets.length; i++) {
+                for (j = 0; j < sizes.length; j++) {
+                    adler1[i*sizes.length + j].reset();
+                    adler1[i*sizes.length + j].update(b, offsets[i], sizes[j]);
+                }
+            }
+        }
+
+        /* check correctness */
+        for (i = 0; i < offsets.length; i++) {
+            for (j = 0; j < sizes.length; j++) {
+                if (!check(adler0[i*sizes.length + j], adler1[i*sizes.length + j])) {
+                    System.out.printf("offsets[%d] = %d", i, offsets[i]);
+                    System.out.printf("\tsizes[%d] = %d\n", j, sizes[j]);
+                }
+            }
+        }
+    }
+}