changeset 13569:821ef7c10085

Merge
author coleenp
date Thu, 24 Aug 2017 01:13:04 +0000
parents 12817e44b856 b9f8d262202d
children 4f5042758ccc 2dfe246179c0 f59d7a871cb5
files src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.microbenchmarks/src/org/graalvm/compiler/microbenchmarks/graal/TestJMH.java src/share/vm/classfile/javaClasses.cpp src/share/vm/oops/oop.inline.hpp src/share/vm/runtime/os.cpp
diffstat 134 files changed, 3295 insertions(+), 1482 deletions(-) [+]
line wrap: on
line diff
--- a/src/cpu/aarch64/vm/aarch64.ad	Wed Aug 23 14:52:55 2017 -0400
+++ b/src/cpu/aarch64/vm/aarch64.ad	Thu Aug 24 01:13:04 2017 +0000
@@ -3806,15 +3806,24 @@
       // Any use that can't embed the address computation?
       for (DUIterator_Fast imax, i = addp->fast_outs(imax); i < imax; i++) {
         Node* u = addp->fast_out(i);
-        if (!u->is_Mem() || u->is_LoadVector() || u->is_StoreVector() || u->Opcode() == Op_StoreCM) {
+        if (!u->is_Mem()) {
           return;
         }
+        if (u->is_LoadVector() || u->is_StoreVector() || u->Opcode() == Op_StoreCM) {
+          return;
+        }
+        if (addp2->in(AddPNode::Offset)->Opcode() != Op_ConvI2L) {
+          int scale = 1 << addp2->in(AddPNode::Offset)->in(2)->get_int();
+          if (VM_Version::expensive_load(u->as_Mem()->memory_size(), scale)) {
+            return;
+          }
+        }
       }
-      
+
       Node* off = addp->in(AddPNode::Offset);
       Node* addr2 = addp2->in(AddPNode::Address);
       Node* base = addp->in(AddPNode::Base);
-      
+
       Node* new_addr = NULL;
       // Check whether the graph already has the new AddP we need
       // before we create one (no GVN available here).
@@ -3828,7 +3837,7 @@
           break;
         }
       }
-      
+
       if (new_addr == NULL) {
         new_addr = new AddPNode(base, addr2, off);
       }
--- a/src/cpu/aarch64/vm/interp_masm_aarch64.cpp	Wed Aug 23 14:52:55 2017 -0400
+++ b/src/cpu/aarch64/vm/interp_masm_aarch64.cpp	Thu Aug 24 01:13:04 2017 +0000
@@ -272,8 +272,7 @@
   // load pointer for resolved_references[] objArray
   ldr(result, Address(result, ConstantPool::cache_offset_in_bytes()));
   ldr(result, Address(result, ConstantPoolCache::resolved_references_offset_in_bytes()));
-  // JNIHandles::resolve(obj);
-  ldr(result, Address(result, 0));
+  resolve_oop_handle(result);
   // Add in the index
   add(result, result, tmp);
   load_heap_oop(result, Address(result, arrayOopDesc::base_offset_in_bytes(T_OBJECT)));
--- a/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp	Wed Aug 23 14:52:55 2017 -0400
+++ b/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp	Thu Aug 24 01:13:04 2017 +0000
@@ -3279,6 +3279,12 @@
   }
 }
 
+// ((OopHandle)result).resolve();
+void MacroAssembler::resolve_oop_handle(Register result) {
+  // OopHandle::resolve is an indirection.
+  ldr(result, Address(result, 0));
+}
+
 void MacroAssembler::load_mirror(Register dst, Register method) {
   const int mirror_offset = in_bytes(Klass::java_mirror_offset());
   ldr(dst, Address(rmethod, Method::const_offset()));
--- a/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp	Wed Aug 23 14:52:55 2017 -0400
+++ b/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp	Thu Aug 24 01:13:04 2017 +0000
@@ -790,6 +790,7 @@
   void store_klass(Register dst, Register src);
   void cmp_klass(Register oop, Register trial_klass, Register tmp);
 
+  void resolve_oop_handle(Register result);
   void load_mirror(Register dst, Register method);
 
   void load_heap_oop(Register dst, Address src);
--- a/src/cpu/aarch64/vm/vm_version_aarch64.hpp	Wed Aug 23 14:52:55 2017 -0400
+++ b/src/cpu/aarch64/vm/vm_version_aarch64.hpp	Thu Aug 24 01:13:04 2017 +0000
@@ -56,6 +56,17 @@
   static void assert_is_initialized() {
   }
 
+  static bool expensive_load(int ld_size, int scale) {
+    if (cpu_family() == CPU_ARM) {
+      // Half-word load with index shift by 1 (aka scale is 2) has
+      // extra cycle latency, e.g. ldrsh w0, [x1,w2,sxtw #1].
+      if (ld_size == 2 && scale == 2) {
+        return true;
+      }
+    }
+    return false;
+  }
+
   enum Family {
     CPU_ARM       = 'A',
     CPU_BROADCOM  = 'B',
--- a/src/cpu/arm/vm/interp_masm_arm.cpp	Wed Aug 23 14:52:55 2017 -0400
+++ b/src/cpu/arm/vm/interp_masm_arm.cpp	Thu Aug 24 01:13:04 2017 +0000
@@ -300,8 +300,7 @@
   // load pointer for resolved_references[] objArray
   ldr(cache, Address(result, ConstantPool::cache_offset_in_bytes()));
   ldr(cache, Address(result, ConstantPoolCache::resolved_references_offset_in_bytes()));
-  // JNIHandles::resolve(result)
-  ldr(cache, Address(cache, 0));
+  resolve_oop_handle(cache);
   // Add in the index
   // convert from field index to resolved_references() index and from
   // word index to byte offset. Since this is a java object, it can be compressed
--- a/src/cpu/arm/vm/macroAssembler_arm.cpp	Wed Aug 23 14:52:55 2017 -0400
+++ b/src/cpu/arm/vm/macroAssembler_arm.cpp	Thu Aug 24 01:13:04 2017 +0000
@@ -2887,6 +2887,11 @@
   return offset();
 }
 
+// ((OopHandle)result).resolve();
+void MacroAssembler::resolve_oop_handle(Register result) {
+  // OopHandle::resolve is an indirection.
+  ldr(result, Address(result, 0));
+}
 
 void MacroAssembler::load_mirror(Register mirror, Register method, Register tmp) {
   const int mirror_offset = in_bytes(Klass::java_mirror_offset());
@@ -2896,6 +2901,7 @@
   ldr(mirror, Address(tmp, mirror_offset));
 }
 
+
 ///////////////////////////////////////////////////////////////////////////////
 
 // Compressed pointers
--- a/src/cpu/arm/vm/macroAssembler_arm.hpp	Wed Aug 23 14:52:55 2017 -0400
+++ b/src/cpu/arm/vm/macroAssembler_arm.hpp	Thu Aug 24 01:13:04 2017 +0000
@@ -687,6 +687,7 @@
     AbstractAssembler::emit_address((address)L.data());
   }
 
+  void resolve_oop_handle(Register result);
   void load_mirror(Register mirror, Register method, Register tmp);
 
   // Porting layer between 32-bit ARM and AArch64
--- a/src/cpu/ppc/vm/interp_masm_ppc_64.cpp	Wed Aug 23 14:52:55 2017 -0400
+++ b/src/cpu/ppc/vm/interp_masm_ppc_64.cpp	Thu Aug 24 01:13:04 2017 +0000
@@ -464,8 +464,7 @@
   // Load pointer for resolved_references[] objArray.
   ld(result, ConstantPool::cache_offset_in_bytes(), result);
   ld(result, ConstantPoolCache::resolved_references_offset_in_bytes(), result);
-  // JNIHandles::resolve(result)
-  ld(result, 0, result);
+  resolve_oop_handle(result);
 #ifdef ASSERT
   Label index_ok;
   lwa(R0, arrayOopDesc::length_offset_in_bytes(), result);
--- a/src/cpu/ppc/vm/macroAssembler_ppc.cpp	Wed Aug 23 14:52:55 2017 -0400
+++ b/src/cpu/ppc/vm/macroAssembler_ppc.cpp	Thu Aug 24 01:13:04 2017 +0000
@@ -3372,6 +3372,12 @@
   }
 }
 
+// ((OopHandle)result).resolve();
+void MacroAssembler::resolve_oop_handle(Register result) {
+  // OopHandle::resolve is an indirection.
+  ld(result, 0, result);
+}
+
 void MacroAssembler::load_mirror_from_const_method(Register mirror, Register const_method) {
   ld(mirror, in_bytes(ConstMethod::constants_offset()), const_method);
   ld(mirror, ConstantPool::pool_holder_offset_in_bytes(), mirror);
--- a/src/cpu/ppc/vm/macroAssembler_ppc.hpp	Wed Aug 23 14:52:55 2017 -0400
+++ b/src/cpu/ppc/vm/macroAssembler_ppc.hpp	Thu Aug 24 01:13:04 2017 +0000
@@ -725,6 +725,7 @@
   void store_klass(Register dst_oop, Register klass, Register tmp = R0);
   void store_klass_gap(Register dst_oop, Register val = noreg); // Will store 0 if val not specified.
 
+  void resolve_oop_handle(Register result);
   void load_mirror_from_const_method(Register mirror, Register const_method);
 
   static int instr_size_for_decode_klass_not_null();
--- a/src/cpu/ppc/vm/templateInterpreterGenerator_ppc.cpp	Wed Aug 23 14:52:55 2017 -0400
+++ b/src/cpu/ppc/vm/templateInterpreterGenerator_ppc.cpp	Thu Aug 24 01:13:04 2017 +0000
@@ -56,7 +56,7 @@
 // if too small.
 // Run with +PrintInterpreter to get the VM to print out the size.
 // Max size with JVMTI
-int TemplateInterpreter::InterpreterCodeSize = 230*K;
+int TemplateInterpreter::InterpreterCodeSize = 256*K;
 
 #ifdef PRODUCT
 #define BLOCK_COMMENT(str) /* nothing */
--- a/src/cpu/s390/vm/interp_masm_s390.cpp	Wed Aug 23 14:52:55 2017 -0400
+++ b/src/cpu/s390/vm/interp_masm_s390.cpp	Thu Aug 24 01:13:04 2017 +0000
@@ -364,8 +364,7 @@
   // Load pointer for resolved_references[] objArray.
   z_lg(result, ConstantPool::cache_offset_in_bytes(), result);
   z_lg(result, ConstantPoolCache::resolved_references_offset_in_bytes(), result);
-  // JNIHandles::resolve(result)
-  z_lg(result, 0, result); // Load resolved references array itself.
+  resolve_oop_handle(result); // Load resolved references array itself.
 #ifdef ASSERT
   NearLabel index_ok;
   z_lgf(Z_R0, Address(result, arrayOopDesc::length_offset_in_bytes()));
--- a/src/cpu/s390/vm/macroAssembler_s390.cpp	Wed Aug 23 14:52:55 2017 -0400
+++ b/src/cpu/s390/vm/macroAssembler_s390.cpp	Thu Aug 24 01:13:04 2017 +0000
@@ -4660,6 +4660,12 @@
   }
 }
 
+// ((OopHandle)result).resolve();
+void MacroAssembler::resolve_oop_handle(Register result) {
+  // OopHandle::resolve is an indirection.
+  z_lg(result, 0, result);
+}
+
 void MacroAssembler::load_mirror(Register mirror, Register method) {
   mem2reg_opt(mirror, Address(method, Method::const_offset()));
   mem2reg_opt(mirror, Address(mirror, ConstMethod::constants_offset()));
--- a/src/cpu/s390/vm/macroAssembler_s390.hpp	Wed Aug 23 14:52:55 2017 -0400
+++ b/src/cpu/s390/vm/macroAssembler_s390.hpp	Thu Aug 24 01:13:04 2017 +0000
@@ -832,6 +832,7 @@
   void oop_decoder(Register Rdst, Register Rsrc, bool maybeNULL,
                    Register Rbase = Z_R1, int pow2_offset = -1);
 
+  void resolve_oop_handle(Register result);
   void load_mirror(Register mirror, Register method);
 
   //--------------------------
--- a/src/cpu/sparc/vm/interp_masm_sparc.cpp	Wed Aug 23 14:52:55 2017 -0400
+++ b/src/cpu/sparc/vm/interp_masm_sparc.cpp	Thu Aug 24 01:13:04 2017 +0000
@@ -730,8 +730,7 @@
   // load pointer for resolved_references[] objArray
   ld_ptr(result, ConstantPool::cache_offset_in_bytes(), result);
   ld_ptr(result, ConstantPoolCache::resolved_references_offset_in_bytes(), result);
-  // JNIHandles::resolve(result)
-  ld_ptr(result, 0, result);
+  resolve_oop_handle(result);
   // Add in the index
   add(result, tmp, result);
   load_heap_oop(result, arrayOopDesc::base_offset_in_bytes(T_OBJECT), result);
--- a/src/cpu/sparc/vm/macroAssembler_sparc.cpp	Wed Aug 23 14:52:55 2017 -0400
+++ b/src/cpu/sparc/vm/macroAssembler_sparc.cpp	Thu Aug 24 01:13:04 2017 +0000
@@ -3822,6 +3822,12 @@
   card_table_write(bs->byte_map_base, tmp, store_addr);
 }
 
+// ((OopHandle)result).resolve();
+void MacroAssembler::resolve_oop_handle(Register result) {
+  // OopHandle::resolve is an indirection.
+  ld_ptr(result, 0, result);
+}
+
 void MacroAssembler::load_mirror(Register mirror, Register method) {
   const int mirror_offset = in_bytes(Klass::java_mirror_offset());
   ld_ptr(method, in_bytes(Method::const_offset()), mirror);
--- a/src/cpu/sparc/vm/macroAssembler_sparc.hpp	Wed Aug 23 14:52:55 2017 -0400
+++ b/src/cpu/sparc/vm/macroAssembler_sparc.hpp	Thu Aug 24 01:13:04 2017 +0000
@@ -995,6 +995,7 @@
   inline void ldbool(const Address& a, Register d);
   inline void movbool( bool boolconst, Register d);
 
+  void resolve_oop_handle(Register result);
   void load_mirror(Register mirror, Register method);
 
   // klass oop manipulations if compressed
--- a/src/cpu/x86/vm/interp_masm_x86.cpp	Wed Aug 23 14:52:55 2017 -0400
+++ b/src/cpu/x86/vm/interp_masm_x86.cpp	Thu Aug 24 01:13:04 2017 +0000
@@ -511,8 +511,7 @@
   // load pointer for resolved_references[] objArray
   movptr(result, Address(result, ConstantPool::cache_offset_in_bytes()));
   movptr(result, Address(result, ConstantPoolCache::resolved_references_offset_in_bytes()));
-  // JNIHandles::resolve(obj);
-  movptr(result, Address(result, 0));
+  resolve_oop_handle(result);
   // Add in the index
   addptr(result, tmp);
   load_heap_oop(result, Address(result, arrayOopDesc::base_offset_in_bytes(T_OBJECT)));
--- a/src/cpu/x86/vm/macroAssembler_x86.cpp	Wed Aug 23 14:52:55 2017 -0400
+++ b/src/cpu/x86/vm/macroAssembler_x86.cpp	Thu Aug 24 01:13:04 2017 +0000
@@ -6604,6 +6604,12 @@
 #endif // _LP64
 }
 
+// ((OopHandle)result).resolve();
+void MacroAssembler::resolve_oop_handle(Register result) {
+  // OopHandle::resolve is an indirection.
+  movptr(result, Address(result, 0));
+}
+
 void MacroAssembler::load_mirror(Register mirror, Register method) {
   // get mirror
   const int mirror_offset = in_bytes(Klass::java_mirror_offset());
@@ -7030,7 +7036,6 @@
 
 #endif // _LP64
 
-
 // C2 compiled method's prolog code.
 void MacroAssembler::verified_entry(int framesize, int stack_bang_size, bool fp_mode_24b) {
 
--- a/src/cpu/x86/vm/macroAssembler_x86.hpp	Wed Aug 23 14:52:55 2017 -0400
+++ b/src/cpu/x86/vm/macroAssembler_x86.hpp	Thu Aug 24 01:13:04 2017 +0000
@@ -327,6 +327,7 @@
   void movbool(Address dst, Register src);
   void testbool(Register dst);
 
+  void resolve_oop_handle(Register result);
   void load_mirror(Register mirror, Register method);
 
   // oop manipulations
--- a/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.core.amd64/src/org/graalvm/compiler/core/amd64/AMD64ArithmeticLIRGenerator.java	Wed Aug 23 14:52:55 2017 -0400
+++ b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.core.amd64/src/org/graalvm/compiler/core/amd64/AMD64ArithmeticLIRGenerator.java	Thu Aug 24 01:13:04 2017 +0000
@@ -158,7 +158,7 @@
                 }
                 break;
             default:
-                throw GraalError.shouldNotReachHere();
+                throw GraalError.shouldNotReachHere(input.getPlatformKind().toString());
         }
         return result;
     }
@@ -451,7 +451,7 @@
     protected Value emitZeroExtendMemory(AMD64Kind memoryKind, int resultBits, AMD64AddressValue address, LIRFrameState state) {
         // Issue a zero extending load of the proper bit size and set the result to
         // the proper kind.
-        Variable result = getLIRGen().newVariable(LIRKind.value(resultBits == 32 ? AMD64Kind.DWORD : AMD64Kind.QWORD));
+        Variable result = getLIRGen().newVariable(LIRKind.value(resultBits <= 32 ? AMD64Kind.DWORD : AMD64Kind.QWORD));
         switch (memoryKind) {
             case BYTE:
                 getLIRGen().append(new AMD64Unary.MemoryOp(MOVZXB, DWORD, result, address, state));
--- a/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.core.common/src/org/graalvm/compiler/core/common/calc/FloatConvert.java	Wed Aug 23 14:52:55 2017 -0400
+++ b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.core.common/src/org/graalvm/compiler/core/common/calc/FloatConvert.java	Thu Aug 24 01:13:04 2017 +0000
@@ -25,16 +25,26 @@
 import org.graalvm.compiler.debug.GraalError;
 
 public enum FloatConvert {
-    F2I,
-    D2I,
-    F2L,
-    D2L,
-    I2F,
-    L2F,
-    D2F,
-    I2D,
-    L2D,
-    F2D;
+    F2I(FloatConvertCategory.FloatingPointToInteger),
+    D2I(FloatConvertCategory.FloatingPointToInteger),
+    F2L(FloatConvertCategory.FloatingPointToInteger),
+    D2L(FloatConvertCategory.FloatingPointToInteger),
+    I2F(FloatConvertCategory.IntegerToFloatingPoint),
+    L2F(FloatConvertCategory.IntegerToFloatingPoint),
+    D2F(FloatConvertCategory.FloatingPointToFloatingPoint),
+    I2D(FloatConvertCategory.IntegerToFloatingPoint),
+    L2D(FloatConvertCategory.IntegerToFloatingPoint),
+    F2D(FloatConvertCategory.FloatingPointToFloatingPoint);
+
+    private FloatConvertCategory category;
+
+    FloatConvert(FloatConvertCategory category) {
+        this.category = category;
+    }
+
+    public FloatConvertCategory getCategory() {
+        return category;
+    }
 
     public FloatConvert reverse() {
         switch (this) {
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.core.common/src/org/graalvm/compiler/core/common/calc/FloatConvertCategory.java	Thu Aug 24 01:13:04 2017 +0000
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+package org.graalvm.compiler.core.common.calc;
+
+public enum FloatConvertCategory {
+    FloatingPointToInteger,
+    IntegerToFloatingPoint,
+    FloatingPointToFloatingPoint;
+}
--- a/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.core.common/src/org/graalvm/compiler/core/common/type/ArithmeticOpTable.java	Wed Aug 23 14:52:55 2017 -0400
+++ b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.core.common/src/org/graalvm/compiler/core/common/type/ArithmeticOpTable.java	Thu Aug 24 01:13:04 2017 +0000
@@ -33,9 +33,11 @@
 import org.graalvm.compiler.core.common.type.ArithmeticOpTable.BinaryOp.And;
 import org.graalvm.compiler.core.common.type.ArithmeticOpTable.BinaryOp.Div;
 import org.graalvm.compiler.core.common.type.ArithmeticOpTable.BinaryOp.Mul;
+import org.graalvm.compiler.core.common.type.ArithmeticOpTable.BinaryOp.MulHigh;
 import org.graalvm.compiler.core.common.type.ArithmeticOpTable.BinaryOp.Or;
 import org.graalvm.compiler.core.common.type.ArithmeticOpTable.BinaryOp.Rem;
 import org.graalvm.compiler.core.common.type.ArithmeticOpTable.BinaryOp.Sub;
+import org.graalvm.compiler.core.common.type.ArithmeticOpTable.BinaryOp.UMulHigh;
 import org.graalvm.compiler.core.common.type.ArithmeticOpTable.BinaryOp.Xor;
 import org.graalvm.compiler.core.common.type.ArithmeticOpTable.IntegerConvertOp.Narrow;
 import org.graalvm.compiler.core.common.type.ArithmeticOpTable.IntegerConvertOp.SignExtend;
@@ -62,6 +64,8 @@
     private final BinaryOp<Sub> sub;
 
     private final BinaryOp<Mul> mul;
+    private final BinaryOp<MulHigh> mulHigh;
+    private final BinaryOp<UMulHigh> umulHigh;
     private final BinaryOp<Div> div;
     private final BinaryOp<Rem> rem;
 
@@ -92,7 +96,7 @@
         }
     }
 
-    public static final ArithmeticOpTable EMPTY = new ArithmeticOpTable(null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null);
+    public static final ArithmeticOpTable EMPTY = new ArithmeticOpTable(null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null);
 
     public interface ArithmeticOpWrapper {
 
@@ -121,6 +125,8 @@
         BinaryOp<Sub> sub = wrapIfNonNull(wrapper::wrapBinaryOp, inner.getSub());
 
         BinaryOp<Mul> mul = wrapIfNonNull(wrapper::wrapBinaryOp, inner.getMul());
+        BinaryOp<MulHigh> mulHigh = wrapIfNonNull(wrapper::wrapBinaryOp, inner.getMulHigh());
+        BinaryOp<UMulHigh> umulHigh = wrapIfNonNull(wrapper::wrapBinaryOp, inner.getUMulHigh());
         BinaryOp<Div> div = wrapIfNonNull(wrapper::wrapBinaryOp, inner.getDiv());
         BinaryOp<Rem> rem = wrapIfNonNull(wrapper::wrapBinaryOp, inner.getRem());
 
@@ -141,16 +147,18 @@
         IntegerConvertOp<Narrow> narrow = wrapIfNonNull(wrapper::wrapIntegerConvertOp, inner.getNarrow());
 
         FloatConvertOp[] floatConvert = CollectionsUtil.filterAndMapToArray(inner.floatConvert, Objects::nonNull, wrapper::wrapFloatConvertOp, FloatConvertOp[]::new);
-        return new ArithmeticOpTable(neg, add, sub, mul, div, rem, not, and, or, xor, shl, shr, ushr, abs, sqrt, zeroExtend, signExtend, narrow, floatConvert);
+        return new ArithmeticOpTable(neg, add, sub, mul, mulHigh, umulHigh, div, rem, not, and, or, xor, shl, shr, ushr, abs, sqrt, zeroExtend, signExtend, narrow, floatConvert);
     }
 
-    protected ArithmeticOpTable(UnaryOp<Neg> neg, BinaryOp<Add> add, BinaryOp<Sub> sub, BinaryOp<Mul> mul, BinaryOp<Div> div, BinaryOp<Rem> rem, UnaryOp<Not> not, BinaryOp<And> and, BinaryOp<Or> or,
-                    BinaryOp<Xor> xor, ShiftOp<Shl> shl, ShiftOp<Shr> shr, ShiftOp<UShr> ushr, UnaryOp<Abs> abs, UnaryOp<Sqrt> sqrt, IntegerConvertOp<ZeroExtend> zeroExtend,
-                    IntegerConvertOp<SignExtend> signExtend, IntegerConvertOp<Narrow> narrow, FloatConvertOp... floatConvert) {
+    protected ArithmeticOpTable(UnaryOp<Neg> neg, BinaryOp<Add> add, BinaryOp<Sub> sub, BinaryOp<Mul> mul, BinaryOp<MulHigh> mulHigh, BinaryOp<UMulHigh> umulHigh, BinaryOp<Div> div, BinaryOp<Rem> rem,
+                    UnaryOp<Not> not, BinaryOp<And> and, BinaryOp<Or> or, BinaryOp<Xor> xor, ShiftOp<Shl> shl, ShiftOp<Shr> shr, ShiftOp<UShr> ushr, UnaryOp<Abs> abs, UnaryOp<Sqrt> sqrt,
+                    IntegerConvertOp<ZeroExtend> zeroExtend, IntegerConvertOp<SignExtend> signExtend, IntegerConvertOp<Narrow> narrow, FloatConvertOp... floatConvert) {
         this.neg = neg;
         this.add = add;
         this.sub = sub;
         this.mul = mul;
+        this.mulHigh = mulHigh;
+        this.umulHigh = umulHigh;
         this.div = div;
         this.rem = rem;
         this.not = not;
@@ -207,6 +215,20 @@
     }
 
     /**
+     * Describes the signed multiply-high operation, which yields the upper half of the full-width product of two int or long values.
+     */
+    public BinaryOp<MulHigh> getMulHigh() {
+        return mulHigh;
+    }
+
+    /**
+     * Describes the unsigned multiply-high operation, which yields the upper half of the full-width product of two int or long values.
+     */
+    public BinaryOp<UMulHigh> getUMulHigh() {
+        return umulHigh;
+    }
+
+    /**
      * Describes the division operation.
      */
     public BinaryOp<Div> getDiv() {
@@ -321,6 +343,8 @@
                Objects.equals(add, that.add) &&
                Objects.equals(sub, that.sub) &&
                Objects.equals(mul, that.mul) &&
+               Objects.equals(mulHigh, that.mulHigh) &&
+               Objects.equals(umulHigh, that.umulHigh) &&
                Objects.equals(div, that.div) &&
                Objects.equals(rem, that.rem) &&
                Objects.equals(not, that.not) &&
@@ -360,8 +384,8 @@
 
     @Override
     public String toString() {
-        return getClass().getSimpleName() + "[" + toString(neg, add, sub, mul, div, rem, not, and, or, xor, shl, shr, ushr, abs, sqrt, zeroExtend, signExtend, narrow) + ",floatConvert[" +
-                        toString(floatConvert) + "]]";
+        return getClass().getSimpleName() + "[" + toString(neg, add, sub, mul, mulHigh, umulHigh, div, rem, not, and, or, xor, shl, shr, ushr, abs, sqrt, zeroExtend, signExtend, narrow) +
+                        ",floatConvert[" + toString(floatConvert) + "]]";
     }
 
     public abstract static class Op {
@@ -479,6 +503,20 @@
             }
         }
 
+        public abstract static class MulHigh extends BinaryOp<MulHigh> {
+
+            protected MulHigh(boolean associative, boolean commutative) {
+                super("*H", associative, commutative);
+            }
+        }
+
+        public abstract static class UMulHigh extends BinaryOp<UMulHigh> {
+
+            protected UMulHigh(boolean associative, boolean commutative) {
+                super("|*H|", associative, commutative);
+            }
+        }
+
         public abstract static class Div extends BinaryOp<Div> {
 
             protected Div(boolean associative, boolean commutative) {
--- a/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.core.common/src/org/graalvm/compiler/core/common/type/FloatStamp.java	Wed Aug 23 14:52:55 2017 -0400
+++ b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.core.common/src/org/graalvm/compiler/core/common/type/FloatStamp.java	Thu Aug 24 01:13:04 2017 +0000
@@ -302,7 +302,7 @@
         return null;
     }
 
-    private static final ArithmeticOpTable OPS = new ArithmeticOpTable(
+    public static final ArithmeticOpTable OPS = new ArithmeticOpTable(
 
                     new UnaryOp.Neg() {
 
@@ -437,6 +437,10 @@
                         }
                     },
 
+                    null,
+
+                    null,
+
                     new BinaryOp.Div(false, false) {
 
                         @Override
--- a/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.core.common/src/org/graalvm/compiler/core/common/type/IntegerStamp.java	Wed Aug 23 14:52:55 2017 -0400
+++ b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.core.common/src/org/graalvm/compiler/core/common/type/IntegerStamp.java	Thu Aug 24 01:13:04 2017 +0000
@@ -858,6 +858,164 @@
                         }
                     },
 
+                    new BinaryOp.MulHigh(true, true) {
+
+                        @Override
+                        public Constant foldConstant(Constant const1, Constant const2) {
+                            PrimitiveConstant a = (PrimitiveConstant) const1;
+                            PrimitiveConstant b = (PrimitiveConstant) const2;
+                            assert a.getJavaKind() == b.getJavaKind();
+                            return JavaConstant.forIntegerKind(a.getJavaKind(), multiplyHigh(a.asLong(), b.asLong(), a.getJavaKind()));
+                        }
+
+                        @Override
+                        public Stamp foldStamp(Stamp stamp1, Stamp stamp2) {
+                            IntegerStamp a = (IntegerStamp) stamp1;
+                            IntegerStamp b = (IntegerStamp) stamp2;
+                            JavaKind javaKind = a.getStackKind();
+
+                            assert a.getBits() == b.getBits();
+                            assert javaKind == b.getStackKind();
+                            assert (javaKind == JavaKind.Int || javaKind == JavaKind.Long);
+
+                            if (a.isEmpty() || b.isEmpty()) {
+                                return a.empty();
+                            } else if (a.isUnrestricted() || b.isUnrestricted()) {
+                                return a.unrestricted();
+                            }
+
+                            long[] xExtremes = {a.lowerBound(), a.upperBound()};
+                            long[] yExtremes = {b.lowerBound(), b.upperBound()};
+                            long min = Long.MAX_VALUE;
+                            long max = Long.MIN_VALUE;
+                            for (long x : xExtremes) {
+                                for (long y : yExtremes) {
+                                    long result = multiplyHigh(x, y, javaKind);
+                                    min = Math.min(min, result);
+                                    max = Math.max(max, result);
+                                }
+                            }
+                            return StampFactory.forInteger(javaKind, min, max);
+                        }
+
+                        @Override
+                        public boolean isNeutral(Constant value) {
+                            return false;
+                        }
+
+                        private long multiplyHigh(long x, long y, JavaKind javaKind) {
+                            if (javaKind == JavaKind.Int) {
+                                return (x * y) >> 32;
+                            } else {
+                                assert javaKind == JavaKind.Long;
+                                long x0 = x & 0xFFFFFFFFL;
+                                long x1 = x >> 32;
+
+                                long y0 = y & 0xFFFFFFFFL;
+                                long y1 = y >> 32;
+
+                                long z0 = x0 * y0;
+                                long t = x1 * y0 + (z0 >>> 32);
+                                long z1 = t & 0xFFFFFFFFL;
+                                long z2 = t >> 32;
+                                z1 += x0 * y1;
+
+                                return x1 * y1 + z2 + (z1 >> 32);
+                            }
+                        }
+                    },
+
+                    new BinaryOp.UMulHigh(true, true) {
+
+                        @Override
+                        public Constant foldConstant(Constant const1, Constant const2) {
+                            PrimitiveConstant a = (PrimitiveConstant) const1;
+                            PrimitiveConstant b = (PrimitiveConstant) const2;
+                            assert a.getJavaKind() == b.getJavaKind();
+                            return JavaConstant.forIntegerKind(a.getJavaKind(), multiplyHighUnsigned(a.asLong(), b.asLong(), a.getJavaKind()));
+                        }
+
+                        @Override
+                        public Stamp foldStamp(Stamp stamp1, Stamp stamp2) {
+                            IntegerStamp a = (IntegerStamp) stamp1;
+                            IntegerStamp b = (IntegerStamp) stamp2;
+                            JavaKind javaKind = a.getStackKind();
+
+                            assert a.getBits() == b.getBits();
+                            assert javaKind == b.getStackKind();
+                            assert (javaKind == JavaKind.Int || javaKind == JavaKind.Long);
+
+                            if (a.isEmpty() || b.isEmpty()) {
+                                return a.empty();
+                            } else if (a.isUnrestricted() || b.isUnrestricted()) {
+                                return a.unrestricted();
+                            }
+
+                            // Note that the minima and maxima are calculated using signed min/max
+                            // functions, while the values themselves are unsigned.
+                            long[] xExtremes = getUnsignedExtremes(a);
+                            long[] yExtremes = getUnsignedExtremes(b);
+                            long min = Long.MAX_VALUE;
+                            long max = Long.MIN_VALUE;
+                            for (long x : xExtremes) {
+                                for (long y : yExtremes) {
+                                    long result = multiplyHighUnsigned(x, y, javaKind);
+                                    min = Math.min(min, result);
+                                    max = Math.max(max, result);
+                                }
+                            }
+
+                            // A negative min means some unsigned result has its top bit set, so signed [min, max] bounds cannot describe it.
+                            if (min == max || min >= 0) {
+                                return StampFactory.forInteger(javaKind, min, max);
+                            } else {
+                                return StampFactory.forKind(javaKind);
+                            }
+                        }
+
+                        @Override
+                        public boolean isNeutral(Constant value) {
+                            return false;
+                        }
+
+                        private long[] getUnsignedExtremes(IntegerStamp stamp) {
+                            if (stamp.lowerBound() < 0 && stamp.upperBound() >= 0) {
+                                /*
+                                 * If -1 and 0 are both in the signed range, then we can't say
+                                 * anything about the unsigned range, so we have to return [0,
+                                 * MAX_UNSIGNED].
+                                 */
+                                return new long[]{0, -1L};
+                            } else {
+                                return new long[]{stamp.lowerBound(), stamp.upperBound()};
+                            }
+                        }
+
+                        private long multiplyHighUnsigned(long x, long y, JavaKind javaKind) {
+                            if (javaKind == JavaKind.Int) {
+                                long xl = x & 0xFFFFFFFFL;
+                                long yl = y & 0xFFFFFFFFL;
+                                long r = xl * yl;
+                                return (int) (r >>> 32);
+                            } else {
+                                assert javaKind == JavaKind.Long;
+                                long x0 = x & 0xFFFFFFFFL;
+                                long x1 = x >>> 32;
+
+                                long y0 = y & 0xFFFFFFFFL;
+                                long y1 = y >>> 32;
+
+                                long z0 = x0 * y0;
+                                long t = x1 * y0 + (z0 >>> 32);
+                                long z1 = t & 0xFFFFFFFFL;
+                                long z2 = t >>> 32;
+                                z1 += x0 * y1;
+
+                                return x1 * y1 + z2 + (z1 >>> 32);
+                            }
+                        }
+                    },
+
                     new BinaryOp.Div(true, false) {
 
                         @Override
@@ -1046,10 +1204,14 @@
                         public Stamp foldStamp(Stamp stamp, IntegerStamp shift) {
                             IntegerStamp value = (IntegerStamp) stamp;
                             int bits = value.getBits();
-                            long defaultMask = CodeUtil.mask(bits);
-                            if (value.upMask() == 0) {
+                            if (value.isEmpty()) {
+                                return value;
+                            } else if (shift.isEmpty()) {
+                                return StampFactory.forInteger(bits).empty();
+                            } else if (value.upMask() == 0) {
                                 return value;
                             }
+
                             int shiftMask = getShiftAmountMask(stamp);
                             int shiftBits = Integer.bitCount(shiftMask);
                             if (shift.lowerBound() == shift.upperBound()) {
@@ -1068,6 +1230,7 @@
                                 }
                             }
                             if ((shift.lowerBound() >>> shiftBits) == (shift.upperBound() >>> shiftBits)) {
+                                long defaultMask = CodeUtil.mask(bits);
                                 long downMask = defaultMask;
                                 long upMask = 0;
                                 for (long i = shift.lowerBound(); i <= shift.upperBound(); i++) {
@@ -1109,7 +1272,11 @@
                         public Stamp foldStamp(Stamp stamp, IntegerStamp shift) {
                             IntegerStamp value = (IntegerStamp) stamp;
                             int bits = value.getBits();
-                            if (shift.lowerBound() == shift.upperBound()) {
+                            if (value.isEmpty()) {
+                                return value;
+                            } else if (shift.isEmpty()) {
+                                return StampFactory.forInteger(bits).empty();
+                            } else if (shift.lowerBound() == shift.upperBound()) {
                                 long shiftCount = shift.lowerBound() & getShiftAmountMask(stamp);
                                 if (shiftCount == 0) {
                                     return stamp;
@@ -1153,6 +1320,12 @@
                         public Stamp foldStamp(Stamp stamp, IntegerStamp shift) {
                             IntegerStamp value = (IntegerStamp) stamp;
                             int bits = value.getBits();
+                            if (value.isEmpty()) {
+                                return value;
+                            } else if (shift.isEmpty()) {
+                                return StampFactory.forInteger(bits).empty();
+                            }
+
                             if (shift.lowerBound() == shift.upperBound()) {
                                 long shiftCount = shift.lowerBound() & getShiftAmountMask(stamp);
                                 if (shiftCount == 0) {
--- a/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.core.test/src/org/graalvm/compiler/core/test/UnsafeReadEliminationTest.java	Wed Aug 23 14:52:55 2017 -0400
+++ b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.core.test/src/org/graalvm/compiler/core/test/UnsafeReadEliminationTest.java	Thu Aug 24 01:13:04 2017 +0000
@@ -46,6 +46,16 @@
     public static double SideEffectD;
     public static double SideEffectL;
 
+    private static final long byteArrayBaseOffset; // UNSAFE.arrayBaseOffset(byte[].class), assigned in the static initializer below
+    private static final long intArrayBaseOffset; // UNSAFE.arrayBaseOffset(int[].class)
+    private static final long longArrayBaseOffset; // UNSAFE.arrayBaseOffset(long[].class)
+
+    static { // computes the array base offsets once, when the class is initialized
+        byteArrayBaseOffset = UNSAFE.arrayBaseOffset(byte[].class);
+        intArrayBaseOffset = UNSAFE.arrayBaseOffset(int[].class);
+        longArrayBaseOffset = UNSAFE.arrayBaseOffset(long[].class);
+    }
+
     public static long test1Snippet(double a) {
         final Object m = Memory;
         if (a > 0) {
@@ -130,4 +140,76 @@
         Assert.assertEquals(writes, graph.getNodes().filter(WriteNode.class).count());
     }
 
+    public static int testWriteIntToByteArraySnippet() { // snippet: int-sized unsafe store into a byte[], then a normal byte read
+        byte[] array = new byte[4];
+        UNSAFE.putInt(array, byteArrayBaseOffset, 0x01020304); // 4-byte store starting at the byte[] base offset
+        return array[0]; // reads back one byte of the region just written
+    }
+
+    @Test
+    public void testWriteIntToByteArray() {
+        test("testWriteIntToByteArraySnippet"); // passes the snippet's method name to test(...), which is defined outside this diff
+    }
+
+    public static byte testWriteSignedExtendedByteToByteArraySnippet(byte b) { // snippet: overwrite four initialized bytes with one int-sized unsafe store
+        byte[] array = new byte[4];
+        array[0] = 0x01; // ordinary byte stores that the unsafe store below overlaps
+        array[1] = 0x02;
+        array[2] = 0x03;
+        array[3] = 0x04;
+        UNSAFE.putInt(array, byteArrayBaseOffset, b); // b is widened (sign-extended) to int for the 4-byte store at the byte[] base offset
+        return array[3]; // reads an element other than the first after the wide store
+    }
+
+    @Test
+    public void testWriteSignedExtendedByteToByteArray() {
+        test("testWriteSignedExtendedByteToByteArraySnippet", (byte) 0); // snippet name plus its single byte argument; test(...) is defined outside this diff
+    }
+
+    public static int testWriteLongToIntArraySnippet() { // snippet: long-sized unsafe store into an int[], then a normal int read
+        int[] array = new int[2];
+        UNSAFE.putLong(array, intArrayBaseOffset, 0x0102030405060708L); // 8-byte store starting at the int[] base offset
+        return array[0]; // reads back one int of the region just written
+    }
+
+    @Test
+    public void testWriteLongToIntArray() {
+        test("testWriteLongToIntArraySnippet"); // passes the snippet's method name to test(...), which is defined outside this diff
+    }
+
+    public static int testWriteByteToIntArraySnippet() { // snippet: byte-sized unsafe store into an initialized int element
+        int[] array = new int[1];
+        array[0] = 0x01020304; // ordinary int store that the unsafe store below partially overwrites
+        UNSAFE.putByte(array, intArrayBaseOffset, (byte) 0x05); // 1-byte store at the int[] base offset
+        return array[0]; // reads the whole int after one of its bytes was replaced
+    }
+
+    @Test
+    public void testWriteByteToIntArray() {
+        test("testWriteByteToIntArraySnippet"); // passes the snippet's method name to test(...), which is defined outside this diff
+    }
+
+    public static long testWriteIntToLongArraySnippet() { // snippet: int-sized unsafe store into an initialized long element
+        long[] array = new long[1];
+        array[0] = 0x0102030405060708L; // ordinary long store that the unsafe store below partially overwrites
+        UNSAFE.putInt(array, longArrayBaseOffset, 0x04030201); // 4-byte store at the long[] base offset
+        return array[0]; // reads the whole long after four of its bytes were replaced
+    }
+
+    @Test
+    public void testWriteIntToLongArray() {
+        test("testWriteIntToLongArraySnippet"); // passes the snippet's method name to test(...), which is defined outside this diff
+    }
+
+    public static float testWriteFloatToIntArraySnippet() { // snippet: stores the raw int bits of 0.5f into a float[] and reads the element back as a float
+        float[] array = new float[1];
+        UNSAFE.putInt(array, intArrayBaseOffset, Float.floatToRawIntBits(0.5f)); // NOTE(review): the array is a float[] but the offset comes from int[].class; presumably both base offsets are equal - confirm
+        return array[0];
+    }
+
+    @Test
+    public void testWriteFloatToIntArray() {
+        test("testWriteFloatToIntArraySnippet"); // passes the snippet's method name to test(...), which is defined outside this diff
+    }
+
 }
--- a/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.core.test/src/org/graalvm/compiler/core/test/ea/UnsafeEATest.java	Wed Aug 23 14:52:55 2017 -0400
+++ b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.core.test/src/org/graalvm/compiler/core/test/ea/UnsafeEATest.java	Thu Aug 24 01:13:04 2017 +0000
@@ -38,10 +38,6 @@
     private static final long fieldOffset1;
     private static final long fieldOffset2;
 
-    private static final long byteArrayBaseOffset;
-    private static final long intArrayBaseOffset;
-    private static final long longArrayBaseOffset;
-
     static {
         try {
             long localFieldOffset1 = UNSAFE.objectFieldOffset(TestClassInt.class.getField("x"));
@@ -55,9 +51,6 @@
                 fieldOffset2 = UNSAFE.objectFieldOffset(TestClassInt.class.getField("z"));
             }
             assert fieldOffset2 == fieldOffset1 + 4;
-            byteArrayBaseOffset = UNSAFE.arrayBaseOffset(byte[].class);
-            intArrayBaseOffset = UNSAFE.arrayBaseOffset(int[].class);
-            longArrayBaseOffset = UNSAFE.arrayBaseOffset(long[].class);
         } catch (Exception e) {
             throw new RuntimeException(e);
         }
@@ -203,76 +196,4 @@
         return x;
     }
 
-    public static int testWriteIntToByteArraySnippet() {
-        byte[] array = new byte[4];
-        UNSAFE.putInt(array, byteArrayBaseOffset, 0x01020304);
-        return array[0];
-    }
-
-    @Test
-    public void testWriteIntToByteArray() {
-        test("testWriteIntToByteArraySnippet");
-    }
-
-    public static byte testWriteSignedExtendedByteToByteArraySnippet(byte b) {
-        byte[] array = new byte[4];
-        array[0] = 0x01;
-        array[1] = 0x02;
-        array[2] = 0x03;
-        array[3] = 0x04;
-        UNSAFE.putInt(array, byteArrayBaseOffset, b);
-        return array[3];
-    }
-
-    @Test
-    public void testWriteSignedExtendedByteToByteArray() {
-        test("testWriteSignedExtendedByteToByteArraySnippet", (byte) 0);
-    }
-
-    public static int testWriteLongToIntArraySnippet() {
-        int[] array = new int[2];
-        UNSAFE.putLong(array, intArrayBaseOffset, 0x0102030405060708L);
-        return array[0];
-    }
-
-    @Test
-    public void testWriteLongToIntArray() {
-        test("testWriteLongToIntArraySnippet");
-    }
-
-    public static int testWriteByteToIntArraySnippet() {
-        int[] array = new int[1];
-        array[0] = 0x01020304;
-        UNSAFE.putByte(array, intArrayBaseOffset, (byte) 0x05);
-        return array[0];
-    }
-
-    @Test
-    public void testWriteByteToIntArray() {
-        test("testWriteByteToIntArraySnippet");
-    }
-
-    public static long testWriteIntToLongArraySnippet() {
-        long[] array = new long[1];
-        array[0] = 0x0102030405060708L;
-        UNSAFE.putInt(array, longArrayBaseOffset, 0x04030201);
-        return array[0];
-    }
-
-    @Test
-    public void testWriteIntToLongArray() {
-        test("testWriteIntToLongArraySnippet");
-    }
-
-    public static float testWriteFloatToIntArraySnippet() {
-        float[] array = new float[1];
-        UNSAFE.putInt(array, intArrayBaseOffset, Float.floatToRawIntBits(0.5f));
-        return array[0];
-    }
-
-    @Test
-    public void testWriteFloatToIntArray() {
-        test("testWriteFloatToIntArraySnippet");
-    }
-
 }
--- a/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.core/src/org/graalvm/compiler/core/CompilationPrinter.java	Wed Aug 23 14:52:55 2017 -0400
+++ b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.core/src/org/graalvm/compiler/core/CompilationPrinter.java	Thu Aug 24 01:13:04 2017 +0000
@@ -58,6 +58,11 @@
      */
     public static CompilationPrinter begin(OptionValues options, CompilationIdentifier id, JavaMethod method, int entryBCI) {
         if (PrintCompilation.getValue(options) && !TTY.isSuppressed()) {
+            try {
+                Class.forName("java.lang.management.ManagementFactory");
+            } catch (ClassNotFoundException ex) {
+                throw new IllegalArgumentException("PrintCompilation option requires java.management module");
+            }
             return new CompilationPrinter(id, method, entryBCI);
         }
         return DISABLED;
--- a/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.core/src/org/graalvm/compiler/core/CompilationWrapper.java	Wed Aug 23 14:52:55 2017 -0400
+++ b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.core/src/org/graalvm/compiler/core/CompilationWrapper.java	Thu Aug 24 01:13:04 2017 +0000
@@ -82,26 +82,6 @@
          */
         ExitVM;
 
-        static ValueHelp HELP = new ValueHelp();
-
-        static class ValueHelp implements EnumOptionKey.ValueHelp<ExceptionAction> {
-            @Override
-            public String getHelp(Object value) {
-                ExceptionAction action = (ExceptionAction) value;
-                switch (action) {
-                    case Silent:
-                        return action + ": Print nothing to the console.";
-                    case Print:
-                        return action + ": Print a stack trace to the console.";
-                    case Diagnose:
-                        return action + ": Retry the compilation with extra diagnostics.";
-                    case ExitVM:
-                        return action + ": Same as " + Diagnose + " except that the VM process exits after retrying.";
-                }
-                return null;
-            }
-        }
-
         /**
          * Gets the action that is one level less verbose than this action, bottoming out at the
          * least verbose action.
--- a/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.core/src/org/graalvm/compiler/core/GraalCompilerOptions.java	Wed Aug 23 14:52:55 2017 -0400
+++ b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.core/src/org/graalvm/compiler/core/GraalCompilerOptions.java	Thu Aug 24 01:13:04 2017 +0000
@@ -36,13 +36,15 @@
     // @formatter:off
     @Option(help = "Print an informational line to the console for each completed compilation.", type = OptionType.Debug)
     public static final OptionKey<Boolean> PrintCompilation = new OptionKey<>(false);
-    @Option(help = "Pattern (see MethodFilter for format) for method that will trigger an exception when compiled. " +
-                   "This option exists to test handling compilation crashes gracefully.", type = OptionType.Debug)
+    @Option(help = "Pattern for method(s) that will trigger an exception when compiled. " +
+                   "This option exists to test handling compilation crashes gracefully. " +
+                   "See the MethodFilter option for the pattern syntax. ", type = OptionType.Debug)
     public static final OptionKey<String> CrashAt = new OptionKey<>(null);
-    @Option(help = "The action to take when compilation fails with a non-bailout exception.", type = OptionType.User)
-    public static final EnumOptionKey<ExceptionAction> CompilationFailureAction = new EnumOptionKey<>(ExceptionAction.Diagnose, ExceptionAction.HELP);
-    @Option(help = "The action to take when compilation fails with a bailout exception.", type = OptionType.User)
-    public static final EnumOptionKey<ExceptionAction> CompilationBailoutAction = new EnumOptionKey<>(ExceptionAction.Silent, ExceptionAction.HELP);
+    @Option(help = "file:doc-files/CompilationBailoutActionHelp.txt", type = OptionType.User)
+    public static final EnumOptionKey<ExceptionAction> CompilationBailoutAction = new EnumOptionKey<>(ExceptionAction.Silent);
+    @Option(help = "Specifies the action to take when compilation fails with a non-bailout exception. " +
+                    "The accepted values are the same as for CompilationBailoutAction.", type = OptionType.User)
+    public static final EnumOptionKey<ExceptionAction> CompilationFailureAction = new EnumOptionKey<>(ExceptionAction.Diagnose); // non-bailout failures; bailouts are governed by CompilationBailoutAction
     @Option(help = "The maximum number of compilation failures or bailouts to handle with the action specified " +
                    "by CompilationFailureAction or CompilationBailoutAction before changing to a less verbose action.", type = OptionType.User)
     public static final OptionKey<Integer> MaxCompilationProblemsPerAction = new OptionKey<>(5);
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.core/src/org/graalvm/compiler/core/doc-files/CompilationBailoutActionHelp.txt	Thu Aug 24 01:13:04 2017 +0000
@@ -0,0 +1,6 @@
+Specifies the action to take when compilation fails with a bailout exception.
+The accepted values are:
+    Silent - Print nothing to the console.
+     Print - Print a stack trace to the console.
+  Diagnose - Retry the compilation with extra diagnostics.
+    ExitVM - Same as Diagnose except that the VM process exits after retrying.
\ No newline at end of file
--- a/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.debug/src/org/graalvm/compiler/debug/DebugContext.java	Wed Aug 23 14:52:55 2017 -0400
+++ b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.debug/src/org/graalvm/compiler/debug/DebugContext.java	Thu Aug 24 01:13:04 2017 +0000
@@ -198,9 +198,22 @@
 
         private Immutable(OptionValues options) {
             this.options = options;
+            String timeValue = Time.getValue(options);
+            String trackMemUseValue = TrackMemUse.getValue(options);
             this.unscopedCounters = parseUnscopedMetricSpec(Counters.getValue(options), "".equals(Count.getValue(options)), false);
-            this.unscopedTimers = parseUnscopedMetricSpec(Timers.getValue(options), "".equals(Time.getValue(options)), true);
-            this.unscopedMemUseTrackers = parseUnscopedMetricSpec(MemUseTrackers.getValue(options), "".equals(TrackMemUse.getValue(options)), true);
+            this.unscopedTimers = parseUnscopedMetricSpec(Timers.getValue(options), "".equals(timeValue), true);
+            this.unscopedMemUseTrackers = parseUnscopedMetricSpec(MemUseTrackers.getValue(options), "".equals(trackMemUseValue), true);
+
+            if (unscopedTimers != null ||
+                            unscopedMemUseTrackers != null ||
+                            timeValue != null ||
+                            trackMemUseValue != null) {
+                try {
+                    Class.forName("java.lang.management.ManagementFactory");
+                } catch (ClassNotFoundException ex) {
+                    throw new IllegalArgumentException("Time, Timers, MemUseTrackers and TrackMemUse options require java.management module");
+                }
+            }
 
             this.scopesEnabled = DumpOnError.getValue(options) ||
                             Dump.getValue(options) != null ||
--- a/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.debug/src/org/graalvm/compiler/debug/DebugFilter.java	Wed Aug 23 14:52:55 2017 -0400
+++ b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.debug/src/org/graalvm/compiler/debug/DebugFilter.java	Thu Aug 24 01:13:04 2017 +0000
@@ -28,8 +28,11 @@
 import org.graalvm.compiler.debug.DebugContext.Scope;
 
 /**
- * Implements the filter specified by the {@link DebugOptions#Dump}, {@link DebugOptions#Log},
- * {@link DebugOptions#Count} and {@link DebugOptions#Time} options.
+ * Implements the filter specified by options such as {@link DebugOptions#Dump},
+ * {@link DebugOptions#Log}, {@link DebugOptions#Count} and {@link DebugOptions#Time}.
+ *
+ * See <a href="doc-files/DumpHelp.txt">here</a> for a description of the filter syntax.
+ *
  * <p>
  * These options enable the associated debug facility if their filter matches the
  * {@linkplain Scope#getQualifiedName() name} of the current scope. For the
@@ -37,47 +40,7 @@
  * {@link DebugOptions#Count} and {@link DebugOptions#Time} options don't have a level, for them
  * {@code level = 0} means disabled and a {@code level > 0} means enabled.
  * <p>
- * A filter is a list of comma-separated terms of the form {@code <pattern>[:<level>]}. {@code
- * <pattern>} is interpreted as a glob pattern if it contains a "*" or "?" character. Otherwise, it
- * is interpreted as a substring. If {@code <pattern>} is empty, it matches every scope. If {@code :
- * <level>} is omitted, it defaults to {@link DebugContext#BASIC_LEVEL}. The term {@code ~<pattern>}
- * is a shorthand for {@code <pattern>:0} to disable a debug facility for a pattern.
- * <p>
- * The resulting log level of a scope is determined by the <em>last</em> matching term. If no term
- * matches, the log level is 0 (disabled). A filter with no terms matches every scope with a log
- * level of {@link DebugContext#BASIC_LEVEL}.
- *
- * <h2>Examples of filters</h2>
- *
- * <ul>
- * <li>(empty string)<br>
- * Matches any scope with log level {@link DebugContext#BASIC_LEVEL}.
- *
- * <li>{@code :1}<br>
- * Matches any scope with log level 1.
- *
- * <li>{@code *}<br>
- * Matches any scope with log level {@link DebugContext#BASIC_LEVEL}.
- *
- * <li>{@code CodeGen,CodeInstall}<br>
- * Matches scopes containing "CodeGen" or "CodeInstall", both with log level
- * {@link DebugContext#BASIC_LEVEL}.
- *
- * <li>{@code CodeGen:2,CodeInstall:1}<br>
- * Matches scopes containing "CodeGen" with log level 2, or "CodeInstall" with log level 1.
- *
- * <li>{@code :1,Dead:2}<br>
- * Matches scopes containing "Dead" with log level 2, and all other scopes with log level 1.
- *
- * <li>{@code :1,Dead:0}<br>
- * Matches all scopes with log level 1, except those containing "Dead".
- *
- * <li>{@code Code*}<br>
- * Matches scopes starting with "Code" with log level {@link DebugContext#BASIC_LEVEL}.
- *
- * <li>{@code Code,~Dead}<br>
- * Matches scopes containing "Code" but not "Dead", with log level {@link DebugContext#BASIC_LEVEL}.
- * </ul>
+ * The syntax for a filter is explained <a href="doc-files/DumpHelp.txt">here</a>.
  */
 final class DebugFilter {
 
@@ -148,13 +111,16 @@
         if (terms == null) {
             return DebugContext.BASIC_LEVEL;
         } else {
-            int level = 0;
+            int defaultLevel = 0;
+            int level = -1;
             for (Term t : terms) {
-                if (t.matches(input)) {
+                if (t.isMatchAny()) {
+                    defaultLevel = t.level;
+                } else if (t.matches(input)) {
                     level = t.level;
                 }
             }
-            return level;
+            return level == -1 ? defaultLevel : level;
         }
     }
 
@@ -176,7 +142,7 @@
 
         Term(String filter, int level) {
             this.level = level;
-            if (filter.isEmpty()) {
+            if (filter.isEmpty() || filter.equals("*")) {
                 this.pattern = null;
             } else if (filter.contains("*") || filter.contains("?")) {
                 this.pattern = Pattern.compile(MethodFilter.createGlobString(filter));
@@ -192,6 +158,10 @@
             return pattern == null || pattern.matcher(input).matches();
         }
 
+        public boolean isMatchAny() { // true for a term that matches every input
+            return pattern == null; // the constructor stores a null pattern for an empty or "*" filter
+        }
+
         @Override
         public String toString() {
             return (pattern == null ? ".*" : pattern.toString()) + ":" + level;
--- a/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.debug/src/org/graalvm/compiler/debug/DebugOptions.java	Wed Aug 23 14:52:55 2017 -0400
+++ b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.debug/src/org/graalvm/compiler/debug/DebugOptions.java	Thu Aug 24 01:13:04 2017 +0000
@@ -64,24 +64,28 @@
                    "An empty value enables all memory usage trackers unconditionally.", type = OptionType.Debug)
     public static final OptionKey<String> MemUseTrackers = new OptionKey<>(null);
 
-    @Option(help = "Pattern for scope(s) in which counting is enabled (see DebugFilter and Debug.counter). " +
+    @Option(help = "Pattern for specifying scopes in which counters are enabled. " +
+                   "See the Dump option for the pattern syntax. " +
                    "An empty value enables all counters unconditionally.", type = OptionType.Debug)
     public static final OptionKey<String> Count = new OptionKey<>(null);
-    @Option(help = "Pattern for scope(s) in which memory use tracking is enabled (see DebugFilter and Debug.counter). " +
+    @Option(help = "Pattern for specifying scopes in which memory use tracking is enabled. " +
+                   "See the Dump option for the pattern syntax. " +
                    "An empty value enables all memory use trackers unconditionally.", type = OptionType.Debug)
     public static final OptionKey<String> TrackMemUse = new OptionKey<>(null);
-    @Option(help = "Pattern for scope(s) in which timing is enabled (see DebugFilter and Debug.timer). " +
+    @Option(help = "Pattern for specifying scopes in which timing is enabled. " +
+                   "See the Dump option for the pattern syntax. " +
                    "An empty value enables all timers unconditionally.", type = OptionType.Debug)
     public static final OptionKey<String> Time = new OptionKey<>(null);
 
-    @Option(help = "Pattern for scope(s) in which verification is enabled (see DebugFilter and Debug.verify).", type = OptionType.Debug)
+    @Option(help = "Pattern for specifying scopes in which verification is enabled. " +
+                   "See the Dump option for the pattern syntax.", type = OptionType.Debug)
     public static final OptionKey<String> Verify = new OptionKey<>(null);
-    @Option(help = "Pattern for scope(s) in which dumping is enabled (see DebugFilter and Debug.dump)", type = OptionType.Debug)
+    @Option(help = "file:doc-files/DumpHelp.txt", type = OptionType.Debug)
     public static final OptionKey<String> Dump = new OptionKey<>(null);
-    @Option(help = "Pattern for scope(s) in which logging is enabled (see DebugFilter and Debug.log)", type = OptionType.Debug)
+    @Option(help = "Pattern for specifying scopes in which logging is enabled. " +
+                   "See the Dump option for the pattern syntax.", type = OptionType.Debug)
     public static final OptionKey<String> Log = new OptionKey<>(null);
-
-    @Option(help = "Pattern for filtering debug scope output based on method context (see MethodFilter)", type = OptionType.Debug)
+    @Option(help = "file:doc-files/MethodFilterHelp.txt")
     public static final OptionKey<String> MethodFilter = new OptionKey<>(null);
     @Option(help = "Only check MethodFilter against the root method in the context if true, otherwise check all methods", type = OptionType.Debug)
     public static final OptionKey<Boolean> MethodFilterRootOnly = new OptionKey<>(false);
@@ -89,13 +93,11 @@
                    "The argument is substring matched against the simple name of the phase class", type = OptionType.Debug)
     public static final OptionKey<String> DumpOnPhaseChange = new OptionKey<>(null);
 
-    @Option(help = "Listst the console at VM shutdown the metric names available to the Timers, Counters and MemUseTrackers option. " +
+    @Option(help = "Lists on the console at VM shutdown the metric names available to the Timers, Counters and MemUseTrackers options. " +
                    "Note that this only lists the metrics that were initialized during the VM execution and so " +
                    "will not include metrics for compiler code that is not executed.", type = OptionType.Debug)
     public static final OptionKey<Boolean> ListMetrics = new OptionKey<>(false);
-    @Option(help = "File to which metrics are dumped per compilation. A CSV format is used if the file ends with .csv " +
-                    "otherwise a more human readable format is used. The fields in the CSV format are: " +
-                    "compilable, compilable_identity, compilation_nr, compilation_id, metric_name, metric_value", type = OptionType.Debug)
+    @Option(help = "file:doc-files/MetricsFileHelp.txt", type = OptionType.Debug)
      public static final OptionKey<String> MetricsFile = new OptionKey<>(null);
     @Option(help = "File to which aggregated metrics are dumped at shutdown. A CSV format is used if the file ends with .csv " +
                     "otherwise a more human readable format is used. If not specified, metrics are dumped to the console.", type = OptionType.Debug)
@@ -149,7 +151,7 @@
     @Option(help = "Enable dumping canonical text from for graphs.", type = OptionType.Debug)
     public static final OptionKey<Boolean> PrintCanonicalGraphStrings = new OptionKey<>(false);
     @Option(help = "Choose format used when dumping canonical text for graphs: " +
-            "0 gives a scheduled graph (better for spotting changes involving the schedule)" +
+            "0 gives a scheduled graph (better for spotting changes involving the schedule) " +
             "while 1 gives a CFG containing expressions rooted at fixed nodes (better for spotting small structure differences)", type = OptionType.Debug)
     public static final OptionKey<Integer> PrintCanonicalGraphStringFlavor = new OptionKey<>(0);
     @Option(help = "Exclude virtual nodes when dumping canonical text for graphs.", type = OptionType.Debug)
--- a/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.debug/src/org/graalvm/compiler/debug/MethodFilter.java	Wed Aug 23 14:52:55 2017 -0400
+++ b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.debug/src/org/graalvm/compiler/debug/MethodFilter.java	Thu Aug 24 01:13:04 2017 +0000
@@ -31,66 +31,7 @@
 
 /**
  * This class implements a method filter that can filter based on class name, method name and
- * parameters. The syntax for the source pattern that is passed to the constructor is as follows:
- *
- * <pre>
- * SourcePatterns = SourcePattern ["," SourcePatterns] .
- * SourcePattern = [ Class "." ] method [ "(" [ Parameter { ";" Parameter } ] ")" ] .
- * Parameter = Class | "int" | "long" | "float" | "double" | "short" | "char" | "boolean" .
- * Class = { package "." } class .
- * </pre>
- *
- *
- * Glob pattern matching (*, ?) is allowed in all parts of the source pattern. Examples for valid
- * filters are:
- *
- * <ul>
- * <li>
- *
- * <pre>
- * visit(Argument;BlockScope)
- * </pre>
- *
- * Matches all methods named "visit", with the first parameter of type "Argument", and the second
- * parameter of type "BlockScope". The packages of the parameter types are irrelevant.</li>
- * <li>
- *
- * <pre>
- * arraycopy(Object;;;;)
- * </pre>
- *
- * Matches all methods named "arraycopy", with the first parameter of type "Object", and four more
- * parameters of any type. The packages of the parameter types are irrelevant.</li>
- * <li>
- *
- * <pre>
- * org.graalvm.compiler.core.graph.PostOrderNodeIterator.*
- * </pre>
- *
- * Matches all methods in the class "org.graalvm.compiler.core.graph.PostOrderNodeIterator".</li>
- * <li>
- *
- * <pre>
- * *
- * </pre>
- *
- * Matches all methods in all classes</li>
- * <li>
- *
- * <pre>
- * org.graalvm.compiler.core.graph.*.visit
- * </pre>
- *
- * Matches all methods named "visit" in classes in the package "org.graalvm.compiler.core.graph".
- * <li>
- *
- * <pre>
- * arraycopy,toString
- * </pre>
- *
- * Matches all methods named "arraycopy" or "toString", meaning that ',' acts as an <i>or</i>
- * operator.</li>
- * </ul>
+ * parameters. The syntax for a filter is explained <a href="doc-files/MethodFilterHelp.txt">here</a>.
  */
 public class MethodFilter {
 
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.debug/src/org/graalvm/compiler/debug/doc-files/DumpHelp.txt	Thu Aug 24 01:13:04 2017 +0000
@@ -0,0 +1,61 @@
+Filter pattern for specifying scopes in which dumping is enabled.
+
+A filter is a list of comma-separated terms of the form:
+ 
+  <pattern>[:<level>]
+ 
+If <pattern> contains a "*" or "?" character, it is interpreted as a glob pattern.
+Otherwise, it is interpreted as a substring. If <pattern> is empty, it
+matches every scope. If :<level> is omitted, it defaults to 1. The term
+~<pattern> is a shorthand for <pattern>:0 to disable a debug facility for a pattern.
+
+The default log level is 0 (disabled). Terms with an empty pattern set
+the default log level to the specified value. The last
+matching term with a non-empty pattern selects the level specified. If
+no term matches, the log level is the default level. A filter with no
+terms matches every scope with a log level of 1.
+
+Examples of debug filters:
+--------- 
+  (empty string)
+
+  Matches any scope with level 1.
+--------- 
+  :1
+
+  Matches any scope with level 1.
+--------- 
+  *
+
+  Matches any scope with level 1.
+--------- 
+  CodeGen,CodeInstall
+
+  Matches scopes containing "CodeGen" or "CodeInstall", both with level 1.
+--------- 
+  CodeGen:2,CodeInstall:1
+
+  Matches scopes containing "CodeGen" with level 2, or "CodeInstall" with level 1.
+---------
+  Outer:2,Inner:0
+
+  Matches scopes containing "Outer" with log level 2, or "Inner" with log level 0. If the scope
+  name contains both patterns then the log level will be 0. This is useful for silencing subscopes.
+---------
+  :1,Dead:2
+
+  Matches scopes containing "Dead" with level 2, and all other scopes with level 1.
+--------- 
+  Dead:0,:1
+
+  Matches all scopes with level 1, except those containing "Dead".   Note that the location of
+  the :1 doesn't matter since it's specifying the default log level so it's the same as
+  specifying :1,Dead:0.
+--------- 
+  Code*
+
+  Matches scopes starting with "Code" with level 1.
+--------- 
+  Code,~Dead
+
+  Matches scopes containing "Code" but not "Dead", with level 1.
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.debug/src/org/graalvm/compiler/debug/doc-files/MethodFilterHelp.txt	Thu Aug 24 01:13:04 2017 +0000
@@ -0,0 +1,40 @@
+Pattern for filtering debug scope output based on method context.
+The syntax for a pattern is:
+
+  SourcePatterns = SourcePattern ["," SourcePatterns] .
+  SourcePattern = [ Class "." ] method [ "(" [ Parameter { ";" Parameter } ] ")" ] .
+  Parameter = Class | "int" | "long" | "float" | "double" | "short" | "char" | "boolean" .
+  Class = { package "." } class .
+ 
+Glob pattern matching (*, ?) is allowed in all parts of the source pattern.
+
+Examples of method filters:
+--------- 
+  visit(Argument;BlockScope)  
+  
+  Matches all methods named "visit", with the first parameter of
+  type "Argument", and the second parameter of type "BlockScope".
+  The packages of the parameter types are irrelevant.
+---------
+  arraycopy(Object;;;;)
+ 
+  Matches all methods named "arraycopy", with the first parameter
+  of type "Object", and four more parameters of any type. The
+  packages of the parameter types are irrelevant.
+---------    
+  org.graalvm.compiler.core.graph.PostOrderNodeIterator.*
+ 
+  Matches all methods in the class "org.graalvm.compiler.core.graph.PostOrderNodeIterator".
+---------    
+  *
+ 
+  Matches all methods in all classes.
+---------
+  org.graalvm.compiler.core.graph.*.visit
+ 
+  Matches all methods named "visit" in classes in the package
+  "org.graalvm.compiler.core.graph".
+---------
+  arraycopy,toString
+ 
+  Matches all methods named "arraycopy" or "toString", meaning that ',' acts as an or operator.
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.debug/src/org/graalvm/compiler/debug/doc-files/MetricsFileHelp.txt	Thu Aug 24 01:13:04 2017 +0000
@@ -0,0 +1,11 @@
+File to which metrics are dumped per compilation.
+A CSV format is used if the file name ends with .csv; otherwise a more
+human-readable format is used. The fields in the CSV format are:
+           compilable - method being compiled
+  compilable_identity - identity hash code of compilable
+       compilation_nr - where this compilation lies in the ordered
+                        sequence of all compilations identified by
+                        compilable_identity
+       compilation_id - runtime issued identifier for the compilation
+          metric_name - name of metric
+         metric_value - value of metric
--- a/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.graph/src/org/graalvm/compiler/graph/NodeClass.java	Wed Aug 23 14:52:55 2017 -0400
+++ b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.graph/src/org/graalvm/compiler/graph/NodeClass.java	Thu Aug 24 01:13:04 2017 +0000
@@ -931,7 +931,7 @@
     }
 
     /**
-     * @returns true if the node has no inputs and no successors
+     * @return true if the node has no inputs and no successors
      */
     public boolean isLeafNode() {
         return isLeafNode;
--- a/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.hotspot.test/src/org/graalvm/compiler/hotspot/test/CompilationWrapperTest.java	Wed Aug 23 14:52:55 2017 -0400
+++ b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.hotspot.test/src/org/graalvm/compiler/hotspot/test/CompilationWrapperTest.java	Thu Aug 24 01:13:04 2017 +0000
@@ -40,20 +40,12 @@
 import org.graalvm.compiler.test.SubprocessUtil;
 import org.graalvm.compiler.test.SubprocessUtil.Subprocess;
 import org.junit.Assert;
-import org.junit.Assume;
 import org.junit.Test;
 
 /**
  * Tests support for dumping graphs and other info useful for debugging a compiler crash.
  */
 public class CompilationWrapperTest extends GraalCompilerTest {
-    public CompilationWrapperTest() {
-        try {
-            Class.forName("java.lang.management.ManagementFactory");
-        } catch (ClassNotFoundException ex) {
-            Assume.assumeNoException("skip this test if there is no java.management JDK9 module around", ex);
-        }
-    }
 
     /**
      * Tests compilation requested by the VM.
--- a/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.hotspot.test/src/org/graalvm/compiler/hotspot/test/ObjectCloneTest.java	Wed Aug 23 14:52:55 2017 -0400
+++ b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.hotspot.test/src/org/graalvm/compiler/hotspot/test/ObjectCloneTest.java	Thu Aug 24 01:13:04 2017 +0000
@@ -25,6 +25,7 @@
 import java.util.ArrayList;
 
 import org.graalvm.compiler.core.test.GraalCompilerTest;
+import org.graalvm.compiler.nodes.graphbuilderconf.GraphBuilderConfiguration;
 import org.junit.Test;
 
 /**
@@ -84,4 +85,20 @@
         }
         test("cloneList", list);
     }
+
+    @Override
+    protected GraphBuilderConfiguration editGraphBuilderConfiguration(GraphBuilderConfiguration conf) {
+        return super.editGraphBuilderConfiguration(conf.withNodeSourcePosition(true));
+    }
+
+    static final int[] ARRAY = new int[]{1, 2, 4, 3};
+
+    public static int[] cloneConstantArray() {
+        return ARRAY.clone();
+    }
+
+    @Test
+    public void testCloneConstantArray() {
+        test("cloneConstantArray");
+    }
 }
--- a/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.hotspot/src/org/graalvm/compiler/hotspot/CompilerConfigurationFactory.java	Wed Aug 23 14:52:55 2017 -0400
+++ b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.hotspot/src/org/graalvm/compiler/hotspot/CompilerConfigurationFactory.java	Thu Aug 24 01:13:04 2017 +0000
@@ -157,8 +157,8 @@
     /**
      * Selects and instantiates a {@link CompilerConfigurationFactory}. The selection algorithm is
      * as follows: if {@code name} is non-null, then select the factory with the same name else if
-     * {@link Options#CompilerConfiguration}{@code .getValue()} is non-null then select the factory
-     * whose name matches the value else select the factory with the highest
+     * {@code Options.CompilerConfiguration.getValue()} is non-null then select the factory whose
+     * name matches the value else select the factory with the highest
      * {@link #autoSelectionPriority} value.
      *
      * @param name the name of the compiler configuration to select (optional)
--- a/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.hotspot/src/org/graalvm/compiler/hotspot/JVMCIVersionCheck.java	Wed Aug 23 14:52:55 2017 -0400
+++ b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.hotspot/src/org/graalvm/compiler/hotspot/JVMCIVersionCheck.java	Thu Aug 24 01:13:04 2017 +0000
@@ -23,7 +23,6 @@
 package org.graalvm.compiler.hotspot;
 
 import java.util.Formatter;
-import java.util.Objects;
 
 /**
  * Mechanism for checking that the current Java runtime environment supports the minimum JVMCI API
@@ -40,10 +39,6 @@
     private static final int JVMCI8_MIN_MAJOR_VERSION = 0;
     private static final int JVMCI8_MIN_MINOR_VERSION = 29;
 
-    // MAX_VALUE indicates that no current EA version is compatible with Graal.
-    // Note: Keep README.md in sync with the EA version support checked here.
-    private static final int JVMCI9_MIN_EA_BUILD = 176;
-
     private static void failVersionCheck(boolean exit, String reason, Object... args) {
         Formatter errorMessage = new Formatter().format(reason, args);
         String javaHome = System.getProperty("java.home");
@@ -55,7 +50,7 @@
         if (System.getProperty("java.specification.version").compareTo("1.9") < 0) {
             errorMessage.format("Download the latest JVMCI JDK 8 from http://www.oracle.com/technetwork/oracle-labs/program-languages/downloads/index.html");
         } else {
-            errorMessage.format("Download the latest JDK 9 EA from https://jdk9.java.net/download/");
+            errorMessage.format("Download the latest JDK 9 build from https://jdk9.java.net/download/");
         }
         String value = System.getenv("JVMCI_VERSION_CHECK");
         if ("warn".equals(value)) {
@@ -119,34 +114,11 @@
                 // Allow local builds
                 return;
             }
-            // http://openjdk.java.net/jeps/223
-            if (vmVersion.startsWith("9+")) {
-                int start = "9+".length();
-                int end = start;
-                end = start;
-                while (end < vmVersion.length() && Character.isDigit(vmVersion.charAt(end))) {
-                    end++;
-                }
-                int build;
-                try {
-                    build = Integer.parseInt(vmVersion.substring(start, end));
-                } catch (NumberFormatException e) {
-                    failVersionCheck(exitOnFailure, "The VM does not support the minimum JVMCI API version required by Graal.%n" +
-                                    "Cannot read JDK9 EA build number from java.vm.version property: %s.%n", vmVersion);
-                    return;
-                }
-                if (build >= JVMCI9_MIN_EA_BUILD) {
-                    return;
-                }
-                if (Objects.equals(JVMCI9_MIN_EA_BUILD, Integer.MAX_VALUE)) {
-                    failVersionCheck(exitOnFailure, "This version of Graal is not compatible with any JDK 9 Early Access build.%n");
-                } else {
-                    failVersionCheck(exitOnFailure, "The VM is an insufficiently recent EA JDK9 build for Graal: %d < %d.%n", build, JVMCI9_MIN_EA_BUILD);
-                }
+            if (vmVersion.startsWith("9-ea")) {
+                failVersionCheck(exitOnFailure, "This version of Graal is not compatible with JDK 9 Early Access builds.%n");
                 return;
             } else {
-                // Graal will be compatible with all JDK versions as of 9 GA
-                // until a JVMCI API change is made in a 9u or later release.
+                // Graal is compatible with all JDK versions as of 9 GA.
             }
         }
     }
--- a/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.hotspot/src/org/graalvm/compiler/hotspot/debug/BenchmarkCounters.java	Wed Aug 23 14:52:55 2017 -0400
+++ b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.hotspot/src/org/graalvm/compiler/hotspot/debug/BenchmarkCounters.java	Thu Aug 24 01:13:04 2017 +0000
@@ -41,7 +41,6 @@
 import org.graalvm.compiler.debug.GraalError;
 import org.graalvm.compiler.debug.TTY;
 import org.graalvm.compiler.hotspot.GraalHotSpotVMConfig;
-import org.graalvm.compiler.hotspot.replacements.HotspotSnippetsOptions;
 import org.graalvm.compiler.nodes.debug.DynamicCounterNode;
 import org.graalvm.compiler.options.Option;
 import org.graalvm.compiler.options.OptionKey;
@@ -69,20 +68,8 @@
  * Counters will be displayed as a rate (per second) if their group name starts with "~", otherwise
  * they will be displayed as a total number.
  *
- * <h1>Example</h1> In order to create statistics about allocations within the DaCapo pmd benchmark
- * the following steps are necessary:
- * <ul>
- * <li>Set {@code -XX:JVMCICounterSize=value}. The actual required value depends on the granularity
- * of the profiling, 10000 should be enough for most cases.</li>
- * <li>Also: {@code -XX:+/-JVMCICountersExcludeCompiler} specifies whether the numbers generated by
- * compiler threads should be excluded (default: true).</li>
- * <li>Start the DaCapo pmd benchmark with
- * {@code "-Dgraal.BenchmarkDynamicCounters=err, starting ====, PASSED in "} and
- * {@code -Dgraal.ProfileAllocations=true}.</li>
- * <li>The numbers will only include allocation from compiled code!</li>
- * <li>The counters can be further configured by modifying the
- * {@link HotspotSnippetsOptions#ProfileAllocationsContext} flag..</li>
- * </ul>
+ * See <a href="BenchmarkDynamicCountersHelp.txt">here</a> for a detailed example of how to use
+ * benchmark counters.
  */
 public class BenchmarkCounters {
 
@@ -94,11 +81,7 @@
         @Option(help = "Turn on the benchmark counters, and displays the results every n milliseconds", type = OptionType.Debug)
         public static final OptionKey<Integer> TimedDynamicCounters = new OptionKey<>(-1);
 
-        @Option(help = "Turn on the benchmark counters, and listen for specific patterns on System.out/System.err:%n" +
-                       "Format: (err|out),start pattern,end pattern (~ matches multiple digits)%n" +
-                       "Examples:%n" +
-                       "  dacapo = 'err, starting =====, PASSED in'%n" +
-                       "  specjvm2008 = 'out,Iteration ~ (~s) begins:,Iteration ~ (~s) ends:'", type = OptionType.Debug)
+        @Option(help = "file:doc-files/BenchmarkDynamicCountersHelp.txt", type = OptionType.Debug)
         public static final OptionKey<String> BenchmarkDynamicCounters = new OptionKey<>(null);
         @Option(help = "Use grouping separators for number printing", type = OptionType.Debug)
         public static final OptionKey<Boolean> DynamicCountersPrintGroupSeparator = new OptionKey<>(true);
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.hotspot/src/org/graalvm/compiler/hotspot/debug/doc-files/BenchmarkDynamicCountersHelp.txt	Thu Aug 24 01:13:04 2017 +0000
@@ -0,0 +1,24 @@
+Turn on the benchmark counters, and listen for specific patterns on System.out/System.err.
+The format of this option is:
+
+  (err|out),start pattern,end pattern
+  
+You can use "~" to match 1 or more digits.
+Examples:
+
+  err, starting =====, PASSED in
+  out,Iteration ~ (~s) begins:,Iteration ~ (~s) ends:
+  
+The first pattern matches DaCapo output and the second matches SPECjvm2008 output.
+
+As a more detailed example, here are the options to use for getting statistics
+about allocations within the DaCapo pmd benchmark:
+
+  -XX:JVMCICounterSize=<value> -XX:-JVMCICountersExcludeCompiler \
+  -Dgraal.BenchmarkDynamicCounters="err, starting ====, PASSED in " \
+  -Dgraal.ProfileAllocations=true
+  
+The JVMCICounterSize value depends on the granularity of the profiling -
+10000 should be sufficient. Omit -XX:-JVMCICountersExcludeCompiler to
+exclude allocations made on the compiler threads from the counts.
+The counters can be further configured by the ProfileAllocationsContext option.
\ No newline at end of file
--- a/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.hotspot/src/org/graalvm/compiler/hotspot/meta/HotSpotSuitesProvider.java	Wed Aug 23 14:52:55 2017 -0400
+++ b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.hotspot/src/org/graalvm/compiler/hotspot/meta/HotSpotSuitesProvider.java	Thu Aug 24 01:13:04 2017 +0000
@@ -25,6 +25,7 @@
 import static org.graalvm.compiler.core.common.GraalOptions.GeneratePIC;
 import static org.graalvm.compiler.core.common.GraalOptions.ImmutableCode;
 import static org.graalvm.compiler.core.common.GraalOptions.VerifyPhases;
+import static org.graalvm.compiler.core.phases.HighTier.Options.Inline;
 
 import java.util.ListIterator;
 
@@ -98,10 +99,12 @@
                 midTierLowering.add(new ReplaceConstantNodesPhase());
 
                 // Replace inlining policy
-                ListIterator<BasePhase<? super HighTierContext>> iter = ret.getHighTier().findPhase(InliningPhase.class);
-                InliningPhase inlining = (InliningPhase) iter.previous();
-                CanonicalizerPhase canonicalizer = inlining.getCanonicalizer();
-                iter.set(new InliningPhase(new AOTInliningPolicy(null), canonicalizer));
+                if (Inline.getValue(options)) {
+                    ListIterator<BasePhase<? super HighTierContext>> iter = ret.getHighTier().findPhase(InliningPhase.class);
+                    InliningPhase inlining = (InliningPhase) iter.previous();
+                    CanonicalizerPhase canonicalizer = inlining.getCanonicalizer();
+                    iter.set(new InliningPhase(new AOTInliningPolicy(null), canonicalizer));
+                }
             }
         }
 
--- a/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.hotspot/src/org/graalvm/compiler/hotspot/replacements/HotspotSnippetsOptions.java	Wed Aug 23 14:52:55 2017 -0400
+++ b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.hotspot/src/org/graalvm/compiler/hotspot/replacements/HotspotSnippetsOptions.java	Thu Aug 24 01:13:04 2017 +0000
@@ -51,7 +51,7 @@
     @Option(help = "Enable profiling of allocation sites.", type = OptionType.Debug)
     public static final OptionKey<Boolean> ProfileAllocations = new OptionKey<>(false);
 
-    @Option(help = "Control the naming of the counters when using ProfileAllocations.", type = OptionType.Debug)
+    @Option(help = "file:doc-files/ProfileAllocationsContextHelp.txt", type = OptionType.Debug)
     public static final EnumOptionKey<ProfileContext> ProfileAllocationsContext = new EnumOptionKey<>(ProfileContext.AllocatingMethod);
 
     @Option(help = "Enable profiling of monitor operations.", type = OptionType.Debug)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.hotspot/src/org/graalvm/compiler/hotspot/replacements/doc-files/ProfileAllocationsContextHelp.txt	Thu Aug 24 01:13:04 2017 +0000
@@ -0,0 +1,8 @@
+Control the naming and granularity of the counters when using ProfileAllocations.
+The accepted values are:
+        AllocatingMethod - a counter per method
+         InstanceOrArray - one counter for all instance allocations and
+                           one counter for all array allocations 
+           AllocatedType - one counter per allocated type
+  AllocatedTypesInMethod - one counter per allocated type, per method
+ 
\ No newline at end of file
--- a/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.java/src/org/graalvm/compiler/java/ComputeLoopFrequenciesClosure.java	Wed Aug 23 14:52:55 2017 -0400
+++ b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.java/src/org/graalvm/compiler/java/ComputeLoopFrequenciesClosure.java	Thu Aug 24 01:13:04 2017 +0000
@@ -36,6 +36,8 @@
 import org.graalvm.compiler.phases.graph.ReentrantNodeIterator;
 import org.graalvm.util.EconomicMap;
 
+import static org.graalvm.compiler.nodes.cfg.ControlFlowGraph.multiplyProbabilities;
+
 public final class ComputeLoopFrequenciesClosure extends ReentrantNodeIterator.NodeIteratorClosure<Double> {
 
     private static final ComputeLoopFrequenciesClosure INSTANCE = new ComputeLoopFrequenciesClosure();
@@ -75,32 +77,18 @@
         for (double d : exitStates.getValues()) {
             exitProbability += d;
         }
-        exitProbability = Math.min(1D, exitProbability);
-        if (exitProbability < ControlFlowGraph.MIN_PROBABILITY) {
-            exitProbability = ControlFlowGraph.MIN_PROBABILITY;
-        }
-        assert exitProbability <= 1D && exitProbability >= 0D;
-        double loopFrequency = 1D / exitProbability;
+        exitProbability = Math.min(1.0, exitProbability);
+        exitProbability = Math.max(ControlFlowGraph.MIN_PROBABILITY, exitProbability);
+        double loopFrequency = 1.0 / exitProbability;
         loop.setLoopFrequency(loopFrequency);
 
         double adjustmentFactor = initialState * loopFrequency;
-        exitStates.replaceAll((exitNode, probability) -> multiplySaturate(probability, adjustmentFactor));
+        exitStates.replaceAll((exitNode, probability) -> multiplyProbabilities(probability, adjustmentFactor));
 
         return exitStates;
     }
 
     /**
-     * Multiplies a and b and saturates the result to {@link ControlFlowGraph#MAX_PROBABILITY}.
-     */
-    public static double multiplySaturate(double a, double b) {
-        double r = a * b;
-        if (r > ControlFlowGraph.MAX_PROBABILITY) {
-            return ControlFlowGraph.MAX_PROBABILITY;
-        }
-        return r;
-    }
-
-    /**
      * Computes the frequencies of all loops in the given graph. This is done by performing a
      * reverse postorder iteration and computing the probability of all fixed nodes. The combined
      * probability of all exits of a loop can be used to compute the loop's expected frequency.
--- a/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.lir.aarch64/src/org/graalvm/compiler/lir/aarch64/AArch64ArrayEqualsOp.java	Wed Aug 23 14:52:55 2017 -0400
+++ b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.lir.aarch64/src/org/graalvm/compiler/lir/aarch64/AArch64ArrayEqualsOp.java	Thu Aug 24 01:13:04 2017 +0000
@@ -68,6 +68,8 @@
 
     public AArch64ArrayEqualsOp(LIRGeneratorTool tool, JavaKind kind, Value result, Value array1, Value array2, Value length) {
         super(TYPE);
+
+        assert !kind.isNumericFloat() : "Float arrays comparison (bitwise_equal || both_NaN) isn't supported";
         this.kind = kind;
 
         Class<?> arrayClass = Array.newInstance(kind.toJavaClass(), 0).getClass();
--- a/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.lir.amd64/src/org/graalvm/compiler/lir/amd64/AMD64ArrayEqualsOp.java	Wed Aug 23 14:52:55 2017 -0400
+++ b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.lir.amd64/src/org/graalvm/compiler/lir/amd64/AMD64ArrayEqualsOp.java	Thu Aug 24 01:13:04 2017 +0000
@@ -33,6 +33,8 @@
 import org.graalvm.compiler.asm.amd64.AMD64Address;
 import org.graalvm.compiler.asm.amd64.AMD64Address.Scale;
 import org.graalvm.compiler.asm.amd64.AMD64Assembler.ConditionFlag;
+import org.graalvm.compiler.asm.amd64.AMD64Assembler.OperandSize;
+import org.graalvm.compiler.asm.amd64.AMD64Assembler.SSEOp;
 import org.graalvm.compiler.asm.amd64.AMD64MacroAssembler;
 import org.graalvm.compiler.core.common.LIRKind;
 import org.graalvm.compiler.lir.LIRInstructionClass;
@@ -69,6 +71,10 @@
     @Temp({REG}) protected Value temp2;
     @Temp({REG}) protected Value temp3;
     @Temp({REG}) protected Value temp4;
+
+    @Temp({REG, ILLEGAL}) protected Value temp5;
+    @Temp({REG, ILLEGAL}) protected Value tempXMM;
+
     @Temp({REG, ILLEGAL}) protected Value vectorTemp1;
     @Temp({REG, ILLEGAL}) protected Value vectorTemp2;
 
@@ -91,6 +97,15 @@
         this.temp3 = tool.newVariable(LIRKind.value(tool.target().arch.getWordKind()));
         this.temp4 = tool.newVariable(LIRKind.value(tool.target().arch.getWordKind()));
 
+        this.temp5 = kind.isNumericFloat() ? tool.newVariable(LIRKind.value(tool.target().arch.getWordKind())) : Value.ILLEGAL;
+        if (kind == JavaKind.Float) {
+            this.tempXMM = tool.newVariable(LIRKind.value(AMD64Kind.SINGLE));
+        } else if (kind == JavaKind.Double) {
+            this.tempXMM = tool.newVariable(LIRKind.value(AMD64Kind.DOUBLE));
+        } else {
+            this.tempXMM = Value.ILLEGAL;
+        }
+
         // We only need the vector temporaries if we generate SSE code.
         if (supportsSSE41(tool.target())) {
             this.vectorTemp1 = tool.newVariable(LIRKind.value(AMD64Kind.DOUBLE));
@@ -170,10 +185,14 @@
         Label loop = new Label();
         Label compareTail = new Label();
 
+        boolean requiresNaNCheck = kind.isNumericFloat();
+        Label loopCheck = new Label();
+        Label nanCheck = new Label();
+
         // Compare 16-byte vectors
         masm.andl(result, SSE4_1_VECTOR_SIZE - 1); // tail count (in bytes)
         masm.andl(length, ~(SSE4_1_VECTOR_SIZE - 1)); // vector count (in bytes)
-        masm.jccb(ConditionFlag.Zero, compareTail);
+        masm.jcc(ConditionFlag.Zero, compareTail);
 
         masm.leaq(array1, new AMD64Address(array1, length, Scale.Times1, 0));
         masm.leaq(array2, new AMD64Address(array2, length, Scale.Times1, 0));
@@ -186,13 +205,24 @@
         masm.movdqu(vector2, new AMD64Address(array2, length, Scale.Times1, 0));
         masm.pxor(vector1, vector2);
         masm.ptest(vector1, vector1);
-        masm.jcc(ConditionFlag.NotZero, falseLabel);
+        masm.jcc(ConditionFlag.NotZero, requiresNaNCheck ? nanCheck : falseLabel);
+
+        masm.bind(loopCheck);
         masm.addq(length, SSE4_1_VECTOR_SIZE);
         masm.jcc(ConditionFlag.NotZero, loop);
 
         masm.testl(result, result);
         masm.jcc(ConditionFlag.Zero, trueLabel);
 
+        if (requiresNaNCheck) {
+            Label unalignedCheck = new Label();
+            masm.jmpb(unalignedCheck);
+            masm.bind(nanCheck);
+            emitFloatCompareWithinRange(crb, masm, array1, array2, length, 0, falseLabel, SSE4_1_VECTOR_SIZE);
+            masm.jmpb(loopCheck);
+            masm.bind(unalignedCheck);
+        }
+
         /*
          * Compare the remaining bytes with an unaligned memory load aligned to the end of the
          * array.
@@ -201,7 +231,12 @@
         masm.movdqu(vector2, new AMD64Address(array2, result, Scale.Times1, -SSE4_1_VECTOR_SIZE));
         masm.pxor(vector1, vector2);
         masm.ptest(vector1, vector1);
-        masm.jcc(ConditionFlag.NotZero, falseLabel);
+        if (requiresNaNCheck) {
+            masm.jcc(ConditionFlag.Zero, trueLabel);
+            emitFloatCompareWithinRange(crb, masm, array1, array2, result, -SSE4_1_VECTOR_SIZE, falseLabel, SSE4_1_VECTOR_SIZE);
+        } else {
+            masm.jcc(ConditionFlag.NotZero, falseLabel);
+        }
         masm.jmp(trueLabel);
 
         masm.bind(compareTail);
@@ -233,10 +268,14 @@
         Label loop = new Label();
         Label compareTail = new Label();
 
+        boolean requiresNaNCheck = kind.isNumericFloat();
+        Label loopCheck = new Label();
+        Label nanCheck = new Label();
+
         // Compare 16-byte vectors
         masm.andl(result, AVX_VECTOR_SIZE - 1); // tail count (in bytes)
         masm.andl(length, ~(AVX_VECTOR_SIZE - 1)); // vector count (in bytes)
-        masm.jccb(ConditionFlag.Zero, compareTail);
+        masm.jcc(ConditionFlag.Zero, compareTail);
 
         masm.leaq(array1, new AMD64Address(array1, length, Scale.Times1, 0));
         masm.leaq(array2, new AMD64Address(array2, length, Scale.Times1, 0));
@@ -249,13 +288,24 @@
         masm.vmovdqu(vector2, new AMD64Address(array2, length, Scale.Times1, 0));
         masm.vpxor(vector1, vector1, vector2);
         masm.vptest(vector1, vector1);
-        masm.jcc(ConditionFlag.NotZero, falseLabel);
+        masm.jcc(ConditionFlag.NotZero, requiresNaNCheck ? nanCheck : falseLabel);
+
+        masm.bind(loopCheck);
         masm.addq(length, AVX_VECTOR_SIZE);
         masm.jcc(ConditionFlag.NotZero, loop);
 
         masm.testl(result, result);
         masm.jcc(ConditionFlag.Zero, trueLabel);
 
+        if (requiresNaNCheck) {
+            Label unalignedCheck = new Label();
+            masm.jmpb(unalignedCheck);
+            masm.bind(nanCheck);
+            emitFloatCompareWithinRange(crb, masm, array1, array2, length, 0, falseLabel, AVX_VECTOR_SIZE);
+            masm.jmpb(loopCheck);
+            masm.bind(unalignedCheck);
+        }
+
         /*
          * Compare the remaining bytes with an unaligned memory load aligned to the end of the
          * array.
@@ -264,7 +314,12 @@
         masm.vmovdqu(vector2, new AMD64Address(array2, result, Scale.Times1, -AVX_VECTOR_SIZE));
         masm.vpxor(vector1, vector1, vector2);
         masm.vptest(vector1, vector1);
-        masm.jcc(ConditionFlag.NotZero, falseLabel);
+        if (requiresNaNCheck) {
+            masm.jcc(ConditionFlag.Zero, trueLabel);
+            emitFloatCompareWithinRange(crb, masm, array1, array2, result, -AVX_VECTOR_SIZE, falseLabel, AVX_VECTOR_SIZE);
+        } else {
+            masm.jcc(ConditionFlag.NotZero, falseLabel);
+        }
         masm.jmp(trueLabel);
 
         masm.bind(compareTail);
@@ -283,11 +338,15 @@
         Label loop = new Label();
         Label compareTail = new Label();
 
+        boolean requiresNaNCheck = kind.isNumericFloat();
+        Label loopCheck = new Label();
+        Label nanCheck = new Label();
+
         Register temp = asRegister(temp4);
 
         masm.andl(result, VECTOR_SIZE - 1); // tail count (in bytes)
         masm.andl(length, ~(VECTOR_SIZE - 1));  // vector count (in bytes)
-        masm.jccb(ConditionFlag.Zero, compareTail);
+        masm.jcc(ConditionFlag.Zero, compareTail);
 
         masm.leaq(array1, new AMD64Address(array1, length, Scale.Times1, 0));
         masm.leaq(array2, new AMD64Address(array2, length, Scale.Times1, 0));
@@ -298,12 +357,27 @@
         masm.bind(loop);
         masm.movq(temp, new AMD64Address(array1, length, Scale.Times1, 0));
         masm.cmpq(temp, new AMD64Address(array2, length, Scale.Times1, 0));
-        masm.jccb(ConditionFlag.NotEqual, falseLabel);
+        masm.jcc(ConditionFlag.NotEqual, requiresNaNCheck ? nanCheck : falseLabel);
+
+        masm.bind(loopCheck);
         masm.addq(length, VECTOR_SIZE);
         masm.jccb(ConditionFlag.NotZero, loop);
 
         masm.testl(result, result);
-        masm.jccb(ConditionFlag.Zero, trueLabel);
+        masm.jcc(ConditionFlag.Zero, trueLabel);
+
+        if (requiresNaNCheck) {
+            // NaN check is slow path and hence placed outside of the main loop.
+            Label unalignedCheck = new Label();
+            masm.jmpb(unalignedCheck);
+            masm.bind(nanCheck);
+            // At most two iterations, unroll in the emitted code.
+            for (int offset = 0; offset < VECTOR_SIZE; offset += kind.getByteCount()) {
+                emitFloatCompare(masm, array1, array2, length, offset, falseLabel, kind.getByteCount() == VECTOR_SIZE);
+            }
+            masm.jmpb(loopCheck);
+            masm.bind(unalignedCheck);
+        }
 
         /*
          * Compare the remaining bytes with an unaligned memory load aligned to the end of the
@@ -311,7 +385,15 @@
          */
         masm.movq(temp, new AMD64Address(array1, result, Scale.Times1, -VECTOR_SIZE));
         masm.cmpq(temp, new AMD64Address(array2, result, Scale.Times1, -VECTOR_SIZE));
-        masm.jccb(ConditionFlag.NotEqual, falseLabel);
+        if (requiresNaNCheck) {
+            masm.jcc(ConditionFlag.Equal, trueLabel);
+            // At most two iterations, unroll in the emitted code.
+            for (int offset = 0; offset < VECTOR_SIZE; offset += kind.getByteCount()) {
+                emitFloatCompare(masm, array1, array2, result, -VECTOR_SIZE + offset, falseLabel, kind.getByteCount() == VECTOR_SIZE);
+            }
+        } else {
+            masm.jccb(ConditionFlag.NotEqual, falseLabel);
+        }
         masm.jmpb(trueLabel);
 
         masm.bind(compareTail);
@@ -333,8 +415,13 @@
             masm.jccb(ConditionFlag.Zero, compare2Bytes);
             masm.movl(temp, new AMD64Address(array1, 0));
             masm.cmpl(temp, new AMD64Address(array2, 0));
-            masm.jccb(ConditionFlag.NotEqual, falseLabel);
-
+            if (kind == JavaKind.Float) {
+                masm.jccb(ConditionFlag.Equal, trueLabel);
+                emitFloatCompare(masm, array1, array2, Register.None, 0, falseLabel, true);
+                masm.jmpb(trueLabel);
+            } else {
+                masm.jccb(ConditionFlag.NotEqual, falseLabel);
+            }
             if (kind.getByteCount() <= 2) {
                 // Move array pointers forward.
                 masm.leaq(array1, new AMD64Address(array1, 4));
@@ -372,6 +459,71 @@
         }
     }
 
+    /**
+     * Emits code to fall through if {@code src} is NaN, otherwise jump to {@code branchIfNonNaN}.
+     */
+    private void emitNaNCheck(AMD64MacroAssembler masm, AMD64Address src, Label branchIfNonNaN) {
+        assert kind.isNumericFloat();
+        Register tempXMMReg = asRegister(tempXMM);
+        if (kind == JavaKind.Float) {
+            masm.movflt(tempXMMReg, src);
+        } else {
+            masm.movdbl(tempXMMReg, src);
+        }
+        SSEOp.UCOMIS.emit(masm, kind == JavaKind.Float ? OperandSize.PS : OperandSize.PD, tempXMMReg, tempXMMReg);
+        masm.jcc(ConditionFlag.NoParity, branchIfNonNaN);
+    }
+
+    /**
+     * Emits code to compare if two floats are bitwise equal or both NaN.
+     */
+    private void emitFloatCompare(AMD64MacroAssembler masm, Register base1, Register base2, Register index, int offset, Label falseLabel, boolean skipBitwiseCompare) {
+        AMD64Address address1 = new AMD64Address(base1, index, Scale.Times1, offset);
+        AMD64Address address2 = new AMD64Address(base2, index, Scale.Times1, offset);
+
+        Label bitwiseEqual = new Label();
+
+        if (!skipBitwiseCompare) {
+            // Bitwise compare
+            Register temp = asRegister(temp4);
+
+            if (kind == JavaKind.Float) {
+                masm.movl(temp, address1);
+                masm.cmpl(temp, address2);
+            } else {
+                masm.movq(temp, address1);
+                masm.cmpq(temp, address2);
+            }
+            masm.jccb(ConditionFlag.Equal, bitwiseEqual);
+        }
+
+        emitNaNCheck(masm, address1, falseLabel);
+        emitNaNCheck(masm, address2, falseLabel);
+
+        masm.bind(bitwiseEqual);
+    }
+
+    /**
+     * Emits code to compare float equality within a range.
+     */
+    private void emitFloatCompareWithinRange(CompilationResultBuilder crb, AMD64MacroAssembler masm, Register base1, Register base2, Register index, int offset, Label falseLabel, int range) {
+        assert kind.isNumericFloat();
+        Label loop = new Label();
+        Register i = asRegister(temp5);
+
+        masm.movq(i, range);
+        masm.negq(i);
+        // Align the main loop
+        masm.align(crb.target.wordSize * 2);
+        masm.bind(loop);
+        emitFloatCompare(masm, base1, base2, index, offset, falseLabel, kind.getByteCount() == range);
+        masm.addq(index, kind.getByteCount());
+        masm.addq(i, kind.getByteCount());
+        masm.jccb(ConditionFlag.NotZero, loop);
+        // Floats within the range are equal, revert change to the register index
+        masm.subq(index, range);
+    }
+
     private static final Unsafe UNSAFE = initUnsafe();
 
     private static Unsafe initUnsafe() {
--- a/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.lir.sparc/src/org/graalvm/compiler/lir/sparc/SPARCArrayEqualsOp.java	Wed Aug 23 14:52:55 2017 -0400
+++ b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.lir.sparc/src/org/graalvm/compiler/lir/sparc/SPARCArrayEqualsOp.java	Thu Aug 24 01:13:04 2017 +0000
@@ -78,6 +78,8 @@
 
     public SPARCArrayEqualsOp(LIRGeneratorTool tool, JavaKind kind, AllocatableValue result, AllocatableValue array1, AllocatableValue array2, AllocatableValue length) {
         super(TYPE, SIZE);
+
+        assert !kind.isNumericFloat() : "Float arrays comparison (bitwise_equal || both_NaN) isn't supported";
         this.kind = kind;
 
         Class<?> arrayClass = Array.newInstance(kind.toJavaClass(), 0).getClass();
--- a/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.lir/src/org/graalvm/compiler/lir/gen/ArithmeticLIRGenerator.java	Wed Aug 23 14:52:55 2017 -0400
+++ b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.lir/src/org/graalvm/compiler/lir/gen/ArithmeticLIRGenerator.java	Thu Aug 24 01:13:04 2017 +0000
@@ -59,7 +59,7 @@
         if (isNumericInteger(a.getPlatformKind())) {
             LIRKind aKind = a.getValueKind(LIRKind.class);
             LIRKind bKind = b.getValueKind(LIRKind.class);
-            assert a.getPlatformKind() == b.getPlatformKind();
+            assert a.getPlatformKind() == b.getPlatformKind() : a.getPlatformKind() + " vs. " + b.getPlatformKind();
 
             if (aKind.isUnknownReference()) {
                 resultKind = aKind;
--- a/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.loop.phases/src/org/graalvm/compiler/loop/phases/LoopPartialUnrollPhase.java	Wed Aug 23 14:52:55 2017 -0400
+++ b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.loop.phases/src/org/graalvm/compiler/loop/phases/LoopPartialUnrollPhase.java	Thu Aug 24 01:13:04 2017 +0000
@@ -51,6 +51,8 @@
                 try (Graph.NodeEventScope nes = graph.trackNodeEvents(listener)) {
                     LoopsData dataCounted = new LoopsData(graph);
                     dataCounted.detectedCountedLoops();
+                    Graph.Mark mark = graph.getMark();
+                    boolean prePostInserted = false;
                     for (LoopEx loop : dataCounted.countedLoops()) {
                         if (!LoopTransformations.isUnrollableLoop(loop)) {
                             continue;
@@ -59,9 +61,10 @@
                             if (loop.loopBegin().isSimpleLoop()) {
                                 // First perform the pre/post transformation and do the partial
                                 // unroll when we come around again.
-                                LoopTransformations.insertPrePostLoops(loop, graph);
+                                LoopTransformations.insertPrePostLoops(loop);
+                                prePostInserted = true;
                             } else {
-                                LoopTransformations.partialUnroll(loop, graph);
+                                LoopTransformations.partialUnroll(loop);
                             }
                             changed = true;
                         }
@@ -72,11 +75,25 @@
                         canonicalizer.applyIncremental(graph, context, listener.getNodes());
                         listener.getNodes().clear();
                     }
+
+                    assert !prePostInserted || checkCounted(graph, mark);
                 }
             }
         }
     }
 
+    private static boolean checkCounted(StructuredGraph graph, Graph.Mark mark) {
+        LoopsData dataCounted;
+        dataCounted = new LoopsData(graph);
+        dataCounted.detectedCountedLoops();
+        for (LoopEx anyLoop : dataCounted.loops()) {
+            if (graph.isNew(mark, anyLoop.loopBegin())) {
+                assert anyLoop.isCounted() : "pre/post transformation loses counted loop " + anyLoop.loopBegin();
+            }
+        }
+        return true;
+    }
+
     @Override
     public boolean checkContract() {
         return false;
--- a/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.loop.phases/src/org/graalvm/compiler/loop/phases/LoopTransformations.java	Wed Aug 23 14:52:55 2017 -0400
+++ b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.loop.phases/src/org/graalvm/compiler/loop/phases/LoopTransformations.java	Thu Aug 24 01:13:04 2017 +0000
@@ -31,6 +31,7 @@
 import java.util.List;
 
 import org.graalvm.compiler.core.common.RetryableBailoutException;
+import org.graalvm.compiler.core.common.calc.Condition;
 import org.graalvm.compiler.debug.DebugContext;
 import org.graalvm.compiler.debug.GraalError;
 import org.graalvm.compiler.graph.Graph.Mark;
@@ -145,9 +146,9 @@
         // TODO (gd) probabilities need some amount of fixup.. (probably also in other transforms)
     }
 
-    public static void partialUnroll(LoopEx loop, StructuredGraph graph) {
+    public static void partialUnroll(LoopEx loop) {
         assert loop.loopBegin().isMainLoop();
-        graph.getDebug().log("LoopPartialUnroll %s", loop);
+        loop.loopBegin().graph().getDebug().log("LoopPartialUnroll %s", loop);
 
         LoopFragmentInside newSegment = loop.inside().duplicate();
         newSegment.insertWithinAfter(loop);
@@ -222,72 +223,73 @@
     // The pre loop is constrained to one iteration for now and will likely
     // be updated to produce vector alignment if applicable.
 
-    public static void insertPrePostLoops(LoopEx loop, StructuredGraph graph) {
+    public static LoopBeginNode insertPrePostLoops(LoopEx loop) {
+        StructuredGraph graph = loop.loopBegin().graph();
         graph.getDebug().log("LoopTransformations.insertPrePostLoops %s", loop);
         LoopFragmentWhole preLoop = loop.whole();
         CountedLoopInfo preCounted = loop.counted();
         IfNode preLimit = preCounted.getLimitTest();
-        if (preLimit != null) {
-            LoopBeginNode preLoopBegin = loop.loopBegin();
-            InductionVariable preIv = preCounted.getCounter();
-            LoopExitNode preLoopExitNode = preLoopBegin.getSingleLoopExit();
-            FixedNode continuationNode = preLoopExitNode.next();
+        assert preLimit != null;
+        LoopBeginNode preLoopBegin = loop.loopBegin();
+        InductionVariable preIv = preCounted.getCounter();
+        LoopExitNode preLoopExitNode = preLoopBegin.getSingleLoopExit();
+        FixedNode continuationNode = preLoopExitNode.next();
 
-            // Each duplication is inserted after the original, ergo create the post loop first
-            LoopFragmentWhole mainLoop = preLoop.duplicate();
-            LoopFragmentWhole postLoop = preLoop.duplicate();
-            preLoopBegin.incrementSplits();
-            preLoopBegin.incrementSplits();
-            preLoopBegin.setPreLoop();
-            graph.getDebug().dump(DebugContext.VERBOSE_LEVEL, graph, "After duplication");
-            LoopBeginNode mainLoopBegin = mainLoop.getDuplicatedNode(preLoopBegin);
-            mainLoopBegin.setMainLoop();
-            LoopBeginNode postLoopBegin = postLoop.getDuplicatedNode(preLoopBegin);
-            postLoopBegin.setPostLoop();
+        // Each duplication is inserted after the original, ergo create the post loop first
+        LoopFragmentWhole mainLoop = preLoop.duplicate();
+        LoopFragmentWhole postLoop = preLoop.duplicate();
+        preLoopBegin.incrementSplits();
+        preLoopBegin.incrementSplits();
+        preLoopBegin.setPreLoop();
+        graph.getDebug().dump(DebugContext.VERBOSE_LEVEL, graph, "After duplication");
+        LoopBeginNode mainLoopBegin = mainLoop.getDuplicatedNode(preLoopBegin);
+        mainLoopBegin.setMainLoop();
+        LoopBeginNode postLoopBegin = postLoop.getDuplicatedNode(preLoopBegin);
+        postLoopBegin.setPostLoop();
 
-            EndNode postEndNode = getBlockEndAfterLoopExit(postLoopBegin);
-            AbstractMergeNode postMergeNode = postEndNode.merge();
-            LoopExitNode postLoopExitNode = postLoopBegin.getSingleLoopExit();
+        EndNode postEndNode = getBlockEndAfterLoopExit(postLoopBegin);
+        AbstractMergeNode postMergeNode = postEndNode.merge();
+        LoopExitNode postLoopExitNode = postLoopBegin.getSingleLoopExit();
 
-            // Update the main loop phi initialization to carry from the pre loop
-            for (PhiNode prePhiNode : preLoopBegin.phis()) {
-                PhiNode mainPhiNode = mainLoop.getDuplicatedNode(prePhiNode);
-                mainPhiNode.setValueAt(0, prePhiNode);
-            }
+        // Update the main loop phi initialization to carry from the pre loop
+        for (PhiNode prePhiNode : preLoopBegin.phis()) {
+            PhiNode mainPhiNode = mainLoop.getDuplicatedNode(prePhiNode);
+            mainPhiNode.setValueAt(0, prePhiNode);
+        }
 
-            EndNode mainEndNode = getBlockEndAfterLoopExit(mainLoopBegin);
-            AbstractMergeNode mainMergeNode = mainEndNode.merge();
-            AbstractEndNode postEntryNode = postLoopBegin.forwardEnd();
+        EndNode mainEndNode = getBlockEndAfterLoopExit(mainLoopBegin);
+        AbstractMergeNode mainMergeNode = mainEndNode.merge();
+        AbstractEndNode postEntryNode = postLoopBegin.forwardEnd();
 
-            // In the case of no Bounds tests, we just flow right into the main loop
-            AbstractBeginNode mainLandingNode = BeginNode.begin(postEntryNode);
-            LoopExitNode mainLoopExitNode = mainLoopBegin.getSingleLoopExit();
-            mainLoopExitNode.setNext(mainLandingNode);
-            preLoopExitNode.setNext(mainLoopBegin.forwardEnd());
+        // In the case of no Bounds tests, we just flow right into the main loop
+        AbstractBeginNode mainLandingNode = BeginNode.begin(postEntryNode);
+        LoopExitNode mainLoopExitNode = mainLoopBegin.getSingleLoopExit();
+        mainLoopExitNode.setNext(mainLandingNode);
+        preLoopExitNode.setNext(mainLoopBegin.forwardEnd());
 
-            // Add and update any phi edges as per merge usage as needed and update usages
-            processPreLoopPhis(loop, mainLoop, postLoop);
-            continuationNode.predecessor().clearSuccessors();
-            postLoopExitNode.setNext(continuationNode);
-            cleanupMerge(postMergeNode, postLoopExitNode);
-            cleanupMerge(mainMergeNode, mainLandingNode);
+        // Add and update any phi edges as per merge usage as needed and update usages
+        processPreLoopPhis(loop, mainLoop, postLoop);
+        continuationNode.predecessor().clearSuccessors();
+        postLoopExitNode.setNext(continuationNode);
+        cleanupMerge(postMergeNode, postLoopExitNode);
+        cleanupMerge(mainMergeNode, mainLandingNode);
 
-            // Change the preLoop to execute one iteration for now
-            updateMainLoopLimit(preLimit, preIv, mainLoop);
-            updatePreLoopLimit(preLimit, preIv, preCounted);
-            preLoopBegin.setLoopFrequency(1);
-            mainLoopBegin.setLoopFrequency(Math.max(0.0, mainLoopBegin.loopFrequency() - 2));
-            postLoopBegin.setLoopFrequency(Math.max(0.0, postLoopBegin.loopFrequency() - 1));
+        // Change the preLoop to execute one iteration for now
+        updateMainLoopLimit(preLimit, preIv, mainLoop);
+        updatePreLoopLimit(preLimit, preIv, preCounted);
+        preLoopBegin.setLoopFrequency(1);
+        mainLoopBegin.setLoopFrequency(Math.max(0.0, mainLoopBegin.loopFrequency() - 2));
+        postLoopBegin.setLoopFrequency(Math.max(0.0, postLoopBegin.loopFrequency() - 1));
 
-            // The pre and post loops don't require safepoints at all
-            for (SafepointNode safepoint : preLoop.nodes().filter(SafepointNode.class)) {
-                graph.removeFixed(safepoint);
-            }
-            for (SafepointNode safepoint : postLoop.nodes().filter(SafepointNode.class)) {
-                graph.removeFixed(safepoint);
-            }
+        // The pre and post loops don't require safepoints at all
+        for (SafepointNode safepoint : preLoop.nodes().filter(SafepointNode.class)) {
+            graph.removeFixed(safepoint);
+        }
+        for (SafepointNode safepoint : postLoop.nodes().filter(SafepointNode.class)) {
+            graph.removeFixed(safepoint);
         }
         graph.getDebug().dump(DebugContext.DETAILED_LEVEL, graph, "InsertPrePostLoops %s", loop);
+        return mainLoopBegin;
     }
 
     /**
@@ -373,7 +375,7 @@
             throw GraalError.shouldNotReachHere();
         }
 
-        // Preloop always performs at least once iteration, so remove that from the main loop.
+        // Preloop always performs at least one iteration, so remove that from the main loop.
         ValueNode newLimit = sub(graph, ub, mainStride);
 
         // Re-wire the condition with the new limit
@@ -445,6 +447,14 @@
             return false;
         }
         LoopBeginNode loopBegin = loop.loopBegin();
+        LogicNode condition = loop.counted().getLimitTest().condition();
+        if (!(condition instanceof CompareNode)) {
+            return false;
+        }
+        if (((CompareNode) condition).condition() == Condition.EQ || ((CompareNode) condition).condition() == Condition.NE) {
+            condition.getDebug().log(DebugContext.VERBOSE_LEVEL, "isUnrollableLoop %s condition unsupported %s ", loopBegin, ((CompareNode) condition).condition());
+            return false;
+        }
         if (loopBegin.isMainLoop() || loopBegin.isSimpleLoop()) {
             // Flow-less loops to partial unroll for now. 3 blocks corresponds to an if that either
             // exits or continues the loop. There might be fixed and floating work within the loop
@@ -452,6 +462,7 @@
             if (loop.loop().getBlocks().size() < 3) {
                 return true;
             }
+            condition.getDebug().log(DebugContext.VERBOSE_LEVEL, "isUnrollableLoop %s too large to unroll %s ", loopBegin, loop.loop().getBlocks().size());
         }
         return false;
     }
--- a/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.loop.test/src/org/graalvm/compiler/loop/test/LoopPartialUnrollTest.java	Wed Aug 23 14:52:55 2017 -0400
+++ b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.loop.test/src/org/graalvm/compiler/loop/test/LoopPartialUnrollTest.java	Thu Aug 24 01:13:04 2017 +0000
@@ -22,12 +22,41 @@
  */
 package org.graalvm.compiler.loop.test;
 
+import java.util.ListIterator;
+
+import org.graalvm.compiler.core.common.CompilationIdentifier;
 import org.graalvm.compiler.core.test.GraalCompilerTest;
+import org.graalvm.compiler.debug.DebugContext;
 import org.graalvm.compiler.graph.iterators.NodeIterable;
+import org.graalvm.compiler.java.ComputeLoopFrequenciesClosure;
+import org.graalvm.compiler.loop.DefaultLoopPolicies;
+import org.graalvm.compiler.loop.LoopEx;
+import org.graalvm.compiler.loop.LoopFragmentInside;
+import org.graalvm.compiler.loop.LoopsData;
+import org.graalvm.compiler.loop.phases.LoopPartialUnrollPhase;
 import org.graalvm.compiler.nodes.LoopBeginNode;
 import org.graalvm.compiler.nodes.StructuredGraph;
+import org.graalvm.compiler.nodes.spi.LoweringTool;
+import org.graalvm.compiler.options.OptionValues;
+import org.graalvm.compiler.phases.BasePhase;
+import org.graalvm.compiler.phases.OptimisticOptimizations;
+import org.graalvm.compiler.phases.PhaseSuite;
+import org.graalvm.compiler.phases.common.CanonicalizerPhase;
+import org.graalvm.compiler.phases.common.ConditionalEliminationPhase;
+import org.graalvm.compiler.phases.common.DeadCodeEliminationPhase;
+import org.graalvm.compiler.phases.common.DeoptimizationGroupingPhase;
+import org.graalvm.compiler.phases.common.FloatingReadPhase;
+import org.graalvm.compiler.phases.common.FrameStateAssignmentPhase;
+import org.graalvm.compiler.phases.common.GuardLoweringPhase;
+import org.graalvm.compiler.phases.common.LoweringPhase;
+import org.graalvm.compiler.phases.common.RemoveValueProxyPhase;
+import org.graalvm.compiler.phases.tiers.MidTierContext;
+import org.graalvm.compiler.phases.tiers.Suites;
+import org.junit.Ignore;
 import org.junit.Test;
 
+import jdk.vm.ci.meta.ResolvedJavaMethod;
+
 public class LoopPartialUnrollTest extends GraalCompilerTest {
 
     @Override
@@ -41,100 +70,72 @@
         return false;
     }
 
-    public static long testMultiplySnippet(int arg) {
-        long r = 1;
-        for (int i = 0; branchProbability(0.99, i < arg); i++) {
-            r += r * i;
+    public static long sumWithEqualityLimit(int[] text) {
+        long sum = 0;
+        for (int i = 0; branchProbability(0.99, i != text.length); ++i) {
+            sum += volatileInt;
         }
-        return r;
+        return sum;
     }
 
+    @Ignore("equality limits aren't working properly")
     @Test
-    public void testMultiply() {
-        test("testMultiplySnippet", 9);
-    }
-
-    public static int testNestedSumSnippet(int d) {
-        int c = 0;
-        for (int i = 0; i < d; i++) {
-            for (int j = 0; branchProbability(0.99, j < i); j++) {
-                c += c + j & 0x3;
-            }
-        }
-        return c;
-    }
-
-    @Test
-    public void testNestedSumBy2() {
-        for (int i = 0; i < 1000; i++) {
-            test("testNestedSumBy2Snippet", i);
-        }
-    }
-
-    public static int testNestedSumBy2Snippet(int d) {
-        int c = 0;
-        for (int i = 0; i < d; i++) {
-            for (int j = 0; branchProbability(0.99, j < i); j += 2) {
-                c += c + j & 0x3;
-            }
-        }
-        return c;
-    }
-
-    @Test
-    public void testNestedSum() {
-        for (int i = 0; i < 1000; i++) {
-            test("testNestedSumSnippet", i);
-        }
-    }
-
-    public static int testSumDownSnippet(int d) {
-        int c = 0;
-        for (int j = d; branchProbability(0.99, j > -4); j--) {
-            c += c + j & 0x3;
-        }
-        return c;
-    }
-
-    @Test
-    public void testSumDown() {
-        test("testSumDownSnippet", 1);
-        for (int i = 0; i < 160; i++) {
-            test("testSumDownSnippet", i);
-        }
-    }
-
-    public static int testSumDownBy2Snippet(int d) {
-        int c = 0;
-        for (int j = d; branchProbability(0.99, j > -4); j -= 2) {
-            c += c + j & 0x3;
-        }
-        return c;
-    }
-
-    @Test
-    public void testSumDownBy2() {
-        test("testSumDownBy2Snippet", 1);
-        for (int i = 0; i < 160; i++) {
-            test("testSumDownBy2Snippet", i);
+    public void testSumWithEqualityLimit() {
+        for (int i = 0; i < 128; i++) {
+            int[] data = new int[i];
+            test("sumWithEqualityLimit", data);
         }
     }
 
     @Test
     public void testLoopCarried() {
-        test("testLoopCarriedSnippet", 1, 2);
-        test("testLoopCarriedSnippet", 0, 4);
-        test("testLoopCarriedSnippet", 4, 0);
+        for (int i = 0; i < 64; i++) {
+            test("testLoopCarriedSnippet", i);
+        }
     }
 
-    public static int testLoopCarriedSnippet(int a, int b) {
-        int c = a;
-        int d = b;
-        for (int j = 0; branchProbability(0.99, j < a); j++) {
-            d = c;
-            c += 1;
+    @Test
+    public void testLoopCarriedDuplication() {
+        testDuplicateBody("testLoopCarriedReference", "testLoopCarriedSnippet");
+    }
+
+    static volatile int volatileInt = 3;
+
+    public int testLoopCarriedSnippet(int iterations) {
+        int a = 0;
+        int b = 0;
+        int c = 0;
+
+        for (int i = 0; branchProbability(0.99, i < iterations); i++) {
+            int t1 = volatileInt;
+            int t2 = a + b;
+            c = b;
+            b = a;
+            a = t1 + t2;
         }
-        return c + d;
+
+        return c;
+    }
+
+    public int testLoopCarriedReference(int iterations) {
+        int a = 0;
+        int b = 0;
+        int c = 0;
+
+        for (int i = 0; branchProbability(0.99, i < iterations); i += 2) {
+            int t1 = volatileInt;
+            int t2 = a + b;
+            c = b;
+            b = a;
+            a = t1 + t2;
+            t1 = volatileInt;
+            t2 = a + b;
+            c = b;
+            b = a;
+            a = t1 + t2;
+        }
+
+        return c;
     }
 
     public static long init = Runtime.getRuntime().totalMemory();
@@ -181,4 +182,82 @@
     public void testSignExtension() {
         test("testSignExtensionSnippet", 9L);
     }
+
+    @Override
+    protected Suites createSuites(OptionValues opts) {
+        Suites suites = super.createSuites(opts).copy();
+        PhaseSuite<MidTierContext> mid = suites.getMidTier();
+        ListIterator<BasePhase<? super MidTierContext>> iter = mid.findPhase(LoopPartialUnrollPhase.class);
+        BasePhase<? super MidTierContext> partialUnoll = iter.previous();
+        if (iter.previous().getClass() != FrameStateAssignmentPhase.class) {
+            // Ensure LoopPartialUnrollPhase runs immediately after FrameStateAssignment, so it gets
+            // priority over other optimizations in these tests.
+            mid.findPhase(LoopPartialUnrollPhase.class).remove();
+            ListIterator<BasePhase<? super MidTierContext>> fsa = mid.findPhase(FrameStateAssignmentPhase.class);
+            fsa.add(partialUnoll);
+        }
+        return suites;
+    }
+
+    public void testGraph(String reference, String test) {
+        StructuredGraph referenceGraph = buildGraph(reference, false);
+        StructuredGraph testGraph = buildGraph(test, true);
+        assertEquals(referenceGraph, testGraph, false, false);
+    }
+
+    @SuppressWarnings("try")
+    public StructuredGraph buildGraph(String name, boolean partialUnroll) {
+        CompilationIdentifier id = new CompilationIdentifier() {
+            @Override
+            public String toString(Verbosity verbosity) {
+                return name;
+            }
+        };
+        ResolvedJavaMethod method = getResolvedJavaMethod(name);
+        OptionValues options = new OptionValues(getInitialOptions(), DefaultLoopPolicies.UnrollMaxIterations, 2);
+        StructuredGraph graph = parse(builder(method, StructuredGraph.AllowAssumptions.YES, id, options), getEagerGraphBuilderSuite());
+        try (DebugContext.Scope buildScope = graph.getDebug().scope(name, method, graph)) {
+            MidTierContext context = new MidTierContext(getProviders(), getTargetProvider(), OptimisticOptimizations.ALL, null);
+
+            CanonicalizerPhase canonicalizer = new CanonicalizerPhase();
+            canonicalizer.apply(graph, context);
+            new RemoveValueProxyPhase().apply(graph);
+            new LoweringPhase(canonicalizer, LoweringTool.StandardLoweringStage.HIGH_TIER).apply(graph, context);
+            new FloatingReadPhase().apply(graph);
+            new DeadCodeEliminationPhase().apply(graph);
+            new ConditionalEliminationPhase(true).apply(graph, context);
+            ComputeLoopFrequenciesClosure.compute(graph);
+            new GuardLoweringPhase().apply(graph, context);
+            new LoweringPhase(canonicalizer, LoweringTool.StandardLoweringStage.MID_TIER).apply(graph, context);
+            new FrameStateAssignmentPhase().apply(graph);
+            new DeoptimizationGroupingPhase().apply(graph, context);
+            canonicalizer.apply(graph, context);
+            new ConditionalEliminationPhase(true).apply(graph, context);
+            if (partialUnroll) {
+                LoopsData dataCounted = new LoopsData(graph);
+                dataCounted.detectedCountedLoops();
+                for (LoopEx loop : dataCounted.countedLoops()) {
+                    LoopFragmentInside newSegment = loop.inside().duplicate();
+                    newSegment.insertWithinAfter(loop, false);
+                }
+                canonicalizer.apply(graph, getDefaultMidTierContext());
+            }
+            new DeadCodeEliminationPhase().apply(graph);
+            canonicalizer.apply(graph, context);
+            graph.getDebug().dump(DebugContext.BASIC_LEVEL, graph, "before compare");
+            return graph;
+        } catch (Throwable e) {
+            throw getDebugContext().handle(e);
+        }
+    }
+
+    public void testDuplicateBody(String reference, String test) {
+
+        StructuredGraph referenceGraph = buildGraph(reference, false);
+        StructuredGraph testGraph = buildGraph(test, true);
+        CanonicalizerPhase canonicalizer = new CanonicalizerPhase();
+        canonicalizer.apply(testGraph, getDefaultMidTierContext());
+        canonicalizer.apply(referenceGraph, getDefaultMidTierContext());
+        assertEquals(referenceGraph, testGraph);
+    }
 }
--- a/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.loop/src/org/graalvm/compiler/loop/DefaultLoopPolicies.java	Wed Aug 23 14:52:55 2017 -0400
+++ b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.loop/src/org/graalvm/compiler/loop/DefaultLoopPolicies.java	Thu Aug 24 01:13:04 2017 +0000
@@ -100,11 +100,12 @@
 
     @Override
     public boolean shouldPartiallyUnroll(LoopEx loop) {
+        LoopBeginNode loopBegin = loop.loopBegin();
         if (!loop.isCounted()) {
+            loopBegin.getDebug().log(DebugContext.VERBOSE_LEVEL, "shouldPartiallyUnroll %s isn't counted", loopBegin);
             return false;
         }
         OptionValues options = loop.entryPoint().getOptions();
-        LoopBeginNode loopBegin = loop.loopBegin();
         int maxNodes = ExactPartialUnrollMaxNodes.getValue(options);
         maxNodes = Math.min(maxNodes, Math.max(0, MaximumDesiredSize.getValue(options) - loop.loopBegin().graph().getNodeCount()));
         int size = Math.max(1, loop.size() - 1 - loop.loopBegin().phis().count());
@@ -112,6 +113,7 @@
         if (unrollFactor == 1) {
             double loopFrequency = loopBegin.loopFrequency();
             if (loopBegin.isSimpleLoop() && loopFrequency < 5.0) {
+                loopBegin.getDebug().log(DebugContext.VERBOSE_LEVEL, "shouldPartiallyUnroll %s frequency too low %s ", loopBegin, loopFrequency);
                 return false;
             }
             loopBegin.setLoopOrigFrequency(loopFrequency);
@@ -136,6 +138,7 @@
             }
             return true;
         } else {
+            loopBegin.getDebug().log(DebugContext.VERBOSE_LEVEL, "shouldPartiallyUnroll %s unrolled loop is too large %s ", loopBegin, size);
             return false;
         }
     }
--- a/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.loop/src/org/graalvm/compiler/loop/LoopFragmentInside.java	Wed Aug 23 14:52:55 2017 -0400
+++ b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.loop/src/org/graalvm/compiler/loop/LoopFragmentInside.java	Thu Aug 24 01:13:04 2017 +0000
@@ -22,6 +22,7 @@
  */
 package org.graalvm.compiler.loop;
 
+import java.util.ArrayList;
 import java.util.LinkedList;
 import java.util.List;
 
@@ -142,18 +143,49 @@
         end.setNext(loop.entryPoint());
     }
 
+    /**
+     * Duplicate the body within the loop after the current copy of the body, updating the
+     * iteration limit to account for the duplication.
+     *
+     * @param loop
+     */
     public void insertWithinAfter(LoopEx loop) {
+        insertWithinAfter(loop, true);
+    }
+
+    /**
+     * Duplicate the body within the loop after the current copy of the body.
+     *
+     * @param loop
+     * @param updateLimit true if the iteration limit should be adjusted.
+     */
+    public void insertWithinAfter(LoopEx loop, boolean updateLimit) {
         assert isDuplicate() && original().loop() == loop;
 
         patchNodes(dataFixWithinAfter);
 
+        /*
+         * Collect any new back edge values before updating them since they might reference each
+         * other.
+         */
         LoopBeginNode mainLoopBegin = loop.loopBegin();
+        ArrayList<ValueNode> backedgeValues = new ArrayList<>();
         for (PhiNode mainPhiNode : mainLoopBegin.phis()) {
             ValueNode duplicatedNode = getDuplicatedNode(mainPhiNode.valueAt(1));
+            if (duplicatedNode == null) {
+                if (mainLoopBegin.isPhiAtMerge(mainPhiNode.valueAt(1))) {
+                    duplicatedNode = ((PhiNode) (mainPhiNode.valueAt(1))).valueAt(1);
+                } else {
+                    assert mainPhiNode.valueAt(1).isConstant() : mainPhiNode.valueAt(1);
+                }
+            }
+            backedgeValues.add(duplicatedNode);
+        }
+        int index = 0;
+        for (PhiNode mainPhiNode : mainLoopBegin.phis()) {
+            ValueNode duplicatedNode = backedgeValues.get(index++);
             if (duplicatedNode != null) {
                 mainPhiNode.setValueAt(1, duplicatedNode);
-            } else {
-                assert mainPhiNode.valueAt(1).isConstant() || mainLoopBegin.isPhiAtMerge(mainPhiNode.valueAt(1)) : mainPhiNode.valueAt(1);
             }
         }
 
@@ -166,27 +198,29 @@
         }
 
         int unrollFactor = mainLoopBegin.getUnrollFactor();
-
-        // Now use the previous unrollFactor to update the exit condition to power of two
         StructuredGraph graph = mainLoopBegin.graph();
-        InductionVariable iv = loop.counted().getCounter();
-        CompareNode compareNode = (CompareNode) loop.counted().getLimitTest().condition();
-        ValueNode compareBound;
-        if (compareNode.getX() == iv.valueNode()) {
-            compareBound = compareNode.getY();
-        } else if (compareNode.getY() == iv.valueNode()) {
-            compareBound = compareNode.getX();
-        } else {
-            throw GraalError.shouldNotReachHere();
-        }
-        if (iv.direction() == InductionVariable.Direction.Up) {
-            ConstantNode aboveVal = graph.unique(ConstantNode.forIntegerStamp(iv.initNode().stamp(), unrollFactor * iv.constantStride()));
-            ValueNode newLimit = graph.addWithoutUnique(new SubNode(compareBound, aboveVal));
-            compareNode.replaceFirstInput(compareBound, newLimit);
-        } else if (iv.direction() == InductionVariable.Direction.Down) {
-            ConstantNode aboveVal = graph.unique(ConstantNode.forIntegerStamp(iv.initNode().stamp(), unrollFactor * -iv.constantStride()));
-            ValueNode newLimit = graph.addWithoutUnique(new AddNode(compareBound, aboveVal));
-            compareNode.replaceFirstInput(compareBound, newLimit);
+        if (updateLimit) {
+            // Now use the previous unrollFactor to update the exit condition to power of two
+            InductionVariable iv = loop.counted().getCounter();
+            CompareNode compareNode = (CompareNode) loop.counted().getLimitTest().condition();
+            ValueNode compareBound;
+            if (compareNode.getX() == iv.valueNode()) {
+                compareBound = compareNode.getY();
+            } else if (compareNode.getY() == iv.valueNode()) {
+                compareBound = compareNode.getX();
+            } else {
+                throw GraalError.shouldNotReachHere();
+            }
+            long originalStride = unrollFactor == 1 ? iv.constantStride() : iv.constantStride() / unrollFactor;
+            if (iv.direction() == InductionVariable.Direction.Up) {
+                ConstantNode aboveVal = graph.unique(ConstantNode.forIntegerStamp(iv.initNode().stamp(), unrollFactor * originalStride));
+                ValueNode newLimit = graph.addWithoutUnique(new SubNode(compareBound, aboveVal));
+                compareNode.replaceFirstInput(compareBound, newLimit);
+            } else if (iv.direction() == InductionVariable.Direction.Down) {
+                ConstantNode aboveVal = graph.unique(ConstantNode.forIntegerStamp(iv.initNode().stamp(), unrollFactor * -originalStride));
+                ValueNode newLimit = graph.addWithoutUnique(new AddNode(compareBound, aboveVal));
+                compareNode.replaceFirstInput(compareBound, newLimit);
+            }
         }
         mainLoopBegin.setUnrollFactor(unrollFactor * 2);
         mainLoopBegin.setLoopFrequency(mainLoopBegin.loopFrequency() / 2);
--- a/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.microbenchmarks/src/org/graalvm/compiler/microbenchmarks/graal/TestJMH.java	Wed Aug 23 14:52:55 2017 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,43 +0,0 @@
-/*
- * Copyright (c) 2015, 2015, Oracle and/or its affiliates. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- */
-package org.graalvm.compiler.microbenchmarks.graal;
-
-import org.openjdk.jmh.annotations.Benchmark;
-import org.openjdk.jmh.annotations.Fork;
-import org.openjdk.jmh.annotations.Measurement;
-import org.openjdk.jmh.annotations.Warmup;
-
-@Warmup(iterations = 1)
-@Measurement(iterations = 1)
-@Fork(1)
-/**
- * This dummy class is used to verify that the JMH microbenchmarking environment is set up properly.
- */
-public class TestJMH {
-
-    @Benchmark
-    public void testJMH() {
-        // This method was intentionally left blank.
-    }
-
-}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.microbenchmarks/src/org/graalvm/compiler/microbenchmarks/graal/TestJMHWhitebox.java	Thu Aug 24 01:13:04 2017 +0000
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2015, 2017, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+package org.graalvm.compiler.microbenchmarks.graal;
+
+import org.graalvm.compiler.microbenchmarks.graal.util.GraalState;
+import org.openjdk.jmh.annotations.Benchmark;
+import org.openjdk.jmh.annotations.Fork;
+import org.openjdk.jmh.annotations.Measurement;
+import org.openjdk.jmh.annotations.Warmup;
+
+@Warmup(iterations = 1)
+@Measurement(iterations = 1)
+@Fork(1)
+/**
+ * This dummy class is used to verify that the JMH microbenchmarking environment is set up properly.
+ */
+public class TestJMHWhitebox {
+
+    @Benchmark
+    public void testJMH(@SuppressWarnings("unused") GraalState s) {
+        // This method was intentionally left blank.
+    }
+
+}
--- a/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.nodes.test/src/org/graalvm/compiler/nodes/test/IntegerStampTest.java	Wed Aug 23 14:52:55 2017 -0400
+++ b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.nodes.test/src/org/graalvm/compiler/nodes/test/IntegerStampTest.java	Thu Aug 24 01:13:04 2017 +0000
@@ -25,12 +25,10 @@
 import static org.graalvm.compiler.core.test.GraalCompilerTest.getInitialOptions;
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertFalse;
-import jdk.vm.ci.meta.JavaConstant;
-import jdk.vm.ci.meta.JavaKind;
 
-import org.junit.Before;
-import org.junit.Test;
+import java.math.BigInteger;
 
+import org.graalvm.compiler.core.common.type.ArithmeticOpTable.BinaryOp;
 import org.graalvm.compiler.core.common.type.ArithmeticOpTable.IntegerConvertOp;
 import org.graalvm.compiler.core.common.type.ArithmeticOpTable.ShiftOp;
 import org.graalvm.compiler.core.common.type.IntegerStamp;
@@ -42,6 +40,12 @@
 import org.graalvm.compiler.nodes.StructuredGraph;
 import org.graalvm.compiler.nodes.StructuredGraph.AllowAssumptions;
 import org.graalvm.compiler.options.OptionValues;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+
+import jdk.vm.ci.meta.JavaConstant;
+import jdk.vm.ci.meta.JavaKind;
 
 /**
  * This class tests that integer stamps are created correctly for constants.
@@ -365,10 +369,187 @@
         assertEquals(IntegerStamp.create(32, 0, 0x1ff, 0, 0x1ff), shl.foldStamp(IntegerStamp.create(32, 0, 0xff, 0, 0xff), IntegerStamp.create(32, 0, 1, 0, 1)));
         assertEquals(IntegerStamp.create(32, 0, 0x1fe0, 0, 0x1fe0), shl.foldStamp(IntegerStamp.create(32, 0, 0xff, 0, 0xff), IntegerStamp.create(32, 5, 5, 5, 5)));
         assertEquals(IntegerStamp.create(32, 0x1e0, 0x1fe0, 0, 0x1fe0), shl.foldStamp(IntegerStamp.create(32, 0xf, 0xff, 0, 0xff), IntegerStamp.create(32, 5, 5, 5, 5)));
+        assertEquals(IntegerStamp.create(32, -4096, -4096, -4096, -4096), shl.foldStamp(IntegerStamp.create(32, -16, -16, -16, -16), IntegerStamp.create(32, 8, 8, 8, 8)));
+        assertEquals(StampFactory.empty(JavaKind.Int), shl.foldStamp(StampFactory.empty(JavaKind.Int), IntegerStamp.create(32, 5, 5, 5, 5)));
+        assertEquals(StampFactory.empty(JavaKind.Int), shl.foldStamp(IntegerStamp.create(32, 0xf, 0xff, 0, 0xff), (IntegerStamp) StampFactory.empty(JavaKind.Int)));
 
         assertEquals(IntegerStamp.create(64, 0, 0x1ff, 0, 0x1ff), shl.foldStamp(IntegerStamp.create(64, 0, 0xff, 0, 0xff), IntegerStamp.create(32, 0, 1, 0, 1)));
         assertEquals(IntegerStamp.create(64, 0, 0x1fe0, 0, 0x1fe0), shl.foldStamp(IntegerStamp.create(64, 0, 0xff, 0, 0xff), IntegerStamp.create(32, 5, 5, 5, 5)));
         assertEquals(IntegerStamp.create(64, 0x1e0, 0x1fe0, 0, 0x1fe0), shl.foldStamp(IntegerStamp.create(64, 0xf, 0xff, 0, 0xff), IntegerStamp.create(32, 5, 5, 5, 5)));
+        assertEquals(IntegerStamp.create(64, -4096, -4096, -4096, -4096), shl.foldStamp(IntegerStamp.create(64, -16, -16, -16, -16), IntegerStamp.create(32, 8, 8, 8, 8)));
+        assertEquals(StampFactory.empty(JavaKind.Long), shl.foldStamp(StampFactory.empty(JavaKind.Long), IntegerStamp.create(32, 5, 5, 5, 5)));
+        assertEquals(StampFactory.empty(JavaKind.Long), shl.foldStamp(IntegerStamp.create(64, 0xf, 0xff, 0, 0xff), (IntegerStamp) StampFactory.empty(JavaKind.Int)));
+    }
 
+    @Test
+    public void testUnsignedShiftRight() {
+        ShiftOp<?> ushr = IntegerStamp.OPS.getUShr();
+        assertEquals(IntegerStamp.create(32, 0, 0xff, 0, 0xff), ushr.foldStamp(IntegerStamp.create(32, 0, 0xff, 0, 0xff), IntegerStamp.create(32, 0, 1, 0, 1)));
+        assertEquals(IntegerStamp.create(32, 0, 0x07, 0, 0x07), ushr.foldStamp(IntegerStamp.create(32, 0, 0xff, 0, 0xff), IntegerStamp.create(32, 5, 5, 5, 5)));
+        assertEquals(IntegerStamp.create(32, 0x0, 0x07, 0, 0x07), ushr.foldStamp(IntegerStamp.create(32, 0xf, 0xff, 0, 0xff), IntegerStamp.create(32, 5, 5, 5, 5)));
+        assertEquals(IntegerStamp.create(32, 0xffffff, 0xffffff, 0xffffff, 0xffffff), ushr.foldStamp(IntegerStamp.create(32, -16, -16, -16, -16), IntegerStamp.create(32, 8, 8, 8, 8)));
+        assertEquals(StampFactory.empty(JavaKind.Int), ushr.foldStamp(StampFactory.empty(JavaKind.Int), IntegerStamp.create(32, 5, 5, 5, 5)));
+        assertEquals(StampFactory.empty(JavaKind.Int), ushr.foldStamp(IntegerStamp.create(32, 0xf, 0xff, 0, 0xff), (IntegerStamp) StampFactory.empty(JavaKind.Int)));
+
+        assertEquals(IntegerStamp.create(64, 0, 0xff, 0, 0xff), ushr.foldStamp(IntegerStamp.create(64, 0, 0xff, 0, 0xff), IntegerStamp.create(32, 0, 1, 0, 1)));
+        assertEquals(IntegerStamp.create(64, 0, 0x07, 0, 0x07), ushr.foldStamp(IntegerStamp.create(64, 0, 0xff, 0, 0xff), IntegerStamp.create(32, 5, 5, 5, 5)));
+        assertEquals(IntegerStamp.create(64, 0x0, 0x07, 0, 0x07), ushr.foldStamp(IntegerStamp.create(64, 0xf, 0xff, 0, 0xff), IntegerStamp.create(32, 5, 5, 5, 5)));
+        assertEquals(IntegerStamp.create(64, 0xffffffffffffffL, 0xffffffffffffffL, 0xffffffffffffffL, 0xffffffffffffffL),
+                        ushr.foldStamp(IntegerStamp.create(64, -16, -16, -16, -16), IntegerStamp.create(32, 8, 8, 8, 8)));
+        assertEquals(StampFactory.empty(JavaKind.Long), ushr.foldStamp(StampFactory.empty(JavaKind.Long), IntegerStamp.create(32, 5, 5, 5, 5)));
+        assertEquals(StampFactory.empty(JavaKind.Long), ushr.foldStamp(IntegerStamp.create(64, 0xf, 0xff, 0, 0xff), (IntegerStamp) StampFactory.empty(JavaKind.Int)));
+    }
+
+    @Test
+    public void testShiftRight() {
+        ShiftOp<?> shr = IntegerStamp.OPS.getShr();
+        assertEquals(IntegerStamp.create(32, 0, 0xff, 0, 0xff), shr.foldStamp(IntegerStamp.create(32, 0, 0xff, 0, 0xff), IntegerStamp.create(32, 0, 1, 0, 1)));
+        assertEquals(IntegerStamp.create(32, 0, 0x07, 0, 0x07), shr.foldStamp(IntegerStamp.create(32, 0, 0xff, 0, 0xff), IntegerStamp.create(32, 5, 5, 5, 5)));
+        assertEquals(IntegerStamp.create(32, 0x0, 0x07, 0, 0x07), shr.foldStamp(IntegerStamp.create(32, 0xf, 0xff, 0, 0xff), IntegerStamp.create(32, 5, 5, 5, 5)));
+        assertEquals(IntegerStamp.create(32, -1, -1, -1, -1), shr.foldStamp(IntegerStamp.create(32, -16, -16, -16, -16), IntegerStamp.create(32, 8, 8, 8, 8)));
+        assertEquals(StampFactory.empty(JavaKind.Int), shr.foldStamp(StampFactory.empty(JavaKind.Int), IntegerStamp.create(32, 5, 5, 5, 5)));
+        assertEquals(StampFactory.empty(JavaKind.Int), shr.foldStamp(IntegerStamp.create(32, 0xf, 0xff, 0, 0xff), (IntegerStamp) StampFactory.empty(JavaKind.Int)));
+
+        assertEquals(IntegerStamp.create(64, 0, 0xff, 0, 0xff), shr.foldStamp(IntegerStamp.create(64, 0, 0xff, 0, 0xff), IntegerStamp.create(32, 0, 1, 0, 1)));
+        assertEquals(IntegerStamp.create(64, 0, 0x07, 0, 0x07), shr.foldStamp(IntegerStamp.create(64, 0, 0xff, 0, 0xff), IntegerStamp.create(32, 5, 5, 5, 5)));
+        assertEquals(IntegerStamp.create(64, 0x0, 0x07, 0, 0x07), shr.foldStamp(IntegerStamp.create(64, 0xf, 0xff, 0, 0xff), IntegerStamp.create(32, 5, 5, 5, 5)));
+        assertEquals(IntegerStamp.create(64, -1, -1, -1, -1), shr.foldStamp(IntegerStamp.create(64, -16, -16, -16, -16), IntegerStamp.create(32, 8, 8, 8, 8)));
+        assertEquals(StampFactory.empty(JavaKind.Long), shr.foldStamp(StampFactory.empty(JavaKind.Long), IntegerStamp.create(32, 5, 5, 5, 5)));
+        assertEquals(StampFactory.empty(JavaKind.Long), shr.foldStamp(IntegerStamp.create(64, 0xf, 0xff, 0, 0xff), (IntegerStamp) StampFactory.empty(JavaKind.Int)));
+    }
+
+    @Test
+    public void testMulHigh() {
+        testSomeMulHigh(IntegerStamp.OPS.getMulHigh());
+    }
+
+    @Test
+    public void testUMulHigh() {
+        testSomeMulHigh(IntegerStamp.OPS.getUMulHigh());
+    }
+
+    private static void testSomeMulHigh(BinaryOp<?> someMulHigh) {
+        // 32 bits
+        testMulHigh(someMulHigh, 0, 0, 32);
+
+        testMulHigh(someMulHigh, 1, 1, 32);
+        testMulHigh(someMulHigh, 1, 5, 32);
+        testMulHigh(someMulHigh, 256, 256, 32);
+        testMulHigh(someMulHigh, 0xFFFFFFF, 0xFFFFFFA, 32);
+        testMulHigh(someMulHigh, Integer.MAX_VALUE, 2, 32);
+
+        testMulHigh(someMulHigh, -1, -1, 32);
+        testMulHigh(someMulHigh, -1, -5, 32);
+        testMulHigh(someMulHigh, -256, -256, 32);
+        testMulHigh(someMulHigh, -0xFFFFFFF, -0xFFFFFFA, 32);
+        testMulHigh(someMulHigh, Integer.MIN_VALUE, -2, 32);
+
+        testMulHigh(someMulHigh, -1, 1, 32);
+        testMulHigh(someMulHigh, -1, 5, 32);
+        testMulHigh(someMulHigh, -256, 256, 32);
+        testMulHigh(someMulHigh, -0xFFFFFFF, 0xFFFFFFA, 32);
+        testMulHigh(someMulHigh, Integer.MIN_VALUE, 2, 32);
+
+        testMulHigh(someMulHigh, Integer.MIN_VALUE, Integer.MIN_VALUE, 32);
+        testMulHigh(someMulHigh, Integer.MAX_VALUE, Integer.MAX_VALUE, 32);
+
+        assertEquals(StampFactory.forKind(JavaKind.Int).empty(), someMulHigh.foldStamp(StampFactory.forKind(JavaKind.Int).empty(), StampFactory.forKind(JavaKind.Int).empty()));
+        assertEquals(StampFactory.forKind(JavaKind.Int).empty(), someMulHigh.foldStamp(StampFactory.forKind(JavaKind.Int).empty(), StampFactory.forKind(JavaKind.Int).unrestricted()));
+        assertEquals(StampFactory.forKind(JavaKind.Int).empty(), someMulHigh.foldStamp(StampFactory.forKind(JavaKind.Int).empty(), IntegerStamp.create(32, 0, 0)));
+        assertEquals(StampFactory.forKind(JavaKind.Int).empty(), someMulHigh.foldStamp(StampFactory.forKind(JavaKind.Int).empty(), IntegerStamp.create(32, 1, 1)));
+        assertEquals(StampFactory.forKind(JavaKind.Int).empty(), someMulHigh.foldStamp(StampFactory.forKind(JavaKind.Int).empty(), IntegerStamp.create(32, -1, -1)));
+
+        assertEquals(StampFactory.forKind(JavaKind.Int).unrestricted(), someMulHigh.foldStamp(StampFactory.forKind(JavaKind.Int).unrestricted(), StampFactory.forKind(JavaKind.Int).unrestricted()));
+        assertEquals(StampFactory.forKind(JavaKind.Int).unrestricted(), someMulHigh.foldStamp(StampFactory.forKind(JavaKind.Int).unrestricted(), IntegerStamp.create(32, 0, 0)));
+        assertEquals(StampFactory.forKind(JavaKind.Int).unrestricted(), someMulHigh.foldStamp(StampFactory.forKind(JavaKind.Int).unrestricted(), IntegerStamp.create(32, 1, 1)));
+        assertEquals(StampFactory.forKind(JavaKind.Int).unrestricted(), someMulHigh.foldStamp(StampFactory.forKind(JavaKind.Int).unrestricted(), IntegerStamp.create(32, -1, -1)));
+
+        // 64 bits
+        testMulHigh(someMulHigh, 0, 0, 64);
+
+        testMulHigh(someMulHigh, 1, 1, 64);
+        testMulHigh(someMulHigh, 1, 5, 64);
+        testMulHigh(someMulHigh, 256, 256, 64);
+        testMulHigh(someMulHigh, 0xFFFFFFF, 0xFFFFFFA, 64);
+        testMulHigh(someMulHigh, 0xFFFFFFFFFFFFFFL, 0xFFFFFFFFFFFFFAL, 64);
+        testMulHigh(someMulHigh, Integer.MAX_VALUE, 2, 64);
+        testMulHigh(someMulHigh, Long.MAX_VALUE, 2, 64);
+
+        testMulHigh(someMulHigh, -1, -1, 64);
+        testMulHigh(someMulHigh, -1, -5, 64);
+        testMulHigh(someMulHigh, -256, -256, 64);
+        testMulHigh(someMulHigh, -0xFFFFFFF, -0xFFFFFFA, 64);
+        testMulHigh(someMulHigh, -0xFFFFFFFFFFFFFFL, -0xFFFFFFFFFFFFFAL, 64);
+        testMulHigh(someMulHigh, Integer.MIN_VALUE, -2, 64);
+        testMulHigh(someMulHigh, Long.MIN_VALUE, -2, 64);
+
+        testMulHigh(someMulHigh, -1, 1, 64);
+        testMulHigh(someMulHigh, -1, 5, 64);
+        testMulHigh(someMulHigh, -256, 256, 64);
+        testMulHigh(someMulHigh, -0xFFFFFFF, 0xFFFFFFA, 64);
+        testMulHigh(someMulHigh, -0xFFFFFFFFFFFFFFL, 0xFFFFFFFFFFFFFAL, 64);
+        testMulHigh(someMulHigh, Integer.MIN_VALUE, 2, 64);
+        testMulHigh(someMulHigh, Long.MIN_VALUE, 2, 64);
+
+        testMulHigh(someMulHigh, Integer.MIN_VALUE, Integer.MIN_VALUE, 64);
+        testMulHigh(someMulHigh, Long.MIN_VALUE, Long.MIN_VALUE, 64);
+        testMulHigh(someMulHigh, Integer.MAX_VALUE, Integer.MAX_VALUE, 64);
+        testMulHigh(someMulHigh, Long.MAX_VALUE, Long.MAX_VALUE, 64);
+
+        assertEquals(StampFactory.forKind(JavaKind.Long).empty(), someMulHigh.foldStamp(StampFactory.forKind(JavaKind.Long).empty(), StampFactory.forKind(JavaKind.Long).empty()));
+        assertEquals(StampFactory.forKind(JavaKind.Long).empty(), someMulHigh.foldStamp(StampFactory.forKind(JavaKind.Long).empty(), StampFactory.forKind(JavaKind.Long).unrestricted()));
+        assertEquals(StampFactory.forKind(JavaKind.Long).empty(), someMulHigh.foldStamp(StampFactory.forKind(JavaKind.Long).empty(), IntegerStamp.create(64, 0, 0)));
+        assertEquals(StampFactory.forKind(JavaKind.Long).empty(), someMulHigh.foldStamp(StampFactory.forKind(JavaKind.Long).empty(), IntegerStamp.create(64, 1, 1)));
+        assertEquals(StampFactory.forKind(JavaKind.Long).empty(), someMulHigh.foldStamp(StampFactory.forKind(JavaKind.Long).empty(), IntegerStamp.create(64, -1, -1)));
+
+        assertEquals(StampFactory.forKind(JavaKind.Long).unrestricted(), someMulHigh.foldStamp(StampFactory.forKind(JavaKind.Long).unrestricted(), StampFactory.forKind(JavaKind.Long).unrestricted()));
+        assertEquals(StampFactory.forKind(JavaKind.Long).unrestricted(), someMulHigh.foldStamp(StampFactory.forKind(JavaKind.Long).unrestricted(), IntegerStamp.create(64, 0, 0)));
+        assertEquals(StampFactory.forKind(JavaKind.Long).unrestricted(), someMulHigh.foldStamp(StampFactory.forKind(JavaKind.Long).unrestricted(), IntegerStamp.create(64, 1, 1)));
+        assertEquals(StampFactory.forKind(JavaKind.Long).unrestricted(), someMulHigh.foldStamp(StampFactory.forKind(JavaKind.Long).unrestricted(), IntegerStamp.create(64, -1, -1)));
+    }
+
+    private static void testMulHigh(BinaryOp<?> someMulHigh, long a, long b, int bits) {
+        long expectedResult = getExpectedValue(someMulHigh, a, b, bits);
+        assertEquals(IntegerStamp.create(bits, expectedResult, expectedResult), someMulHigh.foldStamp(IntegerStamp.create(bits, a, a), IntegerStamp.create(bits, b, b)));
+    }
+
+    private static long getExpectedValue(BinaryOp<?> someMulHigh, long a, long b, int bits) {
+        if (someMulHigh == IntegerStamp.OPS.getMulHigh()) {
+            return mulHigh(a, b, bits);
+        } else {
+            assertEquals(IntegerStamp.OPS.getUMulHigh(), someMulHigh);
+            return umulHigh(a, b, bits);
+        }
+    }
+
+    private static long mulHigh(long a, long b, int bits) {
+        BigInteger valA = BigInteger.valueOf(a);
+        BigInteger valB = BigInteger.valueOf(b);
+        BigInteger result = valA.multiply(valB).shiftRight(bits);
+        if (bits == 32) {
+            return result.intValue();
+        } else {
+            assertEquals(64, bits);
+            return result.longValue();
+        }
+    }
+
+    private static long umulHigh(long a, long b, int bits) {
+        Assert.assertTrue(bits == 32 || bits == 64);
+        BigInteger valA = BigInteger.valueOf(a);
+        if (valA.compareTo(BigInteger.valueOf(0)) < 0) {
+            valA = valA.add(BigInteger.ONE.shiftLeft(bits));
+        }
+        BigInteger valB = BigInteger.valueOf(b);
+        if (valB.compareTo(BigInteger.valueOf(0)) < 0) {
+            valB = valB.add(BigInteger.ONE.shiftLeft(bits));
+        }
+
+        BigInteger result = valA.multiply(valB).shiftRight(bits);
+        if (bits == 32) {
+            return result.intValue();
+        } else {
+            return result.longValue();
+        }
     }
 }
--- a/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.nodes/src/org/graalvm/compiler/nodes/GraphDecoder.java	Wed Aug 23 14:52:55 2017 -0400
+++ b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.nodes/src/org/graalvm/compiler/nodes/GraphDecoder.java	Thu Aug 24 01:13:04 2017 +0000
@@ -466,6 +466,28 @@
             AbstractMergeNode merge = (AbstractMergeNode) node;
             EndNode singleEnd = merge.forwardEndAt(0);
 
+            /*
+             * In some corner cases, the MergeNode already has PhiNodes. Since there is a single
+             * EndNode, each PhiNode can only have one input, and we can replace the PhiNode with
+             * this single input.
+             */
+            for (PhiNode phi : merge.phis()) {
+                assert phi.inputs().count() == 1 : "input count must match end count";
+                Node singlePhiInput = phi.inputs().first();
+
+                /*
+                 * We do not have the orderID of the PhiNode anymore, so we need to search through
+                 * the complete list of nodes to find a match.
+                 */
+                for (int i = 0; i < loopScope.createdNodes.length; i++) {
+                    if (loopScope.createdNodes[i] == phi) {
+                        loopScope.createdNodes[i] = singlePhiInput;
+                    }
+                }
+
+                phi.replaceAndDelete(singlePhiInput);
+            }
+
             /* Nodes that would use this merge as the guard need to use the previous block. */
             registerNode(loopScope, nodeOrderId, AbstractBeginNode.prevBegin(singleEnd), true, false);
 
--- a/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.nodes/src/org/graalvm/compiler/nodes/calc/MulNode.java	Wed Aug 23 14:52:55 2017 -0400
+++ b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.nodes/src/org/graalvm/compiler/nodes/calc/MulNode.java	Thu Aug 24 01:13:04 2017 +0000
@@ -25,6 +25,7 @@
 import static org.graalvm.compiler.nodeinfo.NodeCycles.CYCLES_2;
 
 import org.graalvm.compiler.core.common.type.ArithmeticOpTable;
+import org.graalvm.compiler.core.common.type.IntegerStamp;
 import org.graalvm.compiler.core.common.type.ArithmeticOpTable.BinaryOp;
 import org.graalvm.compiler.core.common.type.ArithmeticOpTable.BinaryOp.Mul;
 import org.graalvm.compiler.core.common.type.Stamp;
@@ -108,6 +109,27 @@
                         return AddNode.create(new LeftShiftNode(forX, ConstantNode.forInt(CodeUtil.log2(i - 1))), forX);
                     } else if (CodeUtil.isPowerOf2(i + 1)) {
                         return SubNode.create(new LeftShiftNode(forX, ConstantNode.forInt(CodeUtil.log2(i + 1))), forX);
+                    } else {
+                        int bitCount = Long.bitCount(i);
+                        long highestBitValue = Long.highestOneBit(i);
+                        if (bitCount == 2) {
+                            // e.g., 0b1000_0010
+                            long lowerBitValue = i - highestBitValue;
+                            assert highestBitValue > 0 && lowerBitValue > 0;
+                            ValueNode left = new LeftShiftNode(forX, ConstantNode.forInt(CodeUtil.log2(highestBitValue)));
+                            ValueNode right = lowerBitValue == 1 ? forX : new LeftShiftNode(forX, ConstantNode.forInt(CodeUtil.log2(lowerBitValue)));
+                            return AddNode.create(left, right);
+                        } else {
+                            // e.g., 0b1111_1101
+                            int shiftToRoundUpToPowerOf2 = CodeUtil.log2(highestBitValue) + 1;
+                            long subValue = (1 << shiftToRoundUpToPowerOf2) - i;
+                            if (CodeUtil.isPowerOf2(subValue) && shiftToRoundUpToPowerOf2 < ((IntegerStamp) stamp).getBits()) {
+                                assert CodeUtil.log2(subValue) >= 1;
+                                ValueNode left = new LeftShiftNode(forX, ConstantNode.forInt(shiftToRoundUpToPowerOf2));
+                                ValueNode right = new LeftShiftNode(forX, ConstantNode.forInt(CodeUtil.log2(subValue)));
+                                return SubNode.create(left, right);
+                            }
+                        }
                     }
                 } else if (i < 0) {
                     if (CodeUtil.isPowerOf2(-i)) {
--- a/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.nodes/src/org/graalvm/compiler/nodes/cfg/ControlFlowGraph.java	Wed Aug 23 14:52:55 2017 -0400
+++ b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.nodes/src/org/graalvm/compiler/nodes/cfg/ControlFlowGraph.java	Thu Aug 24 01:13:04 2017 +0000
@@ -562,7 +562,7 @@
                 if (pred.getSuccessorCount() > 1) {
                     assert pred.getEndNode() instanceof ControlSplitNode;
                     ControlSplitNode controlSplit = (ControlSplitNode) pred.getEndNode();
-                    probability *= controlSplit.probability(block.getBeginNode());
+                    probability = multiplyProbabilities(probability, controlSplit.probability(block.getBeginNode()));
                 }
             } else {
                 probability = predecessors[0].probability;
@@ -572,7 +572,7 @@
 
                 if (block.getBeginNode() instanceof LoopBeginNode) {
                     LoopBeginNode loopBegin = (LoopBeginNode) block.getBeginNode();
-                    probability *= loopBegin.loopFrequency();
+                    probability = multiplyProbabilities(probability, loopBegin.loopFrequency());
                 }
             }
             if (probability < MIN_PROBABILITY) {
@@ -755,4 +755,20 @@
     public void setNodeToBlock(NodeMap<Block> nodeMap) {
         this.nodeToBlock = nodeMap;
     }
+
+    /**
+     * Multiplies a and b and clamps the result between
+     * {@link ControlFlowGraph#MIN_PROBABILITY} and {@link ControlFlowGraph#MAX_PROBABILITY}.
+     */
+    public static double multiplyProbabilities(double a, double b) {
+        assert !Double.isNaN(a) && !Double.isNaN(b) && Double.isFinite(a) && Double.isFinite(b) : a + " " + b;
+        double r = a * b;
+        if (r > MAX_PROBABILITY) {
+            return MAX_PROBABILITY;
+        }
+        if (r < MIN_PROBABILITY) {
+            return MIN_PROBABILITY;
+        }
+        return r;
+    }
 }
--- a/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.options.processor/src/org/graalvm/compiler/options/processor/OptionProcessor.java	Wed Aug 23 14:52:55 2017 -0400
+++ b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.options.processor/src/org/graalvm/compiler/options/processor/OptionProcessor.java	Thu Aug 24 01:13:04 2017 +0000
@@ -22,9 +22,12 @@
  */
 package org.graalvm.compiler.options.processor;
 
+import java.io.BufferedReader;
 import java.io.IOException;
+import java.io.InputStreamReader;
 import java.io.PrintWriter;
 import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.Collections;
 import java.util.HashMap;
 import java.util.HashSet;
@@ -50,7 +53,9 @@
 import javax.lang.model.util.Elements;
 import javax.lang.model.util.Types;
 import javax.tools.Diagnostic.Kind;
+import javax.tools.FileObject;
 import javax.tools.JavaFileObject;
+import javax.tools.StandardLocation;
 
 import org.graalvm.compiler.options.Option;
 import org.graalvm.compiler.options.OptionDescriptor;
@@ -117,22 +122,13 @@
             return;
         }
 
-        String help = annotation.help();
-        if (help.length() != 0) {
-            char firstChar = help.charAt(0);
-            if (!Character.isUpperCase(firstChar)) {
-                processingEnv.getMessager().printMessage(Kind.ERROR, "Option help text must start with upper case letter", element);
-                return;
-            }
-        }
-
         String optionName = annotation.name();
         if (optionName.equals("")) {
             optionName = fieldName;
         }
 
         if (!Character.isUpperCase(optionName.charAt(0))) {
-            processingEnv.getMessager().printMessage(Kind.ERROR, "Option name must start with capital letter", element);
+            processingEnv.getMessager().printMessage(Kind.ERROR, "Option name must start with an upper case letter", element);
             return;
         }
 
@@ -154,6 +150,7 @@
         String separator = "";
         Set<Element> originatingElementsList = info.originatingElements;
         originatingElementsList.add(field);
+        PackageElement enclosingPackage = null;
         while (enclosing != null) {
             if (enclosing.getKind() == ElementKind.CLASS || enclosing.getKind() == ElementKind.INTERFACE) {
                 if (enclosing.getModifiers().contains(Modifier.PRIVATE)) {
@@ -164,13 +161,64 @@
                 originatingElementsList.add(enclosing);
                 declaringClass = enclosing.getSimpleName() + separator + declaringClass;
                 separator = ".";
-            } else {
-                assert enclosing.getKind() == ElementKind.PACKAGE;
+            } else if (enclosing.getKind() == ElementKind.PACKAGE) {
+                enclosingPackage = (PackageElement) enclosing;
             }
             enclosing = enclosing.getEnclosingElement();
         }
+        if (enclosingPackage == null) {
+            processingEnv.getMessager().printMessage(Kind.ERROR, "Option field cannot be declared in the unnamed package", element);
+            return;
+        }
+        String[] helpValue = annotation.help();
+        String help = "";
+        String[] extraHelp = {};
 
-        info.options.add(new OptionInfo(optionName, help, optionType, declaringClass, field));
+        if (helpValue.length == 1) {
+            help = helpValue[0];
+            if (help.startsWith("file:")) {
+                String path = help.substring("file:".length());
+                Filer filer = processingEnv.getFiler();
+                try {
+                    FileObject file;
+                    try {
+                        file = filer.getResource(StandardLocation.SOURCE_PATH, enclosingPackage.getQualifiedName(), path);
+                    } catch (IllegalArgumentException | IOException e) {
+                        // Handle the case when a compiler doesn't support the SOURCE_PATH location
+                        file = filer.getResource(StandardLocation.CLASS_OUTPUT, enclosingPackage.getQualifiedName(), path);
+                    }
+                    try (BufferedReader br = new BufferedReader(new InputStreamReader(file.openInputStream()))) {
+                        help = br.readLine();
+                        if (help == null) {
+                            help = "";
+                        }
+                        String line = br.readLine();
+                        List<String> lines = new ArrayList<>();
+                        while (line != null) {
+                            lines.add(line);
+                            line = br.readLine();
+                        }
+                        extraHelp = lines.toArray(new String[lines.size()]);
+                    }
+                } catch (IOException e) {
+                    String msg = String.format("Error reading %s containing the help text for option field: %s", path, e);
+                    processingEnv.getMessager().printMessage(Kind.ERROR, msg, element);
+                    return;
+                }
+            }
+        } else if (helpValue.length > 1) {
+            help = helpValue[0];
+            extraHelp = Arrays.copyOfRange(helpValue, 1, helpValue.length);
+        }
+        if (help.length() != 0) {
+            char firstChar = help.charAt(0);
+            if (!Character.isUpperCase(firstChar)) {
+                processingEnv.getMessager().printMessage(Kind.ERROR, "Option help text must start with an upper case letter", element);
+                return;
+            }
+        }
+
+        info.options.add(new OptionInfo(optionName, help, extraHelp, optionType, declaringClass, field));
     }
 
     private void createFiles(OptionsInfo info) {
@@ -200,11 +248,11 @@
 
             String desc = OptionDescriptor.class.getSimpleName();
 
-            int i = 0;
             Collections.sort(info.options);
 
             out.println("    @Override");
             out.println("    public OptionDescriptor get(String value) {");
+            out.println("        switch (value) {");
             out.println("        // CheckStyle: stop line length check");
             for (OptionInfo option : info.options) {
                 String name = option.name;
@@ -214,41 +262,52 @@
                 } else {
                     optionField = option.declaringClass + "." + option.field.getSimpleName();
                 }
+                out.println("        case \"" + name + "\": {");
                 String type = option.type;
                 String help = option.help;
+                String[] extraHelp = option.extraHelp;
                 String declaringClass = option.declaringClass;
                 Name fieldName = option.field.getSimpleName();
-                out.println("        if (value.equals(\"" + name + "\")) {");
-                out.printf("            return %s.create(\"%s\", %s.class, \"%s\", %s.class, \"%s\", %s);\n", desc, name, type, help, declaringClass, fieldName, optionField);
+                out.printf("            return " + desc + ".create(\n");
+                out.printf("                /*name*/ \"%s\",\n", name);
+                out.printf("                /*type*/ %s.class,\n", type);
+                out.printf("                /*help*/ \"%s\",\n", help);
+                if (extraHelp.length != 0) {
+                    out.printf("                /*extraHelp*/ new String[] {\n");
+                    for (String line : extraHelp) {
+                        out.printf("                         \"%s\",\n", line.replace("\\", "\\\\").replace("\"", "\\\""));
+                    }
+                    out.printf("                              },\n");
+                }
+                out.printf("                /*declaringClass*/ %s.class,\n", declaringClass);
+                out.printf("                /*fieldName*/ \"%s\",\n", fieldName);
+                out.printf("                /*option*/ %s);\n", optionField);
                 out.println("        }");
             }
             out.println("        // CheckStyle: resume line length check");
+            out.println("        }");
             out.println("        return null;");
             out.println("    }");
             out.println();
             out.println("    @Override");
             out.println("    public Iterator<" + desc + "> iterator() {");
-            out.println("        // CheckStyle: stop line length check");
-            out.println("        List<" + desc + "> options = Arrays.asList(");
-            for (OptionInfo option : info.options) {
-                String optionField;
-                if (option.field.getModifiers().contains(Modifier.PRIVATE)) {
-                    throw new InternalError();
-                } else {
-                    optionField = option.declaringClass + "." + option.field.getSimpleName();
-                }
-                String name = option.name;
-                String type = option.type;
-                String help = option.help;
-                String declaringClass = option.declaringClass;
-                Name fieldName = option.field.getSimpleName();
-                String comma = i == info.options.size() - 1 ? "" : ",";
-                out.printf("            %s.create(\"%s\", %s.class, \"%s\", %s.class, \"%s\", %s)%s\n", desc, name, type, help, declaringClass, fieldName, optionField, comma);
-                i++;
+            out.println("        return new Iterator<OptionDescriptor>() {");
+            out.println("            int i = 0;");
+            out.println("            @Override");
+            out.println("            public boolean hasNext() {");
+            out.println("                return i < " + info.options.size() + ";");
+            out.println("            }");
+            out.println("            @Override");
+            out.println("            public OptionDescriptor next() {");
+            out.println("                switch (i++) {");
+            for (int i = 0; i < info.options.size(); i++) {
+                OptionInfo option = info.options.get(i);
+                out.println("                    case " + i + ": return get(\"" + option.name + "\");");
             }
-            out.println("        );");
-            out.println("        // CheckStyle: resume line length check");
-            out.println("        return options.iterator();");
+            out.println("                }");
+            out.println("                throw new NoSuchElementException();");
+            out.println("            }");
+            out.println("        };");
             out.println("    }");
             out.println("}");
         }
@@ -274,13 +333,15 @@
 
         final String name;
         final String help;
+        final String[] extraHelp;
         final String type;
         final String declaringClass;
         final VariableElement field;
 
-        OptionInfo(String name, String help, String type, String declaringClass, VariableElement field) {
+        OptionInfo(String name, String help, String[] extraHelp, String type, String declaringClass, VariableElement field) {
             this.name = name;
             this.help = help;
+            this.extraHelp = extraHelp;
             this.type = type;
             this.declaringClass = declaringClass;
             this.field = field;
--- a/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.options/src/org/graalvm/compiler/options/EnumOptionKey.java	Wed Aug 23 14:52:55 2017 -0400
+++ b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.options/src/org/graalvm/compiler/options/EnumOptionKey.java	Thu Aug 24 01:13:04 2017 +0000
@@ -28,27 +28,10 @@
 
 public class EnumOptionKey<T extends Enum<T>> extends OptionKey<T> {
     final Class<T> enumClass;
-    final ValueHelp<T> valueHelp;
-
-    /**
-     * Provides help text for enum values.
-     */
-    public interface ValueHelp<T extends Enum<T>> {
-        /**
-         * Gets help text for the enum {@code value} that includes the name of the value. If
-         * {@code null} is returned, {@code value.toString()} is used.
-         */
-        String getHelp(Object value);
-    }
-
-    public EnumOptionKey(T value) {
-        this(value, null);
-    }
 
     @SuppressWarnings("unchecked")
-    public EnumOptionKey(T value, ValueHelp<T> help) {
+    public EnumOptionKey(T value) {
         super(value);
-        this.valueHelp = help;
         if (value == null) {
             throw new IllegalArgumentException("Value must not be null");
         }
@@ -62,10 +45,6 @@
         return EnumSet.allOf(enumClass);
     }
 
-    public ValueHelp<T> getValueHelp() {
-        return valueHelp;
-    }
-
     Object valueOf(String name) {
         try {
             return Enum.valueOf(enumClass, name);
--- a/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.options/src/org/graalvm/compiler/options/Option.java	Wed Aug 23 14:52:55 2017 -0400
+++ b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.options/src/org/graalvm/compiler/options/Option.java	Thu Aug 24 01:13:04 2017 +0000
@@ -38,10 +38,20 @@
 public @interface Option {
 
     /**
-     * Gets a help message for the option. New lines can be embedded in the message with
-     * {@code "%n"}.
+     * Gets a help message for the option.
+     * <p>
+     * The first element of the array is the short help message. This part of the help message is
+     * subject to line wrapping when printed.
+     * <p>
+     * The remaining elements contain a more detailed expansion of the help message and will be
+     * printed as is in a left-aligned block (i.e. leading spaces will be preserved).
+     * <p>
+     * If there is only one element and it starts with {@code "file:"<path>}, then the help message
+     * is located in a file located by resolving {@code <path>} against the location of the package
+     * in which the option is declared. The first line in the file is the short help message as
+     * described above. The remaining lines are the help message expansion.
      */
-    String help();
+    String[] help();
 
     /**
      * The name of the option. By default, the name of the annotated field should be used.
--- a/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.options/src/org/graalvm/compiler/options/OptionDescriptor.java	Wed Aug 23 14:52:55 2017 -0400
+++ b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.options/src/org/graalvm/compiler/options/OptionDescriptor.java	Thu Aug 24 01:13:04 2017 +0000
@@ -22,6 +22,10 @@
  */
 package org.graalvm.compiler.options;
 
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+
 /**
  * Describes the attributes of a static field {@linkplain Option option} and provides access to its
  * {@linkplain OptionKey value}.
@@ -31,25 +35,34 @@
     protected final String name;
     protected final Class<?> type;
     protected final String help;
+    protected final List<String> extraHelp;
     protected final OptionKey<?> optionKey;
     protected final Class<?> declaringClass;
     protected final String fieldName;
 
+    private static final String[] NO_EXTRA_HELP = {};
+
     public static OptionDescriptor create(String name, Class<?> type, String help, Class<?> declaringClass, String fieldName, OptionKey<?> option) {
+        return create(name, type, help, NO_EXTRA_HELP, declaringClass, fieldName, option);
+    }
+
+    public static OptionDescriptor create(String name, Class<?> type, String help, String[] extraHelp, Class<?> declaringClass, String fieldName, OptionKey<?> option) {
         assert option != null : declaringClass + "." + fieldName;
         OptionDescriptor result = option.getDescriptor();
         if (result == null) {
-            result = new OptionDescriptor(name, type, help, declaringClass, fieldName, option);
+            List<String> extraHelpList = extraHelp == null || extraHelp.length == 0 ? Collections.emptyList() : Collections.unmodifiableList(Arrays.asList(extraHelp));
+            result = new OptionDescriptor(name, type, help, extraHelpList, declaringClass, fieldName, option);
             option.setDescriptor(result);
         }
         assert result.name.equals(name) && result.type == type && result.declaringClass == declaringClass && result.fieldName.equals(fieldName) && result.optionKey == option;
         return result;
     }
 
-    private OptionDescriptor(String name, Class<?> type, String help, Class<?> declaringClass, String fieldName, OptionKey<?> optionKey) {
+    private OptionDescriptor(String name, Class<?> type, String help, List<String> extraHelp, Class<?> declaringClass, String fieldName, OptionKey<?> optionKey) {
         this.name = name;
         this.type = type;
         this.help = help;
+        this.extraHelp = extraHelp;
         this.optionKey = optionKey;
         this.declaringClass = declaringClass;
         this.fieldName = fieldName;
@@ -65,13 +78,24 @@
     }
 
     /**
-     * Gets a descriptive help message for the option.
+     * Gets a descriptive help message for the option. This message should be self contained without
+     * relying on {@link #getExtraHelp() extra help lines}.
+     *
+     * @see Option#help()
      */
     public String getHelp() {
         return help;
     }
 
     /**
+     * Gets extra lines of help text. These lines should not be subject to any line wrapping or
+     * formatting apart from indentation.
+     */
+    public List<String> getExtraHelp() {
+        return extraHelp;
+    }
+
+    /**
      * Gets the name of the option. It's up to the client of this object how to use the name to get
      * a user specified value for the option from the environment.
      */
--- a/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.options/src/org/graalvm/compiler/options/OptionValues.java	Wed Aug 23 14:52:55 2017 -0400
+++ b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.options/src/org/graalvm/compiler/options/OptionValues.java	Thu Aug 24 01:13:04 2017 +0000
@@ -24,15 +24,12 @@
 
 import java.io.PrintStream;
 import java.util.ArrayList;
-import java.util.Collections;
 import java.util.Comparator;
-import java.util.EnumSet;
 import java.util.List;
 import java.util.Map;
 import java.util.SortedMap;
 import java.util.TreeMap;
 
-import org.graalvm.compiler.options.EnumOptionKey.ValueHelp;
 import org.graalvm.util.EconomicMap;
 import org.graalvm.util.Equivalence;
 import org.graalvm.util.UnmodifiableEconomicMap;
@@ -165,10 +162,9 @@
      * @return {@code text} broken into lines
      */
     private static List<String> wrap(String text, int width) {
-        List<String> lines = Collections.singletonList(text);
+        List<String> lines = new ArrayList<>();
         if (text.length() > width) {
             String[] chunks = text.split("\\s+");
-            lines = new ArrayList<>();
             StringBuilder line = new StringBuilder();
             for (String chunk : chunks) {
                 if (line.length() + chunk.length() > width) {
@@ -178,22 +174,13 @@
                 if (line.length() != 0) {
                     line.append(' ');
                 }
-                String[] embeddedLines = chunk.split("%n", -2);
-                if (embeddedLines.length == 1) {
-                    line.append(chunk);
-                } else {
-                    for (int i = 0; i < embeddedLines.length; i++) {
-                        line.append(embeddedLines[i]);
-                        if (i < embeddedLines.length - 1) {
-                            lines.add(line.toString());
-                            line.setLength(0);
-                        }
-                    }
-                }
+                line.append(chunk);
             }
             if (line.length() != 0) {
                 lines.add(line.toString());
             }
+        } else {
+            lines.add(text);
         }
         return lines;
     }
@@ -222,24 +209,7 @@
             if (value instanceof String) {
                 value = '"' + String.valueOf(value) + '"';
             }
-            String help = desc.getHelp();
-            if (desc.getOptionKey() instanceof EnumOptionKey) {
-                EnumOptionKey<?> eoption = (EnumOptionKey<?>) desc.getOptionKey();
-                EnumSet<?> evalues = eoption.getAllValues();
-                String evaluesString = evalues.toString();
-                ValueHelp<?> valueHelp = eoption.getValueHelp();
-                if (help.length() > 0 && !help.endsWith(".")) {
-                    help += ".";
-                }
-                if (valueHelp == null) {
-                    help += " Valid values are: " + evaluesString.substring(1, evaluesString.length() - 1);
-                } else {
-                    for (Object o : evalues) {
-                        String vhelp = valueHelp.getHelp(o);
-                        help += "%n" + (vhelp == null ? o : vhelp);
-                    }
-                }
-            }
+
             String name = namePrefix + e.getKey();
             String assign = containsKey(desc.optionKey) ? ":=" : "=";
             String typeName = desc.getOptionKey() instanceof EnumOptionKey ? "String" : desc.getType().getSimpleName();
@@ -252,11 +222,16 @@
                 out.printf("%s[%s]%n", linePrefix, typeName);
             }
 
+            List<String> helpLines;
+            String help = desc.getHelp();
             if (help.length() != 0) {
-                List<String> helpLines = wrap(help, PROPERTY_LINE_WIDTH - PROPERTY_HELP_INDENT);
-                for (int i = 0; i < helpLines.size(); i++) {
-                    out.printf("%" + PROPERTY_HELP_INDENT + "s%s%n", "", helpLines.get(i));
-                }
+                helpLines = wrap(help, PROPERTY_LINE_WIDTH - PROPERTY_HELP_INDENT);
+                helpLines.addAll(desc.getExtraHelp());
+            } else {
+                helpLines = desc.getExtraHelp();
+            }
+            for (String line : helpLines) {
+                out.printf("%" + PROPERTY_HELP_INDENT + "s%s%n", "", line);
             }
         }
     }
--- a/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.phases.common/src/org/graalvm/compiler/phases/common/inlining/InliningUtil.java	Wed Aug 23 14:52:55 2017 -0400
+++ b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.phases.common/src/org/graalvm/compiler/phases/common/inlining/InliningUtil.java	Thu Aug 24 01:13:04 2017 +0000
@@ -30,6 +30,7 @@
 import java.util.ArrayDeque;
 import java.util.ArrayList;
 import java.util.List;
+import java.util.Objects;
 import java.util.function.Consumer;
 
 import org.graalvm.compiler.api.replacements.MethodSubstitution;
@@ -351,7 +352,7 @@
             }
         }
 
-        updateSourcePositions(invoke, inlineGraph, duplicates);
+        updateSourcePositions(invoke, inlineGraph, duplicates, !Objects.equals(inlineGraph.method(), inlineeMethod));
         if (stateAfter != null) {
             processFrameStates(invoke, inlineGraph, duplicates, stateAtExceptionEdge, returnNodes.size() > 1);
             int callerLockDepth = stateAfter.nestedLockDepth();
@@ -569,14 +570,14 @@
     }
 
     @SuppressWarnings("try")
-    private static void updateSourcePositions(Invoke invoke, StructuredGraph inlineGraph, UnmodifiableEconomicMap<Node, Node> duplicates) {
+    private static void updateSourcePositions(Invoke invoke, StructuredGraph inlineGraph, UnmodifiableEconomicMap<Node, Node> duplicates, boolean isSubstitution) {
         if (inlineGraph.mayHaveNodeSourcePosition() && invoke.stateAfter() != null) {
             if (invoke.asNode().getNodeSourcePosition() == null) {
                 // Temporarily ignore the assert below.
                 return;
             }
 
-            JavaConstant constantReceiver = invoke.getInvokeKind().hasReceiver() ? invoke.getReceiver().asJavaConstant() : null;
+            JavaConstant constantReceiver = invoke.getInvokeKind().hasReceiver() && !isSubstitution ? invoke.getReceiver().asJavaConstant() : null;
             NodeSourcePosition invokePos = invoke.asNode().getNodeSourcePosition();
             assert invokePos != null : "missing source information";
 
--- a/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.phases.common/src/org/graalvm/compiler/phases/common/inlining/policy/AbstractInliningPolicy.java	Wed Aug 23 14:52:55 2017 -0400
+++ b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.phases.common/src/org/graalvm/compiler/phases/common/inlining/policy/AbstractInliningPolicy.java	Thu Aug 24 01:13:04 2017 +0000
@@ -75,10 +75,11 @@
     }
 
     private static boolean onlyForcedIntrinsics(Replacements replacements, InlineInfo info) {
-        for (int i = 0; i < info.numberOfMethods(); i++) {
-            if (!InliningUtil.canIntrinsify(replacements, info.methodAt(i), info.invoke().bci())) {
-                return false;
-            }
+        if (!onlyIntrinsics(replacements, info)) {
+            return false;
+        }
+        if (!info.shouldInline()) {
+            return false;
         }
         return true;
     }
--- a/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.phases/src/org/graalvm/compiler/phases/graph/FixedNodeProbabilityCache.java	Wed Aug 23 14:52:55 2017 -0400
+++ b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.phases/src/org/graalvm/compiler/phases/graph/FixedNodeProbabilityCache.java	Thu Aug 24 01:13:04 2017 +0000
@@ -39,6 +39,8 @@
 import org.graalvm.util.EconomicMap;
 import org.graalvm.util.Equivalence;
 
+import static org.graalvm.compiler.nodes.cfg.ControlFlowGraph.multiplyProbabilities;
+
 /**
  * Compute probabilities for fixed nodes on the fly and cache them at {@link AbstractBeginNode}s.
  */
@@ -106,7 +108,7 @@
             }
         } else {
             ControlSplitNode split = (ControlSplitNode) current.predecessor();
-            probability = split.probability((AbstractBeginNode) current) * applyAsDouble(split);
+            probability = multiplyProbabilities(split.probability((AbstractBeginNode) current), applyAsDouble(split));
         }
         assert !Double.isNaN(probability) && !Double.isInfinite(probability) : current + " " + probability;
         cache.put(current, probability);
@@ -125,7 +127,7 @@
             result += applyAsDouble(endNode);
         }
         if (current instanceof LoopBeginNode) {
-            result *= ((LoopBeginNode) current).loopFrequency();
+            result = multiplyProbabilities(result, ((LoopBeginNode) current).loopFrequency());
         }
         return result;
     }
--- a/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.printer/src/org/graalvm/compiler/printer/BinaryGraphPrinter.java	Wed Aug 23 14:52:55 2017 -0400
+++ b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.printer/src/org/graalvm/compiler/printer/BinaryGraphPrinter.java	Thu Aug 24 01:13:04 2017 +0000
@@ -366,7 +366,7 @@
             return ((Class<?>) obj).getName();
         }
         if (obj instanceof ResolvedJavaType) {
-            return ((ResolvedJavaType) obj).getName();
+            return ((ResolvedJavaType) obj).toJavaName();
         }
         return null;
     }
@@ -403,7 +403,7 @@
 
     @Override
     public String fieldTypeName(ResolvedJavaField field) {
-        return field.getType().getName();
+        return field.getType().toJavaName();
     }
 
     @Override
--- a/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.replacements.amd64/src/org/graalvm/compiler/replacements/amd64/AMD64GraphBuilderPlugins.java	Wed Aug 23 14:52:55 2017 -0400
+++ b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.replacements.amd64/src/org/graalvm/compiler/replacements/amd64/AMD64GraphBuilderPlugins.java	Thu Aug 24 01:13:04 2017 +0000
@@ -31,6 +31,8 @@
 import static org.graalvm.compiler.replacements.nodes.UnaryMathIntrinsicNode.UnaryOperation.TAN;
 import static org.graalvm.compiler.serviceprovider.JDK9Method.Java8OrEarlier;
 
+import java.util.Arrays;
+
 import org.graalvm.compiler.bytecode.BytecodeProvider;
 import org.graalvm.compiler.lir.amd64.AMD64ArithmeticLIRGeneratorTool.RoundingMode;
 import org.graalvm.compiler.nodes.ValueNode;
@@ -44,6 +46,7 @@
 import org.graalvm.compiler.nodes.java.AtomicReadAndWriteNode;
 import org.graalvm.compiler.nodes.memory.address.AddressNode;
 import org.graalvm.compiler.nodes.memory.address.OffsetAddressNode;
+import org.graalvm.compiler.replacements.ArraysSubstitutions;
 import org.graalvm.compiler.replacements.IntegerSubstitutions;
 import org.graalvm.compiler.replacements.LongSubstitutions;
 import org.graalvm.compiler.replacements.StandardGraphBuilderPlugins.UnsafeGetPlugin;
@@ -73,6 +76,7 @@
                 registerUnsafePlugins(invocationPlugins, replacementsBytecodeProvider);
                 registerStringPlugins(invocationPlugins, arch, replacementsBytecodeProvider);
                 registerMathPlugins(invocationPlugins, arch, arithmeticStubs, replacementsBytecodeProvider);
+                registerArraysEqualsPlugins(invocationPlugins, replacementsBytecodeProvider);
             }
         });
     }
@@ -229,4 +233,10 @@
             r.registerOptional4("put" + kind.name() + "Unaligned", Receiver.class, Object.class, long.class, javaClass, new UnsafePutPlugin(kind, false));
         }
     }
+
+    private static void registerArraysEqualsPlugins(InvocationPlugins plugins, BytecodeProvider bytecodeProvider) {
+        Registration r = new Registration(plugins, Arrays.class, bytecodeProvider);
+        r.registerMethodSubstitution(ArraysSubstitutions.class, "equals", float[].class, float[].class);
+        r.registerMethodSubstitution(ArraysSubstitutions.class, "equals", double[].class, double[].class);
+    }
 }
--- a/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.replacements.test/src/org/graalvm/compiler/replacements/test/ArraysSubstitutionsTest.java	Wed Aug 23 14:52:55 2017 -0400
+++ b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.replacements.test/src/org/graalvm/compiler/replacements/test/ArraysSubstitutionsTest.java	Thu Aug 24 01:13:04 2017 +0000
@@ -232,68 +232,6 @@
     }
 
     @Test
-    public void testEqualsFloat() {
-        Object[] args1 = new Object[N];
-        Object[] args2 = new Object[N];
-        int n = 0;
-
-        // equal arrays
-        for (int i = 0; i < N / 2; i++, n++) {
-            args1[n] = new float[i];
-            args2[n] = new float[i];
-        }
-
-        // non-equal arrays
-        for (int i = 0; i < N / 2; i++, n++) {
-            float[] a2 = new float[i];
-            if (i > 0) {
-                a2[i - 1] = 1;
-            }
-            args1[n] = new float[i];
-            args2[n] = a2;
-        }
-
-        Class<?>[] parameterTypes = new Class<?>[]{float[].class, float[].class};
-        testSubstitution("arraysEqualsFloat", ArrayEqualsNode.class, Arrays.class, "equals", parameterTypes, false, args1, args2);
-    }
-
-    @SuppressWarnings("all")
-    public static boolean arraysEqualsFloat(float[] a, float[] b) {
-        return Arrays.equals(a, b);
-    }
-
-    @Test
-    public void testEqualsDouble() {
-        Object[] args1 = new Object[N];
-        Object[] args2 = new Object[N];
-        int n = 0;
-
-        // equal arrays
-        for (int i = 0; i < N / 2; i++, n++) {
-            args1[n] = new double[i];
-            args2[n] = new double[i];
-        }
-
-        // non-equal arrays
-        for (int i = 0; i < N / 2; i++, n++) {
-            double[] a2 = new double[i];
-            if (i > 0) {
-                a2[i - 1] = 1;
-            }
-            args1[n] = new double[i];
-            args2[n] = a2;
-        }
-
-        Class<?>[] parameterTypes = new Class<?>[]{double[].class, double[].class};
-        testSubstitution("arraysEqualsDouble", ArrayEqualsNode.class, Arrays.class, "equals", parameterTypes, false, args1, args2);
-    }
-
-    @SuppressWarnings("all")
-    public static boolean arraysEqualsDouble(double[] a, double[] b) {
-        return Arrays.equals(a, b);
-    }
-
-    @Test
     public void testEqualsNodeGVN() {
         test("testEqualsNodeGVNSnippet", true);
     }
--- a/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.replacements.test/src/org/graalvm/compiler/replacements/test/DeoptimizeOnExceptionTest.java	Wed Aug 23 14:52:55 2017 -0400
+++ b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.replacements.test/src/org/graalvm/compiler/replacements/test/DeoptimizeOnExceptionTest.java	Thu Aug 24 01:13:04 2017 +0000
@@ -24,16 +24,25 @@
 
 import java.util.Random;
 
+import org.graalvm.compiler.api.directives.GraalDirectives;
+import org.graalvm.compiler.core.phases.HighTier;
 import org.graalvm.compiler.core.test.GraalCompilerTest;
+import org.graalvm.compiler.nodes.ValueNode;
+import org.graalvm.compiler.nodes.graphbuilderconf.GraphBuilderContext;
+import org.graalvm.compiler.nodes.graphbuilderconf.InlineInvokePlugin;
+import org.graalvm.compiler.options.OptionValues;
 import org.graalvm.compiler.phases.common.AbstractInliningPhase;
 import org.graalvm.compiler.test.ExportingClassLoader;
 import org.junit.Assert;
+import org.junit.Assume;
 import org.junit.Test;
 import org.objectweb.asm.ClassWriter;
 import org.objectweb.asm.Label;
 import org.objectweb.asm.MethodVisitor;
 import org.objectweb.asm.Opcodes;
 
+import jdk.vm.ci.code.InstalledCode;
+import jdk.vm.ci.meta.DeoptimizationReason;
 import jdk.vm.ci.meta.ResolvedJavaMethod;
 
 /**
@@ -90,6 +99,61 @@
         return "SUCCESS";
     }
 
+    @Test
+    public void test3() {
+        Assume.assumeTrue("Only works on jdk8 right now", Java8OrEarlier);
+        ResolvedJavaMethod method = getResolvedJavaMethod("test3Snippet");
+
+        for (int i = 0; i < 2; i++) {
+            Result actual;
+            boolean expectedCompiledCode = (method.getProfilingInfo().getDeoptimizationCount(DeoptimizationReason.NotCompiledExceptionHandler) != 0);
+            InstalledCode code = getCode(method, null, false, true, new OptionValues(getInitialOptions(), HighTier.Options.Inline, false));
+            assertTrue(code.isValid());
+
+            try {
+                actual = new Result(code.executeVarargs(false), null);
+            } catch (Exception e) {
+                actual = new Result(null, e);
+            }
+
+            assertTrue(i > 0 == expectedCompiledCode, "expect compiled code to stay around after the first iteration");
+            assertEquals(new Result(expectedCompiledCode, null), actual);
+            assertTrue(expectedCompiledCode == code.isValid());
+        }
+    }
+
+    @Override
+    protected InlineInvokePlugin.InlineInfo bytecodeParserShouldInlineInvoke(GraphBuilderContext b, ResolvedJavaMethod method, ValueNode[] args) {
+        if (method.getName().equals("throwException")) {
+            if (b.getMethod().getProfilingInfo().getDeoptimizationCount(DeoptimizationReason.NotCompiledExceptionHandler) != 0) {
+                return InlineInvokePlugin.InlineInfo.DO_NOT_INLINE_WITH_EXCEPTION;
+            } else {
+                return InlineInvokePlugin.InlineInfo.DO_NOT_INLINE_NO_EXCEPTION;
+            }
+        }
+        return super.bytecodeParserShouldInlineInvoke(b, method, args);
+    }
+
+    private static void throwException() throws Exception {
+        throw new Exception();
+    }
+
+    static int footprint;
+
+    public static boolean test3Snippet(boolean rethrowException) throws Exception {
+        try {
+            footprint = 1;
+            throwException();
+        } catch (Exception e) {
+            footprint = 2;
+            if (rethrowException) {
+                throw e;
+            }
+        }
+
+        return GraalDirectives.inCompiledCode();
+    }
+
     public static class MyClassLoader extends ExportingClassLoader {
         @Override
         protected Class<?> findClass(String className) throws ClassNotFoundException {
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.replacements.test/src/org/graalvm/compiler/replacements/test/FloatArraysEqualsTest.java	Thu Aug 24 01:13:04 2017 +0000
@@ -0,0 +1,204 @@
+/*
+ * Copyright (c) 2017, 2017, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+package org.graalvm.compiler.replacements.test;
+
+import java.util.Arrays;
+
+import org.graalvm.compiler.code.CompilationResult;
+import org.graalvm.compiler.core.test.GraalCompilerTest;
+import org.graalvm.compiler.nodes.ConstantNode;
+import org.graalvm.compiler.nodes.StructuredGraph;
+import org.graalvm.compiler.nodes.StructuredGraph.AllowAssumptions;
+import org.junit.Test;
+
+import jdk.vm.ci.code.InstalledCode;
+import jdk.vm.ci.meta.ResolvedJavaMethod;
+
+public class FloatArraysEqualsTest extends GraalCompilerTest {
+
+    public static boolean testFloatArraySnippet(float[] a, float[] b) {
+        return Arrays.equals(a, b);
+    }
+
+    private void testEqualInFloatArray(int arraySize, int index, float f1, float f2) {
+        if (arraySize > 0 && index >= 0 && index < arraySize) {
+            float[] a1 = new float[arraySize];
+            float[] a2 = new float[arraySize];
+            a1[index] = f1;
+            a2[index] = f2;
+            test("testFloatArraySnippet", a1, a2);
+        }
+    }
+
+    public static final int FLOAT_TAIL = 1;
+    public static final int FLOAT_8BYTE_ALIGNED = 2;
+    public static final int FLOAT_8BYTE_UNALIGNED = 3;
+    public static final int FLOAT_VECTOR_ALIGNED = 8;
+    public static final int FLOAT_VECTOR_UNALIGNED = 9;
+
+    @Test
+    public void testFloatArray() {
+        for (int size : new int[]{FLOAT_TAIL, FLOAT_8BYTE_ALIGNED, FLOAT_8BYTE_UNALIGNED, FLOAT_VECTOR_ALIGNED, FLOAT_VECTOR_UNALIGNED}) {
+            for (int index : new int[]{0, size - 1}) {
+                testEqualInFloatArray(size, index, 1024, 1024);
+                testEqualInFloatArray(size, index, 0.0f, -0.0f);
+                testEqualInFloatArray(size, index, Float.intBitsToFloat(0x7fc00000), Float.intBitsToFloat(0x7fc00000));
+                testEqualInFloatArray(size, index, Float.intBitsToFloat(0x7fc00000), Float.intBitsToFloat(0x7f800001));
+                testEqualInFloatArray(size, index, Float.intBitsToFloat(0x7fc00000), 1024);
+            }
+        }
+    }
+
+    public static boolean testDoubleArraySnippet(double[] a, double[] b) {
+        return Arrays.equals(a, b);
+    }
+
+    public static final int DOUBLE_8BYTE_ALIGNED = 1;
+    public static final int DOUBLE_VECTOR_ALIGNED = 4;
+    public static final int DOUBLE_VECTOR_UNALIGNED = 5;
+
+    private void testEqualInDoubleArray(int arraySize, int index, double d1, double d2) {
+        if (arraySize > 0 && index >= 0 && index < arraySize) {
+            double[] a1 = new double[arraySize];
+            double[] a2 = new double[arraySize];
+            a1[index] = d1;
+            a2[index] = d2;
+            test("testDoubleArraySnippet", a1, a2);
+        }
+    }
+
+    @Test
+    public void testDoubleArrayOrdinary() {
+        for (int size : new int[]{DOUBLE_8BYTE_ALIGNED, DOUBLE_VECTOR_ALIGNED, DOUBLE_VECTOR_UNALIGNED}) {
+            for (int index : new int[]{0, size - 1}) {
+                testEqualInDoubleArray(size, index, 1024, 1024);
+                testEqualInDoubleArray(size, index, 0.0d, -0.0d);
+                testEqualInDoubleArray(size, index, Double.longBitsToDouble(0x7ff8000000000000L), Double.longBitsToDouble(0x7ff8000000000000L));
+                testEqualInDoubleArray(size, index, Double.longBitsToDouble(0x7ff8000000000000L), Double.longBitsToDouble(0x7ff0000000000001L));
+                testEqualInDoubleArray(size, index, Double.longBitsToDouble(0x7ff8000000000000L), 1024);
+            }
+        }
+    }
+
+    public static boolean testFloatArrayWithPEASnippet0() {
+        return Arrays.equals(new float[]{0.0f}, new float[]{-0.0f});
+    }
+
+    public static boolean testFloatArrayWithPEASnippet1() {
+        return Arrays.equals(new float[]{Float.intBitsToFloat(0x7fc00000)}, new float[]{Float.intBitsToFloat(0x7fc00000)});
+    }
+
+    public static boolean testFloatArrayWithPEASnippet2() {
+        return Arrays.equals(new float[]{Float.intBitsToFloat(0x7fc00000)}, new float[]{Float.intBitsToFloat(0x7f800001)});
+
+    }
+
+    @Test
+    public void testFloatArrayWithPEA() {
+        test("testFloatArrayWithPEASnippet0");
+        test("testFloatArrayWithPEASnippet1");
+        test("testFloatArrayWithPEASnippet2");
+    }
+
+    public static boolean testDoubleArrayWithPEASnippet0() {
+        return Arrays.equals(new double[]{0.0d}, new double[]{-0.0d});
+    }
+
+    public static boolean testDoubleArrayWithPEASnippet1() {
+        return Arrays.equals(new double[]{Double.longBitsToDouble(0x7ff8000000000000L)}, new double[]{Double.longBitsToDouble(0x7ff8000000000000L)});
+    }
+
+    public static boolean testDoubleArrayWithPEASnippet2() {
+        return Arrays.equals(new double[]{Double.longBitsToDouble(0x7ff8000000000000L)}, new double[]{Double.longBitsToDouble(0x7ff0000000000001L)});
+    }
+
+    @Test
+    public void testDoubleArrayWithPEA() {
+        test("testDoubleArrayWithPEASnippet0");
+        test("testDoubleArrayWithPEASnippet1");
+        test("testDoubleArrayWithPEASnippet2");
+    }
+
+    public static final float[] FLOAT_ARRAY1 = new float[]{0.0f};
+    public static final float[] FLOAT_ARRAY2 = new float[]{-0.0f};
+    public static final float[] FLOAT_ARRAY3 = new float[]{Float.intBitsToFloat(0x7fc00000)};
+    public static final float[] FLOAT_ARRAY4 = new float[]{Float.intBitsToFloat(0x7f800001)};
+
+    public static final double[] DOUBLE_ARRAY1 = new double[]{0.0d};
+    public static final double[] DOUBLE_ARRAY2 = new double[]{-0.0d};
+    public static final double[] DOUBLE_ARRAY3 = new double[]{Double.longBitsToDouble(0x7ff8000000000000L)};
+    public static final double[] DOUBLE_ARRAY4 = new double[]{Double.longBitsToDouble(0x7ff0000000000001L)};
+
+    public static boolean testStableFloatArraySnippet0() {
+        return Arrays.equals(FLOAT_ARRAY1, FLOAT_ARRAY2);
+    }
+
+    public static boolean testStableFloatArraySnippet1() {
+        return Arrays.equals(FLOAT_ARRAY3, FLOAT_ARRAY4); // two different NaN bit patterns
+    }
+
+    public static boolean testStableDoubleArraySnippet0() {
+        return Arrays.equals(DOUBLE_ARRAY1, DOUBLE_ARRAY2);
+    }
+
+    public static boolean testStableDoubleArraySnippet1() {
+        return Arrays.equals(DOUBLE_ARRAY3, DOUBLE_ARRAY4);
+    }
+
+    public void testStableArray(String methodName) {
+        ResolvedJavaMethod method = getResolvedJavaMethod(methodName);
+        Result expected = executeExpected(method, null);
+
+        StructuredGraph graph = parseEager(method, AllowAssumptions.YES);
+
+        for (ConstantNode constantNode : graph.getNodes().filter(ConstantNode.class).snapshot()) {
+            if (getConstantReflection().readArrayLength(constantNode.asJavaConstant()) != null) {
+                ConstantNode newConstantNode = ConstantNode.forConstant(constantNode.asJavaConstant(), 1, true, getMetaAccess());
+                newConstantNode = graph.unique(newConstantNode);
+                constantNode.replaceAndDelete(newConstantNode);
+            }
+        }
+
+        CompilationResult result = compile(method, graph);
+        InstalledCode code = addMethod(graph.getDebug(), method, result);
+
+        Result actual;
+
+        try {
+            actual = new Result(code.executeVarargs(), null);
+        } catch (Exception e) {
+            actual = new Result(null, e);
+        }
+
+        assertEquals(expected, actual);
+    }
+
+    @Test
+    public void testStableArray() {
+        testStableArray("testStableFloatArraySnippet0");
+        testStableArray("testStableFloatArraySnippet1");
+        testStableArray("testStableDoubleArraySnippet0");
+        testStableArray("testStableDoubleArraySnippet1");
+    }
+
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.replacements.test/src/org/graalvm/compiler/replacements/test/UnsafeBooleanAccessTest.java	Thu Aug 24 01:13:04 2017 +0000
@@ -0,0 +1,81 @@
+/*
+ * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+package org.graalvm.compiler.replacements.test;
+
+import java.lang.reflect.Field;
+
+import org.graalvm.compiler.core.test.GraalCompilerTest;
+import org.junit.Test;
+
+public class UnsafeBooleanAccessTest extends GraalCompilerTest {
+
+    private static short onHeapMemory;
+
+    private static final Object onHeapMemoryBase;
+    private static final long onHeapMemoryOffset;
+
+    static {
+        try {
+            Field staticField = UnsafeBooleanAccessTest.class.getDeclaredField("onHeapMemory");
+            onHeapMemoryBase = UNSAFE.staticFieldBase(staticField);
+            onHeapMemoryOffset = UNSAFE.staticFieldOffset(staticField);
+        } catch (Exception e) {
+            throw new RuntimeException(e);
+        }
+    }
+
+    public static boolean testGetBooleanSnippet() {
+        UNSAFE.putShort(onHeapMemoryBase, onHeapMemoryOffset, (short) 0x0204);
+        return UNSAFE.getBoolean(onHeapMemoryBase, onHeapMemoryOffset);
+    }
+
+    @Test
+    public void testGetBoolean() {
+        test("testGetBooleanSnippet");
+    }
+
+    public static short testPutBooleanSnippet() {
+        UNSAFE.putShort(onHeapMemoryBase, onHeapMemoryOffset, (short) 0x0204);
+        boolean bool = UNSAFE.getBoolean(onHeapMemoryBase, onHeapMemoryOffset);
+        UNSAFE.putBoolean(onHeapMemoryBase, onHeapMemoryOffset, bool);
+        return onHeapMemory;
+    }
+
+    @Test
+    public void testPutBoolean() {
+        test("testPutBooleanSnippet");
+    }
+
+    public static boolean testAndBooleanSnippet() {
+        UNSAFE.putShort(onHeapMemoryBase, onHeapMemoryOffset, (short) 0x0204);
+        boolean bool0 = UNSAFE.getBoolean(onHeapMemoryBase, onHeapMemoryOffset);
+        boolean bool1 = UNSAFE.getBoolean(onHeapMemoryBase, onHeapMemoryOffset + 1);
+        return bool0 & bool1;
+    }
+
+    @Test
+    public void testAndBoolean() {
+        test("testAndBooleanSnippet");
+    }
+
+}
--- a/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.replacements/src/org/graalvm/compiler/replacements/DefaultJavaLoweringProvider.java	Wed Aug 23 14:52:55 2017 -0400
+++ b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.replacements/src/org/graalvm/compiler/replacements/DefaultJavaLoweringProvider.java	Thu Aug 24 01:13:04 2017 +0000
@@ -59,8 +59,10 @@
 import org.graalvm.compiler.nodes.StructuredGraph;
 import org.graalvm.compiler.nodes.ValueNode;
 import org.graalvm.compiler.nodes.calc.AddNode;
+import org.graalvm.compiler.nodes.calc.ConditionalNode;
 import org.graalvm.compiler.nodes.calc.IntegerBelowNode;
 import org.graalvm.compiler.nodes.calc.IntegerConvertNode;
+import org.graalvm.compiler.nodes.calc.IntegerEqualsNode;
 import org.graalvm.compiler.nodes.calc.IsNullNode;
 import org.graalvm.compiler.nodes.calc.LeftShiftNode;
 import org.graalvm.compiler.nodes.calc.NarrowNode;
@@ -577,7 +579,7 @@
         } else {
             memoryRead.setGuard(guard);
         }
-        ValueNode readValue = implicitLoadConvert(graph, readKind, memoryRead, compressible);
+        ValueNode readValue = performBooleanCoercionIfNecessary(implicitLoadConvert(graph, readKind, memoryRead, compressible), readKind);
         load.replaceAtUsages(readValue);
         return memoryRead;
     }
@@ -592,11 +594,20 @@
         // An unsafe read must not float otherwise it may float above
         // a test guaranteeing the read is safe.
         memoryRead.setForceFixed(true);
-        ValueNode readValue = implicitLoadConvert(graph, readKind, memoryRead, false);
+        ValueNode readValue = performBooleanCoercionIfNecessary(implicitLoadConvert(graph, readKind, memoryRead, false), readKind);
         load.replaceAtUsages(readValue);
         graph.replaceFixedWithFixed(load, memoryRead);
     }
 
+    private static ValueNode performBooleanCoercionIfNecessary(ValueNode readValue, JavaKind readKind) {
+        if (readKind == JavaKind.Boolean) {
+            StructuredGraph graph = readValue.graph();
+            IntegerEqualsNode eq = graph.addOrUnique(new IntegerEqualsNode(readValue, ConstantNode.forInt(0, graph)));
+            return graph.addOrUnique(new ConditionalNode(eq, ConstantNode.forBoolean(false, graph), ConstantNode.forBoolean(true, graph)));
+        }
+        return readValue;
+    }
+
     protected void lowerUnsafeStoreNode(RawStoreNode store) {
         StructuredGraph graph = store.graph();
         boolean compressible = store.value().getStackKind() == JavaKind.Object;
--- a/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.replacements/src/org/graalvm/compiler/replacements/StandardGraphBuilderPlugins.java	Wed Aug 23 14:52:55 2017 -0400
+++ b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.replacements/src/org/graalvm/compiler/replacements/StandardGraphBuilderPlugins.java	Thu Aug 24 01:13:04 2017 +0000
@@ -189,9 +189,7 @@
         r.registerMethodSubstitution(ArraysSubstitutions.class, "equals", short[].class, short[].class);
         r.registerMethodSubstitution(ArraysSubstitutions.class, "equals", char[].class, char[].class);
         r.registerMethodSubstitution(ArraysSubstitutions.class, "equals", int[].class, int[].class);
-        r.registerMethodSubstitution(ArraysSubstitutions.class, "equals", float[].class, float[].class);
         r.registerMethodSubstitution(ArraysSubstitutions.class, "equals", long[].class, long[].class);
-        r.registerMethodSubstitution(ArraysSubstitutions.class, "equals", double[].class, double[].class);
     }
 
     private static void registerArrayPlugins(InvocationPlugins plugins, BytecodeProvider bytecodeProvider) {
--- a/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.replacements/src/org/graalvm/compiler/replacements/nodes/ArrayEqualsNode.java	Wed Aug 23 14:52:55 2017 -0400
+++ b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.replacements/src/org/graalvm/compiler/replacements/nodes/ArrayEqualsNode.java	Thu Aug 24 01:13:04 2017 +0000
@@ -95,11 +95,16 @@
         return length;
     }
 
+    private static boolean isNaNFloat(JavaConstant constant) {
+        JavaKind kind = constant.getJavaKind();
+        return (kind == JavaKind.Float && Float.isNaN(constant.asFloat())) || (kind == JavaKind.Double && Double.isNaN(constant.asDouble()));
+    }
+
     private static boolean arrayEquals(ConstantReflectionProvider constantReflection, JavaConstant a, JavaConstant b, int len) {
         for (int i = 0; i < len; i++) {
             JavaConstant aElem = constantReflection.readArrayElement(a, i);
             JavaConstant bElem = constantReflection.readArrayElement(b, i);
-            if (!constantReflection.constantEquals(aElem, bElem)) {
+            if (!constantReflection.constantEquals(aElem, bElem) && !(isNaNFloat(aElem) && isNaNFloat(bElem))) {
                 return false;
             }
         }
@@ -145,8 +150,28 @@
                     ValueNode entry1 = tool.getEntry(virtual1, i);
                     ValueNode entry2 = tool.getEntry(virtual2, i);
                     if (entry1 != entry2) {
-                        // the contents might be different
-                        allEqual = false;
+                        if (entry1 instanceof ConstantNode && entry2 instanceof ConstantNode) {
+                            // Float NaN constants are different constant nodes but treated as
+                            // equal in Arrays.equals([F[F) or Arrays.equals([D[D).
+                            if (entry1.getStackKind() == JavaKind.Float && entry2.getStackKind() == JavaKind.Float) {
+                                float value1 = ((JavaConstant) ((ConstantNode) entry1).asConstant()).asFloat();
+                                float value2 = ((JavaConstant) ((ConstantNode) entry2).asConstant()).asFloat();
+                                if (Float.floatToIntBits(value1) != Float.floatToIntBits(value2)) {
+                                    allEqual = false;
+                                }
+                            } else if (entry1.getStackKind() == JavaKind.Double && entry2.getStackKind() == JavaKind.Double) {
+                                double value1 = ((JavaConstant) ((ConstantNode) entry1).asConstant()).asDouble();
+                                double value2 = ((JavaConstant) ((ConstantNode) entry2).asConstant()).asDouble();
+                                if (Double.doubleToLongBits(value1) != Double.doubleToLongBits(value2)) {
+                                    allEqual = false;
+                                }
+                            } else {
+                                allEqual = false;
+                            }
+                        } else {
+                            // the contents might be different
+                            allEqual = false;
+                        }
                     }
                     if (entry1.stamp().alwaysDistinct(entry2.stamp())) {
                         // the contents are different
--- a/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.replacements/src/org/graalvm/compiler/replacements/nodes/ExplodeLoopNode.java	Wed Aug 23 14:52:55 2017 -0400
+++ b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.replacements/src/org/graalvm/compiler/replacements/nodes/ExplodeLoopNode.java	Thu Aug 24 01:13:04 2017 +0000
@@ -57,7 +57,7 @@
             for (Node n : currentNext.cfgSuccessors()) {
                 succs.add(n);
             }
-            if (succs.size() == 1) {
+            if (succs.size() == 1 && succs.get(0) != currentNext) {
                 currentNext = succs.get(0);
             } else {
                 return null;
--- a/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.replacements/src/org/graalvm/compiler/replacements/nodes/arithmetic/IntegerMulHighNode.java	Wed Aug 23 14:52:55 2017 -0400
+++ b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.replacements/src/org/graalvm/compiler/replacements/nodes/arithmetic/IntegerMulHighNode.java	Thu Aug 24 01:13:04 2017 +0000
@@ -25,69 +25,28 @@
 import static org.graalvm.compiler.nodeinfo.NodeCycles.CYCLES_2;
 import static org.graalvm.compiler.nodeinfo.NodeSize.SIZE_2;
 
-import java.util.function.BiFunction;
-
-import org.graalvm.compiler.core.common.type.IntegerStamp;
-import org.graalvm.compiler.core.common.type.Stamp;
-import org.graalvm.compiler.core.common.type.StampFactory;
+import org.graalvm.compiler.core.common.type.ArithmeticOpTable;
+import org.graalvm.compiler.core.common.type.ArithmeticOpTable.BinaryOp.MulHigh;
 import org.graalvm.compiler.graph.NodeClass;
+import org.graalvm.compiler.graph.spi.Canonicalizable;
 import org.graalvm.compiler.graph.spi.CanonicalizerTool;
 import org.graalvm.compiler.lir.gen.ArithmeticLIRGeneratorTool;
 import org.graalvm.compiler.nodeinfo.NodeInfo;
 import org.graalvm.compiler.nodes.ConstantNode;
 import org.graalvm.compiler.nodes.ValueNode;
-import org.graalvm.compiler.nodes.calc.BinaryNode;
-import org.graalvm.compiler.nodes.spi.ArithmeticLIRLowerable;
+import org.graalvm.compiler.nodes.calc.BinaryArithmeticNode;
 import org.graalvm.compiler.nodes.spi.NodeLIRBuilderTool;
 
-import jdk.vm.ci.meta.JavaKind;
+import jdk.vm.ci.meta.Constant;
+import jdk.vm.ci.meta.PrimitiveConstant;
 import jdk.vm.ci.meta.Value;
 
 @NodeInfo(shortName = "*H", cycles = CYCLES_2, size = SIZE_2)
-public final class IntegerMulHighNode extends BinaryNode implements ArithmeticLIRLowerable {
+public final class IntegerMulHighNode extends BinaryArithmeticNode<MulHigh> implements Canonicalizable.BinaryCommutative<ValueNode> {
     public static final NodeClass<IntegerMulHighNode> TYPE = NodeClass.create(IntegerMulHighNode.class);
 
     public IntegerMulHighNode(ValueNode x, ValueNode y) {
-        this((IntegerStamp) x.stamp().unrestricted(), x, y);
-    }
-
-    public IntegerMulHighNode(IntegerStamp stamp, ValueNode x, ValueNode y) {
-        super(TYPE, stamp, x, y);
-    }
-
-    /**
-     * Determines the minimum and maximum result of this node for the given inputs and returns the
-     * result of the given BiFunction on the minimum and maximum values.
-     */
-    private <T> T processExtremes(Stamp forX, Stamp forY, BiFunction<Long, Long, T> op) {
-        IntegerStamp xStamp = (IntegerStamp) forX;
-        IntegerStamp yStamp = (IntegerStamp) forY;
-
-        JavaKind kind = getStackKind();
-        assert kind == JavaKind.Int || kind == JavaKind.Long;
-        long[] xExtremes = {xStamp.lowerBound(), xStamp.upperBound()};
-        long[] yExtremes = {yStamp.lowerBound(), yStamp.upperBound()};
-        long min = Long.MAX_VALUE;
-        long max = Long.MIN_VALUE;
-        for (long a : xExtremes) {
-            for (long b : yExtremes) {
-                long result = kind == JavaKind.Int ? multiplyHigh((int) a, (int) b) : multiplyHigh(a, b);
-                min = Math.min(min, result);
-                max = Math.max(max, result);
-            }
-        }
-        return op.apply(min, max);
-    }
-
-    @Override
-    public Stamp foldStamp(Stamp stampX, Stamp stampY) {
-        return processExtremes(stampX, stampY, (min, max) -> StampFactory.forInteger(getStackKind(), min, max));
-    }
-
-    @SuppressWarnings("cast")
-    @Override
-    public ValueNode canonical(CanonicalizerTool tool, ValueNode forX, ValueNode forY) {
-        return processExtremes(forX.stamp(), forY.stamp(), (min, max) -> min == (long) max ? ConstantNode.forIntegerKind(getStackKind(), min) : this);
+        super(TYPE, ArithmeticOpTable::getMulHigh, x, y);
     }
 
     @Override
@@ -97,29 +56,35 @@
         nodeValueMap.setResult(this, gen.emitMulHigh(a, b));
     }
 
-    public static int multiplyHigh(int x, int y) {
-        long r = (long) x * (long) y;
-        return (int) (r >> 32);
+    @Override
+    public ValueNode canonical(CanonicalizerTool tool, ValueNode forX, ValueNode forY) {
+        ValueNode ret = super.canonical(tool, forX, forY);
+        if (ret != this) {
+            return ret;
+        }
+
+        if (forX.isConstant() && !forY.isConstant()) {
+            // we try to swap and canonicalize
+            ValueNode improvement = canonical(tool, forY, forX);
+            if (improvement != this) {
+                return improvement;
+            }
+            // if this fails we only swap
+            return new IntegerMulHighNode(forY, forX);
+        }
+        return canonical(this, forY);
     }
 
-    public static long multiplyHigh(long x, long y) {
-        // Checkstyle: stop
-        long x0, y0, z0;
-        long x1, y1, z1, z2, t;
-        // Checkstyle: resume
-
-        x0 = x & 0xFFFFFFFFL;
-        x1 = x >> 32;
-
-        y0 = y & 0xFFFFFFFFL;
-        y1 = y >> 32;
-
-        z0 = x0 * y0;
-        t = x1 * y0 + (z0 >>> 32);
-        z1 = t & 0xFFFFFFFFL;
-        z2 = t >> 32;
-        z1 += x0 * y1;
-
-        return x1 * y1 + z2 + (z1 >> 32);
+    private static ValueNode canonical(IntegerMulHighNode self, ValueNode forY) {
+        if (forY.isConstant()) {
+            Constant c = forY.asConstant();
+            if (c instanceof PrimitiveConstant && ((PrimitiveConstant) c).getJavaKind().isNumericInteger()) {
+                long i = ((PrimitiveConstant) c).asLong();
+                if (i == 0) { // only x * 0 folds; the signed high word of x * 1 is -1 for negative x
+                    return ConstantNode.forIntegerStamp(self.stamp(), 0);
+                }
+            }
+        }
+        return self;
     }
 }
--- a/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.replacements/src/org/graalvm/compiler/replacements/nodes/arithmetic/UnsignedMulHighNode.java	Wed Aug 23 14:52:55 2017 -0400
+++ b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.replacements/src/org/graalvm/compiler/replacements/nodes/arithmetic/UnsignedMulHighNode.java	Thu Aug 24 01:13:04 2017 +0000
@@ -25,86 +25,28 @@
 import static org.graalvm.compiler.nodeinfo.NodeCycles.CYCLES_2;
 import static org.graalvm.compiler.nodeinfo.NodeSize.SIZE_2;
 
-import java.util.function.BiFunction;
-
-import org.graalvm.compiler.core.common.type.IntegerStamp;
-import org.graalvm.compiler.core.common.type.Stamp;
-import org.graalvm.compiler.core.common.type.StampFactory;
+import org.graalvm.compiler.core.common.type.ArithmeticOpTable;
+import org.graalvm.compiler.core.common.type.ArithmeticOpTable.BinaryOp.UMulHigh;
 import org.graalvm.compiler.graph.NodeClass;
+import org.graalvm.compiler.graph.spi.Canonicalizable;
 import org.graalvm.compiler.graph.spi.CanonicalizerTool;
 import org.graalvm.compiler.lir.gen.ArithmeticLIRGeneratorTool;
 import org.graalvm.compiler.nodeinfo.NodeInfo;
 import org.graalvm.compiler.nodes.ConstantNode;
 import org.graalvm.compiler.nodes.ValueNode;
-import org.graalvm.compiler.nodes.calc.BinaryNode;
-import org.graalvm.compiler.nodes.spi.ArithmeticLIRLowerable;
+import org.graalvm.compiler.nodes.calc.BinaryArithmeticNode;
 import org.graalvm.compiler.nodes.spi.NodeLIRBuilderTool;
 
-import jdk.vm.ci.meta.JavaKind;
+import jdk.vm.ci.meta.Constant;
+import jdk.vm.ci.meta.PrimitiveConstant;
 import jdk.vm.ci.meta.Value;
 
 @NodeInfo(shortName = "|*H|", cycles = CYCLES_2, size = SIZE_2)
-public final class UnsignedMulHighNode extends BinaryNode implements ArithmeticLIRLowerable {
-
+public final class UnsignedMulHighNode extends BinaryArithmeticNode<UMulHigh> implements Canonicalizable.BinaryCommutative<ValueNode> {
     public static final NodeClass<UnsignedMulHighNode> TYPE = NodeClass.create(UnsignedMulHighNode.class);
 
     public UnsignedMulHighNode(ValueNode x, ValueNode y) {
-        this((IntegerStamp) x.stamp().unrestricted(), x, y);
-    }
-
-    public UnsignedMulHighNode(IntegerStamp stamp, ValueNode x, ValueNode y) {
-        super(TYPE, stamp, x, y);
-    }
-
-    private static long[] getUnsignedExtremes(IntegerStamp stamp) {
-        if (stamp.lowerBound() < 0 && stamp.upperBound() >= 0) {
-            /*
-             * If -1 and 0 are both in the signed range, then we can't say anything about the
-             * unsigned range, so we have to return [0, MAX_UNSIGNED].
-             */
-            return new long[]{0, -1L};
-        } else {
-            return new long[]{stamp.lowerBound(), stamp.upperBound()};
-        }
-    }
-
-    /**
-     * Determines the minimum and maximum result of this node for the given inputs and returns the
-     * result of the given BiFunction on the minimum and maximum values. Note that the minima and
-     * maxima are calculated using signed min/max functions, while the values themselves are
-     * unsigned.
-     */
-    private <T> T processExtremes(Stamp forX, Stamp forY, BiFunction<Long, Long, T> op) {
-        IntegerStamp xStamp = (IntegerStamp) forX;
-        IntegerStamp yStamp = (IntegerStamp) forY;
-
-        JavaKind kind = getStackKind();
-        assert kind == JavaKind.Int || kind == JavaKind.Long;
-        long[] xExtremes = getUnsignedExtremes(xStamp);
-        long[] yExtremes = getUnsignedExtremes(yStamp);
-        long min = Long.MAX_VALUE;
-        long max = Long.MIN_VALUE;
-        for (long a : xExtremes) {
-            for (long b : yExtremes) {
-                long result = kind == JavaKind.Int ? multiplyHighUnsigned((int) a, (int) b) : multiplyHighUnsigned(a, b);
-                min = Math.min(min, result);
-                max = Math.max(max, result);
-            }
-        }
-        return op.apply(min, max);
-    }
-
-    @SuppressWarnings("cast")
-    @Override
-    public Stamp foldStamp(Stamp stampX, Stamp stampY) {
-        // if min is negative, then the value can reach into the unsigned range
-        return processExtremes(stampX, stampY, (min, max) -> (min == (long) max || min >= 0) ? StampFactory.forInteger(getStackKind(), min, max) : StampFactory.forKind(getStackKind()));
-    }
-
-    @SuppressWarnings("cast")
-    @Override
-    public ValueNode canonical(CanonicalizerTool tool, ValueNode forX, ValueNode forY) {
-        return processExtremes(forX.stamp(), forY.stamp(), (min, max) -> min == (long) max ? ConstantNode.forIntegerKind(getStackKind(), min) : this);
+        super(TYPE, ArithmeticOpTable::getUMulHigh, x, y);
     }
 
     @Override
@@ -114,31 +56,35 @@
         nodeValueMap.setResult(this, gen.emitUMulHigh(a, b));
     }
 
-    public static int multiplyHighUnsigned(int x, int y) {
-        long xl = x & 0xFFFFFFFFL;
-        long yl = y & 0xFFFFFFFFL;
-        long r = xl * yl;
-        return (int) (r >> 32);
+    @Override
+    public ValueNode canonical(CanonicalizerTool tool, ValueNode forX, ValueNode forY) {
+        ValueNode ret = super.canonical(tool, forX, forY); // superclass canonicalization is tried first
+        if (ret != this) {
+            return ret;
+        }
+
+        if (forX.isConstant() && !forY.isConstant()) {
+            // only the first operand is constant: retry canonicalization with the operands swapped
+            ValueNode improvement = canonical(tool, forY, forX);
+            if (improvement != this) {
+                return improvement;
+            }
+            // the swapped attempt found nothing: still return a node with the constant as second operand
+            return new UnsignedMulHighNode(forY, forX);
+        }
+        return canonical(this, forY); // private static overload: only inspects forY
     }
 
-    public static long multiplyHighUnsigned(long x, long y) {
-        // Checkstyle: stop
-        long x0, y0, z0;
-        long x1, y1, z1, z2, t;
-        // Checkstyle: resume
-
-        x0 = x & 0xFFFFFFFFL;
-        x1 = x >>> 32;
-
-        y0 = y & 0xFFFFFFFFL;
-        y1 = y >>> 32;
-
-        z0 = x0 * y0;
-        t = x1 * y0 + (z0 >>> 32);
-        z1 = t & 0xFFFFFFFFL;
-        z2 = t >>> 32;
-        z1 += x0 * y1;
-
-        return x1 * y1 + z2 + (z1 >>> 32);
+    private static ValueNode canonical(UnsignedMulHighNode self, ValueNode forY) { // returns constant 0 or self
+        if (forY.isConstant()) {
+            Constant c = forY.asConstant();
+            if (c instanceof PrimitiveConstant && ((PrimitiveConstant) c).getJavaKind().isNumericInteger()) {
+                long i = ((PrimitiveConstant) c).asLong();
+                if (i == 0 || i == 1) { // the high half of an unsigned product with 0 or 1 is always 0
+                    return ConstantNode.forIntegerStamp(self.stamp(), 0);
+                }
+            }
+        }
+        return self; // no simplification applies
     }
 }
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.micro.benchmarks/src/micro/benchmarks/TestJMHBlackbox.java	Thu Aug 24 01:13:04 2017 +0000
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2015, 2017, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+package micro.benchmarks;
+
+import org.openjdk.jmh.annotations.Benchmark;
+import org.openjdk.jmh.annotations.Fork;
+import org.openjdk.jmh.annotations.Measurement;
+import org.openjdk.jmh.annotations.Warmup;
+
+/**
+ * This dummy class is used to verify that the JMH microbenchmarking environment is set up properly.
+ */
+@Warmup(iterations = 1)
+@Measurement(iterations = 1)
+@Fork(1)
+public class TestJMHBlackbox {
+
+    @Benchmark
+    public void testJMH() {
+        // This method was intentionally left blank.
+    }
+
+}
--- a/src/os/bsd/vm/os_bsd.cpp	Wed Aug 23 14:52:55 2017 -0400
+++ b/src/os/bsd/vm/os_bsd.cpp	Thu Aug 24 01:13:04 2017 +0000
@@ -952,7 +952,7 @@
   if (now <= prev) {
     return prev;   // same or retrograde time;
   }
-  const uint64_t obsv = Atomic::cmpxchg(now, (volatile jlong*)&Bsd::_max_abstime, prev);
+  const uint64_t obsv = Atomic::cmpxchg(now, &Bsd::_max_abstime, prev);
   assert(obsv >= prev, "invariant");   // Monotonicity
   // If the CAS succeeded then we're done and return "now".
   // If the CAS failed and the observed value "obsv" is >= now then
--- a/src/os/linux/vm/os_linux.cpp	Wed Aug 23 14:52:55 2017 -0400
+++ b/src/os/linux/vm/os_linux.cpp	Thu Aug 24 01:13:04 2017 +0000
@@ -1747,9 +1747,9 @@
     {EM_SPARCV9,     EM_SPARCV9, ELFCLASS64, ELFDATA2MSB, (char*)"Sparc v9 64"},
     {EM_PPC,         EM_PPC,     ELFCLASS32, ELFDATA2MSB, (char*)"Power PC 32"},
 #if defined(VM_LITTLE_ENDIAN)
-    {EM_PPC64,       EM_PPC64,   ELFCLASS64, ELFDATA2LSB, (char*)"Power PC 64"},
+    {EM_PPC64,       EM_PPC64,   ELFCLASS64, ELFDATA2LSB, (char*)"Power PC 64 LE"},
 #else
-    {EM_PPC64,       EM_PPC64,   ELFCLASS64, ELFDATA2MSB, (char*)"Power PC 64 LE"},
+    {EM_PPC64,       EM_PPC64,   ELFCLASS64, ELFDATA2MSB, (char*)"Power PC 64"},
 #endif
     {EM_ARM,         EM_ARM,     ELFCLASS32,   ELFDATA2LSB, (char*)"ARM"},
     {EM_S390,        EM_S390,    ELFCLASSNONE, ELFDATA2MSB, (char*)"IBM System/390"},
--- a/src/os/solaris/vm/os_solaris.cpp	Wed Aug 23 14:52:55 2017 -0400
+++ b/src/os/solaris/vm/os_solaris.cpp	Thu Aug 24 01:13:04 2017 +0000
@@ -1197,7 +1197,7 @@
   if (now <= prev) {
     return prev;   // same or retrograde time;
   }
-  const hrtime_t obsv = Atomic::cmpxchg(now, (volatile jlong*)&max_hrtime, prev);
+  const hrtime_t obsv = Atomic::cmpxchg(now, &max_hrtime, prev);
   assert(obsv >= prev, "invariant");   // Monotonicity
   // If the CAS succeeded then we're done and return "now".
   // If the CAS failed and the observed value "obsv" is >= now then
--- a/src/os_cpu/aix_ppc/vm/atomic_aix_ppc.hpp	Wed Aug 23 14:52:55 2017 -0400
+++ b/src/os_cpu/aix_ppc/vm/atomic_aix_ppc.hpp	Thu Aug 24 01:13:04 2017 +0000
@@ -306,8 +306,13 @@
   }
 }
 
-#define VM_HAS_SPECIALIZED_CMPXCHG_BYTE
-inline jbyte Atomic::cmpxchg(jbyte exchange_value, volatile jbyte* dest, jbyte compare_value, cmpxchg_memory_order order) {
+template<>
+template<typename T>
+inline T Atomic::PlatformCmpxchg<1>::operator()(T exchange_value,
+                                                T volatile* dest,
+                                                T compare_value,
+                                                cmpxchg_memory_order order) const {
+  STATIC_ASSERT(1 == sizeof(T)); // this specialization is only valid for 1-byte T
 
   // Note that cmpxchg guarantees a two-way memory barrier across
   // the cmpxchg, so it's really a a 'fence_cmpxchg_fence' if not
@@ -368,16 +373,22 @@
 
   cmpxchg_post_membar(order);
 
-  return (jbyte)(unsigned char)old_value;
+  return PrimitiveConversions::cast<T>((unsigned char)old_value);
 }
 
-inline jint Atomic::cmpxchg(jint exchange_value, volatile jint* dest, jint compare_value, cmpxchg_memory_order order) {
+template<>
+template<typename T>
+inline T Atomic::PlatformCmpxchg<4>::operator()(T exchange_value,
+                                                T volatile* dest,
+                                                T compare_value,
+                                                cmpxchg_memory_order order) const {
+  STATIC_ASSERT(4 == sizeof(T)); // this specialization is only valid for 4-byte T
 
   // Note that cmpxchg guarantees a two-way memory barrier across
   // the cmpxchg, so it's really a a 'fence_cmpxchg_fence' if not
   // specified otherwise (see atomic.hpp).
 
-  unsigned int old_value;
+  T old_value;
   const uint64_t zero = 0;
 
   cmpxchg_pre_membar(order);
@@ -412,16 +423,22 @@
 
   cmpxchg_post_membar(order);
 
-  return (jint) old_value;
+  return old_value;
 }
 
-inline jlong Atomic::cmpxchg(jlong exchange_value, volatile jlong* dest, jlong compare_value, cmpxchg_memory_order order) {
+template<>
+template<typename T>
+inline T Atomic::PlatformCmpxchg<8>::operator()(T exchange_value,
+                                                T volatile* dest,
+                                                T compare_value,
+                                                cmpxchg_memory_order order) const {
+  STATIC_ASSERT(8 == sizeof(T)); // this specialization is only valid for 8-byte T
 
   // Note that cmpxchg guarantees a two-way memory barrier across
   // the cmpxchg, so it's really a a 'fence_cmpxchg_fence' if not
   // specified otherwise (see atomic.hpp).
 
-  long old_value;
+  T old_value;
   const uint64_t zero = 0;
 
   cmpxchg_pre_membar(order);
@@ -456,15 +473,7 @@
 
   cmpxchg_post_membar(order);
 
-  return (jlong) old_value;
-}
-
-inline intptr_t Atomic::cmpxchg_ptr(intptr_t exchange_value, volatile intptr_t* dest, intptr_t compare_value, cmpxchg_memory_order order) {
-  return (intptr_t)cmpxchg((jlong)exchange_value, (volatile jlong*)dest, (jlong)compare_value, order);
-}
-
-inline void* Atomic::cmpxchg_ptr(void* exchange_value, volatile void* dest, void* compare_value, cmpxchg_memory_order order) {
-  return (void*)cmpxchg((jlong)exchange_value, (volatile jlong*)dest, (jlong)compare_value, order);
+  return old_value;
 }
 
 #undef strasm_sync
--- a/src/os_cpu/bsd_x86/vm/atomic_bsd_x86.hpp	Wed Aug 23 14:52:55 2017 -0400
+++ b/src/os_cpu/bsd_x86/vm/atomic_bsd_x86.hpp	Thu Aug 24 01:13:04 2017 +0000
@@ -25,8 +25,6 @@
 #ifndef OS_CPU_BSD_X86_VM_ATOMIC_BSD_X86_HPP
 #define OS_CPU_BSD_X86_VM_ATOMIC_BSD_X86_HPP
 
-#include "runtime/os.hpp"
-
 // Implementation of class atomic
 
 inline void Atomic::store    (jbyte    store_value, jbyte*    dest) { *dest = store_value; }
@@ -81,8 +79,13 @@
   return (void*)xchg_ptr((intptr_t)exchange_value, (volatile intptr_t*)dest);
 }
 
-#define VM_HAS_SPECIALIZED_CMPXCHG_BYTE
-inline jbyte    Atomic::cmpxchg    (jbyte    exchange_value, volatile jbyte*    dest, jbyte    compare_value, cmpxchg_memory_order order) {
+template<>
+template<typename T>
+inline T Atomic::PlatformCmpxchg<1>::operator()(T exchange_value,
+                                                T volatile* dest,
+                                                T compare_value,
+                                                cmpxchg_memory_order /* order */) const {
+  STATIC_ASSERT(1 == sizeof(T));
   __asm__ volatile (  "lock cmpxchgb %1,(%3)"
                     : "=a" (exchange_value)
                     : "q" (exchange_value), "a" (compare_value), "r" (dest)
@@ -90,7 +93,13 @@
   return exchange_value;
 }
 
-inline jint     Atomic::cmpxchg    (jint     exchange_value, volatile jint*     dest, jint     compare_value, cmpxchg_memory_order order) {
+template<>
+template<typename T>
+inline T Atomic::PlatformCmpxchg<4>::operator()(T exchange_value,
+                                                T volatile* dest,
+                                                T compare_value,
+                                                cmpxchg_memory_order /* order */) const {
+  STATIC_ASSERT(4 == sizeof(T));
   __asm__ volatile (  "lock cmpxchgl %1,(%3)"
                     : "=a" (exchange_value)
                     : "r" (exchange_value), "a" (compare_value), "r" (dest)
@@ -137,7 +146,13 @@
   return exchange_value;
 }
 
-inline jlong    Atomic::cmpxchg    (jlong    exchange_value, volatile jlong*    dest, jlong    compare_value, cmpxchg_memory_order order) {
+template<>
+template<typename T>
+inline T Atomic::PlatformCmpxchg<8>::operator()(T exchange_value,
+                                                T volatile* dest,
+                                                T compare_value,
+                                                cmpxchg_memory_order /* order */) const {
+  STATIC_ASSERT(8 == sizeof(T));
   __asm__ __volatile__ (  "lock cmpxchgq %1,(%3)"
                         : "=a" (exchange_value)
                         : "r" (exchange_value), "a" (compare_value), "r" (dest)
@@ -145,14 +160,6 @@
   return exchange_value;
 }
 
-inline intptr_t Atomic::cmpxchg_ptr(intptr_t exchange_value, volatile intptr_t* dest, intptr_t compare_value, cmpxchg_memory_order order) {
-  return (intptr_t)cmpxchg((jlong)exchange_value, (volatile jlong*)dest, (jlong)compare_value, order);
-}
-
-inline void*    Atomic::cmpxchg_ptr(void*    exchange_value, volatile void*     dest, void*    compare_value, cmpxchg_memory_order order) {
-  return (void*)cmpxchg((jlong)exchange_value, (volatile jlong*)dest, (jlong)compare_value, order);
-}
-
 inline jlong Atomic::load(const volatile jlong* src) { return *src; }
 
 #else // !AMD64
@@ -184,16 +191,14 @@
   void _Atomic_move_long(const volatile jlong* src, volatile jlong* dst);
 }
 
-inline jlong    Atomic::cmpxchg    (jlong    exchange_value, volatile jlong*    dest, jlong    compare_value, cmpxchg_memory_order order) {
-  return _Atomic_cmpxchg_long(exchange_value, dest, compare_value, os::is_MP());
-}
-
-inline intptr_t Atomic::cmpxchg_ptr(intptr_t exchange_value, volatile intptr_t* dest, intptr_t compare_value, cmpxchg_memory_order order) {
-  return (intptr_t)cmpxchg((jint)exchange_value, (volatile jint*)dest, (jint)compare_value, order);
-}
-
-inline void*    Atomic::cmpxchg_ptr(void*    exchange_value, volatile void*     dest, void*    compare_value, cmpxchg_memory_order order) {
-  return (void*)cmpxchg((jint)exchange_value, (volatile jint*)dest, (jint)compare_value, order);
+template<>
+template<typename T>
+inline T Atomic::PlatformCmpxchg<8>::operator()(T exchange_value,
+                                                T volatile* dest,
+                                                T compare_value,
+                                                cmpxchg_memory_order order) const { // 'order' is not referenced in this body
+  STATIC_ASSERT(8 == sizeof(T));
+  return cmpxchg_using_helper<jlong>(_Atomic_cmpxchg_long, exchange_value, dest, compare_value); // 8-byte CAS goes through the external _Atomic_cmpxchg_long routine
 }
 
 inline jlong Atomic::load(const volatile jlong* src) {
--- a/src/os_cpu/bsd_zero/vm/atomic_bsd_zero.hpp	Wed Aug 23 14:52:55 2017 -0400
+++ b/src/os_cpu/bsd_zero/vm/atomic_bsd_zero.hpp	Thu Aug 24 01:13:04 2017 +0000
@@ -57,9 +57,9 @@
 /* Perform an atomic compare and swap: if the current value of `*PTR'
    is OLDVAL, then write NEWVAL into `*PTR'.  Return the contents of
    `*PTR' before the operation.*/
-static inline int m68k_compare_and_swap(volatile int *ptr,
-                                        int oldval,
-                                        int newval) {
+static inline int m68k_compare_and_swap(int newval,
+                                        volatile int *ptr,
+                                        int oldval) {
   for (;;) {
       int prev = *ptr;
       if (prev != oldval)
@@ -118,9 +118,9 @@
 /* Perform an atomic compare and swap: if the current value of `*PTR'
    is OLDVAL, then write NEWVAL into `*PTR'.  Return the contents of
    `*PTR' before the operation.*/
-static inline int arm_compare_and_swap(volatile int *ptr,
-                                       int oldval,
-                                       int newval) {
+static inline int arm_compare_and_swap(int newval,
+                                       volatile int *ptr,
+                                       int oldval) {
   for (;;) {
       int prev = *ptr;
       if (prev != oldval)
@@ -267,55 +267,38 @@
                            (volatile intptr_t*) dest);
 }
 
-inline jint Atomic::cmpxchg(jint exchange_value,
-                            volatile jint* dest,
-                            jint compare_value,
-                            cmpxchg_memory_order order) {
+// No direct support for cmpxchg of bytes; emulate using int.
+template<>
+struct Atomic::PlatformCmpxchg<1> : Atomic::CmpxchgByteUsingInt {};
+
+template<>
+template<typename T>
+inline T Atomic::PlatformCmpxchg<4>::operator()(T exchange_value,
+                                                T volatile* dest,
+                                                T compare_value,
+                                                cmpxchg_memory_order order) const {
+  STATIC_ASSERT(4 == sizeof(T)); // this specialization is only valid for 4-byte T
 #ifdef ARM
-  return arm_compare_and_swap(dest, compare_value, exchange_value);
+  return cmpxchg_using_helper<int>(arm_compare_and_swap, exchange_value, dest, compare_value);
 #else
 #ifdef M68K
-  return m68k_compare_and_swap(dest, compare_value, exchange_value);
+  return cmpxchg_using_helper<int>(m68k_compare_and_swap, exchange_value, dest, compare_value);
 #else
   return __sync_val_compare_and_swap(dest, compare_value, exchange_value);
 #endif // M68K
 #endif // ARM
 }
 
-inline jlong Atomic::cmpxchg(jlong exchange_value,
-                             volatile jlong* dest,
-                             jlong compare_value,
-                             cmpxchg_memory_order order) {
-
+template<>
+template<typename T>
+inline T Atomic::PlatformCmpxchg<8>::operator()(T exchange_value,
+                                                T volatile* dest,
+                                                T compare_value,
+                                                cmpxchg_memory_order order) const {
+  STATIC_ASSERT(8 == sizeof(T)); // this specialization is only valid for 8-byte T
   return __sync_val_compare_and_swap(dest, compare_value, exchange_value);
 }
 
-inline intptr_t Atomic::cmpxchg_ptr(intptr_t exchange_value,
-                                    volatile intptr_t* dest,
-                                    intptr_t compare_value,
-                                    cmpxchg_memory_order order) {
-#ifdef ARM
-  return arm_compare_and_swap(dest, compare_value, exchange_value);
-#else
-#ifdef M68K
-  return m68k_compare_and_swap(dest, compare_value, exchange_value);
-#else
-  return __sync_val_compare_and_swap(dest, compare_value, exchange_value);
-#endif // M68K
-#endif // ARM
-}
-
-inline void* Atomic::cmpxchg_ptr(void* exchange_value,
-                                 volatile void* dest,
-                                 void* compare_value,
-                                 cmpxchg_memory_order order) {
-
-  return (void *) cmpxchg_ptr((intptr_t) exchange_value,
-                              (volatile intptr_t*) dest,
-                              (intptr_t) compare_value,
-                              order);
-}
-
 inline jlong Atomic::load(const volatile jlong* src) {
   volatile jlong dest;
   os::atomic_copy64(src, &dest);
--- a/src/os_cpu/linux_aarch64/vm/atomic_linux_aarch64.hpp	Wed Aug 23 14:52:55 2017 -0400
+++ b/src/os_cpu/linux_aarch64/vm/atomic_linux_aarch64.hpp	Thu Aug 24 01:13:04 2017 +0000
@@ -85,9 +85,13 @@
                            (volatile intptr_t*) dest);
 }
 
-template <typename T> T generic_cmpxchg(T exchange_value, volatile T* dest,
-                                        T compare_value, cmpxchg_memory_order order)
-{
+template<size_t byte_size>
+template<typename T>
+inline T Atomic::PlatformCmpxchg<byte_size>::operator()(T exchange_value,
+                                                        T volatile* dest,
+                                                        T compare_value,
+                                                        cmpxchg_memory_order order) const {
+  STATIC_ASSERT(byte_size == sizeof(T));
   if (order == memory_order_relaxed) {
     T value = compare_value;
     __atomic_compare_exchange(dest, &value, &exchange_value, /*weak*/false,
@@ -98,17 +102,6 @@
   }
 }
 
-#define VM_HAS_SPECIALIZED_CMPXCHG_BYTE
-inline jbyte Atomic::cmpxchg (jbyte exchange_value, volatile jbyte* dest, jbyte compare_value, cmpxchg_memory_order order)
-{
-  return generic_cmpxchg(exchange_value, dest, compare_value, order);
-}
-
-inline jint Atomic::cmpxchg (jint exchange_value, volatile jint* dest, jint compare_value, cmpxchg_memory_order order)
-{
-  return generic_cmpxchg(exchange_value, dest, compare_value, order);
-}
-
 inline void Atomic::store (jlong store_value, jlong* dest) { *dest = store_value; }
 inline void Atomic::store (jlong store_value, volatile jlong* dest) { *dest = store_value; }
 
@@ -139,24 +132,6 @@
   return res;
 }
 
-inline jlong Atomic::cmpxchg (jlong exchange_value, volatile jlong* dest, jlong compare_value, cmpxchg_memory_order order)
-{
-  return generic_cmpxchg(exchange_value, dest, compare_value, order);
-}
-
-inline intptr_t Atomic::cmpxchg_ptr(intptr_t exchange_value, volatile intptr_t* dest, intptr_t compare_value, cmpxchg_memory_order order)
-{
-  return generic_cmpxchg(exchange_value, dest, compare_value, order);
-}
-
-inline void* Atomic::cmpxchg_ptr(void* exchange_value, volatile void* dest, void* compare_value, cmpxchg_memory_order order)
-{
-  return (void *) cmpxchg_ptr((intptr_t) exchange_value,
-                              (volatile intptr_t*) dest,
-                              (intptr_t) compare_value,
-                              order);
-}
-
 inline jlong Atomic::load(const volatile jlong* src) { return *src; }
 
 #endif // OS_CPU_LINUX_AARCH64_VM_ATOMIC_LINUX_AARCH64_HPP
--- a/src/os_cpu/linux_arm/vm/atomic_linux_arm.hpp	Wed Aug 23 14:52:55 2017 -0400
+++ b/src/os_cpu/linux_arm/vm/atomic_linux_arm.hpp	Thu Aug 24 01:13:04 2017 +0000
@@ -200,9 +200,38 @@
 
 // The memory_order parameter is ignored - we always provide the strongest/most-conservative ordering
 
-inline jint Atomic::cmpxchg(jint exchange_value, volatile jint* dest, jint compare_value, cmpxchg_memory_order order) {
+// No direct support for cmpxchg of bytes; emulate using int.
+template<>
+struct Atomic::PlatformCmpxchg<1> : Atomic::CmpxchgByteUsingInt {};
+
+#ifndef AARCH64
+
+inline jint reorder_cmpxchg_func(jint exchange_value,
+                                 jint volatile* dest,
+                                 jint compare_value) {
+  // Warning: os::atomic_cmpxchg_func is called as (compare_value, exchange_value, dest); arguments are swapped to avoid moving them for kernel call
+  return (*os::atomic_cmpxchg_func)(compare_value, exchange_value, dest);
+}
+
+inline jlong reorder_cmpxchg_long_func(jlong exchange_value,
+                                       jlong volatile* dest,
+                                       jlong compare_value) {
+  assert(VM_Version::supports_cx8(), "Atomic compare and exchange jlong not supported on this architecture!");
+  // Warning: os::atomic_cmpxchg_long_func is called as (compare_value, exchange_value, dest); arguments are swapped to avoid moving them for kernel call
+  return (*os::atomic_cmpxchg_long_func)(compare_value, exchange_value, dest);
+}
+
+#endif // !AARCH64
+
+template<>
+template<typename T>
+inline T Atomic::PlatformCmpxchg<4>::operator()(T exchange_value,
+                                                T volatile* dest,
+                                                T compare_value,
+                                                cmpxchg_memory_order order) const {
+  STATIC_ASSERT(4 == sizeof(T));
 #ifdef AARCH64
-  jint rv;
+  T rv;
   int tmp;
   __asm__ volatile(
     "1:\n\t"
@@ -220,14 +249,19 @@
     : "memory");
   return rv;
 #else
-  // Warning:  Arguments are swapped to avoid moving them for kernel call
-  return (*os::atomic_cmpxchg_func)(compare_value, exchange_value, dest);
+  return cmpxchg_using_helper<jint>(reorder_cmpxchg_func, exchange_value, dest, compare_value);
 #endif
 }
 
-inline jlong Atomic::cmpxchg (jlong exchange_value, volatile jlong* dest, jlong compare_value, cmpxchg_memory_order order) {
+template<>
+template<typename T>
+inline T Atomic::PlatformCmpxchg<8>::operator()(T exchange_value,
+                                                T volatile* dest,
+                                                T compare_value,
+                                                cmpxchg_memory_order order) const {
+  STATIC_ASSERT(8 == sizeof(T));
 #ifdef AARCH64
-  jlong rv;
+  T rv;
   int tmp;
   __asm__ volatile(
     "1:\n\t"
@@ -245,21 +279,8 @@
     : "memory");
   return rv;
 #else
-  assert(VM_Version::supports_cx8(), "Atomic compare and exchange jlong not supported on this architecture!");
-  return (*os::atomic_cmpxchg_long_func)(compare_value, exchange_value, dest);
+  return cmpxchg_using_helper<jlong>(reorder_cmpxchg_long_func, exchange_value, dest, compare_value);
 #endif
 }
 
-inline intptr_t Atomic::cmpxchg_ptr(intptr_t exchange_value, volatile intptr_t* dest, intptr_t compare_value, cmpxchg_memory_order order) {
-#ifdef AARCH64
-  return (intptr_t)cmpxchg((jlong)exchange_value, (volatile jlong*)dest, (jlong)compare_value, order);
-#else
-  return (intptr_t)cmpxchg((jint)exchange_value, (volatile jint*)dest, (jint)compare_value, order);
-#endif
-}
-
-inline void* Atomic::cmpxchg_ptr(void* exchange_value, volatile void* dest, void* compare_value, cmpxchg_memory_order order) {
-  return (void*)cmpxchg_ptr((intptr_t)exchange_value, (volatile intptr_t*)dest, (intptr_t)compare_value, order);
-}
-
 #endif // OS_CPU_LINUX_ARM_VM_ATOMIC_LINUX_ARM_HPP
--- a/src/os_cpu/linux_ppc/vm/atomic_linux_ppc.hpp	Wed Aug 23 14:52:55 2017 -0400
+++ b/src/os_cpu/linux_ppc/vm/atomic_linux_ppc.hpp	Thu Aug 24 01:13:04 2017 +0000
@@ -306,8 +306,13 @@
   }
 }
 
-#define VM_HAS_SPECIALIZED_CMPXCHG_BYTE
-inline jbyte Atomic::cmpxchg(jbyte exchange_value, volatile jbyte* dest, jbyte compare_value, cmpxchg_memory_order order) {
+template<>
+template<typename T>
+inline T Atomic::PlatformCmpxchg<1>::operator()(T exchange_value,
+                                                T volatile* dest,
+                                                T compare_value,
+                                                cmpxchg_memory_order order) const {
+  STATIC_ASSERT(1 == sizeof(T));
 
   // Note that cmpxchg guarantees a two-way memory barrier across
   // the cmpxchg, so it's really a a 'fence_cmpxchg_fence' if not
@@ -368,16 +373,22 @@
 
   cmpxchg_post_membar(order);
 
-  return (jbyte)(unsigned char)old_value;
+  return PrimitiveConversions::cast<T>((unsigned char)old_value);
 }
 
-inline jint Atomic::cmpxchg(jint exchange_value, volatile jint* dest, jint compare_value, cmpxchg_memory_order order) {
+template<>
+template<typename T>
+inline T Atomic::PlatformCmpxchg<4>::operator()(T exchange_value,
+                                                T volatile* dest,
+                                                T compare_value,
+                                                cmpxchg_memory_order order) const {
+  STATIC_ASSERT(4 == sizeof(T));
 
   // Note that cmpxchg guarantees a two-way memory barrier across
   // the cmpxchg, so it's really a a 'fence_cmpxchg_fence' if not
   // specified otherwise (see atomic.hpp).
 
-  unsigned int old_value;
+  T old_value;
   const uint64_t zero = 0;
 
   cmpxchg_pre_membar(order);
@@ -412,16 +423,22 @@
 
   cmpxchg_post_membar(order);
 
-  return (jint) old_value;
+  return old_value;
 }
 
-inline jlong Atomic::cmpxchg(jlong exchange_value, volatile jlong* dest, jlong compare_value, cmpxchg_memory_order order) {
+template<>
+template<typename T>
+inline T Atomic::PlatformCmpxchg<8>::operator()(T exchange_value,
+                                                T volatile* dest,
+                                                T compare_value,
+                                                cmpxchg_memory_order order) const {
+  STATIC_ASSERT(8 == sizeof(T));
 
   // Note that cmpxchg guarantees a two-way memory barrier across
   // the cmpxchg, so it's really a a 'fence_cmpxchg_fence' if not
   // specified otherwise (see atomic.hpp).
 
-  long old_value;
+  T old_value;
   const uint64_t zero = 0;
 
   cmpxchg_pre_membar(order);
@@ -456,15 +473,7 @@
 
   cmpxchg_post_membar(order);
 
-  return (jlong) old_value;
-}
-
-inline intptr_t Atomic::cmpxchg_ptr(intptr_t exchange_value, volatile intptr_t* dest, intptr_t compare_value, cmpxchg_memory_order order) {
-  return (intptr_t)cmpxchg((jlong)exchange_value, (volatile jlong*)dest, (jlong)compare_value, order);
-}
-
-inline void* Atomic::cmpxchg_ptr(void* exchange_value, volatile void* dest, void* compare_value, cmpxchg_memory_order order) {
-  return (void*)cmpxchg((jlong)exchange_value, (volatile jlong*)dest, (jlong)compare_value, order);
+  return old_value;
 }
 
 #undef strasm_sync
--- a/src/os_cpu/linux_s390/vm/atomic_linux_s390.hpp	Wed Aug 23 14:52:55 2017 -0400
+++ b/src/os_cpu/linux_s390/vm/atomic_linux_s390.hpp	Thu Aug 24 01:13:04 2017 +0000
@@ -478,8 +478,18 @@
 // function is performed before the operand is fetched and again after the
 // operation is completed."
 
-jint Atomic::cmpxchg(jint xchg_val, volatile jint* dest, jint cmp_val, cmpxchg_memory_order unused) {
-  unsigned long old;
+// No direct support for cmpxchg of bytes; emulate using int.
+template<>
+struct Atomic::PlatformCmpxchg<1> : Atomic::CmpxchgByteUsingInt {};
+
+template<>
+template<typename T>
+inline T Atomic::PlatformCmpxchg<4>::operator()(T xchg_val,
+                                                T volatile* dest,
+                                                T cmp_val,
+                                                cmpxchg_memory_order unused) const {
+  STATIC_ASSERT(4 == sizeof(T));
+  T old;
 
   __asm__ __volatile__ (
     "   CS       %[old],%[upd],%[mem]    \n\t" // Try to xchg upd with mem.
@@ -493,11 +503,17 @@
     : "cc", "memory"
   );
 
-  return (jint)old;
+  return old;
 }
 
-jlong Atomic::cmpxchg(jlong xchg_val, volatile jlong* dest, jlong cmp_val, cmpxchg_memory_order unused) {
-  unsigned long old;
+template<>
+template<typename T>
+inline T Atomic::PlatformCmpxchg<8>::operator()(T xchg_val,
+                                                T volatile* dest,
+                                                T cmp_val,
+                                                cmpxchg_memory_order unused) const {
+  STATIC_ASSERT(8 == sizeof(T));
+  T old;
 
   __asm__ __volatile__ (
     "   CSG      %[old],%[upd],%[mem]    \n\t" // Try to xchg upd with mem.
@@ -511,15 +527,7 @@
     : "cc", "memory"
   );
 
-  return (jlong)old;
-}
-
-void* Atomic::cmpxchg_ptr(void *xchg_val, volatile void* dest, void* cmp_val, cmpxchg_memory_order unused) {
-  return (void*)cmpxchg((jlong)xchg_val, (volatile jlong*)dest, (jlong)cmp_val, unused);
-}
-
-intptr_t Atomic::cmpxchg_ptr(intptr_t xchg_val, volatile intptr_t* dest, intptr_t cmp_val, cmpxchg_memory_order unused) {
-  return (intptr_t)cmpxchg((jlong)xchg_val, (volatile jlong*)dest, (jlong)cmp_val, unused);
+  return old;
 }
 
 inline jlong Atomic::load(const volatile jlong* src) { return *src; }
--- a/src/os_cpu/linux_sparc/vm/atomic_linux_sparc.hpp	Wed Aug 23 14:52:55 2017 -0400
+++ b/src/os_cpu/linux_sparc/vm/atomic_linux_sparc.hpp	Thu Aug 24 01:13:04 2017 +0000
@@ -121,9 +121,18 @@
   return (void*)xchg_ptr((intptr_t)exchange_value, (volatile intptr_t*)dest);
 }
 
+// No direct support for cmpxchg of bytes; emulate using int.
+template<>
+struct Atomic::PlatformCmpxchg<1> : Atomic::CmpxchgByteUsingInt {};
 
-inline jint     Atomic::cmpxchg    (jint     exchange_value, volatile jint*     dest, jint     compare_value, cmpxchg_memory_order order) {
-  jint rv;
+template<>
+template<typename T>
+inline T Atomic::PlatformCmpxchg<4>::operator()(T exchange_value,
+                                                T volatile* dest,
+                                                T compare_value,
+                                                cmpxchg_memory_order order) const {
+  STATIC_ASSERT(4 == sizeof(T));
+  T rv;
   __asm__ volatile(
     " cas    [%2], %3, %0"
     : "=r" (rv)
@@ -132,8 +141,14 @@
   return rv;
 }
 
-inline jlong    Atomic::cmpxchg    (jlong    exchange_value, volatile jlong*    dest, jlong    compare_value, cmpxchg_memory_order order) {
-  jlong rv;
+template<>
+template<typename T>
+inline T Atomic::PlatformCmpxchg<8>::operator()(T exchange_value,
+                                                T volatile* dest,
+                                                T compare_value,
+                                                cmpxchg_memory_order order) const {
+  STATIC_ASSERT(8 == sizeof(T));
+  T rv;
   __asm__ volatile(
     " casx   [%2], %3, %0"
     : "=r" (rv)
@@ -142,18 +157,4 @@
   return rv;
 }
 
-inline intptr_t Atomic::cmpxchg_ptr(intptr_t exchange_value, volatile intptr_t* dest, intptr_t compare_value, cmpxchg_memory_order order) {
-  intptr_t rv;
-  __asm__ volatile(
-    " casx    [%2], %3, %0"
-    : "=r" (rv)
-    : "0" (exchange_value), "r" (dest), "r" (compare_value)
-    : "memory");
-  return rv;
-}
-
-inline void*    Atomic::cmpxchg_ptr(void*    exchange_value, volatile void*     dest, void*    compare_value, cmpxchg_memory_order order) {
-  return (void*)cmpxchg_ptr((intptr_t)exchange_value, (volatile intptr_t*)dest, (intptr_t)compare_value, order);
-}
-
 #endif // OS_CPU_LINUX_SPARC_VM_ATOMIC_LINUX_SPARC_INLINE_HPP
--- a/src/os_cpu/linux_x86/vm/atomic_linux_x86.hpp	Wed Aug 23 14:52:55 2017 -0400
+++ b/src/os_cpu/linux_x86/vm/atomic_linux_x86.hpp	Thu Aug 24 01:13:04 2017 +0000
@@ -25,8 +25,6 @@
 #ifndef OS_CPU_LINUX_X86_VM_ATOMIC_LINUX_X86_HPP
 #define OS_CPU_LINUX_X86_VM_ATOMIC_LINUX_X86_HPP
 
-#include "runtime/os.hpp"
-
 // Implementation of class atomic
 
 inline void Atomic::store    (jbyte    store_value, jbyte*    dest) { *dest = store_value; }
@@ -81,8 +79,13 @@
   return (void*)xchg_ptr((intptr_t)exchange_value, (volatile intptr_t*)dest);
 }
 
-#define VM_HAS_SPECIALIZED_CMPXCHG_BYTE
-inline jbyte    Atomic::cmpxchg    (jbyte    exchange_value, volatile jbyte*    dest, jbyte    compare_value, cmpxchg_memory_order order) {
+template<>
+template<typename T>
+inline T Atomic::PlatformCmpxchg<1>::operator()(T exchange_value,
+                                                T volatile* dest,
+                                                T compare_value,
+                                                cmpxchg_memory_order /* order */) const {
+  STATIC_ASSERT(1 == sizeof(T));
   __asm__ volatile ("lock cmpxchgb %1,(%3)"
                     : "=a" (exchange_value)
                     : "q" (exchange_value), "a" (compare_value), "r" (dest)
@@ -90,7 +93,13 @@
   return exchange_value;
 }
 
-inline jint     Atomic::cmpxchg    (jint     exchange_value, volatile jint*     dest, jint     compare_value, cmpxchg_memory_order order) {
+template<>
+template<typename T>
+inline T Atomic::PlatformCmpxchg<4>::operator()(T exchange_value,
+                                                T volatile* dest,
+                                                T compare_value,
+                                                cmpxchg_memory_order /* order */) const {
+  STATIC_ASSERT(4 == sizeof(T));
   __asm__ volatile ("lock cmpxchgl %1,(%3)"
                     : "=a" (exchange_value)
                     : "r" (exchange_value), "a" (compare_value), "r" (dest)
@@ -137,7 +146,13 @@
   return exchange_value;
 }
 
-inline jlong    Atomic::cmpxchg    (jlong    exchange_value, volatile jlong*    dest, jlong    compare_value, cmpxchg_memory_order order) {
+template<>
+template<typename T>
+inline T Atomic::PlatformCmpxchg<8>::operator()(T exchange_value,
+                                                T volatile* dest,
+                                                T compare_value,
+                                                cmpxchg_memory_order /* order */) const {
+  STATIC_ASSERT(8 == sizeof(T));
   __asm__ __volatile__ ("lock cmpxchgq %1,(%3)"
                         : "=a" (exchange_value)
                         : "r" (exchange_value), "a" (compare_value), "r" (dest)
@@ -145,14 +160,6 @@
   return exchange_value;
 }
 
-inline intptr_t Atomic::cmpxchg_ptr(intptr_t exchange_value, volatile intptr_t* dest, intptr_t compare_value, cmpxchg_memory_order order) {
-  return (intptr_t)cmpxchg((jlong)exchange_value, (volatile jlong*)dest, (jlong)compare_value, order);
-}
-
-inline void*    Atomic::cmpxchg_ptr(void*    exchange_value, volatile void*     dest, void*    compare_value, cmpxchg_memory_order order) {
-  return (void*)cmpxchg((jlong)exchange_value, (volatile jlong*)dest, (jlong)compare_value, order);
-}
-
 inline jlong Atomic::load(const volatile jlong* src) { return *src; }
 
 #else // !AMD64
@@ -184,16 +191,14 @@
   void _Atomic_move_long(const volatile jlong* src, volatile jlong* dst);
 }
 
-inline jlong    Atomic::cmpxchg    (jlong    exchange_value, volatile jlong*    dest, jlong    compare_value, cmpxchg_memory_order order) {
-  return _Atomic_cmpxchg_long(exchange_value, dest, compare_value);
-}
-
-inline intptr_t Atomic::cmpxchg_ptr(intptr_t exchange_value, volatile intptr_t* dest, intptr_t compare_value, cmpxchg_memory_order order) {
-  return (intptr_t)cmpxchg((jint)exchange_value, (volatile jint*)dest, (jint)compare_value, order);
-}
-
-inline void*    Atomic::cmpxchg_ptr(void*    exchange_value, volatile void*     dest, void*    compare_value, cmpxchg_memory_order order) {
-  return (void*)cmpxchg((jint)exchange_value, (volatile jint*)dest, (jint)compare_value, order);
+template<>
+template<typename T>
+inline T Atomic::PlatformCmpxchg<8>::operator()(T exchange_value,
+                                                T volatile* dest,
+                                                T compare_value,
+                                                cmpxchg_memory_order order) const {
+  STATIC_ASSERT(8 == sizeof(T));
+  return cmpxchg_using_helper<jlong>(_Atomic_cmpxchg_long, exchange_value, dest, compare_value);
 }
 
 inline jlong Atomic::load(const volatile jlong* src) {
--- a/src/os_cpu/linux_zero/vm/atomic_linux_zero.hpp	Wed Aug 23 14:52:55 2017 -0400
+++ b/src/os_cpu/linux_zero/vm/atomic_linux_zero.hpp	Thu Aug 24 01:13:04 2017 +0000
@@ -57,9 +57,9 @@
 /* Perform an atomic compare and swap: if the current value of `*PTR'
    is OLDVAL, then write NEWVAL into `*PTR'.  Return the contents of
    `*PTR' before the operation.*/
-static inline int m68k_compare_and_swap(volatile int *ptr,
-                                        int oldval,
-                                        int newval) {
+static inline int m68k_compare_and_swap(int newval,
+                                        volatile int *ptr,
+                                        int oldval) {
   for (;;) {
       int prev = *ptr;
       if (prev != oldval)
@@ -118,9 +118,9 @@
 /* Perform an atomic compare and swap: if the current value of `*PTR'
    is OLDVAL, then write NEWVAL into `*PTR'.  Return the contents of
    `*PTR' before the operation.*/
-static inline int arm_compare_and_swap(volatile int *ptr,
-                                       int oldval,
-                                       int newval) {
+static inline int arm_compare_and_swap(int newval,
+                                       volatile int *ptr,
+                                       int oldval) {
   for (;;) {
       int prev = *ptr;
       if (prev != oldval)
@@ -261,55 +261,38 @@
                            (volatile intptr_t*) dest);
 }
 
-inline jint Atomic::cmpxchg(jint exchange_value,
-                            volatile jint* dest,
-                            jint compare_value,
-                            cmpxchg_memory_order order) {
+// No direct support for cmpxchg of bytes; emulate using int.
+template<>
+struct Atomic::PlatformCmpxchg<1> : Atomic::CmpxchgByteUsingInt {};
+
+template<>
+template<typename T>
+inline T Atomic::PlatformCmpxchg<4>::operator()(T exchange_value,
+                                                T volatile* dest,
+                                                T compare_value,
+                                                cmpxchg_memory_order order) const {
+  STATIC_ASSERT(4 == sizeof(T));
 #ifdef ARM
-  return arm_compare_and_swap(dest, compare_value, exchange_value);
+  return cmpxchg_using_helper<int>(arm_compare_and_swap, exchange_value, dest, compare_value);
 #else
 #ifdef M68K
-  return m68k_compare_and_swap(dest, compare_value, exchange_value);
+  return cmpxchg_using_helper<int>(m68k_compare_and_swap, exchange_value, dest, compare_value);
 #else
   return __sync_val_compare_and_swap(dest, compare_value, exchange_value);
 #endif // M68K
 #endif // ARM
 }
 
-inline jlong Atomic::cmpxchg(jlong exchange_value,
-                             volatile jlong* dest,
-                             jlong compare_value,
-                             cmpxchg_memory_order order) {
-
+template<>
+template<typename T>
+inline T Atomic::PlatformCmpxchg<8>::operator()(T exchange_value,
+                                                T volatile* dest,
+                                                T compare_value,
+                                                cmpxchg_memory_order order) const {
+  STATIC_ASSERT(8 == sizeof(T));
   return __sync_val_compare_and_swap(dest, compare_value, exchange_value);
 }
 
-inline intptr_t Atomic::cmpxchg_ptr(intptr_t exchange_value,
-                                    volatile intptr_t* dest,
-                                    intptr_t compare_value,
-                                    cmpxchg_memory_order order) {
-#ifdef ARM
-  return arm_compare_and_swap(dest, compare_value, exchange_value);
-#else
-#ifdef M68K
-  return m68k_compare_and_swap(dest, compare_value, exchange_value);
-#else
-  return __sync_val_compare_and_swap(dest, compare_value, exchange_value);
-#endif // M68K
-#endif // ARM
-}
-
-inline void* Atomic::cmpxchg_ptr(void* exchange_value,
-                                 volatile void* dest,
-                                 void* compare_value,
-                                 cmpxchg_memory_order order) {
-
-  return (void *) cmpxchg_ptr((intptr_t) exchange_value,
-                              (volatile intptr_t*) dest,
-                              (intptr_t) compare_value,
-                              order);
-}
-
 inline jlong Atomic::load(const volatile jlong* src) {
   volatile jlong dest;
   os::atomic_copy64(src, &dest);
--- a/src/os_cpu/solaris_sparc/vm/atomic_solaris_sparc.hpp	Wed Aug 23 14:52:55 2017 -0400
+++ b/src/os_cpu/solaris_sparc/vm/atomic_solaris_sparc.hpp	Thu Aug 24 01:13:04 2017 +0000
@@ -25,8 +25,6 @@
 #ifndef OS_CPU_SOLARIS_SPARC_VM_ATOMIC_SOLARIS_SPARC_HPP
 #define OS_CPU_SOLARIS_SPARC_VM_ATOMIC_SOLARIS_SPARC_HPP
 
-#include "runtime/os.hpp"
-
 // Implementation of class atomic
 
 inline void Atomic::store    (jbyte    store_value, jbyte*    dest) { *dest = store_value; }
@@ -64,10 +62,6 @@
 extern "C" jint     _Atomic_swap32(jint     exchange_value, volatile jint*     dest);
 extern "C" intptr_t _Atomic_swap64(intptr_t exchange_value, volatile intptr_t* dest);
 
-extern "C" jint     _Atomic_cas32(jint     exchange_value, volatile jint*     dest, jint     compare_value);
-extern "C" intptr_t _Atomic_cas64(intptr_t exchange_value, volatile intptr_t* dest, intptr_t compare_value);
-extern "C" jlong    _Atomic_casl (jlong    exchange_value, volatile jlong*    dest, jlong    compare_value);
-
 extern "C" jint     _Atomic_add32(jint     inc,       volatile jint*     dest);
 extern "C" intptr_t _Atomic_add64(intptr_t add_value, volatile intptr_t* dest);
 
@@ -97,22 +91,40 @@
   return (void*)xchg_ptr((intptr_t)exchange_value, (volatile intptr_t*)dest);
 }
 
+// No direct support for cmpxchg of bytes; emulate using int.
+template<>
+struct Atomic::PlatformCmpxchg<1> : Atomic::CmpxchgByteUsingInt {};
 
-inline jint     Atomic::cmpxchg    (jint     exchange_value, volatile jint*     dest, jint     compare_value, cmpxchg_memory_order order) {
-  return _Atomic_cas32(exchange_value, dest, compare_value);
+template<>
+template<typename T>
+inline T Atomic::PlatformCmpxchg<4>::operator()(T exchange_value,
+                                                T volatile* dest,
+                                                T compare_value,
+                                                cmpxchg_memory_order order) const {
+  STATIC_ASSERT(4 == sizeof(T));
+  T rv;
+  __asm__ volatile(
+    " cas    [%2], %3, %0"
+    : "=r" (rv)
+    : "0" (exchange_value), "r" (dest), "r" (compare_value)
+    : "memory");
+  return rv;
 }
 
-inline jlong    Atomic::cmpxchg    (jlong    exchange_value, volatile jlong*    dest, jlong    compare_value, cmpxchg_memory_order order) {
-  // Return 64 bit value in %o0
-  return _Atomic_cas64((intptr_t)exchange_value, (intptr_t *)dest, (intptr_t)compare_value);
-}
-
-inline intptr_t Atomic::cmpxchg_ptr(intptr_t exchange_value, volatile intptr_t* dest, intptr_t compare_value, cmpxchg_memory_order order) {
-  return _Atomic_cas64(exchange_value, dest, compare_value);
-}
-
-inline void*    Atomic::cmpxchg_ptr(void*    exchange_value, volatile void*     dest, void*    compare_value, cmpxchg_memory_order order) {
-  return (void*)cmpxchg_ptr((intptr_t)exchange_value, (volatile intptr_t*)dest, (intptr_t)compare_value, order);
+template<>
+template<typename T>
+inline T Atomic::PlatformCmpxchg<8>::operator()(T exchange_value,
+                                                T volatile* dest,
+                                                T compare_value,
+                                                cmpxchg_memory_order order) const {
+  STATIC_ASSERT(8 == sizeof(T));
+  T rv;
+  __asm__ volatile(
+    " casx   [%2], %3, %0"
+    : "=r" (rv)
+    : "0" (exchange_value), "r" (dest), "r" (compare_value)
+    : "memory");
+  return rv;
 }
 
 #endif // OS_CPU_SOLARIS_SPARC_VM_ATOMIC_SOLARIS_SPARC_HPP
--- a/src/os_cpu/solaris_sparc/vm/solaris_sparc.il	Wed Aug 23 14:52:55 2017 -0400
+++ b/src/os_cpu/solaris_sparc/vm/solaris_sparc.il	Thu Aug 24 01:13:04 2017 +0000
@@ -73,74 +73,6 @@
         .end
 
 
-  // Support for jint Atomic::cmpxchg(jint           exchange_value,
-  //                                  volatile jint* dest,
-  //                                  jint           compare_value)
-  //
-  // Arguments:
-  //      exchange_value: O0
-  //      dest:           O1
-  //      compare_value:  O2
-  //
-  // Results:
-  //     O0: the value previously stored in dest
-
-        .inline _Atomic_cas32, 3
-        .volatile
-        cas     [%o1], %o2, %o0
-        .nonvolatile
-        .end
-
-
-  // Support for intptr_t Atomic::cmpxchg_ptr(intptr_t           exchange_value,
-  //                                          volatile intptr_t* dest,
-  //                                          intptr_t           compare_value)
-  //
-  // 64-bit
-  //
-  // Arguments:
-  //      exchange_value: O0
-  //      dest:           O1
-  //      compare_value:  O2
-  //
-  // Results:
-  //     O0: the value previously stored in dest
-
-        .inline _Atomic_cas64, 3
-        .volatile
-        casx    [%o1], %o2, %o0
-        .nonvolatile
-        .end
-
-
-  // Support for jlong Atomic::cmpxchg(jlong           exchange_value,
-  //                                   volatile jlong* dest,
-  //                                   jlong           compare_value)
-  //
-  // 32-bit calling conventions
-  //
-  // Arguments:
-  //      exchange_value: O1:O0
-  //      dest:           O2
-  //      compare_value:  O4:O3
-  //
-  // Results:
-  //     O1:O0: the value previously stored in dest
-
-        .inline _Atomic_casl, 3
-        .volatile
-        sllx    %o0, 32, %o0
-        srl     %o1, 0, %o1
-        or      %o0,%o1,%o0
-        sllx    %o3, 32, %o3
-        srl     %o4, 0, %o4
-        or      %o3,%o4,%o3
-        casx    [%o2], %o3, %o0
-        srl     %o0, 0, %o1
-        srlx    %o0, 32, %o0
-        .nonvolatile
-        .end
-
   // Support for jlong Atomic::load and Atomic::store on v9.
   //
   // void _Atomic_move_long_v9(volatile jlong* src, volatile jlong* dst)
--- a/src/os_cpu/solaris_x86/vm/atomic_solaris_x86.hpp	Wed Aug 23 14:52:55 2017 -0400
+++ b/src/os_cpu/solaris_x86/vm/atomic_solaris_x86.hpp	Thu Aug 24 01:13:04 2017 +0000
@@ -25,8 +25,6 @@
 #ifndef OS_CPU_SOLARIS_X86_VM_ATOMIC_SOLARIS_X86_HPP
 #define OS_CPU_SOLARIS_X86_VM_ATOMIC_SOLARIS_X86_HPP
 
-#include "runtime/os.hpp"
-
 inline void Atomic::store    (jbyte    store_value, jbyte*    dest) { *dest = store_value; }
 inline void Atomic::store    (jshort   store_value, jshort*   dest) { *dest = store_value; }
 inline void Atomic::store    (jint     store_value, jint*     dest) { *dest = store_value; }
@@ -49,8 +47,7 @@
 inline void Atomic::dec_ptr(volatile intptr_t* dest) { (void)add_ptr(-1, dest); }
 inline void Atomic::dec_ptr(volatile void*     dest) { (void)add_ptr(-1, dest); }
 
-// For Sun Studio - implementation is in solaris_x86_[32/64].il.
-// For gcc - implementation is just below.
+// For Sun Studio - implementation is in solaris_x86_64.il.
 
 extern "C" {
   jint _Atomic_add(jint add_value, volatile jint* dest);
@@ -71,21 +68,51 @@
   return _Atomic_xchg(exchange_value, dest);
 }
 
-#define VM_HAS_SPECIALIZED_CMPXCHG_BYTE
-inline jbyte    Atomic::cmpxchg    (jbyte    exchange_value, volatile jbyte*    dest, jbyte    compare_value, cmpxchg_memory_order order) {
-  return _Atomic_cmpxchg_byte(exchange_value, dest, compare_value);
+// Not using cmpxchg_using_helper here, because some configurations of
+// the Solaris compiler don't deal well with passing a "defined in .il"
+// function as an argument.  We *should* switch to using gcc-style
+// inline assembly, but attempting to do so with Studio 12.4 ran into
+// segfaults.
+
+template<>
+template<typename T>
+inline T Atomic::PlatformCmpxchg<1>::operator()(T exchange_value,
+                                                T volatile* dest,
+                                                T compare_value,
+                                                cmpxchg_memory_order order) const {
+  STATIC_ASSERT(1 == sizeof(T));
+  return PrimitiveConversions::cast<T>(
+    _Atomic_cmpxchg_byte(PrimitiveConversions::cast<jbyte>(exchange_value),
+                         reinterpret_cast<jbyte volatile*>(dest),
+                         PrimitiveConversions::cast<jbyte>(compare_value)));
 }
 
-inline jint     Atomic::cmpxchg    (jint     exchange_value, volatile jint*     dest, jint     compare_value, cmpxchg_memory_order order) {
-  return _Atomic_cmpxchg(exchange_value, dest, compare_value);
+template<>
+template<typename T>
+inline T Atomic::PlatformCmpxchg<4>::operator()(T exchange_value,
+                                                T volatile* dest,
+                                                T compare_value,
+                                                cmpxchg_memory_order order) const {
+  STATIC_ASSERT(4 == sizeof(T));
+  return PrimitiveConversions::cast<T>(
+    _Atomic_cmpxchg(PrimitiveConversions::cast<jint>(exchange_value),
+                    reinterpret_cast<jint volatile*>(dest),
+                    PrimitiveConversions::cast<jint>(compare_value)));
 }
 
-inline jlong    Atomic::cmpxchg    (jlong    exchange_value, volatile jlong*    dest, jlong    compare_value, cmpxchg_memory_order order) {
-  return _Atomic_cmpxchg_long(exchange_value, dest, compare_value);
+template<>
+template<typename T>
+inline T Atomic::PlatformCmpxchg<8>::operator()(T exchange_value,
+                                                T volatile* dest,
+                                                T compare_value,
+                                                cmpxchg_memory_order order) const {
+  STATIC_ASSERT(8 == sizeof(T));
+  return PrimitiveConversions::cast<T>(
+    _Atomic_cmpxchg_long(PrimitiveConversions::cast<jlong>(exchange_value),
+                         reinterpret_cast<jlong volatile*>(dest),
+                         PrimitiveConversions::cast<jlong>(compare_value)));
 }
 
-
-#ifdef AMD64
 inline void Atomic::store    (jlong    store_value, jlong*             dest) { *dest = store_value; }
 inline void Atomic::store    (jlong    store_value, volatile jlong*    dest) { *dest = store_value; }
 extern "C" jlong _Atomic_add_long(jlong add_value, volatile jlong* dest);
@@ -107,59 +134,6 @@
   return (void*)_Atomic_xchg_long((jlong)exchange_value, (volatile jlong*)dest);
 }
 
-inline intptr_t Atomic::cmpxchg_ptr(intptr_t exchange_value, volatile intptr_t* dest, intptr_t compare_value, cmpxchg_memory_order order) {
-  return (intptr_t)_Atomic_cmpxchg_long((jlong)exchange_value, (volatile jlong*)dest, (jlong)compare_value);
-}
-
-inline void*    Atomic::cmpxchg_ptr(void*    exchange_value, volatile void*     dest, void*    compare_value, cmpxchg_memory_order order) {
-  return (void*)_Atomic_cmpxchg_long((jlong)exchange_value, (volatile jlong*)dest, (jlong)compare_value);
-}
-
 inline jlong Atomic::load(const volatile jlong* src) { return *src; }
 
-#else // !AMD64
-
-inline intptr_t Atomic::add_ptr(intptr_t add_value, volatile intptr_t* dest) {
-  return (intptr_t)add((jint)add_value, (volatile jint*)dest);
-}
-
-inline void*    Atomic::add_ptr(intptr_t add_value, volatile void*     dest) {
-  return (void*)add((jint)add_value, (volatile jint*)dest);
-}
-
-inline intptr_t Atomic::xchg_ptr(intptr_t exchange_value, volatile intptr_t* dest) {
-  return (intptr_t)xchg((jint)exchange_value, (volatile jint*)dest);
-}
-
-inline void*    Atomic::xchg_ptr(void*    exchange_value, volatile void*     dest) {
-  return (void*)xchg((jint)exchange_value, (volatile jint*)dest);
-}
-
-inline intptr_t Atomic::cmpxchg_ptr(intptr_t exchange_value, volatile intptr_t* dest, intptr_t compare_value, cmpxchg_memory_order order) {
-  return (intptr_t)cmpxchg((jint)exchange_value, (volatile jint*)dest, (jint)compare_value, order);
-}
-
-inline void*    Atomic::cmpxchg_ptr(void*    exchange_value, volatile void*     dest, void*    compare_value, cmpxchg_memory_order order) {
-  return (void*)cmpxchg((jint)exchange_value, (volatile jint*)dest, (jint)compare_value, order);
-}
-
-extern "C" void _Atomic_move_long(const volatile jlong* src, volatile jlong* dst);
-
-inline jlong Atomic::load(const volatile jlong* src) {
-  volatile jlong dest;
-  _Atomic_move_long(src, &dest);
-  return dest;
-}
-
-inline void Atomic::store(jlong store_value, jlong* dest) {
-  _Atomic_move_long((volatile jlong*)&store_value, (volatile jlong*)dest);
-}
-
-inline void Atomic::store(jlong store_value, volatile jlong* dest) {
-  _Atomic_move_long((volatile jlong*)&store_value, dest);
-}
-
-#endif // AMD64
-
-
 #endif // OS_CPU_SOLARIS_X86_VM_ATOMIC_SOLARIS_X86_HPP
--- a/src/os_cpu/windows_x86/vm/atomic_windows_x86.hpp	Wed Aug 23 14:52:55 2017 -0400
+++ b/src/os_cpu/windows_x86/vm/atomic_windows_x86.hpp	Thu Aug 24 01:13:04 2017 +0000
@@ -109,26 +109,22 @@
   return (void *)(os::atomic_xchg_ptr_func)((intptr_t)exchange_value, (volatile intptr_t*)dest);
 }
 
-inline jint     Atomic::cmpxchg    (jint     exchange_value, volatile jint*     dest, jint     compare_value, cmpxchg_memory_order order) {
-  return (*os::atomic_cmpxchg_func)(exchange_value, dest, compare_value);
-}
+#define DEFINE_STUB_CMPXCHG(ByteSize, StubType, StubName)               \
+  template<>                                                            \
+  template<typename T>                                                  \
+  inline T Atomic::PlatformCmpxchg<ByteSize>::operator()(T exchange_value, \
+                                                         T volatile* dest, \
+                                                         T compare_value, \
+                                                         cmpxchg_memory_order order) const { \
+    STATIC_ASSERT(ByteSize == sizeof(T));                               \
+    return cmpxchg_using_helper<StubType>(StubName, exchange_value, dest, compare_value); \
+  }
 
-#define VM_HAS_SPECIALIZED_CMPXCHG_BYTE
-inline jbyte    Atomic::cmpxchg    (jbyte    exchange_value, volatile jbyte*    dest, jbyte    compare_value, cmpxchg_memory_order order) {
-    return (*os::atomic_cmpxchg_byte_func)(exchange_value, dest, compare_value);
-}
+DEFINE_STUB_CMPXCHG(1, jbyte, os::atomic_cmpxchg_byte_func)
+DEFINE_STUB_CMPXCHG(4, jint,  os::atomic_cmpxchg_func)
+DEFINE_STUB_CMPXCHG(8, jlong, os::atomic_cmpxchg_long_func)
 
-inline jlong    Atomic::cmpxchg    (jlong    exchange_value, volatile jlong*    dest, jlong    compare_value, cmpxchg_memory_order order) {
-  return (*os::atomic_cmpxchg_long_func)(exchange_value, dest, compare_value);
-}
-
-inline intptr_t Atomic::cmpxchg_ptr(intptr_t exchange_value, volatile intptr_t* dest, intptr_t compare_value, cmpxchg_memory_order order) {
-  return (intptr_t)cmpxchg((jlong)exchange_value, (volatile jlong*)dest, (jlong)compare_value, order);
-}
-
-inline void*    Atomic::cmpxchg_ptr(void*    exchange_value, volatile void*     dest, void*    compare_value, cmpxchg_memory_order order) {
-  return (void*)cmpxchg((jlong)exchange_value, (volatile jlong*)dest, (jlong)compare_value, order);
-}
+#undef DEFINE_STUB_CMPXCHG
 
 inline jlong Atomic::load(const volatile jlong* src) { return *src; }
 
@@ -201,8 +197,13 @@
   return (void*)xchg((jint)exchange_value, (volatile jint*)dest);
 }
 
-#define VM_HAS_SPECIALIZED_CMPXCHG_BYTE
-inline jbyte    Atomic::cmpxchg    (jbyte    exchange_value, volatile jbyte*    dest, jbyte    compare_value, cmpxchg_memory_order order) {
+template<>
+template<typename T>
+inline T Atomic::PlatformCmpxchg<1>::operator()(T exchange_value,
+                                                T volatile* dest,
+                                                T compare_value,
+                                                cmpxchg_memory_order order) const {
+  STATIC_ASSERT(1 == sizeof(T));
   // alternative for InterlockedCompareExchange
   __asm {
     mov edx, dest
@@ -212,7 +213,13 @@
   }
 }
 
-inline jint     Atomic::cmpxchg    (jint     exchange_value, volatile jint*     dest, jint     compare_value, cmpxchg_memory_order order) {
+template<>
+template<typename T>
+inline T Atomic::PlatformCmpxchg<4>::operator()(T exchange_value,
+                                                T volatile* dest,
+                                                T compare_value,
+                                                cmpxchg_memory_order order) const {
+  STATIC_ASSERT(4 == sizeof(T));
   // alternative for InterlockedCompareExchange
   __asm {
     mov edx, dest
@@ -222,7 +229,13 @@
   }
 }
 
-inline jlong    Atomic::cmpxchg    (jlong    exchange_value, volatile jlong*    dest, jlong    compare_value, cmpxchg_memory_order order) {
+template<>
+template<typename T>
+inline T Atomic::PlatformCmpxchg<8>::operator()(T exchange_value,
+                                                T volatile* dest,
+                                                T compare_value,
+                                                cmpxchg_memory_order order) const {
+  STATIC_ASSERT(8 == sizeof(T));
   jint ex_lo  = (jint)exchange_value;
   jint ex_hi  = *( ((jint*)&exchange_value) + 1 );
   jint cmp_lo = (jint)compare_value;
@@ -241,14 +254,6 @@
   }
 }
 
-inline intptr_t Atomic::cmpxchg_ptr(intptr_t exchange_value, volatile intptr_t* dest, intptr_t compare_value, cmpxchg_memory_order order) {
-  return (intptr_t)cmpxchg((jint)exchange_value, (volatile jint*)dest, (jint)compare_value, order);
-}
-
-inline void*    Atomic::cmpxchg_ptr(void*    exchange_value, volatile void*     dest, void*    compare_value, cmpxchg_memory_order order) {
-  return (void*)cmpxchg((jint)exchange_value, (volatile jint*)dest, (jint)compare_value, order);
-}
-
 inline jlong Atomic::load(const volatile jlong* src) {
   volatile jlong dest;
   volatile jlong* pdest = &dest;
--- a/src/share/vm/aot/aotCodeHeap.cpp	Wed Aug 23 14:52:55 2017 -0400
+++ b/src/share/vm/aot/aotCodeHeap.cpp	Thu Aug 24 01:13:04 2017 +0000
@@ -316,7 +316,7 @@
   AOTCompiledMethod *aot = new AOTCompiledMethod(code, mh(), meta, metadata_table, metadata_size, state_adr, this, name, code_id, _aot_id);
   assert(_code_to_aot[code_id]._aot == NULL, "should be not initialized");
   _code_to_aot[code_id]._aot = aot; // Should set this first
-  if (Atomic::cmpxchg(in_use, (jint*)&_code_to_aot[code_id]._state, not_set) != not_set) {
+  if (Atomic::cmpxchg(in_use, &_code_to_aot[code_id]._state, not_set) != not_set) {
     _code_to_aot[code_id]._aot = NULL; // Clean
   } else { // success
     // Publish method
@@ -378,7 +378,7 @@
     AOTCompiledMethod* aot = new AOTCompiledMethod(entry, NULL, meta, metadata_table, metadata_size, state_adr, this, full_name, code_id, i);
     assert(_code_to_aot[code_id]._aot  == NULL, "should be not initialized");
     _code_to_aot[code_id]._aot  = aot;
-    if (Atomic::cmpxchg(in_use, (jint*)&_code_to_aot[code_id]._state, not_set) != not_set) {
+    if (Atomic::cmpxchg(in_use, &_code_to_aot[code_id]._state, not_set) != not_set) {
       fatal("stab '%s' code state is %d", full_name, _code_to_aot[code_id]._state);
     }
     // Adjust code buffer boundaries only for stubs because they are last in the buffer.
@@ -649,7 +649,7 @@
     for (int i = 0; i < methods_cnt; ++i) {
       int code_id = indexes[i];
       // Invalidate aot code.
-      if (Atomic::cmpxchg(invalid, (jint*)&_code_to_aot[code_id]._state, not_set) != not_set) {
+      if (Atomic::cmpxchg(invalid, &_code_to_aot[code_id]._state, not_set) != not_set) {
         if (_code_to_aot[code_id]._state == in_use) {
           AOTCompiledMethod* aot = _code_to_aot[code_id]._aot;
           assert(aot != NULL, "aot should be set");
--- a/src/share/vm/aot/aotCodeHeap.hpp	Wed Aug 23 14:52:55 2017 -0400
+++ b/src/share/vm/aot/aotCodeHeap.hpp	Thu Aug 24 01:13:04 2017 +0000
@@ -26,6 +26,8 @@
 
 #include "aot/aotCompiledMethod.hpp"
 #include "classfile/symbolTable.hpp"
+#include "metaprogramming/integralConstant.hpp"
+#include "metaprogramming/isRegisteredEnum.hpp"
 #include "oops/metadata.hpp"
 #include "oops/method.hpp"
 
@@ -35,6 +37,8 @@
   invalid = 2  // AOT code is invalidated because dependencies failed
 };
 
+template<> struct IsRegisteredEnum<CodeState> : public TrueType {};
+
 typedef struct {
   AOTCompiledMethod* _aot;
   CodeState _state; // State change cases: not_set->in_use, not_set->invalid
--- a/src/share/vm/classfile/classLoaderData.cpp	Wed Aug 23 14:52:55 2017 -0400
+++ b/src/share/vm/classfile/classLoaderData.cpp	Thu Aug 24 01:13:04 2017 +0000
@@ -759,14 +759,18 @@
   return metaspace;
 }
 
-jobject ClassLoaderData::add_handle(Handle h) {
+OopHandle ClassLoaderData::add_handle(Handle h) {
   MutexLockerEx ml(metaspace_lock(),  Mutex::_no_safepoint_check_flag);
-  return (jobject) _handles.add(h());
+  return OopHandle(_handles.add(h()));
 }
 
-void ClassLoaderData::remove_handle_unsafe(jobject h) {
-  assert(_handles.contains((oop*) h), "Got unexpected handle " PTR_FORMAT, p2i((oop*) h));
-  *((oop*) h) = NULL;
+void ClassLoaderData::init_handle_locked(OopHandle& dest, Handle h) {
+  MutexLockerEx ml(metaspace_lock(),  Mutex::_no_safepoint_check_flag);
+  if (dest.resolve() != NULL) {
+    return;
+  } else {
+    dest = _handles.add(h());
+  }
 }
 
 // Add this metadata pointer to be freed when it's safe.  This is only during
--- a/src/share/vm/classfile/classLoaderData.hpp	Wed Aug 23 14:52:55 2017 -0400
+++ b/src/share/vm/classfile/classLoaderData.hpp	Thu Aug 24 01:13:04 2017 +0000
@@ -29,6 +29,7 @@
 #include "memory/memRegion.hpp"
 #include "memory/metaspace.hpp"
 #include "memory/metaspaceCounters.hpp"
+#include "oops/oopHandle.hpp"
 #include "runtime/mutex.hpp"
 #include "trace/traceMacros.hpp"
 #include "utilities/growableArray.hpp"
@@ -362,8 +363,8 @@
   void verify();
   const char* loader_name();
 
-  jobject add_handle(Handle h);
-  void remove_handle_unsafe(jobject h);
+  OopHandle add_handle(Handle h);
+  void init_handle_locked(OopHandle& pd, Handle h);  // used for concurrent access to ModuleEntry::_pd field
   void add_class(Klass* k, bool publicize = true);
   void remove_class(Klass* k);
   bool contains_klass(Klass* k);
--- a/src/share/vm/classfile/javaClasses.cpp	Wed Aug 23 14:52:55 2017 -0400
+++ b/src/share/vm/classfile/javaClasses.cpp	Thu Aug 24 01:13:04 2017 +0000
@@ -799,7 +799,7 @@
     // If java.base was already defined then patch this particular class with java.base.
     if (javabase_was_defined) {
       ModuleEntry *javabase_entry = ModuleEntryTable::javabase_moduleEntry();
-      assert(javabase_entry != NULL && javabase_entry->module_handle() != NULL,
+      assert(javabase_entry != NULL && javabase_entry->module() != NULL,
              "Setting class module field, " JAVA_BASE_NAME " should be defined");
       Handle javabase_handle(THREAD, javabase_entry->module());
       set_module(mirror(), javabase_handle());
--- a/src/share/vm/classfile/moduleEntry.cpp	Wed Aug 23 14:52:55 2017 -0400
+++ b/src/share/vm/classfile/moduleEntry.cpp	Thu Aug 24 01:13:04 2017 +0000
@@ -80,19 +80,16 @@
 }
 
 // Returns the shared ProtectionDomain
-Handle ModuleEntry::shared_protection_domain() {
-  return Handle(Thread::current(), JNIHandles::resolve(_pd));
+oop ModuleEntry::shared_protection_domain() {
+  return _pd.resolve();
 }
 
 // Set the shared ProtectionDomain atomically
 void ModuleEntry::set_shared_protection_domain(ClassLoaderData *loader_data,
                                                Handle pd_h) {
   // Create a handle for the shared ProtectionDomain and save it atomically.
-  // If someone beats us setting the _pd cache, the created handle is destroyed.
-  jobject obj = loader_data->add_handle(pd_h);
-  if (Atomic::cmpxchg_ptr(obj, &_pd, NULL) != NULL) {
-    loader_data->remove_handle_unsafe(obj);
-  }
+  // init_handle_locked checks if someone beats us setting the _pd cache.
+  loader_data->init_handle_locked(_pd, pd_h);
 }
 
 // Returns true if this module can read module m
--- a/src/share/vm/classfile/moduleEntry.hpp	Wed Aug 23 14:52:55 2017 -0400
+++ b/src/share/vm/classfile/moduleEntry.hpp	Thu Aug 24 01:13:04 2017 +0000
@@ -27,6 +27,7 @@
 
 #include "classfile/classLoaderData.hpp"
 #include "classfile/vmSymbols.hpp"
+#include "oops/oopHandle.hpp"
 #include "oops/symbol.hpp"
 #include "prims/jni.h"
 #include "runtime/jniHandles.hpp"
@@ -56,8 +57,8 @@
 // data structure.
 class ModuleEntry : public HashtableEntry<Symbol*, mtModule> {
 private:
-  jobject _module;                     // java.lang.Module
-  jobject _pd;                         // java.security.ProtectionDomain, cached
+  OopHandle _module;                   // java.lang.Module
+  OopHandle _pd;                       // java.security.ProtectionDomain, cached
                                        // for shared classes from this module
   ClassLoaderData* _loader_data;
   GrowableArray<ModuleEntry*>* _reads; // list of modules that are readable by this module
@@ -89,16 +90,16 @@
   Symbol*          name() const                        { return literal(); }
   void             set_name(Symbol* n)                 { set_literal(n); }
 
-  oop              module() const                      { return JNIHandles::resolve(_module); }
-  jobject          module_handle() const               { return _module; }
-  void             set_module(jobject j)               { _module = j; }
+  oop              module() const                      { return _module.resolve(); }
+  OopHandle        module_handle() const               { return _module; }
+  void             set_module(OopHandle j)             { _module = j; }
 
   // The shared ProtectionDomain reference is set once the VM loads a shared class
   // originated from the current Module. The referenced ProtectionDomain object is
   // created by the ClassLoader when loading a class (shared or non-shared) from the
   // Module for the first time. This ProtectionDomain object is used for all
   // classes from the Module loaded by the same ClassLoader.
-  Handle           shared_protection_domain();
+  oop              shared_protection_domain();
   void             set_shared_protection_domain(ClassLoaderData *loader_data, Handle pd);
 
   ClassLoaderData* loader_data() const                 { return _loader_data; }
@@ -246,7 +247,7 @@
   static void set_javabase_moduleEntry(ModuleEntry* java_base) { _javabase_module = java_base; }
 
   static bool javabase_defined() { return ((_javabase_module != NULL) &&
-                                           (_javabase_module->module_handle() != NULL)); }
+                                           (_javabase_module->module() != NULL)); }
   static void finalize_javabase(Handle module_handle, Symbol* version, Symbol* location);
   static void patch_javabase_entries(Handle module_handle);
 
--- a/src/share/vm/gc/parallel/psParallelCompact.hpp	Wed Aug 23 14:52:55 2017 -0400
+++ b/src/share/vm/gc/parallel/psParallelCompact.hpp	Thu Aug 24 01:13:04 2017 +0000
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2005, 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2005, 2017, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -593,9 +593,8 @@
 
 inline bool ParallelCompactData::RegionData::claim()
 {
-  const int los = (int) live_obj_size();
-  const int old = Atomic::cmpxchg(dc_claimed | los,
-                                  (volatile int*) &_dc_and_los, los);
+  const region_sz_t los = static_cast<region_sz_t>(live_obj_size());
+  const region_sz_t old = Atomic::cmpxchg(dc_claimed | los, &_dc_and_los, los);
   return old == los;
 }
 
--- a/src/share/vm/gc/shared/workgroup.cpp	Wed Aug 23 14:52:55 2017 -0400
+++ b/src/share/vm/gc/shared/workgroup.cpp	Thu Aug 24 01:13:04 2017 +0000
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2017, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -428,7 +428,7 @@
   assert(t < _n_tasks, "bad task id.");
   uint old = _tasks[t];
   if (old == 0) {
-    old = Atomic::cmpxchg(1, &_tasks[t], 0);
+    old = Atomic::cmpxchg(1u, &_tasks[t], 0u);
   }
   assert(_tasks[t] == 1, "What else?");
   bool res = old != 0;
@@ -442,15 +442,15 @@
 }
 
 void SubTasksDone::all_tasks_completed(uint n_threads) {
-  jint observed = _threads_completed;
-  jint old;
+  uint observed = _threads_completed;
+  uint old;
   do {
     old = observed;
     observed = Atomic::cmpxchg(old+1, &_threads_completed, old);
   } while (observed != old);
   // If this was the last thread checking in, clear the tasks.
   uint adjusted_thread_count = (n_threads == 0 ? 1 : n_threads);
-  if (observed + 1 == (jint)adjusted_thread_count) {
+  if (observed + 1 == adjusted_thread_count) {
     clear();
   }
 }
@@ -474,8 +474,8 @@
 bool SequentialSubTasksDone::is_task_claimed(uint& t) {
   t = _n_claimed;
   while (t < _n_tasks) {
-    jint res = Atomic::cmpxchg(t+1, &_n_claimed, t);
-    if (res == (jint)t) {
+    uint res = Atomic::cmpxchg(t+1, &_n_claimed, t);
+    if (res == t) {
       return false;
     }
     t = res;
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/share/vm/metaprogramming/isRegisteredEnum.hpp	Thu Aug 24 01:13:04 2017 +0000
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef SHARE_VM_METAPROGRAMMING_ISREGISTEREDENUM_HPP
+#define SHARE_VM_METAPROGRAMMING_ISREGISTEREDENUM_HPP
+
+#include "memory/allocation.hpp"
+#include "metaprogramming/integralConstant.hpp"
+
+// Recognize registered enum types.
+// Registration is by specializing this trait.
+//
+// This is a manual stand-in for the C++11 std::is_enum<T> type trait.
+// It's a lot of work to implement is_enum portably in C++98, so this
+// manual approach is being taken for those enum types we need to
+// distinguish.
+template<typename T>
+struct IsRegisteredEnum : public FalseType {};
+
+#endif // SHARE_VM_METAPROGRAMMING_ISREGISTEREDENUM_HPP
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/share/vm/metaprogramming/primitiveConversions.hpp	Thu Aug 24 01:13:04 2017 +0000
@@ -0,0 +1,170 @@
+/*
+ * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef SHARE_VM_METAPROGRAMMING_PRIMITIVECONVERSIONS_HPP
+#define SHARE_VM_METAPROGRAMMING_PRIMITIVECONVERSIONS_HPP
+
+#include "memory/allocation.hpp"
+#include "metaprogramming/enableIf.hpp"
+#include "metaprogramming/integralConstant.hpp"
+#include "metaprogramming/isFloatingPoint.hpp"
+#include "metaprogramming/isIntegral.hpp"
+#include "metaprogramming/isRegisteredEnum.hpp"
+#include "utilities/debug.hpp"
+
+class PrimitiveConversions : public AllStatic {
+public:
+  // Return a value of type T with the same representation as x.
+  //
+  // T and U must be of the same size.
+  //
+  // At least one of T or U must be an integral type.  The other must
+  // be an integral, (registered) enum, floating point, or pointer type.
+  template<typename T, typename U> static T cast(U x);
+
+  // Support thin wrappers over primitive types.
+  // If derived from TrueType, provides representational conversion
+  // from T to some other type.  When true, must provide
+  // - Value: typedef for T.
+  // - Decayed: typedef for decayed type.
+  // - static Decayed decay(T x): return value of type Decayed with
+  //   the same representation as x.
+  // - static T recover(Decayed x): return a value of type T with the
+  //   same representation as x.
+  template<typename T> struct Translate : public FalseType {};
+
+private:
+
+  template<typename T,
+           typename U,
+           bool same_size = sizeof(T) == sizeof(U),
+           typename Enable = void>
+  struct Cast;
+
+  template<typename T, typename U> static T cast_using_union(U x);
+};
+
+// Return an object of type T with the same value representation as x.
+//
+// T and U must be of the same size.  It is expected that one of T and
+// U is an integral type, and the other is an integral type, a
+// (registered) enum type, or a floating point type.
+//
+// This implementation uses the "union trick", which seems to be the
+// best of a bad set of options.  Though technically undefined
+// behavior, it is widely and well supported, producing good code.  In
+// some cases, such as gcc, that support is explicitly documented.
+//
+// Using memcpy is the correct method, but some compilers produce
+// wretched code for that method, even at maximal optimization levels.
+//
+// Using static_cast is only possible for integral and enum types, not
+// for floating point types.  And for integral and enum conversions,
+// static_cast has unspecified or implementation-defined behavior for
+// some cases.  C++11 <type_traits> can be used to avoid most or all
+// of those unspecified or implementation-defined issues, though that
+// may require multi-step conversions.
+//
+// Using reinterpret_cast of references has undefined behavior for
+// many cases, and there is much less empirical basis for its use, as
+// compared to the union trick.
+template<typename T, typename U>
+inline T PrimitiveConversions::cast_using_union(U x) {
+  STATIC_ASSERT(sizeof(T) == sizeof(U));
+  union { T t; U u; };
+  u = x;
+  return t;
+}
+
+//////////////////////////////////////////////////////////////////////////////
+// cast<T>(x)
+//
+// Cast<T, U, same_size, Enable>
+
+// Give an informative error if the sizes differ.
+template<typename T, typename U>
+struct PrimitiveConversions::Cast<T, U, false> VALUE_OBJ_CLASS_SPEC {
+  STATIC_ASSERT(sizeof(T) == sizeof(U));
+};
+
+// Conversion between integral types.
+template<typename T, typename U>
+struct PrimitiveConversions::Cast<
+  T, U, true,
+  typename EnableIf<IsIntegral<T>::value && IsIntegral<U>::value>::type>
+  VALUE_OBJ_CLASS_SPEC
+{
+  T operator()(U x) const { return cast_using_union<T>(x); }
+};
+
+// Convert an enum or floating point value to an integer value.
+template<typename T, typename U>
+struct PrimitiveConversions::Cast<
+  T, U, true,
+  typename EnableIf<IsIntegral<T>::value &&
+                    (IsRegisteredEnum<U>::value ||
+                     IsFloatingPoint<U>::value)>::type>
+  VALUE_OBJ_CLASS_SPEC
+{
+  T operator()(U x) const { return cast_using_union<T>(x); }
+};
+
+// Convert an integer to an enum or floating point value.
+template<typename T, typename U>
+struct PrimitiveConversions::Cast<
+  T, U, true,
+  typename EnableIf<IsIntegral<U>::value &&
+                    (IsRegisteredEnum<T>::value ||
+                     IsFloatingPoint<T>::value)>::type>
+  VALUE_OBJ_CLASS_SPEC
+{
+  T operator()(U x) const { return cast_using_union<T>(x); }
+};
+
+// Convert a pointer to an integral value.
+template<typename T, typename U>
+struct PrimitiveConversions::Cast<
+  T, U*, true,
+  typename EnableIf<IsIntegral<T>::value>::type>
+  VALUE_OBJ_CLASS_SPEC
+{
+  T operator()(U* x) const { return reinterpret_cast<T>(x); }
+};
+
+// Convert an integral value to a pointer.
+template<typename T, typename U>
+struct PrimitiveConversions::Cast<
+  T*, U, true,
+  typename EnableIf<IsIntegral<U>::value>::type>
+  VALUE_OBJ_CLASS_SPEC
+{
+  T* operator()(U x) const { return reinterpret_cast<T*>(x); }
+};
+
+template<typename T, typename U>
+inline T PrimitiveConversions::cast(U x) {
+  return Cast<T, U>()(x);
+}
+
+#endif // SHARE_VM_METAPROGRAMMING_PRIMITIVECONVERSIONS_HPP
--- a/src/share/vm/oops/constantPool.cpp	Wed Aug 23 14:52:55 2017 -0400
+++ b/src/share/vm/oops/constantPool.cpp	Thu Aug 24 01:13:04 2017 +0000
@@ -134,7 +134,7 @@
 }
 
 objArrayOop ConstantPool::resolved_references() const {
-  return (objArrayOop)JNIHandles::resolve(_cache->resolved_references());
+  return (objArrayOop)_cache->resolved_references();
 }
 
 // Create resolved_references array and mapping array for original cp indexes
--- a/src/share/vm/oops/constantPool.hpp	Wed Aug 23 14:52:55 2017 -0400
+++ b/src/share/vm/oops/constantPool.hpp	Thu Aug 24 01:13:04 2017 +0000
@@ -28,6 +28,7 @@
 #include "oops/arrayOop.hpp"
 #include "oops/cpCache.hpp"
 #include "oops/objArrayOop.hpp"
+#include "oops/oopHandle.hpp"
 #include "oops/symbol.hpp"
 #include "oops/typeArrayOop.hpp"
 #include "runtime/handles.hpp"
@@ -821,7 +822,7 @@
 
  private:
 
-  void set_resolved_references(jobject s) { _cache->set_resolved_references(s); }
+  void set_resolved_references(OopHandle s) { _cache->set_resolved_references(s); }
   Array<u2>* reference_map() const        {  return (_cache == NULL) ? NULL :  _cache->reference_map(); }
   void set_reference_map(Array<u2>* o)    { _cache->set_reference_map(o); }
 
--- a/src/share/vm/oops/cpCache.hpp	Wed Aug 23 14:52:55 2017 -0400
+++ b/src/share/vm/oops/cpCache.hpp	Thu Aug 24 01:13:04 2017 +0000
@@ -28,6 +28,7 @@
 #include "interpreter/bytecodes.hpp"
 #include "memory/allocation.hpp"
 #include "oops/array.hpp"
+#include "oops/oopHandle.hpp"
 #include "runtime/orderAccess.hpp"
 #include "utilities/align.hpp"
 
@@ -413,7 +414,7 @@
   // stored in the ConstantPool, which is read-only.
   // Array of resolved objects from the constant pool and map from resolved
   // object index to original constant pool index
-  jobject              _resolved_references;
+  OopHandle            _resolved_references;
   Array<u2>*           _reference_map;
   // The narrowOop pointer to the archived resolved_references. Set at CDS dump
   // time when caching java heap object is supported.
@@ -455,8 +456,8 @@
   oop  archived_references() NOT_CDS_JAVA_HEAP_RETURN_(NULL);
   void set_archived_references(oop o) NOT_CDS_JAVA_HEAP_RETURN;
 
-  jobject resolved_references()           { return _resolved_references; }
-  void set_resolved_references(jobject s) { _resolved_references = s; }
+  oop resolved_references()                 { return _resolved_references.resolve(); }
+  void set_resolved_references(OopHandle s) { _resolved_references = s; }
   Array<u2>* reference_map() const        { return _reference_map; }
   void set_reference_map(Array<u2>* o)    { _reference_map = o; }
 
--- a/src/share/vm/oops/oop.inline.hpp	Wed Aug 23 14:52:55 2017 -0400
+++ b/src/share/vm/oops/oop.inline.hpp	Thu Aug 24 01:13:04 2017 +0000
@@ -94,7 +94,7 @@
 }
 
 markOop oopDesc::cas_set_mark(markOop new_mark, markOop old_mark) {
-  return (markOop) Atomic::cmpxchg_ptr(new_mark, &_mark, old_mark);
+  return Atomic::cmpxchg(new_mark, &_mark, old_mark);
 }
 
 void oopDesc::init_mark() {
@@ -408,14 +408,14 @@
     narrowOop val = encode_heap_oop(exchange_value);
     narrowOop cmp = encode_heap_oop(compare_value);
 
-    narrowOop old = (narrowOop) Atomic::cmpxchg(val, (narrowOop*)dest, cmp);
+    narrowOop old = Atomic::cmpxchg(val, (narrowOop*)dest, cmp);
     // decode old from T to oop
     return decode_heap_oop(old);
   } else {
     if (prebarrier) {
       update_barrier_set_pre((oop*)dest, exchange_value);
     }
-    return (oop)Atomic::cmpxchg_ptr(exchange_value, (oop*)dest, compare_value);
+    return Atomic::cmpxchg(exchange_value, (oop*)dest, compare_value);
   }
 }
 
@@ -584,7 +584,7 @@
   assert(sizeof(markOop) == sizeof(intptr_t), "CAS below requires this.");
 
   while (!oldMark->is_marked()) {
-    curMark = (markOop)Atomic::cmpxchg_ptr(forwardPtrMark, &_mark, oldMark);
+    curMark = Atomic::cmpxchg(forwardPtrMark, &_mark, oldMark);
     assert(is_forwarded(), "object should have been forwarded");
     if (curMark == oldMark) {
       return NULL;
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/share/vm/oops/oopHandle.hpp	Thu Aug 24 01:13:04 2017 +0000
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef SHARE_VM_OOPS_OOPHANDLE_HPP
+#define SHARE_VM_OOPS_OOPHANDLE_HPP
+
+#include "oops/oop.hpp"
+#include "runtime/atomic.hpp"
+#include "runtime/orderAccess.hpp"
+
+// Simple class for encapsulating oop pointers stored in metadata.
+// These are different from Handle.  The Handle class stores pointers
+// to oops on the stack, and manages the allocation from a thread local
+// area in the constructor.
+// This assumes that the caller will allocate the handle in the appropriate
+// area.  The reason for the encapsulation is to help with naming and to allow
+// future uses for read barriers.
+
+class OopHandle {
+private:
+  oop* _obj;
+
+public:
+  OopHandle() : _obj(NULL) {}
+  OopHandle(oop* w) : _obj(w) {}
+
+  oop resolve() const { return (_obj == NULL) ? (oop)NULL : *_obj; }
+};
+
+#endif // SHARE_VM_OOPS_OOPHANDLE_HPP
--- a/src/share/vm/oops/oopsHierarchy.hpp	Wed Aug 23 14:52:55 2017 -0400
+++ b/src/share/vm/oops/oopsHierarchy.hpp	Thu Aug 24 01:13:04 2017 +0000
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2017, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -25,6 +25,8 @@
 #ifndef SHARE_VM_OOPS_OOPSHIERARCHY_HPP
 #define SHARE_VM_OOPS_OOPSHIERARCHY_HPP
 
+#include "metaprogramming/integralConstant.hpp"
+#include "metaprogramming/primitiveConversions.hpp"
 #include "runtime/globals.hpp"
 #include "utilities/globalDefinitions.hpp"
 
@@ -142,6 +144,15 @@
   operator oop* () const              { return (oop *)obj(); }
 };
 
+template<>
+struct PrimitiveConversions::Translate<oop> : public TrueType {
+  typedef oop Value;
+  typedef oopDesc* Decayed;
+
+  static Decayed decay(Value x) { return x.obj(); }
+  static Value recover(Decayed x) { return oop(x); }
+};
+
 #define DEF_OOP(type)                                                      \
    class type##OopDesc;                                                    \
    class type##Oop : public oop {                                          \
--- a/src/share/vm/prims/jni.cpp	Wed Aug 23 14:52:55 2017 -0400
+++ b/src/share/vm/prims/jni.cpp	Thu Aug 24 01:13:04 2017 +0000
@@ -4028,7 +4028,7 @@
 }
 
 _JNI_IMPORT_OR_EXPORT_ jint JNICALL JNI_CreateJavaVM(JavaVM **vm, void **penv, void *args) {
-  jint result = 0;
+  jint result = JNI_ERR;
   // On Windows, let CreateJavaVM run with SEH protection
 #ifdef _WIN32
   __try {
@@ -4063,7 +4063,7 @@
 DT_RETURN_MARK_DECL(DestroyJavaVM, jint
                     , HOTSPOT_JNI_DESTROYJAVAVM_RETURN(_ret_ref));
 
-jint JNICALL jni_DestroyJavaVM(JavaVM *vm) {
+static jint JNICALL jni_DestroyJavaVM_inner(JavaVM *vm) {
   HOTSPOT_JNI_DESTROYJAVAVM_ENTRY(vm);
   jint res = JNI_ERR;
   DT_RETURN_MARK(DestroyJavaVM, jint, (const jint&)res);
@@ -4099,6 +4099,20 @@
   }
 }
 
+jint JNICALL jni_DestroyJavaVM(JavaVM *vm) {
+  jint result = JNI_ERR;
+  // On Windows, we need SEH protection
+#ifdef _WIN32
+  __try {
+#endif
+    result = jni_DestroyJavaVM_inner(vm);
+#ifdef _WIN32
+  } __except(topLevelExceptionFilter((_EXCEPTION_POINTERS*)_exception_info())) {
+    // Nothing to do.
+  }
+#endif
+  return result;
+}
 
 static jint attach_current_thread(JavaVM *vm, void **penv, void *_args, bool daemon) {
   JavaVMAttachArgs *args = (JavaVMAttachArgs *) _args;
--- a/src/share/vm/prims/jvmtiEnvBase.cpp	Wed Aug 23 14:52:55 2017 -0400
+++ b/src/share/vm/prims/jvmtiEnvBase.cpp	Thu Aug 24 01:13:04 2017 +0000
@@ -1490,14 +1490,14 @@
   }
 }
 
-GrowableArray<jobject>* JvmtiModuleClosure::_tbl = NULL;
+GrowableArray<OopHandle>* JvmtiModuleClosure::_tbl = NULL;
 
 jvmtiError
 JvmtiModuleClosure::get_all_modules(JvmtiEnv* env, jint* module_count_ptr, jobject** modules_ptr) {
   ResourceMark rm;
   MutexLocker ml(Module_lock);
 
-  _tbl = new GrowableArray<jobject>(77);
+  _tbl = new GrowableArray<OopHandle>(77);
   if (_tbl == NULL) {
     return JVMTI_ERROR_OUT_OF_MEMORY;
   }
@@ -1513,7 +1513,7 @@
     return JVMTI_ERROR_OUT_OF_MEMORY;
   }
   for (jint idx = 0; idx < len; idx++) {
-    array[idx] = _tbl->at(idx);
+    array[idx] = JNIHandles::make_local(Thread::current(), _tbl->at(idx).resolve());
   }
   _tbl = NULL;
   *modules_ptr = array;
--- a/src/share/vm/prims/jvmtiEnvBase.hpp	Wed Aug 23 14:52:55 2017 -0400
+++ b/src/share/vm/prims/jvmtiEnvBase.hpp	Thu Aug 24 01:13:04 2017 +0000
@@ -30,6 +30,7 @@
 #include "prims/jvmtiEventController.hpp"
 #include "prims/jvmtiThreadState.hpp"
 #include "prims/jvmtiThreadState.inline.hpp"
+#include "oops/oopHandle.hpp"
 #include "runtime/fieldDescriptor.hpp"
 #include "runtime/frame.hpp"
 #include "runtime/handles.inline.hpp"
@@ -704,12 +705,12 @@
 // Jvmti module closure to collect all modules loaded to the system.
 class JvmtiModuleClosure : public StackObj {
 private:
-  static GrowableArray<jobject> *_tbl; // Protected with Module_lock
+  static GrowableArray<OopHandle> *_tbl; // Protected with Module_lock
 
   static void do_module(ModuleEntry* entry) {
     assert_locked_or_safepoint(Module_lock);
-    jobject module = entry->module_handle();
-    guarantee(module != NULL, "module object is NULL");
+    OopHandle module = entry->module_handle();
+    guarantee(module.resolve() != NULL, "module object is NULL");
     _tbl->push(module);
   }
 
--- a/src/share/vm/prims/jvmtiExport.cpp	Wed Aug 23 14:52:55 2017 -0400
+++ b/src/share/vm/prims/jvmtiExport.cpp	Thu Aug 24 01:13:04 2017 +0000
@@ -764,7 +764,7 @@
         ModuleEntry* module_entry = InstanceKlass::cast(klass)->module();
         assert(module_entry != NULL, "module_entry should always be set");
         if (module_entry->is_named() &&
-            module_entry->module_handle() != NULL &&
+            module_entry->module() != NULL &&
             !module_entry->has_default_read_edges()) {
           if (!module_entry->set_has_default_read_edges()) {
             // We won a potential race.
--- a/src/share/vm/runtime/atomic.hpp	Wed Aug 23 14:52:55 2017 -0400
+++ b/src/share/vm/runtime/atomic.hpp	Thu Aug 24 01:13:04 2017 +0000
@@ -26,6 +26,11 @@
 #define SHARE_VM_RUNTIME_ATOMIC_HPP
 
 #include "memory/allocation.hpp"
+#include "metaprogramming/enableIf.hpp"
+#include "metaprogramming/isIntegral.hpp"
+#include "metaprogramming/isSame.hpp"
+#include "metaprogramming/primitiveConversions.hpp"
+#include "metaprogramming/removeCV.hpp"
 #include "utilities/align.hpp"
 #include "utilities/macros.hpp"
 
@@ -111,13 +116,132 @@
   // *dest with exchange_value if the comparison succeeded. Returns prior
   // value of *dest. cmpxchg*() provide:
   // <fence> compare-and-exchange <membar StoreLoad|StoreStore>
-  inline static jbyte        cmpxchg    (jbyte        exchange_value, volatile jbyte*        dest, jbyte        compare_value, cmpxchg_memory_order order = memory_order_conservative);
-  inline static jint         cmpxchg    (jint         exchange_value, volatile jint*         dest, jint         compare_value, cmpxchg_memory_order order = memory_order_conservative);
-  // See comment above about using jlong atomics on 32-bit platforms
-  inline static jlong        cmpxchg    (jlong        exchange_value, volatile jlong*        dest, jlong        compare_value, cmpxchg_memory_order order = memory_order_conservative);
-  inline static unsigned int cmpxchg    (unsigned int exchange_value, volatile unsigned int* dest, unsigned int compare_value, cmpxchg_memory_order order = memory_order_conservative);
-  inline static intptr_t     cmpxchg_ptr(intptr_t     exchange_value, volatile intptr_t*     dest, intptr_t     compare_value, cmpxchg_memory_order order = memory_order_conservative);
-  inline static void*        cmpxchg_ptr(void*        exchange_value, volatile void*         dest, void*        compare_value, cmpxchg_memory_order order = memory_order_conservative);
+
+  template<typename T, typename D, typename U>
+  inline static D cmpxchg(T exchange_value,
+                          D volatile* dest,
+                          U compare_value,
+                          cmpxchg_memory_order order = memory_order_conservative);
+
+  // Performs atomic compare of *dest and NULL, and replaces *dest
+  // with exchange_value if the comparison succeeded.  Returns true if
+  // the comparison succeeded and the exchange occurred.  This is
+  // often used as part of lazy initialization, as a lock-free
+  // alternative to the Double-Checked Locking Pattern.
+  template<typename T, typename D>
+  inline static bool replace_if_null(T* value, D* volatile* dest,
+                                     cmpxchg_memory_order order = memory_order_conservative);
+
+  inline static intptr_t cmpxchg_ptr(intptr_t exchange_value,
+                                     volatile intptr_t* dest,
+                                     intptr_t compare_value,
+                                     cmpxchg_memory_order order = memory_order_conservative) {
+    return cmpxchg(exchange_value, dest, compare_value, order);
+  }
+
+  inline static void* cmpxchg_ptr(void* exchange_value,
+                                  volatile void* dest,
+                                  void* compare_value,
+                                  cmpxchg_memory_order order = memory_order_conservative) {
+    return cmpxchg(exchange_value,
+                   reinterpret_cast<void* volatile*>(dest),
+                   compare_value,
+                   order);
+  }
+
+private:
+  // Test whether From is implicitly convertible to To.
+  // From and To must be pointer types.
+  // Note: Provides the limited subset of C++11 std::is_convertible
+  // that is needed here.
+  template<typename From, typename To> struct IsPointerConvertible;
+
+  // Dispatch handler for cmpxchg.  Provides type-based validity
+  // checking and limited conversions around calls to the
+  // platform-specific implementation layer provided by
+  // PlatformCmpxchg.
+  template<typename T, typename D, typename U, typename Enable = void>
+  struct CmpxchgImpl;
+
+  // Platform-specific implementation of cmpxchg.  Support for sizes
+  // of 1, 4, and 8 are required.  The class is a function object that
+  // must be default constructible, with these requirements:
+  //
+  // - dest is of type T*.
+  // - exchange_value and compare_value are of type T.
+  // - order is of type cmpxchg_memory_order.
+  // - platform_cmpxchg is an object of type PlatformCmpxchg<sizeof(T)>.
+  //
+  // Then
+  //   platform_cmpxchg(exchange_value, dest, compare_value, order)
+  // must be a valid expression, returning a result convertible to T.
+  //
+  // A default definition is provided, which declares a function template
+  //   T operator()(T, T volatile*, T, cmpxchg_memory_order) const
+  //
+  // For each required size, a platform must either provide an
+  // appropriate definition of that function, or must entirely
+  // specialize the class template for that size.
+  template<size_t byte_size> struct PlatformCmpxchg;
+
+  // Support for platforms that implement some variants of cmpxchg
+  // using a (typically out of line) non-template helper function.
+  // The generic arguments passed to PlatformCmpxchg need to be
+  // translated to the appropriate type for the helper function, the
+  // helper invoked on the translated arguments, and the result
+  // translated back.  Type is the parameter / return type of the
+  // helper function.
+  template<typename Type, typename Fn, typename T>
+  static T cmpxchg_using_helper(Fn fn,
+                                T exchange_value,
+                                T volatile* dest,
+                                T compare_value);
+
+  // Support platforms that do not provide Read-Modify-Write
+  // byte-level atomic access. To use, derive PlatformCmpxchg<1> from
+  // this class.
+public: // Temporary, can't be private: C++03 11.4/2. Fixed by C++11.
+  struct CmpxchgByteUsingInt;
+private:
+};
+
+template<typename From, typename To>
+struct Atomic::IsPointerConvertible<From*, To*> : AllStatic {
+  // Determine whether From* is implicitly convertible to To*, using
+  // the "sizeof trick".
+  typedef char yes;
+  typedef char (&no)[2];
+
+  static yes test(To*);
+  static no test(...);
+  static From* test_value;
+
+  static const bool value = (sizeof(yes) == sizeof(test(test_value)));
+};
+
+// Define the class before including platform file, which may specialize
+// the operator definition.  No generic definition of specializations
+// of the operator template are provided, nor are there any generic
+// specializations of the class.  The platform file is responsible for
+// providing those.
+template<size_t byte_size>
+struct Atomic::PlatformCmpxchg VALUE_OBJ_CLASS_SPEC {
+  template<typename T>
+  T operator()(T exchange_value,
+               T volatile* dest,
+               T compare_value,
+               cmpxchg_memory_order order) const;
+};
+
+// Define the class before including platform file, which may use this
+// as a base class, requiring it be complete.  The definition is later
+// in this file, near the other definitions related to cmpxchg.
+struct Atomic::CmpxchgByteUsingInt VALUE_OBJ_CLASS_SPEC {
+  template<typename T>
+  T operator()(T exchange_value,
+               T volatile* dest,
+               T compare_value,
+               cmpxchg_memory_order order) const;
 };
 
 // platform specific in-line definitions - must come before shared definitions
@@ -143,61 +267,152 @@
   dec_ptr((volatile intptr_t*) dest);
 }
 
-#ifndef VM_HAS_SPECIALIZED_CMPXCHG_BYTE
-/*
- * This is the default implementation of byte-sized cmpxchg. It emulates jbyte-sized cmpxchg
- * in terms of jint-sized cmpxchg. Platforms may override this by defining their own inline definition
- * as well as defining VM_HAS_SPECIALIZED_CMPXCHG_BYTE. This will cause the platform specific
- * implementation to be used instead.
- */
-inline jbyte Atomic::cmpxchg(jbyte exchange_value, volatile jbyte* dest,
-                             jbyte compare_value, cmpxchg_memory_order order) {
-  STATIC_ASSERT(sizeof(jbyte) == 1);
-  volatile jint* dest_int =
-      reinterpret_cast<volatile jint*>(align_down(dest, sizeof(jint)));
-  size_t offset = pointer_delta(dest, dest_int, 1);
-  jint cur = *dest_int;
-  jbyte* cur_as_bytes = reinterpret_cast<jbyte*>(&cur);
+template<typename T, typename D, typename U>
+inline D Atomic::cmpxchg(T exchange_value,
+                         D volatile* dest,
+                         U compare_value,
+                         cmpxchg_memory_order order) {  // Generic entry point; all work is delegated.
+  return CmpxchgImpl<T, D, U>()(exchange_value, dest, compare_value, order);
+}
+
+template<typename T, typename D>
+inline bool Atomic::replace_if_null(T* value, D* volatile* dest,
+                                    cmpxchg_memory_order order) {
+  // Returns true iff the cmpxchg result compares equal to NULL.
+  // Presently a trivial implementation in terms of cmpxchg.  Consider platform
+  // support, to permit intrinsics like gcc's __sync_bool_compare_and_swap.
+  D* expected_null = NULL;  // Typed as D* so it matches dest in the cmpxchg call.
+  return expected_null == cmpxchg(value, dest, expected_null, order);
+}
+
+// Handle cmpxchg for integral and registered enum types.
+//
+// All the involved types must be identical (the <T, T, T> pattern below).
+template<typename T>
+struct Atomic::CmpxchgImpl<
+  T, T, T,
+  typename EnableIf<IsIntegral<T>::value || IsRegisteredEnum<T>::value>::type>
+  VALUE_OBJ_CLASS_SPEC
+{
+  T operator()(T exchange_value, T volatile* dest, T compare_value,
+               cmpxchg_memory_order order) const {
+    // Forward to the platform handler for the size of T.
+    return PlatformCmpxchg<sizeof(T)>()(exchange_value,
+                                        dest,
+                                        compare_value,
+                                        order);
+  }
+};
+
+// Handle cmpxchg for pointer types.
+//
+// The destination's type and the compare_value type must be the same,
+// ignoring cv-qualifiers; we don't care about the cv-qualifiers of
+// the compare_value.
+//
+// The exchange_value must be implicitly convertible to the
+// destination's type; it must be type-correct to store the
+// exchange_value in the destination.
+template<typename T, typename D, typename U>
+struct Atomic::CmpxchgImpl<
+  T*, D*, U*,  // exchange_value, destination and compare_value types, in that order
+  typename EnableIf<Atomic::IsPointerConvertible<T*, D*>::value &&
+                    IsSame<typename RemoveCV<D>::type,
+                           typename RemoveCV<U>::type>::value>::type>
+  VALUE_OBJ_CLASS_SPEC
+{
+  D* operator()(T* exchange_value, D* volatile* dest, U* compare_value,
+               cmpxchg_memory_order order) const {
+    // Allow derived to base conversion, and adding cv-qualifiers.
+    D* new_value = exchange_value;
+    // Don't care what the CV qualifiers for compare_value are,
+    // but we need to match D* when calling platform support.
+    D* old_value = const_cast<D*>(compare_value);  // Only adjusts cv-qualifiers.
+    return PlatformCmpxchg<sizeof(D*)>()(new_value, dest, old_value, order);
+  }
+};
+
+// Handle cmpxchg for types that have a translator.
+//
+// All the involved types must be identical.
+//
+// This translates the original call into a call on the decayed
+// arguments, and returns the recovered result of that translated
+// call.
+template<typename T>
+struct Atomic::CmpxchgImpl<
+  T, T, T,
+  typename EnableIf<PrimitiveConversions::Translate<T>::value>::type>
+  VALUE_OBJ_CLASS_SPEC
+{
+  T operator()(T exchange_value, T volatile* dest, T compare_value,
+               cmpxchg_memory_order order) const {
+    typedef PrimitiveConversions::Translate<T> Translator;
+    typedef typename Translator::Decayed Decayed;  // Type the generic cmpxchg is re-invoked on.
+    STATIC_ASSERT(sizeof(T) == sizeof(Decayed));   // dest is reinterpreted as Decayed volatile* below.
+    return Translator::recover(
+      cmpxchg(Translator::decay(exchange_value),
+              reinterpret_cast<Decayed volatile*>(dest),
+              Translator::decay(compare_value),
+              order));
+  }
+};
+
+template<typename Type, typename Fn, typename T>
+inline T Atomic::cmpxchg_using_helper(Fn fn,
+                                      T exchange_value,
+                                      T volatile* dest,
+                                      T compare_value) {
+  STATIC_ASSERT(sizeof(Type) == sizeof(T));  // dest is reinterpreted as Type volatile* below.
+  return PrimitiveConversions::cast<T>(      // Convert fn's result back to T.
+    fn(PrimitiveConversions::cast<Type>(exchange_value),
+       reinterpret_cast<Type volatile*>(dest),
+       PrimitiveConversions::cast<Type>(compare_value)));
+}
+
+template<typename T>
+inline T Atomic::CmpxchgByteUsingInt::operator()(T exchange_value,
+                                                 T volatile* dest,
+                                                 T compare_value,
+                                                 cmpxchg_memory_order order) const {
+  STATIC_ASSERT(sizeof(T) == sizeof(uint8_t));  // byte-sized operands only
+  uint8_t canon_exchange_value = exchange_value;
+  uint8_t canon_compare_value = compare_value;
+  volatile uint32_t* aligned_dest
+    = reinterpret_cast<volatile uint32_t*>(align_down(dest, sizeof(uint32_t)));
+  size_t offset = pointer_delta(dest, aligned_dest, 1);  // index of our byte within cur / new_value
+  uint32_t cur = *aligned_dest;  // snapshot of the 32-bit word read through aligned_dest
+  uint8_t* cur_as_bytes = reinterpret_cast<uint8_t*>(&cur);
 
   // current value may not be what we are looking for, so force it
   // to that value so the initial cmpxchg will fail if it is different
-  cur_as_bytes[offset] = compare_value;
+  cur_as_bytes[offset] = canon_compare_value;
 
   // always execute a real cmpxchg so that we get the required memory
   // barriers even on initial failure
   do {
     // value to swap in matches current value ...
-    jint new_value = cur;
+    uint32_t new_value = cur;
     // ... except for the one jbyte we want to update
-    reinterpret_cast<jbyte*>(&new_value)[offset] = exchange_value;
+    reinterpret_cast<uint8_t*>(&new_value)[offset] = canon_exchange_value;
 
-    jint res = cmpxchg(new_value, dest_int, cur, order);
-    if (res == cur) break; // success
+    uint32_t res = cmpxchg(new_value, aligned_dest, cur, order);
+    if (res == cur) break;      // success
 
-    // at least one jbyte in the jint changed value, so update
-    // our view of the current jint
+    // at least one byte in the int changed value, so update
+    // our view of the current int
     cur = res;
-    // if our jbyte is still as cur we loop and try again
-  } while (cur_as_bytes[offset] == compare_value);
+    // if our byte still equals the compare value we loop and try again
+  } while (cur_as_bytes[offset] == canon_compare_value);
 
-  return cur_as_bytes[offset];
+  return PrimitiveConversions::cast<T>(cur_as_bytes[offset]);
 }
 
-#endif // VM_HAS_SPECIALIZED_CMPXCHG_BYTE
-
 inline unsigned Atomic::xchg(unsigned int exchange_value, volatile unsigned int* dest) {
   assert(sizeof(unsigned int) == sizeof(jint), "more work to do");
   return (unsigned int)Atomic::xchg((jint)exchange_value, (volatile jint*)dest);
 }
 
-inline unsigned Atomic::cmpxchg(unsigned int exchange_value,
-                         volatile unsigned int* dest, unsigned int compare_value,
-                         cmpxchg_memory_order order) {
-  assert(sizeof(unsigned int) == sizeof(jint), "more work to do");
-  return (unsigned int)Atomic::cmpxchg((jint)exchange_value, (volatile jint*)dest,
-                                       (jint)compare_value, order);
-}
-
 inline jshort Atomic::add(jshort add_value, volatile jshort* dest) {
   // Most platforms do not support atomic add on a 2-byte value. However,
   // if the value occupies the most significant 16 bits of an aligned 32-bit
--- a/src/share/vm/runtime/os.cpp	Wed Aug 23 14:52:55 2017 -0400
+++ b/src/share/vm/runtime/os.cpp	Thu Aug 24 01:13:04 2017 +0000
@@ -755,9 +755,9 @@
   // Make updating the random seed thread safe.
   while (true) {
     unsigned int seed = _rand_seed;
-    int rand = random_helper(seed);
+    unsigned int rand = random_helper(seed);
     if (Atomic::cmpxchg(rand, &_rand_seed, seed) == seed) {
-      return rand;
+      return static_cast<int>(rand);
     }
   }
 }
--- a/src/share/vm/runtime/vmStructs.cpp	Wed Aug 23 14:52:55 2017 -0400
+++ b/src/share/vm/runtime/vmStructs.cpp	Thu Aug 24 01:13:04 2017 +0000
@@ -83,6 +83,7 @@
 #include "oops/objArrayKlass.hpp"
 #include "oops/objArrayOop.hpp"
 #include "oops/oop.inline.hpp"
+#include "oops/oopHandle.hpp"
 #include "oops/symbol.hpp"
 #include "oops/typeArrayKlass.hpp"
 #include "oops/typeArrayOop.hpp"
@@ -235,7 +236,7 @@
   nonstatic_field(ConstantPool,                _operands,                                     Array<u2>*)                            \
   nonstatic_field(ConstantPool,                _resolved_klasses,                             Array<Klass*>*)                        \
   nonstatic_field(ConstantPool,                _length,                                       int)                                   \
-  nonstatic_field(ConstantPoolCache,           _resolved_references,                          jobject)                               \
+  nonstatic_field(ConstantPoolCache,           _resolved_references,                          OopHandle)                             \
   nonstatic_field(ConstantPoolCache,           _reference_map,                                Array<u2>*)                            \
   nonstatic_field(ConstantPoolCache,           _length,                                       int)                                   \
   nonstatic_field(ConstantPoolCache,           _constant_pool,                                ConstantPool*)                         \
@@ -1438,6 +1439,7 @@
   declare_oop_type(oop)                                                   \
   declare_oop_type(narrowOop)                                             \
   declare_oop_type(typeArrayOop)                                          \
+  declare_oop_type(OopHandle)                                             \
                                                                           \
   /*************************************/                                 \
   /* MethodOop-related data structures */                                 \
--- a/src/share/vm/utilities/bitMap.cpp	Wed Aug 23 14:52:55 2017 -0400
+++ b/src/share/vm/utilities/bitMap.cpp	Thu Aug 24 01:13:04 2017 +0000
@@ -210,12 +210,12 @@
   // With a valid range (beg <= end), this test ensures that end != 0, as
   // required by inverted_bit_mask_for_range.  Also avoids an unnecessary write.
   if (beg != end) {
-    intptr_t* pw  = (intptr_t*)word_addr(beg);
-    intptr_t  w   = *pw;
-    intptr_t  mr  = (intptr_t)inverted_bit_mask_for_range(beg, end);
-    intptr_t  nw  = value ? (w | ~mr) : (w & mr);
+    bm_word_t* pw = word_addr(beg);
+    bm_word_t  w  = *pw;
+    bm_word_t  mr = inverted_bit_mask_for_range(beg, end);
+    bm_word_t  nw = value ? (w | ~mr) : (w & mr);
     while (true) {
-      intptr_t res = Atomic::cmpxchg_ptr(nw, pw, w);
+      bm_word_t res = Atomic::cmpxchg(nw, pw, w);
       if (res == w) break;
       w  = res;
       nw = value ? (w | ~mr) : (w & mr);
@@ -617,7 +617,7 @@
   return true;
 }
 
-BitMap::idx_t* BitMap::_pop_count_table = NULL;
+const BitMap::idx_t* BitMap::_pop_count_table = NULL;
 
 void BitMap::init_pop_count_table() {
   if (_pop_count_table == NULL) {
@@ -626,11 +626,8 @@
       table[i] = num_set_bits(i);
     }
 
-    intptr_t res = Atomic::cmpxchg_ptr((intptr_t)  table,
-                                       (intptr_t*) &_pop_count_table,
-                                       (intptr_t)  NULL_WORD);
-    if (res != NULL_WORD) {
-      guarantee( _pop_count_table == (void*) res, "invariant" );
+    if (!Atomic::replace_if_null(table, &_pop_count_table)) {
+      guarantee(_pop_count_table != NULL, "invariant");
       FREE_C_HEAP_ARRAY(idx_t, table);
     }
   }
--- a/src/share/vm/utilities/bitMap.hpp	Wed Aug 23 14:52:55 2017 -0400
+++ b/src/share/vm/utilities/bitMap.hpp	Thu Aug 24 01:13:04 2017 +0000
@@ -114,7 +114,7 @@
   void verify_range(idx_t beg_index, idx_t end_index) const NOT_DEBUG_RETURN;
 
   // Statistics.
-  static idx_t* _pop_count_table;
+  static const idx_t* _pop_count_table;
   static void init_pop_count_table();
   static idx_t num_set_bits(bm_word_t w);
   static idx_t num_set_bits_from_table(unsigned char c);
--- a/src/share/vm/utilities/bitMap.inline.hpp	Wed Aug 23 14:52:55 2017 -0400
+++ b/src/share/vm/utilities/bitMap.inline.hpp	Thu Aug 24 01:13:04 2017 +0000
@@ -49,9 +49,7 @@
     if (new_val == old_val) {
       return false;     // Someone else beat us to it.
     }
-    const bm_word_t cur_val = (bm_word_t) Atomic::cmpxchg_ptr((void*) new_val,
-                                                      (volatile void*) addr,
-                                                      (void*) old_val);
+    const bm_word_t cur_val = Atomic::cmpxchg(new_val, addr, old_val);
     if (cur_val == old_val) {
       return true;      // Success.
     }
@@ -70,9 +68,7 @@
     if (new_val == old_val) {
       return false;     // Someone else beat us to it.
     }
-    const bm_word_t cur_val = (bm_word_t) Atomic::cmpxchg_ptr((void*) new_val,
-                                                      (volatile void*) addr,
-                                                      (void*) old_val);
+    const bm_word_t cur_val = Atomic::cmpxchg(new_val, addr, old_val);
     if (cur_val == old_val) {
       return true;      // Success.
     }
--- a/test/gc/logging/TestPrintReferences.java	Wed Aug 23 14:52:55 2017 -0400
+++ b/test/gc/logging/TestPrintReferences.java	Thu Aug 24 01:13:04 2017 +0000
@@ -23,7 +23,7 @@
 
 /*
  * @test TestPrintReferences
- * @bug 8136991
+ * @bug 8136991 8186402
  * @summary Validate the reference processing logging
  * @key gc
  * @library /test/lib
@@ -42,6 +42,8 @@
     ProcessBuilder pb_enabled = ProcessTools.createJavaProcessBuilder("-Xlog:gc+phases+ref=debug",
                                                                       "-XX:+UseG1GC",
                                                                       "-Xmx10M",
+                                                                      // Explicit thread setting is required to avoid using only 1 thread
+                                                                      "-XX:ParallelGCThreads=2",
                                                                       GCTest.class.getName());
     OutputAnalyzer output = new OutputAnalyzer(pb_enabled.start());
 
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/native/metaprogramming/test_isRegisteredEnum.cpp	Thu Aug 24 01:13:04 2017 +0000
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "memory/allocation.hpp"
+#include "metaprogramming/integralConstant.hpp"
+#include "metaprogramming/isRegisteredEnum.hpp"
+
+#include "unittest.hpp"
+
+struct IsRegisteredEnumTest : AllStatic {
+  enum A { A_x, A_y, A_z };  // Registered by the specialization below.
+  enum B { B_x, B_y, B_z };  // No registration is provided for B.
+};
+
+typedef IsRegisteredEnumTest::A A;
+typedef IsRegisteredEnumTest::B B;
+
+template<> struct IsRegisteredEnum<A> : public TrueType {};  // Register A only.
+
+STATIC_ASSERT(!IsRegisteredEnum<int>::value);  // non-enum type
+STATIC_ASSERT(IsRegisteredEnum<A>::value);     // explicitly registered enum
+STATIC_ASSERT(!IsRegisteredEnum<B>::value);    // enum without a registration
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/native/metaprogramming/test_primitiveConversions.cpp	Thu Aug 24 01:13:04 2017 +0000
@@ -0,0 +1,128 @@
+/*
+ * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "memory/allocation.hpp"
+#include "metaprogramming/isSame.hpp"
+#include "metaprogramming/primitiveConversions.hpp"
+#include "unittest.hpp"
+#include "utilities/debug.hpp"
+
+struct PrimitiveConversionsTestSupport: AllStatic {
+
+  template<size_t byte_size> struct SignedTypeOfSize;    // ::type is a signed integer of byte_size bytes
+  template<size_t byte_size> struct UnsignedTypeOfSize;  // ::type is an unsigned integer of byte_size bytes
+
+  template<typename T> struct Signed;    // SignedTypeOfSize selected by sizeof(T)
+  template<typename T> struct Unsigned;  // UnsignedTypeOfSize selected by sizeof(T)
+};
+
+#define DEFINE_CANONICAL_SIGNED_TYPE(T)                                 \
+  template<>                                                            \
+  struct PrimitiveConversionsTestSupport::SignedTypeOfSize<sizeof(T)>   \
+    : public AllStatic                                                  \
+  {                                                                     \
+    typedef T type;                                                     \
+  };
+
+#define DEFINE_CANONICAL_UNSIGNED_TYPE(T)                               \
+  template<>                                                            \
+  struct PrimitiveConversionsTestSupport::UnsignedTypeOfSize<sizeof(T)> \
+    : public AllStatic                                                  \
+  {                                                                     \
+    typedef T type;                                                     \
+  };
+
+#define DEFINE_INTEGER_TYPES_OF_SIZE(NBITS)            \
+  DEFINE_CANONICAL_SIGNED_TYPE(int ## NBITS ## _t)     \
+  DEFINE_CANONICAL_UNSIGNED_TYPE(uint ## NBITS ## _t)
+
+DEFINE_INTEGER_TYPES_OF_SIZE(8)   // int8_t / uint8_t
+DEFINE_INTEGER_TYPES_OF_SIZE(16)  // int16_t / uint16_t
+DEFINE_INTEGER_TYPES_OF_SIZE(32)  // int32_t / uint32_t
+DEFINE_INTEGER_TYPES_OF_SIZE(64)  // int64_t / uint64_t
+
+#undef DEFINE_INTEGER_TYPES_OF_SIZE
+#undef DEFINE_CANONICAL_SIGNED_TYPE
+#undef DEFINE_CANONICAL_UNSIGNED_TYPE
+
+template<typename T>
+struct PrimitiveConversionsTestSupport::Signed
+  : public SignedTypeOfSize<sizeof(T)>  // Inherits ::type, keyed by T's size rather than by T itself.
+{};
+
+template<typename T>
+struct PrimitiveConversionsTestSupport::Unsigned
+  : public UnsignedTypeOfSize<sizeof(T)>  // Inherits ::type, keyed by T's size rather than by T itself.
+{};
+
+TEST(PrimitiveConversionsTest, round_trip_int) {  // Cast to a same-sized integer type and back; value must survive.
+  int  sfive = 5;
+  int  mfive = -5;
+  uint ufive = 5u;
+
+  typedef PrimitiveConversionsTestSupport::Signed<int>::type SI;    // signed type selected by sizeof(int)
+  typedef PrimitiveConversionsTestSupport::Unsigned<int>::type UI;  // unsigned type selected by sizeof(int)
+
+  EXPECT_EQ(sfive, PrimitiveConversions::cast<int>(PrimitiveConversions::cast<SI>(sfive)));
+  EXPECT_EQ(sfive, PrimitiveConversions::cast<int>(PrimitiveConversions::cast<UI>(sfive)));
+
+  EXPECT_EQ(mfive, PrimitiveConversions::cast<int>(PrimitiveConversions::cast<SI>(mfive)));
+  EXPECT_EQ(mfive, PrimitiveConversions::cast<int>(PrimitiveConversions::cast<UI>(mfive)));
+
+  EXPECT_EQ(ufive, PrimitiveConversions::cast<uint>(PrimitiveConversions::cast<SI>(ufive)));
+  EXPECT_EQ(ufive, PrimitiveConversions::cast<uint>(PrimitiveConversions::cast<UI>(ufive)));
+}
+
+TEST(PrimitiveConversionsTest, round_trip_float) {  // Cast float/double to a same-sized integer type and back.
+  float  ffive = 5.0f;
+  double dfive = 5.0;
+
+  typedef PrimitiveConversionsTestSupport::Signed<float>::type SF;    // signed type selected by sizeof(float)
+  typedef PrimitiveConversionsTestSupport::Unsigned<float>::type UF;  // unsigned type selected by sizeof(float)
+
+  typedef PrimitiveConversionsTestSupport::Signed<double>::type SD;    // signed type selected by sizeof(double)
+  typedef PrimitiveConversionsTestSupport::Unsigned<double>::type UD;  // unsigned type selected by sizeof(double)
+
+  EXPECT_EQ(ffive, PrimitiveConversions::cast<float>(PrimitiveConversions::cast<SF>(ffive)));
+  EXPECT_EQ(ffive, PrimitiveConversions::cast<float>(PrimitiveConversions::cast<UF>(ffive)));
+
+  EXPECT_EQ(dfive, PrimitiveConversions::cast<double>(PrimitiveConversions::cast<SD>(dfive)));
+  EXPECT_EQ(dfive, PrimitiveConversions::cast<double>(PrimitiveConversions::cast<UD>(dfive)));
+}
+
+TEST(PrimitiveConversionsTest, round_trip_ptr) {  // Cast pointers to a same-sized integer type and back.
+  int five = 5;
+  int* pfive = &five;
+  const int* cpfive = &five;  // Same object, reached through a pointer-to-const.
+
+  typedef PrimitiveConversionsTestSupport::Signed<int*>::type SIP;    // signed type selected by sizeof(int*)
+  typedef PrimitiveConversionsTestSupport::Unsigned<int*>::type UIP;  // unsigned type selected by sizeof(int*)
+
+  EXPECT_EQ(pfive, PrimitiveConversions::cast<int*>(PrimitiveConversions::cast<SIP>(pfive)));
+  EXPECT_EQ(pfive, PrimitiveConversions::cast<int*>(PrimitiveConversions::cast<UIP>(pfive)));
+
+  EXPECT_EQ(cpfive, PrimitiveConversions::cast<const int*>(PrimitiveConversions::cast<SIP>(cpfive)));
+  EXPECT_EQ(cpfive, PrimitiveConversions::cast<const int*>(PrimitiveConversions::cast<UIP>(cpfive)));
+}