changeset 8065:ac59b80e21bb jdk8u102-b14-aarch32-160812

Merge
author snazarki
date Fri, 12 Aug 2016 18:10:37 +0300
parents 547b223e8a84 ac29c9c1193a
children 36fd104e90c5
files .hgtags make/defs.make make/linux/makefiles/defs.make src/share/vm/c1/c1_Runtime1.cpp src/share/vm/classfile/verifier.cpp src/share/vm/interpreter/bytecodeStream.hpp src/share/vm/opto/c2_globals.hpp src/share/vm/opto/c2compiler.cpp src/share/vm/opto/compile.cpp src/share/vm/opto/lcm.cpp src/share/vm/opto/runtime.cpp src/share/vm/runtime/globals.hpp src/share/vm/runtime/safepoint.cpp src/share/vm/runtime/stubRoutines.hpp src/share/vm/runtime/vmStructs.cpp test/gc/8000311/Test8000311.java test/gc/TestG1ZeroPGCTJcmdThreadPrint.java
diffstat 171 files changed, 5360 insertions(+), 949 deletions(-) [+]
line wrap: on
line diff
--- a/.hgtags	Mon Aug 08 20:18:53 2016 +0300
+++ b/.hgtags	Fri Aug 12 18:10:37 2016 +0300
@@ -598,6 +598,7 @@
 6824e2475e0432e27f9cc51838bc34ea5fbf5113 jdk8u40-b27
 8220f68a195f6eeed2f5fb6e8a303726b512e899 jdk8u40-b31
 850a290eb1088a61178d1910c500e170ef4f4386 jdk8u40-b32
+e6aa4a8c1b46a05b6c493b4ffe5c2555013f5c99 jdk8u40-b33
 1b3abbeee961dee49780c0e4af5337feb918c555 jdk8u40-b10
 f10fe402dfb1543723b4b117a7cba3ea3d4159f1 hs25.40-b15
 99372b2fee0eb8b3452f47230e84aa6e97003184 jdk8u40-b11
@@ -791,6 +792,8 @@
 bb98a4ba1556d0505461de98aa3dddf75278c09b jdk8u72-b03
 6c8ceb05ccf78f2f8f72f0870e3f6f3bd4895bb1 jdk8u72-b04
 a2969911663ab29c71a61aa3403e53243ad89923 jdk8u72-b05
+acf0d80cb84f14d787c34360abf2bc38b186999a jdk8u72-b06
+a382a72730f480078af5454afe782de9b6c210d3 jdk8u72-b07
 d7b01fb81aa8a5437cb03bc36afe15cf0e55fb89 jdk8u76-b00
 483350e18369cfd6ea5ed9a4b95e38126b56a2fa jdk8u76-b00-aarch32-160415
 f26ad2273cd42cde957084ee953a9dc1a2598b5d jdk8u72-b08
@@ -801,17 +804,28 @@
 a8e4754b89aecc388623394a20f6d43d4c58f083 jdk8u72-b13
 dc2fdd4e0b8105268b8231040f761f27ab4523f2 jdk8u72-b14
 d6670c5d49ba381405ec9f69a78ccc5b8b0c8473 jdk8u72-b15
+da43260704c28b9f19cb652090ae65c258220fd6 jdk8u72-b31
 26b99cd20661a1fa05939d1856a9389311e01c4f jdk8u73-b00
 931c31db01ae873525a1b2c306b01129eb431960 jdk8u73-b01
 67566d815a66d958c1f817d65f1621ba1d2e5f33 jdk8u73-b02
 451dda77f6c29bd3260e87f847a9eadae122a759 jdk8u74-b00
 c1031a924f2c910fad078838b88a2f0146f2de98 jdk8u74-b01
 ca9cae9aa9e989bbe6713c91d55c913edeaecce4 jdk8u74-b02
+a5b78b56841e97ce00463874f1b7f63c54d84934 jdk8u74-b31
+94ec11846b18111e73929b6caa9fbe7262e142c1 jdk8u74-b32
+1b6d4fd2730e58f17820930f797938dc182117c4 jdk8u77-b00
+ddd297e340b1170d3cec011ee64e729f8b493c86 jdk8u77-b01
+1b4072e4bb3ad54c4e894998486a8b33f0689160 jdk8u77-b02
+223b64a19e94222dd97b92bb40abcfbc0bf6ef1f jdk8u77-b03
+dd8507f51d786572dae18af8ffdc5a1ea34c755e jdk8u77-b31
+
+94ec11846b18111e73929b6caa9fbe7262e142c1 jdk8u74-b32
 da43260704c28b9f19cb652090ae65c258220fd6 jdk8u72-b31
 c0242ea4bde19d72be5149feda112a39e8c89b0a jdk8u75-b00
 ca3b8c8e390ab0540b0cc2e5def869b38e460d86 jdk8u75-b01
 9aef5b5e0a68f20059cfa9e2806b4ff0e11a3d31 jdk8u75-b02
 2df9fe896819362b9075a670b78106b249e50d6d jdk8u75-b03
+b374548dcb4834eb8731a06b52faddd0f10bd45d jdk8u101-b00
 32b682649973231b54740c09b10889660f6ebde5 jdk8u75-b04
 1f43bd4fab06d2ca5d1964611df14d8506d6b36e jdk8u75-b05
 916712f178c39d0acbc590f38802133fc86a7346 jdk8u75-b06
@@ -827,3 +841,55 @@
 bbbb05e91c629f8d9eef2ba43933767f68a898b0 jdk8u91-b00
 e36b6ade0499eadfd8673fe62ef0a613af2e6d67 jdk8u91-b13
 e06cbcf769844f032ed4f0b08ef70478b4ac6ca6 jdk8u91-b14-aarch32-160510
+fa8991ccf6e5b74890a0b5672440b3c09d8d8732 jdk8u91-b14
+e1ea97ad19af4d1e0bda449aa43be7e1b118ffe9 jdk8u91-b15
+1b6d4fd2730e58f17820930f797938dc182117c4 jdk8u77-b00
+ddd297e340b1170d3cec011ee64e729f8b493c86 jdk8u77-b01
+1b4072e4bb3ad54c4e894998486a8b33f0689160 jdk8u77-b02
+223b64a19e94222dd97b92bb40abcfbc0bf6ef1f jdk8u77-b03
+d7b01fb81aa8a5437cb03bc36afe15cf0e55fb89 jdk8u76-b00
+c1679cc87ba045219169cabb6b9b378c2b5cc578 jdk8u76-b01
+218483967e52b419d885d34af4488a81c5133804 jdk8u76-b02
+2a2720daacaa8d9a3ba9435cfaaf9751241d2062 jdk8u76-b03
+16f7b676725aadafb79ea105b22df112e2593a78 jdk8u76-b04
+35bfaf7f9021b5c1e86effbeac075753a82e9a0c jdk8u76-b05
+6449ee3bf707225372709ac830524c00984c601f jdk8u76-b06
+7d1074c74d6000ec8257917ebfcee3fed4249f7d jdk8u76-b07
+392f8722fc513e28f78c5c563d51af7dc8466b29 jdk8u76-b08
+3bf0f5b8a892defd0bf9731b4e15926881fcda74 jdk8u76-b09
+a2b0ee820059a44be558a2d435b7d85ed5a8b63a jdk8u76-b10
+16aa1f621ec67db1a55ebf6527750164ab63088d jdk8u76-b11
+9a87701e22b3cae79fdfd8cdb732051e02a710fa jdk8u76-b12
+481dcde745b6aec035781ed9f6797cfc93719f71 jdk8u92-b00
+f3e1e734e2d29101a9537ddeb71ecad413fcd352 jdk8u92-b13
+24a09407d71bb2cc4848bfa21660c890b4d722b1 jdk8u92-b14
+445941ba41c0e3829fe02140690b144281ac2141 jdk8u92-b31
+b374548dcb4834eb8731a06b52faddd0f10bd45d jdk8u81-b00
+ead07188d11107e877e8e4ad215ff6cb238a8a92 jdk8u101-b01
+34429bad9986677f4991c80aeb22665842881cba jdk8u101-b02
+b41d5faaf1d32ed1bf9592f65f2f94ddd4c60fc4 jdk8u101-b03
+ceecf88e5c2c09bfabf5926581e6d0b0f65f5148 jdk8u101-b04
+19e74265fc8def6a7fc96c836d8ebe38ad1cf199 jdk8u101-b05
+7c60503b0888ac16eac80a6cd074195973f8dedb jdk8u101-b06
+cb4af293fe70549b51039bb9197f373e6750fafb jdk8u101-b07
+8ed377d2cec94435d1617a37999960a24be73ad9 jdk8u101-b08
+9be452c4e7161e60d623d55bb72ad013386aefd1 jdk8u101-b09
+218a44a163fa8c2532fd5f2e8ea9bc3c9c2ca8cf jdk8u101-b10
+0095e54dcaa1acfe1614feff9600734c26af7ae8 jdk8u101-b11
+286fe17d81c3d153611a28e50926083ae934cc56 jdk8u101-b12
+77df35b662ed98236f67ab18e23691460f986981 jdk8u101-b13
+d6c92b9e192ef97305a699e868387d55821c81ad jdk8u102-b00
+d6c92b9e192ef97305a699e868387d55821c81ad jdk8u82-b00
+516a64e6d7c2dc29fd932bf3b8313e560a01bcd0 jdk8u102-b01
+83dc7e55f71596e6e76fabfa56b6008e070ff44c jdk8u102-b02
+ef01a1634bb41dd5b36fc9824f8d35f745c6bd5a jdk8u102-b03
+2094cac55c5955b4f19cd9e35e3be8b467e59b57 jdk8u102-b04
+a96cf90239c64f51679d106b852c9a5b343b9488 jdk8u102-b05
+12cd1f9b403eb5024e8642bfa59136cd275899a4 jdk8u102-b06
+9ff5455815c1864ef7ca2d5232decd2023d1d043 jdk8u102-b07
+69f5f6c2beeb3bb126494ed779ae1686f61602b9 jdk8u102-b08
+b5ecd8067e899c4bfb8d327ee7583a32129772d4 jdk8u102-b09
+2672cfc2d7b6ffa07b7714208f9d46a405211d94 jdk8u102-b10
+36a1a2875ed55fa17818f3eb203e27922a7b4589 jdk8u102-b11
+340e1a736ef7169786e70db7f31ffd32bc3be24d jdk8u102-b12
+f6daf04c0f48dab5420ad63d21da82a7fa4e3ad7 jdk8u102-b13
--- a/agent/src/os/linux/LinuxDebuggerLocal.c	Mon Aug 08 20:18:53 2016 +0300
+++ b/agent/src/os/linux/LinuxDebuggerLocal.c	Fri Aug 12 18:10:37 2016 +0300
@@ -209,9 +209,12 @@
   verifyBitness(env, (char *) &buf);
   CHECK_EXCEPTION;
 
+  char err_buf[200];
   struct ps_prochandle* ph;
-  if ( (ph = Pgrab(jpid)) == NULL) {
-    THROW_NEW_DEBUGGER_EXCEPTION("Can't attach to the process");
+  if ( (ph = Pgrab(jpid, err_buf, sizeof(err_buf))) == NULL) {
+    char msg[230];
+    snprintf(msg, sizeof(msg), "Can't attach to the process: %s", err_buf);
+    THROW_NEW_DEBUGGER_EXCEPTION(msg);
   }
   (*env)->SetLongField(env, this_obj, p_ps_prochandle_ID, (jlong)(intptr_t)ph);
   fillThreadsAndLoadObjects(env, this_obj, ph);
--- a/agent/src/os/linux/libproc.h	Mon Aug 08 20:18:53 2016 +0300
+++ b/agent/src/os/linux/libproc.h	Fri Aug 12 18:10:37 2016 +0300
@@ -69,6 +69,7 @@
 
 
 #if defined(sparc) || defined(sparcv9) || defined(ppc64)
+#include <asm/ptrace.h>
 #define user_regs_struct  pt_regs
 #endif
 
@@ -82,7 +83,7 @@
 struct ps_prochandle;
 
 // attach to a process
-struct ps_prochandle* Pgrab(pid_t pid);
+struct ps_prochandle* Pgrab(pid_t pid, char* err_buf, size_t err_buf_len);
 
 // attach to a core dump
 struct ps_prochandle* Pgrab_core(const char* execfile, const char* corefile);
--- a/agent/src/os/linux/ps_proc.c	Mon Aug 08 20:18:53 2016 +0300
+++ b/agent/src/os/linux/ps_proc.c	Fri Aug 12 18:10:37 2016 +0300
@@ -215,9 +215,12 @@
 }
 
 // attach to a process/thread specified by "pid"
-static bool ptrace_attach(pid_t pid) {
+static bool ptrace_attach(pid_t pid, char* err_buf, size_t err_buf_len) {
   if (ptrace(PTRACE_ATTACH, pid, NULL, NULL) < 0) {
-    print_debug("ptrace(PTRACE_ATTACH, ..) failed for %d\n", pid);
+    char buf[200];
+    char* msg = strerror_r(errno, buf, sizeof(buf));
+    snprintf(err_buf, err_buf_len, "ptrace(PTRACE_ATTACH, ..) failed for %d: %s", pid, msg);
+    print_debug("%s\n", err_buf);
     return false;
   } else {
     return ptrace_waitpid(pid);
@@ -339,16 +342,17 @@
 };
 
 // attach to the process. One and only one exposed stuff
-struct ps_prochandle* Pgrab(pid_t pid) {
+struct ps_prochandle* Pgrab(pid_t pid, char* err_buf, size_t err_buf_len) {
   struct ps_prochandle* ph = NULL;
   thread_info* thr = NULL;
 
   if ( (ph = (struct ps_prochandle*) calloc(1, sizeof(struct ps_prochandle))) == NULL) {
-     print_debug("can't allocate memory for ps_prochandle\n");
+     snprintf(err_buf, err_buf_len, "can't allocate memory for ps_prochandle");
+     print_debug("%s\n", err_buf);
      return NULL;
   }
 
-  if (ptrace_attach(pid) != true) {
+  if (ptrace_attach(pid, err_buf, err_buf_len) != true) {
      free(ph);
      return NULL;
   }
@@ -371,7 +375,7 @@
   thr = ph->threads;
   while (thr) {
      // don't attach to the main thread again
-     if (ph->pid != thr->lwp_id && ptrace_attach(thr->lwp_id) != true) {
+    if (ph->pid != thr->lwp_id && ptrace_attach(thr->lwp_id, err_buf, err_buf_len) != true) {
         // even if one attach fails, we get return NULL
         Prelease(ph);
         return NULL;
--- a/agent/src/os/linux/symtab.c	Mon Aug 08 20:18:53 2016 +0300
+++ b/agent/src/os/linux/symtab.c	Fri Aug 12 18:10:37 2016 +0300
@@ -514,6 +514,7 @@
      return (uintptr_t)NULL;
 
   item.key = (char*) strdup(sym_name);
+  item.data = NULL;
   hsearch_r(item, FIND, &ret, symtab->hash_table);
   if (ret) {
     struct elf_symbol * sym = (struct elf_symbol *)(ret->data);
--- a/agent/src/share/classes/sun/jvm/hotspot/CommandProcessor.java	Mon Aug 08 20:18:53 2016 +0300
+++ b/agent/src/share/classes/sun/jvm/hotspot/CommandProcessor.java	Fri Aug 12 18:10:37 2016 +0300
@@ -1446,7 +1446,7 @@
                 if (type.equals("threads")) {
                     Threads threads = VM.getVM().getThreads();
                     for (JavaThread thread = threads.first(); thread != null; thread = thread.next()) {
-                        Address base = thread.getBaseOfStackPointer();
+                        Address base = thread.getStackBase();
                         Address end = thread.getLastJavaSP();
                         if (end == null) continue;
                         if (end.lessThan(base)) {
@@ -1454,11 +1454,13 @@
                             base = end;
                             end = tmp;
                         }
-                        out.println("Searching " + base + " " + end);
+                        //out.println("Searching " + base + " " + end);
                         while (base != null && base.lessThan(end)) {
                             Address val = base.getAddressAt(0);
                             if (AddressOps.equal(val, value)) {
-                                out.println(base);
+                                ByteArrayOutputStream bos = new ByteArrayOutputStream();
+                                thread.printThreadIDOn(new PrintStream(bos));
+                                out.println("found on the stack of thread " + bos.toString() + " at " + base);
                             }
                             base = base.addOffsetTo(stride);
                         }
@@ -1601,6 +1603,8 @@
                         thread.printThreadIDOn(new PrintStream(bos));
                         if (all || bos.toString().equals(name)) {
                             out.println("Thread " + bos.toString() + " Address " + thread.getAddress());
+                            thread.printInfoOn(out);
+                            out.println(" ");
                             if (!all) return;
                         }
                     }
@@ -1618,6 +1622,8 @@
                     for (JavaThread thread = threads.first(); thread != null; thread = thread.next()) {
                         thread.printThreadIDOn(out);
                         out.println(" " + thread.getThreadName());
+                        thread.printInfoOn(out);
+                        out.println("\n...");
                     }
                 }
             }
--- a/agent/src/share/classes/sun/jvm/hotspot/oops/OopUtilities.java	Mon Aug 08 20:18:53 2016 +0300
+++ b/agent/src/share/classes/sun/jvm/hotspot/oops/OopUtilities.java	Fri Aug 12 18:10:37 2016 +0300
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2000, 2016, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -219,7 +219,7 @@
     if (threadNameField == null) {
       SystemDictionary sysDict = VM.getVM().getSystemDictionary();
       InstanceKlass k = sysDict.getThreadKlass();
-      threadNameField  = (OopField) k.findField("name", "[C");
+      threadNameField  = (OopField) k.findField("name", "Ljava/lang/String;");
       threadGroupField = (OopField) k.findField("group", "Ljava/lang/ThreadGroup;");
       threadEETopField = (LongField) k.findField("eetop", "J");
       threadTIDField = (LongField) k.findField("tid", "J");
@@ -258,7 +258,7 @@
 
   public static String threadOopGetName(Oop threadOop) {
     initThreadFields();
-    return charArrayToString((TypeArray) threadNameField.getValue(threadOop));
+    return stringOopToString(threadNameField.getValue(threadOop));
   }
 
   /** May return null if, e.g., thread was not started */
--- a/agent/src/share/classes/sun/jvm/hotspot/runtime/JavaThread.java	Mon Aug 08 20:18:53 2016 +0300
+++ b/agent/src/share/classes/sun/jvm/hotspot/runtime/JavaThread.java	Fri Aug 12 18:10:37 2016 +0300
@@ -415,7 +415,7 @@
     } else {
       tty.println("No Java frames present");
     }
-    tty.println("Base of Stack: " + getBaseOfStackPointer());
+    tty.println("Base of Stack: " + getStackBase());
     tty.println("Last_Java_SP: " + getLastJavaSP());
     tty.println("Last_Java_FP: " + getLastJavaFP());
     tty.println("Last_Java_PC: " + getLastJavaPC());
--- a/make/defs.make	Mon Aug 08 20:18:53 2016 +0300
+++ b/make/defs.make	Fri Aug 12 18:10:37 2016 +0300
@@ -1,5 +1,5 @@
 #
-# Copyright (c) 2006, 2015, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2006, 2016, Oracle and/or its affiliates. All rights reserved.
 # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 #
 # This code is free software; you can redistribute it and/or modify it
@@ -285,7 +285,7 @@
 
   # Use uname output for SRCARCH, but deal with platform differences. If ARCH
   # is not explicitly listed below, it is treated as x86.
-  SRCARCH     ?= $(ARCH/$(filter x86_64 amd64 ia64 sparc sparc64 zero ppc64 aarch32, $(ARCH)))
+  SRCARCH     ?= $(ARCH/$(filter x86_64 amd64 ia64 sparc sparc64 zero ppc ppc64 aarch32, $(ARCH)))
   ARCH/        = x86
   ARCH/x86_64  = x86
   ARCH/amd64   = x86
@@ -294,6 +294,7 @@
   ARCH/sparc64 = sparc
   ARCH/zero    = zero
   ARCH/ppc64   = ppc
+  ARCH/ppc     = ppc
   ARCH/aarch32 = aarch32
 
   # BUILDARCH is usually the same as SRCARCH, except for sparcv9
--- a/make/linux/Makefile	Mon Aug 08 20:18:53 2016 +0300
+++ b/make/linux/Makefile	Fri Aug 12 18:10:37 2016 +0300
@@ -67,8 +67,12 @@
   endif
 endif
 # C1 is not ported on ppc64, so we cannot build a tiered VM:
-ifeq ($(ARCH),ppc64)
-  FORCE_TIERED=0
+# Notice: after 8046471 ARCH will be 'ppc' for top-level ppc64 builds but
+# 'ppc64' for HotSpot-only ppc64 builds. Need to detect both variants here!
+ifneq (,$(findstring $(ARCH), ppc ppc64))
+  ifeq ($(ARCH_DATA_MODEL), 64)
+    FORCE_TIERED=0
+  endif
 endif
 
 ifdef LP64
--- a/make/linux/makefiles/defs.make	Mon Aug 08 20:18:53 2016 +0300
+++ b/make/linux/makefiles/defs.make	Fri Aug 12 18:10:37 2016 +0300
@@ -69,7 +69,7 @@
 endif
 
 # sparc
-ifeq ($(ARCH), sparc64)
+ifneq (,$(findstring $(ARCH), sparc))
   ifeq ($(ARCH_DATA_MODEL), 64)
     ARCH_DATA_MODEL  = 64
     MAKE_ARGS        += LP64=1
@@ -83,39 +83,35 @@
   HS_ARCH            = sparc
 endif
 
-# amd64/x86_64
-ifneq (,$(findstring $(ARCH), amd64 x86_64))
+# i686/i586 and amd64/x86_64
+ifneq (,$(findstring $(ARCH), amd64 x86_64 i686 i586))
   ifeq ($(ARCH_DATA_MODEL), 64)
     ARCH_DATA_MODEL = 64
     MAKE_ARGS       += LP64=1
     PLATFORM        = linux-amd64
     VM_PLATFORM     = linux_amd64
-    HS_ARCH         = x86
   else
     ARCH_DATA_MODEL = 32
     PLATFORM        = linux-i586
     VM_PLATFORM     = linux_i486
-    HS_ARCH         = x86
-    # We have to reset ARCH to i686 since SRCARCH relies on it
-    ARCH            = i686
   endif
+  HS_ARCH           = x86
 endif
 
-# i686/i586 ie 32-bit x86
-ifneq (,$(findstring $(ARCH), i686 i586))
-  ARCH_DATA_MODEL  = 32
-  PLATFORM         = linux-i586
-  VM_PLATFORM      = linux_i486
-  HS_ARCH          = x86
-endif
-
-# PPC64
-ifeq ($(ARCH), ppc64)
-  ARCH_DATA_MODEL  = 64
-  MAKE_ARGS        += LP64=1
-  PLATFORM         = linux-ppc64
-  VM_PLATFORM      = linux_ppc64
-  HS_ARCH          = ppc
+# PPC
+# Notice: after 8046471 ARCH will be 'ppc' for top-level ppc64 builds but
+# 'ppc64' for HotSpot-only ppc64 builds. Need to detect both variants here!
+ifneq (,$(findstring $(ARCH), ppc ppc64))
+  ifeq ($(ARCH_DATA_MODEL), 64)
+    MAKE_ARGS        += LP64=1
+    PLATFORM         = linux-ppc64
+    VM_PLATFORM      = linux_ppc64
+  else
+    ARCH_DATA_MODEL  = 32
+    PLATFORM         = linux-ppc
+    VM_PLATFORM      = linux_ppc
+  endif
+  HS_ARCH = ppc
 endif
 
 # AArch32
--- a/make/windows/makefiles/sa.make	Mon Aug 08 20:18:53 2016 +0300
+++ b/make/windows/makefiles/sa.make	Fri Aug 12 18:10:37 2016 +0300
@@ -44,9 +44,11 @@
 HS_ALT_SRC_REL=src/closed
 HS_ALT_SRC = $(WorkSpace)/$(HS_ALT_SRC_REL)
 !ifndef HS_ALT_MAKE
+!if exist($(WorkSpace)/make/closed)
 HS_ALT_MAKE=$(WorkSpace)/make/closed
 !endif
 !endif
+!endif
 
 HS_COMMON_SRC = $(WorkSpace)/$(HS_COMMON_SRC_REL)
 
--- a/src/cpu/sparc/vm/frame_sparc.cpp	Mon Aug 08 20:18:53 2016 +0300
+++ b/src/cpu/sparc/vm/frame_sparc.cpp	Fri Aug 12 18:10:37 2016 +0300
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -447,32 +447,6 @@
 }
 #endif // CC_INTERP
 
-
-#ifdef ASSERT
-// Debugging aid
-static frame nth_sender(int n) {
-  frame f = JavaThread::current()->last_frame();
-
-  for(int i = 0; i < n; ++i)
-    f = f.sender((RegisterMap*)NULL);
-
-  printf("first frame %d\n",          f.is_first_frame()       ? 1 : 0);
-  printf("interpreted frame %d\n",    f.is_interpreted_frame() ? 1 : 0);
-  printf("java frame %d\n",           f.is_java_frame()        ? 1 : 0);
-  printf("entry frame %d\n",          f.is_entry_frame()       ? 1 : 0);
-  printf("native frame %d\n",         f.is_native_frame()      ? 1 : 0);
-  if (f.is_compiled_frame()) {
-    if (f.is_deoptimized_frame())
-      printf("deoptimized frame 1\n");
-    else
-      printf("compiled frame 1\n");
-  }
-
-  return f;
-}
-#endif
-
-
 frame frame::sender_for_entry_frame(RegisterMap *map) const {
   assert(map != NULL, "map must be set");
   // Java frame called from C; skip all C frames and return top C
--- a/src/cpu/sparc/vm/sharedRuntime_sparc.cpp	Mon Aug 08 20:18:53 2016 +0300
+++ b/src/cpu/sparc/vm/sharedRuntime_sparc.cpp	Fri Aug 12 18:10:37 2016 +0300
@@ -1326,9 +1326,12 @@
     }
   } else if (dst.first()->is_stack()) {
     // reg to stack
-    __ st_ptr(src.first()->as_Register(), SP, reg2offset(dst.first()) + STACK_BIAS);
+    // Some compilers (gcc) expect a clean 32 bit value on function entry
+    __ signx(src.first()->as_Register(), L5);
+    __ st_ptr(L5, SP, reg2offset(dst.first()) + STACK_BIAS);
   } else {
-    __ mov(src.first()->as_Register(), dst.first()->as_Register());
+    // Some compilers (gcc) expect a clean 32 bit value on function entry
+    __ signx(src.first()->as_Register(), dst.first()->as_Register());
   }
 }
 
--- a/src/cpu/sparc/vm/vm_version_sparc.cpp	Mon Aug 08 20:18:53 2016 +0300
+++ b/src/cpu/sparc/vm/vm_version_sparc.cpp	Fri Aug 12 18:10:37 2016 +0300
@@ -40,7 +40,10 @@
 unsigned int VM_Version::_L2_data_cache_line_size = 0;
 
 void VM_Version::initialize() {
-  _features = determine_features();
+
+  assert(_features != VM_Version::unknown_m, "System pre-initialization is not complete.");
+  guarantee(VM_Version::has_v9(), "only SPARC v9 is supported");
+
   PrefetchCopyIntervalInBytes = prefetch_copy_interval_in_bytes();
   PrefetchScanIntervalInBytes = prefetch_scan_interval_in_bytes();
   PrefetchFieldsAhead         = prefetch_fields_ahead();
@@ -76,8 +79,6 @@
     FLAG_SET_DEFAULT(AllocatePrefetchStyle, 1);
   }
 
-  guarantee(VM_Version::has_v9(), "only SPARC v9 is supported");
-
   assert(ArraycopySrcPrefetchDistance < 4096, "invalid value");
   if (ArraycopySrcPrefetchDistance >= 4096)
     ArraycopySrcPrefetchDistance = 4064;
--- a/src/cpu/sparc/vm/vm_version_sparc.hpp	Mon Aug 08 20:18:53 2016 +0300
+++ b/src/cpu/sparc/vm/vm_version_sparc.hpp	Fri Aug 12 18:10:37 2016 +0300
@@ -124,6 +124,8 @@
   // Initialization
   static void initialize();
 
+  static void init_before_ergo()        { _features = determine_features(); }
+
   // Instruction support
   static bool has_v8()                  { return (_features & v8_instructions_m) != 0; }
   static bool has_v9()                  { return (_features & v9_instructions_m) != 0; }
--- a/src/cpu/x86/vm/assembler_x86.cpp	Mon Aug 08 20:18:53 2016 +0300
+++ b/src/cpu/x86/vm/assembler_x86.cpp	Fri Aug 12 18:10:37 2016 +0300
@@ -2318,6 +2318,13 @@
   emit_arith(0x0B, 0xC0, dst, src);
 }
 
+void Assembler::orl(Address dst, Register src) {
+  InstructionMark im(this);
+  prefix(dst, src);
+  emit_int8(0x09);
+  emit_operand(src, dst);
+}
+
 void Assembler::packuswb(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
@@ -5613,6 +5620,19 @@
   }
 }
 
+void Assembler::rcrq(Register dst, int imm8) {
+  assert(isShiftCount(imm8 >> 1), "illegal shift count");
+  int encode = prefixq_and_encode(dst->encoding());
+  if (imm8 == 1) {
+    emit_int8((unsigned char)0xD1);
+    emit_int8((unsigned char)(0xD8 | encode));
+  } else {
+    emit_int8((unsigned char)0xC1);
+    emit_int8((unsigned char)(0xD8 | encode));
+    emit_int8(imm8);
+  }
+}
+
 void Assembler::rorq(Register dst, int imm8) {
   assert(isShiftCount(imm8 >> 1), "illegal shift count");
   int encode = prefixq_and_encode(dst->encoding());
--- a/src/cpu/x86/vm/assembler_x86.hpp	Mon Aug 08 20:18:53 2016 +0300
+++ b/src/cpu/x86/vm/assembler_x86.hpp	Fri Aug 12 18:10:37 2016 +0300
@@ -1455,6 +1455,7 @@
   void orl(Register dst, int32_t imm32);
   void orl(Register dst, Address src);
   void orl(Register dst, Register src);
+  void orl(Address dst, Register src);
 
   void orq(Address dst, int32_t imm32);
   void orq(Register dst, int32_t imm32);
@@ -1555,6 +1556,8 @@
 
   void rclq(Register dst, int imm8);
 
+  void rcrq(Register dst, int imm8);
+
   void rdtsc();
 
   void ret(int imm16);
--- a/src/cpu/x86/vm/c1_LIRAssembler_x86.cpp	Mon Aug 08 20:18:53 2016 +0300
+++ b/src/cpu/x86/vm/c1_LIRAssembler_x86.cpp	Fri Aug 12 18:10:37 2016 +0300
@@ -1714,8 +1714,8 @@
   Register Rtmp1 = noreg;
 
   // check if it needs to be profiled
-  ciMethodData* md;
-  ciProfileData* data;
+  ciMethodData* md = NULL;
+  ciProfileData* data = NULL;
 
   if (op->should_profile()) {
     ciMethod* method = op->profiled_method();
@@ -1874,8 +1874,8 @@
     CodeStub* stub = op->stub();
 
     // check if it needs to be profiled
-    ciMethodData* md;
-    ciProfileData* data;
+    ciMethodData* md = NULL;
+    ciProfileData* data = NULL;
 
     if (op->should_profile()) {
       ciMethod* method = op->profiled_method();
@@ -2052,7 +2052,8 @@
     case lir_cond_greater:      acond = Assembler::greater;      ncond = Assembler::lessEqual;    break;
     case lir_cond_belowEqual:   acond = Assembler::belowEqual;   ncond = Assembler::above;        break;
     case lir_cond_aboveEqual:   acond = Assembler::aboveEqual;   ncond = Assembler::below;        break;
-    default:                    ShouldNotReachHere();
+    default:                    acond = Assembler::equal;        ncond = Assembler::notEqual;
+                                ShouldNotReachHere();
   }
 
   if (opr1->is_cpu_register()) {
@@ -3237,27 +3238,23 @@
   assert(default_type != NULL && default_type->is_array_klass() && default_type->is_loaded(), "must be true at this point");
 
   int elem_size = type2aelembytes(basic_type);
-  int shift_amount;
   Address::ScaleFactor scale;
 
   switch (elem_size) {
     case 1 :
-      shift_amount = 0;
       scale = Address::times_1;
       break;
     case 2 :
-      shift_amount = 1;
       scale = Address::times_2;
       break;
     case 4 :
-      shift_amount = 2;
       scale = Address::times_4;
       break;
     case 8 :
-      shift_amount = 3;
       scale = Address::times_8;
       break;
     default:
+      scale = Address::no_scale;
       ShouldNotReachHere();
   }
 
--- a/src/cpu/x86/vm/c1_LIRGenerator_x86.cpp	Mon Aug 08 20:18:53 2016 +0300
+++ b/src/cpu/x86/vm/c1_LIRGenerator_x86.cpp	Fri Aug 12 18:10:37 2016 +0300
@@ -195,7 +195,7 @@
 
 
 LIR_Opr LIRGenerator::load_immediate(int x, BasicType type) {
-  LIR_Opr r;
+  LIR_Opr r = NULL;
   if (type == T_LONG) {
     r = LIR_OprFact::longConst(x);
   } else if (type == T_INT) {
@@ -485,7 +485,7 @@
     __ cmp(lir_cond_equal, right.result(), LIR_OprFact::longConst(0));
     __ branch(lir_cond_equal, T_LONG, new DivByZeroStub(info));
 
-    address entry;
+    address entry = NULL;
     switch (x->op()) {
     case Bytecodes::_lrem:
       entry = CAST_FROM_FN_PTR(address, SharedRuntime::lrem);
@@ -1025,7 +1025,7 @@
 
 void LIRGenerator::do_Convert(Convert* x) {
   // flags that vary for the different operations and different SSE-settings
-  bool fixed_input, fixed_result, round_result, needs_stub;
+  bool fixed_input = false, fixed_result = false, round_result = false, needs_stub = false;
 
   switch (x->op()) {
     case Bytecodes::_i2l: // fall through
--- a/src/cpu/x86/vm/jniFastGetField_x86_32.cpp	Mon Aug 08 20:18:53 2016 +0300
+++ b/src/cpu/x86/vm/jniFastGetField_x86_32.cpp	Fri Aug 12 18:10:37 2016 +0300
@@ -48,7 +48,7 @@
 // between loads, which is much more efficient than lfence.
 
 address JNI_FastGetField::generate_fast_get_int_field0(BasicType type) {
-  const char *name;
+  const char *name = NULL;
   switch (type) {
     case T_BOOLEAN: name = "jni_fast_GetBooleanField"; break;
     case T_BYTE:    name = "jni_fast_GetByteField";    break;
@@ -122,7 +122,7 @@
 
   slowcase_entry_pclist[count++] = __ pc();
   __ bind (slow);
-  address slow_case_addr;
+  address slow_case_addr = NULL;
   switch (type) {
     case T_BOOLEAN: slow_case_addr = jni_GetBooleanField_addr(); break;
     case T_BYTE:    slow_case_addr = jni_GetByteField_addr();    break;
@@ -256,7 +256,7 @@
 }
 
 address JNI_FastGetField::generate_fast_get_float_field0(BasicType type) {
-  const char *name;
+  const char *name = NULL;
   switch (type) {
     case T_FLOAT:  name = "jni_fast_GetFloatField";  break;
     case T_DOUBLE: name = "jni_fast_GetDoubleField"; break;
@@ -337,7 +337,7 @@
 
   slowcase_entry_pclist[count++] = __ pc();
   __ bind (slow);
-  address slow_case_addr;
+  address slow_case_addr = NULL;
   switch (type) {
     case T_FLOAT:  slow_case_addr = jni_GetFloatField_addr();  break;
     case T_DOUBLE: slow_case_addr = jni_GetDoubleField_addr(); break;
--- a/src/cpu/x86/vm/jniFastGetField_x86_64.cpp	Mon Aug 08 20:18:53 2016 +0300
+++ b/src/cpu/x86/vm/jniFastGetField_x86_64.cpp	Fri Aug 12 18:10:37 2016 +0300
@@ -51,7 +51,7 @@
 // since that may scratch r10!
 
 address JNI_FastGetField::generate_fast_get_int_field0(BasicType type) {
-  const char *name;
+  const char *name = NULL;
   switch (type) {
     case T_BOOLEAN: name = "jni_fast_GetBooleanField"; break;
     case T_BYTE:    name = "jni_fast_GetByteField";    break;
@@ -111,7 +111,7 @@
 
   slowcase_entry_pclist[count++] = __ pc();
   __ bind (slow);
-  address slow_case_addr;
+  address slow_case_addr = NULL;
   switch (type) {
     case T_BOOLEAN: slow_case_addr = jni_GetBooleanField_addr(); break;
     case T_BYTE:    slow_case_addr = jni_GetByteField_addr();    break;
@@ -153,7 +153,7 @@
 }
 
 address JNI_FastGetField::generate_fast_get_float_field0(BasicType type) {
-  const char *name;
+  const char *name = NULL;
   switch (type) {
     case T_FLOAT:     name = "jni_fast_GetFloatField";     break;
     case T_DOUBLE:    name = "jni_fast_GetDoubleField";    break;
@@ -206,7 +206,7 @@
 
   slowcase_entry_pclist[count++] = __ pc();
   __ bind (slow);
-  address slow_case_addr;
+  address slow_case_addr = NULL;
   switch (type) {
     case T_FLOAT:     slow_case_addr = jni_GetFloatField_addr();  break;
     case T_DOUBLE:    slow_case_addr = jni_GetDoubleField_addr();
--- a/src/cpu/x86/vm/macroAssembler_x86.cpp	Mon Aug 08 20:18:53 2016 +0300
+++ b/src/cpu/x86/vm/macroAssembler_x86.cpp	Fri Aug 12 18:10:37 2016 +0300
@@ -7769,6 +7769,503 @@
   pop(tmp2);
   pop(tmp1);
 }
+
+//Helper functions for square_to_len()
+
+/**
+ * Emit code that stores the squares of x[], right shifted one bit (divided by 2), into z[].
+ * Preserves x, xlen and z; clobbers tmp1, tmp4, tmp5, rdxReg and raxReg (tmp3 is not referenced).
+ */
+
+void MacroAssembler::square_rshift(Register x, Register xlen, Register z, Register tmp1, Register tmp3, Register tmp4, Register tmp5, Register rdxReg, Register raxReg) {
+  // Perform square and right shift by 1
+  // Handle odd xlen case first, then for even xlen do the following
+  // jlong carry = 0;
+  // for (int j=0, i=0; j < xlen; j+=2, i+=4) {
+  //     huge_128 product = x[j:j+1] * x[j:j+1];
+  //     z[i:i+1] = (carry << 63) | (jlong)(product >>> 65);
+  //     z[i+2:i+3] = (jlong)(product >>> 1);
+  //     carry = (jlong)product;
+  // }
+
+  xorq(tmp5, tmp5);     // carry
+  xorq(rdxReg, rdxReg);
+  xorl(tmp1, tmp1);     // index for x
+  xorl(tmp4, tmp4);     // index for z
+
+  Label L_first_loop, L_first_loop_exit;
+
+  testl(xlen, 1);       // low bit of xlen set => odd number of words
+  jccb(Assembler::zero, L_first_loop); //jump if xlen is even
+
+  // Square and right shift by 1 the odd element using 32 bit multiply
+  movl(raxReg, Address(x, tmp1, Address::times_4, 0)); // x[0] (tmp1 is still 0 here)
+  imulq(raxReg, raxReg);
+  shrq(raxReg, 1);      // halve; the shifted-out bit lands in the carry flag
+  adcq(tmp5, 0);        // tmp5 = that bit, to be carried into the next element
+  movq(Address(z, tmp4, Address::times_4, 0), raxReg);
+  incrementl(tmp1);     // one word of x consumed
+  addl(tmp4, 2);        // two words of z produced
+
+  // Square and  right shift by 1 the rest using 64 bit multiply
+  bind(L_first_loop);
+  cmpptr(tmp1, xlen);   // all words of x consumed?
+  jccb(Assembler::equal, L_first_loop_exit);
+
+  // Square
+  movq(raxReg, Address(x, tmp1, Address::times_4,  0)); // two adjacent 32-bit words of x
+  rorq(raxReg, 32);    // convert big-endian to little-endian
+  mulq(raxReg);        // 64-bit multiply rax * rax -> rdx:rax
+
+  // Right shift by 1 and save carry
+  shrq(tmp5, 1);       // rdx:rax:tmp5 = (tmp5:rdx:rax) >>> 1
+  rcrq(rdxReg, 1);
+  rcrq(raxReg, 1);
+  adcq(tmp5, 0);       // tmp5 = bit rotated out of rax, i.e. the carry for the next pair
+
+  // Store result in z
+  movq(Address(z, tmp4, Address::times_4, 0), rdxReg);
+  movq(Address(z, tmp4, Address::times_4, 8), raxReg);
+
+  // Update indices for x and z
+  addl(tmp1, 2);
+  addl(tmp4, 4);
+  jmp(L_first_loop);
+
+  bind(L_first_loop_exit);
+}
+
+
+/**
+ * Perform the following multiply add operation using BMI2 instructions
+ * carry:sum = sum + op1*op2 + carry
+ * op2 should be in rdx (mulx takes its second factor from rdx implicitly)
+ * op2 is preserved; sum, op1, carry and tmp2 are modified
+ */
+void MacroAssembler::multiply_add_64_bmi2(Register sum, Register op1, Register op2, Register carry, Register tmp2) {
+  // NOTE(review): op2 is never referenced below and nothing asserts that it is rdx; callers must guarantee it
+  mulxq(tmp2, op1, op1);  //  op1 * op2 -> tmp2:op1
+  addq(sum, carry);       // sum += incoming carry ...
+  adcq(tmp2, 0);          // ... propagating any overflow into the high half
+  addq(sum, op1);         // sum += low half of the product ...
+  adcq(tmp2, 0);          // ... again propagating overflow into the high half
+  movq(carry, tmp2);      // outgoing carry = high half
+}
+
+/**
+ * Perform the following multiply add operation:
+ * carry:sum = sum + op1*op2 + carry
+ * Preserves op1, op2 and modifies sum, carry, rdxReg and raxReg
+ */
+void MacroAssembler::multiply_add_64(Register sum, Register op1, Register op2, Register carry, Register rdxReg, Register raxReg) {
+  // rdx:rax = op1 * op2 (presumably rdxReg/raxReg are rdx/rax, as the visible callers pass them)
+  movq(raxReg, op2);
+  mulq(op1);
+
+  //  rdx:sum = sum + carry + rdx:rax
+  addq(sum, carry);
+  adcq(rdxReg, 0);
+  addq(sum, raxReg);
+  adcq(rdxReg, 0);
+
+  // carry:sum = rdx:sum
+  movq(carry, rdxReg);
+}
+
+/**
+ * Add 64 bit long carry into z[] with carry propagation.
+ * Preserves z and carry register values; zlen is decremented and tmp1 is overwritten.
+ * The carry is added at word index zlen-2 and rippled towards index 0.
+ */
+void MacroAssembler::add_one_64(Register z, Register zlen, Register carry, Register tmp1) {
+  Label L_fourth_loop, L_fourth_loop_exit;
+
+  movl(tmp1, 1);        // constant 1 used to ripple a carry-out into the next qword
+  subl(zlen, 2);
+  addq(Address(z, zlen, Address::times_4, 0), carry);
+
+  bind(L_fourth_loop);
+  jccb(Assembler::carryClear, L_fourth_loop_exit);  // no carry out of the last addq: done
+  subl(zlen, 2);
+  jccb(Assembler::negative, L_fourth_loop_exit);    // ran off the front of z[]
+  addq(Address(z, zlen, Address::times_4, 0), tmp1);
+  jmp(L_fourth_loop);
+  bind(L_fourth_loop_exit);
+}
+
+/**
+ * Shift z[] left by 1 bit.
+ * Preserves x, len, z and zlen registers and modifies tmp1..tmp4.
+ * x and len are not referenced by the emitted code.
+ */
+void MacroAssembler::lshift_by_1(Register x, Register len, Register z, Register zlen, Register tmp1, Register tmp2, Register tmp3, Register tmp4) {
+
+  Label L_fifth_loop, L_fifth_loop_exit;
+
+  // Fifth loop
+  // Perform primitiveLeftShift(z, zlen, 1)
+
+  const Register prev_carry = tmp1;
+  const Register new_carry = tmp4;
+  const Register value = tmp2;
+  const Register zidx = tmp3;
+
+  // int zidx, carry;
+  // long value;
+  // carry = 0;
+  // for (zidx = zlen-2; zidx >=0; zidx -= 2) {
+  //    (carry:value)  = (z[zidx:zidx+1] << 1) | carry ;
+  //    z[zidx:zidx+1] = value;
+  // }
+
+  movl(zidx, zlen);
+  xorl(prev_carry, prev_carry); // clear carry flag and prev_carry register
+
+  bind(L_fifth_loop);
+  decl(zidx);  // Use decl to preserve carry flag
+  decl(zidx);
+  jccb(Assembler::negative, L_fifth_loop_exit);
+
+  if (UseBMI2Instructions) {
+     movq(value, Address(z, zidx, Address::times_4, 0));
+     rclq(value, 1);           // shift left through the carry flag: carry in from the previous qword, carry out to the next
+     rorxq(value, value, 32);  // word swap; rorx leaves the flags alone, so the carry survives to the next iteration
+     movq(Address(z, zidx, Address::times_4,  0), value);  // Store back in big endian form
+  }
+  else {
+    // clear new_carry
+    xorl(new_carry, new_carry);
+
+    // Shift z[i] by 1, or in previous carry and save new carry
+    movq(value, Address(z, zidx, Address::times_4, 0));
+    shlq(value, 1);
+    adcl(new_carry, 0);       // new_carry = bit shifted out of the top
+
+    orq(value, prev_carry);   // bit 0 is free after the shift
+    rorq(value, 0x20);
+    movq(Address(z, zidx, Address::times_4,  0), value);  // Store back in big endian form
+
+    // Set previous carry = new carry
+    movl(prev_carry, new_carry);
+  }
+  jmp(L_fifth_loop);
+
+  bind(L_fifth_loop_exit);
+}
+
+
+/**
+ * Code for BigInteger::squareToLen() intrinsic
+ * tmp1..tmp5 are pushed on entry and popped on exit; rdxReg and raxReg are scratch.
+ * rdi: x
+ * rsi: len
+ * r8:  z
+ * rcx: zlen
+ * r12: tmp1
+ * r13: tmp2
+ * r14: tmp3
+ * r15: tmp4
+ * rbx: tmp5
+ * NOTE(review): labels fifth_loop and fifth_loop_exit are declared below but never bound or used.
+ */
+void MacroAssembler::square_to_len(Register x, Register len, Register z, Register zlen, Register tmp1, Register tmp2, Register tmp3, Register tmp4, Register tmp5, Register rdxReg, Register raxReg) {
+
+  Label L_second_loop, L_second_loop_exit, L_third_loop, L_third_loop_exit, fifth_loop, fifth_loop_exit, L_last_x, L_multiply;
+  push(tmp1);
+  push(tmp2);
+  push(tmp3);
+  push(tmp4);
+  push(tmp5);
+
+  // First loop
+  // Store the squares, right shifted one bit (i.e., divided by 2).
+  square_rshift(x, len, z, tmp1, tmp3, tmp4, tmp5, rdxReg, raxReg);
+
+  // Add in off-diagonal sums.
+  //
+  // Second, third (nested) and fourth loops.
+  // zlen +=2;
+  // for (int xidx=len-2,zidx=zlen-4; xidx > 0; xidx-=2,zidx-=4) {
+  //    carry = 0;
+  //    long op2 = x[xidx:xidx+1];
+  //    for (int j=xidx-2,k=zidx; j >= 0; j-=2) {
+  //       k -= 2;
+  //       long op1 = x[j:j+1];
+  //       long sum = z[k:k+1];
+  //       carry:sum = multiply_add_64(sum, op1, op2, carry, tmp_regs);
+  //       z[k:k+1] = sum;
+  //    }
+  //    add_one_64(z, k, carry, tmp_regs);
+  // }
+
+  const Register carry = tmp5;
+  const Register sum = tmp3;
+  const Register op1 = tmp4;
+  Register op2 = tmp2;
+
+  push(zlen);           // original zlen/len, restored after the loops for the final shift
+  push(len);
+  addl(zlen,2);
+  bind(L_second_loop);
+  xorq(carry, carry);
+  subl(zlen, 4);
+  subl(len, 2);
+  push(zlen);           // outer-loop indices; the inner loop below consumes zlen and len
+  push(len);
+  cmpl(len, 0);
+  jccb(Assembler::lessEqual, L_second_loop_exit);
+
+  // Multiply an array by one 64 bit long.
+  if (UseBMI2Instructions) {
+    op2 = rdxReg;       // multiply_add_64_bmi2 expects the multiplier in rdx
+    movq(op2, Address(x, len, Address::times_4,  0));
+    rorxq(op2, op2, 32);
+  }
+  else {
+    movq(op2, Address(x, len, Address::times_4,  0));
+    rorq(op2, 32);
+  }
+
+  bind(L_third_loop);
+  decrementl(len);
+  jccb(Assembler::negative, L_third_loop_exit);  // no words of x left
+  decrementl(len);
+  jccb(Assembler::negative, L_last_x);           // exactly one word of x left
+
+  movq(op1, Address(x, len, Address::times_4,  0));
+  rorq(op1, 32);
+
+  bind(L_multiply);
+  subl(zlen, 2);
+  movq(sum, Address(z, zlen, Address::times_4,  0));
+
+  // Multiply 64 bit by 64 bit and add 64 bits lower half and upper 64 bits as carry.
+  if (UseBMI2Instructions) {
+    multiply_add_64_bmi2(sum, op1, op2, carry, tmp2);
+  }
+  else {
+    multiply_add_64(sum, op1, op2, carry, rdxReg, raxReg);
+  }
+
+  movq(Address(z, zlen, Address::times_4, 0), sum);
+
+  jmp(L_third_loop);
+  bind(L_third_loop_exit);
+
+  // Fourth loop
+  // Add 64 bit long carry into z with carry propagation.
+  // Uses the zlen left behind by the third loop as the starting offset.
+  add_one_64(z, zlen, carry, tmp1);
+
+  pop(len);             // restore the outer-loop indices pushed at the top of this iteration
+  pop(zlen);
+  jmp(L_second_loop);
+
+  // Next infrequent code is moved outside loops.
+  bind(L_last_x);
+  movl(op1, Address(x, 0));   // the single remaining word x[0], loaded as 32 bits
+  jmp(L_multiply);
+
+  bind(L_second_loop_exit);
+  pop(len);             // discard the indices pushed by the final, aborted iteration
+  pop(zlen);
+  pop(len);             // restore the original len and zlen
+  pop(zlen);
+
+  // Fifth loop
+  // Shift z left 1 bit.
+  lshift_by_1(x, len, z, zlen, tmp1, tmp2, tmp3, tmp4);
+
+  // z[zlen-1] |= x[len-1] & 1;
+  movl(tmp3, Address(x, len, Address::times_4, -4));
+  andl(tmp3, 1);
+  orl(Address(z, zlen, Address::times_4,  -4), tmp3);
+
+  pop(tmp5);
+  pop(tmp4);
+  pop(tmp3);
+  pop(tmp2);
+  pop(tmp1);
+}
+
+/**
+ * Helper function for mul_add()
+ * Multiply the in[] by int k and add to out[] starting at offset offs using
+ * 128 bit by 32 bit multiply and return the carry in tmp5.
+ * Only quad int aligned length of in[] is operated on in this function.
+ * k is in rdxReg for BMI2Instructions, for others it is in tmp2.
+ * This function preserves out, in and k registers.
+ * len and offset point to the appropriate index in "in" & "out" correspondingly
+ * tmp5 has the carry.
+ * other registers are temporary and are modified.
+ * len and offset are each decremented by 4 per iteration; tmp1 counts the len/4 iterations.
+ */
+void MacroAssembler::mul_add_128_x_32_loop(Register out, Register in,
+  Register offset, Register len, Register tmp1, Register tmp2, Register tmp3,
+  Register tmp4, Register tmp5, Register rdxReg, Register raxReg) {
+
+  Label L_first_loop, L_first_loop_exit;
+
+  movl(tmp1, len);
+  shrl(tmp1, 2);        // number of whole groups of four ints
+
+  bind(L_first_loop);
+  subl(tmp1, 1);
+  jccb(Assembler::negative, L_first_loop_exit);
+
+  subl(len, 4);
+  subl(offset, 4);
+
+  Register op2 = tmp2;
+  const Register sum = tmp3;
+  const Register op1 = tmp4;
+  const Register carry = tmp5;
+
+  if (UseBMI2Instructions) {
+    op2 = rdxReg;
+  }
+
+  movq(op1, Address(in, len, Address::times_4,  8));      // ints len+2 and len+3 of in[]
+  rorq(op1, 32);
+  movq(sum, Address(out, offset, Address::times_4,  8));
+  rorq(sum, 32);
+  if (UseBMI2Instructions) {
+    multiply_add_64_bmi2(sum, op1, op2, carry, raxReg);
+  }
+  else {
+    multiply_add_64(sum, op1, op2, carry, rdxReg, raxReg);
+  }
+  // Store back in big endian from little endian
+  rorq(sum, 0x20);
+  movq(Address(out, offset, Address::times_4,  8), sum);
+
+  movq(op1, Address(in, len, Address::times_4,  0));      // ints len and len+1 of in[]
+  rorq(op1, 32);
+  movq(sum, Address(out, offset, Address::times_4,  0));
+  rorq(sum, 32);
+  if (UseBMI2Instructions) {
+    multiply_add_64_bmi2(sum, op1, op2, carry, raxReg);
+  }
+  else {
+    multiply_add_64(sum, op1, op2, carry, rdxReg, raxReg);
+  }
+  // Store back in big endian from little endian
+  rorq(sum, 0x20);
+  movq(Address(out, offset, Address::times_4,  0), sum);
+
+  jmp(L_first_loop);
+  bind(L_first_loop_exit);
+}
+
+/**
+ * Code for BigInteger::mulAdd() intrinsic
+ * tmp1..tmp5 are pushed on entry and popped on exit.
+ * rdi: out
+ * rsi: in
+ * r11: offs (out.length - offset)
+ * rcx: len
+ * r8:  k
+ * r12: tmp1
+ * r13: tmp2
+ * r14: tmp3
+ * r15: tmp4
+ * rbx: tmp5
+ * Multiply the in[] by word k and add to out[], return the carry in rax
+ */
+void MacroAssembler::mul_add(Register out, Register in, Register offs,
+   Register len, Register k, Register tmp1, Register tmp2, Register tmp3,
+   Register tmp4, Register tmp5, Register rdxReg, Register raxReg) {
+
+  Label L_carry, L_last_in, L_done;
+
+// carry = 0;
+// for (int j=len-1; j >= 0; j--) {
+//    long product = (in[j] & LONG_MASK) * kLong +
+//                   (out[offs] & LONG_MASK) + carry;
+//    out[offs--] = (int)product;
+//    carry = product >>> 32;
+// }
+//
+  push(tmp1);
+  push(tmp2);
+  push(tmp3);
+  push(tmp4);
+  push(tmp5);
+
+  Register op2 = tmp2;
+  const Register sum = tmp3;
+  const Register op1 = tmp4;
+  const Register carry =  tmp5;
+
+  if (UseBMI2Instructions) {
+    op2 = rdxReg;       // the BMI2 helper expects the multiplier in rdx
+    movl(op2, k);
+  }
+  else {
+    movl(op2, k);       // same instruction as above; only the register bound to op2 differs
+  }
+
+  xorq(carry, carry);
+
+  //First loop
+
+  //Multiply in[] by k in a 4 way unrolled loop using 128 bit by 32 bit multiply
+  //The carry is in tmp5
+  mul_add_128_x_32_loop(out, in, offs, len, tmp1, tmp2, tmp3, tmp4, tmp5, rdxReg, raxReg);
+
+  //Multiply the trailing in[] entry using 64 bit by 32 bit, if any
+  decrementl(len);
+  jccb(Assembler::negative, L_carry);     // nothing left
+  decrementl(len);
+  jccb(Assembler::negative, L_last_in);   // exactly one int left
+
+  movq(op1, Address(in, len, Address::times_4,  0));
+  rorq(op1, 32);
+
+  subl(offs, 2);
+  movq(sum, Address(out, offs, Address::times_4,  0));
+  rorq(sum, 32);
+
+  if (UseBMI2Instructions) {
+    multiply_add_64_bmi2(sum, op1, op2, carry, raxReg);
+  }
+  else {
+    multiply_add_64(sum, op1, op2, carry, rdxReg, raxReg);
+  }
+
+  // Store back in big endian from little endian
+  rorq(sum, 0x20);
+  movq(Address(out, offs, Address::times_4,  0), sum);
+
+  testl(len, len);
+  jccb(Assembler::zero, L_carry);         // the pair just handled started at in[0]: done
+
+  //Multiply the last in[] entry, if any
+  bind(L_last_in);
+  movl(op1, Address(in, 0));
+  movl(sum, Address(out, offs, Address::times_4,  -4));
+
+  movl(raxReg, k);
+  mull(op1); //tmp4 * eax -> edx:eax
+  addl(sum, carry);
+  adcl(rdxReg, 0);
+  addl(sum, raxReg);
+  adcl(rdxReg, 0);
+  movl(carry, rdxReg);
+
+  movl(Address(out, offs, Address::times_4,  -4), sum);
+
+  bind(L_carry);
+  //return tmp5/carry as carry in rax
+  movl(rax, carry);     // NOTE(review): uses rax directly rather than the raxReg parameter
+
+  bind(L_done);         // NOTE(review): bound here but no visible jump targets it
+  pop(tmp5);
+  pop(tmp4);
+  pop(tmp3);
+  pop(tmp2);
+  pop(tmp1);
+}
 #endif
 
 /**
--- a/src/cpu/x86/vm/macroAssembler_x86.hpp	Mon Aug 08 20:18:53 2016 +0300
+++ b/src/cpu/x86/vm/macroAssembler_x86.hpp	Fri Aug 12 18:10:37 2016 +0300
@@ -1241,6 +1241,25 @@
                                Register carry2);
   void multiply_to_len(Register x, Register xlen, Register y, Register ylen, Register z, Register zlen,
                        Register tmp1, Register tmp2, Register tmp3, Register tmp4, Register tmp5);
+
+  void square_rshift(Register x, Register len, Register z, Register tmp1, Register tmp3,
+                     Register tmp4, Register tmp5, Register rdxReg, Register raxReg);
+  void multiply_add_64_bmi2(Register sum, Register op1, Register op2, Register carry,
+                            Register tmp2);
+  void multiply_add_64(Register sum, Register op1, Register op2, Register carry,
+                       Register rdxReg, Register raxReg);
+  void add_one_64(Register z, Register zlen, Register carry, Register tmp1);
+  void lshift_by_1(Register x, Register len, Register z, Register zlen, Register tmp1, Register tmp2,
+                       Register tmp3, Register tmp4);
+  void square_to_len(Register x, Register len, Register z, Register zlen, Register tmp1, Register tmp2,
+                     Register tmp3, Register tmp4, Register tmp5, Register rdxReg, Register raxReg);
+
+  void mul_add_128_x_32_loop(Register out, Register in, Register offset, Register len, Register tmp1,
+               Register tmp2, Register tmp3, Register tmp4, Register tmp5, Register rdxReg,
+               Register raxReg);
+  void mul_add(Register out, Register in, Register offset, Register len, Register k, Register tmp1,
+               Register tmp2, Register tmp3, Register tmp4, Register tmp5, Register rdxReg,
+               Register raxReg);
 #endif
 
   // CRC32 code for java.util.zip.CRC32::updateBytes() instrinsic.
--- a/src/cpu/x86/vm/sharedRuntime_x86_64.cpp	Mon Aug 08 20:18:53 2016 +0300
+++ b/src/cpu/x86/vm/sharedRuntime_x86_64.cpp	Fri Aug 12 18:10:37 2016 +0300
@@ -23,6 +23,9 @@
  */
 
 #include "precompiled.hpp"
+#ifndef _WINDOWS
+#include "alloca.h"
+#endif
 #include "asm/macroAssembler.hpp"
 #include "asm/macroAssembler.inline.hpp"
 #include "code/debugInfoRec.hpp"
@@ -3966,6 +3969,256 @@
 }
 
 
+//------------------------------Montgomery multiplication------------------------
+//
+
+#ifndef _WINDOWS
+
+#define ASM_SUBTRACT
+
+#ifdef ASM_SUBTRACT
+// Subtract 0:b from carry:a (a[] is updated in place over len longwords, index 0 first).  Return carry.
+static unsigned long
+sub(unsigned long a[], unsigned long b[], unsigned long carry, long len) {
+  long i = 0, cnt = len;  // cnt is tested only after the first pass, so len must be >= 1
+  unsigned long tmp;
+  asm volatile("clc; "                                   // start with no borrow
+               "0: ; "
+               "mov (%[b], %[i], 8), %[tmp]; "
+               "sbb %[tmp], (%[a], %[i], 8); "            // a[i] -= b[i] + borrow
+               "inc %[i]; dec %[cnt]; "                   // inc/dec leave the carry flag (the borrow) intact
+               "jne 0b; "
+               "mov %[carry], %[tmp]; sbb $0, %[tmp]; "   // tmp = carry - final borrow
+               : [i]"+r"(i), [cnt]"+r"(cnt), [tmp]"=&r"(tmp)
+               : [a]"r"(a), [b]"r"(b), [carry]"r"(carry)
+               : "memory");
+  return tmp;
+}
+#else // ASM_SUBTRACT
+typedef int __attribute__((mode(TI))) int128;  // signed double-longword type used to hold the running borrow
+
+// Subtract 0:b from carry:a (portable variant, compiled only when ASM_SUBTRACT is not defined).  Return carry.
+static unsigned long
+sub(unsigned long a[], unsigned long b[], unsigned long carry, int len) {
+  int128 tmp = 0;
+  int i;
+  for (i = 0; i < len; i++) {
+    tmp += a[i];
+    tmp -= b[i];
+    a[i] = tmp;         // keep the low 64 bits of the difference
+    tmp >>= 64;         // what remains is the borrow: 0 or -1, as asserted below
+    assert(-1 <= tmp && tmp <= 0, "invariant");
+  }
+  return tmp + carry;
+}
+#endif // ! ASM_SUBTRACT
+
+// Multiply (unsigned) Long A by Long B, accumulating the double-
+// length result into the accumulator formed of T0 (least significant), T1, and T2.
+#define MACC(A, B, T0, T1, T2)                                      \
+do {                                                                \
+  unsigned long hi, lo;                                             \
+  asm volatile("mul %5; add %%rax, %2; adc %%rdx, %3; adc $0, %4"   \
+           : "=&d"(hi), "=a"(lo), "+r"(T0), "+r"(T1), "+g"(T2)      \
+           : "r"(A), "a"(B) : "cc");                                \
+ } while(0)
+
+// As MACC, but add twice the double-length result into the
+// accumulator (the product is computed once and added two times).
+#define MACC2(A, B, T0, T1, T2)                                     \
+do {                                                                \
+  unsigned long hi, lo;                                             \
+  asm volatile("mul %5; add %%rax, %2; adc %%rdx, %3; adc $0, %4;"  \
+           "add %%rax, %2; adc %%rdx, %3; adc $0, %4"               \
+           : "=&d"(hi), "=a"(lo), "+r"(T0), "+r"(T1), "+g"(T2)      \
+           : "r"(A), "a"(B) : "cc");                                \
+ } while(0)
+
+// Fast Montgomery multiplication of a[] and b[] with modulus n[]; the result is written to m[].
+// All arrays are len longwords, index 0 least significant. The derivation of the algorithm is
+// in  A Cryptographic Library for the Motorola DSP56000,
+// Dusse and Kaliski, Proc. EUROCRYPT 90, pp. 230-237.
+
+static void __attribute__((noinline))
+montgomery_multiply(unsigned long a[], unsigned long b[], unsigned long n[],
+                    unsigned long m[], unsigned long inv, int len) {
+  unsigned long t0 = 0, t1 = 0, t2 = 0; // Triple-precision accumulator
+  int i;
+
+  assert(inv * n[0] == -1UL, "broken inverse in Montgomery multiply");
+
+  for (i = 0; i < len; i++) {   // first half: the m[i] are still being determined
+    int j;
+    for (j = 0; j < i; j++) {
+      MACC(a[j], b[i-j], t0, t1, t2);
+      MACC(m[j], n[i-j], t0, t1, t2);
+    }
+    MACC(a[i], b[0], t0, t1, t2);
+    m[i] = t0 * inv;            // chosen so that adding m[i]*n[0] zeroes t0 (asserted below)
+    MACC(m[i], n[0], t0, t1, t2);
+
+    assert(t0 == 0, "broken Montgomery multiply");
+
+    t0 = t1; t1 = t2; t2 = 0;   // shift the accumulator down one longword
+  }
+
+  for (i = len; i < 2*len; i++) {   // second half: each low accumulator word is a result word
+    int j;
+    for (j = i-len+1; j < len; j++) {
+      MACC(a[j], b[i-j], t0, t1, t2);
+      MACC(m[j], n[i-j], t0, t1, t2);
+    }
+    m[i-len] = t0;
+    t0 = t1; t1 = t2; t2 = 0;
+  }
+
+  while (t0)                    // subtract n from t0:m until the overflow word is zero
+    t0 = sub(m, n, t0, len);
+}
+
+// Fast Montgomery squaring of a[] with modulus n[], result in m[].  This uses asymptotically 25% fewer
+// multiplies so it should be up to 25% faster than Montgomery
+// multiplication.  However, its loop control is more complex and it
+// may actually run slower on some machines.
+
+static void __attribute__((noinline))
+montgomery_square(unsigned long a[], unsigned long n[],
+                  unsigned long m[], unsigned long inv, int len) {
+  unsigned long t0 = 0, t1 = 0, t2 = 0; // Triple-precision accumulator
+  int i;
+
+  assert(inv * n[0] == -1UL, "broken inverse in Montgomery multiply");
+
+  for (i = 0; i < len; i++) {
+    int j;
+    int end = (i+1)/2;
+    for (j = 0; j < end; j++) {
+      MACC2(a[j], a[i-j], t0, t1, t2);  // a[j]*a[i-j] and a[i-j]*a[j] are equal: add the product twice
+      MACC(m[j], n[i-j], t0, t1, t2);
+    }
+    if ((i & 1) == 0) {
+      MACC(a[j], a[j], t0, t1, t2);     // middle term a[i/2]^2 occurs only once
+    }
+    for (; j < i; j++) {
+      MACC(m[j], n[i-j], t0, t1, t2);   // remaining m*n terms, which have no symmetry to exploit
+    }
+    m[i] = t0 * inv;                    // chosen so that adding m[i]*n[0] zeroes t0 (asserted below)
+    MACC(m[i], n[0], t0, t1, t2);
+
+    assert(t0 == 0, "broken Montgomery square");
+
+    t0 = t1; t1 = t2; t2 = 0;           // shift the accumulator down one longword
+  }
+
+  for (i = len; i < 2*len; i++) {
+    int start = i-len+1;
+    int end = start + (len - start)/2;
+    int j;
+    for (j = start; j < end; j++) {
+      MACC2(a[j], a[i-j], t0, t1, t2);
+      MACC(m[j], n[i-j], t0, t1, t2);
+    }
+    if ((i & 1) == 0) {
+      MACC(a[j], a[j], t0, t1, t2);
+    }
+    for (; j < len; j++) {
+      MACC(m[j], n[i-j], t0, t1, t2);
+    }
+    m[i-len] = t0;
+    t0 = t1; t1 = t2; t2 = 0;
+  }
+
+  while (t0)                            // subtract n from t0:m until the overflow word is zero
+    t0 = sub(m, n, t0, len);
+}
+
+// Swap the two 32-bit words of a 64-bit longword.
+static unsigned long swap(unsigned long x) {
+  return (x << 32) | (x >> 32);
+}
+
+// Copy len longwords from s to d, word-swapping as we go.  The
+// destination array is reversed: s[0] ends up in d[len-1].
+static void reverse_words(unsigned long *s, unsigned long *d, int len) {
+  d += len;             // start one past the end of the destination and walk backwards
+  while(len-- > 0) {
+    d--;
+    *d = swap(*s);
+    s++;
+  }
+}
+
+// The threshold at which squaring is advantageous was determined
+// experimentally on an i7-3930K (Ivy Bridge) CPU @ 3.5GHz.
+#define MONTGOMERY_SQUARING_THRESHOLD 64
+
+void SharedRuntime::montgomery_multiply(jint *a_ints, jint *b_ints, jint *n_ints,
+                                        jint len, jlong inv,
+                                        jint *m_ints) {
+  assert(len % 2 == 0, "array length in montgomery_multiply must be even");
+  int longwords = len/2;
+
+  // Make very sure we don't use so much space that the stack might
+  // overflow.  512 jints corresponds to a 16384-bit integer and
+  // will use here a total of 8k bytes of stack space.
+  int total_allocation = longwords * sizeof (unsigned long) * 4;
+  guarantee(total_allocation <= 8192, "must be");
+  unsigned long *scratch = (unsigned long *)alloca(total_allocation);
+
+  // Local scratch arrays
+  unsigned long
+    *a = scratch + 0 * longwords,
+    *b = scratch + 1 * longwords,
+    *n = scratch + 2 * longwords,
+    *m = scratch + 3 * longwords;
+
+  reverse_words((unsigned long *)a_ints, a, longwords);  // convert the jint inputs to the longword layout used by ::montgomery_multiply
+  reverse_words((unsigned long *)b_ints, b, longwords);
+  reverse_words((unsigned long *)n_ints, n, longwords);
+
+  ::montgomery_multiply(a, b, n, m, (unsigned long)inv, longwords);
+
+  reverse_words(m, (unsigned long *)m_ints, longwords);  // convert the result back into the caller's jint array
+}
+
+void SharedRuntime::montgomery_square(jint *a_ints, jint *n_ints,
+                                      jint len, jlong inv,
+                                      jint *m_ints) {
+  assert(len % 2 == 0, "array length in montgomery_square must be even");
+  int longwords = len/2;
+
+  // Make very sure we don't use so much space that the stack might
+  // overflow.  512 jints corresponds to a 16384-bit integer and
+  // will use here a total of 6k bytes of stack space.
+  int total_allocation = longwords * sizeof (unsigned long) * 3;
+  guarantee(total_allocation <= 8192, "must be");
+  unsigned long *scratch = (unsigned long *)alloca(total_allocation);
+
+  // Local scratch arrays
+  unsigned long
+    *a = scratch + 0 * longwords,
+    *n = scratch + 1 * longwords,
+    *m = scratch + 2 * longwords;
+
+  reverse_words((unsigned long *)a_ints, a, longwords);  // convert the jint inputs to the longword layout used by the static helpers
+  reverse_words((unsigned long *)n_ints, n, longwords);
+
+  //montgomery_square fails to pass BigIntegerTest on solaris amd64
+  //on jdk7 and jdk8, so on Solaris the multiply path below is always taken.
+#ifndef SOLARIS
+  if (len >= MONTGOMERY_SQUARING_THRESHOLD) {
+#else
+  if (0) {
+#endif
+    ::montgomery_square(a, n, m, (unsigned long)inv, longwords);
+  } else {
+    ::montgomery_multiply(a, a, n, m, (unsigned long)inv, longwords);  // squaring expressed as a * a
+  }
+
+  reverse_words(m, (unsigned long *)m_ints, longwords);  // convert the result back into the caller's jint array
+}
+
+#endif // !_WINDOWS
+
 #ifdef COMPILER2
 // This is here instead of runtime_x86_64.cpp because it uses SimpleRuntimeFrame
 //
--- a/src/cpu/x86/vm/stubGenerator_x86_64.cpp	Mon Aug 08 20:18:53 2016 +0300
+++ b/src/cpu/x86/vm/stubGenerator_x86_64.cpp	Fri Aug 12 18:10:37 2016 +0300
@@ -3743,6 +3743,107 @@
     return start;
   }
 
+/**
+   *  Generate the squareToLen stub, which wraps MacroAssembler::square_to_len().
+   *
+   *  Input:
+   *    c_rarg0   - x address
+   *    c_rarg1   - x length
+   *    c_rarg2   - z address
+   *    c_rarg3   - z length
+   *
+   */
+  address generate_squareToLen() {
+
+    __ align(CodeEntryAlignment);
+    StubCodeMark mark(this, "StubRoutines", "squareToLen");
+
+    address start = __ pc();
+    // Win64: rcx, rdx, r8, r9 (c_rarg0, c_rarg1, ...)
+    // Unix:  rdi, rsi, rdx, rcx (c_rarg0, c_rarg1, ...)
+    const Register x      = rdi;
+    const Register len    = rsi;
+    const Register z      = r8;
+    const Register zlen   = rcx;
+
+   const Register tmp1      = r12;
+   const Register tmp2      = r13;
+   const Register tmp3      = r14;
+   const Register tmp4      = r15;
+   const Register tmp5      = rbx;
+
+    BLOCK_COMMENT("Entry:");
+    __ enter(); // required for proper stackwalking of RuntimeStub frame
+
+       setup_arg_regs(4); // x => rdi, len => rsi, z => rdx
+                          // zlen => rcx
+                          // r9 and r10 may be used to save non-volatile registers
+    __ movptr(r8, rdx);   // move z out of rdx, which is passed to square_to_len as the rdxReg scratch register
+    __ square_to_len(x, len, z, zlen, tmp1, tmp2, tmp3, tmp4, tmp5, rdx, rax);
+
+    restore_arg_regs();
+
+    __ leave(); // required for proper stackwalking of RuntimeStub frame
+    __ ret(0);
+
+    return start;
+  }
+
+   /**
+   *  Generate the mulAdd stub, which wraps MacroAssembler::mul_add().
+   *
+   *  Input:
+   *    c_rarg0   - out address
+   *    c_rarg1   - in address
+   *    c_rarg2   - offset
+   *    c_rarg3   - len
+   * not Win64
+   *    c_rarg4   - k
+   * Win64
+   *    rsp+40    - k
+   */
+  address generate_mulAdd() {
+    __ align(CodeEntryAlignment);
+    StubCodeMark mark(this, "StubRoutines", "mulAdd");
+
+    address start = __ pc();
+    // Win64: rcx, rdx, r8, r9 (c_rarg0, c_rarg1, ...)
+    // Unix:  rdi, rsi, rdx, rcx, r8, r9 (c_rarg0, c_rarg1, ...)
+    const Register out     = rdi;
+    const Register in      = rsi;
+    const Register offset  = r11;
+    const Register len     = rcx;
+    const Register k       = r8;
+
+    // Next registers will be saved on stack in mul_add().
+    const Register tmp1  = r12;
+    const Register tmp2  = r13;
+    const Register tmp3  = r14;
+    const Register tmp4  = r15;
+    const Register tmp5  = rbx;
+
+    BLOCK_COMMENT("Entry:");
+    __ enter(); // required for proper stackwalking of RuntimeStub frame
+
+    setup_arg_regs(4); // out => rdi, in => rsi, offset => rdx
+                       // len => rcx, k => r8
+                       // r9 and r10 may be used to save non-volatile registers
+#ifdef _WIN64
+    // last argument is on stack on Win64 (presumably rsp+40 at entry plus the 8 bytes pushed by enter() - TODO confirm)
+    __ movl(k, Address(rsp, 6 * wordSize));
+#endif
+    __ movptr(r11, rdx);  // move offset in rdx to offset(r11)
+    __ mul_add(out, in, offset, len, k, tmp1, tmp2, tmp3, tmp4, tmp5, rdx, rax);
+
+    restore_arg_regs();
+
+    __ leave(); // required for proper stackwalking of RuntimeStub frame
+    __ ret(0);
+
+    return start;
+  }
+
+
 #undef __
 #define __ masm->
 
@@ -3987,7 +4088,24 @@
     if (UseMultiplyToLenIntrinsic) {
       StubRoutines::_multiplyToLen = generate_multiplyToLen();
     }
-#endif
+    if (UseSquareToLenIntrinsic) {
+      StubRoutines::_squareToLen = generate_squareToLen();
+    }
+    if (UseMulAddIntrinsic) {
+      StubRoutines::_mulAdd = generate_mulAdd();
+    }
+
+#ifndef _WINDOWS
+    if (UseMontgomeryMultiplyIntrinsic) {
+      StubRoutines::_montgomeryMultiply
+        = CAST_FROM_FN_PTR(address, SharedRuntime::montgomery_multiply);
+    }
+    if (UseMontgomerySquareIntrinsic) {
+      StubRoutines::_montgomerySquare
+        = CAST_FROM_FN_PTR(address, SharedRuntime::montgomery_square);
+    }
+#endif // !_WINDOWS
+#endif // COMPILER2
   }
 
  public:
--- a/src/cpu/x86/vm/stubRoutines_x86_64.hpp	Mon Aug 08 20:18:53 2016 +0300
+++ b/src/cpu/x86/vm/stubRoutines_x86_64.hpp	Fri Aug 12 18:10:37 2016 +0300
@@ -33,7 +33,7 @@
 
 enum platform_dependent_constants {
   code_size1 = 19000,          // simply increase if too small (assembler will crash if too small)
-  code_size2 = 22000           // simply increase if too small (assembler will crash if too small)
+  code_size2 = 23000           // simply increase if too small (assembler will crash if too small)
 };
 
 class x86 {
--- a/src/cpu/x86/vm/vm_version_x86.cpp	Mon Aug 08 20:18:53 2016 +0300
+++ b/src/cpu/x86/vm/vm_version_x86.cpp	Fri Aug 12 18:10:37 2016 +0300
@@ -703,6 +703,18 @@
   if (FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) {
     UseMultiplyToLenIntrinsic = true;
   }
+  if (FLAG_IS_DEFAULT(UseSquareToLenIntrinsic)) {
+    UseSquareToLenIntrinsic = false;
+  }
+  if (FLAG_IS_DEFAULT(UseMulAddIntrinsic)) {
+    UseMulAddIntrinsic = false;
+  }
+  if (FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) {
+    UseMontgomeryMultiplyIntrinsic = false;
+  }
+  if (FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) {
+    UseMontgomerySquareIntrinsic = false;
+  }
 #else
   if (UseMultiplyToLenIntrinsic) {
     if (!FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) {
@@ -710,6 +722,30 @@
     }
     FLAG_SET_DEFAULT(UseMultiplyToLenIntrinsic, false);
   }
+  if (UseSquareToLenIntrinsic) {
+    if (!FLAG_IS_DEFAULT(UseSquareToLenIntrinsic)) {
+      warning("squareToLen intrinsic is not available in 32-bit VM");
+    }
+    FLAG_SET_DEFAULT(UseSquareToLenIntrinsic, false);
+  }
+  if (UseMulAddIntrinsic) {
+    if (!FLAG_IS_DEFAULT(UseMulAddIntrinsic)) {
+      warning("mulAdd intrinsic is not available in 32-bit VM");
+    }
+    FLAG_SET_DEFAULT(UseMulAddIntrinsic, false);
+  }
+  if (UseMontgomeryMultiplyIntrinsic) {
+    if (!FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) {
+      warning("montgomeryMultiply intrinsic is not available in 32-bit VM");
+    }
+    FLAG_SET_DEFAULT(UseMontgomeryMultiplyIntrinsic, false);
+  }
+  if (UseMontgomerySquareIntrinsic) {
+    if (!FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) {
+      warning("montgomerySquare intrinsic is not available in 32-bit VM");
+    }
+    FLAG_SET_DEFAULT(UseMontgomerySquareIntrinsic, false);
+  }
 #endif
 #endif // COMPILER2
 
--- a/src/os/aix/vm/perfMemory_aix.cpp	Mon Aug 08 20:18:53 2016 +0300
+++ b/src/os/aix/vm/perfMemory_aix.cpp	Fri Aug 12 18:10:37 2016 +0300
@@ -201,6 +201,7 @@
 // the backing store files. Returns true if the directory is considered
 // a secure location. Returns false if the statbuf is a symbolic link or
 // if an error occurred.
+//
 static bool is_statbuf_secure(struct stat *statp) {
   if (S_ISLNK(statp->st_mode) || !S_ISDIR(statp->st_mode)) {
     // The path represents a link or some non-directory file type,
@@ -209,15 +210,18 @@
     return false;
   }
   // We have an existing directory, check if the permissions are safe.
+  //
   if ((statp->st_mode & (S_IWGRP|S_IWOTH)) != 0) {
     // The directory is open for writing and could be subjected
     // to a symlink or a hard link attack. Declare it insecure.
+    //
     return false;
   }
-  // See if the uid of the directory matches the effective uid of the process.
-  //
-  if (statp->st_uid != geteuid()) {
+  // If user is not root then see if the uid of the directory matches the effective uid of the process.
+  uid_t euid = geteuid();
+  if ((euid != 0) && (statp->st_uid != euid)) {
     // The directory was not created by this user, declare it insecure.
+    //
     return false;
   }
   return true;
@@ -228,6 +232,7 @@
 // the backing store files. Returns true if the directory exists
 // and is considered a secure location. Returns false if the path
 // is a symbolic link or if an error occurred.
+//
 static bool is_directory_secure(const char* path) {
   struct stat statbuf;
   int result = 0;
--- a/src/os/bsd/dtrace/hotspot.d	Mon Aug 08 20:18:53 2016 +0300
+++ b/src/os/bsd/dtrace/hotspot.d	Fri Aug 12 18:10:37 2016 +0300
@@ -47,8 +47,8 @@
   probe mem__pool__gc__end(
     char*, uintptr_t, char*, uintptr_t, 
     uintptr_t, uintptr_t, uintptr_t, uintptr_t);
-  probe thread__probe__start(char*, uintptr_t, uintptr_t, uintptr_t, uintptr_t);
-  probe thread__probe__stop(char*, uintptr_t, uintptr_t, uintptr_t, uintptr_t);
+  probe thread__start(char*, uintptr_t, uintptr_t, uintptr_t, uintptr_t);
+  probe thread__stop(char*, uintptr_t, uintptr_t, uintptr_t, uintptr_t);
   probe thread__sleep__begin(long long);
   probe thread__sleep__end(int);
   probe thread__yield();
@@ -68,7 +68,7 @@
   probe monitor__contended__entered(uintptr_t, uintptr_t, char*, uintptr_t);
   probe monitor__contended__exit(uintptr_t, uintptr_t, char*, uintptr_t);
   probe monitor__wait(uintptr_t, uintptr_t, char*, uintptr_t, uintptr_t);
-  probe monitor__probe__waited(uintptr_t, uintptr_t, char*, uintptr_t);
+  probe monitor__waited(uintptr_t, uintptr_t, char*, uintptr_t);
   probe monitor__notify(uintptr_t, uintptr_t, char*, uintptr_t);
   probe monitor__notifyAll(uintptr_t, uintptr_t, char*, uintptr_t);
 
--- a/src/os/bsd/vm/perfMemory_bsd.cpp	Mon Aug 08 20:18:53 2016 +0300
+++ b/src/os/bsd/vm/perfMemory_bsd.cpp	Fri Aug 12 18:10:37 2016 +0300
@@ -217,9 +217,9 @@
     //
     return false;
   }
-  // See if the uid of the directory matches the effective uid of the process.
-  //
-  if (statp->st_uid != geteuid()) {
+  // If user is not root then see if the uid of the directory matches the effective uid of the process.
+  uid_t euid = geteuid();
+  if ((euid != 0) && (statp->st_uid != euid)) {
     // The directory was not created by this user, declare it insecure.
     //
     return false;
--- a/src/os/linux/vm/os_linux.cpp	Mon Aug 08 20:18:53 2016 +0300
+++ b/src/os/linux/vm/os_linux.cpp	Fri Aug 12 18:10:37 2016 +0300
@@ -2755,7 +2755,7 @@
 
 
 int os::Linux::sched_getcpu_syscall(void) {
-  unsigned int cpu;
+  unsigned int cpu = 0;
   int retval = -1;
 
 #if defined(IA32)
@@ -4263,8 +4263,8 @@
       sigaddset(&(actp->sa_mask), sig);
     }
 
-    sa_handler_t hand;
-    sa_sigaction_t sa;
+    sa_handler_t hand = NULL;
+    sa_sigaction_t sa = NULL;
     bool siginfo_flag_set = (actp->sa_flags & SA_SIGINFO) != 0;
     // retrieve the chained handler
     if (siginfo_flag_set) {
@@ -4469,7 +4469,7 @@
 
 static const char* get_signal_handler_name(address handler,
                                            char* buf, int buflen) {
-  int offset;
+  int offset = 0;
   bool found = os::dll_address_to_library_name(handler, buf, buflen, &offset);
   if (found) {
     // skip directory names
--- a/src/os/posix/vm/os_posix.cpp	Mon Aug 08 20:18:53 2016 +0300
+++ b/src/os/posix/vm/os_posix.cpp	Fri Aug 12 18:10:37 2016 +0300
@@ -1,5 +1,5 @@
 /*
-* Copyright (c) 1999, 2014, Oracle and/or its affiliates. All rights reserved.
+* Copyright (c) 1999, 2015, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -678,6 +678,21 @@
 #if defined(IA64) && !defined(AIX)
     { SIGSEGV, SEGV_PSTKOVF, "SEGV_PSTKOVF", "Paragraph stack overflow" },
 #endif
+#if defined(__sparc) && defined(SOLARIS)
+// define Solaris Sparc M7 ADI SEGV signals
+#if !defined(SEGV_ACCADI)
+#define SEGV_ACCADI 3
+#endif
+    { SIGSEGV, SEGV_ACCADI,  "SEGV_ACCADI",  "ADI not enabled for mapped object." },
+#if !defined(SEGV_ACCDERR)
+#define SEGV_ACCDERR 4
+#endif
+    { SIGSEGV, SEGV_ACCDERR, "SEGV_ACCDERR", "ADI disrupting exception." },
+#if !defined(SEGV_ACCPERR)
+#define SEGV_ACCPERR 5
+#endif
+    { SIGSEGV, SEGV_ACCPERR, "SEGV_ACCPERR", "ADI precise exception." },
+#endif // defined(__sparc) && defined(SOLARIS)
     { SIGBUS,  BUS_ADRALN,   "BUS_ADRALN",   "Invalid address alignment." },
     { SIGBUS,  BUS_ADRERR,   "BUS_ADRERR",   "Nonexistent physical address." },
     { SIGBUS,  BUS_OBJERR,   "BUS_OBJERR",   "Object-specific hardware error." },
--- a/src/os/solaris/vm/os_solaris.cpp	Mon Aug 08 20:18:53 2016 +0300
+++ b/src/os/solaris/vm/os_solaris.cpp	Fri Aug 12 18:10:37 2016 +0300
@@ -2041,7 +2041,9 @@
   st->print(", physical " UINT64_FORMAT "k", os::physical_memory()>>10);
   st->print("(" UINT64_FORMAT "k free)", os::available_memory() >> 10);
   st->cr();
-  (void) check_addr0(st);
+  if (VMError::fatal_error_in_progress()) {
+     (void) check_addr0(st);
+  }
 }
 
 void os::print_siginfo(outputStream* st, void* siginfo) {
--- a/src/os_cpu/linux_x86/vm/copy_linux_x86.inline.hpp	Mon Aug 08 20:18:53 2016 +0300
+++ b/src/os_cpu/linux_x86/vm/copy_linux_x86.inline.hpp	Fri Aug 12 18:10:37 2016 +0300
@@ -30,7 +30,7 @@
   (void)memmove(to, from, count * HeapWordSize);
 #else
   // Includes a zero-count check.
-  intx temp;
+  intx temp = 0;
   __asm__ volatile("        testl   %6,%6         ;"
                    "        jz      7f            ;"
                    "        cmpl    %4,%5         ;"
@@ -88,7 +88,7 @@
   }
 #else
   // Includes a zero-count check.
-  intx temp;
+  intx temp = 0;
   __asm__ volatile("        testl   %6,%6       ;"
                    "        jz      3f          ;"
                    "        cmpl    $32,%6      ;"
@@ -145,7 +145,7 @@
   (void)memmove(to, from, count);
 #else
   // Includes a zero-count check.
-  intx temp;
+  intx temp = 0;
   __asm__ volatile("        testl   %6,%6          ;"
                    "        jz      13f            ;"
                    "        cmpl    %4,%5          ;"
--- a/src/os_cpu/solaris_sparc/vm/vm_version_solaris_sparc.cpp	Mon Aug 08 20:18:53 2016 +0300
+++ b/src/os_cpu/solaris_sparc/vm/vm_version_solaris_sparc.cpp	Fri Aug 12 18:10:37 2016 +0300
@@ -244,7 +244,6 @@
 bool PICL::open_library() {
   _dl_handle = dlopen("libpicl.so.1", RTLD_LAZY);
   if (_dl_handle == NULL) {
-    warning("PICL (libpicl.so.1) is missing. Performance will not be optimal.");
     return false;
   }
   if (!bind_library_functions()) {
--- a/src/share/vm/c1/c1_Canonicalizer.cpp	Mon Aug 08 20:18:53 2016 +0300
+++ b/src/share/vm/c1/c1_Canonicalizer.cpp	Fri Aug 12 18:10:37 2016 +0300
@@ -640,7 +640,7 @@
 
   if (l == r && !lt->is_float_kind()) {
     // pattern: If (a cond a) => simplify to Goto
-    BlockBegin* sux;
+    BlockBegin* sux = NULL;
     switch (x->cond()) {
     case If::eql: sux = x->sux_for(true);  break;
     case If::neq: sux = x->sux_for(false); break;
@@ -648,6 +648,7 @@
     case If::leq: sux = x->sux_for(true);  break;
     case If::gtr: sux = x->sux_for(false); break;
     case If::geq: sux = x->sux_for(true);  break;
+    default: ShouldNotReachHere();
     }
     // If is a safepoint then the debug information should come from the state_before of the If.
     set_canonical(new Goto(sux, x->state_before(), is_safepoint(x, sux)));
@@ -685,7 +686,7 @@
       } else {
         // two successors differ and two successors are the same => simplify to: If (x cmp y)
         // determine new condition & successors
-        If::Condition cond;
+        If::Condition cond = If::eql;
         BlockBegin* tsux = NULL;
         BlockBegin* fsux = NULL;
              if (lss_sux == eql_sux) { cond = If::leq; tsux = lss_sux; fsux = gtr_sux; }
--- a/src/share/vm/c1/c1_GraphBuilder.cpp	Mon Aug 08 20:18:53 2016 +0300
+++ b/src/share/vm/c1/c1_GraphBuilder.cpp	Fri Aug 12 18:10:37 2016 +0300
@@ -3971,8 +3971,8 @@
   caller_state->truncate_stack(args_base);
   assert(callee_state->stack_size() == 0, "callee stack must be empty");
 
-  Value lock;
-  BlockBegin* sync_handler;
+  Value lock = NULL;
+  BlockBegin* sync_handler = NULL;
 
   // Inline the locking of the receiver if the callee is synchronized
   if (callee->is_synchronized()) {
--- a/src/share/vm/c1/c1_LIRGenerator.cpp	Mon Aug 08 20:18:53 2016 +0300
+++ b/src/share/vm/c1/c1_LIRGenerator.cpp	Fri Aug 12 18:10:37 2016 +0300
@@ -3347,7 +3347,7 @@
 }
 
 void LIRGenerator::increment_event_counter(CodeEmitInfo* info, int bci, bool backedge) {
-  int freq_log;
+  int freq_log = 0;
   int level = compilation()->env()->comp_level();
   if (level == CompLevel_limited_profile) {
     freq_log = (backedge ? Tier2BackedgeNotifyFreqLog : Tier2InvokeNotifyFreqLog);
@@ -3368,7 +3368,7 @@
   assert(level > CompLevel_simple, "Shouldn't be here");
 
   int offset = -1;
-  LIR_Opr counter_holder;
+  LIR_Opr counter_holder = NULL;
   if (level == CompLevel_limited_profile) {
     MethodCounters* counters_adr = method->ensure_method_counters();
     if (counters_adr == NULL) {
--- a/src/share/vm/c1/c1_LIRGenerator.hpp	Mon Aug 08 20:18:53 2016 +0300
+++ b/src/share/vm/c1/c1_LIRGenerator.hpp	Fri Aug 12 18:10:37 2016 +0300
@@ -410,7 +410,7 @@
   }
 
   static LIR_Condition lir_cond(If::Condition cond) {
-    LIR_Condition l;
+    LIR_Condition l = lir_cond_unknown;
     switch (cond) {
     case If::eql: l = lir_cond_equal;        break;
     case If::neq: l = lir_cond_notEqual;     break;
@@ -420,6 +420,7 @@
     case If::gtr: l = lir_cond_greater;      break;
     case If::aeq: l = lir_cond_aboveEqual;   break;
     case If::beq: l = lir_cond_belowEqual;   break;
+    default: fatal("You must pass valid If::Condition");
     };
     return l;
   }
--- a/src/share/vm/c1/c1_Runtime1.cpp	Mon Aug 08 20:18:53 2016 +0300
+++ b/src/share/vm/c1/c1_Runtime1.cpp	Fri Aug 12 18:10:37 2016 +0300
@@ -312,6 +312,7 @@
   NOT_PRODUCT(_new_instance_slowcase_cnt++;)
 
   assert(klass->is_klass(), "not a class");
+  Handle holder(THREAD, klass->klass_holder()); // keep the klass alive
   instanceKlassHandle h(thread, klass);
   h->check_valid_for_instantiation(true, CHECK);
   // make sure klass is initialized
@@ -347,6 +348,7 @@
   //       anymore after new_objArray() and no GC can happen before.
   //       (This may have to change if this code changes!)
   assert(array_klass->is_klass(), "not a class");
+  Handle holder(THREAD, array_klass->klass_holder()); // keep the klass alive
   Klass* elem_klass = ObjArrayKlass::cast(array_klass)->element_klass();
   objArrayOop obj = oopFactory::new_objArray(elem_klass, length, CHECK);
   thread->set_vm_result(obj);
@@ -363,6 +365,7 @@
 
   assert(klass->is_klass(), "not a class");
   assert(rank >= 1, "rank must be nonzero");
+  Handle holder(THREAD, klass->klass_holder()); // keep the klass alive
   oop obj = ArrayKlass::cast(klass)->multi_allocate(rank, dims, CHECK);
   thread->set_vm_result(obj);
 JRT_END
--- a/src/share/vm/c1/c1_ValueType.cpp	Mon Aug 08 20:18:53 2016 +0300
+++ b/src/share/vm/c1/c1_ValueType.cpp	Fri Aug 12 18:10:37 2016 +0300
@@ -153,7 +153,19 @@
     case T_FLOAT  : return new FloatConstant (value.as_float ());
     case T_DOUBLE : return new DoubleConstant(value.as_double());
     case T_ARRAY  : // fall through (ciConstant doesn't have an array accessor)
-    case T_OBJECT : return new ObjectConstant(value.as_object());
+    case T_OBJECT : {
+      // TODO: Common the code with GraphBuilder::load_constant?
+      ciObject* obj = value.as_object();
+      if (obj->is_null_object())
+        return objectNull;
+      if (obj->is_loaded()) {
+        if (obj->is_array())
+          return new ArrayConstant(obj->as_array());
+        else if (obj->is_instance())
+          return new InstanceConstant(obj->as_instance());
+      }
+      return new ObjectConstant(obj);
+    }
   }
   ShouldNotReachHere();
   return illegalType;
--- a/src/share/vm/ci/ciObjectFactory.cpp	Mon Aug 08 20:18:53 2016 +0300
+++ b/src/share/vm/ci/ciObjectFactory.cpp	Fri Aug 12 18:10:37 2016 +0300
@@ -413,6 +413,7 @@
     metadata_owner_klass = m->as_method()->get_Method()->constants()->pool_holder();
   } else {
     fatal("Not implemented for other types of metadata");
+    return;
   }
 
   oop metadata_holder = metadata_owner_klass->klass_holder();
--- a/src/share/vm/classfile/classFileParser.cpp	Mon Aug 08 20:18:53 2016 +0300
+++ b/src/share/vm/classfile/classFileParser.cpp	Fri Aug 12 18:10:37 2016 +0300
@@ -3188,19 +3188,19 @@
 
   // Field size and offset computation
   int nonstatic_field_size = _super_klass() == NULL ? 0 : _super_klass()->nonstatic_field_size();
-  int next_static_oop_offset;
-  int next_static_double_offset;
-  int next_static_word_offset;
-  int next_static_short_offset;
-  int next_static_byte_offset;
-  int next_nonstatic_oop_offset;
-  int next_nonstatic_double_offset;
-  int next_nonstatic_word_offset;
-  int next_nonstatic_short_offset;
-  int next_nonstatic_byte_offset;
-  int first_nonstatic_oop_offset;
-  int next_nonstatic_field_offset;
-  int next_nonstatic_padded_offset;
+  int next_static_oop_offset = 0;
+  int next_static_double_offset = 0;
+  int next_static_word_offset = 0;
+  int next_static_short_offset = 0;
+  int next_static_byte_offset = 0;
+  int next_nonstatic_oop_offset = 0;
+  int next_nonstatic_double_offset = 0;
+  int next_nonstatic_word_offset = 0;
+  int next_nonstatic_short_offset = 0;
+  int next_nonstatic_byte_offset = 0;
+  int first_nonstatic_oop_offset = 0;
+  int next_nonstatic_field_offset = 0;
+  int next_nonstatic_padded_offset = 0;
 
   // Count the contended fields by type.
   //
@@ -3353,14 +3353,14 @@
     ShouldNotReachHere();
   }
 
-  int nonstatic_oop_space_count   = 0;
-  int nonstatic_word_space_count  = 0;
-  int nonstatic_short_space_count = 0;
-  int nonstatic_byte_space_count  = 0;
-  int nonstatic_oop_space_offset;
-  int nonstatic_word_space_offset;
-  int nonstatic_short_space_offset;
-  int nonstatic_byte_space_offset;
+  int nonstatic_oop_space_count    = 0;
+  int nonstatic_word_space_count   = 0;
+  int nonstatic_short_space_count  = 0;
+  int nonstatic_byte_space_count   = 0;
+  int nonstatic_oop_space_offset   = 0;
+  int nonstatic_word_space_offset  = 0;
+  int nonstatic_short_space_offset = 0;
+  int nonstatic_byte_space_offset  = 0;
 
   // Try to squeeze some of the fields into the gaps due to
   // long/double alignment.
@@ -3432,7 +3432,7 @@
     // contended instance fields are handled below
     if (fs.is_contended() && !fs.access_flags().is_static()) continue;
 
-    int real_offset;
+    int real_offset = 0;
     FieldAllocationType atype = (FieldAllocationType) fs.allocation_type();
 
     // pack the rest of the fields
@@ -3565,7 +3565,7 @@
         // handle statics below
         if (fs.access_flags().is_static()) continue;
 
-        int real_offset;
+        int real_offset = 0;
         FieldAllocationType atype = (FieldAllocationType) fs.allocation_type();
 
         switch (atype) {
--- a/src/share/vm/classfile/classLoader.cpp	Mon Aug 08 20:18:53 2016 +0300
+++ b/src/share/vm/classfile/classLoader.cpp	Fri Aug 12 18:10:37 2016 +0300
@@ -414,30 +414,30 @@
 }
 #endif
 
-void ClassLoader::trace_class_path(const char* msg, const char* name) {
+void ClassLoader::trace_class_path(outputStream* out, const char* msg, const char* name) {
   if (!TraceClassPaths) {
     return;
   }
 
   if (msg) {
-    tty->print("%s", msg);
+    out->print("%s", msg);
   }
   if (name) {
     if (strlen(name) < 256) {
-      tty->print("%s", name);
+      out->print("%s", name);
     } else {
       // For very long paths, we need to print each character separately,
       // as print_cr() has a length limit
       while (name[0] != '\0') {
-        tty->print("%c", name[0]);
+        out->print("%c", name[0]);
         name++;
       }
     }
   }
   if (msg && msg[0] == '[') {
-    tty->print_cr("]");
+    out->print_cr("]");
   } else {
-    tty->cr();
+    out->cr();
   }
 }
 
@@ -583,7 +583,7 @@
     // Don't print sys_class_path - this is the bootcp of this current VM process, not necessarily
     // the same as the bootcp of the shared archive.
   } else {
-    trace_class_path("[Bootstrap loader class path=", sys_class_path);
+    trace_class_path(tty, "[Bootstrap loader class path=", sys_class_path);
   }
 #if INCLUDE_CDS
   if (DumpSharedSpaces) {
--- a/src/share/vm/classfile/classLoader.hpp	Mon Aug 08 20:18:53 2016 +0300
+++ b/src/share/vm/classfile/classLoader.hpp	Fri Aug 12 18:10:37 2016 +0300
@@ -346,7 +346,7 @@
   static void  exit_with_path_failure(const char* error, const char* message);
 #endif
 
-  static void  trace_class_path(const char* msg, const char* name = NULL);
+  static void  trace_class_path(outputStream* out, const char* msg, const char* name = NULL);
 
   // VM monitoring and management support
   static jlong classloader_time_ms();
--- a/src/share/vm/classfile/dictionary.cpp	Mon Aug 08 20:18:53 2016 +0300
+++ b/src/share/vm/classfile/dictionary.cpp	Fri Aug 12 18:10:37 2016 +0300
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, 2014, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2016, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -25,6 +25,7 @@
 #include "precompiled.hpp"
 #include "classfile/dictionary.hpp"
 #include "classfile/systemDictionary.hpp"
+#include "classfile/systemDictionaryShared.hpp"
 #include "memory/iterator.hpp"
 #include "oops/oop.inline.hpp"
 #include "prims/jvmtiRedefineClassesTrace.hpp"
@@ -36,9 +37,16 @@
 DictionaryEntry*  Dictionary::_current_class_entry = NULL;
 int               Dictionary::_current_class_index =    0;
 
+size_t Dictionary::entry_size() {
+  if (DumpSharedSpaces) {
+    return SystemDictionaryShared::dictionary_entry_size();
+  } else {
+    return sizeof(DictionaryEntry);
+  }
+}
 
 Dictionary::Dictionary(int table_size)
-  : TwoOopHashtable<Klass*, mtClass>(table_size, sizeof(DictionaryEntry)) {
+  : TwoOopHashtable<Klass*, mtClass>(table_size, (int)entry_size()) {
   _current_class_index = 0;
   _current_class_entry = NULL;
   _pd_cache_table = new ProtectionDomainCacheTable(defaultProtectionDomainCacheSize);
@@ -47,7 +55,7 @@
 
 Dictionary::Dictionary(int table_size, HashtableBucket<mtClass>* t,
                        int number_of_entries)
-  : TwoOopHashtable<Klass*, mtClass>(table_size, sizeof(DictionaryEntry), t, number_of_entries) {
+  : TwoOopHashtable<Klass*, mtClass>(table_size, (int)entry_size(), t, number_of_entries) {
   _current_class_index = 0;
   _current_class_entry = NULL;
   _pd_cache_table = new ProtectionDomainCacheTable(defaultProtectionDomainCacheSize);
@@ -63,6 +71,9 @@
   entry->set_loader_data(loader_data);
   entry->set_pd_set(NULL);
   assert(klass->oop_is_instance(), "Must be");
+  if (DumpSharedSpaces) {
+    SystemDictionaryShared::init_shared_dictionary_entry(klass, entry);
+  }
   return entry;
 }
 
--- a/src/share/vm/classfile/dictionary.hpp	Mon Aug 08 20:18:53 2016 +0300
+++ b/src/share/vm/classfile/dictionary.hpp	Fri Aug 12 18:10:37 2016 +0300
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, 2014, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2016, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -53,6 +53,7 @@
   DictionaryEntry* get_entry(int index, unsigned int hash,
                              Symbol* name, ClassLoaderData* loader_data);
 
+protected:
   DictionaryEntry* bucket(int i) {
     return (DictionaryEntry*)Hashtable<Klass*, mtClass>::bucket(i);
   }
@@ -66,6 +67,8 @@
     Hashtable<Klass*, mtClass>::add_entry(index, (HashtableEntry<Klass*, mtClass>*)new_entry);
   }
 
+  static size_t entry_size();
+
 public:
   Dictionary(int table_size);
   Dictionary(int table_size, HashtableBucket<mtClass>* t, int number_of_entries);
--- a/src/share/vm/classfile/javaClasses.cpp	Mon Aug 08 20:18:53 2016 +0300
+++ b/src/share/vm/classfile/javaClasses.cpp	Fri Aug 12 18:10:37 2016 +0300
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -936,7 +936,7 @@
   assert(_group_offset == 0, "offsets should be initialized only once");
 
   Klass* k = SystemDictionary::Thread_klass();
-  compute_offset(_name_offset,      k, vmSymbols::name_name(),      vmSymbols::char_array_signature());
+  compute_offset(_name_offset,      k, vmSymbols::name_name(),      vmSymbols::string_signature());
   compute_offset(_group_offset,     k, vmSymbols::group_name(),     vmSymbols::threadgroup_signature());
   compute_offset(_contextClassLoader_offset, k, vmSymbols::contextClassLoader_name(), vmSymbols::classloader_signature());
   compute_offset(_inheritedAccessControlContext_offset, k, vmSymbols::inheritedAccessControlContext_name(), vmSymbols::accesscontrolcontext_signature());
@@ -966,15 +966,12 @@
 }
 
 
-typeArrayOop java_lang_Thread::name(oop java_thread) {
-  oop name = java_thread->obj_field(_name_offset);
-  assert(name == NULL || (name->is_typeArray() && TypeArrayKlass::cast(name->klass())->element_type() == T_CHAR), "just checking");
-  return typeArrayOop(name);
-}
-
-
-void java_lang_Thread::set_name(oop java_thread, typeArrayOop name) {
-  assert(java_thread->obj_field(_name_offset) == NULL, "name should be NULL");
+oop java_lang_Thread::name(oop java_thread) {
+  return java_thread->obj_field(_name_offset);
+}
+
+
+void java_lang_Thread::set_name(oop java_thread, oop name) {
   java_thread->obj_field_put(_name_offset, name);
 }
 
--- a/src/share/vm/classfile/javaClasses.hpp	Mon Aug 08 20:18:53 2016 +0300
+++ b/src/share/vm/classfile/javaClasses.hpp	Fri Aug 12 18:10:37 2016 +0300
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -343,8 +343,8 @@
   // Set JavaThread for instance
   static void set_thread(oop java_thread, JavaThread* thread);
   // Name
-  static typeArrayOop name(oop java_thread);
-  static void set_name(oop java_thread, typeArrayOop name);
+  static oop name(oop java_thread);
+  static void set_name(oop java_thread, oop name);
   // Priority
   static ThreadPriority priority(oop java_thread);
   static void set_priority(oop java_thread, ThreadPriority priority);
--- a/src/share/vm/classfile/placeholders.hpp	Mon Aug 08 20:18:53 2016 +0300
+++ b/src/share/vm/classfile/placeholders.hpp	Fri Aug 12 18:10:37 2016 +0300
@@ -220,7 +220,7 @@
   }
 
   SeenThread* actionToQueue(PlaceholderTable::classloadAction action) {
-    SeenThread* queuehead;
+    SeenThread* queuehead = NULL;
     switch (action) {
       case PlaceholderTable::LOAD_INSTANCE:
          queuehead = _loadInstanceThreadQ;
--- a/src/share/vm/classfile/sharedPathsMiscInfo.cpp	Mon Aug 08 20:18:53 2016 +0300
+++ b/src/share/vm/classfile/sharedPathsMiscInfo.cpp	Fri Aug 12 18:10:37 2016 +0300
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, 2015, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -67,7 +67,7 @@
 }
 
 bool SharedPathsMiscInfo::fail(const char* msg, const char* name) {
-  ClassLoader::trace_class_path(msg, name);
+  ClassLoader::trace_class_path(tty, msg, name);
   MetaspaceShared::set_archive_loading_failed();
   return false;
 }
--- a/src/share/vm/classfile/sharedPathsMiscInfo.hpp	Mon Aug 08 20:18:53 2016 +0300
+++ b/src/share/vm/classfile/sharedPathsMiscInfo.hpp	Fri Aug 12 18:10:37 2016 +0300
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, 2015, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -64,7 +64,7 @@
   bool read(void* ptr, size_t size);
 
   static void trace_class_path(const char* msg, const char* name = NULL) {
-    ClassLoader::trace_class_path(msg, name);
+    ClassLoader::trace_class_path(tty, msg, name);
   }
 protected:
   static bool fail(const char* msg, const char* name = NULL);
--- a/src/share/vm/classfile/systemDictionary.cpp	Mon Aug 08 20:18:53 2016 +0300
+++ b/src/share/vm/classfile/systemDictionary.cpp	Fri Aug 12 18:10:37 2016 +0300
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -1198,8 +1198,13 @@
 
     if (ik->super() != NULL) {
       Symbol*  cn = ik->super()->name();
-      resolve_super_or_fail(class_name, cn,
-                            class_loader, protection_domain, true, CHECK_(nh));
+      Klass *s = resolve_super_or_fail(class_name, cn,
+                                       class_loader, protection_domain, true, CHECK_(nh));
+      if (s != ik->super()) {
+        // The dynamically resolved super class is not the same as the one we used during dump time,
+        // so we cannot use ik.
+        return nh;
+      }
     }
 
     Array<Klass*>* interfaces = ik->local_interfaces();
@@ -1212,7 +1217,12 @@
       // reinitialized yet (they will be once the interface classes
       // are loaded)
       Symbol*  name  = k->name();
-      resolve_super_or_fail(class_name, name, class_loader, protection_domain, false, CHECK_(nh));
+      Klass* i = resolve_super_or_fail(class_name, name, class_loader, protection_domain, false, CHECK_(nh));
+      if (k != i) {
+        // The dynamically resolved interface class is not the same as the one we used during dump time,
+        // so we cannot use ik.
+        return nh;
+      }
     }
 
     // Adjust methods to recover missing data.  They need addresses for
--- a/src/share/vm/classfile/systemDictionaryShared.hpp	Mon Aug 08 20:18:53 2016 +0300
+++ b/src/share/vm/classfile/systemDictionaryShared.hpp	Fri Aug 12 18:10:37 2016 +0300
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, 2016, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -26,6 +26,7 @@
 #ifndef SHARE_VM_CLASSFILE_SYSTEMDICTIONARYSHARED_HPP
 #define SHARE_VM_CLASSFILE_SYSTEMDICTIONARYSHARED_HPP
 
+#include "classfile/dictionary.hpp"
 #include "classfile/systemDictionary.hpp"
 
 class SystemDictionaryShared: public SystemDictionary {
@@ -42,6 +43,22 @@
     oop class_loader = loader_data->class_loader();
     return (class_loader == NULL);
   }
+
+  static size_t dictionary_entry_size() {
+    return sizeof(DictionaryEntry);
+  }
+  static void init_shared_dictionary_entry(Klass* k, DictionaryEntry* entry) {}
+
+  // The (non-application) CDS implementation supports only classes in the boot
+  // class loader, which ensures that the verification dependencies are the same
+  // during archive creation time and runtime. Thus we can do the dependency checks
+  // entirely during archive creation time.
+  static void add_verification_dependency(Klass* k, Symbol* accessor_clsname,
+                                          Symbol* target_clsname) {}
+  static void finalize_verification_dependencies() {}
+  static bool check_verification_dependencies(Klass* k, Handle class_loader,
+                                              Handle protection_domain,
+                                              char** message_buffer, TRAPS) {return true;}
 };
 
 #endif // SHARE_VM_CLASSFILE_SYSTEMDICTIONARYSHARED_HPP
--- a/src/share/vm/classfile/verificationType.cpp	Mon Aug 08 20:18:53 2016 +0300
+++ b/src/share/vm/classfile/verificationType.cpp	Fri Aug 12 18:10:37 2016 +0300
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, 2014, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2016, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -24,6 +24,7 @@
 
 #include "precompiled.hpp"
 #include "classfile/symbolTable.hpp"
+#include "classfile/systemDictionaryShared.hpp"
 #include "classfile/verificationType.hpp"
 #include "classfile/verifier.hpp"
 
@@ -73,7 +74,23 @@
       Klass* from_class = SystemDictionary::resolve_or_fail(
           from.name(), Handle(THREAD, klass->class_loader()),
           Handle(THREAD, klass->protection_domain()), true, CHECK_false);
-      return InstanceKlass::cast(from_class)->is_subclass_of(this_class());
+      bool result = InstanceKlass::cast(from_class)->is_subclass_of(this_class());
+      if (result && DumpSharedSpaces) {
+        if (klass()->is_subclass_of(from_class) && klass()->is_subclass_of(this_class())) {
+          // No need to save verification dependency. At run time, <klass> will be
+          // loaded from the archive only if <from_class> and <this_class> are
+          // also loaded from the archive. I.e., all 3 classes are exactly the same
+          // as we saw at archive creation time.
+        } else {
+          // Save the dependency. At run time, we need to check that the condition
+          // from_class->is_subclass_of(this_class()) is still true.
+          Symbol* accessor_clsname = from.name();
+          Symbol* target_clsname = this_class()->name();
+          SystemDictionaryShared::add_verification_dependency(klass(),
+                       accessor_clsname, target_clsname);
+        }
+      }
+      return result;
     }
   } else if (is_array() && from.is_array()) {
     VerificationType comp_this = get_component(context, CHECK_false);
--- a/src/share/vm/classfile/verifier.cpp	Mon Aug 08 20:18:53 2016 +0300
+++ b/src/share/vm/classfile/verifier.cpp	Fri Aug 12 18:10:37 2016 +0300
@@ -2326,9 +2326,17 @@
       case Bytecodes::_ifnonnull:
         target = bcs.dest();
         if (visited_branches->contains(bci)) {
-          if (bci_stack->is_empty()) return true;
-          // Pop a bytecode starting offset and scan from there.
-          bcs.set_start(bci_stack->pop());
+          if (bci_stack->is_empty()) {
+            if (handler_stack->is_empty()) {
+              return true;
+            } else {
+              // Parse the catch handlers for try blocks containing athrow.
+              bcs.set_start(handler_stack->pop());
+            }
+          } else {
+            // Pop a bytecode starting offset and scan from there.
+            bcs.set_start(bci_stack->pop());
+          }
         } else {
           if (target > bci) { // forward branch
             if (target >= code_length) return false;
@@ -2351,9 +2359,17 @@
       case Bytecodes::_goto_w:
         target = (opcode == Bytecodes::_goto ? bcs.dest() : bcs.dest_w());
         if (visited_branches->contains(bci)) {
-          if (bci_stack->is_empty()) return true;
-          // Been here before, pop new starting offset from stack.
-          bcs.set_start(bci_stack->pop());
+          if (bci_stack->is_empty()) {
+            if (handler_stack->is_empty()) {
+              return true;
+            } else {
+              // Parse the catch handlers for try blocks containing athrow.
+              bcs.set_start(handler_stack->pop());
+            }
+          } else {
+            // Been here before, pop new starting offset from stack.
+            bcs.set_start(bci_stack->pop());
+          }
         } else {
           if (target >= code_length) return false;
           // Continue scanning from the target onward.
--- a/src/share/vm/classfile/vmSymbols.hpp	Mon Aug 08 20:18:53 2016 +0300
+++ b/src/share/vm/classfile/vmSymbols.hpp	Fri Aug 12 18:10:37 2016 +0300
@@ -793,10 +793,26 @@
    do_signature(encodeISOArray_signature,                        "([CI[BII)I")                                          \
                                                                                                                         \
   do_class(java_math_BigInteger,                      "java/math/BigInteger")                                           \
-  do_intrinsic(_multiplyToLen,      java_math_BigInteger, multiplyToLen_name, multiplyToLen_signature, F_R)             \
+  do_intrinsic(_multiplyToLen,      java_math_BigInteger, multiplyToLen_name, multiplyToLen_signature, F_S)             \
    do_name(     multiplyToLen_name,                             "multiplyToLen")                                        \
    do_signature(multiplyToLen_signature,                        "([II[II[I)[I")                                         \
                                                                                                                         \
+  do_intrinsic(_squareToLen, java_math_BigInteger, squareToLen_name, squareToLen_signature, F_S)                        \
+   do_name(     squareToLen_name,                             "implSquareToLen")                                        \
+   do_signature(squareToLen_signature,                        "([II[II)[I")                                             \
+                                                                                                                        \
+  do_intrinsic(_mulAdd, java_math_BigInteger, mulAdd_name, mulAdd_signature, F_S)                                       \
+   do_name(     mulAdd_name,                                  "implMulAdd")                                             \
+   do_signature(mulAdd_signature,                             "([I[IIII)I")                                             \
+                                                                                                                        \
+  do_intrinsic(_montgomeryMultiply,      java_math_BigInteger, montgomeryMultiply_name, montgomeryMultiply_signature, F_S) \
+   do_name(     montgomeryMultiply_name,                             "implMontgomeryMultiply")                          \
+   do_signature(montgomeryMultiply_signature,                        "([I[I[IIJ[I)[I")                                  \
+                                                                                                                        \
+  do_intrinsic(_montgomerySquare,      java_math_BigInteger, montgomerySquare_name, montgomerySquare_signature, F_S)    \
+   do_name(     montgomerySquare_name,                             "implMontgomerySquare")                              \
+   do_signature(montgomerySquare_signature,                        "([I[IIJ[I)[I")                                      \
+                                                                                                                        \
   /* java/lang/ref/Reference */                                                                                         \
   do_intrinsic(_Reference_get,            java_lang_ref_Reference, get_name,    void_object_signature, F_R)             \
                                                                                                                         \
--- a/src/share/vm/code/nmethod.cpp	Mon Aug 08 20:18:53 2016 +0300
+++ b/src/share/vm/code/nmethod.cpp	Fri Aug 12 18:10:37 2016 +0300
@@ -1619,7 +1619,11 @@
       // During GC the is_alive closure is non-NULL, and is used to
       // determine liveness of dependees that need to be updated.
       if (is_alive == NULL || klass->is_loader_alive(is_alive)) {
-        InstanceKlass::cast(klass)->remove_dependent_nmethod(this);
+        // The GC defers deletion of this entry, since there might be multiple threads
+        // iterating over the _dependencies graph. Other call paths are single-threaded
+        // and may delete it immediately.
+        bool delete_immediately = is_alive == NULL;
+        InstanceKlass::cast(klass)->remove_dependent_nmethod(this, delete_immediately);
       }
     }
   }
--- a/src/share/vm/compiler/oopMap.hpp	Mon Aug 08 20:18:53 2016 +0300
+++ b/src/share/vm/compiler/oopMap.hpp	Fri Aug 12 18:10:37 2016 +0300
@@ -75,8 +75,8 @@
 
   // Constructors
   OopMapValue () { set_value(0); set_content_reg(VMRegImpl::Bad()); }
-  OopMapValue (VMReg reg, oop_types t) { set_reg_type(reg,t); }
-  OopMapValue (VMReg reg, oop_types t, VMReg reg2) { set_reg_type(reg,t); set_content_reg(reg2); }
+  OopMapValue (VMReg reg, oop_types t) { set_reg_type(reg, t); set_content_reg(VMRegImpl::Bad()); }
+  OopMapValue (VMReg reg, oop_types t, VMReg reg2) { set_reg_type(reg, t); set_content_reg(reg2); }
   OopMapValue (CompressedReadStream* stream) { read_from(stream); }
 
   // Archiving
@@ -89,7 +89,7 @@
 
   void read_from(CompressedReadStream* stream) {
     set_value(stream->read_int());
-    if(is_callee_saved() || is_derived_oop()) {
+    if (is_callee_saved() || is_derived_oop()) {
       set_content_reg(VMRegImpl::as_VMReg(stream->read_int(), true));
     }
   }
--- a/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp	Mon Aug 08 20:18:53 2016 +0300
+++ b/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp	Fri Aug 12 18:10:37 2016 +0300
@@ -2288,7 +2288,7 @@
   }
 
   // Used for PrintGC
-  size_t prev_used;
+  size_t prev_used = 0;
   if (PrintGC && Verbose) {
     prev_used = _cmsGen->used(); // XXXPERM
   }
@@ -8624,7 +8624,7 @@
 
   HeapWord* const fc_addr = (HeapWord*) fc;
 
-  bool coalesce;
+  bool coalesce = false;
   const size_t left  = pointer_delta(fc_addr, freeFinger());
   const size_t right = chunkSize;
   switch (FLSCoalescePolicy) {
--- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp	Mon Aug 08 20:18:53 2016 +0300
+++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp	Fri Aug 12 18:10:37 2016 +0300
@@ -2331,6 +2331,7 @@
     case GCCause::_java_lang_system_gc:     return ExplicitGCInvokesConcurrent;
     case GCCause::_g1_humongous_allocation: return true;
     case GCCause::_update_allocation_context_stats_inc: return true;
+    case GCCause::_wb_conc_mark:            return true;
     default:                                return false;
   }
 }
@@ -3766,6 +3767,9 @@
             _dcq.enqueue(card_ptr);
           }
         }
+        assert(hrrs.n_yielded() == r->rem_set()->occupied(),
+               err_msg("Remembered set hash maps out of sync, cur: " SIZE_FORMAT " entries, next: " SIZE_FORMAT " entries",
+               hrrs.n_yielded(), r->rem_set()->occupied()));
         r->rem_set()->clear_locked();
       }
       assert(r->rem_set()->is_empty(), "At this point any humongous candidate remembered set must be empty.");
@@ -3835,6 +3839,16 @@
   _surviving_young_words = NULL;
 }
 
+class VerifyRegionRemSetClosure : public HeapRegionClosure {  // Per-region closure: calls verify_rem_set() on each region it is handed.
+  public:
+    bool doHeapRegion(HeapRegion* hr) {
+      if (!hr->continuesHumongous()) {  // regions for which continuesHumongous() is true are skipped
+        hr->verify_rem_set();
+      }
+      return false;  // always false; presumably means "keep iterating" -- confirm against HeapRegionClosure
+    }
+};
+
 #ifdef ASSERT
 class VerifyCSetClosure: public HeapRegionClosure {
 public:
@@ -3977,8 +3991,15 @@
 
     TraceCPUTime tcpu(G1Log::finer(), true, gclog_or_tty);
 
-    uint active_workers = (G1CollectedHeap::use_parallel_gc_threads() ?
-                                workers()->active_workers() : 1);
+    uint active_workers = AdaptiveSizePolicy::calc_active_workers(workers()->total_workers(),
+                                                                  workers()->active_workers(),
+                                                                  Threads::number_of_non_daemon_threads());
+    assert(UseDynamicNumberOfGCThreads ||
+           active_workers == workers()->total_workers(),
+           "If not dynamic should be using all the  workers");
+    workers()->set_active_workers(active_workers);
+
+
     double pause_start_sec = os::elapsedTime();
     g1_policy()->phase_times()->note_gc_start(active_workers, mark_in_progress());
     log_gc_header();
@@ -4011,6 +4032,14 @@
       increment_total_collections(false /* full gc */);
       increment_gc_time_stamp();
 
+      if (VerifyRememberedSets) {
+        if (!VerifySilently) {
+          gclog_or_tty->print_cr("[Verifying RemSets before GC]");
+        }
+        VerifyRegionRemSetClosure v_cl;
+        heap_region_iterate(&v_cl);
+      }
+
       verify_before_gc();
       check_bitmaps("GC Start");
 
@@ -4088,6 +4117,13 @@
 
         g1_policy()->finalize_cset(target_pause_time_ms, evacuation_info);
 
+        // Make sure the remembered sets are up to date. This needs to be
+        // done before register_humongous_regions_with_cset(), because the
+        // remembered sets are used there to choose eager reclaim candidates.
+        // If the remembered sets are not up to date we might miss some
+        // entries that need to be handled.
+        g1_rem_set()->cleanupHRRS();
+
         register_humongous_regions_with_in_cset_fast_test();
 
         assert(check_cset_fast_test(), "Inconsistency in the InCSetState table.");
@@ -4235,6 +4271,14 @@
         // scanning cards (see CR 7039627).
         increment_gc_time_stamp();
 
+        if (VerifyRememberedSets) {
+          if (!VerifySilently) {
+            gclog_or_tty->print_cr("[Verifying RemSets after GC]");
+          }
+          VerifyRegionRemSetClosure v_cl;
+          heap_region_iterate(&v_cl);
+        }
+
         verify_after_gc();
         check_bitmaps("GC End");
 
@@ -5045,12 +5089,8 @@
 public:
 
   void clean_klass(InstanceKlass* ik) {
-    ik->clean_implementors_list(_is_alive);
-    ik->clean_method_data(_is_alive);
-
-    // G1 specific cleanup work that has
-    // been moved here to be done in parallel.
-    ik->clean_dependent_nmethods();
+    ik->clean_weak_instanceklass_links(_is_alive);
+
     if (JvmtiExport::has_redefined_a_class()) {
       InstanceKlass::purge_previous_versions(ik);
     }
@@ -5728,23 +5768,11 @@
   hot_card_cache->reset_hot_cache_claimed_index();
   hot_card_cache->set_use_cache(false);
 
-  uint n_workers;
-  if (G1CollectedHeap::use_parallel_gc_threads()) {
-    n_workers =
-      AdaptiveSizePolicy::calc_active_workers(workers()->total_workers(),
-                                     workers()->active_workers(),
-                                     Threads::number_of_non_daemon_threads());
+  const uint n_workers = workers()->active_workers();
     assert(UseDynamicNumberOfGCThreads ||
            n_workers == workers()->total_workers(),
            "If not dynamic should be using all the  workers");
-    workers()->set_active_workers(n_workers);
     set_par_threads(n_workers);
-  } else {
-    assert(n_par_threads() == 0,
-           "Should be the original non-parallel value");
-    n_workers = 1;
-  }
-
 
   init_for_evac_failure(NULL);
 
--- a/src/share/vm/gc_implementation/g1/g1GCPhaseTimes.cpp	Mon Aug 08 20:18:53 2016 +0300
+++ b/src/share/vm/gc_implementation/g1/g1GCPhaseTimes.cpp	Fri Aug 12 18:10:37 2016 +0300
@@ -154,28 +154,28 @@
     _has_new_data = true;
   }
 
-  double average(){
-    calculate_totals();
+  double average(uint active_threads){
+    calculate_totals(active_threads);
     return _average;
   }
 
-  T sum() {
-    calculate_totals();
+  T sum(uint active_threads) {
+    calculate_totals(active_threads);
     return _sum;
   }
 
-  T minimum() {
-    calculate_totals();
+  T minimum(uint active_threads) {
+    calculate_totals(active_threads);
     return _min;
   }
 
-  T maximum() {
-    calculate_totals();
+  T maximum(uint active_threads) {
+    calculate_totals(active_threads);
     return _max;
   }
 
   void reset() PRODUCT_RETURN;
-  void verify() PRODUCT_RETURN;
+  void verify(uint active_threads) PRODUCT_RETURN;
 
   void set_enabled(bool enabled) { _enabled = enabled; }
 
@@ -183,7 +183,7 @@
 
  private:
 
-  void calculate_totals(){
+  void calculate_totals(uint active_threads){
     if (!_has_new_data) {
       return;
     }
@@ -191,13 +191,14 @@
     _sum = (T)0;
     _min = _data[0];
     _max = _min;
-    for (uint i = 0; i < _length; ++i) {
+    assert(active_threads <= _length, "Wrong number of active threads");
+    for (uint i = 0; i < active_threads; ++i) {
       T val = _data[i];
       _sum += val;
       _min = MIN2(_min, val);
       _max = MAX2(_max, val);
     }
-    _average = (double)_sum / (double)_length;
+    _average = (double)_sum / (double)active_threads;
     _has_new_data = false;
   }
 };
@@ -226,17 +227,18 @@
 }
 
 template <class T>
-void WorkerDataArray<T>::verify() {
+void WorkerDataArray<T>::verify(uint active_threads) {
   if (!_enabled) {
     return;
   }
 
-  for (uint i = 0; i < _length; i++) {
+  assert(active_threads <= _length, "Wrong number of active threads");
+  for (uint i = 0; i < active_threads; i++) {
     assert(_data[i] != WorkerDataArray<T>::uninitialized(),
         err_msg("Invalid data for worker %u in '%s'", i, _title));
   }
   if (_thread_work_items != NULL) {
-    _thread_work_items->verify();
+    _thread_work_items->verify(active_threads);
   }
 }
 
@@ -321,7 +323,7 @@
   }
 
   for (int i = 0; i < GCParPhasesSentinel; i++) {
-    _gc_par_phases[i]->verify();
+    _gc_par_phases[i]->verify(_active_gc_threads);
   }
 }
 
@@ -378,7 +380,7 @@
 
 // return the average time for a phase in milliseconds
 double G1GCPhaseTimes::average_time_ms(GCParPhases phase) {
-  return _gc_par_phases[phase]->average() * 1000.0;
+  return _gc_par_phases[phase]->average(_active_gc_threads) * 1000.0;
 }
 
 double G1GCPhaseTimes::get_time_ms(GCParPhases phase, uint worker_i) {
@@ -386,15 +388,15 @@
 }
 
 double G1GCPhaseTimes::sum_time_ms(GCParPhases phase) {
-  return _gc_par_phases[phase]->sum() * 1000.0;
+  return _gc_par_phases[phase]->sum(_active_gc_threads) * 1000.0;
 }
 
 double G1GCPhaseTimes::min_time_ms(GCParPhases phase) {
-  return _gc_par_phases[phase]->minimum() * 1000.0;
+  return _gc_par_phases[phase]->minimum(_active_gc_threads) * 1000.0;
 }
 
 double G1GCPhaseTimes::max_time_ms(GCParPhases phase) {
-  return _gc_par_phases[phase]->maximum() * 1000.0;
+  return _gc_par_phases[phase]->maximum(_active_gc_threads) * 1000.0;
 }
 
 size_t G1GCPhaseTimes::get_thread_work_item(GCParPhases phase, uint worker_i) {
@@ -404,22 +406,22 @@
 
 size_t G1GCPhaseTimes::sum_thread_work_items(GCParPhases phase) {
   assert(_gc_par_phases[phase]->thread_work_items() != NULL, "No sub count");
-  return _gc_par_phases[phase]->thread_work_items()->sum();
+  return _gc_par_phases[phase]->thread_work_items()->sum(_active_gc_threads);
 }
 
 double G1GCPhaseTimes::average_thread_work_items(GCParPhases phase) {
   assert(_gc_par_phases[phase]->thread_work_items() != NULL, "No sub count");
-  return _gc_par_phases[phase]->thread_work_items()->average();
+  return _gc_par_phases[phase]->thread_work_items()->average(_active_gc_threads);
 }
 
 size_t G1GCPhaseTimes::min_thread_work_items(GCParPhases phase) {
   assert(_gc_par_phases[phase]->thread_work_items() != NULL, "No sub count");
-  return _gc_par_phases[phase]->thread_work_items()->minimum();
+  return _gc_par_phases[phase]->thread_work_items()->minimum(_active_gc_threads);
 }
 
 size_t G1GCPhaseTimes::max_thread_work_items(GCParPhases phase) {
   assert(_gc_par_phases[phase]->thread_work_items() != NULL, "No sub count");
-  return _gc_par_phases[phase]->thread_work_items()->maximum();
+  return _gc_par_phases[phase]->thread_work_items()->maximum(_active_gc_threads);
 }
 
 class G1GCParPhasePrinter : public StackObj {
@@ -455,14 +457,16 @@
   }
 
   void print_time_values(LineBuffer& buf, G1GCPhaseTimes::GCParPhases phase_id, WorkerDataArray<double>* phase) {
-    for (uint i = 0; i < phase->_length; ++i) {
+    uint active_length = _phase_times->_active_gc_threads;
+    for (uint i = 0; i < active_length; ++i) {
       buf.append("  %.1lf", _phase_times->get_time_ms(phase_id, i));
     }
     buf.print_cr();
   }
 
   void print_count_values(LineBuffer& buf, G1GCPhaseTimes::GCParPhases phase_id, WorkerDataArray<size_t>* thread_work_items) {
-    for (uint i = 0; i < thread_work_items->_length; ++i) {
+    uint active_length = _phase_times->_active_gc_threads;
+    for (uint i = 0; i < active_length; ++i) {
       buf.append("  " SIZE_FORMAT, _phase_times->get_thread_work_item(phase_id, i));
     }
     buf.print_cr();
--- a/src/share/vm/gc_implementation/g1/g1RemSet.cpp	Mon Aug 08 20:18:53 2016 +0300
+++ b/src/share/vm/gc_implementation/g1/g1RemSet.cpp	Fri Aug 12 18:10:37 2016 +0300
@@ -335,7 +335,6 @@
 }
 
 void G1RemSet::prepare_for_oops_into_collection_set_do() {
-  cleanupHRRS();
   _g1->set_refine_cte_cl_concurrency(false);
   DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set();
   dcqs.concatenate_logs();
--- a/src/share/vm/gc_implementation/g1/heapRegion.cpp	Mon Aug 08 20:18:53 2016 +0300
+++ b/src/share/vm/gc_implementation/g1/heapRegion.cpp	Fri Aug 12 18:10:37 2016 +0300
@@ -639,8 +639,8 @@
   G1OffsetTableContigSpace::print_on(st);
 }
 
-class VerifyLiveClosure: public OopClosure {
-private:
+class G1VerificationClosure : public OopClosure {
+protected:
   G1CollectedHeap* _g1h;
   CardTableModRefBS* _bs;
   oop _containing_obj;
@@ -651,7 +651,7 @@
   // _vo == UsePrevMarking -> use "prev" marking information,
   // _vo == UseNextMarking -> use "next" marking information,
   // _vo == UseMarkWord    -> use mark word from object header.
-  VerifyLiveClosure(G1CollectedHeap* g1h, VerifyOption vo) :
+  G1VerificationClosure(G1CollectedHeap* g1h, VerifyOption vo) :
     _g1h(g1h), _bs(NULL), _containing_obj(NULL),
     _failures(false), _n_failures(0), _vo(vo)
   {
@@ -667,9 +667,6 @@
   bool failures() { return _failures; }
   int n_failures() { return _n_failures; }
 
-  virtual void do_oop(narrowOop* p) { do_oop_work(p); }
-  virtual void do_oop(      oop* p) { do_oop_work(p); }
-
   void print_object(outputStream* out, oop obj) {
 #ifdef PRODUCT
     Klass* k = obj->klass();
@@ -679,19 +676,31 @@
     obj->print_on(out);
 #endif // PRODUCT
   }
+};
+
+class VerifyLiveClosure : public G1VerificationClosure {
+public:
+  VerifyLiveClosure(G1CollectedHeap* g1h, VerifyOption vo) : G1VerificationClosure(g1h, vo) {}
+  virtual void do_oop(narrowOop* p) { do_oop_work(p); }
+  virtual void do_oop(oop* p) { do_oop_work(p); }
 
   template <class T>
   void do_oop_work(T* p) {
     assert(_containing_obj != NULL, "Precondition");
     assert(!_g1h->is_obj_dead_cond(_containing_obj, _vo),
-           "Precondition");
+      "Precondition");
+    verify_liveness(p);
+  }
+
+  template <class T>
+  void verify_liveness(T* p) {
     T heap_oop = oopDesc::load_heap_oop(p);
     if (!oopDesc::is_null(heap_oop)) {
       oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
       bool failed = false;
       if (!_g1h->is_in_closed_subset(obj) || _g1h->is_obj_dead_cond(obj, _vo)) {
         MutexLockerEx x(ParGCRareEvent_lock,
-                        Mutex::_no_safepoint_check_flag);
+          Mutex::_no_safepoint_check_flag);
 
         if (!_failures) {
           gclog_or_tty->cr();
@@ -727,50 +736,73 @@
         failed = true;
         _n_failures++;
       }
+    }
+  }
+};
 
-      if (!_g1h->full_collection() || G1VerifyRSetsDuringFullGC) {
-        HeapRegion* from = _g1h->heap_region_containing((HeapWord*)p);
-        HeapRegion* to   = _g1h->heap_region_containing(obj);
-        if (from != NULL && to != NULL &&
-            from != to &&
-            !to->isHumongous()) {
-          jbyte cv_obj = *_bs->byte_for_const(_containing_obj);
-          jbyte cv_field = *_bs->byte_for_const(p);
-          const jbyte dirty = CardTableModRefBS::dirty_card_val();
+class VerifyRemSetClosure : public G1VerificationClosure {
+public:
+  VerifyRemSetClosure(G1CollectedHeap* g1h, VerifyOption vo) : G1VerificationClosure(g1h, vo) {}
+  virtual void do_oop(narrowOop* p) { do_oop_work(p); }
+  virtual void do_oop(oop* p) { do_oop_work(p); }
 
-          bool is_bad = !(from->is_young()
-                          || to->rem_set()->contains_reference(p)
-                          || !G1HRRSFlushLogBuffersOnVerify && // buffers were not flushed
-                              (_containing_obj->is_objArray() ?
-                                  cv_field == dirty
-                               : cv_obj == dirty || cv_field == dirty));
-          if (is_bad) {
-            MutexLockerEx x(ParGCRareEvent_lock,
-                            Mutex::_no_safepoint_check_flag);
+  template <class T>
+  void do_oop_work(T* p) {
+    assert(_containing_obj != NULL, "Precondition");
+    assert(!_g1h->is_obj_dead_cond(_containing_obj, _vo),
+      "Precondition");
+    verify_remembered_set(p);
+  }
 
-            if (!_failures) {
-              gclog_or_tty->cr();
-              gclog_or_tty->print_cr("----------");
-            }
-            gclog_or_tty->print_cr("Missing rem set entry:");
-            gclog_or_tty->print_cr("Field "PTR_FORMAT" "
-                                   "of obj "PTR_FORMAT", "
-                                   "in region "HR_FORMAT,
-                                   p, (void*) _containing_obj,
-                                   HR_FORMAT_PARAMS(from));
-            _containing_obj->print_on(gclog_or_tty);
-            gclog_or_tty->print_cr("points to obj "PTR_FORMAT" "
-                                   "in region "HR_FORMAT,
-                                   (void*) obj,
-                                   HR_FORMAT_PARAMS(to));
+  template <class T>
+  void verify_remembered_set(T* p) {
+    T heap_oop = oopDesc::load_heap_oop(p);
+    if (!oopDesc::is_null(heap_oop)) {
+      oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
+      bool failed = false;
+      HeapRegion* from = _g1h->heap_region_containing((HeapWord*)p);
+      HeapRegion* to   = _g1h->heap_region_containing(obj);
+      if (from != NULL && to != NULL &&
+          from != to &&
+          !to->isHumongous()) {
+        jbyte cv_obj = *_bs->byte_for_const(_containing_obj);
+        jbyte cv_field = *_bs->byte_for_const(p);
+        const jbyte dirty = CardTableModRefBS::dirty_card_val();
+
+        bool is_bad = !(from->is_young()
+                        || to->rem_set()->contains_reference(p)
+                        || !G1HRRSFlushLogBuffersOnVerify && // buffers were not flushed
+                            (_containing_obj->is_objArray() ?
+                                cv_field == dirty
+                             : cv_obj == dirty || cv_field == dirty));
+        if (is_bad) {
+          MutexLockerEx x(ParGCRareEvent_lock,
+                          Mutex::_no_safepoint_check_flag);
+
+          if (!_failures) {
+            gclog_or_tty->cr();
+            gclog_or_tty->print_cr("----------");
+          }
+          gclog_or_tty->print_cr("Missing rem set entry:");
+          gclog_or_tty->print_cr("Field "PTR_FORMAT" "
+                                 "of obj "PTR_FORMAT", "
+                                 "in region "HR_FORMAT,
+                                 p, (void*) _containing_obj,
+                                 HR_FORMAT_PARAMS(from));
+          _containing_obj->print_on(gclog_or_tty);
+          gclog_or_tty->print_cr("points to obj "PTR_FORMAT" "
+                                 "in region "HR_FORMAT,
+                                 (void*) obj,
+                                 HR_FORMAT_PARAMS(to));
+          if (obj->is_oop()) {
             obj->print_on(gclog_or_tty);
-            gclog_or_tty->print_cr("Obj head CTE = %d, field CTE = %d.",
-                          cv_obj, cv_field);
-            gclog_or_tty->print_cr("----------");
-            gclog_or_tty->flush();
-            _failures = true;
-            if (!failed) _n_failures++;
           }
+          gclog_or_tty->print_cr("Obj head CTE = %d, field CTE = %d.",
+                        cv_obj, cv_field);
+          gclog_or_tty->print_cr("----------");
+          gclog_or_tty->flush();
+          _failures = true;
+          if (!failed) _n_failures++;
         }
       }
     }
@@ -787,6 +819,7 @@
   HeapWord* p = bottom();
   HeapWord* prev_p = NULL;
   VerifyLiveClosure vl_cl(g1, vo);
+  VerifyRemSetClosure vr_cl(g1, vo);
   bool is_humongous = isHumongous();
   bool do_bot_verify = !is_young();
   size_t object_num = 0;
@@ -832,7 +865,23 @@
           return;
         } else {
           vl_cl.set_containing_obj(obj);
-          obj->oop_iterate_no_header(&vl_cl);
+          if (!g1->full_collection() || G1VerifyRSetsDuringFullGC) {
+            // verify liveness and rem_set
+            vr_cl.set_containing_obj(obj);
+            G1Mux2Closure mux(&vl_cl, &vr_cl);
+            obj->oop_iterate_no_header(&mux);
+
+            if (vr_cl.failures()) {
+              *failures = true;
+            }
+            if (G1MaxVerifyFailures >= 0 &&
+              vr_cl.n_failures() >= G1MaxVerifyFailures) {
+              return;
+            }
+          } else {
+            // verify only liveness
+            obj->oop_iterate_no_header(&vl_cl);
+          }
           if (vl_cl.failures()) {
             *failures = true;
           }
@@ -842,7 +891,7 @@
           }
         }
       } else {
-        gclog_or_tty->print_cr(PTR_FORMAT" no an oop", (void *)obj);
+        gclog_or_tty->print_cr(PTR_FORMAT" not an oop", (void *)obj);
         *failures = true;
         return;
       }
@@ -930,6 +979,46 @@
   verify(VerifyOption_G1UsePrevMarking, /* failures */ &dummy);
 }
 
+void HeapRegion::verify_rem_set(VerifyOption vo, bool* failures) const {  // Remembered-set-only verification of this region; the outcome is reported through *failures.
+  G1CollectedHeap* g1 = G1CollectedHeap::heap();
+  *failures = false;  // reset first, so the caller only sees true if this walk reports a problem
+  HeapWord* p = bottom();
+  HeapWord* prev_p = NULL;  // NOTE(review): only written (at the end of the loop), never read in this function
+  VerifyRemSetClosure vr_cl(g1, vo);
+  while (p < top()) {  // step through [bottom(), top()) one block at a time
+    oop obj = oop(p);
+    size_t obj_size = block_size(p);
+    // Only blocks that is_obj_dead_cond(obj, this, vo) does not report as dead are examined.
+    if (!g1->is_obj_dead_cond(obj, this, vo)) {
+      if (obj->is_oop()) {
+        vr_cl.set_containing_obj(obj);
+        obj->oop_iterate_no_header(&vr_cl);
+        // vr_cl is shared by the whole walk, so its failure flag and count accumulate across objects.
+        if (vr_cl.failures()) {
+          *failures = true;
+        }
+        if (G1MaxVerifyFailures >= 0 &&  // a negative G1MaxVerifyFailures disables this early exit
+          vr_cl.n_failures() >= G1MaxVerifyFailures) {
+          return;
+        }
+      } else {
+        gclog_or_tty->print_cr(PTR_FORMAT " not an oop", p2i(obj));
+        *failures = true;
+        return;  // stop at the first non-dead block that fails is_oop()
+      }
+    }
+    // Advance by the block size computed at the top of the iteration.
+    prev_p = p;
+    p += obj_size;
+  }
+}
+
+void HeapRegion::verify_rem_set() const {  // Convenience overload: verifies with VerifyOption_G1UsePrevMarking.
+  bool failures = false;
+  verify_rem_set(VerifyOption_G1UsePrevMarking, &failures);
+  guarantee(!failures, "HeapRegion RemSet verification failed");  // guarantee() that the walk above reported no failure
+}
+
 // G1OffsetTableContigSpace code; copied from space.cpp.  Hope this can go
 // away eventually.
 
--- a/src/share/vm/gc_implementation/g1/heapRegion.hpp	Mon Aug 08 20:18:53 2016 +0300
+++ b/src/share/vm/gc_implementation/g1/heapRegion.hpp	Fri Aug 12 18:10:37 2016 +0300
@@ -779,6 +779,9 @@
 
   // Override; it uses the "prev" marking information
   virtual void verify() const;
+
+  void verify_rem_set(VerifyOption vo, bool *failures) const;
+  void verify_rem_set() const;
 };
 
 // HeapRegionClosure is used for iterating over regions.
--- a/src/share/vm/gc_implementation/g1/heapRegionRemSet.cpp	Mon Aug 08 20:18:53 2016 +0300
+++ b/src/share/vm/gc_implementation/g1/heapRegionRemSet.cpp	Fri Aug 12 18:10:37 2016 +0300
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2014, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2016, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -119,7 +119,9 @@
 
 public:
 
-  HeapRegion* hr() const { return _hr; }
+  HeapRegion* hr() const {
+    return (HeapRegion*) OrderAccess::load_ptr_acquire(&_hr);
+  }
 
   jint occupied() const {
     // Overkill, but if we ever need it...
@@ -132,10 +134,12 @@
       set_next(NULL);
       set_prev(NULL);
     }
-    _hr = hr;
     _collision_list_next = NULL;
     _occupied = 0;
     _bm.clear();
+    // Make sure that the bitmap clearing above has been finished before publishing
+    // this PRT to concurrent threads.
+    OrderAccess::release_store_ptr(&_hr, hr);
   }
 
   void add_reference(OopOrNarrowOopStar from) {
@@ -441,7 +445,7 @@
     if (G1TraceHeapRegionRememberedSet) {
       gclog_or_tty->print_cr("  from-card cache hit.");
     }
-    assert(contains_reference(from), "We just added it!");
+    assert(contains_reference(from), err_msg("We just found " PTR_FORMAT " in the FromCardCache", from));
     return;
   }
 
@@ -454,7 +458,7 @@
     if (G1TraceHeapRegionRememberedSet) {
       gclog_or_tty->print_cr("  coarse map hit.");
     }
-    assert(contains_reference(from), "We just added it!");
+    assert(contains_reference(from), err_msg("We just found " PTR_FORMAT " in the Coarse table", from));
     return;
   }
 
@@ -488,7 +492,7 @@
         if (G1TraceHeapRegionRememberedSet) {
           gclog_or_tty->print_cr("   added card to sparse table.");
         }
-        assert(contains_reference_locked(from), "We just added it!");
+        assert(contains_reference_locked(from), err_msg("We just added " PTR_FORMAT " to the Sparse table", from));
         return;
       } else {
         if (G1TraceHeapRegionRememberedSet) {
@@ -547,7 +551,7 @@
                           hr()->bottom(), from);
     }
   }
-  assert(contains_reference(from), "We just added it!");
+  assert(contains_reference(from), err_msg("We just added " PTR_FORMAT " to the PRT", from));
 }
 
 PerRegionTable*
@@ -568,7 +572,7 @@
   assert(_n_fine_entries == _max_fine_entries, "Precondition");
   PerRegionTable* max = NULL;
   jint max_occ = 0;
-  PerRegionTable** max_prev;
+  PerRegionTable** max_prev = NULL;
   size_t max_ind;
 
   size_t i = _fine_eviction_start;
@@ -604,6 +608,7 @@
   }
 
   guarantee(max != NULL, "Since _n_fine_entries > 0");
+  guarantee(max_prev != NULL, "Since max != NULL.");
 
   // Set the corresponding coarse bit.
   size_t max_hrm_index = (size_t) max->hr()->hrm_index();
@@ -1164,7 +1169,7 @@
 
 void HeapRegionRemSet::print_recorded() {
   int cur_evnt = 0;
-  Event cur_evnt_kind;
+  Event cur_evnt_kind = Event_illegal;
   int cur_evnt_ind = 0;
   if (_n_recorded_events > 0) {
     cur_evnt_kind = _recorded_events[cur_evnt];
--- a/src/share/vm/gc_implementation/g1/heapRegionRemSet.hpp	Mon Aug 08 20:18:53 2016 +0300
+++ b/src/share/vm/gc_implementation/g1/heapRegionRemSet.hpp	Fri Aug 12 18:10:37 2016 +0300
@@ -232,7 +232,7 @@
 
 public:
   enum Event {
-    Event_EvacStart, Event_EvacEnd, Event_RSUpdateEnd
+    Event_EvacStart, Event_EvacEnd, Event_RSUpdateEnd, Event_illegal
   };
 
 private:
--- a/src/share/vm/gc_implementation/g1/vm_operations_g1.cpp	Mon Aug 08 20:18:53 2016 +0300
+++ b/src/share/vm/gc_implementation/g1/vm_operations_g1.cpp	Fri Aug 12 18:10:37 2016 +0300
@@ -90,12 +90,8 @@
 
 void VM_G1IncCollectionPause::doit() {
   G1CollectedHeap* g1h = G1CollectedHeap::heap();
-  assert(!_should_initiate_conc_mark ||
-  ((_gc_cause == GCCause::_gc_locker && GCLockerInvokesConcurrent) ||
-   (_gc_cause == GCCause::_java_lang_system_gc && ExplicitGCInvokesConcurrent) ||
-    _gc_cause == GCCause::_g1_humongous_allocation ||
-    _gc_cause == GCCause::_update_allocation_context_stats_inc),
-      "only a GC locker, a System.gc(), stats update or a hum allocation induced GC should start a cycle");
+  assert(!_should_initiate_conc_mark || g1h->should_do_concurrent_full_gc(_gc_cause),
+      "only a GC locker, a System.gc(), stats update, whitebox, or a hum allocation induced GC should start a cycle");
 
   if (_word_size > 0) {
     // An allocation has been requested. So, try to do that first.
--- a/src/share/vm/gc_implementation/shared/adaptiveSizePolicy.cpp	Mon Aug 08 20:18:53 2016 +0300
+++ b/src/share/vm/gc_implementation/shared/adaptiveSizePolicy.cpp	Fri Aug 12 18:10:37 2016 +0300
@@ -168,7 +168,7 @@
 
   if (TraceDynamicGCThreads) {
      gclog_or_tty->print_cr("GCTaskManager::calc_default_active_workers() : "
-       "active_workers(): %d  new_acitve_workers: %d  "
+       "active_workers(): %d  new_active_workers: %d  "
        "prev_active_workers: %d\n"
        " active_workers_by_JT: %d  active_workers_by_heap_size: %d",
        (int) active_workers, (int) new_active_workers, (int) prev_active_workers,
@@ -193,8 +193,9 @@
      (!FLAG_IS_DEFAULT(ParallelGCThreads) && !ForceDynamicNumberOfGCThreads)) {
     new_active_workers = total_workers;
   } else {
+    uintx min_workers = (total_workers == 1) ? 1 : 2;
     new_active_workers = calc_default_active_workers(total_workers,
-                                                     2, /* Minimum number of workers */
+                                                     min_workers,
                                                      active_workers,
                                                      application_workers);
   }
--- a/src/share/vm/gc_interface/gcCause.cpp	Mon Aug 08 20:18:53 2016 +0300
+++ b/src/share/vm/gc_interface/gcCause.cpp	Fri Aug 12 18:10:37 2016 +0300
@@ -54,6 +54,9 @@
     case _wb_young_gc:
       return "WhiteBox Initiated Young GC";
 
+    case _wb_conc_mark:
+      return "WhiteBox Initiated Concurrent Mark";
+
     case _update_allocation_context_stats_inc:
     case _update_allocation_context_stats_full:
       return "Update Allocation Context Stats";
--- a/src/share/vm/gc_interface/gcCause.hpp	Mon Aug 08 20:18:53 2016 +0300
+++ b/src/share/vm/gc_interface/gcCause.hpp	Fri Aug 12 18:10:37 2016 +0300
@@ -47,6 +47,7 @@
     _heap_inspection,
     _heap_dump,
     _wb_young_gc,
+    _wb_conc_mark,
     _update_allocation_context_stats_inc,
     _update_allocation_context_stats_full,
 
--- a/src/share/vm/interpreter/bytecodeStream.cpp	Mon Aug 08 20:18:53 2016 +0300
+++ b/src/share/vm/interpreter/bytecodeStream.cpp	Fri Aug 12 18:10:37 2016 +0300
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -31,12 +31,12 @@
   // set next bytecode position
   address bcp = RawBytecodeStream::bcp();
   address end = method()->code_base() + end_bci();
-  int l = Bytecodes::raw_special_length_at(bcp, end);
-  if (l <= 0 || (_bci + l) > _end_bci) {
+  int len = Bytecodes::raw_special_length_at(bcp, end);
+  // Very large tableswitch or lookupswitch size can cause _next_bci to overflow.
+  if (len <= 0 || (_bci > _end_bci - len) || (_bci - len >= _next_bci)) {
     code = Bytecodes::_illegal;
   } else {
-    _next_bci += l;
-    assert(_bci < _next_bci, "length must be > 0");
+    _next_bci += len;
     // set attributes
     _is_wide = false;
     // check for special (uncommon) cases
--- a/src/share/vm/interpreter/bytecodeStream.hpp	Mon Aug 08 20:18:53 2016 +0300
+++ b/src/share/vm/interpreter/bytecodeStream.hpp	Fri Aug 12 18:10:37 2016 +0300
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -152,12 +152,15 @@
     code        = Bytecodes::code_or_bp_at(bcp);
 
     // set next bytecode position
-    int l = Bytecodes::length_for(code);
-    if (l > 0 && (_bci + l) <= _end_bci) {
+    int len = Bytecodes::length_for(code);
+    if (len > 0 && (_bci <= _end_bci - len)) {
       assert(code != Bytecodes::_wide && code != Bytecodes::_tableswitch
              && code != Bytecodes::_lookupswitch, "can't be special bytecode");
       _is_wide = false;
-      _next_bci += l;
+      _next_bci += len;
+      if (_next_bci <= _bci) { // Check for integer overflow
+        code = Bytecodes::_illegal;
+      }
       _raw_code = code;
       return code;
     } else {
@@ -206,19 +209,23 @@
       // note that we cannot advance before having the
       // tty bytecode otherwise the stepping is wrong!
       // (carefull: length_for(...) must be used first!)
-      int l = Bytecodes::length_for(code);
-      if (l == 0) l = Bytecodes::length_at(_method(), bcp);
-      _next_bci  += l;
-      assert(_bci < _next_bci, "length must be > 0");
-      // set attributes
-      _is_wide      = false;
-      // check for special (uncommon) cases
-      if (code == Bytecodes::_wide) {
-        raw_code = (Bytecodes::Code)bcp[1];
-        code = raw_code;  // wide BCs are always Java-normal
-        _is_wide = true;
+      int len = Bytecodes::length_for(code);
+      if (len == 0) len = Bytecodes::length_at(_method(), bcp);
+      if (len <= 0 || (_bci > _end_bci - len) || (_bci - len >= _next_bci)) {
+        raw_code = code = Bytecodes::_illegal;
+      } else {
+        _next_bci  += len;
+        assert(_bci < _next_bci, "length must be > 0");
+        // set attributes
+        _is_wide      = false;
+        // check for special (uncommon) cases
+        if (code == Bytecodes::_wide) {
+          raw_code = (Bytecodes::Code)bcp[1];
+          code = raw_code;  // wide BCs are always Java-normal
+          _is_wide = true;
+        }
+        assert(Bytecodes::is_java_code(code), "sanity check");
       }
-      assert(Bytecodes::is_java_code(code), "sanity check");
     }
     _raw_code = raw_code;
     _code = code;
--- a/src/share/vm/interpreter/templateInterpreter.cpp	Mon Aug 08 20:18:53 2016 +0300
+++ b/src/share/vm/interpreter/templateInterpreter.cpp	Fri Aug 12 18:10:37 2016 +0300
@@ -528,7 +528,7 @@
   if (StopInterpreterAt > 0)                                     stop_interpreter_at();
   __ verify_FPU(1, t->tos_in());
 #endif // !PRODUCT
-  int step;
+  int step = 0;
   if (!t->does_dispatch()) {
     step = t->is_wide() ? Bytecodes::wide_length_for(t->bytecode()) : Bytecodes::length_for(t->bytecode());
     if (tos_out == ilgl) tos_out = t->tos_out();
--- a/src/share/vm/memory/allocation.cpp	Mon Aug 08 20:18:53 2016 +0300
+++ b/src/share/vm/memory/allocation.cpp	Fri Aug 12 18:10:37 2016 +0300
@@ -83,7 +83,7 @@
 }
 
 void* ResourceObj::operator new(size_t size, allocation_type type, MEMFLAGS flags) throw() {
-  address res;
+  address res = NULL;
   switch (type) {
    case C_HEAP:
     res = (address)AllocateHeap(size, flags, CALLER_PC);
@@ -105,8 +105,8 @@
 
 void* ResourceObj::operator new(size_t size, const std::nothrow_t&  nothrow_constant,
     allocation_type type, MEMFLAGS flags) throw() {
-  //should only call this with std::nothrow, use other operator new() otherwise
-  address res;
+  // should only call this with std::nothrow, use other operator new() otherwise
+  address res = NULL;
   switch (type) {
    case C_HEAP:
     res = (address)AllocateHeap(size, flags, CALLER_PC, AllocFailStrategy::RETURN_NULL);
--- a/src/share/vm/memory/metaspaceShared.cpp	Mon Aug 08 20:18:53 2016 +0300
+++ b/src/share/vm/memory/metaspaceShared.cpp	Fri Aug 12 18:10:37 2016 +0300
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2012, 2014, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012, 2016, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -30,6 +30,7 @@
 #include "classfile/sharedClassUtil.hpp"
 #include "classfile/symbolTable.hpp"
 #include "classfile/systemDictionary.hpp"
+#include "classfile/systemDictionaryShared.hpp"
 #include "code/codeCache.hpp"
 #include "memory/filemap.hpp"
 #include "memory/gcLocker.hpp"
@@ -53,6 +54,7 @@
 bool MetaspaceShared::_check_classes_made_progress;
 bool MetaspaceShared::_has_error_classes;
 bool MetaspaceShared::_archive_loading_failed = false;
+bool MetaspaceShared::_remapped_readwrite = false;
 // Read/write a data stream for restoring/preserving metadata pointers and
 // miscellaneous data from/to the shared archive file.
 
@@ -684,6 +686,10 @@
       exit(1);
     }
   }
+
+  // Copy the dependencies from C_HEAP-alloced GrowableArrays to RO-alloced
+  // Arrays
+  SystemDictionaryShared::finalize_verification_dependencies();
 }
 
 void MetaspaceShared::prepare_for_dumping() {
@@ -1096,6 +1102,7 @@
     if (!mapinfo->remap_shared_readonly_as_readwrite()) {
       return false;
     }
+    _remapped_readwrite = true;
   }
   return true;
 }
--- a/src/share/vm/memory/metaspaceShared.hpp	Mon Aug 08 20:18:53 2016 +0300
+++ b/src/share/vm/memory/metaspaceShared.hpp	Fri Aug 12 18:10:37 2016 +0300
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012, 2016, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -55,6 +55,7 @@
   static bool _check_classes_made_progress;
   static bool _has_error_classes;
   static bool _archive_loading_failed;
+  static bool _remapped_readwrite;
  public:
   enum {
     vtbl_list_size         = 17,   // number of entries in the shared space vtable list.
@@ -123,6 +124,10 @@
   // sharing is enabled. Simply returns true if sharing is not enabled
   // or if the remapping has already been done by a prior call.
   static bool remap_shared_readonly_as_readwrite() NOT_CDS_RETURN_(true);
+  static bool remapped_readwrite() {  // Accessor for the _remapped_readwrite flag declared above.
+    CDS_ONLY(return _remapped_readwrite);  // NOTE(review): presumably compiled only when CDS is built in - confirm against the macro definition
+    NOT_CDS(return false);  // NOTE(review): presumably the fallback for builds without CDS - confirm
+  }
 
   static void print_shared_spaces();
 
--- a/src/share/vm/memory/universe.cpp	Mon Aug 08 20:18:53 2016 +0300
+++ b/src/share/vm/memory/universe.cpp	Fri Aug 12 18:10:37 2016 +0300
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -114,6 +114,7 @@
 LatestMethodCache* Universe::_finalizer_register_cache = NULL;
 LatestMethodCache* Universe::_loader_addClass_cache    = NULL;
 LatestMethodCache* Universe::_pd_implies_cache         = NULL;
+LatestMethodCache* Universe::_throw_illegal_access_error_cache = NULL;
 oop Universe::_out_of_memory_error_java_heap          = NULL;
 oop Universe::_out_of_memory_error_metaspace          = NULL;
 oop Universe::_out_of_memory_error_class_metaspace    = NULL;
@@ -123,13 +124,13 @@
 objArrayOop Universe::_preallocated_out_of_memory_error_array = NULL;
 volatile jint Universe::_preallocated_out_of_memory_error_avail_count = 0;
 bool Universe::_verify_in_progress                    = false;
+long Universe::verify_flags                           = Universe::Verify_All;
 oop Universe::_null_ptr_exception_instance            = NULL;
 oop Universe::_arithmetic_exception_instance          = NULL;
 oop Universe::_virtual_machine_error_instance         = NULL;
 oop Universe::_vm_exception                           = NULL;
 oop Universe::_allocation_context_notification_obj    = NULL;
 
-Method* Universe::_throw_illegal_access_error         = NULL;
 Array<int>* Universe::_the_empty_int_array            = NULL;
 Array<u2>* Universe::_the_empty_short_array           = NULL;
 Array<Klass*>* Universe::_the_empty_klass_array     = NULL;
@@ -235,6 +236,7 @@
   _finalizer_register_cache->serialize(f);
   _loader_addClass_cache->serialize(f);
   _pd_implies_cache->serialize(f);
+  _throw_illegal_access_error_cache->serialize(f);
 }
 
 void Universe::check_alignment(uintx size, uintx alignment, const char* name) {
@@ -663,6 +665,7 @@
   Universe::_finalizer_register_cache = new LatestMethodCache();
   Universe::_loader_addClass_cache    = new LatestMethodCache();
   Universe::_pd_implies_cache         = new LatestMethodCache();
+  Universe::_throw_illegal_access_error_cache = new LatestMethodCache();
 
   if (UseSharedSpaces) {
     // Read the data structures supporting the shared spaces (shared
@@ -681,6 +684,9 @@
       MetaspaceShared::prepare_for_dumping();
     }
   }
+  if (strlen(VerifySubSet) > 0) {
+    Universe::initialize_verify_flags();
+  }
 
   return JNI_OK;
 }
@@ -847,12 +853,6 @@
     // See needs_explicit_null_check.
     // Only set the heap base for compressed oops because it indicates
     // compressed oops for pstack code.
-    bool verbose = PrintCompressedOopsMode || (PrintMiscellaneous && Verbose);
-    if (verbose) {
-      tty->cr();
-      tty->print("heap address: " PTR_FORMAT ", size: " SIZE_FORMAT " MB",
-                 Universe::heap()->base(), Universe::heap()->reserved_region().byte_size()/M);
-    }
     if (((uint64_t)Universe::heap()->reserved_region().end() > OopEncodingHeapMax)) {
       // Can't reserve heap below 32Gb.
       // keep the Universe::narrow_oop_base() set in Universe::reserve_heap()
@@ -862,16 +862,8 @@
       // are decoded so that NULL is preserved, so this page will not be accessed.
       Universe::set_narrow_oop_use_implicit_null_checks(false);
 #endif
-      if (verbose) {
-        tty->print(", %s: "PTR_FORMAT,
-            narrow_oop_mode_to_string(HeapBasedNarrowOop),
-            Universe::narrow_oop_base());
-      }
     } else {
       Universe::set_narrow_oop_base(0);
-      if (verbose) {
-        tty->print(", %s", narrow_oop_mode_to_string(ZeroBasedNarrowOop));
-      }
 #ifdef _WIN64
       if (!Universe::narrow_oop_use_implicit_null_checks()) {
         // Don't need guard page for implicit checks in indexed addressing
@@ -884,17 +876,14 @@
         Universe::set_narrow_oop_shift(LogMinObjAlignmentInBytes);
       } else {
         Universe::set_narrow_oop_shift(0);
-        if (verbose) {
-          tty->print(", %s", narrow_oop_mode_to_string(UnscaledNarrowOop));
-        }
       }
     }
 
-    if (verbose) {
-      tty->cr();
-      tty->cr();
+    Universe::set_narrow_ptrs_base(Universe::narrow_oop_base());
+
+    if (PrintCompressedOopsMode || (PrintMiscellaneous && Verbose)) {
+      Universe::print_compressed_oops_mode();
     }
-    Universe::set_narrow_ptrs_base(Universe::narrow_oop_base());
   }
   // Universe::narrow_oop_base() is one page below the heap.
   assert((intptr_t)Universe::narrow_oop_base() <= (intptr_t)(Universe::heap()->base() -
@@ -915,6 +904,24 @@
   return JNI_OK;
 }
 
+void Universe::print_compressed_oops_mode() {  // Prints the heap address/size and the compressed oops mode to tty.
+  tty->cr();
+  tty->print("heap address: " PTR_FORMAT ", size: " SIZE_FORMAT " MB",
+              Universe::heap()->base(), Universe::heap()->reserved_region().byte_size()/M);  // byte size is divided by M before printing
+
+  tty->print(", Compressed Oops mode: %s", narrow_oop_mode_to_string(narrow_oop_mode()));
+
+  if (Universe::narrow_oop_base() != 0) {  // the base is appended only when it is non-zero
+    tty->print(":" PTR_FORMAT, Universe::narrow_oop_base());
+  }
+
+  if (Universe::narrow_oop_shift() != 0) {  // likewise the shift is omitted when it is zero
+    tty->print(", Oop shift amount: %d", Universe::narrow_oop_shift());
+  }
+
+  tty->cr();  // first cr() ends the report line, the second emits an empty line after it
+  tty->cr();
+}
 
 // Reserve the Java heap, which is now the same for all GCs.
 ReservedSpace Universe::reserve_heap(size_t heap_size, size_t alignment) {
@@ -984,11 +991,11 @@
 const char* Universe::narrow_oop_mode_to_string(Universe::NARROW_OOP_MODE mode) {
   switch (mode) {
     case UnscaledNarrowOop:
-      return "32-bits Oops";
+      return "32-bit";
     case ZeroBasedNarrowOop:
-      return "zero based Compressed Oops";
+      return "Zero based";
     case HeapBasedNarrowOop:
-      return "Compressed Oops with base";
+      return "Non-zero based";
   }
 
   ShouldNotReachHere();
@@ -1134,7 +1141,8 @@
     tty->print_cr("Unable to link/verify Unsafe.throwIllegalAccessError method");
     return false; // initialization failed (cannot throw exception yet)
   }
-  Universe::_throw_illegal_access_error = m;
+  Universe::_throw_illegal_access_error_cache->init(
+    SystemDictionary::misc_Unsafe_klass(), m);
 
   // Setup method for registering loaded classes in class loader vector
   InstanceKlass::cast(SystemDictionary::ClassLoader_klass())->link_class(CHECK_false);
@@ -1160,7 +1168,7 @@
       return false; // initialization failed
     }
     Universe::_pd_implies_cache->init(
-      SystemDictionary::ProtectionDomain_klass(), m);;
+      SystemDictionary::ProtectionDomain_klass(), m);
   }
 
   // The folowing is initializing converter functions for serialization in
@@ -1357,6 +1365,53 @@
   st->print_cr("}");
 }
 
+void Universe::initialize_verify_flags() {  // Rebuilds verify_flags from the names listed in the VerifySubSet string.
+  verify_flags = 0;  // start from an empty mask; only the names found below set bits
+  const char delimiter[] = " ,";  // names may be separated by spaces and/or commas
+
+  size_t length = strlen(VerifySubSet);
+  char* subset_list = NEW_C_HEAP_ARRAY(char, length + 1, mtInternal);
+  strncpy(subset_list, VerifySubSet, length + 1);  // strtok() modifies its argument, so tokenize a private copy (length + 1 covers the terminating NUL)
+
+  char* token = strtok(subset_list, delimiter);
+  while (token != NULL) {  // one iteration per name; comparison is exact and case-sensitive (strcmp)
+    if (strcmp(token, "threads") == 0) {
+      verify_flags |= Verify_Threads;
+    } else if (strcmp(token, "heap") == 0) {
+      verify_flags |= Verify_Heap;
+    } else if (strcmp(token, "symbol_table") == 0) {
+      verify_flags |= Verify_SymbolTable;
+    } else if (strcmp(token, "string_table") == 0) {
+      verify_flags |= Verify_StringTable;
+    } else if (strcmp(token, "codecache") == 0) {
+      verify_flags |= Verify_CodeCache;
+    } else if (strcmp(token, "dictionary") == 0) {
+      verify_flags |= Verify_SystemDictionary;
+    } else if (strcmp(token, "classloader_data_graph") == 0) {
+      verify_flags |= Verify_ClassLoaderDataGraph;
+    } else if (strcmp(token, "metaspace") == 0) {
+      verify_flags |= Verify_MetaspaceAux;
+    } else if (strcmp(token, "jni_handles") == 0) {
+      verify_flags |= Verify_JNIHandles;
+    } else if (strcmp(token, "c-heap") == 0) {
+      verify_flags |= Verify_CHeap;
+    } else if (strcmp(token, "codecache_oops") == 0) {
+      verify_flags |= Verify_CodeCacheOops;
+    } else {
+      vm_exit_during_initialization(err_msg("VerifySubSet: \'%s\' memory sub-system is unknown, please correct it", token));  // unrecognized name is reported via vm_exit_during_initialization
+    }
+    token = strtok(NULL, delimiter);  // NULL tells strtok() to continue in the same buffer
+  }
+  FREE_C_HEAP_ARRAY(char, subset_list, mtInternal);  // release the private copy made above
+}
+
+bool Universe::should_verify_subset(uint subset) {
+  // A subsystem is verified when its bit is present in verify_flags.
+  // verify_flags starts out as Verify_All and is only narrowed by
+  // initialize_verify_flags().
+  return (verify_flags & subset) != 0;
+}
+
 void Universe::verify(VerifyOption option, const char* prefix, bool silent) {
   // The use of _verify_in_progress is a temporary work around for
   // 6320749.  Don't bother with a creating a class to set and clear
@@ -1376,33 +1431,55 @@
 
   if (!silent) gclog_or_tty->print("%s", prefix);
   if (!silent) gclog_or_tty->print("[Verifying ");
-  if (!silent) gclog_or_tty->print("threads ");
-  Threads::verify();
-  if (!silent) gclog_or_tty->print("heap ");
-  heap()->verify(silent, option);
-  if (!silent) gclog_or_tty->print("syms ");
-  SymbolTable::verify();
-  if (!silent) gclog_or_tty->print("strs ");
-  StringTable::verify();
+  if (should_verify_subset(Verify_Threads)) {
+    if (!silent) gclog_or_tty->print("Threads ");
+    Threads::verify();
+  }
+  if (should_verify_subset(Verify_Heap)) {
+    if (!silent) gclog_or_tty->print("Heap ");
+    heap()->verify(silent, option);
+  }
+  if (should_verify_subset(Verify_SymbolTable)) {
+    if (!silent) gclog_or_tty->print("SymbolTable ");
+    SymbolTable::verify();
+  }
+  if (should_verify_subset(Verify_StringTable)) {
+    if (!silent) gclog_or_tty->print("StringTable ");
+    StringTable::verify();
+  }
+  if (should_verify_subset(Verify_CodeCache)) {
   {
     MutexLockerEx mu(CodeCache_lock, Mutex::_no_safepoint_check_flag);
-    if (!silent) gclog_or_tty->print("zone ");
+    if (!silent) gclog_or_tty->print("CodeCache ");
     CodeCache::verify();
   }
-  if (!silent) gclog_or_tty->print("dict ");
-  SystemDictionary::verify();
+  }
+  if (should_verify_subset(Verify_SystemDictionary)) {
+    if (!silent) gclog_or_tty->print("SystemDictionary ");
+    SystemDictionary::verify();
+  }
 #ifndef PRODUCT
-  if (!silent) gclog_or_tty->print("cldg ");
-  ClassLoaderDataGraph::verify();
+  if (should_verify_subset(Verify_ClassLoaderDataGraph)) {
+    if (!silent) gclog_or_tty->print("ClassLoaderDataGraph ");
+    ClassLoaderDataGraph::verify();
+  }
 #endif
-  if (!silent) gclog_or_tty->print("metaspace chunks ");
-  MetaspaceAux::verify_free_chunks();
-  if (!silent) gclog_or_tty->print("hand ");
-  JNIHandles::verify();
-  if (!silent) gclog_or_tty->print("C-heap ");
-  os::check_heap();
-  if (!silent) gclog_or_tty->print("code cache ");
-  CodeCache::verify_oops();
+  if (should_verify_subset(Verify_MetaspaceAux)) {
+    if (!silent) gclog_or_tty->print("MetaspaceAux ");
+    MetaspaceAux::verify_free_chunks();
+  }
+  if (should_verify_subset(Verify_JNIHandles)) {
+    if (!silent) gclog_or_tty->print("JNIHandles ");
+    JNIHandles::verify();
+  }
+  if (should_verify_subset(Verify_CHeap)) {
+    if (!silent) gclog_or_tty->print("C-heap ");
+    os::check_heap();
+  }
+  if (should_verify_subset(Verify_CodeCacheOops)) {
+    if (!silent) gclog_or_tty->print("CodeCache Oops ");
+    CodeCache::verify_oops();
+  }
   if (!silent) gclog_or_tty->print_cr("]");
 
   _verify_in_progress = false;
--- a/src/share/vm/memory/universe.hpp	Mon Aug 08 20:18:53 2016 +0300
+++ b/src/share/vm/memory/universe.hpp	Fri Aug 12 18:10:37 2016 +0300
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -148,8 +148,7 @@
   static LatestMethodCache* _finalizer_register_cache; // static method for registering finalizable objects
   static LatestMethodCache* _loader_addClass_cache;    // method for registering loaded classes in class loader vector
   static LatestMethodCache* _pd_implies_cache;         // method for checking protection domain attributes
-
-  static Method* _throw_illegal_access_error;
+  static LatestMethodCache* _throw_illegal_access_error_cache; // Unsafe.throwIllegalAccessError() method
 
   // preallocated error objects (no backtrace)
   static oop          _out_of_memory_error_java_heap;
@@ -248,6 +247,7 @@
   static int _verify_count;                           // number of verifies done
   // True during call to verify().  Should only be set/cleared in verify().
   static bool _verify_in_progress;
+  static long verify_flags;
 
   static void compute_verify_oop_data();
 
@@ -305,6 +305,7 @@
   static Method*      loader_addClass_method()        { return _loader_addClass_cache->get_method(); }
 
   static Method*      protection_domain_implies_method() { return _pd_implies_cache->get_method(); }
+  static Method*      throw_illegal_access_error()    { return _throw_illegal_access_error_cache->get_method(); }
 
   static oop          null_ptr_exception_instance()   { return _null_ptr_exception_instance;   }
   static oop          arithmetic_exception_instance() { return _arithmetic_exception_instance; }
@@ -314,8 +315,6 @@
   static inline oop   allocation_context_notification_obj();
   static inline void  set_allocation_context_notification_obj(oop obj);
 
-  static Method*      throw_illegal_access_error()    { return _throw_illegal_access_error; }
-
   static Array<int>*       the_empty_int_array()    { return _the_empty_int_array; }
   static Array<u2>*        the_empty_short_array()  { return _the_empty_short_array; }
   static Array<Method*>* the_empty_method_array() { return _the_empty_method_array; }
@@ -376,6 +375,8 @@
   static void     set_narrow_ptrs_base(address a)         { _narrow_ptrs_base = a; }
   static address  narrow_ptrs_base()                      { return _narrow_ptrs_base; }
 
+  static void     print_compressed_oops_mode();
+
   // this is set in vm_version on sparc (and then reset in universe afaict)
   static void     set_narrow_oop_shift(int shift)         {
     _narrow_oop._shift   = shift;
@@ -425,6 +426,22 @@
   static void init_self_patching_vtbl_list(void** list, int count);
 
   // Debugging
+  enum VERIFY_FLAGS {  // One bit per verification step in Universe::verify(); quoted names are the VerifySubSet tokens that set the bit.
+    Verify_Threads = 1,  // "threads"
+    Verify_Heap = 2,  // "heap"
+    Verify_SymbolTable = 4,  // "symbol_table"
+    Verify_StringTable = 8,  // "string_table"
+    Verify_CodeCache = 16,  // "codecache"
+    Verify_SystemDictionary = 32,  // "dictionary"
+    Verify_ClassLoaderDataGraph = 64,  // "classloader_data_graph"
+    Verify_MetaspaceAux = 128,  // "metaspace"
+    Verify_JNIHandles = 256,  // "jni_handles"
+    Verify_CHeap = 512,  // "c-heap"
+    Verify_CodeCacheOops = 1024,  // "codecache_oops"
+    Verify_All = -1  // initial value of verify_flags; -1 intersects with every bit above
+  };
+  static void initialize_verify_flags();
+  static bool should_verify_subset(uint subset);
   static bool verify_in_progress() { return _verify_in_progress; }
   static void verify(VerifyOption option, const char* prefix, bool silent = VerifySilently);
   static void verify(const char* prefix, bool silent = VerifySilently) {
--- a/src/share/vm/oops/instanceKlass.cpp	Mon Aug 08 20:18:53 2016 +0300
+++ b/src/share/vm/oops/instanceKlass.cpp	Fri Aug 12 18:10:37 2016 +0300
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -25,6 +25,7 @@
 #include "precompiled.hpp"
 #include "classfile/javaClasses.hpp"
 #include "classfile/systemDictionary.hpp"
+#include "classfile/systemDictionaryShared.hpp"
 #include "classfile/verifier.hpp"
 #include "classfile/vmSymbols.hpp"
 #include "compiler/compileBroker.hpp"
@@ -706,6 +707,16 @@
 
         // also sets rewritten
         this_oop->rewrite_class(CHECK_false);
+      } else if (this_oop()->is_shared()) {
+        ResourceMark rm(THREAD);
+        char* message_buffer; // res-allocated by check_verification_dependencies
+        Handle loader = this_oop()->class_loader();
+        Handle pd     = this_oop()->protection_domain();
+        bool verified = SystemDictionaryShared::check_verification_dependencies(this_oop(),
+                        loader, pd, &message_buffer, THREAD);
+        if (!verified) {
+          THROW_MSG_(vmSymbols::java_lang_VerifyError(), message_buffer, false);
+        }
       }
 
       // relocate jsrs and link methods after they are all rewritten
@@ -715,7 +726,12 @@
       // methods have been rewritten since rewrite may
       // fabricate new Method*s.
       // also does loader constraint checking
-      if (!this_oop()->is_shared()) {
+      //
+      // Initialize_vtable and initialize_itable need to be rerun for
+      // a shared class if the class is not loaded by the NULL classloader.
+      ClassLoaderData * loader_data = this_oop->class_loader_data();
+      if (!(this_oop()->is_shared() &&
+            loader_data->is_the_null_class_loader_data())) {
         ResourceMark rm(THREAD);
         this_oop->vtable()->initialize_vtable(true, CHECK_false);
         this_oop->itable()->initialize_itable(true, CHECK_false);
@@ -1969,7 +1985,7 @@
 // find a corresponding bucket otherwise there's a bug in the
 // recording of dependecies.
 //
-void InstanceKlass::remove_dependent_nmethod(nmethod* nm) {
+void InstanceKlass::remove_dependent_nmethod(nmethod* nm, bool delete_immediately) {
   assert_locked_or_safepoint(CodeCache_lock);
   nmethodBucket* b = _dependencies;
   nmethodBucket* last = NULL;
@@ -1978,7 +1994,17 @@
       int val = b->decrement();
       guarantee(val >= 0, err_msg("Underflow: %d", val));
       if (val == 0) {
-        set_has_unloaded_dependent(true);
+        if (delete_immediately) {
+          if (last == NULL) {
+            _dependencies = b->next();
+          } else {
+            last->set_next(b->next());
+          }
+          delete b;
+        } else {
+          // The deletion of this entry is deferred until a later, potentially parallel GC phase.
+          set_has_unloaded_dependent(true);
+        }
       }
       return;
     }
@@ -2318,6 +2344,13 @@
 
 #endif // INCLUDE_ALL_GCS
 
+void InstanceKlass::clean_weak_instanceklass_links(BoolObjectClosure* is_alive) {  // Single entry point that runs the three clean_* steps below.
+  clean_implementors_list(is_alive);
+  clean_method_data(is_alive);
+
+  clean_dependent_nmethods();  // unlike the two calls above, this one takes no is_alive closure
+}
+
 void InstanceKlass::clean_implementors_list(BoolObjectClosure* is_alive) {
   assert(class_loader_data()->is_alive(is_alive), "this klass should be live");
   if (is_interface()) {
--- a/src/share/vm/oops/instanceKlass.hpp	Mon Aug 08 20:18:53 2016 +0300
+++ b/src/share/vm/oops/instanceKlass.hpp	Fri Aug 12 18:10:37 2016 +0300
@@ -785,7 +785,7 @@
   // maintenance of deoptimization dependencies
   int mark_dependent_nmethods(DepChange& changes);
   void add_dependent_nmethod(nmethod* nm);
-  void remove_dependent_nmethod(nmethod* nm);
+  void remove_dependent_nmethod(nmethod* nm, bool delete_immediately);
 
   // On-stack replacement support
   nmethod* osr_nmethods_head() const         { return _osr_nmethods_head; };
@@ -974,6 +974,7 @@
   void oop_follow_contents(oop obj);
   int  oop_adjust_pointers(oop obj);
 
+  void clean_weak_instanceklass_links(BoolObjectClosure* is_alive);
   void clean_implementors_list(BoolObjectClosure* is_alive);
   void clean_method_data(BoolObjectClosure* is_alive);
   void clean_dependent_nmethods();
--- a/src/share/vm/oops/klass.cpp	Mon Aug 08 20:18:53 2016 +0300
+++ b/src/share/vm/oops/klass.cpp	Fri Aug 12 18:10:37 2016 +0300
@@ -454,8 +454,7 @@
     // Clean the implementors list and method data.
     if (clean_alive_klasses && current->oop_is_instance()) {
       InstanceKlass* ik = InstanceKlass::cast(current);
-      ik->clean_implementors_list(is_alive);
-      ik->clean_method_data(is_alive);
+      ik->clean_weak_instanceklass_links(is_alive);
     }
   }
 }
--- a/src/share/vm/oops/klassVtable.cpp	Mon Aug 08 20:18:53 2016 +0300
+++ b/src/share/vm/oops/klassVtable.cpp	Fri Aug 12 18:10:37 2016 +0300
@@ -27,6 +27,7 @@
 #include "classfile/vmSymbols.hpp"
 #include "gc_implementation/shared/markSweep.inline.hpp"
 #include "memory/gcLocker.hpp"
+#include "memory/metaspaceShared.hpp"
 #include "memory/resourceArea.hpp"
 #include "memory/universe.inline.hpp"
 #include "oops/instanceKlass.hpp"
@@ -47,6 +48,10 @@
   return (InstanceKlass*)k;
 }
 
+bool klassVtable::is_preinitialized_vtable() {  // True for a shared class as long as MetaspaceShared::remapped_readwrite() is false.
+  return _klass->is_shared() && !MetaspaceShared::remapped_readwrite();
+}
+
 
 // this function computes the vtable size (including the size needed for miranda
 // methods) and the number of miranda methods in this class.
@@ -128,6 +133,12 @@
 int klassVtable::initialize_from_super(KlassHandle super) {
   if (super.is_null()) {
     return 0;
+  } else if (is_preinitialized_vtable()) {
+    // A shared class' vtable is preinitialized at dump time. No need to copy
+    // methods from super class for shared class, as that was already done
+    // during archiving time. However, if Jvmti has redefined a class,
+    // copy super class's vtable in case the super class has changed.
+    return super->vtable()->length();
   } else {
     // copy methods from superKlass
     // can't inherit from array class, so must be InstanceKlass
@@ -157,6 +168,8 @@
   KlassHandle super (THREAD, klass()->java_super());
   int nofNewEntries = 0;
 
+  bool is_shared = _klass->is_shared();
+
   if (PrintVtables && !klass()->oop_is_array()) {
     ResourceMark rm(THREAD);
     tty->print_cr("Initializing: %s", _klass->name()->as_C_string());
@@ -169,6 +182,7 @@
 #endif
 
   if (Universe::is_bootstrapping()) {
+    assert(!is_shared, "sanity");
     // just clear everything
     for (int i = 0; i < _length; i++) table()[i].clear();
     return;
@@ -208,6 +222,7 @@
       if (len > 0) {
         Array<int>* def_vtable_indices = NULL;
         if ((def_vtable_indices = ik()->default_vtable_indices()) == NULL) {
+          assert(!is_shared, "shared class def_vtable_indices does not exist");
           def_vtable_indices = ik()->create_new_default_vtable_indices(len, CHECK);
         } else {
           assert(def_vtable_indices->length() == len, "reinit vtable len?");
@@ -222,7 +237,15 @@
           // needs new entry
           if (needs_new_entry) {
             put_method_at(mh(), initialized);
-            def_vtable_indices->at_put(i, initialized); //set vtable index
+            if (is_preinitialized_vtable()) {
+              // At runtime initialize_vtable is rerun for a shared class
+              // (loaded by the non-boot loader) as part of link_class_impl().
+              // The dumptime vtable index should be the same as the runtime index.
+              assert(def_vtable_indices->at(i) == initialized,
+                     "dump time vtable index is different from runtime index");
+            } else {
+              def_vtable_indices->at_put(i, initialized); //set vtable index
+            }
             initialized++;
           }
         }
@@ -365,7 +388,8 @@
   }
 
   // we need a new entry if there is no superclass
-  if (klass->super() == NULL) {
+  Klass* super = klass->super();
+  if (super == NULL) {
     return allocate_new;
   }
 
@@ -394,7 +418,15 @@
 
   Symbol* target_classname = target_klass->name();
   for(int i = 0; i < super_vtable_len; i++) {
-    Method* super_method = method_at(i);
+    Method* super_method;
+    if (is_preinitialized_vtable()) {
+      // If this is a shared class, the vtable is already in the final state (fully
+      // initialized). Need to look at the super's vtable.
+      klassVtable* superVtable = super->vtable();
+      super_method = superVtable->method_at(i);
+    } else {
+      super_method = method_at(i);
+    }
     // Check if method name matches
     if (super_method->name() == name && super_method->signature() == signature) {
 
@@ -458,7 +490,15 @@
          target_method()->set_vtable_index(i);
        } else {
          if (def_vtable_indices != NULL) {
-           def_vtable_indices->at_put(default_index, i);
+           if (is_preinitialized_vtable()) {
+             // At runtime initialize_vtable is rerun as part of link_class_impl()
+             // for a shared class loaded by the non-boot loader.
+             // The dumptime vtable index should be the same as the runtime index.
+             assert(def_vtable_indices->at(default_index) == i,
+                    "dump time vtable index is different from runtime index");
+           } else {
+             def_vtable_indices->at_put(default_index, i);
+           }
          }
          assert(super_method->is_default_method() || super_method->is_overpass()
                 || super_method->is_abstract(), "default override error");
@@ -523,24 +563,33 @@
 }
 
 void klassVtable::put_method_at(Method* m, int index) {
+  if (is_preinitialized_vtable()) {
+    // At runtime initialize_vtable is rerun as part of link_class_impl()
+    // for shared class loaded by the non-boot loader to obtain the loader
+    // constraints based on the runtime classloaders' context. The dumptime
+    // method at the vtable index should be the same as the runtime method.
+    assert(table()[index].method() == m,
+           "archived method is different from the runtime method");
+  } else {
 #ifndef PRODUCT
-  if (PrintVtables && Verbose) {
-    ResourceMark rm;
-    const char* sig = (m != NULL) ? m->name_and_sig_as_C_string() : "<NULL>";
-    tty->print("adding %s at index %d, flags: ", sig, index);
-    if (m != NULL) {
-      m->access_flags().print_on(tty);
-      if (m->is_default_method()) {
-        tty->print("default ");
+    if (PrintVtables && Verbose) {
+      ResourceMark rm;
+      const char* sig = (m != NULL) ? m->name_and_sig_as_C_string() : "<NULL>";
+      tty->print("adding %s at index %d, flags: ", sig, index);
+      if (m != NULL) {
+        m->access_flags().print_on(tty);
+        if (m->is_default_method()) {
+          tty->print("default ");
+        }
+        if (m->is_overpass()) {
+          tty->print("overpass");
+        }
       }
-      if (m->is_overpass()) {
-        tty->print("overpass");
-      }
+      tty->cr();
     }
-    tty->cr();
+#endif
+    table()[index].set(m);
   }
-#endif
-  table()[index].set(m);
 }
 
 // Find out if a method "m" with superclass "super", loader "classloader" and
@@ -971,7 +1020,15 @@
 void itableMethodEntry::initialize(Method* m) {
   if (m == NULL) return;
 
-  _method = m;
+  if (MetaspaceShared::is_in_shared_space((void*)&_method) &&
+     !MetaspaceShared::remapped_readwrite()) {
+    // At runtime initialize_itable is rerun as part of link_class_impl()
+    // for a shared class loaded by the non-boot loader.
+    // The dumptime itable method entry should be the same as the runtime entry.
+    assert(_method == m, "sanity");
+  } else {
+    _method = m;
+  }
 }
 
 klassItable::klassItable(instanceKlassHandle klass) {
@@ -1081,7 +1138,11 @@
         tty->cr();
       }
       if (!m->has_vtable_index()) {
-        assert(m->vtable_index() == Method::pending_itable_index, "set by initialize_vtable");
+        // A shared method could have an initialized itable_index that
+        // is < 0.
+        assert(m->vtable_index() == Method::pending_itable_index ||
+               m->is_shared(),
+               "set by initialize_vtable");
         m->set_itable_index(ime_num);
         // Progress to next itable entry
         ime_num++;
@@ -1277,7 +1338,6 @@
 }
 #endif // INCLUDE_JVMTI
 
-
 // Setup
 class InterfaceVisiterClosure : public StackObj {
  public:
--- a/src/share/vm/oops/klassVtable.hpp	Mon Aug 08 20:18:53 2016 +0300
+++ b/src/share/vm/oops/klassVtable.hpp	Fri Aug 12 18:10:37 2016 +0300
@@ -142,6 +142,19 @@
       Array<Klass*>* local_interfaces);
   void verify_against(outputStream* st, klassVtable* vt, int index);
   inline InstanceKlass* ik() const;
+  // When loading a class from CDS archive at run time, and no class redefinition
+  // has happened, it is expected that the class's itable/vtables are
+  // laid out exactly the same way as they had been during dump time.
+  // Therefore, in klassVtable::initialize_[iv]table, we do not layout the
+  // tables again. Instead, we only rerun the process to create/check
+  // the class loader constraints. In non-product builds, we add asserts to
+  // guarantee that the table's layout would be the same as at dump time.
+  //
+  // If JVMTI redefines any class, the read-only shared memory is remapped
+  // as read-write. A shared class' vtable/itable are re-initialized and
+  // might have different layout due to class redefinition of the shared class
+  // or its super types.
+  bool is_preinitialized_vtable();
 };
 
 
--- a/src/share/vm/oops/method.cpp	Mon Aug 08 20:18:53 2016 +0300
+++ b/src/share/vm/oops/method.cpp	Fri Aug 12 18:10:37 2016 +0300
@@ -36,6 +36,7 @@
 #include "memory/generation.hpp"
 #include "memory/heapInspection.hpp"
 #include "memory/metadataFactory.hpp"
+#include "memory/metaspaceShared.hpp"
 #include "memory/oopFactory.hpp"
 #include "oops/constMethod.hpp"
 #include "oops/methodData.hpp"
@@ -305,6 +306,33 @@
   unlink_method();
 }
 
+void Method::set_vtable_index(int index) {
+  if (is_shared() && !MetaspaceShared::remapped_readwrite()) {
+    // At runtime initialize_vtable is rerun as part of link_class_impl()
+    // for a shared class loaded by the non-boot loader to obtain the loader
+    // constraints based on the runtime classloaders' context.
+    return; // don't write into the shared class
+  } else {
+    _vtable_index = index;
+  }
+}
+
+void Method::set_itable_index(int index) {
+  if (is_shared() && !MetaspaceShared::remapped_readwrite()) {
+    // At runtime initialize_itable is rerun as part of link_class_impl()
+    // for a shared class loaded by the non-boot loader to obtain the loader
+    // constraints based on the runtime classloaders' context. The dumptime
+    // itable index should be the same as the runtime index.
+    assert(_vtable_index == itable_index_max - index,
+           "archived itable index is different from runtime index");
+    return; // don't write into the shared class
+  } else {
+    _vtable_index = itable_index_max - index;
+  }
+  assert(valid_itable_index(), "");
+}
+
+
 
 bool Method::was_executed_more_than(int n) {
   // Invocation counter is reset when the Method* is compiled.
--- a/src/share/vm/oops/method.hpp	Mon Aug 08 20:18:53 2016 +0300
+++ b/src/share/vm/oops/method.hpp	Fri Aug 12 18:10:37 2016 +0300
@@ -471,12 +471,12 @@
   DEBUG_ONLY(bool valid_vtable_index() const     { return _vtable_index >= nonvirtual_vtable_index; })
   bool has_vtable_index() const                  { return _vtable_index >= 0; }
   int  vtable_index() const                      { return _vtable_index; }
-  void set_vtable_index(int index)               { _vtable_index = index; }
+  void set_vtable_index(int index);
   DEBUG_ONLY(bool valid_itable_index() const     { return _vtable_index <= pending_itable_index; })
   bool has_itable_index() const                  { return _vtable_index <= itable_index_max; }
   int  itable_index() const                      { assert(valid_itable_index(), "");
                                                    return itable_index_max - _vtable_index; }
-  void set_itable_index(int index)               { _vtable_index = itable_index_max - index; assert(valid_itable_index(), ""); }
+  void set_itable_index(int index);
 
   // interpreter entry
   address interpreter_entry() const              { return _i2i_entry; }
--- a/src/share/vm/opto/c2_globals.hpp	Mon Aug 08 20:18:53 2016 +0300
+++ b/src/share/vm/opto/c2_globals.hpp	Fri Aug 12 18:10:37 2016 +0300
@@ -208,6 +208,9 @@
   notproduct(bool, TraceProfileTripCount, false,                            \
           "Trace profile loop trip count information")                      \
                                                                             \
+  product(bool, UseCountedLoopSafepoints, false,                            \
+          "Force counted loops to keep a safepoint")                        \
+                                                                            \
   product(bool, UseLoopPredicate, true,                                     \
           "Generate a predicate to select fast/slow loop versions")         \
                                                                             \
@@ -662,6 +665,18 @@
   product(bool, UseMultiplyToLenIntrinsic, false,                           \
           "Enables intrinsification of BigInteger.multiplyToLen()")         \
                                                                             \
+  product(bool, UseSquareToLenIntrinsic, false,                             \
+          "Enables intrinsification of BigInteger.squareToLen()")           \
+                                                                            \
+  product(bool, UseMulAddIntrinsic, false,                                  \
+          "Enables intrinsification of BigInteger.mulAdd()")                \
+                                                                            \
+  product(bool, UseMontgomeryMultiplyIntrinsic, false,                      \
+          "Enables intrinsification of BigInteger.montgomeryMultiply()")    \
+                                                                            \
+  product(bool, UseMontgomerySquareIntrinsic, false,                        \
+          "Enables intrinsification of BigInteger.montgomerySquare()")      \
+                                                                            \
   product(bool, UseTypeSpeculation, true,                                   \
           "Speculatively propagate types from profiles")                    \
                                                                             \
@@ -672,6 +687,9 @@
   product_pd(bool, TrapBasedRangeChecks,                                    \
           "Generate code for range checks that uses a cmp and trap "        \
           "instruction raising SIGTRAP. Used on PPC64.")                    \
+                                                                            \
+  develop(bool, RenumberLiveNodes, true,                                    \
+          "Renumber live nodes")                                            \
 
 C2_FLAGS(DECLARE_DEVELOPER_FLAG, DECLARE_PD_DEVELOPER_FLAG, DECLARE_PRODUCT_FLAG, DECLARE_PD_PRODUCT_FLAG, DECLARE_DIAGNOSTIC_FLAG, DECLARE_EXPERIMENTAL_FLAG, DECLARE_NOTPRODUCT_FLAG)
 
--- a/src/share/vm/opto/c2compiler.cpp	Mon Aug 08 20:18:53 2016 +0300
+++ b/src/share/vm/opto/c2compiler.cpp	Fri Aug 12 18:10:37 2016 +0300
@@ -51,6 +51,9 @@
 const char* C2Compiler::retry_no_escape_analysis() {
   return "retry without escape analysis";
 }
+const char* C2Compiler::retry_class_loading_during_parsing() {
+  return "retry class loading during parsing";
+}
 bool C2Compiler::init_c2_runtime() {
 
   // Check assumptions used while running ADLC
@@ -117,6 +120,10 @@
 
     // Check result and retry if appropriate.
     if (C.failure_reason() != NULL) {
+      if (C.failure_reason_is(retry_class_loading_during_parsing())) {
+        env->record_failure(C.failure_reason());
+        continue;  // retry
+      }
       if (C.failure_reason_is(retry_no_subsuming_loads())) {
         assert(subsume_loads, "must make progress");
         subsume_loads = false;
--- a/src/share/vm/opto/c2compiler.hpp	Mon Aug 08 20:18:53 2016 +0300
+++ b/src/share/vm/opto/c2compiler.hpp	Fri Aug 12 18:10:37 2016 +0300
@@ -49,6 +49,7 @@
   // sentinel value used to trigger backtracking in compile_method().
   static const char* retry_no_subsuming_loads();
   static const char* retry_no_escape_analysis();
+  static const char* retry_class_loading_during_parsing();
 
   // Print compilation timers and statistics
   void print_timers();
--- a/src/share/vm/opto/callGenerator.cpp	Mon Aug 08 20:18:53 2016 +0300
+++ b/src/share/vm/opto/callGenerator.cpp	Fri Aug 12 18:10:37 2016 +0300
@@ -658,7 +658,7 @@
                                            &exact_receiver);
 
   SafePointNode* slow_map = NULL;
-  JVMState* slow_jvms;
+  JVMState* slow_jvms = NULL;
   { PreserveJVMState pjvms(&kit);
     kit.set_control(slow_ctl);
     if (!kit.stopped()) {
@@ -829,17 +829,18 @@
           }
         }
         // Cast reference arguments to its type.
-        for (int i = 0; i < signature->count(); i++) {
+        for (int i = 0, j = 0; i < signature->count(); i++) {
           ciType* t = signature->type_at(i);
           if (t->is_klass()) {
-            Node* arg = kit.argument(receiver_skip + i);
+            Node* arg = kit.argument(receiver_skip + j);
             const TypeOopPtr* arg_type = arg->bottom_type()->isa_oopptr();
             const Type*       sig_type = TypeOopPtr::make_from_klass(t->as_klass());
             if (arg_type != NULL && !arg_type->higher_equal(sig_type)) {
               Node* cast_obj = gvn.transform(new (C) CheckCastPPNode(kit.control(), arg, sig_type));
-              kit.set_argument(receiver_skip + i, cast_obj);
+              kit.set_argument(receiver_skip + j, cast_obj);
             }
           }
+          j += t->size();  // long and double take two slots
         }
 
         // Try to get the most accurate receiver type
--- a/src/share/vm/opto/cfgnode.cpp	Mon Aug 08 20:18:53 2016 +0300
+++ b/src/share/vm/opto/cfgnode.cpp	Fri Aug 12 18:10:37 2016 +0300
@@ -973,7 +973,7 @@
 #ifdef ASSERT
   // The following logic has been moved into TypeOopPtr::filter.
   const Type* jt = t->join_speculative(_type);
-  if( jt->empty() ) {           // Emptied out???
+  if (jt->empty()) {           // Emptied out???
 
     // Check for evil case of 't' being a class and '_type' expecting an
     // interface.  This can happen because the bytecodes do not contain
@@ -984,14 +984,21 @@
     // be 'I' or 'j/l/O'.  Thus we'll pick 'j/l/O'.  If this then flows
     // into a Phi which "knows" it's an Interface type we'll have to
     // uplift the type.
-    if( !t->empty() && ttip && ttip->is_loaded() && ttip->klass()->is_interface() )
-      { assert(ft == _type, ""); } // Uplift to interface
-    else if( !t->empty() && ttkp && ttkp->is_loaded() && ttkp->klass()->is_interface() )
-      { assert(ft == _type, ""); } // Uplift to interface
-    // Otherwise it's something stupid like non-overlapping int ranges
-    // found on dying counted loops.
-    else
-      { assert(ft == Type::TOP, ""); } // Canonical empty value
+    if (!t->empty() && ttip && ttip->is_loaded() && ttip->klass()->is_interface()) {
+      assert(ft == _type, ""); // Uplift to interface
+    } else if (!t->empty() && ttkp && ttkp->is_loaded() && ttkp->klass()->is_interface()) {
+      assert(ft == _type, ""); // Uplift to interface
+    } else {
+      // We also have to handle 'evil cases' of interface- vs. class-arrays
+      Type::get_arrays_base_elements(jt, _type, NULL, &ttip);
+      if (!t->empty() && ttip != NULL && ttip->is_loaded() && ttip->klass()->is_interface()) {
+          assert(ft == _type, "");   // Uplift to array of interface
+      } else {
+        // Otherwise it's something stupid like non-overlapping int ranges
+        // found on dying counted loops.
+        assert(ft == Type::TOP, ""); // Canonical empty value
+      }
+    }
   }
 
   else {
--- a/src/share/vm/opto/compile.cpp	Mon Aug 08 20:18:53 2016 +0300
+++ b/src/share/vm/opto/compile.cpp	Fri Aug 12 18:10:37 2016 +0300
@@ -414,6 +414,13 @@
       remove_macro_node(n);
     }
   }
+  // Remove useless CastII nodes with range check dependency
+  for (int i = range_check_cast_count() - 1; i >= 0; i--) {
+    Node* cast = range_check_cast_node(i);
+    if (!useful.member(cast)) {
+      remove_range_check_cast(cast);
+    }
+  }
   // Remove useless expensive node
   for (int i = C->expensive_count()-1; i >= 0; i--) {
     Node* n = C->expensive_node(i);
@@ -786,7 +793,9 @@
     }
     JVMState* jvms = build_start_state(start(), tf());
     if ((jvms = cg->generate(jvms)) == NULL) {
-      record_method_not_compilable("method parse failed");
+      if (!failure_reason_is(C2Compiler::retry_class_loading_during_parsing())) {
+        record_method_not_compilable("method parse failed");
+      }
       return;
     }
     GraphKit kit(jvms);
@@ -1150,6 +1159,7 @@
   _macro_nodes = new(comp_arena()) GrowableArray<Node*>(comp_arena(), 8,  0, NULL);
   _predicate_opaqs = new(comp_arena()) GrowableArray<Node*>(comp_arena(), 8,  0, NULL);
   _expensive_nodes = new(comp_arena()) GrowableArray<Node*>(comp_arena(), 8,  0, NULL);
+  _range_check_casts = new(comp_arena()) GrowableArray<Node*>(comp_arena(), 8,  0, NULL);
   register_library_intrinsics();
 }
 
@@ -1878,6 +1888,22 @@
   assert(predicate_count()==0, "should be clean!");
 }
 
+void Compile::add_range_check_cast(Node* n) {
+  assert(n->isa_CastII()->has_range_check(), "CastII should have range check dependency");
+  assert(!_range_check_casts->contains(n), "duplicate entry in range check casts");
+  _range_check_casts->append(n);
+}
+
+// Remove all range check dependent CastIINodes.
+void Compile::remove_range_check_casts(PhaseIterGVN &igvn) {
+  for (int i = range_check_cast_count(); i > 0; i--) {
+    Node* cast = range_check_cast_node(i-1);
+    assert(cast->isa_CastII()->has_range_check(), "CastII should have range check dependency");
+    igvn.replace_node(cast, cast->in(1));
+  }
+  assert(range_check_cast_count() == 0, "should be empty");
+}
+
 // StringOpts and late inlining of string methods
 void Compile::inline_string_calls(bool parse_time) {
   {
@@ -2095,6 +2121,20 @@
   // so keep only the actual candidates for optimizations.
   cleanup_expensive_nodes(igvn);
 
+  if (!failing() && RenumberLiveNodes && live_nodes() + NodeLimitFudgeFactor < unique()) {
+    NOT_PRODUCT(Compile::TracePhase t2("", &_t_renumberLive, TimeCompiler);)
+    initial_gvn()->replace_with(&igvn);
+    for_igvn()->clear();
+    Unique_Node_List new_worklist(C->comp_arena());
+    {
+      ResourceMark rm;
+      PhaseRenumberLive prl = PhaseRenumberLive(initial_gvn(), for_igvn(), &new_worklist);
+    }
+    set_for_igvn(&new_worklist);
+    igvn = PhaseIterGVN(initial_gvn());
+    igvn.optimize();
+  }
+
   // Perform escape analysis
   if (_do_escape_analysis && ConnectionGraph::has_candidates(this)) {
     if (has_loops()) {
@@ -2206,6 +2246,12 @@
     PhaseIdealLoop::verify(igvn);
   }
 
+  if (range_check_cast_count() > 0) {
+    // No more loop optimizations. Remove all range check dependent CastIINodes.
+    C->remove_range_check_casts(igvn);
+    igvn.optimize();
+  }
+
   {
     NOT_PRODUCT( TracePhase t2("macroExpand", &_t_macroExpand, TimeCompiler); )
     PhaseMacroExpand  mex(igvn);
@@ -2975,6 +3021,16 @@
 
 #endif
 
+#ifdef ASSERT
+  case Op_CastII:
+    // Verify that all range check dependent CastII nodes were removed.
+    if (n->isa_CastII()->has_range_check()) {
+      n->dump(3);
+      assert(false, "Range check dependent CastII node was not removed");
+    }
+    break;
+#endif
+
   case Op_ModI:
     if (UseDivMod) {
       // Check if a%b and a/b both exist
@@ -3660,7 +3716,7 @@
   MacroAssembler _masm(&cb);
   for (int i = 0; i < _constants.length(); i++) {
     Constant con = _constants.at(i);
-    address constant_addr;
+    address constant_addr = NULL;
     switch (con.type()) {
     case T_LONG:   constant_addr = _masm.long_constant(  con.get_jlong()  ); break;
     case T_FLOAT:  constant_addr = _masm.float_constant( con.get_jfloat() ); break;
@@ -4012,6 +4068,24 @@
   }
 }
 
+// Convert integer value to a narrowed long type dependent on ctrl (for example, a range check)
+Node* Compile::constrained_convI2L(PhaseGVN* phase, Node* value, const TypeInt* itype, Node* ctrl) {
+  if (ctrl != NULL) {
+    // Express control dependency by a CastII node with a narrow type.
+    value = new (phase->C) CastIINode(value, itype, false, true /* range check dependency */);
+    // Make the CastII node dependent on the control input to prevent the narrowed ConvI2L
+    // node from floating above the range check during loop optimizations. Otherwise, the
+    // ConvI2L node may be eliminated independently of the range check, causing the data path
+    // to become TOP while the control path is still there (although it's unreachable).
+    value->set_req(0, ctrl);
+    // Save CastII node to remove it after loop optimizations.
+    phase->C->add_range_check_cast(value);
+    value = phase->transform(value);
+  }
+  const TypeLong* ltype = TypeLong::make(itype->_lo, itype->_hi, itype->_widen);
+  return phase->transform(new (phase->C) ConvI2LNode(value, ltype));
+}
+
 // Auxiliary method to support randomized stressing/fuzzing.
 //
 // This method can be called the arbitrary number of times, with current count
--- a/src/share/vm/opto/compile.hpp	Mon Aug 08 20:18:53 2016 +0300
+++ b/src/share/vm/opto/compile.hpp	Fri Aug 12 18:10:37 2016 +0300
@@ -75,6 +75,7 @@
 class JVMState;
 class Type;
 class TypeData;
+class TypeInt;
 class TypePtr;
 class TypeOopPtr;
 class TypeFunc;
@@ -334,6 +335,7 @@
   GrowableArray<Node*>* _macro_nodes;           // List of nodes which need to be expanded before matching.
   GrowableArray<Node*>* _predicate_opaqs;       // List of Opaque1 nodes for the loop predicates.
   GrowableArray<Node*>* _expensive_nodes;       // List of nodes that are expensive to compute and that we'd better not let the GVN freely common
+  GrowableArray<Node*>* _range_check_casts;     // List of CastII nodes with a range check dependency
   ConnectionGraph*      _congraph;
 #ifndef PRODUCT
   IdealGraphPrinter*    _printer;
@@ -669,7 +671,7 @@
   void set_congraph(ConnectionGraph* congraph)  { _congraph = congraph;}
   void add_macro_node(Node * n) {
     //assert(n->is_macro(), "must be a macro node");
-    assert(!_macro_nodes->contains(n), " duplicate entry in expand list");
+    assert(!_macro_nodes->contains(n), "duplicate entry in expand list");
     _macro_nodes->append(n);
   }
   void remove_macro_node(Node * n) {
@@ -689,10 +691,23 @@
     }
   }
   void add_predicate_opaq(Node * n) {
-    assert(!_predicate_opaqs->contains(n), " duplicate entry in predicate opaque1");
+    assert(!_predicate_opaqs->contains(n), "duplicate entry in predicate opaque1");
     assert(_macro_nodes->contains(n), "should have already been in macro list");
     _predicate_opaqs->append(n);
   }
+
+  // Range check dependent CastII nodes that can be removed after loop optimizations
+  void add_range_check_cast(Node* n);
+  void remove_range_check_cast(Node* n) {
+    if (_range_check_casts->contains(n)) {
+      _range_check_casts->remove(n);
+    }
+  }
+  Node* range_check_cast_node(int idx) const { return _range_check_casts->at(idx);  }
+  int   range_check_cast_count()       const { return _range_check_casts->length(); }
+  // Remove all range check dependent CastIINodes.
+  void  remove_range_check_casts(PhaseIterGVN &igvn);
+
   // remove the opaque nodes that protect the predicates so that the unused checks and
   // uncommon traps will be eliminated from the graph.
   void cleanup_loop_predicates(PhaseIterGVN &igvn);
@@ -1201,6 +1216,9 @@
   // Definitions of pd methods
   static void pd_compiler2_init();
 
+  // Convert integer value to a narrowed long type dependent on ctrl (for example, a range check)
+  static Node* constrained_convI2L(PhaseGVN* phase, Node* value, const TypeInt* itype, Node* ctrl);
+
   // Auxiliary method for randomized fuzzing/stressing
   static bool randomized_select(int count);
 };
--- a/src/share/vm/opto/connode.cpp	Mon Aug 08 20:18:53 2016 +0300
+++ b/src/share/vm/opto/connode.cpp	Fri Aug 12 18:10:37 2016 +0300
@@ -535,6 +535,9 @@
   if (_carry_dependency) {
     st->print(" carry dependency");
   }
+  if (_range_check_dependency) {
+    st->print(" range check dependency");
+  }
 }
 #endif
 
@@ -994,7 +997,8 @@
   }
 
 #ifdef _LP64
-  // Convert ConvI2L(AddI(x, y)) to AddL(ConvI2L(x), ConvI2L(y)) ,
+  // Convert ConvI2L(AddI(x, y)) to AddL(ConvI2L(x), ConvI2L(y)) or
+  // ConvI2L(CastII(AddI(x, y))) to AddL(ConvI2L(CastII(x)), ConvI2L(CastII(y))),
   // but only if x and y have subranges that cannot cause 32-bit overflow,
   // under the assumption that x+y is in my own subrange this->type().
 
@@ -1018,6 +1022,13 @@
 
   Node* z = in(1);
   int op = z->Opcode();
+  Node* ctrl = NULL;
+  if (op == Op_CastII && z->as_CastII()->has_range_check()) {
+    // Skip CastII node but save control dependency
+    ctrl = z->in(0);
+    z = z->in(1);
+    op = z->Opcode();
+  }
   if (op == Op_AddI || op == Op_SubI) {
     Node* x = z->in(1);
     Node* y = z->in(2);
@@ -1075,9 +1086,10 @@
       rylo = -ryhi;
       ryhi = -rylo0;
     }
-
-    Node* cx = phase->transform( new (phase->C) ConvI2LNode(x, TypeLong::make(rxlo, rxhi, widen)) );
-    Node* cy = phase->transform( new (phase->C) ConvI2LNode(y, TypeLong::make(rylo, ryhi, widen)) );
+    assert(rxlo == (int)rxlo && rxhi == (int)rxhi, "x should not overflow");
+    assert(rylo == (int)rylo && ryhi == (int)ryhi, "y should not overflow");
+    Node* cx = phase->C->constrained_convI2L(phase, x, TypeInt::make(rxlo, rxhi, widen), ctrl);
+    Node* cy = phase->C->constrained_convI2L(phase, y, TypeInt::make(rylo, ryhi, widen), ctrl);
     switch (op) {
     case Op_AddI:  return new (phase->C) AddLNode(cx, cy);
     case Op_SubI:  return new (phase->C) SubLNode(cx, cy);
--- a/src/share/vm/opto/connode.hpp	Mon Aug 08 20:18:53 2016 +0300
+++ b/src/share/vm/opto/connode.hpp	Fri Aug 12 18:10:37 2016 +0300
@@ -244,19 +244,31 @@
   private:
   // Can this node be removed post CCP or does it carry a required dependency?
   const bool _carry_dependency;
+  // Is this node dependent on a range check?
+  const bool _range_check_dependency;
 
   protected:
   virtual uint cmp( const Node &n ) const;
   virtual uint size_of() const;
 
 public:
-  CastIINode(Node *n, const Type *t, bool carry_dependency = false)
-    : ConstraintCastNode(n,t), _carry_dependency(carry_dependency) {}
+  CastIINode(Node *n, const Type *t, bool carry_dependency = false, bool range_check_dependency = false)
+    : ConstraintCastNode(n,t), _carry_dependency(carry_dependency), _range_check_dependency(range_check_dependency) {
+    init_class_id(Class_CastII);
+  }
   virtual int Opcode() const;
   virtual uint ideal_reg() const { return Op_RegI; }
   virtual Node *Identity( PhaseTransform *phase );
   virtual const Type *Value( PhaseTransform *phase ) const;
   virtual Node *Ideal_DU_postCCP( PhaseCCP * );
+  bool has_range_check() const {  // range check dependency flag; always false (asserted unset) without _LP64
+ #ifdef _LP64
+     return _range_check_dependency;
+ #else
+     assert(!_range_check_dependency, "Should not have range check dependency");
+     return false;
+ #endif
+   }
 #ifndef PRODUCT
   virtual void dump_spec(outputStream *st) const;
 #endif
--- a/src/share/vm/opto/escape.cpp	Mon Aug 08 20:18:53 2016 +0300
+++ b/src/share/vm/opto/escape.cpp	Fri Aug 12 18:10:37 2016 +0300
@@ -958,8 +958,12 @@
                   strcmp(call->as_CallLeaf()->_name, "sha256_implCompressMB") == 0 ||
                   strcmp(call->as_CallLeaf()->_name, "sha512_implCompress") == 0 ||
                   strcmp(call->as_CallLeaf()->_name, "sha512_implCompressMB") == 0 ||
-                  strcmp(call->as_CallLeaf()->_name, "multiplyToLen") == 0)
-                  ))) {
+                  strcmp(call->as_CallLeaf()->_name, "multiplyToLen") == 0 ||
+                  strcmp(call->as_CallLeaf()->_name, "squareToLen") == 0 ||
+                  strcmp(call->as_CallLeaf()->_name, "mulAdd") == 0 ||
+                  strcmp(call->as_CallLeaf()->_name, "montgomery_multiply") == 0 ||
+                  strcmp(call->as_CallLeaf()->_name, "montgomery_square") == 0)
+                 ))) {
             call->dump();
             fatal(err_msg_res("EA unexpected CallLeaf %s", call->as_CallLeaf()->_name));
           }
--- a/src/share/vm/opto/generateOptoStub.cpp	Mon Aug 08 20:18:53 2016 +0300
+++ b/src/share/vm/opto/generateOptoStub.cpp	Fri Aug 12 18:10:37 2016 +0300
@@ -273,7 +273,7 @@
 
   //-----------------------------
   // If this is a normal subroutine return, issue the return and be done.
-  Node *ret;
+  Node *ret = NULL;
   switch( is_fancy_jump ) {
   case 0:                       // Make a return instruction
     // Return to caller, free any space for return address
--- a/src/share/vm/opto/graphKit.cpp	Mon Aug 08 20:18:53 2016 +0300
+++ b/src/share/vm/opto/graphKit.cpp	Fri Aug 12 18:10:37 2016 +0300
@@ -1645,7 +1645,7 @@
 
 //-------------------------array_element_address-------------------------
 Node* GraphKit::array_element_address(Node* ary, Node* idx, BasicType elembt,
-                                      const TypeInt* sizetype) {
+                                      const TypeInt* sizetype, Node* ctrl) {
   uint shift  = exact_log2(type2aelembytes(elembt));
   uint header = arrayOopDesc::base_offset_in_bytes(elembt);
 
@@ -1670,9 +1670,9 @@
   // number.  (The prior range check has ensured this.)
   // This assertion is used by ConvI2LNode::Ideal.
   int index_max = max_jint - 1;  // array size is max_jint, index is one less
-  if (sizetype != NULL)  index_max = sizetype->_hi - 1;
-  const TypeLong* lidxtype = TypeLong::make(CONST64(0), index_max, Type::WidenMax);
-  idx = _gvn.transform( new (C) ConvI2LNode(idx, lidxtype) );
+  if (sizetype != NULL) index_max = sizetype->_hi - 1;
+  const TypeInt* iidxtype = TypeInt::make(0, index_max, Type::WidenMax);
+  idx = C->constrained_convI2L(&_gvn, idx, iidxtype, ctrl);
 #endif
   Node* scale = _gvn.transform( new (C) LShiftXNode(idx, intcon(shift)) );
   return basic_plus_adr(ary, base, scale);
@@ -3491,10 +3491,6 @@
 
   Node* initial_slow_cmp  = _gvn.transform( new (C) CmpUNode( length, intcon( fast_size_limit ) ) );
   Node* initial_slow_test = _gvn.transform( new (C) BoolNode( initial_slow_cmp, BoolTest::gt ) );
-  if (initial_slow_test->is_Bool()) {
-    // Hide it behind a CMoveI, or else PhaseIdealLoop::split_up will get sick.
-    initial_slow_test = initial_slow_test->as_Bool()->as_int_value(&_gvn);
-  }
 
   // --- Size Computation ---
   // array_size = round_to_heap(array_header + (length << elem_shift));
@@ -3540,13 +3536,35 @@
   Node* lengthx = ConvI2X(length);
   Node* headerx = ConvI2X(header_size);
 #ifdef _LP64
-  { const TypeLong* tllen = _gvn.find_long_type(lengthx);
-    if (tllen != NULL && tllen->_lo < 0) {
+  { const TypeInt* tilen = _gvn.find_int_type(length);
+    if (tilen != NULL && tilen->_lo < 0) {
       // Add a manual constraint to a positive range.  Cf. array_element_address.
-      jlong size_max = arrayOopDesc::max_array_length(T_BYTE);
-      if (size_max > tllen->_hi)  size_max = tllen->_hi;
-      const TypeLong* tlcon = TypeLong::make(CONST64(0), size_max, Type::WidenMin);
-      lengthx = _gvn.transform( new (C) ConvI2LNode(length, tlcon));
+      jlong size_max = fast_size_limit;
+      if (size_max > tilen->_hi)  size_max = tilen->_hi;
+      const TypeInt* tlcon = TypeInt::make(0, size_max, Type::WidenMin);
+
+      // Only do a narrow I2L conversion if the range check passed.
+      IfNode* iff = new (C) IfNode(control(), initial_slow_test, PROB_MIN, COUNT_UNKNOWN);
+      _gvn.transform(iff);
+      RegionNode* region = new (C) RegionNode(3);
+      _gvn.set_type(region, Type::CONTROL);
+      lengthx = new (C) PhiNode(region, TypeLong::LONG);
+      _gvn.set_type(lengthx, TypeLong::LONG);
+
+      // Range check passed. Use ConvI2L node with narrow type.
+      Node* passed = IfFalse(iff);
+      region->init_req(1, passed);
+      // Make I2L conversion control dependent to prevent it from
+      // floating above the range check during loop optimizations.
+      lengthx->init_req(1, C->constrained_convI2L(&_gvn, length, tlcon, passed));
+
+      // Range check failed. Use ConvI2L with wide type because length may be invalid.
+      region->init_req(2, IfTrue(iff));
+      lengthx->init_req(2, ConvI2X(length));
+
+      set_control(region);
+      record_for_igvn(region);
+      record_for_igvn(lengthx);
     }
   }
 #endif
@@ -3577,6 +3595,11 @@
   Node *mem = reset_memory();
   set_all_memory(mem); // Create new memory state
 
+  if (initial_slow_test->is_Bool()) {
+    // Hide it behind a CMoveI, or else PhaseIdealLoop::split_up will get sick.
+    initial_slow_test = initial_slow_test->as_Bool()->as_int_value(&_gvn);
+  }
+
   // Create the AllocateArrayNode and its result projections
   AllocateArrayNode* alloc
     = new (C) AllocateArrayNode(C, AllocateArrayNode::alloc_type(TypeInt::INT),
--- a/src/share/vm/opto/graphKit.hpp	Mon Aug 08 20:18:53 2016 +0300
+++ b/src/share/vm/opto/graphKit.hpp	Fri Aug 12 18:10:37 2016 +0300
@@ -626,7 +626,9 @@
   // Return addressing for an array element.
   Node* array_element_address(Node* ary, Node* idx, BasicType elembt,
                               // Optional constraint on the array size:
-                              const TypeInt* sizetype = NULL);
+                              const TypeInt* sizetype = NULL,
+                              // Optional control dependency (for example, on range check)
+                              Node* ctrl = NULL);
 
   // Return a load of array element at idx.
   Node* load_array_element(Node* ctl, Node* ary, Node* idx, const TypeAryPtr* arytype);
--- a/src/share/vm/opto/lcm.cpp	Mon Aug 08 20:18:53 2016 +0300
+++ b/src/share/vm/opto/lcm.cpp	Fri Aug 12 18:10:37 2016 +0300
@@ -677,7 +677,7 @@
   block->insert_node(proj, node_cnt++);
 
   // Select the right register save policy.
-  const char * save_policy;
+  const char *save_policy = NULL;
   switch (op) {
     case Op_CallRuntime:
     case Op_CallLeaf:
--- a/src/share/vm/opto/library_call.cpp	Mon Aug 08 20:18:53 2016 +0300
+++ b/src/share/vm/opto/library_call.cpp	Fri Aug 12 18:10:37 2016 +0300
@@ -324,6 +324,10 @@
   bool inline_updateBytesCRC32();
   bool inline_updateByteBufferCRC32();
   bool inline_multiplyToLen();
+  bool inline_squareToLen();
+  bool inline_mulAdd();
+  bool inline_montgomeryMultiply();
+  bool inline_montgomerySquare();
 
   bool inline_profileBoolean();
 };
@@ -527,6 +531,21 @@
     if (!UseMultiplyToLenIntrinsic) return NULL;
     break;
 
+  case vmIntrinsics::_squareToLen:
+    if (!UseSquareToLenIntrinsic) return NULL;
+    break;
+
+  case vmIntrinsics::_mulAdd:
+    if (!UseMulAddIntrinsic) return NULL;
+    break;
+
+  case vmIntrinsics::_montgomeryMultiply:
+     if (!UseMontgomeryMultiplyIntrinsic) return NULL;
+    break;
+  case vmIntrinsics::_montgomerySquare:
+     if (!UseMontgomerySquareIntrinsic) return NULL;
+    break;
+
   case vmIntrinsics::_cipherBlockChaining_encryptAESCrypt:
   case vmIntrinsics::_cipherBlockChaining_decryptAESCrypt:
     if (!UseAESIntrinsics) return NULL;
@@ -927,6 +946,17 @@
   case vmIntrinsics::_multiplyToLen:
     return inline_multiplyToLen();
 
+  case vmIntrinsics::_squareToLen:
+    return inline_squareToLen();
+
+  case vmIntrinsics::_mulAdd:
+    return inline_mulAdd();
+
+  case vmIntrinsics::_montgomeryMultiply:
+    return inline_montgomeryMultiply();
+  case vmIntrinsics::_montgomerySquare:
+    return inline_montgomerySquare();
+
   case vmIntrinsics::_encodeISOArray:
     return inline_encodeISOArray();
 
@@ -1619,7 +1649,7 @@
 // public static double Math.log10(double)
 bool LibraryCallKit::inline_math(vmIntrinsics::ID id) {
   Node* arg = round_double_node(argument(0));
-  Node* n;
+  Node* n = NULL;
   switch (id) {
   case vmIntrinsics::_dabs:   n = new (C) AbsDNode(                arg);  break;
   case vmIntrinsics::_dsqrt:  n = new (C) SqrtDNode(C, control(),  arg);  break;
@@ -2360,7 +2390,7 @@
 // inline long       Long.reverseBytes(long)
 bool LibraryCallKit::inline_number_methods(vmIntrinsics::ID id) {
   Node* arg = argument(0);
-  Node* n;
+  Node* n = NULL;
   switch (id) {
   case vmIntrinsics::_numberOfLeadingZeros_i:   n = new (C) CountLeadingZerosINode( arg);  break;
   case vmIntrinsics::_numberOfLeadingZeros_l:   n = new (C) CountLeadingZerosLNode( arg);  break;
@@ -2955,7 +2985,7 @@
 
   // For now, we handle only those cases that actually exist: ints,
   // longs, and Object. Adding others should be straightforward.
-  Node* load_store;
+  Node* load_store = NULL;
   switch(type) {
   case T_INT:
     if (kind == LS_xadd) {
@@ -3875,7 +3905,7 @@
   Node* end               = is_copyOfRange? argument(2): argument(1);
   Node* array_type_mirror = is_copyOfRange? argument(3): argument(2);
 
-  Node* newcopy;
+  Node* newcopy = NULL;
 
   // Set the original stack and the reexecute bit for the interpreter to reexecute
   // the bytecode that invokes Arrays.copyOf if deoptimization happens.
@@ -4263,7 +4293,7 @@
 
 bool LibraryCallKit::inline_fp_conversions(vmIntrinsics::ID id) {
   Node* arg = argument(0);
-  Node* result;
+  Node* result = NULL;
 
   switch (id) {
   case vmIntrinsics::_floatToRawIntBits:    result = new (C) MoveF2INode(arg);  break;
@@ -5767,11 +5797,12 @@
 
   assert(callee()->signature()->size() == 5, "multiplyToLen has 5 parameters");
 
-  Node* x    = argument(1);
-  Node* xlen = argument(2);
-  Node* y    = argument(3);
-  Node* ylen = argument(4);
-  Node* z    = argument(5);
+  // no receiver because it is a static method
+  Node* x    = argument(0);
+  Node* xlen = argument(1);
+  Node* y    = argument(2);
+  Node* ylen = argument(3);
+  Node* z    = argument(4);
 
   const Type* x_type = x->Value(&_gvn);
   const Type* y_type = y->Value(&_gvn);
@@ -5856,6 +5887,215 @@
   return true;
 }
 
+//-------------inline_squareToLen------------------------------------
+// Emit a runtime call to the squareToLen stub for arguments (x, len, z, zlen).
+// Returns false when the stub is absent or x/z are not known to be int arrays.
+bool LibraryCallKit::inline_squareToLen() {
+  assert(UseSquareToLenIntrinsic, "not implemented on this platform");
+  address stubAddr = StubRoutines::squareToLen();
+  if (stubAddr == NULL) {
+    return false; // Intrinsic's stub is not implemented on this platform
+  }
+  const char* stubName = "squareToLen";
+  assert(callee()->signature()->size() == 4, "implSquareToLen has 4 parameters");
+
+  Node* x    = argument(0);
+  Node* len  = argument(1);
+  Node* z    = argument(2);
+  Node* zlen = argument(3);
+
+  const Type* x_type = x->Value(&_gvn);
+  const Type* z_type = z->Value(&_gvn);
+  const TypeAryPtr* top_x = x_type->isa_aryptr();
+  const TypeAryPtr* top_z = z_type->isa_aryptr();
+  if (top_x  == NULL || top_x->klass()  == NULL ||
+      top_z  == NULL || top_z->klass()  == NULL) {
+    // failed array check
+    return false;
+  }
+
+  BasicType x_elem = x_type->isa_aryptr()->klass()->as_array_klass()->element_type()->basic_type();
+  BasicType z_elem = z_type->isa_aryptr()->klass()->as_array_klass()->element_type()->basic_type();
+  if (x_elem != T_INT || z_elem != T_INT) {
+    return false;
+  }
+
+  // The stub is handed the addresses of element 0 of x and z.
+  Node* x_start = array_element_address(x, intcon(0), x_elem);
+  Node* z_start = array_element_address(z, intcon(0), z_elem);
+
+  Node*  call = make_runtime_call(RC_LEAF|RC_NO_FP,
+                                  OptoRuntime::squareToLen_Type(),
+                                  stubAddr, stubName, TypePtr::BOTTOM,
+                                  x_start, len, z_start, zlen);
+
+  set_result(z);  // z itself is the intrinsic's result
+  return true;
+}
+
+//-------------inline_mulAdd------------------------------------------
+// Emit a runtime call to the mulAdd stub; its TypeFunc::Parms projection is the
+// result. Returns false when the stub is absent or out/in are not int arrays.
+bool LibraryCallKit::inline_mulAdd() {
+  assert(UseMulAddIntrinsic, "not implemented on this platform");
+  address stubAddr = StubRoutines::mulAdd();
+  if (stubAddr == NULL) {
+    return false; // Intrinsic's stub is not implemented on this platform
+  }
+  const char* stubName = "mulAdd";
+  assert(callee()->signature()->size() == 5, "mulAdd has 5 parameters");
+
+  Node* out      = argument(0);
+  Node* in       = argument(1);
+  Node* offset   = argument(2);
+  Node* len      = argument(3);
+  Node* k        = argument(4);
+
+  const Type* out_type = out->Value(&_gvn);
+  const Type* in_type = in->Value(&_gvn);
+  const TypeAryPtr* top_out = out_type->isa_aryptr();
+  const TypeAryPtr* top_in = in_type->isa_aryptr();
+  if (top_out  == NULL || top_out->klass()  == NULL ||
+      top_in == NULL || top_in->klass() == NULL) {
+    // failed array check
+    return false;
+  }
+
+  BasicType out_elem = out_type->isa_aryptr()->klass()->as_array_klass()->element_type()->basic_type();
+  BasicType in_elem = in_type->isa_aryptr()->klass()->as_array_klass()->element_type()->basic_type();
+  if (out_elem != T_INT || in_elem != T_INT) {
+    return false;
+  }
+  // The stub takes (out.length - offset) rather than the caller's offset.
+  Node* outlen = load_array_length(out);
+  Node* new_offset = _gvn.transform(new (C) SubINode(outlen, offset));
+  Node* out_start = array_element_address(out, intcon(0), out_elem);
+  Node* in_start = array_element_address(in, intcon(0), in_elem);
+
+  Node*  call = make_runtime_call(RC_LEAF|RC_NO_FP,
+                                  OptoRuntime::mulAdd_Type(),
+                                  stubAddr, stubName, TypePtr::BOTTOM,
+                                  out_start,in_start, new_offset, len, k);
+  Node* result = _gvn.transform(new (C) ProjNode(call, TypeFunc::Parms));
+  set_result(result);
+  return true;
+}
+
+//-------------inline_montgomeryMultiply-----------------------------------
+// Emit a runtime call to the montgomeryMultiply stub; the result is array m.
+// Returns false when the stub is absent or a/b/n/m are not known int arrays.
+bool LibraryCallKit::inline_montgomeryMultiply() {
+  address stubAddr = StubRoutines::montgomeryMultiply();
+  if (stubAddr == NULL) {
+    return false; // Intrinsic's stub is not implemented on this platform
+  }
+  assert(UseMontgomeryMultiplyIntrinsic, "not implemented on this platform");
+  const char* stubName = "montgomery_multiply";
+  assert(callee()->signature()->size() == 7, "montgomeryMultiply has 7 parameters");
+
+  Node* a    = argument(0);
+  Node* b    = argument(1);
+  Node* n    = argument(2);
+  Node* len  = argument(3);
+  Node* inv  = argument(4);
+  Node* m    = argument(6);  // slot 5 skipped: inv presumably spans two slots (long) - confirm
+  // Each array operand is type-checked through its own node.
+  const Type* a_type = a->Value(&_gvn);
+  const TypeAryPtr* top_a = a_type->isa_aryptr();
+  const Type* b_type = b->Value(&_gvn);
+  const TypeAryPtr* top_b = b_type->isa_aryptr();
+  const Type* n_type = n->Value(&_gvn);
+  const TypeAryPtr* top_n = n_type->isa_aryptr();
+  const Type* m_type = m->Value(&_gvn);
+  const TypeAryPtr* top_m = m_type->isa_aryptr();
+  if (top_a  == NULL || top_a->klass()  == NULL ||
+      top_b == NULL || top_b->klass()  == NULL ||
+      top_n == NULL || top_n->klass()  == NULL ||
+      top_m == NULL || top_m->klass()  == NULL) {
+    // failed array check
+    return false;
+  }
+
+  BasicType a_elem = a_type->isa_aryptr()->klass()->as_array_klass()->element_type()->basic_type();
+  BasicType b_elem = b_type->isa_aryptr()->klass()->as_array_klass()->element_type()->basic_type();
+  BasicType n_elem = n_type->isa_aryptr()->klass()->as_array_klass()->element_type()->basic_type();
+  BasicType m_elem = m_type->isa_aryptr()->klass()->as_array_klass()->element_type()->basic_type();
+  if (a_elem != T_INT || b_elem != T_INT || n_elem != T_INT || m_elem != T_INT) {
+    return false;
+  }
+
+  // Make the call
+  {
+    Node* a_start = array_element_address(a, intcon(0), a_elem);
+    Node* b_start = array_element_address(b, intcon(0), b_elem);
+    Node* n_start = array_element_address(n, intcon(0), n_elem);
+    Node* m_start = array_element_address(m, intcon(0), m_elem);
+
+    Node* call = make_runtime_call(RC_LEAF,
+                                   OptoRuntime::montgomeryMultiply_Type(),
+                                   stubAddr, stubName, TypePtr::BOTTOM,
+                                   a_start, b_start, n_start, len, inv, top(),
+                                   m_start);
+    set_result(m);
+  }
+
+  return true;
+}
+
+//-------------inline_montgomerySquare-------------------------------------
+// Emit a runtime call to the montgomerySquare stub; the result is array m.
+// Returns false when the stub is absent or a/n/m are not known int arrays.
+bool LibraryCallKit::inline_montgomerySquare() {
+  address stubAddr = StubRoutines::montgomerySquare();
+  if (stubAddr == NULL) {
+    return false; // Intrinsic's stub is not implemented on this platform
+  }
+  assert(UseMontgomerySquareIntrinsic, "not implemented on this platform");
+  const char* stubName = "montgomery_square";
+  assert(callee()->signature()->size() == 6, "montgomerySquare has 6 parameters");
+
+  Node* a    = argument(0);
+  Node* n    = argument(1);
+  Node* len  = argument(2);
+  Node* inv  = argument(3);
+  Node* m    = argument(5);  // slot 4 skipped: inv presumably spans two slots (long) - confirm
+  // Each array operand is type-checked through its own node.
+  const Type* a_type = a->Value(&_gvn);
+  const TypeAryPtr* top_a = a_type->isa_aryptr();
+  const Type* n_type = n->Value(&_gvn);
+  const TypeAryPtr* top_n = n_type->isa_aryptr();
+  const Type* m_type = m->Value(&_gvn);
+  const TypeAryPtr* top_m = m_type->isa_aryptr();
+  if (top_a  == NULL || top_a->klass()  == NULL ||
+      top_n == NULL || top_n->klass()  == NULL ||
+      top_m == NULL || top_m->klass()  == NULL) {
+    // failed array check
+    return false;
+  }
+
+  BasicType a_elem = a_type->isa_aryptr()->klass()->as_array_klass()->element_type()->basic_type();
+  BasicType n_elem = n_type->isa_aryptr()->klass()->as_array_klass()->element_type()->basic_type();
+  BasicType m_elem = m_type->isa_aryptr()->klass()->as_array_klass()->element_type()->basic_type();
+  if (a_elem != T_INT || n_elem != T_INT || m_elem != T_INT) {
+    return false;
+  }
+
+  // Make the call
+  {
+    Node* a_start = array_element_address(a, intcon(0), a_elem);
+    Node* n_start = array_element_address(n, intcon(0), n_elem);
+    Node* m_start = array_element_address(m, intcon(0), m_elem);
+
+    Node* call = make_runtime_call(RC_LEAF,
+                                   OptoRuntime::montgomerySquare_Type(),
+                                   stubAddr, stubName, TypePtr::BOTTOM,
+                                   a_start, n_start, len, inv, top(),
+                                   m_start);
+    set_result(m);
+  }
+  return true;
+}
+
 
 /**
  * Calculate CRC32 for byte.
@@ -6054,7 +6294,7 @@
 
 //------------------------------inline_aescrypt_Block-----------------------
 bool LibraryCallKit::inline_aescrypt_Block(vmIntrinsics::ID id) {
-  address stubAddr;
+  address stubAddr = NULL;
   const char *stubName;
   assert(UseAES, "need AES instruction support");
 
@@ -6120,8 +6360,8 @@
 
 //------------------------------inline_cipherBlockChaining_AESCrypt-----------------------
 bool LibraryCallKit::inline_cipherBlockChaining_AESCrypt(vmIntrinsics::ID id) {
-  address stubAddr;
-  const char *stubName;
+  address stubAddr = NULL;
+  const char *stubName = NULL;
 
   assert(UseAES, "need AES instruction support");
 
--- a/src/share/vm/opto/loopTransform.cpp	Mon Aug 08 20:18:53 2016 +0300
+++ b/src/share/vm/opto/loopTransform.cpp	Fri Aug 12 18:10:37 2016 +0300
@@ -1739,6 +1739,12 @@
       }
       return true;
     }
+    if (is_scaled_iv(exp->in(2), iv, p_scale)) {
+      if (p_offset != NULL) {
+        *p_offset = exp->in(1);
+      }
+      return true;
+    }
     if (exp->in(2)->is_Con()) {
       Node* offset2 = NULL;
       if (depth < 2 &&
@@ -2432,7 +2438,7 @@
 
 //=============================================================================
 // Process all the loops in the loop tree and replace any fill
-// patterns with an intrisc version.
+// patterns with an intrinsic version.
 bool PhaseIdealLoop::do_intrinsify_fill() {
   bool changed = false;
   for (LoopTreeIterator iter(_ltree_root); !iter.done(); iter.next()) {
@@ -2530,8 +2536,9 @@
   }
 
   // Make sure the address expression can be handled.  It should be
-  // head->phi * elsize + con.  head->phi might have a ConvI2L.
+  // head->phi * elsize + con.  head->phi might have a ConvI2L(CastII()).
   Node* elements[4];
+  Node* cast = NULL;
   Node* conv = NULL;
   bool found_index = false;
   int count = store->in(MemNode::Address)->as_AddP()->unpack_offsets(elements, ARRAY_SIZE(elements));
@@ -2546,6 +2553,12 @@
         conv = value;
         value = value->in(1);
       }
+      if (value->Opcode() == Op_CastII &&
+          value->as_CastII()->has_range_check()) {
+        // Skip range check dependent CastII nodes
+        cast = value;
+        value = value->in(1);
+      }
 #endif
       if (value != head->phi()) {
         msg = "unhandled shift in address";
@@ -2558,9 +2571,16 @@
         }
       }
     } else if (n->Opcode() == Op_ConvI2L && conv == NULL) {
-      if (n->in(1) == head->phi()) {
+      conv = n;
+      n = n->in(1);
+      if (n->Opcode() == Op_CastII &&
+          n->as_CastII()->has_range_check()) {
+        // Skip range check dependent CastII nodes
+        cast = n;
+        n = n->in(1);
+      }
+      if (n == head->phi()) {
         found_index = true;
-        conv = n;
       } else {
         msg = "unhandled input to ConvI2L";
       }
@@ -2619,6 +2639,7 @@
   // Address elements are ok
   if (con)   ok.set(con->_idx);
   if (shift) ok.set(shift->_idx);
+  if (cast)  ok.set(cast->_idx);
   if (conv)  ok.set(conv->_idx);
 
   for (uint i = 0; msg == NULL && i < lpt->_body.size(); i++) {
@@ -2815,7 +2836,7 @@
   // state of the loop.  It's safe in this case to replace it with the
   // result_mem.
   _igvn.replace_node(store->in(MemNode::Memory), result_mem);
-  _igvn.replace_node(exit, result_ctrl);
+  lazy_replace(exit, result_ctrl);
   _igvn.replace_node(store, result_mem);
   // Any uses the increment outside of the loop become the loop limit.
   _igvn.replace_node(head->incr(), head->limit());
--- a/src/share/vm/opto/loopnode.cpp	Mon Aug 08 20:18:53 2016 +0300
+++ b/src/share/vm/opto/loopnode.cpp	Fri Aug 12 18:10:37 2016 +0300
@@ -685,14 +685,16 @@
 
   } // LoopLimitCheck
 
-  // Check for SafePoint on backedge and remove
-  Node *sfpt = x->in(LoopNode::LoopBackControl);
-  if (sfpt->Opcode() == Op_SafePoint && is_deleteable_safept(sfpt)) {
-    lazy_replace( sfpt, iftrue );
-    if (loop->_safepts != NULL) {
-      loop->_safepts->yank(sfpt);
+  if (!UseCountedLoopSafepoints) {
+    // Check for SafePoint on backedge and remove
+    Node *sfpt = x->in(LoopNode::LoopBackControl);
+    if (sfpt->Opcode() == Op_SafePoint && is_deleteable_safept(sfpt)) {
+      lazy_replace( sfpt, iftrue );
+      if (loop->_safepts != NULL) {
+        loop->_safepts->yank(sfpt);
+      }
+      loop->_tail = iftrue;
     }
-    loop->_tail = iftrue;
   }
 
   // Build a canonical trip test.
@@ -748,8 +750,8 @@
     set_loop(iff2, get_loop(iffalse));
 
     // Lazy update of 'get_ctrl' mechanism.
-    lazy_replace_proj( iffalse, iff2 );
-    lazy_replace_proj( iftrue,  ift2 );
+    lazy_replace(iffalse, iff2);
+    lazy_replace(iftrue,  ift2);
 
     // Swap names
     iffalse = iff2;
@@ -781,12 +783,14 @@
   lazy_replace( x, l );
   set_idom(l, init_control, dom_depth(x));
 
-  // Check for immediately preceding SafePoint and remove
-  Node *sfpt2 = le->in(0);
-  if (sfpt2->Opcode() == Op_SafePoint && is_deleteable_safept(sfpt2)) {
-    lazy_replace( sfpt2, sfpt2->in(TypeFunc::Control));
-    if (loop->_safepts != NULL) {
-      loop->_safepts->yank(sfpt2);
+  if (!UseCountedLoopSafepoints) {
+    // Check for immediately preceding SafePoint and remove
+    Node *sfpt2 = le->in(0);
+    if (sfpt2->Opcode() == Op_SafePoint && is_deleteable_safept(sfpt2)) {
+      lazy_replace( sfpt2, sfpt2->in(TypeFunc::Control));
+      if (loop->_safepts != NULL) {
+        loop->_safepts->yank(sfpt2);
+      }
     }
   }
 
@@ -1806,6 +1810,37 @@
   }
 }
 
+// Delete the deleteable safepoints listed in _safepts. With keep_one, the first
+// safepoint of this loop met on the idom walk from tail() to _head is spared.
+void IdealLoopTree::remove_safepoints(PhaseIdealLoop* phase, bool keep_one) {
+  Node* spared = NULL;
+  if (keep_one) {
+    // Walk the idom-path from the tail toward the head looking for a safepoint.
+    for (Node* cur = tail(); cur != _head; cur = phase->idom(cur)) {
+      if (cur->Opcode() != Op_SafePoint || phase->get_loop(cur) != this) {
+        continue;
+      }
+      spared = cur;
+      break; // Found one
+    }
+    // No safepoint on the idom-path: none dominates all paths in the loop
+    // body, so it is not safe to remove any. Leave every safepoint in place.
+    if (spared == NULL) return;
+  }
+  Node_List* listed = _safepts;
+  if (listed == NULL) return;
+
+  // Replace every other deleteable safepoint by its control input.
+  assert(spared == NULL || spared->Opcode() == Op_SafePoint, "not safepoint");
+  for (uint k = 0; k < listed->size(); k++) {
+    Node* sfpt = listed->at(k);
+    assert(phase->get_loop(sfpt) == this, "");
+    if (sfpt != spared && phase->is_deleteable_safept(sfpt)) {
+      phase->lazy_replace(sfpt, sfpt->in(TypeFunc::Control));
+    }
+  }
+}
+
 //------------------------------counted_loop-----------------------------------
 // Convert to counted loops where possible
 void IdealLoopTree::counted_loop( PhaseIdealLoop *phase ) {
@@ -1817,42 +1852,23 @@
 
   if (_head->is_CountedLoop() ||
       phase->is_counted_loop(_head, this)) {
-    _has_sfpt = 1;              // Indicate we do not need a safepoint here
-
-    // Look for safepoints to remove.
-    Node_List* sfpts = _safepts;
-    if (sfpts != NULL) {
-      for (uint i = 0; i < sfpts->size(); i++) {
-        Node* n = sfpts->at(i);
-        assert(phase->get_loop(n) == this, "");
-        if (phase->is_deleteable_safept(n)) {
-          phase->lazy_replace(n, n->in(TypeFunc::Control));
-        }
-      }
+
+    if (!UseCountedLoopSafepoints) {
+      // Indicate we do not need a safepoint here
+      _has_sfpt = 1;
     }
 
+    // Remove safepoints
+    bool keep_one_sfpt = !(_has_call || _has_sfpt);
+    remove_safepoints(phase, keep_one_sfpt);
+
     // Look for induction variables
     phase->replace_parallel_iv(this);
 
   } else if (_parent != NULL && !_irreducible) {
-    // Not a counted loop.
-    // Look for a safepoint on the idom-path.
-    Node* sfpt = tail();
-    for (; sfpt != _head; sfpt = phase->idom(sfpt)) {
-      if (sfpt->Opcode() == Op_SafePoint && phase->get_loop(sfpt) == this)
-        break; // Found one
-    }
-    // Delete other safepoints in this loop.
-    Node_List* sfpts = _safepts;
-    if (sfpts != NULL && sfpt != _head && sfpt->Opcode() == Op_SafePoint) {
-      for (uint i = 0; i < sfpts->size(); i++) {
-        Node* n = sfpts->at(i);
-        assert(phase->get_loop(n) == this, "");
-        if (n != sfpt && phase->is_deleteable_safept(n)) {
-          phase->lazy_replace(n, n->in(TypeFunc::Control));
-        }
-      }
-    }
+    // Not a counted loop. Keep one safepoint.
+    bool keep_one_sfpt = true;
+    remove_safepoints(phase, keep_one_sfpt);
   }
 
   // Recursively
@@ -1906,6 +1922,15 @@
     if (cl->is_main_loop()) tty->print(" main");
     if (cl->is_post_loop()) tty->print(" post");
   }
+  if (_has_call) tty->print(" has_call");
+  if (_has_sfpt) tty->print(" has_sfpt");
+  if (_rce_candidate) tty->print(" rce");
+  if (_safepts != NULL && _safepts->size() > 0) {
+    tty->print(" sfpts={"); _safepts->dump_simple(); tty->print(" }");
+  }
+  if (_required_safept != NULL && _required_safept->size() > 0) {
+    tty->print(" req={"); _required_safept->dump_simple(); tty->print(" }");
+  }
   tty->cr();
 }
 
@@ -2303,6 +2328,11 @@
 #endif
 
   if (skip_loop_opts) {
+    // restore major progress flag
+    for (int i = 0; i < old_progress; i++) {
+      C->set_major_progress();
+    }
+
     // Cleanup any modified bits
     _igvn.optimize();
 
--- a/src/share/vm/opto/loopnode.hpp	Mon Aug 08 20:18:53 2016 +0300
+++ b/src/share/vm/opto/loopnode.hpp	Fri Aug 12 18:10:37 2016 +0300
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1998, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1998, 2015, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -403,6 +403,9 @@
   // encountered.
   void allpaths_check_safepts(VectorSet &visited, Node_List &stack);
 
+  // Remove safepoints from loop. Optionally keeping one.
+  void remove_safepoints(PhaseIdealLoop* phase, bool keep_one);
+
   // Convert to counted loops where possible
   void counted_loop( PhaseIdealLoop *phase );
 
@@ -657,13 +660,18 @@
   }
 
 private:
-  Node *get_ctrl_no_update( Node *i ) const {
+  Node *get_ctrl_no_update_helper(Node *i) const {  // one _nodes lookup for i
+    assert(has_ctrl(i), "should be control, not loop");
+    return (Node*)(((intptr_t)_nodes[i->_idx]) & ~1);  // stored entry with bit 0 cleared
+  }
+
+  Node *get_ctrl_no_update(Node *i) const {
     assert( has_ctrl(i), "" );
-    Node *n = (Node*)(((intptr_t)_nodes[i->_idx]) & ~1);
+    Node *n = get_ctrl_no_update_helper(i);
     if (!n->in(0)) {
       // Skip dead CFG nodes
       do {
-        n = (Node*)(((intptr_t)_nodes[n->_idx]) & ~1);
+        n = get_ctrl_no_update_helper(n);
       } while (!n->in(0));
       n = find_non_split_ctrl(n);
     }
@@ -685,22 +693,15 @@
   // from old_node to new_node to support the lazy update.  Reference
   // replaces loop reference, since that is not needed for dead node.
 public:
-  void lazy_update( Node *old_node, Node *new_node ) {
-    assert( old_node != new_node, "no cycles please" );
-    //old_node->set_req( 1, new_node /*NO DU INFO*/ );
-    // Nodes always have DU info now, so re-use the side array slot
-    // for this node to provide the forwarding pointer.
-    _nodes.map( old_node->_idx, (Node*)((intptr_t)new_node + 1) );
+  void lazy_update(Node *old_node, Node *new_node) {
+    assert(old_node != new_node, "no cycles please");
+    // Re-use the side array slot for this node to provide the
+    // forwarding pointer.
+    _nodes.map(old_node->_idx, (Node*)((intptr_t)new_node + 1));
   }
-  void lazy_replace( Node *old_node, Node *new_node ) {
-    _igvn.replace_node( old_node, new_node );
-    lazy_update( old_node, new_node );
-  }
-  void lazy_replace_proj( Node *old_node, Node *new_node ) {
-    assert( old_node->req() == 1, "use this for Projs" );
-    _igvn.hash_delete(old_node); // Must hash-delete before hacking edges
-    old_node->add_req( NULL );
-    lazy_replace( old_node, new_node );
+  void lazy_replace(Node *old_node, Node *new_node) {
+    _igvn.replace_node(old_node, new_node);
+    lazy_update(old_node, new_node);
   }
 
 private:
--- a/src/share/vm/opto/loopopts.cpp	Mon Aug 08 20:18:53 2016 +0300
+++ b/src/share/vm/opto/loopopts.cpp	Fri Aug 12 18:10:37 2016 +0300
@@ -43,6 +43,14 @@
     return NULL;
   }
 
+  // Splitting range check CastIIs through a loop induction Phi can
+  // cause new Phis to be created that are left unrelated to the loop
+  // induction Phi and prevent optimizations (vectorization)
+  if (n->Opcode() == Op_CastII && n->as_CastII()->has_range_check() &&
+      region->is_CountedLoop() && n->in(1) == region->as_CountedLoop()->phi()) {
+    return NULL;
+  }
+
   int wins = 0;
   assert(!n->is_CFG(), "");
   assert(region->is_Region(), "");
@@ -772,6 +780,9 @@
 #ifdef _LP64
         if (m->Opcode() == Op_ConvI2L)
           return false;
+        if (m->is_CastII() && m->isa_CastII()->has_range_check()) {
+          return false;
+        }
 #endif
       }
     }
--- a/src/share/vm/opto/macro.cpp	Mon Aug 08 20:18:53 2016 +0300
+++ b/src/share/vm/opto/macro.cpp	Fri Aug 12 18:10:37 2016 +0300
@@ -693,10 +693,10 @@
   ciKlass* klass = NULL;
   ciInstanceKlass* iklass = NULL;
   int nfields = 0;
-  int array_base;
-  int element_size;
-  BasicType basic_elem_type;
-  ciType* elem_type;
+  int array_base = 0;
+  int element_size = 0;
+  BasicType basic_elem_type = T_ILLEGAL;
+  ciType* elem_type = NULL;
 
   Node* res = alloc->result_cast();
   assert(res == NULL || res->is_CheckCastPP(), "unexpected AllocateNode result");
@@ -1177,10 +1177,10 @@
   // We need a Region and corresponding Phi's to merge the slow-path and fast-path results.
   // they will not be used if "always_slow" is set
   enum { slow_result_path = 1, fast_result_path = 2 };
-  Node *result_region;
-  Node *result_phi_rawmem;
-  Node *result_phi_rawoop;
-  Node *result_phi_i_o;
+  Node *result_region = NULL;
+  Node *result_phi_rawmem = NULL;
+  Node *result_phi_rawoop = NULL;
+  Node *result_phi_i_o = NULL;
 
   // The initial slow comparison is a size check, the comparison
   // we want to do is a BoolTest::gt
--- a/src/share/vm/opto/node.cpp	Mon Aug 08 20:18:53 2016 +0300
+++ b/src/share/vm/opto/node.cpp	Fri Aug 12 18:10:37 2016 +0300
@@ -325,6 +325,9 @@
 // Create a Node, with a given number of required edges.
 Node::Node(uint req)
   : _idx(IDX_INIT(req))
+#ifdef ASSERT
+  , _parse_idx(_idx)
+#endif
 {
   assert( req < Compile::current()->max_node_limit() - NodeLimitFudgeFactor, "Input limit exceeded" );
   debug_only( verify_construction() );
@@ -344,6 +347,9 @@
 //------------------------------Node-------------------------------------------
 Node::Node(Node *n0)
   : _idx(IDX_INIT(1))
+#ifdef ASSERT
+  , _parse_idx(_idx)
+#endif
 {
   debug_only( verify_construction() );
   NOT_PRODUCT(nodes_created++);
@@ -356,6 +362,9 @@
 //------------------------------Node-------------------------------------------
 Node::Node(Node *n0, Node *n1)
   : _idx(IDX_INIT(2))
+#ifdef ASSERT
+  , _parse_idx(_idx)
+#endif
 {
   debug_only( verify_construction() );
   NOT_PRODUCT(nodes_created++);
@@ -370,6 +379,9 @@
 //------------------------------Node-------------------------------------------
 Node::Node(Node *n0, Node *n1, Node *n2)
   : _idx(IDX_INIT(3))
+#ifdef ASSERT
+  , _parse_idx(_idx)
+#endif
 {
   debug_only( verify_construction() );
   NOT_PRODUCT(nodes_created++);
@@ -386,6 +398,9 @@
 //------------------------------Node-------------------------------------------
 Node::Node(Node *n0, Node *n1, Node *n2, Node *n3)
   : _idx(IDX_INIT(4))
+#ifdef ASSERT
+  , _parse_idx(_idx)
+#endif
 {
   debug_only( verify_construction() );
   NOT_PRODUCT(nodes_created++);
@@ -404,6 +419,9 @@
 //------------------------------Node-------------------------------------------
 Node::Node(Node *n0, Node *n1, Node *n2, Node *n3, Node *n4)
   : _idx(IDX_INIT(5))
+#ifdef ASSERT
+  , _parse_idx(_idx)
+#endif
 {
   debug_only( verify_construction() );
   NOT_PRODUCT(nodes_created++);
@@ -425,6 +443,9 @@
 Node::Node(Node *n0, Node *n1, Node *n2, Node *n3,
                      Node *n4, Node *n5)
   : _idx(IDX_INIT(6))
+#ifdef ASSERT
+  , _parse_idx(_idx)
+#endif
 {
   debug_only( verify_construction() );
   NOT_PRODUCT(nodes_created++);
@@ -448,6 +469,9 @@
 Node::Node(Node *n0, Node *n1, Node *n2, Node *n3,
                      Node *n4, Node *n5, Node *n6)
   : _idx(IDX_INIT(7))
+#ifdef ASSERT
+  , _parse_idx(_idx)
+#endif
 {
   debug_only( verify_construction() );
   NOT_PRODUCT(nodes_created++);
@@ -497,6 +521,11 @@
     C->add_macro_node(n);
   if (is_expensive())
     C->add_expensive_node(n);
+  // If the cloned node is a range check dependent CastII, add it to the list.
+  CastIINode* cast = n->isa_CastII();
+  if (cast != NULL && cast->has_range_check()) {
+    C->add_range_check_cast(cast);
+  }
 
   n->set_idx(C->next_unique()); // Get new unique index as well
   debug_only( n->verify_construction() );
@@ -625,6 +654,11 @@
   if (is_expensive()) {
     compile->remove_expensive_node(this);
   }
+  CastIINode* cast = isa_CastII();
+  if (cast != NULL && cast->has_range_check()) {
+    compile->remove_range_check_cast(cast);
+  }
+
   if (is_SafePoint()) {
     as_SafePoint()->delete_replaced_nodes();
   }
@@ -1320,6 +1354,10 @@
       if (dead->is_expensive()) {
         igvn->C->remove_expensive_node(dead);
       }
+      CastIINode* cast = dead->isa_CastII();
+      if (cast != NULL && cast->has_range_check()) {
+        igvn->C->remove_range_check_cast(cast);
+      }
       igvn->C->record_dead_node(dead->_idx);
       // Kill all inputs to the dead guy
       for (uint i=0; i < dead->req(); i++) {
@@ -2083,6 +2121,17 @@
 #endif
 }
 
+// Print only the _idx of each entry (or " NULL" for an empty slot) to tty.
+void Node_List::dump_simple() const {
+#ifndef PRODUCT
+  for (uint pos = 0; pos < _cnt; pos++) {
+    Node* entry = _nodes[pos];
+    if (entry == NULL) { tty->print(" NULL"); continue; }
+    tty->print(" %d", entry->_idx);
+  }
+#endif
+}
+
 //=============================================================================
 //------------------------------remove-----------------------------------------
 void Unique_Node_List::remove( Node *n ) {
--- a/src/share/vm/opto/node.hpp	Mon Aug 08 20:18:53 2016 +0300
+++ b/src/share/vm/opto/node.hpp	Fri Aug 12 18:10:37 2016 +0300
@@ -54,6 +54,7 @@
 class CatchNode;
 class CatchProjNode;
 class CheckCastPPNode;
+class CastIINode;
 class ClearArrayNode;
 class CmpNode;
 class CodeBuffer;
@@ -294,10 +295,16 @@
 
  public:
   // Each Node is assigned a unique small/dense number.  This number is used
-  // to index into auxiliary arrays of data and bitvectors.
-  // It is declared const to defend against inadvertant assignment,
-  // since it is used by clients as a naked field.
+  // to index into auxiliary arrays of data and bit vectors.
+  // The field _idx is declared constant to defend against inadvertent assignments,
+  // since it is used by clients as a naked field. However, the field's value can be
+  // changed using the set_idx() method.
+  //
+  // The PhaseRenumberLive phase renumbers nodes based on liveness information.
+  // Therefore, it updates the value of the _idx field. The parse-time _idx is
+  // preserved in _parse_idx.
   const node_idx_t _idx;
+  DEBUG_ONLY(const node_idx_t _parse_idx;)
 
   // Get the (read-only) number of input edges
   uint req() const { return _cnt; }
@@ -597,6 +604,7 @@
     DEFINE_CLASS_ID(Type,  Node, 2)
       DEFINE_CLASS_ID(Phi,   Type, 0)
       DEFINE_CLASS_ID(ConstraintCast, Type, 1)
+        DEFINE_CLASS_ID(CastII, ConstraintCast, 0)
       DEFINE_CLASS_ID(CheckCastPP, Type, 2)
       DEFINE_CLASS_ID(CMove, Type, 3)
       DEFINE_CLASS_ID(SafePointScalarObject, Type, 4)
@@ -721,6 +729,7 @@
   DEFINE_CLASS_QUERY(Catch)
   DEFINE_CLASS_QUERY(CatchProj)
   DEFINE_CLASS_QUERY(CheckCastPP)
+  DEFINE_CLASS_QUERY(CastII)
   DEFINE_CLASS_QUERY(ConstraintCast)
   DEFINE_CLASS_QUERY(ClearArray)
   DEFINE_CLASS_QUERY(CMove)
@@ -1368,6 +1377,7 @@
   void clear() { _cnt = 0; Node_Array::clear(); } // retain storage
   uint size() const { return _cnt; }
   void dump() const;
+  void dump_simple() const;
 };
 
 //------------------------------Unique_Node_List-------------------------------
--- a/src/share/vm/opto/parse1.cpp	Mon Aug 08 20:18:53 2016 +0300
+++ b/src/share/vm/opto/parse1.cpp	Fri Aug 12 18:10:37 2016 +0300
@@ -27,6 +27,7 @@
 #include "interpreter/linkResolver.hpp"
 #include "oops/method.hpp"
 #include "opto/addnode.hpp"
+#include "opto/c2compiler.hpp"
 #include "opto/idealGraphPrinter.hpp"
 #include "opto/locknode.hpp"
 #include "opto/memnode.hpp"
@@ -105,7 +106,7 @@
 
   // Very similar to LoadNode::make, except we handle un-aligned longs and
   // doubles on Sparc.  Intel can handle them just fine directly.
-  Node *l;
+  Node *l = NULL;
   switch (bt) {                // Signature is flattened
   case T_INT:     l = new (C) LoadINode(ctl, mem, adr, TypeRawPtr::BOTTOM, TypeInt::INT,        MemNode::unordered); break;
   case T_FLOAT:   l = new (C) LoadFNode(ctl, mem, adr, TypeRawPtr::BOTTOM, Type::FLOAT,         MemNode::unordered); break;
@@ -988,7 +989,23 @@
   if (tf()->range()->cnt() > TypeFunc::Parms) {
     const Type* ret_type = tf()->range()->field_at(TypeFunc::Parms);
     Node*       ret_phi  = _gvn.transform( _exits.argument(0) );
-    assert(_exits.control()->is_top() || !_gvn.type(ret_phi)->empty(), "return value must be well defined");
+    if (!_exits.control()->is_top() && _gvn.type(ret_phi)->empty()) {
+      // In case of concurrent class loading, the type we set for the
+      // ret_phi in build_exits() may have been too optimistic and the
+      // ret_phi may be top now.
+      // Otherwise, we've encountered an error and have to mark the method as
+      // not compilable. Just using an assertion instead would be dangerous
+      // as this could lead to an infinite compile loop in non-debug builds.
+      {
+        MutexLockerEx ml(Compile_lock, Mutex::_no_safepoint_check_flag);
+        if (C->env()->system_dictionary_modification_counter_changed()) {
+          C->record_failure(C2Compiler::retry_class_loading_during_parsing());
+        } else {
+          C->record_method_not_compilable("Can't determine return type.");
+        }
+      }
+      return;
+    }
     if (ret_type->isa_int()) {
       BasicType ret_bt = method()->return_type()->basic_type();
       ret_phi = mask_int_value(ret_phi, ret_bt, &_gvn);
@@ -1897,7 +1914,7 @@
   // Now use a Phi here for merging
   assert(!nocreate, "Cannot build a phi for a block already parsed.");
   const JVMState* jvms = map->jvms();
-  const Type* t;
+  const Type* t = NULL;
   if (jvms->is_loc(idx)) {
     t = block()->local_type_at(idx - jvms->locoff());
   } else if (jvms->is_stk(idx)) {
@@ -2116,15 +2133,24 @@
     // here.
     Node* phi = _exits.argument(0);
     const TypeInstPtr *tr = phi->bottom_type()->isa_instptr();
-    if( tr && tr->klass()->is_loaded() &&
-        tr->klass()->is_interface() ) {
+    if (tr && tr->klass()->is_loaded() &&
+        tr->klass()->is_interface()) {
       const TypeInstPtr *tp = value->bottom_type()->isa_instptr();
       if (tp && tp->klass()->is_loaded() &&
           !tp->klass()->is_interface()) {
         // sharpen the type eagerly; this eases certain assert checking
         if (tp->higher_equal(TypeInstPtr::NOTNULL))
           tr = tr->join_speculative(TypeInstPtr::NOTNULL)->is_instptr();
-        value = _gvn.transform(new (C) CheckCastPPNode(0,value,tr));
+        value = _gvn.transform(new (C) CheckCastPPNode(0, value, tr));
+      }
+    } else {
+      // Also handle returns of oop-arrays to an arrays-of-interface return
+      const TypeInstPtr* phi_tip;
+      const TypeInstPtr* val_tip;
+      Type::get_arrays_base_elements(phi->bottom_type(), value->bottom_type(), &phi_tip, &val_tip);
+      if (phi_tip != NULL && phi_tip->is_loaded() && phi_tip->klass()->is_interface() &&
+          val_tip != NULL && val_tip->is_loaded() && !val_tip->klass()->is_interface()) {
+         value = _gvn.transform(new (C) CheckCastPPNode(0, value, phi->bottom_type()));
       }
     }
     phi->add_req(value);
--- a/src/share/vm/opto/parse2.cpp	Mon Aug 08 20:18:53 2016 +0300
+++ b/src/share/vm/opto/parse2.cpp	Fri Aug 12 18:10:37 2016 +0300
@@ -158,7 +158,9 @@
   // Check for always knowing you are throwing a range-check exception
   if (stopped())  return top();
 
-  Node* ptr = array_element_address(ary, idx, type, sizetype);
+  // Make array address computation control dependent to prevent it
+  // from floating above the range check during loop optimizations.
+  Node* ptr = array_element_address(ary, idx, type, sizetype, control());
 
   if (result2 != NULL)  *result2 = elemtype;
 
@@ -461,9 +463,12 @@
 #ifdef _LP64
   // Clean the 32-bit int into a real 64-bit offset.
   // Otherwise, the jint value 0 might turn into an offset of 0x0800000000.
-  const TypeLong* lkeytype = TypeLong::make(CONST64(0), num_cases-1, Type::WidenMin);
-  key_val       = _gvn.transform( new (C) ConvI2LNode(key_val, lkeytype) );
+  const TypeInt* ikeytype = TypeInt::make(0, num_cases-1, Type::WidenMin);
+  // Make I2L conversion control dependent to prevent it from
+  // floating above the range check during loop optimizations.
+  key_val = C->constrained_convI2L(&_gvn, key_val, ikeytype, control());
 #endif
+
   // Shift the value by wordsize so we have an index into the table, rather
   // than a switch value
   Node *shiftWord = _gvn.MakeConX(wordSize);
--- a/src/share/vm/opto/phase.cpp	Mon Aug 08 20:18:53 2016 +0300
+++ b/src/share/vm/opto/phase.cpp	Fri Aug 12 18:10:37 2016 +0300
@@ -67,6 +67,8 @@
 elapsedTimer   Phase::_t_iterGVN;
 elapsedTimer   Phase::_t_iterGVN2;
 elapsedTimer   Phase::_t_incrInline;
+elapsedTimer   Phase::_t_renumberLive;
+
 
 // Subtimers for _t_registerAllocation
 elapsedTimer   Phase::_t_ctorChaitin;
@@ -115,13 +117,14 @@
     }
     tty->print_cr ("      iterGVN        : %3.3f sec", Phase::_t_iterGVN.seconds());
     tty->print_cr ("      incrInline     : %3.3f sec", Phase::_t_incrInline.seconds());
+    tty->print_cr ("      renumberLive   : %3.3f sec", Phase::_t_renumberLive.seconds());
     tty->print_cr ("      idealLoop      : %3.3f sec", Phase::_t_idealLoop.seconds());
     tty->print_cr ("      idealLoopVerify: %3.3f sec", Phase::_t_idealLoopVerify.seconds());
     tty->print_cr ("      ccp            : %3.3f sec", Phase::_t_ccp.seconds());
     tty->print_cr ("      iterGVN2       : %3.3f sec", Phase::_t_iterGVN2.seconds());
     tty->print_cr ("      macroExpand    : %3.3f sec", Phase::_t_macroExpand.seconds());
     tty->print_cr ("      graphReshape   : %3.3f sec", Phase::_t_graphReshaping.seconds());
-    double optimizer_subtotal = Phase::_t_iterGVN.seconds() + Phase::_t_iterGVN2.seconds() +
+    double optimizer_subtotal = Phase::_t_iterGVN.seconds() + Phase::_t_iterGVN2.seconds() + Phase::_t_renumberLive.seconds() +
       Phase::_t_escapeAnalysis.seconds() + Phase::_t_macroEliminate.seconds() +
       Phase::_t_idealLoop.seconds() + Phase::_t_ccp.seconds() +
       Phase::_t_macroExpand.seconds() + Phase::_t_graphReshaping.seconds();
--- a/src/share/vm/opto/phase.hpp	Mon Aug 08 20:18:53 2016 +0300
+++ b/src/share/vm/opto/phase.hpp	Fri Aug 12 18:10:37 2016 +0300
@@ -40,22 +40,23 @@
 class Phase : public StackObj {
 public:
   enum PhaseNumber {
-    Compiler,                   // Top-level compiler phase
-    Parser,                     // Parse bytecodes
-    Remove_Useless,             // Remove useless nodes
-    Optimistic,                 // Optimistic analysis phase
-    GVN,                        // Pessimistic global value numbering phase
-    Ins_Select,                 // Instruction selection phase
-    CFG,                        // Build a CFG
-    BlockLayout,                // Linear ordering of blocks
-    Register_Allocation,        // Register allocation, duh
-    LIVE,                       // Dragon-book LIVE range problem
-    StringOpts,                 // StringBuilder related optimizations
-    Interference_Graph,         // Building the IFG
-    Coalesce,                   // Coalescing copies
-    Ideal_Loop,                 // Find idealized trip-counted loops
-    Macro_Expand,               // Expand macro nodes
-    Peephole,                   // Apply peephole optimizations
+    Compiler,                         // Top-level compiler phase
+    Parser,                           // Parse bytecodes
+    Remove_Useless,                   // Remove useless nodes
+    Remove_Useless_And_Renumber_Live, // First, remove useless nodes from the graph. Then, renumber live nodes.
+    Optimistic,                       // Optimistic analysis phase
+    GVN,                              // Pessimistic global value numbering phase
+    Ins_Select,                       // Instruction selection phase
+    CFG,                              // Build a CFG
+    BlockLayout,                      // Linear ordering of blocks
+    Register_Allocation,              // Register allocation, duh
+    LIVE,                             // Dragon-book LIVE range problem
+    StringOpts,                       // StringBuilder related optimizations
+    Interference_Graph,               // Building the IFG
+    Coalesce,                         // Coalescing copies
+    Ideal_Loop,                       // Find idealized trip-counted loops
+    Macro_Expand,                     // Expand macro nodes
+    Peephole,                         // Apply peephole optimizations
     last_phase
   };
 protected:
@@ -102,6 +103,7 @@
   static elapsedTimer   _t_iterGVN;
   static elapsedTimer   _t_iterGVN2;
   static elapsedTimer   _t_incrInline;
+  static elapsedTimer   _t_renumberLive;
 
 // Subtimers for _t_registerAllocation
   static elapsedTimer   _t_ctorChaitin;
--- a/src/share/vm/opto/phaseX.cpp	Mon Aug 08 20:18:53 2016 +0300
+++ b/src/share/vm/opto/phaseX.cpp	Fri Aug 12 18:10:37 2016 +0300
@@ -398,7 +398,7 @@
 //=============================================================================
 //------------------------------PhaseRemoveUseless-----------------------------
 // 1) Use a breadthfirst walk to collect useful nodes reachable from root.
-PhaseRemoveUseless::PhaseRemoveUseless( PhaseGVN *gvn, Unique_Node_List *worklist ) : Phase(Remove_Useless),
+PhaseRemoveUseless::PhaseRemoveUseless(PhaseGVN *gvn, Unique_Node_List *worklist, PhaseNumber phase_num) : Phase(phase_num),
   _useful(Thread::current()->resource_area()) {
 
   // Implementation requires 'UseLoopSafepoints == true' and an edge from root
@@ -435,6 +435,82 @@
   }
 }
 
+//=============================================================================
+//------------------------------PhaseRenumberLive------------------------------
+// First, remove useless nodes (equivalent to identifying live nodes).
+// Then, renumber live nodes.
+//
+// The set of live nodes is returned by PhaseRemoveUseless in the _useful structure.
+// If the number of live nodes is 'x' (where 'x' == _useful.size()), then the
+// PhaseRenumberLive updates the node ID of each node (the _idx field) with a unique
+// value in the range [0, x).
+//
+// At the end of the PhaseRenumberLive phase, the compiler's count of unique nodes is
+// updated to 'x' and the list of dead nodes is reset (as there are no dead nodes).
+//
+// The PhaseRenumberLive phase updates two data structures with the new node IDs.
+// (1) The worklist is used by the PhaseIterGVN phase to identify nodes that must be
+// processed. A new worklist (with the updated node IDs) is returned in 'new_worklist'.
+// (2) Type information (the field PhaseGVN::_types) maps type information to each
+// node ID. The mapping is updated to use the new node IDs as well. Updated type
+// information is returned in PhaseGVN::_types.
+//
+// The PhaseRenumberLive phase does not preserve the order of elements in the worklist.
+//
+// Other data structures used by the compiler are not updated. The hash table for value
+// numbering (the field PhaseGVN::_table) is not updated because computing the hash
+// values is not based on node IDs. The field PhaseGVN::_nodes is not updated either
+// because it is empty wherever PhaseRenumberLive is used.
+PhaseRenumberLive::PhaseRenumberLive(PhaseGVN* gvn,
+                                     Unique_Node_List* worklist, Unique_Node_List* new_worklist,
+                                     PhaseNumber phase_num) :
+  // Honor the caller-supplied phase number (it was previously ignored); the
+  // default declared in phaseX.hpp is Remove_Useless_And_Renumber_Live.
+  PhaseRemoveUseless(gvn, worklist, phase_num) {
+
+  assert(RenumberLiveNodes, "RenumberLiveNodes must be set to true for node renumbering to take place");
+  assert(C->live_nodes() == _useful.size(), "the number of live nodes must match the number of useful nodes");
+  assert(gvn->nodes_size() == 0, "GVN must not contain any nodes at this point");
+
+  // Snapshot the counts up front; they are checked against the results below.
+  uint live_node_count = C->live_nodes();
+  uint worklist_size = worklist->size();
+
+  // Storage for the updated type information.
+  Type_Array new_type_array(C->comp_arena());
+
+  // Walk the live nodes, handing out dense new IDs in _useful order.
+  uint current_idx = 0; // The current new node ID. Incremented after every assignment.
+  for (uint i = 0; i < _useful.size(); i++) {
+    Node* n = _useful.at(i);
+
+    // Record the node's type (as returned by type_or_null) under its new ID.
+    new_type_array.map(current_idx, gvn->type_or_null(n));
+
+    // Query worklist membership before the ID changes (the lookup presumably
+    // depends on _idx - confirm), and push onto the new list only afterwards.
+    const bool in_worklist = worklist->member(n);
+    n->set_idx(current_idx); // Update node ID.
+    if (in_worklist) {
+      new_worklist->push(n);
+    }
+
+    current_idx++;
+  }
+
+  assert(worklist_size == new_worklist->size(), "the new worklist must have the same size as the original worklist");
+  assert(live_node_count == current_idx, "all live nodes must be processed");
+
+  // Replace the compiler's type information with the updated type information.
+  gvn->replace_types(new_type_array);
+
+  // Update the unique node count of the compilation to the number of currently live nodes.
+  C->set_unique(live_node_count);
+
+  // Set the dead node count to 0 and reset dead node list.
+  C->reset_dead_node_list();
+}
+
 
 //=============================================================================
 //------------------------------PhaseTransform---------------------------------
@@ -1263,6 +1339,10 @@
       if (dead->is_expensive()) {
         C->remove_expensive_node(dead);
       }
+      CastIINode* cast = dead->isa_CastII();
+      if (cast != NULL && cast->has_range_check()) {
+        C->remove_range_check_cast(cast);
+      }
     }
   } // while (_stack.is_nonempty())
 }
--- a/src/share/vm/opto/phaseX.hpp	Mon Aug 08 20:18:53 2016 +0300
+++ b/src/share/vm/opto/phaseX.hpp	Fri Aug 12 18:10:37 2016 +0300
@@ -148,11 +148,21 @@
   Unique_Node_List _useful;   // Nodes reachable from root
                               // list is allocated from current resource area
 public:
-  PhaseRemoveUseless( PhaseGVN *gvn, Unique_Node_List *worklist );
+  PhaseRemoveUseless(PhaseGVN *gvn, Unique_Node_List *worklist, PhaseNumber phase_num = Remove_Useless);
 
   Unique_Node_List *get_useful() { return &_useful; }
 };
 
+//------------------------------PhaseRenumberLive------------------------------
+// Phase that first performs a PhaseRemoveUseless, then renumbers the live
+// nodes and updates the compiler structures keyed by node ID accordingly.
+class PhaseRenumberLive : public PhaseRemoveUseless {
+public:
+  PhaseRenumberLive(PhaseGVN* gvn,
+                    Unique_Node_List* worklist, Unique_Node_List* new_worklist,
+                    PhaseNumber phase_num = Remove_Useless_And_Renumber_Live);
+};
+
 
 //------------------------------PhaseTransform---------------------------------
 // Phases that analyze, then transform.  Constructing the Phase object does any
@@ -162,7 +172,7 @@
 class PhaseTransform : public Phase {
 protected:
   Arena*     _arena;
-  Node_Array _nodes;           // Map old node indices to new nodes.
+  Node_List  _nodes;           // Map old node indices to new nodes.
   Type_Array _types;           // Map old node indices to Types.
 
   // ConNode caches:
@@ -187,7 +197,13 @@
 
   Arena*      arena()   { return _arena; }
   Type_Array& types()   { return _types; }
+  // Install new_types as the node-index -> Type mapping. The argument is
+  // taken by value and assigned over _types.
+  void replace_types(Type_Array new_types) { _types = new_types; }
   // _nodes is used in varying ways by subclasses, which define local accessors
+  // Size of _nodes as reported by its size() accessor; exposed so code
+  // outside the subclasses can check whether _nodes holds any entries.
+  uint nodes_size() { return _nodes.size(); }
 
 public:
   // Get a previously recorded type for the node n.
--- a/src/share/vm/opto/runtime.cpp	Mon Aug 08 20:18:53 2016 +0300
+++ b/src/share/vm/opto/runtime.cpp	Fri Aug 12 18:10:37 2016 +0300
@@ -233,22 +233,17 @@
 
   // These checks are cheap to make and support reflective allocation.
   int lh = klass->layout_helper();
-  if (Klass::layout_helper_needs_slow_path(lh)
-      || !InstanceKlass::cast(klass)->is_initialized()) {
-    KlassHandle kh(THREAD, klass);
-    kh->check_valid_for_instantiation(false, THREAD);
+  if (Klass::layout_helper_needs_slow_path(lh) || !InstanceKlass::cast(klass)->is_initialized()) {
+    Handle holder(THREAD, klass->klass_holder()); // keep the klass alive
+    klass->check_valid_for_instantiation(false, THREAD);
     if (!HAS_PENDING_EXCEPTION) {
-      InstanceKlass::cast(kh())->initialize(THREAD);
-    }
-    if (!HAS_PENDING_EXCEPTION) {
-      klass = kh();
-    } else {
-      klass = NULL;
+      InstanceKlass::cast(klass)->initialize(THREAD);
     }
   }
 
-  if (klass != NULL) {
+  if (!HAS_PENDING_EXCEPTION) {
     // Scavenge and allocate an instance.
+    Handle holder(THREAD, klass->klass_holder()); // keep the klass alive
     oop result = InstanceKlass::cast(klass)->allocate_instance(THREAD);
     thread->set_vm_result(result);
 
@@ -288,6 +283,7 @@
     // Although the oopFactory likes to work with the elem_type,
     // the compiler prefers the array_type, since it must already have
     // that latter value in hand for the fast path.
+    Handle holder(THREAD, array_type->klass_holder()); // keep the array klass alive
     Klass* elem_type = ObjArrayKlass::cast(array_type)->element_klass();
     result = oopFactory::new_objArray(elem_type, len, THREAD);
   }
@@ -366,6 +362,7 @@
   jint dims[2];
   dims[0] = len1;
   dims[1] = len2;
+  Handle holder(THREAD, elem_type->klass_holder()); // keep the klass alive
   oop obj = ArrayKlass::cast(elem_type)->multi_allocate(2, dims, THREAD);
   deoptimize_caller_frame(thread, HAS_PENDING_EXCEPTION);
   thread->set_vm_result(obj);
@@ -382,6 +379,7 @@
   dims[0] = len1;
   dims[1] = len2;
   dims[2] = len3;
+  Handle holder(THREAD, elem_type->klass_holder()); // keep the klass alive
   oop obj = ArrayKlass::cast(elem_type)->multi_allocate(3, dims, THREAD);
   deoptimize_caller_frame(thread, HAS_PENDING_EXCEPTION);
   thread->set_vm_result(obj);
@@ -399,6 +397,7 @@
   dims[1] = len2;
   dims[2] = len3;
   dims[3] = len4;
+  Handle holder(THREAD, elem_type->klass_holder()); // keep the klass alive
   oop obj = ArrayKlass::cast(elem_type)->multi_allocate(4, dims, THREAD);
   deoptimize_caller_frame(thread, HAS_PENDING_EXCEPTION);
   thread->set_vm_result(obj);
@@ -417,6 +416,7 @@
   dims[2] = len3;
   dims[3] = len4;
   dims[4] = len5;
+  Handle holder(THREAD, elem_type->klass_holder()); // keep the klass alive
   oop obj = ArrayKlass::cast(elem_type)->multi_allocate(5, dims, THREAD);
   deoptimize_caller_frame(thread, HAS_PENDING_EXCEPTION);
   thread->set_vm_result(obj);
@@ -434,6 +434,7 @@
   jint *c_dims = NEW_RESOURCE_ARRAY(jint, len);
   Copy::conjoint_jints_atomic(j_dims, c_dims, len);
 
+  Handle holder(THREAD, elem_type->klass_holder()); // keep the klass alive
   oop obj = ArrayKlass::cast(elem_type)->multi_allocate(len, c_dims, THREAD);
   deoptimize_caller_frame(thread, HAS_PENDING_EXCEPTION);
   thread->set_vm_result(obj);
@@ -958,6 +959,94 @@
   return TypeFunc::make(domain, range);
 }
 
+// TypeFunc for squareToLen: four arguments (x, len, z, zlen), empty range.
+const TypeFunc* OptoRuntime::squareToLen_Type() {
+  // Domain: argument slots are filled in order starting at TypeFunc::Parms.
+  const int argcnt = 4;
+  const Type** slots = TypeTuple::fields(argcnt);
+  int argp = TypeFunc::Parms;
+  slots[argp++] = TypePtr::NOTNULL;    // x
+  slots[argp++] = TypeInt::INT;        // len
+  slots[argp++] = TypePtr::NOTNULL;    // z
+  slots[argp++] = TypeInt::INT;        // zlen
+  assert(argp == TypeFunc::Parms+argcnt, "correct decoding");
+  const TypeTuple* domain = TypeTuple::make(TypeFunc::Parms+argcnt, slots);
+
+  // Range: built with only TypeFunc::Parms entries, i.e. no result slot.
+  const Type** result = TypeTuple::fields(1);
+  result[TypeFunc::Parms+0] = NULL;
+  const TypeTuple* range = TypeTuple::make(TypeFunc::Parms, result);
+  return TypeFunc::make(domain, range);
+}
+
+// TypeFunc for mulAdd calls: two pointers (out, in) and three ints
+// (offset, len, k) in the domain; a single int (the carry) in the range.
+const TypeFunc* OptoRuntime::mulAdd_Type() {
+  // Domain: argument slots are filled in order starting at TypeFunc::Parms.
+  const int argcnt = 5;
+  const Type** slots = TypeTuple::fields(argcnt);
+  int argp = TypeFunc::Parms;
+  slots[argp++] = TypePtr::NOTNULL;    // out
+  slots[argp++] = TypePtr::NOTNULL;    // in
+  slots[argp++] = TypeInt::INT;        // offset
+  slots[argp++] = TypeInt::INT;        // len
+  slots[argp++] = TypeInt::INT;        // k
+  assert(argp == TypeFunc::Parms+argcnt, "correct decoding");
+  const TypeTuple* domain = TypeTuple::make(TypeFunc::Parms+argcnt, slots);
+
+  // Range: one slot past TypeFunc::Parms holding the returned carry (int).
+  const Type** result = TypeTuple::fields(1);
+  result[TypeFunc::Parms+0] = TypeInt::INT;
+  const TypeTuple* range = TypeTuple::make(TypeFunc::Parms+1, result);
+  return TypeFunc::make(domain, range);
+}
+
+// TypeFunc for montgomeryMultiply: arguments (a, b, n, len, inv, result).
+const TypeFunc* OptoRuntime::montgomeryMultiply_Type() {
+  const int argcnt = 7;                 // the long 'inv' takes two slots (LONG + HALF)
+  const Type** slots = TypeTuple::fields(argcnt);
+  int argp = TypeFunc::Parms;
+  slots[argp++] = TypePtr::NOTNULL;    // a
+  slots[argp++] = TypePtr::NOTNULL;    // b
+  slots[argp++] = TypePtr::NOTNULL;    // n
+  slots[argp++] = TypeInt::INT;        // len
+  slots[argp++] = TypeLong::LONG;      // inv
+  slots[argp++] = Type::HALF;
+  slots[argp++] = TypePtr::NOTNULL;    // result
+  assert(argp == TypeFunc::Parms+argcnt, "correct decoding");
+  const TypeTuple* domain = TypeTuple::make(TypeFunc::Parms+argcnt, slots);
+
+  // NOTE(review): a NOTNULL result slot is filled in, but the range is built
+  // with only TypeFunc::Parms entries and so does not include that slot
+  // (mulAdd_Type uses Parms+1 for its result) - confirm this is intended.
+  const Type** result = TypeTuple::fields(1);
+  result[TypeFunc::Parms+0] = TypePtr::NOTNULL;
+  const TypeTuple* range = TypeTuple::make(TypeFunc::Parms, result);
+  return TypeFunc::make(domain, range);
+}
+
+// TypeFunc for montgomerySquare: arguments (a, n, len, inv, result).
+const TypeFunc* OptoRuntime::montgomerySquare_Type() {
+  const int argcnt = 6;                 // the long 'inv' takes two slots (LONG + HALF)
+  const Type** slots = TypeTuple::fields(argcnt);
+  int argp = TypeFunc::Parms;
+  slots[argp++] = TypePtr::NOTNULL;    // a
+  slots[argp++] = TypePtr::NOTNULL;    // n
+  slots[argp++] = TypeInt::INT;        // len
+  slots[argp++] = TypeLong::LONG;      // inv
+  slots[argp++] = Type::HALF;
+  slots[argp++] = TypePtr::NOTNULL;    // result
+  assert(argp == TypeFunc::Parms+argcnt, "correct decoding");
+  const TypeTuple* domain = TypeTuple::make(TypeFunc::Parms+argcnt, slots);
+
+  // NOTE(review): a NOTNULL result slot is filled in, but the range is built
+  // with only TypeFunc::Parms entries and so does not include that slot
+  // (mulAdd_Type uses Parms+1 for its result) - confirm this is intended.
+  const Type** result = TypeTuple::fields(1);
+  result[TypeFunc::Parms+0] = TypePtr::NOTNULL;
+  const TypeTuple* range = TypeTuple::make(TypeFunc::Parms, result);
+  return TypeFunc::make(domain, range);
+}
 
 
 //------------- Interpreter state access for on stack replacement
--- a/src/share/vm/opto/runtime.hpp	Mon Aug 08 20:18:53 2016 +0300
+++ b/src/share/vm/opto/runtime.hpp	Fri Aug 12 18:10:37 2016 +0300
@@ -305,6 +305,12 @@
 
   static const TypeFunc* multiplyToLen_Type();
 
+  static const TypeFunc* squareToLen_Type();
+
+  static const TypeFunc* mulAdd_Type();
+  static const TypeFunc* montgomeryMultiply_Type();
+  static const TypeFunc* montgomerySquare_Type();
+
   static const TypeFunc* updateBytesCRC32_Type();
 
   // leaf on stack replacement interpreter accessor types
--- a/src/share/vm/opto/split_if.cpp	Mon Aug 08 20:18:53 2016 +0300
+++ b/src/share/vm/opto/split_if.cpp	Fri Aug 12 18:10:37 2016 +0300
@@ -451,8 +451,8 @@
 
   // Replace both uses of 'new_iff' with Regions merging True/False
   // paths.  This makes 'new_iff' go dead.
-  Node *old_false, *old_true;
-  Node *new_false, *new_true;
+  Node *old_false = NULL, *old_true = NULL;
+  Node *new_false = NULL, *new_true = NULL;
   for (DUIterator_Last j2min, j2 = iff->last_outs(j2min); j2 >= j2min; --j2) {
     Node *ifp = iff->last_out(j2);
     assert( ifp->Opcode() == Op_IfFalse || ifp->Opcode() == Op_IfTrue, "" );
@@ -472,7 +472,7 @@
 
     // Replace in the graph with lazy-update mechanism
     new_iff->set_req(0, new_iff); // hook self so it does not go dead
-    lazy_replace_proj( ifp, ifpx );
+    lazy_replace(ifp, ifpx);
     new_iff->set_req(0, region);
 
     // Record bits for later xforms
--- a/src/share/vm/opto/superword.cpp	Mon Aug 08 20:18:53 2016 +0300
+++ b/src/share/vm/opto/superword.cpp	Fri Aug 12 18:10:37 2016 +0300
@@ -2388,6 +2388,11 @@
       return true;
     }
   } else if (opc == Op_ConvI2L) {
+    if (n->in(1)->Opcode() == Op_CastII &&
+        n->in(1)->as_CastII()->has_range_check()) {
+      // Skip range check dependent CastII nodes
+      n = n->in(1);
+    }
     if (scaled_iv_plus_offset(n->in(1))) {
       return true;
     }
--- a/src/share/vm/opto/type.cpp	Mon Aug 08 20:18:53 2016 +0300
+++ b/src/share/vm/opto/type.cpp	Fri Aug 12 18:10:37 2016 +0300
@@ -149,6 +149,33 @@
   return bt;
 }
 
+// For two instance arrays of same dimension, return the base element types
+// through e1/e2 (either pointer may be NULL if that result is not wanted).
+// Otherwise, or if the dimensions differ, the requested outputs are NULL.
+void Type::get_arrays_base_elements(const Type *a1, const Type *a2,
+                                    const TypeInstPtr **e1, const TypeInstPtr **e2) {
+  if (e1 != NULL) *e1 = NULL;
+  if (e2 != NULL) *e2 = NULL;
+  const TypeAryPtr* ary1 = (a1 != NULL) ? a1->isa_aryptr() : NULL;
+  const TypeAryPtr* ary2 = (a2 != NULL) ? a2->isa_aryptr() : NULL;
+  if (ary1 == NULL || ary2 == NULL) {
+    return;  // at least one input is not an array pointer
+  }
+
+  // Handle multidimensional arrays: peel one dimension off both in lock step.
+  const TypePtr* elem1 = ary1->elem()->make_ptr();
+  const TypePtr* elem2 = ary2->elem()->make_ptr();
+  while (elem1 != NULL && elem1->isa_aryptr() && elem2 != NULL && elem2->isa_aryptr()) {
+    elem1 = elem1->is_aryptr()->elem()->make_ptr();
+    elem2 = elem2->is_aryptr()->elem()->make_ptr();
+  }
+  // Report only when both sides ended on an instance pointer together.
+  if (elem1 != NULL && elem1->isa_instptr() && elem2 != NULL && elem2->isa_instptr()) {
+    if (e1 != NULL) *e1 = elem1->is_instptr();
+    if (e2 != NULL) *e2 = elem2->is_instptr();
+  }
+}
+
 //---------------------------get_typeflow_type---------------------------------
 // Import a type produced by ciTypeFlow.
 const Type* Type::get_typeflow_type(ciType* type) {
@@ -1984,7 +2011,11 @@
 bool TypeAry::interface_vs_oop(const Type *t) const {
   const TypeAry* t_ary = t->is_ary();
   if (t_ary) {
-    return _elem->interface_vs_oop(t_ary->_elem);
+    const TypePtr* this_ptr = _elem->make_ptr(); // In case we have narrow_oops
+    const TypePtr*    t_ptr = t_ary->_elem->make_ptr();
+    if(this_ptr != NULL && t_ptr != NULL) {
+      return this_ptr->interface_vs_oop(t_ptr);
+    }
   }
   return false;
 }
@@ -2836,8 +2867,17 @@
     // be 'I' or 'j/l/O'.  Thus we'll pick 'j/l/O'.  If this then flows
     // into a Phi which "knows" it's an Interface type we'll have to
     // uplift the type.
-    if (!empty() && ktip != NULL && ktip->is_loaded() && ktip->klass()->is_interface())
-      return kills;             // Uplift to interface
+    if (!empty()) {
+      if (ktip != NULL && ktip->is_loaded() && ktip->klass()->is_interface()) {
+        return kills;           // Uplift to interface
+      }
+      // Also check for evil cases of 'this' being a class array
+      // and 'kills' expecting an array of interfaces.
+      Type::get_arrays_base_elements(ft, kills, NULL, &ktip);
+      if (ktip != NULL && ktip->is_loaded() && ktip->klass()->is_interface()) {
+        return kills;           // Uplift to array of interface
+      }
+    }
 
     return Type::TOP;           // Canonical empty value
   }
--- a/src/share/vm/opto/type.hpp	Mon Aug 08 20:18:53 2016 +0300
+++ b/src/share/vm/opto/type.hpp	Fri Aug 12 18:10:37 2016 +0300
@@ -367,6 +367,11 @@
     return _const_basic_type[type];
   }
 
+  // For two instance arrays of same dimension, return the base element types.
+  // Otherwise or if the arrays have different dimensions, return NULL.
+  static void get_arrays_base_elements(const Type *a1, const Type *a2,
+                                       const TypeInstPtr **e1, const TypeInstPtr **e2);
+
   // Mapping to the array element's basic type.
   BasicType array_element_basic_type() const;
 
--- a/src/share/vm/prims/jni.cpp	Mon Aug 08 20:18:53 2016 +0300
+++ b/src/share/vm/prims/jni.cpp	Fri Aug 12 18:10:37 2016 +0300
@@ -1818,34 +1818,34 @@
 
 // the runtime type of subword integral basic types is integer
 DEFINE_CALLMETHODV(jboolean, Boolean, T_BOOLEAN
-                  , HOTSPOT_JNI_CALLBOOLEANMETHOD_ENTRY(env, obj, (uintptr_t)methodID),
-                  HOTSPOT_JNI_CALLBOOLEANMETHOD_RETURN(_ret_ref))
+                  , HOTSPOT_JNI_CALLBOOLEANMETHODV_ENTRY(env, obj, (uintptr_t)methodID),
+                  HOTSPOT_JNI_CALLBOOLEANMETHODV_RETURN(_ret_ref))
 DEFINE_CALLMETHODV(jbyte,    Byte,    T_BYTE
-                  , HOTSPOT_JNI_CALLBYTEMETHOD_ENTRY(env, obj, (uintptr_t)methodID),
-                  HOTSPOT_JNI_CALLBYTEMETHOD_RETURN(_ret_ref))
+                  , HOTSPOT_JNI_CALLBYTEMETHODV_ENTRY(env, obj, (uintptr_t)methodID),
+                  HOTSPOT_JNI_CALLBYTEMETHODV_RETURN(_ret_ref))
 DEFINE_CALLMETHODV(jchar,    Char,    T_CHAR
-                  , HOTSPOT_JNI_CALLCHARMETHOD_ENTRY(env, obj, (uintptr_t)methodID),
-                  HOTSPOT_JNI_CALLCHARMETHOD_RETURN(_ret_ref))
+                  , HOTSPOT_JNI_CALLCHARMETHODV_ENTRY(env, obj, (uintptr_t)methodID),
+                  HOTSPOT_JNI_CALLCHARMETHODV_RETURN(_ret_ref))
 DEFINE_CALLMETHODV(jshort,   Short,   T_SHORT
-                  , HOTSPOT_JNI_CALLSHORTMETHOD_ENTRY(env, obj, (uintptr_t)methodID),
-                  HOTSPOT_JNI_CALLSHORTMETHOD_RETURN(_ret_ref))
+                  , HOTSPOT_JNI_CALLSHORTMETHODV_ENTRY(env, obj, (uintptr_t)methodID),
+                  HOTSPOT_JNI_CALLSHORTMETHODV_RETURN(_ret_ref))
 
 DEFINE_CALLMETHODV(jobject,  Object,  T_OBJECT
-                  , HOTSPOT_JNI_CALLOBJECTMETHOD_ENTRY(env, obj, (uintptr_t)methodID),
-                  HOTSPOT_JNI_CALLOBJECTMETHOD_RETURN(_ret_ref))
+                  , HOTSPOT_JNI_CALLOBJECTMETHODV_ENTRY(env, obj, (uintptr_t)methodID),
+                  HOTSPOT_JNI_CALLOBJECTMETHODV_RETURN(_ret_ref))
 DEFINE_CALLMETHODV(jint,     Int,     T_INT,
-                  HOTSPOT_JNI_CALLINTMETHOD_ENTRY(env, obj, (uintptr_t)methodID),
-                  HOTSPOT_JNI_CALLINTMETHOD_RETURN(_ret_ref))
+                  HOTSPOT_JNI_CALLINTMETHODV_ENTRY(env, obj, (uintptr_t)methodID),
+                  HOTSPOT_JNI_CALLINTMETHODV_RETURN(_ret_ref))
 DEFINE_CALLMETHODV(jlong,    Long,    T_LONG
-                  , HOTSPOT_JNI_CALLLONGMETHOD_ENTRY(env, obj, (uintptr_t)methodID),
-                  HOTSPOT_JNI_CALLLONGMETHOD_RETURN(_ret_ref))
+                  , HOTSPOT_JNI_CALLLONGMETHODV_ENTRY(env, obj, (uintptr_t)methodID),
+                  HOTSPOT_JNI_CALLLONGMETHODV_RETURN(_ret_ref))
 // Float and double probes don't return value because dtrace doesn't currently support it
 DEFINE_CALLMETHODV(jfloat,   Float,   T_FLOAT
-                  , HOTSPOT_JNI_CALLFLOATMETHOD_ENTRY(env, obj, (uintptr_t)methodID),
-                  HOTSPOT_JNI_CALLFLOATMETHOD_RETURN())
+                  , HOTSPOT_JNI_CALLFLOATMETHODV_ENTRY(env, obj, (uintptr_t)methodID),
+                  HOTSPOT_JNI_CALLFLOATMETHODV_RETURN())
 DEFINE_CALLMETHODV(jdouble,  Double,  T_DOUBLE
-                  , HOTSPOT_JNI_CALLDOUBLEMETHOD_ENTRY(env, obj, (uintptr_t)methodID),
-                  HOTSPOT_JNI_CALLDOUBLEMETHOD_RETURN())
+                  , HOTSPOT_JNI_CALLDOUBLEMETHODV_ENTRY(env, obj, (uintptr_t)methodID),
+                  HOTSPOT_JNI_CALLDOUBLEMETHODV_RETURN())
 
 #define DEFINE_CALLMETHODA(ResultType, Result, Tag \
                           , EntryProbe, ReturnProbe)    \
@@ -1870,34 +1870,34 @@
 
 // the runtime type of subword integral basic types is integer
 DEFINE_CALLMETHODA(jboolean, Boolean, T_BOOLEAN
-                  , HOTSPOT_JNI_CALLBOOLEANMETHOD_ENTRY(env, obj, (uintptr_t)methodID),
-                  HOTSPOT_JNI_CALLBOOLEANMETHOD_RETURN(_ret_ref))
+                  , HOTSPOT_JNI_CALLBOOLEANMETHODA_ENTRY(env, obj, (uintptr_t)methodID),
+                  HOTSPOT_JNI_CALLBOOLEANMETHODA_RETURN(_ret_ref))
 DEFINE_CALLMETHODA(jbyte,    Byte,    T_BYTE
-                  , HOTSPOT_JNI_CALLBYTEMETHOD_ENTRY(env, obj, (uintptr_t)methodID),
-                  HOTSPOT_JNI_CALLBYTEMETHOD_RETURN(_ret_ref))
+                  , HOTSPOT_JNI_CALLBYTEMETHODA_ENTRY(env, obj, (uintptr_t)methodID),
+                  HOTSPOT_JNI_CALLBYTEMETHODA_RETURN(_ret_ref))
 DEFINE_CALLMETHODA(jchar,    Char,    T_CHAR
-                  , HOTSPOT_JNI_CALLCHARMETHOD_ENTRY(env, obj, (uintptr_t)methodID),
-                  HOTSPOT_JNI_CALLCHARMETHOD_RETURN(_ret_ref))
+                  , HOTSPOT_JNI_CALLCHARMETHODA_ENTRY(env, obj, (uintptr_t)methodID),
+                  HOTSPOT_JNI_CALLCHARMETHODA_RETURN(_ret_ref))
 DEFINE_CALLMETHODA(jshort,   Short,   T_SHORT
-                  , HOTSPOT_JNI_CALLSHORTMETHOD_ENTRY(env, obj, (uintptr_t)methodID),
-                  HOTSPOT_JNI_CALLSHORTMETHOD_RETURN(_ret_ref))
+                  , HOTSPOT_JNI_CALLSHORTMETHODA_ENTRY(env, obj, (uintptr_t)methodID),
+                  HOTSPOT_JNI_CALLSHORTMETHODA_RETURN(_ret_ref))
 
 DEFINE_CALLMETHODA(jobject,  Object,  T_OBJECT
-                  , HOTSPOT_JNI_CALLOBJECTMETHOD_ENTRY(env, obj, (uintptr_t)methodID),
-                  HOTSPOT_JNI_CALLOBJECTMETHOD_RETURN(_ret_ref))
+                  , HOTSPOT_JNI_CALLOBJECTMETHODA_ENTRY(env, obj, (uintptr_t)methodID),
+                  HOTSPOT_JNI_CALLOBJECTMETHODA_RETURN(_ret_ref))
 DEFINE_CALLMETHODA(jint,     Int,     T_INT,
-                  HOTSPOT_JNI_CALLINTMETHOD_ENTRY(env, obj, (uintptr_t)methodID),
-                  HOTSPOT_JNI_CALLINTMETHOD_RETURN(_ret_ref))
+                  HOTSPOT_JNI_CALLINTMETHODA_ENTRY(env, obj, (uintptr_t)methodID),
+                  HOTSPOT_JNI_CALLINTMETHODA_RETURN(_ret_ref))
 DEFINE_CALLMETHODA(jlong,    Long,    T_LONG
-                  , HOTSPOT_JNI_CALLLONGMETHOD_ENTRY(env, obj, (uintptr_t)methodID),
-                  HOTSPOT_JNI_CALLLONGMETHOD_RETURN(_ret_ref))
+                  , HOTSPOT_JNI_CALLLONGMETHODA_ENTRY(env, obj, (uintptr_t)methodID),
+                  HOTSPOT_JNI_CALLLONGMETHODA_RETURN(_ret_ref))
 // Float and double probes don't return value because dtrace doesn't currently support it
 DEFINE_CALLMETHODA(jfloat,   Float,   T_FLOAT
-                  , HOTSPOT_JNI_CALLFLOATMETHOD_ENTRY(env, obj, (uintptr_t)methodID),
-                  HOTSPOT_JNI_CALLFLOATMETHOD_RETURN())
+                  , HOTSPOT_JNI_CALLFLOATMETHODA_ENTRY(env, obj, (uintptr_t)methodID),
+                  HOTSPOT_JNI_CALLFLOATMETHODA_RETURN())
 DEFINE_CALLMETHODA(jdouble,  Double,  T_DOUBLE
-                  , HOTSPOT_JNI_CALLDOUBLEMETHOD_ENTRY(env, obj, (uintptr_t)methodID),
-                  HOTSPOT_JNI_CALLDOUBLEMETHOD_RETURN())
+                  , HOTSPOT_JNI_CALLDOUBLEMETHODA_ENTRY(env, obj, (uintptr_t)methodID),
+                  HOTSPOT_JNI_CALLDOUBLEMETHODA_RETURN())
 
 DT_VOID_RETURN_MARK_DECL(CallVoidMethod, HOTSPOT_JNI_CALLVOIDMETHOD_RETURN());
 DT_VOID_RETURN_MARK_DECL(CallVoidMethodV, HOTSPOT_JNI_CALLVOIDMETHODV_RETURN());
@@ -3161,7 +3161,7 @@
 JNI_END
 
 DEFINE_SETSTATICFIELD(jboolean, bool,   Boolean, 'Z', z
-                      , HOTSPOT_JNI_SETBOOLEANFIELD_ENTRY(env, clazz, (uintptr_t)fieldID, value),
+                      , HOTSPOT_JNI_SETSTATICBOOLEANFIELD_ENTRY(env, clazz, (uintptr_t)fieldID, value),
                       HOTSPOT_JNI_SETBOOLEANFIELD_RETURN())
 DEFINE_SETSTATICFIELD(jbyte,    byte,   Byte,    'B', b
                       , HOTSPOT_JNI_SETSTATICBYTEFIELD_ENTRY(env, clazz, (uintptr_t) fieldID, value),
--- a/src/share/vm/prims/jvmtiEnter.xsl	Mon Aug 08 20:18:53 2016 +0300
+++ b/src/share/vm/prims/jvmtiEnter.xsl	Fri Aug 12 18:10:37 2016 +0300
@@ -631,8 +631,8 @@
   jint trace_flags = JvmtiTrace::trace_flags(</xsl:text>
       <xsl:value-of select="@num"/>
       <xsl:text>);
-  const char *func_name;
-  const char *curr_thread_name;
+  const char *func_name = NULL;
+  const char *curr_thread_name = NULL;
   if (trace_flags) {
     func_name = JvmtiTrace::function_name(</xsl:text>
       <xsl:value-of select="@num"/>
--- a/src/share/vm/prims/jvmtiEnv.cpp	Mon Aug 08 20:18:53 2016 +0300
+++ b/src/share/vm/prims/jvmtiEnv.cpp	Fri Aug 12 18:10:37 2016 +0300
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2016, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -946,7 +946,7 @@
     return JVMTI_ERROR_INVALID_THREAD;
 
   Handle thread_obj(current_thread, thread_oop);
-  typeArrayHandle    name;
+  Handle name;
   ThreadPriority priority;
   Handle     thread_group;
   Handle context_class_loader;
@@ -954,7 +954,7 @@
 
   { MutexLocker mu(Threads_lock);
 
-    name = typeArrayHandle(current_thread, java_lang_Thread::name(thread_obj()));
+    name = Handle(current_thread, java_lang_Thread::name(thread_obj()));
     priority = java_lang_Thread::priority(thread_obj());
     thread_group = Handle(current_thread, java_lang_Thread::threadGroup(thread_obj()));
     is_daemon = java_lang_Thread::is_daemon(thread_obj());
@@ -965,7 +965,7 @@
   { const char *n;
 
     if (name() != NULL) {
-      n = UNICODE::as_utf8((jchar*) name->base(T_CHAR), name->length());
+      n = java_lang_String::as_utf8_string(name());
     } else {
       n = UNICODE::as_utf8(NULL, 0);
     }
@@ -3010,7 +3010,7 @@
     // in thread.cpp.
     JvmtiPendingMonitors::enter(rmonitor);
   } else {
-    int r;
+    int r = 0;
     Thread* thread = Thread::current();
 
     if (thread->is_Java_thread()) {
@@ -3073,7 +3073,7 @@
       err = JVMTI_ERROR_NOT_MONITOR_OWNER;
     }
   } else {
-    int r;
+    int r = 0;
     Thread* thread = Thread::current();
 
     if (thread->is_Java_thread()) {
@@ -3107,7 +3107,7 @@
 // rmonitor - pre-checked for validity
 jvmtiError
 JvmtiEnv::RawMonitorWait(JvmtiRawMonitor * rmonitor, jlong millis) {
-  int r;
+  int r = 0;
   Thread* thread = Thread::current();
 
   if (thread->is_Java_thread()) {
@@ -3166,7 +3166,7 @@
 // rmonitor - pre-checked for validity
 jvmtiError
 JvmtiEnv::RawMonitorNotify(JvmtiRawMonitor * rmonitor) {
-  int r;
+  int r = 0;
   Thread* thread = Thread::current();
 
   if (thread->is_Java_thread()) {
@@ -3197,7 +3197,7 @@
 // rmonitor - pre-checked for validity
 jvmtiError
 JvmtiEnv::RawMonitorNotifyAll(JvmtiRawMonitor * rmonitor) {
-  int r;
+  int r = 0;
   Thread* thread = Thread::current();
 
   if (thread->is_Java_thread()) {
--- a/src/share/vm/prims/jvmtiEnvBase.cpp	Mon Aug 08 20:18:53 2016 +0300
+++ b/src/share/vm/prims/jvmtiEnvBase.cpp	Fri Aug 12 18:10:37 2016 +0300
@@ -511,7 +511,7 @@
 // mean much better out of memory handling
 unsigned char *
 JvmtiEnvBase::jvmtiMalloc(jlong size) {
-  unsigned char* mem;
+  unsigned char* mem = NULL;
   jvmtiError result = allocate(size, &mem);
   assert(result == JVMTI_ERROR_NONE, "Allocate failed");
   return mem;
@@ -1038,7 +1038,7 @@
     // implied else: entry_count == 0
   }
 
-  int nWant,nWait;
+  int nWant = 0, nWait = 0;
   if (mon != NULL) {
     // this object has a heavyweight monitor
     nWant = mon->contentions(); // # of threads contending for monitor
--- a/src/share/vm/prims/jvmtiExport.cpp	Mon Aug 08 20:18:53 2016 +0300
+++ b/src/share/vm/prims/jvmtiExport.cpp	Fri Aug 12 18:10:37 2016 +0300
@@ -995,7 +995,9 @@
         // Before we call the JVMTI agent, we have to set the state in the
         // thread for which we are proxying.
         JavaThreadState prev_state = real_thread->thread_state();
-        assert(prev_state == _thread_blocked, "JavaThread should be at safepoint");
+        assert(((Thread *)real_thread)->is_ConcurrentGC_thread() ||
+               (real_thread->is_Java_thread() && prev_state == _thread_blocked),
+               "should be ConcurrentGCThread or JavaThread at safepoint");
         real_thread->set_thread_state(_thread_in_native);
 
         jvmtiExtensionEvent callback = env->ext_callbacks()->ClassUnload;
--- a/src/share/vm/prims/jvmtiRedefineClasses.cpp	Mon Aug 08 20:18:53 2016 +0300
+++ b/src/share/vm/prims/jvmtiRedefineClasses.cpp	Fri Aug 12 18:10:37 2016 +0300
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2016, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -332,7 +332,7 @@
       int new_name_and_type_ref_i = find_or_append_indirect_entry(scratch_cp, name_and_type_ref_i,
                                                           merge_cp_p, merge_cp_length_p, THREAD);
 
-      const char *entry_name;
+      const char *entry_name = NULL;
       switch (scratch_cp->tag_at(scratch_i).value()) {
       case JVM_CONSTANT_Fieldref:
         entry_name = "Fieldref";
@@ -3378,7 +3378,9 @@
     // not yet in the vtable, because the vtable setup is in progress.
     // This must be done after we adjust the default_methods and
     // default_vtable_indices for methods already in the vtable.
+    // If redefining Unsafe, walk all the vtables looking for entries.
     if (ik->vtable_length() > 0 && (_the_class_oop->is_interface()
+        || _the_class_oop == SystemDictionary::misc_Unsafe_klass()
         || ik->is_subtype_of(_the_class_oop))) {
       // ik->vtable() creates a wrapper object; rm cleans it up
       ResourceMark rm(_thread);
@@ -3393,7 +3395,9 @@
     // interface, then we have to call adjust_method_entries() for
     // every InstanceKlass that has an itable since there isn't a
     // subclass relationship between an interface and an InstanceKlass.
+    // If redefining Unsafe, walk all the itables looking for entries.
     if (ik->itable_length() > 0 && (_the_class_oop->is_interface()
+        || _the_class_oop == SystemDictionary::misc_Unsafe_klass()
         || ik->is_subclass_of(_the_class_oop))) {
       // ik->itable() creates a wrapper object; rm cleans it up
       ResourceMark rm(_thread);
@@ -3922,6 +3926,10 @@
   scratch_class->set_methods(_old_methods);     // To prevent potential GCing of the old methods,
                                           // and to be able to undo operation easily.
 
+  Array<int>* old_ordering = the_class->method_ordering();
+  the_class->set_method_ordering(scratch_class->method_ordering());
+  scratch_class->set_method_ordering(old_ordering);
+
   ConstantPool* old_constants = the_class->constants();
   the_class->set_constants(scratch_class->constants());
   scratch_class->set_constants(old_constants);  // See the previous comment.
--- a/src/share/vm/prims/jvmtiTrace.cpp	Mon Aug 08 20:18:53 2016 +0300
+++ b/src/share/vm/prims/jvmtiTrace.cpp	Fri Aug 12 18:10:37 2016 +0300
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2016, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -266,11 +266,11 @@
   if (threadObj == NULL) {
     return "NULL";
   }
-  typeArrayOop name = java_lang_Thread::name(threadObj);
+  oop name = java_lang_Thread::name(threadObj);
   if (name == NULL) {
     return "<NOT FILLED IN>";
   }
-  return UNICODE::as_utf8((jchar*) name->base(T_CHAR), name->length());
+  return java_lang_String::as_utf8_string(name);
 }
 
 
--- a/src/share/vm/prims/unsafe.cpp	Mon Aug 08 20:18:53 2016 +0300
+++ b/src/share/vm/prims/unsafe.cpp	Fri Aug 12 18:10:37 2016 +0300
@@ -864,7 +864,7 @@
 
 UNSAFE_ENTRY(jint, Unsafe_ArrayBaseOffset(JNIEnv *env, jobject unsafe, jclass acls))
   UnsafeWrapper("Unsafe_ArrayBaseOffset");
-  int base, scale;
+  int base = 0, scale = 0;
   getBaseAndScale(base, scale, acls, CHECK_0);
   return field_offset_from_byte_offset(base);
 UNSAFE_END
@@ -872,7 +872,7 @@
 
 UNSAFE_ENTRY(jint, Unsafe_ArrayIndexScale(JNIEnv *env, jobject unsafe, jclass acls))
   UnsafeWrapper("Unsafe_ArrayIndexScale");
-  int base, scale;
+  int base = 0, scale = 0;
   getBaseAndScale(base, scale, acls, CHECK_0);
   // This VM packs both fields and array elements down to the byte.
   // But watch out:  If this changes, so that array references for
--- a/src/share/vm/prims/whitebox.cpp	Mon Aug 08 20:18:53 2016 +0300
+++ b/src/share/vm/prims/whitebox.cpp	Fri Aug 12 18:10:37 2016 +0300
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2012, 2014, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012, 2016, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -25,6 +25,7 @@
 #include "precompiled.hpp"
 
 #include "memory/metadataFactory.hpp"
+#include "memory/metaspaceShared.hpp"
 #include "memory/universe.hpp"
 #include "oops/oop.inline.hpp"
 
@@ -45,6 +46,7 @@
 #if INCLUDE_ALL_GCS
 #include "gc_implementation/parallelScavenge/parallelScavengeHeap.inline.hpp"
 #include "gc_implementation/g1/concurrentMark.hpp"
+#include "gc_implementation/g1/concurrentMarkThread.hpp"
 #include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
 #include "gc_implementation/g1/heapRegionRemSet.hpp"
 #endif // INCLUDE_ALL_GCS
@@ -323,8 +325,16 @@
 
 WB_ENTRY(jboolean, WB_G1InConcurrentMark(JNIEnv* env, jobject o))
   G1CollectedHeap* g1 = G1CollectedHeap::heap();
-  ConcurrentMark* cm = g1->concurrent_mark();
-  return cm->concurrent_marking_in_progress();
+  return g1->concurrent_mark()->cmThread()->during_cycle();
+WB_END
+
+WB_ENTRY(jboolean, WB_G1StartMarkCycle(JNIEnv* env, jobject o))
+  G1CollectedHeap* g1h = G1CollectedHeap::heap();
+  if (!g1h->concurrent_mark()->cmThread()->during_cycle()) {
+    g1h->collect(GCCause::_wb_conc_mark);
+    return true;
+  }
+  return false;
 WB_END
 
 WB_ENTRY(jint, WB_G1RegionSize(JNIEnv* env, jobject o))
@@ -905,6 +915,10 @@
   return (jlong) MetaspaceGC::capacity_until_GC();
 WB_END
 
+WB_ENTRY(jboolean, WB_IsSharedClass(JNIEnv* env, jobject wb, jclass clazz))
+  return (jboolean)MetaspaceShared::is_in_shared_space(java_lang_Class::as_Klass(JNIHandles::resolve_non_null(clazz)));
+WB_END
+
 WB_ENTRY(jboolean, WB_IsMonitorInflated(JNIEnv* env, jobject wb, jobject obj))
   oop obj_oop = JNIHandles::resolve(obj);
   return (jboolean) obj_oop->mark()->has_monitor();
@@ -1025,12 +1039,14 @@
   {CC"runMemoryUnitTests", CC"()V",                   (void*)&WB_RunMemoryUnitTests},
   {CC"readFromNoaccessArea",CC"()V",                  (void*)&WB_ReadFromNoaccessArea},
   {CC"stressVirtualSpaceResize",CC"(JJJ)I",           (void*)&WB_StressVirtualSpaceResize},
+  {CC"isSharedClass", CC"(Ljava/lang/Class;)Z",       (void*)&WB_IsSharedClass },
 #if INCLUDE_ALL_GCS
   {CC"g1InConcurrentMark", CC"()Z",                   (void*)&WB_G1InConcurrentMark},
   {CC"g1IsHumongous",      CC"(Ljava/lang/Object;)Z", (void*)&WB_G1IsHumongous     },
   {CC"g1NumMaxRegions",    CC"()J",                   (void*)&WB_G1NumMaxRegions  },
   {CC"g1NumFreeRegions",   CC"()J",                   (void*)&WB_G1NumFreeRegions  },
   {CC"g1RegionSize",       CC"()I",                   (void*)&WB_G1RegionSize      },
+  {CC"g1StartConcMarkCycle",       CC"()Z",           (void*)&WB_G1StartMarkCycle  },
   {CC"g1AuxiliaryMemoryUsage", CC"()Ljava/lang/management/MemoryUsage;",
                                                       (void*)&WB_G1AuxiliaryMemoryUsage  },
 #endif // INCLUDE_ALL_GCS
--- a/src/share/vm/runtime/arguments.cpp	Mon Aug 08 20:18:53 2016 +0300
+++ b/src/share/vm/runtime/arguments.cpp	Fri Aug 12 18:10:37 2016 +0300
@@ -1675,9 +1675,8 @@
   FLAG_SET_DEFAULT(ParallelGCThreads,
                      Abstract_VM_Version::parallel_worker_threads());
   if (ParallelGCThreads == 0) {
-    FLAG_SET_DEFAULT(ParallelGCThreads,
-                     Abstract_VM_Version::parallel_worker_threads());
-  }
+    vm_exit_during_initialization("The flag -XX:+UseG1GC can not be combined with -XX:ParallelGCThreads=0", NULL);
+    }
 
 #if INCLUDE_ALL_GCS
   if (G1ConcRefinementThreads == 0) {
@@ -3414,7 +3413,7 @@
   }
 
   if (!PrintSharedArchiveAndExit) {
-    ClassLoader::trace_class_path("[classpath: ", _java_class_path->value());
+    ClassLoader::trace_class_path(tty, "[classpath: ", _java_class_path->value());
   }
 }
 
--- a/src/share/vm/runtime/globals.hpp	Mon Aug 08 20:18:53 2016 +0300
+++ b/src/share/vm/runtime/globals.hpp	Fri Aug 12 18:10:37 2016 +0300
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -1291,6 +1291,13 @@
           "Decay time (in milliseconds) to re-enable bulk rebiasing of a "  \
           "type after previous bulk rebias")                                \
                                                                             \
+  product(bool, ExitOnOutOfMemoryError, false,                              \
+          "JVM exits on the first occurrence of an out-of-memory error")    \
+                                                                            \
+  product(bool, CrashOnOutOfMemoryError, false,                             \
+          "JVM aborts, producing an error log and core/mini dump, on the "  \
+          "first occurrence of an out-of-memory error")                     \
+                                                                            \
   /* tracing */                                                             \
                                                                             \
   notproduct(bool, TraceRuntimeCalls, false,                                \
@@ -2264,6 +2271,14 @@
   diagnostic(bool, VerifyDuringGC, false,                                   \
           "Verify memory system during GC (between phases)")                \
                                                                             \
+  diagnostic(ccstrlist, VerifySubSet, "",                                   \
+          "Memory sub-systems to verify when Verify*GC flag(s) "            \
+          "are enabled. One or more sub-systems can be specified "          \
+          "in a comma separated string. Sub-systems are: "                  \
+          "threads, heap, symbol_table, string_table, codecache, "          \
+          "dictionary, classloader_data_graph, metaspace, jni_handles, "    \
+          "c-heap, codecache_oops")                                         \
+                                                                            \
   diagnostic(bool, GCParallelVerificationEnabled, true,                     \
           "Enable parallel memory system verification")                     \
                                                                             \
--- a/src/share/vm/runtime/os.cpp	Mon Aug 08 20:18:53 2016 +0300
+++ b/src/share/vm/runtime/os.cpp	Fri Aug 12 18:10:37 2016 +0300
@@ -325,6 +325,10 @@
   // We need to initialize large page support here because ergonomics takes some
   // decisions depending on large page support and the calculated large page size.
   large_page_init();
+
+  // VM version initialization identifies some characteristics of the
+  // platform that are used during ergonomic decisions.
+  VM_Version::init_before_ergo();
 }
 
 void os::signal_init() {
--- a/src/share/vm/runtime/safepoint.cpp	Mon Aug 08 20:18:53 2016 +0300
+++ b/src/share/vm/runtime/safepoint.cpp	Fri Aug 12 18:10:37 2016 +0300
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -146,7 +146,7 @@
 
   // Save the starting time, so that it can be compared to see if this has taken
   // too long to complete.
-  jlong safepoint_limit_time;
+  jlong safepoint_limit_time = 0;
   timeout_error_printed = false;
 
   // PrintSafepointStatisticsTimeout can be specified separately. When
@@ -743,80 +743,12 @@
 // ------------------------------------------------------------------------------------------------------
 // Exception handlers
 
-#ifndef PRODUCT
-
-#ifdef SPARC
-
-#ifdef _LP64
-#define PTR_PAD ""
-#else
-#define PTR_PAD "        "
-#endif
-
-static void print_ptrs(intptr_t oldptr, intptr_t newptr, bool wasoop) {
-  bool is_oop = newptr ? (cast_to_oop(newptr))->is_oop() : false;
-  tty->print_cr(PTR_FORMAT PTR_PAD " %s %c " PTR_FORMAT PTR_PAD " %s %s",
-                oldptr, wasoop?"oop":"   ", oldptr == newptr ? ' ' : '!',
-                newptr, is_oop?"oop":"   ", (wasoop && !is_oop) ? "STALE" : ((wasoop==false&&is_oop==false&&oldptr !=newptr)?"STOMP":"     "));
-}
-
-static void print_longs(jlong oldptr, jlong newptr, bool wasoop) {
-  bool is_oop = newptr ? (cast_to_oop(newptr))->is_oop() : false;
-  tty->print_cr(PTR64_FORMAT " %s %c " PTR64_FORMAT " %s %s",
-                oldptr, wasoop?"oop":"   ", oldptr == newptr ? ' ' : '!',
-                newptr, is_oop?"oop":"   ", (wasoop && !is_oop) ? "STALE" : ((wasoop==false&&is_oop==false&&oldptr !=newptr)?"STOMP":"     "));
-}
-
-static void print_me(intptr_t *new_sp, intptr_t *old_sp, bool *was_oops) {
-#ifdef _LP64
-  tty->print_cr("--------+------address-----+------before-----------+-------after----------+");
-  const int incr = 1;           // Increment to skip a long, in units of intptr_t
-#else
-  tty->print_cr("--------+--address-+------before-----------+-------after----------+");
-  const int incr = 2;           // Increment to skip a long, in units of intptr_t
-#endif
-  tty->print_cr("---SP---|");
-  for( int i=0; i<16; i++ ) {
-    tty->print("blob %c%d |"PTR_FORMAT" ","LO"[i>>3],i&7,new_sp); print_ptrs(*old_sp++,*new_sp++,*was_oops++); }
-  tty->print_cr("--------|");
-  for( int i1=0; i1<frame::memory_parameter_word_sp_offset-16; i1++ ) {
-    tty->print("argv pad|"PTR_FORMAT" ",new_sp); print_ptrs(*old_sp++,*new_sp++,*was_oops++); }
-  tty->print("     pad|"PTR_FORMAT" ",new_sp); print_ptrs(*old_sp++,*new_sp++,*was_oops++);
-  tty->print_cr("--------|");
-  tty->print(" G1     |"PTR_FORMAT" ",new_sp); print_longs(*(jlong*)old_sp,*(jlong*)new_sp,was_oops[incr-1]); old_sp += incr; new_sp += incr; was_oops += incr;
-  tty->print(" G3     |"PTR_FORMAT" ",new_sp); print_longs(*(jlong*)old_sp,*(jlong*)new_sp,was_oops[incr-1]); old_sp += incr; new_sp += incr; was_oops += incr;
-  tty->print(" G4     |"PTR_FORMAT" ",new_sp); print_longs(*(jlong*)old_sp,*(jlong*)new_sp,was_oops[incr-1]); old_sp += incr; new_sp += incr; was_oops += incr;
-  tty->print(" G5     |"PTR_FORMAT" ",new_sp); print_longs(*(jlong*)old_sp,*(jlong*)new_sp,was_oops[incr-1]); old_sp += incr; new_sp += incr; was_oops += incr;
-  tty->print_cr(" FSR    |"PTR_FORMAT" "PTR64_FORMAT"       "PTR64_FORMAT,new_sp,*(jlong*)old_sp,*(jlong*)new_sp);
-  old_sp += incr; new_sp += incr; was_oops += incr;
-  // Skip the floats
-  tty->print_cr("--Float-|"PTR_FORMAT,new_sp);
-  tty->print_cr("---FP---|");
-  old_sp += incr*32;  new_sp += incr*32;  was_oops += incr*32;
-  for( int i2=0; i2<16; i2++ ) {
-    tty->print("call %c%d |"PTR_FORMAT" ","LI"[i2>>3],i2&7,new_sp); print_ptrs(*old_sp++,*new_sp++,*was_oops++); }
-  tty->cr();
-}
-#endif  // SPARC
-#endif  // PRODUCT
-
 
 void SafepointSynchronize::handle_polling_page_exception(JavaThread *thread) {
   assert(thread->is_Java_thread(), "polling reference encountered by VM thread");
   assert(thread->thread_state() == _thread_in_Java, "should come from Java code");
   assert(SafepointSynchronize::is_synchronizing(), "polling encountered outside safepoint synchronization");
 
-  // Uncomment this to get some serious before/after printing of the
-  // Sparc safepoint-blob frame structure.
-  /*
-  intptr_t* sp = thread->last_Java_sp();
-  intptr_t stack_copy[150];
-  for( int i=0; i<150; i++ ) stack_copy[i] = sp[i];
-  bool was_oops[150];
-  for( int i=0; i<150; i++ )
-    was_oops[i] = stack_copy[i] ? ((oop)stack_copy[i])->is_oop() : false;
-  */
-
   if (ShowSafepointMsgs) {
     tty->print("handle_polling_page_exception: ");
   }
@@ -828,7 +760,6 @@
   ThreadSafepointState* state = thread->safepoint_state();
 
   state->handle_polling_page_exception();
-  // print_me(sp,stack_copy,was_oops);
 }
 
 
@@ -998,7 +929,7 @@
 
 
 void ThreadSafepointState::print_on(outputStream *st) const {
-  const char *s;
+  const char *s = NULL;
 
   switch(_type) {
     case _running                : s = "_running";              break;
--- a/src/share/vm/runtime/sharedRuntime.hpp	Mon Aug 08 20:18:53 2016 +0300
+++ b/src/share/vm/runtime/sharedRuntime.hpp	Fri Aug 12 18:10:37 2016 +0300
@@ -145,6 +145,12 @@
   static double dsqrt(double f);
 #endif
 
+  // Montgomery multiplication
+  static void montgomery_multiply(jint *a_ints, jint *b_ints, jint *n_ints,
+                                  jint len, jlong inv, jint *m_ints);
+  static void montgomery_square(jint *a_ints, jint *n_ints,
+                                jint len, jlong inv, jint *m_ints);
+
 #ifdef __SOFTFP__
   // C++ compiler generates soft float instructions as well as passing
   // float and double in registers.
--- a/src/share/vm/runtime/stubRoutines.cpp	Mon Aug 08 20:18:53 2016 +0300
+++ b/src/share/vm/runtime/stubRoutines.cpp	Fri Aug 12 18:10:37 2016 +0300
@@ -136,6 +136,10 @@
 address StubRoutines::_crc_table_adr = NULL;
 
 address StubRoutines::_multiplyToLen = NULL;
+address StubRoutines::_squareToLen = NULL;
+address StubRoutines::_mulAdd = NULL;
+address StubRoutines::_montgomeryMultiply = NULL;
+address StubRoutines::_montgomerySquare = NULL;
 
 double (* StubRoutines::_intrinsic_log   )(double) = NULL;
 double (* StubRoutines::_intrinsic_log10 )(double) = NULL;
--- a/src/share/vm/runtime/stubRoutines.hpp	Mon Aug 08 20:18:53 2016 +0300
+++ b/src/share/vm/runtime/stubRoutines.hpp	Fri Aug 12 18:10:37 2016 +0300
@@ -215,6 +215,10 @@
   static address _crc_table_adr;
 
   static address _multiplyToLen;
+  static address _squareToLen;
+  static address _mulAdd;
+  static address _montgomeryMultiply;
+  static address _montgomerySquare;
 
   // These are versions of the java.lang.Math methods which perform
   // the same operations as the intrinsic version.  They are used for
@@ -373,6 +377,10 @@
   static address crc_table_addr()      { return _crc_table_adr; }
 
   static address multiplyToLen()       {return _multiplyToLen; }
+  static address squareToLen()         {return _squareToLen; }
+  static address mulAdd()              {return _mulAdd; }
+  static address montgomeryMultiply()  { return _montgomeryMultiply; }
+  static address montgomerySquare()    { return _montgomerySquare; }
 
   static address select_fill_function(BasicType t, bool aligned, const char* &name);
 
--- a/src/share/vm/runtime/synchronizer.cpp	Mon Aug 08 20:18:53 2016 +0300
+++ b/src/share/vm/runtime/synchronizer.cpp	Fri Aug 12 18:10:37 2016 +0300
@@ -121,7 +121,7 @@
     }                                                                      \
   }
 
-#define HOTSPOT_MONITOR_PROBE_waited HOTSPOT_MONITOR_PROBE_WAITED
+#define HOTSPOT_MONITOR_PROBE_waited HOTSPOT_MONITOR_WAITED
 
 #define DTRACE_MONITOR_PROBE(probe, monitor, obj, thread)                  \
   {                                                                        \
--- a/src/share/vm/runtime/thread.cpp	Mon Aug 08 20:18:53 2016 +0300
+++ b/src/share/vm/runtime/thread.cpp	Fri Aug 12 18:10:37 2016 +0300
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -141,8 +141,8 @@
 
 #else /* USDT2 */
 
-#define HOTSPOT_THREAD_PROBE_start HOTSPOT_THREAD_PROBE_START
-#define HOTSPOT_THREAD_PROBE_stop HOTSPOT_THREAD_PROBE_STOP
+#define HOTSPOT_THREAD_PROBE_start HOTSPOT_THREAD_START
+#define HOTSPOT_THREAD_PROBE_stop HOTSPOT_THREAD_STOP
 
 #define DTRACE_THREAD_PROBE(probe, javathread)                             \
   {                                                                        \
@@ -2926,13 +2926,13 @@
   const char* name_str;
   oop thread_obj = threadObj();
   if (thread_obj != NULL) {
-    typeArrayOop name = java_lang_Thread::name(thread_obj);
+    oop name = java_lang_Thread::name(thread_obj);
     if (name != NULL) {
       if (buf == NULL) {
-        name_str = UNICODE::as_utf8((jchar*) name->base(T_CHAR), name->length());
+        name_str = java_lang_String::as_utf8_string(name);
       }
       else {
-        name_str = UNICODE::as_utf8((jchar*) name->base(T_CHAR), name->length(), buf, buflen);
+        name_str = java_lang_String::as_utf8_string(name, buf, buflen);
       }
     }
     else if (is_attaching_via_jni()) { // workaround for 6412693 - see 6404306
--- a/src/share/vm/runtime/vmStructs.cpp	Mon Aug 08 20:18:53 2016 +0300
+++ b/src/share/vm/runtime/vmStructs.cpp	Fri Aug 12 18:10:37 2016 +0300
@@ -821,6 +821,8 @@
      static_field(StubRoutines,                _updateBytesCRC32,                             address)                               \
      static_field(StubRoutines,                _crc_table_adr,                                address)                               \
      static_field(StubRoutines,                _multiplyToLen,                                address)                               \
+     static_field(StubRoutines,                _squareToLen,                                  address)                               \
+     static_field(StubRoutines,                _mulAdd,                                       address)                               \
                                                                                                                                      \
   /*****************/                                                                                                                \
   /* SharedRuntime */                                                                                                                \
--- a/src/share/vm/runtime/vm_version.hpp	Mon Aug 08 20:18:53 2016 +0300
+++ b/src/share/vm/runtime/vm_version.hpp	Fri Aug 12 18:10:37 2016 +0300
@@ -54,6 +54,12 @@
                                                   unsigned int dem,
                                                   unsigned int switch_pt);
  public:
+  // Called as part of the runtime services initialization which is
+  // called from the management module initialization (via init_globals())
+  // after argument parsing and attaching of the main thread has
+  // occurred.  Examines a variety of the hardware capabilities of
+  // the platform to determine which features can be used to execute the
+  // program.
   static void initialize();
 
   // This allows for early initialization of VM_Version information
@@ -63,6 +69,11 @@
   // need to specialize this define VM_Version::early_initialize().
   static void early_initialize() { }
 
+  // Called to initialize VM variables needing initialization
+  // after command line parsing. Platforms that need to specialize
+  // this should define VM_Version::init_before_ergo().
+  static void init_before_ergo() {}
+
   // Name
   static const char* vm_name();
   // Vendor
--- a/src/share/vm/services/diagnosticCommand.cpp	Mon Aug 08 20:18:53 2016 +0300
+++ b/src/share/vm/services/diagnosticCommand.cpp	Fri Aug 12 18:10:37 2016 +0300
@@ -437,6 +437,10 @@
   ("config.file",
    "set com.sun.management.config.file", "STRING", false),
 
+  _jmxremote_host
+  ("jmxremote.host",
+   "set com.sun.management.jmxremote.host", "STRING", false),
+
   _jmxremote_port
   ("jmxremote.port",
    "set com.sun.management.jmxremote.port", "STRING", false),
@@ -516,6 +520,7 @@
 
   {
     _dcmdparser.add_dcmd_option(&_config_file);
+    _dcmdparser.add_dcmd_option(&_jmxremote_host);
     _dcmdparser.add_dcmd_option(&_jmxremote_port);
     _dcmdparser.add_dcmd_option(&_jmxremote_rmi_port);
     _dcmdparser.add_dcmd_option(&_jmxremote_ssl);
@@ -586,6 +591,7 @@
     }
 
     PUT_OPTION(_config_file);
+    PUT_OPTION(_jmxremote_host);
     PUT_OPTION(_jmxremote_port);
     PUT_OPTION(_jmxremote_rmi_port);
     PUT_OPTION(_jmxremote_ssl);
--- a/src/share/vm/services/diagnosticCommand.hpp	Mon Aug 08 20:18:53 2016 +0300
+++ b/src/share/vm/services/diagnosticCommand.hpp	Fri Aug 12 18:10:37 2016 +0300
@@ -280,6 +280,7 @@
   // com.sun.management is omitted
 
   DCmdArgument<char *> _config_file;
+  DCmdArgument<char *> _jmxremote_host;
   DCmdArgument<char *> _jmxremote_port;
   DCmdArgument<char *> _jmxremote_rmi_port;
   DCmdArgument<char *> _jmxremote_ssl;
--- a/src/share/vm/services/heapDumper.cpp	Mon Aug 08 20:18:53 2016 +0300
+++ b/src/share/vm/services/heapDumper.cpp	Fri Aug 12 18:10:37 2016 +0300
@@ -376,11 +376,11 @@
   };
 
   int _fd;              // file descriptor (-1 if dump file not open)
-  jlong _bytes_written; // number of byte written to dump file
+  julong _bytes_written; // number of bytes written to dump file
 
   char* _buffer;    // internal buffer
-  int _size;
-  int _pos;
+  size_t _size;
+  size_t _pos;
 
   char* _error;   // error message when I/O fails
 
@@ -388,14 +388,14 @@
   int file_descriptor() const                   { return _fd; }
 
   char* buffer() const                          { return _buffer; }
-  int buffer_size() const                       { return _size; }
-  int position() const                          { return _pos; }
-  void set_position(int pos)                    { _pos = pos; }
+  size_t buffer_size() const                    { return _size; }
+  size_t position() const                       { return _pos; }
+  void set_position(size_t pos)                 { _pos = pos; }
 
   void set_error(const char* error)             { _error = (char*)os::strdup(error); }
 
   // all I/O go through this function
-  void write_internal(void* s, int len);
+  void write_internal(void* s, size_t len);
 
  public:
   DumpWriter(const char* path);
@@ -406,14 +406,14 @@
   void flush();
 
   // total number of bytes written to the disk
-  jlong bytes_written() const           { return _bytes_written; }
+  julong bytes_written() const          { return _bytes_written; }
 
   // adjust the number of bytes written to disk (used to keep the count
   // of the number of bytes written in case of rewrites)
-  void adjust_bytes_written(jlong n)     { _bytes_written += n; }
+  void adjust_bytes_written(jlong n)    { _bytes_written += n; }
 
   // number of (buffered) bytes as yet unwritten to the dump file
-  jlong bytes_unwritten() const          { return (jlong)position(); }
+  size_t bytes_unwritten() const        { return position(); }
 
   char* error() const                   { return _error; }
 
@@ -421,7 +421,7 @@
   void seek_to_offset(jlong pos);
 
   // writer functions
-  void write_raw(void* s, int len);
+  void write_raw(void* s, size_t len);
   void write_u1(u1 x)                   { write_raw((void*)&x, 1); }
   void write_u2(u2 x);
   void write_u4(u4 x);
@@ -474,29 +474,33 @@
 }
 
 // write directly to the file
-void DumpWriter::write_internal(void* s, int len) {
+void DumpWriter::write_internal(void* s, size_t len) {
   if (is_open()) {
-    int n = ::write(file_descriptor(), s, len);
-    if (n > 0) {
-      _bytes_written += n;
-    }
-    if (n != len) {
+    const char* pos = (char*)s;
+    ssize_t n = 0;
+    while (len > 0) {
+      uint tmp = (uint)MIN2(len, (size_t)UINT_MAX);
+      n = ::write(file_descriptor(), pos, tmp);
+
       if (n < 0) {
         set_error(strerror(errno));
-      } else {
-        set_error("file size limit");
+        ::close(file_descriptor());
+        set_file_descriptor(-1);
+        return;
       }
-      ::close(file_descriptor());
-      set_file_descriptor(-1);
+
+      _bytes_written += n;
+      pos += n;
+      len -= n;
     }
   }
 }
 
 // write raw bytes
-void DumpWriter::write_raw(void* s, int len) {
+void DumpWriter::write_raw(void* s, size_t len) {
   if (is_open()) {
-    // flush buffer to make toom
-    if ((position()+ len) >= buffer_size()) {
+    // flush buffer to make room
+    if ((position() + len) >= buffer_size()) {
       flush();
     }
 
@@ -519,13 +523,12 @@
   }
 }
 
-
 jlong DumpWriter::current_offset() {
   if (is_open()) {
     // the offset is the file offset plus whatever we have buffered
     jlong offset = os::current_file_offset(file_descriptor());
     assert(offset >= 0, "lseek failed");
-    return offset + (jlong)position();
+    return offset + position();
   } else {
     return (jlong)-1;
   }
@@ -774,7 +777,7 @@
   HandleMark hm;
   instanceKlassHandle ikh = instanceKlassHandle(Thread::current(), k);
 
-  int size = 0;
+  u4 size = 0;
 
   for (FieldStream fld(ikh, false, false); !fld.eos(); fld.next()) {
     if (!fld.access_flags().is_static()) {
@@ -799,7 +802,7 @@
       }
     }
   }
-  return (u4)size;
+  return size;
 }
 
 // dumps static fields of the given class
@@ -1031,8 +1034,7 @@
   }
 
   // If the byte ordering is big endian then we can copy most types directly
-  int length_in_bytes = array->length() * type2aelembytes(type);
-  assert(length_in_bytes > 0, "nothing to copy");
+  u4 length_in_bytes = (u4)array->length() * type2aelembytes(type);
 
   switch (type) {
     case T_INT : {
@@ -1285,22 +1287,18 @@
     }
   }
 
-  // create a HPROF_GC_INSTANCE record for each object
   if (o->is_instance()) {
+    // create a HPROF_GC_INSTANCE record for each object
     DumperSupport::dump_instance(writer(), o);
     mark_end_of_record();
-  } else {
+  } else if (o->is_objArray()) {
     // create a HPROF_GC_OBJ_ARRAY_DUMP record for each object array
-    if (o->is_objArray()) {
-      DumperSupport::dump_object_array(writer(), objArrayOop(o));
-      mark_end_of_record();
-    } else {
-      // create a HPROF_GC_PRIM_ARRAY_DUMP record for each type array
-      if (o->is_typeArray()) {
-        DumperSupport::dump_prim_array(writer(), typeArrayOop(o));
-        mark_end_of_record();
-      }
-    }
+    DumperSupport::dump_object_array(writer(), objArrayOop(o));
+    mark_end_of_record();
+  } else if (o->is_typeArray()) {
+    // create a HPROF_GC_PRIM_ARRAY_DUMP record for each type array
+    DumperSupport::dump_prim_array(writer(), typeArrayOop(o));
+    mark_end_of_record();
   }
 }
 
@@ -1448,11 +1446,11 @@
     assert(dump_start() >= 0, "no dump start recorded");
 
     // calculate the size of the dump record
-    jlong dump_end = writer()->current_offset();
-    jlong dump_len = (dump_end - dump_start() - 4);
+    julong dump_end = writer()->current_offset();
+    julong dump_len = (dump_end - dump_start() - 4);
 
     // record length must fit in a u4
-    if (dump_len > (jlong)(4L*(jlong)G)) {
+    if (dump_len > max_juint) {
       warning("record is too large");
     }
 
@@ -1461,7 +1459,7 @@
     writer()->write_u4((u4)dump_len);
 
     // adjust the total size written to keep the bytes written correct.
-    writer()->adjust_bytes_written(-((long) sizeof(u4)));
+    writer()->adjust_bytes_written(-((jlong) sizeof(u4)));
 
     // seek to dump end so we can continue
     writer()->seek_to_offset(dump_end);
@@ -1477,12 +1475,12 @@
   if (writer()->is_open()) {
     if (is_segmented_dump()) {
       // don't use current_offset that would be too expensive on a per record basis
-      jlong dump_end = writer()->bytes_written() + writer()->bytes_unwritten();
-      assert(dump_end == writer()->current_offset(), "checking");
-      jlong dump_len = (dump_end - dump_start() - 4);
-      assert(dump_len >= 0 && dump_len <= max_juint, "bad dump length");
+      julong dump_end = writer()->bytes_written() + writer()->bytes_unwritten();
+      assert(dump_end == (julong)writer()->current_offset(), "checking");
+      julong dump_len = (dump_end - dump_start() - 4);
+      assert(dump_len <= max_juint, "bad dump length");
 
-      if (dump_len > (jlong)HeapDumpSegmentSize) {
+      if (dump_len > HeapDumpSegmentSize) {
         write_current_dump_record_length();
         write_dump_header();
       }
@@ -1868,13 +1866,8 @@
   if (print_to_tty()) {
     timer()->stop();
     if (error() == NULL) {
-      char msg[256];
-      sprintf(msg, "Heap dump file created [%s bytes in %3.3f secs]",
-        JLONG_FORMAT, timer()->seconds());
-PRAGMA_DIAG_PUSH
-PRAGMA_FORMAT_NONLITERAL_IGNORED_INTERNAL
-      tty->print_cr(msg, writer.bytes_written());
-PRAGMA_DIAG_POP
+      tty->print_cr("Heap dump file created [" JULONG_FORMAT " bytes in %3.3f secs]",
+                    writer.bytes_written(), timer()->seconds());
     } else {
       tty->print_cr("Dump file is incomplete: %s", writer.error());
     }
--- a/src/share/vm/services/threadService.hpp	Mon Aug 08 20:18:53 2016 +0300
+++ b/src/share/vm/services/threadService.hpp	Fri Aug 12 18:10:37 2016 +0300
@@ -425,12 +425,12 @@
   }
 
   JavaThreadStatusChanger(JavaThread* java_thread,
-                          java_lang_Thread::ThreadStatus state) {
+                          java_lang_Thread::ThreadStatus state) : _old_state(java_lang_Thread::NEW) {
     save_old_state(java_thread);
     set_thread_status(state);
   }
 
-  JavaThreadStatusChanger(JavaThread* java_thread) {
+  JavaThreadStatusChanger(JavaThread* java_thread) : _old_state(java_lang_Thread::NEW) {
     save_old_state(java_thread);
   }
 
@@ -527,7 +527,7 @@
   // Current thread is the notifying thread which holds the monitor.
   static bool wait_reenter_begin(JavaThread *java_thread, ObjectMonitor *obj_m) {
     assert((java_thread != NULL), "Java thread should not be null here");
-    bool active  = false;
+    bool active = false;
     if (is_alive(java_thread) && ServiceUtil::visible_oop((oop)obj_m->object())) {
       active = contended_enter_begin(java_thread);
     }
@@ -542,7 +542,7 @@
   }
 
   JavaThreadBlockedOnMonitorEnterState(JavaThread *java_thread, ObjectMonitor *obj_m) :
-    JavaThreadStatusChanger(java_thread) {
+    _stat(NULL), _active(false), JavaThreadStatusChanger(java_thread) {
     assert((java_thread != NULL), "Java thread should not be null here");
     // Change thread status and collect contended enter stats for monitor contended
     // enter done for external java world objects and it is contended. All other cases
--- a/src/share/vm/utilities/debug.cpp	Mon Aug 08 20:18:53 2016 +0300
+++ b/src/share/vm/utilities/debug.cpp	Fri Aug 12 18:10:37 2016 +0300
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -302,6 +302,16 @@
       VMError err(message);
       err.report_java_out_of_memory();
     }
+
+    if (CrashOnOutOfMemoryError) {
+      tty->print_cr("Aborting due to java.lang.OutOfMemoryError: %s", message);
+      fatal(err_msg("OutOfMemory encountered: %s", message));
+    }
+
+    if (ExitOnOutOfMemoryError) {
+      tty->print_cr("Terminating due to java.lang.OutOfMemoryError: %s", message);
+      exit(3);
+    }
   }
 }
 
--- a/src/share/vm/utilities/vmError.cpp	Mon Aug 08 20:18:53 2016 +0300
+++ b/src/share/vm/utilities/vmError.cpp	Fri Aug 12 18:10:37 2016 +0300
@@ -229,7 +229,7 @@
 
   if (signame) {
     jio_snprintf(buf, buflen,
-                 "%s (0x%x) at pc=" PTR_FORMAT ", pid=%d, tid=" UINTX_FORMAT,
+                 "%s (0x%x) at pc=" PTR_FORMAT ", pid=%d, tid=" INTPTR_FORMAT,
                  signame, _id, _pc,
                  os::current_process_id(), os::current_thread_id());
   } else if (_filename != NULL && _lineno > 0) {
@@ -237,7 +237,7 @@
     char separator = os::file_separator()[0];
     const char *p = strrchr(_filename, separator);
     int n = jio_snprintf(buf, buflen,
-                         "Internal Error at %s:%d, pid=%d, tid=" UINTX_FORMAT,
+                         "Internal Error at %s:%d, pid=%d, tid=" INTPTR_FORMAT,
                          p ? p + 1 : _filename, _lineno,
                          os::current_process_id(), os::current_thread_id());
     if (n >= 0 && n < buflen && _message) {
@@ -251,7 +251,7 @@
     }
   } else {
     jio_snprintf(buf, buflen,
-                 "Internal Error (0x%x), pid=%d, tid=" UINTX_FORMAT,
+                 "Internal Error (0x%x), pid=%d, tid=" INTPTR_FORMAT,
                  _id, os::current_process_id(), os::current_thread_id());
   }
 
@@ -438,7 +438,7 @@
 
      // process id, thread id
      st->print(", pid=%d", os::current_process_id());
-     st->print(", tid=" UINTX_FORMAT, os::current_thread_id());
+     st->print(", tid=" INTPTR_FORMAT, os::current_thread_id());
      st->cr();
 
   STEP(40, "(printing error message)")
--- a/test/TEST.groups	Mon Aug 08 20:18:53 2016 +0300
+++ b/test/TEST.groups	Fri Aug 12 18:10:37 2016 +0300
@@ -61,7 +61,6 @@
 # can be resolved in some cases by using tools from the compile-jdk.
 #
 needs_jdk = \
-  gc/TestG1ZeroPGCTJcmdThreadPrint.java \
   gc/metaspace/CompressedClassSpaceSizeInJmapHeap.java \
   gc/metaspace/TestMetaspacePerfCounters.java \
   gc/metaspace/TestPerfCountersAndMemoryPools.java \
@@ -96,7 +95,8 @@
   runtime/Thread/TestThreadDumpMonitorContention.java \
   runtime/XCheckJniJsig/XCheckJSig.java \
   serviceability/attach/AttachWithStalePidFile.java \
-  serviceability/sa/jmap-hprof/JMapHProfLargeHeapTest.java
+  serviceability/sa/jmap-hprof/JMapHProfLargeHeapTest.java \
+  testlibrary_tests/
 
 
 # JRE adds further tests to compact3
@@ -215,6 +215,7 @@
   runtime/NMT \
   gc/class_unloading/TestCMSClassUnloadingEnabledHWM.java \
   gc/class_unloading/TestG1ClassUnloadingHWM.java \
+  gc/ergonomics/TestDynamicNumberOfGCThreads.java \
   gc/g1/TestRegionAlignment.java \
   gc/g1/TestShrinkToOneRegion.java \
   gc/metaspace/G1AddMetaspaceDependency.java \
@@ -251,7 +252,6 @@
   serviceability/ \
   compiler/ \
   testlibrary/ \
-  testlibrary_tests/ \
   sanity/ \
   runtime/ \
   gc/ \
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/compiler/intrinsics/montgomerymultiply/MontgomeryMultiplyTest.java	Fri Aug 12 18:10:37 2016 +0300
@@ -0,0 +1,284 @@
+//
+// Copyright (c) 2000, 2015, Oracle and/or its affiliates. All rights reserved.
+// Copyright (c) 2015, Red Hat Inc. All rights reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+//
+// This code is free software; you can redistribute it and/or modify it
+// under the terms of the GNU General Public License version 2 only, as
+// published by the Free Software Foundation.
+//
+// This code is distributed in the hope that it will be useful, but WITHOUT
+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+// version 2 for more details (a copy is included in the LICENSE file that
+// accompanied this code).
+//
+// You should have received a copy of the GNU General Public License version
+// 2 along with this work; if not, write to the Free Software Foundation,
+// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+//
+// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+// or visit www.oracle.com if you need additional information or have any
+// questions.
+//
+//
+
+import java.lang.invoke.MethodHandle;
+import java.lang.invoke.MethodHandles;
+import java.lang.invoke.MethodType;
+import java.lang.reflect.Constructor;
+import java.lang.reflect.Field;
+import java.lang.reflect.Method;
+import java.math.BigInteger;
+import java.util.Arrays;
+import java.util.Random;
+
+/**
+ * @test
+ * @bug 8130150
+ * @library /testlibrary
+ * @requires (os.simpleArch == "x64") & (os.family != "windows")
+ * @summary Verify that the Montgomery multiply intrinsic works and correctly checks its arguments.
+ * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:+UseMontgomerySquareIntrinsic
+ *      -XX:+UseMontgomeryMultiplyIntrinsic MontgomeryMultiplyTest
+ * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:+UseMontgomerySquareIntrinsic
+ *      -XX:-UseMontgomeryMultiplyIntrinsic MontgomeryMultiplyTest
+ * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:-UseMontgomerySquareIntrinsic
+ *      -XX:+UseMontgomeryMultiplyIntrinsic MontgomeryMultiplyTest
+ */
+
+public class MontgomeryMultiplyTest {
+
+    static final MethodHandles.Lookup lookup = MethodHandles.lookup();
+
+    static final MethodHandle montgomeryMultiplyHandle, montgomerySquareHandle;
+    static final MethodHandle bigIntegerConstructorHandle;
+    static final Field bigIntegerMagField;
+
+    static {
+       // Use reflection to gain access to the methods we want to test.
+        try {
+            Method m = BigInteger.class.getDeclaredMethod("montgomeryMultiply",
+                /*a*/int[].class, /*b*/int[].class, /*n*/int[].class, /*len*/int.class,
+                /*inv*/long.class, /*product*/int[].class);
+            m.setAccessible(true);
+            montgomeryMultiplyHandle = lookup.unreflect(m);
+
+            m = BigInteger.class.getDeclaredMethod("montgomerySquare",
+                /*a*/int[].class, /*n*/int[].class, /*len*/int.class,
+                /*inv*/long.class, /*product*/int[].class);
+            m.setAccessible(true);
+            montgomerySquareHandle = lookup.unreflect(m);
+
+            Constructor c
+                = BigInteger.class.getDeclaredConstructor(int.class, int[].class);
+            c.setAccessible(true);
+            bigIntegerConstructorHandle = lookup.unreflectConstructor(c);
+
+            bigIntegerMagField = BigInteger.class.getDeclaredField("mag");
+            bigIntegerMagField.setAccessible(true);
+
+        } catch (Throwable ex) {
+            throw new RuntimeException(ex);
+        }
+    }
+
+    // Invoke either BigInteger.montgomeryMultiply or BigInteger.montgomerySquare.
+    int[] montgomeryMultiply(int[] a, int[] b, int[] n, int len, long inv,
+                             int[] product) throws Throwable {
+        int[] result =
+            (a == b) ? (int[]) montgomerySquareHandle.invokeExact(a, n, len, inv, product)
+                     : (int[]) montgomeryMultiplyHandle.invokeExact(a, b, n, len, inv, product);
+        return Arrays.copyOf(result, len);
+    }
+
+    // Invoke the private constructor BigInteger(int signum, int[] magnitude) with signum 1.
+    BigInteger newBigInteger(int[] val) throws Throwable {
+        return (BigInteger) bigIntegerConstructorHandle.invokeExact(1, val);
+    }
+
+    // Get the private field BigInteger.mag
+    int[] mag(BigInteger n) {
+        try {
+            return (int[]) bigIntegerMagField.get(n);
+        } catch (Exception ex) {
+            throw new RuntimeException(ex);
+        }
+    }
+
+    // Montgomery multiplication (longhand reference implementation)
+    // Calculate (a * b * R^-1) mod N
+    //
+    // R is a power of the word size: R = 2^(32*len)
+    // N' = -N^-1 mod R (passed in as n_prime)
+    //
+    // T := ab
+    // m := (T mod R)N' mod R [so 0 <= m < R]
+    // t := (T + mN)/R
+    // if t >= N then return t - N else return t
+    //
+    BigInteger montgomeryMultiply(BigInteger a, BigInteger b, BigInteger N,
+            int len, BigInteger n_prime)
+            throws Throwable {
+        BigInteger T = a.multiply(b);
+        BigInteger R = BigInteger.ONE.shiftLeft(len*32);
+        BigInteger mask = R.subtract(BigInteger.ONE);
+        BigInteger m = (T.and(mask)).multiply(n_prime);
+        m = m.and(mask); // i.e. m.mod(R)
+        T = T.add(m.multiply(N));
+        T = T.shiftRight(len*32); // i.e. T.divide(R)
+        if (T.compareTo(N) >= 0) { // t == N must reduce to 0, not stay at N
+            T = T.subtract(N);
+        }
+        return T;
+    }
+
+    // Compute the longhand (reference) Montgomery product of operands given as word arrays.
+    BigInteger montgomeryMultiply(int[] a_words, int[] b_words, int[] n_words,
+            int len, BigInteger inv)
+            throws Throwable {
+        BigInteger t = montgomeryMultiply(
+                newBigInteger(a_words),
+                newBigInteger(b_words),
+                newBigInteger(n_words),
+                len, inv);
+        return t;
+    }
+
+    // Check that the Montgomery multiply intrinsic returns the same
+    // result as the longhand calculation.
+    void check(int[] a_words, int[] b_words, int[] n_words, int len, BigInteger inv)
+            throws Throwable {
+        BigInteger n = newBigInteger(n_words);
+        BigInteger slow = montgomeryMultiply(a_words, b_words, n_words, len, inv);
+        BigInteger fast
+            = newBigInteger(montgomeryMultiply
+                            (a_words, b_words, n_words, len, inv.longValue(), null));
+        // The intrinsic may not return the same value as the longhand
+        // calculation but they must have the same residue mod N.
+        if (!slow.mod(n).equals(fast.mod(n))) {
+            throw new RuntimeException();
+        }
+    }
+
+    Random rnd = new Random(0);
+
+    // Return a random value of length <= bits in an array of even length
+    int[] random_val(int bits) {
+        int len = (bits+63)/64;  // i.e. length in longs
+        int[] val = new int[len*2];
+        for (int i = 0; i < val.length; i++)
+            val[i] = rnd.nextInt();
+        int leadingZeros = 64 - (bits & 64);
+        if (leadingZeros >= 32) {
+            val[0] = 0;
+            val[1] &= ~(-1l << (leadingZeros & 31));
+        } else {
+            val[0] &= ~(-1l << leadingZeros);
+        }
+        return val;
+    }
+
+    void testOneLength(int lenInBits, int lenInInts) throws Throwable {
+        BigInteger mod = new BigInteger(lenInBits, 2, rnd);
+        BigInteger r = BigInteger.ONE.shiftLeft(lenInInts * 32);
+        BigInteger n_prime = mod.modInverse(r).negate();
+
+        // Pad n with leading zeros to lenInInts words if it is shorter
+        int[] n = mag(mod);
+        if (n.length < lenInInts) {
+            int[] x = new int[lenInInts];
+            System.arraycopy(n, 0, x, lenInInts-n.length, n.length);
+            n = x;
+        }
+
+        for (int i = 0; i < 10000; i++) {
+            // multiply
+            check(random_val(lenInBits), random_val(lenInBits), n, lenInInts, n_prime);
+            // square
+            int[] tmp = random_val(lenInBits);
+            check(tmp, tmp, n, lenInInts, n_prime);
+        }
+    }
+
+    // Test the Montgomery multiply intrinsic with a bunch of random
+    // values of varying lengths.  Do this for long enough that the
+    // caller of the intrinsic is C2-compiled.
+    void testResultValues() throws Throwable {
+        // Test a couple of interesting edge cases.
+        testOneLength(1024, 32);
+        testOneLength(1025, 34);
+        for (int j = 10; j > 0; j--) {
+            // Construct a random prime whose length in words is even
+            int lenInBits = rnd.nextInt(2048) + 64;
+            int lenInInts = (lenInBits + 63)/64*2;
+            testOneLength(lenInBits, lenInInts);
+        }
+    }
+
+    // Range checks
+    void testOneMontgomeryMultiplyCheck(int[] a, int[] b, int[] n, int len, long inv,
+                                        int[] product, Class klass) {
+        try {
+            montgomeryMultiply(a, b, n, len, inv, product);
+        } catch (Throwable ex) {
+            if (klass.isAssignableFrom(ex.getClass()))
+                return;
+            throw new RuntimeException(klass + " expected, " + ex + " was thrown");
+        }
+        throw new RuntimeException(klass + " expected, was not thrown");
+    }
+
+    void testOneMontgomeryMultiplyCheck(int[] a, int[] b, BigInteger n, int len, BigInteger inv,
+            Class klass) {
+        testOneMontgomeryMultiplyCheck(a, b, mag(n), len, inv.longValue(), null, klass);
+    }
+
+    void testOneMontgomeryMultiplyCheck(int[] a, int[] b, BigInteger n, int len, BigInteger inv,
+            int[] product, Class klass) {
+        testOneMontgomeryMultiplyCheck(a, b, mag(n), len, inv.longValue(), product, klass);
+    }
+
+    void testMontgomeryMultiplyChecks() {
+        int[] blah = random_val(40);
+        int[] small = random_val(39);
+        BigInteger mod = new BigInteger(40*32 , 2, rnd);
+        BigInteger r = BigInteger.ONE.shiftLeft(40*32);
+        BigInteger n_prime = mod.modInverse(r).negate();
+
+        // Length out of range: square
+        testOneMontgomeryMultiplyCheck(blah, blah, mod, 41, n_prime, IllegalArgumentException.class);
+        testOneMontgomeryMultiplyCheck(blah, blah, mod, 0, n_prime, IllegalArgumentException.class);
+        testOneMontgomeryMultiplyCheck(blah, blah, mod, -1, n_prime, IllegalArgumentException.class);
+        // As above, but for multiply
+        testOneMontgomeryMultiplyCheck(blah, blah.clone(), mod, 41, n_prime, IllegalArgumentException.class);
+        testOneMontgomeryMultiplyCheck(blah, blah.clone(), mod, 0, n_prime, IllegalArgumentException.class);
+        testOneMontgomeryMultiplyCheck(blah, blah.clone(), mod, -1, n_prime, IllegalArgumentException.class);
+
+        // Length odd
+        testOneMontgomeryMultiplyCheck(small, small, mod, 39, n_prime, IllegalArgumentException.class);
+        testOneMontgomeryMultiplyCheck(small, small, mod, 0, n_prime, IllegalArgumentException.class);
+        testOneMontgomeryMultiplyCheck(small, small, mod, -1, n_prime, IllegalArgumentException.class);
+        // As above, but for multiply
+        testOneMontgomeryMultiplyCheck(small, small.clone(), mod, 39, n_prime, IllegalArgumentException.class);
+        testOneMontgomeryMultiplyCheck(small, small.clone(), mod, 0, n_prime, IllegalArgumentException.class);
+        testOneMontgomeryMultiplyCheck(small, small.clone(), mod, -1, n_prime, IllegalArgumentException.class);
+
+        // array too small
+        testOneMontgomeryMultiplyCheck(blah, blah, mod, 40, n_prime, small, IllegalArgumentException.class);
+        testOneMontgomeryMultiplyCheck(blah, blah.clone(), mod, 40, n_prime, small, IllegalArgumentException.class);
+        testOneMontgomeryMultiplyCheck(small, blah, mod, 40, n_prime, blah, IllegalArgumentException.class);
+        testOneMontgomeryMultiplyCheck(blah, small, mod, 40, n_prime, blah, IllegalArgumentException.class);
+        testOneMontgomeryMultiplyCheck(blah, blah, mod, 40, n_prime, small, IllegalArgumentException.class);
+        testOneMontgomeryMultiplyCheck(small, small, mod, 40, n_prime, blah, IllegalArgumentException.class);
+    }
+
+    public static void main(String args[]) {
+        try {
+            new MontgomeryMultiplyTest().testMontgomeryMultiplyChecks();
+            new MontgomeryMultiplyTest().testResultValues();
+        } catch (Throwable ex) {
+            throw new RuntimeException(ex);
+        }
+     }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/compiler/intrinsics/muladd/TestMulAdd.java	Fri Aug 12 18:10:37 2016 +0300
@@ -0,0 +1,118 @@
+/*
+ * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+/**
+ * @test
+ * @bug 8081778
+ * @summary Add C2 x86 intrinsic for BigInteger::mulAdd() method
+ *
+ * @run main/othervm/timeout=600 -XX:-TieredCompilation -Xbatch
+ *      -XX:+IgnoreUnrecognizedVMOptions -XX:-UseSquareToLenIntrinsic -XX:-UseMultiplyToLenIntrinsic
+ *      -XX:+UseMulAddIntrinsic
+ *      -XX:CompileCommand=dontinline,TestMulAdd::main
+ *      -XX:CompileCommand=option,TestMulAdd::base_multiply,ccstr,DisableIntrinsic,_mulAdd
+ *      -XX:CompileCommand=option,java.math.BigInteger::multiply,ccstr,DisableIntrinsic,_mulAdd
+ *      -XX:CompileCommand=option,java.math.BigInteger::square,ccstr,DisableIntrinsic,_mulAdd
+ *      -XX:CompileCommand=option,java.math.BigInteger::squareToLen,ccstr,DisableIntrinsic,_mulAdd
+ *      -XX:CompileCommand=option,java.math.BigInteger::mulAdd,ccstr,DisableIntrinsic,_mulAdd
+ *      -XX:CompileCommand=inline,java.math.BigInteger::multiply
+ *      -XX:CompileCommand=inline,java.math.BigInteger::square
+ *      -XX:CompileCommand=inline,java.math.BigInteger::squareToLen
+ *      -XX:CompileCommand=inline,java.math.BigInteger::mulAdd TestMulAdd
+ */
+
+import java.util.Random;
+import java.math.*;
+
+public class TestMulAdd {
+
+    // Avoid intrinsic by preventing inlining multiply() and mulAdd().
+    public static BigInteger base_multiply(BigInteger op1) {
+      return op1.multiply(op1);
+    }
+
+    // Generate mulAdd() intrinsic by inlining multiply().
+    public static BigInteger new_multiply(BigInteger op1) {
+      return op1.multiply(op1);
+    }
+
+    public static boolean bytecompare(BigInteger b1, BigInteger b2) {
+      byte[] data1 = b1.toByteArray();
+      byte[] data2 = b2.toByteArray();
+      if (data1.length != data2.length)
+        return false;
+      for (int i = 0; i < data1.length; i++) {
+        if (data1[i] != data2[i])
+          return false;
+      }
+      return true;
+    }
+
+    public static String stringify(BigInteger b) {
+      String strout= "";
+      byte [] data = b.toByteArray();
+      for (int i = 0; i < data.length; i++) {
+        strout += (String.format("%02x",data[i]) + " ");
+      }
+      return strout;
+    }
+
+    public static void main(String args[]) throws Exception {
+
+      BigInteger oldsum = new BigInteger("0");
+      BigInteger newsum = new BigInteger("0");
+
+      BigInteger b1, b2, oldres, newres;
+
+      Random rand = new Random();
+      long seed = System.nanoTime();
+      Random rand1 = new Random();
+      long seed1 = System.nanoTime();
+      rand.setSeed(seed);
+      rand1.setSeed(seed1);
+
+      for (int j = 0; j < 100000; j++) {
+        int rand_int = rand1.nextInt(3136)+32;
+        b1 = new BigInteger(rand_int, rand);
+
+        oldres = base_multiply(b1);
+        newres = new_multiply(b1);
+
+        oldsum = oldsum.add(oldres);
+        newsum = newsum.add(newres);
+
+        if (!bytecompare(oldres,newres)) {
+          System.out.print("mismatch for:b1:" + stringify(b1) + " :oldres:" + stringify(oldres) + " :newres:" + stringify(newres));
+          System.out.println(b1);
+          throw new Exception("Failed");
+        }
+      }
+      if (!bytecompare(oldsum,newsum))  {
+        System.out.println("Failure: oldsum:" + stringify(oldsum) + " newsum:" + stringify(newsum));
+        throw new Exception("Failed");
+      } else {
+        System.out.println("Success");
+      }
+   }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/compiler/intrinsics/squaretolen/TestSquareToLen.java	Fri Aug 12 18:10:37 2016 +0300
@@ -0,0 +1,116 @@
+/*
+ * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+/**
+ * @test
+ * @bug 8081778
+ * @summary Add C2 x86 intrinsic for BigInteger::squareToLen() method
+ *
+ * @run main/othervm/timeout=600 -XX:-TieredCompilation -Xbatch
+ *      -XX:+IgnoreUnrecognizedVMOptions
+ *      -XX:+UseSquareToLenIntrinsic
+ *      -XX:CompileCommand=exclude,TestSquareToLen::main
+ *      -XX:CompileCommand=option,TestSquareToLen::base_multiply,ccstr,DisableIntrinsic,_squareToLen
+ *      -XX:CompileCommand=option,java.math.BigInteger::multiply,ccstr,DisableIntrinsic,_squareToLen
+ *      -XX:CompileCommand=option,java.math.BigInteger::square,ccstr,DisableIntrinsic,_squareToLen
+ *      -XX:CompileCommand=option,java.math.BigInteger::squareToLen,ccstr,DisableIntrinsic,_squareToLen
+ *      -XX:CompileCommand=inline,java.math.BigInteger::multiply
+ *      -XX:CompileCommand=inline,java.math.BigInteger::square
+ *      -XX:CompileCommand=inline,java.math.BigInteger::squareToLen TestSquareToLen
+ */
+
+import java.util.Random;
+import java.math.*;
+
+public class TestSquareToLen {
+
+    // Avoid intrinsic by preventing inlining multiply() and squareToLen().
+    public static BigInteger base_multiply(BigInteger op1) {
+      return op1.multiply(op1);
+    }
+
+    // Generate squareToLen() intrinsic by inlining multiply().
+    public static BigInteger new_multiply(BigInteger op1) {
+      return op1.multiply(op1);
+    }
+
+    public static boolean bytecompare(BigInteger b1, BigInteger b2) {
+      byte[] data1 = b1.toByteArray();
+      byte[] data2 = b2.toByteArray();
+      if (data1.length != data2.length)
+        return false;
+      for (int i = 0; i < data1.length; i++) {
+        if (data1[i] != data2[i])
+          return false;
+      }
+      return true;
+    }
+
+    public static String stringify(BigInteger b) {
+      String strout= "";
+      byte [] data = b.toByteArray();
+      for (int i = 0; i < data.length; i++) {
+        strout += (String.format("%02x",data[i]) + " ");
+      }
+      return strout;
+    }
+
+    public static void main(String args[]) throws Exception {
+
+      BigInteger oldsum = new BigInteger("0");
+      BigInteger newsum = new BigInteger("0");
+
+      BigInteger b1, b2, oldres, newres;
+
+      Random rand = new Random();
+      long seed = System.nanoTime();
+      Random rand1 = new Random();
+      long seed1 = System.nanoTime();
+      rand.setSeed(seed);
+      rand1.setSeed(seed1);
+
+      for (int j = 0; j < 100000; j++) {
+        int rand_int = rand1.nextInt(3136)+32;
+        b1 = new BigInteger(rand_int, rand);
+
+        oldres = base_multiply(b1);
+        newres = new_multiply(b1);
+
+        oldsum = oldsum.add(oldres);
+        newsum = newsum.add(newres);
+
+        if (!bytecompare(oldres,newres)) {
+          System.out.print("mismatch for:b1:" + stringify(b1) + " :oldres:" + stringify(oldres) + " :newres:" + stringify(newres));
+          System.out.println(b1);
+          throw new Exception("Failed");
+        }
+      }
+      if (!bytecompare(oldsum,newsum))  {
+        System.out.println("Failure: oldsum:" + stringify(oldsum) + " newsum:" + stringify(newsum));
+        throw new Exception("Failed");
+      } else {
+        System.out.println("Success");
+      }
+   }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/compiler/jsr292/LongReferenceCastingTest.java	Fri Aug 12 18:10:37 2016 +0300
@@ -0,0 +1,75 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+import java.lang.invoke.*;
+
+/**
+ * @test
+ * @bug 8148752
+ * @summary Test correct casting of MH arguments during inlining.
+ * @run main LongReferenceCastingTest
+ */
+public class LongReferenceCastingTest {
+    static final String MY_STRING = "myString";
+    static final MethodHandle MH;
+
+    static {
+        try {
+            MethodHandles.Lookup lookup = MethodHandles.lookup();
+            MethodType mt = MethodType.methodType(String.class, long.class, Object.class, String.class);
+            MH = lookup.findVirtual(LongReferenceCastingTest.class, "myMethod", mt);
+        } catch (Exception e) {
+            throw new Error(e);
+        }
+    }
+
+    public String myMethod(long l, Object o, String s) {
+        // The long argument occupies two stack slots, causing C2 to treat it as
+        // two arguments and casting the first one to long and the second one to Object.
+        // As a result, Object o is cast to String and the o.toString() call is
+        // inlined as String::toString(). We fail at runtime because 'o' is not a String.
+        return o.toString();
+    }
+
+    public String toString() {
+        return MY_STRING;
+    }
+
+    public static void main(String[] args) throws Exception {
+        LongReferenceCastingTest test = new LongReferenceCastingTest();
+        try {
+            for (int i = 0; i < 20_000; ++i) {
+                if (!test.invoke().equals(MY_STRING)) {
+                    throw new RuntimeException("Invalid string");
+                }
+            }
+        } catch (Throwable t) {
+            throw new RuntimeException("Test failed", t);
+        }
+    }
+
+    public String invoke() throws Throwable {
+        return (String) MH.invokeExact(this, 0L, (Object)this, MY_STRING);
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/compiler/loopopts/TestArraysFillDeadControl.java	Fri Aug 12 18:10:37 2016 +0300
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+/**
+ * @test
+ * @bug 8147645
+ * @summary Array.fill intrinsification code doesn't mark replaced control as dead
+ * @run main/othervm  -XX:-TieredCompilation -XX:CompileCommand=dontinline,TestArraysFillDeadControl::dont_inline TestArraysFillDeadControl
+ *
+ */
+
+import java.util.Arrays;
+
+public class TestArraysFillDeadControl {
+
+    static void dont_inline() {
+    }
+
+    static int i = 1;
+
+    public static void main(String[] args) {
+        for (int j = 0; j < 200000; j++) {
+            int[] a = new int[2];
+            int b = i;
+
+            Arrays.fill(a, 1);
+            Arrays.fill(a, 1+b);
+
+            dont_inline();
+        }
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/compiler/loopopts/TestLoopPeeling.java	Fri Aug 12 18:10:37 2016 +0300
@@ -0,0 +1,100 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * @test
+ * @bug 8078262
+ * @summary Tests correct dominator information after loop peeling.
+ * @run main/othervm -Xcomp -XX:CompileCommand=compileonly,TestLoopPeeling::test* TestLoopPeeling
+ */
+public class TestLoopPeeling {
+
+    public int[] array = new int[100];
+
+    public static void main(String args[]) {
+        TestLoopPeeling test = new TestLoopPeeling();
+        try {
+            test.testArrayAccess(0, 1);
+            test.testArrayAllocation(0, 1);
+        } catch (Exception e) {
+            // Ignore exceptions
+        }
+    }
+
+    public void testArrayAccess(int index, int inc) {
+        int storeIndex = -1;
+
+        for (; index < 10; index += inc) {
+            // This loop invariant check triggers loop peeling because it can
+            // be moved out of the loop (see 'IdealLoopTree::policy_peeling').
+            if (inc == 42) return;
+
+            // This loop variant usage of LShiftL( ConvI2L( Phi(storeIndex) ) )
+            // prevents the split if optimization that would otherwise clone the
+            // LShiftL and ConvI2L nodes and assign them to their corresponding array
+            // address computation (see 'PhaseIdealLoop::split_if_with_blocks_post').
+            if (storeIndex > 0 && array[storeIndex] == 42) return;
+
+            if (index == 42) {
+                // This store and the corresponding range check are moved out of the
+                // loop and both used after old loop and the peeled iteration exit.
+                // For the peeled iteration, storeIndex is always -1 and the ConvI2L
+                // is replaced by TOP. However, the range check is not folded because
+                // we don't do the split if optimization in PhaseIdealLoop2.
+                // As a result, we have a (dead) control path from the peeled iteration
+                // to the StoreI but the data path is removed.
+                array[storeIndex] = 1;
+                return;
+            }
+
+            storeIndex++;
+        }
+    }
+
+    public byte[] testArrayAllocation(int index, int inc) {
+        int allocationCount = -1;
+        byte[] result;
+
+        for (; index < 10; index += inc) {
+            // This loop invariant check triggers loop peeling because it can
+            // be moved out of the loop (see 'IdealLoopTree::policy_peeling').
+            if (inc == 42) return null;
+
+            if (index == 42) {
+                // This allocation and the corresponding size check are moved out of the
+                // loop and both used after old loop and the peeled iteration exit.
+                // For the peeled iteration, allocationCount is always -1 and the ConvI2L
+                // is replaced by TOP. However, the size check is not folded because
+                // we don't do the split if optimization in PhaseIdealLoop2.
+                // As a result, we have a (dead) control path from the peeled iteration
+                // to the allocation but the data path is removed.
+                result = new byte[allocationCount];
+                return result;
+            }
+
+            allocationCount++;
+        }
+        return null;
+    }
+}
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/compiler/loopopts/UseCountedLoopSafepoints.java	Fri Aug 12 18:10:37 2016 +0300
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+/**
+ * @test
+ * @bug 6869327
+ * @summary Test that C2 flag UseCountedLoopSafepoints ensures a safepoint is kept in a CountedLoop
+ * @library /testlibrary
+ * @run main UseCountedLoopSafepoints
+ */
+
+import java.util.concurrent.atomic.AtomicLong;
+import com.oracle.java.testlibrary.ProcessTools;
+import com.oracle.java.testlibrary.OutputAnalyzer;
+
+public class UseCountedLoopSafepoints {
+    private static final AtomicLong _num = new AtomicLong(0);
+
+    // Uses the fact that an EnableBiasedLocking vmop will be started
+    // after 500ms, while we are still in the loop. If there is a
+    // safepoint in the counted loop, then we will reach safepoint
+    // very quickly. Otherwise SafepointTimeout will be hit.
+    public static void main (String args[]) throws Exception {
+        if (args.length == 1) {
+            final int loops = Integer.parseInt(args[0]);
+            for (int i = 0; i < loops; i++) {
+                _num.addAndGet(1);
+            }
+        } else {
+            ProcessBuilder pb = ProcessTools.createJavaProcessBuilder(
+                    "-XX:+IgnoreUnrecognizedVMOptions",
+                    "-XX:-TieredCompilation",
+                    "-XX:+UseBiasedLocking",
+                    "-XX:BiasedLockingStartupDelay=500",
+                    "-XX:+SafepointTimeout",
+                    "-XX:SafepointTimeoutDelay=2000",
+                    "-XX:+UseCountedLoopSafepoints",
+                    "UseCountedLoopSafepoints",
+                    "2000000000"
+                    );
+            OutputAnalyzer output = new OutputAnalyzer(pb.start());
+            output.shouldNotContain("Timeout detected");
+            output.shouldHaveExitValue(0);
+        }
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/compiler/native/TestDirtyInt.java	Fri Aug 12 18:10:37 2016 +0300
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+public class TestDirtyInt {
+    static {
+        System.loadLibrary("TestDirtyInt");
+    }
+
+    native static int test(int v);
+
+    static int compiled(int v) {
+        return test(v<<2);
+    }
+
+    static public void main(String[] args) {
+        for (int i = 0; i < 20000; i++) {
+            int res = compiled(Integer.MAX_VALUE);
+            if (res != 0x42) {
+                throw new RuntimeException("Test failed");
+            }
+        }
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/compiler/native/TestDirtyInt.sh	Fri Aug 12 18:10:37 2016 +0300
@@ -0,0 +1,80 @@
+#!/bin/sh
+
+#
+#  Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+#  DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+#
+#  This code is free software; you can redistribute it and/or modify it
+#  under the terms of the GNU General Public License version 2 only, as
+#  published by the Free Software Foundation.
+#
+#  This code is distributed in the hope that it will be useful, but WITHOUT
+#  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+#  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+#  version 2 for more details (a copy is included in the LICENSE file that
+#  accompanied this code).
+#
+#  You should have received a copy of the GNU General Public License version
+#  2 along with this work; if not, write to the Free Software Foundation,
+#  Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+#
+#  Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+#  or visit www.oracle.com if you need additional information or have any
+#  questions.
+#
+
+##
+## @test
+## @bug 8148353
+## @summary gcc on sparc expects clean 32 bit int in 64 bit register on function entry
+## @run shell/timeout=30 TestDirtyInt.sh
+##
+
+if [ "${TESTSRC}" = "" ]
+then
+  TESTSRC=${PWD}
+  echo "TESTSRC not set.  Using "${TESTSRC}" as default"
+fi
+echo "TESTSRC=${TESTSRC}"
+## Adding common setup Variables for running shell tests.
+. ${TESTSRC}/../../test_env.sh
+
+# set platform-dependent variables (POSIX "=" with quoted operands, so the check also works when /bin/sh is not bash)
+if [ "$VM_OS" = "linux" ] && [ "$VM_CPU" = "sparcv9" ]; then
+    echo "Testing on linux-sparc"
+    gcc_cmd=`which gcc`
+    if [ "x$gcc_cmd" = "x" ]; then
+        echo "WARNING: gcc not found. Cannot execute test." 2>&1
+        exit 0;
+    fi
+else
+    echo "Test passed; only valid for linux-sparc"
+    exit 0;
+fi
+
+THIS_DIR=.
+
+cp ${TESTSRC}${FS}*.java ${THIS_DIR}
+${TESTJAVA}${FS}bin${FS}javac *.java
+
+$gcc_cmd -O1 -DLINUX -fPIC -shared \
+    -o ${TESTSRC}${FS}libTestDirtyInt.so \
+    -I${TESTJAVA}${FS}include \
+    -I${TESTJAVA}${FS}include${FS}linux \
+    ${TESTSRC}${FS}libTestDirtyInt.c
+
+# run the java test in the foreground; its exit status is checked below
+cmd="${TESTJAVA}${FS}bin${FS}java \
+    -Djava.library.path=${TESTSRC}${FS} TestDirtyInt"
+
+echo "$cmd"
+eval $cmd 
+
+if [ $? = 0 ]
+then
+    echo "Test Passed"
+    exit 0
+fi
+
+echo "Test Failed"
+exit 1
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/compiler/native/libTestDirtyInt.c	Fri Aug 12 18:10:37 2016 +0300
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+#include "jni.h"
+#include <stdio.h>
+
+static int array = 0x42;
+
+JNIEXPORT jint JNICALL Java_TestDirtyInt_test(JNIEnv* env, jclass jclazz, jint v)
+{
+  int* ptr = &array + v + 4;
+  return *ptr;
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/compiler/types/TestMeetIncompatibleInterfaceArrays.java	Fri Aug 12 18:10:37 2016 +0300
@@ -0,0 +1,351 @@
+/*
+ * Copyright 2015 SAP AG.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * @test
+ * @bug 8141551
+ * @summary C2 can not handle returns with incompatible interface arrays
+ * @library /testlibrary /testlibrary/whitebox/
+ * @build sun.hotspot.WhiteBox
+ * @run main ClassFileInstaller sun.hotspot.WhiteBox
+ *                              sun.hotspot.WhiteBox$WhiteBoxPermission
+ * @run main/othervm
+ *        -Xbootclasspath/a:.
+ *        -XX:+UnlockDiagnosticVMOptions
+ *        -XX:+WhiteBoxAPI
+ *        -Xbatch
+ *        -XX:CompileThreshold=1
+ *        -XX:-TieredCompilation
+ *        -XX:CICompilerCount=1
+ *        -XX:+PrintCompilation
+ *        -XX:+PrintInlining
+ *        -XX:CompileCommand=compileonly,MeetIncompatibleInterfaceArrays*.run
+ *        -XX:CompileCommand=dontinline,TestMeetIncompatibleInterfaceArrays$Helper.createI2*
+ *        -XX:CompileCommand=quiet
+ *        TestMeetIncompatibleInterfaceArrays 0
+ * @run main/othervm
+ *        -Xbootclasspath/a:.
+ *        -XX:+UnlockDiagnosticVMOptions
+ *        -XX:+WhiteBoxAPI
+ *        -Xbatch
+ *        -XX:CompileThreshold=1
+ *        -XX:-TieredCompilation
+ *        -XX:CICompilerCount=1
+ *        -XX:+PrintCompilation
+ *        -XX:+PrintInlining
+ *        -XX:CompileCommand=compileonly,MeetIncompatibleInterfaceArrays*.run
+ *        -XX:CompileCommand=inline,TestMeetIncompatibleInterfaceArrays$Helper.createI2*
+ *        -XX:CompileCommand=quiet
+ *        TestMeetIncompatibleInterfaceArrays 1
+ * @run main/othervm
+ *        -Xbootclasspath/a:.
+ *        -XX:+UnlockDiagnosticVMOptions
+ *        -XX:+WhiteBoxAPI
+ *        -Xbatch
+ *        -XX:CompileThreshold=1
+ *        -XX:Tier0InvokeNotifyFreqLog=0 -XX:Tier2InvokeNotifyFreqLog=0 -XX:Tier3InvokeNotifyFreqLog=0 -XX:Tier23InlineeNotifyFreqLog=0
+ *        -XX:Tier3InvocationThreshold=2 -XX:Tier3MinInvocationThreshold=2 -XX:Tier3CompileThreshold=2
+ *        -XX:Tier4InvocationThreshold=1 -XX:Tier4MinInvocationThreshold=1 -XX:Tier4CompileThreshold=1
+ *        -XX:+TieredCompilation
+ *        -XX:CICompilerCount=2
+ *        -XX:+PrintCompilation
+ *        -XX:+PrintInlining
+ *        -XX:CompileCommand=compileonly,MeetIncompatibleInterfaceArrays*.run
+ *        -XX:CompileCommand=compileonly,TestMeetIncompatibleInterfaceArrays$Helper.createI2*
+ *        -XX:CompileCommand=inline,TestMeetIncompatibleInterfaceArrays$Helper.createI2*
+ *        -XX:CompileCommand=quiet
+ *        TestMeetIncompatibleInterfaceArrays 2
+ *
+ * @author volker.simonis@gmail.com
+ */
+
+import java.io.FileOutputStream;
+import java.lang.reflect.InvocationTargetException;
+import java.lang.reflect.Method;
+import jdk.internal.org.objectweb.asm.ClassWriter;
+import jdk.internal.org.objectweb.asm.MethodVisitor;
+import static jdk.internal.org.objectweb.asm.Opcodes.*;
+import sun.hotspot.WhiteBox;
+
+public class TestMeetIncompatibleInterfaceArrays extends ClassLoader {
+
+    private static final WhiteBox WB = WhiteBox.getWhiteBox();
+
+    public static interface I1 { public String getName(); }
+    public static interface I2 { public String getName(); }
+    public static class I2C implements I2 { public String getName() { return "I2";} }
+    public static class I21C implements I2, I1 { public String getName() { return "I2 and I1";} }
+
+    public static class Helper {
+        public static I2 createI2Array0() {
+            return new I2C();
+        }
+        public static I2[] createI2Array1() {
+            return new I2C[] { new I2C() };
+        }
+        public static I2[][] createI2Array2() {
+            return new I2C[][] { new I2C[] { new I2C() } };
+        }
+        public static I2[][][] createI2Array3() {
+            return new I2C[][][] { new I2C[][] { new I2C[] { new I2C() } } };
+        }
+        public static I2[][][][] createI2Array4() {
+            return new I2C[][][][] { new I2C[][][] { new I2C[][] { new I2C[] { new I2C() } } } };
+        }
+        public static I2[][][][][] createI2Array5() {
+            return new I2C[][][][][] { new I2C[][][][] { new I2C[][][] { new I2C[][] { new I2C[] { new I2C() } } } } };
+        }
+        public static I2 createI21Array0() {
+            return new I21C();
+        }
+        public static I2[] createI21Array1() {
+            return new I21C[] { new I21C() };
+        }
+        public static I2[][] createI21Array2() {
+            return new I21C[][] { new I21C[] { new I21C() } };
+        }
+        public static I2[][][] createI21Array3() {
+            return new I21C[][][] { new I21C[][] { new I21C[] { new I21C() } } };
+        }
+        public static I2[][][][] createI21Array4() {
+            return new I21C[][][][] { new I21C[][][] { new I21C[][] { new I21C[] { new I21C() } } } };
+        }
+        public static I2[][][][][] createI21Array5() {
+            return new I21C[][][][][] { new I21C[][][][] { new I21C[][][] { new I21C[][] { new I21C[] { new I21C() } } } } };
+        }
+    }
+
+    // Location for the generated class files
+    public static final String PATH = System.getProperty("test.classes", ".") + java.io.File.separator;
+
+    /*
+     * With 'good == false' this helper method creates the following classes
+     * (using the nested 'Helper' class and the nested interfaces 'I1' and 'I2').
+     * For brevity I omit the enclosing class 'TestMeetIncompatibleInterfaceArrays' in the
+     * following examples:
+     *
+     * public class MeetIncompatibleInterfaceArrays0ASM {
+     *   public static I1 run() {
+     *     return Helper.createI2Array0(); // returns I2
+     *   }
+     *   public static void test() {
+     *     I1 i1 = run();
+     *     System.out.println(i1.getName());
+     *   }
+     * }
+     * public class MeetIncompatibleInterfaceArrays1ASM {
+     *   public static I1[] run() {
+     *     return Helper.createI2Array1(); // returns I2[]
+     *   }
+     *   public static void test() {
+     *     I1[] i1 = run();
+     *     System.out.println(i1[0].getName());
+     *   }
+     * }
+     * ...
+     * // MeetIncompatibleInterfaceArrays4ASM is special because it creates
+     * // an illegal class which will be rejected by the verifier.
+     * public class MeetIncompatibleInterfaceArrays4ASM {
+     *   public static I1[][][][] run() {
+     *     return Helper.createI2Array3(); // returns I2[][][] which gives a verifier error because return expects I1[][][][]
+     *   }
+     *   public static void test() {
+     *     I1[][][][] i1 = run();
+     *     System.out.println(i1[0][0][0][0].getName());
+     *   }
+     * ...
+     * public class MeetIncompatibleInterfaceArrays5ASM {
+     *   public static I1[][][][][] run() {
+     *     return Helper.createI2Array5(); // returns I2[][][][][]
+     *   }
+     *   public static void test() {
+     *     I1[][][][][] i1 = run();
+     *     System.out.println(i1[0][0][0][0][0].getName());
+     *   }
+     * }
+     *
+     * Notice that this is not legal Java code. We would have to use a cast in "run()" to make it legal:
+     *
+     *   public static I1[] run() {
+     *     return (I1[])Helper.createI2Array1(); // returns I2[]
+     *   }
+     *
+     * But in pure bytecode, the "run()" methods are perfectly legal:
+     *
+     *   public static I1[] run();
+     *     Code:
+     *       0: invokestatic  #16  // Method Helper.createI2Array1:()[LI2;
+     *       3: areturn
+     *
+     * The "test()" method calls the "getName()" function from I1 on the objects returned by "run()".
+     * This will expectedly fail with an "IncompatibleClassChangeError" because the objects returned
+     * by "run()" (and by createI2Array()) are actually of type "I2C" and only implement "I2" but not "I1".
+     *
+     *
+     * With 'good == true' this helper method will create the following classes:
+     *
+     * public class MeetIncompatibleInterfaceArraysGood0ASM {
+     *   public static I1 run() {
+     *     return Helper.createI21Array0(); // returns I2
+     *   }
+     *   public static void test() {
+     *     I1 i1 = run();
+     *     System.out.println(i1.getName());
+     *   }
+     * }
+     *
+     * Calling "test()" on these objects will succeed and output "I2 and I1" because now the "run()"
+     * method calls "createI21Array()" which actually return an object (or an array of objects) of
+     * type "I21C" which implements both "I2" and "I1".
+     *
+     * Notice that at the bytecode level, the code for the "run()" and "test()" methods in
+     * "MeetIncompatibleInterfaceArraysASM" and "MeetIncompatibleInterfaceArraysGoodASM" look exactly
+     * the same. I.e. the verifier has no chance to verify if the I2 object returned by "createI2Array()"
+     * or "createI21Array()" implements "I1" or not. That's actually the reason why both versions of
+     * generated classes are legal from a verifier point of view.
+     *
+     */
+    static void generateTestClass(int dim, boolean good) throws Exception {
+        String baseClassName = "MeetIncompatibleInterfaceArrays";
+        if (good)
+            baseClassName += "Good";
+        String createName = "createI2" + (good ? "1" : "") + "Array";
+        String a = "";
+        for (int i = 0; i < dim; i++)
+            a += "[";
+        ClassWriter cw = new ClassWriter(ClassWriter.COMPUTE_FRAMES);
+        cw.visit(V1_8, ACC_PUBLIC, baseClassName + dim + "ASM", null, "java/lang/Object", null);
+        MethodVisitor constr = cw.visitMethod(ACC_PUBLIC, "<init>", "()V", null, null);
+        constr.visitCode();
+        constr.visitVarInsn(ALOAD, 0);
+        constr.visitMethodInsn(INVOKESPECIAL, "java/lang/Object", "<init>", "()V", false);
+        constr.visitInsn(RETURN);
+        constr.visitMaxs(0, 0);
+        constr.visitEnd();
+        MethodVisitor run = cw.visitMethod(ACC_PUBLIC | ACC_STATIC, "run",
+                "()" + a + "LTestMeetIncompatibleInterfaceArrays$I1;", null, null);
+        run.visitCode();
+        if (dim == 4) { // deliberately return a 3-dimensional array so the verifier rejects this class
+            run.visitMethodInsn(INVOKESTATIC, "TestMeetIncompatibleInterfaceArrays$Helper", createName + 3,
+                    "()" + "[[[" + "LTestMeetIncompatibleInterfaceArrays$I2;", false);
+        } else {
+            run.visitMethodInsn(INVOKESTATIC, "TestMeetIncompatibleInterfaceArrays$Helper", createName + dim,
+                    "()" + a + "LTestMeetIncompatibleInterfaceArrays$I2;", false);
+        }
+        run.visitInsn(ARETURN);
+        run.visitMaxs(0, 0);
+        run.visitEnd();
+        MethodVisitor test = cw.visitMethod(ACC_PUBLIC | ACC_STATIC, "test", "()V", null, null);
+        test.visitCode();
+        test.visitMethodInsn(INVOKESTATIC, baseClassName + dim + "ASM", "run",
+                "()" + a + "LTestMeetIncompatibleInterfaceArrays$I1;", false);
+        test.visitVarInsn(ASTORE, 0);
+        if (dim > 0) {
+            test.visitVarInsn(ALOAD, 0);
+            for (int i = 1; i <= dim; i++) {
+                test.visitInsn(ICONST_0);
+                test.visitInsn(AALOAD);
+            }
+            test.visitVarInsn(ASTORE, 1);
+        }
+        test.visitFieldInsn(GETSTATIC, "java/lang/System", "out", "Ljava/io/PrintStream;");
+        test.visitVarInsn(ALOAD, dim > 0 ? 1 : 0);
+        test.visitMethodInsn(INVOKEINTERFACE, "TestMeetIncompatibleInterfaceArrays$I1", "getName",
+                "()Ljava/lang/String;", true);
+        test.visitMethodInsn(INVOKEVIRTUAL, "java/io/PrintStream", "println", "(Ljava/lang/Object;)V", false);
+        test.visitInsn(RETURN);
+        test.visitMaxs(0, 0);
+        test.visitEnd();
+
+        // Get the bytes of the class..
+        byte[] b = cw.toByteArray();
+        // ..and write them into a class file (for debugging)
+        // try-with-resources closes the file even if write() throws
+        try (FileOutputStream fos = new FileOutputStream(PATH + baseClassName + dim + "ASM.class")) {
+            fos.write(b);
+        }
+    }
+
+    public static String[][] tier = { { "interpreted", "C2 (tier 4) without inlining", "C2 (tier4) without inlining" },
+            { "interpreted", "C2 (tier 4) with inlining", "C2 (tier4) with inlining" },
+            { "interpreted", "C1 (tier 3) with inlining", "C2 (tier4) with inlining" } };
+
+    public static void main(String[] args) throws Exception {
+        // Index into 'tier': selects the expected-compilation labels printed for this run.
+        final int pass = Integer.parseInt(args.length > 0 ? args[0] : "0");
+
+        // Load and initialize some classes required for compilation
+        Class.forName("TestMeetIncompatibleInterfaceArrays$I1");
+        Class.forName("TestMeetIncompatibleInterfaceArrays$I2");
+        Class.forName("TestMeetIncompatibleInterfaceArrays$Helper");
+
+        for (int g = 0; g < 2; g++) {
+            String baseClassName = "MeetIncompatibleInterfaceArrays";
+            // First round generates the failing classes, second round the "Good" ones.
+            boolean good = (g != 0);
+            if (good)
+                baseClassName += "Good";
+            for (int i = 0; i < 6; i++) {
+                System.out.println();
+                System.out.println("Creating " + baseClassName + i + "ASM.class");
+                System.out.println("========================================" + "=" + "=========");
+                // Create the "MeetIncompatibleInterfaceArrays<i>ASM" class
+                generateTestClass(i, good);
+                Class<?> c = null;
+                try {
+                    c = Class.forName(baseClassName + i + "ASM");
+                } catch (VerifyError ve) {
+                    // Only the deliberately illegal dimension-4 class may be rejected.
+                    if (i != 4) {
+                        throw ve;
+                    }
+                    System.out.println("OK - must be (" + ve.getMessage() + ").");
+                    continue;
+                }
+                // Call MeetIncompatibleInterfaceArrays<i>ASM.test()
+                Method m = c.getMethod("test");
+                Method r = c.getMethod("run");
+                for (int j = 0; j < 3; j++) {
+                    System.out.println((j + 1) + ". invokation of " + baseClassName + i + "ASM.test() [should be "
+                            + tier[pass][j] + "]");
+                    try {
+                        m.invoke(null);
+                    } catch (InvocationTargetException ite) {
+                        // Only the non-"good" classes may fail, and only with an IncompatibleClassChangeError.
+                        if (good || !(ite.getCause() instanceof IncompatibleClassChangeError)) {
+                            throw ite;
+                        }
+                        System.out.println("  OK - catched InvocationTargetException("
+                                + ite.getCause().getMessage() + ").");
+                    }
+                }
+                // Query the compilation state once so the report and the check cannot disagree.
+                boolean compiled = WB.isMethodCompiled(r);
+                System.out.println("Method " + r + (compiled ? " has" : " has not") + " been compiled.");
+                if (!compiled) {
+                    throw new Exception("Method " + r + " must be compiled!");
+                }
+            }
+        }
+    }
+}
--- a/test/gc/8000311/Test8000311.java	Mon Aug 08 20:18:53 2016 +0300
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,42 +0,0 @@
-/*
- * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- */
-
-/**
- * @test Test8000311
- * @key gc
- * @bug 8000311
- * @summary G1: ParallelGCThreads==0 broken
- * @run main/othervm -XX:+UseG1GC -XX:ParallelGCThreads=0 -XX:+ResizePLAB -XX:+ExplicitGCInvokesConcurrent Test8000311
- * @author filipp.zhinkin@oracle.com
- */
-
-import java.util.*;
-
-public class Test8000311 {
-  public static void main(String args[]) {
-    for(int i = 0; i<100; i++) {
-      byte[] garbage = new byte[1000];
-      System.gc();
-    }
-  }
-}
--- a/test/gc/TestG1ZeroPGCTJcmdThreadPrint.java	Mon Aug 08 20:18:53 2016 +0300
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,54 +0,0 @@
-/*
- * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- */
-
-/* @test TestG1ZeroPGCTJcmdThreadPrint
- * @key gc
- * @bug 8005875
- * @summary Use jcmd to generate a thread dump of a Java program being run with PGCT=0 to verify 8005875
- * @library /testlibrary
- * @run main/othervm -XX:+UseG1GC -XX:ParallelGCThreads=0 -XX:+IgnoreUnrecognizedVMOptions TestG1ZeroPGCTJcmdThreadPrint
- */
-
-import com.oracle.java.testlibrary.*;
-
-public class TestG1ZeroPGCTJcmdThreadPrint {
-  public static void main(String args[]) throws Exception {
-
-    // Grab the pid from the current java process
-    String pid = Integer.toString(ProcessTools.getProcessId());
-
-    // Create a ProcessBuilder
-    ProcessBuilder pb = new ProcessBuilder();
-
-    // Run jcmd <pid> Thread.print
-    pb.command(JDKToolFinder.getJDKTool("jcmd"), pid, "Thread.print");
-
-    OutputAnalyzer output = new OutputAnalyzer(pb.start());
-
-    // There shouldn't be a work gang for concurrent marking.
-    output.shouldNotContain("G1 Parallel Marking Threads");
-
-    // Make sure we didn't crash
-    output.shouldHaveExitValue(0);
-  }
-}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/gc/TestVerifySubSet.java	Fri Aug 12 18:10:37 2016 +0300
@@ -0,0 +1,91 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/* @test TestVerifySubSet.java
+ * @key gc
+ * @bug 8072725
+ * @summary Test VerifySubSet option
+ * @library /testlibrary
+ */
+
+import com.oracle.java.testlibrary.OutputAnalyzer;
+import com.oracle.java.testlibrary.ProcessTools;
+import java.util.ArrayList;
+import java.util.Collections;
+
+class RunSystemGC {
+    public static void main(String args[]) throws Exception {
+        System.gc();
+    }
+}
+
+public class TestVerifySubSet {
+    // Extra VM options from "test.java.opts"; empty when the property is unset or blank.
+    private static String[] getTestJavaOpts() {
+        String testVmOptsStr = System.getProperty("test.java.opts", "").trim();
+        if (testVmOptsStr.isEmpty()) {
+            return new String[] {};
+        }
+        return testVmOptsStr.split("\\s+");
+    }
+
+    // Runs RunSystemGC in a child VM with -XX:VerifySubSet set to 'subset' and returns its output.
+    private static OutputAnalyzer runTest(String subset) throws Exception {
+        ArrayList<String> vmOpts = new ArrayList<>();
+        // Inherited test options first, then the verification flags and the main class.
+        Collections.addAll(vmOpts, getTestJavaOpts());
+        Collections.addAll(vmOpts, "-XX:+UnlockDiagnosticVMOptions",
+                                   "-XX:+VerifyBeforeGC",
+                                   "-XX:+VerifyAfterGC",
+                                   "-XX:VerifySubSet="+subset,
+                                   RunSystemGC.class.getName());
+        ProcessBuilder pb =
+            ProcessTools.createJavaProcessBuilder(vmOpts.toArray(new String[vmOpts.size()]));
+        OutputAnalyzer output = new OutputAnalyzer(pb.start());
+        System.out.println("Output:\n" + output.getOutput());
+        return output;
+    }
+
+    public static void main(String args[]) throws Exception {
+
+        OutputAnalyzer output;
+
+        output = runTest("heap, threads, codecache, metaspace");
+        output.shouldContain("Heap");
+        output.shouldContain("Threads");
+        output.shouldContain("CodeCache");
+        output.shouldContain("MetaspaceAux");
+        output.shouldNotContain("SymbolTable");
+        output.shouldNotContain("StringTable");
+        output.shouldNotContain("SystemDictionary");
+        output.shouldNotContain("CodeCache Oops");
+        output.shouldHaveExitValue(0);
+
+        output = runTest("hello, threads, codecache, metaspace");
+        output.shouldContain("memory sub-system is unknown, please correct it");
+        output.shouldNotContain("Threads");
+        output.shouldNotContain("CodeCache");
+        output.shouldNotContain("MetaspaceAux");
+        output.shouldHaveExitValue(1);
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/gc/ergonomics/TestDynamicNumberOfGCThreads.java	Fri Aug 12 18:10:37 2016 +0300
@@ -0,0 +1,79 @@
+/*
+ * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * @test TestDynamicNumberOfGCThreads
+ * @bug 8017462
+ * @summary Ensure that UseDynamicNumberOfGCThreads runs
+ * @requires vm.gc=="null"
+ * @key gc
+ * @library /testlibrary
+ */
+
+import com.oracle.java.testlibrary.ProcessTools;
+import com.oracle.java.testlibrary.OutputAnalyzer;
+
+public class TestDynamicNumberOfGCThreads {
+  public static void main(String[] args) throws Exception {
+
+    testDynamicNumberOfGCThreads("UseConcMarkSweepGC");
+
+    testDynamicNumberOfGCThreads("UseG1GC");
+
+    testDynamicNumberOfGCThreads("UseParallelGC");
+  }
+
+  private static void verifyDynamicNumberOfGCThreads(OutputAnalyzer output) {
+    output.shouldHaveExitValue(0); // test should run successfully
+    output.shouldContain("new_active_workers"); // presumably printed by -XX:+TraceDynamicGCThreads - TODO confirm
+  }
+
+  private static void testDynamicNumberOfGCThreads(String gcFlag) throws Exception {
+    // UseDynamicNumberOfGCThreads and TraceDynamicGCThreads enabled
+    String[] baseArgs = {"-XX:+" + gcFlag, "-Xmx10M", "-XX:+PrintGCDetails",  "-XX:+UseDynamicNumberOfGCThreads", "-XX:+TraceDynamicGCThreads", GCTest.class.getName()};
+
+    // Base test with gc and +UseDynamicNumberOfGCThreads:
+    ProcessBuilder pb_enabled = ProcessTools.createJavaProcessBuilder(baseArgs);
+    verifyDynamicNumberOfGCThreads(new OutputAnalyzer(pb_enabled.start()));
+
+    // Ensure it also works on uniprocessors or if user specifies -XX:ParallelGCThreads=1:
+    String[] extraArgs = {"-XX:+UnlockDiagnosticVMOptions", "-XX:+ForceDynamicNumberOfGCThreads", "-XX:ParallelGCThreads=1"};
+    String[] finalArgs = new String[baseArgs.length + extraArgs.length];
+    System.arraycopy(extraArgs, 0, finalArgs, 0,                extraArgs.length);
+    System.arraycopy(baseArgs,  0, finalArgs, extraArgs.length, baseArgs.length);
+    pb_enabled = ProcessTools.createJavaProcessBuilder(finalArgs);
+    verifyDynamicNumberOfGCThreads(new OutputAnalyzer(pb_enabled.start()));
+  }
+
+  static class GCTest {
+    private static byte[] garbage;
+    public static void main(String [] args) {
+      System.out.println("Creating garbage");
+      // create 128MB of garbage. This should result in at least one GC
+      for (int i = 0; i < 1024; i++) {
+        garbage = new byte[128 * 1024];
+      }
+      System.out.println("Done");
+    }
+  }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/gc/whitebox/TestConcMarkCycleWB.java	Fri Aug 12 18:10:37 2016 +0300
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * @test TestConcMarkCycleWB
+ * @bug 8065579
+ * @requires vm.gc=="null" | vm.gc=="G1"
+ * @library /testlibrary /testlibrary/whitebox
+ * @build ClassFileInstaller com.oracle.java.testlibrary.* sun.hotspot.WhiteBox TestConcMarkCycleWB
+ * @run main ClassFileInstaller sun.hotspot.WhiteBox
+ *                              sun.hotspot.WhiteBox$WhiteBoxPermission
+ * @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI -XX:+UseG1GC TestConcMarkCycleWB
+ * @summary Verifies that ConcurrentMarking-related WB works properly
+ */
+import static com.oracle.java.testlibrary.Asserts.assertFalse;
+import static com.oracle.java.testlibrary.Asserts.assertTrue;
+import sun.hotspot.WhiteBox;
+
+public class TestConcMarkCycleWB {
+
+    public static void main(String[] args) throws Exception {
+        WhiteBox wb = WhiteBox.getWhiteBox();
+
+        wb.youngGC();
+        assertTrue(wb.g1StartConcMarkCycle());
+        while (wb.g1InConcurrentMark()) {
+            Thread.sleep(5);
+        }
+
+        wb.fullGC();
+        assertTrue(wb.g1StartConcMarkCycle());
+        while (wb.g1InConcurrentMark()) {
+            Thread.sleep(5);
+        }
+        assertTrue(wb.g1StartConcMarkCycle());
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/gc_implementation/g1/TestNoEagerReclaimOfHumongousRegions.java	Fri Aug 12 18:10:37 2016 +0300
@@ -0,0 +1,91 @@
+/*
+ * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * @test TestNoEagerReclaimOfHumongousRegions
+ * @bug 8139424
+ * @summary Test to check that a live humongous object is not eagerly reclaimed. This is a regression test for
+ *          8139424 and the test will crash if an eager reclaim occurs. The test is not 100% deterministic and
+ *          might pass even if there are problems in the code, but it will never crash unless there is a problem.
+ * @requires vm.gc=="G1" | vm.gc=="null"
+ * @key gc
+ * @library /testlibrary /testlibrary/whitebox
+ * @modules java.base/sun.misc
+ * @build TestNoEagerReclaimOfHumongousRegions
+ * @run main ClassFileInstaller sun.hotspot.WhiteBox
+ *                              sun.hotspot.WhiteBox$WhiteBoxPermission
+ * @run main/othervm -Xbootclasspath/a:. -XX:+PrintGC -XX:+UseG1GC -XX:MaxTenuringThreshold=0 -XX:G1RSetSparseRegionEntries=32 -XX:G1HeapRegionSize=1m -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI -XX:+UnlockExperimentalVMOptions -XX:+G1TraceEagerReclaimHumongousObjects TestNoEagerReclaimOfHumongousRegions
+ */
+
+import java.util.LinkedList;
+
+import sun.hotspot.WhiteBox;
+
+public class TestNoEagerReclaimOfHumongousRegions {
+    // Wrapper object whose only job is to hold a reference to a humongous byte[].
+    static class LargeRef {
+        private byte[] bytes;
+
+        LargeRef(byte[] ref) { bytes = ref; }
+
+        byte[] ref() {
+            return bytes;
+        }
+    }
+
+    static LargeRef humongous_reference_holder;
+
+    public static void main(String[] args) throws InterruptedException {
+        WhiteBox whiteBox = WhiteBox.getWhiteBox();
+        LinkedList<Object> promotedObjects = new LinkedList<>();
+        // Allocate a 1M byte array. The test runs with a 1m heap region
+        // size, so this array is a humongous object. A reference to it
+        // is stored in the static holder so that it stays live for the
+        // whole test.
+        humongous_reference_holder = new LargeRef(new byte[1 * 1024 * 1024]);
+
+        // Each round adds some garbage plus one more reference to the humongous object.
+        for (int round = 0; round < 32; round++) {
+            promotedObjects.add(new byte[400 * 1000]);
+            promotedObjects.add(new LargeRef(humongous_reference_holder.ref()));
+
+            // Promote to old; the goal is to get rem-set entries for the
+            // humongous object from different regions. MaxTenuringThreshold=0
+            // in the test options makes objects get promoted to old at once.
+            whiteBox.youngGC();
+        }
+        // Drop the garbage and the extra references.
+        promotedObjects.clear();
+
+        // Run a concurrent mark cycle so every reference except the static one is marked dead.
+        whiteBox.g1StartConcMarkCycle();
+        while (whiteBox.g1InConcurrentMark()) {
+            Thread.sleep(100);
+        }
+
+        // A young collection must still not eagerly reclaim the humongous object.
+        whiteBox.youngGC();
+        // Will crash/assert if the humongous object has been reclaimed.
+        whiteBox.fullGC();
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/runtime/ErrorHandling/TestCrashOnOutOfMemoryError.java	Fri Aug 12 18:10:37 2016 +0300
@@ -0,0 +1,106 @@
+/*
+ * Copyright (c) 2015, 2016, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * @test TestCrashOnOutOfMemoryError
+ * @summary Test using -XX:+CrashOnOutOfMemoryError
+ * @library /testlibrary
+ * @build com.oracle.java.testlibrary.*
+ * @run driver TestCrashOnOutOfMemoryError
+ * @bug 8138745
+ */
+
+import com.oracle.java.testlibrary.OutputAnalyzer;
+import com.oracle.java.testlibrary.ProcessTools;
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.InputStreamReader;
+import java.io.IOException;
+
+public class TestCrashOnOutOfMemoryError {
+
+    public static void main(String[] args) throws Exception {
+        if (args.length == 1) {
+            // This should guarantee to throw:
+            // java.lang.OutOfMemoryError: Requested array size exceeds VM limit
+            try {
+                Object[] oa = new Object[Integer.MAX_VALUE];
+                throw new Error("OOME not triggered");
+            } catch (OutOfMemoryError err) {
+                throw new Error("OOME didn't abort JVM!");
+            }
+        }
+        // else this is the main test
+        ProcessBuilder pb = ProcessTools.createJavaProcessBuilder("-XX:+CrashOnOutOfMemoryError",
+                "-Xmx64m", TestCrashOnOutOfMemoryError.class.getName(),"throwOOME");
+        OutputAnalyzer output = new OutputAnalyzer(pb.start());
+        int exitValue = output.getExitValue();
+        if (0 == exitValue) {
+            //expecting a non zero value
+            throw new Error("Expected to get non zero exit value");
+        }
+
+        /* Output should look something like this. The actual text will depend on the OS and its core dump processing.
+           Aborting due to java.lang.OutOfMemoryError: Requested array size exceeds VM limit
+           # To suppress the following error report, specify this argument
+           # after -XX: or in .hotspotrc:  SuppressErrorAt=/debug.cpp:303
+           #
+           # A fatal error has been detected by the Java Runtime Environment:
+           #
+           #  Internal Error (/home/cheleswer/Desktop/jdk9/dev/hotspot/src/share/vm/utilities/debug.cpp:303), pid=6212, tid=6213
+           #  fatal error: OutOfMemory encountered: Requested array size exceeds VM limit
+           #
+           # JRE version: OpenJDK Runtime Environment (9.0) (build 1.9.0-internal-debug-cheleswer_2015_10_20_14_32-b00)
+           # Java VM: OpenJDK 64-Bit Server VM (1.9.0-internal-debug-cheleswer_2015_10_20_14_32-b00, mixed mode, tiered, compressed oops, serial gc, linux-amd64)
+           # Core dump will be written. Default location: Core dumps may be processed with "/usr/share/apport/apport %p %s %c %P" (or dumping to
+             /home/cheleswer/Desktop/core.6212)
+           #
+           # An error report file with more information is saved as:
+           # /home/cheleswer/Desktop/hs_err_pid6212.log
+           #
+           # If you would like to submit a bug report, please visit:
+           #   http://bugreport.java.com/bugreport/crash.jsp
+           #
+           Current thread is 6213
+           Dumping core ...
+           Aborted (core dumped)
+        */
+