changeset 19451:2e571ead57dc

Merge
author duke
date Wed, 05 Jul 2017 19:08:56 +0200
parents e92c5abf8936 7e2914bc45a8
children 30930d65aff1
files
diffstat 143 files changed, 5091 insertions(+), 2447 deletions(-) [+]
line wrap: on
line diff
--- a/.hgtags-top-repo	Mon Aug 26 17:36:10 2013 -0700
+++ b/.hgtags-top-repo	Wed Jul 05 19:08:56 2017 +0200
@@ -225,3 +225,4 @@
 9f74a220677dc265a724515d8e2617548cef62f1 jdk8-b101
 5eb3c1dc348f72a7f84f7d9d07834e8bbe09a799 jdk8-b102
 b7e64be81c8a7690703df5711f4fc2375da8a9cb jdk8-b103
+96c1b9b7524b52c3fcefc90ffad4c767396727c8 jdk8-b104
--- a/README-builds.html	Mon Aug 26 17:36:10 2013 -0700
+++ b/README-builds.html	Wed Jul 05 19:08:56 2017 +0200
@@ -154,7 +154,7 @@
                     </code>
                 </blockquote>
                 Once you have all the repositories, keep in mind that each
-                repository is it's own independent repository.
+                repository is its own independent repository.
                 You can also re-run <code>./get_source.sh</code> anytime to
                 pull over all the latest changesets in all the repositories.
                 This set of nested repositories has been given the term
@@ -241,6 +241,14 @@
                                 source code for the OpenJDK Corba functionality
                             </td>
                         </tr>
+                        <tr>
+                            <td>
+                                nashorn
+                            </td>
+                            <td>
+                                source code for the OpenJDK JavaScript implementation
+                            </td>
+                        </tr>
                     </tbody>
                 </table>
             </blockquote>
@@ -386,7 +394,7 @@
                         <code>--with-boot-jdk</code>.
                     </li>
                     <li>
-                        Insure that GNU make, the Bootstrap JDK,
+                        Ensure that GNU make, the Bootstrap JDK,
                         and the compilers are all
                         in your PATH environment variable
                     </li>
@@ -1307,9 +1315,9 @@
                     you will need to modify the makefiles. But for normal file
                     additions or removals, no changes are needed. There are certan
                     exceptions for some native libraries where the source files are spread
-                    over many directories which also contain courses for other
+                    over many directories which also contain sources for other
                     libraries. In these cases it was simply easier to create include lists
-                    rather thane excludes.
+                    rather than excludes.
                 </p>
 
                 <p>
@@ -1327,14 +1335,14 @@
                 <p>
                     <b>Q:</b> 
                     <code>configure</code> provides OpenJDK-specific features such as
-                    <code>--enable-jigsaw</code> or <code>--with-builddeps-server</code>
-                    that are not described in this document. What about those?
+                    <code>--with-builddeps-server</code> that are not
+                    described in this document. What about those? 
                     <br>
                     <b>A:</b>
                     Try them out if you like! But be aware that most of these are 
                     experimental features. 
                     Many of them don't do anything at all at the moment; the option 
-                    is just a placeholder. Other depends on
+                    is just a placeholder. Others depend on
                     pieces of code or infrastructure that is currently 
                     not ready for prime time.
                 </p>
@@ -1386,24 +1394,6 @@
                 </p>
 
                 <p>
-                    <b>Q:</b> What is @GenerateNativeHeaders?
-                    <br>
-                    <b>A:</b> 
-                    To speed up compilation, we added a flag to javac which makes it 
-                    do the job of javah as well, as a by-product; that is, generating
-                    native .h header files. These files are only generated 
-                    if a class contains native methods. However, sometimes 
-                    a class contains no native method,
-                    but still contains constants that native code needs to use. 
-                    The new GenerateNativeHeaders annotation tells javac to
-                    force generation of a
-                    header file in these cases. (We don't want to generate 
-                    native headers for all classes that contains constants 
-                    but no native methods, since
-                    that would slow down the compilation process needlessly.)
-                </p>
-
-                <p>
                     <b>Q:</b> 
                     Is anything able to use the results of the new build's default make target?
                     <br>
@@ -1429,10 +1419,9 @@
                     What should I do?
                     <br>
                     <b>A:</b>
-                    It might very well be that we have missed to add support for
+                    It might very well be that we have neglected to add support for
                     an option that was actually used from outside the build system.
-                    Email us and we will
-                    add support for it!
+                    Email us and we will add support for it!
                 </p>
 
             </blockquote>
--- a/common/autoconf/generated-configure.sh	Mon Aug 26 17:36:10 2013 -0700
+++ b/common/autoconf/generated-configure.sh	Wed Jul 05 19:08:56 2017 +0200
@@ -29573,7 +29573,7 @@
     CCXXFLAGS_JDK="$CCXXFLAGS_JDK -DSOLARIS"
 fi
 if test "x$OPENJDK_TARGET_OS" = xmacosx; then
-    CCXXFLAGS_JDK="$CCXXFLAGS_JDK -DMACOSX -D_ALLBSD_SOURCE"
+    CCXXFLAGS_JDK="$CCXXFLAGS_JDK -DMACOSX -D_ALLBSD_SOURCE -D_DARWIN_UNLIMITED_SELECT"
     # Setting these parameters makes it an error to link to macosx APIs that are
     # newer than the given OS version and makes the linked binaries compatible even
     # if built on a newer version of the OS.
--- a/common/autoconf/toolchain.m4	Mon Aug 26 17:36:10 2013 -0700
+++ b/common/autoconf/toolchain.m4	Wed Jul 05 19:08:56 2017 +0200
@@ -905,7 +905,7 @@
     CCXXFLAGS_JDK="$CCXXFLAGS_JDK -DSOLARIS"
 fi
 if test "x$OPENJDK_TARGET_OS" = xmacosx; then
-    CCXXFLAGS_JDK="$CCXXFLAGS_JDK -DMACOSX -D_ALLBSD_SOURCE"
+    CCXXFLAGS_JDK="$CCXXFLAGS_JDK -DMACOSX -D_ALLBSD_SOURCE -D_DARWIN_UNLIMITED_SELECT"
     # Setting these parameters makes it an error to link to macosx APIs that are
     # newer than the given OS version and makes the linked binaries compatible even
     # if built on a newer version of the OS.
--- a/hotspot/.hgtags	Mon Aug 26 17:36:10 2013 -0700
+++ b/hotspot/.hgtags	Wed Jul 05 19:08:56 2017 +0200
@@ -369,3 +369,5 @@
 7f55137d6aa81efc6eb0035813709f2cb6a26b8b hs25-b45
 6f9be7f87b9653e94fd8fb3070891a0cc91b15bf jdk8-b103
 580430d131ccd475e2f2ad4006531b8c4813d102 hs25-b46
+104743074675359cfbf7f4dcd9ab2a5974a16627 jdk8-b104
+c1604d5885a6f2adc0bcea2fa142a8f6bafad2f0 hs25-b47
--- a/hotspot/agent/src/share/classes/sun/jvm/hotspot/oops/InstanceKlass.java	Mon Aug 26 17:36:10 2013 -0700
+++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/oops/InstanceKlass.java	Wed Jul 05 19:08:56 2017 +0200
@@ -75,19 +75,19 @@
     javaFieldsCount      = new CIntField(type.getCIntegerField("_java_fields_count"), 0);
     constants            = new MetadataField(type.getAddressField("_constants"), 0);
     classLoaderData      = type.getAddressField("_class_loader_data");
-    sourceFileName       = type.getAddressField("_source_file_name");
     sourceDebugExtension = type.getAddressField("_source_debug_extension");
     innerClasses         = type.getAddressField("_inner_classes");
+    sourceFileNameIndex  = new CIntField(type.getCIntegerField("_source_file_name_index"), 0);
     nonstaticFieldSize   = new CIntField(type.getCIntegerField("_nonstatic_field_size"), 0);
     staticFieldSize      = new CIntField(type.getCIntegerField("_static_field_size"), 0);
-    staticOopFieldCount   = new CIntField(type.getCIntegerField("_static_oop_field_count"), 0);
+    staticOopFieldCount  = new CIntField(type.getCIntegerField("_static_oop_field_count"), 0);
     nonstaticOopMapSize  = new CIntField(type.getCIntegerField("_nonstatic_oop_map_size"), 0);
     isMarkedDependent    = new CIntField(type.getCIntegerField("_is_marked_dependent"), 0);
     initState            = new CIntField(type.getCIntegerField("_init_state"), 0);
     vtableLen            = new CIntField(type.getCIntegerField("_vtable_len"), 0);
     itableLen            = new CIntField(type.getCIntegerField("_itable_len"), 0);
     breakpoints          = type.getAddressField("_breakpoints");
-    genericSignature     = type.getAddressField("_generic_signature");
+    genericSignatureIndex = new CIntField(type.getCIntegerField("_generic_signature_index"), 0);
     majorVersion         = new CIntField(type.getCIntegerField("_major_version"), 0);
     minorVersion         = new CIntField(type.getCIntegerField("_minor_version"), 0);
     headerSize           = Oop.alignObjectOffset(type.getSize());
@@ -134,9 +134,9 @@
   private static CIntField javaFieldsCount;
   private static MetadataField constants;
   private static AddressField  classLoaderData;
-  private static AddressField  sourceFileName;
   private static AddressField  sourceDebugExtension;
   private static AddressField  innerClasses;
+  private static CIntField sourceFileNameIndex;
   private static CIntField nonstaticFieldSize;
   private static CIntField staticFieldSize;
   private static CIntField staticOopFieldCount;
@@ -146,7 +146,7 @@
   private static CIntField vtableLen;
   private static CIntField itableLen;
   private static AddressField breakpoints;
-  private static AddressField  genericSignature;
+  private static CIntField genericSignatureIndex;
   private static CIntField majorVersion;
   private static CIntField minorVersion;
 
@@ -346,7 +346,7 @@
   public ConstantPool getConstants()        { return (ConstantPool) constants.getValue(this); }
   public ClassLoaderData getClassLoaderData() { return                ClassLoaderData.instantiateWrapperFor(classLoaderData.getValue(getAddress())); }
   public Oop       getClassLoader()         { return                getClassLoaderData().getClassLoader(); }
-  public Symbol    getSourceFileName()      { return getSymbol(sourceFileName); }
+  public Symbol    getSourceFileName()      { return                getConstants().getSymbolAt(sourceFileNameIndex.getValue(this)); }
   public String    getSourceDebugExtension(){ return                CStringUtilities.getString(sourceDebugExtension.getValue(getAddress())); }
   public long      getNonstaticFieldSize()  { return                nonstaticFieldSize.getValue(this); }
   public long      getStaticOopFieldCount() { return                staticOopFieldCount.getValue(this); }
@@ -354,7 +354,7 @@
   public boolean   getIsMarkedDependent()   { return                isMarkedDependent.getValue(this) != 0; }
   public long      getVtableLen()           { return                vtableLen.getValue(this); }
   public long      getItableLen()           { return                itableLen.getValue(this); }
-  public Symbol    getGenericSignature()    { return getSymbol(genericSignature); }
+  public Symbol    getGenericSignature()    { return                getConstants().getSymbolAt(genericSignatureIndex.getValue(this)); }
   public long      majorVersion()           { return                majorVersion.getValue(this); }
   public long      minorVersion()           { return                minorVersion.getValue(this); }
 
--- a/hotspot/agent/src/share/classes/sun/jvm/hotspot/tools/jcore/ClassDump.java	Mon Aug 26 17:36:10 2013 -0700
+++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/tools/jcore/ClassDump.java	Wed Jul 05 19:08:56 2017 +0200
@@ -92,8 +92,13 @@
                     System.err.println("Warning: Can not create class filter!");
                 }
             }
-            String outputDirectory = System.getProperty("sun.jvm.hotspot.tools.jcore.outputDir", ".");
-            setOutputDirectory(outputDirectory);
+
+            // outputDirectory and jarStream are alternatives: setting one closes the other.
+            // If neither is set, use outputDirectory from the System property:
+            if (outputDirectory == null && jarStream == null) {
+                String dirName = System.getProperty("sun.jvm.hotspot.tools.jcore.outputDir", ".");
+                setOutputDirectory(dirName);
+            }
 
             // walk through the system dictionary
             SystemDictionary dict = VM.getVM().getSystemDictionary();
--- a/hotspot/make/bsd/makefiles/gcc.make	Mon Aug 26 17:36:10 2013 -0700
+++ b/hotspot/make/bsd/makefiles/gcc.make	Wed Jul 05 19:08:56 2017 +0200
@@ -247,7 +247,7 @@
 # Not yet supported by clang in Xcode 4.6.2
 #  WARNINGS_ARE_ERRORS += -Wno-tautological-constant-out-of-range-compare
   WARNINGS_ARE_ERRORS += -Wno-delete-non-virtual-dtor -Wno-deprecated -Wno-format -Wno-dynamic-class-memaccess
-  WARNINGS_ARE_ERRORS += -Wno-return-type -Wno-empty-body
+  WARNINGS_ARE_ERRORS += -Wno-empty-body
 endif
 
 WARNING_FLAGS = -Wpointer-arith -Wsign-compare -Wundef
--- a/hotspot/make/hotspot_version	Mon Aug 26 17:36:10 2013 -0700
+++ b/hotspot/make/hotspot_version	Wed Jul 05 19:08:56 2017 +0200
@@ -35,7 +35,7 @@
 
 HS_MAJOR_VER=25
 HS_MINOR_VER=0
-HS_BUILD_NUMBER=46
+HS_BUILD_NUMBER=47
 
 JDK_MAJOR_VER=1
 JDK_MINOR_VER=8
--- a/hotspot/src/cpu/sparc/vm/macroAssembler_sparc.cpp	Mon Aug 26 17:36:10 2013 -0700
+++ b/hotspot/src/cpu/sparc/vm/macroAssembler_sparc.cpp	Wed Jul 05 19:08:56 2017 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -29,6 +29,7 @@
 #include "interpreter/interpreter.hpp"
 #include "memory/cardTableModRefBS.hpp"
 #include "memory/resourceArea.hpp"
+#include "memory/universe.hpp"
 #include "prims/methodHandles.hpp"
 #include "runtime/biasedLocking.hpp"
 #include "runtime/interfaceSupport.hpp"
@@ -1145,7 +1146,7 @@
   assert(oop_recorder() != NULL, "this assembler needs an OopRecorder");
   int klass_index = oop_recorder()->find_index(k);
   RelocationHolder rspec = metadata_Relocation::spec(klass_index);
-  narrowOop encoded_k = oopDesc::encode_klass(k);
+  narrowOop encoded_k = Klass::encode_klass(k);
 
   assert_not_delayed();
   // Relocation with special format (see relocInfo_sparc.hpp).
@@ -1419,7 +1420,6 @@
   load_klass(O0_obj, O0_obj);
   // assert((klass != NULL)
   br_null_short(O0_obj, pn, fail);
-  // TODO: Future assert that klass is lower 4g memory for UseCompressedKlassPointers
 
   wrccr( O5_save_flags ); // Restore CCR's
 
@@ -4089,52 +4089,91 @@
 }
 
 void MacroAssembler::encode_klass_not_null(Register r) {
-  assert(Metaspace::is_initialized(), "metaspace should be initialized");
   assert (UseCompressedKlassPointers, "must be compressed");
-  assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
-  if (Universe::narrow_klass_base() != NULL)
-    sub(r, G6_heapbase, r);
-  srlx(r, LogKlassAlignmentInBytes, r);
+  assert(Universe::narrow_klass_base() != NULL, "narrow_klass_base should be initialized");
+  assert(r != G6_heapbase, "bad register choice");
+  set((intptr_t)Universe::narrow_klass_base(), G6_heapbase);
+  sub(r, G6_heapbase, r);
+  if (Universe::narrow_klass_shift() != 0) {
+    assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
+    srlx(r, LogKlassAlignmentInBytes, r);
+  }
+  reinit_heapbase();
 }
 
 void MacroAssembler::encode_klass_not_null(Register src, Register dst) {
-  assert(Metaspace::is_initialized(), "metaspace should be initialized");
-  assert (UseCompressedKlassPointers, "must be compressed");
-  assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
-  if (Universe::narrow_klass_base() == NULL) {
-    srlx(src, LogKlassAlignmentInBytes, dst);
+  if (src == dst) {
+    encode_klass_not_null(src);
   } else {
-    sub(src, G6_heapbase, dst);
-    srlx(dst, LogKlassAlignmentInBytes, dst);
+    assert (UseCompressedKlassPointers, "must be compressed");
+    assert(Universe::narrow_klass_base() != NULL, "narrow_klass_base should be initialized");
+    set((intptr_t)Universe::narrow_klass_base(), dst);
+    sub(src, dst, dst);
+    if (Universe::narrow_klass_shift() != 0) {
+      srlx(dst, LogKlassAlignmentInBytes, dst);
+    }
   }
 }
 
+// Function instr_size_for_decode_klass_not_null() returns the size in bytes of
+// the instructions generated by decode_klass_not_null() and reinit_heapbase().
+// Hence, if the instructions they generate change, this function needs to be updated.
+int MacroAssembler::instr_size_for_decode_klass_not_null() {
+  assert (UseCompressedKlassPointers, "only for compressed klass ptrs");
+  // set + add + set
+  int num_instrs = insts_for_internal_set((intptr_t)Universe::narrow_klass_base()) + 1 +
+    insts_for_internal_set((intptr_t)Universe::narrow_ptrs_base());
+  if (Universe::narrow_klass_shift() == 0) {
+    return num_instrs * BytesPerInstWord;
+  } else { // sllx
+    return (num_instrs + 1) * BytesPerInstWord;
+  }
+}
+
+// !!! If the instructions that get generated here change then function
+// instr_size_for_decode_klass_not_null() needs to get updated.
 void  MacroAssembler::decode_klass_not_null(Register r) {
-  assert(Metaspace::is_initialized(), "metaspace should be initialized");
   // Do not add assert code to this unless you change vtableStubs_sparc.cpp
   // pd_code_size_limit.
   assert (UseCompressedKlassPointers, "must be compressed");
-  assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
-  sllx(r, LogKlassAlignmentInBytes, r);
-  if (Universe::narrow_klass_base() != NULL)
-    add(r, G6_heapbase, r);
+  assert(Universe::narrow_klass_base() != NULL, "narrow_klass_base should be initialized");
+  assert(r != G6_heapbase, "bad register choice");
+  set((intptr_t)Universe::narrow_klass_base(), G6_heapbase);
+  if (Universe::narrow_klass_shift() != 0)
+    sllx(r, LogKlassAlignmentInBytes, r);
+  add(r, G6_heapbase, r);
+  reinit_heapbase();
 }
 
 void  MacroAssembler::decode_klass_not_null(Register src, Register dst) {
-  assert(Metaspace::is_initialized(), "metaspace should be initialized");
-  // Do not add assert code to this unless you change vtableStubs_sparc.cpp
-  // pd_code_size_limit.
-  assert (UseCompressedKlassPointers, "must be compressed");
-  assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
-  sllx(src, LogKlassAlignmentInBytes, dst);
-  if (Universe::narrow_klass_base() != NULL)
-    add(dst, G6_heapbase, dst);
+  if (src == dst) {
+    decode_klass_not_null(src);
+  } else {
+    // Do not add assert code to this unless you change vtableStubs_sparc.cpp
+    // pd_code_size_limit.
+    assert (UseCompressedKlassPointers, "must be compressed");
+    assert(Universe::narrow_klass_base() != NULL, "narrow_klass_base should be initialized");
+    if (Universe::narrow_klass_shift() != 0) {
+      assert((src != G6_heapbase) && (dst != G6_heapbase), "bad register choice");
+      set((intptr_t)Universe::narrow_klass_base(), G6_heapbase);
+      sllx(src, LogKlassAlignmentInBytes, dst);
+      add(dst, G6_heapbase, dst);
+      reinit_heapbase();
+    } else {
+      set((intptr_t)Universe::narrow_klass_base(), dst);
+      add(src, dst, dst);
+    }
+  }
 }
 
 void MacroAssembler::reinit_heapbase() {
   if (UseCompressedOops || UseCompressedKlassPointers) {
-    AddressLiteral base(Universe::narrow_ptrs_base_addr());
-    load_ptr_contents(base, G6_heapbase);
+    if (Universe::heap() != NULL) {
+      set((intptr_t)Universe::narrow_ptrs_base(), G6_heapbase);
+    } else {
+      AddressLiteral base(Universe::narrow_ptrs_base_addr());
+      load_ptr_contents(base, G6_heapbase);
+    }
   }
 }
 
--- a/hotspot/src/cpu/sparc/vm/macroAssembler_sparc.hpp	Mon Aug 26 17:36:10 2013 -0700
+++ b/hotspot/src/cpu/sparc/vm/macroAssembler_sparc.hpp	Wed Jul 05 19:08:56 2017 +0200
@@ -1177,6 +1177,9 @@
   void push_CPU_state();
   void pop_CPU_state();
 
+  // Returns the byte size of the instructions generated by decode_klass_not_null().
+  static int instr_size_for_decode_klass_not_null();
+
   // if heap base register is used - reinit it with the correct value
   void reinit_heapbase();
 
--- a/hotspot/src/cpu/sparc/vm/relocInfo_sparc.cpp	Mon Aug 26 17:36:10 2013 -0700
+++ b/hotspot/src/cpu/sparc/vm/relocInfo_sparc.cpp	Wed Jul 05 19:08:56 2017 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1998, 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1998, 2013, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -97,7 +97,7 @@
     guarantee(Assembler::inv_op2(inst)==Assembler::sethi_op2, "must be sethi");
     if (format() != 0) {
       assert(type() == relocInfo::oop_type || type() == relocInfo::metadata_type, "only narrow oops or klasses case");
-      jint np = type() == relocInfo::oop_type ? oopDesc::encode_heap_oop((oop)x) : oopDesc::encode_klass((Klass*)x);
+      jint np = type() == relocInfo::oop_type ? oopDesc::encode_heap_oop((oop)x) : Klass::encode_klass((Klass*)x);
       inst &= ~Assembler::hi22(-1);
       inst |=  Assembler::hi22((intptr_t)np);
       if (verify_only) {
--- a/hotspot/src/cpu/sparc/vm/sparc.ad	Mon Aug 26 17:36:10 2013 -0700
+++ b/hotspot/src/cpu/sparc/vm/sparc.ad	Wed Jul 05 19:08:56 2017 +0200
@@ -559,10 +559,7 @@
     int klass_load_size;
     if (UseCompressedKlassPointers) {
       assert(Universe::heap() != NULL, "java heap should be initialized");
-      if (Universe::narrow_klass_base() == NULL)
-        klass_load_size = 2*BytesPerInstWord; // see MacroAssembler::load_klass()
-      else
-        klass_load_size = 3*BytesPerInstWord;
+      klass_load_size = MacroAssembler::instr_size_for_decode_klass_not_null() + 1*BytesPerInstWord;
     } else {
       klass_load_size = 1*BytesPerInstWord;
     }
@@ -1663,9 +1660,12 @@
   if (UseCompressedKlassPointers) {
     assert(Universe::heap() != NULL, "java heap should be initialized");
     st->print_cr("\tLDUW   [R_O0 + oopDesc::klass_offset_in_bytes],R_G5\t! Inline cache check - compressed klass");
-    st->print_cr("\tSLL    R_G5,3,R_G5");
-    if (Universe::narrow_klass_base() != NULL)
-      st->print_cr("\tADD    R_G5,R_G6_heap_base,R_G5");
+    st->print_cr("\tSET    Universe::narrow_klass_base,R_G6_heap_base");
+    if (Universe::narrow_klass_shift() != 0) {
+      st->print_cr("\tSLL    R_G5,3,R_G5");
+    }
+    st->print_cr("\tADD    R_G5,R_G6_heap_base,R_G5");
+    st->print_cr("\tSET    Universe::narrow_ptrs_base,R_G6_heap_base");
   } else {
     st->print_cr("\tLDX    [R_O0 + oopDesc::klass_offset_in_bytes],R_G5\t! Inline cache check");
   }
@@ -2563,10 +2563,7 @@
       int klass_load_size;
       if (UseCompressedKlassPointers) {
         assert(Universe::heap() != NULL, "java heap should be initialized");
-        if (Universe::narrow_klass_base() == NULL)
-          klass_load_size = 2*BytesPerInstWord;
-        else
-          klass_load_size = 3*BytesPerInstWord;
+        klass_load_size = MacroAssembler::instr_size_for_decode_klass_not_null() + 1*BytesPerInstWord;
       } else {
         klass_load_size = 1*BytesPerInstWord;
       }
--- a/hotspot/src/cpu/sparc/vm/vtableStubs_sparc.cpp	Mon Aug 26 17:36:10 2013 -0700
+++ b/hotspot/src/cpu/sparc/vm/vtableStubs_sparc.cpp	Wed Jul 05 19:08:56 2017 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -219,13 +219,13 @@
       const int basic = 5*BytesPerInstWord +
                         // shift;add for load_klass (only shift with zero heap based)
                         (UseCompressedKlassPointers ?
-                         ((Universe::narrow_klass_base() == NULL) ? BytesPerInstWord : 2*BytesPerInstWord) : 0);
+                          MacroAssembler::instr_size_for_decode_klass_not_null() : 0);
       return basic + slop;
     } else {
       const int basic = (28 LP64_ONLY(+ 6)) * BytesPerInstWord +
                         // shift;add for load_klass (only shift with zero heap based)
                         (UseCompressedKlassPointers ?
-                         ((Universe::narrow_klass_base() == NULL) ? BytesPerInstWord : 2*BytesPerInstWord) : 0);
+                          MacroAssembler::instr_size_for_decode_klass_not_null() : 0);
       return (basic + slop);
     }
   }
--- a/hotspot/src/cpu/x86/vm/macroAssembler_x86.cpp	Mon Aug 26 17:36:10 2013 -0700
+++ b/hotspot/src/cpu/x86/vm/macroAssembler_x86.cpp	Wed Jul 05 19:08:56 2017 +0200
@@ -30,6 +30,7 @@
 #include "interpreter/interpreter.hpp"
 #include "memory/cardTableModRefBS.hpp"
 #include "memory/resourceArea.hpp"
+#include "memory/universe.hpp"
 #include "prims/methodHandles.hpp"
 #include "runtime/biasedLocking.hpp"
 #include "runtime/interfaceSupport.hpp"
@@ -4810,23 +4811,8 @@
 }
 
 void MacroAssembler::load_prototype_header(Register dst, Register src) {
-#ifdef _LP64
-  if (UseCompressedKlassPointers) {
-    assert (Universe::heap() != NULL, "java heap should be initialized");
-    movl(dst, Address(src, oopDesc::klass_offset_in_bytes()));
-    if (Universe::narrow_klass_shift() != 0) {
-      assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
-      assert(LogKlassAlignmentInBytes == Address::times_8, "klass not aligned on 64bits?");
-      movq(dst, Address(r12_heapbase, dst, Address::times_8, Klass::prototype_header_offset()));
-    } else {
-      movq(dst, Address(dst, Klass::prototype_header_offset()));
-    }
-  } else
-#endif
-  {
-    movptr(dst, Address(src, oopDesc::klass_offset_in_bytes()));
-    movptr(dst, Address(dst, Klass::prototype_header_offset()));
-  }
+  load_klass(dst, src);
+  movptr(dst, Address(dst, Klass::prototype_header_offset()));
 }
 
 void MacroAssembler::store_klass(Register dst, Register src) {
@@ -4914,7 +4900,7 @@
 
 #ifdef ASSERT
 void MacroAssembler::verify_heapbase(const char* msg) {
-  assert (UseCompressedOops || UseCompressedKlassPointers, "should be compressed");
+  assert (UseCompressedOops, "should be compressed");
   assert (Universe::heap() != NULL, "java heap should be initialized");
   if (CheckCompressedOops) {
     Label ok;
@@ -5058,69 +5044,80 @@
 }
 
 void MacroAssembler::encode_klass_not_null(Register r) {
-  assert(Metaspace::is_initialized(), "metaspace should be initialized");
-#ifdef ASSERT
-  verify_heapbase("MacroAssembler::encode_klass_not_null: heap base corrupted?");
-#endif
-  if (Universe::narrow_klass_base() != NULL) {
-    subq(r, r12_heapbase);
-  }
+  assert(Universe::narrow_klass_base() != NULL, "Base should be initialized");
+  // Use r12 as a scratch register in which to temporarily load the narrow_klass_base.
+  assert(r != r12_heapbase, "Encoding a klass in r12");
+  mov64(r12_heapbase, (int64_t)Universe::narrow_klass_base());
+  subq(r, r12_heapbase);
   if (Universe::narrow_klass_shift() != 0) {
     assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
     shrq(r, LogKlassAlignmentInBytes);
   }
+  reinit_heapbase();
 }
 
 void MacroAssembler::encode_klass_not_null(Register dst, Register src) {
-  assert(Metaspace::is_initialized(), "metaspace should be initialized");
-#ifdef ASSERT
-  verify_heapbase("MacroAssembler::encode_klass_not_null2: heap base corrupted?");
-#endif
-  if (dst != src) {
-    movq(dst, src);
-  }
-  if (Universe::narrow_klass_base() != NULL) {
-    subq(dst, r12_heapbase);
-  }
-  if (Universe::narrow_klass_shift() != 0) {
-    assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
-    shrq(dst, LogKlassAlignmentInBytes);
-  }
-}
-
+  if (dst == src) {
+    encode_klass_not_null(src);
+  } else {
+    mov64(dst, (int64_t)Universe::narrow_klass_base());
+    negq(dst);
+    addq(dst, src);
+    if (Universe::narrow_klass_shift() != 0) {
+      assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
+      shrq(dst, LogKlassAlignmentInBytes);
+    }
+  }
+}
+
+// Function instr_size_for_decode_klass_not_null() returns the size of the code
+// generated by decode_klass_not_null(Register r) and reinit_heapbase(), when
+// (Universe::heap() != NULL) and reinit_heapbase() emits a mov64.  Hence, if the
+// instructions they generate change, then this function needs to be updated.
+int MacroAssembler::instr_size_for_decode_klass_not_null() {
+  assert (UseCompressedKlassPointers, "only for compressed klass ptrs");
+  // mov64 + addq + shlq? + mov64  (for reinit_heapbase()).
+  return (Universe::narrow_klass_shift() == 0 ? 20 : 24);
+}
+
+// !!! If the instructions that get generated here change then function
+// instr_size_for_decode_klass_not_null() needs to get updated.
 void  MacroAssembler::decode_klass_not_null(Register r) {
-  assert(Metaspace::is_initialized(), "metaspace should be initialized");
   // Note: it will change flags
+  assert(Universe::narrow_klass_base() != NULL, "Base should be initialized");
   assert (UseCompressedKlassPointers, "should only be used for compressed headers");
+  assert(r != r12_heapbase, "Decoding a klass in r12");
   // Cannot assert, unverified entry point counts instructions (see .ad file)
   // vtableStubs also counts instructions in pd_code_size_limit.
   // Also do not verify_oop as this is called by verify_oop.
   if (Universe::narrow_klass_shift() != 0) {
     assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
     shlq(r, LogKlassAlignmentInBytes);
-    if (Universe::narrow_klass_base() != NULL) {
-      addq(r, r12_heapbase);
-    }
+  }
+  // Use r12 as a scratch register in which to temporarily load the narrow_klass_base.
+  mov64(r12_heapbase, (int64_t)Universe::narrow_klass_base());
+  addq(r, r12_heapbase);
+  reinit_heapbase();
+}
+
+void  MacroAssembler::decode_klass_not_null(Register dst, Register src) {
+  // Note: it will change flags
+  assert(Universe::narrow_klass_base() != NULL, "Base should be initialized");
+  assert (UseCompressedKlassPointers, "should only be used for compressed headers");
+  if (dst == src) {
+    decode_klass_not_null(dst);
   } else {
-    assert (Universe::narrow_klass_base() == NULL, "sanity");
-  }
-}
-
-void  MacroAssembler::decode_klass_not_null(Register dst, Register src) {
-  assert(Metaspace::is_initialized(), "metaspace should be initialized");
-  // Note: it will change flags
-  assert (UseCompressedKlassPointers, "should only be used for compressed headers");
-  // Cannot assert, unverified entry point counts instructions (see .ad file)
-  // vtableStubs also counts instructions in pd_code_size_limit.
-  // Also do not verify_oop as this is called by verify_oop.
-  if (Universe::narrow_klass_shift() != 0) {
-    assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
-    assert(LogKlassAlignmentInBytes == Address::times_8, "klass not aligned on 64bits?");
-    leaq(dst, Address(r12_heapbase, src, Address::times_8, 0));
-  } else {
-    assert (Universe::narrow_klass_base() == NULL, "sanity");
-    if (dst != src) {
-      movq(dst, src);
+    // Cannot assert, unverified entry point counts instructions (see .ad file)
+    // vtableStubs also counts instructions in pd_code_size_limit.
+    // Also do not verify_oop as this is called by verify_oop.
+
+    mov64(dst, (int64_t)Universe::narrow_klass_base());
+    if (Universe::narrow_klass_shift() != 0) {
+      assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
+      assert(LogKlassAlignmentInBytes == Address::times_8, "klass not aligned on 64bits?");
+      leaq(dst, Address(dst, src, Address::times_8, 0));
+    } else {
+      addq(dst, src);
     }
   }
 }
@@ -5148,7 +5145,7 @@
   assert (oop_recorder() != NULL, "this assembler needs an OopRecorder");
   int klass_index = oop_recorder()->find_index(k);
   RelocationHolder rspec = metadata_Relocation::spec(klass_index);
-  mov_narrow_oop(dst, oopDesc::encode_klass(k), rspec);
+  mov_narrow_oop(dst, Klass::encode_klass(k), rspec);
 }
 
 void  MacroAssembler::set_narrow_klass(Address dst, Klass* k) {
@@ -5156,7 +5153,7 @@
   assert (oop_recorder() != NULL, "this assembler needs an OopRecorder");
   int klass_index = oop_recorder()->find_index(k);
   RelocationHolder rspec = metadata_Relocation::spec(klass_index);
-  mov_narrow_oop(dst, oopDesc::encode_klass(k), rspec);
+  mov_narrow_oop(dst, Klass::encode_klass(k), rspec);
 }
 
 void  MacroAssembler::cmp_narrow_oop(Register dst, jobject obj) {
@@ -5182,7 +5179,7 @@
   assert (oop_recorder() != NULL, "this assembler needs an OopRecorder");
   int klass_index = oop_recorder()->find_index(k);
   RelocationHolder rspec = metadata_Relocation::spec(klass_index);
-  Assembler::cmp_narrow_oop(dst, oopDesc::encode_klass(k), rspec);
+  Assembler::cmp_narrow_oop(dst, Klass::encode_klass(k), rspec);
 }
 
 void  MacroAssembler::cmp_narrow_klass(Address dst, Klass* k) {
@@ -5190,14 +5187,23 @@
   assert (oop_recorder() != NULL, "this assembler needs an OopRecorder");
   int klass_index = oop_recorder()->find_index(k);
   RelocationHolder rspec = metadata_Relocation::spec(klass_index);
-  Assembler::cmp_narrow_oop(dst, oopDesc::encode_klass(k), rspec);
+  Assembler::cmp_narrow_oop(dst, Klass::encode_klass(k), rspec);
 }
 
 void MacroAssembler::reinit_heapbase() {
   if (UseCompressedOops || UseCompressedKlassPointers) {
-    movptr(r12_heapbase, ExternalAddress((address)Universe::narrow_ptrs_base_addr()));
-  }
-}
+    if (Universe::heap() != NULL) {
+      if (Universe::narrow_oop_base() == NULL) {
+        MacroAssembler::xorptr(r12_heapbase, r12_heapbase);
+      } else {
+        mov64(r12_heapbase, (int64_t)Universe::narrow_ptrs_base());
+      }
+    } else {
+      movptr(r12_heapbase, ExternalAddress((address)Universe::narrow_ptrs_base_addr()));
+    }
+  }
+}
+
 #endif // _LP64
 
 
--- a/hotspot/src/cpu/x86/vm/macroAssembler_x86.hpp	Mon Aug 26 17:36:10 2013 -0700
+++ b/hotspot/src/cpu/x86/vm/macroAssembler_x86.hpp	Wed Jul 05 19:08:56 2017 +0200
@@ -371,6 +371,10 @@
   void cmp_narrow_klass(Register dst, Klass* k);
   void cmp_narrow_klass(Address dst, Klass* k);
 
+  // Returns the byte size of the instructions generated by decode_klass_not_null()
+  // when compressed klass pointers are being used.
+  static int instr_size_for_decode_klass_not_null();
+
   // if heap base register is used - reinit it with the correct value
   void reinit_heapbase();
 
--- a/hotspot/src/cpu/x86/vm/relocInfo_x86.cpp	Mon Aug 26 17:36:10 2013 -0700
+++ b/hotspot/src/cpu/x86/vm/relocInfo_x86.cpp	Wed Jul 05 19:08:56 2017 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1998, 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1998, 2013, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -55,9 +55,9 @@
     }
   } else {
       if (verify_only) {
-        assert(*(uint32_t*) disp == oopDesc::encode_klass((Klass*)x), "instructions must match");
+        assert(*(uint32_t*) disp == Klass::encode_klass((Klass*)x), "instructions must match");
       } else {
-        *(int32_t*) disp = oopDesc::encode_klass((Klass*)x);
+        *(int32_t*) disp = Klass::encode_klass((Klass*)x);
       }
     }
   } else {
--- a/hotspot/src/cpu/x86/vm/stubGenerator_x86_32.cpp	Mon Aug 26 17:36:10 2013 -0700
+++ b/hotspot/src/cpu/x86/vm/stubGenerator_x86_32.cpp	Wed Jul 05 19:08:56 2017 +0200
@@ -675,7 +675,6 @@
     __ movptr(rax, Address(rax, oopDesc::klass_offset_in_bytes())); // get klass
     __ testptr(rax, rax);
     __ jcc(Assembler::zero, error);              // if klass is NULL it is broken
-    // TODO: Future assert that klass is lower 4g memory for UseCompressedKlassPointers
 
     // return if everything seems ok
     __ bind(exit);
--- a/hotspot/src/cpu/x86/vm/stubGenerator_x86_64.cpp	Mon Aug 26 17:36:10 2013 -0700
+++ b/hotspot/src/cpu/x86/vm/stubGenerator_x86_64.cpp	Wed Jul 05 19:08:56 2017 +0200
@@ -1021,7 +1021,6 @@
     __ load_klass(rax, rax);  // get klass
     __ testptr(rax, rax);
     __ jcc(Assembler::zero, error); // if klass is NULL it is broken
-    // TODO: Future assert that klass is lower 4g memory for UseCompressedKlassPointers
 
     // return if everything seems ok
     __ bind(exit);
--- a/hotspot/src/cpu/x86/vm/templateInterpreter_x86_64.cpp	Mon Aug 26 17:36:10 2013 -0700
+++ b/hotspot/src/cpu/x86/vm/templateInterpreter_x86_64.cpp	Wed Jul 05 19:08:56 2017 +0200
@@ -849,9 +849,9 @@
     address entry = __ pc();
 
     // rbx,: Method*
-    // rsi: senderSP must preserved for slow path, set SP to it on fast path
-    // rdx: scratch
-    // rdi: scratch
+    // r13: senderSP must be preserved for slow path, set SP to it on fast path
+    // c_rarg0: scratch (rdi on non-Win64, rcx on Win64)
+    // c_rarg1: scratch (rsi on non-Win64, rdx on Win64)
 
     Label slow_path;
     // If we need a safepoint check, generate full interpreter entry.
@@ -865,8 +865,8 @@
 
     // Load parameters
     const Register crc = rax;  // crc
-    const Register val = rdx;  // source java byte value
-    const Register tbl = rdi;  // scratch
+    const Register val = c_rarg0;  // source java byte value
+    const Register tbl = c_rarg1;  // scratch
 
     // Arguments are reversed on java expression stack
     __ movl(val, Address(rsp,   wordSize)); // byte value
@@ -880,7 +880,7 @@
 
     // _areturn
     __ pop(rdi);                // get return address
-    __ mov(rsp, rsi);           // set sp to sender sp
+    __ mov(rsp, r13);           // set sp to sender sp
     __ jmp(rdi);
 
     // generate a vanilla native entry as the slow path
@@ -919,20 +919,24 @@
     const Register crc = c_rarg0;  // crc
     const Register buf = c_rarg1;  // source java byte array address
     const Register len = c_rarg2;  // length
+    const Register off = len;      // offset (never overlaps with 'len')
 
     // Arguments are reversed on java expression stack
-    __ movl(len,   Address(rsp,   wordSize)); // Length
     // Calculate address of start element
     if (kind == Interpreter::java_util_zip_CRC32_updateByteBuffer) {
       __ movptr(buf, Address(rsp, 3*wordSize)); // long buf
-      __ addptr(buf, Address(rsp, 2*wordSize)); // + offset
+      __ movl2ptr(off, Address(rsp, 2*wordSize)); // offset
+      __ addq(buf, off); // + offset
       __ movl(crc,   Address(rsp, 5*wordSize)); // Initial CRC
     } else {
       __ movptr(buf, Address(rsp, 3*wordSize)); // byte[] array
       __ addptr(buf, arrayOopDesc::base_offset_in_bytes(T_BYTE)); // + header size
-      __ addptr(buf, Address(rsp, 2*wordSize)); // + offset
+      __ movl2ptr(off, Address(rsp, 2*wordSize)); // offset
+      __ addq(buf, off); // + offset
       __ movl(crc,   Address(rsp, 4*wordSize)); // Initial CRC
     }
+    // Can now load 'len' since we're finished with 'off'
+    __ movl(len, Address(rsp, wordSize)); // Length
 
     __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, StubRoutines::updateBytesCRC32()), crc, buf, len);
     // result in rax
--- a/hotspot/src/cpu/x86/vm/vtableStubs_x86_64.cpp	Mon Aug 26 17:36:10 2013 -0700
+++ b/hotspot/src/cpu/x86/vm/vtableStubs_x86_64.cpp	Wed Jul 05 19:08:56 2017 +0200
@@ -211,11 +211,11 @@
   if (is_vtable_stub) {
     // Vtable stub size
     return (DebugVtables ? 512 : 24) + (CountCompiledCalls ? 13 : 0) +
-           (UseCompressedKlassPointers ? 16 : 0);  // 1 leaq can be 3 bytes + 1 long
+           (UseCompressedKlassPointers ?  MacroAssembler::instr_size_for_decode_klass_not_null() : 0);
   } else {
     // Itable stub size
     return (DebugVtables ? 512 : 74) + (CountCompiledCalls ? 13 : 0) +
-           (UseCompressedKlassPointers ? 32 : 0);  // 2 leaqs
+           (UseCompressedKlassPointers ?  MacroAssembler::instr_size_for_decode_klass_not_null() : 0);
   }
   // In order to tune these parameters, run the JVM with VM options
   // +PrintMiscellaneous and +WizardMode to see information about
--- a/hotspot/src/cpu/x86/vm/x86_64.ad	Mon Aug 26 17:36:10 2013 -0700
+++ b/hotspot/src/cpu/x86/vm/x86_64.ad	Wed Jul 05 19:08:56 2017 +0200
@@ -1393,9 +1393,7 @@
 {
   if (UseCompressedKlassPointers) {
     st->print_cr("movl    rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
-    if (Universe::narrow_klass_shift() != 0) {
-      st->print_cr("\tdecode_klass_not_null rscratch1, rscratch1");
-    }
+    st->print_cr("\tdecode_klass_not_null rscratch1, rscratch1");
     st->print_cr("\tcmpq    rax, rscratch1\t # Inline cache check");
   } else {
     st->print_cr("\tcmpq    rax, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t"
@@ -4035,146 +4033,6 @@
   %}
 %}
 
-operand indirectNarrowKlass(rRegN reg)
-%{
-  predicate(Universe::narrow_klass_shift() == 0);
-  constraint(ALLOC_IN_RC(ptr_reg));
-  match(DecodeNKlass reg);
-
-  format %{ "[$reg]" %}
-  interface(MEMORY_INTER) %{
-    base($reg);
-    index(0x4);
-    scale(0x0);
-    disp(0x0);
-  %}
-%}
-
-operand indOffset8NarrowKlass(rRegN reg, immL8 off)
-%{
-  predicate(Universe::narrow_klass_shift() == 0);
-  constraint(ALLOC_IN_RC(ptr_reg));
-  match(AddP (DecodeNKlass reg) off);
-
-  format %{ "[$reg + $off (8-bit)]" %}
-  interface(MEMORY_INTER) %{
-    base($reg);
-    index(0x4);
-    scale(0x0);
-    disp($off);
-  %}
-%}
-
-operand indOffset32NarrowKlass(rRegN reg, immL32 off)
-%{
-  predicate(Universe::narrow_klass_shift() == 0);
-  constraint(ALLOC_IN_RC(ptr_reg));
-  match(AddP (DecodeNKlass reg) off);
-
-  format %{ "[$reg + $off (32-bit)]" %}
-  interface(MEMORY_INTER) %{
-    base($reg);
-    index(0x4);
-    scale(0x0);
-    disp($off);
-  %}
-%}
-
-operand indIndexOffsetNarrowKlass(rRegN reg, rRegL lreg, immL32 off)
-%{
-  predicate(Universe::narrow_klass_shift() == 0);
-  constraint(ALLOC_IN_RC(ptr_reg));
-  match(AddP (AddP (DecodeNKlass reg) lreg) off);
-
-  op_cost(10);
-  format %{"[$reg + $off + $lreg]" %}
-  interface(MEMORY_INTER) %{
-    base($reg);
-    index($lreg);
-    scale(0x0);
-    disp($off);
-  %}
-%}
-
-operand indIndexNarrowKlass(rRegN reg, rRegL lreg)
-%{
-  predicate(Universe::narrow_klass_shift() == 0);
-  constraint(ALLOC_IN_RC(ptr_reg));
-  match(AddP (DecodeNKlass reg) lreg);
-
-  op_cost(10);
-  format %{"[$reg + $lreg]" %}
-  interface(MEMORY_INTER) %{
-    base($reg);
-    index($lreg);
-    scale(0x0);
-    disp(0x0);
-  %}
-%}
-
-operand indIndexScaleNarrowKlass(rRegN reg, rRegL lreg, immI2 scale)
-%{
-  predicate(Universe::narrow_klass_shift() == 0);
-  constraint(ALLOC_IN_RC(ptr_reg));
-  match(AddP (DecodeNKlass reg) (LShiftL lreg scale));
-
-  op_cost(10);
-  format %{"[$reg + $lreg << $scale]" %}
-  interface(MEMORY_INTER) %{
-    base($reg);
-    index($lreg);
-    scale($scale);
-    disp(0x0);
-  %}
-%}
-
-operand indIndexScaleOffsetNarrowKlass(rRegN reg, immL32 off, rRegL lreg, immI2 scale)
-%{
-  predicate(Universe::narrow_klass_shift() == 0);
-  constraint(ALLOC_IN_RC(ptr_reg));
-  match(AddP (AddP (DecodeNKlass reg) (LShiftL lreg scale)) off);
-
-  op_cost(10);
-  format %{"[$reg + $off + $lreg << $scale]" %}
-  interface(MEMORY_INTER) %{
-    base($reg);
-    index($lreg);
-    scale($scale);
-    disp($off);
-  %}
-%}
-
-operand indCompressedKlassOffset(rRegN reg, immL32 off) %{
-  predicate(UseCompressedKlassPointers && (Universe::narrow_klass_shift() == Address::times_8));
-  constraint(ALLOC_IN_RC(ptr_reg));
-  match(AddP (DecodeNKlass reg) off);
-
-  op_cost(10);
-  format %{"[R12 + $reg << 3 + $off] (compressed klass addressing)" %}
-  interface(MEMORY_INTER) %{
-    base(0xc); // R12
-    index($reg);
-    scale(0x3);
-    disp($off);
-  %}
-%}
-
-operand indPosIndexScaleOffsetNarrowKlass(rRegN reg, immL32 off, rRegI idx, immI2 scale)
-%{
-  constraint(ALLOC_IN_RC(ptr_reg));
-  predicate(Universe::narrow_klass_shift() == 0 && n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
-  match(AddP (AddP (DecodeNKlass reg) (LShiftL (ConvI2L idx) scale)) off);
-
-  op_cost(10);
-  format %{"[$reg + $off + $idx << $scale]" %}
-  interface(MEMORY_INTER) %{
-    base($reg);
-    index($idx);
-    scale($scale);
-    disp($off);
-  %}
-%}
-
 //----------Special Memory Operands--------------------------------------------
 // Stack Slot Operand - This operand is used for loading and storing temporary
 //                      values on the stack where a match requires a value to
@@ -4345,11 +4203,7 @@
                indCompressedOopOffset,
                indirectNarrow, indOffset8Narrow, indOffset32Narrow,
                indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
-               indIndexScaleOffsetNarrow, indPosIndexScaleOffsetNarrow,
-               indCompressedKlassOffset,
-               indirectNarrowKlass, indOffset8NarrowKlass, indOffset32NarrowKlass,
-               indIndexOffsetNarrowKlass, indIndexNarrowKlass, indIndexScaleNarrowKlass,
-               indIndexScaleOffsetNarrowKlass, indPosIndexScaleOffsetNarrowKlass);
+               indIndexScaleOffsetNarrow, indPosIndexScaleOffsetNarrow);
 
 //----------PIPELINE-----------------------------------------------------------
 // Rules which define the behavior of the target architectures pipeline.
@@ -6665,7 +6519,7 @@
 instruct encodeKlass_not_null(rRegN dst, rRegP src, rFlagsReg cr) %{
   match(Set dst (EncodePKlass src));
   effect(KILL cr);
-  format %{ "encode_heap_oop_not_null $dst,$src" %}
+  format %{ "encode_klass_not_null $dst,$src" %}
   ins_encode %{
     __ encode_klass_not_null($dst$$Register, $src$$Register);
   %}
@@ -6675,7 +6529,7 @@
 instruct decodeKlass_not_null(rRegP dst, rRegN src, rFlagsReg cr) %{
   match(Set dst (DecodeNKlass src));
   effect(KILL cr);
-  format %{ "decode_heap_oop_not_null $dst,$src" %}
+  format %{ "decode_klass_not_null $dst,$src" %}
   ins_encode %{
     Register s = $src$$Register;
     Register d = $dst$$Register;
--- a/hotspot/src/cpu/zero/vm/assembler_zero.cpp	Mon Aug 26 17:36:10 2013 -0700
+++ b/hotspot/src/cpu/zero/vm/assembler_zero.cpp	Wed Jul 05 19:08:56 2017 +0200
@@ -50,6 +50,7 @@
 #ifdef ASSERT
 bool AbstractAssembler::pd_check_instruction_mark() {
   ShouldNotCallThis();
+  return false;
 }
 #endif
 
@@ -73,6 +74,7 @@
 RegisterOrConstant MacroAssembler::delayed_value_impl(
   intptr_t* delayed_value_addr, Register tmpl, int offset) {
   ShouldNotCallThis();
+  return RegisterOrConstant();
 }
 
 void MacroAssembler::store_oop(jobject obj) {
--- a/hotspot/src/cpu/zero/vm/cppInterpreter_zero.cpp	Mon Aug 26 17:36:10 2013 -0700
+++ b/hotspot/src/cpu/zero/vm/cppInterpreter_zero.cpp	Wed Jul 05 19:08:56 2017 +0200
@@ -1008,6 +1008,7 @@
 
 address CppInterpreter::return_entry(TosState state, int length) {
   ShouldNotCallThis();
+  return NULL;
 }
 
 address CppInterpreter::deopt_entry(TosState state, int length) {
--- a/hotspot/src/cpu/zero/vm/frame_zero.cpp	Mon Aug 26 17:36:10 2013 -0700
+++ b/hotspot/src/cpu/zero/vm/frame_zero.cpp	Wed Jul 05 19:08:56 2017 +0200
@@ -116,6 +116,7 @@
 
 bool frame::safe_for_sender(JavaThread *thread) {
   ShouldNotCallThis();
+  return false;
 }
 
 void frame::pd_gc_epilog() {
@@ -123,6 +124,7 @@
 
 bool frame::is_interpreted_frame_valid(JavaThread *thread) const {
   ShouldNotCallThis();
+  return false;
 }
 
 BasicType frame::interpreter_frame_result(oop* oop_result,
@@ -184,9 +186,8 @@
 int frame::frame_size(RegisterMap* map) const {
 #ifdef PRODUCT
   ShouldNotCallThis();
-#else
+#endif // PRODUCT
   return 0; // make javaVFrame::print_value work
-#endif // PRODUCT
 }
 
 intptr_t* frame::interpreter_frame_tos_at(jint offset) const {
--- a/hotspot/src/cpu/zero/vm/frame_zero.inline.hpp	Mon Aug 26 17:36:10 2013 -0700
+++ b/hotspot/src/cpu/zero/vm/frame_zero.inline.hpp	Wed Jul 05 19:08:56 2017 +0200
@@ -36,7 +36,7 @@
   _deopt_state = unknown;
 }
 
-inline address  frame::sender_pc()           const { ShouldNotCallThis();  }
+inline address  frame::sender_pc()           const { ShouldNotCallThis(); return NULL; }
 
 inline frame::frame(ZeroFrame* zf, intptr_t* sp) {
   _zeroframe = zf;
@@ -89,6 +89,7 @@
 
 inline intptr_t* frame::link() const {
   ShouldNotCallThis();
+  return NULL;
 }
 
 #ifdef CC_INTERP
@@ -151,14 +152,17 @@
 
 inline oop frame::saved_oop_result(RegisterMap* map) const {
   ShouldNotCallThis();
+  return NULL;
 }
 
 inline bool frame::is_older(intptr_t* id) const {
   ShouldNotCallThis();
+  return false;
 }
 
 inline intptr_t* frame::entry_frame_argument_at(int offset) const {
   ShouldNotCallThis();
+  return NULL;
 }
 
 inline intptr_t* frame::unextended_sp() const {
--- a/hotspot/src/cpu/zero/vm/icBuffer_zero.cpp	Mon Aug 26 17:36:10 2013 -0700
+++ b/hotspot/src/cpu/zero/vm/icBuffer_zero.cpp	Wed Jul 05 19:08:56 2017 +0200
@@ -49,8 +49,10 @@
 address InlineCacheBuffer::ic_buffer_entry_point(address code_begin) {
   // NB ic_stub_code_size() must return the size of the code we generate
   ShouldNotCallThis();
+  return NULL;
 }
 
 void* InlineCacheBuffer::ic_buffer_cached_value(address code_begin) {
   ShouldNotCallThis();
+  return NULL;
 }
--- a/hotspot/src/cpu/zero/vm/interp_masm_zero.hpp	Mon Aug 26 17:36:10 2013 -0700
+++ b/hotspot/src/cpu/zero/vm/interp_masm_zero.hpp	Wed Jul 05 19:08:56 2017 +0200
@@ -40,6 +40,7 @@
                                         Register  tmp,
                                         int       offset) {
     ShouldNotCallThis();
+    return RegisterOrConstant();
   }
 };
 
--- a/hotspot/src/cpu/zero/vm/interpreter_zero.cpp	Mon Aug 26 17:36:10 2013 -0700
+++ b/hotspot/src/cpu/zero/vm/interpreter_zero.cpp	Wed Jul 05 19:08:56 2017 +0200
@@ -64,6 +64,7 @@
     return NULL;
 
   Unimplemented();
+  return NULL;
 }
 
 address InterpreterGenerator::generate_abstract_entry() {
--- a/hotspot/src/cpu/zero/vm/nativeInst_zero.hpp	Mon Aug 26 17:36:10 2013 -0700
+++ b/hotspot/src/cpu/zero/vm/nativeInst_zero.hpp	Wed Jul 05 19:08:56 2017 +0200
@@ -51,15 +51,18 @@
  public:
   bool is_jump() {
     ShouldNotCallThis();
+    return false;
   }
 
   bool is_safepoint_poll() {
     ShouldNotCallThis();
+    return false;
   }
 };
 
 inline NativeInstruction* nativeInstruction_at(address address) {
   ShouldNotCallThis();
+  return NULL;
 }
 
 class NativeCall : public NativeInstruction {
@@ -70,18 +73,22 @@
 
   address instruction_address() const {
     ShouldNotCallThis();
+    return NULL;
   }
 
   address next_instruction_address() const {
     ShouldNotCallThis();
+    return NULL;
   }
 
   address return_address() const {
     ShouldNotCallThis();
+    return NULL;
   }
 
   address destination() const {
     ShouldNotCallThis();
+    return NULL;
   }
 
   void set_destination_mt_safe(address dest) {
@@ -98,25 +105,30 @@
 
   static bool is_call_before(address return_address) {
     ShouldNotCallThis();
+    return false;
   }
 };
 
 inline NativeCall* nativeCall_before(address return_address) {
   ShouldNotCallThis();
+  return NULL;
 }
 
 inline NativeCall* nativeCall_at(address address) {
   ShouldNotCallThis();
+  return NULL;
 }
 
 class NativeMovConstReg : public NativeInstruction {
  public:
   address next_instruction_address() const {
     ShouldNotCallThis();
+    return NULL;
   }
 
   intptr_t data() const {
     ShouldNotCallThis();
+    return 0;
   }
 
   void set_data(intptr_t x) {
@@ -126,12 +138,14 @@
 
 inline NativeMovConstReg* nativeMovConstReg_at(address address) {
   ShouldNotCallThis();
+  return NULL;
 }
 
 class NativeMovRegMem : public NativeInstruction {
  public:
   int offset() const {
     ShouldNotCallThis();
+    return 0;
   }
 
   void set_offset(intptr_t x) {
@@ -145,6 +159,7 @@
 
 inline NativeMovRegMem* nativeMovRegMem_at(address address) {
   ShouldNotCallThis();
+  return NULL;
 }
 
 class NativeJump : public NativeInstruction {
@@ -155,6 +170,7 @@
 
   address jump_destination() const {
     ShouldNotCallThis();
+    return NULL;
   }
 
   void set_jump_destination(address dest) {
@@ -172,12 +188,14 @@
 
 inline NativeJump* nativeJump_at(address address) {
   ShouldNotCallThis();
+  return NULL;
 }
 
 class NativeGeneralJump : public NativeInstruction {
  public:
   address jump_destination() const {
     ShouldNotCallThis();
+    return NULL;
   }
 
   static void insert_unconditional(address code_pos, address entry) {
@@ -191,6 +209,7 @@
 
 inline NativeGeneralJump* nativeGeneralJump_at(address address) {
   ShouldNotCallThis();
+  return NULL;
 }
 
 #endif // CPU_ZERO_VM_NATIVEINST_ZERO_HPP
--- a/hotspot/src/cpu/zero/vm/register_zero.cpp	Mon Aug 26 17:36:10 2013 -0700
+++ b/hotspot/src/cpu/zero/vm/register_zero.cpp	Wed Jul 05 19:08:56 2017 +0200
@@ -32,8 +32,10 @@
 
 const char* RegisterImpl::name() const {
   ShouldNotCallThis();
+  return NULL;
 }
 
 const char* FloatRegisterImpl::name() const {
   ShouldNotCallThis();
+  return NULL;
 }
--- a/hotspot/src/cpu/zero/vm/relocInfo_zero.cpp	Mon Aug 26 17:36:10 2013 -0700
+++ b/hotspot/src/cpu/zero/vm/relocInfo_zero.cpp	Wed Jul 05 19:08:56 2017 +0200
@@ -37,6 +37,7 @@
 
 address Relocation::pd_call_destination(address orig_addr) {
   ShouldNotCallThis();
+  return NULL;
 }
 
 void Relocation::pd_set_call_destination(address x) {
@@ -45,6 +46,7 @@
 
 address Relocation::pd_get_address_from_code() {
   ShouldNotCallThis();
+  return NULL;
 }
 
 address* Relocation::pd_address_in_code() {
--- a/hotspot/src/cpu/zero/vm/sharedRuntime_zero.cpp	Mon Aug 26 17:36:10 2013 -0700
+++ b/hotspot/src/cpu/zero/vm/sharedRuntime_zero.cpp	Wed Jul 05 19:08:56 2017 +0200
@@ -89,6 +89,7 @@
                                                             ret_type);
 #else
   ShouldNotCallThis();
+  return NULL;
 #endif // SHARK
 }
 
@@ -99,6 +100,7 @@
 
 uint SharedRuntime::out_preserve_stack_slots() {
   ShouldNotCallThis();
+  return 0;
 }
 
 JRT_LEAF(void, zero_stub())
@@ -135,4 +137,5 @@
                                          VMRegPair *regs,
                                          int total_args_passed) {
   ShouldNotCallThis();
+  return 0;
 }
--- a/hotspot/src/cpu/zero/vm/vtableStubs_zero.cpp	Mon Aug 26 17:36:10 2013 -0700
+++ b/hotspot/src/cpu/zero/vm/vtableStubs_zero.cpp	Wed Jul 05 19:08:56 2017 +0200
@@ -39,16 +39,20 @@
 
 VtableStub* VtableStubs::create_vtable_stub(int vtable_index) {
   ShouldNotCallThis();
+  return NULL;
 }
 
 VtableStub* VtableStubs::create_itable_stub(int vtable_index) {
   ShouldNotCallThis();
+  return NULL;
 }
 
 int VtableStub::pd_code_size_limit(bool is_vtable_stub) {
   ShouldNotCallThis();
+  return 0;
 }
 
 int VtableStub::pd_code_alignment() {
   ShouldNotCallThis();
+  return 0;
 }
--- a/hotspot/src/os_cpu/bsd_x86/vm/orderAccess_bsd_x86.inline.hpp	Mon Aug 26 17:36:10 2013 -0700
+++ b/hotspot/src/os_cpu/bsd_x86/vm/orderAccess_bsd_x86.inline.hpp	Wed Jul 05 19:08:56 2017 +0200
@@ -190,7 +190,7 @@
 inline void     OrderAccess::release_store_fence(volatile julong*  p, julong  v) { release_store_fence((volatile jlong*)p,  (jlong)v);  }
 
 inline void     OrderAccess::release_store_fence(volatile jfloat*  p, jfloat  v) { *p = v; fence(); }
-inline void     OrderAccess::release_store_fence(volatile jdouble* p, jdouble v) { release_store_fence((volatile jlong*)p, jdouble_cast(v)); }
+inline void     OrderAccess::release_store_fence(volatile jdouble* p, jdouble v) { release_store_fence((volatile jlong*)p, jlong_cast(v)); }
 
 inline void     OrderAccess::release_store_ptr_fence(volatile intptr_t* p, intptr_t v) {
 #ifdef AMD64
--- a/hotspot/src/os_cpu/bsd_x86/vm/os_bsd_x86.cpp	Mon Aug 26 17:36:10 2013 -0700
+++ b/hotspot/src/os_cpu/bsd_x86/vm/os_bsd_x86.cpp	Wed Jul 05 19:08:56 2017 +0200
@@ -715,6 +715,7 @@
   err.report_and_die();
 
   ShouldNotReachHere();
+  return false;
 }
 
 // From solaris_i486.s ported to bsd_i486.s
--- a/hotspot/src/os_cpu/bsd_zero/vm/os_bsd_zero.cpp	Mon Aug 26 17:36:10 2013 -0700
+++ b/hotspot/src/os_cpu/bsd_zero/vm/os_bsd_zero.cpp	Wed Jul 05 19:08:56 2017 +0200
@@ -66,6 +66,7 @@
 
 frame os::get_sender_for_C_frame(frame* fr) {
   ShouldNotCallThis();
+  return frame();
 }
 
 frame os::current_frame() {
@@ -103,16 +104,19 @@
 
 address os::Bsd::ucontext_get_pc(ucontext_t* uc) {
   ShouldNotCallThis();
+  return NULL;
 }
 
 ExtendedPC os::fetch_frame_from_context(void* ucVoid,
                                         intptr_t** ret_sp,
                                         intptr_t** ret_fp) {
   ShouldNotCallThis();
+  return ExtendedPC();
 }
 
 frame os::fetch_frame_from_context(void* ucVoid) {
   ShouldNotCallThis();
+  return frame();
 }
 
 extern "C" JNIEXPORT int
@@ -240,6 +244,7 @@
 
   sprintf(buf, fmt, sig, info->si_addr);
   fatal(buf);
+  return false;
 }
 
 void os::Bsd::init_thread_fpu_state(void) {
@@ -373,17 +378,7 @@
 
 extern "C" {
   int SpinPause() {
-  }
-
-  int SafeFetch32(int *adr, int errValue) {
-    int value = errValue;
-    value = *adr;
-    return value;
-  }
-  intptr_t SafeFetchN(intptr_t *adr, intptr_t errValue) {
-    intptr_t value = errValue;
-    value = *adr;
-    return value;
+    return 1;
   }
 
   void _Copy_conjoint_jshorts_atomic(jshort* from, jshort* to, size_t count) {
--- a/hotspot/src/os_cpu/bsd_zero/vm/thread_bsd_zero.hpp	Mon Aug 26 17:36:10 2013 -0700
+++ b/hotspot/src/os_cpu/bsd_zero/vm/thread_bsd_zero.hpp	Wed Jul 05 19:08:56 2017 +0200
@@ -110,6 +110,7 @@
                                            void* ucontext,
                                            bool isInJava) {
     ShouldNotCallThis();
+    return false;
   }
 
   // These routines are only used on cpu architectures that
--- a/hotspot/src/share/vm/c1/c1_Runtime1.cpp	Mon Aug 26 17:36:10 2013 -0700
+++ b/hotspot/src/share/vm/c1/c1_Runtime1.cpp	Wed Jul 05 19:08:56 2017 +0200
@@ -915,16 +915,6 @@
     // Return to the now deoptimized frame.
   }
 
-  // If we are patching in a non-perm oop, make sure the nmethod
-  // is on the right list.
-  if (ScavengeRootsInCode && mirror.not_null() && mirror()->is_scavengable()) {
-    MutexLockerEx ml_code (CodeCache_lock, Mutex::_no_safepoint_check_flag);
-    nmethod* nm = CodeCache::find_nmethod(caller_frame.pc());
-    guarantee(nm != NULL, "only nmethods can contain non-perm oops");
-    if (!nm->on_scavenge_root_list())
-      CodeCache::add_scavenge_root_nmethod(nm);
-  }
-
   // Now copy code back
 
   {
@@ -1125,6 +1115,21 @@
       }
     }
   }
+
+  // If we are patching in a non-perm oop, make sure the nmethod
+  // is on the right list.
+  if (ScavengeRootsInCode && mirror.not_null() && mirror()->is_scavengable()) {
+    MutexLockerEx ml_code (CodeCache_lock, Mutex::_no_safepoint_check_flag);
+    nmethod* nm = CodeCache::find_nmethod(caller_frame.pc());
+    guarantee(nm != NULL, "only nmethods can contain non-perm oops");
+    if (!nm->on_scavenge_root_list()) {
+      CodeCache::add_scavenge_root_nmethod(nm);
+    }
+
+    // Since we've patched some oops in the nmethod,
+    // (re)register it with the heap.
+    Universe::heap()->register_nmethod(nm);
+  }
 JRT_END
 
 //
--- a/hotspot/src/share/vm/classfile/classFileParser.cpp	Mon Aug 26 17:36:10 2013 -0700
+++ b/hotspot/src/share/vm/classfile/classFileParser.cpp	Wed Jul 05 19:08:56 2017 +0200
@@ -2590,7 +2590,7 @@
     valid_symbol_at(sourcefile_index),
     "Invalid SourceFile attribute at constant pool index %u in class file %s",
     sourcefile_index, CHECK);
-  set_class_sourcefile(_cp->symbol_at(sourcefile_index));
+  set_class_sourcefile_index(sourcefile_index);
 }
 
 
@@ -2728,7 +2728,7 @@
     valid_symbol_at(signature_index),
     "Invalid constant pool index %u in Signature attribute in class file %s",
     signature_index, CHECK);
-  set_class_generic_signature(_cp->symbol_at(signature_index));
+  set_class_generic_signature_index(signature_index);
 }
 
 void ClassFileParser::parse_classfile_bootstrap_methods_attribute(u4 attribute_byte_length, TRAPS) {
@@ -2975,13 +2975,11 @@
 void ClassFileParser::apply_parsed_class_attributes(instanceKlassHandle k) {
   if (_synthetic_flag)
     k->set_is_synthetic();
-  if (_sourcefile != NULL) {
-    _sourcefile->increment_refcount();
-    k->set_source_file_name(_sourcefile);
+  if (_sourcefile_index != 0) {
+    k->set_source_file_name_index(_sourcefile_index);
   }
-  if (_generic_signature != NULL) {
-    _generic_signature->increment_refcount();
-    k->set_generic_signature(_generic_signature);
+  if (_generic_signature_index != 0) {
+    k->set_generic_signature_index(_generic_signature_index);
   }
   if (_sde_buffer != NULL) {
     k->set_source_debug_extension(_sde_buffer, _sde_length);
--- a/hotspot/src/share/vm/classfile/classFileParser.hpp	Mon Aug 26 17:36:10 2013 -0700
+++ b/hotspot/src/share/vm/classfile/classFileParser.hpp	Wed Jul 05 19:08:56 2017 +0200
@@ -62,8 +62,8 @@
   bool       _synthetic_flag;
   int        _sde_length;
   char*      _sde_buffer;
-  Symbol*    _sourcefile;
-  Symbol*    _generic_signature;
+  u2         _sourcefile_index;
+  u2         _generic_signature_index;
 
   // Metadata created before the instance klass is created.  Must be deallocated
   // if not transferred to the InstanceKlass upon successful class loading
@@ -81,16 +81,16 @@
   Array<AnnotationArray*>* _fields_type_annotations;
   InstanceKlass*   _klass;  // InstanceKlass once created.
 
-  void set_class_synthetic_flag(bool x)           { _synthetic_flag = x; }
-  void set_class_sourcefile(Symbol* x)            { _sourcefile = x; }
-  void set_class_generic_signature(Symbol* x)     { _generic_signature = x; }
-  void set_class_sde_buffer(char* x, int len)     { _sde_buffer = x; _sde_length = len; }
+  void set_class_synthetic_flag(bool x)        { _synthetic_flag = x; }
+  void set_class_sourcefile_index(u2 x)        { _sourcefile_index = x; }
+  void set_class_generic_signature_index(u2 x) { _generic_signature_index = x; }
+  void set_class_sde_buffer(char* x, int len)  { _sde_buffer = x; _sde_length = len; }
 
   void init_parsed_class_attributes(ClassLoaderData* loader_data) {
     _loader_data = loader_data;
     _synthetic_flag = false;
-    _sourcefile = NULL;
-    _generic_signature = NULL;
+    _sourcefile_index = 0;
+    _generic_signature_index = 0;
     _sde_buffer = NULL;
     _sde_length = 0;
     // initialize the other flags too:
--- a/hotspot/src/share/vm/code/nmethod.cpp	Mon Aug 26 17:36:10 2013 -0700
+++ b/hotspot/src/share/vm/code/nmethod.cpp	Wed Jul 05 19:08:56 2017 +0200
@@ -687,6 +687,7 @@
     code_buffer->copy_values_to(this);
     if (ScavengeRootsInCode && detect_scavenge_root_oops()) {
       CodeCache::add_scavenge_root_nmethod(this);
+      Universe::heap()->register_nmethod(this);
     }
     debug_only(verify_scavenge_root_oops());
     CodeCache::commit(this);
@@ -881,6 +882,7 @@
     dependencies->copy_to(this);
     if (ScavengeRootsInCode && detect_scavenge_root_oops()) {
       CodeCache::add_scavenge_root_nmethod(this);
+      Universe::heap()->register_nmethod(this);
     }
     debug_only(verify_scavenge_root_oops());
 
@@ -1300,6 +1302,13 @@
   methodHandle the_method(method());
   No_Safepoint_Verifier nsv;
 
+  // During patching, depending on the nmethod state, we must notify the GC that
+  // code has been unloaded by unregistering it. We cannot do this while
+  // holding the Patching_lock because we need to use the CodeCache_lock. This
+  // would be prone to deadlocks.
+  // This flag is used to remember whether we need to later lock and unregister.
+  bool nmethod_needs_unregister = false;
+
   {
     // invalidate osr nmethod before acquiring the patching lock since
     // they both acquire leaf locks and we don't want a deadlock.
@@ -1332,6 +1341,13 @@
       inc_decompile_count();
     }
 
+    // If the state is becoming a zombie, signal to unregister the nmethod with
+    // the heap.
+    // This nmethod may have already been unloaded during a full GC.
+    if ((state == zombie) && !is_unloaded()) {
+      nmethod_needs_unregister = true;
+    }
+
     // Change state
     _state = state;
 
@@ -1367,6 +1383,9 @@
       // safepoint can sneak in, otherwise the oops used by the
       // dependency logic could have become stale.
       MutexLockerEx mu(CodeCache_lock, Mutex::_no_safepoint_check_flag);
+      if (nmethod_needs_unregister) {
+        Universe::heap()->unregister_nmethod(this);
+      }
       flush_dependencies(NULL);
     }
 
@@ -1817,21 +1836,10 @@
   if (_method != NULL) f(_method);
 }
 
-
-// This method is called twice during GC -- once while
-// tracing the "active" nmethods on thread stacks during
-// the (strong) marking phase, and then again when walking
-// the code cache contents during the weak roots processing
-// phase. The two uses are distinguished by means of the
-// 'do_strong_roots_only' flag, which is true in the first
-// case. We want to walk the weak roots in the nmethod
-// only in the second case. The weak roots in the nmethod
-// are the oops in the ExceptionCache and the InlineCache
-// oops.
-void nmethod::oops_do(OopClosure* f, bool do_strong_roots_only) {
+void nmethod::oops_do(OopClosure* f, bool allow_zombie) {
   // make sure the oops ready to receive visitors
-  assert(!is_zombie() && !is_unloaded(),
-         "should not call follow on zombie or unloaded nmethod");
+  assert(allow_zombie || !is_zombie(), "should not call follow on zombie nmethod");
+  assert(!is_unloaded(), "should not call follow on unloaded nmethod");
 
   // If the method is not entrant or zombie then a JMP is plastered over the
   // first few bytes.  If an oop in the old code was there, that oop
--- a/hotspot/src/share/vm/code/nmethod.hpp	Mon Aug 26 17:36:10 2013 -0700
+++ b/hotspot/src/share/vm/code/nmethod.hpp	Wed Jul 05 19:08:56 2017 +0200
@@ -566,7 +566,7 @@
   void preserve_callee_argument_oops(frame fr, const RegisterMap *reg_map,
                                      OopClosure* f);
   void oops_do(OopClosure* f) { oops_do(f, false); }
-  void oops_do(OopClosure* f, bool do_strong_roots_only);
+  void oops_do(OopClosure* f, bool allow_zombie);
   bool detect_scavenge_root_oops();
   void verify_scavenge_root_oops() PRODUCT_RETURN;
 
--- a/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp	Mon Aug 26 17:36:10 2013 -0700
+++ b/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp	Wed Jul 05 19:08:56 2017 +0200
@@ -5478,40 +5478,42 @@
   HandleMark   hm;
 
   SequentialSubTasksDone* pst = space->par_seq_tasks();
-  assert(pst->valid(), "Uninitialized use?");
 
   uint nth_task = 0;
   uint n_tasks  = pst->n_tasks();
 
-  HeapWord *start, *end;
-  while (!pst->is_task_claimed(/* reference */ nth_task)) {
-    // We claimed task # nth_task; compute its boundaries.
-    if (chunk_top == 0) {  // no samples were taken
-      assert(nth_task == 0 && n_tasks == 1, "Can have only 1 EdenSpace task");
-      start = space->bottom();
-      end   = space->top();
-    } else if (nth_task == 0) {
-      start = space->bottom();
-      end   = chunk_array[nth_task];
-    } else if (nth_task < (uint)chunk_top) {
-      assert(nth_task >= 1, "Control point invariant");
-      start = chunk_array[nth_task - 1];
-      end   = chunk_array[nth_task];
-    } else {
-      assert(nth_task == (uint)chunk_top, "Control point invariant");
-      start = chunk_array[chunk_top - 1];
-      end   = space->top();
-    }
-    MemRegion mr(start, end);
-    // Verify that mr is in space
-    assert(mr.is_empty() || space->used_region().contains(mr),
-           "Should be in space");
-    // Verify that "start" is an object boundary
-    assert(mr.is_empty() || oop(mr.start())->is_oop(),
-           "Should be an oop");
-    space->par_oop_iterate(mr, cl);
-  }
-  pst->all_tasks_completed();
+  if (n_tasks > 0) {
+    assert(pst->valid(), "Uninitialized use?");
+    HeapWord *start, *end;
+    while (!pst->is_task_claimed(/* reference */ nth_task)) {
+      // We claimed task # nth_task; compute its boundaries.
+      if (chunk_top == 0) {  // no samples were taken
+        assert(nth_task == 0 && n_tasks == 1, "Can have only 1 EdenSpace task");
+        start = space->bottom();
+        end   = space->top();
+      } else if (nth_task == 0) {
+        start = space->bottom();
+        end   = chunk_array[nth_task];
+      } else if (nth_task < (uint)chunk_top) {
+        assert(nth_task >= 1, "Control point invariant");
+        start = chunk_array[nth_task - 1];
+        end   = chunk_array[nth_task];
+      } else {
+        assert(nth_task == (uint)chunk_top, "Control point invariant");
+        start = chunk_array[chunk_top - 1];
+        end   = space->top();
+      }
+      MemRegion mr(start, end);
+      // Verify that mr is in space
+      assert(mr.is_empty() || space->used_region().contains(mr),
+             "Should be in space");
+      // Verify that "start" is an object boundary
+      assert(mr.is_empty() || oop(mr.start())->is_oop(),
+             "Should be an oop");
+      space->par_oop_iterate(mr, cl);
+    }
+    pst->all_tasks_completed();
+  }
 }
 
 void
@@ -5788,7 +5790,7 @@
   DefNewGeneration* dng = (DefNewGeneration*)_young_gen;
 
   // Eden space
-  {
+  if (!dng->eden()->is_empty()) {
     SequentialSubTasksDone* pst = dng->eden()->par_seq_tasks();
     assert(!pst->valid(), "Clobbering existing data?");
     // Each valid entry in [0, _eden_chunk_index) represents a task.
--- a/hotspot/src/share/vm/gc_implementation/g1/concurrentMark.cpp	Mon Aug 26 17:36:10 2013 -0700
+++ b/hotspot/src/share/vm/gc_implementation/g1/concurrentMark.cpp	Wed Jul 05 19:08:56 2017 +0200
@@ -4529,7 +4529,7 @@
     _total_prev_live_bytes(0), _total_next_live_bytes(0),
     _hum_used_bytes(0), _hum_capacity_bytes(0),
     _hum_prev_live_bytes(0), _hum_next_live_bytes(0),
-    _total_remset_bytes(0) {
+    _total_remset_bytes(0), _total_strong_code_roots_bytes(0) {
   G1CollectedHeap* g1h = G1CollectedHeap::heap();
   MemRegion g1_committed = g1h->g1_committed();
   MemRegion g1_reserved = g1h->g1_reserved();
@@ -4553,9 +4553,11 @@
                 G1PPRL_BYTE_H_FORMAT
                 G1PPRL_BYTE_H_FORMAT
                 G1PPRL_DOUBLE_H_FORMAT
+                G1PPRL_BYTE_H_FORMAT
                 G1PPRL_BYTE_H_FORMAT,
                 "type", "address-range",
-                "used", "prev-live", "next-live", "gc-eff", "remset");
+                "used", "prev-live", "next-live", "gc-eff",
+                "remset", "code-roots");
   _out->print_cr(G1PPRL_LINE_PREFIX
                 G1PPRL_TYPE_H_FORMAT
                 G1PPRL_ADDR_BASE_H_FORMAT
@@ -4563,9 +4565,11 @@
                 G1PPRL_BYTE_H_FORMAT
                 G1PPRL_BYTE_H_FORMAT
                 G1PPRL_DOUBLE_H_FORMAT
+                G1PPRL_BYTE_H_FORMAT
                 G1PPRL_BYTE_H_FORMAT,
                 "", "",
-                "(bytes)", "(bytes)", "(bytes)", "(bytes/ms)", "(bytes)");
+                "(bytes)", "(bytes)", "(bytes)", "(bytes/ms)",
+                "(bytes)", "(bytes)");
 }
 
 // It takes as a parameter a reference to one of the _hum_* fields, it
@@ -4608,6 +4612,8 @@
   size_t next_live_bytes = r->next_live_bytes();
   double gc_eff          = r->gc_efficiency();
   size_t remset_bytes    = r->rem_set()->mem_size();
+  size_t strong_code_roots_bytes = r->rem_set()->strong_code_roots_mem_size();
+
   if (r->used() == 0) {
     type = "FREE";
   } else if (r->is_survivor()) {
@@ -4642,6 +4648,7 @@
   _total_prev_live_bytes += prev_live_bytes;
   _total_next_live_bytes += next_live_bytes;
   _total_remset_bytes    += remset_bytes;
+  _total_strong_code_roots_bytes += strong_code_roots_bytes;
 
   // Print a line for this particular region.
   _out->print_cr(G1PPRL_LINE_PREFIX
@@ -4651,9 +4658,11 @@
                  G1PPRL_BYTE_FORMAT
                  G1PPRL_BYTE_FORMAT
                  G1PPRL_DOUBLE_FORMAT
+                 G1PPRL_BYTE_FORMAT
                  G1PPRL_BYTE_FORMAT,
                  type, bottom, end,
-                 used_bytes, prev_live_bytes, next_live_bytes, gc_eff , remset_bytes);
+                 used_bytes, prev_live_bytes, next_live_bytes, gc_eff,
+                 remset_bytes, strong_code_roots_bytes);
 
   return false;
 }
@@ -4669,7 +4678,8 @@
                  G1PPRL_SUM_MB_PERC_FORMAT("used")
                  G1PPRL_SUM_MB_PERC_FORMAT("prev-live")
                  G1PPRL_SUM_MB_PERC_FORMAT("next-live")
-                 G1PPRL_SUM_MB_FORMAT("remset"),
+                 G1PPRL_SUM_MB_FORMAT("remset")
+                 G1PPRL_SUM_MB_FORMAT("code-roots"),
                  bytes_to_mb(_total_capacity_bytes),
                  bytes_to_mb(_total_used_bytes),
                  perc(_total_used_bytes, _total_capacity_bytes),
@@ -4677,6 +4687,7 @@
                  perc(_total_prev_live_bytes, _total_capacity_bytes),
                  bytes_to_mb(_total_next_live_bytes),
                  perc(_total_next_live_bytes, _total_capacity_bytes),
-                 bytes_to_mb(_total_remset_bytes));
+                 bytes_to_mb(_total_remset_bytes),
+                 bytes_to_mb(_total_strong_code_roots_bytes));
   _out->cr();
 }
--- a/hotspot/src/share/vm/gc_implementation/g1/concurrentMark.hpp	Mon Aug 26 17:36:10 2013 -0700
+++ b/hotspot/src/share/vm/gc_implementation/g1/concurrentMark.hpp	Wed Jul 05 19:08:56 2017 +0200
@@ -1257,6 +1257,9 @@
   // Accumulator for the remembered set size
   size_t _total_remset_bytes;
 
+  // Accumulator for strong code roots memory size
+  size_t _total_strong_code_roots_bytes;
+
   static double perc(size_t val, size_t total) {
     if (total == 0) {
       return 0.0;
--- a/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp	Mon Aug 26 17:36:10 2013 -0700
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp	Wed Jul 05 19:08:56 2017 +0200
@@ -23,6 +23,7 @@
  */
 
 #include "precompiled.hpp"
+#include "code/codeCache.hpp"
 #include "code/icBuffer.hpp"
 #include "gc_implementation/g1/bufferingOopClosure.hpp"
 #include "gc_implementation/g1/concurrentG1Refine.hpp"
@@ -1176,20 +1177,27 @@
   ModRefBarrierSet* _mr_bs;
 public:
   PostMCRemSetClearClosure(G1CollectedHeap* g1h, ModRefBarrierSet* mr_bs) :
-    _g1h(g1h), _mr_bs(mr_bs) { }
+    _g1h(g1h), _mr_bs(mr_bs) {}
+
   bool doHeapRegion(HeapRegion* r) {
+    HeapRegionRemSet* hrrs = r->rem_set();
+
     if (r->continuesHumongous()) {
+      // We'll assert that the strong code root list and RSet are empty
+      assert(hrrs->strong_code_roots_list_length() == 0, "sanity");
+      assert(hrrs->occupied() == 0, "RSet should be empty");
       return false;
     }
+
     _g1h->reset_gc_time_stamps(r);
-    HeapRegionRemSet* hrrs = r->rem_set();
-    if (hrrs != NULL) hrrs->clear();
+    hrrs->clear();
     // You might think here that we could clear just the cards
     // corresponding to the used region.  But no: if we leave a dirty card
     // in a region we might allocate into, then it would prevent that card
     // from being enqueued, and cause it to be missed.
     // Re: the performance cost: we shouldn't be doing full GC anyway!
     _mr_bs->clear(MemRegion(r->bottom(), r->end()));
+
     return false;
   }
 };
@@ -1269,30 +1277,6 @@
   heap_region_iterate(&cl);
 }
 
-double G1CollectedHeap::verify(bool guard, const char* msg) {
-  double verify_time_ms = 0.0;
-
-  if (guard && total_collections() >= VerifyGCStartAt) {
-    double verify_start = os::elapsedTime();
-    HandleMark hm;  // Discard invalid handles created during verification
-    prepare_for_verify();
-    Universe::verify(VerifyOption_G1UsePrevMarking, msg);
-    verify_time_ms = (os::elapsedTime() - verify_start) * 1000;
-  }
-
-  return verify_time_ms;
-}
-
-void G1CollectedHeap::verify_before_gc() {
-  double verify_time_ms = verify(VerifyBeforeGC, " VerifyBeforeGC:");
-  g1_policy()->phase_times()->record_verify_before_time_ms(verify_time_ms);
-}
-
-void G1CollectedHeap::verify_after_gc() {
-  double verify_time_ms = verify(VerifyAfterGC, " VerifyAfterGC:");
-  g1_policy()->phase_times()->record_verify_after_time_ms(verify_time_ms);
-}
-
 bool G1CollectedHeap::do_collection(bool explicit_gc,
                                     bool clear_all_soft_refs,
                                     size_t word_size) {
@@ -1433,7 +1417,7 @@
 
       // Delete metaspaces for unloaded class loaders and clean up loader_data graph
       ClassLoaderDataGraph::purge();
-    MetaspaceAux::verify_metrics();
+      MetaspaceAux::verify_metrics();
 
       // Note: since we've just done a full GC, concurrent
       // marking is no longer active. Therefore we need not
@@ -1504,6 +1488,9 @@
         heap_region_iterate(&rebuild_rs);
       }
 
+      // Rebuild the strong code root lists for each region
+      rebuild_strong_code_roots();
+
       if (true) { // FIXME
         MetaspaceGC::compute_new_size();
       }
@@ -3109,6 +3096,145 @@
   return NULL; // keep some compilers happy
 }
 
+// TODO: VerifyRootsClosure extends OopsInGenClosure so that we can
+//       pass it as the perm_blk to SharedHeap::process_strong_roots.
+//       When process_strong_roots stops calling perm_blk->younger_refs_iterate
+//       we can change this closure to extend the simpler OopClosure.
+class VerifyRootsClosure: public OopsInGenClosure {
+private:
+  G1CollectedHeap* _g1h;
+  VerifyOption     _vo;
+  bool             _failures;
+public:
+  // _vo == UsePrevMarking -> use "prev" marking information,
+  // _vo == UseNextMarking -> use "next" marking information,
+  // _vo == UseMarkWord    -> use mark word from object header.
+  VerifyRootsClosure(VerifyOption vo) :
+    _g1h(G1CollectedHeap::heap()),
+    _vo(vo),
+    _failures(false) { }
+
+  bool failures() { return _failures; }
+
+  template <class T> void do_oop_nv(T* p) {
+    T heap_oop = oopDesc::load_heap_oop(p);
+    if (!oopDesc::is_null(heap_oop)) {
+      oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
+      if (_g1h->is_obj_dead_cond(obj, _vo)) {
+        gclog_or_tty->print_cr("Root location "PTR_FORMAT" "
+                              "points to dead obj "PTR_FORMAT, p, (void*) obj);
+        if (_vo == VerifyOption_G1UseMarkWord) {
+          gclog_or_tty->print_cr("  Mark word: "PTR_FORMAT, (void*)(obj->mark()));
+        }
+        obj->print_on(gclog_or_tty);
+        _failures = true;
+      }
+    }
+  }
+
+  void do_oop(oop* p)       { do_oop_nv(p); }
+  void do_oop(narrowOop* p) { do_oop_nv(p); }
+};
+
+class G1VerifyCodeRootOopClosure: public OopsInGenClosure {
+  G1CollectedHeap* _g1h;
+  OopClosure* _root_cl;
+  nmethod* _nm;
+  VerifyOption _vo;
+  bool _failures;
+
+  template <class T> void do_oop_work(T* p) {
+    // First verify that this root is live
+    _root_cl->do_oop(p);
+
+    if (!G1VerifyHeapRegionCodeRoots) {
+      // We're not verifying the code roots attached to heap regions.
+      return;
+    }
+
+    // Don't check the code roots during marking verification in a full GC
+    if (_vo == VerifyOption_G1UseMarkWord) {
+      return;
+    }
+
+    // Now verify that the current nmethod (which contains p) is
+    // in the code root list of the heap region containing the
+    // object referenced by p.
+
+    T heap_oop = oopDesc::load_heap_oop(p);
+    if (!oopDesc::is_null(heap_oop)) {
+      oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
+
+      // Now fetch the region containing the object
+      HeapRegion* hr = _g1h->heap_region_containing(obj);
+      HeapRegionRemSet* hrrs = hr->rem_set();
+      // Verify that the strong code root list for this region
+      // contains the nmethod
+      if (!hrrs->strong_code_roots_list_contains(_nm)) {
+        gclog_or_tty->print_cr("Code root location "PTR_FORMAT" "
+                              "from nmethod "PTR_FORMAT" not in strong "
+                              "code roots for region ["PTR_FORMAT","PTR_FORMAT")",
+                              p, _nm, hr->bottom(), hr->end());
+        _failures = true;
+      }
+    }
+  }
+
+public:
+  G1VerifyCodeRootOopClosure(G1CollectedHeap* g1h, OopClosure* root_cl, VerifyOption vo):
+    _g1h(g1h), _root_cl(root_cl), _vo(vo), _nm(NULL), _failures(false) {}
+
+  void do_oop(oop* p) { do_oop_work(p); }
+  void do_oop(narrowOop* p) { do_oop_work(p); }
+
+  void set_nmethod(nmethod* nm) { _nm = nm; }
+  bool failures() { return _failures; }
+};
+
+class G1VerifyCodeRootBlobClosure: public CodeBlobClosure {
+  G1VerifyCodeRootOopClosure* _oop_cl;
+
+public:
+  G1VerifyCodeRootBlobClosure(G1VerifyCodeRootOopClosure* oop_cl):
+    _oop_cl(oop_cl) {}
+
+  void do_code_blob(CodeBlob* cb) {
+    nmethod* nm = cb->as_nmethod_or_null();
+    if (nm != NULL) {
+      _oop_cl->set_nmethod(nm);
+      nm->oops_do(_oop_cl);
+    }
+  }
+};
+
+class YoungRefCounterClosure : public OopClosure {
+  G1CollectedHeap* _g1h;
+  int              _count;
+ public:
+  YoungRefCounterClosure(G1CollectedHeap* g1h) : _g1h(g1h), _count(0) {}
+  void do_oop(oop* p)       { if (_g1h->is_in_young(*p)) { _count++; } }
+  void do_oop(narrowOop* p) { ShouldNotReachHere(); }
+
+  int count() { return _count; }
+  void reset_count() { _count = 0; };
+};
+
+class VerifyKlassClosure: public KlassClosure {
+  YoungRefCounterClosure _young_ref_counter_closure;
+  OopClosure *_oop_closure;
+ public:
+  VerifyKlassClosure(G1CollectedHeap* g1h, OopClosure* cl) : _young_ref_counter_closure(g1h), _oop_closure(cl) {}
+  void do_klass(Klass* k) {
+    k->oops_do(_oop_closure);
+
+    _young_ref_counter_closure.reset_count();
+    k->oops_do(&_young_ref_counter_closure);
+    if (_young_ref_counter_closure.count() > 0) {
+      guarantee(k->has_modified_oops(), err_msg("Klass %p, has young refs but is not dirty.", k));
+    }
+  }
+};
+
 class VerifyLivenessOopClosure: public OopClosure {
   G1CollectedHeap* _g1h;
   VerifyOption _vo;
@@ -3242,75 +3368,7 @@
   }
 };
 
-class YoungRefCounterClosure : public OopClosure {
-  G1CollectedHeap* _g1h;
-  int              _count;
- public:
-  YoungRefCounterClosure(G1CollectedHeap* g1h) : _g1h(g1h), _count(0) {}
-  void do_oop(oop* p)       { if (_g1h->is_in_young(*p)) { _count++; } }
-  void do_oop(narrowOop* p) { ShouldNotReachHere(); }
-
-  int count() { return _count; }
-  void reset_count() { _count = 0; };
-};
-
-class VerifyKlassClosure: public KlassClosure {
-  YoungRefCounterClosure _young_ref_counter_closure;
-  OopClosure *_oop_closure;
- public:
-  VerifyKlassClosure(G1CollectedHeap* g1h, OopClosure* cl) : _young_ref_counter_closure(g1h), _oop_closure(cl) {}
-  void do_klass(Klass* k) {
-    k->oops_do(_oop_closure);
-
-    _young_ref_counter_closure.reset_count();
-    k->oops_do(&_young_ref_counter_closure);
-    if (_young_ref_counter_closure.count() > 0) {
-      guarantee(k->has_modified_oops(), err_msg("Klass %p, has young refs but is not dirty.", k));
-    }
-  }
-};
-
-// TODO: VerifyRootsClosure extends OopsInGenClosure so that we can
-//       pass it as the perm_blk to SharedHeap::process_strong_roots.
-//       When process_strong_roots stop calling perm_blk->younger_refs_iterate
-//       we can change this closure to extend the simpler OopClosure.
-class VerifyRootsClosure: public OopsInGenClosure {
-private:
-  G1CollectedHeap* _g1h;
-  VerifyOption     _vo;
-  bool             _failures;
-public:
-  // _vo == UsePrevMarking -> use "prev" marking information,
-  // _vo == UseNextMarking -> use "next" marking information,
-  // _vo == UseMarkWord    -> use mark word from object header.
-  VerifyRootsClosure(VerifyOption vo) :
-    _g1h(G1CollectedHeap::heap()),
-    _vo(vo),
-    _failures(false) { }
-
-  bool failures() { return _failures; }
-
-  template <class T> void do_oop_nv(T* p) {
-    T heap_oop = oopDesc::load_heap_oop(p);
-    if (!oopDesc::is_null(heap_oop)) {
-      oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
-      if (_g1h->is_obj_dead_cond(obj, _vo)) {
-        gclog_or_tty->print_cr("Root location "PTR_FORMAT" "
-                              "points to dead obj "PTR_FORMAT, p, (void*) obj);
-        if (_vo == VerifyOption_G1UseMarkWord) {
-          gclog_or_tty->print_cr("  Mark word: "PTR_FORMAT, (void*)(obj->mark()));
-        }
-        obj->print_on(gclog_or_tty);
-        _failures = true;
-      }
-    }
-  }
-
-  void do_oop(oop* p)       { do_oop_nv(p); }
-  void do_oop(narrowOop* p) { do_oop_nv(p); }
-};
-
-// This is the task used for parallel heap verification.
+// This is the task used for parallel verification of the heap regions
 
 class G1ParVerifyTask: public AbstractGangTask {
 private:
@@ -3344,20 +3402,15 @@
   }
 };
 
-void G1CollectedHeap::verify(bool silent) {
-  verify(silent, VerifyOption_G1UsePrevMarking);
-}
-
-void G1CollectedHeap::verify(bool silent,
-                             VerifyOption vo) {
+void G1CollectedHeap::verify(bool silent, VerifyOption vo) {
   if (SafepointSynchronize::is_at_safepoint()) {
+    assert(Thread::current()->is_VM_thread(),
+           "Expected to be executed serially by the VM thread at this point");
+
     if (!silent) { gclog_or_tty->print("Roots "); }
     VerifyRootsClosure rootsCl(vo);
-
-    assert(Thread::current()->is_VM_thread(),
-           "Expected to be executed serially by the VM thread at this point");
-
-    CodeBlobToOopClosure blobsCl(&rootsCl, /*do_marking=*/ false);
+    G1VerifyCodeRootOopClosure codeRootsCl(this, &rootsCl, vo);
+    G1VerifyCodeRootBlobClosure blobsCl(&codeRootsCl);
     VerifyKlassClosure klassCl(this, &rootsCl);
 
     // We apply the relevant closures to all the oops in the
@@ -3376,7 +3429,7 @@
                          &klassCl
                          );
 
-    bool failures = rootsCl.failures();
+    bool failures = rootsCl.failures() || codeRootsCl.failures();
 
     if (vo != VerifyOption_G1UseMarkWord) {
       // If we're verifying during a full GC then the region sets
@@ -3445,6 +3498,34 @@
   }
 }
 
+void G1CollectedHeap::verify(bool silent) {
+  verify(silent, VerifyOption_G1UsePrevMarking);
+}
+
+double G1CollectedHeap::verify(bool guard, const char* msg) {
+  double verify_time_ms = 0.0;
+
+  if (guard && total_collections() >= VerifyGCStartAt) {
+    double verify_start = os::elapsedTime();
+    HandleMark hm;  // Discard invalid handles created during verification
+    prepare_for_verify();
+    Universe::verify(VerifyOption_G1UsePrevMarking, msg);
+    verify_time_ms = (os::elapsedTime() - verify_start) * 1000;
+  }
+
+  return verify_time_ms;
+}
+
+void G1CollectedHeap::verify_before_gc() {
+  double verify_time_ms = verify(VerifyBeforeGC, " VerifyBeforeGC:");
+  g1_policy()->phase_times()->record_verify_before_time_ms(verify_time_ms);
+}
+
+void G1CollectedHeap::verify_after_gc() {
+  double verify_time_ms = verify(VerifyAfterGC, " VerifyAfterGC:");
+  g1_policy()->phase_times()->record_verify_after_time_ms(verify_time_ms);
+}
+
 class PrintRegionClosure: public HeapRegionClosure {
   outputStream* _st;
 public:
@@ -3866,8 +3947,9 @@
       append_secondary_free_list_if_not_empty_with_lock();
     }
 
-    assert(check_young_list_well_formed(),
-      "young list should be well formed");
+    assert(check_young_list_well_formed(), "young list should be well formed");
+    assert(check_heap_region_claim_values(HeapRegion::InitialClaimValue),
+           "sanity check");
 
     // Don't dynamically change the number of GC threads this early.  A value of
     // 0 is used to indicate serial work.  When parallel work is done,
@@ -4987,7 +5069,11 @@
 
       G1ParPushHeapRSClosure          push_heap_rs_cl(_g1h, &pss);
 
-      int so = SharedHeap::SO_AllClasses | SharedHeap::SO_Strings | SharedHeap::SO_CodeCache;
+      // Don't scan the scavengable methods in the code cache as part
+      // of strong root scanning. The code roots that point into a
+      // region in the collection set are scanned when we scan the
+      // region's RSet.
+      int so = SharedHeap::SO_AllClasses | SharedHeap::SO_Strings;
 
       pss.start_strong_roots();
       _g1h->g1_process_strong_roots(/* is scavenging */ true,
@@ -5029,67 +5115,6 @@
 
 // *** Common G1 Evacuation Stuff
 
-// Closures that support the filtering of CodeBlobs scanned during
-// external root scanning.
-
-// Closure applied to reference fields in code blobs (specifically nmethods)
-// to determine whether an nmethod contains references that point into
-// the collection set. Used as a predicate when walking code roots so
-// that only nmethods that point into the collection set are added to the
-// 'marked' list.
-
-class G1FilteredCodeBlobToOopClosure : public CodeBlobToOopClosure {
-
-  class G1PointsIntoCSOopClosure : public OopClosure {
-    G1CollectedHeap* _g1;
-    bool _points_into_cs;
-  public:
-    G1PointsIntoCSOopClosure(G1CollectedHeap* g1) :
-      _g1(g1), _points_into_cs(false) { }
-
-    bool points_into_cs() const { return _points_into_cs; }
-
-    template <class T>
-    void do_oop_nv(T* p) {
-      if (!_points_into_cs) {
-        T heap_oop = oopDesc::load_heap_oop(p);
-        if (!oopDesc::is_null(heap_oop) &&
-            _g1->in_cset_fast_test(oopDesc::decode_heap_oop_not_null(heap_oop))) {
-          _points_into_cs = true;
-        }
-      }
-    }
-
-    virtual void do_oop(oop* p)        { do_oop_nv(p); }
-    virtual void do_oop(narrowOop* p)  { do_oop_nv(p); }
-  };
-
-  G1CollectedHeap* _g1;
-
-public:
-  G1FilteredCodeBlobToOopClosure(G1CollectedHeap* g1, OopClosure* cl) :
-    CodeBlobToOopClosure(cl, true), _g1(g1) { }
-
-  virtual void do_code_blob(CodeBlob* cb) {
-    nmethod* nm = cb->as_nmethod_or_null();
-    if (nm != NULL && !(nm->test_oops_do_mark())) {
-      G1PointsIntoCSOopClosure predicate_cl(_g1);
-      nm->oops_do(&predicate_cl);
-
-      if (predicate_cl.points_into_cs()) {
-        // At least one of the reference fields or the oop relocations
-        // in the nmethod points into the collection set. We have to
-        // 'mark' this nmethod.
-        // Note: Revisit the following if CodeBlobToOopClosure::do_code_blob()
-        // or MarkingCodeBlobClosure::do_code_blob() change.
-        if (!nm->test_set_oops_do_mark()) {
-          do_newly_marked_nmethod(nm);
-        }
-      }
-    }
-  }
-};
-
 // This method is run in a GC worker.
 
 void
@@ -5107,9 +5132,10 @@
 
   BufferingOopClosure buf_scan_non_heap_roots(scan_non_heap_roots);
 
-  // Walk the code cache w/o buffering, because StarTask cannot handle
-  // unaligned oop locations.
-  G1FilteredCodeBlobToOopClosure eager_scan_code_roots(this, scan_non_heap_roots);
+  assert(so & SO_CodeCache || scan_rs != NULL, "must scan code roots somehow");
+  // Walk the code cache/strong code roots w/o buffering, because StarTask
+  // cannot handle unaligned oop locations.
+  CodeBlobToOopClosure eager_scan_code_roots(scan_non_heap_roots, true /* do_marking */);
 
   process_strong_roots(false, // no scoping; this is parallel code
                        is_scavenging, so,
@@ -5154,9 +5180,22 @@
   }
   g1_policy()->phase_times()->record_satb_filtering_time(worker_i, satb_filtering_ms);
 
+  // If this is an initial mark pause, and we're not scanning
+  // the entire code cache, we need to mark the oops in the
+  // strong code root lists for the regions that are not in
+  // the collection set.
+  // Note all threads participate in this set of root tasks.
+  double mark_strong_code_roots_ms = 0.0;
+  if (g1_policy()->during_initial_mark_pause() && !(so & SO_CodeCache)) {
+    double mark_strong_roots_start = os::elapsedTime();
+    mark_strong_code_roots(worker_i);
+    mark_strong_code_roots_ms = (os::elapsedTime() - mark_strong_roots_start) * 1000.0;
+  }
+  g1_policy()->phase_times()->record_strong_code_root_mark_time(worker_i, mark_strong_code_roots_ms);
+
   // Now scan the complement of the collection set.
   if (scan_rs != NULL) {
-    g1_rem_set()->oops_into_collection_set_do(scan_rs, worker_i);
+    g1_rem_set()->oops_into_collection_set_do(scan_rs, &eager_scan_code_roots, worker_i);
   }
   _process_strong_tasks->all_tasks_completed();
 }
@@ -5774,9 +5813,6 @@
   process_discovered_references(n_workers);
 
   // Weak root processing.
-  // Note: when JSR 292 is enabled and code blobs can contain
-  // non-perm oops then we will need to process the code blobs
-  // here too.
   {
     G1STWIsAliveClosure is_alive(this);
     G1KeepAliveClosure keep_alive(this);
@@ -5792,6 +5828,17 @@
   hot_card_cache->reset_hot_cache();
   hot_card_cache->set_use_cache(true);
 
+  // Migrate the strong code roots attached to each region in
+  // the collection set. Ideally we would like to do this
+  // after we have finished the scanning/evacuation of the
+  // strong code roots for a particular heap region.
+  migrate_strong_code_roots();
+
+  if (g1_policy()->during_initial_mark_pause()) {
+    // Reset the claim values set during marking the strong code roots
+    reset_heap_region_claim_values();
+  }
+
   finalize_for_evac_failure();
 
   if (evacuation_failed()) {
@@ -6588,3 +6635,208 @@
   _humongous_set.verify_end();
   _free_list.verify_end();
 }
+
+// Optimized nmethod scanning
+
+class RegisterNMethodOopClosure: public OopClosure {
+  G1CollectedHeap* _g1h;
+  nmethod* _nm;
+
+  template <class T> void do_oop_work(T* p) {
+    T heap_oop = oopDesc::load_heap_oop(p);
+    if (!oopDesc::is_null(heap_oop)) {
+      oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
+      HeapRegion* hr = _g1h->heap_region_containing(obj);
+      assert(!hr->isHumongous(), "code root in humongous region?");
+
+      // HeapRegion::add_strong_code_root() avoids adding duplicate
+      // entries but having duplicates is OK since we "mark" nmethods
+      // as visited when we scan the strong code root lists during the GC.
+      hr->add_strong_code_root(_nm);
+      assert(hr->rem_set()->strong_code_roots_list_contains(_nm), "add failed?");
+    }
+  }
+
+public:
+  RegisterNMethodOopClosure(G1CollectedHeap* g1h, nmethod* nm) :
+    _g1h(g1h), _nm(nm) {}
+
+  void do_oop(oop* p)       { do_oop_work(p); }
+  void do_oop(narrowOop* p) { do_oop_work(p); }
+};
+
+class UnregisterNMethodOopClosure: public OopClosure {
+  G1CollectedHeap* _g1h;
+  nmethod* _nm;
+
+  template <class T> void do_oop_work(T* p) {
+    T heap_oop = oopDesc::load_heap_oop(p);
+    if (!oopDesc::is_null(heap_oop)) {
+      oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
+      HeapRegion* hr = _g1h->heap_region_containing(obj);
+      assert(!hr->isHumongous(), "code root in humongous region?");
+      hr->remove_strong_code_root(_nm);
+      assert(!hr->rem_set()->strong_code_roots_list_contains(_nm), "remove failed?");
+    }
+  }
+
+public:
+  UnregisterNMethodOopClosure(G1CollectedHeap* g1h, nmethod* nm) :
+    _g1h(g1h), _nm(nm) {}
+
+  void do_oop(oop* p)       { do_oop_work(p); }
+  void do_oop(narrowOop* p) { do_oop_work(p); }
+};
+
+void G1CollectedHeap::register_nmethod(nmethod* nm) {
+  CollectedHeap::register_nmethod(nm);
+
+  guarantee(nm != NULL, "sanity");
+  RegisterNMethodOopClosure reg_cl(this, nm);
+  nm->oops_do(&reg_cl);
+}
+
+void G1CollectedHeap::unregister_nmethod(nmethod* nm) {
+  CollectedHeap::unregister_nmethod(nm);
+
+  guarantee(nm != NULL, "sanity");
+  UnregisterNMethodOopClosure reg_cl(this, nm);
+  nm->oops_do(&reg_cl, true);
+}
+
+class MigrateCodeRootsHeapRegionClosure: public HeapRegionClosure {
+public:
+  bool doHeapRegion(HeapRegion *hr) {
+    assert(!hr->isHumongous(), "humongous region in collection set?");
+    hr->migrate_strong_code_roots();
+    return false;
+  }
+};
+
+void G1CollectedHeap::migrate_strong_code_roots() {
+  MigrateCodeRootsHeapRegionClosure cl;
+  double migrate_start = os::elapsedTime();
+  collection_set_iterate(&cl);
+  double migration_time_ms = (os::elapsedTime() - migrate_start) * 1000.0;
+  g1_policy()->phase_times()->record_strong_code_root_migration_time(migration_time_ms);
+}
+
+// Mark all the code roots that point into regions *not* in the
+// collection set.
+//
+// Note we do not want to use a "marking" CodeBlobToOopClosure while
+// walking the code roots lists of regions not in the collection
+// set. Suppose we have an nmethod (M) that points to objects in two
+// separate regions - one in the collection set (R1) and one not (R2).
+// Using a "marking" CodeBlobToOopClosure here would result in "marking"
+// nmethod M when walking the code roots for R1. When we come to scan
+// the code roots for R2, we would see that M is already marked and it
+// would be skipped and the objects in R2 that are referenced from M
+// would not be evacuated.
+
+class MarkStrongCodeRootCodeBlobClosure: public CodeBlobClosure {
+
+  class MarkStrongCodeRootOopClosure: public OopClosure {
+    ConcurrentMark* _cm;
+    HeapRegion* _hr;
+    uint _worker_id;
+
+    template <class T> void do_oop_work(T* p) {
+      T heap_oop = oopDesc::load_heap_oop(p);
+      if (!oopDesc::is_null(heap_oop)) {
+        oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
+        // Only mark objects in the region (which is assumed
+        // to be not in the collection set).
+        if (_hr->is_in(obj)) {
+          _cm->grayRoot(obj, (size_t) obj->size(), _worker_id);
+        }
+      }
+    }
+
+  public:
+    MarkStrongCodeRootOopClosure(ConcurrentMark* cm, HeapRegion* hr, uint worker_id) :
+      _cm(cm), _hr(hr), _worker_id(worker_id) {
+      assert(!_hr->in_collection_set(), "sanity");
+    }
+
+    void do_oop(narrowOop* p) { do_oop_work(p); }
+    void do_oop(oop* p)       { do_oop_work(p); }
+  };
+
+  MarkStrongCodeRootOopClosure _oop_cl;
+
+public:
+  MarkStrongCodeRootCodeBlobClosure(ConcurrentMark* cm, HeapRegion* hr, uint worker_id):
+    _oop_cl(cm, hr, worker_id) {}
+
+  void do_code_blob(CodeBlob* cb) {
+    nmethod* nm = (cb == NULL) ? NULL : cb->as_nmethod_or_null();
+    if (nm != NULL) {
+      nm->oops_do(&_oop_cl);
+    }
+  }
+};
+
+class MarkStrongCodeRootsHRClosure: public HeapRegionClosure {
+  G1CollectedHeap* _g1h;
+  uint _worker_id;
+
+public:
+  MarkStrongCodeRootsHRClosure(G1CollectedHeap* g1h, uint worker_id) :
+    _g1h(g1h), _worker_id(worker_id) {}
+
+  bool doHeapRegion(HeapRegion *hr) {
+    HeapRegionRemSet* hrrs = hr->rem_set();
+    if (hr->isHumongous()) {
+      // Code roots should never be attached to a humongous region
+      assert(hrrs->strong_code_roots_list_length() == 0, "sanity");
+      return false;
+    }
+
+    if (hr->in_collection_set()) {
+      // Don't mark code roots into regions in the collection set here.
+      // They will be marked when we scan them.
+      return false;
+    }
+
+    MarkStrongCodeRootCodeBlobClosure cb_cl(_g1h->concurrent_mark(), hr, _worker_id);
+    hr->strong_code_roots_do(&cb_cl);
+    return false;
+  }
+};
+
+void G1CollectedHeap::mark_strong_code_roots(uint worker_id) {
+  MarkStrongCodeRootsHRClosure cl(this, worker_id);
+  if (G1CollectedHeap::use_parallel_gc_threads()) {
+    heap_region_par_iterate_chunked(&cl,
+                                    worker_id,
+                                    workers()->active_workers(),
+                                    HeapRegion::ParMarkRootClaimValue);
+  } else {
+    heap_region_iterate(&cl);
+  }
+}
+
+class RebuildStrongCodeRootClosure: public CodeBlobClosure {
+  G1CollectedHeap* _g1h;
+
+public:
+  RebuildStrongCodeRootClosure(G1CollectedHeap* g1h) :
+    _g1h(g1h) {}
+
+  void do_code_blob(CodeBlob* cb) {
+    nmethod* nm = (cb != NULL) ? cb->as_nmethod_or_null() : NULL;
+    if (nm == NULL) {
+      return;
+    }
+
+    if (ScavengeRootsInCode && nm->detect_scavenge_root_oops()) {
+      _g1h->register_nmethod(nm);
+    }
+  }
+};
+
+void G1CollectedHeap::rebuild_strong_code_roots() {
+  RebuildStrongCodeRootClosure blob_cl(this);
+  CodeCache::blobs_do(&blob_cl);
+}
--- a/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp	Mon Aug 26 17:36:10 2013 -0700
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp	Wed Jul 05 19:08:56 2017 +0200
@@ -46,6 +46,7 @@
 // may combine concurrent marking with parallel, incremental compaction of
 // heap subsets that will yield large amounts of garbage.
 
+// Forward declarations
 class HeapRegion;
 class HRRSCleanupTask;
 class GenerationSpec;
@@ -69,6 +70,7 @@
 class G1NewTracer;
 class G1OldTracer;
 class EvacuationFailedInfo;
+class nmethod;
 
 typedef OverflowTaskQueue<StarTask, mtGC>         RefToScanQueue;
 typedef GenericTaskQueueSet<RefToScanQueue, mtGC> RefToScanQueueSet;
@@ -163,18 +165,6 @@
     : G1AllocRegion("Mutator Alloc Region", false /* bot_updates */) { }
 };
 
-// The G1 STW is alive closure.
-// An instance is embedded into the G1CH and used as the
-// (optional) _is_alive_non_header closure in the STW
-// reference processor. It is also extensively used during
-// reference processing during STW evacuation pauses.
-class G1STWIsAliveClosure: public BoolObjectClosure {
-  G1CollectedHeap* _g1;
-public:
-  G1STWIsAliveClosure(G1CollectedHeap* g1) : _g1(g1) {}
-  bool do_object_b(oop p);
-};
-
 class SurvivorGCAllocRegion : public G1AllocRegion {
 protected:
   virtual HeapRegion* allocate_new_region(size_t word_size, bool force);
@@ -193,6 +183,18 @@
   : G1AllocRegion("Old GC Alloc Region", true /* bot_updates */) { }
 };
 
+// The G1 STW is alive closure.
+// An instance is embedded into the G1CH and used as the
+// (optional) _is_alive_non_header closure in the STW
+// reference processor. It is also extensively used during
+// reference processing during STW evacuation pauses.
+class G1STWIsAliveClosure: public BoolObjectClosure {
+  G1CollectedHeap* _g1;
+public:
+  G1STWIsAliveClosure(G1CollectedHeap* g1) : _g1(g1) {}
+  bool do_object_b(oop p);
+};
+
 class RefineCardTableEntryClosure;
 
 class G1CollectedHeap : public SharedHeap {
@@ -1549,42 +1551,6 @@
 
   virtual jlong millis_since_last_gc();
 
-  // Perform any cleanup actions necessary before allowing a verification.
-  virtual void prepare_for_verify();
-
-  // Perform verification.
-
-  // vo == UsePrevMarking  -> use "prev" marking information,
-  // vo == UseNextMarking -> use "next" marking information
-  // vo == UseMarkWord    -> use the mark word in the object header
-  //
-  // NOTE: Only the "prev" marking information is guaranteed to be
-  // consistent most of the time, so most calls to this should use
-  // vo == UsePrevMarking.
-  // Currently, there is only one case where this is called with
-  // vo == UseNextMarking, which is to verify the "next" marking
-  // information at the end of remark.
-  // Currently there is only one place where this is called with
-  // vo == UseMarkWord, which is to verify the marking during a
-  // full GC.
-  void verify(bool silent, VerifyOption vo);
-
-  // Override; it uses the "prev" marking information
-  virtual void verify(bool silent);
-
-  virtual void print_on(outputStream* st) const;
-  virtual void print_extended_on(outputStream* st) const;
-  virtual void print_on_error(outputStream* st) const;
-
-  virtual void print_gc_threads_on(outputStream* st) const;
-  virtual void gc_threads_do(ThreadClosure* tc) const;
-
-  // Override
-  void print_tracing_info() const;
-
-  // The following two methods are helpful for debugging RSet issues.
-  void print_cset_rsets() PRODUCT_RETURN;
-  void print_all_rsets() PRODUCT_RETURN;
 
   // Convenience function to be used in situations where the heap type can be
   // asserted to be this type.
@@ -1661,13 +1627,86 @@
     else return is_obj_ill(obj, hr);
   }
 
+  bool allocated_since_marking(oop obj, HeapRegion* hr, VerifyOption vo);
+  HeapWord* top_at_mark_start(HeapRegion* hr, VerifyOption vo);
+  bool is_marked(oop obj, VerifyOption vo);
+  const char* top_at_mark_start_str(VerifyOption vo);
+
+  ConcurrentMark* concurrent_mark() const { return _cm; }
+
+  // Refinement
+
+  ConcurrentG1Refine* concurrent_g1_refine() const { return _cg1r; }
+
+  // The dirty cards region list is used to record a subset of regions
+  // whose cards need clearing. The list is populated during the
+  // remembered set scanning and drained during the card table
+  // cleanup. Although the methods are reentrant, population/draining
+  // phases must not overlap. For synchronization purposes the last
+  // element on the list points to itself.
+  HeapRegion* _dirty_cards_region_list;
+  void push_dirty_cards_region(HeapRegion* hr);
+  HeapRegion* pop_dirty_cards_region();
+
+  // Optimized nmethod scanning support routines
+
+  // Register the given nmethod with the G1 heap
+  virtual void register_nmethod(nmethod* nm);
+
+  // Unregister the given nmethod from the G1 heap
+  virtual void unregister_nmethod(nmethod* nm);
+
+  // Migrate the nmethods in the code root lists of the regions
+  // in the collection set to regions in to-space. In the event
+  // of an evacuation failure, nmethods that reference objects
+  // that were not successfully evacuated are not migrated.
+  void migrate_strong_code_roots();
+
+  // During an initial mark pause, mark all the code roots that
+  // point into regions *not* in the collection set.
+  void mark_strong_code_roots(uint worker_id);
+
+  // Rebuild the strong code root lists for each region
+  // after a full GC
+  void rebuild_strong_code_roots();
+
+  // Verification
+
+  // The following is just to alert the verification code
+  // that a full collection has occurred and that the
+  // remembered sets are no longer up to date.
+  bool _full_collection;
+  void set_full_collection() { _full_collection = true;}
+  void clear_full_collection() {_full_collection = false;}
+  bool full_collection() {return _full_collection;}
+
+  // Perform any cleanup actions necessary before allowing a verification.
+  virtual void prepare_for_verify();
+
+  // Perform verification.
+
+  // vo == UsePrevMarking  -> use "prev" marking information,
+  // vo == UseNextMarking -> use "next" marking information
+  // vo == UseMarkWord    -> use the mark word in the object header
+  //
+  // NOTE: Only the "prev" marking information is guaranteed to be
+  // consistent most of the time, so most calls to this should use
+  // vo == UsePrevMarking.
+  // Currently, there is only one case where this is called with
+  // vo == UseNextMarking, which is to verify the "next" marking
+  // information at the end of remark.
+  // Currently there is only one place where this is called with
+  // vo == UseMarkWord, which is to verify the marking during a
+  // full GC.
+  void verify(bool silent, VerifyOption vo);
+
+  // Override; it uses the "prev" marking information
+  virtual void verify(bool silent);
+
   // The methods below are here for convenience and dispatch the
   // appropriate method depending on value of the given VerifyOption
-  // parameter. The options for that parameter are:
-  //
-  // vo == UsePrevMarking -> use "prev" marking information,
-  // vo == UseNextMarking -> use "next" marking information,
-  // vo == UseMarkWord    -> use mark word from object header
+  // parameter. The values for that parameter, and their meanings,
+  // are the same as those above.
 
   bool is_obj_dead_cond(const oop obj,
                         const HeapRegion* hr,
@@ -1692,31 +1731,21 @@
     return false; // keep some compilers happy
   }
 
-  bool allocated_since_marking(oop obj, HeapRegion* hr, VerifyOption vo);
-  HeapWord* top_at_mark_start(HeapRegion* hr, VerifyOption vo);
-  bool is_marked(oop obj, VerifyOption vo);
-  const char* top_at_mark_start_str(VerifyOption vo);
+  // Printing
 
-  // The following is just to alert the verification code
-  // that a full collection has occurred and that the
-  // remembered sets are no longer up to date.
-  bool _full_collection;
-  void set_full_collection() { _full_collection = true;}
-  void clear_full_collection() {_full_collection = false;}
-  bool full_collection() {return _full_collection;}
+  virtual void print_on(outputStream* st) const;
+  virtual void print_extended_on(outputStream* st) const;
+  virtual void print_on_error(outputStream* st) const;
 
-  ConcurrentMark* concurrent_mark() const { return _cm; }
-  ConcurrentG1Refine* concurrent_g1_refine() const { return _cg1r; }
+  virtual void print_gc_threads_on(outputStream* st) const;
+  virtual void gc_threads_do(ThreadClosure* tc) const;
 
-  // The dirty cards region list is used to record a subset of regions
-  // whose cards need clearing. The list if populated during the
-  // remembered set scanning and drained during the card table
-  // cleanup. Although the methods are reentrant, population/draining
-  // phases must not overlap. For synchronization purposes the last
-  // element on the list points to itself.
-  HeapRegion* _dirty_cards_region_list;
-  void push_dirty_cards_region(HeapRegion* hr);
-  HeapRegion* pop_dirty_cards_region();
+  // Override
+  void print_tracing_info() const;
+
+  // The following two methods are helpful for debugging RSet issues.
+  void print_cset_rsets() PRODUCT_RETURN;
+  void print_all_rsets() PRODUCT_RETURN;
 
 public:
   void stop_conc_gc_threads();
--- a/hotspot/src/share/vm/gc_implementation/g1/g1GCPhaseTimes.cpp	Mon Aug 26 17:36:10 2013 -0700
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1GCPhaseTimes.cpp	Wed Jul 05 19:08:56 2017 +0200
@@ -161,6 +161,8 @@
   _last_update_rs_times_ms(_max_gc_threads, "%.1lf"),
   _last_update_rs_processed_buffers(_max_gc_threads, "%d"),
   _last_scan_rs_times_ms(_max_gc_threads, "%.1lf"),
+  _last_strong_code_root_scan_times_ms(_max_gc_threads, "%.1lf"),
+  _last_strong_code_root_mark_times_ms(_max_gc_threads, "%.1lf"),
   _last_obj_copy_times_ms(_max_gc_threads, "%.1lf"),
   _last_termination_times_ms(_max_gc_threads, "%.1lf"),
   _last_termination_attempts(_max_gc_threads, SIZE_FORMAT),
@@ -182,6 +184,8 @@
   _last_update_rs_times_ms.reset();
   _last_update_rs_processed_buffers.reset();
   _last_scan_rs_times_ms.reset();
+  _last_strong_code_root_scan_times_ms.reset();
+  _last_strong_code_root_mark_times_ms.reset();
   _last_obj_copy_times_ms.reset();
   _last_termination_times_ms.reset();
   _last_termination_attempts.reset();
@@ -197,6 +201,8 @@
   _last_update_rs_times_ms.verify();
   _last_update_rs_processed_buffers.verify();
   _last_scan_rs_times_ms.verify();
+  _last_strong_code_root_scan_times_ms.verify();
+  _last_strong_code_root_mark_times_ms.verify();
   _last_obj_copy_times_ms.verify();
   _last_termination_times_ms.verify();
   _last_termination_attempts.verify();
@@ -210,6 +216,8 @@
                                _last_satb_filtering_times_ms.get(i) +
                                _last_update_rs_times_ms.get(i) +
                                _last_scan_rs_times_ms.get(i) +
+                               _last_strong_code_root_scan_times_ms.get(i) +
+                               _last_strong_code_root_mark_times_ms.get(i) +
                                _last_obj_copy_times_ms.get(i) +
                                _last_termination_times_ms.get(i);
 
@@ -239,6 +247,9 @@
     // Now subtract the time taken to fix up roots in generated code
     misc_time_ms += _cur_collection_code_root_fixup_time_ms;
 
+    // Strong code root migration time
+    misc_time_ms += _cur_strong_code_root_migration_time_ms;
+
     // Subtract the time taken to clean the card table from the
     // current value of "other time"
     misc_time_ms += _cur_clear_ct_time_ms;
@@ -257,9 +268,13 @@
     if (_last_satb_filtering_times_ms.sum() > 0.0) {
       _last_satb_filtering_times_ms.print(2, "SATB Filtering (ms)");
     }
+    if (_last_strong_code_root_mark_times_ms.sum() > 0.0) {
+     _last_strong_code_root_mark_times_ms.print(2, "Code Root Marking (ms)");
+    }
     _last_update_rs_times_ms.print(2, "Update RS (ms)");
       _last_update_rs_processed_buffers.print(3, "Processed Buffers");
     _last_scan_rs_times_ms.print(2, "Scan RS (ms)");
+    _last_strong_code_root_scan_times_ms.print(2, "Code Root Scanning (ms)");
     _last_obj_copy_times_ms.print(2, "Object Copy (ms)");
     _last_termination_times_ms.print(2, "Termination (ms)");
     if (G1Log::finest()) {
@@ -273,12 +288,17 @@
     if (_last_satb_filtering_times_ms.sum() > 0.0) {
       _last_satb_filtering_times_ms.print(1, "SATB Filtering (ms)");
     }
+    if (_last_strong_code_root_mark_times_ms.sum() > 0.0) {
+      _last_strong_code_root_mark_times_ms.print(1, "Code Root Marking (ms)");
+    }
     _last_update_rs_times_ms.print(1, "Update RS (ms)");
       _last_update_rs_processed_buffers.print(2, "Processed Buffers");
     _last_scan_rs_times_ms.print(1, "Scan RS (ms)");
+    _last_strong_code_root_scan_times_ms.print(1, "Code Root Scanning (ms)");
     _last_obj_copy_times_ms.print(1, "Object Copy (ms)");
   }
   print_stats(1, "Code Root Fixup", _cur_collection_code_root_fixup_time_ms);
+  print_stats(1, "Code Root Migration", _cur_strong_code_root_migration_time_ms);
   print_stats(1, "Clear CT", _cur_clear_ct_time_ms);
   double misc_time_ms = pause_time_sec * MILLIUNITS - accounted_time_ms();
   print_stats(1, "Other", misc_time_ms);
--- a/hotspot/src/share/vm/gc_implementation/g1/g1GCPhaseTimes.hpp	Mon Aug 26 17:36:10 2013 -0700
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1GCPhaseTimes.hpp	Wed Jul 05 19:08:56 2017 +0200
@@ -119,6 +119,8 @@
   WorkerDataArray<double> _last_update_rs_times_ms;
   WorkerDataArray<int>    _last_update_rs_processed_buffers;
   WorkerDataArray<double> _last_scan_rs_times_ms;
+  WorkerDataArray<double> _last_strong_code_root_scan_times_ms;
+  WorkerDataArray<double> _last_strong_code_root_mark_times_ms;
   WorkerDataArray<double> _last_obj_copy_times_ms;
   WorkerDataArray<double> _last_termination_times_ms;
   WorkerDataArray<size_t> _last_termination_attempts;
@@ -128,6 +130,7 @@
 
   double _cur_collection_par_time_ms;
   double _cur_collection_code_root_fixup_time_ms;
+  double _cur_strong_code_root_migration_time_ms;
 
   double _cur_clear_ct_time_ms;
   double _cur_ref_proc_time_ms;
@@ -179,6 +182,14 @@
     _last_scan_rs_times_ms.set(worker_i, ms);
   }
 
+  void record_strong_code_root_scan_time(uint worker_i, double ms) {
+    _last_strong_code_root_scan_times_ms.set(worker_i, ms);
+  }
+
+  void record_strong_code_root_mark_time(uint worker_i, double ms) {
+    _last_strong_code_root_mark_times_ms.set(worker_i, ms);
+  }
+
   void record_obj_copy_time(uint worker_i, double ms) {
     _last_obj_copy_times_ms.set(worker_i, ms);
   }
@@ -208,6 +219,10 @@
     _cur_collection_code_root_fixup_time_ms = ms;
   }
 
+  void record_strong_code_root_migration_time(double ms) {
+    _cur_strong_code_root_migration_time_ms = ms;
+  }
+
   void record_ref_proc_time(double ms) {
     _cur_ref_proc_time_ms = ms;
   }
@@ -294,6 +309,14 @@
     return _last_scan_rs_times_ms.average();
   }
 
+  double average_last_strong_code_root_scan_time(){
+    return _last_strong_code_root_scan_times_ms.average();
+  }
+
+  double average_last_strong_code_root_mark_time(){
+    return _last_strong_code_root_mark_times_ms.average();
+  }
+
   double average_last_obj_copy_time() {
     return _last_obj_copy_times_ms.average();
   }
--- a/hotspot/src/share/vm/gc_implementation/g1/g1MonitoringSupport.cpp	Mon Aug 26 17:36:10 2013 -0700
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1MonitoringSupport.cpp	Wed Jul 05 19:08:56 2017 +0200
@@ -262,6 +262,7 @@
     old_collection_counters()->update_all();
     young_collection_counters()->update_all();
     MetaspaceCounters::update_performance_counters();
+    CompressedClassSpaceCounters::update_performance_counters();
   }
 }
 
--- a/hotspot/src/share/vm/gc_implementation/g1/g1RemSet.cpp	Mon Aug 26 17:36:10 2013 -0700
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1RemSet.cpp	Wed Jul 05 19:08:56 2017 +0200
@@ -104,15 +104,25 @@
 class ScanRSClosure : public HeapRegionClosure {
   size_t _cards_done, _cards;
   G1CollectedHeap* _g1h;
+
   OopsInHeapRegionClosure* _oc;
+  CodeBlobToOopClosure* _code_root_cl;
+
   G1BlockOffsetSharedArray* _bot_shared;
   CardTableModRefBS *_ct_bs;
-  int _worker_i;
-  int _block_size;
-  bool _try_claimed;
+
+  double _strong_code_root_scan_time_sec;
+  int    _worker_i;
+  int    _block_size;
+  bool   _try_claimed;
+
 public:
-  ScanRSClosure(OopsInHeapRegionClosure* oc, int worker_i) :
+  ScanRSClosure(OopsInHeapRegionClosure* oc,
+                CodeBlobToOopClosure* code_root_cl,
+                int worker_i) :
     _oc(oc),
+    _code_root_cl(code_root_cl),
+    _strong_code_root_scan_time_sec(0.0),
     _cards(0),
     _cards_done(0),
     _worker_i(worker_i),
@@ -160,6 +170,12 @@
                            card_start, card_start + G1BlockOffsetSharedArray::N_words);
   }
 
+  void scan_strong_code_roots(HeapRegion* r) {
+    double scan_start = os::elapsedTime();
+    r->strong_code_roots_do(_code_root_cl);
+    _strong_code_root_scan_time_sec += (os::elapsedTime() - scan_start);
+  }
+
   bool doHeapRegion(HeapRegion* r) {
     assert(r->in_collection_set(), "should only be called on elements of CS.");
     HeapRegionRemSet* hrrs = r->rem_set();
@@ -173,6 +189,7 @@
     //   _try_claimed || r->claim_iter()
     // is true: either we're supposed to work on claimed-but-not-complete
     // regions, or we successfully claimed the region.
+
     HeapRegionRemSetIterator iter(hrrs);
     size_t card_index;
 
@@ -205,30 +222,43 @@
       }
     }
     if (!_try_claimed) {
+      // Scan the strong code root list attached to the current region
+      scan_strong_code_roots(r);
+
       hrrs->set_iter_complete();
     }
     return false;
   }
+
+  double strong_code_root_scan_time_sec() {
+    return _strong_code_root_scan_time_sec;
+  }
+
   size_t cards_done() { return _cards_done;}
   size_t cards_looked_up() { return _cards;}
 };
 
-void G1RemSet::scanRS(OopsInHeapRegionClosure* oc, int worker_i) {
+void G1RemSet::scanRS(OopsInHeapRegionClosure* oc,
+                      CodeBlobToOopClosure* code_root_cl,
+                      int worker_i) {
   double rs_time_start = os::elapsedTime();
   HeapRegion *startRegion = _g1->start_cset_region_for_worker(worker_i);
 
-  ScanRSClosure scanRScl(oc, worker_i);
+  ScanRSClosure scanRScl(oc, code_root_cl, worker_i);
 
   _g1->collection_set_iterate_from(startRegion, &scanRScl);
   scanRScl.set_try_claimed();
   _g1->collection_set_iterate_from(startRegion, &scanRScl);
 
-  double scan_rs_time_sec = os::elapsedTime() - rs_time_start;
+  double scan_rs_time_sec = (os::elapsedTime() - rs_time_start)
+                            - scanRScl.strong_code_root_scan_time_sec();
 
-  assert( _cards_scanned != NULL, "invariant" );
+  assert(_cards_scanned != NULL, "invariant");
   _cards_scanned[worker_i] = scanRScl.cards_done();
 
   _g1p->phase_times()->record_scan_rs_time(worker_i, scan_rs_time_sec * 1000.0);
+  _g1p->phase_times()->record_strong_code_root_scan_time(worker_i,
+                                                         scanRScl.strong_code_root_scan_time_sec() * 1000.0);
 }
 
 // Closure used for updating RSets and recording references that
@@ -288,7 +318,8 @@
 }
 
 void G1RemSet::oops_into_collection_set_do(OopsInHeapRegionClosure* oc,
-                                             int worker_i) {
+                                           CodeBlobToOopClosure* code_root_cl,
+                                           int worker_i) {
 #if CARD_REPEAT_HISTO
   ct_freq_update_histo_and_reset();
 #endif
@@ -328,7 +359,7 @@
     _g1p->phase_times()->record_update_rs_time(worker_i, 0.0);
   }
   if (G1UseParallelRSetScanning || (worker_i == 0)) {
-    scanRS(oc, worker_i);
+    scanRS(oc, code_root_cl, worker_i);
   } else {
     _g1p->phase_times()->record_scan_rs_time(worker_i, 0.0);
   }
--- a/hotspot/src/share/vm/gc_implementation/g1/g1RemSet.hpp	Mon Aug 26 17:36:10 2013 -0700
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1RemSet.hpp	Wed Jul 05 19:08:56 2017 +0200
@@ -81,14 +81,23 @@
   G1RemSet(G1CollectedHeap* g1, CardTableModRefBS* ct_bs);
   ~G1RemSet();
 
-  // Invoke "blk->do_oop" on all pointers into the CS in objects in regions
-  // outside the CS (having invoked "blk->set_region" to set the "from"
-  // region correctly beforehand.) The "worker_i" param is for the
-  // parallel case where the number of the worker thread calling this
-  // function can be helpful in partitioning the work to be done. It
-  // should be the same as the "i" passed to the calling thread's
-  // work(i) function. In the sequential case this param will be ingored.
-  void oops_into_collection_set_do(OopsInHeapRegionClosure* blk, int worker_i);
+  // Invoke "blk->do_oop" on all pointers into the collection set
+  // from objects in regions outside the collection set (having
+  // invoked "blk->set_region" to set the "from" region correctly
+  // beforehand.)
+  //
+  // Invoke code_root_cl->do_code_blob on the unmarked nmethods
+  // on the strong code roots list for each region in the
+  // collection set.
+  //
+  // The "worker_i" param is for the parallel case where the id
+  // of the worker thread calling this function can be helpful in
+  // partitioning the work to be done. It should be the same as
+  // the "i" passed to the calling thread's work(i) function.
+  // In the sequential case this param will be ignored.
+  void oops_into_collection_set_do(OopsInHeapRegionClosure* blk,
+                                   CodeBlobToOopClosure* code_root_cl,
+                                   int worker_i);
 
   // Prepare for and cleanup after an oops_into_collection_set_do
   // call.  Must call each of these once before and after (in sequential
@@ -98,7 +107,10 @@
   void prepare_for_oops_into_collection_set_do();
   void cleanup_after_oops_into_collection_set_do();
 
-  void scanRS(OopsInHeapRegionClosure* oc, int worker_i);
+  void scanRS(OopsInHeapRegionClosure* oc,
+              CodeBlobToOopClosure* code_root_cl,
+              int worker_i);
+
   void updateRS(DirtyCardQueue* into_cset_dcq, int worker_i);
 
   CardTableModRefBS* ct_bs() { return _ct_bs; }
--- a/hotspot/src/share/vm/gc_implementation/g1/g1RemSetSummary.cpp	Mon Aug 26 17:36:10 2013 -0700
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1RemSetSummary.cpp	Wed Jul 05 19:08:56 2017 +0200
@@ -127,32 +127,55 @@
 
 class HRRSStatsIter: public HeapRegionClosure {
   size_t _occupied;
-  size_t _total_mem_sz;
-  size_t _max_mem_sz;
-  HeapRegion* _max_mem_sz_region;
+
+  size_t _total_rs_mem_sz;
+  size_t _max_rs_mem_sz;
+  HeapRegion* _max_rs_mem_sz_region;
+
+  size_t _total_code_root_mem_sz;
+  size_t _max_code_root_mem_sz;
+  HeapRegion* _max_code_root_mem_sz_region;
 public:
   HRRSStatsIter() :
     _occupied(0),
-    _total_mem_sz(0),
-    _max_mem_sz(0),
-    _max_mem_sz_region(NULL)
+    _total_rs_mem_sz(0),
+    _max_rs_mem_sz(0),
+    _max_rs_mem_sz_region(NULL),
+    _total_code_root_mem_sz(0),
+    _max_code_root_mem_sz(0),
+    _max_code_root_mem_sz_region(NULL)
   {}
 
   bool doHeapRegion(HeapRegion* r) {
-    size_t mem_sz = r->rem_set()->mem_size();
-    if (mem_sz > _max_mem_sz) {
-      _max_mem_sz = mem_sz;
-      _max_mem_sz_region = r;
+    HeapRegionRemSet* hrrs = r->rem_set();
+
+    // HeapRegionRemSet::mem_size() includes the
+    // size of the strong code roots
+    size_t rs_mem_sz = hrrs->mem_size();
+    if (rs_mem_sz > _max_rs_mem_sz) {
+      _max_rs_mem_sz = rs_mem_sz;
+      _max_rs_mem_sz_region = r;
     }
-    _total_mem_sz += mem_sz;
-    size_t occ = r->rem_set()->occupied();
+    _total_rs_mem_sz += rs_mem_sz;
+
+    size_t code_root_mem_sz = hrrs->strong_code_roots_mem_size();
+    if (code_root_mem_sz > _max_code_root_mem_sz) {
+      _max_code_root_mem_sz = code_root_mem_sz;
+      _max_code_root_mem_sz_region = r;
+    }
+    _total_code_root_mem_sz += code_root_mem_sz;
+
+    size_t occ = hrrs->occupied();
     _occupied += occ;
     return false;
   }
-  size_t total_mem_sz() { return _total_mem_sz; }
-  size_t max_mem_sz() { return _max_mem_sz; }
+  size_t total_rs_mem_sz() { return _total_rs_mem_sz; }
+  size_t max_rs_mem_sz() { return _max_rs_mem_sz; }
+  HeapRegion* max_rs_mem_sz_region() { return _max_rs_mem_sz_region; }
+  size_t total_code_root_mem_sz() { return _total_code_root_mem_sz; }
+  size_t max_code_root_mem_sz() { return _max_code_root_mem_sz; }
+  HeapRegion* max_code_root_mem_sz_region() { return _max_code_root_mem_sz_region; }
   size_t occupied() { return _occupied; }
-  HeapRegion* max_mem_sz_region() { return _max_mem_sz_region; }
 };
 
 double calc_percentage(size_t numerator, size_t denominator) {
@@ -184,22 +207,33 @@
 
   HRRSStatsIter blk;
   G1CollectedHeap::heap()->heap_region_iterate(&blk);
+  // RemSet stats
   out->print_cr("  Total heap region rem set sizes = "SIZE_FORMAT"K."
                 "  Max = "SIZE_FORMAT"K.",
-                blk.total_mem_sz()/K, blk.max_mem_sz()/K);
+                blk.total_rs_mem_sz()/K, blk.max_rs_mem_sz()/K);
   out->print_cr("  Static structures = "SIZE_FORMAT"K,"
                 " free_lists = "SIZE_FORMAT"K.",
                 HeapRegionRemSet::static_mem_size() / K,
                 HeapRegionRemSet::fl_mem_size() / K);
   out->print_cr("    "SIZE_FORMAT" occupied cards represented.",
                 blk.occupied());
-  HeapRegion* max_mem_sz_region = blk.max_mem_sz_region();
-  HeapRegionRemSet* rem_set = max_mem_sz_region->rem_set();
+  HeapRegion* max_rs_mem_sz_region = blk.max_rs_mem_sz_region();
+  HeapRegionRemSet* max_rs_rem_set = max_rs_mem_sz_region->rem_set();
   out->print_cr("    Max size region = "HR_FORMAT", "
                 "size = "SIZE_FORMAT "K, occupied = "SIZE_FORMAT"K.",
-                HR_FORMAT_PARAMS(max_mem_sz_region),
-                (rem_set->mem_size() + K - 1)/K,
-                (rem_set->occupied() + K - 1)/K);
-
+                HR_FORMAT_PARAMS(max_rs_mem_sz_region),
+                (max_rs_rem_set->mem_size() + K - 1)/K,
+                (max_rs_rem_set->occupied() + K - 1)/K);
   out->print_cr("    Did %d coarsenings.", num_coarsenings());
+  // Strong code root stats
+  out->print_cr("  Total heap region code-root set sizes = "SIZE_FORMAT"K."
+                "  Max = "SIZE_FORMAT"K.",
+                blk.total_code_root_mem_sz()/K, blk.max_code_root_mem_sz()/K);
+  HeapRegion* max_code_root_mem_sz_region = blk.max_code_root_mem_sz_region();
+  HeapRegionRemSet* max_code_root_rem_set = max_code_root_mem_sz_region->rem_set();
+  out->print_cr("    Max size region = "HR_FORMAT", "
+                "size = "SIZE_FORMAT "K, num_elems = "SIZE_FORMAT".",
+                HR_FORMAT_PARAMS(max_code_root_mem_sz_region),
+                (max_code_root_rem_set->strong_code_roots_mem_size() + K - 1)/K,
+                (size_t) max_code_root_rem_set->strong_code_roots_list_length()); // int -> size_t to match SIZE_FORMAT
 }
--- a/hotspot/src/share/vm/gc_implementation/g1/g1_globals.hpp	Mon Aug 26 17:36:10 2013 -0700
+++ b/hotspot/src/share/vm/gc_implementation/g1/g1_globals.hpp	Wed Jul 05 19:08:56 2017 +0200
@@ -319,7 +319,10 @@
                                                                             \
   diagnostic(bool, G1VerifyRSetsDuringFullGC, false,                        \
              "If true, perform verification of each heap region's "         \
-             "remembered set when verifying the heap during a full GC.")
+             "remembered set when verifying the heap during a full GC.")    \
+                                                                            \
+  diagnostic(bool, G1VerifyHeapRegionCodeRoots, false,                      \
+             "Verify the code root lists attached to each heap region.")
 
 G1_FLAGS(DECLARE_DEVELOPER_FLAG, DECLARE_PD_DEVELOPER_FLAG, DECLARE_PRODUCT_FLAG, DECLARE_PD_PRODUCT_FLAG, DECLARE_DIAGNOSTIC_FLAG, DECLARE_EXPERIMENTAL_FLAG, DECLARE_NOTPRODUCT_FLAG, DECLARE_MANAGEABLE_FLAG, DECLARE_PRODUCT_RW_FLAG)
 
--- a/hotspot/src/share/vm/gc_implementation/g1/heapRegion.cpp	Mon Aug 26 17:36:10 2013 -0700
+++ b/hotspot/src/share/vm/gc_implementation/g1/heapRegion.cpp	Wed Jul 05 19:08:56 2017 +0200
@@ -23,6 +23,7 @@
  */
 
 #include "precompiled.hpp"
+#include "code/nmethod.hpp"
 #include "gc_implementation/g1/g1BlockOffsetTable.inline.hpp"
 #include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
 #include "gc_implementation/g1/g1OopClosures.inline.hpp"
@@ -50,144 +51,6 @@
                                                    OopClosure* oc) :
   _r_bottom(r->bottom()), _r_end(r->end()), _oc(oc) { }
 
-class VerifyLiveClosure: public OopClosure {
-private:
-  G1CollectedHeap* _g1h;
-  CardTableModRefBS* _bs;
-  oop _containing_obj;
-  bool _failures;
-  int _n_failures;
-  VerifyOption _vo;
-public:
-  // _vo == UsePrevMarking -> use "prev" marking information,
-  // _vo == UseNextMarking -> use "next" marking information,
-  // _vo == UseMarkWord    -> use mark word from object header.
-  VerifyLiveClosure(G1CollectedHeap* g1h, VerifyOption vo) :
-    _g1h(g1h), _bs(NULL), _containing_obj(NULL),
-    _failures(false), _n_failures(0), _vo(vo)
-  {
-    BarrierSet* bs = _g1h->barrier_set();
-    if (bs->is_a(BarrierSet::CardTableModRef))
-      _bs = (CardTableModRefBS*)bs;
-  }
-
-  void set_containing_obj(oop obj) {
-    _containing_obj = obj;
-  }
-
-  bool failures() { return _failures; }
-  int n_failures() { return _n_failures; }
-
-  virtual void do_oop(narrowOop* p) { do_oop_work(p); }
-  virtual void do_oop(      oop* p) { do_oop_work(p); }
-
-  void print_object(outputStream* out, oop obj) {
-#ifdef PRODUCT
-    Klass* k = obj->klass();
-    const char* class_name = InstanceKlass::cast(k)->external_name();
-    out->print_cr("class name %s", class_name);
-#else // PRODUCT
-    obj->print_on(out);
-#endif // PRODUCT
-  }
-
-  template <class T>
-  void do_oop_work(T* p) {
-    assert(_containing_obj != NULL, "Precondition");
-    assert(!_g1h->is_obj_dead_cond(_containing_obj, _vo),
-           "Precondition");
-    T heap_oop = oopDesc::load_heap_oop(p);
-    if (!oopDesc::is_null(heap_oop)) {
-      oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
-      bool failed = false;
-      if (!_g1h->is_in_closed_subset(obj) || _g1h->is_obj_dead_cond(obj, _vo)) {
-        MutexLockerEx x(ParGCRareEvent_lock,
-                        Mutex::_no_safepoint_check_flag);
-
-        if (!_failures) {
-          gclog_or_tty->print_cr("");
-          gclog_or_tty->print_cr("----------");
-        }
-        if (!_g1h->is_in_closed_subset(obj)) {
-          HeapRegion* from = _g1h->heap_region_containing((HeapWord*)p);
-          gclog_or_tty->print_cr("Field "PTR_FORMAT
-                                 " of live obj "PTR_FORMAT" in region "
-                                 "["PTR_FORMAT", "PTR_FORMAT")",
-                                 p, (void*) _containing_obj,
-                                 from->bottom(), from->end());
-          print_object(gclog_or_tty, _containing_obj);
-          gclog_or_tty->print_cr("points to obj "PTR_FORMAT" not in the heap",
-                                 (void*) obj);
-        } else {
-          HeapRegion* from = _g1h->heap_region_containing((HeapWord*)p);
-          HeapRegion* to   = _g1h->heap_region_containing((HeapWord*)obj);
-          gclog_or_tty->print_cr("Field "PTR_FORMAT
-                                 " of live obj "PTR_FORMAT" in region "
-                                 "["PTR_FORMAT", "PTR_FORMAT")",
-                                 p, (void*) _containing_obj,
-                                 from->bottom(), from->end());
-          print_object(gclog_or_tty, _containing_obj);
-          gclog_or_tty->print_cr("points to dead obj "PTR_FORMAT" in region "
-                                 "["PTR_FORMAT", "PTR_FORMAT")",
-                                 (void*) obj, to->bottom(), to->end());
-          print_object(gclog_or_tty, obj);
-        }
-        gclog_or_tty->print_cr("----------");
-        gclog_or_tty->flush();
-        _failures = true;
-        failed = true;
-        _n_failures++;
-      }
-
-      if (!_g1h->full_collection() || G1VerifyRSetsDuringFullGC) {
-        HeapRegion* from = _g1h->heap_region_containing((HeapWord*)p);
-        HeapRegion* to   = _g1h->heap_region_containing(obj);
-        if (from != NULL && to != NULL &&
-            from != to &&
-            !to->isHumongous()) {
-          jbyte cv_obj = *_bs->byte_for_const(_containing_obj);
-          jbyte cv_field = *_bs->byte_for_const(p);
-          const jbyte dirty = CardTableModRefBS::dirty_card_val();
-
-          bool is_bad = !(from->is_young()
-                          || to->rem_set()->contains_reference(p)
-                          || !G1HRRSFlushLogBuffersOnVerify && // buffers were not flushed
-                              (_containing_obj->is_objArray() ?
-                                  cv_field == dirty
-                               : cv_obj == dirty || cv_field == dirty));
-          if (is_bad) {
-            MutexLockerEx x(ParGCRareEvent_lock,
-                            Mutex::_no_safepoint_check_flag);
-
-            if (!_failures) {
-              gclog_or_tty->print_cr("");
-              gclog_or_tty->print_cr("----------");
-            }
-            gclog_or_tty->print_cr("Missing rem set entry:");
-            gclog_or_tty->print_cr("Field "PTR_FORMAT" "
-                                   "of obj "PTR_FORMAT", "
-                                   "in region "HR_FORMAT,
-                                   p, (void*) _containing_obj,
-                                   HR_FORMAT_PARAMS(from));
-            _containing_obj->print_on(gclog_or_tty);
-            gclog_or_tty->print_cr("points to obj "PTR_FORMAT" "
-                                   "in region "HR_FORMAT,
-                                   (void*) obj,
-                                   HR_FORMAT_PARAMS(to));
-            obj->print_on(gclog_or_tty);
-            gclog_or_tty->print_cr("Obj head CTE = %d, field CTE = %d.",
-                          cv_obj, cv_field);
-            gclog_or_tty->print_cr("----------");
-            gclog_or_tty->flush();
-            _failures = true;
-            if (!failed) _n_failures++;
-          }
-        }
-      }
-    }
-  }
-};
-
 template<class ClosureType>
 HeapWord* walk_mem_region_loop(ClosureType* cl, G1CollectedHeap* g1h,
                                HeapRegion* hr,
@@ -368,7 +231,7 @@
   if (!par) {
     // If this is parallel, this will be done later.
     HeapRegionRemSet* hrrs = rem_set();
-    if (hrrs != NULL) hrrs->clear();
+    hrrs->clear();
     _claimed = InitialClaimValue;
   }
   zero_marked_bytes();
@@ -505,6 +368,7 @@
     _rem_set(NULL), _recorded_rs_length(0), _predicted_elapsed_time_ms(0),
     _predicted_bytes_to_copy(0)
 {
+  _rem_set = new HeapRegionRemSet(sharedOffsetArray, this);
   _orig_end = mr.end();
   // Note that initialize() will set the start of the unmarked area of the
   // region.
@@ -512,8 +376,6 @@
   set_top(bottom());
   set_saved_mark();
 
-  _rem_set =  new HeapRegionRemSet(sharedOffsetArray, this);
-
   assert(HeapRegionRemSet::num_par_rem_sets() > 0, "Invariant.");
 }
 
@@ -733,6 +595,160 @@
   return NULL;
 }
 
+// Code roots support
+
+void HeapRegion::add_strong_code_root(nmethod* nm) {
+  HeapRegionRemSet* hrrs = rem_set();
+  hrrs->add_strong_code_root(nm);
+}
+
+void HeapRegion::remove_strong_code_root(nmethod* nm) {
+  HeapRegionRemSet* hrrs = rem_set();
+  hrrs->remove_strong_code_root(nm);
+}
+
+void HeapRegion::migrate_strong_code_roots() {
+  assert(in_collection_set(), "only collection set regions");
+  assert(!isHumongous(), "not humongous regions");
+
+  HeapRegionRemSet* hrrs = rem_set();
+  hrrs->migrate_strong_code_roots();
+}
+
+void HeapRegion::strong_code_roots_do(CodeBlobClosure* blk) const {
+  HeapRegionRemSet* hrrs = rem_set();
+  hrrs->strong_code_roots_do(blk);
+}
+
+class VerifyStrongCodeRootOopClosure: public OopClosure {
+  const HeapRegion* _hr;     // region whose strong code roots are being verified
+  nmethod* _nm;              // nmethod whose embedded oops are being visited
+  bool _failures;            // set when an oop in _hr is found at or above top()
+  bool _has_oops_in_region;  // set when an oop in _hr is found below top()
+
+  template <class T> void do_oop_work(T* p) {
+    T heap_oop = oopDesc::load_heap_oop(p);
+    if (!oopDesc::is_null(heap_oop)) {
+      oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
+
+      // Note: not all the oops embedded in the nmethod are in the
+      // current region. We only look at those which are.
+      if (_hr->is_in(obj)) {
+        // Object is in the region. Check that it is below top
+        if (_hr->top() <= (HeapWord*)obj) {
+          // Object is at or above top: record the failure and stop
+          gclog_or_tty->print_cr("Object "PTR_FORMAT" in region "
+                                 "["PTR_FORMAT", "PTR_FORMAT") is above "
+                                 "top "PTR_FORMAT,
+                                 (void*) obj, _hr->bottom(), _hr->end(), _hr->top());
+          _failures = true;
+          return;
+        }
+        // Nmethod has at least one oop in the current region
+        _has_oops_in_region = true;
+      }
+    }
+  }
+
+public:
+  VerifyStrongCodeRootOopClosure(const HeapRegion* hr, nmethod* nm):
+    _hr(hr), _nm(nm), _failures(false), _has_oops_in_region(false) {}
+
+  void do_oop(narrowOop* p) { do_oop_work(p); }
+  void do_oop(oop* p)       { do_oop_work(p); }
+
+  bool failures()           { return _failures; }
+  bool has_oops_in_region() { return _has_oops_in_region; }
+};
+
+class VerifyStrongCodeRootCodeBlobClosure: public CodeBlobClosure {
+  const HeapRegion* _hr;
+  bool _failures;
+public:
+  VerifyStrongCodeRootCodeBlobClosure(const HeapRegion* hr) :
+    _hr(hr), _failures(false) {}
+
+  void do_code_blob(CodeBlob* cb) {
+    nmethod* nm = (cb == NULL) ? NULL : cb->as_nmethod_or_null();
+    if (nm != NULL) {
+      // Verify that the nmethod is live
+      if (!nm->is_alive()) {
+        gclog_or_tty->print_cr("region ["PTR_FORMAT","PTR_FORMAT"] has dead nmethod "
+                               PTR_FORMAT" in its strong code roots",
+                               _hr->bottom(), _hr->end(), nm);
+        _failures = true;
+      } else {
+        VerifyStrongCodeRootOopClosure oop_cl(_hr, nm);
+        nm->oops_do(&oop_cl);
+        if (!oop_cl.has_oops_in_region()) {
+          gclog_or_tty->print_cr("region ["PTR_FORMAT","PTR_FORMAT"] has nmethod "
+                                 PTR_FORMAT" in its strong code roots "
+                                 "with no pointers into region",
+                                 _hr->bottom(), _hr->end(), nm);
+          _failures = true;
+        } else if (oop_cl.failures()) {
+          gclog_or_tty->print_cr("region ["PTR_FORMAT","PTR_FORMAT"] has other "
+                                 "failures for nmethod "PTR_FORMAT,
+                                 _hr->bottom(), _hr->end(), nm);
+          _failures = true;
+        }
+      }
+    }
+  }
+
+  bool failures()       { return _failures; }
+};
+
+void HeapRegion::verify_strong_code_roots(VerifyOption vo, bool* failures) const {
+  if (!G1VerifyHeapRegionCodeRoots) {
+    // We're not verifying code roots.
+    return;
+  }
+  if (vo == VerifyOption_G1UseMarkWord) {
+    // Marking verification during a full GC is performed after class
+    // unloading, code cache unloading, etc so the strong code roots
+    // attached to each heap region are in an inconsistent state. They won't
+    // be consistent until the strong code roots are rebuilt after the
+    // actual GC. Skip verifying the strong code roots in this particular
+    // time.
+    assert(VerifyDuringGC, "only way to get here");
+    return;
+  }
+
+  HeapRegionRemSet* hrrs = rem_set();
+  int strong_code_roots_length = hrrs->strong_code_roots_list_length();
+
+  // if this region is empty then there should be no entries
+  // on its strong code root list
+  if (is_empty()) {
+    if (strong_code_roots_length > 0) {
+      gclog_or_tty->print_cr("region ["PTR_FORMAT","PTR_FORMAT"] is empty "
+                             "but has "INT32_FORMAT" code root entries",
+                             bottom(), end(), strong_code_roots_length);
+      *failures = true;
+    }
+    return;
+  }
+
+  // An H-region should have an empty strong code root list
+  if (isHumongous()) {
+    if (strong_code_roots_length > 0) {
+      gclog_or_tty->print_cr("region ["PTR_FORMAT","PTR_FORMAT"] is humongous "
+                             "but has "INT32_FORMAT" code root entries",
+                             bottom(), end(), strong_code_roots_length);
+      *failures = true;
+    }
+    return;
+  }
+
+  VerifyStrongCodeRootCodeBlobClosure cb_cl(this);
+  strong_code_roots_do(&cb_cl);
+
+  if (cb_cl.failures()) {
+    *failures = true;
+  }
+}
+
 void HeapRegion::print() const { print_on(gclog_or_tty); }
 void HeapRegion::print_on(outputStream* st) const {
   if (isHumongous()) {
@@ -761,10 +777,143 @@
   G1OffsetTableContigSpace::print_on(st);
 }
 
-void HeapRegion::verify() const {
-  bool dummy = false;
-  verify(VerifyOption_G1UsePrevMarking, /* failures */ &dummy);
-}
+class VerifyLiveClosure: public OopClosure {
+private:
+  G1CollectedHeap* _g1h;
+  CardTableModRefBS* _bs;
+  oop _containing_obj;
+  bool _failures;
+  int _n_failures;
+  VerifyOption _vo;
+public:
+  // _vo == UsePrevMarking -> use "prev" marking information,
+  // _vo == UseNextMarking -> use "next" marking information,
+  // _vo == UseMarkWord    -> use mark word from object header.
+  VerifyLiveClosure(G1CollectedHeap* g1h, VerifyOption vo) :
+    _g1h(g1h), _bs(NULL), _containing_obj(NULL),
+    _failures(false), _n_failures(0), _vo(vo)
+  {
+    BarrierSet* bs = _g1h->barrier_set();
+    if (bs->is_a(BarrierSet::CardTableModRef))
+      _bs = (CardTableModRefBS*)bs;
+  }
+
+  void set_containing_obj(oop obj) {
+    _containing_obj = obj;
+  }
+
+  bool failures() { return _failures; }
+  int n_failures() { return _n_failures; }
+
+  virtual void do_oop(narrowOop* p) { do_oop_work(p); }
+  virtual void do_oop(      oop* p) { do_oop_work(p); }
+
+  void print_object(outputStream* out, oop obj) {
+#ifdef PRODUCT
+    Klass* k = obj->klass();
+    const char* class_name = InstanceKlass::cast(k)->external_name();
+    out->print_cr("class name %s", class_name);
+#else // PRODUCT
+    obj->print_on(out);
+#endif // PRODUCT
+  }
+
+  template <class T>
+  void do_oop_work(T* p) {
+    assert(_containing_obj != NULL, "Precondition");
+    assert(!_g1h->is_obj_dead_cond(_containing_obj, _vo),
+           "Precondition");
+    T heap_oop = oopDesc::load_heap_oop(p);
+    if (!oopDesc::is_null(heap_oop)) {
+      oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
+      bool failed = false;
+      if (!_g1h->is_in_closed_subset(obj) || _g1h->is_obj_dead_cond(obj, _vo)) {
+        MutexLockerEx x(ParGCRareEvent_lock,
+                        Mutex::_no_safepoint_check_flag);
+
+        if (!_failures) {
+          gclog_or_tty->print_cr("");
+          gclog_or_tty->print_cr("----------");
+        }
+        if (!_g1h->is_in_closed_subset(obj)) {
+          HeapRegion* from = _g1h->heap_region_containing((HeapWord*)p);
+          gclog_or_tty->print_cr("Field "PTR_FORMAT
+                                 " of live obj "PTR_FORMAT" in region "
+                                 "["PTR_FORMAT", "PTR_FORMAT")",
+                                 p, (void*) _containing_obj,
+                                 from->bottom(), from->end());
+          print_object(gclog_or_tty, _containing_obj);
+          gclog_or_tty->print_cr("points to obj "PTR_FORMAT" not in the heap",
+                                 (void*) obj);
+        } else {
+          HeapRegion* from = _g1h->heap_region_containing((HeapWord*)p);
+          HeapRegion* to   = _g1h->heap_region_containing((HeapWord*)obj);
+          gclog_or_tty->print_cr("Field "PTR_FORMAT
+                                 " of live obj "PTR_FORMAT" in region "
+                                 "["PTR_FORMAT", "PTR_FORMAT")",
+                                 p, (void*) _containing_obj,
+                                 from->bottom(), from->end());
+          print_object(gclog_or_tty, _containing_obj);
+          gclog_or_tty->print_cr("points to dead obj "PTR_FORMAT" in region "
+                                 "["PTR_FORMAT", "PTR_FORMAT")",
+                                 (void*) obj, to->bottom(), to->end());
+          print_object(gclog_or_tty, obj);
+        }
+        gclog_or_tty->print_cr("----------");
+        gclog_or_tty->flush();
+        _failures = true;
+        failed = true;
+        _n_failures++;
+      }
+
+      if (!_g1h->full_collection() || G1VerifyRSetsDuringFullGC) {
+        HeapRegion* from = _g1h->heap_region_containing((HeapWord*)p);
+        HeapRegion* to   = _g1h->heap_region_containing(obj);
+        if (from != NULL && to != NULL &&
+            from != to &&
+            !to->isHumongous()) {
+          jbyte cv_obj = *_bs->byte_for_const(_containing_obj);
+          jbyte cv_field = *_bs->byte_for_const(p);
+          const jbyte dirty = CardTableModRefBS::dirty_card_val();
+
+          bool is_bad = !(from->is_young()
+                          || to->rem_set()->contains_reference(p)
+                          || !G1HRRSFlushLogBuffersOnVerify && // buffers were not flushed
+                              (_containing_obj->is_objArray() ?
+                                  cv_field == dirty
+                               : cv_obj == dirty || cv_field == dirty));
+          if (is_bad) {
+            MutexLockerEx x(ParGCRareEvent_lock,
+                            Mutex::_no_safepoint_check_flag);
+
+            if (!_failures) {
+              gclog_or_tty->print_cr("");
+              gclog_or_tty->print_cr("----------");
+            }
+            gclog_or_tty->print_cr("Missing rem set entry:");
+            gclog_or_tty->print_cr("Field "PTR_FORMAT" "
+                                   "of obj "PTR_FORMAT", "
+                                   "in region "HR_FORMAT,
+                                   p, (void*) _containing_obj,
+                                   HR_FORMAT_PARAMS(from));
+            _containing_obj->print_on(gclog_or_tty);
+            gclog_or_tty->print_cr("points to obj "PTR_FORMAT" "
+                                   "in region "HR_FORMAT,
+                                   (void*) obj,
+                                   HR_FORMAT_PARAMS(to));
+            obj->print_on(gclog_or_tty);
+            gclog_or_tty->print_cr("Obj head CTE = %d, field CTE = %d.",
+                          cv_obj, cv_field);
+            gclog_or_tty->print_cr("----------");
+            gclog_or_tty->flush();
+            _failures = true;
+            if (!failed) _n_failures++;
+          }
+        }
+      }
+    }
+  }
+};
 
 // This really ought to be commoned up into OffsetTableContigSpace somehow.
 // We would need a mechanism to make that code skip dead objects.
@@ -904,6 +1053,13 @@
     *failures = true;
     return;
   }
+
+  verify_strong_code_roots(vo, failures);
+}
+
+void HeapRegion::verify() const {
+  bool dummy = false;
+  verify(VerifyOption_G1UsePrevMarking, /* failures */ &dummy);
 }
 
 // G1OffsetTableContigSpace code; copied from space.cpp.  Hope this can go
--- a/hotspot/src/share/vm/gc_implementation/g1/heapRegion.hpp	Mon Aug 26 17:36:10 2013 -0700
+++ b/hotspot/src/share/vm/gc_implementation/g1/heapRegion.hpp	Wed Jul 05 19:08:56 2017 +0200
@@ -52,6 +52,7 @@
 class HeapRegionRemSetIterator;
 class HeapRegion;
 class HeapRegionSetBase;
+class nmethod;
 
 #define HR_FORMAT "%u:(%s)["PTR_FORMAT","PTR_FORMAT","PTR_FORMAT"]"
 #define HR_FORMAT_PARAMS(_hr_) \
@@ -371,7 +372,8 @@
     RebuildRSClaimValue        = 5,
     ParEvacFailureClaimValue   = 6,
     AggregateCountClaimValue   = 7,
-    VerifyCountClaimValue      = 8
+    VerifyCountClaimValue      = 8,
+    ParMarkRootClaimValue      = 9
   };
 
   inline HeapWord* par_allocate_no_bot_updates(size_t word_size) {
@@ -796,6 +798,25 @@
 
   virtual void reset_after_compaction();
 
+  // Routines for managing a list of code roots (attached to
+  // this region's RSet) that point into this heap region.
+  void add_strong_code_root(nmethod* nm);
+  void remove_strong_code_root(nmethod* nm);
+
+  // During a collection, migrate the successfully evacuated
+  // strong code roots that referenced into this region to the
+  // new regions that they now point into. Unsuccessfully
+  // evacuated code roots are not migrated.
+  void migrate_strong_code_roots();
+
+  // Applies blk->do_code_blob() to each of the entries in
+  // the strong code roots list for this region
+  void strong_code_roots_do(CodeBlobClosure* blk) const;
+
+  // Verify that the entries on the strong code root list for this
+  // region are live and include at least one pointer into this region.
+  void verify_strong_code_roots(VerifyOption vo, bool* failures) const;
+
   void print() const;
   void print_on(outputStream* st) const;
 
--- a/hotspot/src/share/vm/gc_implementation/g1/heapRegionRemSet.cpp	Mon Aug 26 17:36:10 2013 -0700
+++ b/hotspot/src/share/vm/gc_implementation/g1/heapRegionRemSet.cpp	Wed Jul 05 19:08:56 2017 +0200
@@ -33,6 +33,7 @@
 #include "oops/oop.inline.hpp"
 #include "utilities/bitMap.inline.hpp"
 #include "utilities/globalDefinitions.hpp"
+#include "utilities/growableArray.hpp"
 
 class PerRegionTable: public CHeapObj<mtGC> {
   friend class OtherRegionsTable;
@@ -849,7 +850,7 @@
 
 HeapRegionRemSet::HeapRegionRemSet(G1BlockOffsetSharedArray* bosa,
                                    HeapRegion* hr)
-  : _bosa(bosa), _other_regions(hr) {
+  : _bosa(bosa), _strong_code_roots_list(NULL), _other_regions(hr) {
   reset_for_par_iteration();
 }
 
@@ -908,6 +909,12 @@
 }
 
 void HeapRegionRemSet::clear() {
+  if (_strong_code_roots_list != NULL) {
+    delete _strong_code_roots_list;
+  }
+  _strong_code_roots_list = new (ResourceObj::C_HEAP, mtGC)
+                                GrowableArray<nmethod*>(10, 0, NULL, true);
+
   _other_regions.clear();
   assert(occupied() == 0, "Should be clear.");
   reset_for_par_iteration();
@@ -925,6 +932,121 @@
   _other_regions.scrub(ctbs, region_bm, card_bm);
 }
 
+
+// Code roots support
+
+void HeapRegionRemSet::add_strong_code_root(nmethod* nm) {
+  assert(nm != NULL, "sanity");
+  // Search for the code blob from the RHS to avoid
+  // duplicate entries as much as possible
+  if (_strong_code_roots_list->find_from_end(nm) < 0) {
+    // Code blob isn't already in the list
+    _strong_code_roots_list->push(nm);
+  }
+}
+
+void HeapRegionRemSet::remove_strong_code_root(nmethod* nm) {
+  assert(nm != NULL, "sanity");
+  int idx = _strong_code_roots_list->find(nm);
+  if (idx >= 0) {
+    _strong_code_roots_list->remove_at(idx);
+  }
+  // Check that there were no duplicates
+  guarantee(_strong_code_roots_list->find(nm) < 0, "duplicate entry found");
+}
+
+class NMethodMigrationOopClosure : public OopClosure {
+  G1CollectedHeap* _g1h;
+  HeapRegion* _from;   // region the code roots are being migrated out of
+  nmethod* _nm;        // nmethod whose embedded oops are being visited
+
+  uint _num_self_forwarded;  // oops found that still point into _from
+
+  template <class T> void do_oop_work(T* p) {
+    T heap_oop = oopDesc::load_heap_oop(p);
+    if (!oopDesc::is_null(heap_oop)) {
+      oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
+      if (_from->is_in(obj)) {
+        // Reference still points into the source region.
+        // Since roots are immediately evacuated this means that
+        // we must have self forwarded the object
+        assert(obj->is_forwarded(),
+               err_msg("code roots should be immediately evacuated. "
+                       "Ref: "PTR_FORMAT", "
+                       "Obj: "PTR_FORMAT", "
+                       "Region: "HR_FORMAT,
+                       p, (void*) obj, HR_FORMAT_PARAMS(_from)));
+        assert(obj->forwardee() == obj,
+               err_msg("not self forwarded? obj = "PTR_FORMAT, (void*)obj));
+
+        // The object has been self forwarded.
+        // Note, if we're during an initial mark pause, there is
+        // no need to explicitly mark object. It will be marked
+        // during the regular evacuation failure handling code.
+        _num_self_forwarded++;
+      } else {
+        // The reference points into a promotion or to-space region
+        HeapRegion* to = _g1h->heap_region_containing(obj);
+        to->rem_set()->add_strong_code_root(_nm);
+      }
+    }
+  }
+
+public:
+  NMethodMigrationOopClosure(G1CollectedHeap* g1h, HeapRegion* from, nmethod* nm):
+    _g1h(g1h), _from(from), _nm(nm), _num_self_forwarded(0) {}
+
+  void do_oop(narrowOop* p) { do_oop_work(p); }
+  void do_oop(oop* p)       { do_oop_work(p); }
+
+  bool retain() { return _num_self_forwarded > 0; }  // true if any oop still points into _from
+};
+
+void HeapRegionRemSet::migrate_strong_code_roots() {
+  assert(hr()->in_collection_set(), "only collection set regions");
+  assert(!hr()->isHumongous(), "not humongous regions");
+
+  ResourceMark rm;
+
+  // List of code blobs to retain for this region
+  GrowableArray<nmethod*> to_be_retained(10);
+  G1CollectedHeap* g1h = G1CollectedHeap::heap();
+
+  while (_strong_code_roots_list->is_nonempty()) {
+    nmethod *nm = _strong_code_roots_list->pop();
+    if (nm != NULL) {
+      NMethodMigrationOopClosure oop_cl(g1h, hr(), nm);
+      nm->oops_do(&oop_cl);
+      if (oop_cl.retain()) {
+        to_be_retained.push(nm);
+      }
+    }
+  }
+
+  // Now push any code roots we need to retain
+  assert(to_be_retained.is_empty() || hr()->evacuation_failed(),
+         "Retained nmethod list must be empty or "
+         "evacuation of this region failed");
+
+  while (to_be_retained.is_nonempty()) {
+    nmethod* nm = to_be_retained.pop();
+    assert(nm != NULL, "sanity");
+    add_strong_code_root(nm);
+  }
+}
+
+void HeapRegionRemSet::strong_code_roots_do(CodeBlobClosure* blk) const {
+  for (int i = 0; i < _strong_code_roots_list->length(); i += 1) {
+    nmethod* nm = _strong_code_roots_list->at(i);
+    blk->do_code_blob(nm);
+  }
+}
+
+size_t HeapRegionRemSet::strong_code_roots_mem_size() {
+  return sizeof(GrowableArray<nmethod*>) +
+         _strong_code_roots_list->max_length() * sizeof(nmethod*);
+}
+
 //-------------------- Iteration --------------------
 
 HeapRegionRemSetIterator:: HeapRegionRemSetIterator(const HeapRegionRemSet* hrrs) :
--- a/hotspot/src/share/vm/gc_implementation/g1/heapRegionRemSet.hpp	Mon Aug 26 17:36:10 2013 -0700
+++ b/hotspot/src/share/vm/gc_implementation/g1/heapRegionRemSet.hpp	Wed Jul 05 19:08:56 2017 +0200
@@ -37,6 +37,7 @@
 class HeapRegionRemSetIterator;
 class PerRegionTable;
 class SparsePRT;
+class nmethod;
 
 // Essentially a wrapper around SparsePRTCleanupTask. See
 // sparsePRT.hpp for more details.
@@ -191,6 +192,10 @@
   G1BlockOffsetSharedArray* _bosa;
   G1BlockOffsetSharedArray* bosa() const { return _bosa; }
 
+  // A list of code blobs (nmethods) whose code contains pointers into
+  // the region that owns this RSet.
+  GrowableArray<nmethod*>* _strong_code_roots_list;
+
   OtherRegionsTable _other_regions;
 
   enum ParIterState { Unclaimed, Claimed, Complete };
@@ -282,11 +287,13 @@
   }
 
   // The actual # of bytes this hr_remset takes up.
+  // Note also includes the strong code root set.
   size_t mem_size() {
     return _other_regions.mem_size()
       // This correction is necessary because the above includes the second
       // part.
-      + sizeof(this) - sizeof(OtherRegionsTable);
+      + (sizeof(*this) - sizeof(OtherRegionsTable)) // *this: object size, not pointer size
+      + strong_code_roots_mem_size();
   }
 
   // Returns the memory occupancy of all static data structures associated
@@ -304,6 +311,37 @@
   bool contains_reference(OopOrNarrowOopStar from) const {
     return _other_regions.contains_reference(from);
   }
+
+  // Routines for managing the list of code roots that point into
+  // the heap region that owns this RSet.
+  void add_strong_code_root(nmethod* nm);
+  void remove_strong_code_root(nmethod* nm);
+
+  // During a collection, migrate the successfully evacuated strong
+  // code roots that referenced into the region that owns this RSet
+  // to the RSets of the new regions that they now point into.
+  // Unsuccessfully evacuated code roots are not migrated.
+  void migrate_strong_code_roots();
+
+  // Applies blk->do_code_blob() to each of the entries in
+  // the strong code roots list
+  void strong_code_roots_do(CodeBlobClosure* blk) const;
+
+  // Returns the number of elements in the strong code roots list
+  int strong_code_roots_list_length() {
+    return _strong_code_roots_list->length();
+  }
+
+  // Returns true if the strong code roots contains the given
+  // nmethod.
+  bool strong_code_roots_list_contains(nmethod* nm) {
+    return _strong_code_roots_list->contains(nm);
+  }
+
+  // Returns the amount of memory, in bytes, currently
+  // consumed by the strong code roots.
+  size_t strong_code_roots_mem_size();
+
   void print() const;
 
   // Called during a stop-world phase to perform any deferred cleanups.
--- a/hotspot/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.cpp	Mon Aug 26 17:36:10 2013 -0700
+++ b/hotspot/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.cpp	Wed Jul 05 19:08:56 2017 +0200
@@ -216,6 +216,7 @@
   young_gen()->update_counters();
   old_gen()->update_counters();
   MetaspaceCounters::update_performance_counters();
+  CompressedClassSpaceCounters::update_performance_counters();
 }
 
 size_t ParallelScavengeHeap::capacity() const {
--- a/hotspot/src/share/vm/gc_interface/collectedHeap.cpp	Mon Aug 26 17:36:10 2013 -0700
+++ b/hotspot/src/share/vm/gc_interface/collectedHeap.cpp	Wed Jul 05 19:08:56 2017 +0200
@@ -118,6 +118,14 @@
   }
 }
 
+void CollectedHeap::register_nmethod(nmethod* nm) {
+  assert_locked_or_safepoint(CodeCache_lock);  // base version only checks the locking precondition; nm is unused
+}
+
+void CollectedHeap::unregister_nmethod(nmethod* nm) {
+  assert_locked_or_safepoint(CodeCache_lock);  // base version only checks the locking precondition; nm is unused
+}
+
 void CollectedHeap::trace_heap(GCWhen::Type when, GCTracer* gc_tracer) {
   const GCHeapSummary& heap_summary = create_heap_summary();
   const MetaspaceSummary& metaspace_summary = create_metaspace_summary();
--- a/hotspot/src/share/vm/gc_interface/collectedHeap.hpp	Mon Aug 26 17:36:10 2013 -0700
+++ b/hotspot/src/share/vm/gc_interface/collectedHeap.hpp	Wed Jul 05 19:08:56 2017 +0200
@@ -49,6 +49,7 @@
 class Thread;
 class ThreadClosure;
 class VirtualSpaceSummary;
+class nmethod;
 
 class GCMessage : public FormatBuffer<1024> {
  public:
@@ -603,6 +604,11 @@
   void print_heap_before_gc();
   void print_heap_after_gc();
 
+  // Registering and unregistering an nmethod (compiled code) with the heap.
+  // Override with specific mechanism for each specialized heap type.
+  virtual void register_nmethod(nmethod* nm);
+  virtual void unregister_nmethod(nmethod* nm);
+
   void trace_heap_before_gc(GCTracer* gc_tracer);
   void trace_heap_after_gc(GCTracer* gc_tracer);
 
--- a/hotspot/src/share/vm/memory/filemap.cpp	Mon Aug 26 17:36:10 2013 -0700
+++ b/hotspot/src/share/vm/memory/filemap.cpp	Wed Jul 05 19:08:56 2017 +0200
@@ -362,15 +362,12 @@
 ReservedSpace FileMapInfo::reserve_shared_memory() {
   struct FileMapInfo::FileMapHeader::space_info* si = &_header._space[0];
   char* requested_addr = si->_base;
-  size_t alignment = os::vm_allocation_granularity();
 
-  size_t size = align_size_up(SharedReadOnlySize + SharedReadWriteSize +
-                              SharedMiscDataSize + SharedMiscCodeSize,
-                              alignment);
+  size_t size = FileMapInfo::shared_spaces_size();
 
   // Reserve the space first, then map otherwise map will go right over some
   // other reserved memory (like the code cache).
-  ReservedSpace rs(size, alignment, false, requested_addr);
+  ReservedSpace rs(size, os::vm_allocation_granularity(), false, requested_addr);
   if (!rs.is_reserved()) {
     fail_continue(err_msg("Unable to reserve shared space at required address " INTPTR_FORMAT, requested_addr));
     return rs;
@@ -559,3 +556,19 @@
                         si->_base, si->_base + si->_used);
   }
 }
+
+// Unmap mapped regions of shared space.
+void FileMapInfo::stop_sharing_and_unmap(const char* msg) {
+  FileMapInfo *map_info = FileMapInfo::current_info();
+  if (map_info) {
+    map_info->fail_continue(msg);  // report msg; presumably also turns sharing off -- confirm in fail_continue()
+    for (int i = 0; i < MetaspaceShared::n_regions; i++) {
+      if (map_info->_header._space[i]._base != NULL) {  // skip regions that have no recorded base
+        map_info->unmap_region(i);
+        map_info->_header._space[i]._base = NULL;  // clear the base so a repeated call skips this region
+      }
+    }
+  } else if (DumpSharedSpaces) {  // no current FileMapInfo while dumping: hand msg to fail_stop()
+    fail_stop(msg, NULL);
+  }
+}
--- a/hotspot/src/share/vm/memory/filemap.hpp	Mon Aug 26 17:36:10 2013 -0700
+++ b/hotspot/src/share/vm/memory/filemap.hpp	Wed Jul 05 19:08:56 2017 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -150,6 +150,15 @@
   // Return true if given address is in the mapped shared space.
   bool is_in_shared_space(const void* p) NOT_CDS_RETURN_(false);
   void print_shared_spaces() NOT_CDS_RETURN;
+
+  static size_t shared_spaces_size() {
+    const size_t unaligned = SharedReadOnlySize + SharedReadWriteSize +
+                             SharedMiscDataSize + SharedMiscCodeSize;
+    return align_size_up(unaligned, os::vm_allocation_granularity());  // round the sum up to the allocation granularity
+  }
+
+  // Stop CDS sharing and unmap CDS regions.
+  static void stop_sharing_and_unmap(const char* msg);
 };
 
 #endif // SHARE_VM_MEMORY_FILEMAP_HPP
--- a/hotspot/src/share/vm/memory/genCollectedHeap.cpp	Mon Aug 26 17:36:10 2013 -0700
+++ b/hotspot/src/share/vm/memory/genCollectedHeap.cpp	Wed Jul 05 19:08:56 2017 +0200
@@ -1211,6 +1211,7 @@
   }
 
   MetaspaceCounters::update_performance_counters();
+  CompressedClassSpaceCounters::update_performance_counters();
 
   always_do_update_barrier = UseConcMarkSweepGC;
 };
--- a/hotspot/src/share/vm/memory/heap.cpp	Mon Aug 26 17:36:10 2013 -0700
+++ b/hotspot/src/share/vm/memory/heap.cpp	Wed Jul 05 19:08:56 2017 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -118,9 +118,12 @@
   _number_of_committed_segments = size_to_segments(_memory.committed_size());
   _number_of_reserved_segments  = size_to_segments(_memory.reserved_size());
   assert(_number_of_reserved_segments >= _number_of_committed_segments, "just checking");
+  const size_t reserved_segments_alignment = MAX2((size_t)os::vm_page_size(), granularity);
+  const size_t reserved_segments_size = align_size_up(_number_of_reserved_segments, reserved_segments_alignment);
+  const size_t committed_segments_size = align_to_page_size(_number_of_committed_segments);
 
   // reserve space for _segmap
-  if (!_segmap.initialize(align_to_page_size(_number_of_reserved_segments), align_to_page_size(_number_of_committed_segments))) {
+  if (!_segmap.initialize(reserved_segments_size, committed_segments_size)) {
     return false;
   }
 
--- a/hotspot/src/share/vm/memory/iterator.cpp	Mon Aug 26 17:36:10 2013 -0700
+++ b/hotspot/src/share/vm/memory/iterator.cpp	Wed Jul 05 19:08:56 2017 +0200
@@ -64,7 +64,7 @@
 }
 
 void CodeBlobToOopClosure::do_newly_marked_nmethod(nmethod* nm) {
-  nm->oops_do(_cl, /*do_strong_roots_only=*/ true);
+  nm->oops_do(_cl, /*allow_zombie=*/ false);
 }
 
 void CodeBlobToOopClosure::do_code_blob(CodeBlob* cb) {
--- a/hotspot/src/share/vm/memory/metaspace.cpp	Mon Aug 26 17:36:10 2013 -0700
+++ b/hotspot/src/share/vm/memory/metaspace.cpp	Wed Jul 05 19:08:56 2017 +0200
@@ -35,6 +35,7 @@
 #include "memory/resourceArea.hpp"
 #include "memory/universe.hpp"
 #include "runtime/globals.hpp"
+#include "runtime/java.hpp"
 #include "runtime/mutex.hpp"
 #include "runtime/orderAccess.hpp"
 #include "services/memTracker.hpp"
@@ -54,6 +55,8 @@
 
 MetaWord* last_allocated = 0;
 
+size_t Metaspace::_class_metaspace_size;
+
 // Used in declarations in SpaceManager and ChunkManager
 enum ChunkIndex {
   ZeroIndex = 0,
@@ -261,10 +264,6 @@
   // count of chunks contained in this VirtualSpace
   uintx _container_count;
 
-  // Convenience functions for logical bottom and end
-  MetaWord* bottom() const { return (MetaWord*) _virtual_space.low(); }
-  MetaWord* end() const { return (MetaWord*) _virtual_space.high(); }
-
   // Convenience functions to access the _virtual_space
   char* low()  const { return virtual_space()->low(); }
   char* high() const { return virtual_space()->high(); }
@@ -284,6 +283,10 @@
   VirtualSpaceNode(ReservedSpace rs) : _top(NULL), _next(NULL), _rs(rs), _container_count(0) {}
   ~VirtualSpaceNode();
 
+  // Convenience functions for logical bottom and end
+  MetaWord* bottom() const { return (MetaWord*) _virtual_space.low(); }
+  MetaWord* end() const { return (MetaWord*) _virtual_space.high(); }
+
   // address of next available space in _virtual_space;
   // Accessors
   VirtualSpaceNode* next() { return _next; }
@@ -1313,7 +1316,8 @@
 
   // Class virtual space should always be expanded.  Call GC for the other
   // metadata virtual space.
-  if (vsl == Metaspace::class_space_list()) return true;
+  if (Metaspace::using_class_space() &&
+      (vsl == Metaspace::class_space_list())) return true;
 
   // If this is part of an allocation after a GC, expand
   // unconditionally.
@@ -2257,7 +2261,7 @@
   size_t raw_word_size = get_raw_word_size(word_size);
   size_t min_size = TreeChunk<Metablock, FreeList>::min_size();
   assert(raw_word_size >= min_size,
-    err_msg("Should not deallocate dark matter " SIZE_FORMAT, word_size));
+         err_msg("Should not deallocate dark matter " SIZE_FORMAT "<" SIZE_FORMAT, word_size, min_size));
   block_freelists()->return_block(p, raw_word_size);
 }
 
@@ -2374,7 +2378,7 @@
   if (result == NULL) {
     result = grow_and_allocate(word_size);
   }
-  if (result > 0) {
+  if (result != 0) {
     inc_used_metrics(word_size);
     assert(result != (MetaWord*) chunks_in_use(MediumIndex),
            "Head of the list is being allocated");
@@ -2476,15 +2480,13 @@
 size_t MetaspaceAux::_allocated_capacity_words[] = {0, 0};
 size_t MetaspaceAux::_allocated_used_words[] = {0, 0};
 
+size_t MetaspaceAux::free_bytes(Metaspace::MetadataType mdtype) {
+  VirtualSpaceList* const vsl = Metaspace::get_space_list(mdtype);
+  return (vsl != NULL) ? vsl->free_bytes() : 0;  // a list that was never created contributes nothing
+}
+
 size_t MetaspaceAux::free_bytes() {
-  size_t result = 0;
-  if (Metaspace::class_space_list() != NULL) {
-    result = result + Metaspace::class_space_list()->free_bytes();
-  }
-  if (Metaspace::space_list() != NULL) {
-    result = result + Metaspace::space_list()->free_bytes();
-  }
-  return result;
+  return free_bytes(Metaspace::ClassType) + free_bytes(Metaspace::NonClassType);
 }
 
 void MetaspaceAux::dec_capacity(Metaspace::MetadataType mdtype, size_t words) {
@@ -2549,6 +2551,9 @@
 }
 
 size_t MetaspaceAux::capacity_bytes_slow(Metaspace::MetadataType mdtype) {
+  if ((mdtype == Metaspace::ClassType) && !Metaspace::using_class_space()) {
+    return 0;
+  }
   // Don't count the space in the freelists.  That space will be
   // added to the capacity calculation as needed.
   size_t capacity = 0;
@@ -2563,18 +2568,18 @@
 }
 
 size_t MetaspaceAux::reserved_in_bytes(Metaspace::MetadataType mdtype) {
-  size_t reserved = (mdtype == Metaspace::ClassType) ?
-                       Metaspace::class_space_list()->virtual_space_total() :
-                       Metaspace::space_list()->virtual_space_total();
-  return reserved * BytesPerWord;
+  VirtualSpaceList* list = Metaspace::get_space_list(mdtype);
+  return list == NULL ? 0 : list->virtual_space_total() * BytesPerWord;  // total is in words; scale to bytes as the old code did
 }
 
 size_t MetaspaceAux::min_chunk_size() { return Metaspace::first_chunk_word_size(); }
 
 size_t MetaspaceAux::free_chunks_total(Metaspace::MetadataType mdtype) {
-  ChunkManager* chunk = (mdtype == Metaspace::ClassType) ?
-                            Metaspace::class_space_list()->chunk_manager() :
-                            Metaspace::space_list()->chunk_manager();
+  VirtualSpaceList* list = Metaspace::get_space_list(mdtype);
+  if (list == NULL) {
+    return 0;
+  }
+  ChunkManager* chunk = list->chunk_manager();
   chunk->slow_verify();
   return chunk->free_chunks_total();
 }
@@ -2615,7 +2620,6 @@
 
 // This is printed when PrintGCDetails
 void MetaspaceAux::print_on(outputStream* out) {
-  Metaspace::MetadataType ct = Metaspace::ClassType;
   Metaspace::MetadataType nct = Metaspace::NonClassType;
 
   out->print_cr(" Metaspace total "
@@ -2629,12 +2633,15 @@
                 allocated_capacity_bytes(nct)/K,
                 allocated_used_bytes(nct)/K,
                 reserved_in_bytes(nct)/K);
-  out->print_cr("  class space    "
-                SIZE_FORMAT "K, used " SIZE_FORMAT "K,"
-                " reserved " SIZE_FORMAT "K",
-                allocated_capacity_bytes(ct)/K,
-                allocated_used_bytes(ct)/K,
-                reserved_in_bytes(ct)/K);
+  if (Metaspace::using_class_space()) {
+    Metaspace::MetadataType ct = Metaspace::ClassType;
+    out->print_cr("  class space    "
+                  SIZE_FORMAT "K, used " SIZE_FORMAT "K,"
+                  " reserved " SIZE_FORMAT "K",
+                  allocated_capacity_bytes(ct)/K,
+                  allocated_used_bytes(ct)/K,
+                  reserved_in_bytes(ct)/K);
+  }
 }
 
 // Print information for class space and data space separately.
@@ -2659,13 +2666,37 @@
   assert(!SafepointSynchronize::is_at_safepoint() || used_and_free == capacity_bytes, "Accounting is wrong");
 }
 
-// Print total fragmentation for class and data metaspaces separately
+// Print total fragmentation for class metaspaces
+void MetaspaceAux::print_class_waste(outputStream* out) {
+  assert(Metaspace::using_class_space(), "class metaspace not used");
+  size_t waste_spec = 0, waste_small = 0, waste_med = 0;
+  size_t count_spec = 0, count_small = 0, count_med = 0, count_humongous = 0;
+  ClassLoaderDataGraphMetaspaceIterator iter;
+  while (iter.repeat()) {
+    Metaspace* msp = iter.get_next();
+    if (msp == NULL) continue;  // nothing to account for on this step
+    SpaceManager* cvsm = msp->class_vsm();  // class-space manager of this metaspace
+    waste_spec += cvsm->sum_waste_in_chunks_in_use(SpecializedIndex);
+    count_spec += cvsm->sum_count_in_chunks_in_use(SpecializedIndex);
+    waste_small += cvsm->sum_waste_in_chunks_in_use(SmallIndex);
+    count_small += cvsm->sum_count_in_chunks_in_use(SmallIndex);
+    waste_med += cvsm->sum_waste_in_chunks_in_use(MediumIndex);
+    count_med += cvsm->sum_count_in_chunks_in_use(MediumIndex);
+    count_humongous += cvsm->sum_count_in_chunks_in_use(HumongousIndex);  // count only; no waste is summed for humongous
+  }
+  out->print_cr(" class: " SIZE_FORMAT " specialized(s) " SIZE_FORMAT ", "
+                SIZE_FORMAT " small(s) " SIZE_FORMAT ", "
+                SIZE_FORMAT " medium(s) " SIZE_FORMAT ", "
+                "large count " SIZE_FORMAT,
+                count_spec, waste_spec,
+                count_small, waste_small,
+                count_med, waste_med, count_humongous);
+}
+
+// Print total fragmentation for data and class metaspaces separately
 void MetaspaceAux::print_waste(outputStream* out) {
-
   size_t specialized_waste = 0, small_waste = 0, medium_waste = 0;
   size_t specialized_count = 0, small_count = 0, medium_count = 0, humongous_count = 0;
-  size_t cls_specialized_waste = 0, cls_small_waste = 0, cls_medium_waste = 0;
-  size_t cls_specialized_count = 0, cls_small_count = 0, cls_medium_count = 0, cls_humongous_count = 0;
 
   ClassLoaderDataGraphMetaspaceIterator iter;
   while (iter.repeat()) {
@@ -2678,14 +2709,6 @@
       medium_waste += msp->vsm()->sum_waste_in_chunks_in_use(MediumIndex);
       medium_count += msp->vsm()->sum_count_in_chunks_in_use(MediumIndex);
       humongous_count += msp->vsm()->sum_count_in_chunks_in_use(HumongousIndex);
-
-      cls_specialized_waste += msp->class_vsm()->sum_waste_in_chunks_in_use(SpecializedIndex);
-      cls_specialized_count += msp->class_vsm()->sum_count_in_chunks_in_use(SpecializedIndex);
-      cls_small_waste += msp->class_vsm()->sum_waste_in_chunks_in_use(SmallIndex);
-      cls_small_count += msp->class_vsm()->sum_count_in_chunks_in_use(SmallIndex);
-      cls_medium_waste += msp->class_vsm()->sum_waste_in_chunks_in_use(MediumIndex);
-      cls_medium_count += msp->class_vsm()->sum_count_in_chunks_in_use(MediumIndex);
-      cls_humongous_count += msp->class_vsm()->sum_count_in_chunks_in_use(HumongousIndex);
     }
   }
   out->print_cr("Total fragmentation waste (words) doesn't count free space");
@@ -2695,13 +2718,9 @@
                         "large count " SIZE_FORMAT,
              specialized_count, specialized_waste, small_count,
              small_waste, medium_count, medium_waste, humongous_count);
-  out->print_cr(" class: " SIZE_FORMAT " specialized(s) " SIZE_FORMAT ", "
-                           SIZE_FORMAT " small(s) " SIZE_FORMAT ", "
-                           SIZE_FORMAT " medium(s) " SIZE_FORMAT ", "
-                           "large count " SIZE_FORMAT,
-             cls_specialized_count, cls_specialized_waste,
-             cls_small_count, cls_small_waste,
-             cls_medium_count, cls_medium_waste, cls_humongous_count);
+  if (Metaspace::using_class_space()) {
+    print_class_waste(out);
+  }
 }
 
 // Dump global metaspace things from the end of ClassLoaderDataGraph
@@ -2714,7 +2733,9 @@
 
 void MetaspaceAux::verify_free_chunks() {
   Metaspace::space_list()->chunk_manager()->verify();
-  Metaspace::class_space_list()->chunk_manager()->verify();
+  if (Metaspace::using_class_space()) {
+    Metaspace::class_space_list()->chunk_manager()->verify();
+  }
 }
 
 void MetaspaceAux::verify_capacity() {
@@ -2776,7 +2797,9 @@
 
 Metaspace::~Metaspace() {
   delete _vsm;
-  delete _class_vsm;
+  if (using_class_space()) {
+    delete _class_vsm;
+  }
 }
 
 VirtualSpaceList* Metaspace::_space_list = NULL;
@@ -2784,9 +2807,123 @@
 
 #define VIRTUALSPACEMULTIPLIER 2
 
+#ifdef _LP64
+void Metaspace::set_narrow_klass_base_and_shift(address metaspace_base, address cds_base) {
+  // The narrow klass base is the metaspace base, or the CDS base when
+  // sharing is on and it lies lower.  The narrow klass shift is chosen
+  // from the span between that base and the highest end address of
+  // either region.
+  address lower_base = metaspace_base;
+  address higher_address = metaspace_base + class_metaspace_size();
+  if (UseSharedSpaces) {
+    const address cds_end = cds_base + FileMapInfo::shared_spaces_size();
+    higher_address = MAX2(cds_end, higher_address);
+    lower_base = MIN2(metaspace_base, cds_base);
+  }
+  const uint64_t span = (uint64_t)(higher_address - lower_base);
+  Universe::set_narrow_klass_base(lower_base);
+  if (span < (uint64_t)max_juint) {
+    // The span is below max_juint, so no shift is needed.
+    Universe::set_narrow_klass_shift(0);
+  } else {
+    // Larger span: shift by the klass alignment; not allowed with CDS.
+    assert(!UseSharedSpaces, "Cannot shift with UseSharedSpaces");
+    Universe::set_narrow_klass_shift(LogKlassAlignmentInBytes);
+  }
+}
+
+// Return TRUE if the specified metaspace_base and cds_base are close enough
+// to work with compressed klass pointers.
+bool Metaspace::can_use_cds_with_metaspace_addr(char* metaspace_base, address cds_base) {
+  assert(cds_base != 0 && UseSharedSpaces, "Only use with CDS");
+  assert(UseCompressedKlassPointers, "Only use with CompressedKlassPtrs");
+  const address ms_start = (address)metaspace_base;
+  const address span_end = MAX2((address)(cds_base + FileMapInfo::shared_spaces_size()),
+                                (address)(ms_start + class_metaspace_size()));
+  return (uint64_t)(span_end - MIN2(ms_start, cds_base)) < (uint64_t)max_juint;  // span must stay below max_juint
+}
+
+// Try to allocate the metaspace at the requested addr.
+void Metaspace::allocate_metaspace_compressed_klass_ptrs(char* requested_addr, address cds_base) {
+  assert(using_class_space(), "called improperly");
+  assert(UseCompressedKlassPointers, "Only use with CompressedKlassPtrs");
+  assert(class_metaspace_size() < KlassEncodingMetaspaceMax,
+         "Metaspace size is too big");
+
+  ReservedSpace metaspace_rs = ReservedSpace(class_metaspace_size(),
+                                             os::vm_allocation_granularity(),
+                                             false, requested_addr, 0);
+  if (!metaspace_rs.is_reserved()) {
+    if (UseSharedSpaces) {
+      // Keep trying to allocate the metaspace, increasing the requested_addr
+      // by 1GB each time, until we reach an address that will no longer allow
+      // use of CDS with compressed klass pointers.
+      char *addr = requested_addr;
+      while (!metaspace_rs.is_reserved() && (addr + 1*G > addr) &&  // second test stops the loop if addr + 1G would wrap
+             can_use_cds_with_metaspace_addr(addr + 1*G, cds_base)) {
+        addr = addr + 1*G;
+        metaspace_rs = ReservedSpace(class_metaspace_size(),
+                                     os::vm_allocation_granularity(), false, addr, 0);
+      }
+    }
+
+    // If no successful allocation then try to allocate the space anywhere.  If
+    // that fails then OOM doom.  At this point we cannot try allocating the
+    // metaspace as if UseCompressedKlassPointers is off because too much
+    // initialization has happened that depends on UseCompressedKlassPointers.
+    // So, UseCompressedKlassPointers cannot be turned off at this point.
+    if (!metaspace_rs.is_reserved()) {
+      metaspace_rs = ReservedSpace(class_metaspace_size(),
+                                   os::vm_allocation_granularity(), false);
+      if (!metaspace_rs.is_reserved()) {
+        vm_exit_during_initialization(err_msg("Could not allocate metaspace: " SIZE_FORMAT " bytes",
+                                              class_metaspace_size()));
+      }
+    }
+  }
+
+  // If we got here then the metaspace got allocated.
+  MemTracker::record_virtual_memory_type((address)metaspace_rs.base(), mtClass);
+
+  // Verify that we can use shared spaces.  Otherwise, turn off CDS.
+  if (UseSharedSpaces && !can_use_cds_with_metaspace_addr(metaspace_rs.base(), cds_base)) {
+    FileMapInfo::stop_sharing_and_unmap(
+        "Could not allocate metaspace at a compatible address");
+  }
+
+  set_narrow_klass_base_and_shift((address)metaspace_rs.base(),
+                                  UseSharedSpaces ? (address)cds_base : 0);  // re-reads the flag; presumably cleared by the call above on failure -- confirm
+
+  initialize_class_space(metaspace_rs);
+
+  if (PrintCompressedOopsMode || (PrintMiscellaneous && Verbose)) {
+    gclog_or_tty->print_cr("Narrow klass base: " PTR_FORMAT ", Narrow klass shift: " SIZE_FORMAT,
+                            Universe::narrow_klass_base(), Universe::narrow_klass_shift());  // NOTE(review): SIZE_FORMAT assumes a size_t-wide shift -- confirm return type
+    gclog_or_tty->print_cr("Metaspace Size: " SIZE_FORMAT " Address: " PTR_FORMAT " Req Addr: " PTR_FORMAT,
+                           class_metaspace_size(), metaspace_rs.base(), requested_addr);
+  }
+}
+
+// For UseCompressedKlassPointers the class space is reserved above the top of
+// the Java heap.  The argument passed in is at the base of the compressed space.
+void Metaspace::initialize_class_space(ReservedSpace rs) {
+  // The reserved space size may be bigger because of alignment, esp with UseLargePages
+  assert(rs.size() >= ClassMetaspaceSize,
+         err_msg(SIZE_FORMAT " < " UINTX_FORMAT, rs.size(), ClassMetaspaceSize));
+  assert(using_class_space(), "Must be using class space");
+  _class_space_list = new VirtualSpaceList(rs);  // publishes the list that class_space_list() returns
+}
+
+#endif
+
 void Metaspace::global_initialize() {
   // Initialize the alignment for shared spaces.
   int max_alignment = os::vm_page_size();
+  size_t cds_total = 0;
+
+  set_class_metaspace_size(align_size_up(ClassMetaspaceSize,
+                                         os::vm_allocation_granularity()));
+
   MetaspaceShared::set_max_alignment(max_alignment);
 
   if (DumpSharedSpaces) {
@@ -2798,15 +2935,31 @@
     // Initialize with the sum of the shared space sizes.  The read-only
     // and read write metaspace chunks will be allocated out of this and the
     // remainder is the misc code and data chunks.
-    size_t total = align_size_up(SharedReadOnlySize + SharedReadWriteSize +
-                                 SharedMiscDataSize + SharedMiscCodeSize,
-                                 os::vm_allocation_granularity());
-    size_t word_size = total/wordSize;
-    _space_list = new VirtualSpaceList(word_size);
+    cds_total = FileMapInfo::shared_spaces_size();
+    _space_list = new VirtualSpaceList(cds_total/wordSize);
+
+#ifdef _LP64
+    // Set the compressed klass pointer base so that decoding of these pointers works
+    // properly when creating the shared archive.
+    assert(UseCompressedOops && UseCompressedKlassPointers,
+      "UseCompressedOops and UseCompressedKlassPointers must be set");
+    Universe::set_narrow_klass_base((address)_space_list->current_virtual_space()->bottom());
+    if (TraceMetavirtualspaceAllocation && Verbose) {
+      gclog_or_tty->print_cr("Setting_narrow_klass_base to Address: " PTR_FORMAT,
+                             _space_list->current_virtual_space()->bottom());
+    }
+
+    // Set the shift to zero.
+    assert(class_metaspace_size() < (uint64_t)(max_juint) - cds_total,
+           "CDS region is too large");
+    Universe::set_narrow_klass_shift(0);
+#endif
+
   } else {
     // If using shared space, open the file that contains the shared space
     // and map in the memory before initializing the rest of metaspace (so
     // the addresses don't conflict)
+    address cds_address = NULL;
     if (UseSharedSpaces) {
       FileMapInfo* mapinfo = new FileMapInfo();
       memset(mapinfo, 0, sizeof(FileMapInfo));
@@ -2821,8 +2974,22 @@
         assert(!mapinfo->is_open() && !UseSharedSpaces,
                "archive file not closed or shared spaces not disabled.");
       }
+      cds_total = FileMapInfo::shared_spaces_size();
+      cds_address = (address)mapinfo->region_base(0);
     }
 
+#ifdef _LP64
+    // If UseCompressedKlassPointers is set then allocate the metaspace area
+    // above the heap and above the CDS area (if it exists).
+    if (using_class_space()) {
+      if (UseSharedSpaces) {
+        allocate_metaspace_compressed_klass_ptrs((char *)(cds_address + cds_total), cds_address);
+      } else {
+        allocate_metaspace_compressed_klass_ptrs((char *)CompressedKlassPointersBase, 0);
+      }
+    }
+#endif
+
     // Initialize these before initializing the VirtualSpaceList
     _first_chunk_word_size = InitialBootClassLoaderMetaspaceSize / BytesPerWord;
     _first_chunk_word_size = align_word_size_up(_first_chunk_word_size);
@@ -2840,39 +3007,28 @@
   }
 }
 
-// For UseCompressedKlassPointers the class space is reserved as a piece of the
-// Java heap because the compression algorithm is the same for each.  The
-// argument passed in is at the top of the compressed space
-void Metaspace::initialize_class_space(ReservedSpace rs) {
-  // The reserved space size may be bigger because of alignment, esp with UseLargePages
-  assert(rs.size() >= ClassMetaspaceSize,
-         err_msg(SIZE_FORMAT " != " UINTX_FORMAT, rs.size(), ClassMetaspaceSize));
-  _class_space_list = new VirtualSpaceList(rs);
-}
-
-void Metaspace::initialize(Mutex* lock,
-                           MetaspaceType type) {
+void Metaspace::initialize(Mutex* lock, MetaspaceType type) {
 
   assert(space_list() != NULL,
     "Metadata VirtualSpaceList has not been initialized");
 
-  _vsm = new SpaceManager(Metaspace::NonClassType, lock, space_list());
+  _vsm = new SpaceManager(NonClassType, lock, space_list());
   if (_vsm == NULL) {
     return;
   }
   size_t word_size;
   size_t class_word_size;
-  vsm()->get_initial_chunk_sizes(type,
-                                 &word_size,
-                                 &class_word_size);
-
-  assert(class_space_list() != NULL,
-    "Class VirtualSpaceList has not been initialized");
-
-  // Allocate SpaceManager for classes.
-  _class_vsm = new SpaceManager(Metaspace::ClassType, lock, class_space_list());
-  if (_class_vsm == NULL) {
-    return;
+  vsm()->get_initial_chunk_sizes(type, &word_size, &class_word_size);
+
+  if (using_class_space()) {
+    assert(class_space_list() != NULL,
+      "Class VirtualSpaceList has not been initialized");
+
+    // Allocate SpaceManager for classes.
+    _class_vsm = new SpaceManager(ClassType, lock, class_space_list());
+    if (_class_vsm == NULL) {
+      return;
+    }
   }
 
   MutexLockerEx cl(SpaceManager::expand_lock(), Mutex::_no_safepoint_check_flag);
@@ -2888,11 +3044,13 @@
   }
 
   // Allocate chunk for class metadata objects
-  Metachunk* class_chunk =
-     class_space_list()->get_initialization_chunk(class_word_size,
-                                                  class_vsm()->medium_chunk_bunch());
-  if (class_chunk != NULL) {
-    class_vsm()->add_chunk(class_chunk, true);
+  if (using_class_space()) {
+    Metachunk* class_chunk =
+       class_space_list()->get_initialization_chunk(class_word_size,
+                                                    class_vsm()->medium_chunk_bunch());
+    if (class_chunk != NULL) {
+      class_vsm()->add_chunk(class_chunk, true);
+    }
   }
 
   _alloc_record_head = NULL;
@@ -2906,7 +3064,8 @@
 
 MetaWord* Metaspace::allocate(size_t word_size, MetadataType mdtype) {
   // DumpSharedSpaces doesn't use class metadata area (yet)
-  if (mdtype == ClassType && !DumpSharedSpaces) {
+  // Also, don't use class_vsm() unless UseCompressedKlassPointers is true.
+  if (mdtype == ClassType && using_class_space()) {
     return  class_vsm()->allocate(word_size);
   } else {
     return  vsm()->allocate(word_size);
@@ -2937,14 +3096,19 @@
 }
 
 size_t Metaspace::used_words_slow(MetadataType mdtype) const {
-  // return vsm()->allocated_used_words();
-  return mdtype == ClassType ? class_vsm()->sum_used_in_chunks_in_use() :
-                               vsm()->sum_used_in_chunks_in_use();  // includes overhead!
+  if (mdtype == ClassType) {
+    return using_class_space() ? class_vsm()->sum_used_in_chunks_in_use() : 0;
+  } else {
+    return vsm()->sum_used_in_chunks_in_use();  // includes overhead!
+  }
 }
 
 size_t Metaspace::free_words(MetadataType mdtype) const {
-  return mdtype == ClassType ? class_vsm()->sum_free_in_chunks_in_use() :
-                               vsm()->sum_free_in_chunks_in_use();
+  if (mdtype == ClassType) {
+    return using_class_space() ? class_vsm()->sum_free_in_chunks_in_use() : 0;
+  } else {
+    return vsm()->sum_free_in_chunks_in_use();
+  }
 }
 
 // Space capacity in the Metaspace.  It includes
@@ -2953,8 +3117,11 @@
 // in the space available in the dictionary which
 // is already counted in some chunk.
 size_t Metaspace::capacity_words_slow(MetadataType mdtype) const {
-  return mdtype == ClassType ? class_vsm()->sum_capacity_in_chunks_in_use() :
-                               vsm()->sum_capacity_in_chunks_in_use();
+  if (mdtype == ClassType) {
+    return using_class_space() ? class_vsm()->sum_capacity_in_chunks_in_use() : 0;
+  } else {
+    return vsm()->sum_capacity_in_chunks_in_use();
+  }
 }
 
 size_t Metaspace::used_bytes_slow(MetadataType mdtype) const {
@@ -2977,8 +3144,8 @@
 #endif
       return;
     }
-    if (is_class) {
-       class_vsm()->deallocate(ptr, word_size);
+    if (is_class && using_class_space()) {
+      class_vsm()->deallocate(ptr, word_size);
     } else {
       vsm()->deallocate(ptr, word_size);
     }
@@ -2992,7 +3159,7 @@
 #endif
       return;
     }
-    if (is_class) {
+    if (is_class && using_class_space()) {
       class_vsm()->deallocate(ptr, word_size);
     } else {
       vsm()->deallocate(ptr, word_size);
@@ -3101,14 +3268,18 @@
   MutexLockerEx cl(SpaceManager::expand_lock(),
                    Mutex::_no_safepoint_check_flag);
   space_list()->purge();
-  class_space_list()->purge();
+  if (using_class_space()) {
+    class_space_list()->purge();
+  }
 }
 
 void Metaspace::print_on(outputStream* out) const {
   // Print both class virtual space counts and metaspace.
   if (Verbose) {
-      vsm()->print_on(out);
+    vsm()->print_on(out);
+    if (using_class_space()) {
       class_vsm()->print_on(out);
+    }
   }
 }
 
@@ -3122,17 +3293,21 @@
   // be needed.  Note, locking this can cause inversion problems with the
   // caller in MetaspaceObj::is_metadata() function.
   return space_list()->contains(ptr) ||
-         class_space_list()->contains(ptr);
+         (using_class_space() && class_space_list()->contains(ptr));
 }
 
 void Metaspace::verify() {
   vsm()->verify();
-  class_vsm()->verify();
+  if (using_class_space()) {
+    class_vsm()->verify();
+  }
 }
 
 void Metaspace::dump(outputStream* const out) const {
   out->print_cr("\nVirtual space manager: " INTPTR_FORMAT, vsm());
   vsm()->dump(out);
-  out->print_cr("\nClass space manager: " INTPTR_FORMAT, class_vsm());
-  class_vsm()->dump(out);
+  if (using_class_space()) {
+    out->print_cr("\nClass space manager: " INTPTR_FORMAT, class_vsm());
+    class_vsm()->dump(out);
+  }
 }
--- a/hotspot/src/share/vm/memory/metaspace.hpp	Mon Aug 26 17:36:10 2013 -0700
+++ b/hotspot/src/share/vm/memory/metaspace.hpp	Wed Jul 05 19:08:56 2017 +0200
@@ -105,6 +105,16 @@
   // Align up the word size to the allocation word size
   static size_t align_word_size_up(size_t);
 
+  // Aligned size of the class metaspace.
+  static size_t _class_metaspace_size;
+
+  static size_t class_metaspace_size() {
+    return _class_metaspace_size;
+  }
+  static void set_class_metaspace_size(size_t metaspace_size) {
+    _class_metaspace_size = metaspace_size;
+  }
+
   static size_t _first_chunk_word_size;
   static size_t _first_class_chunk_word_size;
 
@@ -126,11 +136,26 @@
 
   static VirtualSpaceList* space_list()       { return _space_list; }
   static VirtualSpaceList* class_space_list() { return _class_space_list; }
+  static VirtualSpaceList* get_space_list(MetadataType mdtype) {
+    assert(mdtype != MetadataTypeCount, "MetadaTypeCount can't be used as mdtype");
+    return mdtype == ClassType ? class_space_list() : space_list();
+  }
 
   // This is used by DumpSharedSpaces only, where only _vsm is used. So we will
   // maintain a single list for now.
   void record_allocation(void* ptr, MetaspaceObj::Type type, size_t word_size);
 
+#ifdef _LP64
+  static void set_narrow_klass_base_and_shift(address metaspace_base, address cds_base);
+
+  // Returns true if CDS can be used with the metaspace allocated at the specified address.
+  static bool can_use_cds_with_metaspace_addr(char* metaspace_base, address cds_base);
+
+  static void allocate_metaspace_compressed_klass_ptrs(char* requested_addr, address cds_base);
+
+  static void initialize_class_space(ReservedSpace rs);
+#endif
+
   class AllocRecord : public CHeapObj<mtClass> {
   public:
     AllocRecord(address ptr, MetaspaceObj::Type type, int byte_size)
@@ -151,7 +176,6 @@
 
   // Initialize globals for Metaspace
   static void global_initialize();
-  static void initialize_class_space(ReservedSpace rs);
 
   static size_t first_chunk_word_size() { return _first_chunk_word_size; }
   static size_t first_class_chunk_word_size() { return _first_class_chunk_word_size; }
@@ -172,8 +196,6 @@
   MetaWord* expand_and_allocate(size_t size,
                                 MetadataType mdtype);
 
-  static bool is_initialized() { return _class_space_list != NULL; }
-
   static bool contains(const void *ptr);
   void dump(outputStream* const out) const;
 
@@ -190,11 +212,16 @@
   };
 
   void iterate(AllocRecordClosure *closure);
+
+  // Returns true only on LP64 when UseCompressedKlassPointers is true and DumpSharedSpaces is false.
+  static bool using_class_space() {
+    return NOT_LP64(false) LP64_ONLY(UseCompressedKlassPointers && !DumpSharedSpaces);
+  }
+
 };
 
 class MetaspaceAux : AllStatic {
   static size_t free_chunks_total(Metaspace::MetadataType mdtype);
-  static size_t free_chunks_total_in_bytes(Metaspace::MetadataType mdtype);
 
  public:
   // Statistics for class space and data space in metaspace.
@@ -238,13 +265,15 @@
   // Used by MetaspaceCounters
   static size_t free_chunks_total();
   static size_t free_chunks_total_in_bytes();
+  static size_t free_chunks_total_in_bytes(Metaspace::MetadataType mdtype);
 
   static size_t allocated_capacity_words(Metaspace::MetadataType mdtype) {
     return _allocated_capacity_words[mdtype];
   }
   static size_t allocated_capacity_words() {
-    return _allocated_capacity_words[Metaspace::ClassType] +
-           _allocated_capacity_words[Metaspace::NonClassType];
+    return _allocated_capacity_words[Metaspace::NonClassType] +
+           (Metaspace::using_class_space() ?
+           _allocated_capacity_words[Metaspace::ClassType] : 0);
   }
   static size_t allocated_capacity_bytes(Metaspace::MetadataType mdtype) {
     return allocated_capacity_words(mdtype) * BytesPerWord;
@@ -257,8 +286,9 @@
     return _allocated_used_words[mdtype];
   }
   static size_t allocated_used_words() {
-    return _allocated_used_words[Metaspace::ClassType] +
-           _allocated_used_words[Metaspace::NonClassType];
+    return _allocated_used_words[Metaspace::NonClassType] +
+           (Metaspace::using_class_space() ?
+           _allocated_used_words[Metaspace::ClassType] : 0);
   }
   static size_t allocated_used_bytes(Metaspace::MetadataType mdtype) {
     return allocated_used_words(mdtype) * BytesPerWord;
@@ -268,6 +298,7 @@
   }
 
   static size_t free_bytes();
+  static size_t free_bytes(Metaspace::MetadataType mdtype);
 
   // Total capacity in all Metaspaces
   static size_t capacity_bytes_slow() {
@@ -300,6 +331,7 @@
   static void print_on(outputStream * out);
   static void print_on(outputStream * out, Metaspace::MetadataType mdtype);
 
+  static void print_class_waste(outputStream* out);
   static void print_waste(outputStream* out);
   static void dump(outputStream* out);
   static void verify_free_chunks();
--- a/hotspot/src/share/vm/memory/metaspaceCounters.cpp	Mon Aug 26 17:36:10 2013 -0700
+++ b/hotspot/src/share/vm/memory/metaspaceCounters.cpp	Wed Jul 05 19:08:56 2017 +0200
@@ -25,11 +25,47 @@
 #include "precompiled.hpp"
 #include "memory/metaspaceCounters.hpp"
 #include "memory/resourceArea.hpp"
+#include "runtime/globals.hpp"
+#include "runtime/perfData.hpp"
 #include "utilities/exceptions.hpp"
 
-MetaspaceCounters* MetaspaceCounters::_metaspace_counters = NULL;
+class MetaspacePerfCounters: public CHeapObj<mtInternal> {
+  friend class VMStructs;
+  PerfVariable*      _capacity;
+  PerfVariable*      _used;
+  PerfVariable*      _max_capacity;
 
-size_t MetaspaceCounters::calc_total_capacity() {
+  PerfVariable* create_variable(const char *ns, const char *name, size_t value, TRAPS) {
+    const char *path = PerfDataManager::counter_name(ns, name);
+    return PerfDataManager::create_variable(SUN_GC, path, PerfData::U_Bytes, value, THREAD);
+  }
+
+  void create_constant(const char *ns, const char *name, size_t value, TRAPS) {
+    const char *path = PerfDataManager::counter_name(ns, name);
+    PerfDataManager::create_constant(SUN_GC, path, PerfData::U_Bytes, value, THREAD);
+  }
+
+ public:
+  MetaspacePerfCounters(const char* ns, size_t min_capacity, size_t curr_capacity, size_t max_capacity, size_t used) {
+    EXCEPTION_MARK;
+    ResourceMark rm;
+
+    create_constant(ns, "minCapacity", min_capacity, THREAD);
+    _capacity = create_variable(ns, "capacity", curr_capacity, THREAD);
+    _max_capacity = create_variable(ns, "maxCapacity", max_capacity, THREAD);
+    _used = create_variable(ns, "used", used, THREAD);
+  }
+
+  void update(size_t capacity, size_t max_capacity, size_t used) {
+    _capacity->set_value(capacity);
+    _max_capacity->set_value(max_capacity);
+    _used->set_value(used);
+  }
+};
+
+MetaspacePerfCounters* MetaspaceCounters::_perf_counters = NULL;
+
+size_t MetaspaceCounters::calculate_capacity() {
   // The total capacity is the sum of
   //   1) capacity of Metachunks in use by all Metaspaces
   //   2) unused space at the end of each Metachunk
@@ -39,95 +75,65 @@
   return total_capacity;
 }
 
-MetaspaceCounters::MetaspaceCounters() :
-    _capacity(NULL),
-    _used(NULL),
-    _max_capacity(NULL) {
+void MetaspaceCounters::initialize_performance_counters() {
   if (UsePerfData) {
+    assert(_perf_counters == NULL, "Should only be initialized once");
+
     size_t min_capacity = MetaspaceAux::min_chunk_size();
+    size_t capacity = calculate_capacity();
     size_t max_capacity = MetaspaceAux::reserved_in_bytes();
-    size_t curr_capacity = calc_total_capacity();
     size_t used = MetaspaceAux::allocated_used_bytes();
 
-    initialize(min_capacity, max_capacity, curr_capacity, used);
-  }
-}
-
-static PerfVariable* create_ms_variable(const char *ns,
-                                        const char *name,
-                                        size_t value,
-                                        TRAPS) {
-  const char *path = PerfDataManager::counter_name(ns, name);
-  PerfVariable *result =
-      PerfDataManager::create_variable(SUN_GC, path, PerfData::U_Bytes, value,
-                                       CHECK_NULL);
-  return result;
-}
-
-static void create_ms_constant(const char *ns,
-                               const char *name,
-                               size_t value,
-                               TRAPS) {
-  const char *path = PerfDataManager::counter_name(ns, name);
-  PerfDataManager::create_constant(SUN_GC, path, PerfData::U_Bytes, value, CHECK);
-}
-
-void MetaspaceCounters::initialize(size_t min_capacity,
-                                   size_t max_capacity,
-                                   size_t curr_capacity,
-                                   size_t used) {
-
-  if (UsePerfData) {
-    EXCEPTION_MARK;
-    ResourceMark rm;
-
-    const char *ms = "metaspace";
-
-    create_ms_constant(ms, "minCapacity", min_capacity, CHECK);
-    _max_capacity = create_ms_variable(ms, "maxCapacity", max_capacity, CHECK);
-    _capacity = create_ms_variable(ms, "capacity", curr_capacity, CHECK);
-    _used = create_ms_variable(ms, "used", used, CHECK);
-  }
-}
-
-void MetaspaceCounters::update_capacity() {
-  assert(UsePerfData, "Should not be called unless being used");
-  size_t total_capacity = calc_total_capacity();
-  _capacity->set_value(total_capacity);
-}
-
-void MetaspaceCounters::update_used() {
-  assert(UsePerfData, "Should not be called unless being used");
-  size_t used_in_bytes = MetaspaceAux::allocated_used_bytes();
-  _used->set_value(used_in_bytes);
-}
-
-void MetaspaceCounters::update_max_capacity() {
-  assert(UsePerfData, "Should not be called unless being used");
-  assert(_max_capacity != NULL, "Should be initialized");
-  size_t reserved_in_bytes = MetaspaceAux::reserved_in_bytes();
-  _max_capacity->set_value(reserved_in_bytes);
-}
-
-void MetaspaceCounters::update_all() {
-  if (UsePerfData) {
-    update_used();
-    update_capacity();
-    update_max_capacity();
-  }
-}
-
-void MetaspaceCounters::initialize_performance_counters() {
-  if (UsePerfData) {
-    assert(_metaspace_counters == NULL, "Should only be initialized once");
-    _metaspace_counters = new MetaspaceCounters();
+    _perf_counters = new MetaspacePerfCounters("metaspace", min_capacity, capacity, max_capacity, used);
   }
 }
 
 void MetaspaceCounters::update_performance_counters() {
   if (UsePerfData) {
-    assert(_metaspace_counters != NULL, "Should be initialized");
-    _metaspace_counters->update_all();
+    assert(_perf_counters != NULL, "Should be initialized");
+
+    size_t capacity = calculate_capacity();
+    size_t max_capacity = MetaspaceAux::reserved_in_bytes();
+    size_t used = MetaspaceAux::allocated_used_bytes();
+
+    _perf_counters->update(capacity, max_capacity, used);
   }
 }
 
+MetaspacePerfCounters* CompressedClassSpaceCounters::_perf_counters = NULL;
+
+size_t CompressedClassSpaceCounters::calculate_capacity() {
+    return MetaspaceAux::allocated_capacity_bytes(_class_type) +
+           MetaspaceAux::free_bytes(_class_type) +
+           MetaspaceAux::free_chunks_total_in_bytes(_class_type);
+}
+
+void CompressedClassSpaceCounters::update_performance_counters() {
+  if (UsePerfData && UseCompressedKlassPointers) {
+    assert(_perf_counters != NULL, "Should be initialized");
+
+    size_t capacity = calculate_capacity();
+    size_t max_capacity = MetaspaceAux::reserved_in_bytes(_class_type);
+    size_t used = MetaspaceAux::allocated_used_bytes(_class_type);
+
+    _perf_counters->update(capacity, max_capacity, used);
+  }
+}
+
+void CompressedClassSpaceCounters::initialize_performance_counters() {
+  if (UsePerfData) {
+    assert(_perf_counters == NULL, "Should only be initialized once");
+    const char* ns = "compressedclassspace";
+
+    if (UseCompressedKlassPointers) {
+      size_t min_capacity = MetaspaceAux::min_chunk_size();
+      size_t capacity = calculate_capacity();
+      size_t max_capacity = MetaspaceAux::reserved_in_bytes(_class_type);
+      size_t used = MetaspaceAux::allocated_used_bytes(_class_type);
+
+      _perf_counters = new MetaspacePerfCounters(ns, min_capacity, capacity, max_capacity, used);
+    } else {
+      _perf_counters = new MetaspacePerfCounters(ns, 0, 0, 0, 0);
+    }
+  }
+}
--- a/hotspot/src/share/vm/memory/metaspaceCounters.hpp	Mon Aug 26 17:36:10 2013 -0700
+++ b/hotspot/src/share/vm/memory/metaspaceCounters.hpp	Wed Jul 05 19:08:56 2017 +0200
@@ -25,31 +25,27 @@
 #ifndef SHARE_VM_MEMORY_METASPACECOUNTERS_HPP
 #define SHARE_VM_MEMORY_METASPACECOUNTERS_HPP
 
-#include "runtime/perfData.hpp"
+#include "memory/metaspace.hpp"
 
-class MetaspaceCounters: public CHeapObj<mtClass> {
-  friend class VMStructs;
-  PerfVariable*      _capacity;
-  PerfVariable*      _used;
-  PerfVariable*      _max_capacity;
-  static MetaspaceCounters* _metaspace_counters;
-  void initialize(size_t min_capacity,
-                  size_t max_capacity,
-                  size_t curr_capacity,
-                  size_t used);
-  size_t calc_total_capacity();
+class MetaspacePerfCounters;
+
+class MetaspaceCounters: public AllStatic {
+  static MetaspacePerfCounters* _perf_counters;
+  static size_t calculate_capacity();
+
  public:
-  MetaspaceCounters();
-  ~MetaspaceCounters();
-
-  void update_capacity();
-  void update_used();
-  void update_max_capacity();
-
-  void update_all();
-
   static void initialize_performance_counters();
   static void update_performance_counters();
+};
 
+class CompressedClassSpaceCounters: public AllStatic {
+  static MetaspacePerfCounters* _perf_counters;
+  static size_t calculate_capacity();
+  static const Metaspace::MetadataType _class_type = Metaspace::ClassType;
+
+ public:
+  static void initialize_performance_counters();
+  static void update_performance_counters();
 };
+
 #endif // SHARE_VM_MEMORY_METASPACECOUNTERS_HPP
--- a/hotspot/src/share/vm/memory/metaspaceShared.cpp	Mon Aug 26 17:36:10 2013 -0700
+++ b/hotspot/src/share/vm/memory/metaspaceShared.cpp	Wed Jul 05 19:08:56 2017 +0200
@@ -52,7 +52,6 @@
   int tag = 0;
   soc->do_tag(--tag);
 
-  assert(!UseCompressedOops, "UseCompressedOops doesn't work with shared archive");
   // Verify the sizes of various metadata in the system.
   soc->do_tag(sizeof(Method));
   soc->do_tag(sizeof(ConstMethod));
--- a/hotspot/src/share/vm/memory/universe.cpp	Mon Aug 26 17:36:10 2013 -0700
+++ b/hotspot/src/share/vm/memory/universe.cpp	Wed Jul 05 19:08:56 2017 +0200
@@ -145,8 +145,6 @@
 NarrowPtrStruct Universe::_narrow_klass = { NULL, 0, true };
 address Universe::_narrow_ptrs_base;
 
-size_t          Universe::_class_metaspace_size;
-
 void Universe::basic_type_classes_do(void f(Klass*)) {
   f(boolArrayKlassObj());
   f(byteArrayKlassObj());
@@ -641,6 +639,8 @@
     return status;
   }
 
+  Metaspace::global_initialize();
+
   // Create memory for metadata.  Must be after initializing heap for
   // DumpSharedSpaces.
   ClassLoaderData::init_null_class_loader_data();
@@ -693,13 +693,9 @@
     if (!FLAG_IS_DEFAULT(HeapBaseMinAddress) && (mode == UnscaledNarrowOop)) {
       base = HeapBaseMinAddress;
 
-    // If the total size and the metaspace size are small enough to allow
-    // UnscaledNarrowOop then just use UnscaledNarrowOop.
-    } else if ((total_size <= OopEncodingHeapMax) && (mode != HeapBasedNarrowOop) &&
-        (!UseCompressedKlassPointers ||
-          (((OopEncodingHeapMax - heap_size) + Universe::class_metaspace_size()) <= KlassEncodingMetaspaceMax))) {
-      // We don't need to check the metaspace size here because it is always smaller
-      // than total_size.
+    // If the total size is small enough to allow UnscaledNarrowOop then
+    // just use UnscaledNarrowOop.
+    } else if ((total_size <= OopEncodingHeapMax) && (mode != HeapBasedNarrowOop)) {
       if ((total_size <= NarrowOopHeapMax) && (mode == UnscaledNarrowOop) &&
           (Universe::narrow_oop_shift() == 0)) {
         // Use 32-bits oops without encoding and
@@ -716,13 +712,6 @@
           base = (OopEncodingHeapMax - heap_size);
         }
       }
-
-    // See if ZeroBaseNarrowOop encoding will work for a heap based at
-    // (KlassEncodingMetaspaceMax - class_metaspace_size()).
-    } else if (UseCompressedKlassPointers && (mode != HeapBasedNarrowOop) &&
-        (Universe::class_metaspace_size() + HeapBaseMinAddress <= KlassEncodingMetaspaceMax) &&
-        (KlassEncodingMetaspaceMax + heap_size - Universe::class_metaspace_size() <= OopEncodingHeapMax)) {
-      base = (KlassEncodingMetaspaceMax - Universe::class_metaspace_size());
     } else {
       // UnscaledNarrowOop encoding didn't work, and no base was found for ZeroBasedOops or
       // HeapBasedNarrowOop encoding was requested.  So, can't reserve below 32Gb.
@@ -732,8 +721,7 @@
     // Set narrow_oop_base and narrow_oop_use_implicit_null_checks
     // used in ReservedHeapSpace() constructors.
     // The final values will be set in initialize_heap() below.
-    if ((base != 0) && ((base + heap_size) <= OopEncodingHeapMax) &&
-        (!UseCompressedKlassPointers || (base + Universe::class_metaspace_size()) <= KlassEncodingMetaspaceMax)) {
+    if ((base != 0) && ((base + heap_size) <= OopEncodingHeapMax)) {
       // Use zero based compressed oops
       Universe::set_narrow_oop_base(NULL);
       // Don't need guard page for implicit checks in indexed
@@ -816,9 +804,7 @@
       tty->print("heap address: " PTR_FORMAT ", size: " SIZE_FORMAT " MB",
                  Universe::heap()->base(), Universe::heap()->reserved_region().byte_size()/M);
     }
-    if (((uint64_t)Universe::heap()->reserved_region().end() > OopEncodingHeapMax) ||
-        (UseCompressedKlassPointers &&
-        ((uint64_t)Universe::heap()->base() + Universe::class_metaspace_size() > KlassEncodingMetaspaceMax))) {
+    if (((uint64_t)Universe::heap()->reserved_region().end() > OopEncodingHeapMax)) {
       // Can't reserve heap below 32Gb.
       // keep the Universe::narrow_oop_base() set in Universe::reserve_heap()
       Universe::set_narrow_oop_shift(LogMinObjAlignmentInBytes);
@@ -849,20 +835,16 @@
         }
       }
     }
+
     if (verbose) {
       tty->cr();
       tty->cr();
     }
-    if (UseCompressedKlassPointers) {
-      Universe::set_narrow_klass_base(Universe::narrow_oop_base());
-      Universe::set_narrow_klass_shift(MIN2(Universe::narrow_oop_shift(), LogKlassAlignmentInBytes));
-    }
     Universe::set_narrow_ptrs_base(Universe::narrow_oop_base());
   }
-  // Universe::narrow_oop_base() is one page below the metaspace
-  // base. The actual metaspace base depends on alignment constraints
-  // so we don't know its exact location here.
-  assert((intptr_t)Universe::narrow_oop_base() <= (intptr_t)(Universe::heap()->base() - os::vm_page_size() - ClassMetaspaceSize) ||
+  // Universe::narrow_oop_base() is at least one page below the heap base, or NULL.
+  assert((intptr_t)Universe::narrow_oop_base() <= (intptr_t)(Universe::heap()->base() -
+         os::vm_page_size()) ||
          Universe::narrow_oop_base() == NULL, "invalid value");
   assert(Universe::narrow_oop_shift() == LogMinObjAlignmentInBytes ||
          Universe::narrow_oop_shift() == 0, "invalid value");
@@ -882,12 +864,7 @@
 
 // Reserve the Java heap, which is now the same for all GCs.
 ReservedSpace Universe::reserve_heap(size_t heap_size, size_t alignment) {
-  // Add in the class metaspace area so the classes in the headers can
-  // be compressed the same as instances.
-  // Need to round class space size up because it's below the heap and
-  // the actual alignment depends on its size.
-  Universe::set_class_metaspace_size(align_size_up(ClassMetaspaceSize, alignment));
-  size_t total_reserved = align_size_up(heap_size + Universe::class_metaspace_size(), alignment);
+  size_t total_reserved = align_size_up(heap_size, alignment);
   assert(!UseCompressedOops || (total_reserved <= (OopEncodingHeapMax - os::vm_page_size())),
       "heap size is too big for compressed oops");
   char* addr = Universe::preferred_heap_base(total_reserved, Universe::UnscaledNarrowOop);
@@ -923,28 +900,17 @@
     return total_rs;
   }
 
-  // Split the reserved space into main Java heap and a space for
-  // classes so that they can be compressed using the same algorithm
-  // as compressed oops. If compress oops and compress klass ptrs are
-  // used we need the meta space first: if the alignment used for
-  // compressed oops is greater than the one used for compressed klass
-  // ptrs, a metadata space on top of the heap could become
-  // unreachable.
-  ReservedSpace class_rs = total_rs.first_part(Universe::class_metaspace_size());
-  ReservedSpace heap_rs = total_rs.last_part(Universe::class_metaspace_size(), alignment);
-  Metaspace::initialize_class_space(class_rs);
-
   if (UseCompressedOops) {
     // Universe::initialize_heap() will reset this to NULL if unscaled
     // or zero-based narrow oops are actually used.
     address base = (address)(total_rs.base() - os::vm_page_size());
     Universe::set_narrow_oop_base(base);
   }
-  return heap_rs;
+  return total_rs;
 }
 
 
-// It's the caller's repsonsibility to ensure glitch-freedom
+// It's the caller's responsibility to ensure glitch-freedom
 // (if required).
 void Universe::update_heap_info_at_gc() {
   _heap_capacity_at_last_gc = heap()->capacity();
@@ -1135,6 +1101,8 @@
 
   // Initialize performance counters for metaspaces
   MetaspaceCounters::initialize_performance_counters();
+  CompressedClassSpaceCounters::initialize_performance_counters();
+
   MemoryService::add_metaspace_memory_pools();
 
   GC_locker::unlock();  // allow gc after bootstrapping
--- a/hotspot/src/share/vm/memory/universe.hpp	Mon Aug 26 17:36:10 2013 -0700
+++ b/hotspot/src/share/vm/memory/universe.hpp	Wed Jul 05 19:08:56 2017 +0200
@@ -75,10 +75,10 @@
 };
 
 
-// For UseCompressedOops and UseCompressedKlassPointers.
+// For UseCompressedOops.
 struct NarrowPtrStruct {
-  // Base address for oop/klass-within-java-object materialization.
-  // NULL if using wide oops/klasses or zero based narrow oops/klasses.
+  // Base address for oop-within-java-object materialization.
+  // NULL if using wide oops or zero based narrow oops.
   address _base;
   // Number of shift bits for encoding/decoding narrow ptrs.
   // 0 if using wide ptrs or zero based unscaled narrow ptrs,
@@ -106,6 +106,7 @@
   friend class SystemDictionary;
   friend class VMStructs;
   friend class VM_PopulateDumpSharedSpace;
+  friend class Metaspace;
 
   friend jint  universe_init();
   friend void  universe2_init();
@@ -184,9 +185,6 @@
   static struct NarrowPtrStruct _narrow_klass;
   static address _narrow_ptrs_base;
 
-  // Aligned size of the metaspace.
-  static size_t _class_metaspace_size;
-
   // array of dummy objects used with +FullGCAlot
   debug_only(static objArrayOop _fullgc_alot_dummy_array;)
   // index of next entry to clear
@@ -238,15 +236,6 @@
     assert(UseCompressedOops, "no compressed ptrs?");
     _narrow_oop._use_implicit_null_checks   = use;
   }
-  static bool     reserve_metaspace_helper(bool with_base = false);
-  static ReservedHeapSpace reserve_heap_metaspace(size_t heap_size, size_t alignment, bool& contiguous);
-
-  static size_t  class_metaspace_size() {
-    return _class_metaspace_size;
-  }
-  static void    set_class_metaspace_size(size_t metaspace_size) {
-    _class_metaspace_size = metaspace_size;
-  }
 
   // Debugging
   static int _verify_count;                           // number of verifies done
--- a/hotspot/src/share/vm/oops/instanceKlass.cpp	Mon Aug 26 17:36:10 2013 -0700
+++ b/hotspot/src/share/vm/oops/instanceKlass.cpp	Wed Jul 05 19:08:56 2017 +0200
@@ -269,7 +269,7 @@
   set_fields(NULL, 0);
   set_constants(NULL);
   set_class_loader_data(NULL);
-  set_source_file_name(NULL);
+  set_source_file_name_index(0);
   set_source_debug_extension(NULL, 0);
   set_array_name(NULL);
   set_inner_classes(NULL);
@@ -284,7 +284,7 @@
   set_osr_nmethods_head(NULL);
   set_breakpoints(NULL);
   init_previous_versions();
-  set_generic_signature(NULL);
+  set_generic_signature_index(0);
   release_set_methods_jmethod_ids(NULL);
   release_set_methods_cached_itable_indices(NULL);
   set_annotations(NULL);
@@ -2368,18 +2368,12 @@
   // unreference array name derived from this class name (arrays of an unloaded
   // class can't be referenced anymore).
   if (_array_name != NULL)  _array_name->decrement_refcount();
-  if (_source_file_name != NULL) _source_file_name->decrement_refcount();
   if (_source_debug_extension != NULL) FREE_C_HEAP_ARRAY(char, _source_debug_extension, mtClass);
 
   assert(_total_instanceKlass_count >= 1, "Sanity check");
   Atomic::dec(&_total_instanceKlass_count);
 }
 
-void InstanceKlass::set_source_file_name(Symbol* n) {
-  _source_file_name = n;
-  if (_source_file_name != NULL) _source_file_name->increment_refcount();
-}
-
 void InstanceKlass::set_source_debug_extension(char* array, int length) {
   if (array == NULL) {
     _source_debug_extension = NULL;
--- a/hotspot/src/share/vm/oops/instanceKlass.hpp	Mon Aug 26 17:36:10 2013 -0700
+++ b/hotspot/src/share/vm/oops/instanceKlass.hpp	Wed Jul 05 19:08:56 2017 +0200
@@ -201,14 +201,10 @@
   // number_of_inner_classes * 4 + enclosing_method_attribute_size.
   Array<jushort>* _inner_classes;
 
-  // Name of source file containing this klass, NULL if not specified.
-  Symbol*         _source_file_name;
   // the source debug extension for this klass, NULL if not specified.
   // Specified as UTF-8 string without terminating zero byte in the classfile,
   // it is stored in the instanceklass as a NULL-terminated UTF-8 string
   char*           _source_debug_extension;
-  // Generic signature, or null if none.
-  Symbol*         _generic_signature;
   // Array name derived from this class which needs unreferencing
   // if this class is unloaded.
   Symbol*         _array_name;
@@ -217,6 +213,12 @@
   // (including inherited fields but after header_size()).
   int             _nonstatic_field_size;
   int             _static_field_size;    // number words used by static fields (oop and non-oop) in this klass
+  // Constant pool index to the utf8 entry of the Generic signature,
+  // or 0 if none.
+  u2              _generic_signature_index;
+  // Constant pool index to the utf8 entry for the name of source file
+  // containing this klass, 0 if not specified.
+  u2              _source_file_name_index;
   u2              _static_oop_field_count;// number of static oop fields in this klass
   u2              _java_fields_count;    // The number of declared Java fields
   int             _nonstatic_oop_map_size;// size in words of nonstatic oop map blocks
@@ -570,8 +572,16 @@
   }
 
   // source file name
-  Symbol* source_file_name() const         { return _source_file_name; }
-  void set_source_file_name(Symbol* n);
+  Symbol* source_file_name() const               {
+    return (_source_file_name_index == 0) ?
+      (Symbol*)NULL : _constants->symbol_at(_source_file_name_index);
+  }
+  u2 source_file_name_index() const              {
+    return _source_file_name_index;
+  }
+  void set_source_file_name_index(u2 sourcefile_index) {
+    _source_file_name_index = sourcefile_index;
+  }
 
   // minor and major version numbers of class file
   u2 minor_version() const                 { return _minor_version; }
@@ -648,8 +658,16 @@
   void set_initial_method_idnum(u2 value)             { _idnum_allocated_count = value; }
 
   // generics support
-  Symbol* generic_signature() const                   { return _generic_signature; }
-  void set_generic_signature(Symbol* sig)             { _generic_signature = sig; }
+  Symbol* generic_signature() const                   {
+    return (_generic_signature_index == 0) ?
+      (Symbol*)NULL : _constants->symbol_at(_generic_signature_index);
+  }
+  u2 generic_signature_index() const                  {
+    return _generic_signature_index;
+  }
+  void set_generic_signature_index(u2 sig_index)      {
+    _generic_signature_index = sig_index;
+  }
 
   u2 enclosing_method_data(int offset);
   u2 enclosing_method_class_index() {
--- a/hotspot/src/share/vm/oops/klass.hpp	Mon Aug 26 17:36:10 2013 -0700
+++ b/hotspot/src/share/vm/oops/klass.hpp	Wed Jul 05 19:08:56 2017 +0200
@@ -352,7 +352,8 @@
   static int layout_helper_log2_element_size(jint lh) {
     assert(lh < (jint)_lh_neutral_value, "must be array");
     int l2esz = (lh >> _lh_log2_element_size_shift) & _lh_log2_element_size_mask;
-    assert(l2esz <= LogBitsPerLong, "sanity");
+    assert(l2esz <= LogBitsPerLong,
+        err_msg("sanity. l2esz: 0x%x for lh: 0x%x", (uint)l2esz, (uint)lh));
     return l2esz;
   }
   static jint array_layout_helper(jint tag, int hsize, BasicType etype, int log2_esize) {
@@ -703,6 +704,16 @@
 
   virtual void oop_verify_on(oop obj, outputStream* st);
 
+  static bool is_null(narrowKlass obj);
+  static bool is_null(Klass* obj);
+
+  // klass encoding for klass pointer in objects.
+  static narrowKlass encode_klass_not_null(Klass* v);
+  static narrowKlass encode_klass(Klass* v);
+
+  static Klass* decode_klass_not_null(narrowKlass v);
+  static Klass* decode_klass(narrowKlass v);
+
  private:
   // barriers used by klass_oop_store
   void klass_update_barrier_set(oop v);
--- a/hotspot/src/share/vm/oops/klass.inline.hpp	Mon Aug 26 17:36:10 2013 -0700
+++ b/hotspot/src/share/vm/oops/klass.inline.hpp	Wed Jul 05 19:08:56 2017 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2005, 2013, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -25,6 +25,7 @@
 #ifndef SHARE_VM_OOPS_KLASS_INLINE_HPP
 #define SHARE_VM_OOPS_KLASS_INLINE_HPP
 
+#include "memory/universe.hpp"
 #include "oops/klass.hpp"
 #include "oops/markOop.hpp"
 
@@ -33,4 +34,41 @@
   _prototype_header = header;
 }
 
+inline bool Klass::is_null(Klass* obj)  { return obj == NULL; }
+inline bool Klass::is_null(narrowKlass obj) { return obj == 0; }
+
+// Encoding and decoding for klass field.
+
+inline bool check_klass_alignment(Klass* obj) {
+  return (intptr_t)obj % KlassAlignmentInBytes == 0;
+}
+
+inline narrowKlass Klass::encode_klass_not_null(Klass* v) {
+  assert(!is_null(v), "klass value can never be zero");
+  assert(check_klass_alignment(v), "Address not aligned");
+  int    shift = Universe::narrow_klass_shift();
+  uint64_t pd = (uint64_t)(pointer_delta((void*)v, Universe::narrow_klass_base(), 1));
+  assert(KlassEncodingMetaspaceMax > pd, "change encoding max if new encoding");
+  uint64_t result = pd >> shift;
+  assert((result & CONST64(0xffffffff00000000)) == 0, "narrow klass pointer overflow");
+  assert(decode_klass(result) == v, "reversibility");
+  return (narrowKlass)result;
+}
+
+inline narrowKlass Klass::encode_klass(Klass* v) {
+  return is_null(v) ? (narrowKlass)0 : encode_klass_not_null(v);
+}
+
+inline Klass* Klass::decode_klass_not_null(narrowKlass v) {
+  assert(!is_null(v), "narrow klass value can never be zero");
+  int    shift = Universe::narrow_klass_shift();
+  Klass* result = (Klass*)(void*)((uintptr_t)Universe::narrow_klass_base() + ((uintptr_t)v << shift));
+  assert(check_klass_alignment(result), err_msg("address not aligned: " PTR_FORMAT, (void*) result));
+  return result;
+}
+
+inline Klass* Klass::decode_klass(narrowKlass v) {
+  return is_null(v) ? (Klass*)NULL : decode_klass_not_null(v);
+}
+
 #endif // SHARE_VM_OOPS_KLASS_INLINE_HPP
--- a/hotspot/src/share/vm/oops/method.cpp	Mon Aug 26 17:36:10 2013 -0700
+++ b/hotspot/src/share/vm/oops/method.cpp	Wed Jul 05 19:08:56 2017 +0200
@@ -747,6 +747,7 @@
       set_not_c2_compilable();
   }
   CompilationPolicy::policy()->disable_compilation(this);
+  assert(!CompilationPolicy::can_be_compiled(this, comp_level), "sanity check");
 }
 
 bool Method::is_not_osr_compilable(int comp_level) const {
@@ -773,6 +774,7 @@
       set_not_c2_osr_compilable();
   }
   CompilationPolicy::policy()->disable_compilation(this);
+  assert(!CompilationPolicy::can_be_osr_compiled(this, comp_level), "sanity check");
 }
 
 // Revert to using the interpreter and clear out the nmethod
--- a/hotspot/src/share/vm/oops/oop.hpp	Mon Aug 26 17:36:10 2013 -0700
+++ b/hotspot/src/share/vm/oops/oop.hpp	Wed Jul 05 19:08:56 2017 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -62,7 +62,7 @@
   volatile markOop  _mark;
   union _metadata {
     Klass*      _klass;
-    narrowOop       _compressed_klass;
+    narrowKlass _compressed_klass;
   } _metadata;
 
   // Fast access to barrier set.  Must be initialized.
@@ -84,7 +84,7 @@
   Klass* klass() const;
   Klass* klass_or_null() const volatile;
   Klass** klass_addr();
-  narrowOop* compressed_klass_addr();
+  narrowKlass* compressed_klass_addr();
 
   void set_klass(Klass* k);
 
@@ -189,13 +189,6 @@
                                          oop compare_value,
                                          bool prebarrier = false);
 
-  // klass encoding for klass pointer in objects.
-  static narrowOop encode_klass_not_null(Klass* v);
-  static narrowOop encode_klass(Klass* v);
-
-  static Klass* decode_klass_not_null(narrowOop v);
-  static Klass* decode_klass(narrowOop v);
-
   // Access to fields in a instanceOop through these methods.
   oop obj_field(int offset) const;
   volatile oop obj_field_volatile(int offset) const;
--- a/hotspot/src/share/vm/oops/oop.inline.hpp	Mon Aug 26 17:36:10 2013 -0700
+++ b/hotspot/src/share/vm/oops/oop.inline.hpp	Wed Jul 05 19:08:56 2017 +0200
@@ -35,7 +35,7 @@
 #include "memory/specialized_oop_closures.hpp"
 #include "oops/arrayKlass.hpp"
 #include "oops/arrayOop.hpp"
-#include "oops/klass.hpp"
+#include "oops/klass.inline.hpp"
 #include "oops/markOop.inline.hpp"
 #include "oops/oop.hpp"
 #include "runtime/atomic.hpp"
@@ -70,7 +70,7 @@
 
 inline Klass* oopDesc::klass() const {
   if (UseCompressedKlassPointers) {
-    return decode_klass_not_null(_metadata._compressed_klass);
+    return Klass::decode_klass_not_null(_metadata._compressed_klass);
   } else {
     return _metadata._klass;
   }
@@ -79,7 +79,7 @@
 inline Klass* oopDesc::klass_or_null() const volatile {
   // can be NULL in CMS
   if (UseCompressedKlassPointers) {
-    return decode_klass(_metadata._compressed_klass);
+    return Klass::decode_klass(_metadata._compressed_klass);
   } else {
     return _metadata._klass;
   }
@@ -87,7 +87,7 @@
 
 inline int oopDesc::klass_gap_offset_in_bytes() {
   assert(UseCompressedKlassPointers, "only applicable to compressed klass pointers");
-  return oopDesc::klass_offset_in_bytes() + sizeof(narrowOop);
+  return oopDesc::klass_offset_in_bytes() + sizeof(narrowKlass);  // the gap starts right after the compressed klass field
 }
 
 inline Klass** oopDesc::klass_addr() {
@@ -97,9 +97,9 @@
   return (Klass**) &_metadata._klass;
 }
 
-inline narrowOop* oopDesc::compressed_klass_addr() {
+inline narrowKlass* oopDesc::compressed_klass_addr() {
   assert(UseCompressedKlassPointers, "only called by compressed klass pointers");
-  return (narrowOop*) &_metadata._compressed_klass;
+  return &_metadata._compressed_klass;  // no cast: the address is returned with the field's own type
 }
 
 inline void oopDesc::set_klass(Klass* k) {
@@ -107,7 +107,7 @@
   assert(Universe::is_bootstrapping() || k != NULL, "must be a real Klass*");
   assert(Universe::is_bootstrapping() || k->is_klass(), "not a Klass*");
   if (UseCompressedKlassPointers) {
-    *compressed_klass_addr() = encode_klass_not_null(k);
+    *compressed_klass_addr() = Klass::encode_klass_not_null(k);
   } else {
     *klass_addr() = k;
   }
@@ -127,7 +127,7 @@
   // This is only to be used during GC, for from-space objects, so no
   // barrier is needed.
   if (UseCompressedKlassPointers) {
-    _metadata._compressed_klass = encode_heap_oop(k);  // may be null (parnew overflow handling)
+    _metadata._compressed_klass = (narrowKlass)encode_heap_oop(k);  // may be null (parnew overflow handling)
   } else {
     _metadata._klass = (Klass*)(address)k;
   }
@@ -136,7 +136,7 @@
 inline oop oopDesc::list_ptr_from_klass() {
   // This is only to be used during GC, for from-space objects.
   if (UseCompressedKlassPointers) {
-    return decode_heap_oop(_metadata._compressed_klass);
+    return decode_heap_oop((narrowOop)_metadata._compressed_klass);
   } else {
     // Special case for GC
     return (oop)(address)_metadata._klass;
@@ -176,7 +176,6 @@
 // the right type and inlines the appopriate code).
 
 inline bool oopDesc::is_null(oop obj)       { return obj == NULL; }
-inline bool oopDesc::is_null(Klass* obj)  { return obj == NULL; }
 inline bool oopDesc::is_null(narrowOop obj) { return obj == 0; }
 
 // Algorithm for encoding and decoding oops from 64 bit pointers to 32 bit
@@ -186,9 +185,6 @@
 inline bool check_obj_alignment(oop obj) {
   return (intptr_t)obj % MinObjAlignmentInBytes == 0;
 }
-inline bool check_klass_alignment(Klass* obj) {
-  return (intptr_t)obj % KlassAlignmentInBytes == 0;
-}
 
 inline narrowOop oopDesc::encode_heap_oop_not_null(oop v) {
   assert(!is_null(v), "oop value can never be zero");
@@ -224,39 +220,6 @@
 inline oop oopDesc::decode_heap_oop_not_null(oop v) { return v; }
 inline oop oopDesc::decode_heap_oop(oop v)  { return v; }
 
-// Encoding and decoding for klass field.  It is copied code, but someday
-// might not be the same as oop.
-
-inline narrowOop oopDesc::encode_klass_not_null(Klass* v) {
-  assert(!is_null(v), "klass value can never be zero");
-  assert(check_klass_alignment(v), "Address not aligned");
-  address base = Universe::narrow_klass_base();
-  int    shift = Universe::narrow_klass_shift();
-  uint64_t  pd = (uint64_t)(pointer_delta((void*)v, (void*)base, 1));
-  assert(KlassEncodingMetaspaceMax > pd, "change encoding max if new encoding");
-  uint64_t result = pd >> shift;
-  assert((result & CONST64(0xffffffff00000000)) == 0, "narrow klass pointer overflow");
-  assert(decode_klass(result) == v, "reversibility");
-  return (narrowOop)result;
-}
-
-inline narrowOop oopDesc::encode_klass(Klass* v) {
-  return (is_null(v)) ? (narrowOop)0 : encode_klass_not_null(v);
-}
-
-inline Klass* oopDesc::decode_klass_not_null(narrowOop v) {
-  assert(!is_null(v), "narrow oop value can never be zero");
-  address base = Universe::narrow_klass_base();
-  int    shift = Universe::narrow_klass_shift();
-  Klass* result = (Klass*)(void*)((uintptr_t)base + ((uintptr_t)v << shift));
-  assert(check_klass_alignment(result), err_msg("address not aligned: " PTR_FORMAT, (void*) result));
-  return result;
-}
-
-inline Klass* oopDesc::decode_klass(narrowOop v) {
-  return is_null(v) ? (Klass*)NULL : decode_klass_not_null(v);
-}
-
 // Load an oop out of the Java heap as is without decoding.
 // Called by GC to check for null before decoding.
 inline oop       oopDesc::load_heap_oop(oop* p)          { return *p; }
--- a/hotspot/src/share/vm/oops/oopsHierarchy.hpp	Mon Aug 26 17:36:10 2013 -0700
+++ b/hotspot/src/share/vm/oops/oopsHierarchy.hpp	Wed Jul 05 19:08:56 2017 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -33,6 +33,10 @@
 // of B, A's representation is a prefix of B's representation.
 
 typedef juint narrowOop; // Offset instead of address for an oop within a java object
+
+// narrowKlass holds a compressed Klass* and is used when compressed klass pointers are enabled.
+typedef juint  narrowKlass;
+
 typedef void* OopOrNarrowOopStar;
 typedef class   markOopDesc*                markOop;
 
--- a/hotspot/src/share/vm/opto/block.cpp	Mon Aug 26 17:36:10 2013 -0700
+++ b/hotspot/src/share/vm/opto/block.cpp	Wed Jul 05 19:08:56 2017 +0200
@@ -35,10 +35,6 @@
 #include "opto/rootnode.hpp"
 #include "utilities/copy.hpp"
 
-// Optimization - Graph Style
-
-
-//-----------------------------------------------------------------------------
 void Block_Array::grow( uint i ) {
   assert(i >= Max(), "must be an overflow");
   debug_only(_limit = i+1);
@@ -54,7 +50,6 @@
   Copy::zero_to_bytes( &_blocks[old], (_size-old)*sizeof(Block*) );
 }
 
-//=============================================================================
 void Block_List::remove(uint i) {
   assert(i < _cnt, "index out of bounds");
   Copy::conjoint_words_to_lower((HeapWord*)&_blocks[i+1], (HeapWord*)&_blocks[i], ((_cnt-i-1)*sizeof(Block*)));
@@ -76,8 +71,6 @@
 }
 #endif
 
-//=============================================================================
-
 uint Block::code_alignment() {
   // Check for Root block
   if (_pre_order == 0) return CodeEntryAlignment;
@@ -113,7 +106,6 @@
   return unit_sz; // no particular alignment
 }
 
-//-----------------------------------------------------------------------------
 // Compute the size of first 'inst_cnt' instructions in this block.
 // Return the number of instructions left to compute if the block has
 // less then 'inst_cnt' instructions. Stop, and return 0 if sum_size
@@ -138,7 +130,6 @@
   return inst_cnt;
 }
 
-//-----------------------------------------------------------------------------
 uint Block::find_node( const Node *n ) const {
   for( uint i = 0; i < _nodes.size(); i++ ) {
     if( _nodes[i] == n )
@@ -153,7 +144,6 @@
   _nodes.remove(find_node(n));
 }
 
-//------------------------------is_Empty---------------------------------------
 // Return empty status of a block.  Empty blocks contain only the head, other
 // ideal nodes, and an optional trailing goto.
 int Block::is_Empty() const {
@@ -192,7 +182,6 @@
   return not_empty;
 }
 
-//------------------------------has_uncommon_code------------------------------
 // Return true if the block's code implies that it is likely to be
 // executed infrequently.  Check to see if the block ends in a Halt or
 // a low probability call.
@@ -218,7 +207,6 @@
   return op == Op_Halt;
 }
 
-//------------------------------is_uncommon------------------------------------
 // True if block is low enough frequency or guarded by a test which
 // mostly does not go here.
 bool Block::is_uncommon(PhaseCFG* cfg) const {
@@ -271,7 +259,6 @@
   return false;
 }
 
-//------------------------------dump-------------------------------------------
 #ifndef PRODUCT
 void Block::dump_bidx(const Block* orig, outputStream* st) const {
   if (_pre_order) st->print("B%d",_pre_order);
@@ -364,13 +351,12 @@
 }
 #endif
 
-//=============================================================================
-//------------------------------PhaseCFG---------------------------------------
 PhaseCFG::PhaseCFG(Arena* arena, RootNode* root, Matcher& matcher)
 : Phase(CFG)
 , _block_arena(arena)
+, _root(root)
+, _matcher(matcher)
 , _node_to_block_mapping(arena)
-, _root(root)
 , _node_latency(NULL)
 #ifndef PRODUCT
 , _trace_opto_pipelining(TraceOptoPipelining || C->method_has_option("TraceOptoPipelining"))
@@ -390,11 +376,10 @@
   _goto->set_req(0,_goto);
 
   // Build the CFG in Reverse Post Order
-  _num_blocks = build_cfg();
-  _broot = get_block_for_node(_root);
+  _number_of_blocks = build_cfg();
+  _root_block = get_block_for_node(_root);
 }
 
-//------------------------------build_cfg--------------------------------------
 // Build a proper looking CFG.  Make every block begin with either a StartNode
 // or a RegionNode.  Make every block end with either a Goto, If or Return.
 // The RootNode both starts and ends it's own block.  Do this with a recursive
@@ -496,13 +481,12 @@
   return sum;
 }
 
-//------------------------------insert_goto_at---------------------------------
 // Inserts a goto & corresponding basic block between
 // block[block_no] and its succ_no'th successor block
 void PhaseCFG::insert_goto_at(uint block_no, uint succ_no) {
   // get block with block_no
-  assert(block_no < _num_blocks, "illegal block number");
-  Block* in  = _blocks[block_no];
+  assert(block_no < number_of_blocks(), "illegal block number");
+  Block* in  = get_block(block_no);
   // get successor block succ_no
   assert(succ_no < in->_num_succs, "illegal successor number");
   Block* out = in->_succs[succ_no];
@@ -537,11 +521,9 @@
   // Set the frequency of the new block
   block->_freq = freq;
   // add new basic block to basic block list
-  _blocks.insert(block_no + 1, block);
-  _num_blocks++;
+  add_block_at(block_no + 1, block);
 }
 
-//------------------------------no_flip_branch---------------------------------
 // Does this block end in a multiway branch that cannot have the default case
 // flipped for another case?
 static bool no_flip_branch( Block *b ) {
@@ -560,7 +542,6 @@
   return false;
 }
 
-//------------------------------convert_NeverBranch_to_Goto--------------------
 // Check for NeverBranch at block end.  This needs to become a GOTO to the
 // true target.  NeverBranch are treated as a conditional branch that always
 // goes the same direction for most of the optimizer and are used to give a
@@ -598,7 +579,6 @@
     dead->_nodes[k]->del_req(j);
 }
 
-//------------------------------move_to_next-----------------------------------
 // Helper function to move block bx to the slot following b_index. Return
 // true if the move is successful, otherwise false
 bool PhaseCFG::move_to_next(Block* bx, uint b_index) {
@@ -606,20 +586,22 @@
 
   // Return false if bx is already scheduled.
   uint bx_index = bx->_pre_order;
-  if ((bx_index <= b_index) && (_blocks[bx_index] == bx)) {
+  if ((bx_index <= b_index) && (get_block(bx_index) == bx)) {
     return false;
   }
 
   // Find the current index of block bx on the block list
   bx_index = b_index + 1;
-  while( bx_index < _num_blocks && _blocks[bx_index] != bx ) bx_index++;
-  assert(_blocks[bx_index] == bx, "block not found");
+  while (bx_index < number_of_blocks() && get_block(bx_index) != bx) {
+    bx_index++;
+  }
+  assert(get_block(bx_index) == bx, "block not found");
 
   // If the previous block conditionally falls into bx, return false,
   // because moving bx will create an extra jump.
   for(uint k = 1; k < bx->num_preds(); k++ ) {
     Block* pred = get_block_for_node(bx->pred(k));
-    if (pred == _blocks[bx_index-1]) {
+    if (pred == get_block(bx_index - 1)) {
       if (pred->_num_succs != 1) {
         return false;
       }
@@ -632,7 +614,6 @@
   return true;
 }
 
-//------------------------------move_to_end------------------------------------
 // Move empty and uncommon blocks to the end.
 void PhaseCFG::move_to_end(Block *b, uint i) {
   int e = b->is_Empty();
@@ -650,31 +631,31 @@
   _blocks.push(b);
 }
 
-//---------------------------set_loop_alignment--------------------------------
 // Set loop alignment for every block
 void PhaseCFG::set_loop_alignment() {
-  uint last = _num_blocks;
-  assert( _blocks[0] == _broot, "" );
+  uint last = number_of_blocks();
+  assert(get_block(0) == get_root_block(), "");  // slot 0 is expected to hold the root block
 
-  for (uint i = 1; i < last; i++ ) {
-    Block *b = _blocks[i];
-    if (b->head()->is_Loop()) {
-      b->set_loop_alignment(b);
+  for (uint i = 1; i < last; i++) {  // starts at 1, so the block in slot 0 is never visited
+    Block* block = get_block(i);
+    if (block->head()->is_Loop()) {  // only blocks whose head is a loop node are touched
+      block->set_loop_alignment(block);
     }
   }
 }
 
-//-----------------------------remove_empty------------------------------------
 // Make empty basic blocks to be "connector" blocks, Move uncommon blocks
 // to the end.
-void PhaseCFG::remove_empty() {
+void PhaseCFG::remove_empty_blocks() {
   // Move uncommon blocks to the end
-  uint last = _num_blocks;
-  assert( _blocks[0] == _broot, "" );
+  uint last = number_of_blocks();
+  assert(get_block(0) == get_root_block(), "");
 
   for (uint i = 1; i < last; i++) {
-    Block *b = _blocks[i];
-    if (b->is_connector()) break;
+    Block* block = get_block(i);
+    if (block->is_connector()) {
+      break;
+    }
 
     // Check for NeverBranch at block end.  This needs to become a GOTO to the
     // true target.  NeverBranch are treated as a conditional branch that
@@ -682,124 +663,127 @@
     // to give a fake exit path to infinite loops.  At this late stage they
     // need to turn into Goto's so that when you enter the infinite loop you
     // indeed hang.
-    if( b->_nodes[b->end_idx()]->Opcode() == Op_NeverBranch )
-      convert_NeverBranch_to_Goto(b);
+    if (block->_nodes[block->end_idx()]->Opcode() == Op_NeverBranch) {
+      convert_NeverBranch_to_Goto(block);
+    }
 
     // Look for uncommon blocks and move to end.
     if (!C->do_freq_based_layout()) {
-      if (b->is_uncommon(this)) {
-        move_to_end(b, i);
+      if (block->is_uncommon(this)) {
+        move_to_end(block, i);
         last--;                   // No longer check for being uncommon!
-        if( no_flip_branch(b) ) { // Fall-thru case must follow?
-          b = _blocks[i];         // Find the fall-thru block
-          move_to_end(b, i);
+        if (no_flip_branch(block)) { // Fall-thru case must follow?
+          // Find the fall-thru block
+          block = get_block(i);
+          move_to_end(block, i);
           last--;
         }
-        i--;                      // backup block counter post-increment
+        // backup block counter post-increment
+        i--;
       }
     }
   }
 
   // Move empty blocks to the end
-  last = _num_blocks;
+  last = number_of_blocks();
   for (uint i = 1; i < last; i++) {
-    Block *b = _blocks[i];
-    if (b->is_Empty() != Block::not_empty) {
-      move_to_end(b, i);
+    Block* block = get_block(i);
+    if (block->is_Empty() != Block::not_empty) {
+      move_to_end(block, i);
       last--;
       i--;
     }
   } // End of for all blocks
 }
 
-//-----------------------------fixup_flow--------------------------------------
 // Fix up the final control flow for basic blocks.
 void PhaseCFG::fixup_flow() {
   // Fixup final control flow for the blocks.  Remove jump-to-next
   // block.  If neither arm of a IF follows the conditional branch, we
   // have to add a second jump after the conditional.  We place the
   // TRUE branch target in succs[0] for both GOTOs and IFs.
-  for (uint i=0; i < _num_blocks; i++) {
-    Block *b = _blocks[i];
-    b->_pre_order = i;          // turn pre-order into block-index
+  for (uint i = 0; i < number_of_blocks(); i++) {
+    Block* block = get_block(i);
+    block->_pre_order = i;          // turn pre-order into block-index
 
     // Connector blocks need no further processing.
-    if (b->is_connector()) {
-      assert((i+1) == _num_blocks || _blocks[i+1]->is_connector(),
-             "All connector blocks should sink to the end");
+    if (block->is_connector()) {
+      assert((i+1) == number_of_blocks() || get_block(i + 1)->is_connector(), "All connector blocks should sink to the end");
       continue;
     }
-    assert(b->is_Empty() != Block::completely_empty,
-           "Empty blocks should be connectors");
+    assert(block->is_Empty() != Block::completely_empty, "Empty blocks should be connectors");
 
-    Block *bnext = (i < _num_blocks-1) ? _blocks[i+1] : NULL;
-    Block *bs0 = b->non_connector_successor(0);
+    Block* bnext = (i < number_of_blocks() - 1) ? get_block(i + 1) : NULL;
+    Block* bs0 = block->non_connector_successor(0);
 
     // Check for multi-way branches where I cannot negate the test to
     // exchange the true and false targets.
-    if( no_flip_branch( b ) ) {
+    if (no_flip_branch(block)) {
       // Find fall through case - if must fall into its target
-      int branch_idx = b->_nodes.size() - b->_num_succs;
-      for (uint j2 = 0; j2 < b->_num_succs; j2++) {
-        const ProjNode* p = b->_nodes[branch_idx + j2]->as_Proj();
+      int branch_idx = block->_nodes.size() - block->_num_succs;
+      for (uint j2 = 0; j2 < block->_num_succs; j2++) {
+        const ProjNode* p = block->_nodes[branch_idx + j2]->as_Proj();
         if (p->_con == 0) {
           // successor j2 is fall through case
-          if (b->non_connector_successor(j2) != bnext) {
+          if (block->non_connector_successor(j2) != bnext) {
             // but it is not the next block => insert a goto
             insert_goto_at(i, j2);
           }
           // Put taken branch in slot 0
-          if( j2 == 0 && b->_num_succs == 2) {
+          if (j2 == 0 && block->_num_succs == 2) {
             // Flip targets in succs map
-            Block *tbs0 = b->_succs[0];
-            Block *tbs1 = b->_succs[1];
-            b->_succs.map( 0, tbs1 );
-            b->_succs.map( 1, tbs0 );
+            Block *tbs0 = block->_succs[0];
+            Block *tbs1 = block->_succs[1];
+            block->_succs.map(0, tbs1);
+            block->_succs.map(1, tbs0);
           }
           break;
         }
       }
+
       // Remove all CatchProjs
-      for (uint j1 = 0; j1 < b->_num_succs; j1++) b->_nodes.pop();
+      for (uint j = 0; j < block->_num_succs; j++) {
+        block->_nodes.pop();
+      }
 
-    } else if (b->_num_succs == 1) {
+    } else if (block->_num_succs == 1) {
       // Block ends in a Goto?
       if (bnext == bs0) {
         // We fall into next block; remove the Goto
-        b->_nodes.pop();
+        block->_nodes.pop();
       }
 
-    } else if( b->_num_succs == 2 ) { // Block ends in a If?
+    } else if(block->_num_succs == 2) { // Block ends in a If?
       // Get opcode of 1st projection (matches _succs[0])
       // Note: Since this basic block has 2 exits, the last 2 nodes must
       //       be projections (in any order), the 3rd last node must be
       //       the IfNode (we have excluded other 2-way exits such as
       //       CatchNodes already).
-      MachNode *iff   = b->_nodes[b->_nodes.size()-3]->as_Mach();
-      ProjNode *proj0 = b->_nodes[b->_nodes.size()-2]->as_Proj();
-      ProjNode *proj1 = b->_nodes[b->_nodes.size()-1]->as_Proj();
+      MachNode* iff   = block->_nodes[block->_nodes.size() - 3]->as_Mach();
+      ProjNode* proj0 = block->_nodes[block->_nodes.size() - 2]->as_Proj();
+      ProjNode* proj1 = block->_nodes[block->_nodes.size() - 1]->as_Proj();
 
       // Assert that proj0 and succs[0] match up. Similarly for proj1 and succs[1].
-      assert(proj0->raw_out(0) == b->_succs[0]->head(), "Mismatch successor 0");
-      assert(proj1->raw_out(0) == b->_succs[1]->head(), "Mismatch successor 1");
+      assert(proj0->raw_out(0) == block->_succs[0]->head(), "Mismatch successor 0");
+      assert(proj1->raw_out(0) == block->_succs[1]->head(), "Mismatch successor 1");
 
-      Block *bs1 = b->non_connector_successor(1);
+      Block* bs1 = block->non_connector_successor(1);
 
       // Check for neither successor block following the current
       // block ending in a conditional. If so, move one of the
       // successors after the current one, provided that the
       // successor was previously unscheduled, but moveable
       // (i.e., all paths to it involve a branch).
-      if( !C->do_freq_based_layout() && bnext != bs0 && bnext != bs1 ) {
+      if (!C->do_freq_based_layout() && bnext != bs0 && bnext != bs1) {
         // Choose the more common successor based on the probability
         // of the conditional branch.
-        Block *bx = bs0;
-        Block *by = bs1;
+        Block* bx = bs0;
+        Block* by = bs1;
 
         // _prob is the probability of taking the true path. Make
         // p the probability of taking successor #1.
         float p = iff->as_MachIf()->_prob;
-        if( proj0->Opcode() == Op_IfTrue ) {
+        if (proj0->Opcode() == Op_IfTrue) {
           p = 1.0 - p;
         }
 
@@ -826,14 +810,16 @@
       // succs[1].
       if (bnext == bs0) {
         // Fall-thru case in succs[0], so flip targets in succs map
-        Block *tbs0 = b->_succs[0];
-        Block *tbs1 = b->_succs[1];
-        b->_succs.map( 0, tbs1 );
-        b->_succs.map( 1, tbs0 );
+        Block* tbs0 = block->_succs[0];
+        Block* tbs1 = block->_succs[1];
+        block->_succs.map(0, tbs1);
+        block->_succs.map(1, tbs0);
         // Flip projection for each target
-        { ProjNode *tmp = proj0; proj0 = proj1; proj1 = tmp; }
+        ProjNode* tmp = proj0;
+        proj0 = proj1;
+        proj1 = tmp;
 
-      } else if( bnext != bs1 ) {
+      } else if(bnext != bs1) {
         // Need a double-branch
         // The existing conditional branch need not change.
         // Add a unconditional branch to the false target.
@@ -843,12 +829,12 @@
       }
 
       // Make sure we TRUE branch to the target
-      if( proj0->Opcode() == Op_IfFalse ) {
+      if (proj0->Opcode() == Op_IfFalse) {
         iff->as_MachIf()->negate();
       }
 
-      b->_nodes.pop();          // Remove IfFalse & IfTrue projections
-      b->_nodes.pop();
+      block->_nodes.pop();          // Remove IfFalse & IfTrue projections
+      block->_nodes.pop();
 
     } else {
       // Multi-exit block, e.g. a switch statement
@@ -858,7 +844,6 @@
 }
 
 
-//------------------------------dump-------------------------------------------
 #ifndef PRODUCT
 void PhaseCFG::_dump_cfg( const Node *end, VectorSet &visited  ) const {
   const Node *x = end->is_block_proj();
@@ -884,10 +869,11 @@
 }
 
 void PhaseCFG::dump( ) const {
-  tty->print("\n--- CFG --- %d BBs\n",_num_blocks);
+  tty->print("\n--- CFG --- %d BBs\n", number_of_blocks());
   if (_blocks.size()) {        // Did we do basic-block layout?
-    for (uint i = 0; i < _num_blocks; i++) {
-      _blocks[i]->dump(this);
+    for (uint i = 0; i < number_of_blocks(); i++) {
+      const Block* block = get_block(i);
+      block->dump(this);
     }
   } else {                      // Else do it with a DFS
     VectorSet visited(_block_arena);
@@ -896,27 +882,26 @@
 }
 
 void PhaseCFG::dump_headers() {
-  for( uint i = 0; i < _num_blocks; i++ ) {
-    if (_blocks[i]) {
-      _blocks[i]->dump_head(this);
+  for (uint i = 0; i < number_of_blocks(); i++) {  // walk every slot of the block list in order
+    Block* block = get_block(i);
+    if (block != NULL) {  // skip slots that hold no block
+      block->dump_head(this);
     }
   }
 }
 
-void PhaseCFG::verify( ) const {
+void PhaseCFG::verify() const {
 #ifdef ASSERT
   // Verify sane CFG
-  for (uint i = 0; i < _num_blocks; i++) {
-    Block *b = _blocks[i];
-    uint cnt = b->_nodes.size();
+  for (uint i = 0; i < number_of_blocks(); i++) {
+    Block* block = get_block(i);
+    uint cnt = block->_nodes.size();
     uint j;
     for (j = 0; j < cnt; j++)  {
-      Node *n = b->_nodes[j];
-      assert(get_block_for_node(n) == b, "");
-      if (j >= 1 && n->is_Mach() &&
-          n->as_Mach()->ideal_Opcode() == Op_CreateEx) {
-        assert(j == 1 || b->_nodes[j-1]->is_Phi(),
-               "CreateEx must be first instruction in block");
+      Node *n = block->_nodes[j];
+      assert(get_block_for_node(n) == block, "");
+      if (j >= 1 && n->is_Mach() && n->as_Mach()->ideal_Opcode() == Op_CreateEx) {
+        assert(j == 1 || block->_nodes[j-1]->is_Phi(), "CreateEx must be first instruction in block");
       }
       for (uint k = 0; k < n->req(); k++) {
         Node *def = n->in(k);
@@ -926,8 +911,7 @@
           // Uses must follow their definition if they are at the same block.
           // Mostly done to check that MachSpillCopy nodes are placed correctly
           // when CreateEx node is moved in build_ifg_physical().
-          if (get_block_for_node(def) == b &&
-              !(b->head()->is_Loop() && n->is_Phi()) &&
+          if (get_block_for_node(def) == block && !(block->head()->is_Loop() && n->is_Phi()) &&
               // See (+++) comment in reg_split.cpp
               !(n->jvms() != NULL && n->jvms()->is_monitor_use(k))) {
             bool is_loop = false;
@@ -939,29 +923,29 @@
                 }
               }
             }
-            assert(is_loop || b->find_node(def) < j, "uses must follow definitions");
+            assert(is_loop || block->find_node(def) < j, "uses must follow definitions");
           }
         }
       }
     }
 
-    j = b->end_idx();
-    Node *bp = (Node*)b->_nodes[b->_nodes.size()-1]->is_block_proj();
-    assert( bp, "last instruction must be a block proj" );
-    assert( bp == b->_nodes[j], "wrong number of successors for this block" );
+    j = block->end_idx();
+    Node* bp = (Node*)block->_nodes[block->_nodes.size() - 1]->is_block_proj();
+    assert(bp, "last instruction must be a block proj");
+    assert(bp == block->_nodes[j], "wrong number of successors for this block");
     if (bp->is_Catch()) {
-      while (b->_nodes[--j]->is_MachProj()) ;
-      assert(b->_nodes[j]->is_MachCall(), "CatchProj must follow call");
+      while (block->_nodes[--j]->is_MachProj()) {
+        ;
+      }
+      assert(block->_nodes[j]->is_MachCall(), "CatchProj must follow call");
     } else if (bp->is_Mach() && bp->as_Mach()->ideal_Opcode() == Op_If) {
-      assert(b->_num_succs == 2, "Conditional branch must have two targets");
+      assert(block->_num_succs == 2, "Conditional branch must have two targets");
     }
   }
 #endif
 }
 #endif
 
-//=============================================================================
-//------------------------------UnionFind--------------------------------------
 UnionFind::UnionFind( uint max ) : _cnt(max), _max(max), _indices(NEW_RESOURCE_ARRAY(uint,max)) {
   Copy::zero_to_bytes( _indices, sizeof(uint)*max );
 }
@@ -986,7 +970,6 @@
   for( uint i=0; i<max; i++ ) map(i,i);
 }
 
-//------------------------------Find_compress----------------------------------
 // Straight out of Tarjan's union-find algorithm
 uint UnionFind::Find_compress( uint idx ) {
   uint cur  = idx;
@@ -1006,7 +989,6 @@
   return idx;
 }
 
-//------------------------------Find_const-------------------------------------
 // Like Find above, but no path compress, so bad asymptotic behavior
 uint UnionFind::Find_const( uint idx ) const {
   if( idx == 0 ) return idx;    // Ignore the zero idx
@@ -1021,7 +1003,6 @@
   return next;
 }
 
-//------------------------------Union------------------------------------------
 // union 2 sets together.
 void UnionFind::Union( uint idx1, uint idx2 ) {
   uint src = Find(idx1);
@@ -1070,9 +1051,6 @@
 }
 #endif
 
-//=============================================================================
-
-//------------------------------edge_order-------------------------------------
 // Comparison function for edges
 static int edge_order(CFGEdge **e0, CFGEdge **e1) {
   float freq0 = (*e0)->freq();
@@ -1087,7 +1065,6 @@
   return dist1 - dist0;
 }
 
-//------------------------------trace_frequency_order--------------------------
 // Comparison function for edges
 extern "C" int trace_frequency_order(const void *p0, const void *p1) {
   Trace *tr0 = *(Trace **) p0;
@@ -1113,17 +1090,15 @@
   return diff;
 }
 
-//------------------------------find_edges-------------------------------------
 // Find edges of interest, i.e, those which can fall through. Presumes that
 // edges which don't fall through are of low frequency and can be generally
 // ignored.  Initialize the list of traces.
-void PhaseBlockLayout::find_edges()
-{
+void PhaseBlockLayout::find_edges() {
   // Walk the blocks, creating edges and Traces
   uint i;
   Trace *tr = NULL;
-  for (i = 0; i < _cfg._num_blocks; i++) {
-    Block *b = _cfg._blocks[i];
+  for (i = 0; i < _cfg.number_of_blocks(); i++) {
+    Block* b = _cfg.get_block(i);
     tr = new Trace(b, next, prev);
     traces[tr->id()] = tr;
 
@@ -1147,7 +1122,7 @@
       if (n->num_preds() != 1) break;
 
       i++;
-      assert(n = _cfg._blocks[i], "expecting next block");
+      assert(n == _cfg.get_block(i), "expecting next block");  // compare only: an assignment here would change n in debug builds but not in product builds
       tr->append(n);
       uf->map(n->_pre_order, tr->id());
       traces[n->_pre_order] = NULL;
@@ -1171,8 +1146,8 @@
   }
 
   // Group connector blocks into one trace
-  for (i++; i < _cfg._num_blocks; i++) {
-    Block *b = _cfg._blocks[i];
+  for (i++; i < _cfg.number_of_blocks(); i++) {
+    Block *b = _cfg.get_block(i);
     assert(b->is_connector(), "connector blocks at the end");
     tr->append(b);
     uf->map(b->_pre_order, tr->id());
@@ -1180,10 +1155,8 @@
   }
 }
 
-//------------------------------union_traces----------------------------------
 // Union two traces together in uf, and null out the trace in the list
-void PhaseBlockLayout::union_traces(Trace* updated_trace, Trace* old_trace)
-{
+void PhaseBlockLayout::union_traces(Trace* updated_trace, Trace* old_trace) {
   uint old_id = old_trace->id();
   uint updated_id = updated_trace->id();
 
@@ -1207,10 +1180,8 @@
   traces[hi_id] = NULL;
 }
 
-//------------------------------grow_traces-------------------------------------
 // Append traces together via the most frequently executed edges
-void PhaseBlockLayout::grow_traces()
-{
+void PhaseBlockLayout::grow_traces() {
   // Order the edges, and drive the growth of Traces via the most
   // frequently executed edges.
   edges->sort(edge_order);
@@ -1252,11 +1223,9 @@
   }
 }
 
-//------------------------------merge_traces-----------------------------------
 // Embed one trace into another, if the fork or join points are sufficiently
 // balanced.
-void PhaseBlockLayout::merge_traces(bool fall_thru_only)
-{
+void PhaseBlockLayout::merge_traces(bool fall_thru_only) {
   // Walk the edge list a another time, looking at unprocessed edges.
   // Fold in diamonds
   for (int i = 0; i < edges->length(); i++) {
@@ -1310,7 +1279,7 @@
         src_trace->insert_after(src_block, targ_trace);
         union_traces(src_trace, targ_trace);
       } else if (src_at_tail) {
-        if (src_trace != trace(_cfg._broot)) {
+        if (src_trace != trace(_cfg.get_root_block())) {
           e->set_state(CFGEdge::connected);
           targ_trace->insert_before(targ_block, src_trace);
           union_traces(targ_trace, src_trace);
@@ -1319,7 +1288,7 @@
     } else if (e->state() == CFGEdge::open) {
       // Append traces, even without a fall-thru connection.
       // But leave root entry at the beginning of the block list.
-      if (targ_trace != trace(_cfg._broot)) {
+      if (targ_trace != trace(_cfg.get_root_block())) {
         e->set_state(CFGEdge::connected);
         src_trace->append(targ_trace);
         union_traces(src_trace, targ_trace);
@@ -1328,11 +1297,9 @@
   }
 }
 
-//----------------------------reorder_traces-----------------------------------
 // Order the sequence of the traces in some desirable way, and fixup the
 // jumps at the end of each block.
-void PhaseBlockLayout::reorder_traces(int count)
-{
+void PhaseBlockLayout::reorder_traces(int count) {
   ResourceArea *area = Thread::current()->resource_area();
   Trace ** new_traces = NEW_ARENA_ARRAY(area, Trace *, count);
   Block_List worklist;
@@ -1347,15 +1314,14 @@
   }
 
   // The entry block should be first on the new trace list.
-  Trace *tr = trace(_cfg._broot);
+  Trace *tr = trace(_cfg.get_root_block());
   assert(tr == new_traces[0], "entry trace misplaced");
 
   // Sort the new trace list by frequency
   qsort(new_traces + 1, new_count - 1, sizeof(new_traces[0]), trace_frequency_order);
 
   // Patch up the successor blocks
-  _cfg._blocks.reset();
-  _cfg._num_blocks = 0;
+  _cfg.clear_blocks();
   for (int i = 0; i < new_count; i++) {
     Trace *tr = new_traces[i];
     if (tr != NULL) {
@@ -1364,17 +1330,15 @@
   }
 }
 
-//------------------------------PhaseBlockLayout-------------------------------
 // Order basic blocks based on frequency
-PhaseBlockLayout::PhaseBlockLayout(PhaseCFG &cfg) :
-  Phase(BlockLayout),
-  _cfg(cfg)
-{
+PhaseBlockLayout::PhaseBlockLayout(PhaseCFG &cfg)
+: Phase(BlockLayout)
+, _cfg(cfg) {
   ResourceMark rm;
   ResourceArea *area = Thread::current()->resource_area();
 
   // List of traces
-  int size = _cfg._num_blocks + 1;
+  int size = _cfg.number_of_blocks() + 1;
   traces = NEW_ARENA_ARRAY(area, Trace *, size);
   memset(traces, 0, size*sizeof(Trace*));
   next = NEW_ARENA_ARRAY(area, Block *, size);
@@ -1407,11 +1371,10 @@
   // Re-order all the remaining traces by frequency
   reorder_traces(size);
 
-  assert(_cfg._num_blocks >= (uint) (size - 1), "number of blocks can not shrink");
+  assert(_cfg.number_of_blocks() >= (uint) (size - 1), "number of blocks can not shrink");
 }
 
 
-//------------------------------backedge---------------------------------------
 // Edge e completes a loop in a trace. If the target block is head of the
 // loop, rotate the loop block so that the loop ends in a conditional branch.
 bool Trace::backedge(CFGEdge *e) {
@@ -1463,14 +1426,12 @@
   return loop_rotated;
 }
 
-//------------------------------fixup_blocks-----------------------------------
 // push blocks onto the CFG list
 // ensure that blocks have the correct two-way branch sense
 void Trace::fixup_blocks(PhaseCFG &cfg) {
   Block *last = last_block();
   for (Block *b = first_block(); b != NULL; b = next(b)) {
-    cfg._blocks.push(b);
-    cfg._num_blocks++;
+    cfg.add_block(b);
     if (!b->is_connector()) {
       int nfallthru = b->num_fall_throughs();
       if (b != last) {
--- a/hotspot/src/share/vm/opto/block.hpp	Mon Aug 26 17:36:10 2013 -0700
+++ b/hotspot/src/share/vm/opto/block.hpp	Wed Jul 05 19:08:56 2017 +0200
@@ -348,20 +348,77 @@
 class PhaseCFG : public Phase {
   friend class VMStructs;
  private:
+
+  // Root of whole program
+  RootNode* _root;
+
+  // The block containing the root node
+  Block* _root_block;
+
+  // List of basic blocks that are created during CFG creation
+  Block_List _blocks;
+
+  // Count of basic blocks
+  uint _number_of_blocks;
+
   // Arena for the blocks to be stored in
   Arena* _block_arena;
 
+  // The matcher for this compilation
+  Matcher& _matcher;
+
   // Map nodes to owning basic block
   Block_Array _node_to_block_mapping;
 
+  // Loop from the root
+  CFGLoop* _root_loop;
+
+  // Outmost loop frequency
+  float _outer_loop_frequency;
+
+  // Per node latency estimation, valid only during GCM
+  GrowableArray<uint>* _node_latency;
+
   // Build a proper looking cfg.  Return count of basic blocks
   uint build_cfg();
 
-  // Perform DFS search.
+  // Build the dominator tree so that we know where we can move instructions
+  void build_dominator_tree();
+
+  // Estimate block frequencies based on IfNode probabilities, so that we know where we want to move instructions
+  void estimate_block_frequency();
+
+  // Global Code Motion.  See Click's PLDI95 paper.  Place Nodes in specific
+  // basic blocks; i.e. _node_to_block_mapping now maps _idx for all Nodes to some Block.
+  // Move nodes to ensure correctness from GVN and also try to move nodes out of loops.
+  void global_code_motion();
+
+  // Schedule Nodes early in their basic blocks.
+  bool schedule_early(VectorSet &visited, Node_List &roots);
+
+  // For each node, find the latest block it can be scheduled into
+  // and then select the cheapest block between the latest and earliest
+  // block to place the node.
+  void schedule_late(VectorSet &visited, Node_List &stack);
+
+  // Compute the (backwards) latency of a node from a single use
+  int latency_from_use(Node *n, const Node *def, Node *use);
+
+  // Compute the (backwards) latency of a node from the uses of this instruction
+  void partial_latency_of_defs(Node *n);
+
+  // Compute the instruction global latency with a backwards walk
+  void compute_latencies_backwards(VectorSet &visited, Node_List &stack);
+
+  // Pick a block between early and late that is a cheaper alternative
+  // to late. Helper for schedule_late.
+  Block* hoist_to_cheaper_block(Block* LCA, Block* early, Node* self);
+
+  // Perform a Depth First Search (DFS).
   // Setup 'vertex' as DFS to vertex mapping.
   // Setup 'semi' as vertex to DFS mapping.
   // Set 'parent' to DFS parent.
-  uint DFS( Tarjan *tarjan );
+  uint do_DFS(Tarjan* tarjan, uint rpo_counter);
 
   // Helper function to insert a node into a block
   void schedule_node_into_block( Node *n, Block *b );
@@ -372,7 +429,8 @@
   void schedule_pinned_nodes( VectorSet &visited );
 
   // I'll need a few machine-specific GotoNodes.  Clone from this one.
-  MachNode *_goto;
+  // Used when building the CFG and creating end nodes for blocks.
+  MachNode* _goto;
 
   Block* insert_anti_dependences(Block* LCA, Node* load, bool verify = false);
   void verify_anti_dependences(Block* LCA, Node* load) {
@@ -380,17 +438,77 @@
     insert_anti_dependences(LCA, load, true);
   }
 
+  bool move_to_next(Block* bx, uint b_index);
+  void move_to_end(Block* bx, uint b_index);
+
+  void insert_goto_at(uint block_no, uint succ_no);
+
+  // Check for NeverBranch at block end.  This needs to become a GOTO to the
+  // true target.  NeverBranch are treated as a conditional branch that always
+  // goes the same direction for most of the optimizer and are used to give a
+  // fake exit path to infinite loops.  At this late stage they need to turn
+  // into Goto's so that when you enter the infinite loop you indeed hang.
+  void convert_NeverBranch_to_Goto(Block *b);
+
+  CFGLoop* create_loop_tree();
+
+  #ifndef PRODUCT
+  bool _trace_opto_pipelining;  // tracing flag
+  #endif
+
  public:
   PhaseCFG(Arena* arena, RootNode* root, Matcher& matcher);
 
-  uint _num_blocks;             // Count of basic blocks
-  Block_List _blocks;           // List of basic blocks
-  RootNode *_root;              // Root of whole program
-  Block *_broot;                // Basic block of root
-  uint _rpo_ctr;
-  CFGLoop* _root_loop;
-  float _outer_loop_freq;       // Outmost loop frequency
+  void set_latency_for_node(Node* node, int latency) {
+    _node_latency->at_put_grow(node->_idx, latency);
+  }
 
+  uint get_latency_for_node(Node* node) {
+    return _node_latency->at_grow(node->_idx);
+  }
+
+  // Get the outermost loop frequency
+  float get_outer_loop_frequency() const {
+    return _outer_loop_frequency;
+  }
+
+  // Get the root node of the CFG
+  RootNode* get_root_node() const {
+    return _root;
+  }
+
+  // Get the block of the root node
+  Block* get_root_block() const {
+    return _root_block;
+  }
+
+  // Adds a block at position pos and moves the later ones one step
+  void add_block_at(uint pos, Block* block) {
+    _blocks.insert(pos, block);
+    _number_of_blocks++;
+  }
+
+  // Adds a block to the top of the block list
+  void add_block(Block* block) {
+    _blocks.push(block);
+    _number_of_blocks++;
+  }
+
+  // Clear the list of blocks
+  void clear_blocks() {
+    _blocks.reset();
+    _number_of_blocks = 0;
+  }
+
+  // Get the block at position pos in _blocks
+  Block* get_block(uint pos) const {
+    return _blocks[pos];
+  }
+
+  // Number of blocks
+  uint number_of_blocks() const {
+    return _number_of_blocks;
+  }
 
   // set which block this node should reside in
   void map_node_to_block(const Node* node, Block* block) {
@@ -412,72 +530,26 @@
     return (_node_to_block_mapping.lookup(node->_idx) != NULL);
   }
 
-  // Per node latency estimation, valid only during GCM
-  GrowableArray<uint> *_node_latency;
-
-#ifndef PRODUCT
-  bool _trace_opto_pipelining;  // tracing flag
-#endif
-
 #ifdef ASSERT
   Unique_Node_List _raw_oops;
 #endif
 
-  // Build dominators
-  void Dominators();
-
-  // Estimate block frequencies based on IfNode probabilities
-  void Estimate_Block_Frequency();
-
-  // Global Code Motion.  See Click's PLDI95 paper.  Place Nodes in specific
-  // basic blocks; i.e. _node_to_block_mapping now maps _idx for all Nodes to some Block.
-  void GlobalCodeMotion( Matcher &m, uint unique, Node_List &proj_list );
+  // Do global code motion by first building the dominator tree and estimating block frequencies
+  // Returns true on success
+  bool do_global_code_motion();
 
   // Compute the (backwards) latency of a node from the uses
   void latency_from_uses(Node *n);
 
-  // Compute the (backwards) latency of a node from a single use
-  int latency_from_use(Node *n, const Node *def, Node *use);
-
-  // Compute the (backwards) latency of a node from the uses of this instruction
-  void partial_latency_of_defs(Node *n);
-
-  // Schedule Nodes early in their basic blocks.
-  bool schedule_early(VectorSet &visited, Node_List &roots);
-
-  // For each node, find the latest block it can be scheduled into
-  // and then select the cheapest block between the latest and earliest
-  // block to place the node.
-  void schedule_late(VectorSet &visited, Node_List &stack);
-
-  // Pick a block between early and late that is a cheaper alternative
-  // to late. Helper for schedule_late.
-  Block* hoist_to_cheaper_block(Block* LCA, Block* early, Node* self);
-
-  // Compute the instruction global latency with a backwards walk
-  void ComputeLatenciesBackwards(VectorSet &visited, Node_List &stack);
-
   // Set loop alignment
   void set_loop_alignment();
 
   // Remove empty basic blocks
-  void remove_empty();
+  void remove_empty_blocks();
   void fixup_flow();
-  bool move_to_next(Block* bx, uint b_index);
-  void move_to_end(Block* bx, uint b_index);
-  void insert_goto_at(uint block_no, uint succ_no);
 
-  // Check for NeverBranch at block end.  This needs to become a GOTO to the
-  // true target.  NeverBranch are treated as a conditional branch that always
-  // goes the same direction for most of the optimizer and are used to give a
-  // fake exit path to infinite loops.  At this late stage they need to turn
-  // into Goto's so that when you enter the infinite loop you indeed hang.
-  void convert_NeverBranch_to_Goto(Block *b);
-
-  CFGLoop* create_loop_tree();
-
-  // Insert a node into a block, and update the _bbs
-  void insert( Block *b, uint idx, Node *n ) {
+  // Insert a node into a block at index and map the node to the block
+  void insert(Block *b, uint idx, Node *n) {
     b->_nodes.insert( idx, n );
     map_node_to_block(n, b);
   }
--- a/hotspot/src/share/vm/opto/buildOopMap.cpp	Mon Aug 26 17:36:10 2013 -0700
+++ b/hotspot/src/share/vm/opto/buildOopMap.cpp	Wed Jul 05 19:08:56 2017 +0200
@@ -87,7 +87,6 @@
 // OptoReg::Bad for not-callee-saved.
 
 
-//------------------------------OopFlow----------------------------------------
 // Structure to pass around
 struct OopFlow : public ResourceObj {
   short *_callees;              // Array mapping register to callee-saved
@@ -119,7 +118,6 @@
   OopMap *build_oop_map( Node *n, int max_reg, PhaseRegAlloc *regalloc, int* live );
 };
 
-//------------------------------compute_reach----------------------------------
 // Given reaching-defs for this block start, compute it for this block end
 void OopFlow::compute_reach( PhaseRegAlloc *regalloc, int max_reg, Dict *safehash ) {
 
@@ -177,7 +175,6 @@
   }
 }
 
-//------------------------------merge------------------------------------------
 // Merge the given flow into the 'this' flow
 void OopFlow::merge( OopFlow *flow, int max_reg ) {
   assert( _b == NULL, "merging into a happy flow" );
@@ -197,14 +194,12 @@
 
 }
 
-//------------------------------clone------------------------------------------
 void OopFlow::clone( OopFlow *flow, int max_size ) {
   _b = flow->_b;
   memcpy( _callees, flow->_callees, sizeof(short)*max_size);
   memcpy( _defs   , flow->_defs   , sizeof(Node*)*max_size);
 }
 
-//------------------------------make-------------------------------------------
 OopFlow *OopFlow::make( Arena *A, int max_size, Compile* C ) {
   short *callees = NEW_ARENA_ARRAY(A,short,max_size+1);
   Node **defs    = NEW_ARENA_ARRAY(A,Node*,max_size+1);
@@ -215,7 +210,6 @@
   return flow;
 }
 
-//------------------------------bit twiddlers----------------------------------
 static int get_live_bit( int *live, int reg ) {
   return live[reg>>LogBitsPerInt] &   (1<<(reg&(BitsPerInt-1))); }
 static void set_live_bit( int *live, int reg ) {
@@ -223,7 +217,6 @@
 static void clr_live_bit( int *live, int reg ) {
          live[reg>>LogBitsPerInt] &= ~(1<<(reg&(BitsPerInt-1))); }
 
-//------------------------------build_oop_map----------------------------------
 // Build an oopmap from the current flow info
 OopMap *OopFlow::build_oop_map( Node *n, int max_reg, PhaseRegAlloc *regalloc, int* live ) {
   int framesize = regalloc->_framesize;
@@ -412,19 +405,18 @@
   return omap;
 }
 
-//------------------------------do_liveness------------------------------------
 // Compute backwards liveness on registers
-static void do_liveness( PhaseRegAlloc *regalloc, PhaseCFG *cfg, Block_List *worklist, int max_reg_ints, Arena *A, Dict *safehash ) {
-  int *live = NEW_ARENA_ARRAY(A, int, (cfg->_num_blocks+1) * max_reg_ints);
-  int *tmp_live = &live[cfg->_num_blocks * max_reg_ints];
-  Node *root = cfg->C->root();
+static void do_liveness(PhaseRegAlloc* regalloc, PhaseCFG* cfg, Block_List* worklist, int max_reg_ints, Arena* A, Dict* safehash) {
+  int* live = NEW_ARENA_ARRAY(A, int, (cfg->number_of_blocks() + 1) * max_reg_ints);
+  int* tmp_live = &live[cfg->number_of_blocks() * max_reg_ints];
+  Node* root = cfg->get_root_node();
   // On CISC platforms, get the node representing the stack pointer  that regalloc
   // used for spills
   Node *fp = NodeSentinel;
   if (UseCISCSpill && root->req() > 1) {
     fp = root->in(1)->in(TypeFunc::FramePtr);
   }
-  memset( live, 0, cfg->_num_blocks * (max_reg_ints<<LogBytesPerInt) );
+  memset(live, 0, cfg->number_of_blocks() * (max_reg_ints << LogBytesPerInt));
   // Push preds onto worklist
   for (uint i = 1; i < root->req(); i++) {
     Block* block = cfg->get_block_for_node(root->in(i));
@@ -549,29 +541,32 @@
     // Scan for any missing safepoints.  Happens to infinite loops
     // ala ZKM.jar
     uint i;
-    for( i=1; i<cfg->_num_blocks; i++ ) {
-      Block *b = cfg->_blocks[i];
+    for (i = 1; i < cfg->number_of_blocks(); i++) {
+      Block* block = cfg->get_block(i);
       uint j;
-      for( j=1; j<b->_nodes.size(); j++ )
-        if( b->_nodes[j]->jvms() &&
-            (*safehash)[b->_nodes[j]] == NULL )
+      for (j = 1; j < block->_nodes.size(); j++) {
+        if (block->_nodes[j]->jvms() && (*safehash)[block->_nodes[j]] == NULL) {
            break;
-      if( j<b->_nodes.size() ) break;
+        }
+      }
+      if (j < block->_nodes.size()) {
+        break;
+      }
     }
-    if( i == cfg->_num_blocks )
+    if (i == cfg->number_of_blocks()) {
       break;                    // Got 'em all
+    }
 #ifndef PRODUCT
     if( PrintOpto && Verbose )
       tty->print_cr("retripping live calc");
 #endif
     // Force the issue (expensively): recheck everybody
-    for( i=1; i<cfg->_num_blocks; i++ )
-      worklist->push(cfg->_blocks[i]);
+    for (i = 1; i < cfg->number_of_blocks(); i++) {
+      worklist->push(cfg->get_block(i));
+    }
   }
-
 }
 
-//------------------------------BuildOopMaps-----------------------------------
 // Collect GC mask info - where are all the OOPs?
 void Compile::BuildOopMaps() {
   NOT_PRODUCT( TracePhase t3("bldOopMaps", &_t_buildOopMaps, TimeCompiler); )
@@ -592,12 +587,12 @@
   OopFlow *free_list = NULL;    // Free, unused
 
   // Array mapping blocks to completed oopflows
-  OopFlow **flows = NEW_ARENA_ARRAY(A, OopFlow*, _cfg->_num_blocks);
-  memset( flows, 0, _cfg->_num_blocks*sizeof(OopFlow*) );
+  OopFlow **flows = NEW_ARENA_ARRAY(A, OopFlow*, _cfg->number_of_blocks());
+  memset( flows, 0, _cfg->number_of_blocks() * sizeof(OopFlow*) );
 
 
   // Do the first block 'by hand' to prime the worklist
-  Block *entry = _cfg->_blocks[1];
+  Block *entry = _cfg->get_block(1);
   OopFlow *rootflow = OopFlow::make(A,max_reg,this);
   // Initialize to 'bottom' (not 'top')
   memset( rootflow->_callees, OptoReg::Bad, max_reg*sizeof(short) );
@@ -623,7 +618,9 @@
 
     Block *b = worklist.pop();
     // Ignore root block
-    if( b == _cfg->_broot ) continue;
+    if (b == _cfg->get_root_block()) {
+      continue;
+    }
     // Block is already done?  Happens if block has several predecessors,
     // he can get on the worklist more than once.
     if( flows[b->_pre_order] ) continue;
--- a/hotspot/src/share/vm/opto/chaitin.cpp	Mon Aug 26 17:36:10 2013 -0700
+++ b/hotspot/src/share/vm/opto/chaitin.cpp	Wed Jul 05 19:08:56 2017 +0200
@@ -40,10 +40,8 @@
 #include "opto/opcodes.hpp"
 #include "opto/rootnode.hpp"
 
-//=============================================================================
-
 #ifndef PRODUCT
-void LRG::dump( ) const {
+void LRG::dump() const {
   ttyLocker ttyl;
   tty->print("%d ",num_regs());
   _mask.dump();
@@ -94,7 +92,6 @@
 }
 #endif
 
-//------------------------------score------------------------------------------
 // Compute score from cost and area.  Low score is best to spill.
 static double raw_score( double cost, double area ) {
   return cost - (area*RegisterCostAreaRatio) * 1.52588e-5;
@@ -125,7 +122,6 @@
   return score;
 }
 
-//------------------------------LRG_List---------------------------------------
 LRG_List::LRG_List( uint max ) : _cnt(max), _max(max), _lidxs(NEW_RESOURCE_ARRAY(uint,max)) {
   memset( _lidxs, 0, sizeof(uint)*max );
 }
@@ -211,7 +207,6 @@
   return next;
 }
 
-//------------------------------Chaitin----------------------------------------
 PhaseChaitin::PhaseChaitin(uint unique, PhaseCFG &cfg, Matcher &matcher)
   : PhaseRegAlloc(unique, cfg, matcher,
 #ifndef PRODUCT
@@ -232,31 +227,31 @@
 {
   NOT_PRODUCT( Compile::TracePhase t3("ctorChaitin", &_t_ctorChaitin, TimeCompiler); )
 
-  _high_frequency_lrg = MIN2(float(OPTO_LRG_HIGH_FREQ), _cfg._outer_loop_freq);
+  _high_frequency_lrg = MIN2(float(OPTO_LRG_HIGH_FREQ), _cfg.get_outer_loop_frequency());
 
   // Build a list of basic blocks, sorted by frequency
-  _blks = NEW_RESOURCE_ARRAY( Block *, _cfg._num_blocks );
+  _blks = NEW_RESOURCE_ARRAY(Block *, _cfg.number_of_blocks());
   // Experiment with sorting strategies to speed compilation
   double  cutoff = BLOCK_FREQUENCY(1.0); // Cutoff for high frequency bucket
   Block **buckets[NUMBUCKS];             // Array of buckets
   uint    buckcnt[NUMBUCKS];             // Array of bucket counters
   double  buckval[NUMBUCKS];             // Array of bucket value cutoffs
   for (uint i = 0; i < NUMBUCKS; i++) {
-    buckets[i] = NEW_RESOURCE_ARRAY(Block *, _cfg._num_blocks);
+    buckets[i] = NEW_RESOURCE_ARRAY(Block *, _cfg.number_of_blocks());
     buckcnt[i] = 0;
     // Bump by three orders of magnitude each time
     cutoff *= 0.001;
     buckval[i] = cutoff;
-    for (uint j = 0; j < _cfg._num_blocks; j++) {
+    for (uint j = 0; j < _cfg.number_of_blocks(); j++) {
       buckets[i][j] = NULL;
     }
   }
   // Sort blocks into buckets
-  for (uint i = 0; i < _cfg._num_blocks; i++) {
+  for (uint i = 0; i < _cfg.number_of_blocks(); i++) {
     for (uint j = 0; j < NUMBUCKS; j++) {
-      if ((j == NUMBUCKS - 1) || (_cfg._blocks[i]->_freq > buckval[j])) {
+      if ((j == NUMBUCKS - 1) || (_cfg.get_block(i)->_freq > buckval[j])) {
         // Assign block to end of list for appropriate bucket
-        buckets[j][buckcnt[j]++] = _cfg._blocks[i];
+        buckets[j][buckcnt[j]++] = _cfg.get_block(i);
         break; // kick out of inner loop
       }
     }
@@ -269,10 +264,9 @@
     }
   }
 
-  assert(blkcnt == _cfg._num_blocks, "Block array not totally filled");
+  assert(blkcnt == _cfg.number_of_blocks(), "Block array not totally filled");
 }
 
-//------------------------------Union------------------------------------------
 // union 2 sets together.
 void PhaseChaitin::Union( const Node *src_n, const Node *dst_n ) {
   uint src = _lrg_map.find(src_n);
@@ -285,7 +279,6 @@
   _lrg_map.uf_map(dst, src);
 }
 
-//------------------------------new_lrg----------------------------------------
 void PhaseChaitin::new_lrg(const Node *x, uint lrg) {
   // Make the Node->LRG mapping
   _lrg_map.extend(x->_idx,lrg);
@@ -294,24 +287,28 @@
 }
 
 
-bool PhaseChaitin::clone_projs_shared(Block *b, uint idx, Node *con, Node *copy, uint max_lrg_id) {
-  Block* bcon = _cfg.get_block_for_node(con);
-  uint cindex = bcon->find_node(con);
-  Node *con_next = bcon->_nodes[cindex+1];
-  if (con_next->in(0) != con || !con_next->is_MachProj()) {
-    return false;               // No MachProj's follow
+int PhaseChaitin::clone_projs(Block* b, uint idx, Node* orig, Node* copy, uint& max_lrg_id) {
+  assert(b->find_node(copy) == (idx - 1), "incorrect insert index for copy kill projections");
+  DEBUG_ONLY( Block* borig = _cfg.get_block_for_node(orig); )
+  int found_projs = 0;
+  uint cnt = orig->outcnt();
+  for (uint i = 0; i < cnt; i++) {
+    Node* proj = orig->raw_out(i);
+    if (proj->is_MachProj()) {
+      assert(proj->outcnt() == 0, "only kill projections are expected here");
+      assert(_cfg.get_block_for_node(proj) == borig, "incorrect block for kill projections");
+      found_projs++;
+      // Copy kill projections after the cloned node
+      Node* kills = proj->clone();
+      kills->set_req(0, copy);
+      b->_nodes.insert(idx++, kills);
+      _cfg.map_node_to_block(kills, b);
+      new_lrg(kills, max_lrg_id++);
+    }
   }
-
-  // Copy kills after the cloned constant
-  Node *kills = con_next->clone();
-  kills->set_req(0, copy);
-  b->_nodes.insert(idx, kills);
-  _cfg.map_node_to_block(kills, b);
-  new_lrg(kills, max_lrg_id);
-  return true;
+  return found_projs;
 }
 
-//------------------------------compact----------------------------------------
 // Renumber the live ranges to compact them.  Makes the IFG smaller.
 void PhaseChaitin::compact() {
   // Current the _uf_map contains a series of short chains which are headed
@@ -677,20 +674,19 @@
   C->set_indexSet_arena(NULL);  // ResourceArea is at end of scope
 }
 
-//------------------------------de_ssa-----------------------------------------
 void PhaseChaitin::de_ssa() {
   // Set initial Names for all Nodes.  Most Nodes get the virtual register
   // number.  A few get the ZERO live range number.  These do not
   // get allocated, but instead rely on correct scheduling to ensure that
   // only one instance is simultaneously live at a time.
   uint lr_counter = 1;
-  for( uint i = 0; i < _cfg._num_blocks; i++ ) {
-    Block *b = _cfg._blocks[i];
-    uint cnt = b->_nodes.size();
+  for( uint i = 0; i < _cfg.number_of_blocks(); i++ ) {
+    Block* block = _cfg.get_block(i);
+    uint cnt = block->_nodes.size();
 
     // Handle all the normal Nodes in the block
     for( uint j = 0; j < cnt; j++ ) {
-      Node *n = b->_nodes[j];
+      Node *n = block->_nodes[j];
       // Pre-color to the zero live range, or pick virtual register
       const RegMask &rm = n->out_RegMask();
       _lrg_map.map(n->_idx, rm.is_NotEmpty() ? lr_counter++ : 0);
@@ -701,52 +697,55 @@
 }
 
 
-//------------------------------gather_lrg_masks-------------------------------
 // Gather LiveRanGe information, including register masks.  Modification of
 // cisc spillable in_RegMasks should not be done before AggressiveCoalesce.
 void PhaseChaitin::gather_lrg_masks( bool after_aggressive ) {
 
   // Nail down the frame pointer live range
-  uint fp_lrg = _lrg_map.live_range_id(_cfg._root->in(1)->in(TypeFunc::FramePtr));
+  uint fp_lrg = _lrg_map.live_range_id(_cfg.get_root_node()->in(1)->in(TypeFunc::FramePtr));
   lrgs(fp_lrg)._cost += 1e12;   // Cost is infinite
 
   // For all blocks
-  for( uint i = 0; i < _cfg._num_blocks; i++ ) {
-    Block *b = _cfg._blocks[i];
+  for (uint i = 0; i < _cfg.number_of_blocks(); i++) {
+    Block* block = _cfg.get_block(i);
 
     // For all instructions
-    for( uint j = 1; j < b->_nodes.size(); j++ ) {
-      Node *n = b->_nodes[j];
+    for (uint j = 1; j < block->_nodes.size(); j++) {
+      Node* n = block->_nodes[j];
       uint input_edge_start =1; // Skip control most nodes
-      if( n->is_Mach() ) input_edge_start = n->as_Mach()->oper_input_base();
+      if (n->is_Mach()) {
+        input_edge_start = n->as_Mach()->oper_input_base();
+      }
       uint idx = n->is_Copy();
 
       // Get virtual register number, same as LiveRanGe index
       uint vreg = _lrg_map.live_range_id(n);
-      LRG &lrg = lrgs(vreg);
-      if( vreg ) {              // No vreg means un-allocable (e.g. memory)
+      LRG& lrg = lrgs(vreg);
+      if (vreg) {              // No vreg means un-allocable (e.g. memory)
 
         // Collect has-copy bit
-        if( idx ) {
+        if (idx) {
           lrg._has_copy = 1;
           uint clidx = _lrg_map.live_range_id(n->in(idx));
-          LRG &copy_src = lrgs(clidx);
+          LRG& copy_src = lrgs(clidx);
           copy_src._has_copy = 1;
         }
 
         // Check for float-vs-int live range (used in register-pressure
         // calculations)
         const Type *n_type = n->bottom_type();
-        if (n_type->is_floatingpoint())
+        if (n_type->is_floatingpoint()) {
           lrg._is_float = 1;
+        }
 
         // Check for twice prior spilling.  Once prior spilling might have
         // spilled 'soft', 2nd prior spill should have spilled 'hard' and
         // further spilling is unlikely to make progress.
-        if( _spilled_once.test(n->_idx) ) {
+        if (_spilled_once.test(n->_idx)) {
           lrg._was_spilled1 = 1;
-          if( _spilled_twice.test(n->_idx) )
+          if (_spilled_twice.test(n->_idx)) {
             lrg._was_spilled2 = 1;
+          }
         }
 
 #ifndef PRODUCT
@@ -783,16 +782,18 @@
 
         // Check for bound register masks
         const RegMask &lrgmask = lrg.mask();
-        if (lrgmask.is_bound(ireg))
+        if (lrgmask.is_bound(ireg)) {
           lrg._is_bound = 1;
+        }
 
         // Check for maximum frequency value
-        if (lrg._maxfreq < b->_freq)
-          lrg._maxfreq = b->_freq;
+        if (lrg._maxfreq < block->_freq) {
+          lrg._maxfreq = block->_freq;
+        }
 
         // Check for oop-iness, or long/double
         // Check for multi-kill projection
-        switch( ireg ) {
+        switch (ireg) {
         case MachProjNode::fat_proj:
           // Fat projections have size equal to number of registers killed
           lrg.set_num_regs(rm.Size());
@@ -962,7 +963,7 @@
         // AggressiveCoalesce.  This effectively pre-virtual-splits
         // around uncommon uses of common defs.
         const RegMask &rm = n->in_RegMask(k);
-        if (!after_aggressive && _cfg.get_block_for_node(n->in(k))->_freq > 1000 * b->_freq) {
+        if (!after_aggressive && _cfg.get_block_for_node(n->in(k))->_freq > 1000 * block->_freq) {
           // Since we are BEFORE aggressive coalesce, leave the register
           // mask untrimmed by the call.  This encourages more coalescing.
           // Later, AFTER aggressive, this live range will have to spill
@@ -1006,8 +1007,9 @@
         }
 
         // Check for maximum frequency value
-        if( lrg._maxfreq < b->_freq )
-          lrg._maxfreq = b->_freq;
+        if (lrg._maxfreq < block->_freq) {
+          lrg._maxfreq = block->_freq;
+        }
 
       } // End for all allocated inputs
     } // end for all instructions
@@ -1029,7 +1031,6 @@
   }
 }
 
-//------------------------------set_was_low------------------------------------
 // Set the was-lo-degree bit.  Conservative coalescing should not change the
 // colorability of the graph.  If any live range was of low-degree before
 // coalescing, it should Simplify.  This call sets the was-lo-degree bit.
@@ -1066,7 +1067,6 @@
 
 #define REGISTER_CONSTRAINED 16
 
-//------------------------------cache_lrg_info---------------------------------
 // Compute cost/area ratio, in case we spill.  Build the lo-degree list.
 void PhaseChaitin::cache_lrg_info( ) {
 
@@ -1100,7 +1100,6 @@
   }
 }
 
-//------------------------------Pre-Simplify-----------------------------------
 // Simplify the IFG by removing LRGs of low degree that have NO copies
 void PhaseChaitin::Pre_Simplify( ) {
 
@@ -1151,7 +1150,6 @@
   // No more lo-degree no-copy live ranges to simplify
 }
 
-//------------------------------Simplify---------------------------------------
 // Simplify the IFG by removing LRGs of low degree.
 void PhaseChaitin::Simplify( ) {
 
@@ -1288,7 +1286,6 @@
 
 }
 
-//------------------------------is_legal_reg-----------------------------------
 // Is 'reg' register legal for 'lrg'?
 static bool is_legal_reg(LRG &lrg, OptoReg::Name reg, int chunk) {
   if (reg >= chunk && reg < (chunk + RegMask::CHUNK_SIZE) &&
@@ -1315,7 +1312,6 @@
   return false;
 }
 
-//------------------------------bias_color-------------------------------------
 // Choose a color using the biasing heuristic
 OptoReg::Name PhaseChaitin::bias_color( LRG &lrg, int chunk ) {
 
@@ -1377,7 +1373,6 @@
   return OptoReg::add( reg, chunk );
 }
 
-//------------------------------choose_color-----------------------------------
 // Choose a color in the current chunk
 OptoReg::Name PhaseChaitin::choose_color( LRG &lrg, int chunk ) {
   assert( C->in_preserve_stack_slots() == 0 || chunk != 0 || lrg._is_bound || lrg.mask().is_bound1() || !lrg.mask().Member(OptoReg::Name(_matcher._old_SP-1)), "must not allocate stack0 (inside preserve area)");
@@ -1399,7 +1394,6 @@
   return lrg.mask().find_last_elem();
 }
 
-//------------------------------Select-----------------------------------------
 // Select colors by re-inserting LRGs back into the IFG.  LRGs are re-inserted
 // in reverse order of removal.  As long as nothing of hi-degree was yanked,
 // everything going back is guaranteed a color.  Select that color.  If some
@@ -1574,8 +1568,6 @@
   return spill_reg-LRG::SPILL_REG;      // Return number of spills
 }
 
-
-//------------------------------copy_was_spilled-------------------------------
 // Copy 'was_spilled'-edness from the source Node to the dst Node.
 void PhaseChaitin::copy_was_spilled( Node *src, Node *dst ) {
   if( _spilled_once.test(src->_idx) ) {
@@ -1588,14 +1580,12 @@
   }
 }
 
-//------------------------------set_was_spilled--------------------------------
 // Set the 'spilled_once' or 'spilled_twice' flag on a node.
 void PhaseChaitin::set_was_spilled( Node *n ) {
   if( _spilled_once.test_set(n->_idx) )
     _spilled_twice.set(n->_idx);
 }
 
-//------------------------------fixup_spills-----------------------------------
 // Convert Ideal spill instructions into proper FramePtr + offset Loads and
 // Stores.  Use-def chains are NOT preserved, but Node->LRG->reg maps are.
 void PhaseChaitin::fixup_spills() {
@@ -1605,16 +1595,16 @@
   NOT_PRODUCT( Compile::TracePhase t3("fixupSpills", &_t_fixupSpills, TimeCompiler); )
 
   // Grab the Frame Pointer
-  Node *fp = _cfg._broot->head()->in(1)->in(TypeFunc::FramePtr);
+  Node *fp = _cfg.get_root_block()->head()->in(1)->in(TypeFunc::FramePtr);
 
   // For all blocks
-  for( uint i = 0; i < _cfg._num_blocks; i++ ) {
-    Block *b = _cfg._blocks[i];
+  for (uint i = 0; i < _cfg.number_of_blocks(); i++) {
+    Block* block = _cfg.get_block(i);
 
     // For all instructions in block
-    uint last_inst = b->end_idx();
-    for( uint j = 1; j <= last_inst; j++ ) {
-      Node *n = b->_nodes[j];
+    uint last_inst = block->end_idx();
+    for (uint j = 1; j <= last_inst; j++) {
+      Node* n = block->_nodes[j];
 
       // Dead instruction???
       assert( n->outcnt() != 0 ||// Nothing dead after post alloc
@@ -1651,7 +1641,7 @@
             assert( cisc->oper_input_base() == 2, "Only adding one edge");
             cisc->ins_req(1,src);         // Requires a memory edge
           }
-          b->_nodes.map(j,cisc);          // Insert into basic block
+          block->_nodes.map(j,cisc);          // Insert into basic block
           n->subsume_by(cisc, C); // Correct graph
           //
           ++_used_cisc_instructions;
@@ -1677,7 +1667,6 @@
   } // End of for all blocks
 }
 
-//------------------------------find_base_for_derived--------------------------
 // Helper to stretch above; recursively discover the base Node for a
 // given derived Node.  Easy for AddP-related machine nodes, but needs
 // to be recursive for derived Phis.
@@ -1707,7 +1696,7 @@
       // Initialize it once and make it shared:
       // set control to _root and place it into Start block
       // (where top() node is placed).
-      base->init_req(0, _cfg._root);
+      base->init_req(0, _cfg.get_root_node());
       Block *startb = _cfg.get_block_for_node(C->top());
       startb->_nodes.insert(startb->find_node(C->top()), base );
       _cfg.map_node_to_block(base, startb);
@@ -1716,7 +1705,7 @@
     if (_lrg_map.live_range_id(base) == 0) {
       new_lrg(base, maxlrg++);
     }
-    assert(base->in(0) == _cfg._root && _cfg.get_block_for_node(base) == _cfg.get_block_for_node(C->top()), "base NULL should be shared");
+    assert(base->in(0) == _cfg.get_root_node() && _cfg.get_block_for_node(base) == _cfg.get_block_for_node(C->top()), "base NULL should be shared");
     derived_base_map[derived->_idx] = base;
     return base;
   }
@@ -1779,8 +1768,6 @@
   return base;
 }
 
-
-//------------------------------stretch_base_pointer_live_ranges---------------
 // At each Safepoint, insert extra debug edges for each pair of derived value/
 // base pointer that is live across the Safepoint for oopmap building.  The
 // edge pairs get added in after sfpt->jvmtail()->oopoff(), but are in the
@@ -1792,14 +1779,14 @@
   memset( derived_base_map, 0, sizeof(Node*)*C->unique() );
 
   // For all blocks in RPO do...
-  for( uint i=0; i<_cfg._num_blocks; i++ ) {
-    Block *b = _cfg._blocks[i];
+  for (uint i = 0; i < _cfg.number_of_blocks(); i++) {
+    Block* block = _cfg.get_block(i);
     // Note use of deep-copy constructor.  I cannot hammer the original
     // liveout bits, because they are needed by the following coalesce pass.
-    IndexSet liveout(_live->live(b));
+    IndexSet liveout(_live->live(block));
 
-    for( uint j = b->end_idx() + 1; j > 1; j-- ) {
-      Node *n = b->_nodes[j-1];
+    for (uint j = block->end_idx() + 1; j > 1; j--) {
+      Node* n = block->_nodes[j - 1];
 
       // Pre-split compares of loop-phis.  Loop-phis form a cycle we would
       // like to see in the same register.  Compare uses the loop-phi and so
@@ -1814,7 +1801,7 @@
         Node *phi = n->in(1);
         if( phi->is_Phi() && phi->as_Phi()->region()->is_Loop() ) {
           Block *phi_block = _cfg.get_block_for_node(phi);
-          if (_cfg.get_block_for_node(phi_block->pred(2)) == b) {
+          if (_cfg.get_block_for_node(phi_block->pred(2)) == block) {
             const RegMask *mask = C->matcher()->idealreg2spillmask[Op_RegI];
             Node *spill = new (C) MachSpillCopyNode( phi, *mask, *mask );
             insert_proj( phi_block, 1, spill, maxlrg++ );
@@ -1868,7 +1855,7 @@
             if ((_lrg_map.live_range_id(base) >= _lrg_map.max_lrg_id() || // (Brand new base (hence not live) or
                  !liveout.member(_lrg_map.live_range_id(base))) && // not live) AND
                  (_lrg_map.live_range_id(base) > 0) && // not a constant
-                 _cfg.get_block_for_node(base) != b) { // base not def'd in blk)
+                 _cfg.get_block_for_node(base) != block) { // base not def'd in blk)
               // Base pointer is not currently live.  Since I stretched
               // the base pointer to here and it crosses basic-block
               // boundaries, the global live info is now incorrect.
@@ -1903,15 +1890,12 @@
   return must_recompute_live != 0;
 }
 
-
-//------------------------------add_reference----------------------------------
 // Extend the node to LRG mapping
 
 void PhaseChaitin::add_reference(const Node *node, const Node *old_node) {
   _lrg_map.extend(node->_idx, _lrg_map.live_range_id(old_node));
 }
 
-//------------------------------dump-------------------------------------------
 #ifndef PRODUCT
 void PhaseChaitin::dump(const Node *n) const {
   uint r = (n->_idx < _lrg_map.size()) ? _lrg_map.find_const(n) : 0;
@@ -2017,8 +2001,9 @@
               _matcher._new_SP, _framesize );
 
   // For all blocks
-  for( uint i = 0; i < _cfg._num_blocks; i++ )
-    dump(_cfg._blocks[i]);
+  for (uint i = 0; i < _cfg.number_of_blocks(); i++) {
+    dump(_cfg.get_block(i));
+  }
   // End of per-block dump
   tty->print("\n");
 
@@ -2059,7 +2044,6 @@
   tty->print_cr("");
 }
 
-//------------------------------dump_degree_lists------------------------------
 void PhaseChaitin::dump_degree_lists() const {
   // Dump lo-degree list
   tty->print("Lo degree: ");
@@ -2080,7 +2064,6 @@
   tty->print_cr("");
 }
 
-//------------------------------dump_simplified--------------------------------
 void PhaseChaitin::dump_simplified() const {
   tty->print("Simplified: ");
   for( uint i = _simplified; i; i = lrgs(i)._next )
@@ -2099,7 +2082,6 @@
   return buf+strlen(buf);
 }
 
-//------------------------------dump_register----------------------------------
 // Dump a register name into a buffer.  Be intelligent if we get called
 // before allocation is complete.
 char *PhaseChaitin::dump_register( const Node *n, char *buf  ) const {
@@ -2133,7 +2115,6 @@
   return buf+strlen(buf);
 }
 
-//----------------------dump_for_spill_split_recycle--------------------------
 void PhaseChaitin::dump_for_spill_split_recycle() const {
   if( WizardMode && (PrintCompilation || PrintOpto) ) {
     // Display which live ranges need to be split and the allocator's state
@@ -2149,7 +2130,6 @@
   }
 }
 
-//------------------------------dump_frame------------------------------------
 void PhaseChaitin::dump_frame() const {
   const char *fp = OptoReg::regname(OptoReg::c_frame_pointer);
   const TypeTuple *domain = C->tf()->domain();
@@ -2255,17 +2235,16 @@
   tty->print_cr("#");
 }
 
-//------------------------------dump_bb----------------------------------------
 void PhaseChaitin::dump_bb( uint pre_order ) const {
   tty->print_cr("---dump of B%d---",pre_order);
-  for( uint i = 0; i < _cfg._num_blocks; i++ ) {
-    Block *b = _cfg._blocks[i];
-    if( b->_pre_order == pre_order )
-      dump(b);
+  for (uint i = 0; i < _cfg.number_of_blocks(); i++) {
+    Block* block = _cfg.get_block(i);
+    if (block->_pre_order == pre_order) {
+      dump(block);
+    }
   }
 }
 
-//------------------------------dump_lrg---------------------------------------
 void PhaseChaitin::dump_lrg( uint lidx, bool defs_only ) const {
   tty->print_cr("---dump of L%d---",lidx);
 
@@ -2287,17 +2266,17 @@
     tty->cr();
   }
   // For all blocks
-  for( uint i = 0; i < _cfg._num_blocks; i++ ) {
-    Block *b = _cfg._blocks[i];
+  for (uint i = 0; i < _cfg.number_of_blocks(); i++) {
+    Block* block = _cfg.get_block(i);
     int dump_once = 0;
 
     // For all instructions
-    for( uint j = 0; j < b->_nodes.size(); j++ ) {
-      Node *n = b->_nodes[j];
+    for( uint j = 0; j < block->_nodes.size(); j++ ) {
+      Node *n = block->_nodes[j];
       if (_lrg_map.find_const(n) == lidx) {
         if (!dump_once++) {
           tty->cr();
-          b->dump_head(&_cfg);
+          block->dump_head(&_cfg);
         }
         dump(n);
         continue;
@@ -2312,7 +2291,7 @@
           if (_lrg_map.find_const(m) == lidx) {
             if (!dump_once++) {
               tty->cr();
-              b->dump_head(&_cfg);
+              block->dump_head(&_cfg);
             }
             dump(n);
           }
@@ -2324,7 +2303,6 @@
 }
 #endif // not PRODUCT
 
-//------------------------------print_chaitin_statistics-------------------------------
 int PhaseChaitin::_final_loads  = 0;
 int PhaseChaitin::_final_stores = 0;
 int PhaseChaitin::_final_memoves= 0;
--- a/hotspot/src/share/vm/opto/chaitin.hpp	Mon Aug 26 17:36:10 2013 -0700
+++ b/hotspot/src/share/vm/opto/chaitin.hpp	Wed Jul 05 19:08:56 2017 +0200
@@ -412,33 +412,22 @@
   uint split_DEF( Node *def, Block *b, int loc, uint max, Node **Reachblock, Node **debug_defs, GrowableArray<uint> splits, int slidx );
   uint split_USE( Node *def, Block *b, Node *use, uint useidx, uint max, bool def_down, bool cisc_sp, GrowableArray<uint> splits, int slidx );
 
-  bool clone_projs(Block *b, uint idx, Node *con, Node *copy, LiveRangeMap &lrg_map) {
-    bool found_projs = clone_projs_shared(b, idx, con, copy, lrg_map.max_lrg_id());
-
-    if(found_projs) {
-      uint max_lrg_id = lrg_map.max_lrg_id();
-      lrg_map.set_max_lrg_id(max_lrg_id + 1);
-    }
-
-    return found_projs;
-  }
-
   //------------------------------clone_projs------------------------------------
   // After cloning some rematerialized instruction, clone any MachProj's that
   // follow it.  Example: Intel zero is XOR, kills flags.  Sparc FP constants
   // use G3 as an address temp.
-  bool clone_projs(Block *b, uint idx, Node *con, Node *copy, uint &max_lrg_id) {
-    bool found_projs = clone_projs_shared(b, idx, con, copy, max_lrg_id);
+  int clone_projs(Block* b, uint idx, Node* orig, Node* copy, uint& max_lrg_id);
 
-    if(found_projs) {
-      max_lrg_id++;
+  int clone_projs(Block* b, uint idx, Node* orig, Node* copy, LiveRangeMap& lrg_map) {
+    uint max_lrg_id = lrg_map.max_lrg_id();
+    int found_projs = clone_projs(b, idx, orig, copy, max_lrg_id);
+    if (found_projs > 0) {
+      // max_lrg_id is updated during call above
+      lrg_map.set_max_lrg_id(max_lrg_id);
     }
-
     return found_projs;
   }
 
-  bool clone_projs_shared(Block *b, uint idx, Node *con, Node *copy, uint max_lrg_id);
-
   Node *split_Rematerialize(Node *def, Block *b, uint insidx, uint &maxlrg, GrowableArray<uint> splits,
                             int slidx, uint *lrg2reach, Node **Reachblock, bool walkThru);
   // True if lidx is used before any real register is def'd in the block
--- a/hotspot/src/share/vm/opto/coalesce.cpp	Mon Aug 26 17:36:10 2013 -0700
+++ b/hotspot/src/share/vm/opto/coalesce.cpp	Wed Jul 05 19:08:56 2017 +0200
@@ -34,8 +34,6 @@
 #include "opto/matcher.hpp"
 #include "opto/regmask.hpp"
 
-//=============================================================================
-//------------------------------Dump-------------------------------------------
 #ifndef PRODUCT
 void PhaseCoalesce::dump(Node *n) const {
   // Being a const function means I cannot use 'Find'
@@ -43,12 +41,11 @@
   tty->print("L%d/N%d ",r,n->_idx);
 }
 
-//------------------------------dump-------------------------------------------
 void PhaseCoalesce::dump() const {
   // I know I have a block layout now, so I can print blocks in a loop
-  for( uint i=0; i<_phc._cfg._num_blocks; i++ ) {
+  for( uint i=0; i<_phc._cfg.number_of_blocks(); i++ ) {
     uint j;
-    Block *b = _phc._cfg._blocks[i];
+    Block* b = _phc._cfg.get_block(i);
     // Print a nice block header
     tty->print("B%d: ",b->_pre_order);
     for( j=1; j<b->num_preds(); j++ )
@@ -85,7 +82,6 @@
 }
 #endif
 
-//------------------------------combine_these_two------------------------------
 // Combine the live ranges def'd by these 2 Nodes.  N2 is an input to N1.
 void PhaseCoalesce::combine_these_two(Node *n1, Node *n2) {
   uint lr1 = _phc._lrg_map.find(n1);
@@ -127,18 +123,15 @@
   }
 }
 
-//------------------------------coalesce_driver--------------------------------
 // Copy coalescing
-void PhaseCoalesce::coalesce_driver( ) {
-
+void PhaseCoalesce::coalesce_driver() {
   verify();
   // Coalesce from high frequency to low
-  for( uint i=0; i<_phc._cfg._num_blocks; i++ )
-    coalesce( _phc._blks[i] );
-
+  for (uint i = 0; i < _phc._cfg.number_of_blocks(); i++) {
+    coalesce(_phc._blks[i]);
+  }
 }
 
-//------------------------------insert_copy_with_overlap-----------------------
 // I am inserting copies to come out of SSA form.  In the general case, I am
 // doing a parallel renaming.  I'm in the Named world now, so I can't do a
 // general parallel renaming.  All the copies now use  "names" (live-ranges)
@@ -216,7 +209,6 @@
   b->_nodes.insert(last_use_idx+1,copy);
 }
 
-//------------------------------insert_copies----------------------------------
 void PhaseAggressiveCoalesce::insert_copies( Matcher &matcher ) {
   // We do LRGs compressing and fix a liveout data only here since the other
   // place in Split() is guarded by the assert which we never hit.
@@ -225,8 +217,8 @@
   for (uint lrg = 1; lrg < _phc._lrg_map.max_lrg_id(); lrg++) {
     uint compressed_lrg = _phc._lrg_map.find(lrg);
     if (lrg != compressed_lrg) {
-      for (uint bidx = 0; bidx < _phc._cfg._num_blocks; bidx++) {
-        IndexSet *liveout = _phc._live->live(_phc._cfg._blocks[bidx]);
+      for (uint bidx = 0; bidx < _phc._cfg.number_of_blocks(); bidx++) {
+        IndexSet *liveout = _phc._live->live(_phc._cfg.get_block(bidx));
         if (liveout->member(lrg)) {
           liveout->remove(lrg);
           liveout->insert(compressed_lrg);
@@ -239,10 +231,10 @@
   // Nodes with index less than '_unique' are original, non-virtual Nodes.
   _unique = C->unique();
 
-  for( uint i=0; i<_phc._cfg._num_blocks; i++ ) {
+  for (uint i = 0; i < _phc._cfg.number_of_blocks(); i++) {
     C->check_node_count(NodeLimitFudgeFactor, "out of nodes in coalesce");
     if (C->failing()) return;
-    Block *b = _phc._cfg._blocks[i];
+    Block *b = _phc._cfg.get_block(i);
     uint cnt = b->num_preds();  // Number of inputs to the Phi
 
     for( uint l = 1; l<b->_nodes.size(); l++ ) {
@@ -330,9 +322,7 @@
               copy = m->clone();
               // Insert the copy in the basic block, just before us
               b->_nodes.insert(l++, copy);
-              if(_phc.clone_projs(b, l, m, copy, _phc._lrg_map)) {
-                l++;
-              }
+              l += _phc.clone_projs(b, l, m, copy, _phc._lrg_map);
             } else {
               const RegMask *rm = C->matcher()->idealreg2spillmask[m->ideal_reg()];
               copy = new (C) MachSpillCopyNode(m, *rm, *rm);
@@ -403,8 +393,7 @@
   } // End of for all blocks
 }
 
-//=============================================================================
-//------------------------------coalesce---------------------------------------
+
 // Aggressive (but pessimistic) copy coalescing of a single block
 
 // The following coalesce pass represents a single round of aggressive
@@ -464,20 +453,16 @@
   } // End of for all instructions in block
 }
 
-//=============================================================================
-//------------------------------PhaseConservativeCoalesce----------------------
 PhaseConservativeCoalesce::PhaseConservativeCoalesce(PhaseChaitin &chaitin) : PhaseCoalesce(chaitin) {
   _ulr.initialize(_phc._lrg_map.max_lrg_id());
 }
 
-//------------------------------verify-----------------------------------------
 void PhaseConservativeCoalesce::verify() {
 #ifdef ASSERT
   _phc.set_was_low();
 #endif
 }
 
-//------------------------------union_helper-----------------------------------
 void PhaseConservativeCoalesce::union_helper( Node *lr1_node, Node *lr2_node, uint lr1, uint lr2, Node *src_def, Node *dst_copy, Node *src_copy, Block *b, uint bindex ) {
   // Join live ranges.  Merge larger into smaller.  Union lr2 into lr1 in the
   // union-find tree
@@ -520,7 +505,6 @@
   }
 }
 
-//------------------------------compute_separating_interferences---------------
 // Factored code from copy_copy that computes extra interferences from
 // lengthening a live range by double-coalescing.
 uint PhaseConservativeCoalesce::compute_separating_interferences(Node *dst_copy, Node *src_copy, Block *b, uint bindex, RegMask &rm, uint reg_degree, uint rm_size, uint lr1, uint lr2 ) {
@@ -586,7 +570,6 @@
   return reg_degree;
 }
 
-//------------------------------update_ifg-------------------------------------
 void PhaseConservativeCoalesce::update_ifg(uint lr1, uint lr2, IndexSet *n_lr1, IndexSet *n_lr2) {
   // Some original neighbors of lr1 might have gone away
   // because the constrained register mask prevented them.
@@ -616,7 +599,6 @@
       lrgs(neighbor).inc_degree( lrg1.compute_degree(lrgs(neighbor)) );
 }
 
-//------------------------------record_bias------------------------------------
 static void record_bias( const PhaseIFG *ifg, int lr1, int lr2 ) {
   // Tag copy bias here
   if( !ifg->lrgs(lr1)._copy_bias )
@@ -625,7 +607,6 @@
     ifg->lrgs(lr2)._copy_bias = lr1;
 }
 
-//------------------------------copy_copy--------------------------------------
 // See if I can coalesce a series of multiple copies together.  I need the
 // final dest copy and the original src copy.  They can be the same Node.
 // Compute the compatible register masks.
@@ -785,7 +766,6 @@
   return true;
 }
 
-//------------------------------coalesce---------------------------------------
 // Conservative (but pessimistic) copy coalescing of a single block
 void PhaseConservativeCoalesce::coalesce( Block *b ) {
   // Bail out on infrequent blocks
--- a/hotspot/src/share/vm/opto/compile.cpp	Mon Aug 26 17:36:10 2013 -0700
+++ b/hotspot/src/share/vm/opto/compile.cpp	Wed Jul 05 19:08:56 2017 +0200
@@ -2136,7 +2136,9 @@
 //------------------------------Code_Gen---------------------------------------
 // Given a graph, generate code for it
 void Compile::Code_Gen() {
-  if (failing())  return;
+  if (failing()) {
+    return;
+  }
 
   // Perform instruction selection.  You might think we could reclaim Matcher
   // memory PDQ, but actually the Matcher is used in generating spill code.
@@ -2148,12 +2150,11 @@
   // nodes.  Mapping is only valid at the root of each matched subtree.
   NOT_PRODUCT( verify_graph_edges(); )
 
-  Node_List proj_list;
-  Matcher m(proj_list);
-  _matcher = &m;
+  Matcher matcher;
+  _matcher = &matcher;
   {
     TracePhase t2("matcher", &_t_matcher, true);
-    m.match();
+    matcher.match();
   }
   // In debug mode can dump m._nodes.dump() for mapping of ideal to machine
   // nodes.  Mapping is only valid at the root of each matched subtree.
@@ -2161,31 +2162,26 @@
 
   // If you have too many nodes, or if matching has failed, bail out
   check_node_count(0, "out of nodes matching instructions");
-  if (failing())  return;
+  if (failing()) {
+    return;
+  }
 
   // Build a proper-looking CFG
-  PhaseCFG cfg(node_arena(), root(), m);
+  PhaseCFG cfg(node_arena(), root(), matcher);
   _cfg = &cfg;
   {
     NOT_PRODUCT( TracePhase t2("scheduler", &_t_scheduler, TimeCompiler); )
-    cfg.Dominators();
-    if (failing())  return;
-
+    bool success = cfg.do_global_code_motion();
+    if (!success) {
+      return;
+    }
+
+    print_method(PHASE_GLOBAL_CODE_MOTION, 2);
     NOT_PRODUCT( verify_graph_edges(); )
-
-    cfg.Estimate_Block_Frequency();
-    cfg.GlobalCodeMotion(m,unique(),proj_list);
-    if (failing())  return;
-
-    print_method(PHASE_GLOBAL_CODE_MOTION, 2);
-
-    NOT_PRODUCT( verify_graph_edges(); )
-
     debug_only( cfg.verify(); )
   }
-  NOT_PRODUCT( verify_graph_edges(); )
-
-  PhaseChaitin regalloc(unique(), cfg, m);
+
+  PhaseChaitin regalloc(unique(), cfg, matcher);
   _regalloc = &regalloc;
   {
     TracePhase t2("regalloc", &_t_registerAllocation, true);
@@ -2206,7 +2202,7 @@
   // can now safely remove it.
   {
     NOT_PRODUCT( TracePhase t2("blockOrdering", &_t_blockOrdering, TimeCompiler); )
-    cfg.remove_empty();
+    cfg.remove_empty_blocks();
     if (do_freq_based_layout()) {
       PhaseBlockLayout layout(cfg);
     } else {
@@ -2253,38 +2249,50 @@
   _regalloc->dump_frame();
 
   Node *n = NULL;
-  for( uint i=0; i<_cfg->_num_blocks; i++ ) {
-    if (VMThread::should_terminate()) { cut_short = true; break; }
-    Block *b = _cfg->_blocks[i];
-    if (b->is_connector() && !Verbose) continue;
-    n = b->_nodes[0];
-    if (pcs && n->_idx < pc_limit)
+  for (uint i = 0; i < _cfg->number_of_blocks(); i++) {
+    if (VMThread::should_terminate()) {
+      cut_short = true;
+      break;
+    }
+    Block* block = _cfg->get_block(i);
+    if (block->is_connector() && !Verbose) {
+      continue;
+    }
+    n = block->_nodes[0];
+    if (pcs && n->_idx < pc_limit) {
       tty->print("%3.3x   ", pcs[n->_idx]);
-    else
+    } else {
       tty->print("      ");
-    b->dump_head(_cfg);
-    if (b->is_connector()) {
+    }
+    block->dump_head(_cfg);
+    if (block->is_connector()) {
       tty->print_cr("        # Empty connector block");
-    } else if (b->num_preds() == 2 && b->pred(1)->is_CatchProj() && b->pred(1)->as_CatchProj()->_con == CatchProjNode::fall_through_index) {
+    } else if (block->num_preds() == 2 && block->pred(1)->is_CatchProj() && block->pred(1)->as_CatchProj()->_con == CatchProjNode::fall_through_index) {
       tty->print_cr("        # Block is sole successor of call");
     }
 
     // For all instructions
     Node *delay = NULL;
-    for( uint j = 0; j<b->_nodes.size(); j++ ) {
-      if (VMThread::should_terminate()) { cut_short = true; break; }
-      n = b->_nodes[j];
+    for (uint j = 0; j < block->_nodes.size(); j++) {
+      if (VMThread::should_terminate()) {
+        cut_short = true;
+        break;
+      }
+      n = block->_nodes[j];
       if (valid_bundle_info(n)) {
-        Bundle *bundle = node_bundling(n);
+        Bundle* bundle = node_bundling(n);
         if (bundle->used_in_unconditional_delay()) {
           delay = n;
           continue;
         }
-        if (bundle->starts_bundle())
+        if (bundle->starts_bundle()) {
           starts_bundle = '+';
+        }
       }
 
-      if (WizardMode) n->dump();
+      if (WizardMode) {
+        n->dump();
+      }
 
       if( !n->is_Region() &&    // Dont print in the Assembly
           !n->is_Phi() &&       // a few noisely useless nodes
--- a/hotspot/src/share/vm/opto/domgraph.cpp	Mon Aug 26 17:36:10 2013 -0700
+++ b/hotspot/src/share/vm/opto/domgraph.cpp	Wed Jul 05 19:08:56 2017 +0200
@@ -32,9 +32,6 @@
 
 // Portions of code courtesy of Clifford Click
 
-// Optimization - Graph Style
-
-//------------------------------Tarjan-----------------------------------------
 // A data structure that holds all the information needed to find dominators.
 struct Tarjan {
   Block *_block;                // Basic block for this info
@@ -60,23 +57,21 @@
 
 };
 
-//------------------------------Dominator--------------------------------------
 // Compute the dominator tree of the CFG.  The CFG must already have been
 // constructed.  This is the Lengauer & Tarjan O(E-alpha(E,V)) algorithm.
-void PhaseCFG::Dominators( ) {
+void PhaseCFG::build_dominator_tree() {
   // Pre-grow the blocks array, prior to the ResourceMark kicking in
-  _blocks.map(_num_blocks,0);
+  _blocks.map(number_of_blocks(), 0);
 
   ResourceMark rm;
   // Setup mappings from my Graph to Tarjan's stuff and back
   // Note: Tarjan uses 1-based arrays
-  Tarjan *tarjan = NEW_RESOURCE_ARRAY(Tarjan,_num_blocks+1);
+  Tarjan* tarjan = NEW_RESOURCE_ARRAY(Tarjan, number_of_blocks() + 1);
 
   // Tarjan's algorithm, almost verbatim:
   // Step 1:
-  _rpo_ctr = _num_blocks;
-  uint dfsnum = DFS( tarjan );
-  if( dfsnum-1 != _num_blocks ) {// Check for unreachable loops!
+  uint dfsnum = do_DFS(tarjan, number_of_blocks());
+  if (dfsnum - 1 != number_of_blocks()) { // Check for unreachable loops!
     // If the returned dfsnum does not match the number of blocks, then we
     // must have some unreachable loops.  These can be made at any time by
     // IterGVN.  They are cleaned up by CCP or the loop opts, but the last
@@ -93,14 +88,13 @@
     C->record_method_not_compilable("unreachable loop");
     return;
   }
-  _blocks._cnt = _num_blocks;
+  _blocks._cnt = number_of_blocks();
 
   // Tarjan is using 1-based arrays, so these are some initialize flags
   tarjan[0]._size = tarjan[0]._semi = 0;
   tarjan[0]._label = &tarjan[0];
 
-  uint i;
-  for( i=_num_blocks; i>=2; i-- ) { // For all vertices in DFS order
+  for (uint i = number_of_blocks(); i >= 2; i--) { // For all vertices in DFS order
     Tarjan *w = &tarjan[i];     // Get vertex from DFS
 
     // Step 2:
@@ -130,19 +124,19 @@
   }
 
   // Step 4:
-  for( i=2; i <= _num_blocks; i++ ) {
+  for (uint i = 2; i <= number_of_blocks(); i++) {
     Tarjan *w = &tarjan[i];
     if( w->_dom != &tarjan[w->_semi] )
       w->_dom = w->_dom->_dom;
     w->_dom_next = w->_dom_child = NULL;  // Initialize for building tree later
   }
   // No immediate dominator for the root
-  Tarjan *w = &tarjan[_broot->_pre_order];
+  Tarjan *w = &tarjan[get_root_block()->_pre_order];
   w->_dom = NULL;
   w->_dom_next = w->_dom_child = NULL;  // Initialize for building tree later
 
   // Convert the dominator tree array into my kind of graph
-  for( i=1; i<=_num_blocks;i++){// For all Tarjan vertices
+  for(uint i = 1; i <= number_of_blocks(); i++){ // For all Tarjan vertices
     Tarjan *t = &tarjan[i];     // Handy access
     Tarjan *tdom = t->_dom;     // Handy access to immediate dominator
     if( tdom )  {               // Root has no immediate dominator
@@ -152,11 +146,10 @@
     } else
       t->_block->_idom = NULL;  // Root
   }
-  w->setdepth( _num_blocks+1 ); // Set depth in dominator tree
+  w->setdepth(number_of_blocks() + 1); // Set depth in dominator tree
 
 }
 
-//----------------------------Block_Stack--------------------------------------
 class Block_Stack {
   private:
     struct Block_Descr {
@@ -214,7 +207,6 @@
     }
 };
 
-//-------------------------most_frequent_successor-----------------------------
 // Find the index into the b->succs[] array of the most frequent successor.
 uint Block_Stack::most_frequent_successor( Block *b ) {
   uint freq_idx = 0;
@@ -258,40 +250,38 @@
   return freq_idx;
 }
 
-//------------------------------DFS--------------------------------------------
 // Perform DFS search.  Setup 'vertex' as DFS to vertex mapping.  Setup
 // 'semi' as vertex to DFS mapping.  Set 'parent' to DFS parent.
-uint PhaseCFG::DFS( Tarjan *tarjan ) {
-  Block *b = _broot;
+uint PhaseCFG::do_DFS(Tarjan *tarjan, uint rpo_counter) {
+  Block* root_block = get_root_block();
   uint pre_order = 1;
-  // Allocate stack of size _num_blocks+1 to avoid frequent realloc
-  Block_Stack bstack(tarjan, _num_blocks+1);
+  // Allocate stack of size number_of_blocks() + 1 to avoid frequent realloc
+  Block_Stack bstack(tarjan, number_of_blocks() + 1);
 
   // Push on stack the state for the first block
-  bstack.push(pre_order, b);
+  bstack.push(pre_order, root_block);
   ++pre_order;
 
   while (bstack.is_nonempty()) {
     if (!bstack.last_successor()) {
       // Walk over all successors in pre-order (DFS).
-      Block *s = bstack.next_successor();
-      if (s->_pre_order == 0) { // Check for no-pre-order, not-visited
+      Block* next_block = bstack.next_successor();
+      if (next_block->_pre_order == 0) { // Check for no-pre-order, not-visited
         // Push on stack the state of successor
-        bstack.push(pre_order, s);
+        bstack.push(pre_order, next_block);
         ++pre_order;
       }
     }
     else {
       // Build a reverse post-order in the CFG _blocks array
       Block *stack_top = bstack.pop();
-      stack_top->_rpo = --_rpo_ctr;
+      stack_top->_rpo = --rpo_counter;
       _blocks.map(stack_top->_rpo, stack_top);
     }
   }
   return pre_order;
 }
 
-//------------------------------COMPRESS---------------------------------------
 void Tarjan::COMPRESS()
 {
   assert( _ancestor != 0, "" );
@@ -303,14 +293,12 @@
   }
 }
 
-//------------------------------EVAL-------------------------------------------
 Tarjan *Tarjan::EVAL() {
   if( !_ancestor ) return _label;
   COMPRESS();
   return (_ancestor->_label->_semi >= _label->_semi) ? _label : _ancestor->_label;
 }
 
-//------------------------------LINK-------------------------------------------
 void Tarjan::LINK( Tarjan *w, Tarjan *tarjan0 ) {
   Tarjan *s = w;
   while( w->_label->_semi < s->_child->_label->_semi ) {
@@ -333,7 +321,6 @@
   }
 }
 
-//------------------------------setdepth---------------------------------------
 void Tarjan::setdepth( uint stack_size ) {
   Tarjan **top  = NEW_RESOURCE_ARRAY(Tarjan*, stack_size);
   Tarjan **next = top;
@@ -362,8 +349,7 @@
   } while (last < top);
 }
 
-//*********************** DOMINATORS ON THE SEA OF NODES***********************
-//------------------------------NTarjan----------------------------------------
+// Compute dominators on the Sea of Nodes form
 // A data structure that holds all the information needed to find dominators.
 struct NTarjan {
   Node *_control;               // Control node associated with this info
@@ -396,7 +382,6 @@
 #endif
 };
 
-//------------------------------Dominator--------------------------------------
 // Compute the dominator tree of the sea of nodes.  This version walks all CFG
 // nodes (using the is_CFG() call) and places them in a dominator tree.  Thus,
 // it needs a count of the CFG nodes for the mapping table. This is the
@@ -517,7 +502,6 @@
   }
 }
 
-//------------------------------DFS--------------------------------------------
 // Perform DFS search.  Setup 'vertex' as DFS to vertex mapping.  Setup
 // 'semi' as vertex to DFS mapping.  Set 'parent' to DFS parent.
 int NTarjan::DFS( NTarjan *ntarjan, VectorSet &visited, PhaseIdealLoop *pil, uint *dfsorder) {
@@ -560,7 +544,6 @@
   return dfsnum;
 }
 
-//------------------------------COMPRESS---------------------------------------
 void NTarjan::COMPRESS()
 {
   assert( _ancestor != 0, "" );
@@ -572,14 +555,12 @@
   }
 }
 
-//------------------------------EVAL-------------------------------------------
 NTarjan *NTarjan::EVAL() {
   if( !_ancestor ) return _label;
   COMPRESS();
   return (_ancestor->_label->_semi >= _label->_semi) ? _label : _ancestor->_label;
 }
 
-//------------------------------LINK-------------------------------------------
 void NTarjan::LINK( NTarjan *w, NTarjan *ntarjan0 ) {
   NTarjan *s = w;
   while( w->_label->_semi < s->_child->_label->_semi ) {
@@ -602,7 +583,6 @@
   }
 }
 
-//------------------------------setdepth---------------------------------------
 void NTarjan::setdepth( uint stack_size, uint *dom_depth ) {
   NTarjan **top  = NEW_RESOURCE_ARRAY(NTarjan*, stack_size);
   NTarjan **next = top;
@@ -631,7 +611,6 @@
   } while (last < top);
 }
 
-//------------------------------dump-------------------------------------------
 #ifndef PRODUCT
 void NTarjan::dump(int offset) const {
   // Dump the data from this node
--- a/hotspot/src/share/vm/opto/gcm.cpp	Mon Aug 26 17:36:10 2013 -0700
+++ b/hotspot/src/share/vm/opto/gcm.cpp	Wed Jul 05 19:08:56 2017 +0200
@@ -121,27 +121,30 @@
 
 //------------------------------schedule_pinned_nodes--------------------------
 // Set the basic block for Nodes pinned into blocks
-void PhaseCFG::schedule_pinned_nodes( VectorSet &visited ) {
+void PhaseCFG::schedule_pinned_nodes(VectorSet &visited) {
   // Allocate node stack of size C->unique()+8 to avoid frequent realloc
-  GrowableArray <Node *> spstack(C->unique()+8);
+  GrowableArray <Node *> spstack(C->unique() + 8);
   spstack.push(_root);
-  while ( spstack.is_nonempty() ) {
-    Node *n = spstack.pop();
-    if( !visited.test_set(n->_idx) ) { // Test node and flag it as visited
-      if( n->pinned() && !has_block(n)) {  // Pinned?  Nail it down!
-        assert( n->in(0), "pinned Node must have Control" );
+  while (spstack.is_nonempty()) {
+    Node* node = spstack.pop();
+    if (!visited.test_set(node->_idx)) { // Test node and flag it as visited
+      if (node->pinned() && !has_block(node)) {  // Pinned?  Nail it down!
+        assert(node->in(0), "pinned Node must have Control");
         // Before setting block replace block_proj control edge
-        replace_block_proj_ctrl(n);
-        Node *input = n->in(0);
+        replace_block_proj_ctrl(node);
+        Node* input = node->in(0);
         while (!input->is_block_start()) {
           input = input->in(0);
         }
-        Block *b = get_block_for_node(input); // Basic block of controlling input
-        schedule_node_into_block(n, b);
+        Block* block = get_block_for_node(input); // Basic block of controlling input
+        schedule_node_into_block(node, block);
       }
-      for( int i = n->req() - 1; i >= 0; --i ) {  // For all inputs
-        if( n->in(i) != NULL )
-          spstack.push(n->in(i));
+
+      // Process all inputs that are non-NULL
+      for (int i = node->req() - 1; i >= 0; --i) {
+        if (node->in(i) != NULL) {
+          spstack.push(node->in(i));
+        }
       }
     }
   }
@@ -205,32 +208,29 @@
 // which all their inputs occur.
 bool PhaseCFG::schedule_early(VectorSet &visited, Node_List &roots) {
   // Allocate stack with enough space to avoid frequent realloc
-  Node_Stack nstack(roots.Size() + 8); // (unique >> 1) + 24 from Java2D stats
-  // roots.push(_root); _root will be processed among C->top() inputs
+  Node_Stack nstack(roots.Size() + 8);
+  // _root will be processed among C->top() inputs
   roots.push(C->top());
   visited.set(C->top()->_idx);
 
   while (roots.size() != 0) {
     // Use local variables nstack_top_n & nstack_top_i to cache values
     // on stack's top.
-    Node *nstack_top_n = roots.pop();
-    uint  nstack_top_i = 0;
-//while_nstack_nonempty:
+    Node* parent_node = roots.pop();
+    uint  input_index = 0;
+
     while (true) {
-      // Get parent node and next input's index from stack's top.
-      Node *n = nstack_top_n;
-      uint  i = nstack_top_i;
-
-      if (i == 0) {
+      if (input_index == 0) {
         // Fixup some control.  Constants without control get attached
         // to root and nodes that use is_block_proj() nodes should be attached
         // to the region that starts their block.
-        const Node *in0 = n->in(0);
-        if (in0 != NULL) {              // Control-dependent?
-          replace_block_proj_ctrl(n);
-        } else {               // n->in(0) == NULL
-          if (n->req() == 1) { // This guy is a constant with NO inputs?
-            n->set_req(0, _root);
+        const Node* control_input = parent_node->in(0);
+        if (control_input != NULL) {
+          replace_block_proj_ctrl(parent_node);
+        } else {
+          // Is a constant with NO inputs?
+          if (parent_node->req() == 1) {
+            parent_node->set_req(0, _root);
           }
         }
       }
@@ -239,37 +239,47 @@
       // input is already in a block we quit following inputs (to avoid
       // cycles). Instead we put that Node on a worklist to be handled
       // later (since IT'S inputs may not have a block yet).
-      bool done = true;              // Assume all n's inputs will be processed
-      while (i < n->len()) {         // For all inputs
-        Node *in = n->in(i);         // Get input
-        ++i;
-        if (in == NULL) continue;    // Ignore NULL, missing inputs
+
+      // Assume all of parent_node's inputs will be processed
+      bool done = true;
+
+      while (input_index < parent_node->len()) {
+        Node* in = parent_node->in(input_index++);
+        if (in == NULL) {
+          continue;
+        }
+
         int is_visited = visited.test_set(in->_idx);
-        if (!has_block(in)) { // Missing block selection?
+        if (!has_block(in)) {
           if (is_visited) {
-            // assert( !visited.test(in->_idx), "did not schedule early" );
             return false;
           }
-          nstack.push(n, i);         // Save parent node and next input's index.
-          nstack_top_n = in;         // Process current input now.
-          nstack_top_i = 0;
-          done = false;              // Not all n's inputs processed.
-          break; // continue while_nstack_nonempty;
-        } else if (!is_visited) {    // Input not yet visited?
-          roots.push(in);            // Visit this guy later, using worklist
+          // Save parent node and next input's index.
+          nstack.push(parent_node, input_index);
+          // Process current input now.
+          parent_node = in;
+          input_index = 0;
+          // Not all of parent_node's inputs processed.
+          done = false;
+          break;
+        } else if (!is_visited) {
+          // Visit this guy later, using worklist
+          roots.push(in);
         }
       }
+
       if (done) {
         // All of n's inputs have been processed, complete post-processing.
 
         // Some instructions are pinned into a block.  These include Region,
         // Phi, Start, Return, and other control-dependent instructions and
         // any projections which depend on them.
-        if (!n->pinned()) {
+        if (!parent_node->pinned()) {
           // Set earliest legal block.
-          map_node_to_block(n, find_deepest_input(n, this));
+          Block* earliest_block = find_deepest_input(parent_node, this);
+          map_node_to_block(parent_node, earliest_block);
         } else {
-          assert(get_block_for_node(n) == get_block_for_node(n->in(0)), "Pinned Node should be at the same block as its control edge");
+          assert(get_block_for_node(parent_node) == get_block_for_node(parent_node->in(0)), "Pinned Node should be at the same block as its control edge");
         }
 
         if (nstack.is_empty()) {
@@ -278,12 +288,12 @@
           break;
         }
         // Get saved parent node and next input's index.
-        nstack_top_n = nstack.node();
-        nstack_top_i = nstack.index();
+        parent_node = nstack.node();
+        input_index = nstack.index();
         nstack.pop();
-      } //    if (done)
-    }   // while (true)
-  }     // while (roots.size() != 0)
+      }
+    }
+  }
   return true;
 }
 
@@ -847,7 +857,7 @@
 
 //------------------------------ComputeLatenciesBackwards----------------------
 // Compute the latency of all the instructions.
-void PhaseCFG::ComputeLatenciesBackwards(VectorSet &visited, Node_List &stack) {
+void PhaseCFG::compute_latencies_backwards(VectorSet &visited, Node_List &stack) {
 #ifndef PRODUCT
   if (trace_opto_pipelining())
     tty->print("\n#---- ComputeLatenciesBackwards ----\n");
@@ -870,31 +880,34 @@
   // Set the latency for this instruction
 #ifndef PRODUCT
   if (trace_opto_pipelining()) {
-    tty->print("# latency_to_inputs: node_latency[%d] = %d for node",
-               n->_idx, _node_latency->at_grow(n->_idx));
+    tty->print("# latency_to_inputs: node_latency[%d] = %d for node", n->_idx, get_latency_for_node(n));
     dump();
   }
 #endif
 
-  if (n->is_Proj())
+  if (n->is_Proj()) {
     n = n->in(0);
+  }
 
-  if (n->is_Root())
+  if (n->is_Root()) {
     return;
+  }
 
   uint nlen = n->len();
-  uint use_latency = _node_latency->at_grow(n->_idx);
+  uint use_latency = get_latency_for_node(n);
   uint use_pre_order = get_block_for_node(n)->_pre_order;
 
-  for ( uint j=0; j<nlen; j++ ) {
+  for (uint j = 0; j < nlen; j++) {
     Node *def = n->in(j);
 
-    if (!def || def == n)
+    if (!def || def == n) {
       continue;
+    }
 
     // Walk backwards thru projections
-    if (def->is_Proj())
+    if (def->is_Proj()) {
       def = def->in(0);
+    }
 
 #ifndef PRODUCT
     if (trace_opto_pipelining()) {
@@ -907,22 +920,20 @@
     Block *def_block = get_block_for_node(def);
     uint def_pre_order = def_block ? def_block->_pre_order : 0;
 
-    if ( (use_pre_order <  def_pre_order) ||
-         (use_pre_order == def_pre_order && n->is_Phi()) )
+    if ((use_pre_order <  def_pre_order) || (use_pre_order == def_pre_order && n->is_Phi())) {
       continue;
+    }
 
     uint delta_latency = n->latency(j);
     uint current_latency = delta_latency + use_latency;
 
-    if (_node_latency->at_grow(def->_idx) < current_latency) {
-      _node_latency->at_put_grow(def->_idx, current_latency);
+    if (get_latency_for_node(def) < current_latency) {
+      set_latency_for_node(def, current_latency);
     }
 
 #ifndef PRODUCT
     if (trace_opto_pipelining()) {
-      tty->print_cr("#      %d + edge_latency(%d) == %d -> %d, node_latency[%d] = %d",
-                    use_latency, j, delta_latency, current_latency, def->_idx,
-                    _node_latency->at_grow(def->_idx));
+      tty->print_cr("#      %d + edge_latency(%d) == %d -> %d, node_latency[%d] = %d", use_latency, j, delta_latency, current_latency, def->_idx, get_latency_for_node(def));
     }
 #endif
   }
@@ -957,7 +968,7 @@
       return 0;
 
     uint nlen = use->len();
-    uint nl = _node_latency->at_grow(use->_idx);
+    uint nl = get_latency_for_node(use);
 
     for ( uint j=0; j<nlen; j++ ) {
       if (use->in(j) == n) {
@@ -992,8 +1003,7 @@
   // Set the latency for this instruction
 #ifndef PRODUCT
   if (trace_opto_pipelining()) {
-    tty->print("# latency_from_outputs: node_latency[%d] = %d for node",
-               n->_idx, _node_latency->at_grow(n->_idx));
+    tty->print("# latency_from_outputs: node_latency[%d] = %d for node", n->_idx, get_latency_for_node(n));
     dump();
   }
 #endif
@@ -1006,7 +1016,7 @@
     if (latency < l) latency = l;
   }
 
-  _node_latency->at_put_grow(n->_idx, latency);
+  set_latency_for_node(n, latency);
 }
 
 //------------------------------hoist_to_cheaper_block-------------------------
@@ -1016,9 +1026,9 @@
   const double delta = 1+PROB_UNLIKELY_MAG(4);
   Block* least       = LCA;
   double least_freq  = least->_freq;
-  uint target        = _node_latency->at_grow(self->_idx);
-  uint start_latency = _node_latency->at_grow(LCA->_nodes[0]->_idx);
-  uint end_latency   = _node_latency->at_grow(LCA->_nodes[LCA->end_idx()]->_idx);
+  uint target        = get_latency_for_node(self);
+  uint start_latency = get_latency_for_node(LCA->_nodes[0]);
+  uint end_latency   = get_latency_for_node(LCA->_nodes[LCA->end_idx()]);
   bool in_latency    = (target <= start_latency);
   const Block* root_block = get_block_for_node(_root);
 
@@ -1035,8 +1045,7 @@
 
 #ifndef PRODUCT
   if (trace_opto_pipelining()) {
-    tty->print("# Find cheaper block for latency %d: ",
-      _node_latency->at_grow(self->_idx));
+    tty->print("# Find cheaper block for latency %d: ", get_latency_for_node(self));
     self->dump();
     tty->print_cr("#   B%d: start latency for [%4d]=%d, end latency for [%4d]=%d, freq=%g",
       LCA->_pre_order,
@@ -1065,9 +1074,9 @@
     if (mach && LCA == root_block)
       break;
 
-    uint start_lat = _node_latency->at_grow(LCA->_nodes[0]->_idx);
+    uint start_lat = get_latency_for_node(LCA->_nodes[0]);
     uint end_idx   = LCA->end_idx();
-    uint end_lat   = _node_latency->at_grow(LCA->_nodes[end_idx]->_idx);
+    uint end_lat   = get_latency_for_node(LCA->_nodes[end_idx]);
     double LCA_freq = LCA->_freq;
 #ifndef PRODUCT
     if (trace_opto_pipelining()) {
@@ -1109,7 +1118,7 @@
       tty->print_cr("#  Change latency for [%4d] from %d to %d", self->_idx, target, end_latency);
     }
 #endif
-    _node_latency->at_put_grow(self->_idx, end_latency);
+    set_latency_for_node(self, end_latency);
     partial_latency_of_defs(self);
   }
 
@@ -1255,7 +1264,7 @@
 } // end ScheduleLate
 
 //------------------------------GlobalCodeMotion-------------------------------
-void PhaseCFG::GlobalCodeMotion( Matcher &matcher, uint unique, Node_List &proj_list ) {
+void PhaseCFG::global_code_motion() {
   ResourceMark rm;
 
 #ifndef PRODUCT
@@ -1265,21 +1274,22 @@
 #endif
 
   // Initialize the node to block mapping for things on the proj_list
-  for (uint i = 0; i < proj_list.size(); i++) {
-    unmap_node_from_block(proj_list[i]);
+  for (uint i = 0; i < _matcher.number_of_projections(); i++) {
+    unmap_node_from_block(_matcher.get_projection(i));
   }
 
   // Set the basic block for Nodes pinned into blocks
-  Arena *a = Thread::current()->resource_area();
-  VectorSet visited(a);
-  schedule_pinned_nodes( visited );
+  Arena* arena = Thread::current()->resource_area();
+  VectorSet visited(arena);
+  schedule_pinned_nodes(visited);
 
   // Find the earliest Block any instruction can be placed in.  Some
   // instructions are pinned into Blocks.  Unpinned instructions can
   // appear in last block in which all their inputs occur.
   visited.Clear();
-  Node_List stack(a);
-  stack.map( (unique >> 1) + 16, NULL); // Pre-grow the list
+  Node_List stack(arena);
+  // Pre-grow the list
+  stack.map((C->unique() >> 1) + 16, NULL);
   if (!schedule_early(visited, stack)) {
     // Bailout without retry
     C->record_method_not_compilable("early schedule failed");
@@ -1287,29 +1297,25 @@
   }
 
   // Build Def-Use edges.
-  proj_list.push(_root);        // Add real root as another root
-  proj_list.pop();
-
   // Compute the latency information (via backwards walk) for all the
   // instructions in the graph
   _node_latency = new GrowableArray<uint>(); // resource_area allocation
 
-  if( C->do_scheduling() )
-    ComputeLatenciesBackwards(visited, stack);
+  if (C->do_scheduling()) {
+    compute_latencies_backwards(visited, stack);
+  }
 
   // Now schedule all codes as LATE as possible.  This is the LCA in the
   // dominator tree of all USES of a value.  Pick the block with the least
   // loop nesting depth that is lowest in the dominator tree.
   // ( visited.Clear() called in schedule_late()->Node_Backward_Iterator() )
   schedule_late(visited, stack);
-  if( C->failing() ) {
+  if (C->failing()) {
     // schedule_late fails only when graph is incorrect.
     assert(!VerifyGraphEdges, "verification should have failed");
     return;
   }
 
-  unique = C->unique();
-
 #ifndef PRODUCT
   if (trace_opto_pipelining()) {
     tty->print("\n---- Detect implicit null checks ----\n");
@@ -1332,10 +1338,11 @@
     // By reversing the loop direction we get a very minor gain on mpegaudio.
     // Feel free to revert to a forward loop for clarity.
     // for( int i=0; i < (int)matcher._null_check_tests.size(); i+=2 ) {
-    for( int i= matcher._null_check_tests.size()-2; i>=0; i-=2 ) {
-      Node *proj = matcher._null_check_tests[i  ];
-      Node *val  = matcher._null_check_tests[i+1];
-      get_block_for_node(proj)->implicit_null_check(this, proj, val, allowed_reasons);
+    for (int i = _matcher._null_check_tests.size() - 2; i >= 0; i -= 2) {
+      Node* proj = _matcher._null_check_tests[i];
+      Node* val  = _matcher._null_check_tests[i + 1];
+      Block* block = get_block_for_node(proj);
+      block->implicit_null_check(this, proj, val, allowed_reasons);
       // The implicit_null_check will only perform the transformation
       // if the null branch is truly uncommon, *and* it leads to an
       // uncommon trap.  Combined with the too_many_traps guards
@@ -1352,11 +1359,11 @@
 
   // Schedule locally.  Right now a simple topological sort.
   // Later, do a real latency aware scheduler.
-  uint max_idx = C->unique();
-  GrowableArray<int> ready_cnt(max_idx, max_idx, -1);
+  GrowableArray<int> ready_cnt(C->unique(), C->unique(), -1);
   visited.Clear();
-  for (uint i = 0; i < _num_blocks; i++) {
-    if (!_blocks[i]->schedule_local(this, matcher, ready_cnt, visited)) {
+  for (uint i = 0; i < number_of_blocks(); i++) {
+    Block* block = get_block(i);
+    if (!block->schedule_local(this, _matcher, ready_cnt, visited)) {
       if (!C->failure_reason_is(C2Compiler::retry_no_subsuming_loads())) {
         C->record_method_not_compilable("local schedule failed");
       }
@@ -1366,15 +1373,17 @@
 
   // If we inserted any instructions between a Call and his CatchNode,
   // clone the instructions on all paths below the Catch.
-  for (uint i = 0; i < _num_blocks; i++) {
-    _blocks[i]->call_catch_cleanup(this, C);
+  for (uint i = 0; i < number_of_blocks(); i++) {
+    Block* block = get_block(i);
+    block->call_catch_cleanup(this, C);
   }
 
 #ifndef PRODUCT
   if (trace_opto_pipelining()) {
     tty->print("\n---- After GlobalCodeMotion ----\n");
-    for (uint i = 0; i < _num_blocks; i++) {
-      _blocks[i]->dump();
+    for (uint i = 0; i < number_of_blocks(); i++) {
+      Block* block = get_block(i);
+      block->dump();
     }
   }
 #endif
@@ -1382,10 +1391,29 @@
   _node_latency = (GrowableArray<uint> *)0xdeadbeef;
 }
 
+// Run the passes up to and including global code motion: build the dominator
+// tree, estimate block frequencies, then perform the code motion itself.
+// Returns false if the compilation is flagged as failing along the way.
+bool PhaseCFG::do_global_code_motion() {
+  build_dominator_tree();
+  // Stop right after dominator construction if the compile already failed.
+  if (C->failing()) {
+    return false;
+  }
+
+  // In non-PRODUCT builds, verify the graph edges before continuing.
+  NOT_PRODUCT( C->verify_graph_edges(); )
+
+  estimate_block_frequency();
+  global_code_motion();
+
+  // Success means the compilation is still not failing after code motion.
+  return !C->failing();
+}
 
 //------------------------------Estimate_Block_Frequency-----------------------
 // Estimate block frequencies based on IfNode probabilities.
-void PhaseCFG::Estimate_Block_Frequency() {
+void PhaseCFG::estimate_block_frequency() {
 
   // Force conditional branches leading to uncommon traps to be unlikely,
   // not because we get to the uncommon_trap with less relative frequency,
@@ -1393,7 +1421,7 @@
   // there once.
   if (C->do_freq_based_layout()) {
     Block_List worklist;
-    Block* root_blk = _blocks[0];
+    Block* root_blk = get_block(0);
     for (uint i = 1; i < root_blk->num_preds(); i++) {
       Block *pb = get_block_for_node(root_blk->pred(i));
       if (pb->has_uncommon_code()) {
@@ -1402,7 +1430,9 @@
     }
     while (worklist.size() > 0) {
       Block* uct = worklist.pop();
-      if (uct == _broot) continue;
+      if (uct == get_root_block()) {
+        continue;
+      }
       for (uint i = 1; i < uct->num_preds(); i++) {
         Block *pb = get_block_for_node(uct->pred(i));
         if (pb->_num_succs == 1) {
@@ -1426,12 +1456,12 @@
   _root_loop->scale_freq();
 
   // Save outmost loop frequency for LRG frequency threshold
-  _outer_loop_freq = _root_loop->outer_loop_freq();
+  _outer_loop_frequency = _root_loop->outer_loop_freq();
 
   // force paths ending at uncommon traps to be infrequent
   if (!C->do_freq_based_layout()) {
     Block_List worklist;
-    Block* root_blk = _blocks[0];
+    Block* root_blk = get_block(0);
     for (uint i = 1; i < root_blk->num_preds(); i++) {
       Block *pb = get_block_for_node(root_blk->pred(i));
       if (pb->has_uncommon_code()) {
@@ -1451,8 +1481,8 @@
   }
 
 #ifdef ASSERT
-  for (uint i = 0; i < _num_blocks; i++ ) {
-    Block *b = _blocks[i];
+  for (uint i = 0; i < number_of_blocks(); i++) {
+    Block* b = get_block(i);
     assert(b->_freq >= MIN_BLOCK_FREQUENCY, "Register Allocator requires meaningful block frequency");
   }
 #endif
@@ -1476,16 +1506,16 @@
 CFGLoop* PhaseCFG::create_loop_tree() {
 
 #ifdef ASSERT
-  assert( _blocks[0] == _broot, "" );
-  for (uint i = 0; i < _num_blocks; i++ ) {
-    Block *b = _blocks[i];
+  assert(get_block(0) == get_root_block(), "first block should be root block");
+  for (uint i = 0; i < number_of_blocks(); i++) {
+    Block* block = get_block(i);
     // Check that _loop field are clear...we could clear them if not.
-    assert(b->_loop == NULL, "clear _loop expected");
+    assert(block->_loop == NULL, "clear _loop expected");
     // Sanity check that the RPO numbering is reflected in the _blocks array.
     // It doesn't have to be for the loop tree to be built, but if it is not,
     // then the blocks have been reordered since dom graph building...which
     // may question the RPO numbering
-    assert(b->_rpo == i, "unexpected reverse post order number");
+    assert(block->_rpo == i, "unexpected reverse post order number");
   }
 #endif
 
@@ -1495,11 +1525,11 @@
   Block_List worklist;
 
   // Assign blocks to loops
-  for(uint i = _num_blocks - 1; i > 0; i-- ) { // skip Root block
-    Block *b = _blocks[i];
+  for(uint i = number_of_blocks() - 1; i > 0; i-- ) { // skip Root block
+    Block* block = get_block(i);
 
-    if (b->head()->is_Loop()) {
-      Block* loop_head = b;
+    if (block->head()->is_Loop()) {
+      Block* loop_head = block;
       assert(loop_head->num_preds() - 1 == 2, "loop must have 2 predecessors");
       Node* tail_n = loop_head->pred(LoopNode::LoopBackControl);
       Block* tail = get_block_for_node(tail_n);
@@ -1533,23 +1563,23 @@
 
   // Create a member list for each loop consisting
   // of both blocks and (immediate child) loops.
-  for (uint i = 0; i < _num_blocks; i++) {
-    Block *b = _blocks[i];
-    CFGLoop* lp = b->_loop;
+  for (uint i = 0; i < number_of_blocks(); i++) {
+    Block* block = get_block(i);
+    CFGLoop* lp = block->_loop;
     if (lp == NULL) {
       // Not assigned to a loop. Add it to the method's pseudo loop.
-      b->_loop = root_loop;
+      block->_loop = root_loop;
       lp = root_loop;
     }
-    if (lp == root_loop || b != lp->head()) { // loop heads are already members
-      lp->add_member(b);
+    if (lp == root_loop || block != lp->head()) { // loop heads are already members
+      lp->add_member(block);
     }
     if (lp != root_loop) {
       if (lp->parent() == NULL) {
         // Not a nested loop. Make it a child of the method's pseudo loop.
         root_loop->add_nested_loop(lp);
       }
-      if (b == lp->head()) {
+      if (block == lp->head()) {
         // Add nested loop to member list of parent loop.
         lp->parent()->add_member(lp);
       }
--- a/hotspot/src/share/vm/opto/idealGraphPrinter.cpp	Mon Aug 26 17:36:10 2013 -0700
+++ b/hotspot/src/share/vm/opto/idealGraphPrinter.cpp	Wed Jul 05 19:08:56 2017 +0200
@@ -416,7 +416,7 @@
     if (C->cfg() != NULL) {
       Block* block = C->cfg()->get_block_for_node(node);
       if (block == NULL) {
-        print_prop("block", C->cfg()->_blocks[0]->_pre_order);
+        print_prop("block", C->cfg()->get_block(0)->_pre_order);
       } else {
         print_prop("block", block->_pre_order);
       }
@@ -637,10 +637,10 @@
   if (C->cfg() != NULL) {
     // once we have a CFG there are some nodes that aren't really
     // reachable but are in the CFG so add them here.
-    for (uint i = 0; i < C->cfg()->_blocks.size(); i++) {
-      Block *b = C->cfg()->_blocks[i];
-      for (uint s = 0; s < b->_nodes.size(); s++) {
-        nodeStack.push(b->_nodes[s]);
+    for (uint i = 0; i < C->cfg()->number_of_blocks(); i++) {
+      Block* block = C->cfg()->get_block(i);
+      for (uint s = 0; s < block->_nodes.size(); s++) {
+        nodeStack.push(block->_nodes[s]);
       }
     }
   }
@@ -698,24 +698,24 @@
   tail(EDGES_ELEMENT);
   if (C->cfg() != NULL) {
     head(CONTROL_FLOW_ELEMENT);
-    for (uint i = 0; i < C->cfg()->_blocks.size(); i++) {
-      Block *b = C->cfg()->_blocks[i];
+    for (uint i = 0; i < C->cfg()->number_of_blocks(); i++) {
+      Block* block = C->cfg()->get_block(i);
       begin_head(BLOCK_ELEMENT);
-      print_attr(BLOCK_NAME_PROPERTY, b->_pre_order);
+      print_attr(BLOCK_NAME_PROPERTY, block->_pre_order);
       end_head();
 
       head(SUCCESSORS_ELEMENT);
-      for (uint s = 0; s < b->_num_succs; s++) {
+      for (uint s = 0; s < block->_num_succs; s++) {
         begin_elem(SUCCESSOR_ELEMENT);
-        print_attr(BLOCK_NAME_PROPERTY, b->_succs[s]->_pre_order);
+        print_attr(BLOCK_NAME_PROPERTY, block->_succs[s]->_pre_order);
         end_elem();
       }
       tail(SUCCESSORS_ELEMENT);
 
       head(NODES_ELEMENT);
-      for (uint s = 0; s < b->_nodes.size(); s++) {
+      for (uint s = 0; s < block->_nodes.size(); s++) {
         begin_elem(NODE_ELEMENT);
-        print_attr(NODE_ID_PROPERTY, get_node_id(b->_nodes[s]));
+        print_attr(NODE_ID_PROPERTY, get_node_id(block->_nodes[s]));
         end_elem();
       }
       tail(NODES_ELEMENT);
--- a/hotspot/src/share/vm/opto/ifg.cpp	Mon Aug 26 17:36:10 2013 -0700
+++ b/hotspot/src/share/vm/opto/ifg.cpp	Wed Jul 05 19:08:56 2017 +0200
@@ -37,12 +37,9 @@
 #include "opto/memnode.hpp"
 #include "opto/opcodes.hpp"
 
-//=============================================================================
-//------------------------------IFG--------------------------------------------
 PhaseIFG::PhaseIFG( Arena *arena ) : Phase(Interference_Graph), _arena(arena) {
 }
 
-//------------------------------init-------------------------------------------
 void PhaseIFG::init( uint maxlrg ) {
   _maxlrg = maxlrg;
   _yanked = new (_arena) VectorSet(_arena);
@@ -59,7 +56,6 @@
   }
 }
 
-//------------------------------add--------------------------------------------
 // Add edge between vertices a & b.  These are sorted (triangular matrix),
 // then the smaller number is inserted in the larger numbered array.
 int PhaseIFG::add_edge( uint a, uint b ) {
@@ -71,7 +67,6 @@
   return _adjs[a].insert( b );
 }
 
-//------------------------------add_vector-------------------------------------
 // Add an edge between 'a' and everything in the vector.
 void PhaseIFG::add_vector( uint a, IndexSet *vec ) {
   // IFG is triangular, so do the inserts where 'a' < 'b'.
@@ -86,7 +81,6 @@
   }
 }
 
-//------------------------------test-------------------------------------------
 // Is there an edge between a and b?
 int PhaseIFG::test_edge( uint a, uint b ) const {
   // Sort a and b, so that a is larger
@@ -95,7 +89,6 @@
   return _adjs[a].member(b);
 }
 
-//------------------------------SquareUp---------------------------------------
 // Convert triangular matrix to square matrix
 void PhaseIFG::SquareUp() {
   assert( !_is_square, "only on triangular" );
@@ -111,7 +104,6 @@
   _is_square = true;
 }
 
-//------------------------------Compute_Effective_Degree-----------------------
 // Compute effective degree in bulk
 void PhaseIFG::Compute_Effective_Degree() {
   assert( _is_square, "only on square" );
@@ -120,7 +112,6 @@
     lrgs(i).set_degree(effective_degree(i));
 }
 
-//------------------------------test_edge_sq-----------------------------------
 int PhaseIFG::test_edge_sq( uint a, uint b ) const {
   assert( _is_square, "only on square" );
   // Swap, so that 'a' has the lesser count.  Then binary search is on
@@ -130,7 +121,6 @@
   return _adjs[a].member(b);
 }
 
-//------------------------------Union------------------------------------------
 // Union edges of B into A
 void PhaseIFG::Union( uint a, uint b ) {
   assert( _is_square, "only on square" );
@@ -146,7 +136,6 @@
   }
 }
 
-//------------------------------remove_node------------------------------------
 // Yank a Node and all connected edges from the IFG.  Return a
 // list of neighbors (edges) yanked.
 IndexSet *PhaseIFG::remove_node( uint a ) {
@@ -165,7 +154,6 @@
   return neighbors(a);
 }
 
-//------------------------------re_insert--------------------------------------
 // Re-insert a yanked Node.
 void PhaseIFG::re_insert( uint a ) {
   assert( _is_square, "only on square" );
@@ -180,7 +168,6 @@
   }
 }
 
-//------------------------------compute_degree---------------------------------
 // Compute the degree between 2 live ranges.  If both live ranges are
 // aligned-adjacent powers-of-2 then we use the MAX size.  If either is
 // mis-aligned (or for Fat-Projections, not-adjacent) then we have to
@@ -196,7 +183,6 @@
   return tmp;
 }
 
-//------------------------------effective_degree-------------------------------
 // Compute effective degree for this live range.  If both live ranges are
 // aligned-adjacent powers-of-2 then we use the MAX size.  If either is
 // mis-aligned (or for Fat-Projections, not-adjacent) then we have to
@@ -221,7 +207,6 @@
 
 
 #ifndef PRODUCT
-//------------------------------dump-------------------------------------------
 void PhaseIFG::dump() const {
   tty->print_cr("-- Interference Graph --%s--",
                 _is_square ? "square" : "triangular" );
@@ -260,7 +245,6 @@
   tty->print("\n");
 }
 
-//------------------------------stats------------------------------------------
 void PhaseIFG::stats() const {
   ResourceMark rm;
   int *h_cnt = NEW_RESOURCE_ARRAY(int,_maxlrg*2);
@@ -276,7 +260,6 @@
   tty->print_cr("");
 }
 
-//------------------------------verify-----------------------------------------
 void PhaseIFG::verify( const PhaseChaitin *pc ) const {
   // IFG is square, sorted and no need for Find
   for( uint i = 0; i < _maxlrg; i++ ) {
@@ -298,7 +281,6 @@
 }
 #endif
 
-//------------------------------interfere_with_live----------------------------
 // Interfere this register with everything currently live.  Use the RegMasks
 // to trim the set of possible interferences. Return a count of register-only
 // interferences as an estimate of register pressure.
@@ -315,7 +297,6 @@
       _ifg->add_edge( r, l );
 }
 
-//------------------------------build_ifg_virtual------------------------------
 // Actually build the interference graph.  Uses virtual registers only, no
 // physical register masks.  This allows me to be very aggressive when
 // coalescing copies.  Some of this aggressiveness will have to be undone
@@ -325,9 +306,9 @@
 void PhaseChaitin::build_ifg_virtual( ) {
 
   // For all blocks (in any order) do...
-  for( uint i=0; i<_cfg._num_blocks; i++ ) {
-    Block *b = _cfg._blocks[i];
-    IndexSet *liveout = _live->live(b);
+  for (uint i = 0; i < _cfg.number_of_blocks(); i++) {
+    Block* block = _cfg.get_block(i);
+    IndexSet* liveout = _live->live(block);
 
     // The IFG is built by a single reverse pass over each basic block.
     // Starting with the known live-out set, we remove things that get
@@ -337,8 +318,8 @@
     // The defined value interferes with everything currently live.  The
     // value is then removed from the live-ness set and it's inputs are
     // added to the live-ness set.
-    for( uint j = b->end_idx() + 1; j > 1; j-- ) {
-      Node *n = b->_nodes[j-1];
+    for (uint j = block->end_idx() + 1; j > 1; j--) {
+      Node* n = block->_nodes[j - 1];
 
       // Get value being defined
       uint r = _lrg_map.live_range_id(n);
@@ -408,7 +389,6 @@
   } // End of forall blocks
 }
 
-//------------------------------count_int_pressure-----------------------------
 uint PhaseChaitin::count_int_pressure( IndexSet *liveout ) {
   IndexSetIterator elements(liveout);
   uint lidx;
@@ -424,7 +404,6 @@
   return cnt;
 }
 
-//------------------------------count_float_pressure---------------------------
 uint PhaseChaitin::count_float_pressure( IndexSet *liveout ) {
   IndexSetIterator elements(liveout);
   uint lidx;
@@ -438,7 +417,6 @@
   return cnt;
 }
 
-//------------------------------lower_pressure---------------------------------
 // Adjust register pressure down by 1.  Capture last hi-to-low transition,
 static void lower_pressure( LRG *lrg, uint where, Block *b, uint *pressure, uint *hrp_index ) {
   if (lrg->mask().is_UP() && lrg->mask_size()) {
@@ -460,40 +438,41 @@
   }
 }
 
-//------------------------------build_ifg_physical-----------------------------
 // Build the interference graph using physical registers when available.
 // That is, if 2 live ranges are simultaneously alive but in their acceptable
 // register sets do not overlap, then they do not interfere.
 uint PhaseChaitin::build_ifg_physical( ResourceArea *a ) {
   NOT_PRODUCT( Compile::TracePhase t3("buildIFG", &_t_buildIFGphysical, TimeCompiler); )
 
-  uint spill_reg = LRG::SPILL_REG;
   uint must_spill = 0;
 
   // For all blocks (in any order) do...
-  for( uint i = 0; i < _cfg._num_blocks; i++ ) {
-    Block *b = _cfg._blocks[i];
+  for (uint i = 0; i < _cfg.number_of_blocks(); i++) {
+    Block* block = _cfg.get_block(i);
     // Clone (rather than smash in place) the liveout info, so it is alive
     // for the "collect_gc_info" phase later.
-    IndexSet liveout(_live->live(b));
-    uint last_inst = b->end_idx();
+    IndexSet liveout(_live->live(block));
+    uint last_inst = block->end_idx();
     // Compute first nonphi node index
     uint first_inst;
-    for( first_inst = 1; first_inst < last_inst; first_inst++ )
-      if( !b->_nodes[first_inst]->is_Phi() )
+    for (first_inst = 1; first_inst < last_inst; first_inst++) {
+      if (!block->_nodes[first_inst]->is_Phi()) {
         break;
+      }
+    }
 
     // Spills could be inserted before CreateEx node which should be
     // first instruction in block after Phis. Move CreateEx up.
-    for( uint insidx = first_inst; insidx < last_inst; insidx++ ) {
-      Node *ex = b->_nodes[insidx];
-      if( ex->is_SpillCopy() ) continue;
-      if( insidx > first_inst && ex->is_Mach() &&
-          ex->as_Mach()->ideal_Opcode() == Op_CreateEx ) {
+    for (uint insidx = first_inst; insidx < last_inst; insidx++) {
+      Node *ex = block->_nodes[insidx];
+      if (ex->is_SpillCopy()) {
+        continue;
+      }
+      if (insidx > first_inst && ex->is_Mach() && ex->as_Mach()->ideal_Opcode() == Op_CreateEx) {
         // If the CreateEx isn't above all the MachSpillCopies
         // then move it to the top.
-        b->_nodes.remove(insidx);
-        b->_nodes.insert(first_inst, ex);
+        block->_nodes.remove(insidx);
+        block->_nodes.insert(first_inst, ex);
       }
       // Stop once a CreateEx or any other node is found
       break;
@@ -503,12 +482,12 @@
     uint pressure[2], hrp_index[2];
     pressure[0] = pressure[1] = 0;
     hrp_index[0] = hrp_index[1] = last_inst+1;
-    b->_reg_pressure = b->_freg_pressure = 0;
+    block->_reg_pressure = block->_freg_pressure = 0;
     // Liveout things are presumed live for the whole block.  We accumulate
     // 'area' accordingly.  If they get killed in the block, we'll subtract
     // the unused part of the block from the area.
     int inst_count = last_inst - first_inst;
-    double cost = (inst_count <= 0) ? 0.0 : b->_freq * double(inst_count);
+    double cost = (inst_count <= 0) ? 0.0 : block->_freq * double(inst_count);
     assert(!(cost < 0.0), "negative spill cost" );
     IndexSetIterator elements(&liveout);
     uint lidx;
@@ -519,13 +498,15 @@
       if (lrg.mask().is_UP() && lrg.mask_size()) {
         if (lrg._is_float || lrg._is_vector) {   // Count float pressure
           pressure[1] += lrg.reg_pressure();
-          if( pressure[1] > b->_freg_pressure )
-            b->_freg_pressure = pressure[1];
+          if (pressure[1] > block->_freg_pressure) {
+            block->_freg_pressure = pressure[1];
+          }
           // Count int pressure, but do not count the SP, flags
-        } else if( lrgs(lidx).mask().overlap(*Matcher::idealreg2regmask[Op_RegI]) ) {
+        } else if(lrgs(lidx).mask().overlap(*Matcher::idealreg2regmask[Op_RegI])) {
           pressure[0] += lrg.reg_pressure();
-          if( pressure[0] > b->_reg_pressure )
-            b->_reg_pressure = pressure[0];
+          if (pressure[0] > block->_reg_pressure) {
+            block->_reg_pressure = pressure[0];
+          }
         }
       }
     }
@@ -541,8 +522,8 @@
     // value is then removed from the live-ness set and it's inputs are added
     // to the live-ness set.
     uint j;
-    for( j = last_inst + 1; j > 1; j-- ) {
-      Node *n = b->_nodes[j - 1];
+    for (j = last_inst + 1; j > 1; j--) {
+      Node* n = block->_nodes[j - 1];
 
       // Get value being defined
       uint r = _lrg_map.live_range_id(n);
@@ -551,7 +532,7 @@
       if(r) {
         // A DEF normally costs block frequency; rematerialized values are
         // removed from the DEF sight, so LOWER costs here.
-        lrgs(r)._cost += n->rematerialize() ? 0 : b->_freq;
+        lrgs(r)._cost += n->rematerialize() ? 0 : block->_freq;
 
         // If it is not live, then this instruction is dead.  Probably caused
         // by spilling and rematerialization.  Who cares why, yank this baby.
@@ -560,7 +541,7 @@
           if( !n->is_Proj() ||
               // Could also be a flags-projection of a dead ADD or such.
               (_lrg_map.live_range_id(def) && !liveout.member(_lrg_map.live_range_id(def)))) {
-            b->_nodes.remove(j - 1);
+            block->_nodes.remove(j - 1);
             if (lrgs(r)._def == n) {
               lrgs(r)._def = 0;
             }
@@ -580,21 +561,21 @@
             RegMask itmp = lrgs(r).mask();
             itmp.AND(*Matcher::idealreg2regmask[Op_RegI]);
             int iregs = itmp.Size();
-            if( pressure[0]+iregs > b->_reg_pressure )
-              b->_reg_pressure = pressure[0]+iregs;
-            if( pressure[0]       <= (uint)INTPRESSURE &&
-                pressure[0]+iregs >  (uint)INTPRESSURE ) {
-              hrp_index[0] = j-1;
+            if (pressure[0]+iregs > block->_reg_pressure) {
+              block->_reg_pressure = pressure[0] + iregs;
+            }
+            if (pressure[0] <= (uint)INTPRESSURE && pressure[0] + iregs > (uint)INTPRESSURE) {
+              hrp_index[0] = j - 1;
             }
             // Count the float-only registers
             RegMask ftmp = lrgs(r).mask();
             ftmp.AND(*Matcher::idealreg2regmask[Op_RegD]);
             int fregs = ftmp.Size();
-            if( pressure[1]+fregs > b->_freg_pressure )
-              b->_freg_pressure = pressure[1]+fregs;
-            if( pressure[1]       <= (uint)FLOATPRESSURE &&
-                pressure[1]+fregs >  (uint)FLOATPRESSURE ) {
-              hrp_index[1] = j-1;
+            if (pressure[1] + fregs > block->_freg_pressure) {
+              block->_freg_pressure = pressure[1] + fregs;
+            }
+            if(pressure[1] <= (uint)FLOATPRESSURE && pressure[1]+fregs > (uint)FLOATPRESSURE) {
+              hrp_index[1] = j - 1;
             }
           }
 
@@ -607,7 +588,7 @@
           if( n->is_SpillCopy()
               && lrgs(r).is_singledef()        // MultiDef live range can still split
               && n->outcnt() == 1              // and use must be in this block
-              && _cfg.get_block_for_node(n->unique_out()) == b ) {
+              && _cfg.get_block_for_node(n->unique_out()) == block) {
             // All single-use MachSpillCopy(s) that immediately precede their
             // use must color early.  If a longer live range steals their
             // color, the spill copy will split and may push another spill copy
@@ -617,14 +598,16 @@
             //
 
             Node *single_use = n->unique_out();
-            assert( b->find_node(single_use) >= j, "Use must be later in block");
+            assert(block->find_node(single_use) >= j, "Use must be later in block");
             // Use can be earlier in block if it is a Phi, but then I should be a MultiDef
 
             // Find first non SpillCopy 'm' that follows the current instruction
             // (j - 1) is index for current instruction 'n'
             Node *m = n;
-            for( uint i = j; i <= last_inst && m->is_SpillCopy(); ++i ) { m = b->_nodes[i]; }
-            if( m == single_use ) {
+            for (uint i = j; i <= last_inst && m->is_SpillCopy(); ++i) {
+              m = block->_nodes[i];
+            }
+            if (m == single_use) {
               lrgs(r)._area = 0.0;
             }
           }
@@ -633,7 +616,7 @@
           if( liveout.remove(r) ) {
             // Adjust register pressure.
             // Capture last hi-to-lo pressure transition
-            lower_pressure( &lrgs(r), j-1, b, pressure, hrp_index );
+            lower_pressure(&lrgs(r), j - 1, block, pressure, hrp_index);
             assert( pressure[0] == count_int_pressure  (&liveout), "" );
             assert( pressure[1] == count_float_pressure(&liveout), "" );
           }
@@ -646,7 +629,7 @@
             if (liveout.remove(x)) {
               lrgs(x)._area -= cost;
               // Adjust register pressure.
-              lower_pressure(&lrgs(x), j-1, b, pressure, hrp_index);
+              lower_pressure(&lrgs(x), j - 1, block, pressure, hrp_index);
               assert( pressure[0] == count_int_pressure  (&liveout), "" );
               assert( pressure[1] == count_float_pressure(&liveout), "" );
             }
@@ -718,7 +701,7 @@
 
       // Area remaining in the block
       inst_count--;
-      cost = (inst_count <= 0) ? 0.0 : b->_freq * double(inst_count);
+      cost = (inst_count <= 0) ? 0.0 : block->_freq * double(inst_count);
 
       // Make all inputs live
       if( !n->is_Phi() ) {      // Phi function uses come from prior block
@@ -743,7 +726,7 @@
           if (k < debug_start) {
             // A USE costs twice block frequency (once for the Load, once
             // for a Load-delay).  Rematerialized uses only cost once.
-            lrg._cost += (def->rematerialize() ? b->_freq : (b->_freq + b->_freq));
+            lrg._cost += (def->rematerialize() ? block->_freq : (block->_freq + block->_freq));
           }
           // It is live now
           if (liveout.insert(x)) {
@@ -753,12 +736,14 @@
             if (lrg.mask().is_UP() && lrg.mask_size()) {
               if (lrg._is_float || lrg._is_vector) {
                 pressure[1] += lrg.reg_pressure();
-                if( pressure[1] > b->_freg_pressure )
-                  b->_freg_pressure = pressure[1];
+                if (pressure[1] > block->_freg_pressure)  {
+                  block->_freg_pressure = pressure[1];
+                }
               } else if( lrg.mask().overlap(*Matcher::idealreg2regmask[Op_RegI]) ) {
                 pressure[0] += lrg.reg_pressure();
-                if( pressure[0] > b->_reg_pressure )
-                  b->_reg_pressure = pressure[0];
+                if (pressure[0] > block->_reg_pressure) {
+                  block->_reg_pressure = pressure[0];
+                }
               }
             }
             assert( pressure[0] == count_int_pressure  (&liveout), "" );
@@ -772,44 +757,47 @@
     // If we run off the top of the block with high pressure and
     // never see a hi-to-low pressure transition, just record that
     // the whole block is high pressure.
-    if( pressure[0] > (uint)INTPRESSURE   ) {
+    if (pressure[0] > (uint)INTPRESSURE) {
       hrp_index[0] = 0;
-      if( pressure[0] > b->_reg_pressure )
-        b->_reg_pressure = pressure[0];
+      if (pressure[0] > block->_reg_pressure) {
+        block->_reg_pressure = pressure[0];
+      }
     }
-    if( pressure[1] > (uint)FLOATPRESSURE ) {
+    if (pressure[1] > (uint)FLOATPRESSURE) {
       hrp_index[1] = 0;
-      if( pressure[1] > b->_freg_pressure )
-        b->_freg_pressure = pressure[1];
+      if (pressure[1] > block->_freg_pressure) {
+        block->_freg_pressure = pressure[1];
+      }
     }
 
     // Compute high pressure indice; avoid landing in the middle of projnodes
     j = hrp_index[0];
-    if( j < b->_nodes.size() && j < b->end_idx()+1 ) {
-      Node *cur = b->_nodes[j];
-      while( cur->is_Proj() || (cur->is_MachNullCheck()) || cur->is_Catch() ) {
+    if (j < block->_nodes.size() && j < block->end_idx() + 1) {
+      Node* cur = block->_nodes[j];
+      while (cur->is_Proj() || (cur->is_MachNullCheck()) || cur->is_Catch()) {
         j--;
-        cur = b->_nodes[j];
+        cur = block->_nodes[j];
       }
     }
-    b->_ihrp_index = j;
+    block->_ihrp_index = j;
     j = hrp_index[1];
-    if( j < b->_nodes.size() && j < b->end_idx()+1 ) {
-      Node *cur = b->_nodes[j];
-      while( cur->is_Proj() || (cur->is_MachNullCheck()) || cur->is_Catch() ) {
+    if (j < block->_nodes.size() && j < block->end_idx() + 1) {
+      Node* cur = block->_nodes[j];
+      while (cur->is_Proj() || (cur->is_MachNullCheck()) || cur->is_Catch()) {
         j--;
-        cur = b->_nodes[j];
+        cur = block->_nodes[j];
       }
     }
-    b->_fhrp_index = j;
+    block->_fhrp_index = j;
 
 #ifndef PRODUCT
     // Gather Register Pressure Statistics
     if( PrintOptoStatistics ) {
-      if( b->_reg_pressure > (uint)INTPRESSURE || b->_freg_pressure > (uint)FLOATPRESSURE )
+      if (block->_reg_pressure > (uint)INTPRESSURE || block->_freg_pressure > (uint)FLOATPRESSURE) {
         _high_pressure++;
-      else
+      } else {
         _low_pressure++;
+      }
     }
 #endif
   } // End of for all blocks
--- a/hotspot/src/share/vm/opto/lcm.cpp	Mon Aug 26 17:36:10 2013 -0700
+++ b/hotspot/src/share/vm/opto/lcm.cpp	Wed Jul 05 19:08:56 2017 +0200
@@ -501,7 +501,7 @@
       n_choice = 1;
     }
 
-    uint n_latency = cfg->_node_latency->at_grow(n->_idx);
+    uint n_latency = cfg->get_latency_for_node(n);
     uint n_score   = n->req();   // Many inputs get high score to break ties
 
     // Keep best latency found
@@ -797,7 +797,7 @@
         Node     *n = _nodes[j];
         int     idx = n->_idx;
         tty->print("#   ready cnt:%3d  ", ready_cnt.at(idx));
-        tty->print("latency:%3d  ", cfg->_node_latency->at_grow(idx));
+        tty->print("latency:%3d  ", cfg->get_latency_for_node(n));
         tty->print("%4d: %s\n", idx, n->Name());
       }
     }
@@ -825,7 +825,7 @@
 #ifndef PRODUCT
     if (cfg->trace_opto_pipelining()) {
       tty->print("#    select %d: %s", n->_idx, n->Name());
-      tty->print(", latency:%d", cfg->_node_latency->at_grow(n->_idx));
+      tty->print(", latency:%d", cfg->get_latency_for_node(n));
       n->dump();
       if (Verbose) {
         tty->print("#   ready list:");
--- a/hotspot/src/share/vm/opto/library_call.cpp	Mon Aug 26 17:36:10 2013 -0700
+++ b/hotspot/src/share/vm/opto/library_call.cpp	Wed Jul 05 19:08:56 2017 +0200
@@ -213,6 +213,7 @@
   void insert_pre_barrier(Node* base_oop, Node* offset, Node* pre_val, bool need_mem_bar);
   bool inline_unsafe_access(bool is_native_ptr, bool is_store, BasicType type, bool is_volatile);
   bool inline_unsafe_prefetch(bool is_native_ptr, bool is_store, bool is_static);
+  static bool klass_needs_init_guard(Node* kls);
   bool inline_unsafe_allocate();
   bool inline_unsafe_copyMemory();
   bool inline_native_currentThread();
@@ -2892,8 +2893,21 @@
   }
 }
 
+bool LibraryCallKit::klass_needs_init_guard(Node* kls) {  // Returns true when an initialization check must still be emitted for kls.
+  if (!kls->is_Con()) {  // Not a constant node: keep the guard.
+    return true;
+  }
+  const TypeKlassPtr* klsptr = kls->bottom_type()->isa_klassptr();
+  if (klsptr == NULL) {  // Bottom type is not a klass pointer: keep the guard.
+    return true;
+  }
+  ciInstanceKlass* ik = klsptr->klass()->as_instance_klass();  // NOTE(review): presumes the constant klass is an instance klass -- confirm for array klasses.
+  // A klass that is already initialized needs no guard; any other klass does.
+  return !ik->is_initialized();
+}
+
 //----------------------------inline_unsafe_allocate---------------------------
-// public native Object sun.mics.Unsafe.allocateInstance(Class<?> cls);
+// public native Object sun.misc.Unsafe.allocateInstance(Class<?> cls);
 bool LibraryCallKit::inline_unsafe_allocate() {
   if (callee()->is_static())  return false;  // caller must have the capability!
 
@@ -2905,16 +2919,19 @@
   kls = null_check(kls);
   if (stopped())  return true;  // argument was like int.class
 
-  // Note:  The argument might still be an illegal value like
-  // Serializable.class or Object[].class.   The runtime will handle it.
-  // But we must make an explicit check for initialization.
-  Node* insp = basic_plus_adr(kls, in_bytes(InstanceKlass::init_state_offset()));
-  // Use T_BOOLEAN for InstanceKlass::_init_state so the compiler
-  // can generate code to load it as unsigned byte.
-  Node* inst = make_load(NULL, insp, TypeInt::UBYTE, T_BOOLEAN);
-  Node* bits = intcon(InstanceKlass::fully_initialized);
-  Node* test = _gvn.transform(new (C) SubINode(inst, bits));
-  // The 'test' is non-zero if we need to take a slow path.
+  Node* test = NULL;
+  if (LibraryCallKit::klass_needs_init_guard(kls)) {
+    // Note:  The argument might still be an illegal value like
+    // Serializable.class or Object[].class.   The runtime will handle it.
+    // But we must make an explicit check for initialization.
+    Node* insp = basic_plus_adr(kls, in_bytes(InstanceKlass::init_state_offset()));
+    // Use T_BOOLEAN for InstanceKlass::_init_state so the compiler
+    // can generate code to load it as unsigned byte.
+    Node* inst = make_load(NULL, insp, TypeInt::UBYTE, T_BOOLEAN);
+    Node* bits = intcon(InstanceKlass::fully_initialized);
+    test = _gvn.transform(new (C) SubINode(inst, bits));
+    // The 'test' is non-zero if we need to take a slow path.
+  }
 
   Node* obj = new_instance(kls, test);
   set_result(obj);
--- a/hotspot/src/share/vm/opto/live.cpp	Mon Aug 26 17:36:10 2013 -0700
+++ b/hotspot/src/share/vm/opto/live.cpp	Wed Jul 05 19:08:56 2017 +0200
@@ -30,9 +30,6 @@
 #include "opto/machnode.hpp"
 
 
-
-//=============================================================================
-//------------------------------PhaseLive--------------------------------------
 // Compute live-in/live-out.  We use a totally incremental algorithm.  The LIVE
 // problem is monotonic.  The steady-state solution looks like this: pull a
 // block from the worklist.  It has a set of delta's - values which are newly
@@ -53,9 +50,9 @@
 
   // Init the sparse live arrays.  This data is live on exit from here!
   // The _live info is the live-out info.
-  _live = (IndexSet*)_arena->Amalloc(sizeof(IndexSet)*_cfg._num_blocks);
+  _live = (IndexSet*)_arena->Amalloc(sizeof(IndexSet) * _cfg.number_of_blocks());
   uint i;
-  for( i=0; i<_cfg._num_blocks; i++ ) {
+  for (i = 0; i < _cfg.number_of_blocks(); i++) {
     _live[i].initialize(_maxlrg);
   }
 
@@ -65,14 +62,14 @@
   // Does the memory used by _defs and _deltas get reclaimed?  Does it matter?  TT
 
   // Array of values defined locally in blocks
-  _defs = NEW_RESOURCE_ARRAY(IndexSet,_cfg._num_blocks);
-  for( i=0; i<_cfg._num_blocks; i++ ) {
+  _defs = NEW_RESOURCE_ARRAY(IndexSet,_cfg.number_of_blocks());
+  for (i = 0; i < _cfg.number_of_blocks(); i++) {
     _defs[i].initialize(_maxlrg);
   }
 
   // Array of delta-set pointers, indexed by block pre_order-1.
-  _deltas = NEW_RESOURCE_ARRAY(IndexSet*,_cfg._num_blocks);
-  memset( _deltas, 0, sizeof(IndexSet*)* _cfg._num_blocks);
+  _deltas = NEW_RESOURCE_ARRAY(IndexSet*,_cfg.number_of_blocks());
+  memset( _deltas, 0, sizeof(IndexSet*)* _cfg.number_of_blocks());
 
   _free_IndexSet = NULL;
 
@@ -80,31 +77,32 @@
   VectorSet first_pass(Thread::current()->resource_area());
 
   // Outer loop: must compute local live-in sets and push into predecessors.
-  uint iters = _cfg._num_blocks;        // stat counters
-  for( uint j=_cfg._num_blocks; j>0; j-- ) {
-    Block *b = _cfg._blocks[j-1];
+  for (uint j = _cfg.number_of_blocks(); j > 0; j--) {
+    Block* block = _cfg.get_block(j - 1);
 
     // Compute the local live-in set.  Start with any new live-out bits.
-    IndexSet *use = getset( b );
-    IndexSet *def = &_defs[b->_pre_order-1];
+    IndexSet* use = getset(block);
+    IndexSet* def = &_defs[block->_pre_order-1];
     DEBUG_ONLY(IndexSet *def_outside = getfreeset();)
     uint i;
-    for( i=b->_nodes.size(); i>1; i-- ) {
-      Node *n = b->_nodes[i-1];
-      if( n->is_Phi() ) break;
+    for (i = block->_nodes.size(); i > 1; i--) {
+      Node* n = block->_nodes[i-1];
+      if (n->is_Phi()) {
+        break;
+      }
 
       uint r = _names[n->_idx];
       assert(!def_outside->member(r), "Use of external LRG overlaps the same LRG defined in this block");
       def->insert( r );
       use->remove( r );
       uint cnt = n->req();
-      for( uint k=1; k<cnt; k++ ) {
+      for (uint k = 1; k < cnt; k++) {
         Node *nk = n->in(k);
         uint nkidx = nk->_idx;
-        if (_cfg.get_block_for_node(nk) != b) {
+        if (_cfg.get_block_for_node(nk) != block) {
           uint u = _names[nkidx];
-          use->insert( u );
-          DEBUG_ONLY(def_outside->insert( u );)
+          use->insert(u);
+          DEBUG_ONLY(def_outside->insert(u);)
         }
       }
     }
@@ -113,41 +111,38 @@
     _free_IndexSet = def_outside;     // Drop onto free list
 #endif
     // Remove anything defined by Phis and the block start instruction
-    for( uint k=i; k>0; k-- ) {
-      uint r = _names[b->_nodes[k-1]->_idx];
-      def->insert( r );
-      use->remove( r );
+    for (uint k = i; k > 0; k--) {
+      uint r = _names[block->_nodes[k - 1]->_idx];
+      def->insert(r);
+      use->remove(r);
     }
 
     // Push these live-in things to predecessors
-    for( uint l=1; l<b->num_preds(); l++ ) {
-      Block *p = _cfg.get_block_for_node(b->pred(l));
-      add_liveout( p, use, first_pass );
+    for (uint l = 1; l < block->num_preds(); l++) {
+      Block* p = _cfg.get_block_for_node(block->pred(l));
+      add_liveout(p, use, first_pass);
 
       // PhiNode uses go in the live-out set of prior blocks.
-      for( uint k=i; k>0; k-- )
-        add_liveout( p, _names[b->_nodes[k-1]->in(l)->_idx], first_pass );
+      for (uint k = i; k > 0; k--) {
+        add_liveout(p, _names[block->_nodes[k-1]->in(l)->_idx], first_pass);
+      }
     }
-    freeset( b );
-    first_pass.set(b->_pre_order);
+    freeset(block);
+    first_pass.set(block->_pre_order);
 
     // Inner loop: blocks that picked up new live-out values to be propagated
-    while( _worklist->size() ) {
-        // !!!!!
-// #ifdef ASSERT
-      iters++;
-// #endif
-      Block *b = _worklist->pop();
-      IndexSet *delta = getset(b);
+    while (_worklist->size()) {
+      Block* block = _worklist->pop();
+      IndexSet *delta = getset(block);
       assert( delta->count(), "missing delta set" );
 
       // Add new-live-in to predecessors live-out sets
-      for (uint l = 1; l < b->num_preds(); l++) {
-        Block* block = _cfg.get_block_for_node(b->pred(l));
-        add_liveout(block, delta, first_pass);
+      for (uint l = 1; l < block->num_preds(); l++) {
+        Block* predecessor = _cfg.get_block_for_node(block->pred(l));
+        add_liveout(predecessor, delta, first_pass);
       }
 
-      freeset(b);
+      freeset(block);
     } // End of while-worklist-not-empty
 
   } // End of for-all-blocks-outer-loop
@@ -155,7 +150,7 @@
   // We explicitly clear all of the IndexSets which we are about to release.
   // This allows us to recycle their internal memory into IndexSet's free list.
 
-  for( i=0; i<_cfg._num_blocks; i++ ) {
+  for (i = 0; i < _cfg.number_of_blocks(); i++) {
     _defs[i].clear();
     if (_deltas[i]) {
       // Is this always true?
@@ -171,13 +166,11 @@
 
 }
 
-//------------------------------stats------------------------------------------
 #ifndef PRODUCT
 void PhaseLive::stats(uint iters) const {
 }
 #endif
 
-//------------------------------getset-----------------------------------------
 // Get an IndexSet for a block.  Return existing one, if any.  Make a new
 // empty one if a prior one does not exist.
 IndexSet *PhaseLive::getset( Block *p ) {
@@ -188,7 +181,6 @@
   return delta;                 // Return set of new live-out items
 }
 
-//------------------------------getfreeset-------------------------------------
 // Pull from free list, or allocate.  Internal allocation on the returned set
 // is always from thread local storage.
 IndexSet *PhaseLive::getfreeset( ) {
@@ -207,7 +199,6 @@
   return f;
 }
 
-//------------------------------freeset----------------------------------------
 // Free an IndexSet from a block.
 void PhaseLive::freeset( const Block *p ) {
   IndexSet *f = _deltas[p->_pre_order-1];
@@ -216,7 +207,6 @@
   _deltas[p->_pre_order-1] = NULL;
 }
 
-//------------------------------add_liveout------------------------------------
 // Add a live-out value to a given blocks live-out set.  If it is new, then
 // also add it to the delta set and stick the block on the worklist.
 void PhaseLive::add_liveout( Block *p, uint r, VectorSet &first_pass ) {
@@ -233,8 +223,6 @@
   }
 }
 
-
-//------------------------------add_liveout------------------------------------
 // Add a vector of live-out values to a given blocks live-out set.
 void PhaseLive::add_liveout( Block *p, IndexSet *lo, VectorSet &first_pass ) {
   IndexSet *live = &_live[p->_pre_order-1];
@@ -262,7 +250,6 @@
 }
 
 #ifndef PRODUCT
-//------------------------------dump-------------------------------------------
 // Dump the live-out set for a block
 void PhaseLive::dump( const Block *b ) const {
   tty->print("Block %d: ",b->_pre_order);
@@ -275,18 +262,19 @@
   tty->print("\n");
 }
 
-//------------------------------verify_base_ptrs-------------------------------
 // Verify that base pointers and derived pointers are still sane.
 void PhaseChaitin::verify_base_ptrs( ResourceArea *a ) const {
 #ifdef ASSERT
   Unique_Node_List worklist(a);
-  for( uint i = 0; i < _cfg._num_blocks; i++ ) {
-    Block *b = _cfg._blocks[i];
-    for( uint j = b->end_idx() + 1; j > 1; j-- ) {
-      Node *n = b->_nodes[j-1];
-      if( n->is_Phi() ) break;
+  for (uint i = 0; i < _cfg.number_of_blocks(); i++) {
+    Block* block = _cfg.get_block(i);
+    for (uint j = block->end_idx() + 1; j > 1; j--) {
+      Node* n = block->_nodes[j-1];
+      if (n->is_Phi()) {
+        break;
+      }
       // Found a safepoint?
-      if( n->is_MachSafePoint() ) {
+      if (n->is_MachSafePoint()) {
         MachSafePointNode *sfpt = n->as_MachSafePoint();
         JVMState* jvms = sfpt->jvms();
         if (jvms != NULL) {
@@ -358,7 +346,6 @@
 #endif
 }
 
-//------------------------------verify-------------------------------------
 // Verify that graphs and base pointers are still sane.
 void PhaseChaitin::verify( ResourceArea *a, bool verify_ifg ) const {
 #ifdef ASSERT
--- a/hotspot/src/share/vm/opto/matcher.cpp	Mon Aug 26 17:36:10 2013 -0700
+++ b/hotspot/src/share/vm/opto/matcher.cpp	Wed Jul 05 19:08:56 2017 +0200
@@ -67,8 +67,8 @@
 const uint Matcher::_end_rematerialize   = _END_REMATERIALIZE;
 
 //---------------------------Matcher-------------------------------------------
-Matcher::Matcher( Node_List &proj_list ) :
-  PhaseTransform( Phase::Ins_Select ),
+Matcher::Matcher()
+: PhaseTransform( Phase::Ins_Select ),
 #ifdef ASSERT
   _old2new_map(C->comp_arena()),
   _new2old_map(C->comp_arena()),
@@ -78,7 +78,7 @@
   _swallowed(swallowed),
   _begin_inst_chain_rule(_BEGIN_INST_CHAIN_RULE),
   _end_inst_chain_rule(_END_INST_CHAIN_RULE),
-  _must_clone(must_clone), _proj_list(proj_list),
+  _must_clone(must_clone),
   _register_save_policy(register_save_policy),
   _c_reg_save_policy(c_reg_save_policy),
   _register_save_type(register_save_type),
@@ -1304,8 +1304,9 @@
       for (int i = begin_out_arg_area; i < out_arg_limit_per_call; i++)
         proj->_rout.Insert(OptoReg::Name(i));
     }
-    if( proj->_rout.is_NotEmpty() )
-      _proj_list.push(proj);
+    if (proj->_rout.is_NotEmpty()) {
+      push_projection(proj);
+    }
   }
   // Transfer the safepoint information from the call to the mcall
   // Move the JVMState list
@@ -1685,14 +1686,15 @@
   }
 
   // If the _leaf is an AddP, insert the base edge
-  if( leaf->is_AddP() )
+  if (leaf->is_AddP()) {
     mach->ins_req(AddPNode::Base,leaf->in(AddPNode::Base));
+  }
 
-  uint num_proj = _proj_list.size();
+  uint number_of_projections_prior = number_of_projections();
 
   // Perform any 1-to-many expansions required
-  MachNode *ex = mach->Expand(s,_proj_list, mem);
-  if( ex != mach ) {
+  MachNode *ex = mach->Expand(s, _projection_list, mem);
+  if (ex != mach) {
     assert(ex->ideal_reg() == mach->ideal_reg(), "ideal types should match");
     if( ex->in(1)->is_Con() )
       ex->in(1)->set_req(0, C->root());
@@ -1713,7 +1715,7 @@
   // generated belatedly during spill code generation.
   if (_allocation_started) {
     guarantee(ex == mach, "no expand rules during spill generation");
-    guarantee(_proj_list.size() == num_proj, "no allocation during spill generation");
+    guarantee(number_of_projections_prior == number_of_projections(), "no allocation during spill generation");
   }
 
   if (leaf->is_Con() || leaf->is_DecodeNarrowPtr()) {
--- a/hotspot/src/share/vm/opto/matcher.hpp	Mon Aug 26 17:36:10 2013 -0700
+++ b/hotspot/src/share/vm/opto/matcher.hpp	Wed Jul 05 19:08:56 2017 +0200
@@ -88,7 +88,7 @@
 
   Node *transform( Node *dummy );
 
-  Node_List &_proj_list;        // For Machine nodes killing many values
+  Node_List _projection_list;        // For Machine nodes killing many values
 
   Node_Array _shared_nodes;
 
@@ -183,10 +183,30 @@
   void collect_null_checks( Node *proj, Node *orig_proj );
   void validate_null_checks( );
 
-  Matcher( Node_List &proj_list );
+  Matcher();
+
+  // Get the projection node stored at index pos of the projection list.
+  Node* get_projection(uint pos) {  // pos is not range-checked in this accessor.
+    return _projection_list[pos];
+  }
+
+  // Push a projection node onto the projection list (forwards to _projection_list.push).
+  void push_projection(Node* node) {
+    _projection_list.push(node);
+  }
+
+  Node* pop_projection() {  // Returns the result of _projection_list.pop(); presumably the most recently pushed node -- confirm against Node_List::pop.
+    return _projection_list.pop();
+  }
+
+  // Number of nodes currently in the projection list, as reported by its size().
+  uint number_of_projections() const {
+    return _projection_list.size();
+  }
 
   // Select instructions for entire method
-  void  match( );
+  void match();
+
   // Helper for match
   OptoReg::Name warp_incoming_stk_arg( VMReg reg );
 
--- a/hotspot/src/share/vm/opto/output.cpp	Mon Aug 26 17:36:10 2013 -0700
+++ b/hotspot/src/share/vm/opto/output.cpp	Wed Jul 05 19:08:56 2017 +0200
@@ -54,11 +54,10 @@
 extern int emit_exception_handler(CodeBuffer &cbuf);
 extern int emit_deopt_handler(CodeBuffer &cbuf);
 
-//------------------------------Output-----------------------------------------
 // Convert Nodes to instruction bits and pass off to the VM
 void Compile::Output() {
   // RootNode goes
-  assert( _cfg->_broot->_nodes.size() == 0, "" );
+  assert( _cfg->get_root_block()->_nodes.size() == 0, "" );
 
   // The number of new nodes (mostly MachNop) is proportional to
   // the number of java calls and inner loops which are aligned.
@@ -68,8 +67,8 @@
     return;
   }
   // Make sure I can find the Start Node
-  Block *entry = _cfg->_blocks[1];
-  Block *broot = _cfg->_broot;
+  Block *entry = _cfg->get_block(1);
+  Block *broot = _cfg->get_root_block();
 
   const StartNode *start = entry->_nodes[0]->as_Start();
 
@@ -109,40 +108,44 @@
   }
 
   // Insert epilogs before every return
-  for( uint i=0; i<_cfg->_num_blocks; i++ ) {
-    Block *b = _cfg->_blocks[i];
-    if( !b->is_connector() && b->non_connector_successor(0) == _cfg->_broot ) { // Found a program exit point?
-      Node *m = b->end();
-      if( m->is_Mach() && m->as_Mach()->ideal_Opcode() != Op_Halt ) {
-        MachEpilogNode *epilog = new (this) MachEpilogNode(m->as_Mach()->ideal_Opcode() == Op_Return);
-        b->add_inst( epilog );
-        _cfg->map_node_to_block(epilog, b);
+  for (uint i = 0; i < _cfg->number_of_blocks(); i++) {
+    Block* block = _cfg->get_block(i);
+    if (!block->is_connector() && block->non_connector_successor(0) == _cfg->get_root_block()) { // Found a program exit point?
+      Node* m = block->end();
+      if (m->is_Mach() && m->as_Mach()->ideal_Opcode() != Op_Halt) {
+        MachEpilogNode* epilog = new (this) MachEpilogNode(m->as_Mach()->ideal_Opcode() == Op_Return);
+        block->add_inst(epilog);
+        _cfg->map_node_to_block(epilog, block);
       }
     }
   }
 
 # ifdef ENABLE_ZAP_DEAD_LOCALS
-  if ( ZapDeadCompiledLocals )  Insert_zap_nodes();
+  if (ZapDeadCompiledLocals) {
+    Insert_zap_nodes();
+  }
 # endif
 
-  uint* blk_starts = NEW_RESOURCE_ARRAY(uint,_cfg->_num_blocks+1);
-  blk_starts[0]    = 0;
+  uint* blk_starts = NEW_RESOURCE_ARRAY(uint, _cfg->number_of_blocks() + 1);
+  blk_starts[0] = 0;
 
   // Initialize code buffer and process short branches.
   CodeBuffer* cb = init_buffer(blk_starts);
 
-  if (cb == NULL || failing())  return;
+  if (cb == NULL || failing()) {
+    return;
+  }
 
   ScheduleAndBundle();
 
 #ifndef PRODUCT
   if (trace_opto_output()) {
     tty->print("\n---- After ScheduleAndBundle ----\n");
-    for (uint i = 0; i < _cfg->_num_blocks; i++) {
+    for (uint i = 0; i < _cfg->number_of_blocks(); i++) {
       tty->print("\nBB#%03d:\n", i);
-      Block *bb = _cfg->_blocks[i];
-      for (uint j = 0; j < bb->_nodes.size(); j++) {
-        Node *n = bb->_nodes[j];
+      Block* block = _cfg->get_block(i);
+      for (uint j = 0; j < block->_nodes.size(); j++) {
+        Node* n = block->_nodes[j];
         OptoReg::Name reg = _regalloc->get_reg_first(n);
         tty->print(" %-6s ", reg >= 0 && reg < REG_COUNT ? Matcher::regName[reg] : "");
         n->dump();
@@ -151,11 +154,15 @@
   }
 #endif
 
-  if (failing())  return;
+  if (failing()) {
+    return;
+  }
 
   BuildOopMaps();
 
-  if (failing())  return;
+  if (failing())  {
+    return;
+  }
 
   fill_buffer(cb, blk_starts);
 }
@@ -217,8 +224,8 @@
     return; // no safepoints/oopmaps emitted for calls in stubs,so we don't care
 
   // Insert call to zap runtime stub before every node with an oop map
-  for( uint i=0; i<_cfg->_num_blocks; i++ ) {
-    Block *b = _cfg->_blocks[i];
+  for( uint i=0; i<_cfg->number_of_blocks(); i++ ) {
+    Block *b = _cfg->get_block(i);
     for ( uint j = 0;  j < b->_nodes.size();  ++j ) {
       Node *n = b->_nodes[j];
 
@@ -275,7 +282,6 @@
   return _matcher->match_sfpt(ideal_node);
 }
 
-//------------------------------is_node_getting_a_safepoint--------------------
 bool Compile::is_node_getting_a_safepoint( Node* n) {
   // This code duplicates the logic prior to the call of add_safepoint
   // below in this file.
@@ -285,7 +291,6 @@
 
 # endif // ENABLE_ZAP_DEAD_LOCALS
 
-//------------------------------compute_loop_first_inst_sizes------------------
 // Compute the size of first NumberOfLoopInstrToAlign instructions at the top
 // of a loop. When aligning a loop we need to provide enough instructions
 // in cpu's fetch buffer to feed decoders. The loop alignment could be
@@ -302,42 +307,39 @@
   // or alignment padding is larger then MaxLoopPad. By default, MaxLoopPad
   // is equal to OptoLoopAlignment-1 except on new Intel cpus, where it is
   // equal to 11 bytes which is the largest address NOP instruction.
-  if( MaxLoopPad < OptoLoopAlignment-1 ) {
-    uint last_block = _cfg->_num_blocks-1;
-    for( uint i=1; i <= last_block; i++ ) {
-      Block *b = _cfg->_blocks[i];
+  if (MaxLoopPad < OptoLoopAlignment - 1) {
+    uint last_block = _cfg->number_of_blocks() - 1;
+    for (uint i = 1; i <= last_block; i++) {
+      Block* block = _cfg->get_block(i);
       // Check the first loop's block which requires an alignment.
-      if( b->loop_alignment() > (uint)relocInfo::addr_unit() ) {
+      if (block->loop_alignment() > (uint)relocInfo::addr_unit()) {
         uint sum_size = 0;
         uint inst_cnt = NumberOfLoopInstrToAlign;
-        inst_cnt = b->compute_first_inst_size(sum_size, inst_cnt, _regalloc);
+        inst_cnt = block->compute_first_inst_size(sum_size, inst_cnt, _regalloc);
 
         // Check subsequent fallthrough blocks if the loop's first
         // block(s) does not have enough instructions.
-        Block *nb = b;
-        while( inst_cnt > 0 &&
-               i < last_block &&
-               !_cfg->_blocks[i+1]->has_loop_alignment() &&
-               !nb->has_successor(b) ) {
+        Block *nb = block;
+        while(inst_cnt > 0 &&
+              i < last_block &&
+              !_cfg->get_block(i + 1)->has_loop_alignment() &&
+              !nb->has_successor(block)) {
           i++;
-          nb = _cfg->_blocks[i];
+          nb = _cfg->get_block(i);
           inst_cnt  = nb->compute_first_inst_size(sum_size, inst_cnt, _regalloc);
         } // while( inst_cnt > 0 && i < last_block  )
 
-        b->set_first_inst_size(sum_size);
+        block->set_first_inst_size(sum_size);
       } // f( b->head()->is_Loop() )
     } // for( i <= last_block )
   } // if( MaxLoopPad < OptoLoopAlignment-1 )
 }
 
-//----------------------shorten_branches---------------------------------------
 // The architecture description provides short branch variants for some long
 // branch instructions. Replace eligible long branches with short branches.
 void Compile::shorten_branches(uint* blk_starts, int& code_size, int& reloc_size, int& stub_size) {
-
-  // ------------------
   // Compute size of each block, method size, and relocation information size