changeset 10564:5322bfdc760a

Merge
author jwilhelm
date Wed, 09 Mar 2016 14:18:12 +0100
parents f81ea6b38171 797e6aac6d53
children 572d669d17cd 80706cc25494
files src/share/vm/code/codeCache.cpp src/share/vm/code/codeCache.hpp src/share/vm/code/nmethod.cpp src/share/vm/jvmci/commandLineFlagConstraintsJVMCI.cpp src/share/vm/jvmci/commandLineFlagConstraintsJVMCI.hpp src/share/vm/runtime/arguments.cpp test/TEST.groups
diffstat 171 files changed, 7481 insertions(+), 2689 deletions(-) [+]
line wrap: on
line diff
--- a/.hgtags	Wed Mar 09 14:54:18 2016 +0100
+++ b/.hgtags	Wed Mar 09 14:18:12 2016 +0100
@@ -510,3 +510,4 @@
 266fa9bb5297bf02cb2a7b038b10a109817d2b48 jdk-9+105
 7232de4c17c37f60aecec4f3191090bd3d41d334 jdk-9+106
 c5146d4da417f76edfc43097d2e2ced042a65b4e jdk-9+107
+934f6793f5f7dca44f69b4559d525fa64b31840d jdk-9+108
--- a/make/test/JtregNative.gmk	Wed Mar 09 14:54:18 2016 +0100
+++ b/make/test/JtregNative.gmk	Wed Mar 09 14:18:12 2016 +0100
@@ -48,6 +48,7 @@
     $(HOTSPOT_TOPDIR)/test/runtime/SameObject \
     $(HOTSPOT_TOPDIR)/test/compiler/floatingpoint/ \
     $(HOTSPOT_TOPDIR)/test/compiler/calls \
+    $(HOTSPOT_TOPDIR)/test/compiler/native \
     #
 
 # Add conditional directories here when needed.
--- a/src/cpu/aarch64/vm/aarch64.ad	Wed Mar 09 14:54:18 2016 +0100
+++ b/src/cpu/aarch64/vm/aarch64.ad	Wed Mar 09 14:18:12 2016 +0100
@@ -1041,10 +1041,8 @@
   bool is_card_mark_membar(const MemBarNode *barrier);
   bool is_CAS(int opcode);
 
-  MemBarNode *leading_to_normal(MemBarNode *leading);
-  MemBarNode *normal_to_leading(const MemBarNode *barrier);
-  MemBarNode *card_mark_to_trailing(const MemBarNode *barrier);
-  MemBarNode *trailing_to_card_mark(const MemBarNode *trailing);
+  MemBarNode *leading_to_trailing(MemBarNode *leading);
+  MemBarNode *card_mark_to_leading(const MemBarNode *barrier);
   MemBarNode *trailing_to_leading(const MemBarNode *trailing);
 
   // predicates controlling emit of ldr<x>/ldar<x> and associated dmb
@@ -1418,23 +1416,28 @@
   // leading MemBarRelease and a trailing MemBarVolatile as follows
   //
   //   MemBarRelease
-  //  {      ||      } -- optional
+  //  {    ||        } -- optional
   //  {MemBarCPUOrder}
-  //         ||     \\
-  //         ||     StoreX[mo_release]
-  //         | \     /
-  //         | MergeMem
-  //         | /
+  //       ||       \\
+  //       ||     StoreX[mo_release]
+  //       | \ Bot    / ???
+  //       | MergeMem
+  //       | /
   //   MemBarVolatile
   //
   // where
   //  || and \\ represent Ctl and Mem feeds via Proj nodes
   //  | \ and / indicate further routing of the Ctl and Mem feeds
   //
-  // this is the graph we see for non-object stores. however, for a
-  // volatile Object store (StoreN/P) we may see other nodes below the
-  // leading membar because of the need for a GC pre- or post-write
-  // barrier.
+  // Note that the memory feed from the CPUOrder membar to the
+  // MergeMem node is an AliasIdxBot slice while the feed from the
+  // StoreX is for a slice determined by the type of value being
+  // written.
+  //
+  // the diagram above shows the graph we see for non-object stores.
+  // for a volatile Object store (StoreN/P) we may see other nodes
+  // below the leading membar because of the need for a GC pre- or
+  // post-write barrier.
   //
   // with most GC configurations we with see this simple variant which
   // includes a post-write barrier card mark.
@@ -1442,7 +1445,7 @@
   //   MemBarRelease______________________________
   //         ||    \\               Ctl \        \\
   //         ||    StoreN/P[mo_release] CastP2X  StoreB/CM
-  //         | \     /                       . . .  /
+  //         | \ Bot  / oop                 . . .  /
   //         | MergeMem
   //         | /
   //         ||      /
@@ -1452,152 +1455,142 @@
   // the object address to an int used to compute the card offset) and
   // Ctl+Mem to a StoreB node (which does the actual card mark).
   //
-  // n.b. a StoreCM node will only appear in this configuration when
-  // using CMS. StoreCM differs from a normal card mark write (StoreB)
-  // because it implies a requirement to order visibility of the card
-  // mark (StoreCM) relative to the object put (StoreP/N) using a
-  // StoreStore memory barrier (arguably this ought to be represented
-  // explicitly in the ideal graph but that is not how it works). This
-  // ordering is required for both non-volatile and volatile
-  // puts. Normally that means we need to translate a StoreCM using
-  // the sequence
+  // n.b. a StoreCM node is only ever used when CMS (with or without
+  // CondCardMark) or G1 is configured. This abstract instruction
+  // differs from a normal card mark write (StoreB) because it implies
+  // a requirement to order visibility of the card mark (StoreCM)
+  // after that of the object put (StoreP/N) using a StoreStore memory
+  // barrier. Note that this is /not/ a requirement to order the
+  // instructions in the generated code (that is already guaranteed by
+  // the order of memory dependencies). Rather it is a requirement to
+  // ensure visibility order which only applies on architectures like
+  // AArch64 which do not implement TSO. This ordering is required for
+  // both non-volatile and volatile puts.
+  //
+  // That implies that we need to translate a StoreCM using the
+  // sequence
   //
   //   dmb ishst
   //   stlrb
   //
-  // However, in the case of a volatile put if we can recognise this
-  // configuration and plant an stlr for the object write then we can
-  // omit the dmb and just plant an strb since visibility of the stlr
-  // is ordered before visibility of subsequent stores. StoreCM nodes
-  // also arise when using G1 or using CMS with conditional card
-  // marking. In these cases (as we shall see) we don't need to insert
-  // the dmb when translating StoreCM because there is already an
-  // intervening StoreLoad barrier between it and the StoreP/N.
-  //
-  // It is also possible to perform the card mark conditionally on it
-  // currently being unmarked in which case the volatile put graph
-  // will look slightly different
+  // This dmb cannot be omitted even when the associated StoreX or
+  // CompareAndSwapX is implemented using stlr. However, as described
+  // below there are circumstances where a specific GC configuration
+  // requires a stronger barrier in which case it can be omitted.
+  // 
+  // With the Serial or Parallel GC using +CondCardMark the card mark
+  // is performed conditionally on it currently being unmarked in
+  // which case the volatile put graph looks slightly different
   //
   //   MemBarRelease____________________________________________
   //         ||    \\               Ctl \     Ctl \     \\  Mem \
   //         ||    StoreN/P[mo_release] CastP2X   If   LoadB     |
-  //         | \     /                              \            |
+  //         | \ Bot / oop                          \            |
   //         | MergeMem                            . . .      StoreB
   //         | /                                                /
   //         ||     /
   //   MemBarVolatile
   //
-  // It is worth noting at this stage that both the above
+  // It is worth noting at this stage that all the above
   // configurations can be uniquely identified by checking that the
   // memory flow includes the following subgraph:
   //
   //   MemBarRelease
   //  {MemBarCPUOrder}
-  //          |  \      . . .
-  //          |  StoreX[mo_release]  . . .
-  //          |   /
-  //         MergeMem
-  //          |
+  //      |  \      . . .
+  //      |  StoreX[mo_release]  . . .
+  //  Bot |   / oop
+  //     MergeMem
+  //      |
   //   MemBarVolatile
   //
-  // This is referred to as a *normal* subgraph. It can easily be
-  // detected starting from any candidate MemBarRelease,
-  // StoreX[mo_release] or MemBarVolatile.
-  //
-  // A simple variation on this normal case occurs for an unsafe CAS
-  // operation. The basic graph for a non-object CAS is
+  // This is referred to as a *normal* volatile store subgraph. It can
+  // easily be detected starting from any candidate MemBarRelease,
+  // StoreX[mo_release] or MemBarVolatile node.
+  //
+  // A small variation on this normal case occurs for an unsafe CAS
+  // operation. The basic memory flow subgraph for a non-object CAS is
+  // as follows
   //
   //   MemBarRelease
   //         ||
   //   MemBarCPUOrder
-  //         ||     \\   . . .
-  //         ||     CompareAndSwapX
-  //         ||       |
-  //         ||     SCMemProj
-  //         | \     /
-  //         | MergeMem
-  //         | /
+  //          |     \\   . . .
+  //          |     CompareAndSwapX
+  //          |       |
+  //      Bot |     SCMemProj
+  //           \     / Bot
+  //           MergeMem
+  //           /
   //   MemBarCPUOrder
   //         ||
   //   MemBarAcquire
   //
   // The same basic variations on this arrangement (mutatis mutandis)
-  // occur when a card mark is introduced. i.e. we se the same basic
-  // shape but the StoreP/N is replaced with CompareAndSawpP/N and the
-  // tail of the graph is a pair comprising a MemBarCPUOrder +
-  // MemBarAcquire.
-  //
-  // So, in the case of a CAS the normal graph has the variant form
-  //
-  //   MemBarRelease
-  //   MemBarCPUOrder
-  //          |   \      . . .
-  //          |  CompareAndSwapX  . . .
-  //          |    |
-  //          |   SCMemProj
-  //          |   /  . . .
-  //         MergeMem
-  //          |
-  //   MemBarCPUOrder
-  //   MemBarAcquire
-  //
-  // This graph can also easily be detected starting from any
-  // candidate MemBarRelease, CompareAndSwapX or MemBarAcquire.
-  //
-  // the code below uses two helper predicates, leading_to_normal and
-  // normal_to_leading to identify these normal graphs, one validating
-  // the layout starting from the top membar and searching down and
-  // the other validating the layout starting from the lower membar
-  // and searching up.
-  //
-  // There are two special case GC configurations when a normal graph
-  // may not be generated: when using G1 (which always employs a
-  // conditional card mark); and when using CMS with conditional card
-  // marking configured. These GCs are both concurrent rather than
-  // stop-the world GCs. So they introduce extra Ctl+Mem flow into the
-  // graph between the leading and trailing membar nodes, in
-  // particular enforcing stronger memory serialisation beween the
-  // object put and the corresponding conditional card mark. CMS
-  // employs a post-write GC barrier while G1 employs both a pre- and
-  // post-write GC barrier. Of course the extra nodes may be absent --
-  // they are only inserted for object puts. This significantly
-  // complicates the task of identifying whether a MemBarRelease,
-  // StoreX[mo_release] or MemBarVolatile forms part of a volatile put
-  // when using these GC configurations (see below). It adds similar
-  // complexity to the task of identifying whether a MemBarRelease,
-  // CompareAndSwapX or MemBarAcquire forms part of a CAS.
-  //
-  // In both cases the post-write subtree includes an auxiliary
-  // MemBarVolatile (StoreLoad barrier) separating the object put and
-  // the read of the corresponding card. This poses two additional
-  // problems.
-  //
-  // Firstly, a card mark MemBarVolatile needs to be distinguished
-  // from a normal trailing MemBarVolatile. Resolving this first
-  // problem is straightforward: a card mark MemBarVolatile always
-  // projects a Mem feed to a StoreCM node and that is a unique marker
+  // occur when a card mark is introduced. i.e. the CPUOrder MemBar
+  // feeds the extra CastP2X, LoadB etc nodes but the above memory
+  // flow subgraph is still present.
+  // 
+  // This is referred to as a *normal* CAS subgraph. It can easily be
+  // detected starting from any candidate MemBarRelease,
+  // StoreX[mo_release] or MemBarAcquire node.
+  //
+  // The code below uses two helper predicates, leading_to_trailing
+  // and trailing_to_leading to identify these normal graphs, one
+  // validating the layout starting from the top membar and searching
+  // down and the other validating the layout starting from the lower
+  // membar and searching up.
+  //
+  // There are two special case GC configurations when the simple
+  // normal graphs above may not be generated: when using G1 (which
+  // always employs a conditional card mark); and when using CMS with
+  // conditional card marking (+CondCardMark) configured. These GCs
+  // are both concurrent rather than stop-the world GCs. So they
+  // introduce extra Ctl+Mem flow into the graph between the leading
+  // and trailing membar nodes, in particular enforcing stronger
+  // memory serialisation beween the object put and the corresponding
+  // conditional card mark. CMS employs a post-write GC barrier while
+  // G1 employs both a pre- and post-write GC barrier.
+  //
+  // The post-write barrier subgraph for these configurations includes
+  // a MemBarVolatile node -- referred to as a card mark membar --
+  // which is needed to order the card write (StoreCM) operation in
+  // the barrier, the preceding StoreX (or CompareAndSwapX) and Store
+  // operations performed by GC threads i.e. a card mark membar
+  // constitutes a StoreLoad barrier hence must be translated to a dmb
+  // ish (whether or not it sits inside a volatile store sequence).
+  //
+  // Of course, the use of the dmb ish for the card mark membar also
+  // implies theat the StoreCM which follows can omit the dmb ishst
+  // instruction. The necessary visibility ordering will already be
+  // guaranteed by the dmb ish. In sum, the dmb ishst instruction only
+  // needs to be generated for as part of the StoreCM sequence with GC
+  // configuration +CMS -CondCardMark.
+  // 
+  // Of course all these extra barrier nodes may well be absent --
+  // they are only inserted for object puts. Their potential presence
+  // significantly complicates the task of identifying whether a
+  // MemBarRelease, StoreX[mo_release], MemBarVolatile or
+  // MemBarAcquire forms part of a volatile put or CAS when using
+  // these GC configurations (see below) and also complicates the
+  // decision as to how to translate a MemBarVolatile and StoreCM.
+  //
+  // So, thjis means that a card mark MemBarVolatile occurring in the
+  // post-barrier graph it needs to be distinguished from a normal
+  // trailing MemBarVolatile. Resolving this is straightforward: a
+  // card mark MemBarVolatile always projects a Mem feed to a StoreCM
+  // node and that is a unique marker
   //
   //      MemBarVolatile (card mark)
   //       C |    \     . . .
   //         |   StoreCM   . . .
   //       . . .
   //
-  // The second problem is how the code generator is to translate the
-  // card mark barrier? It always needs to be translated to a "dmb
-  // ish" instruction whether or not it occurs as part of a volatile
-  // put. A StoreLoad barrier is needed after the object put to ensure
-  // i) visibility to GC threads of the object put and ii) visibility
-  // to the mutator thread of any card clearing write by a GC
-  // thread. Clearly a normal store (str) will not guarantee this
-  // ordering but neither will a releasing store (stlr). The latter
-  // guarantees that the object put is visible but does not guarantee
-  // that writes by other threads have also been observed.
-  //
-  // So, returning to the task of translating the object put and the
-  // leading/trailing membar nodes: what do the non-normal node graph
-  // look like for these 2 special cases? and how can we determine the
-  // status of a MemBarRelease, StoreX[mo_release] or MemBarVolatile
-  // in both normal and non-normal cases?
+  // Returning to the task of translating the object put and the
+  // leading/trailing membar nodes: what do the node graphs look like
+  // for these 2 special cases? and how can we determine the status of
+  // a MemBarRelease, StoreX[mo_release] or MemBarVolatile in both
+  // normal and non-normal cases?
   //
   // A CMS GC post-barrier wraps its card write (StoreCM) inside an If
   // which selects conditonal execution based on the value loaded
@@ -1608,91 +1601,117 @@
   // which looks like this
   //
   //   MemBarRelease
-  //   MemBarCPUOrder_(leading)__________________
-  //     C |    M \       \\                   C \
-  //       |       \    StoreN/P[mo_release]  CastP2X
-  //       |    Bot \    /
-  //       |       MergeMem
-  //       |         /
-  //      MemBarVolatile (card mark)
-  //     C |  ||    M |
-  //       | LoadB    |
-  //       |   |      |
-  //       | Cmp      |\
-  //       | /        | \
-  //       If         |  \
-  //       | \        |   \
-  // IfFalse  IfTrue  |    \
-  //       \     / \  |     \
-  //        \   / StoreCM    |
-  //         \ /      |      |
-  //        Region   . . .   |
-  //          | \           /
-  //          |  . . .  \  / Bot
+  //   MemBarCPUOrder_(leading)____________________
+  //     C |  | M \       \\               M |   C \
+  //       |  |    \    StoreN/P[mo_release] |  CastP2X
+  //       |  | Bot \    / oop      \        |
+  //       |  |    MergeMem          \      / 
+  //       |  |      /                |    /
+  //     MemBarVolatile (card mark)   |   /
+  //     C |  ||    M |               |  /
+  //       | LoadB    | Bot       oop | / Bot
+  //       |   |      |              / /
+  //       | Cmp      |\            / /
+  //       | /        | \          / /
+  //       If         |  \        / /
+  //       | \        |   \      / /
+  // IfFalse  IfTrue  |    \    / /
+  //       \     / \  |    |   / /
+  //        \   / StoreCM  |  / /
+  //         \ /      \   /  / /
+  //        Region     Phi  / /
+  //          | \   Raw |  / /
+  //          |  . . .  | / /
   //          |       MergeMem
-  //          |          |
+  //          |           |
   //        MemBarVolatile (trailing)
   //
-  // The first MergeMem merges the AliasIdxBot Mem slice from the
-  // leading membar and the oopptr Mem slice from the Store into the
-  // card mark membar. The trailing MergeMem merges the AliasIdxBot
-  // Mem slice from the card mark membar and the AliasIdxRaw slice
-  // from the StoreCM into the trailing membar (n.b. the latter
-  // proceeds via a Phi associated with the If region).
-  //
-  // The graph for a CAS varies slightly, the obvious difference being
-  // that the StoreN/P node is replaced by a CompareAndSwapP/N node
-  // and the trailing MemBarVolatile by a MemBarCPUOrder +
-  // MemBarAcquire pair. The other important difference is that the
-  // CompareAndSwap node's SCMemProj is not merged into the card mark
-  // membar - it still feeds the trailing MergeMem. This also means
-  // that the card mark membar receives its Mem feed directly from the
-  // leading membar rather than via a MergeMem.
+  // Notice that there are two MergeMem nodes below the leading
+  // membar. The first MergeMem merges the AliasIdxBot Mem slice from
+  // the leading membar and the oopptr Mem slice from the Store into
+  // the card mark membar. The trailing MergeMem merges the
+  // AliasIdxBot Mem slice from the leading membar, the AliasIdxRaw
+  // slice from the StoreCM and an oop slice from the StoreN/P node
+  // into the trailing membar (n.b. the raw slice proceeds via a Phi
+  // associated with the If region).
+  //
+  // So, in the case of CMS + CondCardMark the volatile object store
+  // graph still includes a normal volatile store subgraph from the
+  // leading membar to the trailing membar. However, it also contains
+  // the same shape memory flow to the card mark membar. The two flows
+  // can be distinguished by testing whether or not the downstream
+  // membar is a card mark membar.
+  //
+  // The graph for a CAS also varies with CMS + CondCardMark, in
+  // particular employing a control feed from the CompareAndSwapX node
+  // through a CmpI and If to the card mark membar and StoreCM which
+  // updates the associated card. This avoids executing the card mark
+  // if the CAS fails. However, it can be seen from the diagram below
+  // that the presence of the barrier does not alter the normal CAS
+  // memory subgraph where the leading membar feeds a CompareAndSwapX,
+  // an SCMemProj, a MergeMem then a final trailing MemBarCPUOrder and
+  // MemBarAcquire pair.
   //
   //   MemBarRelease
-  //   MemBarCPUOrder__(leading)_________________________
-  //       ||                       \\                 C \
-  //   MemBarVolatile (card mark)  CompareAndSwapN/P  CastP2X
-  //     C |  ||    M |              |
-  //       | LoadB    |       ______/|
-  //       |   |      |      /       |
-  //       | Cmp      |     /      SCMemProj
-  //       | /        |    /         |
-  //       If         |   /         /
-  //       | \        |  /         /
-  // IfFalse  IfTrue  | /         /
-  //       \     / \  |/ prec    /
-  //        \   / StoreCM       /
-  //         \ /      |        /
-  //        Region   . . .    /
-  //          | \            /
-  //          |  . . .  \   / Bot
-  //          |       MergeMem
-  //          |          |
-  //        MemBarCPUOrder
-  //        MemBarAcquire (trailing)
+  //   MemBarCPUOrder__(leading)_______________________
+  //   C /  M |                        \\            C \
+  //  . . .   | Bot                CompareAndSwapN/P   CastP2X
+  //          |                  C /  M |
+  //          |                 CmpI    |
+  //          |                  /      |
+  //          |               . . .     |
+  //          |              IfTrue     |
+  //          |              /          |
+  //       MemBarVolatile (card mark)   |
+  //        C |  ||    M |              |
+  //          | LoadB    | Bot   ______/|
+  //          |   |      |      /       |
+  //          | Cmp      |     /      SCMemProj
+  //          | /        |    /         |
+  //          If         |   /         /
+  //          | \        |  /         / Bot
+  //     IfFalse  IfTrue | /         /
+  //          |   / \   / / prec    /
+  //   . . .  |  /  StoreCM        /
+  //        \ | /      | raw      /
+  //        Region    . . .      /
+  //           | \              /
+  //           |   . . .   \    / Bot
+  //           |        MergeMem
+  //           |          /
+  //         MemBarCPUOrder
+  //         MemBarAcquire (trailing)
   //
   // This has a slightly different memory subgraph to the one seen
-  // previously but the core of it is the same as for the CAS normal
-  // sungraph
+  // previously but the core of it has a similar memory flow to the
+  // CAS normal subgraph:
   //
   //   MemBarRelease
   //   MemBarCPUOrder____
-  //      ||             \      . . .
-  //   MemBarVolatile  CompareAndSwapX  . . .
-  //      |  \            |
-  //        . . .   SCMemProj
-  //          |     /  . . .
-  //         MergeMem
-  //          |
+  //         |          \      . . .
+  //         |       CompareAndSwapX  . . .
+  //         |       C /  M |
+  //         |      CmpI    |
+  //         |       /      |
+  //         |      . .    /
+  //     Bot |   IfTrue   /
+  //         |   /       /
+  //    MemBarVolatile  /
+  //         | ...     /
+  //      StoreCM ... /
+  //         |       / 
+  //       . . .  SCMemProj
+  //      Raw \    / Bot
+  //        MergeMem
+  //           |
   //   MemBarCPUOrder
   //   MemBarAcquire
   //
-  //
-  // G1 is quite a lot more complicated. The nodes inserted on behalf
-  // of G1 may comprise: a pre-write graph which adds the old value to
-  // the SATB queue; the releasing store itself; and, finally, a
-  // post-write graph which performs a card mark.
+  // The G1 graph for a volatile object put is a lot more complicated.
+  // Nodes inserted on behalf of G1 may comprise: a pre-write graph
+  // which adds the old value to the SATB queue; the releasing store
+  // itself; and, finally, a post-write graph which performs a card
+  // mark.
   //
   // The pre-write graph may be omitted, but only when the put is
   // writing to a newly allocated (young gen) object and then only if
@@ -1730,25 +1749,60 @@
   //       | CastP2X | StoreN/P[mo_release] |
   //       |         |         |            |
   //     C |       M |       M |          M |
-  //        \        |         |           /
+  //        \        | Raw     | oop       / Bot
   //                  . . .
   //          (post write subtree elided)
   //                    . . .
   //             C \         M /
   //         MemBarVolatile (trailing)
   //
+  // Note that the three memory feeds into the post-write tree are an
+  // AliasRawIdx slice associated with the writes in the pre-write
+  // tree, an oop type slice from the StoreX specific to the type of
+  // the volatile field and the AliasBotIdx slice emanating from the
+  // leading membar.
+  //
   // n.b. the LoadB in this subgraph is not the card read -- it's a
   // read of the SATB queue active flag.
   //
-  // Once again the CAS graph is a minor variant on the above with the
-  // expected substitutions of CompareAndSawpX for StoreN/P and
-  // MemBarCPUOrder + MemBarAcquire for trailing MemBarVolatile.
+  // The CAS graph is once again a variant of the above with a
+  // CompareAndSwapX node and SCMemProj in place of the StoreX.  The
+  // value from the CompareAndSwapX node is fed into the post-write
+  // graph aling with the AliasIdxRaw feed from the pre-barrier and
+  // the AliasIdxBot feeds from the leading membar and the ScMemProj.
+  //
+  //  MemBarRelease (leading)____________
+  //     C |  ||  M \   M \    M \  M \ . . .
+  //       | LoadB   \  LoadL  LoadN   \
+  //       | /        \                 \
+  //       If         |\                 \
+  //       | \        | \                 \
+  //  IfFalse  IfTrue |  \                 \
+  //       |     |    |   \                 \
+  //       |     If   |    \                 |
+  //       |     |          \                |
+  //       |                 \               |
+  //       |    . . .         \              |
+  //       | /       | /       \             |
+  //      Region  Phi[M]        \            |
+  //       | \       |           \           |
+  //       |  \_____ |            |          |
+  //     C | C \     |            |          |
+  //       | CastP2X |     CompareAndSwapX   |
+  //       |         |   res |     |         |
+  //     C |       M |       |  SCMemProj  M |
+  //        \        | Raw   |     | Bot    / Bot
+  //                  . . .
+  //          (post write subtree elided)
+  //                    . . .
+  //             C \         M /
+  //         MemBarVolatile (trailing)
   //
   // The G1 post-write subtree is also optional, this time when the
   // new value being written is either null or can be identified as a
   // newly allocated (young gen) object with no intervening control
   // flow. The latter cannot happen but the former may, in which case
-  // the card mark membar is omitted and the memory feeds form the
+  // the card mark membar is omitted and the memory feeds from the
   // leading membar and the SToreN/P are merged direct into the
   // trailing membar as per the normal subgraph. So, the only special
   // case which arises is when the post-write subgraph is generated.
@@ -1770,94 +1824,106 @@
   //
   //                (pre-write subtree elided)
   //        . . .                  . . .    . . .  . . .
-  //        C |                    M |     M |    M |
-  //       Region                  Phi[M] StoreN    |
-  //          |                     / \      |      |
-  //         / \_______            /   \     |      |
-  //      C / C \      . . .            \    |      |
-  //       If   CastP2X . . .            |   |      |
-  //       / \                           |   |      |
-  //      /   \                          |   |      |
-  // IfFalse IfTrue                      |   |      |
-  //   |       |                         |   |     /|
-  //   |       If                        |   |    / |
-  //   |      / \                        |   |   /  |
-  //   |     /   \                        \  |  /   |
-  //   | IfFalse IfTrue                   MergeMem  |
-  //   |  . . .    / \                       /      |
-  //   |          /   \                     /       |
-  //   |     IfFalse IfTrue                /        |
-  //   |      . . .    |                  /         |
-  //   |               If                /          |
-  //   |               / \              /           |
-  //   |              /   \            /            |
-  //   |         IfFalse IfTrue       /             |
-  //   |           . . .   |         /              |
-  //   |                    \       /               |
-  //   |                     \     /                |
-  //   |             MemBarVolatile__(card mark)    |
-  //   |                ||   C |  M \  M \          |
-  //   |               LoadB   If    |    |         |
-  //   |                      / \    |    |         |
-  //   |                     . . .   |    |         |
-  //   |                          \  |    |        /
-  //   |                        StoreCM   |       /
-  //   |                          . . .   |      /
-  //   |                        _________/      /
-  //   |                       /  _____________/
-  //   |   . . .       . . .  |  /            /
-  //   |    |                 | /   _________/
-  //   |    |               Phi[M] /        /
-  //   |    |                 |   /        /
-  //   |    |                 |  /        /
-  //   |  Region  . . .     Phi[M]  _____/
-  //   |    /                 |    /
-  //   |                      |   /
-  //   | . . .   . . .        |  /
-  //   | /                    | /
-  // Region           |  |  Phi[M]
-  //   |              |  |  / Bot
-  //    \            MergeMem
-  //     \            /
-  //     MemBarVolatile
-  //
-  // As with CMS the initial MergeMem merges the AliasIdxBot Mem slice
-  // from the leading membar and the oopptr Mem slice from the Store
-  // into the card mark membar i.e. the memory flow to the card mark
-  // membar still looks like a normal graph.
-  //
-  // The trailing MergeMem merges an AliasIdxBot Mem slice with other
-  // Mem slices (from the StoreCM and other card mark queue stores).
-  // However in this case the AliasIdxBot Mem slice does not come
-  // direct from the card mark membar. It is merged through a series
-  // of Phi nodes. These are needed to merge the AliasIdxBot Mem flow
-  // from the leading membar with the Mem feed from the card mark
-  // membar. Each Phi corresponds to one of the Ifs which may skip
-  // around the card mark membar. So when the If implementing the NULL
-  // value check has been elided the total number of Phis is 2
-  // otherwise it is 3.
-  //
-  // The CAS graph when using G1GC also includes a pre-write subgraph
-  // and an optional post-write subgraph. Teh sam evarioations are
-  // introduced as for CMS with conditional card marking i.e. the
-  // StoreP/N is swapped for a CompareAndSwapP/N, the tariling
-  // MemBarVolatile for a MemBarCPUOrder + MemBarAcquire pair and the
-  // Mem feed from the CompareAndSwapP/N includes a precedence
-  // dependency feed to the StoreCM and a feed via an SCMemProj to the
-  // trailing membar. So, as before the configuration includes the
-  // normal CAS graph as a subgraph of the memory flow.
-  //
-  // So, the upshot is that in all cases the volatile put graph will
-  // include a *normal* memory subgraph betwen the leading membar and
-  // its child membar, either a volatile put graph (including a
-  // releasing StoreX) or a CAS graph (including a CompareAndSwapX).
-  // When that child is not a card mark membar then it marks the end
-  // of the volatile put or CAS subgraph. If the child is a card mark
-  // membar then the normal subgraph will form part of a volatile put
-  // subgraph if and only if the child feeds an AliasIdxBot Mem feed
-  // to a trailing barrier via a MergeMem. That feed is either direct
-  // (for CMS) or via 2 or 3 Phi nodes merging the leading barrier
-  // memory flow (for G1).
+  //        C |               M |    M |    M |
+  //       Region            Phi[M] StoreN    |
+  //          |            Raw  |  oop |  Bot |
+  //         / \_______         |\     |\     |\
+  //      C / C \      . . .    | \    | \    | \
+  //       If   CastP2X . . .   |  \   |  \   |  \
+  //       / \                  |   \  |   \  |   \
+  //      /   \                 |    \ |    \ |    \
+  // IfFalse IfTrue             |      |      |     \
+  //   |       |                 \     |     /       |
+  //   |       If                 \    | \  /   \    |
+  //   |      / \                  \   |   /     \   |
+  //   |     /   \                  \  |  / \     |  |
+  //   | IfFalse IfTrue           MergeMem   \    |  |
+  //   |  . . .    / \                 |      \   |  |
+  //   |          /   \                |       |  |  |
+  //   |     IfFalse IfTrue            |       |  |  |
+  //   |      . . .    |               |       |  |  |
+  //   |               If             /        |  |  |
+  //   |               / \           /         |  |  |
+  //   |              /   \         /          |  |  |
+  //   |         IfFalse IfTrue    /           |  |  |
+  //   |           . . .   |      /            |  |  |
+  //   |                    \    /             |  |  |
+  //   |                     \  /              |  |  |
+  //   |         MemBarVolatile__(card mark  ) |  |  |
+  //   |              ||   C |     \           |  |  |
+  //   |             LoadB   If     |         /   |  |
+  //   |                    / \ Raw |        /   /  /
+  //   |                   . . .    |       /   /  /
+  //   |                        \   |      /   /  /
+  //   |                        StoreCM   /   /  /
+  //   |                           |     /   /  /
+  //   |                            . . .   /  /
+  //   |                                   /  /
+  //   |   . . .                          /  /
+  //   |    |             | /            /  /
+  //   |    |           Phi[M] /        /  /
+  //   |    |             |   /        /  /
+  //   |    |             |  /        /  /
+  //   |  Region  . . .  Phi[M]      /  /
+  //   |    |             |         /  /
+  //    \   |             |        /  /
+  //     \  | . . .       |       /  /
+  //      \ |             |      /  /
+  //      Region         Phi[M] /  /
+  //        |               \  /  /
+  //         \             MergeMem
+  //          \            /
+  //          MemBarVolatile
+  //
+  // As with CMS + CondCardMark the first MergeMem merges the
+  // AliasIdxBot Mem slice from the leading membar and the oopptr Mem
+  // slice from the Store into the card mark membar. However, in this
+  // case it may also merge an AliasRawIdx mem slice from the pre
+  // barrier write.
+  //
+  // The trailing MergeMem merges an AliasIdxBot Mem slice from the
+  // leading membar with an oop slice from the StoreN and an
+  // AliasRawIdx slice from the post barrier writes. In this case the
+  // AliasIdxRaw Mem slice is merged through a series of Phi nodes
+  // which combine feeds from the If regions in the post barrier
+  // subgraph.
+  //
+  // So, for G1 the same characteristic subgraph arises as for CMS +
+  // CondCardMark. There is a normal subgraph feeding the card mark
+  // membar and a normal subgraph feeding the trailing membar.
+  //
+  // The CAS graph when using G1GC also includes an optional
+  // post-write subgraph. It is very similar to the above graph except
+  // for a few details.
+  // 
+  // - The control flow is gated by an additonal If which tests the
+  // result from the CompareAndSwapX node
+  // 
+  //  - The MergeMem which feeds the card mark membar only merges the
+  // AliasIdxBot slice from the leading membar and the AliasIdxRaw
+  // slice from the pre-barrier. It does not merge the SCMemProj
+  // AliasIdxBot slice. So, this subgraph does not look like the
+  // normal CAS subgraph.
+  //
+  // - The MergeMem which feeds the trailing membar merges the
+  // AliasIdxBot slice from the leading membar, the AliasIdxRaw slice
+  // from the post-barrier and the SCMemProj AliasIdxBot slice i.e. it
+  // has two AliasIdxBot input slices. However, this subgraph does
+  // still look like the normal CAS subgraph.
+  //
+  // So, the upshot is:
+  //
+  // In all cases a volatile put graph will include a *normal*
+  // volatile store subgraph betwen the leading membar and the
+  // trailing membar. It may also include a normal volatile store
+  // subgraph betwen the leading membar and the card mark membar.
+  //
+  // In all cases a CAS graph will contain a unique normal CAS graph
+  // feeding the trailing membar.
+  //
+  // In all cases where there is a card mark membar (either as part of
+  // a volatile object put or CAS) it will be fed by a MergeMem whose
+  // AliasIdxBot slice feed will be a leading membar.
   //
   // The predicates controlling generation of instructions for store
   // and barrier nodes employ a few simple helper functions (described
@@ -1878,24 +1944,24 @@
 	    opcode == Op_CompareAndSwapP);
   }
 
-  // leading_to_normal
+  // leading_to_trailing
   //
   //graph traversal helper which detects the normal case Mem feed from
   // a release membar (or, optionally, its cpuorder child) to a
   // dependent volatile membar i.e. it ensures that one or other of
   // the following Mem flow subgraph is present.
   //
-  //   MemBarRelease
-  //   MemBarCPUOrder {leading}
-  //          |  \      . . .
-  //          |  StoreN/P[mo_release]  . . .
-  //          |   /
-  //         MergeMem
-  //          |
-  //   MemBarVolatile {trailing or card mark}
-  //
-  //   MemBarRelease
-  //   MemBarCPUOrder {leading}
+  //   MemBarRelease {leading}
+  //   {MemBarCPUOrder} {optional}
+  //     Bot |  \      . . .
+  //         |  StoreN/P[mo_release]  . . .
+  //         |   /
+  //        MergeMem
+  //         |
+  //   MemBarVolatile {not card mark}
+  //
+  //   MemBarRelease {leading}
+  //   {MemBarCPUOrder} {optional}
   //      |       \      . . .
   //      |     CompareAndSwapX  . . .
   //               |
@@ -1906,6 +1972,23 @@
   //    MemBarCPUOrder
   //    MemBarAcquire {trailing}
   //
+  // the predicate needs to be capable of distinguishing the following
+  // volatile put graph which may arises when a GC post barrier
+  // inserts a card mark membar
+  //
+  //   MemBarRelease {leading}
+  //   {MemBarCPUOrder}__
+  //     Bot |   \       \
+  //         |   StoreN/P \
+  //         |    / \     |
+  //        MergeMem \    |
+  //         |        \   |
+  //   MemBarVolatile  \  |
+  //    {card mark}     \ |
+  //                  MergeMem
+  //                      |
+  // {not card mark} MemBarVolatile
+  //
   // if the correct configuration is present returns the trailing
   // membar otherwise NULL.
   //
@@ -1916,7 +1999,7 @@
   // the returned value may be a card mark or trailing membar
   //
 
-  MemBarNode *leading_to_normal(MemBarNode *leading)
+  MemBarNode *leading_to_trailing(MemBarNode *leading)
   {
     assert((leading->Opcode() == Op_MemBarRelease ||
 	    leading->Opcode() == Op_MemBarCPUOrder),
@@ -1933,15 +2016,21 @@
     StoreNode * st = NULL;
     LoadStoreNode *cas = NULL;
     MergeMemNode *mm = NULL;
+    MergeMemNode *mm2 = NULL;
 
     for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) {
       x = mem->fast_out(i);
       if (x->is_MergeMem()) {
 	if (mm != NULL) {
-	  return NULL;
+	  if (mm2 != NULL) {
+	  // should not see more than 2 merge mems
+	    return NULL;
+	  } else {
+	    mm2 = x->as_MergeMem();
+	  }
+	} else {
+	  mm = x->as_MergeMem();
 	}
-	// two merge mems is one too many
-	mm = x->as_MergeMem();
       } else if (x->is_Store() && x->as_Store()->is_release() && x->Opcode() != Op_StoreCM) {
 	// two releasing stores/CAS nodes is one too many
 	if (st != NULL || cas != NULL) {
@@ -1961,13 +2050,13 @@
       return NULL;
     }
 
-    // must have a merge if we also have st
+    // must have at least one merge if we also have st
     if (st && !mm) {
       return NULL;
     }
 
-    Node *y = NULL;
     if (cas) {
+      Node *y = NULL;
       // look for an SCMemProj
       for (DUIterator_Fast imax, i = cas->fast_outs(imax); i < imax; i++) {
 	x = cas->fast_out(i);
@@ -1987,10 +2076,29 @@
 	  break;
 	}
       }
-      if (mm == NULL)
+      if (mm == NULL) {
 	return NULL;
+      }
+      MemBarNode *mbar = NULL;
+      // ensure the merge feeds a trailing membar cpuorder + acquire pair
+      for (DUIterator_Fast imax, i = mm->fast_outs(imax); i < imax; i++) {
+	x = mm->fast_out(i);
+	if (x->is_MemBar()) {
+	  int opcode = x->Opcode();
+	  if (opcode == Op_MemBarCPUOrder) {
+	    MemBarNode *z =  x->as_MemBar();
+	    z = child_membar(z);
+	    if (z != NULL && z->Opcode() == Op_MemBarAcquire) {
+	      mbar = z;
+	    }
+	  }
+	  break;
+	}
+      }
+      return mbar;
     } else {
-      // ensure the store feeds the existing mergemem;
+      Node *y = NULL;
+      // ensure the store feeds the first mergemem;
       for (DUIterator_Fast imax, i = st->fast_outs(imax); i < imax; i++) {
 	if (st->fast_out(i) == mm) {
 	  y = st;
@@ -2000,55 +2108,89 @@
       if (y == NULL) {
 	return NULL;
       }
-    }
-
-    MemBarNode *mbar = NULL;
-    // ensure the merge feeds to the expected type of membar
-    for (DUIterator_Fast imax, i = mm->fast_outs(imax); i < imax; i++) {
-      x = mm->fast_out(i);
-      if (x->is_MemBar()) {
-	int opcode = x->Opcode();
-	if (opcode == Op_MemBarVolatile && st) {
-	  mbar = x->as_MemBar();
-	} else if (cas && opcode == Op_MemBarCPUOrder) {
-	  MemBarNode *y =  x->as_MemBar();
-	  y = child_membar(y);
-	  if (y != NULL && y->Opcode() == Op_MemBarAcquire) {
-	    mbar = y;
+      if (mm2 != NULL) {
+	// ensure the store feeds the second mergemem;
+	y = NULL;
+	for (DUIterator_Fast imax, i = st->fast_outs(imax); i < imax; i++) {
+	  if (st->fast_out(i) == mm2) {
+	    y = st;
 	  }
 	}
-	break;
+	if (y == NULL) {
+	  return NULL;
+	}
       }
-    }
-
-    return mbar;
-  }
-
-  // normal_to_leading
+
+      MemBarNode *mbar = NULL;
+      // ensure the first mergemem feeds a volatile membar
+      for (DUIterator_Fast imax, i = mm->fast_outs(imax); i < imax; i++) {
+	x = mm->fast_out(i);
+	if (x->is_MemBar()) {
+	  int opcode = x->Opcode();
+	  if (opcode == Op_MemBarVolatile) {
+	    mbar = x->as_MemBar();
+	  }
+	  break;
+	}
+      }
+      if (mm2 == NULL) {
+	// this is our only option for a trailing membar
+	return mbar;
+      }
+      // ensure the second mergemem feeds a volatile membar
+      MemBarNode *mbar2 = NULL;
+      for (DUIterator_Fast imax, i = mm2->fast_outs(imax); i < imax; i++) {
+	x = mm2->fast_out(i);
+	if (x->is_MemBar()) {
+	  int opcode = x->Opcode();
+	  if (opcode == Op_MemBarVolatile) {
+	    mbar2 = x->as_MemBar();
+	  }
+	  break;
+	}
+      }
+      // if we have two merge mems we must have two volatile membars
+      if (mbar == NULL || mbar2 == NULL) {
+	return NULL;
+      }
+      // return the trailing membar
+      if (is_card_mark_membar(mbar2)) {
+	return mbar;
+      } else {
+	if (is_card_mark_membar(mbar)) {
+	  return mbar2;
+	} else {
+	  return NULL;
+	}
+      }
+    }
+  }
+
+  // trailing_to_leading
   //
   // graph traversal helper which detects the normal case Mem feed
-  // from either a card mark or a trailing membar to a preceding
-  // release membar (optionally its cpuorder child) i.e. it ensures
-  // that one or other of the following Mem flow subgraphs is present.
-  //
-  //   MemBarRelease
-  //   MemBarCPUOrder {leading}
-  //          |  \      . . .
-  //          |  StoreN/P[mo_release]  . . .
-  //          |   /
-  //         MergeMem
-  //          |
-  //   MemBarVolatile {card mark or trailing}
-  //
-  //   MemBarRelease
-  //   MemBarCPUOrder {leading}
+  // from a trailing membar to a preceding release membar (optionally
+  // its cpuorder child) i.e. it ensures that one or other of the
+  // following Mem flow subgraphs is present.
+  //
+  //   MemBarRelease {leading}
+  //   MemBarCPUOrder {optional}
+  //    | Bot |  \      . . .
+  //    |     |  StoreN/P[mo_release]  . . .
+  //    |     |   /
+  //    |    MergeMem
+  //    |     |
+  //   MemBarVolatile {not card mark}
+  //
+  //   MemBarRelease {leading}
+  //   MemBarCPUOrder {optional}
   //      |       \      . . .
   //      |     CompareAndSwapX  . . .
   //               |
   //     . . .    SCMemProj
   //           \   |
   //      |    MergeMem
-  //      |        /
+  //      |       |
   //    MemBarCPUOrder
   //    MemBarAcquire {trailing}
   //
@@ -2058,15 +2200,20 @@
   // if the configuration is present returns the cpuorder member for
   // preference or when absent the release membar otherwise NULL.
   //
-  // n.b. the input membar is expected to be a MemBarVolatile but
-  // need not be a card mark membar.
-
-  MemBarNode *normal_to_leading(const MemBarNode *barrier)
+  // n.b. the input membar is expected to be a MemBarVolatile or
+  // MemBarAcquire. if it is a MemBarVolatile it must *not* be a card
+  // mark membar.
+
+  MemBarNode *trailing_to_leading(const MemBarNode *barrier)
   {
     // input must be a volatile membar
     assert((barrier->Opcode() == Op_MemBarVolatile ||
 	    barrier->Opcode() == Op_MemBarAcquire),
 	   "expecting a volatile or an acquire membar");
+
+    assert((barrier->Opcode() != Op_MemBarVolatile) ||
+	   !is_card_mark_membar(barrier),
+	   "not expecting a card mark membar");
     Node *x;
     bool is_cas = barrier->Opcode() == Op_MemBarAcquire;
 
@@ -2179,169 +2326,35 @@
     return NULL;
   }
 
-  // card_mark_to_trailing
-  //
-  // graph traversal helper which detects extra, non-normal Mem feed
-  // from a card mark volatile membar to a trailing membar i.e. it
-  // ensures that one of the following three GC post-write Mem flow
-  // subgraphs is present.
-  //
-  // 1)
-  //     . . .
-  //       |
-  //   MemBarVolatile (card mark)
-  //      |          |
-  //      |        StoreCM
-  //      |          |
-  //      |        . . .
-  //  Bot |  /
-  //   MergeMem
-  //      |
-  //      |
-  //    MemBarVolatile {trailing}
-  //
-  // 2)
-  //   MemBarRelease/CPUOrder (leading)
-  //    |
-  //    |
-  //    |\       . . .
-  //    | \        |
-  //    |  \  MemBarVolatile (card mark)
-  //    |   \   |     |
-  //     \   \  |   StoreCM    . . .
-  //      \   \ |
-  //       \  Phi
-  //        \ /
-  //        Phi  . . .
+  // card_mark_to_leading
+  //
+  // graph traversal helper which traverses from a card mark volatile
+  // membar to a leading membar i.e. it ensures that the following Mem
+  // flow subgraph is present.
+  //
+  //    MemBarRelease {leading}
+  //   {MemBarCPUOrder} {optional}
+  //         |   . . .
   //     Bot |   /
-  //       MergeMem
+  //      MergeMem
   //         |
-  //    MemBarVolatile {trailing}
-  //
-  //
-  // 3)
-  //   MemBarRelease/CPUOrder (leading)
-  //    |
-  //    |\
-  //    | \
-  //    |  \      . . .
-  //    |   \       |
-  //    |\   \  MemBarVolatile (card mark)
-  //    | \   \   |     |
-  //    |  \   \  |   StoreCM    . . .
-  //    |   \   \ |
-  //     \   \  Phi
-  //      \   \ /
-  //       \  Phi
-  //        \ /
-  //        Phi  . . .
-  //     Bot |   /
-  //       MergeMem
-  //         |
-  //         |
-  //    MemBarVolatile {trailing}
-  //
-  // configuration 1 is only valid if UseConcMarkSweepGC &&
-  // UseCondCardMark
-  //
-  // configurations 2 and 3 are only valid if UseG1GC.
-  //
-  // if a valid configuration is present returns the trailing membar
-  // otherwise NULL.
-  //
-  // n.b. the supplied membar is expected to be a card mark
-  // MemBarVolatile i.e. the caller must ensure the input node has the
-  // correct operand and feeds Mem to a StoreCM node
-
-  MemBarNode *card_mark_to_trailing(const MemBarNode *barrier)
+  //     MemBarVolatile (card mark)
+  //        |     \
+  //      . . .   StoreCM
+  //
+  // if the configuration is present returns the cpuorder member for
+  // preference or when absent the release membar otherwise NULL.
+  //
+  // n.b. the input membar is expected to be a MemBarVolatile amd must
+  // be a card mark membar.
+
+  MemBarNode *card_mark_to_leading(const MemBarNode *barrier)
   {
     // input must be a card mark volatile membar
     assert(is_card_mark_membar(barrier), "expecting a card mark membar");
 
-    Node *feed = barrier->proj_out(TypeFunc::Memory);
-    Node *x;
-    MergeMemNode *mm = NULL;
-
-    const int MAX_PHIS = 3;	// max phis we will search through
-    int phicount = 0; 		// current search count
-
-    bool retry_feed = true;
-    while (retry_feed) {
-      // see if we have a direct MergeMem feed
-      for (DUIterator_Fast imax, i = feed->fast_outs(imax); i < imax; i++) {
-	x = feed->fast_out(i);
-	// the correct Phi will be merging a Bot memory slice
-	if (x->is_MergeMem()) {
-	  mm = x->as_MergeMem();
-	  break;
-	}
-      }
-      if (mm) {
-	retry_feed = false;
-      } else if (UseG1GC & phicount++ < MAX_PHIS) {
-	// the barrier may feed indirectly via one or two Phi nodes
-	PhiNode *phi = NULL;
-	for (DUIterator_Fast imax, i = feed->fast_outs(imax); i < imax; i++) {
-	  x = feed->fast_out(i);
-	  // the correct Phi will be merging a Bot memory slice
-	  if (x->is_Phi() && x->adr_type() == TypePtr::BOTTOM) {
-	    phi = x->as_Phi();
-	    break;
-	  }
-	}
-	if (!phi) {
-	  return NULL;
-	}
-	// look for another merge below this phi
-	feed = phi;
-      } else {
-	// couldn't find a merge
-	return NULL;
-      }
-    }
-
-    // sanity check this feed turns up as the expected slice
-    assert(mm->as_MergeMem()->in(Compile::AliasIdxBot) == feed, "expecting membar to feed AliasIdxBot slice to Merge");
-
-    MemBarNode *trailing = NULL;
-    // be sure we have a trailing membar the merge
-    for (DUIterator_Fast imax, i = mm->fast_outs(imax); i < imax; i++) {
-      x = mm->fast_out(i);
-      if (x->is_MemBar() && x->Opcode() == Op_MemBarVolatile) {
-	trailing = x->as_MemBar();
-	break;
-      }
-    }
-
-    return trailing;
-  }
-
-  // trailing_to_card_mark
-  //
-  // graph traversal helper which detects extra, non-normal Mem feed
-  // from a trailing volatile membar to a preceding card mark volatile
-  // membar i.e. it identifies whether one of the three possible extra
-  // GC post-write Mem flow subgraphs is present
-  //
-  // this predicate checks for the same flow as the previous predicate
-  // but starting from the bottom rather than the top.
-  //
-  // if the configuration is present returns the card mark membar
-  // otherwise NULL
-  //
-  // n.b. the supplied membar is expected to be a trailing
-  // MemBarVolatile i.e. the caller must ensure the input node has the
-  // correct opcode
-
-  MemBarNode *trailing_to_card_mark(const MemBarNode *trailing)
-  {
-    assert(trailing->Opcode() == Op_MemBarVolatile,
-	   "expecting a volatile membar");
-    assert(!is_card_mark_membar(trailing),
-	   "not expecting a card mark membar");
-
     // the Mem feed to the membar should be a merge
-    Node *x = trailing->in(TypeFunc::Memory);
+    Node *x = barrier->in(TypeFunc::Memory);
     if (!x->is_MergeMem()) {
       return NULL;
     }
@@ -2349,117 +2362,19 @@
     MergeMemNode *mm = x->as_MergeMem();
 
     x = mm->in(Compile::AliasIdxBot);
-    // with G1 we may possibly see a Phi or two before we see a Memory
-    // Proj from the card mark membar
-
-    const int MAX_PHIS = 3;	// max phis we will search through
-    int phicount = 0; 		// current search count
-
-    bool retry_feed = !x->is_Proj();
-
-    while (retry_feed) {
-      if (UseG1GC && x->is_Phi() && phicount++ < MAX_PHIS) {
-	PhiNode *phi = x->as_Phi();
-	ProjNode *proj = NULL;
-	PhiNode *nextphi = NULL;
-	bool found_leading = false;
-	for (uint i = 1; i < phi->req(); i++) {
-	  x = phi->in(i);
-	  if (x->is_Phi()) {
-	    nextphi = x->as_Phi();
-	  } else if (x->is_Proj()) {
-	    int opcode = x->in(0)->Opcode();
-	    if (opcode == Op_MemBarVolatile) {
-	      proj = x->as_Proj();
-	    } else if (opcode == Op_MemBarRelease ||
-		       opcode == Op_MemBarCPUOrder) {
-	      // probably a leading membar
-	      found_leading = true;
-	    }
-	  }
-	}
-	// if we found a correct looking proj then retry from there
-	// otherwise we must see a leading and a phi or this the
-	// wrong config
-	if (proj != NULL) {
-	  x = proj;
-	  retry_feed = false;
-	} else if (found_leading && nextphi != NULL) {
-	  // retry from this phi to check phi2
-	  x = nextphi;
-	} else {
-	  // not what we were looking for
-	  return NULL;
-	}
-      } else {
-	return NULL;
-      }
-    }
-    // the proj has to come from the card mark membar
-    x = x->in(0);
+
     if (!x->is_MemBar()) {
       return NULL;
     }
 
-    MemBarNode *card_mark_membar = x->as_MemBar();
-
-    if (!is_card_mark_membar(card_mark_membar)) {
-      return NULL;
-    }
-
-    return card_mark_membar;
-  }
-
-  // trailing_to_leading
-  //
-  // graph traversal helper which checks the Mem flow up the graph
-  // from a (non-card mark) trailing membar attempting to locate and
-  // return an associated leading membar. it first looks for a
-  // subgraph in the normal configuration (relying on helper
-  // normal_to_leading). failing that it then looks for one of the
-  // possible post-write card mark subgraphs linking the trailing node
-  // to a the card mark membar (relying on helper
-  // trailing_to_card_mark), and then checks that the card mark membar
-  // is fed by a leading membar (once again relying on auxiliary
-  // predicate normal_to_leading).
-  //
-  // if the configuration is valid returns the cpuorder member for
-  // preference or when absent the release membar otherwise NULL.
-  //
-  // n.b. the input membar is expected to be either a volatile or
-  // acquire membar but in the former case must *not* be a card mark
-  // membar.
-
-  MemBarNode *trailing_to_leading(const MemBarNode *trailing)
-  {
-    assert((trailing->Opcode() == Op_MemBarAcquire ||
-	    trailing->Opcode() == Op_MemBarVolatile),
-	   "expecting an acquire or volatile membar");
-    assert((trailing->Opcode() != Op_MemBarVolatile ||
-	    !is_card_mark_membar(trailing)),
-	   "not expecting a card mark membar");
-
-    MemBarNode *leading = normal_to_leading(trailing);
-
-    if (leading) {
+    MemBarNode *leading = x->as_MemBar();
+
+    if (leading_membar(leading)) {
       return leading;
     }
 
-    // nothing more to do if this is an acquire
-    if (trailing->Opcode() == Op_MemBarAcquire) {
-      return NULL;
-    }
-
-    MemBarNode *card_mark_membar = trailing_to_card_mark(trailing);
-
-    if (!card_mark_membar) {
-      return NULL;
-    }
-
-    return normal_to_leading(card_mark_membar);
-  }
-
-  // predicates controlling emit of ldr<x>/ldar<x> and associated dmb
+    return NULL;
+  }
 
 bool unnecessary_acquire(const Node *barrier)
 {
@@ -2675,19 +2590,8 @@
   }
 
   // must start with a normal feed
-  MemBarNode *child_barrier = leading_to_normal(barrier);
-
-  if (!child_barrier) {
-    return false;
-  }
-
-  if (!is_card_mark_membar(child_barrier)) {
-    // this is the trailing membar and we are done
-    return true;
-  }
-
-  // must be sure this card mark feeds a trailing membar
-  MemBarNode *trailing = card_mark_to_trailing(child_barrier);
+  MemBarNode *trailing = leading_to_trailing(barrier);
+
   return (trailing != NULL);
 }
 
@@ -2709,7 +2613,7 @@
   }
 
   // ok, if it's not a card mark then we still need to check if it is
-  // a trailing membar of a volatile put hgraph.
+  // a trailing membar of a volatile put graph.
 
   return (trailing_to_leading(mbvol) != NULL);
 }
@@ -2759,20 +2663,9 @@
   }
 
   // does this lead a normal subgraph?
-  MemBarNode *mbvol = leading_to_normal(barrier);
-
-  if (!mbvol) {
-    return false;
-  }
-
-  // all done unless this is a card mark
-  if (!is_card_mark_membar(mbvol)) {
-    return true;
-  }
-
-  // we found a card mark -- just make sure we have a trailing barrier
-
-  return (card_mark_to_trailing(mbvol) != NULL);
+  MemBarNode *trailing = leading_to_trailing(barrier);
+
+  return (trailing != NULL);
 }
 
 // predicate controlling translation of CAS
@@ -2814,7 +2707,7 @@
 	  "CAS not fed by cpuorder+release membar pair!");
 
   // does this lead a normal subgraph?
-  MemBarNode *mbar = leading_to_normal(barrier);
+  MemBarNode *mbar = leading_to_trailing(barrier);
 
   assert(mbar != NULL, "CAS not embedded in normal graph!");
 
@@ -2835,48 +2728,27 @@
 
   // we only ever need to generate a dmb ishst between an object put
   // and the associated card mark when we are using CMS without
-  // conditional card marking
+  // conditional card marking. Any other occurence will happen when
+  // performing a card mark using CMS with conditional card marking or
+  // G1. In those cases the preceding MamBarVolatile will be
+  // translated to a dmb ish which guarantes visibility of the
+  // preceding StoreN/P before this StoreCM
 
   if (!UseConcMarkSweepGC || UseCondCardMark) {
     return true;
   }
 
-  // if we are implementing volatile puts using barriers then the
-  // object put as an str so we must insert the dmb ishst
+  // if we are implementing volatile puts using barriers then we must
+  // insert the dmb ishst
 
   if (UseBarriersForVolatile) {
     return false;
   }
 
-  // we can omit the dmb ishst if this StoreCM is part of a volatile
-  // put because in thta case the put will be implemented by stlr
-  //
-  // we need to check for a normal subgraph feeding this StoreCM.
-  // that means the StoreCM must be fed Memory from a leading membar,
-  // either a MemBarRelease or its dependent MemBarCPUOrder, and the
-  // leading membar must be part of a normal subgraph
-
-  Node *x = storecm->in(StoreNode::Memory);
-
-  if (!x->is_Proj()) {
-    return false;
-  }
-
-  x = x->in(0);
-
-  if (!x->is_MemBar()) {
-    return false;
-  }
-
-  MemBarNode *leading = x->as_MemBar();
-
-  // reject invalid candidates
-  if (!leading_membar(leading)) {
-    return false;
-  }
-
-  // we can omit the StoreStore if it is the head of a normal subgraph
-  return (leading_to_normal(leading) != NULL);
+  // we must be using CMS with conditional card marking so we ahve to
+  // generate the StoreStore
+
+  return false;
 }
 
 
@@ -13409,7 +13281,7 @@
     __ fmovs($dst$$Register, as_FloatRegister($src$$reg));
   %}
 
-  ins_pipe(pipe_class_memory);
+  ins_pipe(fp_f2i);
 
 %}
 
@@ -13427,7 +13299,7 @@
     __ fmovs(as_FloatRegister($dst$$reg), $src$$Register);
   %}
 
-  ins_pipe(pipe_class_memory);
+  ins_pipe(fp_i2f);
 
 %}
 
@@ -13445,7 +13317,7 @@
     __ fmovd($dst$$Register, as_FloatRegister($src$$reg));
   %}
 
-  ins_pipe(pipe_class_memory);
+  ins_pipe(fp_d2l);
 
 %}
 
@@ -13463,7 +13335,7 @@
     __ fmovd(as_FloatRegister($dst$$reg), $src$$Register);
   %}
 
-  ins_pipe(pipe_class_memory);
+  ins_pipe(fp_l2d);
 
 %}
 
@@ -14319,6 +14191,25 @@
   ins_pipe(pipe_cmp_branch);
 %}
 
+instruct cmpN_imm0_branch(cmpOp cmp, iRegN op1, immN0 op2, label labl, rFlagsReg cr) %{
+  match(If cmp (CmpN op1 op2));
+  predicate(n->in(1)->as_Bool()->_test._test == BoolTest::ne
+	    || n->in(1)->as_Bool()->_test._test == BoolTest::eq);
+  effect(USE labl);
+
+  ins_cost(BRANCH_COST);
+  format %{ "cbw$cmp   $op1, $labl" %}
+  ins_encode %{
+    Label* L = $labl$$label;
+    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
+    if (cond == Assembler::EQ)
+      __ cbzw($op1$$Register, *L);
+    else
+      __ cbnzw($op1$$Register, *L);
+  %}
+  ins_pipe(pipe_cmp_branch);
+%}
+
 instruct cmpP_narrowOop_imm0_branch(cmpOp cmp, iRegN oop, immP0 zero, label labl, rFlagsReg cr) %{
   match(If cmp (CmpP (DecodeN oop) zero));
   predicate(n->in(1)->as_Bool()->_test._test == BoolTest::ne
@@ -14911,19 +14802,19 @@
 %}
 
 instruct string_equals(iRegP_R1 str1, iRegP_R3 str2, iRegI_R4 cnt,
-                        iRegI_R0 result, iRegP_R10 tmp, rFlagsReg cr)
+                        iRegI_R0 result, rFlagsReg cr)
 %{
   predicate(!CompactStrings);
   match(Set result (StrEquals (Binary str1 str2) cnt));
-  effect(KILL tmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL cr);
-
-  format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp" %}
+  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL cr);
+
+  format %{ "String Equals $str1,$str2,$cnt -> $result" %}
   ins_encode %{
     // Count is in 8-bit bytes; non-Compact chars are 16 bits.
     __ asrw($cnt$$Register, $cnt$$Register, 1);
-    __ string_equals($str1$$Register, $str2$$Register,
-                      $cnt$$Register, $result$$Register,
-                      $tmp$$Register);
+    __ arrays_equals($str1$$Register, $str2$$Register,
+                     $result$$Register, $cnt$$Register,
+                     2, /*is_string*/true);
   %}
   ins_pipe(pipe_class_memory);
 %}
@@ -14937,9 +14828,10 @@
 
   format %{ "Array Equals $ary1,ary2 -> $result    // KILL $tmp" %}
   ins_encode %{
-    __ byte_arrays_equals($ary1$$Register, $ary2$$Register,
-                          $result$$Register, $tmp$$Register);
-  %}
+    __ arrays_equals($ary1$$Register, $ary2$$Register,
+                     $result$$Register, $tmp$$Register,
+                     1, /*is_string*/false);
+    %}
   ins_pipe(pipe_class_memory);
 %}
 
@@ -14952,12 +14844,14 @@
 
   format %{ "Array Equals $ary1,ary2 -> $result    // KILL $tmp" %}
   ins_encode %{
-    __ char_arrays_equals($ary1$$Register, $ary2$$Register,
-                          $result$$Register, $tmp$$Register);
+    __ arrays_equals($ary1$$Register, $ary2$$Register,
+                     $result$$Register, $tmp$$Register,
+                     2, /*is_string*/false);
   %}
   ins_pipe(pipe_class_memory);
 %}
 
+
 // encode char[] to byte[] in ISO_8859_1
 instruct encode_iso_array(iRegP_R2 src, iRegP_R1 dst, iRegI_R3 len,
                           vRegD_V0 Vtmp1, vRegD_V1 Vtmp2,
@@ -16608,7 +16502,7 @@
             as_FloatRegister($src$$reg),
             as_FloatRegister($shift$$reg));
   %}
-  ins_pipe(vshift64_imm);
+  ins_pipe(vshift64);
 %}
 
 instruct vsll4I(vecX dst, vecX src, vecX shift) %{
@@ -16622,7 +16516,7 @@
             as_FloatRegister($src$$reg),
             as_FloatRegister($shift$$reg));
   %}
-  ins_pipe(vshift128_imm);
+  ins_pipe(vshift128);
 %}
 
 instruct vsrl2I(vecD dst, vecD src, vecX shift) %{
@@ -16635,7 +16529,7 @@
             as_FloatRegister($src$$reg),
             as_FloatRegister($shift$$reg));
   %}
-  ins_pipe(vshift64_imm);
+  ins_pipe(vshift64);
 %}
 
 instruct vsrl4I(vecX dst, vecX src, vecX shift) %{
@@ -16648,7 +16542,7 @@
             as_FloatRegister($src$$reg),
             as_FloatRegister($shift$$reg));
   %}
-  ins_pipe(vshift128_imm);
+  ins_pipe(vshift128);
 %}
 
 instruct vsll2I_imm(vecD dst, vecD src, immI shift) %{
@@ -16766,7 +16660,7 @@
            as_FloatRegister($src$$reg),
            (int)$shift$$constant & 63);
   %}
-  ins_pipe(vshift128);
+  ins_pipe(vshift128_imm);
 %}
 
 instruct vsra2L_imm(vecX dst, vecX src, immI shift) %{
--- a/src/cpu/aarch64/vm/jvmciCodeInstaller_aarch64.cpp	Wed Mar 09 14:54:18 2016 +0100
+++ b/src/cpu/aarch64/vm/jvmciCodeInstaller_aarch64.cpp	Wed Mar 09 14:18:12 2016 +0100
@@ -74,7 +74,7 @@
 void CodeInstaller::pd_patch_DataSectionReference(int pc_offset, int data_offset, TRAPS) {
   address pc = _instructions->start() + pc_offset;
   NativeInstruction* inst = nativeInstruction_at(pc);
-  if (inst->is_adr_aligned()) {
+  if (inst->is_adr_aligned() || inst->is_ldr_literal()) {
     address dest = _constants->start() + data_offset;
     _instructions->relocate(pc, section_word_Relocation::spec((address) dest, CodeBuffer::SECT_CONSTS));
     TRACE_jvmci_3("relocating at " PTR_FORMAT " (+%d) with destination at %d", p2i(pc), pc_offset, data_offset);
--- a/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp	Wed Mar 09 14:54:18 2016 +0100
+++ b/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp	Wed Mar 09 14:18:12 2016 +0100
@@ -4481,225 +4481,126 @@
   BLOCK_COMMENT("} string_compare");
 }
 
-
-void MacroAssembler::string_equals(Register str1, Register str2,
-                                   Register cnt, Register result,
-                                   Register tmp1) {
-  Label SAME_CHARS, DONE, SHORT_LOOP, SHORT_STRING,
-    NEXT_WORD;
-
-  const Register tmp2 = rscratch1;
-  assert_different_registers(str1, str2, cnt, result, tmp1, tmp2, rscratch2);
-
-  BLOCK_COMMENT("string_equals {");
-
-  // Start by assuming that the strings are not equal.
-  mov(result, zr);
-
-  // A very short string
-  cmpw(cnt, 4);
-  br(Assembler::LT, SHORT_STRING);
-
-  // Check if the strings start at the same location.
-  cmp(str1, str2);
-  br(Assembler::EQ, SAME_CHARS);
-
-  // Compare longwords
+// Compare Strings or char/byte arrays.
+
+// is_string is true iff this is a string comparison.
+
+// For Strings we're passed the address of the first characters in a1
+// and a2 and the length in cnt1.
+
+// For byte and char arrays we're passed the arrays themselves and we
+// have to extract length fields and do null checks here.
+
+// elem_size is the element size in bytes: either 1 or 2.
+
+// There are two implementations.  For arrays >= 8 bytes, all
+// comparisons (including the final one, which may overlap) are
+// performed 8 bytes at a time.  For arrays < 8 bytes, we compare a
+// halfword, then a short, and then a byte.
+
+void MacroAssembler::arrays_equals(Register a1, Register a2,
+                                   Register result, Register cnt1,
+                                   int elem_size, bool is_string)
+{
+  Label SAME, DONE, SHORT, NEXT_WORD, ONE;
+  Register tmp1 = rscratch1;
+  Register tmp2 = rscratch2;
+  Register cnt2 = tmp2;  // cnt2 only used in array length compare
+  int elem_per_word = wordSize/elem_size;
+  int log_elem_size = exact_log2(elem_size);
+  int length_offset = arrayOopDesc::length_offset_in_bytes();
+  int base_offset
+    = arrayOopDesc::base_offset_in_bytes(elem_size == 2 ? T_CHAR : T_BYTE);
+
+  assert(elem_size == 1 || elem_size == 2, "must be char or byte");
+  assert_different_registers(a1, a2, result, cnt1, rscratch1, rscratch2);
+
+  BLOCK_COMMENT(is_string ? "string_equals {" : "array_equals {");
+
+  mov(result, false);
+
+  if (!is_string) {
+    // if (a==a2)
+    //     return true;
+    eor(rscratch1, a1, a2);
+    cbz(rscratch1, SAME);
+    // if (a==null || a2==null)
+    //     return false;
+    cbz(a1, DONE);
+    cbz(a2, DONE);
+    // if (a1.length != a2.length)
+    //      return false;
+    ldrw(cnt1, Address(a1, length_offset));
+    ldrw(cnt2, Address(a2, length_offset));
+    eorw(tmp1, cnt1, cnt2);
+    cbnzw(tmp1, DONE);
+
+    lea(a1, Address(a1, base_offset));
+    lea(a2, Address(a2, base_offset));
+  }
+
+  // Check for short strings, i.e. smaller than wordSize.
+  subs(cnt1, cnt1, elem_per_word);
+  br(Assembler::LT, SHORT);
+  // Main 8 byte comparison loop.
+  bind(NEXT_WORD); {
+    ldr(tmp1, Address(post(a1, wordSize)));
+    ldr(tmp2, Address(post(a2, wordSize)));
+    subs(cnt1, cnt1, elem_per_word);
+    eor(tmp1, tmp1, tmp2);
+    cbnz(tmp1, DONE);
+  } br(GT, NEXT_WORD);
+  // Last longword.  In the case where length == 4 we compare the
+  // same longword twice, but that's still faster than another
+  // conditional branch.
+  // cnt1 could be 0, -1, -2, -3, -4 for chars; -4 only happens when
+  // length == 4.
+  if (log_elem_size > 0)
+    lsl(cnt1, cnt1, log_elem_size);
+  ldr(tmp1, Address(a1, cnt1));
+  ldr(tmp2, Address(a2, cnt1));
+  eor(tmp1, tmp1, tmp2);
+  cbnz(tmp1, DONE);
+  b(SAME);
+
+  bind(SHORT);
+  Label TAIL03, TAIL01;
+
+  tbz(cnt1, 2 - log_elem_size, TAIL03); // 0-7 bytes left.
   {
-    subw(cnt, cnt, 4); // The last longword is a special case
-
-    // Move both string pointers to the last longword of their
-    // strings, negate the remaining count, and convert it to bytes.
-    lea(str1, Address(str1, cnt, Address::uxtw(1)));
-    lea(str2, Address(str2, cnt, Address::uxtw(1)));
-    sub(cnt, zr, cnt, LSL, 1);
-
-    // Loop, loading longwords and comparing them into rscratch2.
-    bind(NEXT_WORD);
-    ldr(tmp1, Address(str1, cnt));
-    ldr(tmp2, Address(str2, cnt));
-    adds(cnt, cnt, wordSize);
-    eor(rscratch2, tmp1, tmp2);
-    cbnz(rscratch2, DONE);
-    br(Assembler::LT, NEXT_WORD);
-
-    // Last longword.  In the case where length == 4 we compare the
-    // same longword twice, but that's still faster than another
-    // conditional branch.
-
-    ldr(tmp1, Address(str1));
-    ldr(tmp2, Address(str2));
-    eor(rscratch2, tmp1, tmp2);
-    cbz(rscratch2, SAME_CHARS);
-    b(DONE);
+    ldrw(tmp1, Address(post(a1, 4)));
+    ldrw(tmp2, Address(post(a2, 4)));
+    eorw(tmp1, tmp1, tmp2);
+    cbnzw(tmp1, DONE);
   }
-
-  bind(SHORT_STRING);
-  // Is the length zero?
-  cbz(cnt, SAME_CHARS);
-
-  bind(SHORT_LOOP);
-  load_unsigned_short(tmp1, Address(post(str1, 2)));
-  load_unsigned_short(tmp2, Address(post(str2, 2)));
-  subw(tmp1, tmp1, tmp2);
-  cbnz(tmp1, DONE);
-  sub(cnt, cnt, 1);
-  cbnz(cnt, SHORT_LOOP);
-
-  // Strings are equal.
-  bind(SAME_CHARS);
+  bind(TAIL03);
+  tbz(cnt1, 1 - log_elem_size, TAIL01); // 0-3 bytes left.
+  {
+    ldrh(tmp1, Address(post(a1, 2)));
+    ldrh(tmp2, Address(post(a2, 2)));
+    eorw(tmp1, tmp1, tmp2);
+    cbnzw(tmp1, DONE);
+  }
+  bind(TAIL01);
+  if (elem_size == 1) { // Only needed when comparing byte arrays.
+    tbz(cnt1, 0, SAME); // 0-1 bytes left.
+    {
+      ldrb(tmp1, a1);
+      ldrb(tmp2, a2);
+      eorw(tmp1, tmp1, tmp2);
+      cbnzw(tmp1, DONE);
+    }
+  }
+  // Arrays are equal.
+  bind(SAME);
   mov(result, true);
 
-  // That's it
+  // That's it.
   bind(DONE);
-
-  BLOCK_COMMENT("} string_equals");
+  BLOCK_COMMENT(is_string ? "} string_equals" : "} array_equals");
 }
 
 
-void MacroAssembler::byte_arrays_equals(Register ary1, Register ary2,
-                                        Register result, Register tmp1)
-{
-  Register cnt1 = rscratch1;
-  Register cnt2 = rscratch2;
-  Register tmp2 = rscratch2;
-
-  Label SAME, DIFFER, NEXT, TAIL07, TAIL03, TAIL01;
-
-  int length_offset  = arrayOopDesc::length_offset_in_bytes();
-  int base_offset    = arrayOopDesc::base_offset_in_bytes(T_BYTE);
-
-  BLOCK_COMMENT("byte_arrays_equals  {");
-
-    // different until proven equal
-    mov(result, false);
-
-    // same array?
-    cmp(ary1, ary2);
-    br(Assembler::EQ, SAME);
-
-    // ne if either null
-    cbz(ary1, DIFFER);
-    cbz(ary2, DIFFER);
-
-    // lengths ne?
-    ldrw(cnt1, Address(ary1, length_offset));
-    ldrw(cnt2, Address(ary2, length_offset));
-    cmp(cnt1, cnt2);
-    br(Assembler::NE, DIFFER);
-
-    lea(ary1, Address(ary1, base_offset));
-    lea(ary2, Address(ary2, base_offset));
-
-    subs(cnt1, cnt1, 8);
-    br(LT, TAIL07);
-
-  BIND(NEXT);
-    ldr(tmp1, Address(post(ary1, 8)));
-    ldr(tmp2, Address(post(ary2, 8)));
-    subs(cnt1, cnt1, 8);
-    eor(tmp1, tmp1, tmp2);
-    cbnz(tmp1, DIFFER);
-    br(GE, NEXT);
-
-  BIND(TAIL07);  // 0-7 bytes left, cnt1 = #bytes left - 4
-    tst(cnt1, 0b100);
-    br(EQ, TAIL03);
-    ldrw(tmp1, Address(post(ary1, 4)));
-    ldrw(tmp2, Address(post(ary2, 4)));
-    cmp(tmp1, tmp2);
-    br(NE, DIFFER);
-
-  BIND(TAIL03);  // 0-3 bytes left, cnt1 = #bytes left - 4
-    tst(cnt1, 0b10);
-    br(EQ, TAIL01);
-    ldrh(tmp1, Address(post(ary1, 2)));
-    ldrh(tmp2, Address(post(ary2, 2)));
-    cmp(tmp1, tmp2);
-    br(NE, DIFFER);
-  BIND(TAIL01);  // 0-1 byte left
-    tst(cnt1, 0b01);
-    br(EQ, SAME);
-    ldrb(tmp1, ary1);
-    ldrb(tmp2, ary2);
-    cmp(tmp1, tmp2);
-    br(NE, DIFFER);
-
-  BIND(SAME);
-    mov(result, true);
-  BIND(DIFFER); // result already set
-
-  BLOCK_COMMENT("} byte_arrays_equals");
-}
-
-// Compare char[] arrays aligned to 4 bytes
-void MacroAssembler::char_arrays_equals(Register ary1, Register ary2,
-                                        Register result, Register tmp1)
-{
-  Register cnt1 = rscratch1;
-  Register cnt2 = rscratch2;
-  Register tmp2 = rscratch2;
-
-  Label SAME, DIFFER, NEXT, TAIL03, TAIL01;
-
-  int length_offset  = arrayOopDesc::length_offset_in_bytes();
-  int base_offset    = arrayOopDesc::base_offset_in_bytes(T_CHAR);
-
-  BLOCK_COMMENT("char_arrays_equals  {");
-
-    // different until proven equal
-    mov(result, false);
-
-    // same array?
-    cmp(ary1, ary2);
-    br(Assembler::EQ, SAME);
-
-    // ne if either null
-    cbz(ary1, DIFFER);
-    cbz(ary2, DIFFER);
-
-    // lengths ne?
-    ldrw(cnt1, Address(ary1, length_offset));
-    ldrw(cnt2, Address(ary2, length_offset));
-    cmp(cnt1, cnt2);
-    br(Assembler::NE, DIFFER);
-
-    lea(ary1, Address(ary1, base_offset));
-    lea(ary2, Address(ary2, base_offset));
-
-    subs(cnt1, cnt1, 4);
-    br(LT, TAIL03);
-
-  BIND(NEXT);
-    ldr(tmp1, Address(post(ary1, 8)));
-    ldr(tmp2, Address(post(ary2, 8)));
-    subs(cnt1, cnt1, 4);
-    eor(tmp1, tmp1, tmp2);
-    cbnz(tmp1, DIFFER);
-    br(GE, NEXT);
-
-  BIND(TAIL03);  // 0-3 chars left, cnt1 = #chars left - 4
-    tst(cnt1, 0b10);
-    br(EQ, TAIL01);
-    ldrw(tmp1, Address(post(ary1, 4)));
-    ldrw(tmp2, Address(post(ary2, 4)));
-    cmp(tmp1, tmp2);
-    br(NE, DIFFER);
-  BIND(TAIL01);  // 0-1 chars left
-    tst(cnt1, 0b01);
-    br(EQ, SAME);
-    ldrh(tmp1, ary1);
-    ldrh(tmp2, ary2);
-    cmp(tmp1, tmp2);
-    br(NE, DIFFER);
-
-  BIND(SAME);
-    mov(result, true);
-  BIND(DIFFER); // result already set
-
-  BLOCK_COMMENT("} char_arrays_equals");
-}
-
 // encode char[] to byte[] in ISO_8859_1
 void MacroAssembler::encode_iso_array(Register src, Register dst,
                       Register len, Register result,
--- a/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp	Wed Mar 09 14:54:18 2016 +0100
+++ b/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp	Wed Mar 09 14:18:12 2016 +0100
@@ -1186,13 +1186,11 @@
   void string_compare(Register str1, Register str2,
                       Register cnt1, Register cnt2, Register result,
                       Register tmp1);
-  void string_equals(Register str1, Register str2,
-                     Register cnt, Register result,
-                     Register tmp1);
-  void char_arrays_equals(Register ary1, Register ary2,
-                          Register result, Register tmp1);
-  void byte_arrays_equals(Register ary1, Register ary2,
-                          Register result, Register tmp1);
+
+  void arrays_equals(Register a1, Register a2,
+                     Register result, Register cnt1,
+                     int elem_size, bool is_string);
+
   void encode_iso_array(Register src, Register dst,
                         Register len, Register result,
                         FloatRegister Vtmp1, FloatRegister Vtmp2,
--- a/src/cpu/aarch64/vm/nativeInst_aarch64.hpp	Wed Mar 09 14:54:18 2016 +0100
+++ b/src/cpu/aarch64/vm/nativeInst_aarch64.hpp	Wed Mar 09 14:18:12 2016 +0100
@@ -105,13 +105,20 @@
   inline friend NativeInstruction* nativeInstruction_at(address address);
 
   static bool is_adrp_at(address instr);
+
   static bool is_ldr_literal_at(address instr);
+
+  bool is_ldr_literal() {
+    return is_ldr_literal_at(addr_at(0));
+  }
+
   static bool is_ldrw_to_zr(address instr);
 
   static bool is_call_at(address instr) {
     const uint32_t insn = (*(uint32_t*)instr);
     return (insn >> 26) == 0b100101;
   }
+
   bool is_call() {
     return is_call_at(addr_at(0));
   }
--- a/src/cpu/aarch64/vm/stubGenerator_aarch64.cpp	Wed Mar 09 14:54:18 2016 +0100
+++ b/src/cpu/aarch64/vm/stubGenerator_aarch64.cpp	Wed Mar 09 14:18:12 2016 +0100
@@ -163,30 +163,20 @@
     sp_after_call_off = -26,
 
     d15_off            = -26,
-    d14_off            = -25,
     d13_off            = -24,
-    d12_off            = -23,
     d11_off            = -22,
-    d10_off            = -21,
     d9_off             = -20,
-    d8_off             = -19,
 
     r28_off            = -18,
-    r27_off            = -17,
     r26_off            = -16,
-    r25_off            = -15,
     r24_off            = -14,
-    r23_off            = -13,
     r22_off            = -12,
-    r21_off            = -11,
     r20_off            = -10,
-    r19_off            =  -9,
     call_wrapper_off   =  -8,
     result_off         =  -7,
     result_type_off    =  -6,
     method_off         =  -5,
     entry_point_off    =  -4,
-    parameters_off     =  -3,
     parameter_size_off =  -2,
     thread_off         =  -1,
     fp_f               =   0,
@@ -208,30 +198,20 @@
     const Address result_type   (rfp, result_type_off    * wordSize);
     const Address method        (rfp, method_off         * wordSize);
     const Address entry_point   (rfp, entry_point_off    * wordSize);
-    const Address parameters    (rfp, parameters_off     * wordSize);
     const Address parameter_size(rfp, parameter_size_off * wordSize);
 
     const Address thread        (rfp, thread_off         * wordSize);
 
     const Address d15_save      (rfp, d15_off * wordSize);
-    const Address d14_save      (rfp, d14_off * wordSize);
     const Address d13_save      (rfp, d13_off * wordSize);
-    const Address d12_save      (rfp, d12_off * wordSize);
     const Address d11_save      (rfp, d11_off * wordSize);
-    const Address d10_save      (rfp, d10_off * wordSize);
     const Address d9_save       (rfp, d9_off * wordSize);
-    const Address d8_save       (rfp, d8_off * wordSize);
 
     const Address r28_save      (rfp, r28_off * wordSize);
-    const Address r27_save      (rfp, r27_off * wordSize);
     const Address r26_save      (rfp, r26_off * wordSize);
-    const Address r25_save      (rfp, r25_off * wordSize);
     const Address r24_save      (rfp, r24_off * wordSize);
-    const Address r23_save      (rfp, r23_off * wordSize);
     const Address r22_save      (rfp, r22_off * wordSize);
-    const Address r21_save      (rfp, r21_off * wordSize);
     const Address r20_save      (rfp, r20_off * wordSize);
-    const Address r19_save      (rfp, r19_off * wordSize);
 
     // stub code
 
@@ -254,31 +234,20 @@
     // rthread because we want to sanity check rthread later
     __ str(c_rarg7,  thread);
     __ strw(c_rarg6, parameter_size);
-    __ str(c_rarg5,  parameters);
-    __ str(c_rarg4,  entry_point);
-    __ str(c_rarg3,  method);
-    __ str(c_rarg2,  result_type);
-    __ str(c_rarg1,  result);
-    __ str(c_rarg0,  call_wrapper);
-    __ str(r19,      r19_save);
-    __ str(r20,      r20_save);
-    __ str(r21,      r21_save);
-    __ str(r22,      r22_save);
-    __ str(r23,      r23_save);
-    __ str(r24,      r24_save);
-    __ str(r25,      r25_save);
-    __ str(r26,      r26_save);
-    __ str(r27,      r27_save);
-    __ str(r28,      r28_save);
-
-    __ strd(v8,      d8_save);
-    __ strd(v9,      d9_save);
-    __ strd(v10,     d10_save);
-    __ strd(v11,     d11_save);
-    __ strd(v12,     d12_save);
-    __ strd(v13,     d13_save);
-    __ strd(v14,     d14_save);
-    __ strd(v15,     d15_save);
+    __ stp(c_rarg4, c_rarg5,  entry_point);
+    __ stp(c_rarg2, c_rarg3,  result_type);
+    __ stp(c_rarg0, c_rarg1,  call_wrapper);
+
+    __ stp(r20, r19,   r20_save);
+    __ stp(r22, r21,   r22_save);
+    __ stp(r24, r23,   r24_save);
+    __ stp(r26, r25,   r26_save);
+    __ stp(r28, r27,   r28_save);
+
+    __ stpd(v9,  v8,   d9_save);
+    __ stpd(v11, v10,  d11_save);
+    __ stpd(v13, v12,  d13_save);
+    __ stpd(v15, v14,  d15_save);
 
     // install Java thread in global register now we have saved
     // whatever value it held
@@ -385,33 +354,22 @@
 #endif
 
     // restore callee-save registers
-    __ ldrd(v15,      d15_save);
-    __ ldrd(v14,      d14_save);
-    __ ldrd(v13,      d13_save);
-    __ ldrd(v12,      d12_save);
-    __ ldrd(v11,      d11_save);
-    __ ldrd(v10,      d10_save);
-    __ ldrd(v9,       d9_save);
-    __ ldrd(v8,       d8_save);
-
-    __ ldr(r28,      r28_save);
-    __ ldr(r27,      r27_save);
-    __ ldr(r26,      r26_save);
-    __ ldr(r25,      r25_save);
-    __ ldr(r24,      r24_save);
-    __ ldr(r23,      r23_save);
-    __ ldr(r22,      r22_save);
-    __ ldr(r21,      r21_save);
-    __ ldr(r20,      r20_save);
-    __ ldr(r19,      r19_save);
-    __ ldr(c_rarg0,  call_wrapper);
-    __ ldr(c_rarg1,  result);
+    __ ldpd(v15, v14,  d15_save);
+    __ ldpd(v13, v12,  d13_save);
+    __ ldpd(v11, v10,  d11_save);
+    __ ldpd(v9,  v8,   d9_save);
+
+    __ ldp(r28, r27,   r28_save);
+    __ ldp(r26, r25,   r26_save);
+    __ ldp(r24, r23,   r24_save);
+    __ ldp(r22, r21,   r22_save);
+    __ ldp(r20, r19,   r20_save);
+
+    __ ldp(c_rarg0, c_rarg1,  call_wrapper);
     __ ldrw(c_rarg2, result_type);
     __ ldr(c_rarg3,  method);
-    __ ldr(c_rarg4,  entry_point);
-    __ ldr(c_rarg5,  parameters);
-    __ ldr(c_rarg6,  parameter_size);
-    __ ldr(c_rarg7,  thread);
+    __ ldp(c_rarg4, c_rarg5,  entry_point);
+    __ ldp(c_rarg6, c_rarg7,  parameter_size);
 
 #ifndef PRODUCT
     // tell the simulator we are about to end Java execution
@@ -666,7 +624,7 @@
   //     count   -  element count
   //     tmp     - scratch register
   //
-  //     Destroy no registers!
+  //     Destroy no registers except rscratch1 and rscratch2
   //
   void  gen_write_ref_array_pre_barrier(Register addr, Register count, bool dest_uninitialized) {
     BarrierSet* bs = Universe::heap()->barrier_set();
@@ -674,12 +632,13 @@
     case BarrierSet::G1SATBCTLogging:
       // With G1, don't generate the call if we statically know that the target in uninitialized
       if (!dest_uninitialized) {
-        __ push(RegSet::range(r0, r29), sp);         // integer registers except lr & sp
+        __ push_call_clobbered_registers();
         if (count == c_rarg0) {
           if (addr == c_rarg1) {
             // exactly backwards!!
-            __ stp(c_rarg0, c_rarg1, __ pre(sp, -2 * wordSize));
-            __ ldp(c_rarg1, c_rarg0, __ post(sp, -2 * wordSize));
+            __ mov(rscratch1, c_rarg0);
+            __ mov(c_rarg0, c_rarg1);
+            __ mov(c_rarg1, rscratch1);
           } else {
             __ mov(c_rarg1, count);
             __ mov(c_rarg0, addr);
@@ -689,7 +648,7 @@
           __ mov(c_rarg1, count);
         }
         __ call_VM_leaf(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_pre), 2);
-        __ pop(RegSet::range(r0, r29), sp);         // integer registers except lr & sp        }
+        __ pop_call_clobbered_registers();
         break;
       case BarrierSet::CardTableForRS:
       case BarrierSet::CardTableExtension:
@@ -719,7 +678,7 @@
       case BarrierSet::G1SATBCTLogging:
 
         {
-          __ push(RegSet::range(r0, r29), sp);         // integer registers except lr & sp
+          __ push_call_clobbered_registers();
           // must compute element count unless barrier set interface is changed (other platforms supply count)
           assert_different_registers(start, end, scratch);
           __ lea(scratch, Address(end, BytesPerHeapOop));
@@ -728,7 +687,7 @@
           __ mov(c_rarg0, start);
           __ mov(c_rarg1, scratch);
           __ call_VM_leaf(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_post), 2);
-          __ pop(RegSet::range(r0, r29), sp);         // integer registers except lr & sp        }
+          __ pop_call_clobbered_registers();
         }
         break;
       case BarrierSet::CardTableForRS:
@@ -1394,10 +1353,10 @@
   //   no-overlap entry point used by generate_conjoint_long_oop_copy().
   //
   address generate_disjoint_oop_copy(bool aligned, address *entry,
-                                     const char *name, bool dest_uninitialized = false) {
+                                     const char *name, bool dest_uninitialized) {
     const bool is_oop = true;
     const size_t size = UseCompressedOops ? sizeof (jint) : sizeof (jlong);
-    return generate_disjoint_copy(size, aligned, is_oop, entry, name);
+    return generate_disjoint_copy(size, aligned, is_oop, entry, name, dest_uninitialized);
   }
 
   // Arguments:
@@ -1412,10 +1371,11 @@
   //
   address generate_conjoint_oop_copy(bool aligned,
                                      address nooverlap_target, address *entry,
-                                     const char *name, bool dest_uninitialized = false) {
+                                     const char *name, bool dest_uninitialized) {
     const bool is_oop = true;
     const size_t size = UseCompressedOops ? sizeof (jint) : sizeof (jlong);
-    return generate_conjoint_copy(size, aligned, is_oop, nooverlap_target, entry, name);
+    return generate_conjoint_copy(size, aligned, is_oop, nooverlap_target, entry,
+                                  name, dest_uninitialized);
   }
 
 
@@ -1522,6 +1482,8 @@
     }
 #endif //ASSERT
 
+    gen_write_ref_array_pre_barrier(to, count, dest_uninitialized);
+
     // save the original count
     __ mov(count_save, count);
 
@@ -1988,9 +1950,11 @@
       bool aligned = !UseCompressedOops;
 
       StubRoutines::_arrayof_oop_disjoint_arraycopy
-        = generate_disjoint_oop_copy(aligned, &entry, "arrayof_oop_disjoint_arraycopy");
+        = generate_disjoint_oop_copy(aligned, &entry, "arrayof_oop_disjoint_arraycopy",
+                                     /*dest_uninitialized*/false);
       StubRoutines::_arrayof_oop_arraycopy
-        = generate_conjoint_oop_copy(aligned, entry, &entry_oop_arraycopy, "arrayof_oop_arraycopy");
+        = generate_conjoint_oop_copy(aligned, entry, &entry_oop_arraycopy, "arrayof_oop_arraycopy",
+                                     /*dest_uninitialized*/false);
       // Aligned versions without pre-barriers
       StubRoutines::_arrayof_oop_disjoint_arraycopy_uninit
         = generate_disjoint_oop_copy(aligned, &entry, "arrayof_oop_disjoint_arraycopy_uninit",
--- a/src/cpu/ppc/vm/globals_ppc.hpp	Wed Mar 09 14:54:18 2016 +0100
+++ b/src/cpu/ppc/vm/globals_ppc.hpp	Wed Mar 09 14:18:12 2016 +0100
@@ -1,6 +1,6 @@
 /*
  * Copyright (c) 2002, 2016, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012, 2015 SAP SE. All rights reserved.
+ * Copyright (c) 2012, 2016 SAP SE. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -74,8 +74,7 @@
 
 define_pd_global(uintx, TypeProfileLevel, 111);
 
-// No performance work done here yet.
-define_pd_global(bool, CompactStrings, false);
+define_pd_global(bool, CompactStrings, true);
 
 // Platform dependent flag handling: flags only defined on this platform.
 #define ARCH_FLAGS(develop, product, diagnostic, experimental, notproduct, range, constraint)  \
--- a/src/cpu/ppc/vm/macroAssembler_ppc.cpp	Wed Mar 09 14:54:18 2016 +0100
+++ b/src/cpu/ppc/vm/macroAssembler_ppc.cpp	Wed Mar 09 14:18:12 2016 +0100
@@ -45,6 +45,9 @@
 #include "gc/g1/g1SATBCardTableModRefBS.hpp"
 #include "gc/g1/heapRegion.hpp"
 #endif // INCLUDE_ALL_GCS
+#ifdef COMPILER2
+#include "opto/intrinsicnode.hpp"
+#endif
 
 #ifdef PRODUCT
 #define BLOCK_COMMENT(str) // nothing
@@ -3168,6 +3171,553 @@
 
 /////////////////////////////////////////// String intrinsics ////////////////////////////////////////////
 
+#ifdef COMPILER2
+// Intrinsics for CompactStrings
+
+// Compress char[] to byte[] by compressing 16 bytes at once.
+void MacroAssembler::string_compress_16(Register src, Register dst, Register cnt,
+                                        Register tmp1, Register tmp2, Register tmp3, Register tmp4, Register tmp5,
+                                        Label& Lfailure) {
+
+  const Register tmp0 = R0;
+  assert_different_registers(src, dst, cnt, tmp0, tmp1, tmp2, tmp3, tmp4, tmp5);
+  Label Lloop, Lslow;
+
+  // Check if cnt >= 8 (= 16 bytes)
+  lis(tmp1, 0xFF);                // tmp1 = 0x00FF00FF00FF00FF
+  srwi_(tmp2, cnt, 3);
+  beq(CCR0, Lslow);
+  ori(tmp1, tmp1, 0xFF);
+  rldimi(tmp1, tmp1, 32, 0);
+  mtctr(tmp2);
+
+  // 2x unrolled loop
+  bind(Lloop);
+  ld(tmp2, 0, src);               // _0_1_2_3 (Big Endian)
+  ld(tmp4, 8, src);               // _4_5_6_7
+
+  orr(tmp0, tmp2, tmp4);
+  rldicl(tmp3, tmp2, 6*8, 64-24); // _____1_2
+  rldimi(tmp2, tmp2, 2*8, 2*8);   // _0_2_3_3
+  rldicl(tmp5, tmp4, 6*8, 64-24); // _____5_6
+  rldimi(tmp4, tmp4, 2*8, 2*8);   // _4_6_7_7
+
+  andc_(tmp0, tmp0, tmp1);
+  bne(CCR0, Lfailure);            // Not latin1.
+  addi(src, src, 16);
+
+  rlwimi(tmp3, tmp2, 0*8, 24, 31);// _____1_3
+  srdi(tmp2, tmp2, 3*8);          // ____0_2_
+  rlwimi(tmp5, tmp4, 0*8, 24, 31);// _____5_7
+  srdi(tmp4, tmp4, 3*8);          // ____4_6_
+
+  orr(tmp2, tmp2, tmp3);          // ____0123
+  orr(tmp4, tmp4, tmp5);          // ____4567
+
+  stw(tmp2, 0, dst);
+  stw(tmp4, 4, dst);
+  addi(dst, dst, 8);
+  bdnz(Lloop);
+
+  bind(Lslow);                    // Fallback to slow version
+}
+
+// Compress char[] to byte[]. cnt must be positive int.
+void MacroAssembler::string_compress(Register src, Register dst, Register cnt, Register tmp, Label& Lfailure) {
+  Label Lloop;
+  mtctr(cnt);
+
+  bind(Lloop);
+  lhz(tmp, 0, src);
+  cmplwi(CCR0, tmp, 0xff);
+  bgt(CCR0, Lfailure);            // Not latin1.
+  addi(src, src, 2);
+  stb(tmp, 0, dst);
+  addi(dst, dst, 1);
+  bdnz(Lloop);
+}
+
+// Inflate byte[] to char[] by inflating 16 bytes at once.
+void MacroAssembler::string_inflate_16(Register src, Register dst, Register cnt,
+                                       Register tmp1, Register tmp2, Register tmp3, Register tmp4, Register tmp5) {
+  const Register tmp0 = R0;
+  assert_different_registers(src, dst, cnt, tmp0, tmp1, tmp2, tmp3, tmp4, tmp5);
+  Label Lloop, Lslow;
+
+  // Check if cnt >= 8
+  srwi_(tmp2, cnt, 3);
+  beq(CCR0, Lslow);
+  lis(tmp1, 0xFF);                // tmp1 = 0x00FF00FF
+  ori(tmp1, tmp1, 0xFF);
+  mtctr(tmp2);
+
+  // 2x unrolled loop
+  bind(Lloop);
+  lwz(tmp2, 0, src);              // ____0123 (Big Endian)
+  lwz(tmp4, 4, src);              // ____4567
+  addi(src, src, 8);
+
+  rldicl(tmp3, tmp2, 7*8, 64-8);  // _______2
+  rlwimi(tmp2, tmp2, 3*8, 16, 23);// ____0113
+  rldicl(tmp5, tmp4, 7*8, 64-8);  // _______6
+  rlwimi(tmp4, tmp4, 3*8, 16, 23);// ____4557
+
+  andc(tmp0, tmp2, tmp1);         // ____0_1_
+  rlwimi(tmp2, tmp3, 2*8, 0, 23); // _____2_3
+  andc(tmp3, tmp4, tmp1);         // ____4_5_
+  rlwimi(tmp4, tmp5, 2*8, 0, 23); // _____6_7
+
+  rldimi(tmp2, tmp0, 3*8, 0*8);   // _0_1_2_3
+  rldimi(tmp4, tmp3, 3*8, 0*8);   // _4_5_6_7
+
+  std(tmp2, 0, dst);
+  std(tmp4, 8, dst);
+  addi(dst, dst, 16);
+  bdnz(Lloop);
+
+  bind(Lslow);                    // Fallback to slow version
+}
+
+// Inflate byte[] to char[]. cnt must be positive int.
+void MacroAssembler::string_inflate(Register src, Register dst, Register cnt, Register tmp) {
+  Label Lloop;
+  mtctr(cnt);
+
+  bind(Lloop);
+  lbz(tmp, 0, src);
+  addi(src, src, 1);
+  sth(tmp, 0, dst);
+  addi(dst, dst, 2);
+  bdnz(Lloop);
+}
+
+void MacroAssembler::string_compare(Register str1, Register str2,
+                                    Register cnt1, Register cnt2,
+                                    Register tmp1, Register result, int ae) {
+  const Register tmp0 = R0,
+                 diff = tmp1;
+
+  assert_different_registers(str1, str2, cnt1, cnt2, tmp0, tmp1, result);
+  Label Ldone, Lslow, Lloop, Lreturn_diff;
+
+  // Note: Making use of the fact that compareTo(a, b) == -compareTo(b, a)
+  // we interchange str1 and str2 in the UL case and negate the result.
+  // Like this, str1 is always latin1 encoded, except for the UU case.
+  // In addition, we need 0 (or sign which is 0) extend.
+
+  if (ae == StrIntrinsicNode::UU) {
+    srwi(cnt1, cnt1, 1);
+  } else {
+    clrldi(cnt1, cnt1, 32);
+  }
+
+  if (ae != StrIntrinsicNode::LL) {
+    srwi(cnt2, cnt2, 1);
+  } else {
+    clrldi(cnt2, cnt2, 32);
+  }
+
+  // See if the lengths are different, and calculate min in cnt1.
+  // Save diff in case we need it for a tie-breaker.
+  subf_(diff, cnt2, cnt1); // diff = cnt1 - cnt2
+  // if (diff > 0) { cnt1 = cnt2; }
+  if (VM_Version::has_isel()) {
+    isel(cnt1, CCR0, Assembler::greater, /*invert*/ false, cnt2);
+  } else {
+    Label Lskip;
+    blt(CCR0, Lskip);
+    mr(cnt1, cnt2);
+    bind(Lskip);
+  }
+
+  // Rename registers
+  Register chr1 = result;
+  Register chr2 = tmp0;
+
+  // Compare multiple characters in fast loop (only implemented for same encoding).
+  int stride1 = 8, stride2 = 8;
+  if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
+    int log2_chars_per_iter = (ae == StrIntrinsicNode::LL) ? 3 : 2;
+    Label Lfastloop, Lskipfast;
+
+    srwi_(tmp0, cnt1, log2_chars_per_iter);
+    beq(CCR0, Lskipfast);
+    rldicl(cnt2, cnt1, 0, 64 - log2_chars_per_iter); // Remaining characters.
+    li(cnt1, 1 << log2_chars_per_iter); // Initialize for failure case: Rescan characters from current iteration.
+    mtctr(tmp0);
+
+    bind(Lfastloop);
+    ld(chr1, 0, str1);
+    ld(chr2, 0, str2);
+    cmpd(CCR0, chr1, chr2);
+    bne(CCR0, Lslow);
+    addi(str1, str1, stride1);
+    addi(str2, str2, stride2);
+    bdnz(Lfastloop);
+    mr(cnt1, cnt2); // Remaining characters.
+    bind(Lskipfast);
+  }
+
+  // Loop which searches the first difference character by character.
+  cmpwi(CCR0, cnt1, 0);
+  beq(CCR0, Lreturn_diff);
+  bind(Lslow);
+  mtctr(cnt1);
+
+  switch (ae) {
+    case StrIntrinsicNode::LL: stride1 = 1; stride2 = 1; break;
+    case StrIntrinsicNode::UL: // fallthru (see comment above)
+    case StrIntrinsicNode::LU: stride1 = 1; stride2 = 2; break;
+    case StrIntrinsicNode::UU: stride1 = 2; stride2 = 2; break;
+    default: ShouldNotReachHere(); break;
+  }
+
+  bind(Lloop);
+  if (stride1 == 1) { lbz(chr1, 0, str1); } else { lhz(chr1, 0, str1); }
+  if (stride2 == 1) { lbz(chr2, 0, str2); } else { lhz(chr2, 0, str2); }
+  subf_(result, chr2, chr1); // result = chr1 - chr2
+  bne(CCR0, Ldone);
+  addi(str1, str1, stride1);
+  addi(str2, str2, stride2);
+  bdnz(Lloop);
+
+  // If strings are equal up to min length, return the length difference.
+  bind(Lreturn_diff);
+  mr(result, diff);
+
+  // Otherwise, return the difference between the first mismatched chars.
+  bind(Ldone);
+  if (ae == StrIntrinsicNode::UL) {
+    neg(result, result); // Negate result (see note above).
+  }
+}
+
+void MacroAssembler::array_equals(bool is_array_equ, Register ary1, Register ary2,
+                                  Register limit, Register tmp1, Register result, bool is_byte) {
+  const Register tmp0 = R0;
+  assert_different_registers(ary1, ary2, limit, tmp0, tmp1, result);
+  Label Ldone, Lskiploop, Lloop, Lfastloop, Lskipfast;
+  bool limit_needs_shift = false;
+
+  if (is_array_equ) {
+    const int length_offset = arrayOopDesc::length_offset_in_bytes();
+    const int base_offset   = arrayOopDesc::base_offset_in_bytes(is_byte ? T_BYTE : T_CHAR);
+
+    // Return true if the same array.
+    cmpd(CCR0, ary1, ary2);
+    beq(CCR0, Lskiploop);
+
+    // Return false if one of them is NULL.
+    cmpdi(CCR0, ary1, 0);
+    cmpdi(CCR1, ary2, 0);
+    li(result, 0);
+    cror(CCR0, Assembler::equal, CCR1, Assembler::equal);
+    beq(CCR0, Ldone);
+
+    // Load the lengths of arrays.
+    lwz(limit, length_offset, ary1);
+    lwz(tmp0, length_offset, ary2);
+
+    // Return false if the two arrays are not equal length.
+    cmpw(CCR0, limit, tmp0);
+    bne(CCR0, Ldone);
+
+    // Load array addresses.
+    addi(ary1, ary1, base_offset);
+    addi(ary2, ary2, base_offset);
+  } else {
+    limit_needs_shift = !is_byte;
+    li(result, 0); // Assume not equal.
+  }
+
+  // Rename registers
+  Register chr1 = tmp0;
+  Register chr2 = tmp1;
+
+  // Compare 8 bytes per iteration in fast loop.
+  const int log2_chars_per_iter = is_byte ? 3 : 2;
+
+  srwi_(tmp0, limit, log2_chars_per_iter + (limit_needs_shift ? 1 : 0));
+  beq(CCR0, Lskipfast);
+  mtctr(tmp0);
+
+  bind(Lfastloop);
+  ld(chr1, 0, ary1);
+  ld(chr2, 0, ary2);
+  addi(ary1, ary1, 8);
+  addi(ary2, ary2, 8);
+  cmpd(CCR0, chr1, chr2);
+  bne(CCR0, Ldone);
+  bdnz(Lfastloop);
+
+  bind(Lskipfast);
+  rldicl_(limit, limit, limit_needs_shift ? 64 - 1 : 0, 64 - log2_chars_per_iter); // Remaining characters.
+  beq(CCR0, Lskiploop);
+  mtctr(limit);
+
+  // Character by character.
+  bind(Lloop);
+  if (is_byte) {
+    lbz(chr1, 0, ary1);
+    lbz(chr2, 0, ary2);
+    addi(ary1, ary1, 1);
+    addi(ary2, ary2, 1);
+  } else {
+    lhz(chr1, 0, ary1);
+    lhz(chr2, 0, ary2);
+    addi(ary1, ary1, 2);
+    addi(ary2, ary2, 2);
+  }
+  cmpw(CCR0, chr1, chr2);
+  bne(CCR0, Ldone);
+  bdnz(Lloop);
+
+  bind(Lskiploop);
+  li(result, 1); // All characters are equal.
+  bind(Ldone);
+}
+
+void MacroAssembler::string_indexof(Register result, Register haystack, Register haycnt,
+                                    Register needle, ciTypeArray* needle_values, Register needlecnt, int needlecntval,
+                                    Register tmp1, Register tmp2, Register tmp3, Register tmp4, int ae) {
+
+  // Ensure 0<needlecnt<=haycnt in ideal graph as prerequisite!
+  Label L_TooShort, L_Found, L_NotFound, L_End;
+  Register last_addr = haycnt, // Kill haycnt at the beginning.
+  addr      = tmp1,
+  n_start   = tmp2,
+  ch1       = tmp3,
+  ch2       = R0;
+
+  assert(ae != StrIntrinsicNode::LU, "Invalid encoding");
+  const int h_csize = (ae == StrIntrinsicNode::LL) ? 1 : 2;
+  const int n_csize = (ae == StrIntrinsicNode::UU) ? 2 : 1;
+
+  // **************************************************************************************************
+  // Prepare for main loop: optimized for needle count >=2, bail out otherwise.
+  // **************************************************************************************************
+
+  // Compute last haystack addr to use if no match gets found.
+  clrldi(haycnt, haycnt, 32);         // Ensure positive int is valid as 64 bit value.
+  addi(addr, haystack, -h_csize);     // Accesses use pre-increment.
+  if (needlecntval == 0) { // variable needlecnt
+   cmpwi(CCR6, needlecnt, 2);
+   clrldi(needlecnt, needlecnt, 32);  // Ensure positive int is valid as 64 bit value.
+   blt(CCR6, L_TooShort);             // Variable needlecnt: handle short needle separately.
+  }
+
+  if (n_csize == 2) { lwz(n_start, 0, needle); } else { lhz(n_start, 0, needle); } // Load first 2 characters of needle.
+
+  if (needlecntval == 0) { // variable needlecnt
+   subf(ch1, needlecnt, haycnt);      // Last character index to compare is haycnt-needlecnt.
+   addi(needlecnt, needlecnt, -2);    // Rest of needle.
+  } else { // constant needlecnt
+  guarantee(needlecntval != 1, "IndexOf with single-character needle must be handled separately");
+  assert((needlecntval & 0x7fff) == needlecntval, "wrong immediate");
+   addi(ch1, haycnt, -needlecntval);  // Last character index to compare is haycnt-needlecnt.
+   if (needlecntval > 3) { li(needlecnt, needlecntval - 2); } // Rest of needle.
+  }
+
+  if (h_csize == 2) { slwi(ch1, ch1, 1); } // Scale to number of bytes.
+
+  if (ae ==StrIntrinsicNode::UL) {
+   srwi(tmp4, n_start, 1*8);          // ___0
+   rlwimi(n_start, tmp4, 2*8, 0, 23); // _0_1
+  }
+
+  add(last_addr, haystack, ch1);      // Point to last address to compare (haystack+2*(haycnt-needlecnt)).
+
+  // Main Loop (now we have at least 2 characters).
+  Label L_OuterLoop, L_InnerLoop, L_FinalCheck, L_Comp1, L_Comp2;
+  bind(L_OuterLoop); // Search for 1st 2 characters.
+  Register addr_diff = tmp4;
+   subf(addr_diff, addr, last_addr);  // Difference between already checked address and last address to check.
+   addi(addr, addr, h_csize);         // This is the new address we want to use for comparing.
+   srdi_(ch2, addr_diff, h_csize);
+   beq(CCR0, L_FinalCheck);           // 2 characters left?
+   mtctr(ch2);                        // num of characters / 2
+  bind(L_InnerLoop);                  // Main work horse (2x unrolled search loop)
+   if (h_csize == 2) {                // Load 2 characters of haystack (ignore alignment).
+    lwz(ch1, 0, addr);
+    lwz(ch2, 2, addr);
+   } else {
+    lhz(ch1, 0, addr);
+    lhz(ch2, 1, addr);
+   }
+   cmpw(CCR0, ch1, n_start);          // Compare 2 characters (1 would be sufficient but try to reduce branches to CompLoop).
+   cmpw(CCR1, ch2, n_start);
+   beq(CCR0, L_Comp1);                // Did we find the needle start?
+   beq(CCR1, L_Comp2);
+   addi(addr, addr, 2 * h_csize);
+   bdnz(L_InnerLoop);
+  bind(L_FinalCheck);
+   andi_(addr_diff, addr_diff, h_csize); // Remaining characters not covered by InnerLoop: (num of characters) & 1.
+   beq(CCR0, L_NotFound);
+   if (h_csize == 2) { lwz(ch1, 0, addr); } else { lhz(ch1, 0, addr); } // One position left at which we have to compare.
+   cmpw(CCR1, ch1, n_start);
+   beq(CCR1, L_Comp1);
+  bind(L_NotFound);
+   li(result, -1);                    // not found
+   b(L_End);
+
+   // **************************************************************************************************
+   // Special Case: unfortunately, the variable needle case can be called with needlecnt<2
+   // **************************************************************************************************
+  if (needlecntval == 0) {           // We have to handle these cases separately.
+  Label L_OneCharLoop;
+  bind(L_TooShort);
+   mtctr(haycnt);
+   if (n_csize == 2) { lhz(n_start, 0, needle); } else { lbz(n_start, 0, needle); } // First character of needle
+  bind(L_OneCharLoop);
+   if (h_csize == 2) { lhzu(ch1, 2, addr); } else { lbzu(ch1, 1, addr); }
+   cmpw(CCR1, ch1, n_start);
+   beq(CCR1, L_Found);               // Did we find the one character needle?
+   bdnz(L_OneCharLoop);
+   li(result, -1);                   // Not found.
+   b(L_End);
+  }
+
+  // **************************************************************************************************
+  // Regular Case Part II: compare rest of needle (first 2 characters have been compared already)
+  // **************************************************************************************************
+
+  // Compare the rest
+  bind(L_Comp2);
+   addi(addr, addr, h_csize);        // First comparison has failed, 2nd one hit.
+  bind(L_Comp1);                     // Addr points to possible needle start.
+  if (needlecntval != 2) {           // Const needlecnt==2?
+   if (needlecntval != 3) {
+    if (needlecntval == 0) { beq(CCR6, L_Found); } // Variable needlecnt==2?
+    Register n_ind = tmp4,
+             h_ind = n_ind;
+    li(n_ind, 2 * n_csize);          // First 2 characters are already compared, use index 2.
+    mtctr(needlecnt);                // Decremented by 2, still > 0.
+   Label L_CompLoop;
+   bind(L_CompLoop);
+    if (ae ==StrIntrinsicNode::UL) {
+      h_ind = ch1;
+      sldi(h_ind, n_ind, 1);
+    }
+    if (n_csize == 2) { lhzx(ch2, needle, n_ind); } else { lbzx(ch2, needle, n_ind); }
+    if (h_csize == 2) { lhzx(ch1, addr, h_ind); } else { lbzx(ch1, addr, h_ind); }
+    cmpw(CCR1, ch1, ch2);
+    bne(CCR1, L_OuterLoop);
+    addi(n_ind, n_ind, n_csize);
+    bdnz(L_CompLoop);
+   } else { // No loop required if there's only one needle character left.
+    if (n_csize == 2) { lhz(ch2, 2 * 2, needle); } else { lbz(ch2, 2 * 1, needle); }
+    if (h_csize == 2) { lhz(ch1, 2 * 2, addr); } else { lbz(ch1, 2 * 1, addr); }
+    cmpw(CCR1, ch1, ch2);
+    bne(CCR1, L_OuterLoop);
+   }
+  }
+  // Return index ...
+  bind(L_Found);
+   subf(result, haystack, addr);     // relative to haystack, ...
+   if (h_csize == 2) { srdi(result, result, 1); } // in characters.
+  bind(L_End);
+} // string_indexof
+
+void MacroAssembler::string_indexof_char(Register result, Register haystack, Register haycnt,
+                                         Register needle, jchar needleChar, Register tmp1, Register tmp2, bool is_byte) {
+  assert_different_registers(haystack, haycnt, needle, tmp1, tmp2);
+
+  Label L_InnerLoop, L_FinalCheck, L_Found1, L_Found2, L_NotFound, L_End;
+  Register addr = tmp1,
+           ch1 = tmp2,
+           ch2 = R0;
+
+  const int h_csize = is_byte ? 1 : 2;
+
+//4:
+   srwi_(tmp2, haycnt, 1);   // Shift right by exact_log2(UNROLL_FACTOR).
+   mr(addr, haystack);
+   beq(CCR0, L_FinalCheck);
+   mtctr(tmp2);              // Move to count register.
+//8:
+  bind(L_InnerLoop);         // Main work horse (2x unrolled search loop).
+   if (!is_byte) {
+    lhz(ch1, 0, addr);
+    lhz(ch2, 2, addr);
+   } else {
+    lbz(ch1, 0, addr);
+    lbz(ch2, 1, addr);
+   }
+   (needle != R0) ? cmpw(CCR0, ch1, needle) : cmplwi(CCR0, ch1, (unsigned int)needleChar);
+   (needle != R0) ? cmpw(CCR1, ch2, needle) : cmplwi(CCR1, ch2, (unsigned int)needleChar);
+   beq(CCR0, L_Found1);      // Did we find the needle?
+   beq(CCR1, L_Found2);
+   addi(addr, addr, 2 * h_csize);
+   bdnz(L_InnerLoop);
+//16:
+  bind(L_FinalCheck);
+   andi_(R0, haycnt, 1);
+   beq(CCR0, L_NotFound);
+   if (!is_byte) { lhz(ch1, 0, addr); } else { lbz(ch1, 0, addr); } // One position left at which we have to compare.
+   (needle != R0) ? cmpw(CCR1, ch1, needle) : cmplwi(CCR1, ch1, (unsigned int)needleChar);
+   beq(CCR1, L_Found1);
+//21:
+  bind(L_NotFound);
+   li(result, -1);           // Not found.
+   b(L_End);
+
+  bind(L_Found2);
+   addi(addr, addr, h_csize);
+//24:
+  bind(L_Found1);            // Return index ...
+   subf(result, haystack, addr); // relative to haystack, ...
+   if (!is_byte) { srdi(result, result, 1); } // in characters.
+  bind(L_End);
+} // string_indexof_char
+
+
+void MacroAssembler::has_negatives(Register src, Register cnt, Register result,
+                                   Register tmp1, Register tmp2) {
+  const Register tmp0 = R0;
+  assert_different_registers(src, result, cnt, tmp0, tmp1, tmp2);
+  Label Lfastloop, Lslow, Lloop, Lnoneg, Ldone;
+
+  // Check if cnt >= 8 (= 16 bytes)
+  lis(tmp1, (int)(short)0x8080);  // tmp1 = 0x8080808080808080
+  srwi_(tmp2, cnt, 4);
+  li(result, 1);                  // Assume there's a negative byte.
+  beq(CCR0, Lslow);
+  ori(tmp1, tmp1, 0x8080);
+  rldimi(tmp1, tmp1, 32, 0);
+  mtctr(tmp2);
+
+  // 2x unrolled loop
+  bind(Lfastloop);
+  ld(tmp2, 0, src);
+  ld(tmp0, 8, src);
+
+  orr(tmp0, tmp2, tmp0);
+
+  and_(tmp0, tmp0, tmp1);
+  bne(CCR0, Ldone);               // Found negative byte.
+  addi(src, src, 16);
+
+  bdnz(Lfastloop);
+
+  bind(Lslow);                    // Fallback to slow version
+  rldicl_(tmp0, cnt, 0, 64-4);
+  beq(CCR0, Lnoneg);
+  mtctr(tmp0);
+  bind(Lloop);
+  lbz(tmp0, 0, src);
+  addi(src, src, 1);
+  andi_(tmp0, tmp0, 0x80);
+  bne(CCR0, Ldone);               // Found negative byte.
+  bdnz(Lloop);
+  bind(Lnoneg);
+  li(result, 0);
+
+  bind(Ldone);
+}
+
+
+// Intrinsics for non-CompactStrings
+
 // Search for a single jchar in an jchar[].
 //
 // Assumes that result differs from all other registers.
@@ -3613,6 +4163,8 @@
   bind(Ldone_false);
 }
 
+#endif // Compiler2
+
 // Helpers for Intrinsic Emitters
 //
 // Revert the byte order of a 32bit value in a register
--- a/src/cpu/ppc/vm/macroAssembler_ppc.hpp	Wed Mar 09 14:54:18 2016 +0100
+++ b/src/cpu/ppc/vm/macroAssembler_ppc.hpp	Wed Mar 09 14:18:12 2016 +0100
@@ -1,6 +1,6 @@
 /*
- * Copyright (c) 2002, 2015, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012, 2015 SAP SE. All rights reserved.
+ * Copyright (c) 2002, 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012, 2016 SAP SE. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -679,6 +679,39 @@
 
   void clear_memory_doubleword(Register base_ptr, Register cnt_dwords, Register tmp = R0);
 
+#ifdef COMPILER2
+  // Intrinsics for CompactStrings
+  // Compress char[] to byte[] by compressing 16 bytes at once.
+  void string_compress_16(Register src, Register dst, Register cnt,
+                          Register tmp1, Register tmp2, Register tmp3, Register tmp4, Register tmp5,
+                          Label& Lfailure);
+
+  // Compress char[] to byte[]. cnt must be positive int.
+  void string_compress(Register src, Register dst, Register cnt, Register tmp, Label& Lfailure);
+
+  // Inflate byte[] to char[] by inflating 16 bytes at once.
+  void string_inflate_16(Register src, Register dst, Register cnt,
+                         Register tmp1, Register tmp2, Register tmp3, Register tmp4, Register tmp5);
+
+  // Inflate byte[] to char[]. cnt must be positive int.
+  void string_inflate(Register src, Register dst, Register cnt, Register tmp);
+
+  void string_compare(Register str1, Register str2, Register cnt1, Register cnt2,
+                      Register tmp1, Register result, int ae);
+
+  void array_equals(bool is_array_equ, Register ary1, Register ary2,
+                    Register limit, Register tmp1, Register result, bool is_byte);
+
+  void string_indexof(Register result, Register haystack, Register haycnt,
+                      Register needle, ciTypeArray* needle_values, Register needlecnt, int needlecntval,
+                      Register tmp1, Register tmp2, Register tmp3, Register tmp4, int ae);
+
+  void string_indexof_char(Register result, Register haystack, Register haycnt,
+                           Register needle, jchar needleChar, Register tmp1, Register tmp2, bool is_byte);
+
+  void has_negatives(Register src, Register cnt, Register result, Register tmp1, Register tmp2);
+
+  // Intrinsics for non-CompactStrings
   // Needle of length 1.
   void string_indexof_1(Register result, Register haystack, Register haycnt,
                         Register needle, jchar needleChar,
@@ -694,6 +727,7 @@
                           Register tmp5_reg);
   void char_arrays_equalsImm(Register str1_reg, Register str2_reg, int cntval, Register result_reg,
                              Register tmp1_reg, Register tmp2_reg);
+#endif
 
   // Emitters for BigInteger.multiplyToLen intrinsic.
   inline void multiply64(Register dest_hi, Register dest_lo,
--- a/src/cpu/ppc/vm/ppc.ad	Wed Mar 09 14:54:18 2016 +0100
+++ b/src/cpu/ppc/vm/ppc.ad	Wed Mar 09 14:18:12 2016 +0100
@@ -1,6 +1,6 @@
 //
 // Copyright (c) 2011, 2016, Oracle and/or its affiliates. All rights reserved.
-// Copyright (c) 2012, 2015 SAP SE. All rights reserved.
+// Copyright (c) 2012, 2016 SAP SE. All rights reserved.
 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 //
 // This code is free software; you can redistribute it and/or modify it
@@ -2024,13 +2024,13 @@
     return (UsePopCountInstruction && VM_Version::has_popcntw());
 
   case Op_StrComp:
-    return SpecialStringCompareTo && !CompactStrings;
+    return SpecialStringCompareTo;
   case Op_StrEquals:
-    return SpecialStringEquals && !CompactStrings;
+    return SpecialStringEquals;
   case Op_StrIndexOf:
-    return SpecialStringIndexOf && !CompactStrings;
+    return SpecialStringIndexOf;
   case Op_StrIndexOfChar:
-    return SpecialStringIndexOf && !CompactStrings;
+    return SpecialStringIndexOf;
   }
 
   return true;  // Per default match rules are supported.
@@ -11022,6 +11022,584 @@
   ins_pipe(pipe_class_default);
 %}
 
+instruct string_compareL(rarg1RegP str1, rarg2RegP str2, rarg3RegI cnt1, rarg4RegI cnt2, iRegIdst result,
+                         iRegIdst tmp, regCTR ctr, flagsRegCR0 cr0) %{
+  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
+  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
+  effect(TEMP_DEF result, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL ctr, KILL cr0, TEMP tmp);
+  ins_cost(300);
+  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result \t// KILL $tmp" %}
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
+    __ string_compare($str1$$Register, $str2$$Register,
+                      $cnt1$$Register, $cnt2$$Register,
+                      $tmp$$Register,
+                      $result$$Register, StrIntrinsicNode::LL);
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+instruct string_compareU(rarg1RegP str1, rarg2RegP str2, rarg3RegI cnt1, rarg4RegI cnt2, iRegIdst result,
+                         iRegIdst tmp, regCTR ctr, flagsRegCR0 cr0) %{
+  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
+  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
+  effect(TEMP_DEF result, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL ctr, KILL cr0, TEMP tmp);
+  ins_cost(300);
+  format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result \t// KILL $tmp" %}
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
+    __ string_compare($str1$$Register, $str2$$Register,
+                      $cnt1$$Register, $cnt2$$Register,
+                      $tmp$$Register,
+                      $result$$Register, StrIntrinsicNode::UU);
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+instruct string_compareLU(rarg1RegP str1, rarg2RegP str2, rarg3RegI cnt1, rarg4RegI cnt2, iRegIdst result,
+                          iRegIdst tmp, regCTR ctr, flagsRegCR0 cr0) %{
+  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
+  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
+  effect(TEMP_DEF result, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL ctr, KILL cr0, TEMP tmp);
+  ins_cost(300);
+  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result \t// KILL $tmp" %}
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
+    __ string_compare($str1$$Register, $str2$$Register,
+                      $cnt1$$Register, $cnt2$$Register,
+                      $tmp$$Register,
+                      $result$$Register, StrIntrinsicNode::LU);
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+instruct string_compareUL(rarg1RegP str1, rarg2RegP str2, rarg3RegI cnt1, rarg4RegI cnt2, iRegIdst result,
+                          iRegIdst tmp, regCTR ctr, flagsRegCR0 cr0) %{
+  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
+  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
+  effect(TEMP_DEF result, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL ctr, KILL cr0, TEMP tmp);
+  ins_cost(300);
+  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result \t// KILL $tmp" %}
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
+    __ string_compare($str2$$Register, $str1$$Register,
+                      $cnt2$$Register, $cnt1$$Register,
+                      $tmp$$Register,
+                      $result$$Register, StrIntrinsicNode::UL);
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+instruct string_equalsL(rarg1RegP str1, rarg2RegP str2, rarg3RegI cnt, iRegIdst result,
+                        iRegIdst tmp, regCTR ctr, flagsRegCR0 cr0) %{
+  predicate(((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::LL);
+  match(Set result (StrEquals (Binary str1 str2) cnt));
+  effect(TEMP_DEF result, USE_KILL str1, USE_KILL str2, USE_KILL cnt, TEMP tmp, KILL ctr, KILL cr0);
+  ins_cost(300);
+  format %{ "String Equals byte[] $str1,$str2,$cnt -> $result \t// KILL $tmp" %}
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
+    __ array_equals(false, $str1$$Register, $str2$$Register,
+                    $cnt$$Register, $tmp$$Register,
+                    $result$$Register, true /* byte */);
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+instruct string_equalsU(rarg1RegP str1, rarg2RegP str2, rarg3RegI cnt, iRegIdst result,
+                        iRegIdst tmp, regCTR ctr, flagsRegCR0 cr0) %{
+  predicate(((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::UU);
+  match(Set result (StrEquals (Binary str1 str2) cnt));
+  effect(TEMP_DEF result, USE_KILL str1, USE_KILL str2, USE_KILL cnt, TEMP tmp, KILL ctr, KILL cr0);
+  ins_cost(300);
+  format %{ "String Equals char[]  $str1,$str2,$cnt -> $result \t// KILL $tmp" %}
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
+    __ array_equals(false, $str1$$Register, $str2$$Register,
+                    $cnt$$Register, $tmp$$Register,
+                    $result$$Register, false /* byte */);
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+instruct array_equalsB(rarg1RegP ary1, rarg2RegP ary2, iRegIdst result,
+                       iRegIdst tmp1, iRegIdst tmp2, regCTR ctr, flagsRegCR0 cr0, flagsRegCR0 cr1) %{
+  predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
+  match(Set result (AryEq ary1 ary2));
+  effect(TEMP_DEF result, USE_KILL ary1, USE_KILL ary2, TEMP tmp1, TEMP tmp2, KILL ctr, KILL cr0, KILL cr1);
+  ins_cost(300);
+  format %{ "Array Equals $ary1,$ary2 -> $result \t// KILL $tmp1,$tmp2" %}
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
+    __ array_equals(true, $ary1$$Register, $ary2$$Register,
+                    $tmp1$$Register, $tmp2$$Register,
+                    $result$$Register, true /* byte */);
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+instruct array_equalsC(rarg1RegP ary1, rarg2RegP ary2, iRegIdst result,
+                       iRegIdst tmp1, iRegIdst tmp2, regCTR ctr, flagsRegCR0 cr0, flagsRegCR0 cr1) %{
+  predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
+  match(Set result (AryEq ary1 ary2));
+  effect(TEMP_DEF result, USE_KILL ary1, USE_KILL ary2, TEMP tmp1, TEMP tmp2, KILL ctr, KILL cr0, KILL cr1);
+  ins_cost(300);
+  format %{ "Array Equals $ary1,$ary2 -> $result \t// KILL $tmp1,$tmp2" %}
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
+    __ array_equals(true, $ary1$$Register, $ary2$$Register,
+                    $tmp1$$Register, $tmp2$$Register,
+                    $result$$Register, false /* byte */);
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+instruct indexOf_imm1_char_U(iRegIdst result, iRegPsrc haystack, iRegIsrc haycnt,
+                             immP needleImm, immL offsetImm, immI_1 needlecntImm,
+                             iRegIdst tmp1, iRegIdst tmp2,
+                             flagsRegCR0 cr0, flagsRegCR1 cr1, regCTR ctr) %{
+  match(Set result (StrIndexOf (Binary haystack haycnt) (Binary (AddP needleImm offsetImm) needlecntImm)));
+  effect(TEMP tmp1, TEMP tmp2, KILL cr0, KILL cr1, KILL ctr);
+  // Required for EA: check if it is still a type_array.
+  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU);
+  ins_cost(150);
+
+  format %{ "String IndexOf CSCL1 $haystack[0..$haycnt], $needleImm+$offsetImm[0..$needlecntImm]"
+            "-> $result \t// KILL $haycnt, $tmp1, $tmp2, $cr0, $cr1" %}
+
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
+    immPOper *needleOper = (immPOper *)$needleImm;
+    const TypeOopPtr *t = needleOper->type()->isa_oopptr();
+    ciTypeArray* needle_values = t->const_oop()->as_type_array();  // Pointer to live char *
+    jchar chr;
+#ifdef VM_LITTLE_ENDIAN
+    chr = (((jchar)(unsigned char)needle_values->element_value(1).as_byte()) << 8) |
+           ((jchar)(unsigned char)needle_values->element_value(0).as_byte());
+#else
+    chr = (((jchar)(unsigned char)needle_values->element_value(0).as_byte()) << 8) |
+           ((jchar)(unsigned char)needle_values->element_value(1).as_byte());
+#endif
+    __ string_indexof_char($result$$Register,
+                           $haystack$$Register, $haycnt$$Register,
+                           R0, chr,
+                           $tmp1$$Register, $tmp2$$Register, false /*is_byte*/);
+  %}
+  ins_pipe(pipe_class_compare);
+%}
+
+instruct indexOf_imm1_char_L(iRegIdst result, iRegPsrc haystack, iRegIsrc haycnt,
+                             immP needleImm, immL offsetImm, immI_1 needlecntImm,
+                             iRegIdst tmp1, iRegIdst tmp2,
+                             flagsRegCR0 cr0, flagsRegCR1 cr1, regCTR ctr) %{
+  match(Set result (StrIndexOf (Binary haystack haycnt) (Binary (AddP needleImm offsetImm) needlecntImm)));
+  effect(TEMP tmp1, TEMP tmp2, KILL cr0, KILL cr1, KILL ctr);
+  // Required for EA: check if it is still a type_array.
+  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL);
+  ins_cost(150);
+
+  format %{ "String IndexOf CSCL1 $haystack[0..$haycnt], $needleImm+$offsetImm[0..$needlecntImm]"
+            "-> $result \t// KILL $haycnt, $tmp1, $tmp2, $cr0, $cr1" %}
+
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
+    immPOper *needleOper = (immPOper *)$needleImm;
+    const TypeOopPtr *t = needleOper->type()->isa_oopptr();
+    ciTypeArray* needle_values = t->const_oop()->as_type_array();  // Pointer to live char *
+    jchar chr = (jchar)needle_values->element_value(0).as_byte();
+    __ string_indexof_char($result$$Register,
+                           $haystack$$Register, $haycnt$$Register,
+                           R0, chr,
+                           $tmp1$$Register, $tmp2$$Register, true /*is_byte*/);
+  %}
+  ins_pipe(pipe_class_compare);
+%}
+
+instruct indexOf_imm1_char_UL(iRegIdst result, iRegPsrc haystack, iRegIsrc haycnt,
+                              immP needleImm, immL offsetImm, immI_1 needlecntImm,
+                              iRegIdst tmp1, iRegIdst tmp2,
+                              flagsRegCR0 cr0, flagsRegCR1 cr1, regCTR ctr) %{
+  match(Set result (StrIndexOf (Binary haystack haycnt) (Binary (AddP needleImm offsetImm) needlecntImm)));
+  effect(TEMP tmp1, TEMP tmp2, KILL cr0, KILL cr1, KILL ctr);
+  // Required for EA: check if it is still a type_array.
+  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL);
+  ins_cost(150);
+
+  format %{ "String IndexOf CSCL1 $haystack[0..$haycnt], $needleImm+$offsetImm[0..$needlecntImm]"
+            "-> $result \t// KILL $haycnt, $tmp1, $tmp2, $cr0, $cr1" %}
+
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
+    immPOper *needleOper = (immPOper *)$needleImm;
+    const TypeOopPtr *t = needleOper->type()->isa_oopptr();
+    ciTypeArray* needle_values = t->const_oop()->as_type_array();  // Pointer to live char *
+    jchar chr = (jchar)needle_values->element_value(0).as_byte();
+    __ string_indexof_char($result$$Register,
+                           $haystack$$Register, $haycnt$$Register,
+                           R0, chr,
+                           $tmp1$$Register, $tmp2$$Register, false /*is_byte*/);
+  %}
+  ins_pipe(pipe_class_compare);
+%}
+
+instruct indexOf_imm1_U(iRegIdst result, iRegPsrc haystack, iRegIsrc haycnt,
+                        rscratch2RegP needle, immI_1 needlecntImm,
+                        iRegIdst tmp1, iRegIdst tmp2,
+                        flagsRegCR0 cr0, flagsRegCR1 cr1, regCTR ctr) %{
+  match(Set result (StrIndexOf (Binary haystack haycnt) (Binary needle needlecntImm)));
+  effect(USE_KILL needle, TEMP tmp1, TEMP tmp2, KILL cr0, KILL cr1, KILL ctr);
+  // Required for EA: check if it is still a type_array.
+  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU &&
+            n->in(3)->in(1)->bottom_type()->is_aryptr()->const_oop() &&
+            n->in(3)->in(1)->bottom_type()->is_aryptr()->const_oop()->is_type_array());
+  ins_cost(180);
+
+  format %{ "String IndexOf SCL1 $haystack[0..$haycnt], $needle[0..$needlecntImm]"
+            " -> $result \t// KILL $haycnt, $needle, $tmp1, $tmp2, $cr0, $cr1" %}
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
+    Node *ndl = in(operand_index($needle));  // The node that defines needle.
+    ciTypeArray* needle_values = ndl->bottom_type()->is_aryptr()->const_oop()->as_type_array();
+    guarantee(needle_values, "sanity");
+    jchar chr;
+#ifdef VM_LITTLE_ENDIAN
+    chr = (((jchar)(unsigned char)needle_values->element_value(1).as_byte()) << 8) |
+           ((jchar)(unsigned char)needle_values->element_value(0).as_byte());
+#else
+    chr = (((jchar)(unsigned char)needle_values->element_value(0).as_byte()) << 8) |
+           ((jchar)(unsigned char)needle_values->element_value(1).as_byte());
+#endif
+    __ string_indexof_char($result$$Register,
+                           $haystack$$Register, $haycnt$$Register,
+                           R0, chr,
+                           $tmp1$$Register, $tmp2$$Register, false /*is_byte*/);
+  %}
+  ins_pipe(pipe_class_compare);
+%}
+
+instruct indexOf_imm1_L(iRegIdst result, iRegPsrc haystack, iRegIsrc haycnt,
+                        rscratch2RegP needle, immI_1 needlecntImm,
+                        iRegIdst tmp1, iRegIdst tmp2,
+                        flagsRegCR0 cr0, flagsRegCR1 cr1, regCTR ctr) %{
+  match(Set result (StrIndexOf (Binary haystack haycnt) (Binary needle needlecntImm)));
+  effect(USE_KILL needle, TEMP tmp1, TEMP tmp2, KILL cr0, KILL cr1, KILL ctr);
+  // Required for EA: check if it is still a type_array.
+  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL &&
+            n->in(3)->in(1)->bottom_type()->is_aryptr()->const_oop() &&
+            n->in(3)->in(1)->bottom_type()->is_aryptr()->const_oop()->is_type_array());
+  ins_cost(180);
+
+  format %{ "String IndexOf SCL1 $haystack[0..$haycnt], $needle[0..$needlecntImm]"
+            " -> $result \t// KILL $haycnt, $needle, $tmp1, $tmp2, $cr0, $cr1" %}
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
+    Node *ndl = in(operand_index($needle));  // The node that defines needle.
+    ciTypeArray* needle_values = ndl->bottom_type()->is_aryptr()->const_oop()->as_type_array();
+    guarantee(needle_values, "sanity");
+    jchar chr = (jchar)needle_values->element_value(0).as_byte();
+    __ string_indexof_char($result$$Register,
+                           $haystack$$Register, $haycnt$$Register,
+                           R0, chr,
+                           $tmp1$$Register, $tmp2$$Register, true /*is_byte*/);
+  %}
+  ins_pipe(pipe_class_compare);
+%}
+
+instruct indexOf_imm1_UL(iRegIdst result, iRegPsrc haystack, iRegIsrc haycnt,
+                         rscratch2RegP needle, immI_1 needlecntImm,
+                         iRegIdst tmp1, iRegIdst tmp2,
+                         flagsRegCR0 cr0, flagsRegCR1 cr1, regCTR ctr) %{
+  match(Set result (StrIndexOf (Binary haystack haycnt) (Binary needle needlecntImm)));
+  effect(USE_KILL needle, TEMP tmp1, TEMP tmp2, KILL cr0, KILL cr1, KILL ctr);
+  // Required for EA: check if it is still a type_array.
+  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL &&
+            n->in(3)->in(1)->bottom_type()->is_aryptr()->const_oop() &&
+            n->in(3)->in(1)->bottom_type()->is_aryptr()->const_oop()->is_type_array());
+  ins_cost(180);
+
+  format %{ "String IndexOf SCL1 $haystack[0..$haycnt], $needle[0..$needlecntImm]"
+            " -> $result \t// KILL $haycnt, $needle, $tmp1, $tmp2, $cr0, $cr1" %}
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
+    Node *ndl = in(operand_index($needle));  // The node that defines needle.
+    ciTypeArray* needle_values = ndl->bottom_type()->is_aryptr()->const_oop()->as_type_array();
+    guarantee(needle_values, "sanity");
+    jchar chr = (jchar)needle_values->element_value(0).as_byte();
+    __ string_indexof_char($result$$Register,
+                           $haystack$$Register, $haycnt$$Register,
+                           R0, chr,
+                           $tmp1$$Register, $tmp2$$Register, false /*is_byte*/);
+  %}
+  ins_pipe(pipe_class_compare);
+%}
+
+instruct indexOfChar_U(iRegIdst result, iRegPsrc haystack, iRegIsrc haycnt,
+                       iRegIsrc ch, iRegIdst tmp1, iRegIdst tmp2,
+                       flagsRegCR0 cr0, flagsRegCR1 cr1, regCTR ctr) %{
+  match(Set result (StrIndexOfChar (Binary haystack haycnt) ch));
+  effect(TEMP tmp1, TEMP tmp2, KILL cr0, KILL cr1, KILL ctr);
+  predicate(CompactStrings);
+  ins_cost(180);
+
+  format %{ "String IndexOfChar $haystack[0..$haycnt], $ch"
+            " -> $result \t// KILL $haycnt, $tmp1, $tmp2, $cr0, $cr1" %}
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
+    __ string_indexof_char($result$$Register,
+                           $haystack$$Register, $haycnt$$Register,
+                           $ch$$Register, 0 /* this is not used if the character is already in a register */,
+                           $tmp1$$Register, $tmp2$$Register, false /*is_byte*/);
+  %}
+  ins_pipe(pipe_class_compare);
+%}
+
+instruct indexOf_imm_U(iRegIdst result, iRegPsrc haystack, rscratch1RegI haycnt,
+                       iRegPsrc needle, uimmI15 needlecntImm,
+                       iRegIdst tmp1, iRegIdst tmp2, iRegIdst tmp3, iRegIdst tmp4, iRegIdst tmp5,
+                       flagsRegCR0 cr0, flagsRegCR1 cr1, flagsRegCR6 cr6, regCTR ctr) %{
+  match(Set result (StrIndexOf (Binary haystack haycnt) (Binary needle needlecntImm)));
+  effect(USE_KILL haycnt, /* better: TDEF haycnt, */ TEMP_DEF result,
+         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr0, KILL cr1, KILL cr6, KILL ctr);
+  // Required for EA: check if it is still a type_array.
+  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU &&
+            n->in(3)->in(1)->bottom_type()->is_aryptr()->const_oop() &&
+            n->in(3)->in(1)->bottom_type()->is_aryptr()->const_oop()->is_type_array());
+  ins_cost(250);
+
+  format %{ "String IndexOf SCL $haystack[0..$haycnt], $needle[0..$needlecntImm]"
+            " -> $result \t// KILL $haycnt, $tmp1, $tmp2, $tmp3, $tmp4, $tmp5, $cr0, $cr1" %}
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
+    Node *ndl = in(operand_index($needle));  // The node that defines needle.
+    ciTypeArray* needle_values = ndl->bottom_type()->is_aryptr()->const_oop()->as_type_array();
+
+    __ string_indexof($result$$Register,
+                      $haystack$$Register, $haycnt$$Register,
+                      $needle$$Register, needle_values, $tmp5$$Register, $needlecntImm$$constant,
+                      $tmp1$$Register, $tmp2$$Register, $tmp3$$Register, $tmp4$$Register, StrIntrinsicNode::UU);
+  %}
+  ins_pipe(pipe_class_compare);
+%}
+
+instruct indexOf_imm_L(iRegIdst result, iRegPsrc haystack, rscratch1RegI haycnt,
+                       iRegPsrc needle, uimmI15 needlecntImm,
+                       iRegIdst tmp1, iRegIdst tmp2, iRegIdst tmp3, iRegIdst tmp4, iRegIdst tmp5,
+                       flagsRegCR0 cr0, flagsRegCR1 cr1, flagsRegCR6 cr6, regCTR ctr) %{
+  match(Set result (StrIndexOf (Binary haystack haycnt) (Binary needle needlecntImm)));
+  effect(USE_KILL haycnt, /* better: TDEF haycnt, */ TEMP_DEF result,
+         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr0, KILL cr1, KILL cr6, KILL ctr);
+  // Required for EA: check if it is still a type_array.
+  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL &&
+            n->in(3)->in(1)->bottom_type()->is_aryptr()->const_oop() &&
+            n->in(3)->in(1)->bottom_type()->is_aryptr()->const_oop()->is_type_array());
+  ins_cost(250);
+
+  format %{ "String IndexOf SCL $haystack[0..$haycnt], $needle[0..$needlecntImm]"
+            " -> $result \t// KILL $haycnt, $tmp1, $tmp2, $tmp3, $tmp4, $tmp5, $cr0, $cr1" %}
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
+    Node *ndl = in(operand_index($needle));  // The node that defines needle.
+    ciTypeArray* needle_values = ndl->bottom_type()->is_aryptr()->const_oop()->as_type_array();
+
+    __ string_indexof($result$$Register,
+                      $haystack$$Register, $haycnt$$Register,
+                      $needle$$Register, needle_values, $tmp5$$Register, $needlecntImm$$constant,
+                      $tmp1$$Register, $tmp2$$Register, $tmp3$$Register, $tmp4$$Register, StrIntrinsicNode::LL);
+  %}
+  ins_pipe(pipe_class_compare);
+%}
+
+instruct indexOf_imm_UL(iRegIdst result, iRegPsrc haystack, rscratch1RegI haycnt,
+                        iRegPsrc needle, uimmI15 needlecntImm,
+                        iRegIdst tmp1, iRegIdst tmp2, iRegIdst tmp3, iRegIdst tmp4, iRegIdst tmp5,
+                        flagsRegCR0 cr0, flagsRegCR1 cr1, flagsRegCR6 cr6, regCTR ctr) %{
+  match(Set result (StrIndexOf (Binary haystack haycnt) (Binary needle needlecntImm)));
+  effect(USE_KILL haycnt, /* better: TDEF haycnt, */ TEMP_DEF result,
+         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr0, KILL cr1, KILL cr6, KILL ctr);
+  // Required for EA: check if it is still a type_array.
+  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL &&
+            n->in(3)->in(1)->bottom_type()->is_aryptr()->const_oop() &&
+            n->in(3)->in(1)->bottom_type()->is_aryptr()->const_oop()->is_type_array());
+  ins_cost(250);
+
+  format %{ "String IndexOf SCL $haystack[0..$haycnt], $needle[0..$needlecntImm]"
+            " -> $result \t// KILL $haycnt, $tmp1, $tmp2, $tmp3, $tmp4, $tmp5, $cr0, $cr1" %}
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
+    Node *ndl = in(operand_index($needle));  // The node that defines needle.
+    ciTypeArray* needle_values = ndl->bottom_type()->is_aryptr()->const_oop()->as_type_array();
+
+    __ string_indexof($result$$Register,
+                      $haystack$$Register, $haycnt$$Register,
+                      $needle$$Register, needle_values, $tmp5$$Register, $needlecntImm$$constant,
+                      $tmp1$$Register, $tmp2$$Register, $tmp3$$Register, $tmp4$$Register, StrIntrinsicNode::UL);
+  %}
+  ins_pipe(pipe_class_compare);
+%}
+
+instruct indexOf_U(iRegIdst result, iRegPsrc haystack, rscratch1RegI haycnt, iRegPsrc needle, rscratch2RegI needlecnt,
+                   iRegLdst tmp1, iRegLdst tmp2, iRegLdst tmp3, iRegLdst tmp4,
+                   flagsRegCR0 cr0, flagsRegCR1 cr1, flagsRegCR6 cr6, regCTR ctr) %{
+  match(Set result (StrIndexOf (Binary haystack haycnt) (Binary needle needlecnt)));
+  effect(USE_KILL haycnt, USE_KILL needlecnt, /*better: TDEF haycnt, TDEF needlecnt,*/
+         TEMP_DEF result,
+         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr0, KILL cr1, KILL cr6, KILL ctr);
+  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU);
+  ins_cost(300);
+
+  format %{ "String IndexOf $haystack[0..$haycnt], $needle[0..$needlecnt]"
+             " -> $result \t// KILL $haycnt, $needlecnt, $tmp1, $tmp2, $tmp3, $tmp4, $cr0, $cr1" %}
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
+    __ string_indexof($result$$Register,
+                      $haystack$$Register, $haycnt$$Register,
+                      $needle$$Register, NULL, $needlecnt$$Register, 0,  // needlecnt not constant.
+                      $tmp1$$Register, $tmp2$$Register, $tmp3$$Register, $tmp4$$Register, StrIntrinsicNode::UU);
+  %}
+  ins_pipe(pipe_class_compare);
+%}
+
+instruct indexOf_L(iRegIdst result, iRegPsrc haystack, rscratch1RegI haycnt, iRegPsrc needle, rscratch2RegI needlecnt,
+                   iRegLdst tmp1, iRegLdst tmp2, iRegLdst tmp3, iRegLdst tmp4,
+                   flagsRegCR0 cr0, flagsRegCR1 cr1, flagsRegCR6 cr6, regCTR ctr) %{
+  match(Set result (StrIndexOf (Binary haystack haycnt) (Binary needle needlecnt)));
+  effect(USE_KILL haycnt, USE_KILL needlecnt, /*better: TDEF haycnt, TDEF needlecnt,*/
+         TEMP_DEF result,
+         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr0, KILL cr1, KILL cr6, KILL ctr);
+  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL);
+  ins_cost(300);
+
+  format %{ "String IndexOf $haystack[0..$haycnt], $needle[0..$needlecnt]"
+             " -> $result \t// KILL $haycnt, $needlecnt, $tmp1, $tmp2, $tmp3, $tmp4, $cr0, $cr1" %}
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
+    __ string_indexof($result$$Register,
+                      $haystack$$Register, $haycnt$$Register,
+                      $needle$$Register, NULL, $needlecnt$$Register, 0,  // needlecnt not constant.
+                      $tmp1$$Register, $tmp2$$Register, $tmp3$$Register, $tmp4$$Register, StrIntrinsicNode::LL);
+  %}
+  ins_pipe(pipe_class_compare);
+%}
+
+instruct indexOf_UL(iRegIdst result, iRegPsrc haystack, rscratch1RegI haycnt, iRegPsrc needle, rscratch2RegI needlecnt,
+                    iRegLdst tmp1, iRegLdst tmp2, iRegLdst tmp3, iRegLdst tmp4,
+                    flagsRegCR0 cr0, flagsRegCR1 cr1, flagsRegCR6 cr6, regCTR ctr) %{
+  match(Set result (StrIndexOf (Binary haystack haycnt) (Binary needle needlecnt)));
+  effect(USE_KILL haycnt, USE_KILL needlecnt, /*better: TDEF haycnt, TDEF needlecnt,*/
+         TEMP_DEF result,
+         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr0, KILL cr1, KILL cr6, KILL ctr);
+  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL);
+  ins_cost(300);
+
+  format %{ "String IndexOf $haystack[0..$haycnt], $needle[0..$needlecnt]"
+             " -> $result \t// KILL $haycnt, $needlecnt, $tmp1, $tmp2, $tmp3, $tmp4, $cr0, $cr1" %}
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
+    __ string_indexof($result$$Register,
+                      $haystack$$Register, $haycnt$$Register,
+                      $needle$$Register, NULL, $needlecnt$$Register, 0,  // needlecnt not constant.
+                      $tmp1$$Register, $tmp2$$Register, $tmp3$$Register, $tmp4$$Register, StrIntrinsicNode::UL);
+  %}
+  ins_pipe(pipe_class_compare);
+%}
+
+// char[] to byte[] compression
+instruct string_compress(rarg1RegP src, rarg2RegP dst, iRegIsrc len, iRegIdst result, iRegLdst tmp1,
+                         iRegLdst tmp2, iRegLdst tmp3, iRegLdst tmp4, iRegLdst tmp5, regCTR ctr, flagsRegCR0 cr0) %{
+  match(Set result (StrCompressedCopy src (Binary dst len)));
+  effect(TEMP_DEF result, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5,
+         USE_KILL src, USE_KILL dst, KILL ctr, KILL cr0);
+  ins_cost(300);
+  format %{ "String Compress $src,$dst,$len -> $result \t// KILL $tmp1, $tmp2, $tmp3, $tmp4, $tmp5" %}
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
+    Label Lskip, Ldone;
+    __ li($result$$Register, 0);
+    __ string_compress_16($src$$Register, $dst$$Register, $len$$Register, $tmp1$$Register,
+                          $tmp2$$Register, $tmp3$$Register, $tmp4$$Register, $tmp5$$Register, Ldone);
+    __ rldicl_($tmp1$$Register, $len$$Register, 0, 64-3); // Remaining characters.
+    __ beq(CCR0, Lskip);
+    __ string_compress($src$$Register, $dst$$Register, $tmp1$$Register, $tmp2$$Register, Ldone);
+    __ bind(Lskip);
+    __ mr($result$$Register, $len$$Register);
+    __ bind(Ldone);
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+// byte[] to char[] inflation
+instruct string_inflate(Universe dummy, rarg1RegP src, rarg2RegP dst, iRegIsrc len, iRegLdst tmp1,
+                        iRegLdst tmp2, iRegLdst tmp3, iRegLdst tmp4, iRegLdst tmp5, regCTR ctr, flagsRegCR0 cr0) %{
+  match(Set dummy (StrInflatedCopy src (Binary dst len)));
+  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, USE_KILL src, USE_KILL dst, KILL ctr, KILL cr0);
+  ins_cost(300);
+  format %{ "String Inflate $src,$dst,$len \t// KILL $tmp1, $tmp2, $tmp3, $tmp4, $tmp5" %}
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
+    Label Ldone;
+    __ string_inflate_16($src$$Register, $dst$$Register, $len$$Register, $tmp1$$Register,
+                         $tmp2$$Register, $tmp3$$Register, $tmp4$$Register, $tmp5$$Register);
+    __ rldicl_($tmp1$$Register, $len$$Register, 0, 64-3); // Remaining characters.
+    __ beq(CCR0, Ldone);
+    __ string_inflate($src$$Register, $dst$$Register, $tmp1$$Register, $tmp2$$Register);
+    __ bind(Ldone);
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+// StringCoding.java intrinsics
+instruct has_negatives(rarg1RegP ary1, iRegIsrc len, iRegIdst result, iRegLdst tmp1, iRegLdst tmp2,
+                       regCTR ctr, flagsRegCR0 cr0)
+%{
+  match(Set result (HasNegatives ary1 len));
+  effect(TEMP_DEF result, USE_KILL ary1, TEMP tmp1, TEMP tmp2, KILL ctr, KILL cr0);
+  ins_cost(300);
+  format %{ "has negatives byte[] $ary1,$len -> $result \t// KILL $tmp1, $tmp2" %}
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
+    __ has_negatives($ary1$$Register, $len$$Register, $result$$Register,
+                     $tmp1$$Register, $tmp2$$Register);
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+// encode char[] to byte[] in ISO_8859_1
+instruct encode_iso_array(rarg1RegP src, rarg2RegP dst, iRegIsrc len, iRegIdst result, iRegLdst tmp1,
+                          iRegLdst tmp2, iRegLdst tmp3, iRegLdst tmp4, iRegLdst tmp5, regCTR ctr, flagsRegCR0 cr0) %{
+  match(Set result (EncodeISOArray src (Binary dst len)));
+  effect(TEMP_DEF result, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5,
+         USE_KILL src, USE_KILL dst, KILL ctr, KILL cr0);
+  ins_cost(300);
+  format %{ "Encode array $src,$dst,$len -> $result \t// KILL $tmp1, $tmp2, $tmp3, $tmp4, $tmp5" %}
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
+    Label Lslow, Lfailure1, Lfailure2, Ldone;
+    __ string_compress_16($src$$Register, $dst$$Register, $len$$Register, $tmp1$$Register,
+                          $tmp2$$Register, $tmp3$$Register, $tmp4$$Register, $tmp5$$Register, Lfailure1);
+    __ rldicl_($result$$Register, $len$$Register, 0, 64-3); // Remaining characters.
+    __ beq(CCR0, Ldone);
+    __ bind(Lslow);
+    __ string_compress($src$$Register, $dst$$Register, $result$$Register, $tmp2$$Register, Lfailure2);
+    __ li($result$$Register, 0);
+    __ b(Ldone);
+
+    __ bind(Lfailure1);
+    __ mr($result$$Register, $len$$Register);
+    __ mfctr($tmp1$$Register);
+    __ rldimi_($result$$Register, $tmp1$$Register, 3, 0); // Remaining characters.
+    __ beq(CCR0, Ldone);
+    __ b(Lslow);
+
+    __ bind(Lfailure2);
+    __ mfctr($result$$Register); // Remaining characters.
+
+    __ bind(Ldone);
+    __ subf($result$$Register, $result$$Register, $len$$Register);
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+
 // String_IndexOf for needle of length 1.
 //
 // Match needle into immediate operands: no loadConP node needed. Saves one
@@ -11060,11 +11638,11 @@
     if (java_lang_String::has_coder_field()) {
       // New compact strings byte array strings
 #ifdef VM_LITTLE_ENDIAN
-      chr = (((jchar)needle_values->element_value(1).as_byte()) << 8) |
-              (jchar)needle_values->element_value(0).as_byte();
+    chr = (((jchar)(unsigned char)needle_values->element_value(1).as_byte()) << 8) |
+           ((jchar)(unsigned char)needle_values->element_value(0).as_byte());
 #else
-      chr = (((jchar)needle_values->element_value(0).as_byte()) << 8) |
-              (jchar)needle_values->element_value(1).as_byte();
+    chr = (((jchar)(unsigned char)needle_values->element_value(0).as_byte()) << 8) |
+           ((jchar)(unsigned char)needle_values->element_value(1).as_byte());
 #endif
     } else {
       // Old char array strings
@@ -11115,11 +11693,11 @@
     if (java_lang_String::has_coder_field()) {
       // New compact strings byte array strings
 #ifdef VM_LITTLE_ENDIAN
-      chr = (((jchar)needle_values->element_value(1).as_byte()) << 8) |
-              (jchar)needle_values->element_value(0).as_byte();
+    chr = (((jchar)(unsigned char)needle_values->element_value(1).as_byte()) << 8) |
+           ((jchar)(unsigned char)needle_values->element_value(0).as_byte());
 #else
-      chr = (((jchar)needle_values->element_value(0).as_byte()) << 8) |
-              (jchar)needle_values->element_value(1).as_byte();
+    chr = (((jchar)(unsigned char)needle_values->element_value(0).as_byte()) << 8) |
+           ((jchar)(unsigned char)needle_values->element_value(1).as_byte());
 #endif
     } else {
       // Old char array strings
@@ -11321,6 +11899,20 @@
   %}
 %}
 
+instruct minI_reg_reg_isel(iRegIdst dst, iRegIsrc src1, iRegIsrc src2, flagsRegCR0 cr0) %{
+  match(Set dst (MinI src1 src2));
+  effect(KILL cr0);
+  predicate(VM_Version::has_isel());
+  ins_cost(DEFAULT_COST*2);
+
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
+    __ cmpw(CCR0, $src1$$Register, $src2$$Register);
+    __ isel($dst$$Register, CCR0, Assembler::less, /*invert*/false, $src1$$Register, $src2$$Register);
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
 instruct maxI_reg_reg_Ex(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{
   match(Set dst (MaxI src1 src2));
   ins_cost(DEFAULT_COST*6);
@@ -11341,6 +11933,20 @@
   %}
 %}
 
+instruct maxI_reg_reg_isel(iRegIdst dst, iRegIsrc src1, iRegIsrc src2, flagsRegCR0 cr0) %{
+  match(Set dst (MaxI src1 src2));
+  effect(KILL cr0);
+  predicate(VM_Version::has_isel());
+  ins_cost(DEFAULT_COST*2);
+
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
+    __ cmpw(CCR0, $src1$$Register, $src2$$Register);
+    __ isel($dst$$Register, CCR0, Assembler::greater, /*invert*/false, $src1$$Register, $src2$$Register);
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
 //---------- Population Count Instructions ------------------------------------
 
 // Popcnt for Power7.
--- a/src/cpu/ppc/vm/stubGenerator_ppc.cpp	Wed Mar 09 14:54:18 2016 +0100
+++ b/src/cpu/ppc/vm/stubGenerator_ppc.cpp	Wed Mar 09 14:18:12 2016 +0100
@@ -2609,9 +2609,7 @@
    *   R5_ARG3    - int   length (of buffer)
    *
    * scratch:
-   *   R6_ARG4    - crc table address
-   *   R7_ARG5    - tmp1
-   *   R8_ARG6    - tmp2
+   *   R2, R6-R12
    *
    * Ouput:
    *   R3_RET     - int   crc result
@@ -2623,22 +2621,25 @@
     address start = __ function_entry();  // Remember stub start address (is rtn value).
 
     // arguments to kernel_crc32:
-    Register       crc     = R3_ARG1;  // Current checksum, preset by caller or result from previous call.
-    Register       data    = R4_ARG2;  // source byte array
-    Register       dataLen = R5_ARG3;  // #bytes to process
-    Register       table   = R6_ARG4;  // crc table address
-
-    Register       t0      = R9;       // work reg for kernel* emitters
-    Register       t1      = R10;      // work reg for kernel* emitters
-    Register       t2      = R11;      // work reg for kernel* emitters
-    Register       t3      = R12;      // work reg for kernel* emitters
+    const Register crc     = R3_ARG1;  // Current checksum, preset by caller or result from previous call.
+    const Register data    = R4_ARG2;  // source byte array
+    const Register dataLen = R5_ARG3;  // #bytes to process
+    const Register table   = R6_ARG4;  // crc table address
+
+    const Register t0      = R2;
+    const Register t1      = R7;
+    const Register t2      = R8;
+    const Register t3      = R9;
+    const Register tc0     = R10;
+    const Register tc1     = R11;
+    const Register tc2     = R12;
 
     BLOCK_COMMENT("Stub body {");
     assert_different_registers(crc, data, dataLen, table);
 
     StubRoutines::ppc64::generate_load_crc_table_addr(_masm, table);
 
-    __ kernel_crc32_1byte(crc, data, dataLen, table, t0, t1, t2, t3);
+    __ kernel_crc32_1word(crc, data, dataLen, table, t0, t1, t2, t3, tc0, tc1, tc2, table);
 
     BLOCK_COMMENT("return");
     __ mr_if_needed(R3_RET, crc);      // Updated crc is function result. No copying required (R3_ARG1 == R3_RET).
--- a/src/cpu/ppc/vm/vm_version_ppc.cpp	Wed Mar 09 14:54:18 2016 +0100
+++ b/src/cpu/ppc/vm/vm_version_ppc.cpp	Wed Mar 09 14:18:12 2016 +0100
@@ -1,6 +1,6 @@
 /*
- * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012, 2015 SAP SE. All rights reserved.
+ * Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012, 2016 SAP SE. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -53,7 +53,7 @@
 
   // If PowerArchitecturePPC64 hasn't been specified explicitly determine from features.
   if (FLAG_IS_DEFAULT(PowerArchitecturePPC64)) {
-    if (VM_Version::has_tcheck() && VM_Version::has_lqarx()) {
+    if (VM_Version::has_lqarx()) {
       FLAG_SET_ERGO(uintx, PowerArchitecturePPC64, 8);
     } else if (VM_Version::has_popcntw()) {
       FLAG_SET_ERGO(uintx, PowerArchitecturePPC64, 7);
@@ -68,8 +68,7 @@
 
   bool PowerArchitecturePPC64_ok = false;
   switch (PowerArchitecturePPC64) {
-    case 8: if (!VM_Version::has_tcheck() ) break;
-            if (!VM_Version::has_lqarx()  ) break;
+    case 8: if (!VM_Version::has_lqarx()  ) break;
     case 7: if (!VM_Version::has_popcntw()) break;
     case 6: if (!VM_Version::has_cmpb()   ) break;
     case 5: if (!VM_Version::has_popcntb()) break;
@@ -80,7 +79,7 @@
             UINTX_FORMAT " on this machine", PowerArchitecturePPC64);
 
   // Power 8: Configure Data Stream Control Register.
-  if (PowerArchitecturePPC64 >= 8) {
+  if (has_mfdscr()) {
     config_dscr();
   }
 
@@ -112,7 +111,7 @@
   // Create and print feature-string.
   char buf[(num_features+1) * 16]; // Max 16 chars per feature.
   jio_snprintf(buf, sizeof(buf),
-               "ppc64%s%s%s%s%s%s%s%s%s%s%s%s",
+               "ppc64%s%s%s%s%s%s%s%s%s%s%s%s%s",
                (has_fsqrt()   ? " fsqrt"   : ""),
                (has_isel()    ? " isel"    : ""),
                (has_lxarxeh() ? " lxarxeh" : ""),
@@ -125,7 +124,8 @@
                (has_lqarx()   ? " lqarx"   : ""),
                (has_vcipher() ? " vcipher" : ""),
                (has_vpmsumb() ? " vpmsumb" : ""),
-               (has_tcheck()  ? " tcheck"  : "")
+               (has_tcheck()  ? " tcheck"  : ""),
+               (has_mfdscr()  ? " mfdscr"  : "")
                // Make sure number of %s matches num_features!
               );
   _features_string = os::strdup(buf);
@@ -610,6 +610,7 @@
   a->vcipher(VR0, VR1, VR2);                   // code[10] -> vcipher
   a->vpmsumb(VR0, VR1, VR2);                   // code[11] -> vpmsumb
   a->tcheck(0);                                // code[12] -> tcheck
+  a->mfdscr(R0);                               // code[13] -> mfdscr
   a->blr();
 
   // Emit function to set one cache line to zero. Emit function descriptor and get pointer to it.
@@ -657,6 +658,7 @@
   if (code[feature_cntr++]) features |= vcipher_m;
   if (code[feature_cntr++]) features |= vpmsumb_m;
   if (code[feature_cntr++]) features |= tcheck_m;
+  if (code[feature_cntr++]) features |= mfdscr_m;
 
   // Print the detection code.
   if (PrintAssembly) {
@@ -670,8 +672,6 @@
 
 // Power 8: Configure Data Stream Control Register.
 void VM_Version::config_dscr() {
-  assert(has_tcheck(), "Only execute on Power 8 or later!");
-
   // 7 InstWords for each call (function descriptor + blr instruction).
   const int code_size = (2+2*7)*BytesPerInstWord;
 
--- a/src/cpu/ppc/vm/vm_version_ppc.hpp	Wed Mar 09 14:54:18 2016 +0100
+++ b/src/cpu/ppc/vm/vm_version_ppc.hpp	Wed Mar 09 14:18:12 2016 +0100
@@ -1,6 +1,6 @@
 /*
- * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012, 2015 SAP SE. All rights reserved.
+ * Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012, 2016 SAP SE. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -45,6 +45,7 @@
     vcipher,
     vpmsumb,
     tcheck,
+    mfdscr,
     num_features // last entry to count features
   };
   enum Feature_Flag_Set {
@@ -62,6 +63,7 @@
     vcipher_m             = (1 << vcipher),
     vpmsumb_m             = (1 << vpmsumb),
     tcheck_m              = (1 << tcheck ),
+    mfdscr_m              = (1 << mfdscr ),
     all_features_m        = (unsigned long)-1
   };
 
@@ -94,6 +96,7 @@
   static bool has_vcipher() { return (_features & vcipher_m) != 0; }
   static bool has_vpmsumb() { return (_features & vpmsumb_m) != 0; }
   static bool has_tcheck()  { return (_features & tcheck_m) != 0; }
+  static bool has_mfdscr()  { return (_features & mfdscr_m) != 0; }
 
   // Assembler testing
   static void allow_all();
--- a/src/cpu/sparc/vm/sharedRuntime_sparc.cpp	Wed Mar 09 14:54:18 2016 +0100
+++ b/src/cpu/sparc/vm/sharedRuntime_sparc.cpp	Wed Mar 09 14:18:12 2016 +0100
@@ -1349,9 +1349,12 @@
     }
   } else if (dst.first()->is_stack()) {
     // reg to stack
-    __ st_ptr(src.first()->as_Register(), SP, reg2offset(dst.first()) + STACK_BIAS);
+    // Some compilers (gcc) expect a clean 32 bit value on function entry
+    __ signx(src.first()->as_Register(), L5);
+    __ st_ptr(L5, SP, reg2offset(dst.first()) + STACK_BIAS);
   } else {
-    __ mov(src.first()->as_Register(), dst.first()->as_Register());
+    // Some compilers (gcc) expect a clean 32 bit value on function entry
+    __ signx(src.first()->as_Register(), dst.first()->as_Register());
   }
 }
 
--- a/src/cpu/sparc/vm/sparc.ad	Wed Mar 09 14:54:18 2016 +0100
+++ b/src/cpu/sparc/vm/sparc.ad	Wed Mar 09 14:18:12 2016 +0100
@@ -948,28 +948,28 @@
   }
 #endif
 
-  uint instr;
-  instr = (Assembler::ldst_op << 30)
-        | (dst_enc        << 25)
-        | (primary        << 19)
-        | (src1_enc       << 14);
+  uint instr = (Assembler::ldst_op << 30)
+             | (dst_enc        << 25)
+             | (primary        << 19)
+             | (src1_enc       << 14);
 
   uint index = src2_enc;
   int disp = disp32;
 
   if (src1_enc == R_SP_enc || src1_enc == R_FP_enc) {
     disp += STACK_BIAS;
-    // Quick fix for JDK-8029668: check that stack offset fits, bailout if not
+    // Check that stack offset fits, load into O7 if not
     if (!Assembler::is_simm13(disp)) {
-      ra->C->record_method_not_compilable("unable to handle large constant offsets");
-      return;
+      MacroAssembler _masm(&cbuf);
+      __ set(disp, O7);
+      if (index != R_G0_enc) {
+        __ add(O7, reg_to_register_object(index), O7);
+      }
+      index = R_O7_enc;
+      disp = 0;
     }
   }
 
-  // We should have a compiler bailout here rather than a guarantee.
-  // Better yet would be some mechanism to handle variable-size matches correctly.
-  guarantee(Assembler::is_simm13(disp), "Do not match large constant offsets" );
-
   if( disp == 0 ) {
     // use reg-reg form
     // bit 13 is already zero
@@ -983,7 +983,7 @@
   cbuf.insts()->emit_int32(instr);
 
 #ifdef ASSERT
-  {
+  if (VerifyOops) {
     MacroAssembler _masm(&cbuf);
     if (is_verified_oop_base) {
       __ verify_oop(reg_to_register_object(src1_enc));
@@ -1342,7 +1342,7 @@
 // Figure out which register class each belongs in: rc_int, rc_float, rc_stack
 enum RC { rc_bad, rc_int, rc_float, rc_stack };
 static enum RC rc_class( OptoReg::Name reg ) {
-  if( !OptoReg::is_valid(reg)  ) return rc_bad;
+  if (!OptoReg::is_valid(reg)) return rc_bad;
   if (OptoReg::is_stack(reg)) return rc_stack;
   VMReg r = OptoReg::as_VMReg(reg);
   if (r->is_Register()) return rc_int;
@@ -1350,66 +1350,79 @@
   return rc_float;
 }
 
-static int impl_helper(const MachNode* mach, CodeBuffer* cbuf, PhaseRegAlloc* ra, bool do_size, bool is_load, int offset, int reg, int opcode, const char *op_str, int size, outputStream* st ) {
+#ifndef PRODUCT
+ATTRIBUTE_PRINTF(2, 3)
+static void print_helper(outputStream* st, const char* format, ...) {
+  if (st->position() > 0) {
+    st->cr();
+    st->sp();
+  }
+  va_list ap;
+  va_start(ap, format);
+  st->vprint(format, ap);
+  va_end(ap);
+}
+#endif // !PRODUCT
+
+static void impl_helper(const MachNode* mach, CodeBuffer* cbuf, PhaseRegAlloc* ra, bool is_load, int offset, int reg, int opcode, const char *op_str, outputStream* st) {
   if (cbuf) {
     emit_form3_mem_reg(*cbuf, ra, mach, opcode, -1, R_SP_enc, offset, 0, Matcher::_regEncode[reg]);
   }
 #ifndef PRODUCT
-  else if (!do_size) {
-    if (size != 0) st->print("\n\t");
-    if (is_load) st->print("%s   [R_SP + #%d],R_%s\t! spill",op_str,offset,OptoReg::regname(reg));
-    else         st->print("%s   R_%s,[R_SP + #%d]\t! spill",op_str,OptoReg::regname(reg),offset);
+  else {
+    if (is_load) {
+      print_helper(st, "%s   [R_SP + #%d],R_%s\t! spill", op_str, offset, OptoReg::regname(reg));
+    } else {
+      print_helper(st, "%s   R_%s,[R_SP + #%d]\t! spill", op_str, OptoReg::regname(reg), offset);
+    }
   }
 #endif
-  return size+4;
 }
 
-static int impl_mov_helper( CodeBuffer *cbuf, bool do_size, int src, int dst, int op1, int op2, const char *op_str, int size, outputStream* st ) {
-  if( cbuf ) emit3( *cbuf, Assembler::arith_op, Matcher::_regEncode[dst], op1, 0, op2, Matcher::_regEncode[src] );
+static void impl_mov_helper(CodeBuffer *cbuf, int src, int dst, int op1, int op2, const char *op_str, outputStream* st) {
+  if (cbuf) {
+    emit3(*cbuf, Assembler::arith_op, Matcher::_regEncode[dst], op1, 0, op2, Matcher::_regEncode[src]);
+  }
 #ifndef PRODUCT
-  else if( !do_size ) {
-    if( size != 0 ) st->print("\n\t");
-    st->print("%s  R_%s,R_%s\t! spill",op_str,OptoReg::regname(src),OptoReg::regname(dst));
+  else {
+    print_helper(st, "%s  R_%s,R_%s\t! spill", op_str, OptoReg::regname(src), OptoReg::regname(dst));
   }
 #endif
-  return size+4;
 }
 
-uint MachSpillCopyNode::implementation( CodeBuffer *cbuf,
-                                        PhaseRegAlloc *ra_,
-                                        bool do_size,
-                                        outputStream* st ) const {
+static void mach_spill_copy_implementation_helper(const MachNode* mach,
+                                                  CodeBuffer *cbuf,
+                                                  PhaseRegAlloc *ra_,
+                                                  outputStream* st) {
   // Get registers to move
-  OptoReg::Name src_second = ra_->get_reg_second(in(1));
-  OptoReg::Name src_first = ra_->get_reg_first(in(1));
-  OptoReg::Name dst_second = ra_->get_reg_second(this );
-  OptoReg::Name dst_first = ra_->get_reg_first(this );
+  OptoReg::Name src_second = ra_->get_reg_second(mach->in(1));
+  OptoReg::Name src_first  = ra_->get_reg_first(mach->in(1));
+  OptoReg::Name dst_second = ra_->get_reg_second(mach);
+  OptoReg::Name dst_first  = ra_->get_reg_first(mach);
 
   enum RC src_second_rc = rc_class(src_second);
-  enum RC src_first_rc = rc_class(src_first);
+  enum RC src_first_rc  = rc_class(src_first);
   enum RC dst_second_rc = rc_class(dst_second);
-  enum RC dst_first_rc = rc_class(dst_first);
-
-  assert( OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register" );
-
-  // Generate spill code!
-  int size = 0;
-
-  if( src_first == dst_first && src_second == dst_second )
-    return size;            // Self copy, no move
+  enum RC dst_first_rc  = rc_class(dst_first);
+
+  assert(OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register");
+
+  if (src_first == dst_first && src_second == dst_second) {
+    return; // Self copy, no move
+  }
 
   // --------------------------------------
   // Check for mem-mem move.  Load into unused float registers and fall into
   // the float-store case.
-  if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
+  if (src_first_rc == rc_stack && dst_first_rc == rc_stack) {
     int offset = ra_->reg2offset(src_first);
     // Further check for aligned-adjacent pair, so we can use a double load
-    if( (src_first&1)==0 && src_first+1 == src_second ) {
+    if ((src_first&1) == 0 && src_first+1 == src_second) {
       src_second    = OptoReg::Name(R_F31_num);
       src_second_rc = rc_float;
-      size = impl_helper(this,cbuf,ra_,do_size,true,offset,R_F30_num,Assembler::lddf_op3,"LDDF",size, st);
+      impl_helper(mach, cbuf, ra_, true, offset, R_F30_num, Assembler::lddf_op3, "LDDF", st);
     } else {
-      size = impl_helper(this,cbuf,ra_,do_size,true,offset,R_F30_num,Assembler::ldf_op3 ,"LDF ",size, st);
+      impl_helper(mach, cbuf, ra_, true, offset, R_F30_num, Assembler::ldf_op3, "LDF ", st);
     }
     src_first    = OptoReg::Name(R_F30_num);
     src_first_rc = rc_float;
@@ -1417,7 +1430,7 @@
 
   if( src_second_rc == rc_stack && dst_second_rc == rc_stack ) {
     int offset = ra_->reg2offset(src_second);
-    size = impl_helper(this,cbuf,ra_,do_size,true,offset,R_F31_num,Assembler::ldf_op3,"LDF ",size, st);
+    impl_helper(mach, cbuf, ra_, true, offset, R_F31_num, Assembler::ldf_op3, "LDF ", st);
     src_second    = OptoReg::Name(R_F31_num);
     src_second_rc = rc_float;
   }
@@ -1427,36 +1440,38 @@
   if (src_first_rc == rc_float && dst_first_rc == rc_int && UseVIS < 3) {
     int offset = frame::register_save_words*wordSize;
     if (cbuf) {
-      emit3_simm13( *cbuf, Assembler::arith_op, R_SP_enc, Assembler::sub_op3, R_SP_enc, 16 );
-      impl_helper(this,cbuf,ra_,do_size,false,offset,src_first,Assembler::stf_op3 ,"STF ",size, st);
-      impl_helper(this,cbuf,ra_,do_size,true ,offset,dst_first,Assembler::lduw_op3,"LDUW",size, st);
-      emit3_simm13( *cbuf, Assembler::arith_op, R_SP_enc, Assembler::add_op3, R_SP_enc, 16 );
+      emit3_simm13(*cbuf, Assembler::arith_op, R_SP_enc, Assembler::sub_op3, R_SP_enc, 16);
+      impl_helper(mach, cbuf, ra_, false, offset, src_first,  Assembler::stf_op3, "STF ", st);
+      impl_helper(mach, cbuf, ra_,  true, offset, dst_first, Assembler::lduw_op3, "LDUW", st);
+      emit3_simm13(*cbuf, Assembler::arith_op, R_SP_enc, Assembler::add_op3, R_SP_enc, 16);
     }
 #ifndef PRODUCT
-    else if (!do_size) {
-      if (size != 0) st->print("\n\t");
-      st->print(  "SUB    R_SP,16,R_SP\n");
-      impl_helper(this,cbuf,ra_,do_size,false,offset,src_first,Assembler::stf_op3 ,"STF ",size, st);
-      impl_helper(this,cbuf,ra_,do_size,true ,offset,dst_first,Assembler::lduw_op3,"LDUW",size, st);
-      st->print("\tADD    R_SP,16,R_SP\n");
+    else {
+      print_helper(st, "SUB    R_SP,16,R_SP");
+      impl_helper(mach, cbuf, ra_, false, offset, src_first,  Assembler::stf_op3, "STF ", st);
+      impl_helper(mach, cbuf, ra_,  true, offset, dst_first, Assembler::lduw_op3, "LDUW", st);
+      print_helper(st, "ADD    R_SP,16,R_SP");
     }
 #endif
-    size += 16;
   }
 
   // Check for float->int copy on T4
   if (src_first_rc == rc_float && dst_first_rc == rc_int && UseVIS >= 3) {
     // Further check for aligned-adjacent pair, so we can use a double move
-    if ((src_first&1)==0 && src_first+1 == src_second && (dst_first&1)==0 && dst_first+1 == dst_second)
-      return impl_mov_helper(cbuf,do_size,src_first,dst_first,Assembler::mftoi_op3,Assembler::mdtox_opf,"MOVDTOX",size, st);
-    size  =  impl_mov_helper(cbuf,do_size,src_first,dst_first,Assembler::mftoi_op3,Assembler::mstouw_opf,"MOVSTOUW",size, st);
+    if ((src_first & 1) == 0 && src_first + 1 == src_second && (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
+      impl_mov_helper(cbuf, src_first, dst_first, Assembler::mftoi_op3, Assembler::mdtox_opf, "MOVDTOX", st);
+      return;
+    }
+    impl_mov_helper(cbuf, src_first, dst_first, Assembler::mftoi_op3, Assembler::mstouw_opf, "MOVSTOUW", st);
   }
   // Check for int->float copy on T4
   if (src_first_rc == rc_int && dst_first_rc == rc_float && UseVIS >= 3) {
     // Further check for aligned-adjacent pair, so we can use a double move
-    if ((src_first&1)==0 && src_first+1 == src_second && (dst_first&1)==0 && dst_first+1 == dst_second)
-      return impl_mov_helper(cbuf,do_size,src_first,dst_first,Assembler::mftoi_op3,Assembler::mxtod_opf,"MOVXTOD",size, st);
-    size  =  impl_mov_helper(cbuf,do_size,src_first,dst_first,Assembler::mftoi_op3,Assembler::mwtos_opf,"MOVWTOS",size, st);
+    if ((src_first & 1) == 0 && src_first + 1 == src_second && (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
+      impl_mov_helper(cbuf, src_first, dst_first, Assembler::mftoi_op3, Assembler::mxtod_opf, "MOVXTOD", st);
+      return;
+    }
+    impl_mov_helper(cbuf, src_first, dst_first, Assembler::mftoi_op3, Assembler::mwtos_opf, "MOVWTOS", st);
   }
 
   // --------------------------------------
@@ -1466,10 +1481,10 @@
   // there.  Misaligned sources only come from native-long-returns (handled
   // special below).
 #ifndef _LP64
-  if( src_first_rc == rc_int &&     // source is already big-endian
+  if (src_first_rc == rc_int &&     // source is already big-endian
       src_second_rc != rc_bad &&    // 64-bit move
-      ((dst_first&1)!=0 || dst_second != dst_first+1) ) { // misaligned dst
-    assert( (src_first&1)==0 && src_second == src_first+1, "source must be aligned" );
+      ((dst_first & 1) != 0 || dst_second != dst_first + 1)) { // misaligned dst
+    assert((src_first & 1) == 0 && src_second == src_first + 1, "source must be aligned");
     // Do the big-endian flop.
     OptoReg::Name tmp    = dst_first   ; dst_first    = dst_second   ; dst_second    = tmp   ;
     enum RC       tmp_rc = dst_first_rc; dst_first_rc = dst_second_rc; dst_second_rc = tmp_rc;
@@ -1478,30 +1493,28 @@
 
   // --------------------------------------
   // Check for integer reg-reg copy
-  if( src_first_rc == rc_int && dst_first_rc == rc_int ) {
+  if (src_first_rc == rc_int && dst_first_rc == rc_int) {
 #ifndef _LP64
-    if( src_first == R_O0_num && src_second == R_O1_num ) {  // Check for the evil O0/O1 native long-return case
+    if (src_first == R_O0_num && src_second == R_O1_num) {  // Check for the evil O0/O1 native long-return case
       // Note: The _first and _second suffixes refer to the addresses of the the 2 halves of the 64-bit value
       //       as stored in memory.  On a big-endian machine like SPARC, this means that the _second
       //       operand contains the least significant word of the 64-bit value and vice versa.
       OptoReg::Name tmp = OptoReg::Name(R_O7_num);
-      assert( (dst_first&1)==0 && dst_second == dst_first+1, "return a native O0/O1 long to an aligned-adjacent 64-bit reg" );
+      assert((dst_first & 1) == 0 && dst_second == dst_first + 1, "return a native O0/O1 long to an aligned-adjacent 64-bit reg" );
       // Shift O0 left in-place, zero-extend O1, then OR them into the dst
-      if( cbuf ) {
-        emit3_simm13( *cbuf, Assembler::arith_op, Matcher::_regEncode[tmp], Assembler::sllx_op3, Matcher::_regEncode[src_first], 0x1020 );
-        emit3_simm13( *cbuf, Assembler::arith_op, Matcher::_regEncode[src_second], Assembler::srl_op3, Matcher::_regEncode[src_second], 0x0000 );
-        emit3       ( *cbuf, Assembler::arith_op, Matcher::_regEncode[dst_first], Assembler:: or_op3, Matcher::_regEncode[tmp], 0, Matcher::_regEncode[src_second] );
+      if ( cbuf ) {
+        emit3_simm13(*cbuf, Assembler::arith_op, Matcher::_regEncode[tmp], Assembler::sllx_op3, Matcher::_regEncode[src_first], 0x1020);
+        emit3_simm13(*cbuf, Assembler::arith_op, Matcher::_regEncode[src_second], Assembler::srl_op3, Matcher::_regEncode[src_second], 0x0000);
+        emit3       (*cbuf, Assembler::arith_op, Matcher::_regEncode[dst_first], Assembler:: or_op3, Matcher::_regEncode[tmp], 0, Matcher::_regEncode[src_second]);
 #ifndef PRODUCT
-      } else if( !do_size ) {
-        if( size != 0 ) st->print("\n\t");
-        st->print("SLLX   R_%s,32,R_%s\t! Move O0-first to O7-high\n\t", OptoReg::regname(src_first), OptoReg::regname(tmp));
-        st->print("SRL    R_%s, 0,R_%s\t! Zero-extend O1\n\t", OptoReg::regname(src_second), OptoReg::regname(src_second));
-        st->print("OR     R_%s,R_%s,R_%s\t! spill",OptoReg::regname(tmp), OptoReg::regname(src_second), OptoReg::regname(dst_first));
+      } else {
+        print_helper(st, "SLLX   R_%s,32,R_%s\t! Move O0-first to O7-high\n\t", OptoReg::regname(src_first), OptoReg::regname(tmp));
+        print_helper(st, "SRL    R_%s, 0,R_%s\t! Zero-extend O1\n\t", OptoReg::regname(src_second), OptoReg::regname(src_second));
+        print_helper(st, "OR     R_%s,R_%s,R_%s\t! spill",OptoReg::regname(tmp), OptoReg::regname(src_second), OptoReg::regname(dst_first));
 #endif
       }
-      return size+12;
-    }
-    else if( dst_first == R_I0_num && dst_second == R_I1_num ) {
+      return;
+    } else if (dst_first == R_I0_num && dst_second == R_I1_num) {
       // returning a long value in I0/I1
       // a SpillCopy must be able to target a return instruction's reg_class
       // Note: The _first and _second suffixes refer to the addresses of the the 2 halves of the 64-bit value
@@ -1511,27 +1524,25 @@
 
       if (src_first == dst_first) {
         tdest = OptoReg::Name(R_O7_num);
-        size += 4;
       }
 
-      if( cbuf ) {
-        assert( (src_first&1) == 0 && (src_first+1) == src_second, "return value was in an aligned-adjacent 64-bit reg");
+      if (cbuf) {
+        assert((src_first & 1) == 0 && (src_first + 1) == src_second, "return value was in an aligned-adjacent 64-bit reg");
         // Shift value in upper 32-bits of src to lower 32-bits of I0; move lower 32-bits to I1
         // ShrL_reg_imm6
-        emit3_simm13( *cbuf, Assembler::arith_op, Matcher::_regEncode[tdest], Assembler::srlx_op3, Matcher::_regEncode[src_second], 32 | 0x1000 );
+        emit3_simm13(*cbuf, Assembler::arith_op, Matcher::_regEncode[tdest], Assembler::srlx_op3, Matcher::_regEncode[src_second], 32 | 0x1000);
         // ShrR_reg_imm6  src, 0, dst
-        emit3_simm13( *cbuf, Assembler::arith_op, Matcher::_regEncode[dst_second], Assembler::srl_op3, Matcher::_regEncode[src_first], 0x0000 );
+        emit3_simm13(*cbuf, Assembler::arith_op, Matcher::_regEncode[dst_second], Assembler::srl_op3, Matcher::_regEncode[src_first], 0x0000);
         if (tdest != dst_first) {
-          emit3     ( *cbuf, Assembler::arith_op, Matcher::_regEncode[dst_first], Assembler::or_op3, 0/*G0*/, 0/*op2*/, Matcher::_regEncode[tdest] );
+          emit3     (*cbuf, Assembler::arith_op, Matcher::_regEncode[dst_first], Assembler::or_op3, 0/*G0*/, 0/*op2*/, Matcher::_regEncode[tdest]);
         }
       }
 #ifndef PRODUCT
-      else if( !do_size ) {
-        if( size != 0 ) st->print("\n\t");  // %%%%% !!!!!
-        st->print("SRLX   R_%s,32,R_%s\t! Extract MSW\n\t",OptoReg::regname(src_second),OptoReg::regname(tdest));
-        st->print("SRL    R_%s, 0,R_%s\t! Extract LSW\n\t",OptoReg::regname(src_first),OptoReg::regname(dst_second));
+      else {
+        print_helper(st, "SRLX   R_%s,32,R_%s\t! Extract MSW\n\t",OptoReg::regname(src_second),OptoReg::regname(tdest));
+        print_helper(st, "SRL    R_%s, 0,R_%s\t! Extract LSW\n\t",OptoReg::regname(src_first),OptoReg::regname(dst_second));
         if (tdest != dst_first) {
-          st->print("MOV    R_%s,R_%s\t! spill\n\t", OptoReg::regname(tdest), OptoReg::regname(dst_first));
+          print_helper(st, "MOV    R_%s,R_%s\t! spill\n\t", OptoReg::regname(tdest), OptoReg::regname(dst_first));
         }
       }
 #endif // PRODUCT
@@ -1539,65 +1550,77 @@
     }
 #endif // !_LP64
     // Else normal reg-reg copy
-    assert( src_second != dst_first, "smashed second before evacuating it" );
-    size = impl_mov_helper(cbuf,do_size,src_first,dst_first,Assembler::or_op3,0,"MOV  ",size, st);
-    assert( (src_first&1) == 0 && (dst_first&1) == 0, "never move second-halves of int registers" );
+    assert(src_second != dst_first, "smashed second before evacuating it");
+    impl_mov_helper(cbuf, src_first, dst_first, Assembler::or_op3, 0, "MOV  ", st);
+    assert((src_first & 1) == 0 && (dst_first & 1) == 0, "never move second-halves of int registers");
     // This moves an aligned adjacent pair.
     // See if we are done.
-    if( src_first+1 == src_second && dst_first+1 == dst_second )
-      return size;
+    if (src_first + 1 == src_second && dst_first + 1 == dst_second) {
+      return;
+    }
   }
 
   // Check for integer store
-  if( src_first_rc == rc_int && dst_first_rc == rc_stack ) {
+  if (src_first_rc == rc_int && dst_first_rc == rc_stack) {
     int offset = ra_->reg2offset(dst_first);
     // Further check for aligned-adjacent pair, so we can use a double store
-    if( (src_first&1)==0 && src_first+1 == src_second && (dst_first&1)==0 && dst_first+1 == dst_second )
-      return impl_helper(this,cbuf,ra_,do_size,false,offset,src_first,Assembler::stx_op3,"STX ",size, st);
-    size  =  impl_helper(this,cbuf,ra_,do_size,false,offset,src_first,Assembler::stw_op3,"STW ",size, st);
+    if ((src_first & 1) == 0 && src_first + 1 == src_second && (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
+      impl_helper(mach, cbuf, ra_, false, offset, src_first, Assembler::stx_op3, "STX ", st);
+      return;
+    }
+    impl_helper(mach, cbuf, ra_, false, offset, src_first, Assembler::stw_op3, "STW ", st);
   }
 
   // Check for integer load
-  if( dst_first_rc == rc_int && src_first_rc == rc_stack ) {
+  if (dst_first_rc == rc_int && src_first_rc == rc_stack) {
     int offset = ra_->reg2offset(src_first);
     // Further check for aligned-adjacent pair, so we can use a double load
-    if( (src_first&1)==0 && src_first+1 == src_second && (dst_first&1)==0 && dst_first+1 == dst_second )
-      return impl_helper(this,cbuf,ra_,do_size,true,offset,dst_first,Assembler::ldx_op3 ,"LDX ",size, st);
-    size  =  impl_helper(this,cbuf,ra_,do_size,true,offset,dst_first,Assembler::lduw_op3,"LDUW",size, st);
+    if ((src_first & 1) == 0 && src_first + 1 == src_second && (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
+      impl_helper(mach, cbuf, ra_, true, offset, dst_first, Assembler::ldx_op3, "LDX ", st);
+      return;
+    }
+    impl_helper(mach, cbuf, ra_, true, offset, dst_first, Assembler::lduw_op3, "LDUW", st);
   }
 
   // Check for float reg-reg copy
-  if( src_first_rc == rc_float && dst_first_rc == rc_float ) {
+  if (src_first_rc == rc_float && dst_first_rc == rc_float) {
     // Further check for aligned-adjacent pair, so we can use a double move
-    if( (src_first&1)==0 && src_first+1 == src_second && (dst_first&1)==0 && dst_first+1 == dst_second )
-      return impl_mov_helper(cbuf,do_size,src_first,dst_first,Assembler::fpop1_op3,Assembler::fmovd_opf,"FMOVD",size, st);
-    size  =  impl_mov_helper(cbuf,do_size,src_first,dst_first,Assembler::fpop1_op3,Assembler::fmovs_opf,"FMOVS",size, st);
+    if ((src_first & 1) == 0 && src_first + 1 == src_second && (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
+      impl_mov_helper(cbuf, src_first, dst_first, Assembler::fpop1_op3, Assembler::fmovd_opf, "FMOVD", st);
+      return;
+    }
+    impl_mov_helper(cbuf, src_first, dst_first, Assembler::fpop1_op3, Assembler::fmovs_opf, "FMOVS", st);
   }
 
   // Check for float store
-  if( src_first_rc == rc_float && dst_first_rc == rc_stack ) {
+  if (src_first_rc == rc_float && dst_first_rc == rc_stack) {
     int offset = ra_->reg2offset(dst_first);
     // Further check for aligned-adjacent pair, so we can use a double store
-    if( (src_first&1)==0 && src_first+1 == src_second && (dst_first&1)==0 && dst_first+1 == dst_second )
-      return impl_helper(this,cbuf,ra_,do_size,false,offset,src_first,Assembler::stdf_op3,"STDF",size, st);
-    size  =  impl_helper(this,cbuf,ra_,do_size,false,offset,src_first,Assembler::stf_op3 ,"STF ",size, st);
+    if ((src_first & 1) == 0 && src_first + 1 == src_second && (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
+      impl_helper(mach, cbuf, ra_, false, offset, src_first, Assembler::stdf_op3, "STDF", st);
+      return;
+    }
+    impl_helper(mach, cbuf, ra_, false, offset, src_first, Assembler::stf_op3, "STF ", st);
   }
 
   // Check for float load
-  if( dst_first_rc == rc_float && src_first_rc == rc_stack ) {
+  if (dst_first_rc == rc_float && src_first_rc == rc_stack) {
     int offset = ra_->reg2offset(src_first);
     // Further check for aligned-adjacent pair, so we can use a double load
-    if( (src_first&1)==0 && src_first+1 == src_second && (dst_first&1)==0 && dst_first+1 == dst_second )
-      return impl_helper(this,cbuf,ra_,do_size,true,offset,dst_first,Assembler::lddf_op3,"LDDF",size, st);
-    size  =  impl_helper(this,cbuf,ra_,do_size,true,offset,dst_first,Assembler::ldf_op3 ,"LDF ",size, st);
+    if ((src_first & 1) == 0 && src_first + 1 == src_second && (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
+      impl_helper(mach, cbuf, ra_, true, offset, dst_first, Assembler::lddf_op3, "LDDF", st);
+      return;
+    }
+    impl_helper(mach, cbuf, ra_, true, offset, dst_first, Assembler::ldf_op3, "LDF ", st);
   }
 
   // --------------------------------------------------------------------
   // Check for hi bits still needing moving.  Only happens for misaligned
   // arguments to native calls.
-  if( src_second == dst_second )
-    return size;               // Self copy; no move
-  assert( src_second_rc != rc_bad && dst_second_rc != rc_bad, "src_second & dst_second cannot be Bad" );
+  if (src_second == dst_second) {
+    return; // Self copy; no move
+  }
+  assert(src_second_rc != rc_bad && dst_second_rc != rc_bad, "src_second & dst_second cannot be Bad");
 
 #ifndef _LP64
   // In the LP64 build, all registers can be moved as aligned/adjacent
@@ -1609,52 +1632,57 @@
   // 32-bits of a 64-bit register, but are needed in low bits of another
   // register (else it's a hi-bits-to-hi-bits copy which should have
   // happened already as part of a 64-bit move)
-  if( src_second_rc == rc_int && dst_second_rc == rc_int ) {
-    assert( (src_second&1)==1, "its the evil O0/O1 native return case" );
-    assert( (dst_second&1)==0, "should have moved with 1 64-bit move" );
+  if (src_second_rc == rc_int && dst_second_rc == rc_int) {
+    assert((src_second & 1) == 1, "its the evil O0/O1 native return case");
+    assert((dst_second & 1) == 0, "should have moved with 1 64-bit move");
     // Shift src_second down to dst_second's low bits.
-    if( cbuf ) {
-      emit3_simm13( *cbuf, Assembler::arith_op, Matcher::_regEncode[dst_second], Assembler::srlx_op3, Matcher::_regEncode[src_second-1], 0x1020 );
+    if (cbuf) {
+      emit3_simm13(*cbuf, Assembler::arith_op, Matcher::_regEncode[dst_second], Assembler::srlx_op3, Matcher::_regEncode[src_second-1], 0x1020);
 #ifndef PRODUCT
-    } else if( !do_size ) {
-      if( size != 0 ) st->print("\n\t");
-      st->print("SRLX   R_%s,32,R_%s\t! spill: Move high bits down low",OptoReg::regname(src_second-1),OptoReg::regname(dst_second));
+    } else  {
+      print_helper(st, "SRLX   R_%s,32,R_%s\t! spill: Move high bits down low", OptoReg::regname(src_second - 1), OptoReg::regname(dst_second));
 #endif
     }
-    return size+4;
+    return;
   }
 
   // Check for high word integer store.  Must down-shift the hi bits
   // into a temp register, then fall into the case of storing int bits.
-  if( src_second_rc == rc_int && dst_second_rc == rc_stack && (src_second&1)==1 ) {
+  if (src_second_rc == rc_int && dst_second_rc == rc_stack && (src_second & 1) == 1) {
     // Shift src_second down to dst_second's low bits.
-    if( cbuf ) {
-      emit3_simm13( *cbuf, Assembler::arith_op, Matcher::_regEncode[R_O7_num], Assembler::srlx_op3, Matcher::_regEncode[src_second-1], 0x1020 );
+    if (cbuf) {
+      emit3_simm13(*cbuf, Assembler::arith_op, Matcher::_regEncode[R_O7_num], Assembler::srlx_op3, Matcher::_regEncode[src_second-1], 0x1020);
 #ifndef PRODUCT
-    } else if( !do_size ) {
-      if( size != 0 ) st->print("\n\t");
-      st->print("SRLX   R_%s,32,R_%s\t! spill: Move high bits down low",OptoReg::regname(src_second-1),OptoReg::regname(R_O7_num));
+    } else {
+      print_helper(st, "SRLX   R_%s,32,R_%s\t! spill: Move high bits down low", OptoReg::regname(src_second-1), OptoReg::regname(R_O7_num));
 #endif
     }
-    size+=4;
     src_second = OptoReg::Name(R_O7_num); // Not R_O7H_num!
   }
 
   // Check for high word integer load
-  if( dst_second_rc == rc_int && src_second_rc == rc_stack )
-    return impl_helper(this,cbuf,ra_,do_size,true ,ra_->reg2offset(src_second),dst_second,Assembler::lduw_op3,"LDUW",size, st);
+  if (dst_second_rc == rc_int && src_second_rc == rc_stack)
+    return impl_helper(this, cbuf, ra_, true, ra_->reg2offset(src_second), dst_second, Assembler::lduw_op3, "LDUW", size, st);
 
   // Check for high word integer store
-  if( src_second_rc == rc_int && dst_second_rc == rc_stack )
-    return impl_helper(this,cbuf,ra_,do_size,false,ra_->reg2offset(dst_second),src_second,Assembler::stw_op3 ,"STW ",size, st);
+  if (src_second_rc == rc_int && dst_second_rc == rc_stack)
+    return impl_helper(this, cbuf, ra_, false, ra_->reg2offset(dst_second), src_second, Assembler::stw_op3, "STW ", size, st);
 
   // Check for high word float store
-  if( src_second_rc == rc_float && dst_second_rc == rc_stack )
-    return impl_helper(this,cbuf,ra_,do_size,false,ra_->reg2offset(dst_second),src_second,Assembler::stf_op3 ,"STF ",size, st);
+  if (src_second_rc == rc_float && dst_second_rc == rc_stack)
+    return impl_helper(this, cbuf, ra_, false, ra_->reg2offset(dst_second), src_second, Assembler::stf_op3, "STF ", size, st);
 
 #endif // !_LP64
 
   Unimplemented();
+}
+
+uint MachSpillCopyNode::implementation(CodeBuffer *cbuf,
+                                       PhaseRegAlloc *ra_,
+                                       bool do_size,
+                                       outputStream* st) const {
+  assert(!do_size, "not supported");
+  mach_spill_copy_implementation_helper(this, cbuf, ra_, st);
   return 0;
 }
 
@@ -1669,19 +1697,19 @@
 }
 
 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
-  return implementation( NULL, ra_, true, NULL );
+  return MachNode::size(ra_);
 }
 
 //=============================================================================
 #ifndef PRODUCT
-void MachNopNode::format( PhaseRegAlloc *, outputStream *st ) const {
+void MachNopNode::format(PhaseRegAlloc *, outputStream *st) const {
   st->print("NOP \t# %d bytes pad for loops and calls", 4 * _count);
 }
 #endif
 
-void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc * ) const {
+void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *) const {
   MacroAssembler _masm(&cbuf);
-  for(int i = 0; i < _count; i += 1) {
+  for (int i = 0; i < _count; i += 1) {
     __ nop();
   }
 }
@@ -5197,7 +5225,6 @@
   // No match rule to avoid chain rule match.
   effect(DEF dst, USE src);
   ins_cost(MEMORY_REF_COST);
-  size(4);
   format %{ "LDF    $src,$dst\t! stkI to regF" %}
   opcode(Assembler::ldf_op3);
   ins_encode(simple_form3_mem_reg(src, dst));
@@ -5208,7 +5235,6 @@
   // No match rule to avoid chain rule match.
   effect(DEF dst, USE src);
   ins_cost(MEMORY_REF_COST);
-  size(4);
   format %{ "LDDF   $src,$dst\t! stkL to regD" %}
   opcode(Assembler::lddf_op3);
   ins_encode(simple_form3_mem_reg(src, dst));
@@ -5219,7 +5245,6 @@
   // No match rule to avoid chain rule match.
   effect(DEF dst, USE src);
   ins_cost(MEMORY_REF_COST);
-  size(4);
   format %{ "STF    $src,$dst\t! regF to stkI" %}
   opcode(Assembler::stf_op3);
   ins_encode(simple_form3_mem_reg(dst, src));
@@ -5230,7 +5255,6 @@
   // No match rule to avoid chain rule match.
   effect(DEF dst, USE src);
   ins_cost(MEMORY_REF_COST);
-  size(4);
   format %{ "STDF   $src,$dst\t! regD to stkL" %}
   opcode(Assembler::stdf_op3);
   ins_encode(simple_form3_mem_reg(dst, src));
@@ -5240,7 +5264,6 @@
 instruct regI_to_stkLHi(stackSlotL dst, iRegI src) %{
   effect(DEF dst, USE src);
   ins_cost(MEMORY_REF_COST*2);
-  size(8);
   format %{ "STW    $src,$dst.hi\t! long\n\t"
             "STW    R_G0,$dst.lo" %}
   opcode(Assembler::stw_op3);
@@ -5252,7 +5275,6 @@
   // No match rule to avoid chain rule match.
   effect(DEF dst, USE src);
   ins_cost(MEMORY_REF_COST);
-  size(4);
   format %{ "STX    $src,$dst\t! regL to stkD" %}
   opcode(Assembler::stx_op3);
   ins_encode(simple_form3_mem_reg( dst, src ) );
@@ -5266,7 +5288,6 @@
   match(Set dst src);
   ins_cost(MEMORY_REF_COST);
 
-  size(4);
   format %{ "LDUW   $src,$dst\t!stk" %}
   opcode(Assembler::lduw_op3);
   ins_encode(simple_form3_mem_reg( src, dst ) );
@@ -5278,7 +5299,6 @@
   match(Set dst src);
   ins_cost(MEMORY_REF_COST);
 
-  size(4);
   format %{ "STW    $src,$dst\t!stk" %}
   opcode(Assembler::stw_op3);
   ins_encode(simple_form3_mem_reg( dst, src ) );
@@ -5290,7 +5310,6 @@
   match(Set dst src);
 
   ins_cost(MEMORY_REF_COST);
-  size(4);
   format %{ "LDX    $src,$dst\t! long" %}
   opcode(Assembler::ldx_op3);
   ins_encode(simple_form3_mem_reg( src, dst ) );
@@ -5302,7 +5321,6 @@
   match(Set dst src);
 
   ins_cost(MEMORY_REF_COST);
-  size(4);
   format %{ "STX    $src,$dst\t! long" %}
   opcode(Assembler::stx_op3);
   ins_encode(simple_form3_mem_reg( dst, src ) );
@@ -5314,7 +5332,6 @@
 instruct stkP_to_regP( iRegP dst, stackSlotP src ) %{
   match(Set dst src);
   ins_cost(MEMORY_REF_COST);
-  size(4);
   format %{ "LDX    $src,$dst\t!ptr" %}
   opcode(Assembler::ldx_op3);
   ins_encode(simple_form3_mem_reg( src, dst ) );
@@ -5325,7 +5342,6 @@
 instruct regP_to_stkP(stackSlotP dst, iRegP src) %{
   match(Set dst src);
   ins_cost(MEMORY_REF_COST);
-  size(4);
   format %{ "STX    $src,$dst\t!ptr" %}
   opcode(Assembler::stx_op3);
   ins_encode(simple_form3_mem_reg( dst, src ) );
@@ -5771,7 +5787,6 @@
   match(Set dst (LoadL_unaligned mem));
   effect(KILL tmp);
   ins_cost(MEMORY_REF_COST*2+DEFAULT_COST);
-  size(16);
   format %{ "LDUW   $mem+4,R_O7\t! misaligned long\n"
           "\tLDUW   $mem  ,$dst\n"
           "\tSLLX   #32, $dst, $dst\n"
@@ -5786,7 +5801,6 @@
   match(Set dst (LoadRange mem));
   ins_cost(MEMORY_REF_COST);
 
-  size(4);
   format %{ "LDUW   $mem,$dst\t! range" %}
   opcode(Assembler::lduw_op3);
   ins_encode(simple_form3_mem_reg( mem, dst ) );
@@ -5797,7 +5811,6 @@
 instruct loadI_freg(regF dst, memory mem) %{
   match(Set dst (LoadI mem));
   ins_cost(MEMORY_REF_COST);
-  size(4);
 
   format %{ "LDF    $mem,$dst\t! for fitos/fitod" %}
   opcode(Assembler::ldf_op3);
@@ -5876,7 +5889,6 @@
   match(Set dst (LoadD mem));
   ins_cost(MEMORY_REF_COST);
 
-  size(4);
   format %{ "LDDF   $mem,$dst" %}
   opcode(Assembler::lddf_op3);
   ins_encode(simple_form3_mem_reg( mem, dst ) );
@@ -5887,7 +5899,6 @@
 instruct loadD_unaligned(regD_low dst, memory mem ) %{
   match(Set dst (LoadD_unaligned mem));
   ins_cost(MEMORY_REF_COST*2+DEFAULT_COST);
-  size(8);
   format %{ "LDF    $mem  ,$dst.hi\t! misaligned double\n"
           "\tLDF    $mem+4,$dst.lo\t!" %}
   opcode(Assembler::ldf_op3);
@@ -5900,7 +5911,6 @@
   match(Set dst (LoadF mem));
   ins_cost(MEMORY_REF_COST);
 
-  size(4);
   format %{ "LDF    $mem,$dst" %}
   opcode(Assembler::ldf_op3);
   ins_encode(simple_form3_mem_reg( mem, dst ) );
@@ -6119,7 +6129,6 @@
   predicate(AllocatePrefetchInstr == 0);
   match( PrefetchAllocation mem );
   ins_cost(MEMORY_REF_COST);
-  size(4);
 
   format %{ "PREFETCH $mem,2\t! Prefetch allocation" %}
   opcode(Assembler::prefetch_op3);
@@ -6175,7 +6184,6 @@
   match(Set mem (StoreB mem src));
   ins_cost(MEMORY_REF_COST);
 
-  size(4);
   format %{ "STB    $src,$mem\t! byte" %}
   opcode(Assembler::stb_op3);
   ins_encode(simple_form3_mem_reg( mem, src ) );
@@ -6186,7 +6194,6 @@
   match(Set mem (StoreB mem src));
   ins_cost(MEMORY_REF_COST);
 
-  size(4);
   format %{ "STB    $src,$mem\t! byte" %}
   opcode(Assembler::stb_op3);
   ins_encode(simple_form3_mem_reg( mem, R_G0 ) );
@@ -6197,7 +6204,6 @@
   match(Set mem (StoreCM mem src));
   ins_cost(MEMORY_REF_COST);
 
-  size(4);
   format %{ "STB    $src,$mem\t! CMS card-mark byte 0" %}
   opcode(Assembler::stb_op3);
   ins_encode(simple_form3_mem_reg( mem, R_G0 ) );
@@ -6209,7 +6215,6 @@
   match(Set mem (StoreC mem src));
   ins_cost(MEMORY_REF_COST);
 
-  size(4);
   format %{ "STH    $src,$mem\t! short" %}
   opcode(Assembler::sth_op3);
   ins_encode(simple_form3_mem_reg( mem, src ) );
@@ -6220,7 +6225,6 @@
   match(Set mem (StoreC mem src));
   ins_cost(MEMORY_REF_COST);
 
-  size(4);
   format %{ "STH    $src,$mem\t! short" %}
   opcode(Assembler::sth_op3);
   ins_encode(simple_form3_mem_reg( mem, R_G0 ) );
@@ -6232,7 +6236,6 @@
   match(Set mem (StoreI mem src));
   ins_cost(MEMORY_REF_COST);
 
-  size(4);
   format %{ "STW    $src,$mem" %}
   opcode(Assembler::stw_op3);
   ins_encode(simple_form3_mem_reg( mem, src ) );
@@ -6243,7 +6246,6 @@
 instruct storeL(memory mem, iRegL src) %{
   match(Set mem (StoreL mem src));
   ins_cost(MEMORY_REF_COST);
-  size(4);
   format %{ "STX    $src,$mem\t! long" %}
   opcode(Assembler::stx_op3);
   ins_encode(simple_form3_mem_reg( mem, src ) );
@@ -6254,7 +6256,6 @@
   match(Set mem (StoreI mem src));
   ins_cost(MEMORY_REF_COST);
 
-  size(4);
   format %{ "STW    $src,$mem" %}
   opcode(Assembler::stw_op3);
   ins_encode(simple_form3_mem_reg( mem, R_G0 ) );
@@ -6265,7 +6266,6 @@
   match(Set mem (StoreL mem src));
   ins_cost(MEMORY_REF_COST);
 
-  size(4);
   format %{ "STX    $src,$mem" %}
   opcode(Assembler::stx_op3);
   ins_encode(simple_form3_mem_reg( mem, R_G0 ) );
@@ -6277,7 +6277,6 @@
   match(Set mem (StoreI mem src));
   ins_cost(MEMORY_REF_COST);
 
-  size(4);
   format %{ "STF    $src,$mem\t! after fstoi/fdtoi" %}
   opcode(Assembler::stf_op3);
   ins_encode(simple_form3_mem_reg( mem, src ) );
@@ -6288,7 +6287,6 @@
 instruct storeP(memory dst, sp_ptr_RegP src) %{
   match(Set dst (StoreP dst src));
   ins_cost(MEMORY_REF_COST);
-  size(4);
 
 #ifndef _LP64
   format %{ "STW    $src,$dst\t! ptr" %}
@@ -6304,7 +6302,6 @@
 instruct storeP0(memory dst, immP0 src) %{
   match(Set dst (StoreP dst src));
   ins_cost(MEMORY_REF_COST);
-  size(4);
 
 #ifndef _LP64
   format %{ "STW    $src,$dst\t! ptr" %}
@@ -6379,7 +6376,6 @@
   match(Set mem (StoreD mem src));
   ins_cost(MEMORY_REF_COST);
 
-  size(4);
   format %{ "STDF   $src,$mem" %}
   opcode(Assembler::stdf_op3);
   ins_encode(simple_form3_mem_reg( mem, src ) );
@@ -6390,7 +6386,6 @@
   match(Set mem (StoreD mem src));
   ins_cost(MEMORY_REF_COST);
 
-  size(4);
   format %{ "STX    $src,$mem" %}
   opcode(Assembler::stx_op3);
   ins_encode(simple_form3_mem_reg( mem, R_G0 ) );
@@ -6402,7 +6397,6 @@
   match(Set mem (StoreF mem src));
   ins_cost(MEMORY_REF_COST);
 
-  size(4);
   format %{ "STF    $src,$mem" %}
   opcode(Assembler::stf_op3);
   ins_encode(simple_form3_mem_reg( mem, src ) );
@@ -6413,7 +6407,6 @@
   match(Set mem (StoreF mem src));
   ins_cost(MEMORY_REF_COST);
 
-  size(4);
   format %{ "STW    $src,$mem\t! storeF0" %}
   opcode(Assembler::stw_op3);
   ins_encode(simple_form3_mem_reg( mem, R_G0 ) );
@@ -7068,7 +7061,6 @@
   ins_cost(MEMORY_REF_COST);
 
 #ifndef _LP64
-  size(4);
   format %{ "LDUW   $mem,$dst\t! ptr" %}
   opcode(Assembler::lduw_op3, 0, REGP_OP);
 #else
@@ -8138,7 +8130,6 @@
   effect(DEF dst, USE src);
   ins_cost(MEMORY_REF_COST);
 
-  size(4);
   format %{ "LDUW   $src,$dst\t! MoveF2I" %}
   opcode(Assembler::lduw_op3);
   ins_encode(simple_form3_mem_reg( src, dst ) );
@@ -8150,7 +8141,6 @@
   effect(DEF dst, USE src);
   ins_cost(MEMORY_REF_COST);
 
-  size(4);
   format %{ "LDF    $src,$dst\t! MoveI2F" %}
   opcode(Assembler::ldf_op3);
   ins_encode(simple_form3_mem_reg(src, dst));
@@ -8162,7 +8152,6 @@
   effect(DEF dst, USE src);
   ins_cost(MEMORY_REF_COST);
 
-  size(4);
   format %{ "LDX    $src,$dst\t! MoveD2L" %}
   opcode(Assembler::ldx_op3);
   ins_encode(simple_form3_mem_reg( src, dst ) );
@@ -8174,7 +8163,6 @@
   effect(DEF dst, USE src);
   ins_cost(MEMORY_REF_COST);
 
-  size(4);
   format %{ "LDDF   $src,$dst\t! MoveL2D" %}
   opcode(Assembler::lddf_op3);
   ins_encode(simple_form3_mem_reg(src, dst));
@@ -8186,7 +8174,6 @@
   effect(DEF dst, USE src);
   ins_cost(MEMORY_REF_COST);
 
-  size(4);
   format %{ "STF   $src,$dst\t! MoveF2I" %}
   opcode(Assembler::stf_op3);
   ins_encode(simple_form3_mem_reg(dst, src));
@@ -8198,7 +8185,6 @@
   effect(DEF dst, USE src);
   ins_cost(MEMORY_REF_COST);
 
-  size(4);
   format %{ "STW    $src,$dst\t! MoveI2F" %}
   opcode(Assembler::stw_op3);
   ins_encode(simple_form3_mem_reg( dst, src ) );
@@ -8210,7 +8196,6 @@
   effect(DEF dst, USE src);
   ins_cost(MEMORY_REF_COST);
 
-  size(4);
   format %{ "STDF   $src,$dst\t! MoveD2L" %}
   opcode(Assembler::stdf_op3);
   ins_encode(simple_form3_mem_reg(dst, src));
@@ -8222,7 +8207,6 @@
   effect(DEF dst, USE src);
   ins_cost(MEMORY_REF_COST);
 
-  size(4);
   format %{ "STX    $src,$dst\t! MoveL2D" %}
   opcode(Assembler::stx_op3);
   ins_encode(simple_form3_mem_reg( dst, src ) );
@@ -8427,7 +8411,6 @@
 instruct convI2D_mem(regD_low dst, memory mem) %{
   match(Set dst (ConvI2D (LoadI mem)));
   ins_cost(DEFAULT_COST + MEMORY_REF_COST);
-  size(8);
   format %{ "LDF    $mem,$dst\n\t"
             "FITOD  $dst,$dst" %}
   opcode(Assembler::ldf_op3, Assembler::fitod_opf);
@@ -8468,7 +8451,6 @@
 instruct convI2F_mem( regF dst, memory mem ) %{
   match(Set dst (ConvI2F (LoadI mem)));
   ins_cost(DEFAULT_COST + MEMORY_REF_COST);
-  size(8);
   format %{ "LDF    $mem,$dst\n\t"
             "FITOS  $dst,$dst" %}
   opcode(Assembler::ldf_op3, Assembler::fitos_opf);
--- a/src/cpu/sparc/vm/vm_version_sparc.cpp	Wed Mar 09 14:54:18 2016 +0100
+++ b/src/cpu/sparc/vm/vm_version_sparc.cpp	Wed Mar 09 14:18:12 2016 +0100
@@ -463,3 +463,37 @@
   }
   return result;
 }
+
+
+int VM_Version::parse_features(const char* implementation) {
+  int features = unknown_m;
+  // Convert to UPPER case before compare.
+  char* impl = os::strdup_check_oom(implementation);
+
+  for (int i = 0; impl[i] != 0; i++)
+    impl[i] = (char)toupper((uint)impl[i]);
+
+  if (strstr(impl, "SPARC64") != NULL) {
+    features |= sparc64_family_m;
+  } else if (strstr(impl, "SPARC-M") != NULL) {
+    // M-series SPARC is based on T-series.
+    features |= (M_family_m | T_family_m);
+  } else if (strstr(impl, "SPARC-T") != NULL) {
+    features |= T_family_m;
+    if (strstr(impl, "SPARC-T1") != NULL) {
+      features |= T1_model_m;
+    }
+  } else {
+    if (strstr(impl, "SPARC") == NULL) {
+#ifndef PRODUCT
+      // kstat on Solaris 8 virtual machines (branded zones)
+      // returns "(unsupported)" implementation. Solaris 8 is not
+      // supported anymore, but include this check to be on the
+      // safe side.
+      warning("Can't parse CPU implementation = '%s', assume generic SPARC", impl);
+#endif
+    }
+  }
+  os::free((void*)impl);
+  return features;
+}
--- a/src/cpu/sparc/vm/vm_version_sparc.hpp	Wed Mar 09 14:54:18 2016 +0100
+++ b/src/cpu/sparc/vm/vm_version_sparc.hpp	Wed Mar 09 14:18:12 2016 +0100
@@ -121,7 +121,7 @@
   static bool is_T1_model(int features) { return is_T_family(features) && ((features & T1_model_m) != 0); }
 
   static int maximum_niagara1_processor_count() { return 32; }
-
+  static int parse_features(const char* implementation);
 public:
   // Initialization
   static void initialize();
--- a/src/cpu/x86/vm/templateInterpreterGenerator_x86.cpp	Wed Mar 09 14:54:18 2016 +0100
+++ b/src/cpu/x86/vm/templateInterpreterGenerator_x86.cpp	Wed Mar 09 14:18:12 2016 +0100
@@ -161,13 +161,7 @@
                                      create_klass_exception),
                rarg, rarg2);
   } else {
-    // kind of lame ExternalAddress can't take NULL because
-    // external_word_Relocation will assert.
-    if (message != NULL) {
-      __ lea(rarg2, ExternalAddress((address)message));
-    } else {
-      __ movptr(rarg2, NULL_WORD);
-    }
+    __ lea(rarg2, ExternalAddress((address)message));
     __ call_VM(rax,
                CAST_FROM_FN_PTR(address, InterpreterRuntime::create_exception),
                rarg, rarg2);
--- a/src/cpu/x86/vm/x86_32.ad	Wed Mar 09 14:54:18 2016 +0100
+++ b/src/cpu/x86/vm/x86_32.ad	Wed Mar 09 14:18:12 2016 +0100
@@ -7236,6 +7236,7 @@
 instruct compareAndSwapL( rRegI res, eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
   predicate(VM_Version::supports_cx8());
   match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
+  match(Set res (WeakCompareAndSwapL mem_ptr (Binary oldval newval)));
   effect(KILL cr, KILL oldval);
   format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
             "MOV    $res,0\n\t"
@@ -7249,6 +7250,7 @@
 
 instruct compareAndSwapP( rRegI res,  pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
   match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
+  match(Set res (WeakCompareAndSwapP mem_ptr (Binary oldval newval)));
   effect(KILL cr, KILL oldval);
   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
             "MOV    $res,0\n\t"
@@ -7261,6 +7263,7 @@
 
 instruct compareAndSwapI( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
   match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
+  match(Set res (WeakCompareAndSwapI mem_ptr (Binary oldval newval)));
   effect(KILL cr, KILL oldval);
   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
             "MOV    $res,0\n\t"
@@ -7271,6 +7274,31 @@
   ins_pipe( pipe_cmpxchg );
 %}
 
+instruct compareAndExchangeL( eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
+  predicate(VM_Version::supports_cx8());
+  match(Set oldval (CompareAndExchangeL mem_ptr (Binary oldval newval)));
+  effect(KILL cr);
+  format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
+  ins_encode( enc_cmpxchg8(mem_ptr) );
+  ins_pipe( pipe_cmpxchg );
+%}
+
+instruct compareAndExchangeP( pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
+  match(Set oldval (CompareAndExchangeP mem_ptr (Binary oldval newval)));
+  effect(KILL cr);
+  format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
+  ins_encode( enc_cmpxchg(mem_ptr) );
+  ins_pipe( pipe_cmpxchg );
+%}
+
+instruct compareAndExchangeI( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
+  match(Set oldval (CompareAndExchangeI mem_ptr (Binary oldval newval)));
+  effect(KILL cr);
+  format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
+  ins_encode( enc_cmpxchg(mem_ptr) );
+  ins_pipe( pipe_cmpxchg );
+%}
+
 instruct xaddI_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
   predicate(n->as_LoadStore()->result_not_used());
   match(Set dummy (GetAndAddI mem add));
--- a/src/cpu/x86/vm/x86_64.ad	Wed Mar 09 14:54:18 2016 +0100
+++ b/src/cpu/x86/vm/x86_64.ad	Wed Mar 09 14:18:12 2016 +0100
@@ -7281,6 +7281,7 @@
 %{
   predicate(VM_Version::supports_cx8());
   match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
+  match(Set res (WeakCompareAndSwapP mem_ptr (Binary oldval newval)));
   effect(KILL cr, KILL oldval);
 
   format %{ "cmpxchgq $mem_ptr,$newval\t# "
@@ -7305,6 +7306,7 @@
 %{
   predicate(VM_Version::supports_cx8());
   match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
+  match(Set res (WeakCompareAndSwapL mem_ptr (Binary oldval newval)));
   effect(KILL cr, KILL oldval);
 
   format %{ "cmpxchgq $mem_ptr,$newval\t# "
@@ -7328,6 +7330,7 @@
                          rFlagsReg cr)
 %{
   match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
+  match(Set res (WeakCompareAndSwapI mem_ptr (Binary oldval newval)));
   effect(KILL cr, KILL oldval);
 
   format %{ "cmpxchgl $mem_ptr,$newval\t# "
@@ -7351,6 +7354,7 @@
                           rax_RegN oldval, rRegN newval,
                           rFlagsReg cr) %{
   match(Set res (CompareAndSwapN mem_ptr (Binary oldval newval)));
+  match(Set res (WeakCompareAndSwapN mem_ptr (Binary oldval newval)));
   effect(KILL cr, KILL oldval);
 
   format %{ "cmpxchgl $mem_ptr,$newval\t# "
@@ -7368,6 +7372,83 @@
   ins_pipe( pipe_cmpxchg );
 %}
 
+instruct compareAndExchangeI(
+                         memory mem_ptr,
+                         rax_RegI oldval, rRegI newval,
+                         rFlagsReg cr)
+%{
+  match(Set oldval (CompareAndExchangeI mem_ptr (Binary oldval newval)));
+  effect(KILL cr);
+
+  format %{ "cmpxchgl $mem_ptr,$newval\t# "
+            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"  %}
+  opcode(0x0F, 0xB1);
+  ins_encode(lock_prefix,
+             REX_reg_mem(newval, mem_ptr),
+             OpcP, OpcS,
+             reg_mem(newval, mem_ptr) // lock cmpxchg
+             );
+  ins_pipe( pipe_cmpxchg );
+%}
+
+instruct compareAndExchangeL(
+                         memory mem_ptr,
+                         rax_RegL oldval, rRegL newval,
+                         rFlagsReg cr)
+%{
+  predicate(VM_Version::supports_cx8());
+  match(Set oldval (CompareAndExchangeL mem_ptr (Binary oldval newval)));
+  effect(KILL cr);
+
+  format %{ "cmpxchgq $mem_ptr,$newval\t# "
+            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t"  %}
+  opcode(0x0F, 0xB1);
+  ins_encode(lock_prefix,
+             REX_reg_mem_wide(newval, mem_ptr),
+             OpcP, OpcS,
+             reg_mem(newval, mem_ptr)  // lock cmpxchg
+            );
+  ins_pipe( pipe_cmpxchg );
+%}
+
+instruct compareAndExchangeN(
+                          memory mem_ptr,
+                          rax_RegN oldval, rRegN newval,
+                          rFlagsReg cr) %{
+  match(Set oldval (CompareAndExchangeN mem_ptr (Binary oldval newval)));
+  effect(KILL cr);
+
+  format %{ "cmpxchgl $mem_ptr,$newval\t# "
+            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
+  opcode(0x0F, 0xB1);
+  ins_encode(lock_prefix,
+             REX_reg_mem(newval, mem_ptr),
+             OpcP, OpcS,
+             reg_mem(newval, mem_ptr)  // lock cmpxchg
+          );
+  ins_pipe( pipe_cmpxchg );
+%}
+
+instruct compareAndExchangeP(
+                         memory mem_ptr,
+                         rax_RegP oldval, rRegP newval,
+                         rFlagsReg cr)
+%{
+  predicate(VM_Version::supports_cx8());
+  match(Set oldval (CompareAndExchangeP mem_ptr (Binary oldval newval)));
+  effect(KILL cr);
+
+  format %{ "cmpxchgq $mem_ptr,$newval\t# "
+            "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %}
+  opcode(0x0F, 0xB1);
+  ins_encode(lock_prefix,
+             REX_reg_mem_wide(newval, mem_ptr),
+             OpcP, OpcS,
+             reg_mem(newval, mem_ptr)  // lock cmpxchg
+          );
+  ins_pipe( pipe_cmpxchg );
+%}
+
 instruct xaddI_no_res( memory mem, Universe dummy, immI add, rFlagsReg cr) %{
   predicate(n->as_LoadStore()->result_not_used());
   match(Set dummy (GetAndAddI mem add));
--- a/src/jdk.vm.ci/share/classes/jdk.vm.ci.amd64/src/jdk/vm/ci/amd64/AMD64.java	Wed Mar 09 14:54:18 2016 +0100
+++ b/src/jdk.vm.ci/share/classes/jdk.vm.ci.amd64/src/jdk/vm/ci/amd64/AMD64.java	Wed Mar 09 14:18:12 2016 +0100
@@ -22,6 +22,7 @@
  */
 package jdk.vm.ci.amd64;
 
+import static jdk.vm.ci.code.MemoryBarriers.LOAD_LOAD;
 import static jdk.vm.ci.code.MemoryBarriers.LOAD_STORE;
 import static jdk.vm.ci.code.MemoryBarriers.STORE_STORE;
 import static jdk.vm.ci.code.Register.SPECIAL;
@@ -220,7 +221,7 @@
     private final AMD64Kind largestKind;
 
     public AMD64(EnumSet<CPUFeature> features, EnumSet<Flag> flags) {
-        super("AMD64", AMD64Kind.QWORD, ByteOrder.LITTLE_ENDIAN, true, allRegisters, LOAD_STORE | STORE_STORE, 1, 8);
+        super("AMD64", AMD64Kind.QWORD, ByteOrder.LITTLE_ENDIAN, true, allRegisters, LOAD_LOAD | LOAD_STORE | STORE_STORE, 1, 8);
         this.features = features;
         this.flags = flags;
         assert features.contains(CPUFeature.SSE2) : "minimum config for x64";
--- a/src/jdk.vm.ci/share/classes/jdk.vm.ci.hotspot/src/jdk/vm/ci/hotspot/HotSpotVMConfig.java	Wed Mar 09 14:54:18 2016 +0100
+++ b/src/jdk.vm.ci/share/classes/jdk.vm.ci.hotspot/src/jdk/vm/ci/hotspot/HotSpotVMConfig.java	Wed Mar 09 14:18:12 2016 +0100
@@ -1141,7 +1141,7 @@
 
     @HotSpotVMField(name = "JavaFrameAnchor::_last_Java_sp", type = "intptr_t*", get = HotSpotVMField.Type.OFFSET) @Stable private int javaFrameAnchorLastJavaSpOffset;
     @HotSpotVMField(name = "JavaFrameAnchor::_last_Java_pc", type = "address", get = HotSpotVMField.Type.OFFSET) @Stable private int javaFrameAnchorLastJavaPcOffset;
-    @HotSpotVMField(name = "JavaFrameAnchor::_last_Java_fp", type = "intptr_t*", get = HotSpotVMField.Type.OFFSET, archs = {"amd64"}) @Stable private int javaFrameAnchorLastJavaFpOffset;
+    @HotSpotVMField(name = "JavaFrameAnchor::_last_Java_fp", type = "intptr_t*", get = HotSpotVMField.Type.OFFSET, archs = {"aarch64, amd64"}) @Stable private int javaFrameAnchorLastJavaFpOffset;
     @HotSpotVMField(name = "JavaFrameAnchor::_flags", type = "int", get = HotSpotVMField.Type.OFFSET, archs = {"sparc"}) @Stable private int javaFrameAnchorFlagsOffset;
 
     public int threadLastJavaSpOffset() {
@@ -1152,11 +1152,8 @@
         return javaThreadAnchorOffset + javaFrameAnchorLastJavaPcOffset;
     }
 
-    /**
-     * This value is only valid on AMD64.
-     */
     public int threadLastJavaFpOffset() {
-        // TODO add an assert for AMD64
+        assert getHostArchitectureName().equals("aarch64") || getHostArchitectureName().equals("amd64");
         return javaThreadAnchorOffset + javaFrameAnchorLastJavaFpOffset;
     }
 
--- a/src/os_cpu/solaris_sparc/vm/vm_version_solaris_sparc.cpp	Wed Mar 09 14:54:18 2016 +0100
+++ b/src/os_cpu/solaris_sparc/vm/vm_version_solaris_sparc.cpp	Wed Mar 09 14:18:12 2016 +0100
@@ -264,6 +264,7 @@
 
 // We need to keep these here as long as we have to build on Solaris
 // versions before 10.
+
 #ifndef SI_ARCHITECTURE_32
 #define SI_ARCHITECTURE_32      516     /* basic 32-bit SI_ARCHITECTURE */
 #endif
@@ -272,36 +273,87 @@
 #define SI_ARCHITECTURE_64      517     /* basic 64-bit SI_ARCHITECTURE */
 #endif
 
-static void do_sysinfo(int si, const char* string, int* features, int mask) {
-  char   tmp;
-  size_t bufsize = sysinfo(si, &tmp, 1);
+#ifndef SI_CPUBRAND
+#define SI_CPUBRAND             523     /* return cpu brand string */
+#endif
 
-  // All SI defines used below must be supported.
-  guarantee(bufsize != -1, "must be supported");
+class Sysinfo {
+  char* _string;
+public:
+  Sysinfo(int si) : _string(NULL) {
+    char   tmp;
+    size_t bufsize = sysinfo(si, &tmp, 1);
 
-  char* buf = (char*) os::malloc(bufsize, mtInternal);
-
-  if (buf == NULL)
-    return;
-
-  if (sysinfo(si, buf, bufsize) == bufsize) {
-    // Compare the string.
-    if (strcmp(buf, string) == 0) {
-      *features |= mask;
+    if (bufsize != -1) {
+      char* buf = (char*) os::malloc(bufsize, mtInternal);
+      if (buf != NULL) {
+        if (sysinfo(si, buf, bufsize) == bufsize) {
+          _string = buf;
+        } else {
+          os::free(buf);
+        }
+      }
     }
   }
 
-  os::free(buf);
-}
+  ~Sysinfo() {
+    if (_string != NULL) {
+      os::free(_string);
+    }
+  }
+
+  const char* value() const {
+    return _string;
+  }
+
+  bool valid() const {
+    return _string != NULL;
+  }
+
+  bool match(const char* s) const {
+    return valid() ? strcmp(_string, s) == 0 : false;
+  }
+
+  bool match_substring(const char* s) const {
+    return valid() ? strstr(_string, s) != NULL : false;
+  }
+};
+
+class Sysconf {
+  int _value;
+public:
+  Sysconf(int sc) : _value(-1) {
+    _value = sysconf(sc);
+  }
+  bool valid() const {
+    return _value != -1;
+  }
+  int value() const {
+    return _value;
+  }
+};
+
+
+#ifndef _SC_DCACHE_LINESZ
+#define _SC_DCACHE_LINESZ       508     /* Data cache line size */
+#endif
+
+#ifndef _SC_L2CACHE_LINESZ
+#define _SC_L2CACHE_LINESZ      527     /* Size of L2 cache line */
+#endif
 
 int VM_Version::platform_features(int features) {
   assert(os::Solaris::supports_getisax(), "getisax() must be available");
 
   // Check 32-bit architecture.
-  do_sysinfo(SI_ARCHITECTURE_32, "sparc", &features, v8_instructions_m);
+  if (Sysinfo(SI_ARCHITECTURE_32).match("sparc")) {
+    features |= v8_instructions_m;
+  }
 
   // Check 64-bit architecture.
-  do_sysinfo(SI_ARCHITECTURE_64, "sparcv9", &features, generic_v9_m);
+  if (Sysinfo(SI_ARCHITECTURE_64).match("sparcv9")) {
+    features |= generic_v9_m;
+  }
 
   // Extract valid instruction set extensions.
   uint_t avs[2];
@@ -388,67 +440,63 @@
   if (av & AV_SPARC_SHA512)       features |= sha512_instruction_m;
 
   // Determine the machine type.
-  do_sysinfo(SI_MACHINE, "sun4v", &features, sun4v_m);
+  if (Sysinfo(SI_MACHINE).match("sun4v")) {
+    features |= sun4v_m;
+  }
 
-  {
-    // Using kstat to determine the machine type.
+  bool use_solaris_12_api = false;
+  Sysinfo impl(SI_CPUBRAND);
+  if (impl.valid()) {
+    // If SI_CPUBRAND works, that means Solaris 12 API to get the cache line sizes
+    // is available to us as well
+    use_solaris_12_api = true;
+    features |= parse_features(impl.value());
+  } else {
+    // Otherwise use kstat to determine the machine type.
     kstat_ctl_t* kc = kstat_open();
     kstat_t* ksp = kstat_lookup(kc, (char*)"cpu_info", -1, NULL);
-    const char* implementation = "UNKNOWN";
+    const char* implementation;
+    bool has_implementation = false;
     if (ksp != NULL) {
       if (kstat_read(kc, ksp, NULL) != -1 && ksp->ks_data != NULL) {
         kstat_named_t* knm = (kstat_named_t *)ksp->ks_data;
         for (int i = 0; i < ksp->ks_ndata; i++) {
           if (strcmp((const char*)&(knm[i].name),"implementation") == 0) {
             implementation = KSTAT_NAMED_STR_PTR(&knm[i]);
+            has_implementation = true;
 #ifndef PRODUCT
             if (PrintMiscellaneous && Verbose) {
               tty->print_cr("cpu_info.implementation: %s", implementation);
             }
 #endif
-            // Convert to UPPER case before compare.
-            char* impl = os::strdup_check_oom(implementation);
-
-            for (int i = 0; impl[i] != 0; i++)
-              impl[i] = (char)toupper((uint)impl[i]);
-
-            if (strstr(impl, "SPARC64") != NULL) {
-              features |= sparc64_family_m;
-            } else if (strstr(impl, "SPARC-M") != NULL) {
-              // M-series SPARC is based on T-series.
-              features |= (M_family_m | T_family_m);
-            } else if (strstr(impl, "SPARC-T") != NULL) {
-              features |= T_family_m;
-              if (strstr(impl, "SPARC-T1") != NULL) {
-                features |= T1_model_m;
-              }
-            } else {
-              if (strstr(impl, "SPARC") == NULL) {
-#ifndef PRODUCT
-                // kstat on Solaris 8 virtual machines (branded zones)
-                // returns "(unsupported)" implementation. Solaris 8 is not
-                // supported anymore, but include this check to be on the
-                // safe side.
-                warning("kstat cpu_info implementation = '%s', assume generic SPARC", impl);
-#endif
-                implementation = "SPARC";
-              }
-            }
-            os::free((void*)impl);
+            features |= parse_features(implementation);
             break;
           }
         } // for(
       }
     }
-    assert(strcmp(implementation, "UNKNOWN") != 0,
-           "unknown cpu info (changed kstat interface?)");
+    assert(has_implementation, "unknown cpu info (changed kstat interface?)");
     kstat_close(kc);
   }
 
-  // Figure out cache line sizes using PICL
-  PICL picl((features & sparc64_family_m) != 0, (features & sun4v_m) != 0);
-  _L1_data_cache_line_size = picl.L1_data_cache_line_size();
-  _L2_data_cache_line_size = picl.L2_data_cache_line_size();
+  bool is_sun4v = (features & sun4v_m) != 0;
+  if (use_solaris_12_api && is_sun4v) {
+    // If Solaris 12 API is supported and it's sun4v use sysconf() to get the cache line sizes
+    Sysconf l1_dcache_line_size(_SC_DCACHE_LINESZ);
+    if (l1_dcache_line_size.valid()) {
+      _L1_data_cache_line_size =  l1_dcache_line_size.value();
+    }
 
+    Sysconf l2_dcache_line_size(_SC_L2CACHE_LINESZ);
+    if (l2_dcache_line_size.valid()) {
+      _L2_data_cache_line_size = l2_dcache_line_size.value();
+    }
+  } else {
+    // Otherwise figure out the cache line sizes using PICL
+    bool is_fujitsu = (features & sparc64_family_m) != 0;
+    PICL picl(is_fujitsu, is_sun4v);
+    _L1_data_cache_line_size = picl.L1_data_cache_line_size();
+    _L2_data_cache_line_size = picl.L2_data_cache_line_size();
+  }
   return features;
 }
--- a/src/os_cpu/windows_x86/vm/assembler_windows_x86.cpp	Wed Mar 09 14:54:18 2016 +0100
+++ b/src/os_cpu/windows_x86/vm/assembler_windows_x86.cpp	Wed Mar 09 14:18:12 2016 +0100
@@ -51,11 +51,8 @@
 // movl reg, [reg + thread_ptr_offset]     Load thread
 //
 void MacroAssembler::get_thread(Register thread) {
-  // can't use ExternalAddress because it can't take NULL
-  AddressLiteral null(0, relocInfo::none);
-
   prefix(FS_segment);
-  movptr(thread, null);
+  movptr(thread, ExternalAddress(NULL));
   assert(os::win32::get_thread_ptr_offset() != 0,
          "Thread Pointer Offset has not been initialized");
   movl(thread, Address(thread, os::win32::get_thread_ptr_offset()));
--- a/src/share/vm/adlc/formssel.cpp	Wed Mar 09 14:54:18 2016 +0100
+++ b/src/share/vm/adlc/formssel.cpp	Wed Mar 09 14:18:12 2016 +0100
@@ -3491,6 +3491,8 @@
     "LoadPLocked",
     "StorePConditional", "StoreIConditional", "StoreLConditional",
     "CompareAndSwapI", "CompareAndSwapL", "CompareAndSwapP", "CompareAndSwapN",
+    "WeakCompareAndSwapI", "WeakCompareAndSwapL", "WeakCompareAndSwapP", "WeakCompareAndSwapN",
+    "CompareAndExchangeI", "CompareAndExchangeL", "CompareAndExchangeP", "CompareAndExchangeN",
     "StoreCM",
     "ClearArray",
     "GetAndAddI", "GetAndSetI", "GetAndSetP",
--- a/src/share/vm/c1/c1_CFGPrinter.hpp	Wed Mar 09 14:54:18 2016 +0100
+++ b/src/share/vm/c1/c1_CFGPrinter.hpp	Wed Mar 09 14:18:12 2016 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2005, 2016, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -34,7 +34,9 @@
 // compilation for later analysis.
 
 class CFGPrinterOutput;
-class IntervalList;
+class Interval;
+
+typedef GrowableArray<Interval*> IntervalList;
 
 class CFGPrinter : public AllStatic {
 private:
--- a/src/share/vm/c1/c1_Canonicalizer.cpp	Wed Mar 09 14:54:18 2016 +0100
+++ b/src/share/vm/c1/c1_Canonicalizer.cpp	Wed Mar 09 14:18:12 2016 +0100
@@ -222,27 +222,36 @@
 }
 
 void Canonicalizer::do_ArrayLength    (ArrayLength*     x) {
-  NewArray* array = x->array()->as_NewArray();
-  if (array != NULL && array->length() != NULL) {
-    Constant* length = array->length()->as_Constant();
-    if (length != NULL) {
-      // do not use the Constant itself, but create a new Constant
-      // with same value Otherwise a Constant is live over multiple
-      // blocks without being registered in a state array.
+  NewArray*  na;
+  Constant*  ct;
+  LoadField* lf;
+
+  if ((na = x->array()->as_NewArray()) != NULL) {
+    // New arrays might have the known length.
+    // Do not use the Constant itself, but create a new Constant
+    // with same value Otherwise a Constant is live over multiple
+    // blocks without being registered in a state array.
+    Constant* length;
+    if (na->length() != NULL &&
+        (length = na->length()->as_Constant()) != NULL) {
       assert(length->type()->as_IntConstant() != NULL, "array length must be integer");
       set_constant(length->type()->as_IntConstant()->value());
     }
-  } else {
-    LoadField* lf = x->array()->as_LoadField();
-    if (lf != NULL) {
-      ciField* field = lf->field();
-      if (field->is_constant() && field->is_static()) {
-        // final static field
-        ciObject* c = field->constant_value().as_object();
-        if (c->is_array()) {
-          ciArray* array = (ciArray*) c;
-          set_constant(array->length());
-        }
+
+  } else if ((ct = x->array()->as_Constant()) != NULL) {
+    // Constant arrays have constant lengths.
+    ArrayConstant* cnst = ct->type()->as_ArrayConstant();
+    if (cnst != NULL) {
+      set_constant(cnst->value()->length());
+    }
+
+  } else if ((lf = x->array()->as_LoadField()) != NULL) {
+    ciField* field = lf->field();
+    if (field->is_constant() && field->is_static()) {
+      assert(PatchALot || ScavengeRootsInCode < 2, "Constant field loads are folded during parsing");
+      ciObject* c = field->constant_value().as_object();
+      if (!c->is_null_object()) {
+        set_constant(c->as_array()->length());
       }
     }
   }
--- a/src/share/vm/c1/c1_LinearScan.cpp	Wed Mar 09 14:54:18 2016 +0100
+++ b/src/share/vm/c1/c1_LinearScan.cpp	Wed Mar 09 14:18:12 2016 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2005, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2005, 2016, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -1434,41 +1434,73 @@
 }
 
 #ifndef PRODUCT
+int interval_cmp(Interval* const& l, Interval* const& r) {
+  return l->from() - r->from();
+}
+
+bool find_interval(Interval* interval, IntervalArray* intervals) {
+  bool found;
+  int idx = intervals->find_sorted<Interval*, interval_cmp>(interval, found);
+
+  if (!found) {
+    return false;
+  }
+
+  int from = interval->from();
+
+  // The index we've found using binary search is pointing to an interval
+  // that is defined in the same place as the interval we were looking for.
+  // So now we have to look around that index and find exact interval.
+  for (int i = idx; i >= 0; i--) {
+    if (intervals->at(i) == interval) {
+      return true;
+    }
+    if (intervals->at(i)->from() != from) {
+      break;
+    }
+  }
+
+  for (int i = idx + 1; i < intervals->length(); i++) {
+    if (intervals->at(i) == interval) {
+      return true;
+    }
+    if (intervals->at(i)->from() != from) {
+      break;
+    }
+  }
+
+  return false;
+}
+
 bool LinearScan::is_sorted(IntervalArray* intervals) {
   int from = -1;
-  int i, j;
-  for (i = 0; i < intervals->length(); i ++) {
+  int null_count = 0;
+
+  for (int i = 0; i < intervals->length(); i++) {
     Interval* it = intervals->at(i);
     if (it != NULL) {
-      if (from > it->from()) {
-        assert(false, "");
-        return false;
-      }
+      assert(from <= it->from(), "Intervals are unordered");
       from = it->from();
-    }
-  }
-
-  // check in both directions if sorted list and unsorted list contain same intervals
-  for (i = 0; i < interval_count(); i++) {
-    if (interval_at(i) != NULL) {
-      int num_found = 0;
-      for (j = 0; j < intervals->length(); j++) {
-        if (interval_at(i) == intervals->at(j)) {
-          num_found++;
-        }
-      }
-      assert(num_found == 1, "lists do not contain same intervals");
-    }
-  }
-  for (j = 0; j < intervals->length(); j++) {
-    int num_found = 0;
-    for (i = 0; i < interval_count(); i++) {
-      if (interval_at(i) == intervals->at(j)) {
-        num_found++;
-      }
-    }
-    assert(num_found == 1, "lists do not contain same intervals");
-  }
+    } else {
+      null_count++;
+    }
+  }
+
+  assert(null_count == 0, "Sorted intervals should not contain nulls");
+
+  null_count = 0;
+
+  for (int i = 0; i < interval_count(); i++) {
+    Interval* interval = interval_at(i);
+    if (interval != NULL) {
+      assert(find_interval(interval, intervals), "Lists do not contain same intervals");
+    } else {
+      null_count++;
+    }
+  }
+
+  assert(interval_count() - null_count == intervals->length(),
+      "Sorted list should contain the same amount of non-NULL intervals as unsorted list");
 
   return true;
 }
@@ -1536,7 +1568,7 @@
       sorted_len++;
     }
   }
-  IntervalArray* sorted_list = new IntervalArray(sorted_len);
+  IntervalArray* sorted_list = new IntervalArray(sorted_len, sorted_len, NULL);
 
   // special sorting algorithm: the original interval-list is almost sorted,
   // only some intervals are swapped. So this is much faster than a complete QuickSort
@@ -1574,8 +1606,8 @@
     _needs_full_resort = false;
   }
 
-  IntervalArray* old_list      = _sorted_intervals;
-  IntervalList*  new_list      = _new_intervals_from_allocation;
+  IntervalArray* old_list = _sorted_intervals;
+  IntervalList* new_list = _new_intervals_from_allocation;
   int old_len = old_list->length();
   int new_len = new_list->length();
 
@@ -1589,7 +1621,8 @@
   new_list->sort(interval_cmp);
 
   // merge old and new list (both already sorted) into one combined list
-  IntervalArray* combined_list = new IntervalArray(old_len + new_len);
+  int combined_list_len = old_len + new_len;
+  IntervalArray* combined_list = new IntervalArray(combined_list_len, combined_list_len, NULL);
   int old_idx = 0;
   int new_idx = 0;
 
@@ -3211,6 +3244,10 @@
       has_error = true;
     }
 
+    // special intervals that are created in MoveResolver
+    // -> ignore them because the range information has no meaning there
+    if (i1->from() == 1 && i1->to() == 2) continue;
+
     if (i1->first() == Range::end()) {
       tty->print_cr("Interval %d has no Range", i1->reg_num()); i1->print(); tty->cr();
       has_error = true;
@@ -3225,18 +3262,13 @@
 
     for (int j = i + 1; j < len; j++) {
       Interval* i2 = interval_at(j);
-      if (i2 == NULL) continue;
-
-      // special intervals that are created in MoveResolver
-      // -> ignore them because the range information has no meaning there
-      if (i1->from() == 1 && i1->to() == 2) continue;
-      if (i2->from() == 1 && i2->to() == 2) continue;
+      if (i2 == NULL || (i2->from() == 1 && i2->to() == 2)) continue;
 
       int r1 = i1->assigned_reg();
       int r1Hi = i1->assigned_regHi();
       int r2 = i2->assigned_reg();
       int r2Hi = i2->assigned_regHi();
-      if (i1->intersects(i2) && (r1 == r2 || r1 == r2Hi || (r1Hi != any_reg && (r1Hi == r2 || r1Hi == r2Hi)))) {
+      if ((r1 == r2 || r1 == r2Hi || (r1Hi != any_reg && (r1Hi == r2 || r1Hi == r2Hi))) && i1->intersects(i2)) {
         tty->print_cr("Intervals %d and %d overlap and have the same register assigned", i1->reg_num(), i2->reg_num());
         i1->print(); tty->cr();
         i2->print(); tty->cr();
@@ -3429,7 +3461,8 @@
 
 void RegisterVerifier::verify(BlockBegin* start) {
   // setup input registers (method arguments) for first block
-  IntervalList* input_state = new IntervalList(state_size(), NULL);
+  int input_state_len = state_size();
+  IntervalList* input_state = new IntervalList(input_state_len, input_state_len, NULL);
   CallingConvention* args = compilation()->frame_map()->incoming_arguments();
   for (int n = 0; n < args->length(); n++) {
     LIR_Opr opr = args->at(n);
@@ -3543,7 +3576,7 @@
 
 IntervalList* RegisterVerifier::copy(IntervalList* input_state) {
   IntervalList* copy_state = new IntervalList(input_state->length());
-  copy_state->push_all(input_state);
+  copy_state->appendAll(input_state);
   return copy_state;
 }
 
@@ -5506,7 +5539,7 @@
     IntervalList* processed = _spill_intervals[reg];
     for (int i = 0; i < _spill_intervals[regHi]->length(); i++) {
       Interval* it = _spill_intervals[regHi]->at(i);
-      if (processed->index_of(it) == -1) {
+      if (processed->find_from_end(it) == -1) {
         remove_from_list(it);
         split_and_spill_interval(it);
       }
--- a/src/share/vm/c1/c1_LinearScan.hpp	Wed Mar 09 14:54:18 2016 +0100
+++ b/src/share/vm/c1/c1_LinearScan.hpp	Wed Mar 09 14:18:12 2016 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2005, 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2005, 2016, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -42,8 +42,8 @@
 class MoveResolver;
 class Range;
 
-define_array(IntervalArray, Interval*)
-define_stack(IntervalList, IntervalArray)
+typedef GrowableArray<Interval*> IntervalArray;
+typedef GrowableArray<Interval*> IntervalList;
 
 define_array(IntervalsArray, IntervalList*)
 define_stack(IntervalsList, IntervalsArray)
--- a/src/share/vm/ci/ciArray.cpp	Wed Mar 09 14:54:18 2016 +0100
+++ b/src/share/vm/ci/ciArray.cpp	Wed Mar 09 14:18:12 2016 +0100
@@ -107,8 +107,9 @@
   intptr_t header = arrayOopDesc::base_offset_in_bytes(elembt);
   intptr_t index = (element_offset - header) >> shift;
   intptr_t offset = header + ((intptr_t)index << shift);
-  if (offset != element_offset || index != (jint)index)
+  if (offset != element_offset || index != (jint)index || index < 0 || index >= length()) {
     return ciConstant();
+  }
   return element_value((jint) index);
 }
 
--- a/src/share/vm/ci/ciConstantPoolCache.cpp	Wed Mar 09 14:54:18 2016 +0100
+++ b/src/share/vm/ci/ciConstantPoolCache.cpp	Wed Mar 09 14:18:12 2016 +0100
@@ -41,15 +41,21 @@
   _keys = new (arena) GrowableArray<int>(arena, expected_size, 0, 0);
 }
 
+int ciConstantPoolCache::key_compare(const int& key, const int& elt) {
+  if (key < elt)      return -1;
+  else if (key > elt) return 1;
+  else                  return 0;
+}
+
 // ------------------------------------------------------------------
 // ciConstantPoolCache::get
 //
 // Get the entry at some index
 void* ciConstantPoolCache::get(int index) {
   ASSERT_IN_VM;
-  int pos = find(index);
-  if (pos >= _keys->length() ||
-      _keys->at(pos) != index) {
+  bool found = false;
+  int pos = _keys->find_sorted<int, ciConstantPoolCache::key_compare>(index, found);
+  if (!found) {
     // This element is not present in the cache.
     return NULL;
   }
@@ -57,42 +63,15 @@
 }
 
 // ------------------------------------------------------------------
-// ciConstantPoolCache::find
-//
-// Use binary search to find the position of this index in the cache.
-// If there is no entry in the cache corresponding to this oop, return
-// the position at which the index would be inserted.
-int ciConstantPoolCache::find(int key) {
-  int min = 0;
-  int max = _keys->length()-1;
-
-  while (max >= min) {
-    int mid = (max + min) / 2;
-    int value = _keys->at(mid);
-    if (value < key) {
-      min = mid + 1;
-    } else if (value > key) {
-      max = mid - 1;
-    } else {
-      return mid;
-    }
-  }
-  return min;
-}
-
-// ------------------------------------------------------------------
 // ciConstantPoolCache::insert
 //
 // Insert a ciObject into the table at some index.
 void ciConstantPoolCache::insert(int index, void* elem) {
-  int i;
-  int pos = find(index);
-  for (i = _keys->length()-1; i >= pos; i--) {
-    _keys->at_put_grow(i+1, _keys->at(i));
-    _elements->at_put_grow(i+1, _elements->at(i));
-  }
-  _keys->at_put_grow(pos, index);
-  _elements->at_put_grow(pos, elem);
+  bool found = false;
+  int pos = _keys->find_sorted<int, ciConstantPoolCache::key_compare>(index, found);
+  assert(!found, "duplicate");
+  _keys->insert_before(pos, index);
+  _elements->insert_before(pos, elem);
 }
 
 // ------------------------------------------------------------------
--- a/src/share/vm/ci/ciConstantPoolCache.hpp	Wed Mar 09 14:54:18 2016 +0100
+++ b/src/share/vm/ci/ciConstantPoolCache.hpp	Wed Mar 09 14:18:12 2016 +0100
@@ -38,7 +38,7 @@
   GrowableArray<int>*   _keys;
   GrowableArray<void*>* _elements;
 
-  int find(int index);
+  static int key_compare(const int& key, const int& elt);
 
 public:
   ciConstantPoolCache(Arena* arena, int expected_size);
--- a/src/share/vm/ci/ciObjectFactory.cpp	Wed Mar 09 14:54:18 2016 +0100
+++ b/src/share/vm/ci/ciObjectFactory.cpp	Wed Mar 09 14:18:12 2016 +0100
@@ -260,6 +260,13 @@
   return new_object;
 }
 
+int ciObjectFactory::metadata_compare(Metadata* const& key, ciMetadata* const& elt) {
+  Metadata* value = elt->constant_encoding();
+  if (key < value)      return -1;
+  else if (key > value) return 1;
+  else                  return 0;
+}
+
 // ------------------------------------------------------------------
 // ciObjectFactory::get_metadata
 //
@@ -280,7 +287,8 @@
   }
 #endif // ASSERT
   int len = _ci_metadata->length();
-  int index = find(key, _ci_metadata);
+  bool found = false;
+  int index = _ci_metadata->find_sorted<Metadata*, ciObjectFactory::metadata_compare>(key, found);
 #ifdef ASSERT
   if (CIObjectFactoryVerify) {
     for (int i=0; i<_ci_metadata->length(); i++) {
@@ -290,7 +298,8 @@
     }
   }
 #endif
-  if (!is_found_at(index, key, _ci_metadata)) {
+
+  if (!found) {
     // The ciMetadata does not yet exist. Create it and insert it
     // into the cache.
     ciMetadata* new_object = create_new_metadata(key);
@@ -300,10 +309,10 @@
     if (len != _ci_metadata->length()) {
       // creating the new object has recursively entered new objects
       // into the table.  We need to recompute our index.
-      index = find(key, _ci_metadata);
+      index = _ci_metadata->find_sorted<Metadata*, ciObjectFactory::metadata_compare>(key, found);
     }
-    assert(!is_found_at(index, key, _ci_metadata), "no double insert");
-    insert(index, new_object, _ci_metadata);
+    assert(!found, "no double insert");
+    _ci_metadata->insert_before(index, new_object);
     return new_object;
   }
   return _ci_metadata->at(index)->as_metadata();
@@ -655,60 +664,6 @@
   obj->set_ident(_next_ident++);
 }
 
-// ------------------------------------------------------------------
-// ciObjectFactory::find
-//
-// Use binary search to find the position of this oop in the cache.
-// If there is no entry in the cache corresponding to this oop, return
-// the position at which the oop should be inserted.
-int ciObjectFactory::find(Metadata* key, GrowableArray<ciMetadata*>* objects) {
-  int min = 0;
-  int max = objects->length()-1;
-
-  // print_contents();
-
-  while (max >= min) {
-    int mid = (max + min) / 2;
-    Metadata* value = objects->at(mid)->constant_encoding();
-    if (value < key) {
-      min = mid + 1;
-    } else if (value > key) {
-      max = mid - 1;
-    } else {
-      return mid;
-    }
-  }
-  return min;
-}
-
-// ------------------------------------------------------------------
-// ciObjectFactory::is_found_at
-//
-// Verify that the binary seach found the given key.
-bool ciObjectFactory::is_found_at(int index, Metadata* key, GrowableArray<ciMetadata*>* objects) {
-  return (index < objects->length() &&
-          objects->at(index)->constant_encoding() == key);
-}
-
-
-// ------------------------------------------------------------------
-// ciObjectFactory::insert
-//
-// Insert a ciObject into the table at some index.
-void ciObjectFactory::insert(int index, ciMetadata* obj, GrowableArray<ciMetadata*>* objects) {
-  int len = objects->length();
-  if (len == index) {
-    objects->append(obj);
-  } else {
-    objects->append(objects->at(len-1));
-    int pos;
-    for (pos = len-2; pos >= index; pos--) {
-      objects->at_put(pos+1,objects->at(pos));
-    }
-    objects->at_put(index, obj);
-  }
-}
-
 static ciObjectFactory::NonPermObject* emptyBucket = NULL;
 
 // ------------------------------------------------------------------
--- a/src/share/vm/ci/ciObjectFactory.hpp	Wed Mar 09 14:54:18 2016 +0100
+++ b/src/share/vm/ci/ciObjectFactory.hpp	Wed Mar 09 14:18:12 2016 +0100
@@ -68,9 +68,7 @@
   NonPermObject* _non_perm_bucket[NON_PERM_BUCKETS];
   int _non_perm_count;
 
-  int find(Metadata* key, GrowableArray<ciMetadata*>* objects);
-  bool is_found_at(int index, Metadata* key, GrowableArray<ciMetadata*>* objects);
-  void insert(int index, ciMetadata* obj, GrowableArray<ciMetadata*>* objects);
+  static int metadata_compare(Metadata* const& key, ciMetadata* const& elt);
 
   ciObject* create_new_object(oop o);
   ciMetadata* create_new_metadata(Metadata* o);
--- a/src/share/vm/classfile/vmSymbols.cpp	Wed Mar 09 14:54:18 2016 +0100
+++ b/src/share/vm/classfile/vmSymbols.cpp	Wed Mar 09 14:18:12 2016 +0100
@@ -542,6 +542,42 @@
   case vmIntrinsics::_putLongVolatile:
   case vmIntrinsics::_putFloatVolatile:
   case vmIntrinsics::_putDoubleVolatile:
+  case vmIntrinsics::_getObjectAcquire:
+  case vmIntrinsics::_getBooleanAcquire:
+  case vmIntrinsics::_getByteAcquire:
+  case vmIntrinsics::_getShortAcquire:
+  case vmIntrinsics::_getCharAcquire:
+  case vmIntrinsics::_getIntAcquire:
+  case vmIntrinsics::_getLongAcquire:
+  case vmIntrinsics::_getFloatAcquire:
+  case vmIntrinsics::_getDoubleAcquire:
+  case vmIntrinsics::_putObjectRelease:
+  case vmIntrinsics::_putBooleanRelease:
+  case vmIntrinsics::_putByteRelease:
+  case vmIntrinsics::_putShortRelease:
+  case vmIntrinsics::_putCharRelease:
+  case vmIntrinsics::_putIntRelease:
+  case vmIntrinsics::_putLongRelease:
+  case vmIntrinsics::_putFloatRelease:
+  case vmIntrinsics::_putDoubleRelease:
+  case vmIntrinsics::_getObjectOpaque:
+  case vmIntrinsics::_getBooleanOpaque:
+  case vmIntrinsics::_getByteOpaque:
+  case vmIntrinsics::_getShortOpaque:
+  case vmIntrinsics::_getCharOpaque:
+  case vmIntrinsics::_getIntOpaque:
+  case vmIntrinsics::_getLongOpaque:
+  case vmIntrinsics::_getFloatOpaque:
+  case vmIntrinsics::_getDoubleOpaque:
+  case vmIntrinsics::_putObjectOpaque:
+  case vmIntrinsics::_putBooleanOpaque:
+  case vmIntrinsics::_putByteOpaque:
+  case vmIntrinsics::_putShortOpaque:
+  case vmIntrinsics::_putCharOpaque:
+  case vmIntrinsics::_putIntOpaque:
+  case vmIntrinsics::_putLongOpaque:
+  case vmIntrinsics::_putFloatOpaque:
+  case vmIntrinsics::_putDoubleOpaque:
   case vmIntrinsics::_getByte_raw:
   case vmIntrinsics::_getShort_raw:
   case vmIntrinsics::_getChar_raw:
@@ -567,9 +603,27 @@
   case vmIntrinsics::_loadFence:
   case vmIntrinsics::_storeFence:
   case vmIntrinsics::_fullFence:
+  case vmIntrinsics::_compareAndSwapLong:
+  case vmIntrinsics::_weakCompareAndSwapLong:
+  case vmIntrinsics::_weakCompareAndSwapLongAcquire:
+  case vmIntrinsics::_weakCompareAndSwapLongRelease:
+  case vmIntrinsics::_compareAndSwapInt:
+  case vmIntrinsics::_weakCompareAndSwapInt:
+  case vmIntrinsics::_weakCompareAndSwapIntAcquire:
+  case vmIntrinsics::_weakCompareAndSwapIntRelease:
   case vmIntrinsics::_compareAndSwapObject:
-  case vmIntrinsics::_compareAndSwapLong:
-  case vmIntrinsics::_compareAndSwapInt:
+  case vmIntrinsics::_weakCompareAndSwapObject:
+  case vmIntrinsics::_weakCompareAndSwapObjectAcquire:
+  case vmIntrinsics::_weakCompareAndSwapObjectRelease:
+  case vmIntrinsics::_compareAndExchangeIntVolatile:
+  case vmIntrinsics::_compareAndExchangeIntAcquire:
+  case vmIntrinsics::_compareAndExchangeIntRelease:
+  case vmIntrinsics::_compareAndExchangeLongVolatile:
+  case vmIntrinsics::_compareAndExchangeLongAcquire:
+  case vmIntrinsics::_compareAndExchangeLongRelease:
+  case vmIntrinsics::_compareAndExchangeObjectVolatile:
+  case vmIntrinsics::_compareAndExchangeObjectAcquire:
+  case vmIntrinsics::_compareAndExchangeObjectRelease:
     if (!InlineUnsafeOps) return true;
     break;
   case vmIntrinsics::_getShortUnaligned:
--- a/src/share/vm/classfile/vmSymbols.hpp	Wed Mar 09 14:54:18 2016 +0100
+++ b/src/share/vm/classfile/vmSymbols.hpp	Wed Mar 09 14:18:12 2016 +0100
@@ -123,7 +123,7 @@
   template(sun_misc_Launcher_ExtClassLoader,          "sun/misc/Launcher$ExtClassLoader")         \
                                                                                                   \
   /* Java runtime version access */                                                               \
-  template(sun_misc_Version,                          "sun/misc/Version")                         \
+  template(java_lang_VersionProps,                    "java/lang/VersionProps")                   \
   template(java_runtime_name_name,                    "java_runtime_name")                        \
   template(java_runtime_version_name,                 "java_runtime_version")                     \
                                                                                                   \
@@ -1146,6 +1146,64 @@
   do_intrinsic(_putFloatVolatile,         jdk_internal_misc_Unsafe,     putFloatVolatile_name, putFloat_signature,     F_RN)  \
   do_intrinsic(_putDoubleVolatile,        jdk_internal_misc_Unsafe,     putDoubleVolatile_name, putDouble_signature,   F_RN)  \
                                                                                                                         \
+  do_name(getObjectOpaque_name,"getObjectOpaque")     do_name(putObjectOpaque_name,"putObjectOpaque")                   \
+  do_name(getBooleanOpaque_name,"getBooleanOpaque")   do_name(putBooleanOpaque_name,"putBooleanOpaque")                 \
+  do_name(getByteOpaque_name,"getByteOpaque")         do_name(putByteOpaque_name,"putByteOpaque")                       \
+  do_name(getShortOpaque_name,"getShortOpaque")       do_name(putShortOpaque_name,"putShortOpaque")                     \
+  do_name(getCharOpaque_name,"getCharOpaque")         do_name(putCharOpaque_name,"putCharOpaque")                       \
+  do_name(getIntOpaque_name,"getIntOpaque")           do_name(putIntOpaque_name,"putIntOpaque")                         \
+  do_name(getLongOpaque_name,"getLongOpaque")         do_name(putLongOpaque_name,"putLongOpaque")                       \
+  do_name(getFloatOpaque_name,"getFloatOpaque")       do_name(putFloatOpaque_name,"putFloatOpaque")                     \
+  do_name(getDoubleOpaque_name,"getDoubleOpaque")     do_name(putDoubleOpaque_name,"putDoubleOpaque")                   \
+                                                                                                                        \
+  do_intrinsic(_getObjectOpaque,          jdk_internal_misc_Unsafe,        getObjectOpaque_name, getObject_signature,   F_R)  \
+  do_intrinsic(_getBooleanOpaque,         jdk_internal_misc_Unsafe,        getBooleanOpaque_name, getBoolean_signature, F_R)  \
+  do_intrinsic(_getByteOpaque,            jdk_internal_misc_Unsafe,        getByteOpaque_name, getByte_signature,       F_R)  \
+  do_intrinsic(_getShortOpaque,           jdk_internal_misc_Unsafe,        getShortOpaque_name, getShort_signature,     F_R)  \
+  do_intrinsic(_getCharOpaque,            jdk_internal_misc_Unsafe,        getCharOpaque_name, getChar_signature,       F_R)  \
+  do_intrinsic(_getIntOpaque,             jdk_internal_misc_Unsafe,        getIntOpaque_name, getInt_signature,         F_R)  \
+  do_intrinsic(_getLongOpaque,            jdk_internal_misc_Unsafe,        getLongOpaque_name, getLong_signature,       F_R)  \
+  do_intrinsic(_getFloatOpaque,           jdk_internal_misc_Unsafe,        getFloatOpaque_name, getFloat_signature,     F_R)  \
+  do_intrinsic(_getDoubleOpaque,          jdk_internal_misc_Unsafe,        getDoubleOpaque_name, getDouble_signature,   F_R)  \
+  do_intrinsic(_putObjectOpaque,          jdk_internal_misc_Unsafe,        putObjectOpaque_name, putObject_signature,   F_R)  \
+  do_intrinsic(_putBooleanOpaque,         jdk_internal_misc_Unsafe,        putBooleanOpaque_name, putBoolean_signature, F_R)  \
+  do_intrinsic(_putByteOpaque,            jdk_internal_misc_Unsafe,        putByteOpaque_name, putByte_signature,       F_R)  \
+  do_intrinsic(_putShortOpaque,           jdk_internal_misc_Unsafe,        putShortOpaque_name, putShort_signature,     F_R)  \
+  do_intrinsic(_putCharOpaque,            jdk_internal_misc_Unsafe,        putCharOpaque_name, putChar_signature,       F_R)  \
+  do_intrinsic(_putIntOpaque,             jdk_internal_misc_Unsafe,        putIntOpaque_name, putInt_signature,         F_R)  \
+  do_intrinsic(_putLongOpaque,            jdk_internal_misc_Unsafe,        putLongOpaque_name, putLong_signature,       F_R)  \
+  do_intrinsic(_putFloatOpaque,           jdk_internal_misc_Unsafe,        putFloatOpaque_name, putFloat_signature,     F_R)  \
+  do_intrinsic(_putDoubleOpaque,          jdk_internal_misc_Unsafe,        putDoubleOpaque_name, putDouble_signature,   F_R)  \
+                                                                                                                        \
+  do_name(getObjectAcquire_name,  "getObjectAcquire")    do_name(putObjectRelease_name,  "putObjectRelease")            \
+  do_name(getBooleanAcquire_name, "getBooleanAcquire")   do_name(putBooleanRelease_name, "putBooleanRelease")           \
+  do_name(getByteAcquire_name,    "getByteAcquire")      do_name(putByteRelease_name,    "putByteRelease")              \
+  do_name(getShortAcquire_name,   "getShortAcquire")     do_name(putShortRelease_name,   "putShortRelease")             \
+  do_name(getCharAcquire_name,    "getCharAcquire")      do_name(putCharRelease_name,    "putCharRelease")              \
+  do_name(getIntAcquire_name,     "getIntAcquire")       do_name(putIntRelease_name,     "putIntRelease")               \
+  do_name(getLongAcquire_name,    "getLongAcquire")      do_name(putLongRelease_name,    "putLongRelease")              \
+  do_name(getFloatAcquire_name,   "getFloatAcquire")     do_name(putFloatRelease_name,   "putFloatRelease")             \
+  do_name(getDoubleAcquire_name,  "getDoubleAcquire")    do_name(putDoubleRelease_name,  "putDoubleRelease")            \
+                                                                                                                        \
+  do_intrinsic(_getObjectAcquire,        jdk_internal_misc_Unsafe,        getObjectAcquire_name, getObject_signature,   F_R)  \
+  do_intrinsic(_getBooleanAcquire,       jdk_internal_misc_Unsafe,        getBooleanAcquire_name, getBoolean_signature, F_R)  \
+  do_intrinsic(_getByteAcquire,          jdk_internal_misc_Unsafe,        getByteAcquire_name, getByte_signature,       F_R)  \
+  do_intrinsic(_getShortAcquire,         jdk_internal_misc_Unsafe,        getShortAcquire_name, getShort_signature,     F_R)  \
+  do_intrinsic(_getCharAcquire,          jdk_internal_misc_Unsafe,        getCharAcquire_name, getChar_signature,       F_R)  \
+  do_intrinsic(_getIntAcquire,           jdk_internal_misc_Unsafe,        getIntAcquire_name, getInt_signature,         F_R)  \
+  do_intrinsic(_getLongAcquire,          jdk_internal_misc_Unsafe,        getLongAcquire_name, getLong_signature,       F_R)  \
+  do_intrinsic(_getFloatAcquire,         jdk_internal_misc_Unsafe,        getFloatAcquire_name, getFloat_signature,     F_R)  \
+  do_intrinsic(_getDoubleAcquire,        jdk_internal_misc_Unsafe,        getDoubleAcquire_name, getDouble_signature,   F_R)  \
+  do_intrinsic(_putObjectRelease,        jdk_internal_misc_Unsafe,        putObjectRelease_name, putObject_signature,   F_R)  \
+  do_intrinsic(_putBooleanRelease,       jdk_internal_misc_Unsafe,        putBooleanRelease_name, putBoolean_signature, F_R)  \
+  do_intrinsic(_putByteRelease,          jdk_internal_misc_Unsafe,        putByteRelease_name, putByte_signature,       F_R)  \
+  do_intrinsic(_putShortRelease,         jdk_internal_misc_Unsafe,        putShortRelease_name, putShort_signature,     F_R)  \
+  do_intrinsic(_putCharRelease,          jdk_internal_misc_Unsafe,        putCharRelease_name, putChar_signature,       F_R)  \
+  do_intrinsic(_putIntRelease,           jdk_internal_misc_Unsafe,        putIntRelease_name, putInt_signature,         F_R)  \
+  do_intrinsic(_putLongRelease,          jdk_internal_misc_Unsafe,        putLongRelease_name, putLong_signature,       F_R)  \
+  do_intrinsic(_putFloatRelease,         jdk_internal_misc_Unsafe,        putFloatRelease_name, putFloat_signature,     F_R)  \
+  do_intrinsic(_putDoubleRelease,        jdk_internal_misc_Unsafe,        putDoubleRelease_name, putDouble_signature,   F_R)  \
+                                                                                                                        \
   do_name(getShortUnaligned_name,"getShortUnaligned")     do_name(putShortUnaligned_name,"putShortUnaligned")           \
   do_name(getCharUnaligned_name,"getCharUnaligned")       do_name(putCharUnaligned_name,"putCharUnaligned")             \
   do_name(getIntUnaligned_name,"getIntUnaligned")         do_name(putIntUnaligned_name,"putIntUnaligned")               \
@@ -1197,24 +1255,68 @@
   do_intrinsic(_putDouble_raw,            jdk_internal_misc_Unsafe,     putDouble_name, putDouble_raw_signature,       F_R)  \
   do_intrinsic(_putAddress_raw,           jdk_internal_misc_Unsafe,     putAddress_name, putAddress_raw_signature,     F_R)  \
                                                                                                                         \
-  do_intrinsic(_compareAndSwapObject,     jdk_internal_misc_Unsafe,     compareAndSwapObject_name, compareAndSwapObject_signature, F_R) \
-   do_name(     compareAndSwapObject_name,                              "compareAndSwapObject")                                \
-   do_signature(compareAndSwapObject_signature,                         "(Ljava/lang/Object;JLjava/lang/Object;Ljava/lang/Object;)Z")          \
-  do_intrinsic(_compareAndSwapLong,       jdk_internal_misc_Unsafe,     compareAndSwapLong_name, compareAndSwapLong_signature, F_R) \
-   do_name(     compareAndSwapLong_name,                                "compareAndSwapLong")                                  \
-   do_signature(compareAndSwapLong_signature,                           "(Ljava/lang/Object;JJJ)Z")                            \
-  do_intrinsic(_compareAndSwapInt,        jdk_internal_misc_Unsafe,     compareAndSwapInt_name, compareAndSwapInt_signature, F_R) \
-   do_name(     compareAndSwapInt_name,                                 "compareAndSwapInt")                                   \
-   do_signature(compareAndSwapInt_signature,                            "(Ljava/lang/Object;JII)Z")                            \
-  do_intrinsic(_putOrderedObject,         jdk_internal_misc_Unsafe,     putOrderedObject_name, putOrderedObject_signature, F_R) \
-   do_name(     putOrderedObject_name,                                  "putOrderedObject")                                    \
-   do_alias(    putOrderedObject_signature,                             /*(LObject;JLObject;)V*/ putObject_signature)           \
-  do_intrinsic(_putOrderedLong,           jdk_internal_misc_Unsafe,     putOrderedLong_name, putOrderedLong_signature, F_R)  \
-   do_name(     putOrderedLong_name,                                    "putOrderedLong")                                      \
-   do_alias(    putOrderedLong_signature,                               /*(Ljava/lang/Object;JJ)V*/ putLong_signature)          \
-  do_intrinsic(_putOrderedInt,            jdk_internal_misc_Unsafe,     putOrderedInt_name, putOrderedInt_signature,   F_R)  \
-   do_name(     putOrderedInt_name,                                     "putOrderedInt")                                       \
-   do_alias(    putOrderedInt_signature,                                 /*(Ljava/lang/Object;JI)V*/ putInt_signature)           \
+  do_signature(compareAndSwapObject_signature,     "(Ljava/lang/Object;JLjava/lang/Object;Ljava/lang/Object;)Z")        \
+  do_signature(compareAndExchangeObject_signature, "(Ljava/lang/Object;JLjava/lang/Object;Ljava/lang/Object;)Ljava/lang/Object;") \
+  do_signature(compareAndSwapLong_signature,       "(Ljava/lang/Object;JJJ)Z")                                          \
+  do_signature(compareAndExchangeLong_signature,   "(Ljava/lang/Object;JJJ)J")                                          \
+  do_signature(compareAndSwapInt_signature,        "(Ljava/lang/Object;JII)Z")                                          \
+  do_signature(compareAndExchangeInt_signature,    "(Ljava/lang/Object;JII)I")                                          \
+                                                                                                                        \
+  do_name(compareAndSwapObject_name,             "compareAndSwapObject")                                                \
+  do_name(compareAndExchangeObjectVolatile_name, "compareAndExchangeObjectVolatile")                                    \
+  do_name(compareAndExchangeObjectAcquire_name,  "compareAndExchangeObjectAcquire")                                     \
+  do_name(compareAndExchangeObjectRelease_name,  "compareAndExchangeObjectRelease")                                     \
+  do_name(compareAndSwapLong_name,               "compareAndSwapLong")                                                  \
+  do_name(compareAndExchangeLongVolatile_name,   "compareAndExchangeLongVolatile")                                      \
+  do_name(compareAndExchangeLongAcquire_name,    "compareAndExchangeLongAcquire")                                       \
+  do_name(compareAndExchangeLongRelease_name,    "compareAndExchangeLongRelease")                                       \
+  do_name(compareAndSwapInt_name,                "compareAndSwapInt")                                                   \
+  do_name(compareAndExchangeIntVolatile_name,    "compareAndExchangeIntVolatile")                                       \
+  do_name(compareAndExchangeIntAcquire_name,     "compareAndExchangeIntAcquire")                                        \
+  do_name(compareAndExchangeIntRelease_name,     "compareAndExchangeIntRelease")                                        \
+                                                                                                                        \
+  do_name(weakCompareAndSwapObject_name,         "weakCompareAndSwapObject")                                            \
+  do_name(weakCompareAndSwapObjectAcquire_name,  "weakCompareAndSwapObjectAcquire")                                     \
+  do_name(weakCompareAndSwapObjectRelease_name,  "weakCompareAndSwapObjectRelease")                                     \
+  do_name(weakCompareAndSwapLong_name,           "weakCompareAndSwapLong")                                              \
+  do_name(weakCompareAndSwapLongAcquire_name,    "weakCompareAndSwapLongAcquire")                                       \
+  do_name(weakCompareAndSwapLongRelease_name,    "weakCompareAndSwapLongRelease")                                       \
+  do_name(weakCompareAndSwapInt_name,            "weakCompareAndSwapInt")                                               \
+  do_name(weakCompareAndSwapIntAcquire_name,     "weakCompareAndSwapIntAcquire")                                        \
+  do_name(weakCompareAndSwapIntRelease_name,     "weakCompareAndSwapIntRelease")                                        \
+                                                                                                                        \
+  do_intrinsic(_compareAndSwapObject,             jdk_internal_misc_Unsafe,  compareAndSwapObject_name,             compareAndSwapObject_signature,     F_RN) \
+  do_intrinsic(_compareAndExchangeObjectVolatile, jdk_internal_misc_Unsafe,  compareAndExchangeObjectVolatile_name, compareAndExchangeObject_signature, F_RN) \
+  do_intrinsic(_compareAndExchangeObjectAcquire,  jdk_internal_misc_Unsafe,  compareAndExchangeObjectAcquire_name,  compareAndExchangeObject_signature, F_R)  \
+  do_intrinsic(_compareAndExchangeObjectRelease,  jdk_internal_misc_Unsafe,  compareAndExchangeObjectRelease_name,  compareAndExchangeObject_signature, F_R)  \
+  do_intrinsic(_compareAndSwapLong,               jdk_internal_misc_Unsafe,  compareAndSwapLong_name,               compareAndSwapLong_signature,       F_RN) \
+  do_intrinsic(_compareAndExchangeLongVolatile,   jdk_internal_misc_Unsafe,  compareAndExchangeLongVolatile_name,   compareAndExchangeLong_signature,   F_RN) \
+  do_intrinsic(_compareAndExchangeLongAcquire,    jdk_internal_misc_Unsafe,  compareAndExchangeLongAcquire_name,    compareAndExchangeLong_signature,   F_R)  \
+  do_intrinsic(_compareAndExchangeLongRelease,    jdk_internal_misc_Unsafe,  compareAndExchangeLongRelease_name,    compareAndExchangeLong_signature,   F_R)  \
+  do_intrinsic(_compareAndSwapInt,                jdk_internal_misc_Unsafe,  compareAndSwapInt_name,                compareAndSwapInt_signature,        F_RN) \
+  do_intrinsic(_compareAndExchangeIntVolatile,    jdk_internal_misc_Unsafe,  compareAndExchangeIntVolatile_name,    compareAndExchangeInt_signature,    F_RN) \
+  do_intrinsic(_compareAndExchangeIntAcquire,     jdk_internal_misc_Unsafe,  compareAndExchangeIntAcquire_name,     compareAndExchangeInt_signature,    F_R)  \
+  do_intrinsic(_compareAndExchangeIntRelease,     jdk_internal_misc_Unsafe,  compareAndExchangeIntRelease_name,     compareAndExchangeInt_signature,    F_R)  \
+                                                                                                                                                              \
+  do_intrinsic(_weakCompareAndSwapObject,         jdk_internal_misc_Unsafe,  weakCompareAndSwapObject_name,         compareAndSwapObject_signature,     F_R) \
+  do_intrinsic(_weakCompareAndSwapObjectAcquire,  jdk_internal_misc_Unsafe,  weakCompareAndSwapObjectAcquire_name,  compareAndSwapObject_signature,     F_R) \
+  do_intrinsic(_weakCompareAndSwapObjectRelease,  jdk_internal_misc_Unsafe,  weakCompareAndSwapObjectRelease_name,  compareAndSwapObject_signature,     F_R) \
+  do_intrinsic(_weakCompareAndSwapLong,           jdk_internal_misc_Unsafe,  weakCompareAndSwapLong_name,           compareAndSwapLong_signature,       F_R) \
+  do_intrinsic(_weakCompareAndSwapLongAcquire,    jdk_internal_misc_Unsafe,  weakCompareAndSwapLongAcquire_name,    compareAndSwapLong_signature,       F_R) \
+  do_intrinsic(_weakCompareAndSwapLongRelease,    jdk_internal_misc_Unsafe,  weakCompareAndSwapLongRelease_name,    compareAndSwapLong_signature,       F_R) \
+  do_intrinsic(_weakCompareAndSwapInt,            jdk_internal_misc_Unsafe,  weakCompareAndSwapInt_name,            compareAndSwapInt_signature,        F_R) \
+  do_intrinsic(_weakCompareAndSwapIntAcquire,     jdk_internal_misc_Unsafe,  weakCompareAndSwapIntAcquire_name,     compareAndSwapInt_signature,        F_R) \
+  do_intrinsic(_weakCompareAndSwapIntRelease,     jdk_internal_misc_Unsafe,  weakCompareAndSwapIntRelease_name,     compareAndSwapInt_signature,        F_R) \
+                                                                                                                        \
+  do_intrinsic(_putOrderedObject,         jdk_internal_misc_Unsafe,        putOrderedObject_name, putOrderedObject_signature, F_RN) \
+   do_name(     putOrderedObject_name,                           "putOrderedObject")                                    \
+   do_alias(    putOrderedObject_signature,                     /*(LObject;JLObject;)V*/ putObject_signature)           \
+  do_intrinsic(_putOrderedLong,           jdk_internal_misc_Unsafe,        putOrderedLong_name, putOrderedLong_signature, F_RN)  \
+   do_name(     putOrderedLong_name,                             "putOrderedLong")                                      \
+   do_alias(    putOrderedLong_signature,                       /*(Ljava/lang/Object;JJ)V*/ putLong_signature)          \
+  do_intrinsic(_putOrderedInt,            jdk_internal_misc_Unsafe,        putOrderedInt_name, putOrderedInt_signature,   F_RN)  \
+   do_name(     putOrderedInt_name,                              "putOrderedInt")                                       \
+   do_alias(    putOrderedInt_signature,                        /*(Ljava/lang/Object;JI)V*/ putInt_signature)           \
                                                                                                                         \
   do_intrinsic(_getAndAddInt,             jdk_internal_misc_Unsafe,     getAndAddInt_name, getAndAddInt_signature, F_R)       \
    do_name(     getAndAddInt_name,                                      "getAndAddInt")                                       \
--- a/src/share/vm/code/codeCache.cpp	Wed Mar 09 14:54:18 2016 +0100
+++ b/src/share/vm/code/codeCache.cpp	Wed Mar 09 14:18:12 2016 +0100
@@ -1045,7 +1045,7 @@
 // Keeps track of time spent for checking dependencies
 NOT_PRODUCT(static elapsedTimer dependentCheckTime;)
 
-int CodeCache::mark_for_deoptimization(DepChange& changes) {
+int CodeCache::mark_for_deoptimization(KlassDepChange& changes) {
   MutexLockerEx mu(CodeCache_lock, Mutex::_no_safepoint_check_flag);
   int number_of_marked_CodeBlobs = 0;
 
--- a/src/share/vm/code/codeCache.hpp	Wed Mar 09 14:54:18 2016 +0100
+++ b/src/share/vm/code/codeCache.hpp	Wed Mar 09 14:18:12 2016 +0100
@@ -72,7 +72,7 @@
 // Solaris and BSD.
 
 class OopClosure;
-class DepChange;
+class KlassDepChange;
 
 class CodeCache : AllStatic {
   friend class VMStructs;
@@ -231,7 +231,7 @@
 
   // Deoptimization
  private:
-  static int  mark_for_deoptimization(DepChange& changes);
+  static int  mark_for_deoptimization(KlassDepChange& changes);
 #ifdef HOTSWAP
   static int  mark_for_evol_deoptimization(instanceKlassHandle dependee);
 #endif // HOTSWAP
--- a/src/share/vm/code/dependencies.hpp	Wed Mar 09 14:54:18 2016 +0100
+++ b/src/share/vm/code/dependencies.hpp	Wed Mar 09 14:18:12 2016 +0100
@@ -664,6 +664,8 @@
   virtual bool is_klass_change()     const { return false; }
   virtual bool is_call_site_change() const { return false; }
 
+  virtual void mark_for_deoptimization(nmethod* nm) = 0;
+
   // Subclass casting with assertions.
   KlassDepChange*    as_klass_change() {
     assert(is_klass_change(), "bad cast");
@@ -753,6 +755,10 @@
   // What kind of DepChange is this?
   virtual bool is_klass_change() const { return true; }
 
+  virtual void mark_for_deoptimization(nmethod* nm) {
+    nm->mark_for_deoptimization(/*inc_recompile_counts=*/true);
+  }
+
   Klass* new_type() { return _new_type(); }
 
   // involves_context(k) is true if k is new_type or any of the super types
@@ -772,6 +778,10 @@
   // What kind of DepChange is this?
   virtual bool is_call_site_change() const { return true; }
 
+  virtual void mark_for_deoptimization(nmethod* nm) {
+    nm->mark_for_deoptimization(/*inc_recompile_counts=*/false);
+  }
+
   oop call_site()     const { return _call_site();     }
   oop method_handle() const { return _method_handle(); }
 };
--- a/src/share/vm/code/dependencyContext.cpp	Wed Mar 09 14:54:18 2016 +0100
+++ b/src/share/vm/code/dependencyContext.cpp	Wed Mar 09 14:18:12 2016 +0100
@@ -73,7 +73,7 @@
         nm->print();
         nm->print_dependencies();
       }
-      nm->mark_for_deoptimization();
+      changes.mark_for_deoptimization(nm);
       found++;
     }
   }
--- a/src/share/vm/code/nmethod.cpp	Wed Mar 09 14:54:18 2016 +0100
+++ b/src/share/vm/code/nmethod.cpp	Wed Mar 09 14:18:12 2016 +0100
@@ -536,7 +536,7 @@
   _has_method_handle_invokes  = 0;
   _lazy_critical_native       = 0;
   _has_wide_vectors           = 0;
-  _marked_for_deoptimization  = 0;
+  _mark_for_deoptimization_status = not_marked;
   _lock_count                 = 0;
   _stack_traversal_mark       = 0;
   _unload_reported            = false; // jvmti state
@@ -1458,7 +1458,7 @@
                   SharedRuntime::get_handle_wrong_method_stub());
     }
 
-    if (is_in_use()) {
+    if (is_in_use() && update_recompile_counts()) {
       // It's a true state change, so mark the method as decompiled.
       // Do it only for transition from alive.
       inc_decompile_count();
--- a/src/share/vm/code/nmethod.hpp	Wed Mar 09 14:54:18 2016 +0100
+++ b/src/share/vm/code/nmethod.hpp	Wed Mar 09 14:18:12 2016 +0100
@@ -107,6 +107,7 @@
 //  [Implicit Null Pointer exception table]
 //  - implicit null table array
 
+class DepChange;
 class Dependencies;
 class ExceptionHandlerTable;
 class ImplicitExceptionTable;
@@ -188,7 +189,13 @@
   bool _has_flushed_dependencies;            // Used for maintenance of dependencies (CodeCache_lock)
 
   bool _marked_for_reclamation;              // Used by NMethodSweeper (set only by sweeper)
-  bool _marked_for_deoptimization;           // Used for stack deoptimization
+
+  enum MarkForDeoptimizationStatus {
+    not_marked,
+    deoptimize,
+    deoptimize_noupdate };
+
+  MarkForDeoptimizationStatus _mark_for_deoptimization_status; // Used for stack deoptimization
 
   // used by jvmti to track if an unload event has been posted for this nmethod.
   bool _unload_reported;
@@ -462,8 +469,16 @@
   void set_unloading_clock(unsigned char unloading_clock);
   unsigned char unloading_clock();
 
-  bool  is_marked_for_deoptimization() const      { return _marked_for_deoptimization; }
-  void  mark_for_deoptimization()                 { _marked_for_deoptimization = true; }
+  bool  is_marked_for_deoptimization() const      { return _mark_for_deoptimization_status != not_marked; }
+  void  mark_for_deoptimization(bool inc_recompile_counts = true) {
+    _mark_for_deoptimization_status = (inc_recompile_counts ? deoptimize : deoptimize_noupdate);
+  }
+  bool update_recompile_counts() const {
+    // Update recompile counts when either the update is explicitly requested (deoptimize)
+    // or the nmethod is not marked for deoptimization at all (not_marked).
+    // The latter happens during uncommon traps when deoptimized nmethod is made not entrant.
+    return _mark_for_deoptimization_status != deoptimize_noupdate;
+  }
 
   void  make_unloaded(BoolObjectClosure* is_alive, oop cause);
 
--- a/src/share/vm/code/relocInfo.cpp	Wed Mar 09 14:54:18 2016 +0100
+++ b/src/share/vm/code/relocInfo.cpp	Wed Mar 09 14:18:12 2016 +0100
@@ -457,49 +457,6 @@
   return itr._rh;
 }
 
-int32_t Relocation::runtime_address_to_index(address runtime_address) {
-  assert(!is_reloc_index((intptr_t)runtime_address), "must not look like an index");
-
-  if (runtime_address == NULL)  return 0;
-
-  StubCodeDesc* p = StubCodeDesc::desc_for(runtime_address);
-  if (p != NULL && p->begin() == runtime_address) {
-    assert(is_reloc_index(p->index()), "there must not be too many stubs");
-    return (int32_t)p->index();
-  } else {
-    // Known "miscellaneous" non-stub pointers:
-    // os::get_polling_page(), SafepointSynchronize::address_of_state()
-    if (PrintRelocations) {
-      tty->print_cr("random unregistered address in relocInfo: " INTPTR_FORMAT, p2i(runtime_address));
-    }
-#ifndef _LP64
-    return (int32_t) (intptr_t)runtime_address;
-#else
-    // didn't fit return non-index
-    return -1;
-#endif /* _LP64 */
-  }
-}
-
-
-address Relocation::index_to_runtime_address(int32_t index) {
-  if (index == 0)  return NULL;
-
-  if (is_reloc_index(index)) {
-    StubCodeDesc* p = StubCodeDesc::desc_for_index(index);
-    assert(p != NULL, "there must be a stub for this index");
-    return p->begin();
-  } else {
-#ifndef _LP64
-    // this only works on 32bit machines
-    return (address) ((intptr_t) index);
-#else
-    fatal("Relocation::index_to_runtime_address, int32_t not pointer sized");
-    return NULL;
-#endif /* _LP64 */
-  }
-}
-
 address Relocation::old_addr_for(address newa,
                                  const CodeBuffer* src, CodeBuffer* dest) {
   int sect = dest->section_index_of(newa);
@@ -623,20 +580,13 @@
 
 void external_word_Relocation::pack_data_to(CodeSection* dest) {
   short* p = (short*) dest->locs_end();
-  int32_t index = runtime_address_to_index(_target);
 #ifndef _LP64
-  p = pack_1_int_to(p, index);
+  p = pack_1_int_to(p, (int32_t) (intptr_t)_target);
 #else
-  if (is_reloc_index(index)) {
-    p = pack_2_ints_to(p, index, 0);
-  } else {
-    jlong t = (jlong) _target;
-    int32_t lo = low(t);
-    int32_t hi = high(t);
-    p = pack_2_ints_to(p, lo, hi);
-    DEBUG_ONLY(jlong t1 = jlong_from(hi, lo));
-    assert(!is_reloc_index(t1) && (address) t1 == _target, "not symmetric");
-  }
+  jlong t = (jlong) _target;
+  int32_t lo = low(t);
+  int32_t hi = high(t);
+  p = pack_2_ints_to(p, lo, hi);
 #endif /* _LP64 */
   dest->set_locs_end((relocInfo*) p);
 }
@@ -644,16 +594,12 @@
 
 void external_word_Relocation::unpack_data() {
 #ifndef _LP64
-  _target = index_to_runtime_address(unpack_1_int());
+  _target = (address) (intptr_t)unpack_1_int();
 #else
   int32_t lo, hi;
   unpack_2_ints(lo, hi);
   jlong t = jlong_from(hi, lo);;
-  if (is_reloc_index(t)) {
-    _target = index_to_runtime_address(t);
-  } else {
-    _target = (address) t;
-  }
+  _target = (address) t;
 #endif /* _LP64 */
 }
 
--- a/src/share/vm/code/relocInfo.hpp	Wed Mar 09 14:54:18 2016 +0100
+++ b/src/share/vm/code/relocInfo.hpp	Wed Mar 09 14:18:12 2016 +0100
@@ -707,10 +707,6 @@
     assert(datalen()==0 || type()==relocInfo::none, "no data here");
   }
 
-  static bool is_reloc_index(intptr_t index) {
-    return 0 < index && index < os::vm_page_size();
-  }
-
  protected:
   // Helper functions for pack_data_to() and unpack_data().
 
@@ -806,10 +802,6 @@
     return base + byte_offset;
   }
 
-  // these convert between indexes and addresses in the runtime system
-  static int32_t runtime_address_to_index(address runtime_address);
-  static address index_to_runtime_address(int32_t index);
-
   // helpers for mapping between old and new addresses after a move or resize
   address old_addr_for(address newa, const CodeBuffer* src, CodeBuffer* dest);
   address new_addr_for(address olda, const CodeBuffer* src, CodeBuffer* dest);
@@ -1253,7 +1245,8 @@
   // Some address looking values aren't safe to treat as relocations
   // and should just be treated as constants.
   static bool can_be_relocated(address target) {
-    return target != NULL && !is_reloc_index((intptr_t)target);
+    assert(target == NULL || (uintptr_t)target >= (uintptr_t)os::vm_page_size(), INTPTR_FORMAT, (intptr_t)target);
+    return target != NULL;
   }
 
  private:
--- a/src/share/vm/compiler/compileBroker.cpp	Wed Mar 09 14:54:18 2016 +0100
+++ b/src/share/vm/compiler/compileBroker.cpp	Wed Mar 09 14:18:12 2016 +0100
@@ -469,7 +469,6 @@
 void CompileBroker::print_compile_queues(outputStream* st) {
   st->print_cr("Current compiles: ");
   MutexLocker locker(MethodCompileQueue_lock);
-  MutexLocker locker2(Threads_lock);
 
   char buf[2000];
   int buflen = sizeof(buf);
@@ -2152,18 +2151,33 @@
 
     if (CITime) {
       int bytes_compiled = method->code_size() + task->num_inlined_bytecodes();
-      JVMCI_ONLY(CompilerStatistics* stats = compiler(task->comp_level())->stats();)
       if (is_osr) {
         _t_osr_compilation.add(time);
         _sum_osr_bytes_compiled += bytes_compiled;
-        JVMCI_ONLY(stats->_osr.update(time, bytes_compiled);)
       } else {
         _t_standard_compilation.add(time);
         _sum_standard_bytes_compiled += method->code_size() + task->num_inlined_bytecodes();
-        JVMCI_ONLY(stats->_standard.update(time, bytes_compiled);)
       }
-      JVMCI_ONLY(stats->_nmethods_size += code->total_size();)
-      JVMCI_ONLY(stats->_nmethods_code_size += code->insts_size();)
+
+#if INCLUDE_JVMCI
+      AbstractCompiler* comp = compiler(task->comp_level());
+      if (comp) {
+        CompilerStatistics* stats = comp->stats();
+        if (stats) {
+          if (is_osr) {
+            stats->_osr.update(time, bytes_compiled);
+          } else {
+            stats->_standard.update(time, bytes_compiled);
+          }
+          stats->_nmethods_size += code->total_size();
+          stats->_nmethods_code_size += code->insts_size();
+        } else { // if (!stats)
+          assert(false, "Compiler statistics object must exist");
+        }
+      } else { // if (!comp)
+        assert(false, "Compiler object must exist");
+      }
+#endif // INCLUDE_JVMCI
     }
 
     if (UsePerfData) {
@@ -2222,11 +2236,15 @@
 #if INCLUDE_JVMCI
 void CompileBroker::print_times(AbstractCompiler* comp) {
   CompilerStatistics* stats = comp->stats();
-  tty->print_cr("  %s {speed: %d bytes/s; standard: %6.3f s, %d bytes, %d methods; osr: %6.3f s, %d bytes, %d methods; nmethods_size: %d bytes; nmethods_code_size: %d bytes}",
+  if (stats) {
+    tty->print_cr("  %s {speed: %d bytes/s; standard: %6.3f s, %d bytes, %d methods; osr: %6.3f s, %d bytes, %d methods; nmethods_size: %d bytes; nmethods_code_size: %d bytes}",
                 comp->name(), stats->bytes_per_second(),
                 stats->_standard._time.seconds(), stats->_standard._bytes, stats->_standard._count,
                 stats->_osr._time.seconds(), stats->_osr._bytes, stats->_osr._count,
                 stats->_nmethods_size, stats->_nmethods_code_size);
+  } else { // if (!stats)
+    assert(false, "Compiler statistics object must exist");
+  }
   comp->print_timers();
 }
 #endif // INCLUDE_JVMCI
@@ -2260,17 +2278,21 @@
       }
       CompilerStatistics* stats = comp->stats();
 
-      standard_compilation.add(stats->_standard._time);
-      osr_compilation.add(stats->_osr._time);
+      if (stats) {
+        standard_compilation.add(stats->_standard._time);
+        osr_compilation.add(stats->_osr._time);
 
-      standard_bytes_compiled += stats->_standard._bytes;
-      osr_bytes_compiled += stats->_osr._bytes;
+        standard_bytes_compiled += stats->_standard._bytes;
+        osr_bytes_compiled += stats->_osr._bytes;
 
-      standard_compile_count += stats->_standard._count;
-      osr_compile_count += stats->_osr._count;
+        standard_compile_count += stats->_standard._count;
+        osr_compile_count += stats->_osr._count;
 
-      nmethods_size += stats->_nmethods_size;
-      nmethods_code_size += stats->_nmethods_code_size;
+        nmethods_size += stats->_nmethods_size;
+        nmethods_code_size += stats->_nmethods_code_size;
+      } else { // if (!stats)
+        assert(false, "Compiler statistics object must exist");
+      }
 
       if (per_compiler) {
         print_times(comp);
--- a/src/share/vm/jvmci/commandLineFlagConstraintsJVMCI.cpp	Wed Mar 09 14:54:18 2016 +0100
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,51 +0,0 @@
-/*
- * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- *
- */
-
-#include "precompiled.hpp"
-#include "jvmci/commandLineFlagConstraintsJVMCI.hpp"
-#include "runtime/arguments.hpp"
-#include "runtime/globals.hpp"
-#include "utilities/defaultStream.hpp"
-
-Flag::Error EnableJVMCIMustBeEnabledConstraintFunc(bool value, bool verbose) {
-  if (!EnableJVMCI) {
-    if (verbose == true) {
-      jio_fprintf(defaultStream::error_stream(), "EnableJVMCI must be enabled\n");
-    }
-    return Flag::VIOLATES_CONSTRAINT;
-  } else {
-    return Flag::SUCCESS;
-  }
-}
-
-Flag::Error EnableJVMCIMustBeEnabledConstraintFunc(intx value, bool verbose) {
-  if (!EnableJVMCI) {
-    if (verbose == true) {
-      jio_fprintf(defaultStream::error_stream(), "EnableJVMCI must be enabled\n");
-    }
-    return Flag::VIOLATES_CONSTRAINT;
-  } else {
-    return Flag::SUCCESS;
-  }
-}
--- a/src/share/vm/jvmci/commandLineFlagConstraintsJVMCI.hpp	Wed Mar 09 14:54:18 2016 +0100
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,40 +0,0 @@
-/*
- * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- *
- */
-
-#ifndef SHARE_VM_JVMCI_COMMANDLINEFLAGCONSTRAINTSJVMCI_HPP
-#define SHARE_VM_JVMCI_COMMANDLINEFLAGCONSTRAINTSJVMCI_HPP
-
-#include "runtime/globals.hpp"
-#include "utilities/globalDefinitions.hpp"
-
-/*
- * Here we have JVMCI arguments constraints functions, which are called automatically
- * whenever flag's value changes. If the constraint fails the function should return
- * an appropriate error value.
- */
-
-Flag::Error EnableJVMCIMustBeEnabledConstraintFunc(bool value, bool verbose);
-Flag::Error EnableJVMCIMustBeEnabledConstraintFunc(intx value, bool verbose);
-
-#endif /* SHARE_VM_JVMCI_COMMANDLINEFLAGCONSTRAINTSJVMCI_HPP */
--- a/src/share/vm/jvmci/jvmciCodeInstaller.cpp	Wed Mar 09 14:54:18 2016 +0100
+++ b/src/share/vm/jvmci/jvmciCodeInstaller.cpp	Wed Mar 09 14:18:12 2016 +0100
@@ -551,6 +551,14 @@
                                        compiler, _debug_recorder, _dependencies, env, id,
                                        has_unsafe_access, _has_wide_vector, installed_code, compiled_code, speculation_log);
     cb = nm;
+    if (nm != NULL && env == NULL) {
+      DirectiveSet* directive = DirectivesStack::getMatchingDirective(method, compiler);
+      bool printnmethods = directive->PrintAssemblyOption || directive->PrintNMethodsOption;
+      if (printnmethods || PrintDebugInfo || PrintRelocations || PrintDependencies || PrintExceptionHandlers) {
+        nm->print_nmethod(printnmethods);
+      }
+      DirectivesStack::release(directive);
+    }
   }
 
   if (cb != NULL) {
--- a/src/share/vm/jvmci/jvmciRuntime.cpp	Wed Mar 09 14:54:18 2016 +0100
+++ b/src/share/vm/jvmci/jvmciRuntime.cpp	Wed Mar 09 14:18:12 2016 +0100
@@ -293,13 +293,11 @@
     // tracing
     if (log_is_enabled(Info, exceptions)) {
       ResourceMark rm;
-      log_info(exceptions)("Exception <%s> (" INTPTR_FORMAT ") thrown in"
-                           " compiled method <%s> at PC " INTPTR_FORMAT
-                           " for thread " INTPTR_FORMAT,
-                           exception->print_value_string(),
-                           p2i((address)exception()),
-                           nm->method()->print_value_string(), p2i(pc),
-                           p2i(thread));
+      stringStream tempst;
+      tempst.print("compiled method <%s>\n"
+                   " at PC" INTPTR_FORMAT " for thread " INTPTR_FORMAT,
+                   nm->method()->print_value_string(), p2i(pc), p2i(thread));
+      Exceptions::log_exception(exception, tempst);
     }
     // for AbortVMOnException flag
     NOT_PRODUCT(Exceptions::debug_check_abort(exception));
--- a/src/share/vm/jvmci/jvmci_globals.cpp	Wed Mar 09 14:54:18 2016 +0100
+++ b/src/share/vm/jvmci/jvmci_globals.cpp	Wed Mar 09 14:18:12 2016 +0100
@@ -24,6 +24,8 @@
 
 #include "precompiled.hpp"
 #include "jvmci/jvmci_globals.hpp"
+#include "utilities/defaultStream.hpp"
+#include "runtime/globals_extension.hpp"
 
 JVMCI_FLAGS(MATERIALIZE_DEVELOPER_FLAG, \
             MATERIALIZE_PD_DEVELOPER_FLAG, \
@@ -34,3 +36,185 @@
             MATERIALIZE_NOTPRODUCT_FLAG,
             IGNORE_RANGE, \
             IGNORE_CONSTRAINT)
+
+#define JVMCI_IGNORE_FLAG_FOUR_PARAM(type, name, value, doc)
+#define JVMCI_IGNORE_FLAG_THREE_PARAM(type, name, doc)
+
+// Return true if jvmci flags are consistent.
+bool JVMCIGlobals::check_jvmci_flags_are_consistent() {
+  if (EnableJVMCI) {
+    return true;
+  }
+
+  // "FLAG_IS_DEFAULT" fail count.
+  int fail_count = 0;
+  // Number of "FLAG_IS_DEFAULT" fails that should be skipped before code
+  // detect real consistency failure.
+  int skip_fail_count;
+
+  // EnableJVMCI flag is false here.
+  // If any other flag is changed, consistency check should fail.
+  // JVMCI_FLAGS macros added below can handle all JVMCI flags automatically.
+  // But it contains check for EnableJVMCI flag too, which is required to be
+  // skipped. This can't be handled easily!
+  // So the code looks for at-least two flag changes to detect consistency
+  // failure when EnableJVMCI flag is changed.
+  // Otherwise one flag change is sufficient to detect consistency failure.
+  // Set skip_fail_count to 0 if EnableJVMCI flag is default.
+  // Set skip_fail_count to 1 if EnableJVMCI flag is changed.
+  // This value will be used to skip fails in macro expanded code later.
+  if (!FLAG_IS_DEFAULT(EnableJVMCI)) {
+    skip_fail_count = 1;
+  } else {
+    skip_fail_count = 0;
+  }
+
+#define EMIT_FLAG_VALUE_CHANGED_CHECK_CODE(FLAG)  \
+  if (!FLAG_IS_DEFAULT(FLAG)) {                   \
+    fail_count++;                                 \
+    if (fail_count > skip_fail_count) {           \
+      return false;                               \
+    }                                             \
+  }
+
+#define JVMCI_DIAGNOSTIC_FLAG_VALUE_CHANGED_CHECK_CODE(type, name, value, doc)     EMIT_FLAG_VALUE_CHANGED_CHECK_CODE(name)
+#define JVMCI_EXPERIMENTAL_FLAG_VALUE_CHANGED_CHECK_CODE(type, name, value, doc)   EMIT_FLAG_VALUE_CHANGED_CHECK_CODE(name)
+
+  // Check consistency of diagnostic flags if UnlockDiagnosticVMOptions is true
+  // or not default. UnlockDiagnosticVMOptions is default true in debug builds.
+  if (UnlockDiagnosticVMOptions || !FLAG_IS_DEFAULT(UnlockDiagnosticVMOptions)) {
+    JVMCI_FLAGS(JVMCI_IGNORE_FLAG_FOUR_PARAM, \
+                JVMCI_IGNORE_FLAG_THREE_PARAM, \
+                JVMCI_IGNORE_FLAG_FOUR_PARAM, \
+                JVMCI_IGNORE_FLAG_THREE_PARAM, \
+                JVMCI_DIAGNOSTIC_FLAG_VALUE_CHANGED_CHECK_CODE, \
+                JVMCI_IGNORE_FLAG_FOUR_PARAM, \
+                JVMCI_IGNORE_FLAG_FOUR_PARAM, \
+                IGNORE_RANGE, \
+                IGNORE_CONSTRAINT)
+  }
+
+  // Check consistency of experimental flags if UnlockExperimentalVMOptions is
+  // true or not default.
+  if (UnlockExperimentalVMOptions || !FLAG_IS_DEFAULT(UnlockExperimentalVMOptions)) {
+    JVMCI_FLAGS(JVMCI_IGNORE_FLAG_FOUR_PARAM, \
+                JVMCI_IGNORE_FLAG_THREE_PARAM, \
+                JVMCI_IGNORE_FLAG_FOUR_PARAM, \
+                JVMCI_IGNORE_FLAG_THREE_PARAM, \
+                JVMCI_IGNORE_FLAG_FOUR_PARAM, \
+                JVMCI_EXPERIMENTAL_FLAG_VALUE_CHANGED_CHECK_CODE, \
+                JVMCI_IGNORE_FLAG_FOUR_PARAM, \
+                IGNORE_RANGE, \
+                IGNORE_CONSTRAINT)
+  }
+
+#ifndef PRODUCT
+#define JVMCI_DEVELOP_FLAG_VALUE_CHANGED_CHECK_CODE(type, name, value, doc)        EMIT_FLAG_VALUE_CHANGED_CHECK_CODE(name)
+#define JVMCI_PD_DEVELOP_FLAG_VALUE_CHANGED_CHECK_CODE(type, name, doc)            EMIT_FLAG_VALUE_CHANGED_CHECK_CODE(name)
+#define JVMCI_NOTPRODUCT_FLAG_VALUE_CHANGED_CHECK_CODE(type, name, value, doc)     EMIT_FLAG_VALUE_CHANGED_CHECK_CODE(name)
+#else
+#define JVMCI_DEVELOP_FLAG_VALUE_CHANGED_CHECK_CODE(type, name, value, doc)
+#define JVMCI_PD_DEVELOP_FLAG_VALUE_CHANGED_CHECK_CODE(type, name, doc)
+#define JVMCI_NOTPRODUCT_FLAG_VALUE_CHANGED_CHECK_CODE(type, name, value, doc)
+#endif
+
+#define JVMCI_PD_PRODUCT_FLAG_VALUE_CHANGED_CHECK_CODE(type, name, doc)            EMIT_FLAG_VALUE_CHANGED_CHECK_CODE(name)
+#define JVMCI_PRODUCT_FLAG_VALUE_CHANGED_CHECK_CODE(type, name, value, doc)        EMIT_FLAG_VALUE_CHANGED_CHECK_CODE(name)
+
+  JVMCI_FLAGS(JVMCI_DEVELOP_FLAG_VALUE_CHANGED_CHECK_CODE, \
+              JVMCI_PD_DEVELOP_FLAG_VALUE_CHANGED_CHECK_CODE, \
+              JVMCI_PRODUCT_FLAG_VALUE_CHANGED_CHECK_CODE, \
+              JVMCI_PD_PRODUCT_FLAG_VALUE_CHANGED_CHECK_CODE, \
+              JVMCI_IGNORE_FLAG_FOUR_PARAM, \
+              JVMCI_IGNORE_FLAG_FOUR_PARAM, \
+              JVMCI_NOTPRODUCT_FLAG_VALUE_CHANGED_CHECK_CODE, \
+              IGNORE_RANGE, \
+              IGNORE_CONSTRAINT)
+
+#undef EMIT_FLAG_VALUE_CHANGED_CHECK_CODE
+#undef JVMCI_DEVELOP_FLAG_VALUE_CHANGED_CHECK_CODE
+#undef JVMCI_PD_DEVELOP_FLAG_VALUE_CHANGED_CHECK_CODE
+#undef JVMCI_NOTPRODUCT_FLAG_VALUE_CHANGED_CHECK_CODE
+#undef JVMCI_DIAGNOSTIC_FLAG_VALUE_CHANGED_CHECK_CODE
+#undef JVMCI_PD_PRODUCT_FLAG_VALUE_CHANGED_CHECK_CODE
+#undef JVMCI_PRODUCT_FLAG_VALUE_CHANGED_CHECK_CODE
+#undef JVMCI_EXPERIMENTAL_FLAG_VALUE_CHANGED_CHECK_CODE
+
+  return true;
+}
+
+// Print jvmci arguments inconsistency error message.
+void JVMCIGlobals::print_jvmci_args_inconsistency_error_message() {
+  const char* error_msg = "Improperly specified VM option '%s'\n";
+  jio_fprintf(defaultStream::error_stream(), "EnableJVMCI must be enabled\n");
+
+#define EMIT_CHECK_PRINT_ERR_MSG_CODE(FLAG)                         \
+  if (!FLAG_IS_DEFAULT(FLAG)) {                                     \
+    if (strcmp(#FLAG, "EnableJVMCI")) {                             \
+      jio_fprintf(defaultStream::error_stream(), error_msg, #FLAG); \
+    }                                                               \
+  }
+
+#define JVMCI_DIAGNOSTIC_FLAG_CHECK_PRINT_ERR_MSG_CODE(type, name, value, doc)     EMIT_CHECK_PRINT_ERR_MSG_CODE(name)
+#define JVMCI_EXPERIMENTAL_FLAG_CHECK_PRINT_ERR_MSG_CODE(type, name, value, doc)   EMIT_CHECK_PRINT_ERR_MSG_CODE(name)
+
+  if (UnlockDiagnosticVMOptions || !FLAG_IS_DEFAULT(UnlockDiagnosticVMOptions)) {
+    JVMCI_FLAGS(JVMCI_IGNORE_FLAG_FOUR_PARAM, \
+                JVMCI_IGNORE_FLAG_THREE_PARAM, \
+                JVMCI_IGNORE_FLAG_FOUR_PARAM, \
+                JVMCI_IGNORE_FLAG_THREE_PARAM, \
+                JVMCI_DIAGNOSTIC_FLAG_CHECK_PRINT_ERR_MSG_CODE, \
+                JVMCI_IGNORE_FLAG_FOUR_PARAM, \
+                JVMCI_IGNORE_FLAG_FOUR_PARAM, \
+                IGNORE_RANGE, \
+                IGNORE_CONSTRAINT)
+  }
+
+  if (UnlockExperimentalVMOptions || !FLAG_IS_DEFAULT(UnlockExperimentalVMOptions)) {
+    JVMCI_FLAGS(JVMCI_IGNORE_FLAG_FOUR_PARAM, \
+                JVMCI_IGNORE_FLAG_THREE_PARAM, \
+                JVMCI_IGNORE_FLAG_FOUR_PARAM, \
+                JVMCI_IGNORE_FLAG_THREE_PARAM, \
+                JVMCI_IGNORE_FLAG_FOUR_PARAM, \
+                JVMCI_EXPERIMENTAL_FLAG_CHECK_PRINT_ERR_MSG_CODE, \
+                JVMCI_IGNORE_FLAG_FOUR_PARAM, \
+                IGNORE_RANGE, \
+                IGNORE_CONSTRAINT)
+  }
+
+#ifndef PRODUCT
+#define JVMCI_DEVELOP_FLAG_CHECK_PRINT_ERR_MSG_CODE(type, name, value, doc)        EMIT_CHECK_PRINT_ERR_MSG_CODE(name)
+#define JVMCI_PD_DEVELOP_FLAG_CHECK_PRINT_ERR_MSG_CODE(type, name, doc)            EMIT_CHECK_PRINT_ERR_MSG_CODE(name)
+#define JVMCI_NOTPRODUCT_FLAG_CHECK_PRINT_ERR_MSG_CODE(type, name, value, doc)     EMIT_CHECK_PRINT_ERR_MSG_CODE(name)
+#else
+#define JVMCI_DEVELOP_FLAG_CHECK_PRINT_ERR_MSG_CODE(type, name, value, doc)
+#define JVMCI_PD_DEVELOP_FLAG_CHECK_PRINT_ERR_MSG_CODE(type, name, doc)
+#define JVMCI_NOTPRODUCT_FLAG_CHECK_PRINT_ERR_MSG_CODE(type, name, value, doc)
+#endif
+
+#define JVMCI_PD_PRODUCT_FLAG_CHECK_PRINT_ERR_MSG_CODE(type, name, doc)            EMIT_CHECK_PRINT_ERR_MSG_CODE(name)
+#define JVMCI_PRODUCT_FLAG_CHECK_PRINT_ERR_MSG_CODE(type, name, value, doc)        EMIT_CHECK_PRINT_ERR_MSG_CODE(name)
+
+  JVMCI_FLAGS(JVMCI_DEVELOP_FLAG_CHECK_PRINT_ERR_MSG_CODE, \
+              JVMCI_PD_DEVELOP_FLAG_CHECK_PRINT_ERR_MSG_CODE, \
+              JVMCI_PRODUCT_FLAG_CHECK_PRINT_ERR_MSG_CODE, \
+              JVMCI_PD_PRODUCT_FLAG_CHECK_PRINT_ERR_MSG_CODE, \
+              JVMCI_IGNORE_FLAG_FOUR_PARAM, \
+              JVMCI_IGNORE_FLAG_FOUR_PARAM, \
+              JVMCI_NOTPRODUCT_FLAG_CHECK_PRINT_ERR_MSG_CODE, \
+              IGNORE_RANGE, \
+              IGNORE_CONSTRAINT)
+
+#undef EMIT_CHECK_PRINT_ERR_MSG_CODE
+#undef JVMCI_DEVELOP_FLAG_CHECK_PRINT_ERR_MSG_CODE
+#undef JVMCI_PD_DEVELOP_FLAG_CHECK_PRINT_ERR_MSG_CODE
+#undef JVMCI_NOTPRODUCT_FLAG_CHECK_PRINT_ERR_MSG_CODE
+#undef JVMCI_PD_PRODUCT_FLAG_CHECK_PRINT_ERR_MSG_CODE
+#undef JVMCI_PRODUCT_FLAG_CHECK_PRINT_ERR_MSG_CODE
+#undef JVMCI_DIAGNOSTIC_FLAG_CHECK_PRINT_ERR_MSG_CODE
+#undef JVMCI_EXPERIMENTAL_FLAG_CHECK_PRINT_ERR_MSG_CODE
+
+}
+
+#undef JVMCI_IGNORE_FLAG_FOUR_PARAM
+#undef JVMCI_IGNORE_FLAG_THREE_PARAM
--- a/src/share/vm/jvmci/jvmci_globals.hpp	Wed Mar 09 14:54:18 2016 +0100
+++ b/src/share/vm/jvmci/jvmci_globals.hpp	Wed Mar 09 14:18:12 2016 +0100
@@ -39,29 +39,23 @@
                                                                             \
   experimental(bool, UseJVMCICompiler, false,                               \
           "Use JVMCI as the default compiler")                              \
-          constraint(EnableJVMCIMustBeEnabledConstraintFunc,AtParse)        \
                                                                             \
   experimental(bool, BootstrapJVMCI, false,                                 \
           "Bootstrap JVMCI before running Java main method")                \
-          constraint(EnableJVMCIMustBeEnabledConstraintFunc,AtParse)        \
                                                                             \
   experimental(bool, PrintBootstrap, true,                                  \
           "Print JVMCI bootstrap progress and summary")                     \
-          constraint(EnableJVMCIMustBeEnabledConstraintFunc,AtParse)        \
                                                                             \
   experimental(intx, JVMCIThreads, 1,                                       \
           "Force number of JVMCI compiler threads to use")                  \
           range(1, max_jint)                                                \
-          constraint(EnableJVMCIMustBeEnabledConstraintFunc,AtParse)        \
                                                                             \
   experimental(intx, JVMCIHostThreads, 1,                                   \
           "Force number of compiler threads for JVMCI host compiler")       \
           range(1, max_jint)                                                \
-          constraint(EnableJVMCIMustBeEnabledConstraintFunc,AtParse)        \
                                                                             \
   experimental(bool, CodeInstallSafepointChecks, true,                      \
           "Perform explicit safepoint checks while installing code")        \
-          constraint(EnableJVMCIMustBeEnabledConstraintFunc,AtParse)        \
                                                                             \
   NOT_COMPILER2(product(intx, MaxVectorSize, 64,                            \
           "Max vector size in bytes, "                                      \
@@ -74,28 +68,22 @@
           "Trace level for JVMCI: "                                         \
           "1 means emit a message for each CompilerToVM call,"              \
           "levels greater than 1 provide progressively greater detail")     \
-          constraint(EnableJVMCIMustBeEnabledConstraintFunc,AtParse)        \
                                                                             \
   experimental(intx, JVMCICounterSize, 0,                                   \
           "Reserved size for benchmark counters")                           \
           range(0, max_jint)                                                \
-          constraint(EnableJVMCIMustBeEnabledConstraintFunc,AtParse)        \
                                                                             \
   experimental(bool, JVMCICountersExcludeCompiler, true,                    \
           "Exclude JVMCI compiler threads from benchmark counters")         \
-          constraint(EnableJVMCIMustBeEnabledConstraintFunc,AtParse)        \
                                                                             \
   develop(bool, JVMCIUseFastLocking, true,                                  \
           "Use fast inlined locking code")                                  \
-          constraint(EnableJVMCIMustBeEnabledConstraintFunc,AtParse)        \
                                                                             \
   experimental(intx, JVMCINMethodSizeLimit, (80*K)*wordSize,                \
           "Maximum size of a compiled method.")                             \
-          constraint(EnableJVMCIMustBeEnabledConstraintFunc,AtParse)        \
                                                                             \
   develop(bool, TraceUncollectedSpeculations, false,                        \
-          "Print message when a failed speculation was not collected")      \
-          constraint(EnableJVMCIMustBeEnabledConstraintFunc,AtParse)        \
+          "Print message when a failed speculation was not collected")
 
 
 // Read default values for JVMCI globals
@@ -110,4 +98,11 @@
             IGNORE_RANGE, \
             IGNORE_CONSTRAINT)
 
+class JVMCIGlobals {
+ public:
+  // Return true if jvmci flags are consistent.
+  static bool check_jvmci_flags_are_consistent();
+  // Print jvmci arguments inconsistency error message.
+  static void print_jvmci_args_inconsistency_error_message();
+};
 #endif // SHARE_VM_JVMCI_JVMCIGLOBALS_HPP
--- a/src/share/vm/jvmci/vmStructs_jvmci.cpp	Wed Mar 09 14:54:18 2016 +0100
+++ b/src/share/vm/jvmci/vmStructs_jvmci.cpp	Wed Mar 09 14:18:12 2016 +0100
@@ -592,6 +592,14 @@
 #endif // TARGET_OS_FAMILY_bsd
 
 
+#ifdef TARGET_ARCH_aarch64
+
+#define VM_STRUCTS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field) \
+  volatile_nonstatic_field(JavaFrameAnchor, _last_Java_fp, intptr_t*)
+
+#endif // TARGET_ARCH_aarch64
+
+
 #ifdef TARGET_ARCH_x86
 
 #define VM_STRUCTS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field) \
--- a/src/share/vm/oops/instanceKlass.cpp	Wed Mar 09 14:54:18 2016 +0100
+++ b/src/share/vm/oops/instanceKlass.cpp	Wed Mar 09 14:18:12 2016 +0100
@@ -1879,7 +1879,7 @@
   return dep_context;
 }
 
-int InstanceKlass::mark_dependent_nmethods(DepChange& changes) {
+int InstanceKlass::mark_dependent_nmethods(KlassDepChange& changes) {
   return dependencies().mark_dependent_nmethods(changes);
 }
 
--- a/src/share/vm/oops/instanceKlass.hpp	Wed Mar 09 14:54:18 2016 +0100
+++ b/src/share/vm/oops/instanceKlass.hpp	Wed Mar 09 14:18:12 2016 +0100
@@ -56,7 +56,7 @@
 // forward declaration for class -- see below for definition
 class BreakpointInfo;
 class ClassFileParser;
-class DepChange;
+class KlassDepChange;
 class DependencyContext;
 class fieldDescriptor;
 class jniIdMapBase;
@@ -821,7 +821,7 @@
 
   // maintenance of deoptimization dependencies
   inline DependencyContext dependencies();
-  int  mark_dependent_nmethods(DepChange& changes);
+  int  mark_dependent_nmethods(KlassDepChange& changes);
   void add_dependent_nmethod(nmethod* nm);
   void remove_dependent_nmethod(nmethod* nm, bool delete_immediately);
 
--- a/src/share/vm/opto/c2compiler.cpp	Wed Mar 09 14:54:18 2016 +0100
+++ b/src/share/vm/opto/c2compiler.cpp	Wed Mar 09 14:18:12 2016 +0100
@@ -243,14 +243,72 @@
   case vmIntrinsics::_reverseBytes_l:
     if (!Matcher::match_rule_supported(Op_ReverseBytesL)) return false;
     break;
+
+  /* CompareAndSwap, Object: */
   case vmIntrinsics::_compareAndSwapObject:
 #ifdef _LP64
+    if ( UseCompressedOops && !Matcher::match_rule_supported(Op_CompareAndSwapN)) return false;
     if (!UseCompressedOops && !Matcher::match_rule_supported(Op_CompareAndSwapP)) return false;
+#else
+    if (!Matcher::match_rule_supported(Op_CompareAndSwapP)) return false;
 #endif
     break;
+  case vmIntrinsics::_weakCompareAndSwapObject:
+  case vmIntrinsics::_weakCompareAndSwapObjectAcquire:
+  case vmIntrinsics::_weakCompareAndSwapObjectRelease:
+#ifdef _LP64
+    if ( UseCompressedOops && !Matcher::match_rule_supported(Op_WeakCompareAndSwapN)) return false;
+    if (!UseCompressedOops && !Matcher::match_rule_supported(Op_WeakCompareAndSwapP)) return false;
+#else
+    if (!Matcher::match_rule_supported(Op_WeakCompareAndSwapP)) return false;
+#endif
+    break;
+  /* CompareAndSwap, Long: */
   case vmIntrinsics::_compareAndSwapLong:
     if (!Matcher::match_rule_supported(Op_CompareAndSwapL)) return false;
     break;
+  case vmIntrinsics::_weakCompareAndSwapLong:
+  case vmIntrinsics::_weakCompareAndSwapLongAcquire:
+  case vmIntrinsics::_weakCompareAndSwapLongRelease:
+    if (!Matcher::match_rule_supported(Op_WeakCompareAndSwapL)) return false;
+    break;
+
+  /* CompareAndSwap, Int: */
+  case vmIntrinsics::_compareAndSwapInt:
+    if (!Matcher::match_rule_supported(Op_CompareAndSwapI)) return false;
+    break;
+  case vmIntrinsics::_weakCompareAndSwapInt:
+  case vmIntrinsics::_weakCompareAndSwapIntAcquire:
+  case vmIntrinsics::_weakCompareAndSwapIntRelease:
+    if (!Matcher::match_rule_supported(Op_WeakCompareAndSwapL)) return false;
+    break;
+
+  /* CompareAndExchange, Object: */
+  case vmIntrinsics::_compareAndExchangeObjectVolatile:
+  case vmIntrinsics::_compareAndExchangeObjectAcquire:
+  case vmIntrinsics::_compareAndExchangeObjectRelease:
+#ifdef _LP64
+    if ( UseCompressedOops && !Matcher::match_rule_supported(Op_CompareAndExchangeN)) return false;
+    if (!UseCompressedOops && !Matcher::match_rule_supported(Op_CompareAndExchangeP)) return false;
+#else
+    if (!Matcher::match_rule_supported(Op_CompareAndExchangeP)) return false;
+#endif
+    break;
+
+  /* CompareAndExchange, Long: */
+  case vmIntrinsics::_compareAndExchangeLongVolatile:
+  case vmIntrinsics::_compareAndExchangeLongAcquire:
+  case vmIntrinsics::_compareAndExchangeLongRelease:
+    if (!Matcher::match_rule_supported(Op_CompareAndExchangeL)) return false;
+    break;
+
+  /* CompareAndExchange, Int: */
+  case vmIntrinsics::_compareAndExchangeIntVolatile:
+  case vmIntrinsics::_compareAndExchangeIntAcquire:
+  case vmIntrinsics::_compareAndExchangeIntRelease:
+    if (!Matcher::match_rule_supported(Op_CompareAndExchangeI)) return false;
+    break;
+
   case vmIntrinsics::_getAndAddInt:
     if (!Matcher::match_rule_supported(Op_GetAndAddI)) return false;
     break;
@@ -382,6 +440,42 @@
   case vmIntrinsics::_putLongVolatile:
   case vmIntrinsics::_putFloatVolatile:
   case vmIntrinsics::_putDoubleVolatile:
+  case vmIntrinsics::_getObjectAcquire:
+  case vmIntrinsics::_getBooleanAcquire:
+  case vmIntrinsics::_getByteAcquire:
+  case vmIntrinsics::_getShortAcquire:
+  case vmIntrinsics::_getCharAcquire:
+  case vmIntrinsics::_getIntAcquire:
+  case vmIntrinsics::_getLongAcquire:
+  case vmIntrinsics::_getFloatAcquire:
+  case vmIntrinsics::_getDoubleAcquire:
+  case vmIntrinsics::_putObjectRelease:
+  case vmIntrinsics::_putBooleanRelease:
+  case vmIntrinsics::_putByteRelease:
+  case vmIntrinsics::_putShortRelease:
+  case vmIntrinsics::_putCharRelease:
+  case vmIntrinsics::_putIntRelease:
+  case vmIntrinsics::_putLongRelease:
+  case vmIntrinsics::_putFloatRelease:
+  case vmIntrinsics::_putDoubleRelease:
+  case vmIntrinsics::_getObjectOpaque:
+  case vmIntrinsics::_getBooleanOpaque:
+  case vmIntrinsics::_getByteOpaque:
+  case vmIntrinsics::_getShortOpaque:
+  case vmIntrinsics::_getCharOpaque:
+  case vmIntrinsics::_getIntOpaque:
+  case vmIntrinsics::_getLongOpaque:
+  case vmIntrinsics::_getFloatOpaque:
+  case vmIntrinsics::_getDoubleOpaque:
+  case vmIntrinsics::_putObjectOpaque:
+  case vmIntrinsics::_putBooleanOpaque:
+  case vmIntrinsics::_putByteOpaque:
+  case vmIntrinsics::_putShortOpaque:
+  case vmIntrinsics::_putCharOpaque:
+  case vmIntrinsics::_putIntOpaque:
+  case vmIntrinsics::_putLongOpaque:
+  case vmIntrinsics::_putFloatOpaque:
+  case vmIntrinsics::_putDoubleOpaque:
   case vmIntrinsics::_getShortUnaligned:
   case vmIntrinsics::_getCharUnaligned:
   case vmIntrinsics::_getIntUnaligned:
@@ -390,7 +484,6 @@
   case vmIntrinsics::_putCharUnaligned:
   case vmIntrinsics::_putIntUnaligned:
   case vmIntrinsics::_putLongUnaligned:
-  case vmIntrinsics::_compareAndSwapInt:
   case vmIntrinsics::_putOrderedObject:
   case vmIntrinsics::_putOrderedInt:
   case vmIntrinsics::_putOrderedLong:
--- a/src/share/vm/opto/classes.hpp	Wed Mar 09 14:54:18 2016 +0100
+++ b/src/share/vm/opto/classes.hpp	Wed Mar 09 14:18:12 2016 +0100
@@ -85,6 +85,14 @@
 macro(CompareAndSwapL)
 macro(CompareAndSwapP)
 macro(CompareAndSwapN)
+macro(WeakCompareAndSwapI)
+macro(WeakCompareAndSwapL)
+macro(WeakCompareAndSwapP)
+macro(WeakCompareAndSwapN)
+macro(CompareAndExchangeI)
+macro(CompareAndExchangeL)
+macro(CompareAndExchangeP)
+macro(CompareAndExchangeN)
 macro(GetAndAddI)
 macro(GetAndAddL)
 macro(GetAndSetI)
--- a/src/share/vm/opto/compile.cpp	Wed Mar 09 14:54:18 2016 +0100
+++ b/src/share/vm/opto/compile.cpp	Wed Mar 09 14:18:12 2016 +0100
@@ -88,7 +88,27 @@
 
 // Return the index at which m must be inserted (or already exists).
 // The sort order is by the address of the ciMethod, with is_virtual as minor key.
-int Compile::intrinsic_insertion_index(ciMethod* m, bool is_virtual) {
+class IntrinsicDescPair {
+ private:
+  ciMethod* _m;
+  bool _is_virtual;
+ public:
+  IntrinsicDescPair(ciMethod* m, bool is_virtual) : _m(m), _is_virtual(is_virtual) {}
+  static int compare(IntrinsicDescPair* const& key, CallGenerator* const& elt) {
+    ciMethod* m= elt->method();
+    ciMethod* key_m = key->_m;
+    if (key_m < m)      return -1;
+    else if (key_m > m) return 1;
+    else {
+      bool is_virtual = elt->is_virtual();
+      bool key_virtual = key->_is_virtual;
+      if (key_virtual < is_virtual)      return -1;
+      else if (key_virtual > is_virtual) return 1;
+      else                               return 0;
+    }
+  }
+};
+int Compile::intrinsic_insertion_index(ciMethod* m, bool is_virtual, bool& found) {
 #ifdef ASSERT
   for (int i = 1; i < _intrinsics->length(); i++) {
     CallGenerator* cg1 = _intrinsics->at(i-1);
@@ -99,63 +119,28 @@
            "compiler intrinsics list must stay sorted");
   }
 #endif
-  // Binary search sorted list, in decreasing intervals [lo, hi].
-  int lo = 0, hi = _intrinsics->length()-1;
-  while (lo <= hi) {
-    int mid = (uint)(hi + lo) / 2;
-    ciMethod* mid_m = _intrinsics->at(mid)->method();
-    if (m < mid_m) {
-      hi = mid-1;
-    } else if (m > mid_m) {
-      lo = mid+1;
-    } else {
-      // look at minor sort key
-      bool mid_virt = _intrinsics->at(mid)->is_virtual();
-      if (is_virtual < mid_virt) {
-        hi = mid-1;
-      } else if (is_virtual > mid_virt) {
-        lo = mid+1;
-      } else {
-        return mid;  // exact match
-      }
-    }
-  }
-  return lo;  // inexact match
+  IntrinsicDescPair pair(m, is_virtual);
+  return _intrinsics->find_sorted<IntrinsicDescPair*, IntrinsicDescPair::compare>(&pair, found);
 }
 
 void Compile::register_intrinsic(CallGenerator* cg) {
   if (_intrinsics == NULL) {
     _intrinsics = new (comp_arena())GrowableArray<CallGenerator*>(comp_arena(), 60, 0, NULL);
   }
-  // This code is stolen from ciObjectFactory::insert.
-  // Really, GrowableArray should have methods for
-  // insert_at, remove_at, and binary_search.
   int len = _intrinsics->length();
-  int index = intrinsic_insertion_index(cg->method(), cg->is_virtual());
-  if (index == len) {
-    _intrinsics->append(cg);
-  } else {
-#ifdef ASSERT
-    CallGenerator* oldcg = _intrinsics->at(index);
-    assert(oldcg->method() != cg->method() || oldcg->is_virtual() != cg->is_virtual(), "don't register twice");
-#endif
-    _intrinsics->append(_intrinsics->at(len-1));
-    int pos;
-    for (pos = len-2; pos >= index; pos--) {
-      _intrinsics->at_put(pos+1,_intrinsics->at(pos));
-    }
-    _intrinsics->at_put(index, cg);
-  }
+  bool found = false;
+  int index = intrinsic_insertion_index(cg->method(), cg->is_virtual(), found);
+  assert(!found, "registering twice");
+  _intrinsics->insert_before(index, cg);
   assert(find_intrinsic(cg->method(), cg->is_virtual()) == cg, "registration worked");
 }
 
 CallGenerator* Compile::find_intrinsic(ciMethod* m, bool is_virtual) {
   assert(m->is_loaded(), "don't try this on unloaded methods");
   if (_intrinsics != NULL) {
-    int index = intrinsic_insertion_index(m, is_virtual);
-    if (index < _intrinsics->length()
-        && _intrinsics->at(index)->method() == m
-        && _intrinsics->at(index)->is_virtual() == is_virtual) {
+    bool found = false;
+    int index = intrinsic_insertion_index(m, is_virtual, found);
+     if (found) {
       return _intrinsics->at(index);
     }
   }
@@ -2801,6 +2786,14 @@
   case Op_CompareAndSwapL:
   case Op_CompareAndSwapP:
   case Op_CompareAndSwapN:
+  case Op_WeakCompareAndSwapI:
+  case Op_WeakCompareAndSwapL:
+  case Op_WeakCompareAndSwapP:
+  case Op_WeakCompareAndSwapN:
+  case Op_CompareAndExchangeI:
+  case Op_CompareAndExchangeL:
+  case Op_CompareAndExchangeP:
+  case Op_CompareAndExchangeN:
   case Op_GetAndAddI:
   case Op_GetAndAddL:
   case Op_GetAndSetI:
--- a/src/share/vm/opto/compile.hpp	Wed Mar 09 14:54:18 2016 +0100
+++ b/src/share/vm/opto/compile.hpp	Wed Mar 09 14:18:12 2016 +0100
@@ -1250,7 +1250,7 @@
   // Intrinsic setup.
   void           register_library_intrinsics();                            // initializer
   CallGenerator* make_vm_intrinsic(ciMethod* m, bool is_virtual);          // constructor
-  int            intrinsic_insertion_index(ciMethod* m, bool is_virtual);  // helper
+  int            intrinsic_insertion_index(ciMethod* m, bool is_virtual, bool& found);  // helper
   CallGenerator* find_intrinsic(ciMethod* m, bool is_virtual);             // query fn
   void           register_intrinsic(CallGenerator* cg);                    // update fn
 
--- a/src/share/vm/opto/escape.cpp	Wed Mar 09 14:54:18 2016 +0100
+++ b/src/share/vm/opto/escape.cpp	Wed Mar 09 14:18:12 2016 +0100
@@ -490,6 +490,8 @@
       }
       break;
     }
+    case Op_CompareAndExchangeP:
+    case Op_CompareAndExchangeN:
     case Op_GetAndSetP:
     case Op_GetAndSetN: {
       add_objload_to_connection_graph(n, delayed_worklist);
@@ -499,6 +501,8 @@
     case Op_StoreN:
     case Op_StoreNKlass:
     case Op_StorePConditional:
+    case Op_WeakCompareAndSwapP:
+    case Op_WeakCompareAndSwapN:
     case Op_CompareAndSwapP:
     case Op_CompareAndSwapN: {
       Node* adr = n->in(MemNode::Address);
@@ -698,8 +702,12 @@
     case Op_StoreN:
     case Op_StoreNKlass:
     case Op_StorePConditional:
+    case Op_CompareAndExchangeP:
+    case Op_CompareAndExchangeN:
     case Op_CompareAndSwapP:
     case Op_CompareAndSwapN:
+    case Op_WeakCompareAndSwapP:
+    case Op_WeakCompareAndSwapN:
     case Op_GetAndSetP:
     case Op_GetAndSetN: {
       Node* adr = n->in(MemNode::Address);
--- a/src/share/vm/opto/graphKit.cpp	Wed Mar 09 14:54:18 2016 +0100
+++ b/src/share/vm/opto/graphKit.cpp	Wed Mar 09 14:18:12 2016 +0100
@@ -1179,8 +1179,10 @@
 // Helper function to do a NULL pointer check.  Returned value is
 // the incoming address with NULL casted away.  You are allowed to use the
 // not-null value only if you are control dependent on the test.
+#ifndef PRODUCT
 extern int explicit_null_checks_inserted,
            explicit_null_checks_elided;
+#endif
 Node* GraphKit::null_check_common(Node* value, BasicType type,
                                   // optional arguments for variations:
                                   bool assert_null,
@@ -1193,7 +1195,7 @@
     value = cast_not_null(value);   // Make it appear to be non-null (4962416).
     return value;
   }
-  explicit_null_checks_inserted++;
+  NOT_PRODUCT(explicit_null_checks_inserted++);
 
   // Construct NULL check
   Node *chk = NULL;
@@ -1233,7 +1235,7 @@
         // See if the type is contained in NULL_PTR.
         // If so, then the value is already null.
         if (t->higher_equal(TypePtr::NULL_PTR)) {
-          explicit_null_checks_elided++;
+          NOT_PRODUCT(explicit_null_checks_elided++);
           return value;           // Elided null assert quickly!
         }
       } else {
@@ -1242,7 +1244,7 @@
         // type.  In other words, "value" was not-null.
         if (t->meet(TypePtr::NULL_PTR) != t->remove_speculative()) {
           // same as: if (!TypePtr::NULL_PTR->higher_equal(t)) ...
-          explicit_null_checks_elided++;
+          NOT_PRODUCT(explicit_null_checks_elided++);
           return value;           // Elided null check quickly!
         }
       }
@@ -1282,7 +1284,7 @@
         set_control(cfg);
         Node *res = cast_not_null(value);
         set_control(oldcontrol);
-        explicit_null_checks_elided++;
+        NOT_PRODUCT(explicit_null_checks_elided++);
         return res;
       }
       cfg = IfNode::up_one_dom(cfg, /*linear_only=*/ true);
@@ -1326,15 +1328,18 @@
     IfNode* iff = create_and_map_if(control(), tst, ok_prob, COUNT_UNKNOWN);
     Node* null_true = _gvn.transform( new IfFalseNode(iff));
     set_control(      _gvn.transform( new IfTrueNode(iff)));
-    if (null_true == top())
+#ifndef PRODUCT
+    if (null_true == top()) {
       explicit_null_checks_elided++;
+    }
+#endif
     (*null_control) = null_true;
   } else {
     BuildCutout unless(this, tst, ok_prob);
     // Check for optimizer eliding test at parse time
     if (stopped()) {
       // Failure not possible; do not bother making uncommon trap.
-      explicit_null_checks_elided++;
+      NOT_PRODUCT(explicit_null_checks_elided++);
     } else if (assert_null) {
       uncommon_trap(reason,
                     Deoptimization::Action_make_not_entrant,
@@ -3149,6 +3154,19 @@
   return membar;
 }
 
+void GraphKit::insert_store_load_for_barrier() {
+  Node* mem = reset_memory();
+  MemBarNode* mb = MemBarNode::make(C, Op_MemBarVolatile, Compile::AliasIdxBot);
+  mb->init_req(TypeFunc::Control, control());
+  mb->init_req(TypeFunc::Memory, mem);
+  Node* membar = _gvn.transform(mb);
+  set_control(_gvn.transform(new ProjNode(membar, TypeFunc::Control)));
+  Node* newmem = _gvn.transform(new ProjNode(membar, TypeFunc::Memory));
+  set_all_memory(mem);
+  set_memory(newmem, Compile::AliasIdxRaw);
+}
+
+
 //------------------------------shared_lock------------------------------------
 // Emit locking code.
 FastLockNode* GraphKit::shared_lock(Node* obj) {
@@ -3840,7 +3858,7 @@
   BasicType bt = T_BYTE;
 
   if (UseConcMarkSweepGC && UseCondCardMark) {
-    insert_mem_bar(Op_MemBarVolatile);   // StoreLoad barrier
+    insert_store_load_for_barrier();
     __ sync_kit(this);
   }
 
@@ -4280,8 +4298,7 @@
 
         __ if_then(card_val, BoolTest::ne, young_card); {
           sync_kit(ideal);
-          // Use Op_MemBarVolatile to achieve the effect of a StoreLoad barrier.
-          insert_mem_bar(Op_MemBarVolatile, oop_store);
+          insert_store_load_for_barrier();
           __ sync_kit(this);
 
           Node* card_val_reload = __ load(__ ctrl(), card_adr, TypeInt::INT, T_BYTE, Compile::AliasIdxRaw);
--- a/src/share/vm/opto/graphKit.hpp	Wed Mar 09 14:54:18 2016 +0100
+++ b/src/share/vm/opto/graphKit.hpp	Wed Mar 09 14:18:12 2016 +0100
@@ -834,6 +834,7 @@
   int next_monitor();
   Node* insert_mem_bar(int opcode, Node* precedent = NULL);
   Node* insert_mem_bar_volatile(int opcode, int alias_idx, Node* precedent = NULL);
+  void insert_store_load_for_barrier();
   // Optional 'precedent' is appended as an extra edge, to force ordering.
   FastLockNode* shared_lock(Node* obj);
   void shared_unlock(Node* box, Node* obj);
--- a/src/share/vm/opto/ifnode.cpp	Wed Mar 09 14:54:18 2016 +0100
+++ b/src/share/vm/opto/ifnode.cpp	Wed Mar 09 14:18:12 2016 +0100
@@ -40,7 +40,9 @@
 // Optimization - Graph Style
 
 
+#ifndef PRODUCT
 extern int explicit_null_checks_elided;
+#endif
 
 //=============================================================================
 //------------------------------Value------------------------------------------
@@ -1504,24 +1506,28 @@
   Node* prev_dom = this;
   int op = Opcode();
   // Search up the dominator tree for an If with an identical test
-  while( dom->Opcode() != op    ||  // Not same opcode?
+  while (dom->Opcode() != op    ||  // Not same opcode?
          dom->in(1)    != in(1) ||  // Not same input 1?
          (req() == 3 && dom->in(2) != in(2)) || // Not same input 2?
-         prev_dom->in(0) != dom ) { // One path of test does not dominate?
-    if( dist < 0 ) return NULL;
+         prev_dom->in(0) != dom) {  // One path of test does not dominate?
+    if (dist < 0) return NULL;
 
     dist--;
     prev_dom = dom;
-    dom = up_one_dom( dom );
-    if( !dom ) return NULL;
+    dom = up_one_dom(dom);
+    if (!dom) return NULL;
   }
 
   // Check that we did not follow a loop back to ourselves
-  if( this == dom )
+  if (this == dom) {
     return NULL;
+  }
 
-  if( dist > 2 )              // Add to count of NULL checks elided
+#ifndef PRODUCT
+  if (dist > 2) { // Add to count of NULL checks elided
     explicit_null_checks_elided++;
+  }
+#endif
 
   return prev_dom;
 }
--- a/src/share/vm/opto/lcm.cpp	Wed Mar 09 14:54:18 2016 +0100
+++ b/src/share/vm/opto/lcm.cpp	Wed Mar 09 14:18:12 2016 +0100
@@ -348,8 +348,10 @@
   }
 
   // ---- Found an implicit null check
+#ifndef PRODUCT
   extern int implicit_null_checks;
   implicit_null_checks++;
+#endif
 
   if( is_decoden ) {
     // Check if we need to hoist decodeHeapOop_not_null first.
--- a/src/share/vm/opto/library_call.cpp	Wed Mar 09 14:54:18 2016 +0100
+++ b/src/share/vm/opto/library_call.cpp	Wed Mar 09 14:18:12 2016 +0100
@@ -243,7 +243,9 @@
   // Generates the guards that check whether the result of
   // Unsafe.getObject should be recorded in an SATB log buffer.
   void insert_pre_barrier(Node* base_oop, Node* offset, Node* pre_val, bool need_mem_bar);
-  bool inline_unsafe_access(bool is_native_ptr, bool is_store, BasicType type, bool is_volatile, bool is_unaligned);
+
+  typedef enum { Relaxed, Opaque, Volatile, Acquire, Release } AccessKind;
+  bool inline_unsafe_access(bool is_native_ptr, bool is_store, BasicType type, AccessKind kind, bool is_unaligned);
   static bool klass_needs_init_guard(Node* kls);
   bool inline_unsafe_allocate();
   bool inline_unsafe_copyMemory();
@@ -273,9 +275,10 @@
   JVMState* arraycopy_restore_alloc_state(AllocateArrayNode* alloc, int& saved_reexecute_sp);
   void arraycopy_move_allocation_here(AllocateArrayNode* alloc, Node* dest, JVMState* saved_jvms, int saved_reexecute_sp);
 
-  typedef enum { LS_xadd, LS_xchg, LS_cmpxchg } LoadStoreKind;
-  bool inline_unsafe_load_store(BasicType type,  LoadStoreKind kind);
-  bool inline_unsafe_ordered_store(BasicType type);
+  typedef enum { LS_get_add, LS_get_set, LS_cmp_swap, LS_cmp_swap_weak, LS_cmp_exchange } LoadStoreKind;
+  MemNode::MemOrd access_kind_to_memord_LS(AccessKind access_kind, bool is_store);
+  MemNode::MemOrd access_kind_to_memord(AccessKind access_kind);
+  bool inline_unsafe_load_store(BasicType type,  LoadStoreKind kind, AccessKind access_kind);
   bool inline_unsafe_fence(vmIntrinsics::ID id);
   bool inline_fp_conversions(vmIntrinsics::ID id);
   bool inline_number_methods(vmIntrinsics::ID id);
@@ -552,86 +555,147 @@
   case vmIntrinsics::_inflateStringC:
   case vmIntrinsics::_inflateStringB:           return inline_string_copy(!is_compress);
 
-  case vmIntrinsics::_getObject:                return inline_unsafe_access(!is_native_ptr, !is_store, T_OBJECT,  !is_volatile, false);
-  case vmIntrinsics::_getBoolean:               return inline_unsafe_access(!is_native_ptr, !is_store, T_BOOLEAN, !is_volatile, false);
-  case vmIntrinsics::_getByte:                  return inline_unsafe_access(!is_native_ptr, !is_store, T_BYTE,    !is_volatile, false);
-  case vmIntrinsics::_getShort:                 return inline_unsafe_access(!is_native_ptr, !is_store, T_SHORT,   !is_volatile, false);
-  case vmIntrinsics::_getChar:                  return inline_unsafe_access(!is_native_ptr, !is_store, T_CHAR,    !is_volatile, false);
-  case vmIntrinsics::_getInt:                   return inline_unsafe_access(!is_native_ptr, !is_store, T_INT,     !is_volatile, false);
-  case vmIntrinsics::_getLong:                  return inline_unsafe_access(!is_native_ptr, !is_store, T_LONG,    !is_volatile, false);
-  case vmIntrinsics::_getFloat:                 return inline_unsafe_access(!is_native_ptr, !is_store, T_FLOAT,   !is_volatile, false);
-  case vmIntrinsics::_getDouble:                return inline_unsafe_access(!is_native_ptr, !is_store, T_DOUBLE,  !is_volatile, false);
-  case vmIntrinsics::_putObject:                return inline_unsafe_access(!is_native_ptr,  is_store, T_OBJECT,  !is_volatile, false);
-  case vmIntrinsics::_putBoolean:               return inline_unsafe_access(!is_native_ptr,  is_store, T_BOOLEAN, !is_volatile, false);
-  case vmIntrinsics::_putByte:                  return inline_unsafe_access(!is_native_ptr,  is_store, T_BYTE,    !is_volatile, false);
-  case vmIntrinsics::_putShort:                 return inline_unsafe_access(!is_native_ptr,  is_store, T_SHORT,   !is_volatile, false);
-  case vmIntrinsics::_putChar:                  return inline_unsafe_access(!is_native_ptr,  is_store, T_CHAR,    !is_volatile, false);
-  case vmIntrinsics::_putInt:                   return inline_unsafe_access(!is_native_ptr,  is_store, T_INT,     !is_volatile, false);
-  case vmIntrinsics::_putLong:                  return inline_unsafe_access(!is_native_ptr,  is_store, T_LONG,    !is_volatile, false);
-  case vmIntrinsics::_putFloat:                 return inline_unsafe_access(!is_native_ptr,  is_store, T_FLOAT,   !is_volatile, false);
-  case vmIntrinsics::_putDouble:                return inline_unsafe_access(!is_native_ptr,  is_store, T_DOUBLE,  !is_volatile, false);
-
-  case vmIntrinsics::_getByte_raw:              return inline_unsafe_access( is_native_ptr, !is_store, T_BYTE,    !is_volatile, false);
-  case vmIntrinsics::_getShort_raw:             return inline_unsafe_access( is_native_ptr, !is_store, T_SHORT,   !is_volatile, false);
-  case vmIntrinsics::_getChar_raw:              return inline_unsafe_access( is_native_ptr, !is_store, T_CHAR,    !is_volatile, false);
-  case vmIntrinsics::_getInt_raw:               return inline_unsafe_access( is_native_ptr, !is_store, T_INT,     !is_volatile, false);
-  case vmIntrinsics::_getLong_raw:              return inline_unsafe_access( is_native_ptr, !is_store, T_LONG,    !is_volatile, false);
-  case vmIntrinsics::_getFloat_raw:             return inline_unsafe_access( is_native_ptr, !is_store, T_FLOAT,   !is_volatile, false);
-  case vmIntrinsics::_getDouble_raw:            return inline_unsafe_access( is_native_ptr, !is_store, T_DOUBLE,  !is_volatile, false);
-  case vmIntrinsics::_getAddress_raw:           return inline_unsafe_access( is_native_ptr, !is_store, T_ADDRESS, !is_volatile, false);
-
-  case vmIntrinsics::_putByte_raw:              return inline_unsafe_access( is_native_ptr,  is_store, T_BYTE,    !is_volatile, false);
-  case vmIntrinsics::_putShort_raw:             return inline_unsafe_access( is_native_ptr,  is_store, T_SHORT,   !is_volatile, false);
-  case vmIntrinsics::_putChar_raw:              return inline_unsafe_access( is_native_ptr,  is_store, T_CHAR,    !is_volatile, false);
-  case vmIntrinsics::_putInt_raw:               return inline_unsafe_access( is_native_ptr,  is_store, T_INT,     !is_volatile, false);
-  case vmIntrinsics::_putLong_raw:              return inline_unsafe_access( is_native_ptr,  is_store, T_LONG,    !is_volatile, false);
-  case vmIntrinsics::_putFloat_raw:             return inline_unsafe_access( is_native_ptr,  is_store, T_FLOAT,   !is_volatile, false);
-  case vmIntrinsics::_putDouble_raw:            return inline_unsafe_access( is_native_ptr,  is_store, T_DOUBLE,  !is_volatile, false);
-  case vmIntrinsics::_putAddress_raw:           return inline_unsafe_access( is_native_ptr,  is_store, T_ADDRESS, !is_volatile, false);
-
-  case vmIntrinsics::_getObjectVolatile:        return inline_unsafe_access(!is_native_ptr, !is_store, T_OBJECT,   is_volatile, false);
-  case vmIntrinsics::_getBooleanVolatile:       return inline_unsafe_access(!is_native_ptr, !is_store, T_BOOLEAN,  is_volatile, false);
-  case vmIntrinsics::_getByteVolatile:          return inline_unsafe_access(!is_native_ptr, !is_store, T_BYTE,     is_volatile, false);
-  case vmIntrinsics::_getShortVolatile:         return inline_unsafe_access(!is_native_ptr, !is_store, T_SHORT,    is_volatile, false);
-  case vmIntrinsics::_getCharVolatile:          return inline_unsafe_access(!is_native_ptr, !is_store, T_CHAR,     is_volatile, false);
-  case vmIntrinsics::_getIntVolatile:           return inline_unsafe_access(!is_native_ptr, !is_store, T_INT,      is_volatile, false);
-  case vmIntrinsics::_getLongVolatile:          return inline_unsafe_access(!is_native_ptr, !is_store, T_LONG,     is_volatile, false);
-  case vmIntrinsics::_getFloatVolatile:         return inline_unsafe_access(!is_native_ptr, !is_store, T_FLOAT,    is_volatile, false);
-  case vmIntrinsics::_getDoubleVolatile:        return inline_unsafe_access(!is_native_ptr, !is_store, T_DOUBLE,   is_volatile, false);
-
-  case vmIntrinsics::_putObjectVolatile:        return inline_unsafe_access(!is_native_ptr,  is_store, T_OBJECT,   is_volatile, false);
-  case vmIntrinsics::_putBooleanVolatile:       return inline_unsafe_access(!is_native_ptr,  is_store, T_BOOLEAN,  is_volatile, false);
-  case vmIntrinsics::_putByteVolatile:          return inline_unsafe_access(!is_native_ptr,  is_store, T_BYTE,     is_volatile, false);
-  case vmIntrinsics::_putShortVolatile:         return inline_unsafe_access(!is_native_ptr,  is_store, T_SHORT,    is_volatile, false);
-  case vmIntrinsics::_putCharVolatile:          return inline_unsafe_access(!is_native_ptr,  is_store, T_CHAR,     is_volatile, false);
-  case vmIntrinsics::_putIntVolatile:           return inline_unsafe_access(!is_native_ptr,  is_store, T_INT,      is_volatile, false);
-  case vmIntrinsics::_putLongVolatile:          return inline_unsafe_access(!is_native_ptr,  is_store, T_LONG,     is_volatile, false);
-  case vmIntrinsics::_putFloatVolatile:         return inline_unsafe_access(!is_native_ptr,  is_store, T_FLOAT,    is_volatile, false);
-  case vmIntrinsics::_putDoubleVolatile:        return inline_unsafe_access(!is_native_ptr,  is_store, T_DOUBLE,   is_volatile, false);
-
-  case vmIntrinsics::_getShortUnaligned:        return inline_unsafe_access(!is_native_ptr, !is_store, T_SHORT,   !is_volatile, true);
-  case vmIntrinsics::_getCharUnaligned:         return inline_unsafe_access(!is_native_ptr, !is_store, T_CHAR,    !is_volatile, true);
-  case vmIntrinsics::_getIntUnaligned:          return inline_unsafe_access(!is_native_ptr, !is_store, T_INT,     !is_volatile, true);
-  case vmIntrinsics::_getLongUnaligned:         return inline_unsafe_access(!is_native_ptr, !is_store, T_LONG,    !is_volatile, true);
-
-  case vmIntrinsics::_putShortUnaligned:        return inline_unsafe_access(!is_native_ptr,  is_store, T_SHORT,   !is_volatile, true);
-  case vmIntrinsics::_putCharUnaligned:         return inline_unsafe_access(!is_native_ptr,  is_store, T_CHAR,    !is_volatile, true);
-  case vmIntrinsics::_putIntUnaligned:          return inline_unsafe_access(!is_native_ptr,  is_store, T_INT,     !is_volatile, true);
-  case vmIntrinsics::_putLongUnaligned:         return inline_unsafe_access(!is_native_ptr,  is_store, T_LONG,    !is_volatile, true);
-
-  case vmIntrinsics::_compareAndSwapObject:     return inline_unsafe_load_store(T_OBJECT, LS_cmpxchg);
-  case vmIntrinsics::_compareAndSwapInt:        return inline_unsafe_load_store(T_INT,    LS_cmpxchg);
-  case vmIntrinsics::_compareAndSwapLong:       return inline_unsafe_load_store(T_LONG,   LS_cmpxchg);
-
-  case vmIntrinsics::_putOrderedObject:         return inline_unsafe_ordered_store(T_OBJECT);
-  case vmIntrinsics::_putOrderedInt:            return inline_unsafe_ordered_store(T_INT);
-  case vmIntrinsics::_putOrderedLong:           return inline_unsafe_ordered_store(T_LONG);
-
-  case vmIntrinsics::_getAndAddInt:             return inline_unsafe_load_store(T_INT,    LS_xadd);
-  case vmIntrinsics::_getAndAddLong:            return inline_unsafe_load_store(T_LONG,   LS_xadd);
-  case vmIntrinsics::_getAndSetInt:             return inline_unsafe_load_store(T_INT,    LS_xchg);
-  case vmIntrinsics::_getAndSetLong:            return inline_unsafe_load_store(T_LONG,   LS_xchg);
-  case vmIntrinsics::_getAndSetObject:          return inline_unsafe_load_store(T_OBJECT, LS_xchg);
+  case vmIntrinsics::_getObject:                return inline_unsafe_access(!is_native_ptr, !is_store, T_OBJECT,   Relaxed, false);
+  case vmIntrinsics::_getBoolean:               return inline_unsafe_access(!is_native_ptr, !is_store, T_BOOLEAN,  Relaxed, false);
+  case vmIntrinsics::_getByte:                  return inline_unsafe_access(!is_native_ptr, !is_store, T_BYTE,     Relaxed, false);
+  case vmIntrinsics::_getShort:                 return inline_unsafe_access(!is_native_ptr, !is_store, T_SHORT,    Relaxed, false);
+  case vmIntrinsics::_getChar:                  return inline_unsafe_access(!is_native_ptr, !is_store, T_CHAR,     Relaxed, false);
+  case vmIntrinsics::_getInt:                   return inline_unsafe_access(!is_native_ptr, !is_store, T_INT,      Relaxed, false);
+  case vmIntrinsics::_getLong:                  return inline_unsafe_access(!is_native_ptr, !is_store, T_LONG,     Relaxed, false);
+  case vmIntrinsics::_getFloat:                 return inline_unsafe_access(!is_native_ptr, !is_store, T_FLOAT,    Relaxed, false);
+  case vmIntrinsics::_getDouble:                return inline_unsafe_access(!is_native_ptr, !is_store, T_DOUBLE,   Relaxed, false);
+
+  case vmIntrinsics::_putObject:                return inline_unsafe_access(!is_native_ptr,  is_store, T_OBJECT,   Relaxed, false);
+  case vmIntrinsics::_putBoolean:               return inline_unsafe_access(!is_native_ptr,  is_store, T_BOOLEAN,  Relaxed, false);
+  case vmIntrinsics::_putByte:                  return inline_unsafe_access(!is_native_ptr,  is_store, T_BYTE,     Relaxed, false);
+  case vmIntrinsics::_putShort:                 return inline_unsafe_access(!is_native_ptr,  is_store, T_SHORT,    Relaxed, false);
+  case vmIntrinsics::_putChar:                  return inline_unsafe_access(!is_native_ptr,  is_store, T_CHAR,     Relaxed, false);
+  case vmIntrinsics::_putInt:                   return inline_unsafe_access(!is_native_ptr,  is_store, T_INT,      Relaxed, false);
+  case vmIntrinsics::_putLong:                  return inline_unsafe_access(!is_native_ptr,  is_store, T_LONG,     Relaxed, false);
+  case vmIntrinsics::_putFloat:                 return inline_unsafe_access(!is_native_ptr,  is_store, T_FLOAT,    Relaxed, false);
+  case vmIntrinsics::_putDouble:                return inline_unsafe_access(!is_native_ptr,  is_store, T_DOUBLE,   Relaxed, false);
+
+  case vmIntrinsics::_getByte_raw:              return inline_unsafe_access( is_native_ptr, !is_store, T_BYTE,     Relaxed, false);
+  case vmIntrinsics::_getShort_raw:             return inline_unsafe_access( is_native_ptr, !is_store, T_SHORT,    Relaxed, false);
+  case vmIntrinsics::_getChar_raw:              return inline_unsafe_access( is_native_ptr, !is_store, T_CHAR,     Relaxed, false);
+  case vmIntrinsics::_getInt_raw:               return inline_unsafe_access( is_native_ptr, !is_store, T_INT,      Relaxed, false);
+  case vmIntrinsics::_getLong_raw:              return inline_unsafe_access( is_native_ptr, !is_store, T_LONG,     Relaxed, false);
+  case vmIntrinsics::_getFloat_raw:             return inline_unsafe_access( is_native_ptr, !is_store, T_FLOAT,    Relaxed, false);
+  case vmIntrinsics::_getDouble_raw:            return inline_unsafe_access( is_native_ptr, !is_store, T_DOUBLE,   Relaxed, false);
+  case vmIntrinsics::_getAddress_raw:           return inline_unsafe_access( is_native_ptr, !is_store, T_ADDRESS,  Relaxed, false);
+
+  case vmIntrinsics::_putByte_raw:              return inline_unsafe_access( is_native_ptr,  is_store, T_BYTE,     Relaxed, false);
+  case vmIntrinsics::_putShort_raw:             return inline_unsafe_access( is_native_ptr,  is_store, T_SHORT,    Relaxed, false);
+  case vmIntrinsics::_putChar_raw:              return inline_unsafe_access( is_native_ptr,  is_store, T_CHAR,     Relaxed, false);
+  case vmIntrinsics::_putInt_raw:               return inline_unsafe_access( is_native_ptr,  is_store, T_INT,      Relaxed, false);
+  case vmIntrinsics::_putLong_raw:              return inline_unsafe_access( is_native_ptr,  is_store, T_LONG,     Relaxed, false);
+  case vmIntrinsics::_putFloat_raw:             return inline_unsafe_access( is_native_ptr,  is_store, T_FLOAT,    Relaxed, false);
+  case vmIntrinsics::_putDouble_raw:            return inline_unsafe_access( is_native_ptr,  is_store, T_DOUBLE,   Relaxed, false);
+  case vmIntrinsics::_putAddress_raw:           return inline_unsafe_access( is_native_ptr,  is_store, T_ADDRESS,  Relaxed, false);
+
+  case vmIntrinsics::_getObjectVolatile:        return inline_unsafe_access(!is_native_ptr, !is_store, T_OBJECT,   Volatile, false);
+  case vmIntrinsics::_getBooleanVolatile:       return inline_unsafe_access(!is_native_ptr, !is_store, T_BOOLEAN,  Volatile, false);
+  case vmIntrinsics::_getByteVolatile:          return inline_unsafe_access(!is_native_ptr, !is_store, T_BYTE,     Volatile, false);
+  case vmIntrinsics::_getShortVolatile:         return inline_unsafe_access(!is_native_ptr, !is_store, T_SHORT,    Volatile, false);
+  case vmIntrinsics::_getCharVolatile:          return inline_unsafe_access(!is_native_ptr, !is_store, T_CHAR,     Volatile, false);
+  case vmIntrinsics::_getIntVolatile:           return inline_unsafe_access(!is_native_ptr, !is_store, T_INT,      Volatile, false);
+  case vmIntrinsics::_getLongVolatile:          return inline_unsafe_access(!is_native_ptr, !is_store, T_LONG,     Volatile, false);
+  case vmIntrinsics::_getFloatVolatile:         return inline_unsafe_access(!is_native_ptr, !is_store, T_FLOAT,    Volatile, false);
+  case vmIntrinsics::_getDoubleVolatile:        return inline_unsafe_access(!is_native_ptr, !is_store, T_DOUBLE,   Volatile, false);
+
+  case vmIntrinsics::_putObjectVolatile:        return inline_unsafe_access(!is_native_ptr,  is_store, T_OBJECT,   Volatile, false);
+  case vmIntrinsics::_putBooleanVolatile:       return inline_unsafe_access(!is_native_ptr,  is_store, T_BOOLEAN,  Volatile, false);
+  case vmIntrinsics::_putByteVolatile:          return inline_unsafe_access(!is_native_ptr,  is_store, T_BYTE,     Volatile, false);
+  case vmIntrinsics::_putShortVolatile:         return inline_unsafe_access(!is_native_ptr,  is_store, T_SHORT,    Volatile, false);
+  case vmIntrinsics::_putCharVolatile:          return inline_unsafe_access(!is_native_ptr,  is_store, T_CHAR,     Volatile, false);
+  case vmIntrinsics::_putIntVolatile:           return inline_unsafe_access(!is_native_ptr,  is_store, T_INT,      Volatile, false);
+  case vmIntrinsics::_putLongVolatile:          return inline_unsafe_access(!is_native_ptr,  is_store, T_LONG,     Volatile, false);
+  case vmIntrinsics::_putFloatVolatile:         return inline_unsafe_access(!is_native_ptr,  is_store, T_FLOAT,    Volatile, false);
+  case vmIntrinsics::_putDoubleVolatile:        return inline_unsafe_access(!is_native_ptr,  is_store, T_DOUBLE,   Volatile, false);
+
+  case vmIntrinsics::_getShortUnaligned:        return inline_unsafe_access(!is_native_ptr, !is_store, T_SHORT,    Relaxed, true);
+  case vmIntrinsics::_getCharUnaligned:         return inline_unsafe_access(!is_native_ptr, !is_store, T_CHAR,     Relaxed, true);
+  case vmIntrinsics::_getIntUnaligned:          return inline_unsafe_access(!is_native_ptr, !is_store, T_INT,      Relaxed, true);
+  case vmIntrinsics::_getLongUnaligned:         return inline_unsafe_access(!is_native_ptr, !is_store, T_LONG,     Relaxed, true);
+
+  case vmIntrinsics::_putShortUnaligned:        return inline_unsafe_access(!is_native_ptr,  is_store, T_SHORT,    Relaxed, true);
+  case vmIntrinsics::_putCharUnaligned:         return inline_unsafe_access(!is_native_ptr,  is_store, T_CHAR,     Relaxed, true);
+  case vmIntrinsics::_putIntUnaligned:          return inline_unsafe_access(!is_native_ptr,  is_store, T_INT,      Relaxed, true);
+  case vmIntrinsics::_putLongUnaligned:         return inline_unsafe_access(!is_native_ptr,  is_store, T_LONG,     Relaxed, true);
+
+  case vmIntrinsics::_putOrderedObject:         return inline_unsafe_access(!is_native_ptr,  is_store, T_OBJECT,   Release, false);
+  case vmIntrinsics::_putOrderedInt:            return inline_unsafe_access(!is_native_ptr,  is_store, T_INT,      Release, false);
+  case vmIntrinsics::_putOrderedLong:           return inline_unsafe_access(!is_native_ptr,  is_store, T_LONG,     Release, false);
+
+  case vmIntrinsics::_getObjectAcquire:         return inline_unsafe_access(!is_native_ptr, !is_store, T_OBJECT,   Acquire, false);
+  case vmIntrinsics::_getBooleanAcquire:        return inline_unsafe_access(!is_native_ptr, !is_store, T_BOOLEAN,  Acquire, false);
+  case vmIntrinsics::_getByteAcquire:           return inline_unsafe_access(!is_native_ptr, !is_store, T_BYTE,     Acquire, false);
+  case vmIntrinsics::_getShortAcquire:          return inline_unsafe_access(!is_native_ptr, !is_store, T_SHORT,    Acquire, false);
+  case vmIntrinsics::_getCharAcquire:           return inline_unsafe_access(!is_native_ptr, !is_store, T_CHAR,     Acquire, false);
+  case vmIntrinsics::_getIntAcquire:            return inline_unsafe_access(!is_native_ptr, !is_store, T_INT,      Acquire, false);
+  case vmIntrinsics::_getLongAcquire:           return inline_unsafe_access(!is_native_ptr, !is_store, T_LONG,     Acquire, false);
+  case vmIntrinsics::_getFloatAcquire:          return inline_unsafe_access(!is_native_ptr, !is_store, T_FLOAT,    Acquire, false);
+  case vmIntrinsics::_getDoubleAcquire:         return inline_unsafe_access(!is_native_ptr, !is_store, T_DOUBLE,   Acquire, false);
+
+  case vmIntrinsics::_putObjectRelease:         return inline_unsafe_access(!is_native_ptr,  is_store, T_OBJECT,   Release, false);
+  case vmIntrinsics::_putBooleanRelease:        return inline_unsafe_access(!is_native_ptr,  is_store, T_BOOLEAN,  Release, false);
+  case vmIntrinsics::_putByteRelease:           return inline_unsafe_access(!is_native_ptr,  is_store, T_BYTE,     Release, false);
+  case vmIntrinsics::_putShortRelease:          return inline_unsafe_access(!is_native_ptr,  is_store, T_SHORT,    Release, false);
+  case vmIntrinsics::_putCharRelease:           return inline_unsafe_access(!is_native_ptr,  is_store, T_CHAR,     Release, false);
+  case vmIntrinsics::_putIntRelease:            return inline_unsafe_access(!is_native_ptr,  is_store, T_INT,      Release, false);
+  case vmIntrinsics::_putLongRelease:           return inline_unsafe_access(!is_native_ptr,  is_store, T_LONG,     Release, false);
+  case vmIntrinsics::_putFloatRelease:          return inline_unsafe_access(!is_native_ptr,  is_store, T_FLOAT,    Release, false);
+  case vmIntrinsics::_putDoubleRelease:         return inline_unsafe_access(!is_native_ptr,  is_store, T_DOUBLE,   Release, false);
+
+  case vmIntrinsics::_getObjectOpaque:          return inline_unsafe_access(!is_native_ptr, !is_store, T_OBJECT,   Opaque, false);
+  case vmIntrinsics::_getBooleanOpaque:         return inline_unsafe_access(!is_native_ptr, !is_store, T_BOOLEAN,  Opaque, false);
+  case vmIntrinsics::_getByteOpaque:            return inline_unsafe_access(!is_native_ptr, !is_store, T_BYTE,     Opaque, false);
+  case vmIntrinsics::_getShortOpaque:           return inline_unsafe_access(!is_native_ptr, !is_store, T_SHORT,    Opaque, false);
+  case vmIntrinsics::_getCharOpaque:            return inline_unsafe_access(!is_native_ptr, !is_store, T_CHAR,     Opaque, false);
+  case vmIntrinsics::_getIntOpaque:             return inline_unsafe_access(!is_native_ptr, !is_store, T_INT,      Opaque, false);
+  case vmIntrinsics::_getLongOpaque:            return inline_unsafe_access(!is_native_ptr, !is_store, T_LONG,     Opaque, false);
+  case vmIntrinsics::_getFloatOpaque:           return inline_unsafe_access(!is_native_ptr, !is_store, T_FLOAT,    Opaque, false);
+  case vmIntrinsics::_getDoubleOpaque:          return inline_unsafe_access(!is_native_ptr, !is_store, T_DOUBLE,   Opaque, false);
+
+  case vmIntrinsics::_putObjectOpaque:          return inline_unsafe_access(!is_native_ptr,  is_store, T_OBJECT,   Opaque, false);
+  case vmIntrinsics::_putBooleanOpaque:         return inline_unsafe_access(!is_native_ptr,  is_store, T_BOOLEAN,  Opaque, false);
+  case vmIntrinsics::_putByteOpaque:            return inline_unsafe_access(!is_native_ptr,  is_store, T_BYTE,     Opaque, false);
+  case vmIntrinsics::_putShortOpaque:           return inline_unsafe_access(!is_native_ptr,  is_store, T_SHORT,    Opaque, false);
+  case vmIntrinsics::_putCharOpaque:            return inline_unsafe_access(!is_native_ptr,  is_store, T_CHAR,     Opaque, false);
+  case vmIntrinsics::_putIntOpaque:             return inline_unsafe_access(!is_native_ptr,  is_store, T_INT,      Opaque, false);
+  case vmIntrinsics::_putLongOpaque:            return inline_unsafe_access(!is_native_ptr,  is_store, T_LONG,     Opaque, false);
+  case vmIntrinsics::_putFloatOpaque:           return inline_unsafe_access(!is_native_ptr,  is_store, T_FLOAT,    Opaque, false);
+  case vmIntrinsics::_putDoubleOpaque:          return inline_unsafe_access(!is_native_ptr,  is_store, T_DOUBLE,   Opaque, false);
+
+  case vmIntrinsics::_compareAndSwapObject:             return inline_unsafe_load_store(T_OBJECT, LS_cmp_swap,      Volatile);
+  case vmIntrinsics::_compareAndSwapInt:                return inline_unsafe_load_store(T_INT,    LS_cmp_swap,      Volatile);
+  case vmIntrinsics::_compareAndSwapLong:               return inline_unsafe_load_store(T_LONG,   LS_cmp_swap,      Volatile);
+
+  case vmIntrinsics::_weakCompareAndSwapObject:         return inline_unsafe_load_store(T_OBJECT, LS_cmp_swap_weak, Relaxed);
+  case vmIntrinsics::_weakCompareAndSwapObjectAcquire:  return inline_unsafe_load_store(T_OBJECT, LS_cmp_swap_weak, Acquire);
+  case vmIntrinsics::_weakCompareAndSwapObjectRelease:  return inline_unsafe_load_store(T_OBJECT, LS_cmp_swap_weak, Release);
+  case vmIntrinsics::_weakCompareAndSwapInt:            return inline_unsafe_load_store(T_INT,    LS_cmp_swap_weak, Relaxed);
+  case vmIntrinsics::_weakCompareAndSwapIntAcquire:     return inline_unsafe_load_store(T_INT,    LS_cmp_swap_weak, Acquire);
+  case vmIntrinsics::_weakCompareAndSwapIntRelease:     return inline_unsafe_load_store(T_INT,    LS_cmp_swap_weak, Release);
+  case vmIntrinsics::_weakCompareAndSwapLong:           return inline_unsafe_load_store(T_LONG,   LS_cmp_swap_weak, Relaxed);
+  case vmIntrinsics::_weakCompareAndSwapLongAcquire:    return inline_unsafe_load_store(T_LONG,   LS_cmp_swap_weak, Acquire);
+  case vmIntrinsics::_weakCompareAndSwapLongRelease:    return inline_unsafe_load_store(T_LONG,   LS_cmp_swap_weak, Release);
+
+  case vmIntrinsics::_compareAndExchangeObjectVolatile: return inline_unsafe_load_store(T_OBJECT, LS_cmp_exchange,  Volatile);
+  case vmIntrinsics::_compareAndExchangeObjectAcquire:  return inline_unsafe_load_store(T_OBJECT, LS_cmp_exchange,  Acquire);
+  case vmIntrinsics::_compareAndExchangeObjectRelease:  return inline_unsafe_load_store(T_OBJECT, LS_cmp_exchange,  Release);
+  case vmIntrinsics::_compareAndExchangeIntVolatile:    return inline_unsafe_load_store(T_INT,    LS_cmp_exchange,  Volatile);
+  case vmIntrinsics::_compareAndExchangeIntAcquire:     return inline_unsafe_load_store(T_INT,    LS_cmp_exchange,  Acquire);
+  case vmIntrinsics::_compareAndExchangeIntRelease:     return inline_unsafe_load_store(T_INT,    LS_cmp_exchange,  Release);
+  case vmIntrinsics::_compareAndExchangeLongVolatile:   return inline_unsafe_load_store(T_LONG,   LS_cmp_exchange,  Volatile);
+  case vmIntrinsics::_compareAndExchangeLongAcquire:    return inline_unsafe_load_store(T_LONG,   LS_cmp_exchange,  Acquire);
+  case vmIntrinsics::_compareAndExchangeLongRelease:    return inline_unsafe_load_store(T_LONG,   LS_cmp_exchange,  Release);
+
+  case vmIntrinsics::_getAndAddInt:                     return inline_unsafe_load_store(T_INT,    LS_get_add,       Volatile);
+  case vmIntrinsics::_getAndAddLong:                    return inline_unsafe_load_store(T_LONG,   LS_get_add,       Volatile);
+  case vmIntrinsics::_getAndSetInt:                     return inline_unsafe_load_store(T_INT,    LS_get_set,       Volatile);
+  case vmIntrinsics::_getAndSetLong:                    return inline_unsafe_load_store(T_LONG,   LS_get_set,       Volatile);
+  case vmIntrinsics::_getAndSetObject:                  return inline_unsafe_load_store(T_OBJECT, LS_get_set,       Volatile);
 
   case vmIntrinsics::_loadFence:
   case vmIntrinsics::_storeFence:
@@ -1581,6 +1645,13 @@
   assert (type2aelembytes(T_CHAR) == type2aelembytes(T_BYTE)*2,
           "sanity: byte[] and char[] scales agree");
 
+  // Bail when getChar over constants is requested: constant folding would
+  // reject folding mismatched char access over byte[]. A normal inlining for getChar
+  // Java method would constant fold nicely instead.
+  if (!is_store && value->is_Con() && index->is_Con()) {
+    return false;
+  }
+
   Node* adr = array_element_address(value, index, T_CHAR);
   if (is_store) {
     (void) store_to_memory(control(), adr, ch, T_CHAR, TypeAryPtr::BYTES, MemNode::unordered,
@@ -2274,8 +2345,10 @@
   return NULL;
 }
 
-bool LibraryCallKit::inline_unsafe_access(bool is_native_ptr, bool is_store, BasicType type, bool is_volatile, bool unaligned) {
+bool LibraryCallKit::inline_unsafe_access(const bool is_native_ptr, bool is_store, const BasicType type, const AccessKind kind, const bool unaligned) {
   if (callee()->is_static())  return false;  // caller must have the capability!
+  guarantee(!is_store || kind != Acquire, "Acquire accesses can be produced only for loads");
+  guarantee( is_store || kind != Release, "Release accesses can be produced only for stores");
 
 #ifndef PRODUCT
   {
@@ -2364,7 +2437,42 @@
   // the barriers get omitted and the unsafe reference begins to "pollute"
   // the alias analysis of the rest of the graph, either Compile::can_alias
   // or Compile::must_alias will throw a diagnostic assert.)
-  bool need_mem_bar = (alias_type->adr_type() == TypeOopPtr::BOTTOM);
+  bool need_mem_bar;
+  switch (kind) {
+      case Relaxed:
+          need_mem_bar = (alias_type->adr_type() == TypeOopPtr::BOTTOM);
+          break;
+      case Opaque:
+          // Opaque uses CPUOrder membars for protection against code movement.
+      case Acquire:
+      case Release:
+      case Volatile:
+          need_mem_bar = true;
+          break;
+      default:
+          ShouldNotReachHere();
+  }
+
+  // Some accesses require access atomicity for all types, notably longs and doubles.
+  // When AlwaysAtomicAccesses is enabled, all accesses are atomic.
+  bool requires_atomic_access = false;
+  switch (kind) {
+      case Relaxed:
+      case Opaque:
+          requires_atomic_access = AlwaysAtomicAccesses;
+          break;
+      case Acquire:
+      case Release:
+      case Volatile:
+          requires_atomic_access = true;
+          break;
+      default:
+          ShouldNotReachHere();
+  }
+
+  // Figure out the memory ordering.
+  // Acquire/Release/Volatile accesses require marking the loads/stores with MemOrd
+  MemNode::MemOrd mo = access_kind_to_memord_LS(kind, is_store);
 
   // If we are reading the value of the referent field of a Reference
   // object (either by using Unsafe directly or through reflection)
@@ -2391,22 +2499,30 @@
   // and it is not possible to fully distinguish unintended nulls
   // from intended ones in this API.
 
-  if (is_volatile) {
-    // We need to emit leading and trailing CPU membars (see below) in
-    // addition to memory membars when is_volatile. This is a little
-    // too strong, but avoids the need to insert per-alias-type
-    // volatile membars (for stores; compare Parse::do_put_xxx), which
-    // we cannot do effectively here because we probably only have a
-    // rough approximation of type.
-    need_mem_bar = true;
-    // For Stores, place a memory ordering barrier now.
-    if (is_store) {
-      insert_mem_bar(Op_MemBarRelease);
-    } else {
-      if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
-        insert_mem_bar(Op_MemBarVolatile);
+  // We need to emit leading and trailing CPU membars (see below) in
+  // addition to memory membars for special access modes. This is a little
+  // too strong, but avoids the need to insert per-alias-type
+  // volatile membars (for stores; compare Parse::do_put_xxx), which
+  // we cannot do effectively here because we probably only have a
+  // rough approximation of type.
+
+  switch(kind) {
+    case Relaxed:
+    case Opaque:
+    case Acquire:
+      break;
+    case Release:
+    case Volatile:
+      if (is_store) {
+        insert_mem_bar(Op_MemBarRelease);
+      } else {
+        if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
+          insert_mem_bar(Op_MemBarVolatile);
+        }
       }
-    }
+      break;
+    default:
+      ShouldNotReachHere();
   }
 
   // Memory barrier to prevent normal and 'unsafe' accesses from
@@ -2422,10 +2538,12 @@
   if (alias_type->element() != NULL || alias_type->field() != NULL) {
     BasicType bt;
     if (alias_type->element() != NULL) {
-      const Type* element = alias_type->element();
+      // Use address type to get the element type. Alias type doesn't provide
+      // enough information (e.g., doesn't differentiate between byte[] and boolean[]).
+      const Type* element = adr_type->is_aryptr()->elem();
       bt = element->isa_narrowoop() ? T_OBJECT : element->array_element_basic_type();
     } else {
-      bt = alias_type->field()->type()->basic_type();
+      bt = alias_type->field()->layout_type();
     }
     if (bt == T_ARRAY) {
       // accessing an array field with getObject is not a mismatch
@@ -2442,7 +2560,7 @@
     // Try to constant fold a load from a constant field
     ciField* field = alias_type->field();
     if (heap_base_oop != top() &&
-        field != NULL && field->is_constant() && field->layout_type() == type) {
+        field != NULL && field->is_constant() && !mismatched) {
       // final or stable field
       const Type* con_type = Type::make_constant(alias_type->field(), heap_base_oop);
       if (con_type != NULL) {
@@ -2450,10 +2568,9 @@
       }
     }
     if (p == NULL) {
-      MemNode::MemOrd mo = is_volatile ? MemNode::acquire : MemNode::unordered;
       // To be valid, unsafe loads may depend on other conditions than
       // the one that guards them: pin the Load node
-      p = make_load(control(), adr, value_type, type, adr_type, mo, LoadNode::Pinned, is_volatile, unaligned, mismatched);
+      p = make_load(control(), adr, value_type, type, adr_type, mo, LoadNode::Pinned, requires_atomic_access, unaligned, mismatched);
       // load value
       switch (type) {
       case T_BOOLEAN:
@@ -2467,7 +2584,9 @@
         break;
       case T_OBJECT:
         if (need_read_barrier) {
-          insert_pre_barrier(heap_base_oop, offset, p, !(is_volatile || need_mem_bar));
+          // We do not require a mem bar inside pre_barrier if need_mem_bar
+          // is set: the barriers would be emitted by us.
+          insert_pre_barrier(heap_base_oop, offset, p, !need_mem_bar);
         }
         break;
       case T_ADDRESS:
@@ -2498,9 +2617,8 @@
       break;
     }
 
-    MemNode::MemOrd mo = is_volatile ? MemNode::release : MemNode::unordered;
-    if (type != T_OBJECT ) {
-      (void) store_to_memory(control(), adr, val, type, adr_type, mo, is_volatile, unaligned, mismatched);
+    if (type != T_OBJECT) {
+      (void) store_to_memory(control(), adr, val, type, adr_type, mo, requires_atomic_access, unaligned, mismatched);
     } else {
       // Possibly an oop being stored to Java heap or native memory
       if (!TypePtr::NULL_PTR->higher_equal(_gvn.type(heap_base_oop))) {
@@ -2521,7 +2639,7 @@
           // Update IdealKit memory.
           __ sync_kit(this);
         } __ else_(); {
-          __ store(__ ctrl(), adr, val, type, alias_type->index(), mo, is_volatile, mismatched);
+          __ store(__ ctrl(), adr, val, type, alias_type->index(), mo, requires_atomic_access, mismatched);
         } __ end_if();
         // Final sync IdealKit and GraphKit.
         final_sync(ideal);
@@ -2530,14 +2648,23 @@
     }
   }
 
-  if (is_volatile) {
-    if (!is_store) {
-      insert_mem_bar(Op_MemBarAcquire);
-    } else {
-      if (!support_IRIW_for_not_multiple_copy_atomic_cpu) {
-        insert_mem_bar(Op_MemBarVolatile);
+  switch(kind) {
+    case Relaxed:
+    case Opaque:
+    case Release:
+      break;
+    case Acquire:
+    case Volatile:
+      if (!is_store) {
+        insert_mem_bar(Op_MemBarAcquire);
+      } else {
+        if (!support_IRIW_for_not_multiple_copy_atomic_cpu) {
+          insert_mem_bar(Op_MemBarVolatile);
+        }
       }
-    }
+      break;
+    default:
+      ShouldNotReachHere();
   }
 
   if (need_mem_bar) insert_mem_bar(Op_MemBarCPUOrder);
@@ -2548,21 +2675,52 @@
 //----------------------------inline_unsafe_load_store----------------------------
 // This method serves a couple of different customers (depending on LoadStoreKind):
 //
-// LS_cmpxchg:
-//   public final native boolean compareAndSwapObject(Object o, long offset, Object expected, Object x);
-//   public final native boolean compareAndSwapInt(   Object o, long offset, int    expected, int    x);
-//   public final native boolean compareAndSwapLong(  Object o, long offset, long   expected, long   x);
+// LS_cmp_swap:
 //
-// LS_xadd:
-//   public int  getAndAddInt( Object o, long offset, int  delta)
-//   public long getAndAddLong(Object o, long offset, long delta)
+//   boolean compareAndSwapObject(Object o, long offset, Object expected, Object x);
+//   boolean compareAndSwapInt(   Object o, long offset, int    expected, int    x);
+//   boolean compareAndSwapLong(  Object o, long offset, long   expected, long   x);
 //
-// LS_xchg:
+// LS_cmp_swap_weak:
+//
+//   boolean weakCompareAndSwapObject(       Object o, long offset, Object expected, Object x);
+//   boolean weakCompareAndSwapObjectAcquire(Object o, long offset, Object expected, Object x);
+//   boolean weakCompareAndSwapObjectRelease(Object o, long offset, Object expected, Object x);
+//
+//   boolean weakCompareAndSwapInt(          Object o, long offset, int    expected, int    x);
+//   boolean weakCompareAndSwapIntAcquire(   Object o, long offset, int    expected, int    x);
+//   boolean weakCompareAndSwapIntRelease(   Object o, long offset, int    expected, int    x);
+//
+//   boolean weakCompareAndSwapLong(         Object o, long offset, long   expected, long   x);
+//   boolean weakCompareAndSwapLongAcquire(  Object o, long offset, long   expected, long   x);
+//   boolean weakCompareAndSwapLongRelease(  Object o, long offset, long   expected, long   x);
+//
+// LS_cmp_exchange:
+//
+//   Object compareAndExchangeObjectVolatile(Object o, long offset, Object expected, Object x);
+//   Object compareAndExchangeObjectAcquire( Object o, long offset, Object expected, Object x);
+//   Object compareAndExchangeObjectRelease( Object o, long offset, Object expected, Object x);
+//
+//   Object compareAndExchangeIntVolatile(   Object o, long offset, Object expected, Object x);
+//   Object compareAndExchangeIntAcquire(    Object o, long offset, Object expected, Object x);
+//   Object compareAndExchangeIntRelease(    Object o, long offset, Object expected, Object x);
+//
+//   Object compareAndExchangeLongVolatile(  Object o, long offset, Object expected, Object x);
+//   Object compareAndExchangeLongAcquire(   Object o, long offset, Object expected, Object x);
+//   Object compareAndExchangeLongRelease(   Object o, long offset, Object expected, Object x);
+//
+// LS_get_add:
+//
+//   int  getAndAddInt( Object o, long offset, int  delta)
+//   long getAndAddLong(Object o, long offset, long delta)
+//
+// LS_get_set:
+//
 //   int    getAndSet(Object o, long offset, int    newValue)
 //   long   getAndSet(Object o, long offset, long   newValue)
 //   Object getAndSet(Object o, long offset, Object newValue)
 //
-bool LibraryCallKit::inline_unsafe_load_store(BasicType type, LoadStoreKind kind) {
+bool LibraryCallKit::inline_unsafe_load_store(const BasicType type, const LoadStoreKind kind, const AccessKind access_kind) {
   // This basic scheme here is the same as inline_unsafe_access, but
   // differs in enough details that combining them would make the code
   // overly confusing.  (This is a true fact! I originally combined
@@ -2579,7 +2737,9 @@
     // Check the signatures.
     ciSignature* sig = callee()->signature();
     rtype = sig->return_type()->basic_type();
-    if (kind == LS_xadd || kind == LS_xchg) {
+    switch(kind) {
+      case LS_get_add:
+      case LS_get_set: {
       // Check the signatures.
 #ifdef ASSERT
       assert(rtype == type, "get and set must return the expected type");
@@ -2588,7 +2748,10 @@
       assert(sig->type_at(1)->basic_type() == T_LONG, "get and set offset is long");
       assert(sig->type_at(2)->basic_type() == type, "get and set must take expected type as new value/delta");
 #endif // ASSERT
-    } else if (kind == LS_cmpxchg) {
+        break;
+      }
+      case LS_cmp_swap:
+      case LS_cmp_swap_weak: {
       // Check the signatures.
 #ifdef ASSERT
       assert(rtype == T_BOOLEAN, "CAS must return boolean");
@@ -2596,8 +2759,20 @@
       assert(sig->type_at(0)->basic_type() == T_OBJECT, "CAS base is object");
       assert(sig->type_at(1)->basic_type() == T_LONG, "CAS offset is long");
 #endif // ASSERT
-    } else {
-      ShouldNotReachHere();
+        break;
+      }
+      case LS_cmp_exchange: {
+      // Check the signatures.
+#ifdef ASSERT
+      assert(rtype == type, "CAS must return the expected type");
+      assert(sig->count() == 4, "CAS has 4 arguments");
+      assert(sig->type_at(0)->basic_type() == T_OBJECT, "CAS base is object");
+      assert(sig->type_at(1)->basic_type() == T_LONG, "CAS offset is long");
+#endif // ASSERT
+        break;
+      }
+      default:
+        ShouldNotReachHere();
     }
   }
 #endif //PRODUCT
@@ -2610,19 +2785,29 @@
   Node* offset   = NULL;
   Node* oldval   = NULL;
   Node* newval   = NULL;
-  if (kind == LS_cmpxchg) {
-    const bool two_slot_type = type2size[type] == 2;
-    receiver = argument(0);  // type: oop
-    base     = argument(1);  // type: oop
-    offset   = argument(2);  // type: long
-    oldval   = argument(4);  // type: oop, int, or long
-    newval   = argument(two_slot_type ? 6 : 5);  // type: oop, int, or long
-  } else if (kind == LS_xadd || kind == LS_xchg){
-    receiver = argument(0);  // type: oop
-    base     = argument(1);  // type: oop
-    offset   = argument(2);  // type: long
-    oldval   = NULL;
-    newval   = argument(4);  // type: oop, int, or long
+  switch(kind) {
+    case LS_cmp_swap:
+    case LS_cmp_swap_weak:
+    case LS_cmp_exchange: {
+      const bool two_slot_type = type2size[type] == 2;
+      receiver = argument(0);  // type: oop
+      base     = argument(1);  // type: oop
+      offset   = argument(2);  // type: long
+      oldval   = argument(4);  // type: oop, int, or long
+      newval   = argument(two_slot_type ? 6 : 5);  // type: oop, int, or long
+      break;
+    }
+    case LS_get_add:
+    case LS_get_set: {
+      receiver = argument(0);  // type: oop
+      base     = argument(1);  // type: oop
+      offset   = argument(2);  // type: long
+      oldval   = NULL;
+      newval   = argument(4);  // type: oop, int, or long
+      break;
+    }
+    default:
+      ShouldNotReachHere();
   }
 
   // Null check receiver.
@@ -2647,11 +2832,23 @@
   Compile::AliasType* alias_type = C->alias_type(adr_type);
   assert(alias_type->index() != Compile::AliasIdxBot, "no bare pointers here");
 
-  if (kind == LS_xchg && type == T_OBJECT) {
-    const TypeOopPtr* tjp = sharpen_unsafe_type(alias_type, adr_type);
-    if (tjp != NULL) {
-      value_type = tjp;
+  switch (kind) {
+    case LS_get_set:
+    case LS_cmp_exchange: {
+      if (type == T_OBJECT) {
+        const TypeOopPtr* tjp = sharpen_unsafe_type(alias_type, adr_type);
+        if (tjp != NULL) {
+          value_type = tjp;
+        }
+      }
+      break;
     }
+    case LS_cmp_swap:
+    case LS_cmp_swap_weak:
+    case LS_get_add:
+      break;
+    default:
+      ShouldNotReachHere();
   }
 
   int alias_idx = C->get_alias_index(adr_type);
@@ -2661,9 +2858,22 @@
   // into actual barriers on most machines, but we still need rest of
   // compiler to respect ordering.
 
-  insert_mem_bar(Op_MemBarRelease);
+  switch (access_kind) {
+    case Relaxed:
+    case Acquire:
+      break;
+    case Release:
+    case Volatile:
+      insert_mem_bar(Op_MemBarRelease);
+      break;
+    default:
+      ShouldNotReachHere();
+  }
   insert_mem_bar(Op_MemBarCPUOrder);
 
+  // Figure out the memory ordering.
+  MemNode::MemOrd mo = access_kind_to_memord(access_kind);
+
   // 4984716: MemBars must be inserted before this
   //          memory node in order to avoid a false
   //          dependency which will confuse the scheduler.
@@ -2674,25 +2884,45 @@
   Node* load_store = NULL;
   switch(type) {
   case T_INT:
-    if (kind == LS_xadd) {
-      load_store = _gvn.transform(new GetAndAddINode(control(), mem, adr, newval, adr_type));
-    } else if (kind == LS_xchg) {
-      load_store = _gvn.transform(new GetAndSetINode(control(), mem, adr, newval, adr_type));
-    } else if (kind == LS_cmpxchg) {
-      load_store = _gvn.transform(new CompareAndSwapINode(control(), mem, adr, newval, oldval));
-    } else {
-      ShouldNotReachHere();
+    switch(kind) {
+      case LS_get_add:
+        load_store = _gvn.transform(new GetAndAddINode(control(), mem, adr, newval, adr_type));
+        break;
+      case LS_get_set:
+        load_store = _gvn.transform(new GetAndSetINode(control(), mem, adr, newval, adr_type));
+        break;
+      case LS_cmp_swap_weak:
+        load_store = _gvn.transform(new WeakCompareAndSwapINode(control(), mem, adr, newval, oldval, mo));
+        break;
+      case LS_cmp_swap:
+        load_store = _gvn.transform(new CompareAndSwapINode(control(), mem, adr, newval, oldval, mo));
+        break;
+      case LS_cmp_exchange:
+        load_store = _gvn.transform(new CompareAndExchangeINode(control(), mem, adr, newval, oldval, adr_type, mo));
+        break;
+      default:
+        ShouldNotReachHere();
     }
     break;
   case T_LONG:
-    if (kind == LS_xadd) {
-      load_store = _gvn.transform(new GetAndAddLNode(control(), mem, adr, newval, adr_type));
-    } else if (kind == LS_xchg) {
-      load_store = _gvn.transform(new GetAndSetLNode(control(), mem, adr, newval, adr_type));
-    } else if (kind == LS_cmpxchg) {
-      load_store = _gvn.transform(new CompareAndSwapLNode(control(), mem, adr, newval, oldval));
-    } else {
-      ShouldNotReachHere();
+    switch(kind) {
+      case LS_get_add:
+        load_store = _gvn.transform(new GetAndAddLNode(control(), mem, adr, newval, adr_type));
+        break;
+      case LS_get_set:
+        load_store = _gvn.transform(new GetAndSetLNode(control(), mem, adr, newval, adr_type));
+        break;
+      case LS_cmp_swap_weak:
+        load_store = _gvn.transform(new WeakCompareAndSwapLNode(control(), mem, adr, newval, oldval, mo));
+        break;
+      case LS_cmp_swap:
+        load_store = _gvn.transform(new CompareAndSwapLNode(control(), mem, adr, newval, oldval, mo));
+        break;
+      case LS_cmp_exchange:
+        load_store = _gvn.transform(new CompareAndExchangeLNode(control(), mem, adr, newval, oldval, adr_type, mo));
+        break;
+      default:
+        ShouldNotReachHere();
     }
     break;
   case T_OBJECT:
@@ -2703,65 +2933,109 @@
       newval = _gvn.makecon(TypePtr::NULL_PTR);
 
     // Reference stores need a store barrier.
-    if (kind == LS_xchg) {
-      // If pre-barrier must execute before the oop store, old value will require do_load here.
-      if (!can_move_pre_barrier()) {
-        pre_barrier(true /* do_load*/,
-                    control(), base, adr, alias_idx, newval, value_type->make_oopptr(),
-                    NULL /* pre_val*/,
+    switch(kind) {
+      case LS_get_set: {
+        // If pre-barrier must execute before the oop store, old value will require do_load here.
+        if (!can_move_pre_barrier()) {
+          pre_barrier(true /* do_load*/,
+                      control(), base, adr, alias_idx, newval, value_type->make_oopptr(),
+                      NULL /* pre_val*/,
+                      T_OBJECT);
+        } // Else move pre_barrier to use load_store value, see below.
+        break;
+      }
+      case LS_cmp_swap_weak:
+      case LS_cmp_swap:
+      case LS_cmp_exchange: {
+        // Same as for newval above:
+        if (_gvn.type(oldval) == TypePtr::NULL_PTR) {
+          oldval = _gvn.makecon(TypePtr::NULL_PTR);
+        }
+        // The only known value which might get overwritten is oldval.
+        pre_barrier(false /* do_load */,
+                    control(), NULL, NULL, max_juint, NULL, NULL,
+                    oldval /* pre_val */,
                     T_OBJECT);
-      } // Else move pre_barrier to use load_store value, see below.
-    } else if (kind == LS_cmpxchg) {
-      // Same as for newval above:
-      if (_gvn.type(oldval) == TypePtr::NULL_PTR) {
-        oldval = _gvn.makecon(TypePtr::NULL_PTR);
+        break;
       }
-      // The only known value which might get overwritten is oldval.
-      pre_barrier(false /* do_load */,
-                  control(), NULL, NULL, max_juint, NULL, NULL,
-                  oldval /* pre_val */,
-                  T_OBJECT);
-    } else {
-      ShouldNotReachHere();
+      default:
+        ShouldNotReachHere();
     }
 
 #ifdef _LP64
     if (adr->bottom_type()->is_ptr_to_narrowoop()) {
       Node *newval_enc = _gvn.transform(new EncodePNode(newval, newval->bottom_type()->make_narrowoop()));
-      if (kind == LS_xchg) {
-        load_store = _gvn.transform(new GetAndSetNNode(control(), mem, adr,
-                                                       newval_enc, adr_type, value_type->make_narrowoop()));
-      } else {
-        assert(kind == LS_cmpxchg, "wrong LoadStore operation");
-        Node *oldval_enc = _gvn.transform(new EncodePNode(oldval, oldval->bottom_type()->make_narrowoop()));
-        load_store = _gvn.transform(new CompareAndSwapNNode(control(), mem, adr,
-                                                                newval_enc, oldval_enc));
+
+      switch(kind) {
+        case LS_get_set:
+          load_store = _gvn.transform(new GetAndSetNNode(control(), mem, adr, newval_enc, adr_type, value_type->make_narrowoop()));
+          break;
+        case LS_cmp_swap_weak: {
+          Node *oldval_enc = _gvn.transform(new EncodePNode(oldval, oldval->bottom_type()->make_narrowoop()));
+          load_store = _gvn.transform(new WeakCompareAndSwapNNode(control(), mem, adr, newval_enc, oldval_enc, mo));
+          break;
+        }
+        case LS_cmp_swap: {
+          Node *oldval_enc = _gvn.transform(new EncodePNode(oldval, oldval->bottom_type()->make_narrowoop()));
+          load_store = _gvn.transform(new CompareAndSwapNNode(control(), mem, adr, newval_enc, oldval_enc, mo));
+          break;
+        }
+        case LS_cmp_exchange: {
+          Node *oldval_enc = _gvn.transform(new EncodePNode(oldval, oldval->bottom_type()->make_narrowoop()));
+          load_store = _gvn.transform(new CompareAndExchangeNNode(control(), mem, adr, newval_enc, oldval_enc, adr_type, value_type->make_narrowoop(), mo));
+          break;
+        }
+        default:
+          ShouldNotReachHere();
       }
     } else
 #endif
-    {
-      if (kind == LS_xchg) {
+    switch (kind) {
+      case LS_get_set:
         load_store = _gvn.transform(new GetAndSetPNode(control(), mem, adr, newval, adr_type, value_type->is_oopptr()));
-      } else {
-        assert(kind == LS_cmpxchg, "wrong LoadStore operation");
-        load_store = _gvn.transform(new CompareAndSwapPNode(control(), mem, adr, newval, oldval));
+        break;
+      case LS_cmp_swap_weak:
+        load_store = _gvn.transform(new WeakCompareAndSwapPNode(control(), mem, adr, newval, oldval, mo));
+        break;
+      case LS_cmp_swap:
+        load_store = _gvn.transform(new CompareAndSwapPNode(control(), mem, adr, newval, oldval, mo));
+        break;
+      case LS_cmp_exchange:
+        load_store = _gvn.transform(new CompareAndExchangePNode(control(), mem, adr, newval, oldval, adr_type, value_type->is_oopptr(), mo));
+        break;
+      default:
+        ShouldNotReachHere();
+    }
+
+    // Emit the post barrier only when the actual store happened. This makes sense
+    // to check only for LS_cmp_* that can fail to set the value.
+    // LS_cmp_exchange does not produce any branches by default, so there is no
+    // boolean result to piggyback on. TODO: When we merge CompareAndSwap with
+    // CompareAndExchange and move branches here, it would make sense to conditionalize
+    // post_barriers for LS_cmp_exchange as well.
+    //
+    // CAS success path is marked more likely since we anticipate this is a performance
+    // critical path, while CAS failure path can use the penalty for going through unlikely
+    // path as backoff. Which is still better than doing a store barrier there.
+    switch (kind) {
+      case LS_get_set:
+      case LS_cmp_exchange: {
+        post_barrier(control(), load_store, base, adr, alias_idx, newval, T_OBJECT, true);
+        break;
       }
-    }
-    if (kind == LS_cmpxchg) {
-      // Emit the post barrier only when the actual store happened.
-      // This makes sense to check only for compareAndSet that can fail to set the value.
-      // CAS success path is marked more likely since we anticipate this is a performance
-      // critical path, while CAS failure path can use the penalty for going through unlikely
-      // path as backoff. Which is still better than doing a store barrier there.
-      IdealKit ideal(this);
-      ideal.if_then(load_store, BoolTest::ne, ideal.ConI(0), PROB_STATIC_FREQUENT); {
-        sync_kit(ideal);
-        post_barrier(ideal.ctrl(), load_store, base, adr, alias_idx, newval, T_OBJECT, true);
-        ideal.sync_kit(this);
-      } ideal.end_if();
-      final_sync(ideal);
-    } else {
-      post_barrier(control(), load_store, base, adr, alias_idx, newval, T_OBJECT, true);
+      case LS_cmp_swap_weak:
+      case LS_cmp_swap: {
+        IdealKit ideal(this);
+        ideal.if_then(load_store, BoolTest::ne, ideal.ConI(0), PROB_STATIC_FREQUENT); {
+          sync_kit(ideal);
+          post_barrier(ideal.ctrl(), load_store, base, adr, alias_idx, newval, T_OBJECT, true);
+          ideal.sync_kit(this);
+        } ideal.end_if();
+        final_sync(ideal);
+        break;
+      }
+      default:
+        ShouldNotReachHere();
     }
     break;
   default:
@@ -2775,7 +3049,7 @@
   Node* proj = _gvn.transform(new SCMemProjNode(load_store));
   set_memory(proj, alias_idx);
 
-  if (type == T_OBJECT && kind == LS_xchg) {
+  if (type == T_OBJECT && (kind == LS_get_set || kind == LS_cmp_exchange)) {
 #ifdef _LP64
     if (adr->bottom_type()->is_ptr_to_narrowoop()) {
       load_store = _gvn.transform(new DecodeNNode(load_store, load_store->get_ptr_type()));
@@ -2794,74 +3068,52 @@
 
   // Add the trailing membar surrounding the access
   insert_mem_bar(Op_MemBarCPUOrder);
-  insert_mem_bar(Op_MemBarAcquire);
+
+  switch (access_kind) {
+    case Relaxed:
+    case Release:
+      break; // do nothing
+    case Acquire:
+    case Volatile:
+      insert_mem_bar(Op_MemBarAcquire);
+      break;
+    default:
+      ShouldNotReachHere();
+  }
 
   assert(type2size[load_store->bottom_type()->basic_type()] == type2size[rtype], "result type should match");
   set_result(load_store);
   return true;
 }
 
-//----------------------------inline_unsafe_ordered_store----------------------
-// public native void Unsafe.putOrderedObject(Object o, long offset, Object x);
-// public native void Unsafe.putOrderedInt(Object o, long offset, int x);
-// public native void Unsafe.putOrderedLong(Object o, long offset, long x);
-bool LibraryCallKit::inline_unsafe_ordered_store(BasicType type) {
-  // This is another variant of inline_unsafe_access, differing in
-  // that it always issues store-store ("release") barrier and ensures
-  // store-atomicity (which only matters for "long").
-
-  if (callee()->is_static())  return false;  // caller must have the capability!
-
-#ifndef PRODUCT
-  {
-    ResourceMark rm;
-    // Check the signatures.
-    ciSignature* sig = callee()->signature();
-#ifdef ASSERT
-    BasicType rtype = sig->return_type()->basic_type();
-    assert(rtype == T_VOID, "must return void");
-    assert(sig->count() == 3, "has 3 arguments");
-    assert(sig->type_at(0)->basic_type() == T_OBJECT, "base is object");
-    assert(sig->type_at(1)->basic_type() == T_LONG, "offset is long");
-#endif // ASSERT
-  }
-#endif //PRODUCT
-
-  C->set_has_unsafe_access(true);  // Mark eventual nmethod as "unsafe".
-
-  // Get arguments:
-  Node* receiver = argument(0);  // type: oop
-  Node* base     = argument(1);  // type: oop
-  Node* offset   = argument(2);  // type: long
-  Node* val      = argument(4);  // type: oop, int, or long
-
-  // Null check receiver.
-  receiver = null_check(receiver);
-  if (stopped()) {
-    return true;
-  }
-
-  // Build field offset expression.
-  assert(Unsafe_field_offset_to_byte_offset(11) == 11, "fieldOffset must be byte-scaled");
-  // 32-bit machines ignore the high half of long offsets
-  offset = ConvL2X(offset);
-  Node* adr = make_unsafe_address(base, offset);
-  const TypePtr *adr_type = _gvn.type(adr)->isa_ptr();
-  const Type *value_type = Type::get_const_basic_type(type);
-  Compile::AliasType* alias_type = C->alias_type(adr_type);
-
-  insert_mem_bar(Op_MemBarRelease);
-  insert_mem_bar(Op_MemBarCPUOrder);
-  // Ensure that the store is atomic for longs:
-  const bool require_atomic_access = true;
-  Node* store;
-  if (type == T_OBJECT) // reference stores need a store barrier.
-    store = store_oop_to_unknown(control(), base, adr, adr_type, val, type, MemNode::release);
-  else {
-    store = store_to_memory(control(), adr, val, type, adr_type, MemNode::release, require_atomic_access);
-  }
-  insert_mem_bar(Op_MemBarCPUOrder);
-  return true;
+MemNode::MemOrd LibraryCallKit::access_kind_to_memord_LS(AccessKind kind, bool is_store) {
+  MemNode::MemOrd mo = MemNode::unset;
+  switch(kind) {
+    case Opaque:
+    case Relaxed:  mo = MemNode::unordered; break;
+    case Acquire:  mo = MemNode::acquire;   break;
+    case Release:  mo = MemNode::release;   break;
+    case Volatile: mo = is_store ? MemNode::release : MemNode::acquire; break;
+    default:
+      ShouldNotReachHere();
+  }
+  guarantee(mo != MemNode::unset, "Should select memory ordering");
+  return mo;
+}
+
+MemNode::MemOrd LibraryCallKit::access_kind_to_memord(AccessKind kind) {
+  MemNode::MemOrd mo = MemNode::unset;
+  switch(kind) {
+    case Opaque:
+    case Relaxed:  mo = MemNode::unordered; break;
+    case Acquire:  mo = MemNode::acquire;   break;
+    case Release:  mo = MemNode::release;   break;
+    case Volatile: mo = MemNode::seqcst;    break;
+    default:
+      ShouldNotReachHere();
+  }
+  guarantee(mo != MemNode::unset, "Should select memory ordering");
+  return mo;
 }
 
 bool LibraryCallKit::inline_unsafe_fence(vmIntrinsics::ID id) {
--- a/src/share/vm/opto/loopTransform.cpp	Wed Mar 09 14:54:18 2016 +0100
+++ b/src/share/vm/opto/loopTransform.cpp	Wed Mar 09 14:18:12 2016 +0100
@@ -2417,6 +2417,14 @@
             ((bol->in(1)->Opcode() == Op_StorePConditional ) ||
              (bol->in(1)->Opcode() == Op_StoreIConditional ) ||
              (bol->in(1)->Opcode() == Op_StoreLConditional ) ||
+             (bol->in(1)->Opcode() == Op_CompareAndExchangeI ) ||
+             (bol->in(1)->Opcode() == Op_CompareAndExchangeL ) ||
+             (bol->in(1)->Opcode() == Op_CompareAndExchangeP ) ||
+             (bol->in(1)->Opcode() == Op_CompareAndExchangeN ) ||
+             (bol->in(1)->Opcode() == Op_WeakCompareAndSwapI ) ||
+             (bol->in(1)->Opcode() == Op_WeakCompareAndSwapL ) ||
+             (bol->in(1)->Opcode() == Op_WeakCompareAndSwapP ) ||
+             (bol->in(1)->Opcode() == Op_WeakCompareAndSwapN ) ||
              (bol->in(1)->Opcode() == Op_CompareAndSwapI ) ||
              (bol->in(1)->Opcode() == Op_CompareAndSwapL ) ||
              (bol->in(1)->Opcode() == Op_CompareAndSwapP ) ||
--- a/src/share/vm/opto/loopnode.hpp	Wed Mar 09 14:54:18 2016 +0100
+++ b/src/share/vm/opto/loopnode.hpp	Wed Mar 09 14:18:12 2016 +0100
@@ -285,20 +285,29 @@
   Node *incr() const                { Node *tmp = cmp_node(); return (tmp && tmp->req()==3) ? tmp->in(1) : NULL; }
   Node *limit() const               { Node *tmp = cmp_node(); return (tmp && tmp->req()==3) ? tmp->in(2) : NULL; }
   Node *stride() const              { Node *tmp = incr    (); return (tmp && tmp->req()==3) ? tmp->in(2) : NULL; }
-  Node *phi() const                 { Node *tmp = incr    (); return (tmp && tmp->req()==3) ? tmp->in(1) : NULL; }
   Node *init_trip() const           { Node *tmp = phi     (); return (tmp && tmp->req()==3) ? tmp->in(1) : NULL; }
   int stride_con() const;
   bool stride_is_con() const        { Node *tmp = stride  (); return (tmp != NULL && tmp->is_Con()); }
   BoolTest::mask test_trip() const  { return in(TestValue)->as_Bool()->_test._test; }
+  PhiNode *phi() const {
+    Node *tmp = incr();
+    if (tmp && tmp->req() == 3) {
+      Node* phi = tmp->in(1);
+      if (phi->is_Phi()) {
+        return phi->as_Phi();
+      }
+    }
+    return NULL;
+  }
   CountedLoopNode *loopnode() const {
     // The CountedLoopNode that goes with this CountedLoopEndNode may
     // have been optimized out by the IGVN so be cautious with the
     // pattern matching on the graph
-    if (phi() == NULL) {
+    PhiNode* iv_phi = phi();
+    if (iv_phi == NULL) {
       return NULL;
     }
-    assert(phi()->is_Phi(), "should be PhiNode");
-    Node *ln = phi()->in(0);
+    Node *ln = iv_phi->in(0);
     if (ln->is_CountedLoop() && ln->as_CountedLoop()->loopexit() == this) {
       return (CountedLoopNode*)ln;
     }
--- a/src/share/vm/opto/macroArrayCopy.cpp	Wed Mar 09 14:54:18 2016 +0100
+++ b/src/share/vm/opto/macroArrayCopy.cpp	Wed Mar 09 14:18:12 2016 +0100
@@ -880,8 +880,14 @@
       Node* sptr = basic_plus_adr(src,  src_off);
       Node* dptr = basic_plus_adr(dest, dest_off);
       uint alias_idx = C->get_alias_index(adr_type);
-      Node* sval = transform_later(LoadNode::make(_igvn, *ctrl, (*mem)->memory_at(alias_idx), sptr, adr_type, TypeInt::INT, T_INT, MemNode::unordered));
-      Node* st = transform_later(StoreNode::make(_igvn, *ctrl, (*mem)->memory_at(alias_idx), dptr, adr_type, sval, T_INT, MemNode::unordered));
+      bool is_mismatched = (basic_elem_type != T_INT);
+      Node* sval = transform_later(
+          LoadNode::make(_igvn, *ctrl, (*mem)->memory_at(alias_idx), sptr, adr_type,
+                         TypeInt::INT, T_INT, MemNode::unordered, LoadNode::DependsOnlyOnTest,
+                         false /*unaligned*/, is_mismatched));
+      Node* st = transform_later(
+          StoreNode::make(_igvn, *ctrl, (*mem)->memory_at(alias_idx), dptr, adr_type,
+                          sval, T_INT, MemNode::unordered));
       (*mem)->set_memory_at(alias_idx, st);
       src_off += BytesPerInt;
       dest_off += BytesPerInt;
--- a/src/share/vm/opto/matcher.cpp	Wed Mar 09 14:54:18 2016 +0100
+++ b/src/share/vm/opto/matcher.cpp	Wed Mar 09 14:18:12 2016 +0100
@@ -2307,6 +2307,14 @@
       case Op_StorePConditional:
       case Op_StoreIConditional:
       case Op_StoreLConditional:
+      case Op_CompareAndExchangeI:
+      case Op_CompareAndExchangeL:
+      case Op_CompareAndExchangeP:
+      case Op_CompareAndExchangeN:
+      case Op_WeakCompareAndSwapI:
+      case Op_WeakCompareAndSwapL:
+      case Op_WeakCompareAndSwapP:
+      case Op_WeakCompareAndSwapN:
       case Op_CompareAndSwapI:
       case Op_CompareAndSwapL:
       case Op_CompareAndSwapP:
@@ -2407,8 +2415,10 @@
 
       bool push_it = false;
       if( proj->Opcode() == Op_IfTrue ) {
+#ifndef PRODUCT
         extern int all_null_checks_found;
         all_null_checks_found++;
+#endif
         if( b->_test._test == BoolTest::ne ) {
           push_it = true;
         }
@@ -2522,6 +2532,14 @@
     // that a monitor exit operation contains a serializing instruction.
 
     if (xop == Op_MemBarVolatile ||
+        xop == Op_CompareAndExchangeI ||
+        xop == Op_CompareAndExchangeL ||
+        xop == Op_CompareAndExchangeP ||
+        xop == Op_CompareAndExchangeN ||
+        xop == Op_WeakCompareAndSwapL ||
+        xop == Op_WeakCompareAndSwapP ||
+        xop == Op_WeakCompareAndSwapN ||
+        xop == Op_WeakCompareAndSwapI ||
         xop == Op_CompareAndSwapL ||
         xop == Op_CompareAndSwapP ||
         xop == Op_CompareAndSwapN ||
--- a/src/share/vm/opto/memnode.cpp	Wed Mar 09 14:54:18 2016 +0100
+++ b/src/share/vm/opto/memnode.cpp	Wed Mar 09 14:18:12 2016 +0100
@@ -1582,6 +1582,21 @@
   return NULL;
 }
 
+static bool is_mismatched_access(ciConstant con, BasicType loadbt) {
+  BasicType conbt = con.basic_type();
+  switch (conbt) {
+    case T_BOOLEAN: conbt = T_BYTE;   break;
+    case T_ARRAY:   conbt = T_OBJECT; break;
+  }
+  switch (loadbt) {
+    case T_BOOLEAN:   loadbt = T_BYTE;   break;
+    case T_NARROWOOP: loadbt = T_OBJECT; break;
+    case T_ARRAY:     loadbt = T_OBJECT; break;
+    case T_ADDRESS:   loadbt = T_OBJECT; break;
+  }
+  return (conbt != loadbt);
+}
+
 // Try to constant-fold a stable array element.
 static const Type* fold_stable_ary_elem(const TypeAryPtr* ary, int off, BasicType loadbt) {
   assert(ary->const_oop(), "array should be constant");
@@ -1590,10 +1605,12 @@
   // Decode the results of GraphKit::array_element_address.
   ciArray* aobj = ary->const_oop()->as_array();
   ciConstant con = aobj->element_value_by_offset(off);
-
   if (con.basic_type() != T_ILLEGAL && !con.is_null_or_zero()) {
+    bool is_mismatched = is_mismatched_access(con, loadbt);
+    assert(!is_mismatched, "conbt=%s; loadbt=%s", type2name(con.basic_type()), type2name(loadbt));
     const Type* con_type = Type::make_from_constant(con);
-    if (con_type != NULL) {
+    // Guard against erroneous constant folding.
+    if (!is_mismatched && con_type != NULL) {
       if (con_type->isa_aryptr()) {
         // Join with the array element type, in case it is also stable.
         int dim = ary->stable_dimension();
@@ -1642,7 +1659,7 @@
     const bool off_beyond_header = ((uint)off >= (uint)min_base_off);
 
     // Try to constant-fold a stable array element.
-    if (FoldStableValues && ary->is_stable() && ary->const_oop() != NULL) {
+    if (FoldStableValues && !is_mismatched_access() && ary->is_stable() && ary->const_oop() != NULL) {
       // Make sure the reference is not into the header and the offset is constant
       if (off_beyond_header && adr->is_AddP() && off != Type::OffsetBot) {
         const Type* con_type = fold_stable_ary_elem(ary, off, memory_type());
--- a/src/share/vm/opto/memnode.hpp	Wed Mar 09 14:54:18 2016 +0100
+++ b/src/share/vm/opto/memnode.hpp	Wed Mar 09 14:18:12 2016 +0100
@@ -56,7 +56,9 @@
   };
   typedef enum { unordered = 0,
                  acquire,       // Load has to acquire or be succeeded by MemBarAcquire.
-                 release        // Store has to release or be preceded by MemBarRelease.
+                 release,       // Store has to release or be preceded by MemBarRelease.
+                 seqcst,        // LoadStore has to have both acquire and release semantics.
+                 unset          // The memory ordering is not set (used for testing)
   } MemOrd;
 protected:
   MemNode( Node *c0, Node *c1, Node *c2, const TypePtr* at )
@@ -848,34 +850,121 @@
   virtual uint ideal_reg() const { return Op_RegFlags; }
 };
 
+class CompareAndSwapNode : public LoadStoreConditionalNode {
+private:
+  const MemNode::MemOrd _mem_ord;
+public:
+  CompareAndSwapNode( Node *c, Node *mem, Node *adr, Node *val, Node *ex, MemNode::MemOrd mem_ord) : LoadStoreConditionalNode(c, mem, adr, val, ex), _mem_ord(mem_ord) {}
+  MemNode::MemOrd order() const {
+    return _mem_ord;
+  }
+};
+
+class CompareAndExchangeNode : public LoadStoreNode {
+private:
+  const MemNode::MemOrd _mem_ord;
+public:
+  enum {
+    ExpectedIn = MemNode::ValueIn+1 // One more input than MemNode
+  };
+  CompareAndExchangeNode( Node *c, Node *mem, Node *adr, Node *val, Node *ex, MemNode::MemOrd mem_ord, const TypePtr* at, const Type* t) :
+    LoadStoreNode(c, mem, adr, val, at, t, 5), _mem_ord(mem_ord) {
+     init_req(ExpectedIn, ex );
+  }
+
+  MemNode::MemOrd order() const {
+    return _mem_ord;
+  }
+};
 
 //------------------------------CompareAndSwapLNode---------------------------
-class CompareAndSwapLNode : public LoadStoreConditionalNode {
+class CompareAndSwapLNode : public CompareAndSwapNode {
 public:
-  CompareAndSwapLNode( Node *c, Node *mem, Node *adr, Node *val, Node *ex) : LoadStoreConditionalNode(c, mem, adr, val, ex) { }
+  CompareAndSwapLNode( Node *c, Node *mem, Node *adr, Node *val, Node *ex, MemNode::MemOrd mem_ord) : CompareAndSwapNode(c, mem, adr, val, ex, mem_ord) { }
   virtual int Opcode() const;
 };
 
 
 //------------------------------CompareAndSwapINode---------------------------
-class CompareAndSwapINode : public LoadStoreConditionalNode {
+class CompareAndSwapINode : public CompareAndSwapNode {
 public:
-  CompareAndSwapINode( Node *c, Node *mem, Node *adr, Node *val, Node *ex) : LoadStoreConditionalNode(c, mem, adr, val, ex) { }
+  CompareAndSwapINode( Node *c, Node *mem, Node *adr, Node *val, Node *ex, MemNode::MemOrd mem_ord) : CompareAndSwapNode(c, mem, adr, val, ex, mem_ord) { }
   virtual int Opcode() const;
 };
 
 
 //------------------------------CompareAndSwapPNode---------------------------
-class CompareAndSwapPNode : public LoadStoreConditionalNode {
+class CompareAndSwapPNode : public CompareAndSwapNode {
 public:
-  CompareAndSwapPNode( Node *c, Node *mem, Node *adr, Node *val, Node *ex) : LoadStoreConditionalNode(c, mem, adr, val, ex) { }
+  CompareAndSwapPNode( Node *c, Node *mem, Node *adr, Node *val, Node *ex, MemNode::MemOrd mem_ord) : CompareAndSwapNode(c, mem, adr, val, ex, mem_ord) { }
   virtual int Opcode() const;
 };
 
 //------------------------------CompareAndSwapNNode---------------------------
-class CompareAndSwapNNode : public LoadStoreConditionalNode {
+class CompareAndSwapNNode : public CompareAndSwapNode {
 public:
-  CompareAndSwapNNode( Node *c, Node *mem, Node *adr, Node *val, Node *ex) : LoadStoreConditionalNode(c, mem, adr, val, ex) { }
+  CompareAndSwapNNode( Node *c, Node *mem, Node *adr, Node *val, Node *ex, MemNode::MemOrd mem_ord) : CompareAndSwapNode(c, mem, adr, val, ex, mem_ord) { }
+  virtual int Opcode() const;
+};
+
+
+//------------------------------WeakCompareAndSwapLNode---------------------------
+class WeakCompareAndSwapLNode : public CompareAndSwapNode {
+public:
+  WeakCompareAndSwapLNode( Node *c, Node *mem, Node *adr, Node *val, Node *ex, MemNode::MemOrd mem_ord) : CompareAndSwapNode(c, mem, adr, val, ex, mem_ord) { }
+  virtual int Opcode() const;
+};
+
+
+//------------------------------WeakCompareAndSwapINode---------------------------
+class WeakCompareAndSwapINode : public CompareAndSwapNode {
+public:
+  WeakCompareAndSwapINode( Node *c, Node *mem, Node *adr, Node *val, Node *ex, MemNode::MemOrd mem_ord) : CompareAndSwapNode(c, mem, adr, val, ex, mem_ord) { }
+  virtual int Opcode() const;
+};
+
+
+//------------------------------WeakCompareAndSwapPNode---------------------------
+class WeakCompareAndSwapPNode : public CompareAndSwapNode {
+public:
+  WeakCompareAndSwapPNode( Node *c, Node *mem, Node *adr, Node *val, Node *ex, MemNode::MemOrd mem_ord) : CompareAndSwapNode(c, mem, adr, val, ex, mem_ord) { }
+  virtual int Opcode() const;
+};
+
+//------------------------------WeakCompareAndSwapNNode---------------------------
+class WeakCompareAndSwapNNode : public CompareAndSwapNode {
+public:
+  WeakCompareAndSwapNNode( Node *c, Node *mem, Node *adr, Node *val, Node *ex, MemNode::MemOrd mem_ord) : CompareAndSwapNode(c, mem, adr, val, ex, mem_ord) { }
+  virtual int Opcode() const;
+};
+
+//------------------------------CompareAndExchangeLNode---------------------------
+class CompareAndExchangeLNode : public CompareAndExchangeNode {
+public:
+  CompareAndExchangeLNode( Node *c, Node *mem, Node *adr, Node *val, Node *ex, const TypePtr* at, MemNode::MemOrd mem_ord) : CompareAndExchangeNode(c, mem, adr, val, ex, mem_ord, at, TypeLong::LONG) { }
+  virtual int Opcode() const;
+};
+
+
+//------------------------------CompareAndExchangeINode---------------------------
+class CompareAndExchangeINode : public CompareAndExchangeNode {
+public:
+  CompareAndExchangeINode( Node *c, Node *mem, Node *adr, Node *val, Node *ex, const TypePtr* at, MemNode::MemOrd mem_ord) : CompareAndExchangeNode(c, mem, adr, val, ex, mem_ord, at, TypeInt::INT) { }
+  virtual int Opcode() const;
+};
+
+
+//------------------------------CompareAndExchangePNode---------------------------
+class CompareAndExchangePNode : public CompareAndExchangeNode {
+public:
+  CompareAndExchangePNode( Node *c, Node *mem, Node *adr, Node *val, Node *ex, const TypePtr* at, const Type* t, MemNode::MemOrd mem_ord) : CompareAndExchangeNode(c, mem, adr, val, ex, mem_ord, at, t) { }
+  virtual int Opcode() const;
+};
+
+//------------------------------CompareAndExchangeNNode---------------------------
+class CompareAndExchangeNNode : public CompareAndExchangeNode {
+public:
+  CompareAndExchangeNNode( Node *c, Node *mem, Node *adr, Node *val, Node *ex, const TypePtr* at, const Type* t, MemNode::MemOrd mem_ord) : CompareAndExchangeNode(c, mem, adr, val, ex, mem_ord, at, t) { }
   virtual int Opcode() const;
 };
 
--- a/src/share/vm/opto/node.cpp	Wed Mar 09 14:54:18 2016 +0100
+++ b/src/share/vm/opto/node.cpp	Wed Mar 09 14:18:12 2016 +0100
@@ -576,17 +576,11 @@
 
 //------------------------------~Node------------------------------------------
 // Fancy destructor; eagerly attempt to reclaim Node numberings and storage
-extern int reclaim_idx ;
-extern int reclaim_in  ;
-extern int reclaim_node;
 void Node::destruct() {
   // Eagerly reclaim unique Node numberings
   Compile* compile = Compile::current();
   if ((uint)_idx+1 == compile->unique()) {
     compile->set_unique(compile->unique()-1);
-#ifdef ASSERT
-    reclaim_idx++;
-#endif
   }
   // Clear debug info:
   Node_Notes* nn = compile->node_notes_at(_idx);
@@ -604,43 +598,25 @@
   int out_edge_size = _outmax*sizeof(void*);
   char *edge_end = ((char*)_in) + edge_size;
   char *out_array = (char*)(_out == NO_OUT_ARRAY? NULL: _out);
-  char *out_edge_end = out_array + out_edge_size;
   int node_size = size_of();
 
   // Free the output edge array
   if (out_edge_size > 0) {
-#ifdef ASSERT
-    if( out_edge_end == compile->node_arena()->hwm() )
-      reclaim_in  += out_edge_size;  // count reclaimed out edges with in edges
-#endif
     compile->node_arena()->Afree(out_array, out_edge_size);
   }
 
   // Free the input edge array and the node itself
   if( edge_end == (char*)this ) {
-#ifdef ASSERT
-    if( edge_end+node_size == compile->node_arena()->hwm() ) {
-      reclaim_in  += edge_size;
-      reclaim_node+= node_size;
-    }
-#else
     // It was; free the input array and object all in one hit
+#ifndef ASSERT
     compile->node_arena()->Afree(_in,edge_size+node_size);
 #endif
   } else {
-
     // Free just the input array
-#ifdef ASSERT
-    if( edge_end == compile->node_arena()->hwm() )
-      reclaim_in  += edge_size;
-#endif
     compile->node_arena()->Afree(_in,edge_size);
 
     // Free just the object
-#ifdef ASSERT
-    if( ((char*)this) + node_size == compile->node_arena()->hwm() )
-      reclaim_node+= node_size;
-#else
+#ifndef ASSERT
     compile->node_arena()->Afree(this,node_size);
 #endif
   }
--- a/src/share/vm/opto/node.hpp	Wed Mar 09 14:54:18 2016 +0100
+++ b/src/share/vm/opto/node.hpp	Wed Mar 09 14:18:12 2016 +0100
@@ -60,6 +60,8 @@
 class CodeBuffer;
 class ConstraintCastNode;
 class ConNode;
+class CompareAndSwapNode;
+class CompareAndExchangeNode;
 class CountedLoopNode;
 class CountedLoopEndNode;
 class DecodeNarrowPtrNode;
@@ -679,6 +681,9 @@
       DEFINE_CLASS_ID(Store, Mem, 1)
         DEFINE_CLASS_ID(StoreVector, Store, 0)
       DEFINE_CLASS_ID(LoadStore, Mem, 2)
+        DEFINE_CLASS_ID(LoadStoreConditional, LoadStore, 0)
+          DEFINE_CLASS_ID(CompareAndSwap, LoadStoreConditional, 0)
+        DEFINE_CLASS_ID(CompareAndExchangeNode, LoadStore, 1)
 
     DEFINE_CLASS_ID(Region, Node, 5)
       DEFINE_CLASS_ID(Loop, Region, 0)
--- a/src/share/vm/opto/parse.hpp	Wed Mar 09 14:54:18 2016 +0100
+++ b/src/share/vm/opto/parse.hpp	Wed Mar 09 14:18:12 2016 +0100
@@ -104,13 +104,6 @@
   // For temporary (stack-allocated, stateless) ilts:
   InlineTree(Compile* c, ciMethod* callee_method, JVMState* caller_jvms, float site_invoke_ratio, int max_inline_level);
 
-  // InlineTree enum
-  enum InlineStyle {
-    Inline_do_not_inline             =   0, //
-    Inline_cha_is_monomorphic        =   1, //
-    Inline_type_profile_monomorphic  =   2  //
-  };
-
   // See if it is OK to inline.
   // The receiver is the inline tree for the caller.
   //
@@ -349,9 +342,6 @@
   Block*            _block;     // block currently getting parsed
   ciBytecodeStream  _iter;      // stream of this method's bytecodes
 
-  int           _blocks_merged; // Progress meter: state merges from BB preds
-  int           _blocks_parsed; // Progress meter: BBs actually parsed
-
   const FastLockNode* _synch_lock; // FastLockNode for synchronized method
 
 #ifndef PRODUCT
--- a/src/share/vm/opto/parse1.cpp	Wed Mar 09 14:54:18 2016 +0100
+++ b/src/share/vm/opto/parse1.cpp	Wed Mar 09 14:18:12 2016 +0100
@@ -45,6 +45,7 @@
 // the most. Some of the non-static variables are needed in bytecodeInfo.cpp
 // and eventually should be encapsulated in a proper class (gri 8/18/98).
 
+#ifndef PRODUCT
 int nodes_created              = 0;
 int methods_parsed             = 0;
 int methods_seen               = 0;
@@ -53,42 +54,42 @@
 
 int explicit_null_checks_inserted = 0;
 int explicit_null_checks_elided   = 0;
-int all_null_checks_found         = 0, implicit_null_checks              = 0;
-int implicit_null_throws          = 0;
+int all_null_checks_found         = 0;
+int implicit_null_checks          = 0;
 
-int reclaim_idx  = 0;
-int reclaim_in   = 0;
-int reclaim_node = 0;
-
-#ifndef PRODUCT
 bool Parse::BytecodeParseHistogram::_initialized = false;
 uint Parse::BytecodeParseHistogram::_bytecodes_parsed [Bytecodes::number_of_codes];
 uint Parse::BytecodeParseHistogram::_nodes_constructed[Bytecodes::number_of_codes];
 uint Parse::BytecodeParseHistogram::_nodes_transformed[Bytecodes::number_of_codes];
 uint Parse::BytecodeParseHistogram::_new_values       [Bytecodes::number_of_codes];
-#endif
 
 //------------------------------print_statistics-------------------------------
-#ifndef PRODUCT
 void Parse::print_statistics() {
   tty->print_cr("--- Compiler Statistics ---");
   tty->print("Methods seen: %d  Methods parsed: %d", methods_seen, methods_parsed);
   tty->print("  Nodes created: %d", nodes_created);
   tty->cr();
-  if (methods_seen != methods_parsed)
+  if (methods_seen != methods_parsed) {
     tty->print_cr("Reasons for parse failures (NOT cumulative):");
+  }
   tty->print_cr("Blocks parsed: %d  Blocks seen: %d", blocks_parsed, blocks_seen);
 
-  if( explicit_null_checks_inserted )
-    tty->print_cr("%d original NULL checks - %d elided (%2d%%); optimizer leaves %d,", explicit_null_checks_inserted, explicit_null_checks_elided, (100*explicit_null_checks_elided)/explicit_null_checks_inserted, all_null_checks_found);
-  if( all_null_checks_found )
+  if (explicit_null_checks_inserted) {
+    tty->print_cr("%d original NULL checks - %d elided (%2d%%); optimizer leaves %d,",
+                  explicit_null_checks_inserted, explicit_null_checks_elided,
+                  (100*explicit_null_checks_elided)/explicit_null_checks_inserted,
+                  all_null_checks_found);
+  }
+  if (all_null_checks_found) {
     tty->print_cr("%d made implicit (%2d%%)", implicit_null_checks,
                   (100*implicit_null_checks)/all_null_checks_found);
-  if( implicit_null_throws )
+  }
+  if (SharedRuntime::_implicit_null_throws) {
     tty->print_cr("%d implicit null exceptions at runtime",
-                  implicit_null_throws);
+                  SharedRuntime::_implicit_null_throws);
+  }
 
-  if( PrintParseStatistics && BytecodeParseHistogram::initialized() ) {
+  if (PrintParseStatistics && BytecodeParseHistogram::initialized()) {
     BytecodeParseHistogram::print();
   }
 }
@@ -495,7 +496,7 @@
     C->dependencies()->assert_evol_method(method());
   }
 
-  methods_seen++;
+  NOT_PRODUCT(methods_seen++);
 
   // Do some special top-level things.
   if (depth() == 1 && C->is_osr_compilation()) {
@@ -530,8 +531,8 @@
   }
 #endif
 
+#ifndef PRODUCT
   methods_parsed++;
-#ifndef PRODUCT
   // add method size here to guarantee that inlined methods are added too
   if (CITime)
     _total_bytes_compiled += method()->code_size();
@@ -652,7 +653,7 @@
         continue;
       }
 
-      blocks_parsed++;
+      NOT_PRODUCT(blocks_parsed++);
 
       progress = true;
       if (block->is_loop_head() || block->is_handler() || has_irreducible && !block->is_ready()) {
@@ -712,9 +713,9 @@
     }
   }
 
+#ifndef PRODUCT
   blocks_seen += block_count();
 
-#ifndef PRODUCT
   // Make sure there are no half-processed blocks remaining.
   // Every remaining unprocessed block is dead and may be ignored now.
   for (int rpo = 0; rpo < block_count(); rpo++) {
@@ -1446,7 +1447,6 @@
 
   assert(block()->is_merged(), "must be merged before being parsed");
   block()->mark_parsed();
-  ++_blocks_parsed;
 
   // Set iterator to start of block.
   iter().reset_to_bci(block()->start());
@@ -1596,9 +1596,6 @@
       return;
     }
 
-    // Record that a new block has been merged.
-    ++_blocks_merged;
-
     // Make a region if we know there are multiple or unpredictable inputs.
     // (Also, if this is a plain fall-through, we might see another region,
     // which must not be allowed into this block's map.)
--- a/src/share/vm/opto/parse2.cpp	Wed Mar 09 14:54:18 2016 +0100
+++ b/src/share/vm/opto/parse2.cpp	Wed Mar 09 14:18:12 2016 +0100
@@ -44,8 +44,10 @@
 #include "runtime/deoptimization.hpp"
 #include "runtime/sharedRuntime.hpp"
 
+#ifndef PRODUCT
 extern int explicit_null_checks_inserted,
            explicit_null_checks_elided;
+#endif
 
 //---------------------------------array_load----------------------------------
 void Parse::array_load(BasicType elem_type) {
@@ -997,7 +999,7 @@
     return;
   }
 
-  explicit_null_checks_inserted++;
+  NOT_PRODUCT(explicit_null_checks_inserted++);
 
   // Generate real control flow
   Node   *tst = _gvn.transform( new BoolNode( c, btest ) );
@@ -1013,7 +1015,7 @@
     set_control(iftrue);
 
     if (stopped()) {            // Path is dead?
-      explicit_null_checks_elided++;
+      NOT_PRODUCT(explicit_null_checks_elided++);
       if (C->eliminate_boxing()) {
         // Mark the successor block as parsed
         branch_block->next_path_num();
@@ -1033,7 +1035,7 @@
   set_control(iffalse);
 
   if (stopped()) {              // Path is dead?
-    explicit_null_checks_elided++;
+    NOT_PRODUCT(explicit_null_checks_elided++);
     if (C->eliminate_boxing()) {
       // Mark the successor block as parsed
       next_block->next_path_num();
--- a/src/share/vm/opto/phaseX.cpp	Wed Mar 09 14:54:18 2016 +0100
+++ b/src/share/vm/opto/phaseX.cpp	Wed Mar 09 14:18:12 2016 +0100
@@ -1471,6 +1471,27 @@
   }
 }
 
+// Return counted loop Phi if as a counted loop exit condition, cmp
+// compares the the induction variable with n
+static PhiNode* countedloop_phi_from_cmp(CmpINode* cmp, Node* n) {
+  for (DUIterator_Fast imax, i = cmp->fast_outs(imax); i < imax; i++) {
+    Node* bol = cmp->fast_out(i);
+    for (DUIterator_Fast i2max, i2 = bol->fast_outs(i2max); i2 < i2max; i2++) {
+      Node* iff = bol->fast_out(i2);
+      if (iff->is_CountedLoopEnd()) {
+        CountedLoopEndNode* cle = iff->as_CountedLoopEnd();
+        if (cle->limit() == n) {
+          PhiNode* phi = cle->phi();
+          if (phi != NULL) {
+            return phi;
+          }
+        }
+      }
+    }
+  }
+  return NULL;
+}
+
 void PhaseIterGVN::add_users_to_worklist( Node *n ) {
   add_users_to_worklist0(n);
 
@@ -1500,18 +1521,7 @@
         Node* bol = use->raw_out(0);
         if (bol->outcnt() > 0) {
           Node* iff = bol->raw_out(0);
-          if (use_op == Op_CmpI &&
-              iff->is_CountedLoopEnd()) {
-            CountedLoopEndNode* cle = iff->as_CountedLoopEnd();
-            if (cle->limit() == n && cle->phi() != NULL) {
-              // If an opaque node feeds into the limit condition of a
-              // CountedLoop, we need to process the Phi node for the
-              // induction variable when the opaque node is removed:
-              // the range of values taken by the Phi is now known and
-              // so its type is also known.
-              _worklist.push(cle->phi());
-            }
-          } else if (iff->outcnt() == 2) {
+          if (iff->outcnt() == 2) {
             // Look for the 'is_x2logic' pattern: "x ? : 0 : 1" and put the
             // phi merging either 0 or 1 onto the worklist
             Node* ifproj0 = iff->raw_out(0);
@@ -1526,6 +1536,15 @@
         }
       }
       if (use_op == Op_CmpI) {
+        Node* phi = countedloop_phi_from_cmp((CmpINode*)use, n);
+        if (phi != NULL) {
+          // If an opaque node feeds into the limit condition of a
+          // CountedLoop, we need to process the Phi node for the
+          // induction variable when the opaque node is removed:
+          // the range of values taken by the Phi is now known and
+          // so its type is also known.
+          _worklist.push(phi);
+        }
         Node* in1 = use->in(1);
         for (uint i = 0; i < in1->outcnt(); i++) {
           if (in1->raw_out(i)->Opcode() == Op_CastII) {
@@ -1714,6 +1733,15 @@
             }
           }
         }
+        // If n is used in a counted loop exit condition then the type
+        // of the counted loop's Phi depends on the type of n. See
+        // PhiNode::Value().
+        if (m_op == Op_CmpI) {
+          PhiNode* phi = countedloop_phi_from_cmp((CmpINode*)m, n);
+          if (phi != NULL) {
+            worklist.push(phi);
+          }
+        }
       }
     }
   }
--- a/src/share/vm/opto/phaseX.hpp	Wed Mar 09 14:54:18 2016 +0100
+++ b/src/share/vm/opto/phaseX.hpp	Wed Mar 09 14:18:12 2016 +0100
@@ -431,7 +431,7 @@
 // Phase for iteratively performing local, pessimistic GVN-style optimizations.
 // and ideal transformations on the graph.
 class PhaseIterGVN : public PhaseGVN {
- private:
+private:
   bool _delay_transform;  // When true simply register the node when calling transform
                           // instead of actually optimizing it
 
--- a/src/share/vm/prims/jvmtiCodeBlobEvents.cpp	Wed Mar 09 14:54:18 2016 +0100
+++ b/src/share/vm/prims/jvmtiCodeBlobEvents.cpp	Wed Mar 09 14:18:12 2016 +0100
@@ -173,9 +173,7 @@
   _global_code_blobs = new (ResourceObj::C_HEAP, mtInternal) GrowableArray<JvmtiCodeBlobDesc*>(50,true);
 
   // iterate over the stub code descriptors and put them in the list first.
-  int index = 0;
-  StubCodeDesc* desc;
-  while ((desc = StubCodeDesc::desc_for_index(++index)) != NULL) {
+  for (StubCodeDesc* desc = StubCodeDesc::first(); desc != NULL; desc = StubCodeDesc::next(desc)) {
     _global_code_blobs->append(new JvmtiCodeBlobDesc(desc->name(), desc->begin(), desc->end()));
   }
 
--- a/src/share/vm/prims/unsafe.cpp	Wed Mar 09 14:54:18 2016 +0100
+++ b/src/share/vm/prims/unsafe.cpp	Wed Mar 09 14:18:12 2016 +0100
@@ -1117,6 +1117,44 @@
 
 // JSR166 ------------------------------------------------------------------
 
+UNSAFE_ENTRY(jobject, Unsafe_CompareAndExchangeObject(JNIEnv *env, jobject unsafe, jobject obj, jlong offset, jobject e_h, jobject x_h))
+  UnsafeWrapper("Unsafe_CompareAndExchangeObject");
+  oop x = JNIHandles::resolve(x_h);
+  oop e = JNIHandles::resolve(e_h);
+  oop p = JNIHandles::resolve(obj);
+  HeapWord* addr = (HeapWord *)index_oop_from_field_offset_long(p, offset);
+  oop res = oopDesc::atomic_compare_exchange_oop(x, addr, e, true);
+  if (res == e)
+    update_barrier_set((void*)addr, x);
+  return JNIHandles::make_local(env, res);
+UNSAFE_END
+
+UNSAFE_ENTRY(jint, Unsafe_CompareAndExchangeInt(JNIEnv *env, jobject unsafe, jobject obj, jlong offset, jint e, jint x))
+  UnsafeWrapper("Unsafe_CompareAndExchangeInt");
+  oop p = JNIHandles::resolve(obj);
+  jint* addr = (jint *) index_oop_from_field_offset_long(p, offset);
+  return (jint)(Atomic::cmpxchg(x, addr, e));
+UNSAFE_END
+
+UNSAFE_ENTRY(jlong, Unsafe_CompareAndExchangeLong(JNIEnv *env, jobject unsafe, jobject obj, jlong offset, jlong e, jlong x))
+  UnsafeWrapper("Unsafe_CompareAndExchangeLong");
+  Handle p (THREAD, JNIHandles::resolve(obj));
+  jlong* addr = (jlong*)(index_oop_from_field_offset_long(p(), offset));
+#ifdef SUPPORTS_NATIVE_CX8
+  return (jlong)(Atomic::cmpxchg(x, addr, e));
+#else
+  if (VM_Version::supports_cx8())
+    return (jlong)(Atomic::cmpxchg(x, addr, e));
+  else {
+    MutexLockerEx mu(UnsafeJlong_lock, Mutex::_no_safepoint_check_flag);
+    jlong val = Atomic::load(addr);
+    if (val == e)
+      Atomic::store(x, addr);
+    return val;
+  }
+#endif
+UNSAFE_END
+
 UNSAFE_ENTRY(jboolean, Unsafe_CompareAndSwapObject(JNIEnv *env, jobject unsafe, jobject obj, jlong offset, jobject e_h, jobject x_h))
   UnsafeWrapper("Unsafe_CompareAndSwapObject");
   oop x = JNIHandles::resolve(x_h);
@@ -1384,6 +1422,10 @@
     {CC "compareAndSwapObject", CC "(" OBJ "J" OBJ "" OBJ ")Z", FN_PTR(Unsafe_CompareAndSwapObject)},
     {CC "compareAndSwapInt",  CC "(" OBJ "J""I""I"")Z",  FN_PTR(Unsafe_CompareAndSwapInt)},
     {CC "compareAndSwapLong", CC "(" OBJ "J""J""J"")Z",  FN_PTR(Unsafe_CompareAndSwapLong)},
+    {CC "compareAndExchangeObjectVolatile", CC "(" OBJ "J" OBJ "" OBJ ")" OBJ, FN_PTR(Unsafe_CompareAndExchangeObject)},
+    {CC "compareAndExchangeIntVolatile",  CC "(" OBJ "J""I""I"")I", FN_PTR(Unsafe_CompareAndExchangeInt)},
+    {CC "compareAndExchangeLongVolatile", CC "(" OBJ "J""J""J"")J", FN_PTR(Unsafe_CompareAndExchangeLong)},
+
     {CC "putOrderedObject",   CC "(" OBJ "J" OBJ ")V",   FN_PTR(Unsafe_SetOrderedObject)},
     {CC "putOrderedInt",      CC "(" OBJ "JI)V",         FN_PTR(Unsafe_SetOrderedInt)},
     {CC "putOrderedLong",     CC "(" OBJ "JJ)V",         FN_PTR(Unsafe_SetOrderedLong)},
--- a/src/share/vm/prims/whitebox.cpp	Wed Mar 09 14:54:18 2016 +0100
+++ b/src/share/vm/prims/whitebox.cpp	Wed Mar 09 14:18:12 2016 +0100
@@ -34,6 +34,7 @@
 #include "memory/metadataFactory.hpp"
 #include "memory/metaspaceShared.hpp"
 #include "memory/universe.hpp"
+#include "oops/constantPool.hpp"
 #include "oops/oop.inline.hpp"
 #include "prims/wbtestmethods/parserTests.hpp"
 #include "prims/whitebox.hpp"
@@ -643,12 +644,12 @@
   return (mh->queued_for_compilation() || nm != NULL);
 WB_END
 
-WB_ENTRY(jboolean, WB_ShouldPrintAssembly(JNIEnv* env, jobject o, jobject method))
+WB_ENTRY(jboolean, WB_ShouldPrintAssembly(JNIEnv* env, jobject o, jobject method, jint comp_level))
   jmethodID jmid = reflected_method_to_jmid(thread, env, method);
   CHECK_JNI_EXCEPTION_(env, JNI_FALSE);
 
   methodHandle mh(THREAD, Method::checked_resolve_jmethod_id(jmid));
-  DirectiveSet* directive = DirectivesStack::getMatchingDirective(mh, CompileBroker::compiler(CompLevel_simple));
+  DirectiveSet* directive = DirectivesStack::getMatchingDirective(mh, CompileBroker::compiler(comp_level));
   bool result = directive->PrintAssemblyOption;
   DirectivesStack::release(directive);
 
@@ -1305,6 +1306,38 @@
   return (jlong) ikh->constants();
 WB_END
 
+WB_ENTRY(jint, WB_GetConstantPoolCacheIndexTag(JNIEnv* env, jobject wb))
+  return ConstantPool::CPCACHE_INDEX_TAG;
+WB_END
+
+WB_ENTRY(jint, WB_GetConstantPoolCacheLength(JNIEnv* env, jobject wb, jclass klass))
+  instanceKlassHandle ikh(java_lang_Class::as_Klass(JNIHandles::resolve(klass)));
+  ConstantPool* cp = ikh->constants();
+  if (cp->cache() == NULL) {
+      return -1;
+  }
+  return cp->cache()->length();
+WB_END
+
+WB_ENTRY(jint, WB_ConstantPoolRemapInstructionOperandFromCache(JNIEnv* env, jobject wb, jclass klass, jint index))
+  instanceKlassHandle ikh(java_lang_Class::as_Klass(JNIHandles::resolve(klass)));
+  ConstantPool* cp = ikh->constants();
+  if (cp->cache() == NULL) {
+    THROW_MSG_0(vmSymbols::java_lang_IllegalStateException(), "Constant pool does not have a cache");
+  }
+  jint cpci = index;
+  jint cpciTag = ConstantPool::CPCACHE_INDEX_TAG;
+  if (cpciTag > cpci || cpci >= cp->cache()->length() + cpciTag) {
+    THROW_MSG_0(vmSymbols::java_lang_IllegalArgumentException(), "Constant pool cache index is out of range");
+  }
+  jint cpi = cp->remap_instruction_operand_from_cache(cpci);
+  return cpi;
+WB_END
+
+WB_ENTRY(jint, WB_ConstantPoolEncodeIndyIndex(JNIEnv* env, jobject wb, jint index))
+  return ConstantPool::encode_invokedynamic_index(index);
+WB_END
+
 WB_ENTRY(void, WB_ClearInlineCaches(JNIEnv* env, jobject wb))
   VM_ClearICs clear_ics;
   VMThread::execute(&clear_ics);
@@ -1523,8 +1556,8 @@
 #endif // INCLUDE_NMT
   {CC"deoptimizeFrames",   CC"(Z)I",                  (void*)&WB_DeoptimizeFrames  },
   {CC"deoptimizeAll",      CC"()V",                   (void*)&WB_DeoptimizeAll     },
-    {CC"deoptimizeMethod0",   CC"(Ljava/lang/reflect/Executable;Z)I",
-                                                        (void*)&WB_DeoptimizeMethod  },
+  {CC"deoptimizeMethod0",   CC"(Ljava/lang/reflect/Executable;Z)I",
+                                                      (void*)&WB_DeoptimizeMethod  },
   {CC"isMethodCompiled0",   CC"(Ljava/lang/reflect/Executable;Z)Z",
                                                       (void*)&WB_IsMethodCompiled  },
   {CC"isMethodCompilable0", CC"(Ljava/lang/reflect/Executable;IZ)Z",
@@ -1559,7 +1592,7 @@
       CC"(Ljava/lang/reflect/Executable;Ljava/lang/String;)I",
                                                       (void*)&WB_MatchesInline},
   {CC"shouldPrintAssembly",
-        CC"(Ljava/lang/reflect/Executable;)Z",
+        CC"(Ljava/lang/reflect/Executable;I)Z",
                                                         (void*)&WB_ShouldPrintAssembly},
 
   {CC"isConstantVMFlag",   CC"(Ljava/lang/String;)Z", (void*)&WB_IsConstantVMFlag},
@@ -1620,6 +1653,12 @@
   {CC"isMonitorInflated0", CC"(Ljava/lang/Object;)Z", (void*)&WB_IsMonitorInflated  },
   {CC"forceSafepoint",     CC"()V",                   (void*)&WB_ForceSafepoint     },
   {CC"getConstantPool0",   CC"(Ljava/lang/Class;)J",  (void*)&WB_GetConstantPool    },
+  {CC"getConstantPoolCacheIndexTag0", CC"()I",  (void*)&WB_GetConstantPoolCacheIndexTag},
+  {CC"getConstantPoolCacheLength0", CC"(Ljava/lang/Class;)I",  (void*)&WB_GetConstantPoolCacheLength},
+  {CC"remapInstructionOperandFromCPCache0",
+      CC"(Ljava/lang/Class;I)I",                      (void*)&WB_ConstantPoolRemapInstructionOperandFromCache},
+  {CC"encodeConstantPoolIndyIndex0",
+      CC"(I)I",                      (void*)&WB_ConstantPoolEncodeIndyIndex},
   {CC"getMethodBooleanOption",
       CC"(Ljava/lang/reflect/Executable;Ljava/lang/String;)Ljava/lang/Boolean;",
                                                       (void*)&WB_GetMethodBooleaneOption},
--- a/src/share/vm/runtime/arguments.cpp	Wed Mar 09 14:54:18 2016 +0100
+++ b/src/share/vm/runtime/arguments.cpp	Wed Mar 09 14:18:12 2016 +0100
@@ -2318,6 +2318,17 @@
 //===========================================================================================================
 // Parsing of main arguments
 
+#if INCLUDE_JVMCI
+// Check consistency of jvmci vm argument settings.
+bool Arguments::check_jvmci_args_consistency() {
+  if (!EnableJVMCI && !JVMCIGlobals::check_jvmci_flags_are_consistent()) {
+    JVMCIGlobals::print_jvmci_args_inconsistency_error_message();
+    return false;
+  }
+  return true;
+}
+#endif //INCLUDE_JVMCI
+
 // Check consistency of GC selection