changeset 2621:de147f62e695

Merge
author kvn
date Fri, 19 Aug 2011 08:55:53 -0700
parents 00ed4ccfe642 739a9abbbd4b
children 9f12ede5571a
files agent/src/share/classes/sun/jvm/hotspot/interpreter/BytecodeFastAAccess0.java agent/src/share/classes/sun/jvm/hotspot/interpreter/BytecodeFastIAccess0.java
diffstat 145 files changed, 101948 insertions(+), 3269 deletions(-)
--- a/agent/src/share/classes/sun/jvm/hotspot/interpreter/Bytecode.java	Wed Aug 17 07:05:42 2011 -0400
+++ b/agent/src/share/classes/sun/jvm/hotspot/interpreter/Bytecode.java	Fri Aug 19 08:55:53 2011 -0700
@@ -26,6 +26,7 @@
 
 import sun.jvm.hotspot.oops.*;
 import sun.jvm.hotspot.utilities.*;
+import sun.jvm.hotspot.runtime.VM;
 
 public class Bytecode {
   Method method;
@@ -45,6 +46,23 @@
     return Bits.roundTo(bci + offset, jintSize) - bci;
   }
 
+  public int     getIndexU1()               { return method.getBytecodeOrBPAt(bci() + 1) & 0xFF; }
+  public int     getIndexU2(int bc, boolean isWide) {
+    if (can_use_native_byte_order(bc, isWide)) {
+      return method.getNativeShortArg(bci() + (isWide ? 2 : 1)) & 0xFFFF;
+    }
+    return method.getBytecodeShortArg(bci() + (isWide ? 2 : 1)) & 0xFFFF;
+  }
+  public int     getIndexU4()               { return method.getNativeIntArg(bci() + 1); }
+  public boolean hasIndexU4()               { return code() == Bytecodes._invokedynamic; }
+
+  public int     getIndexU1Cpcache()        { return method.getBytecodeOrBPAt(bci() + 1) & 0xFF; }
+  public int     getIndexU2Cpcache()        { return method.getNativeShortArg(bci() + 1) & 0xFFFF; }
+
+  static boolean can_use_native_byte_order(int bc, boolean is_wide) {
+    return (VM.getVM().isBigEndian() || Bytecodes.native_byte_order(bc /*, is_wide*/));
+  }
+
   int javaSignedWordAt(int offset) {
     return method.getBytecodeIntArg(bci + offset);
   }
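
Note: the new getIndexU2 has to choose between Java (class-file) byte order and native byte order, because the Rewriter stores constant-pool-cache indices in host order. A minimal standalone sketch of that dispatch, with a hypothetical nativeOrderBc flag standing in for Bytecodes.native_byte_order(bc):

```java
// Sketch of the getIndexU2 dispatch above; nativeOrderBc is a hypothetical
// stand-in for Bytecodes.native_byte_order(bc).
public class IndexU2Dispatch {
    static int readU2(byte[] code, int off, boolean hostBigEndian, boolean nativeOrderBc) {
        int hi = code[off] & 0xFF, lo = code[off + 1] & 0xFF;
        if (hostBigEndian || !nativeOrderBc) {
            return (hi << 8) | lo;  // Java class-file order (also native order on BE hosts)
        }
        return (lo << 8) | hi;      // rewritten operand on a little-endian host
    }

    public static void main(String[] args) {
        byte[] code = {(byte) 0xB4, 0x01, 0x02};           // getfield with operand 0x0102
        System.out.println(readU2(code, 1, false, false)); // 258 (Java order)
        System.out.println(readU2(code, 1, false, true));  // 513 (byte-swapped)
    }
}
```
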
--- a/agent/src/share/classes/sun/jvm/hotspot/interpreter/BytecodeFastAAccess0.java	Wed Aug 17 07:05:42 2011 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,78 +0,0 @@
-/*
- * Copyright (c) 2002, Oracle and/or its affiliates. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- *
- */
-
-package sun.jvm.hotspot.interpreter;
-
-import sun.jvm.hotspot.oops.*;
-import sun.jvm.hotspot.utilities.*;
-
-public class BytecodeFastAAccess0 extends BytecodeGetPut {
-  BytecodeFastAAccess0(Method method, int bci) {
-    super(method, bci);
-  }
-
-  public int index() {
-    return (int) (0xFF & javaShortAt(2));
-  }
-
-  public boolean isStatic() {
-    return false;
-  }
-
-  public void verify() {
-    if (Assert.ASSERTS_ENABLED) {
-      Assert.that(isValid(), "check fast_aaccess_0");
-    }
-  }
-
-  public boolean isValid() {
-    return code() == Bytecodes._fast_aaccess_0;
-  }
-
-  public static BytecodeFastAAccess0 at(Method method, int bci) {
-    BytecodeFastAAccess0 b = new BytecodeFastAAccess0(method, bci);
-    if (Assert.ASSERTS_ENABLED) {
-      b.verify();
-    }
-    return b;
-  }
-
-  /** Like at, but returns null if the BCI is not at fast_aaccess_0  */
-  public static BytecodeFastAAccess0 atCheck(Method method, int bci) {
-    BytecodeFastAAccess0 b = new BytecodeFastAAccess0(method, bci);
-    return (b.isValid() ? b : null);
-  }
-
-  public static BytecodeFastAAccess0 at(BytecodeStream bcs) {
-    return new BytecodeFastAAccess0(bcs.method(), bcs.bci());
-  }
-
-  public String toString() {
-    StringBuffer buf = new StringBuffer();
-    buf.append("aload_0");
-    buf.append(spaces);
-    buf.append(super.toString());
-    return buf.toString();
-  }
-}
--- a/agent/src/share/classes/sun/jvm/hotspot/interpreter/BytecodeFastIAccess0.java	Wed Aug 17 07:05:42 2011 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,78 +0,0 @@
-/*
- * Copyright (c) 2002, Oracle and/or its affiliates. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- *
- */
-
-package sun.jvm.hotspot.interpreter;
-
-import sun.jvm.hotspot.oops.*;
-import sun.jvm.hotspot.utilities.*;
-
-public class BytecodeFastIAccess0 extends BytecodeGetPut {
-  BytecodeFastIAccess0(Method method, int bci) {
-    super(method, bci);
-  }
-
-  public int index() {
-    return (int) (0xFF & javaShortAt(2));
-  }
-
-  public boolean isStatic() {
-    return false;
-  }
-
-  public void verify() {
-    if (Assert.ASSERTS_ENABLED) {
-      Assert.that(isValid(), "check fast_iaccess_0");
-    }
-  }
-
-  public boolean isValid() {
-    return code() == Bytecodes._fast_iaccess_0;
-  }
-
-  public static BytecodeFastIAccess0 at(Method method, int bci) {
-    BytecodeFastIAccess0 b = new BytecodeFastIAccess0(method, bci);
-    if (Assert.ASSERTS_ENABLED) {
-      b.verify();
-    }
-    return b;
-  }
-
-  /** Like at, but returns null if the BCI is not at fast_iaccess_0  */
-  public static BytecodeFastIAccess0 atCheck(Method method, int bci) {
-    BytecodeFastIAccess0 b = new BytecodeFastIAccess0(method, bci);
-    return (b.isValid() ? b : null);
-  }
-
-  public static BytecodeFastIAccess0 at(BytecodeStream bcs) {
-    return new BytecodeFastIAccess0(bcs.method(), bcs.bci());
-  }
-
-  public String toString() {
-    StringBuffer buf = new StringBuffer();
-    buf.append("aload_0");
-    buf.append(spaces);
-    buf.append(super.toString());
-    return buf.toString();
-  }
-}
--- a/agent/src/share/classes/sun/jvm/hotspot/interpreter/BytecodeLoadConstant.java	Wed Aug 17 07:05:42 2011 -0400
+++ b/agent/src/share/classes/sun/jvm/hotspot/interpreter/BytecodeLoadConstant.java	Fri Aug 19 08:55:53 2011 -0700
@@ -28,29 +28,25 @@
 import sun.jvm.hotspot.runtime.*;
 import sun.jvm.hotspot.utilities.*;
 
-public class BytecodeLoadConstant extends BytecodeWithCPIndex {
+public class BytecodeLoadConstant extends Bytecode {
   BytecodeLoadConstant(Method method, int bci) {
     super(method, bci);
   }
 
   public boolean hasCacheIndex() {
     // normal ldc uses CP index, but fast_aldc uses swapped CP cache index
-    return javaCode() != code();
+    return code() >= Bytecodes.number_of_java_codes;
   }
 
-  public int index() {
-    int i = javaCode() == Bytecodes._ldc ?
-                 (int) (0xFF & javaByteAt(1))
-               : (int) (0xFFFF & javaShortAt(1));
-    if (hasCacheIndex()) {
-      return (0xFFFF & VM.getVM().getBytes().swapShort((short) i));
-    } else {
-      return i;
-    }
+  int rawIndex() {
+    if (javaCode() == Bytecodes._ldc)
+      return getIndexU1();
+    else
+      return getIndexU2(code(), false);
   }
 
   public int poolIndex() {
-    int i = index();
+    int i = rawIndex();
     if (hasCacheIndex()) {
       ConstantPoolCache cpCache = method().getConstants().getCache();
       return cpCache.getEntryAt(i).getConstantPoolIndex();
@@ -61,12 +57,18 @@
 
   public int cacheIndex() {
     if (hasCacheIndex()) {
-      return index();
+      return rawIndex();
     } else {
       return -1;  // no cache index
     }
   }
 
+  public BasicType resultType() {
+    int index = poolIndex();
+    ConstantTag tag = method().getConstants().getTagAt(index);
+    return tag.basicType();
+  }
+
   private Oop getCachedConstant() {
     int i = cacheIndex();
     if (i >= 0) {
@@ -88,7 +90,7 @@
            jcode == Bytecodes._ldc2_w;
     if (! codeOk) return false;
 
-    ConstantTag ctag = method().getConstants().getTagAt(index());
+    ConstantTag ctag = method().getConstants().getTagAt(rawIndex());
     if (jcode == Bytecodes._ldc2_w) {
        // has to be double or long
        return (ctag.isDouble() || ctag.isLong()) ? true: false;
@@ -107,7 +109,7 @@
        return false;
     }
 
-    ConstantTag ctag = method().getConstants().getTagAt(index());
+    ConstantTag ctag = method().getConstants().getTagAt(poolIndex());
     return ctag.isKlass() || ctag.isUnresolvedKlass();
   }
 
@@ -120,7 +122,7 @@
     // We just look at the object at the corresponding index and
     // decide based on the oop type.
     ConstantPool cpool = method().getConstants();
-    int cpIndex = index();
+    int cpIndex = poolIndex();
     ConstantPool.CPSlot oop = cpool.getSlotAt(cpIndex);
     if (oop.isOop()) {
       return (Klass) oop.getOop();
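
Note: rawIndex() returns whatever the operand encodes (a pool index for ldc/ldc_w, a cp-cache index for fast_aldc), and poolIndex() normalizes through the cache. A toy model of that split, with a plain array standing in for the ConstantPoolCache; only the control flow mirrors the patch:

```java
// Toy model of rawIndex() vs poolIndex(); the cache mapping is hypothetical.
public class LdcIndices {
    static final int[] cacheToPool = {7, 11, 42};  // hypothetical cache -> pool mapping

    static int poolIndex(int rawIndex, boolean hasCacheIndex) {
        // fast_aldc operands name a cp-cache entry; plain ldc/ldc_w operands
        // already name a constant-pool slot
        return hasCacheIndex ? cacheToPool[rawIndex] : rawIndex;
    }

    public static void main(String[] args) {
        System.out.println(poolIndex(5, false)); // ldc: 5 is already the pool index
        System.out.println(poolIndex(2, true));  // fast_aldc: cache entry 2 -> pool slot 42
    }
}
```
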
--- a/agent/src/share/classes/sun/jvm/hotspot/interpreter/BytecodeStream.java	Wed Aug 17 07:05:42 2011 -0400
+++ b/agent/src/share/classes/sun/jvm/hotspot/interpreter/BytecodeStream.java	Fri Aug 19 08:55:53 2011 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2002, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -130,7 +130,13 @@
   public int     getIndex()           { return (isWide())
                                           ? (_method.getBytecodeShortArg(bci() + 2) & 0xFFFF)
                                           : (_method.getBytecodeOrBPAt(bci() + 1) & 0xFF); }
-  public int     getIndexBig()        { return _method.getBytecodeShortArg(bci() + 1); }
+  public int     getIndexU1()         { return _method.getBytecodeOrBPAt(bci() + 1) & 0xFF; }
+  public int     getIndexU2()         { return _method.getBytecodeShortArg(bci() + 1) & 0xFFFF; }
+  public int     getIndexU4()         { return _method.getNativeIntArg(bci() + 1); }
+  public boolean hasIndexU4()         { return code() == Bytecodes._invokedynamic; }
+
+  public int     getIndexU1Cpcache()         { return _method.getBytecodeOrBPAt(bci() + 1) & 0xFF; }
+  public int     getIndexU2Cpcache()         { return _method.getNativeShortArg(bci() + 1) & 0xFFFF; }
 
   // Fetch at absolute BCI (for manual parsing of certain bytecodes)
   public int     codeAt(int bci) {
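
Note: getIndexBig() is gone, so callers must say which operand width and byte order they mean. A hypothetical lookup of which accessor fits which opcode (standard JVM opcode values, accessor names as added above):

```java
// Hypothetical mapping from opcodes to the width-specific accessors that
// replace getIndexBig().
public class AccessorChoice {
    static String accessorFor(int opcode) {
        switch (opcode) {
            case 0x12: return "getIndexU1()";        // ldc: one byte
            case 0x13: return "getIndexU2()";        // ldc_w: two bytes, Java order
            case 0xB4: return "getIndexU2Cpcache()"; // getfield after rewriting: native order
            case 0xBA: return "getIndexU4()";        // invokedynamic: four bytes, native order
            default:   return "getIndex()";          // wide-aware local-variable index
        }
    }

    public static void main(String[] args) {
        System.out.println(accessorFor(0xBA)); // getIndexU4()
    }
}
```
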
--- a/agent/src/share/classes/sun/jvm/hotspot/interpreter/BytecodeWideable.java	Wed Aug 17 07:05:42 2011 -0400
+++ b/agent/src/share/classes/sun/jvm/hotspot/interpreter/BytecodeWideable.java	Fri Aug 19 08:55:53 2011 -0700
@@ -38,7 +38,6 @@
 
   // the local variable index
   public int getLocalVarIndex() {
-    return (isWide()) ? (int) (0xFFFF & javaShortAt(1))
-            : (int) (0xFF & javaByteAt(1));
+    return (isWide()) ? getIndexU2(code(), true) : getIndexU1();
   }
 }
--- a/agent/src/share/classes/sun/jvm/hotspot/interpreter/BytecodeWithCPIndex.java	Wed Aug 17 07:05:42 2011 -0400
+++ b/agent/src/share/classes/sun/jvm/hotspot/interpreter/BytecodeWithCPIndex.java	Fri Aug 19 08:55:53 2011 -0700
@@ -35,7 +35,7 @@
   }
 
   // the constant pool index for this bytecode
-  public int index() { return 0xFFFF & javaShortAt(1); }
+  public int index() { return getIndexU2(code(), false); }
 
   public int getSecondaryIndex() {
      throw new IllegalArgumentException("must be invokedynamic");
--- a/agent/src/share/classes/sun/jvm/hotspot/interpreter/Bytecodes.java	Wed Aug 17 07:05:42 2011 -0400
+++ b/agent/src/share/classes/sun/jvm/hotspot/interpreter/Bytecodes.java	Fri Aug 19 08:55:53 2011 -0700
@@ -276,6 +276,34 @@
 
   public static final int number_of_codes       = 233;
 
+  // Flag bits derived from format strings, can_trap, can_rewrite, etc.:
+  // semantic flags:
+  static final int  _bc_can_trap      = 1<<0;     // bytecode execution can trap or block
+  static final int  _bc_can_rewrite   = 1<<1;     // bytecode execution has an alternate form
+
+  // format bits (determined only by the format string):
+  static final int  _fmt_has_c        = 1<<2;     // constant, such as sipush "bcc"
+  static final int  _fmt_has_j        = 1<<3;     // constant pool cache index, such as getfield "bjj"
+  static final int  _fmt_has_k        = 1<<4;     // constant pool index, such as ldc "bk"
+  static final int  _fmt_has_i        = 1<<5;     // local index, such as iload
+  static final int  _fmt_has_o        = 1<<6;     // offset, such as ifeq
+  static final int  _fmt_has_nbo      = 1<<7;     // contains native-order field(s)
+  static final int  _fmt_has_u2       = 1<<8;     // contains double-byte field(s)
+  static final int  _fmt_has_u4       = 1<<9;     // contains quad-byte field
+  static final int  _fmt_not_variable = 1<<10;    // not of variable length (simple or wide)
+  static final int  _fmt_not_simple   = 1<<11;    // either wide or variable length
+  static final int  _all_fmt_bits     = (_fmt_not_simple*2 - _fmt_has_c);
+
+  // Example derived format syndromes:
+  static final int  _fmt_b      = _fmt_not_variable;
+  static final int  _fmt_bc     = _fmt_b | _fmt_has_c;
+  static final int  _fmt_bi     = _fmt_b | _fmt_has_i;
+  static final int  _fmt_bkk    = _fmt_b | _fmt_has_k | _fmt_has_u2;
+  static final int  _fmt_bJJ    = _fmt_b | _fmt_has_j | _fmt_has_u2 | _fmt_has_nbo;
+  static final int  _fmt_bo2    = _fmt_b | _fmt_has_o | _fmt_has_u2;
+  static final int  _fmt_bo4    = _fmt_b | _fmt_has_o | _fmt_has_u4;
+
+
   public static int specialLengthAt(Method method, int bci) {
     int code = codeAt(method, bci);
     switch (code) {
@@ -337,18 +365,20 @@
   //   static Code       non_breakpoint_code_at(address bcp, methodOop method = null);
 
   // Bytecode attributes
-  public static boolean   isDefined    (int code) { return 0 <= code && code < number_of_codes && _format[code] != null; }
-  public static boolean   wideIsDefined(int code) { return isDefined(code) && _wide_format[code] != null; }
+  public static boolean   isDefined    (int code) { return 0 <= code && code < number_of_codes && flags(code, false) != 0; }
+  public static boolean   wideIsDefined(int code) { return isDefined(code) && flags(code, true) != 0; }
   public static String    name         (int code) { check(code);      return _name          [code]; }
   public static String    format       (int code) { check(code);      return _format        [code]; }
   public static String    wideFormat   (int code) { wideCheck(code);  return _wide_format   [code]; }
   public static int       resultType   (int code) { check(code);      return _result_type   [code]; }
   public static int       depth        (int code) { check(code);      return _depth         [code]; }
-  public static int       lengthFor    (int code) { check(code);      return _length        [code]; }
-  public static boolean   canTrap      (int code) { check(code);      return _can_trap      [code]; }
+  public static int       lengthFor    (int code) { check(code);      return _lengths       [code] & 0xF; }
+  public static int       wideLengthFor(int code) { check(code);      return _lengths       [code] >> 4; }
+  public static boolean   canTrap      (int code) { check(code);      return has_all_flags(code, _bc_can_trap, false); }
   public static int       javaCode     (int code) { check(code);      return _java_code     [code]; }
-  public static boolean   canRewrite   (int code) { check(code);      return _can_rewrite   [code]; }
-  public static int       wideLengthFor(int code) { wideCheck(code);  return wideFormat(code).length(); }
+  public static boolean   canRewrite   (int code) { check(code);      return has_all_flags(code, _bc_can_rewrite, false); }
+  public static boolean   native_byte_order(int code)  { check(code);      return has_all_flags(code, _fmt_has_nbo, false); }
+  public static boolean   uses_cp_cache  (int code)    { check(code);      return has_all_flags(code, _fmt_has_j, false); }
   public static int       lengthAt     (Method method, int bci) { int l = lengthFor(codeAt(method, bci)); return l > 0 ? l : specialLengthAt(method, bci); }
   public static int       javaLengthAt (Method method, int bci) { int l = lengthFor(javaCode(codeAt(method, bci))); return l > 0 ? l : specialLengthAt(method, bci); }
   public static boolean   isJavaCode   (int code) { return 0 <= code && code < number_of_java_codes; }
@@ -362,6 +392,92 @@
   public static boolean   isZeroConst  (int code) { return (code == _aconst_null || code == _iconst_0
                                                                                  || code == _fconst_0 || code == _dconst_0); }
 
+  static int         flags          (int code, boolean is_wide) {
+    assert code == (code & 0xff) : "must be a byte";
+    return _flags[code + (is_wide ? 256 : 0)];
+  }
+  static int         format_bits    (int code, boolean is_wide) { return flags(code, is_wide) & _all_fmt_bits; }
+  static boolean     has_all_flags  (int code, int test_flags, boolean is_wide) {
+    return (flags(code, is_wide) & test_flags) == test_flags;
+  }
+
+  static char compute_flags(String format) {
+    return compute_flags(format, 0);
+  }
+  static char compute_flags(String format, int more_flags) {
+    if (format == null)  return 0;  // not even more_flags
+    int flags = more_flags;
+    int fp = 0;
+    if (format.length() == 0) {
+      flags |= _fmt_not_simple; // but variable
+    } else {
+      switch (format.charAt(fp)) {
+      case 'b':
+        flags |= _fmt_not_variable;  // but simple
+        ++fp;  // skip 'b'
+        break;
+      case 'w':
+        flags |= _fmt_not_variable | _fmt_not_simple;
+        ++fp;  // skip 'w'
+      assert(format.charAt(fp) == 'b') : "wide format must start with 'wb'";
+        ++fp;  // skip 'b'
+        break;
+      }
+    }
+
+    boolean has_nbo = false, has_jbo = false;
+    int has_size = 0;
+    while (fp < format.length()) {
+      int this_flag = 0;
+      char fc = format.charAt(fp++);
+      switch (fc) {
+      case '_': continue;         // ignore these
+
+      case 'j': this_flag = _fmt_has_j; has_jbo = true; break;
+      case 'k': this_flag = _fmt_has_k; has_jbo = true; break;
+      case 'i': this_flag = _fmt_has_i; has_jbo = true; break;
+      case 'c': this_flag = _fmt_has_c; has_jbo = true; break;
+      case 'o': this_flag = _fmt_has_o; has_jbo = true; break;
+
+        // uppercase versions mark native byte order (from Rewriter)
+        // actually, only the 'J' case happens currently
+      case 'J': this_flag = _fmt_has_j; has_nbo = true; break;
+      case 'K': this_flag = _fmt_has_k; has_nbo = true; break;
+      case 'I': this_flag = _fmt_has_i; has_nbo = true; break;
+      case 'C': this_flag = _fmt_has_c; has_nbo = true; break;
+      case 'O': this_flag = _fmt_has_o; has_nbo = true; break;
+      default:  assert false : "bad char in format";
+      }
+
+      flags |= this_flag;
+
+      assert !(has_jbo && has_nbo) : "mixed byte orders in format";
+      if (has_nbo)
+        flags |= _fmt_has_nbo;
+
+      int this_size = 1;
+      if (fp < format.length() && format.charAt(fp) == fc) {
+        // advance beyond run of the same characters
+        this_size = 2;
+        while (fp  + 1 < format.length() && format.charAt(++fp) == fc)  this_size++;
+        switch (this_size) {
+        case 2: flags |= _fmt_has_u2; break;
+        case 4: flags |= _fmt_has_u4; break;
+        default: assert false : "bad rep count in format";
+        }
+      }
+      assert has_size == 0 ||                     // no field yet
+        this_size == has_size ||             // same size
+        this_size < has_size && fp == format.length() : // last field can be short
+             "mixed field sizes in format";
+      has_size = this_size;
+    }
+
+    assert flags == (char)flags : "change _format_flags";
+    return (char)flags;
+  }
+
+
   //----------------------------------------------------------------------
   // Internals only below this point
   //
@@ -371,10 +487,9 @@
   private static String[]    _wide_format;
   private static int[]       _result_type;
   private static byte[]      _depth;
-  private static byte[]      _length;
-  private static boolean[]   _can_trap;
+  private static byte[]      _lengths;
   private static int[]       _java_code;
-  private static boolean[]   _can_rewrite;
+  private static char[]      _flags;
 
   static {
     _name           = new String [number_of_codes];
@@ -382,10 +497,9 @@
     _wide_format    = new String [number_of_codes];
     _result_type    = new int    [number_of_codes]; // See BasicType.java
     _depth          = new byte   [number_of_codes];
-    _length         = new byte   [number_of_codes];
-    _can_trap       = new boolean[number_of_codes];
+    _lengths        = new byte   [number_of_codes];
     _java_code      = new int    [number_of_codes];
-    _can_rewrite    = new boolean[number_of_codes];
+    _flags          = new char[256 * 2]; // all second page for wide formats
 
     // In case we want to fetch this information from the VM in the
     // future
@@ -712,18 +826,19 @@
     if (Assert.ASSERTS_ENABLED) {
       Assert.that(wide_format == null || format != null, "short form must exist if there's a wide form");
     }
+    int len  = (format      != null ? format.length()      : 0);
+    int wlen = (wide_format != null ? wide_format.length() : 0);
     _name          [code] = name;
+    _result_type   [code] = result_type;
+    _depth         [code] = (byte) depth;
+    _lengths       [code] = (byte)((wlen << 4) | (len & 0xF));
+    _java_code     [code] = java_code;
     _format        [code] = format;
     _wide_format   [code] = wide_format;
-    _result_type   [code] = result_type;
-    _depth         [code] = (byte) depth;
-    _can_trap      [code] = can_trap;
-    _length        [code] = (byte) (format != null ? format.length() : 0);
-    _java_code     [code] = java_code;
-    if (java_code != code) {
-      _can_rewrite[java_code] = true;
-    } else {
-      _can_rewrite[java_code] = false;
-    }
+    int bc_flags = 0;
+    if (can_trap)           bc_flags |= _bc_can_trap;
+    if (java_code != code)  bc_flags |= _bc_can_rewrite;
+    _flags[code+0*256] = compute_flags(format,      bc_flags);
+    _flags[code+1*256] = compute_flags(wide_format, bc_flags);
   }
 }
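
Note: register() now derives everything from the format strings: normal and wide lengths are packed two-to-a-byte in _lengths, and a per-format flag word replaces the _can_trap/_can_rewrite arrays. A reduced model under simplified parsing (the real compute_flags also handles 'w' prefixes, '_' separators, and the native-order capitals):

```java
// Reduced model of the packed _lengths byte and the format-derived flags.
public class FlagModel {
    static final int FMT_HAS_U2       = 1 << 8;
    static final int FMT_NOT_VARIABLE = 1 << 10;

    static byte packLengths(String fmt, String wideFmt) {
        int len  = fmt == null ? 0 : fmt.length();
        int wlen = wideFmt == null ? 0 : wideFmt.length();
        return (byte) ((wlen << 4) | (len & 0xF));  // wide length in the high nibble
    }

    static int computeFlags(String fmt) {
        if (fmt == null) return 0;
        int flags = fmt.startsWith("b") ? FMT_NOT_VARIABLE : 0;
        // a doubled field letter ("jj", "kk", ...) marks a two-byte operand
        for (int i = 1; i + 1 < fmt.length(); i++)
            if (fmt.charAt(i) == fmt.charAt(i + 1)) flags |= FMT_HAS_U2;
        return flags;
    }

    public static void main(String[] args) {
        byte iinc = packLengths("bic", "wbiicc");
        System.out.println((iinc & 0xF) + " / " + ((iinc & 0xFF) >> 4)); // 3 / 6
        System.out.println(Integer.toBinaryString(computeFlags("bkk"))); // u2 + not-variable
    }
}
```
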
--- a/agent/src/share/classes/sun/jvm/hotspot/oops/ConstMethod.java	Wed Aug 17 07:05:42 2011 -0400
+++ b/agent/src/share/classes/sun/jvm/hotspot/oops/ConstMethod.java	Fri Aug 19 08:55:53 2011 -0700
@@ -164,6 +164,18 @@
     return (short) ((hi << 8) | lo);
   }
 
+  /** Fetches a 16-bit native ordered value from the
+      bytecode stream */
+  public short getNativeShortArg(int bci) {
+    int hi = getBytecodeOrBPAt(bci);
+    int lo = getBytecodeOrBPAt(bci + 1);
+    if (VM.getVM().isBigEndian()) {
+        return (short) ((hi << 8) | lo);
+    } else {
+        return (short) ((lo << 8) | hi);
+    }
+  }
+
   /** Fetches a 32-bit big-endian ("Java ordered") value from the
       bytecode stream */
   public int getBytecodeIntArg(int bci) {
@@ -175,6 +187,21 @@
     return (b4 << 24) | (b3 << 16) | (b2 << 8) | b1;
   }
 
+  /** Fetches a 32-bit native ordered value from the
+      bytecode stream */
+  public int getNativeIntArg(int bci) {
+    int b4 = getBytecodeOrBPAt(bci);
+    int b3 = getBytecodeOrBPAt(bci + 1);
+    int b2 = getBytecodeOrBPAt(bci + 2);
+    int b1 = getBytecodeOrBPAt(bci + 3);
+
+    if (VM.getVM().isBigEndian()) {
+        return (b4 << 24) | (b3 << 16) | (b2 << 8) | b1;
+    } else {
+        return (b1 << 24) | (b2 << 16) | (b3 << 8) | b4;
+    }
+  }
+
   public byte[] getByteCode() {
      byte[] bc = new byte[ (int) getCodeSize() ];
      for( int i=0; i < bc.length; i++ )
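
Note: getNativeShortArg/getNativeIntArg assemble the operand byte by byte and pick the shift order from VM.getVM().isBigEndian(). A standalone check of the 32-bit case, with plain ints in place of the bytecode stream:

```java
// Check of the native-order 32-bit fetch; bigEndianHost stands in for
// VM.getVM().isBigEndian().
public class NativeIntFetch {
    static int nativeInt(int b4, int b3, int b2, int b1, boolean bigEndianHost) {
        return bigEndianHost
            ? (b4 << 24) | (b3 << 16) | (b2 << 8) | b1
            : (b1 << 24) | (b2 << 16) | (b3 << 8) | b4;
    }

    public static void main(String[] args) {
        // bytes 0x01 0x02 0x03 0x04 at bci+1 of an invokedynamic
        System.out.printf("%08x%n", nativeInt(1, 2, 3, 4, true));  // 01020304
        System.out.printf("%08x%n", nativeInt(1, 2, 3, 4, false)); // 04030201
    }
}
```
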
--- a/agent/src/share/classes/sun/jvm/hotspot/oops/ConstantPool.java	Wed Aug 17 07:05:42 2011 -0400
+++ b/agent/src/share/classes/sun/jvm/hotspot/oops/ConstantPool.java	Fri Aug 19 08:55:53 2011 -0700
@@ -212,13 +212,60 @@
   }
 
   public Symbol getNameRefAt(int which) {
-    int nameIndex = getNameAndTypeAt(getNameAndTypeRefIndexAt(which))[0];
-    return getSymbolAt(nameIndex);
+    return implGetNameRefAt(which, false);
+  }
+
+  private Symbol implGetNameRefAt(int which, boolean uncached) {
+    int signatureIndex = getNameRefIndexAt(implNameAndTypeRefIndexAt(which, uncached));
+    return getSymbolAt(signatureIndex);
   }
 
   public Symbol getSignatureRefAt(int which) {
-    int sigIndex = getNameAndTypeAt(getNameAndTypeRefIndexAt(which))[1];
-    return getSymbolAt(sigIndex);
+    return implGetSignatureRefAt(which, false);
+  }
+
+  private Symbol implGetSignatureRefAt(int which, boolean uncached) {
+    int signatureIndex = getSignatureRefIndexAt(implNameAndTypeRefIndexAt(which, uncached));
+    return getSymbolAt(signatureIndex);
+  }
+
+
+  private int implNameAndTypeRefIndexAt(int which, boolean uncached) {
+    int i = which;
+    if (!uncached && getCache() != null) {
+      if (ConstantPoolCache.isSecondaryIndex(which)) {
+        // Invokedynamic index.
+        int pool_index = getCache().getMainEntryAt(which).getConstantPoolIndex();
+        pool_index = invokeDynamicNameAndTypeRefIndexAt(pool_index);
+        // assert(tagAt(pool_index).isNameAndType(), "");
+        return pool_index;
+      }
+      // change byte-ordering and go via cache
+      i = remapInstructionOperandFromCache(which);
+    } else {
+      if (getTagAt(which).isInvokeDynamic()) {
+        int pool_index = invokeDynamicNameAndTypeRefIndexAt(which);
+        // assert(tag_at(pool_index).is_name_and_type(), "");
+        return pool_index;
+      }
+    }
+    // assert(tag_at(i).is_field_or_method(), "Corrupted constant pool");
+    // assert(!tag_at(i).is_invoke_dynamic(), "Must be handled above");
+    int ref_index = getIntAt(i);
+    return extractHighShortFromInt(ref_index);
+  }
+
+  private int remapInstructionOperandFromCache(int operand) {
+    int cpc_index = operand;
+    // DEBUG_ONLY(cpc_index -= CPCACHE_INDEX_TAG);
+    // assert((int)(u2)cpc_index == cpc_index, "clean u2");
+    int member_index = getCache().getEntryAt(cpc_index).getConstantPoolIndex();
+    return member_index;
+  }
+
+  int invokeDynamicNameAndTypeRefIndexAt(int which) {
+    // assert(tag_at(which).is_invoke_dynamic(), "Corrupted constant pool");
+    return extractHighShortFromInt(getIntAt(which));
   }
 
   // returns null, if not resolved.
@@ -253,15 +300,7 @@
   }
 
   public int getNameAndTypeRefIndexAt(int index) {
-    int refIndex = getFieldOrMethodAt(index);
-    if (DEBUG) {
-      System.err.println("ConstantPool.getNameAndTypeRefIndexAt(" + index + "): refIndex = " + refIndex);
-    }
-    int i = extractHighShortFromInt(refIndex);
-    if (DEBUG) {
-      System.err.println("ConstantPool.getNameAndTypeRefIndexAt(" + index + "): result = " + i);
-    }
-    return i;
+    return implNameAndTypeRefIndexAt(index, false);
   }
 
   /** Lookup for entries consisting of (name_index, signature_index) */
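
Note: implNameAndTypeRefIndexAt ends with extractHighShortFromInt because a Fieldref/Methodref entry stores two u2 indices in one 32-bit slot, with the name-and-type index in the logical high half. A sketch of that unpacking (illustrative only; the SA's extractHighShortFromInt additionally compensates for how the 32-bit slot was read):

```java
// Sketch of the ref-index unpacking: slot = (name_and_type << 16) | class.
public class RefIndexUnpack {
    static int lowShort(int slot)  { return slot & 0xFFFF; }
    static int highShort(int slot) { return (slot >>> 16) & 0xFFFF; }

    public static void main(String[] args) {
        int slot = (34 << 16) | 12;          // name-and-type 34, class 12
        System.out.println(highShort(slot)); // 34
        System.out.println(lowShort(slot));  // 12
    }
}
```
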
--- a/agent/src/share/classes/sun/jvm/hotspot/oops/ConstantPoolCache.java	Wed Aug 17 07:05:42 2011 -0400
+++ b/agent/src/share/classes/sun/jvm/hotspot/oops/ConstantPoolCache.java	Fri Aug 19 08:55:53 2011 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2000, 2011, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -72,9 +72,7 @@
   }
 
   public ConstantPoolCacheEntry getEntryAt(int i) {
-    if (Assert.ASSERTS_ENABLED) {
-      Assert.that(0 <= i && i < getLength(), "index out of bounds");
-    }
+    if (i < 0 || i >= getLength()) throw new IndexOutOfBoundsException(i + " " + getLength());
     return new ConstantPoolCacheEntry(this, i);
   }
 
@@ -84,21 +82,27 @@
 
   // secondary entries hold invokedynamic call site bindings
   public ConstantPoolCacheEntry getSecondaryEntryAt(int i) {
-    ConstantPoolCacheEntry e = new ConstantPoolCacheEntry(this, decodeSecondaryIndex(i));
+    int rawIndex = i;
+    if (isSecondaryIndex(i)) {
+      rawIndex = decodeSecondaryIndex(i);
+    }
+    ConstantPoolCacheEntry e = getEntryAt(rawIndex);
     if (Assert.ASSERTS_ENABLED) {
-      Assert.that(e.isSecondaryEntry(), "must be a secondary entry");
+      Assert.that(e.isSecondaryEntry(), "must be a secondary entry:" + rawIndex);
     }
     return e;
   }
 
   public ConstantPoolCacheEntry getMainEntryAt(int i) {
+    int primaryIndex = i;
     if (isSecondaryIndex(i)) {
       // run through an extra level of indirection:
-      i = getSecondaryEntryAt(i).getMainEntryIndex();
+      int rawIndex = decodeSecondaryIndex(i);
+      primaryIndex = getEntryAt(rawIndex).getMainEntryIndex();
     }
-    ConstantPoolCacheEntry e = new ConstantPoolCacheEntry(this, i);
+    ConstantPoolCacheEntry e = getEntryAt(primaryIndex);
     if (Assert.ASSERTS_ENABLED) {
-      Assert.that(!e.isSecondaryEntry(), "must not be a secondary entry");
+      Assert.that(!e.isSecondaryEntry(), "must not be a secondary entry:" + primaryIndex);
     }
     return e;
   }
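
Note: getMainEntryAt now decodes a secondary (invokedynamic) index once and bounds-checks through getEntryAt instead of constructing entries directly. A control-flow sketch with a plain array in place of the cache; the secondary-index encoding shown here is an assumption (negative tag, ~i), not the VM's definitive scheme:

```java
// Control-flow sketch of getMainEntryAt; encoding details are assumptions.
public class MainEntryLookup {
    static boolean isSecondaryIndex(int i) { return i < 0; }  // assumption
    static int decodeSecondaryIndex(int i) { return ~i; }     // assumption: ~i == -i - 1
    static final int[] mainEntryIndexOf = {0, 0, 1};          // toy entry -> main-entry table

    static int mainEntryAt(int i) {
        int primary = i;
        if (isSecondaryIndex(i)) {
            // one extra level of indirection through the secondary entry
            primary = mainEntryIndexOf[decodeSecondaryIndex(i)];
        }
        return primary;
    }

    public static void main(String[] args) {
        System.out.println(mainEntryAt(1));   // primary index used directly
        System.out.println(mainEntryAt(~2));  // secondary entry 2 -> main entry 1
    }
}
```
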
--- a/agent/src/share/classes/sun/jvm/hotspot/oops/GenerateOopMap.java	Wed Aug 17 07:05:42 2011 -0400
+++ b/agent/src/share/classes/sun/jvm/hotspot/oops/GenerateOopMap.java	Fri Aug 19 08:55:53 2011 -0700
@@ -569,10 +569,10 @@
       case Bytecodes._invokedynamic:
         // FIXME: print signature of referenced method (need more
         // accessors in ConstantPool and ConstantPoolCache)
-        int idx = currentBC.getIndexBig();
+        int idx = currentBC.hasIndexU4() ? currentBC.getIndexU4() : currentBC.getIndexU2();
         tty.print(" idx " + idx);
         /*
-          int idx = currentBC.getIndexBig();
+          int idx = currentBC.getIndexU2();
           ConstantPool cp       = method().getConstants();
           int nameAndTypeIdx    = cp.name_and_type_ref_index_at(idx);
           int signatureIdx      = cp.signature_ref_index_at(nameAndTypeIdx);
@@ -609,10 +609,10 @@
       case Bytecodes._invokedynamic:
         // FIXME: print signature of referenced method (need more
         // accessors in ConstantPool and ConstantPoolCache)
-        int idx = currentBC.getIndexBig();
+        int idx = currentBC.hasIndexU4() ? currentBC.getIndexU4() : currentBC.getIndexU2();
         tty.print(" idx " + idx);
         /*
-          int idx = currentBC.getIndexBig();
+          int idx = currentBC.getIndexU2();
           constantPoolOop cp    = method().constants();
           int nameAndTypeIdx    = cp.name_and_type_ref_index_at(idx);
           int signatureIdx      = cp.signature_ref_index_at(nameAndTypeIdx);
@@ -1118,7 +1118,8 @@
       current instruction, starting in the current state. */
   void  interp1                             (BytecodeStream itr) {
     if (DEBUG) {
-      System.err.println(" - bci " + itr.bci());
+      System.err.println(" - bci " + itr.bci() + " " + itr.code());
+      printCurrentState(System.err, itr, false);
     }
 
     //    if (TraceNewOopMapGeneration) {
@@ -1179,8 +1180,8 @@
 
     case Bytecodes._ldc2_w:            ppush(vvCTS);               break;
 
-    case Bytecodes._ldc:               doLdc(itr.getIndex(), itr.bci());    break;
-    case Bytecodes._ldc_w:             doLdc(itr.getIndexBig(), itr.bci());break;
+    case Bytecodes._ldc:               doLdc(itr.bci());           break;
+    case Bytecodes._ldc_w:             doLdc(itr.bci());           break;
 
     case Bytecodes._iload:
     case Bytecodes._fload:             ppload(vCTS, itr.getIndex()); break;
@@ -1372,18 +1373,16 @@
     case Bytecodes._jsr:               doJsr(itr.dest());          break;
     case Bytecodes._jsr_w:             doJsr(itr.dest_w());        break;
 
-    case Bytecodes._getstatic:         doField(true,  true,
-                                               itr.getIndexBig(),
-                                               itr.bci()); break;
-    case Bytecodes._putstatic:         doField(false, true,  itr.getIndexBig(), itr.bci()); break;
-    case Bytecodes._getfield:          doField(true,  false, itr.getIndexBig(), itr.bci()); break;
-    case Bytecodes._putfield:          doField(false, false, itr.getIndexBig(), itr.bci()); break;
+    case Bytecodes._getstatic:         doField(true,  true,  itr.getIndexU2Cpcache(), itr.bci()); break;
+    case Bytecodes._putstatic:         doField(false, true,  itr.getIndexU2Cpcache(), itr.bci()); break;
+    case Bytecodes._getfield:          doField(true,  false, itr.getIndexU2Cpcache(), itr.bci()); break;
+    case Bytecodes._putfield:          doField(false, false, itr.getIndexU2Cpcache(), itr.bci()); break;
 
     case Bytecodes._invokevirtual:
-    case Bytecodes._invokespecial:     doMethod(false, false, itr.getIndexBig(), itr.bci()); break;
-    case Bytecodes._invokestatic:      doMethod(true,  false, itr.getIndexBig(), itr.bci()); break;
-    case Bytecodes._invokedynamic:     doMethod(false, true,  itr.getIndexBig(), itr.bci()); break;
-    case Bytecodes._invokeinterface:   doMethod(false, true,  itr.getIndexBig(), itr.bci()); break;
+    case Bytecodes._invokespecial:     doMethod(false, false, itr.getIndexU2Cpcache(), itr.bci()); break;
+    case Bytecodes._invokestatic:      doMethod(true,  false, itr.getIndexU2Cpcache(), itr.bci()); break;
+    case Bytecodes._invokedynamic:     doMethod(true,  false, itr.getIndexU4(),        itr.bci()); break;
+    case Bytecodes._invokeinterface:   doMethod(false,  true, itr.getIndexU2Cpcache(), itr.bci()); break;
     case Bytecodes._newarray:
     case Bytecodes._anewarray:         ppNewRef(vCTS, itr.bci()); break;
     case Bytecodes._checkcast:         doCheckcast(); break;
@@ -1665,13 +1664,11 @@
     }
   }
 
-  void  doLdc                               (int idx, int bci) {
+  void  doLdc                               (int bci) {
+    BytecodeLoadConstant ldc = BytecodeLoadConstant.at(_method, bci);
     ConstantPool  cp  = method().getConstants();
-    ConstantTag   tag = cp.getTagAt(idx);
-    CellTypeState cts = (tag.isString() || tag.isUnresolvedString() ||
-                         tag.isKlass() || tag.isUnresolvedKlass())
-                          ? CellTypeState.makeLineRef(bci)
-                          : valCTS;
+    BasicType     bt = ldc.resultType();
+    CellTypeState cts = (bt == BasicType.T_OBJECT) ? CellTypeState.makeLineRef(bci) : valCTS;
     ppush1(cts);
   }
 
@@ -1729,15 +1726,7 @@
   void  doMethod                            (boolean is_static, boolean is_interface, int idx, int bci) {
     // Dig up signature for field in constant pool
     ConstantPool cp       = _method.getConstants();
-    int nameAndTypeIdx    = cp.getTagAt(idx).isNameAndType() ? idx : cp.getNameAndTypeRefIndexAt(idx);
-    int signatureIdx      = cp.getSignatureRefIndexAt(nameAndTypeIdx);
-    Symbol signature      = cp.getSymbolAt(signatureIdx);
-
-    if (DEBUG) {
-      System.err.println("doMethod: signature = " + signature.asString() + ", idx = " + idx +
-                         ", nameAndTypeIdx = " + nameAndTypeIdx + ", signatureIdx = " + signatureIdx +
-                         ", bci = " + bci);
-    }
+    Symbol signature      = cp.getSignatureRefAt(idx);
 
     // Parse method signature
     CellTypeStateList out = new CellTypeStateList(4);
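
Note: doLdc no longer needs per-tag case analysis: BytecodeLoadConstant.resultType() collapses every reference-like constant tag to T_OBJECT, leaving a two-way decision. A toy of that decision (the enum stands in for sun.jvm.hotspot.runtime.BasicType):

```java
// Toy of the simplified doLdc: T_OBJECT pushes a reference cell tagged with
// the bci, everything else a plain value cell.
public class LdcCellType {
    enum BasicType { T_INT, T_FLOAT, T_LONG, T_DOUBLE, T_OBJECT }

    static String cellFor(BasicType bt, int bci) {
        return bt == BasicType.T_OBJECT ? "ref@" + bci : "value";
    }

    public static void main(String[] args) {
        System.out.println(cellFor(BasicType.T_INT, 7));    // value
        System.out.println(cellFor(BasicType.T_OBJECT, 7)); // ref@7
    }
}
```
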
--- a/agent/src/share/classes/sun/jvm/hotspot/oops/Method.java	Wed Aug 17 07:05:42 2011 -0400
+++ b/agent/src/share/classes/sun/jvm/hotspot/oops/Method.java	Fri Aug 19 08:55:53 2011 -0700
@@ -180,12 +180,24 @@
     return getConstMethod().getBytecodeShortArg(bci);
   }
 
+  /** Fetches a 16-bit native ordered value from the
+      bytecode stream */
+  public short getNativeShortArg(int bci) {
+    return getConstMethod().getNativeShortArg(bci);
+  }
+
   /** Fetches a 32-bit big-endian ("Java ordered") value from the
       bytecode stream */
   public int getBytecodeIntArg(int bci) {
     return getConstMethod().getBytecodeIntArg(bci);
   }
 
+  /** Fetches a 32-bit native ordered value from the
+      bytecode stream */
+  public int getNativeIntArg(int bci) {
+    return getConstMethod().getNativeIntArg(bci);
+  }
+
   public byte[] getByteCode() {
     return getConstMethod().getByteCode();
   }
--- a/agent/src/share/classes/sun/jvm/hotspot/oops/TypeArray.java	Wed Aug 17 07:05:42 2011 -0400
+++ b/agent/src/share/classes/sun/jvm/hotspot/oops/TypeArray.java	Fri Aug 19 08:55:53 2011 -0700
@@ -53,6 +53,9 @@
   public boolean isTypeArray()         { return true; }
 
   public byte getByteAt(long index) {
+    if (index < 0 || index >= getLength()) {
+      throw new ArrayIndexOutOfBoundsException(index + " " + getLength());
+    }
     long offset = baseOffsetInBytes(BasicType.T_BYTE) + index * getHeap().getByteSize();
     return getHandle().getJByteAt(offset);
   }
--- a/agent/src/share/classes/sun/jvm/hotspot/utilities/ConstantTag.java	Wed Aug 17 07:05:42 2011 -0400
+++ b/agent/src/share/classes/sun/jvm/hotspot/utilities/ConstantTag.java	Fri Aug 19 08:55:53 2011 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -24,31 +24,33 @@
 
 package sun.jvm.hotspot.utilities;
 
+import sun.jvm.hotspot.runtime.BasicType;
+
 public class ConstantTag {
   // These replicated from the VM to save space
-  private static int JVM_CONSTANT_Utf8                    = 1;
-  private static int JVM_CONSTANT_Unicode                 = 2; // unused
-  private static int JVM_CONSTANT_Integer                 = 3;
-  private static int JVM_CONSTANT_Float                   = 4;
-  private static int JVM_CONSTANT_Long                    = 5;
-  private static int JVM_CONSTANT_Double                  = 6;
-  private static int JVM_CONSTANT_Class                   = 7;
-  private static int JVM_CONSTANT_String                  = 8;
-  private static int JVM_CONSTANT_Fieldref                = 9;
-  private static int JVM_CONSTANT_Methodref               = 10;
-  private static int JVM_CONSTANT_InterfaceMethodref      = 11;
-  private static int JVM_CONSTANT_NameAndType             = 12;
-  private static int JVM_CONSTANT_MethodHandle            = 15;  // JSR 292
-  private static int JVM_CONSTANT_MethodType              = 16;  // JSR 292
-  //      static int JVM_CONSTANT_(unused)                = 17;  // JSR 292 early drafts only
-  private static int JVM_CONSTANT_InvokeDynamic           = 18;  // JSR 292
-  private static int JVM_CONSTANT_Invalid                 = 0;   // For bad value initialization
-  private static int JVM_CONSTANT_UnresolvedClass         = 100; // Temporary tag until actual use
-  private static int JVM_CONSTANT_ClassIndex              = 101; // Temporary tag while constructing constant pool
-  private static int JVM_CONSTANT_UnresolvedString        = 102; // Temporary tag until actual use
-  private static int JVM_CONSTANT_StringIndex             = 103; // Temporary tag while constructing constant pool
-  private static int JVM_CONSTANT_UnresolvedClassInError  = 104; // Resolution failed
-  private static int JVM_CONSTANT_Object                  = 105; // Required for BoundMethodHandle arguments.
+  private static final int JVM_CONSTANT_Utf8                    = 1;
+  private static final int JVM_CONSTANT_Unicode                 = 2; // unused
+  private static final int JVM_CONSTANT_Integer                 = 3;
+  private static final int JVM_CONSTANT_Float                   = 4;
+  private static final int JVM_CONSTANT_Long                    = 5;
+  private static final int JVM_CONSTANT_Double                  = 6;
+  private static final int JVM_CONSTANT_Class                   = 7;
+  private static final int JVM_CONSTANT_String                  = 8;
+  private static final int JVM_CONSTANT_Fieldref                = 9;
+  private static final int JVM_CONSTANT_Methodref               = 10;
+  private static final int JVM_CONSTANT_InterfaceMethodref      = 11;
+  private static final int JVM_CONSTANT_NameAndType             = 12;
+  private static final int JVM_CONSTANT_MethodHandle            = 15;  // JSR 292
+  private static final int JVM_CONSTANT_MethodType              = 16;  // JSR 292
+  //      static final int JVM_CONSTANT_(unused)                = 17;  // JSR 292 early drafts only
+  private static final int JVM_CONSTANT_InvokeDynamic           = 18;  // JSR 292
+  private static final int JVM_CONSTANT_Invalid                 = 0;   // For bad value initialization
+  private static final int JVM_CONSTANT_UnresolvedClass         = 100; // Temporary tag until actual use
+  private static final int JVM_CONSTANT_ClassIndex              = 101; // Temporary tag while constructing constant pool
+  private static final int JVM_CONSTANT_UnresolvedString        = 102; // Temporary tag until actual use
+  private static final int JVM_CONSTANT_StringIndex             = 103; // Temporary tag while constructing constant pool
+  private static final int JVM_CONSTANT_UnresolvedClassInError  = 104; // Resolution failed
+  private static final int JVM_CONSTANT_Object                  = 105; // Required for BoundMethodHandle arguments.
 
   // JVM_CONSTANT_MethodHandle subtypes //FIXME: connect these to data structure
   private static int JVM_REF_getField                = 1;
@@ -99,4 +101,31 @@
   public boolean isKlassReference()   { return isKlassIndex() || isUnresolvedKlass(); }
   public boolean isFieldOrMethod()    { return isField() || isMethod() || isInterfaceMethod(); }
   public boolean isSymbol()           { return isUtf8(); }
+
+  public BasicType basicType() {
+    switch (tag) {
+    case JVM_CONSTANT_Integer :
+      return BasicType.T_INT;
+    case JVM_CONSTANT_Float :
+      return BasicType.T_FLOAT;
+    case JVM_CONSTANT_Long :
+      return BasicType.T_LONG;
+    case JVM_CONSTANT_Double :
+      return BasicType.T_DOUBLE;
+
+    case JVM_CONSTANT_Class :
+    case JVM_CONSTANT_String :
+    case JVM_CONSTANT_UnresolvedClass :
+    case JVM_CONSTANT_UnresolvedClassInError :
+    case JVM_CONSTANT_ClassIndex :
+    case JVM_CONSTANT_UnresolvedString :
+    case JVM_CONSTANT_StringIndex :
+    case JVM_CONSTANT_MethodHandle :
+    case JVM_CONSTANT_MethodType :
+    case JVM_CONSTANT_Object :
+      return BasicType.T_OBJECT;
+    default:
+      throw new InternalError("unexpected tag: " + tag);
+    }
+  }
 }
--- a/make/linux/makefiles/defs.make	Wed Aug 17 07:05:42 2011 -0400
+++ b/make/linux/makefiles/defs.make	Fri Aug 19 08:55:53 2011 -0700
@@ -124,6 +124,7 @@
 # client and server subdirectories have symbolic links to ../libjsig.so
 EXPORT_LIST += $(EXPORT_JRE_LIB_ARCH_DIR)/libjsig.so
 EXPORT_SERVER_DIR = $(EXPORT_JRE_LIB_ARCH_DIR)/server
+EXPORT_CLIENT_DIR = $(EXPORT_JRE_LIB_ARCH_DIR)/client
 
 ifndef BUILD_CLIENT_ONLY
 EXPORT_LIST += $(EXPORT_SERVER_DIR)/Xusage.txt
@@ -132,7 +133,6 @@
 
 ifneq ($(ZERO_BUILD), true)
   ifeq ($(ARCH_DATA_MODEL), 32)
-    EXPORT_CLIENT_DIR = $(EXPORT_JRE_LIB_ARCH_DIR)/client
     EXPORT_LIST += $(EXPORT_CLIENT_DIR)/Xusage.txt
     EXPORT_LIST += $(EXPORT_CLIENT_DIR)/libjvm.so 
   endif
--- a/make/solaris/makefiles/defs.make	Wed Aug 17 07:05:42 2011 -0400
+++ b/make/solaris/makefiles/defs.make	Fri Aug 19 08:55:53 2011 -0700
@@ -70,6 +70,8 @@
 EXPORT_LIST += $(EXPORT_JRE_LIB_ARCH_DIR)/libjsig.so
 
 EXPORT_SERVER_DIR = $(EXPORT_JRE_LIB_ARCH_DIR)/server
+EXPORT_CLIENT_DIR = $(EXPORT_JRE_LIB_ARCH_DIR)/client
+
 ifneq ($(BUILD_CLIENT_ONLY),true)
 EXPORT_LIST += $(EXPORT_SERVER_DIR)/Xusage.txt
 EXPORT_LIST += $(EXPORT_SERVER_DIR)/libjvm.so
@@ -77,7 +79,6 @@
 EXPORT_LIST += $(EXPORT_SERVER_DIR)/libjvm_dtrace.so
 endif
 ifeq ($(ARCH_DATA_MODEL), 32)
-  EXPORT_CLIENT_DIR = $(EXPORT_JRE_LIB_ARCH_DIR)/client
   EXPORT_LIST += $(EXPORT_CLIENT_DIR)/Xusage.txt
   EXPORT_LIST += $(EXPORT_CLIENT_DIR)/libjvm.so 
   EXPORT_LIST += $(EXPORT_CLIENT_DIR)/libjvm_db.so 
--- a/make/solaris/makefiles/saproc.make	Wed Aug 17 07:05:42 2011 -0400
+++ b/make/solaris/makefiles/saproc.make	Fri Aug 19 08:55:53 2011 -0700
@@ -72,9 +72,9 @@
           -e '/^[0-4]\. /b' \
           -e '/^5\.[0-9] /b' \
           -e '/^5\.10 /b' \
-          -e '/ snv_[0-9][0-9]$/b' \
-          -e '/ snv_[01][0-4][0-9]$/b' \
-          -e '/ snv_15[0-8]$/b' \
+          -e '/ snv_[0-9][0-9]$$/b' \
+          -e '/ snv_[01][0-4][0-9]$$/b' \
+          -e '/ snv_15[0-8]$$/b' \
           -e 's/.*/-DSOLARIS_11_B159_OR_LATER/' \
           -e 'p' \
           )
--- a/make/windows/makefiles/defs.make	Wed Aug 17 07:05:42 2011 -0400
+++ b/make/windows/makefiles/defs.make	Fri Aug 19 08:55:53 2011 -0700
@@ -171,19 +171,20 @@
 endif
 
 EXPORT_SERVER_DIR = $(EXPORT_JRE_BIN_DIR)/server
+EXPORT_CLIENT_DIR = $(EXPORT_JRE_BIN_DIR)/client
+EXPORT_KERNEL_DIR = $(EXPORT_JRE_BIN_DIR)/kernel
+
 EXPORT_LIST += $(EXPORT_SERVER_DIR)/Xusage.txt
 EXPORT_LIST += $(EXPORT_SERVER_DIR)/jvm.dll
 EXPORT_LIST += $(EXPORT_SERVER_DIR)/jvm.pdb
 EXPORT_LIST += $(EXPORT_SERVER_DIR)/jvm.map
 EXPORT_LIST += $(EXPORT_LIB_DIR)/jvm.lib
 ifeq ($(ARCH_DATA_MODEL), 32)
-  EXPORT_CLIENT_DIR = $(EXPORT_JRE_BIN_DIR)/client
   EXPORT_LIST += $(EXPORT_CLIENT_DIR)/Xusage.txt
   EXPORT_LIST += $(EXPORT_CLIENT_DIR)/jvm.dll
   EXPORT_LIST += $(EXPORT_CLIENT_DIR)/jvm.pdb
   EXPORT_LIST += $(EXPORT_CLIENT_DIR)/jvm.map
   # kernel vm
-  EXPORT_KERNEL_DIR = $(EXPORT_JRE_BIN_DIR)/kernel
   EXPORT_LIST += $(EXPORT_KERNEL_DIR)/Xusage.txt
   EXPORT_LIST += $(EXPORT_KERNEL_DIR)/jvm.dll
   EXPORT_LIST += $(EXPORT_KERNEL_DIR)/jvm.pdb
--- a/make/windows/makefiles/sa.make	Wed Aug 17 07:05:42 2011 -0400
+++ b/make/windows/makefiles/sa.make	Fri Aug 19 08:55:53 2011 -0700
@@ -66,7 +66,7 @@
 	$(QUIETLY) mkdir $(SA_CLASSDIR)\sun\jvm\hotspot\ui\resources
 	$(QUIETLY) cp $(AGENT_SRC_DIR)/sun/jvm/hotspot/ui/resources/*.png $(SA_CLASSDIR)/sun/jvm/hotspot/ui/resources
 	$(QUIETLY) cp -r $(AGENT_SRC_DIR)/images/* $(SA_CLASSDIR)
-	$(RUN_JAR) cf $@ -C saclasses .
+	$(RUN_JAR) cf $@ -C $(SA_CLASSDIR) .
 	$(RUN_JAR) uf $@ -C $(AGENT_SRC_DIR:/=\) META-INF\services\com.sun.jdi.connect.Connector
 	$(RUN_JAVAH) -classpath $(SA_CLASSDIR) -jni sun.jvm.hotspot.debugger.windbg.WindbgDebuggerLocal
 	$(RUN_JAVAH) -classpath $(SA_CLASSDIR) -jni sun.jvm.hotspot.debugger.x86.X86ThreadContext 
--- a/src/cpu/sparc/vm/assembler_sparc.cpp	Wed Aug 17 07:05:42 2011 -0400
+++ b/src/cpu/sparc/vm/assembler_sparc.cpp	Fri Aug 19 08:55:53 2011 -0700
@@ -100,12 +100,19 @@
   case call_op:    s = "call"; break;
   case branch_op:
     switch (inv_op2(inst)) {
-      case bpr_op2:    s = "bpr";  break;
       case fb_op2:     s = "fb";   break;
       case fbp_op2:    s = "fbp";  break;
       case br_op2:     s = "br";   break;
       case bp_op2:     s = "bp";   break;
       case cb_op2:     s = "cb";   break;
+      case bpr_op2: {
+        if (is_cbcond(inst)) {
+          s = is_cxb(inst) ? "cxb" : "cwb";
+        } else {
+          s = "bpr";
+        }
+        break;
+      }
       default:         s = "????"; break;
     }
   }
@@ -127,12 +134,21 @@
   case call_op:    m = wdisp(word_aligned_ones, 0, 30);  v = wdisp(dest_pos, inst_pos, 30); break;
   case branch_op:
     switch (inv_op2(inst)) {
-      case bpr_op2:    m = wdisp16(word_aligned_ones, 0);      v = wdisp16(dest_pos, inst_pos);     break;
       case fbp_op2:    m = wdisp(  word_aligned_ones, 0, 19);  v = wdisp(  dest_pos, inst_pos, 19); break;
       case bp_op2:     m = wdisp(  word_aligned_ones, 0, 19);  v = wdisp(  dest_pos, inst_pos, 19); break;
       case fb_op2:     m = wdisp(  word_aligned_ones, 0, 22);  v = wdisp(  dest_pos, inst_pos, 22); break;
       case br_op2:     m = wdisp(  word_aligned_ones, 0, 22);  v = wdisp(  dest_pos, inst_pos, 22); break;
       case cb_op2:     m = wdisp(  word_aligned_ones, 0, 22);  v = wdisp(  dest_pos, inst_pos, 22); break;
+      case bpr_op2: {
+        if (is_cbcond(inst)) {
+          m = wdisp10(word_aligned_ones, 0);
+          v = wdisp10(dest_pos, inst_pos);
+        } else {
+          m = wdisp16(word_aligned_ones, 0);
+          v = wdisp16(dest_pos, inst_pos);
+        }
+        break;
+      }
       default: ShouldNotReachHere();
     }
   }
@@ -149,12 +165,19 @@
   case call_op:        r = inv_wdisp(inst, pos, 30);  break;
   case branch_op:
     switch (inv_op2(inst)) {
-      case bpr_op2:    r = inv_wdisp16(inst, pos);    break;
       case fbp_op2:    r = inv_wdisp(  inst, pos, 19);  break;
       case bp_op2:     r = inv_wdisp(  inst, pos, 19);  break;
       case fb_op2:     r = inv_wdisp(  inst, pos, 22);  break;
       case br_op2:     r = inv_wdisp(  inst, pos, 22);  break;
       case cb_op2:     r = inv_wdisp(  inst, pos, 22);  break;
+      case bpr_op2: {
+        if (is_cbcond(inst)) {
+          r = inv_wdisp10(inst, pos);
+        } else {
+          r = inv_wdisp16(inst, pos);
+        }
+        break;
+      }
       default: ShouldNotReachHere();
     }
   }
@@ -968,13 +991,7 @@
   Label PcOk;
   save_frame(0);                // to avoid clobbering O0
   ld_ptr(pc_addr, L0);
-  tst(L0);
-#ifdef _LP64
-  brx(Assembler::zero, false, Assembler::pt, PcOk);
-#else
-  br(Assembler::zero, false, Assembler::pt, PcOk);
-#endif // _LP64
-  delayed() -> nop();
+  br_null_short(L0, Assembler::pt, PcOk);
   stop("last_Java_pc not zeroed before leaving Java");
   bind(PcOk);
 
@@ -1003,7 +1020,7 @@
   Label StackOk;
   andcc(last_java_sp, 0x01, G0);
   br(Assembler::notZero, false, Assembler::pt, StackOk);
-  delayed() -> nop();
+  delayed()->nop();
   stop("Stack Not Biased in set_last_Java_frame");
   bind(StackOk);
 #endif // ASSERT
@@ -1099,8 +1116,7 @@
 
   Address exception_addr(G2_thread, Thread::pending_exception_offset());
   ld_ptr(exception_addr, scratch_reg);
-  br_null(scratch_reg,false,pt,L);
-  delayed()->nop();
+  br_null_short(scratch_reg, pt, L);
   // we use O7 linkage so that forward_exception_entry has the issuing PC
   call(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type);
   delayed()->nop();
@@ -1874,14 +1890,11 @@
 
   // assert((obj & oop_mask) == oop_bits);
   and3(O0_obj, O2_mask, O4_temp);
-  cmp(O4_temp, O3_bits);
-  brx(notEqual, false, pn, null_or_fail);
-  delayed()->nop();
+  cmp_and_brx_short(O4_temp, O3_bits, notEqual, pn, null_or_fail);
 
   if ((NULL_WORD & Universe::verify_oop_mask()) == Universe::verify_oop_bits()) {
     // the null_or_fail case is useless; must test for null separately
-    br_null(O0_obj, false, pn, succeed);
-    delayed()->nop();
+    br_null_short(O0_obj, pn, succeed);
   }
 
   // Check the klassOop of this object for being in the right area of memory.
@@ -1893,9 +1906,7 @@
   if( Universe::verify_klass_bits() != Universe::verify_oop_bits() )
     set(Universe::verify_klass_bits(), O3_bits);
   and3(O0_obj, O2_mask, O4_temp);
-  cmp(O4_temp, O3_bits);
-  brx(notEqual, false, pn, fail);
-  delayed()->nop();
+  cmp_and_brx_short(O4_temp, O3_bits, notEqual, pn, fail);
   // Check the klass's klass
   load_klass(O0_obj, O0_obj);
   and3(O0_obj, O2_mask, O4_temp);
@@ -2122,13 +2133,12 @@
   return Assembler::rc_z;
 }
 
-// compares register with zero and branches.  NOT FOR USE WITH 64-bit POINTERS
-void MacroAssembler::br_zero( Condition c, bool a, Predict p, Register s1, Label& L) {
+// compares (32 bit) register with zero and branches.  NOT FOR USE WITH 64-bit POINTERS
+void MacroAssembler::cmp_zero_and_br(Condition c, Register s1, Label& L, bool a, Predict p) {
   tst(s1);
   br (c, a, p, L);
 }
 
-
 // Compares a pointer register with zero and branches on null.
 // Does a test & branch on 32-bit systems and a register-branch on 64-bit.
 void MacroAssembler::br_null( Register s1, bool a, Predict p, Label& L ) {
@@ -2154,6 +2164,7 @@
 void MacroAssembler::br_on_reg_cond( RCondition rc, bool a, Predict p,
                                      Register s1, address d,
                                      relocInfo::relocType rt ) {
+  assert_not_delayed();
   if (VM_Version::v9_instructions_work()) {
     bpr(rc, a, p, s1, d, rt);
   } else {
@@ -2164,6 +2175,7 @@
 
 void MacroAssembler::br_on_reg_cond( RCondition rc, bool a, Predict p,
                                      Register s1, Label& L ) {
+  assert_not_delayed();
   if (VM_Version::v9_instructions_work()) {
     bpr(rc, a, p, s1, L);
   } else {
@@ -2172,6 +2184,91 @@
   }
 }
 
+// Compare registers and branch with nop in delay slot or cbcond without delay slot.
+
+// Compare integer (32 bit) values (icc only).
+void MacroAssembler::cmp_and_br_short(Register s1, Register s2, Condition c,
+                                      Predict p, Label& L) {
+  assert_not_delayed();
+  if (use_cbcond(L)) {
+    Assembler::cbcond(c, icc, s1, s2, L);
+  } else {
+    cmp(s1, s2);
+    br(c, false, p, L);
+    delayed()->nop();
+  }
+}
+
+// Compare integer (32 bit) values (icc only).
+void MacroAssembler::cmp_and_br_short(Register s1, int simm13a, Condition c,
+                                      Predict p, Label& L) {
+  assert_not_delayed();
+  if (is_simm(simm13a,5) && use_cbcond(L)) {
+    Assembler::cbcond(c, icc, s1, simm13a, L);
+  } else {
+    cmp(s1, simm13a);
+    br(c, false, p, L);
+    delayed()->nop();
+  }
+}
+
+// Branch that tests xcc in LP64 and icc in !LP64
+void MacroAssembler::cmp_and_brx_short(Register s1, Register s2, Condition c,
+                                       Predict p, Label& L) {
+  assert_not_delayed();
+  if (use_cbcond(L)) {
+    Assembler::cbcond(c, ptr_cc, s1, s2, L);
+  } else {
+    cmp(s1, s2);
+    brx(c, false, p, L);
+    delayed()->nop();
+  }
+}
+
+// Branch that tests xcc in LP64 and icc in !LP64
+void MacroAssembler::cmp_and_brx_short(Register s1, int simm13a, Condition c,
+                                       Predict p, Label& L) {
+  assert_not_delayed();
+  if (is_simm(simm13a,5) && use_cbcond(L)) {
+    Assembler::cbcond(c, ptr_cc, s1, simm13a, L);
+  } else {
+    cmp(s1, simm13a);
+    brx(c, false, p, L);
+    delayed()->nop();
+  }
+}
+
+// Short branch version for compares a pointer with zero.
+
+void MacroAssembler::br_null_short(Register s1, Predict p, Label& L) {
+  assert_not_delayed();
+  if (use_cbcond(L)) {
+    Assembler::cbcond(zero, ptr_cc, s1, 0, L);
+    return;
+  }
+  br_null(s1, false, p, L);
+  delayed()->nop();
+}
+
+void MacroAssembler::br_notnull_short(Register s1, Predict p, Label& L) {
+  assert_not_delayed();
+  if (use_cbcond(L)) {
+    Assembler::cbcond(notZero, ptr_cc, s1, 0, L);
+    return;
+  }
+  br_notnull(s1, false, p, L);
+  delayed()->nop();
+}
+
+// Unconditional short branch
+void MacroAssembler::ba_short(Label& L) {
+  if (use_cbcond(L)) {
+    Assembler::cbcond(equal, icc, G0, G0, L);
+    return;
+  }
+  br(always, false, pt, L);
+  delayed()->nop();
+}
 
 // instruction sequences factored across compiler & interpreter
 
@@ -2197,11 +2294,9 @@
   // since that triplet is reached only after finding the high halves differ.
 
   if (VM_Version::v9_instructions_work()) {
-
-                                    mov  (                     -1, Rresult);
-    ba( false, done );  delayed()-> movcc(greater, false, icc,  1, Rresult);
-  }
-  else {
+    mov(-1, Rresult);
+    ba(done);  delayed()-> movcc(greater, false, icc,  1, Rresult);
+  } else {
     br(less,    true, pt, done); delayed()-> set(-1, Rresult);
     br(greater, true, pt, done); delayed()-> set( 1, Rresult);
   }
@@ -2212,9 +2307,8 @@
     mov(                               -1, Rresult);
     movcc(equal,           false, icc,  0, Rresult);
     movcc(greaterUnsigned, false, icc,  1, Rresult);
-  }
-  else {
-                                                    set(-1, Rresult);
+  } else {
+    set(-1, Rresult);
     br(equal,           true, pt, done); delayed()->set( 0, Rresult);
     br(greaterUnsigned, true, pt, done); delayed()->set( 1, Rresult);
   }
@@ -2250,11 +2344,10 @@
   // This code can be optimized to use the 64 bit shifts in V9.
   // Here we use the 32 bit shifts.
 
-  and3( Rcount,         0x3f,           Rcount);     // take least significant 6 bits
-  subcc(Rcount,         31,             Ralt_count);
+  and3( Rcount, 0x3f, Rcount);     // take least significant 6 bits
+  subcc(Rcount,   31, Ralt_count);
   br(greater, true, pn, big_shift);
-  delayed()->
-  dec(Ralt_count);
+  delayed()->dec(Ralt_count);
 
   // shift < 32 bits, Ralt_count = Rcount-31
 
@@ -2263,28 +2356,27 @@
   // more to take care of the special (rare) case where count is zero
   // (shifting by 32 would not work).
 
-  neg(  Ralt_count                                 );
+  neg(Ralt_count);
 
   // The order of the next two instructions is critical in the case where
   // Rin and Rout are the same and should not be reversed.
 
-  srl(  Rin_low,        Ralt_count,     Rxfer_bits ); // shift right by 31-count
+  srl(Rin_low, Ralt_count, Rxfer_bits); // shift right by 31-count
   if (Rcount != Rout_low) {
-    sll(        Rin_low,        Rcount,         Rout_low   ); // low half
+    sll(Rin_low, Rcount, Rout_low); // low half
   }
-  sll(  Rin_high,       Rcount,         Rout_high  );
+  sll(Rin_high, Rcount, Rout_high);
   if (Rcount == Rout_low) {
-    sll(        Rin_low,        Rcount,         Rout_low   ); // low half
+    sll(Rin_low, Rcount, Rout_low); // low half
   }
-  srl(  Rxfer_bits,     1,              Rxfer_bits ); // shift right by one more
-  ba (false, done);
-  delayed()->
-  or3(  Rout_high,      Rxfer_bits,     Rout_high);   // new hi value: or in shifted old hi part and xfer from low
+  srl(Rxfer_bits, 1, Rxfer_bits); // shift right by one more
+  ba(done);
+  delayed()->or3(Rout_high, Rxfer_bits, Rout_high);   // new hi value: or in shifted old hi part and xfer from low
 
   // shift >= 32 bits, Ralt_count = Rcount-32
   bind(big_shift);
-  sll(  Rin_low,        Ralt_count,     Rout_high  );
-  clr(  Rout_low                                   );
+  sll(Rin_low, Ralt_count, Rout_high);
+  clr(Rout_low);
 
   bind(done);
 }
@@ -2313,8 +2405,8 @@
   // This code can be optimized to use the 64 bit shifts in V9.
   // Here we use the 32 bit shifts.
 
-  and3( Rcount,         0x3f,           Rcount);     // take least significant 6 bits
-  subcc(Rcount,         31,             Ralt_count);
+  and3( Rcount, 0x3f, Rcount);     // take least significant 6 bits
+  subcc(Rcount,   31, Ralt_count);
   br(greater, true, pn, big_shift);
   delayed()->dec(Ralt_count);
 
@@ -2325,29 +2417,28 @@
   // more to take care of the special (rare) case where count is zero
   // (shifting by 32 would not work).
 
-  neg(  Ralt_count                                  );
+  neg(Ralt_count);
   if (Rcount != Rout_low) {
-    srl(        Rin_low,        Rcount,         Rout_low    );
+    srl(Rin_low, Rcount, Rout_low);
   }
 
   // The order of the next two instructions is critical in the case where
   // Rin and Rout are the same and should not be reversed.
 
-  sll(  Rin_high,       Ralt_count,     Rxfer_bits  ); // shift left by 31-count
-  sra(  Rin_high,       Rcount,         Rout_high   ); // high half
-  sll(  Rxfer_bits,     1,              Rxfer_bits  ); // shift left by one more
+  sll(Rin_high, Ralt_count, Rxfer_bits); // shift left by 31-count
+  sra(Rin_high,     Rcount, Rout_high ); // high half
+  sll(Rxfer_bits,        1, Rxfer_bits); // shift left by one more
   if (Rcount == Rout_low) {
-    srl(        Rin_low,        Rcount,         Rout_low    );
+    srl(Rin_low, Rcount, Rout_low);
   }
-  ba (false, done);
-  delayed()->
-  or3(  Rout_low,       Rxfer_bits,     Rout_low    ); // new low value: or shifted old low part and xfer from high
+  ba(done);
+  delayed()->or3(Rout_low, Rxfer_bits, Rout_low); // new low value: or shifted old low part and xfer from high
 
   // shift >= 32 bits, Ralt_count = Rcount-32
   bind(big_shift);
 
-  sra(  Rin_high,       Ralt_count,     Rout_low    );
-  sra(  Rin_high,       31,             Rout_high   ); // sign into hi
+  sra(Rin_high, Ralt_count, Rout_low);
+  sra(Rin_high,         31, Rout_high); // sign into hi
 
   bind( done );
 }
@@ -2377,8 +2468,8 @@
   // This code can be optimized to use the 64 bit shifts in V9.
   // Here we use the 32 bit shifts.
 
-  and3( Rcount,         0x3f,           Rcount);     // take least significant 6 bits
-  subcc(Rcount,         31,             Ralt_count);
+  and3( Rcount, 0x3f, Rcount);     // take least significant 6 bits
+  subcc(Rcount,   31, Ralt_count);
   br(greater, true, pn, big_shift);
   delayed()->dec(Ralt_count);
 
@@ -2389,29 +2480,28 @@
   // more to take care of the special (rare) case where count is zero
   // (shifting by 32 would not work).
 
-  neg(  Ralt_count                                  );
+  neg(Ralt_count);
   if (Rcount != Rout_low) {
-    srl(        Rin_low,        Rcount,         Rout_low    );
+    srl(Rin_low, Rcount, Rout_low);
   }
 
   // The order of the next two instructions is critical in the case where
   // Rin and Rout are the same and should not be reversed.
 
-  sll(  Rin_high,       Ralt_count,     Rxfer_bits  ); // shift left by 31-count
-  srl(  Rin_high,       Rcount,         Rout_high   ); // high half
-  sll(  Rxfer_bits,     1,              Rxfer_bits  ); // shift left by one more
+  sll(Rin_high, Ralt_count, Rxfer_bits); // shift left by 31-count
+  srl(Rin_high,     Rcount, Rout_high ); // high half
+  sll(Rxfer_bits,        1, Rxfer_bits); // shift left by one more
   if (Rcount == Rout_low) {
-    srl(        Rin_low,        Rcount,         Rout_low    );
+    srl(Rin_low, Rcount, Rout_low);
   }
-  ba (false, done);
-  delayed()->
-  or3(  Rout_low,       Rxfer_bits,     Rout_low    ); // new low value: or shifted old low part and xfer from high
+  ba(done);
+  delayed()->or3(Rout_low, Rxfer_bits, Rout_low); // new low value: or shifted old low part and xfer from high
 
   // shift >= 32 bits, Ralt_count = Rcount-32
   bind(big_shift);
 
-  srl(  Rin_high,       Ralt_count,     Rout_low    );
-  clr(  Rout_high                                   );
+  srl(Rin_high, Ralt_count, Rout_low);
+  clr(Rout_high);
 
   bind( done );
 }
@@ -2419,7 +2509,7 @@
 #ifdef _LP64
 void MacroAssembler::lcmp( Register Ra, Register Rb, Register Rresult) {
   cmp(Ra, Rb);
-  mov(                       -1, Rresult);
+  mov(-1, Rresult);
   movcc(equal,   false, xcc,  0, Rresult);
   movcc(greater, false, xcc,  1, Rresult);
 }
@@ -2459,14 +2549,14 @@
 
   if (VM_Version::v9_instructions_work()) {
 
-    mov(                   -1, Rresult );
-    movcc( eq, true, fcc0,  0, Rresult );
-    movcc( gt, true, fcc0,  1, Rresult );
+    mov(-1, Rresult);
+    movcc(eq, true, fcc0, 0, Rresult);
+    movcc(gt, true, fcc0, 1, Rresult);
 
   } else {
     Label done;
 
-                                         set( -1, Rresult );
+    set( -1, Rresult );
     //fb(lt, true, pn, done); delayed()->set( -1, Rresult );
     fb( eq, true, pn, done);  delayed()->set(  0, Rresult );
     fb( gt, true, pn, done);  delayed()->set(  1, Rresult );
@@ -2668,9 +2758,7 @@
     set(StubRoutines::Sparc::locked, lock_reg);
 
     bind(retry_get_lock);
-    cmp(yield_reg, V8AtomicOperationUnderLockSpinCount);
-    br(Assembler::less, false, Assembler::pt, dont_yield);
-    delayed()->nop();
+    cmp_and_br_short(yield_reg, V8AtomicOperationUnderLockSpinCount, Assembler::less, Assembler::pt, dont_yield);
 
     if(use_call_vm) {
       Untested("Need to verify global reg consistancy");
@@ -2700,9 +2788,7 @@
 
     // yes, got lock.  do we have the same top?
     ld(top_ptr_reg_after_save, 0, value_reg);
-    cmp(value_reg, top_reg_after_save);
-    br(Assembler::notEqual, false, Assembler::pn, not_same);
-    delayed()->nop();
+    cmp_and_br_short(value_reg, top_reg_after_save, Assembler::notEqual, Assembler::pn, not_same);
 
     // yes, same top.
     st(ptr_reg_after_save, top_ptr_reg_after_save, 0);
@@ -2952,8 +3038,7 @@
 
   // on success:
   restore();
-  ba(false, L_success);
-  delayed()->nop();
+  ba_short(L_success);
 
   // on failure:
   bind(L_pop_to_failure);
@@ -2969,8 +3054,7 @@
                                                    Label* L_success,
                                                    Label* L_failure,
                                                    Label* L_slow_path,
-                                        RegisterOrConstant super_check_offset,
-                                        Register instanceof_hack) {
+                                        RegisterOrConstant super_check_offset) {
   int sc_offset = (klassOopDesc::header_size() * HeapWordSize +
                    Klass::secondary_super_cache_offset_in_bytes());
   int sco_offset = (klassOopDesc::header_size() * HeapWordSize +
@@ -2993,29 +3077,10 @@
   if (L_success == NULL)   { L_success   = &L_fallthrough; label_nulls++; }
   if (L_failure == NULL)   { L_failure   = &L_fallthrough; label_nulls++; }
   if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; }
-  assert(label_nulls <= 1 || instanceof_hack != noreg ||
+  assert(label_nulls <= 1 ||
          (L_slow_path == &L_fallthrough && label_nulls <= 2 && !need_slow_path),
          "at most one NULL in the batch, usually");
 
-  // Support for the instanceof hack, which uses delay slots to
-  // set a destination register to zero or one.
-  bool do_bool_sets = (instanceof_hack != noreg);
-#define BOOL_SET(bool_value)                            \
-  if (do_bool_sets && bool_value >= 0)                  \
-    set(bool_value, instanceof_hack)
-#define DELAYED_BOOL_SET(bool_value)                    \
-  if (do_bool_sets && bool_value >= 0)                  \
-    delayed()->set(bool_value, instanceof_hack);        \
-  else delayed()->nop()
-  // Hacked ba(), which may only be used just before L_fallthrough.
-#define FINAL_JUMP(label, bool_value)                   \
-  if (&(label) == &L_fallthrough) {                     \
-    BOOL_SET(bool_value);                               \
-  } else {                                              \
-    ba((do_bool_sets && bool_value >= 0), label);       \
-    DELAYED_BOOL_SET(bool_value);                       \
-  }
-
   // If the pointers are equal, we are done (e.g., String[] elements).
   // This self-check enables sharing of secondary supertype arrays among
   // non-primary types such as array-of-interface.  Otherwise, each such
@@ -3024,8 +3089,8 @@
   // type checks are in fact trivially successful in this manner,
   // so we get a nicely predicted branch right at the start of the check.
   cmp(super_klass, sub_klass);
-  brx(Assembler::equal, do_bool_sets, Assembler::pn, *L_success);
-  DELAYED_BOOL_SET(1);
+  brx(Assembler::equal, false, Assembler::pn, *L_success);
+  delayed()->nop();
 
   // Check the supertype display:
   if (must_load_sco) {
@@ -3049,50 +3114,49 @@
   // So if it was a primary super, we can just fail immediately.
   // Otherwise, it's the slow path for us (no success at this point).
 
+  // Hacked ba(), which may only be used just before L_fallthrough.
+#define FINAL_JUMP(label)            \
+  if (&(label) != &L_fallthrough) {  \
+    ba(label);  delayed()->nop();    \
+  }
+
   if (super_check_offset.is_register()) {
-    brx(Assembler::equal, do_bool_sets, Assembler::pn, *L_success);
-    delayed(); if (do_bool_sets)  BOOL_SET(1);
-    // if !do_bool_sets, sneak the next cmp into the delay slot:
-    cmp(super_check_offset.as_register(), sc_offset);
+    brx(Assembler::equal, false, Assembler::pn, *L_success);
+    delayed()->cmp(super_check_offset.as_register(), sc_offset);
 
     if (L_failure == &L_fallthrough) {
-      brx(Assembler::equal, do_bool_sets, Assembler::pt, *L_slow_path);
+      brx(Assembler::equal, false, Assembler::pt, *L_slow_path);
       delayed()->nop();
-      BOOL_SET(0);  // fallthrough on failure
     } else {
-      brx(Assembler::notEqual, do_bool_sets, Assembler::pn, *L_failure);
-      DELAYED_BOOL_SET(0);
-      FINAL_JUMP(*L_slow_path, -1);  // -1 => vanilla delay slot
+      brx(Assembler::notEqual, false, Assembler::pn, *L_failure);
+      delayed()->nop();
+      FINAL_JUMP(*L_slow_path);
     }
   } else if (super_check_offset.as_constant() == sc_offset) {
     // Need a slow path; fast failure is impossible.
     if (L_slow_path == &L_fallthrough) {
-      brx(Assembler::equal, do_bool_sets, Assembler::pt, *L_success);
-      DELAYED_BOOL_SET(1);
+      brx(Assembler::equal, false, Assembler::pt, *L_success);
+      delayed()->nop();
     } else {
       brx(Assembler::notEqual, false, Assembler::pn, *L_slow_path);
       delayed()->nop();
-      FINAL_JUMP(*L_success, 1);
+      FINAL_JUMP(*L_success);
     }
   } else {
     // No slow path; it's a fast decision.
     if (L_failure == &L_fallthrough) {
-      brx(Assembler::equal, do_bool_sets, Assembler::pt, *L_success);
-      DELAYED_BOOL_SET(1);
-      BOOL_SET(0);
+      brx(Assembler::equal, false, Assembler::pt, *L_success);
+      delayed()->nop();
     } else {
-      brx(Assembler::notEqual, do_bool_sets, Assembler::pn, *L_failure);
-      DELAYED_BOOL_SET(0);
-      FINAL_JUMP(*L_success, 1);
+      brx(Assembler::notEqual, false, Assembler::pn, *L_failure);
+      delayed()->nop();
+      FINAL_JUMP(*L_success);
     }
   }
 
   bind(L_fallthrough);
 
-#undef final_jump
-#undef bool_set
-#undef DELAYED_BOOL_SET
-#undef final_jump
+#undef FINAL_JUMP
 }
 
 
@@ -3185,7 +3249,7 @@
   st_ptr(super_klass, sub_klass, sc_offset);
 
   if (L_success != &L_fallthrough) {
-    ba(false, *L_success);
+    ba(*L_success);
     delayed()->nop();
   }
 
@@ -3200,9 +3264,7 @@
   // compare method type against that of the receiver
   RegisterOrConstant mhtype_offset = delayed_value(java_lang_invoke_MethodHandle::type_offset_in_bytes, temp_reg);
   load_heap_oop(mh_reg, mhtype_offset, temp_reg);
-  cmp(temp_reg, mtype_reg);
-  br(Assembler::notEqual, false, Assembler::pn, wrong_method_type);
-  delayed()->nop();
+  cmp_and_brx_short(temp_reg, mtype_reg, Assembler::notEqual, Assembler::pn, wrong_method_type);
 }
 
 
@@ -3295,9 +3357,7 @@
   // pointers to allow age to be placed into low bits
   assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout");
   and3(mark_reg, markOopDesc::biased_lock_mask_in_place, temp_reg);
-  cmp(temp_reg, markOopDesc::biased_lock_pattern);
-  brx(Assembler::notEqual, false, Assembler::pn, cas_label);
-  delayed()->nop();
+  cmp_and_brx_short(temp_reg, markOopDesc::biased_lock_pattern, Assembler::notEqual, Assembler::pn, cas_label);
 
   load_klass(obj_reg, temp_reg);
   ld_ptr(Address(temp_reg, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()), temp_reg);
@@ -3364,8 +3424,7 @@
     brx(Assembler::notEqual, true, Assembler::pn, *slow_case);
     delayed()->nop();
   }
-  br(Assembler::always, false, Assembler::pt, done);
-  delayed()->nop();
+  ba_short(done);
 
   bind(try_rebias);
   // At this point we know the epoch has expired, meaning that the
@@ -3393,8 +3452,7 @@
     brx(Assembler::notEqual, true, Assembler::pn, *slow_case);
     delayed()->nop();
   }
-  br(Assembler::always, false, Assembler::pt, done);
-  delayed()->nop();
+  ba_short(done);
 
   bind(try_revoke_bias);
   // The prototype mark in the klass doesn't have the bias bit set any
@@ -3445,7 +3503,7 @@
 // Solaris/SPARC's "as".  Another apt name would be cas_ptr()
 
 void MacroAssembler::casn (Register addr_reg, Register cmp_reg, Register set_reg ) {
-  casx_under_lock (addr_reg, cmp_reg, set_reg, (address)StubRoutines::Sparc::atomic_memory_operation_lock_addr()) ;
+  casx_under_lock (addr_reg, cmp_reg, set_reg, (address)StubRoutines::Sparc::atomic_memory_operation_lock_addr());
 }
 
 
@@ -3486,9 +3544,9 @@
    }
 
    if (EmitSync & 1) {
-     mov    (3, Rscratch) ;
-     st_ptr (Rscratch, Rbox, BasicLock::displaced_header_offset_in_bytes());
-     cmp    (SP, G0) ;
+     mov(3, Rscratch);
+     st_ptr(Rscratch, Rbox, BasicLock::displaced_header_offset_in_bytes());
+     cmp(SP, G0);
      return ;
    }
 
@@ -3529,7 +3587,7 @@
      assert(os::vm_page_size() > 0xfff, "page size too small - change the constant");
      andcc(Rscratch, 0xfffff003, Rscratch);
      st_ptr(Rscratch, Rbox, BasicLock::displaced_header_offset_in_bytes());
-     bind (done) ;
+     bind(done);
      return ;
    }
 
@@ -3538,7 +3596,7 @@
    if (EmitSync & 256) {
       Label IsInflated ;
 
-      ld_ptr (mark_addr, Rmark);           // fetch obj->mark
+      ld_ptr(mark_addr, Rmark);           // fetch obj->mark
       // Triage: biased, stack-locked, neutral, inflated
       if (try_bias) {
         biased_locking_enter(Roop, Rmark, Rscratch, done, NULL, counters);
@@ -3549,49 +3607,49 @@
       // Store mark into displaced mark field in the on-stack basic-lock "box"
       // Critically, this must happen before the CAS
       // Maximize the ST-CAS distance to minimize the ST-before-CAS penalty.
-      st_ptr (Rmark, Rbox, BasicLock::displaced_header_offset_in_bytes());
-      andcc  (Rmark, 2, G0) ;
-      brx    (Assembler::notZero, false, Assembler::pn, IsInflated) ;
-      delayed() ->
+      st_ptr(Rmark, Rbox, BasicLock::displaced_header_offset_in_bytes());
+      andcc(Rmark, 2, G0);
+      brx(Assembler::notZero, false, Assembler::pn, IsInflated);
+      delayed()->
 
       // Try stack-lock acquisition.
       // Beware: the 1st instruction is in a delay slot
-      mov    (Rbox,  Rscratch);
-      or3    (Rmark, markOopDesc::unlocked_value, Rmark);
-      assert (mark_addr.disp() == 0, "cas must take a zero displacement");
-      casn   (mark_addr.base(), Rmark, Rscratch) ;
-      cmp    (Rmark, Rscratch);
-      brx    (Assembler::equal, false, Assembler::pt, done);
+      mov(Rbox,  Rscratch);
+      or3(Rmark, markOopDesc::unlocked_value, Rmark);
+      assert(mark_addr.disp() == 0, "cas must take a zero displacement");
+      casn(mark_addr.base(), Rmark, Rscratch);
+      cmp(Rmark, Rscratch);
+      brx(Assembler::equal, false, Assembler::pt, done);
       delayed()->sub(Rscratch, SP, Rscratch);
 
       // Stack-lock attempt failed - check for recursive stack-lock.
       // See the comments below about how we might remove this case.
 #ifdef _LP64
-      sub    (Rscratch, STACK_BIAS, Rscratch);
+      sub(Rscratch, STACK_BIAS, Rscratch);
 #endif
       assert(os::vm_page_size() > 0xfff, "page size too small - change the constant");
-      andcc  (Rscratch, 0xfffff003, Rscratch);
-      br     (Assembler::always, false, Assembler::pt, done) ;
-      delayed()-> st_ptr (Rscratch, Rbox, BasicLock::displaced_header_offset_in_bytes());
-
-      bind   (IsInflated) ;
+      andcc(Rscratch, 0xfffff003, Rscratch);
+      br(Assembler::always, false, Assembler::pt, done);
+      delayed()->st_ptr(Rscratch, Rbox, BasicLock::displaced_header_offset_in_bytes());
+
+      bind(IsInflated);
       if (EmitSync & 64) {
          // If m->owner != null goto IsLocked
          // Pessimistic form: Test-and-CAS vs CAS
          // The optimistic form avoids RTS->RTO cache line upgrades.
-         ld_ptr (Rmark, ObjectMonitor::owner_offset_in_bytes() - 2, Rscratch);
-         andcc  (Rscratch, Rscratch, G0) ;
-         brx    (Assembler::notZero, false, Assembler::pn, done) ;
-         delayed()->nop() ;
+         ld_ptr(Rmark, ObjectMonitor::owner_offset_in_bytes() - 2, Rscratch);
+         andcc(Rscratch, Rscratch, G0);
+         brx(Assembler::notZero, false, Assembler::pn, done);
+         delayed()->nop();
          // m->owner == null : it's unlocked.
       }
 
       // Try to CAS m->owner from null to Self
       // Invariant: if we acquire the lock then _recursions should be 0.
-      add    (Rmark, ObjectMonitor::owner_offset_in_bytes()-2, Rmark) ;
-      mov    (G2_thread, Rscratch) ;
-      casn   (Rmark, G0, Rscratch) ;
-      cmp    (Rscratch, G0) ;
+      add(Rmark, ObjectMonitor::owner_offset_in_bytes()-2, Rmark);
+      mov(G2_thread, Rscratch);
+      casn(Rmark, G0, Rscratch);
+      cmp(Rscratch, G0);
       // Intentional fall-through into done
    } else {
       // Aggressively avoid the Store-before-CAS penalty
@@ -3599,9 +3657,9 @@
       Label IsInflated, Recursive ;
 
 // Anticipate CAS -- Avoid RTS->RTO upgrade
-// prefetch (mark_addr, Assembler::severalWritesAndPossiblyReads) ;
-
-      ld_ptr (mark_addr, Rmark);           // fetch obj->mark
+// prefetch (mark_addr, Assembler::severalWritesAndPossiblyReads);
+
+      ld_ptr(mark_addr, Rmark);           // fetch obj->mark
       // Triage: biased, stack-locked, neutral, inflated
 
       if (try_bias) {
@@ -3609,8 +3667,8 @@
         // Invariant: if control reaches this point in the emitted stream
         // then Rmark has not been modified.
       }
-      andcc  (Rmark, 2, G0) ;
-      brx    (Assembler::notZero, false, Assembler::pn, IsInflated) ;
+      andcc(Rmark, 2, G0);
+      brx(Assembler::notZero, false, Assembler::pn, IsInflated);
       delayed()->                         // Beware - dangling delay-slot
 
       // Try stack-lock acquisition.
@@ -3620,23 +3678,21 @@
       //   ST obj->mark = box    -- overwrite transient 0 value
       // This presumes TSO, of course.
 
-      mov    (0, Rscratch) ;
-      or3    (Rmark, markOopDesc::unlocked_value, Rmark);
-      assert (mark_addr.disp() == 0, "cas must take a zero displacement");
-      casn   (mark_addr.base(), Rmark, Rscratch) ;
-// prefetch (mark_addr, Assembler::severalWritesAndPossiblyReads) ;
-      cmp    (Rscratch, Rmark) ;
-      brx    (Assembler::notZero, false, Assembler::pn, Recursive) ;
-      delayed() ->
-        st_ptr (Rmark, Rbox, BasicLock::displaced_header_offset_in_bytes());
+      mov(0, Rscratch);
+      or3(Rmark, markOopDesc::unlocked_value, Rmark);
+      assert(mark_addr.disp() == 0, "cas must take a zero displacement");
+      casn(mark_addr.base(), Rmark, Rscratch);
+// prefetch (mark_addr, Assembler::severalWritesAndPossiblyReads);
+      cmp(Rscratch, Rmark);
+      brx(Assembler::notZero, false, Assembler::pn, Recursive);
+      delayed()->st_ptr(Rmark, Rbox, BasicLock::displaced_header_offset_in_bytes());
       if (counters != NULL) {
         cond_inc(Assembler::equal, (address) counters->fast_path_entry_count_addr(), Rmark, Rscratch);
       }
-      br     (Assembler::always, false, Assembler::pt, done);
-      delayed() ->
-        st_ptr (Rbox, mark_addr) ;
-
-      bind   (Recursive) ;
+      ba(done);
+      delayed()->st_ptr(Rbox, mark_addr);
+
+      bind(Recursive);
       // Stack-lock attempt failed - check for recursive stack-lock.
       // Tests show that we can remove the recursive case with no impact
       // on refworkload 0.83.  If we need to reduce the size of the code
@@ -3653,49 +3709,48 @@
 
       // RScratch contains the fetched obj->mark value from the failed CASN.
 #ifdef _LP64
-      sub    (Rscratch, STACK_BIAS, Rscratch);
+      sub(Rscratch, STACK_BIAS, Rscratch);
 #endif
       sub(Rscratch, SP, Rscratch);
       assert(os::vm_page_size() > 0xfff, "page size too small - change the constant");
-      andcc  (Rscratch, 0xfffff003, Rscratch);
+      andcc(Rscratch, 0xfffff003, Rscratch);
       if (counters != NULL) {
         // Accounting needs the Rscratch register
-        st_ptr (Rscratch, Rbox, BasicLock::displaced_header_offset_in_bytes());
+        st_ptr(Rscratch, Rbox, BasicLock::displaced_header_offset_in_bytes());
         cond_inc(Assembler::equal, (address) counters->fast_path_entry_count_addr(), Rmark, Rscratch);
-        br     (Assembler::always, false, Assembler::pt, done) ;
-        delayed()->nop() ;
+        ba_short(done);
       } else {
-        br     (Assembler::always, false, Assembler::pt, done) ;
-        delayed()-> st_ptr (Rscratch, Rbox, BasicLock::displaced_header_offset_in_bytes());
+        ba(done);
+        delayed()->st_ptr(Rscratch, Rbox, BasicLock::displaced_header_offset_in_bytes());
       }
 
-      bind   (IsInflated) ;
+      bind(IsInflated);
       if (EmitSync & 64) {
          // If m->owner != null goto IsLocked
          // Test-and-CAS vs CAS
          // Pessimistic form avoids futile (doomed) CAS attempts
          // The optimistic form avoids RTS->RTO cache line upgrades.
-         ld_ptr (Rmark, ObjectMonitor::owner_offset_in_bytes() - 2, Rscratch);
-         andcc  (Rscratch, Rscratch, G0) ;
-         brx    (Assembler::notZero, false, Assembler::pn, done) ;
-         delayed()->nop() ;
+         ld_ptr(Rmark, ObjectMonitor::owner_offset_in_bytes() - 2, Rscratch);
+         andcc(Rscratch, Rscratch, G0);
+         brx(Assembler::notZero, false, Assembler::pn, done);
+         delayed()->nop();
          // m->owner == null : it's unlocked.
       }
 
       // Try to CAS m->owner from null to Self
       // Invariant: if we acquire the lock then _recursions should be 0.
-      add    (Rmark, ObjectMonitor::owner_offset_in_bytes()-2, Rmark) ;
-      mov    (G2_thread, Rscratch) ;
-      casn   (Rmark, G0, Rscratch) ;
-      cmp    (Rscratch, G0) ;
+      add(Rmark, ObjectMonitor::owner_offset_in_bytes()-2, Rmark);
+      mov(G2_thread, Rscratch);
+      casn(Rmark, G0, Rscratch);
+      cmp(Rscratch, G0);
       // ST box->displaced_header = NonZero.
       // Any non-zero value suffices:
       //    unused_mark(), G2_thread, RBox, RScratch, rsp, etc.
-      st_ptr (Rbox, Rbox, BasicLock::displaced_header_offset_in_bytes());
+      st_ptr(Rbox, Rbox, BasicLock::displaced_header_offset_in_bytes());
       // Intentional fall-through into done
    }
 
-   bind   (done) ;
+   bind(done);
 }
 
 void MacroAssembler::compiler_unlock_object(Register Roop, Register Rmark,
@@ -3706,7 +3761,7 @@
    Label done ;
 
    if (EmitSync & 4) {
-     cmp  (SP, G0) ;
+     cmp(SP, G0);
      return ;
    }
 
@@ -3717,18 +3772,16 @@
 
      // Test first if it is a fast recursive unlock
      ld_ptr(Rbox, BasicLock::displaced_header_offset_in_bytes(), Rmark);
-     cmp(Rmark, G0);
-     brx(Assembler::equal, false, Assembler::pt, done);
-     delayed()->nop();
+     br_null_short(Rmark, Assembler::pt, done);
 
      // Check if it is still a lightweight lock; this is true if we see
      // the stack address of the basicLock in the markOop of the object
      assert(mark_addr.disp() == 0, "cas must take a zero displacement");
      casx_under_lock(mark_addr.base(), Rbox, Rmark,
        (address)StubRoutines::Sparc::atomic_memory_operation_lock_addr());
-     br (Assembler::always, false, Assembler::pt, done);
+     ba(done);
      delayed()->cmp(Rbox, Rmark);
-     bind (done) ;
+     bind(done);
      return ;
    }
 
@@ -3743,14 +3796,14 @@
       biased_locking_exit(mark_addr, Rscratch, done);
    }
 
-   ld_ptr (Roop, oopDesc::mark_offset_in_bytes(), Rmark) ;
-   ld_ptr (Rbox, BasicLock::displaced_header_offset_in_bytes(), Rscratch);
-   andcc  (Rscratch, Rscratch, G0);
-   brx    (Assembler::zero, false, Assembler::pn, done);
-   delayed()-> nop() ;      // consider: relocate fetch of mark, above, into this DS
-   andcc  (Rmark, 2, G0) ;
-   brx    (Assembler::zero, false, Assembler::pt, LStacked) ;
-   delayed()-> nop() ;
+   ld_ptr(Roop, oopDesc::mark_offset_in_bytes(), Rmark);
+   ld_ptr(Rbox, BasicLock::displaced_header_offset_in_bytes(), Rscratch);
+   andcc(Rscratch, Rscratch, G0);
+   brx(Assembler::zero, false, Assembler::pn, done);
+   delayed()->nop();      // consider: relocate fetch of mark, above, into this DS
+   andcc(Rmark, 2, G0);
+   brx(Assembler::zero, false, Assembler::pt, LStacked);
+   delayed()->nop();
 
    // It's inflated
    // Conceptually we need a #loadstore|#storestore "release" MEMBAR before
@@ -3761,48 +3814,45 @@
    // Note that we use 1-0 locking by default for the inflated case.  We
   // close the resultant (and rare) race by having contended threads in
    // monitorenter periodically poll _owner.
-   ld_ptr (Rmark, ObjectMonitor::owner_offset_in_bytes() - 2, Rscratch);
-   ld_ptr (Rmark, ObjectMonitor::recursions_offset_in_bytes() - 2, Rbox);
-   xor3   (Rscratch, G2_thread, Rscratch) ;
-   orcc   (Rbox, Rscratch, Rbox) ;
-   brx    (Assembler::notZero, false, Assembler::pn, done) ;
+   ld_ptr(Rmark, ObjectMonitor::owner_offset_in_bytes() - 2, Rscratch);
+   ld_ptr(Rmark, ObjectMonitor::recursions_offset_in_bytes() - 2, Rbox);
+   xor3(Rscratch, G2_thread, Rscratch);
+   orcc(Rbox, Rscratch, Rbox);
+   brx(Assembler::notZero, false, Assembler::pn, done);
    delayed()->
-   ld_ptr (Rmark, ObjectMonitor::EntryList_offset_in_bytes() - 2, Rscratch);
-   ld_ptr (Rmark, ObjectMonitor::cxq_offset_in_bytes() - 2, Rbox);
-   orcc   (Rbox, Rscratch, G0) ;
+   ld_ptr(Rmark, ObjectMonitor::EntryList_offset_in_bytes() - 2, Rscratch);
+   ld_ptr(Rmark, ObjectMonitor::cxq_offset_in_bytes() - 2, Rbox);
+   orcc(Rbox, Rscratch, G0);
    if (EmitSync & 65536) {
       Label LSucc ;
-      brx    (Assembler::notZero, false, Assembler::pn, LSucc) ;
-      delayed()->nop() ;
-      br     (Assembler::always, false, Assembler::pt, done) ;
-      delayed()->
-      st_ptr (G0, Rmark, ObjectMonitor::owner_offset_in_bytes() - 2);
-
-      bind   (LSucc) ;
-      st_ptr (G0, Rmark, ObjectMonitor::owner_offset_in_bytes() - 2);
-      if (os::is_MP()) { membar (StoreLoad) ; }
-      ld_ptr (Rmark, ObjectMonitor::succ_offset_in_bytes() - 2, Rscratch);
-      andcc  (Rscratch, Rscratch, G0) ;
-      brx    (Assembler::notZero, false, Assembler::pt, done) ;
-      delayed()-> andcc (G0, G0, G0) ;
-      add    (Rmark, ObjectMonitor::owner_offset_in_bytes()-2, Rmark) ;
-      mov    (G2_thread, Rscratch) ;
-      casn   (Rmark, G0, Rscratch) ;
-      cmp    (Rscratch, G0) ;
+      brx(Assembler::notZero, false, Assembler::pn, LSucc);
+      delayed()->nop();
+      ba(done);
+      delayed()->st_ptr(G0, Rmark, ObjectMonitor::owner_offset_in_bytes() - 2);
+
+      bind(LSucc);
+      st_ptr(G0, Rmark, ObjectMonitor::owner_offset_in_bytes() - 2);
+      if (os::is_MP()) { membar(StoreLoad); }
+      ld_ptr(Rmark, ObjectMonitor::succ_offset_in_bytes() - 2, Rscratch);
+      andcc(Rscratch, Rscratch, G0);
+      brx(Assembler::notZero, false, Assembler::pt, done);
+      delayed()->andcc(G0, G0, G0);
+      add(Rmark, ObjectMonitor::owner_offset_in_bytes()-2, Rmark);
+      mov(G2_thread, Rscratch);
+      casn(Rmark, G0, Rscratch);
       // invert icc.zf and goto done
-      brx    (Assembler::notZero, false, Assembler::pt, done) ;
-      delayed() -> cmp (G0, G0) ;
-      br     (Assembler::always, false, Assembler::pt, done);
-      delayed() -> cmp (G0, 1) ;
+      br_notnull(Rscratch, false, Assembler::pt, done);
+      delayed()->cmp(G0, G0);
+      ba(done);
+      delayed()->cmp(G0, 1);
    } else {
-      brx    (Assembler::notZero, false, Assembler::pn, done) ;
-      delayed()->nop() ;
-      br     (Assembler::always, false, Assembler::pt, done) ;
-      delayed()->
-      st_ptr (G0, Rmark, ObjectMonitor::owner_offset_in_bytes() - 2);
+      brx(Assembler::notZero, false, Assembler::pn, done);
+      delayed()->nop();
+      ba(done);
+      delayed()->st_ptr(G0, Rmark, ObjectMonitor::owner_offset_in_bytes() - 2);
    }
 
-   bind   (LStacked) ;
+   bind(LStacked);
    // Consider: we could replace the expensive CAS in the exit
    // path with a simple ST of the displaced mark value fetched from
    // the on-stack basiclock box.  That admits a race where a thread T2
@@ -3831,11 +3881,11 @@
    // A prototype implementation showed excellent results, although
    // the scavenger and timeout code was rather involved.
 
-   casn   (mark_addr.base(), Rbox, Rscratch) ;
-   cmp    (Rbox, Rscratch);
+   casn(mark_addr.base(), Rbox, Rscratch);
+   cmp(Rbox, Rscratch);
    // Intentional fall through into done ...
 
-   bind   (done) ;
+   bind(done);
 }
 
 
@@ -3891,9 +3941,7 @@
     ld_ptr(G2_thread, in_bytes(JavaThread::tlab_top_offset()), t1);
     ld_ptr(G2_thread, in_bytes(JavaThread::tlab_start_offset()), t2);
     or3(t1, t2, t3);
-    cmp(t1, t2);
-    br(Assembler::greaterEqual, false, Assembler::pn, next);
-    delayed()->nop();
+    cmp_and_br_short(t1, t2, Assembler::greaterEqual, Assembler::pn, next);
     stop("assert(top >= start)");
     should_not_reach_here();
 
@@ -3901,17 +3949,13 @@
     ld_ptr(G2_thread, in_bytes(JavaThread::tlab_top_offset()), t1);
     ld_ptr(G2_thread, in_bytes(JavaThread::tlab_end_offset()), t2);
     or3(t3, t2, t3);
-    cmp(t1, t2);
-    br(Assembler::lessEqual, false, Assembler::pn, next2);
-    delayed()->nop();
+    cmp_and_br_short(t1, t2, Assembler::lessEqual, Assembler::pn, next2);
     stop("assert(top <= end)");
     should_not_reach_here();
 
     bind(next2);
     and3(t3, MinObjAlignmentInBytesMask, t3);
-    cmp(t3, 0);
-    br(Assembler::lessEqual, false, Assembler::pn, ok);
-    delayed()->nop();
+    cmp_and_br_short(t3, 0, Assembler::lessEqual, Assembler::pn, ok);
     stop("assert(aligned)");
     should_not_reach_here();
 
@@ -3937,8 +3981,7 @@
 
   if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) {
     // No allocation in the shared eden.
-    br(Assembler::always, false, Assembler::pt, slow_case);
-    delayed()->nop();
+    ba_short(slow_case);
   } else {
     // get eden boundaries
     // note: we need both top & top_addr!
@@ -4072,8 +4115,7 @@
 
   if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) {
     // No allocation in the shared eden.
-    br(Assembler::always, false, Assembler::pt, slow_case);
-    delayed()->nop();
+    ba_short(slow_case);
   }
 
   ld_ptr(G2_thread, in_bytes(JavaThread::tlab_top_offset()), top);
@@ -4098,8 +4140,7 @@
     add(t2, 1, t2);
     stw(t2, G2_thread, in_bytes(JavaThread::tlab_slow_allocations_offset()));
   }
-  br(Assembler::always, false, Assembler::pt, try_eden);
-  delayed()->nop();
+  ba_short(try_eden);
 
   bind(discard_tlab);
   if (TLABStats) {
@@ -4115,8 +4156,7 @@
 
   // if tlab is currently allocated (top or end != null) then
   // fill [top, end + alignment_reserve) with array object
-  br_null(top, false, Assembler::pn, do_refill);
-  delayed()->nop();
+  br_null_short(top, Assembler::pn, do_refill);
 
   set((intptr_t)markOopDesc::prototype()->copy_set_hash(0x2), t2);
   st_ptr(t2, top, oopDesc::mark_offset_in_bytes()); // set up the mark word
@@ -4151,9 +4191,7 @@
     Label ok;
     ld_ptr(G2_thread, in_bytes(JavaThread::tlab_size_offset()), t2);
     sll_ptr(t2, LogHeapWordSize, t2);
-    cmp(t1, t2);
-    br(Assembler::equal, false, Assembler::pt, ok);
-    delayed()->nop();
+    cmp_and_br_short(t1, t2, Assembler::equal, Assembler::pt, ok);
     stop("assert(t1 == tlab_size)");
     should_not_reach_here();
 
@@ -4164,8 +4202,7 @@
   sub(top, ThreadLocalAllocBuffer::alignment_reserve_in_bytes(), top);
   st_ptr(top, G2_thread, in_bytes(JavaThread::tlab_end_offset()));
   verify_tlab();
-  br(Assembler::always, false, Assembler::pt, retry);
-  delayed()->nop();
+  ba_short(retry);
 }
 
 void MacroAssembler::incr_allocated_bytes(RegisterOrConstant size_in_bytes,
@@ -4290,12 +4327,15 @@
   BufferBlob* bb = BufferBlob::create("enqueue_with_frame", EnqueueCodeSize);
   CodeBuffer buf(bb);
   MacroAssembler masm(&buf);
-  address start = masm.pc();
+
+#define __ masm.
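+// The usual HotSpot shorthand: each '__ foo(...)' below expands to 'masm.foo(...)'.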
+
+  address start = __ pc();
   Register pre_val;
 
   Label refill, restart;
   if (with_frame) {
-    masm.save_frame(0);
+    __ save_frame(0);
     pre_val = I0;  // Was O0 before the save.
   } else {
     pre_val = O0;
@@ -4310,57 +4350,59 @@
          in_bytes(PtrQueue::byte_width_of_buf()) == sizeof(intptr_t),
          "check sizes in assembly below");
 
-  masm.bind(restart);
-  masm.ld_ptr(G2_thread, satb_q_index_byte_offset, L0);
-
-  masm.br_on_reg_cond(Assembler::rc_z, /*annul*/false, Assembler::pn, L0, refill);
+  __ bind(restart);
+  __ ld_ptr(G2_thread, satb_q_index_byte_offset, L0);
+
+  __ br_on_reg_cond(Assembler::rc_z, /*annul*/false, Assembler::pn, L0, refill);
   // If the branch is taken, no harm in executing this in the delay slot.
-  masm.delayed()->ld_ptr(G2_thread, satb_q_buf_byte_offset, L1);
-  masm.sub(L0, oopSize, L0);
-
-  masm.st_ptr(pre_val, L1, L0);  // [_buf + index] := I0
+  __ delayed()->ld_ptr(G2_thread, satb_q_buf_byte_offset, L1);
+  __ sub(L0, oopSize, L0);
+
+  __ st_ptr(pre_val, L1, L0);  // [_buf + index] := I0
   if (!with_frame) {
     // Use return-from-leaf
-    masm.retl();
-    masm.delayed()->st_ptr(L0, G2_thread, satb_q_index_byte_offset);
+    __ retl();
+    __ delayed()->st_ptr(L0, G2_thread, satb_q_index_byte_offset);
   } else {
     // Not delayed.
-    masm.st_ptr(L0, G2_thread, satb_q_index_byte_offset);
+    __ st_ptr(L0, G2_thread, satb_q_index_byte_offset);
   }
   if (with_frame) {
-    masm.ret();
-    masm.delayed()->restore();
+    __ ret();
+    __ delayed()->restore();
   }
-  masm.bind(refill);
+  __ bind(refill);
 
   address handle_zero =
     CAST_FROM_FN_PTR(address,
                      &SATBMarkQueueSet::handle_zero_index_for_thread);
   // This should be rare enough that we can afford to save all the
   // scratch registers that the calling context might be using.
-  masm.mov(G1_scratch, L0);
-  masm.mov(G3_scratch, L1);
-  masm.mov(G4, L2);
+  __ mov(G1_scratch, L0);
+  __ mov(G3_scratch, L1);
+  __ mov(G4, L2);
   // We need the value of O0 above (for the write into the buffer), so we
   // save and restore it.
-  masm.mov(O0, L3);
+  __ mov(O0, L3);
   // Since the call will overwrite O7, we save and restore that, as well.
-  masm.mov(O7, L4);
-  masm.call_VM_leaf(L5, handle_zero, G2_thread);
-  masm.mov(L0, G1_scratch);
-  masm.mov(L1, G3_scratch);
-  masm.mov(L2, G4);
-  masm.mov(L3, O0);
-  masm.br(Assembler::always, /*annul*/false, Assembler::pt, restart);
-  masm.delayed()->mov(L4, O7);
+  __ mov(O7, L4);
+  __ call_VM_leaf(L5, handle_zero, G2_thread);
+  __ mov(L0, G1_scratch);
+  __ mov(L1, G3_scratch);
+  __ mov(L2, G4);
+  __ mov(L3, O0);
+  __ br(Assembler::always, /*annul*/false, Assembler::pt, restart);
+  __ delayed()->mov(L4, O7);
 
   if (with_frame) {
     satb_log_enqueue_with_frame = start;
-    satb_log_enqueue_with_frame_end = masm.pc();
+    satb_log_enqueue_with_frame_end = __ pc();
   } else {
     satb_log_enqueue_frameless = start;
-    satb_log_enqueue_frameless_end = masm.pc();
+    satb_log_enqueue_frameless_end = __ pc();
   }
+
+#undef __
 }
 
 static inline void generate_satb_log_enqueue_if_necessary(bool with_frame) {
@@ -4426,7 +4468,7 @@
 
   // Check on whether to annul.
   br_on_reg_cond(rc_z, /*annul*/false, Assembler::pt, tmp, filtered);
-  delayed() -> nop();
+  delayed()->nop();
 
   // Do we need to load the previous value?
   if (obj != noreg) {
@@ -4450,7 +4492,7 @@
   // Is the previous value null?
   // Check on whether to annul.
   br_on_reg_cond(rc_z, /*annul*/false, Assembler::pt, pre_val, filtered);
-  delayed() -> nop();
+  delayed()->nop();
 
   // OK, it's not filtered, so we'll need to call enqueue.  In the normal
   // case, pre_val will be a scratch G-reg, but there are some cases in
@@ -4518,79 +4560,83 @@
   BufferBlob* bb = BufferBlob::create("dirty_card_enqueue", EnqueueCodeSize*2);
   CodeBuffer buf(bb);
   MacroAssembler masm(&buf);
-  address start = masm.pc();
+#define __ masm.
+  address start = __ pc();
 
   Label not_already_dirty, restart, refill;
 
 #ifdef _LP64
-  masm.srlx(O0, CardTableModRefBS::card_shift, O0);
+  __ srlx(O0, CardTableModRefBS::card_shift, O0);
 #else
-  masm.srl(O0, CardTableModRefBS::card_shift, O0);
+  __ srl(O0, CardTableModRefBS::card_shift, O0);
 #endif
   AddressLiteral addrlit(byte_map_base);
-  masm.set(addrlit, O1); // O1 := <card table base>
-  masm.ldub(O0, O1, O2); // O2 := [O0 + O1]
-
-  masm.br_on_reg_cond(Assembler::rc_nz, /*annul*/false, Assembler::pt,
+  __ set(addrlit, O1); // O1 := <card table base>
+  __ ldub(O0, O1, O2); // O2 := [O0 + O1]
+
+  __ br_on_reg_cond(Assembler::rc_nz, /*annul*/false, Assembler::pt,
                       O2, not_already_dirty);
   // Get O1 + O2 into a reg by itself -- useful in the take-the-branch
   // case, harmless if not.
-  masm.delayed()->add(O0, O1, O3);
+  __ delayed()->add(O0, O1, O3);
 
   // We didn't take the branch, so we're already dirty: return.
   // Use return-from-leaf
-  masm.retl();
-  masm.delayed()->nop();
+  __ retl();
+  __ delayed()->nop();
 
   // Not dirty.
-  masm.bind(not_already_dirty);
+  __ bind(not_already_dirty);
   // First, dirty it.
-  masm.stb(G0, O3, G0);  // [cardPtr] := 0  (i.e., dirty).
+  __ stb(G0, O3, G0);  // [cardPtr] := 0  (i.e., dirty).
   int dirty_card_q_index_byte_offset =
     in_bytes(JavaThread::dirty_card_queue_offset() +
              PtrQueue::byte_offset_of_index());
   int dirty_card_q_buf_byte_offset =
     in_bytes(JavaThread::dirty_card_queue_offset() +
              PtrQueue::byte_offset_of_buf());
-  masm.bind(restart);
-  masm.ld_ptr(G2_thread, dirty_card_q_index_byte_offset, L0);
-
-  masm.br_on_reg_cond(Assembler::rc_z, /*annul*/false, Assembler::pn,
+  __ bind(restart);
+  __ ld_ptr(G2_thread, dirty_card_q_index_byte_offset, L0);
+
+  __ br_on_reg_cond(Assembler::rc_z, /*annul*/false, Assembler::pn,
                       L0, refill);
   // If the branch is taken, no harm in executing this in the delay slot.
-  masm.delayed()->ld_ptr(G2_thread, dirty_card_q_buf_byte_offset, L1);
-  masm.sub(L0, oopSize, L0);
-
-  masm.st_ptr(O3, L1, L0);  // [_buf + index] := I0
+  __ delayed()->ld_ptr(G2_thread, dirty_card_q_buf_byte_offset, L1);
+  __ sub(L0, oopSize, L0);
+
+  __ st_ptr(O3, L1, L0);  // [_buf + index] := I0
   // Use return-from-leaf
-  masm.retl();
-  masm.delayed()->st_ptr(L0, G2_thread, dirty_card_q_index_byte_offset);
-
-  masm.bind(refill);
+  __ retl();
+  __ delayed()->st_ptr(L0, G2_thread, dirty_card_q_index_byte_offset);
+
+  __ bind(refill);
   address handle_zero =
     CAST_FROM_FN_PTR(address,
                      &DirtyCardQueueSet::handle_zero_index_for_thread);
   // This should be rare enough that we can afford to save all the
   // scratch registers that the calling context might be using.
-  masm.mov(G1_scratch, L3);
-  masm.mov(G3_scratch, L5);
+  __ mov(G1_scratch, L3);
+  __ mov(G3_scratch, L5);
   // We need the value of O3 above (for the write into the buffer), so we
   // save and restore it.
-  masm.mov(O3, L6);
+  __ mov(O3, L6);
   // Since the call will overwrite O7, we save and restore that, as well.
-  masm.mov(O7, L4);
-
-  masm.call_VM_leaf(L7_thread_cache, handle_zero, G2_thread);
-  masm.mov(L3, G1_scratch);
-  masm.mov(L5, G3_scratch);
-  masm.mov(L6, O3);
-  masm.br(Assembler::always, /*annul*/false, Assembler::pt, restart);
-  masm.delayed()->mov(L4, O7);
+  __ mov(O7, L4);
+
+  __ call_VM_leaf(L7_thread_cache, handle_zero, G2_thread);
+  __ mov(L3, G1_scratch);
+  __ mov(L5, G3_scratch);
+  __ mov(L6, O3);
+  __ br(Assembler::always, /*annul*/false, Assembler::pt, restart);
+  __ delayed()->mov(L4, O7);
 
   dirty_card_log_enqueue = start;
-  dirty_card_log_enqueue_end = masm.pc();
+  dirty_card_log_enqueue_end = __ pc();
   // XXX Should have a guarantee here about not going off the end!
   // Does it already do so?  Do an experiment...
+
+#undef __
+
 }
 
 static inline void
@@ -4903,7 +4949,7 @@
   delayed()->mov(G0, result);     // not equal
 
   // only one char ?
-  br_on_reg_cond(rc_z, true, Assembler::pn, limit, Ldone);
+  cmp_zero_and_br(zero, limit, Ldone, true, Assembler::pn);
   delayed()->add(G0, 1, result); // zero-length arrays are equal
 
   // word-by-word compare, don't need alignment check
--- a/src/cpu/sparc/vm/assembler_sparc.hpp	Wed Aug 17 07:05:42 2011 -0400
+++ b/src/cpu/sparc/vm/assembler_sparc.hpp	Fri Aug 19 08:55:53 2011 -0700
@@ -761,7 +761,7 @@
     mwtos_opf   = 0x119
   };
 
-  enum RCondition {  rc_z = 1,  rc_lez = 2,  rc_lz = 3, rc_nz = 5, rc_gz = 6, rc_gez = 7  };
+  enum RCondition {  rc_z = 1,  rc_lez = 2,  rc_lz = 3, rc_nz = 5, rc_gz = 6, rc_gez = 7, rc_last = rc_gez  };
 
   enum Condition {
      // for FBfcc & FBPfcc instruction
@@ -866,9 +866,18 @@
     return is_simm(d, nbits + 2);
   }
 
+  address target_distance(Label& L) {
+    // Assembler::target(L) should be called only when
+    // a branch instruction is emitted, since non-bound
+    // labels record the current pc() as the branch address.
+    if (L.is_bound()) return target(L);
+    // Return current address for non-bound labels.
+    return pc();
+  }
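+  // An unbound label thus yields a distance of zero, optimistically treating
+  // a forward target as near; if the label later binds out of range, the
+  // displacement asserts in wdisp10()/wdisp16() should catch it when the
+  // branch is patched.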
+
   // test if label is in simm16 range in words (wdisp16).
   bool is_in_wdisp16_range(Label& L) {
-    return is_in_wdisp_range(target(L), pc(), 16);
+    return is_in_wdisp_range(target_distance(L), pc(), 16);
   }
   // test if the distance between two addresses fits in simm30 range in words
   static bool is_in_wdisp30_range(address a, address b) {
@@ -877,7 +886,11 @@
 
   enum ASIs { // page 72, v9
     ASI_PRIMARY        = 0x80,
-    ASI_PRIMARY_LITTLE = 0x88
+    ASI_PRIMARY_LITTLE = 0x88,
+    // Block initializing store
+    ASI_ST_BLKINIT_PRIMARY = 0xE2,
+    // Most-Recently-Used (MRU) BIS variant
+    ASI_ST_BLKINIT_MRU_PRIMARY = 0xF2
     // add more from book as needed
   };
 
@@ -975,6 +988,20 @@
   static int sx(       int         i)  { return  u_field(i,             12, 12); } // shift x=1 means 64-bit
   static int opf(      int         x)  { return  u_field(x,             13,  5); }
 
+  static bool is_cbcond( int x ) {
+    return (VM_Version::has_cbcond() && (inv_cond(x) > rc_last) &&
+            inv_op(x) == branch_op && inv_op2(x) == bpr_op2);
+  }
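+  // cbcond shares op2 (bpr_op2) with bpr; the two are told apart by the
+  // condition field: cond_cbcond() below always sets instruction bit 28
+  // (the '+ 8' term), so inv_cond(x) exceeds rc_last only for cbcond.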
+  static bool is_cxb( int x ) {
+    assert(is_cbcond(x), "wrong instruction");
+    return (x & (1<<21)) != 0;
+  }
+  static int cond_cbcond( int         x)  { return  u_field((((x & 8)<<1) + 8 + (x & 7)), 29, 25); }
+  static int inv_cond_cbcond(int      x)  {
+    assert(is_cbcond(x), "wrong instruction");
+    return inv_u_field(x, 27, 25) | (inv_u_field(x, 29, 29)<<3);
+  }
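+  // Worked example (taking Condition 'equal', encoded as 1 in Bicc form):
+  // cond_cbcond(1) yields the five-bit field 0b01001 -- bit 28 set plus the
+  // low bits 001 -- and inv_cond_cbcond() strips bit 28 to recover 1.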
+
   static int opf_cc(   CC          c, bool useFloat ) { return u_field((useFloat ? 0 : 4) + c, 13, 11); }
   static int mov_cc(   CC          c, bool useFloat ) { return u_field(useFloat ? 0 : 1,  18, 18) | u_field(c, 12, 11); }
 
@@ -1026,6 +1053,26 @@
     return r;
   }
 
+  // compute inverse of wdisp10
+  static intptr_t inv_wdisp10(int x, intptr_t pos) {
+    assert(is_cbcond(x), "wrong instruction");
+    int lo = inv_u_field(x, 12, 5);
+    int hi = (x >> 19) & 3;
+    if (hi >= 2) hi |= ~1;
+    return (((hi << 8) | lo) << 2) + pos;
+  }
+
+  // word offset for cbcond, 8 bits at [B12,B5], 2 bits at [B20,B19]
+  static int wdisp10(intptr_t x, intptr_t off) {
+    assert(VM_Version::has_cbcond(), "This CPU does not have CBCOND instruction");
+    intptr_t xx = x - off;
+    assert_signed_word_disp_range(xx, 10);
+    int r =  ( ( (xx >>  2   ) & ((1 << 8) - 1) ) <<  5 )
+           | ( ( (xx >> (2+8)) & 3              ) << 19 );
+    // Have to fake cbcond instruction to pass assert in inv_wdisp10()
+    assert(inv_wdisp10((r | op(branch_op) | cond_cbcond(rc_last+1) | op2(bpr_op2)), off) == x,  "inverse is not inverse");
+    return r;
+  }
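+
+  // Worked example: a forward branch of 8 bytes gives xx == 8, i.e. a word
+  // displacement of 2, so bits [12:5] hold 0x02 and bits [20:19] hold 0;
+  // inv_wdisp10() sign-extends the two high bits and returns pos + 8.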
 
   // word displacement in low-order nbits bits
 
@@ -1138,7 +1185,26 @@
 #endif
   }
 
+  // cbcond instructions should not be generated one after another
+  bool cbcond_before() {
+    if (offset() == 0) return false; // it is the first instruction
+    int x = *(int*)(intptr_t(pc()) - 4); // previous instruction
+    return is_cbcond(x);
+  }
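+  // cbcond_before() inspects the previously emitted instruction word, hence
+  // the guard for offset() == 0 (nothing emitted yet in this buffer).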
+
+  void no_cbcond_before() {
+    assert(offset() == 0 || !cbcond_before(), "cbcond should not follow another cbcond");
+  }
+
 public:
+
+  bool use_cbcond(Label& L) {
+    if (!UseCBCond || cbcond_before()) return false;
+    intptr_t x = intptr_t(target_distance(L)) - intptr_t(pc());
+    assert( (x & 3) == 0, "not word aligned");
+    return is_simm(x, 12);
+  }
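+  // The simm12 test bounds the byte distance to +/-2048, i.e. the +/-512
+  // instructions reachable through cbcond's 10-bit word displacement.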
+
   // Tells assembler you know that next instruction is delayed
   Assembler* delayed() {
 #ifdef CHECK_DELAY
@@ -1181,10 +1247,15 @@
   void addccc( Register s1, Register s2, Register d ) { emit_long( op(arith_op) | rd(d) | op3(addc_op3 | cc_bit_op3) | rs1(s1) | rs2(s2) ); }
   void addccc( Register s1, int simm13a, Register d ) { emit_long( op(arith_op) | rd(d) | op3(addc_op3 | cc_bit_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
 
+
   // pp 136
 
-  inline void bpr( RCondition c, bool a, Predict p, Register s1, address d, relocInfo::relocType rt = relocInfo::none );
-  inline void bpr( RCondition c, bool a, Predict p, Register s1, Label& L);
+  inline void bpr(RCondition c, bool a, Predict p, Register s1, address d, relocInfo::relocType rt = relocInfo::none);
+  inline void bpr(RCondition c, bool a, Predict p, Register s1, Label& L);
+
+  // compare and branch
+  inline void cbcond(Condition c, CC cc, Register s1, Register s2, Label& L);
+  inline void cbcond(Condition c, CC cc, Register s1, int simm5, Label& L);
 
  protected: // use MacroAssembler::br instead
 
@@ -1198,8 +1269,6 @@
   inline void fbp( Condition c, bool a, CC cc, Predict p, address d, relocInfo::relocType rt = relocInfo::none );
   inline void fbp( Condition c, bool a, CC cc, Predict p, Label& L );
 
- public:
-
   // pp 144
 
   inline void br( Condition c, bool a, address d, relocInfo::relocType rt = relocInfo::none );
@@ -1220,6 +1289,8 @@
   inline void call( address d,  relocInfo::relocType rt = relocInfo::runtime_call_type );
   inline void call( Label& L,   relocInfo::relocType rt = relocInfo::runtime_call_type );
 
+ public:
+
   // pp 150
 
   // These instructions compare the contents of s2 with the contents of
@@ -1862,8 +1933,8 @@
   inline void fb( Condition c, bool a, Predict p, address d, relocInfo::relocType rt = relocInfo::none );
   inline void fb( Condition c, bool a, Predict p, Label& L );
 
-  // compares register with zero and branches (V9 and V8 instructions)
-  void br_zero( Condition c, bool a, Predict p, Register s1, Label& L);
+  // compares register with zero (32 bit) and branches (V9 and V8 instructions)
+  void cmp_zero_and_br( Condition c, Register s1, Label& L, bool a = false, Predict p = pn );
   // Compares a pointer register with zero and branches on (not)null.
   // Does a test & branch on 32-bit systems and a register-branch on 64-bit.
   void br_null   ( Register s1, bool a, Predict p, Label& L );
@@ -1875,6 +1946,26 @@
   void br_on_reg_cond( RCondition c, bool a, Predict p, Register s1, address d, relocInfo::relocType rt = relocInfo::none );
   void br_on_reg_cond( RCondition c, bool a, Predict p, Register s1, Label& L);
 
+  //
+  // Compare registers and branch with a nop in the delay slot, or use cbcond
+  // (which has no delay slot).
+  //
+  // ATTENTION: use these instructions with caution because the cbcond
+  //            instruction has a very short reach: 512 instructions (2KB).
+
+  // Compare integer (32 bit) values (icc only).
+  void cmp_and_br_short(Register s1, Register s2, Condition c, Predict p, Label& L);
+  void cmp_and_br_short(Register s1, int simm13a, Condition c, Predict p, Label& L);
+  // Platform-dependent version for pointer compares (icc on !LP64 and xcc on LP64).
+  void cmp_and_brx_short(Register s1, Register s2, Condition c, Predict p, Label& L);
+  void cmp_and_brx_short(Register s1, int simm13a, Condition c, Predict p, Label& L);
+
+  // Short branch versions for comparing a pointer with zero.
+  void br_null_short   ( Register s1, Predict p, Label& L );
+  void br_notnull_short( Register s1, Predict p, Label& L );
+
+  // unconditional short branch
+  void ba_short(Label& L);
+
   inline void bp( Condition c, bool a, CC cc, Predict p, address d, relocInfo::relocType rt = relocInfo::none );
   inline void bp( Condition c, bool a, CC cc, Predict p, Label& L );
 
@@ -1882,8 +1973,8 @@
   inline void brx( Condition c, bool a, Predict p, address d, relocInfo::relocType rt = relocInfo::none );
   inline void brx( Condition c, bool a, Predict p, Label& L );
 
-  // unconditional short branch
-  inline void ba( bool a, Label& L );
+  // unconditional branch
+  inline void ba( Label& L );
 
   // Branch that tests fp condition codes
   inline void fbp( Condition c, bool a, CC cc, Predict p, address d, relocInfo::relocType rt = relocInfo::none );
@@ -2167,7 +2258,6 @@
 
   inline void stbool(Register d, const Address& a) { stb(d, a); }
   inline void ldbool(const Address& a, Register d) { ldsb(a, d); }
-  inline void tstbool( Register s ) { tst(s); }
   inline void movbool( bool boolconst, Register d) { mov( (int) boolconst, d); }
 
   // klass oop manipulations if compressed
@@ -2469,8 +2559,7 @@
                                      Label* L_success,
                                      Label* L_failure,
                                      Label* L_slow_path,
-                RegisterOrConstant super_check_offset = RegisterOrConstant(-1),
-                Register instanceof_hack = noreg);
+                RegisterOrConstant super_check_offset = RegisterOrConstant(-1));
 
   // The rest of the type check; must be wired to a corresponding fast path.
   // It does not repeat the fast path logic, so don't use it standalone.
--- a/src/cpu/sparc/vm/assembler_sparc.inline.hpp	Wed Aug 17 07:05:42 2011 -0400
+++ b/src/cpu/sparc/vm/assembler_sparc.inline.hpp	Fri Aug 19 08:55:53 2011 -0700
@@ -80,32 +80,36 @@
 inline void Assembler::add(Register s1, int simm13a, Register d, relocInfo::relocType rtype ) { emit_data( op(arith_op) | rd(d) | op3(add_op3) | rs1(s1) | immed(true) | simm(simm13a, 13), rtype ); }
 inline void Assembler::add(Register s1, int simm13a, Register d, RelocationHolder const& rspec ) { emit_data( op(arith_op) | rd(d) | op3(add_op3) | rs1(s1) | immed(true) | simm(simm13a, 13), rspec ); }
 
-inline void Assembler::bpr( RCondition c, bool a, Predict p, Register s1, address d, relocInfo::relocType rt ) { v9_only();  emit_data( op(branch_op) | annul(a) | cond(c) | op2(bpr_op2) | wdisp16(intptr_t(d), intptr_t(pc())) | predict(p) | rs1(s1), rt);  has_delay_slot(); }
+inline void Assembler::bpr( RCondition c, bool a, Predict p, Register s1, address d, relocInfo::relocType rt ) { v9_only();  cti();  emit_data( op(branch_op) | annul(a) | cond(c) | op2(bpr_op2) | wdisp16(intptr_t(d), intptr_t(pc())) | predict(p) | rs1(s1), rt);  has_delay_slot(); }
 inline void Assembler::bpr( RCondition c, bool a, Predict p, Register s1, Label& L) { bpr( c, a, p, s1, target(L)); }
 
-inline void Assembler::fb( Condition c, bool a, address d, relocInfo::relocType rt ) { v9_dep();  emit_data( op(branch_op) | annul(a) | cond(c) | op2(fb_op2) | wdisp(intptr_t(d), intptr_t(pc()), 22), rt);  has_delay_slot(); }
+inline void Assembler::fb( Condition c, bool a, address d, relocInfo::relocType rt ) { v9_dep();  cti();  emit_data( op(branch_op) | annul(a) | cond(c) | op2(fb_op2) | wdisp(intptr_t(d), intptr_t(pc()), 22), rt);  has_delay_slot(); }
 inline void Assembler::fb( Condition c, bool a, Label& L ) { fb(c, a, target(L)); }
 
-inline void Assembler::fbp( Condition c, bool a, CC cc, Predict p, address d, relocInfo::relocType rt ) { v9_only();  emit_data( op(branch_op) | annul(a) | cond(c) | op2(fbp_op2) | branchcc(cc) | predict(p) | wdisp(intptr_t(d), intptr_t(pc()), 19), rt);  has_delay_slot(); }
+inline void Assembler::fbp( Condition c, bool a, CC cc, Predict p, address d, relocInfo::relocType rt ) { v9_only();  cti();  emit_data( op(branch_op) | annul(a) | cond(c) | op2(fbp_op2) | branchcc(cc) | predict(p) | wdisp(intptr_t(d), intptr_t(pc()), 19), rt);  has_delay_slot(); }
 inline void Assembler::fbp( Condition c, bool a, CC cc, Predict p, Label& L ) { fbp(c, a, cc, p, target(L)); }
 
-inline void Assembler::cb( Condition c, bool a, address d, relocInfo::relocType rt ) { v8_only();  emit_data( op(branch_op) | annul(a) | cond(c) | op2(cb_op2) | wdisp(intptr_t(d), intptr_t(pc()), 22), rt);  has_delay_slot(); }
+inline void Assembler::cb( Condition c, bool a, address d, relocInfo::relocType rt ) { v8_only();  cti();  emit_data( op(branch_op) | annul(a) | cond(c) | op2(cb_op2) | wdisp(intptr_t(d), intptr_t(pc()), 22), rt);  has_delay_slot(); }
 inline void Assembler::cb( Condition c, bool a, Label& L ) { cb(c, a, target(L)); }
 
-inline void Assembler::br( Condition c, bool a, address d, relocInfo::relocType rt ) { v9_dep();   emit_data( op(branch_op) | annul(a) | cond(c) | op2(br_op2) | wdisp(intptr_t(d), intptr_t(pc()), 22), rt);  has_delay_slot(); }
+inline void Assembler::br( Condition c, bool a, address d, relocInfo::relocType rt ) { v9_dep();  cti();   emit_data( op(branch_op) | annul(a) | cond(c) | op2(br_op2) | wdisp(intptr_t(d), intptr_t(pc()), 22), rt);  has_delay_slot(); }
 inline void Assembler::br( Condition c, bool a, Label& L ) { br(c, a, target(L)); }
 
-inline void Assembler::bp( Condition c, bool a, CC cc, Predict p, address d, relocInfo::relocType rt ) { v9_only();  emit_data( op(branch_op) | annul(a) | cond(c) | op2(bp_op2) | branchcc(cc) | predict(p) | wdisp(intptr_t(d), intptr_t(pc()), 19), rt);  has_delay_slot(); }
+inline void Assembler::bp( Condition c, bool a, CC cc, Predict p, address d, relocInfo::relocType rt ) { v9_only();  cti();  emit_data( op(branch_op) | annul(a) | cond(c) | op2(bp_op2) | branchcc(cc) | predict(p) | wdisp(intptr_t(d), intptr_t(pc()), 19), rt);  has_delay_slot(); }
 inline void Assembler::bp( Condition c, bool a, CC cc, Predict p, Label& L ) { bp(c, a, cc, p, target(L)); }
 
-inline void Assembler::call( address d,  relocInfo::relocType rt ) { emit_data( op(call_op) | wdisp(intptr_t(d), intptr_t(pc()), 30), rt);  has_delay_slot(); assert(rt != relocInfo::virtual_call_type, "must use virtual_call_Relocation::spec"); }
+// compare and branch
+inline void Assembler::cbcond(Condition c, CC cc, Register s1, Register s2, Label& L) { cti();  no_cbcond_before();  emit_data(op(branch_op) | cond_cbcond(c) | op2(bpr_op2) | branchcc(cc) | wdisp10(intptr_t(target(L)), intptr_t(pc())) | rs1(s1) | rs2(s2)); }
+inline void Assembler::cbcond(Condition c, CC cc, Register s1, int simm5, Label& L)   { cti();  no_cbcond_before();  emit_data(op(branch_op) | cond_cbcond(c) | op2(bpr_op2) | branchcc(cc) | wdisp10(intptr_t(target(L)), intptr_t(pc())) | rs1(s1) | immed(true) | simm(simm5, 5)); }
+
+inline void Assembler::call( address d,  relocInfo::relocType rt ) { cti();  emit_data( op(call_op) | wdisp(intptr_t(d), intptr_t(pc()), 30), rt);  has_delay_slot(); assert(rt != relocInfo::virtual_call_type, "must use virtual_call_Relocation::spec"); }
 inline void Assembler::call( Label& L,   relocInfo::relocType rt ) { call( target(L), rt); }
 
 inline void Assembler::flush( Register s1, Register s2) { emit_long( op(arith_op) | op3(flush_op3) | rs1(s1) | rs2(s2)); }
 inline void Assembler::flush( Register s1, int simm13a) { emit_data( op(arith_op) | op3(flush_op3) | rs1(s1) | immed(true) | simm(simm13a, 13)); }
 
-inline void Assembler::jmpl( Register s1, Register s2, Register d                          ) { emit_long( op(arith_op) | rd(d) | op3(jmpl_op3) | rs1(s1) | rs2(s2));  has_delay_slot(); }
-inline void Assembler::jmpl( Register s1, int simm13a, Register d, RelocationHolder const& rspec ) { emit_data( op(arith_op) | rd(d) | op3(jmpl_op3) | rs1(s1) | immed(true) | simm(simm13a, 13), rspec);  has_delay_slot(); }
+inline void Assembler::jmpl( Register s1, Register s2, Register d ) { cti();  emit_long( op(arith_op) | rd(d) | op3(jmpl_op3) | rs1(s1) | rs2(s2));  has_delay_slot(); }
+inline void Assembler::jmpl( Register s1, int simm13a, Register d, RelocationHolder const& rspec ) { cti();  emit_data( op(arith_op) | rd(d) | op3(jmpl_op3) | rs1(s1) | immed(true) | simm(simm13a, 13), rspec);  has_delay_slot(); }
 
 inline void Assembler::ldf(FloatRegisterImpl::Width w, Register s1, RegisterOrConstant s2, FloatRegister d) {
   if (s2.is_register()) ldf(w, s1, s2.as_register(), d);
@@ -240,8 +244,8 @@
 inline void Assembler::prefetch(const Address& a, PrefetchFcn f, int offset) { v9_only(); relocate(a.rspec(offset)); prefetch(a.base(), a.disp() + offset, f); }
 
 
-inline void Assembler::rett( Register s1, Register s2                         ) { emit_long( op(arith_op) | op3(rett_op3) | rs1(s1) | rs2(s2));  has_delay_slot(); }
-inline void Assembler::rett( Register s1, int simm13a, relocInfo::relocType rt) { emit_data( op(arith_op) | op3(rett_op3) | rs1(s1) | immed(true) | simm(simm13a, 13), rt);  has_delay_slot(); }
+inline void Assembler::rett( Register s1, Register s2                         ) { cti();  emit_long( op(arith_op) | op3(rett_op3) | rs1(s1) | rs2(s2));  has_delay_slot(); }
+inline void Assembler::rett( Register s1, int simm13a, relocInfo::relocType rt) { cti();  emit_data( op(arith_op) | op3(rett_op3) | rs1(s1) | immed(true) | simm(simm13a, 13), rt);  has_delay_slot(); }
 
 inline void Assembler::sethi( int imm22a, Register d, RelocationHolder const& rspec ) { emit_data( op(branch_op) | rd(d) | op2(sethi_op2) | hi22(imm22a), rspec); }
 
@@ -557,8 +561,8 @@
   brx(c, a, p, target(L));
 }
 
-inline void MacroAssembler::ba( bool a, Label& L ) {
-  br(always, a, pt, L);
+inline void MacroAssembler::ba( Label& L ) {
+  br(always, false, pt, L);
 }
 
 // Warning: V9 only functions
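A note on the recurring cti() above: every branch, call, jmpl and rett emitter now opens with it. This changeset brings in the SPARC-T4 compare-and-branch instruction, cbcond, which fuses a register compare and a short branch into a single instruction with no delay slot; the architecture restricts where a cbcond may sit, in particular two cbconds must not be adjacent (hence no_cbcond_before() in the cbcond emitters). The guard definitions are not part of this hunk, so the following is only a plausible sketch, assuming an is_cbcond() opcode predicate:

    // Hypothetical sketch; the real guards live in assembler_sparc.hpp,
    // which this hunk does not show.
    bool Assembler::cbcond_before() {
      if (offset() == 0) return false;        // nothing emitted yet
      int prev = *(int*)(pc() - 4);           // last emitted instruction word
      return is_cbcond(prev);                 // assumed opcode predicate
    }
    void Assembler::cti() {
      // a control transfer must not directly follow a cbcond
      assert(!cbcond_before(), "cti should not follow cbcond");
    }
    void Assembler::no_cbcond_before() {
      // and two cbconds must not be adjacent either
      assert(!cbcond_before(), "cbcond should not follow cbcond");
    }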
--- a/src/cpu/sparc/vm/c1_CodeStubs_sparc.cpp	Wed Aug 17 07:05:42 2011 -0400
+++ b/src/cpu/sparc/vm/c1_CodeStubs_sparc.cpp	Fri Aug 19 08:55:53 2011 -0700
@@ -303,9 +303,7 @@
     assert(_oop_index >= 0, "must have oop index");
     __ load_heap_oop(_obj, java_lang_Class::klass_offset_in_bytes(), G3);
     __ ld_ptr(G3, instanceKlass::init_thread_offset_in_bytes() + sizeof(klassOopDesc), G3);
-    __ cmp(G2_thread, G3);
-    __ br(Assembler::notEqual, false, Assembler::pn, call_patch);
-    __ delayed()->nop();
+    __ cmp_and_brx_short(G2_thread, G3, Assembler::notEqual, Assembler::pn, call_patch);
 
     // load_klass patches may execute the patched code before it's
     // copied back into place so we need to jump back into the main
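The hunk above is the shape of most of what follows: a cmp/branch/delayed-nop triple collapses into a single cmp_and_br_short or cmp_and_brx_short call, which lets the macro choose the best encoding at emit time. A minimal sketch of such a macro, assuming a use_cbcond(L) helper that tests both CPU support and branch distance (the names are illustrative, not the actual MacroAssembler code):

    // Hypothetical expansion; "short" promises the target is near enough
    // for cbcond's 10-bit word displacement.
    void MacroAssembler::cmp_and_brx_short(Register s1, Register s2,
                                           Condition c, Predict p, Label& L) {
      if (use_cbcond(L)) {            // assumed: T4 cbcond usable and L near
        cbcond(c, xcc, s1, s2, L);    // one instruction, no delay slot
      } else {
        cmp(s1, s2);
        brx(c, false, p, L);
        delayed()->nop();             // the slot held nothing useful anyway
      }
    }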
--- a/src/cpu/sparc/vm/c1_LIRAssembler_sparc.cpp	Wed Aug 17 07:05:42 2011 -0400
+++ b/src/cpu/sparc/vm/c1_LIRAssembler_sparc.cpp	Fri Aug 19 08:55:53 2011 -0700
@@ -217,9 +217,7 @@
       {
         Label L;
         __ ld_ptr(OSR_buf, slot_offset + 1*BytesPerWord, O7);
-        __ cmp(G0, O7);
-        __ br(Assembler::notEqual, false, Assembler::pt, L);
-        __ delayed()->nop();
+        __ cmp_and_br_short(O7, G0, Assembler::notEqual, Assembler::pt, L);
         __ stop("locked object is NULL");
         __ bind(L);
       }
@@ -2096,10 +2094,10 @@
       __ xor3(O0, -1, tmp);
       __ sub(length, tmp, length);
       __ add(src_pos, tmp, src_pos);
-      __ br_zero(Assembler::less, false, Assembler::pn, O0, *stub->entry());
+      __ cmp_zero_and_br(Assembler::less, O0, *stub->entry());
       __ delayed()->add(dst_pos, tmp, dst_pos);
     } else {
-      __ br_zero(Assembler::less, false, Assembler::pn, O0, *stub->entry());
+      __ cmp_zero_and_br(Assembler::less, O0, *stub->entry());
       __ delayed()->nop();
     }
     __ bind(*stub->continuation());
@@ -2123,22 +2121,19 @@
 
   if (flags & LIR_OpArrayCopy::src_pos_positive_check) {
     // test src_pos register
-    __ tst(src_pos);
-    __ br(Assembler::less, false, Assembler::pn, *stub->entry());
+    __ cmp_zero_and_br(Assembler::less, src_pos, *stub->entry());
     __ delayed()->nop();
   }
 
   if (flags & LIR_OpArrayCopy::dst_pos_positive_check) {
     // test dst_pos register
-    __ tst(dst_pos);
-    __ br(Assembler::less, false, Assembler::pn, *stub->entry());
+    __ cmp_zero_and_br(Assembler::less, dst_pos, *stub->entry());
     __ delayed()->nop();
   }
 
   if (flags & LIR_OpArrayCopy::length_positive_check) {
     // make sure length isn't negative
-    __ tst(length);
-    __ br(Assembler::less, false, Assembler::pn, *stub->entry());
+    __ cmp_zero_and_br(Assembler::less, length, *stub->entry());
     __ delayed()->nop();
   }
 
@@ -2261,8 +2256,7 @@
 #ifndef PRODUCT
         if (PrintC1Statistics) {
           Label failed;
-          __ br_notnull(O0, false, Assembler::pn,  failed);
-          __ delayed()->nop();
+          __ br_notnull_short(O0, Assembler::pn, failed);
           __ inc_counter((address)&Runtime1::_arraycopy_checkcast_cnt, G1, G3);
           __ bind(failed);
         }
@@ -2314,9 +2308,7 @@
         __ br(Assembler::notEqual, false, Assembler::pn, halt);
         // load the raw value of the src klass.
         __ delayed()->lduw(src, oopDesc::klass_offset_in_bytes(), tmp2);
-        __ cmp(tmp, tmp2);
-        __ br(Assembler::equal, false, Assembler::pn, known_ok);
-        __ delayed()->nop();
+        __ cmp_and_br_short(tmp, tmp2, Assembler::equal, Assembler::pn, known_ok);
       } else {
         __ cmp(tmp, tmp2);
         __ br(Assembler::equal, false, Assembler::pn, known_ok);
@@ -2330,9 +2322,7 @@
         __ cmp(tmp, tmp2);
         __ brx(Assembler::notEqual, false, Assembler::pn, halt);
         __ delayed()->ld_ptr(src, oopDesc::klass_offset_in_bytes(), tmp2);
-        __ cmp(tmp, tmp2);
-        __ brx(Assembler::equal, false, Assembler::pn, known_ok);
-        __ delayed()->nop();
+        __ cmp_and_brx_short(tmp, tmp2, Assembler::equal, Assembler::pn, known_ok);
       } else {
         __ cmp(tmp, tmp2);
         __ brx(Assembler::equal, false, Assembler::pn, known_ok);
@@ -2530,15 +2520,13 @@
                           mdo_offset_bias);
     __ ld_ptr(receiver_addr, tmp1);
     __ verify_oop(tmp1);
-    __ cmp(recv, tmp1);
-    __ brx(Assembler::notEqual, false, Assembler::pt, next_test);
-    __ delayed()->nop();
+    __ cmp_and_brx_short(recv, tmp1, Assembler::notEqual, Assembler::pt, next_test);
     Address data_addr(mdo, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_count_offset(i)) -
                       mdo_offset_bias);
     __ ld_ptr(data_addr, tmp1);
     __ add(tmp1, DataLayout::counter_increment, tmp1);
     __ st_ptr(tmp1, data_addr);
-    __ ba(false, *update_done);
+    __ ba(*update_done);
     __ delayed()->nop();
     __ bind(next_test);
   }
@@ -2549,13 +2537,12 @@
     Address recv_addr(mdo, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_offset(i)) -
                       mdo_offset_bias);
     __ ld_ptr(recv_addr, tmp1);
-    __ br_notnull(tmp1, false, Assembler::pt, next_test);
-    __ delayed()->nop();
+    __ br_notnull_short(tmp1, Assembler::pt, next_test);
     __ st_ptr(recv, recv_addr);
     __ set(DataLayout::counter_increment, tmp1);
     __ st_ptr(tmp1, mdo, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_count_offset(i)) -
               mdo_offset_bias);
-    __ ba(false, *update_done);
+    __ ba(*update_done);
     __ delayed()->nop();
     __ bind(next_test);
   }
@@ -2601,8 +2588,7 @@
     setup_md_access(method, op->profiled_bci(), md, data, mdo_offset_bias);
 
     Label not_null;
-    __ br_notnull(obj, false, Assembler::pn, not_null);
-    __ delayed()->nop();
+    __ br_notnull_short(obj, Assembler::pn, not_null);
     Register mdo      = k_RInfo;
     Register data_val = Rtmp1;
     jobject2reg(md->constant_encoding(), mdo);
@@ -2614,7 +2600,7 @@
     __ ldub(flags_addr, data_val);
     __ or3(data_val, BitData::null_seen_byte_constant(), data_val);
     __ stb(data_val, flags_addr);
-    __ ba(false, *obj_is_null);
+    __ ba(*obj_is_null);
     __ delayed()->nop();
     __ bind(not_null);
   } else {
@@ -2682,7 +2668,7 @@
     __ load_klass(obj, recv);
     type_profile_helper(mdo, mdo_offset_bias, md, data, recv, tmp1, success);
     // Jump over the failure case
-    __ ba(false, *success);
+    __ ba(*success);
     __ delayed()->nop();
     // Cast failure case
     __ bind(profile_cast_failure);
@@ -2695,10 +2681,10 @@
     __ ld_ptr(data_addr, tmp1);
     __ sub(tmp1, DataLayout::counter_increment, tmp1);
     __ st_ptr(tmp1, data_addr);
-    __ ba(false, *failure);
+    __ ba(*failure);
     __ delayed()->nop();
   }
-  __ ba(false, *success);
+  __ ba(*success);
   __ delayed()->nop();
 }
 
@@ -2728,8 +2714,7 @@
 
     if (op->should_profile()) {
       Label not_null;
-      __ br_notnull(value, false, Assembler::pn, not_null);
-      __ delayed()->nop();
+      __ br_notnull_short(value, Assembler::pn, not_null);
       Register mdo      = k_RInfo;
       Register data_val = Rtmp1;
       jobject2reg(md->constant_encoding(), mdo);
@@ -2741,12 +2726,10 @@
       __ ldub(flags_addr, data_val);
       __ or3(data_val, BitData::null_seen_byte_constant(), data_val);
       __ stb(data_val, flags_addr);
-      __ ba(false, done);
-      __ delayed()->nop();
+      __ ba_short(done);
       __ bind(not_null);
     } else {
-      __ br_null(value, false, Assembler::pn, done);
-      __ delayed()->nop();
+      __ br_null_short(value, Assembler::pn, done);
     }
     add_debug_info_for_null_check_here(op->info_for_exception());
     __ load_klass(array, k_RInfo);
@@ -2777,8 +2760,7 @@
       }
       __ load_klass(value, recv);
       type_profile_helper(mdo, mdo_offset_bias, md, data, recv, tmp1, &done);
-      __ ba(false, done);
-      __ delayed()->nop();
+      __ ba_short(done);
       // Cast failure case
       __ bind(profile_cast_failure);
       jobject2reg(md->constant_encoding(), mdo);
@@ -2790,7 +2772,7 @@
       __ ld_ptr(data_addr, tmp1);
       __ sub(tmp1, DataLayout::counter_increment, tmp1);
       __ st_ptr(tmp1, data_addr);
-      __ ba(false, *stub->entry());
+      __ ba(*stub->entry());
       __ delayed()->nop();
     }
     __ bind(done);
@@ -2808,8 +2790,7 @@
     emit_typecheck_helper(op, &success, &failure, &failure);
     __ bind(failure);
     __ set(0, dst);
-    __ ba(false, done);
-    __ delayed()->nop();
+    __ ba_short(done);
     __ bind(success);
     __ set(1, dst);
     __ bind(done);
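Two flavors of unconditional branch survive above. Where the delay slot still does real work, ba(false, L) merely loses the annul argument and the explicit delayed() line stays; where the slot was nop padding, the pair collapses into ba_short(L). Presumably ba_short manages the slot itself, along these lines (a sketch, assuming cbcond of G0 against G0 serves as an always-taken compare-and-branch on T4):

    // Hypothetical sketch of the short unconditional branch.
    void MacroAssembler::ba_short(Label& L) {
      if (use_cbcond(L)) {              // assumed feature/distance test
        cbcond(equal, icc, G0, G0, L);  // always taken, no delay slot
      } else {
        br(always, false, pt, L);
        delayed()->nop();
      }
    }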
--- a/src/cpu/sparc/vm/c1_MacroAssembler_sparc.cpp	Wed Aug 17 07:05:42 2011 -0400
+++ b/src/cpu/sparc/vm/c1_MacroAssembler_sparc.cpp	Fri Aug 19 08:55:53 2011 -0700
@@ -41,9 +41,7 @@
   // Note: needs more testing of out-of-line vs. inline slow case
   verify_oop(receiver);
   load_klass(receiver, temp_reg);
-  cmp(temp_reg, iCache);
-  brx(Assembler::equal, true, Assembler::pt, L);
-  delayed()->nop();
+  cmp_and_brx_short(temp_reg, iCache, Assembler::equal, Assembler::pt, L);
   AddressLiteral ic_miss(SharedRuntime::get_ic_miss_stub());
   jump_to(ic_miss, temp_reg);
   delayed()->nop();
@@ -142,8 +140,7 @@
   }
   // Test first if it is a fast recursive unlock
   ld_ptr(Rbox, BasicLock::displaced_header_offset_in_bytes(), Rmark);
-  br_null(Rmark, false, Assembler::pt, done);
-  delayed()->nop();
+  br_null_short(Rmark, Assembler::pt, done);
   if (!UseBiasedLocking) {
     // load object
     ld_ptr(Rbox, BasicObjectLock::obj_offset_in_bytes(), Roop);
@@ -231,7 +228,7 @@
   if (!is_simm13(obj_size * wordSize)) {
     // would need to use extra register to load
     // object size => go the slow case for now
-    br(Assembler::always, false, Assembler::pt, slow_case);
+    ba(slow_case);
     delayed()->nop();
     return;
   }
@@ -257,12 +254,10 @@
     Label ok;
     ld(klass, klassOopDesc::header_size() * HeapWordSize + Klass::layout_helper_offset_in_bytes(), t1);
     if (var_size_in_bytes != noreg) {
-      cmp(t1, var_size_in_bytes);
+      cmp_and_brx_short(t1, var_size_in_bytes, Assembler::equal, Assembler::pt, ok);
     } else {
-      cmp(t1, con_size_in_bytes);
+      cmp_and_brx_short(t1, con_size_in_bytes, Assembler::equal, Assembler::pt, ok);
     }
-    brx(Assembler::equal, false, Assembler::pt, ok);
-    delayed()->nop();
     stop("bad size in initialize_object");
     should_not_reach_here();
 
@@ -387,8 +382,7 @@
 
 void C1_MacroAssembler::verify_not_null_oop(Register r) {
   Label not_null;
-  br_notnull(r, false, Assembler::pt, not_null);
-  delayed()->nop();
+  br_notnull_short(r, Assembler::pt, not_null);
   stop("non-null oop required");
   bind(not_null);
   if (!VerifyOops) return;
--- a/src/cpu/sparc/vm/c1_Runtime1_sparc.cpp	Wed Aug 17 07:05:42 2011 -0400
+++ b/src/cpu/sparc/vm/c1_Runtime1_sparc.cpp	Fri Aug 19 08:55:53 2011 -0700
@@ -71,8 +71,7 @@
   { Label L;
     Address exception_addr(G2_thread, Thread::pending_exception_offset());
     ld_ptr(exception_addr, Gtemp);
-    br_null(Gtemp, false, pt, L);
-    delayed()->nop();
+    br_null_short(Gtemp, pt, L);
     Address vm_result_addr(G2_thread, JavaThread::vm_result_offset());
     st_ptr(G0, vm_result_addr);
     Address vm_result_addr_2(G2_thread, JavaThread::vm_result_2_offset());
@@ -333,9 +332,7 @@
   assert(deopt_blob != NULL, "deoptimization blob must have been created");
 
   Label no_deopt;
-  __ tst(O0);
-  __ brx(Assembler::equal, false, Assembler::pt, no_deopt);
-  __ delayed()->nop();
+  __ br_null_short(O0, Assembler::pt, no_deopt);
 
   // return to the deoptimization handler entry for unpacking and re-execute
   // if we simply returned then we'd deopt as if any call we patched had just
@@ -402,18 +399,15 @@
           if (id == fast_new_instance_init_check_id) {
             // make sure the klass is initialized
             __ ld(G5_klass, instanceKlass::init_state_offset_in_bytes() + sizeof(oopDesc), G3_t1);
-            __ cmp(G3_t1, instanceKlass::fully_initialized);
-            __ br(Assembler::notEqual, false, Assembler::pn, slow_path);
-            __ delayed()->nop();
+            __ cmp_and_br_short(G3_t1, instanceKlass::fully_initialized, Assembler::notEqual, Assembler::pn, slow_path);
           }
 #ifdef ASSERT
           // assert object can be fast path allocated
           {
             Label ok, not_ok;
           __ ld(G5_klass, Klass::layout_helper_offset_in_bytes() + sizeof(oopDesc), G1_obj_size);
-          __ cmp(G1_obj_size, 0);  // make sure it's an instance (LH > 0)
-          __ br(Assembler::lessEqual, false, Assembler::pn, not_ok);
-          __ delayed()->nop();
+          // make sure it's an instance (LH > 0)
+          __ cmp_and_br_short(G1_obj_size, 0, Assembler::lessEqual, Assembler::pn, not_ok);
           __ btst(Klass::_lh_instance_slow_path_bit, G1_obj_size);
           __ br(Assembler::zero, false, Assembler::pn, ok);
           __ delayed()->nop();
@@ -501,9 +495,7 @@
           int tag = ((id == new_type_array_id)
                      ? Klass::_lh_array_tag_type_value
                      : Klass::_lh_array_tag_obj_value);
-          __ cmp(G3_t1, tag);
-          __ brx(Assembler::equal, false, Assembler::pt, ok);
-          __ delayed()->nop();
+          __ cmp_and_brx_short(G3_t1, tag, Assembler::equal, Assembler::pt, ok);
           __ stop("assert(is an array klass)");
           __ should_not_reach_here();
           __ bind(ok);
@@ -519,9 +511,7 @@
 
           // check that array length is small enough for fast path
           __ set(C1_MacroAssembler::max_array_allocation_length, G3_t1);
-          __ cmp(G4_length, G3_t1);
-          __ br(Assembler::greaterUnsigned, false, Assembler::pn, slow_path);
-          __ delayed()->nop();
+          __ cmp_and_br_short(G4_length, G3_t1, Assembler::greaterUnsigned, Assembler::pn, slow_path);
 
           // if we got here then the TLAB allocation failed, so try
           // refilling the TLAB or allocating directly from eden.
--- a/src/cpu/sparc/vm/cppInterpreter_sparc.cpp	Wed Aug 17 07:05:42 2011 -0400
+++ b/src/cpu/sparc/vm/cppInterpreter_sparc.cpp	Fri Aug 19 08:55:53 2011 -0700
@@ -544,7 +544,7 @@
 
     // Generate regular method entry
     __ bind(slow_path);
-    __ ba(false, fast_accessor_slow_entry_path);
+    __ ba(fast_accessor_slow_entry_path);
     __ delayed()->nop();
     return entry;
   }
@@ -719,8 +719,7 @@
 
     Address exception_addr(G2_thread, 0, in_bytes(Thread::pending_exception_offset()));
     __ ld_ptr(exception_addr, G3_scratch);
-    __ br_notnull(G3_scratch, false, Assembler::pn, pending_exception_present);
-    __ delayed()->nop();
+    __ br_notnull_short(G3_scratch, Assembler::pn, pending_exception_present);
     __ ld_ptr(Address(G5_method, 0, in_bytes(methodOopDesc::signature_handler_offset())), G3_scratch);
     __ bind(L);
   }
@@ -1292,7 +1291,7 @@
   deopt_frame_manager_return_atos  = __ pc();
 
   // O0/O1 live
-  __ ba(false, return_from_deopt_common);
+  __ ba(return_from_deopt_common);
   __ delayed()->set(AbstractInterpreter::BasicType_as_index(T_OBJECT), L3_scratch);    // Result stub address array index
 
 
@@ -1300,14 +1299,14 @@
   deopt_frame_manager_return_btos  = __ pc();
 
   // O0/O1 live
-  __ ba(false, return_from_deopt_common);
+  __ ba(return_from_deopt_common);
   __ delayed()->set(AbstractInterpreter::BasicType_as_index(T_BOOLEAN), L3_scratch);    // Result stub address array index
 
   // deopt needs to jump to here to enter the interpreter (return a result)
   deopt_frame_manager_return_itos  = __ pc();
 
   // O0/O1 live
-  __ ba(false, return_from_deopt_common);
+  __ ba(return_from_deopt_common);
   __ delayed()->set(AbstractInterpreter::BasicType_as_index(T_INT), L3_scratch);    // Result stub address array index
 
   // deopt needs to jump to here to enter the interpreter (return a result)
@@ -1327,21 +1326,21 @@
   __ srlx(G1,32,O0);
 #endif /* !_LP64 && COMPILER2 */
   // O0/O1 live
-  __ ba(false, return_from_deopt_common);
+  __ ba(return_from_deopt_common);
   __ delayed()->set(AbstractInterpreter::BasicType_as_index(T_LONG), L3_scratch);    // Result stub address array index
 
   // deopt needs to jump to here to enter the interpreter (return a result)
 
   deopt_frame_manager_return_ftos  = __ pc();
   // O0/O1 live
-  __ ba(false, return_from_deopt_common);
+  __ ba(return_from_deopt_common);
   __ delayed()->set(AbstractInterpreter::BasicType_as_index(T_FLOAT), L3_scratch);    // Result stub address array index
 
   // deopt needs to jump to here to enter the interpreter (return a result)
   deopt_frame_manager_return_dtos  = __ pc();
 
   // O0/O1 live
-  __ ba(false, return_from_deopt_common);
+  __ ba(return_from_deopt_common);
   __ delayed()->set(AbstractInterpreter::BasicType_as_index(T_DOUBLE), L3_scratch);    // Result stub address array index
 
   // deopt needs to jump to here to enter the interpreter (return a result)
@@ -1398,7 +1397,7 @@
   __ ld_ptr(STATE(_stack), L1_scratch);                // Get current stack top
   __ sub(L1_scratch, entry_size, L1_scratch);
   __ st_ptr(L1_scratch, STATE(_stack));
-  __ ba(false, entry);
+  __ ba(entry);
   __ delayed()->add(L1_scratch, wordSize, L1_scratch);        // first real entry (undo prepush)
 
   // 2. move expression stack
@@ -1651,7 +1650,7 @@
 
   __ set((int)BytecodeInterpreter::got_monitors, L1_scratch);
   VALIDATE_STATE(G3_scratch, 5);
-  __ ba(false, call_interpreter);
+  __ ba(call_interpreter);
   __ delayed()->st(L1_scratch, STATE(_msg));
 
   // uncommon trap needs to jump to here to enter the interpreter (re-execute current bytecode)
@@ -1659,7 +1658,7 @@
 
   // QQQ what message do we send
 
-  __ ba(false, call_interpreter);
+  __ ba(call_interpreter);
   __ delayed()->ld_ptr(STATE(_frame_bottom), SP);                  // restore to full stack frame
 
   //=============================================================================
@@ -1675,7 +1674,7 @@
   // ready to resume the interpreter
 
   __ set((int)BytecodeInterpreter::deopt_resume, L1_scratch);
-  __ ba(false, call_interpreter);
+  __ ba(call_interpreter);
   __ delayed()->st(L1_scratch, STATE(_msg));
 
   // Current frame has caught an exception we need to dispatch to the
@@ -1763,7 +1762,7 @@
 
   // L1_scratch points to top of stack (prepushed)
 
-  __ ba(false, resume_interpreter);
+  __ ba(resume_interpreter);
   __ delayed()->mov(L1_scratch, O1);
 
   // An exception is being caught on return to a vanilla interpreter frame.
@@ -1773,7 +1772,7 @@
 
   __ ld_ptr(STATE(_frame_bottom), SP);                             // restore to full stack frame
   __ ld_ptr(STATE(_stack_base), O1);                               // empty java expression stack
-  __ ba(false, resume_interpreter);
+  __ ba(resume_interpreter);
   __ delayed()->sub(O1, wordSize, O1);                             // account for prepush
 
   // Return from interpreted method we return result appropriate to the caller (i.e. "recursive"
@@ -1852,7 +1851,7 @@
 
   __ set((int)BytecodeInterpreter::method_resume, L1_scratch);
   __ st(L1_scratch, STATE(_msg));
-  __ ba(false, call_interpreter_2);
+  __ ba(call_interpreter_2);
   __ delayed()->st_ptr(O1, STATE(_stack));
 
 
@@ -1867,8 +1866,8 @@
     __ cmp(Gtmp1, O7);                                                // returning to interpreter?
     __ brx(Assembler::equal, true, Assembler::pt, re_dispatch);       // yep
     __ delayed()->nop();
-    __ ba(false, re_dispatch);
-    __ delayed()->mov(G0, prevState);                                   // initial entry
+    __ ba(re_dispatch);
+    __ delayed()->mov(G0, prevState);                                 // initial entry
 
   }
 
@@ -2031,8 +2030,8 @@
   __ brx(Assembler::zero, false, Assembler::pt, unwind_and_forward);
   __ delayed()->nop();
 
-  __ ld_ptr(STATE(_locals), O1);                                   // get result of popping callee's args
-  __ ba(false, unwind_recursive_activation);
+  __ ld_ptr(STATE(_locals), O1); // get result of popping callee's args
+  __ ba(unwind_recursive_activation);
   __ delayed()->nop();
 
   interpreter_frame_manager = entry_point;
--- a/src/cpu/sparc/vm/interp_masm_sparc.cpp	Wed Aug 17 07:05:42 2011 -0400
+++ b/src/cpu/sparc/vm/interp_masm_sparc.cpp	Fri Aug 19 08:55:53 2011 -0700
@@ -236,17 +236,13 @@
     Label L;
     Register thr_state = G3_scratch;
     ld_ptr(G2_thread, JavaThread::jvmti_thread_state_offset(), thr_state);
-    tst(thr_state);
-    br(zero, false, pt, L); // if (thread->jvmti_thread_state() == NULL) exit;
-    delayed()->nop();
+    br_null_short(thr_state, pt, L); // if (thread->jvmti_thread_state() == NULL) exit;
 
     // Initiate earlyret handling only if it is not already being processed.
     // If the flag has the earlyret_processing bit set, it means that this code
     // is called *during* earlyret handling - we don't want to reenter.
     ld(thr_state, JvmtiThreadState::earlyret_state_offset(), G4_scratch);
-    cmp(G4_scratch, JvmtiThreadState::earlyret_pending);
-    br(Assembler::notEqual, false, pt, L);
-    delayed()->nop();
+    cmp_and_br_short(G4_scratch, JvmtiThreadState::earlyret_pending, Assembler::notEqual, pt, L);
 
     // Call Interpreter::remove_activation_early_entry() to get the address of the
     // same-named entrypoint in the generated interpreter code
@@ -566,9 +562,7 @@
 #ifdef _LP64
   sub(Rtemp, STACK_BIAS, Rtemp);  // Bias Rtemp before cmp to FP
 #endif
-  cmp(Rtemp, FP);
-  brx(Assembler::greaterUnsigned, false, Assembler::pn, Bad);
-  delayed()->nop();
+  cmp_and_brx_short(Rtemp, FP, Assembler::greaterUnsigned, Assembler::pn, Bad);
 
   // Saved SP must not be ridiculously below current SP.
   size_t maxstack = MAX2(JavaThread::stack_size_at_create(), (size_t) 4*K*K);
@@ -577,12 +571,9 @@
 #ifdef _LP64
   add(Rtemp, STACK_BIAS, Rtemp);  // Unbias Rtemp before cmp to Rsp
 #endif
-  cmp(Rsp, Rtemp);
-  brx(Assembler::lessUnsigned, false, Assembler::pn, Bad);
-  delayed()->nop();
-
-  br(Assembler::always, false, Assembler::pn, OK);
-  delayed()->nop();
+  cmp_and_brx_short(Rsp, Rtemp, Assembler::lessUnsigned, Assembler::pn, Bad);
+
+  ba_short(OK);
 
   bind(Bad);
   stop("on return to interpreted call, restored SP is corrupted");
@@ -630,8 +621,7 @@
 
     const Address interp_only(G2_thread, JavaThread::interp_only_mode_offset());
     ld(interp_only, scratch);
-    tst(scratch);
-    br(Assembler::notZero, true, Assembler::pn, skip_compiled_code);
+    cmp_zero_and_br(Assembler::notZero, scratch, skip_compiled_code, true, Assembler::pn);
     delayed()->ld_ptr(G5_method, in_bytes(methodOopDesc::interpreter_entry_offset()), target);
     bind(skip_compiled_code);
   }
@@ -641,8 +631,7 @@
 #ifdef ASSERT
   {
     Label ok;
-    br_notnull(target, false, Assembler::pt, ok);
-    delayed()->nop();
+    br_notnull_short(target, Assembler::pt, ok);
     stop("null entry point");
     bind(ok);
   }
@@ -769,6 +758,20 @@
 }
 
 
+void InterpreterMacroAssembler::get_cache_and_index_and_bytecode_at_bcp(Register cache,
+                                                                        Register temp,
+                                                                        Register bytecode,
+                                                                        int byte_no,
+                                                                        int bcp_offset,
+                                                                        size_t index_size) {
+  get_cache_and_index_at_bcp(cache, temp, bcp_offset, index_size);
+  ld_ptr(cache, constantPoolCacheOopDesc::base_offset() + ConstantPoolCacheEntry::indices_offset(), bytecode);
+  const int shift_count = (1 + byte_no) * BitsPerByte;
+  srl( bytecode, shift_count, bytecode);
+  and3(bytecode,        0xFF, bytecode);
+}
+
+
 void InterpreterMacroAssembler::get_cache_entry_pointer_at_bcp(Register cache, Register tmp,
                                                                int bcp_offset, size_t index_size) {
   assert(bcp_offset > 0, "bcp is still pointing to start of bytecode");
@@ -982,8 +985,7 @@
 
   // Don't unlock anything if the _do_not_unlock_if_synchronized flag
   // is set.
-  tstbool(G1_scratch);
-  br(Assembler::notZero, false, pn, no_unlock);
+  cmp_zero_and_br(Assembler::notZero, G1_scratch, no_unlock);
   delayed()->nop();
 
   // BasicObjectLock will be first in list, since this is a synchronized method. However, need
@@ -997,8 +999,7 @@
   add( top_most_monitor(), O1 );
 
   ld_ptr(O1, BasicObjectLock::obj_offset_in_bytes(), G3_scratch);
-  br_notnull(G3_scratch, false, pt, unlock);
-  delayed()->nop();
+  br_notnull_short(G3_scratch, pt, unlock);
 
   if (throw_monitor_exception) {
     // Entry already unlocked need to throw an exception
@@ -1011,8 +1012,7 @@
     if (install_monitor_exception) {
       MacroAssembler::call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::new_illegal_monitor_state_exception));
     }
-    ba(false, unlocked);
-    delayed()->nop();
+    ba_short(unlocked);
   }
 
   bind(unlock);
@@ -1037,15 +1037,13 @@
     add(top_most_monitor(), Rmptr, delta);
     { Label L;
       // ensure that Rmptr starts out above (or at) Rlimit
-      cmp(Rmptr, Rlimit);
-      brx(Assembler::greaterEqualUnsigned, false, pn, L);
-      delayed()->nop();
+      cmp_and_brx_short(Rmptr, Rlimit, Assembler::greaterEqualUnsigned, pn, L);
       stop("monitor stack has negative size");
       bind(L);
     }
     #endif
     bind(restart);
-    ba(false, entry);
+    ba(entry);
     delayed()->
     add(top_most_monitor(), Rmptr, delta);      // points to current entry, starting with bottom-most entry
 
@@ -1061,8 +1059,7 @@
       if (install_monitor_exception) {
         MacroAssembler::call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::new_illegal_monitor_state_exception));
       }
-      ba(false, restart);
-      delayed()->nop();
+      ba_short(restart);
     }
 
     bind(loop);
@@ -1073,9 +1070,7 @@
     #ifdef ASSERT
     { Label L;
       // ensure that Rmptr has not somehow stepped below Rlimit
-      cmp(Rmptr, Rlimit);
-      brx(Assembler::greaterEqualUnsigned, false, pn, L);
-      delayed()->nop();
+      cmp_and_brx_short(Rmptr, Rlimit, Assembler::greaterEqualUnsigned, pn, L);
       stop("ran off the end of the monitor stack");
       bind(L);
     }
@@ -1196,9 +1191,7 @@
       (address)StubRoutines::Sparc::atomic_memory_operation_lock_addr());
 
     // if the compare and exchange succeeded we are done (we saw an unlocked object)
-    cmp(mark_reg, temp_reg);
-    brx(Assembler::equal, true, Assembler::pt, done);
-    delayed()->nop();
+    cmp_and_brx_short(mark_reg, temp_reg, Assembler::equal, Assembler::pt, done);
 
     // We did not see an unlocked object so try the fast recursive case
 
@@ -1324,13 +1317,7 @@
 
 void InterpreterMacroAssembler::test_method_data_pointer(Label& zero_continue) {
   assert(ProfileInterpreter, "must be profiling interpreter");
-#ifdef _LP64
-  bpr(Assembler::rc_z, false, Assembler::pn, ImethodDataPtr, zero_continue);
-#else
-  tst(ImethodDataPtr);
-  br(Assembler::zero, false, Assembler::pn, zero_continue);
-#endif
-  delayed()->nop();
+  br_null_short(ImethodDataPtr, Assembler::pn, zero_continue);
 }
 
 void InterpreterMacroAssembler::verify_method_data_pointer() {
@@ -1376,31 +1363,18 @@
   Label done;
 
   // if no method data exists, and the counter is high enough, make one
-#ifdef _LP64
-  bpr(Assembler::rc_nz, false, Assembler::pn, ImethodDataPtr, done);
-#else
-  tst(ImethodDataPtr);
-  br(Assembler::notZero, false, Assembler::pn, done);
-#endif
+  br_notnull_short(ImethodDataPtr, Assembler::pn, done);
 
   // Test to see if we should create a method data oop
   AddressLiteral profile_limit((address) &InvocationCounter::InterpreterProfileLimit);
-#ifdef _LP64
-  delayed()->nop();
   sethi(profile_limit, Rtmp);
-#else
-  delayed()->sethi(profile_limit, Rtmp);
-#endif
   ld(Rtmp, profile_limit.low10(), Rtmp);
-  cmp(invocation_count, Rtmp);
-  br(Assembler::lessUnsigned, false, Assembler::pn, profile_continue);
-  delayed()->nop();
+  cmp_and_br_short(invocation_count, Rtmp, Assembler::lessUnsigned, Assembler::pn, profile_continue);
 
   // Build it now.
   call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::profile_method));
   set_method_data_pointer_for_bcp();
-  ba(false, profile_continue);
-  delayed()->nop();
+  ba_short(profile_continue);
   bind(done);
 }
 
@@ -1632,13 +1606,10 @@
     Label skip_receiver_profile;
     if (receiver_can_be_null) {
       Label not_null;
-      tst(receiver);
-      brx(Assembler::notZero, false, Assembler::pt, not_null);
-      delayed()->nop();
+      br_notnull_short(receiver, Assembler::pt, not_null);
       // We are making a call.  Increment the count for null receiver.
       increment_mdp_data_at(in_bytes(CounterData::count_offset()), scratch);
-      ba(false, skip_receiver_profile);
-      delayed()->nop();
+      ba_short(skip_receiver_profile);
       bind(not_null);
     }
 
@@ -1682,8 +1653,7 @@
     // The receiver is receiver[n].  Increment count[n].
     int count_offset = in_bytes(VirtualCallData::receiver_count_offset(row));
     increment_mdp_data_at(count_offset, scratch);
-    ba(false, done);
-    delayed()->nop();
+    ba_short(done);
     bind(next_test);
 
     if (test_for_null_also) {
@@ -1697,8 +1667,7 @@
           // Receiver did not match any saved receiver and there is no empty row for it.
           // Increment total counter to indicate polymorphic case.
           increment_mdp_data_at(in_bytes(CounterData::count_offset()), scratch);
-          ba(false, done);
-          delayed()->nop();
+          ba_short(done);
           bind(found_null);
         } else {
           brx(Assembler::notZero, false, Assembler::pt, done);
@@ -1729,8 +1698,7 @@
   mov(DataLayout::counter_increment, scratch);
   set_mdp_data_at(count_offset, scratch);
   if (start_row > 0) {
-    ba(false, done);
-    delayed()->nop();
+    ba_short(done);
   }
 }
 
@@ -1772,8 +1740,7 @@
 
       // The method data pointer needs to be updated to reflect the new target.
       update_mdp_by_offset(in_bytes(RetData::bci_displacement_offset(row)), scratch);
-      ba(false, profile_continue);
-      delayed()->nop();
+      ba_short(profile_continue);
       bind(next_test);
     }
 
@@ -1922,8 +1889,8 @@
 
     // untested("monitor stack expansion");
     compute_stack_base(Rtemp);
-    ba( false, start_copying );
-    delayed()->cmp( Rtemp, Rlimit); // done? duplicated below
+    ba(start_copying);
+    delayed()->cmp(Rtemp, Rlimit); // done? duplicated below
 
     // note: must copy from low memory upwards
     // On entry to loop,
@@ -2010,9 +1977,7 @@
   // untested("reg area corruption");
   add(Rindex, offset, Rscratch);
   add(Rlimit, 64 + STACK_BIAS, Rscratch1);
-  cmp(Rscratch, Rscratch1);
-  brx(Assembler::greaterEqualUnsigned, false, pn, L);
-  delayed()->nop();
+  cmp_and_brx_short(Rscratch, Rscratch1, Assembler::greaterEqualUnsigned, pn, L);
   stop("regsave area is being clobbered");
   bind(L);
 }
@@ -2174,9 +2139,7 @@
 
   AddressLiteral limit(&InvocationCounter::InterpreterBackwardBranchLimit);
   load_contents(limit, Rtmp);
-  cmp(backedge_count, Rtmp);
-  br(Assembler::lessUnsigned, false, Assembler::pt, did_not_overflow);
-  delayed()->nop();
+  cmp_and_br_short(backedge_count, Rtmp, Assembler::lessUnsigned, Assembler::pt, did_not_overflow);
 
   // When ProfileInterpreter is on, the backedge_count comes from the
   // methodDataOop, which value does not get reset on the call to
@@ -2196,15 +2159,11 @@
 
   // Was an OSR adapter generated?
   // O0 = osr nmethod
-  tst(O0);
-  brx(Assembler::zero, false, Assembler::pn, overflow_with_error);
-  delayed()->nop();
+  br_null_short(O0, Assembler::pn, overflow_with_error);
 
   // Has the nmethod been invalidated already?
   ld(O0, nmethod::entry_bci_offset(), O2);
-  cmp(O2, InvalidOSREntryBci);
-  br(Assembler::equal, false, Assembler::pn, overflow_with_error);
-  delayed()->nop();
+  cmp_and_br_short(O2, InvalidOSREntryBci, Assembler::equal, Assembler::pn, overflow_with_error);
 
   // migrate the interpreter frame off of the stack
 
@@ -2270,8 +2229,7 @@
   mov(reg, Rtmp);
   const int log2_bytecode_size_limit = 16;
   srl(Rtmp, log2_bytecode_size_limit, Rtmp);
-  br_notnull( Rtmp, false, pt, test );
-  delayed()->nop();
+  br_notnull_short( Rtmp, pt, test );
 
   // %%% should use call_VM_leaf here?
   save_frame_and_mov(0, Lmethod, O0, reg, O1);
@@ -2320,9 +2278,7 @@
     Register temp_reg = O5;
     const Address interp_only(G2_thread, JavaThread::interp_only_mode_offset());
     ld(interp_only, temp_reg);
-    tst(temp_reg);
-    br(zero, false, pt, L);
-    delayed()->nop();
+    cmp_and_br_short(temp_reg, 0, equal, pt, L);
     call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::post_method_entry));
     bind(L);
   }
@@ -2372,9 +2328,7 @@
     Register temp_reg = O5;
     const Address interp_only(G2_thread, JavaThread::interp_only_mode_offset());
     ld(interp_only, temp_reg);
-    tst(temp_reg);
-    br(zero, false, pt, L);
-    delayed()->nop();
+    cmp_and_br_short(temp_reg, 0, equal, pt, L);
 
     // Note: frame::interpreter_frame_result has a dependency on how the
     // method result is saved across the call to post_method_exit. For
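The genuinely new interpreter code in this file is get_cache_and_index_and_bytecode_at_bcp: it loads the constant-pool-cache entry for the current bytecode and extracts one byte of the entry's indices field (byte_no selects which invoke byte) via the shift-and-mask at the end of the function. That byte records the bytecode once the entry is resolved, so a caller can test resolution with a single load. A sketch of the kind of caller this enables, with hypothetical register choices and a bytecode() constant standing in for the expected opcode:

    // Hypothetical use: skip the runtime resolve call when the cache
    // entry already records the expected (resolved) bytecode.
    Label resolved;
    __ get_cache_and_index_and_bytecode_at_bcp(Rcache, Rtemp, Rbyte,
                                               byte_no, 1 /*bcp_offset*/);
    __ cmp(Rbyte, (int)bytecode());   // assumed expected-opcode constant
    __ br(Assembler::equal, false, Assembler::pt, resolved);
    __ delayed()->nop();
    // ...call into InterpreterRuntime to resolve the entry, then fall through...
    __ bind(resolved);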
--- a/src/cpu/sparc/vm/interp_masm_sparc.hpp	Wed Aug 17 07:05:42 2011 -0400
+++ b/src/cpu/sparc/vm/interp_masm_sparc.hpp	Fri Aug 19 08:55:53 2011 -0700
@@ -189,6 +189,7 @@
                                   setCCOrNot should_set_CC = dont_set_CC );
 
   void get_cache_and_index_at_bcp(Register cache, Register tmp, int bcp_offset, size_t index_size = sizeof(u2));
+  void get_cache_and_index_and_bytecode_at_bcp(Register cache, Register temp, Register bytecode, int byte_no, int bcp_offset, size_t index_size = sizeof(u2));
   void get_cache_entry_pointer_at_bcp(Register cache, Register tmp, int bcp_offset, size_t index_size = sizeof(u2));
   void get_cache_index_at_bcp(Register cache, Register tmp, int bcp_offset, size_t index_size = sizeof(u2));
 
--- a/src/cpu/sparc/vm/interpreter_sparc.cpp	Wed Aug 17 07:05:42 2011 -0400
+++ b/src/cpu/sparc/vm/interpreter_sparc.cpp	Fri Aug 19 08:55:53 2011 -0700
@@ -191,22 +191,19 @@
     // Optimization: see if there are any more args and get out prior to checking
     // all 16 float registers.  My guess is that this is rare.
     // If is_register is false, then we are done with the first six integer args.
-      __ tst(G4_scratch);
-      __ brx(Assembler::zero, false, Assembler::pt, done);
-      __ delayed()->nop();
-
+      __ br_null_short(G4_scratch, Assembler::pt, done);
     }
-    __ ba(false, NextArg);
+    __ ba(NextArg);
     __ delayed()->srl( G4_scratch, 2, G4_scratch );
 
     __ bind(LoadFloatArg);
     __ ldf( FloatRegisterImpl::S, a, ldarg.as_float_register(), 4);
-    __ ba(false, NextArg);
+    __ ba(NextArg);
     __ delayed()->srl( G4_scratch, 2, G4_scratch );
 
     __ bind(LoadDoubleArg);
     __ ldf( FloatRegisterImpl::D, a, ldarg.as_double_register() );
-    __ ba(false, NextArg);
+    __ ba(NextArg);
     __ delayed()->srl( G4_scratch, 2, G4_scratch );
 
     __ bind(NextArg);
@@ -234,8 +231,7 @@
   __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::frequency_counter_overflow), O2, O2, true);
   // returns verified_entry_point or NULL
   // we ignore it in any case
-  __ ba(false, Lcontinue);
-  __ delayed()->nop();
+  __ ba_short(Lcontinue);
 
 }
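The same delay-slot discipline shows in this file: a ba whose slot carries real work keeps the two-line form and only loses the annul argument, while a ba whose slot was a plain nop becomes ba_short. Both patterns appear in the hunks above:

    // Delay slot does useful work: keep explicit ba + delayed().
    __ ba(NextArg);
    __ delayed()->srl( G4_scratch, 2, G4_scratch );

    // Delay slot was padding: the short form takes care of it.
    __ ba_short(Lcontinue);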
 
--- a/src/cpu/sparc/vm/methodHandles_sparc.cpp	Wed Aug 17 07:05:42 2011 -0400
+++ b/src/cpu/sparc/vm/methodHandles_sparc.cpp	Fri Aug 19 08:55:53 2011 -0700
@@ -287,9 +287,7 @@
   BLOCK_COMMENT("verify_clean {");
   // Magic numbers must check out:
   __ set((int32_t) MAGIC_NUMBER_1, O7_temp);
-  __ cmp(O7_temp, L0_magic_number_1);
-  __ br(Assembler::equal, false, Assembler::pt, L_ok_1);
-  __ delayed()->nop();
+  __ cmp_and_br_short(O7_temp, L0_magic_number_1, Assembler::equal, Assembler::pt, L_ok_1);
   __ stop("damaged ricochet frame: MAGIC_NUMBER_1 not found");
 
   __ BIND(L_ok_1);
@@ -301,9 +299,7 @@
 #else
   Register FP_temp = FP;
 #endif
-  __ cmp(L4_saved_args_base, FP_temp);
-  __ br(Assembler::greaterEqualUnsigned, false, Assembler::pt, L_ok_2);
-  __ delayed()->nop();
+  __ cmp_and_brx_short(L4_saved_args_base, FP_temp, Assembler::greaterEqualUnsigned, Assembler::pt, L_ok_2);
   __ stop("damaged ricochet frame: L4 < FP");
 
   __ BIND(L_ok_2);
@@ -316,15 +312,11 @@
 
   __ BIND(L_ok_3);
   extract_conversion_dest_type(_masm, L5_conversion, O7_temp);
-  __ cmp(O7_temp, T_VOID);
-  __ br(Assembler::equal, false, Assembler::pt, L_ok_4);
-  __ delayed()->nop();
+  __ cmp_and_br_short(O7_temp, T_VOID, Assembler::equal, Assembler::pt, L_ok_4);
   extract_conversion_vminfo(_masm, L5_conversion, O5_temp);
   __ ld_ptr(L4_saved_args_base, __ argument_offset(O5_temp, O5_temp), O7_temp);
   assert(__ is_simm13(RETURN_VALUE_PLACEHOLDER), "must be simm13");
-  __ cmp(O7_temp, (int32_t) RETURN_VALUE_PLACEHOLDER);
-  __ brx(Assembler::equal, false, Assembler::pt, L_ok_4);
-  __ delayed()->nop();
+  __ cmp_and_brx_short(O7_temp, (int32_t) RETURN_VALUE_PLACEHOLDER, Assembler::equal, Assembler::pt, L_ok_4);
   __ stop("damaged ricochet frame: RETURN_VALUE_PLACEHOLDER not found");
   __ BIND(L_ok_4);
   BLOCK_COMMENT("} verify_clean");
@@ -363,9 +355,7 @@
   if (VerifyMethodHandles) {
     Label L_ok, L_bad;
     int32_t stack_move_limit = 0x0800;  // extra-large
-    __ cmp(stack_move_reg, stack_move_limit);
-    __ br(Assembler::greaterEqual, false, Assembler::pn, L_bad);
-    __ delayed()->nop();
+    __ cmp_and_br_short(stack_move_reg, stack_move_limit, Assembler::greaterEqual, Assembler::pn, L_bad);
     __ cmp(stack_move_reg, -stack_move_limit);
     __ br(Assembler::greater, false, Assembler::pt, L_ok);
     __ delayed()->nop();
@@ -401,13 +391,9 @@
   // Verify that argslot lies within (Gargs, FP].
   Label L_ok, L_bad;
   BLOCK_COMMENT("verify_argslot {");
+  __ cmp_and_brx_short(Gargs, argslot_reg, Assembler::greaterUnsigned, Assembler::pn, L_bad);
   __ add(FP, STACK_BIAS, temp_reg);  // STACK_BIAS is zero on !_LP64
-  __ cmp(argslot_reg, temp_reg);
-  __ brx(Assembler::greaterUnsigned, false, Assembler::pn, L_bad);
-  __ delayed()->nop();
-  __ cmp(Gargs, argslot_reg);
-  __ brx(Assembler::lessEqualUnsigned, false, Assembler::pt, L_ok);
-  __ delayed()->nop();
+  __ cmp_and_brx_short(argslot_reg, temp_reg, Assembler::lessEqualUnsigned, Assembler::pt, L_ok);
   __ BIND(L_bad);
   __ stop(error_message);
   __ BIND(L_ok);
@@ -434,14 +420,10 @@
   }
   __ add(arg_slot_base_reg, __ argument_offset(arg_slots, temp_reg), temp_reg);
   __ add(FP, STACK_BIAS, temp2_reg);  // STACK_BIAS is zero on !_LP64
-  __ cmp(temp_reg, temp2_reg);
-  __ brx(Assembler::greaterUnsigned, false, Assembler::pn, L_bad);
-  __ delayed()->nop();
+  __ cmp_and_brx_short(temp_reg, temp2_reg, Assembler::greaterUnsigned, Assembler::pn, L_bad);
   // Gargs points to the first word so adjust by BytesPerWord
   __ add(arg_slot_base_reg, BytesPerWord, temp_reg);
-  __ cmp(Gargs, temp_reg);
-  __ brx(Assembler::lessEqualUnsigned, false, Assembler::pt, L_ok);
-  __ delayed()->nop();
+  __ cmp_and_brx_short(Gargs, temp_reg, Assembler::lessEqualUnsigned, Assembler::pt, L_ok);
   __ BIND(L_bad);
   __ stop(error_message);
   __ BIND(L_ok);
@@ -502,21 +484,16 @@
   Label L_ok, L_bad;
   BLOCK_COMMENT("verify_klass {");
   __ verify_oop(obj_reg);
-  __ br_null(obj_reg, false, Assembler::pn, L_bad);
-  __ delayed()->nop();
+  __ br_null_short(obj_reg, Assembler::pn, L_bad);
   __ load_klass(obj_reg, temp_reg);
   __ set(ExternalAddress(klass_addr), temp2_reg);
   __ ld_ptr(Address(temp2_reg, 0), temp2_reg);
-  __ cmp(temp_reg, temp2_reg);
-  __ brx(Assembler::equal, false, Assembler::pt, L_ok);
-  __ delayed()->nop();
+  __ cmp_and_brx_short(temp_reg, temp2_reg, Assembler::equal, Assembler::pt, L_ok);
   intptr_t super_check_offset = klass->super_check_offset();
   __ ld_ptr(Address(temp_reg, super_check_offset), temp_reg);
   __ set(ExternalAddress(klass_addr), temp2_reg);
   __ ld_ptr(Address(temp2_reg, 0), temp2_reg);
-  __ cmp(temp_reg, temp2_reg);
-  __ brx(Assembler::equal, false, Assembler::pt, L_ok);
-  __ delayed()->nop();
+  __ cmp_and_brx_short(temp_reg, temp2_reg, Assembler::equal, Assembler::pt, L_ok);
   __ BIND(L_bad);
   __ stop(error_message);
   __ BIND(L_ok);
@@ -671,9 +648,7 @@
 #ifdef ASSERT
     {
       Label L_ok;
-      __ cmp(arg_slots.as_register(), 0);
-      __ br(Assembler::greaterEqual, false, Assembler::pt, L_ok);
-      __ delayed()->nop();
+      __ cmp_and_br_short(arg_slots.as_register(), 0, Assembler::greaterEqual, Assembler::pt, L_ok);
       __ stop("negative arg_slots");
       __ bind(L_ok);
     }
@@ -748,9 +723,7 @@
     __ ld_ptr(           Address(temp_reg, 0     ), temp2_reg);
     __ st_ptr(temp2_reg, Address(temp_reg, offset)           );
     __ add(temp_reg, wordSize, temp_reg);
-    __ cmp(temp_reg, argslot_reg);
-    __ brx(Assembler::lessUnsigned, false, Assembler::pt, loop);
-    __ delayed()->nop();  // FILLME
+    __ cmp_and_brx_short(temp_reg, argslot_reg, Assembler::lessUnsigned, Assembler::pt, loop);
   }
 
   // Now move the argslot down, to point to the opened-up space.
@@ -797,9 +770,7 @@
     __ ld_ptr(           Address(temp_reg, 0     ), temp2_reg);
     __ st_ptr(temp2_reg, Address(temp_reg, offset)           );
     __ sub(temp_reg, wordSize, temp_reg);
-    __ cmp(temp_reg, Gargs);
-    __ brx(Assembler::greaterEqualUnsigned, false, Assembler::pt, L_loop);
-    __ delayed()->nop();  // FILLME
+    __ cmp_and_brx_short(temp_reg, Gargs, Assembler::greaterEqualUnsigned, Assembler::pt, L_loop);
   }
 
   // And adjust the argslot address to point at the deletion point.
@@ -848,8 +819,7 @@
     __ delayed()->nop();
     __ ld_ptr(          Address(argslot_reg, 0), temp_reg);
     __ st_ptr(temp_reg, Address(Gargs,       0));
-    __ ba(false, L_break);
-    __ delayed()->nop();  // FILLME
+    __ ba_short(L_break);
     __ BIND(L_plural);
 
     // Loop for 2 or more:
@@ -863,9 +833,7 @@
     __ sub(Gargs,   wordSize, Gargs  );
     __ ld_ptr(           Address(top_reg, 0), temp2_reg);
     __ st_ptr(temp2_reg, Address(Gargs,   0));
-    __ cmp(top_reg, argslot_reg);
-    __ brx(Assembler::greaterUnsigned, false, Assembler::pt, L_loop);
-    __ delayed()->nop();  // FILLME
+    __ cmp_and_brx_short(top_reg, argslot_reg, Assembler::greaterUnsigned, Assembler::pt, L_loop);
     __ BIND(L_break);
   }
   BLOCK_COMMENT("} push_arg_slots");
@@ -897,17 +865,13 @@
       __ br(Assembler::lessEqual, false, Assembler::pn, L_bad);
       __ delayed()->nop();
     }
-    __ cmp(bottom_reg, top_reg);
-    __ brx(Assembler::lessUnsigned, false, Assembler::pt, L_ok);
-    __ delayed()->nop();
+    __ cmp_and_brx_short(bottom_reg, top_reg, Assembler::lessUnsigned, Assembler::pt, L_ok);
     __ BIND(L_bad);
     __ stop("valid bounds (copy up)");
     __ BIND(L_ok);
   }
 #endif
-  __ cmp(bottom_reg, top_reg);
-  __ brx(Assembler::greaterEqualUnsigned, false, Assembler::pn, L_break);
-  __ delayed()->nop();
+  __ cmp_and_brx_short(bottom_reg, top_reg, Assembler::greaterEqualUnsigned, Assembler::pn, L_break);
   // work top down to bottom, copying contiguous data upwards
   // In pseudo-code:
   //   while (--top >= bottom) *(top + distance) = *(top + 0);
@@ -916,9 +880,7 @@
   __ sub(top_reg, wordSize, top_reg);
   __ ld_ptr(           Address(top_reg, 0     ), temp2_reg);
   __ st_ptr(temp2_reg, Address(top_reg, offset)           );
-  __ cmp(top_reg, bottom_reg);
-  __ brx(Assembler::greaterUnsigned, false, Assembler::pt, L_loop);
-  __ delayed()->nop();  // FILLME
+  __ cmp_and_brx_short(top_reg, bottom_reg, Assembler::greaterUnsigned, Assembler::pt, L_loop);
   assert(Interpreter::stackElementSize == wordSize, "else change loop");
   __ BIND(L_break);
   BLOCK_COMMENT("} move_arg_slots_up");
@@ -951,17 +913,13 @@
       __ br(Assembler::greaterEqual, false, Assembler::pn, L_bad);
       __ delayed()->nop();
     }
-    __ cmp(bottom_reg, top_reg);
-    __ brx(Assembler::lessUnsigned, false, Assembler::pt, L_ok);
-    __ delayed()->nop();
+    __ cmp_and_brx_short(bottom_reg, top_reg, Assembler::lessUnsigned, Assembler::pt, L_ok);
     __ BIND(L_bad);
     __ stop("valid bounds (copy down)");
     __ BIND(L_ok);
   }
 #endif
-  __ cmp(bottom_reg, top_reg);
-  __ brx(Assembler::greaterEqualUnsigned, false, Assembler::pn, L_break);
-  __ delayed()->nop();
+  __ cmp_and_brx_short(bottom_reg, top_reg, Assembler::greaterEqualUnsigned, Assembler::pn, L_break);
   // work bottom up to top, copying contiguous data downwards
   // In pseudo-code:
   //   while (bottom < top) *(bottom - distance) = *(bottom + 0), bottom++;
@@ -970,9 +928,7 @@
   __ ld_ptr(           Address(bottom_reg, 0     ), temp2_reg);
   __ st_ptr(temp2_reg, Address(bottom_reg, offset)           );
   __ add(bottom_reg, wordSize, bottom_reg);
-  __ cmp(bottom_reg, top_reg);
-  __ brx(Assembler::lessUnsigned, false, Assembler::pt, L_loop);
-  __ delayed()->nop();  // FILLME
+  __ cmp_and_brx_short(bottom_reg, top_reg, Assembler::lessUnsigned, Assembler::pt, L_loop);
   assert(Interpreter::stackElementSize == wordSize, "else change loop");
   __ BIND(L_break);
   BLOCK_COMMENT("} move_arg_slots_down");
@@ -1170,7 +1126,7 @@
 
       adjust_SP_and_Gargs_down_by_slots(_masm, 3, noreg, noreg);
 
-      __ st_ptr(O0_code,     __ argument_address(constant(2), noreg, 0));
+      __ st    (O0_code,     __ argument_address(constant(2), noreg, 0));
       __ st_ptr(O1_actual,   __ argument_address(constant(1), noreg, 0));
       __ st_ptr(O2_required, __ argument_address(constant(0), noreg, 0));
       jump_from_method_handle(_masm, G5_method, O1_scratch, O2_scratch);
@@ -1329,9 +1285,7 @@
 
       Label L_done;
       __ ld_ptr(vmarg, O2_scratch);
-      __ tst(O2_scratch);
-      __ brx(Assembler::zero, false, Assembler::pn, L_done);  // No cast if null.
-      __ delayed()->nop();
+      __ br_null_short(O2_scratch, Assembler::pn, L_done);  // No cast if null.
       __ load_klass(O2_scratch, O2_scratch);
 
       // Live at this point:
@@ -1436,8 +1390,7 @@
 
       // this path is taken for int->byte, int->short
       __ sra(O1_scratch, G5_vminfo, O1_scratch);
-      __ ba(false, done);
-      __ delayed()->nop();
+      __ ba_short(done);
 
       __ bind(zero_extend);
       // this is taken for int->char
@@ -1860,9 +1813,7 @@
           BLOCK_COMMENT("verify collect_count_constant {");
           __ load_method_handle_vmslots(O3_scratch, G3_method_handle, O2_scratch);
           Label L_count_ok;
-          __ cmp(O3_scratch, collect_count_constant);
-          __ br(Assembler::equal, false, Assembler::pt, L_count_ok);
-          __ delayed()->nop();
+          __ cmp_and_br_short(O3_scratch, collect_count_constant, Assembler::equal, Assembler::pt, L_count_ok);
           __ stop("bad vminfo in AMH.conv");
           __ BIND(L_count_ok);
           BLOCK_COMMENT("} verify collect_count_constant");
@@ -1909,9 +1860,7 @@
           BLOCK_COMMENT("verify dest_slot_constant {");
           extract_conversion_vminfo(_masm, RicochetFrame::L5_conversion, O3_scratch);
           Label L_vminfo_ok;
-          __ cmp(O3_scratch, dest_slot_constant);
-          __ br(Assembler::equal, false, Assembler::pt, L_vminfo_ok);
-          __ delayed()->nop();
+          __ cmp_and_br_short(O3_scratch, dest_slot_constant, Assembler::equal, Assembler::pt, L_vminfo_ok);
           __ stop("bad vminfo in AMH.conv");
           __ BIND(L_vminfo_ok);
           BLOCK_COMMENT("} verify dest_slot_constant");
@@ -1951,14 +1900,10 @@
       // If there are variable parameters, use dynamic checks to skip around the whole mess.
       Label L_done;
       if (keep3_count.is_register()) {
-        __ tst(keep3_count.as_register());
-        __ br(Assembler::zero, false, Assembler::pn, L_done);
-        __ delayed()->nop();
+        __ cmp_and_br_short(keep3_count.as_register(), 0, Assembler::equal, Assembler::pn, L_done);
       }
       if (close_count.is_register()) {
-        __ cmp(close_count.as_register(), open_count);
-        __ br(Assembler::equal, false, Assembler::pn, L_done);
-        __ delayed()->nop();
+        __ cmp_and_br_short(close_count.as_register(), open_count, Assembler::equal, Assembler::pn, L_done);
       }
 
       if (move_keep3 && fix_arg_base) {
@@ -1999,8 +1944,7 @@
         }
 
         if (emit_guard) {
-          __ ba(false, L_done);  // assumes emit_move_up is true also
-          __ delayed()->nop();
+          __ ba_short(L_done);  // assumes emit_move_up is true also
           __ BIND(L_move_up);
         }
 
@@ -2133,8 +2077,7 @@
 
 #ifdef ASSERT
       { Label L_ok;
-        __ br_notnull(O7_temp, false, Assembler::pt, L_ok);
-        __ delayed()->nop();
+        __ br_notnull_short(O7_temp, Assembler::pt, L_ok);
         __ stop("bad method handle return");
         __ BIND(L_ok);
       }
@@ -2192,11 +2135,10 @@
         Label L_skip;
         if (length_constant < 0) {
           load_conversion_vminfo(_masm, G3_amh_conversion, O3_scratch);
-          __ br_zero(Assembler::notZero, false, Assembler::pn, O3_scratch, L_skip);
-          __ delayed()->nop();
+          __ cmp_zero_and_br(Assembler::notZero, O3_scratch, L_skip);
+          __ delayed()->nop(); // to avoid back-to-back cbcond instructions
         }
-        __ br_null(O1_array, false, Assembler::pn, L_array_is_empty);
-        __ delayed()->nop();
+        __ br_null_short(O1_array, Assembler::pn, L_array_is_empty);
         __ BIND(L_skip);
       }
       __ null_check(O1_array, oopDesc::klass_offset_in_bytes());
@@ -2210,8 +2152,7 @@
       Label L_ok_array_klass, L_bad_array_klass, L_bad_array_length;
       __ check_klass_subtype(O2_array_klass, O3_klass, O4_scratch, G5_scratch, L_ok_array_klass);
       // If we get here, the type check failed!
-      __ ba(false, L_bad_array_klass);
-      __ delayed()->nop();
+      __ ba_short(L_bad_array_klass);
       __ BIND(L_ok_array_klass);
 
       // Check length.
@@ -2247,8 +2188,7 @@
         __ BIND(L_array_is_empty);
         remove_arg_slots(_masm, -stack_move_unit() * array_slots,
                          O0_argslot, O1_scratch, O2_scratch, O3_scratch);
-        __ ba(false, L_args_done);  // no spreading to do
-        __ delayed()->nop();
+        __ ba_short(L_args_done);  // no spreading to do
         __ BIND(L_insert_arg_space);
         // come here in the usual case, stack_move < 0 (2 or more spread arguments)
         // Live: O1_array, O2_argslot_limit, O3_stack_move
@@ -2289,9 +2229,7 @@
                        Address(O1_source, 0), Address(O4_fill_ptr, 0),
                        O2_scratch);  // must be an even register for !_LP64 long moves (uses O2/O3)
         __ add(O1_source, type2aelembytes(elem_type), O1_source);
-        __ cmp(O4_fill_ptr, O0_argslot);
-        __ brx(Assembler::greaterUnsigned, false, Assembler::pt, L_loop);
-        __ delayed()->nop();  // FILLME
+        __ cmp_and_brx_short(O4_fill_ptr, O0_argslot, Assembler::greaterUnsigned, Assembler::pt, L_loop);
       } else if (length_constant == 0) {
         // nothing to copy
       } else {
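One hunk above keeps an explicit nop and documents why: with cmp_zero_and_br and the br_null_short right after it both candidates for cbcond encoding on T4, dropping the nop could leave the two branches back to back, which the new Assembler guards forbid (no_cbcond_before()). The nop in the first branch's delay slot keeps them apart:

    __ cmp_zero_and_br(Assembler::notZero, O3_scratch, L_skip);
    __ delayed()->nop(); // to avoid back-to-back cbcond instructions
    __ br_null_short(O1_array, Assembler::pn, L_array_is_empty);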
--- a/src/cpu/sparc/vm/sharedRuntime_sparc.cpp	Wed Aug 17 07:05:42 2011 -0400
+++ b/src/cpu/sparc/vm/sharedRuntime_sparc.cpp	Fri Aug 19 08:55:53 2011 -0700
@@ -600,7 +600,7 @@
 void AdapterGenerator::patch_callers_callsite() {
   Label L;
   __ ld_ptr(G5_method, in_bytes(methodOopDesc::code_offset()), G3_scratch);
-  __ br_null(G3_scratch, false, __ pt, L);
+  __ br_null(G3_scratch, false, Assembler::pt, L);
   // Schedule the branch target address early.
   __ delayed()->ld_ptr(G5_method, in_bytes(methodOopDesc::interpreter_entry_offset()), G3_scratch);
   // Call into the VM to patch the caller, then jump to compiled callee
@@ -1127,8 +1127,7 @@
       Label loop;
       __ bind(loop);
       __ sub(L0, 1, L0);
-      __ br_null(L0, false, Assembler::pt, loop);
-      __ delayed()->nop();
+      __ br_null_short(L0, Assembler::pt, loop);
 
       __ restore();
     }
@@ -1202,7 +1201,7 @@
     // the call site corrected.
     __ ld_ptr(G5_method, in_bytes(methodOopDesc::code_offset()), G3_scratch);
     __ bind(ok2);
-    __ br_null(G3_scratch, false, __ pt, skip_fixup);
+    __ br_null(G3_scratch, false, Assembler::pt, skip_fixup);
     __ delayed()->ld_ptr(G5_method, in_bytes(methodOopDesc::interpreter_entry_offset()), G3_scratch);
     __ jump_to(ic_miss, G3_scratch);
     __ delayed()->nop();
@@ -1779,9 +1778,7 @@
     AddressLiteral ic_miss(SharedRuntime::get_ic_miss_stub());
     __ verify_oop(O0);
     __ load_klass(O0, temp_reg);
-    __ cmp(temp_reg, G5_inline_cache_reg);
-    __ brx(Assembler::equal, true, Assembler::pt, L);
-    __ delayed()->nop();
+    __ cmp_and_brx_short(temp_reg, G5_inline_cache_reg, Assembler::equal, Assembler::pt, L);
 
     __ jump_to(ic_miss, temp_reg);
     __ delayed()->nop();
@@ -2182,8 +2179,7 @@
 #ifdef ASSERT
     { Label L;
     __ ld_ptr(G2_thread, in_bytes(Thread::pending_exception_offset()), O0);
-    __ br_null(O0, false, Assembler::pt, L);
-    __ delayed()->nop();
+    __ br_null_short(O0, Assembler::pt, L);
     __ stop("no pending exception allowed on exit from IR::monitorenter");
     __ bind(L);
     }
@@ -2298,9 +2294,7 @@
     Address suspend_state(G2_thread, JavaThread::suspend_flags_offset());
     __ br(Assembler::notEqual, false, Assembler::pn, L);
     __ delayed()->ld(suspend_state, G3_scratch);
-    __ cmp(G3_scratch, 0);
-    __ br(Assembler::equal, false, Assembler::pt, no_block);
-    __ delayed()->nop();
+    __ cmp_and_br_short(G3_scratch, 0, Assembler::equal, Assembler::pt, no_block);
     __ bind(L);
 
     // Block.  Save any potential method result value before the operation and
@@ -2328,9 +2322,7 @@
 
   Label no_reguard;
   __ ld(G2_thread, JavaThread::stack_guard_state_offset(), G3_scratch);
-  __ cmp(G3_scratch, JavaThread::stack_guard_yellow_disabled);
-  __ br(Assembler::notEqual, false, Assembler::pt, no_reguard);
-  __ delayed()->nop();
+  __ cmp_and_br_short(G3_scratch, JavaThread::stack_guard_yellow_disabled, Assembler::notEqual, Assembler::pt, no_reguard);
 
     save_native_result(masm, ret_type, stack_slots);
   __ call(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages));
@@ -2382,8 +2374,7 @@
 #ifdef ASSERT
     { Label L;
     __ ld_ptr(G2_thread, in_bytes(Thread::pending_exception_offset()), O0);
-    __ br_null(O0, false, Assembler::pt, L);
-    __ delayed()->nop();
+    __ br_null_short(O0, Assembler::pt, L);
     __ stop("no pending exception allowed on exit from IR::monitorexit");
     __ bind(L);
     }
@@ -2639,9 +2630,7 @@
     AddressLiteral ic_miss(SharedRuntime::get_ic_miss_stub());
     __ verify_oop(O0);
     __ ld_ptr(O0, oopDesc::klass_offset_in_bytes(), temp_reg);
-    __ cmp(temp_reg, G5_inline_cache_reg);
-    __ brx(Assembler::equal, true, Assembler::pt, L);
-    __ delayed()->nop();
+    __ cmp_and_brx_short(temp_reg, G5_inline_cache_reg, Assembler::equal, Assembler::pt, L);
 
     __ jump_to(ic_miss, temp_reg);
     __ delayed()->nop();
@@ -3143,8 +3132,7 @@
 
   gen_new_frame(masm, deopt);        // allocate an interpreter frame
 
-  __ tst(O4array_size);
-  __ br(Assembler::notZero, false, Assembler::pn, loop);
+  __ cmp_zero_and_br(Assembler::notZero, O4array_size, loop);
   __ delayed()->add(O3array, wordSize, O3array);
   __ ld_ptr(G3pcs, 0, O7);                      // load final frame new pc
 
@@ -3221,7 +3209,7 @@
   // pc is now in O7. Return values are still in the expected places
 
   map = RegisterSaver::save_live_registers(masm, 0, &frame_size_words);
-  __ ba(false, cont);
+  __ ba(cont);
   __ delayed()->mov(Deoptimization::Unpack_deopt, L0deopt_mode);
 
   int exception_offset = __ offset() - start;
@@ -3256,8 +3244,7 @@
     // verify that there is really an exception oop in exception_oop
     Label has_exception;
     __ ld_ptr(G2_thread, JavaThread::exception_oop_offset(), Oexception);
-    __ br_notnull(Oexception, false, Assembler::pt, has_exception);
-    __ delayed()-> nop();
+    __ br_notnull_short(Oexception, Assembler::pt, has_exception);
     __ stop("no exception in thread");
     __ bind(has_exception);
 
@@ -3265,14 +3252,13 @@
     Label no_pending_exception;
     Address exception_addr(G2_thread, Thread::pending_exception_offset());
     __ ld_ptr(exception_addr, Oexception);
-    __ br_null(Oexception, false, Assembler::pt, no_pending_exception);
-    __ delayed()->nop();
+    __ br_null_short(Oexception, Assembler::pt, no_pending_exception);
     __ stop("must not have pending exception here");
     __ bind(no_pending_exception);
   }
 #endif
 
-  __ ba(false, cont);
+  __ ba(cont);
   __ delayed()->mov(Deoptimization::Unpack_exception, L0deopt_mode);
 
   //
@@ -3313,9 +3299,7 @@
   RegisterSaver::restore_result_registers(masm);
 
   Label noException;
-  __ cmp(G4deopt_mode, Deoptimization::Unpack_exception);   // Was exception pending?
-  __ br(Assembler::notEqual, false, Assembler::pt, noException);
-  __ delayed()->nop();
+  __ cmp_and_br_short(G4deopt_mode, Deoptimization::Unpack_exception, Assembler::notEqual, Assembler::pt, noException);
 
   // Move the pending exception from exception_oop to Oexception so
   // the pending exception will be picked up the interpreter.
@@ -3359,9 +3343,7 @@
   // In 32 bit, C2 returns longs in G1 so restore the saved G1 into
   // I0/I1 if the return value is long.
   Label not_long;
-  __ cmp(O0,T_LONG);
-  __ br(Assembler::notEqual, false, Assembler::pt, not_long);
-  __ delayed()->nop();
+  __ cmp_and_br_short(O0,T_LONG, Assembler::notEqual, Assembler::pt, not_long);
   __ ldd(saved_Greturn1_addr,I0);
   __ bind(not_long);
 #endif
@@ -3534,9 +3516,7 @@
   Label pending;
 
   __ ld_ptr(G2_thread, in_bytes(Thread::pending_exception_offset()), O1);
-  __ tst(O1);
-  __ brx(Assembler::notEqual, true, Assembler::pn, pending);
-  __ delayed()->nop();
+  __ br_notnull_short(O1, Assembler::pn, pending);
 
   RegisterSaver::restore_live_registers(masm);
 
@@ -3623,9 +3603,7 @@
   Label pending;
 
   __ ld_ptr(G2_thread, in_bytes(Thread::pending_exception_offset()), O1);
-  __ tst(O1);
-  __ brx(Assembler::notEqual, true, Assembler::pn, pending);
-  __ delayed()->nop();
+  __ br_notnull_short(O1, Assembler::pn, pending);
 
   // get the returned methodOop
 
--- a/src/cpu/sparc/vm/sparc.ad	Wed Aug 17 07:05:42 2011 -0400
+++ b/src/cpu/sparc/vm/sparc.ad	Fri Aug 19 08:55:53 2011 -0700
@@ -471,9 +471,6 @@
 source %{
 #define __ _masm.
 
-// Block initializing store
-#define ASI_BLK_INIT_QUAD_LDD_P    0xE2
-
 // tertiary op of a LoadP or StoreP encoding
 #define REGP_OP true
 
@@ -1693,7 +1690,6 @@
 
 void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
   MacroAssembler _masm(&cbuf);
-  Label L;
   Register G5_ic_reg  = reg_to_register_object(Matcher::inline_cache_reg_encode());
   Register temp_reg   = G3;
   assert( G5_ic_reg != temp_reg, "conflicting registers" );
@@ -1835,8 +1831,10 @@
 //
 // NOTE: If the platform does not provide any short branch variants, then
 //       this method should return false for offset 0.
-bool Matcher::is_short_branch_offset(int rule, int offset) {
-  return false;
+bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
+  // The passed offset is relative to the address of the branch,
+  // so it does not need to be adjusted.
+  return UseCBCond && Assembler::is_simm(offset, 12);
 }
 
 const bool Matcher::isSimpleConstant64(jlong value) {
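
The new Matcher::is_short_branch_offset gates shortening on UseCBCond plus a signed 12-bit byte offset. That limit is consistent with cbcond carrying a 10-bit signed word displacement: 2^9 words of 4 bytes each in either direction is +/-2048 bytes, i.e. exactly the simm12 byte range (the field width is an assumption inferred from the check, not quoted from the encoding tables):

    #include <cassert>

    int main() {
      const int d10_words      = 1 << 9;  // signed 10-bit word displacement: +/-512 words
      const int bytes_per_word = 4;       // SPARC instructions are 4 bytes
      assert(d10_words * bytes_per_word == (1 << 11));  // +/-2048 bytes == simm12
      return 0;
    }
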
@@ -2315,60 +2313,23 @@
     __ delayed()->nop();
   %}
 
-  enc_class enc_bp( Label labl, cmpOp cmp, flagsReg cc ) %{
+  enc_class enc_bp( label labl, cmpOp cmp, flagsReg cc ) %{
     MacroAssembler _masm(&cbuf);
-    Label &L = *($labl$$label);
+    Label* L = $labl$$label;
     Assembler::Predict predict_taken =
-      cbuf.is_backward_branch(L) ? Assembler::pt : Assembler::pn;
-
-    __ bp( (Assembler::Condition)($cmp$$cmpcode), false, Assembler::icc, predict_taken, L);
+      cbuf.is_backward_branch(*L) ? Assembler::pt : Assembler::pn;
+
+    __ bp( (Assembler::Condition)($cmp$$cmpcode), false, Assembler::icc, predict_taken, *L);
     __ delayed()->nop();
   %}
 
-  enc_class enc_bpl( Label labl, cmpOp cmp, flagsRegL cc ) %{
+  enc_class enc_bpr( label labl, cmpOp_reg cmp, iRegI op1 ) %{
     MacroAssembler _masm(&cbuf);
-    Label &L = *($labl$$label);
+    Label* L = $labl$$label;
     Assembler::Predict predict_taken =
-      cbuf.is_backward_branch(L) ? Assembler::pt : Assembler::pn;
-
-    __ bp( (Assembler::Condition)($cmp$$cmpcode), false, Assembler::xcc, predict_taken, L);
-    __ delayed()->nop();
-  %}
-
-  enc_class enc_bpx( Label labl, cmpOp cmp, flagsRegP cc ) %{
-    MacroAssembler _masm(&cbuf);
-    Label &L = *($labl$$label);
-    Assembler::Predict predict_taken =
-      cbuf.is_backward_branch(L) ? Assembler::pt : Assembler::pn;
-
-    __ bp( (Assembler::Condition)($cmp$$cmpcode), false, Assembler::ptr_cc, predict_taken, L);
-    __ delayed()->nop();
-  %}
-
-  enc_class enc_fbp( Label labl, cmpOpF cmp, flagsRegF cc ) %{
-    MacroAssembler _masm(&cbuf);
-    Label &L = *($labl$$label);
-    Assembler::Predict predict_taken =
-      cbuf.is_backward_branch(L) ? Assembler::pt : Assembler::pn;
-
-    __ fbp( (Assembler::Condition)($cmp$$cmpcode), false, (Assembler::CC)($cc$$reg), predict_taken, L);
-    __ delayed()->nop();
-  %}
-
-  enc_class enc_ba( Label labl ) %{
-    MacroAssembler _masm(&cbuf);
-    Label &L = *($labl$$label);
-    __ ba(false, L);
-    __ delayed()->nop();
-  %}
-
-  enc_class enc_bpr( Label labl, cmpOp_reg cmp, iRegI op1 ) %{
-    MacroAssembler _masm(&cbuf);
-    Label &L = *$labl$$label;
-    Assembler::Predict predict_taken =
-      cbuf.is_backward_branch(L) ? Assembler::pt : Assembler::pn;
-
-    __ bpr( (Assembler::RCondition)($cmp$$cmpcode), false, predict_taken, as_Register($op1$$reg), L);
+      cbuf.is_backward_branch(*L) ? Assembler::pt : Assembler::pn;
+
+    __ bpr( (Assembler::RCondition)($cmp$$cmpcode), false, predict_taken, as_Register($op1$$reg), *L);
     __ delayed()->nop();
   %}
 
@@ -2855,10 +2816,10 @@
     Register    nof_bytes_arg   = reg_to_register_object($cnt$$reg);
     Register    nof_bytes_tmp    = reg_to_register_object($temp$$reg);
     Register    base_pointer_arg = reg_to_register_object($base$$reg);
-
+  
     Label loop;
     __ mov(nof_bytes_arg, nof_bytes_tmp);
-
+  
     // Loop and clear, walking backwards through the array.
     // nof_bytes_tmp (if >0) is always the number of bytes to zero
     __ bind(loop);
@@ -2986,7 +2947,7 @@
     __ brx(Assembler::equal, true, Assembler::pn, Ldone);
     __ delayed()->add(G0, 1, result_reg);
 
-    __ br_on_reg_cond(Assembler::rc_z, true, Assembler::pn, cnt_reg, Ldone);
+    __ cmp_zero_and_br(Assembler::zero, cnt_reg, Ldone, true, Assembler::pn);
     __ delayed()->add(G0, 1, result_reg); // count == 0
 
     //rename registers
@@ -3006,7 +2967,7 @@
     // Compare char[] arrays aligned to 4 bytes.
     __ char_arrays_equals(str1_reg, str2_reg, limit_reg, result_reg,
                           chr1_reg, chr2_reg, Ldone);
-    __ ba(false,Ldone);
+    __ ba(Ldone);
     __ delayed()->add(G0, 1, result_reg);
 
     // char by char compare
@@ -3065,7 +3026,7 @@
     __ br(Assembler::notEqual, true, Assembler::pn, Ldone);
     __ delayed()->mov(G0, result_reg);     // not equal
 
-    __ br_on_reg_cond(Assembler::rc_z, true, Assembler::pn, tmp1_reg, Ldone);
+    __ cmp_zero_and_br(Assembler::zero, tmp1_reg, Ldone, true, Assembler::pn);
     __ delayed()->add(G0, 1, result_reg); // zero-length arrays are equal
 
     // load array addresses
@@ -3352,10 +3313,10 @@
 
 //----------Instruction Attributes---------------------------------------------
 ins_attrib ins_cost(DEFAULT_COST); // Required cost attribute
-ins_attrib ins_size(32);       // Required size attribute (in bits)
-ins_attrib ins_pc_relative(0); // Required PC Relative flag
-ins_attrib ins_short_branch(0); // Required flag: is this instruction a
-                                // non-matching short branch variant of some
+ins_attrib ins_size(32);           // Required size attribute (in bits)
+ins_attrib ins_avoid_back_to_back(0); // instruction should not be generated back to back
+ins_attrib ins_short_branch(0);    // Required flag: is this instruction a
+                                   // non-matching short branch variant of some
                                    // long branch?
 
 //----------OPERANDS-----------------------------------------------------------
@@ -3441,6 +3402,15 @@
   interface(CONST_INTER);
 %}
 
+// Integer Immediate: 5-bit
+operand immI5() %{
+  predicate(Assembler::is_simm(n->get_int(), 5));
+  match(ConI);
+  op_cost(0);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
 // Integer Immediate: 0-bit
 operand immI0() %{
   predicate(n->get_int() == 0);
@@ -3664,6 +3634,15 @@
   interface(CONST_INTER);
 %}
 
+// Integer Immediate: 5-bit
+operand immL5() %{
+  predicate(n->get_long() == (int)n->get_long() && Assembler::is_simm((int)n->get_long(), 5));
+  match(ConL);
+  op_cost(0);
+  format %{ %}
+  interface(CONST_INTER);
+%}
+
 // Long Immediate: 13-bit
 operand immL13() %{
   predicate((-4096L < n->get_long()) && (n->get_long() <= 4095L));
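
The immI5/immL5 operands above exist because the register-immediate form of cbcond carries only a 5-bit signed immediate (-16..15); immL5 additionally demands that the long constant survive truncation to int before the simm5 test, exactly as its predicate is written. A standalone sketch of the two predicates, with is_simm as assumed earlier:

    #include <cstdint>

    static bool is_simm(long value, int nbits) {
      long lim = 1L << (nbits - 1);
      return -lim <= value && value < lim;
    }

    static bool fits_immI5(int32_t v) { return is_simm(v, 5); }

    // immL5: representable as int, and that int fits in 5 signed bits.
    static bool fits_immL5(int64_t v) {
      return v == (int64_t)(int32_t)v && is_simm((int32_t)v, 5);
    }

    int main() {
      return (fits_immI5(15) && !fits_immI5(16) &&
              fits_immL5(-16) && !fits_immL5(1LL << 33)) ? 0 : 1;
    }
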
@@ -5196,6 +5175,42 @@
     MS  : R;
 %}
 
+// Compare and branch
+pipe_class cmp_br_reg_reg(Universe br, cmpOp cmp, iRegI src1, iRegI src2, label labl, flagsReg cr) %{
+    instruction_count(2); has_delay_slot;
+    cr    : E(write);
+    src1  : R(read);
+    src2  : R(read);
+    IALU  : R;
+    BR    : R;
+%}
+
+// Compare and branch
+pipe_class cmp_br_reg_imm(Universe br, cmpOp cmp, iRegI src1, immI13 src2, label labl, flagsReg cr) %{
+    instruction_count(2); has_delay_slot;
+    cr    : E(write);
+    src1  : R(read);
+    IALU  : R;
+    BR    : R;
+%}
+
+// Compare and branch using cbcond
+pipe_class cbcond_reg_reg(Universe br, cmpOp cmp, iRegI src1, iRegI src2, label labl) %{
+    single_instruction;
+    src1  : E(read);
+    src2  : E(read);
+    IALU  : R;
+    BR    : R;
+%}
+
+// Compare and branch using cbcond
+pipe_class cbcond_reg_imm(Universe br, cmpOp cmp, iRegI src1, immI5 src2, label labl) %{
+    single_instruction;
+    src1  : E(read);
+    IALU  : R;
+    BR    : R;
+%}
+
 pipe_class br_fcc(Universe br, cmpOpF cc, flagsReg cr, label labl) %{
     single_instruction_with_delay_slot;
     cr    : E(read);
@@ -6251,6 +6266,7 @@
 instruct prefetchr( memory mem ) %{
   match( PrefetchRead mem );
   ins_cost(MEMORY_REF_COST);
+  size(4);
 
   format %{ "PREFETCH $mem,0\t! Prefetch read-many" %}
   opcode(Assembler::prefetch_op3);
@@ -6259,9 +6275,9 @@
 %}
 
 instruct prefetchw( memory mem ) %{
-  predicate(AllocatePrefetchStyle != 3 );
   match( PrefetchWrite mem );
   ins_cost(MEMORY_REF_COST);
+  size(4);
 
   format %{ "PREFETCH $mem,2\t! Prefetch write-many (and read)" %}
   opcode(Assembler::prefetch_op3);
@@ -6269,24 +6285,62 @@
   ins_pipe(iload_mem);
 %}
 
-// Use BIS instruction to prefetch.
-instruct prefetchw_bis( memory mem ) %{
-  predicate(AllocatePrefetchStyle == 3);
-  match( PrefetchWrite mem );
-  ins_cost(MEMORY_REF_COST);
-
-  format %{ "STXA   G0,$mem\t! // Block initializing store" %}
-  ins_encode %{
-     Register base = as_Register($mem$$base);
-     int disp = $mem$$disp;
-     if (disp != 0) {
-       __ add(base, AllocatePrefetchStepSize, base);
-     }
-     __ stxa(G0, base, G0, ASI_BLK_INIT_QUAD_LDD_P);
+// Prefetch instructions for allocation.
+
+instruct prefetchAlloc( memory mem ) %{
+  predicate(AllocatePrefetchInstr == 0);
+  match( PrefetchAllocation mem );
+  ins_cost(MEMORY_REF_COST);
+  size(4);
+
+  format %{ "PREFETCH $mem,2\t! Prefetch allocation" %}
+  opcode(Assembler::prefetch_op3);
+  ins_encode( form3_mem_prefetch_write( mem ) );
+  ins_pipe(iload_mem);
+%}
+
+// Use BIS instruction to prefetch for allocation.
+// Could fault, need space at the end of TLAB.
+instruct prefetchAlloc_bis( iRegP dst ) %{
+  predicate(AllocatePrefetchInstr == 1);
+  match( PrefetchAllocation dst );
+  ins_cost(MEMORY_REF_COST);
+  size(4);
+
+  format %{ "STXA   [$dst]\t! // Prefetch allocation using BIS" %}
+  ins_encode %{
+    __ stxa(G0, $dst$$Register, G0, Assembler::ASI_ST_BLKINIT_PRIMARY);
   %}
   ins_pipe(istore_mem_reg);
 %}
 
+// The code below computes the next cache line address to prefetch.
+#ifndef _LP64
+instruct cacheLineAdr( iRegP dst, iRegP src, immI13 mask ) %{
+  match(Set dst (CastX2P (AndI (CastP2X src) mask)));
+  ins_cost(DEFAULT_COST);
+  size(4);
+
+  format %{ "AND    $src,$mask,$dst\t! next cache line address" %}
+  ins_encode %{
+    __ and3($src$$Register, $mask$$constant, $dst$$Register);
+  %}
+  ins_pipe(ialu_reg_imm);
+%}
+#else
+instruct cacheLineAdr( iRegP dst, iRegP src, immL13 mask ) %{
+  match(Set dst (CastX2P (AndL (CastP2X src) mask)));
+  ins_cost(DEFAULT_COST);
+  size(4);
+
+  format %{ "AND    $src,$mask,$dst\t! next cache line address" %}
+  ins_encode %{
+    __ and3($src$$Register, $mask$$constant, $dst$$Register);
+  %}
+  ins_pipe(ialu_reg_imm);
+%}
+#endif
+
 //----------Store Instructions-------------------------------------------------
 // Store Byte
 instruct storeB(memory mem, iRegI src) %{
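
cacheLineAdr matches a pointer AND-ed with a constant mask; masking with the two's complement of the line size aligns the address down to a line boundary, and because a mask such as -64 sign-extends, it fits the 13-bit immediate the operands require. A sketch of the arithmetic, assuming a 64-byte line (the real mask arrives as the matched constant):

    #include <cstdint>
    #include <cstdio>

    int main() {
      const uintptr_t line = 64;        // assumed cache-line size
      uintptr_t p    = 0x12345;         // some address near the allocation point
      uintptr_t mask = ~(line - 1);     // == (uintptr_t)-64, fits simm13
      uintptr_t base = p & mask;        // AND $src,$mask,$dst
      printf("%#lx -> line base %#lx, next line %#lx\n",
             (unsigned long)p, (unsigned long)base, (unsigned long)(base + line));
      return 0;
    }
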
@@ -6644,8 +6698,7 @@
 %}
 
 instruct membar_acquire_lock() %{
-  match(MemBarAcquire);
-  predicate(Matcher::prior_fast_lock(n));
+  match(MemBarAcquireLock);
   ins_cost(0);
 
   size(0);
@@ -6665,8 +6718,7 @@
 %}
 
 instruct membar_release_lock() %{
-  match(MemBarRelease);
-  predicate(Matcher::post_fast_unlock(n));
+  match(MemBarReleaseLock);
   ins_cost(0);
 
   size(0);
@@ -9220,7 +9272,6 @@
     __ jmp(label_reg, G0);
     __ delayed()->nop();
   %}
-  ins_pc_relative(1);
   ins_pipe(ialu_reg_reg);
 %}
 
@@ -9232,13 +9283,33 @@
   size(8);
   ins_cost(BRANCH_COST);
   format %{ "BA     $labl" %}
-  // Prim = bits 24-22, Secnd = bits 31-30, Tert = cond
-  opcode(Assembler::br_op2, Assembler::branch_op, Assembler::always);
-  ins_encode( enc_ba( labl ) );
-  ins_pc_relative(1);
+  ins_encode %{
+    Label* L = $labl$$label;
+    __ ba(*L);
+    __ delayed()->nop();
+  %}
   ins_pipe(br);
 %}
 
+// Direct Branch, short with no delay slot
+instruct branch_short(label labl) %{
+  match(Goto);
+  predicate(UseCBCond);
+  effect(USE labl);
+
+  size(4);
+  ins_cost(BRANCH_COST);
+  format %{ "BA     $labl\t! short branch" %}
+  ins_encode %{ 
+    Label* L = $labl$$label;
+    assert(__ use_cbcond(*L), "back to back cbcond");
+    __ ba_short(*L);
+  %}
+  ins_short_branch(1);
+  ins_avoid_back_to_back(1);
+  ins_pipe(cbcond_reg_imm);
+%}
+
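
ins_avoid_back_to_back(1) marks every cbcond-emitting instruct so that two of them are never placed adjacently — the same pipeline restriction that motivates the explicit delayed()->nop() "to avoid back-to-back cbcond instructions" earlier in this changeset and the assert(__ use_cbcond(*L), ...) in each short form. A toy model of the padding rule (the attribute is real; the pass below is only a sketch):

    #include <vector>
    #include <cstdio>

    struct Inst { const char* text; bool avoid_back_to_back; };

    // Insert a nop between adjacent instructions that both carry the flag.
    static std::vector<Inst> pad(const std::vector<Inst>& in) {
      std::vector<Inst> out;
      for (const Inst& i : in) {
        if (!out.empty() && out.back().avoid_back_to_back && i.avoid_back_to_back)
          out.push_back(Inst{"nop", false});
        out.push_back(i);
      }
      return out;
    }

    int main() {
      std::vector<Inst> code = { Inst{"cbcond ...,L1", true},
                                 Inst{"cbcond ...,L2", true} };
      for (const Inst& i : pad(code)) puts(i.text);
      return 0;
    }
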
 // Conditional Direct Branch
 instruct branchCon(cmpOp cmp, flagsReg icc, label labl) %{
   match(If cmp icc);
@@ -9249,10 +9320,539 @@
   format %{ "BP$cmp   $icc,$labl" %}
   // Prim = bits 24-22, Secnd = bits 31-30
   ins_encode( enc_bp( labl, cmp, icc ) );
-  ins_pc_relative(1);
   ins_pipe(br_cc);
 %}
 
+instruct branchConU(cmpOpU cmp, flagsRegU icc, label labl) %{
+  match(If cmp icc);
+  effect(USE labl);
+
+  ins_cost(BRANCH_COST);
+  format %{ "BP$cmp  $icc,$labl" %}
+  // Prim = bits 24-22, Secnd = bits 31-30
+  ins_encode( enc_bp( labl, cmp, icc ) );
+  ins_pipe(br_cc);
+%}
+
+instruct branchConP(cmpOpP cmp, flagsRegP pcc, label labl) %{
+  match(If cmp pcc);
+  effect(USE labl);
+
+  size(8);
+  ins_cost(BRANCH_COST);
+  format %{ "BP$cmp  $pcc,$labl" %}
+  ins_encode %{
+    Label* L = $labl$$label;
+    Assembler::Predict predict_taken =
+      cbuf.is_backward_branch(*L) ? Assembler::pt : Assembler::pn;
+
+    __ bp( (Assembler::Condition)($cmp$$cmpcode), false, Assembler::ptr_cc, predict_taken, *L);
+    __ delayed()->nop();
+  %}
+  ins_pipe(br_cc);
+%}
+
+instruct branchConF(cmpOpF cmp, flagsRegF fcc, label labl) %{
+  match(If cmp fcc);
+  effect(USE labl);
+
+  size(8);
+  ins_cost(BRANCH_COST);
+  format %{ "FBP$cmp $fcc,$labl" %}
+  ins_encode %{
+    Label* L = $labl$$label;
+    Assembler::Predict predict_taken =
+      cbuf.is_backward_branch(*L) ? Assembler::pt : Assembler::pn;
+
+    __ fbp( (Assembler::Condition)($cmp$$cmpcode), false, (Assembler::CC)($fcc$$reg), predict_taken, *L);
+    __ delayed()->nop();
+  %}
+  ins_pipe(br_fcc);
+%}
+
+instruct branchLoopEnd(cmpOp cmp, flagsReg icc, label labl) %{
+  match(CountedLoopEnd cmp icc);
+  effect(USE labl);
+
+  size(8);
+  ins_cost(BRANCH_COST);
+  format %{ "BP$cmp   $icc,$labl\t! Loop end" %}
+  // Prim = bits 24-22, Secnd = bits 31-30
+  ins_encode( enc_bp( labl, cmp, icc ) );
+  ins_pipe(br_cc);
+%}
+
+instruct branchLoopEndU(cmpOpU cmp, flagsRegU icc, label labl) %{
+  match(CountedLoopEnd cmp icc);
+  effect(USE labl);
+
+  size(8);
+  ins_cost(BRANCH_COST);
+  format %{ "BP$cmp  $icc,$labl\t! Loop end" %}
+  // Prim = bits 24-22, Secnd = bits 31-30
+  ins_encode( enc_bp( labl, cmp, icc ) );
+  ins_pipe(br_cc);
+%}
+
+// Compare and branch instructions
+instruct cmpI_reg_branch(cmpOp cmp, iRegI op1, iRegI op2, label labl, flagsReg icc) %{
+  match(If cmp (CmpI op1 op2));
+  effect(USE labl, KILL icc);
+
+  size(12);
+  ins_cost(BRANCH_COST);
+  format %{ "CMP    $op1,$op2\t! int\n\t"
+            "BP$cmp   $labl" %}
+  ins_encode %{
+    Label* L = $labl$$label;
+    Assembler::Predict predict_taken =
+      cbuf.is_backward_branch(*L) ? Assembler::pt : Assembler::pn;
+    __ cmp($op1$$Register, $op2$$Register);
+    __ bp((Assembler::Condition)($cmp$$cmpcode), false, Assembler::icc, predict_taken, *L);
+    __ delayed()->nop();
+  %}
+  ins_pipe(cmp_br_reg_reg);
+%}
+
+instruct cmpI_imm_branch(cmpOp cmp, iRegI op1, immI5 op2, label labl, flagsReg icc) %{
+  match(If cmp (CmpI op1 op2));
+  effect(USE labl, KILL icc);
+
+  size(12);
+  ins_cost(BRANCH_COST);
+  format %{ "CMP    $op1,$op2\t! int\n\t"
+            "BP$cmp   $labl" %}
+  ins_encode %{
+    Label* L = $labl$$label;
+    Assembler::Predict predict_taken =
+      cbuf.is_backward_branch(*L) ? Assembler::pt : Assembler::pn;
+    __ cmp($op1$$Register, $op2$$constant);
+    __ bp((Assembler::Condition)($cmp$$cmpcode), false, Assembler::icc, predict_taken, *L);
+    __ delayed()->nop();
+  %}
+  ins_pipe(cmp_br_reg_imm);
+%}
+
+instruct cmpU_reg_branch(cmpOpU cmp, iRegI op1, iRegI op2, label labl, flagsRegU icc) %{
+  match(If cmp (CmpU op1 op2));
+  effect(USE labl, KILL icc);
+
+  size(12);
+  ins_cost(BRANCH_COST);
+  format %{ "CMP    $op1,$op2\t! unsigned\n\t"
+            "BP$cmp  $labl" %}
+  ins_encode %{
+    Label* L = $labl$$label;
+    Assembler::Predict predict_taken =
+      cbuf.is_backward_branch(*L) ? Assembler::pt : Assembler::pn;
+    __ cmp($op1$$Register, $op2$$Register);
+    __ bp((Assembler::Condition)($cmp$$cmpcode), false, Assembler::icc, predict_taken, *L);
+    __ delayed()->nop();
+  %}
+  ins_pipe(cmp_br_reg_reg);
+%}
+
+instruct cmpU_imm_branch(cmpOpU cmp, iRegI op1, immI5 op2, label labl, flagsRegU icc) %{
+  match(If cmp (CmpU op1 op2));
+  effect(USE labl, KILL icc);
+
+  size(12);
+  ins_cost(BRANCH_COST);
+  format %{ "CMP    $op1,$op2\t! unsigned\n\t"
+            "BP$cmp  $labl" %}
+  ins_encode %{
+    Label* L = $labl$$label;
+    Assembler::Predict predict_taken =
+      cbuf.is_backward_branch(*L) ? Assembler::pt : Assembler::pn;
+    __ cmp($op1$$Register, $op2$$constant);
+    __ bp((Assembler::Condition)($cmp$$cmpcode), false, Assembler::icc, predict_taken, *L);
+    __ delayed()->nop();
+  %}
+  ins_pipe(cmp_br_reg_imm);
+%}
+
+instruct cmpL_reg_branch(cmpOp cmp, iRegL op1, iRegL op2, label labl, flagsRegL xcc) %{
+  match(If cmp (CmpL op1 op2));
+  effect(USE labl, KILL xcc);
+
+  size(12);
+  ins_cost(BRANCH_COST);
+  format %{ "CMP    $op1,$op2\t! long\n\t"
+            "BP$cmp   $labl" %}
+  ins_encode %{
+    Label* L = $labl$$label;
+    Assembler::Predict predict_taken =
+      cbuf.is_backward_branch(*L) ? Assembler::pt : Assembler::pn;
+    __ cmp($op1$$Register, $op2$$Register);
+    __ bp((Assembler::Condition)($cmp$$cmpcode), false, Assembler::xcc, predict_taken, *L);
+    __ delayed()->nop();
+  %}
+  ins_pipe(cmp_br_reg_reg);
+%}
+
+instruct cmpL_imm_branch(cmpOp cmp, iRegL op1, immL5 op2, label labl, flagsRegL xcc) %{
+  match(If cmp (CmpL op1 op2));
+  effect(USE labl, KILL xcc);
+
+  size(12);
+  ins_cost(BRANCH_COST);
+  format %{ "CMP    $op1,$op2\t! long\n\t"
+            "BP$cmp   $labl" %}
+  ins_encode %{
+    Label* L = $labl$$label;
+    Assembler::Predict predict_taken =
+      cbuf.is_backward_branch(*L) ? Assembler::pt : Assembler::pn;
+    __ cmp($op1$$Register, $op2$$constant);
+    __ bp((Assembler::Condition)($cmp$$cmpcode), false, Assembler::xcc, predict_taken, *L);
+    __ delayed()->nop();
+  %}
+  ins_pipe(cmp_br_reg_imm);
+%}
+
+// Compare Pointers and branch
+instruct cmpP_reg_branch(cmpOpP cmp, iRegP op1, iRegP op2, label labl, flagsRegP pcc) %{
+  match(If cmp (CmpP op1 op2));
+  effect(USE labl, KILL pcc);
+
+  size(12);
+  ins_cost(BRANCH_COST);
+  format %{ "CMP    $op1,$op2\t! ptr\n\t"
+            "B$cmp   $labl" %}
+  ins_encode %{
+    Label* L = $labl$$label;
+    Assembler::Predict predict_taken =
+      cbuf.is_backward_branch(*L) ? Assembler::pt : Assembler::pn;
+    __ cmp($op1$$Register, $op2$$Register);
+    __ bp((Assembler::Condition)($cmp$$cmpcode), false, Assembler::ptr_cc, predict_taken, *L);
+    __ delayed()->nop();
+  %}
+  ins_pipe(cmp_br_reg_reg);
+%}
+
+instruct cmpP_null_branch(cmpOpP cmp, iRegP op1, immP0 null, label labl, flagsRegP pcc) %{
+  match(If cmp (CmpP op1 null));
+  effect(USE labl, KILL pcc);
+
+  size(12);
+  ins_cost(BRANCH_COST);
+  format %{ "CMP    $op1,0\t! ptr\n\t"
+            "B$cmp   $labl" %}
+  ins_encode %{
+    Label* L = $labl$$label;
+    Assembler::Predict predict_taken =
+      cbuf.is_backward_branch(*L) ? Assembler::pt : Assembler::pn;
+    __ cmp($op1$$Register, G0);
+    // bpr() is not used here since it has a shorter branch distance.
+    __ bp((Assembler::Condition)($cmp$$cmpcode), false, Assembler::ptr_cc, predict_taken, *L);
+    __ delayed()->nop();
+  %}
+  ins_pipe(cmp_br_reg_reg);
+%}
+
+instruct cmpN_reg_branch(cmpOp cmp, iRegN op1, iRegN op2, label labl, flagsReg icc) %{
+  match(If cmp (CmpN op1 op2));
+  effect(USE labl, KILL icc);
+
+  size(12);
+  ins_cost(BRANCH_COST);
+  format %{ "CMP    $op1,$op2\t! compressed ptr\n\t"
+            "BP$cmp   $labl" %}
+  ins_encode %{
+    Label* L = $labl$$label;
+    Assembler::Predict predict_taken =
+      cbuf.is_backward_branch(*L) ? Assembler::pt : Assembler::pn;
+    __ cmp($op1$$Register, $op2$$Register);
+    __ bp((Assembler::Condition)($cmp$$cmpcode), false, Assembler::icc, predict_taken, *L);
+    __ delayed()->nop();
+  %}
+  ins_pipe(cmp_br_reg_reg);
+%}
+
+instruct cmpN_null_branch(cmpOp cmp, iRegN op1, immN0 null, label labl, flagsReg icc) %{
+  match(If cmp (CmpN op1 null));
+  effect(USE labl, KILL icc);
+
+  size(12);
+  ins_cost(BRANCH_COST);
+  format %{ "CMP    $op1,0\t! compressed ptr\n\t"
+            "BP$cmp   $labl" %}
+  ins_encode %{
+    Label* L = $labl$$label;
+    Assembler::Predict predict_taken =
+      cbuf.is_backward_branch(*L) ? Assembler::pt : Assembler::pn;
+    __ cmp($op1$$Register, G0);
+    __ bp((Assembler::Condition)($cmp$$cmpcode), false, Assembler::icc, predict_taken, *L);
+    __ delayed()->nop();
+  %}
+  ins_pipe(cmp_br_reg_reg);
+%}
+
+// Loop back branch
+instruct cmpI_reg_branchLoopEnd(cmpOp cmp, iRegI op1, iRegI op2, label labl, flagsReg icc) %{
+  match(CountedLoopEnd cmp (CmpI op1 op2));
+  effect(USE labl, KILL icc);
+
+  size(12);
+  ins_cost(BRANCH_COST);
+  format %{ "CMP    $op1,$op2\t! int\n\t"
+            "BP$cmp   $labl\t! Loop end" %}
+  ins_encode %{
+    Label* L = $labl$$label;
+    Assembler::Predict predict_taken =
+      cbuf.is_backward_branch(*L) ? Assembler::pt : Assembler::pn;
+    __ cmp($op1$$Register, $op2$$Register);
+    __ bp((Assembler::Condition)($cmp$$cmpcode), false, Assembler::icc, predict_taken, *L);
+    __ delayed()->nop();
+  %}
+  ins_pipe(cmp_br_reg_reg);
+%}
+
+instruct cmpI_imm_branchLoopEnd(cmpOp cmp, iRegI op1, immI5 op2, label labl, flagsReg icc) %{
+  match(CountedLoopEnd cmp (CmpI op1 op2));
+  effect(USE labl, KILL icc);
+
+  size(12);
+  ins_cost(BRANCH_COST);
+  format %{ "CMP    $op1,$op2\t! int\n\t"
+            "BP$cmp   $labl\t! Loop end" %}
+  ins_encode %{
+    Label* L = $labl$$label;
+    Assembler::Predict predict_taken =
+      cbuf.is_backward_branch(*L) ? Assembler::pt : Assembler::pn;
+    __ cmp($op1$$Register, $op2$$constant);
+    __ bp((Assembler::Condition)($cmp$$cmpcode), false, Assembler::icc, predict_taken, *L);
+    __ delayed()->nop();
+  %}
+  ins_pipe(cmp_br_reg_imm);
+%}
+
+// Short compare and branch instructions
+instruct cmpI_reg_branch_short(cmpOp cmp, iRegI op1, iRegI op2, label labl, flagsReg icc) %{
+  match(If cmp (CmpI op1 op2));
+  predicate(UseCBCond);
+  effect(USE labl, KILL icc);
+
+  size(4);
+  ins_cost(BRANCH_COST);
+  format %{ "CWB$cmp  $op1,$op2,$labl\t! int" %}
+  ins_encode %{
+    Label* L = $labl$$label;
+    assert(__ use_cbcond(*L), "back to back cbcond");
+    __ cbcond((Assembler::Condition)($cmp$$cmpcode), Assembler::icc, $op1$$Register, $op2$$Register, *L);
+  %}
+  ins_short_branch(1);
+  ins_avoid_back_to_back(1);
+  ins_pipe(cbcond_reg_reg);
+%}
+
+instruct cmpI_imm_branch_short(cmpOp cmp, iRegI op1, immI5 op2, label labl, flagsReg icc) %{
+  match(If cmp (CmpI op1 op2));
+  predicate(UseCBCond);
+  effect(USE labl, KILL icc);
+
+  size(4);
+  ins_cost(BRANCH_COST);
+  format %{ "CWB$cmp  $op1,$op2,$labl\t! int" %}
+  ins_encode %{
+    Label* L = $labl$$label;
+    assert(__ use_cbcond(*L), "back to back cbcond");
+    __ cbcond((Assembler::Condition)($cmp$$cmpcode), Assembler::icc, $op1$$Register, $op2$$constant, *L);
+  %}
+  ins_short_branch(1);
+  ins_avoid_back_to_back(1);
+  ins_pipe(cbcond_reg_imm);
+%}
+
+instruct cmpU_reg_branch_short(cmpOpU cmp, iRegI op1, iRegI op2, label labl, flagsRegU icc) %{
+  match(If cmp (CmpU op1 op2));
+  predicate(UseCBCond);
+  effect(USE labl, KILL icc);
+
+  size(4);
+  ins_cost(BRANCH_COST);
+  format %{ "CWB$cmp $op1,$op2,$labl\t! unsigned" %}
+  ins_encode %{
+    Label* L = $labl$$label;
+    assert(__ use_cbcond(*L), "back to back cbcond");
+    __ cbcond((Assembler::Condition)($cmp$$cmpcode), Assembler::icc, $op1$$Register, $op2$$Register, *L);
+  %}
+  ins_short_branch(1);
+  ins_avoid_back_to_back(1);
+  ins_pipe(cbcond_reg_reg);
+%}
+
+instruct cmpU_imm_branch_short(cmpOpU cmp, iRegI op1, immI5 op2, label labl, flagsRegU icc) %{
+  match(If cmp (CmpU op1 op2));
+  predicate(UseCBCond);
+  effect(USE labl, KILL icc);
+
+  size(4);
+  ins_cost(BRANCH_COST);
+  format %{ "CWB$cmp $op1,$op2,$labl\t! unsigned" %}
+  ins_encode %{
+    Label* L = $labl$$label;
+    assert(__ use_cbcond(*L), "back to back cbcond");
+    __ cbcond((Assembler::Condition)($cmp$$cmpcode), Assembler::icc, $op1$$Register, $op2$$constant, *L);
+  %}
+  ins_short_branch(1);
+  ins_avoid_back_to_back(1);
+  ins_pipe(cbcond_reg_imm);
+%}
+
+instruct cmpL_reg_branch_short(cmpOp cmp, iRegL op1, iRegL op2, label labl, flagsRegL xcc) %{
+  match(If cmp (CmpL op1 op2));
+  predicate(UseCBCond);
+  effect(USE labl, KILL xcc);
+
+  size(4);
+  ins_cost(BRANCH_COST);
+  format %{ "CXB$cmp  $op1,$op2,$labl\t! long" %}
+  ins_encode %{
+    Label* L = $labl$$label;
+    assert(__ use_cbcond(*L), "back to back cbcond");
+    __ cbcond((Assembler::Condition)($cmp$$cmpcode), Assembler::xcc, $op1$$Register, $op2$$Register, *L);
+  %}
+  ins_short_branch(1);
+  ins_avoid_back_to_back(1);
+  ins_pipe(cbcond_reg_reg);
+%}
+
+instruct cmpL_imm_branch_short(cmpOp cmp, iRegL op1, immL5 op2, label labl, flagsRegL xcc) %{
+  match(If cmp (CmpL op1 op2));
+  predicate(UseCBCond);
+  effect(USE labl, KILL xcc);
+
+  size(4);
+  ins_cost(BRANCH_COST);
+  format %{ "CXB$cmp  $op1,$op2,$labl\t! long" %}
+  ins_encode %{
+    Label* L = $labl$$label;
+    assert(__ use_cbcond(*L), "back to back cbcond");
+    __ cbcond((Assembler::Condition)($cmp$$cmpcode), Assembler::xcc, $op1$$Register, $op2$$constant, *L);
+  %}
+  ins_short_branch(1);
+  ins_avoid_back_to_back(1);
+  ins_pipe(cbcond_reg_imm);
+%}
+
+// Compare Pointers and branch
+instruct cmpP_reg_branch_short(cmpOpP cmp, iRegP op1, iRegP op2, label labl, flagsRegP pcc) %{
+  match(If cmp (CmpP op1 op2));
+  predicate(UseCBCond);
+  effect(USE labl, KILL pcc);
+
+  size(4);
+  ins_cost(BRANCH_COST);
+#ifdef _LP64
+  format %{ "CXB$cmp $op1,$op2,$labl\t! ptr" %}
+#else
+  format %{ "CWB$cmp $op1,$op2,$labl\t! ptr" %}
+#endif
+  ins_encode %{
+    Label* L = $labl$$label;
+    assert(__ use_cbcond(*L), "back to back cbcond");
+    __ cbcond((Assembler::Condition)($cmp$$cmpcode), Assembler::ptr_cc, $op1$$Register, $op2$$Register, *L);
+  %}
+  ins_short_branch(1);
+  ins_avoid_back_to_back(1);
+  ins_pipe(cbcond_reg_reg);
+%}
+
+instruct cmpP_null_branch_short(cmpOpP cmp, iRegP op1, immP0 null, label labl, flagsRegP pcc) %{
+  match(If cmp (CmpP op1 null));
+  predicate(UseCBCond);
+  effect(USE labl, KILL pcc);
+
+  size(4);
+  ins_cost(BRANCH_COST);
+#ifdef _LP64
+  format %{ "CXB$cmp $op1,0,$labl\t! ptr" %}
+#else
+  format %{ "CWB$cmp $op1,0,$labl\t! ptr" %}
+#endif
+  ins_encode %{
+    Label* L = $labl$$label;
+    assert(__ use_cbcond(*L), "back to back cbcond");
+    __ cbcond((Assembler::Condition)($cmp$$cmpcode), Assembler::ptr_cc, $op1$$Register, G0, *L);
+  %}
+  ins_short_branch(1);
+  ins_avoid_back_to_back(1);
+  ins_pipe(cbcond_reg_reg);
+%}
+
+instruct cmpN_reg_branch_short(cmpOp cmp, iRegN op1, iRegN op2, label labl, flagsReg icc) %{
+  match(If cmp (CmpN op1 op2));
+  predicate(UseCBCond);
+  effect(USE labl, KILL icc);
+
+  size(4);
+  ins_cost(BRANCH_COST);
+  format %{ "CWB$cmp  $op1,op2,$labl\t! compressed ptr" %}
+  ins_encode %{
+    Label* L = $labl$$label;
+    assert(__ use_cbcond(*L), "back to back cbcond");
+    __ cbcond((Assembler::Condition)($cmp$$cmpcode), Assembler::icc, $op1$$Register, $op2$$Register, *L);
+  %}
+  ins_short_branch(1);
+  ins_avoid_back_to_back(1);
+  ins_pipe(cbcond_reg_reg);
+%}
+
+instruct cmpN_null_branch_short(cmpOp cmp, iRegN op1, immN0 null, label labl, flagsReg icc) %{
+  match(If cmp (CmpN op1 null));
+  predicate(UseCBCond);
+  effect(USE labl, KILL icc);
+
+  size(4);
+  ins_cost(BRANCH_COST);
+  format %{ "CWB$cmp  $op1,0,$labl\t! compressed ptr" %}
+  ins_encode %{
+    Label* L = $labl$$label;
+    assert(__ use_cbcond(*L), "back to back cbcond");
+    __ cbcond((Assembler::Condition)($cmp$$cmpcode), Assembler::icc, $op1$$Register, G0, *L);
+  %}
+  ins_short_branch(1);
+  ins_avoid_back_to_back(1);
+  ins_pipe(cbcond_reg_reg);
+%}
+
+// Loop back branch
+instruct cmpI_reg_branchLoopEnd_short(cmpOp cmp, iRegI op1, iRegI op2, label labl, flagsReg icc) %{
+  match(CountedLoopEnd cmp (CmpI op1 op2));
+  predicate(UseCBCond);
+  effect(USE labl, KILL icc);
+
+  size(4);
+  ins_cost(BRANCH_COST);
+  format %{ "CWB$cmp  $op1,$op2,$labl\t! Loop end" %}
+  ins_encode %{
+    Label* L = $labl$$label;
+    assert(__ use_cbcond(*L), "back to back cbcond");
+    __ cbcond((Assembler::Condition)($cmp$$cmpcode), Assembler::icc, $op1$$Register, $op2$$Register, *L);
+  %}
+  ins_short_branch(1);
+  ins_avoid_back_to_back(1);
+  ins_pipe(cbcond_reg_reg);
+%}
+
+instruct cmpI_imm_branchLoopEnd_short(cmpOp cmp, iRegI op1, immI5 op2, label labl, flagsReg icc) %{
+  match(CountedLoopEnd cmp (CmpI op1 op2));
+  predicate(UseCBCond);
+  effect(USE labl, KILL icc);
+
+  size(4);
+  ins_cost(BRANCH_COST);
+  format %{ "CWB$cmp  $op1,$op2,$labl\t! Loop end" %}
+  ins_encode %{
+    Label* L = $labl$$label;
+    assert(__ use_cbcond(*L), "back to back cbcond");
+    __ cbcond((Assembler::Condition)($cmp$$cmpcode), Assembler::icc, $op1$$Register, $op2$$constant, *L);
+  %}
+  ins_short_branch(1);
+  ins_avoid_back_to_back(1);
+  ins_pipe(cbcond_reg_imm);
+%}
+
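
In the format strings above, the short instructs pair icc with CWB (compare word and branch) and xcc with CXB (compare extended and branch), so 32-bit compares and compressed pointers print CWB while longs and LP64 pointers print CXB; the mapping is mechanical:

    #include <cstdio>

    enum CC { icc, xcc };  // 32-bit vs 64-bit condition codes, as in the instructs

    static const char* cbcond_mnemonic(CC cc) { return cc == icc ? "CWB" : "CXB"; }

    int main() {
      printf("CmpI -> %s, CmpL -> %s\n", cbcond_mnemonic(icc), cbcond_mnemonic(xcc));
      return 0;
    }
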
 // Branch-on-register tests all 64 bits.  We assume that values
 // in 64-bit registers always remain zero or sign extended
 // unless our code munges the high bits.  Interrupts can chop
@@ -9266,7 +9866,6 @@
   ins_cost(BRANCH_COST);
   format %{ "BR$cmp   $op1,$labl" %}
   ins_encode( enc_bpr( labl, cmp, op1 ) );
-  ins_pc_relative(1);
   ins_pipe(br_reg);
 %}
 
@@ -9279,7 +9878,6 @@
   ins_cost(BRANCH_COST);
   format %{ "BR$cmp   $op1,$labl" %}
   ins_encode( enc_bpr( labl, cmp, op1 ) );
-  ins_pc_relative(1);
   ins_pipe(br_reg);
 %}
 
@@ -9292,72 +9890,9 @@
   ins_cost(BRANCH_COST);
   format %{ "BR$cmp   $op1,$labl" %}
   ins_encode( enc_bpr( labl, cmp, op1 ) );
-  ins_pc_relative(1);
   ins_pipe(br_reg);
 %}
 
-instruct branchConU(cmpOpU cmp, flagsRegU icc, label labl) %{
-  match(If cmp icc);
-  effect(USE labl);
-
-  format %{ "BP$cmp  $icc,$labl" %}
-  // Prim = bits 24-22, Secnd = bits 31-30
-  ins_encode( enc_bp( labl, cmp, icc ) );
-  ins_pc_relative(1);
-  ins_pipe(br_cc);
-%}
-
-instruct branchConP(cmpOpP cmp, flagsRegP pcc, label labl) %{
-  match(If cmp pcc);
-  effect(USE labl);
-
-  size(8);
-  ins_cost(BRANCH_COST);
-  format %{ "BP$cmp  $pcc,$labl" %}
-  // Prim = bits 24-22, Secnd = bits 31-30
-  ins_encode( enc_bpx( labl, cmp, pcc ) );
-  ins_pc_relative(1);
-  ins_pipe(br_cc);
-%}
-
-instruct branchConF(cmpOpF cmp, flagsRegF fcc, label labl) %{
-  match(If cmp fcc);
-  effect(USE labl);
-
-  size(8);
-  ins_cost(BRANCH_COST);
-  format %{ "FBP$cmp $fcc,$labl" %}
-  // Prim = bits 24-22, Secnd = bits 31-30
-  ins_encode( enc_fbp( labl, cmp, fcc ) );
-  ins_pc_relative(1);
-  ins_pipe(br_fcc);
-%}
-
-instruct branchLoopEnd(cmpOp cmp, flagsReg icc, label labl) %{
-  match(CountedLoopEnd cmp icc);
-  effect(USE labl);
-
-  size(8);
-  ins_cost(BRANCH_COST);
-  format %{ "BP$cmp   $icc,$labl\t! Loop end" %}
-  // Prim = bits 24-22, Secnd = bits 31-30
-  ins_encode( enc_bp( labl, cmp, icc ) );
-  ins_pc_relative(1);
-  ins_pipe(br_cc);
-%}
-
-instruct branchLoopEndU(cmpOpU cmp, flagsRegU icc, label labl) %{
-  match(CountedLoopEnd cmp icc);
-  effect(USE labl);
-
-  size(8);
-  ins_cost(BRANCH_COST);
-  format %{ "BP$cmp  $icc,$labl\t! Loop end" %}
-  // Prim = bits 24-22, Secnd = bits 31-30
-  ins_encode( enc_bp( labl, cmp, icc ) );
-  ins_pc_relative(1);
-  ins_pipe(br_cc);
-%}
 
 // ============================================================================
 // Long Compare
@@ -9387,9 +9922,14 @@
   size(8);
   ins_cost(BRANCH_COST);
   format %{ "BP$cmp   $xcc,$labl" %}
-  // Prim = bits 24-22, Secnd = bits 31-30
-  ins_encode( enc_bpl( labl, cmp, xcc ) );
-  ins_pc_relative(1);
+  ins_encode %{
+    Label* L = $labl$$label;
+    Assembler::Predict predict_taken =
+      cbuf.is_backward_branch(*L) ? Assembler::pt : Assembler::pn;
+
+    __ bp( (Assembler::Condition)($cmp$$cmpcode), false, Assembler::xcc, predict_taken, *L);
+    __ delayed()->nop();
+  %}
   ins_pipe(br_cc);
 %}
 
@@ -9517,7 +10057,6 @@
   ins_cost(CALL_COST);
   format %{ "CALL,static  ; NOP ==> " %}
   ins_encode( Java_Static_Call( meth ), call_epilog );
-  ins_pc_relative(1);
   ins_pipe(simple_call);
 %}
 
@@ -9527,11 +10066,10 @@
   predicate(((CallStaticJavaNode*)n)->is_method_handle_invoke());
   effect(USE meth, KILL l7_mh_SP_save);
 
-  size(8);
+  size(16);
   ins_cost(CALL_COST);
   format %{ "CALL,static/MethodHandle" %}
   ins_encode(preserve_SP, Java_Static_Call(meth), restore_SP, call_epilog);
-  ins_pc_relative(1);
   ins_pipe(simple_call);
 %}
 
@@ -9544,7 +10082,6 @@
   format %{ "SET    (empty),R_G5\n\t"
             "CALL,dynamic  ; NOP ==> " %}
   ins_encode( Java_Dynamic_Call( meth ), call_epilog );
-  ins_pc_relative(1);
   ins_pipe(call);
 %}
 
@@ -9556,7 +10093,6 @@
   format %{ "CALL,runtime" %}
   ins_encode( Java_To_Runtime( meth ),
               call_epilog, adjust_long_from_native_call );
-  ins_pc_relative(1);
   ins_pipe(simple_call);
 %}
 
@@ -9569,7 +10105,6 @@
   ins_encode( Java_To_Runtime( meth ),
               call_epilog,
               adjust_long_from_native_call );
-  ins_pc_relative(1);
   ins_pipe(simple_call);
 %}
 
@@ -9582,7 +10117,6 @@
   ins_encode( Java_To_Runtime( meth ),
               call_epilog,
               adjust_long_from_native_call );
-  ins_pc_relative(1);
   ins_pipe(simple_call);
 %}
 
@@ -9707,7 +10241,6 @@
   effect(KILL scratch, TEMP scratch2);
   ins_cost(100);
 
-  size(4*112);       // conservative overestimation ...
   format %{ "FASTLOCK  $object, $box; KILL $scratch, $scratch2, $box" %}
   ins_encode( Fast_Lock(object, box, scratch, scratch2) );
   ins_pipe(long_memory_op);
@@ -9719,7 +10252,6 @@
   effect(KILL scratch, TEMP scratch2);
   ins_cost(100);
 
-  size(4*120);       // conservative overestimation ...
   format %{ "FASTUNLOCK  $object, $box; KILL $scratch, $scratch2, $box" %}
   ins_encode( Fast_Unlock(object, box, scratch, scratch2) );
   ins_pipe(long_memory_op);
--- a/src/cpu/sparc/vm/stubGenerator_sparc.cpp	Wed Aug 17 07:05:42 2011 -0400
+++ b/src/cpu/sparc/vm/stubGenerator_sparc.cpp	Fri Aug 19 08:55:53 2011 -0700
@@ -150,8 +150,7 @@
     { const Register t = G3_scratch;
       Label L;
       __ ld_ptr(G2_thread, in_bytes(Thread::pending_exception_offset()), t);
-      __ br_null(t, false, Assembler::pt, L);
-      __ delayed()->nop();
+      __ br_null_short(t, Assembler::pt, L);
       __ stop("StubRoutines::call_stub: entered with pending exception");
       __ bind(L);
     }
@@ -207,8 +206,7 @@
       Label exit;
       __ ld_ptr(parameter_size.as_in().as_address(), cnt);      // parameter counter
       __ add( FP, STACK_BIAS, dst );
-      __ tst(cnt);
-      __ br(Assembler::zero, false, Assembler::pn, exit);
+      __ cmp_zero_and_br(Assembler::zero, cnt, exit);
       __ delayed()->sub(dst, BytesPerWord, dst);                 // setup Lentry_args
 
       // copy parameters if any
@@ -282,20 +280,20 @@
       __ delayed()->restore();
 
       __ BIND(is_object);
-      __ ba(false, exit);
+      __ ba(exit);
       __ delayed()->st_ptr(O0, addr, G0);
 
       __ BIND(is_float);
-      __ ba(false, exit);
+      __ ba(exit);
       __ delayed()->stf(FloatRegisterImpl::S, F0, addr, G0);
 
       __ BIND(is_double);
-      __ ba(false, exit);
+      __ ba(exit);
       __ delayed()->stf(FloatRegisterImpl::D, F0, addr, G0);
 
       __ BIND(is_long);
 #ifdef _LP64
-      __ ba(false, exit);
+      __ ba(exit);
       __ delayed()->st_long(O0, addr, G0);      // store entire long
 #else
 #if defined(COMPILER2)
@@ -307,11 +305,11 @@
  // do this here. Unfortunately if we did a rethrow we'd see a machepilog node
   // first which would move g1 -> O0/O1 and destroy the exception we were throwing.
 
-      __ ba(false, exit);
+      __ ba(exit);
       __ delayed()->stx(G1, addr, G0);  // store entire long
 #else
       __ st(O1, addr, BytesPerInt);
-      __ ba(false, exit);
+      __ ba(exit);
       __ delayed()->st(O0, addr, G0);
 #endif /* COMPILER2 */
 #endif /* _LP64 */
@@ -382,8 +380,7 @@
     // make sure that this code is only executed if there is a pending exception
     { Label L;
       __ ld_ptr(exception_addr, Gtemp);
-      __ br_notnull(Gtemp, false, Assembler::pt, L);
-      __ delayed()->nop();
+      __ br_notnull_short(Gtemp, Assembler::pt, L);
       __ stop("StubRoutines::forward exception: no pending exception (1)");
       __ bind(L);
     }
@@ -406,8 +403,7 @@
 #ifdef ASSERT
     // make sure exception is set
     { Label L;
-      __ br_notnull(Oexception, false, Assembler::pt, L);
-      __ delayed()->nop();
+      __ br_notnull_short(Oexception, Assembler::pt, L);
       __ stop("StubRoutines::forward exception: no pending exception (2)");
       __ bind(L);
     }
@@ -501,8 +497,7 @@
     Address exception_addr(G2_thread, Thread::pending_exception_offset());
     Register scratch_reg = Gtemp;
     __ ld_ptr(exception_addr, scratch_reg);
-    __ br_notnull(scratch_reg, false, Assembler::pt, L);
-    __ delayed()->nop();
+    __ br_notnull_short(scratch_reg, Assembler::pt, L);
     __ should_not_reach_here();
     __ bind(L);
 #endif // ASSERT
@@ -614,9 +609,7 @@
     __ mov(G0,yield_reg);
 
     __ BIND(retry);
-    __ cmp(yield_reg, V8AtomicOperationUnderLockSpinCount);
-    __ br(Assembler::less, false, Assembler::pt, dontyield);
-    __ delayed()->nop();
+    __ cmp_and_br_short(yield_reg, V8AtomicOperationUnderLockSpinCount, Assembler::less, Assembler::pt, dontyield);
 
     // This code can only be called from inside the VM; this
     // stub is only invoked from Atomic::add().  We do not
@@ -676,9 +669,7 @@
       // try to replace O2 with O3
       __ cas_under_lock(O1, O2, O3,
       (address)StubRoutines::Sparc::atomic_memory_operation_lock_addr(),false);
-      __ cmp(O2, O3);
-      __ br(Assembler::notEqual, false, Assembler::pn, retry);
-      __ delayed()->nop();
+      __ cmp_and_br_short(O2, O3, Assembler::notEqual, Assembler::pn, retry);
 
       __ retl(false);
       __ delayed()->mov(O2, O0);  // report previous value to caller
@@ -798,11 +789,9 @@
       __ BIND(retry);
 
       __ lduw(O1, 0, O2);
-      __ add(O0,   O2, O3);
-      __ cas(O1,   O2, O3);
-      __ cmp(      O2, O3);
-      __ br(Assembler::notEqual, false, Assembler::pn, retry);
-      __ delayed()->nop();
+      __ add(O0, O2, O3);
+      __ cas(O1, O2, O3);
+      __ cmp_and_br_short(O2, O3, Assembler::notEqual, Assembler::pn, retry);
       __ retl(false);
       __ delayed()->add(O0, O2, O0); // note that cas made O2==O3
     } else {
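
The rewritten atomic-add slow path above is the standard CAS retry loop: load the current value, compute old+delta, cas, and loop while another thread raced in between; on success the stub returns the updated value (the delay-slot add of O0 and O2). The same shape in portable C++ — a sketch using std::atomic, whereas the stub operates on raw memory:

    #include <atomic>
    #include <cstdio>

    // Returns the updated value, like the stub's retl / delayed()->add(O0,O2,O0).
    static int atomic_add(std::atomic<int>& cell, int delta) {
      int old = cell.load();
      while (!cell.compare_exchange_weak(old, old + delta)) {
        // 'old' was refreshed with the current value; just retry.
      }
      return old + delta;
    }

    int main() {
      std::atomic<int> v{40};
      printf("%d\n", atomic_add(v, 2));  // prints 42
      return 0;
    }
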
@@ -1370,8 +1359,7 @@
 
     // copy tailing bytes
     __ BIND(L_copy_byte);
-      __ br_zero(Assembler::zero, false, Assembler::pt, count, L_exit);
-      __ delayed()->nop();
+      __ cmp_and_br_short(count, 0, Assembler::equal, Assembler::pt, L_exit);
       __ align(OptoLoopAlignment);
     __ BIND(L_copy_byte_loop);
       __ ldub(from, offset, O3);
@@ -1482,8 +1470,7 @@
 
     // copy 1 element (2 bytes) at a time
     __ BIND(L_copy_byte);
-      __ br_zero(Assembler::zero, false, Assembler::pt, count, L_exit);
-      __ delayed()->nop();
+      __ cmp_and_br_short(count, 0, Assembler::equal, Assembler::pt, L_exit);
       __ align(OptoLoopAlignment);
     __ BIND(L_copy_byte_loop);
       __ dec(end_from);
@@ -1600,8 +1587,7 @@
 
     // copy 1 element at a time
     __ BIND(L_copy_2_bytes);
-      __ br_zero(Assembler::zero, false, Assembler::pt, count, L_exit);
-      __ delayed()->nop();
+      __ cmp_and_br_short(count, 0, Assembler::equal, Assembler::pt, L_exit);
       __ align(OptoLoopAlignment);
     __ BIND(L_copy_2_bytes_loop);
       __ lduh(from, offset, O3);
@@ -1946,8 +1932,7 @@
 
     // copy 1 element (2 bytes) at a time
     __ BIND(L_copy_2_bytes);
-      __ br_zero(Assembler::zero, false, Assembler::pt, count, L_exit);
-      __ delayed()->nop();
+      __ cmp_and_br_short(count, 0, Assembler::equal, Assembler::pt, L_exit);
     __ BIND(L_copy_2_bytes_loop);
       __ dec(end_from, 2);
       __ dec(end_to, 2);
@@ -2060,8 +2045,7 @@
 
     // copy 1 element at a time
     __ BIND(L_copy_4_bytes);
-      __ br_zero(Assembler::zero, false, Assembler::pt, count, L_exit);
-      __ delayed()->nop();
+      __ cmp_and_br_short(count, 0, Assembler::equal, Assembler::pt, L_exit);
     __ BIND(L_copy_4_bytes_loop);
       __ ld(from, offset, O3);
       __ deccc(count);
@@ -2193,8 +2177,7 @@
 
     // copy 1 element (4 bytes) at a time
     __ BIND(L_copy_4_bytes);
-      __ br_zero(Assembler::zero, false, Assembler::pt, count, L_exit);
-      __ delayed()->nop();
+      __ cmp_and_br_short(count, 0, Assembler::equal, Assembler::pt, L_exit);
     __ BIND(L_copy_4_bytes_loop);
       __ dec(end_from, 4);
       __ dec(end_to, 4);
@@ -2576,7 +2559,7 @@
                                      super_klass->after_save(),
                                      L0, L1, L2, L4,
                                      NULL, &L_pop_to_miss);
-    __ ba(false, L_success);
+    __ ba(L_success);
     __ delayed()->restore();
 
     __ bind(L_pop_to_miss);
@@ -2673,8 +2656,7 @@
     // ======== loop entry is here ========
     __ BIND(load_element);
     __ load_heap_oop(O0_from, O5_offset, G3_oop);  // load the oop
-    __ br_null(G3_oop, true, Assembler::pt, store_element);
-    __ delayed()->nop();
+    __ br_null_short(G3_oop, Assembler::pt, store_element);
 
     __ load_klass(G3_oop, G4_klass); // query the object klass
 
@@ -2896,8 +2878,7 @@
     //  assert(src->klass() != NULL);
     BLOCK_COMMENT("assert klasses not null");
     { Label L_a, L_b;
-      __ br_notnull(G3_src_klass, false, Assembler::pt, L_b); // it is broken if klass is NULL
-      __ delayed()->nop();
+      __ br_notnull_short(G3_src_klass, Assembler::pt, L_b); // it is broken if klass is NULL
       __ bind(L_a);
       __ stop("broken null klass");
       __ bind(L_b);
@@ -2937,9 +2918,7 @@
     }
 
     //  if (src->klass() != dst->klass()) return -1;
-    __ cmp(G3_src_klass, G4_dst_klass);
-    __ brx(Assembler::notEqual, false, Assembler::pn, L_failed);
-    __ delayed()->nop();
+    __ cmp_and_brx_short(G3_src_klass, G4_dst_klass, Assembler::notEqual, Assembler::pn, L_failed);
 
     //  if (!src->is_Array()) return -1;
     __ cmp(G5_lh, Klass::_lh_neutral_value); // < 0
@@ -3007,9 +2986,7 @@
     __ delayed()->signx(length, count); // length
 #ifdef ASSERT
     { Label L;
-      __ cmp(G3_elsize, LogBytesPerLong);
-      __ br(Assembler::equal, false, Assembler::pt, L);
-      __ delayed()->nop();
+      __ cmp_and_br_short(G3_elsize, LogBytesPerLong, Assembler::equal, Assembler::pt, L);
       __ stop("must be long copy, but elsize is wrong");
       __ bind(L);
     }
--- a/src/cpu/sparc/vm/templateInterpreter_sparc.cpp	Wed Aug 17 07:05:42 2011 -0400
+++ b/src/cpu/sparc/vm/templateInterpreter_sparc.cpp	Fri Aug 19 08:55:53 2011 -0700
@@ -190,9 +190,7 @@
   const Register size  = G1_scratch;
   if (EnableInvokeDynamic) {
     __ ldub(Address(Lbcp, 0), G1_scratch);  // Load current bytecode.
-    __ cmp(G1_scratch, Bytecodes::_invokedynamic);
-    __ br(Assembler::equal, false, Assembler::pn, L_giant_index);
-    __ delayed()->nop();
+    __ cmp_and_br_short(G1_scratch, Bytecodes::_invokedynamic, Assembler::equal, Assembler::pn, L_giant_index);
   }
   __ get_cache_and_index_at_bcp(cache, G1_scratch, 1);
   __ bind(L_got_cache);
@@ -207,8 +205,7 @@
   if (EnableInvokeDynamic) {
     __ bind(L_giant_index);
     __ get_cache_and_index_at_bcp(cache, G1_scratch, 1, sizeof(u4));
-    __ ba(false, L_got_cache);
-    __ delayed()->nop();
+    __ ba_short(L_got_cache);
   }
 
   return entry;
@@ -221,9 +218,7 @@
   { Label L;
     Address exception_addr(G2_thread, Thread::pending_exception_offset());
     __ ld_ptr(exception_addr, Gtemp);  // Load pending exception.
-    __ tst(Gtemp);
-    __ brx(Assembler::equal, false, Assembler::pt, L);
-    __ delayed()->nop();
+    __ br_null_short(Gtemp, Assembler::pt, L);
     __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_pending_exception));
     __ should_not_reach_here();
     __ bind(L);
@@ -304,8 +299,7 @@
     if (ProfileInterpreter) {
       // If no method data exists, go to profile_continue.
       __ ld_ptr(Lmethod, methodOopDesc::method_data_offset(), G4_scratch);
-      __ br_null(G4_scratch, false, Assembler::pn, no_mdo);
-      __ delayed()->nop();
+      __ br_null_short(G4_scratch, Assembler::pn, no_mdo);
       // Increment counter
       Address mdo_invocation_counter(G4_scratch,
                                      in_bytes(methodDataOopDesc::invocation_counter_offset()) +
@@ -313,8 +307,7 @@
       __ increment_mask_and_jump(mdo_invocation_counter, increment, mask,
                                  G3_scratch, Lscratch,
                                  Assembler::zero, overflow);
-      __ ba(false, done);
-      __ delayed()->nop();
+      __ ba_short(done);
     }
 
     // Increment counter in methodOop
@@ -340,9 +333,7 @@
       // Test to see if we should create a method data oop
       AddressLiteral profile_limit((address)&InvocationCounter::InterpreterProfileLimit);
       __ load_contents(profile_limit, G3_scratch);
-      __ cmp(O0, G3_scratch);
-      __ br(Assembler::lessUnsigned, false, Assembler::pn, *profile_method_continue);
-      __ delayed()->nop();
+      __ cmp_and_br_short(O0, G3_scratch, Assembler::lessUnsigned, Assembler::pn, *profile_method_continue);
 
       // if no method data exists, go to profile_method
       __ test_method_data_pointer(*profile_method);
@@ -351,7 +342,7 @@
     AddressLiteral invocation_limit((address)&InvocationCounter::InterpreterInvocationLimit);
     __ load_contents(invocation_limit, G3_scratch);
     __ cmp(O0, G3_scratch);
-    __ br(Assembler::greaterEqualUnsigned, false, Assembler::pn, *overflow);
+    __ br(Assembler::greaterEqualUnsigned, false, Assembler::pn, *overflow); // Far distance
     __ delayed()->nop();
   }
 
@@ -410,19 +401,14 @@
 
   assert_different_registers(Rframe_size, Rscratch, Rscratch2);
 
-  __ set( page_size,   Rscratch );
-  __ cmp( Rframe_size, Rscratch );
-
-  __ br( Assembler::lessEqual, false, Assembler::pt, after_frame_check );
-  __ delayed()->nop();
+  __ set(page_size, Rscratch);
+  __ cmp_and_br_short(Rframe_size, Rscratch, Assembler::lessEqual, Assembler::pt, after_frame_check);
 
   // get the stack base, and in debug, verify it is non-zero
   __ ld_ptr( G2_thread, Thread::stack_base_offset(), Rscratch );
 #ifdef ASSERT
   Label base_not_zero;
-  __ cmp( Rscratch, G0 );
-  __ brx( Assembler::notEqual, false, Assembler::pn, base_not_zero );
-  __ delayed()->nop();
+  __ br_notnull_short(Rscratch, Assembler::pn, base_not_zero);
   __ stop("stack base is zero in generate_stack_overflow_check");
   __ bind(base_not_zero);
 #endif
@@ -432,9 +418,7 @@
   __ ld_ptr( G2_thread, Thread::stack_size_offset(), Rscratch2 );
 #ifdef ASSERT
   Label size_not_zero;
-  __ cmp( Rscratch2, G0 );
-  __ brx( Assembler::notEqual, false, Assembler::pn, size_not_zero );
-  __ delayed()->nop();
+  __ br_notnull_short(Rscratch2, Assembler::pn, size_not_zero);
   __ stop("stack size is zero in generate_stack_overflow_check");
   __ bind(size_not_zero);
 #endif
@@ -450,9 +434,7 @@
 
   // the frame is greater than one page in size, so check against
   // the bottom of the stack
-  __ cmp( SP, Rscratch );
-  __ brx( Assembler::greater, false, Assembler::pt, after_frame_check );
-  __ delayed()->nop();
+  __ cmp_and_brx_short(SP, Rscratch, Assembler::greater, Assembler::pt, after_frame_check);
 
   // Save the return address as the exception pc
   __ st_ptr(O7, saved_exception_pc);
@@ -624,9 +606,7 @@
     // If we need a safepoint check, generate full interpreter entry.
     AddressLiteral sync_state(SafepointSynchronize::address_of_state());
     __ set(sync_state, G3_scratch);
-    __ cmp(G3_scratch, SafepointSynchronize::_not_synchronized);
-    __ br(Assembler::notEqual, false, Assembler::pn, slow_path);
-    __ delayed()->nop();
+    __ cmp_and_br_short(G3_scratch, SafepointSynchronize::_not_synchronized, Assembler::notEqual, Assembler::pn, slow_path);
 
     // Code: _return
     __ retl();
@@ -664,14 +644,12 @@
     AddressLiteral sync_state(SafepointSynchronize::address_of_state());
     __ load_contents(sync_state, G3_scratch);
     __ cmp(G3_scratch, SafepointSynchronize::_not_synchronized);
-    __ br(Assembler::notEqual, false, Assembler::pn, slow_path);
-    __ delayed()->nop();
+    __ cmp_and_br_short(G3_scratch, SafepointSynchronize::_not_synchronized, Assembler::notEqual, Assembler::pn, slow_path);
 
     // Check if local 0 != NULL
     __ ld_ptr(Gargs, G0, Otos_i ); // get local 0
-    __ tst(Otos_i);  // check if local 0 == NULL and go the slow path
-    __ brx(Assembler::zero, false, Assembler::pn, slow_path);
-    __ delayed()->nop();
+    // check if local 0 == NULL and go to the slow path
+    __ br_null_short(Otos_i, Assembler::pn, slow_path);
 
 
     // read first instruction word and extract bytecode @ 1 and index @ 2
@@ -697,9 +675,7 @@
     __ ld_ptr(G3_scratch, cp_base_offset + ConstantPoolCacheEntry::indices_offset(), G1_scratch);
     __ srl(G1_scratch, 2*BitsPerByte, G1_scratch);
     __ and3(G1_scratch, 0xFF, G1_scratch);
-    __ cmp(G1_scratch, Bytecodes::_getfield);
-    __ br(Assembler::notEqual, false, Assembler::pn, slow_path);
-    __ delayed()->nop();
+    __ cmp_and_br_short(G1_scratch, Bytecodes::_getfield, Assembler::notEqual, Assembler::pn, slow_path);
 
     // Get the type and return field offset from the constant pool cache
     __ ld_ptr(G3_scratch, cp_base_offset + ConstantPoolCacheEntry::flags_offset(), G1_scratch);
@@ -787,9 +763,8 @@
     // Check if local 0 != NULL
     // If the receiver is null then it is OK to jump to the slow path.
     __ ld_ptr(Gargs, G0, Otos_i ); // get local 0
-    __ tst(Otos_i);  // check if local 0 == NULL and go the slow path
-    __ brx(Assembler::zero, false, Assembler::pn, slow_path);
-    __ delayed()->nop();
+    // check if local 0 == NULL and go to the slow path
+    __ cmp_and_brx_short(Otos_i, 0, Assembler::equal, Assembler::pn, slow_path);
 
 
     // Load the value of the referent field.
@@ -952,9 +927,7 @@
   { Label L;
     Address signature_handler(Lmethod, methodOopDesc::signature_handler_offset());
     __ ld_ptr(signature_handler, G3_scratch);
-    __ tst(G3_scratch);
-    __ brx(Assembler::notZero, false, Assembler::pt, L);
-    __ delayed()->nop();
+    __ br_notnull_short(G3_scratch, Assembler::pt, L);
     __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::prepare_native_call), Lmethod);
     __ ld_ptr(signature_handler, G3_scratch);
     __ bind(L);
@@ -1019,9 +992,7 @@
 #ifdef ASSERT
     if (!PrintSignatureHandlers)  // do not dirty the output with this
     { Label L;
-      __ tst(O1);
-      __ brx(Assembler::notZero, false, Assembler::pt, L);
-      __ delayed()->nop();
+      __ br_notnull_short(O1, Assembler::pt, L);
       __ stop("mirror is missing");
       __ bind(L);
     }
@@ -1038,9 +1009,7 @@
 
 #ifdef ASSERT
   { Label L;
-    __ tst(O0);
-    __ brx(Assembler::notZero, false, Assembler::pt, L);
-    __ delayed()->nop();
+    __ br_notnull_short(O0, Assembler::pt, L);
     __ stop("native entry point is missing");
     __ bind(L);
   }
@@ -1079,9 +1048,7 @@
 #ifdef ASSERT
   { Label L;
     __ ld(thread_state, G3_scratch);
-    __ cmp(G3_scratch, _thread_in_Java);
-    __ br(Assembler::equal, false, Assembler::pt, L);
-    __ delayed()->nop();
+    __ cmp_and_br_short(G3_scratch, _thread_in_Java, Assembler::equal, Assembler::pt, L);
     __ stop("Wrong thread state in native stub");
     __ bind(L);
   }
@@ -1134,9 +1101,7 @@
     Label L;
     __ br(Assembler::notEqual, false, Assembler::pn, L);
     __ delayed()->ld(G2_thread, JavaThread::suspend_flags_offset(), G3_scratch);
-    __ cmp(G3_scratch, 0);
-    __ br(Assembler::equal, false, Assembler::pt, no_block);
-    __ delayed()->nop();
+    __ cmp_and_br_short(G3_scratch, 0, Assembler::equal, Assembler::pt, no_block);
     __ bind(L);
 
     // Block.  Save any potential method result value before the operation and
@@ -1185,9 +1150,7 @@
     Label no_oop, store_result;
 
     __ set((intptr_t)AbstractInterpreter::result_handler(T_OBJECT), G3_scratch);
-    __ cmp(G3_scratch, Lscratch);
-    __ brx(Assembler::notEqual, false, Assembler::pt, no_oop);
-    __ delayed()->nop();
+    __ cmp_and_brx_short(G3_scratch, Lscratch, Assembler::notEqual, Assembler::pt, no_oop);
     __ addcc(G0, O0, O0);
     __ brx(Assembler::notZero, true, Assembler::pt, store_result);     // if result is not NULL:
     __ delayed()->ld_ptr(O0, 0, O0);                                   // unbox it
@@ -1206,9 +1169,7 @@
   { Label L;
     Address exception_addr(G2_thread, Thread::pending_exception_offset());
     __ ld_ptr(exception_addr, Gtemp);
-    __ tst(Gtemp);
-    __ brx(Assembler::equal, false, Assembler::pt, L);
-    __ delayed()->nop();
+    __ br_null_short(Gtemp, Assembler::pt, L);
     // Note: This could be handled more efficiently since we know that the native
     //       method doesn't have an exception handler. We could directly return
     //       to the exception handler for the caller.
@@ -1245,9 +1206,7 @@
 #ifdef ASSERT
   {
     Label ok;
-    __ cmp(I5_savedSP, FP);
-    __ brx(Assembler::greaterEqualUnsigned, false, Assembler::pt, ok);
-    __ delayed()->nop();
+    __ cmp_and_brx_short(I5_savedSP, FP, Assembler::greaterEqualUnsigned, Assembler::pt, ok);
     __ stop("bad I5_savedSP value");
     __ should_not_reach_here();
     __ bind(ok);
@@ -1429,8 +1388,7 @@
 
       __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::profile_method));
       __ set_method_data_pointer_for_bcp();
-      __ ba(false, profile_method_continue);
-      __ delayed()->nop();
+      __ ba_short(profile_method_continue);
     }
 
     // handle invocation counter overflow
@@ -1856,9 +1814,7 @@
     // adapter frames in C2.
     Label caller_not_deoptimized;
     __ call_VM_leaf(L7_thread_cache, CAST_FROM_FN_PTR(address, InterpreterRuntime::interpreter_contains), I7);
-    __ tst(O0);
-    __ brx(Assembler::notEqual, false, Assembler::pt, caller_not_deoptimized);
-    __ delayed()->nop();
+    __ br_notnull_short(O0, Assembler::pt, caller_not_deoptimized);
 
     const Register Gtmp1 = G3_scratch;
     const Register Gtmp2 = G1_scratch;
@@ -1992,10 +1948,10 @@
 void TemplateInterpreterGenerator::set_vtos_entry_points(Template* t, address& bep, address& cep, address& sep, address& aep, address& iep, address& lep, address& fep, address& dep, address& vep) {
   assert(t->is_valid() && t->tos_in() == vtos, "illegal template");
   Label L;
-  aep = __ pc(); __ push_ptr(); __ ba(false, L); __ delayed()->nop();
-  fep = __ pc(); __ push_f();   __ ba(false, L); __ delayed()->nop();
-  dep = __ pc(); __ push_d();   __ ba(false, L); __ delayed()->nop();
-  lep = __ pc(); __ push_l();   __ ba(false, L); __ delayed()->nop();
+  aep = __ pc(); __ push_ptr(); __ ba_short(L);
+  fep = __ pc(); __ push_f();   __ ba_short(L);
+  dep = __ pc(); __ push_d();   __ ba_short(L);
+  lep = __ pc(); __ push_l();   __ ba_short(L);
   iep = __ pc(); __ push_i();
   bep = cep = sep = iep;                        // there aren't any
   vep = __ pc(); __ bind(L);                    // fall through
--- a/src/cpu/sparc/vm/templateTable_sparc.cpp	Wed Aug 17 07:05:42 2011 -0400
+++ b/src/cpu/sparc/vm/templateTable_sparc.cpp	Fri Aug 19 08:55:53 2011 -0700
@@ -149,39 +149,68 @@
 }
 
 
-void TemplateTable::patch_bytecode(Bytecodes::Code bc, Register Rbyte_code,
-                                   Register Rscratch,
-                                   bool load_bc_into_scratch /*=true*/) {
+void TemplateTable::patch_bytecode(Bytecodes::Code bc, Register bc_reg,
+                                   Register temp_reg, bool load_bc_into_bc_reg/*=true*/,
+                                   int byte_no) {
   // With sharing on, may need to test methodOop flag.
-  if (!RewriteBytecodes) return;
-  if (load_bc_into_scratch) __ set(bc, Rbyte_code);
-  Label patch_done;
+  if (!RewriteBytecodes)  return;
+  Label L_patch_done;
+
+  switch (bc) {
+  case Bytecodes::_fast_aputfield:
+  case Bytecodes::_fast_bputfield:
+  case Bytecodes::_fast_cputfield:
+  case Bytecodes::_fast_dputfield:
+  case Bytecodes::_fast_fputfield:
+  case Bytecodes::_fast_iputfield:
+  case Bytecodes::_fast_lputfield:
+  case Bytecodes::_fast_sputfield:
+    {
+      // We skip bytecode quickening for putfield instructions when
+      // the put_code written to the constant pool cache is zero.
+      // This is required so that every execution of this instruction
+      // calls out to InterpreterRuntime::resolve_get_put to do
+      // additional, required work.
+      assert(byte_no == f1_byte || byte_no == f2_byte, "byte_no out of range");
+      assert(load_bc_into_bc_reg, "we use bc_reg as temp");
+      __ get_cache_and_index_and_bytecode_at_bcp(bc_reg, temp_reg, temp_reg, byte_no, 1);
+      __ set(bc, bc_reg);
+      __ cmp_and_br_short(temp_reg, 0, Assembler::equal, Assembler::pn, L_patch_done);  // don't patch
+    }
+    break;
+  default:
+    assert(byte_no == -1, "sanity");
+    if (load_bc_into_bc_reg) {
+      __ set(bc, bc_reg);
+    }
+  }
+
   if (JvmtiExport::can_post_breakpoint()) {
-    Label fast_patch;
-    __ ldub(at_bcp(0), Rscratch);
-    __ cmp(Rscratch, Bytecodes::_breakpoint);
-    __ br(Assembler::notEqual, false, Assembler::pt, fast_patch);
-    __ delayed()->nop();  // don't bother to hoist the stb here
+    Label L_fast_patch;
+    __ ldub(at_bcp(0), temp_reg);
+    __ cmp_and_br_short(temp_reg, Bytecodes::_breakpoint, Assembler::notEqual, Assembler::pt, L_fast_patch);
     // perform the quickening, slowly, in the bowels of the breakpoint table
-    __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::set_original_bytecode_at), Lmethod, Lbcp, Rbyte_code);
-    __ ba(false, patch_done);
-    __ delayed()->nop();
-    __ bind(fast_patch);
+    __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::set_original_bytecode_at), Lmethod, Lbcp, bc_reg);
+    __ ba_short(L_patch_done);
+    __ bind(L_fast_patch);
   }
+
 #ifdef ASSERT
   Bytecodes::Code orig_bytecode =  Bytecodes::java_code(bc);
-  Label okay;
-  __ ldub(at_bcp(0), Rscratch);
-  __ cmp(Rscratch, orig_bytecode);
-  __ br(Assembler::equal, false, Assembler::pt, okay);
-  __ delayed() ->cmp(Rscratch, Rbyte_code);
-  __ br(Assembler::equal, false, Assembler::pt, okay);
+  Label L_okay;
+  __ ldub(at_bcp(0), temp_reg);
+  __ cmp(temp_reg, orig_bytecode);
+  __ br(Assembler::equal, false, Assembler::pt, L_okay);
+  __ delayed()->cmp(temp_reg, bc_reg);
+  __ br(Assembler::equal, false, Assembler::pt, L_okay);
   __ delayed()->nop();
-  __ stop("Rewriting wrong bytecode location");
-  __ bind(okay);
+  __ stop("patching the wrong bytecode");
+  __ bind(L_okay);
 #endif
-  __ stb(Rbyte_code, at_bcp(0));
-  __ bind(patch_done);
+
+  // patch bytecode
+  __ stb(bc_reg, at_bcp(0));
+  __ bind(L_patch_done);
 }
 
 //----------------------------------------------------------------------------------------------------
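
The guard added at the top of patch_bytecode deserves a closer look: a zero put_code in the constant pool cache means resolve_get_put must still run on every execution, so the putfield bytecode has to stay unquickened. A minimal C-style sketch of that logic (cp_cache_put_code and fast_variant are illustrative names, not HotSpot APIs):

    // Sketch of the quickening guard above.
    uint8_t put_code = cp_cache_put_code(byte_no);  // hypothetical accessor
    if (put_code != 0) {
      *bcp = fast_variant;   // resolution already happened: safe to quicken
    }                        // else fall through to L_patch_done: don't patch
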
@@ -281,17 +310,14 @@
   // get type from tags
   __ add(O2, tags_offset, O2);
   __ ldub(O2, O1, O2);
-  __ cmp(O2, JVM_CONSTANT_UnresolvedString);    // unresolved string? If so, must resolve
-  __ brx(Assembler::equal, true, Assembler::pt, call_ldc);
-  __ delayed()->nop();
-
-  __ cmp(O2, JVM_CONSTANT_UnresolvedClass);     // unresolved class? If so, must resolve
-  __ brx(Assembler::equal, true, Assembler::pt, call_ldc);
-  __ delayed()->nop();
-
-  __ cmp(O2, JVM_CONSTANT_UnresolvedClassInError);     // unresolved class in error state
-  __ brx(Assembler::equal, true, Assembler::pn, call_ldc);
-  __ delayed()->nop();
+  // unresolved string? If so, must resolve
+  __ cmp_and_brx_short(O2, JVM_CONSTANT_UnresolvedString, Assembler::equal, Assembler::pt, call_ldc);
+
+  // unresolved class? If so, must resolve
+  __ cmp_and_brx_short(O2, JVM_CONSTANT_UnresolvedClass, Assembler::equal, Assembler::pt, call_ldc);
+
+  // unresolved class in error state
+  __ cmp_and_brx_short(O2, JVM_CONSTANT_UnresolvedClassInError, Assembler::equal, Assembler::pn, call_ldc);
 
   __ cmp(O2, JVM_CONSTANT_Class);      // need to call vm to get java mirror of the class
   __ brx(Assembler::notEqual, true, Assembler::pt, notClass);
@@ -301,8 +327,7 @@
   __ set(wide, O1);
   call_VM(Otos_i, CAST_FROM_FN_PTR(address, InterpreterRuntime::ldc), O1);
   __ push(atos);
-  __ ba(false, exit);
-  __ delayed()->nop();
+  __ ba_short(exit);
 
   __ bind(notClass);
  // __ add(O0, base_offset, O0);
@@ -312,8 +337,7 @@
   __ delayed()->cmp(O2, JVM_CONSTANT_String);
   __ ld(O0, O1, Otos_i);
   __ push(itos);
-  __ ba(false, exit);
-  __ delayed()->nop();
+  __ ba_short(exit);
 
   __ bind(notInt);
  // __ cmp(O2, JVM_CONSTANT_String);
@@ -325,8 +349,7 @@
   __ ld_ptr(O0, O1, Otos_i);
   __ verify_oop(Otos_i);
   __ push(atos);
-  __ ba(false, exit);
-  __ delayed()->nop();
+  __ ba_short(exit);
 
   __ bind(notString);
  // __ ldf(FloatRegisterImpl::S, O0, O1, Ftos_f);
@@ -365,9 +388,7 @@
   __ load_klass(Otos_i, Rcon_klass);
   AddressLiteral array_klass_addr((address)Universe::systemObjArrayKlassObj_addr());
   __ load_contents(array_klass_addr, Rarray_klass);
-  __ cmp(Rarray_klass, Rcon_klass);
-  __ brx(Assembler::notEqual, false, Assembler::pt, L_done);
-  __ delayed()->nop();
+  __ cmp_and_brx_short(Rarray_klass, Rcon_klass, Assembler::notEqual, Assembler::pt, L_done);
   __ ld(Address(Otos_i, arrayOopDesc::length_offset_in_bytes()), Rcon_klass);
   __ tst(Rcon_klass);
   __ brx(Assembler::zero, true, Assembler::pt, L_done);
@@ -397,9 +418,7 @@
   __ sll(O1, LogBytesPerWord, O1);
   __ add(O0, O1, G3_scratch);
 
-  __ cmp(O2, JVM_CONSTANT_Double);
-  __ brx(Assembler::notEqual, false, Assembler::pt, Long);
-  __ delayed()->nop();
+  __ cmp_and_brx_short(O2, JVM_CONSTANT_Double, Assembler::notEqual, Assembler::pt, Long);
   // A double can be placed at word-aligned locations in the constant pool.
   // Check out Conversions.java for an example.
   // Also constantPoolOopDesc::header_size() is 20, which makes it very difficult
@@ -413,8 +432,7 @@
          f->successor());
 #endif
   __ push(dtos);
-  __ ba(false, exit);
-  __ delayed()->nop();
+  __ ba_short(exit);
 
   __ bind(Long);
 #ifdef _LP64
@@ -453,9 +471,7 @@
     // last two iloads in a pair.  Comparing against fast_iload means that
     // the next bytecode is neither an iload nor a caload, and therefore
     // an iload pair.
-    __ cmp(G3_scratch, (int)Bytecodes::_iload);
-    __ br(Assembler::equal, false, Assembler::pn, done);
-    __ delayed()->nop();
+    __ cmp_and_br_short(G3_scratch, (int)Bytecodes::_iload, Assembler::equal, Assembler::pn, done);
 
     __ cmp(G3_scratch, (int)Bytecodes::_fast_iload);
     __ br(Assembler::equal, false, Assembler::pn, rewrite);
@@ -697,9 +713,7 @@
     aload(0);
 
     // if _getfield then wait with rewrite
-    __ cmp(G3_scratch, (int)Bytecodes::_getfield);
-    __ br(Assembler::equal, false, Assembler::pn, done);
-    __ delayed()->nop();
+    __ cmp_and_br_short(G3_scratch, (int)Bytecodes::_getfield, Assembler::equal, Assembler::pn, done);
 
     // if _igetfield then rewrite to _fast_iaccess_0
     assert(Bytecodes::java_code(Bytecodes::_fast_iaccess_0) == Bytecodes::_aload_0, "adjust fast bytecode def");
@@ -867,8 +881,7 @@
   __ index_check_without_pop(O3, O2, UseCompressedOops ? 2 : LogBytesPerWord, G3_scratch, O1);
 
   // do array store check - check for NULL value first
-  __ br_null( Otos_i, false, Assembler::pn, is_null );
-  __ delayed()->nop();
+  __ br_null_short( Otos_i, Assembler::pn, is_null );
 
   __ load_klass(O3, O4); // get array klass
   __ load_klass(Otos_i, O5); // get value klass
@@ -899,7 +912,7 @@
   __ bind(store_ok);
   do_oop_store(_masm, O1, noreg, arrayOopDesc::base_offset_in_bytes(T_OBJECT), Otos_i, G3_scratch, _bs->kind(), true);
 
-  __ ba(false,done);
+  __ ba(done);
   __ delayed()->inc(Lesp, 3* Interpreter::stackElementSize); // adj sp (pops array, index and value)
 
   __ bind(is_null);
@@ -1633,16 +1646,14 @@
       if (ProfileInterpreter) {
         // If no method data exists, go to profile_continue.
         __ ld_ptr(Lmethod, methodOopDesc::method_data_offset(), G4_scratch);
-        __ br_null(G4_scratch, false, Assembler::pn, Lno_mdo);
-        __ delayed()->nop();
+        __ br_null_short(G4_scratch, Assembler::pn, Lno_mdo);
 
         // Increment backedge counter in the MDO
         Address mdo_backedge_counter(G4_scratch, in_bytes(methodDataOopDesc::backedge_counter_offset()) +
                                                  in_bytes(InvocationCounter::counter_offset()));
         __ increment_mask_and_jump(mdo_backedge_counter, increment, mask, G3_scratch, Lscratch,
                                    Assembler::notZero, &Lforward);
-        __ ba(false, Loverflow);
-        __ delayed()->nop();
+        __ ba_short(Loverflow);
       }
 
       // If there's no MDO, increment counter in methodOop
@@ -1658,14 +1669,11 @@
 
       // Was an OSR adapter generated?
       // O0 = osr nmethod
-      __ br_null(O0, false, Assembler::pn, Lforward);
-      __ delayed()->nop();
+      __ br_null_short(O0, Assembler::pn, Lforward);
 
       // Has the nmethod been invalidated already?
       __ ld(O0, nmethod::entry_bci_offset(), O2);
-      __ cmp(O2, InvalidOSREntryBci);
-      __ br(Assembler::equal, false, Assembler::pn, Lforward);
-      __ delayed()->nop();
+      __ cmp_and_br_short(O2, InvalidOSREntryBci, Assembler::equal, Assembler::pn, Lforward);
 
       // migrate the interpreter frame off of the stack
 
@@ -1830,7 +1838,7 @@
   __ profile_switch_case(O2, O3, G3_scratch, G4_scratch);
   __ sll(O2, LogBytesPerInt, O2);
   __ add(O2, 3 * BytesPerInt, O2);
-  __ ba(false, continue_execution);
+  __ ba(continue_execution);
   __ delayed()->ld(O1, O2, O2);
   // handle default
   __ bind(default_case);
@@ -1858,7 +1866,7 @@
   __ ld(O1, BytesPerInt, O2);
   __ sll(O2, LogBytesPerInt + 1, O2); // in word-pairs
   __ add(O1, 2 * BytesPerInt, O3); // set first pair addr
-  __ ba(false, loop_entry);
+  __ ba(loop_entry);
   __ delayed()->add(O3, O2, O2); // counter now points past last pair
 
   // table search
@@ -1877,8 +1885,7 @@
   __ ld(O1, 0, O4); // get default offset
   if (ProfileInterpreter) {
     __ profile_switch_default(O3);
-    __ ba(false, continue_execution);
-    __ delayed()->nop();
+    __ ba_short(continue_execution);
   }
 
   // entry found -> get offset
@@ -1944,7 +1951,7 @@
 
   // and start
   Label entry;
-  __ ba(false, entry);
+  __ ba(entry);
   __ delayed()->ld( Rarray, -BytesPerInt, Rj);
   // (Rj is already in the native byte-ordering.)
 
@@ -2002,8 +2009,7 @@
   // (Rj is already in the native byte-ordering.)
 
   if (ProfileInterpreter) {
-    __ ba(false, continue_execution);
-    __ delayed()->nop();
+    __ ba_short(continue_execution);
   }
 
   __ bind(default_case); // fall through (if not profiling)
@@ -2087,12 +2093,12 @@
   // Depends on cpCacheOop layout!
   Label resolved;
 
-  __ get_cache_and_index_at_bcp(Rcache, index, 1, index_size);
   if (byte_no == f1_oop) {
     // We are resolved if the f1 field contains a non-null object (CallSite, etc.)
     // This kind of CP cache entry does not need to match the flags byte, because
     // there is a 1-1 relation between bytecode type and CP entry type.
     assert_different_registers(result, Rcache);
+    __ get_cache_and_index_at_bcp(Rcache, index, 1, index_size);
     __ ld_ptr(Rcache, constantPoolCacheOopDesc::base_offset() +
               ConstantPoolCacheEntry::f1_offset(), result);
     __ tst(result);
@@ -2101,15 +2107,9 @@
   } else {
     assert(byte_no == f1_byte || byte_no == f2_byte, "byte_no out of range");
     assert(result == noreg, "");  //else change code for setting result
-    const int shift_count = (1 + byte_no)*BitsPerByte;
-
-    __ ld_ptr(Rcache, constantPoolCacheOopDesc::base_offset() +
-              ConstantPoolCacheEntry::indices_offset(), Lbyte_code);
-
-    __ srl(  Lbyte_code, shift_count, Lbyte_code );
-    __ and3( Lbyte_code,        0xFF, Lbyte_code );
-    __ cmp(  Lbyte_code, (int)bytecode());
-    __ br(   Assembler::equal, false, Assembler::pt, resolved);
+    __ get_cache_and_index_and_bytecode_at_bcp(Rcache, index, Lbyte_code, byte_no, 1, index_size);
+    __ cmp(Lbyte_code, (int) bytecode());  // have we resolved this bytecode?
+    __ br(Assembler::equal, false, Assembler::pt, resolved);
     __ delayed()->set((int)bytecode(), O1);
   }
 
@@ -2216,9 +2216,7 @@
     assert_different_registers(Rcache, index, G1_scratch);
     AddressLiteral get_field_access_count_addr(JvmtiExport::get_field_access_count_addr());
     __ load_contents(get_field_access_count_addr, G1_scratch);
-    __ tst(G1_scratch);
-    __ br(Assembler::zero, false, Assembler::pt, Label1);
-    __ delayed()->nop();
+    __ cmp_and_br_short(G1_scratch, 0, Assembler::equal, Assembler::pt, Label1);
 
     __ add(Rcache, in_bytes(cp_base_offset), Rcache);
 
@@ -2298,7 +2296,7 @@
   if (!is_static) {
     patch_bytecode(Bytecodes::_fast_agetfield, G3_scratch, G4_scratch);
   }
-  __ ba(false, checkVolatile);
+  __ ba(checkVolatile);
   __ delayed()->tst(Lscratch);
 
   __ bind(notObj);
@@ -2313,7 +2311,7 @@
   if (!is_static) {
     patch_bytecode(Bytecodes::_fast_igetfield, G3_scratch, G4_scratch);
   }
-  __ ba(false, checkVolatile);
+  __ ba(checkVolatile);
   __ delayed()->tst(Lscratch);
 
   __ bind(notInt);
@@ -2329,7 +2327,7 @@
   if (!is_static) {
     patch_bytecode(Bytecodes::_fast_lgetfield, G3_scratch, G4_scratch);
   }
-  __ ba(false, checkVolatile);
+  __ ba(checkVolatile);
   __ delayed()->tst(Lscratch);
 
   __ bind(notLong);
@@ -2344,7 +2342,7 @@
   if (!is_static) {
     patch_bytecode(Bytecodes::_fast_bgetfield, G3_scratch, G4_scratch);
   }
-  __ ba(false, checkVolatile);
+  __ ba(checkVolatile);
   __ delayed()->tst(Lscratch);
 
   __ bind(notByte);
@@ -2359,7 +2357,7 @@
   if (!is_static) {
     patch_bytecode(Bytecodes::_fast_cgetfield, G3_scratch, G4_scratch);
   }
-  __ ba(false, checkVolatile);
+  __ ba(checkVolatile);
   __ delayed()->tst(Lscratch);
 
   __ bind(notChar);
@@ -2374,7 +2372,7 @@
   if (!is_static) {
     patch_bytecode(Bytecodes::_fast_sgetfield, G3_scratch, G4_scratch);
   }
-  __ ba(false, checkVolatile);
+  __ ba(checkVolatile);
   __ delayed()->tst(Lscratch);
 
   __ bind(notShort);
@@ -2390,7 +2388,7 @@
   if (!is_static) {
     patch_bytecode(Bytecodes::_fast_fgetfield, G3_scratch, G4_scratch);
   }
-  __ ba(false, checkVolatile);
+  __ ba(checkVolatile);
   __ delayed()->tst(Lscratch);
 
   __ bind(notFloat);
@@ -2499,9 +2497,7 @@
     Label done;
     AddressLiteral get_field_modification_count_addr(JvmtiExport::get_field_modification_count_addr());
     __ load_contents(get_field_modification_count_addr, G4_scratch);
-    __ tst(G4_scratch);
-    __ br(Assembler::zero, false, Assembler::pt, done);
-    __ delayed()->nop();
+    __ cmp_and_br_short(G4_scratch, 0, Assembler::equal, Assembler::pt, done);
     __ pop_ptr(G4_scratch);     // copy the object pointer from tos
     __ verify_oop(G4_scratch);
     __ push_ptr(G4_scratch);    // put the object pointer back on tos
@@ -2552,9 +2548,7 @@
     assert_different_registers(Rcache, index, G1_scratch);
     AddressLiteral get_field_modification_count_addr(JvmtiExport::get_field_modification_count_addr());
     __ load_contents(get_field_modification_count_addr, G1_scratch);
-    __ tst(G1_scratch);
-    __ br(Assembler::zero, false, Assembler::pt, Label1);
-    __ delayed()->nop();
+    __ cmp_and_br_short(G1_scratch, 0, Assembler::zero, Assembler::pt, Label1);
 
     // The Rcache and index registers have been already set.
     // This allows to eliminate this call but the Rcache and index
@@ -2584,8 +2578,7 @@
       __ br(Assembler::equal, false, Assembler::pt, two_word);
       __ delayed()->nop();
       __ inc(G4_scratch, Interpreter::expr_offset_in_bytes(1));
-      __ br(Assembler::always, false, Assembler::pt, valsizeknown);
-      __ delayed()->nop();
+      __ ba_short(valsizeknown);
       __ bind(two_word);
 
       __ inc(G4_scratch, Interpreter::expr_offset_in_bytes(2));
@@ -2636,9 +2629,7 @@
     __ and3(Rflags, Lscratch, Lscratch);
 
     if (__ membar_has_effect(read_bits)) {
-      __ tst(Lscratch);
-      __ br(Assembler::zero, false, Assembler::pt, notVolatile);
-      __ delayed()->nop();
+      __ cmp_and_br_short(Lscratch, 0, Assembler::equal, Assembler::pt, notVolatile);
       volatile_barrier(read_bits);
       __ bind(notVolatile);
     }
@@ -2653,150 +2644,162 @@
 
   if (is_static) {
     // putstatic with object type most likely, check that first
-    __ cmp(Rflags, atos );
+    __ cmp(Rflags, atos);
     __ br(Assembler::notEqual, false, Assembler::pt, notObj);
-    __ delayed() ->cmp(Rflags, itos );
+    __ delayed()->cmp(Rflags, itos);
 
     // atos
-    __ pop_ptr();
-    __ verify_oop(Otos_i);
-
-    do_oop_store(_masm, Rclass, Roffset, 0, Otos_i, G1_scratch, _bs->kind(), false);
-
-    __ ba(false, checkVolatile);
-    __ delayed()->tst(Lscratch);
+    {
+      __ pop_ptr();
+      __ verify_oop(Otos_i);
+      do_oop_store(_masm, Rclass, Roffset, 0, Otos_i, G1_scratch, _bs->kind(), false);
+      __ ba(checkVolatile);
+      __ delayed()->tst(Lscratch);
+    }
 
     __ bind(notObj);
-
-    // cmp(Rflags, itos );
+    // cmp(Rflags, itos);
     __ br(Assembler::notEqual, false, Assembler::pt, notInt);
-    __ delayed() ->cmp(Rflags, btos );
+    __ delayed()->cmp(Rflags, btos);
 
     // itos
-    __ pop_i();
-    __ st(Otos_i, Rclass, Roffset);
-    __ ba(false, checkVolatile);
-    __ delayed()->tst(Lscratch);
+    {
+      __ pop_i();
+      __ st(Otos_i, Rclass, Roffset);
+      __ ba(checkVolatile);
+      __ delayed()->tst(Lscratch);
+    }
 
     __ bind(notInt);
-
   } else {
     // putfield with int type most likely, check that first
-    __ cmp(Rflags, itos );
+    __ cmp(Rflags, itos);
     __ br(Assembler::notEqual, false, Assembler::pt, notInt);
-    __ delayed() ->cmp(Rflags, atos );
+    __ delayed()->cmp(Rflags, atos);
 
     // itos
-    __ pop_i();
-    pop_and_check_object(Rclass);
-    __ st(Otos_i, Rclass, Roffset);
-    patch_bytecode(Bytecodes::_fast_iputfield, G3_scratch, G4_scratch);
-    __ ba(false, checkVolatile);
-    __ delayed()->tst(Lscratch);
+    {
+      __ pop_i();
+      pop_and_check_object(Rclass);
+      __ st(Otos_i, Rclass, Roffset);
+      patch_bytecode(Bytecodes::_fast_iputfield, G3_scratch, G4_scratch, true, byte_no);
+      __ ba(checkVolatile);
+      __ delayed()->tst(Lscratch);
+    }
 
     __ bind(notInt);
-    // cmp(Rflags, atos );
+    // cmp(Rflags, atos);
     __ br(Assembler::notEqual, false, Assembler::pt, notObj);
-    __ delayed() ->cmp(Rflags, btos );
+    __ delayed()->cmp(Rflags, btos);
 
     // atos
-    __ pop_ptr();
-    pop_and_check_object(Rclass);
-    __ verify_oop(Otos_i);
-
-    do_oop_store(_masm, Rclass, Roffset, 0, Otos_i, G1_scratch, _bs->kind(), false);
-
-    patch_bytecode(Bytecodes::_fast_aputfield, G3_scratch, G4_scratch);
-    __ ba(false, checkVolatile);
-    __ delayed()->tst(Lscratch);
+    {
+      __ pop_ptr();
+      pop_and_check_object(Rclass);
+      __ verify_oop(Otos_i);
+      do_oop_store(_masm, Rclass, Roffset, 0, Otos_i, G1_scratch, _bs->kind(), false);
+      patch_bytecode(Bytecodes::_fast_aputfield, G3_scratch, G4_scratch, true, byte_no);
+      __ ba(checkVolatile);
+      __ delayed()->tst(Lscratch);
+    }
 
     __ bind(notObj);
   }
 
-  // cmp(Rflags, btos );
+  // cmp(Rflags, btos);
   __ br(Assembler::notEqual, false, Assembler::pt, notByte);
-  __ delayed() ->cmp(Rflags, ltos );
+  __ delayed()->cmp(Rflags, ltos);
 
   // btos
-  __ pop_i();
-  if (!is_static) pop_and_check_object(Rclass);
-  __ stb(Otos_i, Rclass, Roffset);
-  if (!is_static) {
-    patch_bytecode(Bytecodes::_fast_bputfield, G3_scratch, G4_scratch);
+  {
+    __ pop_i();
+    if (!is_static) pop_and_check_object(Rclass);
+    __ stb(Otos_i, Rclass, Roffset);
+    if (!is_static) {
+      patch_bytecode(Bytecodes::_fast_bputfield, G3_scratch, G4_scratch, true, byte_no);
+    }
+    __ ba(checkVolatile);
+    __ delayed()->tst(Lscratch);
   }
-  __ ba(false, checkVolatile);
-  __ delayed()->tst(Lscratch);
 
   __ bind(notByte);
-
-  // cmp(Rflags, ltos );
+  // cmp(Rflags, ltos);
   __ br(Assembler::notEqual, false, Assembler::pt, notLong);
-  __ delayed() ->cmp(Rflags, ctos );
+  __ delayed()->cmp(Rflags, ctos);
 
   // ltos
-  __ pop_l();
-  if (!is_static) pop_and_check_object(Rclass);
-  __ st_long(Otos_l, Rclass, Roffset);
-  if (!is_static) {
-    patch_bytecode(Bytecodes::_fast_lputfield, G3_scratch, G4_scratch);
+  {
+    __ pop_l();
+    if (!is_static) pop_and_check_object(Rclass);
+    __ st_long(Otos_l, Rclass, Roffset);
+    if (!is_static) {
+      patch_bytecode(Bytecodes::_fast_lputfield, G3_scratch, G4_scratch, true, byte_no);
+    }
+    __ ba(checkVolatile);
+    __ delayed()->tst(Lscratch);
   }
-  __ ba(false, checkVolatile);
-  __ delayed()->tst(Lscratch);
 
   __ bind(notLong);
-
-  // cmp(Rflags, ctos );
+  // cmp(Rflags, ctos);
   __ br(Assembler::notEqual, false, Assembler::pt, notChar);
-  __ delayed() ->cmp(Rflags, stos );
+  __ delayed()->cmp(Rflags, stos);
 
   // ctos (char)
-  __ pop_i();
-  if (!is_static) pop_and_check_object(Rclass);
-  __ sth(Otos_i, Rclass, Roffset);
-  if (!is_static) {
-    patch_bytecode(Bytecodes::_fast_cputfield, G3_scratch, G4_scratch);
+  {
+    __ pop_i();
+    if (!is_static) pop_and_check_object(Rclass);
+    __ sth(Otos_i, Rclass, Roffset);
+    if (!is_static) {
+      patch_bytecode(Bytecodes::_fast_cputfield, G3_scratch, G4_scratch, true, byte_no);
+    }
+    __ ba(checkVolatile);
+    __ delayed()->tst(Lscratch);
   }
-  __ ba(false, checkVolatile);
-  __ delayed()->tst(Lscratch);
 
   __ bind(notChar);
-  // cmp(Rflags, stos );
+  // cmp(Rflags, stos);
   __ br(Assembler::notEqual, false, Assembler::pt, notShort);
-  __ delayed() ->cmp(Rflags, ftos );
-
-  // stos (char)
-  __ pop_i();
-  if (!is_static) pop_and_check_object(Rclass);
-  __ sth(Otos_i, Rclass, Roffset);
-  if (!is_static) {
-    patch_bytecode(Bytecodes::_fast_sputfield, G3_scratch, G4_scratch);
+  __ delayed()->cmp(Rflags, ftos);
+
+  // stos (short)
+  {
+    __ pop_i();
+    if (!is_static) pop_and_check_object(Rclass);
+    __ sth(Otos_i, Rclass, Roffset);
+    if (!is_static) {
+      patch_bytecode(Bytecodes::_fast_sputfield, G3_scratch, G4_scratch, true, byte_no);
+    }
+    __ ba(checkVolatile);
+    __ delayed()->tst(Lscratch);
   }
-  __ ba(false, checkVolatile);
-  __ delayed()->tst(Lscratch);
 
   __ bind(notShort);
-  // cmp(Rflags, ftos );
+  // cmp(Rflags, ftos);
   __ br(Assembler::notZero, false, Assembler::pt, notFloat);
   __ delayed()->nop();
 
   // ftos
-  __ pop_f();
-  if (!is_static) pop_and_check_object(Rclass);
-  __ stf(FloatRegisterImpl::S, Ftos_f, Rclass, Roffset);
-  if (!is_static) {
-    patch_bytecode(Bytecodes::_fast_fputfield, G3_scratch, G4_scratch);
+  {
+    __ pop_f();
+    if (!is_static) pop_and_check_object(Rclass);
+    __ stf(FloatRegisterImpl::S, Ftos_f, Rclass, Roffset);
+    if (!is_static) {
+      patch_bytecode(Bytecodes::_fast_fputfield, G3_scratch, G4_scratch, true, byte_no);
+    }
+    __ ba(checkVolatile);
+    __ delayed()->tst(Lscratch);
   }
-  __ ba(false, checkVolatile);
-  __ delayed()->tst(Lscratch);
 
   __ bind(notFloat);
 
   // dtos
-  __ pop_d();
-  if (!is_static) pop_and_check_object(Rclass);
-  __ stf(FloatRegisterImpl::D, Ftos_d, Rclass, Roffset);
-  if (!is_static) {
-    patch_bytecode(Bytecodes::_fast_dputfield, G3_scratch, G4_scratch);
+  {
+    __ pop_d();
+    if (!is_static) pop_and_check_object(Rclass);
+    __ stf(FloatRegisterImpl::D, Ftos_d, Rclass, Roffset);
+    if (!is_static) {
+      patch_bytecode(Bytecodes::_fast_dputfield, G3_scratch, G4_scratch, true, byte_no);
+    }
   }
 
   __ bind(checkVolatile);
@@ -2833,9 +2836,7 @@
     __ set((1 << ConstantPoolCacheEntry::volatileField), Lscratch);
     __ and3(Rflags, Lscratch, Lscratch);
     if (__ membar_has_effect(read_bits)) {
-      __ tst(Lscratch);
-      __ br(Assembler::zero, false, Assembler::pt, notVolatile);
-      __ delayed()->nop();
+      __ cmp_and_br_short(Lscratch, 0, Assembler::equal, Assembler::pt, notVolatile);
       volatile_barrier(read_bits);
       __ bind(notVolatile);
     }
@@ -2864,9 +2865,7 @@
   }
 
   if (__ membar_has_effect(write_bits)) {
-    __ tst(Lscratch);
-    __ br(Assembler::zero, false, Assembler::pt, exit);
-    __ delayed()->nop();
+    __ cmp_and_br_short(Lscratch, 0, Assembler::equal, Assembler::pt, exit);
     volatile_barrier(Assembler::StoreLoad);
     __ bind(exit);
   }
@@ -3226,8 +3225,7 @@
     // the VM should throw IncompatibleClassChangeError.  linkResolver checks
     // this too but that's only if the entry isn't already resolved, so we
     // need to check again.
-    __ br_notnull( Rtemp, false, Assembler::pt, ok);
-    __ delayed()->nop();
+    __ br_notnull_short( Rtemp, Assembler::pt, ok);
     call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_IncompatibleClassChangeError));
     __ should_not_reach_here();
     __ bind(ok);
@@ -3251,9 +3249,7 @@
   // Check for abstract method error.
   {
     Label ok;
-    __ tst(G5_method);
-    __ brx(Assembler::notZero, false, Assembler::pt, ok);
-    __ delayed()->nop();
+    __ br_notnull_short(G5_method, Assembler::pt, ok);
     call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_AbstractMethodError));
     __ should_not_reach_here();
     __ bind(ok);
@@ -3408,17 +3404,14 @@
 #else
       __ srl(RfreeValue, LogHeapWordSize, RfreeValue);
 #endif
-      __ cmp(RtlabWasteLimitValue, RfreeValue);
-      __ brx(Assembler::greaterEqualUnsigned, false, Assembler::pt, slow_case); // tlab waste is small
-      __ delayed()->nop();
+      __ cmp_and_brx_short(RtlabWasteLimitValue, RfreeValue, Assembler::greaterEqualUnsigned, Assembler::pt, slow_case); // tlab waste is small
 
       // increment waste limit to prevent getting stuck on this slow path
       __ add(RtlabWasteLimitValue, ThreadLocalAllocBuffer::refill_waste_limit_increment(), RtlabWasteLimitValue);
       __ st_ptr(RtlabWasteLimitValue, G2_thread, in_bytes(JavaThread::tlab_refill_waste_limit_offset()));
     } else {
       // No allocation in the shared eden.
-      __ br(Assembler::always, false, Assembler::pt, slow_case);
-      __ delayed()->nop();
+      __ ba_short(slow_case);
     }
   }
 
@@ -3440,18 +3433,14 @@
 
     // RnewTopValue contains the top address after the new object
     // has been allocated.
-    __ cmp(RnewTopValue, RendValue);
-    __ brx(Assembler::greaterUnsigned, false, Assembler::pn, slow_case);
-    __ delayed()->nop();
+    __ cmp_and_brx_short(RnewTopValue, RendValue, Assembler::greaterUnsigned, Assembler::pn, slow_case);
 
     __ casx_under_lock(RtopAddr, RoldTopValue, RnewTopValue,
       VM_Version::v9_instructions_work() ? NULL :
       (address)StubRoutines::Sparc::atomic_memory_operation_lock_addr());
 
     // if someone beat us on the allocation, try again, otherwise continue
-    __ cmp(RoldTopValue, RnewTopValue);
-    __ brx(Assembler::notEqual, false, Assembler::pn, retry);
-    __ delayed()->nop();
+    __ cmp_and_brx_short(RoldTopValue, RnewTopValue, Assembler::notEqual, Assembler::pn, retry);
 
     // bump total bytes allocated by this thread
     // RoldTopValue and RtopAddr are dead, so can use G1 and G3
@@ -3474,8 +3463,7 @@
       __ br(Assembler::notEqual, false, Assembler::pt, loop);
       __ delayed()->subcc(Roffset, wordSize, Roffset);
     }
-    __ br(Assembler::always, false, Assembler::pt, initialize_header);
-    __ delayed()->nop();
+    __ ba_short(initialize_header);
   }
 
   // slow case
@@ -3485,8 +3473,7 @@
 
   call_VM(Otos_i, CAST_FROM_FN_PTR(address, InterpreterRuntime::_new), O1, O2);
 
-  __ ba(false, done);
-  __ delayed()->nop();
+  __ ba_short(done);
 
   // Initialize the header: mark, klass
   __ bind(initialize_header);
@@ -3550,8 +3537,7 @@
   Register RspecifiedKlass = O4;
 
   // Check for casting a NULL
-  __ br_null(Otos_i, false, Assembler::pn, is_null);
-  __ delayed()->nop();
+  __ br_null_short(Otos_i, Assembler::pn, is_null);
 
   // Get value klass in RobjKlass
   __ load_klass(Otos_i, RobjKlass); // get value klass
@@ -3571,8 +3557,7 @@
   call_VM(RspecifiedKlass, CAST_FROM_FN_PTR(address, InterpreterRuntime::quicken_io_cc) );
   __ pop_ptr(Otos_i, G3_scratch); // restore receiver
 
-  __ br(Assembler::always, false, Assembler::pt, resolved);
-  __ delayed()->nop();
+  __ ba_short(resolved);
 
   // Extract target class from constant pool
   __ bind(quicked);
@@ -3591,8 +3576,7 @@
   __ bind(cast_ok);
 
   if (ProfileInterpreter) {
-    __ ba(false, done);
-    __ delayed()->nop();
+    __ ba_short(done);
   }
   __ bind(is_null);
   __ profile_null_seen(G3_scratch);
@@ -3608,8 +3592,7 @@
   Register RspecifiedKlass = O4;
 
   // Check for casting a NULL
-  __ br_null(Otos_i, false, Assembler::pt, is_null);
-  __ delayed()->nop();
+  __ br_null_short(Otos_i, Assembler::pt, is_null);
 
   // Get value klass in RobjKlass
   __ load_klass(Otos_i, RobjKlass); // get value klass
@@ -3629,9 +3612,7 @@
   call_VM(RspecifiedKlass, CAST_FROM_FN_PTR(address, InterpreterRuntime::quicken_io_cc) );
   __ pop_ptr(Otos_i, G3_scratch); // restore receiver
 
-  __ br(Assembler::always, false, Assembler::pt, resolved);
-  __ delayed()->nop();
-
+  __ ba_short(resolved);
 
   // Extract target class from constant pool
   __ bind(quicked);
@@ -3649,8 +3630,7 @@
   __ clr( Otos_i );
 
   if (ProfileInterpreter) {
-    __ ba(false, done);
-    __ delayed()->nop();
+    __ ba_short(done);
   }
   __ bind(is_null);
   __ profile_null_seen(G3_scratch);
@@ -3724,7 +3704,7 @@
   {
     Label entry, loop, exit;
     __ add( __ top_most_monitor(), O2 ); // last one to check
-    __ ba( false, entry );
+    __ ba( entry );
     __ delayed()->mov( Lmonitors, O3 ); // first one to check
 
 
@@ -3757,8 +3737,7 @@
   { Label allocated;
 
     // found free slot?
-    __ br_notnull(O1, false, Assembler::pn, allocated);
-    __ delayed()->nop();
+    __ br_notnull_short(O1, Assembler::pn, allocated);
 
     __ add_monitor_to_stack( false, O2, O3 );
     __ mov(Lmonitors, O1);
@@ -3791,7 +3770,7 @@
 
   { Label entry, loop, found;
     __ add( __ top_most_monitor(), O2 ); // last one to check
-    __ ba(false, entry );
+    __ ba(entry);
     // use Lscratch to hold monitor elem to check, start with most recent monitor,
     // By using a local it survives the call to the C routine.
     __ delayed()->mov( Lmonitors, Lscratch );
--- a/src/cpu/sparc/vm/vm_version_sparc.cpp	Wed Aug 17 07:05:42 2011 -0400
+++ b/src/cpu/sparc/vm/vm_version_sparc.cpp	Fri Aug 19 08:55:53 2011 -0700
@@ -44,24 +44,40 @@
   PrefetchScanIntervalInBytes = prefetch_scan_interval_in_bytes();
   PrefetchFieldsAhead         = prefetch_fields_ahead();
 
+  assert(0 <= AllocatePrefetchInstr && AllocatePrefetchInstr <= 1, "invalid value");
+  if( AllocatePrefetchInstr < 0 ) AllocatePrefetchInstr = 0;
+  if( AllocatePrefetchInstr > 1 ) AllocatePrefetchInstr = 0;
+
   // Allocation prefetch settings
-  intx cache_line_size = L1_data_cache_line_size();
+  intx cache_line_size = prefetch_data_size();
   if( cache_line_size > AllocatePrefetchStepSize )
     AllocatePrefetchStepSize = cache_line_size;
-  if( FLAG_IS_DEFAULT(AllocatePrefetchLines) )
-    AllocatePrefetchLines = 3; // Optimistic value
-  assert( AllocatePrefetchLines > 0, "invalid value");
-  if( AllocatePrefetchLines < 1 ) // set valid value in product VM
-    AllocatePrefetchLines = 1; // Conservative value
+
+  assert(AllocatePrefetchLines > 0, "invalid value");
+  if( AllocatePrefetchLines < 1 )     // set valid value in product VM
+    AllocatePrefetchLines = 3;
+  assert(AllocateInstancePrefetchLines > 0, "invalid value");
+  if( AllocateInstancePrefetchLines < 1 ) // set valid value in product VM
+    AllocateInstancePrefetchLines = 1;
 
   AllocatePrefetchDistance = allocate_prefetch_distance();
   AllocatePrefetchStyle    = allocate_prefetch_style();
 
-  assert(AllocatePrefetchDistance % AllocatePrefetchStepSize == 0, "invalid value");
+  assert((AllocatePrefetchDistance % AllocatePrefetchStepSize) == 0 &&
+         (AllocatePrefetchDistance > 0), "invalid value");
+  if ((AllocatePrefetchDistance % AllocatePrefetchStepSize) != 0 ||
+      (AllocatePrefetchDistance <= 0)) {
+    AllocatePrefetchDistance = AllocatePrefetchStepSize;
+  }
+
+  if (AllocatePrefetchStyle == 3 && !has_blk_init()) {
+    warning("BIS instructions are not available on this CPU");
+    FLAG_SET_DEFAULT(AllocatePrefetchStyle, 1);
+  }
 
   UseSSE = 0; // Only on x86 and x64
 
-  _supports_cx8               = has_v9();
+  _supports_cx8 = has_v9();
 
   if (is_niagara()) {
     // Indirect branch is the same cost as direct
@@ -94,19 +110,42 @@
       FLAG_SET_DEFAULT(InteriorEntryAlignment, 4);
     }
     if (is_niagara_plus()) {
-      if (has_blk_init() && AllocatePrefetchStyle > 0 &&
-          FLAG_IS_DEFAULT(AllocatePrefetchStyle)) {
-        // Use BIS instruction for allocation prefetch.
-        FLAG_SET_DEFAULT(AllocatePrefetchStyle, 3);
+      if (has_blk_init() && UseTLAB &&
+          FLAG_IS_DEFAULT(AllocatePrefetchInstr)) {
+        // Use BIS instruction for TLAB allocation prefetch.
+        FLAG_SET_ERGO(intx, AllocatePrefetchInstr, 1);
+        if (FLAG_IS_DEFAULT(AllocatePrefetchStyle)) {
+          FLAG_SET_ERGO(intx, AllocatePrefetchStyle, 3);
+        }
         if (FLAG_IS_DEFAULT(AllocatePrefetchDistance)) {
-          // Use smaller prefetch distance on N2 with BIS
+          // Use smaller prefetch distance with BIS
           FLAG_SET_DEFAULT(AllocatePrefetchDistance, 64);
         }
       }
+      if (is_T4()) {
+        // Double the number of prefetched cache lines on T4
+        // since its L2 cache line size is smaller (32 bytes).
+        if (FLAG_IS_DEFAULT(AllocatePrefetchLines)) {
+          FLAG_SET_ERGO(intx, AllocatePrefetchLines, AllocatePrefetchLines*2);
+        }
+        if (FLAG_IS_DEFAULT(AllocateInstancePrefetchLines)) {
+          FLAG_SET_ERGO(intx, AllocateInstancePrefetchLines, AllocateInstancePrefetchLines*2);
+        }
+      }
       if (AllocatePrefetchStyle != 3 && FLAG_IS_DEFAULT(AllocatePrefetchDistance)) {
         // Use different prefetch distance without BIS
         FLAG_SET_DEFAULT(AllocatePrefetchDistance, 256);
       }
+      if (AllocatePrefetchInstr == 1) {
+        // Need space at the end of the TLAB for BIS since it
+        // will fault when accessing memory outside of the heap.
+
+        // +1 for rounding up to next cache line, +1 to be safe
+        int lines = AllocatePrefetchLines + 2;
+        int step_size = AllocatePrefetchStepSize;
+        int distance = AllocatePrefetchDistance;
+        _reserve_for_allocation_prefetch = (distance + step_size*lines)/(int)HeapWordSize;
+      }
     }
 #endif
   }
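
To make the TLAB reserve computation concrete, take illustrative T4-style values (the exact numbers are assumptions, not asserted by this hunk): AllocatePrefetchDistance = 64, AllocatePrefetchStepSize = 32, and AllocatePrefetchLines doubled to 6. Then:

    lines   = 6 + 2 = 8                       // +1 round-up, +1 safety
    reserve = (64 + 32 * 8) / HeapWordSize    // = 320 / 8 = 40 words on LP64

i.e. 320 bytes are kept free at the end of each TLAB so a BIS store never touches memory outside the heap.
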
@@ -116,27 +155,49 @@
     if (FLAG_IS_DEFAULT(UsePopCountInstruction)) {
       FLAG_SET_DEFAULT(UsePopCountInstruction, true);
     }
+  } else if (UsePopCountInstruction) {
+    warning("POPC instruction is not available on this CPU");
+    FLAG_SET_DEFAULT(UsePopCountInstruction, false);
+  }
+
+  // T4 and newer Sparc cpus have a new compare-and-branch instruction.
+  if (has_cbcond()) {
+    if (FLAG_IS_DEFAULT(UseCBCond)) {
+      FLAG_SET_DEFAULT(UseCBCond, true);
+    }
+  } else if (UseCBCond) {
+    warning("CBCOND instruction is not available on this CPU");
+    FLAG_SET_DEFAULT(UseCBCond, false);
   }
 
 #ifdef COMPILER2
+  // T4 and newer Sparc cpus have fast RDPC.
+  if (has_fast_rdpc() && FLAG_IS_DEFAULT(UseRDPCForConstantTableBase)) {
+//    FLAG_SET_DEFAULT(UseRDPCForConstantTableBase, true);
+  }
+
   // Currently not supported anywhere.
   FLAG_SET_DEFAULT(UseFPUForSpilling, false);
+
+  assert((InteriorEntryAlignment % relocInfo::addr_unit()) == 0, "alignment is not a multiple of NOP size");
 #endif
 
+  assert((CodeEntryAlignment % relocInfo::addr_unit()) == 0, "alignment is not a multiple of NOP size");
+  assert((OptoLoopAlignment % relocInfo::addr_unit()) == 0, "alignment is not a multiple of NOP size");
+
   char buf[512];
-  jio_snprintf(buf, sizeof(buf), "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
-               (has_v8() ? ", has_v8" : ""),
-               (has_v9() ? ", has_v9" : ""),
+  jio_snprintf(buf, sizeof(buf), "%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
+               (has_v9() ? ", v9" : (has_v8() ? ", v8" : "")),
                (has_hardware_popc() ? ", popc" : ""),
-               (has_vis1() ? ", has_vis1" : ""),
-               (has_vis2() ? ", has_vis2" : ""),
-               (has_vis3() ? ", has_vis3" : ""),
-               (has_blk_init() ? ", has_blk_init" : ""),
-               (is_ultra3() ? ", is_ultra3" : ""),
-               (is_sun4v() ? ", is_sun4v" : ""),
-               (is_niagara() ? ", is_niagara" : ""),
-               (is_niagara_plus() ? ", is_niagara_plus" : ""),
-               (is_sparc64() ? ", is_sparc64" : ""),
+               (has_vis1() ? ", vis1" : ""),
+               (has_vis2() ? ", vis2" : ""),
+               (has_vis3() ? ", vis3" : ""),
+               (has_blk_init() ? ", blk_init" : ""),
+               (has_cbcond() ? ", cbcond" : ""),
+               (is_ultra3() ? ", ultra3" : ""),
+               (is_sun4v() ? ", sun4v" : ""),
+               (is_niagara_plus() ? ", niagara_plus" : (is_niagara() ? ", niagara" : "")),
+               (is_sparc64() ? ", sparc64" : ""),
                (!has_hardware_mul32() ? ", no-mul32" : ""),
                (!has_hardware_div32() ? ", no-div32" : ""),
                (!has_hardware_fsmuld() ? ", no-fsmuld" : ""));
@@ -158,14 +219,20 @@
 
 #ifndef PRODUCT
   if (PrintMiscellaneous && Verbose) {
-    tty->print("Allocation: ");
+    tty->print("Allocation");
     if (AllocatePrefetchStyle <= 0) {
-      tty->print_cr("no prefetching");
+      tty->print_cr(": no prefetching");
     } else {
+      tty->print(" prefetching: ");
+      if (AllocatePrefetchInstr == 0) {
+          tty->print("PREFETCH");
+      } else if (AllocatePrefetchInstr == 1) {
+          tty->print("BIS");
+      }
       if (AllocatePrefetchLines > 1) {
-        tty->print_cr("PREFETCH %d, %d lines of size %d bytes", AllocatePrefetchDistance, AllocatePrefetchLines, AllocatePrefetchStepSize);
+        tty->print_cr(" at distance %d, %d lines of %d bytes", AllocatePrefetchDistance, AllocatePrefetchLines, AllocatePrefetchStepSize);
       } else {
-        tty->print_cr("PREFETCH %d, one line", AllocatePrefetchDistance);
+        tty->print_cr(" at distance %d, one line of %d bytes", AllocatePrefetchDistance, AllocatePrefetchStepSize);
       }
     }
     if (PrefetchCopyIntervalInBytes > 0) {
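
For illustration, with the same assumed settings as above (BIS, distance 64, 6 lines of 32 bytes), the revised tracing code would print:

    Allocation prefetching: BIS at distance 64, 6 lines of 32 bytes

whereas the old code printed only the raw PREFETCH parameters without naming the instruction used.
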
--- a/src/cpu/sparc/vm/vm_version_sparc.hpp	Wed Aug 17 07:05:42 2011 -0400
+++ b/src/cpu/sparc/vm/vm_version_sparc.hpp	Fri Aug 19 08:55:53 2011 -0700
@@ -31,44 +31,46 @@
 class VM_Version: public Abstract_VM_Version {
 protected:
   enum Feature_Flag {
-    v8_instructions    = 0,
-    hardware_mul32     = 1,
-    hardware_div32     = 2,
-    hardware_fsmuld    = 3,
-    hardware_popc      = 4,
-    v9_instructions    = 5,
-    vis1_instructions  = 6,
-    vis2_instructions  = 7,
-    sun4v_instructions = 8,
+    v8_instructions      = 0,
+    hardware_mul32       = 1,
+    hardware_div32       = 2,
+    hardware_fsmuld      = 3,
+    hardware_popc        = 4,
+    v9_instructions      = 5,
+    vis1_instructions    = 6,
+    vis2_instructions    = 7,
+    sun4v_instructions   = 8,
     blk_init_instructions = 9,
-    fmaf_instructions  = 10,
-    fmau_instructions  = 11,
-    vis3_instructions  = 12,
-    sparc64_family     = 13,
-    T_family           = 14,
-    T1_model           = 15
+    fmaf_instructions    = 10,
+    fmau_instructions    = 11,
+    vis3_instructions    = 12,
+    sparc64_family       = 13,
+    T_family             = 14,
+    T1_model             = 15,
+    cbcond_instructions  = 16
   };
 
   enum Feature_Flag_Set {
     unknown_m           = 0,
     all_features_m      = -1,
 
-    v8_instructions_m   = 1 << v8_instructions,
-    hardware_mul32_m    = 1 << hardware_mul32,
-    hardware_div32_m    = 1 << hardware_div32,
-    hardware_fsmuld_m   = 1 << hardware_fsmuld,
-    hardware_popc_m     = 1 << hardware_popc,
-    v9_instructions_m   = 1 << v9_instructions,
-    vis1_instructions_m = 1 << vis1_instructions,
-    vis2_instructions_m = 1 << vis2_instructions,
-    sun4v_m             = 1 << sun4v_instructions,
+    v8_instructions_m       = 1 << v8_instructions,
+    hardware_mul32_m        = 1 << hardware_mul32,
+    hardware_div32_m        = 1 << hardware_div32,
+    hardware_fsmuld_m       = 1 << hardware_fsmuld,
+    hardware_popc_m         = 1 << hardware_popc,
+    v9_instructions_m       = 1 << v9_instructions,
+    vis1_instructions_m     = 1 << vis1_instructions,
+    vis2_instructions_m     = 1 << vis2_instructions,
+    sun4v_m                 = 1 << sun4v_instructions,
     blk_init_instructions_m = 1 << blk_init_instructions,
-    fmaf_instructions_m = 1 << fmaf_instructions,
-    fmau_instructions_m = 1 << fmau_instructions,
-    vis3_instructions_m = 1 << vis3_instructions,
-    sparc64_family_m    = 1 << sparc64_family,
-    T_family_m          = 1 << T_family,
-    T1_model_m          = 1 << T1_model,
+    fmaf_instructions_m     = 1 << fmaf_instructions,
+    fmau_instructions_m     = 1 << fmau_instructions,
+    vis3_instructions_m     = 1 << vis3_instructions,
+    sparc64_family_m        = 1 << sparc64_family,
+    T_family_m              = 1 << T_family,
+    T1_model_m              = 1 << T1_model,
+    cbcond_instructions_m   = 1 << cbcond_instructions,
 
     generic_v8_m        = v8_instructions_m | hardware_mul32_m | hardware_div32_m | hardware_fsmuld_m,
     generic_v9_m        = generic_v8_m | v9_instructions_m,
@@ -111,25 +113,35 @@
   static bool has_vis2()                { return (_features & vis2_instructions_m) != 0; }
   static bool has_vis3()                { return (_features & vis3_instructions_m) != 0; }
   static bool has_blk_init()            { return (_features & blk_init_instructions_m) != 0; }
+  static bool has_cbcond()              { return (_features & cbcond_instructions_m) != 0; }
 
   static bool supports_compare_and_exchange()
                                         { return has_v9(); }
 
-  static bool is_ultra3()               { return (_features & ultra3_m) == ultra3_m; }
-  static bool is_sun4v()                { return (_features & sun4v_m) != 0; }
   // Returns true if the platform is in the niagara line (T series)
   // and newer than the niagara1.
   static bool is_niagara_plus()         { return is_T_family(_features) && !is_T1_model(_features); }
+  static bool is_T4()                   { return is_T_family(_features) && has_cbcond(); }
+
   // Fujitsu SPARC64
   static bool is_sparc64()              { return (_features & sparc64_family_m) != 0; }
 
+  static bool is_sun4v()                { return (_features & sun4v_m) != 0; }
+  static bool is_ultra3()               { return (_features & ultra3_m) == ultra3_m && !is_sun4v() && !is_sparc64(); }
+
   static bool has_fast_fxtof()          { return is_niagara() || is_sparc64() || has_v9() && !is_ultra3(); }
   static bool has_fast_idiv()           { return is_niagara_plus() || is_sparc64(); }
 
+  // T4 and newer Sparc have a fast RDPC instruction.
+  static bool has_fast_rdpc()           { return is_T4(); }
+
+  // T4 and newer Sparc have Most-Recently-Used (MRU) BIS.
+  static bool has_mru_blk_init()        { return has_blk_init() && is_T4(); }
+
   static const char* cpu_features()     { return _features_str; }
 
-  static intx L1_data_cache_line_size()  {
-    return 64;  // default prefetch block size on sparc
+  static intx prefetch_data_size()  {
+    return is_T4() ? 32 : 64;  // default prefetch block size on sparc
   }
 
   // Prefetch
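
prefetch_data_size() returning 32 on T4 is what motivates the line-doubling ergonomics in vm_version_sparc.cpp above: with the default of 3 lines (the clamp value shown in that file), 3 x 64-byte lines cover 192 bytes on older parts, and doubling to 6 x 32-byte lines keeps roughly the same 192-byte prefetch window on T4.
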
--- a/src/cpu/sparc/vm/vtableStubs_sparc.cpp	Wed Aug 17 07:05:42 2011 -0400
+++ b/src/cpu/sparc/vm/vtableStubs_sparc.cpp	Fri Aug 19 08:55:53 2011 -0700
@@ -76,9 +76,7 @@
     Label L;
     // check offset vs vtable length
     __ ld(G3_scratch, instanceKlass::vtable_length_offset()*wordSize, G5);
-    __ cmp(G5, vtable_index*vtableEntry::size());
-    __ br(Assembler::greaterUnsigned, false, Assembler::pt, L);
-    __ delayed()->nop();
+    __ cmp_and_br_short(G5, vtable_index*vtableEntry::size(), Assembler::greaterUnsigned, Assembler::pt, L);
     __ set(vtable_index, O2);
     __ call_VM(noreg, CAST_FROM_FN_PTR(address, bad_compiled_vtable_index), O0, O2);
     __ bind(L);
@@ -95,8 +93,7 @@
 #ifndef PRODUCT
   if (DebugVtables) {
     Label L;
-    __ br_notnull(G5_method, false, Assembler::pt, L);
-    __ delayed()->nop();
+    __ br_notnull_short(G5_method, Assembler::pt, L);
     __ stop("Vtable entry is ZERO");
     __ bind(L);
   }
@@ -177,8 +174,7 @@
 #ifndef PRODUCT
   if (DebugVtables) {
     Label L01;
-    __ bpr(Assembler::rc_nz, false, Assembler::pt, L5_method, L01);
-    __ delayed()->nop();
+    __ br_notnull_short(L5_method, Assembler::pt, L01);
     __ stop("methodOop is null");
     __ bind(L01);
     __ verify_oop(L5_method);
--- a/src/cpu/x86/vm/assembler_x86.cpp	Wed Aug 17 07:05:42 2011 -0400
+++ b/src/cpu/x86/vm/assembler_x86.cpp	Fri Aug 19 08:55:53 2011 -0700
@@ -1339,9 +1339,8 @@
   emit_operand(rax, dst);
 }
 
-void Assembler::jcc(Condition cc, Label& L, relocInfo::relocType rtype) {
-  InstructionMark im(this);
-  relocate(rtype);
+void Assembler::jcc(Condition cc, Label& L, bool maybe_short) {
+  InstructionMark im(this);
   assert((0 <= cc) && (cc < 16), "illegal cc");
   if (L.is_bound()) {
     address dst = target(L);
@@ -1350,7 +1349,7 @@
     const int short_size = 2;
     const int long_size = 6;
     intptr_t offs = (intptr_t)dst - (intptr_t)_code_pos;
-    if (rtype == relocInfo::none && is8bit(offs - short_size)) {
+    if (maybe_short && is8bit(offs - short_size)) {
       // 0111 tttn #8-bit disp
       emit_byte(0x70 | cc);
       emit_byte((offs - short_size) & 0xFF);
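
The old relocType parameter, when not relocInfo::none, had the side effect of forcing the long encoding; the new bool expresses just that choice. Under the new signature (see assembler_x86.hpp below), a hypothetical caller that knows a label may land out of 8-bit range would write:

    Label L;
    __ jcc(Assembler::equal, L, false);  // maybe_short=false: always the 6-byte
                                         // 0F 8x rel32 form, never 70 xx rel8
    __ jmp(L, false);                    // likewise for the unconditional jump

with the default maybe_short = true letting bound, in-range targets use the 2-byte short form.
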
@@ -1399,7 +1398,7 @@
   emit_operand(rsp, adr);
 }
 
-void Assembler::jmp(Label& L, relocInfo::relocType rtype) {
+void Assembler::jmp(Label& L, bool maybe_short) {
   if (L.is_bound()) {
     address entry = target(L);
     assert(entry != NULL, "jmp most probably wrong");
@@ -1407,7 +1406,7 @@
     const int short_size = 2;
     const int long_size = 5;
     intptr_t offs = entry - _code_pos;
-    if (rtype == relocInfo::none && is8bit(offs - short_size)) {
+    if (maybe_short && is8bit(offs - short_size)) {
       emit_byte(0xEB);
       emit_byte((offs - short_size) & 0xFF);
     } else {
@@ -1420,7 +1419,6 @@
     // the forward jump will not run beyond 256 bytes, use jmpb to
     // force an 8-bit displacement.
     InstructionMark im(this);
-    relocate(rtype);
     L.add_patch_at(code(), locator());
     emit_byte(0xE9);
     emit_long(0);
@@ -2317,7 +2315,7 @@
 }
 
 void Assembler::prefetchr(Address src) {
-  NOT_LP64(assert(VM_Version::supports_3dnow_prefetch(), "must support"));
+  assert(VM_Version::supports_3dnow_prefetch(), "must support");
   InstructionMark im(this);
   prefetch_prefix(src);
   emit_byte(0x0D);
@@ -2349,7 +2347,7 @@
 }
 
 void Assembler::prefetchw(Address src) {
-  NOT_LP64(assert(VM_Version::supports_3dnow_prefetch(), "must support"));
+  assert(VM_Version::supports_3dnow_prefetch(), "must support");
   InstructionMark im(this);
   prefetch_prefix(src);
   emit_byte(0x0D);
@@ -3674,7 +3672,7 @@
     } else {
       if (adr.index_needs_rex()) {
         prefix(REX_X);
-      } else if (reg->encoding() >= 4 ) {
+      } else if (byteinst && reg->encoding() >= 4 ) {
         prefix(REX);
       }
     }
--- a/src/cpu/x86/vm/assembler_x86.hpp	Wed Aug 17 07:05:42 2011 -0400
+++ b/src/cpu/x86/vm/assembler_x86.hpp	Fri Aug 19 08:55:53 2011 -0700
@@ -1065,8 +1065,7 @@
   // Note: The same Label can be used for forward and backward branches
   // but it may be bound only once.
 
-  void jcc(Condition cc, Label& L,
-           relocInfo::relocType rtype = relocInfo::none);
+  void jcc(Condition cc, Label& L, bool maybe_short = true);
 
   // Conditional jump to a 8-bit offset to L.
   // WARNING: be very careful using this for forward jumps.  If the label is
@@ -1077,7 +1076,7 @@
   void jmp(Address entry);    // pc <- entry
 
   // Label operations & relative jumps (PPUM Appendix D)
-  void jmp(Label& L, relocInfo::relocType rtype = relocInfo::none);   // unconditional jump to L
+  void jmp(Label& L, bool maybe_short = true);   // unconditional jump to L
 
   void jmp(Register entry); // pc <- entry
 
--- a/src/cpu/x86/vm/interp_masm_x86_32.cpp	Wed Aug 17 07:05:42 2011 -0400
+++ b/src/cpu/x86/vm/interp_masm_x86_32.cpp	Fri Aug 19 08:55:53 2011 -0700
@@ -233,7 +233,7 @@
 
 void InterpreterMacroAssembler::get_cache_and_index_at_bcp(Register cache, Register index,
                                                            int bcp_offset, size_t index_size) {
-  assert(cache != index, "must use different registers");
+  assert_different_registers(cache, index);
   get_cache_index_at_bcp(index, bcp_offset, index_size);
   movptr(cache, Address(rbp, frame::interpreter_frame_cache_offset * wordSize));
   assert(sizeof(ConstantPoolCacheEntry) == 4*wordSize, "adjust code below");
@@ -241,6 +241,20 @@
 }
 
 
+void InterpreterMacroAssembler::get_cache_and_index_and_bytecode_at_bcp(Register cache,
+                                                                        Register index,
+                                                                        Register bytecode,
+                                                                        int byte_no,
+                                                                        int bcp_offset,
+                                                                        size_t index_size) {
+  get_cache_and_index_at_bcp(cache, index, bcp_offset, index_size);
+  movptr(bytecode, Address(cache, index, Address::times_ptr, constantPoolCacheOopDesc::base_offset() + ConstantPoolCacheEntry::indices_offset()));
+  const int shift_count = (1 + byte_no) * BitsPerByte;
+  shrptr(bytecode, shift_count);
+  andptr(bytecode, 0xFF);
+}
+
+
 void InterpreterMacroAssembler::get_cache_entry_pointer_at_bcp(Register cache, Register tmp,
                                                                int bcp_offset, size_t index_size) {
   assert(cache != tmp, "must use different register");
--- a/src/cpu/x86/vm/interp_masm_x86_32.hpp	Wed Aug 17 07:05:42 2011 -0400
+++ b/src/cpu/x86/vm/interp_masm_x86_32.hpp	Fri Aug 19 08:55:53 2011 -0700
@@ -83,6 +83,7 @@
   }
   void get_unsigned_2_byte_index_at_bcp(Register reg, int bcp_offset);
   void get_cache_and_index_at_bcp(Register cache, Register index, int bcp_offset, size_t index_size = sizeof(u2));
+  void get_cache_and_index_and_bytecode_at_bcp(Register cache, Register index, Register bytecode, int byte_no, int bcp_offset, size_t index_size = sizeof(u2));
   void get_cache_entry_pointer_at_bcp(Register cache, Register tmp, int bcp_offset, size_t index_size = sizeof(u2));
   void get_cache_index_at_bcp(Register index, int bcp_offset, size_t index_size = sizeof(u2));
 
--- a/src/cpu/x86/vm/interp_masm_x86_64.cpp	Wed Aug 17 07:05:42 2011 -0400
+++ b/src/cpu/x86/vm/interp_masm_x86_64.cpp	Fri Aug 19 08:55:53 2011 -0700
@@ -233,7 +233,7 @@
                                                            Register index,
                                                            int bcp_offset,
                                                            size_t index_size) {
-  assert(cache != index, "must use different registers");
+  assert_different_registers(cache, index);
   get_cache_index_at_bcp(index, bcp_offset, index_size);
   movptr(cache, Address(rbp, frame::interpreter_frame_cache_offset * wordSize));
   assert(sizeof(ConstantPoolCacheEntry) == 4 * wordSize, "adjust code below");
@@ -242,6 +242,22 @@
 }
 
 
+void InterpreterMacroAssembler::get_cache_and_index_and_bytecode_at_bcp(Register cache,
+                                                                        Register index,
+                                                                        Register bytecode,
+                                                                        int byte_no,
+                                                                        int bcp_offset,
+                                                                        size_t index_size) {
+  get_cache_and_index_at_bcp(cache, index, bcp_offset, index_size);
+  // We use a 32-bit load here since the layout of 64-bit words on
+  // little-endian machines allows us to do so.
+  movl(bytecode, Address(cache, index, Address::times_ptr, constantPoolCacheOopDesc::base_offset() + ConstantPoolCacheEntry::indices_offset()));
+  const int shift_count = (1 + byte_no) * BitsPerByte;
+  shrl(bytecode, shift_count);
+  andl(bytecode, 0xFF);
+}
+
+
 void InterpreterMacroAssembler::get_cache_entry_pointer_at_bcp(Register cache,
                                                                Register tmp,
                                                                int bcp_offset,
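 
 The 64-bit variant above leans on the little-endian comment: a 32-bit load at the address of a 64-bit slot reads exactly the slot's low half. A self-contained C++ sketch of that assumption (illustrative, not part of the patch):
 
     #include <stdint.h>
     #include <string.h>
 
     int main() {
       uint64_t slot = 0x00000000B2B10123ull; // indices word widened to 64 bits
       uint32_t lo;
       memcpy(&lo, &slot, sizeof(lo));        // what the movl of the slot reads
       // Little-endian x86_64: lo == 0xB2B10123, the meaningful low half.
       // A big-endian machine would read the zero high half instead, which is
       // why the comment restricts the shortcut to little-endian layouts.
       return lo == 0xB2B10123u ? 0 : 1;
     }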
--- a/src/cpu/x86/vm/interp_masm_x86_64.hpp	Wed Aug 17 07:05:42 2011 -0400
+++ b/src/cpu/x86/vm/interp_masm_x86_64.hpp	Fri Aug 19 08:55:53 2011 -0700
@@ -100,13 +100,11 @@
   }
 
   void get_unsigned_2_byte_index_at_bcp(Register reg, int bcp_offset);
-  void get_cache_and_index_at_bcp(Register cache, Register index,
-                                  int bcp_offset, size_t index_size = sizeof(u2));
-  void get_cache_entry_pointer_at_bcp(Register cache, Register tmp,
-                                      int bcp_offset, size_t index_size = sizeof(u2));
+  void get_cache_and_index_at_bcp(Register cache, Register index, int bcp_offset, size_t index_size = sizeof(u2));
+  void get_cache_and_index_and_bytecode_at_bcp(Register cache, Register index, Register bytecode, int byte_no, int bcp_offset, size_t index_size = sizeof(u2));
+  void get_cache_entry_pointer_at_bcp(Register cache, Register tmp, int bcp_offset, size_t index_size = sizeof(u2));
   void get_cache_index_at_bcp(Register index, int bcp_offset, size_t index_size = sizeof(u2));
 
-
   void pop_ptr(Register r = rax);
   void pop_i(Register r = rax);
   void pop_l(Register r = rax);
--- a/src/cpu/x86/vm/methodHandles_x86.cpp	Wed Aug 17 07:05:42 2011 -0400
+++ b/src/cpu/x86/vm/methodHandles_x86.cpp	Fri Aug 19 08:55:53 2011 -0700
@@ -1192,11 +1192,11 @@
       const int jobject_oop_offset = 0;
       __ movptr(rbx_method, Address(rbx_method, jobject_oop_offset));  // dereference the jobject
 
-      __ movptr(rsi, rsp);
+      __ movptr(saved_last_sp, rsp);
       __ subptr(rsp, 3 * wordSize);
       __ push(rax_pc);         // restore caller PC
 
-      __ movptr(__ argument_address(constant(2)), rarg0_code);
+      __ movl  (__ argument_address(constant(2)), rarg0_code);
       __ movptr(__ argument_address(constant(1)), rarg1_actual);
       __ movptr(__ argument_address(constant(0)), rarg2_required);
       jump_from_method_handle(_masm, rbx_method, rax);
--- a/src/cpu/x86/vm/templateTable_x86_32.cpp	Wed Aug 17 07:05:42 2011 -0400
+++ b/src/cpu/x86/vm/templateTable_x86_32.cpp	Fri Aug 19 08:55:53 2011 -0700
@@ -202,45 +202,74 @@
 }
 
 
-void TemplateTable::patch_bytecode(Bytecodes::Code bytecode, Register bc,
-                                   Register scratch,
-                                   bool load_bc_into_scratch/*=true*/) {
-
-  if (!RewriteBytecodes) return;
-  // the pair bytecodes have already done the load.
-  if (load_bc_into_scratch) {
-    __ movl(bc, bytecode);
+void TemplateTable::patch_bytecode(Bytecodes::Code bc, Register bc_reg,
+                                   Register temp_reg, bool load_bc_into_bc_reg/*=true*/,
+                                   int byte_no) {
+  if (!RewriteBytecodes)  return;
+  Label L_patch_done;
+
+  switch (bc) {
+  case Bytecodes::_fast_aputfield:
+  case Bytecodes::_fast_bputfield:
+  case Bytecodes::_fast_cputfield:
+  case Bytecodes::_fast_dputfield:
+  case Bytecodes::_fast_fputfield:
+  case Bytecodes::_fast_iputfield:
+  case Bytecodes::_fast_lputfield:
+  case Bytecodes::_fast_sputfield:
+    {
+      // We skip bytecode quickening for putfield instructions when
+      // the put_code written to the constant pool cache is zero.
+      // This is required so that every execution of this instruction
+      // calls out to InterpreterRuntime::resolve_get_put to do
+      // additional, required work.
+      assert(byte_no == f1_byte || byte_no == f2_byte, "byte_no out of range");
+      assert(load_bc_into_bc_reg, "we use bc_reg as temp");
+      __ get_cache_and_index_and_bytecode_at_bcp(bc_reg, temp_reg, temp_reg, byte_no, 1);
+      __ movl(bc_reg, bc);
+      __ cmpl(temp_reg, (int) 0);
+      __ jcc(Assembler::zero, L_patch_done);  // don't patch
+    }
+    break;
+  default:
+    assert(byte_no == -1, "sanity");
+    // the pair bytecodes have already done the load.
+    if (load_bc_into_bc_reg) {
+      __ movl(bc_reg, bc);
+    }
   }
-  Label patch_done;
+
   if (JvmtiExport::can_post_breakpoint()) {
-    Label fast_patch;
+    Label L_fast_patch;
     // if a breakpoint is present we can't rewrite the stream directly
-    __ movzbl(scratch, at_bcp(0));
-    __ cmpl(scratch, Bytecodes::_breakpoint);
-    __ jcc(Assembler::notEqual, fast_patch);
-    __ get_method(scratch);
+    __ movzbl(temp_reg, at_bcp(0));
+    __ cmpl(temp_reg, Bytecodes::_breakpoint);
+    __ jcc(Assembler::notEqual, L_fast_patch);
+    __ get_method(temp_reg);
     // Let breakpoint table handling rewrite to quicker bytecode
-    __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::set_original_bytecode_at), scratch, rsi, bc);
+    __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::set_original_bytecode_at), temp_reg, rsi, bc_reg);
 #ifndef ASSERT
-    __ jmpb(patch_done);
+    __ jmpb(L_patch_done);
 #else
-    __ jmp(patch_done);
+    __ jmp(L_patch_done);
 #endif
-    __ bind(fast_patch);
+    __ bind(L_fast_patch);
   }
+
 #ifdef ASSERT
-  Label okay;
-  __ load_unsigned_byte(scratch, at_bcp(0));
-  __ cmpl(scratch, (int)Bytecodes::java_code(bytecode));
-  __ jccb(Assembler::equal, okay);
-  __ cmpl(scratch, bc);
-  __ jcc(Assembler::equal, okay);
+  Label L_okay;
+  __ load_unsigned_byte(temp_reg, at_bcp(0));
+  __ cmpl(temp_reg, (int)Bytecodes::java_code(bc));
+  __ jccb(Assembler::equal, L_okay);
+  __ cmpl(temp_reg, bc_reg);
+  __ jcc(Assembler::equal, L_okay);
   __ stop("patching the wrong bytecode");
-  __ bind(okay);
+  __ bind(L_okay);
 #endif
+
   // patch bytecode
-  __ movb(at_bcp(0), bc);
-  __ bind(patch_done);
+  __ movb(at_bcp(0), bc_reg);
+  __ bind(L_patch_done);
 }
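 
 In outline, the rewritten patch_bytecode only quickens a putfield once the constant pool cache carries a non-zero put_code. A pseudocode sketch of the fast_*putfield arm, restating the assembly above with names from the patch:
 
     // put_code = (cache_entry->indices >> ((1 + byte_no) * BitsPerByte)) & 0xFF;
     // if (put_code == 0)
     //   goto L_patch_done;   // not resolved for writing yet: keep the slow
     //                        // bytecode so the next execution calls
     //                        // InterpreterRuntime::resolve_get_put again
     // bc_reg = bc;           // otherwise quicken to the fast bytecode as before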
 
 //----------------------------------------------------------------------------------------------------
@@ -2060,24 +2089,20 @@
   assert_different_registers(result, Rcache, index, temp);
 
   Label resolved;
-  __ get_cache_and_index_at_bcp(Rcache, index, 1, index_size);
   if (byte_no == f1_oop) {
     // We are resolved if the f1 field contains a non-null object (CallSite, etc.)
     // This kind of CP cache entry does not need to match the flags byte, because
     // there is a 1-1 relation between bytecode type and CP entry type.
     assert(result != noreg, ""); //else do cmpptr(Address(...), (int32_t) NULL_WORD)
+    __ get_cache_and_index_at_bcp(Rcache, index, 1, index_size);
     __ movptr(result, Address(Rcache, index, Address::times_ptr, constantPoolCacheOopDesc::base_offset() + ConstantPoolCacheEntry::f1_offset()));
     __ testptr(result, result);
     __ jcc(Assembler::notEqual, resolved);
   } else {
     assert(byte_no == f1_byte || byte_no == f2_byte, "byte_no out of range");
     assert(result == noreg, "");  //else change code for setting result
-    const int shift_count = (1 + byte_no)*BitsPerByte;
-    __ movl(temp, Address(Rcache, index, Address::times_4, constantPoolCacheOopDesc::base_offset() + ConstantPoolCacheEntry::indices_offset()));
-    __ shrl(temp, shift_count);
-    // have we resolved this bytecode?
-    __ andl(temp, 0xFF);
-    __ cmpl(temp, (int)bytecode());
+    __ get_cache_and_index_and_bytecode_at_bcp(Rcache, index, temp, byte_no, 1, index_size);
+    __ cmpl(temp, (int) bytecode());  // have we resolved this bytecode?
     __ jcc(Assembler::equal, resolved);
   }
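 
 The reordered resolution check reduces to the following pseudocode, assuming it mirrors the two arms above exactly:
 
     // if (byte_no == f1_oop)
     //   resolved = (cache_entry->f1 != NULL);                    // CallSite and friends
     // else
     //   resolved = (quickened_bytecode(byte_no) == bytecode());  // via the new helper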
 
@@ -2453,138 +2478,153 @@
 
   __ shrl(flags, ConstantPoolCacheEntry::tosBits);
   assert(btos == 0, "change code, btos != 0");
-  // btos
   __ andl(flags, 0x0f);
   __ jcc(Assembler::notZero, notByte);
 
-  __ pop(btos);
-  if (!is_static) pop_and_check_object(obj);
-  __ movb(lo, rax );
-  if (!is_static) {
-    patch_bytecode(Bytecodes::_fast_bputfield, rcx, rbx);
+  // btos
+  {
+    __ pop(btos);
+    if (!is_static) pop_and_check_object(obj);
+    __ movb(lo, rax);
+    if (!is_static) {
+      patch_bytecode(Bytecodes::_fast_bputfield, rcx, rbx, true, byte_no);
+    }
+    __ jmp(Done);
   }
-  __ jmp(Done);
 
   __ bind(notByte);
+  __ cmpl(flags, itos);
+  __ jcc(Assembler::notEqual, notInt);
+
   // itos
-  __ cmpl(flags, itos );
-  __ jcc(Assembler::notEqual, notInt);
-
-  __ pop(itos);
-  if (!is_static) pop_and_check_object(obj);
-
-  __ movl(lo, rax );
-  if (!is_static) {
-    patch_bytecode(Bytecodes::_fast_iputfield, rcx, rbx);
+  {
+    __ pop(itos);
+    if (!is_static) pop_and_check_object(obj);
+    __ movl(lo, rax);
+    if (!is_static) {
+      patch_bytecode(Bytecodes::_fast_iputfield, rcx, rbx, true, byte_no);
+    }
+    __ jmp(Done);
   }
-  __ jmp(Done);
 
   __ bind(notInt);
+  __ cmpl(flags, atos);
+  __ jcc(Assembler::notEqual, notObj);
+
   // atos
-  __ cmpl(flags, atos );
-  __ jcc(Assembler::notEqual, notObj);
-
-  __ pop(atos);
-  if (!is_static) pop_and_check_object(obj);
-
-  do_oop_store(_masm, lo, rax, _bs->kind(), false);
-
-  if (!is_static) {
-    patch_bytecode(Bytecodes::_fast_aputfield, rcx, rbx);
+  {
+    __ pop(atos);
+    if (!is_static) pop_and_check_object(obj);
+    do_oop_store(_masm, lo, rax, _bs->kind(), false);
+    if (!is_static) {
+      patch_bytecode(Bytecodes::_fast_aputfield, rcx, rbx, true, byte_no);
+    }
+    __ jmp(Done);
   }
 
-  __ jmp(Done);
-
   __ bind(notObj);
+  __ cmpl(flags, ctos);
+  __ jcc(Assembler::notEqual, notChar);
+
   // ctos
-  __ cmpl(flags, ctos );
-  __ jcc(Assembler::notEqual, notChar);
-
-  __ pop(ctos);
-  if (!is_static) pop_and_check_object(obj);
-  __ movw(lo, rax );
-  if (!is_static) {
-    patch_bytecode(Bytecodes::_fast_cputfield, rcx, rbx);
+  {
+    __ pop(ctos);
+    if (!is_static) pop_and_check_object(obj);
+    __ movw(lo, rax);
+    if (!is_static) {
+      patch_bytecode(Bytecodes::_fast_cputfield, rcx, rbx, true, byte_no);
+    }
+    __ jmp(Done);
   }
-  __ jmp(Done);
 
   __ bind(notChar);
+  __ cmpl(flags, stos);
+  __ jcc(Assembler::notEqual, notShort);
+
   // stos
-  __ cmpl(flags, stos );
-  __ jcc(Assembler::notEqual, notShort);
-
-  __ pop(stos);
-  if (!is_static) pop_and_check_object(obj);
-  __ movw(lo, rax );
-  if (!is_static) {
-    patch_bytecode(Bytecodes::_fast_sputfield, rcx, rbx);
+  {
+    __ pop(stos);
+    if (!is_static) pop_and_check_object(obj);
+    __ movw(lo, rax);
+    if (!is_static) {
+      patch_bytecode(Bytecodes::_fast_sputfield, rcx, rbx, true, byte_no);
+    }
+    __ jmp(Done);
   }
-  __ jmp(Done);
 
   __ bind(notShort);
+  __ cmpl(flags, ltos);
+  __ jcc(Assembler::notEqual, notLong);
+
   // ltos
-  __ cmpl(flags, ltos );
-  __ jcc(Assembler::notEqual, notLong);
-
-  Label notVolatileLong;
-  __ testl(rdx, rdx);
-  __ jcc(Assembler::zero, notVolatileLong);
-
-  __ pop(ltos);  // overwrites rdx, do this after testing volatile.
-  if (!is_static) pop_and_check_object(obj);
-
-  // Replace with real volatile test
-  __ push(rdx);
-  __ push(rax);                 // Must update atomically with FIST
-  __ fild_d(Address(rsp,0));    // So load into FPU register
-  __ fistp_d(lo);               // and put into memory atomically
-  __ addptr(rsp, 2*wordSize);
-  // volatile_barrier();
-  volatile_barrier(Assembler::Membar_mask_bits(Assembler::StoreLoad |
-                                               Assembler::StoreStore));
-  // Don't rewrite volatile version
-  __ jmp(notVolatile);
-
-  __ bind(notVolatileLong);
-
-  __ pop(ltos);  // overwrites rdx
-  if (!is_static) pop_and_check_object(obj);
-  NOT_LP64(__ movptr(hi, rdx));
-  __ movptr(lo, rax);
-  if (!is_static) {
-    patch_bytecode(Bytecodes::_fast_lputfield, rcx, rbx);
+  {
+    Label notVolatileLong;
+    __ testl(rdx, rdx);
+    __ jcc(Assembler::zero, notVolatileLong);
+
+    __ pop(ltos);  // overwrites rdx, do this after testing volatile.
+    if (!is_static) pop_and_check_object(obj);
+
+    // Replace with real volatile test
+    __ push(rdx);
+    __ push(rax);                 // Must update atomically with FIST
+    __ fild_d(Address(rsp,0));    // So load into FPU register
+    __ fistp_d(lo);               // and put into memory atomically
+    __ addptr(rsp, 2*wordSize);
+    // volatile_barrier();
+    volatile_barrier(Assembler::Membar_mask_bits(Assembler::StoreLoad |
+                                                 Assembler::StoreStore));
+    // Don't rewrite volatile version
+    __ jmp(notVolatile);
+
+    __ bind(notVolatileLong);
+
+    __ pop(ltos);  // overwrites rdx
+    if (!is_static) pop_and_check_object(obj);
+    NOT_LP64(__ movptr(hi, rdx));
+    __ movptr(lo, rax);
+    if (!is_static) {
+      patch_bytecode(Bytecodes::_fast_lputfield, rcx, rbx, true, byte_no);
+    }
+    __ jmp(notVolatile);
   }
-  __ jmp(notVolatile);
 
   __ bind(notLong);
+  __ cmpl(flags, ftos);
+  __ jcc(Assembler::notEqual, notFloat);
+
   // ftos
-  __ cmpl(flags, ftos );
-  __ jcc(Assembler::notEqual, notFloat);
-
-  __ pop(ftos);
-  if (!is_static) pop_and_check_object(obj);
-  __ fstp_s(lo);
-  if (!is_static) {
-    patch_bytecode(Bytecodes::_fast_fputfield, rcx, rbx);
+  {
+    __ pop(ftos);
+    if (!is_static) pop_and_check_object(obj);
+    __ fstp_s(lo);
+    if (!is_static) {
+      patch_bytecode(Bytecodes::_fast_fputfield, rcx, rbx, true, byte_no);
+    }
+    __ jmp(Done);
   }
-  __ jmp(Done);
 
   __ bind(notFloat);
+#ifdef ASSERT
+  __ cmpl(flags, dtos);
+  __ jcc(Assembler::notEqual, notDouble);
+#endif
+
   // dtos
-  __ cmpl(flags, dtos );
-  __ jcc(Assembler::notEqual, notDouble);
-
-  __ pop(dtos);
-  if (!is_static) pop_and_check_object(obj);
-  __ fstp_d(lo);
-  if (!is_static) {
-    patch_bytecode(Bytecodes::_fast_dputfield, rcx, rbx);
+  {
+    __ pop(dtos);
+    if (!is_static) pop_and_check_object(obj);
+    __ fstp_d(lo);
+    if (!is_static) {
+      patch_bytecode(Bytecodes::_fast_dputfield, rcx, rbx, true, byte_no);
+    }
+    __ jmp(Done);
   }
-  __ jmp(Done);
-
+
+#ifdef ASSERT
   __ bind(notDouble);
-
   __ stop("Bad state");
+#endif
 
   __ bind(Done);
 
--- a/src/cpu/x86/vm/templateTable_x86_64.cpp	Wed Aug 17 07:05:42 2011 -0400
+++ b/src/cpu/x86/vm/templateTable_x86_64.cpp	Fri Aug 19 08:55:53 2011 -0700
@@ -203,46 +203,74 @@
   return Address(r13, offset);
 }
 
-void TemplateTable::patch_bytecode(Bytecodes::Code bytecode, Register bc,
-                                   Register scratch,
-                                   bool load_bc_into_scratch/*=true*/) {
-  if (!RewriteBytecodes) {
-    return;
+void TemplateTable::patch_bytecode(Bytecodes::Code bc, Register bc_reg,
+                                   Register temp_reg, bool load_bc_into_bc_reg/*=true*/,
+                                   int byte_no) {
+  if (!RewriteBytecodes)  return;
+  Label L_patch_done;
+
+  switch (bc) {
+  case Bytecodes::_fast_aputfield:
+  case Bytecodes::_fast_bputfield:
+  case Bytecodes::_fast_cputfield:
+  case Bytecodes::_fast_dputfield:
+  case Bytecodes::_fast_fputfield:
+  case Bytecodes::_fast_iputfield:
+  case Bytecodes::_fast_lputfield:
+  case Bytecodes::_fast_sputfield:
+    {
+      // We skip bytecode quickening for putfield instructions when
+      // the put_code written to the constant pool cache is zero.
+      // This is required so that every execution of this instruction
+      // calls out to InterpreterRuntime::resolve_get_put to do
+      // additional, required work.
+      assert(byte_no == f1_byte || byte_no == f2_byte, "byte_no out of range");
+      assert(load_bc_into_bc_reg, "we use bc_reg as temp");
+      __ get_cache_and_index_and_bytecode_at_bcp(temp_reg, bc_reg, temp_reg, byte_no, 1);
+      __ movl(bc_reg, bc);
+      __ cmpl(temp_reg, (int) 0);
+      __ jcc(Assembler::zero, L_patch_done);  // don't patch
+    }
+    break;
+  default:
+    assert(byte_no == -1, "sanity");
+    // the pair bytecodes have already done the load.
+    if (load_bc_into_bc_reg) {
+      __ movl(bc_reg, bc);
+    }
   }
-  // the pair bytecodes have already done the load.
-  if (load_bc_into_scratch) {
-    __ movl(bc, bytecode);
+
+  if (JvmtiExport::can_post_breakpoint()) {
+    Label L_fast_patch;
+    // if a breakpoint is present we can't rewrite the stream directly
+    __ movzbl(temp_reg, at_bcp(0));
+    __ cmpl(temp_reg, Bytecodes::_breakpoint);
+    __ jcc(Assembler::notEqual, L_fast_patch);
+    __ get_method(temp_reg);
+    // Let breakpoint table handling rewrite to quicker bytecode
+    __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::set_original_bytecode_at), temp_reg, r13, bc_reg);
+#ifndef ASSERT
+    __ jmpb(L_patch_done);
+#else
+    __ jmp(L_patch_done);
+#endif
+    __ bind(L_fast_patch);
   }
-  Label patch_done;
-  if (JvmtiExport::can_post_breakpoint()) {
-    Label fast_patch;
-    // if a breakpoint is present we can't rewrite the stream directly
-    __ movzbl(scratch, at_bcp(0));
-    __ cmpl(scratch, Bytecodes::_breakpoint);
-    __ jcc(Assembler::notEqual, fast_patch);
-    __ get_method(scratch);
-    // Let breakpoint table handling rewrite to quicker bytecode
-    __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::set_original_bytecode_at), scratch, r13, bc);
-#ifndef ASSERT
-    __ jmpb(patch_done);
-#else
-    __ jmp(patch_done);
+
+#ifdef ASSERT
+  Label L_okay;
+  __ load_unsigned_byte(temp_reg, at_bcp(0));
+  __ cmpl(temp_reg, (int) Bytecodes::java_code(bc));
+  __ jcc(Assembler::equal, L_okay);
+  __ cmpl(temp_reg, bc_reg);
+  __ jcc(Assembler::equal, L_okay);
+  __ stop("patching the wrong bytecode");
+  __ bind(L_okay);
 #endif
-    __ bind(fast_patch);
-  }
-#ifdef ASSERT
-  Label okay;
-  __ load_unsigned_byte(scratch, at_bcp(0));
-  __ cmpl(scratch, (int) Bytecodes::java_code(bytecode));
-  __ jcc(Assembler::equal, okay);
-  __ cmpl(scratch, bc);
-  __ jcc(Assembler::equal, okay);
-  __ stop("patching the wrong bytecode");
-  __ bind(okay);
-#endif
+
   // patch bytecode
-  __ movb(at_bcp(0), bc);
-  __ bind(patch_done);
+  __ movb(at_bcp(0), bc_reg);
+  __ bind(L_patch_done);
 }
 
 
@@ -2098,24 +2126,20 @@
   assert_different_registers(result, Rcache, index, temp);
 
   Label resolved;
-  __ get_cache_and_index_at_bcp(Rcache, index, 1, index_size);
   if (byte_no == f1_oop) {
     // We are resolved if the f1 field contains a non-null object (CallSite, etc.)
     // This kind of CP cache entry does not need to match the flags byte, because
     // there is a 1-1 relation between bytecode type and CP entry type.
     assert(result != noreg, ""); //else do cmpptr(Address(...), (int32_t) NULL_WORD)
+    __ get_cache_and_index_at_bcp(Rcache, index, 1, index_size);
     __ movptr(result, Address(Rcache, index, Address::times_ptr, constantPoolCacheOopDesc::base_offset() + ConstantPoolCacheEntry::f1_offset()));
     __ testptr(result, result);
     __ jcc(Assembler::notEqual, resolved);
   } else {
     assert(byte_no == f1_byte || byte_no == f2_byte, "byte_no out of range");
     assert(result == noreg, "");  //else change code for setting result
-    const int shift_count = (1 + byte_no) * BitsPerByte;
-    __ movl(temp, Address(Rcache, index, Address::times_ptr, constantPoolCacheOopDesc::base_offset() + ConstantPoolCacheEntry::indices_offset()));
-    __ shrl(temp, shift_count);
-    // have we resolved this bytecode?
-    __ andl(temp, 0xFF);
-    __ cmpl(temp, (int) bytecode());
+    __ get_cache_and_index_and_bytecode_at_bcp(Rcache, index, temp, byte_no, 1, index_size);
+    __ cmpl(temp, (int) bytecode());  // have we resolved this bytecode?
     __ jcc(Assembler::equal, resolved);
   }
 
@@ -2507,101 +2531,123 @@
   assert(btos == 0, "change code, btos != 0");
   __ andl(flags, 0x0f);
   __ jcc(Assembler::notZero, notByte);
+
   // btos
-  __ pop(btos);
-  if (!is_static) pop_and_check_object(obj);
-  __ movb(field, rax);
-  if (!is_static) {
-    patch_bytecode(Bytecodes::_fast_bputfield, bc, rbx);
+  {
+    __ pop(btos);
+    if (!is_static) pop_and_check_object(obj);
+    __ movb(field, rax);
+    if (!is_static) {
+      patch_bytecode(Bytecodes::_fast_bputfield, bc, rbx, true, byte_no);
+    }
+    __ jmp(Done);
   }
-  __ jmp(Done);
 
   __ bind(notByte);
   __ cmpl(flags, atos);
   __ jcc(Assembler::notEqual, notObj);
+
   // atos
-  __ pop(atos);
-  if (!is_static) pop_and_check_object(obj);
-
-  // Store into the field
-  do_oop_store(_masm, field, rax, _bs->kind(), false);
-
-  if (!is_static) {
-    patch_bytecode(Bytecodes::_fast_aputfield, bc, rbx);
+  {
+    __ pop(atos);
+    if (!is_static) pop_and_check_object(obj);
+    // Store into the field
+    do_oop_store(_masm, field, rax, _bs->kind(), false);
+    if (!is_static) {
+      patch_bytecode(Bytecodes::_fast_aputfield, bc, rbx, true, byte_no);
+    }
+    __ jmp(Done);
   }
-  __ jmp(Done);
 
   __ bind(notObj);
   __ cmpl(flags, itos);
   __ jcc(Assembler::notEqual, notInt);
+
   // itos
-  __ pop(itos);
-  if (!is_static) pop_and_check_object(obj);
-  __ movl(field, rax);
-  if (!is_static) {
-    patch_bytecode(Bytecodes::_fast_iputfield, bc, rbx);
+  {
+    __ pop(itos);
+    if (!is_static) pop_and_check_object(obj);
+    __ movl(field, rax);
+    if (!is_static) {
+      patch_bytecode(Bytecodes::_fast_iputfield, bc, rbx, true, byte_no);
+    }
+    __ jmp(Done);
   }
-  __ jmp(Done);
 
   __ bind(notInt);
   __ cmpl(flags, ctos);
   __ jcc(Assembler::notEqual, notChar);
+
   // ctos
-  __ pop(ctos);
-  if (!is_static) pop_and_check_object(obj);
-  __ movw(field, rax);
-  if (!is_static) {
-    patch_bytecode(Bytecodes::_fast_cputfield, bc, rbx);
+  {
+    __ pop(ctos);
+    if (!is_static) pop_and_check_object(obj);
+    __ movw(field, rax);
+    if (!is_static) {
+      patch_bytecode(Bytecodes::_fast_cputfield, bc, rbx, true, byte_no);
+    }
+    __ jmp(Done);
   }
-  __ jmp(Done);
 
   __ bind(notChar);
   __ cmpl(flags, stos);
   __ jcc(Assembler::notEqual, notShort);
+
   // stos
-  __ pop(stos);
-  if (!is_static) pop_and_check_object(obj);
-  __ movw(field, rax);
-  if (!is_static) {
-    patch_bytecode(Bytecodes::_fast_sputfield, bc, rbx);
+  {
+    __ pop(stos);
+    if (!is_static) pop_and_check_object(obj);
+    __ movw(field, rax);
+    if (!is_static) {
+      patch_bytecode(Bytecodes::_fast_sputfield, bc, rbx, true, byte_no);
+    }
+    __ jmp(Done);
   }
-  __ jmp(Done);
 
   __ bind(notShort);
   __ cmpl(flags, ltos);
   __ jcc(Assembler::notEqual, notLong);
+
   // ltos
-  __ pop(ltos);
-  if (!is_static) pop_and_check_object(obj);
-  __ movq(field, rax);
-  if (!is_static) {
-    patch_bytecode(Bytecodes::_fast_lputfield, bc, rbx);
+  {
+    __ pop(ltos);
+    if (!is_static) pop_and_check_object(obj);
+    __ movq(field, rax);
+    if (!is_static) {
+      patch_bytecode(Bytecodes::_fast_lputfield, bc, rbx, true, byte_no);
+    }
+    __ jmp(Done);
   }
-  __ jmp(Done);
 
   __ bind(notLong);
   __ cmpl(flags, ftos);
   __ jcc(Assembler::notEqual, notFloat);
+
   // ftos
-  __ pop(ftos);
-  if (!is_static) pop_and_check_object(obj);
-  __ movflt(field, xmm0);
-  if (!is_static) {
-    patch_bytecode(Bytecodes::_fast_fputfield, bc, rbx);
+  {
+    __ pop(ftos);
+    if (!is_static) pop_and_check_object(obj);
+    __ movflt(field, xmm0);
+    if (!is_static) {
+      patch_bytecode(Bytecodes::_fast_fputfield, bc, rbx, true, byte_no);
+    }
+    __ jmp(Done);
   }
-  __ jmp(Done);
 
   __ bind(notFloat);
 #ifdef ASSERT
   __ cmpl(flags, dtos);
   __ jcc(Assembler::notEqual, notDouble);
 #endif
+
   // dtos
-  __ pop(dtos);
-  if (!is_static) pop_and_check_object(obj);
-  __ movdbl(field, xmm0);
-  if (!is_static) {
-    patch_bytecode(Bytecodes::_fast_dputfield, bc, rbx);
+  {
+    __ pop(dtos);
+    if (!is_static) pop_and_check_object(obj);
+    __ movdbl(field, xmm0);
+    if (!is_static) {
+      patch_bytecode(Bytecodes::_fast_dputfield, bc, rbx, true, byte_no);
+    }
   }
 
 #ifdef ASSERT
@@ -2612,12 +2658,12 @@
 #endif
 
   __ bind(Done);
+
   // Check for volatile store
   __ testl(rdx, rdx);
   __ jcc(Assembler::zero, notVolatile);
   volatile_barrier(Assembler::Membar_mask_bits(Assembler::StoreLoad |
                                                Assembler::StoreStore));
-
   __ bind(notVolatile);
 }
 
--- a/src/cpu/x86/vm/vm_version_x86.cpp	Wed Aug 17 07:05:42 2011 -0400
+++ b/src/cpu/x86/vm/vm_version_x86.cpp	Fri Aug 19 08:55:53 2011 -0700
@@ -557,14 +557,16 @@
   if( !supports_sse() && supports_3dnow_prefetch() ) AllocatePrefetchInstr = 3;
 
   // Allocation prefetch settings
-  intx cache_line_size = L1_data_cache_line_size();
+  intx cache_line_size = prefetch_data_size();
   if( cache_line_size > AllocatePrefetchStepSize )
     AllocatePrefetchStepSize = cache_line_size;
-  if( FLAG_IS_DEFAULT(AllocatePrefetchLines) )
-    AllocatePrefetchLines = 3; // Optimistic value
+
   assert(AllocatePrefetchLines > 0, "invalid value");
-  if( AllocatePrefetchLines < 1 ) // set valid value in product VM
-    AllocatePrefetchLines = 1; // Conservative value
+  if( AllocatePrefetchLines < 1 )     // set valid value in product VM
+    AllocatePrefetchLines = 3;
+  assert(AllocateInstancePrefetchLines > 0, "invalid value");
+  if( AllocateInstancePrefetchLines < 1 ) // set valid value in product VM
+    AllocateInstancePrefetchLines = 1;
 
   AllocatePrefetchDistance = allocate_prefetch_distance();
   AllocatePrefetchStyle    = allocate_prefetch_style();
@@ -601,10 +603,11 @@
     tty->print_cr("Logical CPUs per core: %u",
                   logical_processors_per_package());
     tty->print_cr("UseSSE=%d",UseSSE);
-    tty->print("Allocation: ");
+    tty->print("Allocation");
     if (AllocatePrefetchStyle <= 0 || UseSSE == 0 && !supports_3dnow_prefetch()) {
-      tty->print_cr("no prefetching");
+      tty->print_cr(": no prefetching");
     } else {
+      tty->print(" prefetching: ");
       if (UseSSE == 0 && supports_3dnow_prefetch()) {
         tty->print("PREFETCHW");
       } else if (UseSSE >= 1) {
@@ -619,9 +622,9 @@
         }
       }
       if (AllocatePrefetchLines > 1) {
-        tty->print_cr(" %d, %d lines with step %d bytes", AllocatePrefetchDistance, AllocatePrefetchLines, AllocatePrefetchStepSize);
+        tty->print_cr(" at distance %d, %d lines of %d bytes", AllocatePrefetchDistance, AllocatePrefetchLines, AllocatePrefetchStepSize);
       } else {
-        tty->print_cr(" %d, one line", AllocatePrefetchDistance);
+        tty->print_cr(" at distance %d, one line of %d bytes", AllocatePrefetchDistance, AllocatePrefetchStepSize);
       }
     }
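 
 For reference, the reworked printout produces lines of the following shape; the numbers are illustrative, and the instruction name assumes the elided UseSSE >= 1 branch prints PREFETCHNTA:
 
     Allocation prefetching: PREFETCHNTA at distance 192, 3 lines of 64 bytes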
 
--- a/src/cpu/x86/vm/vm_version_x86.hpp	Wed Aug 17 07:05:42 2011 -0400
+++ b/src/cpu/x86/vm/vm_version_x86.hpp	Fri Aug 19 08:55:53 2011 -0700
@@ -419,7 +419,7 @@
     return result;
   }
 
-  static intx L1_data_cache_line_size()  {
+  static intx prefetch_data_size()  {
     intx result = 0;
     if (is_intel()) {
       result = (_cpuid_info.dcp_cpuid4_ebx.bits.L1_line_size + 1);
--- a/src/cpu/x86/vm/x86_32.ad	Wed Aug 17 07:05:42 2011 -0400
+++ b/src/cpu/x86/vm/x86_32.ad	Fri Aug 19 08:55:53 2011 -0700
@@ -1369,7 +1369,12 @@
 //
 // NOTE: If the platform does not provide any short branch variants, then
 //       this method should return false for offset 0.
-bool Matcher::is_short_branch_offset(int rule, int offset) {
+bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
+  // The passed offset is relative to the address of the branch.
+  // On x86 a branch displacement is calculated relative to the
+  // address of the next instruction.
+  offset -= br_size;
+
   // the short version of jmpConUCF2 contains multiple branches,
   // making the reach slightly less
   if (rule == jmpConUCF2_rule)
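 
 The br_size adjustment above converts the label-to-branch distance the matcher sees into the displacement the hardware encodes. A worked example, using the 2-byte size() the short branches declare later in this file:
 
     // A short conditional jump is 2 bytes, so br_size == 2. A target 100 bytes
     // past the start of the branch arrives here as offset == 100; the encoded
     // displacement is 100 - 2 == 98, measured from the next instruction, and
     // must fit the signed 8-bit range [-128, 127].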
@@ -1713,18 +1718,6 @@
     else                               emit_d32(cbuf,con);
   %}
 
-  enc_class Lbl (label labl) %{ // JMP, CALL
-    Label *l = $labl$$label;
-    emit_d32(cbuf, l ? (l->loc_pos() - (cbuf.insts_size()+4)) : 0);
-  %}
-
-  enc_class LblShort (label labl) %{ // JMP, CALL
-    Label *l = $labl$$label;
-    int disp = l ? (l->loc_pos() - (cbuf.insts_size()+1)) : 0;
-    assert(-128 <= disp && disp <= 127, "Displacement too large for short jmp");
-    emit_d8(cbuf, disp);
-  %}
-
   enc_class OpcSReg (eRegI dst) %{    // BSWAP
     emit_cc(cbuf, $secondary, $dst$$reg );
   %}
@@ -1747,21 +1740,6 @@
     emit_rm(cbuf, 0x3, $secondary, $div$$reg );
   %}
 
-  enc_class Jcc (cmpOp cop, label labl) %{    // JCC
-    Label *l = $labl$$label;
-    $$$emit8$primary;
-    emit_cc(cbuf, $secondary, $cop$$cmpcode);
-    emit_d32(cbuf, l ? (l->loc_pos() - (cbuf.insts_size()+4)) : 0);
-  %}
-
-  enc_class JccShort (cmpOp cop, label labl) %{    // JCC
-    Label *l = $labl$$label;
-    emit_cc(cbuf, $primary, $cop$$cmpcode);
-    int disp = l ? (l->loc_pos() - (cbuf.insts_size()+1)) : 0;
-    assert(-128 <= disp && disp <= 127, "Displacement too large for short jmp");
-    emit_d8(cbuf, disp);
-  %}
-
   enc_class enc_cmov(cmpOp cop ) %{ // CMOV
     $$$emit8$primary;
     emit_cc(cbuf, $secondary, $cop$$cmpcode);
@@ -4496,7 +4474,6 @@
 //----------Instruction Attributes---------------------------------------------
 ins_attrib ins_cost(100);       // Required cost attribute
 ins_attrib ins_size(8);         // Required size attribute (in bits)
-ins_attrib ins_pc_relative(0);  // Required PC Relative flag
 ins_attrib ins_short_branch(0); // Required flag: is this instruction a
                                 // non-matching short branch variant of some
                                 // long branch?
@@ -7348,8 +7325,9 @@
   ins_cost(100);
 
   format %{ "PREFETCHR $mem\t! Prefetch into level 1 cache for read" %}
-  opcode(0x0F, 0x0d);     /* Opcode 0F 0d /0 */
-  ins_encode(OpcP, OpcS, RMopc_Mem(0x00,mem));
+  ins_encode %{
+    __ prefetchr($mem$$Address);
+  %}
   ins_pipe(ialu_mem);
 %}
 
@@ -7359,8 +7337,9 @@
   ins_cost(100);
 
   format %{ "PREFETCHNTA $mem\t! Prefetch into non-temporal cache for read" %}
-  opcode(0x0F, 0x18);     /* Opcode 0F 18 /0 */
-  ins_encode(OpcP, OpcS, RMopc_Mem(0x00,mem));
+  ins_encode %{
+    __ prefetchnta($mem$$Address);
+  %}
   ins_pipe(ialu_mem);
 %}
 
@@ -7370,8 +7349,9 @@
   ins_cost(100);
 
   format %{ "PREFETCHT0 $mem\t! Prefetch into L1 and L2 caches for read" %}
-  opcode(0x0F, 0x18);     /* Opcode 0F 18 /1 */
-  ins_encode(OpcP, OpcS, RMopc_Mem(0x01,mem));
+  ins_encode %{
+    __ prefetcht0($mem$$Address);
+  %}
   ins_pipe(ialu_mem);
 %}
 
@@ -7381,8 +7361,9 @@
   ins_cost(100);
 
   format %{ "PREFETCHT2 $mem\t! Prefetch into L2 cache for read" %}
-  opcode(0x0F, 0x18);     /* Opcode 0F 18 /3 */
-  ins_encode(OpcP, OpcS, RMopc_Mem(0x03,mem));
+  ins_encode %{
+    __ prefetcht2($mem$$Address);
+  %}
   ins_pipe(ialu_mem);
 %}
 
@@ -7397,46 +7378,86 @@
 %}
 
 instruct prefetchw( memory mem ) %{
-  predicate(UseSSE==0 && VM_Version::supports_3dnow_prefetch() || AllocatePrefetchInstr==3);
+  predicate(UseSSE==0 && VM_Version::supports_3dnow_prefetch());
   match( PrefetchWrite mem );
   ins_cost(100);
 
   format %{ "PREFETCHW $mem\t! Prefetch into L1 cache and mark modified" %}
-  opcode(0x0F, 0x0D);     /* Opcode 0F 0D /1 */
-  ins_encode(OpcP, OpcS, RMopc_Mem(0x01,mem));
+  ins_encode %{
+    __ prefetchw($mem$$Address);
+  %}
   ins_pipe(ialu_mem);
 %}
 
 instruct prefetchwNTA( memory mem ) %{
-  predicate(UseSSE>=1 && AllocatePrefetchInstr==0);
+  predicate(UseSSE>=1);
   match(PrefetchWrite mem);
   ins_cost(100);
 
   format %{ "PREFETCHNTA $mem\t! Prefetch into non-temporal cache for write" %}
-  opcode(0x0F, 0x18);     /* Opcode 0F 18 /0 */
-  ins_encode(OpcP, OpcS, RMopc_Mem(0x00,mem));
+  ins_encode %{
+    __ prefetchnta($mem$$Address);
+  %}
   ins_pipe(ialu_mem);
 %}
 
-instruct prefetchwT0( memory mem ) %{
+// Prefetch instructions for allocation.
+
+instruct prefetchAlloc0( memory mem ) %{
+  predicate(UseSSE==0 && AllocatePrefetchInstr!=3);
+  match(PrefetchAllocation mem);
+  ins_cost(0);
+  size(0);
+  format %{ "Prefetch allocation (non-SSE is empty encoding)" %}
+  ins_encode();
+  ins_pipe(empty);
+%}
+
+instruct prefetchAlloc( memory mem ) %{
+  predicate(AllocatePrefetchInstr==3);
+  match( PrefetchAllocation mem );
+  ins_cost(100);
+
+  format %{ "PREFETCHW $mem\t! Prefetch allocation into L1 cache and mark modified" %}
+  ins_encode %{
+    __ prefetchw($mem$$Address);
+  %}
+  ins_pipe(ialu_mem);
+%}
+
+instruct prefetchAllocNTA( memory mem ) %{
+  predicate(UseSSE>=1 && AllocatePrefetchInstr==0);
+  match(PrefetchAllocation mem);
+  ins_cost(100);
+
+  format %{ "PREFETCHNTA $mem\t! Prefetch allocation into non-temporal cache for write" %}
+  ins_encode %{
+    __ prefetchnta($mem$$Address);
+  %}
+  ins_pipe(ialu_mem);
+%}
+
+instruct prefetchAllocT0( memory mem ) %{
   predicate(UseSSE>=1 && AllocatePrefetchInstr==1);
-  match(PrefetchWrite mem);
+  match(PrefetchAllocation mem);
   ins_cost(100);
 
-  format %{ "PREFETCHT0 $mem\t! Prefetch into L1 and L2 caches for write" %}
-  opcode(0x0F, 0x18);     /* Opcode 0F 18 /1 */
-  ins_encode(OpcP, OpcS, RMopc_Mem(0x01,mem));
+  format %{ "PREFETCHT0 $mem\t! Prefetch allocation into L1 and L2 caches for write" %}
+  ins_encode %{
+    __ prefetcht0($mem$$Address);
+  %}
   ins_pipe(ialu_mem);
 %}
 
-instruct prefetchwT2( memory mem ) %{
+instruct prefetchAllocT2( memory mem ) %{
   predicate(UseSSE>=1 && AllocatePrefetchInstr==2);
-  match(PrefetchWrite mem);
+  match(PrefetchAllocation mem);
   ins_cost(100);
 
-  format %{ "PREFETCHT2 $mem\t! Prefetch into L2 cache for write" %}
-  opcode(0x0F, 0x18);     /* Opcode 0F 18 /3 */
-  ins_encode(OpcP, OpcS, RMopc_Mem(0x03,mem));
+  format %{ "PREFETCHT2 $mem\t! Prefetch allocation into L2 cache for write" %}
+  ins_encode %{
+    __ prefetcht2($mem$$Address);
+  %}
   ins_pipe(ialu_mem);
 %}
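 
 Taken together, the new PrefetchAllocation rules pick one instruction per configuration; the table below only restates the predicates above:
 
     // UseSSE == 0 && AllocatePrefetchInstr != 3  ->  empty encoding (no prefetch)
     // AllocatePrefetchInstr == 3                 ->  PREFETCHW (3DNow!)
     // UseSSE >= 1 && AllocatePrefetchInstr == 0  ->  PREFETCHNTA
     // UseSSE >= 1 && AllocatePrefetchInstr == 1  ->  PREFETCHT0
     // UseSSE >= 1 && AllocatePrefetchInstr == 2  ->  PREFETCHT2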
 
@@ -7806,8 +7827,7 @@
 %}
 
 instruct membar_acquire_lock() %{
-  match(MemBarAcquire);
-  predicate(Matcher::prior_fast_lock(n));
+  match(MemBarAcquireLock);
   ins_cost(0);
 
   size(0);
@@ -7827,8 +7847,7 @@
 %}
 
 instruct membar_release_lock() %{
-  match(MemBarRelease);
-  predicate(Matcher::post_fast_unlock(n));
+  match(MemBarReleaseLock);
   ins_cost(0);
 
   size(0);
@@ -13047,7 +13066,6 @@
     Address index(noreg, $switch_val$$Register, Address::times_1);
     __ jump(ArrayAddress($constantaddress, index));
   %}
-  ins_pc_relative(1);
   ins_pipe(pipe_jmp);
 %}
 
@@ -13059,10 +13077,11 @@
   ins_cost(300);
   format %{ "JMP    $labl" %}
   size(5);
-  opcode(0xE9);
-  ins_encode( OpcP, Lbl( labl ) );
+  ins_encode %{
+    Label* L = $labl$$label;
+    __ jmp(*L, false); // Always long jump
+  %}
   ins_pipe( pipe_jmp );
-  ins_pc_relative(1);
 %}
 
 // Jump Direct Conditional - Label defines a relative address from Jcc+1
@@ -13073,10 +13092,11 @@
   ins_cost(300);
   format %{ "J$cop    $labl" %}
   size(6);
-  opcode(0x0F, 0x80);
-  ins_encode( Jcc( cop, labl) );
+  ins_encode %{
+    Label* L = $labl$$label;
+    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
+  %}
   ins_pipe( pipe_jcc );
-  ins_pc_relative(1);
 %}
 
 // Jump Direct Conditional - Label defines a relative address from Jcc+1
@@ -13087,10 +13107,11 @@
   ins_cost(300);
   format %{ "J$cop    $labl\t# Loop end" %}
   size(6);
-  opcode(0x0F, 0x80);
-  ins_encode( Jcc( cop, labl) );
+  ins_encode %{
+    Label* L = $labl$$label;
+    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
+  %}
   ins_pipe( pipe_jcc );
-  ins_pc_relative(1);
 %}
 
 // Jump Direct Conditional - Label defines a relative address from Jcc+1
@@ -13101,10 +13122,11 @@
   ins_cost(300);
   format %{ "J$cop,u  $labl\t# Loop end" %}
   size(6);
-  opcode(0x0F, 0x80);
-  ins_encode( Jcc( cop, labl) );
+  ins_encode %{
+    Label* L = $labl$$label;
+    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
+  %}
   ins_pipe( pipe_jcc );
-  ins_pc_relative(1);
 %}
 
 instruct jmpLoopEndUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
@@ -13114,10 +13136,11 @@
   ins_cost(200);
   format %{ "J$cop,u  $labl\t# Loop end" %}
   size(6);
-  opcode(0x0F, 0x80);
-  ins_encode( Jcc( cop, labl) );
+  ins_encode %{
+    Label* L = $labl$$label;
+    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
+  %}
   ins_pipe( pipe_jcc );
-  ins_pc_relative(1);
 %}
 
 // Jump Direct Conditional - using unsigned comparison
@@ -13128,10 +13151,11 @@
   ins_cost(300);
   format %{ "J$cop,u  $labl" %}
   size(6);
-  opcode(0x0F, 0x80);
-  ins_encode(Jcc(cop, labl));
+  ins_encode %{
+    Label* L = $labl$$label;
+    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
+  %}
   ins_pipe(pipe_jcc);
-  ins_pc_relative(1);
 %}
 
 instruct jmpConUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
@@ -13141,10 +13165,11 @@
   ins_cost(200);
   format %{ "J$cop,u  $labl" %}
   size(6);
-  opcode(0x0F, 0x80);
-  ins_encode(Jcc(cop, labl));
+  ins_encode %{
+    Label* L = $labl$$label;
+    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
+  %}
   ins_pipe(pipe_jcc);
-  ins_pc_relative(1);
 %}
 
 instruct jmpConUCF2(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
@@ -13162,31 +13187,21 @@
       $$emit$$"done:"
     }
   %}
-  size(12);
-  opcode(0x0F, 0x80);
   ins_encode %{
     Label* l = $labl$$label;
-    $$$emit8$primary;
-    emit_cc(cbuf, $secondary, Assembler::parity);
-    int parity_disp = -1;
-    bool ok = false;
     if ($cop$$cmpcode == Assembler::notEqual) {
-       // the two jumps 6 bytes apart so the jump distances are too
-       parity_disp = l ? (l->loc_pos() - (cbuf.insts_size() + 4)) : 0;
+      __ jcc(Assembler::parity, *l, false);
+      __ jcc(Assembler::notEqual, *l, false);
     } else if ($cop$$cmpcode == Assembler::equal) {
-       parity_disp = 6;
-       ok = true;
+      Label done;
+      __ jccb(Assembler::parity, done);
+      __ jcc(Assembler::equal, *l, false);
+      __ bind(done);
     } else {
        ShouldNotReachHere();
     }
-    emit_d32(cbuf, parity_disp);
-    $$$emit8$primary;
-    emit_cc(cbuf, $secondary, $cop$$cmpcode);
-    int disp = l ? (l->loc_pos() - (cbuf.insts_size() + 4)) : 0;
-    emit_d32(cbuf, disp);
   %}
   ins_pipe(pipe_jcc);
-  ins_pc_relative(1);
 %}
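 
 The rewritten jmpConUCF2 encoding spells out how the parity (unordered) flag from an unordered FP compare folds into the branch; in outline:
 
     // cop == notEqual: unordered counts as "not equal", so both jp and jne
     //                  branch to the label.
     // cop == equal:    unordered must fall through, so a short jp skips over
     //                  the je to the local done label.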
 
 // ============================================================================
@@ -13251,10 +13266,11 @@
   ins_cost(300);
   format %{ "JMP,s  $labl" %}
   size(2);
-  opcode(0xEB);
-  ins_encode( OpcP, LblShort( labl ) );
+  ins_encode %{
+    Label* L = $labl$$label;
+    __ jmpb(*L);
+  %}
   ins_pipe( pipe_jmp );
-  ins_pc_relative(1);
   ins_short_branch(1);
 %}
 
@@ -13266,10 +13282,11 @@
   ins_cost(300);
   format %{ "J$cop,s  $labl" %}
   size(2);
-  opcode(0x70);
-  ins_encode( JccShort( cop, labl) );
+  ins_encode %{
+    Label* L = $labl$$label;
+    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
+  %}
   ins_pipe( pipe_jcc );
-  ins_pc_relative(1);
   ins_short_branch(1);
 %}
 
@@ -13281,10 +13298,11 @@
   ins_cost(300);
   format %{ "J$cop,s  $labl\t# Loop end" %}
   size(2);
-  opcode(0x70);
-  ins_encode( JccShort( cop, labl) );
+  ins_encode %{
+    Label* L = $labl$$label;
+    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
+  %}
   ins_pipe( pipe_jcc );
-  ins_pc_relative(1);
   ins_short_branch(1);
 %}
 
@@ -13296,10 +13314,11 @@
   ins_cost(300);
   format %{ "J$cop,us $labl\t# Loop end" %}
   size(2);
-  opcode(0x70);
-  ins_encode( JccShort( cop, labl) );
+  ins_encode %{
+    Label* L = $labl$$label;
+    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
+  %}
   ins_pipe( pipe_jcc );
-  ins_pc_relative(1);
   ins_short_branch(1);
 %}
 
@@ -13310,10 +13329,11 @@
   ins_cost(300);
   format %{ "J$cop,us $labl\t# Loop end" %}
   size(2);
-  opcode(0x70);
-  ins_encode( JccShort( cop, labl) );
+  ins_encode %{
+    Label* L = $labl$$label;
+    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
+  %}
   ins_pipe( pipe_jcc );
-  ins_pc_relative(1);
   ins_short_branch(1);
 %}
 
@@ -13325,10 +13345,11 @@
   ins_cost(300);
   format %{ "J$cop,us $labl" %}
   size(2);
-  opcode(0x70);
-  ins_encode( JccShort( cop, labl) );
+  ins_encode %{
+    Label* L = $labl$$label;
+    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
+  %}
   ins_pipe( pipe_jcc );
-  ins_pc_relative(1);
   ins_short_branch(1);
 %}
 
@@ -13339,10 +13360,11 @@
   ins_cost(300);
   format %{ "J$cop,us $labl" %}
   size(2);
-  opcode(0x70);
-  ins_encode( JccShort( cop, labl) );
+  ins_encode %{
+    Label* L = $labl$$label;
+    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
+  %}
   ins_pipe( pipe_jcc );
-  ins_pc_relative(1);
   ins_short_branch(1);
 %}
 
@@ -13362,27 +13384,21 @@
     }
   %}
   size(4);
-  opcode(0x70);
   ins_encode %{
     Label* l = $labl$$label;
-    emit_cc(cbuf, $primary, Assembler::parity);
-    int parity_disp = -1;
     if ($cop$$cmpcode == Assembler::notEqual) {
-      parity_disp = l ? (l->loc_pos() - (cbuf.insts_size() + 1)) : 0;
+      __ jccb(Assembler::parity, *l);
+      __ jccb(Assembler::notEqual, *l);
     } else if ($cop$$cmpcode == Assembler::equal) {
-      parity_disp = 2;
+      Label done;
+      __ jccb(Assembler::parity, done);
+      __ jccb(Assembler::equal, *l);
+      __ bind(done);
     } else {
-      ShouldNotReachHere();
-    }
-    emit_d8(cbuf, parity_disp);
-    emit_cc(cbuf, $primary, $cop$$cmpcode);
-    int disp = l ? (l->loc_pos() - (cbuf.insts_size() + 1)) : 0;
-    emit_d8(cbuf, disp);
-    assert(-128 <= disp && disp <= 127, "Displacement too large for short jmp");
-    assert(-128 <= parity_disp && parity_disp <= 127, "Displacement too large for short jmp");
+      ShouldNotReachHere();
+    }
   %}
   ins_pipe(pipe_jcc);
-  ins_pc_relative(1);
   ins_short_branch(1);
 %}
 
@@ -13855,7 +13871,6 @@
               call_epilog,
               post_call_FPU );
   ins_pipe( pipe_slow );
-  ins_pc_relative(1);
   ins_alignment(4);
 %}
 
@@ -13879,7 +13894,6 @@
               call_epilog,
               post_call_FPU );
   ins_pipe( pipe_slow );
-  ins_pc_relative(1);
   ins_alignment(4);
 %}
 
@@ -13899,7 +13913,6 @@
               call_epilog,
               post_call_FPU );
   ins_pipe( pipe_slow );
-  ins_pc_relative(1);
   ins_alignment(4);
 %}
 
@@ -13917,7 +13930,6 @@
               Java_To_Runtime( meth ),
               post_call_FPU );
   ins_pipe( pipe_slow );
-  ins_pc_relative(1);
 %}
 
 // Call runtime without safepoint
@@ -13933,7 +13945,6 @@
               Java_To_Runtime( meth ),
               Verify_FPU_For_Leaf, post_call_FPU );
   ins_pipe( pipe_slow );
-  ins_pc_relative(1);
 %}
 
 instruct CallLeafNoFPDirect(method meth) %{
@@ -13945,7 +13956,6 @@
   opcode(0xE8); /* E8 cd */
   ins_encode(Java_To_Runtime(meth));
   ins_pipe( pipe_slow );
-  ins_pc_relative(1);
 %}
 
 
@@ -14024,7 +14034,6 @@
   format %{ "FASTLOCK $object, $box KILLS $tmp,$scr" %}
   ins_encode( Fast_Lock(object,box,tmp,scr) );
   ins_pipe( pipe_slow );
-  ins_pc_relative(1);
 %}
 
 instruct cmpFastUnlock( eFlagsReg cr, eRegP object, eAXRegP box, eRegP tmp ) %{
@@ -14034,7 +14043,6 @@
   format %{ "FASTUNLOCK $object, $box, $tmp" %}
   ins_encode( Fast_Unlock(object,box,tmp) );
   ins_pipe( pipe_slow );
-  ins_pc_relative(1);
 %}
 
 
--- a/src/cpu/x86/vm/x86_64.ad	Wed Aug 17 07:05:42 2011 -0400
+++ b/src/cpu/x86/vm/x86_64.ad	Fri Aug 19 08:55:53 2011 -0700
@@ -1966,7 +1966,12 @@
 //
 // NOTE: If the platform does not provide any short branch variants, then
 //       this method should return false for offset 0.
-bool Matcher::is_short_branch_offset(int rule, int offset) {
+bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
+  // The passed offset is relative to the address of the branch.
+  // On x86 a branch displacement is calculated relative to the
+  // address of the next instruction.
+  offset -= br_size;
+
   // the short version of jmpConUCF2 contains multiple branches,
   // making the reach slightly less
   if (rule == jmpConUCF2_rule)
@@ -2426,22 +2431,6 @@
     }
   %}
 
-  enc_class Lbl(label labl)
-  %{
-    // JMP, CALL
-    Label* l = $labl$$label;
-    emit_d32(cbuf, l ? (l->loc_pos() - (cbuf.insts_size() + 4)) : 0);
-  %}
-
-  enc_class LblShort(label labl)
-  %{
-    // JMP, CALL
-    Label* l = $labl$$label;
-    int disp = l ? (l->loc_pos() - (cbuf.insts_size() + 1)) : 0;
-    assert(-128 <= disp && disp <= 127, "Displacement too large for short jmp");
-    emit_d8(cbuf, disp);
-  %}
-
   enc_class opc2_reg(rRegI dst)
   %{
     // BSWAP
@@ -2460,25 +2449,6 @@
     emit_rm(cbuf, 0x3, $secondary, $div$$reg & 7);
   %}
 
-  enc_class Jcc(cmpOp cop, label labl)
-  %{
-    // JCC
-    Label* l = $labl$$label;
-    $$$emit8$primary;
-    emit_cc(cbuf, $secondary, $cop$$cmpcode);
-    emit_d32(cbuf, l ? (l->loc_pos() - (cbuf.insts_size() + 4)) : 0);
-  %}
-
-  enc_class JccShort (cmpOp cop, label labl)
-  %{
-  // JCC
-    Label *l = $labl$$label;
-    emit_cc(cbuf, $primary, $cop$$cmpcode);
-    int disp = l ? (l->loc_pos() - (cbuf.insts_size() + 1)) : 0;
-    assert(-128 <= disp && disp <= 127, "Displacement too large for short jmp");
-    emit_d8(cbuf, disp);
-  %}
-
   enc_class enc_cmov(cmpOp cop)
   %{
     // CMOV
@@ -4013,7 +3983,6 @@
 //----------Instruction Attributes---------------------------------------------
 ins_attrib ins_cost(100);       // Required cost attribute
 ins_attrib ins_size(8);         // Required size attribute (in bits)
-ins_attrib ins_pc_relative(0);  // Required PC Relative flag
 ins_attrib ins_short_branch(0); // Required flag: is this instruction
                                 // a non-matching short branch variant
                                 // of some long branch?
@@ -6648,8 +6617,9 @@
   ins_cost(125);
 
   format %{ "PREFETCHR $mem\t# Prefetch into level 1 cache" %}
-  opcode(0x0F, 0x0D);     /* Opcode 0F 0D /0 */
-  ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x00, mem));
+  ins_encode %{
+    __ prefetchr($mem$$Address);
+  %}
   ins_pipe(ialu_mem);
 %}
 
@@ -6659,8 +6629,9 @@
   ins_cost(125);
 
   format %{ "PREFETCHNTA $mem\t# Prefetch into non-temporal cache for read" %}
-  opcode(0x0F, 0x18);     /* Opcode 0F 18 /0 */
-  ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x00, mem));
+  ins_encode %{
+    __ prefetchnta($mem$$Address);
+  %}
   ins_pipe(ialu_mem);
 %}
 
@@ -6670,8 +6641,9 @@
   ins_cost(125);
 
   format %{ "PREFETCHT0 $mem\t# prefetch into L1 and L2 caches for read" %}
-  opcode(0x0F, 0x18); /* Opcode 0F 18 /1 */
-  ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x01, mem));
+  ins_encode %{
+    __ prefetcht0($mem$$Address);
+  %}
   ins_pipe(ialu_mem);
 %}
 
@@ -6681,52 +6653,70 @@
   ins_cost(125);
 
   format %{ "PREFETCHT2 $mem\t# prefetch into L2 caches for read" %}
-  opcode(0x0F, 0x18); /* Opcode 0F 18 /3 */
-  ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x03, mem));
+  ins_encode %{
+    __ prefetcht2($mem$$Address);
+  %}
   ins_pipe(ialu_mem);
 %}
 
-instruct prefetchw( memory mem ) %{
-  predicate(AllocatePrefetchInstr==3);
+instruct prefetchwNTA( memory mem ) %{
   match(PrefetchWrite mem);
   ins_cost(125);
 
-  format %{ "PREFETCHW $mem\t# Prefetch into level 1 cache and mark modified" %}
-  opcode(0x0F, 0x0D);     /* Opcode 0F 0D /1 */
-  ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x01, mem));
+  format %{ "PREFETCHNTA $mem\t# Prefetch to non-temporal cache for write" %}
+  ins_encode %{
+    __ prefetchnta($mem$$Address);
+  %}
   ins_pipe(ialu_mem);
 %}
 
-instruct prefetchwNTA( memory mem ) %{
+// Prefetch instructions for allocation.
+
+instruct prefetchAlloc( memory mem ) %{
+  predicate(AllocatePrefetchInstr==3);
+  match(PrefetchAllocation mem);
+  ins_cost(125);
+
+  format %{ "PREFETCHW $mem\t# Prefetch allocation into level 1 cache and mark modified" %}
+  ins_encode %{
+    __ prefetchw($mem$$Address);
+  %}
+  ins_pipe(ialu_mem);
+%}
+
+instruct prefetchAllocNTA( memory mem ) %{
   predicate(AllocatePrefetchInstr==0);
-  match(PrefetchWrite mem);
+  match(PrefetchAllocation mem);
   ins_cost(125);
 
-  format %{ "PREFETCHNTA $mem\t# Prefetch to non-temporal cache for write" %}
-  opcode(0x0F, 0x18);     /* Opcode 0F 18 /0 */
-  ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x00, mem));
+  format %{ "PREFETCHNTA $mem\t# Prefetch allocation to non-temporal cache for write" %}
+  ins_encode %{
+    __ prefetchnta($mem$$Address);
+  %}
   ins_pipe(ialu_mem);
 %}
 
-instruct prefetchwT0( memory mem ) %{
+instruct prefetchAllocT0( memory mem ) %{
   predicate(AllocatePrefetchInstr==1);
-  match(PrefetchWrite mem);
+  match(PrefetchAllocation mem);
   ins_cost(125);
 
-  format %{ "PREFETCHT0 $mem\t# Prefetch to level 1 and 2 caches for write" %}
-  opcode(0x0F, 0x18);     /* Opcode 0F 18 /1 */
-  ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x01, mem));
+  format %{ "PREFETCHT0 $mem\t# Prefetch allocation to level 1 and 2 caches for write" %}
+  ins_encode %{
+    __ prefetcht0($mem$$Address);
+  %}
   ins_pipe(ialu_mem);
 %}
 
-instruct prefetchwT2( memory mem ) %{
+instruct prefetchAllocT2( memory mem ) %{
   predicate(AllocatePrefetchInstr==2);
-  match(PrefetchWrite mem);
+  match(PrefetchAllocation mem);
   ins_cost(125);
 
-  format %{ "PREFETCHT2 $mem\t# Prefetch to level 2 cache for write" %}
-  opcode(0x0F, 0x18);     /* Opcode 0F 18 /3 */
-  ins_encode(REX_mem(mem), OpcP, OpcS, RM_opc_mem(0x03, mem));
+  format %{ "PREFETCHT2 $mem\t# Prefetch allocation to level 2 cache for write" %}
+  ins_encode %{
+    __ prefetcht2($mem$$Address);
+  %}
   ins_pipe(ialu_mem);
 %}
 
@@ -7376,8 +7366,7 @@
 
 instruct membar_acquire_lock()
 %{
-  match(MemBarAcquire);
-  predicate(Matcher::prior_fast_lock(n));
+  match(MemBarAcquireLock);
   ins_cost(0);
 
   size(0);
@@ -7399,8 +7388,7 @@
 
 instruct membar_release_lock()
 %{
-  match(MemBarRelease);
-  predicate(Matcher::post_fast_unlock(n));
+  match(MemBarReleaseLock);
   ins_cost(0);
 
   size(0);
@@ -7547,7 +7535,6 @@
     __ jmp(dispatch);
   %}
   ins_pipe(pipe_jmp);
-  ins_pc_relative(1);
 %}
 
 instruct jumpXtnd_addr(rRegL switch_val, immI2 shift, immL32 offset, rRegI dest) %{
@@ -7568,7 +7555,6 @@
     __ jmp(dispatch);
   %}
   ins_pipe(pipe_jmp);
-  ins_pc_relative(1);
 %}
 
 instruct jumpXtnd(rRegL switch_val, rRegI dest) %{
@@ -7589,7 +7575,6 @@
     __ jmp(dispatch);
   %}
   ins_pipe(pipe_jmp);
-  ins_pc_relative(1);
 %}
 
 // Conditional move
@@ -12017,10 +12002,11 @@
   ins_cost(300);
   format %{ "jmp     $labl" %}
   size(5);
-  opcode(0xE9);
-  ins_encode(OpcP, Lbl(labl));
+  ins_encode %{
+    Label* L = $labl$$label;
+    __ jmp(*L, false); // Always long jump
+  %}
   ins_pipe(pipe_jmp);
-  ins_pc_relative(1);
 %}
 
 // Jump Direct Conditional - Label defines a relative address from Jcc+1
@@ -12032,10 +12018,11 @@
   ins_cost(300);
   format %{ "j$cop     $labl" %}
   size(6);
-  opcode(0x0F, 0x80);
-  ins_encode(Jcc(cop, labl));
+  ins_encode %{
+    Label* L = $labl$$label;
+    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
+  %}
   ins_pipe(pipe_jcc);
-  ins_pc_relative(1);
 %}
 
 // Jump Direct Conditional - Label defines a relative address from Jcc+1
@@ -12047,10 +12034,11 @@
   ins_cost(300);
   format %{ "j$cop     $labl\t# loop end" %}
   size(6);
-  opcode(0x0F, 0x80);
-  ins_encode(Jcc(cop, labl));
+  ins_encode %{
+    Label* L = $labl$$label;
+    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
+  %}
   ins_pipe(pipe_jcc);
-  ins_pc_relative(1);
 %}
 
 // Jump Direct Conditional - Label defines a relative address from Jcc+1
@@ -12061,10 +12049,11 @@
   ins_cost(300);
   format %{ "j$cop,u   $labl\t# loop end" %}
   size(6);
-  opcode(0x0F, 0x80);
-  ins_encode(Jcc(cop, labl));
+  ins_encode %{
+    Label* L = $labl$$label;
+    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
+  %}
   ins_pipe(pipe_jcc);
-  ins_pc_relative(1);
 %}
 
 instruct jmpLoopEndUCF(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
@@ -12074,10 +12063,11 @@
   ins_cost(200);
   format %{ "j$cop,u   $labl\t# loop end" %}
   size(6);
-  opcode(0x0F, 0x80);
-  ins_encode(Jcc(cop, labl));
+  ins_encode %{
+    Label* L = $labl$$label;
+    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
+  %}
   ins_pipe(pipe_jcc);
-  ins_pc_relative(1);
 %}
 
 // Jump Direct Conditional - using unsigned comparison
@@ -12088,10 +12078,11 @@
   ins_cost(300);
   format %{ "j$cop,u  $labl" %}
   size(6);
-  opcode(0x0F, 0x80);
-  ins_encode(Jcc(cop, labl));
+  ins_encode %{
+    Label* L = $labl$$label;
+    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
+  %}
   ins_pipe(pipe_jcc);
-  ins_pc_relative(1);
 %}
 
 instruct jmpConUCF(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
@@ -12101,10 +12092,11 @@
   ins_cost(200);
   format %{ "j$cop,u  $labl" %}
   size(6);
-  opcode(0x0F, 0x80);
-  ins_encode(Jcc(cop, labl));
+  ins_encode %{
+    Label* L = $labl$$label;
+    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
+  %}
   ins_pipe(pipe_jcc);
-  ins_pc_relative(1);
 %}
 
 instruct jmpConUCF2(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl) %{
@@ -12122,29 +12114,21 @@
       $$emit$$"done:"
     }
   %}
-  size(12);
-  opcode(0x0F, 0x80);
   ins_encode %{
     Label* l = $labl$$label;
-    $$$emit8$primary;
-    emit_cc(cbuf, $secondary, Assembler::parity);
-    int parity_disp = -1;
     if ($cop$$cmpcode == Assembler::notEqual) {
-       // the two jumps 6 bytes apart so the jump distances are too
-       parity_disp = l ? (l->loc_pos() - (cbuf.insts_size() + 4)) : 0;
+      __ jcc(Assembler::parity, *l, false);
+      __ jcc(Assembler::notEqual, *l, false);
     } else if ($cop$$cmpcode == Assembler::equal) {
-       parity_disp = 6;
+      Label done;
+      __ jccb(Assembler::parity, done);
+      __ jcc(Assembler::equal, *l, false);
+      __ bind(done);
     } else {
        ShouldNotReachHere();
     }
-    emit_d32(cbuf, parity_disp);
-    $$$emit8$primary;
-    emit_cc(cbuf, $secondary, $cop$$cmpcode);
-    int disp = l ? (l->loc_pos() - (cbuf.insts_size() + 4)) : 0;
-    emit_d32(cbuf, disp);
   %}
   ins_pipe(pipe_jcc);
-  ins_pc_relative(1);
 %}
 
 // ============================================================================
@@ -12218,10 +12202,11 @@
   ins_cost(300);
   format %{ "jmp,s   $labl" %}
   size(2);
-  opcode(0xEB);
-  ins_encode(OpcP, LblShort(labl));
+  ins_encode %{
+    Label* L = $labl$$label;
+    __ jmpb(*L);
+  %}
   ins_pipe(pipe_jmp);
-  ins_pc_relative(1);
   ins_short_branch(1);
 %}
 
@@ -12233,10 +12218,11 @@
   ins_cost(300);
   format %{ "j$cop,s   $labl" %}
   size(2);
-  opcode(0x70);
-  ins_encode(JccShort(cop, labl));
+  ins_encode %{
+    Label* L = $labl$$label;
+    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
+  %}
   ins_pipe(pipe_jcc);
-  ins_pc_relative(1);
   ins_short_branch(1);
 %}
 
@@ -12248,10 +12234,11 @@
   ins_cost(300);
   format %{ "j$cop,s   $labl\t# loop end" %}
   size(2);
-  opcode(0x70);
-  ins_encode(JccShort(cop, labl));
+  ins_encode %{
+    Label* L = $labl$$label;
+    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
+  %}
   ins_pipe(pipe_jcc);
-  ins_pc_relative(1);
   ins_short_branch(1);
 %}
 
@@ -12263,10 +12250,11 @@
   ins_cost(300);
   format %{ "j$cop,us  $labl\t# loop end" %}
   size(2);
-  opcode(0x70);
-  ins_encode(JccShort(cop, labl));
+  ins_encode %{
+    Label* L = $labl$$label;
+    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
+  %}
   ins_pipe(pipe_jcc);
-  ins_pc_relative(1);
   ins_short_branch(1);
 %}
 
@@ -12277,10 +12265,11 @@
   ins_cost(300);
   format %{ "j$cop,us  $labl\t# loop end" %}
   size(2);
-  opcode(0x70);
-  ins_encode(JccShort(cop, labl));
+  ins_encode %{
+    Label* L = $labl$$label;
+    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
+  %}
   ins_pipe(pipe_jcc);
-  ins_pc_relative(1);
   ins_short_branch(1);
 %}
 
@@ -12292,10 +12281,11 @@
   ins_cost(300);
   format %{ "j$cop,us  $labl" %}
   size(2);
-  opcode(0x70);
-  ins_encode(JccShort(cop, labl));
+  ins_encode %{
+    Label* L = $labl$$label;
+    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
+  %}
   ins_pipe(pipe_jcc);
-  ins_pc_relative(1);
   ins_short_branch(1);
 %}
 
@@ -12306,10 +12296,11 @@
   ins_cost(300);
   format %{ "j$cop,us  $labl" %}
   size(2);
-  opcode(0x70);
-  ins_encode(JccShort(cop, labl));
+  ins_encode %{
+    Label* L = $labl$$label;
+    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
+  %}
   ins_pipe(pipe_jcc);
-  ins_pc_relative(1);
   ins_short_branch(1);
 %}
 
@@ -12329,27 +12320,21 @@
     }
   %}
   size(4);
-  opcode(0x70);
   ins_encode %{
     Label* l = $labl$$label;
-    emit_cc(cbuf, $primary, Assembler::parity);
-    int parity_disp = -1;
     if ($cop$$cmpcode == Assembler::notEqual) {
-      parity_disp = l ? (l->loc_pos() - (cbuf.insts_size() + 1)) : 0;
+      __ jccb(Assembler::parity, *l);
+      __ jccb(Assembler::notEqual, *l);
     } else if ($cop$$cmpcode == Assembler::equal) {
-      parity_disp = 2;
+      Label done;
+      __ jccb(Assembler::parity, done);
+      __ jccb(Assembler::equal, *l);
+      __ bind(done);
     } else {
-      ShouldNotReachHere();
-    }
-    emit_d8(cbuf, parity_disp);
-    emit_cc(cbuf, $primary, $cop$$cmpcode);
-    int disp = l ? (l->loc_pos() - (cbuf.insts_size() + 1)) : 0;
-    emit_d8(cbuf, disp);
-    assert(-128 <= disp && disp <= 127, "Displacement too large for short jmp");
-    assert(-128 <= parity_disp && parity_disp <= 127, "Displacement too large for short jmp");
+       ShouldNotReachHere();
+    }
   %}
   ins_pipe(pipe_jcc);
-  ins_pc_relative(1);
   ins_short_branch(1);
 %}
 
@@ -12366,7 +12351,6 @@
   format %{ "fastlock $object,$box,$tmp,$scr" %}
   ins_encode(Fast_Lock(object, box, tmp, scr));
   ins_pipe(pipe_slow);
-  ins_pc_relative(1);
 %}
 
 instruct cmpFastUnlock(rFlagsReg cr,
@@ -12379,7 +12363,6 @@
   format %{ "fastunlock $object, $box, $tmp" %}
   ins_encode(Fast_Unlock(object, box, tmp));
   ins_pipe(pipe_slow);
-  ins_pc_relative(1);
 %}
 
 
@@ -12432,7 +12415,6 @@
   opcode(0xE8); /* E8 cd */
   ins_encode(Java_Static_Call(meth), call_epilog);
   ins_pipe(pipe_slow);
-  ins_pc_relative(1);
   ins_alignment(4);
 %}
 
@@ -12454,7 +12436,6 @@
              restore_SP,
              call_epilog);
   ins_pipe(pipe_slow);
-  ins_pc_relative(1);
   ins_alignment(4);
 %}
 
@@ -12472,7 +12453,6 @@
   opcode(0xE8); /* E8 cd */
   ins_encode(Java_Dynamic_Call(meth), call_epilog);
   ins_pipe(pipe_slow);
-  ins_pc_relative(1);
   ins_alignment(4);
 %}
 
@@ -12487,7 +12467,6 @@
   opcode(0xE8); /* E8 cd */
   ins_encode(Java_To_Runtime(meth));
   ins_pipe(pipe_slow);
-  ins_pc_relative(1);
 %}
 
 // Call runtime without safepoint
@@ -12501,7 +12480,6 @@
   opcode(0xE8); /* E8 cd */
   ins_encode(Java_To_Runtime(meth));
   ins_pipe(pipe_slow);
-  ins_pc_relative(1);
 %}
 
 // Call runtime without safepoint
@@ -12515,7 +12493,6 @@
   opcode(0xE8); /* E8 cd */
   ins_encode(Java_To_Runtime(meth));
   ins_pipe(pipe_slow);
-  ins_pc_relative(1);
 %}
 
 // Return Instruction
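
The jmpConUCF2/jmpConUCF2_short rewrites above emit the unordered-compare branch pair through the MacroAssembler instead of hand-computing opcodes and displacements. The semantics being preserved: after ucomiss/ucomisd, NaN sets the parity flag, so "notEqual" must also branch on parity, while "equal" must branch around on parity. A minimal standalone C++ sketch of those semantics (illustrative only, not VM code):

#include <cmath>
#include <cstdio>

// Models the two encodings in jmpConUCF2: for notEqual the parity jump
// shares the target (NaN compares as "not equal"); for equal a parity
// jump skips over the main jump (NaN must not compare as "equal").
static bool branch_not_equal(float a, float b) {
  if (std::isnan(a) || std::isnan(b)) return true;   // jp  label
  return a != b;                                     // jne label
}

static bool branch_equal(float a, float b) {
  if (std::isnan(a) || std::isnan(b)) return false;  // jp  done (skip je)
  return a == b;                                     // je  label
}

int main() {
  // prints "0 1": NaN is never equal, always not-equal
  printf("%d %d\n", branch_equal(NAN, 1.0f), branch_not_equal(NAN, 1.0f));
  return 0;
}
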
--- a/src/cpu/zero/vm/frame_zero.cpp	Wed Aug 17 07:05:42 2011 -0400
+++ b/src/cpu/zero/vm/frame_zero.cpp	Fri Aug 19 08:55:53 2011 -0700
@@ -1,6 +1,6 @@
 /*
  * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2007, 2008, 2009, 2010 Red Hat, Inc.
+ * Copyright 2007, 2008, 2009, 2010, 2011 Red Hat, Inc.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -417,3 +417,11 @@
     return;
   }
 }
+
+#ifdef ASSERT
+
+void frame::describe_pd(FrameValues& values, int frame_no) {
+
+}
+
+#endif
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/zero/vm/methodHandles_zero.hpp	Fri Aug 19 08:55:53 2011 -0700
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2011 Red Hat, Inc.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+
+// Adapters
+enum /* platform_dependent_constants */ {
+  adapter_code_size = 0
+};
+
+#define TARGET_ARCH_NYI_6939861 1
+// ..#ifdef TARGET_ARCH_NYI_6939861
+// ..  // Here are some backward compatible declarations until the 6939861 ports are updated.
+// ..  #define _adapter_flyby    (_EK_LIMIT + 10)
+// ..  #define _adapter_ricochet (_EK_LIMIT + 11)
+// ..  #define _adapter_opt_spread_1    _adapter_opt_spread_1_ref
+// ..  #define _adapter_opt_spread_more _adapter_opt_spread_ref
+// ..  enum {
+// ..    _INSERT_NO_MASK   = -1,
+// ..    _INSERT_REF_MASK  = 0,
+// ..    _INSERT_INT_MASK  = 1,
+// ..    _INSERT_LONG_MASK = 3
+// ..  };
+// ..  static void get_ek_bound_mh_info(EntryKind ek, BasicType& arg_type, int& arg_mask, int& arg_slots) {
+// ..    arg_type = ek_bound_mh_arg_type(ek);
+// ..    arg_mask = 0;
+// ..    arg_slots = type2size[arg_type];;
+// ..  }
+// ..  static void get_ek_adapter_opt_swap_rot_info(EntryKind ek, int& swap_bytes, int& rotate) {
+// ..    int swap_slots = ek_adapter_opt_swap_slots(ek);
+// ..    rotate = ek_adapter_opt_swap_mode(ek);
+// ..    swap_bytes = swap_slots * Interpreter::stackElementSize;
+// ..  }
+// ..  static int get_ek_adapter_opt_spread_info(EntryKind ek) {
+// ..    return ek_adapter_opt_spread_count(ek);
+// ..  }
+// ..
+// ..  static void insert_arg_slots(MacroAssembler* _masm,
+// ..                               RegisterOrConstant arg_slots,
+// ..                               int arg_mask,
+// ..                               Register argslot_reg,
+// ..                               Register temp_reg, Register temp2_reg, Register temp3_reg = noreg);
+// ..
+// ..  static void remove_arg_slots(MacroAssembler* _masm,
+// ..                               RegisterOrConstant arg_slots,
+// ..                               Register argslot_reg,
+// ..                               Register temp_reg, Register temp2_reg, Register temp3_reg = noreg);
+// ..
+// ..  static void trace_method_handle(MacroAssembler* _masm, const char* adaptername) PRODUCT_RETURN;
+// ..#endif //TARGET_ARCH_NYI_6939861
--- a/src/cpu/zero/vm/sharedRuntime_zero.cpp	Wed Aug 17 07:05:42 2011 -0400
+++ b/src/cpu/zero/vm/sharedRuntime_zero.cpp	Fri Aug 19 08:55:53 2011 -0700
@@ -46,14 +46,6 @@
 #include "shark/sharkCompiler.hpp"
 #endif
 
-DeoptimizationBlob *SharedRuntime::_deopt_blob;
-SafepointBlob      *SharedRuntime::_polling_page_safepoint_handler_blob;
-SafepointBlob      *SharedRuntime::_polling_page_return_handler_blob;
-RuntimeStub        *SharedRuntime::_wrong_method_blob;
-RuntimeStub        *SharedRuntime::_ic_miss_blob;
-RuntimeStub        *SharedRuntime::_resolve_opt_virtual_call_blob;
-RuntimeStub        *SharedRuntime::_resolve_virtual_call_blob;
-RuntimeStub        *SharedRuntime::_resolve_static_call_blob;
 
 int SharedRuntime::java_calling_convention(const BasicType *sig_bt,
                                            VMRegPair *regs,
@@ -114,22 +106,22 @@
   return SafepointBlob::create(&buffer, NULL, 0);
 }
 
-void SharedRuntime::generate_stubs() {
-  _wrong_method_blob =
-    generate_empty_runtime_stub("wrong_method_stub");
-  _ic_miss_blob =
-    generate_empty_runtime_stub("ic_miss_stub");
-  _resolve_opt_virtual_call_blob =
-    generate_empty_runtime_stub("resolve_opt_virtual_call");
-  _resolve_virtual_call_blob =
-    generate_empty_runtime_stub("resolve_virtual_call");
-  _resolve_static_call_blob =
-    generate_empty_runtime_stub("resolve_static_call");
+static DeoptimizationBlob* generate_empty_deopt_blob() {
+  CodeBuffer buffer("handler_blob", 0, 0);
+  return DeoptimizationBlob::create(&buffer, NULL, 0, 0, 0, 0);
+}
 
-  _polling_page_safepoint_handler_blob =
-    generate_empty_safepoint_blob();
-  _polling_page_return_handler_blob =
-    generate_empty_safepoint_blob();
+
+void SharedRuntime::generate_deopt_blob() {
+  _deopt_blob = generate_empty_deopt_blob();
+}
+
+SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, bool cause_return) {
+  return generate_empty_safepoint_blob();
+}
+
+RuntimeStub* SharedRuntime::generate_resolve_blob(address destination, const char* name) {
+  return generate_empty_runtime_stub("resolve_blob");
 }
 
 int SharedRuntime::c_calling_convention(const BasicType *sig_bt,
--- a/src/cpu/zero/vm/stack_zero.cpp	Wed Aug 17 07:05:42 2011 -0400
+++ b/src/cpu/zero/vm/stack_zero.cpp	Fri Aug 19 08:55:53 2011 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2011, Oracle and/or its affiliates. All rights reserved.
  * Copyright 2010 Red Hat, Inc.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
@@ -69,7 +69,8 @@
     break;
 
   case _thread_in_vm:
-    Exceptions::throw_stack_overflow_exception(thread, __FILE__, __LINE__);
+    Exceptions::throw_stack_overflow_exception(thread, __FILE__, __LINE__,
+                                               methodHandle());
     break;
 
   default:
--- a/src/os_cpu/linux_x86/vm/linux_x86_32.ad	Wed Aug 17 07:05:42 2011 -0400
+++ b/src/os_cpu/linux_x86/vm/linux_x86_32.ad	Fri Aug 19 08:55:53 2011 -0700
@@ -154,7 +154,7 @@
 
 
 uint MachBreakpointNode::size(PhaseRegAlloc *ra_) const {
-  return 5;
+  return MachNode::size(ra_);
 }
 
 %}
--- a/src/os_cpu/linux_x86/vm/linux_x86_64.ad	Wed Aug 17 07:05:42 2011 -0400
+++ b/src/os_cpu/linux_x86/vm/linux_x86_64.ad	Fri Aug 19 08:55:53 2011 -0700
@@ -167,7 +167,8 @@
 }
 
 uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const {
-  return 5;
+  // distance could be far and requires load and call through register
+  return MachNode::size(ra_);
 }
 
 %}
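
The size() change above stops assuming the 5-byte "call rel32" form: on x86_64 the breakpoint stub may sit outside the signed 32-bit displacement range of the call site, in which case the emitter materializes the address in a register and calls through it, so the node defers to MachNode::size(ra_), which measures the actually emitted bytes. A hedged standalone sketch of the size distinction (the 13-byte figure models a mov-imm64-plus-call-reg pattern and is illustrative, not the VM's exact sequence):

#include <cstdint>

// True when `target` is reachable with a rel32 call placed at `call_site`
// (displacement is measured from the end of the 5-byte E8 instruction).
static bool fits_rel32(uintptr_t call_site, uintptr_t target) {
  intptr_t disp = (intptr_t)target - (intptr_t)(call_site + 5);
  return disp == (intptr_t)(int32_t)disp;
}

// 5 bytes for "call rel32"; otherwise e.g. "mov r10, imm64" (10 bytes)
// followed by "call r10" (3 bytes).
static int call_size(uintptr_t call_site, uintptr_t target) {
  return fits_rel32(call_site, target) ? 5 : 13;
}
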
--- a/src/os_cpu/solaris_sparc/vm/vm_version_solaris_sparc.cpp	Wed Aug 17 07:05:42 2011 -0400
+++ b/src/os_cpu/solaris_sparc/vm/vm_version_solaris_sparc.cpp	Fri Aug 19 08:55:53 2011 -0700
@@ -114,6 +114,11 @@
 #endif
     if (av & AV_SPARC_VIS3)         features |= vis3_instructions_m;
 
+#ifndef AV_SPARC_CBCOND
+#define AV_SPARC_CBCOND 0x10000000  /* compare and branch instrs supported */
+#endif
+    if (av & AV_SPARC_CBCOND)       features |= cbcond_instructions_m;
+
   } else {
     // getisax(2) failed, use the old legacy code.
 #ifndef PRODUCT
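
The hunk above follows the file's existing getisax(2) pattern: define the capability bit locally when the build headers predate it, then test it against the returned mask. A minimal sketch of that probe pattern, assuming the caller already holds the av word:

#include <cstdint>

#ifndef AV_SPARC_CBCOND
#define AV_SPARC_CBCOND 0x10000000  // compare-and-branch instructions
#endif

static bool supports_cbcond(uint32_t av) {
  return (av & AV_SPARC_CBCOND) != 0;
}
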
--- a/src/os_cpu/solaris_x86/vm/solaris_x86_32.ad	Wed Aug 17 07:05:42 2011 -0400
+++ b/src/os_cpu/solaris_x86/vm/solaris_x86_32.ad	Fri Aug 19 08:55:53 2011 -0700
@@ -161,7 +161,7 @@
 
 
 uint MachBreakpointNode::size(PhaseRegAlloc *ra_) const {
-  return 5;
+  return MachNode::size(ra_);
 }
 
 %}
--- a/src/os_cpu/solaris_x86/vm/solaris_x86_64.ad	Wed Aug 17 07:05:42 2011 -0400
+++ b/src/os_cpu/solaris_x86/vm/solaris_x86_64.ad	Fri Aug 19 08:55:53 2011 -0700
@@ -180,7 +180,8 @@
 
 uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const
 {
-  return 5;
+  // distance could be far and requires load and call through register
+  return MachNode::size(ra_);
 }
 
 %}
--- a/src/share/vm/adlc/adlparse.cpp	Wed Aug 17 07:05:42 2011 -0400
+++ b/src/share/vm/adlc/adlparse.cpp	Fri Aug 19 08:55:53 2011 -0700
@@ -126,9 +126,6 @@
   if (_globalNames[AttributeForm::_ins_cost] == NULL) {
     parse_err(SEMERR, "Did not declare 'ins_cost' attribute");
   }
-  if (_globalNames[AttributeForm::_ins_pc_relative] == NULL) {
-    parse_err(SEMERR, "Did not declare 'ins_pc_relative' attribute");
-  }
   if (_globalNames[AttributeForm::_op_cost] == NULL) {
     parse_err(SEMERR, "Did not declare 'op_cost' attribute");
   }
--- a/src/share/vm/adlc/archDesc.cpp	Wed Aug 17 07:05:42 2011 -0400
+++ b/src/share/vm/adlc/archDesc.cpp	Fri Aug 19 08:55:53 2011 -0700
@@ -331,10 +331,18 @@
     // Find result type for match
     const char *result  = instr->reduce_result();
 
+    if ( instr->is_ideal_branch() && instr->label_position() == -1 ||
+        !instr->is_ideal_branch() && instr->label_position() != -1) {
+      syntax_err(instr->_linenum, "%s: Only branches to a label are supported\n", rootOp);
+    }
+
     Attribute *attr = instr->_attribs;
     while (attr != NULL) {
       if (strcmp(attr->_ident,"ins_short_branch") == 0 &&
           attr->int_val(*this) != 0) {
+        if (!instr->is_ideal_branch() || instr->label_position() == -1) {
+          syntax_err(instr->_linenum, "%s: Only short branch to a label is supported\n", rootOp);
+        }
         instr->set_short_branch(true);
       } else if (strcmp(attr->_ident,"ins_alignment") == 0 &&
           attr->int_val(*this) != 0) {
--- a/src/share/vm/adlc/formssel.cpp	Wed Aug 17 07:05:42 2011 -0400
+++ b/src/share/vm/adlc/formssel.cpp	Fri Aug 19 08:55:53 2011 -0700
@@ -291,15 +291,6 @@
 }
 
 
-// Return 'true' if this instruction matches an ideal 'Copy*' node
-bool InstructForm::is_ideal_unlock() const {
-  return _matrule ? _matrule->is_ideal_unlock() : false;
-}
-
-bool InstructForm::is_ideal_call_leaf() const {
-  return _matrule ? _matrule->is_ideal_call_leaf() : false;
-}
-
 // Return 'true' if this instruction matches an ideal 'If' node
 bool InstructForm::is_ideal_if() const {
   if( _matrule == NULL ) return false;
@@ -349,12 +340,11 @@
   return _matrule->is_ideal_jump();
 }
 
-// Return 'true' if instruction matches ideal 'If' | 'Goto' |
-//                    'CountedLoopEnd' | 'Jump'
+// Return 'true' if instruction matches ideal 'If' | 'Goto' | 'CountedLoopEnd'
 bool InstructForm::is_ideal_branch() const {
   if( _matrule == NULL ) return false;
 
-  return _matrule->is_ideal_if() || _matrule->is_ideal_goto() || _matrule->is_ideal_jump();
+  return _matrule->is_ideal_if() || _matrule->is_ideal_goto();
 }
 
 
@@ -392,7 +382,7 @@
 bool InstructForm::is_ideal_control() const {
   if ( ! _matrule)  return false;
 
-  return is_ideal_return() || is_ideal_branch() || is_ideal_halt();
+  return is_ideal_return() || is_ideal_branch() || _matrule->is_ideal_jump() || is_ideal_halt();
 }
 
 // Return 'true' if this instruction matches an ideal 'Call' node
@@ -633,6 +623,8 @@
 
   if( strcmp(_matrule->_opType,"MemBarRelease") == 0 ) return true;
   if( strcmp(_matrule->_opType,"MemBarAcquire") == 0 ) return true;
+  if( strcmp(_matrule->_opType,"MemBarReleaseLock") == 0 ) return true;
+  if( strcmp(_matrule->_opType,"MemBarAcquireLock") == 0 ) return true;
 
   return false;
 }
@@ -1094,6 +1086,9 @@
   else if (is_ideal_if()) {
     return "MachIfNode";
   }
+  else if (is_ideal_goto()) {
+    return "MachGotoNode";
+  }
   else if (is_ideal_fastlock()) {
     return "MachFastLockNode";
   }
@@ -1185,6 +1180,34 @@
       strcmp(reduce_result(), short_branch->reduce_result()) == 0 &&
       _matrule->equivalent(AD.globalNames(), short_branch->_matrule)) {
     // The instructions are equivalent.
+
+    // Now verify that both instructions have the same parameters and
+    // the same effects. Both branch forms should have the same inputs
+    // and resulting projections to correctly replace a long branch node
+    // with corresponding short branch node during code generation.
+
+    bool different = false;
+    if (short_branch->_components.count() != _components.count()) {
+       different = true;
+    } else if (_components.count() > 0) {
+      short_branch->_components.reset();
+      _components.reset();
+      Component *comp;
+      while ((comp = _components.iter()) != NULL) {
+        Component *short_comp = short_branch->_components.iter();
+        if (short_comp == NULL ||
+            short_comp->_type != comp->_type ||
+            short_comp->_usedef != comp->_usedef) {
+          different = true;
+          break;
+        }
+      }
+      if (short_branch->_components.iter() != NULL)
+        different = true;
+    }
+    if (different) {
+      globalAD->syntax_err(short_branch->_linenum, "Instruction %s and its short form %s have different parameters\n", _ident, short_branch->_ident);
+    }
     if (AD._short_branch_debug) {
       fprintf(stderr, "Instruction %s has short form %s\n", _ident, short_branch->_ident);
     }
@@ -2706,7 +2729,6 @@
 int         AttributeForm::_insId   = 0;           // start counter at 0
 int         AttributeForm::_opId    = 0;           // start counter at 0
 const char* AttributeForm::_ins_cost = "ins_cost"; // required name
-const char* AttributeForm::_ins_pc_relative = "ins_pc_relative";
 const char* AttributeForm::_op_cost  = "op_cost";  // required name
 
 AttributeForm::AttributeForm(char *attr, int type, char *attrdef)
@@ -3368,7 +3390,9 @@
     "ClearArray"
   };
   int cnt = sizeof(needs_ideal_memory_list)/sizeof(char*);
-  if( strcmp(_opType,"PrefetchRead")==0 || strcmp(_opType,"PrefetchWrite")==0 )
+  if( strcmp(_opType,"PrefetchRead")==0 ||
+      strcmp(_opType,"PrefetchWrite")==0 ||
+      strcmp(_opType,"PrefetchAllocation")==0 )
     return 1;
   if( _lChild ) {
     const char *opType = _lChild->_opType;
@@ -3623,7 +3647,27 @@
   assert( mNode2->_opType, "Must have _opType");
   const Form *form  = globals[_opType];
   const Form *form2 = globals[mNode2->_opType];
-  return (form == form2);
+  if( form != form2 ) {
+    return false;
+  }
+
+  // Check that their children also match
+  if (_lChild ) {
+    if( !_lChild->equivalent(globals, mNode2->_lChild) )
+      return false;
+  } else if (mNode2->_lChild) {
+    return false; // I have NULL left child, mNode2 has non-NULL left child.
+  }
+
+  if (_rChild ) {
+    if( !_rChild->equivalent(globals, mNode2->_rChild) )
+      return false;
+  } else if (mNode2->_rChild) {
+    return false; // I have NULL right child, mNode2 has non-NULL right child.
+  }
+
+  // We've made it through the gauntlet.
+  return true;
 }
 
 //-------------------------- has_commutative_op -------------------------------
@@ -3909,19 +3953,6 @@
   return 0;
 }
 
-bool MatchRule::is_ideal_unlock() const {
-  if( !_opType ) return false;
-  return !strcmp(_opType,"Unlock") || !strcmp(_opType,"FastUnlock");
-}
-
-
-bool MatchRule::is_ideal_call_leaf() const {
-  if( !_opType ) return false;
-  return !strcmp(_opType,"CallLeaf")     ||
-         !strcmp(_opType,"CallLeafNoFP");
-}
-
-
 bool MatchRule::is_ideal_if() const {
   if( !_opType ) return false;
   return
@@ -3941,6 +3972,8 @@
   return
     !strcmp(_opType,"MemBarAcquire"  ) ||
     !strcmp(_opType,"MemBarRelease"  ) ||
+    !strcmp(_opType,"MemBarAcquireLock") ||
+    !strcmp(_opType,"MemBarReleaseLock") ||
     !strcmp(_opType,"MemBarVolatile" ) ||
     !strcmp(_opType,"MemBarCPUOrder" ) ;
 }
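
MatchRule::equivalent previously compared only the root operator forms; the version above additionally requires both subtrees to match recursively, with a NULL child pairing only with a NULL child. A standalone sketch of that strengthened test on a plain binary tree (types here are illustrative, not adlc's):

#include <cstring>

struct MatchNode {
  const char* opType;
  MatchNode*  lChild;
  MatchNode*  rChild;
};

// Structural equality: the same operator at every node, and the same shape.
static bool equivalent(const MatchNode* a, const MatchNode* b) {
  if (a == nullptr || b == nullptr) return a == b;  // both NULL, or mismatch
  if (std::strcmp(a->opType, b->opType) != 0) return false;
  return equivalent(a->lChild, b->lChild) &&
         equivalent(a->rChild, b->rChild);
}
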
--- a/src/share/vm/adlc/formssel.hpp	Wed Aug 17 07:05:42 2011 -0400
+++ b/src/share/vm/adlc/formssel.hpp	Fri Aug 19 08:55:53 2011 -0700
@@ -145,8 +145,6 @@
   virtual int         is_empty_encoding() const; // _size=0 and/or _insencode empty
   virtual int         is_tls_instruction() const; // tlsLoadP rule or ideal ThreadLocal
   virtual int         is_ideal_copy() const;    // node matches ideal 'Copy*'
-  virtual bool        is_ideal_unlock() const;  // node matches ideal 'Unlock'
-  virtual bool        is_ideal_call_leaf() const; // node matches ideal 'CallLeaf'
   virtual bool        is_ideal_if()   const;    // node matches ideal 'If'
   virtual bool        is_ideal_fastlock() const; // node matches 'FastLock'
   virtual bool        is_ideal_membar() const;  // node matches ideal 'MemBarXXX'
@@ -857,7 +855,6 @@
   int  type() { return id;}        // return this object's "id"
 
   static const char* _ins_cost;        // "ins_cost"
-  static const char* _ins_pc_relative; // "ins_pc_relative"
   static const char* _op_cost;         // "op_cost"
 
   void dump();                     // Debug printer
@@ -1002,8 +999,6 @@
   bool       is_chain_rule(FormDict &globals) const;
   int        is_ideal_copy() const;
   int        is_expensive() const;     // node matches ideal 'CosD'
-  bool       is_ideal_unlock() const;
-  bool       is_ideal_call_leaf() const;
   bool       is_ideal_if()   const;    // node matches ideal 'If'
   bool       is_ideal_fastlock() const; // node matches ideal 'FastLock'
   bool       is_ideal_jump()   const;  // node matches ideal 'Jump'
--- a/src/share/vm/adlc/output_c.cpp	Wed Aug 17 07:05:42 2011 -0400
+++ b/src/share/vm/adlc/output_c.cpp	Fri Aug 19 08:55:53 2011 -0700
@@ -3088,12 +3088,19 @@
     int label_position = instr->label_position();
     if( label_position != -1 ) {
       // Set the label
-      fprintf(fp,"void %sNode::label_set( Label& label, uint block_num ) {\n", instr->_ident);
+      fprintf(fp,"void %sNode::label_set( Label* label, uint block_num ) {\n", instr->_ident);
       fprintf(fp,"  labelOper* oper  = (labelOper*)(opnd_array(%d));\n",
               label_position );
-      fprintf(fp,"  oper->_label     = &label;\n");
+      fprintf(fp,"  oper->_label     = label;\n");
       fprintf(fp,"  oper->_block_num = block_num;\n");
       fprintf(fp,"}\n");
+      // Save the label
+      fprintf(fp,"void %sNode::save_label( Label** label, uint* block_num ) {\n", instr->_ident);
+      fprintf(fp,"  labelOper* oper  = (labelOper*)(opnd_array(%d));\n",
+              label_position );
+      fprintf(fp,"  *label = oper->_label;\n");
+      fprintf(fp,"  *block_num = oper->_block_num;\n");
+      fprintf(fp,"}\n");
     }
   }
 
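
For a concrete picture of what the fprintf template above now emits: given a hypothetical branch instruct named jmpCon whose label operand sits at position 1, the generated pair would read roughly as follows. The new save_label lets the compiler stash a long branch's label and block number before replacing the node with its short form.

void jmpConNode::label_set( Label* label, uint block_num ) {
  labelOper* oper  = (labelOper*)(opnd_array(1));
  oper->_label     = label;
  oper->_block_num = block_num;
}
void jmpConNode::save_label( Label** label, uint* block_num ) {
  labelOper* oper  = (labelOper*)(opnd_array(1));
  *label = oper->_label;
  *block_num = oper->_block_num;
}
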
--- a/src/share/vm/adlc/output_h.cpp	Wed Aug 17 07:05:42 2011 -0400
+++ b/src/share/vm/adlc/output_h.cpp	Fri Aug 19 08:55:53 2011 -0700
@@ -1519,8 +1519,9 @@
     // Declare Node::methods that set operand Label's contents
     int label_position = instr->label_position();
     if( label_position != -1 ) {
-      // Set the label, stored in labelOper::_branch_label
-      fprintf(fp,"  virtual void           label_set( Label& label, uint block_num );\n");
+      // Set/Save the label, stored in labelOper::_branch_label
+      fprintf(fp,"  virtual void           label_set( Label* label, uint block_num );\n");
+      fprintf(fp,"  virtual void           save_label( Label** label, uint* block_num );\n");
     }
 
     // If this instruction contains a methodOper
@@ -1536,16 +1537,16 @@
     // Each instruction attribute results in a virtual call of same name.
     // The ins_cost is not handled here.
     Attribute *attr = instr->_attribs;
-    bool is_pc_relative = false;
+    bool avoid_back_to_back = false;
     while (attr != NULL) {
       if (strcmp(attr->_ident,"ins_cost") &&
-          strcmp(attr->_ident,"ins_pc_relative")) {
+          strcmp(attr->_ident,"ins_short_branch")) {
         fprintf(fp,"  int             %s() const { return %s; }\n",
                 attr->_ident, attr->_val);
       }
-      // Check value for ins_pc_relative, and if it is true (1), set the flag
-      if (!strcmp(attr->_ident,"ins_pc_relative") && attr->int_val(*this) != 0)
-        is_pc_relative = true;
+      // Check value for ins_avoid_back_to_back, and if it is true (1), set the flag
+      if (!strcmp(attr->_ident,"ins_avoid_back_to_back") && attr->int_val(*this) != 0)
+        avoid_back_to_back = true;
       attr = (Attribute *)attr->_next;
     }
 
@@ -1657,20 +1658,10 @@
     fprintf(fp," _num_opnds = %d; _opnds = _opnd_array; ", instr->num_opnds());
 
     bool node_flags_set = false;
-    // flag: if this instruction matches an ideal 'Goto' node
-    if ( instr->is_ideal_goto() ) {
-      fprintf(fp,"init_flags(Flag_is_Goto");
-      node_flags_set = true;
-    }
-
     // flag: if this instruction matches an ideal 'Copy*' node
     if ( instr->is_ideal_copy() != 0 ) {
-      if ( node_flags_set ) {
-        fprintf(fp," | Flag_is_Copy");
-      } else {
-        fprintf(fp,"init_flags(Flag_is_Copy");
-        node_flags_set = true;
-      }
+      fprintf(fp,"init_flags(Flag_is_Copy");
+      node_flags_set = true;
     }
 
     // Is an instruction is a constant?  If so, get its type
@@ -1688,16 +1679,6 @@
       }
     }
 
-    // flag: if instruction matches 'If' | 'Goto' | 'CountedLoopEnd | 'Jump'
-    if ( instr->is_ideal_branch() ) {
-      if ( node_flags_set ) {
-        fprintf(fp," | Flag_is_Branch");
-      } else {
-        fprintf(fp,"init_flags(Flag_is_Branch");
-        node_flags_set = true;
-      }
-    }
-
     // flag: if this instruction is cisc alternate
     if ( can_cisc_spill() && instr->is_cisc_alternate() ) {
       if ( node_flags_set ) {
@@ -1708,16 +1689,6 @@
       }
     }
 
-    // flag: if this instruction is pc relative
-    if ( is_pc_relative ) {
-      if ( node_flags_set ) {
-        fprintf(fp," | Flag_is_pc_relative");
-      } else {
-        fprintf(fp,"init_flags(Flag_is_pc_relative");
-        node_flags_set = true;
-      }
-    }
-
     // flag: if this instruction has short branch form
     if ( instr->has_short_branch_form() ) {
       if ( node_flags_set ) {
@@ -1728,6 +1699,16 @@
       }
     }
 
+    // flag: if this instruction should not be generated back to back.
+    if ( avoid_back_to_back ) {
+      if ( node_flags_set ) {
+        fprintf(fp," | Flag_avoid_back_to_back");
+      } else {
+        fprintf(fp,"init_flags(Flag_avoid_back_to_back");
+        node_flags_set = true;
+      }
+    }
+
     // Check if machine instructions that USE memory, but do not DEF memory,
     // depend upon a node that defines memory in machine-independent graph.
     if ( instr->needs_anti_dependence_check(_globalNames) ) {
@@ -1743,10 +1724,6 @@
       fprintf(fp,"); ");
     }
 
-    if (instr->is_ideal_unlock() || instr->is_ideal_call_leaf()) {
-      fprintf(fp,"clear_flag(Flag_is_safepoint_node); ");
-    }
-
     fprintf(fp,"}\n");
 
     // size_of, used by base class's clone to obtain the correct size.
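
With the ins_pc_relative plumbing gone, the generator instead recognizes an ins_avoid_back_to_back attribute and folds it into the node's init_flags call. A small mirror of the new emission branch, extracted into a standalone helper for clarity (the helper itself is illustrative):

#include <cstdio>

// OR into an already-open init_flags(...) call if one was started by an
// earlier flag, otherwise begin the call — same logic as output_h.cpp above.
static void emit_avoid_back_to_back(FILE* fp, bool avoid_back_to_back,
                                    bool& node_flags_set) {
  if (avoid_back_to_back) {
    if (node_flags_set) {
      fprintf(fp, " | Flag_avoid_back_to_back");
    } else {
      fprintf(fp, "init_flags(Flag_avoid_back_to_back");
      node_flags_set = true;
    }
  }
}
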
--- a/src/share/vm/c1/c1_GraphBuilder.cpp	Wed Aug 17 07:05:42 2011 -0400
+++ b/src/share/vm/c1/c1_GraphBuilder.cpp	Fri Aug 19 08:55:53 2011 -0700
@@ -3033,6 +3033,9 @@
   if (callee->should_exclude()) {
     // callee is excluded
     INLINE_BAILOUT("excluded by CompilerOracle")
+  } else if (callee->should_not_inline()) {
+    // callee is marked do-not-inline
+    INLINE_BAILOUT("disallowed by CompilerOracle")
   } else if (!callee->can_be_compiled()) {
     // callee is not compilable (prob. has breakpoints)
     INLINE_BAILOUT("not compilable")
@@ -3410,24 +3413,6 @@
   // Proper inlining of methods with jsrs requires a little more work.
   if (callee->has_jsrs()                 ) INLINE_BAILOUT("jsrs not handled properly by inliner yet");
 
-  // now perform tests that are based on flag settings
-  if (inline_level() > MaxInlineLevel                         ) INLINE_BAILOUT("too-deep inlining");
-  if (recursive_inline_level(callee) > MaxRecursiveInlineLevel) INLINE_BAILOUT("too-deep recursive inlining");
-  if (callee->code_size() > max_inline_size()                 ) INLINE_BAILOUT("callee is too large");
-
-  // don't inline throwable methods unless the inlining tree is rooted in a throwable class
-  if (callee->name() == ciSymbol::object_initializer_name() &&
-      callee->holder()->is_subclass_of(ciEnv::current()->Throwable_klass())) {
-    // Throwable constructor call
-    IRScope* top = scope();
-    while (top->caller() != NULL) {
-      top = top->caller();
-    }
-    if (!top->method()->holder()->is_subclass_of(ciEnv::current()->Throwable_klass())) {
-      INLINE_BAILOUT("don't inline Throwable constructors");
-    }
-  }
-
   // When SSE2 is used on intel, then no special handling is needed
   // for strictfp because the enum-constant is fixed at compile time,
   // the check for UseSSE2 is needed here
@@ -3435,13 +3420,36 @@
     INLINE_BAILOUT("caller and callee have different strict fp requirements");
   }
 
-  if (compilation()->env()->num_inlined_bytecodes() > DesiredMethodLimit) {
-    INLINE_BAILOUT("total inlining greater than DesiredMethodLimit");
-  }
-
   if (is_profiling() && !callee->ensure_method_data()) {
     INLINE_BAILOUT("mdo allocation failed");
   }
+
+  // now perform tests that are based on flag settings
+  if (callee->should_inline()) {
+    // ignore heuristic controls on inlining
+  } else {
+    if (inline_level() > MaxInlineLevel                         ) INLINE_BAILOUT("too-deep inlining");
+    if (recursive_inline_level(callee) > MaxRecursiveInlineLevel) INLINE_BAILOUT("too-deep recursive inlining");
+    if (callee->code_size() > max_inline_size()                 ) INLINE_BAILOUT("callee is too large");
+
+    // don't inline throwable methods unless the inlining tree is rooted in a throwable class
+    if (callee->name() == ciSymbol::object_initializer_name() &&
+        callee->holder()->is_subclass_of(ciEnv::current()->Throwable_klass())) {
+      // Throwable constructor call
+      IRScope* top = scope();
+      while (top->caller() != NULL) {
+        top = top->caller();
+      }
+      if (!top->method()->holder()->is_subclass_of(ciEnv::current()->Throwable_klass())) {
+        INLINE_BAILOUT("don't inline Throwable constructors");
+      }
+    }
+
+    if (compilation()->env()->num_inlined_bytecodes() > DesiredMethodLimit) {
+      INLINE_BAILOUT("total inlining greater than DesiredMethodLimit");
+    }
+  }
+
 #ifndef PRODUCT
       // printing
   if (PrintInlining) {
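
The reordering above changes the inlining policy in one visible way: a CompilerOracle force-inline (callee->should_inline()) now bypasses the depth, size, Throwable-constructor, and DesiredMethodLimit heuristics entirely, while exclude and do-not-inline directives still veto first. A standalone sketch of the resulting decision order (names are illustrative, not the VM's API):

enum OracleHint { kExclude, kDontInline, kForceInline, kNone };

// Returns true when inlining must bail out, checked in the rewritten order:
// hard vetoes first, then an unconditional pass for force-inline, and only
// then the size/depth heuristics.
static bool bail_out(OracleHint hint, int inline_level, int code_size,
                     int max_level, int max_size) {
  if (hint == kExclude || hint == kDontInline) return true;
  if (hint == kForceInline) return false;  // ignore heuristic controls
  return inline_level > max_level || code_size > max_size;
}
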
--- a/src/share/vm/ci/ciCallSite.cpp	Wed Aug 17 07:05:42 2011 -0400
+++ b/src/share/vm/ci/ciCallSite.cpp	Fri Aug 19 08:55:53 2011 -0700
@@ -28,6 +28,16 @@
 
 // ciCallSite
 
+bool ciCallSite::is_constant_call_site() {
+  return klass()->is_subclass_of(CURRENT_ENV->ConstantCallSite_klass());
+}
+bool ciCallSite::is_mutable_call_site() {
+  return klass()->is_subclass_of(CURRENT_ENV->MutableCallSite_klass());
+}
+bool ciCallSite::is_volatile_call_site() {
+  return klass()->is_subclass_of(CURRENT_ENV->VolatileCallSite_klass());
+}
+
 // ------------------------------------------------------------------
 // ciCallSite::get_target
 //
--- a/src/share/vm/ci/ciCallSite.hpp	Wed Aug 17 07:05:42 2011 -0400
+++ b/src/share/vm/ci/ciCallSite.hpp	Fri Aug 19 08:55:53 2011 -0700
@@ -37,6 +37,10 @@
   // What kind of ciObject is this?
   bool is_call_site() const { return true; }
 
+  bool is_constant_call_site();
+  bool is_mutable_call_site();
+  bool is_volatile_call_site();
+
   // Return the target MethodHandle of this CallSite.
   ciMethodHandle* get_target() const;
 
--- a/src/share/vm/ci/ciField.hpp	Wed Aug 17 07:05:42 2011 -0400
+++ b/src/share/vm/ci/ciField.hpp	Fri Aug 19 08:55:53 2011 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1999, 2011, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -178,6 +178,8 @@
   bool is_volatile    () { return flags().is_volatile(); }
   bool is_transient   () { return flags().is_transient(); }
 
+  bool is_call_site_target() { return ((holder() == CURRENT_ENV->CallSite_klass()) && (name() == ciSymbol::target_name())); }
+
   // Debugging output
   void print();
   void print_name_on(outputStream* st);
--- a/src/share/vm/classfile/classLoader.cpp	Wed Aug 17 07:05:42 2011 -0400
+++ b/src/share/vm/classfile/classLoader.cpp	Fri Aug 19 08:55:53 2011 -0700
@@ -1350,13 +1350,13 @@
                 _codecache_sweep_counter = 0;
               }
               // Force compilation
-              CompileBroker::compile_method(m, InvocationEntryBci, CompLevel_initial_compile,
+              CompileBroker::compile_method(m, InvocationEntryBci, CompilationPolicy::policy()->initial_compile_level(),
                                             methodHandle(), 0, "CTW", THREAD);
               if (HAS_PENDING_EXCEPTION) {
                 clear_pending_exception_if_not_oom(CHECK);
                 tty->print_cr("CompileTheWorld (%d) : Skipping method: %s", _compile_the_world_counter, m->name()->as_C_string());
               }
-              if (TieredCompilation) {
+              if (TieredCompilation && TieredStopAtLevel >= CompLevel_full_optimization) {
                 // Clobber the first compile and force second tier compilation
                 nmethod* nm = m->code();
                 if (nm != NULL) {
--- a/src/share/vm/classfile/systemDictionary.cpp	Wed Aug 17 07:05:42 2011 -0400
+++ b/src/share/vm/classfile/systemDictionary.cpp	Fri Aug 19 08:55:53 2011 -0700
@@ -1978,7 +1978,7 @@
 
   // JSR 292 classes
   WKID jsr292_group_start = WK_KLASS_ENUM_NAME(MethodHandle_klass);
-  WKID jsr292_group_end   = WK_KLASS_ENUM_NAME(CallSite_klass);
+  WKID jsr292_group_end   = WK_KLASS_ENUM_NAME(VolatileCallSite_klass);
   initialize_wk_klasses_until(jsr292_group_start, scan, CHECK);
   if (EnableInvokeDynamic) {
     initialize_wk_klasses_through(jsr292_group_end, scan, CHECK);
--- a/src/share/vm/classfile/systemDictionary.hpp	Wed Aug 17 07:05:42 2011 -0400
+++ b/src/share/vm/classfile/systemDictionary.hpp	Fri Aug 19 08:55:53 2011 -0700
@@ -144,18 +144,21 @@
   template(reflect_UnsafeStaticFieldAccessorImpl_klass, sun_reflect_UnsafeStaticFieldAccessorImpl, Opt_Only_JDK15) \
                                                                               \
   /* support for dynamic typing; it's OK if these are NULL in earlier JDKs */ \
-  template(MethodHandle_klass,           java_lang_invoke_MethodHandle,     Pre_JSR292) \
-  template(MemberName_klass,             java_lang_invoke_MemberName,       Pre_JSR292) \
-  template(MethodHandleNatives_klass,    java_lang_invoke_MethodHandleNatives, Pre_JSR292) \
-  template(AdapterMethodHandle_klass,    java_lang_invoke_AdapterMethodHandle, Pre_JSR292) \
-  template(BoundMethodHandle_klass,      java_lang_invoke_BoundMethodHandle, Pre_JSR292) \
-  template(DirectMethodHandle_klass,     java_lang_invoke_DirectMethodHandle, Pre_JSR292) \
-  template(MethodType_klass,             java_lang_invoke_MethodType,       Pre_JSR292) \
-  template(MethodTypeForm_klass,         java_lang_invoke_MethodTypeForm,   Pre_JSR292) \
-  template(BootstrapMethodError_klass,   java_lang_BootstrapMethodError, Pre_JSR292) \
+  template(MethodHandle_klass,             java_lang_invoke_MethodHandle,             Pre_JSR292) \
+  template(MemberName_klass,               java_lang_invoke_MemberName,               Pre_JSR292) \
+  template(MethodHandleNatives_klass,      java_lang_invoke_MethodHandleNatives,      Pre_JSR292) \
+  template(AdapterMethodHandle_klass,      java_lang_invoke_AdapterMethodHandle,      Pre_JSR292) \
+  template(BoundMethodHandle_klass,        java_lang_invoke_BoundMethodHandle,        Pre_JSR292) \
+  template(DirectMethodHandle_klass,       java_lang_invoke_DirectMethodHandle,       Pre_JSR292) \
+  template(MethodType_klass,               java_lang_invoke_MethodType,               Pre_JSR292) \
+  template(MethodTypeForm_klass,           java_lang_invoke_MethodTypeForm,           Pre_JSR292) \
+  template(BootstrapMethodError_klass,     java_lang_BootstrapMethodError,            Pre_JSR292) \
   template(WrongMethodTypeException_klass, java_lang_invoke_WrongMethodTypeException, Pre_JSR292) \
-  template(CallSite_klass,               java_lang_invoke_CallSite,         Pre_JSR292) \
-  /* Note: MethodHandle must be first, and CallSite last in group */          \
+  template(CallSite_klass,                 java_lang_invoke_CallSite,                 Pre_JSR292) \
+  template(ConstantCallSite_klass,         java_lang_invoke_ConstantCallSite,         Pre_JSR292) \
+  template(MutableCallSite_klass,          java_lang_invoke_MutableCallSite,          Pre_JSR292) \
+  template(VolatileCallSite_klass,         java_lang_invoke_VolatileCallSite,         Pre_JSR292) \
+  /* Note: MethodHandle must be first, and VolatileCallSite last in group */  \
                                                                               \
   template(StringBuffer_klass,           java_lang_StringBuffer,         Pre) \
   template(StringBuilder_klass,          java_lang_StringBuilder,        Pre) \
--- a/src/share/vm/classfile/vmSymbols.hpp	Wed Aug 17 07:05:42 2011 -0400
+++ b/src/share/vm/classfile/vmSymbols.hpp	Fri Aug 19 08:55:53 2011 -0700
@@ -233,6 +233,9 @@
   template(java_lang_invoke_InvokeDynamic,            "java/lang/invoke/InvokeDynamic")           \
   template(java_lang_invoke_Linkage,                  "java/lang/invoke/Linkage")                 \
   template(java_lang_invoke_CallSite,                 "java/lang/invoke/CallSite")                \
+  template(java_lang_invoke_ConstantCallSite,         "java/lang/invoke/ConstantCallSite")        \
+  template(java_lang_invoke_MutableCallSite,          "java/lang/invoke/MutableCallSite")         \
+  template(java_lang_invoke_VolatileCallSite,         "java/lang/invoke/VolatileCallSite")        \
   template(java_lang_invoke_MethodHandle,             "java/lang/invoke/MethodHandle")            \
   template(java_lang_invoke_MethodType,               "java/lang/invoke/MethodType")              \
   template(java_lang_invoke_WrongMethodTypeException, "java/lang/invoke/WrongMethodTypeException") \
--- a/src/share/vm/code/dependencies.cpp	Wed Aug 17 07:05:42 2011 -0400
+++ b/src/share/vm/code/dependencies.cpp	Fri Aug 19 08:55:53 2011 -0700
@@ -113,6 +113,11 @@
   assert_common_1(no_finalizable_subclasses, ctxk);
 }
 
+void Dependencies::assert_call_site_target_value(ciKlass* ctxk, ciCallSite* call_site, ciMethodHandle* method_handle) {
+  check_ctxk(ctxk);
+  assert_common_3(call_site_target_value, ctxk, call_site, method_handle);
+}
+
 // Helper function.  If we are adding a new dep. under ctxk2,
 // try to find an old dep. under a broader* ctxk1.  If there is
 //
@@ -341,7 +346,8 @@
   "unique_concrete_method",
   "abstract_with_exclusive_concrete_subtypes_2",
   "exclusive_concrete_methods_2",
-  "no_finalizable_subclasses"
+  "no_finalizable_subclasses",
+  "call_site_target_value"
 };
 
 int Dependencies::_dep_args[TYPE_LIMIT] = {
@@ -354,7 +360,8 @@
   2, // unique_concrete_method ctxk, m
   3, // unique_concrete_subtypes_2 ctxk, k1, k2
   3, // unique_concrete_methods_2 ctxk, m1, m2
-  1  // no_finalizable_subclasses ctxk
+  1, // no_finalizable_subclasses ctxk
+  3  // call_site_target_value ctxk, call_site, method_handle
 };
 
 const char* Dependencies::dep_name(Dependencies::DepType dept) {
@@ -367,6 +374,13 @@
   return _dep_args[dept];
 }
 
+void Dependencies::check_valid_dependency_type(DepType dept) {
+  for (int deptv = (int) FIRST_TYPE; deptv < (int) TYPE_LIMIT; deptv++) {
+    if (dept == ((DepType) deptv))  return;
+  }
+  ShouldNotReachHere();
+}
+
 // for the sake of the compiler log, print out current dependencies:
 void Dependencies::log_all_dependencies() {
   if (log() == NULL)  return;
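
The new call_site_target_value entry keeps the two parallel tables consistent: a name string plus an argument count of three (context klass, call site, method handle). A hedged sketch of how a compiler front end would record the dependency through the assert_call_site_target_value helper added above (the surrounding names are stand-ins):

// 'deps', 'ctxk', 'site', and 'target' come from the compilation being set
// up; only the final call is the API introduced in this change.
void record_folded_call_site(Dependencies* deps, ciKlass* ctxk,
                             ciCallSite* site, ciMethodHandle* target) {
  deps->assert_call_site_target_value(ctxk, site, target);
}
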
@@ -800,11 +814,11 @@
                                  bool participants_hide_witnesses,
                                  bool top_level_call = true);
   // the spot-checking version:
-  klassOop find_witness_in(DepChange& changes,
+  klassOop find_witness_in(KlassDepChange& changes,
                            klassOop context_type,
                            bool participants_hide_witnesses);
  public:
-  klassOop find_witness_subtype(klassOop context_type, DepChange* changes = NULL) {
+  klassOop find_witness_subtype(klassOop context_type, KlassDepChange* changes = NULL) {
     assert(doing_subtype_search(), "must set up a subtype search");
     // When looking for unexpected concrete types,
     // do not look beneath expected ones.
@@ -817,7 +831,7 @@
       return find_witness_anywhere(context_type, participants_hide_witnesses);
     }
   }
-  klassOop find_witness_definer(klassOop context_type, DepChange* changes = NULL) {
+  klassOop find_witness_definer(klassOop context_type, KlassDepChange* changes = NULL) {
     assert(!doing_subtype_search(), "must set up a method definer search");
     // When looking for unexpected concrete methods,
     // look beneath expected ones, to see if there are overrides.
@@ -878,7 +892,7 @@
 #endif //PRODUCT
 
 
-klassOop ClassHierarchyWalker::find_witness_in(DepChange& changes,
+klassOop ClassHierarchyWalker::find_witness_in(KlassDepChange