changeset 34228:035e020c8c0d

Merge
author neliasso
date Thu, 26 Nov 2015 19:16:37 +0100
parents ec723a529700 001483ee066c
children 4558141028a9
files hotspot/test/compiler/compilercontrol/parser/DirectiveParser.java hotspot/test/compiler/compilercontrol/share/processors/QuietProcessor.java hotspot/test/compiler/intrinsics/bmi/verifycode/AddnTestI.java hotspot/test/compiler/intrinsics/bmi/verifycode/AddnTestL.java
diffstat 435 files changed, 27937 insertions(+), 5651 deletions(-)
--- a/hotspot/src/cpu/aarch64/vm/aarch64.ad	Wed Nov 25 13:39:52 2015 +0100
+++ b/hotspot/src/cpu/aarch64/vm/aarch64.ad	Thu Nov 26 19:16:37 2015 +0100
@@ -1079,10 +1079,10 @@
   // and for a volatile write we need
   //
   //   stlr<x>
-  // 
+  //
   // Alternatively, we can implement them by pairing a normal
   // load/store with a memory barrier. For a volatile read we need
-  // 
+  //
   //   ldr<x>
   //   dmb ishld
   //
@@ -1240,7 +1240,7 @@
   // Alternatively, we can elide generation of the dmb instructions
   // and plant the alternative CompareAndSwap macro-instruction
   // sequence (which uses ldaxr<x>).
-  // 
+  //
   // Of course, the above only applies when we see these signature
   // configurations. We still want to plant dmb instructions in any
   // other cases where we may see a MemBarAcquire, MemBarRelease or
@@ -1367,7 +1367,7 @@
     opcode = parent->Opcode();
     return opcode == Op_MemBarRelease;
   }
- 
+
   // 2) card mark detection helper
 
   // helper predicate which can be used to detect a volatile membar
@@ -1383,7 +1383,7 @@
   // true
   //
   // iii) the node's Mem projection feeds a StoreCM node.
-  
+
   bool is_card_mark_membar(const MemBarNode *barrier)
   {
     if (!UseG1GC && !(UseConcMarkSweepGC && UseCondCardMark)) {
@@ -1402,7 +1402,7 @@
 	return true;
       }
     }
-  
+
     return false;
   }
 
@@ -1430,7 +1430,7 @@
   // where
   //  || and \\ represent Ctl and Mem feeds via Proj nodes
   //  | \ and / indicate further routing of the Ctl and Mem feeds
-  // 
+  //
   // this is the graph we see for non-object stores. however, for a
   // volatile Object store (StoreN/P) we may see other nodes below the
   // leading membar because of the need for a GC pre- or post-write
@@ -1592,7 +1592,7 @@
   // ordering but neither will a releasing store (stlr). The latter
   // guarantees that the object put is visible but does not guarantee
   // that writes by other threads have also been observed.
-  // 
+  //
   // So, returning to the task of translating the object put and the
   // leading/trailing membar nodes: what do the non-normal node graph
   // look like for these 2 special cases? and how can we determine the
@@ -1731,7 +1731,7 @@
   //       |         |         |            |
   //     C |       M |       M |          M |
   //        \        |         |           /
-  //                  . . . 
+  //                  . . .
   //          (post write subtree elided)
   //                    . . .
   //             C \         M /
@@ -1812,12 +1812,12 @@
   //   |    |                 |  /        /
   //   |  Region  . . .     Phi[M]  _____/
   //   |    /                 |    /
-  //   |                      |   /   
+  //   |                      |   /
   //   | . . .   . . .        |  /
   //   | /                    | /
   // Region           |  |  Phi[M]
   //   |              |  |  / Bot
-  //    \            MergeMem 
+  //    \            MergeMem
   //     \            /
   //     MemBarVolatile
   //
@@ -1858,7 +1858,7 @@
   // to a trailing barrier via a MergeMem. That feed is either direct
   // (for CMS) or via 2 or 3 Phi nodes merging the leading barrier
   // memory flow (for G1).
-  // 
+  //
   // The predicates controlling generation of instructions for store
   // and barrier nodes employ a few simple helper functions (described
   // below) which identify the presence or absence of all these
@@ -2112,8 +2112,8 @@
       x = x->in(MemNode::Memory);
     } else {
       // the merge should get its Bottom mem feed from the leading membar
-      x = mm->in(Compile::AliasIdxBot);      
-    } 
+      x = mm->in(Compile::AliasIdxBot);
+    }
 
     // ensure this is a non control projection
     if (!x->is_Proj() || x->is_CFG()) {
@@ -2190,12 +2190,12 @@
   //     . . .
   //       |
   //   MemBarVolatile (card mark)
-  //      |          |     
+  //      |          |
   //      |        StoreCM
   //      |          |
   //      |        . . .
-  //  Bot |  / 
-  //   MergeMem 
+  //  Bot |  /
+  //   MergeMem
   //      |
   //      |
   //    MemBarVolatile {trailing}
@@ -2203,10 +2203,10 @@
   // 2)
   //   MemBarRelease/CPUOrder (leading)
   //    |
-  //    | 
+  //    |
   //    |\       . . .
-  //    | \        | 
-  //    |  \  MemBarVolatile (card mark) 
+  //    | \        |
+  //    |  \  MemBarVolatile (card mark)
   //    |   \   |     |
   //     \   \  |   StoreCM    . . .
   //      \   \ |
@@ -2231,7 +2231,7 @@
   //    |  \   \  |   StoreCM    . . .
   //    |   \   \ |
   //     \   \  Phi
-  //      \   \ /  
+  //      \   \ /
   //       \  Phi
   //        \ /
   //        Phi  . . .
@@ -2506,7 +2506,7 @@
 
     return (x->is_Load() && x->as_Load()->is_acquire());
   }
-  
+
   // now check for an unsafe volatile get
 
   // need to check for
@@ -2644,7 +2644,7 @@
   }
 
   membar = child_membar(membar);
-  
+
   if (!membar || !membar->Opcode() == Op_MemBarCPUOrder) {
     return false;
   }
@@ -2703,7 +2703,7 @@
 
   // first we check if this is part of a card mark. if so then we have
   // to generate a StoreLoad barrier
-  
+
   if (is_card_mark_membar(mbvol)) {
       return false;
   }
@@ -2769,7 +2769,7 @@
   if (!is_card_mark_membar(mbvol)) {
     return true;
   }
-  
+
   // we found a card mark -- just make sure we have a trailing barrier
 
   return (card_mark_to_trailing(mbvol) != NULL);
@@ -2808,7 +2808,7 @@
 
   assert(barrier->Opcode() == Op_MemBarCPUOrder,
 	 "CAS not fed by cpuorder membar!");
-      
+
   MemBarNode *b = parent_membar(barrier);
   assert ((b != NULL && b->Opcode() == Op_MemBarRelease),
 	  "CAS not fed by cpuorder+release membar pair!");
@@ -3463,6 +3463,17 @@
   return true;  // Per default match rules are supported.
 }
 
+const bool Matcher::match_rule_supported_vector(int opcode, int vlen) {
+
+  // TODO
+  // identify extra cases that we might want to provide match rules for
+  // e.g. Op_ vector nodes and other intrinsics while guarding with vlen
+  bool ret_value = match_rule_supported(opcode);
+  // Add rules here.
+
+  return ret_value;  // Per default match rules are supported.
+}
+
 const int Matcher::float_pressure(int default_pressure_threshold) {
   return default_pressure_threshold;
 }
@@ -4663,7 +4674,7 @@
       call = __ trampoline_call(Address(addr, relocInfo::static_call_type), &cbuf);
     }
     if (call == NULL) {
-      ciEnv::current()->record_failure("CodeCache is full"); 
+      ciEnv::current()->record_failure("CodeCache is full");
       return;
     }
 
@@ -4671,7 +4682,7 @@
       // Emit stub for static call
       address stub = CompiledStaticCall::emit_to_interp_stub(cbuf);
       if (stub == NULL) {
-        ciEnv::current()->record_failure("CodeCache is full"); 
+        ciEnv::current()->record_failure("CodeCache is full");
         return;
       }
     }
@@ -4681,7 +4692,7 @@
     MacroAssembler _masm(&cbuf);
     address call = __ ic_call((address)$meth$$method);
     if (call == NULL) {
-      ciEnv::current()->record_failure("CodeCache is full"); 
+      ciEnv::current()->record_failure("CodeCache is full");
       return;
     }
   %}
@@ -4706,7 +4717,7 @@
     if (cb) {
       address call = __ trampoline_call(Address(entry, relocInfo::runtime_call_type));
       if (call == NULL) {
-        ciEnv::current()->record_failure("CodeCache is full"); 
+        ciEnv::current()->record_failure("CodeCache is full");
         return;
       }
     } else {
--- a/hotspot/src/cpu/aarch64/vm/c1_CodeStubs_aarch64.cpp	Wed Nov 25 13:39:52 2015 +0100
+++ b/hotspot/src/cpu/aarch64/vm/c1_CodeStubs_aarch64.cpp	Thu Nov 26 19:16:37 2015 +0100
@@ -41,7 +41,9 @@
 
 void CounterOverflowStub::emit_code(LIR_Assembler* ce) {
   __ bind(_entry);
-  ce->store_parameter(_method->as_register(), 1);
+  Metadata *m = _method->as_constant_ptr()->as_metadata();
+  __ mov_metadata(rscratch1, m);
+  ce->store_parameter(rscratch1, 1);
   ce->store_parameter(_bci, 0);
   __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::counter_overflow_id)));
   ce->add_call_info_here(_info);
--- a/hotspot/src/cpu/aarch64/vm/c1_LIRGenerator_aarch64.cpp	Wed Nov 25 13:39:52 2015 +0100
+++ b/hotspot/src/cpu/aarch64/vm/c1_LIRGenerator_aarch64.cpp	Thu Nov 26 19:16:37 2015 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2005, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2005, 2015, Oracle and/or its affiliates. All rights reserved.
  * Copyright (c) 2014, Red Hat Inc. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
@@ -70,6 +70,7 @@
 LIR_Opr LIRGenerator::divOutOpr()       { Unimplemented(); return LIR_OprFact::illegalOpr; }
 LIR_Opr LIRGenerator::remOutOpr()       { Unimplemented(); return LIR_OprFact::illegalOpr; }
 LIR_Opr LIRGenerator::shiftCountOpr()   { Unimplemented(); return LIR_OprFact::illegalOpr; }
+LIR_Opr LIRGenerator::syncLockOpr()     { return new_register(T_INT); }
 LIR_Opr LIRGenerator::syncTempOpr()     { return FrameMap::r0_opr; }
 LIR_Opr LIRGenerator::getThreadTemp()   { return LIR_OprFact::illegalOpr; }
 
--- a/hotspot/src/cpu/aarch64/vm/c1_MacroAssembler_aarch64.hpp	Wed Nov 25 13:39:52 2015 +0100
+++ b/hotspot/src/cpu/aarch64/vm/c1_MacroAssembler_aarch64.hpp	Thu Nov 26 19:16:37 2015 +0100
@@ -1,6 +1,6 @@
 /*
- * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2014, Red Hat Inc. All rights reserved.
+ * Copyright (c) 1999, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, 2015, Red Hat Inc. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -105,4 +105,7 @@
 
   void invalidate_registers(bool inv_r0, bool inv_r19, bool inv_r2, bool inv_r3, bool inv_r4, bool inv_r5) PRODUCT_RETURN;
 
+  // This platform only uses signal-based null checks. The Label is not needed.
+  void null_check(Register r, Label *Lnull = NULL) { MacroAssembler::null_check(r); }
+
 #endif // CPU_AARCH64_VM_C1_MACROASSEMBLER_AARCH64_HPP
--- a/hotspot/src/cpu/aarch64/vm/c2_globals_aarch64.hpp	Wed Nov 25 13:39:52 2015 +0100
+++ b/hotspot/src/cpu/aarch64/vm/c2_globals_aarch64.hpp	Thu Nov 26 19:16:37 2015 +0100
@@ -73,6 +73,7 @@
 define_pd_global(bool, OptoScheduling,               false);
 define_pd_global(bool, OptoBundling,                 false);
 define_pd_global(bool, OptoRegScheduling,            false);
+define_pd_global(bool, SuperWordLoopUnrollAnalysis,  false);
 
 define_pd_global(intx, ReservedCodeCacheSize,        48*M);
 define_pd_global(intx, NonProfiledCodeHeapSize,      21*M);
--- a/hotspot/src/cpu/aarch64/vm/jvmciCodeInstaller_aarch64.cpp	Wed Nov 25 13:39:52 2015 +0100
+++ b/hotspot/src/cpu/aarch64/vm/jvmciCodeInstaller_aarch64.cpp	Thu Nov 26 19:16:37 2015 +0100
@@ -29,16 +29,16 @@
 #include "runtime/sharedRuntime.hpp"
 #include "vmreg_aarch64.inline.hpp"
 
-jint CodeInstaller::pd_next_offset(NativeInstruction* inst, jint pc_offset, oop method) {
+jint CodeInstaller::pd_next_offset(NativeInstruction* inst, jint pc_offset, Handle method, TRAPS) {
   Unimplemented();
   return 0;
 }
 
-void CodeInstaller::pd_patch_OopConstant(int pc_offset, Handle& constant) {
+void CodeInstaller::pd_patch_OopConstant(int pc_offset, Handle constant, TRAPS) {
   Unimplemented();
 }
 
-void CodeInstaller::pd_patch_MetaspaceConstant(int pc_offset, Handle& constant) {
+void CodeInstaller::pd_patch_MetaspaceConstant(int pc_offset, Handle constant, TRAPS) {
   Unimplemented();
 }
 
@@ -46,20 +46,20 @@
   Unimplemented();
 }
 
-void CodeInstaller::pd_relocate_ForeignCall(NativeInstruction* inst, jlong foreign_call_destination) {
+void CodeInstaller::pd_relocate_ForeignCall(NativeInstruction* inst, jlong foreign_call_destination, TRAPS) {
   Unimplemented();
 }
 
-void CodeInstaller::pd_relocate_JavaMethod(oop hotspot_method, jint pc_offset) {
+void CodeInstaller::pd_relocate_JavaMethod(Handle hotspot_method, jint pc_offset, TRAPS) {
   Unimplemented();
 }
 
-void CodeInstaller::pd_relocate_poll(address pc, jint mark) {
+void CodeInstaller::pd_relocate_poll(address pc, jint mark, TRAPS) {
   Unimplemented();
 }
 
 // convert JVMCI register indices (as used in oop maps) to HotSpot registers
-VMReg CodeInstaller::get_hotspot_reg(jint jvmci_reg) {
+VMReg CodeInstaller::get_hotspot_reg(jint jvmci_reg, TRAPS) {
   return NULL;
 }
 
--- a/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp	Wed Nov 25 13:39:52 2015 +0100
+++ b/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp	Thu Nov 26 19:16:37 2015 +0100
@@ -91,20 +91,18 @@
       unsigned offset_lo = dest & 0xfff;
       offset = adr_page - pc_page;
 
-      // We handle 3 types of PC relative addressing
+      // We handle 4 types of PC relative addressing
       //   1 - adrp    Rx, target_page
       //       ldr/str Ry, [Rx, #offset_in_page]
       //   2 - adrp    Rx, target_page
       //       add     Ry, Rx, #offset_in_page
       //   3 - adrp    Rx, target_page (page aligned reloc, offset == 0)
-      // In the first 2 cases we must check that Rx is the same in the adrp and the
-      // subsequent ldr/str or add instruction. Otherwise we could accidentally end
-      // up treating a type 3 relocation as a type 1 or 2 just because it happened
-      // to be followed by a random unrelated ldr/str or add instruction.
-      //
-      // In the case of a type 3 relocation, we know that these are only generated
-      // for the safepoint polling page, or for the card type byte map base so we
-      // assert as much and of course that the offset is 0.
+      //       movk    Rx, #imm16<<32
+      //   4 - adrp    Rx, target_page (page aligned reloc, offset == 0)
+      // In the first 3 cases we must check that Rx is the same in the adrp and the
+      // subsequent ldr/str, add or movk instruction. Otherwise we could accidentally end
+      // up treating a type 4 relocation as a type 1, 2 or 3 just because it happened
+      // to be followed by a random unrelated ldr/str, add or movk instruction.
       //
       unsigned insn2 = ((unsigned*)branch)[1];
       if (Instruction_aarch64::extract(insn2, 29, 24) == 0b111001 &&
@@ -123,13 +121,13 @@
         Instruction_aarch64::patch(branch + sizeof (unsigned),
                                    21, 10, offset_lo);
         instructions = 2;
-      } else {
-        assert((jbyte *)target ==
-                ((CardTableModRefBS*)(Universe::heap()->barrier_set()))->byte_map_base ||
-               target == StubRoutines::crc_table_addr() ||
-               (address)target == os::get_polling_page(),
-               "adrp must be polling page or byte map base");
-        assert(offset_lo == 0, "offset must be 0 for polling page or byte map base");
+      } else if (Instruction_aarch64::extract(insn2, 31, 21) == 0b11110010110 &&
+                   Instruction_aarch64::extract(insn, 4, 0) ==
+                     Instruction_aarch64::extract(insn2, 4, 0)) {
+        // movk #imm16<<32
+        Instruction_aarch64::patch(branch + 4, 20, 5, (uint64_t)target >> 32);
+        offset &= (1<<20)-1;
+        instructions = 2;
       }
     }
     int offset_lo = offset & 3;
@@ -212,16 +210,16 @@
       // Return the target address for the following sequences
       //   1 - adrp    Rx, target_page
       //       ldr/str Ry, [Rx, #offset_in_page]
-      //   2 - adrp    Rx, target_page         ]
+      //   2 - adrp    Rx, target_page
       //       add     Ry, Rx, #offset_in_page
       //   3 - adrp    Rx, target_page (page aligned reloc, offset == 0)
+      //       movk    Rx, #imm12<<32
+      //   4 - adrp    Rx, target_page (page aligned reloc, offset == 0)
       //
       // In the first two cases  we check that the register is the same and
       // return the target_page + the offset within the page.
       // Otherwise we assume it is a page aligned relocation and return
-      // the target page only. The only cases this is generated is for
-      // the safepoint polling page or for the card table byte map base so
-      // we assert as much.
+      // the target page only.
       //
       unsigned insn2 = ((unsigned*)insn_addr)[1];
       if (Instruction_aarch64::extract(insn2, 29, 24) == 0b111001 &&
@@ -238,10 +236,12 @@
         unsigned int byte_offset = Instruction_aarch64::extract(insn2, 21, 10);
         return address(target_page + byte_offset);
       } else {
-        assert((jbyte *)target_page ==
-                ((CardTableModRefBS*)(Universe::heap()->barrier_set()))->byte_map_base ||
-               (address)target_page == os::get_polling_page(),
-               "adrp must be polling page or byte map base");
+        if (Instruction_aarch64::extract(insn2, 31, 21) == 0b11110010110  &&
+               Instruction_aarch64::extract(insn, 4, 0) ==
+                 Instruction_aarch64::extract(insn2, 4, 0)) {
+          target_page = (target_page & 0xffffffff) |
+                         ((uint64_t)Instruction_aarch64::extract(insn2, 20, 5) << 32);
+        }
         return (address)target_page;
       }
     } else {
@@ -3964,22 +3964,26 @@
 
 void MacroAssembler::adrp(Register reg1, const Address &dest, unsigned long &byte_offset) {
   relocInfo::relocType rtype = dest.rspec().reloc()->type();
-  if (uabs(pc() - dest.target()) >= (1LL << 32)) {
-    guarantee(rtype == relocInfo::none
-              || rtype == relocInfo::external_word_type
-              || rtype == relocInfo::poll_type
-              || rtype == relocInfo::poll_return_type,
-              "can only use a fixed address with an ADRP");
-    // Out of range.  This doesn't happen very often, but we have to
-    // handle it
-    mov(reg1, dest);
-    byte_offset = 0;
+  unsigned long low_page = (unsigned long)CodeCache::low_bound() >> 12;
+  unsigned long high_page = (unsigned long)(CodeCache::high_bound()-1) >> 12;
+  unsigned long dest_page = (unsigned long)dest.target() >> 12;
+  long offset_low = dest_page - low_page;
+  long offset_high = dest_page - high_page;
+
+  InstructionMark im(this);
+  code_section()->relocate(inst_mark(), dest.rspec());
+  // 8143067: Ensure that the adrp can reach the dest from anywhere within
+  // the code cache so that if it is relocated we know it will still reach
+  if (offset_high >= -(1<<20) && offset_low < (1<<20)) {
+    _adrp(reg1, dest.target());
   } else {
-    InstructionMark im(this);
-    code_section()->relocate(inst_mark(), dest.rspec());
-    byte_offset = (uint64_t)dest.target() & 0xfff;
-    _adrp(reg1, dest.target());
+    unsigned long pc_page = (unsigned long)pc() >> 12;
+    long offset = dest_page - pc_page;
+    offset = (offset & ((1<<20)-1)) << 12;
+    _adrp(reg1, pc()+offset);
+    movk(reg1, ((unsigned long)dest.target() >> 32) & 0xffff, 32);
   }
+  byte_offset = (unsigned long)dest.target() & 0xfff;
 }
 
 void MacroAssembler::build_frame(int framesize) {
--- a/hotspot/src/cpu/aarch64/vm/sharedRuntime_aarch64.cpp	Wed Nov 25 13:39:52 2015 +0100
+++ b/hotspot/src/cpu/aarch64/vm/sharedRuntime_aarch64.cpp	Thu Nov 26 19:16:37 2015 +0100
@@ -2384,6 +2384,7 @@
   }
 #endif // ASSERT
   __ mov(c_rarg0, rthread);
+  __ mov(c_rarg1, rcpool);
   __ lea(rscratch1, RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::fetch_unroll_info)));
   __ blrt(rscratch1, 1, 0, 1);
   __ bind(retaddr);
@@ -2397,6 +2398,7 @@
   // Load UnrollBlock* into rdi
   __ mov(r5, r0);
 
+  __ ldrw(rcpool, Address(r5, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes()));
    Label noException;
   __ cmpw(rcpool, Deoptimization::Unpack_exception);   // Was exception pending?
   __ br(Assembler::NE, noException);
@@ -2609,6 +2611,7 @@
   // n.b. 2 gp args, 0 fp args, integral return type
 
   __ mov(c_rarg0, rthread);
+  __ movw(c_rarg2, (unsigned)Deoptimization::Unpack_uncommon_trap);
   __ lea(rscratch1,
          RuntimeAddress(CAST_FROM_FN_PTR(address,
                                          Deoptimization::uncommon_trap)));
@@ -2628,6 +2631,16 @@
   // move UnrollBlock* into r4
   __ mov(r4, r0);
 
+#ifdef ASSERT
+  { Label L;
+    __ ldrw(rscratch1, Address(r4, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes()));
+    __ cmpw(rscratch1, (unsigned)Deoptimization::Unpack_uncommon_trap);
+    __ br(Assembler::EQ, L);
+    __ stop("SharedRuntime::generate_deopt_blob: last_Java_fp not cleared");
+    __ bind(L);
+  }
+#endif
+
   // Pop all the frames we must move/replace.
   //
   // Frame picture (youngest to oldest)
--- a/hotspot/src/cpu/ppc/vm/c2_globals_ppc.hpp	Wed Nov 25 13:39:52 2015 +0100
+++ b/hotspot/src/cpu/ppc/vm/c2_globals_ppc.hpp	Thu Nov 26 19:16:37 2015 +0100
@@ -61,6 +61,7 @@
 define_pd_global(bool, UseCISCSpill,                 false);
 define_pd_global(bool, OptoBundling,                 false);
 define_pd_global(bool, OptoRegScheduling,            false);
+define_pd_global(bool, SuperWordLoopUnrollAnalysis,  false);
 // GL:
 // Detected a problem with unscaled compressed oops and
 // narrow_oop_use_complex_address() == false.
--- a/hotspot/src/cpu/ppc/vm/cppInterpreter_ppc.cpp	Wed Nov 25 13:39:52 2015 +0100
+++ b/hotspot/src/cpu/ppc/vm/cppInterpreter_ppc.cpp	Thu Nov 26 19:16:37 2015 +0100
@@ -2697,7 +2697,7 @@
   // Provide a debugger breakpoint in the frame manager if breakpoints
   // in osr'd methods are requested.
 #ifdef COMPILER2
-  NOT_PRODUCT( if (OptoBreakpointOSR) { __ illtrap(); } )
+  if (OptoBreakpointOSR) { __ illtrap(); }
 #endif
 
   // Load callee's pointer to locals array from callee's state.
--- a/hotspot/src/cpu/ppc/vm/interpreter_ppc.cpp	Wed Nov 25 13:39:52 2015 +0100
+++ b/hotspot/src/cpu/ppc/vm/interpreter_ppc.cpp	Thu Nov 26 19:16:37 2015 +0100
@@ -297,8 +297,16 @@
   __ bind(do_float);
   __ lfs(floatSlot, 0, arg_java);
 #if defined(LINUX)
+  // Linux uses ELF ABI. Both original ELF and ELFv2 ABIs have float
+  // in the least significant word of an argument slot.
+#if defined(VM_LITTLE_ENDIAN)
+  __ stfs(floatSlot, 0, arg_c);
+#else
   __ stfs(floatSlot, 4, arg_c);
+#endif
 #elif defined(AIX)
+  // Although AIX runs on big endian CPU, float is in most significant
+  // word of an argument slot.
   __ stfs(floatSlot, 0, arg_c);
 #else
 #error "unknown OS"
--- a/hotspot/src/cpu/ppc/vm/jvmciCodeInstaller_ppc.cpp	Wed Nov 25 13:39:52 2015 +0100
+++ b/hotspot/src/cpu/ppc/vm/jvmciCodeInstaller_ppc.cpp	Thu Nov 26 19:16:37 2015 +0100
@@ -29,16 +29,16 @@
 #include "runtime/sharedRuntime.hpp"
 #include "vmreg_ppc.inline.hpp"
 
-jint CodeInstaller::pd_next_offset(NativeInstruction* inst, jint pc_offset, oop method) {
+jint CodeInstaller::pd_next_offset(NativeInstruction* inst, jint pc_offset, Handle method, TRAPS) {
   Unimplemented();
   return 0;
 }
 
-void CodeInstaller::pd_patch_OopConstant(int pc_offset, Handle& constant) {
+void CodeInstaller::pd_patch_OopConstant(int pc_offset, Handle constant, TRAPS) {
   Unimplemented();
 }
 
-void CodeInstaller::pd_patch_MetaspaceConstant(int pc_offset, Handle& constant) {
+void CodeInstaller::pd_patch_MetaspaceConstant(int pc_offset, Handle constant, TRAPS) {
   Unimplemented();
 }
 
@@ -46,20 +46,20 @@
   Unimplemented();
 }
 
-void CodeInstaller::pd_relocate_ForeignCall(NativeInstruction* inst, jlong foreign_call_destination) {
+void CodeInstaller::pd_relocate_ForeignCall(NativeInstruction* inst, jlong foreign_call_destination, TRAPS) {
   Unimplemented();
 }
 
-void CodeInstaller::pd_relocate_JavaMethod(oop hotspot_method, jint pc_offset) {
+void CodeInstaller::pd_relocate_JavaMethod(Handle hotspot_method, jint pc_offset, TRAPS) {
   Unimplemented();
 }
 
-void CodeInstaller::pd_relocate_poll(address pc, jint mark) {
+void CodeInstaller::pd_relocate_poll(address pc, jint mark, TRAPS) {
   Unimplemented();
 }
 
 // convert JVMCI register indices (as used in oop maps) to HotSpot registers
-VMReg CodeInstaller::get_hotspot_reg(jint jvmci_reg) {
+VMReg CodeInstaller::get_hotspot_reg(jint jvmci_reg, TRAPS) {
   return NULL;
 }
 
--- a/hotspot/src/cpu/ppc/vm/ppc.ad	Wed Nov 25 13:39:52 2015 +0100
+++ b/hotspot/src/cpu/ppc/vm/ppc.ad	Thu Nov 26 19:16:37 2015 +0100
@@ -2064,6 +2064,17 @@
   return true;  // Per default match rules are supported.
 }
 
+const bool Matcher::match_rule_supported_vector(int opcode, int vlen) {
+
+  // TODO
+  // identify extra cases that we might want to provide match rules for
+  // e.g. Op_ vector nodes and other intrinsics while guarding with vlen
+  bool ret_value = match_rule_supported(opcode);
+  // Add rules here.
+
+  return ret_value;  // Per default match rules are supported.
+}
+
 const int Matcher::float_pressure(int default_pressure_threshold) {
   return default_pressure_threshold;
 }
@@ -3416,7 +3427,7 @@
       // The stub for call to interpreter.
       address stub = CompiledStaticCall::emit_to_interp_stub(cbuf);
       if (stub == NULL) {
-        ciEnv::current()->record_failure("CodeCache is full"); 
+        ciEnv::current()->record_failure("CodeCache is full");
         return;
       }
     }
@@ -3465,7 +3476,7 @@
     // The stub for call to interpreter.
     address stub = CompiledStaticCall::emit_to_interp_stub(cbuf);
     if (stub == NULL) {
-      ciEnv::current()->record_failure("CodeCache is full"); 
+      ciEnv::current()->record_failure("CodeCache is full");
       return;
     }
 
@@ -6911,7 +6922,7 @@
     n_compare->_opnds[0] = op_crx;
     n_compare->_opnds[1] = op_src;
     n_compare->_opnds[2] = new immN_0Oper(TypeNarrowOop::NULL_PTR);
-    
+
     decodeN_mergeDisjointNode *n2 = new decodeN_mergeDisjointNode();
     n2->add_req(n_region, n_src, n1);
     n2->_opnds[0] = op_dst;
@@ -10588,7 +10599,7 @@
 
 instruct cmpFUnordered_reg_reg(flagsReg crx, regF src1, regF src2) %{
   // Needs matchrule, see cmpDUnordered.
-  match(Set crx (CmpF src1 src2)); 
+  match(Set crx (CmpF src1 src2));
   // no match-rule, false predicate
   predicate(false);
 
@@ -10697,13 +10708,13 @@
 %}
 
 instruct cmpDUnordered_reg_reg(flagsReg crx, regD src1, regD src2) %{
-  // Needs matchrule so that ideal opcode is Cmp. This causes that gcm places the 
-  // node right before the conditional move using it. 
+  // Needs matchrule so that ideal opcode is Cmp. This causes that gcm places the
+  // node right before the conditional move using it.
   // In jck test api/java_awt/geom/QuadCurve2DFloat/index.html#SetCurveTesttestCase7,
   // compilation of java.awt.geom.RectangularShape::getBounds()Ljava/awt/Rectangle
   // crashed in register allocation where the flags Reg between cmpDUnoredered and a
   // conditional move was supposed to be spilled.
-  match(Set crx (CmpD src1 src2)); 
+  match(Set crx (CmpD src1 src2));
   // False predicate, shall not be matched.
   predicate(false);
 
--- a/hotspot/src/cpu/ppc/vm/sharedRuntime_ppc.cpp	Wed Nov 25 13:39:52 2015 +0100
+++ b/hotspot/src/cpu/ppc/vm/sharedRuntime_ppc.cpp	Thu Nov 26 19:16:37 2015 +0100
@@ -753,6 +753,21 @@
     // in farg_reg[j] if argument i is the j-th float argument of this call.
     //
     case T_FLOAT:
+#if defined(LINUX)
+      // Linux uses ELF ABI. Both original ELF and ELFv2 ABIs have float
+      // in the least significant word of an argument slot.
+#if defined(VM_LITTLE_ENDIAN)
+#define FLOAT_WORD_OFFSET_IN_SLOT 0
+#else
+#define FLOAT_WORD_OFFSET_IN_SLOT 1
+#endif
+#elif defined(AIX)
+      // Although AIX runs on big endian CPU, float is in the most
+      // significant word of an argument slot.
+#define FLOAT_WORD_OFFSET_IN_SLOT 0
+#else
+#error "unknown OS"
+#endif
       if (freg < Argument::n_float_register_parameters_c) {
         // Put float in register ...
         reg = farg_reg[freg];
@@ -766,14 +781,14 @@
         if (arg >= Argument::n_regs_not_on_stack_c) {
           // ... and on the stack.
           guarantee(regs2 != NULL, "must pass float in register and stack slot");
-          VMReg reg2 = VMRegImpl::stack2reg(stk LINUX_ONLY(+1));
+          VMReg reg2 = VMRegImpl::stack2reg(stk + FLOAT_WORD_OFFSET_IN_SLOT);
           regs2[i].set1(reg2);
           stk += inc_stk_for_intfloat;
         }
 
       } else {
         // Put float on stack.
-        reg = VMRegImpl::stack2reg(stk LINUX_ONLY(+1));
+        reg = VMRegImpl::stack2reg(stk + FLOAT_WORD_OFFSET_IN_SLOT);
         stk += inc_stk_for_intfloat;
       }
       regs[i].set1(reg);
@@ -2802,7 +2817,7 @@
   __ set_last_Java_frame(R1_SP, noreg);
 
   // With EscapeAnalysis turned on, this call may safepoint!
-  __ call_VM_leaf(CAST_FROM_FN_PTR(address, Deoptimization::fetch_unroll_info), R16_thread);
+  __ call_VM_leaf(CAST_FROM_FN_PTR(address, Deoptimization::fetch_unroll_info), R16_thread, exec_mode_reg);
   address calls_return_pc = __ last_calls_return_pc();
   // Set an oopmap for the call site that describes all our saved registers.
   oop_maps->add_gc_map(calls_return_pc - start, map);
@@ -2815,6 +2830,8 @@
   // by save_volatile_registers(...).
   RegisterSaver::restore_result_registers(masm, first_frame_size_in_bytes);
 
+  // reload the exec mode from the UnrollBlock (it might have changed)
+  __ lwz(exec_mode_reg, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes(), unroll_block_reg);
   // In excp_deopt_mode, restore and clear exception oop which we
   // stored in the thread during exception entry above. The exception
   // oop will be the return value of this stub.
@@ -2945,8 +2962,9 @@
   __ set_last_Java_frame(/*sp*/R1_SP, /*pc*/R11_scratch1);
 
   __ mr(klass_index_reg, R3);
+  __ li(R5_ARG3, Deoptimization::Unpack_uncommon_trap);
   __ call_VM_leaf(CAST_FROM_FN_PTR(address, Deoptimization::uncommon_trap),
-                  R16_thread, klass_index_reg);
+                  R16_thread, klass_index_reg, R5_ARG3);
 
   // Set an oopmap for the call site.
   oop_maps->add_gc_map(gc_map_pc - start, map);
@@ -2966,6 +2984,12 @@
 
   // stack: (caller_of_deoptee, ...).
 
+#ifdef ASSERT
+  __ lwz(R22_tmp2, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes(), unroll_block_reg);
+  __ cmpdi(CCR0, R22_tmp2, (unsigned)Deoptimization::Unpack_uncommon_trap);
+  __ asm_assert_eq("SharedRuntime::generate_deopt_blob: expected Unpack_uncommon_trap", 0);
+#endif
+
   // Allocate new interpreter frame(s) and possibly a c2i adapter
   // frame.
   push_skeleton_frames(masm, false/*deopt*/,
--- a/hotspot/src/cpu/sparc/vm/c1_CodeStubs_sparc.cpp	Wed Nov 25 13:39:52 2015 +0100
+++ b/hotspot/src/cpu/sparc/vm/c1_CodeStubs_sparc.cpp	Thu Nov 26 19:16:37 2015 +0100
@@ -94,8 +94,10 @@
 void CounterOverflowStub::emit_code(LIR_Assembler* ce) {
   __ bind(_entry);
   __ set(_bci, G4);
+  Metadata *m = _method->as_constant_ptr()->as_metadata();
+  __ set_metadata_constant(m, G5);
   __ call(Runtime1::entry_for(Runtime1::counter_overflow_id), relocInfo::runtime_call_type);
-  __ delayed()->mov_or_nop(_method->as_register(), G5);
+  __ delayed()->nop();
   ce->add_call_info_here(_info);
   ce->verify_oop_map(_info);
 
--- a/hotspot/src/cpu/sparc/vm/c1_LIRAssembler_sparc.cpp	Wed Nov 25 13:39:52 2015 +0100
+++ b/hotspot/src/cpu/sparc/vm/c1_LIRAssembler_sparc.cpp	Thu Nov 26 19:16:37 2015 +0100
@@ -2812,7 +2812,23 @@
 }
 
 void LIR_Assembler::emit_updatecrc32(LIR_OpUpdateCRC32* op) {
-  fatal("CRC32 intrinsic is not implemented on this platform");
+  assert(op->crc()->is_single_cpu(),  "crc must be register");
+  assert(op->val()->is_single_cpu(),  "byte value must be register");
+  assert(op->result_opr()->is_single_cpu(), "result must be register");
+  Register crc = op->crc()->as_register();
+  Register val = op->val()->as_register();
+  Register table = op->result_opr()->as_register();
+  Register res   = op->result_opr()->as_register();
+
+  assert_different_registers(val, crc, table);
+
+  __ set(ExternalAddress(StubRoutines::crc_table_addr()), table);
+  __ not1(crc);
+  __ clruwu(crc);
+  __ update_byte_crc32(crc, val, table);
+  __ not1(crc);
+
+  __ mov(crc, res);
 }
 
 void LIR_Assembler::emit_lock(LIR_OpLock* op) {
--- a/hotspot/src/cpu/sparc/vm/c1_LIRGenerator_sparc.cpp	Wed Nov 25 13:39:52 2015 +0100
+++ b/hotspot/src/cpu/sparc/vm/c1_LIRGenerator_sparc.cpp	Thu Nov 26 19:16:37 2015 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2005, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2005, 2015, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -68,6 +68,7 @@
 
 LIR_Opr LIRGenerator::exceptionOopOpr()              { return FrameMap::Oexception_opr;  }
 LIR_Opr LIRGenerator::exceptionPcOpr()               { return FrameMap::Oissuing_pc_opr; }
+LIR_Opr LIRGenerator::syncLockOpr()                  { return new_register(T_INT); }
 LIR_Opr LIRGenerator::syncTempOpr()                  { return new_register(T_OBJECT); }
 LIR_Opr LIRGenerator::getThreadTemp()                { return rlock_callee_saved(NOT_LP64(T_INT) LP64_ONLY(T_LONG)); }
 
@@ -785,7 +786,86 @@
 }
 
 void LIRGenerator::do_update_CRC32(Intrinsic* x) {
-  fatal("CRC32 intrinsic is not implemented on this platform");
+  // Make all state_for calls early since they can emit code
+  LIR_Opr result = rlock_result(x);
+  int flags = 0;
+  switch (x->id()) {
+    case vmIntrinsics::_updateCRC32: {
+      LIRItem crc(x->argument_at(0), this);
+      LIRItem val(x->argument_at(1), this);
+      // val is destroyed by update_crc32
+      val.set_destroys_register();
+      crc.load_item();
+      val.load_item();
+      __ update_crc32(crc.result(), val.result(), result);
+      break;
+    }
+    case vmIntrinsics::_updateBytesCRC32:
+    case vmIntrinsics::_updateByteBufferCRC32: {
+
+      bool is_updateBytes = (x->id() == vmIntrinsics::_updateBytesCRC32);
+
+      LIRItem crc(x->argument_at(0), this);
+      LIRItem buf(x->argument_at(1), this);
+      LIRItem off(x->argument_at(2), this);
+      LIRItem len(x->argument_at(3), this);
+
+      buf.load_item();
+      off.load_nonconstant();
+
+      LIR_Opr index = off.result();
+      int offset = is_updateBytes ? arrayOopDesc::base_offset_in_bytes(T_BYTE) : 0;
+      if(off.result()->is_constant()) {
+        index = LIR_OprFact::illegalOpr;
+        offset += off.result()->as_jint();
+      }
+
+      LIR_Opr base_op = buf.result();
+
+      if (index->is_valid()) {
+        LIR_Opr tmp = new_register(T_LONG);
+        __ convert(Bytecodes::_i2l, index, tmp);
+        index = tmp;
+        if (index->is_constant()) {
+          offset += index->as_constant_ptr()->as_jint();
+          index = LIR_OprFact::illegalOpr;
+        } else if (index->is_register()) {
+          LIR_Opr tmp2 = new_register(T_LONG);
+          LIR_Opr tmp3 = new_register(T_LONG);
+          __ move(base_op, tmp2);
+          __ move(index, tmp3);
+          __ add(tmp2, tmp3, tmp2);
+          base_op = tmp2;
+        } else {
+          ShouldNotReachHere();
+        }
+      }
+
+      LIR_Address* a = new LIR_Address(base_op, offset, T_BYTE);
+
+      BasicTypeList signature(3);
+      signature.append(T_INT);
+      signature.append(T_ADDRESS);
+      signature.append(T_INT);
+      CallingConvention* cc = frame_map()->c_calling_convention(&signature);
+      const LIR_Opr result_reg = result_register_for(x->type());
+
+      LIR_Opr addr = new_pointer_register();
+      __ leal(LIR_OprFact::address(a), addr);
+
+      crc.load_item_force(cc->at(0));
+      __ move(addr, cc->at(1));
+      len.load_item_force(cc->at(2));
+
+      __ call_runtime_leaf(StubRoutines::updateBytesCRC32(), getThreadTemp(), result_reg, cc->args());
+      __ move(result_reg, result);
+
+      break;
+    }
+    default: {
+      ShouldNotReachHere();
+    }
+  }
 }
 
 // _i2l, _i2f, _i2d, _l2i, _l2f, _l2d, _f2i, _f2l, _f2d, _d2i, _d2l, _d2f
--- a/hotspot/src/cpu/sparc/vm/c1_MacroAssembler_sparc.hpp	Wed Nov 25 13:39:52 2015 +0100
+++ b/hotspot/src/cpu/sparc/vm/c1_MacroAssembler_sparc.hpp	Thu Nov 26 19:16:37 2015 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1999, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1999, 2015, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -88,4 +88,7 @@
   void invalidate_registers(bool iregisters, bool lregisters, bool oregisters,
                             Register preserve1 = noreg, Register preserve2 = noreg);
 
+  // This platform only uses signal-based null checks. The Label is not needed.
+  void null_check(Register r, Label *Lnull = NULL) { MacroAssembler::null_check(r); }
+
 #endif // CPU_SPARC_VM_C1_MACROASSEMBLER_SPARC_HPP
--- a/hotspot/src/cpu/sparc/vm/c2_globals_sparc.hpp	Wed Nov 25 13:39:52 2015 +0100
+++ b/hotspot/src/cpu/sparc/vm/c2_globals_sparc.hpp	Thu Nov 26 19:16:37 2015 +0100
@@ -65,6 +65,7 @@
 define_pd_global(bool, OptoBundling,                 false);
 define_pd_global(bool, OptoScheduling,               true);
 define_pd_global(bool, OptoRegScheduling,            false);
+define_pd_global(bool, SuperWordLoopUnrollAnalysis,  false);
 
 #ifdef _LP64
 // We need to make sure that all generated code is within
--- a/hotspot/src/cpu/sparc/vm/interpreterGenerator_sparc.hpp	Wed Nov 25 13:39:52 2015 +0100
+++ b/hotspot/src/cpu/sparc/vm/interpreterGenerator_sparc.hpp	Thu Nov 26 19:16:37 2015 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -43,8 +43,9 @@
   void generate_counter_incr(Label* overflow, Label* profile_method, Label* profile_method_continue);
   void generate_counter_overflow(Label& Lcontinue);
 
+  address generate_CRC32_update_entry();
+  address generate_CRC32_updateBytes_entry(AbstractInterpreter::MethodKind kind);
+
   // Not supported
-  address generate_CRC32_update_entry() { return NULL; }
-  address generate_CRC32_updateBytes_entry(AbstractInterpreter::MethodKind kind) { return NULL; }
   address generate_CRC32C_updateBytes_entry(AbstractInterpreter::MethodKind kind) { return NULL; }
 #endif // CPU_SPARC_VM_INTERPRETERGENERATOR_SPARC_HPP
--- a/hotspot/src/cpu/sparc/vm/jvmciCodeInstaller_sparc.cpp	Wed Nov 25 13:39:52 2015 +0100
+++ b/hotspot/src/cpu/sparc/vm/jvmciCodeInstaller_sparc.cpp	Thu Nov 26 19:16:37 2015 +0100
@@ -29,7 +29,7 @@
 #include "runtime/sharedRuntime.hpp"
 #include "vmreg_sparc.inline.hpp"
 
-jint CodeInstaller::pd_next_offset(NativeInstruction* inst, jint pc_offset, oop method) {
+jint CodeInstaller::pd_next_offset(NativeInstruction* inst, jint pc_offset, Handle method, TRAPS) {
   if (inst->is_call() || inst->is_jump()) {
     return pc_offset + NativeCall::instruction_size;
   } else if (inst->is_call_reg()) {
@@ -37,12 +37,12 @@
   } else if (inst->is_sethi()) {
     return pc_offset + NativeFarCall::instruction_size;
   } else {
-    fatal("unsupported type of instruction for call site");
+    JVMCI_ERROR_0("unsupported type of instruction for call site");
     return 0;
   }
 }
 
-void CodeInstaller::pd_patch_OopConstant(int pc_offset, Handle& constant) {
+void CodeInstaller::pd_patch_OopConstant(int pc_offset, Handle constant, TRAPS) {
   address pc = _instructions->start() + pc_offset;
   Handle obj = HotSpotObjectConstantImpl::object(constant);
   jobject value = JNIHandles::make_local(obj());
@@ -52,7 +52,7 @@
     RelocationHolder rspec = oop_Relocation::spec(oop_index);
     _instructions->relocate(pc, rspec, 1);
 #else
-    fatal("compressed oop on 32bit");
+    JVMCI_ERROR("compressed oop on 32bit");
 #endif
   } else {
     NativeMovConstReg* move = nativeMovConstReg_at(pc);
@@ -66,20 +66,20 @@
   }
 }
 
-void CodeInstaller::pd_patch_MetaspaceConstant(int pc_offset, Handle& constant) {
+void CodeInstaller::pd_patch_MetaspaceConstant(int pc_offset, Handle constant, TRAPS) {
   address pc = _instructions->start() + pc_offset;
   if (HotSpotMetaspaceConstantImpl::compressed(constant)) {
 #ifdef _LP64
     NativeMovConstReg32* move = nativeMovConstReg32_at(pc);
-    narrowKlass narrowOop = record_narrow_metadata_reference(constant);
+    narrowKlass narrowOop = record_narrow_metadata_reference(constant, CHECK);
     move->set_data((intptr_t)narrowOop);
     TRACE_jvmci_3("relocating (narrow metaspace constant) at %p/%p", pc, narrowOop);
 #else
-    fatal("compressed Klass* on 32bit");
+    JVMCI_ERROR("compressed Klass* on 32bit");
 #endif
   } else {
     NativeMovConstReg* move = nativeMovConstReg_at(pc);
-    Metadata* reference = record_metadata_reference(constant);
+    Metadata* reference = record_metadata_reference(constant, CHECK);
     move->set_data((intptr_t)reference);
     TRACE_jvmci_3("relocating (metaspace constant) at %p/%p", pc, reference);
   }
@@ -106,7 +106,7 @@
   }
 }
 
-void CodeInstaller::pd_relocate_ForeignCall(NativeInstruction* inst, jlong foreign_call_destination) {
+void CodeInstaller::pd_relocate_ForeignCall(NativeInstruction* inst, jlong foreign_call_destination, TRAPS) {
   address pc = (address) inst;
   if (inst->is_call()) {
     NativeCall* call = nativeCall_at(pc);
@@ -117,17 +117,17 @@
     jump->set_jump_destination((address) foreign_call_destination);
     _instructions->relocate(jump->instruction_address(), runtime_call_Relocation::spec());
   } else {
-    fatal(err_msg("unknown call or jump instruction at " PTR_FORMAT, p2i(pc)));
+    JVMCI_ERROR("unknown call or jump instruction at " PTR_FORMAT, p2i(pc));
   }
   TRACE_jvmci_3("relocating (foreign call) at " PTR_FORMAT, p2i(inst));
 }
 
-void CodeInstaller::pd_relocate_JavaMethod(oop hotspot_method, jint pc_offset) {
+void CodeInstaller::pd_relocate_JavaMethod(Handle hotspot_method, jint pc_offset, TRAPS) {
 #ifdef ASSERT
   Method* method = NULL;
   // we need to check, this might also be an unresolved method
   if (hotspot_method->is_a(HotSpotResolvedJavaMethodImpl::klass())) {
-    method = getMethodFromHotSpotMethod(hotspot_method);
+    method = getMethodFromHotSpotMethod(hotspot_method());
   }
 #endif
   switch (_next_call_type) {
@@ -156,33 +156,33 @@
       break;
     }
     default:
-      fatal("invalid _next_call_type value");
+      JVMCI_ERROR("invalid _next_call_type value");
       break;
   }
 }
 
-void CodeInstaller::pd_relocate_poll(address pc, jint mark) {
+void CodeInstaller::pd_relocate_poll(address pc, jint mark, TRAPS) {
   switch (mark) {
     case POLL_NEAR:
-      fatal("unimplemented");
+      JVMCI_ERROR("unimplemented");
       break;
     case POLL_FAR:
       _instructions->relocate(pc, relocInfo::poll_type);
       break;
     case POLL_RETURN_NEAR:
-      fatal("unimplemented");
+      JVMCI_ERROR("unimplemented");
       break;
     case POLL_RETURN_FAR:
       _instructions->relocate(pc, relocInfo::poll_return_type);
       break;
     default:
-      fatal("invalid mark value");
+      JVMCI_ERROR("invalid mark value");
       break;
   }
 }
 
 // convert JVMCI register indices (as used in oop maps) to HotSpot registers
-VMReg CodeInstaller::get_hotspot_reg(jint jvmci_reg) {
+VMReg CodeInstaller::get_hotspot_reg(jint jvmci_reg, TRAPS) {
   // JVMCI Registers are numbered as follows:
   //   0..31: Thirty-two General Purpose registers (CPU Registers)
   //   32..63: Thirty-two single precision float registers
@@ -199,7 +199,7 @@
     } else if(jvmci_reg < 112) {
       floatRegisterNumber = 4 * (jvmci_reg - 96);
     } else {
-      fatal("Unknown jvmci register");
+      JVMCI_ERROR_NULL("invalid register number: %d", jvmci_reg);
     }
     return as_FloatRegister(floatRegisterNumber)->as_VMReg();
   }
--- a/hotspot/src/cpu/sparc/vm/macroAssembler_sparc.cpp	Wed Nov 25 13:39:52 2015 +0100
+++ b/hotspot/src/cpu/sparc/vm/macroAssembler_sparc.cpp	Thu Nov 26 19:16:37 2015 +0100
@@ -4771,3 +4771,243 @@
   movdtox(src, tmp1);
   reverse_bytes_32(tmp1, dst, tmp2);
 }
+
+void MacroAssembler::fold_128bit_crc32(Register xcrc_hi, Register xcrc_lo, Register xK_hi, Register xK_lo, Register xtmp_hi, Register xtmp_lo, Register buf, int offset) {
+  xmulx(xcrc_hi, xK_hi, xtmp_lo);
+  xmulxhi(xcrc_hi, xK_hi, xtmp_hi);
+  xmulxhi(xcrc_lo, xK_lo, xcrc_hi);
+  xmulx(xcrc_lo, xK_lo, xcrc_lo);
+  xor3(xcrc_lo, xtmp_lo, xcrc_lo);
+  xor3(xcrc_hi, xtmp_hi, xcrc_hi);
+  ldxl(buf, G0, xtmp_lo);
+  inc(buf, 8);
+  ldxl(buf, G0, xtmp_hi);
+  inc(buf, 8);
+  xor3(xcrc_lo, xtmp_lo, xcrc_lo);
+  xor3(xcrc_hi, xtmp_hi, xcrc_hi);
+}
+
+void MacroAssembler::fold_128bit_crc32(Register xcrc_hi, Register xcrc_lo, Register xK_hi, Register xK_lo, Register xtmp_hi, Register xtmp_lo, Register xbuf_hi, Register xbuf_lo) {
+  mov(xcrc_lo, xtmp_lo);
+  mov(xcrc_hi, xtmp_hi);
+  xmulx(xtmp_hi, xK_hi, xtmp_lo);
+  xmulxhi(xtmp_hi, xK_hi, xtmp_hi);
+  xmulxhi(xcrc_lo, xK_lo, xcrc_hi);
+  xmulx(xcrc_lo, xK_lo, xcrc_lo);
+  xor3(xcrc_lo, xbuf_lo, xcrc_lo);
+  xor3(xcrc_hi, xbuf_hi, xcrc_hi);
+  xor3(xcrc_lo, xtmp_lo, xcrc_lo);
+  xor3(xcrc_hi, xtmp_hi, xcrc_hi);
+}
+
+void MacroAssembler::fold_8bit_crc32(Register xcrc, Register table, Register xtmp, Register tmp) {
+  and3(xcrc, 0xFF, tmp);
+  sllx(tmp, 2, tmp);
+  lduw(table, tmp, xtmp);
+  srlx(xcrc, 8, xcrc);
+  xor3(xtmp, xcrc, xcrc);
+}
+
+void MacroAssembler::fold_8bit_crc32(Register crc, Register table, Register tmp) {
+  and3(crc, 0xFF, tmp);
+  srlx(crc, 8, crc);
+  sllx(tmp, 2, tmp);
+  lduw(table, tmp, tmp);
+  xor3(tmp, crc, crc);
+}
+
+#define CRC32_TMP_REG_NUM 18
+
+#define CRC32_CONST_64  0x163cd6124
+#define CRC32_CONST_96  0x0ccaa009e
+#define CRC32_CONST_160 0x1751997d0
+#define CRC32_CONST_480 0x1c6e41596
+#define CRC32_CONST_544 0x154442bd4
+
+void MacroAssembler::kernel_crc32(Register crc, Register buf, Register len, Register table) {
+
+  Label L_cleanup_loop, L_cleanup_check, L_align_loop, L_align_check;
+  Label L_main_loop_prologue;
+  Label L_fold_512b, L_fold_512b_loop, L_fold_128b;
+  Label L_fold_tail, L_fold_tail_loop;
+  Label L_8byte_fold_loop, L_8byte_fold_check;
+
+  const Register tmp[CRC32_TMP_REG_NUM] = {L0, L1, L2, L3, L4, L5, L6, G1, I0, I1, I2, I3, I4, I5, I7, O4, O5, G3};
+
+  Register const_64  = tmp[CRC32_TMP_REG_NUM-1];
+  Register const_96  = tmp[CRC32_TMP_REG_NUM-1];
+  Register const_160 = tmp[CRC32_TMP_REG_NUM-2];
+  Register const_480 = tmp[CRC32_TMP_REG_NUM-1];
+  Register const_544 = tmp[CRC32_TMP_REG_NUM-2];
+
+  set(ExternalAddress(StubRoutines::crc_table_addr()), table);
+
+  not1(crc); // ~c
+  clruwu(crc); // clear upper 32 bits of crc
+
+  // Check if below cutoff, proceed directly to cleanup code
+  mov(31, G4);
+  cmp_and_br_short(len, G4, Assembler::lessEqualUnsigned, Assembler::pt, L_cleanup_check);
+
+  // Align buffer to 8 byte boundry
+  mov(8, O5);
+  and3(buf, 0x7, O4);
+  sub(O5, O4, O5);
+  and3(O5, 0x7, O5);
+  sub(len, O5, len);
+  ba(L_align_check);
+  delayed()->nop();
+
+  // Alignment loop, table look up method for up to 7 bytes
+  bind(L_align_loop);
+  ldub(buf, 0, O4);
+  inc(buf);
+  dec(O5);
+  xor3(O4, crc, O4);
+  and3(O4, 0xFF, O4);
+  sllx(O4, 2, O4);
+  lduw(table, O4, O4);
+  srlx(crc, 8, crc);
+  xor3(O4, crc, crc);
+  bind(L_align_check);
+  nop();
+  cmp_and_br_short(O5, 0, Assembler::notEqual, Assembler::pt, L_align_loop);
+
+  // Aligned on 64-bit (8-byte) boundry at this point
+  // Check if still above cutoff (31-bytes)
+  mov(31, G4);
+  cmp_and_br_short(len, G4, Assembler::lessEqualUnsigned, Assembler::pt, L_cleanup_check);
+  // At least 32 bytes left to process
+
+  // Free up registers by storing them to FP registers
+  for (int i = 0; i < CRC32_TMP_REG_NUM; i++) {
+    movxtod(tmp[i], as_FloatRegister(2*i));
+  }
+
+  // Determine which loop to enter
+  // Shared prologue
+  ldxl(buf, G0, tmp[0]);
+  inc(buf, 8);
+  ldxl(buf, G0, tmp[1]);
+  inc(buf, 8);
+  xor3(tmp[0], crc, tmp[0]); // Fold CRC into first few bytes
+  and3(crc, 0, crc); // Clear out the crc register
+  // Main loop needs 128-bytes at least
+  mov(128, G4);
+  mov(64, tmp[2]);
+  cmp_and_br_short(len, G4, Assembler::greaterEqualUnsigned, Assembler::pt, L_main_loop_prologue);
+  // Less than 64 bytes
+  nop();
+  cmp_and_br_short(len, tmp[2], Assembler::lessUnsigned, Assembler::pt, L_fold_tail);
+  // Between 64 and 127 bytes
+  set64(CRC32_CONST_96,  const_96,  tmp[8]);
+  set64(CRC32_CONST_160, const_160, tmp[9]);
+  fold_128bit_crc32(tmp[1], tmp[0], const_96, const_160, tmp[2], tmp[3], buf, 0);
+  fold_128bit_crc32(tmp[1], tmp[0], const_96, const_160, tmp[4], tmp[5], buf, 16);
+  fold_128bit_crc32(tmp[1], tmp[0], const_96, const_160, tmp[6], tmp[7], buf, 32);
+  dec(len, 48);
+  ba(L_fold_tail);
+  delayed()->nop();
+
+  bind(L_main_loop_prologue);
+  for (int i = 2; i < 8; i++) {
+    ldxl(buf, G0, tmp[i]);
+    inc(buf, 8);
+  }
+
+  // Fold total 512 bits of polynomial on each iteration,
+  // 128 bits per each of 4 parallel streams
+  set64(CRC32_CONST_480, const_480, tmp[8]);
+  set64(CRC32_CONST_544, const_544, tmp[9]);
+
+  mov(128, G4);
+  bind(L_fold_512b_loop);
+  fold_128bit_crc32(tmp[1], tmp[0], const_480, const_544, tmp[9],  tmp[8],  buf,  0);
+  fold_128bit_crc32(tmp[3], tmp[2], const_480, const_544, tmp[11], tmp[10], buf, 16);
+  fold_128bit_crc32(tmp[5], tmp[4], const_480, const_544, tmp[13], tmp[12], buf, 32);
+  fold_128bit_crc32(tmp[7], tmp[6], const_480, const_544, tmp[15], tmp[14], buf, 64);
+  dec(len, 64);
+  cmp_and_br_short(len, G4, Assembler::greaterEqualUnsigned, Assembler::pt, L_fold_512b_loop);
+
+  // Fold 512 bits to 128 bits
+  bind(L_fold_512b);
+  set64(CRC32_CONST_96,  const_96,  tmp[8]);
+  set64(CRC32_CONST_160, const_160, tmp[9]);
+
+  fold_128bit_crc32(tmp[1], tmp[0], const_96, const_160, tmp[8], tmp[9], tmp[3], tmp[2]);
+  fold_128bit_crc32(tmp[1], tmp[0], const_96, const_160, tmp[8], tmp[9], tmp[5], tmp[4]);
+  fold_128bit_crc32(tmp[1], tmp[0], const_96, const_160, tmp[8], tmp[9], tmp[7], tmp[6]);
+  dec(len, 48);
+
+  // Fold the rest of 128 bits data chunks
+  bind(L_fold_tail);
+  mov(32, G4);
+  cmp_and_br_short(len, G4, Assembler::lessEqualUnsigned, Assembler::pt, L_fold_128b);
+
+  set64(CRC32_CONST_96,  const_96,  tmp[8]);
+  set64(CRC32_CONST_160, const_160, tmp[9]);
+
+  bind(L_fold_tail_loop);
+  fold_128bit_crc32(tmp[1], tmp[0], const_96, const_160, tmp[2], tmp[3], buf, 0);
+  sub(len, 16, len);
+  cmp_and_br_short(len, G4, Assembler::greaterEqualUnsigned, Assembler::pt, L_fold_tail_loop);
+
+  // Fold the 128 bits in tmps 0 - 1 into tmp 1
+  bind(L_fold_128b);
+
+  set64(CRC32_CONST_64, const_64, tmp[4]);
+
+  xmulx(const_64, tmp[0], tmp[2]);
+  xmulxhi(const_64, tmp[0], tmp[3]);
+
+  srl(tmp[2], G0, tmp[4]);
+  xmulx(const_64, tmp[4], tmp[4]);
+
+  srlx(tmp[2], 32, tmp[2]);
+  sllx(tmp[3], 32, tmp[3]);
+  or3(tmp[2], tmp[3], tmp[2]);
+
+  xor3(tmp[4], tmp[1], tmp[4]);
+  xor3(tmp[4], tmp[2], tmp[1]);
+  dec(len, 8);
+
+  // Use table lookup for the 8 bytes left in tmp[1]
+  dec(len, 8);
+
+  // 8 8-bit folds to compute 32-bit CRC.
+  for (int j = 0; j < 4; j++) {
+    fold_8bit_crc32(tmp[1], table, tmp[2], tmp[3]);
+  }
+  srl(tmp[1], G0, crc); // move 32 bits to general register
+  for (int j = 0; j < 4; j++) {
+    fold_8bit_crc32(crc, table, tmp[3]);
+  }
+
+  bind(L_8byte_fold_check);
+
+  // Restore int registers saved in FP registers
+  for (int i = 0; i < CRC32_TMP_REG_NUM; i++) {
+    movdtox(as_FloatRegister(2*i), tmp[i]);
+  }
+
+  ba(L_cleanup_check);
+  delayed()->nop();
+
+  // Table look-up method for the remaining few bytes
+  bind(L_cleanup_loop);
+  ldub(buf, 0, O4);
+  inc(buf);
+  dec(len);
+  xor3(O4, crc, O4);
+  and3(O4, 0xFF, O4);
+  sllx(O4, 2, O4);
+  lduw(table, O4, O4);
+  srlx(crc, 8, crc);
+  xor3(O4, crc, crc);
+  bind(L_cleanup_check);
+  nop();
+  cmp_and_br_short(len, 0, Assembler::greaterUnsigned, Assembler::pt, L_cleanup_loop);
+
+  not1(crc);
+}
+
--- a/hotspot/src/cpu/sparc/vm/macroAssembler_sparc.hpp	Wed Nov 25 13:39:52 2015 +0100
+++ b/hotspot/src/cpu/sparc/vm/macroAssembler_sparc.hpp	Thu Nov 26 19:16:37 2015 +0100
@@ -904,7 +904,9 @@
   inline void ldf(FloatRegisterImpl::Width w, const Address& a, FloatRegister d, int offset = 0);
 
   // little-endian
-  inline void ldxl(Register s1, Register s2, Register d) { ldxa(s1, s2, ASI_PRIMARY_LITTLE, d); }
+  inline void lduwl(Register s1, Register s2, Register d) { lduwa(s1, s2, ASI_PRIMARY_LITTLE, d); }
+  inline void ldswl(Register s1, Register s2, Register d) { ldswa(s1, s2, ASI_PRIMARY_LITTLE, d);}
+  inline void ldxl( Register s1, Register s2, Register d) { ldxa(s1, s2, ASI_PRIMARY_LITTLE, d); }
   inline void ldfl(FloatRegisterImpl::Width w, Register s1, Register s2, FloatRegister d) { ldfa(w, s1, s2, ASI_PRIMARY_LITTLE, d); }
 
   // membar psuedo instruction.  takes into account target memory model.
@@ -1469,6 +1471,15 @@
   void movitof_revbytes(Register src, FloatRegister dst, Register tmp1, Register tmp2);
   void movftoi_revbytes(FloatRegister src, Register dst, Register tmp1, Register tmp2);
 
+  // CRC32 code for java.util.zip.CRC32::updateBytes0() instrinsic.
+  void kernel_crc32(Register crc, Register buf, Register len, Register table);
+  // Fold 128-bit data chunk
+  void fold_128bit_crc32(Register xcrc_hi, Register xcrc_lo, Register xK_hi, Register xK_lo, Register xtmp_hi, Register xtmp_lo, Register buf, int offset);
+  void fold_128bit_crc32(Register xcrc_hi, Register xcrc_lo, Register xK_hi, Register xK_lo, Register xtmp_hi, Register xtmp_lo, Register xbuf_hi, Register xbuf_lo);
+  // Fold 8-bit data
+  void fold_8bit_crc32(Register xcrc, Register table, Register xtmp, Register tmp);
+  void fold_8bit_crc32(Register crc, Register table, Register tmp);
+
 #undef VIRTUAL
 };
 
--- a/hotspot/src/cpu/sparc/vm/sharedRuntime_sparc.cpp	Wed Nov 25 13:39:52 2015 +0100
+++ b/hotspot/src/cpu/sparc/vm/sharedRuntime_sparc.cpp	Thu Nov 26 19:16:37 2015 +0100
@@ -3036,6 +3036,7 @@
 
     __ mov((int32_t)Deoptimization::Unpack_reexecute, L0deopt_mode);
     __ mov(G2_thread, O0);
+    __ mov(L0deopt_mode, O2);
     __ call(CAST_FROM_FN_PTR(address, Deoptimization::uncommon_trap));
     __ delayed()->nop();
     oop_maps->add_gc_map( __ offset()-start, map->deep_copy());
@@ -3121,6 +3122,7 @@
   // do the call by hand so we can get the oopmap
 
   __ mov(G2_thread, L7_thread_cache);
+  __ mov(L0deopt_mode, O1);
   __ call(CAST_FROM_FN_PTR(address, Deoptimization::fetch_unroll_info), relocInfo::runtime_call_type);
   __ delayed()->mov(G2_thread, O0);
 
@@ -3146,6 +3148,7 @@
 
   RegisterSaver::restore_result_registers(masm);
 
+  __ ld(O2UnrollBlock, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes(), G4deopt_mode);
   Label noException;
   __ cmp_and_br_short(G4deopt_mode, Deoptimization::Unpack_exception, Assembler::notEqual, Assembler::pt, noException);
 
@@ -3269,7 +3272,8 @@
   __ save_frame(0);
   __ set_last_Java_frame(SP, noreg);
   __ mov(I0, O2klass_index);
-  __ call_VM_leaf(L7_thread_cache, CAST_FROM_FN_PTR(address, Deoptimization::uncommon_trap), G2_thread, O2klass_index);
+  __ mov(Deoptimization::Unpack_uncommon_trap, O3); // exec mode
+  __ call_VM_leaf(L7_thread_cache, CAST_FROM_FN_PTR(address, Deoptimization::uncommon_trap), G2_thread, O2klass_index, O3);
   __ reset_last_Java_frame();
   __ mov(O0, O2UnrollBlock->after_save());
   __ restore();
@@ -3278,6 +3282,15 @@
   __ mov(O2UnrollBlock, O2UnrollBlock->after_save());
   __ restore();
 
+#ifdef ASSERT
+  { Label L;
+    __ ld(O2UnrollBlock, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes(), O1);
+    __ cmp_and_br_short(O1, Deoptimization::Unpack_uncommon_trap, Assembler::equal, Assembler::pt, L);
+    __ stop("SharedRuntime::generate_deopt_blob: expected Unpack_uncommon_trap");
+    __ bind(L);
+  }
+#endif
+
   // Allocate new interpreter frame(s) and possible c2i adapter frame
 
   make_new_frames(masm, false);
--- a/hotspot/src/cpu/sparc/vm/sparc.ad	Wed Nov 25 13:39:52 2015 +0100
+++ b/hotspot/src/cpu/sparc/vm/sparc.ad	Thu Nov 26 19:16:37 2015 +0100
@@ -1860,6 +1860,17 @@
   return true;  // Per default match rules are supported.
 }
 
+const bool Matcher::match_rule_supported_vector(int opcode, int vlen) {
+
+  // TODO
+  // identify extra cases that we might want to provide match rules for
+  // e.g. Op_ vector nodes and other intrinsics while guarding with vlen
+  bool ret_value = match_rule_supported(opcode);
+  // Add rules here.
+
+  return ret_value;  // Per default match rules are supported.
+}
+
 const int Matcher::float_pressure(int default_pressure_threshold) {
   return default_pressure_threshold;
 }
@@ -1905,7 +1916,7 @@
 }
 
 // Current (2013) SPARC platforms need to read original key
-// to construct decryption expanded key 
+// to construct decryption expanded key
 const bool Matcher::pass_original_key_for_aes() {
   return true;
 }
@@ -2612,7 +2623,7 @@
       if (stub == NULL && !(TraceJumps && Compile::current()->in_scratch_emit_size())) {
         ciEnv::current()->record_failure("CodeCache is full");
         return;
-      } 
+      }
     }
   %}
 
@@ -3132,10 +3143,10 @@
 // AVOID_NONE   - instruction can be placed anywhere
 // AVOID_BEFORE - instruction cannot be placed after an
 //                instruction with MachNode::AVOID_AFTER
-// AVOID_AFTER  - the next instruction cannot be the one 
+// AVOID_AFTER  - the next instruction cannot be the one
 //                with MachNode::AVOID_BEFORE
-// AVOID_BEFORE_AND_AFTER - BEFORE and AFTER attributes at 
-//                          the same time                                
+// AVOID_BEFORE_AND_AFTER - BEFORE and AFTER attributes at
+//                          the same time
 ins_attrib ins_avoid_back_to_back(MachNode::AVOID_NONE);
 
 ins_attrib ins_short_branch(0);    // Required flag: is this instruction a
--- a/hotspot/src/cpu/sparc/vm/stubGenerator_sparc.cpp	Wed Nov 25 13:39:52 2015 +0100
+++ b/hotspot/src/cpu/sparc/vm/stubGenerator_sparc.cpp	Thu Nov 26 19:16:37 2015 +0100
@@ -5292,6 +5292,38 @@
     return start;
   }
 
+  /**
+   *  Arguments:
+   *
+   * Inputs:
+   *   O0   - int   crc
+   *   O1   - byte* buf
+   *   O2   - int   len
+   *   O3   - int*  table
+   *
+   * Output:
+   *   O0   - int crc result
+   */
+  address generate_updateBytesCRC32() {
+    assert(UseCRC32Intrinsics, "need VIS3 instructions");
+
+    __ align(CodeEntryAlignment);
+    StubCodeMark mark(this, "StubRoutines", "updateBytesCRC32");
+    address start = __ pc();
+
+    const Register crc   = O0; // crc
+    const Register buf   = O1; // source java byte array address
+    const Register len   = O2; // length
+    const Register table = O3; // crc_table address (reuse register)
+
+    __ kernel_crc32(crc, buf, len, table);
+
+    __ retl();
+    __ delayed()->nop();
+
+    return start;
+  }
+
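
For orientation, the buffer-level result the stub above must produce (matching java.util.zip.CRC32) is the plain table-driven loop below. This is only an illustrative C++ sketch with an invented helper name; the generated kernel_crc32 code computes the same value with VIS3 instructions and the 128-bit folding constants rather than a byte loop.

#include <cstddef>
#include <cstdint>

// Reference semantics of updateBytesCRC32 (sketch, not HotSpot code):
// complement, one reflected table step per byte, complement again.
uint32_t updateBytesCRC32_reference(uint32_t crc, const uint8_t* buf, size_t len,
                                    const uint32_t table[256]) {
  crc = ~crc;
  for (size_t i = 0; i < len; i++) {
    crc = table[(crc ^ buf[i]) & 0xff] ^ (crc >> 8);
  }
  return ~crc;
}
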
   void generate_initial() {
     // Generates all stubs and initializes the entry points
 
@@ -5324,6 +5356,12 @@
 
     // Build this early so it's available for the interpreter.
     StubRoutines::_throw_StackOverflowError_entry          = generate_throw_exception("StackOverflowError throw_exception",           CAST_FROM_FN_PTR(address, SharedRuntime::throw_StackOverflowError));
+
+    if (UseCRC32Intrinsics) {
+      // set table address before stub generation which uses it
+      StubRoutines::_crc_table_adr = (address)StubRoutines::Sparc::_crc_table;
+      StubRoutines::_updateBytesCRC32 = generate_updateBytesCRC32();
+    }
   }
 
 
--- a/hotspot/src/cpu/sparc/vm/stubRoutines_sparc.cpp	Wed Nov 25 13:39:52 2015 +0100
+++ b/hotspot/src/cpu/sparc/vm/stubRoutines_sparc.cpp	Thu Nov 26 19:16:37 2015 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -52,3 +52,98 @@
 address StubRoutines::Sparc::_flush_callers_register_windows_entry = CAST_FROM_FN_PTR(address, bootstrap_flush_windows);
 
 address StubRoutines::Sparc::_partial_subtype_check = NULL;
+
+uint64_t StubRoutines::Sparc::_crc_by128_masks[] =
+{
+  /* The fields in this structure are arranged so that they can be
+   * picked up two at a time with 128-bit loads.
+   *
+   * Because of the flipped bit order for this CRC polynomial,
+   * the constant for X**N is left-shifted by 1.  This is because
+   * a 64 x 64 polynomial multiply produces a 127-bit result
+   * but the highest term is always aligned to bit 0 in the container.
+   * Pre-shifting by one fixes this, at the cost of potentially making
+   * the 32-bit constant no longer fit in a 32-bit container (thus the
+   * use of uint64_t, though this is also the size used by the carry-
+   * less multiply instruction).
+   *
+   * In addition, the flipped bit order and highest-term-at-least-bit
+   * multiply change the constants used.  The 96-bit result will be
+   * aligned to the high-term end of the target 128-bit container,
+   * not the low-term end; that is, instead of a 512-bit or 576-bit fold,
+   * it is a 480 (=512-32) or 544 (=512+64-32) bit fold.
+   *
+   * This causes additional problems in the 128-to-64-bit reduction; see the
+   * code for details.  By storing a mask in the otherwise unused half of
+   * a 128-bit constant, bits can be cleared before multiplication without
+   * storing and reloading.  Note that staying on a 128-bit datapath means
+   * that some data is uselessly stored and some unused data is intersected
+   * with an irrelevant constant.
+   */
+
+  ((uint64_t) 0xffffffffUL),     /* low  of K_M_64    */
+  ((uint64_t) 0xb1e6b092U << 1), /* high of K_M_64    */
+  ((uint64_t) 0xba8ccbe8U << 1), /* low  of K_160_96  */
+  ((uint64_t) 0x6655004fU << 1), /* high of K_160_96  */
+  ((uint64_t) 0xaa2215eaU << 1), /* low  of K_544_480 */
+  ((uint64_t) 0xe3720acbU << 1)  /* high of K_544_480 */
+};
+
+/**
+ *  crc_table[] from jdk/src/java.base/share/native/libzip/zlib-1.2.8/crc32.h
+ */
+juint StubRoutines::Sparc::_crc_table[] =
+{
+    0x00000000UL, 0x77073096UL, 0xee0e612cUL, 0x990951baUL, 0x076dc419UL,
+    0x706af48fUL, 0xe963a535UL, 0x9e6495a3UL, 0x0edb8832UL, 0x79dcb8a4UL,
+    0xe0d5e91eUL, 0x97d2d988UL, 0x09b64c2bUL, 0x7eb17cbdUL, 0xe7b82d07UL,
+    0x90bf1d91UL, 0x1db71064UL, 0x6ab020f2UL, 0xf3b97148UL, 0x84be41deUL,
+    0x1adad47dUL, 0x6ddde4ebUL, 0xf4d4b551UL, 0x83d385c7UL, 0x136c9856UL,
+    0x646ba8c0UL, 0xfd62f97aUL, 0x8a65c9ecUL, 0x14015c4fUL, 0x63066cd9UL,
+    0xfa0f3d63UL, 0x8d080df5UL, 0x3b6e20c8UL, 0x4c69105eUL, 0xd56041e4UL,
+    0xa2677172UL, 0x3c03e4d1UL, 0x4b04d447UL, 0xd20d85fdUL, 0xa50ab56bUL,
+    0x35b5a8faUL, 0x42b2986cUL, 0xdbbbc9d6UL, 0xacbcf940UL, 0x32d86ce3UL,
+    0x45df5c75UL, 0xdcd60dcfUL, 0xabd13d59UL, 0x26d930acUL, 0x51de003aUL,
+    0xc8d75180UL, 0xbfd06116UL, 0x21b4f4b5UL, 0x56b3c423UL, 0xcfba9599UL,
+    0xb8bda50fUL, 0x2802b89eUL, 0x5f058808UL, 0xc60cd9b2UL, 0xb10be924UL,
+    0x2f6f7c87UL, 0x58684c11UL, 0xc1611dabUL, 0xb6662d3dUL, 0x76dc4190UL,
+    0x01db7106UL, 0x98d220bcUL, 0xefd5102aUL, 0x71b18589UL, 0x06b6b51fUL,
+    0x9fbfe4a5UL, 0xe8b8d433UL, 0x7807c9a2UL, 0x0f00f934UL, 0x9609a88eUL,
+    0xe10e9818UL, 0x7f6a0dbbUL, 0x086d3d2dUL, 0x91646c97UL, 0xe6635c01UL,
+    0x6b6b51f4UL, 0x1c6c6162UL, 0x856530d8UL, 0xf262004eUL, 0x6c0695edUL,
+    0x1b01a57bUL, 0x8208f4c1UL, 0xf50fc457UL, 0x65b0d9c6UL, 0x12b7e950UL,
+    0x8bbeb8eaUL, 0xfcb9887cUL, 0x62dd1ddfUL, 0x15da2d49UL, 0x8cd37cf3UL,
+    0xfbd44c65UL, 0x4db26158UL, 0x3ab551ceUL, 0xa3bc0074UL, 0xd4bb30e2UL,
+    0x4adfa541UL, 0x3dd895d7UL, 0xa4d1c46dUL, 0xd3d6f4fbUL, 0x4369e96aUL,
+    0x346ed9fcUL, 0xad678846UL, 0xda60b8d0UL, 0x44042d73UL, 0x33031de5UL,
+    0xaa0a4c5fUL, 0xdd0d7cc9UL, 0x5005713cUL, 0x270241aaUL, 0xbe0b1010UL,
+    0xc90c2086UL, 0x5768b525UL, 0x206f85b3UL, 0xb966d409UL, 0xce61e49fUL,
+    0x5edef90eUL, 0x29d9c998UL, 0xb0d09822UL, 0xc7d7a8b4UL, 0x59b33d17UL,
+    0x2eb40d81UL, 0xb7bd5c3bUL, 0xc0ba6cadUL, 0xedb88320UL, 0x9abfb3b6UL,
+    0x03b6e20cUL, 0x74b1d29aUL, 0xead54739UL, 0x9dd277afUL, 0x04db2615UL,
+    0x73dc1683UL, 0xe3630b12UL, 0x94643b84UL, 0x0d6d6a3eUL, 0x7a6a5aa8UL,
+    0xe40ecf0bUL, 0x9309ff9dUL, 0x0a00ae27UL, 0x7d079eb1UL, 0xf00f9344UL,
+    0x8708a3d2UL, 0x1e01f268UL, 0x6906c2feUL, 0xf762575dUL, 0x806567cbUL,
+    0x196c3671UL, 0x6e6b06e7UL, 0xfed41b76UL, 0x89d32be0UL, 0x10da7a5aUL,
+    0x67dd4accUL, 0xf9b9df6fUL, 0x8ebeeff9UL, 0x17b7be43UL, 0x60b08ed5UL,
+    0xd6d6a3e8UL, 0xa1d1937eUL, 0x38d8c2c4UL, 0x4fdff252UL, 0xd1bb67f1UL,
+    0xa6bc5767UL, 0x3fb506ddUL, 0x48b2364bUL, 0xd80d2bdaUL, 0xaf0a1b4cUL,
+    0x36034af6UL, 0x41047a60UL, 0xdf60efc3UL, 0xa867df55UL, 0x316e8eefUL,
+    0x4669be79UL, 0xcb61b38cUL, 0xbc66831aUL, 0x256fd2a0UL, 0x5268e236UL,
+    0xcc0c7795UL, 0xbb0b4703UL, 0x220216b9UL, 0x5505262fUL, 0xc5ba3bbeUL,
+    0xb2bd0b28UL, 0x2bb45a92UL, 0x5cb36a04UL, 0xc2d7ffa7UL, 0xb5d0cf31UL,
+    0x2cd99e8bUL, 0x5bdeae1dUL, 0x9b64c2b0UL, 0xec63f226UL, 0x756aa39cUL,
+    0x026d930aUL, 0x9c0906a9UL, 0xeb0e363fUL, 0x72076785UL, 0x05005713UL,
+    0x95bf4a82UL, 0xe2b87a14UL, 0x7bb12baeUL, 0x0cb61b38UL, 0x92d28e9bUL,
+    0xe5d5be0dUL, 0x7cdcefb7UL, 0x0bdbdf21UL, 0x86d3d2d4UL, 0xf1d4e242UL,
+    0x68ddb3f8UL, 0x1fda836eUL, 0x81be16cdUL, 0xf6b9265bUL, 0x6fb077e1UL,
+    0x18b74777UL, 0x88085ae6UL, 0xff0f6a70UL, 0x66063bcaUL, 0x11010b5cUL,
+    0x8f659effUL, 0xf862ae69UL, 0x616bffd3UL, 0x166ccf45UL, 0xa00ae278UL,
+    0xd70dd2eeUL, 0x4e048354UL, 0x3903b3c2UL, 0xa7672661UL, 0xd06016f7UL,
+    0x4969474dUL, 0x3e6e77dbUL, 0xaed16a4aUL, 0xd9d65adcUL, 0x40df0b66UL,
+    0x37d83bf0UL, 0xa9bcae53UL, 0xdebb9ec5UL, 0x47b2cf7fUL, 0x30b5ffe9UL,
+    0xbdbdf21cUL, 0xcabac28aUL, 0x53b39330UL, 0x24b4a3a6UL, 0xbad03605UL,
+    0xcdd70693UL, 0x54de5729UL, 0x23d967bfUL, 0xb3667a2eUL, 0xc4614ab8UL,
+    0x5d681b02UL, 0x2a6f2b94UL, 0xb40bbe37UL, 0xc30c8ea1UL, 0x5a05df1bUL,
+    0x2d02ef8dUL
+};
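
The 256-entry table above is zlib's standard lookup table for the bit-reflected CRC-32 polynomial 0xEDB88320, and the pre-shifted constants in _crc_by128_masks are derived from the same polynomial. As a sanity check, the table can be regenerated with a short routine like the following (illustrative sketch, not part of this changeset):

#include <cstdint>
#include <cstdio>

// Regenerate the reflected CRC-32 table (polynomial 0xEDB88320, as in zlib).
int main() {
  uint32_t table[256];
  for (uint32_t n = 0; n < 256; n++) {
    uint32_t c = n;
    for (int k = 0; k < 8; k++) {
      c = (c & 1) ? (0xEDB88320u ^ (c >> 1)) : (c >> 1);
    }
    table[n] = c;
  }
  // The first entries print as 0x00000000, 0x77073096, 0xee0e612c, 0x990951ba.
  for (int i = 0; i < 4; i++) {
    printf("0x%08xUL\n", table[i]);
  }
  return 0;
}
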
--- a/hotspot/src/cpu/sparc/vm/stubRoutines_sparc.hpp	Wed Nov 25 13:39:52 2015 +0100
+++ b/hotspot/src/cpu/sparc/vm/stubRoutines_sparc.hpp	Thu Nov 26 19:16:37 2015 +0100
@@ -53,6 +53,9 @@
   static address _flush_callers_register_windows_entry;
 
   static address _partial_subtype_check;
+  // masks and table for CRC32
+  static uint64_t _crc_by128_masks[];
+  static juint    _crc_table[];
 
  public:
   // test assembler stop routine by setting registers
@@ -65,6 +68,8 @@
   static intptr_t* (*flush_callers_register_windows_func())() { return CAST_TO_FN_PTR(intptr_t* (*)(void), _flush_callers_register_windows_entry); }
 
   static address partial_subtype_check()                  { return _partial_subtype_check; }
+
+  static address crc_by128_masks_addr()  { return (address)_crc_by128_masks; }
 };
 
 #endif // CPU_SPARC_VM_STUBROUTINES_SPARC_HPP
--- a/hotspot/src/cpu/sparc/vm/templateInterpreter_sparc.cpp	Wed Nov 25 13:39:52 2015 +0100
+++ b/hotspot/src/cpu/sparc/vm/templateInterpreter_sparc.cpp	Thu Nov 26 19:16:37 2015 +0100
@@ -803,6 +803,106 @@
   return NULL;
 }
 
+/**
+ * Method entry for static native methods:
+ *   int java.util.zip.CRC32.update(int crc, int b)
+ */
+address InterpreterGenerator::generate_CRC32_update_entry() {
+
+  if (UseCRC32Intrinsics) {
+    address entry = __ pc();
+
+    Label L_slow_path;
+    // If we need a safepoint check, generate full interpreter entry.
+    ExternalAddress state(SafepointSynchronize::address_of_state());
+    __ set(ExternalAddress(SafepointSynchronize::address_of_state()), O2);
+    __ set(SafepointSynchronize::_not_synchronized, O3);
+    __ cmp_and_br_short(O2, O3, Assembler::notEqual, Assembler::pt, L_slow_path);
+
+    // Load parameters
+    const Register crc   = O0; // initial crc
+    const Register val   = O1; // byte to update with
+    const Register table = O2; // address of 256-entry lookup table
+
+    __ ldub(Gargs, 3, val);
+    __ lduw(Gargs, 8, crc);
+
+    __ set(ExternalAddress(StubRoutines::crc_table_addr()), table);
+
+    __ not1(crc); // ~crc
+    __ clruwu(crc);
+    __ update_byte_crc32(crc, val, table);
+    __ not1(crc); // ~crc
+
+    // result in O0
+    __ retl();
+    __ delayed()->nop();
+
+    // generate a vanilla native entry as the slow path
+    __ bind(L_slow_path);
+    __ jump_to_entry(Interpreter::entry_for_kind(Interpreter::native));
+    return entry;
+  }
+  return NULL;
+}
+
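
The fast path above is the single-byte form of the same computation: complement the incoming crc, take one table-driven step with the argument byte, complement again. A minimal sketch of that arithmetic, assuming update_byte_crc32 performs the standard reflected table step:

#include <cstdint>

// CRC32.update(int crc, int b) fast path (sketch):
//   not1(crc); update_byte_crc32(crc, val, table); not1(crc)
uint32_t crc32_update_one_byte(uint32_t crc, uint8_t val, const uint32_t table[256]) {
  crc = ~crc;                                    // __ not1(crc)
  crc = table[(crc ^ val) & 0xff] ^ (crc >> 8);  // assumed effect of update_byte_crc32
  return ~crc;                                   // __ not1(crc)
}
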
+/**
+ * Method entry for static native methods:
+ *   int java.util.zip.CRC32.updateBytes(int crc, byte[] b, int off, int len)
+ *   int java.util.zip.CRC32.updateByteBuffer(int crc, long buf, int off, int len)
+ */
+address InterpreterGenerator::generate_CRC32_updateBytes_entry(AbstractInterpreter::MethodKind kind) {
+
+  if (UseCRC32Intrinsics) {
+    address entry = __ pc();
+
+    Label L_slow_path;
+    // If we need a safepoint check, generate full interpreter entry.
+    ExternalAddress state(SafepointSynchronize::address_of_state());
+    __ set(ExternalAddress(SafepointSynchronize::address_of_state()), O2);
+    __ set(SafepointSynchronize::_not_synchronized, O3);
+    __ cmp_and_br_short(O2, O3, Assembler::notEqual, Assembler::pt, L_slow_path);
+
+    // Load parameters from the stack
+    const Register crc    = O0; // initial crc
+    const Register buf    = O1; // source java byte array address
+    const Register len    = O2; // len
+    const Register offset = O3; // offset
+
+    // Arguments are reversed on java expression stack
+    // Calculate address of start element
+    if (kind == Interpreter::java_util_zip_CRC32_updateByteBuffer) {
+      __ lduw(Gargs, 0,  len);
+      __ lduw(Gargs, 8,  offset);
+      __ ldx( Gargs, 16, buf);
+      __ lduw(Gargs, 32, crc);
+      __ add(buf, offset, buf);
+    } else {
+      __ lduw(Gargs, 0,  len);
+      __ lduw(Gargs, 8,  offset);
+      __ ldx( Gargs, 16, buf);
+      __ lduw(Gargs, 24, crc);
+      __ add(buf, arrayOopDesc::base_offset_in_bytes(T_BYTE), buf); // account for the header size
+      __ add(buf, offset, buf);
+    }
+
+    // Call the crc32 kernel
+    __ MacroAssembler::save_thread(L7_thread_cache);
+    __ kernel_crc32(crc, buf, len, O3);
+    __ MacroAssembler::restore_thread(L7_thread_cache);
+
+    // result in O0
+    __ retl();
+    __ delayed()->nop();
+
+    // generate a vanilla native entry as the slow path
+    __ bind(L_slow_path);
+    __ jump_to_entry(Interpreter::entry_for_kind(Interpreter::native));
+    return entry;
+  }
+  return NULL;
+}
+
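
The loads above follow the reversed Java expression stack layout (len at Gargs+0, off at +8, the buffer at +16, and crc at +24, or at +32 when the buffer argument is a two-slot long address), and the start address differs between the two entries: a byte[] needs its object header skipped, a direct-buffer address is used as-is. A small sketch of that address arithmetic, with the helper name invented for illustration; base_offset stands for arrayOopDesc::base_offset_in_bytes(T_BYTE):

#include <cstdint>

// Compute the first byte handed to kernel_crc32 (illustrative sketch).
const uint8_t* crc32_start_address(uintptr_t buf, uint32_t off,
                                   bool is_heap_byte_array, int base_offset) {
  if (is_heap_byte_array) {
    buf += base_offset;                 // updateBytes: skip the byte[] header
  }
  return (const uint8_t*)(buf + off);   // both entries: add the element offset
}
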
 //
 // Interpreter stub for calling a native method. (asm interpreter)
 // This sets up a somewhat different looking stack for calling the native method
--- a/hotspot/src/cpu/sparc/vm/vm_version_sparc.cpp	Wed Nov 25 13:39:52 2015 +0100
+++ b/hotspot/src/cpu/sparc/vm/vm_version_sparc.cpp	Thu Nov 26 19:16:37 2015 +0100
@@ -229,35 +229,35 @@
 
   // SPARC T4 and above should have support for AES instructions
   if (has_aes()) {
-    if (UseVIS > 2) { // AES intrinsics use MOVxTOd/MOVdTOx which are VIS3
-      if (FLAG_IS_DEFAULT(UseAES)) {
-        FLAG_SET_DEFAULT(UseAES, true);
+    if (FLAG_IS_DEFAULT(UseAES)) {
+      FLAG_SET_DEFAULT(UseAES, true);
+    }
+    if (!UseAES) {
+      if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
+        warning("AES intrinsics require UseAES flag to be enabled. Intrinsics will be disabled.");
       }
-      if (FLAG_IS_DEFAULT(UseAESIntrinsics)) {
-        FLAG_SET_DEFAULT(UseAESIntrinsics, true);
-      }
-      // we disable both the AES flags if either of them is disabled on the command line
-      if (!UseAES || !UseAESIntrinsics) {
-        FLAG_SET_DEFAULT(UseAES, false);
+      FLAG_SET_DEFAULT(UseAESIntrinsics, false);
+    } else {
+      // The AES intrinsic stubs require AES instruction support (of course)
+      // but also require VIS3 mode or higher for the instructions they use.
+      if (UseVIS > 2) {
+        if (FLAG_IS_DEFAULT(UseAESIntrinsics)) {
+          FLAG_SET_DEFAULT(UseAESIntrinsics, true);
+        }
+      } else {
+        if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
+          warning("SPARC AES intrinsics require VIS3 instructions. Intrinsics will be disabled.");
+        }
         FLAG_SET_DEFAULT(UseAESIntrinsics, false);
       }
-    } else {
-        if (UseAES || UseAESIntrinsics) {
-          warning("SPARC AES intrinsics require VIS3 instruction support. Intrinsics will be disabled.");
-          if (UseAES) {
-            FLAG_SET_DEFAULT(UseAES, false);
-          }
-          if (UseAESIntrinsics) {
-            FLAG_SET_DEFAULT(UseAESIntrinsics, false);
-          }
-        }
     }
   } else if (UseAES || UseAESIntrinsics) {
-    warning("AES instructions are not available on this CPU");
-    if (UseAES) {
+    if (UseAES && !FLAG_IS_DEFAULT(UseAES)) {
+      warning("AES instructions are not available on this CPU");
       FLAG_SET_DEFAULT(UseAES, false);
     }
-    if (UseAESIntrinsics) {
+    if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
+      warning("AES intrinsics are not available on this CPU");
       FLAG_SET_DEFAULT(UseAESIntrinsics, false);
     }
   }
@@ -347,6 +347,15 @@
     FLAG_SET_DEFAULT(UseAdler32Intrinsics, false);
   }
 
+  if (UseVIS > 2) {
+    if (FLAG_IS_DEFAULT(UseCRC32Intrinsics)) {
+      FLAG_SET_DEFAULT(UseCRC32Intrinsics, true);
+    }
+  } else if (UseCRC32Intrinsics) {
+    warning("SPARC CRC32 intrinsics require VIS3 insructions support. Intriniscs will be disabled");
+    FLAG_SET_DEFAULT(UseCRC32Intrinsics, false);
+  }
+
   if (FLAG_IS_DEFAULT(ContendedPaddingWidth) &&
     (cache_line_size > ContendedPaddingWidth))
     ContendedPaddingWidth = cache_line_size;
@@ -358,7 +367,6 @@
     FLAG_SET_DEFAULT(UseUnalignedAccesses, false);
   }
 
-#ifndef PRODUCT
   if (PrintMiscellaneous && Verbose) {
     tty->print_cr("L1 data cache line size: %u", L1_data_cache_line_size());
     tty->print_cr("L2 data cache line size: %u", L2_data_cache_line_size());
@@ -391,7 +399,6 @@
       tty->print_cr("ContendedPaddingWidth %d", (int) ContendedPaddingWidth);
     }
   }
-#endif // PRODUCT
 }
 
 void VM_Version::print_features() {
@@ -400,7 +407,7 @@
 
 int VM_Version::determine_features() {
   if (UseV8InstrsOnly) {
-    NOT_PRODUCT(if (PrintMiscellaneous && Verbose) tty->print_cr("Version is Forced-V8");)
+    if (PrintMiscellaneous && Verbose) { tty->print_cr("Version is Forced-V8"); }
     return generic_v8_m;
   }
 
@@ -416,12 +423,12 @@
     if (is_T_family(features)) {
       // Happy to accomodate...
     } else {
-      NOT_PRODUCT(if (PrintMiscellaneous && Verbose) tty->print_cr("Version is Forced-Niagara");)
+      if (PrintMiscellaneous && Verbose) { tty->print_cr("Version is Forced-Niagara"); }
       features |= T_family_m;
     }
   } else {
     if (is_T_family(features) && !FLAG_IS_DEFAULT(UseNiagaraInstrs)) {
-      NOT_PRODUCT(if (PrintMiscellaneous && Verbose) tty->print_cr("Version is Forced-Not-Niagara");)
+      if (PrintMiscellaneous && Verbose) { tty->print_cr("Version is Forced-Not-Niagara"); }
       features &= ~(T_family_m | T1_model_m);
     } else {
       // Happy to accomodate...
--- a/hotspot/src/cpu/x86/vm/assembler_x86.cpp	Wed Nov 25 13:39:52 2015 +0100
+++ b/hotspot/src/cpu/x86/vm/assembler_x86.cpp	Thu Nov 26 19:16:37 2015 +0100
@@ -313,7 +313,7 @@
     switch (cur_tuple_type) {
     case EVEX_FV:
       if ((cur_encoding & VEX_W) == VEX_W) {
-        mod_idx += 2 + ((cur_encoding & EVEX_Rb) == EVEX_Rb) ? 1 : 0;
+        mod_idx = ((cur_encoding & EVEX_Rb) == EVEX_Rb) ? 3 : 2;
       } else {
         mod_idx = ((cur_encoding & EVEX_Rb) == EVEX_Rb) ? 1 : 0;
       }
@@ -394,25 +394,27 @@
   int mod_idx = 0;
   // We will test if the displacement fits the compressed format and if so
   // apply the compression to the displacment iff the result is8bit.
-  if (VM_Version::supports_evex() && _is_evex_instruction) {
-    switch (_tuple_type) {
+  if (VM_Version::supports_evex() && (_attributes != NULL) && _attributes->is_evex_instruction()) {
+    int evex_encoding = _attributes->get_evex_encoding();
+    int tuple_type = _attributes->get_tuple_type();
+    switch (tuple_type) {
     case EVEX_FV:
-      if ((_evex_encoding & VEX_W) == VEX_W) {
-        mod_idx += 2 + ((_evex_encoding & EVEX_Rb) == EVEX_Rb) ? 1 : 0;
+      if ((evex_encoding & VEX_W) == VEX_W) {
+        mod_idx = ((evex_encoding & EVEX_Rb) == EVEX_Rb) ? 3 : 2;
       } else {
-        mod_idx = ((_evex_encoding & EVEX_Rb) == EVEX_Rb) ? 1 : 0;
+        mod_idx = ((evex_encoding & EVEX_Rb) == EVEX_Rb) ? 1 : 0;
       }
       break;
 
     case EVEX_HV:
-      mod_idx = ((_evex_encoding & EVEX_Rb) == EVEX_Rb) ? 1 : 0;
+      mod_idx = ((evex_encoding & EVEX_Rb) == EVEX_Rb) ? 1 : 0;
       break;
 
     case EVEX_FVM:
       break;
 
     case EVEX_T1S:
-      switch (_input_size_in_bits) {
+      switch (_attributes->get_input_size()) {
       case EVEX_8bit:
         break;
 
@@ -433,7 +435,7 @@
     case EVEX_T1F:
     case EVEX_T2:
     case EVEX_T4:
-      mod_idx = (_input_size_in_bits == EVEX_64bit) ? 1 : 0;
+      mod_idx = (_attributes->get_input_size() == EVEX_64bit) ? 1 : 0;
       break;
 
     case EVEX_T8:
@@ -459,8 +461,9 @@
       break;
     }
 
-    if (_avx_vector_len >= AVX_128bit && _avx_vector_len <= AVX_512bit) {
-      int disp_factor = tuple_table[_tuple_type + mod_idx][_avx_vector_len];
+    int vector_len = _attributes->get_vector_len();
+    if (vector_len >= AVX_128bit && vector_len <= AVX_512bit) {
+      int disp_factor = tuple_table[tuple_type + mod_idx][vector_len];
       if ((disp % disp_factor) == 0) {
         int new_disp = disp / disp_factor;
         if (is8bit(new_disp)) {
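
The hunks above rework the disp8*N decision to read the tuple type, input size and vector length from the InstructionAttr object instead of the old assembler-global fields; the decision itself is unchanged: if the displacement is an exact multiple of the tuple-derived scale factor and the quotient fits in a signed byte, EVEX can encode a one-byte compressed displacement. A standalone sketch of that check, where disp_factor stands in for tuple_table[tuple_type + mod_idx][vector_len]:

#include <cstdint>

// EVEX disp8*N compression check (illustrative sketch).
static bool is8bit_signed(int x) { return x >= -128 && x <= 127; }

bool evex_compress_disp(int disp, int disp_factor, int8_t* disp8_out) {
  if (disp_factor > 0 && (disp % disp_factor) == 0) {
    int new_disp = disp / disp_factor;
    if (is8bit_signed(new_disp)) {
      *disp8_out = (int8_t)new_disp;    // emit this byte instead of a 4-byte disp32
      return true;
    }
  }
  return false;
}
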
@@ -591,7 +594,6 @@
       emit_data(disp, rspec, disp32_operand);
     }
   }
-  _is_evex_instruction = false;
 }
 
 void Assembler::emit_operand(XMMRegister reg, Register base, Register index,
@@ -770,7 +772,7 @@
     case 0x55: // andnps
     case 0x56: // orps
     case 0x57: // xorps
-    case 0x59: //mulpd
+    case 0x59: // mulpd
     case 0x6E: // movd
     case 0x7E: // movd
     case 0xAE: // ldmxcsr, stmxcsr, fxrstor, fxsave, clflush
@@ -1234,51 +1236,53 @@
 
 void Assembler::addsd(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  if (VM_Version::supports_evex()) {
-    emit_simd_arith_q(0x58, dst, src, VEX_SIMD_F2);
-  } else {
-    emit_simd_arith(0x58, dst, src, VEX_SIMD_F2);
-  }
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x58);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::addsd(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_T1S;
-    _input_size_in_bits = EVEX_64bit;
-    emit_simd_arith_q(0x58, dst, src, VEX_SIMD_F2);
-  } else {
-    emit_simd_arith(0x58, dst, src, VEX_SIMD_F2);
-  }
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
+  simd_prefix(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x58);
+  emit_operand(dst, src);
 }
 
 void Assembler::addss(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  emit_simd_arith(0x58, dst, src, VEX_SIMD_F3);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x58);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::addss(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_T1S;
-    _input_size_in_bits = EVEX_32bit;
-  }
-  emit_simd_arith(0x58, dst, src, VEX_SIMD_F3);
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
+  simd_prefix(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x58);
+  emit_operand(dst, src);
 }
 
 void Assembler::aesdec(XMMRegister dst, Address src) {
   assert(VM_Version::supports_aes(), "");
   InstructionMark im(this);
-  simd_prefix(dst, dst, src, VEX_SIMD_66, /* no_mask_reg */ false,
-              VEX_OPCODE_0F_38, /* rex_w */ false, AVX_128bit, /* legacy_mode */ true);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+  simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
   emit_int8((unsigned char)0xDE);
   emit_operand(dst, src);
 }
 
 void Assembler::aesdec(XMMRegister dst, XMMRegister src) {
   assert(VM_Version::supports_aes(), "");
-  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, /* no_mask_reg */ false,
-                                      VEX_OPCODE_0F_38,  /* rex_w */ false, AVX_128bit, /* legacy_mode */ true);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
   emit_int8((unsigned char)0xDE);
   emit_int8(0xC0 | encode);
 }
@@ -1286,16 +1290,16 @@
 void Assembler::aesdeclast(XMMRegister dst, Address src) {
   assert(VM_Version::supports_aes(), "");
   InstructionMark im(this);
-  simd_prefix(dst, dst, src, VEX_SIMD_66, /* no_mask_reg */ false,
-              VEX_OPCODE_0F_38, /* rex_w */ false, AVX_128bit,  /* legacy_mode */ true);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+  simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
   emit_int8((unsigned char)0xDF);
   emit_operand(dst, src);
 }
 
 void Assembler::aesdeclast(XMMRegister dst, XMMRegister src) {
   assert(VM_Version::supports_aes(), "");
-  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, /* no_mask_reg */ false,
-                                      VEX_OPCODE_0F_38,  /* rex_w */ false, AVX_128bit, /* legacy_mode */ true);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
   emit_int8((unsigned char)0xDF);
   emit_int8((unsigned char)(0xC0 | encode));
 }
@@ -1303,16 +1307,16 @@
 void Assembler::aesenc(XMMRegister dst, Address src) {
   assert(VM_Version::supports_aes(), "");
   InstructionMark im(this);
-  simd_prefix(dst, dst, src, VEX_SIMD_66, /* no_mask_reg */ false,
-              VEX_OPCODE_0F_38, /* rex_w */ false, AVX_128bit, /* legacy_mode */ true);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+  simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
   emit_int8((unsigned char)0xDC);
   emit_operand(dst, src);
 }
 
 void Assembler::aesenc(XMMRegister dst, XMMRegister src) {
   assert(VM_Version::supports_aes(), "");
-  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, /* no_mask_reg */ false,
-                                      VEX_OPCODE_0F_38, /* rex_w */ false, AVX_128bit, /* legacy_mode */ true);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
   emit_int8((unsigned char)0xDC);
   emit_int8(0xC0 | encode);
 }
@@ -1320,16 +1324,16 @@
 void Assembler::aesenclast(XMMRegister dst, Address src) {
   assert(VM_Version::supports_aes(), "");
   InstructionMark im(this);
-  simd_prefix(dst, dst, src, VEX_SIMD_66, /* no_mask_reg */ false,
-              VEX_OPCODE_0F_38, /* rex_w */ false, AVX_128bit,  /* legacy_mode */ true);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+  simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
   emit_int8((unsigned char)0xDD);
   emit_operand(dst, src);
 }
 
 void Assembler::aesenclast(XMMRegister dst, XMMRegister src) {
   assert(VM_Version::supports_aes(), "");
-  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, /* no_mask_reg */ false,
-                                      VEX_OPCODE_0F_38, /* rex_w */ false, AVX_128bit, /* legacy_mode */ true);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
   emit_int8((unsigned char)0xDD);
   emit_int8((unsigned char)(0xC0 | encode));
 }
@@ -1361,15 +1365,17 @@
 
 void Assembler::andnl(Register dst, Register src1, Register src2) {
   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
-  int encode = vex_prefix_0F38_and_encode_legacy(dst, src1, src2);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+  int encode = vex_prefix_and_encode(dst->encoding(), src1->encoding(), src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
   emit_int8((unsigned char)0xF2);
   emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::andnl(Register dst, Register src1, Address src2) {
-  InstructionMark im(this);
   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
-  vex_prefix_0F38_legacy(dst, src1, src2);
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+  vex_prefix(src2, src1->encoding(), dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
   emit_int8((unsigned char)0xF2);
   emit_operand(dst, src2);
 }
@@ -1396,45 +1402,51 @@
 
 void Assembler::blsil(Register dst, Register src) {
   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
-  int encode = vex_prefix_0F38_and_encode_legacy(rbx, dst, src);
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+  int encode = vex_prefix_and_encode(rbx->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
   emit_int8((unsigned char)0xF3);
   emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::blsil(Register dst, Address src) {
-  InstructionMark im(this);
   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
-  vex_prefix_0F38_legacy(rbx, dst, src);
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+  vex_prefix(src, dst->encoding(), rbx->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
   emit_int8((unsigned char)0xF3);
   emit_operand(rbx, src);
 }
 
 void Assembler::blsmskl(Register dst, Register src) {
   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
-  int encode = vex_prefix_0F38_and_encode_legacy(rdx, dst, src);
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+  int encode = vex_prefix_and_encode(rdx->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
   emit_int8((unsigned char)0xF3);
   emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::blsmskl(Register dst, Address src) {
-  InstructionMark im(this);
   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
-  vex_prefix_0F38_legacy(rdx, dst, src);
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+  vex_prefix(src, dst->encoding(), rdx->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
   emit_int8((unsigned char)0xF3);
   emit_operand(rdx, src);
 }
 
 void Assembler::blsrl(Register dst, Register src) {
   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
-  int encode = vex_prefix_0F38_and_encode_legacy(rcx, dst, src);
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+  int encode = vex_prefix_and_encode(rcx->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
   emit_int8((unsigned char)0xF3);
   emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::blsrl(Register dst, Address src) {
-  InstructionMark im(this);
   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
-  vex_prefix_0F38_legacy(rcx, dst, src);
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+  vex_prefix(src, dst->encoding(), rcx->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
   emit_int8((unsigned char)0xF3);
   emit_operand(rcx, src);
 }
@@ -1581,36 +1593,38 @@
   // NOTE: dbx seems to decode this as comiss even though the
   // 0x66 is there. Strangly ucomisd comes out correct
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_T1S;
-    _input_size_in_bits = EVEX_64bit;
-    emit_simd_arith_nonds_q(0x2F, dst, src, VEX_SIMD_66, /* no_mask_reg */ true);
-  } else {
-    emit_simd_arith_nonds(0x2F, dst, src, VEX_SIMD_66);
-  }
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
+  simd_prefix(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x2F);
+  emit_operand(dst, src);
 }
 
 void Assembler::comisd(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  if (VM_Version::supports_evex()) {
-    emit_simd_arith_nonds_q(0x2F, dst, src, VEX_SIMD_66, /* no_mask_reg */ true);
-  } else {
-    emit_simd_arith_nonds(0x2F, dst, src, VEX_SIMD_66);
-  }
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x2F);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::comiss(XMMRegister dst, Address src) {
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_T1S;
-    _input_size_in_bits = EVEX_32bit;
-  }
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  emit_simd_arith_nonds(0x2F, dst, src, VEX_SIMD_NONE, /* no_mask_reg */ true);
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
+  simd_prefix(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x2F);
+  emit_operand(dst, src);
 }
 
 void Assembler::comiss(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  emit_simd_arith_nonds(0x2F, dst, src, VEX_SIMD_NONE, /* no_mask_reg */ true);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x2F);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::cpuid() {
@@ -1699,100 +1713,113 @@
 
 void Assembler::cvtdq2pd(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_simd_arith_nonds(0xE6, dst, src, VEX_SIMD_F3, /* no_mask_reg */ false, /* legacy_mode */ true);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0xE6);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::cvtdq2ps(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_simd_arith_nonds(0x5B, dst, src, VEX_SIMD_NONE, /* no_mask_reg */ false, /* legacy_mode */ true);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x5B);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::cvtsd2ss(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  if (VM_Version::supports_evex()) {
-    emit_simd_arith_q(0x5A, dst, src, VEX_SIMD_F2);
-  } else {
-    emit_simd_arith(0x5A, dst, src, VEX_SIMD_F2);
-  }
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x5A);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::cvtsd2ss(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_T1F;
-    _input_size_in_bits = EVEX_64bit;
-    emit_simd_arith_q(0x5A, dst, src, VEX_SIMD_F2);
-  } else {
-    emit_simd_arith(0x5A, dst, src, VEX_SIMD_F2);
-  }
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
+  simd_prefix(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x5A);
+  emit_operand(dst, src);
 }
 
 void Assembler::cvtsi2sdl(XMMRegister dst, Register src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, VM_Version::supports_evex());
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
   emit_int8(0x2A);
   emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::cvtsi2sdl(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_T1S;
-    _input_size_in_bits = EVEX_32bit;
-    emit_simd_arith(0x2A, dst, src, VEX_SIMD_F2, /* no_mask_reg */ true);
-  } else {
-    emit_simd_arith(0x2A, dst, src, VEX_SIMD_F2);
-  }
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
+  simd_prefix(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x2A);
+  emit_operand(dst, src);
 }
 
 void Assembler::cvtsi2ssl(XMMRegister dst, Register src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3, /* no_mask_reg */ true);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
   emit_int8(0x2A);
   emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::cvtsi2ssl(XMMRegister dst, Address src) {
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_T1S;
-    _input_size_in_bits = EVEX_32bit;
-  }
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  emit_simd_arith(0x2A, dst, src, VEX_SIMD_F3, /* no_mask_reg */ true);
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
+  simd_prefix(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x2A);
+  emit_operand(dst, src);
 }
 
 void Assembler::cvtsi2ssq(XMMRegister dst, Register src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  int encode = simd_prefix_and_encode_q(dst, dst, src, VEX_SIMD_F3, /* no_mask_reg */ true);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
   emit_int8(0x2A);
   emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::cvtss2sd(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_simd_arith(0x5A, dst, src, VEX_SIMD_F3);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x5A);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::cvtss2sd(XMMRegister dst, Address src) {
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_T1S;
-    _input_size_in_bits = EVEX_32bit;
-  }
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_simd_arith(0x5A, dst, src, VEX_SIMD_F3);
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
+  simd_prefix(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x5A);
+  emit_operand(dst, src);
 }
 
 
 void Assembler::cvttsd2sil(Register dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, /* no_mask_reg */ true);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
   emit_int8(0x2C);
   emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::cvttss2sil(Register dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, /* no_mask_reg */ true);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
   emit_int8(0x2C);
   emit_int8((unsigned char)(0xC0 | encode));
 }
@@ -1807,36 +1834,38 @@
 
 void Assembler::divsd(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_T1S;
-    _input_size_in_bits = EVEX_64bit;
-    emit_simd_arith_q(0x5E, dst, src, VEX_SIMD_F2);
-  } else {
-    emit_simd_arith(0x5E, dst, src, VEX_SIMD_F2);
-  }
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
+  simd_prefix(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x5E);
+  emit_operand(dst, src);
 }
 
 void Assembler::divsd(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  if (VM_Version::supports_evex()) {
-    emit_simd_arith_q(0x5E, dst, src, VEX_SIMD_F2);
-  } else {
-    emit_simd_arith(0x5E, dst, src, VEX_SIMD_F2);
-  }
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x5E);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::divss(XMMRegister dst, Address src) {
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_T1S;
-    _input_size_in_bits = EVEX_32bit;
-  }
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  emit_simd_arith(0x5E, dst, src, VEX_SIMD_F3);
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
+  simd_prefix(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x5E);
+  emit_operand(dst, src);
 }
 
 void Assembler::divss(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  emit_simd_arith(0x5E, dst, src, VEX_SIMD_F3);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x5E);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::emms() {
@@ -2082,36 +2111,26 @@
 
 void Assembler::movapd(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  if (VM_Version::supports_avx512novl()) {
-    int vector_len = AVX_512bit;
-    int dst_enc = dst->encoding();
-    int src_enc = src->encoding();
-    int encode = vex_prefix_and_encode(dst_enc, 0, src_enc, VEX_SIMD_66, VEX_OPCODE_0F,
-                                       /* vex_w */ true, vector_len, /* legacy_mode */ false, /* no_mask_reg */ false);
-    emit_int8(0x28);
-    emit_int8((unsigned char)(0xC0 | encode));
-  } else if (VM_Version::supports_evex()) {
-    emit_simd_arith_nonds_q(0x28, dst, src, VEX_SIMD_66);
-  } else {
-    emit_simd_arith_nonds(0x28, dst, src, VEX_SIMD_66);
-  }
+  int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_128bit;
+  InstructionAttr attributes(vector_len, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x28);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::movaps(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  if (VM_Version::supports_avx512novl()) {
-    int vector_len = AVX_512bit;
-    int encode = vex_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_NONE, vector_len);
-    emit_int8(0x28);
-    emit_int8((unsigned char)(0xC0 | encode));
-  } else {
-    emit_simd_arith_nonds(0x28, dst, src, VEX_SIMD_NONE);
-  }
+  int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_128bit;
+  InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x28);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::movlhps(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  int encode = simd_prefix_and_encode(dst, src, src, VEX_SIMD_NONE, /* no_mask_reg */ true);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(dst, src, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
   emit_int8(0x16);
   emit_int8((unsigned char)(0xC0 | encode));
 }
@@ -2125,39 +2144,53 @@
 }
 
 void Assembler::movddup(XMMRegister dst, XMMRegister src) {
-  _instruction_uses_vl = true;
   NOT_LP64(assert(VM_Version::supports_sse3(), ""));
-  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_F2, /* no_mask_reg */ false, VEX_OPCODE_0F,
-                                      /* rex_w */ VM_Version::supports_evex(), AVX_128bit, /* legacy_mode */ false);
+  int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_128bit;
+  InstructionAttr attributes(vector_len, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
   emit_int8(0x12);
   emit_int8(0xC0 | encode);
-
+}
+
+void Assembler::kmovwl(KRegister dst, Register src) {
+  NOT_LP64(assert(VM_Version::supports_evex(), ""));
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
+  int encode = kreg_prefix_and_encode(dst, knoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0x92);
+  emit_int8((unsigned char)(0xC0 | encode));
+}
+
+void Assembler::kmovdl(KRegister dst, Register src) {
+  NOT_LP64(assert(VM_Version::supports_evex(), ""));
+  VexSimdPrefix pre = !_legacy_mode_bw ? VEX_SIMD_F2 : VEX_SIMD_NONE;
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
+  int encode = kreg_prefix_and_encode(dst, knoreg, src, pre, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0x92);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::kmovql(KRegister dst, KRegister src) {
   NOT_LP64(assert(VM_Version::supports_evex(), ""));
-  int encode = kreg_prefix_and_encode(dst, knoreg, src, VEX_SIMD_NONE,
-                                      /* no_mask_reg */ true, VEX_OPCODE_0F, /* rex_w */ true);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
+  int encode = kreg_prefix_and_encode(dst, knoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
   emit_int8((unsigned char)0x90);
   emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::kmovql(KRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_evex(), ""));
-  int dst_enc = dst->encoding();
-  int nds_enc = 0;
-  vex_prefix(src, nds_enc, dst_enc, VEX_SIMD_NONE,
-             VEX_OPCODE_0F, /* vex_w */  true, AVX_128bit, /* legacy_mode */ true, /* no_reg_mask */ true);
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
+  vex_prefix(src, 0, dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
   emit_int8((unsigned char)0x90);
   emit_operand((Register)dst, src);
 }
 
 void Assembler::kmovql(Address dst, KRegister src) {
   NOT_LP64(assert(VM_Version::supports_evex(), ""));
-  int src_enc = src->encoding();
-  int nds_enc = 0;
-  vex_prefix(dst, nds_enc, src_enc, VEX_SIMD_NONE,
-             VEX_OPCODE_0F, /* vex_w */ true, AVX_128bit, /* legacy_mode */ true, /* no_reg_mask */ true);
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
+  vex_prefix(dst, 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
   emit_int8((unsigned char)0x90);
   emit_operand((Register)src, dst);
 }
@@ -2165,24 +2198,45 @@
 void Assembler::kmovql(KRegister dst, Register src) {
   NOT_LP64(assert(VM_Version::supports_evex(), ""));
   VexSimdPrefix pre = !_legacy_mode_bw ? VEX_SIMD_F2 : VEX_SIMD_NONE;
-  int encode = kreg_prefix_and_encode(dst, knoreg, src, pre, /* no_mask_reg */ true,
-                                      VEX_OPCODE_0F, /* legacy_mode */ !_legacy_mode_bw);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ !_legacy_mode_bw, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
+  int encode = kreg_prefix_and_encode(dst, knoreg, src, pre, VEX_OPCODE_0F, &attributes);
   emit_int8((unsigned char)0x92);
   emit_int8((unsigned char)(0xC0 | encode));
 }
 
-void Assembler::kmovdl(KRegister dst, Register src) {
+// This instruction produces ZF or CF flags
+void Assembler::kortestbl(KRegister src1, KRegister src2) {
+  NOT_LP64(assert(VM_Version::supports_avx512dq(), ""));
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
+  int encode = kreg_prefix_and_encode(src1, knoreg, src2, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0x98);
+  emit_int8((unsigned char)(0xC0 | encode));
+}
+
+// This instruction produces ZF or CF flags
+void Assembler::kortestwl(KRegister src1, KRegister src2) {
   NOT_LP64(assert(VM_Version::supports_evex(), ""));
-  VexSimdPrefix pre = !_legacy_mode_bw ? VEX_SIMD_F2 : VEX_SIMD_NONE;
-  int encode = kreg_prefix_and_encode(dst, knoreg, src, pre, /* no_mask_reg */ true);
-  emit_int8((unsigned char)0x92);
-  emit_int8((unsigned char)(0xC0 | encode));
-}
-
-void Assembler::kmovwl(KRegister dst, Register src) {
-  NOT_LP64(assert(VM_Version::supports_evex(), ""));
-  int encode = kreg_prefix_and_encode(dst, knoreg, src, VEX_SIMD_NONE, /* no_mask_reg */ true);
-  emit_int8((unsigned char)0x92);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
+  int encode = kreg_prefix_and_encode(src1, knoreg, src2, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0x98);
+  emit_int8((unsigned char)(0xC0 | encode));
+}
+
+// This instruction produces ZF or CF flags
+void Assembler::kortestdl(KRegister src1, KRegister src2) {
+  NOT_LP64(assert(VM_Version::supports_avx512bw(), ""));
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
+  int encode = kreg_prefix_and_encode(src1, knoreg, src2, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0x98);
+  emit_int8((unsigned char)(0xC0 | encode));
+}
+
+// This instruction produces ZF or CF flags
+void Assembler::kortestql(KRegister src1, KRegister src2) {
+  NOT_LP64(assert(VM_Version::supports_avx512bw(), ""));
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
+  int encode = kreg_prefix_and_encode(src1, knoreg, src2, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0x98);
   emit_int8((unsigned char)(0xC0 | encode));
 }
 
@@ -2205,190 +2259,231 @@
 
 void Assembler::movdl(XMMRegister dst, Register src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66, /* no_mask_reg */ true);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(dst, xnoreg, as_XMMRegister(src->encoding()), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
   emit_int8(0x6E);
   emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::movdl(Register dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
   // swap src/dst to get correct prefix
-  int encode = simd_prefix_and_encode(src, dst, VEX_SIMD_66, /* no_mask_reg */ true);
+  int encode = simd_prefix_and_encode(src, xnoreg, as_XMMRegister(dst->encoding()), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
   emit_int8(0x7E);
   emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::movdl(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_T1S;
-    _input_size_in_bits = EVEX_32bit;
-  }
-  InstructionMark im(this);
-  simd_prefix(dst, src, VEX_SIMD_66, /* no_reg_mask */ true);
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
+  simd_prefix(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
   emit_int8(0x6E);
   emit_operand(dst, src);
 }
 
 void Assembler::movdl(Address dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_T1S;
-    _input_size_in_bits = EVEX_32bit;
-  }
-  InstructionMark im(this);
-  simd_prefix(dst, src, VEX_SIMD_66, /* no_reg_mask */ true);
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
+  simd_prefix(src, xnoreg, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
   emit_int8(0x7E);
   emit_operand(src, dst);
 }
 
 void Assembler::movdqa(XMMRegister dst, XMMRegister src) {
-  _instruction_uses_vl = true;
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_simd_arith_nonds(0x6F, dst, src, VEX_SIMD_66);
+  int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_128bit;
+  InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x6F);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::movdqa(XMMRegister dst, Address src) {
-  _instruction_uses_vl = true;
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_FVM;
-  }
-  emit_simd_arith_nonds(0x6F, dst, src, VEX_SIMD_66);
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
+  simd_prefix(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x6F);
+  emit_operand(dst, src);
 }
 
 void Assembler::movdqu(XMMRegister dst, Address src) {
-  _instruction_uses_vl = true;
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_FVM;
-  }
-  emit_simd_arith_nonds(0x6F, dst, src, VEX_SIMD_F3);
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
+  simd_prefix(dst, xnoreg, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x6F);
+  emit_operand(dst, src);
 }
 
 void Assembler::movdqu(XMMRegister dst, XMMRegister src) {
-  _instruction_uses_vl = true;
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_simd_arith_nonds(0x6F, dst, src, VEX_SIMD_F3);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x6F);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::movdqu(Address dst, XMMRegister src) {
-  _instruction_uses_vl = true;
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_FVM;
-  }
-  InstructionMark im(this);
-  simd_prefix(dst, src, VEX_SIMD_F3, /* no_mask_reg */ false);
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
+  simd_prefix(src, xnoreg, dst, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
   emit_int8(0x7F);
   emit_operand(src, dst);
 }
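+
+// Hedged note on the store forms in this file: the prefix helpers and
+// emit_operand() take the ModRM register operand first and the memory operand
+// second, so a store such as movdqu(Address dst, XMMRegister src) calls
+//
+//   simd_prefix(src, xnoreg, dst, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
+//   emit_operand(src, dst);
+//
+// with the operands swapped relative to the C++ argument order; the existing
+// "swap src<->dst for encoding" comments refer to exactly this.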
 
 // Move Unaligned 256bit Vector
 void Assembler::vmovdqu(XMMRegister dst, XMMRegister src) {
-  _instruction_uses_vl = true;
   assert(UseAVX > 0, "");
-  int vector_len = AVX_256bit;
-  int encode = vex_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_F3, vector_len);
+  InstructionAttr attributes(AVX_256bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
   emit_int8(0x6F);
   emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vmovdqu(XMMRegister dst, Address src) {
-  _instruction_uses_vl = true;
   assert(UseAVX > 0, "");
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_FVM;
-  }
-  InstructionMark im(this);
-  int vector_len = AVX_256bit;
-  vex_prefix(dst, xnoreg, src, VEX_SIMD_F3, vector_len);
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_256bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
+  vex_prefix(src, 0, dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
   emit_int8(0x6F);
   emit_operand(dst, src);
 }
 
 void Assembler::vmovdqu(Address dst, XMMRegister src) {
-  _instruction_uses_vl = true;
   assert(UseAVX > 0, "");
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_FVM;
-  }
-  InstructionMark im(this);
-  int vector_len = AVX_256bit;
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_256bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
   // swap src<->dst for encoding
   assert(src != xnoreg, "sanity");
-  vex_prefix(src, xnoreg, dst, VEX_SIMD_F3, vector_len);
+  vex_prefix(dst, 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
   emit_int8(0x7F);
   emit_operand(src, dst);
 }
 
 // Move Unaligned EVEX enabled Vector (programmable element size: 8, 16, 32 or 64 bits)
-void Assembler::evmovdqul(XMMRegister dst, XMMRegister src, int vector_len) {
-  _instruction_uses_vl = true;
-  assert(UseAVX > 0, "");
-  int src_enc = src->encoding();
-  int dst_enc = dst->encoding();
-  int encode = vex_prefix_and_encode(dst_enc, 0, src_enc, VEX_SIMD_F3, VEX_OPCODE_0F,
-                                     /* vex_w */ false, vector_len, /* legacy_mode */ false, /* no_mask_reg */ false);
+void Assembler::evmovdqub(XMMRegister dst, XMMRegister src, int vector_len) {
+  assert(VM_Version::supports_evex(), "");
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
   emit_int8(0x6F);
   emit_int8((unsigned char)(0xC0 | encode));
 }
 
-void Assembler::evmovdqul(XMMRegister dst, Address src, int vector_len) {
-  _instruction_uses_vl = true;
-  assert(UseAVX > 0, "");
-  InstructionMark im(this);
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_FVM;
-  }
-  vex_prefix(dst, xnoreg, src, VEX_SIMD_F3, vector_len);
+void Assembler::evmovdqub(XMMRegister dst, Address src, int vector_len) {
+  assert(VM_Version::supports_evex(), "");
+  InstructionMark im(this);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
+  vex_prefix(src, 0, dst->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
   emit_int8(0x6F);
   emit_operand(dst, src);
 }
 
-void Assembler::evmovdqul(Address dst, XMMRegister src, int vector_len) {
-  _instruction_uses_vl = true;
-  assert(UseAVX > 0, "");
-  InstructionMark im(this);
+void Assembler::evmovdqub(Address dst, XMMRegister src, int vector_len) {
+  assert(VM_Version::supports_evex(), "");
   assert(src != xnoreg, "sanity");
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_FVM;
-  }
-  // swap src<->dst for encoding
-  vex_prefix(src, xnoreg, dst, VEX_SIMD_F3, vector_len);
+  InstructionMark im(this);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
+  vex_prefix(dst, 0, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
   emit_int8(0x7F);
   emit_operand(src, dst);
 }
 
+void Assembler::evmovdquw(XMMRegister dst, XMMRegister src, int vector_len) {
+  assert(VM_Version::supports_evex(), "");
+  InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x6F);
+  emit_int8((unsigned char)(0xC0 | encode));
+}
+
+void Assembler::evmovdquw(XMMRegister dst, Address src, int vector_len) {
+  assert(VM_Version::supports_evex(), "");
+  InstructionMark im(this);
+  InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
+  vex_prefix(src, 0, dst->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x6F);
+  emit_operand(dst, src);
+}
+
+void Assembler::evmovdquw(Address dst, XMMRegister src, int vector_len) {
+  assert(VM_Version::supports_evex(), "");
+  assert(src != xnoreg, "sanity");
+  InstructionMark im(this);
+  InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
+  vex_prefix(dst, 0, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x7F);
+  emit_operand(src, dst);
+}
+
+void Assembler::evmovdqul(XMMRegister dst, XMMRegister src, int vector_len) {
+  assert(VM_Version::supports_evex(), "");
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x6F);
+  emit_int8((unsigned char)(0xC0 | encode));
+}
+
+void Assembler::evmovdqul(XMMRegister dst, Address src, int vector_len) {
+  assert(VM_Version::supports_evex(), "");
+  InstructionMark im(this);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
+  vex_prefix(src, 0, dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x6F);
+  emit_operand(dst, src);
+}
+
+void Assembler::evmovdqul(Address dst, XMMRegister src, int vector_len) {
+  assert(VM_Version::supports_evex(), "");
+  assert(src != xnoreg, "sanity");
+  InstructionMark im(this);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
+  vex_prefix(dst, 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x7F);
+  emit_operand(src, dst);
+}
+
 void Assembler::evmovdquq(XMMRegister dst, XMMRegister src, int vector_len) {
-  _instruction_uses_vl = true;
-  assert(UseAVX > 0, "");
-  int src_enc = src->encoding();
-  int dst_enc = dst->encoding();
-  int encode = vex_prefix_and_encode(dst_enc, 0, src_enc, VEX_SIMD_F3, VEX_OPCODE_0F,
-                                     /* vex_w */ true, vector_len, /* legacy_mode */ false, /* no_mask_reg */ false);
+  assert(VM_Version::supports_evex(), "");
+  InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
   emit_int8(0x6F);
   emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::evmovdquq(XMMRegister dst, Address src, int vector_len) {
-  _instruction_uses_vl = true;
-  assert(UseAVX > 2, "");
-  InstructionMark im(this);
-  _tuple_type = EVEX_FVM;
-  vex_prefix_q(dst, xnoreg, src, VEX_SIMD_F3, vector_len);
+  assert(VM_Version::supports_evex(), "");
+  InstructionMark im(this);
+  InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
+  vex_prefix(src, 0, dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
   emit_int8(0x6F);
   emit_operand(dst, src);
 }
 
 void Assembler::evmovdquq(Address dst, XMMRegister src, int vector_len) {
-  _instruction_uses_vl = true;
-  assert(UseAVX > 2, "");
-  InstructionMark im(this);
+  assert(VM_Version::supports_evex(), "");
   assert(src != xnoreg, "sanity");
-  _tuple_type = EVEX_FVM;
-  // swap src<->dst for encoding
-  vex_prefix_q(src, xnoreg, dst, VEX_SIMD_F3, vector_len);
+  InstructionMark im(this);
+  InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
+  vex_prefix(dst, 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
   emit_int8(0x7F);
   emit_operand(src, dst);
 }
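+
+// Hedged usage sketch: the evmovdqu{b,w,l,q} forms above differ only in the
+// element width they encode (byte/word/dword/qword, selected via the SIMD
+// prefix and the W bit). For example, an unmasked 512-bit copy through a
+// temporary register might be written as
+//
+//   __ evmovdqul(xmm0, Address(rsi, 0), Assembler::AVX_512bit);
+//   __ evmovdqul(Address(rdi, 0), xmm0, Assembler::AVX_512bit);
+//
+// where the MacroAssembler '__' shorthand and register choices are
+// illustrative only.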
@@ -2434,13 +2529,12 @@
 // The selection is done in MacroAssembler::movdbl() and movflt().
 void Assembler::movlpd(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_T1S;
-    _input_size_in_bits = EVEX_32bit;
-    emit_simd_arith_q(0x12, dst, src, VEX_SIMD_66, /* no_mask_reg */ true);
-  } else {
-    emit_simd_arith(0x12, dst, src, VEX_SIMD_66, /* no_mask_reg */ true);
-  }
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
+  simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x12);
+  emit_operand(dst, src);
 }
 
 void Assembler::movq( MMXRegister dst, Address src ) {
@@ -2466,13 +2560,9 @@
 void Assembler::movq(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   InstructionMark im(this);
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_T1S;
-    _input_size_in_bits = EVEX_64bit;
-    simd_prefix_q(dst, xnoreg, src, VEX_SIMD_F3, /* no_mask_reg */ true);
-  } else {
-    simd_prefix(dst, src, VEX_SIMD_F3, /* no_mask_reg */ true);
-  }
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
+  simd_prefix(dst, xnoreg, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
   emit_int8(0x7E);
   emit_operand(dst, src);
 }
@@ -2480,14 +2570,9 @@
 void Assembler::movq(Address dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   InstructionMark im(this);
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_T1S;
-    _input_size_in_bits = EVEX_64bit;
-    simd_prefix(src, xnoreg, dst, VEX_SIMD_66, /* no_mask_reg */ true,
-                VEX_OPCODE_0F, /* rex_w */ true);
-  } else {
-    simd_prefix(dst, src, VEX_SIMD_66, /* no_mask_reg */ true);
-  }
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
+  simd_prefix(src, xnoreg, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
   emit_int8((unsigned char)0xD6);
   emit_operand(src, dst);
 }
@@ -2510,60 +2595,56 @@
 
 void Assembler::movsd(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  if (VM_Version::supports_evex()) {
-    emit_simd_arith_q(0x10, dst, src, VEX_SIMD_F2, /* no_mask_reg */ true);
-  } else {
-    emit_simd_arith(0x10, dst, src, VEX_SIMD_F2);
-  }
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x10);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::movsd(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_T1S;
-    _input_size_in_bits = EVEX_64bit;
-    emit_simd_arith_nonds_q(0x10, dst, src, VEX_SIMD_F2, /* no_mask_reg */ true);
-  } else {
-    emit_simd_arith_nonds(0x10, dst, src, VEX_SIMD_F2);
-  }
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
+  simd_prefix(dst, xnoreg, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x10);
+  emit_operand(dst, src);
 }
 
 void Assembler::movsd(Address dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   InstructionMark im(this);
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_T1S;
-    _input_size_in_bits = EVEX_64bit;
-    simd_prefix_q(src, xnoreg, dst, VEX_SIMD_F2);
-  } else {
-    simd_prefix(src, xnoreg, dst, VEX_SIMD_F2, /* no_mask_reg */ false);
-  }
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
+  simd_prefix(src, xnoreg, dst, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
   emit_int8(0x11);
   emit_operand(src, dst);
 }
 
 void Assembler::movss(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  emit_simd_arith(0x10, dst, src, VEX_SIMD_F3, /* no_mask_reg */ true);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x10);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::movss(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_T1S;
-    _input_size_in_bits = EVEX_32bit;
-  }
-  emit_simd_arith_nonds(0x10, dst, src, VEX_SIMD_F3, /* no_mask_reg */ true);
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
+  simd_prefix(dst, xnoreg, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x10);
+  emit_operand(dst, src);
 }
 
 void Assembler::movss(Address dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_T1S;
-    _input_size_in_bits = EVEX_32bit;
-  }
-  InstructionMark im(this);
-  simd_prefix(dst, src, VEX_SIMD_F3, /* no_mask_reg */ false);
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
+  simd_prefix(src, xnoreg, dst, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
   emit_int8(0x11);
   emit_operand(src, dst);
 }
@@ -2655,36 +2736,38 @@
 
 void Assembler::mulsd(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_T1S;
-    _input_size_in_bits = EVEX_64bit;
-    emit_simd_arith_q(0x59, dst, src, VEX_SIMD_F2);
-  } else {
-    emit_simd_arith(0x59, dst, src, VEX_SIMD_F2);
-  }
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
+  simd_prefix(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x59);
+  emit_operand(dst, src);
 }
 
 void Assembler::mulsd(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  if (VM_Version::supports_evex()) {
-    emit_simd_arith_q(0x59, dst, src, VEX_SIMD_F2);
-  } else {
-    emit_simd_arith(0x59, dst, src, VEX_SIMD_F2);
-  }
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x59);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::mulss(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_T1S;
-    _input_size_in_bits = EVEX_32bit;
-  }
-  emit_simd_arith(0x59, dst, src, VEX_SIMD_F3);
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
+  simd_prefix(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x59);
+  emit_operand(dst, src);
 }
 
 void Assembler::mulss(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  emit_simd_arith(0x59, dst, src, VEX_SIMD_F3);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x59);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::negl(Register dst) {
@@ -2985,28 +3068,35 @@
 void Assembler::packuswb(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_FV;
-    _input_size_in_bits = EVEX_32bit;
-  }
-  emit_simd_arith(0x67, dst, src, VEX_SIMD_66, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw);
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
+  simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x67);
+  emit_operand(dst, src);
 }
 
 void Assembler::packuswb(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_simd_arith(0x67, dst, src, VEX_SIMD_66, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x67);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vpackuswb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
   assert(UseAVX > 0, "some form of AVX must be enabled");
-  emit_vex_arith(0x67, dst, nds, src, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x67);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vpermq(XMMRegister dst, XMMRegister src, int imm8, int vector_len) {
-  _instruction_uses_vl = true;
   assert(VM_Version::supports_avx2(), "");
-  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, /* no_mask_reg */ false,
-                                      VEX_OPCODE_0F_3A, /* rex_w */ true, vector_len);
+  InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
   emit_int8(0x00);
   emit_int8((unsigned char)(0xC0 | encode));
   emit_int8(imm8);
@@ -3020,8 +3110,8 @@
 void Assembler::pcmpestri(XMMRegister dst, Address src, int imm8) {
   assert(VM_Version::supports_sse4_2(), "");
   InstructionMark im(this);
-  simd_prefix(dst, xnoreg, src, VEX_SIMD_66, /* no_mask_reg */ false, VEX_OPCODE_0F_3A,
-              /* rex_w */ false, AVX_128bit, /* legacy_mode */ true);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+  simd_prefix(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
   emit_int8(0x61);
   emit_operand(dst, src);
   emit_int8(imm8);
@@ -3029,46 +3119,162 @@
 
 void Assembler::pcmpestri(XMMRegister dst, XMMRegister src, int imm8) {
   assert(VM_Version::supports_sse4_2(), "");
-  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, /* no_mask_reg */ false,
-                                      VEX_OPCODE_0F_3A, /* rex_w */ false, AVX_128bit, /* legacy_mode */ true);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
   emit_int8(0x61);
   emit_int8((unsigned char)(0xC0 | encode));
   emit_int8(imm8);
 }
 
+// In this context, the dst vector contains the components that are equal; non-equal components are zeroed in dst
+void Assembler::pcmpeqb(XMMRegister dst, XMMRegister src) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x74);
+  emit_int8((unsigned char)(0xC0 | encode));
+}
+
+// In this context, the dst vector contains the components that are equal; non-equal components are zeroed in dst
+void Assembler::vpcmpeqb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
+  assert(VM_Version::supports_avx(), "");
+  InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x74);
+  emit_int8((unsigned char)(0xC0 | encode));
+}
+
+// In this context, kdst is written with the mask used to process the equal components
+void Assembler::evpcmpeqb(KRegister kdst, XMMRegister nds, XMMRegister src, int vector_len) {
+  assert(VM_Version::supports_avx512bw(), "");
+  InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  int encode = vex_prefix_and_encode(kdst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x74);
+  emit_int8((unsigned char)(0xC0 | encode));
+}
+
+// In this context, the dst vector contains the components that are equal; non-equal components are zeroed in dst
 void Assembler::pcmpeqw(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_simd_arith(0x75, dst, src, VEX_SIMD_66,
-                  false, (VM_Version::supports_avx512dq() == false));
-}
-
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x75);
+  emit_int8((unsigned char)(0xC0 | encode));
+}
+
+// In this context, the dst vector contains the components that are equal; non-equal components are zeroed in dst
 void Assembler::vpcmpeqw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
-  assert(UseAVX > 0, "some form of AVX must be enabled");
-  emit_vex_arith(0x75, dst, nds, src, VEX_SIMD_66, vector_len,
-                 false, (VM_Version::supports_avx512dq() == false));
+  assert(VM_Version::supports_avx(), "");
+  InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x75);
+  emit_int8((unsigned char)(0xC0 | encode));
+}
+
+// In this context, kdst is written with the mask used to process the equal components
+void Assembler::evpcmpeqw(KRegister kdst, XMMRegister nds, XMMRegister src, int vector_len) {
+  assert(VM_Version::supports_avx512bw(), "");
+  InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  int encode = vex_prefix_and_encode(kdst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x75);
+  emit_int8((unsigned char)(0xC0 | encode));
+}
+
+// In this context, the dst vector contains the components that are equal; non-equal components are zeroed in dst
+void Assembler::pcmpeqd(XMMRegister dst, XMMRegister src) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x76);
+  emit_int8((unsigned char)(0xC0 | encode));
+}
+
+// In this context, the dst vector contains the components that are equal; non-equal components are zeroed in dst
+void Assembler::vpcmpeqd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
+  assert(VM_Version::supports_avx(), "");
+  InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x76);
+  emit_int8((unsigned char)(0xC0 | encode));
+}
+
+// In this context, kdst is written with the mask used to process the equal components
+void Assembler::evpcmpeqd(KRegister kdst, XMMRegister nds, XMMRegister src, int vector_len) {
+  assert(VM_Version::supports_evex(), "");
+  InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  int encode = vex_prefix_and_encode(kdst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x76);
+  emit_int8((unsigned char)(0xC0 | encode));
+}
+
+// In this context, the dst vector contains the components that are equal; non-equal components are zeroed in dst
+void Assembler::pcmpeqq(XMMRegister dst, XMMRegister src) {
+  NOT_LP64(assert(VM_Version::supports_sse4_1(), ""));
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
+  emit_int8(0x29);
+  emit_int8((unsigned char)(0xC0 | encode));
+}
+
+// In this context, the dst vector contains the components that are equal; non-equal components are zeroed in dst
+void Assembler::vpcmpeqq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
+  assert(VM_Version::supports_avx(), "");
+  InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
+  emit_int8(0x29);
+  emit_int8((unsigned char)(0xC0 | encode));
+}
+
+// In this context, kdst is written with the mask used to process the equal components
+void Assembler::evpcmpeqq(KRegister kdst, XMMRegister nds, XMMRegister src, int vector_len) {
+  assert(VM_Version::supports_evex(), "");
+  InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  int encode = vex_prefix_and_encode(kdst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
+  emit_int8(0x29);
+  emit_int8((unsigned char)(0xC0 | encode));
+}
+
+// In this context, kdst is written with the mask used to process the equal components
+void Assembler::evpcmpeqq(KRegister kdst, XMMRegister nds, Address src, int vector_len) {
+  assert(VM_Version::supports_evex(), "");
+  InstructionMark im(this);
+  InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  int dst_enc = kdst->encoding();
+  vex_prefix(src, nds_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
+  emit_int8(0x29);
+  emit_operand(as_Register(dst_enc), src);
 }
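+
+// Hedged sketch of how the two comparison families above report their result
+// (register names illustrative only): the SSE/AVX forms write a vector of
+// all-ones/all-zero lanes, while the EVEX forms write a bit-per-element mask
+// into a k-register, e.g.
+//
+//   __ vpcmpeqb(xmm2, xmm0, xmm1, Assembler::AVX_256bit); // xmm2 lane i = all ones if equal
+//   __ evpcmpeqb(k2, xmm0, xmm1, Assembler::AVX_512bit);  // k2 bit i = 1 if byte i equal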
 
 void Assembler::pmovmskb(Register dst, XMMRegister src) {
   assert(VM_Version::supports_sse2(), "");
-  int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_66, true, VEX_OPCODE_0F,
-                                      false, AVX_128bit, (VM_Version::supports_avx512dq() == false));
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
   emit_int8((unsigned char)0xD7);
   emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vpmovmskb(Register dst, XMMRegister src) {
   assert(VM_Version::supports_avx2(), "");
-  int vector_len = AVX_256bit;
-  int encode = vex_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_66,
-                                     vector_len, VEX_OPCODE_0F, true, false);
+  InstructionAttr attributes(AVX_256bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
   emit_int8((unsigned char)0xD7);
   emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::pextrd(Register dst, XMMRegister src, int imm8) {
   assert(VM_Version::supports_sse4_1(), "");
-  int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_66, /* no_mask_reg */ true,
-                                      VEX_OPCODE_0F_3A, /* rex_w */ false, AVX_128bit, /* legacy_mode */ _legacy_mode_dq);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
   emit_int8(0x16);
   emit_int8((unsigned char)(0xC0 | encode));
   emit_int8(imm8);
@@ -3076,8 +3282,8 @@
 
 void Assembler::pextrq(Register dst, XMMRegister src, int imm8) {
   assert(VM_Version::supports_sse4_1(), "");
-  int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_66, /* no_mask_reg */  true,
-                                      VEX_OPCODE_0F_3A, /* rex_w */ true, AVX_128bit, /* legacy_mode */ _legacy_mode_dq);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
   emit_int8(0x16);
   emit_int8((unsigned char)(0xC0 | encode));
   emit_int8(imm8);
@@ -3085,8 +3291,8 @@
 
 void Assembler::pextrw(Register dst, XMMRegister src, int imm8) {
   assert(VM_Version::supports_sse2(), "");
-  int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_66, /* no_mask_reg */ true,
-                                      VEX_OPCODE_0F, /* rex_w */ false, AVX_128bit, /* legacy_mode */ _legacy_mode_bw);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
   emit_int8((unsigned char)0xC5);
   emit_int8((unsigned char)(0xC0 | encode));
   emit_int8(imm8);
@@ -3094,8 +3300,8 @@
 
 void Assembler::pinsrd(XMMRegister dst, Register src, int imm8) {
   assert(VM_Version::supports_sse4_1(), "");
-  int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_66, /* no_mask_reg */ true,
-                                      VEX_OPCODE_0F_3A, /* rex_w */ false, AVX_128bit, /* legacy_mode */ _legacy_mode_dq);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
   emit_int8(0x22);
   emit_int8((unsigned char)(0xC0 | encode));
   emit_int8(imm8);
@@ -3103,8 +3309,8 @@
 
 void Assembler::pinsrq(XMMRegister dst, Register src, int imm8) {
   assert(VM_Version::supports_sse4_1(), "");
-  int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_66, /* no_mask_reg */ true,
-                                      VEX_OPCODE_0F_3A, /* rex_w */ true, AVX_128bit, /* legacy_mode */ _legacy_mode_dq);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
   emit_int8(0x22);
   emit_int8((unsigned char)(0xC0 | encode));
   emit_int8(imm8);
@@ -3112,8 +3318,8 @@
 
 void Assembler::pinsrw(XMMRegister dst, Register src, int imm8) {
   assert(VM_Version::supports_sse2(), "");
-  int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_66, /* no_mask_reg */ true,
-                                      VEX_OPCODE_0F, /* rex_w */ false, AVX_128bit, /* legacy_mode */ _legacy_mode_bw);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
   emit_int8((unsigned char)0xC4);
   emit_int8((unsigned char)(0xC0 | encode));
   emit_int8(imm8);
@@ -3121,29 +3327,29 @@
 
 void Assembler::pmovzxbw(XMMRegister dst, Address src) {
   assert(VM_Version::supports_sse4_1(), "");
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_HVM;
-  }
-  InstructionMark im(this);
-  simd_prefix(dst, src, VEX_SIMD_66, /* no_mask_reg */ false, VEX_OPCODE_0F_38);
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ false);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_HVM, /* input_size_in_bits */ EVEX_NObit);
+  simd_prefix(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
   emit_int8(0x30);
   emit_operand(dst, src);
 }
 
 void Assembler::pmovzxbw(XMMRegister dst, XMMRegister src) {
   assert(VM_Version::supports_sse4_1(), "");
-  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, /* no_mask_reg */ false, VEX_OPCODE_0F_38);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
   emit_int8(0x30);
   emit_int8((unsigned char)(0xC0 | encode));
 }
 
-void Assembler::vpmovzxbw(XMMRegister dst, Address src) {
+void Assembler::vpmovzxbw(XMMRegister dst, Address src, int vector_len) {
   assert(VM_Version::supports_avx(), "");
   InstructionMark im(this);
-  bool vector256 = true;
   assert(dst != xnoreg, "sanity");
-  int dst_enc = dst->encoding();
-  vex_prefix(src, 0, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_38, false, vector256);
+  InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ false);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_HVM, /* input_size_in_bits */ EVEX_NObit);
+  vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
   emit_int8(0x30);
   emit_operand(dst, src);
 }
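+
+// Hedged usage sketch for the widened vpmovzxbw above: the new vector_len
+// argument selects the destination width, e.g. a 256-bit zero-extension of
+// 16 packed bytes into 16 packed words could be requested as
+//
+//   __ vpmovzxbw(xmm0, Address(rsi, 0), Assembler::AVX_256bit);
+//
+// (the '__' shorthand, registers and addressing are illustrative only).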
@@ -3246,43 +3452,41 @@
 
 void Assembler::pshufb(XMMRegister dst, XMMRegister src) {
   assert(VM_Version::supports_ssse3(), "");
-  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, /* no_mask_reg */ false,
-                                      VEX_OPCODE_0F_38, /* rex_w */ false, AVX_128bit, /* legacy_mode */ _legacy_mode_bw);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
   emit_int8(0x00);
   emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::pshufb(XMMRegister dst, Address src) {
   assert(VM_Version::supports_ssse3(), "");
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_FVM;
-  }
-  InstructionMark im(this);
-  simd_prefix(dst, dst, src, VEX_SIMD_66, /* no_mask_reg */ false,
-              VEX_OPCODE_0F_38, /* rex_w */ false, AVX_128bit, /* legacy_mode */ _legacy_mode_bw);
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ false);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
+  simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
   emit_int8(0x00);
   emit_operand(dst, src);
 }
 
 void Assembler::pshufd(XMMRegister dst, XMMRegister src, int mode) {
-  _instruction_uses_vl = true;
   assert(isByte(mode), "invalid value");
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_simd_arith_nonds(0x70, dst, src, VEX_SIMD_66);
+  int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_128bit;
+  InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x70);
+  emit_int8((unsigned char)(0xC0 | encode));
   emit_int8(mode & 0xFF);
 }
 
 void Assembler::pshufd(XMMRegister dst, Address src, int mode) {
-  _instruction_uses_vl = true;
   assert(isByte(mode), "invalid value");
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_FV;
-    _input_size_in_bits = EVEX_32bit;
-  }
-  InstructionMark im(this);
-  simd_prefix(dst, src, VEX_SIMD_66, /* no_mask_reg */ false);
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
+  simd_prefix(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
   emit_int8(0x70);
   emit_operand(dst, src);
   emit_int8(mode & 0xFF);
@@ -3291,7 +3495,10 @@
 void Assembler::pshuflw(XMMRegister dst, XMMRegister src, int mode) {
   assert(isByte(mode), "invalid value");
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_simd_arith_nonds(0x70, dst, src, VEX_SIMD_F2, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x70);
+  emit_int8((unsigned char)(0xC0 | encode));
   emit_int8(mode & 0xFF);
 }
 
@@ -3299,12 +3506,10 @@
   assert(isByte(mode), "invalid value");
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_FVM;
-  }
-  InstructionMark im(this);
-  simd_prefix(dst, xnoreg, src, VEX_SIMD_F2, /* no_mask_reg */ false,
-              VEX_OPCODE_0F, /* rex_w */ false, AVX_128bit, /* legacy_mode */ _legacy_mode_bw);
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ false);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
+  simd_prefix(dst, xnoreg, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
   emit_int8(0x70);
   emit_operand(dst, src);
   emit_int8(mode & 0xFF);
@@ -3313,9 +3518,9 @@
 void Assembler::psrldq(XMMRegister dst, int shift) {
   // Shift right 128 bit value in dst XMMRegister by shift number of bytes.
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
   // XMM3 is for /3 encoding: 66 0F 73 /3 ib
-  int encode = simd_prefix_and_encode(xmm3, dst, dst, VEX_SIMD_66, /* no_mask_reg */ true,
-                                      VEX_OPCODE_0F, /* rex_w */ false, AVX_128bit, /* legacy_mode */ _legacy_mode_bw);
+  int encode = simd_prefix_and_encode(xmm3, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
   emit_int8(0x73);
   emit_int8((unsigned char)(0xC0 | encode));
   emit_int8(shift);
@@ -3324,9 +3529,9 @@
 void Assembler::pslldq(XMMRegister dst, int shift) {
   // Shift left 128 bit value in dst XMMRegister by shift number of bytes.
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
   // XMM7 is for /7 encoding: 66 0F 73 /7 ib
-  int encode = simd_prefix_and_encode(xmm7, dst, dst, VEX_SIMD_66, /* no_mask_reg */ true,
-                                      VEX_OPCODE_0F, /* rex_w */ false, AVX_128bit, /* legacy_mode */ _legacy_mode_bw);
+  int encode = simd_prefix_and_encode(xmm7, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
   emit_int8(0x73);
   emit_int8((unsigned char)(0xC0 | encode));
   emit_int8(shift);
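+  // Hedged usage note for psrldq/pslldq above: the immediate is a byte count,
+  // not a bit count, so e.g. "__ psrldq(xmm0, 8);" moves the upper quadword of
+  // xmm0 into the lower quadword and zeroes the rest (illustrative sketch only).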
@@ -3336,16 +3541,16 @@
   assert(VM_Version::supports_sse4_1(), "");
   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
   InstructionMark im(this);
-  simd_prefix(dst, xnoreg, src, VEX_SIMD_66, /* no_mask_reg */ false,
-              VEX_OPCODE_0F_38, /* rex_w */ false, AVX_128bit, /* legacy_mode */ true);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+  simd_prefix(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
   emit_int8(0x17);
   emit_operand(dst, src);
 }
 
 void Assembler::ptest(XMMRegister dst, XMMRegister src) {
   assert(VM_Version::supports_sse4_1(), "");
-  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, /* no_mask_reg */ false,
-                                      VEX_OPCODE_0F_38, /* rex_w */ false, AVX_128bit, /* legacy_mode */ true);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
   emit_int8(0x17);
   emit_int8((unsigned char)(0xC0 | encode));
 }
@@ -3353,20 +3558,18 @@
 void Assembler::vptest(XMMRegister dst, Address src) {
   assert(VM_Version::supports_avx(), "");
   InstructionMark im(this);
-  int vector_len = AVX_256bit;
+  InstructionAttr attributes(AVX_256bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
   assert(dst != xnoreg, "sanity");
-  int dst_enc = dst->encoding();
   // swap src<->dst for encoding
-  vex_prefix(src, 0, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_38, /* rex_w */ false,
-             vector_len, /* legacy_mode  */ true, /* no_mask_reg */ false);
+  vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
   emit_int8(0x17);
   emit_operand(dst, src);
 }
 
 void Assembler::vptest(XMMRegister dst, XMMRegister src) {
   assert(VM_Version::supports_avx(), "");
-  int vector_len = AVX_256bit;
-  int encode = vex_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, vector_len, VEX_OPCODE_0F_38, /* legacy_mode */ true);
+  InstructionAttr attributes(AVX_256bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
   emit_int8(0x17);
   emit_int8((unsigned char)(0xC0 | encode));
 }
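+
+// Hedged usage sketch for ptest/vptest above: ZF is set when the AND of the
+// two operands is all zero, so a "vector is all-zero" check can look like
+//
+//   __ ptest(xmm0, xmm0);
+//   __ jcc(Assembler::zero, L_all_zero);   // label and registers illustrative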
@@ -3374,42 +3577,47 @@
 void Assembler::punpcklbw(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_FVM;
-  }
-  emit_simd_arith(0x60, dst, src, VEX_SIMD_66, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_vlbw);
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_vlbw, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
+  simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x60);
+  emit_operand(dst, src);
 }
 
 void Assembler::punpcklbw(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_simd_arith(0x60, dst, src, VEX_SIMD_66, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_vlbw);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_vlbw, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x60);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::punpckldq(XMMRegister dst, Address src) {
-  _instruction_uses_vl = true;
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_FV;
-    _input_size_in_bits = EVEX_32bit;
-  }
-  emit_simd_arith(0x62, dst, src, VEX_SIMD_66);
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
+  simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x62);
+  emit_operand(dst, src);
 }
 
 void Assembler::punpckldq(XMMRegister dst, XMMRegister src) {
-  _instruction_uses_vl = true;
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_simd_arith(0x62, dst, src, VEX_SIMD_66);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x62);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::punpcklqdq(XMMRegister dst, XMMRegister src) {
-  _instruction_uses_vl = true;
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  if (VM_Version::supports_evex()) {
-    emit_simd_arith_q(0x6C, dst, src, VEX_SIMD_66);
-  } else {
-    emit_simd_arith(0x6C, dst, src, VEX_SIMD_66);
-  }
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x6C);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::push(int32_t imm32) {
@@ -3454,16 +3662,18 @@
 
 void Assembler::rcpps(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_NONE, /* no_mask_reg */ false, VEX_OPCODE_0F, /* rex_w */ false, AVX_128bit, /* legacy_mode */ true);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
   emit_int8(0x53);
-  emit_int8(0xC0 | encode);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::rcpss(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3, /* no_mask_reg */ false, VEX_OPCODE_0F, /* rex_w */ false, AVX_128bit, /* legacy_mode */ true);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
   emit_int8(0x53);
-  emit_int8(0xC0 | encode);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::rdtsc() {
@@ -3622,27 +3832,28 @@
 
 void Assembler::sqrtsd(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  if (VM_Version::supports_evex()) {
-    emit_simd_arith_q(0x51, dst, src, VEX_SIMD_F2);
-  } else {
-    emit_simd_arith(0x51, dst, src, VEX_SIMD_F2);
-  }
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x51);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::sqrtsd(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_T1S;
-    _input_size_in_bits = EVEX_64bit;
-    emit_simd_arith_q(0x51, dst, src, VEX_SIMD_F2);
-  } else {
-    emit_simd_arith(0x51, dst, src, VEX_SIMD_F2);
-  }
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
+  simd_prefix(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x51);
+  emit_operand(dst, src);
 }
 
 void Assembler::sqrtss(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  emit_simd_arith(0x51, dst, src, VEX_SIMD_F3);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x51);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::std() {
@@ -3651,11 +3862,12 @@
 
 void Assembler::sqrtss(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_T1S;
-    _input_size_in_bits = EVEX_32bit;
-  }
-  emit_simd_arith(0x51, dst, src, VEX_SIMD_F3);
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
+  simd_prefix(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x51);
+  emit_operand(dst, src);
 }
 
 void Assembler::stmxcsr( Address dst) {
@@ -3705,38 +3917,38 @@
 
 void Assembler::subsd(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  if (VM_Version::supports_evex()) {
-    emit_simd_arith_q(0x5C, dst, src, VEX_SIMD_F2);
-  } else {
-    emit_simd_arith(0x5C, dst, src, VEX_SIMD_F2);
-  }
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x5C);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::subsd(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_T1S;
-    _input_size_in_bits = EVEX_64bit;
-  }
-  if (VM_Version::supports_evex()) {
-    emit_simd_arith_q(0x5C, dst, src, VEX_SIMD_F2);
-  } else {
-    emit_simd_arith(0x5C, dst, src, VEX_SIMD_F2);
-  }
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
+  simd_prefix(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x5C);
+  emit_operand(dst, src);
 }
 
 void Assembler::subss(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  emit_simd_arith(0x5C, dst, src, VEX_SIMD_F3);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x5C);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::subss(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_T1S;
-    _input_size_in_bits = EVEX_32bit;
-  }
-  emit_simd_arith(0x5C, dst, src, VEX_SIMD_F3);
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
+  simd_prefix(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x5C);
+  emit_operand(dst, src);
 }
 
 void Assembler::testb(Register dst, int imm8) {
@@ -3765,7 +3977,7 @@
   emit_arith(0x85, 0xC0, dst, src);
 }
 
-void Assembler::testl(Register dst, Address  src) {
+void Assembler::testl(Register dst, Address src) {
   InstructionMark im(this);
   prefix(src, dst);
   emit_int8((unsigned char)0x85);
@@ -3792,36 +4004,38 @@
 
 void Assembler::ucomisd(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_T1S;
-    _input_size_in_bits = EVEX_64bit;
-    emit_simd_arith_nonds_q(0x2E, dst, src, VEX_SIMD_66, /* no_mask_reg */ true);
-  } else {
-    emit_simd_arith_nonds(0x2E, dst, src, VEX_SIMD_66);
-  }
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
+  simd_prefix(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x2E);
+  emit_operand(dst, src);
 }
 
 void Assembler::ucomisd(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  if (VM_Version::supports_evex()) {
-    emit_simd_arith_nonds_q(0x2E, dst, src, VEX_SIMD_66, /* no_mask_reg */ true);
-  } else {
-    emit_simd_arith_nonds(0x2E, dst, src, VEX_SIMD_66);
-  }
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x2E);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::ucomiss(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_T1S;
-    _input_size_in_bits = EVEX_32bit;
-  }
-  emit_simd_arith_nonds(0x2E, dst, src, VEX_SIMD_NONE, /* no_mask_reg */ true);
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
+  simd_prefix(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x2E);
+  emit_operand(dst, src);
 }
 
 void Assembler::ucomiss(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  emit_simd_arith_nonds(0x2E, dst, src, VEX_SIMD_NONE, /* no_mask_reg */ true);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x2E);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::xabort(int8_t imm8) {
@@ -3903,138 +4117,162 @@
 
 void Assembler::vaddsd(XMMRegister dst, XMMRegister nds, Address src) {
   assert(VM_Version::supports_avx(), "");
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_T1S;
-    _input_size_in_bits = EVEX_64bit;
-    emit_vex_arith_q(0x58, dst, nds, src, VEX_SIMD_F2, AVX_128bit);
-  } else {
-    emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_F2, AVX_128bit);
-  }
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x58);
+  emit_operand(dst, src);
 }
 
 void Assembler::vaddsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
   assert(VM_Version::supports_avx(), "");
-  if (VM_Version::supports_evex()) {
-    emit_vex_arith_q(0x58, dst, nds, src, VEX_SIMD_F2, AVX_128bit);
-  } else {
-    emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_F2, AVX_128bit);
-  }
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x58);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vaddss(XMMRegister dst, XMMRegister nds, Address src) {
   assert(VM_Version::supports_avx(), "");
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_T1S;
-    _input_size_in_bits = EVEX_32bit;
-  }
-  emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_F3, AVX_128bit);
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x58);
+  emit_operand(dst, src);
 }
 
 void Assembler::vaddss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
   assert(VM_Version::supports_avx(), "");
-  emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_F3, AVX_128bit);
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x58);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vdivsd(XMMRegister dst, XMMRegister nds, Address src) {
   assert(VM_Version::supports_avx(), "");
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_T1S;
-    _input_size_in_bits = EVEX_64bit;
-    emit_vex_arith_q(0x5E, dst, nds, src, VEX_SIMD_F2, AVX_128bit);
-  } else {
-    emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_F2, AVX_128bit);
-  }
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x5E);
+  emit_operand(dst, src);
 }
 
 void Assembler::vdivsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
   assert(VM_Version::supports_avx(), "");
-  if (VM_Version::supports_evex()) {
-    emit_vex_arith_q(0x5E, dst, nds, src, VEX_SIMD_F2, AVX_128bit);
-  } else {
-    emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_F2, AVX_128bit);
-  }
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x5E);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vdivss(XMMRegister dst, XMMRegister nds, Address src) {
   assert(VM_Version::supports_avx(), "");
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_T1S;
-    _input_size_in_bits = EVEX_32bit;
-  }
-  emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_F3, AVX_128bit);
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x5E);
+  emit_operand(dst, src);
 }
 
 void Assembler::vdivss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
   assert(VM_Version::supports_avx(), "");
-  emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_F3, AVX_128bit);
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x5E);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vmulsd(XMMRegister dst, XMMRegister nds, Address src) {
   assert(VM_Version::supports_avx(), "");
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_T1S;
-    _input_size_in_bits = EVEX_64bit;
-    emit_vex_arith_q(0x59, dst, nds, src, VEX_SIMD_F2, AVX_128bit);
-  } else {
-    emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_F2, AVX_128bit);
-  }
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x59);
+  emit_operand(dst, src);
 }
 
 void Assembler::vmulsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
   assert(VM_Version::supports_avx(), "");
-  if (VM_Version::supports_evex()) {
-    emit_vex_arith_q(0x59, dst, nds, src, VEX_SIMD_F2, AVX_128bit);
-  } else {
-    emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_F2, AVX_128bit);
-  }
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x59);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vmulss(XMMRegister dst, XMMRegister nds, Address src) {
   assert(VM_Version::supports_avx(), "");
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_T1S;
-    _input_size_in_bits = EVEX_32bit;
-  }
-  emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_F3, AVX_128bit);
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x59);
+  emit_operand(dst, src);
 }
 
 void Assembler::vmulss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
   assert(VM_Version::supports_avx(), "");
-  emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_F3, AVX_128bit);
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x59);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vsubsd(XMMRegister dst, XMMRegister nds, Address src) {
   assert(VM_Version::supports_avx(), "");
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_T1S;
-    _input_size_in_bits = EVEX_64bit;
-    emit_vex_arith_q(0x5C, dst, nds, src, VEX_SIMD_F2, AVX_128bit);
-  } else {
-    emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_F2, AVX_128bit);
-  }
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x5C);
+  emit_operand(dst, src);
 }
 
 void Assembler::vsubsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
   assert(VM_Version::supports_avx(), "");
-  if (VM_Version::supports_evex()) {
-    emit_vex_arith_q(0x5C, dst, nds, src, VEX_SIMD_F2, AVX_128bit);
-  } else {
-    emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_F2, AVX_128bit);
-  }
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x5C);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vsubss(XMMRegister dst, XMMRegister nds, Address src) {
   assert(VM_Version::supports_avx(), "");
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_T1S;
-    _input_size_in_bits = EVEX_32bit;
-  }
-  emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_F3, AVX_128bit);
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x5C);
+  emit_operand(dst, src);
 }
 
 void Assembler::vsubss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
   assert(VM_Version::supports_avx(), "");
-  emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_F3, AVX_128bit);
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x5C);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 //====================VECTOR ARITHMETIC=====================================
@@ -4042,414 +4280,433 @@
 // Floating-point vector arithmetic
 
 void Assembler::addpd(XMMRegister dst, XMMRegister src) {
-  _instruction_uses_vl = true;
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  if (VM_Version::supports_evex()) {
-    emit_simd_arith_q(0x58, dst, src, VEX_SIMD_66);
-  } else {
-    emit_simd_arith(0x58, dst, src, VEX_SIMD_66);
-  }
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x58);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::addps(XMMRegister dst, XMMRegister src) {
-  _instruction_uses_vl = true;
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_simd_arith(0x58, dst, src, VEX_SIMD_NONE);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x58);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vaddpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
-  _instruction_uses_vl = true;
   assert(VM_Version::supports_avx(), "");
-  if (VM_Version::supports_evex()) {
-    emit_vex_arith_q(0x58, dst, nds, src, VEX_SIMD_66, vector_len);
-  } else {
-    emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_66, vector_len);
-  }
+  InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x58);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vaddps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
-  _instruction_uses_vl = true;
   assert(VM_Version::supports_avx(), "");
-  emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_NONE, vector_len);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x58);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vaddpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
-  _instruction_uses_vl = true;
   assert(VM_Version::supports_avx(), "");
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_FV;
-    _input_size_in_bits = EVEX_64bit;
-    emit_vex_arith_q(0x58, dst, nds, src, VEX_SIMD_66, vector_len);
-  } else {
-    emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_66, vector_len);
-  }
+  InstructionMark im(this);
+  InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x58);
+  emit_operand(dst, src);
 }
 
 void Assembler::vaddps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
-  _instruction_uses_vl = true;
   assert(VM_Version::supports_avx(), "");
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_FV;
-    _input_size_in_bits = EVEX_32bit;
-  }
-  emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_NONE, vector_len);
+  InstructionMark im(this);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x58);
+  emit_operand(dst, src);
 }
 
 void Assembler::subpd(XMMRegister dst, XMMRegister src) {
-  _instruction_uses_vl = true;
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  if (VM_Version::supports_evex()) {
-    emit_simd_arith_q(0x5C, dst, src, VEX_SIMD_66);
-  } else {
-    emit_simd_arith(0x5C, dst, src, VEX_SIMD_66);
-  }
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x5C);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::subps(XMMRegister dst, XMMRegister src) {
-  _instruction_uses_vl = true;
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_simd_arith(0x5C, dst, src, VEX_SIMD_NONE);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x5C);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vsubpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
-  _instruction_uses_vl = true;
   assert(VM_Version::supports_avx(), "");
-  if (VM_Version::supports_evex()) {
-    emit_vex_arith_q(0x5C, dst, nds, src, VEX_SIMD_66, vector_len);
-  } else {
-    emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_66, vector_len);
-  }
+  InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x5C);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vsubps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
-  _instruction_uses_vl = true;
   assert(VM_Version::supports_avx(), "");
-  emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_NONE, vector_len);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x5C);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vsubpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
-  _instruction_uses_vl = true;
   assert(VM_Version::supports_avx(), "");
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_FV;
-    _input_size_in_bits = EVEX_64bit;
-    emit_vex_arith_q(0x5C, dst, nds, src, VEX_SIMD_66, vector_len);
-  } else {
-    emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_66, vector_len);
-  }
+  InstructionMark im(this);
+  InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x5C);
+  emit_operand(dst, src);
 }
 
 void Assembler::vsubps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
-  _instruction_uses_vl = true;
   assert(VM_Version::supports_avx(), "");
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_FV;
-    _input_size_in_bits = EVEX_32bit;
-  }
-  emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_NONE, vector_len);
+  InstructionMark im(this);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x5C);
+  emit_operand(dst, src);
 }
 
 void Assembler::mulpd(XMMRegister dst, XMMRegister src) {
-  _instruction_uses_vl = true;
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  if (VM_Version::supports_evex()) {
-    emit_simd_arith_q(0x59, dst, src, VEX_SIMD_66);
-  } else {
-    emit_simd_arith(0x59, dst, src, VEX_SIMD_66);
-  }
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x59);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::mulpd(XMMRegister dst, Address src) {
-  _instruction_uses_vl = true;
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  if (VM_Version::supports_evex()) {
-    emit_simd_arith_q(0x59, dst, src, VEX_SIMD_66);
-  } else {
-    emit_simd_arith(0x59, dst, src, VEX_SIMD_66);
-  }
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
+  simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x59);
+  emit_operand(dst, src);
 }
 
 void Assembler::mulps(XMMRegister dst, XMMRegister src) {
-  _instruction_uses_vl = true;
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_simd_arith(0x59, dst, src, VEX_SIMD_NONE);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x59);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vmulpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
-  _instruction_uses_vl = true;
   assert(VM_Version::supports_avx(), "");
-  if (VM_Version::supports_evex()) {
-    emit_vex_arith_q(0x59, dst, nds, src, VEX_SIMD_66, vector_len);
-  } else {
-    emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_66, vector_len);
-  }
+  InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x59);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vmulps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
-  _instruction_uses_vl = true;
   assert(VM_Version::supports_avx(), "");
-  emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_NONE, vector_len);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x59);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vmulpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
-  _instruction_uses_vl = true;
   assert(VM_Version::supports_avx(), "");
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_FV;
-    _input_size_in_bits = EVEX_64bit;
-    emit_vex_arith_q(0x59, dst, nds, src, VEX_SIMD_66, vector_len);
-  } else {
-    emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_66, vector_len);
-  }
+  InstructionMark im(this);
+  InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x59);
+  emit_operand(dst, src);
 }
 
 void Assembler::vmulps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
-  _instruction_uses_vl = true;
   assert(VM_Version::supports_avx(), "");
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_FV;
-    _input_size_in_bits = EVEX_32bit;
-  }
-  emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_NONE, vector_len);
+  InstructionMark im(this);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x59);
+  emit_operand(dst, src);
 }
 
 void Assembler::divpd(XMMRegister dst, XMMRegister src) {
-  _instruction_uses_vl = true;
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  if (VM_Version::supports_evex()) {
-    emit_simd_arith_q(0x5E, dst, src, VEX_SIMD_66);
-  } else {
-    emit_simd_arith(0x5E, dst, src, VEX_SIMD_66);
-  }
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x5E);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::divps(XMMRegister dst, XMMRegister src) {
-  _instruction_uses_vl = true;
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_simd_arith(0x5E, dst, src, VEX_SIMD_NONE);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x5E);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vdivpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
-  _instruction_uses_vl = true;
   assert(VM_Version::supports_avx(), "");
-  if (VM_Version::supports_evex()) {
-    emit_vex_arith_q(0x5E, dst, nds, src, VEX_SIMD_66, vector_len);
-  } else {
-    emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_66, vector_len);
-  }
+  InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x5E);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vdivps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
-  _instruction_uses_vl = true;
   assert(VM_Version::supports_avx(), "");
-  emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_NONE, vector_len);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x5E);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vdivpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
-  _instruction_uses_vl = true;
   assert(VM_Version::supports_avx(), "");
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_FV;
-    _input_size_in_bits = EVEX_64bit;
-    emit_vex_arith_q(0x5E, dst, nds, src, VEX_SIMD_66, vector_len);
-  } else {
-    emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_66, vector_len);
-  }
+  InstructionMark im(this);
+  InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x5E);
+  emit_operand(dst, src);
 }
 
 void Assembler::vdivps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
-  _instruction_uses_vl = true;
   assert(VM_Version::supports_avx(), "");
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_FV;
-    _input_size_in_bits = EVEX_32bit;
-  }
-  emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_NONE, vector_len);
+  InstructionMark im(this);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x5E);
+  emit_operand(dst, src);
 }
 
 void Assembler::vsqrtpd(XMMRegister dst, XMMRegister src, int vector_len) {
-  _instruction_uses_vl = true;
   assert(VM_Version::supports_avx(), "");
-  if (VM_Version::supports_evex()) {
-    emit_vex_arith_q(0x51, dst, xnoreg, src, VEX_SIMD_66, vector_len);
-  } else {
-    emit_vex_arith(0x51, dst, xnoreg, src, VEX_SIMD_66, vector_len);
-  }
+  InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int nds_enc = 0;
+  int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x51);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vsqrtpd(XMMRegister dst, Address src, int vector_len) {
-  _instruction_uses_vl = true;
   assert(VM_Version::supports_avx(), "");
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_FV;
-    _input_size_in_bits = EVEX_64bit;
-    emit_vex_arith_q(0x51, dst, xnoreg, src, VEX_SIMD_66, vector_len);
-  } else {
-    emit_vex_arith(0x51, dst, xnoreg, src, VEX_SIMD_66, vector_len);
-  }
+  InstructionMark im(this);
+  InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
+  vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x51);
+  emit_operand(dst, src);
 }
 
 void Assembler::andpd(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  if (VM_Version::supports_avx512dq()) {
-    emit_simd_arith_q(0x54, dst, src, VEX_SIMD_66);
-  } else {
-    emit_simd_arith(0x54, dst, src, VEX_SIMD_66, /* no_mask_reg */ false, /* legacy_mode */ true);
-  }
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x54);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::andps(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  emit_simd_arith(0x54, dst, src, VEX_SIMD_NONE, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_dq);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x54);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::andps(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_FV;
-    _input_size_in_bits = EVEX_32bit;
-  }
-  emit_simd_arith(0x54, dst, src, VEX_SIMD_NONE, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_dq);
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
+  simd_prefix(dst, dst, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x54);
+  emit_operand(dst, src);
 }
 
 void Assembler::andpd(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  if (VM_Version::supports_avx512dq()) {
-    _tuple_type = EVEX_FV;
-    _input_size_in_bits = EVEX_64bit;
-    emit_simd_arith_q(0x54, dst, src, VEX_SIMD_66);
-  } else {
-    emit_simd_arith(0x54, dst, src, VEX_SIMD_66, /* no_mask_reg */ false, /* legacy_mode */ true);
-  }
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
+  simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x54);
+  emit_operand(dst, src);
 }
 
 void Assembler::vandpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
   assert(VM_Version::supports_avx(), "");
-  if (VM_Version::supports_avx512dq()) {
-    emit_vex_arith_q(0x54, dst, nds, src, VEX_SIMD_66, vector_len);
-  } else {
-    emit_vex_arith(0x54, dst, nds, src, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ true);
-  }
+  InstructionAttr attributes(vector_len, /* vex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x54);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vandps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
   assert(VM_Version::supports_avx(), "");
-  emit_vex_arith(0x54, dst, nds, src, VEX_SIMD_NONE, vector_len, /* no_mask_reg */ false,  /* legacy_mode */ _legacy_mode_dq);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x54);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vandpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
   assert(VM_Version::supports_avx(), "");
-  if (VM_Version::supports_avx512dq()) {
-    _tuple_type = EVEX_FV;
-    _input_size_in_bits = EVEX_64bit;
-    emit_vex_arith_q(0x54, dst, nds, src, VEX_SIMD_66, vector_len);
-  } else {
-    emit_vex_arith(0x54, dst, nds, src, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ true);
-  }
+  InstructionMark im(this);
+  InstructionAttr attributes(vector_len, /* vex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x54);
+  emit_operand(dst, src);
 }
 
 void Assembler::vandps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
   assert(VM_Version::supports_avx(), "");
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_FV;
-    _input_size_in_bits = EVEX_32bit;
-  }
-  emit_vex_arith(0x54, dst, nds, src, VEX_SIMD_NONE, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_dq);
+  InstructionMark im(this);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x54);
+  emit_operand(dst, src);
 }
 
 void Assembler::unpckhpd(XMMRegister dst, XMMRegister src) {
-  _instruction_uses_vl = true;
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  if (VM_Version::supports_evex()) {
-    emit_simd_arith_q(0x15, dst, src, VEX_SIMD_66);
-  } else {
-    emit_simd_arith(0x15, dst, src, VEX_SIMD_66);
-  }
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x15);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::unpcklpd(XMMRegister dst, XMMRegister src) {
-  _instruction_uses_vl = true;
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  if (VM_Version::supports_evex()) {
-    emit_simd_arith_q(0x14, dst, src, VEX_SIMD_66);
-  } else {
-    emit_simd_arith(0x14, dst, src, VEX_SIMD_66);
-  }
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x14);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::xorpd(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  if (VM_Version::supports_avx512dq()) {
-    emit_simd_arith_q(0x57, dst, src, VEX_SIMD_66);
-  } else {
-    emit_simd_arith(0x57, dst, src, VEX_SIMD_66, /* no_mask_reg */ false, /* legacy_mode */ true);
-  }
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x57);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::xorps(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  emit_simd_arith(0x57, dst, src, VEX_SIMD_NONE, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_dq);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x57);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::xorpd(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  if (VM_Version::supports_avx512dq()) {
-    _tuple_type = EVEX_FV;
-    _input_size_in_bits = EVEX_64bit;
-    emit_simd_arith_q(0x57, dst, src, VEX_SIMD_66);
-  } else {
-    emit_simd_arith(0x57, dst, src, VEX_SIMD_66, /* no_mask_reg */ false, /* legacy_mode */ true);
-  }
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
+  simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x57);
+  emit_operand(dst, src);
 }
 
 void Assembler::xorps(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_FV;
-    _input_size_in_bits = EVEX_32bit;
-  }
-  emit_simd_arith(0x57, dst, src, VEX_SIMD_NONE, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_dq);
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
+  simd_prefix(dst, dst, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x57);
+  emit_operand(dst, src);
 }
 
 void Assembler::vxorpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
   assert(VM_Version::supports_avx(), "");
-  if (VM_Version::supports_avx512dq()) {
-    emit_vex_arith_q(0x57, dst, nds, src, VEX_SIMD_66, vector_len);
-  } else {
-    emit_vex_arith(0x57, dst, nds, src, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ true);
-  }
+  InstructionAttr attributes(vector_len, /* vex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x57);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vxorps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
   assert(VM_Version::supports_avx(), "");
-  emit_vex_arith(0x57, dst, nds, src, VEX_SIMD_NONE, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_dq);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x57);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vxorpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
   assert(VM_Version::supports_avx(), "");
-  if (VM_Version::supports_avx512dq()) {
-    _tuple_type = EVEX_FV;
-    _input_size_in_bits = EVEX_64bit;
-    emit_vex_arith_q(0x57, dst, nds, src, VEX_SIMD_66, vector_len);
-  } else {
-    emit_vex_arith(0x57, dst, nds, src, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ true);
-  }
+  InstructionMark im(this);
+  InstructionAttr attributes(vector_len, /* vex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x57);
+  emit_operand(dst, src);
 }
 
 void Assembler::vxorps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
   assert(VM_Version::supports_avx(), "");
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_FV;
-    _input_size_in_bits = EVEX_32bit;
-  }
-  emit_vex_arith(0x57, dst, nds, src, VEX_SIMD_NONE, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_dq);
+  InstructionMark im(this);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x57);
+  emit_operand(dst, src);
 }
 
 // Integer vector arithmetic
 void Assembler::vphaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
   assert(VM_Version::supports_avx() && (vector_len == 0) ||
          VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
-  int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector_len, VEX_OPCODE_0F_38, /* legacy_mode */ true);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
   emit_int8(0x01);
   emit_int8((unsigned char)(0xC0 | encode));
 }
@@ -4457,280 +4714,324 @@
 void Assembler::vphaddd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
   assert(VM_Version::supports_avx() && (vector_len == 0) ||
          VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
-  int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector_len, VEX_OPCODE_0F_38, /* legacy_mode */ true);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
   emit_int8(0x02);
   emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::paddb(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_simd_arith(0xFC, dst, src, VEX_SIMD_66, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0xFC);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::paddw(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_simd_arith(0xFD, dst, src, VEX_SIMD_66, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0xFD);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::paddd(XMMRegister dst, XMMRegister src) {
-  _instruction_uses_vl = true;
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_simd_arith(0xFE, dst, src, VEX_SIMD_66);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0xFE);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::paddq(XMMRegister dst, XMMRegister src) {
-  _instruction_uses_vl = true;
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  if (VM_Version::supports_evex()) {
-    emit_simd_arith_q(0xD4, dst, src, VEX_SIMD_66);
-  } else {
-    emit_simd_arith(0xD4, dst, src, VEX_SIMD_66);
-  }
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0xD4);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::phaddw(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse3(), ""));
-  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, /* no_mask_reg */ false,
-                                      VEX_OPCODE_0F_38, /* rex_w */ false, AVX_128bit, /* legacy_mode */ true);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
   emit_int8(0x01);
   emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::phaddd(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse3(), ""));
-  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, /* no_mask_reg */ false,
-                                      VEX_OPCODE_0F_38, /* rex_w */ false, AVX_128bit, /* legacy_mode */ true);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
   emit_int8(0x02);
   emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vpaddb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
   assert(UseAVX > 0, "requires some form of AVX");
-  emit_vex_arith(0xFC, dst, nds, src, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0xFC);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vpaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
   assert(UseAVX > 0, "requires some form of AVX");
-  emit_vex_arith(0xFD, dst, nds, src, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0xFD);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vpaddd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
-  _instruction_uses_vl = true;
   assert(UseAVX > 0, "requires some form of AVX");
-  emit_vex_arith(0xFE, dst, nds, src, VEX_SIMD_66, vector_len);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0xFE);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vpaddq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
-  _instruction_uses_vl = true;
   assert(UseAVX > 0, "requires some form of AVX");
-  if (VM_Version::supports_evex()) {
-    emit_vex_arith_q(0xD4, dst, nds, src, VEX_SIMD_66, vector_len);
-  } else {
-    emit_vex_arith(0xD4, dst, nds, src, VEX_SIMD_66, vector_len);
-  }
+  InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0xD4);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vpaddb(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
   assert(UseAVX > 0, "requires some form of AVX");
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_FVM;
-  }
-  emit_vex_arith(0xFC, dst, nds, src, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw);
+  InstructionMark im(this);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0xFC);
+  emit_operand(dst, src);
 }
 
 void Assembler::vpaddw(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
   assert(UseAVX > 0, "requires some form of AVX");
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_FVM;
-  }
-  emit_vex_arith(0xFD, dst, nds, src, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw);
+  InstructionMark im(this);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0xFD);
+  emit_operand(dst, src);
 }
 
 void Assembler::vpaddd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
-  _instruction_uses_vl = true;
   assert(UseAVX > 0, "requires some form of AVX");
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_FV;
-    _input_size_in_bits = EVEX_32bit;
-  }
-  emit_vex_arith(0xFE, dst, nds, src, VEX_SIMD_66, vector_len);
+  InstructionMark im(this);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0xFE);
+  emit_operand(dst, src);
 }
 
 void Assembler::vpaddq(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
-  _instruction_uses_vl = true;
   assert(UseAVX > 0, "requires some form of AVX");
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_FV;
-    _input_size_in_bits = EVEX_64bit;
-    emit_vex_arith_q(0xD4, dst, nds, src, VEX_SIMD_66, vector_len);
-  } else {
-    emit_vex_arith(0xD4, dst, nds, src, VEX_SIMD_66, vector_len);
-  }
+  InstructionMark im(this);
+  InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0xD4);
+  emit_operand(dst, src);
 }
 
 void Assembler::psubb(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_simd_arith(0xF8, dst, src, VEX_SIMD_66, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0xF8);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::psubw(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_simd_arith(0xF9, dst, src, VEX_SIMD_66, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0xF9);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::psubd(XMMRegister dst, XMMRegister src) {
-  _instruction_uses_vl = true;
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0xFA);
+  emit_int8((unsigned char)(0xC0 | encode));
+}
+
+void Assembler::psubq(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_simd_arith(0xFA, dst, src, VEX_SIMD_66);
-}
-
-void Assembler::psubq(XMMRegister dst, XMMRegister src) {
-  _instruction_uses_vl = true;
-  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  if (VM_Version::supports_evex()) {
-    emit_simd_arith_q(0xFB, dst, src, VEX_SIMD_66);
-  } else {
-    emit_simd_arith(0xFB, dst, src, VEX_SIMD_66);
-  }
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0xFB);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vpsubb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
   assert(UseAVX > 0, "requires some form of AVX");
-  emit_vex_arith(0xF8, dst, nds, src, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0xF8);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vpsubw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
   assert(UseAVX > 0, "requires some form of AVX");
-  emit_vex_arith(0xF9, dst, nds, src, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0xF9);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vpsubd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
-  _instruction_uses_vl = true;
   assert(UseAVX > 0, "requires some form of AVX");
-  emit_vex_arith(0xFA, dst, nds, src, VEX_SIMD_66, vector_len);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0xFA);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vpsubq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
-  _instruction_uses_vl = true;
   assert(UseAVX > 0, "requires some form of AVX");
-  if (VM_Version::supports_evex()) {
-    emit_vex_arith_q(0xFB, dst, nds, src, VEX_SIMD_66, vector_len);
-  } else {
-    emit_vex_arith(0xFB, dst, nds, src, VEX_SIMD_66, vector_len);
-  }
+  InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0xFB);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vpsubb(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
   assert(UseAVX > 0, "requires some form of AVX");
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_FVM;
-  }
-  emit_vex_arith(0xF8, dst, nds, src, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw);
+  InstructionMark im(this);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0xF8);
+  emit_operand(dst, src);
 }
 
 void Assembler::vpsubw(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
   assert(UseAVX > 0, "requires some form of AVX");
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_FVM;
-  }
-  emit_vex_arith(0xF9, dst, nds, src, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw);
+  InstructionMark im(this);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0xF9);
+  emit_operand(dst, src);
 }
 
 void Assembler::vpsubd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
-  _instruction_uses_vl = true;
   assert(UseAVX > 0, "requires some form of AVX");
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_FV;
-    _input_size_in_bits = EVEX_32bit;
-  }
-  emit_vex_arith(0xFA, dst, nds, src, VEX_SIMD_66, vector_len);
+  InstructionMark im(this);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0xFA);
+  emit_operand(dst, src);
 }
 
 void Assembler::vpsubq(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
-  _instruction_uses_vl = true;
   assert(UseAVX > 0, "requires some form of AVX");
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_FV;
-    _input_size_in_bits = EVEX_64bit;
-    emit_vex_arith_q(0xFB, dst, nds, src, VEX_SIMD_66, vector_len);
-  } else {
-    emit_vex_arith(0xFB, dst, nds, src, VEX_SIMD_66, vector_len);
-  }
+  InstructionMark im(this);
+  InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0xFB);
+  emit_operand(dst, src);
 }
 
 void Assembler::pmullw(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_simd_arith(0xD5, dst, src, VEX_SIMD_66, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0xD5);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::pmulld(XMMRegister dst, XMMRegister src) {
-  _instruction_uses_vl = true;
   assert(VM_Version::supports_sse4_1(), "");
-  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66,
-                                      /* no_mask_reg */ false, VEX_OPCODE_0F_38);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
   emit_int8(0x40);
   emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vpmullw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
   assert(UseAVX > 0, "requires some form of AVX");
-  emit_vex_arith(0xD5, dst, nds, src, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0xD5);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vpmulld(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
-  _instruction_uses_vl = true;
   assert(UseAVX > 0, "requires some form of AVX");
-  int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector_len, VEX_OPCODE_0F_38);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
   emit_int8(0x40);
   emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vpmullq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
   assert(UseAVX > 2, "requires some form of AVX");
-  int src_enc = src->encoding();
-  int dst_enc = dst->encoding();
+  InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
   int nds_enc = nds->is_valid() ? nds->encoding() : 0;
-  int encode = vex_prefix_and_encode(dst_enc, nds_enc, src_enc, VEX_SIMD_66, VEX_OPCODE_0F_38,
-                                     /* vex_w */ true, vector_len, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false);
+  int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
   emit_int8(0x40);
   emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vpmullw(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
   assert(UseAVX > 0, "requires some form of AVX");
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_FVM;
-  }
-  emit_vex_arith(0xD5, dst, nds, src, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw);
+  InstructionMark im(this);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0xD5);
+  emit_operand(dst, src);
 }
 
 void Assembler::vpmulld(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
-  _instruction_uses_vl = true;
   assert(UseAVX > 0, "requires some form of AVX");
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_FV;
-    _input_size_in_bits = EVEX_32bit;
-  }
-  InstructionMark im(this);
-  int dst_enc = dst->encoding();
+  InstructionMark im(this);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
   int nds_enc = nds->is_valid() ? nds->encoding() : 0;
-  vex_prefix(src, nds_enc, dst_enc, VEX_SIMD_66,
-             VEX_OPCODE_0F_38, /* vex_w */ false, vector_len);
+  vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
   emit_int8(0x40);
   emit_operand(dst, src);
 }
 
 void Assembler::vpmullq(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
   assert(UseAVX > 0, "requires some form of AVX");
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_FV;
-    _input_size_in_bits = EVEX_64bit;
-  }
-  InstructionMark im(this);
-  int dst_enc = dst->encoding();
+  InstructionMark im(this);
+  InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
   int nds_enc = nds->is_valid() ? nds->encoding() : 0;
-  vex_prefix(src, nds_enc, dst_enc, VEX_SIMD_66,
-             VEX_OPCODE_0F_38, /* vex_w */ true, vector_len, /* legacy_mode */ _legacy_mode_dq);
+  vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
   emit_int8(0x40);
   emit_operand(dst, src);
 }
@@ -4738,29 +5039,29 @@
 // Shift packed integers left by specified number of bits.
 void Assembler::psllw(XMMRegister dst, int shift) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
   // XMM6 is for /6 encoding: 66 0F 71 /6 ib
-  int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66, /* no_mask_reg */ false, VEX_OPCODE_0F,
-                                      /* rex_w */ false, AVX_128bit, /* legacy_mode */ _legacy_mode_bw);
+  int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
   emit_int8(0x71);
   emit_int8((unsigned char)(0xC0 | encode));
   emit_int8(shift & 0xFF);
 }
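  (Aside, not part of the changeset: the xmm6 passed to simd_prefix_and_encode() above is not a real operand. The immediate-shift forms carry an opcode extension in the ModRM reg field -- 66 0F 71 /6 ib for psllw -- and reusing xmm6, whose encoding is 6, through the normal register path produces exactly that digit; the functions below use xmm2 for the /2 logical-right forms and xmm4 for the /4 arithmetic-right forms in the same way. Sketch only, with dst_enc standing in for the destination register's encoding.)

  // Illustrative sketch: byte sequence for psllw xmm(dst_enc), imm8 is
  //   0x66, 0x0F, 0x71, ModRM, imm8
  unsigned char modrm = (unsigned char)(0xC0 | (6 << 3) | (dst_enc & 7)); // reg field carries the /6 digit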
 
 void Assembler::pslld(XMMRegister dst, int shift) {
-  _instruction_uses_vl = true;
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
   // XMM6 is for /6 encoding: 66 0F 72 /6 ib
-  int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66, /* no_mask_reg */ false);
+  int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
   emit_int8(0x72);
   emit_int8((unsigned char)(0xC0 | encode));
   emit_int8(shift & 0xFF);
 }
 
 void Assembler::psllq(XMMRegister dst, int shift) {
-  _instruction_uses_vl = true;
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
   // XMM6 is for /6 encoding: 66 0F 73 /6 ib
-  int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66, /* no_mask_reg */ false, VEX_OPCODE_0F, /* rex_w */ true);
+  int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
   emit_int8(0x73);
   emit_int8((unsigned char)(0xC0 | encode));
   emit_int8(shift & 0xFF);
@@ -4768,102 +5069,111 @@
 
 void Assembler::psllw(XMMRegister dst, XMMRegister shift) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_simd_arith(0xF1, dst, shift, VEX_SIMD_66, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = simd_prefix_and_encode(dst, dst, shift, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0xF1);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::pslld(XMMRegister dst, XMMRegister shift) {
-  _instruction_uses_vl = true;
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_simd_arith(0xF2, dst, shift, VEX_SIMD_66);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = simd_prefix_and_encode(dst, dst, shift, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0xF2);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::psllq(XMMRegister dst, XMMRegister shift) {
-  _instruction_uses_vl = true;
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  if (VM_Version::supports_evex()) {
-    emit_simd_arith_q(0xF3, dst, shift, VEX_SIMD_66);
-  } else {
-    emit_simd_arith(0xF3, dst, shift, VEX_SIMD_66);
-  }
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = simd_prefix_and_encode(dst, dst, shift, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0xF3);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vpsllw(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
   assert(UseAVX > 0, "requires some form of AVX");
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
   // XMM6 is for /6 encoding: 66 0F 71 /6 ib
-  emit_vex_arith(0x71, xmm6, dst, src, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw);
+  int encode = vex_prefix_and_encode(xmm6->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x71);
+  emit_int8((unsigned char)(0xC0 | encode));
   emit_int8(shift & 0xFF);
 }
 
 void Assembler::vpslld(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
-  _instruction_uses_vl = true;
   assert(UseAVX > 0, "requires some form of AVX");
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
   // XMM6 is for /6 encoding: 66 0F 72 /6 ib
-  emit_vex_arith(0x72, xmm6, dst, src, VEX_SIMD_66, vector_len);
+  int encode = vex_prefix_and_encode(xmm6->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x72);
+  emit_int8((unsigned char)(0xC0 | encode));
   emit_int8(shift & 0xFF);
 }
 
 void Assembler::vpsllq(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
-  _instruction_uses_vl = true;
   assert(UseAVX > 0, "requires some form of AVX");
+  InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
   // XMM6 is for /6 encoding: 66 0F 73 /6 ib
-  if (VM_Version::supports_evex()) {
-    emit_vex_arith_q(0x73, xmm6, dst, src, VEX_SIMD_66, vector_len);
-  } else {
-    emit_vex_arith(0x73, xmm6, dst, src, VEX_SIMD_66, vector_len);
-  }
+  int encode = vex_prefix_and_encode(xmm6->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x73);
+  emit_int8((unsigned char)(0xC0 | encode));
   emit_int8(shift & 0xFF);
 }
 
 void Assembler::vpsllw(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
   assert(UseAVX > 0, "requires some form of AVX");
-  emit_vex_arith(0xF1, dst, src, shift, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0xF1);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vpslld(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
-  _instruction_uses_vl = true;
   assert(UseAVX > 0, "requires some form of AVX");
-  emit_vex_arith(0xF2, dst, src, shift, VEX_SIMD_66, vector_len);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0xF2);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vpsllq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
-  _instruction_uses_vl = true;
   assert(UseAVX > 0, "requires some form of AVX");
-  if (VM_Version::supports_evex()) {
-    emit_vex_arith_q(0xF3, dst, src, shift, VEX_SIMD_66, vector_len);
-  } else {
-    emit_vex_arith(0xF3, dst, src, shift, VEX_SIMD_66, vector_len);
-  }
+  InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0xF3);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 // Shift packed integers logically right by specified number of bits.
 void Assembler::psrlw(XMMRegister dst, int shift) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
   // XMM2 is for /2 encoding: 66 0F 71 /2 ib
-  int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66, /* no_mask_reg */ false,
-                                      VEX_OPCODE_0F, /* rex_w */ false, AVX_128bit, /* legacy_mode */ _legacy_mode_bw);
+  int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
   emit_int8(0x71);
   emit_int8((unsigned char)(0xC0 | encode));
   emit_int8(shift & 0xFF);
 }
 
 void Assembler::psrld(XMMRegister dst, int shift) {
-  _instruction_uses_vl = true;
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
   // XMM2 is for /2 encoding: 66 0F 72 /2 ib
-  int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66, /* no_mask_reg */ false);
+  int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
   emit_int8(0x72);
   emit_int8((unsigned char)(0xC0 | encode));
   emit_int8(shift & 0xFF);
 }
 
 void Assembler::psrlq(XMMRegister dst, int shift) {
-  _instruction_uses_vl = true;
   // Do not confuse it with psrldq SSE2 instruction which
   // shifts 128 bit value in xmm register by number of bytes.
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
   // XMM2 is for /2 encoding: 66 0F 73 /2 ib
-  int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66, /* no_mask_reg */ false,
-                                      VEX_OPCODE_0F, /* rex_w */ VM_Version::supports_evex());
+  int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
   emit_int8(0x73);
   emit_int8((unsigned char)(0xC0 | encode));
   emit_int8(shift & 0xFF);
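  (Aside, not part of the changeset: the "do not confuse" note above exists because psrlq and psrldq share the 66 0F 73 opcode and differ only in the ModRM digit -- psrlq shifts each 64-bit lane right by a bit count, psrldq shifts the whole 128-bit register right by a byte count. Sketch only; dst_enc is illustrative.)

  // Illustrative sketch:
  const unsigned char modrm_psrlq  = (unsigned char)(0xC0 | (2 << 3) | (dst_enc & 7)); // 66 0F 73 /2 ib: bit shift per 64-bit lane
  const unsigned char modrm_psrldq = (unsigned char)(0xC0 | (3 << 3) | (dst_enc & 7)); // 66 0F 73 /3 ib: byte shift of the 128-bit value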
@@ -4871,89 +5181,98 @@
 
 void Assembler::psrlw(XMMRegister dst, XMMRegister shift) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_simd_arith(0xD1, dst, shift, VEX_SIMD_66, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = simd_prefix_and_encode(dst, dst, shift, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0xD1);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::psrld(XMMRegister dst, XMMRegister shift) {
-  _instruction_uses_vl = true;
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_simd_arith(0xD2, dst, shift, VEX_SIMD_66);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = simd_prefix_and_encode(dst, dst, shift, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0xD2);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::psrlq(XMMRegister dst, XMMRegister shift) {
-  _instruction_uses_vl = true;
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  if (VM_Version::supports_evex()) {
-    emit_simd_arith_q(0xD3, dst, shift, VEX_SIMD_66);
-  } else {
-    emit_simd_arith(0xD3, dst, shift, VEX_SIMD_66);
-  }
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = simd_prefix_and_encode(dst, dst, shift, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0xD3);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vpsrlw(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
   assert(UseAVX > 0, "requires some form of AVX");
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
   // XMM2 is for /2 encoding: 66 0F 71 /2 ib
-  emit_vex_arith(0x71, xmm2, dst, src, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw);
+  int encode = vex_prefix_and_encode(xmm2->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x71);
+  emit_int8((unsigned char)(0xC0 | encode));
   emit_int8(shift & 0xFF);
 }
 
 void Assembler::vpsrld(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
-  _instruction_uses_vl = true;
   assert(UseAVX > 0, "requires some form of AVX");
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
   // XMM2 is for /2 encoding: 66 0F 72 /2 ib
-  emit_vex_arith(0x72, xmm2, dst, src, VEX_SIMD_66, vector_len);
+  int encode = vex_prefix_and_encode(xmm2->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x72);
+  emit_int8((unsigned char)(0xC0 | encode));
   emit_int8(shift & 0xFF);
 }
 
 void Assembler::vpsrlq(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
-  _instruction_uses_vl = true;
   assert(UseAVX > 0, "requires some form of AVX");
+  InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
   // XMM2 is for /2 encoding: 66 0F 73 /2 ib
-  if (VM_Version::supports_evex()) {
-    emit_vex_arith_q(0x73, xmm2, dst, src, VEX_SIMD_66, vector_len);
-  } else {
-    emit_vex_arith(0x73, xmm2, dst, src, VEX_SIMD_66, vector_len);
-  }
+  int encode = vex_prefix_and_encode(xmm2->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x73);
+  emit_int8((unsigned char)(0xC0 | encode));
   emit_int8(shift & 0xFF);
 }
 
 void Assembler::vpsrlw(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
   assert(UseAVX > 0, "requires some form of AVX");
-  emit_vex_arith(0xD1, dst, src, shift, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0xD1);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vpsrld(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
-  _instruction_uses_vl = true;
   assert(UseAVX > 0, "requires some form of AVX");
-  emit_vex_arith(0xD2, dst, src, shift, VEX_SIMD_66, vector_len);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0xD2);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vpsrlq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
-  _instruction_uses_vl = true;
   assert(UseAVX > 0, "requires some form of AVX");
-  if (VM_Version::supports_evex()) {
-    emit_vex_arith_q(0xD3, dst, src, shift, VEX_SIMD_66, vector_len);
-  } else {
-    emit_vex_arith(0xD3, dst, src, shift, VEX_SIMD_66, vector_len);
-  }
+  InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0xD3);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 // Shift packed integers arithmetically right by specified number of bits.
 void Assembler::psraw(XMMRegister dst, int shift) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
   // XMM4 is for /4 encoding: 66 0F 71 /4 ib
-  int encode = simd_prefix_and_encode(xmm4, dst, dst, VEX_SIMD_66, /* no_mask_reg */ false,
-                                      VEX_OPCODE_0F, /* rex_w */ false, AVX_128bit, /* legacy_mode */ _legacy_mode_bw);
+  int encode = simd_prefix_and_encode(xmm4, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
   emit_int8(0x71);
   emit_int8((unsigned char)(0xC0 | encode));
   emit_int8(shift & 0xFF);
 }
 
 void Assembler::psrad(XMMRegister dst, int shift) {
-  _instruction_uses_vl = true;
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
   // XMM4 is for /4 encoding: 66 0F 72 /4 ib
-  int encode = simd_prefix_and_encode(xmm4, dst, dst, VEX_SIMD_66, /* no_mask_reg */ false);
+  int encode = simd_prefix_and_encode(xmm4, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
   emit_int8(0x72);
   emit_int8((unsigned char)(0xC0 | encode));
   emit_int8(shift & 0xFF);
@@ -4961,128 +5280,157 @@
 
 void Assembler::psraw(XMMRegister dst, XMMRegister shift) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_simd_arith(0xE1, dst, shift, VEX_SIMD_66, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = simd_prefix_and_encode(dst, dst, shift, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0xE1);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::psrad(XMMRegister dst, XMMRegister shift) {
-  _instruction_uses_vl = true;
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_simd_arith(0xE2, dst, shift, VEX_SIMD_66);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = simd_prefix_and_encode(dst, dst, shift, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0xE2);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vpsraw(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
   assert(UseAVX > 0, "requires some form of AVX");
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
   // XMM4 is for /4 encoding: 66 0F 71 /4 ib
-  emit_vex_arith(0x71, xmm4, dst, src, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw);
+  int encode = vex_prefix_and_encode(xmm4->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x71);
+  emit_int8((unsigned char)(0xC0 | encode));
   emit_int8(shift & 0xFF);
 }
 
 void Assembler::vpsrad(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
-  _instruction_uses_vl = true;
   assert(UseAVX > 0, "requires some form of AVX");
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
   // XMM4 is for /4 encoding: 66 0F 72 /4 ib
-  emit_vex_arith(0x72, xmm4, dst, src, VEX_SIMD_66, vector_len);
+  int encode = vex_prefix_and_encode(xmm4->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x72);
+  emit_int8((unsigned char)(0xC0 | encode));
   emit_int8(shift & 0xFF);
 }
 
 void Assembler::vpsraw(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
   assert(UseAVX > 0, "requires some form of AVX");
-  emit_vex_arith(0xE1, dst, src, shift, VEX_SIMD_66, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_bw);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0xE1);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vpsrad(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
-  _instruction_uses_vl = true;
   assert(UseAVX > 0, "requires some form of AVX");
-  emit_vex_arith(0xE2, dst, src, shift, VEX_SIMD_66, vector_len);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0xE2);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 
 // Logical operations on packed integers
 void Assembler::pand(XMMRegister dst, XMMRegister src) {
-  _instruction_uses_vl = true;
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_simd_arith(0xDB, dst, src, VEX_SIMD_66);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0xDB);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vpand(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
-  _instruction_uses_vl = true;
   assert(UseAVX > 0, "requires some form of AVX");
-  emit_vex_arith(0xDB, dst, nds, src, VEX_SIMD_66, vector_len);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0xDB);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vpand(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
-  _instruction_uses_vl = true;
   assert(UseAVX > 0, "requires some form of AVX");
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_FV;
-    _input_size_in_bits = EVEX_32bit;
-  }
-  emit_vex_arith(0xDB, dst, nds, src, VEX_SIMD_66, vector_len);
+  InstructionMark im(this);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0xDB);
+  emit_operand(dst, src);
 }
 
 void Assembler::pandn(XMMRegister dst, XMMRegister src) {
-  _instruction_uses_vl = true;
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  if (VM_Version::supports_evex()) {
-    emit_simd_arith_q(0xDF, dst, src, VEX_SIMD_66);
-  }
-  else {
-    emit_simd_arith(0xDF, dst, src, VEX_SIMD_66);
-  }
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0xDF);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::por(XMMRegister dst, XMMRegister src) {
-  _instruction_uses_vl = true;
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_simd_arith(0xEB, dst, src, VEX_SIMD_66);
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0xEB);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vpor(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
-  _instruction_uses_vl = true;
   assert(UseAVX > 0, "requires some form of AVX");
-  emit_vex_arith(0xEB, dst, nds, src, VEX_SIMD_66, vector_len);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0xEB);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vpor(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
-  _instruction_uses_vl = true;
   assert(UseAVX > 0, "requires some form of AVX");
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_FV;
-    _input_size_in_bits = EVEX_32bit;
-  }
-  emit_vex_arith(0xEB, dst, nds, src, VEX_SIMD_66, vector_len);
+  InstructionMark im(this);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0xEB);
+  emit_operand(dst, src);
 }
 
 void Assembler::pxor(XMMRegister dst, XMMRegister src) {
-  _instruction_uses_vl = true;
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_simd_arith(0xEF, dst, src, VEX_SIMD_66);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0xEF);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vpxor(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
-  _instruction_uses_vl = true;
   assert(UseAVX > 0, "requires some form of AVX");
-  emit_vex_arith(0xEF, dst, nds, src, VEX_SIMD_66, vector_len);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0xEF);
+  emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vpxor(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
-  _instruction_uses_vl = true;
   assert(UseAVX > 0, "requires some form of AVX");
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_FV;
-    _input_size_in_bits = EVEX_32bit;
-  }
-  emit_vex_arith(0xEF, dst, nds, src, VEX_SIMD_66, vector_len);
+  InstructionMark im(this);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0xEF);
+  emit_operand(dst, src);
 }
 
 
 void Assembler::vinsertf128h(XMMRegister dst, XMMRegister nds, XMMRegister src) {
   assert(VM_Version::supports_avx(), "");
-  int vector_len = AVX_256bit;
-  if (VM_Version::supports_evex()) {
-    vector_len = AVX_512bit;
-  }
-  int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector_len, VEX_OPCODE_0F_3A);
+  int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit;
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
   emit_int8(0x18);
   emit_int8((unsigned char)(0xC0 | encode));
   // 0x00 - insert into lower 128 bits
@@ -5090,45 +5438,38 @@
   emit_int8(0x01);
 }
 
-void Assembler::vinsertf64x4h(XMMRegister dst, XMMRegister nds, XMMRegister src) {
+void Assembler::vinsertf64x4h(XMMRegister dst, XMMRegister nds, XMMRegister src, int value) {
   assert(VM_Version::supports_evex(), "");
-  int vector_len = AVX_512bit;
-  int src_enc = src->encoding();
-  int dst_enc = dst->encoding();
+  InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
   int nds_enc = nds->is_valid() ? nds->encoding() : 0;
-  int encode = vex_prefix_and_encode(dst_enc, nds_enc, src_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A,
-                                     /* vex_w */ true, vector_len, /* legacy_mode */ false, /* no_mask_reg */ false);
+  int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
   emit_int8(0x1A);
   emit_int8((unsigned char)(0xC0 | encode));
   // 0x00 - insert into lower 256 bits
   // 0x01 - insert into upper 256 bits
-  emit_int8(0x01);
-}
-
-void Assembler::vinsertf64x4h(XMMRegister dst, Address src) {
+  emit_int8(value & 0x01);
+}
+
+void Assembler::vinsertf64x4h(XMMRegister dst, Address src, int value) {
   assert(VM_Version::supports_evex(), "");
-  _tuple_type = EVEX_T4;
-  _input_size_in_bits = EVEX_64bit;
-  InstructionMark im(this);
-  int vector_len = AVX_512bit;
   assert(dst != xnoreg, "sanity");
-  int dst_enc = dst->encoding();
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_64bit);
   // swap src<->dst for encoding
-  vex_prefix(src, dst_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, /* vex_w */ true, vector_len);
+  vex_prefix(src, dst->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
   emit_int8(0x1A);
   emit_operand(dst, src);
+  // 0x00 - insert into lower 256 bits
   // 0x01 - insert into upper 256 bits
-  emit_int8(0x01);
+  emit_int8(value & 0x01);
 }
 
 void Assembler::vinsertf32x4h(XMMRegister dst, XMMRegister nds, XMMRegister src, int value) {
   assert(VM_Version::supports_evex(), "");
-  int vector_len = AVX_512bit;
-  int src_enc = src->encoding();
-  int dst_enc = dst->encoding();
+  InstructionAttr attributes(AVX_512bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
   int nds_enc = nds->is_valid() ? nds->encoding() : 0;
-  int encode = vex_prefix_and_encode(dst_enc, nds_enc, src_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A,
-                                     /* vex_w */ false, vector_len, /* legacy_mode */ false, /* no_mask_reg */ false);
+  int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
   emit_int8(0x18);
   emit_int8((unsigned char)(0xC0 | encode));
   // 0x00 - insert into q0 128 bits (0..127)
@@ -5139,15 +5480,14 @@
 }
 
 void Assembler::vinsertf32x4h(XMMRegister dst, Address src, int value) {
-  assert(VM_Version::supports_evex(), "");
-  _tuple_type = EVEX_T4;
-  _input_size_in_bits = EVEX_32bit;
-  InstructionMark im(this);
-  int vector_len = AVX_512bit;
+  assert(VM_Version::supports_avx(), "");
   assert(dst != xnoreg, "sanity");
-  int dst_enc = dst->encoding();
+  int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit;
+  InstructionMark im(this);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit);
   // swap src<->dst for encoding
-  vex_prefix(src, dst_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, /* vex_w */ false, vector_len);
+  vex_prefix(src, dst->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
   emit_int8(0x18);
   emit_operand(dst, src);
   // 0x00 - insert into q0 128 bits (0..127)
@@ -5159,17 +5499,13 @@
 
 void Assembler::vinsertf128h(XMMRegister dst, Address src) {
   assert(VM_Version::supports_avx(), "");
-  int vector_len = AVX_256bit;
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_T4;
-    _input_size_in_bits = EVEX_32bit;
-    vector_len = AVX_512bit;
-  }
-  InstructionMark im(this);
   assert(dst != xnoreg, "sanity");
-  int dst_enc = dst->encoding();
+  int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit;
+  InstructionMark im(this);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit);
   // swap src<->dst for encoding
-  vex_prefix(src, dst_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, /* vex_w */ false, vector_len);
+  vex_prefix(src, dst->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
   emit_int8(0x18);
   emit_operand(dst, src);
   // 0x01 - insert into upper 128 bits
@@ -5178,11 +5514,9 @@
 
 void Assembler::vextractf128h(XMMRegister dst, XMMRegister src) {
   assert(VM_Version::supports_avx(), "");
-  int vector_len = AVX_256bit;
-  if (VM_Version::supports_evex()) {
-    vector_len = AVX_512bit;
-  }
-  int encode = vex_prefix_and_encode(src, xnoreg, dst, VEX_SIMD_66, vector_len, VEX_OPCODE_0F_3A);
+  int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit;
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
+  int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
   emit_int8(0x19);
   emit_int8((unsigned char)(0xC0 | encode));
   // 0x00 - extract from lower 128 bits
@@ -5192,16 +5526,12 @@
 
 void Assembler::vextractf128h(Address dst, XMMRegister src) {
   assert(VM_Version::supports_avx(), "");
-  int vector_len = AVX_256bit;
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_T4;
-    _input_size_in_bits = EVEX_32bit;
-    vector_len = AVX_512bit;
-  }
-  InstructionMark im(this);
   assert(src != xnoreg, "sanity");
-  int src_enc = src->encoding();
-  vex_prefix(dst, 0, src_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, /* vex_w */ false, vector_len);
+  int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit;
+  InstructionMark im(this);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit);
+  vex_prefix(dst, 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
   emit_int8(0x19);
   emit_operand(src, dst);
   // 0x01 - extract from upper 128 bits
@@ -5210,11 +5540,10 @@
 
 void Assembler::vinserti128h(XMMRegister dst, XMMRegister nds, XMMRegister src) {
   assert(VM_Version::supports_avx2(), "");
-  int vector_len = AVX_256bit;
-  if (VM_Version::supports_evex()) {
-    vector_len = AVX_512bit;
-  }
-  int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector_len, VEX_OPCODE_0F_3A);
+  int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit;
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
   emit_int8(0x38);
   emit_int8((unsigned char)(0xC0 | encode));
   // 0x00 - insert into lower 128 bits
@@ -5222,34 +5551,27 @@
   emit_int8(0x01);
 }
 
-void Assembler::vinserti64x4h(XMMRegister dst, XMMRegister nds, XMMRegister src) {
+void Assembler::vinserti64x4h(XMMRegister dst, XMMRegister nds, XMMRegister src, int value) {
   assert(VM_Version::supports_evex(), "");
-  int vector_len = AVX_512bit;
-  int src_enc = src->encoding();
-  int dst_enc = dst->encoding();
+  InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
   int nds_enc = nds->is_valid() ? nds->encoding() : 0;
-  int encode = vex_prefix_and_encode(dst_enc, nds_enc, src_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A,
-                                     /* vex_w */ true, vector_len, /* legacy_mode */ false, /* no_reg_mask */ false);
+  int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
   emit_int8(0x38);
   emit_int8((unsigned char)(0xC0 | encode));
   // 0x00 - insert into lower 256 bits
   // 0x01 - insert into upper 256 bits
-  emit_int8(0x01);
+  emit_int8(value & 0x01);
 }
 
 void Assembler::vinserti128h(XMMRegister dst, Address src) {
   assert(VM_Version::supports_avx2(), "");
-  int vector_len = AVX_256bit;
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_T4;
-    _input_size_in_bits = EVEX_32bit;
-    vector_len = AVX_512bit;
-  }
-  InstructionMark im(this);
   assert(dst != xnoreg, "sanity");
-  int dst_enc = dst->encoding();
+  int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit;
+  InstructionMark im(this);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit);
   // swap src<->dst for encoding
-  vex_prefix(src, dst_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, /* vex_w */ false, vector_len);
+  vex_prefix(src, dst->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
   emit_int8(0x38);
   emit_operand(dst, src);
   // 0x01 - insert into upper 128 bits
@@ -5258,11 +5580,9 @@
 
 void Assembler::vextracti128h(XMMRegister dst, XMMRegister src) {
   assert(VM_Version::supports_avx(), "");
-  int vector_len = AVX_256bit;
-  if (VM_Version::supports_evex()) {
-    vector_len = AVX_512bit;
-  }
-  int encode = vex_prefix_and_encode(src, xnoreg, dst, VEX_SIMD_66, vector_len, VEX_OPCODE_0F_3A);
+  int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit;
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
+  int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
   emit_int8(0x39);
   emit_int8((unsigned char)(0xC0 | encode));
   // 0x00 - extract from lower 128 bits
@@ -5272,48 +5592,33 @@
 
 void Assembler::vextracti128h(Address dst, XMMRegister src) {
   assert(VM_Version::supports_avx2(), "");
-  int vector_len = AVX_256bit;
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_T4;
-    _input_size_in_bits = EVEX_32bit;
-    vector_len = AVX_512bit;
-  }
-  InstructionMark im(this);
   assert(src != xnoreg, "sanity");
-  int src_enc = src->encoding();
-  vex_prefix(dst, 0, src_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, /* vex_w */ false, vector_len);
+  int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit;
+  InstructionMark im(this);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit);
+  vex_prefix(dst, 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
   emit_int8(0x39);
   emit_operand(src, dst);
   // 0x01 - extract from upper 128 bits
   emit_int8(0x01);
 }
 
-void Assembler::vextracti64x4h(XMMRegister dst, XMMRegister src) {
+void Assembler::vextracti64x4h(XMMRegister dst, XMMRegister src, int value) {
   assert(VM_Version::supports_evex(), "");
-  int vector_len = AVX_512bit;
-  int src_enc = src->encoding();
-  int dst_enc = dst->encoding();
-  int encode = vex_prefix_and_encode(src_enc, 0, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A,
-                                     /* vex_w */ true, vector_len, /* legacy_mode */ false, /* no_mask_reg */ false);
+  InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
+  int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
   emit_int8(0x3B);
   emit_int8((unsigned char)(0xC0 | encode));
+  // 0x00 - extract from lower 256 bits
   // 0x01 - extract from upper 256 bits
-  emit_int8(0x01);
+  emit_int8(value & 0x01);
 }
 
 void Assembler::vextracti64x2h(XMMRegister dst, XMMRegister src, int value) {
   assert(VM_Version::supports_evex(), "");
-  int vector_len = AVX_512bit;
-  int src_enc = src->encoding();
-  int dst_enc = dst->encoding();
-  int encode;
-  if (VM_Version::supports_avx512dq()) {
-    encode = vex_prefix_and_encode(src_enc, 0, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A,
-                                   /* vex_w */ true, vector_len, /* legacy_mode */ false, /* no_mask_reg */ false);
-  } else {
-    encode = vex_prefix_and_encode(src_enc, 0, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A,
-                                   /* vex_w */ false, vector_len, /* legacy_mode */ true, /* no_mask_reg */ false);
-  }
+  InstructionAttr attributes(AVX_512bit, /* vex_w */ !_legacy_mode_dq, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
+  int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
   emit_int8(0x39);
   emit_int8((unsigned char)(0xC0 | encode));
   // 0x01 - extract from bits 255:128
@@ -5322,42 +5627,36 @@
   emit_int8(value & 0x3);
 }
 
-void Assembler::vextractf64x4h(XMMRegister dst, XMMRegister src) {
+void Assembler::vextractf64x4h(XMMRegister dst, XMMRegister src, int value) {
   assert(VM_Version::supports_evex(), "");
-  int vector_len = AVX_512bit;
-  int src_enc = src->encoding();
-  int dst_enc = dst->encoding();
-  int encode = vex_prefix_and_encode(src_enc, 0, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A,
-                                     /* vex_w */ true, vector_len, /* legacy_mode */ false, /* no_mask_reg */ false);
+  InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
+  int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
   emit_int8(0x1B);
   emit_int8((unsigned char)(0xC0 | encode));
+  // 0x00 - extract from lower 256 bits
   // 0x01 - extract from upper 256 bits
-  emit_int8(0x01);
-}
-
-void Assembler::vextractf64x4h(Address dst, XMMRegister src) {
+  emit_int8(value & 0x1);
+}
+
+void Assembler::vextractf64x4h(Address dst, XMMRegister src, int value) {
   assert(VM_Version::supports_evex(), "");
-  _tuple_type = EVEX_T4;
-  _input_size_in_bits = EVEX_64bit;
-  InstructionMark im(this);
-  int vector_len = AVX_512bit;
   assert(src != xnoreg, "sanity");
-  int src_enc = src->encoding();
-  vex_prefix(dst, 0, src_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A,
-             /* vex_w */ true, vector_len);
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_64bit);
+  vex_prefix(dst, 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
   emit_int8(0x1B);
   emit_operand(src, dst);
+  // 0x00 - extract from lower 256 bits
   // 0x01 - extract from upper 256 bits
-  emit_int8(0x01);
+  emit_int8(value & 0x01);
 }
 
 void Assembler::vextractf32x4h(XMMRegister dst, XMMRegister src, int value) {
-  assert(VM_Version::supports_evex(), "");
-  int vector_len = AVX_512bit;
-  int src_enc = src->encoding();
-  int dst_enc = dst->encoding();
-  int encode = vex_prefix_and_encode(src_enc, 0, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A,
-                                     /* vex_w */ false, vector_len, /* legacy_mode */ false, /* no_mask_reg */ false);
+  assert(VM_Version::supports_avx(), "");
+  int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit;
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
+  int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
   emit_int8(0x19);
   emit_int8((unsigned char)(0xC0 | encode));
   // 0x00 - extract from bits 127:0
@@ -5369,13 +5668,11 @@
 
 void Assembler::vextractf32x4h(Address dst, XMMRegister src, int value) {
   assert(VM_Version::supports_evex(), "");
-  _tuple_type = EVEX_T4;
-  _input_size_in_bits = EVEX_32bit;
-  InstructionMark im(this);
-  int vector_len = AVX_512bit;
   assert(src != xnoreg, "sanity");
-  int src_enc = src->encoding();
-  vex_prefix(dst, 0, src_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, /* vex_w */ false, vector_len);
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_512bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit);
+  vex_prefix(dst, 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
   emit_int8(0x19);
   emit_operand(src, dst);
   // 0x00 - extract from bits 127:0
@@ -5387,11 +5684,8 @@
 
 void Assembler::vextractf64x2h(XMMRegister dst, XMMRegister src, int value) {
   assert(VM_Version::supports_evex(), "");
-  int vector_len = AVX_512bit;
-  int src_enc = src->encoding();
-  int dst_enc = dst->encoding();
-  int encode = vex_prefix_and_encode(src_enc, 0, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A,
-                                     /* vex_w */ !_legacy_mode_dq, vector_len, /* legacy_mode */ false, /* no_mask_reg */ false);
+  InstructionAttr attributes(AVX_512bit, /* vex_w */ !_legacy_mode_dq, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
+  int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
   emit_int8(0x19);
   emit_int8((unsigned char)(0xC0 | encode));
   // 0x01 - extract from bits 255:128
@@ -5402,10 +5696,9 @@
 
 // duplicate 4-byte integer data from src into 8 locations in dest
 void Assembler::vpbroadcastd(XMMRegister dst, XMMRegister src) {
-  _instruction_uses_vl = true;
-  assert(UseAVX > 1, "");
-  int vector_len = AVX_256bit;
-  int encode = vex_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, vector_len, VEX_OPCODE_0F_38);
+  assert(VM_Version::supports_avx2(), "");
+  InstructionAttr attributes(AVX_256bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
   emit_int8(0x58);
   emit_int8((unsigned char)(0xC0 | encode));
 }
@@ -5413,189 +5706,170 @@
 // duplicate 2-byte integer data from src into 16 locations in dest
 void Assembler::vpbroadcastw(XMMRegister dst, XMMRegister src) {
   assert(VM_Version::supports_avx2(), "");
-  bool vector_len = AVX_256bit;
-  int encode = vex_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66,
-                                     vector_len, VEX_OPCODE_0F_38, false);
+  InstructionAttr attributes(AVX_256bit, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
   emit_int8(0x79);
   emit_int8((unsigned char)(0xC0 | encode));
 }
 
 // duplicate 1-byte integer data from src into 16|32|64 locations in dest : requires AVX512BW and AVX512VL
 void Assembler::evpbroadcastb(XMMRegister dst, XMMRegister src, int vector_len) {
-  _instruction_uses_vl = true;
-  assert(UseAVX > 1, "");
-  int encode = vex_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, vector_len, VEX_OPCODE_0F_38);
+  assert(VM_Version::supports_evex(), "");
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
   emit_int8(0x78);
   emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::evpbroadcastb(XMMRegister dst, Address src, int vector_len) {
-  _instruction_uses_vl = true;
-  assert(UseAVX > 1, "");
-  _tuple_type = EVEX_T1S;
-  _input_size_in_bits = EVEX_8bit;
-  InstructionMark im(this);
+  assert(VM_Version::supports_evex(), "");
   assert(dst != xnoreg, "sanity");
-  int dst_enc = dst->encoding();
+  InstructionMark im(this);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_8bit);
   // swap src<->dst for encoding
-  vex_prefix(src, dst_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_38, /* vex_w */ false, vector_len);
+  vex_prefix(src, dst->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
   emit_int8(0x78);
   emit_operand(dst, src);
 }
 
 // duplicate 2-byte integer data from src into 8|16|32 locations in dest : requires AVX512BW and AVX512VL
 void Assembler::evpbroadcastw(XMMRegister dst, XMMRegister src, int vector_len) {
-  _instruction_uses_vl = true;
-  assert(UseAVX > 1, "");
-  int encode = vex_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, vector_len, VEX_OPCODE_0F_38);
+  assert(VM_Version::supports_evex(), "");
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
   emit_int8(0x79);
   emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::evpbroadcastw(XMMRegister dst, Address src, int vector_len) {
-  _instruction_uses_vl = true;
-  assert(UseAVX > 1, "");
-  _tuple_type = EVEX_T1S;
-  _input_size_in_bits = EVEX_16bit;
-  InstructionMark im(this);
+  assert(VM_Version::supports_evex(), "");
   assert(dst != xnoreg, "sanity");
-  int dst_enc = dst->encoding();
+  InstructionMark im(this);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_16bit);
   // swap src<->dst for encoding
-  vex_prefix(src, dst_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_38, /* vex_w */ false, vector_len);
+  vex_prefix(src, dst->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
   emit_int8(0x79);
   emit_operand(dst, src);
 }
 
 // duplicate 4-byte integer data from src into 4|8|16 locations in dest : requires AVX512VL
 void Assembler::evpbroadcastd(XMMRegister dst, XMMRegister src, int vector_len) {
-  _instruction_uses_vl = true;
-  assert(UseAVX > 1, "");
-  int encode = vex_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, vector_len, VEX_OPCODE_0F_38);
+  assert(VM_Version::supports_evex(), "");
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
   emit_int8(0x58);
   emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::evpbroadcastd(XMMRegister dst, Address src, int vector_len) {
-  _instruction_uses_vl = true;
-  assert(UseAVX > 1, "");
-  _tuple_type = EVEX_T1S;
-  _input_size_in_bits = EVEX_32bit;
-  InstructionMark im(this);
+  assert(VM_Version::supports_evex(), "");
   assert(dst != xnoreg, "sanity");
-  int dst_enc = dst->encoding();
+  InstructionMark im(this);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
   // swap src<->dst for encoding
-  vex_prefix(src, dst_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_38, /* vex_w */ false, vector_len);
+  vex_prefix(src, dst->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
   emit_int8(0x58);
   emit_operand(dst, src);
 }
 
 // duplicate 8-byte integer data from src into 2|4|8 locations in dest : requires AVX512VL
 void Assembler::evpbroadcastq(XMMRegister dst, XMMRegister src, int vector_len) {
-  _instruction_uses_vl = true;
-  assert(UseAVX > 1, "");
-  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38,
-                                     /* vex_w */ true, vector_len, /* legacy_mode */ false, /* no_mask_reg */ false);
+  assert(VM_Version::supports_evex(), "");
+  InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
   emit_int8(0x59);
   emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::evpbroadcastq(XMMRegister dst, Address src, int vector_len) {
-  _instruction_uses_vl = true;
-  assert(UseAVX > 1, "");
-  _tuple_type = EVEX_T1S;
-  _input_size_in_bits = EVEX_64bit;
-  InstructionMark im(this);
+  assert(VM_Version::supports_evex(), "");
   assert(dst != xnoreg, "sanity");
-  int dst_enc = dst->encoding();
+  InstructionMark im(this);
+  InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
   // swap src<->dst for encoding
-  vex_prefix(src, dst_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_38, /* vex_w */ true, vector_len);
+  vex_prefix(src, dst->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
   emit_int8(0x59);
   emit_operand(dst, src);
 }
 
 // duplicate single precision fp from src into 4|8|16 locations in dest : requires AVX512VL
 void Assembler::evpbroadcastss(XMMRegister dst, XMMRegister src, int vector_len) {
-  _instruction_uses_vl = true;
-  assert(UseAVX > 1, "");
-  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38,
-                                     /* vex_w */ false, vector_len, /* legacy_mode */ false, /*no_mask_reg */ false);
+  assert(VM_Version::supports_evex(), "");
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
   emit_int8(0x18);
   emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::evpbroadcastss(XMMRegister dst, Address src, int vector_len) {
-  assert(UseAVX > 1, "");
-  _tuple_type = EVEX_T1S;
-  _input_size_in_bits = EVEX_32bit;
-  InstructionMark im(this);
+  assert(VM_Version::supports_evex(), "");
   assert(dst != xnoreg, "sanity");
-  int dst_enc = dst->encoding();
+  InstructionMark im(this);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
   // swap src<->dst for encoding
-  vex_prefix(src, 0, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_38, /* vex_w */ false, vector_len);
+  vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
   emit_int8(0x18);
   emit_operand(dst, src);
 }
 
 // duplicate double precision fp from src into 2|4|8 locations in dest : requires AVX512VL
 void Assembler::evpbroadcastsd(XMMRegister dst, XMMRegister src, int vector_len) {
-  _instruction_uses_vl = true;
-  assert(UseAVX > 1, "");
-  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38,
-                                     /*vex_w */ true, vector_len, /* legacy_mode */ false, /*no_mask_reg */ false);
+  assert(VM_Version::supports_evex(), "");
+  InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
   emit_int8(0x19);
   emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::evpbroadcastsd(XMMRegister dst, Address src, int vector_len) {
-  _instruction_uses_vl = true;
-  assert(UseAVX > 1, "");
-  _tuple_type = EVEX_T1S;
-  _input_size_in_bits = EVEX_64bit;
-  InstructionMark im(this);
+  assert(VM_Version::supports_evex(), "");
   assert(dst != xnoreg, "sanity");
-  int dst_enc = dst->encoding();
+  InstructionMark im(this);
+  InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
   // swap src<->dst for encoding
-  vex_prefix(src, 0, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_38, /* vex_w */ true, vector_len);
+  vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
   emit_int8(0x19);
   emit_operand(dst, src);
 }
 
 // duplicate 1-byte integer data from src into 16|32|64 locations in dest : requires AVX512BW and AVX512VL
 void Assembler::evpbroadcastb(XMMRegister dst, Register src, int vector_len) {
-  _instruction_uses_vl = true;
   assert(VM_Version::supports_evex(), "");
-  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38,
-                                     /*vex_w */ false, vector_len, /* legacy_mode */ false, /*no_mask_reg */ false);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
   emit_int8(0x7A);
   emit_int8((unsigned char)(0xC0 | encode));
 }
 
 // duplicate 2-byte integer data from src into 8|16|32 locations in dest : requires AVX512BW and AVX512VL
 void Assembler::evpbroadcastw(XMMRegister dst, Register src, int vector_len) {
-  _instruction_uses_vl = true;
   assert(VM_Version::supports_evex(), "");
-  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38,
-                                     /* vex_w */ false, vector_len, /* legacy_mode */ false, /*no_mask_reg */ false);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
   emit_int8(0x7B);
   emit_int8((unsigned char)(0xC0 | encode));
 }
 
 // duplicate 4-byte integer data from src into 4|8|16 locations in dest : requires AVX512VL
 void Assembler::evpbroadcastd(XMMRegister dst, Register src, int vector_len) {
-  _instruction_uses_vl = true;
   assert(VM_Version::supports_evex(), "");
-  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38,
-                                     /* vex_w */ false, vector_len, /* legacy_mode */ false, /*no_mask_reg */ false);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
   emit_int8(0x7C);
   emit_int8((unsigned char)(0xC0 | encode));
 }
 
 // duplicate 8-byte integer data from src into 2|4|8 locations in dest : requires AVX512VL
 void Assembler::evpbroadcastq(XMMRegister dst, Register src, int vector_len) {
-  _instruction_uses_vl = true;
   assert(VM_Version::supports_evex(), "");
-  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38,
-                                     /* vex_w */ true, vector_len, /* legacy_mode */ false, /*no_mask_reg */ false);
+  InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
   emit_int8(0x7C);
   emit_int8((unsigned char)(0xC0 | encode));
 }
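 // Note on the two GPR-sourced forms above (based on the encodings used here):
 // the D and Q broadcasts share opcode 0x7C, and the EVEX.W bit carried by the
 // vex_w attribute is what selects the 32-bit versus 64-bit source form, which
 // is why only the InstructionAttr differs between them.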
@@ -5603,8 +5877,8 @@
 // Carry-Less Multiplication Quadword
 void Assembler::pclmulqdq(XMMRegister dst, XMMRegister src, int mask) {
   assert(VM_Version::supports_clmul(), "");
-  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, /* no_mask_reg */ false,
-                                      VEX_OPCODE_0F_3A, /* rex_w */ false, AVX_128bit, /* legacy_mode */ true);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
   emit_int8(0x44);
   emit_int8((unsigned char)(0xC0 | encode));
   emit_int8((unsigned char)mask);
@@ -5613,8 +5887,9 @@
 // Carry-Less Multiplication Quadword
 void Assembler::vpclmulqdq(XMMRegister dst, XMMRegister nds, XMMRegister src, int mask) {
   assert(VM_Version::supports_avx() && VM_Version::supports_clmul(), "");
-  int vector_len = AVX_128bit;
-  int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector_len, VEX_OPCODE_0F_3A, /* legacy_mode */ true);
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
   emit_int8(0x44);
   emit_int8((unsigned char)(0xC0 | encode));
   emit_int8((unsigned char)mask);
@@ -5622,11 +5897,9 @@
 
 void Assembler::vzeroupper() {
   assert(VM_Version::supports_avx(), "");
-  if (UseAVX < 3)
-  {
-    (void)vex_prefix_and_encode(xmm0, xmm0, xmm0, VEX_SIMD_NONE);
-    emit_int8(0x77);
-  }
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+  (void)vex_prefix_and_encode(0, 0, 0, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x77);
 }
 
 
@@ -6130,8 +6403,7 @@
   if (pre > 0) {
     emit_int8(simd_pre[pre]);
   }
-  int encode = (rex_w) ? prefixq_and_encode(dst_enc, src_enc) :
-                          prefix_and_encode(dst_enc, src_enc);
+  int encode = (rex_w) ? prefixq_and_encode(dst_enc, src_enc) : prefix_and_encode(dst_enc, src_enc);
   if (opc > 0) {
     emit_int8(0x0F);
     int opc2 = simd_opc[opc];
@@ -6143,7 +6415,9 @@
 }
 
 
-void Assembler::vex_prefix(bool vex_r, bool vex_b, bool vex_x, bool vex_w, int nds_enc, VexSimdPrefix pre, VexOpcode opc, int vector_len) {
+void Assembler::vex_prefix(bool vex_r, bool vex_b, bool vex_x, int nds_enc, VexSimdPrefix pre, VexOpcode opc) {
+  int vector_len = _attributes->get_vector_len();
+  bool vex_w = _attributes->is_rex_vex_w();
   if (vex_b || vex_x || vex_w || (opc == VEX_OPCODE_0F_38) || (opc == VEX_OPCODE_0F_3A)) {
     prefix(VEX_3bytes);
 
@@ -6167,13 +6441,13 @@
 }
 
 // This is a 4 byte encoding
-void Assembler::evex_prefix(bool vex_r, bool vex_b, bool vex_x, bool vex_w, bool evex_r, bool evex_v,
-                            int nds_enc, VexSimdPrefix pre, VexOpcode opc,
-                            bool is_extended_context, bool is_merge_context,
-                            int vector_len, bool no_mask_reg ){
+void Assembler::evex_prefix(bool vex_r, bool vex_b, bool vex_x, bool evex_r, bool evex_v, int nds_enc, VexSimdPrefix pre, VexOpcode opc) {
   // EVEX 0x62 prefix
   prefix(EVEX_4bytes);
-  _evex_encoding = (vex_w ? VEX_W : 0) | (evex_r ? EVEX_Rb : 0);
+  bool vex_w = _attributes->is_rex_vex_w();
+  int evex_encoding = (vex_w ? VEX_W : 0);
+  // EVEX.b is not currently used for broadcast of single element or data rounding modes
+  _attributes->set_evex_encoding(evex_encoding);
 
   // P0: byte 2, initialized to RXBR`00mm
   // instead of not'd
@@ -6195,214 +6469,127 @@
   emit_int8(byte3);
 
   // P2: byte 4 as zL'Lbv'aaa
-  int byte4 = (no_mask_reg) ? 0 : 1; // kregs are implemented in the low 3 bits as aaa (hard code k1, it will be initialized for now)
+  int byte4 = (_attributes->is_no_reg_mask()) ? 0 : 1; // kregs are implemented in the low 3 bits as aaa (hard code k1, it will be initialized for now)
   // EVEX.v` for extending EVEX.vvvv or VIDX
   byte4 |= (evex_v ? 0: EVEX_V);
   // third EVEX.b for broadcast actions
-  byte4 |= (is_extended_context ? EVEX_Rb : 0);
+  byte4 |= (_attributes->is_extended_context() ? EVEX_Rb : 0);
   // fourth EVEX.L'L for vector length : 0 is 128, 1 is 256, 2 is 512, currently we do not support 1024
-  byte4 |= ((vector_len) & 0x3) << 5;
+  byte4 |= ((_attributes->get_vector_len()) & 0x3) << 5;
   // last is EVEX.z for zero/merge actions
-  byte4 |= (is_merge_context ? EVEX_Z : 0);
+  byte4 |= (_attributes->is_clear_context() ? EVEX_Z : 0);
   emit_int8(byte4);
 }
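 // A rough worked example of the byte4 packing above, following the zL'Lbv'aaa
 // layout named in the comment (and assuming EVEX_V occupies the v' position in
 // that layout): a 512-bit instruction (vector_len == 2) using the implicit k1
 // mask, with an extended nds register (evex_v true), no extended context and no
 // merge/zero context, gives aaa = 001, b = 0, v' = 0, L'L = 10, z = 0, i.e.
 // byte4 == 0x41.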
 
-void Assembler::vex_prefix(Address adr, int nds_enc, int xreg_enc, VexSimdPrefix pre,
-                           VexOpcode opc, bool vex_w, int vector_len, bool legacy_mode, bool no_mask_reg) {
+void Assembler::vex_prefix(Address adr, int nds_enc, int xreg_enc, VexSimdPrefix pre, VexOpcode opc, InstructionAttr *attributes) {
   bool vex_r = ((xreg_enc & 8) == 8) ? 1 : 0;
   bool vex_b = adr.base_needs_rex();
   bool vex_x = adr.index_needs_rex();
-  _avx_vector_len = vector_len;
+  set_attributes(attributes);
+  attributes->set_current_assembler(this);
 
   // if vector length is turned off, revert to AVX for vectors smaller than 512-bit
-  if (_legacy_mode_vl && _instruction_uses_vl) {
-    switch (vector_len) {
+  if ((UseAVX > 2) && _legacy_mode_vl && attributes->uses_vl()) {
+    switch (attributes->get_vector_len()) {
     case AVX_128bit:
     case AVX_256bit:
-      legacy_mode = true;
+      attributes->set_is_legacy_mode();
       break;
     }
   }
 
-  if ((UseAVX > 2) && (legacy_mode == false))
+  if ((UseAVX > 2) && !attributes->is_legacy_mode())
   {
     bool evex_r = (xreg_enc >= 16);
     bool evex_v = (nds_enc >= 16);
-    _is_evex_instruction = true;
-    evex_prefix(vex_r, vex_b, vex_x, vex_w, evex_r, evex_v, nds_enc, pre, opc, false, false, vector_len, no_mask_reg);
+    attributes->set_is_evex_instruction();
+    evex_prefix(vex_r, vex_b, vex_x, evex_r, evex_v, nds_enc, pre, opc);
   } else {
-    vex_prefix(vex_r, vex_b, vex_x, vex_w, nds_enc, pre, opc, vector_len);
+    vex_prefix(vex_r, vex_b, vex_x, nds_enc, pre, opc);
   }
-  _instruction_uses_vl = false;
-}
-
-int Assembler::vex_prefix_and_encode(int dst_enc, int nds_enc, int src_enc, VexSimdPrefix pre, VexOpcode opc,
-                                     bool vex_w, int vector_len, bool legacy_mode, bool no_mask_reg ) {
+}
+
+int Assembler::vex_prefix_and_encode(int dst_enc, int nds_enc, int src_enc, VexSimdPrefix pre, VexOpcode opc, InstructionAttr *attributes) {
   bool vex_r = ((dst_enc & 8) == 8) ? 1 : 0;
   bool vex_b = ((src_enc & 8) == 8) ? 1 : 0;
   bool vex_x = false;
-  _avx_vector_len = vector_len;
+  set_attributes(attributes);
+  attributes->set_current_assembler(this);
 
   // if vector length is turned off, revert to AVX for vectors smaller than 512-bit
-  if (_legacy_mode_vl && _instruction_uses_vl) {
-    switch (vector_len) {
+  if ((UseAVX > 2) && _legacy_mode_vl && attributes->uses_vl()) {
+    switch (attributes->get_vector_len()) {
     case AVX_128bit:
     case AVX_256bit:
-      legacy_mode = true;
+      if ((dst_enc >= 16) | (nds_enc >= 16) | (src_enc >= 16)) {
+        // up-propagate to a 512-bit EVEX encoding so that extended registers (xmm16-31) remain addressable, meeting RA requirements
+        attributes->set_vector_len(AVX_512bit);
+      } else {
+        attributes->set_is_legacy_mode();
+      }
       break;
     }
   }
 
-  if ((UseAVX > 2) && (legacy_mode == false))
+  if ((UseAVX > 2) && !attributes->is_legacy_mode())
   {
     bool evex_r = (dst_enc >= 16);
     bool evex_v = (nds_enc >= 16);
     // can use vex_x as bank extender on rm encoding
     vex_x = (src_enc >= 16);
-    evex_prefix(vex_r, vex_b, vex_x, vex_w, evex_r, evex_v, nds_enc, pre, opc, false, false, vector_len, no_mask_reg);
+    attributes->set_is_evex_instruction();
+    evex_prefix(vex_r, vex_b, vex_x, evex_r, evex_v, nds_enc, pre, opc);
   } else {
-    vex_prefix(vex_r, vex_b, vex_x, vex_w, nds_enc, pre, opc, vector_len);
+    vex_prefix(vex_r, vex_b, vex_x, nds_enc, pre, opc);
   }
 
-  _instruction_uses_vl = false;
-
   // return modrm byte components for operands
   return (((dst_enc & 7) << 3) | (src_enc & 7));
 }
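 // For illustration: the value returned above packs only the low 3 bits of each
 // register encoding; e.g. dst_enc = 9 (xmm9) and src_enc = 3 (xmm3) give
 // ((9 & 7) << 3) | (3 & 7) = 0x0B, which callers OR with 0xC0 to form the ModRM
 // byte, the upper encoding bits having already been folded into VEX.R/VEX.B
 // (or their EVEX extensions) by the prefix emitted above.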
 
 
 void Assembler::simd_prefix(XMMRegister xreg, XMMRegister nds, Address adr, VexSimdPrefix pre,
-                            bool no_mask_reg, VexOpcode opc, bool rex_w, int vector_len, bool legacy_mode) {
+                            VexOpcode opc, InstructionAttr *attributes) {
   if (UseAVX > 0) {
     int xreg_enc = xreg->encoding();
-    int  nds_enc = nds->is_valid() ? nds->encoding() : 0;
-    vex_prefix(adr, nds_enc, xreg_enc, pre, opc, rex_w, vector_len, legacy_mode, no_mask_reg);
+    int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+    vex_prefix(adr, nds_enc, xreg_enc, pre, opc, attributes);
   } else {
     assert((nds == xreg) || (nds == xnoreg), "wrong sse encoding");
-    rex_prefix(adr, xreg, pre, opc, rex_w);
+    rex_prefix(adr, xreg, pre, opc, attributes->is_rex_vex_w());
   }
 }
 
 int Assembler::simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src, VexSimdPrefix pre,
-                                      bool no_mask_reg, VexOpcode opc, bool rex_w, int vector_len, bool legacy_mode) {
+                                      VexOpcode opc, InstructionAttr *attributes) {
   int dst_enc = dst->encoding();
   int src_enc = src->encoding();
   if (UseAVX > 0) {
     int nds_enc = nds->is_valid() ? nds->encoding() : 0;
-    return vex_prefix_and_encode(dst_enc, nds_enc, src_enc, pre, opc, rex_w, vector_len, legacy_mode, no_mask_reg);
+    return vex_prefix_and_encode(dst_enc, nds_enc, src_enc, pre, opc, attributes);
   } else {
     assert((nds == dst) || (nds == src) || (nds == xnoreg), "wrong sse encoding");
-    return rex_prefix_and_encode(dst_enc, src_enc, pre, opc, rex_w);
+    return rex_prefix_and_encode(dst_enc, src_enc, pre, opc, attributes->is_rex_vex_w());
   }
 }
 
 int Assembler::kreg_prefix_and_encode(KRegister dst, KRegister nds, KRegister src, VexSimdPrefix pre,
-                                      bool no_mask_reg, VexOpcode opc, bool rex_w, int vector_len) {
-  int dst_enc = dst->encoding();
-  int src_enc = src->encoding();
+                                      VexOpcode opc, InstructionAttr *attributes) {
   int nds_enc = nds->is_valid() ? nds->encoding() : 0;
-  return vex_prefix_and_encode(dst_enc, nds_enc, src_enc, pre, opc, rex_w, vector_len, true, no_mask_reg);
+  return vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), pre, opc, attributes);
 }
 
 int Assembler::kreg_prefix_and_encode(KRegister dst, KRegister nds, Register src, VexSimdPrefix pre,
-                                      bool no_mask_reg, VexOpcode opc, bool rex_w, int vector_len) {
-  int dst_enc = dst->encoding();
-  int src_enc = src->encoding();
+                                      VexOpcode opc, InstructionAttr *attributes) {
   int nds_enc = nds->is_valid() ? nds->encoding() : 0;
-  return vex_prefix_and_encode(dst_enc, nds_enc, src_enc, pre, opc, rex_w, vector_len, true, no_mask_reg);
-}
-
-void Assembler::emit_simd_arith(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre, bool no_mask_reg, bool legacy_mode) {
-  InstructionMark im(this);
-  simd_prefix(dst, dst, src, pre, no_mask_reg, VEX_OPCODE_0F, false, AVX_128bit, legacy_mode);
-  emit_int8(opcode);
-  emit_operand(dst, src);
-}
-
-void Assembler::emit_simd_arith_q(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre, bool no_mask_reg) {
-  InstructionMark im(this);
-  simd_prefix_q(dst, dst, src, pre, no_mask_reg);
-  emit_int8(opcode);
-  emit_operand(dst, src);
-}
-
-void Assembler::emit_simd_arith(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre, bool no_mask_reg, bool legacy_mode) {
-  int encode = simd_prefix_and_encode(dst, dst, src, pre, no_mask_reg, VEX_OPCODE_0F, false, AVX_128bit, legacy_mode);
-  emit_int8(opcode);
-  emit_int8((unsigned char)(0xC0 | encode));
-}
-
-void Assembler::emit_simd_arith_q(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre, bool no_mask_reg) {
-  int encode = simd_prefix_and_encode(dst, dst, src, pre, no_mask_reg, VEX_OPCODE_0F, true, AVX_128bit);
-  emit_int8(opcode);
-  emit_int8((unsigned char)(0xC0 | encode));
-}
-
-// Versions with no second source register (non-destructive source).
-void Assembler::emit_simd_arith_nonds(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre, bool opNoRegMask) {
-  InstructionMark im(this);
-  simd_prefix(dst, xnoreg, src, pre, opNoRegMask);
-  emit_int8(opcode);
-  emit_operand(dst, src);
-}
-
-void Assembler::emit_simd_arith_nonds_q(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre, bool opNoRegMask) {
-  InstructionMark im(this);
-  simd_prefix_q(dst, xnoreg, src, pre, opNoRegMask);
-  emit_int8(opcode);
-  emit_operand(dst, src);
-}
-
-void Assembler::emit_simd_arith_nonds(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre, bool no_mask_reg, bool legacy_mode) {
-  int encode = simd_prefix_and_encode(dst, xnoreg, src, pre, no_mask_reg, VEX_OPCODE_0F, false, AVX_128bit, legacy_mode);
-  emit_int8(opcode);
-  emit_int8((unsigned char)(0xC0 | encode));
-}
-
-void Assembler::emit_simd_arith_nonds_q(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre, bool no_mask_reg) {
-  int encode = simd_prefix_and_encode(dst, xnoreg, src, pre, no_mask_reg, VEX_OPCODE_0F, true);
-  emit_int8(opcode);
-  emit_int8((unsigned char)(0xC0 | encode));
-}
-
-// 3-operands AVX instructions
-void Assembler::emit_vex_arith(int opcode, XMMRegister dst, XMMRegister nds, Address src,
-                               VexSimdPrefix pre, int vector_len, bool no_mask_reg, bool legacy_mode) {
-  InstructionMark im(this);
-  vex_prefix(dst, nds, src, pre, vector_len, no_mask_reg, legacy_mode);
-  emit_int8(opcode);
-  emit_operand(dst, src);
-}
-
-void Assembler::emit_vex_arith_q(int opcode, XMMRegister dst, XMMRegister nds,
-                                 Address src, VexSimdPrefix pre, int vector_len, bool no_mask_reg) {
-  InstructionMark im(this);
-  vex_prefix_q(dst, nds, src, pre, vector_len, no_mask_reg);
-  emit_int8(opcode);
-  emit_operand(dst, src);
-}
-
-void Assembler::emit_vex_arith(int opcode, XMMRegister dst, XMMRegister nds, XMMRegister src,
-                               VexSimdPrefix pre, int vector_len, bool no_mask_reg, bool legacy_mode) {
-  int encode = vex_prefix_and_encode(dst, nds, src, pre, vector_len, VEX_OPCODE_0F, legacy_mode, no_mask_reg);
-  emit_int8(opcode);
-  emit_int8((unsigned char)(0xC0 | encode));
-}
-
-void Assembler::emit_vex_arith_q(int opcode, XMMRegister dst, XMMRegister nds, XMMRegister src,
-                                 VexSimdPrefix pre, int vector_len, bool no_mask_reg) {
-  int src_enc = src->encoding();
-  int dst_enc = dst->encoding();
-  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
-  int encode = vex_prefix_and_encode(dst_enc, nds_enc, src_enc, pre, VEX_OPCODE_0F, true, vector_len, false, no_mask_reg);
-  emit_int8(opcode);
-  emit_int8((unsigned char)(0xC0 | encode));
+  return vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), pre, opc, attributes);
 }
 
 void Assembler::cmppd(XMMRegister dst, XMMRegister nds, XMMRegister src, int cop, int vector_len) {
   assert(VM_Version::supports_avx(), "");
   assert(!VM_Version::supports_evex(), "");
-  int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector_len, VEX_OPCODE_0F, /* no_mask_reg */ false);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(dst, nds, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
   emit_int8((unsigned char)0xC2);
   emit_int8((unsigned char)(0xC0 | encode));
   emit_int8((unsigned char)(0xF & cop));
@@ -6411,7 +6598,9 @@
 void Assembler::vpblendd(XMMRegister dst, XMMRegister nds, XMMRegister src1, XMMRegister src2, int vector_len) {
   assert(VM_Version::supports_avx(), "");
   assert(!VM_Version::supports_evex(), "");
-  int encode = vex_prefix_and_encode(dst, nds, src1, VEX_SIMD_66, vector_len, VEX_OPCODE_0F_3A, /* no_mask_reg */ false);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src1->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
   emit_int8((unsigned char)0x4B);
   emit_int8((unsigned char)(0xC0 | encode));
   int src2_enc = src2->encoding();
@@ -6430,7 +6619,7 @@
   leal(dst, src);
 }
 
-void Assembler::mov_literal32(Address dst, int32_t imm32,  RelocationHolder const& rspec) {
+void Assembler::mov_literal32(Address dst, int32_t imm32, RelocationHolder const& rspec) {
   InstructionMark im(this);
   emit_int8((unsigned char)0xC7);
   emit_operand(rax, dst);
@@ -6948,15 +7137,17 @@
 
 void Assembler::andnq(Register dst, Register src1, Register src2) {
   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
-  int encode = vex_prefix_0F38_and_encode_q_legacy(dst, src1, src2);
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+  int encode = vex_prefix_and_encode(dst->encoding(), src1->encoding(), src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
   emit_int8((unsigned char)0xF2);
   emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::andnq(Register dst, Register src1, Address src2) {
-  InstructionMark im(this);
   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
-  vex_prefix_0F38_q_legacy(dst, src1, src2);
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+  vex_prefix(src2, src1->encoding(), dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
   emit_int8((unsigned char)0xF2);
   emit_operand(dst, src2);
 }
@@ -6983,45 +7174,51 @@
 
 void Assembler::blsiq(Register dst, Register src) {
   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
-  int encode = vex_prefix_0F38_and_encode_q_legacy(rbx, dst, src);
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+  int encode = vex_prefix_and_encode(rbx->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
   emit_int8((unsigned char)0xF3);
   emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::blsiq(Register dst, Address src) {
-  InstructionMark im(this);
   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
-  vex_prefix_0F38_q_legacy(rbx, dst, src);
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+  vex_prefix(src, dst->encoding(), rbx->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
   emit_int8((unsigned char)0xF3);
   emit_operand(rbx, src);
 }
 
 void Assembler::blsmskq(Register dst, Register src) {
   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
-  int encode = vex_prefix_0F38_and_encode_q_legacy(rdx, dst, src);
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+  int encode = vex_prefix_and_encode(rdx->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
   emit_int8((unsigned char)0xF3);
   emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::blsmskq(Register dst, Address src) {
-  InstructionMark im(this);
   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
-  vex_prefix_0F38_q_legacy(rdx, dst, src);
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+  vex_prefix(src, dst->encoding(), rdx->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
   emit_int8((unsigned char)0xF3);
   emit_operand(rdx, src);
 }
 
 void Assembler::blsrq(Register dst, Register src) {
   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
-  int encode = vex_prefix_0F38_and_encode_q_legacy(rcx, dst, src);
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+  int encode = vex_prefix_and_encode(rcx->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
   emit_int8((unsigned char)0xF3);
   emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::blsrq(Register dst, Address src) {
-  InstructionMark im(this);
   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
-  vex_prefix_0F38_q_legacy(rcx, dst, src);
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+  vex_prefix(src, dst->encoding(), rcx->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
   emit_int8((unsigned char)0xF3);
   emit_operand(rcx, src);
 }
@@ -7095,45 +7292,44 @@
 
 void Assembler::cvtsi2sdq(XMMRegister dst, Register src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  int encode = simd_prefix_and_encode_q(dst, dst, src, VEX_SIMD_F2, /* no_mask_reg */ true);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
   emit_int8(0x2A);
   emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::cvtsi2sdq(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_T1S;
-    _input_size_in_bits = EVEX_32bit;
-  }
-  InstructionMark im(this);
-  simd_prefix_q(dst, dst, src, VEX_SIMD_F2, /* no_mask_reg */ true);
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
+  simd_prefix(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
   emit_int8(0x2A);
   emit_operand(dst, src);
 }
 
 void Assembler::cvtsi2ssq(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  if (VM_Version::supports_evex()) {
-    _tuple_type = EVEX_T1S;
-    _input_size_in_bits = EVEX_32bit;
-  }
-  InstructionMark im(this);
-  simd_prefix_q(dst, dst, src, VEX_SIMD_F3, /* no_mask_reg */ true);
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
+  simd_prefix(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
   emit_int8(0x2A);
   emit_operand(dst, src);
 }
 
 void Assembler::cvttsd2siq(Register dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  int encode = simd_prefix_and_encode_q(dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, /* no_mask_reg */ true);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
   emit_int8(0x2C);
   emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::cvttss2siq(Register dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  int encode = simd_prefix_and_encode_q(dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, /* no_mask_reg */ true);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
   emit_int8(0x2C);
   emit_int8((unsigned char)(0xC0 | encode));
 }
@@ -7316,7 +7512,8 @@
 void Assembler::movdq(XMMRegister dst, Register src) {
   // table D-1 says MMX/SSE2
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  int encode = simd_prefix_and_encode_q(dst, src, VEX_SIMD_66, /* no_mask_reg */ true);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(dst, xnoreg, as_XMMRegister(src->encoding()), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
   emit_int8(0x6E);
   emit_int8((unsigned char)(0xC0 | encode));
 }
@@ -7324,8 +7521,9 @@
 void Assembler::movdq(Register dst, XMMRegister src) {
   // table D-1 says MMX/SSE2
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
   // swap src/dst to get correct prefix
-  int encode = simd_prefix_and_encode_q(src, dst, VEX_SIMD_66, /* no_mask_reg */ true);
+  int encode = simd_prefix_and_encode(src, xnoreg, as_XMMRegister(dst->encoding()), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
   emit_int8(0x7E);
   emit_int8((unsigned char)(0xC0 | encode));
 }
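 // For illustration: opcode 0x7E places the XMM register in ModRM.reg and the
 // general register in ModRM.rm, so the operands are passed to
 // simd_prefix_and_encode() in swapped order above, with the GPR cast to an
 // XMMRegister only so the SIMD prefix machinery can be reused.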
@@ -7458,8 +7656,8 @@
 
 void Assembler::mulxq(Register dst1, Register dst2, Register src) {
   assert(VM_Version::supports_bmi2(), "bit manipulation instructions not supported");
-  int encode = vex_prefix_and_encode(dst1->encoding(), dst2->encoding(), src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F_38,
-                                    /* vex_w */ true, AVX_128bit, /* legacy_mode */ true, /* no_mask_reg */ false);
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+  int encode = vex_prefix_and_encode(dst1->encoding(), dst2->encoding(), src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F_38, &attributes);
   emit_int8((unsigned char)0xF6);
   emit_int8((unsigned char)(0xC0 | encode));
 }
@@ -7621,8 +7819,8 @@
 
 void Assembler::rorxq(Register dst, Register src, int imm8) {
   assert(VM_Version::supports_bmi2(), "bit manipulation instructions not supported");
-  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F_3A,
-                                     /* vex_w */ true, AVX_128bit, /* legacy_mode */ true, /* no_mask_reg */ false);
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F_3A, &attributes);
   emit_int8((unsigned char)0xF0);
   emit_int8((unsigned char)(0xC0 | encode));
   emit_int8(imm8);
--- a/hotspot/src/cpu/x86/vm/assembler_x86.hpp	Wed Nov 25 13:39:52 2015 +0100
+++ b/hotspot/src/cpu/x86/vm/assembler_x86.hpp	Thu Nov 26 19:16:37 2015 +0100
@@ -438,6 +438,8 @@
 
 };
 
+class InstructionAttr;
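 // (Sketch of its role, inferred from the call sites in this change: an
 // InstructionAttr is built per instruction with the vector length, REX/VEX.W,
 // legacy-mode, mask-register and uses_vl settings, optionally refined with
 // set_address_attributes(tuple_type, input_size_in_bits) for memory operands,
 // and passed to the vex_prefix/evex_prefix emitters, which cache it via
 // set_attributes().)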
+
 // 64-bit reflects the fxsave size which is 512 bytes and the new xsave area on EVEX which is another 2176 bytes
 // See fxsave and xsave(EVEX enabled) documentation for layout
 const int FPUStateSizeInWords = NOT_LP64(27) LP64_ONLY(2688 / wordSize);
@@ -568,7 +570,8 @@
     EVEX_8bit  = 0,
     EVEX_16bit = 1,
     EVEX_32bit = 2,
-    EVEX_64bit = 3
+    EVEX_64bit = 3,
+    EVEX_NObit = 4
   };
 
   enum WhichOperand {
@@ -598,16 +601,12 @@
 
 private:
 
-  int _evex_encoding;
-  int _input_size_in_bits;
-  int _avx_vector_len;
-  int _tuple_type;
-  bool _is_evex_instruction;
   bool _legacy_mode_bw;
   bool _legacy_mode_dq;
   bool _legacy_mode_vl;
   bool _legacy_mode_vlbw;
-  bool _instruction_uses_vl;
+
+  class InstructionAttr *_attributes;
 
   // 64bit prefixes
   int prefix_and_encode(int reg_enc, bool byteinst = false);
@@ -637,181 +636,30 @@
   int  rex_prefix_and_encode(int dst_enc, int src_enc,
                              VexSimdPrefix pre, VexOpcode opc, bool rex_w);
 
-  void vex_prefix(bool vex_r, bool vex_b, bool vex_x, bool vex_w,
-                  int nds_enc, VexSimdPrefix pre, VexOpcode opc,
-                  int vector_len);
-
-  void evex_prefix(bool vex_r, bool vex_b, bool vex_x, bool vex_w, bool evex_r, bool evex_v,
-                   int nds_enc, VexSimdPrefix pre, VexOpcode opc,
-                   bool is_extended_context, bool is_merge_context,
-                   int vector_len, bool no_mask_reg );
+  void vex_prefix(bool vex_r, bool vex_b, bool vex_x, int nds_enc, VexSimdPrefix pre, VexOpcode opc);
+
+  void evex_prefix(bool vex_r, bool vex_b, bool vex_x, bool evex_r, bool evex_v,
+                   int nds_enc, VexSimdPrefix pre, VexOpcode opc);
 
   void vex_prefix(Address adr, int nds_enc, int xreg_enc,
                   VexSimdPrefix pre, VexOpcode opc,
-                  bool vex_w, int vector_len,
-                  bool legacy_mode = false, bool no_mask_reg = false);
-
-  void vex_prefix(XMMRegister dst, XMMRegister nds, Address src,
-                  VexSimdPrefix pre, int vector_len = AVX_128bit,
-                  bool no_mask_reg = false, bool legacy_mode = false) {
-    int dst_enc = dst->encoding();
-    int nds_enc = nds->is_valid() ? nds->encoding() : 0;
-    vex_prefix(src, nds_enc, dst_enc, pre, VEX_OPCODE_0F, false, vector_len, legacy_mode, no_mask_reg);
-  }
-
-  void vex_prefix_q(XMMRegister dst, XMMRegister nds, Address src,
-                    VexSimdPrefix pre, int vector_len = AVX_128bit,
-                    bool no_mask_reg = false) {
-    int dst_enc = dst->encoding();
-    int nds_enc = nds->is_valid() ? nds->encoding() : 0;
-    vex_prefix(src, nds_enc, dst_enc, pre, VEX_OPCODE_0F, true, vector_len, false, no_mask_reg);
-  }
-
-  void vex_prefix_0F38(Register dst, Register nds, Address src, bool no_mask_reg = false) {
-    bool vex_w = false;
-    int vector_len = AVX_128bit;
-    vex_prefix(src, nds->encoding(), dst->encoding(),
-               VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w,
-               vector_len, no_mask_reg);
-  }
-
-  void vex_prefix_0F38_legacy(Register dst, Register nds, Address src, bool no_mask_reg = false) {
-    bool vex_w = false;
-    int vector_len = AVX_128bit;
-    vex_prefix(src, nds->encoding(), dst->encoding(),
-               VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w,
-               vector_len, true, no_mask_reg);
-  }
-
-  void vex_prefix_0F38_q(Register dst, Register nds, Address src, bool no_mask_reg = false) {
-    bool vex_w = true;
-    int vector_len = AVX_128bit;
-    vex_prefix(src, nds->encoding(), dst->encoding(),
-               VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w,
-               vector_len, no_mask_reg);
-  }
-
-  void vex_prefix_0F38_q_legacy(Register dst, Register nds, Address src, bool no_mask_reg = false) {
-    bool vex_w = true;
-    int vector_len = AVX_128bit;
-    vex_prefix(src, nds->encoding(), dst->encoding(),
-               VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w,
-               vector_len, true, no_mask_reg);
-  }
+                  InstructionAttr *attributes);
 
   int  vex_prefix_and_encode(int dst_enc, int nds_enc, int src_enc,
                              VexSimdPrefix pre, VexOpcode opc,
-                             bool vex_w, int vector_len,
-                             bool legacy_mode, bool no_mask_reg);
-
-  int  vex_prefix_0F38_and_encode(Register dst, Register nds, Register src, bool no_mask_reg = false) {
-    bool vex_w = false;
-    int vector_len = AVX_128bit;
-    return vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(),
-                                 VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w, vector_len,
-                                 false, no_mask_reg);
-  }
-
-  int  vex_prefix_0F38_and_encode_legacy(Register dst, Register nds, Register src, bool no_mask_reg = false) {
-    bool vex_w = false;
-    int vector_len = AVX_128bit;
-    return vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(),
-      VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w, vector_len,
-      true, no_mask_reg);
-  }
-
-  int  vex_prefix_0F38_and_encode_q(Register dst, Register nds, Register src, bool no_mask_reg = false) {
-    bool vex_w = true;
-    int vector_len = AVX_128bit;
-    return vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(),
-                                 VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w, vector_len,
-                                 false, no_mask_reg);
-  }
-
-  int  vex_prefix_0F38_and_encode_q_legacy(Register dst, Register nds, Register src, bool no_mask_reg = false) {
-    bool vex_w = true;
-    int vector_len = AVX_128bit;
-    return vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(),
-                                 VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w, vector_len,
-                                 true, no_mask_reg);
-  }
-
-  int  vex_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src,
-                             VexSimdPrefix pre, int vector_len = AVX_128bit,
-                             VexOpcode opc = VEX_OPCODE_0F, bool legacy_mode = false,
-                             bool no_mask_reg = false) {
-    int src_enc = src->encoding();
-    int dst_enc = dst->encoding();
-    int nds_enc = nds->is_valid() ? nds->encoding() : 0;
-    return vex_prefix_and_encode(dst_enc, nds_enc, src_enc, pre, opc, false, vector_len, legacy_mode, no_mask_reg);
-  }
-
-  void simd_prefix(XMMRegister xreg, XMMRegister nds, Address adr,
-                   VexSimdPrefix pre, bool no_mask_reg, VexOpcode opc = VEX_OPCODE_0F,
-                   bool rex_w = false, int vector_len = AVX_128bit, bool legacy_mode = false);
-
-  void simd_prefix(XMMRegister dst, Address src, VexSimdPrefix pre,
-                   bool no_mask_reg, VexOpcode opc = VEX_OPCODE_0F) {
-    simd_prefix(dst, xnoreg, src, pre, no_mask_reg, opc);
-  }
-
-  void simd_prefix(Address dst, XMMRegister src, VexSimdPrefix pre, bool no_mask_reg) {
-    simd_prefix(src, dst, pre, no_mask_reg);
-  }
-  void simd_prefix_q(XMMRegister dst, XMMRegister nds, Address src,
-                     VexSimdPrefix pre, bool no_mask_reg = false) {
-    bool rex_w = true;
-    simd_prefix(dst, nds, src, pre, no_mask_reg, VEX_OPCODE_0F, rex_w);
-  }
-
-  int simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src,
-                             VexSimdPrefix pre, bool no_mask_reg,
-                             VexOpcode opc = VEX_OPCODE_0F,
-                             bool rex_w = false, int vector_len = AVX_128bit,
-                             bool legacy_mode = false);
-
-  int kreg_prefix_and_encode(KRegister dst, KRegister nds, KRegister src,
-                             VexSimdPrefix pre, bool no_mask_reg,
-                             VexOpcode opc = VEX_OPCODE_0F,
-                             bool rex_w = false, int vector_len = AVX_128bit);
-
-  int kreg_prefix_and_encode(KRegister dst, KRegister nds, Register src,
-                             VexSimdPrefix pre, bool no_mask_reg,
-                             VexOpcode opc = VEX_OPCODE_0F,
-                             bool rex_w = false, int vector_len = AVX_128bit);
-
-  // Move/convert 32-bit integer value.
-  int simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, Register src,
-                             VexSimdPrefix pre, bool no_mask_reg) {
-    // It is OK to cast from Register to XMMRegister to pass argument here
-    // since only encoding is used in simd_prefix_and_encode() and number of
-    // Gen and Xmm registers are the same.
-    return simd_prefix_and_encode(dst, nds, as_XMMRegister(src->encoding()), pre, no_mask_reg, VEX_OPCODE_0F);
-  }
-  int simd_prefix_and_encode(XMMRegister dst, Register src, VexSimdPrefix pre, bool no_mask_reg) {
-    return simd_prefix_and_encode(dst, xnoreg, src, pre, no_mask_reg);
-  }
-  int simd_prefix_and_encode(Register dst, XMMRegister src,
-                             VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F,
-                             bool no_mask_reg = false) {
-    return simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, pre, no_mask_reg, opc);
-  }
-
-  // Move/convert 64-bit integer value.
-  int simd_prefix_and_encode_q(XMMRegister dst, XMMRegister nds, Register src,
-                               VexSimdPrefix pre, bool no_mask_reg = false) {
-    bool rex_w = true;
-    return simd_prefix_and_encode(dst, nds, as_XMMRegister(src->encoding()), pre, no_mask_reg, VEX_OPCODE_0F, rex_w);
-  }
-  int simd_prefix_and_encode_q(XMMRegister dst, Register src, VexSimdPrefix pre, bool no_mask_reg) {
-    return simd_prefix_and_encode_q(dst, xnoreg, src, pre, no_mask_reg);
-  }
-  int simd_prefix_and_encode_q(Register dst, XMMRegister src,
-                               VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F,
-                               bool no_mask_reg = false) {
-    bool rex_w = true;
-    return simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, pre, no_mask_reg, opc, rex_w);
-  }
+                             InstructionAttr *attributes);
+
+  void simd_prefix(XMMRegister xreg, XMMRegister nds, Address adr, VexSimdPrefix pre,
+                   VexOpcode opc, InstructionAttr *attributes);
+
+  int simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src, VexSimdPrefix pre,
+                             VexOpcode opc, InstructionAttr *attributes);
+
+  int kreg_prefix_and_encode(KRegister dst, KRegister nds, KRegister src, VexSimdPrefix pre,
+                             VexOpcode opc, InstructionAttr *attributes);
+
+  int kreg_prefix_and_encode(KRegister dst, KRegister nds, Register src, VexSimdPrefix pre,
+                             VexOpcode opc, InstructionAttr *attributes);
 
   // Helper functions for groups of instructions
   void emit_arith_b(int op1, int op2, Register dst, int imm8);
@@ -821,27 +669,6 @@
   void emit_arith_imm32(int op1, int op2, Register dst, int32_t imm32);
   void emit_arith(int op1, int op2, Register dst, Register src);
 
-  void emit_simd_arith(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre, bool no_mask_reg = false, bool legacy_mode = false);
-  void emit_simd_arith_q(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre, bool no_mask_reg = false);
-  void emit_simd_arith(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre, bool no_mask_reg = false, bool legacy_mode = false);
-  void emit_simd_arith_q(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre, bool no_mask_reg = false);
-  void emit_simd_arith_nonds(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre, bool no_mask_reg = false);
-  void emit_simd_arith_nonds_q(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre, bool no_mask_reg = false);
-  void emit_simd_arith_nonds(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre, bool no_mask_reg = false, bool legacy_mode = false);
-  void emit_simd_arith_nonds_q(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre, bool no_mask_reg = false);
-  void emit_vex_arith(int opcode, XMMRegister dst, XMMRegister nds,
-                      Address src, VexSimdPrefix pre, int vector_len,
-                      bool no_mask_reg = false, bool legacy_mode = false);
-  void emit_vex_arith_q(int opcode, XMMRegister dst, XMMRegister nds,
-                        Address src, VexSimdPrefix pre, int vector_len,
-                        bool no_mask_reg = false);
-  void emit_vex_arith(int opcode, XMMRegister dst, XMMRegister nds,
-                      XMMRegister src, VexSimdPrefix pre, int vector_len,
-                      bool no_mask_reg = false, bool legacy_mode = false);
-  void emit_vex_arith_q(int opcode, XMMRegister dst, XMMRegister nds,
-                        XMMRegister src, VexSimdPrefix pre, int vector_len,
-                        bool no_mask_reg = false);
-
   bool emit_compressed_disp_byte(int &disp);
 
   void emit_operand(Register reg,
@@ -986,18 +813,16 @@
   // belong in macro assembler but there is no need for both varieties to exist
 
   void init_attributes(void) {
-    _evex_encoding = 0;
-    _input_size_in_bits = 0;
-    _avx_vector_len = AVX_NoVec;
-    _tuple_type = EVEX_ETUP;
-    _is_evex_instruction = false;
     _legacy_mode_bw = (VM_Version::supports_avx512bw() == false);
     _legacy_mode_dq = (VM_Version::supports_avx512dq() == false);
     _legacy_mode_vl = (VM_Version::supports_avx512vl() == false);
     _legacy_mode_vlbw = (VM_Version::supports_avx512vlbw() == false);
-    _instruction_uses_vl = false;
+    _attributes = NULL;
   }
 
+  void set_attributes(InstructionAttr *attributes) { _attributes = attributes; }
+  void clear_attributes(void) { _attributes = NULL; }
+
   void lea(Register dst, Address src);
 
   void mov(Register dst, Register src);
@@ -1506,13 +1331,18 @@
 
   void movddup(XMMRegister dst, XMMRegister src);
 
+  void kmovwl(KRegister dst, Register src);
+  void kmovdl(KRegister dst, Register src);
   void kmovql(KRegister dst, KRegister src);
   void kmovql(KRegister dst, Register src);
-  void kmovdl(KRegister dst, Register src);
-  void kmovwl(KRegister dst, Register src);
   void kmovql(Address dst, KRegister src);
   void kmovql(KRegister dst, Address src);
 
+  void kortestbl(KRegister dst, KRegister src);
+  void kortestwl(KRegister dst, KRegister src);
+  void kortestdl(KRegister dst, KRegister src);
+  void kortestql(KRegister dst, KRegister src);
+
   void movdl(XMMRegister dst, Register src);
   void movdl(Register dst, XMMRegister src);
   void movdl(XMMRegister dst, Address src);
@@ -1537,6 +1367,12 @@
   void vmovdqu(XMMRegister dst, XMMRegister src);
 
    // Move Unaligned 512bit Vector
+  void evmovdqub(Address dst, XMMRegister src, int vector_len);
+  void evmovdqub(XMMRegister dst, Address src, int vector_len);
+  void evmovdqub(XMMRegister dst, XMMRegister src, int vector_len);
+  void evmovdquw(Address dst, XMMRegister src, int vector_len);
+  void evmovdquw(XMMRegister dst, Address src, int vector_len);
+  void evmovdquw(XMMRegister dst, XMMRegister src, int vector_len);
   void evmovdqul(Address dst, XMMRegister src, int vector_len);
   void evmovdqul(XMMRegister dst, Address src, int vector_len);
   void evmovdqul(XMMRegister dst, XMMRegister src, int vector_len);
@@ -1682,8 +1518,22 @@
   void pcmpestri(XMMRegister xmm1, XMMRegister xmm2, int imm8);
   void pcmpestri(XMMRegister xmm1, Address src, int imm8);
 
+  void pcmpeqb(XMMRegister dst, XMMRegister src);
+  void vpcmpeqb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
+  void evpcmpeqb(KRegister kdst, XMMRegister nds, XMMRegister src, int vector_len);
+
   void pcmpeqw(XMMRegister dst, XMMRegister src);
   void vpcmpeqw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
+  void evpcmpeqw(KRegister kdst, XMMRegister nds, XMMRegister src, int vector_len);
+
+  void pcmpeqd(XMMRegister dst, XMMRegister src);
+  void vpcmpeqd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
+  void evpcmpeqd(KRegister kdst, XMMRegister nds, XMMRegister src, int vector_len);
+
+  void pcmpeqq(XMMRegister dst, XMMRegister src);
+  void vpcmpeqq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
+  void evpcmpeqq(KRegister kdst, XMMRegister nds, XMMRegister src, int vector_len);
+  void evpcmpeqq(KRegister kdst, XMMRegister nds, Address src, int vector_len);
 
   void pmovmskb(Register dst, XMMRegister src);
   void vpmovmskb(Register dst, XMMRegister src);
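
The mask-register compare (evpcmpeq*) and kortest* declarations added above work together: the EVEX compare writes one result bit per element into a KRegister, and kortest collapses that mask into flags. A hedged sketch of the kind of sequence they enable, assuming the usual `__` shorthand for the current MacroAssembler; the register choices and the label are illustrative, not taken from this changeset:

  Label mismatch;
  __ evpcmpeqb(k7, xmm0, xmm1, Assembler::AVX_512bit);  // one mask bit per byte, 64 bytes at once
  __ kortestql(k7, k7);                                  // CF is set only if every mask bit is 1
  __ jcc(Assembler::carryClear, mismatch);               // a cleared bit means a differing byte
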
@@ -1704,7 +1554,7 @@
   void pmovzxbw(XMMRegister dst, XMMRegister src);
   void pmovzxbw(XMMRegister dst, Address src);
 
-  void vpmovzxbw(XMMRegister dst, Address src);
+  void vpmovzxbw(XMMRegister dst, Address src, int vector_len);
 
 #ifndef _LP64 // no 32bit push/pop on amd64
   void popl(Address dst);
@@ -2106,12 +1956,12 @@
   void vextracti128h(Address dst, XMMRegister src);
 
   // Copy low 256bit into high 256bit of ZMM registers.
-  void vinserti64x4h(XMMRegister dst, XMMRegister nds, XMMRegister src);
-  void vinsertf64x4h(XMMRegister dst, XMMRegister nds, XMMRegister src);
-  void vextracti64x4h(XMMRegister dst, XMMRegister src);
-  void vextractf64x4h(XMMRegister dst, XMMRegister src);
-  void vextractf64x4h(Address dst, XMMRegister src);
-  void vinsertf64x4h(XMMRegister dst, Address src);
+  void vinserti64x4h(XMMRegister dst, XMMRegister nds, XMMRegister src, int value);
+  void vinsertf64x4h(XMMRegister dst, XMMRegister nds, XMMRegister src, int value);
+  void vextracti64x4h(XMMRegister dst, XMMRegister src, int value);
+  void vextractf64x4h(XMMRegister dst, XMMRegister src, int value);
+  void vextractf64x4h(Address dst, XMMRegister src, int value);
+  void vinsertf64x4h(XMMRegister dst, Address src, int value);
 
   // Copy targeted 128bit segments of the ZMM registers
   void vextracti64x2h(XMMRegister dst, XMMRegister src, int value);
@@ -2173,4 +2023,95 @@
 
 };
 
+// The Intel x86/AMD64 Assembler attributes: all fields enclosed here guide encoding-level decisions.
+// The specific set functions are for specialized use; otherwise the defaults, or whatever was supplied
+// at object construction, are applied.
+class InstructionAttr {
+public:
+  InstructionAttr(
+    int vector_len,
+    bool rex_vex_w,
+    bool legacy_mode,
+    bool no_reg_mask,
+    bool uses_vl)
+    :
+      _avx_vector_len(vector_len),
+      _rex_vex_w(rex_vex_w),
+      _legacy_mode(legacy_mode),
+      _no_reg_mask(no_reg_mask),
+      _uses_vl(uses_vl),
+      _tuple_type(Assembler::EVEX_ETUP),
+      _input_size_in_bits(Assembler::EVEX_NObit),
+      _is_evex_instruction(false),
+      _evex_encoding(0),
+      _is_clear_context(false),
+      _is_extended_context(false),
+      _current_assembler(NULL) {
+    if (UseAVX < 3) _legacy_mode = true;
+  }
+
+  ~InstructionAttr() {
+    if (_current_assembler != NULL) {
+      _current_assembler->clear_attributes();
+    }
+    _current_assembler = NULL;
+  }
+
+private:
+  int  _avx_vector_len;
+  bool _rex_vex_w;
+  bool _legacy_mode;
+  bool _no_reg_mask;
+  bool _uses_vl;
+  int  _tuple_type;
+  int  _input_size_in_bits;
+  bool _is_evex_instruction;
+  int  _evex_encoding;
+  bool _is_clear_context;
+  bool _is_extended_context;
+
+  Assembler *_current_assembler;
+
+public:
+  // Query functions (field accessors)
+  int  get_vector_len(void) const { return _avx_vector_len; }
+  bool is_rex_vex_w(void) const { return _rex_vex_w; }
+  bool is_legacy_mode(void) const { return _legacy_mode; }
+  bool is_no_reg_mask(void) const { return _no_reg_mask; }
+  bool uses_vl(void) const { return _uses_vl; }
+  int  get_tuple_type(void) const { return _tuple_type; }
+  int  get_input_size(void) const { return _input_size_in_bits; }
+  int  is_evex_instruction(void) const { return _is_evex_instruction; }
+  int  get_evex_encoding(void) const { return _evex_encoding; }
+  bool is_clear_context(void) const { return _is_clear_context; }
+  bool is_extended_context(void) const { return _is_extended_context; }
+
+  // Set the vector len manually
+  void set_vector_len(int vector_len) { _avx_vector_len = vector_len; }
+
+  // Set the instruction to be encoded in AVX mode
+  void set_is_legacy_mode(void) { _legacy_mode = true; }
+
+  // Set the current instruction to be encoded as an EVEX instruction
+  void set_is_evex_instruction(void) { _is_evex_instruction = true; }
+
+  // Internal encoding data used in compressed displacement (offset) programming
+  void set_evex_encoding(int value) { _evex_encoding = value; }
+
+  // Set the EVEX.Z field, used to clear all non-directed XMM/YMM/ZMM components
+  void set_is_clear_context(void) { _is_clear_context = true; }
+
+  // Map back to the current assembler so that we can manage object-level association
+  void set_current_assembler(Assembler *current_assembler) { _current_assembler = current_assembler; }
+
+  // Address modifiers used for compressed displacement calculation
+  void set_address_attributes(int tuple_type, int input_size_in_bits) {
+    if (VM_Version::supports_evex()) {
+      _tuple_type = tuple_type;
+      _input_size_in_bits = input_size_in_bits;
+    }
+  }
+
+};
+
 #endif // CPU_X86_VM_ASSEMBLER_X86_HPP
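
For orientation, a minimal sketch of how an emitter is expected to drive the new InstructionAttr-based prefix helpers declared above. The function name, opcode byte and flag choices below are illustrative assumptions, not lines from this changeset:

  void Assembler::example_addsd(XMMRegister dst, XMMRegister src) {
    // Describe the instruction once, up front: 128-bit operand, REX.W/VEX.W tracks EVEX support,
    // not forced to legacy encoding, no mask register, does not use the EVEX vector-length bits.
    InstructionAttr attributes(AVX_128bit, /* rex_vex_w */ VM_Version::supports_evex(),
                               /* legacy_mode */ false, /* no_reg_mask */ true, /* uses_vl */ false);
    // The prefix helper consumes the attributes (and presumably links them to the assembler via
    // set_attributes()/set_current_assembler(); the InstructionAttr destructor clears that link).
    int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
    emit_int8(0x58);                             // hypothetical opcode byte
    emit_int8((unsigned char)(0xC0 | encode));   // ModRM byte for the register-register form
  }

With a single attributes object per instruction, the per-call boolean and vector-length parameters that the removed vex_prefix_*/simd_prefix_* overloads used to thread through every helper are no longer needed.
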
--- a/hotspot/src/cpu/x86/vm/c1_CodeStubs_x86.cpp	Wed Nov 25 13:39:52 2015 +0100
+++ b/hotspot/src/cpu/x86/vm/c1_CodeStubs_x86.cpp	Thu Nov 26 19:16:37 2015 +0100
@@ -81,7 +81,8 @@
 
 void CounterOverflowStub::emit_code(LIR_Assembler* ce) {
   __ bind(_entry);
-  ce->store_parameter(_method->as_register(), 1);
+  Metadata *m = _method->as_constant_ptr()->as_metadata();
+  ce->store_parameter(m, 1);
   ce->store_parameter(_bci, 0);
   __ call(RuntimeAddress(Runtime1::entry_for(Runtime1::counter_overflow_id)));
   ce->add_call_info_here(_info);
--- a/hotspot/src/cpu/x86/vm/c1_LIRAssembler_x86.cpp	Wed Nov 25 13:39:52 2015 +0100
+++ b/hotspot/src/cpu/x86/vm/c1_LIRAssembler_x86.cpp	Thu Nov 26 19:16:37 2015 +0100
@@ -2971,6 +2971,14 @@
 }
 
 
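+// Stores a Metadata* (for example the Method* constant that CounterOverflowStub now passes)
+// into the reserved argument area of the current frame.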
+void LIR_Assembler::store_parameter(Metadata* m,  int offset_from_rsp_in_words) {
+  assert(offset_from_rsp_in_words >= 0, "invalid offset from rsp");
+  int offset_from_rsp_in_bytes = offset_from_rsp_in_words * BytesPerWord;
+  assert(offset_from_rsp_in_bytes < frame_map()->reserved_argument_area_size(), "invalid offset");
+  __ mov_metadata(Address(rsp, offset_from_rsp_in_bytes), m);
+}
+
+
 // This code replaces a call to arraycopy; no exception may
 // be thrown in this code, they must be thrown in the System.arraycopy
 // activation frame; we could save some checks if this would not be the case
@@ -3711,7 +3719,7 @@
     if (left->as_xmm_float_reg() != dest->as_xmm_float_reg()) {
       __ movflt(dest->as_xmm_float_reg(), left->as_xmm_float_reg());
     }
-    if (UseAVX > 1) {
+    if (UseAVX > 0) {
       __ vnegatess(dest->as_xmm_float_reg(), dest->as_xmm_float_reg(),
                    ExternalAddress((address)float_signflip_pool));
     } else {
@@ -3722,7 +3730,7 @@
     if (left->as_xmm_double_reg() != dest->as_xmm_double_reg()) {
       __ movdbl(dest->as_xmm_double_reg(), left->as_xmm_double_reg());
     }
-    if (UseAVX > 1) {
+    if (UseAVX > 0) {
       __ vnegatesd(dest->as_xmm_double_reg(), dest->as_xmm_double_reg(),
                    ExternalAddress((address)double_signflip_pool));
     } else {
--- a/hotspot/src/cpu/x86/vm/c1_LIRAssembler_x86.hpp	Wed Nov 25 13:39:52 2015 +0100
+++ b/hotspot/src/cpu/x86/vm/c1_LIRAssembler_x86.hpp	Thu Nov 26 19:16:37 2015 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2000, 2015, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -49,9 +49,10 @@
                            Register recv, Label* update_done);
 public:
 
-  void store_parameter(Register r, int offset_from_esp_in_words);
-  void store_parameter(jint c,     int offset_from_esp_in_words);
-  void store_parameter(jobject c,  int offset_from_esp_in_words);
+  void store_parameter(Register r,  int offset_from_esp_in_words);
+  void store_parameter(jint c,      int offset_from_esp_in_words);
+  void store_parameter(jobject c,   int offset_from_esp_in_words);
+  void store_parameter(Metadata* c, int offset_from_esp_in_words);
 
   enum { call_stub_size = NOT_LP64(15) LP64_ONLY(28),
          exception_handler_size = DEBUG_ONLY(1*K) NOT_DEBUG(175),
--- a/hotspot/src/cpu/x86/vm/c1_LIRGenerator_x86.cpp	Wed Nov 25 13:39:52 2015 +0100
+++ b/hotspot/src/cpu/x86/vm/c1_LIRGenerator_x86.cpp	Thu Nov 26 19:16:37 2015 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2005, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2005, 2015, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -80,6 +80,7 @@
 LIR_Opr LIRGenerator::divOutOpr()       { return FrameMap::rax_opr; }
 LIR_Opr LIRGenerator::remOutOpr()       { return FrameMap::rdx_opr; }
 LIR_Opr LIRGenerator::shiftCountOpr()   { return FrameMap::rcx_opr; }
+LIR_Opr LIRGenerator::syncLockOpr()     { return new_register(T_INT); }
 LIR_Opr LIRGenerator::syncTempOpr()     { return FrameMap::rax_opr; }
 LIR_Opr LIRGenerator::getThreadTemp()   { return LIR_OprFact::illegalOpr; }
 
--- a/hotspot/src/cpu/x86/vm/c1_MacroAssembler_x86.hpp	Wed Nov 25 13:39:52 2015 +0100
+++ b/hotspot/src/cpu/x86/vm/c1_MacroAssembler_x86.hpp	Thu Nov 26 19:16:37 2015 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1999, 2015, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -117,4 +117,7 @@
 
   void invalidate_registers(bool inv_rax, bool inv_rbx, bool inv_rcx, bool inv_rdx, bool inv_rsi, bool inv_rdi) PRODUCT_RETURN;
 
+  // This platform only uses signal-based null checks. The Label is not needed.
+  void null_check(Register r, Label *Lnull = NULL) { MacroAssembler::null_check(r); }
+
 #endif // CPU_X86_VM_C1_MACROASSEMBLER_X86_HPP
--- a/hotspot/src/cpu/x86/vm/c2_globals_x86.hpp	Wed Nov 25 13:39:52 2015 +0100
+++ b/hotspot/src/cpu/x86/vm/c2_globals_x86.hpp	Thu Nov 26 19:16:37 2015 +0100
@@ -84,6 +84,7 @@
 define_pd_global(bool, OptoScheduling,               false);
 define_pd_global(bool, OptoBundling,                 false);
 define_pd_global(bool, OptoRegScheduling,            true);
+define_pd_global(bool, SuperWordLoopUnrollAnalysis,  true);
 
 define_pd_global(intx, ReservedCodeCacheSize,        48*M);
 define_pd_global(intx, NonProfiledCodeHeapSize,      21*M);
--- a/hotspot/src/cpu/x86/vm/c2_init_x86.cpp	Wed Nov 25 13:39:52 2015 +0100
+++ b/hotspot/src/cpu/x86/vm/c2_init_x86.cpp	Thu Nov 26 19:16:37 2015 +0100
@@ -58,6 +58,4 @@
       OptoReg::invalidate(i);
     }
   }
-
-  SuperWordLoopUnrollAnalysis = true;
 }
--- a/hotspot/src/cpu/x86/vm/jvmciCodeInstaller_x86.cpp	Wed Nov 25 13:39:52 2015 +0100
+++ b/hotspot/src/cpu/x86/vm/jvmciCodeInstaller_x86.cpp	Thu Nov 26 19:16:37 2015 +0100
@@ -36,7 +36,7 @@
 #include "code/vmreg.hpp"
 #include "vmreg_x86.inline.hpp"
 
-jint CodeInstaller::pd_next_offset(NativeInstruction* inst, jint pc_offset, oop method) {
+jint CodeInstaller::pd_next_offset(NativeInstruction* inst, jint pc_offset, Handle method, TRAPS) {
   if (inst->is_call() || inst->is_jump()) {
     assert(NativeCall::instruction_size == (int)NativeJump::instruction_size, "unexpected size");
     return (pc_offset + NativeCall::instruction_size);
@@ -53,18 +53,17 @@
     return (offset);
   } else if (inst->is_call_reg()) {
     // the inlined vtable stub contains a "call register" instruction
-    assert(method != NULL, "only valid for virtual calls");
+    assert(method.not_null(), "only valid for virtual calls");
     return (pc_offset + ((NativeCallReg *) inst)->next_instruction_offset());
   } else if (inst->is_cond_jump()) {
     address pc = (address) (inst);
     return pc_offset + (jint) (Assembler::locate_next_instruction(pc) - pc);
   } else {
-    fatal("unsupported type of instruction for call site");
-    return 0;
+    JVMCI_ERROR_0("unsupported type of instruction for call site");
   }
 }
 
-void CodeInstaller::pd_patch_OopConstant(int pc_offset, Handle& constant) {
+void CodeInstaller::pd_patch_OopConstant(int pc_offset, Handle constant, TRAPS) {
   address pc = _instructions->start() + pc_offset;
   Handle obj = HotSpotObjectConstantImpl::object(constant);
   jobject value = JNIHandles::make_local(obj());
@@ -75,7 +74,7 @@
     _instructions->relocate(pc, oop_Relocation::spec(oop_index), Assembler::narrow_oop_operand);
     TRACE_jvmci_3("relocating (narrow oop constant) at " PTR_FORMAT "/" PTR_FORMAT, p2i(pc), p2i(operand));
 #else
-    fatal("compressed oop on 32bit");
+    JVMCI_ERROR("compressed oop on 32bit");
 #endif
   } else {
     address operand = Assembler::locate_operand(pc, Assembler::imm_operand);
@@ -85,19 +84,19 @@
   }
 }
 
-void CodeInstaller::pd_patch_MetaspaceConstant(int pc_offset, Handle& constant) {
+void CodeInstaller::pd_patch_MetaspaceConstant(int pc_offset, Handle constant, TRAPS) {
   address pc = _instructions->start() + pc_offset;
   if (HotSpotMetaspaceConstantImpl::compressed(constant)) {
 #ifdef _LP64
     address operand = Assembler::locate_operand(pc, Assembler::narrow_oop_operand);
-    *((narrowKlass*) operand) = record_narrow_metadata_reference(constant);
+    *((narrowKlass*) operand) = record_narrow_metadata_reference(constant, CHECK);
     TRACE_jvmci_3("relocating (narrow metaspace constant) at " PTR_FORMAT "/" PTR_FORMAT, p2i(pc), p2i(operand));
 #else
-    fatal("compressed Klass* on 32bit");
+    JVMCI_ERROR("compressed Klass* on 32bit");
 #endif
   } else {
     address operand = Assembler::locate_operand(pc, Assembler::imm_operand);
-    *((Metadata**) operand) = record_metadata_reference(constant);
+    *((Metadata**) operand) = record_metadata_reference(constant, CHECK);
     TRACE_jvmci_3("relocating (metaspace constant) at " PTR_FORMAT "/" PTR_FORMAT, p2i(pc), p2i(operand));
   }
 }
@@ -117,7 +116,7 @@
   TRACE_jvmci_3("relocating at " PTR_FORMAT "/" PTR_FORMAT " with destination at " PTR_FORMAT " (%d)", p2i(pc), p2i(operand), p2i(dest), data_offset);
 }
 
-void CodeInstaller::pd_relocate_ForeignCall(NativeInstruction* inst, jlong foreign_call_destination) {
+void CodeInstaller::pd_relocate_ForeignCall(NativeInstruction* inst, jlong foreign_call_destination, TRAPS) {
   address pc = (address) inst;
   if (inst->is_call()) {
     // NOTE: for call without a mov, the offset must fit a 32-bit immediate
@@ -139,18 +138,18 @@
     *(jint*) disp += ((address) foreign_call_destination) - old_dest;
     _instructions->relocate(pc, runtime_call_Relocation::spec(), Assembler::call32_operand);
   } else {
-    fatal("unsupported relocation for foreign call");
+    JVMCI_ERROR("unsupported relocation for foreign call");
   }
 
   TRACE_jvmci_3("relocating (foreign call)  at " PTR_FORMAT, p2i(inst));
 }
 
-void CodeInstaller::pd_relocate_JavaMethod(oop hotspot_method, jint pc_offset) {
+void CodeInstaller::pd_relocate_JavaMethod(Handle hotspot_method, jint pc_offset, TRAPS) {
 #ifdef ASSERT
   Method* method = NULL;
   // we need to check, this might also be an unresolved method
   if (hotspot_method->is_a(HotSpotResolvedJavaMethodImpl::klass())) {
-    method = getMethodFromHotSpotMethod(hotspot_method);
+    method = getMethodFromHotSpotMethod(hotspot_method());
   }
 #endif
   switch (_next_call_type) {
@@ -185,6 +184,7 @@
       break;
     }
     default:
+      JVMCI_ERROR("invalid _next_call_type value");
       break;
   }
 }
@@ -198,7 +198,7 @@
 }
 
 
-void CodeInstaller::pd_relocate_poll(address pc, jint mark) {
+void CodeInstaller::pd_relocate_poll(address pc, jint mark, TRAPS) {
   switch (mark) {
     case POLL_NEAR: {
       relocate_poll_near(pc);
@@ -222,13 +222,13 @@
       _instructions->relocate(pc, relocInfo::poll_return_type, Assembler::imm_operand);
       break;
     default:
-      fatal("invalid mark value");
+      JVMCI_ERROR("invalid mark value: %d", mark);
       break;
   }
 }
 
 // convert JVMCI register indices (as used in oop maps) to HotSpot registers
-VMReg CodeInstaller::get_hotspot_reg(jint jvmci_reg) {
+VMReg CodeInstaller::get_hotspot_reg(jint jvmci_reg, TRAPS) {
   if (jvmci_reg < RegisterImpl::number_of_registers) {
     return as_Register(jvmci_reg)->as_VMReg();
   } else {
@@ -236,8 +236,7 @@
     if (floatRegisterNumber < XMMRegisterImpl::number_of_registers) {
       return as_XMMRegister(floatRegisterNumber)->as_VMReg();
     }
-    ShouldNotReachHere();
-    return NULL;
+    JVMCI_ERROR_NULL("invalid register number: %d", jvmci_reg);
   }
 }
 
--- a/hotspot/src/cpu/x86/vm/macroAssembler_x86.cpp	Wed Nov 25 13:39:52 2015 +0100
+++ b/hotspot/src/cpu/x86/vm/macroAssembler_x86.cpp	Thu Nov 26 19:16:37 2015 +0100
@@ -3651,12 +3651,71 @@
   LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src));
 }
 
+void MacroAssembler::movdqu(Address dst, XMMRegister src) {
+  if (UseAVX > 2 && !VM_Version::supports_avx512vl() && (src->encoding() > 15)) {
+    Assembler::vextractf32x4h(dst, src, 0);
+  } else {
+    Assembler::movdqu(dst, src);
+  }
+}
+
+void MacroAssembler::movdqu(XMMRegister dst, Address src) {
+  if (UseAVX > 2 && !VM_Version::supports_avx512vl() && (dst->encoding() > 15)) {
+    Assembler::vinsertf32x4h(dst, src, 0);
+  } else {
+    Assembler::movdqu(dst, src);
+  }
+}
+
+void MacroAssembler::movdqu(XMMRegister dst, XMMRegister src) {
+  if (UseAVX > 2 && !VM_Version::supports_avx512vl()) {
+    Assembler::evmovdqul(dst, src, Assembler::AVX_512bit);
+  } else {
+    Assembler::movdqu(dst, src);
+  }
+}
+
 void MacroAssembler::movdqu(XMMRegister dst, AddressLiteral src) {
   if (reachable(src)) {
-    Assembler::movdqu(dst, as_Address(src));
+    movdqu(dst, as_Address(src));
   } else {
     lea(rscratch1, src);
-    Assembler::movdqu(dst, Address(rscratch1, 0));
+    movdqu(dst, Address(rscratch1, 0));
+  }
+}
+
+void MacroAssembler::vmovdqu(Address dst, XMMRegister src) {
+  if (UseAVX > 2 && !VM_Version::supports_avx512vl() && (src->encoding() > 15)) {
+    Assembler::vextractf64x4h(dst, src, 0);
+  } else {
+    Assembler::vmovdqu(dst, src);
+  }
+}
+
+void MacroAssembler::vmovdqu(XMMRegister dst, Address src) {
+  if (UseAVX > 2 && !VM_Version::supports_avx512vl() && (dst->encoding() > 15)) {
+    Assembler::vinsertf64x4h(dst, src, 0);
+  } else {
+    Assembler::vmovdqu(dst, src);
+  }
+}
+
+void MacroAssembler::vmovdqu(XMMRegister dst, XMMRegister src) {
+  if (UseAVX > 2 && !VM_Version::supports_avx512vl()) {
+    Assembler::evmovdqul(dst, src, Assembler::AVX_512bit);
+  } else {
+    Assembler::vmovdqu(dst, src);
+  }
+}
+
+void MacroAssembler::vmovdqu(XMMRegister dst, AddressLiteral src) {
+  if (reachable(src)) {
+    vmovdqu(dst, as_Address(src));
+  } else {
+    lea(rscratch1, src);
+    vmovdqu(dst, Address(rscratch1, 0));
   }
 }
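
The register-bank routing above is needed because, without AVX512VL, the upper XMM bank (xmm16..xmm31) is reachable only through 512-bit EVEX-encoded instructions. A hedged usage sketch, assuming the usual `__` shorthand and an AVX-512 target without AVX512VL; the register choices are illustrative:

  __ movdqu(xmm1,  Address(rsp, 0));   // low bank: the plain SSE MOVDQU form is emitted
  __ movdqu(xmm20, Address(rsp, 0));   // high bank: routed through vinsertf32x4h
  __ vmovdqu(xmm20, xmm1);             // register-register form falls back to a 512-bit evmovdqul
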
 
@@ -3726,6 +3785,10 @@
   call(RuntimeAddress(CAST_FROM_FN_PTR(address, os::breakpoint)));
 }
 
+#ifdef _LP64
+#define XSTATE_BV 0x200
+#endif
+
 void MacroAssembler::pop_CPU_state() {
   pop_FPU_state();
   pop_IU_state();
@@ -3735,27 +3798,7 @@
 #ifndef _LP64
   frstor(Address(rsp, 0));
 #else
-  // AVX will continue to use the fxsave area.
-  // EVEX needs to utilize the xsave area, which is under different
-  // management.
-  if(VM_Version::supports_evex()) {
-    // EDX:EAX describe the XSAVE header and
-    // are obtained while fetching info for XCR0 via cpuid.
-    // These two registers make up 64-bits in the header for which bits
-    // 62:10 are currently reserved for future implementations and unused.  Bit 63
-    // is unused for our implementation as we do not utilize
-    // compressed XSAVE areas.  Bits 9..8 are currently ignored as we do not use
-    // the functionality for PKRU state and MSR tracing.
-    // Ergo we are primarily concerned with bits 7..0, which define
-    // which ISA extensions and features are enabled for a given machine and are
-    // defined in XemXcr0Eax and is used to map the XSAVE area
-    // for restoring registers as described via XCR0.
-    movl(rdx,VM_Version::get_xsave_header_upper_segment());
-    movl(rax,VM_Version::get_xsave_header_lower_segment());
-    xrstor(Address(rsp, 0));
-  } else {
-    fxrstor(Address(rsp, 0));
-  }
+  fxrstor(Address(rsp, 0));
 #endif
   addptr(rsp, FPUStateSizeInWords * wordSize);
 }
@@ -3773,49 +3816,13 @@
   push_FPU_state();
 }
 
-#ifdef _LP64
-#define XSTATE_BV 0x200
-#endif
-
 void MacroAssembler::push_FPU_state() {
   subptr(rsp, FPUStateSizeInWords * wordSize);
 #ifndef _LP64
   fnsave(Address(rsp, 0));
   fwait();
 #else
-  // AVX will continue to use the fxsave area.
-  // EVEX needs to utilize the xsave area, which is under different
-  // management.
-  if(VM_Version::supports_evex()) {
-    // Save a copy of EAX and EDX
-    push(rax);
-    push(rdx);
-    // EDX:EAX describe the XSAVE header and
-    // are obtained while fetching info for XCR0 via cpuid.
-    // These two registers make up 64-bits in the header for which bits
-    // 62:10 are currently reserved for future implementations and unused.  Bit 63
-    // is unused for our implementation as we do not utilize
-    // compressed XSAVE areas.  Bits 9..8 are currently ignored as we do not use
-    // the functionality for PKRU state and MSR tracing.
-    // Ergo we are primarily concerned with bits 7..0, which define
-    // which ISA extensions and features are enabled for a given machine and are
-    // defined in XemXcr0Eax and is used to program XSAVE area
-    // for saving the required registers as defined in XCR0.
-    int xcr0_edx = VM_Version::get_xsave_header_upper_segment();
-    int xcr0_eax = VM_Version::get_xsave_header_lower_segment();
-    movl(rdx,xcr0_edx);
-    movl(rax,xcr0_eax);
-    xsave(Address(rsp, wordSize*2));
-    // now Apply control bits and clear bytes 8..23 in the header
-    pop(rdx);
-    pop(rax);
-    movl(Address(rsp, XSTATE_BV), xcr0_eax);
-    movl(Address(rsp, XSTATE_BV+4), xcr0_edx);
-    andq(Address(rsp, XSTATE_BV+8), 0);
-    andq(Address(rsp, XSTATE_BV+16), 0);
-  } else {
-    fxsave(Address(rsp, 0));
-  }
+  fxsave(Address(rsp, 0));
 #endif // LP64
 }
 
@@ -3942,6 +3949,236 @@
   testl(dst, as_Address(src));
 }
 
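+// The helpers below bridge legacy 128-bit SSE instructions onto AVX-512 targets: when an
+// operand lives in the upper register bank (encoding > 15) and no suitable EVEX form of the
+// instruction is available, the operands are staged through xmm0/xmm1, which are spilled to
+// the stack (64 bytes each) and restored around the operation.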
+void MacroAssembler::pcmpeqb(XMMRegister dst, XMMRegister src) {
+  int dst_enc = dst->encoding();
+  int src_enc = src->encoding();
+  if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) {
+    Assembler::pcmpeqb(dst, src);
+  } else if ((dst_enc < 16) && (src_enc < 16)) {
+    Assembler::pcmpeqb(dst, src);
+  } else if (src_enc < 16) {
+    subptr(rsp, 64);
+    evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
+    evmovdqul(xmm0, dst, Assembler::AVX_512bit);
+    Assembler::pcmpeqb(xmm0, src);
+    movdqu(dst, xmm0);
+    evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
+    addptr(rsp, 64);
+  } else if (dst_enc < 16) {
+    subptr(rsp, 64);
+    evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
+    evmovdqul(xmm0, src, Assembler::AVX_512bit);
+    Assembler::pcmpeqb(dst, xmm0);
+    evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
+    addptr(rsp, 64);
+  } else {
+    subptr(rsp, 64);
+    evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
+    subptr(rsp, 64);
+    evmovdqul(Address(rsp, 0), xmm1, Assembler::AVX_512bit);
+    movdqu(xmm0, src);
+    movdqu(xmm1, dst);
+    Assembler::pcmpeqb(xmm1, xmm0);
+    movdqu(dst, xmm1);
+    evmovdqul(xmm1, Address(rsp, 0), Assembler::AVX_512bit);
+    addptr(rsp, 64);
+    evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
+    addptr(rsp, 64);
+  }
+}
+
+void MacroAssembler::pcmpeqw(XMMRegister dst, XMMRegister src) {
+  int dst_enc = dst->encoding();
+  int src_enc = src->encoding();
+  if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) {
+    Assembler::pcmpeqw(dst, src);
+  } else if ((dst_enc < 16) && (src_enc < 16)) {
+    Assembler::pcmpeqw(dst, src);
+  } else if (src_enc < 16) {
+    subptr(rsp, 64);
+    evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
+    evmovdqul(xmm0, dst, Assembler::AVX_512bit);
+    Assembler::pcmpeqw(xmm0, src);
+    movdqu(dst, xmm0);
+    evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
+    addptr(rsp, 64);
+  } else if (dst_enc < 16) {
+    subptr(rsp, 64);
+    evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
+    evmovdqul(xmm0, src, Assembler::AVX_512bit);
+    Assembler::pcmpeqw(dst, xmm0);
+    evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
+    addptr(rsp, 64);
+  } else {
+    subptr(rsp, 64);
+    evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
+    subptr(rsp, 64);
+    evmovdqul(Address(rsp, 0), xmm1, Assembler::AVX_512bit);
+    movdqu(xmm0, src);
+    movdqu(xmm1, dst);
+    Assembler::pcmpeqw(xmm1, xmm0);
+    movdqu(dst, xmm1);
+    evmovdqul(xmm1, Address(rsp, 0), Assembler::AVX_512bit);
+    addptr(rsp, 64);
+    evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
+    addptr(rsp, 64);
+  }
+}
+
+void MacroAssembler::pcmpestri(XMMRegister dst, Address src, int imm8) {
+  int dst_enc = dst->encoding();
+  if (dst_enc < 16) {
+    Assembler::pcmpestri(dst, src, imm8);
+  } else {
+    subptr(rsp, 64);
+    evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
+    evmovdqul(xmm0, dst, Assembler::AVX_512bit);
+    Assembler::pcmpestri(xmm0, src, imm8);
+    movdqu(dst, xmm0);
+    evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
+    addptr(rsp, 64);
+  }
+}
+
+void MacroAssembler::pcmpestri(XMMRegister dst, XMMRegister src, int imm8) {
+  int dst_enc = dst->encoding();
+  int src_enc = src->encoding();
+  if ((dst_enc < 16) && (src_enc < 16)) {
+    Assembler::pcmpestri(dst, src, imm8);
+  } else if (src_enc < 16) {
+    subptr(rsp, 64);
+    evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
+    evmovdqul(xmm0, dst, Assembler::AVX_512bit);
+    Assembler::pcmpestri(xmm0, src, imm8);
+    movdqu(dst, xmm0);
+    evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
+    addptr(rsp, 64);
+  } else if (dst_enc < 16) {
+    subptr(rsp, 64);
+    evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
+    evmovdqul(xmm0, src, Assembler::AVX_512bit);
+    Assembler::pcmpestri(dst, xmm0, imm8);
+    evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
+    addptr(rsp, 64);
+  } else {
+    subptr(rsp, 64);
+    evmovdqul(Address(rsp, 0), xmm0, Assembler::AVX_512bit);
+    subptr(rsp, 64);
+    evmovdqul(Address(rsp, 0), xmm1, Assembler::AVX_512bit);
+    movdqu(xmm0, src);
+    movdqu(xmm1, dst);
+    Assembler::pcmpestri(xmm1, xmm0, imm8);
+    movdqu(dst, xmm1);
+    evmovdqul(xmm1, Address(rsp, 0), Assembler::AVX_512bit);
+    addptr(rsp, 64);
+    evmovdqul(xmm0, Address(rsp, 0), Assembler::AVX_512bit);
+    addptr(rsp, 64);
+  }
+}
+
+void MacroAssembler::pmovzxbw(XMMRegister dst, XMMRegister src) {
+  int dst_enc = dst->encoding();
+  int src_enc = src->encoding();
+  if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) {
+    Assembler::pmovzxbw(dst, src);
+  } else if ((dst_enc < 16) && (src_enc < 16)) {
+    Assembler::pmovzxbw(dst, src);