changeset 48002:efc459cf351e

8189596: AArch64: implementation for Thread-local handshakes Reviewed-by: adinn
author aph
date Fri, 24 Nov 2017 17:19:47 +0000
parents cce885f4baab
children 2d91c9a4f409
files src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp src/hotspot/cpu/aarch64/globalDefinitions_aarch64.hpp src/hotspot/cpu/aarch64/globals_aarch64.hpp src/hotspot/cpu/aarch64/interp_masm_aarch64.cpp src/hotspot/cpu/aarch64/interp_masm_aarch64.hpp src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp src/hotspot/cpu/aarch64/nativeInst_aarch64.cpp src/hotspot/cpu/aarch64/sharedRuntime_aarch64.cpp src/hotspot/cpu/aarch64/templateInterpreterGenerator_aarch64.cpp src/hotspot/cpu/aarch64/templateTable_aarch64.cpp
diffstat 11 files changed, 148 insertions(+), 99 deletions(-) [+]
line wrap: on
line diff
--- a/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp	Wed Nov 22 14:53:20 2017 +0100
+++ b/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp	Fri Nov 24 17:19:47 2017 +0000
@@ -494,42 +494,6 @@
   }
 }
 
-// Rather than take a segfault when the polling page is protected,
-// explicitly check for a safepoint in progress and if there is one,
-// fake a call to the handler as if a segfault had been caught.
-void LIR_Assembler::poll_for_safepoint(relocInfo::relocType rtype, CodeEmitInfo* info) {
-  __ mov(rscratch1, SafepointSynchronize::address_of_state());
-  __ ldrb(rscratch1, Address(rscratch1));
-  Label nope, poll;
-  __ cbz(rscratch1, nope);
-  __ block_comment("safepoint");
-  __ enter();
-  __ push(0x3, sp);                // r0 & r1
-  __ push(0x3ffffffc, sp);         // integer registers except lr & sp & r0 & r1
-  __ adr(r0, poll);
-  __ str(r0, Address(rthread, JavaThread::saved_exception_pc_offset()));
-  __ mov(rscratch1, CAST_FROM_FN_PTR(address, SharedRuntime::get_poll_stub));
-  __ blrt(rscratch1, 1, 0, 1);
-  __ maybe_isb();
-  __ pop(0x3ffffffc, sp);          // integer registers except lr & sp & r0 & r1
-  __ mov(rscratch1, r0);
-  __ pop(0x3, sp);                 // r0 & r1
-  __ leave();
-  __ br(rscratch1);
-  address polling_page(os::get_polling_page());
-  assert(os::is_poll_address(polling_page), "should be");
-  unsigned long off;
-  __ adrp(rscratch1, Address(polling_page, rtype), off);
-  __ bind(poll);
-  if (info)
-    add_debug_info_for_branch(info);  // This isn't just debug info:
-                                      // it's the oop map
-  else
-    __ code_section()->relocate(pc(), rtype);
-  __ ldrw(zr, Address(rscratch1, off));
-  __ bind(nope);
-}
-
 void LIR_Assembler::return_op(LIR_Opr result) {
   assert(result->is_illegal() || !result->is_single_cpu() || result->as_register() == r0, "word returns are in r0,");
 
@@ -549,11 +513,9 @@
   address polling_page(os::get_polling_page());
   guarantee(info != NULL, "Shouldn't be NULL");
   assert(os::is_poll_address(polling_page), "should be");
-  unsigned long off;
-  __ adrp(rscratch1, Address(polling_page, relocInfo::poll_type), off);
-  assert(off == 0, "must be");
+  __ get_polling_page(rscratch1, polling_page, relocInfo::poll_type);
   add_debug_info_for_branch(info);  // This isn't just debug info:
-  // it's the oop map
+                                    // it's the oop map
   __ read_polling_page(rscratch1, relocInfo::poll_type);
   return __ offset();
 }
--- a/src/hotspot/cpu/aarch64/globalDefinitions_aarch64.hpp	Wed Nov 22 14:53:20 2017 +0100
+++ b/src/hotspot/cpu/aarch64/globalDefinitions_aarch64.hpp	Fri Nov 24 17:19:47 2017 +0000
@@ -51,4 +51,6 @@
 
 #define SUPPORT_RESERVED_STACK_AREA
 
+#define THREAD_LOCAL_POLL
+
 #endif // CPU_AARCH64_VM_GLOBALDEFINITIONS_AARCH64_HPP
--- a/src/hotspot/cpu/aarch64/globals_aarch64.hpp	Wed Nov 22 14:53:20 2017 +0100
+++ b/src/hotspot/cpu/aarch64/globals_aarch64.hpp	Fri Nov 24 17:19:47 2017 +0000
@@ -79,7 +79,7 @@
 // Clear short arrays bigger than one word in an arch-specific way
 define_pd_global(intx, InitArrayShortSize, BytesPerLong);
 
-define_pd_global(bool, ThreadLocalHandshakes, false);
+define_pd_global(bool, ThreadLocalHandshakes, true);
 
 #if defined(COMPILER1) || defined(COMPILER2)
 define_pd_global(intx, InlineSmallCode,          1000);
--- a/src/hotspot/cpu/aarch64/interp_masm_aarch64.cpp	Wed Nov 22 14:53:20 2017 +0100
+++ b/src/hotspot/cpu/aarch64/interp_masm_aarch64.cpp	Fri Nov 24 17:19:47 2017 +0000
@@ -30,12 +30,13 @@
 #include "logging/log.hpp"
 #include "oops/arrayOop.hpp"
 #include "oops/markOop.hpp"
+#include "oops/method.hpp"
 #include "oops/methodData.hpp"
-#include "oops/method.hpp"
 #include "prims/jvmtiExport.hpp"
 #include "prims/jvmtiThreadState.hpp"
 #include "runtime/basicLock.hpp"
 #include "runtime/biasedLocking.hpp"
+#include "runtime/safepointMechanism.hpp"
 #include "runtime/sharedRuntime.hpp"
 #include "runtime/thread.inline.hpp"
 
@@ -438,13 +439,26 @@
 
 void InterpreterMacroAssembler::dispatch_base(TosState state,
                                               address* table,
-                                              bool verifyoop) {
+                                              bool verifyoop,
+                                              bool generate_poll) {
   if (VerifyActivationFrameSize) {
     Unimplemented();
   }
   if (verifyoop) {
     verify_oop(r0, state);
   }
+
+  Label safepoint;
+  address* const safepoint_table = Interpreter::safept_table(state);
+  bool needs_thread_local_poll = generate_poll &&
+    SafepointMechanism::uses_thread_local_poll() && table != safepoint_table;
+
+  if (needs_thread_local_poll) {
+    NOT_PRODUCT(block_comment("Thread-local Safepoint poll"));
+    ldr(rscratch2, Address(rthread, Thread::polling_page_offset()));
+    tbnz(rscratch2, exact_log2(SafepointMechanism::poll_bit()), safepoint);
+  }
+
   if (table == Interpreter::dispatch_table(state)) {
     addw(rscratch2, rscratch1, Interpreter::distance_from_dispatch_table(state));
     ldr(rscratch2, Address(rdispatch, rscratch2, Address::uxtw(3)));
@@ -453,10 +467,17 @@
     ldr(rscratch2, Address(rscratch2, rscratch1, Address::uxtw(3)));
   }
   br(rscratch2);
+
+  if (needs_thread_local_poll) {
+    bind(safepoint);
+    lea(rscratch2, ExternalAddress((address)safepoint_table));
+    ldr(rscratch2, Address(rscratch2, rscratch1, Address::uxtw(3)));
+    br(rscratch2);
+  }
 }
 
-void InterpreterMacroAssembler::dispatch_only(TosState state) {
-  dispatch_base(state, Interpreter::dispatch_table(state));
+void InterpreterMacroAssembler::dispatch_only(TosState state, bool generate_poll) {
+  dispatch_base(state, Interpreter::dispatch_table(state), /*verifyoop*/true, generate_poll);
 }
 
 void InterpreterMacroAssembler::dispatch_only_normal(TosState state) {
@@ -468,10 +489,10 @@
 }
 
 
-void InterpreterMacroAssembler::dispatch_next(TosState state, int step) {
+void InterpreterMacroAssembler::dispatch_next(TosState state, int step, bool generate_poll) {  // generate_poll: ask dispatch_base for a thread-local safepoint poll
   // load next bytecode
   ldrb(rscratch1, Address(pre(rbcp, step)));
-  dispatch_base(state, Interpreter::dispatch_table(state));
+  dispatch_base(state, Interpreter::dispatch_table(state), /*verifyoop*/true, generate_poll);  // 3rd arg is verifyoop, not the poll flag
 }
 
 void InterpreterMacroAssembler::dispatch_via(TosState state, address* table) {
--- a/src/hotspot/cpu/aarch64/interp_masm_aarch64.hpp	Wed Nov 22 14:53:20 2017 +0100
+++ b/src/hotspot/cpu/aarch64/interp_masm_aarch64.hpp	Fri Nov 24 17:19:47 2017 +0000
@@ -55,7 +55,8 @@
                             bool check_exceptions);
 
   // base routine for all dispatches
-  void dispatch_base(TosState state, address* table, bool verifyoop = true);
+  void dispatch_base(TosState state, address* table,
+                     bool verifyoop = true, bool generate_poll = false);
 
  public:
   InterpreterMacroAssembler(CodeBuffer* code) : MacroAssembler(code) {}
@@ -165,12 +166,12 @@
   void dispatch_prolog(TosState state, int step = 0);
   void dispatch_epilog(TosState state, int step = 0);
   // dispatch via rscratch1
-  void dispatch_only(TosState state);
+  void dispatch_only(TosState state, bool generate_poll = false);
   // dispatch normal table via rscratch1 (assume rscratch1 is loaded already)
   void dispatch_only_normal(TosState state);
   void dispatch_only_noverify(TosState state);
   // load rscratch1 from [rbcp + step] and dispatch via rscratch1
-  void dispatch_next(TosState state, int step = 0);
+  void dispatch_next(TosState state, int step = 0, bool generate_poll = false);
   // load rscratch1 from [esi] and dispatch via rscratch1 and table
   void dispatch_via (TosState state, address* table);
 
--- a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp	Wed Nov 22 14:53:20 2017 +0100
+++ b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp	Fri Nov 24 17:19:47 2017 +0000
@@ -287,6 +287,40 @@
   dsb(Assembler::SY);
 }
 
+void MacroAssembler::safepoint_poll(Label& slow_path) {  // branches to slow_path when the poll is set; clobbers rscratch1
+  if (SafepointMechanism::uses_thread_local_poll()) {
+    ldr(rscratch1, Address(rthread, Thread::polling_page_offset()));  // per-thread polling word
+    tbnz(rscratch1, exact_log2(SafepointMechanism::poll_bit()), slow_path);  // poll bit set => slow path
+  } else {
+    unsigned long offset;
+    adrp(rscratch1, ExternalAddress(SafepointSynchronize::address_of_state()), offset);
+    ldrw(rscratch1, Address(rscratch1, offset));  // global SafepointSynchronize state
+    assert(SafepointSynchronize::_not_synchronized == 0, "rewrite this code");
+    cbnz(rscratch1, slow_path);  // anything other than _not_synchronized (0) => slow path
+  }
+}
+
+// Just like safepoint_poll, but use an acquiring load for thread-
+// local polling.
+//
+// We need an acquire here to ensure that any subsequent load of the
+// global SafepointSynchronize::_state flag is ordered after this load
+// of the local Thread::_polling_page.  We don't want this poll to
+// return false (i.e. not safepointing) and a later poll of the global
+// SafepointSynchronize::_state spuriously to return true.
+//
+// This is to avoid a race when we're in a native->Java transition
+// racing the code which wakes up from a safepoint.
+//
+void MacroAssembler::safepoint_poll_acquire(Label& slow_path) {  // clobbers rscratch1
+  if (SafepointMechanism::uses_thread_local_poll()) {
+    lea(rscratch1, Address(rthread, Thread::polling_page_offset()));  // ldar only accepts a plain base register, so form the address first
+    ldar(rscratch1, rscratch1);  // load-acquire of the per-thread polling word
+    tbnz(rscratch1, exact_log2(SafepointMechanism::poll_bit()), slow_path);  // poll bit set => slow path
+  } else {
+    safepoint_poll(slow_path);  // global polling: the plain (non-acquiring) poll is used
+  }
+}
 
 void MacroAssembler::reset_last_Java_frame(bool clear_fp) {
   // we must set sp to zero to clear frame
@@ -4336,15 +4370,26 @@
 }
 
 
+// Move the address of the polling page into dest.
+void MacroAssembler::get_polling_page(Register dest, address page, relocInfo::relocType rtype) {
+  if (SafepointMechanism::uses_thread_local_poll()) {
+    ldr(dest, Address(rthread, Thread::polling_page_offset()));  // per-thread value; page and rtype are not used on this path
+  } else {
+    unsigned long off;
+    adrp(dest, Address(page, rtype), off);  // global page, relocated with rtype; off receives the residual offset
+    assert(off == 0, "polling page must be page aligned");
+  }
+}
+
+// Move the address of the polling page into r (see get_polling_page), then
+// read the polling page; returns what read_polling_page(r, rtype) returns.
 address MacroAssembler::read_polling_page(Register r, address page, relocInfo::relocType rtype) {
-  unsigned long off;
-  adrp(r, Address(page, rtype), off);
-  InstructionMark im(this);
-  code_section()->relocate(inst_mark(), rtype);
-  ldrw(zr, Address(r, off));
-  return inst_mark();
-}
-
+  get_polling_page(r, page, rtype);
+  return read_polling_page(r, rtype);
+}
+
+// Read the polling page.  The address of the polling page must
+// already be in r.
 address MacroAssembler::read_polling_page(Register r, relocInfo::relocType rtype) {
   InstructionMark im(this);
   code_section()->relocate(inst_mark(), rtype);
--- a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp	Wed Nov 22 14:53:20 2017 +0100
+++ b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp	Fri Nov 24 17:19:47 2017 +0000
@@ -97,6 +97,9 @@
  virtual void check_and_handle_popframe(Register java_thread);
  virtual void check_and_handle_earlyret(Register java_thread);
 
+  void safepoint_poll(Label& slow_path);
+  void safepoint_poll_acquire(Label& slow_path);
+
   // Biased locking support
   // lock_reg and obj_reg must be loaded up with the appropriate values.
   // swap_reg is killed.
@@ -1199,6 +1202,7 @@
 
   address read_polling_page(Register r, address page, relocInfo::relocType rtype);
   address read_polling_page(Register r, relocInfo::relocType rtype);
+  void get_polling_page(Register dest, address page, relocInfo::relocType rtype);
 
   // CRC32 code for java.util.zip.CRC32::updateBytes() instrinsic.
   void update_byte_crc32(Register crc, Register val, Register table);
--- a/src/hotspot/cpu/aarch64/nativeInst_aarch64.cpp	Wed Nov 22 14:53:20 2017 +0100
+++ b/src/hotspot/cpu/aarch64/nativeInst_aarch64.cpp	Fri Nov 24 17:19:47 2017 +0000
@@ -245,6 +245,11 @@
   // mov(reg, polling_page);
   // ldr(zr, [reg, #offset]);
   //
+  // or
+  //
+  // ldr(reg, [rthread, #offset]);
+  // ldr(zr, [reg, #offset]);
+  //
   // however, we cannot rely on the polling page address load always
   // directly preceding the read from the page. C1 does that but C2
   // has to do the load and read as two independent instruction
--- a/src/hotspot/cpu/aarch64/sharedRuntime_aarch64.cpp	Wed Nov 22 14:53:20 2017 +0100
+++ b/src/hotspot/cpu/aarch64/sharedRuntime_aarch64.cpp	Fri Nov 24 17:19:47 2017 +0000
@@ -1952,7 +1952,7 @@
       __ strw(rscratch1, Address(rthread, JavaThread::thread_state_offset()));
 
       // Force this write out before the read below
-      __ dmb(Assembler::SY);
+      __ dmb(Assembler::ISH);
     } else {
       __ lea(rscratch2, Address(rthread, JavaThread::thread_state_offset()));
       __ stlrw(rscratch1, rscratch2);
@@ -1970,13 +1970,7 @@
   // check for safepoint operation in progress and/or pending suspend requests
   Label safepoint_in_progress, safepoint_in_progress_done;
   {
-    assert(SafepointSynchronize::_not_synchronized == 0, "fix this code");
-    unsigned long offset;
-    __ adrp(rscratch1,
-            ExternalAddress((address)SafepointSynchronize::address_of_state()),
-            offset);
-    __ ldrw(rscratch1, Address(rscratch1, offset));
-    __ cbnzw(rscratch1, safepoint_in_progress);
+    __ safepoint_poll_acquire(safepoint_in_progress);
     __ ldrw(rscratch1, Address(rthread, JavaThread::suspend_flags_offset()));
     __ cbnzw(rscratch1, safepoint_in_progress);
     __ bind(safepoint_in_progress_done);
@@ -2932,8 +2926,11 @@
 
   if (!cause_return) {
     // overwrite the return address pushed by save_live_registers
-    __ ldr(c_rarg0, Address(rthread, JavaThread::saved_exception_pc_offset()));
-    __ str(c_rarg0, Address(rfp, wordSize));
+    // Additionally, r20 is a callee-saved register so we can look at
+    // it later to determine if someone changed the return address for
+    // us!
+    __ ldr(r20, Address(rthread, JavaThread::saved_exception_pc_offset()));
+    __ str(r20, Address(rfp, wordSize));
   }
 
   // Do the call
@@ -2968,11 +2965,40 @@
   // No exception case
   __ bind(noException);
 
+  Label no_adjust, bail;
+  if (SafepointMechanism::uses_thread_local_poll() && !cause_return) {
+    // If our stashed return pc was modified by the runtime we avoid touching it
+    __ ldr(rscratch1, Address(rfp, wordSize));
+    __ cmp(r20, rscratch1);
+    __ br(Assembler::NE, no_adjust);
+
+#ifdef ASSERT
+    // Verify the correct encoding of the poll we're about to skip.
+    // See NativeInstruction::is_ldrw_to_zr()
+    __ ldrw(rscratch1, Address(r20));
+    __ ubfx(rscratch2, rscratch1, 22, 10);
+    __ cmpw(rscratch2, 0b1011100101);
+    __ br(Assembler::NE, bail);
+    __ ubfx(rscratch2, rscratch1, 0, 5);
+    __ cmpw(rscratch2, 0b11111);
+    __ br(Assembler::NE, bail);
+#endif
+    // Adjust return pc forward to step over the safepoint poll instruction
+    __ add(r20, r20, NativeInstruction::instruction_size);
+    __ str(r20, Address(rfp, wordSize));
+  }
+
+  __ bind(no_adjust);
   // Normal exit, restore registers and exit.
   RegisterSaver::restore_live_registers(masm, save_vectors);
 
   __ ret(lr);
 
+#ifdef ASSERT
+  __ bind(bail);
+  __ stop("Attempting to adjust pc to skip safepoint poll but the return point is not what we expected");
+#endif
+
   // Make sure all code is generated
   masm->flush();
 
--- a/src/hotspot/cpu/aarch64/templateInterpreterGenerator_aarch64.cpp	Wed Nov 22 14:53:20 2017 +0100
+++ b/src/hotspot/cpu/aarch64/templateInterpreterGenerator_aarch64.cpp	Fri Nov 24 17:19:47 2017 +0000
@@ -967,12 +967,7 @@
 
     Label slow_path;
     // If we need a safepoint check, generate full interpreter entry.
-    ExternalAddress state(SafepointSynchronize::address_of_state());
-    unsigned long offset;
-    __ adrp(rscratch1, ExternalAddress(SafepointSynchronize::address_of_state()), offset);
-    __ ldrw(rscratch1, Address(rscratch1, offset));
-    assert(SafepointSynchronize::_not_synchronized == 0, "rewrite this code");
-    __ cbnz(rscratch1, slow_path);
+    __ safepoint_poll(slow_path);
 
     // We don't generate local frame and don't align stack because
     // we call stub code and there is no safepoint on this path.
@@ -986,6 +981,7 @@
     __ ldrw(val, Address(esp, 0));              // byte value
     __ ldrw(crc, Address(esp, wordSize));       // Initial CRC
 
+    unsigned long offset;
     __ adrp(tbl, ExternalAddress(StubRoutines::crc_table_addr()), offset);
     __ add(tbl, tbl, offset);
 
@@ -1020,12 +1016,7 @@
 
     Label slow_path;
     // If we need a safepoint check, generate full interpreter entry.
-    ExternalAddress state(SafepointSynchronize::address_of_state());
-    unsigned long offset;
-    __ adrp(rscratch1, ExternalAddress(SafepointSynchronize::address_of_state()), offset);
-    __ ldrw(rscratch1, Address(rscratch1, offset));
-    assert(SafepointSynchronize::_not_synchronized == 0, "rewrite this code");
-    __ cbnz(rscratch1, slow_path);
+    __ safepoint_poll(slow_path);
 
     // We don't generate local frame and don't align stack because
     // we call stub code and there is no safepoint on this path.
@@ -1375,7 +1366,7 @@
   if (os::is_MP()) {
     if (UseMembar) {
       // Force this write out before the read below
-      __ dsb(Assembler::SY);
+      __ dmb(Assembler::ISH);
     } else {
       // Write serialization page so VM thread can do a pseudo remote membar.
       // We use the current thread pointer to calculate a thread specific
@@ -1387,16 +1378,8 @@
 
   // check for safepoint operation in progress and/or pending suspend requests
   {
-    Label Continue;
-    {
-      unsigned long offset;
-      __ adrp(rscratch2, SafepointSynchronize::address_of_state(), offset);
-      __ ldrw(rscratch2, Address(rscratch2, offset));
-    }
-    assert(SafepointSynchronize::_not_synchronized == 0,
-           "SafepointSynchronize::_not_synchronized");
-    Label L;
-    __ cbnz(rscratch2, L);
+    Label L, Continue;
+    __ safepoint_poll_acquire(L);
     __ ldrw(rscratch2, Address(rthread, JavaThread::suspend_flags_offset()));
     __ cbz(rscratch2, Continue);
     __ bind(L);
--- a/src/hotspot/cpu/aarch64/templateTable_aarch64.cpp	Wed Nov 22 14:53:20 2017 +0100
+++ b/src/hotspot/cpu/aarch64/templateTable_aarch64.cpp	Fri Nov 24 17:19:47 2017 +0000
@@ -1717,7 +1717,7 @@
     __ push_i(r1);
     // Adjust the bcp by the 16-bit displacement in r2
     __ add(rbcp, rbcp, r2);
-    __ dispatch_only(vtos);
+    __ dispatch_only(vtos, /*generate_poll*/true);
     return;
   }
 
@@ -1833,7 +1833,7 @@
   // continue with the bytecode @ target
   // rscratch1: target bytecode
   // rbcp: target bcp
-  __ dispatch_only(vtos);
+  __ dispatch_only(vtos, /*generate_poll*/true);
 
   if (UseLoopCounter) {
     if (ProfileInterpreter) {
@@ -1973,7 +1973,7 @@
   __ ldr(rbcp, Address(rmethod, Method::const_offset()));
   __ lea(rbcp, Address(rbcp, r1));
   __ add(rbcp, rbcp, in_bytes(ConstMethod::codes_offset()));
-  __ dispatch_next(vtos);
+  __ dispatch_next(vtos, 0, /*generate_poll*/true);
 }
 
 void TemplateTable::wide_ret() {
@@ -1984,7 +1984,7 @@
   __ ldr(rbcp, Address(rmethod, Method::const_offset()));
   __ lea(rbcp, Address(rbcp, r1));
   __ add(rbcp, rbcp, in_bytes(ConstMethod::codes_offset()));
-  __ dispatch_next(vtos);
+  __ dispatch_next(vtos, 0, /*generate_poll*/true);
 }
 
 
@@ -2014,7 +2014,7 @@
   __ rev32(r3, r3);
   __ load_unsigned_byte(rscratch1, Address(rbcp, r3, Address::sxtw(0)));
   __ add(rbcp, rbcp, r3, ext::sxtw);
-  __ dispatch_only(vtos);
+  __ dispatch_only(vtos, /*generate_poll*/true);
   // handle default
   __ bind(default_case);
   __ profile_switch_default(r0);
@@ -2064,7 +2064,7 @@
   __ rev32(r3, r3);
   __ add(rbcp, rbcp, r3, ext::sxtw);
   __ ldrb(rscratch1, Address(rbcp, 0));
-  __ dispatch_only(vtos);
+  __ dispatch_only(vtos, /*generate_poll*/true);
 }
 
 void TemplateTable::fast_binaryswitch() {
@@ -2162,7 +2162,7 @@
   __ rev32(j, j);
   __ load_unsigned_byte(rscratch1, Address(rbcp, j, Address::sxtw(0)));
   __ lea(rbcp, Address(rbcp, j, Address::sxtw(0)));
-  __ dispatch_only(vtos);
+  __ dispatch_only(vtos, /*generate_poll*/true);
 
   // default case -> j = default offset
   __ bind(default_case);
@@ -2171,7 +2171,7 @@
   __ rev32(j, j);
   __ load_unsigned_byte(rscratch1, Address(rbcp, j, Address::sxtw(0)));
   __ lea(rbcp, Address(rbcp, j, Address::sxtw(0)));
-  __ dispatch_only(vtos);
+  __ dispatch_only(vtos, /*generate_poll*/true);
 }