changeset 1988:b1a2afa37ec4

7003271: Hotspot should track cumulative Java heap bytes allocated on a per-thread basis. Summary: Track allocated bytes in Threads, update on TLAB retirement and direct allocation in Eden and tenured, add JNI methods for ThreadMXBean. Reviewed-by: coleenp, kvn, dholmes, ysr
author phh
date Fri, 07 Jan 2011 10:42:32 -0500
parents 039eb4201e06
children 55d7d18ccff9
files src/cpu/sparc/vm/assembler_sparc.cpp src/cpu/sparc/vm/assembler_sparc.hpp src/cpu/sparc/vm/c1_MacroAssembler_sparc.cpp src/cpu/sparc/vm/c1_MacroAssembler_sparc.hpp src/cpu/sparc/vm/c1_Runtime1_sparc.cpp src/cpu/sparc/vm/templateTable_sparc.cpp src/cpu/x86/vm/assembler_x86.cpp src/cpu/x86/vm/assembler_x86.hpp src/cpu/x86/vm/c1_MacroAssembler_x86.cpp src/cpu/x86/vm/c1_Runtime1_x86.cpp src/cpu/x86/vm/templateTable_x86_32.cpp src/cpu/x86/vm/templateTable_x86_64.cpp src/os/solaris/vm/os_solaris.cpp src/os/solaris/vm/thread_solaris.inline.hpp src/share/vm/gc_interface/collectedHeap.inline.hpp src/share/vm/memory/threadLocalAllocBuffer.cpp src/share/vm/memory/threadLocalAllocBuffer.hpp src/share/vm/opto/macro.cpp src/share/vm/prims/jvmti.xml src/share/vm/prims/jvmtiEnv.cpp src/share/vm/runtime/thread.cpp src/share/vm/runtime/thread.hpp src/share/vm/services/jmm.h src/share/vm/services/management.cpp src/share/vm/services/threadService.cpp src/share/vm/services/threadService.hpp
diffstat 26 files changed, 452 insertions(+), 217 deletions(-) [+]
line wrap: on
line diff
--- a/src/cpu/sparc/vm/assembler_sparc.cpp	Fri Jan 07 03:38:19 2011 -0800
+++ b/src/cpu/sparc/vm/assembler_sparc.cpp	Fri Jan 07 10:42:32 2011 -0500
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -4083,11 +4083,15 @@
   store_klass(t2, top);
   verify_oop(top);
 
+  ld_ptr(G2_thread, in_bytes(JavaThread::tlab_start_offset()), t1);
+  sub(top, t1, t1); // size of tlab's allocated portion
+  incr_allocated_bytes(t1, 0, t2);
+
   // refill the tlab with an eden allocation
   bind(do_refill);
   ld_ptr(G2_thread, in_bytes(JavaThread::tlab_size_offset()), t1);
   sll_ptr(t1, LogHeapWordSize, t1);
-  // add object_size ??
+  // allocate new tlab, address returned in top
   eden_allocate(top, t1, 0, t2, t3, slow_case);
 
   st_ptr(top, G2_thread, in_bytes(JavaThread::tlab_start_offset()));
@@ -4115,6 +4119,22 @@
   delayed()->nop();
 }
 
+void MacroAssembler::incr_allocated_bytes(Register var_size_in_bytes,
+                                          int con_size_in_bytes,
+                                          Register t1) {
+  // Bump total bytes allocated by this thread
+  assert(t1->is_global(), "must be global reg"); // so all 64 bits are saved on a context switch
+  assert_different_registers(var_size_in_bytes, t1);
+  // v8 support has gone the way of the dodo
+  ldx(G2_thread, in_bytes(JavaThread::allocated_bytes_offset()), t1);
+  if (var_size_in_bytes->is_valid()) {
+    add(t1, var_size_in_bytes, t1);
+  } else {
+    add(t1, con_size_in_bytes, t1);
+  }
+  stx(t1, G2_thread, in_bytes(JavaThread::allocated_bytes_offset()));
+}
+
 Assembler::Condition MacroAssembler::negate_condition(Assembler::Condition cond) {
   switch (cond) {
     // Note some conditions are synonyms for others
--- a/src/cpu/sparc/vm/assembler_sparc.hpp	Fri Jan 07 03:38:19 2011 -0800
+++ b/src/cpu/sparc/vm/assembler_sparc.hpp	Fri Jan 07 10:42:32 2011 -0500
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -2388,6 +2388,7 @@
     Label&   slow_case                 // continuation point if fast allocation fails
   );
   void tlab_refill(Label& retry_tlab, Label& try_eden, Label& slow_case);
+  void incr_allocated_bytes(Register var_size_in_bytes, int con_size_in_bytes, Register t1);
 
   // interface method calling
   void lookup_interface_method(Register recv_klass,
--- a/src/cpu/sparc/vm/c1_MacroAssembler_sparc.cpp	Fri Jan 07 03:38:19 2011 -0800
+++ b/src/cpu/sparc/vm/c1_MacroAssembler_sparc.cpp	Fri Jan 07 10:42:32 2011 -0500
@@ -166,7 +166,7 @@
   Register obj,                        // result: pointer to object after successful allocation
   Register var_size_in_bytes,          // object size in bytes if unknown at compile time; invalid otherwise
   int      con_size_in_bytes,          // object size in bytes if   known at compile time
-  Register t1,                         // temp register
+  Register t1,                         // temp register, must be global register for incr_allocated_bytes
   Register t2,                         // temp register
   Label&   slow_case                   // continuation point if fast allocation fails
 ) {
@@ -174,6 +174,7 @@
     tlab_allocate(obj, var_size_in_bytes, con_size_in_bytes, t1, slow_case);
   } else {
     eden_allocate(obj, var_size_in_bytes, con_size_in_bytes, t1, t2, slow_case);
+    incr_allocated_bytes(var_size_in_bytes, con_size_in_bytes, t1);
   }
 }
 
@@ -214,7 +215,7 @@
 void C1_MacroAssembler::allocate_object(
   Register obj,                        // result: pointer to object after successful allocation
   Register t1,                         // temp register
-  Register t2,                         // temp register
+  Register t2,                         // temp register, must be a global register for try_allocate
   Register t3,                         // temp register
   int      hdr_size,                   // object header size in words
   int      obj_size,                   // object size in words
--- a/src/cpu/sparc/vm/c1_MacroAssembler_sparc.hpp	Fri Jan 07 03:38:19 2011 -0800
+++ b/src/cpu/sparc/vm/c1_MacroAssembler_sparc.hpp	Fri Jan 07 10:42:32 2011 -0500
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1999, 2011, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
--- a/src/cpu/sparc/vm/c1_Runtime1_sparc.cpp	Fri Jan 07 03:38:19 2011 -0800
+++ b/src/cpu/sparc/vm/c1_Runtime1_sparc.cpp	Fri Jan 07 10:42:32 2011 -0500
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1999, 2011, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -448,7 +448,9 @@
 
           // get the instance size
           __ ld(G5_klass, klassOopDesc::header_size() * HeapWordSize + Klass::layout_helper_offset_in_bytes(), G1_obj_size);
+
           __ tlab_allocate(O0_obj, G1_obj_size, 0, G3_t1, slow_path);
+
           __ initialize_object(O0_obj, G5_klass, G1_obj_size, 0, G3_t1, G4_t2);
           __ verify_oop(O0_obj);
           __ mov(O0, I0);
@@ -459,6 +461,8 @@
           // get the instance size
           __ ld(G5_klass, klassOopDesc::header_size() * HeapWordSize + Klass::layout_helper_offset_in_bytes(), G1_obj_size);
           __ eden_allocate(O0_obj, G1_obj_size, 0, G3_t1, G4_t2, slow_path);
+          __ incr_allocated_bytes(G1_obj_size, 0, G3_t1);
+
           __ initialize_object(O0_obj, G5_klass, G1_obj_size, 0, G3_t1, G4_t2);
           __ verify_oop(O0_obj);
           __ mov(O0, I0);
@@ -573,6 +577,7 @@
           __ and3(G1_arr_size, ~MinObjAlignmentInBytesMask, G1_arr_size);
 
           __ eden_allocate(O0_obj, G1_arr_size, 0, G3_t1, O1_t2, slow_path);  // preserves G1_arr_size
+          __ incr_allocated_bytes(G1_arr_size, 0, G3_t1);
 
           __ initialize_header(O0_obj, G5_klass, G4_length, G3_t1, O1_t2);
           __ ldub(klass_lh, G3_t1, klass_lh_header_size_offset);
--- a/src/cpu/sparc/vm/templateTable_sparc.cpp	Fri Jan 07 03:38:19 2011 -0800
+++ b/src/cpu/sparc/vm/templateTable_sparc.cpp	Fri Jan 07 10:42:32 2011 -0500
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -3393,21 +3393,21 @@
     __ delayed()->st_ptr(RnewTopValue, G2_thread, in_bytes(JavaThread::tlab_top_offset()));
 
     if (allow_shared_alloc) {
-    // Check if tlab should be discarded (refill_waste_limit >= free)
-    __ ld_ptr(G2_thread, in_bytes(JavaThread::tlab_refill_waste_limit_offset()), RtlabWasteLimitValue);
-    __ sub(RendValue, RoldTopValue, RfreeValue);
+      // Check if tlab should be discarded (refill_waste_limit >= free)
+      __ ld_ptr(G2_thread, in_bytes(JavaThread::tlab_refill_waste_limit_offset()), RtlabWasteLimitValue);
+      __ sub(RendValue, RoldTopValue, RfreeValue);
 #ifdef _LP64
-    __ srlx(RfreeValue, LogHeapWordSize, RfreeValue);
+      __ srlx(RfreeValue, LogHeapWordSize, RfreeValue);
 #else
-    __ srl(RfreeValue, LogHeapWordSize, RfreeValue);
+      __ srl(RfreeValue, LogHeapWordSize, RfreeValue);
 #endif
-    __ cmp(RtlabWasteLimitValue, RfreeValue);
-    __ brx(Assembler::greaterEqualUnsigned, false, Assembler::pt, slow_case); // tlab waste is small
-    __ delayed()->nop();
-
-    // increment waste limit to prevent getting stuck on this slow path
-    __ add(RtlabWasteLimitValue, ThreadLocalAllocBuffer::refill_waste_limit_increment(), RtlabWasteLimitValue);
-    __ st_ptr(RtlabWasteLimitValue, G2_thread, in_bytes(JavaThread::tlab_refill_waste_limit_offset()));
+      __ cmp(RtlabWasteLimitValue, RfreeValue);
+      __ brx(Assembler::greaterEqualUnsigned, false, Assembler::pt, slow_case); // tlab waste is small
+      __ delayed()->nop();
+
+      // increment waste limit to prevent getting stuck on this slow path
+      __ add(RtlabWasteLimitValue, ThreadLocalAllocBuffer::refill_waste_limit_increment(), RtlabWasteLimitValue);
+      __ st_ptr(RtlabWasteLimitValue, G2_thread, in_bytes(JavaThread::tlab_refill_waste_limit_offset()));
     } else {
       // No allocation in the shared eden.
       __ br(Assembler::always, false, Assembler::pt, slow_case);
@@ -3445,6 +3445,9 @@
     __ cmp(RoldTopValue, RnewTopValue);
     __ brx(Assembler::notEqual, false, Assembler::pn, retry);
     __ delayed()->nop();
+
+    // bump total bytes allocated by this thread
+    __ incr_allocated_bytes(Roffset, 0, G1_scratch);
   }
 
   if (UseTLAB || Universe::heap()->supports_inline_contig_alloc()) {
--- a/src/cpu/x86/vm/assembler_x86.cpp	Fri Jan 07 03:38:19 2011 -0800
+++ b/src/cpu/x86/vm/assembler_x86.cpp	Fri Jan 07 10:42:32 2011 -0500
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -820,7 +820,20 @@
 }
 
 
-// Now the Assembler instruction (identical for 32/64 bits)
+// Now the Assembler instructions (identical for 32/64 bits)
+
+void Assembler::adcl(Address dst, int32_t imm32) {
+  InstructionMark im(this);
+  prefix(dst);
+  emit_arith_operand(0x81, rdx, dst, imm32);
+}
+
+void Assembler::adcl(Address dst, Register src) {
+  InstructionMark im(this);
+  prefix(dst, src);
+  emit_byte(0x11);
+  emit_operand(src, dst);
+}
 
 void Assembler::adcl(Register dst, int32_t imm32) {
   prefix(dst);
@@ -2195,9 +2208,7 @@
 void Assembler::orl(Address dst, int32_t imm32) {
   InstructionMark im(this);
   prefix(dst);
-  emit_byte(0x81);
-  emit_operand(rcx, dst, 4);
-  emit_long(imm32);
+  emit_arith_operand(0x81, rcx, dst, imm32);
 }
 
 void Assembler::orl(Register dst, int32_t imm32) {
@@ -2205,7 +2216,6 @@
   emit_arith(0x81, 0xC8, dst, imm32);
 }
 
-
 void Assembler::orl(Register dst, Address src) {
   InstructionMark im(this);
   prefix(src, dst);
@@ -2213,7 +2223,6 @@
   emit_operand(dst, src);
 }
 
-
 void Assembler::orl(Register dst, Register src) {
   (void) prefix_and_encode(dst->encoding(), src->encoding());
   emit_arith(0x0B, 0xC0, dst, src);
@@ -2692,15 +2701,14 @@
 void Assembler::subl(Address dst, int32_t imm32) {
   InstructionMark im(this);
   prefix(dst);
-  if (is8bit(imm32)) {
-    emit_byte(0x83);
-    emit_operand(rbp, dst, 1);
-    emit_byte(imm32 & 0xFF);
-  } else {
-    emit_byte(0x81);
-    emit_operand(rbp, dst, 4);
-    emit_long(imm32);
-  }
+  emit_arith_operand(0x81, rbp, dst, imm32);
+}
+
+void Assembler::subl(Address dst, Register src) {
+  InstructionMark im(this);
+  prefix(dst, src);
+  emit_byte(0x29);
+  emit_operand(src, dst);
 }
 
 void Assembler::subl(Register dst, int32_t imm32) {
@@ -2708,13 +2716,6 @@
   emit_arith(0x81, 0xE8, dst, imm32);
 }
 
-void Assembler::subl(Address dst, Register src) {
-  InstructionMark im(this);
-  prefix(dst, src);
-  emit_byte(0x29);
-  emit_operand(src, dst);
-}
-
 void Assembler::subl(Register dst, Address src) {
   InstructionMark im(this);
   prefix(src, dst);
@@ -4333,6 +4334,7 @@
   emit_byte(0xD3);
   emit_byte(0xF8 | encode);
 }
+
 void Assembler::sbbq(Address dst, int32_t imm32) {
   InstructionMark im(this);
   prefixq(dst);
@@ -4392,15 +4394,14 @@
 void Assembler::subq(Address dst, int32_t imm32) {
   InstructionMark im(this);
   prefixq(dst);
-  if (is8bit(imm32)) {
-    emit_byte(0x83);
-    emit_operand(rbp, dst, 1);
-    emit_byte(imm32 & 0xFF);
-  } else {
-    emit_byte(0x81);
-    emit_operand(rbp, dst, 4);
-    emit_long(imm32);
-  }
+  emit_arith_operand(0x81, rbp, dst, imm32);
+}
+
+void Assembler::subq(Address dst, Register src) {
+  InstructionMark im(this);
+  prefixq(dst, src);
+  emit_byte(0x29);
+  emit_operand(src, dst);
 }
 
 void Assembler::subq(Register dst, int32_t imm32) {
@@ -4408,13 +4409,6 @@
   emit_arith(0x81, 0xE8, dst, imm32);
 }
 
-void Assembler::subq(Address dst, Register src) {
-  InstructionMark im(this);
-  prefixq(dst, src);
-  emit_byte(0x29);
-  emit_operand(src, dst);
-}
-
 void Assembler::subq(Register dst, Address src) {
   InstructionMark im(this);
   prefixq(src, dst);
@@ -7136,9 +7130,9 @@
 }
 
 // Preserves rbx, and rdx.
-void MacroAssembler::tlab_refill(Label& retry,
-                                 Label& try_eden,
-                                 Label& slow_case) {
+Register MacroAssembler::tlab_refill(Label& retry,
+                                     Label& try_eden,
+                                     Label& slow_case) {
   Register top = rax;
   Register t1  = rcx;
   Register t2  = rsi;
@@ -7185,7 +7179,7 @@
 
   // if tlab is currently allocated (top or end != null) then
   // fill [top, end + alignment_reserve) with array object
-  testptr (top, top);
+  testptr(top, top);
   jcc(Assembler::zero, do_refill);
 
   // set up the mark word
@@ -7197,16 +7191,20 @@
   movl(Address(top, arrayOopDesc::length_offset_in_bytes()), t1);
   // set klass to intArrayKlass
   // dubious reloc why not an oop reloc?
-  movptr(t1, ExternalAddress((address) Universe::intArrayKlassObj_addr()));
+  movptr(t1, ExternalAddress((address)Universe::intArrayKlassObj_addr()));
   // store klass last.  concurrent gcs assumes klass length is valid if
   // klass field is not null.
   store_klass(top, t1);
 
+  movptr(t1, top);
+  subptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_start_offset())));
+  incr_allocated_bytes(thread_reg, t1, 0);
+
   // refill the tlab with an eden allocation
   bind(do_refill);
   movptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_size_offset())));
   shlptr(t1, LogHeapWordSize);
-  // add object_size ??
+  // allocate new tlab, address returned in top
   eden_allocate(top, t1, 0, t2, slow_case);
 
   // Check that t1 was preserved in eden_allocate.
@@ -7234,6 +7232,34 @@
   movptr(Address(thread_reg, in_bytes(JavaThread::tlab_end_offset())), top);
   verify_tlab();
   jmp(retry);
+
+  return thread_reg; // for use by caller
+}
+
+void MacroAssembler::incr_allocated_bytes(Register thread,
+                                          Register var_size_in_bytes,
+                                          int con_size_in_bytes,
+                                          Register t1) {
+#ifdef _LP64
+  if (var_size_in_bytes->is_valid()) {
+    addq(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), var_size_in_bytes);
+  } else {
+    addq(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), con_size_in_bytes);
+  }
+#else
+  if (!thread->is_valid()) {
+    assert(t1->is_valid(), "need temp reg");
+    thread = t1;
+    get_thread(thread);
+  }
+
+  if (var_size_in_bytes->is_valid()) {
+    addl(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), var_size_in_bytes);
+  } else {
+    addl(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), con_size_in_bytes);
+  }
+  adcl(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())+4), 0);
+#endif
 }
 
 static const double     pi_4 =  0.7853981633974483;
--- a/src/cpu/x86/vm/assembler_x86.hpp	Fri Jan 07 03:38:19 2011 -0800
+++ b/src/cpu/x86/vm/assembler_x86.hpp	Fri Jan 07 10:42:32 2011 -0500
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -674,12 +674,14 @@
   // Utilities
 
 #ifdef _LP64
- static bool is_simm(int64_t x, int nbits) { return -( CONST64(1) << (nbits-1) )  <= x   &&   x  <  ( CONST64(1) << (nbits-1) ); }
+ static bool is_simm(int64_t x, int nbits) { return -(CONST64(1) << (nbits-1)) <= x &&
+                                                    x < (CONST64(1) << (nbits-1)); }
  static bool is_simm32(int64_t x) { return x == (int64_t)(int32_t)x; }
 #else
- static bool is_simm(int32_t x, int nbits) { return -( 1 << (nbits-1) )  <= x   &&   x  <  ( 1 << (nbits-1) ); }
+ static bool is_simm(int32_t x, int nbits) { return -(1 << (nbits-1)) <= x &&
+                                                    x < (1 << (nbits-1)); }
  static bool is_simm32(int32_t x) { return true; }
-#endif // LP64
+#endif // _LP64
 
   // Generic instructions
   // Does 32bit or 64bit as needed for the platform. In some sense these
@@ -705,7 +707,6 @@
   void push(void* v);
   void pop(void* v);
 
-
   // These do register sized moves/scans
   void rep_mov();
   void rep_set();
@@ -716,6 +717,8 @@
 
   // Vanilla instructions in lexical order
 
+  void adcl(Address dst, int32_t imm32);
+  void adcl(Address dst, Register src);
   void adcl(Register dst, int32_t imm32);
   void adcl(Register dst, Address src);
   void adcl(Register dst, Register src);
@@ -724,7 +727,6 @@
   void adcq(Register dst, Address src);
   void adcq(Register dst, Register src);
 
-
   void addl(Address dst, int32_t imm32);
   void addl(Address dst, Register src);
   void addl(Register dst, int32_t imm32);
@@ -737,7 +739,6 @@
   void addq(Register dst, Address src);
   void addq(Register dst, Register src);
 
-
   void addr_nop_4();
   void addr_nop_5();
   void addr_nop_7();
@@ -759,7 +760,6 @@
   void andq(Register dst, Address src);
   void andq(Register dst, Register src);
 
-
   // Bitwise Logical AND of Packed Double-Precision Floating-Point Values
   void andpd(XMMRegister dst, Address src);
   void andpd(XMMRegister dst, XMMRegister src);
@@ -1151,7 +1151,7 @@
 #ifdef _LP64
   void movq(Register dst, Register src);
   void movq(Register dst, Address src);
-  void movq(Address dst, Register src);
+  void movq(Address  dst, Register src);
 #endif
 
   void movq(Address     dst, MMXRegister src );
@@ -1177,7 +1177,7 @@
   void movsbq(Register dst, Register src);
 
   // Move signed 32bit immediate to 64bit extending sign
-  void movslq(Address dst, int32_t imm64);
+  void movslq(Address  dst, int32_t imm64);
   void movslq(Register dst, int32_t imm64);
 
   void movslq(Register dst, Address src);
@@ -1857,7 +1857,10 @@
     Register t2,                       // temp register
     Label&   slow_case                 // continuation point if fast allocation fails
   );
-  void tlab_refill(Label& retry_tlab, Label& try_eden, Label& slow_case);
+  Register tlab_refill(Label& retry_tlab, Label& try_eden, Label& slow_case); // returns TLS address
+  void incr_allocated_bytes(Register thread,
+                            Register var_size_in_bytes, int con_size_in_bytes,
+                            Register t1 = noreg);
 
   // interface method calling
   void lookup_interface_method(Register recv_klass,
@@ -2180,9 +2183,9 @@
   void divss(XMMRegister dst, Address src)        { Assembler::divss(dst, src); }
   void divss(XMMRegister dst, AddressLiteral src) { Assembler::divss(dst, as_Address(src)); }
 
-  void movsd(XMMRegister dst, XMMRegister src)    { Assembler::movsd(dst, src); }
-  void movsd(Address dst, XMMRegister src)        { Assembler::movsd(dst, src); }
-  void movsd(XMMRegister dst, Address src)        { Assembler::movsd(dst, src); }
+  void movsd(XMMRegister dst, XMMRegister src) { Assembler::movsd(dst, src); }
+  void movsd(Address dst, XMMRegister src)     { Assembler::movsd(dst, src); }
+  void movsd(XMMRegister dst, Address src)     { Assembler::movsd(dst, src); }
   void movsd(XMMRegister dst, AddressLiteral src) { Assembler::movsd(dst, as_Address(src)); }
 
   void mulsd(XMMRegister dst, XMMRegister src)    { Assembler::mulsd(dst, src); }
--- a/src/cpu/x86/vm/c1_MacroAssembler_x86.cpp	Fri Jan 07 03:38:19 2011 -0800
+++ b/src/cpu/x86/vm/c1_MacroAssembler_x86.cpp	Fri Jan 07 10:42:32 2011 -0500
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1999, 2011, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -141,6 +141,7 @@
     tlab_allocate(obj, var_size_in_bytes, con_size_in_bytes, t1, t2, slow_case);
   } else {
     eden_allocate(obj, var_size_in_bytes, con_size_in_bytes, t1, slow_case);
+    incr_allocated_bytes(noreg, var_size_in_bytes, con_size_in_bytes, t1);
   }
 }
 
@@ -234,7 +235,7 @@
 
 void C1_MacroAssembler::allocate_object(Register obj, Register t1, Register t2, int header_size, int object_size, Register klass, Label& slow_case) {
   assert(obj == rax, "obj must be in rax, for cmpxchg");
-  assert(obj != t1 && obj != t2 && t1 != t2, "registers must be different"); // XXX really?
+  assert_different_registers(obj, t1, t2); // XXX really?
   assert(header_size >= 0 && object_size >= header_size, "illegal sizes");
 
   try_allocate(obj, noreg, object_size * BytesPerWord, t1, t2, slow_case);
--- a/src/cpu/x86/vm/c1_Runtime1_x86.cpp	Fri Jan 07 03:38:19 2011 -0800
+++ b/src/cpu/x86/vm/c1_Runtime1_x86.cpp	Fri Jan 07 10:42:32 2011 -0500
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1999, 2011, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -977,7 +977,6 @@
         // verify that that there is really a valid exception in rax,
         __ verify_not_null_oop(exception_oop);
 
-
         oop_maps = new OopMapSet();
         OopMap* oop_map = generate_oop_map(sasm, 1);
         generate_handle_exception(sasm, oop_maps, oop_map);
@@ -1037,13 +1036,16 @@
           // if we got here then the TLAB allocation failed, so try
           // refilling the TLAB or allocating directly from eden.
           Label retry_tlab, try_eden;
-          __ tlab_refill(retry_tlab, try_eden, slow_path); // does not destroy rdx (klass)
+          const Register thread =
+            __ tlab_refill(retry_tlab, try_eden, slow_path); // does not destroy rdx (klass), returns rdi
 
           __ bind(retry_tlab);
 
           // get the instance size (size is postive so movl is fine for 64bit)
           __ movl(obj_size, Address(klass, klassOopDesc::header_size() * HeapWordSize + Klass::layout_helper_offset_in_bytes()));
+
           __ tlab_allocate(obj, obj_size, 0, t1, t2, slow_path);
+
           __ initialize_object(obj, klass, obj_size, 0, t1, t2);
           __ verify_oop(obj);
           __ pop(rbx);
@@ -1053,7 +1055,10 @@
           __ bind(try_eden);
           // get the instance size (size is postive so movl is fine for 64bit)
           __ movl(obj_size, Address(klass, klassOopDesc::header_size() * HeapWordSize + Klass::layout_helper_offset_in_bytes()));
+
           __ eden_allocate(obj, obj_size, 0, t1, slow_path);
+          __ incr_allocated_bytes(thread, obj_size, 0);
+
           __ initialize_object(obj, klass, obj_size, 0, t1, t2);
           __ verify_oop(obj);
           __ pop(rbx);
@@ -1143,12 +1148,13 @@
           // if we got here then the TLAB allocation failed, so try
           // refilling the TLAB or allocating directly from eden.
           Label retry_tlab, try_eden;
-          __ tlab_refill(retry_tlab, try_eden, slow_path); // preserves rbx, & rdx
+          const Register thread =
+            __ tlab_refill(retry_tlab, try_eden, slow_path); // preserves rbx & rdx, returns rdi
 
           __ bind(retry_tlab);
 
           // get the allocation size: round_up(hdr + length << (layout_helper & 0x1F))
-          // since size is postive movl does right thing on 64bit
+          // since size is positive movl does right thing on 64bit
           __ movl(t1, Address(klass, klassOopDesc::header_size() * HeapWordSize + Klass::layout_helper_offset_in_bytes()));
           // since size is postive movl does right thing on 64bit
           __ movl(arr_size, length);
@@ -1175,7 +1181,7 @@
 
           __ bind(try_eden);
           // get the allocation size: round_up(hdr + length << (layout_helper & 0x1F))
-          // since size is postive movl does right thing on 64bit
+          // since size is positive movl does right thing on 64bit
           __ movl(t1, Address(klass, klassOopDesc::header_size() * HeapWordSize + Klass::layout_helper_offset_in_bytes()));
           // since size is postive movl does right thing on 64bit
           __ movl(arr_size, length);
@@ -1188,6 +1194,7 @@
           __ andptr(arr_size, ~MinObjAlignmentInBytesMask);
 
           __ eden_allocate(obj, arr_size, 0, t1, slow_path);  // preserves arr_size
+          __ incr_allocated_bytes(thread, arr_size, 0);
 
           __ initialize_header(obj, klass, length, t1, t2);
           __ movb(t1, Address(klass, klassOopDesc::header_size() * HeapWordSize + Klass::layout_helper_offset_in_bytes() + (Klass::_lh_header_size_shift / BitsPerByte)));
--- a/src/cpu/x86/vm/templateTable_x86_32.cpp	Fri Jan 07 03:38:19 2011 -0800
+++ b/src/cpu/x86/vm/templateTable_x86_32.cpp	Fri Jan 07 10:42:32 2011 -0500
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -3203,10 +3203,12 @@
   const bool allow_shared_alloc =
     Universe::heap()->supports_inline_contig_alloc() && !CMSIncrementalMode;
 
+  const Register thread = rcx;
+  if (UseTLAB || allow_shared_alloc) {
+    __ get_thread(thread);
+  }
+
   if (UseTLAB) {
-    const Register thread = rcx;
-
-    __ get_thread(thread);
     __ movptr(rax, Address(thread, in_bytes(JavaThread::tlab_top_offset())));
     __ lea(rbx, Address(rax, rdx, Address::times_1));
     __ cmpptr(rbx, Address(thread, in_bytes(JavaThread::tlab_end_offset())));
@@ -3247,6 +3249,8 @@
 
     // if someone beat us on the allocation, try again, otherwise continue
     __ jcc(Assembler::notEqual, retry);
+
+    __ incr_allocated_bytes(thread, rdx, 0);
   }
 
   if (UseTLAB || Universe::heap()->supports_inline_contig_alloc()) {
@@ -3256,12 +3260,12 @@
     __ decrement(rdx, sizeof(oopDesc));
     __ jcc(Assembler::zero, initialize_header);
 
-  // Initialize topmost object field, divide rdx by 8, check if odd and
-  // test if zero.
+    // Initialize topmost object field, divide rdx by 8, check if odd and
+    // test if zero.
     __ xorl(rcx, rcx);    // use zero reg to clear memory (shorter code)
     __ shrl(rdx, LogBytesPerLong); // divide by 2*oopSize and set carry flag if odd
 
-  // rdx must have been multiple of 8
+    // rdx must have been multiple of 8
 #ifdef ASSERT
     // make sure rdx was multiple of 8
     Label L;
--- a/src/cpu/x86/vm/templateTable_x86_64.cpp	Fri Jan 07 03:38:19 2011 -0800
+++ b/src/cpu/x86/vm/templateTable_x86_64.cpp	Fri Jan 07 10:42:32 2011 -0500
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2011, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -3266,6 +3266,8 @@
 
     // if someone beat us on the allocation, try again, otherwise continue
     __ jcc(Assembler::notEqual, retry);
+
+    __ incr_allocated_bytes(r15_thread, rdx, 0);
   }
 
   if (UseTLAB || Universe::heap()->supports_inline_contig_alloc()) {
--- a/src/os/solaris/vm/os_solaris.cpp	Fri Jan 07 03:38:19 2011 -0800
+++ b/src/os/solaris/vm/os_solaris.cpp	Fri Jan 07 10:42:32 2011 -0500
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -283,7 +283,7 @@
                is_error_reported(),
               "sp must be inside of selected thread stack");
 
-    thread->_self_raw_id = raw_id;  // mark for quick retrieval
+    thread->set_self_raw_id(raw_id);  // mark for quick retrieval
     _get_thread_cache[ index ] = thread;
   }
   return thread;
--- a/src/os/solaris/vm/thread_solaris.inline.hpp	Fri Jan 07 03:38:19 2011 -0800
+++ b/src/os/solaris/vm/thread_solaris.inline.hpp	Fri Jan 07 10:42:32 2011 -0500
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2002, 2011, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -53,10 +53,10 @@
 
   uintptr_t raw = pd_raw_thread_id();
   int ix = pd_cache_index(raw);
-  Thread *Candidate = ThreadLocalStorage::_get_thread_cache[ix];
-  if (Candidate->_self_raw_id == raw) {
+  Thread* candidate = ThreadLocalStorage::_get_thread_cache[ix];
+  if (candidate->self_raw_id() == raw) {
     // hit
-    return Candidate;
+    return candidate;
   } else {
     return ThreadLocalStorage::get_thread_via_cache_slowly(raw, ix);
   }
--- a/src/share/vm/gc_interface/collectedHeap.inline.hpp	Fri Jan 07 03:38:19 2011 -0800
+++ b/src/share/vm/gc_interface/collectedHeap.inline.hpp	Fri Jan 07 10:42:32 2011 -0500
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -153,6 +153,7 @@
       check_for_non_bad_heap_word_value(result, size));
     assert(!HAS_PENDING_EXCEPTION,
            "Unexpected exception, will result in uninitialized storage");
+    THREAD->incr_allocated_bytes(size * HeapWordSize);
     return result;
   }
 
--- a/src/share/vm/memory/threadLocalAllocBuffer.cpp	Fri Jan 07 03:38:19 2011 -0800
+++ b/src/share/vm/memory/threadLocalAllocBuffer.cpp	Fri Jan 07 10:42:32 2011 -0500
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1999, 2011, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -114,6 +114,11 @@
 void ThreadLocalAllocBuffer::make_parsable(bool retire) {
   if (end() != NULL) {
     invariants();
+
+    if (retire) {
+      myThread()->incr_allocated_bytes(used_bytes());
+    }
+
     CollectedHeap::fill_with_object(top(), hard_end(), retire);
 
     if (retire || ZeroTLAB) {  // "Reset" the TLAB
--- a/src/share/vm/memory/threadLocalAllocBuffer.hpp	Fri Jan 07 03:38:19 2011 -0800
+++ b/src/share/vm/memory/threadLocalAllocBuffer.hpp	Fri Jan 07 10:42:32 2011 -0500
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1999, 2011, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -112,6 +112,8 @@
   HeapWord* top() const                          { return _top; }
   HeapWord* pf_top() const                       { return _pf_top; }
   size_t desired_size() const                    { return _desired_size; }
+  size_t used() const                            { return pointer_delta(top(), start()); }
+  size_t used_bytes() const                      { return pointer_delta(top(), start(), 1); }
   size_t free() const                            { return pointer_delta(end(), top()); }
   // Don't discard tlab if remaining space is larger than this.
   size_t refill_waste_limit() const              { return _refill_waste_limit; }
--- a/src/share/vm/opto/macro.cpp	Fri Jan 07 03:38:19 2011 -0800
+++ b/src/share/vm/opto/macro.cpp	Fri Jan 07 10:42:32 2011 -0500
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2005, 2011, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -1158,7 +1158,7 @@
     // Note: We set the control input on "eden_end" and "old_eden_top" when using
     //       a TLAB to work around a bug where these values were being moved across
     //       a safepoint.  These are not oops, so they cannot be include in the oop
-    //       map, but the can be changed by a GC.   The proper way to fix this would
+    //       map, but they can be changed by a GC.   The proper way to fix this would
     //       be to set the raw memory state when generating a  SafepointNode.  However
     //       this will require extensive changes to the loop optimization in order to
     //       prevent a degradation of the optimization.
@@ -1167,24 +1167,24 @@
 
     // allocate the Region and Phi nodes for the result
     result_region = new (C, 3) RegionNode(3);
-    result_phi_rawmem = new (C, 3) PhiNode( result_region, Type::MEMORY, TypeRawPtr::BOTTOM );
-    result_phi_rawoop = new (C, 3) PhiNode( result_region, TypeRawPtr::BOTTOM );
-    result_phi_i_o    = new (C, 3) PhiNode( result_region, Type::ABIO ); // I/O is used for Prefetch
+    result_phi_rawmem = new (C, 3) PhiNode(result_region, Type::MEMORY, TypeRawPtr::BOTTOM);
+    result_phi_rawoop = new (C, 3) PhiNode(result_region, TypeRawPtr::BOTTOM);
+    result_phi_i_o    = new (C, 3) PhiNode(result_region, Type::ABIO); // I/O is used for Prefetch
 
     // We need a Region for the loop-back contended case.
     enum { fall_in_path = 1, contended_loopback_path = 2 };
     Node *contended_region;
     Node *contended_phi_rawmem;
-    if( UseTLAB ) {
+    if (UseTLAB) {
       contended_region = toobig_false;
       contended_phi_rawmem = mem;
     } else {
       contended_region = new (C, 3) RegionNode(3);
-      contended_phi_rawmem = new (C, 3) PhiNode( contended_region, Type::MEMORY, TypeRawPtr::BOTTOM);
+      contended_phi_rawmem = new (C, 3) PhiNode(contended_region, Type::MEMORY, TypeRawPtr::BOTTOM);
       // Now handle the passing-too-big test.  We fall into the contended
       // loop-back merge point.
-      contended_region    ->init_req( fall_in_path, toobig_false );
-      contended_phi_rawmem->init_req( fall_in_path, mem );
+      contended_region    ->init_req(fall_in_path, toobig_false);
+      contended_phi_rawmem->init_req(fall_in_path, mem);
       transform_later(contended_region);
       transform_later(contended_phi_rawmem);
     }
@@ -1192,78 +1192,101 @@
     // Load(-locked) the heap top.
     // See note above concerning the control input when using a TLAB
     Node *old_eden_top = UseTLAB
-      ? new (C, 3) LoadPNode     ( ctrl, contended_phi_rawmem, eden_top_adr, TypeRawPtr::BOTTOM, TypeRawPtr::BOTTOM )
-      : new (C, 3) LoadPLockedNode( contended_region, contended_phi_rawmem, eden_top_adr );
+      ? new (C, 3) LoadPNode      (ctrl, contended_phi_rawmem, eden_top_adr, TypeRawPtr::BOTTOM, TypeRawPtr::BOTTOM)
+      : new (C, 3) LoadPLockedNode(contended_region, contended_phi_rawmem, eden_top_adr);
 
     transform_later(old_eden_top);
     // Add to heap top to get a new heap top
-    Node *new_eden_top = new (C, 4) AddPNode( top(), old_eden_top, size_in_bytes );
+    Node *new_eden_top = new (C, 4) AddPNode(top(), old_eden_top, size_in_bytes);
     transform_later(new_eden_top);
     // Check for needing a GC; compare against heap end
-    Node *needgc_cmp = new (C, 3) CmpPNode( new_eden_top, eden_end );
+    Node *needgc_cmp = new (C, 3) CmpPNode(new_eden_top, eden_end);
     transform_later(needgc_cmp);
-    Node *needgc_bol = new (C, 2) BoolNode( needgc_cmp, BoolTest::ge );
+    Node *needgc_bol = new (C, 2) BoolNode(needgc_cmp, BoolTest::ge);
     transform_later(needgc_bol);
-    IfNode *needgc_iff = new (C, 2) IfNode(contended_region, needgc_bol, PROB_UNLIKELY_MAG(4), COUNT_UNKNOWN );
+    IfNode *needgc_iff = new (C, 2) IfNode(contended_region, needgc_bol, PROB_UNLIKELY_MAG(4), COUNT_UNKNOWN);
     transform_later(needgc_iff);
 
     // Plug the failing-heap-space-need-gc test into the slow-path region
-    Node *needgc_true = new (C, 1) IfTrueNode( needgc_iff );
+    Node *needgc_true = new (C, 1) IfTrueNode(needgc_iff);
     transform_later(needgc_true);
-    if( initial_slow_test ) {
-      slow_region    ->init_req( need_gc_path, needgc_true );
+    if (initial_slow_test) {
+      slow_region->init_req(need_gc_path, needgc_true);
       // This completes all paths into the slow merge point
       transform_later(slow_region);
     } else {                      // No initial slow path needed!
       // Just fall from the need-GC path straight into the VM call.
-      slow_region    = needgc_true;
+      slow_region = needgc_true;
     }
     // No need for a GC.  Setup for the Store-Conditional
-    Node *needgc_false = new (C, 1) IfFalseNode( needgc_iff );
+    Node *needgc_false = new (C, 1) IfFalseNode(needgc_iff);
     transform_later(needgc_false);
 
     // Grab regular I/O before optional prefetch may change it.
     // Slow-path does no I/O so just set it to the original I/O.
-    result_phi_i_o->init_req( slow_result_path, i_o );
+    result_phi_i_o->init_req(slow_result_path, i_o);
 
     i_o = prefetch_allocation(i_o, needgc_false, contended_phi_rawmem,
                               old_eden_top, new_eden_top, length);
 
+    // Name successful fast-path variables
+    Node* fast_oop = old_eden_top;
+    Node* fast_oop_ctrl;
+    Node* fast_oop_rawmem;
+
     // Store (-conditional) the modified eden top back down.
     // StorePConditional produces flags for a test PLUS a modified raw
     // memory state.
-    Node *store_eden_top;
-    Node *fast_oop_ctrl;
-    if( UseTLAB ) {
-      store_eden_top = new (C, 4) StorePNode( needgc_false, contended_phi_rawmem, eden_top_adr, TypeRawPtr::BOTTOM, new_eden_top );
+    if (UseTLAB) {
+      Node* store_eden_top =
+        new (C, 4) StorePNode(needgc_false, contended_phi_rawmem, eden_top_adr,
+                              TypeRawPtr::BOTTOM, new_eden_top);
       transform_later(store_eden_top);
       fast_oop_ctrl = needgc_false; // No contention, so this is the fast path
+      fast_oop_rawmem = store_eden_top;
     } else {
-      store_eden_top = new (C, 5) StorePConditionalNode( needgc_false, contended_phi_rawmem, eden_top_adr, new_eden_top, old_eden_top );
+      Node* store_eden_top =
+        new (C, 5) StorePConditionalNode(needgc_false, contended_phi_rawmem, eden_top_adr,
+                                         new_eden_top, fast_oop/*old_eden_top*/);
       transform_later(store_eden_top);
-      Node *contention_check = new (C, 2) BoolNode( store_eden_top, BoolTest::ne );
+      Node *contention_check = new (C, 2) BoolNode(store_eden_top, BoolTest::ne);
       transform_later(contention_check);
       store_eden_top = new (C, 1) SCMemProjNode(store_eden_top);
       transform_later(store_eden_top);
 
       // If not using TLABs, check to see if there was contention.
-      IfNode *contention_iff = new (C, 2) IfNode ( needgc_false, contention_check, PROB_MIN, COUNT_UNKNOWN );
+      IfNode *contention_iff = new (C, 2) IfNode (needgc_false, contention_check, PROB_MIN, COUNT_UNKNOWN);
       transform_later(contention_iff);
-      Node *contention_true = new (C, 1) IfTrueNode( contention_iff );
+      Node *contention_true = new (C, 1) IfTrueNode(contention_iff);
       transform_later(contention_true);
       // If contention, loopback and try again.
-      contended_region->init_req( contended_loopback_path, contention_true );
-      contended_phi_rawmem->init_req( contended_loopback_path, store_eden_top );
+      contended_region->init_req(contended_loopback_path, contention_true);
+      contended_phi_rawmem->init_req(contended_loopback_path, store_eden_top);
 
       // Fast-path succeeded with no contention!
-      Node *contention_false = new (C, 1) IfFalseNode( contention_iff );
+      Node *contention_false = new (C, 1) IfFalseNode(contention_iff);
       transform_later(contention_false);
       fast_oop_ctrl = contention_false;
+
+      // Bump total allocated bytes for this thread
+      Node* thread = new (C, 1) ThreadLocalNode();
+      transform_later(thread);
+      Node* alloc_bytes_adr = basic_plus_adr(top()/*not oop*/, thread,
+                                             in_bytes(JavaThread::allocated_bytes_offset()));
+      Node* alloc_bytes = make_load(fast_oop_ctrl, store_eden_top, alloc_bytes_adr,
+                                    0, TypeLong::LONG, T_LONG);
+#ifdef _LP64
+      Node* alloc_size = size_in_bytes;
+#else
+      Node* alloc_size = new (C, 2) ConvI2LNode(size_in_bytes);
+      transform_later(alloc_size);
+#endif
+      Node* new_alloc_bytes = new (C, 3) AddLNode(alloc_bytes, alloc_size);
+      transform_later(new_alloc_bytes);
+      fast_oop_rawmem = make_store(fast_oop_ctrl, store_eden_top, alloc_bytes_adr,
+                                   0, new_alloc_bytes, T_LONG);
     }
 
-    // Rename successful fast-path variables to make meaning more obvious
-    Node* fast_oop        = old_eden_top;
-    Node* fast_oop_rawmem = store_eden_top;
     fast_oop_rawmem = initialize_object(alloc,
                                         fast_oop_ctrl, fast_oop_rawmem, fast_oop,
                                         klass_node, length, size_in_bytes);
@@ -1282,11 +1305,11 @@
 
       call->init_req(TypeFunc::Parms+0, thread);
       call->init_req(TypeFunc::Parms+1, fast_oop);
-      call->init_req( TypeFunc::Control, fast_oop_ctrl );
-      call->init_req( TypeFunc::I_O    , top() )        ;   // does no i/o
-      call->init_req( TypeFunc::Memory , fast_oop_rawmem );
-      call->init_req( TypeFunc::ReturnAdr, alloc->in(TypeFunc::ReturnAdr) );
-      call->init_req( TypeFunc::FramePtr, alloc->in(TypeFunc::FramePtr) );
+      call->init_req(TypeFunc::Control, fast_oop_ctrl);
+      call->init_req(TypeFunc::I_O    , top()); // does no i/o
+      call->init_req(TypeFunc::Memory , fast_oop_rawmem);
+      call->init_req(TypeFunc::ReturnAdr, alloc->in(TypeFunc::ReturnAdr));
+      call->init_req(TypeFunc::FramePtr, alloc->in(TypeFunc::FramePtr));
       transform_later(call);
       fast_oop_ctrl = new (C, 1) ProjNode(call,TypeFunc::Control);
       transform_later(fast_oop_ctrl);
@@ -1295,10 +1318,10 @@
     }
 
     // Plug in the successful fast-path into the result merge point
-    result_region    ->init_req( fast_result_path, fast_oop_ctrl );
-    result_phi_rawoop->init_req( fast_result_path, fast_oop );
-    result_phi_i_o   ->init_req( fast_result_path, i_o );
-    result_phi_rawmem->init_req( fast_result_path, fast_oop_rawmem );
+    result_region    ->init_req(fast_result_path, fast_oop_ctrl);
+    result_phi_rawoop->init_req(fast_result_path, fast_oop);
+    result_phi_i_o   ->init_req(fast_result_path, i_o);
+    result_phi_rawmem->init_req(fast_result_path, fast_oop_rawmem);
   } else {
     slow_region = ctrl;
   }
--- a/src/share/vm/prims/jvmti.xml	Fri Jan 07 03:38:19 2011 -0800
+++ b/src/share/vm/prims/jvmti.xml	Fri Jan 07 10:42:32 2011 -0500
@@ -1,7 +1,7 @@
 <?xml version="1.0" encoding="ISO-8859-1"?>
 <?xml-stylesheet type="text/xsl" href="jvmti.xsl"?>
 <!--
- Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
+ Copyright (c) 2002, 2011, Oracle and/or its affiliates. All rights reserved.
  DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 
  This code is free software; you can redistribute it and/or modify it
@@ -10697,7 +10697,7 @@
 	    <internallink id="mUTF">modified UTF-8</internallink> string.
 	  </description>
 	</param>
-        <param id="value">
+        <param id="value_ptr">
 	  <inbuf>
 	    <char/>
 	    <nullok>
--- a/src/share/vm/prims/jvmtiEnv.cpp	Fri Jan 07 03:38:19 2011 -0800
+++ b/src/share/vm/prims/jvmtiEnv.cpp	Fri Jan 07 10:42:32 2011 -0500
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2011, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -1802,7 +1802,7 @@
 // depth - pre-checked as non-negative
 // value - pre-checked for NULL
 jvmtiError
-JvmtiEnv::GetLocalInstance(JavaThread* java_thread, jint depth, jobject* value){
+JvmtiEnv::GetLocalInstance(JavaThread* java_thread, jint depth, jobject* value_ptr){
   JavaThread* current_thread = JavaThread::current();
   // rm object is created to clean up the javaVFrame created in
   // doit_prologue(), but after doit() is finished with it.
@@ -1814,7 +1814,7 @@
   if (err != JVMTI_ERROR_NONE) {
     return err;
   } else {
-    *value = op.value().l;
+    *value_ptr = op.value().l;
     return JVMTI_ERROR_NONE;
   }
 } /* end GetLocalInstance */
@@ -3440,12 +3440,12 @@
 // property - pre-checked for NULL
 // value - NULL is a valid value, must be checked
 jvmtiError
-JvmtiEnv::SetSystemProperty(const char* property, const char* value) {
+JvmtiEnv::SetSystemProperty(const char* property, const char* value_ptr) {
   jvmtiError err =JVMTI_ERROR_NOT_AVAILABLE;
 
   for (SystemProperty* p = Arguments::system_properties(); p != NULL; p = p->next()) {
     if (strcmp(property, p->key()) == 0) {
-      if (p->set_value((char *)value)) {
+      if (p->set_value((char *)value_ptr)) {
         err =  JVMTI_ERROR_NONE;
       }
     }
--- a/src/share/vm/runtime/thread.cpp	Fri Jan 07 03:38:19 2011 -0800
+++ b/src/share/vm/runtime/thread.cpp	Fri Jan 07 10:42:32 2011 -0500
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -177,20 +177,19 @@
 
 
 Thread::Thread() {
-  // stack
-  _stack_base   = NULL;
-  _stack_size   = 0;
-  _self_raw_id  = 0;
-  _lgrp_id      = -1;
-  _osthread     = NULL;
+  // stack and get_thread
+  set_stack_base(NULL);
+  set_stack_size(0);
+  set_self_raw_id(0);
+  set_lgrp_id(-1);
 
   // allocated data structures
+  set_osthread(NULL);
   set_resource_area(new ResourceArea());
   set_handle_area(new HandleArea(NULL));
   set_active_handles(NULL);
   set_free_handle_block(NULL);
   set_last_handle_mark(NULL);
-  set_osthread(NULL);
 
   // This initial value ==> never claimed.
   _oops_do_parity = 0;
@@ -205,6 +204,7 @@
   NOT_PRODUCT(_skip_gcalot = false;)
   CHECK_UNHANDLED_OOPS_ONLY(_gc_locked_out_count = 0;)
   _jvmti_env_iteration_count = 0;
+  set_allocated_bytes(0);
   _vm_operation_started_count = 0;
   _vm_operation_completed_count = 0;
   _current_pending_monitor = NULL;
@@ -3231,7 +3231,7 @@
       warning("java.lang.ArithmeticException has not been initialized");
       warning("java.lang.StackOverflowError has not been initialized");
     }
-  }
+    }
 
   // See        : bugid 4211085.
   // Background : the static initializer of java.lang.Compiler tries to read
--- a/src/share/vm/runtime/thread.hpp	Fri Jan 07 03:38:19 2011 -0800
+++ b/src/share/vm/runtime/thread.hpp	Fri Jan 07 10:42:32 2011 -0500
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -60,7 +60,7 @@
 class JvmtiGetLoadedClassesClosure;
 class ThreadStatistics;
 class ConcurrentLocksDump;
-class ParkEvent ;
+class ParkEvent;
 class Parker;
 
 class ciEnv;
@@ -170,7 +170,7 @@
   //
 
   // suspend/resume lock: used for self-suspend
-  Monitor*    _SR_lock;
+  Monitor* _SR_lock;
 
  protected:
   enum SuspendFlags {
@@ -194,7 +194,7 @@
  public:
   void enter_signal_handler() { _num_nested_signal++; }
   void leave_signal_handler() { _num_nested_signal--; }
-  bool is_inside_signal_handler() const  { return _num_nested_signal > 0; }
+  bool is_inside_signal_handler() const { return _num_nested_signal > 0; }
 
  private:
   // Debug tracing
@@ -215,7 +215,7 @@
 
   public:
    void set_last_handle_mark(HandleMark* mark)   { _last_handle_mark = mark; }
-    HandleMark* last_handle_mark() const          { return _last_handle_mark; }
+   HandleMark* last_handle_mark() const          { return _last_handle_mark; }
   private:
 
   // debug support for checking if code does allow safepoints or not
@@ -227,11 +227,11 @@
   //
   // The two classes No_Safepoint_Verifier and No_Allocation_Verifier are used to set these counters.
   //
-  NOT_PRODUCT(int _allow_safepoint_count;)       // If 0, thread allow a safepoint to happen
-  debug_only (int _allow_allocation_count;)      // If 0, the thread is allowed to allocate oops.
+  NOT_PRODUCT(int _allow_safepoint_count;)      // If 0, thread allow a safepoint to happen
+  debug_only (int _allow_allocation_count;)     // If 0, the thread is allowed to allocate oops.
 
   // Used by SkipGCALot class.
-  NOT_PRODUCT(bool _skip_gcalot;)                // Should we elide gc-a-lot?
+  NOT_PRODUCT(bool _skip_gcalot;)               // Should we elide gc-a-lot?
 
   // Record when GC is locked out via the GC_locker mechanism
   CHECK_UNHANDLED_OOPS_ONLY(int _gc_locked_out_count;)
@@ -242,24 +242,26 @@
   friend class ThreadLocalStorage;
   friend class GC_locker;
 
-  ThreadLocalAllocBuffer _tlab;                  // Thread-local eden
+  ThreadLocalAllocBuffer _tlab;                 // Thread-local eden
+  jlong _allocated_bytes;                       // Cumulative number of bytes allocated on
+                                                // the Java heap
 
-  int   _vm_operation_started_count;             // VM_Operation support
-  int   _vm_operation_completed_count;           // VM_Operation support
+  int   _vm_operation_started_count;            // VM_Operation support
+  int   _vm_operation_completed_count;          // VM_Operation support
 
-  ObjectMonitor* _current_pending_monitor;       // ObjectMonitor this thread
-                                                 // is waiting to lock
-  bool _current_pending_monitor_is_from_java;    // locking is from Java code
+  ObjectMonitor* _current_pending_monitor;      // ObjectMonitor this thread
+                                                // is waiting to lock
+  bool _current_pending_monitor_is_from_java;   // locking is from Java code
 
   // ObjectMonitor on which this thread called Object.wait()
   ObjectMonitor* _current_waiting_monitor;
 
   // Private thread-local objectmonitor list - a simple cache organized as a SLL.
  public:
-  ObjectMonitor * omFreeList ;
-  int omFreeCount ;                             // length of omFreeList
-  int omFreeProvision ;                         // reload chunk size
-  ObjectMonitor * omInUseList;                  // SLL to track monitors in circulation
+  ObjectMonitor* omFreeList;
+  int omFreeCount;                              // length of omFreeList
+  int omFreeProvision;                          // reload chunk size
+  ObjectMonitor* omInUseList;                   // SLL to track monitors in circulation
   int omInUseCount;                             // length of omInUseList
 
  public:
@@ -280,7 +282,6 @@
   // Testers
   virtual bool is_VM_thread()       const            { return false; }
   virtual bool is_Java_thread()     const            { return false; }
-  // Remove this ifdef when C1 is ported to the compiler interface.
   virtual bool is_Compiler_thread() const            { return false; }
   virtual bool is_hidden_from_external_view() const  { return false; }
   virtual bool is_jvmti_agent_thread() const         { return false; }
@@ -344,15 +345,15 @@
   // Support for Unhandled Oop detection
 #ifdef CHECK_UNHANDLED_OOPS
  private:
-  UnhandledOops *_unhandled_oops;
+  UnhandledOops* _unhandled_oops;
  public:
-  UnhandledOops* unhandled_oops()               { return _unhandled_oops; }
+  UnhandledOops* unhandled_oops() { return _unhandled_oops; }
   // Mark oop safe for gc.  It may be stack allocated but won't move.
-  void allow_unhandled_oop(oop *op)              {
+  void allow_unhandled_oop(oop *op) {
     if (CheckUnhandledOops) unhandled_oops()->allow_unhandled_oop(op);
   }
   // Clear oops at safepoint so crashes point to unhandled oop violator
-  void clear_unhandled_oops()                   {
+  void clear_unhandled_oops() {
     if (CheckUnhandledOops) unhandled_oops()->clear_unhandled_oops();
   }
   bool is_gc_locked_out() { return _gc_locked_out_count > 0; }
@@ -392,6 +393,22 @@
     }
   }
 
+  jlong allocated_bytes() const         { return _allocated_bytes; }
+  void set_allocated_bytes(jlong value) { _allocated_bytes = value; }
+  void incr_allocated_bytes(jlong size) { _allocated_bytes += size; }
+  jlong cooked_allocated_bytes() {
+    // Acquire-load the retired total before sampling the TLAB, which should
+    // keep the sum at or below the instantaneous value.
+    jlong retired_bytes = OrderAccess::load_acquire(&_allocated_bytes);
+    if (!UseTLAB) return retired_bytes;
+    // Count the TLAB only if its used size is positive as a signed value.
+    size_t tlab_bytes = tlab().used_bytes();
+    if ((ssize_t)tlab_bytes <= 0) {
+      return retired_bytes;
+    }
+    return retired_bytes + tlab_bytes;
+  }
+
   // VM operation support
   int vm_operation_ticket()                      { return ++_vm_operation_started_count; }
   int vm_operation_completed_count()             { return _vm_operation_completed_count; }
@@ -489,8 +506,11 @@
     return (_stack_base >= adr && adr >= (_stack_base - _stack_size));
   }
 
-  int     lgrp_id() const                 { return _lgrp_id; }
-  void    set_lgrp_id(int value)          { _lgrp_id = value; }
+  uintptr_t self_raw_id() const              { return _self_raw_id; }
+  void      set_self_raw_id(uintptr_t value) { _self_raw_id = value; }
+
+  int     lgrp_id() const        { return _lgrp_id; }
+  void    set_lgrp_id(int value) { _lgrp_id = value; }
 
   // Printing
   void print_on(outputStream* st) const;
@@ -502,7 +522,7 @@
 #ifdef ASSERT
  private:
   // Deadlock detection support for Mutex locks. List of locks own by thread.
-  Monitor *_owned_locks;
+  Monitor* _owned_locks;
   // Mutex::set_owner_implementation is the only place where _owned_locks is modified,
   // thus the friendship
   friend class Mutex;
@@ -511,7 +531,7 @@
  public:
   void print_owned_locks_on(outputStream* st) const;
   void print_owned_locks() const                 { print_owned_locks_on(tty);    }
-  Monitor * owned_locks() const                  { return _owned_locks;          }
+  Monitor* owned_locks() const                   { return _owned_locks;          }
   bool owns_locks() const                        { return owned_locks() != NULL; }
   bool owns_locks_but_compiled_lock() const;
 
@@ -538,7 +558,7 @@
   static ByteSize stack_size_offset()            { return byte_offset_of(Thread, _stack_size ); }
 
 #define TLAB_FIELD_OFFSET(name) \
-  static ByteSize tlab_##name##_offset()            { return byte_offset_of(Thread, _tlab) + ThreadLocalAllocBuffer::name##_offset(); }
+  static ByteSize tlab_##name##_offset()         { return byte_offset_of(Thread, _tlab) + ThreadLocalAllocBuffer::name##_offset(); }
 
   TLAB_FIELD_OFFSET(start)
   TLAB_FIELD_OFFSET(end)
@@ -552,6 +572,8 @@
 
 #undef TLAB_FIELD_OFFSET
 
+  static ByteSize allocated_bytes_offset()       { return byte_offset_of(Thread, _allocated_bytes ); }
+
  public:
   volatile intptr_t _Stalled ;
   volatile int _TypeTag ;
--- a/src/share/vm/services/jmm.h	Fri Jan 07 03:38:19 2011 -0800
+++ b/src/share/vm/services/jmm.h	Fri Jan 07 10:42:32 2011 -0500
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2011, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -60,7 +60,8 @@
   unsigned int isBootClassPathSupported : 1;
   unsigned int isObjectMonitorUsageSupported : 1;
   unsigned int isSynchronizerUsageSupported : 1;
-  unsigned int : 24;
+  unsigned int isThreadAllocatedMemorySupported : 1;
+  unsigned int : 23;
 } jmmOptionalSupport;
 
 typedef enum {
@@ -105,7 +106,8 @@
   JMM_VERBOSE_GC                     = 21,
   JMM_VERBOSE_CLASS                  = 22,
   JMM_THREAD_CONTENTION_MONITORING   = 23,
-  JMM_THREAD_CPU_TIME                = 24
+  JMM_THREAD_CPU_TIME                = 24,
+  JMM_THREAD_ALLOCATED_MEMORY        = 25
 } jmmBoolAttribute;
 
 
@@ -213,7 +215,10 @@
   jobject      (JNICALL *GetMemoryPoolUsage)     (JNIEnv* env, jobject pool);
   jobject      (JNICALL *GetPeakMemoryPoolUsage) (JNIEnv* env, jobject pool);
 
-  void*        reserved4;
+  void         (JNICALL *GetThreadAllocatedMemory)
+                                                 (JNIEnv *env,
+                                                  jlongArray ids,
+                                                  jlongArray sizeArray);
 
   jobject      (JNICALL *GetMemoryUsage)         (JNIEnv* env, jboolean heap);
 
@@ -228,6 +233,8 @@
                                                   jlong* result);
 
   jobjectArray (JNICALL *FindCircularBlockedThreads) (JNIEnv *env);
+
+  // Not used in JDK 6 or JDK 7
   jlong        (JNICALL *GetThreadCpuTime)       (JNIEnv *env, jlong thread_id);
 
   jobjectArray (JNICALL *GetVMGlobalNames)       (JNIEnv *env);
@@ -262,14 +269,22 @@
   void         (JNICALL *GetLastGCStat)          (JNIEnv *env,
                                                   jobject mgr,
                                                   jmmGCStat *gc_stat);
-  jlong        (JNICALL *GetThreadCpuTimeWithKind) (JNIEnv *env,
-                                                    jlong thread_id,
-                                                    jboolean user_sys_cpu_time);
-  void*        reserved5;
+
+  jlong        (JNICALL *GetThreadCpuTimeWithKind)
+                                                 (JNIEnv *env,
+                                                  jlong thread_id,
+                                                  jboolean user_sys_cpu_time);
+  void         (JNICALL *GetThreadCpuTimesWithKind)
+                                                 (JNIEnv *env,
+                                                  jlongArray ids,
+                                                  jlongArray timeArray,
+                                                  jboolean user_sys_cpu_time);
+
   jint         (JNICALL *DumpHeap0)              (JNIEnv *env,
                                                   jstring outputfile,
                                                   jboolean live);
-  jobjectArray (JNICALL *FindDeadlocks)             (JNIEnv *env, jboolean object_monitors_only);
+  jobjectArray (JNICALL *FindDeadlocks)          (JNIEnv *env,
+                                                  jboolean object_monitors_only);
   void         (JNICALL *SetVMGlobal)            (JNIEnv *env,
                                                   jstring flag_name,
                                                   jvalue  new_value);
--- a/src/share/vm/services/management.cpp	Fri Jan 07 03:38:19 2011 -0800
+++ b/src/share/vm/services/management.cpp	Fri Jan 07 10:42:32 2011 -0500
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2011, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -101,12 +101,14 @@
     _optional_support.isCurrentThreadCpuTimeSupported = 0;
     _optional_support.isOtherThreadCpuTimeSupported = 0;
   }
+
   _optional_support.isBootClassPathSupported = 1;
   _optional_support.isObjectMonitorUsageSupported = 1;
 #ifndef SERVICES_KERNEL
   // This depends on the heap inspector
   _optional_support.isSynchronizerUsageSupported = 1;
 #endif // SERVICES_KERNEL
+  _optional_support.isThreadAllocatedMemorySupported = 1;
 }
 
 void Management::initialize(TRAPS) {
@@ -386,11 +388,6 @@
 
 static void validate_thread_id_array(typeArrayHandle ids_ah, TRAPS) {
   int num_threads = ids_ah->length();
-  // should be non-empty array
-  if (num_threads == 0) {
-    THROW_MSG(vmSymbols::java_lang_IllegalArgumentException(),
-              "Empty array of thread IDs");
-  }
 
   // Validate input thread IDs
   int i = 0;
@@ -402,11 +399,9 @@
                 "Invalid thread ID entry");
     }
   }
-
 }
 
 static void validate_thread_info_array(objArrayHandle infoArray_h, TRAPS) {
-
   // check if the element of infoArray is of type ThreadInfo class
   klassOop threadinfo_klass = Management::java_lang_management_ThreadInfo_klass(CHECK);
   klassOop element_klass = objArrayKlass::cast(infoArray_h->klass())->element_klass();
@@ -414,7 +409,6 @@
     THROW_MSG(vmSymbols::java_lang_IllegalArgumentException(),
               "infoArray element type is not ThreadInfo class");
   }
-
 }
 
 
@@ -770,6 +764,45 @@
   return prev;
 JVM_END
 
+// Gets an array containing the amount of memory allocated on the Java
+// heap for a set of threads (in bytes).  Each element of sizeArray is set
+// to the value for the thread whose ID is in the matching entry of ids;
+// entries whose thread is not found are left unchanged (presumably pre-set
+// to -1 by the caller -- TODO confirm).
+JVM_ENTRY(void, jmm_GetThreadAllocatedMemory(JNIEnv *env, jlongArray ids,
+                                             jlongArray sizeArray))
+  // Neither the thread ID array nor the result array may be null
+  if (ids == NULL || sizeArray == NULL) {
+    THROW(vmSymbols::java_lang_NullPointerException());
+  }
+
+  ResourceMark rm(THREAD);
+  typeArrayOop ta = typeArrayOop(JNIHandles::resolve_non_null(ids));
+  typeArrayHandle ids_ah(THREAD, ta);
+
+  typeArrayOop sa = typeArrayOop(JNIHandles::resolve_non_null(sizeArray));
+  typeArrayHandle sizeArray_h(THREAD, sa);
+
+  // validate the thread id array (throws on an invalid thread ID entry)
+  validate_thread_id_array(ids_ah, CHECK);
+
+  // sizeArray must be of the same length as the given array of thread IDs
+  int num_threads = ids_ah->length();
+  if (num_threads != sizeArray_h->length()) {
+    THROW_MSG(vmSymbols::java_lang_IllegalArgumentException(),
+              "The length of the given long array does not match the length of "
+              "the given array of thread IDs");
+  }
+
+  MutexLockerEx ml(Threads_lock);  // taken before the per-ID thread lookups below
+  for (int i = 0; i < num_threads; i++) {
+    JavaThread* java_thread = find_java_thread_from_id(ids_ah->long_at(i));
+    if (java_thread != NULL) {  // no matching thread: sizeArray[i] is not written
+      sizeArray_h->long_at_put(i, java_thread->cooked_allocated_bytes());
+    }
+  }
+JVM_END
+
 // Returns a java/lang/management/MemoryUsage object representing
 // the memory usage for the heap or non-heap memory.
 JVM_ENTRY(jobject, jmm_GetMemoryUsage(JNIEnv* env, jboolean heap))
@@ -834,6 +867,8 @@
     return ThreadService::is_thread_monitoring_contention();
   case JMM_THREAD_CPU_TIME:
     return ThreadService::is_thread_cpu_time_enabled();
+  case JMM_THREAD_ALLOCATED_MEMORY:
+    return ThreadService::is_thread_allocated_memory_enabled();
   default:
     assert(0, "Unrecognized attribute");
     return false;
@@ -851,6 +886,8 @@
     return ThreadService::set_thread_monitoring_contention(flag != 0);
   case JMM_THREAD_CPU_TIME:
     return ThreadService::set_thread_cpu_time_enabled(flag != 0);
+  case JMM_THREAD_ALLOCATED_MEMORY:
+    return ThreadService::set_thread_allocated_memory_enabled(flag != 0);
   default:
     assert(0, "Unrecognized attribute");
     return false;
@@ -1096,6 +1133,7 @@
 //               maxDepth == 0  requests no stack trace.
 //   infoArray - array of ThreadInfo objects
 //
+// QQQ - Why does this method return a value instead of void?
 JVM_ENTRY(jint, jmm_GetThreadInfo(JNIEnv *env, jlongArray ids, jint maxDepth, jobjectArray infoArray))
   // Check if threads is null
   if (ids == NULL || infoArray == NULL) {
@@ -1159,7 +1197,6 @@
     }
   } else {
     // obtain thread dump with the specific list of threads with stack trace
-
     do_thread_dump(&dump_result,
                    ids_ah,
                    num_threads,
@@ -1252,8 +1289,6 @@
       continue;
     }
 
-
-
     ThreadStackTrace* stacktrace = ts->get_stack_trace();
     assert(stacktrace != NULL, "Must have a stack trace dumped");
 
@@ -1500,6 +1535,49 @@
   return -1;
 JVM_END
 
+// Gets an array containing the CPU times consumed by a set of threads
+// (in nanoseconds).  Each element of timeArray is set to the CPU time of
+// the thread whose ID is in the matching entry of ids; entries whose thread
+// is not found are left unchanged (presumably pre-set to -1 by the caller).
+// If user_sys_cpu_time = true, the sum of user level and system CPU time
+// for the given thread is returned; otherwise, only user level CPU time
+// is returned.
+JVM_ENTRY(void, jmm_GetThreadCpuTimesWithKind(JNIEnv *env, jlongArray ids,
+                                              jlongArray timeArray,
+                                              jboolean user_sys_cpu_time))
+  // Neither the thread ID array nor the result array may be null
+  if (ids == NULL || timeArray == NULL) {
+    THROW(vmSymbols::java_lang_NullPointerException());
+  }
+
+  ResourceMark rm(THREAD);
+  typeArrayOop ta = typeArrayOop(JNIHandles::resolve_non_null(ids));
+  typeArrayHandle ids_ah(THREAD, ta);
+
+  typeArrayOop tia = typeArrayOop(JNIHandles::resolve_non_null(timeArray));
+  typeArrayHandle timeArray_h(THREAD, tia);
+
+  // validate the thread id array (throws on an invalid thread ID entry)
+  validate_thread_id_array(ids_ah, CHECK);
+
+  // timeArray must be of the same length as the given array of thread IDs
+  int num_threads = ids_ah->length();
+  if (num_threads != timeArray_h->length()) {
+    THROW_MSG(vmSymbols::java_lang_IllegalArgumentException(),
+              "The length of the given long array does not match the length of "
+              "the given array of thread IDs");
+  }
+
+  MutexLockerEx ml(Threads_lock);  // taken before the per-ID thread lookups below
+  for (int i = 0; i < num_threads; i++) {
+    JavaThread* java_thread = find_java_thread_from_id(ids_ah->long_at(i));
+    if (java_thread != NULL) {  // no matching thread: timeArray[i] is not written
+      timeArray_h->long_at_put(i, os::thread_cpu_time((Thread*)java_thread,
+                                                      user_sys_cpu_time != 0));
+    }
+  }
+JVM_END
+
 // Returns a String array of all VM global flag names
 JVM_ENTRY(jobjectArray, jmm_GetVMGlobalNames(JNIEnv *env))
   // last flag entry is always NULL, so subtract 1
@@ -2020,7 +2098,7 @@
   jmm_GetMemoryManagers,
   jmm_GetMemoryPoolUsage,
   jmm_GetPeakMemoryPoolUsage,
-  NULL,
+  jmm_GetThreadAllocatedMemory,
   jmm_GetMemoryUsage,
   jmm_GetLongAttribute,
   jmm_GetBoolAttribute,
@@ -2038,7 +2116,7 @@
   jmm_GetGCExtAttributeInfo,
   jmm_GetLastGCStat,
   jmm_GetThreadCpuTimeWithKind,
-  NULL,
+  jmm_GetThreadCpuTimesWithKind,
   jmm_DumpHeap0,
   jmm_FindDeadlockedThreads,
   jmm_SetVMGlobal,
--- a/src/share/vm/services/threadService.cpp	Fri Jan 07 03:38:19 2011 -0800
+++ b/src/share/vm/services/threadService.cpp	Fri Jan 07 10:42:32 2011 -0500
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2011, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -46,6 +46,7 @@
 // Default is disabled.
 bool ThreadService::_thread_monitoring_contention_enabled = false;
 bool ThreadService::_thread_cpu_time_enabled = false;
+bool ThreadService::_thread_allocated_memory_enabled = false;
 
 PerfCounter*  ThreadService::_total_threads_count = NULL;
 PerfVariable* ThreadService::_live_threads_count = NULL;
@@ -84,6 +85,8 @@
   if (os::is_thread_cpu_time_supported()) {
     _thread_cpu_time_enabled = true;
   }
+
+  _thread_allocated_memory_enabled = true; // Always on, so enable it
 }
 
 void ThreadService::reset_peak_thread_count() {
@@ -181,6 +184,15 @@
   return prev;
 }
 
+bool ThreadService::set_thread_allocated_memory_enabled(bool flag) {
+  // Swap in the new setting under Management_lock; hand back the old one.
+  MutexLocker guard(Management_lock);
+  const bool was_enabled = _thread_allocated_memory_enabled;
+
+  _thread_allocated_memory_enabled = flag;
+  return was_enabled;
+}
+
 // GC support
 void ThreadService::oops_do(OopClosure* f) {
   for (ThreadDumpResult* dump = _threaddump_list; dump != NULL; dump = dump->next()) {
--- a/src/share/vm/services/threadService.hpp	Fri Jan 07 03:38:19 2011 -0800
+++ b/src/share/vm/services/threadService.hpp	Fri Jan 07 10:42:32 2011 -0500
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2011, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -65,6 +65,7 @@
 
   static bool          _thread_monitoring_contention_enabled;
   static bool          _thread_cpu_time_enabled;
+  static bool          _thread_allocated_memory_enabled;
 
   // Need to keep the list of thread dump result that
   // keep references to methodOop since thread dump can be
@@ -83,6 +84,9 @@
   static bool set_thread_cpu_time_enabled(bool flag);
   static bool is_thread_cpu_time_enabled()    { return _thread_cpu_time_enabled; }
 
+  static bool set_thread_allocated_memory_enabled(bool flag);
+  static bool is_thread_allocated_memory_enabled() { return _thread_allocated_memory_enabled; }
+
   static jlong get_total_thread_count()       { return _total_threads_count->get_value(); }
   static jlong get_peak_thread_count()        { return _peak_threads_count->get_value(); }
   static jlong get_live_thread_count()        { return _live_threads_count->get_value() - _exiting_threads_count; }