changeset 142:8bd1e4487c18

Merge
author iveresov
date Sun, 04 May 2008 03:29:31 -0700
parents bcdc68eb7e1f fcbfc50865ab
children b5489bb705c9 7cce9e4e0f7c
files make/linux/makefiles/mapfile-vers-debug make/linux/makefiles/mapfile-vers-product src/os/windows/vm/os_windows.cpp src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.cpp src/share/vm/includeDB_core src/share/vm/memory/genCollectedHeap.hpp src/share/vm/runtime/globals.hpp
diffstat 27 files changed, 425 insertions(+), 153 deletions(-) [+]
line wrap: on
line diff
--- a/make/linux/makefiles/mapfile-vers-debug	Fri May 02 08:22:11 2008 -0700
+++ b/make/linux/makefiles/mapfile-vers-debug	Sun May 04 03:29:31 2008 -0700
@@ -279,7 +279,9 @@
                 jio_snprintf;
                 jio_vfprintf;
                 jio_vsnprintf;
-		fork1;
+                fork1;
+                numa_warn;
+                numa_error;
 
                 # Needed because there is no JVM interface for this.
                 sysThreadAvailableStackWithSlack;
--- a/make/linux/makefiles/mapfile-vers-product	Fri May 02 08:22:11 2008 -0700
+++ b/make/linux/makefiles/mapfile-vers-product	Sun May 04 03:29:31 2008 -0700
@@ -274,7 +274,9 @@
                 jio_snprintf;
                 jio_vfprintf;
                 jio_vsnprintf;
-		fork1;
+                fork1;
+                numa_warn;
+                numa_error;
 
                 # Needed because there is no JVM interface for this.
                 sysThreadAvailableStackWithSlack;
--- a/src/os/linux/vm/os_linux.cpp	Fri May 02 08:22:11 2008 -0700
+++ b/src/os/linux/vm/os_linux.cpp	Sun May 04 03:29:31 2008 -0700
@@ -2228,20 +2228,42 @@
 }
 
 void os::realign_memory(char *addr, size_t bytes, size_t alignment_hint) { }
-void os::free_memory(char *addr, size_t bytes)         { }
+
+void os::free_memory(char *addr, size_t bytes) {
+  uncommit_memory(addr, bytes);
+}
+
 void os::numa_make_global(char *addr, size_t bytes)    { }
-void os::numa_make_local(char *addr, size_t bytes)     { }
-bool os::numa_topology_changed()                       { return false; }
-size_t os::numa_get_groups_num()                       { return 1; }
-int os::numa_get_group_id()                            { return 0; }
-size_t os::numa_get_leaf_groups(int *ids, size_t size) {
-  if (size > 0) {
-    ids[0] = 0;
-    return 1;
+
+void os::numa_make_local(char *addr, size_t bytes, int lgrp_hint) {
+  Linux::numa_tonode_memory(addr, bytes, lgrp_hint);
+}
+
+bool os::numa_topology_changed()   { return false; }
+
+size_t os::numa_get_groups_num() {
+  int max_node = Linux::numa_max_node();
+  return max_node > 0 ? max_node + 1 : 1;
+}
+
+int os::numa_get_group_id() {
+  int cpu_id = Linux::sched_getcpu();
+  if (cpu_id != -1) {
+    int lgrp_id = Linux::get_node_by_cpu(cpu_id);
+    if (lgrp_id != -1) {
+      return lgrp_id;
+    }
   }
   return 0;
 }
 
+size_t os::numa_get_leaf_groups(int *ids, size_t size) {
+  for (size_t i = 0; i < size; i++) {
+    ids[i] = i;
+  }
+  return size;
+}
+
 bool os::get_page_info(char *start, page_info* info) {
   return false;
 }
@@ -2250,6 +2272,74 @@
   return end;
 }
 
+extern "C" void numa_warn(int number, char *where, ...) { }
+extern "C" void numa_error(char *where) { }
+
+void os::Linux::libnuma_init() {
+  // sched_getcpu() should be in libc.
+  set_sched_getcpu(CAST_TO_FN_PTR(sched_getcpu_func_t,
+                                  dlsym(RTLD_DEFAULT, "sched_getcpu")));
+
+  if (sched_getcpu() != -1) { // Does it work?
+    void *handle = dlopen("libnuma.so", RTLD_LAZY);
+    if (handle != NULL) {
+      set_numa_node_to_cpus(CAST_TO_FN_PTR(numa_node_to_cpus_func_t,
+                                           dlsym(handle, "numa_node_to_cpus")));
+      set_numa_max_node(CAST_TO_FN_PTR(numa_max_node_func_t,
+                                       dlsym(handle, "numa_max_node")));
+      set_numa_available(CAST_TO_FN_PTR(numa_available_func_t,
+                                        dlsym(handle, "numa_available")));
+      set_numa_tonode_memory(CAST_TO_FN_PTR(numa_tonode_memory_func_t,
+                                            dlsym(handle, "numa_tonode_memory")));
+      if (numa_available() != -1) {
+        // Create a cpu -> node mapping
+        _cpu_to_node = new (ResourceObj::C_HEAP) GrowableArray<int>(0, true);
+        rebuild_cpu_to_node_map();
+      }
+    }
+  }
+}
+
+// rebuild_cpu_to_node_map() constructs a table mapping cpu id to node id.
+// The table is later used in get_node_by_cpu().
+void os::Linux::rebuild_cpu_to_node_map() {
+  int cpu_num = os::active_processor_count();
+  cpu_to_node()->clear();
+  cpu_to_node()->at_grow(cpu_num - 1);
+  int node_num = numa_get_groups_num();
+  int cpu_map_size = (cpu_num + BitsPerLong - 1) / BitsPerLong;
+  unsigned long *cpu_map = NEW_C_HEAP_ARRAY(unsigned long, cpu_map_size);
+  for (int i = 0; i < node_num; i++) {
+    if (numa_node_to_cpus(i, cpu_map, cpu_map_size * sizeof(unsigned long)) != -1) {
+      for (int j = 0; j < cpu_map_size; j++) {
+        if (cpu_map[j] != 0) {
+          for (int k = 0; k < BitsPerLong; k++) {
+            if (cpu_map[j] & (1UL << k)) {
+              cpu_to_node()->at_put(j * BitsPerLong + k, i);
+            }
+          }
+        }
+      }
+    }
+  }
+  FREE_C_HEAP_ARRAY(unsigned long, cpu_map);
+}
+
+int os::Linux::get_node_by_cpu(int cpu_id) {
+  if (cpu_to_node() != NULL && cpu_id >= 0 && cpu_id < cpu_to_node()->length()) {
+    return cpu_to_node()->at(cpu_id);
+  }
+  return -1;
+}
+
+GrowableArray<int>* os::Linux::_cpu_to_node;
+os::Linux::sched_getcpu_func_t os::Linux::_sched_getcpu;
+os::Linux::numa_node_to_cpus_func_t os::Linux::_numa_node_to_cpus;
+os::Linux::numa_max_node_func_t os::Linux::_numa_max_node;
+os::Linux::numa_available_func_t os::Linux::_numa_available;
+os::Linux::numa_tonode_memory_func_t os::Linux::_numa_tonode_memory;
+
+
 bool os::uncommit_memory(char* addr, size_t size) {
   return ::mmap(addr, size,
                 PROT_READ|PROT_WRITE|PROT_EXEC,
@@ -3552,6 +3642,10 @@
           Linux::is_floating_stack() ? "floating stack" : "fixed stack");
   }
 
+  if (UseNUMA) {
+    Linux::libnuma_init();
+  }
+
   if (MaxFDLimit) {
     // set the number of file descriptors to max. print out error
     // if getrlimit/setrlimit fails but continue regardless.
--- a/src/os/linux/vm/os_linux.hpp	Fri May 02 08:22:11 2008 -0700
+++ b/src/os/linux/vm/os_linux.hpp	Sun May 04 03:29:31 2008 -0700
@@ -59,6 +59,8 @@
   static bool _is_NPTL;
   static bool _supports_fast_thread_cpu_time;
 
+  static GrowableArray<int>* _cpu_to_node;
+
  protected:
 
   static julong _physical_memory;
@@ -79,8 +81,9 @@
   static void set_is_LinuxThreads()           { _is_NPTL = false; }
   static void set_is_floating_stack()         { _is_floating_stack = true; }
 
+  static void rebuild_cpu_to_node_map();
+  static GrowableArray<int>* cpu_to_node()    { return _cpu_to_node; }
  public:
-
   static void init_thread_fpu_state();
   static int  get_fpu_control_word();
   static void set_fpu_control_word(int fpu_control);
@@ -143,6 +146,7 @@
   static bool is_floating_stack()             { return _is_floating_stack; }
 
   static void libpthread_init();
+  static void libnuma_init();
 
   // Minimum stack size a thread can be created with (allowing
   // the VM to completely create the thread and enter user code)
@@ -229,6 +233,38 @@
 
     #undef SR_SUSPENDED
   };
+
+private:
+  typedef int (*sched_getcpu_func_t)(void);
+  typedef int (*numa_node_to_cpus_func_t)(int node, unsigned long *buffer, int bufferlen);
+  typedef int (*numa_max_node_func_t)(void);
+  typedef int (*numa_available_func_t)(void);
+  typedef int (*numa_tonode_memory_func_t)(void *start, size_t size, int node);
+
+
+  static sched_getcpu_func_t _sched_getcpu;
+  static numa_node_to_cpus_func_t _numa_node_to_cpus;
+  static numa_max_node_func_t _numa_max_node;
+  static numa_available_func_t _numa_available;
+  static numa_tonode_memory_func_t _numa_tonode_memory;
+
+  static void set_sched_getcpu(sched_getcpu_func_t func) { _sched_getcpu = func; }
+  static void set_numa_node_to_cpus(numa_node_to_cpus_func_t func) { _numa_node_to_cpus = func; }
+  static void set_numa_max_node(numa_max_node_func_t func) { _numa_max_node = func; }
+  static void set_numa_available(numa_available_func_t func) { _numa_available = func; }
+  static void set_numa_tonode_memory(numa_tonode_memory_func_t func) { _numa_tonode_memory = func; }
+
+public:
+  static int sched_getcpu()  { return _sched_getcpu != NULL ? _sched_getcpu() : -1; }
+  static int numa_node_to_cpus(int node, unsigned long *buffer, int bufferlen) {
+    return _numa_node_to_cpus != NULL ? _numa_node_to_cpus(node, buffer, bufferlen) : -1;
+  }
+  static int numa_max_node() { return _numa_max_node != NULL ? _numa_max_node() : -1; }
+  static int numa_available() { return _numa_available != NULL ? _numa_available() : -1; }
+  static int numa_tonode_memory(void *start, size_t size, int node) {
+    return _numa_tonode_memory != NULL ? _numa_tonode_memory(start, size, node) : -1;
+  }
+  static int get_node_by_cpu(int cpu_id);
 };
 
 
--- a/src/os/linux/vm/os_linux.inline.hpp	Fri May 02 08:22:11 2008 -0700
+++ b/src/os/linux/vm/os_linux.inline.hpp	Sun May 04 03:29:31 2008 -0700
@@ -120,3 +120,6 @@
   RESTARTABLE(_cmd, _result); \
   return _result; \
 } while(false)
+
+inline bool os::numa_has_static_binding()   { return true; }
+inline bool os::numa_has_group_homing()     { return false;  }
--- a/src/os/solaris/vm/os_solaris.cpp	Fri May 02 08:22:11 2008 -0700
+++ b/src/os/solaris/vm/os_solaris.cpp	Sun May 04 03:29:31 2008 -0700
@@ -2602,7 +2602,7 @@
 }
 
 // Tell the OS to make the range local to the first-touching LWP
-void os::numa_make_local(char *addr, size_t bytes) {
+void os::numa_make_local(char *addr, size_t bytes, int lgrp_hint) {
   assert((intptr_t)addr % os::vm_page_size() == 0, "Address should be page-aligned.");
   if (madvise(addr, bytes, MADV_ACCESS_LWP) < 0) {
     debug_only(warning("MADV_ACCESS_LWP failed."));
--- a/src/os/solaris/vm/os_solaris.inline.hpp	Fri May 02 08:22:11 2008 -0700
+++ b/src/os/solaris/vm/os_solaris.inline.hpp	Sun May 04 03:29:31 2008 -0700
@@ -204,3 +204,6 @@
   RESTARTABLE(_cmd, _result); \
   return _result; \
 } while(false)
+
+inline bool os::numa_has_static_binding()   { return false; }
+inline bool os::numa_has_group_homing()     { return true;  }
--- a/src/os/windows/vm/os_windows.cpp	Fri May 02 08:22:11 2008 -0700
+++ b/src/os/windows/vm/os_windows.cpp	Sun May 04 03:29:31 2008 -0700
@@ -2581,7 +2581,7 @@
 void os::realign_memory(char *addr, size_t bytes, size_t alignment_hint) { }
 void os::free_memory(char *addr, size_t bytes)         { }
 void os::numa_make_global(char *addr, size_t bytes)    { }
-void os::numa_make_local(char *addr, size_t bytes)     { }
+void os::numa_make_local(char *addr, size_t bytes, int lgrp_hint)    { }
 bool os::numa_topology_changed()                       { return false; }
 size_t os::numa_get_groups_num()                       { return 1; }
 int os::numa_get_group_id()                            { return 0; }
--- a/src/os/windows/vm/os_windows.inline.hpp	Fri May 02 08:22:11 2008 -0700
+++ b/src/os/windows/vm/os_windows.inline.hpp	Sun May 04 03:29:31 2008 -0700
@@ -69,3 +69,6 @@
     *((int *)(sp - (pages * vm_page_size()))) = 0;
   }
 }
+
+inline bool os::numa_has_static_binding()   { return true;   }
+inline bool os::numa_has_group_homing()     { return false;  }
--- a/src/share/vm/gc_implementation/concurrentMarkSweep/cmsPermGen.cpp	Fri May 02 08:22:11 2008 -0700
+++ b/src/share/vm/gc_implementation/concurrentMarkSweep/cmsPermGen.cpp	Sun May 04 03:29:31 2008 -0700
@@ -44,52 +44,12 @@
   bool lock_owned = lock->owned_by_self();
   if (lock_owned) {
     MutexUnlocker mul(lock);
-    return mem_allocate_work(size);
+    return mem_allocate_in_gen(size, _gen);
   } else {
-    return mem_allocate_work(size);
+    return mem_allocate_in_gen(size, _gen);
   }
 }
 
-HeapWord* CMSPermGen::mem_allocate_work(size_t size) {
-  assert(!_gen->freelistLock()->owned_by_self(), "Potetntial deadlock");
-
-  MutexLocker ml(Heap_lock);
-  HeapWord* obj = NULL;
-
-  obj = _gen->allocate(size, false);
-  // Since we want to minimize pause times, we will prefer
-  // expanding the perm gen rather than doing a stop-world
-  // collection to satisfy the allocation request.
-  if (obj == NULL) {
-    // Try to expand the perm gen and allocate space.
-    obj = _gen->expand_and_allocate(size, false, false);
-    if (obj == NULL) {
-      // Let's see if a normal stop-world full collection will
-      // free up enough space.
-      SharedHeap::heap()->collect_locked(GCCause::_permanent_generation_full);
-      obj = _gen->allocate(size, false);
-      if (obj == NULL) {
-        // The collection above may have shrunk the space, so try
-        // to expand again and allocate space.
-        obj = _gen->expand_and_allocate(size, false, false);
-      }
-      if (obj == NULL) {
-        // We have not been able to allocate space despite a
-        // full stop-world collection. We now make a last-ditch collection
-        // attempt (in which soft refs are all aggressively freed)
-        // that will try to reclaim as much space as possible.
-        SharedHeap::heap()->collect_locked(GCCause::_last_ditch_collection);
-        obj = _gen->allocate(size, false);
-        if (obj == NULL) {
-          // Expand generation in case it was shrunk following the collection.
-          obj = _gen->expand_and_allocate(size, false, false);
-        }
-      }
-    }
-  }
-  return obj;
-}
-
 void CMSPermGen::compute_new_size() {
   _gen->compute_new_size();
 }
--- a/src/share/vm/gc_implementation/concurrentMarkSweep/cmsPermGen.hpp	Fri May 02 08:22:11 2008 -0700
+++ b/src/share/vm/gc_implementation/concurrentMarkSweep/cmsPermGen.hpp	Sun May 04 03:29:31 2008 -0700
@@ -29,7 +29,6 @@
 class CMSPermGen:  public PermGen {
   friend class VMStructs;
 
-  HeapWord* mem_allocate_work(size_t size);
  protected:
   // The "generation" view.
   ConcurrentMarkSweepGeneration* _gen;
--- a/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.cpp	Fri May 02 08:22:11 2008 -0700
+++ b/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.cpp	Sun May 04 03:29:31 2008 -0700
@@ -590,6 +590,31 @@
       full_gc_count = Universe::heap()->total_full_collections();
 
       result = perm_gen()->allocate_permanent(size);
+
+      if (result != NULL) {
+        return result;
+      }
+
+      if (GC_locker::is_active_and_needs_gc()) {
+        // If this thread is not in a jni critical section, we stall
+        // the requestor until the critical section has cleared and
+        // GC is allowed. When the critical section clears, a GC is
+        // initiated by the last thread exiting the critical section; so
+        // we retry the allocation sequence from the beginning of the loop,
+        // rather than causing more, now probably unnecessary, GC attempts.
+        JavaThread* jthr = JavaThread::current();
+        if (!jthr->in_critical()) {
+          MutexUnlocker mul(Heap_lock);
+          GC_locker::stall_until_clear();
+          continue;
+        } else {
+          if (CheckJNICalls) {
+            fatal("Possible deadlock due to allocating while"
+                  " in jni critical section");
+          }
+          return NULL;
+        }
+      }
     }
 
     if (result == NULL) {
@@ -622,6 +647,12 @@
       if (op.prologue_succeeded()) {
         assert(Universe::heap()->is_in_permanent_or_null(op.result()),
           "result not in heap");
+        // If GC was locked out during VM operation then retry allocation
+        // and/or stall as necessary.
+        if (op.gc_locked()) {
+          assert(op.result() == NULL, "must be NULL if gc_locked() is true");
+          continue;  // retry and/or stall as necessary
+        }
         // If a NULL results is being returned, an out-of-memory
         // will be thrown now.  Clear the gc_time_limit_exceeded
         // flag to avoid the following situation.
--- a/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.hpp	Fri May 02 08:22:11 2008 -0700
+++ b/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.hpp	Sun May 04 03:29:31 2008 -0700
@@ -169,8 +169,9 @@
   size_t large_typearray_limit() { return FastAllocateSizeLimit; }
 
   bool supports_inline_contig_alloc() const { return !UseNUMA; }
-  HeapWord** top_addr() const { return !UseNUMA ? young_gen()->top_addr() : NULL; }
-  HeapWord** end_addr() const { return !UseNUMA ? young_gen()->end_addr() : NULL; }
+
+  HeapWord** top_addr() const { return !UseNUMA ? young_gen()->top_addr() : (HeapWord**)-1; }
+  HeapWord** end_addr() const { return !UseNUMA ? young_gen()->end_addr() : (HeapWord**)-1; }
 
   void ensure_parsability(bool retire_tlabs);
   void accumulate_statistics_all_tlabs();
--- a/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.cpp	Fri May 02 08:22:11 2008 -0700
+++ b/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.cpp	Sun May 04 03:29:31 2008 -0700
@@ -976,7 +976,7 @@
   DEBUG_ONLY(mark_bitmap_count = mark_bitmap_size = 0;)
 
   // Increment the invocation count
-  heap->increment_total_collections();
+  heap->increment_total_collections(true);
 
   // We need to track unique mark sweep invocations as well.
   _total_invocations++;
@@ -1941,7 +1941,7 @@
   assert(SafepointSynchronize::is_at_safepoint(), "must be at a safepoint");
   assert(ref_processor() != NULL, "Sanity");
 
-  if (GC_locker::is_active()) {
+  if (GC_locker::check_active_before_gc()) {
     return;
   }
 
--- a/src/share/vm/gc_implementation/parallelScavenge/vmPSOperations.cpp	Fri May 02 08:22:11 2008 -0700
+++ b/src/share/vm/gc_implementation/parallelScavenge/vmPSOperations.cpp	Sun May 04 03:29:31 2008 -0700
@@ -69,6 +69,9 @@
 
   GCCauseSetter gccs(heap, _gc_cause);
   _result = heap->failed_permanent_mem_allocate(_size);
+  if (_result == NULL && GC_locker::is_active_and_needs_gc()) {
+    set_gc_locked();
+  }
   notify_gc_end();
 }
 
--- a/src/share/vm/gc_implementation/shared/mutableNUMASpace.cpp	Fri May 02 08:22:11 2008 -0700
+++ b/src/share/vm/gc_implementation/shared/mutableNUMASpace.cpp	Sun May 04 03:29:31 2008 -0700
@@ -46,9 +46,11 @@
   for (int i = 0; i < lgrp_spaces()->length(); i++) {
     LGRPSpace *ls = lgrp_spaces()->at(i);
     MutableSpace *s = ls->space();
-    HeapWord *top = MAX2((HeapWord*)round_down((intptr_t)s->top(), page_size()), s->bottom());
-    if (top < s->end()) {
-      ls->add_invalid_region(MemRegion(top, s->end()));
+    if (!os::numa_has_static_binding()) {
+      HeapWord *top = MAX2((HeapWord*)round_down((intptr_t)s->top(), page_size()), s->bottom());
+      if (top < s->end()) {
+        ls->add_invalid_region(MemRegion(top, s->end()));
+      }
     }
     s->mangle_unused_area();
   }
@@ -70,32 +72,36 @@
                                     area_touched_words);
         }
 #endif
-        MemRegion invalid;
-        HeapWord *crossing_start = (HeapWord*)round_to((intptr_t)s->top(), os::vm_page_size());
-        HeapWord *crossing_end = (HeapWord*)round_to((intptr_t)(s->top() + area_touched_words),
-                                                     os::vm_page_size());
-        if (crossing_start != crossing_end) {
-          // If object header crossed a small page boundary we mark the area
-          // as invalid rounding it to a page_size().
-          HeapWord *start = MAX2((HeapWord*)round_down((intptr_t)s->top(), page_size()), s->bottom());
-          HeapWord *end = MIN2((HeapWord*)round_to((intptr_t)(s->top() + area_touched_words), page_size()),
-                               s->end());
-          invalid = MemRegion(start, end);
+        if (!os::numa_has_static_binding()) {
+          MemRegion invalid;
+          HeapWord *crossing_start = (HeapWord*)round_to((intptr_t)s->top(), os::vm_page_size());
+          HeapWord *crossing_end = (HeapWord*)round_to((intptr_t)(s->top() + area_touched_words),
+                                                       os::vm_page_size());
+          if (crossing_start != crossing_end) {
+            // If object header crossed a small page boundary we mark the area
+            // as invalid rounding it to a page_size().
+            HeapWord *start = MAX2((HeapWord*)round_down((intptr_t)s->top(), page_size()), s->bottom());
+            HeapWord *end = MIN2((HeapWord*)round_to((intptr_t)(s->top() + area_touched_words), page_size()),
+                                 s->end());
+            invalid = MemRegion(start, end);
+          }
+
+          ls->add_invalid_region(invalid);
         }
-
-        ls->add_invalid_region(invalid);
         s->set_top(s->end());
       }
     } else {
+      if (!os::numa_has_static_binding()) {
 #ifdef ASSERT
-      MemRegion invalid(s->top(), s->end());
-      ls->add_invalid_region(invalid);
-#else
-      if (ZapUnusedHeapArea) {
         MemRegion invalid(s->top(), s->end());
         ls->add_invalid_region(invalid);
-      } else break;
+#else
+        if (ZapUnusedHeapArea) {
+          MemRegion invalid(s->top(), s->end());
+          ls->add_invalid_region(invalid);
+        } else break;
 #endif
+      }
     }
   }
 }
@@ -194,7 +200,7 @@
 }
 
 // Bias region towards the first-touching lgrp. Set the right page sizes.
-void MutableNUMASpace::bias_region(MemRegion mr) {
+void MutableNUMASpace::bias_region(MemRegion mr, int lgrp_id) {
   HeapWord *start = (HeapWord*)round_to((intptr_t)mr.start(), page_size());
   HeapWord *end = (HeapWord*)round_down((intptr_t)mr.end(), page_size());
   if (end > start) {
@@ -202,9 +208,13 @@
     assert((intptr_t)aligned_region.start()     % page_size() == 0 &&
            (intptr_t)aligned_region.byte_size() % page_size() == 0, "Bad alignment");
     assert(region().contains(aligned_region), "Sanity");
+    // First we tell the OS which page size we want in the given range. The underlying
+    // large page can be broken down if we require small pages.
+    os::realign_memory((char*)aligned_region.start(), aligned_region.byte_size(), page_size());
+    // Then we uncommit the pages in the range.
     os::free_memory((char*)aligned_region.start(), aligned_region.byte_size());
-    os::realign_memory((char*)aligned_region.start(), aligned_region.byte_size(), page_size());
-    os::numa_make_local((char*)aligned_region.start(), aligned_region.byte_size());
+    // And make them local/first-touch biased.
+    os::numa_make_local((char*)aligned_region.start(), aligned_region.byte_size(), lgrp_id);
   }
 }
 
@@ -233,10 +243,12 @@
     initialize(region(), true);
   } else {
     bool should_initialize = false;
-    for (int i = 0; i < lgrp_spaces()->length(); i++) {
-      if (!lgrp_spaces()->at(i)->invalid_region().is_empty()) {
-        should_initialize = true;
-        break;
+    if (!os::numa_has_static_binding()) {
+      for (int i = 0; i < lgrp_spaces()->length(); i++) {
+        if (!lgrp_spaces()->at(i)->invalid_region().is_empty()) {
+          should_initialize = true;
+          break;
+        }
       }
     }
 
@@ -472,8 +484,8 @@
       intersection = MemRegion(new_region.start(), new_region.start());
     }
     select_tails(new_region, intersection, &bottom_region, &top_region);
-    bias_region(bottom_region);
-    bias_region(top_region);
+    bias_region(bottom_region, lgrp_spaces()->at(0)->lgrp_id());
+    bias_region(top_region, lgrp_spaces()->at(lgrp_spaces()->length() - 1)->lgrp_id());
   }
 
   // Check if the space layout has changed significantly?
@@ -545,22 +557,37 @@
       intersection = MemRegion(new_region.start(), new_region.start());
     }
 
-    MemRegion invalid_region = ls->invalid_region().intersection(new_region);
-    if (!invalid_region.is_empty()) {
-      merge_regions(new_region, &intersection, &invalid_region);
-      free_region(invalid_region);
+    if (!os::numa_has_static_binding()) {
+      MemRegion invalid_region = ls->invalid_region().intersection(new_region);
+      // Invalid region is a range of memory that could've possibly
+      // been allocated on the other node. That's relevant only on Solaris where
+      // there is no static memory binding.
+      if (!invalid_region.is_empty()) {
+        merge_regions(new_region, &intersection, &invalid_region);
+        free_region(invalid_region);
+        ls->set_invalid_region(MemRegion());
+      }
     }
+
     select_tails(new_region, intersection, &bottom_region, &top_region);
-    free_region(bottom_region);
-    free_region(top_region);
+
+    if (!os::numa_has_static_binding()) {
+      // If that's a system with the first-touch policy then it's enough
+      // to free the pages.
+      free_region(bottom_region);
+      free_region(top_region);
+    } else {
+      // In a system with static binding we have to change the bias whenever
+      // we reshape the heap.
+      bias_region(bottom_region, ls->lgrp_id());
+      bias_region(top_region, ls->lgrp_id());
+    }
 
     // If we clear the region, we would mangle it in debug. That would cause page
     // allocation in a different place. Hence setting the top directly.
     s->initialize(new_region, false);
     s->set_top(s->bottom());
 
-    ls->set_invalid_region(MemRegion());
-
     set_adaptation_cycles(samples_count());
   }
 }
@@ -575,7 +602,7 @@
     HeapWord *top = MAX2((HeapWord*)round_down((intptr_t)s->top(), page_size()), s->bottom());
 
     if (s->contains(value)) {
-      if (top < value && top < s->end()) {
+      if (!os::numa_has_static_binding() && top < value && top < s->end()) {
         ls->add_invalid_region(MemRegion(top, value));
       }
       s->set_top(value);
@@ -584,10 +611,10 @@
         if (found_top) {
             s->set_top(s->bottom());
         } else {
-            if (top < s->end()) {
-              ls->add_invalid_region(MemRegion(top, s->end()));
-            }
-            s->set_top(s->end());
+          if (!os::numa_has_static_binding() && top < s->end()) {
+            ls->add_invalid_region(MemRegion(top, s->end()));
+          }
+          s->set_top(s->end());
         }
     }
   }
@@ -601,11 +628,23 @@
   }
 }
 
+/*
+   Linux supports static memory binding, therefore most of the logic
+   dealing with possible invalid page allocation is effectively
+   disabled. Besides, there is no notion of a home node in Linux: a
+   thread is allowed to migrate freely, although the scheduler is rather
+   reluctant to move threads between nodes. We check for the current
+   node on every allocation, and with high probability a thread stays on
+   the same node for some time, allowing local access to recently
+   allocated objects.
+ */
+
 HeapWord* MutableNUMASpace::allocate(size_t size) {
-  int lgrp_id = Thread::current()->lgrp_id();
-  if (lgrp_id == -1) {
+  Thread* thr = Thread::current();
+  int lgrp_id = thr->lgrp_id();
+  if (lgrp_id == -1 || !os::numa_has_group_homing()) {
     lgrp_id = os::numa_get_group_id();
-    Thread::current()->set_lgrp_id(lgrp_id);
+    thr->set_lgrp_id(lgrp_id);
   }
 
   int i = lgrp_spaces()->find(&lgrp_id, LGRPSpace::equals);
@@ -628,22 +667,22 @@
       MutableSpace::set_top(s->top());
     }
   }
-  // Make the page allocation happen here.
-  if (p != NULL) {
+  // Make the page allocation happen here if there is no static binding.
+  if (p != NULL && !os::numa_has_static_binding()) {
     for (HeapWord *i = p; i < p + size; i += os::vm_page_size() >> LogHeapWordSize) {
       *(int*)i = 0;
     }
   }
-
   return p;
 }
 
 // This version is lock-free.
 HeapWord* MutableNUMASpace::cas_allocate(size_t size) {
-  int lgrp_id = Thread::current()->lgrp_id();
-  if (lgrp_id == -1) {
+  Thread* thr = Thread::current();
+  int lgrp_id = thr->lgrp_id();
+  if (lgrp_id == -1 || !os::numa_has_group_homing()) {
     lgrp_id = os::numa_get_group_id();
-    Thread::current()->set_lgrp_id(lgrp_id);
+    thr->set_lgrp_id(lgrp_id);
   }
 
   int i = lgrp_spaces()->find(&lgrp_id, LGRPSpace::equals);
@@ -670,8 +709,8 @@
     }
   }
 
-  // Make the page allocation happen here.
-  if (p != NULL) {
+  // Make the page allocation happen here if there is no static binding.
+  if (p != NULL && !os::numa_has_static_binding() ) {
     for (HeapWord *i = p; i < p + size; i += os::vm_page_size() >> LogHeapWordSize) {
       *(int*)i = 0;
     }
--- a/src/share/vm/gc_implementation/shared/mutableNUMASpace.hpp	Fri May 02 08:22:11 2008 -0700
+++ b/src/share/vm/gc_implementation/shared/mutableNUMASpace.hpp	Sun May 04 03:29:31 2008 -0700
@@ -139,8 +139,8 @@
   // Check if the NUMA topology has changed. Add and remove spaces if needed.
   // The update can be forced by setting the force parameter equal to true.
   bool update_layout(bool force);
-  // Bias region towards the first-touching lgrp.
-  void bias_region(MemRegion mr);
+  // Bias region towards the lgrp identified by lgrp_id.
+  void bias_region(MemRegion mr, int lgrp_id);
   // Free pages in a given region.
   void free_region(MemRegion mr);
   // Get current chunk size.
--- a/src/share/vm/gc_implementation/shared/vmGCOperations.cpp	Fri May 02 08:22:11 2008 -0700
+++ b/src/share/vm/gc_implementation/shared/vmGCOperations.cpp	Sun May 04 03:29:31 2008 -0700
@@ -144,3 +144,18 @@
   gch->do_full_collection(gch->must_clear_all_soft_refs(), _max_level);
   notify_gc_end();
 }
+
+void VM_GenCollectForPermanentAllocation::doit() {
+  JvmtiGCForAllocationMarker jgcm;
+  notify_gc_begin(true);
+  GenCollectedHeap* gch = GenCollectedHeap::heap();
+  GCCauseSetter gccs(gch, _gc_cause);
+  gch->do_full_collection(gch->must_clear_all_soft_refs(),
+                          gch->n_gens() - 1);
+  _res = gch->perm_gen()->allocate(_size, false);
+  assert(gch->is_in_reserved_or_null(_res), "result not in heap");
+  if (_res == NULL && GC_locker::is_active_and_needs_gc()) {
+    set_gc_locked();
+  }
+  notify_gc_end();
+}
--- a/src/share/vm/gc_implementation/shared/vmGCOperations.hpp	Fri May 02 08:22:11 2008 -0700
+++ b/src/share/vm/gc_implementation/shared/vmGCOperations.hpp	Sun May 04 03:29:31 2008 -0700
@@ -43,6 +43,7 @@
 //     is specified; and also the attach "inspectheap" operation
 //
 //  VM_GenCollectForAllocation
+//  VM_GenCollectForPermanentAllocation
 //  VM_ParallelGCFailedAllocation
 //  VM_ParallelGCFailedPermanentAllocation
 //   - this operation is invoked when allocation is failed;
@@ -166,3 +167,23 @@
   virtual VMOp_Type type() const { return VMOp_GenCollectFull; }
   virtual void doit();
 };
+
+class VM_GenCollectForPermanentAllocation: public VM_GC_Operation {
+ private:
+  HeapWord*   _res;
+  size_t      _size;                       // size of object to be allocated
+ public:
+  VM_GenCollectForPermanentAllocation(size_t size,
+                                      unsigned int gc_count_before,
+                                      unsigned int full_gc_count_before,
+                                      GCCause::Cause gc_cause)
+    : VM_GC_Operation(gc_count_before, full_gc_count_before, true),
+      _size(size) {
+    _res = NULL;
+    _gc_cause = gc_cause;
+  }
+  ~VM_GenCollectForPermanentAllocation()  {}
+  virtual VMOp_Type type() const { return VMOp_GenCollectForPermanentAllocation; }
+  virtual void doit();
+  HeapWord* result() const       { return _res; }
+};
--- a/src/share/vm/includeDB_core	Fri May 02 08:22:11 2008 -0700
+++ b/src/share/vm/includeDB_core	Sun May 04 03:29:31 2008 -0700
@@ -718,6 +718,11 @@
 ciObjArray.cpp                          ciUtilities.hpp
 ciObjArray.cpp                          objArrayOop.hpp
 
+ciObjArray.cpp                          ciObjArray.hpp
+ciObjArray.cpp                          ciNullObject.hpp
+ciObjArray.cpp                          ciUtilities.hpp
+ciObjArray.cpp                          objArrayOop.hpp
+
 ciObjArrayKlass.cpp                     ciInstanceKlass.hpp
 ciObjArrayKlass.cpp                     ciObjArrayKlass.hpp
 ciObjArrayKlass.cpp                     ciObjArrayKlassKlass.hpp
@@ -1662,6 +1667,7 @@
 
 gcLocker.cpp                            gcLocker.inline.hpp
 gcLocker.cpp                            sharedHeap.hpp
+gcLocker.cpp                            resourceArea.hpp
 
 gcLocker.hpp                            collectedHeap.hpp
 gcLocker.hpp                            genCollectedHeap.hpp
@@ -3094,13 +3100,14 @@
 oopMap.cpp                              signature.hpp
 
 oopMap.hpp                              allocation.hpp
+oopMapCache.cpp                         jvmtiRedefineClassesTrace.hpp
 oopMap.hpp                              compressedStream.hpp
 oopMap.hpp                              growableArray.hpp
 oopMap.hpp                              vmreg.hpp
 
 oopMapCache.cpp                         allocation.inline.hpp
+oopMapCache.cpp                         jvmtiRedefineClassesTrace.hpp
 oopMapCache.cpp                         handles.inline.hpp
-oopMapCache.cpp                         jvmtiRedefineClassesTrace.hpp
 oopMapCache.cpp                         oop.inline.hpp
 oopMapCache.cpp                         oopMapCache.hpp
 oopMapCache.cpp                         resourceArea.hpp
@@ -3207,6 +3214,7 @@
 os_<os_family>.cpp                      extendedPC.hpp
 os_<os_family>.cpp                      filemap.hpp
 os_<os_family>.cpp                      globals.hpp
+os_<os_family>.cpp                      growableArray.hpp
 os_<os_family>.cpp                      hpi.hpp
 os_<os_family>.cpp                      icBuffer.hpp
 os_<os_family>.cpp                      interfaceSupport.hpp
@@ -3348,6 +3356,10 @@
 permGen.cpp                             oop.inline.hpp
 permGen.cpp                             permGen.hpp
 permGen.cpp                             universe.hpp
+permGen.cpp                             gcLocker.hpp
+permGen.cpp                             gcLocker.inline.hpp
+permGen.cpp                             vmGCOperations.hpp
+permGen.cpp                             vmThread.hpp
 
 permGen.hpp                             gcCause.hpp
 permGen.hpp                             generation.hpp
--- a/src/share/vm/memory/gcLocker.cpp	Fri May 02 08:22:11 2008 -0700
+++ b/src/share/vm/memory/gcLocker.cpp	Sun May 04 03:29:31 2008 -0700
@@ -32,6 +32,12 @@
 
 void GC_locker::stall_until_clear() {
   assert(!JavaThread::current()->in_critical(), "Would deadlock");
+  if (PrintJNIGCStalls && PrintGCDetails) {
+    ResourceMark rm; // JavaThread::name() allocates to convert to UTF8
+    gclog_or_tty->print_cr(
+      "Allocation failed. Thread \"%s\" is stalled by JNI critical section.",
+      JavaThread::current()->name());
+  }
   MutexLocker   ml(JNICritical_lock);
   // Wait for _needs_gc  to be cleared
   while (GC_locker::needs_gc()) {
--- a/src/share/vm/memory/genCollectedHeap.hpp	Fri May 02 08:22:11 2008 -0700
+++ b/src/share/vm/memory/genCollectedHeap.hpp	Sun May 04 03:29:31 2008 -0700
@@ -35,6 +35,7 @@
   friend class CMSCollector;
   friend class GenMarkSweep;
   friend class VM_GenCollectForAllocation;
+  friend class VM_GenCollectForPermanentAllocation;
   friend class VM_GenCollectFull;
   friend class VM_GenCollectFullConcurrent;
   friend class VM_GC_HeapInspection;
--- a/src/share/vm/memory/permGen.cpp	Fri May 02 08:22:11 2008 -0700
+++ b/src/share/vm/memory/permGen.cpp	Sun May 04 03:29:31 2008 -0700
@@ -25,6 +25,70 @@
 #include "incls/_precompiled.incl"
 #include "incls/_permGen.cpp.incl"
 
+HeapWord* PermGen::mem_allocate_in_gen(size_t size, Generation* gen) {  // allocate in gen; expand and/or request GC on failure; may return NULL
+  MutexLocker ml(Heap_lock);
+  GCCause::Cause next_cause = GCCause::_permanent_generation_full;  // cause handed to the next GC request
+  GCCause::Cause prev_cause = GCCause::_no_gc;  // cause of the last request that ran and still produced no object
+
+  for (;;) {
+    HeapWord* obj = gen->allocate(size, false);  // plain allocation attempt at the top of every pass
+    if (obj != NULL) {
+      return obj;
+    }
+    if (gen->capacity() < _capacity_expansion_limit ||  // still below the expansion limit, or ...
+        prev_cause != GCCause::_no_gc) {  // ... a GC request has already run without yielding an object
+      obj = gen->expand_and_allocate(size, false);
+    }
+    if (obj == NULL && prev_cause != GCCause::_last_ditch_collection) {  // no further GC once a last-ditch request has run
+      if (GC_locker::is_active_and_needs_gc()) {
+        // If this thread is not in a jni critical section, we stall
+        // the requestor until the critical section has cleared and
+        // GC allowed. When the critical section clears, a GC is
+        // initiated by the last thread exiting the critical section; so
+        // we retry the allocation sequence from the beginning of the loop,
+        // rather than causing more, now probably unnecessary, GC attempts.
+        JavaThread* jthr = JavaThread::current();
+        if (!jthr->in_critical()) {
+          MutexUnlocker mul(Heap_lock);  // presumably drops Heap_lock for this scope only -- confirm
+          // Wait for JNI critical section to be exited
+          GC_locker::stall_until_clear();
+          continue;
+        } else {
+          if (CheckJNICalls) {
+            fatal("Possible deadlock due to allocating while"
+                  " in jni critical section");
+          }
+          return NULL;  // cannot stall while inside a critical section; report failure
+        }
+      }
+
+      // Read the GC count while holding the Heap_lock
+      unsigned int gc_count_before      = SharedHeap::heap()->total_collections();
+      unsigned int full_gc_count_before = SharedHeap::heap()->total_full_collections();
+      {
+        MutexUnlocker mu(Heap_lock);  // give up heap lock, execute gets it back
+        VM_GenCollectForPermanentAllocation op(size, gc_count_before, full_gc_count_before,
+                                               next_cause);
+        VMThread::execute(&op);
+        if (!op.prologue_succeeded() || op.gc_locked()) {  // the request did not run a collection; causes are left unchanged
+          assert(op.result() == NULL, "must be NULL if gc_locked() is true");
+          continue;  // retry and/or stall as necessary
+        }
+        obj = op.result();
+        assert(obj == NULL || SharedHeap::heap()->is_in_reserved(obj),
+               "result not in heap");
+        if (obj != NULL) {
+          return obj;
+        }
+      }
+      prev_cause = next_cause;  // record which kind of request just failed to produce an object
+      next_cause = GCCause::_last_ditch_collection;  // any further request is a last-ditch collection
+    } else {
+      return obj;  // expansion succeeded, or NULL because the last-ditch request already failed
+    }
+  }
+}
+
 CompactingPermGen::CompactingPermGen(ReservedSpace rs,
                                      ReservedSpace shared_rs,
                                      size_t initial_byte_size,
@@ -44,40 +108,7 @@
 }
 
 HeapWord* CompactingPermGen::mem_allocate(size_t size) {
-  MutexLocker ml(Heap_lock);
-  HeapWord* obj = _gen->allocate(size, false);
-  bool tried_collection = false;
-  bool tried_expansion = false;
-  while (obj == NULL) {
-    if (_gen->capacity() >= _capacity_expansion_limit || tried_expansion) {
-      // Expansion limit reached, try collection before expanding further
-      // For now we force a full collection, this could be changed
-      SharedHeap::heap()->collect_locked(GCCause::_permanent_generation_full);
-      obj = _gen->allocate(size, false);
-      tried_collection = true;
-      tried_expansion =  false;    // ... following the collection:
-                                   // the collection may have shrunk the space.
-    }
-    if (obj == NULL && !tried_expansion) {
-      obj = _gen->expand_and_allocate(size, false);
-      tried_expansion = true;
-    }
-    if (obj == NULL && tried_collection && tried_expansion) {
-      // We have not been able to allocate despite a collection and
-      // an attempted space expansion. We now make a last-ditch collection
-      // attempt that will try to reclaim as much space as possible (for
-      // example by aggressively clearing all soft refs).
-      SharedHeap::heap()->collect_locked(GCCause::_last_ditch_collection);
-      obj = _gen->allocate(size, false);
-      if (obj == NULL) {
-        // An expansion attempt is necessary since the previous
-        // collection may have shrunk the space.
-        obj = _gen->expand_and_allocate(size, false);
-      }
-      break;
-    }
-  }
-  return obj;
+  return mem_allocate_in_gen(size, _gen);
 }
 
 void CompactingPermGen::compute_new_size() {
--- a/src/share/vm/memory/permGen.hpp	Fri May 02 08:22:11 2008 -0700
+++ b/src/share/vm/memory/permGen.hpp	Sun May 04 03:29:31 2008 -0700
@@ -38,6 +38,8 @@
   size_t _capacity_expansion_limit;  // maximum expansion allowed without a
                                      // full gc occuring
 
+  HeapWord* mem_allocate_in_gen(size_t size, Generation* gen);
+
  public:
   enum Name {
     MarkSweepCompact, MarkSweep, ConcurrentMarkSweep
--- a/src/share/vm/runtime/globals.hpp	Fri May 02 08:22:11 2008 -0700
+++ b/src/share/vm/runtime/globals.hpp	Sun May 04 03:29:31 2008 -0700
@@ -1928,6 +1928,10 @@
   develop(bool, IgnoreLibthreadGPFault, false,                              \
           "Suppress workaround for libthread GP fault")                     \
                                                                             \
+  product(bool, PrintJNIGCStalls, false,                                    \
+          "Print diagnostic message when GC is stalled "                    \
+          "by JNI critical section")                                        \
+                                                                            \
   /* JVMTI heap profiling */                                                \
                                                                             \
   diagnostic(bool, TraceJVMTIObjectTagging, false,                          \
--- a/src/share/vm/runtime/os.hpp	Fri May 02 08:22:11 2008 -0700
+++ b/src/share/vm/runtime/os.hpp	Sun May 04 03:29:31 2008 -0700
@@ -33,6 +33,7 @@
 class Event;
 class DLL;
 class FileHandle;
+template<class E> class GrowableArray;
 
 // %%%%% Moved ThreadState, START_FN, OSThread to new osThread.hpp. -- Rose
 
@@ -206,7 +207,9 @@
   static void   realign_memory(char *addr, size_t bytes, size_t alignment_hint);
 
   // NUMA-specific interface
-  static void   numa_make_local(char *addr, size_t bytes);
+  static bool   numa_has_static_binding();
+  static bool   numa_has_group_homing();
+  static void   numa_make_local(char *addr, size_t bytes, int lgrp_hint);
   static void   numa_make_global(char *addr, size_t bytes);
   static size_t numa_get_groups_num();
   static size_t numa_get_leaf_groups(int *ids, size_t size);
--- a/src/share/vm/runtime/vm_operations.hpp	Fri May 02 08:22:11 2008 -0700
+++ b/src/share/vm/runtime/vm_operations.hpp	Sun May 04 03:29:31 2008 -0700
@@ -49,6 +49,7 @@
   template(GenCollectFull)                        \
   template(GenCollectFullConcurrent)              \
   template(GenCollectForAllocation)               \
+  template(GenCollectForPermanentAllocation)      \
   template(ParallelGCFailedAllocation)            \
   template(ParallelGCFailedPermanentAllocation)   \
   template(ParallelGCSystemGC)                    \