changeset 1825:76efbe666d6c

6964774: Adjust optimization flags setting Summary: Adjust performance flags settings. Reviewed-by: never, phh
author kvn
date Tue, 29 Jun 2010 10:34:00 -0700
parents 6027dddc26c6
children fcbb92a1ab3b
files src/cpu/x86/vm/vm_version_x86.cpp src/cpu/x86/vm/vm_version_x86.hpp src/share/vm/runtime/arguments.cpp
diffstat 3 files changed, 106 insertions(+), 12 deletions(-) [+]
line wrap: on
line diff
--- a/src/cpu/x86/vm/vm_version_x86.cpp	Mon Jun 28 14:54:39 2010 -0700
+++ b/src/cpu/x86/vm/vm_version_x86.cpp	Tue Jun 29 10:34:00 2010 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2009, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2010, Oracle and/or its affiliates.  All Rights Reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -34,7 +34,7 @@
 VM_Version::CpuidInfo VM_Version::_cpuid_info   = { 0, };
 
 static BufferBlob* stub_blob;
-static const int stub_size = 300;
+static const int stub_size = 400;
 
 extern "C" {
   typedef void (*getPsrInfo_stub_t)(void*);
@@ -56,7 +56,7 @@
     const uint32_t CPU_FAMILY_386   = (3 << CPU_FAMILY_SHIFT);
     const uint32_t CPU_FAMILY_486   = (4 << CPU_FAMILY_SHIFT);
 
-    Label detect_486, cpu486, detect_586, std_cpuid1;
+    Label detect_486, cpu486, detect_586, std_cpuid1, std_cpuid4;
     Label ext_cpuid1, ext_cpuid5, done;
 
     StubCodeMark mark(this, "VM_Version", "getPsrInfo_stub");
@@ -131,13 +131,62 @@
     __ movl(Address(rsi, 8), rcx);
     __ movl(Address(rsi,12), rdx);
 
-    __ cmpl(rax, 3);     // Is cpuid(0x4) supported?
-    __ jccb(Assembler::belowEqual, std_cpuid1);
+    __ cmpl(rax, 0xa);                  // Is cpuid(0xB) supported?
+    __ jccb(Assembler::belowEqual, std_cpuid4);
+
+    //
+    // cpuid(0xB) Processor Topology
+    //
+    __ movl(rax, 0xb);
+    __ xorl(rcx, rcx);   // Threads level
+    __ cpuid();
+
+    __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB0_offset())));
+    __ movl(Address(rsi, 0), rax);
+    __ movl(Address(rsi, 4), rbx);
+    __ movl(Address(rsi, 8), rcx);
+    __ movl(Address(rsi,12), rdx);
+
+    __ movl(rax, 0xb);
+    __ movl(rcx, 1);     // Cores level
+    __ cpuid();
+    __ push(rax);
+    __ andl(rax, 0x1f);  // Determine if valid topology level
+    __ orl(rax, rbx);    // eax[4:0] | ebx[0:15] == 0 indicates invalid level
+    __ andl(rax, 0xffff);
+    __ pop(rax);
+    __ jccb(Assembler::equal, std_cpuid4);
+
+    __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB1_offset())));
+    __ movl(Address(rsi, 0), rax);
+    __ movl(Address(rsi, 4), rbx);
+    __ movl(Address(rsi, 8), rcx);
+    __ movl(Address(rsi,12), rdx);
+
+    __ movl(rax, 0xb);
+    __ movl(rcx, 2);     // Packages level
+    __ cpuid();
+    __ push(rax);
+    __ andl(rax, 0x1f);  // Determine if valid topology level
+    __ orl(rax, rbx);    // eax[4:0] | ebx[0:15] == 0 indicates invalid level
+    __ andl(rax, 0xffff);
+    __ pop(rax);
+    __ jccb(Assembler::equal, std_cpuid4);
+
+    __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB2_offset())));
+    __ movl(Address(rsi, 0), rax);
+    __ movl(Address(rsi, 4), rbx);
+    __ movl(Address(rsi, 8), rcx);
+    __ movl(Address(rsi,12), rdx);
 
     //
     // cpuid(0x4) Deterministic cache params
     //
+    __ bind(std_cpuid4);
     __ movl(rax, 4);
+    __ cmpl(rax, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); // Is cpuid(0x4) supported?
+    __ jccb(Assembler::greater, std_cpuid1);
+
     __ xorl(rcx, rcx);   // L1 cache
     __ cpuid();
     __ push(rax);
@@ -460,13 +509,18 @@
   AllocatePrefetchDistance = allocate_prefetch_distance();
   AllocatePrefetchStyle    = allocate_prefetch_style();
 
-  if( AllocatePrefetchStyle == 2 && is_intel() &&
-      cpu_family() == 6 && supports_sse3() ) { // watermark prefetching on Core
+  if( is_intel() && cpu_family() == 6 && supports_sse3() ) {
+    if( AllocatePrefetchStyle == 2 ) { // watermark prefetching on Core
 #ifdef _LP64
-    AllocatePrefetchDistance = 384;
+      AllocatePrefetchDistance = 384;
 #else
-    AllocatePrefetchDistance = 320;
+      AllocatePrefetchDistance = 320;
 #endif
+    }
+    if( supports_sse4_2() && supports_ht() ) { // Nehalem based cpus
+      AllocatePrefetchDistance = 192;
+      AllocatePrefetchLines = 4;
+    }
   }
   assert(AllocatePrefetchDistance % AllocatePrefetchStepSize == 0, "invalid value");
 
--- a/src/cpu/x86/vm/vm_version_x86.hpp	Mon Jun 28 14:54:39 2010 -0700
+++ b/src/cpu/x86/vm/vm_version_x86.hpp	Tue Jun 29 10:34:00 2010 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2009, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2010, Oracle and/or its affiliates.  All Rights Reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -114,6 +114,14 @@
     } bits;
   };
 
+  union TplCpuidBEbx {
+    uint32_t value;
+    struct {
+      uint32_t logical_cpus : 16,
+                            : 16;
+    } bits;
+  };
+
   union ExtCpuid1Ecx {
     uint32_t value;
     struct {
@@ -211,6 +219,25 @@
     uint32_t     dcp_cpuid4_ecx; // unused currently
     uint32_t     dcp_cpuid4_edx; // unused currently
 
+    // cpuid function 0xB (processor topology)
+    // ecx = 0
+    uint32_t     tpl_cpuidB0_eax;
+    TplCpuidBEbx tpl_cpuidB0_ebx;
+    uint32_t     tpl_cpuidB0_ecx; // unused currently
+    uint32_t     tpl_cpuidB0_edx; // unused currently
+
+    // ecx = 1
+    uint32_t     tpl_cpuidB1_eax;
+    TplCpuidBEbx tpl_cpuidB1_ebx;
+    uint32_t     tpl_cpuidB1_ecx; // unused currently
+    uint32_t     tpl_cpuidB1_edx; // unused currently
+
+    // ecx = 2
+    uint32_t     tpl_cpuidB2_eax;
+    TplCpuidBEbx tpl_cpuidB2_ebx;
+    uint32_t     tpl_cpuidB2_ecx; // unused currently
+    uint32_t     tpl_cpuidB2_edx; // unused currently
+
     // cpuid function 0x80000000 // example, unused
     uint32_t ext_max_function;
     uint32_t ext_vendor_name_0;
@@ -316,6 +343,9 @@
   static ByteSize ext_cpuid1_offset() { return byte_offset_of(CpuidInfo, ext_cpuid1_eax); }
   static ByteSize ext_cpuid5_offset() { return byte_offset_of(CpuidInfo, ext_cpuid5_eax); }
   static ByteSize ext_cpuid8_offset() { return byte_offset_of(CpuidInfo, ext_cpuid8_eax); }
+  static ByteSize tpl_cpuidB0_offset() { return byte_offset_of(CpuidInfo, tpl_cpuidB0_eax); }
+  static ByteSize tpl_cpuidB1_offset() { return byte_offset_of(CpuidInfo, tpl_cpuidB1_eax); }
+  static ByteSize tpl_cpuidB2_offset() { return byte_offset_of(CpuidInfo, tpl_cpuidB2_eax); }
 
   // Initialization
   static void initialize();
@@ -349,7 +379,12 @@
   static uint cores_per_cpu()  {
     uint result = 1;
     if (is_intel()) {
-      result = (_cpuid_info.dcp_cpuid4_eax.bits.cores_per_cpu + 1);
+      if (_cpuid_info.std_max_function >= 0xB) {
+        result = _cpuid_info.tpl_cpuidB1_ebx.bits.logical_cpus /
+                 _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
+      } else {
+        result = (_cpuid_info.dcp_cpuid4_eax.bits.cores_per_cpu + 1);
+      }
     } else if (is_amd()) {
       result = (_cpuid_info.ext_cpuid8_ecx.bits.cores_per_cpu + 1);
     }
@@ -358,7 +393,9 @@
 
   static uint threads_per_core()  {
     uint result = 1;
-    if (_cpuid_info.std_cpuid1_edx.bits.ht != 0) {
+    if (is_intel() && _cpuid_info.std_max_function >= 0xB) {
+      result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
+    } else if (_cpuid_info.std_cpuid1_edx.bits.ht != 0) {
       result = _cpuid_info.std_cpuid1_ebx.bits.threads_per_cpu /
                cores_per_cpu();
     }
--- a/src/share/vm/runtime/arguments.cpp	Mon Jun 28 14:54:39 2010 -0700
+++ b/src/share/vm/runtime/arguments.cpp	Tue Jun 29 10:34:00 2010 -0700
@@ -1513,6 +1513,9 @@
   if (AggressiveOpts && FLAG_IS_DEFAULT(BiasedLockingStartupDelay)) {
     FLAG_SET_DEFAULT(BiasedLockingStartupDelay, 500);
   }
+  if (AggressiveOpts && FLAG_IS_DEFAULT(OptimizeStringConcat)) {
+    FLAG_SET_DEFAULT(OptimizeStringConcat, true);
+  }
 #endif
 
   if (AggressiveOpts) {