changeset 59305:0d33d0db0c96

8235551: BitMap::count_one_bits should use population_count Reviewed-by: kbarrett, neliasso Contributed-by: kim.barrett@oracle.com, claes.redestad@oracle.com
author redestad
date Wed, 11 Dec 2019 16:24:10 +0100
parents 5d477a977ce5
children 184d94d22d72
files src/hotspot/share/opto/regmask.cpp src/hotspot/share/utilities/bitMap.cpp src/hotspot/share/utilities/bitMap.hpp src/hotspot/share/utilities/population_count.hpp test/hotspot/gtest/utilities/test_population_count.cpp
diffstat 5 files changed, 70 insertions(+), 70 deletions(-) [+]
line wrap: on
line diff
--- a/src/hotspot/share/opto/regmask.cpp	Wed Dec 11 15:16:21 2019 +0000
+++ b/src/hotspot/share/opto/regmask.cpp	Wed Dec 11 16:24:10 2019 +0100
@@ -309,7 +309,7 @@
   uint sum = 0;
   assert(valid_watermarks(), "sanity");
   for (int i = _lwm; i <= _hwm; i++) {
-    sum += population_count(_A[i]);
+    sum += population_count((unsigned)_A[i]);
   }
   return sum;
 }
--- a/src/hotspot/share/utilities/bitMap.cpp	Wed Dec 11 15:16:21 2019 +0000
+++ b/src/hotspot/share/utilities/bitMap.cpp	Wed Dec 11 16:24:10 2019 +0100
@@ -29,6 +29,7 @@
 #include "utilities/bitMap.inline.hpp"
 #include "utilities/copy.hpp"
 #include "utilities/debug.hpp"
+#include "utilities/population_count.hpp"
 
 STATIC_ASSERT(sizeof(BitMap::bm_word_t) == BytesPerWord); // "Implementation assumption."
 
@@ -646,50 +647,11 @@
   return true;
 }
 
-const BitMap::idx_t* BitMap::_pop_count_table = NULL;
-
-void BitMap::init_pop_count_table() {
-  if (_pop_count_table == NULL) {
-    BitMap::idx_t *table = NEW_C_HEAP_ARRAY(idx_t, 256, mtInternal);
-    for (uint i = 0; i < 256; i++) {
-      table[i] = num_set_bits(i);
-    }
-
-    if (!Atomic::replace_if_null(&_pop_count_table, table)) {
-      guarantee(_pop_count_table != NULL, "invariant");
-      FREE_C_HEAP_ARRAY(idx_t, table);
-    }
-  }
-}
-
-BitMap::idx_t BitMap::num_set_bits(bm_word_t w) {
-  idx_t bits = 0;
-
-  while (w != 0) {
-    while ((w & 1) == 0) {
-      w >>= 1;
-    }
-    bits++;
-    w >>= 1;
-  }
-  return bits;
-}
-
-BitMap::idx_t BitMap::num_set_bits_from_table(unsigned char c) {
-  assert(_pop_count_table != NULL, "precondition");
-  return _pop_count_table[c];
-}
-
 BitMap::idx_t BitMap::count_one_bits() const {
-  init_pop_count_table(); // If necessary.
   idx_t sum = 0;
-  typedef unsigned char uchar;
   for (idx_t i = 0; i < size_in_words(); i++) {
     bm_word_t w = map()[i];
-    for (size_t j = 0; j < sizeof(bm_word_t); j++) {
-      sum += num_set_bits_from_table(uchar(w & 255));
-      w >>= 8;
-    }
+    sum += population_count(w);
   }
   return sum;
 }
--- a/src/hotspot/share/utilities/bitMap.hpp	Wed Dec 11 15:16:21 2019 +0000
+++ b/src/hotspot/share/utilities/bitMap.hpp	Wed Dec 11 16:24:10 2019 +0100
@@ -169,12 +169,6 @@
   // Verify [beg,end) is a valid range, e.g. beg <= end <= size().
   void verify_range(idx_t beg, idx_t end) const NOT_DEBUG_RETURN;
 
-  // Statistics.
-  static const idx_t* _pop_count_table;
-  static void init_pop_count_table();
-  static idx_t num_set_bits(bm_word_t w);
-  static idx_t num_set_bits_from_table(unsigned char c);
-
   // Allocation Helpers.
 
   // Allocates and clears the bitmap memory.
--- a/src/hotspot/share/utilities/population_count.hpp	Wed Dec 11 15:16:21 2019 +0000
+++ b/src/hotspot/share/utilities/population_count.hpp	Wed Dec 11 16:24:10 2019 +0100
@@ -25,23 +25,47 @@
 #ifndef SHARE_UTILITIES_POPULATION_COUNT_HPP
 #define SHARE_UTILITIES_POPULATION_COUNT_HPP
 
+#include "metaprogramming/conditional.hpp"
+#include "metaprogramming/enableIf.hpp"
+#include "metaprogramming/isIntegral.hpp"
+#include "metaprogramming/isSigned.hpp"
 #include "utilities/debug.hpp"
 #include "utilities/globalDefinitions.hpp"
 
 // Returns the population count of x, i.e., the number of bits set in x.
 //
-// Adapted from Hacker's Delight, 2nd Edition, Figure 5-2.
+// Adapted from Hacker's Delight, 2nd Edition, Figure 5-2 and the text that
+// follows.
 //
 // Ideally this should be dispatched per platform to use optimized
 // instructions when available, such as POPCNT on modern x86/AMD. Our builds
 // still target and support older architectures that might lack support for
-// these, however. For example, with current build configurations,
-// __builtin_popcount(x) would generate a call to a similar but slower 64-bit
-// version of this 32-bit implementation.
-static uint32_t population_count(uint32_t x) {
-  x -= ((x >> 1) & 0x55555555);
-  x = (x & 0x33333333) + ((x >> 2) & 0x33333333);
-  return (((x + (x >> 4)) & 0x0F0F0F0F) * 0x01010101) >> 24;
+// these. For example, with current build configurations, __builtin_popcount(x)
+// generate a call to a similar but slower 64-bit version when calling with
+// a 32-bit integer type.
+template <typename T>
+inline unsigned population_count(T x) {
+  STATIC_ASSERT(BitsPerWord <= 128);
+  STATIC_ASSERT(BitsPerByte == 8);
+  STATIC_ASSERT(IsIntegral<T>::value);
+  STATIC_ASSERT(!IsSigned<T>::value);
+  // We need to take care with implicit integer promotion when dealing with
+  // integers < 32-bit. We chose to do this by explicitly widening constants
+  // to unsigned
+  typedef typename Conditional<(sizeof(T) < sizeof(unsigned)), unsigned, T>::type P;
+  const T all = ~T(0);           // 0xFF..FF
+  const P fives = all/3;         // 0x55..55
+  const P threes = (all/15) * 3; // 0x33..33
+  const P z_ones = all/255;      // 0x0101..01
+  const P z_effs = z_ones * 15;  // 0x0F0F..0F
+  P r = x;
+  r -= ((r >> 1) & fives);
+  r = (r & threes) + ((r >> 2) & threes);
+  r = ((r + (r >> 4)) & z_effs) * z_ones;
+  // The preceeding multiply by z_ones is the only place where the intermediate
+  // calculations can exceed the range of T. We need to discard any such excess
+  // before the right-shift, hence the conversion back to T.
+  return static_cast<T>(r) >> (((sizeof(T) - 1) * BitsPerByte));
 }
 
 #endif // SHARE_UTILITIES_POPULATION_COUNT_HPP
--- a/test/hotspot/gtest/utilities/test_population_count.cpp	Wed Dec 11 15:16:21 2019 +0000
+++ b/test/hotspot/gtest/utilities/test_population_count.cpp	Wed Dec 11 16:24:10 2019 +0100
@@ -25,12 +25,13 @@
 #include "precompiled.hpp"
 #include "runtime/os.hpp"
 #include "utilities/population_count.hpp"
+#include "utilities/powerOfTwo.hpp"
 #include "utilities/globalDefinitions.hpp"
 #include "unittest.hpp"
 
 #define BITS_IN_BYTE_ARRAY_SIZE 256
 
-uint8_t test_popcnt_bitsInByte[BITS_IN_BYTE_ARRAY_SIZE] = {
+const uint8_t test_popcnt_bitsInByte[BITS_IN_BYTE_ARRAY_SIZE] = {
         0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
         1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
         1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
@@ -49,30 +50,49 @@
         4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8
 };
 
-TEST(population_count, sparse) {
+template <typename T>
+void sparse() {
+  const T max_val = max_value<T>();
+
   // Step through the entire input range from a random starting point,
   // verify population_count return values against the lookup table
   // approach used historically
-  uint32_t step = 4711;
-  for (uint32_t value = os::random() % step; value < UINT_MAX - step; value += step) {
-    uint32_t lookup = test_popcnt_bitsInByte[(value >> 24) & 0xff] +
-                      test_popcnt_bitsInByte[(value >> 16) & 0xff] +
-                      test_popcnt_bitsInByte[(value >> 8)  & 0xff] +
-                      test_popcnt_bitsInByte[ value        & 0xff];
+  T step = T(1) << ((sizeof(T) * 8) - 7);
 
+  for (T value = os::random() % step; value < max_val - step; value += step) {
+    uint64_t v = (uint64_t)value;
+    unsigned lookup = 0u;
+    for (unsigned i = 0u; i < sizeof(T); i++) {
+      lookup += test_popcnt_bitsInByte[v & 0xff];
+      v >>= 8u;
+    }
     EXPECT_EQ(lookup, population_count(value))
         << "value = " << value;
   }
 
   // Test a few edge cases
-  EXPECT_EQ(0u, population_count(0u))
+  EXPECT_EQ(0u, population_count(T(0u)))
       << "value = " << 0;
-  EXPECT_EQ(1u, population_count(1u))
+  EXPECT_EQ(1u, population_count(T(1u)))
       << "value = " << 1;
-  EXPECT_EQ(1u, population_count(2u))
+  EXPECT_EQ(1u, population_count(T(2u)))
       << "value = " << 2;
-  EXPECT_EQ(32u, population_count(UINT_MAX))
-      << "value = " << UINT_MAX;
-  EXPECT_EQ(31u, population_count(UINT_MAX - 1))
-      << "value = " << (UINT_MAX - 1);
+  EXPECT_EQ(T(sizeof(T) * BitsPerByte), population_count(max_val))
+      << "value = " << max_val;
+  EXPECT_EQ(T(sizeof(T) * BitsPerByte - 1u), population_count(T(max_val - 1u)))
+      << "value = " << (max_val - 1u);
 }
+
+
+TEST(population_count, sparse8) {
+  sparse<uint8_t>();
+}
+TEST(population_count, sparse16) {
+  sparse<uint16_t>();
+}
+TEST(population_count, sparse32) {
+  sparse<uint32_t>();
+}
+TEST(population_count, sparse64) {
+  sparse<uint64_t>();
+}
\ No newline at end of file