changeset 57309:0edc7fd0d7a3

8234331: Add robust and optimized utility for rounding up to next power of two Reviewed-by: eosterlund, stuefe, jrose
author redestad
date Fri, 06 Dec 2019 18:05:36 +0100
parents 31f9903f0838
children 5f73126dc390
files src/hotspot/cpu/aarch64/gc/z/zGlobals_aarch64.cpp src/hotspot/cpu/x86/gc/z/zGlobals_x86.cpp src/hotspot/share/gc/z/zForwarding.cpp src/hotspot/share/gc/z/zHeuristics.cpp src/hotspot/share/gc/z/zMark.cpp src/hotspot/share/gc/z/zUtils.inline.hpp src/hotspot/share/libadt/dict.cpp src/hotspot/share/libadt/vectset.cpp src/hotspot/share/opto/block.cpp src/hotspot/share/opto/loopnode.cpp src/hotspot/share/opto/node.cpp src/hotspot/share/opto/phaseX.cpp src/hotspot/share/runtime/threadSMR.cpp src/hotspot/share/utilities/count_leading_zeros.hpp src/hotspot/share/utilities/growableArray.hpp src/hotspot/share/utilities/powerOfTwo.hpp test/hotspot/gtest/gc/z/test_zUtils.cpp test/hotspot/gtest/utilities/test_count_leading_zeros.cpp test/hotspot/gtest/utilities/test_powerOfTwo.cpp
diffstat 19 files changed, 552 insertions(+), 174 deletions(-) [+]
line wrap: on
line diff
--- a/src/hotspot/cpu/aarch64/gc/z/zGlobals_aarch64.cpp	Fri Dec 06 09:14:38 2019 -0800
+++ b/src/hotspot/cpu/aarch64/gc/z/zGlobals_aarch64.cpp	Fri Dec 06 18:05:36 2019 +0100
@@ -23,9 +23,9 @@
 
 #include "precompiled.hpp"
 #include "gc/z/zGlobals.hpp"
-#include "gc/z/zUtils.inline.hpp"
 #include "runtime/globals.hpp"
 #include "utilities/globalDefinitions.hpp"
+#include "utilities/powerOfTwo.hpp"
 
 //
 // The heap can have three different layouts, depending on the max heap size.
@@ -142,7 +142,7 @@
 size_t ZPlatformAddressOffsetBits() {
   const size_t min_address_offset_bits = 42; // 4TB
   const size_t max_address_offset_bits = 44; // 16TB
-  const size_t address_offset = ZUtils::round_up_power_of_2(MaxHeapSize * ZVirtualToPhysicalRatio);
+  const size_t address_offset = round_up_power_of_2(MaxHeapSize * ZVirtualToPhysicalRatio);
   const size_t address_offset_bits = log2_intptr(address_offset);
   return clamp(address_offset_bits, min_address_offset_bits, max_address_offset_bits);
 }
--- a/src/hotspot/cpu/x86/gc/z/zGlobals_x86.cpp	Fri Dec 06 09:14:38 2019 -0800
+++ b/src/hotspot/cpu/x86/gc/z/zGlobals_x86.cpp	Fri Dec 06 18:05:36 2019 +0100
@@ -23,9 +23,9 @@
 
 #include "precompiled.hpp"
 #include "gc/z/zGlobals.hpp"
-#include "gc/z/zUtils.inline.hpp"
 #include "runtime/globals.hpp"
 #include "utilities/globalDefinitions.hpp"
+#include "utilities/powerOfTwo.hpp"
 
 //
 // The heap can have three different layouts, depending on the max heap size.
@@ -142,7 +142,7 @@
 size_t ZPlatformAddressOffsetBits() {
   const size_t min_address_offset_bits = 42; // 4TB
   const size_t max_address_offset_bits = 44; // 16TB
-  const size_t address_offset = ZUtils::round_up_power_of_2(MaxHeapSize * ZVirtualToPhysicalRatio);
+  const size_t address_offset = round_up_power_of_2(MaxHeapSize * ZVirtualToPhysicalRatio);
   const size_t address_offset_bits = log2_intptr(address_offset);
   return clamp(address_offset_bits, min_address_offset_bits, max_address_offset_bits);
 }
--- a/src/hotspot/share/gc/z/zForwarding.cpp	Fri Dec 06 09:14:38 2019 -0800
+++ b/src/hotspot/share/gc/z/zForwarding.cpp	Fri Dec 06 18:05:36 2019 +0100
@@ -24,9 +24,9 @@
 #include "precompiled.hpp"
 #include "gc/z/zForwarding.inline.hpp"
 #include "gc/z/zPage.inline.hpp"
-#include "gc/z/zUtils.inline.hpp"
 #include "memory/allocation.hpp"
 #include "utilities/debug.hpp"
+#include "utilities/powerOfTwo.hpp"
 
 ZForwarding* ZForwarding::create(ZPage* page) {
   // Allocate table for linear probing. The size of the table must be
@@ -34,7 +34,7 @@
   // The table is sized to have a load factor of 50%, i.e. sized to have
   // double the number of entries actually inserted.
   assert(page->live_objects() > 0, "Invalid value");
-  const size_t nentries = ZUtils::round_up_power_of_2(page->live_objects() * 2);
+  const size_t nentries = round_up_power_of_2(page->live_objects() * 2);
   return ::new (AttachedArray::alloc(nentries)) ZForwarding(page, nentries);
 }
 
--- a/src/hotspot/share/gc/z/zHeuristics.cpp	Fri Dec 06 09:14:38 2019 -0800
+++ b/src/hotspot/share/gc/z/zHeuristics.cpp	Fri Dec 06 18:05:36 2019 +0100
@@ -25,10 +25,10 @@
 #include "gc/z/zCPU.inline.hpp"
 #include "gc/z/zGlobals.hpp"
 #include "gc/z/zHeuristics.hpp"
-#include "gc/z/zUtils.inline.hpp"
 #include "logging/log.hpp"
 #include "runtime/globals.hpp"
 #include "runtime/os.hpp"
+#include "utilities/powerOfTwo.hpp"
 
 void ZHeuristics::set_medium_page_size() {
   // Set ZPageSizeMedium so that a medium page occupies at most 3.125% of the
@@ -39,7 +39,7 @@
   const size_t max = ZGranuleSize * 16;
   const size_t unclamped = MaxHeapSize * 0.03125;
   const size_t clamped = MIN2(MAX2(min, unclamped), max);
-  const size_t size = ZUtils::round_down_power_of_2(clamped);
+  const size_t size = round_down_power_of_2(clamped);
 
   if (size > ZPageSizeSmall) {
     // Enable medium pages
--- a/src/hotspot/share/gc/z/zMark.cpp	Fri Dec 06 09:14:38 2019 -0800
+++ b/src/hotspot/share/gc/z/zMark.cpp	Fri Dec 06 18:05:36 2019 +0100
@@ -48,6 +48,7 @@
 #include "runtime/thread.hpp"
 #include "utilities/align.hpp"
 #include "utilities/globalDefinitions.hpp"
+#include "utilities/powerOfTwo.hpp"
 #include "utilities/ticks.hpp"
 
 static const ZStatSubPhase ZSubPhaseConcurrentMark("Concurrent Mark");
@@ -79,7 +80,7 @@
   // Calculate the number of stripes from the number of workers we use,
   // where the number of stripes must be a power of two and we want to
   // have at least one worker per stripe.
-  const size_t nstripes = ZUtils::round_down_power_of_2(nworkers);
+  const size_t nstripes = round_down_power_of_2(nworkers);
   return MIN2(nstripes, ZMarkStripesMax);
 }
 
--- a/src/hotspot/share/gc/z/zUtils.inline.hpp	Fri Dec 06 09:14:38 2019 -0800
+++ b/src/hotspot/share/gc/z/zUtils.inline.hpp	Fri Dec 06 18:05:36 2019 +0100
@@ -32,21 +32,6 @@
 #include "utilities/debug.hpp"
 #include "utilities/globalDefinitions.hpp"
 
-inline size_t ZUtils::round_up_power_of_2(size_t value) {
-  assert(value != 0, "Invalid value");
-
-  if (is_power_of_2(value)) {
-    return value;
-  }
-
-  return (size_t)1 << (log2_intptr(value) + 1);
-}
-
-inline size_t ZUtils::round_down_power_of_2(size_t value) {
-  assert(value != 0, "Invalid value");
-  return (size_t)1 << log2_intptr(value);
-}
-
 inline size_t ZUtils::bytes_to_words(size_t size_in_bytes) {
   assert(is_aligned(size_in_bytes, BytesPerWord), "Size not word aligned");
   return size_in_bytes >> LogBytesPerWord;
--- a/src/hotspot/share/libadt/dict.cpp	Fri Dec 06 09:14:38 2019 -0800
+++ b/src/hotspot/share/libadt/dict.cpp	Fri Dec 06 18:05:36 2019 +0100
@@ -24,6 +24,7 @@
 
 #include "precompiled.hpp"
 #include "libadt/dict.hpp"
+#include "utilities/powerOfTwo.hpp"
 
 // Dictionaries - An Abstract Data Type
 
@@ -86,8 +87,7 @@
     initflag = 1;               // Never again
   }
 
-  i=16;
-  while( i < size ) i <<= 1;
+  i = MAX2(16, round_up_power_of_2(size));
   _size = i;                    // Size is a power of 2
   _cnt = 0;                     // Dictionary is empty
   _bin = (bucket*)_arena->Amalloc_4(sizeof(bucket)*_size);
--- a/src/hotspot/share/libadt/vectset.cpp	Fri Dec 06 09:14:38 2019 -0800
+++ b/src/hotspot/share/libadt/vectset.cpp	Fri Dec 06 18:05:36 2019 +0100
@@ -27,6 +27,7 @@
 #include "memory/allocation.inline.hpp"
 #include "memory/arena.hpp"
 #include "utilities/count_leading_zeros.hpp"
+#include "utilities/powerOfTwo.hpp"
 
 VectorSet::VectorSet(Arena *arena) : _size(2),
     _data(NEW_ARENA_ARRAY(arena, uint32_t, 2)),
@@ -38,8 +39,8 @@
 // Expand the existing set to a bigger size
 void VectorSet::grow(uint new_size) {
   new_size = (new_size + bit_mask) >> word_bits;
-  assert(new_size != 0 && new_size < (1U << 31), "");
-  uint x = (1U << 31) >> (count_leading_zeros(new_size) - 1);
+  assert(new_size > 0, "sanity");
+  uint x = next_power_of_2(new_size);
   _data = REALLOC_ARENA_ARRAY(_set_arena, uint32_t, _data, _size, x);
   Copy::zero_to_bytes(_data + _size, (x - _size) * sizeof(uint32_t));
   _size = x;
--- a/src/hotspot/share/opto/block.cpp	Fri Dec 06 09:14:38 2019 -0800
+++ b/src/hotspot/share/opto/block.cpp	Fri Dec 06 18:05:36 2019 +0100
@@ -36,6 +36,7 @@
 #include "opto/opcodes.hpp"
 #include "opto/rootnode.hpp"
 #include "utilities/copy.hpp"
+#include "utilities/powerOfTwo.hpp"
 
 void Block_Array::grow( uint i ) {
   assert(i >= Max(), "must be an overflow");
@@ -47,7 +48,7 @@
     _blocks[0] = NULL;
   }
   uint old = _size;
-  while( i >= _size ) _size <<= 1;      // Double to fit
+  _size = next_power_of_2(i);
   _blocks = (Block**)_arena->Arealloc( _blocks, old*sizeof(Block*),_size*sizeof(Block*));
   Copy::zero_to_bytes( &_blocks[old], (_size-old)*sizeof(Block*) );
 }
--- a/src/hotspot/share/opto/loopnode.cpp	Fri Dec 06 09:14:38 2019 -0800
+++ b/src/hotspot/share/opto/loopnode.cpp	Fri Dec 06 18:05:36 2019 +0100
@@ -3391,10 +3391,7 @@
 void PhaseIdealLoop::set_idom(Node* d, Node* n, uint dom_depth) {
   uint idx = d->_idx;
   if (idx >= _idom_size) {
-    uint newsize = _idom_size<<1;
-    while( idx >= newsize ) {
-      newsize <<= 1;
-    }
+    uint newsize = next_power_of_2(idx);
     _idom      = REALLOC_RESOURCE_ARRAY( Node*,     _idom,_idom_size,newsize);
     _dom_depth = REALLOC_RESOURCE_ARRAY( uint, _dom_depth,_idom_size,newsize);
     memset( _dom_depth + _idom_size, 0, (newsize - _idom_size) * sizeof(uint) );
--- a/src/hotspot/share/opto/node.cpp	Fri Dec 06 09:14:38 2019 -0800
+++ b/src/hotspot/share/opto/node.cpp	Fri Dec 06 18:05:36 2019 +0100
@@ -41,6 +41,7 @@
 #include "opto/type.hpp"
 #include "utilities/copy.hpp"
 #include "utilities/macros.hpp"
+#include "utilities/powerOfTwo.hpp"
 
 class RegMask;
 // #include "phase.hpp"
@@ -653,7 +654,7 @@
     to[3] = NULL;
     return;
   }
-  while( new_max <= len ) new_max <<= 1; // Find next power-of-2
+  new_max = next_power_of_2(len);
   // Trimming to limit allows a uint8 to handle up to 255 edges.
   // Previously I was using only powers-of-2 which peaked at 128 edges.
   //if( new_max >= limit ) new_max = limit-1;
@@ -676,7 +677,7 @@
     _out = (Node **)arena->Amalloc(4*sizeof(Node*));
     return;
   }
-  while( new_max <= len ) new_max <<= 1; // Find next power-of-2
+  new_max = next_power_of_2(len);
   // Trimming to limit allows a uint8 to handle up to 255 edges.
   // Previously I was using only powers-of-2 which peaked at 128 edges.
   //if( new_max >= limit ) new_max = limit-1;
@@ -2256,7 +2257,7 @@
     _nodes[0] = NULL;
   }
   uint old = _max;
-  while( i >= _max ) _max <<= 1;        // Double to fit
+  _max = next_power_of_2(i);
   _nodes = (Node**)_a->Arealloc( _nodes, old*sizeof(Node*),_max*sizeof(Node*));
   Copy::zero_to_bytes( &_nodes[old], (_max-old)*sizeof(Node*) );
 }
--- a/src/hotspot/share/opto/phaseX.cpp	Fri Dec 06 09:14:38 2019 -0800
+++ b/src/hotspot/share/opto/phaseX.cpp	Fri Dec 06 18:05:36 2019 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -39,6 +39,7 @@
 #include "opto/regalloc.hpp"
 #include "opto/rootnode.hpp"
 #include "utilities/macros.hpp"
+#include "utilities/powerOfTwo.hpp"
 
 //=============================================================================
 #define NODE_HASH_MINIMUM_SIZE    255
@@ -260,12 +261,9 @@
 
 //------------------------------round_up---------------------------------------
 // Round up to nearest power of 2
-uint NodeHash::round_up( uint x ) {
-  x += (x>>2);                  // Add 25% slop
-  if( x <16 ) return 16;        // Small stuff
-  uint i=16;
-  while( i < x ) i <<= 1;       // Double to fit
-  return i;                     // Return hash table size
+uint NodeHash::round_up(uint x) {
+  x += (x >> 2);                  // Add 25% slop
+  return MAX2(16U, round_up_power_of_2(x));
 }
 
 //------------------------------grow-------------------------------------------
@@ -2138,7 +2136,7 @@
     _types[0] = NULL;
   }
   uint old = _max;
-  while( i >= _max ) _max <<= 1;        // Double to fit
+  _max = next_power_of_2(i);
   _types = (const Type**)_a->Arealloc( _types, old*sizeof(Type*),_max*sizeof(Type*));
   memset( &_types[old], 0, (_max-old)*sizeof(Type*) );
 }
--- a/src/hotspot/share/runtime/threadSMR.cpp	Fri Dec 06 09:14:38 2019 -0800
+++ b/src/hotspot/share/runtime/threadSMR.cpp	Fri Dec 06 18:05:36 2019 +0100
@@ -37,6 +37,7 @@
 #include "utilities/copy.hpp"
 #include "utilities/globalDefinitions.hpp"
 #include "utilities/ostream.hpp"
+#include "utilities/powerOfTwo.hpp"
 #include "utilities/resourceHash.hpp"
 #include "utilities/vmError.hpp"
 
@@ -809,13 +810,7 @@
 
   // Hash table size should be first power of two higher than twice the length of the ThreadsList
   int hash_table_size = MIN2((int)get_java_thread_list()->length(), 32) << 1;
-  hash_table_size--;
-  hash_table_size |= hash_table_size >> 1;
-  hash_table_size |= hash_table_size >> 2;
-  hash_table_size |= hash_table_size >> 4;
-  hash_table_size |= hash_table_size >> 8;
-  hash_table_size |= hash_table_size >> 16;
-  hash_table_size++;
+  hash_table_size = round_up_power_of_2(hash_table_size);
 
   // Gather a hash table of the current hazard ptrs:
   ThreadScanHashtable *scan_table = new ThreadScanHashtable(hash_table_size);
@@ -872,13 +867,7 @@
   // Hash table size should be first power of two higher than twice
   // the length of the Threads list.
   int hash_table_size = MIN2((int)get_java_thread_list()->length(), 32) << 1;
-  hash_table_size--;
-  hash_table_size |= hash_table_size >> 1;
-  hash_table_size |= hash_table_size >> 2;
-  hash_table_size |= hash_table_size >> 4;
-  hash_table_size |= hash_table_size >> 8;
-  hash_table_size |= hash_table_size >> 16;
-  hash_table_size++;
+  hash_table_size = round_up_power_of_2(hash_table_size);
 
   // Gather a hash table of the JavaThreads indirectly referenced by
   // hazard ptrs.
--- a/src/hotspot/share/utilities/count_leading_zeros.hpp	Fri Dec 06 09:14:38 2019 -0800
+++ b/src/hotspot/share/utilities/count_leading_zeros.hpp	Fri Dec 06 18:05:36 2019 +0100
@@ -27,45 +27,190 @@
 
 #include "utilities/debug.hpp"
 #include "utilities/globalDefinitions.hpp"
-#include "utilities/count_trailing_zeros.hpp"
 
-#if defined(TARGET_COMPILER_visCPP)
+// unsigned count_leading_zeros(T x)
+
+// Return the number of leading zeros in x, i.e. the number of zero bits
+// above the most significant set bit in x.  Undefined for 0.
+
+// We implement and support variants for 8, 16, 32 and 64 bit integral types.
+template <typename T, size_t n> struct CountLeadingZerosImpl;
+
+template <typename T> unsigned count_leading_zeros(T v) {
+  assert(v != 0, "precondition");
+  return CountLeadingZerosImpl<T, sizeof(T)>::doit(v);
+}
+
+/*****************************************************************************
+ * GCC and compatible (including Clang)
+ *****************************************************************************/
+#if defined(TARGET_COMPILER_gcc)
+
+template <typename T> struct CountLeadingZerosImpl<T, 1> {
+  static unsigned doit(T v) {
+    return __builtin_clz((uint32_t)v & 0xFF) - 24u;
+  }
+};
+
+template <typename T> struct CountLeadingZerosImpl<T, 2> {
+  static unsigned doit(T v) {
+    return __builtin_clz((uint32_t)v & 0xFFFF) - 16u;
+  }
+};
+
+template <typename T> struct CountLeadingZerosImpl<T, 4> {
+  static unsigned doit(T v) {
+    return __builtin_clz(v);
+  }
+};
+
+template <typename T> struct CountLeadingZerosImpl<T, 8> {
+  static unsigned doit(T v) {
+    return __builtin_clzll(v);
+  }
+};
+
+/*****************************************************************************
+ * Microsoft Visual Studio
+ *****************************************************************************/
+#elif defined(TARGET_COMPILER_visCPP)
+
 #include <intrin.h>
 #pragma intrinsic(_BitScanReverse)
-#elif defined(TARGET_COMPILER_xlc)
-#include <builtins.h>
+
+#ifdef _LP64
+#pragma intrinsic(_BitScanReverse64)
 #endif
 
-// uint32_t count_leading_zeros(uint32_t x)
-// Return the number of leading zeros in x, e.g. the zero-based index
-// of the most significant set bit in x.  Undefined for 0.
-inline uint32_t count_leading_zeros(uint32_t x) {
+template <typename T> struct CountLeadingZerosImpl<T, 1> {
+  static unsigned doit(T v) {
+    unsigned long index;
+    _BitScanReverse(&index, (uint32_t)v & 0xFF);
+    return 7u - index;
+  }
+};
+
+template <typename T> struct CountLeadingZerosImpl<T, 2> {
+  static unsigned doit(T v) {
+    unsigned long index;
+    _BitScanReverse(&index, (uint32_t)v & 0xFFFF);
+    return 15u - index;
+  }
+};
+
+template <typename T> struct CountLeadingZerosImpl<T, 4> {
+  static unsigned doit(T v) {
+    unsigned long index;
+    _BitScanReverse(&index, v);
+    return 31u - index;
+  }
+};
+
+template <typename T> struct CountLeadingZerosImpl<T, 8> {
+  static unsigned doit(T v) {
+    unsigned long index;
+#ifdef _LP64
+    _BitScanReverse64(&index, v);
+    return 63u - index;
+#else
+    uint64_t high = ((uint64_t)v) >> 32ULL;
+    if (high != 0) {
+      return count_leading_zeros((uint32_t)high);
+    } else {
+      return count_leading_zeros((uint32_t)v) + 32;
+    }
+#endif
+  }
+};
+
+/*****************************************************************************
+ * IBM XL C/C++
+ *****************************************************************************/
+#elif defined(TARGET_COMPILER_xlc)
+
+#include <builtins.h>
+
+template <typename T> struct CountLeadingZerosImpl<T, 1> {
+  static unsigned doit(T v) {
+    return __cntlz4((uint32_t)v & 0xFF) - 24u;
+  }
+};
+
+template <typename T> struct CountLeadingZerosImpl<T, 2> {
+  static unsigned doit(T v) {
+    return __cntlz4((uint32_t)v & 0xFFFF) - 16u;
+  }
+};
+
+template <typename T> struct CountLeadingZerosImpl<T, 4> {
+  static unsigned doit(T v) {
+    return __cntlz4(v);
+  }
+};
+
+template <typename T> struct CountLeadingZerosImpl<T, 8> {
+  static unsigned doit(T v) {
+    return __cntlz8(v);
+  }
+};
+
+/*****************************************************************************
+ * Fallback
+ *****************************************************************************/
+#else
+
+inline uint32_t count_leading_zeros_32(uint32_t x) {
   assert(x != 0, "precondition");
-#if defined(TARGET_COMPILER_gcc)
-  return __builtin_clz(x);
-#elif defined(TARGET_COMPILER_visCPP)
-  unsigned long index;
-  _BitScanReverse(&index, x);
-  return index ^ 31u;
-#elif defined(TARGET_COMPILER_xlc)
-  return __cntlz4(x);
-#else
+
   // Efficient and portable fallback implementation:
   // http://graphics.stanford.edu/~seander/bithacks.html#IntegerLogDeBruijn
   // - with positions xor'd by 31 to get number of leading zeros
   // rather than position of highest bit.
-  static const int MultiplyDeBruijnBitPosition[32] = {
-      31, 22, 30, 21, 18, 10, 29,  2, 20, 17, 15, 13, 9,  6, 28, 1,
-      23, 19, 11,  3, 16, 14,  7, 24, 12,  4,  8, 25, 5, 26, 27, 0
+  static const uint32_t MultiplyDeBruijnBitPosition[32] = {
+      31, 22, 30, 21, 18, 10, 29,  2, 20, 17, 15, 13, 9,  6, 28,  1,
+      23, 19, 11,  3, 16, 14,  7, 24, 12,  4,  8, 25, 5, 26, 27,  0
   };
 
-  x |= x >> 1; // first round down to one less than a power of 2
+  // First round down to one less than a power of 2
+  x |= x >> 1;
   x |= x >> 2;
   x |= x >> 4;
   x |= x >> 8;
   x |= x >> 16;
-  return MultiplyDeBruijnBitPosition[(uint32_t)( x * 0x07c4acddu ) >> 27];
-#endif
+  // Multiply by a magic constant which ensures the highest 5 bits point to
+  // the right index in the lookup table
+  return MultiplyDeBruijnBitPosition[(x * 0x07c4acddu) >> 27u];
 }
 
+template <typename T> struct CountLeadingZerosImpl<T, 1> {
+  static unsigned doit(T v) {
+    return count_leading_zeros_32((uint32_t)v & 0xFF) - 24u;
+  }
+};
+
+template <typename T> struct CountLeadingZerosImpl<T, 2> {
+  static unsigned doit(T v) {
+    return count_leading_zeros_32((uint32_t)v & 0xFFFF) - 16u;
+  }
+};
+
+template <typename T> struct CountLeadingZerosImpl<T, 4> {
+  static unsigned doit(T v) {
+    return count_leading_zeros_32(v);
+  }
+};
+
+template <typename T> struct CountLeadingZerosImpl<T, 8> {
+  static unsigned doit(T v) {
+    uint64_t high = ((uint64_t)v) >> 32ULL;
+    if (high != 0) {
+      return count_leading_zeros_32((uint32_t)high);
+    } else {
+      return count_leading_zeros_32((uint32_t)v) + 32u;
+    }
+  }
+};
+
+#endif
+
 #endif // SHARE_UTILITIES_COUNT_LEADING_ZEROS_HPP
--- a/src/hotspot/share/utilities/growableArray.hpp	Fri Dec 06 09:14:38 2019 -0800
+++ b/src/hotspot/share/utilities/growableArray.hpp	Fri Dec 06 18:05:36 2019 +0100
@@ -30,6 +30,7 @@
 #include "utilities/debug.hpp"
 #include "utilities/globalDefinitions.hpp"
 #include "utilities/ostream.hpp"
+#include "utilities/powerOfTwo.hpp"
 
 // A growable array.
 
@@ -476,10 +477,9 @@
 // Global GrowableArray methods (one instance in the library per each 'E' type).
 
 template<class E> void GrowableArray<E>::grow(int j) {
-    // grow the array by doubling its size (amortized growth)
     int old_max = _max;
-    if (_max == 0) _max = 1; // prevent endless loop
-    while (j >= _max) _max = _max*2;
+    // grow the array by increasing _max to the first power of two larger than the size we need
+    _max = next_power_of_2((uint32_t)j);
     // j < _max
     E* newData = (E*)raw_allocate(sizeof(E));
     int i = 0;
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/hotspot/share/utilities/powerOfTwo.hpp	Fri Dec 06 18:05:36 2019 +0100
@@ -0,0 +1,128 @@
+/*
+ * Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef SHARE_UTILITIES_POWEROFTWO_HPP
+#define SHARE_UTILITIES_POWEROFTWO_HPP
+
+#include "metaprogramming/enableIf.hpp"
+#include "metaprogramming/isIntegral.hpp"
+#include "metaprogramming/isSigned.hpp"
+#include "utilities/count_leading_zeros.hpp"
+#include "utilities/debug.hpp"
+#include "utilities/globalDefinitions.hpp"
+
+// Power of two convenience library.
+
+// Round down to the closest power of two less than or equal to the given
+// value.
+
+// Signed version: 0 is an invalid input, negative values are invalid
+template <typename T>
+inline typename EnableIf<IsSigned<T>::value, T>::type round_down_power_of_2(T value) {
+  STATIC_ASSERT(IsIntegral<T>::value);
+  assert(value > 0, "Invalid value");
+  uint32_t lz = count_leading_zeros(value);
+  assert(lz < sizeof(T) * BitsPerByte, "Sanity");
+  return T(1) << (sizeof(T) * BitsPerByte - 1 - lz);
+}
+
+// Unsigned version: 0 is an invalid input
+template <typename T>
+inline typename EnableIf<!IsSigned<T>::value, T>::type round_down_power_of_2(T value) {
+  STATIC_ASSERT(IsIntegral<T>::value);
+  assert(value != 0, "Invalid value");
+  uint32_t lz = count_leading_zeros(value);
+  assert(lz < sizeof(T) * BitsPerByte, "Sanity");
+  return T(1) << (sizeof(T) * BitsPerByte - 1 - lz);
+}
+
+// Round up to the closest power of two greater than or equal to
+// the given value.
+
+// Signed version: 0 is an invalid input, negative values are invalid,
+// overflows with assert if value is larger than 2^30 or 2^62 for 32- and
+// 64-bit integers, respectively
+template <typename T>
+inline typename EnableIf<IsSigned<T>::value, T>::type round_up_power_of_2(T value) {
+  STATIC_ASSERT(IsIntegral<T>::value);
+  STATIC_ASSERT(IsSigned<T>::value);
+  assert(value > 0, "Invalid value");
+  if (is_power_of_2(value)) {
+    return value;
+  }
+  uint32_t lz = count_leading_zeros(value);
+  assert(lz < sizeof(T) * BitsPerByte, "Sanity");
+  assert(lz > 1, "Will overflow");
+  return T(1) << (sizeof(T) * BitsPerByte - lz);
+}
+
+// Unsigned version: 0 is an invalid input, overflows with assert if value
+// is larger than 2^31 or 2^63 for 32- and 64-bit integers, respectively
+template <typename T>
+inline typename EnableIf<!IsSigned<T>::value, T>::type round_up_power_of_2(T value) {
+  STATIC_ASSERT(IsIntegral<T>::value);
+  STATIC_ASSERT(!IsSigned<T>::value);
+  assert(value != 0, "Invalid value");
+  if (is_power_of_2(value)) {
+    return value;
+  }
+  uint32_t lz = count_leading_zeros(value);
+  assert(lz < sizeof(T) * BitsPerByte, "Sanity");
+  assert(lz > 0, "Will overflow");
+  return T(1) << (sizeof(T) * BitsPerByte - lz);
+}
+
+// Helper function to get the maximum positive value. Implemented here
+// since using std::numeric_limits<T>::max() seems problematic on some
+// platforms.
+
+template <typename T> T max_value() {
+  if (IsSigned<T>::value) {
+    // Highest positive power of two expressible in the type
+    uint64_t val = static_cast<T>(1) << (sizeof(T) * BitsPerByte - 2);
+    // Fill lower bits with ones
+    val |= val >> 1;
+    val |= val >> 2;
+    val |= val >> 4;
+    if (sizeof(T) >= 2)  val |= val >> 8;
+    if (sizeof(T) >= 4)  val |= val >> 16;
+    if (sizeof(T) == 8)  val |= val >> 32;
+    return (T)val;
+  } else {
+    return ~(static_cast<T>(0));
+  }
+}
+
+// Calculate the next power of two greater than the given value.
+
+// Accepts 0 (returns 1), overflows with assert if value is larger than
+// or equal to 2^31 (signed: 2^30) or 2^63 (signed: 2^62), for 32-
+// and 64-bit integers, respectively
+template <typename T>
+inline T next_power_of_2(T value)  {
+  assert(value != max_value<T>(), "Overflow");
+  return round_up_power_of_2(value + 1);
+}
+
+#endif // SHARE_UTILITIES_POWEROFTWO_HPP
--- a/test/hotspot/gtest/gc/z/test_zUtils.cpp	Fri Dec 06 09:14:38 2019 -0800
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,74 +0,0 @@
-/*
- * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- */
-
-#include "precompiled.hpp"
-#include "gc/z/zUtils.inline.hpp"
-#include "unittest.hpp"
-
-#include <limits>
-
-template <typename T>
-static T max_alignment() {
-  T max = std::numeric_limits<T>::max();
-  return max ^ (max >> 1);
-}
-
-TEST(ZUtilsTest, round_up_power_of_2) {
-  EXPECT_EQ(ZUtils::round_up_power_of_2(1u), 1u);
-  EXPECT_EQ(ZUtils::round_up_power_of_2(2u), 2u);
-  EXPECT_EQ(ZUtils::round_up_power_of_2(3u), 4u);
-  EXPECT_EQ(ZUtils::round_up_power_of_2(4u), 4u);
-  EXPECT_EQ(ZUtils::round_up_power_of_2(5u), 8u);
-  EXPECT_EQ(ZUtils::round_up_power_of_2(6u), 8u);
-  EXPECT_EQ(ZUtils::round_up_power_of_2(7u), 8u);
-  EXPECT_EQ(ZUtils::round_up_power_of_2(8u), 8u);
-  EXPECT_EQ(ZUtils::round_up_power_of_2(9u), 16u);
-  EXPECT_EQ(ZUtils::round_up_power_of_2(10u), 16u);
-  EXPECT_EQ(ZUtils::round_up_power_of_2(1023u), 1024u);
-  EXPECT_EQ(ZUtils::round_up_power_of_2(1024u), 1024u);
-  EXPECT_EQ(ZUtils::round_up_power_of_2(1025u), 2048u);
-
-  const size_t max = max_alignment<size_t>();
-  EXPECT_EQ(ZUtils::round_up_power_of_2(max - 1), max);
-  EXPECT_EQ(ZUtils::round_up_power_of_2(max), max);
-}
-
-TEST(ZUtilsTest, round_down_power_of_2) {
-  EXPECT_EQ(ZUtils::round_down_power_of_2(1u), 1u);
-  EXPECT_EQ(ZUtils::round_down_power_of_2(2u), 2u);
-  EXPECT_EQ(ZUtils::round_down_power_of_2(3u), 2u);
-  EXPECT_EQ(ZUtils::round_down_power_of_2(4u), 4u);
-  EXPECT_EQ(ZUtils::round_down_power_of_2(5u), 4u);
-  EXPECT_EQ(ZUtils::round_down_power_of_2(6u), 4u);
-  EXPECT_EQ(ZUtils::round_down_power_of_2(7u), 4u);
-  EXPECT_EQ(ZUtils::round_down_power_of_2(8u), 8u);
-  EXPECT_EQ(ZUtils::round_down_power_of_2(9u), 8u);
-  EXPECT_EQ(ZUtils::round_down_power_of_2(10u), 8u);
-  EXPECT_EQ(ZUtils::round_down_power_of_2(1023u), 512u);
-  EXPECT_EQ(ZUtils::round_down_power_of_2(1024u), 1024u);
-  EXPECT_EQ(ZUtils::round_down_power_of_2(1025u), 1024u);
-
-  const size_t max = max_alignment<size_t>();
-  EXPECT_EQ(ZUtils::round_down_power_of_2(max), max);
-  EXPECT_EQ(ZUtils::round_down_power_of_2(max - 1), max / 2);
-}
--- a/test/hotspot/gtest/utilities/test_count_leading_zeros.cpp	Fri Dec 06 09:14:38 2019 -0800
+++ b/test/hotspot/gtest/utilities/test_count_leading_zeros.cpp	Fri Dec 06 18:05:36 2019 +0100
@@ -23,35 +23,78 @@
  */
 
 #include "precompiled.hpp"
+#include "metaprogramming/isSigned.hpp"
 #include "utilities/count_leading_zeros.hpp"
 #include "utilities/globalDefinitions.hpp"
 #include "unittest.hpp"
 
-TEST(count_leading_zeros, one_or_two_set_bits) {
-  unsigned i = 0;                  // Position of a set bit.
-  for (uint32_t ix = 1; ix != 0; ix <<= 1, ++i) {
-    unsigned j = 0;                // Position of a set bit.
-    for (uint32_t jx = 1; jx != 0; jx <<= 1, ++j) {
-      uint32_t value = ix | jx;
-      EXPECT_EQ(31u - MAX2(i, j), count_leading_zeros(value))
+#include <limits>
+
+template <typename T> void one_or_two_set_bits() {
+  uint32_t bit1_pos = 0;
+  uint32_t bits = sizeof(T) * BitsPerByte;
+  uint32_t limit = bits - (IsSigned<T>::value ? 1 : 0);
+  for (uint64_t ix = 1; bit1_pos < limit; ix = ix * 2, ++bit1_pos) {
+    uint32_t bit2_pos = 0;
+    for (uint64_t jx = 1; bit2_pos < limit; jx = jx * 2, ++bit2_pos) {
+      T value = T(ix | jx);
+      EXPECT_EQ((uint32_t)(bits - 1u - MAX2(bit1_pos, bit2_pos)), count_leading_zeros(value))
         << "value = " << value;
     }
   }
 }
 
-TEST(count_leading_zeros, high_zeros_low_ones) {
-  unsigned i = 0;                  // Number of leading zeros
-  uint32_t value = ~(uint32_t)0;
-  for ( ; value != 0; value >>= 1, ++i) {
-    EXPECT_EQ(i, count_leading_zeros(value))
+TEST(count_leading_zeros, one_or_two_set_bits) {
+  one_or_two_set_bits<int8_t>();
+  one_or_two_set_bits<int16_t>();
+  one_or_two_set_bits<int32_t>();
+  one_or_two_set_bits<int64_t>();
+  one_or_two_set_bits<uint8_t>();
+  one_or_two_set_bits<uint16_t>();
+  one_or_two_set_bits<uint32_t>();
+  one_or_two_set_bits<uint64_t>();
+}
+
+template <typename T> void high_zeros_low_ones() {
+  uint32_t number_of_leading_zeros = (IsSigned<T>::value ? 1 : 0);
+  T value = std::numeric_limits<T>::max();
+  for ( ; value != 0; value >>= 1, ++number_of_leading_zeros) {
+    EXPECT_EQ(number_of_leading_zeros, count_leading_zeros(value))
       << "value = " << value;
   }
 }
 
-TEST(count_leading_zeros, high_ones_low_zeros) {
-  uint32_t value = ~(uint32_t)0;
-  for ( ; value != 0; value <<= 1) {
-    EXPECT_EQ(0u, count_leading_zeros(value))
+TEST(count_leading_zeros, high_zeros_low_ones) {
+  high_zeros_low_ones<int8_t>();
+  high_zeros_low_ones<int16_t>();
+  high_zeros_low_ones<int32_t>();
+  high_zeros_low_ones<int64_t>();
+  high_zeros_low_ones<uint8_t>();
+  high_zeros_low_ones<uint16_t>();
+  high_zeros_low_ones<uint32_t>();
+  high_zeros_low_ones<uint64_t>();
+}
+
+template <typename T> void high_ones_low_zeros() {
+  T value = std::numeric_limits<T>::max();
+
+  uint32_t number_of_leading_zeros = (IsSigned<T>::value ? 1 : 0);
+  for (uint64_t i = 1; value != 0; value -= i, i <<= 1) {
+    EXPECT_EQ(number_of_leading_zeros, count_leading_zeros(value))
       << "value = " << value;
   }
+  value = (T)(~((uint64_t)0)); // all ones
+  EXPECT_EQ(0u, count_leading_zeros(value))
+    << "value = " << value;
 }
+
+TEST(count_leading_zeros, high_ones_low_zeros) {
+  high_ones_low_zeros<int8_t>();
+  high_ones_low_zeros<int16_t>();
+  high_ones_low_zeros<int32_t>();
+  high_ones_low_zeros<int64_t>();
+  high_ones_low_zeros<uint8_t>();
+  high_ones_low_zeros<uint16_t>();
+  high_ones_low_zeros<uint32_t>();
+  high_ones_low_zeros<uint64_t>();
+}
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/hotspot/gtest/utilities/test_powerOfTwo.cpp	Fri Dec 06 18:05:36 2019 +0100
@@ -0,0 +1,163 @@
+/*
+ * Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+
+#include "utilities/globalDefinitions.hpp"
+#include "utilities/powerOfTwo.hpp"
+#include "unittest.hpp"
+
+template <typename T> T max_pow2() { // returns max_value<T>() minus its right-shifted-by-one half
+  T max_val = max_value<T>();
+  return max_val - (max_val >> 1); // assuming max_value<T>() is 2^k - 1 this is 2^(k-1), the largest power of two in T — confirm against max_value
+}
+
+template <typename T> void round_up_power_of_2() { // exercises round_up_power_of_2(T) for one integer type T
+  EXPECT_EQ(round_up_power_of_2(T(1)), T(1)) << "value = " << T(1);
+  EXPECT_EQ(round_up_power_of_2(T(2)), T(2)) << "value = " << T(2);
+  EXPECT_EQ(round_up_power_of_2(T(3)), T(4)) << "value = " << T(3);
+  EXPECT_EQ(round_up_power_of_2(T(4)), T(4)) << "value = " << T(4);
+  EXPECT_EQ(round_up_power_of_2(T(5)), T(8)) << "value = " << T(5);
+  EXPECT_EQ(round_up_power_of_2(T(6)), T(8)) << "value = " << T(6);
+  EXPECT_EQ(round_up_power_of_2(T(7)), T(8)) << "value = " << T(7);
+  EXPECT_EQ(round_up_power_of_2(T(8)), T(8)) << "value = " << T(8);
+  EXPECT_EQ(round_up_power_of_2(T(9)), T(16)) << "value = " << T(9);
+  EXPECT_EQ(round_up_power_of_2(T(10)), T(16)) << "value = " << T(10);
+
+  T t_max_pow2 = max_pow2<T>(); // upper bound for the power-of-two sweeps below
+
+  // round_up(any power of two) should return input
+  for (T pow2 = T(1); pow2 < t_max_pow2; pow2 *= 2) { // stops short of t_max_pow2 so pow2 *= 2 never goes past it
+    EXPECT_EQ(pow2, round_up_power_of_2(pow2))
+      << "value = " << pow2;
+  }
+  EXPECT_EQ(round_up_power_of_2(t_max_pow2), t_max_pow2) // the bound itself is checked outside the loop
+    << "value = " << (t_max_pow2);
+
+  // For each pow2 >= 4, round_up(pow2 - 1) should return pow2
+  for (T pow2 = T(4); pow2 < t_max_pow2; pow2 *= 2) {
+    EXPECT_EQ(pow2, round_up_power_of_2(pow2 - 1))
+      << "value = " << (pow2 - 1); // report the input that was passed, not the expected result
+  }
+  EXPECT_EQ(round_up_power_of_2(t_max_pow2 - 1), t_max_pow2)
+    << "value = " << (t_max_pow2 - 1);
+
+}
+
+TEST(power_of_2, round_up_power_of_2) { // runs the round-up checks for signed and unsigned 8/16/32/64-bit integer types
+  round_up_power_of_2<int8_t>();
+  round_up_power_of_2<int16_t>();
+  round_up_power_of_2<int32_t>();
+  round_up_power_of_2<int64_t>();
+  round_up_power_of_2<uint8_t>();
+  round_up_power_of_2<uint16_t>();
+  round_up_power_of_2<uint32_t>();
+  round_up_power_of_2<uint64_t>();
+}
+
+template <typename T> void round_down_power_of_2() { // exercises round_down_power_of_2(T) for one integer type T
+  EXPECT_EQ(round_down_power_of_2(T(1)), T(1)) << "value = " << T(1);
+  EXPECT_EQ(round_down_power_of_2(T(2)), T(2)) << "value = " << T(2);
+  EXPECT_EQ(round_down_power_of_2(T(3)), T(2)) << "value = " << T(3);
+  EXPECT_EQ(round_down_power_of_2(T(4)), T(4)) << "value = " << T(4);
+  EXPECT_EQ(round_down_power_of_2(T(5)), T(4)) << "value = " << T(5);
+  EXPECT_EQ(round_down_power_of_2(T(6)), T(4)) << "value = " << T(6);
+  EXPECT_EQ(round_down_power_of_2(T(7)), T(4)) << "value = " << T(7);
+  EXPECT_EQ(round_down_power_of_2(T(8)), T(8)) << "value = " << T(8);
+  EXPECT_EQ(round_down_power_of_2(T(9)), T(8)) << "value = " << T(9);
+  EXPECT_EQ(round_down_power_of_2(T(10)), T(8)) << "value = " << T(10);
+
+  T t_max_pow2 = max_pow2<T>(); // upper bound for the power-of-two sweep below
+
+  // For each pow2 >= 2:
+  // - round_down(pow2) should return pow2
+  // - round_down(pow2 + 1) should return pow2
+  // - round_down(pow2 - 1) should return pow2 / 2
+  for (T pow2 = T(2); pow2 < t_max_pow2; pow2 = pow2 * 2) { // stops short of t_max_pow2 so the doubling never goes past it
+    EXPECT_EQ(pow2, round_down_power_of_2(pow2))
+      << "value = " << pow2;
+    EXPECT_EQ(pow2, round_down_power_of_2(pow2 + 1))
+      << "value = " << (pow2 + 1); // report the input that was passed, not the expected result
+    EXPECT_EQ(pow2 / 2, round_down_power_of_2(pow2 - 1))
+      << "value = " << (pow2 - 1); // report the input that was passed, not the expected result
+  }
+  EXPECT_EQ(round_down_power_of_2(t_max_pow2), t_max_pow2) // the bound itself is checked outside the loop
+    << "value = " << (t_max_pow2);
+  EXPECT_EQ(round_down_power_of_2(t_max_pow2 + 1), t_max_pow2)
+    << "value = " << (t_max_pow2 + 1);
+  EXPECT_EQ(round_down_power_of_2(t_max_pow2 - 1), t_max_pow2 / 2)
+    << "value = " << (t_max_pow2 - 1);
+}
+
+TEST(power_of_2, round_down_power_of_2) { // runs the round-down checks for signed and unsigned 8/16/32/64-bit integer types
+  round_down_power_of_2<int8_t>();
+  round_down_power_of_2<int16_t>();
+  round_down_power_of_2<int32_t>();
+  round_down_power_of_2<int64_t>();
+  round_down_power_of_2<uint8_t>();
+  round_down_power_of_2<uint16_t>();
+  round_down_power_of_2<uint32_t>();
+  round_down_power_of_2<uint64_t>();
+}
+
+template <typename T> void next_power_of_2() { // exercises next_power_of_2(T) for one integer type T
+  EXPECT_EQ(next_power_of_2(T(0)), T(1)) << "value = " << T(0);
+  EXPECT_EQ(next_power_of_2(T(1)), T(2)) << "value = " << T(1);
+  EXPECT_EQ(next_power_of_2(T(2)), T(4)) << "value = " << T(2);
+  EXPECT_EQ(next_power_of_2(T(3)), T(4)) << "value = " << T(3);
+  EXPECT_EQ(next_power_of_2(T(4)), T(8)) << "value = " << T(4);
+  EXPECT_EQ(next_power_of_2(T(5)), T(8)) << "value = " << T(5);
+  EXPECT_EQ(next_power_of_2(T(6)), T(8)) << "value = " << T(6);
+  EXPECT_EQ(next_power_of_2(T(7)), T(8)) << "value = " << T(7);
+  EXPECT_EQ(next_power_of_2(T(8)), T(16)) << "value = " << T(8);
+  EXPECT_EQ(next_power_of_2(T(9)), T(16)) << "value = " << T(9);
+  EXPECT_EQ(next_power_of_2(T(10)), T(16)) << "value = " << T(10);
+
+  T t_max_pow2 = max_pow2<T>(); // upper bound for the power-of-two sweeps below
+
+  // next(pow2 - 1) should return pow2
+  for (T pow2 = T(1); pow2 < t_max_pow2; pow2 = pow2 * 2) { // the first iteration passes 0 and expects 1
+    EXPECT_EQ(pow2, next_power_of_2(pow2 - 1))
+      << "value = " << pow2 - 1;
+  }
+  EXPECT_EQ(next_power_of_2(t_max_pow2 - 1), t_max_pow2) // the bound itself is checked outside the loop
+    << "value = " << (t_max_pow2 - 1);
+
+  // next(pow2) should return pow2 * 2
+  for (T pow2 = T(1); pow2 < t_max_pow2 / 2; pow2 = pow2 * 2) { // NOTE(review): next(t_max_pow2 / 2) == t_max_pow2 is not covered by this bound
+    EXPECT_EQ(pow2 * 2, next_power_of_2(pow2))
+      << "value = " << pow2;
+  }
+}
+
+TEST(power_of_2, next_power_of_2) { // runs the next-power checks for signed and unsigned 8/16/32/64-bit integer types
+  next_power_of_2<int8_t>();
+  next_power_of_2<int16_t>();
+  next_power_of_2<int32_t>();
+  next_power_of_2<int64_t>();
+  next_power_of_2<uint8_t>();
+  next_power_of_2<uint16_t>();
+  next_power_of_2<uint32_t>();
+  next_power_of_2<uint64_t>();
+}
\ No newline at end of file