annotate hotspot/src/share/vm/utilities/hashtable.cpp @ 13087:673ea6efaf18

7158800: Improve storage of symbol tables Summary: Use an alternate version of hashing algorithm for symbol string tables and after a certain bucket size to improve performance Reviewed-by: pbk, kamg, dlong, kvn, fparain
author coleenp
date Wed, 13 Jun 2012 19:52:59 -0400
parents 91935236600e
children c146b608d91f
rev   line source
duke@1 1 /*
coleenp@13087 2 * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved.
duke@1 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
duke@1 4 *
duke@1 5 * This code is free software; you can redistribute it and/or modify it
duke@1 6 * under the terms of the GNU General Public License version 2 only, as
duke@1 7 * published by the Free Software Foundation.
duke@1 8 *
duke@1 9 * This code is distributed in the hope that it will be useful, but WITHOUT
duke@1 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
duke@1 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
duke@1 12 * version 2 for more details (a copy is included in the LICENSE file that
duke@1 13 * accompanied this code).
duke@1 14 *
duke@1 15 * You should have received a copy of the GNU General Public License version
duke@1 16 * 2 along with this work; if not, write to the Free Software Foundation,
duke@1 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
duke@1 18 *
trims@5547 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
trims@5547 20 * or visit www.oracle.com if you need additional information or have any
trims@5547 21 * questions.
duke@1 22 *
duke@1 23 */
duke@1 24
stefank@7397 25 #include "precompiled.hpp"
stefank@7397 26 #include "memory/allocation.inline.hpp"
stefank@7397 27 #include "memory/resourceArea.hpp"
stefank@7397 28 #include "oops/oop.inline.hpp"
stefank@7397 29 #include "runtime/safepoint.hpp"
stefank@7397 30 #include "utilities/dtrace.hpp"
stefank@7397 31 #include "utilities/hashtable.hpp"
stefank@7397 32 #include "utilities/hashtable.inline.hpp"
duke@1 33
coleenp@8076 34
dcubed@10739 35 #ifndef USDT2
duke@1 36 HS_DTRACE_PROBE_DECL4(hs_private, hashtable__new_entry,
coleenp@8076 37 void*, unsigned int, void*, void*);
dcubed@10739 38 #endif /* !USDT2 */
duke@1 39
duke@1 40 // This is a generic hashtable, designed to be used for the symbol
duke@1 41 // and string tables.
duke@1 42 //
duke@1 43 // It is implemented as an open hash table with a fixed number of buckets.
duke@1 44 //
duke@1 45 // %note:
duke@1 46 // - HashtableEntrys are allocated in blocks to reduce the space overhead.
duke@1 47
duke@1 48 BasicHashtableEntry* BasicHashtable::new_entry(unsigned int hashValue) {
duke@1 49 BasicHashtableEntry* entry;
duke@1 50
duke@1 51 if (_free_list) {
duke@1 52 entry = _free_list;
duke@1 53 _free_list = _free_list->next();
duke@1 54 } else {
jrose@1551 55 if (_first_free_entry + _entry_size >= _end_block) {
jrose@1551 56 int block_size = MIN2(512, MAX2((int)_table_size / 2, (int)_number_of_entries));
duke@1 57 int len = _entry_size * block_size;
jrose@1551 58 len = 1 << log2_intptr(len); // round down to power of 2
jrose@1551 59 assert(len >= _entry_size, "");
duke@1 60 _first_free_entry = NEW_C_HEAP_ARRAY(char, len);
duke@1 61 _end_block = _first_free_entry + len;
duke@1 62 }
duke@1 63 entry = (BasicHashtableEntry*)_first_free_entry;
duke@1 64 _first_free_entry += _entry_size;
duke@1 65 }
duke@1 66
jrose@1551 67 assert(_entry_size % HeapWordSize == 0, "");
duke@1 68 entry->set_hash(hashValue);
duke@1 69 return entry;
duke@1 70 }
duke@1 71
duke@1 72
coleenp@8076 73 template <class T> HashtableEntry<T>* Hashtable<T>::new_entry(unsigned int hashValue, T obj) {
coleenp@8076 74 HashtableEntry<T>* entry;
duke@1 75
coleenp@8076 76 entry = (HashtableEntry<T>*)BasicHashtable::new_entry(hashValue);
coleenp@8076 77 entry->set_literal(obj);
dcubed@10739 78 #ifndef USDT2
duke@1 79 HS_DTRACE_PROBE4(hs_private, hashtable__new_entry,
duke@1 80 this, hashValue, obj, entry);
dcubed@10739 81 #else /* USDT2 */
dcubed@10739 82 HS_PRIVATE_HASHTABLE_NEW_ENTRY(
dcubed@10739 83 this, hashValue, (uintptr_t) obj, entry);
dcubed@10739 84 #endif /* USDT2 */
duke@1 85 return entry;
duke@1 86 }
duke@1 87
duke@1 88
coleenp@13087 89 // Check to see if the hashtable is unbalanced. The caller set a flag to
coleenp@13087 90 // rehash at the next safepoint. If this bucket is 60 times greater than the
coleenp@13087 91 // expected average bucket length, it's an unbalanced hashtable.
coleenp@13087 92 // This is somewhat an arbitrary heuristic but if one bucket gets to
coleenp@13087 93 // rehash_count which is currently 100, there's probably something wrong.
coleenp@13087 94
coleenp@13087 95 bool BasicHashtable::check_rehash_table(int count) {
coleenp@13087 96 assert(table_size() != 0, "underflow");
coleenp@13087 97 if (count > (((double)number_of_entries()/(double)table_size())*rehash_multiple)) {
coleenp@13087 98 // Set a flag for the next safepoint, which should be at some guaranteed
coleenp@13087 99 // safepoint interval.
coleenp@13087 100 return true;
coleenp@13087 101 }
coleenp@13087 102 return false;
coleenp@13087 103 }
coleenp@13087 104
coleenp@13087 105 // Create a new table and using alternate hash code, populate the new table
coleenp@13087 106 // with the existing elements. This can be used to change the hash code
coleenp@13087 107 // and could in the future change the size of the table.
coleenp@13087 108
coleenp@13087 109 template <class T> void Hashtable<T>::move_to(Hashtable<T>* new_table) {
coleenp@13087 110 int saved_entry_count = number_of_entries();
coleenp@13087 111
coleenp@13087 112 // Iterate through the table and create a new entry for the new table
coleenp@13087 113 for (int i = 0; i < new_table->table_size(); ++i) {
coleenp@13087 114 for (HashtableEntry<T>* p = bucket(i); p != NULL; ) {
coleenp@13087 115 HashtableEntry<T>* next = p->next();
coleenp@13087 116 T string = p->literal();
coleenp@13087 117 // Use alternate hashing algorithm on the symbol in the first table
coleenp@13087 118 unsigned int hashValue = new_hash(string);
coleenp@13087 119 // Get a new index relative to the new table (can also change size)
coleenp@13087 120 int index = new_table->hash_to_index(hashValue);
coleenp@13087 121 p->set_hash(hashValue);
coleenp@13087 122 unlink_entry(p);
coleenp@13087 123 new_table->add_entry(index, p);
coleenp@13087 124 p = next;
coleenp@13087 125 }
coleenp@13087 126 }
coleenp@13087 127 // give the new table the free list as well
coleenp@13087 128 new_table->copy_freelist(this);
coleenp@13087 129 assert(new_table->number_of_entries() == saved_entry_count, "lost entry on dictionary copy?");
coleenp@13087 130
coleenp@13087 131 // Destroy memory used by the buckets in the hashtable. The memory
coleenp@13087 132 // for the elements has been used in a new table and is not
coleenp@13087 133 // destroyed. The memory reuse will benefit resizing the SystemDictionary
coleenp@13087 134 // to avoid a memory allocation spike at safepoint.
coleenp@13087 135 free_buckets();
coleenp@13087 136 }
coleenp@13087 137
duke@1 138 // Reverse the order of elements in the hash buckets.
duke@1 139
duke@1 140 void BasicHashtable::reverse() {
duke@1 141
duke@1 142 for (int i = 0; i < _table_size; ++i) {
duke@1 143 BasicHashtableEntry* new_list = NULL;
duke@1 144 BasicHashtableEntry* p = bucket(i);
duke@1 145 while (p != NULL) {
duke@1 146 BasicHashtableEntry* next = p->next();
duke@1 147 p->set_next(new_list);
duke@1 148 new_list = p;
duke@1 149 p = next;
duke@1 150 }
duke@1 151 *bucket_addr(i) = new_list;
duke@1 152 }
duke@1 153 }
duke@1 154
duke@1 155
duke@1 156 // Copy the table to the shared space.
duke@1 157
duke@1 158 void BasicHashtable::copy_table(char** top, char* end) {
duke@1 159
duke@1 160 // Dump the hash table entries.
duke@1 161
duke@1 162 intptr_t *plen = (intptr_t*)(*top);
duke@1 163 *top += sizeof(*plen);
duke@1 164
duke@1 165 int i;
duke@1 166 for (i = 0; i < _table_size; ++i) {
duke@1 167 for (BasicHashtableEntry** p = _buckets[i].entry_addr();
duke@1 168 *p != NULL;
duke@1 169 p = (*p)->next_addr()) {
duke@1 170 if (*top + entry_size() > end) {
coleenp@8076 171 report_out_of_shared_space(SharedMiscData);
duke@1 172 }
duke@1 173 *p = (BasicHashtableEntry*)memcpy(*top, *p, entry_size());
duke@1 174 *top += entry_size();
duke@1 175 }
duke@1 176 }
duke@1 177 *plen = (char*)(*top) - (char*)plen - sizeof(*plen);
duke@1 178
duke@1 179 // Set the shared bit.
duke@1 180
duke@1 181 for (i = 0; i < _table_size; ++i) {
duke@1 182 for (BasicHashtableEntry* p = bucket(i); p != NULL; p = p->next()) {
duke@1 183 p->set_shared();
duke@1 184 }
duke@1 185 }
duke@1 186 }
duke@1 187
duke@1 188
duke@1 189
duke@1 190 // Reverse the order of elements in the hash buckets.
duke@1 191
coleenp@8076 192 template <class T> void Hashtable<T>::reverse(void* boundary) {
duke@1 193
duke@1 194 for (int i = 0; i < table_size(); ++i) {
coleenp@8076 195 HashtableEntry<T>* high_list = NULL;
coleenp@8076 196 HashtableEntry<T>* low_list = NULL;
coleenp@8076 197 HashtableEntry<T>* last_low_entry = NULL;
coleenp@8076 198 HashtableEntry<T>* p = bucket(i);
duke@1 199 while (p != NULL) {
coleenp@8076 200 HashtableEntry<T>* next = p->next();
duke@1 201 if ((void*)p->literal() >= boundary) {
duke@1 202 p->set_next(high_list);
duke@1 203 high_list = p;
duke@1 204 } else {
duke@1 205 p->set_next(low_list);
duke@1 206 low_list = p;
duke@1 207 if (last_low_entry == NULL) {
duke@1 208 last_low_entry = p;
duke@1 209 }
duke@1 210 }
duke@1 211 p = next;
duke@1 212 }
duke@1 213 if (low_list != NULL) {
duke@1 214 *bucket_addr(i) = low_list;
duke@1 215 last_low_entry->set_next(high_list);
duke@1 216 } else {
duke@1 217 *bucket_addr(i) = high_list;
duke@1 218 }
duke@1 219 }
duke@1 220 }
duke@1 221
duke@1 222
duke@1 223 // Dump the hash table buckets.
duke@1 224
duke@1 225 void BasicHashtable::copy_buckets(char** top, char* end) {
duke@1 226 intptr_t len = _table_size * sizeof(HashtableBucket);
duke@1 227 *(intptr_t*)(*top) = len;
duke@1 228 *top += sizeof(intptr_t);
duke@1 229
duke@1 230 *(intptr_t*)(*top) = _number_of_entries;
duke@1 231 *top += sizeof(intptr_t);
duke@1 232
duke@1 233 if (*top + len > end) {
coleenp@8076 234 report_out_of_shared_space(SharedMiscData);
duke@1 235 }
duke@1 236 _buckets = (HashtableBucket*)memcpy(*top, _buckets, len);
duke@1 237 *top += len;
duke@1 238 }
duke@1 239
duke@1 240
duke@1 241 #ifndef PRODUCT
duke@1 242
coleenp@8076 243 template <class T> void Hashtable<T>::print() {
duke@1 244 ResourceMark rm;
duke@1 245
duke@1 246 for (int i = 0; i < table_size(); i++) {
coleenp@8076 247 HashtableEntry<T>* entry = bucket(i);
duke@1 248 while(entry != NULL) {
duke@1 249 tty->print("%d : ", i);
duke@1 250 entry->literal()->print();
duke@1 251 tty->cr();
duke@1 252 entry = entry->next();
duke@1 253 }
duke@1 254 }
duke@1 255 }
duke@1 256
duke@1 257
duke@1 258 void BasicHashtable::verify() {
duke@1 259 int count = 0;
duke@1 260 for (int i = 0; i < table_size(); i++) {
duke@1 261 for (BasicHashtableEntry* p = bucket(i); p != NULL; p = p->next()) {
duke@1 262 ++count;
duke@1 263 }
duke@1 264 }
duke@1 265 assert(count == number_of_entries(), "number of hashtable entries incorrect");
duke@1 266 }
duke@1 267
duke@1 268
duke@1 269 #endif // PRODUCT
duke@1 270
duke@1 271
duke@1 272 #ifdef ASSERT
duke@1 273
duke@1 274 void BasicHashtable::verify_lookup_length(double load) {
duke@1 275 if ((double)_lookup_length / (double)_lookup_count > load * 2.0) {
duke@1 276 warning("Performance bug: SystemDictionary lookup_count=%d "
duke@1 277 "lookup_length=%d average=%lf load=%f",
duke@1 278 _lookup_count, _lookup_length,
duke@1 279 (double) _lookup_length / _lookup_count, load);
duke@1 280 }
duke@1 281 }
duke@1 282
duke@1 283 #endif
coleenp@8076 284
coleenp@8076 285 // Explicitly instantiate these types
coleenp@8076 286 template class Hashtable<constantPoolOop>;
coleenp@8076 287 template class Hashtable<Symbol*>;
coleenp@8076 288 template class Hashtable<klassOop>;
coleenp@8076 289 template class Hashtable<oop>;
coleenp@8076 290