OpenJDK / graal / graal-jvmci-8
changeset 24007:776cb7cbe2e4
8017629: G1: UseSHM in combination with a G1HeapRegionSize > os::large_page_size() falls back to use small pages
Reviewed-by: pliden, sjohanss, stuefe
author | stefank |
---|---|
date | Mon, 25 Apr 2016 11:36:14 +0200 |
parents | 44c8fe602a5e |
children | 64bd5b63923c |
files | src/os/linux/vm/os_linux.cpp |
diffstat | 1 files changed, 457 insertions(+), 388 deletions(-) |
--- a/src/os/linux/vm/os_linux.cpp Thu Apr 23 18:00:50 2015 +0200 +++ b/src/os/linux/vm/os_linux.cpp Mon Apr 25 11:36:14 2016 +0200 @@ -3047,393 +3047,6 @@ return addr == MAP_FAILED ? NULL : addr; } -// Don't update _highest_vm_reserved_address, because there might be memory -// regions above addr + size. If so, releasing a memory region only creates -// a hole in the address space, it doesn't help prevent heap-stack collision. -// -static int anon_munmap(char * addr, size_t size) { - return ::munmap(addr, size) == 0; -} - -char* os::pd_reserve_memory(size_t bytes, char* requested_addr, - size_t alignment_hint) { - return anon_mmap(requested_addr, bytes, (requested_addr != NULL)); -} - -bool os::pd_release_memory(char* addr, size_t size) { - return anon_munmap(addr, size); -} - -static address highest_vm_reserved_address() { - return _highest_vm_reserved_address; -} - -static bool linux_mprotect(char* addr, size_t size, int prot) { - // Linux wants the mprotect address argument to be page aligned. - char* bottom = (char*)align_size_down((intptr_t)addr, os::Linux::page_size()); - - // According to SUSv3, mprotect() should only be used with mappings - // established by mmap(), and mmap() always maps whole pages. Unaligned - // 'addr' likely indicates problem in the VM (e.g. trying to change - // protection of malloc'ed or statically allocated memory). Check the - // caller if you hit this assert. - assert(addr == bottom, "sanity check"); - - size = align_size_up(pointer_delta(addr, bottom, 1) + size, os::Linux::page_size()); - return ::mprotect(bottom, size, prot) == 0; -} - -// Set protections specified -bool os::protect_memory(char* addr, size_t bytes, ProtType prot, - bool is_committed) { - unsigned int p = 0; - switch (prot) { - case MEM_PROT_NONE: p = PROT_NONE; break; - case MEM_PROT_READ: p = PROT_READ; break; - case MEM_PROT_RW: p = PROT_READ|PROT_WRITE; break; - case MEM_PROT_RWX: p = PROT_READ|PROT_WRITE|PROT_EXEC; break; - default: - ShouldNotReachHere(); - } - // is_committed is unused. - return linux_mprotect(addr, bytes, p); -} - -bool os::guard_memory(char* addr, size_t size) { - return linux_mprotect(addr, size, PROT_NONE); -} - -bool os::unguard_memory(char* addr, size_t size) { - return linux_mprotect(addr, size, PROT_READ|PROT_WRITE); -} - -bool os::Linux::transparent_huge_pages_sanity_check(bool warn, size_t page_size) { - bool result = false; - void *p = mmap(NULL, page_size * 2, PROT_READ|PROT_WRITE, - MAP_ANONYMOUS|MAP_PRIVATE, - -1, 0); - if (p != MAP_FAILED) { - void *aligned_p = align_ptr_up(p, page_size); - - result = madvise(aligned_p, page_size, MADV_HUGEPAGE) == 0; - - munmap(p, page_size * 2); - } - - if (warn && !result) { - warning("TransparentHugePages is not supported by the operating system."); - } - - return result; -} - -bool os::Linux::hugetlbfs_sanity_check(bool warn, size_t page_size) { - bool result = false; - void *p = mmap(NULL, page_size, PROT_READ|PROT_WRITE, - MAP_ANONYMOUS|MAP_PRIVATE|MAP_HUGETLB, - -1, 0); - - if (p != MAP_FAILED) { - // We don't know if this really is a huge page or not. 
- FILE *fp = fopen("/proc/self/maps", "r"); - if (fp) { - while (!feof(fp)) { - char chars[257]; - long x = 0; - if (fgets(chars, sizeof(chars), fp)) { - if (sscanf(chars, "%lx-%*x", &x) == 1 - && x == (long)p) { - if (strstr (chars, "hugepage")) { - result = true; - break; - } - } - } - } - fclose(fp); - } - munmap(p, page_size); - } - - if (warn && !result) { - warning("HugeTLBFS is not supported by the operating system."); - } - - return result; -} - -/* -* Set the coredump_filter bits to include largepages in core dump (bit 6) -* -* From the coredump_filter documentation: -* -* - (bit 0) anonymous private memory -* - (bit 1) anonymous shared memory -* - (bit 2) file-backed private memory -* - (bit 3) file-backed shared memory -* - (bit 4) ELF header pages in file-backed private memory areas (it is -* effective only if the bit 2 is cleared) -* - (bit 5) hugetlb private memory -* - (bit 6) hugetlb shared memory -*/ -static void set_coredump_filter(void) { - FILE *f; - long cdm; - - if ((f = fopen("/proc/self/coredump_filter", "r+")) == NULL) { - return; - } - - if (fscanf(f, "%lx", &cdm) != 1) { - fclose(f); - return; - } - - rewind(f); - - if ((cdm & LARGEPAGES_BIT) == 0) { - cdm |= LARGEPAGES_BIT; - fprintf(f, "%#lx", cdm); - } - - fclose(f); -} - -// Large page support - -static size_t _large_page_size = 0; - -size_t os::Linux::find_large_page_size() { - size_t large_page_size = 0; - - // large_page_size on Linux is used to round up heap size. x86 uses either - // 2M or 4M page, depending on whether PAE (Physical Address Extensions) - // mode is enabled. AMD64/EM64T uses 2M page in 64bit mode. IA64 can use - // page as large as 256M. - // - // Here we try to figure out page size by parsing /proc/meminfo and looking - // for a line with the following format: - // Hugepagesize: 2048 kB - // - // If we can't determine the value (e.g. /proc is not mounted, or the text - // format has been changed), we'll use the largest page size supported by - // the processor. - -#ifndef ZERO - large_page_size = IA32_ONLY(4 * M) AMD64_ONLY(2 * M) IA64_ONLY(256 * M) SPARC_ONLY(4 * M) - ARM_ONLY(2 * M) PPC_ONLY(4 * M); -#endif // ZERO - - FILE *fp = fopen("/proc/meminfo", "r"); - if (fp) { - while (!feof(fp)) { - int x = 0; - char buf[16]; - if (fscanf(fp, "Hugepagesize: %d", &x) == 1) { - if (x && fgets(buf, sizeof(buf), fp) && strcmp(buf, " kB\n") == 0) { - large_page_size = x * K; - break; - } - } else { - // skip to next line - for (;;) { - int ch = fgetc(fp); - if (ch == EOF || ch == (int)'\n') break; - } - } - } - fclose(fp); - } - - if (!FLAG_IS_DEFAULT(LargePageSizeInBytes) && LargePageSizeInBytes != large_page_size) { - warning("Setting LargePageSizeInBytes has no effect on this OS. Large page size is " - SIZE_FORMAT "%s.", byte_size_in_proper_unit(large_page_size), - proper_unit_for_byte_size(large_page_size)); - } - - return large_page_size; -} - -size_t os::Linux::setup_large_page_size() { - _large_page_size = Linux::find_large_page_size(); - const size_t default_page_size = (size_t)Linux::page_size(); - if (_large_page_size > default_page_size) { - _page_sizes[0] = _large_page_size; - _page_sizes[1] = default_page_size; - _page_sizes[2] = 0; - } - - return _large_page_size; -} - -bool os::Linux::setup_large_page_type(size_t page_size) { - if (FLAG_IS_DEFAULT(UseHugeTLBFS) && - FLAG_IS_DEFAULT(UseSHM) && - FLAG_IS_DEFAULT(UseTransparentHugePages)) { - - // The type of large pages has not been specified by the user. - - // Try UseHugeTLBFS and then UseSHM. 
- UseHugeTLBFS = UseSHM = true; - - // Don't try UseTransparentHugePages since there are known - // performance issues with it turned on. This might change in the future. - UseTransparentHugePages = false; - } - - if (UseTransparentHugePages) { - bool warn_on_failure = !FLAG_IS_DEFAULT(UseTransparentHugePages); - if (transparent_huge_pages_sanity_check(warn_on_failure, page_size)) { - UseHugeTLBFS = false; - UseSHM = false; - return true; - } - UseTransparentHugePages = false; - } - - if (UseHugeTLBFS) { - bool warn_on_failure = !FLAG_IS_DEFAULT(UseHugeTLBFS); - if (hugetlbfs_sanity_check(warn_on_failure, page_size)) { - UseSHM = false; - return true; - } - UseHugeTLBFS = false; - } - - return UseSHM; -} - -void os::large_page_init() { - if (!UseLargePages && - !UseTransparentHugePages && - !UseHugeTLBFS && - !UseSHM) { - // Not using large pages. - return; - } - - if (!FLAG_IS_DEFAULT(UseLargePages) && !UseLargePages) { - // The user explicitly turned off large pages. - // Ignore the rest of the large pages flags. - UseTransparentHugePages = false; - UseHugeTLBFS = false; - UseSHM = false; - return; - } - - size_t large_page_size = Linux::setup_large_page_size(); - UseLargePages = Linux::setup_large_page_type(large_page_size); - - set_coredump_filter(); -} - -#ifndef SHM_HUGETLB -#define SHM_HUGETLB 04000 -#endif - -char* os::Linux::reserve_memory_special_shm(size_t bytes, size_t alignment, char* req_addr, bool exec) { - // "exec" is passed in but not used. Creating the shared image for - // the code cache doesn't have an SHM_X executable permission to check. - assert(UseLargePages && UseSHM, "only for SHM large pages"); - assert(is_ptr_aligned(req_addr, os::large_page_size()), "Unaligned address"); - - if (!is_size_aligned(bytes, os::large_page_size()) || alignment > os::large_page_size()) { - return NULL; // Fallback to small pages. - } - - key_t key = IPC_PRIVATE; - char *addr; - - bool warn_on_failure = UseLargePages && - (!FLAG_IS_DEFAULT(UseLargePages) || - !FLAG_IS_DEFAULT(UseSHM) || - !FLAG_IS_DEFAULT(LargePageSizeInBytes) - ); - char msg[128]; - - // Create a large shared memory region to attach to based on size. - // Currently, size is the total size of the heap - int shmid = shmget(key, bytes, SHM_HUGETLB|IPC_CREAT|SHM_R|SHM_W); - if (shmid == -1) { - // Possible reasons for shmget failure: - // 1. shmmax is too small for Java heap. - // > check shmmax value: cat /proc/sys/kernel/shmmax - // > increase shmmax value: echo "0xffffffff" > /proc/sys/kernel/shmmax - // 2. not enough large page memory. - // > check available large pages: cat /proc/meminfo - // > increase amount of large pages: - // echo new_value > /proc/sys/vm/nr_hugepages - // Note 1: different Linux may use different name for this property, - // e.g. on Redhat AS-3 it is "hugetlb_pool". - // Note 2: it's possible there's enough physical memory available but - // they are so fragmented after a long run that they can't - // coalesce into large pages. Try to reserve large pages when - // the system is still "fresh". - if (warn_on_failure) { - jio_snprintf(msg, sizeof(msg), "Failed to reserve shared memory (errno = %d).", errno); - warning("%s", msg); - } - return NULL; - } - - // attach to the region - addr = (char*)shmat(shmid, req_addr, 0); - int err = errno; - - // Remove shmid. If shmat() is successful, the actual shared memory segment - // will be deleted when it's detached by shmdt() or when the process - // terminates. If shmat() is not successful this will remove the shared - // segment immediately. 
- shmctl(shmid, IPC_RMID, NULL); - - if ((intptr_t)addr == -1) { - if (warn_on_failure) { - jio_snprintf(msg, sizeof(msg), "Failed to attach shared memory (errno = %d).", err); - warning("%s", msg); - } - return NULL; - } - - return addr; -} - -static void warn_on_large_pages_failure(char* req_addr, size_t bytes, int error) { - assert(error == ENOMEM, "Only expect to fail if no memory is available"); - - bool warn_on_failure = UseLargePages && - (!FLAG_IS_DEFAULT(UseLargePages) || - !FLAG_IS_DEFAULT(UseHugeTLBFS) || - !FLAG_IS_DEFAULT(LargePageSizeInBytes)); - - if (warn_on_failure) { - char msg[128]; - jio_snprintf(msg, sizeof(msg), "Failed to reserve large pages memory req_addr: " - PTR_FORMAT " bytes: " SIZE_FORMAT " (errno = %d).", req_addr, bytes, error); - warning("%s", msg); - } -} - -char* os::Linux::reserve_memory_special_huge_tlbfs_only(size_t bytes, char* req_addr, bool exec) { - assert(UseLargePages && UseHugeTLBFS, "only for Huge TLBFS large pages"); - assert(is_size_aligned(bytes, os::large_page_size()), "Unaligned size"); - assert(is_ptr_aligned(req_addr, os::large_page_size()), "Unaligned address"); - - int prot = exec ? PROT_READ|PROT_WRITE|PROT_EXEC : PROT_READ|PROT_WRITE; - char* addr = (char*)::mmap(req_addr, bytes, prot, - MAP_PRIVATE|MAP_ANONYMOUS|MAP_HUGETLB, - -1, 0); - - if (addr == MAP_FAILED) { - warn_on_large_pages_failure(req_addr, bytes, errno); - return NULL; - } - - assert(is_ptr_aligned(addr, os::large_page_size()), "Must be"); - - return addr; -} - -// Helper for os::Linux::reserve_memory_special_huge_tlbfs_mixed(). // Allocate (using mmap, NO_RESERVE, with small pages) at either a given request address // (req_addr != NULL) or with a given alignment. // - bytes shall be a multiple of alignment. @@ -3474,7 +3087,463 @@ } } return start; - +} + +// Don't update _highest_vm_reserved_address, because there might be memory +// regions above addr + size. If so, releasing a memory region only creates +// a hole in the address space, it doesn't help prevent heap-stack collision. +// +static int anon_munmap(char * addr, size_t size) { + return ::munmap(addr, size) == 0; +} + +char* os::pd_reserve_memory(size_t bytes, char* requested_addr, + size_t alignment_hint) { + return anon_mmap(requested_addr, bytes, (requested_addr != NULL)); +} + +bool os::pd_release_memory(char* addr, size_t size) { + return anon_munmap(addr, size); +} + +static address highest_vm_reserved_address() { + return _highest_vm_reserved_address; +} + +static bool linux_mprotect(char* addr, size_t size, int prot) { + // Linux wants the mprotect address argument to be page aligned. + char* bottom = (char*)align_size_down((intptr_t)addr, os::Linux::page_size()); + + // According to SUSv3, mprotect() should only be used with mappings + // established by mmap(), and mmap() always maps whole pages. Unaligned + // 'addr' likely indicates problem in the VM (e.g. trying to change + // protection of malloc'ed or statically allocated memory). Check the + // caller if you hit this assert. 
+ assert(addr == bottom, "sanity check"); + + size = align_size_up(pointer_delta(addr, bottom, 1) + size, os::Linux::page_size()); + return ::mprotect(bottom, size, prot) == 0; +} + +// Set protections specified +bool os::protect_memory(char* addr, size_t bytes, ProtType prot, + bool is_committed) { + unsigned int p = 0; + switch (prot) { + case MEM_PROT_NONE: p = PROT_NONE; break; + case MEM_PROT_READ: p = PROT_READ; break; + case MEM_PROT_RW: p = PROT_READ|PROT_WRITE; break; + case MEM_PROT_RWX: p = PROT_READ|PROT_WRITE|PROT_EXEC; break; + default: + ShouldNotReachHere(); + } + // is_committed is unused. + return linux_mprotect(addr, bytes, p); +} + +bool os::guard_memory(char* addr, size_t size) { + return linux_mprotect(addr, size, PROT_NONE); +} + +bool os::unguard_memory(char* addr, size_t size) { + return linux_mprotect(addr, size, PROT_READ|PROT_WRITE); +} + +bool os::Linux::transparent_huge_pages_sanity_check(bool warn, size_t page_size) { + bool result = false; + void *p = mmap(NULL, page_size * 2, PROT_READ|PROT_WRITE, + MAP_ANONYMOUS|MAP_PRIVATE, + -1, 0); + if (p != MAP_FAILED) { + void *aligned_p = align_ptr_up(p, page_size); + + result = madvise(aligned_p, page_size, MADV_HUGEPAGE) == 0; + + munmap(p, page_size * 2); + } + + if (warn && !result) { + warning("TransparentHugePages is not supported by the operating system."); + } + + return result; +} + +bool os::Linux::hugetlbfs_sanity_check(bool warn, size_t page_size) { + bool result = false; + void *p = mmap(NULL, page_size, PROT_READ|PROT_WRITE, + MAP_ANONYMOUS|MAP_PRIVATE|MAP_HUGETLB, + -1, 0); + + if (p != MAP_FAILED) { + // We don't know if this really is a huge page or not. + FILE *fp = fopen("/proc/self/maps", "r"); + if (fp) { + while (!feof(fp)) { + char chars[257]; + long x = 0; + if (fgets(chars, sizeof(chars), fp)) { + if (sscanf(chars, "%lx-%*x", &x) == 1 + && x == (long)p) { + if (strstr (chars, "hugepage")) { + result = true; + break; + } + } + } + } + fclose(fp); + } + munmap(p, page_size); + } + + if (warn && !result) { + warning("HugeTLBFS is not supported by the operating system."); + } + + return result; +} + +/* +* Set the coredump_filter bits to include largepages in core dump (bit 6) +* +* From the coredump_filter documentation: +* +* - (bit 0) anonymous private memory +* - (bit 1) anonymous shared memory +* - (bit 2) file-backed private memory +* - (bit 3) file-backed shared memory +* - (bit 4) ELF header pages in file-backed private memory areas (it is +* effective only if the bit 2 is cleared) +* - (bit 5) hugetlb private memory +* - (bit 6) hugetlb shared memory +*/ +static void set_coredump_filter(void) { + FILE *f; + long cdm; + + if ((f = fopen("/proc/self/coredump_filter", "r+")) == NULL) { + return; + } + + if (fscanf(f, "%lx", &cdm) != 1) { + fclose(f); + return; + } + + rewind(f); + + if ((cdm & LARGEPAGES_BIT) == 0) { + cdm |= LARGEPAGES_BIT; + fprintf(f, "%#lx", cdm); + } + + fclose(f); +} + +// Large page support + +static size_t _large_page_size = 0; + +size_t os::Linux::find_large_page_size() { + size_t large_page_size = 0; + + // large_page_size on Linux is used to round up heap size. x86 uses either + // 2M or 4M page, depending on whether PAE (Physical Address Extensions) + // mode is enabled. AMD64/EM64T uses 2M page in 64bit mode. IA64 can use + // page as large as 256M. + // + // Here we try to figure out page size by parsing /proc/meminfo and looking + // for a line with the following format: + // Hugepagesize: 2048 kB + // + // If we can't determine the value (e.g. 
/proc is not mounted, or the text + // format has been changed), we'll use the largest page size supported by + // the processor. + +#ifndef ZERO + large_page_size = IA32_ONLY(4 * M) AMD64_ONLY(2 * M) IA64_ONLY(256 * M) SPARC_ONLY(4 * M) + ARM_ONLY(2 * M) PPC_ONLY(4 * M); +#endif // ZERO + + FILE *fp = fopen("/proc/meminfo", "r"); + if (fp) { + while (!feof(fp)) { + int x = 0; + char buf[16]; + if (fscanf(fp, "Hugepagesize: %d", &x) == 1) { + if (x && fgets(buf, sizeof(buf), fp) && strcmp(buf, " kB\n") == 0) { + large_page_size = x * K; + break; + } + } else { + // skip to next line + for (;;) { + int ch = fgetc(fp); + if (ch == EOF || ch == (int)'\n') break; + } + } + } + fclose(fp); + } + + if (!FLAG_IS_DEFAULT(LargePageSizeInBytes) && LargePageSizeInBytes != large_page_size) { + warning("Setting LargePageSizeInBytes has no effect on this OS. Large page size is " + SIZE_FORMAT "%s.", byte_size_in_proper_unit(large_page_size), + proper_unit_for_byte_size(large_page_size)); + } + + return large_page_size; +} + +size_t os::Linux::setup_large_page_size() { + _large_page_size = Linux::find_large_page_size(); + const size_t default_page_size = (size_t)Linux::page_size(); + if (_large_page_size > default_page_size) { + _page_sizes[0] = _large_page_size; + _page_sizes[1] = default_page_size; + _page_sizes[2] = 0; + } + + return _large_page_size; +} + +bool os::Linux::setup_large_page_type(size_t page_size) { + if (FLAG_IS_DEFAULT(UseHugeTLBFS) && + FLAG_IS_DEFAULT(UseSHM) && + FLAG_IS_DEFAULT(UseTransparentHugePages)) { + + // The type of large pages has not been specified by the user. + + // Try UseHugeTLBFS and then UseSHM. + UseHugeTLBFS = UseSHM = true; + + // Don't try UseTransparentHugePages since there are known + // performance issues with it turned on. This might change in the future. + UseTransparentHugePages = false; + } + + if (UseTransparentHugePages) { + bool warn_on_failure = !FLAG_IS_DEFAULT(UseTransparentHugePages); + if (transparent_huge_pages_sanity_check(warn_on_failure, page_size)) { + UseHugeTLBFS = false; + UseSHM = false; + return true; + } + UseTransparentHugePages = false; + } + + if (UseHugeTLBFS) { + bool warn_on_failure = !FLAG_IS_DEFAULT(UseHugeTLBFS); + if (hugetlbfs_sanity_check(warn_on_failure, page_size)) { + UseSHM = false; + return true; + } + UseHugeTLBFS = false; + } + + return UseSHM; +} + +void os::large_page_init() { + if (!UseLargePages && + !UseTransparentHugePages && + !UseHugeTLBFS && + !UseSHM) { + // Not using large pages. + return; + } + + if (!FLAG_IS_DEFAULT(UseLargePages) && !UseLargePages) { + // The user explicitly turned off large pages. + // Ignore the rest of the large pages flags. + UseTransparentHugePages = false; + UseHugeTLBFS = false; + UseSHM = false; + return; + } + + size_t large_page_size = Linux::setup_large_page_size(); + UseLargePages = Linux::setup_large_page_type(large_page_size); + + set_coredump_filter(); +} + +#ifndef SHM_HUGETLB +#define SHM_HUGETLB 04000 +#endif + +#define shm_warning_format(format, ...) 
\ + do { \ + if (UseLargePages && \ + (!FLAG_IS_DEFAULT(UseLargePages) || \ + !FLAG_IS_DEFAULT(UseSHM) || \ + !FLAG_IS_DEFAULT(LargePageSizeInBytes))) { \ + warning(format, __VA_ARGS__); \ + } \ + } while (0) + +#define shm_warning(str) shm_warning_format("%s", str) + +#define shm_warning_with_errno(str) \ + do { \ + int err = errno; \ + shm_warning_format(str " (error = %d)", err); \ + } while (0) + +static char* shmat_with_alignment(int shmid, size_t bytes, size_t alignment) { + assert(is_size_aligned(bytes, alignment), "Must be divisible by the alignment"); + + if (!is_size_aligned(alignment, SHMLBA)) { + assert(false, "Code below assumes that alignment is at least SHMLBA aligned"); + return NULL; + } + + // To ensure that we get 'alignment' aligned memory from shmat, + // we pre-reserve aligned virtual memory and then attach to that. + + char* pre_reserved_addr = anon_mmap_aligned(bytes, alignment, NULL); + if (pre_reserved_addr == NULL) { + // Couldn't pre-reserve aligned memory. + shm_warning("Failed to pre-reserve aligned memory for shmat."); + return NULL; + } + + // SHM_REMAP is needed to allow shmat to map over an existing mapping. + char* addr = (char*)shmat(shmid, pre_reserved_addr, SHM_REMAP); + + if ((intptr_t)addr == -1) { + int err = errno; + shm_warning_with_errno("Failed to attach shared memory."); + + assert(err != EACCES, "Unexpected error"); + assert(err != EIDRM, "Unexpected error"); + assert(err != EINVAL, "Unexpected error"); + + // Since we don't know if the kernel unmapped the pre-reserved memory area + // we can't unmap it, since that would potentially unmap memory that was + // mapped from other threads. + return NULL; + } + + return addr; +} + +static char* shmat_at_address(int shmid, char* req_addr) { + if (!is_ptr_aligned(req_addr, SHMLBA)) { + assert(false, "Requested address needs to be SHMLBA aligned"); + return NULL; + } + + char* addr = (char*)shmat(shmid, req_addr, 0); + + if ((intptr_t)addr == -1) { + shm_warning_with_errno("Failed to attach shared memory."); + return NULL; + } + + return addr; +} + +static char* shmat_large_pages(int shmid, size_t bytes, size_t alignment, char* req_addr) { + // If a req_addr has been provided, we assume that the caller has already aligned the address. + if (req_addr != NULL) { + assert(is_ptr_aligned(req_addr, os::large_page_size()), "Must be divisible by the large page size"); + assert(is_ptr_aligned(req_addr, alignment), "Must be divisible by given alignment"); + return shmat_at_address(shmid, req_addr); + } + + // Since shmid has been setup with SHM_HUGETLB, shmat will automatically + // return large page size aligned memory addresses when req_addr == NULL. + // However, if the alignment is larger than the large page size, we have + // to manually ensure that the memory returned is 'alignment' aligned. + if (alignment > os::large_page_size()) { + assert(is_size_aligned(alignment, os::large_page_size()), "Must be divisible by the large page size"); + return shmat_with_alignment(shmid, bytes, alignment); + } else { + return shmat_at_address(shmid, NULL); + } +} + +char* os::Linux::reserve_memory_special_shm(size_t bytes, size_t alignment, char* req_addr, bool exec) { + // "exec" is passed in but not used. Creating the shared image for + // the code cache doesn't have an SHM_X executable permission to check. 
+ assert(UseLargePages && UseSHM, "only for SHM large pages"); + assert(is_ptr_aligned(req_addr, os::large_page_size()), "Unaligned address"); + assert(is_ptr_aligned(req_addr, alignment), "Unaligned address"); + + if (!is_size_aligned(bytes, os::large_page_size())) { + return NULL; // Fallback to small pages. + } + + // Create a large shared memory region to attach to based on size. + // Currently, size is the total size of the heap. + int shmid = shmget(IPC_PRIVATE, bytes, SHM_HUGETLB|IPC_CREAT|SHM_R|SHM_W); + if (shmid == -1) { + // Possible reasons for shmget failure: + // 1. shmmax is too small for Java heap. + // > check shmmax value: cat /proc/sys/kernel/shmmax + // > increase shmmax value: echo "0xffffffff" > /proc/sys/kernel/shmmax + // 2. not enough large page memory. + // > check available large pages: cat /proc/meminfo + // > increase amount of large pages: + // echo new_value > /proc/sys/vm/nr_hugepages + // Note 1: different Linux may use different name for this property, + // e.g. on Redhat AS-3 it is "hugetlb_pool". + // Note 2: it's possible there's enough physical memory available but + // they are so fragmented after a long run that they can't + // coalesce into large pages. Try to reserve large pages when + // the system is still "fresh". + shm_warning_with_errno("Failed to reserve shared memory."); + return NULL; + } + + // Attach to the region. + char* addr = shmat_large_pages(shmid, bytes, alignment, req_addr); + + // Remove shmid. If shmat() is successful, the actual shared memory segment + // will be deleted when it's detached by shmdt() or when the process + // terminates. If shmat() is not successful this will remove the shared + // segment immediately. + shmctl(shmid, IPC_RMID, NULL); + + return addr; +} + +static void warn_on_large_pages_failure(char* req_addr, size_t bytes, int error) { + assert(error == ENOMEM, "Only expect to fail if no memory is available"); + + bool warn_on_failure = UseLargePages && + (!FLAG_IS_DEFAULT(UseLargePages) || + !FLAG_IS_DEFAULT(UseHugeTLBFS) || + !FLAG_IS_DEFAULT(LargePageSizeInBytes)); + + if (warn_on_failure) { + char msg[128]; + jio_snprintf(msg, sizeof(msg), "Failed to reserve large pages memory req_addr: " + PTR_FORMAT " bytes: " SIZE_FORMAT " (errno = %d).", req_addr, bytes, error); + warning("%s", msg); + } +} + +char* os::Linux::reserve_memory_special_huge_tlbfs_only(size_t bytes, char* req_addr, bool exec) { + assert(UseLargePages && UseHugeTLBFS, "only for Huge TLBFS large pages"); + assert(is_size_aligned(bytes, os::large_page_size()), "Unaligned size"); + assert(is_ptr_aligned(req_addr, os::large_page_size()), "Unaligned address"); + + int prot = exec ? PROT_READ|PROT_WRITE|PROT_EXEC : PROT_READ|PROT_WRITE; + char* addr = (char*)::mmap(req_addr, bytes, prot, + MAP_PRIVATE|MAP_ANONYMOUS|MAP_HUGETLB, + -1, 0); + + if (addr == MAP_FAILED) { + warn_on_large_pages_failure(req_addr, bytes, errno); + return NULL; + } + + assert(is_ptr_aligned(addr, os::large_page_size()), "Must be"); + + return addr; } // Reserve memory using mmap(MAP_HUGETLB).
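The new shmat_with_alignment() path is the heart of this fix: when the requested alignment (for example a G1HeapRegionSize larger than the huge page size) exceeds os::large_page_size(), the VM now pre-reserves an alignment-aligned anonymous mapping and attaches the SysV segment over it with SHM_REMAP, instead of returning NULL and falling back to small pages as the old reserve_memory_special_shm() did. Below is a minimal standalone sketch of that technique, not the HotSpot code itself; the 8 MB alignment, the 2 MB huge page assumption, and the helper name reserve_shm_aligned are illustrative only, and it can only succeed on a Linux system with a configured hugetlb pool.

```cpp
// Sketch of the pre-reserve-then-SHM_REMAP technique used by the patch's
// shmat_with_alignment()/anon_mmap_aligned() pair (assumptions noted above).
#include <sys/ipc.h>
#include <sys/mman.h>
#include <sys/shm.h>
#include <stdint.h>
#include <stdio.h>

#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000   // same fallback definition the patch uses
#endif
#ifndef SHM_REMAP
#define SHM_REMAP 040000    // Linux-specific: allow shmat() over an existing mapping
#endif

static char* reserve_shm_aligned(size_t bytes, size_t alignment) {
  // Create the huge-page-backed segment; fails unless a hugetlb pool is configured.
  int shmid = shmget(IPC_PRIVATE, bytes, SHM_HUGETLB | IPC_CREAT | SHM_R | SHM_W);
  if (shmid == -1) { perror("shmget"); return NULL; }

  char* addr = NULL;
  // Over-reserve anonymous memory so an alignment-aligned start can be carved out.
  size_t extra = bytes + alignment;
  char* base = (char*)mmap(NULL, extra, PROT_NONE,
                           MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1, 0);
  if (base != MAP_FAILED) {
    char* aligned = (char*)(((uintptr_t)base + alignment - 1) & ~(alignment - 1));
    // Trim the unused head and tail of the reservation.
    if (aligned > base)                 munmap(base, aligned - base);
    if (aligned + bytes < base + extra) munmap(aligned + bytes, (base + extra) - (aligned + bytes));
    // SHM_REMAP lets shmat() replace the pre-reserved mapping at exactly 'aligned'.
    addr = (char*)shmat(shmid, aligned, SHM_REMAP);
    if (addr == (char*)-1) { perror("shmat"); addr = NULL; }
  }

  // Mark the segment for removal; it persists until the attachment is detached
  // or the process exits, matching the shmctl(IPC_RMID) call in the patch.
  shmctl(shmid, IPC_RMID, NULL);
  return addr;
}

int main() {
  const size_t alignment = 8 * 1024 * 1024;   // e.g. G1HeapRegionSize=8m > 2 MB huge pages
  char* heap = reserve_shm_aligned(4 * alignment, alignment);
  printf("reserved at %p\n", (void*)heap);
  return heap == NULL ? 1 : 0;
}
```

The design point the patch relies on is that SHM_HUGETLB segments are only guaranteed to come back huge-page aligned from shmat(), so any stricter alignment has to be manufactured by the caller; pre-reserving with mmap and remapping over it provides that without giving up large pages.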