changeset 60362:2a4c6b72e5c3

8247515: OSX pc_to_symbol() lookup does not work with core files Reviewed-by: sspitsyn, kevinw
author cjplummer
date Tue, 28 Jul 2020 09:52:07 -0700
parents efc15bd70e48
children 18b22c189e69
files src/jdk.hotspot.agent/macosx/native/libsaproc/libproc_impl.c src/jdk.hotspot.agent/macosx/native/libsaproc/libproc_impl.h src/jdk.hotspot.agent/macosx/native/libsaproc/ps_core.c src/jdk.hotspot.agent/macosx/native/libsaproc/symtab.c
diffstat 4 files changed, 103 insertions(+), 47 deletions(-) [+]
line wrap: on
line diff
--- a/src/jdk.hotspot.agent/macosx/native/libsaproc/libproc_impl.c	Tue Jul 28 09:25:23 2020 -0700
+++ b/src/jdk.hotspot.agent/macosx/native/libsaproc/libproc_impl.c	Tue Jul 28 09:52:07 2020 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -208,11 +208,11 @@
   free(ph);
 }
 
-lib_info* add_lib_info(struct ps_prochandle* ph, const char* libname, uintptr_t base) {
-  return add_lib_info_fd(ph, libname, -1, base);
+lib_info* add_lib_info(struct ps_prochandle* ph, const char* libname, uintptr_t base, size_t memsz) {
+  return add_lib_info_fd(ph, libname, -1, base, memsz);
 }
 
-lib_info* add_lib_info_fd(struct ps_prochandle* ph, const char* libname, int fd, uintptr_t base) {
+lib_info* add_lib_info_fd(struct ps_prochandle* ph, const char* libname, int fd, uintptr_t base, size_t memsz) {
    lib_info* newlib;
   print_debug("add_lib_info_fd %s\n", libname);
 
@@ -229,6 +229,7 @@
   strcpy(newlib->name, libname);
 
   newlib->base = base;
+  newlib->memsz = memsz;
 
   if (fd == -1) {
     if ( (newlib->fd = pathmap_open(newlib->name)) < 0) {
@@ -262,7 +263,7 @@
   if (newlib->symtab == NULL) {
     print_debug("symbol table build failed for %s\n", newlib->name);
   } else {
-    print_debug("built symbol table for %s\n", newlib->name);
+    print_debug("built symbol table for 0x%lx %s\n", newlib, newlib->name);
   }
 
   // even if symbol table building fails, we add the lib_info.
@@ -305,8 +306,12 @@
 const char* symbol_for_pc(struct ps_prochandle* ph, uintptr_t addr, uintptr_t* poffset) {
   const char* res = NULL;
   lib_info* lib = ph->libs;
+  print_debug("symbol_for_pc: addr 0x%lx\n", addr);
   while (lib) {
-    if (lib->symtab && addr >= lib->base) {
+    print_debug("symbol_for_pc: checking lib 0x%lx 0x%lx %s\n", lib->base, lib->memsz, lib->name);
+    if (lib->symtab && addr >= lib->base && addr < lib->base + lib->memsz) {
+      print_debug("symbol_for_pc: address=0x%lx offset=0x%lx found inside lib base=0x%lx memsz=0x%lx %s\n",
+                  addr, addr - lib->base, lib->base, lib->memsz, lib->name);
       res = nearest_symbol(lib->symtab, addr - lib->base, poffset);
       if (res) return res;
     }
--- a/src/jdk.hotspot.agent/macosx/native/libsaproc/libproc_impl.h	Tue Jul 28 09:25:23 2020 -0700
+++ b/src/jdk.hotspot.agent/macosx/native/libsaproc/libproc_impl.h	Tue Jul 28 09:52:07 2020 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -95,6 +95,7 @@
   struct symtab*   symtab;
   int              fd;        // file descriptor for lib
   struct lib_info* next;
+  size_t           memsz;
 } lib_info;
 
 // list of threads
@@ -108,8 +109,8 @@
 // list of virtual memory maps
 typedef struct map_info {
    int              fd;       // file descriptor
-   off_t            offset;   // file offset of this mapping
-   uintptr_t        vaddr;    // starting virtual address
+   uint64_t         offset;   // file offset of this mapping
+   uint64_t         vaddr;    // starting virtual address
    size_t           memsz;    // size of the mapping
    struct map_info* next;
 } map_info;
@@ -170,11 +171,11 @@
 bool read_thread_info(struct ps_prochandle* ph, thread_info_callback cb);
 
 // adds a new shared object to lib list, returns NULL on failure
-lib_info* add_lib_info(struct ps_prochandle* ph, const char* libname, uintptr_t base);
+lib_info* add_lib_info(struct ps_prochandle* ph, const char* libname, uintptr_t base, size_t memsz);
 
 // adds a new shared object to lib list, supply open lib file descriptor as well
 lib_info* add_lib_info_fd(struct ps_prochandle* ph, const char* libname, int fd,
-                          uintptr_t base);
+                          uintptr_t base, size_t memsz);
 
 sa_thread_info* add_thread_info(struct ps_prochandle* ph, pthread_t pthread_id, lwpid_t lwp_id);
 // a test for ELF signature without using libelf
--- a/src/jdk.hotspot.agent/macosx/native/libsaproc/ps_core.c	Tue Jul 28 09:25:23 2020 -0700
+++ b/src/jdk.hotspot.agent/macosx/native/libsaproc/ps_core.c	Tue Jul 28 09:52:07 2020 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -241,6 +241,7 @@
       goto err;
     }
     offset += lcmd.cmdsize;    // next command position
+    //print_debug("LC: 0x%x\n", lcmd.cmd);
     if (lcmd.cmd == LC_SEGMENT_64) {
       lseek(fd, -sizeof(load_command), SEEK_CUR);
       if (read(fd, (void *)&segcmd, sizeof(segment_command_64)) != sizeof(segment_command_64)) {
@@ -251,8 +252,9 @@
         print_debug("Failed to add map_info at i = %d\n", i);
         goto err;
       }
-      print_debug("segment added: %" PRIu64 " 0x%" PRIx64 " %d\n",
-                   segcmd.fileoff, segcmd.vmaddr, segcmd.vmsize);
+      print_debug("LC_SEGMENT_64 added: nsects=%d fileoff=0x%llx vmaddr=0x%llx vmsize=0x%llx filesize=0x%llx %s\n",
+                  segcmd.nsects, segcmd.fileoff, segcmd.vmaddr, segcmd.vmsize,
+                  segcmd.filesize, &segcmd.segname[0]);
     } else if (lcmd.cmd == LC_THREAD || lcmd.cmd == LC_UNIXTHREAD) {
       typedef struct thread_fc {
         uint32_t  flavor;
@@ -440,7 +442,7 @@
       // only search core file!
       continue;
     }
-    print_debug("map_info %d: vmaddr = 0x%016" PRIx64 "  fileoff = %" PRIu64 "  vmsize = %" PRIu64 "\n",
+    print_debug("map_info %d: vmaddr = 0x%016llx fileoff = 0x%llx vmsize = 0x%lx\n",
                            j, iter->vaddr, iter->offset, iter->memsz);
     lseek(fd, fpos, SEEK_SET);
     // we assume .dylib loaded at segment address --- which is true for JVM libraries
@@ -464,7 +466,7 @@
         continue;
       }
       lseek(fd, -sizeof(uint32_t), SEEK_CUR);
-      // this is the file begining to core file.
+      // This is the begining of the mach-o file in the segment.
       if (read(fd, (void *)&header, sizeof(mach_header_64)) != sizeof(mach_header_64)) {
         goto err;
       }
@@ -497,18 +499,26 @@
             if (name[j] == '\0') break;
             j++;
           }
-          print_debug("%s\n", name);
+          print_debug("%d %s\n", lcmd.cmd, name);
           // changed name from @rpath/xxxx.dylib to real path
           if (strrchr(name, '@')) {
             get_real_path(ph, name);
             print_debug("get_real_path returned: %s\n", name);
+          } else {
+            break; // Ignore non-relative paths, which are system libs. See JDK-8249779.
           }
-          add_lib_info(ph, name, iter->vaddr);
+          add_lib_info(ph, name, iter->vaddr, iter->memsz);
           break;
         }
       }
       // done with the file, advanced to next page to search more files
+#if 0
+      // This line is disabled due to JDK-8249779. Instead we break out of the loop
+      // and don't attempt to find any more mach-o files in this segment.
       fpos = (ltell(fd) + pagesize - 1) / pagesize * pagesize;
+#else
+      break;
+#endif
     }
   }
   return true;
--- a/src/jdk.hotspot.agent/macosx/native/libsaproc/symtab.c	Tue Jul 28 09:25:23 2020 -0700
+++ b/src/jdk.hotspot.agent/macosx/native/libsaproc/symtab.c	Tue Jul 28 09:52:07 2020 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -24,6 +24,7 @@
 
 #include <unistd.h>
 #include <search.h>
+#include <stddef.h>
 #include <stdlib.h>
 #include <string.h>
 #include <db.h>
@@ -57,12 +58,14 @@
 
 void build_search_table(symtab_t *symtab) {
   int i;
+  print_debug("build_search_table\n");
   for (i = 0; i < symtab->num_symbols; i++) {
     DBT key, value;
     key.data = symtab->symbols[i].name;
     key.size = strlen(key.data) + 1;
     value.data = &(symtab->symbols[i]);
     value.size = sizeof(symtab_symbol);
+    //print_debug("build_search_table: %d 0x%x %s\n", i, symtab->symbols[i].offset, symtab->symbols[i].name);
     (*symtab->hash_table->put)(symtab->hash_table, &key, &value, 0);
 
     // check result
@@ -92,10 +95,11 @@
 // read symbol table from given fd.
 struct symtab* build_symtab(int fd) {
   symtab_t* symtab = NULL;
-  int i;
+  int i, j;
   mach_header_64 header;
   off_t image_start;
 
+  print_debug("build_symtab\n");
   if (!get_arch_off(fd, CPU_TYPE_X86_64, &image_start)) {
     print_debug("failed in get fat header\n");
     return NULL;
@@ -151,6 +155,7 @@
   if (symtab->hash_table == NULL)
     goto quit;
 
+  // allocate the symtab
   symtab->num_symbols = symtabcmd.nsyms;
   symtab->symbols = (symtab_symbol *)malloc(sizeof(symtab_symbol) * symtab->num_symbols);
   symtab->strs    = (char *)malloc(sizeof(char) * symtabcmd.strsize);
@@ -158,17 +163,8 @@
      print_debug("out of memory: allocating symtab.symbol or symtab.strs\n");
      goto quit;
   }
-  lseek(fd, image_start + symtabcmd.symoff, SEEK_SET);
-  for (i = 0; i < symtab->num_symbols; i++) {
-    if (read(fd, (void *)&lentry, sizeof(nlist_64)) != sizeof(nlist_64)) {
-      print_debug("read nlist_64 failed at %i\n", i);
-      goto quit;
-    }
-    symtab->symbols[i].offset = lentry.n_value;
-    symtab->symbols[i].size  = lentry.n_un.n_strx;        // index
-  }
 
-  // string table
+  // read in the string table
   lseek(fd, image_start + symtabcmd.stroff, SEEK_SET);
   int size = read(fd, (void *)(symtab->strs), symtabcmd.strsize * sizeof(char));
   if (size != symtabcmd.strsize * sizeof(char)) {
@@ -176,21 +172,42 @@
      goto quit;
   }
 
-  for (i = 0; i < symtab->num_symbols; i++) {
-    symtab->symbols[i].name = symtab->strs + symtab->symbols[i].size;
-    if (i > 0) {
-      // fix size
-      symtab->symbols[i - 1].size = symtab->symbols[i].size - symtab->symbols[i - 1].size;
-      print_debug("%s size = %d\n", symtab->symbols[i - 1].name, symtab->symbols[i - 1].size);
-
+  // read in each nlist_64 from the symbol table and use to fill in symtab->symbols
+  lseek(fd, image_start + symtabcmd.symoff, SEEK_SET);
+  i = 0;
+  for (j = 0; j < symtab->num_symbols; j++) {
+    if (read(fd, (void *)&lentry, sizeof(nlist_64)) != sizeof(nlist_64)) {
+      print_debug("read nlist_64 failed at %j\n", j);
+      goto quit;
     }
 
-    if (i == symtab->num_symbols - 1) {
-      // last index
-      symtab->symbols[i].size =
-            symtabcmd.strsize - symtab->symbols[i].size;
-      print_debug("%s size = %d\n", symtab->symbols[i].name, symtab->symbols[i].size);
+    uintptr_t offset = lentry.n_value;     // offset of the symbol code/data in the file
+    uintptr_t stridx = lentry.n_un.n_strx; // offset of symbol string in the symtabcmd.symoff section
+
+    if (stridx == 0 || offset == 0) {
+      continue; // Skip this entry. It's not a reference to code or data
     }
+    symtab->symbols[i].offset = offset;
+    symtab->symbols[i].name = symtab->strs + stridx;
+    symtab->symbols[i].size = strlen(symtab->symbols[i].name);
+
+    if (symtab->symbols[i].size == 0) {
+      continue; // Skip this entry. It points to an empty string.
+    }
+
+    print_debug("symbol read: %d %d n_type=0x%x n_sect=0x%x n_desc=0x%x n_strx=0x%lx offset=0x%lx %s\n",
+                j, i, lentry.n_type, lentry.n_sect, lentry.n_desc, stridx, offset, symtab->symbols[i].name);
+    i++;
+  }
+
+  // Update symtab->num_symbols to be the actual number of symbols we added. Since the symbols
+  // array was allocated larger, reallocate it to the proper size.
+  print_debug("build_symtab: included %d of %d entries.\n", i, symtab->num_symbols);
+  symtab->num_symbols = i;
+  symtab->symbols = (symtab_symbol *)realloc(symtab->symbols, sizeof(symtab_symbol) * symtab->num_symbols);
+  if (symtab->symbols == NULL) {
+     print_debug("out of memory: reallocating symtab.symbol\n");
+     goto quit;
   }
 
   // build a hashtable for fast query
@@ -389,14 +406,37 @@
 const char* nearest_symbol(struct symtab* symtab, uintptr_t offset,
                            uintptr_t* poffset) {
   int n = 0;
+  char* result = NULL;
+  ptrdiff_t lowest_offset_from_sym = -1;
   if (!symtab) return NULL;
+  // Search the symbol table for the symbol that is closest to the specified offset, but is not under.
+  //
+  // Note we can't just use the first symbol that is >= the offset because the symbols may not be
+  // sorted by offset.
+  //
+  // Note this is a rather slow search that is O(n/2), and libjvm has as many as 250k symbols.
+  // Probably would be good to sort the array and do a binary search, or use a hash table like
+  // we do for name -> address lookups. However, this functionality is not used often and
+  // generally just involves one lookup, such as with the clhsdb "findpc" command.
   for (; n < symtab->num_symbols; n++) {
     symtab_symbol* sym = &(symtab->symbols[n]);
-    if (sym->name != NULL &&
-      offset >= sym->offset && offset < sym->offset + sym->size) {
-      if (poffset) *poffset = (offset - sym->offset);
-      return sym->name;
+    if (sym->size != 0 && offset >= sym->offset) {
+      ptrdiff_t offset_from_sym = offset - sym->offset;
+      if (offset_from_sym >= 0) { // ignore symbols that come after "offset"
+        if (lowest_offset_from_sym == -1 || offset_from_sym < lowest_offset_from_sym) {
+          lowest_offset_from_sym = offset_from_sym;
+          result = sym->name;
+          //print_debug("nearest_symbol: found %d %s 0x%x 0x%x 0x%x\n",
+          //            n, sym->name, offset, sym->offset, lowest_offset_from_sym);
+        }
+      }
     }
   }
-  return NULL;
+  print_debug("nearest_symbol: found symbol %d file_offset=0x%lx sym_offset=0x%lx %s\n",
+              n, offset, lowest_offset_from_sym, result);
+  // Save the offset from the symbol if requested.
+  if (result != NULL && poffset) {
+    *poffset = lowest_offset_from_sym;
+  }
+  return result;
 }