diff options
author | Vincent Ambo <tazjin@google.com> | 2020-05-27T00·26+0100 |
---|---|---|
committer | Vincent Ambo <tazjin@google.com> | 2020-05-27T00·26+0100 |
commit | afe04691aca3f669f517adaeb5bd4a87a481fb4a (patch) | |
tree | 60ae6c91a3959b2f6486256e26ff126e598d1e5d /src/symbolize.cc |
Squashed 'third_party/glog/' content from commit 9ef754a3023
git-subtree-dir: third_party/glog git-subtree-split: 9ef754a3023e6fd10f20fe53dfca96dd898182e3
Diffstat (limited to 'src/symbolize.cc')
-rw-r--r-- | src/symbolize.cc | 966 |
1 files changed, 966 insertions, 0 deletions
diff --git a/src/symbolize.cc b/src/symbolize.cc new file mode 100644 index 000000000000..76a1b0febeec --- /dev/null +++ b/src/symbolize.cc @@ -0,0 +1,966 @@ +// Copyright (c) 2006, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Author: Satoru Takabayashi +// Stack-footprint reduction work done by Raksit Ashok +// +// Implementation note: +// +// We don't use heaps but only use stacks. We want to reduce the +// stack consumption so that the symbolizer can run on small stacks. +// +// Here are some numbers collected with GCC 4.1.0 on x86: +// - sizeof(Elf32_Sym) = 16 +// - sizeof(Elf32_Shdr) = 40 +// - sizeof(Elf64_Sym) = 24 +// - sizeof(Elf64_Shdr) = 64 +// +// This implementation is intended to be async-signal-safe but uses +// some functions which are not guaranteed to be so, such as memchr() +// and memmove(). We assume they are async-signal-safe. +// +// Additional header can be specified by the GLOG_BUILD_CONFIG_INCLUDE +// macro to add platform specific defines (e.g. OS_OPENBSD). + +#ifdef GLOG_BUILD_CONFIG_INCLUDE +#include GLOG_BUILD_CONFIG_INCLUDE +#endif // GLOG_BUILD_CONFIG_INCLUDE + +#include "utilities.h" + +#if defined(HAVE_SYMBOLIZE) + +#include <string.h> + +#include <algorithm> +#include <limits> + +#include "symbolize.h" +#include "demangle.h" + +_START_GOOGLE_NAMESPACE_ + +// We don't use assert() since it's not guaranteed to be +// async-signal-safe. Instead we define a minimal assertion +// macro. So far, we don't need pretty printing for __FILE__, etc. + +// A wrapper for abort() to make it callable in ? :. +static int AssertFail() { + abort(); + return 0; // Should not reach. +} + +#define SAFE_ASSERT(expr) ((expr) ? 0 : AssertFail()) + +static SymbolizeCallback g_symbolize_callback = NULL; +void InstallSymbolizeCallback(SymbolizeCallback callback) { + g_symbolize_callback = callback; +} + +static SymbolizeOpenObjectFileCallback g_symbolize_open_object_file_callback = + NULL; +void InstallSymbolizeOpenObjectFileCallback( + SymbolizeOpenObjectFileCallback callback) { + g_symbolize_open_object_file_callback = callback; +} + +// This function wraps the Demangle function to provide an interface +// where the input symbol is demangled in-place. +// To keep stack consumption low, we would like this function to not +// get inlined. +static ATTRIBUTE_NOINLINE void DemangleInplace(char *out, int out_size) { + char demangled[256]; // Big enough for sane demangled symbols. + if (Demangle(out, demangled, sizeof(demangled))) { + // Demangling succeeded. Copy to out if the space allows. + size_t len = strlen(demangled); + if (len + 1 <= (size_t)out_size) { // +1 for '\0'. + SAFE_ASSERT(len < sizeof(demangled)); + memmove(out, demangled, len + 1); + } + } +} + +_END_GOOGLE_NAMESPACE_ + +#if defined(__ELF__) + +#if defined(HAVE_DLFCN_H) +#include <dlfcn.h> +#endif +#if defined(OS_OPENBSD) +#include <sys/exec_elf.h> +#else +#include <elf.h> +#endif +#include <errno.h> +#include <fcntl.h> +#include <limits.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <stddef.h> +#include <string.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <unistd.h> + +#include "symbolize.h" +#include "config.h" +#include "glog/raw_logging.h" + +// Re-runs fn until it doesn't cause EINTR. +#define NO_INTR(fn) do {} while ((fn) < 0 && errno == EINTR) + +_START_GOOGLE_NAMESPACE_ + +// Read up to "count" bytes from "offset" in the file pointed by file +// descriptor "fd" into the buffer starting at "buf" while handling short reads +// and EINTR. On success, return the number of bytes read. Otherwise, return +// -1. +static ssize_t ReadFromOffset(const int fd, void *buf, const size_t count, + const off_t offset) { + SAFE_ASSERT(fd >= 0); + SAFE_ASSERT(count <= std::numeric_limits<ssize_t>::max()); + char *buf0 = reinterpret_cast<char *>(buf); + ssize_t num_bytes = 0; + while (num_bytes < count) { + ssize_t len; + NO_INTR(len = pread(fd, buf0 + num_bytes, count - num_bytes, + offset + num_bytes)); + if (len < 0) { // There was an error other than EINTR. + return -1; + } + if (len == 0) { // Reached EOF. + break; + } + num_bytes += len; + } + SAFE_ASSERT(num_bytes <= count); + return num_bytes; +} + +// Try reading exactly "count" bytes from "offset" bytes in a file +// pointed by "fd" into the buffer starting at "buf" while handling +// short reads and EINTR. On success, return true. Otherwise, return +// false. +static bool ReadFromOffsetExact(const int fd, void *buf, + const size_t count, const off_t offset) { + ssize_t len = ReadFromOffset(fd, buf, count, offset); + return len == count; +} + +// Returns elf_header.e_type if the file pointed by fd is an ELF binary. +static int FileGetElfType(const int fd) { + ElfW(Ehdr) elf_header; + if (!ReadFromOffsetExact(fd, &elf_header, sizeof(elf_header), 0)) { + return -1; + } + if (memcmp(elf_header.e_ident, ELFMAG, SELFMAG) != 0) { + return -1; + } + return elf_header.e_type; +} + +// Read the section headers in the given ELF binary, and if a section +// of the specified type is found, set the output to this section header +// and return true. Otherwise, return false. +// To keep stack consumption low, we would like this function to not get +// inlined. +static ATTRIBUTE_NOINLINE bool +GetSectionHeaderByType(const int fd, ElfW(Half) sh_num, const off_t sh_offset, + ElfW(Word) type, ElfW(Shdr) *out) { + // Read at most 16 section headers at a time to save read calls. + ElfW(Shdr) buf[16]; + for (int i = 0; i < sh_num;) { + const ssize_t num_bytes_left = (sh_num - i) * sizeof(buf[0]); + const ssize_t num_bytes_to_read = + (sizeof(buf) > num_bytes_left) ? num_bytes_left : sizeof(buf); + const ssize_t len = ReadFromOffset(fd, buf, num_bytes_to_read, + sh_offset + i * sizeof(buf[0])); + if (len == -1) { + return false; + } + SAFE_ASSERT(len % sizeof(buf[0]) == 0); + const ssize_t num_headers_in_buf = len / sizeof(buf[0]); + SAFE_ASSERT(num_headers_in_buf <= sizeof(buf) / sizeof(buf[0])); + for (int j = 0; j < num_headers_in_buf; ++j) { + if (buf[j].sh_type == type) { + *out = buf[j]; + return true; + } + } + i += num_headers_in_buf; + } + return false; +} + +// There is no particular reason to limit section name to 63 characters, +// but there has (as yet) been no need for anything longer either. +const int kMaxSectionNameLen = 64; + +// name_len should include terminating '\0'. +bool GetSectionHeaderByName(int fd, const char *name, size_t name_len, + ElfW(Shdr) *out) { + ElfW(Ehdr) elf_header; + if (!ReadFromOffsetExact(fd, &elf_header, sizeof(elf_header), 0)) { + return false; + } + + ElfW(Shdr) shstrtab; + off_t shstrtab_offset = (elf_header.e_shoff + + elf_header.e_shentsize * elf_header.e_shstrndx); + if (!ReadFromOffsetExact(fd, &shstrtab, sizeof(shstrtab), shstrtab_offset)) { + return false; + } + + for (int i = 0; i < elf_header.e_shnum; ++i) { + off_t section_header_offset = (elf_header.e_shoff + + elf_header.e_shentsize * i); + if (!ReadFromOffsetExact(fd, out, sizeof(*out), section_header_offset)) { + return false; + } + char header_name[kMaxSectionNameLen]; + if (sizeof(header_name) < name_len) { + RAW_LOG(WARNING, "Section name '%s' is too long (%" PRIuS "); " + "section will not be found (even if present).", name, name_len); + // No point in even trying. + return false; + } + off_t name_offset = shstrtab.sh_offset + out->sh_name; + ssize_t n_read = ReadFromOffset(fd, &header_name, name_len, name_offset); + if (n_read == -1) { + return false; + } else if (n_read != name_len) { + // Short read -- name could be at end of file. + continue; + } + if (memcmp(header_name, name, name_len) == 0) { + return true; + } + } + return false; +} + +// Read a symbol table and look for the symbol containing the +// pc. Iterate over symbols in a symbol table and look for the symbol +// containing "pc". On success, return true and write the symbol name +// to out. Otherwise, return false. +// To keep stack consumption low, we would like this function to not get +// inlined. +static ATTRIBUTE_NOINLINE bool +FindSymbol(uint64_t pc, const int fd, char *out, int out_size, + uint64_t symbol_offset, const ElfW(Shdr) *strtab, + const ElfW(Shdr) *symtab) { + if (symtab == NULL) { + return false; + } + const int num_symbols = symtab->sh_size / symtab->sh_entsize; + for (int i = 0; i < num_symbols;) { + off_t offset = symtab->sh_offset + i * symtab->sh_entsize; + + // If we are reading Elf64_Sym's, we want to limit this array to + // 32 elements (to keep stack consumption low), otherwise we can + // have a 64 element Elf32_Sym array. +#if __WORDSIZE == 64 +#define NUM_SYMBOLS 32 +#else +#define NUM_SYMBOLS 64 +#endif + + // Read at most NUM_SYMBOLS symbols at once to save read() calls. + ElfW(Sym) buf[NUM_SYMBOLS]; + int num_symbols_to_read = std::min(NUM_SYMBOLS, num_symbols - i); + const ssize_t len = + ReadFromOffset(fd, &buf, sizeof(buf[0]) * num_symbols_to_read, offset); + SAFE_ASSERT(len % sizeof(buf[0]) == 0); + const ssize_t num_symbols_in_buf = len / sizeof(buf[0]); + SAFE_ASSERT(num_symbols_in_buf <= num_symbols_to_read); + for (int j = 0; j < num_symbols_in_buf; ++j) { + const ElfW(Sym)& symbol = buf[j]; + uint64_t start_address = symbol.st_value; + start_address += symbol_offset; + uint64_t end_address = start_address + symbol.st_size; + if (symbol.st_value != 0 && // Skip null value symbols. + symbol.st_shndx != 0 && // Skip undefined symbols. + start_address <= pc && pc < end_address) { + ssize_t len1 = ReadFromOffset(fd, out, out_size, + strtab->sh_offset + symbol.st_name); + if (len1 <= 0 || memchr(out, '\0', out_size) == NULL) { + memset(out, 0, out_size); + return false; + } + return true; // Obtained the symbol name. + } + } + i += num_symbols_in_buf; + } + return false; +} + +// Get the symbol name of "pc" from the file pointed by "fd". Process +// both regular and dynamic symbol tables if necessary. On success, +// write the symbol name to "out" and return true. Otherwise, return +// false. +static bool GetSymbolFromObjectFile(const int fd, + uint64_t pc, + char* out, + int out_size, + uint64_t base_address) { + // Read the ELF header. + ElfW(Ehdr) elf_header; + if (!ReadFromOffsetExact(fd, &elf_header, sizeof(elf_header), 0)) { + return false; + } + + ElfW(Shdr) symtab, strtab; + + // Consult a regular symbol table first. + if (GetSectionHeaderByType(fd, elf_header.e_shnum, elf_header.e_shoff, + SHT_SYMTAB, &symtab)) { + if (!ReadFromOffsetExact(fd, &strtab, sizeof(strtab), elf_header.e_shoff + + symtab.sh_link * sizeof(symtab))) { + return false; + } + if (FindSymbol(pc, fd, out, out_size, base_address, &strtab, &symtab)) { + return true; // Found the symbol in a regular symbol table. + } + } + + // If the symbol is not found, then consult a dynamic symbol table. + if (GetSectionHeaderByType(fd, elf_header.e_shnum, elf_header.e_shoff, + SHT_DYNSYM, &symtab)) { + if (!ReadFromOffsetExact(fd, &strtab, sizeof(strtab), elf_header.e_shoff + + symtab.sh_link * sizeof(symtab))) { + return false; + } + if (FindSymbol(pc, fd, out, out_size, base_address, &strtab, &symtab)) { + return true; // Found the symbol in a dynamic symbol table. + } + } + + return false; +} + +namespace { +// Thin wrapper around a file descriptor so that the file descriptor +// gets closed for sure. +struct FileDescriptor { + const int fd_; + explicit FileDescriptor(int fd) : fd_(fd) {} + ~FileDescriptor() { + if (fd_ >= 0) { + close(fd_); + } + } + int get() { return fd_; } + + private: + explicit FileDescriptor(const FileDescriptor&); + void operator=(const FileDescriptor&); +}; + +// Helper class for reading lines from file. +// +// Note: we don't use ProcMapsIterator since the object is big (it has +// a 5k array member) and uses async-unsafe functions such as sscanf() +// and snprintf(). +class LineReader { + public: + explicit LineReader(int fd, char *buf, int buf_len, off_t offset) + : fd_(fd), + buf_(buf), + buf_len_(buf_len), + offset_(offset), + bol_(buf), + eol_(buf), + eod_(buf) {} + + // Read '\n'-terminated line from file. On success, modify "bol" + // and "eol", then return true. Otherwise, return false. + // + // Note: if the last line doesn't end with '\n', the line will be + // dropped. It's an intentional behavior to make the code simple. + bool ReadLine(const char **bol, const char **eol) { + if (BufferIsEmpty()) { // First time. + const ssize_t num_bytes = ReadFromOffset(fd_, buf_, buf_len_, offset_); + if (num_bytes <= 0) { // EOF or error. + return false; + } + offset_ += num_bytes; + eod_ = buf_ + num_bytes; + bol_ = buf_; + } else { + bol_ = eol_ + 1; // Advance to the next line in the buffer. + SAFE_ASSERT(bol_ <= eod_); // "bol_" can point to "eod_". + if (!HasCompleteLine()) { + const int incomplete_line_length = eod_ - bol_; + // Move the trailing incomplete line to the beginning. + memmove(buf_, bol_, incomplete_line_length); + // Read text from file and append it. + char * const append_pos = buf_ + incomplete_line_length; + const int capacity_left = buf_len_ - incomplete_line_length; + const ssize_t num_bytes = + ReadFromOffset(fd_, append_pos, capacity_left, offset_); + if (num_bytes <= 0) { // EOF or error. + return false; + } + offset_ += num_bytes; + eod_ = append_pos + num_bytes; + bol_ = buf_; + } + } + eol_ = FindLineFeed(); + if (eol_ == NULL) { // '\n' not found. Malformed line. + return false; + } + *eol_ = '\0'; // Replace '\n' with '\0'. + + *bol = bol_; + *eol = eol_; + return true; + } + + // Beginning of line. + const char *bol() { + return bol_; + } + + // End of line. + const char *eol() { + return eol_; + } + + private: + explicit LineReader(const LineReader&); + void operator=(const LineReader&); + + char *FindLineFeed() { + return reinterpret_cast<char *>(memchr(bol_, '\n', eod_ - bol_)); + } + + bool BufferIsEmpty() { + return buf_ == eod_; + } + + bool HasCompleteLine() { + return !BufferIsEmpty() && FindLineFeed() != NULL; + } + + const int fd_; + char * const buf_; + const int buf_len_; + off_t offset_; + char *bol_; + char *eol_; + const char *eod_; // End of data in "buf_". +}; +} // namespace + +// Place the hex number read from "start" into "*hex". The pointer to +// the first non-hex character or "end" is returned. +static char *GetHex(const char *start, const char *end, uint64_t *hex) { + *hex = 0; + const char *p; + for (p = start; p < end; ++p) { + int ch = *p; + if ((ch >= '0' && ch <= '9') || + (ch >= 'A' && ch <= 'F') || (ch >= 'a' && ch <= 'f')) { + *hex = (*hex << 4) | (ch < 'A' ? ch - '0' : (ch & 0xF) + 9); + } else { // Encountered the first non-hex character. + break; + } + } + SAFE_ASSERT(p <= end); + return const_cast<char *>(p); +} + +// Searches for the object file (from /proc/self/maps) that contains +// the specified pc. If found, sets |start_address| to the start address +// of where this object file is mapped in memory, sets the module base +// address into |base_address|, copies the object file name into +// |out_file_name|, and attempts to open the object file. If the object +// file is opened successfully, returns the file descriptor. Otherwise, +// returns -1. |out_file_name_size| is the size of the file name buffer +// (including the null-terminator). +static ATTRIBUTE_NOINLINE int +OpenObjectFileContainingPcAndGetStartAddress(uint64_t pc, + uint64_t &start_address, + uint64_t &base_address, + char *out_file_name, + int out_file_name_size) { + int object_fd; + + int maps_fd; + NO_INTR(maps_fd = open("/proc/self/maps", O_RDONLY)); + FileDescriptor wrapped_maps_fd(maps_fd); + if (wrapped_maps_fd.get() < 0) { + return -1; + } + + int mem_fd; + NO_INTR(mem_fd = open("/proc/self/mem", O_RDONLY)); + FileDescriptor wrapped_mem_fd(mem_fd); + if (wrapped_mem_fd.get() < 0) { + return -1; + } + + // Iterate over maps and look for the map containing the pc. Then + // look into the symbol tables inside. + char buf[1024]; // Big enough for line of sane /proc/self/maps + int num_maps = 0; + LineReader reader(wrapped_maps_fd.get(), buf, sizeof(buf), 0); + while (true) { + num_maps++; + const char *cursor; + const char *eol; + if (!reader.ReadLine(&cursor, &eol)) { // EOF or malformed line. + return -1; + } + + // Start parsing line in /proc/self/maps. Here is an example: + // + // 08048000-0804c000 r-xp 00000000 08:01 2142121 /bin/cat + // + // We want start address (08048000), end address (0804c000), flags + // (r-xp) and file name (/bin/cat). + + // Read start address. + cursor = GetHex(cursor, eol, &start_address); + if (cursor == eol || *cursor != '-') { + return -1; // Malformed line. + } + ++cursor; // Skip '-'. + + // Read end address. + uint64_t end_address; + cursor = GetHex(cursor, eol, &end_address); + if (cursor == eol || *cursor != ' ') { + return -1; // Malformed line. + } + ++cursor; // Skip ' '. + + // Read flags. Skip flags until we encounter a space or eol. + const char * const flags_start = cursor; + while (cursor < eol && *cursor != ' ') { + ++cursor; + } + // We expect at least four letters for flags (ex. "r-xp"). + if (cursor == eol || cursor < flags_start + 4) { + return -1; // Malformed line. + } + + // Determine the base address by reading ELF headers in process memory. + ElfW(Ehdr) ehdr; + // Skip non-readable maps. + if (flags_start[0] == 'r' && + ReadFromOffsetExact(mem_fd, &ehdr, sizeof(ElfW(Ehdr)), start_address) && + memcmp(ehdr.e_ident, ELFMAG, SELFMAG) == 0) { + switch (ehdr.e_type) { + case ET_EXEC: + base_address = 0; + break; + case ET_DYN: + // Find the segment containing file offset 0. This will correspond + // to the ELF header that we just read. Normally this will have + // virtual address 0, but this is not guaranteed. We must subtract + // the virtual address from the address where the ELF header was + // mapped to get the base address. + // + // If we fail to find a segment for file offset 0, use the address + // of the ELF header as the base address. + base_address = start_address; + for (unsigned i = 0; i != ehdr.e_phnum; ++i) { + ElfW(Phdr) phdr; + if (ReadFromOffsetExact( + mem_fd, &phdr, sizeof(phdr), + start_address + ehdr.e_phoff + i * sizeof(phdr)) && + phdr.p_type == PT_LOAD && phdr.p_offset == 0) { + base_address = start_address - phdr.p_vaddr; + break; + } + } + break; + default: + // ET_REL or ET_CORE. These aren't directly executable, so they don't + // affect the base address. + break; + } + } + + // Check start and end addresses. + if (!(start_address <= pc && pc < end_address)) { + continue; // We skip this map. PC isn't in this map. + } + + // Check flags. We are only interested in "r*x" maps. + if (flags_start[0] != 'r' || flags_start[2] != 'x') { + continue; // We skip this map. + } + ++cursor; // Skip ' '. + + // Read file offset. + uint64_t file_offset; + cursor = GetHex(cursor, eol, &file_offset); + if (cursor == eol || *cursor != ' ') { + return -1; // Malformed line. + } + ++cursor; // Skip ' '. + + // Skip to file name. "cursor" now points to dev. We need to + // skip at least two spaces for dev and inode. + int num_spaces = 0; + while (cursor < eol) { + if (*cursor == ' ') { + ++num_spaces; + } else if (num_spaces >= 2) { + // The first non-space character after skipping two spaces + // is the beginning of the file name. + break; + } + ++cursor; + } + if (cursor == eol) { + return -1; // Malformed line. + } + + // Finally, "cursor" now points to file name of our interest. + NO_INTR(object_fd = open(cursor, O_RDONLY)); + if (object_fd < 0) { + // Failed to open object file. Copy the object file name to + // |out_file_name|. + strncpy(out_file_name, cursor, out_file_name_size); + // Making sure |out_file_name| is always null-terminated. + out_file_name[out_file_name_size - 1] = '\0'; + return -1; + } + return object_fd; + } +} + +// POSIX doesn't define any async-signal safe function for converting +// an integer to ASCII. We'll have to define our own version. +// itoa_r() converts a (signed) integer to ASCII. It returns "buf", if the +// conversion was successful or NULL otherwise. It never writes more than "sz" +// bytes. Output will be truncated as needed, and a NUL character is always +// appended. +// NOTE: code from sandbox/linux/seccomp-bpf/demo.cc. +static char *itoa_r(intptr_t i, char *buf, size_t sz, int base, size_t padding) { + // Make sure we can write at least one NUL byte. + size_t n = 1; + if (n > sz) + return NULL; + + if (base < 2 || base > 16) { + buf[0] = '\000'; + return NULL; + } + + char *start = buf; + + uintptr_t j = i; + + // Handle negative numbers (only for base 10). + if (i < 0 && base == 10) { + // This does "j = -i" while avoiding integer overflow. + j = static_cast<uintptr_t>(-(i + 1)) + 1; + + // Make sure we can write the '-' character. + if (++n > sz) { + buf[0] = '\000'; + return NULL; + } + *start++ = '-'; + } + + // Loop until we have converted the entire number. Output at least one + // character (i.e. '0'). + char *ptr = start; + do { + // Make sure there is still enough space left in our output buffer. + if (++n > sz) { + buf[0] = '\000'; + return NULL; + } + + // Output the next digit. + *ptr++ = "0123456789abcdef"[j % base]; + j /= base; + + if (padding > 0) + padding--; + } while (j > 0 || padding > 0); + + // Terminate the output with a NUL character. + *ptr = '\000'; + + // Conversion to ASCII actually resulted in the digits being in reverse + // order. We can't easily generate them in forward order, as we can't tell + // the number of characters needed until we are done converting. + // So, now, we reverse the string (except for the possible "-" sign). + while (--ptr > start) { + char ch = *ptr; + *ptr = *start; + *start++ = ch; + } + return buf; +} + +// Safely appends string |source| to string |dest|. Never writes past the +// buffer size |dest_size| and guarantees that |dest| is null-terminated. +static void SafeAppendString(const char* source, char* dest, int dest_size) { + int dest_string_length = strlen(dest); + SAFE_ASSERT(dest_string_length < dest_size); + dest += dest_string_length; + dest_size -= dest_string_length; + strncpy(dest, source, dest_size); + // Making sure |dest| is always null-terminated. + dest[dest_size - 1] = '\0'; +} + +// Converts a 64-bit value into a hex string, and safely appends it to |dest|. +// Never writes past the buffer size |dest_size| and guarantees that |dest| is +// null-terminated. +static void SafeAppendHexNumber(uint64_t value, char* dest, int dest_size) { + // 64-bit numbers in hex can have up to 16 digits. + char buf[17] = {'\0'}; + SafeAppendString(itoa_r(value, buf, sizeof(buf), 16, 0), dest, dest_size); +} + +// The implementation of our symbolization routine. If it +// successfully finds the symbol containing "pc" and obtains the +// symbol name, returns true and write the symbol name to "out". +// Otherwise, returns false. If Callback function is installed via +// InstallSymbolizeCallback(), the function is also called in this function, +// and "out" is used as its output. +// To keep stack consumption low, we would like this function to not +// get inlined. +static ATTRIBUTE_NOINLINE bool SymbolizeAndDemangle(void *pc, char *out, + int out_size) { + uint64_t pc0 = reinterpret_cast<uintptr_t>(pc); + uint64_t start_address = 0; + uint64_t base_address = 0; + int object_fd = -1; + + if (out_size < 1) { + return false; + } + out[0] = '\0'; + SafeAppendString("(", out, out_size); + + if (g_symbolize_open_object_file_callback) { + object_fd = g_symbolize_open_object_file_callback(pc0, start_address, + base_address, out + 1, + out_size - 1); + } else { + object_fd = OpenObjectFileContainingPcAndGetStartAddress(pc0, start_address, + base_address, + out + 1, + out_size - 1); + } + + FileDescriptor wrapped_object_fd(object_fd); + +#if defined(PRINT_UNSYMBOLIZED_STACK_TRACES) + { +#else + // Check whether a file name was returned. + if (object_fd < 0) { +#endif + if (out[1]) { + // The object file containing PC was determined successfully however the + // object file was not opened successfully. This is still considered + // success because the object file name and offset are known and tools + // like asan_symbolize.py can be used for the symbolization. + out[out_size - 1] = '\0'; // Making sure |out| is always null-terminated. + SafeAppendString("+0x", out, out_size); + SafeAppendHexNumber(pc0 - base_address, out, out_size); + SafeAppendString(")", out, out_size); + return true; + } + // Failed to determine the object file containing PC. Bail out. + return false; + } + int elf_type = FileGetElfType(wrapped_object_fd.get()); + if (elf_type == -1) { + return false; + } + if (g_symbolize_callback) { + // Run the call back if it's installed. + // Note: relocation (and much of the rest of this code) will be + // wrong for prelinked shared libraries and PIE executables. + uint64_t relocation = (elf_type == ET_DYN) ? start_address : 0; + int num_bytes_written = g_symbolize_callback(wrapped_object_fd.get(), + pc, out, out_size, + relocation); + if (num_bytes_written > 0) { + out += num_bytes_written; + out_size -= num_bytes_written; + } + } + if (!GetSymbolFromObjectFile(wrapped_object_fd.get(), pc0, + out, out_size, base_address)) { + if (out[1] && !g_symbolize_callback) { + // The object file containing PC was opened successfully however the + // symbol was not found. The object may have been stripped. This is still + // considered success because the object file name and offset are known + // and tools like asan_symbolize.py can be used for the symbolization. + out[out_size - 1] = '\0'; // Making sure |out| is always null-terminated. + SafeAppendString("+0x", out, out_size); + SafeAppendHexNumber(pc0 - base_address, out, out_size); + SafeAppendString(")", out, out_size); + return true; + } + return false; + } + + // Symbolization succeeded. Now we try to demangle the symbol. + DemangleInplace(out, out_size); + return true; +} + +_END_GOOGLE_NAMESPACE_ + +#elif defined(OS_MACOSX) && defined(HAVE_DLADDR) + +#include <dlfcn.h> +#include <string.h> + +_START_GOOGLE_NAMESPACE_ + +static ATTRIBUTE_NOINLINE bool SymbolizeAndDemangle(void *pc, char *out, + int out_size) { + Dl_info info; + if (dladdr(pc, &info)) { + if ((int)strlen(info.dli_sname) < out_size) { + strcpy(out, info.dli_sname); + // Symbolization succeeded. Now we try to demangle the symbol. + DemangleInplace(out, out_size); + return true; + } + } + return false; +} + +_END_GOOGLE_NAMESPACE_ + +#elif defined(OS_WINDOWS) || defined(OS_CYGWIN) + +#include <windows.h> +#include <dbghelp.h> + +#ifdef _MSC_VER +#pragma comment(lib, "dbghelp") +#endif + +_START_GOOGLE_NAMESPACE_ + +class SymInitializer { +public: + HANDLE process; + bool ready; + SymInitializer() : process(NULL), ready(false) { + // Initialize the symbol handler. + // https://msdn.microsoft.com/en-us/library/windows/desktop/ms680344(v=vs.85).aspx + process = GetCurrentProcess(); + // Defer symbol loading. + // We do not request undecorated symbols with SYMOPT_UNDNAME + // because the mangling library calls UnDecorateSymbolName. + SymSetOptions(SYMOPT_DEFERRED_LOADS); + if (SymInitialize(process, NULL, true)) { + ready = true; + } + } + ~SymInitializer() { + SymCleanup(process); + // We do not need to close `HANDLE process` because it's a "pseudo handle." + } +private: + SymInitializer(const SymInitializer&); + SymInitializer& operator=(const SymInitializer&); +}; + +static ATTRIBUTE_NOINLINE bool SymbolizeAndDemangle(void *pc, char *out, + int out_size) { + const static SymInitializer symInitializer; + if (!symInitializer.ready) { + return false; + } + // Resolve symbol information from address. + // https://msdn.microsoft.com/en-us/library/windows/desktop/ms680578(v=vs.85).aspx + char buf[sizeof(SYMBOL_INFO) + MAX_SYM_NAME]; + SYMBOL_INFO *symbol = reinterpret_cast<SYMBOL_INFO *>(buf); + symbol->SizeOfStruct = sizeof(SYMBOL_INFO); + symbol->MaxNameLen = MAX_SYM_NAME; + // We use the ANSI version to ensure the string type is always `char *`. + // This could break if a symbol has Unicode in it. + BOOL ret = SymFromAddr(symInitializer.process, + reinterpret_cast<DWORD64>(pc), 0, symbol); + if (ret == 1 && static_cast<int>(symbol->NameLen) < out_size) { + // `NameLen` does not include the null terminating character. + strncpy(out, symbol->Name, static_cast<size_t>(symbol->NameLen) + 1); + out[static_cast<size_t>(symbol->NameLen)] = '\0'; + // Symbolization succeeded. Now we try to demangle the symbol. + DemangleInplace(out, out_size); + return true; + } + return false; +} + +_END_GOOGLE_NAMESPACE_ + +#else +# error BUG: HAVE_SYMBOLIZE was wrongly set +#endif + +_START_GOOGLE_NAMESPACE_ + +bool Symbolize(void *pc, char *out, int out_size) { + SAFE_ASSERT(out_size >= 0); + return SymbolizeAndDemangle(pc, out, out_size); +} + +_END_GOOGLE_NAMESPACE_ + +#else /* HAVE_SYMBOLIZE */ + +#include <assert.h> + +#include "config.h" + +_START_GOOGLE_NAMESPACE_ + +// TODO: Support other environments. +bool Symbolize(void *pc, char *out, int out_size) { + assert(0); + return false; +} + +_END_GOOGLE_NAMESPACE_ + +#endif |