diff options
Diffstat (limited to 'third_party/abseil_cpp/absl/debugging/internal')
23 files changed, 5133 insertions, 0 deletions
diff --git a/third_party/abseil_cpp/absl/debugging/internal/address_is_readable.cc b/third_party/abseil_cpp/absl/debugging/internal/address_is_readable.cc new file mode 100644 index 000000000000..329c285f3b58 --- /dev/null +++ b/third_party/abseil_cpp/absl/debugging/internal/address_is_readable.cc @@ -0,0 +1,139 @@ +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// base::AddressIsReadable() probes an address to see whether it is readable, +// without faulting. + +#include "absl/debugging/internal/address_is_readable.h" + +#if !defined(__linux__) || defined(__ANDROID__) + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace debugging_internal { + +// On platforms other than Linux, just return true. +bool AddressIsReadable(const void* /* addr */) { return true; } + +} // namespace debugging_internal +ABSL_NAMESPACE_END +} // namespace absl + +#else + +#include <fcntl.h> +#include <sys/syscall.h> +#include <unistd.h> + +#include <atomic> +#include <cerrno> +#include <cstdint> + +#include "absl/base/internal/errno_saver.h" +#include "absl/base/internal/raw_logging.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace debugging_internal { + +// Pack a pid and two file descriptors into a 64-bit word, +// using 16, 24, and 24 bits for each respectively. +static uint64_t Pack(uint64_t pid, uint64_t read_fd, uint64_t write_fd) { + ABSL_RAW_CHECK((read_fd >> 24) == 0 && (write_fd >> 24) == 0, + "fd out of range"); + return (pid << 48) | ((read_fd & 0xffffff) << 24) | (write_fd & 0xffffff); +} + +// Unpack x into a pid and two file descriptors, where x was created with +// Pack(). +static void Unpack(uint64_t x, int *pid, int *read_fd, int *write_fd) { + *pid = x >> 48; + *read_fd = (x >> 24) & 0xffffff; + *write_fd = x & 0xffffff; +} + +// Return whether the byte at *addr is readable, without faulting. +// Save and restores errno. Returns true on systems where +// unimplemented. +// This is a namespace-scoped variable for correct zero-initialization. +static std::atomic<uint64_t> pid_and_fds; // initially 0, an invalid pid. + +bool AddressIsReadable(const void *addr) { + absl::base_internal::ErrnoSaver errno_saver; + // We test whether a byte is readable by using write(). Normally, this would + // be done via a cached file descriptor to /dev/null, but linux fails to + // check whether the byte is readable when the destination is /dev/null, so + // we use a cached pipe. We store the pid of the process that created the + // pipe to handle the case where a process forks, and the child closes all + // the file descriptors and then calls this routine. This is not perfect: + // the child could use the routine, then close all file descriptors and then + // use this routine again. But the likely use of this routine is when + // crashing, to test the validity of pages when dumping the stack. Beware + // that we may leak file descriptors, but we're unlikely to leak many. + int bytes_written; + int current_pid = getpid() & 0xffff; // we use only the low order 16 bits + do { // until we do not get EBADF trying to use file descriptors + int pid; + int read_fd; + int write_fd; + uint64_t local_pid_and_fds = pid_and_fds.load(std::memory_order_acquire); + Unpack(local_pid_and_fds, &pid, &read_fd, &write_fd); + while (current_pid != pid) { + int p[2]; + // new pipe + if (pipe(p) != 0) { + ABSL_RAW_LOG(FATAL, "Failed to create pipe, errno=%d", errno); + } + fcntl(p[0], F_SETFD, FD_CLOEXEC); + fcntl(p[1], F_SETFD, FD_CLOEXEC); + uint64_t new_pid_and_fds = Pack(current_pid, p[0], p[1]); + if (pid_and_fds.compare_exchange_strong( + local_pid_and_fds, new_pid_and_fds, std::memory_order_release, + std::memory_order_relaxed)) { + local_pid_and_fds = new_pid_and_fds; // fds exposed to other threads + } else { // fds not exposed to other threads; we can close them. + close(p[0]); + close(p[1]); + local_pid_and_fds = pid_and_fds.load(std::memory_order_acquire); + } + Unpack(local_pid_and_fds, &pid, &read_fd, &write_fd); + } + errno = 0; + // Use syscall(SYS_write, ...) instead of write() to prevent ASAN + // and other checkers from complaining about accesses to arbitrary + // memory. + do { + bytes_written = syscall(SYS_write, write_fd, addr, 1); + } while (bytes_written == -1 && errno == EINTR); + if (bytes_written == 1) { // remove the byte from the pipe + char c; + while (read(read_fd, &c, 1) == -1 && errno == EINTR) { + } + } + if (errno == EBADF) { // Descriptors invalid. + // If pid_and_fds contains the problematic file descriptors we just used, + // this call will forget them, and the loop will try again. + pid_and_fds.compare_exchange_strong(local_pid_and_fds, 0, + std::memory_order_release, + std::memory_order_relaxed); + } + } while (errno == EBADF); + return bytes_written == 1; +} + +} // namespace debugging_internal +ABSL_NAMESPACE_END +} // namespace absl + +#endif diff --git a/third_party/abseil_cpp/absl/debugging/internal/address_is_readable.h b/third_party/abseil_cpp/absl/debugging/internal/address_is_readable.h new file mode 100644 index 000000000000..4bbaf4d69bd8 --- /dev/null +++ b/third_party/abseil_cpp/absl/debugging/internal/address_is_readable.h @@ -0,0 +1,32 @@ +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef ABSL_DEBUGGING_INTERNAL_ADDRESS_IS_READABLE_H_ +#define ABSL_DEBUGGING_INTERNAL_ADDRESS_IS_READABLE_H_ + +#include "absl/base/config.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace debugging_internal { + +// Return whether the byte at *addr is readable, without faulting. +// Save and restores errno. +bool AddressIsReadable(const void *addr); + +} // namespace debugging_internal +ABSL_NAMESPACE_END +} // namespace absl + +#endif // ABSL_DEBUGGING_INTERNAL_ADDRESS_IS_READABLE_H_ diff --git a/third_party/abseil_cpp/absl/debugging/internal/demangle.cc b/third_party/abseil_cpp/absl/debugging/internal/demangle.cc new file mode 100644 index 000000000000..46cdb67b1fbf --- /dev/null +++ b/third_party/abseil_cpp/absl/debugging/internal/demangle.cc @@ -0,0 +1,1945 @@ +// Copyright 2018 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// For reference check out: +// https://itanium-cxx-abi.github.io/cxx-abi/abi.html#mangling +// +// Note that we only have partial C++11 support yet. + +#include "absl/debugging/internal/demangle.h" + +#include <cstdint> +#include <cstdio> +#include <limits> + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace debugging_internal { + +typedef struct { + const char *abbrev; + const char *real_name; + // Number of arguments in <expression> context, or 0 if disallowed. + int arity; +} AbbrevPair; + +// List of operators from Itanium C++ ABI. +static const AbbrevPair kOperatorList[] = { + // New has special syntax (not currently supported). + {"nw", "new", 0}, + {"na", "new[]", 0}, + + // Works except that the 'gs' prefix is not supported. + {"dl", "delete", 1}, + {"da", "delete[]", 1}, + + {"ps", "+", 1}, // "positive" + {"ng", "-", 1}, // "negative" + {"ad", "&", 1}, // "address-of" + {"de", "*", 1}, // "dereference" + {"co", "~", 1}, + + {"pl", "+", 2}, + {"mi", "-", 2}, + {"ml", "*", 2}, + {"dv", "/", 2}, + {"rm", "%", 2}, + {"an", "&", 2}, + {"or", "|", 2}, + {"eo", "^", 2}, + {"aS", "=", 2}, + {"pL", "+=", 2}, + {"mI", "-=", 2}, + {"mL", "*=", 2}, + {"dV", "/=", 2}, + {"rM", "%=", 2}, + {"aN", "&=", 2}, + {"oR", "|=", 2}, + {"eO", "^=", 2}, + {"ls", "<<", 2}, + {"rs", ">>", 2}, + {"lS", "<<=", 2}, + {"rS", ">>=", 2}, + {"eq", "==", 2}, + {"ne", "!=", 2}, + {"lt", "<", 2}, + {"gt", ">", 2}, + {"le", "<=", 2}, + {"ge", ">=", 2}, + {"nt", "!", 1}, + {"aa", "&&", 2}, + {"oo", "||", 2}, + {"pp", "++", 1}, + {"mm", "--", 1}, + {"cm", ",", 2}, + {"pm", "->*", 2}, + {"pt", "->", 0}, // Special syntax + {"cl", "()", 0}, // Special syntax + {"ix", "[]", 2}, + {"qu", "?", 3}, + {"st", "sizeof", 0}, // Special syntax + {"sz", "sizeof", 1}, // Not a real operator name, but used in expressions. + {nullptr, nullptr, 0}, +}; + +// List of builtin types from Itanium C++ ABI. +// +// Invariant: only one- or two-character type abbreviations here. +static const AbbrevPair kBuiltinTypeList[] = { + {"v", "void", 0}, + {"w", "wchar_t", 0}, + {"b", "bool", 0}, + {"c", "char", 0}, + {"a", "signed char", 0}, + {"h", "unsigned char", 0}, + {"s", "short", 0}, + {"t", "unsigned short", 0}, + {"i", "int", 0}, + {"j", "unsigned int", 0}, + {"l", "long", 0}, + {"m", "unsigned long", 0}, + {"x", "long long", 0}, + {"y", "unsigned long long", 0}, + {"n", "__int128", 0}, + {"o", "unsigned __int128", 0}, + {"f", "float", 0}, + {"d", "double", 0}, + {"e", "long double", 0}, + {"g", "__float128", 0}, + {"z", "ellipsis", 0}, + + {"De", "decimal128", 0}, // IEEE 754r decimal floating point (128 bits) + {"Dd", "decimal64", 0}, // IEEE 754r decimal floating point (64 bits) + {"Dc", "decltype(auto)", 0}, + {"Da", "auto", 0}, + {"Dn", "std::nullptr_t", 0}, // i.e., decltype(nullptr) + {"Df", "decimal32", 0}, // IEEE 754r decimal floating point (32 bits) + {"Di", "char32_t", 0}, + {"Du", "char8_t", 0}, + {"Ds", "char16_t", 0}, + {"Dh", "float16", 0}, // IEEE 754r half-precision float (16 bits) + {nullptr, nullptr, 0}, +}; + +// List of substitutions Itanium C++ ABI. +static const AbbrevPair kSubstitutionList[] = { + {"St", "", 0}, + {"Sa", "allocator", 0}, + {"Sb", "basic_string", 0}, + // std::basic_string<char, std::char_traits<char>,std::allocator<char> > + {"Ss", "string", 0}, + // std::basic_istream<char, std::char_traits<char> > + {"Si", "istream", 0}, + // std::basic_ostream<char, std::char_traits<char> > + {"So", "ostream", 0}, + // std::basic_iostream<char, std::char_traits<char> > + {"Sd", "iostream", 0}, + {nullptr, nullptr, 0}, +}; + +// State needed for demangling. This struct is copied in almost every stack +// frame, so every byte counts. +typedef struct { + int mangled_idx; // Cursor of mangled name. + int out_cur_idx; // Cursor of output string. + int prev_name_idx; // For constructors/destructors. + signed int prev_name_length : 16; // For constructors/destructors. + signed int nest_level : 15; // For nested names. + unsigned int append : 1; // Append flag. + // Note: for some reason MSVC can't pack "bool append : 1" into the same int + // with the above two fields, so we use an int instead. Amusingly it can pack + // "signed bool" as expected, but relying on that to continue to be a legal + // type seems ill-advised (as it's illegal in at least clang). +} ParseState; + +static_assert(sizeof(ParseState) == 4 * sizeof(int), + "unexpected size of ParseState"); + +// One-off state for demangling that's not subject to backtracking -- either +// constant data, data that's intentionally immune to backtracking (steps), or +// data that would never be changed by backtracking anyway (recursion_depth). +// +// Only one copy of this exists for each call to Demangle, so the size of this +// struct is nearly inconsequential. +typedef struct { + const char *mangled_begin; // Beginning of input string. + char *out; // Beginning of output string. + int out_end_idx; // One past last allowed output character. + int recursion_depth; // For stack exhaustion prevention. + int steps; // Cap how much work we'll do, regardless of depth. + ParseState parse_state; // Backtrackable state copied for most frames. +} State; + +namespace { +// Prevent deep recursion / stack exhaustion. +// Also prevent unbounded handling of complex inputs. +class ComplexityGuard { + public: + explicit ComplexityGuard(State *state) : state_(state) { + ++state->recursion_depth; + ++state->steps; + } + ~ComplexityGuard() { --state_->recursion_depth; } + + // 256 levels of recursion seems like a reasonable upper limit on depth. + // 128 is not enough to demagle synthetic tests from demangle_unittest.txt: + // "_ZaaZZZZ..." and "_ZaaZcvZcvZ..." + static constexpr int kRecursionDepthLimit = 256; + + // We're trying to pick a charitable upper-limit on how many parse steps are + // necessary to handle something that a human could actually make use of. + // This is mostly in place as a bound on how much work we'll do if we are + // asked to demangle an mangled name from an untrusted source, so it should be + // much larger than the largest expected symbol, but much smaller than the + // amount of work we can do in, e.g., a second. + // + // Some real-world symbols from an arbitrary binary started failing between + // 2^12 and 2^13, so we multiply the latter by an extra factor of 16 to set + // the limit. + // + // Spending one second on 2^17 parse steps would require each step to take + // 7.6us, or ~30000 clock cycles, so it's safe to say this can be done in + // under a second. + static constexpr int kParseStepsLimit = 1 << 17; + + bool IsTooComplex() const { + return state_->recursion_depth > kRecursionDepthLimit || + state_->steps > kParseStepsLimit; + } + + private: + State *state_; +}; +} // namespace + +// We don't use strlen() in libc since it's not guaranteed to be async +// signal safe. +static size_t StrLen(const char *str) { + size_t len = 0; + while (*str != '\0') { + ++str; + ++len; + } + return len; +} + +// Returns true if "str" has at least "n" characters remaining. +static bool AtLeastNumCharsRemaining(const char *str, int n) { + for (int i = 0; i < n; ++i) { + if (str[i] == '\0') { + return false; + } + } + return true; +} + +// Returns true if "str" has "prefix" as a prefix. +static bool StrPrefix(const char *str, const char *prefix) { + size_t i = 0; + while (str[i] != '\0' && prefix[i] != '\0' && str[i] == prefix[i]) { + ++i; + } + return prefix[i] == '\0'; // Consumed everything in "prefix". +} + +static void InitState(State *state, const char *mangled, char *out, + int out_size) { + state->mangled_begin = mangled; + state->out = out; + state->out_end_idx = out_size; + state->recursion_depth = 0; + state->steps = 0; + + state->parse_state.mangled_idx = 0; + state->parse_state.out_cur_idx = 0; + state->parse_state.prev_name_idx = 0; + state->parse_state.prev_name_length = -1; + state->parse_state.nest_level = -1; + state->parse_state.append = true; +} + +static inline const char *RemainingInput(State *state) { + return &state->mangled_begin[state->parse_state.mangled_idx]; +} + +// Returns true and advances "mangled_idx" if we find "one_char_token" +// at "mangled_idx" position. It is assumed that "one_char_token" does +// not contain '\0'. +static bool ParseOneCharToken(State *state, const char one_char_token) { + ComplexityGuard guard(state); + if (guard.IsTooComplex()) return false; + if (RemainingInput(state)[0] == one_char_token) { + ++state->parse_state.mangled_idx; + return true; + } + return false; +} + +// Returns true and advances "mangled_cur" if we find "two_char_token" +// at "mangled_cur" position. It is assumed that "two_char_token" does +// not contain '\0'. +static bool ParseTwoCharToken(State *state, const char *two_char_token) { + ComplexityGuard guard(state); + if (guard.IsTooComplex()) return false; + if (RemainingInput(state)[0] == two_char_token[0] && + RemainingInput(state)[1] == two_char_token[1]) { + state->parse_state.mangled_idx += 2; + return true; + } + return false; +} + +// Returns true and advances "mangled_cur" if we find any character in +// "char_class" at "mangled_cur" position. +static bool ParseCharClass(State *state, const char *char_class) { + ComplexityGuard guard(state); + if (guard.IsTooComplex()) return false; + if (RemainingInput(state)[0] == '\0') { + return false; + } + const char *p = char_class; + for (; *p != '\0'; ++p) { + if (RemainingInput(state)[0] == *p) { + ++state->parse_state.mangled_idx; + return true; + } + } + return false; +} + +static bool ParseDigit(State *state, int *digit) { + char c = RemainingInput(state)[0]; + if (ParseCharClass(state, "0123456789")) { + if (digit != nullptr) { + *digit = c - '0'; + } + return true; + } + return false; +} + +// This function is used for handling an optional non-terminal. +static bool Optional(bool /*status*/) { return true; } + +// This function is used for handling <non-terminal>+ syntax. +typedef bool (*ParseFunc)(State *); +static bool OneOrMore(ParseFunc parse_func, State *state) { + if (parse_func(state)) { + while (parse_func(state)) { + } + return true; + } + return false; +} + +// This function is used for handling <non-terminal>* syntax. The function +// always returns true and must be followed by a termination token or a +// terminating sequence not handled by parse_func (e.g. +// ParseOneCharToken(state, 'E')). +static bool ZeroOrMore(ParseFunc parse_func, State *state) { + while (parse_func(state)) { + } + return true; +} + +// Append "str" at "out_cur_idx". If there is an overflow, out_cur_idx is +// set to out_end_idx+1. The output string is ensured to +// always terminate with '\0' as long as there is no overflow. +static void Append(State *state, const char *const str, const int length) { + for (int i = 0; i < length; ++i) { + if (state->parse_state.out_cur_idx + 1 < + state->out_end_idx) { // +1 for '\0' + state->out[state->parse_state.out_cur_idx++] = str[i]; + } else { + // signal overflow + state->parse_state.out_cur_idx = state->out_end_idx + 1; + break; + } + } + if (state->parse_state.out_cur_idx < state->out_end_idx) { + state->out[state->parse_state.out_cur_idx] = + '\0'; // Terminate it with '\0' + } +} + +// We don't use equivalents in libc to avoid locale issues. +static bool IsLower(char c) { return c >= 'a' && c <= 'z'; } + +static bool IsAlpha(char c) { + return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'); +} + +static bool IsDigit(char c) { return c >= '0' && c <= '9'; } + +// Returns true if "str" is a function clone suffix. These suffixes are used +// by GCC 4.5.x and later versions (and our locally-modified version of GCC +// 4.4.x) to indicate functions which have been cloned during optimization. +// We treat any sequence (.<alpha>+.<digit>+)+ as a function clone suffix. +static bool IsFunctionCloneSuffix(const char *str) { + size_t i = 0; + while (str[i] != '\0') { + // Consume a single .<alpha>+.<digit>+ sequence. + if (str[i] != '.' || !IsAlpha(str[i + 1])) { + return false; + } + i += 2; + while (IsAlpha(str[i])) { + ++i; + } + if (str[i] != '.' || !IsDigit(str[i + 1])) { + return false; + } + i += 2; + while (IsDigit(str[i])) { + ++i; + } + } + return true; // Consumed everything in "str". +} + +static bool EndsWith(State *state, const char chr) { + return state->parse_state.out_cur_idx > 0 && + state->parse_state.out_cur_idx < state->out_end_idx && + chr == state->out[state->parse_state.out_cur_idx - 1]; +} + +// Append "str" with some tweaks, iff "append" state is true. +static void MaybeAppendWithLength(State *state, const char *const str, + const int length) { + if (state->parse_state.append && length > 0) { + // Append a space if the output buffer ends with '<' and "str" + // starts with '<' to avoid <<<. + if (str[0] == '<' && EndsWith(state, '<')) { + Append(state, " ", 1); + } + // Remember the last identifier name for ctors/dtors, + // but only if we haven't yet overflown the buffer. + if (state->parse_state.out_cur_idx < state->out_end_idx && + (IsAlpha(str[0]) || str[0] == '_')) { + state->parse_state.prev_name_idx = state->parse_state.out_cur_idx; + state->parse_state.prev_name_length = length; + } + Append(state, str, length); + } +} + +// Appends a positive decimal number to the output if appending is enabled. +static bool MaybeAppendDecimal(State *state, unsigned int val) { + // Max {32-64}-bit unsigned int is 20 digits. + constexpr size_t kMaxLength = 20; + char buf[kMaxLength]; + + // We can't use itoa or sprintf as neither is specified to be + // async-signal-safe. + if (state->parse_state.append) { + // We can't have a one-before-the-beginning pointer, so instead start with + // one-past-the-end and manipulate one character before the pointer. + char *p = &buf[kMaxLength]; + do { // val=0 is the only input that should write a leading zero digit. + *--p = (val % 10) + '0'; + val /= 10; + } while (p > buf && val != 0); + + // 'p' landed on the last character we set. How convenient. + Append(state, p, kMaxLength - (p - buf)); + } + + return true; +} + +// A convenient wrapper around MaybeAppendWithLength(). +// Returns true so that it can be placed in "if" conditions. +static bool MaybeAppend(State *state, const char *const str) { + if (state->parse_state.append) { + int length = StrLen(str); + MaybeAppendWithLength(state, str, length); + } + return true; +} + +// This function is used for handling nested names. +static bool EnterNestedName(State *state) { + state->parse_state.nest_level = 0; + return true; +} + +// This function is used for handling nested names. +static bool LeaveNestedName(State *state, int16_t prev_value) { + state->parse_state.nest_level = prev_value; + return true; +} + +// Disable the append mode not to print function parameters, etc. +static bool DisableAppend(State *state) { + state->parse_state.append = false; + return true; +} + +// Restore the append mode to the previous state. +static bool RestoreAppend(State *state, bool prev_value) { + state->parse_state.append = prev_value; + return true; +} + +// Increase the nest level for nested names. +static void MaybeIncreaseNestLevel(State *state) { + if (state->parse_state.nest_level > -1) { + ++state->parse_state.nest_level; + } +} + +// Appends :: for nested names if necessary. +static void MaybeAppendSeparator(State *state) { + if (state->parse_state.nest_level >= 1) { + MaybeAppend(state, "::"); + } +} + +// Cancel the last separator if necessary. +static void MaybeCancelLastSeparator(State *state) { + if (state->parse_state.nest_level >= 1 && state->parse_state.append && + state->parse_state.out_cur_idx >= 2) { + state->parse_state.out_cur_idx -= 2; + state->out[state->parse_state.out_cur_idx] = '\0'; + } +} + +// Returns true if the identifier of the given length pointed to by +// "mangled_cur" is anonymous namespace. +static bool IdentifierIsAnonymousNamespace(State *state, int length) { + // Returns true if "anon_prefix" is a proper prefix of "mangled_cur". + static const char anon_prefix[] = "_GLOBAL__N_"; + return (length > static_cast<int>(sizeof(anon_prefix) - 1) && + StrPrefix(RemainingInput(state), anon_prefix)); +} + +// Forward declarations of our parsing functions. +static bool ParseMangledName(State *state); +static bool ParseEncoding(State *state); +static bool ParseName(State *state); +static bool ParseUnscopedName(State *state); +static bool ParseNestedName(State *state); +static bool ParsePrefix(State *state); +static bool ParseUnqualifiedName(State *state); +static bool ParseSourceName(State *state); +static bool ParseLocalSourceName(State *state); +static bool ParseUnnamedTypeName(State *state); +static bool ParseNumber(State *state, int *number_out); +static bool ParseFloatNumber(State *state); +static bool ParseSeqId(State *state); +static bool ParseIdentifier(State *state, int length); +static bool ParseOperatorName(State *state, int *arity); +static bool ParseSpecialName(State *state); +static bool ParseCallOffset(State *state); +static bool ParseNVOffset(State *state); +static bool ParseVOffset(State *state); +static bool ParseCtorDtorName(State *state); +static bool ParseDecltype(State *state); +static bool ParseType(State *state); +static bool ParseCVQualifiers(State *state); +static bool ParseBuiltinType(State *state); +static bool ParseFunctionType(State *state); +static bool ParseBareFunctionType(State *state); +static bool ParseClassEnumType(State *state); +static bool ParseArrayType(State *state); +static bool ParsePointerToMemberType(State *state); +static bool ParseTemplateParam(State *state); +static bool ParseTemplateTemplateParam(State *state); +static bool ParseTemplateArgs(State *state); +static bool ParseTemplateArg(State *state); +static bool ParseBaseUnresolvedName(State *state); +static bool ParseUnresolvedName(State *state); +static bool ParseExpression(State *state); +static bool ParseExprPrimary(State *state); +static bool ParseExprCastValue(State *state); +static bool ParseLocalName(State *state); +static bool ParseLocalNameSuffix(State *state); +static bool ParseDiscriminator(State *state); +static bool ParseSubstitution(State *state, bool accept_std); + +// Implementation note: the following code is a straightforward +// translation of the Itanium C++ ABI defined in BNF with a couple of +// exceptions. +// +// - Support GNU extensions not defined in the Itanium C++ ABI +// - <prefix> and <template-prefix> are combined to avoid infinite loop +// - Reorder patterns to shorten the code +// - Reorder patterns to give greedier functions precedence +// We'll mark "Less greedy than" for these cases in the code +// +// Each parsing function changes the parse state and returns true on +// success, or returns false and doesn't change the parse state (note: +// the parse-steps counter increases regardless of success or failure). +// To ensure that the parse state isn't changed in the latter case, we +// save the original state before we call multiple parsing functions +// consecutively with &&, and restore it if unsuccessful. See +// ParseEncoding() as an example of this convention. We follow the +// convention throughout the code. +// +// Originally we tried to do demangling without following the full ABI +// syntax but it turned out we needed to follow the full syntax to +// parse complicated cases like nested template arguments. Note that +// implementing a full-fledged demangler isn't trivial (libiberty's +// cp-demangle.c has +4300 lines). +// +// Note that (foo) in <(foo) ...> is a modifier to be ignored. +// +// Reference: +// - Itanium C++ ABI +// <https://mentorembedded.github.io/cxx-abi/abi.html#mangling> + +// <mangled-name> ::= _Z <encoding> +static bool ParseMangledName(State *state) { + ComplexityGuard guard(state); + if (guard.IsTooComplex()) return false; + return ParseTwoCharToken(state, "_Z") && ParseEncoding(state); +} + +// <encoding> ::= <(function) name> <bare-function-type> +// ::= <(data) name> +// ::= <special-name> +static bool ParseEncoding(State *state) { + ComplexityGuard guard(state); + if (guard.IsTooComplex()) return false; + // Implementing the first two productions together as <name> + // [<bare-function-type>] avoids exponential blowup of backtracking. + // + // Since Optional(...) can't fail, there's no need to copy the state for + // backtracking. + if (ParseName(state) && Optional(ParseBareFunctionType(state))) { + return true; + } + + if (ParseSpecialName(state)) { + return true; + } + return false; +} + +// <name> ::= <nested-name> +// ::= <unscoped-template-name> <template-args> +// ::= <unscoped-name> +// ::= <local-name> +static bool ParseName(State *state) { + ComplexityGuard guard(state); + if (guard.IsTooComplex()) return false; + if (ParseNestedName(state) || ParseLocalName(state)) { + return true; + } + + // We reorganize the productions to avoid re-parsing unscoped names. + // - Inline <unscoped-template-name> productions: + // <name> ::= <substitution> <template-args> + // ::= <unscoped-name> <template-args> + // ::= <unscoped-name> + // - Merge the two productions that start with unscoped-name: + // <name> ::= <unscoped-name> [<template-args>] + + ParseState copy = state->parse_state; + // "std<...>" isn't a valid name. + if (ParseSubstitution(state, /*accept_std=*/false) && + ParseTemplateArgs(state)) { + return true; + } + state->parse_state = copy; + + // Note there's no need to restore state after this since only the first + // subparser can fail. + return ParseUnscopedName(state) && Optional(ParseTemplateArgs(state)); +} + +// <unscoped-name> ::= <unqualified-name> +// ::= St <unqualified-name> +static bool ParseUnscopedName(State *state) { + ComplexityGuard guard(state); + if (guard.IsTooComplex()) return false; + if (ParseUnqualifiedName(state)) { + return true; + } + + ParseState copy = state->parse_state; + if (ParseTwoCharToken(state, "St") && MaybeAppend(state, "std::") && + ParseUnqualifiedName(state)) { + return true; + } + state->parse_state = copy; + return false; +} + +// <ref-qualifer> ::= R // lvalue method reference qualifier +// ::= O // rvalue method reference qualifier +static inline bool ParseRefQualifier(State *state) { + return ParseCharClass(state, "OR"); +} + +// <nested-name> ::= N [<CV-qualifiers>] [<ref-qualifier>] <prefix> +// <unqualified-name> E +// ::= N [<CV-qualifiers>] [<ref-qualifier>] <template-prefix> +// <template-args> E +static bool ParseNestedName(State *state) { + ComplexityGuard guard(state); + if (guard.IsTooComplex()) return false; + ParseState copy = state->parse_state; + if (ParseOneCharToken(state, 'N') && EnterNestedName(state) && + Optional(ParseCVQualifiers(state)) && + Optional(ParseRefQualifier(state)) && ParsePrefix(state) && + LeaveNestedName(state, copy.nest_level) && + ParseOneCharToken(state, 'E')) { + return true; + } + state->parse_state = copy; + return false; +} + +// This part is tricky. If we literally translate them to code, we'll +// end up infinite loop. Hence we merge them to avoid the case. +// +// <prefix> ::= <prefix> <unqualified-name> +// ::= <template-prefix> <template-args> +// ::= <template-param> +// ::= <substitution> +// ::= # empty +// <template-prefix> ::= <prefix> <(template) unqualified-name> +// ::= <template-param> +// ::= <substitution> +static bool ParsePrefix(State *state) { + ComplexityGuard guard(state); + if (guard.IsTooComplex()) return false; + bool has_something = false; + while (true) { + MaybeAppendSeparator(state); + if (ParseTemplateParam(state) || + ParseSubstitution(state, /*accept_std=*/true) || + ParseUnscopedName(state) || + (ParseOneCharToken(state, 'M') && ParseUnnamedTypeName(state))) { + has_something = true; + MaybeIncreaseNestLevel(state); + continue; + } + MaybeCancelLastSeparator(state); + if (has_something && ParseTemplateArgs(state)) { + return ParsePrefix(state); + } else { + break; + } + } + return true; +} + +// <unqualified-name> ::= <operator-name> +// ::= <ctor-dtor-name> +// ::= <source-name> +// ::= <local-source-name> // GCC extension; see below. +// ::= <unnamed-type-name> +static bool ParseUnqualifiedName(State *state) { + ComplexityGuard guard(state); + if (guard.IsTooComplex()) return false; + return (ParseOperatorName(state, nullptr) || ParseCtorDtorName(state) || + ParseSourceName(state) || ParseLocalSourceName(state) || + ParseUnnamedTypeName(state)); +} + +// <source-name> ::= <positive length number> <identifier> +static bool ParseSourceName(State *state) { + ComplexityGuard guard(state); + if (guard.IsTooComplex()) return false; + ParseState copy = state->parse_state; + int length = -1; + if (ParseNumber(state, &length) && ParseIdentifier(state, length)) { + return true; + } + state->parse_state = copy; + return false; +} + +// <local-source-name> ::= L <source-name> [<discriminator>] +// +// References: +// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=31775 +// https://gcc.gnu.org/viewcvs?view=rev&revision=124467 +static bool ParseLocalSourceName(State *state) { + ComplexityGuard guard(state); + if (guard.IsTooComplex()) return false; + ParseState copy = state->parse_state; + if (ParseOneCharToken(state, 'L') && ParseSourceName(state) && + Optional(ParseDiscriminator(state))) { + return true; + } + state->parse_state = copy; + return false; +} + +// <unnamed-type-name> ::= Ut [<(nonnegative) number>] _ +// ::= <closure-type-name> +// <closure-type-name> ::= Ul <lambda-sig> E [<(nonnegative) number>] _ +// <lambda-sig> ::= <(parameter) type>+ +static bool ParseUnnamedTypeName(State *state) { + ComplexityGuard guard(state); + if (guard.IsTooComplex()) return false; + ParseState copy = state->parse_state; + // Type's 1-based index n is encoded as { "", n == 1; itoa(n-2), otherwise }. + // Optionally parse the encoded value into 'which' and add 2 to get the index. + int which = -1; + + // Unnamed type local to function or class. + if (ParseTwoCharToken(state, "Ut") && Optional(ParseNumber(state, &which)) && + which <= std::numeric_limits<int>::max() - 2 && // Don't overflow. + ParseOneCharToken(state, '_')) { + MaybeAppend(state, "{unnamed type#"); + MaybeAppendDecimal(state, 2 + which); + MaybeAppend(state, "}"); + return true; + } + state->parse_state = copy; + + // Closure type. + which = -1; + if (ParseTwoCharToken(state, "Ul") && DisableAppend(state) && + OneOrMore(ParseType, state) && RestoreAppend(state, copy.append) && + ParseOneCharToken(state, 'E') && Optional(ParseNumber(state, &which)) && + which <= std::numeric_limits<int>::max() - 2 && // Don't overflow. + ParseOneCharToken(state, '_')) { + MaybeAppend(state, "{lambda()#"); + MaybeAppendDecimal(state, 2 + which); + MaybeAppend(state, "}"); + return true; + } + state->parse_state = copy; + + return false; +} + +// <number> ::= [n] <non-negative decimal integer> +// If "number_out" is non-null, then *number_out is set to the value of the +// parsed number on success. +static bool ParseNumber(State *state, int *number_out) { + ComplexityGuard guard(state); + if (guard.IsTooComplex()) return false; + bool negative = false; + if (ParseOneCharToken(state, 'n')) { + negative = true; + } + const char *p = RemainingInput(state); + uint64_t number = 0; + for (; *p != '\0'; ++p) { + if (IsDigit(*p)) { + number = number * 10 + (*p - '0'); + } else { + break; + } + } + // Apply the sign with uint64_t arithmetic so overflows aren't UB. Gives + // "incorrect" results for out-of-range inputs, but negative values only + // appear for literals, which aren't printed. + if (negative) { + number = ~number + 1; + } + if (p != RemainingInput(state)) { // Conversion succeeded. + state->parse_state.mangled_idx += p - RemainingInput(state); + if (number_out != nullptr) { + // Note: possibly truncate "number". + *number_out = number; + } + return true; + } + return false; +} + +// Floating-point literals are encoded using a fixed-length lowercase +// hexadecimal string. +static bool ParseFloatNumber(State *state) { + ComplexityGuard guard(state); + if (guard.IsTooComplex()) return false; + const char *p = RemainingInput(state); + for (; *p != '\0'; ++p) { + if (!IsDigit(*p) && !(*p >= 'a' && *p <= 'f')) { + break; + } + } + if (p != RemainingInput(state)) { // Conversion succeeded. + state->parse_state.mangled_idx += p - RemainingInput(state); + return true; + } + return false; +} + +// The <seq-id> is a sequence number in base 36, +// using digits and upper case letters +static bool ParseSeqId(State *state) { + ComplexityGuard guard(state); + if (guard.IsTooComplex()) return false; + const char *p = RemainingInput(state); + for (; *p != '\0'; ++p) { + if (!IsDigit(*p) && !(*p >= 'A' && *p <= 'Z')) { + break; + } + } + if (p != RemainingInput(state)) { // Conversion succeeded. + state->parse_state.mangled_idx += p - RemainingInput(state); + return true; + } + return false; +} + +// <identifier> ::= <unqualified source code identifier> (of given length) +static bool ParseIdentifier(State *state, int length) { + ComplexityGuard guard(state); + if (guard.IsTooComplex()) return false; + if (length < 0 || !AtLeastNumCharsRemaining(RemainingInput(state), length)) { + return false; + } + if (IdentifierIsAnonymousNamespace(state, length)) { + MaybeAppend(state, "(anonymous namespace)"); + } else { + MaybeAppendWithLength(state, RemainingInput(state), length); + } + state->parse_state.mangled_idx += length; + return true; +} + +// <operator-name> ::= nw, and other two letters cases +// ::= cv <type> # (cast) +// ::= v <digit> <source-name> # vendor extended operator +static bool ParseOperatorName(State *state, int *arity) { + ComplexityGuard guard(state); + if (guard.IsTooComplex()) return false; + if (!AtLeastNumCharsRemaining(RemainingInput(state), 2)) { + return false; + } + // First check with "cv" (cast) case. + ParseState copy = state->parse_state; + if (ParseTwoCharToken(state, "cv") && MaybeAppend(state, "operator ") && + EnterNestedName(state) && ParseType(state) && + LeaveNestedName(state, copy.nest_level)) { + if (arity != nullptr) { + *arity = 1; + } + return true; + } + state->parse_state = copy; + + // Then vendor extended operators. + if (ParseOneCharToken(state, 'v') && ParseDigit(state, arity) && + ParseSourceName(state)) { + return true; + } + state->parse_state = copy; + + // Other operator names should start with a lower alphabet followed + // by a lower/upper alphabet. + if (!(IsLower(RemainingInput(state)[0]) && + IsAlpha(RemainingInput(state)[1]))) { + return false; + } + // We may want to perform a binary search if we really need speed. + const AbbrevPair *p; + for (p = kOperatorList; p->abbrev != nullptr; ++p) { + if (RemainingInput(state)[0] == p->abbrev[0] && + RemainingInput(state)[1] == p->abbrev[1]) { + if (arity != nullptr) { + *arity = p->arity; + } + MaybeAppend(state, "operator"); + if (IsLower(*p->real_name)) { // new, delete, etc. + MaybeAppend(state, " "); + } + MaybeAppend(state, p->real_name); + state->parse_state.mangled_idx += 2; + return true; + } + } + return false; +} + +// <special-name> ::= TV <type> +// ::= TT <type> +// ::= TI <type> +// ::= TS <type> +// ::= TH <type> # thread-local +// ::= Tc <call-offset> <call-offset> <(base) encoding> +// ::= GV <(object) name> +// ::= T <call-offset> <(base) encoding> +// G++ extensions: +// ::= TC <type> <(offset) number> _ <(base) type> +// ::= TF <type> +// ::= TJ <type> +// ::= GR <name> +// ::= GA <encoding> +// ::= Th <call-offset> <(base) encoding> +// ::= Tv <call-offset> <(base) encoding> +// +// Note: we don't care much about them since they don't appear in +// stack traces. The are special data. +static bool ParseSpecialName(State *state) { + ComplexityGuard guard(state); + if (guard.IsTooComplex()) return false; + ParseState copy = state->parse_state; + if (ParseOneCharToken(state, 'T') && ParseCharClass(state, "VTISH") && + ParseType(state)) { + return true; + } + state->parse_state = copy; + + if (ParseTwoCharToken(state, "Tc") && ParseCallOffset(state) && + ParseCallOffset(state) && ParseEncoding(state)) { + return true; + } + state->parse_state = copy; + + if (ParseTwoCharToken(state, "GV") && ParseName(state)) { + return true; + } + state->parse_state = copy; + + if (ParseOneCharToken(state, 'T') && ParseCallOffset(state) && + ParseEncoding(state)) { + return true; + } + state->parse_state = copy; + + // G++ extensions + if (ParseTwoCharToken(state, "TC") && ParseType(state) && + ParseNumber(state, nullptr) && ParseOneCharToken(state, '_') && + DisableAppend(state) && ParseType(state)) { + RestoreAppend(state, copy.append); + return true; + } + state->parse_state = copy; + + if (ParseOneCharToken(state, 'T') && ParseCharClass(state, "FJ") && + ParseType(state)) { + return true; + } + state->parse_state = copy; + + if (ParseTwoCharToken(state, "GR") && ParseName(state)) { + return true; + } + state->parse_state = copy; + + if (ParseTwoCharToken(state, "GA") && ParseEncoding(state)) { + return true; + } + state->parse_state = copy; + + if (ParseOneCharToken(state, 'T') && ParseCharClass(state, "hv") && + ParseCallOffset(state) && ParseEncoding(state)) { + return true; + } + state->parse_state = copy; + return false; +} + +// <call-offset> ::= h <nv-offset> _ +// ::= v <v-offset> _ +static bool ParseCallOffset(State *state) { + ComplexityGuard guard(state); + if (guard.IsTooComplex()) return false; + ParseState copy = state->parse_state; + if (ParseOneCharToken(state, 'h') && ParseNVOffset(state) && + ParseOneCharToken(state, '_')) { + return true; + } + state->parse_state = copy; + + if (ParseOneCharToken(state, 'v') && ParseVOffset(state) && + ParseOneCharToken(state, '_')) { + return true; + } + state->parse_state = copy; + + return false; +} + +// <nv-offset> ::= <(offset) number> +static bool ParseNVOffset(State *state) { + ComplexityGuard guard(state); + if (guard.IsTooComplex()) return false; + return ParseNumber(state, nullptr); +} + +// <v-offset> ::= <(offset) number> _ <(virtual offset) number> +static bool ParseVOffset(State *state) { + ComplexityGuard guard(state); + if (guard.IsTooComplex()) return false; + ParseState copy = state->parse_state; + if (ParseNumber(state, nullptr) && ParseOneCharToken(state, '_') && + ParseNumber(state, nullptr)) { + return true; + } + state->parse_state = copy; + return false; +} + +// <ctor-dtor-name> ::= C1 | C2 | C3 | CI1 <base-class-type> | CI2 +// <base-class-type> +// ::= D0 | D1 | D2 +// # GCC extensions: "unified" constructor/destructor. See +// # +// https://github.com/gcc-mirror/gcc/blob/7ad17b583c3643bd4557f29b8391ca7ef08391f5/gcc/cp/mangle.c#L1847 +// ::= C4 | D4 +static bool ParseCtorDtorName(State *state) { + ComplexityGuard guard(state); + if (guard.IsTooComplex()) return false; + ParseState copy = state->parse_state; + if (ParseOneCharToken(state, 'C')) { + if (ParseCharClass(state, "1234")) { + const char *const prev_name = + state->out + state->parse_state.prev_name_idx; + MaybeAppendWithLength(state, prev_name, + state->parse_state.prev_name_length); + return true; + } else if (ParseOneCharToken(state, 'I') && ParseCharClass(state, "12") && + ParseClassEnumType(state)) { + return true; + } + } + state->parse_state = copy; + + if (ParseOneCharToken(state, 'D') && ParseCharClass(state, "0124")) { + const char *const prev_name = state->out + state->parse_state.prev_name_idx; + MaybeAppend(state, "~"); + MaybeAppendWithLength(state, prev_name, + state->parse_state.prev_name_length); + return true; + } + state->parse_state = copy; + return false; +} + +// <decltype> ::= Dt <expression> E # decltype of an id-expression or class +// # member access (C++0x) +// ::= DT <expression> E # decltype of an expression (C++0x) +static bool ParseDecltype(State *state) { + ComplexityGuard guard(state); + if (guard.IsTooComplex()) return false; + + ParseState copy = state->parse_state; + if (ParseOneCharToken(state, 'D') && ParseCharClass(state, "tT") && + ParseExpression(state) && ParseOneCharToken(state, 'E')) { + return true; + } + state->parse_state = copy; + + return false; +} + +// <type> ::= <CV-qualifiers> <type> +// ::= P <type> # pointer-to +// ::= R <type> # reference-to +// ::= O <type> # rvalue reference-to (C++0x) +// ::= C <type> # complex pair (C 2000) +// ::= G <type> # imaginary (C 2000) +// ::= U <source-name> <type> # vendor extended type qualifier +// ::= <builtin-type> +// ::= <function-type> +// ::= <class-enum-type> # note: just an alias for <name> +// ::= <array-type> +// ::= <pointer-to-member-type> +// ::= <template-template-param> <template-args> +// ::= <template-param> +// ::= <decltype> +// ::= <substitution> +// ::= Dp <type> # pack expansion of (C++0x) +// ::= Dv <num-elems> _ # GNU vector extension +// +static bool ParseType(State *state) { + ComplexityGuard guard(state); + if (guard.IsTooComplex()) return false; + ParseState copy = state->parse_state; + + // We should check CV-qualifers, and PRGC things first. + // + // CV-qualifiers overlap with some operator names, but an operator name is not + // valid as a type. To avoid an ambiguity that can lead to exponential time + // complexity, refuse to backtrack the CV-qualifiers. + // + // _Z4aoeuIrMvvE + // => _Z 4aoeuI rM v v E + // aoeu<operator%=, void, void> + // => _Z 4aoeuI r Mv v E + // aoeu<void void::* restrict> + // + // By consuming the CV-qualifiers first, the former parse is disabled. + if (ParseCVQualifiers(state)) { + const bool result = ParseType(state); + if (!result) state->parse_state = copy; + return result; + } + state->parse_state = copy; + + // Similarly, these tag characters can overlap with other <name>s resulting in + // two different parse prefixes that land on <template-args> in the same + // place, such as "C3r1xI...". So, disable the "ctor-name = C3" parse by + // refusing to backtrack the tag characters. + if (ParseCharClass(state, "OPRCG")) { + const bool result = ParseType(state); + if (!result) state->parse_state = copy; + return result; + } + state->parse_state = copy; + + if (ParseTwoCharToken(state, "Dp") && ParseType(state)) { + return true; + } + state->parse_state = copy; + + if (ParseOneCharToken(state, 'U') && ParseSourceName(state) && + ParseType(state)) { + return true; + } + state->parse_state = copy; + + if (ParseBuiltinType(state) || ParseFunctionType(state) || + ParseClassEnumType(state) || ParseArrayType(state) || + ParsePointerToMemberType(state) || ParseDecltype(state) || + // "std" on its own isn't a type. + ParseSubstitution(state, /*accept_std=*/false)) { + return true; + } + + if (ParseTemplateTemplateParam(state) && ParseTemplateArgs(state)) { + return true; + } + state->parse_state = copy; + + // Less greedy than <template-template-param> <template-args>. + if (ParseTemplateParam(state)) { + return true; + } + + if (ParseTwoCharToken(state, "Dv") && ParseNumber(state, nullptr) && + ParseOneCharToken(state, '_')) { + return true; + } + state->parse_state = copy; + + return false; +} + +// <CV-qualifiers> ::= [r] [V] [K] +// We don't allow empty <CV-qualifiers> to avoid infinite loop in +// ParseType(). +static bool ParseCVQualifiers(State *state) { + ComplexityGuard guard(state); + if (guard.IsTooComplex()) return false; + int num_cv_qualifiers = 0; + num_cv_qualifiers += ParseOneCharToken(state, 'r'); + num_cv_qualifiers += ParseOneCharToken(state, 'V'); + num_cv_qualifiers += ParseOneCharToken(state, 'K'); + return num_cv_qualifiers > 0; +} + +// <builtin-type> ::= v, etc. # single-character builtin types +// ::= u <source-name> +// ::= Dd, etc. # two-character builtin types +// +// Not supported: +// ::= DF <number> _ # _FloatN (N bits) +// +static bool ParseBuiltinType(State *state) { + ComplexityGuard guard(state); + if (guard.IsTooComplex()) return false; + const AbbrevPair *p; + for (p = kBuiltinTypeList; p->abbrev != nullptr; ++p) { + // Guaranteed only 1- or 2-character strings in kBuiltinTypeList. + if (p->abbrev[1] == '\0') { + if (ParseOneCharToken(state, p->abbrev[0])) { + MaybeAppend(state, p->real_name); + return true; + } + } else if (p->abbrev[2] == '\0' && ParseTwoCharToken(state, p->abbrev)) { + MaybeAppend(state, p->real_name); + return true; + } + } + + ParseState copy = state->parse_state; + if (ParseOneCharToken(state, 'u') && ParseSourceName(state)) { + return true; + } + state->parse_state = copy; + return false; +} + +// <exception-spec> ::= Do # non-throwing +// exception-specification (e.g., +// noexcept, throw()) +// ::= DO <expression> E # computed (instantiation-dependent) +// noexcept +// ::= Dw <type>+ E # dynamic exception specification +// with instantiation-dependent types +static bool ParseExceptionSpec(State *state) { + ComplexityGuard guard(state); + if (guard.IsTooComplex()) return false; + + if (ParseTwoCharToken(state, "Do")) return true; + + ParseState copy = state->parse_state; + if (ParseTwoCharToken(state, "DO") && ParseExpression(state) && + ParseOneCharToken(state, 'E')) { + return true; + } + state->parse_state = copy; + if (ParseTwoCharToken(state, "Dw") && OneOrMore(ParseType, state) && + ParseOneCharToken(state, 'E')) { + return true; + } + state->parse_state = copy; + + return false; +} + +// <function-type> ::= [exception-spec] F [Y] <bare-function-type> [O] E +static bool ParseFunctionType(State *state) { + ComplexityGuard guard(state); + if (guard.IsTooComplex()) return false; + ParseState copy = state->parse_state; + if (Optional(ParseExceptionSpec(state)) && ParseOneCharToken(state, 'F') && + Optional(ParseOneCharToken(state, 'Y')) && ParseBareFunctionType(state) && + Optional(ParseOneCharToken(state, 'O')) && + ParseOneCharToken(state, 'E')) { + return true; + } + state->parse_state = copy; + return false; +} + +// <bare-function-type> ::= <(signature) type>+ +static bool ParseBareFunctionType(State *state) { + ComplexityGuard guard(state); + if (guard.IsTooComplex()) return false; + ParseState copy = state->parse_state; + DisableAppend(state); + if (OneOrMore(ParseType, state)) { + RestoreAppend(state, copy.append); + MaybeAppend(state, "()"); + return true; + } + state->parse_state = copy; + return false; +} + +// <class-enum-type> ::= <name> +static bool ParseClassEnumType(State *state) { + ComplexityGuard guard(state); + if (guard.IsTooComplex()) return false; + return ParseName(state); +} + +// <array-type> ::= A <(positive dimension) number> _ <(element) type> +// ::= A [<(dimension) expression>] _ <(element) type> +static bool ParseArrayType(State *state) { + ComplexityGuard guard(state); + if (guard.IsTooComplex()) return false; + ParseState copy = state->parse_state; + if (ParseOneCharToken(state, 'A') && ParseNumber(state, nullptr) && + ParseOneCharToken(state, '_') && ParseType(state)) { + return true; + } + state->parse_state = copy; + + if (ParseOneCharToken(state, 'A') && Optional(ParseExpression(state)) && + ParseOneCharToken(state, '_') && ParseType(state)) { + return true; + } + state->parse_state = copy; + return false; +} + +// <pointer-to-member-type> ::= M <(class) type> <(member) type> +static bool ParsePointerToMemberType(State *state) { + ComplexityGuard guard(state); + if (guard.IsTooComplex()) return false; + ParseState copy = state->parse_state; + if (ParseOneCharToken(state, 'M') && ParseType(state) && ParseType(state)) { + return true; + } + state->parse_state = copy; + return false; +} + +// <template-param> ::= T_ +// ::= T <parameter-2 non-negative number> _ +static bool ParseTemplateParam(State *state) { + ComplexityGuard guard(state); + if (guard.IsTooComplex()) return false; + if (ParseTwoCharToken(state, "T_")) { + MaybeAppend(state, "?"); // We don't support template substitutions. + return true; + } + + ParseState copy = state->parse_state; + if (ParseOneCharToken(state, 'T') && ParseNumber(state, nullptr) && + ParseOneCharToken(state, '_')) { + MaybeAppend(state, "?"); // We don't support template substitutions. + return true; + } + state->parse_state = copy; + return false; +} + +// <template-template-param> ::= <template-param> +// ::= <substitution> +static bool ParseTemplateTemplateParam(State *state) { + ComplexityGuard guard(state); + if (guard.IsTooComplex()) return false; + return (ParseTemplateParam(state) || + // "std" on its own isn't a template. + ParseSubstitution(state, /*accept_std=*/false)); +} + +// <template-args> ::= I <template-arg>+ E +static bool ParseTemplateArgs(State *state) { + ComplexityGuard guard(state); + if (guard.IsTooComplex()) return false; + ParseState copy = state->parse_state; + DisableAppend(state); + if (ParseOneCharToken(state, 'I') && OneOrMore(ParseTemplateArg, state) && + ParseOneCharToken(state, 'E')) { + RestoreAppend(state, copy.append); + MaybeAppend(state, "<>"); + return true; + } + state->parse_state = copy; + return false; +} + +// <template-arg> ::= <type> +// ::= <expr-primary> +// ::= J <template-arg>* E # argument pack +// ::= X <expression> E +static bool ParseTemplateArg(State *state) { + ComplexityGuard guard(state); + if (guard.IsTooComplex()) return false; + ParseState copy = state->parse_state; + if (ParseOneCharToken(state, 'J') && ZeroOrMore(ParseTemplateArg, state) && + ParseOneCharToken(state, 'E')) { + return true; + } + state->parse_state = copy; + + // There can be significant overlap between the following leading to + // exponential backtracking: + // + // <expr-primary> ::= L <type> <expr-cast-value> E + // e.g. L 2xxIvE 1 E + // <type> ==> <local-source-name> <template-args> + // e.g. L 2xx IvE + // + // This means parsing an entire <type> twice, and <type> can contain + // <template-arg>, so this can generate exponential backtracking. There is + // only overlap when the remaining input starts with "L <source-name>", so + // parse all cases that can start this way jointly to share the common prefix. + // + // We have: + // + // <template-arg> ::= <type> + // ::= <expr-primary> + // + // First, drop all the productions of <type> that must start with something + // other than 'L'. All that's left is <class-enum-type>; inline it. + // + // <type> ::= <nested-name> # starts with 'N' + // ::= <unscoped-name> + // ::= <unscoped-template-name> <template-args> + // ::= <local-name> # starts with 'Z' + // + // Drop and inline again: + // + // <type> ::= <unscoped-name> + // ::= <unscoped-name> <template-args> + // ::= <substitution> <template-args> # starts with 'S' + // + // Merge the first two, inline <unscoped-name>, drop last: + // + // <type> ::= <unqualified-name> [<template-args>] + // ::= St <unqualified-name> [<template-args>] # starts with 'S' + // + // Drop and inline: + // + // <type> ::= <operator-name> [<template-args>] # starts with lowercase + // ::= <ctor-dtor-name> [<template-args>] # starts with 'C' or 'D' + // ::= <source-name> [<template-args>] # starts with digit + // ::= <local-source-name> [<template-args>] + // ::= <unnamed-type-name> [<template-args>] # starts with 'U' + // + // One more time: + // + // <type> ::= L <source-name> [<template-args>] + // + // Likewise with <expr-primary>: + // + // <expr-primary> ::= L <type> <expr-cast-value> E + // ::= LZ <encoding> E # cannot overlap; drop + // ::= L <mangled_name> E # cannot overlap; drop + // + // By similar reasoning as shown above, the only <type>s starting with + // <source-name> are "<source-name> [<template-args>]". Inline this. + // + // <expr-primary> ::= L <source-name> [<template-args>] <expr-cast-value> E + // + // Now inline both of these into <template-arg>: + // + // <template-arg> ::= L <source-name> [<template-args>] + // ::= L <source-name> [<template-args>] <expr-cast-value> E + // + // Merge them and we're done: + // <template-arg> + // ::= L <source-name> [<template-args>] [<expr-cast-value> E] + if (ParseLocalSourceName(state) && Optional(ParseTemplateArgs(state))) { + copy = state->parse_state; + if (ParseExprCastValue(state) && ParseOneCharToken(state, 'E')) { + return true; + } + state->parse_state = copy; + return true; + } + + // Now that the overlapping cases can't reach this code, we can safely call + // both of these. + if (ParseType(state) || ParseExprPrimary(state)) { + return true; + } + state->parse_state = copy; + + if (ParseOneCharToken(state, 'X') && ParseExpression(state) && + ParseOneCharToken(state, 'E')) { + return true; + } + state->parse_state = copy; + return false; +} + +// <unresolved-type> ::= <template-param> [<template-args>] +// ::= <decltype> +// ::= <substitution> +static inline bool ParseUnresolvedType(State *state) { + // No ComplexityGuard because we don't copy the state in this stack frame. + return (ParseTemplateParam(state) && Optional(ParseTemplateArgs(state))) || + ParseDecltype(state) || ParseSubstitution(state, /*accept_std=*/false); +} + +// <simple-id> ::= <source-name> [<template-args>] +static inline bool ParseSimpleId(State *state) { + // No ComplexityGuard because we don't copy the state in this stack frame. + + // Note: <simple-id> cannot be followed by a parameter pack; see comment in + // ParseUnresolvedType. + return ParseSourceName(state) && Optional(ParseTemplateArgs(state)); +} + +// <base-unresolved-name> ::= <source-name> [<template-args>] +// ::= on <operator-name> [<template-args>] +// ::= dn <destructor-name> +static bool ParseBaseUnresolvedName(State *state) { + ComplexityGuard guard(state); + if (guard.IsTooComplex()) return false; + + if (ParseSimpleId(state)) { + return true; + } + + ParseState copy = state->parse_state; + if (ParseTwoCharToken(state, "on") && ParseOperatorName(state, nullptr) && + Optional(ParseTemplateArgs(state))) { + return true; + } + state->parse_state = copy; + + if (ParseTwoCharToken(state, "dn") && + (ParseUnresolvedType(state) || ParseSimpleId(state))) { + return true; + } + state->parse_state = copy; + + return false; +} + +// <unresolved-name> ::= [gs] <base-unresolved-name> +// ::= sr <unresolved-type> <base-unresolved-name> +// ::= srN <unresolved-type> <unresolved-qualifier-level>+ E +// <base-unresolved-name> +// ::= [gs] sr <unresolved-qualifier-level>+ E +// <base-unresolved-name> +static bool ParseUnresolvedName(State *state) { + ComplexityGuard guard(state); + if (guard.IsTooComplex()) return false; + + ParseState copy = state->parse_state; + if (Optional(ParseTwoCharToken(state, "gs")) && + ParseBaseUnresolvedName(state)) { + return true; + } + state->parse_state = copy; + + if (ParseTwoCharToken(state, "sr") && ParseUnresolvedType(state) && + ParseBaseUnresolvedName(state)) { + return true; + } + state->parse_state = copy; + + if (ParseTwoCharToken(state, "sr") && ParseOneCharToken(state, 'N') && + ParseUnresolvedType(state) && + OneOrMore(/* <unresolved-qualifier-level> ::= */ ParseSimpleId, state) && + ParseOneCharToken(state, 'E') && ParseBaseUnresolvedName(state)) { + return true; + } + state->parse_state = copy; + + if (Optional(ParseTwoCharToken(state, "gs")) && + ParseTwoCharToken(state, "sr") && + OneOrMore(/* <unresolved-qualifier-level> ::= */ ParseSimpleId, state) && + ParseOneCharToken(state, 'E') && ParseBaseUnresolvedName(state)) { + return true; + } + state->parse_state = copy; + + return false; +} + +// <expression> ::= <1-ary operator-name> <expression> +// ::= <2-ary operator-name> <expression> <expression> +// ::= <3-ary operator-name> <expression> <expression> <expression> +// ::= cl <expression>+ E +// ::= cv <type> <expression> # type (expression) +// ::= cv <type> _ <expression>* E # type (expr-list) +// ::= st <type> +// ::= <template-param> +// ::= <function-param> +// ::= <expr-primary> +// ::= dt <expression> <unresolved-name> # expr.name +// ::= pt <expression> <unresolved-name> # expr->name +// ::= sp <expression> # argument pack expansion +// ::= sr <type> <unqualified-name> <template-args> +// ::= sr <type> <unqualified-name> +// <function-param> ::= fp <(top-level) CV-qualifiers> _ +// ::= fp <(top-level) CV-qualifiers> <number> _ +// ::= fL <number> p <(top-level) CV-qualifiers> _ +// ::= fL <number> p <(top-level) CV-qualifiers> <number> _ +static bool ParseExpression(State *state) { + ComplexityGuard guard(state); + if (guard.IsTooComplex()) return false; + if (ParseTemplateParam(state) || ParseExprPrimary(state)) { + return true; + } + + // Object/function call expression. + ParseState copy = state->parse_state; + if (ParseTwoCharToken(state, "cl") && OneOrMore(ParseExpression, state) && + ParseOneCharToken(state, 'E')) { + return true; + } + state->parse_state = copy; + + // Function-param expression (level 0). + if (ParseTwoCharToken(state, "fp") && Optional(ParseCVQualifiers(state)) && + Optional(ParseNumber(state, nullptr)) && ParseOneCharToken(state, '_')) { + return true; + } + state->parse_state = copy; + + // Function-param expression (level 1+). + if (ParseTwoCharToken(state, "fL") && Optional(ParseNumber(state, nullptr)) && + ParseOneCharToken(state, 'p') && Optional(ParseCVQualifiers(state)) && + Optional(ParseNumber(state, nullptr)) && ParseOneCharToken(state, '_')) { + return true; + } + state->parse_state = copy; + + // Parse the conversion expressions jointly to avoid re-parsing the <type> in + // their common prefix. Parsed as: + // <expression> ::= cv <type> <conversion-args> + // <conversion-args> ::= _ <expression>* E + // ::= <expression> + // + // Also don't try ParseOperatorName after seeing "cv", since ParseOperatorName + // also needs to accept "cv <type>" in other contexts. + if (ParseTwoCharToken(state, "cv")) { + if (ParseType(state)) { + ParseState copy2 = state->parse_state; + if (ParseOneCharToken(state, '_') && ZeroOrMore(ParseExpression, state) && + ParseOneCharToken(state, 'E')) { + return true; + } + state->parse_state = copy2; + if (ParseExpression(state)) { + return true; + } + } + } else { + // Parse unary, binary, and ternary operator expressions jointly, taking + // care not to re-parse subexpressions repeatedly. Parse like: + // <expression> ::= <operator-name> <expression> + // [<one-to-two-expressions>] + // <one-to-two-expressions> ::= <expression> [<expression>] + int arity = -1; + if (ParseOperatorName(state, &arity) && + arity > 0 && // 0 arity => disabled. + (arity < 3 || ParseExpression(state)) && + (arity < 2 || ParseExpression(state)) && + (arity < 1 || ParseExpression(state))) { + return true; + } + } + state->parse_state = copy; + + // sizeof type + if (ParseTwoCharToken(state, "st") && ParseType(state)) { + return true; + } + state->parse_state = copy; + + // Object and pointer member access expressions. + if ((ParseTwoCharToken(state, "dt") || ParseTwoCharToken(state, "pt")) && + ParseExpression(state) && ParseType(state)) { + return true; + } + state->parse_state = copy; + + // Pointer-to-member access expressions. This parses the same as a binary + // operator, but it's implemented separately because "ds" shouldn't be + // accepted in other contexts that parse an operator name. + if (ParseTwoCharToken(state, "ds") && ParseExpression(state) && + ParseExpression(state)) { + return true; + } + state->parse_state = copy; + + // Parameter pack expansion + if (ParseTwoCharToken(state, "sp") && ParseExpression(state)) { + return true; + } + state->parse_state = copy; + + return ParseUnresolvedName(state); +} + +// <expr-primary> ::= L <type> <(value) number> E +// ::= L <type> <(value) float> E +// ::= L <mangled-name> E +// // A bug in g++'s C++ ABI version 2 (-fabi-version=2). +// ::= LZ <encoding> E +// +// Warning, subtle: the "bug" LZ production above is ambiguous with the first +// production where <type> starts with <local-name>, which can lead to +// exponential backtracking in two scenarios: +// +// - When whatever follows the E in the <local-name> in the first production is +// not a name, we backtrack the whole <encoding> and re-parse the whole thing. +// +// - When whatever follows the <local-name> in the first production is not a +// number and this <expr-primary> may be followed by a name, we backtrack the +// <name> and re-parse it. +// +// Moreover this ambiguity isn't always resolved -- for example, the following +// has two different parses: +// +// _ZaaILZ4aoeuE1x1EvE +// => operator&&<aoeu, x, E, void> +// => operator&&<(aoeu::x)(1), void> +// +// To resolve this, we just do what GCC's demangler does, and refuse to parse +// casts to <local-name> types. +static bool ParseExprPrimary(State *state) { + ComplexityGuard guard(state); + if (guard.IsTooComplex()) return false; + ParseState copy = state->parse_state; + + // The "LZ" special case: if we see LZ, we commit to accept "LZ <encoding> E" + // or fail, no backtracking. + if (ParseTwoCharToken(state, "LZ")) { + if (ParseEncoding(state) && ParseOneCharToken(state, 'E')) { + return true; + } + + state->parse_state = copy; + return false; + } + + // The merged cast production. + if (ParseOneCharToken(state, 'L') && ParseType(state) && + ParseExprCastValue(state)) { + return true; + } + state->parse_state = copy; + + if (ParseOneCharToken(state, 'L') && ParseMangledName(state) && + ParseOneCharToken(state, 'E')) { + return true; + } + state->parse_state = copy; + + return false; +} + +// <number> or <float>, followed by 'E', as described above ParseExprPrimary. +static bool ParseExprCastValue(State *state) { + ComplexityGuard guard(state); + if (guard.IsTooComplex()) return false; + // We have to be able to backtrack after accepting a number because we could + // have e.g. "7fffE", which will accept "7" as a number but then fail to find + // the 'E'. + ParseState copy = state->parse_state; + if (ParseNumber(state, nullptr) && ParseOneCharToken(state, 'E')) { + return true; + } + state->parse_state = copy; + + if (ParseFloatNumber(state) && ParseOneCharToken(state, 'E')) { + return true; + } + state->parse_state = copy; + + return false; +} + +// <local-name> ::= Z <(function) encoding> E <(entity) name> [<discriminator>] +// ::= Z <(function) encoding> E s [<discriminator>] +// +// Parsing a common prefix of these two productions together avoids an +// exponential blowup of backtracking. Parse like: +// <local-name> := Z <encoding> E <local-name-suffix> +// <local-name-suffix> ::= s [<discriminator>] +// ::= <name> [<discriminator>] + +static bool ParseLocalNameSuffix(State *state) { + ComplexityGuard guard(state); + if (guard.IsTooComplex()) return false; + + if (MaybeAppend(state, "::") && ParseName(state) && + Optional(ParseDiscriminator(state))) { + return true; + } + + // Since we're not going to overwrite the above "::" by re-parsing the + // <encoding> (whose trailing '\0' byte was in the byte now holding the + // first ':'), we have to rollback the "::" if the <name> parse failed. + if (state->parse_state.append) { + state->out[state->parse_state.out_cur_idx - 2] = '\0'; + } + + return ParseOneCharToken(state, 's') && Optional(ParseDiscriminator(state)); +} + +static bool ParseLocalName(State *state) { + ComplexityGuard guard(state); + if (guard.IsTooComplex()) return false; + ParseState copy = state->parse_state; + if (ParseOneCharToken(state, 'Z') && ParseEncoding(state) && + ParseOneCharToken(state, 'E') && ParseLocalNameSuffix(state)) { + return true; + } + state->parse_state = copy; + return false; +} + +// <discriminator> := _ <(non-negative) number> +static bool ParseDiscriminator(State *state) { + ComplexityGuard guard(state); + if (guard.IsTooComplex()) return false; + ParseState copy = state->parse_state; + if (ParseOneCharToken(state, '_') && ParseNumber(state, nullptr)) { + return true; + } + state->parse_state = copy; + return false; +} + +// <substitution> ::= S_ +// ::= S <seq-id> _ +// ::= St, etc. +// +// "St" is special in that it's not valid as a standalone name, and it *is* +// allowed to precede a name without being wrapped in "N...E". This means that +// if we accept it on its own, we can accept "St1a" and try to parse +// template-args, then fail and backtrack, accept "St" on its own, then "1a" as +// an unqualified name and re-parse the same template-args. To block this +// exponential backtracking, we disable it with 'accept_std=false' in +// problematic contexts. +static bool ParseSubstitution(State *state, bool accept_std) { + ComplexityGuard guard(state); + if (guard.IsTooComplex()) return false; + if (ParseTwoCharToken(state, "S_")) { + MaybeAppend(state, "?"); // We don't support substitutions. + return true; + } + + ParseState copy = state->parse_state; + if (ParseOneCharToken(state, 'S') && ParseSeqId(state) && + ParseOneCharToken(state, '_')) { + MaybeAppend(state, "?"); // We don't support substitutions. + return true; + } + state->parse_state = copy; + + // Expand abbreviations like "St" => "std". + if (ParseOneCharToken(state, 'S')) { + const AbbrevPair *p; + for (p = kSubstitutionList; p->abbrev != nullptr; ++p) { + if (RemainingInput(state)[0] == p->abbrev[1] && + (accept_std || p->abbrev[1] != 't')) { + MaybeAppend(state, "std"); + if (p->real_name[0] != '\0') { + MaybeAppend(state, "::"); + MaybeAppend(state, p->real_name); + } + ++state->parse_state.mangled_idx; + return true; + } + } + } + state->parse_state = copy; + return false; +} + +// Parse <mangled-name>, optionally followed by either a function-clone suffix +// or version suffix. Returns true only if all of "mangled_cur" was consumed. +static bool ParseTopLevelMangledName(State *state) { + ComplexityGuard guard(state); + if (guard.IsTooComplex()) return false; + if (ParseMangledName(state)) { + if (RemainingInput(state)[0] != '\0') { + // Drop trailing function clone suffix, if any. + if (IsFunctionCloneSuffix(RemainingInput(state))) { + return true; + } + // Append trailing version suffix if any. + // ex. _Z3foo@@GLIBCXX_3.4 + if (RemainingInput(state)[0] == '@') { + MaybeAppend(state, RemainingInput(state)); + return true; + } + return false; // Unconsumed suffix. + } + return true; + } + return false; +} + +static bool Overflowed(const State *state) { + return state->parse_state.out_cur_idx >= state->out_end_idx; +} + +// The demangler entry point. +bool Demangle(const char *mangled, char *out, int out_size) { + State state; + InitState(&state, mangled, out, out_size); + return ParseTopLevelMangledName(&state) && !Overflowed(&state) && + state.parse_state.out_cur_idx > 0; +} + +} // namespace debugging_internal +ABSL_NAMESPACE_END +} // namespace absl diff --git a/third_party/abseil_cpp/absl/debugging/internal/demangle.h b/third_party/abseil_cpp/absl/debugging/internal/demangle.h new file mode 100644 index 000000000000..c314d9bc237c --- /dev/null +++ b/third_party/abseil_cpp/absl/debugging/internal/demangle.h @@ -0,0 +1,71 @@ +// Copyright 2018 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// An async-signal-safe and thread-safe demangler for Itanium C++ ABI +// (aka G++ V3 ABI). +// +// The demangler is implemented to be used in async signal handlers to +// symbolize stack traces. We cannot use libstdc++'s +// abi::__cxa_demangle() in such signal handlers since it's not async +// signal safe (it uses malloc() internally). +// +// Note that this demangler doesn't support full demangling. More +// specifically, it doesn't print types of function parameters and +// types of template arguments. It just skips them. However, it's +// still very useful to extract basic information such as class, +// function, constructor, destructor, and operator names. +// +// See the implementation note in demangle.cc if you are interested. +// +// Example: +// +// | Mangled Name | The Demangler | abi::__cxa_demangle() +// |---------------|---------------|----------------------- +// | _Z1fv | f() | f() +// | _Z1fi | f() | f(int) +// | _Z3foo3bar | foo() | foo(bar) +// | _Z1fIiEvi | f<>() | void f<int>(int) +// | _ZN1N1fE | N::f | N::f +// | _ZN3Foo3BarEv | Foo::Bar() | Foo::Bar() +// | _Zrm1XS_" | operator%() | operator%(X, X) +// | _ZN3FooC1Ev | Foo::Foo() | Foo::Foo() +// | _Z1fSs | f() | f(std::basic_string<char, +// | | | std::char_traits<char>, +// | | | std::allocator<char> >) +// +// See the unit test for more examples. +// +// Note: we might want to write demanglers for ABIs other than Itanium +// C++ ABI in the future. +// + +#ifndef ABSL_DEBUGGING_INTERNAL_DEMANGLE_H_ +#define ABSL_DEBUGGING_INTERNAL_DEMANGLE_H_ + +#include "absl/base/config.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace debugging_internal { + +// Demangle `mangled`. On success, return true and write the +// demangled symbol name to `out`. Otherwise, return false. +// `out` is modified even if demangling is unsuccessful. +bool Demangle(const char *mangled, char *out, int out_size); + +} // namespace debugging_internal +ABSL_NAMESPACE_END +} // namespace absl + +#endif // ABSL_DEBUGGING_INTERNAL_DEMANGLE_H_ diff --git a/third_party/abseil_cpp/absl/debugging/internal/demangle_test.cc b/third_party/abseil_cpp/absl/debugging/internal/demangle_test.cc new file mode 100644 index 000000000000..0bed7359d8b4 --- /dev/null +++ b/third_party/abseil_cpp/absl/debugging/internal/demangle_test.cc @@ -0,0 +1,197 @@ +// Copyright 2018 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/debugging/internal/demangle.h" + +#include <cstdlib> +#include <string> + +#include "gtest/gtest.h" +#include "absl/base/config.h" +#include "absl/base/internal/raw_logging.h" +#include "absl/debugging/internal/stack_consumption.h" +#include "absl/memory/memory.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace debugging_internal { +namespace { + +// A wrapper function for Demangle() to make the unit test simple. +static const char *DemangleIt(const char * const mangled) { + static char demangled[4096]; + if (Demangle(mangled, demangled, sizeof(demangled))) { + return demangled; + } else { + return mangled; + } +} + +// Test corner cases of bounary conditions. +TEST(Demangle, CornerCases) { + char tmp[10]; + EXPECT_TRUE(Demangle("_Z6foobarv", tmp, sizeof(tmp))); + // sizeof("foobar()") == 9 + EXPECT_STREQ("foobar()", tmp); + EXPECT_TRUE(Demangle("_Z6foobarv", tmp, 9)); + EXPECT_STREQ("foobar()", tmp); + EXPECT_FALSE(Demangle("_Z6foobarv", tmp, 8)); // Not enough. + EXPECT_FALSE(Demangle("_Z6foobarv", tmp, 1)); + EXPECT_FALSE(Demangle("_Z6foobarv", tmp, 0)); + EXPECT_FALSE(Demangle("_Z6foobarv", nullptr, 0)); // Should not cause SEGV. + EXPECT_FALSE(Demangle("_Z1000000", tmp, 9)); +} + +// Test handling of functions suffixed with .clone.N, which is used +// by GCC 4.5.x (and our locally-modified version of GCC 4.4.x), and +// .constprop.N and .isra.N, which are used by GCC 4.6.x. These +// suffixes are used to indicate functions which have been cloned +// during optimization. We ignore these suffixes. +TEST(Demangle, Clones) { + char tmp[20]; + EXPECT_TRUE(Demangle("_ZL3Foov", tmp, sizeof(tmp))); + EXPECT_STREQ("Foo()", tmp); + EXPECT_TRUE(Demangle("_ZL3Foov.clone.3", tmp, sizeof(tmp))); + EXPECT_STREQ("Foo()", tmp); + EXPECT_TRUE(Demangle("_ZL3Foov.constprop.80", tmp, sizeof(tmp))); + EXPECT_STREQ("Foo()", tmp); + EXPECT_TRUE(Demangle("_ZL3Foov.isra.18", tmp, sizeof(tmp))); + EXPECT_STREQ("Foo()", tmp); + EXPECT_TRUE(Demangle("_ZL3Foov.isra.2.constprop.18", tmp, sizeof(tmp))); + EXPECT_STREQ("Foo()", tmp); + // Invalid (truncated), should not demangle. + EXPECT_FALSE(Demangle("_ZL3Foov.clo", tmp, sizeof(tmp))); + // Invalid (.clone. not followed by number), should not demangle. + EXPECT_FALSE(Demangle("_ZL3Foov.clone.", tmp, sizeof(tmp))); + // Invalid (.clone. followed by non-number), should not demangle. + EXPECT_FALSE(Demangle("_ZL3Foov.clone.foo", tmp, sizeof(tmp))); + // Invalid (.constprop. not followed by number), should not demangle. + EXPECT_FALSE(Demangle("_ZL3Foov.isra.2.constprop.", tmp, sizeof(tmp))); +} + +// Tests that verify that Demangle footprint is within some limit. +// They are not to be run under sanitizers as the sanitizers increase +// stack consumption by about 4x. +#if defined(ABSL_INTERNAL_HAVE_DEBUGGING_STACK_CONSUMPTION) && \ + !defined(ABSL_HAVE_ADDRESS_SANITIZER) && \ + !defined(ABSL_HAVE_MEMORY_SANITIZER) && \ + !defined(ABSL_HAVE_THREAD_SANITIZER) + +static const char *g_mangled; +static char g_demangle_buffer[4096]; +static char *g_demangle_result; + +static void DemangleSignalHandler(int signo) { + if (Demangle(g_mangled, g_demangle_buffer, sizeof(g_demangle_buffer))) { + g_demangle_result = g_demangle_buffer; + } else { + g_demangle_result = nullptr; + } +} + +// Call Demangle and figure out the stack footprint of this call. +static const char *DemangleStackConsumption(const char *mangled, + int *stack_consumed) { + g_mangled = mangled; + *stack_consumed = GetSignalHandlerStackConsumption(DemangleSignalHandler); + ABSL_RAW_LOG(INFO, "Stack consumption of Demangle: %d", *stack_consumed); + return g_demangle_result; +} + +// Demangle stack consumption should be within 8kB for simple mangled names +// with some level of nesting. With alternate signal stack we have 64K, +// but some signal handlers run on thread stack, and could have arbitrarily +// little space left (so we don't want to make this number too large). +const int kStackConsumptionUpperLimit = 8192; + +// Returns a mangled name nested to the given depth. +static std::string NestedMangledName(int depth) { + std::string mangled_name = "_Z1a"; + if (depth > 0) { + mangled_name += "IXL"; + mangled_name += NestedMangledName(depth - 1); + mangled_name += "EEE"; + } + return mangled_name; +} + +TEST(Demangle, DemangleStackConsumption) { + // Measure stack consumption of Demangle for nested mangled names of varying + // depth. Since Demangle is implemented as a recursive descent parser, + // stack consumption will grow as the nesting depth increases. By measuring + // the stack consumption for increasing depths, we can see the growing + // impact of any stack-saving changes made to the code for Demangle. + int stack_consumed = 0; + + const char *demangled = + DemangleStackConsumption("_Z6foobarv", &stack_consumed); + EXPECT_STREQ("foobar()", demangled); + EXPECT_GT(stack_consumed, 0); + EXPECT_LT(stack_consumed, kStackConsumptionUpperLimit); + + const std::string nested_mangled_name0 = NestedMangledName(0); + demangled = DemangleStackConsumption(nested_mangled_name0.c_str(), + &stack_consumed); + EXPECT_STREQ("a", demangled); + EXPECT_GT(stack_consumed, 0); + EXPECT_LT(stack_consumed, kStackConsumptionUpperLimit); + + const std::string nested_mangled_name1 = NestedMangledName(1); + demangled = DemangleStackConsumption(nested_mangled_name1.c_str(), + &stack_consumed); + EXPECT_STREQ("a<>", demangled); + EXPECT_GT(stack_consumed, 0); + EXPECT_LT(stack_consumed, kStackConsumptionUpperLimit); + + const std::string nested_mangled_name2 = NestedMangledName(2); + demangled = DemangleStackConsumption(nested_mangled_name2.c_str(), + &stack_consumed); + EXPECT_STREQ("a<>", demangled); + EXPECT_GT(stack_consumed, 0); + EXPECT_LT(stack_consumed, kStackConsumptionUpperLimit); + + const std::string nested_mangled_name3 = NestedMangledName(3); + demangled = DemangleStackConsumption(nested_mangled_name3.c_str(), + &stack_consumed); + EXPECT_STREQ("a<>", demangled); + EXPECT_GT(stack_consumed, 0); + EXPECT_LT(stack_consumed, kStackConsumptionUpperLimit); +} + +#endif // Stack consumption tests + +static void TestOnInput(const char* input) { + static const int kOutSize = 1048576; + auto out = absl::make_unique<char[]>(kOutSize); + Demangle(input, out.get(), kOutSize); +} + +TEST(DemangleRegression, NegativeLength) { + TestOnInput("_ZZn4"); +} + +TEST(DemangleRegression, DeeplyNestedArrayType) { + const int depth = 100000; + std::string data = "_ZStI"; + data.reserve(data.size() + 3 * depth + 1); + for (int i = 0; i < depth; i++) { + data += "A1_"; + } + TestOnInput(data.c_str()); +} + +} // namespace +} // namespace debugging_internal +ABSL_NAMESPACE_END +} // namespace absl diff --git a/third_party/abseil_cpp/absl/debugging/internal/elf_mem_image.cc b/third_party/abseil_cpp/absl/debugging/internal/elf_mem_image.cc new file mode 100644 index 000000000000..24cc01302dc5 --- /dev/null +++ b/third_party/abseil_cpp/absl/debugging/internal/elf_mem_image.cc @@ -0,0 +1,382 @@ +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Allow dynamic symbol lookup in an in-memory Elf image. +// + +#include "absl/debugging/internal/elf_mem_image.h" + +#ifdef ABSL_HAVE_ELF_MEM_IMAGE // defined in elf_mem_image.h + +#include <string.h> +#include <cassert> +#include <cstddef> +#include "absl/base/internal/raw_logging.h" + +// From binutils/include/elf/common.h (this doesn't appear to be documented +// anywhere else). +// +// /* This flag appears in a Versym structure. It means that the symbol +// is hidden, and is only visible with an explicit version number. +// This is a GNU extension. */ +// #define VERSYM_HIDDEN 0x8000 +// +// /* This is the mask for the rest of the Versym information. */ +// #define VERSYM_VERSION 0x7fff + +#define VERSYM_VERSION 0x7fff + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace debugging_internal { + +namespace { + +#if __WORDSIZE == 32 +const int kElfClass = ELFCLASS32; +int ElfBind(const ElfW(Sym) *symbol) { return ELF32_ST_BIND(symbol->st_info); } +int ElfType(const ElfW(Sym) *symbol) { return ELF32_ST_TYPE(symbol->st_info); } +#elif __WORDSIZE == 64 +const int kElfClass = ELFCLASS64; +int ElfBind(const ElfW(Sym) *symbol) { return ELF64_ST_BIND(symbol->st_info); } +int ElfType(const ElfW(Sym) *symbol) { return ELF64_ST_TYPE(symbol->st_info); } +#else +const int kElfClass = -1; +int ElfBind(const ElfW(Sym) *) { + ABSL_RAW_LOG(FATAL, "Unexpected word size"); + return 0; +} +int ElfType(const ElfW(Sym) *) { + ABSL_RAW_LOG(FATAL, "Unexpected word size"); + return 0; +} +#endif + +// Extract an element from one of the ELF tables, cast it to desired type. +// This is just a simple arithmetic and a glorified cast. +// Callers are responsible for bounds checking. +template <typename T> +const T *GetTableElement(const ElfW(Ehdr) * ehdr, ElfW(Off) table_offset, + ElfW(Word) element_size, size_t index) { + return reinterpret_cast<const T*>(reinterpret_cast<const char *>(ehdr) + + table_offset + + index * element_size); +} + +} // namespace + +// The value of this variable doesn't matter; it's used only for its +// unique address. +const int ElfMemImage::kInvalidBaseSentinel = 0; + +ElfMemImage::ElfMemImage(const void *base) { + ABSL_RAW_CHECK(base != kInvalidBase, "bad pointer"); + Init(base); +} + +int ElfMemImage::GetNumSymbols() const { + if (!hash_) { + return 0; + } + // See http://www.caldera.com/developers/gabi/latest/ch5.dynamic.html#hash + return hash_[1]; +} + +const ElfW(Sym) *ElfMemImage::GetDynsym(int index) const { + ABSL_RAW_CHECK(index < GetNumSymbols(), "index out of range"); + return dynsym_ + index; +} + +const ElfW(Versym) *ElfMemImage::GetVersym(int index) const { + ABSL_RAW_CHECK(index < GetNumSymbols(), "index out of range"); + return versym_ + index; +} + +const ElfW(Phdr) *ElfMemImage::GetPhdr(int index) const { + ABSL_RAW_CHECK(index < ehdr_->e_phnum, "index out of range"); + return GetTableElement<ElfW(Phdr)>(ehdr_, + ehdr_->e_phoff, + ehdr_->e_phentsize, + index); +} + +const char *ElfMemImage::GetDynstr(ElfW(Word) offset) const { + ABSL_RAW_CHECK(offset < strsize_, "offset out of range"); + return dynstr_ + offset; +} + +const void *ElfMemImage::GetSymAddr(const ElfW(Sym) *sym) const { + if (sym->st_shndx == SHN_UNDEF || sym->st_shndx >= SHN_LORESERVE) { + // Symbol corresponds to "special" (e.g. SHN_ABS) section. + return reinterpret_cast<const void *>(sym->st_value); + } + ABSL_RAW_CHECK(link_base_ < sym->st_value, "symbol out of range"); + return GetTableElement<char>(ehdr_, 0, 1, sym->st_value - link_base_); +} + +const ElfW(Verdef) *ElfMemImage::GetVerdef(int index) const { + ABSL_RAW_CHECK(0 <= index && static_cast<size_t>(index) <= verdefnum_, + "index out of range"); + const ElfW(Verdef) *version_definition = verdef_; + while (version_definition->vd_ndx < index && version_definition->vd_next) { + const char *const version_definition_as_char = + reinterpret_cast<const char *>(version_definition); + version_definition = + reinterpret_cast<const ElfW(Verdef) *>(version_definition_as_char + + version_definition->vd_next); + } + return version_definition->vd_ndx == index ? version_definition : nullptr; +} + +const ElfW(Verdaux) *ElfMemImage::GetVerdefAux( + const ElfW(Verdef) *verdef) const { + return reinterpret_cast<const ElfW(Verdaux) *>(verdef+1); +} + +const char *ElfMemImage::GetVerstr(ElfW(Word) offset) const { + ABSL_RAW_CHECK(offset < strsize_, "offset out of range"); + return dynstr_ + offset; +} + +void ElfMemImage::Init(const void *base) { + ehdr_ = nullptr; + dynsym_ = nullptr; + dynstr_ = nullptr; + versym_ = nullptr; + verdef_ = nullptr; + hash_ = nullptr; + strsize_ = 0; + verdefnum_ = 0; + link_base_ = ~0L; // Sentinel: PT_LOAD .p_vaddr can't possibly be this. + if (!base) { + return; + } + const char *const base_as_char = reinterpret_cast<const char *>(base); + if (base_as_char[EI_MAG0] != ELFMAG0 || base_as_char[EI_MAG1] != ELFMAG1 || + base_as_char[EI_MAG2] != ELFMAG2 || base_as_char[EI_MAG3] != ELFMAG3) { + assert(false); + return; + } + int elf_class = base_as_char[EI_CLASS]; + if (elf_class != kElfClass) { + assert(false); + return; + } + switch (base_as_char[EI_DATA]) { + case ELFDATA2LSB: { + if (__LITTLE_ENDIAN != __BYTE_ORDER) { + assert(false); + return; + } + break; + } + case ELFDATA2MSB: { + if (__BIG_ENDIAN != __BYTE_ORDER) { + assert(false); + return; + } + break; + } + default: { + assert(false); + return; + } + } + + ehdr_ = reinterpret_cast<const ElfW(Ehdr) *>(base); + const ElfW(Phdr) *dynamic_program_header = nullptr; + for (int i = 0; i < ehdr_->e_phnum; ++i) { + const ElfW(Phdr) *const program_header = GetPhdr(i); + switch (program_header->p_type) { + case PT_LOAD: + if (!~link_base_) { + link_base_ = program_header->p_vaddr; + } + break; + case PT_DYNAMIC: + dynamic_program_header = program_header; + break; + } + } + if (!~link_base_ || !dynamic_program_header) { + assert(false); + // Mark this image as not present. Can not recur infinitely. + Init(nullptr); + return; + } + ptrdiff_t relocation = + base_as_char - reinterpret_cast<const char *>(link_base_); + ElfW(Dyn) *dynamic_entry = + reinterpret_cast<ElfW(Dyn) *>(dynamic_program_header->p_vaddr + + relocation); + for (; dynamic_entry->d_tag != DT_NULL; ++dynamic_entry) { + const ElfW(Xword) value = dynamic_entry->d_un.d_val + relocation; + switch (dynamic_entry->d_tag) { + case DT_HASH: + hash_ = reinterpret_cast<ElfW(Word) *>(value); + break; + case DT_SYMTAB: + dynsym_ = reinterpret_cast<ElfW(Sym) *>(value); + break; + case DT_STRTAB: + dynstr_ = reinterpret_cast<const char *>(value); + break; + case DT_VERSYM: + versym_ = reinterpret_cast<ElfW(Versym) *>(value); + break; + case DT_VERDEF: + verdef_ = reinterpret_cast<ElfW(Verdef) *>(value); + break; + case DT_VERDEFNUM: + verdefnum_ = dynamic_entry->d_un.d_val; + break; + case DT_STRSZ: + strsize_ = dynamic_entry->d_un.d_val; + break; + default: + // Unrecognized entries explicitly ignored. + break; + } + } + if (!hash_ || !dynsym_ || !dynstr_ || !versym_ || + !verdef_ || !verdefnum_ || !strsize_) { + assert(false); // invalid VDSO + // Mark this image as not present. Can not recur infinitely. + Init(nullptr); + return; + } +} + +bool ElfMemImage::LookupSymbol(const char *name, + const char *version, + int type, + SymbolInfo *info_out) const { + for (const SymbolInfo& info : *this) { + if (strcmp(info.name, name) == 0 && strcmp(info.version, version) == 0 && + ElfType(info.symbol) == type) { + if (info_out) { + *info_out = info; + } + return true; + } + } + return false; +} + +bool ElfMemImage::LookupSymbolByAddress(const void *address, + SymbolInfo *info_out) const { + for (const SymbolInfo& info : *this) { + const char *const symbol_start = + reinterpret_cast<const char *>(info.address); + const char *const symbol_end = symbol_start + info.symbol->st_size; + if (symbol_start <= address && address < symbol_end) { + if (info_out) { + // Client wants to know details for that symbol (the usual case). + if (ElfBind(info.symbol) == STB_GLOBAL) { + // Strong symbol; just return it. + *info_out = info; + return true; + } else { + // Weak or local. Record it, but keep looking for a strong one. + *info_out = info; + } + } else { + // Client only cares if there is an overlapping symbol. + return true; + } + } + } + return false; +} + +ElfMemImage::SymbolIterator::SymbolIterator(const void *const image, int index) + : index_(index), image_(image) { +} + +const ElfMemImage::SymbolInfo *ElfMemImage::SymbolIterator::operator->() const { + return &info_; +} + +const ElfMemImage::SymbolInfo& ElfMemImage::SymbolIterator::operator*() const { + return info_; +} + +bool ElfMemImage::SymbolIterator::operator==(const SymbolIterator &rhs) const { + return this->image_ == rhs.image_ && this->index_ == rhs.index_; +} + +bool ElfMemImage::SymbolIterator::operator!=(const SymbolIterator &rhs) const { + return !(*this == rhs); +} + +ElfMemImage::SymbolIterator &ElfMemImage::SymbolIterator::operator++() { + this->Update(1); + return *this; +} + +ElfMemImage::SymbolIterator ElfMemImage::begin() const { + SymbolIterator it(this, 0); + it.Update(0); + return it; +} + +ElfMemImage::SymbolIterator ElfMemImage::end() const { + return SymbolIterator(this, GetNumSymbols()); +} + +void ElfMemImage::SymbolIterator::Update(int increment) { + const ElfMemImage *image = reinterpret_cast<const ElfMemImage *>(image_); + ABSL_RAW_CHECK(image->IsPresent() || increment == 0, ""); + if (!image->IsPresent()) { + return; + } + index_ += increment; + if (index_ >= image->GetNumSymbols()) { + index_ = image->GetNumSymbols(); + return; + } + const ElfW(Sym) *symbol = image->GetDynsym(index_); + const ElfW(Versym) *version_symbol = image->GetVersym(index_); + ABSL_RAW_CHECK(symbol && version_symbol, ""); + const char *const symbol_name = image->GetDynstr(symbol->st_name); + const ElfW(Versym) version_index = version_symbol[0] & VERSYM_VERSION; + const ElfW(Verdef) *version_definition = nullptr; + const char *version_name = ""; + if (symbol->st_shndx == SHN_UNDEF) { + // Undefined symbols reference DT_VERNEED, not DT_VERDEF, and + // version_index could well be greater than verdefnum_, so calling + // GetVerdef(version_index) may trigger assertion. + } else { + version_definition = image->GetVerdef(version_index); + } + if (version_definition) { + // I am expecting 1 or 2 auxiliary entries: 1 for the version itself, + // optional 2nd if the version has a parent. + ABSL_RAW_CHECK( + version_definition->vd_cnt == 1 || version_definition->vd_cnt == 2, + "wrong number of entries"); + const ElfW(Verdaux) *version_aux = image->GetVerdefAux(version_definition); + version_name = image->GetVerstr(version_aux->vda_name); + } + info_.name = symbol_name; + info_.version = version_name; + info_.address = image->GetSymAddr(symbol); + info_.symbol = symbol; +} + +} // namespace debugging_internal +ABSL_NAMESPACE_END +} // namespace absl + +#endif // ABSL_HAVE_ELF_MEM_IMAGE diff --git a/third_party/abseil_cpp/absl/debugging/internal/elf_mem_image.h b/third_party/abseil_cpp/absl/debugging/internal/elf_mem_image.h new file mode 100644 index 000000000000..46bfade3503e --- /dev/null +++ b/third_party/abseil_cpp/absl/debugging/internal/elf_mem_image.h @@ -0,0 +1,134 @@ +/* + * Copyright 2017 The Abseil Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// Allow dynamic symbol lookup for in-memory Elf images. + +#ifndef ABSL_DEBUGGING_INTERNAL_ELF_MEM_IMAGE_H_ +#define ABSL_DEBUGGING_INTERNAL_ELF_MEM_IMAGE_H_ + +// Including this will define the __GLIBC__ macro if glibc is being +// used. +#include <climits> + +#include "absl/base/config.h" + +// Maybe one day we can rewrite this file not to require the elf +// symbol extensions in glibc, but for right now we need them. +#ifdef ABSL_HAVE_ELF_MEM_IMAGE +#error ABSL_HAVE_ELF_MEM_IMAGE cannot be directly set +#endif + +#if defined(__ELF__) && defined(__GLIBC__) && !defined(__native_client__) && \ + !defined(__asmjs__) && !defined(__wasm__) +#define ABSL_HAVE_ELF_MEM_IMAGE 1 +#endif + +#ifdef ABSL_HAVE_ELF_MEM_IMAGE + +#include <link.h> // for ElfW + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace debugging_internal { + +// An in-memory ELF image (may not exist on disk). +class ElfMemImage { + private: + // Sentinel: there could never be an elf image at &kInvalidBaseSentinel. + static const int kInvalidBaseSentinel; + + public: + // Sentinel: there could never be an elf image at this address. + static constexpr const void *const kInvalidBase = + static_cast<const void*>(&kInvalidBaseSentinel); + + // Information about a single vdso symbol. + // All pointers are into .dynsym, .dynstr, or .text of the VDSO. + // Do not free() them or modify through them. + struct SymbolInfo { + const char *name; // E.g. "__vdso_getcpu" + const char *version; // E.g. "LINUX_2.6", could be "" + // for unversioned symbol. + const void *address; // Relocated symbol address. + const ElfW(Sym) *symbol; // Symbol in the dynamic symbol table. + }; + + // Supports iteration over all dynamic symbols. + class SymbolIterator { + public: + friend class ElfMemImage; + const SymbolInfo *operator->() const; + const SymbolInfo &operator*() const; + SymbolIterator& operator++(); + bool operator!=(const SymbolIterator &rhs) const; + bool operator==(const SymbolIterator &rhs) const; + private: + SymbolIterator(const void *const image, int index); + void Update(int incr); + SymbolInfo info_; + int index_; + const void *const image_; + }; + + + explicit ElfMemImage(const void *base); + void Init(const void *base); + bool IsPresent() const { return ehdr_ != nullptr; } + const ElfW(Phdr)* GetPhdr(int index) const; + const ElfW(Sym)* GetDynsym(int index) const; + const ElfW(Versym)* GetVersym(int index) const; + const ElfW(Verdef)* GetVerdef(int index) const; + const ElfW(Verdaux)* GetVerdefAux(const ElfW(Verdef) *verdef) const; + const char* GetDynstr(ElfW(Word) offset) const; + const void* GetSymAddr(const ElfW(Sym) *sym) const; + const char* GetVerstr(ElfW(Word) offset) const; + int GetNumSymbols() const; + + SymbolIterator begin() const; + SymbolIterator end() const; + + // Look up versioned dynamic symbol in the image. + // Returns false if image is not present, or doesn't contain given + // symbol/version/type combination. + // If info_out is non-null, additional details are filled in. + bool LookupSymbol(const char *name, const char *version, + int symbol_type, SymbolInfo *info_out) const; + + // Find info about symbol (if any) which overlaps given address. + // Returns true if symbol was found; false if image isn't present + // or doesn't have a symbol overlapping given address. + // If info_out is non-null, additional details are filled in. + bool LookupSymbolByAddress(const void *address, SymbolInfo *info_out) const; + + private: + const ElfW(Ehdr) *ehdr_; + const ElfW(Sym) *dynsym_; + const ElfW(Versym) *versym_; + const ElfW(Verdef) *verdef_; + const ElfW(Word) *hash_; + const char *dynstr_; + size_t strsize_; + size_t verdefnum_; + ElfW(Addr) link_base_; // Link-time base (p_vaddr of first PT_LOAD). +}; + +} // namespace debugging_internal +ABSL_NAMESPACE_END +} // namespace absl + +#endif // ABSL_HAVE_ELF_MEM_IMAGE + +#endif // ABSL_DEBUGGING_INTERNAL_ELF_MEM_IMAGE_H_ diff --git a/third_party/abseil_cpp/absl/debugging/internal/examine_stack.cc b/third_party/abseil_cpp/absl/debugging/internal/examine_stack.cc new file mode 100644 index 000000000000..6e5ff1fbd836 --- /dev/null +++ b/third_party/abseil_cpp/absl/debugging/internal/examine_stack.cc @@ -0,0 +1,187 @@ +// +// Copyright 2018 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +#include "absl/debugging/internal/examine_stack.h" + +#ifndef _WIN32 +#include <unistd.h> +#endif + +#ifdef __APPLE__ +#include <sys/ucontext.h> +#endif + +#include <csignal> +#include <cstdio> + +#include "absl/base/attributes.h" +#include "absl/base/internal/raw_logging.h" +#include "absl/base/macros.h" +#include "absl/debugging/stacktrace.h" +#include "absl/debugging/symbolize.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace debugging_internal { + +// Returns the program counter from signal context, nullptr if +// unknown. vuc is a ucontext_t*. We use void* to avoid the use of +// ucontext_t on non-POSIX systems. +void* GetProgramCounter(void* vuc) { +#ifdef __linux__ + if (vuc != nullptr) { + ucontext_t* context = reinterpret_cast<ucontext_t*>(vuc); +#if defined(__aarch64__) + return reinterpret_cast<void*>(context->uc_mcontext.pc); +#elif defined(__arm__) + return reinterpret_cast<void*>(context->uc_mcontext.arm_pc); +#elif defined(__i386__) + if (14 < ABSL_ARRAYSIZE(context->uc_mcontext.gregs)) + return reinterpret_cast<void*>(context->uc_mcontext.gregs[14]); +#elif defined(__mips__) + return reinterpret_cast<void*>(context->uc_mcontext.pc); +#elif defined(__powerpc64__) + return reinterpret_cast<void*>(context->uc_mcontext.gp_regs[32]); +#elif defined(__powerpc__) + return reinterpret_cast<void*>(context->uc_mcontext.regs->nip); +#elif defined(__riscv) + return reinterpret_cast<void*>(context->uc_mcontext.__gregs[REG_PC]); +#elif defined(__s390__) && !defined(__s390x__) + return reinterpret_cast<void*>(context->uc_mcontext.psw.addr & 0x7fffffff); +#elif defined(__s390__) && defined(__s390x__) + return reinterpret_cast<void*>(context->uc_mcontext.psw.addr); +#elif defined(__x86_64__) + if (16 < ABSL_ARRAYSIZE(context->uc_mcontext.gregs)) + return reinterpret_cast<void*>(context->uc_mcontext.gregs[16]); +#else +#error "Undefined Architecture." +#endif + } +#elif defined(__APPLE__) + if (vuc != nullptr) { + ucontext_t* signal_ucontext = reinterpret_cast<ucontext_t*>(vuc); +#if defined(__aarch64__) + return reinterpret_cast<void*>( + __darwin_arm_thread_state64_get_pc(signal_ucontext->uc_mcontext->__ss)); +#elif defined(__arm__) +#if __DARWIN_UNIX03 + return reinterpret_cast<void*>(signal_ucontext->uc_mcontext->__ss.__pc); +#else + return reinterpret_cast<void*>(signal_ucontext->uc_mcontext->ss.pc); +#endif +#elif defined(__i386__) +#if __DARWIN_UNIX03 + return reinterpret_cast<void*>(signal_ucontext->uc_mcontext->__ss.__eip); +#else + return reinterpret_cast<void*>(signal_ucontext->uc_mcontext->ss.eip); +#endif +#elif defined(__x86_64__) +#if __DARWIN_UNIX03 + return reinterpret_cast<void*>(signal_ucontext->uc_mcontext->__ss.__rip); +#else + return reinterpret_cast<void*>(signal_ucontext->uc_mcontext->ss.rip); +#endif +#endif + } +#elif defined(__akaros__) + auto* ctx = reinterpret_cast<struct user_context*>(vuc); + return reinterpret_cast<void*>(get_user_ctx_pc(ctx)); +#endif + static_cast<void>(vuc); + return nullptr; +} + +// The %p field width for printf() functions is two characters per byte, +// and two extra for the leading "0x". +static constexpr int kPrintfPointerFieldWidth = 2 + 2 * sizeof(void*); + +// Print a program counter, its stack frame size, and its symbol name. +// Note that there is a separate symbolize_pc argument. Return addresses may be +// at the end of the function, and this allows the caller to back up from pc if +// appropriate. +static void DumpPCAndFrameSizeAndSymbol(void (*writerfn)(const char*, void*), + void* writerfn_arg, void* pc, + void* symbolize_pc, int framesize, + const char* const prefix) { + char tmp[1024]; + const char* symbol = "(unknown)"; + if (absl::Symbolize(symbolize_pc, tmp, sizeof(tmp))) { + symbol = tmp; + } + char buf[1024]; + if (framesize <= 0) { + snprintf(buf, sizeof(buf), "%s@ %*p (unknown) %s\n", prefix, + kPrintfPointerFieldWidth, pc, symbol); + } else { + snprintf(buf, sizeof(buf), "%s@ %*p %9d %s\n", prefix, + kPrintfPointerFieldWidth, pc, framesize, symbol); + } + writerfn(buf, writerfn_arg); +} + +// Print a program counter and the corresponding stack frame size. +static void DumpPCAndFrameSize(void (*writerfn)(const char*, void*), + void* writerfn_arg, void* pc, int framesize, + const char* const prefix) { + char buf[100]; + if (framesize <= 0) { + snprintf(buf, sizeof(buf), "%s@ %*p (unknown)\n", prefix, + kPrintfPointerFieldWidth, pc); + } else { + snprintf(buf, sizeof(buf), "%s@ %*p %9d\n", prefix, + kPrintfPointerFieldWidth, pc, framesize); + } + writerfn(buf, writerfn_arg); +} + +void DumpPCAndFrameSizesAndStackTrace( + void* pc, void* const stack[], int frame_sizes[], int depth, + int min_dropped_frames, bool symbolize_stacktrace, + void (*writerfn)(const char*, void*), void* writerfn_arg) { + if (pc != nullptr) { + // We don't know the stack frame size for PC, use 0. + if (symbolize_stacktrace) { + DumpPCAndFrameSizeAndSymbol(writerfn, writerfn_arg, pc, pc, 0, "PC: "); + } else { + DumpPCAndFrameSize(writerfn, writerfn_arg, pc, 0, "PC: "); + } + } + for (int i = 0; i < depth; i++) { + if (symbolize_stacktrace) { + // Pass the previous address of pc as the symbol address because pc is a + // return address, and an overrun may occur when the function ends with a + // call to a function annotated noreturn (e.g. CHECK). Note that we don't + // do this for pc above, as the adjustment is only correct for return + // addresses. + DumpPCAndFrameSizeAndSymbol(writerfn, writerfn_arg, stack[i], + reinterpret_cast<char*>(stack[i]) - 1, + frame_sizes[i], " "); + } else { + DumpPCAndFrameSize(writerfn, writerfn_arg, stack[i], frame_sizes[i], + " "); + } + } + if (min_dropped_frames > 0) { + char buf[100]; + snprintf(buf, sizeof(buf), " @ ... and at least %d more frames\n", + min_dropped_frames); + writerfn(buf, writerfn_arg); + } +} + +} // namespace debugging_internal +ABSL_NAMESPACE_END +} // namespace absl diff --git a/third_party/abseil_cpp/absl/debugging/internal/examine_stack.h b/third_party/abseil_cpp/absl/debugging/internal/examine_stack.h new file mode 100644 index 000000000000..393369131f1e --- /dev/null +++ b/third_party/abseil_cpp/absl/debugging/internal/examine_stack.h @@ -0,0 +1,42 @@ +// +// Copyright 2018 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +#ifndef ABSL_DEBUGGING_INTERNAL_EXAMINE_STACK_H_ +#define ABSL_DEBUGGING_INTERNAL_EXAMINE_STACK_H_ + +#include "absl/base/config.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace debugging_internal { + +// Returns the program counter from signal context, or nullptr if +// unknown. `vuc` is a ucontext_t*. We use void* to avoid the use of +// ucontext_t on non-POSIX systems. +void* GetProgramCounter(void* vuc); + +// Uses `writerfn` to dump the program counter, stack trace, and stack +// frame sizes. +void DumpPCAndFrameSizesAndStackTrace( + void* pc, void* const stack[], int frame_sizes[], int depth, + int min_dropped_frames, bool symbolize_stacktrace, + void (*writerfn)(const char*, void*), void* writerfn_arg); + +} // namespace debugging_internal +ABSL_NAMESPACE_END +} // namespace absl + +#endif // ABSL_DEBUGGING_INTERNAL_EXAMINE_STACK_H_ diff --git a/third_party/abseil_cpp/absl/debugging/internal/stack_consumption.cc b/third_party/abseil_cpp/absl/debugging/internal/stack_consumption.cc new file mode 100644 index 000000000000..e3dd51c355ad --- /dev/null +++ b/third_party/abseil_cpp/absl/debugging/internal/stack_consumption.cc @@ -0,0 +1,185 @@ +// +// Copyright 2018 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/debugging/internal/stack_consumption.h" + +#ifdef ABSL_INTERNAL_HAVE_DEBUGGING_STACK_CONSUMPTION + +#include <signal.h> +#include <sys/mman.h> +#include <unistd.h> + +#include <string.h> + +#include "absl/base/attributes.h" +#include "absl/base/internal/raw_logging.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace debugging_internal { +namespace { + +// This code requires that we know the direction in which the stack +// grows. It is commonly believed that this can be detected by putting +// a variable on the stack and then passing its address to a function +// that compares the address of this variable to the address of a +// variable on the function's own stack. However, this is unspecified +// behavior in C++: If two pointers p and q of the same type point to +// different objects that are not members of the same object or +// elements of the same array or to different functions, or if only +// one of them is null, the results of p<q, p>q, p<=q, and p>=q are +// unspecified. Therefore, instead we hardcode the direction of the +// stack on platforms we know about. +#if defined(__i386__) || defined(__x86_64__) || defined(__ppc__) || \ + defined(__aarch64__) +constexpr bool kStackGrowsDown = true; +#else +#error Need to define kStackGrowsDown +#endif + +// To measure the stack footprint of some code, we create a signal handler +// (for SIGUSR2 say) that exercises this code on an alternate stack. This +// alternate stack is initialized to some known pattern (0x55, 0x55, 0x55, +// ...). We then self-send this signal, and after the signal handler returns, +// look at the alternate stack buffer to see what portion has been touched. +// +// This trick gives us the the stack footprint of the signal handler. But the +// signal handler, even before the code for it is exercised, consumes some +// stack already. We however only want the stack usage of the code inside the +// signal handler. To measure this accurately, we install two signal handlers: +// one that does nothing and just returns, and the user-provided signal +// handler. The difference between the stack consumption of these two signals +// handlers should give us the stack foorprint of interest. + +void EmptySignalHandler(int) {} + +// This is arbitrary value, and could be increase further, at the cost of +// memset()ting it all to known sentinel value. +constexpr int kAlternateStackSize = 64 << 10; // 64KiB + +constexpr int kSafetyMargin = 32; +constexpr char kAlternateStackFillValue = 0x55; + +// These helper functions look at the alternate stack buffer, and figure +// out what portion of this buffer has been touched - this is the stack +// consumption of the signal handler running on this alternate stack. +// This function will return -1 if the alternate stack buffer has not been +// touched. It will abort the program if the buffer has overflowed or is about +// to overflow. +int GetStackConsumption(const void* const altstack) { + const char* begin; + int increment; + if (kStackGrowsDown) { + begin = reinterpret_cast<const char*>(altstack); + increment = 1; + } else { + begin = reinterpret_cast<const char*>(altstack) + kAlternateStackSize - 1; + increment = -1; + } + + for (int usage_count = kAlternateStackSize; usage_count > 0; --usage_count) { + if (*begin != kAlternateStackFillValue) { + ABSL_RAW_CHECK(usage_count <= kAlternateStackSize - kSafetyMargin, + "Buffer has overflowed or is about to overflow"); + return usage_count; + } + begin += increment; + } + + ABSL_RAW_LOG(FATAL, "Unreachable code"); + return -1; +} + +} // namespace + +int GetSignalHandlerStackConsumption(void (*signal_handler)(int)) { + // The alt-signal-stack cannot be heap allocated because there is a + // bug in glibc-2.2 where some signal handler setup code looks at the + // current stack pointer to figure out what thread is currently running. + // Therefore, the alternate stack must be allocated from the main stack + // itself. + void* altstack = mmap(nullptr, kAlternateStackSize, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + ABSL_RAW_CHECK(altstack != MAP_FAILED, "mmap() failed"); + + // Set up the alt-signal-stack (and save the older one). + stack_t sigstk; + memset(&sigstk, 0, sizeof(sigstk)); + sigstk.ss_sp = altstack; + sigstk.ss_size = kAlternateStackSize; + sigstk.ss_flags = 0; + stack_t old_sigstk; + memset(&old_sigstk, 0, sizeof(old_sigstk)); + ABSL_RAW_CHECK(sigaltstack(&sigstk, &old_sigstk) == 0, + "sigaltstack() failed"); + + // Set up SIGUSR1 and SIGUSR2 signal handlers (and save the older ones). + struct sigaction sa; + memset(&sa, 0, sizeof(sa)); + struct sigaction old_sa1, old_sa2; + sigemptyset(&sa.sa_mask); + sa.sa_flags = SA_ONSTACK; + + // SIGUSR1 maps to EmptySignalHandler. + sa.sa_handler = EmptySignalHandler; + ABSL_RAW_CHECK(sigaction(SIGUSR1, &sa, &old_sa1) == 0, "sigaction() failed"); + + // SIGUSR2 maps to signal_handler. + sa.sa_handler = signal_handler; + ABSL_RAW_CHECK(sigaction(SIGUSR2, &sa, &old_sa2) == 0, "sigaction() failed"); + + // Send SIGUSR1 signal and measure the stack consumption of the empty + // signal handler. + // The first signal might use more stack space. Run once and ignore the + // results to get that out of the way. + ABSL_RAW_CHECK(kill(getpid(), SIGUSR1) == 0, "kill() failed"); + + memset(altstack, kAlternateStackFillValue, kAlternateStackSize); + ABSL_RAW_CHECK(kill(getpid(), SIGUSR1) == 0, "kill() failed"); + int base_stack_consumption = GetStackConsumption(altstack); + + // Send SIGUSR2 signal and measure the stack consumption of signal_handler. + ABSL_RAW_CHECK(kill(getpid(), SIGUSR2) == 0, "kill() failed"); + int signal_handler_stack_consumption = GetStackConsumption(altstack); + + // Now restore the old alt-signal-stack and signal handlers. + if (old_sigstk.ss_sp == nullptr && old_sigstk.ss_size == 0 && + (old_sigstk.ss_flags & SS_DISABLE)) { + // https://git.musl-libc.org/cgit/musl/commit/src/signal/sigaltstack.c?id=7829f42a2c8944555439380498ab8b924d0f2070 + // The original stack has ss_size==0 and ss_flags==SS_DISABLE, but some + // versions of musl have a bug that rejects ss_size==0. Work around this by + // setting ss_size to MINSIGSTKSZ, which should be ignored by the kernel + // when SS_DISABLE is set. + old_sigstk.ss_size = MINSIGSTKSZ; + } + ABSL_RAW_CHECK(sigaltstack(&old_sigstk, nullptr) == 0, + "sigaltstack() failed"); + ABSL_RAW_CHECK(sigaction(SIGUSR1, &old_sa1, nullptr) == 0, + "sigaction() failed"); + ABSL_RAW_CHECK(sigaction(SIGUSR2, &old_sa2, nullptr) == 0, + "sigaction() failed"); + + ABSL_RAW_CHECK(munmap(altstack, kAlternateStackSize) == 0, "munmap() failed"); + if (signal_handler_stack_consumption != -1 && base_stack_consumption != -1) { + return signal_handler_stack_consumption - base_stack_consumption; + } + return -1; +} + +} // namespace debugging_internal +ABSL_NAMESPACE_END +} // namespace absl + +#endif // ABSL_INTERNAL_HAVE_DEBUGGING_STACK_CONSUMPTION diff --git a/third_party/abseil_cpp/absl/debugging/internal/stack_consumption.h b/third_party/abseil_cpp/absl/debugging/internal/stack_consumption.h new file mode 100644 index 000000000000..2b5e7151667c --- /dev/null +++ b/third_party/abseil_cpp/absl/debugging/internal/stack_consumption.h @@ -0,0 +1,50 @@ +// +// Copyright 2018 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Helper function for measuring stack consumption of signal handlers. + +#ifndef ABSL_DEBUGGING_INTERNAL_STACK_CONSUMPTION_H_ +#define ABSL_DEBUGGING_INTERNAL_STACK_CONSUMPTION_H_ + +#include "absl/base/config.h" + +// The code in this module is not portable. +// Use this feature test macro to detect its availability. +#ifdef ABSL_INTERNAL_HAVE_DEBUGGING_STACK_CONSUMPTION +#error ABSL_INTERNAL_HAVE_DEBUGGING_STACK_CONSUMPTION cannot be set directly +#elif !defined(__APPLE__) && !defined(_WIN32) && \ + (defined(__i386__) || defined(__x86_64__) || defined(__ppc__) || \ + defined(__aarch64__)) +#define ABSL_INTERNAL_HAVE_DEBUGGING_STACK_CONSUMPTION 1 + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace debugging_internal { + +// Returns the stack consumption in bytes for the code exercised by +// signal_handler. To measure stack consumption, signal_handler is registered +// as a signal handler, so the code that it exercises must be async-signal +// safe. The argument of signal_handler is an implementation detail of signal +// handlers and should ignored by the code for signal_handler. Use global +// variables to pass information between your test code and signal_handler. +int GetSignalHandlerStackConsumption(void (*signal_handler)(int)); + +} // namespace debugging_internal +ABSL_NAMESPACE_END +} // namespace absl + +#endif // ABSL_INTERNAL_HAVE_DEBUGGING_STACK_CONSUMPTION + +#endif // ABSL_DEBUGGING_INTERNAL_STACK_CONSUMPTION_H_ diff --git a/third_party/abseil_cpp/absl/debugging/internal/stack_consumption_test.cc b/third_party/abseil_cpp/absl/debugging/internal/stack_consumption_test.cc new file mode 100644 index 000000000000..80445bf43aab --- /dev/null +++ b/third_party/abseil_cpp/absl/debugging/internal/stack_consumption_test.cc @@ -0,0 +1,50 @@ +// +// Copyright 2018 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/debugging/internal/stack_consumption.h" + +#ifdef ABSL_INTERNAL_HAVE_DEBUGGING_STACK_CONSUMPTION + +#include <string.h> + +#include "gtest/gtest.h" +#include "absl/base/internal/raw_logging.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace debugging_internal { +namespace { + +static void SimpleSignalHandler(int signo) { + char buf[100]; + memset(buf, 'a', sizeof(buf)); + + // Never true, but prevents compiler from optimizing buf out. + if (signo == 0) { + ABSL_RAW_LOG(INFO, "%p", static_cast<void*>(buf)); + } +} + +TEST(SignalHandlerStackConsumptionTest, MeasuresStackConsumption) { + // Our handler should consume reasonable number of bytes. + EXPECT_GE(GetSignalHandlerStackConsumption(SimpleSignalHandler), 100); +} + +} // namespace +} // namespace debugging_internal +ABSL_NAMESPACE_END +} // namespace absl + +#endif // ABSL_INTERNAL_HAVE_DEBUGGING_STACK_CONSUMPTION diff --git a/third_party/abseil_cpp/absl/debugging/internal/stacktrace_aarch64-inl.inc b/third_party/abseil_cpp/absl/debugging/internal/stacktrace_aarch64-inl.inc new file mode 100644 index 000000000000..f4859d7c210a --- /dev/null +++ b/third_party/abseil_cpp/absl/debugging/internal/stacktrace_aarch64-inl.inc @@ -0,0 +1,199 @@ +#ifndef ABSL_DEBUGGING_INTERNAL_STACKTRACE_AARCH64_INL_H_ +#define ABSL_DEBUGGING_INTERNAL_STACKTRACE_AARCH64_INL_H_ + +// Generate stack tracer for aarch64 + +#if defined(__linux__) +#include <sys/mman.h> +#include <ucontext.h> +#include <unistd.h> +#endif + +#include <atomic> +#include <cassert> +#include <cstdint> +#include <iostream> + +#include "absl/base/attributes.h" +#include "absl/debugging/internal/address_is_readable.h" +#include "absl/debugging/internal/vdso_support.h" // a no-op on non-elf or non-glibc systems +#include "absl/debugging/stacktrace.h" + +static const uintptr_t kUnknownFrameSize = 0; + +#if defined(__linux__) +// Returns the address of the VDSO __kernel_rt_sigreturn function, if present. +static const unsigned char* GetKernelRtSigreturnAddress() { + constexpr uintptr_t kImpossibleAddress = 1; + ABSL_CONST_INIT static std::atomic<uintptr_t> memoized{kImpossibleAddress}; + uintptr_t address = memoized.load(std::memory_order_relaxed); + if (address != kImpossibleAddress) { + return reinterpret_cast<const unsigned char*>(address); + } + + address = reinterpret_cast<uintptr_t>(nullptr); + +#ifdef ABSL_HAVE_VDSO_SUPPORT + absl::debugging_internal::VDSOSupport vdso; + if (vdso.IsPresent()) { + absl::debugging_internal::VDSOSupport::SymbolInfo symbol_info; + auto lookup = [&](int type) { + return vdso.LookupSymbol("__kernel_rt_sigreturn", "LINUX_2.6.39", type, + &symbol_info); + }; + if ((!lookup(STT_FUNC) && !lookup(STT_NOTYPE)) || + symbol_info.address == nullptr) { + // Unexpected: VDSO is present, yet the expected symbol is missing + // or null. + assert(false && "VDSO is present, but doesn't have expected symbol"); + } else { + if (reinterpret_cast<uintptr_t>(symbol_info.address) != + kImpossibleAddress) { + address = reinterpret_cast<uintptr_t>(symbol_info.address); + } else { + assert(false && "VDSO returned invalid address"); + } + } + } +#endif + + memoized.store(address, std::memory_order_relaxed); + return reinterpret_cast<const unsigned char*>(address); +} +#endif // __linux__ + +// Compute the size of a stack frame in [low..high). We assume that +// low < high. Return size of kUnknownFrameSize. +template<typename T> +static inline uintptr_t ComputeStackFrameSize(const T* low, + const T* high) { + const char* low_char_ptr = reinterpret_cast<const char *>(low); + const char* high_char_ptr = reinterpret_cast<const char *>(high); + return low < high ? high_char_ptr - low_char_ptr : kUnknownFrameSize; +} + +// Given a pointer to a stack frame, locate and return the calling +// stackframe, or return null if no stackframe can be found. Perform sanity +// checks (the strictness of which is controlled by the boolean parameter +// "STRICT_UNWINDING") to reduce the chance that a bad pointer is returned. +template<bool STRICT_UNWINDING, bool WITH_CONTEXT> +ABSL_ATTRIBUTE_NO_SANITIZE_ADDRESS // May read random elements from stack. +ABSL_ATTRIBUTE_NO_SANITIZE_MEMORY // May read random elements from stack. +static void **NextStackFrame(void **old_frame_pointer, const void *uc) { + void **new_frame_pointer = reinterpret_cast<void**>(*old_frame_pointer); + bool check_frame_size = true; + +#if defined(__linux__) + if (WITH_CONTEXT && uc != nullptr) { + // Check to see if next frame's return address is __kernel_rt_sigreturn. + if (old_frame_pointer[1] == GetKernelRtSigreturnAddress()) { + const ucontext_t *ucv = static_cast<const ucontext_t *>(uc); + // old_frame_pointer[0] is not suitable for unwinding, look at + // ucontext to discover frame pointer before signal. + void **const pre_signal_frame_pointer = + reinterpret_cast<void **>(ucv->uc_mcontext.regs[29]); + + // Check that alleged frame pointer is actually readable. This is to + // prevent "double fault" in case we hit the first fault due to e.g. + // stack corruption. + if (!absl::debugging_internal::AddressIsReadable( + pre_signal_frame_pointer)) + return nullptr; + + // Alleged frame pointer is readable, use it for further unwinding. + new_frame_pointer = pre_signal_frame_pointer; + + // Skip frame size check if we return from a signal. We may be using a + // an alternate stack for signals. + check_frame_size = false; + } + } +#endif + + // aarch64 ABI requires stack pointer to be 16-byte-aligned. + if ((reinterpret_cast<uintptr_t>(new_frame_pointer) & 15) != 0) + return nullptr; + + // Check frame size. In strict mode, we assume frames to be under + // 100,000 bytes. In non-strict mode, we relax the limit to 1MB. + if (check_frame_size) { + const uintptr_t max_size = STRICT_UNWINDING ? 100000 : 1000000; + const uintptr_t frame_size = + ComputeStackFrameSize(old_frame_pointer, new_frame_pointer); + if (frame_size == kUnknownFrameSize || frame_size > max_size) + return nullptr; + } + + return new_frame_pointer; +} + +template <bool IS_STACK_FRAMES, bool IS_WITH_CONTEXT> +ABSL_ATTRIBUTE_NO_SANITIZE_ADDRESS // May read random elements from stack. +ABSL_ATTRIBUTE_NO_SANITIZE_MEMORY // May read random elements from stack. +static int UnwindImpl(void** result, int* sizes, int max_depth, int skip_count, + const void *ucp, int *min_dropped_frames) { +#ifdef __GNUC__ + void **frame_pointer = reinterpret_cast<void**>(__builtin_frame_address(0)); +#else +# error reading stack point not yet supported on this platform. +#endif + + skip_count++; // Skip the frame for this function. + int n = 0; + + // The frame pointer points to low address of a frame. The first 64-bit + // word of a frame points to the next frame up the call chain, which normally + // is just after the high address of the current frame. The second word of + // a frame contains return adress of to the caller. To find a pc value + // associated with the current frame, we need to go down a level in the call + // chain. So we remember return the address of the last frame seen. This + // does not work for the first stack frame, which belongs to UnwindImp() but + // we skip the frame for UnwindImp() anyway. + void* prev_return_address = nullptr; + + while (frame_pointer && n < max_depth) { + // The absl::GetStackFrames routine is called when we are in some + // informational context (the failure signal handler for example). + // Use the non-strict unwinding rules to produce a stack trace + // that is as complete as possible (even if it contains a few bogus + // entries in some rare cases). + void **next_frame_pointer = + NextStackFrame<!IS_STACK_FRAMES, IS_WITH_CONTEXT>(frame_pointer, ucp); + + if (skip_count > 0) { + skip_count--; + } else { + result[n] = prev_return_address; + if (IS_STACK_FRAMES) { + sizes[n] = ComputeStackFrameSize(frame_pointer, next_frame_pointer); + } + n++; + } + prev_return_address = frame_pointer[1]; + frame_pointer = next_frame_pointer; + } + if (min_dropped_frames != nullptr) { + // Implementation detail: we clamp the max of frames we are willing to + // count, so as not to spend too much time in the loop below. + const int kMaxUnwind = 200; + int j = 0; + for (; frame_pointer != nullptr && j < kMaxUnwind; j++) { + frame_pointer = + NextStackFrame<!IS_STACK_FRAMES, IS_WITH_CONTEXT>(frame_pointer, ucp); + } + *min_dropped_frames = j; + } + return n; +} + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace debugging_internal { +bool StackTraceWorksForTest() { + return true; +} +} // namespace debugging_internal +ABSL_NAMESPACE_END +} // namespace absl + +#endif // ABSL_DEBUGGING_INTERNAL_STACKTRACE_AARCH64_INL_H_ diff --git a/third_party/abseil_cpp/absl/debugging/internal/stacktrace_arm-inl.inc b/third_party/abseil_cpp/absl/debugging/internal/stacktrace_arm-inl.inc new file mode 100644 index 000000000000..2a1bf2e88694 --- /dev/null +++ b/third_party/abseil_cpp/absl/debugging/internal/stacktrace_arm-inl.inc @@ -0,0 +1,134 @@ +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// This is inspired by Craig Silverstein's PowerPC stacktrace code. + +#ifndef ABSL_DEBUGGING_INTERNAL_STACKTRACE_ARM_INL_H_ +#define ABSL_DEBUGGING_INTERNAL_STACKTRACE_ARM_INL_H_ + +#include <cstdint> + +#include "absl/debugging/stacktrace.h" + +// WARNING: +// This only works if all your code is in either ARM or THUMB mode. With +// interworking, the frame pointer of the caller can either be in r11 (ARM +// mode) or r7 (THUMB mode). A callee only saves the frame pointer of its +// mode in a fixed location on its stack frame. If the caller is a different +// mode, there is no easy way to find the frame pointer. It can either be +// still in the designated register or saved on stack along with other callee +// saved registers. + +// Given a pointer to a stack frame, locate and return the calling +// stackframe, or return nullptr if no stackframe can be found. Perform sanity +// checks (the strictness of which is controlled by the boolean parameter +// "STRICT_UNWINDING") to reduce the chance that a bad pointer is returned. +template<bool STRICT_UNWINDING> +static void **NextStackFrame(void **old_sp) { + void **new_sp = (void**) old_sp[-1]; + + // Check that the transition from frame pointer old_sp to frame + // pointer new_sp isn't clearly bogus + if (STRICT_UNWINDING) { + // With the stack growing downwards, older stack frame must be + // at a greater address that the current one. + if (new_sp <= old_sp) return nullptr; + // Assume stack frames larger than 100,000 bytes are bogus. + if ((uintptr_t)new_sp - (uintptr_t)old_sp > 100000) return nullptr; + } else { + // In the non-strict mode, allow discontiguous stack frames. + // (alternate-signal-stacks for example). + if (new_sp == old_sp) return nullptr; + // And allow frames upto about 1MB. + if ((new_sp > old_sp) + && ((uintptr_t)new_sp - (uintptr_t)old_sp > 1000000)) return nullptr; + } + if ((uintptr_t)new_sp & (sizeof(void *) - 1)) return nullptr; + return new_sp; +} + +// This ensures that absl::GetStackTrace sets up the Link Register properly. +#ifdef __GNUC__ +void StacktraceArmDummyFunction() __attribute__((noinline)); +void StacktraceArmDummyFunction() { __asm__ volatile(""); } +#else +# error StacktraceArmDummyFunction() needs to be ported to this platform. +#endif + +template <bool IS_STACK_FRAMES, bool IS_WITH_CONTEXT> +static int UnwindImpl(void** result, int* sizes, int max_depth, int skip_count, + const void * /* ucp */, int *min_dropped_frames) { +#ifdef __GNUC__ + void **sp = reinterpret_cast<void**>(__builtin_frame_address(0)); +#else +# error reading stack point not yet supported on this platform. +#endif + + // On ARM, the return address is stored in the link register (r14). + // This is not saved on the stack frame of a leaf function. To + // simplify code that reads return addresses, we call a dummy + // function so that the return address of this function is also + // stored in the stack frame. This works at least for gcc. + StacktraceArmDummyFunction(); + + int n = 0; + while (sp && n < max_depth) { + // The absl::GetStackFrames routine is called when we are in some + // informational context (the failure signal handler for example). + // Use the non-strict unwinding rules to produce a stack trace + // that is as complete as possible (even if it contains a few bogus + // entries in some rare cases). + void **next_sp = NextStackFrame<!IS_STACK_FRAMES>(sp); + + if (skip_count > 0) { + skip_count--; + } else { + result[n] = *sp; + + if (IS_STACK_FRAMES) { + if (next_sp > sp) { + sizes[n] = (uintptr_t)next_sp - (uintptr_t)sp; + } else { + // A frame-size of 0 is used to indicate unknown frame size. + sizes[n] = 0; + } + } + n++; + } + sp = next_sp; + } + if (min_dropped_frames != nullptr) { + // Implementation detail: we clamp the max of frames we are willing to + // count, so as not to spend too much time in the loop below. + const int kMaxUnwind = 200; + int j = 0; + for (; sp != nullptr && j < kMaxUnwind; j++) { + sp = NextStackFrame<!IS_STACK_FRAMES>(sp); + } + *min_dropped_frames = j; + } + return n; +} + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace debugging_internal { +bool StackTraceWorksForTest() { + return false; +} +} // namespace debugging_internal +ABSL_NAMESPACE_END +} // namespace absl + +#endif // ABSL_DEBUGGING_INTERNAL_STACKTRACE_ARM_INL_H_ diff --git a/third_party/abseil_cpp/absl/debugging/internal/stacktrace_config.h b/third_party/abseil_cpp/absl/debugging/internal/stacktrace_config.h new file mode 100644 index 000000000000..90af852818e7 --- /dev/null +++ b/third_party/abseil_cpp/absl/debugging/internal/stacktrace_config.h @@ -0,0 +1,89 @@ +/* + * Copyright 2017 The Abseil Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + + * Defines ABSL_STACKTRACE_INL_HEADER to the *-inl.h containing + * actual unwinder implementation. + * This header is "private" to stacktrace.cc. + * DO NOT include it into any other files. +*/ +#ifndef ABSL_DEBUGGING_INTERNAL_STACKTRACE_CONFIG_H_ +#define ABSL_DEBUGGING_INTERNAL_STACKTRACE_CONFIG_H_ + +#if defined(ABSL_STACKTRACE_INL_HEADER) +#error ABSL_STACKTRACE_INL_HEADER cannot be directly set + +#elif defined(_WIN32) +#define ABSL_STACKTRACE_INL_HEADER \ + "absl/debugging/internal/stacktrace_win32-inl.inc" + +#elif defined(__APPLE__) +// Thread local support required for UnwindImpl. +// Notes: +// * Xcode's clang did not support `thread_local` until version 8, and +// even then not for all iOS < 9.0. +// * Xcode 9.3 started disallowing `thread_local` for 32-bit iOS simulator +// targeting iOS 9.x. +// * Xcode 10 moves the deployment target check for iOS < 9.0 to link time +// making __has_feature unreliable there. +// +// Otherwise, `__has_feature` is only supported by Clang so it has be inside +// `defined(__APPLE__)` check. +#if __has_feature(cxx_thread_local) && \ + !(TARGET_OS_IPHONE && __IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_9_0) +#define ABSL_STACKTRACE_INL_HEADER \ + "absl/debugging/internal/stacktrace_generic-inl.inc" +#endif + +#elif defined(__linux__) && !defined(__ANDROID__) + +#if defined(NO_FRAME_POINTER) && \ + (defined(__i386__) || defined(__x86_64__) || defined(__aarch64__)) +// Note: The libunwind-based implementation is not available to open-source +// users. +#define ABSL_STACKTRACE_INL_HEADER \ + "absl/debugging/internal/stacktrace_libunwind-inl.inc" +#define STACKTRACE_USES_LIBUNWIND 1 +#elif defined(NO_FRAME_POINTER) && defined(__has_include) +#if __has_include(<execinfo.h>) +// Note: When using glibc this may require -funwind-tables to function properly. +#define ABSL_STACKTRACE_INL_HEADER \ + "absl/debugging/internal/stacktrace_generic-inl.inc" +#endif +#elif defined(__i386__) || defined(__x86_64__) +#define ABSL_STACKTRACE_INL_HEADER \ + "absl/debugging/internal/stacktrace_x86-inl.inc" +#elif defined(__ppc__) || defined(__PPC__) +#define ABSL_STACKTRACE_INL_HEADER \ + "absl/debugging/internal/stacktrace_powerpc-inl.inc" +#elif defined(__aarch64__) +#define ABSL_STACKTRACE_INL_HEADER \ + "absl/debugging/internal/stacktrace_aarch64-inl.inc" +#elif defined(__has_include) +#if __has_include(<execinfo.h>) +// Note: When using glibc this may require -funwind-tables to function properly. +#define ABSL_STACKTRACE_INL_HEADER \ + "absl/debugging/internal/stacktrace_generic-inl.inc" +#endif +#endif + +#endif + +// Fallback to the empty implementation. +#if !defined(ABSL_STACKTRACE_INL_HEADER) +#define ABSL_STACKTRACE_INL_HEADER \ + "absl/debugging/internal/stacktrace_unimplemented-inl.inc" +#endif + +#endif // ABSL_DEBUGGING_INTERNAL_STACKTRACE_CONFIG_H_ diff --git a/third_party/abseil_cpp/absl/debugging/internal/stacktrace_generic-inl.inc b/third_party/abseil_cpp/absl/debugging/internal/stacktrace_generic-inl.inc new file mode 100644 index 000000000000..b2792a1f3ada --- /dev/null +++ b/third_party/abseil_cpp/absl/debugging/internal/stacktrace_generic-inl.inc @@ -0,0 +1,108 @@ +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Portable implementation - just use glibc +// +// Note: The glibc implementation may cause a call to malloc. +// This can cause a deadlock in HeapProfiler. + +#ifndef ABSL_DEBUGGING_INTERNAL_STACKTRACE_GENERIC_INL_H_ +#define ABSL_DEBUGGING_INTERNAL_STACKTRACE_GENERIC_INL_H_ + +#include <execinfo.h> +#include <atomic> +#include <cstring> + +#include "absl/debugging/stacktrace.h" +#include "absl/base/attributes.h" + +// Sometimes, we can try to get a stack trace from within a stack +// trace, because we don't block signals inside this code (which would be too +// expensive: the two extra system calls per stack trace do matter here). +// That can cause a self-deadlock. +// Protect against such reentrant call by failing to get a stack trace. +// +// We use __thread here because the code here is extremely low level -- it is +// called while collecting stack traces from within malloc and mmap, and thus +// can not call anything which might call malloc or mmap itself. +static __thread int recursive = 0; + +// The stack trace function might be invoked very early in the program's +// execution (e.g. from the very first malloc if using tcmalloc). Also, the +// glibc implementation itself will trigger malloc the first time it is called. +// As such, we suppress usage of backtrace during this early stage of execution. +static std::atomic<bool> disable_stacktraces(true); // Disabled until healthy. +// Waiting until static initializers run seems to be late enough. +// This file is included into stacktrace.cc so this will only run once. +ABSL_ATTRIBUTE_UNUSED static int stacktraces_enabler = []() { + void* unused_stack[1]; + // Force the first backtrace to happen early to get the one-time shared lib + // loading (allocation) out of the way. After the first call it is much safer + // to use backtrace from a signal handler if we crash somewhere later. + backtrace(unused_stack, 1); + disable_stacktraces.store(false, std::memory_order_relaxed); + return 0; +}(); + +template <bool IS_STACK_FRAMES, bool IS_WITH_CONTEXT> +static int UnwindImpl(void** result, int* sizes, int max_depth, int skip_count, + const void *ucp, int *min_dropped_frames) { + if (recursive || disable_stacktraces.load(std::memory_order_relaxed)) { + return 0; + } + ++recursive; + + static_cast<void>(ucp); // Unused. + static const int kStackLength = 64; + void * stack[kStackLength]; + int size; + + size = backtrace(stack, kStackLength); + skip_count++; // we want to skip the current frame as well + int result_count = size - skip_count; + if (result_count < 0) + result_count = 0; + if (result_count > max_depth) + result_count = max_depth; + for (int i = 0; i < result_count; i++) + result[i] = stack[i + skip_count]; + + if (IS_STACK_FRAMES) { + // No implementation for finding out the stack frame sizes yet. + memset(sizes, 0, sizeof(*sizes) * result_count); + } + if (min_dropped_frames != nullptr) { + if (size - skip_count - max_depth > 0) { + *min_dropped_frames = size - skip_count - max_depth; + } else { + *min_dropped_frames = 0; + } + } + + --recursive; + + return result_count; +} + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace debugging_internal { +bool StackTraceWorksForTest() { + return true; +} +} // namespace debugging_internal +ABSL_NAMESPACE_END +} // namespace absl + +#endif // ABSL_DEBUGGING_INTERNAL_STACKTRACE_GENERIC_INL_H_ diff --git a/third_party/abseil_cpp/absl/debugging/internal/stacktrace_powerpc-inl.inc b/third_party/abseil_cpp/absl/debugging/internal/stacktrace_powerpc-inl.inc new file mode 100644 index 000000000000..2e7c2f404f20 --- /dev/null +++ b/third_party/abseil_cpp/absl/debugging/internal/stacktrace_powerpc-inl.inc @@ -0,0 +1,248 @@ +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Produce stack trace. I'm guessing (hoping!) the code is much like +// for x86. For apple machines, at least, it seems to be; see +// https://developer.apple.com/documentation/mac/runtimehtml/RTArch-59.html +// https://www.linux-foundation.org/spec/ELF/ppc64/PPC-elf64abi-1.9.html#STACK +// Linux has similar code: http://patchwork.ozlabs.org/linuxppc/patch?id=8882 + +#ifndef ABSL_DEBUGGING_INTERNAL_STACKTRACE_POWERPC_INL_H_ +#define ABSL_DEBUGGING_INTERNAL_STACKTRACE_POWERPC_INL_H_ + +#if defined(__linux__) +#include <asm/ptrace.h> // for PT_NIP. +#include <ucontext.h> // for ucontext_t +#endif + +#include <unistd.h> +#include <cassert> +#include <cstdint> +#include <cstdio> + +#include "absl/base/attributes.h" +#include "absl/base/optimization.h" +#include "absl/base/port.h" +#include "absl/debugging/stacktrace.h" +#include "absl/debugging/internal/address_is_readable.h" +#include "absl/debugging/internal/vdso_support.h" // a no-op on non-elf or non-glibc systems + +// Given a stack pointer, return the saved link register value. +// Note that this is the link register for a callee. +static inline void *StacktracePowerPCGetLR(void **sp) { + // PowerPC has 3 main ABIs, which say where in the stack the + // Link Register is. For DARWIN and AIX (used by apple and + // linux ppc64), it's in sp[2]. For SYSV (used by linux ppc), + // it's in sp[1]. +#if defined(_CALL_AIX) || defined(_CALL_DARWIN) + return *(sp+2); +#elif defined(_CALL_SYSV) + return *(sp+1); +#elif defined(__APPLE__) || defined(__FreeBSD__) || \ + (defined(__linux__) && defined(__PPC64__)) + // This check is in case the compiler doesn't define _CALL_AIX/etc. + return *(sp+2); +#elif defined(__linux) + // This check is in case the compiler doesn't define _CALL_SYSV. + return *(sp+1); +#else +#error Need to specify the PPC ABI for your archiecture. +#endif +} + +// Given a pointer to a stack frame, locate and return the calling +// stackframe, or return null if no stackframe can be found. Perform sanity +// checks (the strictness of which is controlled by the boolean parameter +// "STRICT_UNWINDING") to reduce the chance that a bad pointer is returned. +template<bool STRICT_UNWINDING, bool IS_WITH_CONTEXT> +ABSL_ATTRIBUTE_NO_SANITIZE_ADDRESS // May read random elements from stack. +ABSL_ATTRIBUTE_NO_SANITIZE_MEMORY // May read random elements from stack. +static void **NextStackFrame(void **old_sp, const void *uc) { + void **new_sp = (void **) *old_sp; + enum { kStackAlignment = 16 }; + + // Check that the transition from frame pointer old_sp to frame + // pointer new_sp isn't clearly bogus + if (STRICT_UNWINDING) { + // With the stack growing downwards, older stack frame must be + // at a greater address that the current one. + if (new_sp <= old_sp) return nullptr; + // Assume stack frames larger than 100,000 bytes are bogus. + if ((uintptr_t)new_sp - (uintptr_t)old_sp > 100000) return nullptr; + } else { + // In the non-strict mode, allow discontiguous stack frames. + // (alternate-signal-stacks for example). + if (new_sp == old_sp) return nullptr; + // And allow frames upto about 1MB. + if ((new_sp > old_sp) + && ((uintptr_t)new_sp - (uintptr_t)old_sp > 1000000)) return nullptr; + } + if ((uintptr_t)new_sp % kStackAlignment != 0) return nullptr; + +#if defined(__linux__) + enum StackTraceKernelSymbolStatus { + kNotInitialized = 0, kAddressValid, kAddressInvalid }; + + if (IS_WITH_CONTEXT && uc != nullptr) { + static StackTraceKernelSymbolStatus kernel_symbol_status = + kNotInitialized; // Sentinel: not computed yet. + // Initialize with sentinel value: __kernel_rt_sigtramp_rt64 can not + // possibly be there. + static const unsigned char *kernel_sigtramp_rt64_address = nullptr; + if (kernel_symbol_status == kNotInitialized) { + absl::debugging_internal::VDSOSupport vdso; + if (vdso.IsPresent()) { + absl::debugging_internal::VDSOSupport::SymbolInfo + sigtramp_rt64_symbol_info; + if (!vdso.LookupSymbol( + "__kernel_sigtramp_rt64", "LINUX_2.6.15", + absl::debugging_internal::VDSOSupport::kVDSOSymbolType, + &sigtramp_rt64_symbol_info) || + sigtramp_rt64_symbol_info.address == nullptr) { + // Unexpected: VDSO is present, yet the expected symbol is missing + // or null. + assert(false && "VDSO is present, but doesn't have expected symbol"); + kernel_symbol_status = kAddressInvalid; + } else { + kernel_sigtramp_rt64_address = + reinterpret_cast<const unsigned char *>( + sigtramp_rt64_symbol_info.address); + kernel_symbol_status = kAddressValid; + } + } else { + kernel_symbol_status = kAddressInvalid; + } + } + + if (new_sp != nullptr && + kernel_symbol_status == kAddressValid && + StacktracePowerPCGetLR(new_sp) == kernel_sigtramp_rt64_address) { + const ucontext_t* signal_context = + reinterpret_cast<const ucontext_t*>(uc); + void **const sp_before_signal = + reinterpret_cast<void**>(signal_context->uc_mcontext.gp_regs[PT_R1]); + // Check that alleged sp before signal is nonnull and is reasonably + // aligned. + if (sp_before_signal != nullptr && + ((uintptr_t)sp_before_signal % kStackAlignment) == 0) { + // Check that alleged stack pointer is actually readable. This is to + // prevent a "double fault" in case we hit the first fault due to e.g. + // a stack corruption. + if (absl::debugging_internal::AddressIsReadable(sp_before_signal)) { + // Alleged stack pointer is readable, use it for further unwinding. + new_sp = sp_before_signal; + } + } + } + } +#endif + + return new_sp; +} + +// This ensures that absl::GetStackTrace sets up the Link Register properly. +ABSL_ATTRIBUTE_NOINLINE static void AbslStacktracePowerPCDummyFunction() { + ABSL_BLOCK_TAIL_CALL_OPTIMIZATION(); +} + +template <bool IS_STACK_FRAMES, bool IS_WITH_CONTEXT> +ABSL_ATTRIBUTE_NO_SANITIZE_ADDRESS // May read random elements from stack. +ABSL_ATTRIBUTE_NO_SANITIZE_MEMORY // May read random elements from stack. +static int UnwindImpl(void** result, int* sizes, int max_depth, int skip_count, + const void *ucp, int *min_dropped_frames) { + void **sp; + // Apple macOS uses an old version of gnu as -- both Darwin 7.9.0 (Panther) + // and Darwin 8.8.1 (Tiger) use as 1.38. This means we have to use a + // different asm syntax. I don't know quite the best way to discriminate + // systems using the old as from the new one; I've gone with __APPLE__. +#ifdef __APPLE__ + __asm__ volatile ("mr %0,r1" : "=r" (sp)); +#else + __asm__ volatile ("mr %0,1" : "=r" (sp)); +#endif + + // On PowerPC, the "Link Register" or "Link Record" (LR), is a stack + // entry that holds the return address of the subroutine call (what + // instruction we run after our function finishes). This is the + // same as the stack-pointer of our parent routine, which is what we + // want here. While the compiler will always(?) set up LR for + // subroutine calls, it may not for leaf functions (such as this one). + // This routine forces the compiler (at least gcc) to push it anyway. + AbslStacktracePowerPCDummyFunction(); + + // The LR save area is used by the callee, so the top entry is bogus. + skip_count++; + + int n = 0; + + // Unlike ABIs of X86 and ARM, PowerPC ABIs say that return address (in + // the link register) of a function call is stored in the caller's stack + // frame instead of the callee's. When we look for the return address + // associated with a stack frame, we need to make sure that there is a + // caller frame before it. So we call NextStackFrame before entering the + // loop below and check next_sp instead of sp for loop termination. + // The outermost frame is set up by runtimes and it does not have a + // caller frame, so it is skipped. + + // The absl::GetStackFrames routine is called when we are in some + // informational context (the failure signal handler for example). + // Use the non-strict unwinding rules to produce a stack trace + // that is as complete as possible (even if it contains a few + // bogus entries in some rare cases). + void **next_sp = NextStackFrame<!IS_STACK_FRAMES, IS_WITH_CONTEXT>(sp, ucp); + + while (next_sp && n < max_depth) { + if (skip_count > 0) { + skip_count--; + } else { + result[n] = StacktracePowerPCGetLR(sp); + if (IS_STACK_FRAMES) { + if (next_sp > sp) { + sizes[n] = (uintptr_t)next_sp - (uintptr_t)sp; + } else { + // A frame-size of 0 is used to indicate unknown frame size. + sizes[n] = 0; + } + } + n++; + } + + sp = next_sp; + next_sp = NextStackFrame<!IS_STACK_FRAMES, IS_WITH_CONTEXT>(sp, ucp); + } + + if (min_dropped_frames != nullptr) { + // Implementation detail: we clamp the max of frames we are willing to + // count, so as not to spend too much time in the loop below. + const int kMaxUnwind = 1000; + int j = 0; + for (; next_sp != nullptr && j < kMaxUnwind; j++) { + next_sp = NextStackFrame<!IS_STACK_FRAMES, IS_WITH_CONTEXT>(next_sp, ucp); + } + *min_dropped_frames = j; + } + return n; +} + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace debugging_internal { +bool StackTraceWorksForTest() { + return true; +} +} // namespace debugging_internal +ABSL_NAMESPACE_END +} // namespace absl + +#endif // ABSL_DEBUGGING_INTERNAL_STACKTRACE_POWERPC_INL_H_ diff --git a/third_party/abseil_cpp/absl/debugging/internal/stacktrace_unimplemented-inl.inc b/third_party/abseil_cpp/absl/debugging/internal/stacktrace_unimplemented-inl.inc new file mode 100644 index 000000000000..5b8fb191b65a --- /dev/null +++ b/third_party/abseil_cpp/absl/debugging/internal/stacktrace_unimplemented-inl.inc @@ -0,0 +1,24 @@ +#ifndef ABSL_DEBUGGING_INTERNAL_STACKTRACE_UNIMPLEMENTED_INL_H_ +#define ABSL_DEBUGGING_INTERNAL_STACKTRACE_UNIMPLEMENTED_INL_H_ + +template <bool IS_STACK_FRAMES, bool IS_WITH_CONTEXT> +static int UnwindImpl(void** /* result */, int* /* sizes */, + int /* max_depth */, int /* skip_count */, + const void* /* ucp */, int *min_dropped_frames) { + if (min_dropped_frames != nullptr) { + *min_dropped_frames = 0; + } + return 0; +} + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace debugging_internal { +bool StackTraceWorksForTest() { + return false; +} +} // namespace debugging_internal +ABSL_NAMESPACE_END +} // namespace absl + +#endif // ABSL_DEBUGGING_INTERNAL_STACKTRACE_UNIMPLEMENTED_INL_H_ diff --git a/third_party/abseil_cpp/absl/debugging/internal/stacktrace_win32-inl.inc b/third_party/abseil_cpp/absl/debugging/internal/stacktrace_win32-inl.inc new file mode 100644 index 000000000000..1c666c8b561f --- /dev/null +++ b/third_party/abseil_cpp/absl/debugging/internal/stacktrace_win32-inl.inc @@ -0,0 +1,93 @@ +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Produces a stack trace for Windows. Normally, one could use +// stacktrace_x86-inl.h or stacktrace_x86_64-inl.h -- and indeed, that +// should work for binaries compiled using MSVC in "debug" mode. +// However, in "release" mode, Windows uses frame-pointer +// optimization, which makes getting a stack trace very difficult. +// +// There are several approaches one can take. One is to use Windows +// intrinsics like StackWalk64. These can work, but have restrictions +// on how successful they can be. Another attempt is to write a +// version of stacktrace_x86-inl.h that has heuristic support for +// dealing with FPO, similar to what WinDbg does (see +// http://www.nynaeve.net/?p=97). There are (non-working) examples of +// these approaches, complete with TODOs, in stacktrace_win32-inl.h#1 +// +// The solution we've ended up doing is to call the undocumented +// windows function RtlCaptureStackBackTrace, which probably doesn't +// work with FPO but at least is fast, and doesn't require a symbol +// server. +// +// This code is inspired by a patch from David Vitek: +// https://code.google.com/p/google-perftools/issues/detail?id=83 + +#ifndef ABSL_DEBUGGING_INTERNAL_STACKTRACE_WIN32_INL_H_ +#define ABSL_DEBUGGING_INTERNAL_STACKTRACE_WIN32_INL_H_ + +#include <windows.h> // for GetProcAddress and GetModuleHandle +#include <cassert> + +typedef USHORT NTAPI RtlCaptureStackBackTrace_Function( + IN ULONG frames_to_skip, + IN ULONG frames_to_capture, + OUT PVOID *backtrace, + OUT PULONG backtrace_hash); + +// It is not possible to load RtlCaptureStackBackTrace at static init time in +// UWP. CaptureStackBackTrace is the public version of RtlCaptureStackBackTrace +#if WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_APP) && \ + !WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP) +static RtlCaptureStackBackTrace_Function* const RtlCaptureStackBackTrace_fn = + &::CaptureStackBackTrace; +#else +// Load the function we need at static init time, where we don't have +// to worry about someone else holding the loader's lock. +static RtlCaptureStackBackTrace_Function* const RtlCaptureStackBackTrace_fn = + (RtlCaptureStackBackTrace_Function*)GetProcAddress( + GetModuleHandleA("ntdll.dll"), "RtlCaptureStackBackTrace"); +#endif // WINAPI_PARTITION_APP && !WINAPI_PARTITION_DESKTOP + +template <bool IS_STACK_FRAMES, bool IS_WITH_CONTEXT> +static int UnwindImpl(void** result, int* sizes, int max_depth, int skip_count, + const void*, int* min_dropped_frames) { + int n = 0; + if (!RtlCaptureStackBackTrace_fn) { + // can't find a stacktrace with no function to call + } else { + n = (int)RtlCaptureStackBackTrace_fn(skip_count + 2, max_depth, result, 0); + } + if (IS_STACK_FRAMES) { + // No implementation for finding out the stack frame sizes yet. + memset(sizes, 0, sizeof(*sizes) * n); + } + if (min_dropped_frames != nullptr) { + // Not implemented. + *min_dropped_frames = 0; + } + return n; +} + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace debugging_internal { +bool StackTraceWorksForTest() { + return false; +} +} // namespace debugging_internal +ABSL_NAMESPACE_END +} // namespace absl + +#endif // ABSL_DEBUGGING_INTERNAL_STACKTRACE_WIN32_INL_H_ diff --git a/third_party/abseil_cpp/absl/debugging/internal/stacktrace_x86-inl.inc b/third_party/abseil_cpp/absl/debugging/internal/stacktrace_x86-inl.inc new file mode 100644 index 000000000000..bc320ff75bc5 --- /dev/null +++ b/third_party/abseil_cpp/absl/debugging/internal/stacktrace_x86-inl.inc @@ -0,0 +1,346 @@ +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Produce stack trace + +#ifndef ABSL_DEBUGGING_INTERNAL_STACKTRACE_X86_INL_INC_ +#define ABSL_DEBUGGING_INTERNAL_STACKTRACE_X86_INL_INC_ + +#if defined(__linux__) && (defined(__i386__) || defined(__x86_64__)) +#include <ucontext.h> // for ucontext_t +#endif + +#if !defined(_WIN32) +#include <unistd.h> +#endif + +#include <cassert> +#include <cstdint> + +#include "absl/base/macros.h" +#include "absl/base/port.h" +#include "absl/debugging/internal/address_is_readable.h" +#include "absl/debugging/internal/vdso_support.h" // a no-op on non-elf or non-glibc systems +#include "absl/debugging/stacktrace.h" + +#include "absl/base/internal/raw_logging.h" + +using absl::debugging_internal::AddressIsReadable; + +#if defined(__linux__) && defined(__i386__) +// Count "push %reg" instructions in VDSO __kernel_vsyscall(), +// preceeding "syscall" or "sysenter". +// If __kernel_vsyscall uses frame pointer, answer 0. +// +// kMaxBytes tells how many instruction bytes of __kernel_vsyscall +// to analyze before giving up. Up to kMaxBytes+1 bytes of +// instructions could be accessed. +// +// Here are known __kernel_vsyscall instruction sequences: +// +// SYSENTER (linux-2.6.26/arch/x86/vdso/vdso32/sysenter.S). +// Used on Intel. +// 0xffffe400 <__kernel_vsyscall+0>: push %ecx +// 0xffffe401 <__kernel_vsyscall+1>: push %edx +// 0xffffe402 <__kernel_vsyscall+2>: push %ebp +// 0xffffe403 <__kernel_vsyscall+3>: mov %esp,%ebp +// 0xffffe405 <__kernel_vsyscall+5>: sysenter +// +// SYSCALL (see linux-2.6.26/arch/x86/vdso/vdso32/syscall.S). +// Used on AMD. +// 0xffffe400 <__kernel_vsyscall+0>: push %ebp +// 0xffffe401 <__kernel_vsyscall+1>: mov %ecx,%ebp +// 0xffffe403 <__kernel_vsyscall+3>: syscall +// + +// The sequence below isn't actually expected in Google fleet, +// here only for completeness. Remove this comment from OSS release. + +// i386 (see linux-2.6.26/arch/x86/vdso/vdso32/int80.S) +// 0xffffe400 <__kernel_vsyscall+0>: int $0x80 +// 0xffffe401 <__kernel_vsyscall+1>: ret +// +static const int kMaxBytes = 10; + +// We use assert()s instead of DCHECK()s -- this is too low level +// for DCHECK(). + +static int CountPushInstructions(const unsigned char *const addr) { + int result = 0; + for (int i = 0; i < kMaxBytes; ++i) { + if (addr[i] == 0x89) { + // "mov reg,reg" + if (addr[i + 1] == 0xE5) { + // Found "mov %esp,%ebp". + return 0; + } + ++i; // Skip register encoding byte. + } else if (addr[i] == 0x0F && + (addr[i + 1] == 0x34 || addr[i + 1] == 0x05)) { + // Found "sysenter" or "syscall". + return result; + } else if ((addr[i] & 0xF0) == 0x50) { + // Found "push %reg". + ++result; + } else if (addr[i] == 0xCD && addr[i + 1] == 0x80) { + // Found "int $0x80" + assert(result == 0); + return 0; + } else { + // Unexpected instruction. + assert(false && "unexpected instruction in __kernel_vsyscall"); + return 0; + } + } + // Unexpected: didn't find SYSENTER or SYSCALL in + // [__kernel_vsyscall, __kernel_vsyscall + kMaxBytes) interval. + assert(false && "did not find SYSENTER or SYSCALL in __kernel_vsyscall"); + return 0; +} +#endif + +// Assume stack frames larger than 100,000 bytes are bogus. +static const int kMaxFrameBytes = 100000; + +// Returns the stack frame pointer from signal context, 0 if unknown. +// vuc is a ucontext_t *. We use void* to avoid the use +// of ucontext_t on non-POSIX systems. +static uintptr_t GetFP(const void *vuc) { +#if !defined(__linux__) + static_cast<void>(vuc); // Avoid an unused argument compiler warning. +#else + if (vuc != nullptr) { + auto *uc = reinterpret_cast<const ucontext_t *>(vuc); +#if defined(__i386__) + const auto bp = uc->uc_mcontext.gregs[REG_EBP]; + const auto sp = uc->uc_mcontext.gregs[REG_ESP]; +#elif defined(__x86_64__) + const auto bp = uc->uc_mcontext.gregs[REG_RBP]; + const auto sp = uc->uc_mcontext.gregs[REG_RSP]; +#else + const uintptr_t bp = 0; + const uintptr_t sp = 0; +#endif + // Sanity-check that the base pointer is valid. It should be as long as + // SHRINK_WRAP_FRAME_POINTER is not set, but it's possible that some code in + // the process is compiled with --copt=-fomit-frame-pointer or + // --copt=-momit-leaf-frame-pointer. + // + // TODO(bcmills): -momit-leaf-frame-pointer is currently the default + // behavior when building with clang. Talk to the C++ toolchain team about + // fixing that. + if (bp >= sp && bp - sp <= kMaxFrameBytes) return bp; + + // If bp isn't a plausible frame pointer, return the stack pointer instead. + // If we're lucky, it points to the start of a stack frame; otherwise, we'll + // get one frame of garbage in the stack trace and fail the sanity check on + // the next iteration. + return sp; + } +#endif + return 0; +} + +// Given a pointer to a stack frame, locate and return the calling +// stackframe, or return null if no stackframe can be found. Perform sanity +// checks (the strictness of which is controlled by the boolean parameter +// "STRICT_UNWINDING") to reduce the chance that a bad pointer is returned. +template <bool STRICT_UNWINDING, bool WITH_CONTEXT> +ABSL_ATTRIBUTE_NO_SANITIZE_ADDRESS // May read random elements from stack. +ABSL_ATTRIBUTE_NO_SANITIZE_MEMORY // May read random elements from stack. +static void **NextStackFrame(void **old_fp, const void *uc) { + void **new_fp = (void **)*old_fp; + +#if defined(__linux__) && defined(__i386__) + if (WITH_CONTEXT && uc != nullptr) { + // How many "push %reg" instructions are there at __kernel_vsyscall? + // This is constant for a given kernel and processor, so compute + // it only once. + static int num_push_instructions = -1; // Sentinel: not computed yet. + // Initialize with sentinel value: __kernel_rt_sigreturn can not possibly + // be there. + static const unsigned char *kernel_rt_sigreturn_address = nullptr; + static const unsigned char *kernel_vsyscall_address = nullptr; + if (num_push_instructions == -1) { +#ifdef ABSL_HAVE_VDSO_SUPPORT + absl::debugging_internal::VDSOSupport vdso; + if (vdso.IsPresent()) { + absl::debugging_internal::VDSOSupport::SymbolInfo + rt_sigreturn_symbol_info; + absl::debugging_internal::VDSOSupport::SymbolInfo vsyscall_symbol_info; + if (!vdso.LookupSymbol("__kernel_rt_sigreturn", "LINUX_2.5", STT_FUNC, + &rt_sigreturn_symbol_info) || + !vdso.LookupSymbol("__kernel_vsyscall", "LINUX_2.5", STT_FUNC, + &vsyscall_symbol_info) || + rt_sigreturn_symbol_info.address == nullptr || + vsyscall_symbol_info.address == nullptr) { + // Unexpected: 32-bit VDSO is present, yet one of the expected + // symbols is missing or null. + assert(false && "VDSO is present, but doesn't have expected symbols"); + num_push_instructions = 0; + } else { + kernel_rt_sigreturn_address = + reinterpret_cast<const unsigned char *>( + rt_sigreturn_symbol_info.address); + kernel_vsyscall_address = + reinterpret_cast<const unsigned char *>( + vsyscall_symbol_info.address); + num_push_instructions = + CountPushInstructions(kernel_vsyscall_address); + } + } else { + num_push_instructions = 0; + } +#else // ABSL_HAVE_VDSO_SUPPORT + num_push_instructions = 0; +#endif // ABSL_HAVE_VDSO_SUPPORT + } + if (num_push_instructions != 0 && kernel_rt_sigreturn_address != nullptr && + old_fp[1] == kernel_rt_sigreturn_address) { + const ucontext_t *ucv = static_cast<const ucontext_t *>(uc); + // This kernel does not use frame pointer in its VDSO code, + // and so %ebp is not suitable for unwinding. + void **const reg_ebp = + reinterpret_cast<void **>(ucv->uc_mcontext.gregs[REG_EBP]); + const unsigned char *const reg_eip = + reinterpret_cast<unsigned char *>(ucv->uc_mcontext.gregs[REG_EIP]); + if (new_fp == reg_ebp && kernel_vsyscall_address <= reg_eip && + reg_eip - kernel_vsyscall_address < kMaxBytes) { + // We "stepped up" to __kernel_vsyscall, but %ebp is not usable. + // Restore from 'ucv' instead. + void **const reg_esp = + reinterpret_cast<void **>(ucv->uc_mcontext.gregs[REG_ESP]); + // Check that alleged %esp is not null and is reasonably aligned. + if (reg_esp && + ((uintptr_t)reg_esp & (sizeof(reg_esp) - 1)) == 0) { + // Check that alleged %esp is actually readable. This is to prevent + // "double fault" in case we hit the first fault due to e.g. stack + // corruption. + void *const reg_esp2 = reg_esp[num_push_instructions - 1]; + if (AddressIsReadable(reg_esp2)) { + // Alleged %esp is readable, use it for further unwinding. + new_fp = reinterpret_cast<void **>(reg_esp2); + } + } + } + } + } +#endif + + const uintptr_t old_fp_u = reinterpret_cast<uintptr_t>(old_fp); + const uintptr_t new_fp_u = reinterpret_cast<uintptr_t>(new_fp); + + // Check that the transition from frame pointer old_fp to frame + // pointer new_fp isn't clearly bogus. Skip the checks if new_fp + // matches the signal context, so that we don't skip out early when + // using an alternate signal stack. + // + // TODO(bcmills): The GetFP call should be completely unnecessary when + // SHRINK_WRAP_FRAME_POINTER is set (because we should be back in the thread's + // stack by this point), but it is empirically still needed (e.g. when the + // stack includes a call to abort). unw_get_reg returns UNW_EBADREG for some + // frames. Figure out why GetValidFrameAddr and/or libunwind isn't doing what + // it's supposed to. + if (STRICT_UNWINDING && + (!WITH_CONTEXT || uc == nullptr || new_fp_u != GetFP(uc))) { + // With the stack growing downwards, older stack frame must be + // at a greater address that the current one. + if (new_fp_u <= old_fp_u) return nullptr; + if (new_fp_u - old_fp_u > kMaxFrameBytes) return nullptr; + } else { + if (new_fp == nullptr) return nullptr; // skip AddressIsReadable() below + // In the non-strict mode, allow discontiguous stack frames. + // (alternate-signal-stacks for example). + if (new_fp == old_fp) return nullptr; + } + + if (new_fp_u & (sizeof(void *) - 1)) return nullptr; +#ifdef __i386__ + // On 32-bit machines, the stack pointer can be very close to + // 0xffffffff, so we explicitly check for a pointer into the + // last two pages in the address space + if (new_fp_u >= 0xffffe000) return nullptr; +#endif +#if !defined(_WIN32) + if (!STRICT_UNWINDING) { + // Lax sanity checks cause a crash in 32-bit tcmalloc/crash_reason_test + // on AMD-based machines with VDSO-enabled kernels. + // Make an extra sanity check to insure new_fp is readable. + // Note: NextStackFrame<false>() is only called while the program + // is already on its last leg, so it's ok to be slow here. + + if (!AddressIsReadable(new_fp)) { + return nullptr; + } + } +#endif + return new_fp; +} + +template <bool IS_STACK_FRAMES, bool IS_WITH_CONTEXT> +ABSL_ATTRIBUTE_NO_SANITIZE_ADDRESS // May read random elements from stack. +ABSL_ATTRIBUTE_NO_SANITIZE_MEMORY // May read random elements from stack. +ABSL_ATTRIBUTE_NOINLINE +static int UnwindImpl(void **result, int *sizes, int max_depth, int skip_count, + const void *ucp, int *min_dropped_frames) { + int n = 0; + void **fp = reinterpret_cast<void **>(__builtin_frame_address(0)); + + while (fp && n < max_depth) { + if (*(fp + 1) == reinterpret_cast<void *>(0)) { + // In 64-bit code, we often see a frame that + // points to itself and has a return address of 0. + break; + } + void **next_fp = NextStackFrame<!IS_STACK_FRAMES, IS_WITH_CONTEXT>(fp, ucp); + if (skip_count > 0) { + skip_count--; + } else { + result[n] = *(fp + 1); + if (IS_STACK_FRAMES) { + if (next_fp > fp) { + sizes[n] = (uintptr_t)next_fp - (uintptr_t)fp; + } else { + // A frame-size of 0 is used to indicate unknown frame size. + sizes[n] = 0; + } + } + n++; + } + fp = next_fp; + } + if (min_dropped_frames != nullptr) { + // Implementation detail: we clamp the max of frames we are willing to + // count, so as not to spend too much time in the loop below. + const int kMaxUnwind = 1000; + int j = 0; + for (; fp != nullptr && j < kMaxUnwind; j++) { + fp = NextStackFrame<!IS_STACK_FRAMES, IS_WITH_CONTEXT>(fp, ucp); + } + *min_dropped_frames = j; + } + return n; +} + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace debugging_internal { +bool StackTraceWorksForTest() { + return true; +} +} // namespace debugging_internal +ABSL_NAMESPACE_END +} // namespace absl + +#endif // ABSL_DEBUGGING_INTERNAL_STACKTRACE_X86_INL_INC_ diff --git a/third_party/abseil_cpp/absl/debugging/internal/symbolize.h b/third_party/abseil_cpp/absl/debugging/internal/symbolize.h new file mode 100644 index 000000000000..4f26130fbb71 --- /dev/null +++ b/third_party/abseil_cpp/absl/debugging/internal/symbolize.h @@ -0,0 +1,147 @@ +// Copyright 2018 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// This file contains internal parts of the Abseil symbolizer. +// Do not depend on the anything in this file, it may change at anytime. + +#ifndef ABSL_DEBUGGING_INTERNAL_SYMBOLIZE_H_ +#define ABSL_DEBUGGING_INTERNAL_SYMBOLIZE_H_ + +#ifdef __cplusplus + +#include <cstddef> +#include <cstdint> + +#include "absl/base/config.h" +#include "absl/strings/string_view.h" + +#ifdef ABSL_INTERNAL_HAVE_ELF_SYMBOLIZE +#error ABSL_INTERNAL_HAVE_ELF_SYMBOLIZE cannot be directly set +#elif defined(__ELF__) && defined(__GLIBC__) && !defined(__native_client__) && \ + !defined(__asmjs__) && !defined(__wasm__) +#define ABSL_INTERNAL_HAVE_ELF_SYMBOLIZE 1 + +#include <elf.h> +#include <link.h> // For ElfW() macro. +#include <functional> +#include <string> + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace debugging_internal { + +// Iterates over all sections, invoking callback on each with the section name +// and the section header. +// +// Returns true on success; otherwise returns false in case of errors. +// +// This is not async-signal-safe. +bool ForEachSection(int fd, + const std::function<bool(absl::string_view name, + const ElfW(Shdr) &)>& callback); + +// Gets the section header for the given name, if it exists. Returns true on +// success. Otherwise, returns false. +bool GetSectionHeaderByName(int fd, const char *name, size_t name_len, + ElfW(Shdr) *out); + +} // namespace debugging_internal +ABSL_NAMESPACE_END +} // namespace absl + +#endif // ABSL_INTERNAL_HAVE_ELF_SYMBOLIZE + +#ifdef ABSL_INTERNAL_HAVE_DARWIN_SYMBOLIZE +#error ABSL_INTERNAL_HAVE_DARWIN_SYMBOLIZE cannot be directly set +#elif defined(__APPLE__) +#define ABSL_INTERNAL_HAVE_DARWIN_SYMBOLIZE 1 +#endif + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace debugging_internal { + +struct SymbolDecoratorArgs { + // The program counter we are getting symbolic name for. + const void *pc; + // 0 for main executable, load address for shared libraries. + ptrdiff_t relocation; + // Read-only file descriptor for ELF image covering "pc", + // or -1 if no such ELF image exists in /proc/self/maps. + int fd; + // Output buffer, size. + // Note: the buffer may not be empty -- default symbolizer may have already + // produced some output, and earlier decorators may have adorned it in + // some way. You are free to replace or augment the contents (within the + // symbol_buf_size limit). + char *const symbol_buf; + size_t symbol_buf_size; + // Temporary scratch space, size. + // Use that space in preference to allocating your own stack buffer to + // conserve stack. + char *const tmp_buf; + size_t tmp_buf_size; + // User-provided argument + void* arg; +}; +using SymbolDecorator = void (*)(const SymbolDecoratorArgs *); + +// Installs a function-pointer as a decorator. Returns a value less than zero +// if the system cannot install the decorator. Otherwise, returns a unique +// identifier corresponding to the decorator. This identifier can be used to +// uninstall the decorator - See RemoveSymbolDecorator() below. +int InstallSymbolDecorator(SymbolDecorator decorator, void* arg); + +// Removes a previously installed function-pointer decorator. Parameter "ticket" +// is the return-value from calling InstallSymbolDecorator(). +bool RemoveSymbolDecorator(int ticket); + +// Remove all installed decorators. Returns true if successful, false if +// symbolization is currently in progress. +bool RemoveAllSymbolDecorators(void); + +// Registers an address range to a file mapping. +// +// Preconditions: +// start <= end +// filename != nullptr +// +// Returns true if the file was successfully registered. +bool RegisterFileMappingHint(const void* start, const void* end, + uint64_t offset, const char* filename); + +// Looks up the file mapping registered by RegisterFileMappingHint for an +// address range. If there is one, the file name is stored in *filename and +// *start and *end are modified to reflect the registered mapping. Returns +// whether any hint was found. +bool GetFileMappingHint(const void** start, const void** end, uint64_t* offset, + const char** filename); + +} // namespace debugging_internal +ABSL_NAMESPACE_END +} // namespace absl + +#endif // __cplusplus + +#include <stdbool.h> + +#ifdef __cplusplus +extern "C" +#endif // __cplusplus + + bool + AbslInternalGetFileMappingHint(const void** start, const void** end, + uint64_t* offset, const char** filename); + +#endif // ABSL_DEBUGGING_INTERNAL_SYMBOLIZE_H_ diff --git a/third_party/abseil_cpp/absl/debugging/internal/vdso_support.cc b/third_party/abseil_cpp/absl/debugging/internal/vdso_support.cc new file mode 100644 index 000000000000..6be16d907275 --- /dev/null +++ b/third_party/abseil_cpp/absl/debugging/internal/vdso_support.cc @@ -0,0 +1,173 @@ +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Allow dynamic symbol lookup in the kernel VDSO page. +// +// VDSOSupport -- a class representing kernel VDSO (if present). + +#include "absl/debugging/internal/vdso_support.h" + +#ifdef ABSL_HAVE_VDSO_SUPPORT // defined in vdso_support.h + +#include <errno.h> +#include <fcntl.h> +#include <sys/syscall.h> +#include <unistd.h> + +#if __GLIBC_PREREQ(2, 16) // GLIBC-2.16 implements getauxval. +#include <sys/auxv.h> +#endif + +#include "absl/base/dynamic_annotations.h" +#include "absl/base/internal/raw_logging.h" +#include "absl/base/port.h" + +#ifndef AT_SYSINFO_EHDR +#define AT_SYSINFO_EHDR 33 // for crosstoolv10 +#endif + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace debugging_internal { + +ABSL_CONST_INIT +std::atomic<const void *> VDSOSupport::vdso_base_( + debugging_internal::ElfMemImage::kInvalidBase); + +std::atomic<VDSOSupport::GetCpuFn> VDSOSupport::getcpu_fn_(&InitAndGetCPU); +VDSOSupport::VDSOSupport() + // If vdso_base_ is still set to kInvalidBase, we got here + // before VDSOSupport::Init has been called. Call it now. + : image_(vdso_base_.load(std::memory_order_relaxed) == + debugging_internal::ElfMemImage::kInvalidBase + ? Init() + : vdso_base_.load(std::memory_order_relaxed)) {} + +// NOTE: we can't use GoogleOnceInit() below, because we can be +// called by tcmalloc, and none of the *once* stuff may be functional yet. +// +// In addition, we hope that the VDSOSupportHelper constructor +// causes this code to run before there are any threads, and before +// InitGoogle() has executed any chroot or setuid calls. +// +// Finally, even if there is a race here, it is harmless, because +// the operation should be idempotent. +const void *VDSOSupport::Init() { + const auto kInvalidBase = debugging_internal::ElfMemImage::kInvalidBase; +#if __GLIBC_PREREQ(2, 16) + if (vdso_base_.load(std::memory_order_relaxed) == kInvalidBase) { + errno = 0; + const void *const sysinfo_ehdr = + reinterpret_cast<const void *>(getauxval(AT_SYSINFO_EHDR)); + if (errno == 0) { + vdso_base_.store(sysinfo_ehdr, std::memory_order_relaxed); + } + } +#endif // __GLIBC_PREREQ(2, 16) + if (vdso_base_.load(std::memory_order_relaxed) == kInvalidBase) { + int fd = open("/proc/self/auxv", O_RDONLY); + if (fd == -1) { + // Kernel too old to have a VDSO. + vdso_base_.store(nullptr, std::memory_order_relaxed); + getcpu_fn_.store(&GetCPUViaSyscall, std::memory_order_relaxed); + return nullptr; + } + ElfW(auxv_t) aux; + while (read(fd, &aux, sizeof(aux)) == sizeof(aux)) { + if (aux.a_type == AT_SYSINFO_EHDR) { + vdso_base_.store(reinterpret_cast<void *>(aux.a_un.a_val), + std::memory_order_relaxed); + break; + } + } + close(fd); + if (vdso_base_.load(std::memory_order_relaxed) == kInvalidBase) { + // Didn't find AT_SYSINFO_EHDR in auxv[]. + vdso_base_.store(nullptr, std::memory_order_relaxed); + } + } + GetCpuFn fn = &GetCPUViaSyscall; // default if VDSO not present. + if (vdso_base_.load(std::memory_order_relaxed)) { + VDSOSupport vdso; + SymbolInfo info; + if (vdso.LookupSymbol("__vdso_getcpu", "LINUX_2.6", STT_FUNC, &info)) { + fn = reinterpret_cast<GetCpuFn>(const_cast<void *>(info.address)); + } + } + // Subtle: this code runs outside of any locks; prevent compiler + // from assigning to getcpu_fn_ more than once. + getcpu_fn_.store(fn, std::memory_order_relaxed); + return vdso_base_.load(std::memory_order_relaxed); +} + +const void *VDSOSupport::SetBase(const void *base) { + ABSL_RAW_CHECK(base != debugging_internal::ElfMemImage::kInvalidBase, + "internal error"); + const void *old_base = vdso_base_.load(std::memory_order_relaxed); + vdso_base_.store(base, std::memory_order_relaxed); + image_.Init(base); + // Also reset getcpu_fn_, so GetCPU could be tested with simulated VDSO. + getcpu_fn_.store(&InitAndGetCPU, std::memory_order_relaxed); + return old_base; +} + +bool VDSOSupport::LookupSymbol(const char *name, + const char *version, + int type, + SymbolInfo *info) const { + return image_.LookupSymbol(name, version, type, info); +} + +bool VDSOSupport::LookupSymbolByAddress(const void *address, + SymbolInfo *info_out) const { + return image_.LookupSymbolByAddress(address, info_out); +} + +// NOLINT on 'long' because this routine mimics kernel api. +long VDSOSupport::GetCPUViaSyscall(unsigned *cpu, // NOLINT(runtime/int) + void *, void *) { +#ifdef SYS_getcpu + return syscall(SYS_getcpu, cpu, nullptr, nullptr); +#else + // x86_64 never implemented sys_getcpu(), except as a VDSO call. + static_cast<void>(cpu); // Avoid an unused argument compiler warning. + errno = ENOSYS; + return -1; +#endif +} + +// Use fast __vdso_getcpu if available. +long VDSOSupport::InitAndGetCPU(unsigned *cpu, // NOLINT(runtime/int) + void *x, void *y) { + Init(); + GetCpuFn fn = getcpu_fn_.load(std::memory_order_relaxed); + ABSL_RAW_CHECK(fn != &InitAndGetCPU, "Init() did not set getcpu_fn_"); + return (*fn)(cpu, x, y); +} + +// This function must be very fast, and may be called from very +// low level (e.g. tcmalloc). Hence I avoid things like +// GoogleOnceInit() and ::operator new. +ABSL_ATTRIBUTE_NO_SANITIZE_MEMORY +int GetCPU() { + unsigned cpu; + int ret_code = (*VDSOSupport::getcpu_fn_)(&cpu, nullptr, nullptr); + return ret_code == 0 ? cpu : ret_code; +} + +} // namespace debugging_internal +ABSL_NAMESPACE_END +} // namespace absl + +#endif // ABSL_HAVE_VDSO_SUPPORT diff --git a/third_party/abseil_cpp/absl/debugging/internal/vdso_support.h b/third_party/abseil_cpp/absl/debugging/internal/vdso_support.h new file mode 100644 index 000000000000..6562c6c2350a --- /dev/null +++ b/third_party/abseil_cpp/absl/debugging/internal/vdso_support.h @@ -0,0 +1,158 @@ +// +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +// Allow dynamic symbol lookup in the kernel VDSO page. +// +// VDSO stands for "Virtual Dynamic Shared Object" -- a page of +// executable code, which looks like a shared library, but doesn't +// necessarily exist anywhere on disk, and which gets mmap()ed into +// every process by kernels which support VDSO, such as 2.6.x for 32-bit +// executables, and 2.6.24 and above for 64-bit executables. +// +// More details could be found here: +// http://www.trilithium.com/johan/2005/08/linux-gate/ +// +// VDSOSupport -- a class representing kernel VDSO (if present). +// +// Example usage: +// VDSOSupport vdso; +// VDSOSupport::SymbolInfo info; +// typedef (*FN)(unsigned *, void *, void *); +// FN fn = nullptr; +// if (vdso.LookupSymbol("__vdso_getcpu", "LINUX_2.6", STT_FUNC, &info)) { +// fn = reinterpret_cast<FN>(info.address); +// } + +#ifndef ABSL_DEBUGGING_INTERNAL_VDSO_SUPPORT_H_ +#define ABSL_DEBUGGING_INTERNAL_VDSO_SUPPORT_H_ + +#include <atomic> + +#include "absl/base/attributes.h" +#include "absl/debugging/internal/elf_mem_image.h" + +#ifdef ABSL_HAVE_ELF_MEM_IMAGE + +#ifdef ABSL_HAVE_VDSO_SUPPORT +#error ABSL_HAVE_VDSO_SUPPORT cannot be directly set +#else +#define ABSL_HAVE_VDSO_SUPPORT 1 +#endif + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace debugging_internal { + +// NOTE: this class may be used from within tcmalloc, and can not +// use any memory allocation routines. +class VDSOSupport { + public: + VDSOSupport(); + + typedef ElfMemImage::SymbolInfo SymbolInfo; + typedef ElfMemImage::SymbolIterator SymbolIterator; + + // On PowerPC64 VDSO symbols can either be of type STT_FUNC or STT_NOTYPE + // depending on how the kernel is built. The kernel is normally built with + // STT_NOTYPE type VDSO symbols. Let's make things simpler first by using a + // compile-time constant. +#ifdef __powerpc64__ + enum { kVDSOSymbolType = STT_NOTYPE }; +#else + enum { kVDSOSymbolType = STT_FUNC }; +#endif + + // Answers whether we have a vdso at all. + bool IsPresent() const { return image_.IsPresent(); } + + // Allow to iterate over all VDSO symbols. + SymbolIterator begin() const { return image_.begin(); } + SymbolIterator end() const { return image_.end(); } + + // Look up versioned dynamic symbol in the kernel VDSO. + // Returns false if VDSO is not present, or doesn't contain given + // symbol/version/type combination. + // If info_out != nullptr, additional details are filled in. + bool LookupSymbol(const char *name, const char *version, + int symbol_type, SymbolInfo *info_out) const; + + // Find info about symbol (if any) which overlaps given address. + // Returns true if symbol was found; false if VDSO isn't present + // or doesn't have a symbol overlapping given address. + // If info_out != nullptr, additional details are filled in. + bool LookupSymbolByAddress(const void *address, SymbolInfo *info_out) const; + + // Used only for testing. Replace real VDSO base with a mock. + // Returns previous value of vdso_base_. After you are done testing, + // you are expected to call SetBase() with previous value, in order to + // reset state to the way it was. + const void *SetBase(const void *s); + + // Computes vdso_base_ and returns it. Should be called as early as + // possible; before any thread creation, chroot or setuid. + static const void *Init(); + + private: + // image_ represents VDSO ELF image in memory. + // image_.ehdr_ == nullptr implies there is no VDSO. + ElfMemImage image_; + + // Cached value of auxv AT_SYSINFO_EHDR, computed once. + // This is a tri-state: + // kInvalidBase => value hasn't been determined yet. + // 0 => there is no VDSO. + // else => vma of VDSO Elf{32,64}_Ehdr. + // + // When testing with mock VDSO, low bit is set. + // The low bit is always available because vdso_base_ is + // page-aligned. + static std::atomic<const void *> vdso_base_; + + // NOLINT on 'long' because these routines mimic kernel api. + // The 'cache' parameter may be used by some versions of the kernel, + // and should be nullptr or point to a static buffer containing at + // least two 'long's. + static long InitAndGetCPU(unsigned *cpu, void *cache, // NOLINT 'long'. + void *unused); + static long GetCPUViaSyscall(unsigned *cpu, void *cache, // NOLINT 'long'. + void *unused); + typedef long (*GetCpuFn)(unsigned *cpu, void *cache, // NOLINT 'long'. + void *unused); + + // This function pointer may point to InitAndGetCPU, + // GetCPUViaSyscall, or __vdso_getcpu at different stages of initialization. + ABSL_CONST_INIT static std::atomic<GetCpuFn> getcpu_fn_; + + friend int GetCPU(void); // Needs access to getcpu_fn_. + + VDSOSupport(const VDSOSupport&) = delete; + VDSOSupport& operator=(const VDSOSupport&) = delete; +}; + +// Same as sched_getcpu() on later glibc versions. +// Return current CPU, using (fast) __vdso_getcpu@LINUX_2.6 if present, +// otherwise use syscall(SYS_getcpu,...). +// May return -1 with errno == ENOSYS if the kernel doesn't +// support SYS_getcpu. +int GetCPU(); + +} // namespace debugging_internal +ABSL_NAMESPACE_END +} // namespace absl + +#endif // ABSL_HAVE_ELF_MEM_IMAGE + +#endif // ABSL_DEBUGGING_INTERNAL_VDSO_SUPPORT_H_ |