1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
|
// Copyright (c) 2006, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Author: Satoru Takabayashi
// Stack-footprint reduction work done by Raksit Ashok
//
// Implementation note:
//
// We never allocate from the heap; all working memory lives on the stack.
// We also want to keep stack consumption low so that the symbolizer can run
// on small stacks.
//
// Here are some numbers collected with GCC 4.1.0 on x86:
// - sizeof(Elf32_Sym) = 16
// - sizeof(Elf32_Shdr) = 40
// - sizeof(Elf64_Sym) = 24
// - sizeof(Elf64_Shdr) = 64
//
// This implementation is intended to be async-signal-safe but uses
// some functions which are not guaranteed to be so, such as memchr()
// and memmove(). We assume they are async-signal-safe.
//
// Additional header can be specified by the GLOG_BUILD_CONFIG_INCLUDE
// macro to add platform specific defines (e.g. OS_OPENBSD).
#ifdef GLOG_BUILD_CONFIG_INCLUDE
#include GLOG_BUILD_CONFIG_INCLUDE
#endif // GLOG_BUILD_CONFIG_INCLUDE
#include "utilities.h"
#if defined(HAVE_SYMBOLIZE)
#include <string.h>
#include <algorithm>
#include <limits>
#include "symbolize.h"
#include "demangle.h"
_START_GOOGLE_NAMESPACE_
// assert() is not guaranteed to be async-signal-safe, so this file relies on
// a minimal assertion macro of its own. So far there has been no need for
// pretty printing of __FILE__ and friends.
//
// AssertFail() wraps abort() in a function that returns int, which makes it
// usable as an operand of the ?: operator inside SAFE_ASSERT.
static int AssertFail() {
  abort();
  return 0;  // Should not be reached; only satisfies the return type.
}

// Evaluates to 0 when |expr| holds and aborts the process otherwise.
#define SAFE_ASSERT(expr) ((expr) ? 0 : AssertFail())
// Optional user-supplied symbolization hook; stays NULL until one is
// installed.
static SymbolizeCallback g_symbolize_callback = NULL;

// Stores |callback| as the hook that SymbolizeAndDemangle() invokes before
// looking the symbol up itself. Whatever was installed before is replaced.
void InstallSymbolizeCallback(SymbolizeCallback callback) {
  g_symbolize_callback = callback;
}
// Optional replacement for the routine that locates and opens the object file
// containing a pc; stays NULL until one is installed.
static SymbolizeOpenObjectFileCallback g_symbolize_open_object_file_callback =
    NULL;

// Stores |callback| as the routine SymbolizeAndDemangle() uses to open the
// object file for a pc. Whatever was installed before is replaced.
void InstallSymbolizeOpenObjectFileCallback(
    SymbolizeOpenObjectFileCallback callback) {
  g_symbolize_open_object_file_callback = callback;
}
// In-place front end for Demangle(): the name held in |out| is replaced by
// its demangled form when Demangle() succeeds and the result, including the
// terminating '\0', fits into |out_size| bytes. In every other case |out| is
// left untouched.
// To keep stack consumption low, we would like this function to not
// get inlined.
static ATTRIBUTE_NOINLINE void DemangleInplace(char *out, int out_size) {
  char scratch[256];  // Big enough for sane demangled symbols.
  if (!Demangle(out, scratch, sizeof(scratch))) {
    return;  // Demangle() reported failure; keep the name as it is.
  }
  const size_t name_len = strlen(scratch);
  if (name_len + 1 > (size_t)out_size) {  // +1 for '\0'.
    return;  // The demangled name does not fit; keep the name as it is.
  }
  SAFE_ASSERT(name_len < sizeof(scratch));
  memmove(out, scratch, name_len + 1);
}
_END_GOOGLE_NAMESPACE_
#if defined(__ELF__)
#if defined(HAVE_DLFCN_H)
#include <dlfcn.h>
#endif
#if defined(OS_OPENBSD)
#include <sys/exec_elf.h>
#else
#include <elf.h>
#endif
#include <errno.h>
#include <fcntl.h>
#include <limits.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <stddef.h>
#include <string.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>
#include "symbolize.h"
#include "config.h"
#include "glog/raw_logging.h"
// Re-runs fn until it doesn't cause EINTR: the expression "fn" is evaluated
// again for as long as it yields a negative value while errno equals EINTR.
// "fn" is normally an assignment such as "fd = open(...)", so the result of
// the last attempt remains available to the caller.
#define NO_INTR(fn) do {} while ((fn) < 0 && errno == EINTR)
_START_GOOGLE_NAMESPACE_
// Reads up to "count" bytes, starting at file position "offset", from the
// file descriptor "fd" into "buf". Short reads are continued and reads
// interrupted by EINTR are retried. Returns the number of bytes actually
// read, which is smaller than "count" only when end of file was reached, or
// -1 if pread() failed for a reason other than EINTR.
static ssize_t ReadFromOffset(const int fd, void *buf, const size_t count,
                              const off_t offset) {
  SAFE_ASSERT(fd >= 0);
  SAFE_ASSERT(count <= std::numeric_limits<ssize_t>::max());
  char *const dst = reinterpret_cast<char *>(buf);
  ssize_t total = 0;
  while (static_cast<size_t>(total) < count) {
    ssize_t chunk;
    NO_INTR(chunk = pread(fd, dst + total, count - total, offset + total));
    if (chunk < 0) {
      return -1;  // There was an error other than EINTR.
    }
    if (chunk == 0) {
      break;  // Reached EOF.
    }
    total += chunk;
  }
  SAFE_ASSERT(static_cast<size_t>(total) <= count);
  return total;
}
// Like ReadFromOffset(), but succeeds only if exactly "count" bytes could be
// read from "offset" in the file referred to by "fd". Returns true in that
// case and false on a read error or when the file ends early.
static bool ReadFromOffsetExact(const int fd, void *buf,
                                const size_t count, const off_t offset) {
  const ssize_t bytes_read = ReadFromOffset(fd, buf, count, offset);
  return static_cast<size_t>(bytes_read) == count;
}
// Returns elf_header.e_type if the file pointed by fd is an ELF binary.
static int FileGetElfType(const int fd) {
ElfW(Ehdr) elf_header;
if (!ReadFromOffsetExact(fd, &elf_header, sizeof(elf_header), 0)) {
return -1;
}
if (memcmp(elf_header.e_ident, ELFMAG, SELFMAG) != 0) {
return -1;
}
return elf_header.e_type;
}
// Scans the "sh_num" section headers stored at file offset "sh_offset" of the
// ELF binary "fd" and copies the first header whose sh_type equals "type"
// into "*out". Returns true if such a header was found. Returns false if
// there is none, if reading fails, or if the file ends before all announced
// headers could be read.
// To keep stack consumption low, we would like this function to not get
// inlined.
static ATTRIBUTE_NOINLINE bool
GetSectionHeaderByType(const int fd, ElfW(Half) sh_num, const off_t sh_offset,
                       ElfW(Word) type, ElfW(Shdr) *out) {
  // Read at most 16 section headers at a time to save read calls.
  ElfW(Shdr) buf[16];
  const size_t kHeadersPerRead = sizeof(buf) / sizeof(buf[0]);
  for (size_t i = 0; i < sh_num;) {
    const size_t headers_left = sh_num - i;
    const size_t headers_to_read = std::min(headers_left, kHeadersPerRead);
    const ssize_t len =
        ReadFromOffset(fd, buf, headers_to_read * sizeof(buf[0]),
                       sh_offset + i * sizeof(buf[0]));
    if (len < 0) {
      return false;
    }
    // Only complete headers are usable; a trailing fragment is ignored.
    const size_t num_headers_in_buf =
        static_cast<size_t>(len) / sizeof(buf[0]);
    if (num_headers_in_buf == 0) {
      // The file is truncated. Without this check "i" would never advance
      // and the loop would spin forever.
      return false;
    }
    SAFE_ASSERT(num_headers_in_buf <= headers_to_read);
    for (size_t j = 0; j < num_headers_in_buf; ++j) {
      if (buf[j].sh_type == type) {
        *out = buf[j];
        return true;
      }
    }
    i += num_headers_in_buf;
  }
  return false;
}
// Size of the buffer used to compare section names. There is no particular
// reason to limit section names to 63 characters, but there has (as yet)
// been no need for anything longer either.
const int kMaxSectionNameLen = 64;

// Looks for the section called "name" in the ELF file "fd". Each section
// header is read into "*out" in turn and its name is compared against the
// first "name_len" bytes of "name"; "name_len" should include the
// terminating '\0'. Returns true, with the matching header left in "*out",
// on success. Returns false if no section matches, if a read fails, or if
// "name_len" exceeds kMaxSectionNameLen. Note that "*out" is overwritten even
// when the function fails.
bool GetSectionHeaderByName(int fd, const char *name, size_t name_len,
                            ElfW(Shdr) *out) {
  ElfW(Ehdr) elf_header;
  if (!ReadFromOffsetExact(fd, &elf_header, sizeof(elf_header), 0)) {
    return false;
  }

  // Fetch the header of the section selected by e_shstrndx; section names
  // are read from that section below.
  ElfW(Shdr) shstrtab;
  const off_t shstrtab_offset = (elf_header.e_shoff +
                                 elf_header.e_shentsize * elf_header.e_shstrndx);
  if (!ReadFromOffsetExact(fd, &shstrtab, sizeof(shstrtab), shstrtab_offset)) {
    return false;
  }

  for (int index = 0; index < elf_header.e_shnum; ++index) {
    const off_t header_offset = (elf_header.e_shoff +
                                 elf_header.e_shentsize * index);
    if (!ReadFromOffsetExact(fd, out, sizeof(*out), header_offset)) {
      return false;
    }
    char candidate[kMaxSectionNameLen];
    if (name_len > sizeof(candidate)) {
      RAW_LOG(WARNING, "Section name '%s' is too long (%" PRIuS "); "
              "section will not be found (even if present).", name, name_len);
      // No point in even trying.
      return false;
    }
    const off_t name_offset = shstrtab.sh_offset + out->sh_name;
    const ssize_t n_read =
        ReadFromOffset(fd, &candidate, name_len, name_offset);
    if (n_read == -1) {
      return false;
    }
    // A short read means the name could be at the end of the file; such a
    // section is skipped.
    if (n_read == name_len && memcmp(candidate, name, name_len) == 0) {
      return true;
    }
  }
  return false;
}
// Iterates over the symbols of the symbol table described by "symtab" in the
// file "fd" and looks for the symbol whose address range contains "pc".
// "symbol_offset" is added to every symbol value before the comparison. The
// name of the first matching symbol is read from the string table described
// by "strtab" into "out" (at most "out_size" bytes).
//
// Returns true if a symbol was found and its NUL-terminated name was stored
// in "out". Returns false if no symbol matches, if either table pointer is
// NULL, if the symbol table header is malformed, or if the file cannot be
// read (including the case where it ends before the announced data). When
// the name itself cannot be read, "out" is zero-filled.
// To keep stack consumption low, we would like this function to not get
// inlined.
static ATTRIBUTE_NOINLINE bool
FindSymbol(uint64_t pc, const int fd, char *out, int out_size,
           uint64_t symbol_offset, const ElfW(Shdr) *strtab,
           const ElfW(Shdr) *symtab) {
  if (symtab == NULL || strtab == NULL) {
    return false;
  }
  if (symtab->sh_entsize == 0) {
    return false;  // Malformed header; the division below would trap.
  }
  const int num_symbols = symtab->sh_size / symtab->sh_entsize;
  for (int i = 0; i < num_symbols;) {
    off_t offset = symtab->sh_offset + i * symtab->sh_entsize;

    // If we are reading Elf64_Sym's, we want to limit this array to
    // 32 elements (to keep stack consumption low), otherwise we can
    // have a 64 element Elf32_Sym array.
#if __WORDSIZE == 64
#define NUM_SYMBOLS 32
#else
#define NUM_SYMBOLS 64
#endif

    // Read at most NUM_SYMBOLS symbols at once to save read() calls.
    ElfW(Sym) buf[NUM_SYMBOLS];
    const int num_symbols_to_read = std::min(NUM_SYMBOLS, num_symbols - i);
    const ssize_t len =
        ReadFromOffset(fd, &buf, sizeof(buf[0]) * num_symbols_to_read, offset);
    if (len < 0) {
      return false;  // Read error.
    }
    // Only complete symbols are usable; a trailing fragment is ignored.
    const ssize_t num_symbols_in_buf =
        len / static_cast<ssize_t>(sizeof(buf[0]));
    if (num_symbols_in_buf == 0) {
      // The file is truncated. Without this check "i" would never advance
      // and the loop would spin forever.
      return false;
    }
    SAFE_ASSERT(num_symbols_in_buf <= num_symbols_to_read);
    for (int j = 0; j < num_symbols_in_buf; ++j) {
      const ElfW(Sym)& symbol = buf[j];
      uint64_t start_address = symbol.st_value;
      start_address += symbol_offset;
      uint64_t end_address = start_address + symbol.st_size;
      if (symbol.st_value != 0 &&  // Skip null value symbols.
          symbol.st_shndx != 0 &&  // Skip undefined symbols.
          start_address <= pc && pc < end_address) {
        const ssize_t len1 = ReadFromOffset(fd, out, out_size,
                                            strtab->sh_offset + symbol.st_name);
        // Look for the terminator only among the bytes that were actually
        // read; anything beyond them is stale buffer content.
        if (len1 <= 0 || memchr(out, '\0', len1) == NULL) {
          memset(out, 0, out_size);
          return false;
        }
        return true;  // Obtained the symbol name.
      }
    }
    i += num_symbols_in_buf;
  }
  return false;
}
// Get the symbol name of "pc" from the file pointed by "fd". Process
// both regular and dynamic symbol tables if necessary. On success,
// write the symbol name to "out" and return true. Otherwise, return
// false.
static bool GetSymbolFromObjectFile(const int fd,
uint64_t pc,
char* out,
int out_size,
uint64_t base_address) {
// Read the ELF header.
ElfW(Ehdr) elf_header;
if (!ReadFromOffsetExact(fd, &elf_header, sizeof(elf_header), 0)) {
return false;
}
ElfW(Shdr) symtab, strtab;
// Consult a regular symbol table first.
if (GetSectionHeaderByType(fd, elf_header.e_shnum, elf_header.e_shoff,
SHT_SYMTAB, &symtab)) {
if (!ReadFromOffsetExact(fd, &strtab, sizeof(strtab), elf_header.e_shoff +
symtab.sh_link * sizeof(symtab))) {
return false;
}
if (FindSymbol(pc, fd, out, out_size, base_address, &strtab, &symtab)) {
return true; // Found the symbol in a regular symbol table.
}
}
// If the symbol is not found, then consult a dynamic symbol table.
if (GetSectionHeaderByType(fd, elf_header.e_shnum, elf_header.e_shoff,
SHT_DYNSYM, &symtab)) {
if (!ReadFromOffsetExact(fd, &strtab, sizeof(strtab), elf_header.e_shoff +
symtab.sh_link * sizeof(symtab))) {
return false;
}
if (FindSymbol(pc, fd, out, out_size, base_address, &strtab, &symtab)) {
return true; // Found the symbol in a dynamic symbol table.
}
}
return false;
}
namespace {
// Scope guard for a file descriptor: the wrapped descriptor is closed when
// the guard is destroyed, so it gets closed for sure. Negative values stand
// for "no descriptor" and are never passed to close(). Copying is disabled.
struct FileDescriptor {
  const int fd_;

  explicit FileDescriptor(int fd) : fd_(fd) {}

  ~FileDescriptor() {
    if (fd_ < 0) {
      return;  // Nothing was opened.
    }
    close(fd_);
  }

  // Returns the wrapped descriptor; ownership stays with this object.
  int get() { return fd_; }

 private:
  // Declared but not defined, to forbid copies.
  explicit FileDescriptor(const FileDescriptor&);
  void operator=(const FileDescriptor&);
};
// Reads '\n'-terminated lines from a file descriptor through a
// caller-provided buffer of "buf_len" bytes, starting at file position
// "offset". A line has to fit into the buffer to be returned.
//
// Note: we don't use ProcMapsIterator since the object is big (it has
// a 5k array member) and uses async-unsafe functions such as sscanf()
// and snprintf().
class LineReader {
 public:
  explicit LineReader(int fd, char *buf, int buf_len, off_t offset)
      : fd_(fd),
        buf_(buf),
        buf_len_(buf_len),
        offset_(offset),
        bol_(buf),
        eol_(buf),
        eod_(buf) {}

  // Reads the next '\n'-terminated line. On success the '\n' is replaced
  // with '\0', "*bol" and "*eol" are set to the beginning and the end of the
  // line, and true is returned. Returns false on end of file, on a read
  // error, or when no '\n' can be found in the buffered data.
  //
  // Note: if the last line doesn't end with '\n', the line will be
  // dropped. It's an intentional behavior to make the code simple.
  bool ReadLine(const char **bol, const char **eol) {
    if (BufferIsEmpty()) {  // First time.
      if (!Refill(buf_, buf_len_)) {
        return false;
      }
    } else {
      bol_ = eol_ + 1;  // Advance to the next line in the buffer.
      SAFE_ASSERT(bol_ <= eod_);  // "bol_" can point to "eod_".
      if (!HasCompleteLine()) {
        // Move the trailing incomplete line to the beginning of the buffer
        // and append fresh data from the file behind it.
        const int incomplete_line_length = eod_ - bol_;
        memmove(buf_, bol_, incomplete_line_length);
        if (!Refill(buf_ + incomplete_line_length,
                    buf_len_ - incomplete_line_length)) {
          return false;
        }
      }
    }
    eol_ = FindLineFeed();
    if (eol_ == NULL) {  // '\n' not found. Malformed line.
      return false;
    }
    *eol_ = '\0';  // Replace '\n' with '\0'.

    *bol = bol_;
    *eol = eol_;
    return true;
  }

  // Beginning of line.
  const char *bol() {
    return bol_;
  }

  // End of line.
  const char *eol() {
    return eol_;
  }

 private:
  explicit LineReader(const LineReader&);
  void operator=(const LineReader&);

  // Reads up to "room" bytes from the current file position into "dst",
  // which must point into "buf_". On success the file position and the end
  // of data are advanced, the line start is reset to the start of the
  // buffer, and true is returned. Returns false on EOF or error.
  bool Refill(char *dst, int room) {
    const ssize_t num_bytes = ReadFromOffset(fd_, dst, room, offset_);
    if (num_bytes <= 0) {  // EOF or error.
      return false;
    }
    offset_ += num_bytes;
    eod_ = dst + num_bytes;
    bol_ = buf_;
    return true;
  }

  // Returns the first '\n' in [bol_, eod_), or NULL if there is none.
  char *FindLineFeed() {
    return reinterpret_cast<char *>(memchr(bol_, '\n', eod_ - bol_));
  }

  bool BufferIsEmpty() {
    return buf_ == eod_;
  }

  bool HasCompleteLine() {
    return !BufferIsEmpty() && FindLineFeed() != NULL;
  }

  const int fd_;
  char * const buf_;
  const int buf_len_;
  off_t offset_;     // File position of the next read.
  char *bol_;        // Beginning of the current line.
  char *eol_;        // End of the current line.
  const char *eod_;  // End of data in "buf_".
};
} // namespace
// Parses the run of hexadecimal digits (upper or lower case) that begins at
// "start" and stores its value in "*hex"; "*hex" is 0 when there is no digit.
// Parsing stops at the first non-hex character or at "end", whichever comes
// first, and a pointer to that position is returned.
static char *GetHex(const char *start, const char *end, uint64_t *hex) {
  *hex = 0;
  const char *p = start;
  while (p < end) {
    const int ch = *p;
    int digit;
    if (ch >= '0' && ch <= '9') {
      digit = ch - '0';
    } else if ((ch >= 'A' && ch <= 'F') || (ch >= 'a' && ch <= 'f')) {
      // The low nibble of 'A'/'a' is 1, so adding 9 yields 10..15.
      digit = (ch & 0xF) + 9;
    } else {
      break;  // Encountered the first non-hex character.
    }
    *hex = (*hex << 4) | digit;
    ++p;
  }
  SAFE_ASSERT(p <= end);
  return const_cast<char *>(p);
}
// Searches /proc/self/maps for the mapping that contains |pc| and tries to
// open the file backing it.
//
// |start_address| is overwritten with the start address of every mapping
// line that is parsed, so on return it holds the start of the last mapping
// examined (the one containing |pc| when the search got that far).
// |base_address| is updated whenever a readable mapping starts with an ELF
// header: it becomes 0 for ET_EXEC and the load bias for ET_DYN.
//
// Returns the file descriptor of the opened object file, which the caller
// must close, or -1 on any failure. Only when the mapping was found but its
// file could not be opened is the file name copied into |out_file_name|,
// truncated to |out_file_name_size| bytes including the null-terminator.
// Nothing is written if |out_file_name_size| is not positive.
static ATTRIBUTE_NOINLINE int
OpenObjectFileContainingPcAndGetStartAddress(uint64_t pc,
                                             uint64_t &start_address,
                                             uint64_t &base_address,
                                             char *out_file_name,
                                             int out_file_name_size) {
  int object_fd;

  int maps_fd;
  NO_INTR(maps_fd = open("/proc/self/maps", O_RDONLY));
  FileDescriptor wrapped_maps_fd(maps_fd);
  if (wrapped_maps_fd.get() < 0) {
    return -1;
  }

  int mem_fd;
  NO_INTR(mem_fd = open("/proc/self/mem", O_RDONLY));
  FileDescriptor wrapped_mem_fd(mem_fd);
  if (wrapped_mem_fd.get() < 0) {
    return -1;
  }

  // Iterate over maps and look for the map containing the pc.  Then
  // look into the symbol tables inside.
  char buf[1024];  // Big enough for line of sane /proc/self/maps
  LineReader reader(wrapped_maps_fd.get(), buf, sizeof(buf), 0);
  while (true) {
    const char *cursor;
    const char *eol;
    if (!reader.ReadLine(&cursor, &eol)) {  // EOF or malformed line.
      return -1;
    }

    // Start parsing line in /proc/self/maps.  Here is an example:
    //
    // 08048000-0804c000 r-xp 00000000 08:01 2142121    /bin/cat
    //
    // We want start address (08048000), end address (0804c000), flags
    // (r-xp) and file name (/bin/cat).

    // Read start address.
    cursor = GetHex(cursor, eol, &start_address);
    if (cursor == eol || *cursor != '-') {
      return -1;  // Malformed line.
    }
    ++cursor;  // Skip '-'.

    // Read end address.
    uint64_t end_address;
    cursor = GetHex(cursor, eol, &end_address);
    if (cursor == eol || *cursor != ' ') {
      return -1;  // Malformed line.
    }
    ++cursor;  // Skip ' '.

    // Read flags.  Skip flags until we encounter a space or eol.
    const char * const flags_start = cursor;
    while (cursor < eol && *cursor != ' ') {
      ++cursor;
    }
    // We expect at least four letters for flags (ex. "r-xp").
    if (cursor == eol || cursor < flags_start + 4) {
      return -1;  // Malformed line.
    }

    // Determine the base address by reading ELF headers in process memory.
    // This is done for every readable mapping, before checking whether the
    // mapping contains the pc, so that the value is already known when a
    // later mapping of the same object turns out to contain the pc.
    ElfW(Ehdr) ehdr;
    // Skip non-readable maps.
    if (flags_start[0] == 'r' &&
        ReadFromOffsetExact(mem_fd, &ehdr, sizeof(ElfW(Ehdr)), start_address) &&
        memcmp(ehdr.e_ident, ELFMAG, SELFMAG) == 0) {
      switch (ehdr.e_type) {
        case ET_EXEC:
          base_address = 0;
          break;
        case ET_DYN:
          // Find the segment containing file offset 0. This will correspond
          // to the ELF header that we just read. Normally this will have
          // virtual address 0, but this is not guaranteed. We must subtract
          // the virtual address from the address where the ELF header was
          // mapped to get the base address.
          //
          // If we fail to find a segment for file offset 0, use the address
          // of the ELF header as the base address.
          base_address = start_address;
          for (unsigned i = 0; i != ehdr.e_phnum; ++i) {
            ElfW(Phdr) phdr;
            if (ReadFromOffsetExact(
                    mem_fd, &phdr, sizeof(phdr),
                    start_address + ehdr.e_phoff + i * sizeof(phdr)) &&
                phdr.p_type == PT_LOAD && phdr.p_offset == 0) {
              base_address = start_address - phdr.p_vaddr;
              break;
            }
          }
          break;
        default:
          // ET_REL or ET_CORE. These aren't directly executable, so they don't
          // affect the base address.
          break;
      }
    }

    // Check start and end addresses.
    if (!(start_address <= pc && pc < end_address)) {
      continue;  // We skip this map.  PC isn't in this map.
    }

    // Check flags.  We are only interested in "r*x" maps.
    if (flags_start[0] != 'r' || flags_start[2] != 'x') {
      continue;  // We skip this map.
    }
    ++cursor;  // Skip ' '.

    // Read file offset.
    uint64_t file_offset;
    cursor = GetHex(cursor, eol, &file_offset);
    if (cursor == eol || *cursor != ' ') {
      return -1;  // Malformed line.
    }
    ++cursor;  // Skip ' '.

    // Skip to file name.  "cursor" now points to dev.  We need to
    // skip at least two spaces for dev and inode.
    int num_spaces = 0;
    while (cursor < eol) {
      if (*cursor == ' ') {
        ++num_spaces;
      } else if (num_spaces >= 2) {
        // The first non-space character after skipping two spaces
        // is the beginning of the file name.
        break;
      }
      ++cursor;
    }
    if (cursor == eol) {
      return -1;  // Malformed line.
    }

    // Finally, "cursor" now points to file name of our interest.
    NO_INTR(object_fd = open(cursor, O_RDONLY));
    if (object_fd < 0) {
      // Failed to open object file.  Copy the object file name to
      // |out_file_name|, unless the buffer cannot even hold the terminator
      // (index -1 must never be written).
      if (out_file_name_size > 0) {
        strncpy(out_file_name, cursor, out_file_name_size);
        // Making sure |out_file_name| is always null-terminated.
        out_file_name[out_file_name_size - 1] = '\0';
      }
      return -1;
    }
    return object_fd;
  }
}
// POSIX doesn't define any async-signal safe function for converting
// an integer to ASCII. We'll have to define our own version.
//
// itoa_r() writes the ASCII representation of "i" in the given "base" (2 to
// 16, lower-case digits) into "buf", left-padded with '0' to at least
// "padding" digits. A leading '-' is produced only for negative values in
// base 10; in any other base the value is printed as its unsigned bit
// pattern. Returns "buf" on success. Returns NULL if "sz" is 0, if "base" is
// out of range, or if the result (including the terminating NUL) does not
// fit into "sz" bytes; whenever "sz" is at least 1, "buf" then holds an
// empty string. It never writes more than "sz" bytes.
// NOTE: code from sandbox/linux/seccomp-bpf/demo.cc.
static char *itoa_r(intptr_t i, char *buf, size_t sz, int base, size_t padding) {
  // There has to be room for at least the terminating NUL byte.
  if (sz < 1)
    return NULL;

  if (base < 2 || base > 16) {
    buf[0] = '\000';
    return NULL;
  }

  size_t used = 1;  // Bytes accounted for so far, including the NUL.
  char *first_digit = buf;
  uintptr_t magnitude = i;

  // Handle negative numbers (only for base 10).
  if (i < 0 && base == 10) {
    // This does "magnitude = -i" while avoiding integer overflow.
    magnitude = static_cast<uintptr_t>(-(i + 1)) + 1;

    // Make sure we can write the '-' character.
    if (++used > sz) {
      buf[0] = '\000';
      return NULL;
    }
    *first_digit++ = '-';
  }

  // Emit the digits, least significant first. At least one digit (i.e. '0')
  // is always produced.
  char *cursor = first_digit;
  do {
    // Make sure there is still enough space left in our output buffer.
    if (++used > sz) {
      buf[0] = '\000';
      return NULL;
    }

    *cursor++ = "0123456789abcdef"[magnitude % base];
    magnitude /= base;

    if (padding > 0)
      padding--;
  } while (magnitude > 0 || padding > 0);

  // Terminate the output with a NUL character.
  *cursor = '\000';

  // The digits were generated in reverse order, because the number of
  // characters needed is not known until the conversion is done. Reverse
  // them in place now (leaving a possible "-" sign where it is).
  for (char *lo = first_digit, *hi = cursor - 1; lo < hi; ++lo, --hi) {
    const char tmp = *hi;
    *hi = *lo;
    *lo = tmp;
  }
  return buf;
}
// Appends |source| to the null-terminated string already held in |dest|,
// truncating it as needed. Never writes past the buffer size |dest_size| and
// guarantees that |dest| stays null-terminated. Aborts if the string in
// |dest| does not leave room for at least the terminator.
static void SafeAppendString(const char* source, char* dest, int dest_size) {
  const int used = strlen(dest);
  SAFE_ASSERT(used < dest_size);
  char *const tail = dest + used;
  const int room = dest_size - used;
  strncpy(tail, source, room);
  // Making sure |dest| is always null-terminated.
  tail[room - 1] = '\0';
}
// Formats |value| as lower-case hex digits (no "0x" prefix) via itoa_r() and
// appends them to |dest| with SafeAppendString(). Never writes past the
// buffer size |dest_size| and guarantees that |dest| is null-terminated.
static void SafeAppendHexNumber(uint64_t value, char* dest, int dest_size) {
  // 64-bit numbers in hex can have up to 16 digits, plus the terminator.
  char digits[17] = {'\0'};
  const char *const hex = itoa_r(value, digits, sizeof(digits), 16, 0);
  SafeAppendString(hex, dest, dest_size);
}
// The implementation of our symbolization routine for ELF platforms. It
// locates the object file mapped at "pc" (through the callback installed via
// InstallSymbolizeOpenObjectFileCallback() if there is one, otherwise via
// OpenObjectFileContainingPcAndGetStartAddress()), looks the address up in
// the symbol tables of that file and demangles the result in place.
//
// Returns true with the result in "out" when either the symbol name was
// obtained, or only the object file name is known, in which case "out" holds
// "(<file name>+0x<hex of pc minus base address>)". Otherwise, returns
// false; this includes "out_size" being smaller than 1.
// If a callback function is installed via InstallSymbolizeCallback(), it is
// also called in this function, and "out" is used as its output.
// To keep stack consumption low, we would like this function to not
// get inlined.
static ATTRIBUTE_NOINLINE bool SymbolizeAndDemangle(void *pc, char *out,
int out_size) {
uint64_t pc0 = reinterpret_cast<uintptr_t>(pc);
uint64_t start_address = 0;
uint64_t base_address = 0;
int object_fd = -1;
if (out_size < 1) {
return false;
}
// Start the output with "(". The routine that opens the object file is
// handed out + 1, so a file name it stores ends up right behind the "(".
out[0] = '\0';
SafeAppendString("(", out, out_size);
if (g_symbolize_open_object_file_callback) {
object_fd = g_symbolize_open_object_file_callback(pc0, start_address,
base_address, out + 1,
out_size - 1);
} else {
object_fd = OpenObjectFileContainingPcAndGetStartAddress(pc0, start_address,
base_address,
out + 1,
out_size - 1);
}
// From here on the descriptor is closed automatically on every return path.
FileDescriptor wrapped_object_fd(object_fd);
// With PRINT_UNSYMBOLIZED_STACK_TRACES defined the block below is entered
// unconditionally; since every path through it returns, the symbol lookup
// further down is then never reached.
#if defined(PRINT_UNSYMBOLIZED_STACK_TRACES)
{
#else
// Check whether a file name was returned.
if (object_fd < 0) {
#endif
// out[1] is non-NUL only if a file name was stored behind the "(".
if (out[1]) {
// The object file containing PC was determined successfully however the
// object file was not opened successfully. This is still considered
// success because the object file name and offset are known and tools
// like asan_symbolize.py can be used for the symbolization.
out[out_size - 1] = '\0'; // Making sure |out| is always null-terminated.
SafeAppendString("+0x", out, out_size);
SafeAppendHexNumber(pc0 - base_address, out, out_size);
SafeAppendString(")", out, out_size);
return true;
}
// Failed to determine the object file containing PC. Bail out.
return false;
}
// -1 means the file could not be read or does not start with the ELF magic.
int elf_type = FileGetElfType(wrapped_object_fd.get());
if (elf_type == -1) {
return false;
}
if (g_symbolize_callback) {
// Run the call back if it's installed.
// Note: relocation (and much of the rest of this code) will be
// wrong for prelinked shared libraries and PIE executables.
uint64_t relocation = (elf_type == ET_DYN) ? start_address : 0;
int num_bytes_written = g_symbolize_callback(wrapped_object_fd.get(),
pc, out, out_size,
relocation);
// A positive return value is treated as the number of bytes the callback
// wrote; the symbol name is then placed behind them.
if (num_bytes_written > 0) {
out += num_bytes_written;
out_size -= num_bytes_written;
}
}
// The symbol name is written at the current start of "out", replacing the
// "(" (and anything behind it) unless the callback advanced "out".
if (!GetSymbolFromObjectFile(wrapped_object_fd.get(), pc0,
out, out_size, base_address)) {
// This fallback is only taken when no symbolize callback is installed, so
// "out" still points at the "(" written above.
if (out[1] && !g_symbolize_callback) {
// The object file containing PC was opened successfully however the
// symbol was not found. The object may have been stripped. This is still
// considered success because the object file name and offset are known
// and tools like asan_symbolize.py can be used for the symbolization.
out[out_size - 1] = '\0'; // Making sure |out| is always null-terminated.
SafeAppendString("+0x", out, out_size);
SafeAppendHexNumber(pc0 - base_address, out, out_size);
SafeAppendString(")", out, out_size);
return true;
}
return false;
}
// Symbolization succeeded. Now we try to demangle the symbol.
DemangleInplace(out, out_size);
return true;
}
_END_GOOGLE_NAMESPACE_
#elif defined(OS_MACOSX) && defined(HAVE_DLADDR)
#include <dlfcn.h>
#include <string.h>
_START_GOOGLE_NAMESPACE_
// Symbolization routine for Mac OS X, based on dladdr(). Writes the
// (demangled, if possible) name of the symbol containing "pc" to "out" and
// returns true. Returns false if dladdr() fails, if it finds no symbol for
// the address, or if the name plus its terminator does not fit into
// "out_size" bytes.
static ATTRIBUTE_NOINLINE bool SymbolizeAndDemangle(void *pc, char *out,
                                                    int out_size) {
  Dl_info info;
  if (!dladdr(pc, &info)) {
    return false;
  }
  // dladdr() can succeed without having found a symbol: the address lies in
  // a loaded image, but dli_sname is NULL. strlen() must not see that.
  if (info.dli_sname == NULL) {
    return false;
  }
  if ((int)strlen(info.dli_sname) >= out_size) {
    return false;  // The name does not fit.
  }
  strcpy(out, info.dli_sname);
  // Symbolization succeeded.  Now we try to demangle the symbol.
  DemangleInplace(out, out_size);
  return true;
}
_END_GOOGLE_NAMESPACE_
#elif defined(OS_WINDOWS) || defined(OS_CYGWIN)
#include <windows.h>
#include <dbghelp.h>
#ifdef _MSC_VER
#pragma comment(lib, "dbghelp")
#endif
_START_GOOGLE_NAMESPACE_
// Initializes the DbgHelp symbol handler for the current process on
// construction and cleans it up on destruction. "ready" tells whether
// SymInitialize() succeeded. Copying is disabled.
class SymInitializer {
 public:
  HANDLE process;
  bool ready;

  SymInitializer() : process(NULL), ready(false) {
    // Initialize the symbol handler.
    // https://msdn.microsoft.com/en-us/library/windows/desktop/ms680344(v=vs.85).aspx
    process = GetCurrentProcess();
    // Defer symbol loading.
    // We do not request undecorated symbols with SYMOPT_UNDNAME
    // because the mangling library calls UnDecorateSymbolName.
    SymSetOptions(SYMOPT_DEFERRED_LOADS);
    const bool initialized = SymInitialize(process, NULL, true) ? true : false;
    if (initialized) {
      ready = true;
    }
  }

  ~SymInitializer() {
    SymCleanup(process);
    // We do not need to close `HANDLE process` because it's a "pseudo handle."
  }

 private:
  // Declared but not defined, to forbid copies.
  SymInitializer(const SymInitializer&);
  SymInitializer& operator=(const SymInitializer&);
};
// Symbolization routine for Windows, based on DbgHelp's SymFromAddr().
// Writes the (demangled, if possible) name of the symbol at "pc" to "out"
// and returns true. Returns false if the symbol handler could not be
// initialized, if the lookup fails, or if the name plus its terminator does
// not fit into "out_size" bytes.
static ATTRIBUTE_NOINLINE bool SymbolizeAndDemangle(void *pc, char *out,
                                                    int out_size) {
  const static SymInitializer symInitializer;
  if (!symInitializer.ready) {
    return false;
  }

  // Resolve symbol information from address.
  // https://msdn.microsoft.com/en-us/library/windows/desktop/ms680578(v=vs.85).aspx
  // The SYMBOL_INFO structure is followed by the storage for the name.
  char storage[sizeof(SYMBOL_INFO) + MAX_SYM_NAME];
  SYMBOL_INFO *const info = reinterpret_cast<SYMBOL_INFO *>(storage);
  info->SizeOfStruct = sizeof(SYMBOL_INFO);
  info->MaxNameLen = MAX_SYM_NAME;

  // We use the ANSI version to ensure the string type is always `char *`.
  // This could break if a symbol has Unicode in it.
  const BOOL found = SymFromAddr(symInitializer.process,
                                 reinterpret_cast<DWORD64>(pc), 0, info);
  if (found != 1) {
    return false;
  }
  if (static_cast<int>(info->NameLen) >= out_size) {
    return false;  // The name does not fit.
  }

  // `NameLen` does not include the null terminating character.
  const size_t name_len = static_cast<size_t>(info->NameLen);
  strncpy(out, info->Name, name_len + 1);
  out[name_len] = '\0';
  // Symbolization succeeded.  Now we try to demangle the symbol.
  DemangleInplace(out, out_size);
  return true;
}
_END_GOOGLE_NAMESPACE_
#else
# error BUG: HAVE_SYMBOLIZE was wrongly set
#endif
_START_GOOGLE_NAMESPACE_
// Public entry point: forwards to the platform-specific
// SymbolizeAndDemangle() and returns its result. Aborts if "out_size" is
// negative.
bool Symbolize(void *pc, char *out, int out_size) {
  SAFE_ASSERT(out_size >= 0);
  const bool symbolized = SymbolizeAndDemangle(pc, out, out_size);
  return symbolized;
}
_END_GOOGLE_NAMESPACE_
#else /* HAVE_SYMBOLIZE */
#include <assert.h>
#include "config.h"
_START_GOOGLE_NAMESPACE_
// Stub used when HAVE_SYMBOLIZE is not defined. It trips an assertion when
// assertions are enabled and reports failure otherwise; none of the
// arguments are used.
// TODO: Support other environments.
bool Symbolize(void *pc, char *out, int out_size) {
  (void)pc;
  (void)out;
  (void)out_size;
  assert(0);
  return false;
}
_END_GOOGLE_NAMESPACE_
#endif
|