diff options
Diffstat (limited to 'third_party/bazel/rules_haskell/debug')
4 files changed, 0 insertions, 629 deletions
load(
    ":ldd_test.bzl",
    "ldd_test",
)

py_library(
    name = "linking_utils",
    srcs = ["ldd.py"],
    visibility = ["//visibility:public"],
)

# Test the ldd debug library on the output of `//tests/binary-indirect-cbits`.
# The inline script runs with every name exported by `ldd.py` already in scope
# and receives the path to the ELF binary in `sys.argv[1]`.
ldd_test(
    name = "test-ldd",
    current_workspace = None,
    elf_binary = "//tests/binary-indirect-cbits",
    script = r'''
import pprint
import sys

def contains_error(error):
    """check whether any of the dependencies contains `error`,
    where error is something from `LDD_ERRORS`.
    Returns {} if there's no error.
    """
    def f(d):
        return { k: v for k, v in d['needed'].items()
                 if (v == error
                     or (v not in LDD_ERRORS
                         and dict_remove_empty(v['item']) != {})) }
    return f

# output should have some runpaths
# (`runpath_dirs` is a list, so its length has to be compared; comparing the
# list itself with 0 raises TypeError on python 3 and is always true on
# python 2)
assert \
    len(ldd(identity, sys.argv[1])['runpath_dirs'])\
    > 0

# some of the dependencies are implicit and not in NEEDED flags
assert ldd(contains_error(LDD_UNKNOWN), sys.argv[1])

# none of the dependencies must be missing
res = ldd(contains_error(LDD_MISSING), sys.argv[1])
if res != {}:
    print("These dependencies are missing:")
    pprint.pprint(res)
    sys.exit(1)
''',
    # it only works on linux
    tags = ["dont_test_on_darwin"],
)
-[here](https://linux-audit.com/elf-binaries-on-linux-understanding-and-analysis/#tools-for-binary-analysis) -for a good overview of existing tools) go a long way. Yet, when -debugging non-trivial runtime linker failures one would oftentimes -like to filter outputs programmatically, with more advanced query -logic than just simple `grep` and `sed` expressions. - -This library provides a small set of utility subroutines. These can -help debug complicated linker errors. - -The main function is `ldd(f, elf_path)`. It is in the same spirit -as `ldd(1)`, but instead of a flat list of resolved libraries, it -returns a tree of structured information. - -When we use the term `ldd` in the following document, it refers -to the `ldd` function exported from [./ldd.py](./ldd.py). - -To query that tree, you pass it a function `f`, which is applied to -each dependency recursively (transforming the tree from the bottom -up). - -The following functions are exported alongside the `ldd` function. -They can be passed to `ldd` and used as building blocks for insightful -queries: - -- `identity`: don’t transform, output everything -- `remove_matching_needed`: remove needed entries that match a regex -- `remove_matching_runpaths`: remove runpaths that match a regex -- `non_existing_runpaths`: return a list of runpaths that don’t exist - in the filesystem -- `unused_runpaths`: return a list of runpaths that are listed in the - elf binary header, but no dependency was actually found in them -- `collect_unused_runpaths`: give an overview of all unused runpaths - -Helpers: -- `dict_remove_empty`: remove fields with empty lists/dicts from an output -- `items`: `dict.iteritems()` for both python 2 and 3 - -See the introductory tutorial below on how to use these functions. 
- -## Example usage - -### Setup - -If you have a bazel target which outputs a binary which you want to -debug, the easiest way is to use `ldd_test`: - -```python -load( - "//:debug/linking_utils/ldd_test.bzl", - "ldd_test", -) - -ldd_test( - name = "test-ldd", - elf_binary = "//tests/binary-indirect-cbits", - current_workspace = None, - script = r''' -YOUR SCRIPT HERE -''' -) -``` - -All exported functions from `ldd.py` are already in scope. -See the [`BUILD`](./BUILD) file in this directory for an example. - - -### Writing queries - -`ldd` takes a function that is applied to each layer of elf -dependencies. This function is passed a set of structured data. -This data is gathered by querying the elf binary with `objdump` -and parsing the header fields of the dynamic section: - -``` -DependencyInfo : -{ needed : dict(string, union( - LDD_MISSING, LDD_UNKNOWN, - { - # the needed dependency - item : a, - # where the dependency was found in - found_in : RunpathDir - })) -# all runpath directories that were searched -, runpath_dirs : [ RunpathDir ] } -``` - -The amount of data can get quite extensive for larger projects, so you -need a way to filter it down to get to the bottom of our problem. - -If a transitive dependency cannot be found by the runtime linker, the -binary cannot be started. `ldd` shows such a problem by setting -the corresponding value in the `needed` dict to `LDD_MISSING`. 
-To remove everything from the output but the missing dependency and -the path to that dependency, you can write a filter like this: - -```python -# `d` is the DependencyInfo dict from above -def filter_down_to_missing(d): - res = {} - - # dict.items() works on both py 2 and 3 - for name, dep in d['needed'].items(): - if dep == LDD_MISSING: - res[name] = LDD_MISSING - elif dep in LDD_ERRORS: - pass - else: - # dep['item'] contains the already converted info - # from the previous layer - res[name] = dep['item'] - - # dict_remove_empty removes all empty fields from the dict, - # otherwise your result contains a lot of {} in the values. - return dict_remove_empty(res) - -# To get human-readable output, we re-use python’s pretty printing -# library. It’s only simple python values after all! -import pprint -pprint.pprint( - # actually parse the elf binary and apply filter_down_to_missing on each layer - ldd( - filter_down_to_missing, - # the path to the elf binary you want to inspect. - elf_binary_path - ) -) -``` - -Note that in the filter you only need to filter the data for the -current executable, and add the info from previous layers (which is -available in `dep['item']` for each entry of `d['needed']`). - -The result might look something like: - -```python -{'libfoo.so.5': {'libbar.so.1': {'libbaz.so.6': 'MISSING'}}} -``` - -or - -```python -{} -``` - -if nothing is missing. - -Now, that is a similar output to what a tool like `lddtree(1)` could -give you. 
But we don’t need to stop there because it’s trivial to -augment your output with more information: - - -```python -def missing_with_runpath(d): - # our previous function can be re-used - missing = filter_down_to_missing(d) - - # only display runpaths if there are missing deps - runpaths = [] if not missing else d['runpath_dirs'] - - # dict_remove_empty keeps the output clean - return dict_remove_empty({ - 'rpth': runpaths, - 'miss': missing - }) - -# same invocation, different function -pprint.pprint( - ldd( - missing_with_runpath, - elf_binary_path - ) -) -``` - -which displays something like this for my example binary: - -```python -{ 'miss': { 'libfoo.so.5': { 'miss': { 'libbar.so.1': { 'miss': { 'libbaz.so.6': 'MISSING'}, - 'rpth': [ { 'absolute_path': '/home/philip/.cache/bazel/_bazel_philip/fd9fea5ad581ea59473dc1f9d6bce826/execroot/myproject/bazel-out/k8-fastbuild/bin/something/and/bazel-out/k8-fastbuild/bin/other/integrate', - 'path': '$ORIGIN/../../../../../../bazel-out/k8-fastbuild/bin/other/integrate'}]}}, - 'rpth': [ { 'absolute_path': '/nix/store/xdsjx0gba4id3yyqxv66bxnm2sqixkjj-glibc-2.27/lib', - 'path': '/nix/store/xdsjx0gba4id3yyqxv66bxnm2sqixkjj-glibc-2.27/lib'}, - { 'absolute_path': '/nix/store/x6inizi5ahlyhqxxwv1rvn05a25icarq-gcc-7.3.0-lib/lib', - 'path': '/nix/store/x6inizi5ahlyhqxxwv1rvn05a25icarq-gcc-7.3.0-lib/lib'}]}}, - 'rpth': [ … lots more nix rpaths … ]} -``` - -That’s still a bit cluttered for my taste, so let’s filter out -the `/nix/store` paths (which are mostly noise): - -```python -import re -nix_matcher = re.compile("/nix/store.*") - -def missing_with_runpath(d): - missing = filter_down_to_missing(d) - - # this is one of the example functions provided by ldd.py - remove_matching_runpaths(d, nix_matcher) - # ^^^ - - runpaths = [] if not missing else d['runpath_dirs'] - - # dict_remove_empty keeps the output clean - return dict_remove_empty({ - 'rpth': runpaths, - 'miss': missing - }) -``` - -and we are down to: - -```python -{ 
'miss': { 'libfoo.so.5': { 'miss': { 'libbar.so.1': { 'miss': { 'libbaz.so.6': 'MISSING'}, - 'rpth': [ { 'absolute_path': '/home/philip/.cache/bazel/_bazel_philip/fd9fea5ad581ea59473dc1f9d6bce826/execroot/myproject/bazel-out/k8-fastbuild/bin/something/and/bazel-out/k8-fastbuild/bin/other/integrate', - 'path': '$ORIGIN/../../../../../../bazel-out/k8-fastbuild/bin/other/integrate'}]}}} -``` - -… which shows exactly the path that is missing the dependency we -expect. But what has gone wrong? Does this path even exist? We can -find out! - -```python -import re -nix_matcher = re.compile("/nix/store.*") - -def missing_with_runpath(d): - missing = filter_down_to_missing(d) - remove_matching_runpaths(d, nix_matcher) - runpaths = [] if not missing else d['runpath_dirs'] - - # returns a list of runpaths that don’t exist in the filesystem - doesnt_exist = non_existing_runpaths(d) - # ^^^ - - return dict_remove_empty({ - 'rpth': runpaths, - 'miss': missing, - 'doesnt_exist': doesnt_exist, - }) -``` - -The output now also contains a list of runpaths which point to non-existing -directories: - -```python -{ 'miss': { 'libfoo.so.5': { 'miss': { 'libbar.so.1': { 'miss': { 'libbaz.so.6': 'MISSING'}, - 'rpth': [ { 'absolute_path': '/home/philip/.cache/bazel/_bazel_philip/fd9fea5ad581ea59473dc1f9d6bce826/execroot/myproject/bazel-out/k8-fastbuild/bin/something/and/bazel-out/k8-fastbuild/bin/other/integrate', - 'path': '$ORIGIN/../../../../../../bazel-out/k8-fastbuild/bin/other/integrate'}], - 'doesnt_exist': [ { 'absolute_path': '/home/philip/.cache/bazel/_bazel_philip/fd9fea5ad581ea59473dc1f9d6bce826/execroot/myproject/bazel-out/k8-fastbuild/bin/something/and/bazel-out/k8-fastbuild/bin/other/integrate', - 'path': '$ORIGIN/../../../../../../bazel-out/k8-fastbuild/bin/other/integrate'}]}}} -``` - -Suddenly it’s perfectly clear where the problem lies, -`$ORIGIN/../../../../../../bazel-out/k8-fastbuild/bin/other/integrate` -points to a path that does not exist. 
import subprocess
import os
import sys
import re


### helper functions

def list_to_dict(f, l):
    """Return a dict with the elements of `l` as keys and `f(element)` as values."""
    return {el: f(el) for el in l}

def dict_remove_empty(d):
    """Return a shallow copy of `d` without the keys whose value is `[]` or `{}`."""
    return {k: v for k, v in d.items() if not (v == [] or v == {})}

def identity(x):
    """identity function"""
    return x

def const(x):
    """(curried) constant function: `const(x)(y)` is `x` for every `y`"""
    def f(y):
        return x
    return f

def memoized(cache, f, arg):
    """Memoizes a call to `f` with `arg` in the dict `cache`.
    Modifies the cache dict in place.

    Membership (not the stored value) decides whether `f` is called,
    so falsy results such as `None` or `[]` are cached as well."""
    if arg not in cache:
        cache[arg] = f(arg)
    return cache[arg]

### IO functions that find elf dependencies

# One entry of objdump's "Dynamic Section:" listing: indentation, the tag
# name, padding, then the value. Any amount of whitespace is accepted so the
# parser does not depend on objdump's exact column layout.
_field_matcher = re.compile(br"\s*([A-Z0-9_]+)\s+(.*)$")

def parse_dynamic_section(objdump_output):
    """Extract the dynamic header fields from the output of `objdump`.

    Args:
      objdump_output: bytestring printed by `objdump --all-headers`

    Returns:
      a list [(field_key, field_value)] of bytestrings, in the order they
      were printed. The list is empty if the output has no "Dynamic Section:"
      (e.g. for a statically linked binary). Lines that do not look like a
      `KEY value` entry are skipped.
    """
    _, marker, to_end = objdump_output.partition(b"Dynamic Section:\n")
    if not marker:
        return []
    # the section ends at the first empty line (or at the end of the output)
    end = to_end.find(b"\n\n")
    dyn_section = to_end if end == -1 else to_end[: end + 1]
    fields = []
    for line in dyn_section.splitlines():
        match = _field_matcher.match(line)
        if match:
            fields.append(match.groups())
    return fields

def read_dynamic_fields(elf_path):
    """Read the dynamic header fields from an elf binary

    Args:
      elf_path: path to the elf binary (either absolute or relative to pwd)

    Returns:
      a list [(field_key, field_value)] where field_keys could appear multiple
      times (for example there's usually more than one NEEDED field).

    Raises:
      subprocess.CalledProcessError: if `objdump` exits with an error.
    """
    res = subprocess.check_output([
        # force locale to C for stable output
        "env", "LC_ALL=C",
        "objdump",
        # specifying the section brings execution time down from 150ms to 10ms
        "--section=.dynamic",
        "--all-headers",
        elf_path
    ])
    return parse_dynamic_section(res)

def __query_dynamic_fields(df, key):
    """takes a list of dynamic field tuples (key and value),
    where keys can appear multiple times, and returns a list of all
    values with the given key (in stable order)."""
    return [v for k, v in df if k == key]

def parse_runpath_dirs(elf_path, elf_dynamic_fields):
    """Parse the RUNPATH entry from a list of dynamic fields.

    Args:
      elf_path: path of the elf binary the fields were read from;
        its directory is substituted for `$ORIGIN` / `${ORIGIN}`
      elf_dynamic_fields: result of `read_dynamic_fields`

    Returns:
      a list (empty if there is no RUNPATH field) with one dict per
      non-empty runpath entry:
      { path: unmodified string from DT_RUNPATH
      , absolute_path: fully normalized, absolute path to dir }
    """
    fields = __query_dynamic_fields(elf_dynamic_fields, b"RUNPATH")
    if fields == []:
        return []
    assert len(fields) == 1
    origin = os.path.dirname(elf_path)

    def expand(path):
        # the dynamic linker accepts both spellings of the token
        return path.replace("${ORIGIN}", origin).replace("$ORIGIN", origin)

    return [{ 'path': path,
              'absolute_path': os.path.abspath(expand(path)) }
            for path in fields[0].decode().split(":")
            if path != ""]

def parse_needed(elf_dynamic_fields):
    """Returns the list of DT_NEEDED entries for elf"""
    return [n.decode() for n in __query_dynamic_fields(elf_dynamic_fields, b"NEEDED")]


### Main utility

# cannot find dependency
LDD_MISSING = "MISSING"
# don't know how to search for dependency
LDD_UNKNOWN = "DUNNO"
# list of all errors for easy branching
LDD_ERRORS = [ LDD_MISSING, LDD_UNKNOWN ]

def _ldd(elf_cache, f, elf_path):
    """Same as `ldd` (below), except for an additional `elf_cache` argument,
    which is a dict needed for memoizing elf files that were already read.
    This is done because the elf reading operation is quite expensive
    and many files are referenced multiple times (e.g. glib.so)."""

    def search(rdirs, elf_libname):
        """search for elf_libname in the runpath dirs (first hit wins)
        and return either where it was found or LDD_MISSING"""
        for rdir in rdirs:
            potential_path = os.path.join(rdir['absolute_path'], elf_libname)
            if os.path.exists(potential_path):
                return {
                    'item': potential_path,
                    'found_in': rdir,
                }
        return LDD_MISSING

    def recurse(search_res):
        """Unfold the subtree of ELF dependencies for a `search` result"""
        if search_res == LDD_MISSING:
            return LDD_MISSING
        # we keep all other fields in search_res the same,
        # just item is the one that does the recursion.
        # This is the part that would normally be done by fmap.
        search_res['item'] = _ldd(elf_cache, f, search_res['item'])
        return search_res

    # (GNU) ld.so resolves any symlinks before searching for dependencies
    elf_realpath = os.path.realpath(elf_path)

    # memoized uses the cache to not repeat the I/O action
    # for the same elf files (same path)
    dyn_fields = memoized(
        elf_cache, read_dynamic_fields, elf_realpath
    )
    rdirs = parse_runpath_dirs(elf_realpath, dyn_fields)
    all_needed = parse_needed(dyn_fields)

    # if there's no runpath dirs we don't know where to search
    if rdirs == []:
        needed = list_to_dict(const(LDD_UNKNOWN), all_needed)
    else:
        needed = list_to_dict(
            lambda name: recurse(search(rdirs, name)),
            all_needed
        )

    result = {
        'runpath_dirs': rdirs,
        'needed': needed
    }
    # Here, f is applied to the result of the previous level of recursion
    return f(result)


def ldd(f, elf_path):
    """follows DT_NEEDED ELF headers for elf by searching through DT_RUNPATH.

    DependencyInfo :
    { needed : dict(string, union(
        LDD_MISSING, LDD_UNKNOWN,
        {
            # the needed dependency
            item : a,
            # where the dependency was found in
            found_in : RunpathDir
        }))
    # all runpath directories that were searched
    , runpath_dirs : [ RunpathDir ] }

    Args:
        f: DependencyInfo -> a
           modifies the results of each level
        elf_path: path to ELF file, either absolute or relative to current working dir

    Returns: a
    """
    elf_cache = {}
    return _ldd(elf_cache, f, elf_path)


### Functions to pass to ldd

# Only use the current layer

def remove_matching_needed(d, re_matcher_absolute_path=None, re_matcher_path=None):
    """Destructively removes needed values from d['needed']
    if they match the given regex matcher.
    Doesn't remove LDD_ERRORS.

    Returns `d`, like `remove_matching_runpaths` does."""
    def pred(v):
        """return true if match"""
        if v in LDD_ERRORS:
            return False
        found_in = v['found_in']
        abs_match = re_matcher_absolute_path.match(found_in['absolute_path']) \
                        if re_matcher_absolute_path else False
        match = re_matcher_path.match(found_in['path']) \
                        if re_matcher_path else False
        return bool(abs_match or match)
    d['needed'] = {
        k: v for k, v in d['needed'].items()
        if not pred(v)
    }
    return d

def remove_matching_runpaths(d, re_matcher):
    """Destructively removes runpaths from d['runpath_dirs']
    if they match the given regex matcher."""
    d['runpath_dirs'] = [
        runp for runp in d['runpath_dirs']
        if not re_matcher.match(runp['absolute_path'])
    ]
    return d

def non_existing_runpaths(d):
    """Return a list of runpaths_dirs that do not exist in the file system."""
    return [
        runp for runp in d['runpath_dirs']
        if not os.path.exists(runp['absolute_path'])
    ]

def unused_runpaths(d):
    """Return a list of runpath_dirs that were not used to find NEEDED dependencies."""
    used = set(
        v['found_in']['absolute_path']
        for v in d['needed'].values()
        if v not in LDD_ERRORS
    )
    return [
        u for u in d['runpath_dirs']
        if u['absolute_path'] not in used
    ]

# Also use the results of sub-layers

def collect_unused_runpaths(d):
    """This is like `unused_runpaths`, but it creates a deduplicated list of all unused runpaths
    for its dependencies instead of just returning them for the current layer.

    Returns:
      a dict of two fields;
        `mine` contains the unused runpaths (absolute path strings, in the
               order they are listed in the binary) of the binary under scrutiny
        `others` contains a flat dict of all .sos with unused runpath entries and a list of them for each .so
    """
    used = set()
    prev = {}
    for k, v in d['needed'].items():
        if v not in LDD_ERRORS:
            used.add(v['found_in']['absolute_path'])
            prev[k] = v['item']

    # walk the runpaths in their listed order so the result is deterministic
    unused = []
    seen = set(used)
    for runp in d['runpath_dirs']:
        path = runp['absolute_path']
        # leave out duplicates, used runpaths and nix storepaths
        if path in seen or path.startswith("/nix/store"):
            continue
        seen.add(path)
        unused.append(path)

    # Each layer doesn't know about their own name
    # So we return a list of unused for this layer ('mine')
    # and a dict of all previous layers combined (name to list)
    def combine_unused(deps):
        res = {}
        for name, dep in deps.items():
            res.update(dep['others'])
            res[name] = dep['mine']
        return res

    return {
        'mine': unused,
        'others': combine_unused(prev),
    }
- """ - py_inline_test( - name, - deps = ["@io_tweag_rules_haskell//debug/linking_utils"], - data = [elf_binary], - args = ["{}/$(rootpath {})".format(current_workspace, elf_binary)] if current_workspace else ["$(rootpath {})".format(elf_binary)], - script = """ -from io_tweag_rules_haskell.debug.linking_utils.ldd import \\ - dict_remove_empty, identity, const, \\ - LDD_MISSING, LDD_UNKNOWN, LDD_ERRORS, \\ - ldd, \\ - remove_matching_needed, remove_matching_runpaths, \\ - non_existing_runpaths, unused_runpaths, \\ - collect_unused_runpaths -""" + script, - tags = tags, - ) |