about summary refs log tree commit diff
path: root/third_party/bazel/rules_haskell/debug
diff options
context:
space:
mode:
Diffstat (limited to 'third_party/bazel/rules_haskell/debug')
-rw-r--r--third_party/bazel/rules_haskell/debug/linking_utils/BUILD.bazel50
-rw-r--r--third_party/bazel/rules_haskell/debug/linking_utils/README.md265
-rw-r--r--third_party/bazel/rules_haskell/debug/linking_utils/ldd.py288
-rw-r--r--third_party/bazel/rules_haskell/debug/linking_utils/ldd_test.bzl26
4 files changed, 0 insertions, 629 deletions
diff --git a/third_party/bazel/rules_haskell/debug/linking_utils/BUILD.bazel b/third_party/bazel/rules_haskell/debug/linking_utils/BUILD.bazel
deleted file mode 100644
index a32be2cfb6..0000000000
--- a/third_party/bazel/rules_haskell/debug/linking_utils/BUILD.bazel
+++ /dev/null
@@ -1,50 +0,0 @@
-load(
-    ":ldd_test.bzl",
-    "ldd_test",
-)
-
-py_library(
-    name = "linking_utils",
-    srcs = ["ldd.py"],
-    visibility = ["//visibility:public"],
-)
-
-# test the ldd debug library on the output of `//tests/binary-indirect-cbits`
-ldd_test(
-    name = "test-ldd",
-    current_workspace = None,
-    elf_binary = "//tests/binary-indirect-cbits",
-    script = r'''
-import sys
-
-def contains_error(error):
-    """check whether any of the dependencies contains `error`,
-    where error is something from `LDD_ERRORS`.
-    Returns {} if there's no error.
-    """
-    def f(d):
-        return { k: v for k, v in d['needed'].items()
-          if (v == error
-             or (v not in LDD_ERRORS
-                and dict_remove_empty(v['item']) != {})) }
-    return f
-
-# output should have some runpaths
-assert \
-    ldd(identity, sys.argv[1])['runpath_dirs']\
-    > 0
-
-# some of the dependencies are implicit and not in NEEDED flags
-assert ldd(contains_error(LDD_UNKNOWN), sys.argv[1])
-
-import pprint
-# none of the dependencies must be missing
-res = ldd(contains_error(LDD_MISSING), sys.argv[1])
-if res != {}:
-  print("These dependencies are missing:")
-  pprint.pprint(res)
-  exit(1)
-''',
-    # it only works on linux
-    tags = ["dont_test_on_darwin"],
-)
diff --git a/third_party/bazel/rules_haskell/debug/linking_utils/README.md b/third_party/bazel/rules_haskell/debug/linking_utils/README.md
deleted file mode 100644
index 57384a27fe..0000000000
--- a/third_party/bazel/rules_haskell/debug/linking_utils/README.md
+++ /dev/null
@@ -1,265 +0,0 @@
-# Debugging linking errors
-
-The usual utilities, like `nm`, `objdump`, and of course `ldd` (see
-[here](https://linux-audit.com/elf-binaries-on-linux-understanding-and-analysis/#tools-for-binary-analysis)
-for a good overview of existing tools) go a long way. Yet, when
-debugging non-trivial runtime linker failures one would oftentimes
-like to filter outputs programmatically, with more advanced query
-logic than just simple `grep` and `sed` expressions.
-
-This library provides a small set of utility subroutines. These can
-help debug complicated linker errors.
-
-The main function is `ldd(f, elf_path)`. It is in the same spirit
-as `ldd(1)`, but instead of a flat list of resolved libraries, it
-returns a tree of structured information.
-
-When we use the term `ldd` in the following document, it refers
-to the `ldd` function exported from [./ldd.py](./ldd.py).
-
-To query that tree, you pass it a function `f`, which is applied to
-each dependency recursively (transforming the tree from the bottom
-up).
-
-The following functions are exported alongside the `ldd` function.
-They can be passed to `ldd` and used as building blocks for insightful
-queries:
-
-- `identity`: don’t transform, output everything
-- `remove_matching_needed`: remove needed entries that match a regex
-- `remove_matching_runpaths`: remove runpaths that match a regex
-- `non_existing_runpaths`: return a list of runpaths that don’t exist
-  in the filesystem
-- `unused_runpaths`: return a list of runpaths that are listed in the
-  elf binary header, but no dependency was actually found in them
-- `collect_unused_runpaths`: give an overview of all unused runpaths
-
-Helpers:
-- `dict_remove_empty`: remove fields with empty lists/dicts from an output
-- `items`: `dict.iteritems()` for both python 2 and 3
-
-See the introductory tutorial below on how to use these functions.
-
-## Example usage
-
-### Setup
-
-If you have a bazel target which outputs a binary which you want to
-debug, the easiest way is to use `ldd_test`:
-
-```python
-load(
-    "//:debug/linking_utils/ldd_test.bzl",
-    "ldd_test",
-)
-
-ldd_test(
-    name = "test-ldd",
-    elf_binary = "//tests/binary-indirect-cbits",
-    current_workspace = None,
-    script = r'''
-YOUR SCRIPT HERE
-'''
-)
-```
-
-All exported functions from `ldd.py` are already in scope.
-See the [`BUILD`](./BUILD) file in this directory for an example.
-
-
-### Writing queries
-
-`ldd` takes a function that is applied to each layer of elf
-dependencies. This function is passed a set of structured data.
-This data is gathered by querying the elf binary with `objdump`
-and parsing the header fields of the dynamic section:
-
-```
-DependencyInfo :
-{ needed : dict(string, union(
-    LDD_MISSING, LDD_UNKNOWN,
-    {
-        # the needed dependency
-        item : a,
-        # where the dependency was found in
-        found_in : RunpathDir
-    }))
-# all runpath directories that were searched
-, runpath_dirs : [ RunpathDir ] }
-```
-
-The amount of data can get quite extensive for larger projects, so you
-need a way to filter it down to get to the bottom of our problem.
-
-If a transitive dependency cannot be found by the runtime linker, the
-binary cannot be started. `ldd` shows such a problem by setting
-the corresponding value in the `needed` dict to `LDD_MISSING`.
-To remove everything from the output but the missing dependency and
-the path to that dependency, you can write a filter like this:
-
-```python
-# `d` is the DependencyInfo dict from above
-def filter_down_to_missing(d):
-    res = {}
-
-    # items is a .iteritems() that works for py 2 and 3
-    for name, dep in items(d['needed']):
-        if dep == LDD_MISSING:
-            res[name] = LDD_MISSING
-        elif dep in LDD_ERRORS:
-            pass
-        else:
-            # dep['item'] contains the already converted info
-            # from the previous layer
-            res[name] = dep['item']
-
-    # dict_remove_empty removes all empty fields from the dict,
-    # otherwise your result contains a lot of {} in the values.
-    return dict_remove_empty(res)
-
-# To get human-readable output, we re-use python’s pretty printing
-# library. It’s only simple python values after all!
-import pprint
-pprint.pprint(
-  # actually parse the elf binary and apply only_missing on each layer
-  ldd(
-    filter_down_to_missing,
-    # the path to the elf binary you want to expect.
-    elf_binary_path
-  )
-)
-```
-
-Note that in the filter you only need to filter the data for the
-current executable, and add the info from previous layers (which are
-available in `d['item']`).
-
-The result might look something like:
-
-```python
-{'libfoo.so.5': {'libbar.so.1': {'libbaz.so.6': 'MISSING'}}}
-```
-
-or
-
-```python
-{}
-```
-
-if nothing is missing.
-
-Now, that is a similar output to what a tool like `lddtree(1)` could
-give you. But we don’t need to stop there because it’s trivial to
-augment your output with more information:
-
-
-```python
-def missing_with_runpath(d):
-  # our previous function can be re-used
-  missing = filter_down_to_missing(d)
-
-  # only display runpaths if there are missing deps
-  runpaths = [] if missing is {} else d['runpath_dirs']
-
-  # dict_remove_empty keeps the output clean
-  return dict_remove_empty({
-    'rpth': runpaths,
-    'miss': missing
-  })
-
-# same invocation, different function
-pprint.pprint(
-  ldd(
-    missing_with_runpath,
-    elf_binary_path
-  )
-)
-```
-
-which displays something like this for my example binary:
-
-```python
-{ 'miss': { 'libfoo.so.5': { 'miss': { 'libbar.so.1': { 'miss': { 'libbaz.so.6': 'MISSING'},
-                                                          'rpth': [ { 'absolute_path': '/home/philip/.cache/bazel/_bazel_philip/fd9fea5ad581ea59473dc1f9d6bce826/execroot/myproject/bazel-out/k8-fastbuild/bin/something/and/bazel-out/k8-fastbuild/bin/other/integrate',
-                                                                      'path': '$ORIGIN/../../../../../../bazel-out/k8-fastbuild/bin/other/integrate'}]}},
-                             'rpth': [ { 'absolute_path': '/nix/store/xdsjx0gba4id3yyqxv66bxnm2sqixkjj-glibc-2.27/lib',
-                                         'path': '/nix/store/xdsjx0gba4id3yyqxv66bxnm2sqixkjj-glibc-2.27/lib'},
-                                       { 'absolute_path': '/nix/store/x6inizi5ahlyhqxxwv1rvn05a25icarq-gcc-7.3.0-lib/lib',
-                                         'path': '/nix/store/x6inizi5ahlyhqxxwv1rvn05a25icarq-gcc-7.3.0-lib/lib'}]}},
-  'rpth': [ … lots more nix rpaths … ]}
-```
-
-That’s still a bit cluttered for my taste, so let’s filter out
-the `/nix/store` paths (which are mostly noise):
-
-```python
-import re
-nix_matcher = re.compile("/nix/store.*")
-
-def missing_with_runpath(d):
-  missing = filter_down_to_missing(d)
-
-  # this is one of the example functions provided by ldd.py
-  remove_matching_runpaths(d, nix_matcher)
-  # ^^^
-
-  runpaths = [] if missing is {} else d['runpath_dirs']
-
-  # dict_remove_empty keeps the output clean
-  return dict_remove_empty({
-    'rpth': runpaths,
-    'miss': missing
-  })
-```
-
-and we are down to:
-
-```python
-{ 'miss': { 'libfoo.so.5': { 'miss': { 'libbar.so.1': { 'miss': { 'libbaz.so.6': 'MISSING'},
-                                                          'rpth': [ { 'absolute_path': '/home/philip/.cache/bazel/_bazel_philip/fd9fea5ad581ea59473dc1f9d6bce826/execroot/myproject/bazel-out/k8-fastbuild/bin/something/and/bazel-out/k8-fastbuild/bin/other/integrate',
-                                                                      'path': '$ORIGIN/../../../../../../bazel-out/k8-fastbuild/bin/other/integrate'}]}}}
-```
-
-… which shows exactly the path that is missing the dependency we
-expect. But what has gone wrong? Does this path even exist? We can
-find out!
-
-```python
-import re
-nix_matcher = re.compile("/nix/store.*")
-
-def missing_with_runpath(d):
-  missing = filter_down_to_missing(d)
-  remove_matching_runpaths(d, nix_matcher)
-  runpaths = [] if missing is {} else d['runpath_dirs']
-
-  # returns a list of runpaths that don’t exist in the filesystem
-  doesnt_exist = non_existing_runpaths(d)
-  # ^^^
-
-  return dict_remove_empty({
-    'rpth': runpaths,
-    'miss': missing,
-    'doesnt_exist': doesnt_exist,
-  })
-```
-
-I amended the output by a list of runpaths which point to non-existing
-directories:
-
-```python
-{ 'miss': { 'libfoo.so.5': { 'miss': { 'libbar.so.1': { 'miss': { 'libbaz.so.6': 'MISSING'},
-                                                        'rpth': [ { 'absolute_path': '/home/philip/.cache/bazel/_bazel_philip/fd9fea5ad581ea59473dc1f9d6bce826/execroot/myproject/bazel-out/k8-fastbuild/bin/something/and/bazel-out/k8-fastbuild/bin/other/integrate',
-                                                                    'path': '$ORIGIN/../../../../../../bazel-out/k8-fastbuild/bin/other/integrate'}]
-                                                        'doesnt_exist': [ { 'absolute_path': '/home/philip/.cache/bazel/_bazel_philip/fd9fea5ad581ea59473dc1f9d6bce826/execroot/myproject/bazel-out/k8-fastbuild/bin/something/and/bazel-out/k8-fastbuild/bin/other/integrate',
-                                                                            'path': '$ORIGIN/../../../../../../bazel-out/k8-fastbuild/bin/other/integrate'}]}}}
-```
-
-Suddenly it’s perfectly clear where the problem lies,
-`$ORIGIN/../../../../../../bazel-out/k8-fastbuild/bin/other/integrate`
-points to a path that does not exist.
-
-Any data query you’d like to do is possible, as long as it uses
-the data provided by the `ldd` function. See the lower part of
-`ldd.py` for more examples.
-
diff --git a/third_party/bazel/rules_haskell/debug/linking_utils/ldd.py b/third_party/bazel/rules_haskell/debug/linking_utils/ldd.py
deleted file mode 100644
index 897cfdc713..0000000000
--- a/third_party/bazel/rules_haskell/debug/linking_utils/ldd.py
+++ /dev/null
@@ -1,288 +0,0 @@
-import subprocess
-import os
-import sys
-import re
-
-
-### helper functions
-
-def list_to_dict(f, l):
-    """dict with elements of list as keys & as values transformed by f"""
-    d = {}
-    for el in l:
-        d[el] = f(el)
-    return d
-
-def dict_remove_empty(d):
-    """remove keys that have [] or {} or as values"""
-    new = {}
-    for k, v in d.items():
-        if not (v == [] or v == {}):
-             new[k] = v
-    return new
-
-def identity(x):
-    """identity function"""
-    return x
-
-def const(x):
-    """(curried) constant function"""
-    def f(y):
-        return x
-    return f
-
-def memoized(cache, f, arg):
-    """Memoizes a call to `f` with `arg` in the dict `cache`.
-    Modifies the cache dict in place."""
-    res = cache.get(arg)
-    if arg in cache:
-        return cache[arg]
-    else:
-        res = f(arg)
-        cache[arg] = res
-        return res
-
-### IO functions that find elf dependencies
-
-_field_matcher = re.compile(b"  ([A-Z0-9_]+) +(.*)$")
-
-def read_dynamic_fields(elf_path):
-    """Read the dynamic header fields from an elf binary
-
-    Args:
-      elf_path: path to the elf binary (either absolute or relative to pwd)
-
-    Returns:
-      a list [(field_key, field_value)] where field_keys could appear multiple
-      times (for example there's usually more than one NEEDED field).
-    """
-    res = subprocess.check_output([
-        # force locale to C for stable output
-        "env", "LC_ALL=C",
-        "objdump",
-        # specifying the section brings execution time down from 150ms to 10ms
-        "--section=.dynamic",
-        "--all-headers",
-        elf_path
-    ])
-    to_end = res.split(b"Dynamic Section:\n")[1]
-    # to first empty line
-    dyn_section = to_end[: 1 + to_end.find(b"\n\n")]
-    def read_dynamic_field(s):
-        """return (field_key, field_value)"""
-        return _field_matcher.match(s).groups()
-    return list(map(read_dynamic_field, dyn_section.splitlines(True)))
-
-def __query_dynamic_fields(df, key):
-    """takes a list of dynamic field tuples (key and value),
-    where keys can appear multiple times, and returns a list of all
-    values with the given key (in stable order)."""
-    return [v for k, v in df if k == key]
-
-def parse_runpath_dirs(elf_path, elf_dynamic_fields):
-    """Parse a RUNPATH entry from an elf header bytestring.
-
-    Returns:
-      { path: unmodified string from DT_RUNPATH
-      , absolute_path: fully normalized, absolute path to dir }
-    """
-    fields = __query_dynamic_fields(elf_dynamic_fields, b"RUNPATH")
-    if fields == []:
-        return []
-    assert len(fields) == 1
-    val = fields[0]
-    origin = os.path.dirname(elf_path)
-    return [{ 'path': path,
-              'absolute_path': os.path.abspath(path.replace("$ORIGIN", origin)) }
-            for path in val.decode().strip(":").split(":")
-            if path != ""]
-
-def parse_needed(elf_dynamic_fields):
-    """Returns the list of DT_NEEDED entries for elf"""
-    return [n.decode() for n in __query_dynamic_fields(elf_dynamic_fields, b"NEEDED")]
-
-
-### Main utility
-
-# cannot find dependency
-LDD_MISSING = "MISSING"
-# don't know how to search for dependency
-LDD_UNKNOWN = "DUNNO"
-# list of all errors for easy branching
-LDD_ERRORS = [ LDD_MISSING, LDD_UNKNOWN ]
-
-def _ldd(elf_cache, f, elf_path):
-    """Same as `ldd` (below), except for an additional `elf_cache` argument,
-    which is a dict needed for memoizing elf files that were already read.
-    This is done because the elf reading operation is quite expensive
-    and many files are referenced multiple times (e.g. glib.so)."""
-
-    def search(rdirs, elf_libname):
-        """search for elf_libname in runfile dirs
-        and return either the name or missing"""
-        res = LDD_MISSING
-        for rdir in rdirs:
-            potential_path = os.path.join(rdir['absolute_path'], elf_libname)
-            if os.path.exists(potential_path):
-                res = {
-                    'item': potential_path,
-                    'found_in': rdir,
-                }
-                break
-        return res
-
-    def recurse(search_res):
-        """Unfold the subtree of ELF dependencies for a `search` result"""
-        if search_res == LDD_MISSING:
-            return LDD_MISSING
-        else:
-            # we keep all other fields in search_res the same,
-            # just item is the one that does the recursion.
-            # This is the part that would normally be done by fmap.
-            search_res['item'] = _ldd(elf_cache, f, search_res['item'])
-            return search_res
-
-    # (GNU) ld.so resolves any symlinks before searching for dependencies
-    elf_realpath = os.path.realpath(elf_path)
-
-    # memoized uses the cache to not repeat the I/O action
-    # for the same elf files (same path)
-    dyn_fields = memoized(
-        elf_cache, read_dynamic_fields, elf_realpath
-    )
-    rdirs = parse_runpath_dirs(elf_realpath, dyn_fields)
-    all_needed = parse_needed(dyn_fields)
-
-    # if there's no runpath dirs we don't know where to search
-    if rdirs == []:
-        needed = list_to_dict(const(LDD_UNKNOWN), all_needed)
-    else:
-        needed = list_to_dict(
-            lambda name: recurse(search(rdirs, name)),
-            all_needed
-        )
-
-    result = {
-        'runpath_dirs': rdirs,
-        'needed': needed
-    }
-    # Here, f is applied to the result of the previous level of recursion
-    return f(result)
-
-
-def ldd(f, elf_path):
-    """follows DT_NEEDED ELF headers for elf by searching the through DT_RUNPATH.
-
-    DependencyInfo :
-    { needed : dict(string, union(
-        LDD_MISSING, LDD_UNKNOWN,
-        {
-            # the needed dependency
-            item : a,
-            # where the dependency was found in
-            found_in : RunpathDir
-        }))
-    # all runpath directories that were searched
-    , runpath_dirs : [ RunpathDir ] }
-
-    Args:
-        f: DependencyInfo -> a
-        modifies the results of each level
-        elf_path: path to ELF file, either absolute or relative to current working dir
-
-    Returns: a
-    """
-    elf_cache = {}
-    return _ldd(elf_cache, f, elf_path)
-
-
-### Functions to pass to ldd
-
-# Only use the current layer
-
-def remove_matching_needed(d, re_matcher_absolute_path=None, re_matcher_path=None):
-    """Destructively removes needed values from d['needed']
-    if they match the given regex matcher.
-    Doesn't remove LDD_ERRORS."""
-    def pred(v):
-        """return true if match"""
-        if v in LDD_ERRORS:
-            return False
-        found_in = v['found_in']
-        abs_match = re_matcher_absolute_path.match(found_in['absolute_path']) \
-                    if re_matcher_absolute_path else False
-        match = re_matcher_path.match(found_in['path']) \
-                    if re_matcher_path else False
-        if abs_match or match:
-            return True
-    d['needed'] = {
-        k: v for k, v in d['needed'].items()
-        if not pred(v)
-    }
-
-def remove_matching_runpaths(d, re_matcher):
-    """Destructively removes runpaths from d['runpath_dirs']
-    if they match the given regex matcher."""
-    d['runpath_dirs'] = [
-        runp for runp in d['runpath_dirs']
-        if not re_matcher.match(runp['absolute_path'])
-    ]
-    return d
-
-def non_existing_runpaths(d):
-    """Return a list of runpaths_dirs that do not exist in the file system."""
-    return [
-        runp for runp in d['runpath_dirs']
-        if not os.path.exists(runp['absolute_path'])
-    ]
-
-def unused_runpaths(d):
-    """Return a list of runpath_dirs that were not used to find NEEDED dependencies."""
-    used = set()
-    for k, v in d['needed'].items():
-        if not v in LDD_ERRORS:
-            used.add(v['found_in']['absolute_path'])
-    return [
-        u for u in d['runpath_dirs']
-        if u['absolute_path'] not in used
-    ]
-
-# Also use the results of sub-layers
-
-def collect_unused_runpaths(d):
-    """This is like `unused_runpaths`, but it creates a deduplicated list of all unused runpaths
-    for its dependencies instead of just returning them for the current layer.
-
-    Returns:
-      a dict of two fields;
-      `mine` contains the unused dependencies of the current binary under scrutiny
-      `others` contains a flat dict of all .sos with unused runpath entries and a list of them for each .so
-    """
-    used = set()
-    given = set(r['absolute_path'] for r in d['runpath_dirs'])
-    prev = {}
-    # TODO: use `unused_runpaths` here
-    for k, v in d['needed'].items():
-        if not v in LDD_ERRORS:
-            used.add(v['found_in']['absolute_path'])
-            prev[k] = v['item']
-    unused = [
-        u for u in given.difference(used)
-        # leave out nix storepaths
-        if not u.startswith("/nix/store")
-    ]
-
-    # Each layer doesn't know about their own name
-    # So we return a list of unused for this layer ('mine')
-    # and a dict of all previeous layers combined (name to list)
-    def combine_unused(deps):
-        res = {}
-        for name, dep in deps.items():
-            res.update(dep['others'])
-            res[name] = dep['mine']
-        return res
-
-    return {
-        'mine': unused,
-        'others': combine_unused(prev),
-    }
diff --git a/third_party/bazel/rules_haskell/debug/linking_utils/ldd_test.bzl b/third_party/bazel/rules_haskell/debug/linking_utils/ldd_test.bzl
deleted file mode 100644
index 5872828df2..0000000000
--- a/third_party/bazel/rules_haskell/debug/linking_utils/ldd_test.bzl
+++ /dev/null
@@ -1,26 +0,0 @@
-load(
-    "//:tests/inline_tests.bzl",
-    "py_inline_test",
-)
-
-#
-def ldd_test(name, elf_binary, script, current_workspace = None, tags = []):
-    """Test with imported linking_utils.ldd library.
-    The path to the `elf_binary` is passed in sys.argv[1].
-    """
-    py_inline_test(
-        name,
-        deps = ["@io_tweag_rules_haskell//debug/linking_utils"],
-        data = [elf_binary],
-        args = ["{}/$(rootpath {})".format(current_workspace, elf_binary)] if current_workspace else ["$(rootpath {})".format(elf_binary)],
-        script = """
-from io_tweag_rules_haskell.debug.linking_utils.ldd import \\
-        dict_remove_empty, identity, const, \\
-        LDD_MISSING, LDD_UNKNOWN, LDD_ERRORS, \\
-        ldd, \\
-        remove_matching_needed, remove_matching_runpaths, \\
-        non_existing_runpaths, unused_runpaths, \\
-        collect_unused_runpaths
-""" + script,
-        tags = tags,
-    )