import subprocess
import os
import sys
import re
### helper functions
def list_to_dict(f, l):
"""dict with elements of list as keys & as values transformed by f"""
d = {}
for el in l:
d[el] = f(el)
return d
def dict_remove_empty(d):
"""remove keys that have [] or {} or as values"""
new = {}
for k, v in d.items():
if not (v == [] or v == {}):
new[k] = v
return new
def identity(x):
"""identity function"""
return x
def const(x):
"""(curried) constant function"""
def f(y):
return x
return f
def memoized(cache, f, arg):
"""Memoizes a call to `f` with `arg` in the dict `cache`.
Modifies the cache dict in place."""
res = cache.get(arg)
if arg in cache:
return cache[arg]
else:
res = f(arg)
cache[arg] = res
return res
### IO functions that find elf dependencies
_field_matcher = re.compile(b" ([A-Z0-9_]+) +(.*)$")
def read_dynamic_fields(elf_path):
"""Read the dynamic header fields from an elf binary
Args:
elf_path: path to the elf binary (either absolute or relative to pwd)
Returns:
a list [(field_key, field_value)] where field_keys could appear multiple
times (for example there's usually more than one NEEDED field).
"""
res = subprocess.check_output([
# force locale to C for stable output
"env", "LC_ALL=C",
"objdump",
# specifying the section brings execution time down from 150ms to 10ms
"--section=.dynamic",
"--all-headers",
elf_path
])
to_end = res.split(b"Dynamic Section:\n")[1]
# to first empty line
dyn_section = to_end[: 1 + to_end.find(b"\n\n")]
def read_dynamic_field(s):
"""return (field_key, field_value)"""
return _field_matcher.match(s).groups()
return list(map(read_dynamic_field, dyn_section.splitlines(True)))
def __query_dynamic_fields(df, key):
"""takes a list of dynamic field tuples (key and value),
where keys can appear multiple times, and returns a list of all
values with the given key (in stable order)."""
return [v for k, v in df if k == key]
def parse_runpath_dirs(elf_path, elf_dynamic_fields):
"""Parse a RUNPATH entry from an elf header bytestring.
Returns:
{ path: unmodified string from DT_RUNPATH
, absolute_path: fully normalized, absolute path to dir }
"""
fields = __query_dynamic_fields(elf_dynamic_fields, b"RUNPATH")
if fields == []:
return []
assert len(fields) == 1
val = fields[0]
origin = os.path.dirname(elf_path)
return [{ 'path': path,
'absolute_path': os.path.abspath(path.replace("$ORIGIN", origin)) }
for path in val.decode().strip(":").split(":")
if path != ""]
def parse_needed(elf_dynamic_fields):
"""Returns the list of DT_NEEDED entries for elf"""
return [n.decode() for n in __query_dynamic_fields(elf_dynamic_fields, b"NEEDED")]
### Main utility
# cannot find dependency
LDD_MISSING = "MISSING"
# don't know how to search for dependency
LDD_UNKNOWN = "DUNNO"
# list of all errors for easy branching
LDD_ERRORS = [ LDD_MISSING, LDD_UNKNOWN ]
def _ldd(elf_cache, f, elf_path):
"""Same as `ldd` (below), except for an additional `elf_cache` argument,
which is a dict needed for memoizing elf files that were already read.
This is done because the elf reading operation is quite expensive
and many files are referenced multiple times (e.g. glib.so)."""
def search(rdirs, elf_libname):
"""search for elf_libname in runfile dirs
and return either the name or missing"""
res = LDD_MISSING
for rdir in rdirs:
potential_path = os.path.join(rdir['absolute_path'], elf_libname)
if os.path.exists(potential_path):
res = {
'item': potential_path,
'found_in': rdir,
}
break
return res
def recurse(search_res):
"""Unfold the subtree of ELF dependencies for a `search` result"""
if search_res == LDD_MISSING:
return LDD_MISSING
else:
# we keep all other fields in search_res the same,
# just item is the one that does the recursion.
# This is the part that would normally be done by fmap.
search_res['item'] = _ldd(elf_cache, f, search_res['item'])
return search_res
# (GNU) ld.so resolves any symlinks before searching for dependencies
elf_realpath = os.path.realpath(elf_path)
# memoized uses the cache to not repeat the I/O action
# for the same elf files (same path)
dyn_fields = memoized(
elf_cache, read_dynamic_fields, elf_realpath
)
rdirs = parse_runpath_dirs(elf_realpath, dyn_fields)
all_needed = parse_needed(dyn_fields)
# if there's no runpath dirs we don't know where to search
if rdirs == []:
needed = list_to_dict(const(LDD_UNKNOWN), all_needed)
else:
needed = list_to_dict(
lambda name: recurse(search(rdirs, name)),
all_needed
)
result = {
'runpath_dirs': rdirs,
'needed': needed
}
# Here, f is applied to the result of the previous level of recursion
return f(result)
def ldd(f, elf_path):
"""follows DT_NEEDED ELF headers for elf by searching the through DT_RUNPATH.
DependencyInfo :
{ needed : dict(string, union(
LDD_MISSING, LDD_UNKNOWN,
{
# the needed dependency
item : a,
# where the dependency was found in
found_in : RunpathDir
}))
# all runpath directories that were searched
, runpath_dirs : [ RunpathDir ] }
Args:
f: DependencyInfo -> a
modifies the results of each level
elf_path: path to ELF file, either absolute or relative to current working dir
Returns: a
"""
elf_cache = {}
return _ldd(elf_cache, f, elf_path)
### Functions to pass to ldd
# Only use the current layer
def remove_matching_needed(d, re_matcher_absolute_path=None, re_matcher_path=None):
"""Destructively removes needed values from d['needed']
if they match the given regex matcher.
Doesn't remove LDD_ERRORS."""
def pred(v):
"""return true if match"""
if v in LDD_ERRORS:
return False
found_in = v['found_in']
abs_match = re_matcher_absolute_path.match(found_in['absolute_path']) \
if re_matcher_absolute_path else False
match = re_matcher_path.match(found_in['path']) \
if re_matcher_path else False
if abs_match or match:
return True
d['needed'] = {
k: v for k, v in d['needed'].items()
if not pred(v)
}
def remove_matching_runpaths(d, re_matcher):
"""Destructively removes runpaths from d['runpath_dirs']
if they match the given regex matcher."""
d['runpath_dirs'] = [
runp for runp in d['runpath_dirs']
if not re_matcher.match(runp['absolute_path'])
]
return d
def non_existing_runpaths(d):
"""Return a list of runpaths_dirs that do not exist in the file system."""
return [
runp for runp in d['runpath_dirs']
if not os.path.exists(runp['absolute_path'])
]
def unused_runpaths(d):
"""Return a list of runpath_dirs that were not used to find NEEDED dependencies."""
used = set()
for k, v in d['needed'].items():
if not v in LDD_ERRORS:
used.add(v['found_in']['absolute_path'])
return [
u for u in d['runpath_dirs']
if u['absolute_path'] not in used
]
# Also use the results of sub-layers
def collect_unused_runpaths(d):
"""This is like `unused_runpaths`, but it creates a deduplicated list of all unused runpaths
for its dependencies instead of just returning them for the current layer.
Returns:
a dict of two fields;
`mine` contains the unused dependencies of the current binary under scrutiny
`others` contains a flat dict of all .sos with unused runpath entries and a list of them for each .so
"""
used = set()
given = set(r['absolute_path'] for r in d['runpath_dirs'])
prev = {}
# TODO: use `unused_runpaths` here
for k, v in d['needed'].items():
if not v in LDD_ERRORS:
used.add(v['found_in']['absolute_path'])
prev[k] = v['item']
unused = [
u for u in given.difference(used)
# leave out nix storepaths
if not u.startswith("/nix/store")
]
# Each layer doesn't know about their own name
# So we return a list of unused for this layer ('mine')
# and a dict of all previeous layers combined (name to list)
def combine_unused(deps):
res = {}
for name, dep in deps.items():
res.update(dep['others'])
res[name] = dep['mine']
return res
return {
'mine': unused,
'others': combine_unused(prev),
}