diff options
author | sterni <sternenseemann@systemli.org> | 2022-09-26T21·46+0200 |
---|---|---|
committer | sterni <sternenseemann@systemli.org> | 2022-10-08T10·59+0000 |
commit | 57d5988b340ec1b799882f00323010d9435892ca (patch) | |
tree | dadde22436ba1e502b2b2c50d57dc1f797950be3 /nix | |
parent | ca3bd5c7cabf517f23234501928912d55fef45b3 (diff) |
feat(nix/dependency-analyzer): find deps among a list of known drvs r/5060
This was written with the same intention (and reuses a little of its code) as cl/5060 and cl/5063: We want to be able to emit dependencies between //nix/buildkite pipeline steps, so that no agent is occupied with waiting on locks for derivations built by a different agent. This dependency information is already available to the Nix store implementation (e.g. via `nix-store --query --references`) and can also be obtained in the Nix language, which is important since the pipeline is generated at evaluation time. (Note: For Nix 2.3, you either need a strong convention about how derivations expose their dependencies (which we don't have) or rely on store implementation internals (drv files). For Nix 2.6 there is a better trick, but it also relies on the existence of drv files.) The actual task can be formulated as follows: Given a set of derivations, calculate for each derivation the closest derivations it depends on that are also part of the input. (We call these (next) known dependencies.) This is crucial because pipeline steps often depend on each other only indirectly, with any number of intermediate derivations. For cl/5064 I determined that 6 intermediate layers is quite common for dependencies that are perceived to be “direct”. This problem is solved as follows: 1. Calculate the dependency graph of the combined dependency closure of all input derivations. This is quite easy and fairly quick thanks to the C++ implementation of builtins.genericClosure. One weak point of the current implementation is that the function to determine the direct derivation dependencies for Nix < 2.6 is quite hacky. 2. Take the graph from 1. and calculate a dependency graph that only connects the known derivations of the input, but retains all connections between them (minus intermediate nodes). In practice the dependency graph is represented as an attribute set mapping derivation paths to a list of derivation paths it depends on. 
The second step is performed by adding to each entry a second list containing the known derivation paths it depends on. The main improvements over the previous concept (cl/5060 and cl/5063): * We only try to find the closest known dependencies in the dependency graph, whereas previously we would traverse and emit dependencies for the entire dependency closure. * We immediately store the calculation of the closest known dependency in the dependency graph, even for intermediate nodes. This avoids recalculating the connection (which was a big drawback of the previous approach) and makes the calculation itself cheaper. You can run `mg build //nix/dependency-analyzer:example` to build a visualization of the internal dependencies between `depot.ci.targets` as discovered by dependency-analyzer. Change-Id: If8c0cdfc8470d4b337336257d9818aaa0d51110f Reviewed-on: https://cl.tvl.fyi/c/depot/+/6832 Tested-by: BuildkiteCI Reviewed-by: tazjin <tazjin@tvl.su>
Diffstat (limited to 'nix')
-rw-r--r-- | nix/dependency-analyzer/default.nix | 254 | ||||
-rw-r--r-- | nix/dependency-analyzer/examples/ci-targets.nix | 12 | ||||
-rw-r--r-- | nix/dependency-analyzer/examples/lisp.nix | 5 | ||||
-rw-r--r-- | nix/dependency-analyzer/tests/default.nix | 36 |
4 files changed, 307 insertions, 0 deletions
diff --git a/nix/dependency-analyzer/default.nix b/nix/dependency-analyzer/default.nix
new file mode 100644
index 000000000000..4ced173eafab
--- /dev/null
+++ b/nix/dependency-analyzer/default.nix
@@ -0,0 +1,254 @@
+{ lib, depot, pkgs, ... }:
+
+let
+  inherit (builtins) unsafeDiscardStringContext appendContext;
+
+  #
+  # Utilities
+  #
+
+  # Manipulate string context of the given string so that it only carries a
+  # `path` reference to itself (so it needs to be a string representation of
+  # a store path).
+  #
+  # This is intended for use on the `drvPath` attribute of derivations which by
+  # default carries a reference to the corresponding outputs. If we only want to
+  # read from the `drvPath`, having only a `path` reference makes sure we don't
+  # need to realise the derivation first.
+  #
+  # Type: str -> str
+  pathContextDrvPath = drvPath:
+    let
+      drvPath' = unsafeDiscardStringContext drvPath;
+    in
+    appendContext drvPath' { ${drvPath'} = { path = true; }; };
+
+  # Find all quoted references to a derivation path in the specified drv file.
+  # Should correspond to the list of input derivations, but is obviously a big
+  # HACK as we just grep for quoted store paths ending in a literal `.drv`.
+  # This should eventually be solved properly by parsing the drv file.
+  #
+  # Type: str -> [str]
+  directDrvDeps = drvPath: builtins.concatLists (
+    builtins.filter builtins.isList (
+      builtins.split
+        "\"(${lib.escapeRegex builtins.storeDir}/[[:alnum:]+._?=-]+\\.drv)\""
+        (builtins.readFile drvPath)
+    )
+  );
+
+  # Maps a list of derivations to the list of corresponding `drvPath`s.
+  #
+  # Type: [drv] -> [str]
+  drvsToPaths = drvs:
+    builtins.map (drv: pathContextDrvPath drv.drvPath) drvs;
+
+  #
+  # Calculate map of direct derivation dependencies
+  #
+
+  # Create the dependency map entry for a given `drvPath` which mainly includes
+  # a list of other `drvPath`s it depends on. Additionally we store whether the
+  # derivation is `known`, i.e. part of the initial list of derivations we start
+  # generating the map from.
+  #
+  # Type: bool -> str -> set
+  drvEntry = known: drvPath:
+    let
+      # key may not refer to a store path, …
+      key = unsafeDiscardStringContext drvPath;
+      # but we must read from the .drv file.
+      path = pathContextDrvPath drvPath;
+    in
+    {
+      inherit key;
+      # trick so we can call listToAttrs directly on the result of genericClosure
+      name = key;
+      value = {
+        deps = directDrvDeps path;
+        inherit known;
+      };
+    };
+
+  # Create an attribute set that maps every derivation in the combined
+  # dependency closure of the list of input derivation paths to each of its
+  # direct dependencies. Additionally every entry will have its `known`
+  # attribute set to `true` if it is in the list of input derivation paths.
+  #
+  # Type: [str] -> set
+  plainDrvDepMap = drvPaths:
+    builtins.listToAttrs (
+      builtins.genericClosure {
+        startSet = builtins.map (drvEntry true) drvPaths;
+        operator = { value, ... }: builtins.map (drvEntry false) value.deps;
+      }
+    );
+
+  #
+  # Calculate closest known dependencies in the dependency map
+  #
+
+  inherit (depot.nix.stateMonad)
+    after
+    bind
+    for_
+    get
+    getAttr
+    run
+    setAttr
+    pure
+    ;
+
+  # This is an action in stateMonad which expects the (initial) state to have
+  # been produced by `plainDrvDepMap`. Given a `drvPath`, it calculates a
+  # `knownDeps` list which holds the `drvPath`s of the closest derivation marked
+  # as `known` along every edge. This list is inserted into the dependency map
+  # for `drvPath` and every other derivation in its dependency closure (unless
+  # the information was already present). This means that the known dependency
+  # information for a derivation never has to be recalculated, as long as they
+  # are part of the same stateful computation.
+  #
+  # The upshot is that after calling `insertKnownDeps drvPath`,
+  # `fmap (builtins.getAttr "knownDeps") (getAttr drvPath)` will always succeed.
+  #
+  # Type: str -> stateMonad drvDepMap null
+  insertKnownDeps = drvPathWithContext:
+    let
+      # We no longer need to read from the store, so context is irrelevant, but
+      # we need to check for attr names which requires the absence of context.
+      drvPath = unsafeDiscardStringContext drvPathWithContext;
+    in
+    bind get (initDepMap:
+      # Get the dependency map's state before we've done anything to obtain the
+      # entry we'll be manipulating later as well as its dependencies.
+      let
+        entryPoint = initDepMap.${drvPath};
+
+        # We don't need to recurse if our direct dependencies either have their
+        # knownDeps list already populated or are known dependencies themselves.
+        depsPrecalculated =
+          builtins.partition
+            (dep:
+              initDepMap.${dep}.known
+              || initDepMap.${dep} ? knownDeps
+            )
+            entryPoint.deps;
+
+        # If a direct dependency is known, it goes right to our known dependency
+        # list. If it is unknown, we can copy its knownDeps list into our own.
+        initiallyKnownDeps =
+          builtins.concatLists (
+            builtins.map
+              (dep:
+                if initDepMap.${dep}.known
+                then [ dep ]
+                else initDepMap.${dep}.knownDeps
+              )
+              depsPrecalculated.right
+          );
+      in
+
+      # If the information was already calculated before, we can exit right away
+      if entryPoint ? knownDeps
+      then pure null
+      else
+        after
+          # For all unknown direct dependencies which don't have a `knownDeps`
+          # list, we call ourselves recursively to populate it. Since this is
+          # done sequentially in the state monad, we avoid recalculating the
+          # list for the same derivation multiple times.
+          (for_
+            depsPrecalculated.wrong
+            insertKnownDeps)
+          # After this we can obtain the updated dependency map which will have
+          # a `knownDeps` list for all our direct dependencies and update the
+          # entry for the input `drvPath`.
+          (bind
+            get
+            (populatedDepMap:
+              (setAttr drvPath (entryPoint // {
+                knownDeps =
+                  lib.unique (
+                    initiallyKnownDeps
+                      ++ builtins.concatLists (
+                      builtins.map
+                        (dep: populatedDepMap.${dep}.knownDeps)
+                        depsPrecalculated.wrong
+                    )
+                  );
+              }))))
+    );
+
+  # This function puts it all together and is exposed via `__functor`.
+  #
+  # For a list of `drvPath`s, calculate an attribute set which maps every
+  # `drvPath` to a set of the following form:
+  #
+  #     {
+  #       known = true /* if it is in the list of input derivation paths */;
+  #       deps = [
+  #         /* list of derivation paths it depends on directly */
+  #       ];
+  #       knownDeps = [
+  #         /* list of the closest derivation paths marked as known this
+  #            derivation depends on.
+  #         */
+  #       ];
+  #     }
+  knownDrvDepMap = knownDrvPaths:
+    run
+      (plainDrvDepMap knownDrvPaths)
+      (after
+        (for_
+          knownDrvPaths
+          insertKnownDeps)
+        get);
+
+  #
+  # Other things based on knownDrvDepMap
+  #
+
+  # Create an SVG visualizing `knownDrvDepMap`. Nodes are identified by derivation
+  # name, so multiple entries can be collapsed if they have the same name.
+  #
+  # Type: str -> [drv] -> drv
+  knownDependencyGraph = name: drvs:
+    let
+      justName = drvPath:
+        builtins.substring
+          (builtins.stringLength builtins.storeDir + 1 + 32 + 1)
+          (builtins.stringLength drvPath)
+          (unsafeDiscardStringContext drvPath);
+
+      gv = pkgs.writeText "${name}-dependency-analysis.gv" ''
+        digraph depot {
+        ${
+          (lib.concatStringsSep "\n"
+          (lib.mapAttrsToList (drvPath: value:
+            if !value.known then ""
+            else lib.concatMapStringsSep "\n"
+              (knownDep: " \"${justName drvPath}\" -> \"${justName knownDep}\"")
+              value.knownDeps
+          )
+          (depot.nix.dependency-analyzer (
+            drvsToPaths drvs
+          ))))
+        }
+        }
+      '';
+    in
+
+    pkgs.runCommand "${name}-dependency-analysis.svg"
+      {
+        nativeBuildInputs = [
+          pkgs.buildPackages.graphviz
+        ];
+      }
+      "dot -Tsvg < ${gv} > $out";
+in
+
+{
+  __functor = _: knownDrvDepMap;
+
+  inherit knownDependencyGraph plainDrvDepMap drvsToPaths;
+}
diff --git a/nix/dependency-analyzer/examples/ci-targets.nix b/nix/dependency-analyzer/examples/ci-targets.nix
new file mode 100644
index 000000000000..597abd410961
--- /dev/null
+++ b/nix/dependency-analyzer/examples/ci-targets.nix
@@ -0,0 +1,12 @@
+{ depot, lib, ... }:
+
+(
+  depot.nix.dependency-analyzer.knownDependencyGraph
+    "depot"
+    depot.ci.targets
+).overrideAttrs (old: {
+  # Causes an infinite recursion via ci.targets otherwise
+  meta = lib.recursiveUpdate (old.meta or { }) {
+    ci.skip = true;
+  };
+})
diff --git a/nix/dependency-analyzer/examples/lisp.nix b/nix/dependency-analyzer/examples/lisp.nix
new file mode 100644
index 000000000000..775eb9ab573f
--- /dev/null
+++ b/nix/dependency-analyzer/examples/lisp.nix
@@ -0,0 +1,5 @@
+{ depot, lib, ... }:
+
+depot.nix.dependency-analyzer.knownDependencyGraph "3p-lisp" (
+  builtins.filter lib.isDerivation (builtins.attrValues depot.third_party.lisp)
+)
diff --git a/nix/dependency-analyzer/tests/default.nix b/nix/dependency-analyzer/tests/default.nix
new file mode 100644
index 000000000000..79ac127e922f
--- /dev/null
+++ b/nix/dependency-analyzer/tests/default.nix
@@ -0,0 +1,36 @@
+{ depot, lib, ... }:
+
+let
+  inherit (depot.nix.runTestsuite)
+    runTestsuite
+    assertEq
+    it
+    ;
+
+  inherit (depot.nix.dependency-analyzer)
+    plainDrvDepMap
+    drvsToPaths
+    ;
+
+  knownDrvs = drvsToPaths (
+    builtins.filter lib.isDerivation (builtins.attrValues depot.third_party.lisp)
+  );
+  exampleMap = plainDrvDepMap knownDrvs;
+
+  # These will be needed to index into the attribute set which can't have context
+  # in the attribute names.
+  knownDrvsNoContext = builtins.map builtins.unsafeDiscardStringContext knownDrvs;
+in
+
+runTestsuite "dependency-analyzer" [
+  (it "checks plainDrvDepMap properties" [
+    (assertEq "all known drvs are marked known"
+      (builtins.all (drv: exampleMap.${drv}.known) knownDrvsNoContext)
+      true)
+    (assertEq "no unknown drv is marked known"
+      (builtins.all (entry: !entry.known) (
+        builtins.attrValues (builtins.removeAttrs exampleMap knownDrvsNoContext)
+      ))
+      true)
+  ])
+]