about summary refs log tree commit diff
diff options
context:
space:
mode:
authorVincent Ambo <tazjin@google.com>2019-08-12T16·47+0100
committerVincent Ambo <github@tazj.in>2019-08-13T23·02+0100
commit6035bf36eb93bc30db6ac40739913358e71d1121 (patch)
tree360e479422e04b75c2b5b920ced12886a44f7e74
parent6d718bf2713a7e2209197247976390b878f51313 (diff)
feat(popcount): Clean up popularity counting script
Adds the script used to generate the popularity information for all of
nixpkgs.

The README lists the (currently somewhat rough) usage instructions.
-rw-r--r--tools/nixery/group-layers/popcount.nix51
-rw-r--r--tools/nixery/popcount/README.md39
-rw-r--r--tools/nixery/popcount/empty.json1
-rwxr-xr-xtools/nixery/popcount/popcount (renamed from tools/nixery/group-layers/popcount)0
-rw-r--r--tools/nixery/popcount/popcount.nix53
5 files changed, 93 insertions, 51 deletions
diff --git a/tools/nixery/group-layers/popcount.nix b/tools/nixery/group-layers/popcount.nix
deleted file mode 100644
index e21d7367724b..000000000000
--- a/tools/nixery/group-layers/popcount.nix
+++ /dev/null
@@ -1,51 +0,0 @@
-{ pkgs ? import <nixpkgs> { config.allowUnfree = false; }
-, target }:
-
-let
-  inherit (pkgs) coreutils runCommand writeText;
-  inherit (builtins) replaceStrings readFile toFile fromJSON toJSON foldl' listToAttrs;
-
-  path = [ pkgs."${target}" ];
-
-  # graphJSON abuses feature in Nix that makes structured runtime
-  # closure information available to builders. This data is imported
-  # back via IFD to process it for layering data.
-  graphJSON =
-    path:
-    runCommand "build-graph" {
-      __structuredAttrs = true;
-      exportReferencesGraph.graph = path;
-      PATH = "${coreutils}/bin";
-      builder = toFile "builder" ''
-        . .attrs.sh
-        cat .attrs.json > ''${outputs[out]}
-      '';
-    } "";
-
-  buildClosures = paths: (fromJSON (readFile (graphJSON paths)));
-
-  buildGraph = paths: listToAttrs (map (c: {
-    name = c.path;
-    value = {
-    inherit (c) closureSize references;
-    };
-  }) (buildClosures paths));
-
-  # Nix does not allow attrbute set keys to refer to store paths, but
-  # we need them to for the purpose of the calculation. To work around
-  # it, the store path prefix is replaced with the string 'closure/'
-  # and later replaced again.
-  fromStorePath = replaceStrings [ "/nix/store" ] [ "closure/" ];
-  toStorePath = replaceStrings [ "closure/" ] [ "/nix/store/" ];
-
-  buildTree = paths:
-  let
-    graph = buildGraph paths;
-    top = listToAttrs (map (p: {
-      name = fromStorePath (toString p);
-      value = {};
-    }) paths);
-  in top;
-
-  outputJson = thing: writeText "the-thing.json" (builtins.toJSON thing);
-in outputJson (buildClosures path).graph
diff --git a/tools/nixery/popcount/README.md b/tools/nixery/popcount/README.md
new file mode 100644
index 000000000000..8485a4d30e9c
--- /dev/null
+++ b/tools/nixery/popcount/README.md
@@ -0,0 +1,39 @@
+popcount
+========
+
+This script is used to count the popularity of each package in `nixpkgs`, by
+determining how many other packages depend on it.
+
+It skips over all packages that fail to build, are not cached or are unfree -
+but these omissions do not meaningfully affect the statistics.
+
+It currently does not evaluate nested attribute sets (such as
+`haskellPackages`).
+
+## Usage
+
+1. Generate a list of all top-level attributes in `nixpkgs`:
+
+   ```shell
+   nix eval '(with builtins; toJSON (attrNames (import <nixpkgs> {})))' | jq -r | jq > all-top-level.json
+   ```
+
+2. Run `./popcount > all-runtime-deps.txt`
+
+3. Collect and count the results with the following magic incantation:
+
+   ```shell
+   cat all-runtime-deps.txt \
+     | sed -r 's|/nix/store/[a-z0-9]+-||g' \
+     | sort \
+     | uniq -c \
+     | sort -n -r \
+     | awk '{ print "{\"" $2 "\":" $1 "}"}' \
+     | jq -c -s '. | add | with_entries(select(.value > 1))' \
+     > your-output-file
+   ```
+
+   In essence, this will trim Nix's store paths and hashes from the output,
+   count the occurrences of each package and return the output as JSON. All
+   packages that have no references other than themselves are removed from the
+   output.
diff --git a/tools/nixery/popcount/empty.json b/tools/nixery/popcount/empty.json
new file mode 100644
index 000000000000..fe51488c7066
--- /dev/null
+++ b/tools/nixery/popcount/empty.json
@@ -0,0 +1 @@
+[]
diff --git a/tools/nixery/group-layers/popcount b/tools/nixery/popcount/popcount
index 83baf3045da7..83baf3045da7 100755
--- a/tools/nixery/group-layers/popcount
+++ b/tools/nixery/popcount/popcount
diff --git a/tools/nixery/popcount/popcount.nix b/tools/nixery/popcount/popcount.nix
new file mode 100644
index 000000000000..54fd2ad589ee
--- /dev/null
+++ b/tools/nixery/popcount/popcount.nix
@@ -0,0 +1,53 @@
+# Copyright 2019 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# This script, given a target attribute in `nixpkgs`, builds the
+# target derivation's runtime closure and returns its reference graph.
+#
+# This is invoked by the popcount script for each package in nixpkgs to
+# collect all package references, so that package popularity can be
+# tracked.
+#
+# Check out build-image/group-layers.go for an in-depth explanation of
+# what the popularity counts are used for.
+
+{ pkgs ? import <nixpkgs> { config.allowUnfree = false; }, target }:
+
+let
+  inherit (pkgs) coreutils runCommand writeText;
+  inherit (builtins) readFile toFile fromJSON toJSON listToAttrs;
+
+  # graphJSON abuses a feature in Nix that makes structured runtime
+  # closure information available to builders. This data is imported
+  # back via IFD to process it for layering data.
+  graphJSON = path:
+    runCommand "build-graph" {
+      __structuredAttrs = true;
+      exportReferencesGraph.graph = path;
+      PATH = "${coreutils}/bin";
+      builder = toFile "builder" ''
+        . .attrs.sh
+        cat .attrs.json > ''${outputs[out]}
+      '';
+    } "";
+
+  buildClosures = paths: (fromJSON (readFile (graphJSON paths)));
+
+  buildGraph = paths:
+    listToAttrs (map (c: {
+      name = c.path;
+      value = { inherit (c) closureSize references; };
+    }) (buildClosures paths));
+in writeText "${target}-graph"
+(toJSON (buildClosures [ pkgs."${target}" ]).graph)