From 6035bf36eb93bc30db6ac40739913358e71d1121 Mon Sep 17 00:00:00 2001
From: Vincent Ambo
Date: Mon, 12 Aug 2019 17:47:27 +0100
Subject: feat(popcount): Clean up popularity counting script

Adds the script used to generate the popularity information for all of
nixpkgs.

The README lists the (currently somewhat rough) usage instructions.
---
 tools/nixery/popcount/README.md    | 39 ++++++++++++++++++++++++++++
 tools/nixery/popcount/empty.json   |  1 +
 tools/nixery/popcount/popcount     | 13 ++++++++++
 tools/nixery/popcount/popcount.nix | 53 ++++++++++++++++++++++++++++++++++++++
 4 files changed, 106 insertions(+)
 create mode 100644 tools/nixery/popcount/README.md
 create mode 100644 tools/nixery/popcount/empty.json
 create mode 100755 tools/nixery/popcount/popcount
 create mode 100644 tools/nixery/popcount/popcount.nix

(limited to 'tools/nixery/popcount')

diff --git a/tools/nixery/popcount/README.md b/tools/nixery/popcount/README.md
new file mode 100644
index 000000000000..8485a4d30e9c
--- /dev/null
+++ b/tools/nixery/popcount/README.md
@@ -0,0 +1,39 @@
+popcount
+========
+
+This script is used to count the popularity for each package in `nixpkgs`, by
+determining how many other packages depend on it.
+
+It skips over all packages that fail to build, are not cached or are unfree -
+but these omissions do not meaningfully affect the statistics.
+
+It currently does not evaluate nested attribute sets (such as
+`haskellPackages`).
+
+## Usage
+
+1. Generate a list of all top-level attributes in `nixpkgs`:
+
+   ```shell
+   nix eval '(with builtins; toJSON (attrNames (import <nixpkgs> {})))' | jq -r | jq > all-top-level.json
+   ```
+
+2. Run `./popcount > all-runtime-deps.txt`
+
+3. Collect and count the results with the following magic incantation:
+
+   ```shell
+   cat all-runtime-deps.txt \
+     | sed -r 's|/nix/store/[a-z0-9]+-||g' \
+     | sort \
+     | uniq -c \
+     | sort -n -r \
+     | awk '{ print "{\"" $2 "\":" $1 "}"}' \
+     | jq -c -s '. 
| add | with_entries(select(.value > 1))' \
+     > your-output-file
+   ```
+
+   In essence, this will trim Nix's store paths and hashes from the output,
+   count the occurrences of each package and return the output as JSON. All
+   packages that have no references other than themselves are removed from the
+   output.
diff --git a/tools/nixery/popcount/empty.json b/tools/nixery/popcount/empty.json
new file mode 100644
index 000000000000..fe51488c7066
--- /dev/null
+++ b/tools/nixery/popcount/empty.json
@@ -0,0 +1 @@
+[]
diff --git a/tools/nixery/popcount/popcount b/tools/nixery/popcount/popcount
new file mode 100755
index 000000000000..83baf3045da7
--- /dev/null
+++ b/tools/nixery/popcount/popcount
@@ -0,0 +1,13 @@
+#!/bin/bash
+set -ueo pipefail
+# graphsFor ATTR: print every entry of `.references` from the graph built for ATTR.
+function graphsFor() {
+  local pkg="${1}"
+  local graphs=$(nix-build --timeout 2 --argstr target "${pkg}" popcount.nix || echo -n 'empty.json') # failed build => empty graph
+  jq -r -cM '.[] | .references[]' "${graphs}"
+}
+# Walk every attribute listed in all-top-level.json; stderr of each build is discarded.
+for pkg in $(jq -r '.[]' all-top-level.json); do
+  graphsFor "${pkg}" 2>/dev/null
+  echo "Printed refs for ${pkg}" >&2
+done
diff --git a/tools/nixery/popcount/popcount.nix b/tools/nixery/popcount/popcount.nix
new file mode 100644
index 000000000000..54fd2ad589ee
--- /dev/null
+++ b/tools/nixery/popcount/popcount.nix
@@ -0,0 +1,53 @@
+# Copyright 2019 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# This script, given a target attribute in `nixpkgs`, builds the
+# target derivation's runtime closure and returns its reference graph.
+#
+# This is invoked by the `popcount` script for each package in nixpkgs to
+# collect all package references, so that package popularity can be
+# tracked.
+#
+# Check out build-image/group-layers.go for an in-depth explanation of
+# what the popularity counts are used for.
+
+{ pkgs ? import <nixpkgs> { config.allowUnfree = false; }, target }:
+
+let
+  inherit (pkgs) coreutils runCommand writeText;
+  inherit (builtins) readFile toFile fromJSON toJSON listToAttrs;
+
+  # graphJSON abuses a feature in Nix that makes structured runtime
+  # closure information available to builders. This data is imported
+  # back via IFD to process it for layering data.
+  graphJSON = path:
+    runCommand "build-graph" {
+      __structuredAttrs = true;
+      exportReferencesGraph.graph = path;
+      PATH = "${coreutils}/bin";
+      builder = toFile "builder" ''
+        . .attrs.sh
+        cat .attrs.json > ''${outputs[out]}
+      '';
+    } "";
+  # buildClosures reads the file produced by graphJSON and decodes it from JSON.
+  buildClosures = paths: (fromJSON (readFile (graphJSON paths)));
+  # NOTE(review): buildGraph is not referenced by the final expression below.
+  buildGraph = paths:
+    listToAttrs (map (c: {
+      name = c.path;
+      value = { inherit (c) closureSize references; };
+    }) (buildClosures paths));
+in writeText "${target}-graph"
+(toJSON (buildClosures [ pkgs."${target}" ]).graph)
--
cgit 1.4.1