about summary refs log tree commit diff
diff options
context:
space:
mode:
author Vincent Ambo <tazjin@google.com> 2019-10-03T21·13+0100
committer Vincent Ambo <github@tazj.in> 2019-10-03T21·50+0100
commit d9b329ef59e35ae6070eae867cf06a5230ae3d51 (patch)
tree 89ad524347885a9ebc87a4deb2d383048db94696
parent 9bb6d0ae255c1340fe16687d740fad948e6a9335 (diff)
refactor(server): Always include 'cacert' & 'iana-etc'
These two packages almost always end up being required by programs,
but people don't necessarily consider them.

They will now always be added and their popularity is artificially
inflated to ensure they end up at the top of the layer list.
-rw-r--r-- tools/nixery/server/builder/builder.go 5
-rw-r--r-- tools/nixery/server/layers/grouping.go 24
2 files changed, 19 insertions, 10 deletions
diff --git a/tools/nixery/server/builder/builder.go b/tools/nixery/server/builder/builder.go
index 614291e660c5..7f391838f604 100644
--- a/tools/nixery/server/builder/builder.go
+++ b/tools/nixery/server/builder/builder.go
@@ -87,7 +87,7 @@ type BuildResult struct {
 // be used to invoke Nix.
 //
 // It will expand convenience names under the hood (see the `convenienceNames`
-// function below).
+// function below) and append packages that are always included (cacert, iana-etc).
 //
 // Once assembled the image structure uses a sorted representation of
 // the name. This is to avoid unnecessarily cache-busting images if
@@ -95,6 +95,7 @@ type BuildResult struct {
 func ImageFromName(name string, tag string) Image {
 	pkgs := strings.Split(name, "/")
 	expanded := convenienceNames(pkgs)
+	expanded = append(expanded, "cacert", "iana-etc")
 
 	sort.Strings(pkgs)
 	sort.Strings(expanded)
@@ -131,7 +132,7 @@ type ImageResult struct {
 //
 // * `shell`: Includes bash, coreutils and other common command-line tools
 func convenienceNames(packages []string) []string {
-	shellPackages := []string{"bashInteractive", "cacert", "coreutils", "iana-etc", "moreutils", "nano"}
+	shellPackages := []string{"bashInteractive", "coreutils", "moreutils", "nano"}
 
 	if packages[0] == "shell" {
 		return append(packages[1:], shellPackages...)
diff --git a/tools/nixery/server/layers/grouping.go b/tools/nixery/server/layers/grouping.go
index 9992cd3c13d6..9dbd5e88ce56 100644
--- a/tools/nixery/server/layers/grouping.go
+++ b/tools/nixery/server/layers/grouping.go
@@ -186,13 +186,11 @@ func (c *closure) bigOrPopular() bool {
 		return true
 	}
 
-	// The threshold value used here is currently roughly the
-	// minimum number of references that only 1% of packages in
-	// the entire package set have.
-	//
-	// TODO(tazjin): Do this more elegantly by calculating
-	// percentiles for each package and using those instead.
-	if c.Popularity >= 1000 {
+	// Threshold value is picked arbitrarily right now. The reason
+	// for this is that some packages (such as `cacert`) are
+	// directly referenced by very few packages, but are required
+	// by pretty much everything.
+	if c.Popularity >= 100 {
 		return true
 	}
 
@@ -241,7 +239,17 @@ func buildGraph(refs *RuntimeGraph, pop *Popularity) *simple.DirectedGraph {
 			Refs:    c.Refs,
 		}
 
-		if p, ok := (*pop)[node.DOTID()]; ok {
+		// The packages `nss-cacert` and `iana-etc` are added
+		// by Nixery to *every single image* and should have a
+		// very high popularity.
+		//
+		// Other popularity values are populated from the data
+		// set assembled by Nixery's popcount.
+		id := node.DOTID()
+		if strings.HasPrefix(id, "nss-cacert") || strings.HasPrefix(id, "iana-etc") {
+			// glibc has ~300k references, these packages need *more*
+			node.Popularity = 500000
+		} else if p, ok := (*pop)[id]; ok {
 			node.Popularity = p
 		} else {
 			node.Popularity = 1