about summary refs log tree commit diff
path: root/tools
diff options
context:
space:
mode:
authorVincent Ambo <tazjin@google.com>2019-08-11T19·05+0100
committerVincent Ambo <github@tazj.in>2019-08-13T23·02+0100
commit92078527dbe8f853d984a4038e718c662c016741 (patch)
tree1561a14a41b823ef52f25772294bebcf5f0059f6 /tools
parentce31598f4264a3c8af749b3fd533a905dcd69edc (diff)
feat(group-layers): Add preliminary size & popularity considerations
As described in the design document, this adds considerations for
closure size and popularity. All closures meeting a certain threshold
for either value will have an extra edge from the image root to
themselves inserted in the graph, which will cause them to be
considered for inclusion in a separate layer.

This is preliminary because popularity is considered as a boolean
toggle (the input I generated only contains the top ~200 most popular
packages), but it should be using either absolute popularity values or
percentiles (needs some experimentation).
Diffstat (limited to 'tools')
-rw-r--r--tools/nixery/group-layers/group-layers.go92
1 files changed, 66 insertions, 26 deletions
diff --git a/tools/nixery/group-layers/group-layers.go b/tools/nixery/group-layers/group-layers.go
index 1ee67433de..7be89e4e67 100644
--- a/tools/nixery/group-layers/group-layers.go
+++ b/tools/nixery/group-layers/group-layers.go
@@ -108,9 +108,7 @@ import (
 	"flag"
 	"io/ioutil"
 	"log"
-	"fmt"
 	"regexp"
-	"os"
 
 	"gonum.org/v1/gonum/graph/simple"
 	"gonum.org/v1/gonum/graph/flow"
@@ -131,12 +129,20 @@ type exportReferences struct {
 	} `json:"graph"`
 }
 
+// Popularity data for each Nix package that was calculated in advance.
+//
+// Popularity is a number from 1-100 that represents the
+// popularity percentile in which this package resides inside
+// of the nixpkgs tree.
+type pkgsMetadata = map[string]int
+
 // closure as pointed to by the graph nodes.
 type closure struct {
 	GraphID int64
 	Path string
 	Size uint64
 	Refs []string
+	Popularity int
 	// TODO(tazjin): popularity and other funny business
 }
 
@@ -149,7 +155,36 @@ func (c *closure) DOTID() string {
 	return nixRegexp.ReplaceAllString(c.Path, "")
 }
 
-func insertEdges(graph *simple.DirectedGraph, cmap *map[string]*closure, node *closure) {
+// bigOrPopular checks whether this closure should be considered for
+// separation into its own layer, even if it would otherwise only
+// appear in a subtree of the dominator tree.
+func (c *closure) bigOrPopular(pkgs *pkgsMetadata) bool {
+	const sizeThreshold = 100 * 1000000 // 100MB
+
+	if c.Size > sizeThreshold {
+		return true
+	}
+
+	// TODO(tazjin): After generating the full data, this should
+	// be changed to something other than a simple inclusion
+	// (currently the test-data only contains the top 200
+	// packages).
+	pop, ok := (*pkgs)[c.DOTID()]
+	if ok {
+		log.Printf("%q is popular!\n", c.DOTID())
+	}
+	c.Popularity = pop
+	return ok
+}
+
+func insertEdges(graph *simple.DirectedGraph, pop *pkgsMetadata, cmap *map[string]*closure, node *closure) {
+	// Big or popular nodes get a separate edge from the top to
+	// flag them for their own layer.
+	if node.bigOrPopular(pop) && !graph.HasEdgeFromTo(0, node.ID()) {
+		edge := graph.NewEdge(graph.Node(0), node)
+		graph.SetEdge(edge)
+	}
+
 	for _, c := range node.Refs {
 		// Nix adds a self reference to each node, which
 		// should not be inserted.
@@ -161,7 +196,7 @@ func insertEdges(graph *simple.DirectedGraph, cmap *map[string]*closure, node *c
 }
 
 // Create a graph structure from the references supplied by Nix.
-func buildGraph(refs *exportReferences) *simple.DirectedGraph {
+func buildGraph(refs *exportReferences, pop *pkgsMetadata) *simple.DirectedGraph {
 	cmap := make(map[string]*closure)
 	graph := simple.NewDirectedGraph()
 
@@ -196,15 +231,15 @@ func buildGraph(refs *exportReferences) *simple.DirectedGraph {
 	}
 
 	for _, c := range cmap {
-		insertEdges(graph, &cmap, c)
+		insertEdges(graph, pop, &cmap, c)
 	}
 
-	gv, err := dot.Marshal(graph, "deps", "", "")
-	if err != nil {
-		log.Fatalf("Could not encode graph: %s\n", err)
-	}
-	fmt.Print(string(gv))
-	os.Exit(0)
+	// gv, err := dot.Marshal(graph, "deps", "", "")
+	// if err != nil {
+	// 	log.Fatalf("Could not encode graph: %s\n", err)
+	// }
+	// fmt.Print(string(gv))
+	// os.Exit(0)
 
 	return graph
 }
@@ -233,25 +268,16 @@ func dominate(graph *simple.DirectedGraph) {
 	if err != nil {
 		log.Fatalf("Could not encode graph: %s\n", err)
 	}
-	fmt.Print(string(gv))
-
-	// fmt.Printf("%v edges in the graph\n", graph.Edges().Len())
-	// top := 0
-	// for _, n := range dt.DominatedBy(0) {
-	// 	fmt.Printf("%q is top-level\n", n.(*closure).Path)
-	// 	top++
-	// }
-	// fmt.Printf("%v total top-level nodes\n", top)
-	// root := dt.Root().(*closure)
-	// fmt.Printf("dominator tree root is %q\n", root.Path)
-	// fmt.Printf("%v nodes can reach to 1\n", nodes.Len())
+	ioutil.WriteFile("graph.dot", gv, 0644)
 }
 
 func main() {
-	inputFile := flag.String("input", ".attrs.json", "Input file containing graph")
+	graphFile := flag.String("graph", ".attrs.json", "Input file containing graph")
+	popFile := flag.String("pop", "popularity.json", "Package popularity data")
 	flag.Parse()
 
-	file, err := ioutil.ReadFile(*inputFile)
+	// Parse graph data
+	file, err := ioutil.ReadFile(*graphFile)
 	if err != nil {
 		log.Fatalf("Failed to load input: %s\n", err)
 	}
@@ -262,6 +288,20 @@ func main() {
 		log.Fatalf("Failed to deserialise input: %s\n", err)
 	}
 
-	graph := buildGraph(&refs)
+	// Parse popularity data
+	popBytes, err := ioutil.ReadFile(*popFile)
+	if err != nil {
+		log.Fatalf("Failed to load input: %s\n", err)
+	}
+
+	var pop pkgsMetadata
+	err = json.Unmarshal(popBytes, &pop)
+	if err != nil {
+		log.Fatalf("Failed to deserialise input: %s\n", err)
+	}
+
+	log.Printf("%v\n", pop)
+
+	graph := buildGraph(&refs, &pop)
 	dominate(graph)
 }