// Copyright 2022 The TVL Contributors
// SPDX-License-Identifier: Apache-2.0

// Popcount fetches popularity information for each store path in a
// given Nix channel from the upstream binary cache.
//
// It does this simply by inspecting the narinfo files, rather than
// attempting to deal with instantiation of the binary cache.
//
// This is *significantly* faster than attempting to realise the whole
// channel and then calling `nix path-info` on it.
//
// TODO(tazjin): Persist intermediate results (references for each
// store path) to speed up subsequent runs.
package main

import (
	"encoding/json"
	"fmt"
	"io"
	"io/ioutil"
	"log"
	"net/http"
	"os"
	"os/exec"
	"regexp"
	"strings"
)

// client is the shared HTTP client used for all downloads that are
// allowed to follow redirects.
var client http.Client

var (
	// pathexp matches a full store path, capturing its hash and name.
	pathexp = regexp.MustCompile("/nix/store/([a-z0-9]{32})-(.*)$")

	// refsexp matches the 'References:' line of a narinfo file,
	// capturing the space-separated list of references.
	refsexp = regexp.MustCompile("(?m:^References: (.*)$)")

	// refexp matches a single reference (a store path without the
	// store directory prefix), capturing its hash and name.
	refexp = regexp.MustCompile("^([a-z0-9]{32})-(.*)$")
)

// meta describes a channel that has been pinned to a specific commit.
type meta struct {
	name   string // channel name as passed on the command line
	url    string // redirect target of the channel URL
	commit string // contents of the channel's git-revision file
}

// item is a store path split into its name and hash parts.
type item struct {
	name string
	hash string
}

// failOn terminates the program with the given message if err is
// non-nil.
func failOn(err error, msg string) {
	if err != nil {
		log.Fatalf("%s: %s", msg, err)
	}
}

// channelMetadata resolves the given channel name to the URL it
// currently redirects to and fetches the commit of that channel
// release. Any failure terminates the program.
func channelMetadata(channel string) meta {
	// This needs an HTTP client that does not follow redirects
	// because the channel URL is used explicitly for other
	// downloads.
	c := http.Client{
		CheckRedirect: func(req *http.Request, via []*http.Request) error {
			return http.ErrUseLastResponse
		},
	}

	resp, err := c.Get(fmt.Sprintf("https://channels.nixos.org/%s", channel))
	failOn(err, "failed to retrieve channel metadata")
	// Only the status and headers of this response are used.
	resp.Body.Close()

	// TODO(tazjin): These redirects are currently served as 301s, but
	// should (and used to) be 302s. Check if/when this is fixed and
	// update accordingly.
	//
	// The status is checked before the location so that an unexpected
	// response is reported with its actual status.
	if !(resp.StatusCode == 301 || resp.StatusCode == 302) {
		log.Fatalf("Expected redirect for channel, but received '%s'\n", resp.Status)
	}

	loc, err := resp.Location()
	failOn(err, "no redirect location given for channel")

	commitResp, err := c.Get(fmt.Sprintf("%s/git-revision", loc.String()))
	failOn(err, "failed to retrieve commit for channel")
	defer commitResp.Body.Close()

	commit, err := ioutil.ReadAll(commitResp.Body)
	failOn(err, "failed to read commit from response")
	if commitResp.StatusCode != 200 {
		log.Fatalf("non-success status code when fetching commit: %s (%v)", string(commit), commitResp.StatusCode)
	}

	return meta{
		name: channel,
		url:  loc.String(),
		// The commit ends up in the output file name, so surrounding
		// whitespace (e.g. a trailing newline) must not leak into it.
		commit: strings.TrimSpace(string(commit)),
	}
}

// downloadStorePaths downloads the store-paths.xz file of the given
// channel, decompresses it using the external 'xzcat' program and
// returns its lines. Any failure terminates the program.
//
// The result may contain empty strings (for example after a trailing
// newline).
func downloadStorePaths(c *meta) []string {
	resp, err := client.Get(fmt.Sprintf("%s/store-paths.xz", c.url))
	failOn(err, "failed to download store-paths.xz")
	defer resp.Body.Close()

	if resp.StatusCode != 200 {
		log.Fatalf("non-success status code when fetching store-paths.xz: %s", resp.Status)
	}

	// With a non-file Stdin, os/exec copies the response body into the
	// process itself and reports copy failures through the returned
	// error.
	cmd := exec.Command("xzcat")
	cmd.Stdin = resp.Body

	paths, err := cmd.Output()
	failOn(err, "xzcat failed to decompress store paths")

	return strings.Split(string(paths), "\n")
}

// storePathToItem splits a store path into its hash and name. It
// returns nil if the path does not look like a store path.
func storePathToItem(path string) *item {
	res := pathexp.FindStringSubmatch(path)
	if len(res) != 3 {
		return nil
	}

	return &item{
		hash: res[1],
		name: res[2],
	}
}

// narInfoToRefs extracts the names (without hashes) of all references
// listed in the given narinfo.
//
// The result has one element per space-separated entry of the
// 'References:' line; empty and malformed entries are represented by
// empty strings. A narinfo without a 'References:' line terminates the
// program.
func narInfoToRefs(narinfo string) []string {
	all := refsexp.FindAllStringSubmatch(narinfo, 1)
	if len(all) != 1 {
		log.Fatalf("failed to parse narinfo:\n%s\nfound: %v\n", narinfo, all)
	}

	refs := strings.Split(all[0][1], " ")
	for i, s := range refs {
		if s == "" {
			continue
		}

		res := refexp.FindStringSubmatch(s)
		if res == nil {
			log.Printf("ignoring malformed reference %q\n", s)
			refs[i] = ""
			continue
		}

		refs[i] = res[2]
	}

	return refs
}

// fetchNarInfo returns the narinfo for the given item, either from the
// local 'popcache' directory or from cache.nixos.org. Successfully
// downloaded narinfos are written to the local cache.
func fetchNarInfo(i *item) (string, error) {
	cachePath := "popcache/" + i.hash

	if file, err := ioutil.ReadFile(cachePath); err == nil {
		return string(file), nil
	}

	resp, err := client.Get(fmt.Sprintf("https://cache.nixos.org/%s.narinfo", i.hash))
	if err != nil {
		return "", err
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		// Drain the body so that the connection can be reused; the
		// error page itself is of no interest and must not be cached.
		io.Copy(ioutil.Discard, resp.Body)
		return "", fmt.Errorf("unexpected status %q", resp.Status)
	}

	narinfo, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		return "", err
	}

	// Writing to the cache is best-effort: a failure only means that
	// the narinfo is downloaded again on the next run.
	_ = ioutil.WriteFile(cachePath, narinfo, 0644)

	return string(narinfo), nil
}

// downloader starts a worker that takes care of downloading narinfos
// for all paths received from the queue.
//
// If there is no data remaining in the queue, the downloader exits
// and informs the finaliser queue about having exited.
func downloader(queue chan *item, narinfos chan string, downloaders chan struct{}) {
	for i := range queue {
		ni, err := fetchNarInfo(i)
		if err != nil {
			log.Printf("couldn't fetch narinfo for %s: %s\n", i.name, err)
			continue
		}
		narinfos <- ni
	}
	downloaders <- struct{}{}
}

// finaliser counts the number of downloaders that have exited and
// closes the narinfos queue to signal to the counters that no more
// elements will arrive.
//
// The downloaders channel is left open; it is owned by its senders.
func finaliser(count int, downloaders chan struct{}, narinfos chan string) {
	for ; count > 0; count-- {
		<-downloaders
	}
	close(narinfos)
}

func main() {
	if len(os.Args) < 2 {
		log.Fatalf("Nix channel must be specified as first argument")
	}

	err := os.MkdirAll("popcache", 0755)
	if err != nil {
		log.Fatalf("Failed to create 'popcache' directory in current folder: %s\n", err)
	}

	count := 42 // concurrent downloader count
	channel := os.Args[1]
	log.Printf("Fetching metadata for channel '%s'\n", channel)

	channelMeta := channelMetadata(channel)
	log.Printf("Pinned channel '%s' to commit '%s'\n", channelMeta.name, channelMeta.commit)

	paths := downloadStorePaths(&channelMeta)
	log.Printf("Fetching references for %d store paths\n", len(paths))

	// Download paths concurrently and receive their narinfos into
	// a channel. Data is collated centrally into a map and
	// serialised at the /very/ end.
	//
	// The queue is large enough to hold every path, so it can be
	// filled and closed before any downloader has started.
	downloadQueue := make(chan *item, len(paths))
	for _, p := range paths {
		if i := storePathToItem(p); i != nil {
			downloadQueue <- i
		}
	}
	close(downloadQueue)

	// Set up a task tracking channel for parsing & counting
	// narinfos, as well as a coordination channel for signaling
	// that all downloads have finished
	narinfos := make(chan string, 50)
	downloaders := make(chan struct{}, count)
	for i := 0; i < count; i++ {
		go downloader(downloadQueue, narinfos, downloaders)
	}
	go finaliser(count, downloaders, narinfos)

	counts := make(map[string]int)
	for ni := range narinfos {
		for _, ref := range narInfoToRefs(ni) {
			if ref == "" {
				continue
			}
			counts[ref]++
		}
	}

	// Remove all self-references (i.e. packages not referenced by anyone else)
	for k, v := range counts {
		if v == 1 {
			delete(counts, k)
		}
	}

	encoded, err := json.Marshal(counts)
	failOn(err, "failed to serialise popularity counts")

	outfile := fmt.Sprintf("popularity-%s-%s.json", channelMeta.name, channelMeta.commit)
	err = ioutil.WriteFile(outfile, encoded, 0644)
	if err != nil {
		log.Fatalf("Failed to write output to '%s': %s\n", outfile, err)
	}

	log.Printf("Wrote output to '%s'\n", outfile)
}