diff options
Diffstat (limited to 'tools/nixery/builder/builder.go')
-rw-r--r-- | tools/nixery/builder/builder.go | 518 |
1 files changed, 518 insertions, 0 deletions
diff --git a/tools/nixery/builder/builder.go b/tools/nixery/builder/builder.go new file mode 100644 index 000000000000..37c9b9fcb763 --- /dev/null +++ b/tools/nixery/builder/builder.go @@ -0,0 +1,518 @@ +// Copyright 2022 The TVL Contributors +// SPDX-License-Identifier: Apache-2.0 + +// Package builder implements the logic for assembling container +// images. It shells out to Nix to retrieve all required Nix-packages +// and assemble the symlink layer and then creates the required +// tarballs in-process. +package builder + +import ( + "bufio" + "bytes" + "compress/gzip" + "context" + "crypto/sha256" + "encoding/json" + "fmt" + "io" + "io/ioutil" + "os" + "os/exec" + "sort" + "strings" + + "github.com/google/nixery/config" + "github.com/google/nixery/manifest" + "github.com/google/nixery/storage" + log "github.com/sirupsen/logrus" +) + +// The maximum number of layers in an image is 125. To allow for +// extensibility, the actual number of layers Nixery is "allowed" to +// use up is set at a lower point. +const LayerBudget int = 94 + +// State holds the runtime state that is carried around in Nixery and +// passed to builder functions. +type State struct { + Storage storage.Backend + Cache *LocalCache + Cfg config.Config + Pop Popularity +} + +// Architecture represents the possible CPU architectures for which +// container images can be built. +// +// The default architecture is amd64, but support for ARM platforms is +// available within nixpkgs and can be toggled via meta-packages. +type Architecture struct { + // Name of the system tuple to pass to Nix + nixSystem string + + // Name of the architecture as used in the OCI manifests + imageArch string +} + +var amd64 = Architecture{"x86_64-linux", "amd64"} +var arm64 = Architecture{"aarch64-linux", "arm64"} + +// Image represents the information necessary for building a container image. +// This can be either a list of package names (corresponding to keys in the +// nixpkgs set) or a Nix expression that results in a *list* of derivations. +type Image struct { + Name string + Tag string + + // Names of packages to include in the image. These must correspond + // directly to top-level names of Nix packages in the nixpkgs tree. + Packages []string + + // Architecture for which to build the image. Nixery defaults + // this to amd64 if not specified via meta-packages. + Arch *Architecture +} + +// BuildResult represents the data returned from the server to the +// HTTP handlers. Error information is propagated straight from Nix +// for errors inside of the build that should be fed back to the +// client (such as missing packages). +type BuildResult struct { + Error string `json:"error"` + Pkgs []string `json:"pkgs"` + Manifest json.RawMessage `json:"manifest"` +} + +// ImageFromName parses an image name into the corresponding structure which can +// be used to invoke Nix. +// +// It will expand convenience names under the hood (see the `convenienceNames` +// function below) and append packages that are always included (cacert, iana-etc). +// +// Once assembled the image structure uses a sorted representation of +// the name. This is to avoid unnecessarily cache-busting images if +// only the order of requested packages has changed. +func ImageFromName(name string, tag string) Image { + pkgs := strings.Split(name, "/") + arch, expanded := metaPackages(pkgs) + expanded = append(expanded, "cacert", "iana-etc") + + sort.Strings(pkgs) + sort.Strings(expanded) + + return Image{ + Name: strings.Join(pkgs, "/"), + Tag: tag, + Packages: expanded, + Arch: arch, + } +} + +// ImageResult represents the output of calling the Nix derivation +// responsible for preparing an image. +type ImageResult struct { + // These fields are populated in case of an error + Error string `json:"error"` + Pkgs []string `json:"pkgs"` + + // These fields are populated in case of success + Graph runtimeGraph `json:"runtimeGraph"` + SymlinkLayer struct { + Size int `json:"size"` + TarHash string `json:"tarHash"` + Path string `json:"path"` + } `json:"symlinkLayer"` +} + +// metaPackages expands package names defined by Nixery which either +// include sets of packages or trigger certain image-building +// behaviour. +// +// Meta-packages must be specified as the first packages in an image +// name. +// +// Currently defined meta-packages are: +// +// * `shell`: Includes bash, coreutils and other common command-line tools +// * `arm64`: Causes Nixery to build images for the ARM64 architecture +func metaPackages(packages []string) (*Architecture, []string) { + arch := &amd64 + + var metapkgs []string + lastMeta := 0 + for idx, p := range packages { + if p == "shell" || p == "arm64" { + metapkgs = append(metapkgs, p) + lastMeta = idx + 1 + } else { + break + } + } + + // Chop off the meta-packages from the front of the package + // list + packages = packages[lastMeta:] + + for _, p := range metapkgs { + switch p { + case "shell": + packages = append(packages, "bashInteractive", "coreutils", "moreutils", "nano") + case "arm64": + arch = &arm64 + } + } + + return arch, packages +} + +// logNix logs each output line from Nix. It runs in a goroutine per +// output channel that should be live-logged. +func logNix(image, cmd string, r io.ReadCloser) { + scanner := bufio.NewScanner(r) + for scanner.Scan() { + log.WithFields(log.Fields{ + "image": image, + "cmd": cmd, + }).Info("[nix] " + scanner.Text()) + } +} + +func callNix(program, image string, args []string) ([]byte, error) { + cmd := exec.Command(program, args...) + + outpipe, err := cmd.StdoutPipe() + if err != nil { + return nil, err + } + + errpipe, err := cmd.StderrPipe() + if err != nil { + return nil, err + } + go logNix(image, program, errpipe) + + if err = cmd.Start(); err != nil { + log.WithError(err).WithFields(log.Fields{ + "image": image, + "cmd": program, + }).Error("error invoking Nix") + + return nil, err + } + + log.WithFields(log.Fields{ + "cmd": program, + "image": image, + }).Info("invoked Nix build") + + stdout, _ := ioutil.ReadAll(outpipe) + + if err = cmd.Wait(); err != nil { + log.WithError(err).WithFields(log.Fields{ + "image": image, + "cmd": program, + "stdout": stdout, + }).Info("failed to invoke Nix") + + return nil, err + } + + resultFile := strings.TrimSpace(string(stdout)) + buildOutput, err := ioutil.ReadFile(resultFile) + if err != nil { + log.WithError(err).WithFields(log.Fields{ + "image": image, + "file": resultFile, + }).Info("failed to read Nix result file") + + return nil, err + } + + return buildOutput, nil +} + +// Call out to Nix and request metadata for the image to be built. All +// required store paths for the image will be realised, but layers +// will not yet be created from them. +// +// This function is only invoked if the manifest is not found in any +// cache. +func prepareImage(s *State, image *Image) (*ImageResult, error) { + packages, err := json.Marshal(image.Packages) + if err != nil { + return nil, err + } + + srcType, srcArgs := s.Cfg.Pkgs.Render(image.Tag) + + args := []string{ + "--timeout", s.Cfg.Timeout, + "--argstr", "packages", string(packages), + "--argstr", "srcType", srcType, + "--argstr", "srcArgs", srcArgs, + "--argstr", "system", image.Arch.nixSystem, + } + + output, err := callNix("nixery-prepare-image", image.Name, args) + if err != nil { + // granular error logging is performed in callNix already + return nil, err + } + + log.WithFields(log.Fields{ + "image": image.Name, + "tag": image.Tag, + }).Info("finished image preparation via Nix") + + var result ImageResult + err = json.Unmarshal(output, &result) + if err != nil { + return nil, err + } + + return &result, nil +} + +// Groups layers and checks whether they are present in the cache +// already, otherwise calls out to Nix to assemble layers. +// +// Newly built layers are uploaded to the bucket. Cache entries are +// added only after successful uploads, which guarantees that entries +// retrieved from the cache are present in the bucket. +func prepareLayers(ctx context.Context, s *State, image *Image, result *ImageResult) ([]manifest.Entry, error) { + grouped := groupLayers(&result.Graph, &s.Pop, LayerBudget) + + var entries []manifest.Entry + + // Splits the layers into those which are already present in + // the cache, and those that are missing. + // + // Missing layers are built and uploaded to the storage + // bucket. + for _, l := range grouped { + if entry, cached := layerFromCache(ctx, s, l.Hash()); cached { + entries = append(entries, *entry) + } else { + lh := l.Hash() + + // While packing store paths, the SHA sum of + // the uncompressed layer is computed and + // written to `tarhash`. + // + // TODO(tazjin): Refactor this to make the + // flow of data cleaner. + var tarhash string + lw := func(w io.Writer) error { + var err error + tarhash, err = packStorePaths(&l, w) + return err + } + + entry, err := uploadHashLayer(ctx, s, lh, lw) + if err != nil { + return nil, err + } + entry.MergeRating = l.MergeRating + entry.TarHash = tarhash + + var pkgs []string + for _, p := range l.Contents { + pkgs = append(pkgs, packageFromPath(p)) + } + + log.WithFields(log.Fields{ + "layer": lh, + "packages": pkgs, + "tarhash": tarhash, + }).Info("created image layer") + + go cacheLayer(ctx, s, l.Hash(), *entry) + entries = append(entries, *entry) + } + } + + // Symlink layer (built in the first Nix build) needs to be + // included here manually: + slkey := result.SymlinkLayer.TarHash + entry, err := uploadHashLayer(ctx, s, slkey, func(w io.Writer) error { + f, err := os.Open(result.SymlinkLayer.Path) + if err != nil { + log.WithError(err).WithFields(log.Fields{ + "image": image.Name, + "tag": image.Tag, + "layer": slkey, + }).Error("failed to open symlink layer") + + return err + } + defer f.Close() + + gz := gzip.NewWriter(w) + _, err = io.Copy(gz, f) + if err != nil { + log.WithError(err).WithFields(log.Fields{ + "image": image.Name, + "tag": image.Tag, + "layer": slkey, + }).Error("failed to upload symlink layer") + + return err + } + + return gz.Close() + }) + + if err != nil { + return nil, err + } + + entry.TarHash = "sha256:" + result.SymlinkLayer.TarHash + go cacheLayer(ctx, s, slkey, *entry) + entries = append(entries, *entry) + + return entries, nil +} + +// layerWriter is the type for functions that can write a layer to the +// multiwriter used for uploading & hashing. +// +// This type exists to avoid duplication between the handling of +// symlink layers and store path layers. +type layerWriter func(w io.Writer) error + +// byteCounter is a special io.Writer that counts all bytes written to +// it and does nothing else. +// +// This is required because the ad-hoc writing of tarballs leaves no +// single place to count the final tarball size otherwise. +type byteCounter struct { + count int64 +} + +func (b *byteCounter) Write(p []byte) (n int, err error) { + b.count += int64(len(p)) + return len(p), nil +} + +// Upload a layer tarball to the storage bucket, while hashing it at +// the same time. The supplied function is expected to provide the +// layer data to the writer. +// +// The initial upload is performed in a 'staging' folder, as the +// SHA256-hash is not yet available when the upload is initiated. +// +// After a successful upload, the file is moved to its final location +// in the bucket and the build cache is populated. +// +// The return value is the layer's SHA256 hash, which is used in the +// image manifest. +func uploadHashLayer(ctx context.Context, s *State, key string, lw layerWriter) (*manifest.Entry, error) { + path := "staging/" + key + sha256sum, size, err := s.Storage.Persist(ctx, path, manifest.LayerType, func(sw io.Writer) (string, int64, error) { + // Sets up a "multiwriter" that simultaneously runs both hash + // algorithms and uploads to the storage backend. + shasum := sha256.New() + counter := &byteCounter{} + multi := io.MultiWriter(sw, shasum, counter) + + err := lw(multi) + sha256sum := fmt.Sprintf("%x", shasum.Sum([]byte{})) + + return sha256sum, counter.count, err + }) + + if err != nil { + log.WithError(err).WithFields(log.Fields{ + "layer": key, + "backend": s.Storage.Name(), + }).Error("failed to create and store layer") + + return nil, err + } + + // Hashes are now known and the object is in the bucket, what + // remains is to move it to the correct location and cache it. + err = s.Storage.Move(ctx, "staging/"+key, "layers/"+sha256sum) + if err != nil { + log.WithError(err).WithField("layer", key). + Error("failed to move layer from staging") + + return nil, err + } + + log.WithFields(log.Fields{ + "layer": key, + "sha256": sha256sum, + "size": size, + }).Info("created and persisted layer") + + entry := manifest.Entry{ + Digest: "sha256:" + sha256sum, + Size: size, + } + + return &entry, nil +} + +func BuildImage(ctx context.Context, s *State, image *Image) (*BuildResult, error) { + key := s.Cfg.Pkgs.CacheKey(image.Packages, image.Tag) + if key != "" { + if m, c := manifestFromCache(ctx, s, key); c { + return &BuildResult{ + Manifest: m, + }, nil + } + } + + imageResult, err := prepareImage(s, image) + if err != nil { + return nil, err + } + + if imageResult.Error != "" { + return &BuildResult{ + Error: imageResult.Error, + Pkgs: imageResult.Pkgs, + }, nil + } + + layers, err := prepareLayers(ctx, s, image, imageResult) + if err != nil { + return nil, err + } + + // If the requested packages include a shell, + // set cmd accordingly. + cmd := "" + for _, pkg := range image.Packages { + if pkg == "bashInteractive" { + cmd = "bash" + } + } + m, c := manifest.Manifest(image.Arch.imageArch, layers, cmd) + + lw := func(w io.Writer) error { + r := bytes.NewReader(c.Config) + _, err := io.Copy(w, r) + return err + } + + if _, err = uploadHashLayer(ctx, s, c.SHA256, lw); err != nil { + log.WithError(err).WithFields(log.Fields{ + "image": image.Name, + "tag": image.Tag, + }).Error("failed to upload config") + + return nil, err + } + + if key != "" { + go cacheManifest(ctx, s, key, m) + } + + result := BuildResult{ + Manifest: m, + } + return &result, nil +} |