// Copyright 2022 The TVL Contributors // SPDX-License-Identifier: Apache-2.0 // Package builder implements the logic for assembling container // images. It shells out to Nix to retrieve all required Nix-packages // and assemble the symlink layer and then creates the required // tarballs in-process. package builder import ( "bufio" "bytes" "compress/gzip" "context" "crypto/sha256" "encoding/json" "fmt" "io" "io/ioutil" "os" "os/exec" "sort" "strings" "github.com/google/nixery/config" "github.com/google/nixery/layers" "github.com/google/nixery/manifest" "github.com/google/nixery/storage" "github.com/im7mortal/kmutex" log "github.com/sirupsen/logrus" ) // The maximum number of layers in an image is 125. To allow for // extensibility, the actual number of layers Nixery is "allowed" to // use up is set at a lower point. const LayerBudget int = 94 // State holds the runtime state that is carried around in Nixery and // passed to builder functions. type State struct { Storage storage.Backend Cache *LocalCache Cfg config.Config Pop layers.Popularity UploadMutex *kmutex.Kmutex } // Architecture represents the possible CPU architectures for which // container images can be built. // // The default architecture is amd64, but support for ARM platforms is // available within nixpkgs and can be toggled via meta-packages. type Architecture struct { // Name of the system tuple to pass to Nix nixSystem string // Name of the architecture as used in the OCI manifests imageArch string } var amd64 = Architecture{"x86_64-linux", "amd64"} var arm64 = Architecture{"aarch64-linux", "arm64"} // Image represents the information necessary for building a container image. // This can be either a list of package names (corresponding to keys in the // nixpkgs set) or a Nix expression that results in a *list* of derivations. type Image struct { Name string Tag string // Names of packages to include in the image. These must correspond // directly to top-level names of Nix packages in the nixpkgs tree. Packages []string // Architecture for which to build the image. Nixery defaults // this to amd64 if not specified via meta-packages. Arch *Architecture } // BuildResult represents the data returned from the server to the // HTTP handlers. Error information is propagated straight from Nix // for errors inside of the build that should be fed back to the // client (such as missing packages). type BuildResult struct { Error string `json:"error"` Pkgs []string `json:"pkgs"` Manifest json.RawMessage `json:"manifest"` } // ImageFromName parses an image name into the corresponding structure which can // be used to invoke Nix. // // It will expand convenience names under the hood (see the `convenienceNames` // function below) and append packages that are always included (cacert, iana-etc). // // Once assembled the image structure uses a sorted representation of // the name. This is to avoid unnecessarily cache-busting images if // only the order of requested packages has changed. func ImageFromName(name string, tag string) Image { pkgs := strings.Split(name, "/") arch, expanded := metaPackages(pkgs) expanded = append(expanded, "cacert", "iana-etc") sort.Strings(pkgs) sort.Strings(expanded) return Image{ Name: strings.Join(pkgs, "/"), Tag: tag, Packages: expanded, Arch: arch, } } // ImageResult represents the output of calling the Nix derivation // responsible for preparing an image. type ImageResult struct { // These fields are populated in case of an error Error string `json:"error"` Pkgs []string `json:"pkgs"` // These fields are populated in case of success Graph layers.RuntimeGraph `json:"runtimeGraph"` SymlinkLayer struct { Size int `json:"size"` TarHash string `json:"tarHash"` Path string `json:"path"` } `json:"symlinkLayer"` } // metaPackages expands package names defined by Nixery which either // include sets of packages or trigger certain image-building // behaviour. // // Meta-packages must be specified as the first packages in an image // name. // // Currently defined meta-packages are: // // * `shell`: Includes bash, coreutils and other common command-line tools // * `arm64`: Causes Nixery to build images for the ARM64 architecture func metaPackages(packages []string) (*Architecture, []string) { arch := &amd64 var metapkgs []string lastMeta := 0 for idx, p := range packages { if p == "shell" || p == "arm64" { metapkgs = append(metapkgs, p) lastMeta = idx + 1 } else { break } } // Chop off the meta-packages from the front of the package // list packages = packages[lastMeta:] for _, p := range metapkgs { switch p { case "shell": packages = append(packages, "bashInteractive", "coreutils", "moreutils", "nano") case "arm64": arch = &arm64 } } return arch, packages } // logNix logs each output line from Nix. It runs in a goroutine per // output channel that should be live-logged. func logNix(image, cmd string, r io.ReadCloser) { scanner := bufio.NewScanner(r) for scanner.Scan() { log.WithFields(log.Fields{ "image": image, "cmd": cmd, }).Info("[nix] " + scanner.Text()) } } func callNix(program, image string, args []string) ([]byte, error) { cmd := exec.Command(program, args...) outpipe, err := cmd.StdoutPipe() if err != nil { return nil, err } errpipe, err := cmd.StderrPipe() if err != nil { return nil, err } go logNix(image, program, errpipe) if err = cmd.Start(); err != nil { log.WithError(err).WithFields(log.Fields{ "image": image, "cmd": program, }).Error("error invoking Nix") return nil, err } log.WithFields(log.Fields{ "cmd": program, "image": image, }).Info("invoked Nix build") stdout, _ := ioutil.ReadAll(outpipe) if err = cmd.Wait(); err != nil { log.WithError(err).WithFields(log.Fields{ "image": image, "cmd": program, "stdout": stdout, }).Info("failed to invoke Nix") return nil, err } resultFile := strings.TrimSpace(string(stdout)) buildOutput, err := ioutil.ReadFile(resultFile) if err != nil { log.WithError(err).WithFields(log.Fields{ "image": image, "file": resultFile, }).Info("failed to read Nix result file") return nil, err } return buildOutput, nil } // Call out to Nix and request metadata for the image to be built. All // required store paths for the image will be realised, but layers // will not yet be created from them. // // This function is only invoked if the manifest is not found in any // cache. func prepareImage(s *State, image *Image) (*ImageResult, error) { packages, err := json.Marshal(image.Packages) if err != nil { return nil, err } srcType, srcArgs := s.Cfg.Pkgs.Render(image.Tag) args := []string{ "--timeout", s.Cfg.Timeout, "--argstr", "packages", string(packages), "--argstr", "srcType", srcType, "--argstr", "srcArgs", srcArgs, "--argstr", "system", image.Arch.nixSystem, } output, err := callNix("nixery-prepare-image", image.Name, args) if err != nil { // granular error logging is performed in callNix already return nil, err } log.WithFields(log.Fields{ "image": image.Name, "tag": image.Tag, }).Info("finished image preparation via Nix") var result ImageResult err = json.Unmarshal(output, &result) if err != nil { return nil, err } return &result, nil } // Groups layers and checks whether they are present in the cache // already, otherwise calls out to Nix to assemble layers. // // Newly built layers are uploaded to the bucket. Cache entries are // added only after successful uploads, which guarantees that entries // retrieved from the cache are present in the bucket. func prepareLayers(ctx context.Context, s *State, image *Image, result *ImageResult) ([]manifest.Entry, error) { grouped := layers.GroupLayers(&result.Graph, &s.Pop, LayerBudget) var entries []manifest.Entry // Splits the layers into those which are already present in // the cache, and those that are missing. // // Missing layers are built and uploaded to the storage // bucket. for _, l := range grouped { lh := l.Hash() // While packing store paths, the SHA sum of // the uncompressed layer is computed and // written to `tarhash`. // // TODO(tazjin): Refactor this to make the // flow of data cleaner. lw := func(w io.Writer) (string, error) { tarhash, err := packStorePaths(&l, w) if err != nil { return "", err } var pkgs []string for _, p := range l.Contents { pkgs = append(pkgs, layers.PackageFromPath(p)) } log.WithFields(log.Fields{ "layer": lh, "packages": pkgs, "tarhash": tarhash, }).Info("created image layer") return tarhash, err } entry, err := uploadHashLayer(ctx, s, lh, l.MergeRating, lw) if err != nil { return nil, err } entries = append(entries, *entry) } // Symlink layer (built in the first Nix build) needs to be // included here manually: slkey := result.SymlinkLayer.TarHash entry, err := uploadHashLayer(ctx, s, slkey, 0, func(w io.Writer) (string, error) { f, err := os.Open(result.SymlinkLayer.Path) if err != nil { log.WithError(err).WithFields(log.Fields{ "image": image.Name, "tag": image.Tag, "layer": slkey, }).Error("failed to open symlink layer") return "", err } defer f.Close() gz := gzip.NewWriter(w) _, err = io.Copy(gz, f) if err != nil { log.WithError(err).WithFields(log.Fields{ "image": image.Name, "tag": image.Tag, "layer": slkey, }).Error("failed to upload symlink layer") return "", err } return "sha256:" + slkey, gz.Close() }) if err != nil { return nil, err } entries = append(entries, *entry) return entries, nil } // layerWriter is the type for functions that can write a layer to the // multiwriter used for uploading & hashing. // // This type exists to avoid duplication between the handling of // symlink layers and store path layers. type layerWriter func(w io.Writer) (string, error) // byteCounter is a special io.Writer that counts all bytes written to // it and does nothing else. // // This is required because the ad-hoc writing of tarballs leaves no // single place to count the final tarball size otherwise. type byteCounter struct { count int64 } func (b *byteCounter) Write(p []byte) (n int, err error) { b.count += int64(len(p)) return len(p), nil } // Upload a layer tarball to the storage bucket, while hashing it at // the same time. The supplied function is expected to provide the // layer data to the writer. // // The initial upload is performed in a 'staging' folder, as the // SHA256-hash is not yet available when the upload is initiated. // // After a successful upload, the file is moved to its final location // in the bucket and the build cache is populated. // // The return value is the layer's SHA256 hash, which is used in the // image manifest. func uploadHashLayer(ctx context.Context, s *State, key string, mrating uint64, lw layerWriter) (*manifest.Entry, error) { s.UploadMutex.Lock(key) defer s.UploadMutex.Unlock(key) if entry, cached := layerFromCache(ctx, s, key); cached { return entry, nil } path := "staging/" + key var tarhash string sha256sum, size, err := s.Storage.Persist(ctx, path, manifest.LayerType, func(sw io.Writer) (string, int64, error) { // Sets up a "multiwriter" that simultaneously runs both hash // algorithms and uploads to the storage backend. shasum := sha256.New() counter := &byteCounter{} multi := io.MultiWriter(sw, shasum, counter) var err error tarhash, err = lw(multi) sha256sum := fmt.Sprintf("%x", shasum.Sum([]byte{})) return sha256sum, counter.count, err }) if err != nil { log.WithError(err).WithFields(log.Fields{ "layer": key, "backend": s.Storage.Name(), }).Error("failed to create and store layer") return nil, err } // Hashes are now known and the object is in the bucket, what // remains is to move it to the correct location and cache it. err = s.Storage.Move(ctx, "staging/"+key, "layers/"+sha256sum) if err != nil { log.WithError(err).WithField("layer", key). Error("failed to move layer from staging") return nil, err } log.WithFields(log.Fields{ "layer": key, "sha256": sha256sum, "size": size, }).Info("created and persisted layer") entry := manifest.Entry{ Digest: "sha256:" + sha256sum, Size: size, TarHash: tarhash, MergeRating: mrating, } cacheLayer(ctx, s, key, entry) return &entry, nil } func BuildImage(ctx context.Context, s *State, image *Image) (*BuildResult, error) { key := s.Cfg.Pkgs.CacheKey(image.Packages, image.Tag) if key != "" { if m, c := manifestFromCache(ctx, s, key); c { return &BuildResult{ Manifest: m, }, nil } } imageResult, err := prepareImage(s, image) if err != nil { return nil, err } if imageResult.Error != "" { return &BuildResult{ Error: imageResult.Error, Pkgs: imageResult.Pkgs, }, nil } layers, err := prepareLayers(ctx, s, image, imageResult) if err != nil { return nil, err } // If the requested packages include a shell, // set cmd accordingly. cmd := "" for _, pkg := range image.Packages { if pkg == "bashInteractive" { cmd = "bash" } } m, c := manifest.Manifest(image.Arch.imageArch, layers, cmd) lw := func(w io.Writer) (string, error) { r := bytes.NewReader(c.Config) _, err := io.Copy(w, r) return "", err } if _, err = uploadHashLayer(ctx, s, c.SHA256, 0, lw); err != nil { log.WithError(err).WithFields(log.Fields{ "image": image.Name, "tag": image.Tag, }).Error("failed to upload config") return nil, err } if key != "" { go cacheManifest(ctx, s, key, m) } result := BuildResult{ Manifest: m, } return &result, nil }