// Copyright 2019 Google LLC // // Licensed under the Apache License, Version 2.0 (the "License"); you may not // use this file except in compliance with the License. You may obtain a copy of // the License at // // https://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, WITHOUT // WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the // License for the specific language governing permissions and limitations under // the License. // Package builder implements the code required to build images via Nix. Image // build data is cached for up to 24 hours to avoid duplicated calls to Nix // (which are costly even if no building is performed). package builder import ( "bufio" "bytes" "context" "crypto/md5" "crypto/sha256" "encoding/json" "fmt" "io" "io/ioutil" "log" "net/http" "net/url" "os" "os/exec" "sort" "strings" "cloud.google.com/go/storage" "github.com/google/nixery/layers" "golang.org/x/oauth2/google" ) // The maximum number of layers in an image is 125. To allow for // extensibility, the actual number of layers Nixery is "allowed" to // use up is set at a lower point. const LayerBudget int = 94 // HTTP client to use for direct calls to APIs that are not part of the SDK var client = &http.Client{} // Image represents the information necessary for building a container image. // This can be either a list of package names (corresponding to keys in the // nixpkgs set) or a Nix expression that results in a *list* of derivations. type Image struct { Name string Tag string // Names of packages to include in the image. These must correspond // directly to top-level names of Nix packages in the nixpkgs tree. Packages []string } // TODO(tazjin): docstring type BuildResult struct { Error string Pkgs []string Manifest struct{} // TODO(tazjin): OCIv1 manifest } // ImageFromName parses an image name into the corresponding structure which can // be used to invoke Nix. // // It will expand convenience names under the hood (see the `convenienceNames` // function below). // // Once assembled the image structure uses a sorted representation of // the name. This is to avoid unnecessarily cache-busting images if // only the order of requested packages has changed. func ImageFromName(name string, tag string) Image { pkgs := strings.Split(name, "/") expanded := convenienceNames(pkgs) sort.Strings(pkgs) sort.Strings(expanded) return Image{ Name: strings.Join(pkgs, "/"), Tag: tag, Packages: expanded, } } // ImageResult represents the output of calling the Nix derivation // responsible for preparing an image. type ImageResult struct { // These fields are populated in case of an error Error string `json:"error"` Pkgs []string `json:"pkgs"` // These fields are populated in case of success Graph layers.RuntimeGraph `json:"runtimeGraph"` SymlinkLayer struct { Size int `json:"size"` SHA256 string `json:"sha256"` MD5 string `json:"md5"` Path string `json:"path"` } `json:"symlinkLayer"` } // convenienceNames expands convenience package names defined by Nixery which // let users include commonly required sets of tools in a container quickly. // // Convenience names must be specified as the first package in an image. // // Currently defined convenience names are: // // * `shell`: Includes bash, coreutils and other common command-line tools func convenienceNames(packages []string) []string { shellPackages := []string{"bashInteractive", "cacert", "coreutils", "iana-etc", "moreutils", "nano"} if packages[0] == "shell" { return append(packages[1:], shellPackages...) } return packages } // logNix logs each output line from Nix. It runs in a goroutine per // output channel that should be live-logged. func logNix(name string, r io.ReadCloser) { scanner := bufio.NewScanner(r) for scanner.Scan() { log.Printf("\x1b[31m[nix - %s]\x1b[39m %s\n", name, scanner.Text()) } } func callNix(program string, name string, args []string) ([]byte, error) { cmd := exec.Command(program, args...) outpipe, err := cmd.StdoutPipe() if err != nil { return nil, err } errpipe, err := cmd.StderrPipe() if err != nil { return nil, err } go logNix(name, errpipe) stdout, _ := ioutil.ReadAll(outpipe) if err = cmd.Wait(); err != nil { log.Printf("%s execution error: %s\nstdout: %s\n", program, err, stdout) return nil, err } resultFile := strings.TrimSpace(string(stdout)) buildOutput, err := ioutil.ReadFile(resultFile) if err != nil { return nil, err } return buildOutput, nil } // Call out to Nix and request metadata for the image to be built. All // required store paths for the image will be realised, but layers // will not yet be created from them. // // This function is only invoked if the manifest is not found in any // cache. func prepareImage(s *State, image *Image) (*ImageResult, error) { packages, err := json.Marshal(image.Packages) if err != nil { return nil, err } srcType, srcArgs := s.Cfg.Pkgs.Render(image.Tag) args := []string{ "--timeout", s.Cfg.Timeout, "--argstr", "packages", string(packages), "--argstr", "srcType", srcType, "--argstr", "srcArgs", srcArgs, } output, err := callNix("nixery-build-image", image.Name, args) if err != nil { log.Printf("failed to call nixery-build-image: %s\n", err) return nil, err } log.Printf("Finished image preparation for '%s' via Nix\n", image.Name) var result ImageResult err = json.Unmarshal(output, &result) if err != nil { return nil, err } return &result, nil } // Groups layers and checks whether they are present in the cache // already, otherwise calls out to Nix to assemble layers. // // Returns information about all data layers that need to be included // in the manifest, as well as information about which layers need to // be uploaded (and from where). func prepareLayers(ctx *context.Context, s *State, image *Image, graph *layers.RuntimeGraph) (map[string]string, error) { grouped := layers.Group(graph, &s.Pop, LayerBudget) // TODO(tazjin): Introduce caching strategy, for now this will // build all layers. srcType, srcArgs := s.Cfg.Pkgs.Render(image.Tag) args := []string{ "--argstr", "srcType", srcType, "--argstr", "srcArgs", srcArgs, } var layerInput map[string][]string for _, l := range grouped { layerInput[l.Hash()] = l.Contents // The derivation responsible for building layers does not // have the derivations that resulted in the required store // paths in its context, which means that its sandbox will not // contain the necessary paths if sandboxing is enabled. // // To work around this, all required store paths are added as // 'extra-sandbox-paths' parameters. for _, p := range l.Contents { args = append(args, "--option", "extra-sandbox-paths", p) } } j, _ := json.Marshal(layerInput) args = append(args, "--argstr", "layers", string(j)) output, err := callNix("nixery-build-layers", image.Name, args) if err != nil { log.Printf("failed to call nixery-build-layers: %s\n", err) return nil, err } result := make(map[string]string) err = json.Unmarshal(output, &result) if err != nil { return nil, err } return result, nil } // renameObject renames an object in the specified Cloud Storage // bucket. // // The Go API for Cloud Storage does not support renaming objects, but // the HTTP API does. The code below makes the relevant call manually. func renameObject(ctx context.Context, s *State, old, new string) error { bucket := s.Cfg.Bucket creds, err := google.FindDefaultCredentials(ctx) if err != nil { return err } token, err := creds.TokenSource.Token() if err != nil { return err } // as per https://cloud.google.com/storage/docs/renaming-copying-moving-objects#rename url := fmt.Sprintf( "https://www.googleapis.com/storage/v1/b/%s/o/%s/rewriteTo/b/%s/o/%s", url.PathEscape(bucket), url.PathEscape(old), url.PathEscape(bucket), url.PathEscape(new), ) req, err := http.NewRequest("POST", url, nil) req.Header.Add("Authorization", "Bearer "+token.AccessToken) _, err = client.Do(req) if err != nil { return err } // It seems that 'rewriteTo' copies objects instead of // renaming/moving them, hence a deletion call afterwards is // required. if err = s.Bucket.Object(old).Delete(ctx); err != nil { log.Printf("failed to delete renamed object '%s': %s\n", old, err) // this error should not break renaming and is not returned } return nil } // Upload a to the storage bucket, while hashing it at the same time. // // The initial upload is performed in a 'staging' folder, as the // SHA256-hash is not yet available when the upload is initiated. // // After a successful upload, the file is moved to its final location // in the bucket and the build cache is populated. // // The return value is the layer's SHA256 hash, which is used in the // image manifest. func uploadHashLayer(ctx context.Context, s *State, key, path string) (string, error) { staging := s.Bucket.Object("staging/" + key) // Set up a writer that simultaneously runs both hash // algorithms and uploads to the bucket sw := staging.NewWriter(ctx) shasum := sha256.New() md5sum := md5.New() multi := io.MultiWriter(sw, shasum, md5sum) f, err := os.Open(path) if err != nil { log.Printf("failed to open layer at '%s' for reading: %s\n", path, err) return "", err } defer f.Close() size, err := io.Copy(multi, f) if err != nil { log.Printf("failed to upload layer '%s' to staging: %s\n", key, err) return "", err } if err = sw.Close(); err != nil { log.Printf("failed to upload layer '%s' to staging: %s\n", key, err) return "", err } build := Build{ SHA256: fmt.Sprintf("%x", shasum.Sum([]byte{})), MD5: fmt.Sprintf("%x", md5sum.Sum([]byte{})), } // Hashes are now known and the object is in the bucket, what // remains is to move it to the correct location and cache it. err = renameObject(ctx, s, "staging/"+key, "layers/"+build.SHA256) if err != nil { log.Printf("failed to move layer '%s' from staging: %s\n", key, err) return "", err } cacheBuild(ctx, &s.Cache, s.Bucket, key, build) log.Printf("Uploaded layer sha256:%s (%v bytes written)", build.SHA256, size) return build.SHA256, nil } func BuildImage(ctx *context.Context, s *State, image *Image) (*BuildResult, error) { imageResult, err := prepareImage(s, image) if err != nil { return nil, err } if imageResult.Error != "" { return &BuildResult{ Error: imageResult.Error, Pkgs: imageResult.Pkgs, }, nil } _, err = prepareLayers(ctx, s, image, &imageResult.Graph) if err != nil { return nil, err } return nil, nil } // uploadLayer uploads a single layer to Cloud Storage bucket. Before writing // any data the bucket is probed to see if the file already exists. // // If the file does exist, its MD5 hash is verified to ensure that the stored // file is not - for example - a fragment of a previous, incomplete upload. func uploadLayer(ctx context.Context, bucket *storage.BucketHandle, layer string, path string, md5 []byte) error { layerKey := fmt.Sprintf("layers/%s", layer) obj := bucket.Object(layerKey) // Before uploading a layer to the bucket, probe whether it already // exists. // // If it does and the MD5 checksum matches the expected one, the layer // upload can be skipped. attrs, err := obj.Attrs(ctx) if err == nil && bytes.Equal(attrs.MD5, md5) { log.Printf("Layer sha256:%s already exists in bucket, skipping upload", layer) } else { writer := obj.NewWriter(ctx) file, err := os.Open(path) if err != nil { return fmt.Errorf("failed to open layer %s from path %s: %v", layer, path, err) } size, err := io.Copy(writer, file) if err != nil { return fmt.Errorf("failed to write layer %s to Cloud Storage: %v", layer, err) } if err = writer.Close(); err != nil { return fmt.Errorf("failed to write layer %s to Cloud Storage: %v", layer, err) } log.Printf("Uploaded layer sha256:%s (%v bytes written)\n", layer, size) } return nil }