diff options
-rw-r--r-- | tools/nixery/server/builder/builder.go | 333 | ||||
-rw-r--r-- | tools/nixery/server/config/config.go | 2 | ||||
-rw-r--r-- | tools/nixery/server/layers/grouping.go | 2 |
3 files changed, 250 insertions, 87 deletions
diff --git a/tools/nixery/server/builder/builder.go b/tools/nixery/server/builder/builder.go index a5744a85348a..303d796df6bf 100644 --- a/tools/nixery/server/builder/builder.go +++ b/tools/nixery/server/builder/builder.go @@ -21,20 +21,33 @@ import ( "bufio" "bytes" "context" + "crypto/md5" + "crypto/sha256" "encoding/json" "fmt" "io" "io/ioutil" "log" + "net/http" + "net/url" "os" "os/exec" "sort" "strings" "cloud.google.com/go/storage" - "github.com/google/nixery/config" + "github.com/google/nixery/layers" + "golang.org/x/oauth2/google" ) +// The maximum number of layers in an image is 125. To allow for +// extensibility, the actual number of layers Nixery is "allowed" to +// use up is set at a lower point. +const LayerBudget int = 94 + +// HTTP client to use for direct calls to APIs that are not part of the SDK +var client = &http.Client{} + // Image represents the information necessary for building a container image. // This can be either a list of package names (corresponding to keys in the // nixpkgs set) or a Nix expression that results in a *list* of derivations. @@ -47,6 +60,14 @@ type Image struct { Packages []string } +// TODO(tazjin): docstring +type BuildResult struct { + Error string + Pkgs []string + + Manifest struct{} // TODO(tazjin): OCIv1 manifest +} + // ImageFromName parses an image name into the corresponding structure which can // be used to invoke Nix. // @@ -70,24 +91,21 @@ func ImageFromName(name string, tag string) Image { } } -// BuildResult represents the output of calling the Nix derivation responsible -// for building registry images. -// -// The `layerLocations` field contains the local filesystem paths to each -// individual image layer that will need to be served, while the `manifest` -// field contains the JSON-representation of the manifest that needs to be -// served to the client. -// -// The later field is simply treated as opaque JSON and passed through. 
-type BuildResult struct {
-	Error    string          `json:"error"`
-	Pkgs     []string        `json:"pkgs"`
-	Manifest json.RawMessage `json:"manifest"`
-
-	LayerLocations map[string]struct {
-		Path string `json:"path"`
-		Md5  []byte `json:"md5"`
-	} `json:"layerLocations"`
+// ImageResult represents the output of calling the Nix derivation
+// responsible for preparing an image.
+type ImageResult struct {
+	// These fields are populated in case of an error
+	Error string   `json:"error"`
+	Pkgs  []string `json:"pkgs"`
+
+	// These fields are populated in case of success
+	Graph        layers.RuntimeGraph `json:"runtimeGraph"`
+	SymlinkLayer struct {
+		Size   int    `json:"size"`
+		SHA256 string `json:"sha256"`
+		MD5    string `json:"md5"`
+		Path   string `json:"path"`
+	} `json:"symlinkLayer"`
 }

 // convenienceNames expands convenience package names defined by Nixery which
@@ -117,99 +135,244 @@ func logNix(name string, r io.ReadCloser) {
 	}
 }

-// Call out to Nix and request that an image be built. Nix will, upon success,
-// return a manifest for the container image.
-func BuildImage(ctx *context.Context, cfg *config.Config, cache *LocalCache, image *Image, bucket *storage.BucketHandle) (*BuildResult, error) {
-	var resultFile string
-	cached := false
+// callNix runs the given program with the supplied arguments and
+// returns the contents of the file whose path the program printed on
+// its standard output.
+//
+// The program's standard error is forwarded to logNix under the
+// given name while it runs.
+func callNix(program string, name string, args []string) ([]byte, error) {
+	cmd := exec.Command(program, args...)

-	key := cfg.Pkgs.CacheKey(image.Packages, image.Tag)
-	if key != "" {
-		resultFile, cached = manifestFromCache(ctx, cache, bucket, key)
+	outpipe, err := cmd.StdoutPipe()
+	if err != nil {
+		return nil, err
 	}

-	if !cached {
-		packages, err := json.Marshal(image.Packages)
-		if err != nil {
-			return nil, err
-		}
+	errpipe, err := cmd.StderrPipe()
+	if err != nil {
+		return nil, err
+	}
+	go logNix(name, errpipe)

-		srcType, srcArgs := cfg.Pkgs.Render(image.Tag)
+	// The command has to be started before its output is read:
+	// reading from the pipe of a command that was never started
+	// does not complete, and Wait fails on an unstarted command.
+	if err = cmd.Start(); err != nil {
+		log.Printf("error starting %s: %s\n", program, err)
+		return nil, err
+	}
+
+	// Wait must only be called after all reads from the pipe have
+	// finished, so the read error is inspected after Wait returns.
+	stdout, readErr := ioutil.ReadAll(outpipe)

-		args := []string{
-			"--timeout", cfg.Timeout,
-			"--argstr", "name", image.Name,
-			"--argstr", "packages", string(packages),
-			"--argstr", "srcType", srcType,
-			"--argstr", "srcArgs", srcArgs,
-		}
+	if err = cmd.Wait(); err != nil {
+		log.Printf("%s execution error: %s\nstdout: %s\n", program, err, stdout)
+		return nil, err
+	}
+	if readErr != nil {
+		return nil, readErr
+	}

-		if cfg.PopUrl != "" {
-			args = append(args, "--argstr", "popularityUrl", cfg.PopUrl)
-		}
+	// The program prints the path of its result file on stdout.
+	resultFile := strings.TrimSpace(string(stdout))
+	buildOutput, err := ioutil.ReadFile(resultFile)
+	if err != nil {
+		return nil, err
+	}

-		cmd := exec.Command("nixery-build-image", args...)
+	return buildOutput, nil
+}

-		outpipe, err := cmd.StdoutPipe()
-		if err != nil {
-			return nil, err
-		}
+// Call out to Nix and request metadata for the image to be built. All
+// required store paths for the image will be realised, but layers
+// will not yet be created from them.
+//
+// This function is only invoked if the manifest is not found in any
+// cache.
+func prepareImage(s *State, image *Image) (*ImageResult, error) {
+	packages, err := json.Marshal(image.Packages)
+	if err != nil {
+		return nil, err
+	}

-		errpipe, err := cmd.StderrPipe()
-		if err != nil {
-			return nil, err
-		}
-		go logNix(image.Name, errpipe)
+	srcType, srcArgs := s.Cfg.Pkgs.Render(image.Tag)

-		if err = cmd.Start(); err != nil {
-			log.Println("Error starting nix-build:", err)
-			return nil, err
-		}
-		log.Printf("Started Nix image build for '%s'", image.Name)
+	args := []string{
+		"--timeout", s.Cfg.Timeout,
+		"--argstr", "packages", string(packages),
+		"--argstr", "srcType", srcType,
+		"--argstr", "srcArgs", srcArgs,
+	}

-		stdout, _ := ioutil.ReadAll(outpipe)
+	output, err := callNix("nixery-build-image", image.Name, args)
+	if err != nil {
+		log.Printf("failed to call nixery-build-image: %s\n", err)
+		return nil, err
+	}
+	log.Printf("Finished image preparation for '%s' via Nix\n", image.Name)

-		if err = cmd.Wait(); err != nil {
-			log.Printf("nix-build execution error: %s\nstdout: %s\n", err, stdout)
-			return nil, err
-		}
+	var result ImageResult
+	err = json.Unmarshal(output, &result)
+	if err != nil {
+		return nil, err
+	}

-		log.Println("Finished Nix image build")
+	return &result, nil
+}

-		resultFile = strings.TrimSpace(string(stdout))
+// prepareLayers groups the store paths of the runtime graph into
+// layers and calls out to Nix to assemble them.
+//
+// Checking whether layers are already present in the cache is not
+// implemented yet (see the TODO below), so every layer is built.
+//
+// The returned map is the decoded JSON output of
+// 'nixery-build-layers'.
+// NOTE(review): presumably this maps layer hashes to the location of
+// the built layer - confirm against the Nix derivation.
+func prepareLayers(ctx *context.Context, s *State, image *Image, graph *layers.RuntimeGraph) (map[string]string, error) {
+	grouped := layers.Group(graph, &s.Pop, LayerBudget)
+
+	// TODO(tazjin): Introduce caching strategy, for now this will
+	// build all layers.
+	srcType, srcArgs := s.Cfg.Pkgs.Render(image.Tag)
+	args := []string{
+		"--argstr", "srcType", srcType,
+		"--argstr", "srcArgs", srcArgs,
+	}

-		if key != "" {
-			cacheManifest(ctx, cache, bucket, key, resultFile)
+	// The map must be initialised before use, assigning into a nil
+	// map panics.
+	layerInput := make(map[string][]string, len(grouped))
+	for _, l := range grouped {
+		layerInput[l.Hash()] = l.Contents
+
+		// The derivation responsible for building layers does not
+		// have the derivations that resulted in the required store
+		// paths in its context, which means that its sandbox will not
+		// contain the necessary paths if sandboxing is enabled.
+		//
+		// To work around this, all required store paths are added as
+		// 'extra-sandbox-paths' parameters.
+		for _, p := range l.Contents {
+			args = append(args, "--option", "extra-sandbox-paths", p)
 		}
 	}

-	buildOutput, err := ioutil.ReadFile(resultFile)
+	j, err := json.Marshal(layerInput)
+	if err != nil {
+		return nil, err
+	}
+	args = append(args, "--argstr", "layers", string(j))
+
+	output, err := callNix("nixery-build-layers", image.Name, args)
 	if err != nil {
+		log.Printf("failed to call nixery-build-layers: %s\n", err)
 		return nil, err
 	}

-	// The build output returned by Nix is deserialised to add all
-	// contained layers to the bucket. Only the manifest itself is
-	// re-serialised to JSON and returned.
-	var result BuildResult
+	result := make(map[string]string)
+	err = json.Unmarshal(output, &result)
+	if err != nil {
+		return nil, err
+	}
+
+	return result, nil
+}
+
+// renameObject renames an object in the specified Cloud Storage
+// bucket.
+//
+// The Go API for Cloud Storage does not support renaming objects, but
+// the HTTP API does. The code below makes the relevant call manually.
+func renameObject(ctx context.Context, s *State, old, new string) error {
+	bucket := s.Cfg.Bucket
+
+	creds, err := google.FindDefaultCredentials(ctx)
+	if err != nil {
+		return err
+	}
+
+	token, err := creds.TokenSource.Token()
+	if err != nil {
+		return err
+	}
+
+	// as per https://cloud.google.com/storage/docs/renaming-copying-moving-objects#rename
+	//
+	// The variable is deliberately not called 'url', which would
+	// shadow the net/url package for the rest of the function.
+	rewriteURL := fmt.Sprintf(
+		"https://www.googleapis.com/storage/v1/b/%s/o/%s/rewriteTo/b/%s/o/%s",
+		url.PathEscape(bucket), url.PathEscape(old),
+		url.PathEscape(bucket), url.PathEscape(new),
+	)

-	err = json.Unmarshal(buildOutput, &result)
+	req, err := http.NewRequest("POST", rewriteURL, nil)
+	if err != nil {
+		return err
+	}
+	// Tie the request to the caller's context so that cancellation
+	// and deadlines apply to the HTTP call as well.
+	req = req.WithContext(ctx)
+	req.Header.Add("Authorization", "Bearer "+token.AccessToken)
+
+	resp, err := client.Do(req)
+	if err != nil {
+		return err
+	}
+	defer resp.Body.Close()
+
+	// Drain the body so that the connection can be reused. A failure
+	// to do so does not affect the outcome of the rewrite and is
+	// deliberately ignored.
+	io.Copy(ioutil.Discard, resp.Body)
+
+	// The original must not be deleted unless the copy succeeded,
+	// otherwise the object would be lost.
+	//
+	// NOTE(review): the rewrite API may require follow-up calls with
+	// a 'rewriteToken' for large objects; this is not handled here.
+	if resp.StatusCode != http.StatusOK {
+		return fmt.Errorf("rewriting '%s' to '%s' failed with status %s", old, new, resp.Status)
+	}
+
+	// It seems that 'rewriteTo' copies objects instead of
+	// renaming/moving them, hence a deletion call afterwards is
+	// required.
+	if err = s.Bucket.Object(old).Delete(ctx); err != nil {
+		log.Printf("failed to delete renamed object '%s': %s\n", old, err)
+		// this error should not break renaming and is not returned
+	}
+
+	return nil
+}
+
+// uploadHashLayer uploads a layer to the storage bucket, while
+// hashing it at the same time.
+//
+// The initial upload is performed in a 'staging' folder, as the
+// SHA256-hash is not yet available when the upload is initiated.
+//
+// After a successful upload, the file is moved to its final location
+// in the bucket and the build cache is populated.
+//
+// The return value is the layer's SHA256 hash, which is used in the
+// image manifest.
+func uploadHashLayer(ctx context.Context, s *State, key, path string) (string, error) {
+	// Open the layer first, so that no upload is initiated for a
+	// file that can not be read.
+	f, err := os.Open(path)
+	if err != nil {
+		log.Printf("failed to open layer at '%s' for reading: %s\n", path, err)
+		return "", err
+	}
+	defer f.Close()
+
+	// A bucket writer that is abandoned without being closed has to
+	// be stopped by cancelling its context. Cancelling after a
+	// successful Close has no effect on the finished upload.
+	uploadCtx, cancel := context.WithCancel(ctx)
+	defer cancel()
+
+	staging := s.Bucket.Object("staging/" + key)
+
+	// Set up a writer that simultaneously runs both hash
+	// algorithms and uploads to the bucket
+	sw := staging.NewWriter(uploadCtx)
+	shasum := sha256.New()
+	md5sum := md5.New()
+	multi := io.MultiWriter(sw, shasum, md5sum)
+
+	size, err := io.Copy(multi, f)
+	if err != nil {
+		log.Printf("failed to upload layer '%s' to staging: %s\n", key, err)
+		return "", err
+	}
+
+	// The upload is only complete once the writer has been closed.
+	if err = sw.Close(); err != nil {
+		log.Printf("failed to upload layer '%s' to staging: %s\n", key, err)
+		return "", err
+	}
+
+	build := Build{
+		SHA256: fmt.Sprintf("%x", shasum.Sum(nil)),
+		MD5:    fmt.Sprintf("%x", md5sum.Sum(nil)),
+	}
+
+	// Hashes are now known and the object is in the bucket, what
+	// remains is to move it to the correct location and cache it.
+	err = renameObject(ctx, s, "staging/"+key, "layers/"+build.SHA256)
+	if err != nil {
+		log.Printf("failed to move layer '%s' from staging: %s\n", key, err)
+		return "", err
+	}
+
+	cacheBuild(ctx, &s.Cache, s.Bucket, key, build)
+
+	log.Printf("Uploaded layer sha256:%s (%v bytes written)", build.SHA256, size)
+
+	return build.SHA256, nil
+}
+
+// BuildImage prepares the requested image via Nix and then prepares
+// its layers.
+//
+// If Nix reports an error for the image, a BuildResult carrying that
+// error and the affected packages is returned.
+//
+// NOTE(review): no manifest is assembled yet. On success this
+// currently returns a nil result together with a nil error, which
+// callers have to account for.
+func BuildImage(ctx *context.Context, s *State, image *Image) (*BuildResult, error) {
+	imageResult, err := prepareImage(s, image)
 	if err != nil {
 		return nil, err
 	}

-	for layer, meta := range result.LayerLocations {
-		if !cache.hasSeenLayer(layer) {
-			err = uploadLayer(ctx, bucket, layer, meta.Path, meta.Md5)
-			if err != nil {
-				return nil, err
-			}
+	if imageResult.Error != "" {
+		return &BuildResult{
+			Error: imageResult.Error,
+			Pkgs:  imageResult.Pkgs,
+		}, nil
+	}

-			cache.sawLayer(layer)
-		}
+	_, err = prepareLayers(ctx, s, image, &imageResult.Graph)
+	if err != nil {
+		return nil, err
 	}

-	return &result, nil
+	return nil, nil
 }

 // uploadLayer uploads a single layer to Cloud Storage bucket. Before writing
@@ -217,7 +380,7 @@ func BuildImage(ctx *context.Context, cfg *config.Config, cache *LocalCache, ima
 //
 // If the file does exist, its MD5 hash is verified to ensure that the stored
 // file is not - for example - a fragment of a previous, incomplete upload.
-func uploadLayer(ctx *context.Context, bucket *storage.BucketHandle, layer string, path string, md5 []byte) error {
+func uploadLayer(ctx context.Context, bucket *storage.BucketHandle, layer string, path string, md5 []byte) error {
 	layerKey := fmt.Sprintf("layers/%s", layer)
 	obj := bucket.Object(layerKey)

@@ -226,12 +389,12 @@ func uploadLayer(ctx *context.Context, bucket *storage.BucketHandle, layer strin
 	//
 	// If it does and the MD5 checksum matches the expected one, the layer
 	// upload can be skipped.
-	attrs, err := obj.Attrs(*ctx)
+	attrs, err := obj.Attrs(ctx)

 	if err == nil && bytes.Equal(attrs.MD5, md5) {
 		log.Printf("Layer sha256:%s already exists in bucket, skipping upload", layer)
 	} else {
-		writer := obj.NewWriter(*ctx)
+		writer := obj.NewWriter(ctx)
 		file, err := os.Open(path)

 		if err != nil {
diff --git a/tools/nixery/server/config/config.go b/tools/nixery/server/config/config.go
index ac8820f23116..30f727db1112 100644
--- a/tools/nixery/server/config/config.go
+++ b/tools/nixery/server/config/config.go
@@ -60,7 +60,7 @@ func getConfig(key, desc, def string) string {
 	return value
 }

-// config holds the Nixery configuration options.
+// Config holds the Nixery configuration options.
 type Config struct {
 	Bucket  string                    // GCS bucket to cache & serve layers
 	Signing *storage.SignedURLOptions // Signing options to use for GCS URLs
diff --git a/tools/nixery/server/layers/grouping.go b/tools/nixery/server/layers/grouping.go
index f8259ab989ff..07a9e0e230a5 100644
--- a/tools/nixery/server/layers/grouping.go
+++ b/tools/nixery/server/layers/grouping.go
@@ -328,7 +328,7 @@ func dominate(budget int, graph *simple.DirectedGraph) []Layer {
-// GroupLayers applies the algorithm described above the its input and returns a
+// Group applies the algorithm described above to its input and returns a
 // list of layers, each consisting of a list of Nix store paths that it should
 // contain.
-func GroupLayers(refs *RuntimeGraph, pop *Popularity, budget int) []Layer {
+func Group(refs *RuntimeGraph, pop *Popularity, budget int) []Layer {
 	graph := buildGraph(refs, pop)
 	return dominate(budget, graph)
 }
|