From 53906024ff0612b6946cff4122dc28e85a414b6b Mon Sep 17 00:00:00 2001 From: Vincent Ambo Date: Thu, 3 Oct 2019 12:11:46 +0100 Subject: refactor: Remove remaining MD5-hash mentions and computations --- tools/nixery/build-image/build-image.nix | 5 ++--- tools/nixery/docs/src/caching.md | 9 ++++----- tools/nixery/docs/src/under-the-hood.md | 3 +-- tools/nixery/server/builder/builder.go | 24 +++++++++--------------- tools/nixery/server/manifest/manifest.go | 7 ++----- 5 files changed, 18 insertions(+), 30 deletions(-) diff --git a/tools/nixery/build-image/build-image.nix b/tools/nixery/build-image/build-image.nix index 33500dbb9e80..70049885ab1c 100644 --- a/tools/nixery/build-image/build-image.nix +++ b/tools/nixery/build-image/build-image.nix @@ -137,11 +137,10 @@ let buildInputs = with pkgs; [ coreutils jq openssl ]; }'' layerSha256=$(sha256sum ${symlinkLayer} | cut -d ' ' -f1) - layerMd5=$(openssl dgst -md5 -binary ${symlinkLayer} | openssl enc -base64) layerSize=$(stat --printf '%s' ${symlinkLayer}) - jq -n -c --arg sha256 $layerSha256 --arg md5 $layerMd5 --arg size $layerSize --arg path ${symlinkLayer} \ - '{ size: ($size | tonumber), sha256: $sha256, md5: $md5, path: $path }' >> $out + jq -n -c --arg sha256 $layerSha256 --arg size $layerSize --arg path ${symlinkLayer} \ + '{ size: ($size | tonumber), sha256: $sha256, path: $path }' >> $out '')); # Final output structure returned to Nixery if the build succeeded diff --git a/tools/nixery/docs/src/caching.md b/tools/nixery/docs/src/caching.md index 175fe04d7084..b07d9e22f046 100644 --- a/tools/nixery/docs/src/caching.md +++ b/tools/nixery/docs/src/caching.md @@ -46,9 +46,8 @@ They are stored content-addressably at `$BUCKET/layers/$SHA256HASH` and layer requests sent to Nixery will redirect directly to this storage location. The effect of this cache is that Nixery does not need to upload identical layers -repeatedly. When Nixery notices that a layer already exists in GCS, it will use -the object metadata to compare its MD5-hash with the locally computed one and -skip uploading. +repeatedly. When Nixery notices that a layer already exists in GCS it will skip +uploading this layer. Removing layers from the cache is *potentially problematic* if there are cached manifests or layer builds referencing those layers. @@ -61,8 +60,8 @@ reference these layers. Layer builds are cached at `$BUCKET/builds/$HASH`, where `$HASH` is a SHA1 of the Nix store paths included in the layer. -The content of the cached entries is a JSON-object that contains the MD5 and -SHA256 hashes of the built layer. +The content of the cached entries is a JSON-object that contains the SHA256 +hashes and sizes of the built layer. The effect of this cache is that different instances of Nixery will not build, hash and upload layers that have identical contents across different instances. diff --git a/tools/nixery/docs/src/under-the-hood.md b/tools/nixery/docs/src/under-the-hood.md index 6b5e5e9bbf21..b58a21d0d4ec 100644 --- a/tools/nixery/docs/src/under-the-hood.md +++ b/tools/nixery/docs/src/under-the-hood.md @@ -67,8 +67,7 @@ just ... hang, for a moment. Nixery inspects the returned manifest and uploads each layer to the configured [Google Cloud Storage][gcs] bucket. To avoid unnecessary uploading, it will -first check whether layers are already present in the bucket and - just to be -safe - compare their MD5-hashes against what was built. +check whether layers are already present in the bucket. ## 4. The image manifest is sent back diff --git a/tools/nixery/server/builder/builder.go b/tools/nixery/server/builder/builder.go index f3342f9918f8..64cfed14399b 100644 --- a/tools/nixery/server/builder/builder.go +++ b/tools/nixery/server/builder/builder.go @@ -21,7 +21,6 @@ import ( "bufio" "bytes" "context" - "crypto/md5" "crypto/sha256" "encoding/json" "fmt" @@ -108,7 +107,6 @@ type ImageResult struct { SymlinkLayer struct { Size int `json:"size"` SHA256 string `json:"sha256"` - MD5 string `json:"md5"` Path string `json:"path"` } `json:"symlinkLayer"` } @@ -328,8 +326,7 @@ func uploadHashLayer(ctx context.Context, s *State, key string, data io.Reader) // algorithms and uploads to the bucket sw := staging.NewWriter(ctx) shasum := sha256.New() - md5sum := md5.New() - multi := io.MultiWriter(sw, shasum, md5sum) + multi := io.MultiWriter(sw, shasum) size, err := io.Copy(multi, data) if err != nil { @@ -342,27 +339,24 @@ func uploadHashLayer(ctx context.Context, s *State, key string, data io.Reader) return nil, err } - build := Build{ - SHA256: fmt.Sprintf("%x", shasum.Sum([]byte{})), - MD5: fmt.Sprintf("%x", md5sum.Sum([]byte{})), - } + sha256sum := fmt.Sprintf("%x", shasum.Sum([]byte{})) // Hashes are now known and the object is in the bucket, what // remains is to move it to the correct location and cache it. - err = renameObject(ctx, s, "staging/"+key, "layers/"+build.SHA256) + err = renameObject(ctx, s, "staging/"+key, "layers/"+sha256sum) if err != nil { log.Printf("failed to move layer '%s' from staging: %s\n", key, err) return nil, err } - cacheBuild(ctx, s, key, build) - - log.Printf("Uploaded layer sha256:%s (%v bytes written)", build.SHA256, size) + log.Printf("Uploaded layer sha256:%s (%v bytes written)", sha256sum, size) - return &manifest.Entry{ - Digest: "sha256:" + build.SHA256, + entry := manifest.Entry{ + Digest: "sha256:" + sha256sum, Size: size, - }, nil + } + + return &entry, nil } func BuildImage(ctx context.Context, s *State, image *Image) (*BuildResult, error) { diff --git a/tools/nixery/server/manifest/manifest.go b/tools/nixery/server/manifest/manifest.go index 61d280a7fbab..f777e3f585df 100644 --- a/tools/nixery/server/manifest/manifest.go +++ b/tools/nixery/server/manifest/manifest.go @@ -3,7 +3,6 @@ package manifest import ( - "crypto/md5" "crypto/sha256" "encoding/json" "fmt" @@ -52,12 +51,11 @@ type imageConfig struct { } // ConfigLayer represents the configuration layer to be included in -// the manifest, containing its JSON-serialised content and the SHA256 -// & MD5 hashes of its input. +// the manifest, containing its JSON-serialised content and SHA256 +// hash. type ConfigLayer struct { Config []byte SHA256 string - MD5 string } // imageConfig creates an image configuration with the values set to @@ -78,7 +76,6 @@ func configLayer(hashes []string) ConfigLayer { return ConfigLayer{ Config: j, SHA256: fmt.Sprintf("%x", sha256.Sum256(j)), - MD5: fmt.Sprintf("%x", md5.Sum(j)), } } -- cgit 1.4.1