about summary refs log tree commit diff
path: root/tools/nixery/builder/builder.go
blob: 7f0bd7fffdb96aa5984cac63b4b24bc4c97c84a7 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
// Copyright 2022 The TVL Contributors
// SPDX-License-Identifier: Apache-2.0

// Package builder implements the logic for assembling container
// images. It shells out to Nix to retrieve all required Nix-packages
// and assemble the symlink layer and then creates the required
// tarballs in-process.
package builder

import (
	"bufio"
	"bytes"
	"compress/gzip"
	"context"
	"crypto/sha256"
	"encoding/json"
	"fmt"
	"io"
	"io/ioutil"
	"os"
	"os/exec"
	"sort"
	"strings"

	"github.com/google/nixery/config"
	"github.com/google/nixery/layers"
	"github.com/google/nixery/manifest"
	"github.com/google/nixery/storage"
	"github.com/im7mortal/kmutex"
	log "github.com/sirupsen/logrus"
)

// LayerBudget is the layer budget handed to layers.GroupLayers when
// the store paths of an image are grouped into layers.
//
// The maximum number of layers in an image is 125. To allow for
// extensibility, the actual number of layers Nixery is "allowed" to
// use up is set at a lower point.
const LayerBudget int = 94

// State holds the runtime state that is carried around in Nixery and
// passed to builder functions.
//
// How the fields are used by the functions in this file:
//
//   - Storage: backend that layers and the image config are persisted
//     to (Persist) and moved within (Move).
//   - Cache: not accessed directly by the code visible here;
//     presumably used by the cache helpers (layerFromCache,
//     cacheLayer, ...) that receive the State - TODO confirm.
//   - Cfg: supplies the Nix timeout, the package source (Pkgs.Render)
//     and the manifest cache key (Pkgs.CacheKey).
//   - Pop: popularity data handed to layers.GroupLayers.
//   - UploadMutex: keyed mutex that uploadHashLayer holds per layer
//     key for its whole duration.
type State struct {
	Storage     storage.Backend
	Cache       *LocalCache
	Cfg         config.Config
	Pop         layers.Popularity
	UploadMutex *kmutex.Kmutex
}

// Architecture represents the possible CPU architectures for which
// container images can be built.
//
// The default architecture is amd64, but support for ARM platforms is
// available within nixpkgs and can be toggled via meta-packages.
type Architecture struct {
	// Name of the system tuple to pass to Nix
	nixSystem string

	// Name of the architecture as used in the OCI manifests
	imageArch string
}

// amd64 and arm64 are the architectures defined by this package.
// metaPackages returns a pointer to one of them.
//
// The literals are positional (nixSystem first, imageArch second), so
// they depend on the field order of Architecture.
var amd64 = Architecture{"x86_64-linux", "amd64"}
var arm64 = Architecture{"aarch64-linux", "arm64"}

// Image represents the information necessary for building a container image.
// This can be either a list of package names (corresponding to keys in the
// nixpkgs set) or a Nix expression that results in a *list* of derivations.
//
// NOTE(review): the struct as declared here only carries package
// names; confirm whether the Nix expression variant is still supported.
type Image struct {
	// Name is the image name and Tag the requested tag. ImageFromName
	// stores the name as the sorted, "/"-joined list of the requested
	// name components.
	Name string
	Tag  string

	// Names of packages to include in the image. These must correspond
	// directly to top-level names of Nix packages in the nixpkgs tree.
	Packages []string

	// Architecture for which to build the image. Nixery defaults
	// this to amd64 if not specified via meta-packages.
	Arch *Architecture
}

// BuildResult represents the data returned from the server to the
// HTTP handlers. Error information is propagated straight from Nix
// for errors inside of the build that should be fed back to the
// client (such as missing packages).
//
// BuildImage fills in either Manifest (on success, including cache
// hits) or Error together with Pkgs (when the Nix result reports an
// error); the latter two are copied verbatim from ImageResult.
type BuildResult struct {
	Error    string          `json:"error"`
	Pkgs     []string        `json:"pkgs"`
	Manifest json.RawMessage `json:"manifest"`
}

// ImageFromName turns a "/"-separated image name and a tag into the
// Image structure that is used to invoke Nix.
//
// Leading meta-packages are expanded by metaPackages, which also
// selects the target architecture. The packages that every image
// contains (cacert, iana-etc) are appended afterwards.
//
// Both the resulting image name and the package list are sorted, so
// that requesting the same packages in a different order does not
// bust any caches.
func ImageFromName(name string, tag string) Image {
	requested := strings.Split(name, "/")

	arch, packages := metaPackages(requested)
	packages = append(packages, "cacert", "iana-etc")

	// The name is derived from everything the user asked for,
	// meta-packages included; the package list only holds what is
	// actually passed on to Nix.
	sort.Strings(requested)
	sort.Strings(packages)
	sortedName := strings.Join(requested, "/")

	return Image{
		Name:     sortedName,
		Tag:      tag,
		Packages: packages,
		Arch:     arch,
	}
}

// ImageResult represents the output of calling the Nix derivation
// responsible for preparing an image.
//
// prepareImage decodes it from the JSON result file produced by the
// nixery-prepare-image invocation.
type ImageResult struct {
	// These fields are populated in case of an error
	Error string   `json:"error"`
	Pkgs  []string `json:"pkgs"`

	// These fields are populated in case of success
	Graph        layers.RuntimeGraph `json:"runtimeGraph"`
	SymlinkLayer struct {
		// TarHash is used as the cache/upload key of the symlink
		// layer and, prefixed with "sha256:", as its tar hash. Path
		// is the file that gets gzip-compressed and uploaded. Size
		// is not read by the code in this file.
		Size    int    `json:"size"`
		TarHash string `json:"tarHash"`
		Path    string `json:"path"`
	} `json:"symlinkLayer"`
}

// metaPackages expands package names defined by Nixery which either
// include sets of packages or trigger certain image-building
// behaviour.
//
// Meta-packages must be specified as the first packages in an image
// name; a meta-package name that appears after a regular package is
// treated as a regular package.
//
// Currently defined meta-packages are:
//
// * `shell`: Includes bash, coreutils and other common command-line tools
// * `arm64`: Causes Nixery to build images for the ARM64 architecture
//
// It returns the selected architecture (amd64 unless `arm64` was
// requested) and the package list with the meta-packages removed and
// their expansions appended. The caller's slice is never written to,
// even if it has spare capacity.
func metaPackages(packages []string) (*Architecture, []string) {
	arch := &amd64

	// Count the leading meta-packages; the first regular package
	// ends the prefix.
	metaCount := 0
	for _, p := range packages {
		if p != "shell" && p != "arm64" {
			break
		}
		metaCount++
	}
	metapkgs := packages[:metaCount]

	// Chop off the meta-packages from the front of the package
	// list. The full slice expression caps the capacity at the
	// length, which forces the appends below to copy instead of
	// overwriting memory that still belongs to the caller.
	packages = packages[metaCount:len(packages):len(packages)]

	for _, p := range metapkgs {
		switch p {
		case "shell":
			packages = append(packages, "bashInteractive", "coreutils", "moreutils", "nano")
		case "arm64":
			arch = &arm64
		}
	}

	return arch, packages
}

// logNix logs each output line from Nix. It runs in a goroutine per
// output channel that should be live-logged.
//
// image and cmd are attached to every log entry as fields. The
// function returns once r reports EOF or a read error.
//
// bufio.Scanner gives up with bufio.ErrTooLong when a single line
// exceeds its token limit. Returning at that point would leave the
// rest of the pipe unread, and a child process that keeps writing
// may then block on the full pipe. Instead, a warning is logged and
// scanning resumes with a fresh scanner; the part of the overlong
// line that was already buffered is dropped.
func logNix(image, cmd string, r io.ReadCloser) {
	fields := log.Fields{
		"image": image,
		"cmd":   cmd,
	}

	for {
		scanner := bufio.NewScanner(r)
		for scanner.Scan() {
			log.WithFields(fields).Info("[nix] " + scanner.Text())
		}

		// EOF (nil error) and genuine read errors both end the
		// logging; only an overlong line is worth resuming after.
		if scanner.Err() != bufio.ErrTooLong {
			return
		}

		log.WithFields(fields).Warn("[nix] output line too long, skipping part of it")
	}
}

// callNix runs program with args and returns the contents of the
// file whose path the program printed on stdout.
//
// The program's stderr is live-logged line by line via logNix while
// it runs; image is only used as a log field. An error is returned if
// the pipes cannot be set up, the program cannot be started or exits
// unsuccessfully, its stdout cannot be read, or the result file
// cannot be read.
func callNix(program, image string, args []string) ([]byte, error) {
	cmd := exec.Command(program, args...)

	outpipe, err := cmd.StdoutPipe()
	if err != nil {
		return nil, err
	}

	errpipe, err := cmd.StderrPipe()
	if err != nil {
		return nil, err
	}

	// stderrDone is closed once logNix has read stderr to its end.
	// If Start fails it closes the pipes itself, which also ends the
	// goroutine.
	stderrDone := make(chan struct{})
	go func() {
		defer close(stderrDone)
		logNix(image, program, errpipe)
	}()

	if err = cmd.Start(); err != nil {
		log.WithError(err).WithFields(log.Fields{
			"image": image,
			"cmd":   program,
		}).Error("error invoking Nix")

		return nil, err
	}

	log.WithFields(log.Fields{
		"cmd":   program,
		"image": image,
	}).Info("invoked Nix build")

	stdout, readErr := ioutil.ReadAll(outpipe)

	// Wait closes both pipes as soon as the process has exited, so
	// every read has to be finished before it is called; otherwise
	// the tail of the stderr output can get lost.
	<-stderrDone

	if err = cmd.Wait(); err != nil {
		log.WithError(err).WithFields(log.Fields{
			"image": image,
			"cmd":   program,
			// Logged as a string; a raw byte slice is not readable
			// in the log output.
			"stdout": string(stdout),
		}).Info("failed to invoke Nix")

		return nil, err
	}

	// A failed read would leave a truncated result path behind;
	// report it instead of trying to open a bogus file.
	if readErr != nil {
		log.WithError(readErr).WithFields(log.Fields{
			"image": image,
			"cmd":   program,
		}).Error("failed to read Nix output")

		return nil, readErr
	}

	resultFile := strings.TrimSpace(string(stdout))
	buildOutput, err := ioutil.ReadFile(resultFile)
	if err != nil {
		log.WithError(err).WithFields(log.Fields{
			"image": image,
			"file":  resultFile,
		}).Info("failed to read Nix result file")

		return nil, err
	}

	return buildOutput, nil
}

// prepareImage calls out to Nix (nixery-prepare-image) and requests
// the metadata for the image to be built. All required store paths
// for the image will be realised, but layers will not yet be created
// from them.
//
// The package list is handed to Nix as a JSON-encoded string, next
// to the rendered package source, the target system and the
// configured timeout. The JSON output of the call is decoded into an
// ImageResult.
//
// This function is only invoked if the manifest is not found in any
// cache.
func prepareImage(s *State, image *Image) (*ImageResult, error) {
	pkgsJSON, err := json.Marshal(image.Packages)
	if err != nil {
		return nil, err
	}

	srcType, srcArgs := s.Cfg.Pkgs.Render(image.Tag)

	// Every string argument is passed to Nix as `--argstr <name> <value>`.
	nixArgs := []string{"--timeout", s.Cfg.Timeout}
	for _, arg := range [][2]string{
		{"packages", string(pkgsJSON)},
		{"srcType", srcType},
		{"srcArgs", srcArgs},
		{"system", image.Arch.nixSystem},
	} {
		nixArgs = append(nixArgs, "--argstr", arg[0], arg[1])
	}

	raw, err := callNix("nixery-prepare-image", image.Name, nixArgs)
	if err != nil {
		// granular error logging is performed in callNix already
		return nil, err
	}

	log.WithFields(log.Fields{
		"image": image.Name,
		"tag":   image.Tag,
	}).Info("finished image preparation via Nix")

	result := new(ImageResult)
	if err := json.Unmarshal(raw, result); err != nil {
		return nil, err
	}

	return result, nil
}

// prepareLayers groups the store paths of the image's runtime graph
// into layers and returns one manifest entry per layer, followed by
// the entry for the symlink layer.
//
// Every layer goes through uploadHashLayer, which answers from the
// layer cache when it can and otherwise has the layer built and
// uploaded to the bucket. Cache entries are added only after
// successful uploads, which guarantees that entries retrieved from
// the cache are present in the bucket.
func prepareLayers(ctx context.Context, s *State, image *Image, result *ImageResult) ([]manifest.Entry, error) {
	grouped := layers.GroupLayers(&result.Graph, &s.Pop, LayerBudget)

	var entries []manifest.Entry

	for _, layer := range grouped {
		layerHash := layer.Hash()

		// Writer for a store path layer: packs the layer's store
		// paths into w. While packing, the SHA sum of the
		// uncompressed layer is computed and handed back as the
		// tar hash.
		//
		// TODO(tazjin): Refactor this to make the
		// flow of data cleaner.
		pack := func(w io.Writer) (string, error) {
			tarhash, err := packStorePaths(&layer, w)
			if err != nil {
				return "", err
			}

			var pkgs []string
			for _, storePath := range layer.Contents {
				pkgs = append(pkgs, layers.PackageFromPath(storePath))
			}

			log.WithFields(log.Fields{
				"layer":    layerHash,
				"packages": pkgs,
				"tarhash":  tarhash,
			}).Info("created image layer")

			return tarhash, nil
		}

		entry, err := uploadHashLayer(ctx, s, layerHash, layer.MergeRating, pack)
		if err != nil {
			return nil, err
		}

		entries = append(entries, *entry)
	}

	// The symlink layer was already built by the first Nix build
	// and is not part of the grouped layers, so it is added by hand.
	symlinkKey := result.SymlinkLayer.TarHash

	logSymlinkFailure := func(err error, msg string) {
		log.WithError(err).WithFields(log.Fields{
			"image": image.Name,
			"tag":   image.Tag,
			"layer": symlinkKey,
		}).Error(msg)
	}

	// Writer for the symlink layer: gzip-compresses the tarball
	// that Nix produced into w.
	writeSymlinkLayer := func(w io.Writer) (string, error) {
		f, err := os.Open(result.SymlinkLayer.Path)
		if err != nil {
			logSymlinkFailure(err, "failed to open symlink layer")
			return "", err
		}
		defer f.Close()

		gz := gzip.NewWriter(w)
		if _, err := io.Copy(gz, f); err != nil {
			logSymlinkFailure(err, "failed to upload symlink layer")
			return "", err
		}

		return "sha256:" + symlinkKey, gz.Close()
	}

	entry, err := uploadHashLayer(ctx, s, symlinkKey, 0, writeSymlinkLayer)
	if err != nil {
		return nil, err
	}

	return append(entries, *entry), nil
}

// layerWriter is the type for functions that can write a layer to the
// multiwriter used for uploading & hashing.
//
// The returned string is stored by uploadHashLayer as the TarHash of
// the resulting manifest entry; a non-nil error is passed on to the
// storage backend's Persist call.
//
// This type exists to avoid duplication between the handling of
// symlink layers and store path layers.
type layerWriter func(w io.Writer) (string, error)

// byteCounter is an io.Writer that discards everything written to it
// and only keeps a running total of the number of bytes seen.
//
// It is needed because the tarballs are written ad-hoc, which leaves
// no other single place where the final tarball size could be
// counted.
type byteCounter struct {
	count int64
}

// Write adds len(p) to the running total. It always reports the
// whole of p as written and never fails.
func (b *byteCounter) Write(p []byte) (n int, err error) {
	written := len(p)
	b.count += int64(written)

	return written, nil
}

// uploadHashLayer uploads a layer tarball to the storage bucket while
// hashing it at the same time. The supplied layerWriter is expected
// to provide the layer data to the writer it is given.
//
// The per-key upload mutex is held for the whole call. If the layer
// cache already has an entry for key, that entry is returned and
// nothing is written.
//
// Otherwise the data is first uploaded to a 'staging' location, as
// the SHA256 hash is not yet available when the upload is initiated.
// After a successful upload the object is moved to its final location
// in the bucket and the layer cache is populated.
//
// The returned entry carries the layer's SHA256 digest and size as
// used in the image manifest, plus the tar hash reported by lw and
// the given merge rating.
func uploadHashLayer(ctx context.Context, s *State, key string, mrating uint64, lw layerWriter) (*manifest.Entry, error) {
	s.UploadMutex.Lock(key)
	defer s.UploadMutex.Unlock(key)

	if cached, ok := layerFromCache(ctx, s, key); ok {
		return cached, nil
	}

	stagingPath := "staging/" + key

	// Set by the persist callback below once lw has run.
	var tarhash string

	// Fans the layer data out to the storage backend, the SHA256
	// hasher and the byte counter, all in a single pass.
	persist := func(sw io.Writer) (string, int64, error) {
		hasher := sha256.New()
		counter := new(byteCounter)

		var werr error
		tarhash, werr = lw(io.MultiWriter(sw, hasher, counter))

		return fmt.Sprintf("%x", hasher.Sum(nil)), counter.count, werr
	}

	sha256sum, size, err := s.Storage.Persist(ctx, stagingPath, manifest.LayerType, persist)
	if err != nil {
		log.WithError(err).WithFields(log.Fields{
			"layer":   key,
			"backend": s.Storage.Name(),
		}).Error("failed to create and store layer")

		return nil, err
	}

	// The hashes are known now and the object sits in the bucket;
	// it still has to be moved to its final location and cached.
	if err := s.Storage.Move(ctx, stagingPath, "layers/"+sha256sum); err != nil {
		log.WithError(err).WithField("layer", key).
			Error("failed to move layer from staging")

		return nil, err
	}

	log.WithFields(log.Fields{
		"layer":  key,
		"sha256": sha256sum,
		"size":   size,
	}).Info("created and persisted layer")

	entry := manifest.Entry{
		Digest:      "sha256:" + sha256sum,
		Size:        size,
		TarHash:     tarhash,
		MergeRating: mrating,
	}

	cacheLayer(ctx, s, key, entry)

	return &entry, nil
}

// BuildImage builds the requested image and returns its manifest.
//
// If the package source yields a cache key and a manifest is cached
// under it, that manifest is returned right away. Otherwise the image
// is prepared via Nix, its layers and the image config are uploaded,
// and the new manifest is cached in the background.
//
// Errors that Nix reports as part of its result (such as missing
// packages) do not produce a Go error: they are returned in the
// Error and Pkgs fields of the BuildResult. A non-nil error is
// returned for failures of the build or upload machinery itself.
func BuildImage(ctx context.Context, s *State, image *Image) (*BuildResult, error) {
	key := s.Cfg.Pkgs.CacheKey(image.Packages, image.Tag)
	cacheable := key != ""

	if cacheable {
		if cached, hit := manifestFromCache(ctx, s, key); hit {
			return &BuildResult{Manifest: cached}, nil
		}
	}

	imageResult, err := prepareImage(s, image)
	if err != nil {
		return nil, err
	}

	if imageResult.Error != "" {
		return &BuildResult{
			Error: imageResult.Error,
			Pkgs:  imageResult.Pkgs,
		}, nil
	}

	entries, err := prepareLayers(ctx, s, image, imageResult)
	if err != nil {
		return nil, err
	}

	// If the requested packages include a shell,
	// set cmd accordingly.
	cmd := ""
	for _, pkg := range image.Packages {
		if pkg == "bashInteractive" {
			cmd = "bash"
			break
		}
	}

	m, c := manifest.Manifest(image.Arch.imageArch, entries, cmd)

	// The image config goes through the same upload path as the
	// layers; it has no tar hash, hence the empty string.
	writeConfig := func(w io.Writer) (string, error) {
		_, err := io.Copy(w, bytes.NewReader(c.Config))
		return "", err
	}

	if _, err = uploadHashLayer(ctx, s, c.SHA256, 0, writeConfig); err != nil {
		log.WithError(err).WithFields(log.Fields{
			"image": image.Name,
			"tag":   image.Tag,
		}).Error("failed to upload config")

		return nil, err
	}

	if cacheable {
		go cacheManifest(ctx, s, key, m)
	}

	return &BuildResult{Manifest: m}, nil
}