diff options
Diffstat (limited to 'third_party/nix/src/libstore')
63 files changed, 18446 insertions, 0 deletions
diff --git a/third_party/nix/src/libstore/binary-cache-store.cc b/third_party/nix/src/libstore/binary-cache-store.cc new file mode 100644 index 000000000000..f631c9dee231 --- /dev/null +++ b/third_party/nix/src/libstore/binary-cache-store.cc @@ -0,0 +1,391 @@ +#include "binary-cache-store.hh" + +#include <chrono> +#include <future> +#include <memory> + +#include <absl/strings/ascii.h> +#include <absl/strings/numbers.h> +#include <absl/strings/str_split.h> + +#include "archive.hh" +#include "compression.hh" +#include "derivations.hh" +#include "fs-accessor.hh" +#include "globals.hh" +#include "glog/logging.h" +#include "json.hh" +#include "nar-accessor.hh" +#include "nar-info-disk-cache.hh" +#include "nar-info.hh" +#include "remote-fs-accessor.hh" +#include "sync.hh" + +namespace nix { + +BinaryCacheStore::BinaryCacheStore(const Params& params) : Store(params) { + if (secretKeyFile != "") { + const std::string& secret_key_file = secretKeyFile; + secretKey = std::make_unique<SecretKey>(readFile(secret_key_file)); + } + + StringSink sink; + sink << narVersionMagic1; + narMagic = *sink.s; +} + +void BinaryCacheStore::init() { + std::string cacheInfoFile = "nix-cache-info"; + + auto cacheInfo = getFile(cacheInfoFile); + if (!cacheInfo) { + upsertFile(cacheInfoFile, "StoreDir: " + storeDir + "\n", + "text/x-nix-cache-info"); + } else { + for (auto& line : absl::StrSplit(*cacheInfo, absl::ByChar('\n'))) { + size_t colon = line.find(':'); + if (colon == std::string::npos) { + continue; + } + auto name = line.substr(0, colon); + auto value = + absl::StripAsciiWhitespace(line.substr(colon + 1, std::string::npos)); + if (name == "StoreDir") { + if (value != storeDir) { + throw Error(format("binary cache '%s' is for Nix stores with prefix " + "'%s', not '%s'") % + getUri() % value % storeDir); + } + } else if (name == "WantMassQuery") { + wantMassQuery_ = value == "1"; + } else if (name == "Priority") { + if (!absl::SimpleAtoi(value, &priority)) { + LOG(WARNING) << "Invalid 
'Priority' value: " << value; + } + } + } + } +} + +void BinaryCacheStore::getFile( + const std::string& path, + Callback<std::shared_ptr<std::string>> callback) noexcept { + try { + callback(getFile(path)); + } catch (...) { + callback.rethrow(); + } +} + +void BinaryCacheStore::getFile(const std::string& path, Sink& sink) { + std::promise<std::shared_ptr<std::string>> promise; + getFile(path, {[&](std::future<std::shared_ptr<std::string>> result) { + try { + promise.set_value(result.get()); + } catch (...) { + promise.set_exception(std::current_exception()); + } + }}); + auto data = promise.get_future().get(); + sink((unsigned char*)data->data(), data->size()); +} + +std::shared_ptr<std::string> BinaryCacheStore::getFile( + const std::string& path) { + StringSink sink; + try { + getFile(path, sink); + } catch (NoSuchBinaryCacheFile&) { + return nullptr; + } + return sink.s; +} + +Path BinaryCacheStore::narInfoFileFor(const Path& storePath) { + assertStorePath(storePath); + return storePathToHash(storePath) + ".narinfo"; +} + +void BinaryCacheStore::writeNarInfo(const ref<NarInfo>& narInfo) { + auto narInfoFile = narInfoFileFor(narInfo->path); + + upsertFile(narInfoFile, narInfo->to_string(), "text/x-nix-narinfo"); + + auto hashPart = storePathToHash(narInfo->path); + + { + auto state_(state.lock()); + state_->pathInfoCache.upsert(hashPart, std::shared_ptr<NarInfo>(narInfo)); + } + + if (diskCache) { + diskCache->upsertNarInfo(getUri(), hashPart, + std::shared_ptr<NarInfo>(narInfo)); + } +} + +void BinaryCacheStore::addToStore(const ValidPathInfo& info, + const ref<std::string>& nar, + RepairFlag repair, CheckSigsFlag checkSigs, + std::shared_ptr<FSAccessor> accessor) { + if ((repair == 0u) && isValidPath(info.path)) { + return; + } + + /* Verify that all references are valid. This may do some .narinfo + reads, but typically they'll already be cached. 
*/ + for (auto& ref : info.references) { + try { + if (ref != info.path) { + queryPathInfo(ref); + } + } catch (InvalidPath&) { + throw Error(format("cannot add '%s' to the binary cache because the " + "reference '%s' is not valid") % + info.path % ref); + } + } + + assert(nar->compare(0, narMagic.size(), narMagic) == 0); + + auto narInfo = make_ref<NarInfo>(info); + + narInfo->narSize = nar->size(); + narInfo->narHash = hashString(htSHA256, *nar); + + if (info.narHash && info.narHash != narInfo->narHash) { + throw Error( + format("refusing to copy corrupted path '%1%' to binary cache") % + info.path); + } + + auto accessor_ = std::dynamic_pointer_cast<RemoteFSAccessor>(accessor); + + /* Optionally write a JSON file containing a listing of the + contents of the NAR. */ + if (writeNARListing) { + std::ostringstream jsonOut; + + { + JSONObject jsonRoot(jsonOut); + jsonRoot.attr("version", 1); + + auto narAccessor = makeNarAccessor(nar); + + if (accessor_) { + accessor_->addToCache(info.path, *nar, narAccessor); + } + + { + auto res = jsonRoot.placeholder("root"); + listNar(res, narAccessor, "", true); + } + } + + upsertFile(storePathToHash(info.path) + ".ls", jsonOut.str(), + "application/json"); + } + + else { + if (accessor_) { + accessor_->addToCache(info.path, *nar, makeNarAccessor(nar)); + } + } + + /* Compress the NAR. 
*/ + narInfo->compression = compression; + auto now1 = std::chrono::steady_clock::now(); + auto narCompressed = compress(compression, *nar, parallelCompression); + auto now2 = std::chrono::steady_clock::now(); + narInfo->fileHash = hashString(htSHA256, *narCompressed); + narInfo->fileSize = narCompressed->size(); + + auto duration = + std::chrono::duration_cast<std::chrono::milliseconds>(now2 - now1) + .count(); + DLOG(INFO) << "copying path '" << narInfo->path << "' (" << narInfo->narSize + << " bytes, compressed " + << ((1.0 - (double)narCompressed->size() / nar->size()) * 100.0) + << "% in " << duration << "ms) to binary cache"; + + /* Atomically write the NAR file. */ + narInfo->url = "nar/" + narInfo->fileHash.to_string(Base32, false) + ".nar" + + (compression == "xz" ? ".xz" + : compression == "bzip2" + ? ".bz2" + : compression == "br" ? ".br" : ""); + if ((repair != 0u) || !fileExists(narInfo->url)) { + stats.narWrite++; + upsertFile(narInfo->url, *narCompressed, "application/x-nix-nar"); + } else { + stats.narWriteAverted++; + } + + stats.narWriteBytes += nar->size(); + stats.narWriteCompressedBytes += narCompressed->size(); + stats.narWriteCompressionTimeMs += duration; + + /* Atomically write the NAR info file.*/ + if (secretKey) { + narInfo->sign(*secretKey); + } + + writeNarInfo(narInfo); + + stats.narInfoWrite++; +} + +bool BinaryCacheStore::isValidPathUncached(const Path& storePath) { + // FIXME: this only checks whether a .narinfo with a matching hash + // part exists. So ‘f4kb...-foo’ matches ‘f4kb...-bar’, even + // though they shouldn't. Not easily fixed. 
+ return fileExists(narInfoFileFor(storePath)); +} + +void BinaryCacheStore::narFromPath(const Path& storePath, Sink& sink) { + auto info = queryPathInfo(storePath).cast<const NarInfo>(); + + uint64_t narSize = 0; + + LambdaSink wrapperSink([&](const unsigned char* data, size_t len) { + sink(data, len); + narSize += len; + }); + + auto decompressor = makeDecompressionSink(info->compression, wrapperSink); + + try { + getFile(info->url, *decompressor); + } catch (NoSuchBinaryCacheFile& e) { + throw SubstituteGone(e.what()); + } + + decompressor->finish(); + + stats.narRead++; + // stats.narReadCompressedBytes += nar->size(); // FIXME + stats.narReadBytes += narSize; +} + +void BinaryCacheStore::queryPathInfoUncached( + const Path& storePath, + Callback<std::shared_ptr<ValidPathInfo>> callback) noexcept { + auto uri = getUri(); + LOG(INFO) << "querying info about '" << storePath << "' on '" << uri << "'"; + + auto narInfoFile = narInfoFileFor(storePath); + + auto callbackPtr = std::make_shared<decltype(callback)>(std::move(callback)); + + getFile(narInfoFile, {[=](std::future<std::shared_ptr<std::string>> fut) { + try { + auto data = fut.get(); + + if (!data) { + return (*callbackPtr)(nullptr); + } + + stats.narInfoRead++; + + (*callbackPtr)( + (std::shared_ptr<ValidPathInfo>)std::make_shared<NarInfo>( + *this, *data, narInfoFile)); + + } catch (...) { + callbackPtr->rethrow(); + } + }}); +} + +Path BinaryCacheStore::addToStore(const std::string& name, const Path& srcPath, + bool recursive, HashType hashAlgo, + PathFilter& filter, RepairFlag repair) { + // FIXME: some cut&paste from LocalStore::addToStore(). + + /* Read the whole path into memory. This is not a very scalable + method for very large paths, but `copyPath' is mainly used for + small files. 
*/ + StringSink sink; + Hash h; + if (recursive) { + dumpPath(srcPath, sink, filter); + h = hashString(hashAlgo, *sink.s); + } else { + auto s = readFile(srcPath); + dumpString(s, sink); + h = hashString(hashAlgo, s); + } + + ValidPathInfo info; + info.path = makeFixedOutputPath(recursive, h, name); + + addToStore(info, sink.s, repair, CheckSigs, nullptr); + + return info.path; +} + +Path BinaryCacheStore::addTextToStore(const std::string& name, + const std::string& s, + const PathSet& references, + RepairFlag repair) { + ValidPathInfo info; + info.path = computeStorePathForText(name, s, references); + info.references = references; + + if ((repair != 0u) || !isValidPath(info.path)) { + StringSink sink; + dumpString(s, sink); + addToStore(info, sink.s, repair, CheckSigs, nullptr); + } + + return info.path; +} + +ref<FSAccessor> BinaryCacheStore::getFSAccessor() { + return make_ref<RemoteFSAccessor>(ref<Store>(shared_from_this()), + localNarCache); +} + +void BinaryCacheStore::addSignatures(const Path& storePath, + const StringSet& sigs) { + /* Note: this is inherently racy since there is no locking on + binary caches. In particular, with S3 this unreliable, even + when addSignatures() is called sequentially on a path, because + S3 might return an outdated cached version. 
*/ + + auto narInfo = make_ref<NarInfo>((NarInfo&)*queryPathInfo(storePath)); + + narInfo->sigs.insert(sigs.begin(), sigs.end()); + + auto narInfoFile = narInfoFileFor(narInfo->path); + + writeNarInfo(narInfo); +} + +std::shared_ptr<std::string> BinaryCacheStore::getBuildLog(const Path& path) { + Path drvPath; + + if (isDerivation(path)) { + drvPath = path; + } else { + try { + auto info = queryPathInfo(path); + // FIXME: add a "Log" field to .narinfo + if (info->deriver.empty()) { + return nullptr; + } + drvPath = info->deriver; + } catch (InvalidPath&) { + return nullptr; + } + } + + auto logPath = "log/" + baseNameOf(drvPath); + + DLOG(INFO) << "fetching build log from binary cache '" << getUri() << "/" + << logPath << "'"; + + return getFile(logPath); +} + +} // namespace nix diff --git a/third_party/nix/src/libstore/binary-cache-store.hh b/third_party/nix/src/libstore/binary-cache-store.hh new file mode 100644 index 000000000000..b8e1ccabf264 --- /dev/null +++ b/third_party/nix/src/libstore/binary-cache-store.hh @@ -0,0 +1,114 @@ +#pragma once + +#include <atomic> + +#include "crypto.hh" +#include "pool.hh" +#include "store-api.hh" + +namespace nix { + +struct NarInfo; + +class BinaryCacheStore : public Store { + public: + const Setting<std::string> compression{ + this, "xz", "compression", + "NAR compression method ('xz', 'bzip2', or 'none')"}; + const Setting<bool> writeNARListing{ + this, false, "write-nar-listing", + "whether to write a JSON file listing the files in each NAR"}; + const Setting<Path> secretKeyFile{ + this, "", "secret-key", + "path to secret key used to sign the binary cache"}; + const Setting<Path> localNarCache{this, "", "local-nar-cache", + "path to a local cache of NARs"}; + const Setting<bool> parallelCompression{ + this, false, "parallel-compression", + "enable multi-threading compression, available for xz only currently"}; + + private: + std::unique_ptr<SecretKey> secretKey; + + protected: + BinaryCacheStore(const Params& params); + 
+ public: + virtual bool fileExists(const std::string& path) = 0; + + virtual void upsertFile(const std::string& path, const std::string& data, + const std::string& mimeType) = 0; + + /* Note: subclasses must implement at least one of the two + following getFile() methods. */ + + /* Dump the contents of the specified file to a sink. */ + virtual void getFile(const std::string& path, Sink& sink); + + /* Fetch the specified file and call the specified callback with + the result. A subclass may implement this asynchronously. */ + virtual void getFile( + const std::string& path, + Callback<std::shared_ptr<std::string>> callback) noexcept; + + std::shared_ptr<std::string> getFile(const std::string& path); + + protected: + bool wantMassQuery_ = false; + int priority = 50; + + public: + virtual void init(); + + private: + std::string narMagic; + + std::string narInfoFileFor(const Path& storePath); + + void writeNarInfo(const ref<NarInfo>& narInfo); + + public: + bool isValidPathUncached(const Path& path) override; + + void queryPathInfoUncached( + const Path& path, + Callback<std::shared_ptr<ValidPathInfo>> callback) noexcept override; + + Path queryPathFromHashPart(const std::string& hashPart) override { + unsupported("queryPathFromHashPart"); + } + + bool wantMassQuery() override { return wantMassQuery_; } + + void addToStore(const ValidPathInfo& info, const ref<std::string>& nar, + RepairFlag repair, CheckSigsFlag checkSigs, + std::shared_ptr<FSAccessor> accessor) override; + + Path addToStore(const std::string& name, const Path& srcPath, bool recursive, + HashType hashAlgo, PathFilter& filter, + RepairFlag repair) override; + + Path addTextToStore(const std::string& name, const std::string& s, + const PathSet& references, RepairFlag repair) override; + + void narFromPath(const Path& path, Sink& sink) override; + + BuildResult buildDerivation(const Path& drvPath, const BasicDerivation& drv, + BuildMode buildMode) override { + unsupported("buildDerivation"); + } + + 
void ensurePath(const Path& path) override { unsupported("ensurePath"); } + + ref<FSAccessor> getFSAccessor() override; + + void addSignatures(const Path& storePath, const StringSet& sigs) override; + + std::shared_ptr<std::string> getBuildLog(const Path& path) override; + + int getPriority() override { return priority; } +}; + +MakeError(NoSuchBinaryCacheFile, Error); + +} // namespace nix diff --git a/third_party/nix/src/libstore/build.cc b/third_party/nix/src/libstore/build.cc new file mode 100644 index 000000000000..a60f5ef173e6 --- /dev/null +++ b/third_party/nix/src/libstore/build.cc @@ -0,0 +1,4779 @@ +#include <algorithm> +#include <cerrno> +#include <chrono> +#include <climits> +#include <cstring> +#include <future> +#include <iostream> +#include <map> +#include <memory> +#include <queue> +#include <regex> +#include <sstream> +#include <string> +#include <thread> + +#include <absl/strings/ascii.h> +#include <absl/strings/numbers.h> +#include <absl/strings/str_split.h> +#include <fcntl.h> +#include <grp.h> +#include <netdb.h> +#include <pwd.h> +#include <sys/resource.h> +#include <sys/select.h> +#include <sys/socket.h> +#include <sys/stat.h> +#include <sys/time.h> +#include <sys/types.h> +#include <sys/utsname.h> +#include <sys/wait.h> +#include <termios.h> +#include <unistd.h> + +#include "affinity.hh" +#include "archive.hh" +#include "builtins.hh" +#include "compression.hh" +#include "download.hh" +#include "finally.hh" +#include "globals.hh" +#include "glog/logging.h" +#include "json.hh" +#include "local-store.hh" +#include "machines.hh" +#include "nar-info.hh" +#include "parsed-derivations.hh" +#include "pathlocks.hh" +#include "references.hh" +#include "util.hh" + +/* Includes required for chroot support. 
*/ +#if __linux__ +#include <net/if.h> +#include <netinet/ip.h> +#include <sched.h> +#include <sys/ioctl.h> +#include <sys/mman.h> +#include <sys/mount.h> +#include <sys/param.h> +#include <sys/personality.h> +#include <sys/socket.h> +#include <sys/syscall.h> +#if HAVE_SECCOMP +#include <seccomp.h> +#endif +#define pivot_root(new_root, put_old) \ + (syscall(SYS_pivot_root, new_root, put_old)) +#endif + +#if HAVE_STATVFS +#include <sys/statvfs.h> +#endif + +#include <nlohmann/json.hpp> +#include <utility> + +namespace nix { + +static std::string pathNullDevice = "/dev/null"; + +/* Forward definition. */ +class Worker; +struct HookInstance; + +/* A pointer to a goal. */ +class Goal; +class DerivationGoal; +using GoalPtr = std::shared_ptr<Goal>; +using WeakGoalPtr = std::weak_ptr<Goal>; + +struct CompareGoalPtrs { + bool operator()(const GoalPtr& a, const GoalPtr& b) const; +}; + +/* Set of goals. */ +typedef std::set<GoalPtr, CompareGoalPtrs> Goals; +using WeakGoals = std::list<WeakGoalPtr>; + +/* A map of paths to goals (and the other way around). */ +typedef std::map<Path, WeakGoalPtr> WeakGoalMap; + +class Goal : public std::enable_shared_from_this<Goal> { + public: + typedef enum { + ecBusy, + ecSuccess, + ecFailed, + ecNoSubstituters, + ecIncompleteClosure + } ExitCode; + + protected: + /* Backlink to the worker. */ + Worker& worker; + + /* Goals that this goal is waiting for. */ + Goals waitees; + + /* Goals waiting for this one to finish. Must use weak pointers + here to prevent cycles. */ + WeakGoals waiters; + + /* Number of goals we are/were waiting for that have failed. */ + unsigned int nrFailed; + + /* Number of substitution goals we are/were waiting for that + failed because there are no substituters. */ + unsigned int nrNoSubstituters; + + /* Number of substitution goals we are/were waiting for that + failed because othey had unsubstitutable references. */ + unsigned int nrIncompleteClosure; + + /* Name of this goal for debugging purposes. 
*/ + std::string name; + + /* Whether the goal is finished. */ + ExitCode exitCode; + + explicit Goal(Worker& worker) : worker(worker) { + nrFailed = nrNoSubstituters = nrIncompleteClosure = 0; + exitCode = ecBusy; + } + + virtual ~Goal() { trace("goal destroyed"); } + + public: + virtual void work() = 0; + + void addWaitee(const GoalPtr& waitee); + + virtual void waiteeDone(GoalPtr waitee, ExitCode result); + + virtual void handleChildOutput(int fd, const std::string& data) { abort(); } + + virtual void handleEOF(int fd) { abort(); } + + void trace(const FormatOrString& fs); + + std::string getName() { return name; } + + ExitCode getExitCode() { return exitCode; } + + /* Callback in case of a timeout. It should wake up its waiters, + get rid of any running child processes that are being monitored + by the worker (important!), etc. */ + virtual void timedOut() = 0; + + virtual std::string key() = 0; + + protected: + virtual void amDone(ExitCode result); +}; + +bool CompareGoalPtrs::operator()(const GoalPtr& a, const GoalPtr& b) const { + std::string s1 = a->key(); + std::string s2 = b->key(); + return s1 < s2; +} + +using steady_time_point = std::chrono::time_point<std::chrono::steady_clock>; + +/* A mapping used to remember for each child process to what goal it + belongs, and file descriptors for receiving log data and output + path creation commands. */ +struct Child { + WeakGoalPtr goal; + Goal* goal2; // ugly hackery + std::set<int> fds; + bool respectTimeouts; + bool inBuildSlot; + steady_time_point lastOutput; /* time we last got output on stdout/stderr */ + steady_time_point timeStarted; +}; + +/* The worker class. */ +class Worker { + private: + /* Note: the worker should only have strong pointers to the + top-level goals. */ + + /* The top-level goals of the worker. */ + Goals topGoals; + + /* Goals that are ready to do some work. */ + WeakGoals awake; + + /* Goals waiting for a build slot. 
*/ + WeakGoals wantingToBuild; + + /* Child processes currently running. */ + std::list<Child> children; + + /* Number of build slots occupied. This includes local builds and + substitutions but not remote builds via the build hook. */ + unsigned int nrLocalBuilds; + + /* Maps used to prevent multiple instantiations of a goal for the + same derivation / path. */ + WeakGoalMap derivationGoals; + WeakGoalMap substitutionGoals; + + /* Goals waiting for busy paths to be unlocked. */ + WeakGoals waitingForAnyGoal; + + /* Goals sleeping for a few seconds (polling a lock). */ + WeakGoals waitingForAWhile; + + /* Last time the goals in `waitingForAWhile' where woken up. */ + steady_time_point lastWokenUp; + + /* Cache for pathContentsGood(). */ + std::map<Path, bool> pathContentsGoodCache; + + public: + /* Set if at least one derivation had a BuildError (i.e. permanent + failure). */ + bool permanentFailure; + + /* Set if at least one derivation had a timeout. */ + bool timedOut; + + /* Set if at least one derivation fails with a hash mismatch. */ + bool hashMismatch; + + /* Set if at least one derivation is not deterministic in check mode. */ + bool checkMismatch; + + LocalStore& store; + + std::unique_ptr<HookInstance> hook; + + uint64_t expectedBuilds = 0; + uint64_t doneBuilds = 0; + uint64_t failedBuilds = 0; + uint64_t runningBuilds = 0; + + uint64_t expectedSubstitutions = 0; + uint64_t doneSubstitutions = 0; + uint64_t failedSubstitutions = 0; + uint64_t runningSubstitutions = 0; + uint64_t expectedDownloadSize = 0; + uint64_t doneDownloadSize = 0; + uint64_t expectedNarSize = 0; + uint64_t doneNarSize = 0; + + /* Whether to ask the build hook if it can build a derivation. If + it answers with "decline-permanently", we don't try again. */ + bool tryBuildHook = true; + + explicit Worker(LocalStore& store); + ~Worker(); + + /* Make a goal (with caching). 
*/ + GoalPtr makeDerivationGoal(const Path& drvPath, + const StringSet& wantedOutputs, + BuildMode buildMode = bmNormal); + std::shared_ptr<DerivationGoal> makeBasicDerivationGoal( + const Path& drvPath, const BasicDerivation& drv, + BuildMode buildMode = bmNormal); + GoalPtr makeSubstitutionGoal(const Path& storePath, + RepairFlag repair = NoRepair); + + /* Remove a dead goal. */ + void removeGoal(const GoalPtr& goal); + + /* Wake up a goal (i.e., there is something for it to do). */ + void wakeUp(const GoalPtr& goal); + + /* Return the number of local build and substitution processes + currently running (but not remote builds via the build + hook). */ + unsigned int getNrLocalBuilds(); + + /* Registers a running child process. `inBuildSlot' means that + the process counts towards the jobs limit. */ + void childStarted(const GoalPtr& goal, const std::set<int>& fds, + bool inBuildSlot, bool respectTimeouts); + + /* Unregisters a running child process. `wakeSleepers' should be + false if there is no sense in waking up goals that are sleeping + because they can't run yet (e.g., there is no free build slot, + or the hook would still say `postpone'). */ + void childTerminated(Goal* goal, bool wakeSleepers = true); + + /* Put `goal' to sleep until a build slot becomes available (which + might be right away). */ + void waitForBuildSlot(const GoalPtr& goal); + + /* Wait for any goal to finish. Pretty indiscriminate way to + wait for some resource that some other goal is holding. */ + void waitForAnyGoal(GoalPtr goal); + + /* Wait for a few seconds and then retry this goal. Used when + waiting for a lock held by another process. This kind of + polling is inefficient, but POSIX doesn't really provide a way + to wait for multiple locks in the main select() loop. */ + void waitForAWhile(GoalPtr goal); + + /* Loop until the specified top-level goals have finished. */ + void run(const Goals& topGoals); + + /* Wait for input to become available. 
*/ + void waitForInput(); + + unsigned int exitStatus(); + + /* Check whether the given valid path exists and has the right + contents. */ + bool pathContentsGood(const Path& path); + + void markContentsGood(const Path& path); +}; + +////////////////////////////////////////////////////////////////////// + +void addToWeakGoals(WeakGoals& goals, const GoalPtr& p) { + // FIXME: necessary? + // FIXME: O(n) + for (auto& i : goals) { + if (i.lock() == p) { + return; + } + } + goals.push_back(p); +} + +void Goal::addWaitee(const GoalPtr& waitee) { + waitees.insert(waitee); + addToWeakGoals(waitee->waiters, shared_from_this()); +} + +void Goal::waiteeDone(GoalPtr waitee, ExitCode result) { + assert(waitees.find(waitee) != waitees.end()); + waitees.erase(waitee); + + trace(format("waitee '%1%' done; %2% left") % waitee->name % waitees.size()); + + if (result == ecFailed || result == ecNoSubstituters || + result == ecIncompleteClosure) { + ++nrFailed; + } + + if (result == ecNoSubstituters) { + ++nrNoSubstituters; + } + + if (result == ecIncompleteClosure) { + ++nrIncompleteClosure; + } + + if (waitees.empty() || (result == ecFailed && !settings.keepGoing)) { + /* If we failed and keepGoing is not set, we remove all + remaining waitees. 
/* Record `result' as this goal's exit code, report it to every waiter that
   still exists through waiteeDone(), drop the waiter list and ask the worker
   to remove this goal.  Asserts that the goal was still busy (exitCode ==
   ecBusy) and that `result' is one of the four non-busy exit codes. */
void Goal::amDone(ExitCode result) {
  trace("done");
  assert(exitCode == ecBusy);
  assert(result == ecSuccess || result == ecFailed ||
         result == ecNoSubstituters || result == ecIncompleteClosure);
  exitCode = result;
  for (auto& i : waiters) {
    /* Waiters are held as weak pointers; lock() yields null for a waiter
       that no longer exists, and such entries are skipped. */
    GoalPtr goal = i.lock();
    if (goal) {
      goal->waiteeDone(shared_from_this(), result);
    }
  }
  waiters.clear();
  worker.removeGoal(shared_from_this());
}
*/ + int fdDevNull = open(pathNullDevice.c_str(), O_RDWR); + if (fdDevNull == -1) { + throw SysError(format("cannot open '%1%'") % pathNullDevice); + } + if (dup2(fdDevNull, STDIN_FILENO) == -1) { + throw SysError("cannot dup null device into stdin"); + } + close(fdDevNull); +} + +void handleDiffHook(uid_t uid, uid_t gid, Path tryA, Path tryB, Path drvPath, + Path tmpDir) { + auto diffHook = settings.diffHook; + if (diffHook != "" && settings.runDiffHook) { + try { + RunOptions diffHookOptions( + diffHook, {std::move(tryA), std::move(tryB), std::move(drvPath), + std::move(tmpDir)}); + diffHookOptions.searchPath = true; + diffHookOptions.uid = uid; + diffHookOptions.gid = gid; + diffHookOptions.chdir = "/"; + + auto diffRes = runProgram(diffHookOptions); + if (!statusOk(diffRes.first)) { + throw ExecError(diffRes.first, + fmt("diff-hook program '%1%' %2%", diffHook, + statusToString(diffRes.first))); + } + + if (!diffRes.second.empty()) { + LOG(ERROR) << absl::StripTrailingAsciiWhitespace(diffRes.second); + } + } catch (Error& error) { + LOG(ERROR) << "diff hook execution failed: " << error.what(); + } + } +} + +////////////////////////////////////////////////////////////////////// + +class UserLock { + private: + /* POSIX locks suck. If we have a lock on a file, and we open and + close that file again (without closing the original file + descriptor), we lose the lock. So we have to be *very* careful + not to open a lock file on which we are holding a lock. 
*/ + static Sync<PathSet> lockedPaths_; + + Path fnUserLock; + AutoCloseFD fdUserLock; + + std::string user; + uid_t uid; + gid_t gid; + std::vector<gid_t> supplementaryGIDs; + + public: + UserLock(); + ~UserLock(); + + void kill(); + + std::string getUser() { return user; } + uid_t getUID() { + assert(uid); + return uid; + } + uid_t getGID() { + assert(gid); + return gid; + } + std::vector<gid_t> getSupplementaryGIDs() { return supplementaryGIDs; } + + bool enabled() { return uid != 0; } +}; + +Sync<PathSet> UserLock::lockedPaths_; + +UserLock::UserLock() { + assert(settings.buildUsersGroup != ""); + + /* Get the members of the build-users-group. */ + struct group* gr = getgrnam(settings.buildUsersGroup.get().c_str()); + if (gr == nullptr) { + throw Error( + format( + "the group '%1%' specified in 'build-users-group' does not exist") % + settings.buildUsersGroup); + } + gid = gr->gr_gid; + + /* Copy the result of getgrnam. */ + Strings users; + for (char** p = gr->gr_mem; *p != nullptr; ++p) { + DLOG(INFO) << "found build user " << *p; + users.push_back(*p); + } + + if (users.empty()) { + throw Error(format("the build users group '%1%' has no members") % + settings.buildUsersGroup); + } + + /* Find a user account that isn't currently in use for another + build. */ + for (auto& i : users) { + DLOG(INFO) << "trying user " << i; + + struct passwd* pw = getpwnam(i.c_str()); + if (pw == nullptr) { + throw Error(format("the user '%1%' in the group '%2%' does not exist") % + i % settings.buildUsersGroup); + } + + createDirs(settings.nixStateDir + "/userpool"); + + fnUserLock = + (format("%1%/userpool/%2%") % settings.nixStateDir % pw->pw_uid).str(); + + { + auto lockedPaths(lockedPaths_.lock()); + if (lockedPaths->count(fnUserLock) != 0u) { + /* We already have a lock on this one. 
*/ + continue; + } + lockedPaths->insert(fnUserLock); + } + + try { + AutoCloseFD fd = + open(fnUserLock.c_str(), O_RDWR | O_CREAT | O_CLOEXEC, 0600); + if (!fd) { + throw SysError(format("opening user lock '%1%'") % fnUserLock); + } + + if (lockFile(fd.get(), ltWrite, false)) { + fdUserLock = std::move(fd); + user = i; + uid = pw->pw_uid; + + /* Sanity check... */ + if (uid == getuid() || uid == geteuid()) { + throw Error(format("the Nix user should not be a member of '%1%'") % + settings.buildUsersGroup); + } + +#if __linux__ + /* Get the list of supplementary groups of this build user. This + is usually either empty or contains a group such as "kvm". */ + supplementaryGIDs.resize(10); + int ngroups = supplementaryGIDs.size(); + int err = getgrouplist(pw->pw_name, pw->pw_gid, + supplementaryGIDs.data(), &ngroups); + if (err == -1) { + throw Error( + format("failed to get list of supplementary groups for '%1%'") % + pw->pw_name); + } + + supplementaryGIDs.resize(ngroups); +#endif + + return; + } + + } catch (...) { + lockedPaths_.lock()->erase(fnUserLock); + } + } + + throw Error(format("all build users are currently in use; " + "consider creating additional users and adding them to " + "the '%1%' group") % + settings.buildUsersGroup); +} + +UserLock::~UserLock() { + auto lockedPaths(lockedPaths_.lock()); + assert(lockedPaths->count(fnUserLock)); + lockedPaths->erase(fnUserLock); +} + +void UserLock::kill() { killUser(uid); } + +////////////////////////////////////////////////////////////////////// + +struct HookInstance { + /* Pipes for talking to the build hook. */ + Pipe toHook; + + /* Pipe for the hook's standard output/error. */ + Pipe fromHook; + + /* Pipe for the builder's standard output/error. */ + Pipe builderOut; + + /* The process ID of the hook. 
/* Start the build hook named by settings.buildHook as a child process and
   send it the current configuration.

   Three pipes are created first: fromHook (passed to commonChildInit() in
   the child), toHook (its read side becomes the child's stdin) and
   builderOut (its write side becomes fd 4 and its read side fd 5 in the
   child).  After the fork the parent resets its fromHook write side and
   toHook read side, then writes all settings to toHook. */
HookInstance::HookInstance() {
  DLOG(INFO) << "starting build hook " << settings.buildHook;

  /* Create a pipe to get the output of the child. */
  fromHook.create();

  /* Create the communication pipes. */
  toHook.create();

  /* Create a pipe to get the output of the builder. */
  builderOut.create();

  /* Fork the hook. */
  pid = startProcess([&]() {
    commonChildInit(fromHook);

    if (chdir("/") == -1) {
      throw SysError("changing into /");
    }

    /* Dup the communication pipes. */
    if (dup2(toHook.readSide.get(), STDIN_FILENO) == -1) {
      throw SysError("dupping to-hook read side");
    }

    /* Use fd 4 for the builder's stdout/stderr. */
    if (dup2(builderOut.writeSide.get(), 4) == -1) {
      throw SysError("dupping builder's stdout/stderr");
    }

    /* Hack: pass the read side of that fd to allow build-remote
       to read SSH error messages. */
    if (dup2(builderOut.readSide.get(), 5) == -1) {
      throw SysError("dupping builder's stdout/stderr");
    }

    /* argv[0] is the base name of the hook; no further arguments are
       passed. */
    Strings args = {
        baseNameOf(settings.buildHook),
        // std::to_string(verbosity), // TODO(tazjin): what?
    };

    execv(settings.buildHook.get().c_str(), stringsToCharPtrs(args).data());

    /* Only reached when execv() returned, i.e. the exec failed. */
    throw SysError("executing '%s'", settings.buildHook);
  });

  pid.setSeparatePG(true);
  /* NOTE(review): assigning -1 presumably closes the parent's copy of these
     pipe ends -- confirm against the pipe/fd wrapper type. */
  fromHook.writeSide = -1;
  toHook.readSide = -1;

  sink = FdSink(toHook.writeSide.get());
  /* Note: from here on this local shadows the global `settings' object that
     is used above. */
  std::map<std::string, Config::SettingInfo> settings;
  globalConfig.getSettings(settings);
  /* Each setting is written as the integer 1 followed by its name and
     value; a single 0 ends the list. */
  for (auto& setting : settings) {
    sink << 1 << setting.first << setting.second.value;
  }
  sink << 0;
}
{ + ignoreException(); + } +} + +////////////////////////////////////////////////////////////////////// + +typedef std::map<std::string, std::string> StringRewrites; + +std::string rewriteStrings(std::string s, const StringRewrites& rewrites) { + for (auto& i : rewrites) { + size_t j = 0; + while ((j = s.find(i.first, j)) != std::string::npos) { + s.replace(j, i.first.size(), i.second); + } + } + return s; +} + +////////////////////////////////////////////////////////////////////// + +typedef enum { rpAccept, rpDecline, rpPostpone } HookReply; + +class SubstitutionGoal; + +class DerivationGoal : public Goal { + private: + /* Whether to use an on-disk .drv file. */ + bool useDerivation; + + /* The path of the derivation. */ + Path drvPath; + + /* The specific outputs that we need to build. Empty means all of + them. */ + StringSet wantedOutputs; + + /* Whether additional wanted outputs have been added. */ + bool needRestart = false; + + /* Whether to retry substituting the outputs after building the + inputs. */ + bool retrySubstitution; + + /* The derivation stored at drvPath. */ + std::unique_ptr<BasicDerivation> drv; + + std::unique_ptr<ParsedDerivation> parsedDrv; + + /* The remainder is state held during the build. */ + + /* Locks on the output paths. */ + PathLocks outputLocks; + + /* All input paths (that is, the union of FS closures of the + immediate input paths). */ + PathSet inputPaths; + + /* Referenceable paths (i.e., input and output paths). */ + PathSet allPaths; + + /* Outputs that are already valid. If we're repairing, these are + the outputs that are valid *and* not corrupt. */ + PathSet validPaths; + + /* Outputs that are corrupt or not valid. */ + PathSet missingPaths; + + /* User selected for running the builder. */ + std::unique_ptr<UserLock> buildUser; + + /* The process ID of the builder. */ + Pid pid; + + /* The temporary directory. */ + Path tmpDir; + + /* The path of the temporary directory in the sandbox. 
*/ + Path tmpDirInSandbox; + + /* File descriptor for the log file. */ + AutoCloseFD fdLogFile; + std::shared_ptr<BufferedSink> logFileSink, logSink; + + /* Number of bytes received from the builder's stdout/stderr. */ + unsigned long logSize; + + /* The most recent log lines. */ + std::list<std::string> logTail; + + std::string currentLogLine; + size_t currentLogLinePos = 0; // to handle carriage return + + std::string currentHookLine; + + /* Pipe for the builder's standard output/error. */ + Pipe builderOut; + + /* Pipe for synchronising updates to the builder user namespace. */ + Pipe userNamespaceSync; + + /* The build hook. */ + std::unique_ptr<HookInstance> hook; + + /* Whether we're currently doing a chroot build. */ + bool useChroot = false; + + Path chrootRootDir; + + /* RAII object to delete the chroot directory. */ + std::shared_ptr<AutoDelete> autoDelChroot; + + /* Whether this is a fixed-output derivation. */ + bool fixedOutput; + + /* Whether to run the build in a private network namespace. */ + bool privateNetwork = false; + + using GoalState = void (DerivationGoal::*)(); + GoalState state; + + /* Stuff we need to pass to initChild(). */ + struct ChrootPath { + Path source; + bool optional; + explicit ChrootPath(Path source = "", bool optional = false) + : source(std::move(source)), optional(optional) {} + }; + typedef std::map<Path, ChrootPath> + DirsInChroot; // maps target path to source path + DirsInChroot dirsInChroot; + + typedef std::map<std::string, std::string> Environment; + Environment env; + + /* Hash rewriting. */ + StringRewrites inputRewrites, outputRewrites; + typedef std::map<Path, Path> RedirectedOutputs; + RedirectedOutputs redirectedOutputs; + + BuildMode buildMode; + + /* If we're repairing without a chroot, there may be outputs that + are valid but corrupt. So we redirect these outputs to + temporary paths. */ + PathSet redirectedBadOutputs; + + BuildResult result; + + /* The current round, if we're building multiple times. 
*/ + size_t curRound = 1; + + size_t nrRounds; + + /* Path registration info from the previous round, if we're + building multiple times. Since this contains the hash, it + allows us to compare whether two rounds produced the same + result. */ + std::map<Path, ValidPathInfo> prevInfos; + + const uid_t sandboxUid = 1000; + const gid_t sandboxGid = 100; + + const static Path homeDir; + + std::unique_ptr<MaintainCount<uint64_t>> mcExpectedBuilds, mcRunningBuilds; + + /* The remote machine on which we're building. */ + std::string machineName; + + public: + DerivationGoal(const Path& drvPath, StringSet wantedOutputs, Worker& worker, + BuildMode buildMode = bmNormal); + DerivationGoal(const Path& drvPath, const BasicDerivation& drv, + Worker& worker, BuildMode buildMode = bmNormal); + ~DerivationGoal() override; + + /* Whether we need to perform hash rewriting if there are valid output paths. + */ + bool needsHashRewrite(); + + void timedOut() override; + + std::string key() override { + /* Ensure that derivations get built in order of their name, + i.e. a derivation named "aardvark" always comes before + "baboon". And substitution goals always happen before + derivation goals (due to "b$"). */ + return "b$" + storePathToName(drvPath) + "$" + drvPath; + } + + void work() override; + + Path getDrvPath() { return drvPath; } + + /* Add wanted outputs to an already existing derivation goal. */ + void addWantedOutputs(const StringSet& outputs); + + BuildResult getResult() { return result; } + + private: + /* The states. */ + void getDerivation(); + void loadDerivation(); + void haveDerivation(); + void outputsSubstituted(); + void closureRepaired(); + void inputsRealised(); + void tryToBuild(); + void buildDone(); + + /* Is the build hook willing to perform the build? */ + HookReply tryBuildHook(); + + /* Start building a derivation. */ + void startBuilder(); + + /* Fill in the environment for the builder. */ + void initEnv(); + + /* Setup tmp dir location. 
*/ + void initTmpDir(); + + /* Write a JSON file containing the derivation attributes. */ + void writeStructuredAttrs(); + + /* Make a file owned by the builder. */ + void chownToBuilder(const Path& path); + + /* Run the builder's process. */ + void runChild(); + + friend int childEntry(void* /*arg*/); + + /* Check that the derivation outputs all exist and register them + as valid. */ + void registerOutputs(); + + /* Check that an output meets the requirements specified by the + 'outputChecks' attribute (or the legacy + '{allowed,disallowed}{References,Requisites}' attributes). */ + void checkOutputs(const std::map<std::string, ValidPathInfo>& outputs); + + /* Open a log file and a pipe to it. */ + Path openLogFile(); + + /* Close the log file. */ + void closeLogFile(); + + /* Delete the temporary directory, if we have one. */ + void deleteTmpDir(bool force); + + /* Callback used by the worker to write to the log. */ + void handleChildOutput(int fd, const std::string& data) override; + void handleEOF(int fd) override; + void flushLine(); + + /* Return the set of (in)valid paths. */ + PathSet checkPathValidity(bool returnValid, bool checkHash); + + /* Abort the goal if `path' failed to build. */ + bool pathFailed(const Path& path); + + /* Forcibly kill the child process, if any. 
*/ + void killChild(); + + Path addHashRewrite(const Path& path); + + void repairClosure(); + + void amDone(ExitCode result) override { Goal::amDone(result); } + + void done(BuildResult::Status status, const std::string& msg = ""); + + PathSet exportReferences(const PathSet& storePaths); +}; + +const Path DerivationGoal::homeDir = "/homeless-shelter"; + +DerivationGoal::DerivationGoal(const Path& drvPath, StringSet wantedOutputs, + Worker& worker, BuildMode buildMode) + : Goal(worker), + useDerivation(true), + drvPath(drvPath), + wantedOutputs(std::move(wantedOutputs)), + buildMode(buildMode) { + state = &DerivationGoal::getDerivation; + name = (format("building of '%1%'") % drvPath).str(); + trace("created"); + + mcExpectedBuilds = + std::make_unique<MaintainCount<uint64_t>>(worker.expectedBuilds); +} + +DerivationGoal::DerivationGoal(const Path& drvPath, const BasicDerivation& drv, + Worker& worker, BuildMode buildMode) + : Goal(worker), + useDerivation(false), + drvPath(drvPath), + buildMode(buildMode) { + this->drv = std::make_unique<BasicDerivation>(drv); + state = &DerivationGoal::haveDerivation; + name = (format("building of %1%") % showPaths(drv.outputPaths())).str(); + trace("created"); + + mcExpectedBuilds = + std::make_unique<MaintainCount<uint64_t>>(worker.expectedBuilds); + + /* Prevent the .chroot directory from being + garbage-collected. (See isActiveTempFile() in gc.cc.) */ + worker.store.addTempRoot(drvPath); +} + +DerivationGoal::~DerivationGoal() { + /* Careful: we should never ever throw an exception from a + destructor. */ + try { + killChild(); + } catch (...) { + ignoreException(); + } + try { + deleteTmpDir(false); + } catch (...) { + ignoreException(); + } + try { + closeLogFile(); + } catch (...) 
{ + ignoreException(); + } +} + +inline bool DerivationGoal::needsHashRewrite() { return !useChroot; } + +void DerivationGoal::killChild() { + if (pid != -1) { + worker.childTerminated(this); + + if (buildUser) { + /* If we're using a build user, then there is a tricky + race condition: if we kill the build user before the + child has done its setuid() to the build user uid, then + it won't be killed, and we'll potentially lock up in + pid.wait(). So also send a conventional kill to the + child. */ + ::kill(-pid, SIGKILL); /* ignore the result */ + buildUser->kill(); + pid.wait(); + } else { + pid.kill(); + } + + assert(pid == -1); + } + + hook.reset(); +} + +void DerivationGoal::timedOut() { + killChild(); + done(BuildResult::TimedOut); +} + +void DerivationGoal::work() { (this->*state)(); } + +void DerivationGoal::addWantedOutputs(const StringSet& outputs) { + /* If we already want all outputs, there is nothing to do. */ + if (wantedOutputs.empty()) { + return; + } + + if (outputs.empty()) { + wantedOutputs.clear(); + needRestart = true; + } else { + for (auto& i : outputs) { + if (wantedOutputs.find(i) == wantedOutputs.end()) { + wantedOutputs.insert(i); + needRestart = true; + } + } + } +} + +void DerivationGoal::getDerivation() { + trace("init"); + + /* The first thing to do is to make sure that the derivation + exists. If it doesn't, it may be created through a + substitute. 
*/ + if (buildMode == bmNormal && worker.store.isValidPath(drvPath)) { + loadDerivation(); + return; + } + + addWaitee(worker.makeSubstitutionGoal(drvPath)); + + state = &DerivationGoal::loadDerivation; +} + +void DerivationGoal::loadDerivation() { + trace("loading derivation"); + + if (nrFailed != 0) { + LOG(ERROR) << "cannot build missing derivation '" << drvPath << "'"; + done(BuildResult::MiscFailure); + return; + } + + /* `drvPath' should already be a root, but let's be on the safe + side: if the user forgot to make it a root, we wouldn't want + things being garbage collected while we're busy. */ + worker.store.addTempRoot(drvPath); + + assert(worker.store.isValidPath(drvPath)); + + /* Get the derivation. */ + drv = std::unique_ptr<BasicDerivation>( + new Derivation(worker.store.derivationFromPath(drvPath))); + + haveDerivation(); +} + +void DerivationGoal::haveDerivation() { + trace("have derivation"); + + retrySubstitution = false; + + for (auto& i : drv->outputs) { + worker.store.addTempRoot(i.second.path); + } + + /* Check what outputs paths are not already valid. */ + PathSet invalidOutputs = checkPathValidity(false, buildMode == bmRepair); + + /* If they are all valid, then we're done. */ + if (invalidOutputs.empty() && buildMode == bmNormal) { + done(BuildResult::AlreadyValid); + return; + } + + parsedDrv = std::make_unique<ParsedDerivation>(drvPath, *drv); + + /* We are first going to try to create the invalid output paths + through substitutes. If that doesn't work, we'll build + them. */ + if (settings.useSubstitutes && parsedDrv->substitutesAllowed()) { + for (auto& i : invalidOutputs) { + addWaitee(worker.makeSubstitutionGoal( + i, buildMode == bmRepair ? 
Repair : NoRepair)); + } + } + + if (waitees.empty()) { /* to prevent hang (no wake-up event) */ + outputsSubstituted(); + } else { + state = &DerivationGoal::outputsSubstituted; + } +} + +void DerivationGoal::outputsSubstituted() { + trace("all outputs substituted (maybe)"); + + if (nrFailed > 0 && nrFailed > nrNoSubstituters + nrIncompleteClosure && + !settings.tryFallback) { + done(BuildResult::TransientFailure, + (format("some substitutes for the outputs of derivation '%1%' failed " + "(usually happens due to networking issues); try '--fallback' " + "to build derivation from source ") % + drvPath) + .str()); + return; + } + + /* If the substitutes form an incomplete closure, then we should + build the dependencies of this derivation, but after that, we + can still use the substitutes for this derivation itself. */ + if (nrIncompleteClosure > 0) { + retrySubstitution = true; + } + + nrFailed = nrNoSubstituters = nrIncompleteClosure = 0; + + if (needRestart) { + needRestart = false; + haveDerivation(); + return; + } + + auto nrInvalid = checkPathValidity(false, buildMode == bmRepair).size(); + if (buildMode == bmNormal && nrInvalid == 0) { + done(BuildResult::Substituted); + return; + } + if (buildMode == bmRepair && nrInvalid == 0) { + repairClosure(); + return; + } + if (buildMode == bmCheck && nrInvalid > 0) { + throw Error(format("some outputs of '%1%' are not valid, so checking is " + "not possible") % + drvPath); + } + + /* Otherwise, at least one of the output paths could not be + produced using a substitute. So we have to build instead. */ + + /* Make sure checkPathValidity() from now on checks all + outputs. */ + wantedOutputs = PathSet(); + + /* The inputs must be built before we can build this goal. */ + if (useDerivation) { + for (auto& i : dynamic_cast<Derivation*>(drv.get())->inputDrvs) { + addWaitee(worker.makeDerivationGoal( + i.first, i.second, buildMode == bmRepair ? 
bmRepair : bmNormal)); + } + } + + for (auto& i : drv->inputSrcs) { + if (worker.store.isValidPath(i)) { + continue; + } + if (!settings.useSubstitutes) { + throw Error(format("dependency '%1%' of '%2%' does not exist, and " + "substitution is disabled") % + i % drvPath); + } + addWaitee(worker.makeSubstitutionGoal(i)); + } + + if (waitees.empty()) { /* to prevent hang (no wake-up event) */ + inputsRealised(); + } else { + state = &DerivationGoal::inputsRealised; + } +} + +void DerivationGoal::repairClosure() { + /* If we're repairing, we now know that our own outputs are valid. + Now check whether the other paths in the outputs closure are + good. If not, then start derivation goals for the derivations + that produced those outputs. */ + + /* Get the output closure. */ + PathSet outputClosure; + for (auto& i : drv->outputs) { + if (!wantOutput(i.first, wantedOutputs)) { + continue; + } + worker.store.computeFSClosure(i.second.path, outputClosure); + } + + /* Filter out our own outputs (which we have already checked). */ + for (auto& i : drv->outputs) { + outputClosure.erase(i.second.path); + } + + /* Get all dependencies of this derivation so that we know which + derivation is responsible for which path in the output + closure. */ + PathSet inputClosure; + if (useDerivation) { + worker.store.computeFSClosure(drvPath, inputClosure); + } + std::map<Path, Path> outputsToDrv; + for (auto& i : inputClosure) { + if (isDerivation(i)) { + Derivation drv = worker.store.derivationFromPath(i); + for (auto& j : drv.outputs) { + outputsToDrv[j.second.path] = i; + } + } + } + + /* Check each path (slow!). 
*/ + PathSet broken; + for (auto& i : outputClosure) { + if (worker.pathContentsGood(i)) { + continue; + } + LOG(ERROR) << "found corrupted or missing path '" << i + << "' in the output closure of '" << drvPath << "'"; + Path drvPath2 = outputsToDrv[i]; + if (drvPath2.empty()) { + addWaitee(worker.makeSubstitutionGoal(i, Repair)); + } else { + addWaitee(worker.makeDerivationGoal(drvPath2, PathSet(), bmRepair)); + } + } + + if (waitees.empty()) { + done(BuildResult::AlreadyValid); + return; + } + + state = &DerivationGoal::closureRepaired; +} + +void DerivationGoal::closureRepaired() { + trace("closure repaired"); + if (nrFailed > 0) { + throw Error(format("some paths in the output closure of derivation '%1%' " + "could not be repaired") % + drvPath); + } + done(BuildResult::AlreadyValid); +} + +void DerivationGoal::inputsRealised() { + trace("all inputs realised"); + + if (nrFailed != 0) { + if (!useDerivation) { + throw Error(format("some dependencies of '%1%' are missing") % drvPath); + } + LOG(ERROR) << "cannot build derivation '" << drvPath << "': " << nrFailed + << " dependencies couldn't be built"; + done(BuildResult::DependencyFailed); + return; + } + + if (retrySubstitution) { + haveDerivation(); + return; + } + + /* Gather information necessary for computing the closure and/or + running the build hook. */ + + /* The outputs are referenceable paths. */ + for (auto& i : drv->outputs) { + DLOG(INFO) << "building path " << i.second.path; + allPaths.insert(i.second.path); + } + + /* Determine the full set of input paths. */ + + /* First, the input derivations. */ + if (useDerivation) { + for (auto& i : dynamic_cast<Derivation*>(drv.get())->inputDrvs) { + /* Add the relevant output closures of the input derivation + `i' as input paths. Only add the closures of output paths + that are specified as inputs. 
*/ + assert(worker.store.isValidPath(i.first)); + Derivation inDrv = worker.store.derivationFromPath(i.first); + for (auto& j : i.second) { + if (inDrv.outputs.find(j) != inDrv.outputs.end()) { + worker.store.computeFSClosure(inDrv.outputs[j].path, inputPaths); + } else { + throw Error(format("derivation '%1%' requires non-existent output " + "'%2%' from input derivation '%3%'") % + drvPath % j % i.first); + } + } + } + } + + /* Second, the input sources. */ + worker.store.computeFSClosure(drv->inputSrcs, inputPaths); + + DLOG(INFO) << "added input paths " << showPaths(inputPaths); + + allPaths.insert(inputPaths.begin(), inputPaths.end()); + + /* Is this a fixed-output derivation? */ + fixedOutput = drv->isFixedOutput(); + + /* Don't repeat fixed-output derivations since they're already + verified by their output hash.*/ + nrRounds = fixedOutput ? 1 : settings.buildRepeat + 1; + + /* Okay, try to build. Note that here we don't wait for a build + slot to become available, since we don't need one if there is a + build hook. */ + state = &DerivationGoal::tryToBuild; + worker.wakeUp(shared_from_this()); + + result = BuildResult(); +} + +void DerivationGoal::tryToBuild() { + trace("trying to build"); + + /* Obtain locks on all output paths. The locks are automatically + released when we exit this function or Nix crashes. If we + can't acquire the lock, then continue; hopefully some other + goal can start a build, and if not, the main loop will sleep a + few seconds and then retry this goal. */ + PathSet lockFiles; + for (auto& outPath : drv->outputPaths()) { + lockFiles.insert(worker.store.toRealPath(outPath)); + } + + if (!outputLocks.lockPaths(lockFiles, "", false)) { + worker.waitForAWhile(shared_from_this()); + return; + } + + /* Now check again whether the outputs are valid. This is because + another process may have started building in parallel. After + it has finished and released the locks, we can (and should) + reuse its results. 
(Strictly speaking the first check can be + omitted, but that would be less efficient.) Note that since we + now hold the locks on the output paths, no other process can + build this derivation, so no further checks are necessary. */ + validPaths = checkPathValidity(true, buildMode == bmRepair); + if (buildMode != bmCheck && validPaths.size() == drv->outputs.size()) { + DLOG(INFO) << "skipping build of derivation '" << drvPath + << "', someone beat us to it"; + outputLocks.setDeletion(true); + done(BuildResult::AlreadyValid); + return; + } + + missingPaths = drv->outputPaths(); + if (buildMode != bmCheck) { + for (auto& i : validPaths) { + missingPaths.erase(i); + } + } + + /* If any of the outputs already exist but are not valid, delete + them. */ + for (auto& i : drv->outputs) { + Path path = i.second.path; + if (worker.store.isValidPath(path)) { + continue; + } + DLOG(INFO) << "removing invalid path " << path; + deletePath(worker.store.toRealPath(path)); + } + + /* Don't do a remote build if the derivation has the attribute + `preferLocalBuild' set. Also, check and repair modes are only + supported for local builds. */ + bool buildLocally = buildMode != bmNormal || parsedDrv->willBuildLocally(); + + auto started = [&]() { + auto msg = fmt(buildMode == bmRepair + ? "repairing outputs of '%s'" + : buildMode == bmCheck + ? "checking outputs of '%s'" + : nrRounds > 1 ? "building '%s' (round %d/%d)" + : "building '%s'", + drvPath, curRound, nrRounds); + + if (hook) { + msg += fmt(" on '%s'", machineName); + } + LOG(INFO) << msg << "[" << drvPath << "]"; + mcRunningBuilds = + std::make_unique<MaintainCount<uint64_t>>(worker.runningBuilds); + }; + + /* Is the build hook willing to accept this job? */ + if (!buildLocally) { + switch (tryBuildHook()) { + case rpAccept: + /* Yes, it has started doing so. Wait until we get + EOF from the hook. 
*/ + result.startTime = time(nullptr); // inexact + state = &DerivationGoal::buildDone; + started(); + return; + case rpPostpone: + /* Not now; wait until at least one child finishes or + the wake-up timeout expires. */ + worker.waitForAWhile(shared_from_this()); + outputLocks.unlock(); + return; + case rpDecline: + /* We should do it ourselves. */ + break; + } + } + + /* Make sure that we are allowed to start a build. If this + derivation prefers to be done locally, do it even if + maxBuildJobs is 0. */ + unsigned int curBuilds = worker.getNrLocalBuilds(); + if (curBuilds >= settings.maxBuildJobs && !(buildLocally && curBuilds == 0)) { + worker.waitForBuildSlot(shared_from_this()); + outputLocks.unlock(); + return; + } + + try { + /* Okay, we have to build. */ + startBuilder(); + + } catch (BuildError& e) { + LOG(ERROR) << e.msg(); + outputLocks.unlock(); + buildUser.reset(); + worker.permanentFailure = true; + done(BuildResult::InputRejected, e.msg()); + return; + } + + /* This state will be reached when we get EOF on the child's + log pipe. */ + state = &DerivationGoal::buildDone; + + started(); +} + +void replaceValidPath(const Path& storePath, const Path& tmpPath) { + /* We can't atomically replace storePath (the original) with + tmpPath (the replacement), so we have to move it out of the + way first. We'd better not be interrupted here, because if + we're repairing (say) Glibc, we end up with a broken system. */ + Path oldPath = + (format("%1%.old-%2%-%3%") % storePath % getpid() % random()).str(); + if (pathExists(storePath)) { + rename(storePath.c_str(), oldPath.c_str()); + } + if (rename(tmpPath.c_str(), storePath.c_str()) == -1) { + throw SysError(format("moving '%1%' to '%2%'") % tmpPath % storePath); + } + deletePath(oldPath); +} + +MakeError(NotDeterministic, BuildError) + + void DerivationGoal::buildDone() { + trace("build done"); + + /* Release the build user at the end of this function. 
We don't do + it right away because we don't want another build grabbing this + uid and then messing around with our output. */ + Finally releaseBuildUser([&]() { buildUser.reset(); }); + + /* Since we got an EOF on the logger pipe, the builder is presumed + to have terminated. In fact, the builder could also have + simply have closed its end of the pipe, so just to be sure, + kill it. */ + int status = hook ? hook->pid.kill() : pid.kill(); + + DLOG(INFO) << "builder process for '" << drvPath << "' finished"; + + result.timesBuilt++; + result.stopTime = time(nullptr); + + /* So the child is gone now. */ + worker.childTerminated(this); + + /* Close the read side of the logger pipe. */ + if (hook) { + hook->builderOut.readSide = -1; + hook->fromHook.readSide = -1; + } else { + builderOut.readSide = -1; + } + + /* Close the log file. */ + closeLogFile(); + + /* When running under a build user, make sure that all processes + running under that uid are gone. This is to prevent a + malicious user from leaving behind a process that keeps files + open and modifies them after they have been chown'ed to + root. */ + if (buildUser) { + buildUser->kill(); + } + + bool diskFull = false; + + try { + /* Check the exit status. */ + if (!statusOk(status)) { + /* Heuristically check whether the build failure may have + been caused by a disk full condition. We have no way + of knowing whether the build actually got an ENOSPC. + So instead, check if the disk is (nearly) full now. If + so, we don't mark this build as a permanent failure. 
*/ +#if HAVE_STATVFS + unsigned long long required = + 8ULL * 1024 * 1024; // FIXME: make configurable + struct statvfs st; + if (statvfs(worker.store.realStoreDir.c_str(), &st) == 0 && + (unsigned long long)st.f_bavail * st.f_bsize < required) { + diskFull = true; + } + if (statvfs(tmpDir.c_str(), &st) == 0 && + (unsigned long long)st.f_bavail * st.f_bsize < required) { + diskFull = true; + } +#endif + + deleteTmpDir(false); + + /* Move paths out of the chroot for easier debugging of + build failures. */ + if (useChroot && buildMode == bmNormal) { + for (auto& i : missingPaths) { + if (pathExists(chrootRootDir + i)) { + rename((chrootRootDir + i).c_str(), i.c_str()); + } + } + } + + std::string msg = + (format("builder for '%1%' %2%") % drvPath % statusToString(status)) + .str(); + + if (!settings.verboseBuild && !logTail.empty()) { + msg += (format("; last %d log lines:") % logTail.size()).str(); + for (auto& line : logTail) { + msg += "\n " + line; + } + } + + if (diskFull) { + msg += + "\nnote: build failure may have been caused by lack of free disk " + "space"; + } + + throw BuildError(msg); + } + + /* Compute the FS closure of the outputs and register them as + being valid. 
*/ + registerOutputs(); + + if (settings.postBuildHook != "") { + LOG(INFO) << "running post-build-hook '" << settings.postBuildHook + << "' [" << drvPath << "]"; + auto outputPaths = drv->outputPaths(); + std::map<std::string, std::string> hookEnvironment = getEnv(); + + hookEnvironment.emplace("DRV_PATH", drvPath); + hookEnvironment.emplace("OUT_PATHS", + absl::StripTrailingAsciiWhitespace( + concatStringsSep(" ", outputPaths))); + + RunOptions opts(settings.postBuildHook, {}); + opts.environment = hookEnvironment; + + struct LogSink : Sink { + std::string currentLine; + + void operator()(const unsigned char* data, size_t len) override { + for (size_t i = 0; i < len; i++) { + auto c = data[i]; + + if (c == '\n') { + flushLine(); + } else { + currentLine += c; + } + } + } + + void flushLine() { + if (settings.verboseBuild) { + LOG(ERROR) << "post-build-hook: " << currentLine; + } + currentLine.clear(); + } + + ~LogSink() override { + if (!currentLine.empty()) { + currentLine += '\n'; + flushLine(); + } + } + }; + LogSink sink; + + opts.standardOut = &sink; + opts.mergeStderrToStdout = true; + runProgram2(opts); + } + + if (buildMode == bmCheck) { + done(BuildResult::Built); + return; + } + + /* Delete unused redirected outputs (when doing hash rewriting). */ + for (auto& i : redirectedOutputs) { + deletePath(i.second); + } + + /* Delete the chroot (if we were using one). */ + autoDelChroot.reset(); /* this runs the destructor */ + + deleteTmpDir(true); + + /* Repeat the build if necessary. */ + if (curRound++ < nrRounds) { + outputLocks.unlock(); + state = &DerivationGoal::tryToBuild; + worker.wakeUp(shared_from_this()); + return; + } + + /* It is now safe to delete the lock files, since all future + lockers will see that the output paths are valid; they will + not create new lock files with the same names as the old + (unlinked) lock files. 
*/ + outputLocks.setDeletion(true); + outputLocks.unlock(); + + } catch (BuildError& e) { + LOG(ERROR) << e.msg(); + + outputLocks.unlock(); + + BuildResult::Status st = BuildResult::MiscFailure; + + if (hook && WIFEXITED(status) && WEXITSTATUS(status) == 101) { + st = BuildResult::TimedOut; + + } else if (hook && (!WIFEXITED(status) || WEXITSTATUS(status) != 100)) { + } + + else { + st = dynamic_cast<NotDeterministic*>(&e) != nullptr + ? BuildResult::NotDeterministic + : statusOk(status) + ? BuildResult::OutputRejected + : fixedOutput || diskFull ? BuildResult::TransientFailure + : BuildResult::PermanentFailure; + } + + done(st, e.msg()); + return; + } + + done(BuildResult::Built); +} + +HookReply DerivationGoal::tryBuildHook() { + if (!worker.tryBuildHook || !useDerivation) { + return rpDecline; + } + + if (!worker.hook) { + worker.hook = std::make_unique<HookInstance>(); + } + + try { + /* Send the request to the hook. */ + worker.hook->sink << "try" + << (worker.getNrLocalBuilds() < settings.maxBuildJobs ? 1 + : 0) + << drv->platform << drvPath + << parsedDrv->getRequiredSystemFeatures(); + worker.hook->sink.flush(); + + /* Read the first line of input, which should be a word indicating + whether the hook wishes to perform the build. 
*/ + std::string reply; + while (true) { + std::string s = readLine(worker.hook->fromHook.readSide.get()); + if (std::string(s, 0, 2) == "# ") { + reply = std::string(s, 2); + break; + } + s += "\n"; + std::cerr << s; + } + + DLOG(INFO) << "hook reply is " << reply; + + if (reply == "decline") { + return rpDecline; + } + if (reply == "decline-permanently") { + worker.tryBuildHook = false; + worker.hook = nullptr; + return rpDecline; + } else if (reply == "postpone") { + return rpPostpone; + } else if (reply != "accept") { + throw Error(format("bad hook reply '%1%'") % reply); + } + } catch (SysError& e) { + if (e.errNo == EPIPE) { + LOG(ERROR) << "build hook died unexpectedly: " + << absl::StripTrailingAsciiWhitespace( + drainFD(worker.hook->fromHook.readSide.get())); + worker.hook = nullptr; + return rpDecline; + } + throw; + } + + hook = std::move(worker.hook); + + machineName = readLine(hook->fromHook.readSide.get()); + + /* Tell the hook all the inputs that have to be copied to the + remote system. */ + hook->sink << inputPaths; + + /* Tell the hooks the missing outputs that have to be copied back + from the remote system. */ + hook->sink << missingPaths; + + hook->sink = FdSink(); + hook->toHook.writeSide = -1; + + /* Create the log file and pipe. */ + Path logFile = openLogFile(); + + std::set<int> fds; + fds.insert(hook->fromHook.readSide.get()); + fds.insert(hook->builderOut.readSide.get()); + worker.childStarted(shared_from_this(), fds, false, false); + + return rpAccept; +} + +void chmod_(const Path& path, mode_t mode) { + if (chmod(path.c_str(), mode) == -1) { + throw SysError(format("setting permissions on '%1%'") % path); + } +} + +int childEntry(void* arg) { + ((DerivationGoal*)arg)->runChild(); + return 1; +} + +PathSet DerivationGoal::exportReferences(const PathSet& storePaths) { + PathSet paths; + + for (auto storePath : storePaths) { + /* Check that the store path is valid. 
*/ + if (!worker.store.isInStore(storePath)) { + throw BuildError( + format("'exportReferencesGraph' contains a non-store path '%1%'") % + storePath); + } + + storePath = worker.store.toStorePath(storePath); + + if (inputPaths.count(storePath) == 0u) { + throw BuildError( + "cannot export references of path '%s' because it is not in the " + "input closure of the derivation", + storePath); + } + + worker.store.computeFSClosure(storePath, paths); + } + + /* If there are derivations in the graph, then include their + outputs as well. This is useful if you want to do things + like passing all build-time dependencies of some path to a + derivation that builds a NixOS DVD image. */ + PathSet paths2(paths); + + for (auto& j : paths2) { + if (isDerivation(j)) { + Derivation drv = worker.store.derivationFromPath(j); + for (auto& k : drv.outputs) { + worker.store.computeFSClosure(k.second.path, paths); + } + } + } + + return paths; +} + +static std::once_flag dns_resolve_flag; + +static void preloadNSS() { + /* builtin:fetchurl can trigger a DNS lookup, which with glibc can trigger a + dynamic library load of one of the glibc NSS libraries in a sandboxed + child, which will fail unless the library's already been loaded in the + parent. So we force a lookup of an invalid domain to force the NSS + machinery to + load its lookup libraries in the parent before any child gets a chance to. + */ + std::call_once(dns_resolve_flag, []() { + struct addrinfo* res = nullptr; + + if (getaddrinfo("this.pre-initializes.the.dns.resolvers.invalid.", "http", + nullptr, &res) != 0) { + if (res != nullptr) { + freeaddrinfo(res); + } + } + }); +} + +void DerivationGoal::startBuilder() { + /* Right platform? 
*/ + if (!parsedDrv->canBuildLocally()) { + throw Error( + "a '%s' with features {%s} is required to build '%s', but I am a '%s' " + "with features {%s}", + drv->platform, + concatStringsSep(", ", parsedDrv->getRequiredSystemFeatures()), drvPath, + settings.thisSystem, concatStringsSep(", ", settings.systemFeatures)); + } + + if (drv->isBuiltin()) { + preloadNSS(); + } + + /* Are we doing a chroot build? */ + { + auto noChroot = parsedDrv->getBoolAttr("__noChroot"); + if (settings.sandboxMode == smEnabled) { + if (noChroot) { + throw Error(format("derivation '%1%' has '__noChroot' set, " + "but that's not allowed when 'sandbox' is 'true'") % + drvPath); + } + useChroot = true; + } else if (settings.sandboxMode == smDisabled) { + useChroot = false; + } else if (settings.sandboxMode == smRelaxed) { + useChroot = !fixedOutput && !noChroot; + } + } + + if (worker.store.storeDir != worker.store.realStoreDir) { + useChroot = true; + } + + /* If `build-users-group' is not empty, then we have to build as + one of the members of that group. */ + if (settings.buildUsersGroup != "" && getuid() == 0) { + buildUser = std::make_unique<UserLock>(); + + /* Make sure that no other processes are executing under this + uid. */ + buildUser->kill(); + } + + /* Create a temporary directory where the build will take + place. */ + auto drvName = storePathToName(drvPath); + tmpDir = createTempDir("", "nix-build-" + drvName, false, false, 0700); + + chownToBuilder(tmpDir); + + /* Substitute output placeholders with the actual output paths. */ + for (auto& output : drv->outputs) { + inputRewrites[hashPlaceholder(output.first)] = output.second.path; + } + + /* Construct the environment passed to the builder. */ + initEnv(); + + writeStructuredAttrs(); + + /* Handle exportReferencesGraph(), if set. */ + if (!parsedDrv->getStructuredAttrs()) { + /* The `exportReferencesGraph' feature allows the references graph + to be passed to a builder. 
This attribute should be a list of + pairs [name1 path1 name2 path2 ...]. The references graph of + each `pathN' will be stored in a text file `nameN' in the + temporary build directory. The text files have the format used + by `nix-store --register-validity'. However, the deriver + fields are left empty. */ + std::string s = get(drv->env, "exportReferencesGraph"); + /* Skip empty tokens: splitting an empty string otherwise yields one + empty element, so an unset attribute would be rejected below as + having an odd number of tokens. Runs of whitespace between tokens + are tolerated for the same reason. */ + std::vector<std::string> ss = + absl::StrSplit(s, absl::ByAnyChar(" \t\n\r"), absl::SkipEmpty()); + if (ss.size() % 2 != 0) { + throw BuildError( + format("odd number of tokens in 'exportReferencesGraph': '%1%'") % s); + } + for (auto i = ss.begin(); i != ss.end();) { + std::string fileName = *i++; + checkStoreName(fileName); /* !!! abuse of this function */ + Path storePath = *i++; + + /* Write closure info to <fileName>. */ + writeFile(tmpDir + "/" + fileName, + worker.store.makeValidityRegistration( + exportReferences({storePath}), false, false)); + } + } + + if (useChroot) { + /* Allow a user-configurable set of directories from the + host file system. */ + PathSet dirs = settings.sandboxPaths; + PathSet dirs2 = settings.extraSandboxPaths; + dirs.insert(dirs2.begin(), dirs2.end()); + + dirsInChroot.clear(); + + for (auto i : dirs) { + if (i.empty()) { + continue; + } + bool optional = false; + if (i[i.size() - 1] == '?') { + optional = true; + i.pop_back(); + } + size_t p = i.find('='); + if (p == std::string::npos) { + dirsInChroot[i] = ChrootPath(i, optional); + } else { + dirsInChroot[std::string(i, 0, p)] = + ChrootPath(std::string(i, p + 1), optional); + } + } + dirsInChroot[tmpDirInSandbox] = ChrootPath(tmpDir); + + /* Add the closure of store paths to the chroot. 
*/ + PathSet closure; + for (auto& i : dirsInChroot) { + try { + if (worker.store.isInStore(i.second.source)) { + worker.store.computeFSClosure( + worker.store.toStorePath(i.second.source), closure); + } + } catch (InvalidPath& e) { + } catch (Error& e) { + throw Error(format("while processing 'sandbox-paths': %s") % e.what()); + } + } + for (auto& i : closure) { + dirsInChroot[i] = ChrootPath(i); + } + + PathSet allowedPaths = settings.allowedImpureHostPrefixes; + + /* This works like the above, except on a per-derivation level */ + auto impurePaths = + parsedDrv->getStringsAttr("__impureHostDeps").value_or(Strings()); + + for (auto& i : impurePaths) { + bool found = false; + /* Note: we're not resolving symlinks here to prevent + giving a non-root user info about inaccessible + files. */ + Path canonI = canonPath(i); + /* If only we had a trie to do this more efficiently :) luckily, these are + * generally going to be pretty small */ + for (auto& a : allowedPaths) { + Path canonA = canonPath(a); + if (canonI == canonA || isInDir(canonI, canonA)) { + found = true; + break; + } + } + if (!found) { + throw Error(format("derivation '%1%' requested impure path '%2%', but " + "it was not in allowed-impure-host-deps") % + drvPath % i); + } + + dirsInChroot[i] = ChrootPath(i); + } + + /* Create a temporary directory in which we set up the chroot + environment using bind-mounts. We put it in the Nix store + to ensure that we can create hard-links to non-directory + inputs in the fake Nix store in the chroot (see below). */ + chrootRootDir = worker.store.toRealPath(drvPath) + ".chroot"; + deletePath(chrootRootDir); + + /* Clean up the chroot directory automatically. 
*/ + autoDelChroot = std::make_shared<AutoDelete>(chrootRootDir); + + DLOG(INFO) << "setting up chroot environment in '" << chrootRootDir << "'"; + + if (mkdir(chrootRootDir.c_str(), 0750) == -1) { + throw SysError(format("cannot create '%1%'") % chrootRootDir); + } + + if (buildUser && + chown(chrootRootDir.c_str(), 0, buildUser->getGID()) == -1) { + throw SysError(format("cannot change ownership of '%1%'") % + chrootRootDir); + } + + /* Create a writable /tmp in the chroot. Many builders need + this. (Of course they should really respect $TMPDIR + instead.) */ + Path chrootTmpDir = chrootRootDir + "/tmp"; + createDirs(chrootTmpDir); + chmod_(chrootTmpDir, 01777); + + /* Create a /etc/passwd with entries for the build user and the + nobody account. The latter is kind of a hack to support + Samba-in-QEMU. */ + createDirs(chrootRootDir + "/etc"); + + writeFile(chrootRootDir + "/etc/passwd", + fmt("root:x:0:0:Nix build user:%3%:/noshell\n" + "nixbld:x:%1%:%2%:Nix build user:%3%:/noshell\n" + "nobody:x:65534:65534:Nobody:/:/noshell\n", + sandboxUid, sandboxGid, settings.sandboxBuildDir)); + + /* Declare the build user's group so that programs get a consistent + view of the system (e.g., "id -gn"). */ + writeFile(chrootRootDir + "/etc/group", (format("root:x:0:\n" + "nixbld:!:%1%:\n" + "nogroup:x:65534:\n") % + sandboxGid) + .str()); + + /* Create /etc/hosts with localhost entry. */ + if (!fixedOutput) { + writeFile(chrootRootDir + "/etc/hosts", + "127.0.0.1 localhost\n::1 localhost\n"); + } + + /* Make the closure of the inputs available in the chroot, + rather than the whole Nix store. This prevents any access + to undeclared dependencies. Directories are bind-mounted, + while other inputs are hard-linked (since only directories + can be bind-mounted). !!! As an extra security + precaution, make the fake Nix store only writable by the + build user. 
*/ + Path chrootStoreDir = chrootRootDir + worker.store.storeDir; + createDirs(chrootStoreDir); + chmod_(chrootStoreDir, 01775); + + if (buildUser && + chown(chrootStoreDir.c_str(), 0, buildUser->getGID()) == -1) { + throw SysError(format("cannot change ownership of '%1%'") % + chrootStoreDir); + } + + for (auto& i : inputPaths) { + Path r = worker.store.toRealPath(i); + struct stat st; + if (lstat(r.c_str(), &st) != 0) { + throw SysError(format("getting attributes of path '%1%'") % i); + } + if (S_ISDIR(st.st_mode)) { + dirsInChroot[i] = ChrootPath(r); + } else { + Path p = chrootRootDir + i; + DLOG(INFO) << "linking '" << p << "' to '" << r << "'"; + if (link(r.c_str(), p.c_str()) == -1) { + /* Hard-linking fails if we exceed the maximum + link count on a file (e.g. 32000 of ext3), + which is quite possible after a `nix-store + --optimise'. */ + if (errno != EMLINK) { + throw SysError(format("linking '%1%' to '%2%'") % p % i); + } + StringSink sink; + dumpPath(r, sink); + StringSource source(*sink.s); + restorePath(p, source); + } + } + } + + /* If we're repairing, checking or rebuilding part of a + multiple-outputs derivation, it's possible that we're + rebuilding a path that is in settings.dirsInChroot + (typically the dependencies of /bin/sh). Throw them + out. */ + for (auto& i : drv->outputs) { + dirsInChroot.erase(i.second.path); + } + } + + if (needsHashRewrite()) { + if (pathExists(homeDir)) { + throw Error(format("directory '%1%' exists; please remove it") % homeDir); + } + + /* We're not doing a chroot build, but we have some valid + output paths. Since we can't just overwrite or delete + them, we have to do hash rewriting: i.e. in the + environment/arguments passed to the build, we replace the + hashes of the valid outputs with unique dummy strings; + after the build, we discard the redirected outputs + corresponding to the valid outputs, and rewrite the + contents of the new outputs to replace the dummy strings + with the actual hashes. 
*/ + if (!validPaths.empty()) { + for (auto& i : validPaths) { + addHashRewrite(i); + } + } + + /* If we're repairing, then we don't want to delete the + corrupt outputs in advance. So rewrite them as well. */ + if (buildMode == bmRepair) { + for (auto& i : missingPaths) { + if (worker.store.isValidPath(i) && pathExists(i)) { + addHashRewrite(i); + redirectedBadOutputs.insert(i); + } + } + } + } + + if (useChroot && settings.preBuildHook != "" && + (dynamic_cast<Derivation*>(drv.get()) != nullptr)) { + DLOG(INFO) << "executing pre-build hook '" << settings.preBuildHook << "'"; + auto args = + useChroot ? Strings({drvPath, chrootRootDir}) : Strings({drvPath}); + enum BuildHookState { stBegin, stExtraChrootDirs }; + auto state = stBegin; + auto lines = runProgram(settings.preBuildHook, false, args); + auto lastPos = std::string::size_type{0}; + for (auto nlPos = lines.find('\n'); nlPos != std::string::npos; + nlPos = lines.find('\n', lastPos)) { + auto line = std::string{lines, lastPos, nlPos - lastPos}; + lastPos = nlPos + 1; + if (state == stBegin) { + if (line == "extra-sandbox-paths" || line == "extra-chroot-dirs") { + state = stExtraChrootDirs; + } else { + throw Error(format("unknown pre-build hook command '%1%'") % line); + } + } else if (state == stExtraChrootDirs) { + if (line.empty()) { + state = stBegin; + } else { + auto p = line.find('='); + if (p == std::string::npos) { + dirsInChroot[line] = ChrootPath(line); + } else { + dirsInChroot[std::string(line, 0, p)] = + ChrootPath(std::string(line, p + 1)); + } + } + } + } + } + + /* Run the builder. */ + DLOG(INFO) << "executing builder '" << drv->builder << "'"; + + /* Create the log file. */ + Path logFile = openLogFile(); + + /* Create a pipe to get the output of the builder. 
*/ + // builderOut.create(); + + builderOut.readSide = posix_openpt(O_RDWR | O_NOCTTY); + if (!builderOut.readSide) { + throw SysError("opening pseudoterminal master"); + } + + std::string slaveName(ptsname(builderOut.readSide.get())); + + if (buildUser) { + if (chmod(slaveName.c_str(), 0600) != 0) { + throw SysError("changing mode of pseudoterminal slave"); + } + + if (chown(slaveName.c_str(), buildUser->getUID(), 0) != 0) { + throw SysError("changing owner of pseudoterminal slave"); + } + } else { + if (grantpt(builderOut.readSide.get()) != 0) { + throw SysError("granting access to pseudoterminal slave"); + } + } + +#if 0 + // Mount the pt in the sandbox so that the "tty" command works. + // FIXME: this doesn't work with the new devpts in the sandbox. + if (useChroot) + dirsInChroot[slaveName] = {slaveName, false}; +#endif + + if (unlockpt(builderOut.readSide.get()) != 0) { + throw SysError("unlocking pseudoterminal"); + } + + builderOut.writeSide = open(slaveName.c_str(), O_RDWR | O_NOCTTY); + if (!builderOut.writeSide) { + throw SysError("opening pseudoterminal slave"); + } + + // Put the pt into raw mode to prevent \n -> \r\n translation. + struct termios term; + if (tcgetattr(builderOut.writeSide.get(), &term) != 0) { + throw SysError("getting pseudoterminal attributes"); + } + + cfmakeraw(&term); + + if (tcsetattr(builderOut.writeSide.get(), TCSANOW, &term) != 0) { + throw SysError("putting pseudoterminal into raw mode"); + } + + result.startTime = time(nullptr); + + /* Fork a child to build the package. */ + ProcessOptions options; + +#if __linux__ + if (useChroot) { + /* Set up private namespaces for the build: + + - The PID namespace causes the build to start as PID 1. + Processes outside of the chroot are not visible to those + on the inside, but processes inside the chroot are + visible from the outside (though with different PIDs). 
+ + - The private mount namespace ensures that all the bind + mounts we do will only show up in this process and its + children, and will disappear automatically when we're + done. + + - The private network namespace ensures that the builder + cannot talk to the outside world (or vice versa). It + only has a private loopback interface. (Fixed-output + derivations are not run in a private network namespace + to allow functions like fetchurl to work.) + + - The IPC namespace prevents the builder from communicating + with outside processes using SysV IPC mechanisms (shared + memory, message queues, semaphores). It also ensures + that all IPC objects are destroyed when the builder + exits. + + - The UTS namespace ensures that builders see a hostname of + localhost rather than the actual hostname. + + We use a helper process to do the clone() to work around + clone() being broken in multi-threaded programs due to + at-fork handlers not being run. Note that we use + CLONE_PARENT to ensure that the real builder is parented to + us. + */ + + if (!fixedOutput) { + privateNetwork = true; + } + + userNamespaceSync.create(); + + options.allowVfork = false; + + Pid helper = startProcess( + [&]() { + /* Drop additional groups here because we can't do it + after we've created the new user namespace. FIXME: + this means that if we're not root in the parent + namespace, we can't drop additional groups; they will + be mapped to nogroup in the child namespace. There does + not seem to be a workaround for this. (But who can tell + from reading user_namespaces(7)?) + See also https://lwn.net/Articles/621612/. 
*/ + if (getuid() == 0 && setgroups(0, nullptr) == -1) { + throw SysError("setgroups failed"); + } + + size_t stackSize = 1 * 1024 * 1024; + char* stack = + (char*)mmap(nullptr, stackSize, PROT_WRITE | PROT_READ, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK, -1, 0); + if (stack == MAP_FAILED) { + throw SysError("allocating stack"); + } + + int flags = CLONE_NEWUSER | CLONE_NEWPID | CLONE_NEWNS | + CLONE_NEWIPC | CLONE_NEWUTS | CLONE_PARENT | SIGCHLD; + if (privateNetwork) { + flags |= CLONE_NEWNET; + } + + pid_t child = clone(childEntry, stack + stackSize, flags, this); + if (child == -1 && errno == EINVAL) { + /* Fallback for Linux < 2.13 where CLONE_NEWPID and + CLONE_PARENT are not allowed together. */ + flags &= ~CLONE_NEWPID; + child = clone(childEntry, stack + stackSize, flags, this); + } + if (child == -1 && (errno == EPERM || errno == EINVAL)) { + /* Some distros patch Linux to not allow unprivileged + * user namespaces. If we get EPERM or EINVAL, try + * without CLONE_NEWUSER and see if that works. + */ + flags &= ~CLONE_NEWUSER; + child = clone(childEntry, stack + stackSize, flags, this); + } + /* Otherwise exit with EPERM so we can handle this in the + parent. This is only done when sandbox-fallback is set + to true (the default). 
*/ + if (child == -1 && (errno == EPERM || errno == EINVAL) && + settings.sandboxFallback) { + _exit(1); + } + if (child == -1) { + throw SysError("cloning builder process"); + } + + writeFull(builderOut.writeSide.get(), std::to_string(child) + "\n"); + _exit(0); + }, + options); + + int res = helper.wait(); + if (res != 0 && settings.sandboxFallback) { + useChroot = false; + initTmpDir(); + goto fallback; + } else if (res != 0) { + throw Error("unable to start build process"); + } + + userNamespaceSync.readSide = -1; + + pid_t tmp; + if (!absl::SimpleAtoi(readLine(builderOut.readSide.get()), &tmp)) { + abort(); + } + pid = tmp; + + /* Set the UID/GID mapping of the builder's user namespace + such that the sandbox user maps to the build user, or to + the calling user (if build users are disabled). */ + uid_t hostUid = buildUser ? buildUser->getUID() : getuid(); + uid_t hostGid = buildUser ? buildUser->getGID() : getgid(); + + writeFile("/proc/" + std::to_string(pid) + "/uid_map", + (format("%d %d 1") % sandboxUid % hostUid).str()); + + writeFile("/proc/" + std::to_string(pid) + "/setgroups", "deny"); + + writeFile("/proc/" + std::to_string(pid) + "/gid_map", + (format("%d %d 1") % sandboxGid % hostGid).str()); + + /* Signal the builder that we've updated its user + namespace. */ + writeFull(userNamespaceSync.writeSide.get(), "1"); + userNamespaceSync.writeSide = -1; + + } else +#endif + { + fallback: + options.allowVfork = !buildUser && !drv->isBuiltin(); + pid = startProcess([&]() { runChild(); }, options); + } + + /* parent */ + pid.setSeparatePG(true); + builderOut.writeSide = -1; + worker.childStarted(shared_from_this(), {builderOut.readSide.get()}, true, + true); + + /* Check if setting up the build environment failed. 
*/ + while (true) { + std::string msg = readLine(builderOut.readSide.get()); + if (std::string(msg, 0, 1) == "\1") { + if (msg.size() == 1) { + break; + } + throw Error(std::string(msg, 1)); + } + DLOG(INFO) << msg; + } +} + +void DerivationGoal::initTmpDir() { + /* In a sandbox, for determinism, always use the same temporary + directory. */ +#if __linux__ + tmpDirInSandbox = useChroot ? settings.sandboxBuildDir : tmpDir; +#else + tmpDirInSandbox = tmpDir; +#endif + + /* In non-structured mode, add all bindings specified in the + derivation via the environment, except those listed in the + passAsFile attribute. Those are passed as file names pointing + to temporary files containing the contents. Note that + passAsFile is ignored in structure mode because it's not + needed (attributes are not passed through the environment, so + there is no size constraint). */ + if (!parsedDrv->getStructuredAttrs()) { + std::set<std::string> passAsFile = + absl::StrSplit(get(drv->env, "passAsFile"), absl::ByAnyChar(" \t\n\r")); + int fileNr = 0; + for (auto& i : drv->env) { + if (passAsFile.find(i.first) == passAsFile.end()) { + env[i.first] = i.second; + } else { + std::string fn = ".attr-" + std::to_string(fileNr++); + Path p = tmpDir + "/" + fn; + writeFile(p, rewriteStrings(i.second, inputRewrites)); + chownToBuilder(p); + env[i.first + "Path"] = tmpDirInSandbox + "/" + fn; + } + } + } + + /* For convenience, set an environment pointing to the top build + directory. */ + env["NIX_BUILD_TOP"] = tmpDirInSandbox; + + /* Also set TMPDIR and variants to point to this directory. */ + env["TMPDIR"] = env["TEMPDIR"] = env["TMP"] = env["TEMP"] = tmpDirInSandbox; + + /* Explicitly set PWD to prevent problems with chroot builds. In + particular, dietlibc cannot figure out the cwd because the + inode of the current directory doesn't appear in .. (because + getdents returns the inode of the mount point). 
*/ + env["PWD"] = tmpDirInSandbox; +} + +void DerivationGoal::initEnv() { + env.clear(); + + /* Most shells initialise PATH to some default (/bin:/usr/bin:...) when + PATH is not set. We don't want this, so we fill it in with some dummy + value. */ + env["PATH"] = "/path-not-set"; + + /* Set HOME to a non-existing path to prevent certain programs from using + /etc/passwd (or NIS, or whatever) to locate the home directory (for + example, wget looks for ~/.wgetrc). I.e., these tools use /etc/passwd + if HOME is not set, but they will just assume that the settings file + they are looking for does not exist if HOME is set but points to some + non-existing path. */ + env["HOME"] = homeDir; + + /* Tell the builder where the Nix store is. Usually they + shouldn't care, but this is useful for purity checking (e.g., + the compiler or linker might only want to accept paths to files + in the store or in the build directory). */ + env["NIX_STORE"] = worker.store.storeDir; + + /* The maximum number of cores to utilize for parallel building. */ + env["NIX_BUILD_CORES"] = (format("%d") % settings.buildCores).str(); + + initTmpDir(); + + /* Compatibility hack with Nix <= 0.7: if this is a fixed-output + derivation, tell the builder, so that for instance `fetchurl' + can skip checking the output. On older Nixes, this environment + variable won't be set, so `fetchurl' will do the check. */ + if (fixedOutput) { + env["NIX_OUTPUT_CHECKED"] = "1"; + } + + /* *Only* if this is a fixed-output derivation, propagate the + values of the environment variables specified in the + `impureEnvVars' attribute to the builder. This allows for + instance environment variables for proxy configuration such as + `http_proxy' to be easily passed to downloaders like + `fetchurl'. Passing such environment variables from the caller + to the builder is generally impure, but the output of + fixed-output derivations is by definition pure (since we + already know the cryptographic hash of the output). 
*/ + if (fixedOutput) { + for (auto& i : + parsedDrv->getStringsAttr("impureEnvVars").value_or(Strings())) { + env[i] = getEnv(i); + } + } + + /* Currently structured log messages piggyback on stderr, but we + may change that in the future. So tell the builder which file + descriptor to use for that. */ + env["NIX_LOG_FD"] = "2"; + + /* Trigger colored output in various tools. */ + env["TERM"] = "xterm-256color"; +} + +static std::regex shVarName("[A-Za-z_][A-Za-z0-9_]*"); + +void DerivationGoal::writeStructuredAttrs() { + auto& structuredAttrs = parsedDrv->getStructuredAttrs(); + if (!structuredAttrs) { + return; + } + + auto json = *structuredAttrs; + + /* Add an "outputs" object containing the output paths. */ + nlohmann::json outputs; + for (auto& i : drv->outputs) { + outputs[i.first] = rewriteStrings(i.second.path, inputRewrites); + } + json["outputs"] = outputs; + + /* Handle exportReferencesGraph. */ + auto e = json.find("exportReferencesGraph"); + if (e != json.end() && e->is_object()) { + for (auto i = e->begin(); i != e->end(); ++i) { + std::ostringstream str; + { + JSONPlaceholder jsonRoot(str, true); + PathSet storePaths; + for (auto& p : *i) { + storePaths.insert(p.get<std::string>()); + } + worker.store.pathInfoToJSON(jsonRoot, exportReferences(storePaths), + false, true); + } + json[i.key()] = nlohmann::json::parse(str.str()); // urgh + } + } + + writeFile(tmpDir + "/.attrs.json", + rewriteStrings(json.dump(), inputRewrites)); + chownToBuilder(tmpDir + "/.attrs.json"); + + /* As a convenience to bash scripts, write a shell file that + maps all attributes that are representable in bash - + namely, strings, integers, nulls, Booleans, and arrays and + objects consisting entirely of those values. (So nested + arrays or objects are not supported.) 
*/ + + auto handleSimpleType = + [](const nlohmann::json& value) -> std::optional<std::string> { + if (value.is_string()) { + return shellEscape(value); + } + + if (value.is_number()) { + auto f = value.get<float>(); + if (std::ceil(f) == f) { + return std::to_string(value.get<int>()); + } + } + + if (value.is_null()) { + return std::string("''"); + } + + if (value.is_boolean()) { + return value.get<bool>() ? std::string("1") : std::string(""); + } + + return {}; + }; + + std::string jsonSh; + + for (auto i = json.begin(); i != json.end(); ++i) { + if (!std::regex_match(i.key(), shVarName)) { + continue; + } + + auto& value = i.value(); + + auto s = handleSimpleType(value); + if (s) { + jsonSh += fmt("declare %s=%s\n", i.key(), *s); + + } else if (value.is_array()) { + std::string s2; + bool good = true; + + for (auto i = value.begin(); i != value.end(); ++i) { + auto s3 = handleSimpleType(i.value()); + if (!s3) { + good = false; + break; + } + s2 += *s3; + s2 += ' '; + } + + if (good) { + jsonSh += fmt("declare -a %s=(%s)\n", i.key(), s2); + } + } + + else if (value.is_object()) { + std::string s2; + bool good = true; + + for (auto i = value.begin(); i != value.end(); ++i) { + auto s3 = handleSimpleType(i.value()); + if (!s3) { + good = false; + break; + } + s2 += fmt("[%s]=%s ", shellEscape(i.key()), *s3); + } + + if (good) { + jsonSh += fmt("declare -A %s=(%s)\n", i.key(), s2); + } + } + } + + writeFile(tmpDir + "/.attrs.sh", rewriteStrings(jsonSh, inputRewrites)); + chownToBuilder(tmpDir + "/.attrs.sh"); +} + +void DerivationGoal::chownToBuilder(const Path& path) { + if (!buildUser) { + return; + } + if (chown(path.c_str(), buildUser->getUID(), buildUser->getGID()) == -1) { + throw SysError(format("cannot change ownership of '%1%'") % path); + } +} + +void setupSeccomp() { +#if __linux__ + if (!settings.filterSyscalls) { + return; + } +#if HAVE_SECCOMP + scmp_filter_ctx ctx; + + if ((ctx = seccomp_init(SCMP_ACT_ALLOW)) == nullptr) { + throw SysError("unable to 
initialize seccomp mode 2"); + } + + Finally cleanup([&]() { seccomp_release(ctx); }); + + if (nativeSystem == "x86_64-linux" && + seccomp_arch_add(ctx, SCMP_ARCH_X86) != 0) { + throw SysError("unable to add 32-bit seccomp architecture"); + } + + if (nativeSystem == "x86_64-linux" && + seccomp_arch_add(ctx, SCMP_ARCH_X32) != 0) { + throw SysError("unable to add X32 seccomp architecture"); + } + + if (nativeSystem == "aarch64-linux" && + seccomp_arch_add(ctx, SCMP_ARCH_ARM) != 0) { + LOG(ERROR) << "unable to add ARM seccomp architecture; this may result in " + << "spurious build failures if running 32-bit ARM processes"; + } + + /* Prevent builders from creating setuid/setgid binaries. */ + for (int perm : {S_ISUID, S_ISGID}) { + if (seccomp_rule_add(ctx, SCMP_ACT_ERRNO(EPERM), SCMP_SYS(chmod), 1, + SCMP_A1(SCMP_CMP_MASKED_EQ, (scmp_datum_t)perm, + (scmp_datum_t)perm)) != 0) { + throw SysError("unable to add seccomp rule"); + } + + if (seccomp_rule_add(ctx, SCMP_ACT_ERRNO(EPERM), SCMP_SYS(fchmod), 1, + SCMP_A1(SCMP_CMP_MASKED_EQ, (scmp_datum_t)perm, + (scmp_datum_t)perm)) != 0) { + throw SysError("unable to add seccomp rule"); + } + + if (seccomp_rule_add(ctx, SCMP_ACT_ERRNO(EPERM), SCMP_SYS(fchmodat), 1, + SCMP_A2(SCMP_CMP_MASKED_EQ, (scmp_datum_t)perm, + (scmp_datum_t)perm)) != 0) { + throw SysError("unable to add seccomp rule"); + } + } + + /* Prevent builders from creating EAs or ACLs. Not all filesystems + support these, and they're not allowed in the Nix store because + they're not representable in the NAR serialisation. */ + if (seccomp_rule_add(ctx, SCMP_ACT_ERRNO(ENOTSUP), SCMP_SYS(setxattr), 0) != + 0 || + seccomp_rule_add(ctx, SCMP_ACT_ERRNO(ENOTSUP), SCMP_SYS(lsetxattr), 0) != + 0 || + seccomp_rule_add(ctx, SCMP_ACT_ERRNO(ENOTSUP), SCMP_SYS(fsetxattr), 0) != + 0) { + throw SysError("unable to add seccomp rule"); + } + + if (seccomp_attr_set(ctx, SCMP_FLTATR_CTL_NNP, + settings.allowNewPrivileges ? 
0 : 1) != 0) { + throw SysError("unable to set 'no new privileges' seccomp attribute"); + } + + if (seccomp_load(ctx) != 0) { + throw SysError("unable to load seccomp BPF program"); + } +#else + throw Error( + "seccomp is not supported on this platform; " + "you can bypass this error by setting the option 'filter-syscalls' to " + "false, but note that untrusted builds can then create setuid binaries!"); +#endif +#endif +} + +void DerivationGoal::runChild() { + /* Warning: in the child we should absolutely not make any SQLite + calls! */ + + try { /* child */ + + commonChildInit(builderOut); + + try { + setupSeccomp(); + } catch (...) { + if (buildUser) { + throw; + } + } + + bool setUser = true; + + /* Make the contents of netrc available to builtin:fetchurl + (which may run under a different uid and/or in a sandbox). */ + std::string netrcData; + try { + if (drv->isBuiltin() && drv->builder == "builtin:fetchurl") { + const std::string& netrc_file = settings.netrcFile; + netrcData = readFile(netrc_file); + } + } catch (SysError&) { + } + +#if __linux__ + if (useChroot) { + userNamespaceSync.writeSide = -1; + + if (drainFD(userNamespaceSync.readSide.get()) != "1") { + throw Error("user namespace initialisation failed"); + } + + userNamespaceSync.readSide = -1; + + if (privateNetwork) { + /* Initialise the loopback interface. */ + AutoCloseFD fd(socket(PF_INET, SOCK_DGRAM, IPPROTO_IP)); + if (!fd) { + throw SysError("cannot open IP socket"); + } + + struct ifreq ifr; + strcpy(ifr.ifr_name, "lo"); + ifr.ifr_flags = IFF_UP | IFF_LOOPBACK | IFF_RUNNING; + if (ioctl(fd.get(), SIOCSIFFLAGS, &ifr) == -1) { + throw SysError("cannot set loopback interface flags"); + } + } + + /* Set the hostname etc. to fixed values. 
*/ + char hostname[] = "localhost"; + if (sethostname(hostname, sizeof(hostname)) == -1) { + throw SysError("cannot set host name"); + } + char domainname[] = "(none)"; // kernel default + if (setdomainname(domainname, sizeof(domainname)) == -1) { + throw SysError("cannot set domain name"); + } + + /* Make all filesystems private. This is necessary + because subtrees may have been mounted as "shared" + (MS_SHARED). (Systemd does this, for instance.) Even + though we have a private mount namespace, mounting + filesystems on top of a shared subtree still propagates + outside of the namespace. Making a subtree private is + local to the namespace, though, so setting MS_PRIVATE + does not affect the outside world. */ + if (mount(nullptr, "/", nullptr, MS_REC | MS_PRIVATE, nullptr) == -1) { + throw SysError("unable to make '/' private mount"); + } + + /* Bind-mount chroot directory to itself, to treat it as a + different filesystem from /, as needed for pivot_root. */ + if (mount(chrootRootDir.c_str(), chrootRootDir.c_str(), nullptr, MS_BIND, + nullptr) == -1) { + throw SysError(format("unable to bind mount '%1%'") % chrootRootDir); + } + + /* Set up a nearly empty /dev, unless the user asked to + bind-mount the host /dev. 
*/ + Strings ss; + if (dirsInChroot.find("/dev") == dirsInChroot.end()) { + createDirs(chrootRootDir + "/dev/shm"); + createDirs(chrootRootDir + "/dev/pts"); + ss.push_back("/dev/full"); + if ((settings.systemFeatures.get().count("kvm") != 0u) && + pathExists("/dev/kvm")) { + ss.push_back("/dev/kvm"); + } + ss.push_back("/dev/null"); + ss.push_back("/dev/random"); + ss.push_back("/dev/tty"); + ss.push_back("/dev/urandom"); + ss.push_back("/dev/zero"); + createSymlink("/proc/self/fd", chrootRootDir + "/dev/fd"); + createSymlink("/proc/self/fd/0", chrootRootDir + "/dev/stdin"); + createSymlink("/proc/self/fd/1", chrootRootDir + "/dev/stdout"); + createSymlink("/proc/self/fd/2", chrootRootDir + "/dev/stderr"); + } + + /* Fixed-output derivations typically need to access the + network, so give them access to /etc/resolv.conf and so + on. */ + if (fixedOutput) { + ss.push_back("/etc/resolv.conf"); + + // Only use nss functions to resolve hosts and + // services. Don’t use it for anything else that may + // be configured for this system. This limits the + // potential impurities introduced in fixed outputs. + writeFile(chrootRootDir + "/etc/nsswitch.conf", + "hosts: files dns\nservices: files\n"); + + ss.push_back("/etc/services"); + ss.push_back("/etc/hosts"); + if (pathExists("/var/run/nscd/socket")) { + ss.push_back("/var/run/nscd/socket"); + } + } + + for (auto& i : ss) { + dirsInChroot.emplace(i, i); + } + + /* Bind-mount all the directories from the "host" + filesystem that we want in the chroot + environment. 
*/ + auto doBind = [&](const Path& source, const Path& target, + bool optional = false) { + DLOG(INFO) << "bind mounting '" << source << "' to '" << target << "'"; + struct stat st; + if (stat(source.c_str(), &st) == -1) { + if (optional && errno == ENOENT) { + return; + } + throw SysError("getting attributes of path '%1%'", source); + } + if (S_ISDIR(st.st_mode)) { + createDirs(target); + } else { + createDirs(dirOf(target)); + writeFile(target, ""); + } + if (mount(source.c_str(), target.c_str(), "", MS_BIND | MS_REC, + nullptr) == -1) { + throw SysError("bind mount from '%1%' to '%2%' failed", source, + target); + } + }; + + for (auto& i : dirsInChroot) { + if (i.second.source == "/proc") { + continue; + } // backwards compatibility + doBind(i.second.source, chrootRootDir + i.first, i.second.optional); + } + + /* Bind a new instance of procfs on /proc. */ + createDirs(chrootRootDir + "/proc"); + if (mount("none", (chrootRootDir + "/proc").c_str(), "proc", 0, + nullptr) == -1) { + throw SysError("mounting /proc"); + } + + /* Mount a new tmpfs on /dev/shm to ensure that whatever + the builder puts in /dev/shm is cleaned up automatically. */ + if (pathExists("/dev/shm") && + mount("none", (chrootRootDir + "/dev/shm").c_str(), "tmpfs", 0, + fmt("size=%s", settings.sandboxShmSize).c_str()) == -1) { + throw SysError("mounting /dev/shm"); + } + + /* Mount a new devpts on /dev/pts. Note that this + requires the kernel to be compiled with + CONFIG_DEVPTS_MULTIPLE_INSTANCES=y (which is the case + if /dev/ptx/ptmx exists). */ + if (pathExists("/dev/pts/ptmx") && + !pathExists(chrootRootDir + "/dev/ptmx") && + (dirsInChroot.count("/dev/pts") == 0u)) { + if (mount("none", (chrootRootDir + "/dev/pts").c_str(), "devpts", 0, + "newinstance,mode=0620") == 0) { + createSymlink("/dev/pts/ptmx", chrootRootDir + "/dev/ptmx"); + + /* Make sure /dev/pts/ptmx is world-writable. With some + Linux versions, it is created with permissions 0. 
*/ + chmod_(chrootRootDir + "/dev/pts/ptmx", 0666); + } else { + if (errno != EINVAL) { + throw SysError("mounting /dev/pts"); + } + doBind("/dev/pts", chrootRootDir + "/dev/pts"); + doBind("/dev/ptmx", chrootRootDir + "/dev/ptmx"); + } + } + + /* Do the chroot(). */ + if (chdir(chrootRootDir.c_str()) == -1) { + throw SysError(format("cannot change directory to '%1%'") % + chrootRootDir); + } + + if (mkdir("real-root", 0) == -1) { + throw SysError("cannot create real-root directory"); + } + + if (pivot_root(".", "real-root") == -1) { + throw SysError(format("cannot pivot old root directory onto '%1%'") % + (chrootRootDir + "/real-root")); + } + + if (chroot(".") == -1) { + throw SysError(format("cannot change root directory to '%1%'") % + chrootRootDir); + } + + if (umount2("real-root", MNT_DETACH) == -1) { + throw SysError("cannot unmount real root filesystem"); + } + + if (rmdir("real-root") == -1) { + throw SysError("cannot remove real-root directory"); + } + + /* Switch to the sandbox uid/gid in the user namespace, + which corresponds to the build user or calling user in + the parent namespace. */ + if (setgid(sandboxGid) == -1) { + throw SysError("setgid failed"); + } + if (setuid(sandboxUid) == -1) { + throw SysError("setuid failed"); + } + + setUser = false; + } +#endif + + if (chdir(tmpDirInSandbox.c_str()) == -1) { + throw SysError(format("changing into '%1%'") % tmpDir); + } + + /* Close all other file descriptors. */ + closeMostFDs({STDIN_FILENO, STDOUT_FILENO, STDERR_FILENO}); + +#if __linux__ + /* Change the personality to 32-bit if we're doing an + i686-linux build on an x86_64-linux machine. 
*/ + struct utsname utsbuf; + uname(&utsbuf); + if (drv->platform == "i686-linux" && + (settings.thisSystem == "x86_64-linux" || + ((strcmp(utsbuf.sysname, "Linux") == 0) && + (strcmp(utsbuf.machine, "x86_64") == 0)))) { + if (personality(PER_LINUX32) == -1) { + throw SysError("cannot set i686-linux personality"); + } + } + + /* Impersonate a Linux 2.6 machine to get some determinism in + builds that depend on the kernel version. */ + if ((drv->platform == "i686-linux" || drv->platform == "x86_64-linux") && + settings.impersonateLinux26) { + int cur = personality(0xffffffff); + if (cur != -1) { + personality(cur | 0x0020000 /* == UNAME26 */); + } + } + + /* Disable address space randomization for improved + determinism. */ + int cur = personality(0xffffffff); + if (cur != -1) { + personality(cur | ADDR_NO_RANDOMIZE); + } +#endif + + /* Disable core dumps by default. */ + struct rlimit limit = {0, RLIM_INFINITY}; + setrlimit(RLIMIT_CORE, &limit); + + // FIXME: set other limits to deterministic values? + + /* Fill in the environment. */ + Strings envStrs; + for (auto& i : env) { + envStrs.push_back( + rewriteStrings(i.first + "=" + i.second, inputRewrites)); + } + + /* If we are running in `build-users' mode, then switch to the + user we allocated above. Make sure that we drop all root + privileges. Note that above we have closed all file + descriptors except std*, so that's safe. Also note that + setuid() when run as root sets the real, effective and + saved UIDs. */ + if (setUser && buildUser) { + /* Preserve supplementary groups of the build user, to allow + admins to specify groups such as "kvm". 
*/ + if (!buildUser->getSupplementaryGIDs().empty() && + setgroups(buildUser->getSupplementaryGIDs().size(), + buildUser->getSupplementaryGIDs().data()) == -1) { + throw SysError("cannot set supplementary groups of build user"); + } + + if (setgid(buildUser->getGID()) == -1 || + getgid() != buildUser->getGID() || getegid() != buildUser->getGID()) { + throw SysError("setgid failed"); + } + + if (setuid(buildUser->getUID()) == -1 || + getuid() != buildUser->getUID() || geteuid() != buildUser->getUID()) { + throw SysError("setuid failed"); + } + } + + /* Fill in the arguments. */ + Strings args; + + const char* builder = "invalid"; + + if (!drv->isBuiltin()) { + builder = drv->builder.c_str(); + std::string builderBasename = baseNameOf(drv->builder); + args.push_back(builderBasename); + } + + for (auto& i : drv->args) { + args.push_back(rewriteStrings(i, inputRewrites)); + } + + /* Indicate that we managed to set up the build environment. */ + writeFull(STDERR_FILENO, std::string("\1\n")); + + /* Execute the program. This should not return. */ + if (drv->isBuiltin()) { + try { + BasicDerivation drv2(*drv); + for (auto& e : drv2.env) { + e.second = rewriteStrings(e.second, inputRewrites); + } + + if (drv->builder == "builtin:fetchurl") { + builtinFetchurl(drv2, netrcData); + } else if (drv->builder == "builtin:buildenv") { + builtinBuildenv(drv2); + } else { + throw Error(format("unsupported builtin function '%1%'") % + std::string(drv->builder, 8)); + } + _exit(0); + } catch (std::exception& e) { + writeFull(STDERR_FILENO, "error: " + std::string(e.what()) + "\n"); + _exit(1); + } + } + + execve(builder, stringsToCharPtrs(args).data(), + stringsToCharPtrs(envStrs).data()); + + throw SysError(format("executing '%1%'") % drv->builder); + + } catch (std::exception& e) { + writeFull(STDERR_FILENO, "\1while setting up the build environment: " + + std::string(e.what()) + "\n"); + _exit(1); + } +} + +/* Parse a list of reference specifiers. 
Each element must either be
+   a store path, or the symbolic name of the output of the derivation
+   (such as `out').  A symbolic name is replaced by the store path of that
+   output as listed in `drv.outputs`; any other element raises a
+   BuildError.  Returns the set of resulting store paths. */
+PathSet parseReferenceSpecifiers(Store& store, const BasicDerivation& drv,
+                                 const Strings& paths) {
+  PathSet result;
+  for (auto& i : paths) {
+    if (store.isStorePath(i)) {
+      result.insert(i);  // literal store path: taken as-is
+    } else if (drv.outputs.find(i) != drv.outputs.end()) {
+      result.insert(drv.outputs.find(i)->second.path);  // output name -> path
+    } else {
+      throw BuildError(
+          format("derivation contains an illegal reference specifier '%1%'") %
+          i);
+    }
+  }
+  return result;
+}
+/* Post-process the outputs of a finished build.  For every output that is
+   listed in `missingPaths` this checks that the path exists with sane
+   ownership and permissions, applies hash rewriting, verifies the hash of
+   fixed-output derivations, canonicalises metadata and scans for
+   references.  The collected ValidPathInfo records are passed to
+   checkOutputs(), compared with the previous round, and finally
+   registered in the store.  In bmCheck mode the outputs are only compared
+   against the already registered path info. */
+void DerivationGoal::registerOutputs() {
+  /* When using a build hook, the build hook can register the output
+     as valid (by doing `nix-store --import').  If so we don't have
+     to do anything here. */
+  if (hook) {
+    bool allValid = true;
+    for (auto& i : drv->outputs) {
+      if (!worker.store.isValidPath(i.second.path)) {
+        allValid = false;
+      }
+    }
+    if (allValid) {
+      return;
+    }
+  }
+
+  std::map<std::string, ValidPathInfo> infos;  // keyed by output name
+
+  /* Set of inodes seen during calls to canonicalisePathMetaData()
+     for this build's outputs.  This needs to be shared between
+     outputs to allow hard links between outputs. */
+  InodesSeen inodesSeen;
+
+  Path checkSuffix = ".check";  // suffix of the copy kept from another round
+  bool keepPreviousRound = settings.keepFailed || settings.runDiffHook;
+
+  std::exception_ptr delayedException;  // rethrown after registration
+
+  /* Check whether the output paths were created, and grep each
+     output path to determine what other paths it references.  Also make all
+     output paths read-only. */
+  for (auto& i : drv->outputs) {
+    Path path = i.second.path;
+    if (missingPaths.find(path) == missingPaths.end()) {
+      continue;  // only outputs listed in missingPaths are processed
+    }
+
+    ValidPathInfo info;
+
+    Path actualPath = path;  // where the output currently is on disk
+    if (useChroot) {
+      actualPath = chrootRootDir + path;
+      if (pathExists(actualPath)) {
+        /* Move output paths from the chroot to the Nix store.
*/
+        if (buildMode == bmRepair) {
+          replaceValidPath(path, actualPath);
+        } else if (buildMode != bmCheck &&
+                   rename(actualPath.c_str(),
+                          worker.store.toRealPath(path).c_str()) == -1) {
+          throw SysError(format("moving build output '%1%' from the sandbox to "
+                                "the Nix store") %
+                         path);
+        }
+      }
+      if (buildMode != bmCheck) {  // in check mode the output stays in the chroot
+        actualPath = worker.store.toRealPath(path);
+      }
+    }
+
+    if (needsHashRewrite()) {
+      Path redirected = redirectedOutputs[path];  // empty if nothing recorded
+      if (buildMode == bmRepair &&
+          redirectedBadOutputs.find(path) != redirectedBadOutputs.end() &&
+          pathExists(redirected)) {
+        replaceValidPath(path, redirected);
+      }
+      if (buildMode == bmCheck && !redirected.empty()) {
+        actualPath = redirected;
+      }
+    }
+
+    struct stat st;
+    if (lstat(actualPath.c_str(), &st) == -1) {
+      if (errno == ENOENT) {  // the builder never created this output
+        throw BuildError(
+            format("builder for '%1%' failed to produce output path '%2%'") %
+            drvPath % path);
+      }
+      throw SysError(format("getting attributes of path '%1%'") % actualPath);
+    }
+
+#ifndef __CYGWIN__
+    /* Check that the output is not group or world writable, as
+       that means that someone else can have interfered with the
+       build.  Also, the output should be owned by the build
+       user.  Symlinks are exempt from the permission test. */
+    if ((!S_ISLNK(st.st_mode) && ((st.st_mode & (S_IWGRP | S_IWOTH)) != 0u)) ||
+        (buildUser && st.st_uid != buildUser->getUID())) {
+      throw BuildError(format("suspicious ownership or permission on '%1%'; "
+                              "rejecting this build output") %
+                       path);
+    }
+#endif
+
+    /* Apply hash rewriting if necessary. */
+    bool rewritten = false;  // set once the path has been re-created below
+    if (!outputRewrites.empty()) {
+      LOG(WARNING) << "rewriting hashes in '" << path << "'; cross fingers";
+
+      /* Canonicalise first.  This ensures that the path we're
+         rewriting doesn't contain a hard link to /etc/shadow or
+         something like that. */
+      canonicalisePathMetaData(actualPath, buildUser ? buildUser->getUID() : -1,
+                               inodesSeen);
+
+      /* FIXME: this is in-memory.
*/
+      StringSink sink;
+      dumpPath(actualPath, sink);  // serialise the whole output into memory
+      deletePath(actualPath);
+      sink.s = make_ref<std::string>(rewriteStrings(*sink.s, outputRewrites));
+      StringSource source(*sink.s);
+      restorePath(actualPath, source);  // re-create it from the rewritten dump
+
+      rewritten = true;
+    }
+
+    /* Check that fixed-output derivations produced the right
+       outputs (i.e., the content hash should match the specified
+       hash). */
+    if (fixedOutput) {
+      bool recursive;
+      Hash h;  // the hash declared by the derivation
+      i.second.parseHashInfo(recursive, h);
+
+      if (!recursive) {
+        /* The output path should be a regular file without
+           execute permission. */
+        if (!S_ISREG(st.st_mode) || (st.st_mode & S_IXUSR) != 0) {
+          throw BuildError(
+              format(
+                  "output path '%1%' should be a non-executable regular file") %
+              path);
+        }
+      }
+
+      /* Check the hash.  In hash mode, move the path produced by
+         the derivation to its content-addressed location. */
+      Hash h2 = recursive ? hashPath(h.type, actualPath).first
+                          : hashFile(h.type, actualPath);
+
+      Path dest = worker.store.makeFixedOutputPath(recursive, h2,
+                                                   storePathToName(path));
+
+      if (h != h2) {
+        /* Throw an error after registering the path as
+           valid. */
+        worker.hashMismatch = true;
+        delayedException = std::make_exception_ptr(
+            BuildError("hash mismatch in fixed-output derivation '%s':\n "
+                       "wanted: %s\n got: %s",
+                       dest, h.to_string(), h2.to_string()));
+
+        Path actualDest = worker.store.toRealPath(dest);
+
+        if (worker.store.isValidPath(dest)) {  // already registered: fail now
+          std::rethrow_exception(delayedException);
+        }
+
+        if (actualPath != actualDest) {
+          PathLocks outputLocks({actualDest});
+          deletePath(actualDest);
+          if (rename(actualPath.c_str(), actualDest.c_str()) == -1) {
+            throw SysError(format("moving '%1%' to '%2%'") % actualPath % dest);
+          }
+        }
+
+        path = dest;  // from here on the output is handled under `dest`
+        actualPath = actualDest;
+      } else {
+        assert(path == dest);
+      }
+
+      info.ca = makeFixedOutputCA(recursive, h2);
+    }
+
+    /* Get rid of all weird permissions.  This also checks that
+       all files are owned by the build user, if applicable.
*/
+    canonicalisePathMetaData(actualPath,
+                             buildUser && !rewritten ? buildUser->getUID() : -1,
+                             inodesSeen);
+
+    /* For this output path, find the references to other paths
+       contained in it.  Compute the SHA-256 NAR hash at the same
+       time.  The hash is stored in the database so that we can
+       verify later on whether nobody has messed with the store. */
+    DLOG(INFO) << "scanning for references inside '" << path << "'";
+    HashResult hash;  // .first is stored as narHash, .second as narSize below
+    PathSet references = scanForReferences(actualPath, allPaths, hash);
+
+    if (buildMode == bmCheck) {
+      if (!worker.store.isValidPath(path)) {
+        continue;  // nothing registered to compare against
+      }
+      auto info = *worker.store.queryPathInfo(path);  // shadows the outer `info`
+      if (hash.first != info.narHash) {
+        worker.checkMismatch = true;
+        if (settings.runDiffHook || settings.keepFailed) {
+          Path dst = worker.store.toRealPath(path + checkSuffix);
+          deletePath(dst);
+          if (rename(actualPath.c_str(), dst.c_str()) != 0) {
+            throw SysError(format("renaming '%1%' to '%2%'") % actualPath %
+                           dst);
+          }
+
+          handleDiffHook(buildUser ? buildUser->getUID() : getuid(),
+                         buildUser ? buildUser->getGID() : getgid(), path, dst,
+                         drvPath, tmpDir);
+
+          throw NotDeterministic(
+              format("derivation '%1%' may not be deterministic: output '%2%' "
+                     "differs from '%3%'") %
+              drvPath % path % dst);
+        }
+        throw NotDeterministic(format("derivation '%1%' may not be "
+                                      "deterministic: output '%2%' differs") %
+                               drvPath % path);
+      }
+
+      /* Since we verified the build, it's now ultimately
+         trusted. */
+      if (!info.ultimate) {
+        info.ultimate = true;
+        worker.store.signPathInfo(info);
+        worker.store.registerValidPaths({info});
+      }
+
+      continue;  // check mode never adds to `infos`
+    }
+
+    /* For debugging, print out the referenced and unreferenced
+       paths.
*/
+    for (auto& i : inputPaths) {
+      auto j = references.find(i);
+      if (j == references.end()) {
+        DLOG(INFO) << "unreferenced input: '" << i << "'";
+      } else {
+        DLOG(INFO) << "referenced input: '" << i << "'";
+      }
+    }
+
+    if (curRound == nrRounds) {  // only on the last round
+      worker.store.optimisePath(
+          actualPath);  // FIXME: combine with scanForReferences()
+      worker.markContentsGood(path);
+    }
+
+    info.path = path;
+    info.narHash = hash.first;
+    info.narSize = hash.second;
+    info.references = references;
+    info.deriver = drvPath;
+    info.ultimate = true;
+    worker.store.signPathInfo(info);
+
+    if (!info.references.empty()) {
+      info.ca.clear();  // the content-address field is dropped when there are references
+    }
+
+    infos[i.first] = info;  // keyed by output name
+  }
+
+  if (buildMode == bmCheck) {
+    return;
+  }
+
+  /* Apply output checks. */
+  checkOutputs(infos);
+
+  /* Compare the result with the previous round, and report which
+     path is different, if any.*/
+  if (curRound > 1 && prevInfos != infos) {
+    assert(prevInfos.size() == infos.size());
+    for (auto i = prevInfos.begin(), j = infos.begin(); i != prevInfos.end();
+         ++i, ++j) {
+      if (!(*i == *j)) {
+        result.isNonDeterministic = true;
+        Path prev = i->second.path + checkSuffix;
+        bool prevExists = keepPreviousRound && pathExists(prev);
+        auto msg =
+            prevExists
+                ? fmt("output '%1%' of '%2%' differs from '%3%' from previous "
+                      "round",
+                      i->second.path, drvPath, prev)
+                : fmt("output '%1%' of '%2%' differs from previous round",
+                      i->second.path, drvPath);
+
+        handleDiffHook(buildUser ? buildUser->getUID() : getuid(),
+                       buildUser ? buildUser->getGID() : getgid(), prev,
+                       i->second.path, drvPath, tmpDir);
+
+        if (settings.enforceDeterminism) {
+          throw NotDeterministic(msg);
+        }
+
+        LOG(ERROR) << msg;
+        curRound = nrRounds;  // we know enough, bail out early
+      }
+    }
+  }
+
+  /* If this is the first round of several, then move the output out
+     of the way.
*/
+  if (nrRounds > 1 && curRound == 1 && curRound < nrRounds &&
+      keepPreviousRound) {
+    for (auto& i : drv->outputs) {
+      Path prev = i.second.path + checkSuffix;
+      deletePath(prev);
+      Path dst = i.second.path + checkSuffix;  // same value as `prev`
+      if (rename(i.second.path.c_str(), dst.c_str()) != 0) {
+        throw SysError(format("renaming '%1%' to '%2%'") % i.second.path % dst);
+      }
+    }
+  }
+
+  if (curRound < nrRounds) {  // more rounds to go: remember this one and stop
+    prevInfos = infos;
+    return;
+  }
+
+  /* Remove the .check directories if we're done. FIXME: keep them
+     if the result was not deterministic? */
+  if (curRound == nrRounds) {
+    for (auto& i : drv->outputs) {
+      Path prev = i.second.path + checkSuffix;
+      deletePath(prev);
+    }
+  }
+
+  /* Register each output path as valid, and register the sets of
+     paths referenced by each of them.  If there are cycles in the
+     outputs, this will fail. */
+  {
+    ValidPathInfos infos2;
+    for (auto& i : infos) {
+      infos2.push_back(i.second);
+    }
+    worker.store.registerValidPaths(infos2);
+  }
+
+  /* In case of a fixed-output derivation hash mismatch, throw an
+     exception now that we have registered the output as valid. */
+  if (delayedException) {
+    std::rethrow_exception(delayedException);
+  }
+}
+/* Enforce the size and reference restrictions declared by the derivation
+   on each entry of `outputs` (output name -> path info).  The
+   restrictions are read either from the structured attribute
+   "outputChecks" or from the legacy string attributes; a violation is
+   reported by throwing BuildError. */
+void DerivationGoal::checkOutputs(
+    const std::map<Path, ValidPathInfo>& outputs) {
+  std::map<Path, const ValidPathInfo&> outputsByPath;  // store path -> info
+  for (auto& output : outputs) {
+    outputsByPath.emplace(output.second.path, output.second);
+  }
+
+  for (auto& output : outputs) {
+    auto& outputName = output.first;
+    auto& info = output.second;
+
+    struct Checks {
+      bool ignoreSelfRefs = false;  // only set in the legacy attribute case
+      std::optional<uint64_t> maxSize, maxClosureSize;
+      std::optional<Strings> allowedReferences, allowedRequisites,
+          disallowedReferences, disallowedRequisites;
+    };
+
+    /* Compute the closure and closure size of some output.  This
+       is slightly tricky because some of its references (namely
+       other outputs) may not be valid yet.
*/
+    auto getClosure = [&](const Path& path) {
+      uint64_t closureSize = 0;
+      PathSet pathsDone;
+      std::queue<Path> pathsLeft;  // work list for the traversal
+      pathsLeft.push(path);
+
+      while (!pathsLeft.empty()) {
+        auto path = pathsLeft.front();
+        pathsLeft.pop();
+        if (!pathsDone.insert(path).second) {
+          continue;  // already visited
+        }
+
+        auto i = outputsByPath.find(path);
+        if (i != outputsByPath.end()) {  // one of this build's own outputs
+          closureSize += i->second.narSize;
+          for (auto& ref : i->second.references) {
+            pathsLeft.push(ref);
+          }
+        } else {  // anything else is looked up in the store
+          auto info = worker.store.queryPathInfo(path);
+          closureSize += info->narSize;
+          for (auto& ref : info->references) {
+            pathsLeft.push(ref);
+          }
+        }
+      }
+
+      return std::make_pair(pathsDone, closureSize);
+    };
+
+    auto applyChecks = [&](const Checks& checks) {  // throws BuildError on violation
+      if (checks.maxSize && info.narSize > *checks.maxSize) {
+        throw BuildError(
+            "path '%s' is too large at %d bytes; limit is %d bytes", info.path,
+            info.narSize, *checks.maxSize);
+      }
+
+      if (checks.maxClosureSize) {
+        uint64_t closureSize = getClosure(info.path).second;
+        if (closureSize > *checks.maxClosureSize) {
+          throw BuildError(
+              "closure of path '%s' is too large at %d bytes; limit is %d "
+              "bytes",
+              info.path, closureSize, *checks.maxClosureSize);
+        }
+      }
+
+      auto checkRefs = [&](const std::optional<Strings>& value, bool allowed,
+                           bool recursive) {
+        if (!value) {
+          return;  // this restriction was not specified
+        }
+
+        PathSet spec = parseReferenceSpecifiers(worker.store, *drv, *value);
+
+        PathSet used =
+            recursive ?
getClosure(info.path).first : info.references; + + if (recursive && checks.ignoreSelfRefs) { + used.erase(info.path); + } + + PathSet badPaths; + + for (auto& i : used) { + if (allowed) { + if (spec.count(i) == 0u) { + badPaths.insert(i); + } + } else { + if (spec.count(i) != 0u) { + badPaths.insert(i); + } + } + } + + if (!badPaths.empty()) { + std::string badPathsStr; + for (auto& i : badPaths) { + badPathsStr += "\n "; + badPathsStr += i; + } + throw BuildError( + "output '%s' is not allowed to refer to the following paths:%s", + info.path, badPathsStr); + } + }; + + checkRefs(checks.allowedReferences, true, false); + checkRefs(checks.allowedRequisites, true, true); + checkRefs(checks.disallowedReferences, false, false); + checkRefs(checks.disallowedRequisites, false, true); + }; + + if (auto structuredAttrs = parsedDrv->getStructuredAttrs()) { + auto outputChecks = structuredAttrs->find("outputChecks"); + if (outputChecks != structuredAttrs->end()) { + auto output = outputChecks->find(outputName); + + if (output != outputChecks->end()) { + Checks checks; + + auto maxSize = output->find("maxSize"); + if (maxSize != output->end()) { + checks.maxSize = maxSize->get<uint64_t>(); + } + + auto maxClosureSize = output->find("maxClosureSize"); + if (maxClosureSize != output->end()) { + checks.maxClosureSize = maxClosureSize->get<uint64_t>(); + } + + auto get = [&](const std::string& name) -> std::optional<Strings> { + auto i = output->find(name); + if (i != output->end()) { + Strings res; + for (auto& j : *i) { + if (!j.is_string()) { + throw Error( + "attribute '%s' of derivation '%s' must be a list of " + "strings", + name, drvPath); + } + res.push_back(j.get<std::string>()); + } + checks.disallowedRequisites = res; + return res; + } + return {}; + }; + + checks.allowedReferences = get("allowedReferences"); + checks.allowedRequisites = get("allowedRequisites"); + checks.disallowedReferences = get("disallowedReferences"); + checks.disallowedRequisites = 
get("disallowedRequisites");
+
+          applyChecks(checks);
+        }
+      }
+    } else {
+      // legacy non-structured-attributes case
+      Checks checks;
+      checks.ignoreSelfRefs = true;
+      checks.allowedReferences = parsedDrv->getStringsAttr("allowedReferences");
+      checks.allowedRequisites = parsedDrv->getStringsAttr("allowedRequisites");
+      checks.disallowedReferences =
+          parsedDrv->getStringsAttr("disallowedReferences");
+      checks.disallowedRequisites =
+          parsedDrv->getStringsAttr("disallowedRequisites");
+      applyChecks(checks);
+    }
+  }
+}
+/* Open the build log of this derivation and point `logSink` at it.
+   Resets `logSize`.  Returns "" without creating anything when
+   settings.keepLog is off; otherwise returns the name of the log file,
+   which is placed in a directory named after the first two characters of
+   the derivation's base name.  With settings.compressLog the file gets a
+   ".bz2" suffix and is written through a bzip2 compression sink.
+   Throws SysError if the file cannot be created. */
+Path DerivationGoal::openLogFile() {
+  logSize = 0;
+
+  if (!settings.keepLog) {
+    return "";
+  }
+
+  std::string baseName = baseNameOf(drvPath);
+
+  /* Create a log file. */
+  Path dir = fmt("%s/%s/%s/", worker.store.logDir, nix::LocalStore::drvsLogDir,
+                 std::string(baseName, 0, 2));
+  createDirs(dir);
+
+  Path logFileName = fmt("%s/%s%s", dir, std::string(baseName, 2),
+                         settings.compressLog ? ".bz2" : "");
+
+  fdLogFile =
+      open(logFileName.c_str(), O_CREAT | O_WRONLY | O_TRUNC | O_CLOEXEC, 0666);
+  if (!fdLogFile) {
+    throw SysError(format("creating log file '%1%'") % logFileName);
+  }
+
+  logFileSink = std::make_shared<FdSink>(fdLogFile.get());
+
+  if (settings.compressLog) {
+    logSink = std::shared_ptr<CompressionSink>(
+        makeCompressionSink("bzip2", *logFileSink));
+  } else {
+    logSink = logFileSink;  // uncompressed: write straight to the file sink
+  }
+
+  return logFileName;
+}
+/* Close the build log: finish the compression sink if `logSink` is one,
+   flush the file sink, then drop both sinks and reset `fdLogFile` to -1. */
+void DerivationGoal::closeLogFile() {
+  auto logSink2 = std::dynamic_pointer_cast<CompressionSink>(logSink);
+  if (logSink2) {
+    logSink2->finish();
+  }
+  if (logFileSink) {
+    logFileSink->flush();
+  }
+  logSink = logFileSink = nullptr;
+  fdLogFile = -1;
+}
+/* Dispose of the temporary build directory, if there is one.  The
+   directory is kept (and made mode 0755) only when settings.keepFailed
+   is set, `force` is false and the derivation is not a builtin;
+   otherwise it is deleted.  `tmpDir` is cleared in both cases. */
+void DerivationGoal::deleteTmpDir(bool force) {
+  if (!tmpDir.empty()) {
+    /* Don't keep temporary directories for builtins because they
+       might have privileged stuff (like a copy of netrc).
*/ + if (settings.keepFailed && !force && !drv->isBuiltin()) { + LOG(INFO) << "note: keeping build directory '" << tmpDir << "'"; + chmod(tmpDir.c_str(), 0755); + } else { + deletePath(tmpDir); + } + tmpDir = ""; + } +} + +void DerivationGoal::handleChildOutput(int fd, const std::string& data) { + if ((hook && fd == hook->builderOut.readSide.get()) || + (!hook && fd == builderOut.readSide.get())) { + logSize += data.size(); + if (settings.maxLogSize && logSize > settings.maxLogSize) { + LOG(ERROR) << getName() + << " killed after writing more than %2% bytes of log output" + << settings.maxLogSize; + killChild(); + done(BuildResult::LogLimitExceeded); + return; + } + + for (auto c : data) { + if (c == '\r') { + currentLogLinePos = 0; + } else if (c == '\n') { + flushLine(); + } else { + if (currentLogLinePos >= currentLogLine.size()) { + currentLogLine.resize(currentLogLinePos + 1); + } + currentLogLine[currentLogLinePos++] = c; + } + } + + if (logSink) { + (*logSink)(data); + } + } + + if (hook && fd == hook->fromHook.readSide.get()) { + for (auto c : data) { + if (c == '\n') { + currentHookLine.clear(); + } else { + currentHookLine += c; + } + } + } +} + +void DerivationGoal::handleEOF(int fd) { + if (!currentLogLine.empty()) { + flushLine(); + } + worker.wakeUp(shared_from_this()); +} + +void DerivationGoal::flushLine() { + if (settings.verboseBuild && + (settings.printRepeatedBuilds || curRound == 1)) { + LOG(INFO) << currentLogLine; + } else { + logTail.push_back(currentLogLine); + if (logTail.size() > settings.logLines) { + logTail.pop_front(); + } + } + + currentLogLine = ""; + currentLogLinePos = 0; +} + +PathSet DerivationGoal::checkPathValidity(bool returnValid, bool checkHash) { + PathSet result; + for (auto& i : drv->outputs) { + if (!wantOutput(i.first, wantedOutputs)) { + continue; + } + bool good = worker.store.isValidPath(i.second.path) && + (!checkHash || worker.pathContentsGood(i.second.path)); + if (good == returnValid) { + 
result.insert(i.second.path);
+    }
+  }
+  return result;
+}
+/* Set up a redirected location for store path `path`.  The new path has
+   the same length as `path` but a different 32-character hash part, taken
+   from the SHA-256 of "rewrite:<drvPath>:<path>".  Whatever exists at the
+   new path is deleted, the hash mapping is recorded in `inputRewrites` and
+   (reversed) in `outputRewrites`, the path mapping in `redirectedOutputs`,
+   and the new path is returned. */
+Path DerivationGoal::addHashRewrite(const Path& path) {
+  std::string h1 = std::string(path, worker.store.storeDir.size() + 1, 32);
+  std::string h2 =
+      std::string(hashString(htSHA256, "rewrite:" + drvPath + ":" + path)
+                      .to_string(Base32, false),
+                  0, 32);
+  Path p = worker.store.storeDir + "/" + h2 +
+           std::string(path, worker.store.storeDir.size() + 33);
+  deletePath(p);
+  assert(path.size() == p.size());  // rewriting must not change lengths
+  inputRewrites[h1] = h2;
+  outputRewrites[h2] = h1;
+  redirectedOutputs[path] = p;
+  return p;
+}
+/* Finish this goal with the given build status and error message: store
+   both in `result`, report ecSuccess or ecFailed via amDone(), and update
+   the worker's timed-out / permanent-failure flags and its build
+   counters. */
+void DerivationGoal::done(BuildResult::Status status, const std::string& msg) {
+  result.status = status;
+  result.errorMsg = msg;
+  amDone(result.success() ? ecSuccess : ecFailed);
+  if (result.status == BuildResult::TimedOut) {
+    worker.timedOut = true;
+  }
+  if (result.status == BuildResult::PermanentFailure) {
+    worker.permanentFailure = true;
+  }
+
+  mcExpectedBuilds.reset();
+  mcRunningBuilds.reset();
+
+  if (result.success()) {
+    if (status == BuildResult::Built) {  // other successful statuses are not counted
+      worker.doneBuilds++;
+    }
+  } else {
+    if (status != BuildResult::DependencyFailed) {
+      worker.failedBuilds++;
+    }
+  }
+}
+
+//////////////////////////////////////////////////////////////////////
+/* Goal that realises a single store path by copying it from one of the
+   configured substituters.  It is driven as a state machine: work()
+   calls whatever member function `state` currently points to. */
+class SubstitutionGoal : public Goal {
+  friend class Worker;
+
+ private:
+  /* The store path that should be realised through a substitute. */
+  Path storePath;
+
+  /* The remaining substituters. */
+  std::list<ref<Store>> subs;
+
+  /* The current substituter. */
+  std::shared_ptr<Store> sub;
+
+  /* Whether a substituter failed. */
+  bool substituterFailed = false;
+
+  /* Path info returned by the substituter's query info operation. */
+  std::shared_ptr<const ValidPathInfo> info;
+
+  /* Pipe for the substituter's standard output. */
+  Pipe outPipe;
+
+  /* The substituter thread. */
+  std::thread thr;
+
+  std::promise<void> promise;  // carries the outcome of the substituter thread
+
+  /* Whether to try to repair a valid path.
*/
+  RepairFlag repair;
+
+  /* Location where we're downloading the substitute.  Differs from
+     storePath when doing a repair. */
+  Path destPath;
+
+  std::unique_ptr<MaintainCount<uint64_t>> maintainExpectedSubstitutions,
+      maintainRunningSubstitutions, maintainExpectedNar,
+      maintainExpectedDownload;
+
+  using GoalState = void (SubstitutionGoal::*)();
+  GoalState state;  // member function that work() runs next
+
+ public:
+  SubstitutionGoal(const Path& storePath, Worker& worker,
+                   RepairFlag repair = NoRepair);
+  ~SubstitutionGoal() override;
+
+  void timedOut() override { abort(); };
+
+  std::string key() override {
+    /* "a$" ensures substitution goals happen before derivation
+       goals. */
+    return "a$" + storePathToName(storePath) + "$" + storePath;
+  }
+
+  void work() override;
+
+  /* The states. */
+  void init();
+  void tryNext();
+  void gotInfo();  // NOTE(review): no definition visible in this part of the file
+  void referencesValid();
+  void tryToRun();
+  void finished();
+
+  /* Callback used by the worker to write to the log. */
+  void handleChildOutput(int fd, const std::string& data) override;
+  void handleEOF(int fd) override;
+
+  Path getStorePath() { return storePath; }
+
+  void amDone(ExitCode result) override { Goal::amDone(result); }
+};
+/* Create a goal that substitutes `storePath`.  The goal starts in the
+   init state and is counted in worker.expectedSubstitutions for as long
+   as `maintainExpectedSubstitutions` is held. */
+SubstitutionGoal::SubstitutionGoal(const Path& storePath, Worker& worker,
+                                   RepairFlag repair)
+    : Goal(worker), repair(repair) {
+  this->storePath = storePath;
+  state = &SubstitutionGoal::init;
+  name = (format("substitution of '%1%'") % storePath).str();
+  trace("created");
+  maintainExpectedSubstitutions =
+      std::make_unique<MaintainCount<uint64_t>>(worker.expectedSubstitutions);
+}
+/* Join a still-running substituter thread and deregister it from the
+   worker.  Exceptions are swallowed via ignoreException(). */
+SubstitutionGoal::~SubstitutionGoal() {
+  try {
+    if (thr.joinable()) {
+      // FIXME: signal worker thread to quit.
+      thr.join();
+      worker.childTerminated(this);
+    }
+  } catch (...) {
+    ignoreException();
+  }
+}
+/* Run the member function selected by `state`. */
+void SubstitutionGoal::work() { (this->*state)(); }
+/* Initial state: add a temp root for the path, finish at once if the
+   path is already valid (unless repairing), refuse to work in read-only
+   mode, then collect the substituters and try the first one. */
+void SubstitutionGoal::init() {
+  trace("init");
+
+  worker.store.addTempRoot(storePath);
+
+  /* If the path already exists we're done.
*/
+  if ((repair == 0u) && worker.store.isValidPath(storePath)) {
+    amDone(ecSuccess);
+    return;
+  }
+
+  if (settings.readOnlyMode) {
+    throw Error(
+        format(
+            "cannot substitute path '%1%' - no write access to the Nix store") %
+        storePath);
+  }
+
+  subs = settings.useSubstitutes ? getDefaultSubstituters()
+                                 : std::list<ref<Store>>();
+
+  tryNext();
+}
+/* Take the next substituter from `subs` and query it for `storePath`.
+   Substituters with a different store directory, without the path, or
+   (when signatures are required) without a valid signature are skipped
+   by recursing into tryNext().  Once usable path info is found, goals
+   for all referenced paths are added as waitees and the state advances
+   to referencesValid.  When no substituter is left the goal ends with
+   ecFailed or ecNoSubstituters. */
+void SubstitutionGoal::tryNext() {
+  trace("trying next substituter");
+
+  if (subs.empty()) {
+    /* None left.  Terminate this goal and let someone else deal
+       with it. */
+    DLOG(WARNING)
+        << "path '" << storePath
+        << "' is required, but there is no substituter that can build it";
+
+    /* Hack: don't indicate failure if there were no substituters.
+       In that case the calling derivation should just do a
+       build. */
+    amDone(substituterFailed ? ecFailed : ecNoSubstituters);
+
+    if (substituterFailed) {
+      worker.failedSubstitutions++;
+    }
+
+    return;
+  }
+
+  sub = subs.front();
+  subs.pop_front();
+
+  if (sub->storeDir != worker.store.storeDir) {  // different store prefix: skip
+    tryNext();
+    return;
+  }
+
+  try {
+    // FIXME: make async
+    info = sub->queryPathInfo(storePath);
+  } catch (InvalidPath&) {  // this substituter does not have the path
+    tryNext();
+    return;
+  } catch (SubstituterDisabled&) {
+    if (settings.tryFallback) {
+      tryNext();
+      return;
+    }
+    throw;
+  } catch (Error& e) {  // other errors are logged before falling back
+    if (settings.tryFallback) {
+      LOG(ERROR) << e.what();
+      tryNext();
+      return;
+    }
+    throw;
+  }
+
+  /* Update the total expected download size. */
+  auto narInfo = std::dynamic_pointer_cast<const NarInfo>(info);
+
+  maintainExpectedNar = std::make_unique<MaintainCount<uint64_t>>(
+      worker.expectedNarSize, info->narSize);
+
+  maintainExpectedDownload =
+      narInfo && (narInfo->fileSize != 0u)
+          ? std::make_unique<MaintainCount<uint64_t>>(
+                worker.expectedDownloadSize, narInfo->fileSize)
+          : nullptr;
+
+  /* Bail out early if this substituter lacks a valid
+     signature. LocalStore::addToStore() also checks for this, but
+     only after we've downloaded the path.
*/
+  if (worker.store.requireSigs && !sub->isTrusted &&
+      (info->checkSignatures(worker.store, worker.store.getPublicKeys()) ==
+       0u)) {
+    LOG(WARNING) << "substituter '" << sub->getUri()
+                 << "' does not have a valid signature for path '" << storePath
+                 << "'";
+    tryNext();
+    return;
+  }
+
+  /* To maintain the closure invariant, we first have to realise the
+     paths referenced by this one. */
+  for (auto& i : info->references) {
+    if (i != storePath) { /* ignore self-references */
+      addWaitee(worker.makeSubstitutionGoal(i));
+    }
+  }
+
+  if (waitees.empty()) { /* to prevent hang (no wake-up event) */
+    referencesValid();
+  } else {
+    state = &SubstitutionGoal::referencesValid;
+  }
+}
+/* State entered once all reference goals have finished.  If any of them
+   failed the goal ends with ecIncompleteClosure or ecFailed; otherwise
+   the state advances to tryToRun and the goal is woken up again. */
+void SubstitutionGoal::referencesValid() {
+  trace("all references realised");
+
+  if (nrFailed > 0) {
+    DLOG(WARNING) << "some references of path '" << storePath
+                  << "' could not be realised";
+    amDone(nrNoSubstituters > 0 || nrIncompleteClosure > 0 ? ecIncompleteClosure
+                                                           : ecFailed);
+    return;
+  }
+
+  for (auto& i : info->references) {
+    if (i != storePath) { /* ignore self-references */
+      assert(worker.store.isValidPath(i));
+    }
+  }
+
+  state = &SubstitutionGoal::tryToRun;
+  worker.wakeUp(shared_from_this());
+}
+/* Start the actual substitution once a build slot is free: a thread
+   copies the path from `sub` into the local store and reports its
+   outcome through `promise`.  The goal is registered as a child of the
+   worker and the state advances to finished. */
+void SubstitutionGoal::tryToRun() {
+  trace("trying to run");
+
+  /* Make sure that we are allowed to start a build.  Note that even
+     if maxBuildJobs == 0 (no local builds allowed), we still allow
+     a substituter to run.  This is because substitutions cannot be
+     distributed to another machine via the build hook. */
+  if (worker.getNrLocalBuilds() >=
+      std::max(1U, (unsigned int)settings.maxBuildJobs)) {
+    worker.waitForBuildSlot(shared_from_this());
+    return;
+  }
+
+  maintainRunningSubstitutions =
+      std::make_unique<MaintainCount<uint64_t>>(worker.runningSubstitutions);
+
+  outPipe.create();
+
+  promise = std::promise<void>();  // fresh promise for this attempt
+
+  thr = std::thread([this]() {
+    try {
+      /* Wake up the worker loop when we're done.
*/
+      Finally updateStats([this]() { outPipe.writeSide = -1; });
+
+      copyStorePath(ref<Store>(sub),
+                    ref<Store>(worker.store.shared_from_this()), storePath,
+                    repair, sub->isTrusted ? NoCheckSigs : CheckSigs);
+
+      promise.set_value();
+    } catch (...) {
+      promise.set_exception(std::current_exception());  // rethrown in finished()
+    }
+  });
+
+  worker.childStarted(shared_from_this(), {outPipe.readSide.get()}, true,
+                      false);
+
+  state = &SubstitutionGoal::finished;
+}
+/* State entered after the substituter thread is done.  Joins the thread
+   and collects its outcome.  On failure the error is logged and the next
+   substituter is tried; `substituterFailed` is set unless the error was
+   SubstituteGone.  On success the path is marked good, the worker's
+   counters are updated and the goal ends with ecSuccess. */
+void SubstitutionGoal::finished() {
+  trace("substitute finished");
+
+  thr.join();
+  worker.childTerminated(this);
+
+  try {
+    promise.get_future().get();  // rethrows what the thread stored, if anything
+  } catch (std::exception& e) {
+    LOG(ERROR) << e.what();
+
+    /* Cause the parent build to fail unless --fallback is given,
+       or the substitute has disappeared. The latter case behaves
+       the same as the substitute never having existed in the
+       first place. */
+    try {
+      throw;  // rethrow to dispatch on the concrete exception type
+    } catch (SubstituteGone&) {
+    } catch (...) {
+      substituterFailed = true;
+    }
+
+    /* Try the next substitute. */
+    state = &SubstitutionGoal::tryNext;
+    worker.wakeUp(shared_from_this());
+    return;
+  }
+
+  worker.markContentsGood(storePath);
+
+  DLOG(INFO) << "substitution of path '" << storePath << "' succeeded";
+
+  maintainRunningSubstitutions.reset();
+
+  maintainExpectedSubstitutions.reset();
+  worker.doneSubstitutions++;
+
+  if (maintainExpectedDownload) {
+    auto fileSize = maintainExpectedDownload->delta;  // read before the reset
+    maintainExpectedDownload.reset();
+    worker.doneDownloadSize += fileSize;
+  }
+
+  worker.doneNarSize += maintainExpectedNar->delta;
+  maintainExpectedNar.reset();
+
+  amDone(ecSuccess);
+}
+/* Output of the substituter is ignored. */
+void SubstitutionGoal::handleChildOutput(int fd, const std::string& data) {}
+/* End-of-file on the read side of `outPipe` wakes this goal up. */
+void SubstitutionGoal::handleEOF(int fd) {
+  if (fd == outPipe.readSide.get()) {
+    worker.wakeUp(shared_from_this());
+  }
+}
+
+//////////////////////////////////////////////////////////////////////
+// True while a Worker object exists; see the constructor and destructor.
+static bool working = false;
+/* Create a worker operating on `store` and reset its counters and flags.
+   Aborts the process if another Worker already exists. */
+Worker::Worker(LocalStore& store) : store(store) {
+  /* Debugging: prevent recursive
     workers. */
  if (working) {
    abort();
  }
  working = true;
  nrLocalBuilds = 0;
  lastWokenUp = steady_time_point::min();
  permanentFailure = false;
  timedOut = false;
  hashMismatch = false;
  checkMismatch = false;
}

/* Clears the `working' flag, drops the top-level goals and asserts that
   the expected-substitution/download/NAR counters are back at zero. */
Worker::~Worker() {
  working = false;

  /* Explicitly get rid of all strong pointers now.  After this all
     goals that refer to this worker should be gone.  (Otherwise we
     are in trouble, since goals may call childTerminated() etc. in
     their destructors). */
  topGoals.clear();

  assert(expectedSubstitutions == 0);
  assert(expectedDownloadSize == 0);
  assert(expectedNarSize == 0);
}

/* Returns the goal registered for derivation `path', creating,
   registering and waking a new DerivationGoal if the weak entry in
   derivationGoals has expired.  For an existing goal the requested
   outputs are added to it. */
GoalPtr Worker::makeDerivationGoal(const Path& path,
                                   const StringSet& wantedOutputs,
                                   BuildMode buildMode) {
  GoalPtr goal = derivationGoals[path].lock();
  if (!goal) {
    goal =
        std::make_shared<DerivationGoal>(path, wantedOutputs, *this, buildMode);
    derivationGoals[path] = goal;
    wakeUp(goal);
  } else {
    (dynamic_cast<DerivationGoal*>(goal.get()))
        ->addWantedOutputs(wantedOutputs);
  }
  return goal;
}

/* Creates and wakes a DerivationGoal for an in-memory derivation.  The
   goal is not recorded in derivationGoals. */
std::shared_ptr<DerivationGoal> Worker::makeBasicDerivationGoal(
    const Path& drvPath, const BasicDerivation& drv, BuildMode buildMode) {
  auto goal = std::make_shared<DerivationGoal>(drvPath, drv, *this, buildMode);
  wakeUp(goal);
  return goal;
}

/* Returns the substitution goal registered for `path', creating,
   registering and waking a new one if none is alive.  Note that
   `repair' is only used when a new goal is created. */
GoalPtr Worker::makeSubstitutionGoal(const Path& path, RepairFlag repair) {
  GoalPtr goal = substitutionGoals[path].lock();
  if (!goal) {
    goal = std::make_shared<SubstitutionGoal>(path, *this, repair);
    substitutionGoals[path] = goal;
    wakeUp(goal);
  }
  return goal;
}

static void removeGoal(const GoalPtr& goal, WeakGoalMap& goalMap) {
  /* !!!
     inefficient */
  /* Linear scan: erase every entry whose weak pointer resolves to
     `goal'.  The successor iterator is saved before erasing. */
  for (auto i = goalMap.begin(); i != goalMap.end();) {
    if (i->second.lock() == goal) {
      auto j = i;
      ++j;
      goalMap.erase(i);
      i = j;
    } else {
      ++i;
    }
  }
}

/* Removes `goal' from both goal maps and from the top-level goals, and
   then wakes every goal that was waiting for any goal to finish. */
void Worker::removeGoal(const GoalPtr& goal) {
  nix::removeGoal(goal, derivationGoals);
  nix::removeGoal(goal, substitutionGoals);
  if (topGoals.find(goal) != topGoals.end()) {
    topGoals.erase(goal);
    /* If a top-level goal failed, then kill all other goals
       (unless keepGoing was set). */
    if (goal->getExitCode() == Goal::ecFailed && !settings.keepGoing) {
      topGoals.clear();
    }
  }

  /* Wake up goals waiting for any goal to finish. */
  for (auto& i : waitingForAnyGoal) {
    GoalPtr goal = i.lock();
    if (goal) {
      wakeUp(goal);
    }
  }

  waitingForAnyGoal.clear();
}

/* Adds `goal' to the set of awake goals. */
void Worker::wakeUp(const GoalPtr& goal) {
  goal->trace("woken up");
  addToWeakGoals(awake, goal);
}

/* Number of children currently occupying a build slot. */
unsigned Worker::getNrLocalBuilds() { return nrLocalBuilds; }

/* Records a new child for `goal' that is monitored through `fds'.  The
   start time and the time of last output are both set to "now".  When
   `inBuildSlot' is set the child counts towards nrLocalBuilds. */
void Worker::childStarted(const GoalPtr& goal, const std::set<int>& fds,
                          bool inBuildSlot, bool respectTimeouts) {
  Child child;
  child.goal = goal;
  child.goal2 = goal.get();
  child.fds = fds;
  child.timeStarted = child.lastOutput = steady_time_point::clock::now();
  child.inBuildSlot = inBuildSlot;
  child.respectTimeouts = respectTimeouts;
  children.emplace_back(child);
  if (inBuildSlot) {
    nrLocalBuilds++;
  }
}

/* Forgets the child belonging to `goal' (matched via the raw goal2
   pointer); does nothing if no such child is registered.  Releases the
   build slot if the child held one. */
void Worker::childTerminated(Goal* goal, bool wakeSleepers) {
  auto i =
      std::find_if(children.begin(), children.end(),
                   [&](const Child& child) { return child.goal2 == goal; });
  if (i == children.end()) {
    return;
  }

  if (i->inBuildSlot) {
    assert(nrLocalBuilds > 0);
    nrLocalBuilds--;
  }

  children.erase(i);

  if (wakeSleepers) {
    /* Wake up goals waiting for a build slot.
 */
    for (auto& j : wantingToBuild) {
      GoalPtr goal = j.lock();
      if (goal) {
        wakeUp(goal);
      }
    }

    wantingToBuild.clear();
  }
}

/* Wakes `goal' immediately if fewer than maxBuildJobs local builds are
   running; otherwise queues it in wantingToBuild. */
void Worker::waitForBuildSlot(const GoalPtr& goal) {
  DLOG(INFO) << "wait for build slot";
  if (getNrLocalBuilds() < settings.maxBuildJobs) {
    wakeUp(goal); /* we can do it right away */
  } else {
    addToWeakGoals(wantingToBuild, goal);
  }
}

/* Queues `goal' to be woken when any goal is removed (see removeGoal). */
void Worker::waitForAnyGoal(GoalPtr goal) {
  DLOG(INFO) << "wait for any goal";
  addToWeakGoals(waitingForAnyGoal, std::move(goal));
}

/* Queues `goal' to be woken by the polling logic in waitForInput(). */
void Worker::waitForAWhile(GoalPtr goal) {
  DLOG(INFO) << "wait for a while";
  addToWeakGoals(waitingForAWhile, std::move(goal));
}

/* Main loop: adds `_topGoals' to the top-level goals and keeps running
   awake goals and waiting for child input until no top-level goal is
   left.  Throws Error when nothing is awake, nothing is running and
   maxBuildJobs is 0. */
void Worker::run(const Goals& _topGoals) {
  for (auto& i : _topGoals) {
    topGoals.insert(i);
  }

  DLOG(INFO) << "entered goal loop";

  while (true) {
    checkInterrupt();

    store.autoGC(false);

    /* Call every wake goal (in the ordering established by
       CompareGoalPtrs). */
    while (!awake.empty() && !topGoals.empty()) {
      /* Take a strong snapshot first: work() may modify `awake'. */
      Goals awake2;
      for (auto& i : awake) {
        GoalPtr goal = i.lock();
        if (goal) {
          awake2.insert(goal);
        }
      }
      awake.clear();
      for (auto& goal : awake2) {
        checkInterrupt();
        goal->work();
        if (topGoals.empty()) {
          break;
        }  // stuff may have been cancelled
      }
    }

    if (topGoals.empty()) {
      break;
    }

    /* Wait for input. */
    if (!children.empty() || !waitingForAWhile.empty()) {
      waitForInput();
    } else {
      if (awake.empty() && 0 == settings.maxBuildJobs) {
        throw Error(
            "unable to start any build; either increase '--max-jobs' "
            "or enable remote builds");
      }
      assert(!awake.empty());
    }
  }

  /* If --keep-going is not set, it's possible that the main goal
     exited while some of its subgoals were still active.  But if
     --keep-going *is* set, then they must all be finished now.
 */
  assert(!settings.keepGoing || awake.empty());
  assert(!settings.keepGoing || wantingToBuild.empty());
  assert(!settings.keepGoing || children.empty());
}

/* Blocks in select() until a child file descriptor becomes readable or
   the nearest deadline passes, then dispatches the data / EOF to the
   owning goals, enforces the silence and build timeouts, and wakes the
   goals in waitingForAWhile once pollInterval seconds have elapsed.
   Returns early (without processing anything) when select() is
   interrupted by a signal; throws SysError on other select()/read()
   failures and Error when a descriptor does not fit into an fd_set. */
void Worker::waitForInput() {
  DLOG(INFO) << "waiting for children";

  /* Process output from the file descriptors attached to the
     children, namely log output and output path creation commands.
     We also use this to detect child termination: if we get EOF on
     the logger pipe of a build, we assume that the builder has
     terminated. */

  bool useTimeout = false;
  struct timeval timeout;
  timeout.tv_usec = 0; /* tv_sec is only set (and read) when useTimeout */
  auto before = steady_time_point::clock::now();

  /* If we're monitoring for silence on stdout/stderr, or if there
     is a build timeout, then wait for input until the first
     deadline for any child. */
  auto nearest = steady_time_point::max();  // nearest deadline
  if (settings.minFree.get() != 0) {
    // Periodically wake up to see if we need to run the garbage collector.
    nearest = before + std::chrono::seconds(10);
  }
  for (auto& i : children) {
    if (!i.respectTimeouts) {
      continue;
    }
    if (0 != settings.maxSilentTime) {
      nearest = std::min(
          nearest, i.lastOutput + std::chrono::seconds(settings.maxSilentTime));
    }
    if (0 != settings.buildTimeout) {
      nearest = std::min(
          nearest, i.timeStarted + std::chrono::seconds(settings.buildTimeout));
    }
  }
  if (nearest != steady_time_point::max()) {
    /* Sleep at least one second, even if the deadline already passed. */
    timeout.tv_sec = std::max(
        1L,
        (long)std::chrono::duration_cast<std::chrono::seconds>(nearest - before)
            .count());
    useTimeout = true;
  }

  /* If we are polling goals that are waiting for a lock, then wake
     up after a few seconds at most. */
  if (!waitingForAWhile.empty()) {
    useTimeout = true;
    if (lastWokenUp == steady_time_point::min()) {
      DLOG(WARNING) << "waiting for locks or build slots...";
    }
    if (lastWokenUp == steady_time_point::min() || lastWokenUp > before) {
      lastWokenUp = before;
    }
    /* Note: this overrides any child deadline computed above. */
    timeout.tv_sec = std::max(
        1L,
        (long)std::chrono::duration_cast<std::chrono::seconds>(
            lastWokenUp + std::chrono::seconds(settings.pollInterval) - before)
            .count());
  } else {
    lastWokenUp = steady_time_point::min();
  }

  if (useTimeout) {
    DLOG(INFO) << "sleeping " << timeout.tv_sec << " seconds";
  }

  /* Use select() to wait for the input side of any logger pipe to
     become `available'.  Note that `available' (i.e., non-blocking)
     includes EOF. */
  fd_set fds;
  FD_ZERO(&fds);
  int fdMax = 0; /* one past the highest descriptor in the set */
  for (auto& i : children) {
    for (auto& j : i.fds) {
      if (j >= FD_SETSIZE) {
        throw Error("reached FD_SETSIZE limit");
      }
      FD_SET(j, &fds);
      if (j >= fdMax) {
        fdMax = j + 1;
      }
    }
  }

  if (select(fdMax, &fds, nullptr, nullptr, useTimeout ? &timeout : nullptr) ==
      -1) {
    if (errno == EINTR) {
      return;
    }
    throw SysError("waiting for input");
  }

  auto after = steady_time_point::clock::now();

  /* Process all available file descriptors. FIXME: this is
     O(children * fds). */
  /* The successor is computed before the body runs, so the loop keeps
     going from `i' whatever happens to `j' during the callbacks below.
     NOTE(review): whether `i' itself stays valid depends on the
     container type of `children', which is not visible here. */
  decltype(children)::iterator i;
  for (auto j = children.begin(); j != children.end(); j = i) {
    i = std::next(j);

    checkInterrupt();

    GoalPtr goal = j->goal.lock();
    assert(goal);

    /* Iterate over a copy: j->fds is modified below on EOF. */
    std::set<int> fds2(j->fds);
    std::vector<unsigned char> buffer(4096);
    for (auto& k : fds2) {
      if (FD_ISSET(k, &fds)) {
        ssize_t rd = read(k, buffer.data(), buffer.size());
        // FIXME: is there a cleaner way to handle pt close
        // than EIO? Is this even standard?
        if (rd == 0 || (rd == -1 && errno == EIO)) {
          DLOG(WARNING) << goal->getName() << ": got EOF";
          goal->handleEOF(k);
          j->fds.erase(k);
        } else if (rd == -1) {
          if (errno != EINTR) {
            throw SysError("%s: read failed", goal->getName());
          }
        } else {
          DLOG(INFO) << goal->getName() << ": read " << rd << " bytes";
          std::string data((char*)buffer.data(), rd);
          j->lastOutput = after;
          goal->handleChildOutput(k, data);
        }
      }
    }

    /* Silence timeout: no output for maxSilentTime seconds. */
    if (goal->getExitCode() == Goal::ecBusy && 0 != settings.maxSilentTime &&
        j->respectTimeouts &&
        after - j->lastOutput >= std::chrono::seconds(settings.maxSilentTime)) {
      LOG(ERROR) << goal->getName() << " timed out after "
                 << settings.maxSilentTime << " seconds of silence";
      goal->timedOut();
    }

    /* Overall build timeout, measured from the child's start time. */
    else if (goal->getExitCode() == Goal::ecBusy &&
             0 != settings.buildTimeout && j->respectTimeouts &&
             after - j->timeStarted >=
                 std::chrono::seconds(settings.buildTimeout)) {
      LOG(ERROR) << goal->getName() << " timed out after "
                 << settings.buildTimeout << " seconds";
      goal->timedOut();
    }
  }

  if (!waitingForAWhile.empty() &&
      lastWokenUp + std::chrono::seconds(settings.pollInterval) <= after) {
    lastWokenUp = after;
    for (auto& i : waitingForAWhile) {
      GoalPtr goal = i.lock();
      if (goal) {
        wakeUp(goal);
      }
    }
    waitingForAWhile.clear();
  }
}

/* Encodes the failure flags recorded on this worker into an exit
   status.  When no flag is set the result is 1; otherwise the flag
   bits below are combined with 0x60. */
unsigned int Worker::exitStatus() {
  /*
   * 1100100
   *    ^^^^
   *    |||`- timeout
   *    ||`-- output hash mismatch
   *    |`--- build failure
   *    `---- not deterministic
   */
  unsigned int mask = 0;
  bool buildFailure = permanentFailure || timedOut || hashMismatch;
  if (buildFailure) {
    mask |= 0x04;  // 100
  }
  if (timedOut) {
    mask |= 0x01;  // 101
  }
  if (hashMismatch) {
    mask |= 0x02;  // 102
  }
  if (checkMismatch) {
    mask |= 0x08;  // 104
  }

  if (mask != 0u) {
    mask |= 0x60;
  }
  return mask != 0u ?
                    mask : 1;
}

/* Returns whether the contents of `path' are intact, caching the
   answer in pathContentsGoodCache.  A path that does not exist is bad;
   otherwise the path is hashed with the hash type of its registered
   NAR hash and compared against it.  A registered hash equal to the
   default-constructed SHA-256 hash is accepted without comparison. */
bool Worker::pathContentsGood(const Path& path) {
  auto i = pathContentsGoodCache.find(path);
  if (i != pathContentsGoodCache.end()) {
    return i->second;
  }
  LOG(INFO) << "checking path '" << path << "'...";
  auto info = store.queryPathInfo(path);
  bool res;
  if (!pathExists(path)) {
    res = false;
  } else {
    HashResult current = hashPath(info->narHash.type, path);
    Hash nullHash(htSHA256);
    res = info->narHash == nullHash || info->narHash == current.first;
  }
  pathContentsGoodCache[path] = res;
  if (!res) {
    LOG(ERROR) << "path '" << path << "' is corrupted or missing!";
  }
  return res;
}

/* Records `path' as known-good without checking it. */
void Worker::markContentsGood(const Path& path) {
  pathContentsGoodCache[path] = true;
}

//////////////////////////////////////////////////////////////////////

/* Queries what would have to be built or substituted for `paths' and
   throws Error if something needs building while local builds are
   disabled (maxBuildJobs == 0) and no remote machines are configured.
   The other results of queryMissing() are not used here. */
static void primeCache(Store& store, const PathSet& paths) {
  PathSet willBuild;
  PathSet willSubstitute;
  PathSet unknown;
  unsigned long long downloadSize;
  unsigned long long narSize;
  store.queryMissing(paths, willBuild, willSubstitute, unknown, downloadSize,
                     narSize);

  if (!willBuild.empty() && 0 == settings.maxBuildJobs &&
      getMachines().empty()) {
    throw Error(
        "%d derivations need to be built, but neither local builds "
        "('--max-jobs') "
        "nor remote builds ('--builders') are enabled",
        willBuild.size());
  }
}

void LocalStore::buildPaths(const PathSet& drvPaths, BuildMode buildMode) {
  Worker worker(*this);

  primeCache(*this, drvPaths);

  /* Derivation paths become derivation goals; anything else is
     treated as a store path to substitute. */
  Goals goals;
  for (auto& i : drvPaths) {
    DrvPathWithOutputs i2 = parseDrvPathWithOutputs(i);
    if (isDerivation(i2.first)) {
      goals.insert(worker.makeDerivationGoal(i2.first, i2.second, buildMode));
    } else {
      goals.insert(worker.makeSubstitutionGoal(
          i, buildMode == bmRepair ?
                                   Repair : NoRepair));
    }
  }

  worker.run(goals);

  /* Collect the paths of all goals that did not succeed. */
  PathSet failed;
  for (auto& i : goals) {
    if (i->getExitCode() != Goal::ecSuccess) {
      auto* i2 = dynamic_cast<DerivationGoal*>(i.get());
      if (i2 != nullptr) {
        failed.insert(i2->getDrvPath());
      } else {
        failed.insert(dynamic_cast<SubstitutionGoal*>(i.get())->getStorePath());
      }
    }
  }

  if (!failed.empty()) {
    throw Error(worker.exitStatus(), "build of %s failed", showPaths(failed));
  }
}

/* Builds the in-memory derivation `drv' and returns the goal's result.
   An Error thrown while running the worker is not propagated; it is
   reported as BuildResult::MiscFailure with the error message. */
BuildResult LocalStore::buildDerivation(const Path& drvPath,
                                        const BasicDerivation& drv,
                                        BuildMode buildMode) {
  Worker worker(*this);
  auto goal = worker.makeBasicDerivationGoal(drvPath, drv, buildMode);

  BuildResult result;

  try {
    worker.run(Goals{goal});
    result = goal->getResult();
  } catch (Error& e) {
    result.status = BuildResult::MiscFailure;
    result.errorMsg = e.msg();
  }

  return result;
}

/* Makes sure `path' is valid, substituting it if necessary.  Throws
   Error (carrying the worker's exit status) when the substitution goal
   does not succeed. */
void LocalStore::ensurePath(const Path& path) {
  /* If the path is already valid, we're done. */
  if (isValidPath(path)) {
    return;
  }

  primeCache(*this, {path});

  Worker worker(*this);
  GoalPtr goal = worker.makeSubstitutionGoal(path);
  Goals goals = {goal};

  worker.run(goals);

  if (goal->getExitCode() != Goal::ecSuccess) {
    throw Error(worker.exitStatus(),
                "path '%s' does not exist and cannot be created", path);
  }
}

/* Tries to repair `path' by substituting it again in repair mode. */
void LocalStore::repairPath(const Path& path) {
  Worker worker(*this);
  GoalPtr goal = worker.makeSubstitutionGoal(path, Repair);
  Goals goals = {goal};

  worker.run(goals);

  if (goal->getExitCode() != Goal::ecSuccess) {
    /* Since substituting the path didn't work, if we have a valid
       deriver, then rebuild the deriver.
 */
    auto deriver = queryPathInfo(path)->deriver;
    if (!deriver.empty() && isValidPath(deriver)) {
      goals.clear();
      goals.insert(worker.makeDerivationGoal(deriver, StringSet(), bmRepair));
      /* NOTE(review): the outcome of this second run is not checked. */
      worker.run(goals);
    } else {
      throw Error(worker.exitStatus(), "cannot repair path '%s'", path);
    }
  }
}

}  // namespace nix
diff --git a/third_party/nix/src/libstore/builtins.hh b/third_party/nix/src/libstore/builtins.hh
new file mode 100644
index 000000000000..07601be0f50c
--- /dev/null
+++ b/third_party/nix/src/libstore/builtins.hh
@@ -0,0 +1,11 @@
#pragma once

#include "derivations.hh"

namespace nix {

// TODO: make pluggable.
// Entry points of the builders implemented inside Nix itself; each
// receives the derivation whose environment carries its parameters.
void builtinFetchurl(const BasicDerivation& drv, const std::string& netrcData);
void builtinBuildenv(const BasicDerivation& drv);

}  // namespace nix
diff --git a/third_party/nix/src/libstore/builtins/buildenv.cc b/third_party/nix/src/libstore/builtins/buildenv.cc
new file mode 100644
index 000000000000..db093663bfef
--- /dev/null
+++ b/third_party/nix/src/libstore/builtins/buildenv.cc
@@ -0,0 +1,228 @@
#include <algorithm>

#include <absl/strings/match.h>
#include <absl/strings/str_split.h>
#include <fcntl.h>
#include <glog/logging.h>
#include <sys/stat.h>
#include <sys/types.h>

#include "builtins.hh"

namespace nix {

/* Maps a destination path to the priority of the package that created
   the symlink at that path. */
typedef std::map<Path, int> Priorities;

// FIXME: change into local variables.
+ +static Priorities priorities; + +static unsigned long symlinks; + +/* For each activated package, create symlinks */ +static void createLinks(const Path& srcDir, const Path& dstDir, int priority) { + DirEntries srcFiles; + + try { + srcFiles = readDirectory(srcDir); + } catch (SysError& e) { + if (e.errNo == ENOTDIR) { + LOG(ERROR) << "warning: not including '" << srcDir + << "' in the user environment because it's not a directory"; + return; + } + throw; + } + + for (const auto& ent : srcFiles) { + if (ent.name[0] == '.') /* not matched by glob */ + continue; + auto srcFile = srcDir + "/" + ent.name; + auto dstFile = dstDir + "/" + ent.name; + + struct stat srcSt; + try { + if (stat(srcFile.c_str(), &srcSt) == -1) + throw SysError("getting status of '%1%'", srcFile); + } catch (SysError& e) { + if (e.errNo == ENOENT || e.errNo == ENOTDIR) { + LOG(ERROR) << "warning: skipping dangling symlink '" << dstFile << "'"; + continue; + } + throw; + } + + /* The files below are special-cased to that they don't show up + * in user profiles, either because they are useless, or + * because they would cauase pointless collisions (e.g., each + * Python package brings its own + * `$out/lib/pythonX.Y/site-packages/easy-install.pth'.) 
+ */ + if (absl::EndsWith(srcFile, "/propagated-build-inputs") || + absl::EndsWith(srcFile, "/nix-support") || + absl::EndsWith(srcFile, "/perllocal.pod") || + absl::EndsWith(srcFile, "/info/dir") || absl::EndsWith(srcFile, "/log")) + continue; + + else if (S_ISDIR(srcSt.st_mode)) { + struct stat dstSt; + auto res = lstat(dstFile.c_str(), &dstSt); + if (res == 0) { + if (S_ISDIR(dstSt.st_mode)) { + createLinks(srcFile, dstFile, priority); + continue; + } else if (S_ISLNK(dstSt.st_mode)) { + auto target = canonPath(dstFile, true); + if (!S_ISDIR(lstat(target).st_mode)) + throw Error("collision between '%1%' and non-directory '%2%'", + srcFile, target); + if (unlink(dstFile.c_str()) == -1) + throw SysError(format("unlinking '%1%'") % dstFile); + if (mkdir(dstFile.c_str(), 0755) == -1) + throw SysError(format("creating directory '%1%'")); + createLinks(target, dstFile, priorities[dstFile]); + createLinks(srcFile, dstFile, priority); + continue; + } + } else if (errno != ENOENT) + throw SysError(format("getting status of '%1%'") % dstFile); + } + + else { + struct stat dstSt; + auto res = lstat(dstFile.c_str(), &dstSt); + if (res == 0) { + if (S_ISLNK(dstSt.st_mode)) { + auto prevPriority = priorities[dstFile]; + if (prevPriority == priority) + throw Error( + "packages '%1%' and '%2%' have the same priority %3%; " + "use 'nix-env --set-flag priority NUMBER INSTALLED_PKGNAME' " + "to change the priority of one of the conflicting packages" + " (0 being the highest priority)", + srcFile, readLink(dstFile), priority); + if (prevPriority < priority) { + continue; + } + if (unlink(dstFile.c_str()) == -1) + throw SysError(format("unlinking '%1%'") % dstFile); + } else if (S_ISDIR(dstSt.st_mode)) + throw Error( + "collision between non-directory '%1%' and directory '%2%'", + srcFile, dstFile); + } else if (errno != ENOENT) + throw SysError(format("getting status of '%1%'") % dstFile); + } + + createSymlink(srcFile, dstFile); + priorities[dstFile] = priority; + symlinks++; + } 
+} + +typedef std::set<Path> FileProp; + +static FileProp done; +static FileProp postponed = FileProp{}; + +static Path out; + +static void addPkg(const Path& pkgDir, int priority) { + if (done.count(pkgDir)) { + return; + } + done.insert(pkgDir); + createLinks(pkgDir, out, priority); + + try { + for (auto p : absl::StrSplit( + readFile(pkgDir + "/nix-support/propagated-user-env-packages"), + absl::ByAnyChar(" \n"))) { + auto pkg = std::string(p); + if (!done.count(pkg)) { + postponed.insert(pkg); + } + } + } catch (SysError& e) { + if (e.errNo != ENOENT && e.errNo != ENOTDIR) { + throw; + } + } +} + +struct Package { + Path path; + bool active; + int priority; + Package(Path path, bool active, int priority) + : path{path}, active{active}, priority{priority} {} +}; + +typedef std::vector<Package> Packages; + +void builtinBuildenv(const BasicDerivation& drv) { + auto getAttr = [&](const std::string& name) { + auto i = drv.env.find(name); + if (i == drv.env.end()) { + throw Error("attribute '%s' missing", name); + } + return i->second; + }; + + out = getAttr("out"); + createDirs(out); + + /* Convert the stuff we get from the environment back into a + * coherent data type. */ + Packages pkgs; + Strings derivations = + absl::StrSplit(getAttr("derivations"), absl::ByAnyChar(" \t\n\r")); + while (!derivations.empty()) { + /* !!! We're trusting the caller to structure derivations env var correctly + */ + auto active = derivations.front(); + derivations.pop_front(); + auto priority = stoi(derivations.front()); + derivations.pop_front(); + auto outputs = stoi(derivations.front()); + derivations.pop_front(); + for (auto n = 0; n < outputs; n++) { + auto path = derivations.front(); + derivations.pop_front(); + pkgs.emplace_back(path, active != "false", priority); + } + } + + /* Symlink to the packages that have been installed explicitly by the + * user. Process in priority order to reduce unnecessary + * symlink/unlink steps. 
   */
  std::sort(pkgs.begin(), pkgs.end(), [](const Package& a, const Package& b) {
    return a.priority < b.priority ||
           (a.priority == b.priority && a.path < b.path);
  });
  for (const auto& pkg : pkgs)
    if (pkg.active) {
      addPkg(pkg.path, pkg.priority);
    }

  /* Symlink to the packages that have been "propagated" by packages
   * installed by the user (i.e., package X declares that it wants Y
   * installed as well). We do these later because they have a lower
   * priority in case of collisions.
   */
  auto priorityCounter = 1000;
  while (!postponed.empty()) {
    /* addPkg() may add to `postponed', so work on a snapshot and
       repeat until nothing new is queued. */
    auto pkgDirs = postponed;
    postponed = FileProp{};
    for (const auto& pkgDir : pkgDirs) {
      addPkg(pkgDir, priorityCounter++);
    }
  }

  LOG(INFO) << "created " << symlinks << " symlinks in user environment";

  createSymlink(getAttr("manifest"), out + "/manifest.nix");
}

}  // namespace nix
diff --git a/third_party/nix/src/libstore/builtins/fetchurl.cc b/third_party/nix/src/libstore/builtins/fetchurl.cc
new file mode 100644
index 000000000000..90814f6d7f0b
--- /dev/null
+++ b/third_party/nix/src/libstore/builtins/fetchurl.cc
@@ -0,0 +1,81 @@
#include <absl/strings/match.h>
#include <glog/logging.h>

#include "archive.hh"
#include "builtins.hh"
#include "compression.hh"
#include "download.hh"
#include "store-api.hh"

namespace nix {

/* Built-in builder that downloads the derivation's `url' attribute to
   its `out' path.  `netrcData', when non-empty, is written to a file
   named "netrc" for the downloader to use. */
void builtinFetchurl(const BasicDerivation& drv, const std::string& netrcData) {
  /* Make the host's netrc data available. Too bad curl requires
     this to be stored in a file. It would be nice if we could just
     pass a pointer to the data.
 */
  if (netrcData != "") {
    settings.netrcFile = "netrc";
    writeFile(settings.netrcFile, netrcData, 0600);
  }

  /* Looks up a mandatory attribute in the derivation's environment. */
  auto getAttr = [&](const std::string& name) {
    auto i = drv.env.find(name);
    if (i == drv.env.end())
      throw Error(format("attribute '%s' missing") % name);
    return i->second;
  };

  Path storePath = getAttr("out");
  auto mainUrl = getAttr("url");
  bool unpack = get(drv.env, "unpack", "") == "1";

  /* Note: have to use a fresh downloader here because we're in
     a forked process. */
  auto downloader = makeDownloader();

  /* Downloads `url' into storePath, either unpacking it as an archive
     (when `unpack' is set) or writing it as a plain file, and marks
     the result executable if the `executable' attribute is "1". */
  auto fetch = [&](const std::string& url) {
    auto source = sinkToSource([&](Sink& sink) {
      /* No need to do TLS verification, because we check the hash of
         the result anyway. */
      DownloadRequest request(url);
      request.verifyTLS = false;
      request.decompress = false;

      /* Note: the decision to decompress is based on mainUrl, not on
         the URL actually being fetched. */
      auto decompressor = makeDecompressionSink(
          unpack && absl::EndsWith(mainUrl, ".xz") ? "xz" : "none", sink);
      downloader->download(std::move(request), *decompressor);
      decompressor->finish();
    });

    if (unpack)
      restorePath(storePath, *source);
    else
      writeFile(storePath, *source);

    auto executable = drv.env.find("executable");
    if (executable != drv.env.end() && executable->second == "1") {
      if (chmod(storePath.c_str(), 0755) == -1)
        throw SysError(format("making '%1%' executable") % storePath);
    }
  };

  /* Try the hashed mirrors first. */
  if (getAttr("outputHashMode") == "flat")
    for (auto hashedMirror : settings.hashedMirrors.get()) try {
        if (!absl::EndsWith(hashedMirror, "/")) {
          hashedMirror += '/';
        }
        auto ht = parseHashType(getAttr("outputHashAlgo"));
        auto h = Hash(getAttr("outputHash"), ht);
        fetch(hashedMirror + printHashType(h.type) + "/" +
              h.to_string(Base16, false));
        return;
      } catch (Error& e) {
        /* A failing mirror is only logged; the next one is tried. */
        LOG(ERROR) << e.what();
      }

  /* Otherwise try the specified URL.
 */
  fetch(mainUrl);
}

}  // namespace nix
diff --git a/third_party/nix/src/libstore/crypto.cc b/third_party/nix/src/libstore/crypto.cc
new file mode 100644
index 000000000000..eb5bb6670738
--- /dev/null
+++ b/third_party/nix/src/libstore/crypto.cc
@@ -0,0 +1,125 @@
#include "crypto.hh"

#include "globals.hh"
#include "util.hh"

#if HAVE_SODIUM
#include <sodium.h>
#endif

namespace nix {

/* Splits `s' at its first colon into (name, rest).  Returns a pair of
   empty strings when there is no colon or the colon is the first
   character. */
static std::pair<std::string, std::string> split(const std::string& s) {
  size_t colon = s.find(':');
  if (colon == std::string::npos || colon == 0) {
    return {"", ""};
  }
  return {std::string(s, 0, colon), std::string(s, colon + 1)};
}

/* Parses `<name>:<base64>' and stores the decoded key.  Throws Error
   when either part is empty.  Note that the message says "secret key"
   even though this constructor is also used for public keys. */
Key::Key(const std::string& s) {
  auto ss = split(s);

  name = ss.first;
  key = ss.second;

  if (name.empty() || key.empty()) {
    throw Error("secret key is corrupt");
  }

  key = base64Decode(key);
}

/* With libsodium available, additionally checks the decoded key
   length. */
SecretKey::SecretKey(const std::string& s) : Key(s) {
#if HAVE_SODIUM
  if (key.size() != crypto_sign_SECRETKEYBYTES) {
    throw Error("secret key is not valid");
  }
#endif
}

#if !HAVE_SODIUM
/* Used by every signing/verification entry point when built without
   libsodium. */
[[noreturn]] static void noSodium() {
  throw Error(
      "Nix was not compiled with libsodium, required for signed binary cache "
      "support");
}
#endif

/* Returns `<key name>:<base64 signature>' for `data'. */
std::string SecretKey::signDetached(const std::string& data) const {
#if HAVE_SODIUM
  unsigned char sig[crypto_sign_BYTES];
  unsigned long long sigLen;
  crypto_sign_detached(sig, &sigLen, (unsigned char*)data.data(), data.size(),
                       (unsigned char*)key.data());
  return name + ":" + base64Encode(std::string((char*)sig, sigLen));
#else
  noSodium();
#endif
}

/* Derives the public key that belongs to this secret key; it keeps the
   same name. */
PublicKey SecretKey::toPublicKey() const {
#if HAVE_SODIUM
  unsigned char pk[crypto_sign_PUBLICKEYBYTES];
  crypto_sign_ed25519_sk_to_pk(pk, (unsigned char*)key.data());
  return PublicKey(name, std::string((char*)pk, crypto_sign_PUBLICKEYBYTES));
#else
  noSodium();
#endif
}

PublicKey::PublicKey(const std::string& s) : Key(s) {
#if HAVE_SODIUM
  if (key.size() != crypto_sign_PUBLICKEYBYTES) {
    throw Error("public key is not valid");
  }
#endif
}

/* Checks the signature `sig' (in `<key name>:<base64>' form) over
   `data'.  Returns false when the named key is not in `publicKeys' or
   the signature does not verify; throws Error when the decoded
   signature has the wrong length. */
bool verifyDetached(const std::string& data, const std::string& sig,
                    const PublicKeys& publicKeys) {
#if HAVE_SODIUM
  auto ss = split(sig);

  auto key = publicKeys.find(ss.first);
  if (key == publicKeys.end()) {
    return false;
  }

  auto sig2 = base64Decode(ss.second);
  if (sig2.size() != crypto_sign_BYTES) {
    throw Error("signature is not valid");
  }

  return crypto_sign_verify_detached(
             (unsigned char*)sig2.data(), (unsigned char*)data.data(),
             data.size(), (unsigned char*)key->second.key.data()) == 0;
#else
  noSodium();
#endif
}

/* Collects the configured trusted public keys plus the public halves
   of all readable configured secret keys, indexed by key name.  Since
   emplace() is used, the first key registered under a name wins. */
PublicKeys getDefaultPublicKeys() {
  PublicKeys publicKeys;

  // FIXME: filter duplicates

  for (const auto& s : settings.trustedPublicKeys.get()) {
    PublicKey key(s);
    publicKeys.emplace(key.name, key);
  }

  for (const auto& secretKeyFile : settings.secretKeyFiles.get()) {
    try {
      SecretKey secretKey(readFile(secretKeyFile));
      publicKeys.emplace(secretKey.name, secretKey.toPublicKey());
    } catch (SysError& e) {
      /* Ignore unreadable key files. That's normal in a
         multi-user installation. */
    }
  }

  return publicKeys;
}

}  // namespace nix
diff --git a/third_party/nix/src/libstore/crypto.hh b/third_party/nix/src/libstore/crypto.hh
new file mode 100644
index 000000000000..ef578db6d055
--- /dev/null
+++ b/third_party/nix/src/libstore/crypto.hh
@@ -0,0 +1,49 @@
#pragma once

#include <map>

#include "types.hh"

namespace nix {

/* A named key; `key' holds the raw key material. */
struct Key {
  std::string name;
  std::string key;

  /* Construct Key from a string in the format
     ‘<name>:<key-in-base64>’. */
  Key(const std::string& s);

 protected:
  /* Construct Key from an already split name and raw key. */
  Key(const std::string& name, const std::string& key) : name(name), key(key) {}
};

struct PublicKey;

struct SecretKey : Key {
  SecretKey(const std::string& s);

  /* Return a detached signature of the given string.
 */
  std::string signDetached(const std::string& data) const;

  /* Return the public key matching this secret key. */
  PublicKey toPublicKey() const;
};

struct PublicKey : Key {
  PublicKey(const std::string& s);

 private:
  /* Only SecretKey may build a PublicKey from raw parts. */
  PublicKey(const std::string& name, const std::string& key) : Key(name, key) {}
  friend struct SecretKey;
};

/* Public keys indexed by key name. */
typedef std::map<std::string, PublicKey> PublicKeys;

/* Return true iff ‘sig’ is a correct signature over ‘data’ using one
   of the given public keys. */
bool verifyDetached(const std::string& data, const std::string& sig,
                    const PublicKeys& publicKeys);

PublicKeys getDefaultPublicKeys();

}  // namespace nix
diff --git a/third_party/nix/src/libstore/derivations.cc b/third_party/nix/src/libstore/derivations.cc
new file mode 100644
index 000000000000..b2590f5e28e6
--- /dev/null
+++ b/third_party/nix/src/libstore/derivations.cc
@@ -0,0 +1,436 @@
#include "derivations.hh"

#include <absl/strings/match.h>
#include <absl/strings/str_split.h>
#include <absl/strings/string_view.h>

#include "fs-accessor.hh"
#include "globals.hh"
#include "istringstream_nocopy.hh"
#include "store-api.hh"
#include "util.hh"
#include "worker-protocol.hh"

namespace nix {

/* Decodes this output's hash information.  A leading "r:" in hashAlgo
   sets `recursive' and is stripped before the algorithm name is
   parsed; `hash' receives the output hash parsed with that algorithm.
   Throws Error for an unknown algorithm. */
void DerivationOutput::parseHashInfo(bool& recursive, Hash& hash) const {
  recursive = false;
  std::string algo = hashAlgo;

  if (std::string(algo, 0, 2) == "r:") {
    recursive = true;
    algo = std::string(algo, 2);
  }

  HashType hashType = parseHashType(algo);
  if (hashType == htUnknown) {
    throw Error(format("unknown hash algorithm '%1%'") % algo);
  }

  hash = Hash(this->hash, hashType);
}

/* Returns the store path of output `id'; throws Error if the
   derivation has no such output. */
Path BasicDerivation::findOutput(const std::string& id) const {
  auto i = outputs.find(id);
  if (i == outputs.end()) {
    throw Error(format("derivation has no output '%1%'") % id);
  }
  return i->second.path;
}

/* True when the builder name starts with "builtin:". */
bool BasicDerivation::isBuiltin() const {
  return std::string(builder, 0, 8) == "builtin:";
}

Path writeDerivation(const ref<Store>& store, const Derivation& drv,
                     const std::string& name,
RepairFlag repair) { + PathSet references; + references.insert(drv.inputSrcs.begin(), drv.inputSrcs.end()); + for (auto& i : drv.inputDrvs) { + references.insert(i.first); + } + /* Note that the outputs of a derivation are *not* references + (that can be missing (of course) and should not necessarily be + held during a garbage collection). */ + std::string suffix = name + drvExtension; + std::string contents = drv.unparse(); + return settings.readOnlyMode + ? store->computeStorePathForText(suffix, contents, references) + : store->addTextToStore(suffix, contents, references, repair); +} + +/* Read string `s' from stream `str'. */ +static void expect(std::istream& str, const std::string& s) { + char s2[s.size()]; + str.read(s2, s.size()); + if (std::string(s2, s.size()) != s) { + throw FormatError(format("expected string '%1%'") % s); + } +} + +/* Read a C-style string from stream `str'. */ +static std::string parseString(std::istream& str) { + std::string res; + expect(str, "\""); + int c; + while ((c = str.get()) != '"') { + if (c == '\\') { + c = str.get(); + if (c == 'n') { + res += '\n'; + } else if (c == 'r') { + res += '\r'; + } else if (c == 't') { + res += '\t'; + } else { + res += c; + } + } else { + res += c; + } + } + return res; +} + +static Path parsePath(std::istream& str) { + std::string s = parseString(str); + if (s.empty() || s[0] != '/') { + throw FormatError(format("bad path '%1%' in derivation") % s); + } + return s; +} + +static bool endOfList(std::istream& str) { + if (str.peek() == ',') { + str.get(); + return false; + } + if (str.peek() == ']') { + str.get(); + return true; + } + return false; +} + +static StringSet parseStrings(std::istream& str, bool arePaths) { + StringSet res; + while (!endOfList(str)) { + res.insert(arePaths ? 
                        parsePath(str) : parseString(str));
  }
  return res;
}

/* Parses the textual representation of a derivation, which has the
   shape

     Derive([outputs],[inputDrvs],[inputSrcs],"platform","builder",
            [args],[env])

   Any deviation from the expected syntax surfaces as a FormatError
   thrown by the helper functions. */
static Derivation parseDerivation(const std::string& s) {
  Derivation drv;
  istringstream_nocopy str(s);
  expect(str, "Derive([");

  /* Parse the list of outputs. */
  while (!endOfList(str)) {
    /* Each output is ("id","path","hashAlgo","hash"). */
    DerivationOutput out;
    expect(str, "(");
    std::string id = parseString(str);
    expect(str, ",");
    out.path = parsePath(str);
    expect(str, ",");
    out.hashAlgo = parseString(str);
    expect(str, ",");
    out.hash = parseString(str);
    expect(str, ")");
    drv.outputs[id] = out;
  }

  /* Parse the list of input derivations. */
  expect(str, ",[");
  while (!endOfList(str)) {
    /* Each entry is ("drvPath",["output",...]). */
    expect(str, "(");
    Path drvPath = parsePath(str);
    expect(str, ",[");
    drv.inputDrvs[drvPath] = parseStrings(str, false);
    expect(str, ")");
  }

  expect(str, ",[");
  drv.inputSrcs = parseStrings(str, true);
  expect(str, ",");
  drv.platform = parseString(str);
  expect(str, ",");
  drv.builder = parseString(str);

  /* Parse the builder arguments. */
  expect(str, ",[");
  while (!endOfList(str)) {
    drv.args.push_back(parseString(str));
  }

  /* Parse the environment variables.
*/ + expect(str, ",["); + while (!endOfList(str)) { + expect(str, "("); + std::string name = parseString(str); + expect(str, ","); + std::string value = parseString(str); + expect(str, ")"); + drv.env[name] = value; + } + + expect(str, ")"); + return drv; +} + +Derivation readDerivation(const Path& drvPath) { + try { + return parseDerivation(readFile(drvPath)); + } catch (FormatError& e) { + throw Error(format("error parsing derivation '%1%': %2%") % drvPath % + e.msg()); + } +} + +Derivation Store::derivationFromPath(const Path& drvPath) { + assertStorePath(drvPath); + ensurePath(drvPath); + auto accessor = getFSAccessor(); + try { + return parseDerivation(accessor->readFile(drvPath)); + } catch (FormatError& e) { + throw Error(format("error parsing derivation '%1%': %2%") % drvPath % + e.msg()); + } +} + +static void printString(std::string& res, const std::string& s) { + res += '"'; + for (const char* i = s.c_str(); *i != 0; i++) { + if (*i == '\"' || *i == '\\') { + res += "\\"; + res += *i; + } else if (*i == '\n') { + res += "\\n"; + } else if (*i == '\r') { + res += "\\r"; + } else if (*i == '\t') { + res += "\\t"; + } else { + res += *i; + } + } + res += '"'; +} + +template <class ForwardIterator> +static void printStrings(std::string& res, ForwardIterator i, + ForwardIterator j) { + res += '['; + bool first = true; + for (; i != j; ++i) { + if (first) { + first = false; + } else { + res += ','; + } + printString(res, *i); + } + res += ']'; +} + +std::string Derivation::unparse() const { + std::string s; + s.reserve(65536); + s += "Derive(["; + + bool first = true; + for (auto& i : outputs) { + if (first) { + first = false; + } else { + s += ','; + } + s += '('; + printString(s, i.first); + s += ','; + printString(s, i.second.path); + s += ','; + printString(s, i.second.hashAlgo); + s += ','; + printString(s, i.second.hash); + s += ')'; + } + + s += "],["; + first = true; + for (auto& i : inputDrvs) { + if (first) { + first = false; + } else { + s += ','; + 
} + s += '('; + printString(s, i.first); + s += ','; + printStrings(s, i.second.begin(), i.second.end()); + s += ')'; + } + + s += "],"; + printStrings(s, inputSrcs.begin(), inputSrcs.end()); + + s += ','; + printString(s, platform); + s += ','; + printString(s, builder); + s += ','; + printStrings(s, args.begin(), args.end()); + + s += ",["; + first = true; + for (auto& i : env) { + if (first) { + first = false; + } else { + s += ','; + } + s += '('; + printString(s, i.first); + s += ','; + printString(s, i.second); + s += ')'; + } + + s += "])"; + + return s; +} + +bool isDerivation(const std::string& fileName) { + return absl::EndsWith(fileName, drvExtension); +} + +bool BasicDerivation::isFixedOutput() const { + return outputs.size() == 1 && outputs.begin()->first == "out" && + !outputs.begin()->second.hash.empty(); +} + +DrvHashes drvHashes; + +/* Returns the hash of a derivation modulo fixed-output + subderivations. A fixed-output derivation is a derivation with one + output (`out') for which an expected hash and hash algorithm are + specified (using the `outputHash' and `outputHashAlgo' + attributes). We don't want changes to such derivations to + propagate upwards through the dependency graph, changing output + paths everywhere. + + For instance, if we change the url in a call to the `fetchurl' + function, we do not want to rebuild everything depending on it + (after all, (the hash of) the file being downloaded is unchanged). + So the *output paths* should not change. On the other hand, the + *derivation paths* should change to reflect the new dependency + graph. + + That's what this function does: it returns a hash which is just the + hash of the derivation ATerm, except that any input derivation + paths have been replaced by the result of a recursive call to this + function, and that for fixed-output derivations we return a hash of + its output path. 
*/ +Hash hashDerivationModulo(Store& store, Derivation drv) { + /* Return a fixed hash for fixed-output derivations. */ + if (drv.isFixedOutput()) { + auto i = drv.outputs.begin(); + return hashString(htSHA256, "fixed:out:" + i->second.hashAlgo + ":" + + i->second.hash + ":" + i->second.path); + } + + /* For other derivations, replace the inputs paths with recursive + calls to this function.*/ + DerivationInputs inputs2; + for (auto& i : drv.inputDrvs) { + Hash h = drvHashes[i.first]; + if (!h) { + assert(store.isValidPath(i.first)); + Derivation drv2 = readDerivation(store.toRealPath(i.first)); + h = hashDerivationModulo(store, drv2); + drvHashes[i.first] = h; + } + inputs2[h.to_string(Base16, false)] = i.second; + } + drv.inputDrvs = inputs2; + + return hashString(htSHA256, drv.unparse()); +} + +/* Split `path' of the form `<drvPath>!<out1>,<out2>,...' into the + derivation path and the set of output names. Without a '!' the whole + string is returned as the path together with an empty set. */ +DrvPathWithOutputs parseDrvPathWithOutputs(absl::string_view path) { + auto pos = path.find('!'); + if (pos == absl::string_view::npos) { + return DrvPathWithOutputs(path, std::set<std::string>()); + } + + /* The derivation path is everything before the '!'; only the text + after it is the comma-separated output list. Empty items are + skipped so that a trailing '!' or ',' does not add an output named + "". */ + return DrvPathWithOutputs( + path.substr(0, pos), + absl::StrSplit(path.substr(pos + 1), absl::ByChar(','), + absl::SkipEmpty())); +} + +Path makeDrvPathWithOutputs(const Path& drvPath, + const std::set<std::string>& outputs) { + return outputs.empty() ? drvPath + : drvPath + "!"
+ concatStringsSep(",", outputs); +} + +bool wantOutput(const std::string& output, + const std::set<std::string>& wanted) { + return wanted.empty() || wanted.find(output) != wanted.end(); +} + +PathSet BasicDerivation::outputPaths() const { + PathSet paths; + for (auto& i : outputs) { + paths.insert(i.second.path); + } + return paths; +} + +Source& readDerivation(Source& in, Store& store, BasicDerivation& drv) { + drv.outputs.clear(); + auto nr = readNum<size_t>(in); + for (size_t n = 0; n < nr; n++) { + auto name = readString(in); + DerivationOutput o; + in >> o.path >> o.hashAlgo >> o.hash; + store.assertStorePath(o.path); + drv.outputs[name] = o; + } + + drv.inputSrcs = readStorePaths<PathSet>(store, in); + in >> drv.platform >> drv.builder; + drv.args = readStrings<Strings>(in); + + nr = readNum<size_t>(in); + for (size_t n = 0; n < nr; n++) { + auto key = readString(in); + auto value = readString(in); + drv.env[key] = value; + } + + return in; +} + +Sink& operator<<(Sink& out, const BasicDerivation& drv) { + out << drv.outputs.size(); + for (auto& i : drv.outputs) { + out << i.first << i.second.path << i.second.hashAlgo << i.second.hash; + } + out << drv.inputSrcs << drv.platform << drv.builder << drv.args; + out << drv.env.size(); + for (auto& i : drv.env) { + out << i.first << i.second; + } + return out; +} + +std::string hashPlaceholder(const std::string& outputName) { + // FIXME: memoize? + return "/" + hashString(htSHA256, "nix-output:" + outputName) + .to_string(Base32, false); +} + +} // namespace nix diff --git a/third_party/nix/src/libstore/derivations.hh b/third_party/nix/src/libstore/derivations.hh new file mode 100644 index 000000000000..170f09804dee --- /dev/null +++ b/third_party/nix/src/libstore/derivations.hh @@ -0,0 +1,106 @@ +#pragma once + +#include <map> + +#include "hash.hh" +#include "store-api.hh" +#include "types.hh" + +namespace nix { + +/* Extension of derivations in the Nix store. 
*/ +const std::string drvExtension = ".drv"; + +/* Abstract syntax of derivations. */ + +struct DerivationOutput { + Path path; + std::string hashAlgo; /* hash used for expected hash computation */ + std::string hash; /* expected hash, may be null */ + DerivationOutput() {} + DerivationOutput(Path path, std::string hashAlgo, std::string hash) { + this->path = path; + this->hashAlgo = hashAlgo; + this->hash = hash; + } + void parseHashInfo(bool& recursive, Hash& hash) const; +}; + +typedef std::map<std::string, DerivationOutput> DerivationOutputs; + +/* For inputs that are sub-derivations, we specify exactly which + output IDs we are interested in. */ +typedef std::map<Path, StringSet> DerivationInputs; + +typedef std::map<std::string, std::string> StringPairs; + +struct BasicDerivation { + DerivationOutputs outputs; /* keyed on symbolic IDs */ + PathSet inputSrcs; /* inputs that are sources */ + std::string platform; + Path builder; + Strings args; + StringPairs env; + + virtual ~BasicDerivation(){}; + + /* Return the path corresponding to the output identifier `id' in + the given derivation. */ + Path findOutput(const std::string& id) const; + + bool isBuiltin() const; + + /* Return true iff this is a fixed-output derivation. */ + bool isFixedOutput() const; + + /* Return the output paths of a derivation. */ + PathSet outputPaths() const; +}; + +struct Derivation : BasicDerivation { + DerivationInputs inputDrvs; /* inputs that are sub-derivations */ + + /* Print a derivation. */ + std::string unparse() const; +}; + +class Store; + +/* Write a derivation to the Nix store, and return its path. */ +Path writeDerivation(const ref<Store>& store, const Derivation& drv, + const std::string& name, RepairFlag repair = NoRepair); + +/* Read a derivation from a file. */ +Derivation readDerivation(const Path& drvPath); + +/* Check whether a file name ends with the extension for + derivations. 
*/ +bool isDerivation(const std::string& fileName); + +Hash hashDerivationModulo(Store& store, Derivation drv); + +/* Memoisation of hashDerivationModulo(). */ +typedef std::map<Path, Hash> DrvHashes; + +extern DrvHashes drvHashes; // FIXME: global, not thread-safe + +/* Split a string specifying a derivation and a set of outputs + (/nix/store/hash-foo!out1,out2,...) into the derivation path and + the outputs. */ +typedef std::pair<std::string, std::set<std::string> > DrvPathWithOutputs; +DrvPathWithOutputs parseDrvPathWithOutputs(absl::string_view path); + +Path makeDrvPathWithOutputs(const Path& drvPath, + const std::set<std::string>& outputs); + +bool wantOutput(const std::string& output, const std::set<std::string>& wanted); + +struct Source; +struct Sink; + +Source& readDerivation(Source& in, Store& store, BasicDerivation& drv); +Sink& operator<<(Sink& out, const BasicDerivation& drv); + +std::string hashPlaceholder(const std::string& outputName); + +} // namespace nix diff --git a/third_party/nix/src/libstore/download.cc b/third_party/nix/src/libstore/download.cc new file mode 100644 index 000000000000..92a63087289e --- /dev/null +++ b/third_party/nix/src/libstore/download.cc @@ -0,0 +1,1014 @@ +#include "download.hh" + +#include <absl/strings/ascii.h> +#include <absl/strings/match.h> +#include <absl/strings/numbers.h> +#include <absl/strings/str_split.h> + +#include "archive.hh" +#include "compression.hh" +#include "finally.hh" +#include "globals.hh" +#include "hash.hh" +#include "pathlocks.hh" +#include "s3.hh" +#include "store-api.hh" +#include "util.hh" + +#ifdef ENABLE_S3 +#include <aws/core/client/ClientConfiguration.h> +#endif + +#include <algorithm> +#include <cmath> +#include <cstring> +#include <iostream> +#include <queue> +#include <random> +#include <thread> + +#include <curl/curl.h> +#include <fcntl.h> +#include <glog/logging.h> +#include <unistd.h> + +using namespace std::string_literals; + +namespace nix { + +DownloadSettings downloadSettings; 
+ +static GlobalConfig::Register r1(&downloadSettings); + +std::string resolveUri(const std::string& uri) { + if (uri.compare(0, 8, "channel:") == 0) { + return "https://nixos.org/channels/" + std::string(uri, 8) + + "/nixexprs.tar.xz"; + } + return uri; +} + +struct CurlDownloader : public Downloader { + CURLM* curlm = nullptr; + + std::random_device rd; + std::mt19937 mt19937; + + struct DownloadItem : public std::enable_shared_from_this<DownloadItem> { + CurlDownloader& downloader; + DownloadRequest request; + DownloadResult result; + bool done = false; // whether either the success or failure function has + // been called + Callback<DownloadResult> callback; + CURL* req = nullptr; + bool active = + false; // whether the handle has been added to the multi object + std::string status; + + unsigned int attempt = 0; + + /* Don't start this download until the specified time point + has been reached. */ + std::chrono::steady_clock::time_point embargo; + + struct curl_slist* requestHeaders = nullptr; + + std::string encoding; + + bool acceptRanges = false; + + curl_off_t writtenToSink = 0; + + DownloadItem(CurlDownloader& downloader, const DownloadRequest& request, + Callback<DownloadResult>&& callback) + : downloader(downloader), + request(request), + callback(std::move(callback)), + finalSink([this](const unsigned char* data, size_t len) { + if (this->request.dataCallback) { + long httpStatus = 0; + curl_easy_getinfo(req, CURLINFO_RESPONSE_CODE, &httpStatus); + + /* Only write data to the sink if this is a + successful response. */ + if (httpStatus == 0 || httpStatus == 200 || httpStatus == 201 || + httpStatus == 206) { + writtenToSink += len; + this->request.dataCallback((char*)data, len); + } + } else { + this->result.data->append((char*)data, len); + } + }) { + LOG(INFO) << (request.data ? 
"uploading '" : "downloading '") + << request.uri << "'"; + + if (!request.expectedETag.empty()) { + requestHeaders = curl_slist_append( + requestHeaders, ("If-None-Match: " + request.expectedETag).c_str()); + } + if (!request.mimeType.empty()) { + requestHeaders = curl_slist_append( + requestHeaders, ("Content-Type: " + request.mimeType).c_str()); + } + } + + ~DownloadItem() { + if (req != nullptr) { + if (active) { + curl_multi_remove_handle(downloader.curlm, req); + } + curl_easy_cleanup(req); + } + if (requestHeaders != nullptr) { + curl_slist_free_all(requestHeaders); + } + try { + if (!done) { + fail(DownloadError( + Interrupted, + format("download of '%s' was interrupted") % request.uri)); + } + } catch (...) { + ignoreException(); + } + } + + void failEx(const std::exception_ptr& ex) { + assert(!done); + done = true; + callback.rethrow(ex); + } + + template <class T> + void fail(const T& e) { + failEx(std::make_exception_ptr(e)); + } + + LambdaSink finalSink; + std::shared_ptr<CompressionSink> decompressionSink; + + std::exception_ptr writeException; + + size_t writeCallback(void* contents, size_t size, size_t nmemb) { + try { + size_t realSize = size * nmemb; + result.bodySize += realSize; + + if (!decompressionSink) { + decompressionSink = makeDecompressionSink(encoding, finalSink); + } + + (*decompressionSink)((unsigned char*)contents, realSize); + + return realSize; + } catch (...) 
{ + writeException = std::current_exception(); + return 0; + } + } + + static size_t writeCallbackWrapper(void* contents, size_t size, + size_t nmemb, void* userp) { + return ((DownloadItem*)userp)->writeCallback(contents, size, nmemb); + } + + size_t headerCallback(void* contents, size_t size, size_t nmemb) { + size_t realSize = size * nmemb; + std::string line((char*)contents, realSize); + DLOG(INFO) << "got header for '" << request.uri + << "': " << absl::StripAsciiWhitespace(line); + if (line.compare(0, 5, "HTTP/") == 0) { // new response starts + result.etag = ""; + std::vector<std::string> ss = absl::StrSplit(line, absl::ByChar(' ')); + status = ss.size() >= 2 ? ss[1] : ""; + result.data = std::make_shared<std::string>(); + result.bodySize = 0; + acceptRanges = false; + encoding = ""; + } else { + auto i = line.find(':'); + if (i != std::string::npos) { + std::string name = absl::AsciiStrToLower( + absl::StripAsciiWhitespace(std::string(line, 0, i))); + if (name == "etag") { + result.etag = absl::StripAsciiWhitespace(std::string(line, i + 1)); + /* Hack to work around a GitHub bug: it sends + ETags, but ignores If-None-Match. So if we get + the expected ETag on a 200 response, then shut + down the connection because we already have the + data. 
*/ + if (result.etag == request.expectedETag && status == "200") { + DLOG(INFO) + << "shutting down on 200 HTTP response with expected ETag"; + return 0; + } + } else if (name == "content-encoding") { + encoding = absl::StripAsciiWhitespace(std::string(line, i + 1)); + } else if (name == "accept-ranges" && + absl::AsciiStrToLower(absl::StripAsciiWhitespace( + std::string(line, i + 1))) == "bytes") { + acceptRanges = true; + } + } + } + return realSize; + } + + static size_t headerCallbackWrapper(void* contents, size_t size, + size_t nmemb, void* userp) { + return ((DownloadItem*)userp)->headerCallback(contents, size, nmemb); + } + + static int debugCallback(CURL* handle, curl_infotype type, char* data, + size_t size, void* userptr) { + if (type == CURLINFO_TEXT) { + DLOG(INFO) << "curl: " + << absl::StripTrailingAsciiWhitespace( + std::string(data, size)); + } + return 0; + } + + size_t readOffset = 0; + size_t readCallback(char* buffer, size_t size, size_t nitems) { + if (readOffset == request.data->length()) { + return 0; + } + auto count = std::min(size * nitems, request.data->length() - readOffset); + assert(count); + memcpy(buffer, request.data->data() + readOffset, count); + readOffset += count; + return count; + } + + static size_t readCallbackWrapper(char* buffer, size_t size, size_t nitems, + void* userp) { + return ((DownloadItem*)userp)->readCallback(buffer, size, nitems); + } + + void init() { + if (req == nullptr) { + req = curl_easy_init(); + } + + curl_easy_reset(req); + + // TODO(tazjin): Add an Abseil flag for this + // if (verbosity >= lvlVomit) { + // curl_easy_setopt(req, CURLOPT_VERBOSE, 1); + // curl_easy_setopt(req, CURLOPT_DEBUGFUNCTION, + // DownloadItem::debugCallback); + // } + + curl_easy_setopt(req, CURLOPT_URL, request.uri.c_str()); + curl_easy_setopt(req, CURLOPT_FOLLOWLOCATION, 1L); + curl_easy_setopt(req, CURLOPT_MAXREDIRS, 10); + curl_easy_setopt(req, CURLOPT_NOSIGNAL, 1); + curl_easy_setopt(req, CURLOPT_USERAGENT, + ("curl/" 
LIBCURL_VERSION " Nix/" + nixVersion + + (downloadSettings.userAgentSuffix != "" + ? " " + downloadSettings.userAgentSuffix.get() + : "")) + .c_str()); + /* curl_easy_setopt() is variadic, so numeric option values have to be + passed as `long': integer literals get an L suffix and the protocol + version constants are cast explicitly. */ +#if LIBCURL_VERSION_NUM >= 0x072b00 + curl_easy_setopt(req, CURLOPT_PIPEWAIT, 1L); +#endif +#if LIBCURL_VERSION_NUM >= 0x072f00 + if (downloadSettings.enableHttp2) { + curl_easy_setopt(req, CURLOPT_HTTP_VERSION, + static_cast<long>(CURL_HTTP_VERSION_2TLS)); + } else { + curl_easy_setopt(req, CURLOPT_HTTP_VERSION, + static_cast<long>(CURL_HTTP_VERSION_1_1)); + } +#endif + curl_easy_setopt(req, CURLOPT_WRITEFUNCTION, + DownloadItem::writeCallbackWrapper); + curl_easy_setopt(req, CURLOPT_WRITEDATA, this); + curl_easy_setopt(req, CURLOPT_HEADERFUNCTION, + DownloadItem::headerCallbackWrapper); + curl_easy_setopt(req, CURLOPT_HEADERDATA, this); + + curl_easy_setopt(req, CURLOPT_HTTPHEADER, requestHeaders); + + if (request.head) { + curl_easy_setopt(req, CURLOPT_NOBODY, 1L); + } + + if (request.data) { + curl_easy_setopt(req, CURLOPT_UPLOAD, 1L); + curl_easy_setopt(req, CURLOPT_READFUNCTION, readCallbackWrapper); + curl_easy_setopt(req, CURLOPT_READDATA, this); + curl_easy_setopt(req, CURLOPT_INFILESIZE_LARGE, + (curl_off_t)request.data->length()); + } + + if (request.verifyTLS) { + if (!settings.caFile.empty()) { + curl_easy_setopt(req, CURLOPT_CAINFO, settings.caFile.c_str()); + } + } else { + curl_easy_setopt(req, CURLOPT_SSL_VERIFYPEER, 0L); + curl_easy_setopt(req, CURLOPT_SSL_VERIFYHOST, 0L); + } + + curl_easy_setopt(req, CURLOPT_CONNECTTIMEOUT, + downloadSettings.connectTimeout.get()); + + curl_easy_setopt(req, CURLOPT_LOW_SPEED_LIMIT, 1L); + curl_easy_setopt(req, CURLOPT_LOW_SPEED_TIME, + downloadSettings.stalledDownloadTimeout.get()); + + /* If no file exist in the specified path, curl continues to work + anyway as if netrc support was disabled.
*/ + curl_easy_setopt(req, CURLOPT_NETRC_FILE, + settings.netrcFile.get().c_str()); + curl_easy_setopt(req, CURLOPT_NETRC, CURL_NETRC_OPTIONAL); + + if (writtenToSink != 0) { + curl_easy_setopt(req, CURLOPT_RESUME_FROM_LARGE, writtenToSink); + } + + result.data = std::make_shared<std::string>(); + result.bodySize = 0; + } + + void finish(CURLcode code) { + long httpStatus = 0; + curl_easy_getinfo(req, CURLINFO_RESPONSE_CODE, &httpStatus); + + char* effectiveUriCStr; + curl_easy_getinfo(req, CURLINFO_EFFECTIVE_URL, &effectiveUriCStr); + if (effectiveUriCStr != nullptr) { + result.effectiveUri = effectiveUriCStr; + } + + DLOG(INFO) << "finished " << request.verb() << " of " << request.uri + << "; curl status = " << code + << ", HTTP status = " << httpStatus + << ", body = " << result.bodySize << " bytes"; + + if (decompressionSink) { + try { + decompressionSink->finish(); + } catch (...) { + writeException = std::current_exception(); + } + } + + if (code == CURLE_WRITE_ERROR && result.etag == request.expectedETag) { + code = CURLE_OK; + httpStatus = 304; + } + + if (writeException) { + failEx(writeException); + + } else if (code == CURLE_OK && + (httpStatus == 200 || httpStatus == 201 || httpStatus == 204 || + httpStatus == 206 || httpStatus == 304 || + httpStatus == 226 /* FTP */ || + httpStatus == 0 /* other protocol */)) { + result.cached = httpStatus == 304; + done = true; + callback(std::move(result)); + } + + else { + // We treat most errors as transient, but won't retry when hopeless + Error err = Transient; + + if (httpStatus == 404 || httpStatus == 410 || + code == CURLE_FILE_COULDNT_READ_FILE) { + // The file is definitely not there + err = NotFound; + } else if (httpStatus == 401 || httpStatus == 403 || + httpStatus == 407) { + // Don't retry on authentication/authorization failures + err = Forbidden; + } else if (httpStatus >= 400 && httpStatus < 500 && httpStatus != 408 && + httpStatus != 429) { + // Most 4xx errors are client errors and are probably not 
worth + // retrying: + // * 408 means the server timed out waiting for us, so we try again + // * 429 means too many requests, so we retry (with a delay) + err = Misc; + } else if (httpStatus == 501 || httpStatus == 505 || + httpStatus == 511) { + // Let's treat most 5xx (server) errors as transient, except for a + // handful: + // * 501 not implemented + // * 505 http version not supported + // * 511 we're behind a captive portal + err = Misc; + } else { + // Don't bother retrying on certain cURL errors either + switch (code) { + case CURLE_FAILED_INIT: + case CURLE_URL_MALFORMAT: + case CURLE_NOT_BUILT_IN: + case CURLE_REMOTE_ACCESS_DENIED: + case CURLE_FILE_COULDNT_READ_FILE: + case CURLE_FUNCTION_NOT_FOUND: + case CURLE_ABORTED_BY_CALLBACK: + case CURLE_BAD_FUNCTION_ARGUMENT: + case CURLE_INTERFACE_FAILED: + case CURLE_UNKNOWN_OPTION: + case CURLE_SSL_CACERT_BADFILE: + case CURLE_TOO_MANY_REDIRECTS: + case CURLE_WRITE_ERROR: + case CURLE_UNSUPPORTED_PROTOCOL: + err = Misc; + break; + default: // Shut up warnings + break; + } + } + + attempt++; + + auto exc = + code == CURLE_ABORTED_BY_CALLBACK && _isInterrupted + ? DownloadError(Interrupted, fmt("%s of '%s' was interrupted", + request.verb(), request.uri)) + : httpStatus != 0 + ? DownloadError( + err, fmt("unable to %s '%s': HTTP error %d", + request.verb(), request.uri, httpStatus) + + (code == CURLE_OK + ? "" + : fmt(" (curl error: %s)", + curl_easy_strerror(code)))) + : DownloadError(err, fmt("unable to %s '%s': %s (%d)", + request.verb(), request.uri, + curl_easy_strerror(code), code)); + + /* If this is a transient error, then maybe retry the + download after a while. If we're writing to a + sink, we can only retry if the server supports + ranged requests. 
*/ + if (err == Transient && attempt < request.tries && + (!this->request.dataCallback || writtenToSink == 0 || + (acceptRanges && encoding.empty()))) { + int ms = request.baseRetryTimeMs * + std::pow(2.0F, attempt - 1 + + std::uniform_real_distribution<>( + 0.0, 0.5)(downloader.mt19937)); + if (writtenToSink != 0) { + LOG(WARNING) << exc.what() << "; retrying from offset " + << writtenToSink << " in " << ms << "ms"; + } else { + LOG(WARNING) << exc.what() << "; retrying in " << ms << "ms"; + } + embargo = + std::chrono::steady_clock::now() + std::chrono::milliseconds(ms); + downloader.enqueueItem(shared_from_this()); + } else { + fail(exc); + } + } + } + }; + + struct State { + struct EmbargoComparator { + bool operator()(const std::shared_ptr<DownloadItem>& i1, + const std::shared_ptr<DownloadItem>& i2) { + return i1->embargo > i2->embargo; + } + }; + bool quit = false; + std::priority_queue<std::shared_ptr<DownloadItem>, + std::vector<std::shared_ptr<DownloadItem>>, + EmbargoComparator> + incoming; + }; + + Sync<State> state_; + + /* We can't use a std::condition_variable to wake up the curl + thread, because it only monitors file descriptors. So use a + pipe instead. 
*/ + Pipe wakeupPipe; + + std::thread workerThread; + + CurlDownloader() : mt19937(rd()) { + static std::once_flag globalInit; + std::call_once(globalInit, curl_global_init, CURL_GLOBAL_ALL); + + curlm = curl_multi_init(); + +#if LIBCURL_VERSION_NUM >= 0x072b00 // Multiplex requires >= 7.43.0 + curl_multi_setopt(curlm, CURLMOPT_PIPELINING, CURLPIPE_MULTIPLEX); +#endif +#if LIBCURL_VERSION_NUM >= 0x071e00 // Max connections requires >= 7.30.0 + curl_multi_setopt(curlm, CURLMOPT_MAX_TOTAL_CONNECTIONS, + downloadSettings.httpConnections.get()); +#endif + + wakeupPipe.create(); + fcntl(wakeupPipe.readSide.get(), F_SETFL, O_NONBLOCK); + + workerThread = std::thread([&]() { workerThreadEntry(); }); + } + + ~CurlDownloader() override { + stopWorkerThread(); + + workerThread.join(); + + if (curlm != nullptr) { + curl_multi_cleanup(curlm); + } + } + + void stopWorkerThread() { + /* Signal the worker thread to exit. */ + { + auto state(state_.lock()); + state->quit = true; + } + writeFull(wakeupPipe.writeSide.get(), " ", false); + } + + void workerThreadMain() { + /* Cause this thread to be notified on SIGINT. */ + auto callback = createInterruptCallback([&]() { stopWorkerThread(); }); + + std::map<CURL*, std::shared_ptr<DownloadItem>> items; + + bool quit = false; + + std::chrono::steady_clock::time_point nextWakeup; + + while (!quit) { + checkInterrupt(); + + /* Let curl do its thing. */ + int running; + CURLMcode mc = curl_multi_perform(curlm, &running); + if (mc != CURLM_OK) { + throw nix::Error( + format("unexpected error from curl_multi_perform(): %s") % + curl_multi_strerror(mc)); + } + + /* Set the promises of any finished requests. 
*/ + CURLMsg* msg; + int left; + while ((msg = curl_multi_info_read(curlm, &left)) != nullptr) { + if (msg->msg == CURLMSG_DONE) { + auto i = items.find(msg->easy_handle); + assert(i != items.end()); + i->second->finish(msg->data.result); + curl_multi_remove_handle(curlm, i->second->req); + i->second->active = false; + items.erase(i); + } + } + + /* Wait for activity, including wakeup events. */ + int numfds = 0; + struct curl_waitfd extraFDs[1]; + extraFDs[0].fd = wakeupPipe.readSide.get(); + extraFDs[0].events = CURL_WAIT_POLLIN; + extraFDs[0].revents = 0; + long maxSleepTimeMs = items.empty() ? 10000 : 100; + auto sleepTimeMs = + nextWakeup != std::chrono::steady_clock::time_point() + ? std::max( + 0, + (int)std::chrono::duration_cast<std::chrono::milliseconds>( + nextWakeup - std::chrono::steady_clock::now()) + .count()) + : maxSleepTimeMs; + DLOG(INFO) << "download thread waiting for " << sleepTimeMs << " ms"; + mc = curl_multi_wait(curlm, extraFDs, 1, sleepTimeMs, &numfds); + if (mc != CURLM_OK) { + throw nix::Error(format("unexpected error from curl_multi_wait(): %s") % + curl_multi_strerror(mc)); + } + + nextWakeup = std::chrono::steady_clock::time_point(); + + /* Add new curl requests from the incoming requests queue, + except for requests that are embargoed (waiting for a + retry timeout to expire). 
*/ + if ((extraFDs[0].revents & CURL_WAIT_POLLIN) != 0) { + char buf[1024]; + auto res = read(extraFDs[0].fd, buf, sizeof(buf)); + if (res == -1 && errno != EINTR) { + throw SysError("reading curl wakeup socket"); + } + } + + std::vector<std::shared_ptr<DownloadItem>> incoming; + auto now = std::chrono::steady_clock::now(); + + { + auto state(state_.lock()); + while (!state->incoming.empty()) { + auto item = state->incoming.top(); + if (item->embargo <= now) { + incoming.push_back(item); + state->incoming.pop(); + } else { + if (nextWakeup == std::chrono::steady_clock::time_point() || + item->embargo < nextWakeup) { + nextWakeup = item->embargo; + } + break; + } + } + quit = state->quit; + } + + for (auto& item : incoming) { + DLOG(INFO) << "starting " << item->request.verb() << " of " + << item->request.uri; + item->init(); + curl_multi_add_handle(curlm, item->req); + item->active = true; + items[item->req] = item; + } + } + + DLOG(INFO) << "download thread shutting down"; + } + + void workerThreadEntry() { + try { + workerThreadMain(); + } catch (nix::Interrupted& e) { + } catch (std::exception& e) { + LOG(ERROR) << "unexpected error in download thread: " << e.what(); + } + + { + auto state(state_.lock()); + while (!state->incoming.empty()) { + state->incoming.pop(); + } + state->quit = true; + } + } + + void enqueueItem(const std::shared_ptr<DownloadItem>& item) { + if (item->request.data && !absl::StartsWith(item->request.uri, "http://") && + !absl::StartsWith(item->request.uri, "https://")) { + throw nix::Error("uploading to '%s' is not supported", item->request.uri); + } + + { + auto state(state_.lock()); + if (state->quit) { + throw nix::Error( + "cannot enqueue download request because the download thread is " + "shutting down"); + } + state->incoming.push(item); + } + writeFull(wakeupPipe.writeSide.get(), " "); + } + +#ifdef ENABLE_S3 + std::tuple<std::string, std::string, Store::Params> parseS3Uri( + std::string uri) { + auto [path, params] = 
splitUriAndParams(uri); + + auto slash = path.find('/', 5); // 5 is the length of "s3://" prefix + if (slash == std::string::npos) { + throw nix::Error("bad S3 URI '%s'", path); + } + + std::string bucketName(path, 5, slash - 5); + std::string key(path, slash + 1); + + return {bucketName, key, params}; + } +#endif + + void enqueueDownload(const DownloadRequest& request, + Callback<DownloadResult> callback) override { + /* Ugly hack to support s3:// URIs. */ + if (absl::StartsWith(request.uri, "s3://")) { + // FIXME: do this on a worker thread + try { +#ifdef ENABLE_S3 + auto [bucketName, key, params] = parseS3Uri(request.uri); + + std::string profile = get(params, "profile", ""); + std::string region = get(params, "region", Aws::Region::US_EAST_1); + std::string scheme = get(params, "scheme", ""); + std::string endpoint = get(params, "endpoint", ""); + + S3Helper s3Helper(profile, region, scheme, endpoint); + + // FIXME: implement ETag + auto s3Res = s3Helper.getObject(bucketName, key); + DownloadResult res; + if (!s3Res.data) + throw DownloadError( + NotFound, fmt("S3 object '%s' does not exist", request.uri)); + res.data = s3Res.data; + callback(std::move(res)); +#else + throw nix::Error( + "cannot download '%s' because Nix is not built with S3 support", + request.uri); +#endif + } catch (...) { + callback.rethrow(); + } + return; + } + + enqueueItem( + std::make_shared<DownloadItem>(*this, request, std::move(callback))); + } +}; + +ref<Downloader> getDownloader() { + static ref<Downloader> downloader = makeDownloader(); + return downloader; +} + +ref<Downloader> makeDownloader() { return make_ref<CurlDownloader>(); } + +std::future<DownloadResult> Downloader::enqueueDownload( + const DownloadRequest& request) { + auto promise = std::make_shared<std::promise<DownloadResult>>(); + enqueueDownload(request, {[promise](std::future<DownloadResult> fut) { + try { + promise->set_value(fut.get()); + } catch (...) 
/* Download 'request' and feed the received bytes to 'sink' on the
   calling thread.

   The download thread appends incoming data to a shared buffer
   (State::data) through request.dataCallback; the loop at the bottom of
   this function drains that buffer into 'sink'. The function returns
   once the download has finished and the buffer is empty. If the
   download failed, its exception is rethrown here, but only after all
   buffered data has been passed to 'sink'. */
void Downloader::download(DownloadRequest&& request, Sink& sink) {
  /* Note: we can't call 'sink' via request.dataCallback, because
     that would cause the sink to execute on the downloader
     thread. If 'sink' is a coroutine, this will fail. Also, if the
     sink is expensive (e.g. one that does decompression and writing
     to the Nix store), it would stall the download thread too much.
     Therefore we use a buffer to communicate data between the
     download thread and the calling thread. */

  struct State {
    // Set by the completion callback and by the 'finally' handler below.
    bool quit = false;
    // Exception captured from the download's future, if it failed.
    std::exception_ptr exc;
    // Bytes received from the download but not yet handed to 'sink'.
    std::string data;
    // 'avail' is signalled when data is appended or the download ends;
    // 'request' is signalled when the caller has taken the buffer, and
    // also on quit.
    std::condition_variable avail, request;
  };

  auto _state = std::make_shared<Sync<State>>();

  /* In case of an exception, wake up the download thread. FIXME:
     abort the download request. */
  Finally finally([&]() {
    auto state(_state->lock());
    state->quit = true;
    state->request.notify_one();
  });

  // Producer side: invoked with each chunk of received data.
  request.dataCallback = [_state](char* buf, size_t len) {
    auto state(_state->lock());

    // The consumer is gone (or the download already completed): drop the
    // data instead of buffering it.
    if (state->quit) {
      return;
    }

    /* If the buffer is full, then go to sleep until the calling
       thread wakes us up (i.e. when it has removed data from the
       buffer). We don't wait forever to prevent stalling the
       download thread. (Hopefully sleeping will throttle the
       sender.) */
    if (state->data.size() > 1024 * 1024) {
      DLOG(INFO) << "download buffer is full; going to sleep";
      // Single bounded wait: the data is appended afterwards even if the
      // buffer is still over the threshold.
      state.wait_for(state->request, std::chrono::seconds(10));
    }

    /* Append data to the buffer and wake up the calling
       thread. */
    state->data.append(buf, len);
    state->avail.notify_one();
  };

  // Completion callback: record the outcome and wake both sides.
  enqueueDownload(request, {[_state](std::future<DownloadResult> fut) {
                    auto state(_state->lock());
                    state->quit = true;
                    try {
                      fut.get();
                    } catch (...) {
                      state->exc = std::current_exception();
                    }
                    state->avail.notify_one();
                    state->request.notify_one();
                  }});

  // Consumer side: drain the buffer until the download has finished.
  while (true) {
    checkInterrupt();

    std::string chunk;

    /* Grab data if available, otherwise wait for the download
       thread to wake us up. */
    {
      auto state(_state->lock());

      while (state->data.empty()) {
        // Only report completion/failure once the buffer is empty, so no
        // already-received data is lost.
        if (state->quit) {
          if (state->exc) {
            std::rethrow_exception(state->exc);
          }
          return;
        }

        state.wait(state->avail);
      }

      // NOTE(review): the next iteration's data.empty() test relies on the
      // moved-from string being empty; the standard only promises a
      // valid-but-unspecified state here -- confirm this is acceptable.
      chunk = std::move(state->data);

      // Wake the download thread if it is sleeping on a full buffer.
      state->request.notify_one();
    }

    /* Flush the data to the sink and wake up the download thread
       if it's blocked on a full buffer. We don't hold the state
       lock while doing this to prevent blocking the download
       thread if sink() takes a long time. */
    sink((unsigned char*)chunk.data(), chunk.size());
  }
}
pathExists(dataFile)) { + storePath = readLink(fileLink); + store->addTempRoot(storePath); + if (store->isValidPath(storePath)) { + std::vector<std::string> ss = + absl::StrSplit(readFile(dataFile), absl::ByChar('\n')); + if (ss.size() >= 3 && ss[0] == url) { + time_t lastChecked; + if (absl::SimpleAtoi(ss[2], &lastChecked) && + (uint64_t)lastChecked + request.ttl >= (uint64_t)time(nullptr)) { + skip = true; + result.effectiveUri = request.uri; + result.etag = ss[1]; + } else if (!ss[1].empty()) { + DLOG(INFO) << "verifying previous ETag: " << ss[1]; + expectedETag = ss[1]; + } + } + } else { + storePath = ""; + } + } + + if (!skip) { + try { + DownloadRequest request2(url); + request2.expectedETag = expectedETag; + auto res = download(request2); + result.effectiveUri = res.effectiveUri; + result.etag = res.etag; + + if (!res.cached) { + ValidPathInfo info; + StringSink sink; + dumpString(*res.data, sink); + Hash hash = hashString( + request.expectedHash ? request.expectedHash.type : htSHA256, + *res.data); + info.path = store->makeFixedOutputPath(false, hash, name); + info.narHash = hashString(htSHA256, *sink.s); + info.narSize = sink.s->size(); + info.ca = makeFixedOutputCA(false, hash); + store->addToStore(info, sink.s, NoRepair, NoCheckSigs); + storePath = info.path; + } + + assert(!storePath.empty()); + replaceSymlink(storePath, fileLink); + + writeFile(dataFile, url + "\n" + res.etag + "\n" + + std::to_string(time(nullptr)) + "\n"); + } catch (DownloadError& e) { + if (storePath.empty()) { + throw; + } + LOG(WARNING) << e.msg() << "; using cached result"; + result.etag = expectedETag; + } + } + + if (request.unpack) { + Path unpackedLink = cacheDir + "/" + baseNameOf(storePath) + "-unpacked"; + PathLocks lock2({unpackedLink}, + fmt("waiting for lock on '%1%'...", unpackedLink)); + Path unpackedStorePath; + if (pathExists(unpackedLink)) { + unpackedStorePath = readLink(unpackedLink); + store->addTempRoot(unpackedStorePath); + if 
/* Report whether 's' looks like a URI the downloader understands:
   either the 'channel:<name>' shorthand, or '<scheme>://...' where
   <scheme> is one of the recognised schemes listed below. */
bool isUri(const std::string& s) {
  // The channel shorthand has no "://" separator, so test it first.
  if (s.rfind("channel:", 0) == 0) {
    return true;
  }

  const auto separator = s.find("://");
  if (separator == std::string::npos) {
    return false;
  }

  const std::string scheme = s.substr(0, separator);
  for (const char* known :
       {"http", "https", "file", "channel", "git", "s3", "ssh"}) {
    if (scheme == known) {
      return true;
    }
  }
  return false;
}
"http2", + "Whether to enable HTTP/2 support."}; + + Setting<std::string> userAgentSuffix{ + this, "", "user-agent-suffix", + "String appended to the user agent in HTTP requests."}; + + Setting<size_t> httpConnections{this, + 25, + "http-connections", + "Number of parallel HTTP connections.", + {"binary-caches-parallel-connections"}}; + + Setting<unsigned long> connectTimeout{ + this, 0, "connect-timeout", + "Timeout for connecting to servers during downloads. 0 means use curl's " + "builtin default."}; + + Setting<unsigned long> stalledDownloadTimeout{ + this, 300, "stalled-download-timeout", + "Timeout (in seconds) for receiving data from servers during download. " + "Nix cancels idle downloads after this timeout's duration."}; + + Setting<unsigned int> tries{ + this, 5, "download-attempts", + "How often Nix will attempt to download a file before giving up."}; +}; + +extern DownloadSettings downloadSettings; + +struct DownloadRequest { + std::string uri; + std::string expectedETag; + bool verifyTLS = true; + bool head = false; + size_t tries = downloadSettings.tries; + unsigned int baseRetryTimeMs = 250; + bool decompress = true; + std::shared_ptr<std::string> data; + std::string mimeType; + std::function<void(char*, size_t)> dataCallback; + + DownloadRequest(const std::string& uri) : uri(uri) {} + + std::string verb() { return data ? "upload" : "download"; } +}; + +struct DownloadResult { + bool cached = false; + std::string etag; + std::string effectiveUri; + std::shared_ptr<std::string> data; + uint64_t bodySize = 0; +}; + +struct CachedDownloadRequest { + std::string uri; + bool unpack = false; + std::string name; + Hash expectedHash; + unsigned int ttl = settings.tarballTtl; + + CachedDownloadRequest(const std::string& uri) : uri(uri) {} +}; + +struct CachedDownloadResult { + // Note: 'storePath' may be different from 'path' when using a + // chroot store. 
/* Abstract interface for downloading files. Implementations provide
   the callback-based enqueueDownload(); the remaining member functions
   are declared here and built on top of it. */
struct Downloader {
  virtual ~Downloader() {}

  /* Enqueue a download request. The outcome (a DownloadResult, or an
     exception such as DownloadError) is delivered through
     'callback'. */
  virtual void enqueueDownload(const DownloadRequest& request,
                               Callback<DownloadResult> callback) = 0;

  /* Enqueue a download request, returning a future to the result of
     the download. The future may throw a DownloadError
     exception. */
  std::future<DownloadResult> enqueueDownload(const DownloadRequest& request);

  /* Synchronously download a file. */
  DownloadResult download(const DownloadRequest& request);

  /* Download a file, writing its data to a sink. The sink will be
     invoked on the thread of the caller. */
  void download(DownloadRequest&& request, Sink& sink);

  /* Check if the specified file is already in ~/.cache/nix/tarballs
     and is more recent than 'tarball-ttl' seconds. Otherwise,
     use the recorded ETag to verify if the server has a more
     recent version, and if so, download it to the Nix store. */
  CachedDownloadResult downloadCached(const ref<Store>& store,
                                      const CachedDownloadRequest& request);

  /* Failure categories carried by DownloadError. */
  enum Error { NotFound, Forbidden, Misc, Transient, Interrupted };
};
*/ +ref<Downloader> makeDownloader(); + +class DownloadError : public Error { + public: + Downloader::Error error; + DownloadError(Downloader::Error error, const FormatOrString& fs) + : Error(fs), error(error) {} +}; + +bool isUri(const std::string& s); + +} // namespace nix diff --git a/third_party/nix/src/libstore/export-import.cc b/third_party/nix/src/libstore/export-import.cc new file mode 100644 index 000000000000..077b0d539001 --- /dev/null +++ b/third_party/nix/src/libstore/export-import.cc @@ -0,0 +1,111 @@ +#include <algorithm> + +#include "archive.hh" +#include "store-api.hh" +#include "worker-protocol.hh" + +namespace nix { + +struct HashAndWriteSink : Sink { + Sink& writeSink; + HashSink hashSink; + explicit HashAndWriteSink(Sink& writeSink) + : writeSink(writeSink), hashSink(htSHA256) {} + void operator()(const unsigned char* data, size_t len) override { + writeSink(data, len); + hashSink(data, len); + } + Hash currentHash() { return hashSink.currentHash().first; } +}; + +void Store::exportPaths(const Paths& paths, Sink& sink) { + Paths sorted = topoSortPaths(PathSet(paths.begin(), paths.end())); + std::reverse(sorted.begin(), sorted.end()); + + std::string doneLabel("paths exported"); + // logger->incExpected(doneLabel, sorted.size()); + + for (auto& path : sorted) { + // Activity act(*logger, lvlInfo, format("exporting path '%s'") % path); + sink << 1; + exportPath(path, sink); + // logger->incProgress(doneLabel); + } + + sink << 0; +} + +void Store::exportPath(const Path& path, Sink& sink) { + auto info = queryPathInfo(path); + + HashAndWriteSink hashAndWriteSink(sink); + + narFromPath(path, hashAndWriteSink); + + /* Refuse to export paths that have changed. This prevents + filesystem corruption from spreading to other machines. + Don't complain if the stored hash is zero (unknown). 
/* Read a sequence of exported paths from 'source' and add each one to
   this store, returning the imported store paths in stream order.

   Each entry is introduced by the integer 1; the integer 0 ends the
   stream and any other value is rejected. An entry consists of a NAR
   dump followed by exportMagic, the store path, its references, its
   deriver and a trailing integer flag.

   Throws Error if the framing or the magic number is wrong. */
Paths Store::importPaths(Source& source,
                         const std::shared_ptr<FSAccessor>& accessor,
                         CheckSigsFlag checkSigs) {
  Paths res;
  while (true) {
    // Entry marker: 1 = another path follows, 0 = end of stream.
    auto n = readNum<uint64_t>(source);
    if (n == 0) {
      break;
    }
    if (n != 1) {
      throw Error(
          "input doesn't look like something created by 'nix-store --export'");
    }

    /* Extract the NAR from the source. */
    // NOTE(review): presumably 'tee' records the bytes parseDump() consumes
    // in tee.source.data, which is hashed and stored below -- confirm
    // against TeeSink.
    TeeSink tee(source);
    parseDump(tee, tee.source);

    uint32_t magic = readInt(source);
    if (magic != exportMagic) {
      throw Error("Nix archive cannot be imported; wrong format");
    }

    ValidPathInfo info;

    info.path = readStorePath(*this, source);

    // Activity act(*logger, lvlInfo, format("importing path '%s'") %
    // info.path);

    info.references = readStorePaths<PathSet>(*this, source);

    // An empty deriver is accepted; a non-empty one must be a store path.
    info.deriver = readString(source);
    if (!info.deriver.empty()) {
      assertStorePath(info.deriver);
    }

    // Hash and size are computed from the NAR bytes captured above, not
    // read from the stream.
    info.narHash = hashString(htSHA256, *tee.source.data);
    info.narSize = tee.source.data->size();

    // Ignore optional legacy signature.
    if (readInt(source) == 1) {
      readString(source);
    }

    addToStore(info, tee.source.data, NoRepair, checkSigs, accessor);

    res.push_back(info.path);
  }

  return res;
}
This is used to prevent new Nix + processes from starting after the temporary root files have been + read. To be precise: when they try to create a new temporary root + file, they will block until the garbage collector has finished / + yielded the GC lock. */ +AutoCloseFD LocalStore::openGCLock(LockType lockType) { + Path fnGCLock = (format("%1%/%2%") % stateDir % gcLockName).str(); + + DLOG(INFO) << "acquiring global GC lock " << fnGCLock; + + AutoCloseFD fdGCLock = + open(fnGCLock.c_str(), O_RDWR | O_CREAT | O_CLOEXEC, 0600); + if (!fdGCLock) { + throw SysError(format("opening global GC lock '%1%'") % fnGCLock); + } + + if (!lockFile(fdGCLock.get(), lockType, false)) { + LOG(ERROR) << "waiting for the big garbage collector lock..."; + lockFile(fdGCLock.get(), lockType, true); + } + + /* !!! Restrict read permission on the GC root. Otherwise any + process that can open the file for reading can DoS the + collector. */ + + return fdGCLock; +} + +static void makeSymlink(const Path& link, const Path& target) { + /* Create directories up to `gcRoot'. */ + createDirs(dirOf(link)); + + /* Create the new symlink. */ + Path tempLink = + (format("%1%.tmp-%2%-%3%") % link % getpid() % random()).str(); + createSymlink(target, tempLink); + + /* Atomically replace the old one. 
*/ + if (rename(tempLink.c_str(), link.c_str()) == -1) { + throw SysError(format("cannot rename '%1%' to '%2%'") % tempLink % link); + } +} + +void LocalStore::syncWithGC() { AutoCloseFD fdGCLock = openGCLock(ltRead); } + +void LocalStore::addIndirectRoot(const Path& path) { + std::string hash = hashString(htSHA1, path).to_string(Base32, false); + Path realRoot = canonPath( + (format("%1%/%2%/auto/%3%") % stateDir % gcRootsDir % hash).str()); + makeSymlink(realRoot, path); +} + +Path LocalFSStore::addPermRoot(const Path& _storePath, const Path& _gcRoot, + bool indirect, bool allowOutsideRootsDir) { + Path storePath(canonPath(_storePath)); + Path gcRoot(canonPath(_gcRoot)); + assertStorePath(storePath); + + if (isInStore(gcRoot)) { + throw Error(format("creating a garbage collector root (%1%) in the Nix " + "store is forbidden " + "(are you running nix-build inside the store?)") % + gcRoot); + } + + if (indirect) { + /* Don't clobber the link if it already exists and doesn't + point to the Nix store. */ + if (pathExists(gcRoot) && + (!isLink(gcRoot) || !isInStore(readLink(gcRoot)))) { + throw Error(format("cannot create symlink '%1%'; already exists") % + gcRoot); + } + makeSymlink(gcRoot, storePath); + addIndirectRoot(gcRoot); + } + + else { + if (!allowOutsideRootsDir) { + Path rootsDir = + canonPath((format("%1%/%2%") % stateDir % gcRootsDir).str()); + + if (std::string(gcRoot, 0, rootsDir.size() + 1) != rootsDir + "/") { + throw Error(format("path '%1%' is not a valid garbage collector root; " + "it's not in the directory '%2%'") % + gcRoot % rootsDir); + } + } + + if (baseNameOf(gcRoot) == baseNameOf(storePath)) { + writeFile(gcRoot, ""); + } else { + makeSymlink(gcRoot, storePath); + } + } + + /* Check that the root can be found by the garbage collector. + !!! This can be very slow on machines that have many roots. + Instead of reading all the roots, it would be more efficient to + check if the root is in a directory in or linked from the + gcroots directory. 
/* Record 'path' as a temporary root of this process by appending it,
   NUL-terminated, to the per-process temporary roots file
   (fnTempRoots). The file is created on first use. The entry is written
   under a write lock on that file, which is downgraded to a read lock
   before returning. */
void LocalStore::addTempRoot(const Path& path) {
  auto state(_state.lock());

  /* Create the temporary roots file for this process. */
  if (!state->fdTempRoots) {
    while (true) {
      // Hold the global GC lock (read mode) while creating the file.
      AutoCloseFD fdGCLock = openGCLock(ltRead);

      if (pathExists(fnTempRoots)) {
        /* It *must* be stale, since there can be no two
           processes with the same pid. */
        unlink(fnTempRoots.c_str());
      }

      state->fdTempRoots = openLockFile(fnTempRoots, true);

      // NOTE(review): presumably assigning -1 closes the descriptor and so
      // releases the GC lock before we block below -- confirm against
      // AutoCloseFD.
      fdGCLock = -1;

      DLOG(INFO) << "acquiring read lock on " << fnTempRoots;
      lockFile(state->fdTempRoots.get(), ltRead, true);

      /* Check whether the garbage collector didn't get in our
         way. */
      struct stat st;
      if (fstat(state->fdTempRoots.get(), &st) == -1) {
        throw SysError(format("statting '%1%'") % fnTempRoots);
      }
      // A freshly created file is empty. findTempRoots() writes a byte into
      // files it removes as stale, so a non-empty file means ours was
      // deleted and must be recreated.
      if (st.st_size == 0) {
        break;
      }

      /* The garbage collector deleted this file before we could
         get a lock. (It won't delete the file after we get a
         lock.) Try again. */
    }
  }

  /* Upgrade the lock to a write lock. This will cause us to block
     if the garbage collector is holding our lock. */
  DLOG(INFO) << "acquiring write lock on " << fnTempRoots;
  lockFile(state->fdTempRoots.get(), ltWrite, true);

  // Entries are separated by NUL bytes; findTempRoots() splits on them.
  std::string s = path + '\0';
  writeFull(state->fdTempRoots.get(), s);

  /* Downgrade to a read lock. */
  DLOG(INFO) << "downgrading to read lock on " << fnTempRoots;
  lockFile(state->fdTempRoots.get(), ltRead, true);
}
censored : fmt("{temp:%d}", pid)); + pos = end + 1; + } + + fds.push_back(fd); /* keep open */ + } +} + +void LocalStore::findRoots(const Path& path, unsigned char type, Roots& roots) { + auto foundRoot = [&](const Path& path, const Path& target) { + Path storePath = toStorePath(target); + if (isStorePath(storePath) && isValidPath(storePath)) { + roots[storePath].emplace(path); + } else { + LOG(INFO) << "skipping invalid root from '" << path << "' to '" + << storePath << "'"; + } + }; + + try { + if (type == DT_UNKNOWN) { + type = getFileType(path); + } + + if (type == DT_DIR) { + for (auto& i : readDirectory(path)) { + findRoots(path + "/" + i.name, i.type, roots); + } + } + + else if (type == DT_LNK) { + Path target = readLink(path); + if (isInStore(target)) { + foundRoot(path, target); + } + + /* Handle indirect roots. */ + else { + target = absPath(target, dirOf(path)); + if (!pathExists(target)) { + if (isInDir(path, stateDir + "/" + gcRootsDir + "/auto")) { + LOG(INFO) << "removing stale link from '" << path << "' to '" + << target << "'"; + unlink(path.c_str()); + } + } else { + struct stat st2 = lstat(target); + if (!S_ISLNK(st2.st_mode)) { + return; + } + Path target2 = readLink(target); + if (isInStore(target2)) { + foundRoot(target, target2); + } + } + } + } + + else if (type == DT_REG) { + Path storePath = storeDir + "/" + baseNameOf(path); + if (isStorePath(storePath) && isValidPath(storePath)) { + roots[storePath].emplace(path); + } + } + + } + + catch (SysError& e) { + /* We only ignore permanent failures. */ + if (e.errNo == EACCES || e.errNo == ENOENT || e.errNo == ENOTDIR) { + LOG(INFO) << "cannot read potential root '" << path << "'"; + } else { + throw; + } + } +} + +void LocalStore::findRootsNoTemp(Roots& roots, bool censor) { + /* Process direct roots in {gcroots,profiles}. 
*/ + findRoots(stateDir + "/" + gcRootsDir, DT_UNKNOWN, roots); + findRoots(stateDir + "/profiles", DT_UNKNOWN, roots); + + /* Add additional roots returned by different platforms-specific + heuristics. This is typically used to add running programs to + the set of roots (to prevent them from being garbage collected). */ + findRuntimeRoots(roots, censor); +} + +Roots LocalStore::findRoots(bool censor) { + Roots roots; + findRootsNoTemp(roots, censor); + + FDs fds; + findTempRoots(fds, roots, censor); + + return roots; +} + +static void readProcLink(const std::string& file, Roots& roots) { + /* 64 is the starting buffer size gnu readlink uses... */ + auto bufsiz = ssize_t{64}; +try_again: + char buf[bufsiz]; + auto res = readlink(file.c_str(), buf, bufsiz); + if (res == -1) { + if (errno == ENOENT || errno == EACCES || errno == ESRCH) { + return; + } + throw SysError("reading symlink"); + } + if (res == bufsiz) { + if (SSIZE_MAX / 2 < bufsiz) { + throw Error("stupidly long symlink"); + } + bufsiz *= 2; + goto try_again; + } + if (res > 0 && buf[0] == '/') { + roots[std::string(static_cast<char*>(buf), res)].emplace(file); + } +} + +static std::string quoteRegexChars(const std::string& raw) { + static auto specialRegex = std::regex(R"([.^$\\*+?()\[\]{}|])"); + return std::regex_replace(raw, specialRegex, R"(\$&)"); +} + +static void readFileRoots(const char* path, Roots& roots) { + try { + roots[readFile(path)].emplace(path); + } catch (SysError& e) { + if (e.errNo != ENOENT && e.errNo != EACCES) { + throw; + } + } +} + +void LocalStore::findRuntimeRoots(Roots& roots, bool censor) { + Roots unchecked; + + auto procDir = AutoCloseDir{opendir("/proc")}; + if (procDir) { + struct dirent* ent; + auto digitsRegex = std::regex(R"(^\d+$)"); + auto mapRegex = + std::regex(R"(^\s*\S+\s+\S+\s+\S+\s+\S+\s+\S+\s+(/\S+)\s*$)"); + auto storePathRegex = std::regex(quoteRegexChars(storeDir) + + R"(/[0-9a-z]+[0-9a-zA-Z\+\-\._\?=]*)"); + while (errno = 0, ent = readdir(procDir.get())) 
{ + checkInterrupt(); + if (std::regex_match(ent->d_name, digitsRegex)) { + readProcLink(fmt("/proc/%s/exe", ent->d_name), unchecked); + readProcLink(fmt("/proc/%s/cwd", ent->d_name), unchecked); + + auto fdStr = fmt("/proc/%s/fd", ent->d_name); + auto fdDir = AutoCloseDir(opendir(fdStr.c_str())); + if (!fdDir) { + if (errno == ENOENT || errno == EACCES) { + continue; + } + throw SysError(format("opening %1%") % fdStr); + } + struct dirent* fd_ent; + while (errno = 0, fd_ent = readdir(fdDir.get())) { + if (fd_ent->d_name[0] != '.') { + readProcLink(fmt("%s/%s", fdStr, fd_ent->d_name), unchecked); + } + } + if (errno) { + if (errno == ESRCH) { + continue; + } + throw SysError(format("iterating /proc/%1%/fd") % ent->d_name); + } + fdDir.reset(); + + try { + auto mapFile = fmt("/proc/%s/maps", ent->d_name); + std::vector<std::string> mapLines = + absl::StrSplit(readFile(mapFile, true), absl::ByChar('\n')); + for (const auto& line : mapLines) { + auto match = std::smatch{}; + if (std::regex_match(line, match, mapRegex)) { + unchecked[match[1]].emplace(mapFile); + } + } + + auto envFile = fmt("/proc/%s/environ", ent->d_name); + auto envString = readFile(envFile, true); + auto env_end = std::sregex_iterator{}; + for (auto i = std::sregex_iterator{envString.begin(), envString.end(), + storePathRegex}; + i != env_end; ++i) { + unchecked[i->str()].emplace(envFile); + } + } catch (SysError& e) { + if (errno == ENOENT || errno == EACCES || errno == ESRCH) { + continue; + } + throw; + } + } + } + if (errno) { + throw SysError("iterating /proc"); + } + } + + readFileRoots("/proc/sys/kernel/modprobe", unchecked); + readFileRoots("/proc/sys/kernel/fbsplash", unchecked); + readFileRoots("/proc/sys/kernel/poweroff_cmd", unchecked); + + for (auto& [target, links] : unchecked) { + if (isInStore(target)) { + Path path = toStorePath(target); + if (isStorePath(path) && isValidPath(path)) { + DLOG(INFO) << "got additional root " << path; + if (censor) { + roots[path].insert(censored); + 
} else { + roots[path].insert(links.begin(), links.end()); + } + } + } + } +} + +struct GCLimitReached {}; + +struct LocalStore::GCState { + GCOptions options; + GCResults& results; + PathSet roots; + PathSet tempRoots; + PathSet dead; + PathSet alive; + bool gcKeepOutputs; + bool gcKeepDerivations; + unsigned long long bytesInvalidated; + bool moveToTrash = true; + bool shouldDelete; + explicit GCState(GCResults& results_) + : results(results_), bytesInvalidated(0) {} +}; + +bool LocalStore::isActiveTempFile(const GCState& state, const Path& path, + const std::string& suffix) { + return absl::EndsWith(path, suffix) && + state.tempRoots.find(std::string( + path, 0, path.size() - suffix.size())) != state.tempRoots.end(); +} + +void LocalStore::deleteGarbage(GCState& state, const Path& path) { + unsigned long long bytesFreed; + deletePath(path, bytesFreed); + state.results.bytesFreed += bytesFreed; +} + +void LocalStore::deletePathRecursive(GCState& state, const Path& path) { + checkInterrupt(); + + unsigned long long size = 0; + + if (isStorePath(path) && isValidPath(path)) { + PathSet referrers; + queryReferrers(path, referrers); + for (auto& i : referrers) { + if (i != path) { + deletePathRecursive(state, i); + } + } + size = queryPathInfo(path)->narSize; + invalidatePathChecked(path); + } + + Path realPath = realStoreDir + "/" + baseNameOf(path); + + struct stat st; + if (lstat(realPath.c_str(), &st) != 0) { + if (errno == ENOENT) { + return; + } + throw SysError(format("getting status of %1%") % realPath); + } + + LOG(INFO) << "deleting '" << path << "'"; + + state.results.paths.insert(path); + + /* If the path is not a regular file or symlink, move it to the + trash directory. The move is to ensure that later (when we're + not holding the global GC lock) we can delete the path without + being afraid that the path has become alive again. Otherwise + delete it right away. 
*/ + if (state.moveToTrash && S_ISDIR(st.st_mode)) { + // Estimate the amount freed using the narSize field. FIXME: + // if the path was not valid, need to determine the actual + // size. + try { + if (chmod(realPath.c_str(), st.st_mode | S_IWUSR) == -1) { + throw SysError(format("making '%1%' writable") % realPath); + } + Path tmp = trashDir + "/" + baseNameOf(path); + if (rename(realPath.c_str(), tmp.c_str()) != 0) { + throw SysError(format("unable to rename '%1%' to '%2%'") % realPath % + tmp); + } + state.bytesInvalidated += size; + } catch (SysError& e) { + if (e.errNo == ENOSPC) { + LOG(INFO) << "note: can't create move '" << realPath + << "': " << e.msg(); + deleteGarbage(state, realPath); + } + } + } else { + deleteGarbage(state, realPath); + } + + if (state.results.bytesFreed + state.bytesInvalidated > + state.options.maxFreed) { + LOG(INFO) << "deleted or invalidated more than " << state.options.maxFreed + << " bytes; stopping"; + throw GCLimitReached(); + } +} + +bool LocalStore::canReachRoot(GCState& state, PathSet& visited, + const Path& path) { + if (visited.count(path) != 0u) { + return false; + } + + if (state.alive.count(path) != 0u) { + return true; + } + + if (state.dead.count(path) != 0u) { + return false; + } + + if (state.roots.count(path) != 0u) { + DLOG(INFO) << "cannot delete '" << path << "' because it's a root"; + state.alive.insert(path); + return true; + } + + visited.insert(path); + + if (!isStorePath(path) || !isValidPath(path)) { + return false; + } + + PathSet incoming; + + /* Don't delete this path if any of its referrers are alive. */ + queryReferrers(path, incoming); + + /* If keep-derivations is set and this is a derivation, then + don't delete the derivation if any of the outputs are alive. 
*/ + if (state.gcKeepDerivations && isDerivation(path)) { + PathSet outputs = queryDerivationOutputs(path); + for (auto& i : outputs) { + if (isValidPath(i) && queryPathInfo(i)->deriver == path) { + incoming.insert(i); + } + } + } + + /* If keep-outputs is set, then don't delete this path if there + are derivers of this path that are not garbage. */ + if (state.gcKeepOutputs) { + PathSet derivers = queryValidDerivers(path); + for (auto& i : derivers) { + incoming.insert(i); + } + } + + for (auto& i : incoming) { + if (i != path) { + if (canReachRoot(state, visited, i)) { + state.alive.insert(path); + return true; + } + } + } + + return false; +} + +void LocalStore::tryToDelete(GCState& state, const Path& path) { + checkInterrupt(); + + auto realPath = realStoreDir + "/" + baseNameOf(path); + if (realPath == linksDir || realPath == trashDir) { + return; + } + + // Activity act(*logger, lvlDebug, format("considering whether to delete + // '%1%'") % path); + + if (!isStorePath(path) || !isValidPath(path)) { + /* A lock file belonging to a path that we're building right + now isn't garbage. */ + if (isActiveTempFile(state, path, ".lock")) { + return; + } + + /* Don't delete .chroot directories for derivations that are + currently being built. */ + if (isActiveTempFile(state, path, ".chroot")) { + return; + } + + /* Don't delete .check directories for derivations that are + currently being built, because we may need to run + diff-hook. */ + if (isActiveTempFile(state, path, ".check")) { + return; + } + } + + PathSet visited; + + if (canReachRoot(state, visited, path)) { + DLOG(INFO) << "cannot delete '" << path << "' because it's still reachable"; + } else { + /* No path we visited was a root, so everything is garbage. + But we only delete ‘path’ and its referrers here so that + ‘nix-store --delete’ doesn't have the unexpected effect of + recursing into derivations and outputs. 
*/ + state.dead.insert(visited.begin(), visited.end()); + if (state.shouldDelete) { + deletePathRecursive(state, path); + } + } +} + +/* Unlink all files in /nix/store/.links that have a link count of 1, + which indicates that there are no other links and so they can be + safely deleted. FIXME: race condition with optimisePath(): we + might see a link count of 1 just before optimisePath() increases + the link count. */ +void LocalStore::removeUnusedLinks(const GCState& state) { + AutoCloseDir dir(opendir(linksDir.c_str())); + if (!dir) { + throw SysError(format("opening directory '%1%'") % linksDir); + } + + long long actualSize = 0; + long long unsharedSize = 0; + + struct dirent* dirent; + while (errno = 0, dirent = readdir(dir.get())) { + checkInterrupt(); + std::string name = dirent->d_name; + if (name == "." || name == "..") { + continue; + } + Path path = linksDir + "/" + name; + + struct stat st; + if (lstat(path.c_str(), &st) == -1) { + throw SysError(format("statting '%1%'") % path); + } + + if (st.st_nlink != 1) { + actualSize += st.st_size; + unsharedSize += (st.st_nlink - 1) * st.st_size; + continue; + } + + LOG(INFO) << "deleting unused link " << path; + + if (unlink(path.c_str()) == -1) { + throw SysError(format("deleting '%1%'") % path); + } + + state.results.bytesFreed += st.st_size; + } + + struct stat st; + if (stat(linksDir.c_str(), &st) == -1) { + throw SysError(format("statting '%1%'") % linksDir); + } + + long long overhead = st.st_blocks * 512ULL; + + // TODO(tazjin): absl::StrFormat %.2f + LOG(INFO) << "note: currently hard linking saves " + << ((unsharedSize - actualSize - overhead) / (1024.0 * 1024.0)) + << " MiB"; +} + +void LocalStore::collectGarbage(const GCOptions& options, GCResults& results) { + GCState state(results); + state.options = options; + state.gcKeepOutputs = settings.gcKeepOutputs; + state.gcKeepDerivations = settings.gcKeepDerivations; + + /* Using `--ignore-liveness' with `--delete' can have unintended + consequences if 
`keep-outputs' or `keep-derivations' are true + (the garbage collector will recurse into deleting the outputs + or derivers, respectively). So disable them. */ + if (options.action == GCOptions::gcDeleteSpecific && options.ignoreLiveness) { + state.gcKeepOutputs = false; + state.gcKeepDerivations = false; + } + + state.shouldDelete = options.action == GCOptions::gcDeleteDead || + options.action == GCOptions::gcDeleteSpecific; + + if (state.shouldDelete) { + deletePath(reservedPath); + } + + /* Acquire the global GC root. This prevents + a) New roots from being added. + b) Processes from creating new temporary root files. */ + AutoCloseFD fdGCLock = openGCLock(ltWrite); + + /* Find the roots. Since we've grabbed the GC lock, the set of + permanent roots cannot increase now. */ + LOG(INFO) << "finding garbage collector roots..."; + Roots rootMap; + if (!options.ignoreLiveness) { + findRootsNoTemp(rootMap, true); + } + + for (auto& i : rootMap) { + state.roots.insert(i.first); + } + + /* Read the temporary roots. This acquires read locks on all + per-process temporary root files. So after this point no paths + can be added to the set of temporary roots. */ + FDs fds; + Roots tempRoots; + findTempRoots(fds, tempRoots, true); + for (auto& root : tempRoots) { + state.tempRoots.insert(root.first); + } + state.roots.insert(state.tempRoots.begin(), state.tempRoots.end()); + + /* After this point the set of roots or temporary roots cannot + increase, since we hold locks on everything. So everything + that is not reachable from `roots' is garbage. */ + + if (state.shouldDelete) { + if (pathExists(trashDir)) { + deleteGarbage(state, trashDir); + } + try { + createDirs(trashDir); + } catch (SysError& e) { + if (e.errNo == ENOSPC) { + LOG(INFO) << "note: can't create trash directory: " << e.msg(); + state.moveToTrash = false; + } + } + } + + /* Now either delete all garbage paths, or just the specified + paths (for gcDeleteSpecific). 
*/ + + if (options.action == GCOptions::gcDeleteSpecific) { + for (auto& i : options.pathsToDelete) { + assertStorePath(i); + tryToDelete(state, i); + if (state.dead.find(i) == state.dead.end()) { + throw Error(format("cannot delete path '%1%' since it is still alive") % + i); + } + } + + } else if (options.maxFreed > 0) { + if (state.shouldDelete) { + LOG(INFO) << "deleting garbage..."; + } else { + LOG(ERROR) << "determining live/dead paths..."; + } + + try { + AutoCloseDir dir(opendir(realStoreDir.c_str())); + if (!dir) { + throw SysError(format("opening directory '%1%'") % realStoreDir); + } + + /* Read the store and immediately delete all paths that + aren't valid. When using --max-freed etc., deleting + invalid paths is preferred over deleting unreachable + paths, since unreachable paths could become reachable + again. We don't use readDirectory() here so that GCing + can start faster. */ + Paths entries; + struct dirent* dirent; + while (errno = 0, dirent = readdir(dir.get())) { + checkInterrupt(); + std::string name = dirent->d_name; + if (name == "." || name == "..") { + continue; + } + Path path = storeDir + "/" + name; + if (isStorePath(path) && isValidPath(path)) { + entries.push_back(path); + } else { + tryToDelete(state, path); + } + } + + dir.reset(); + + /* Now delete the unreachable valid paths. Randomise the + order in which we delete entries to make the collector + less biased towards deleting paths that come + alphabetically first (e.g. /nix/store/000...). This + matters when using --max-freed etc. 
*/ + std::vector<Path> entries_(entries.begin(), entries.end()); + std::mt19937 gen(1); + std::shuffle(entries_.begin(), entries_.end(), gen); + + for (auto& i : entries_) { + tryToDelete(state, i); + } + + } catch (GCLimitReached& e) { + } + } + + if (state.options.action == GCOptions::gcReturnLive) { + state.results.paths = state.alive; + return; + } + + if (state.options.action == GCOptions::gcReturnDead) { + state.results.paths = state.dead; + return; + } + + /* Allow other processes to add to the store from here on. */ + fdGCLock = -1; + fds.clear(); + + /* Delete the trash directory. */ + LOG(INFO) << "deleting " << trashDir; + deleteGarbage(state, trashDir); + + /* Clean up the links directory. */ + if (options.action == GCOptions::gcDeleteDead || + options.action == GCOptions::gcDeleteSpecific) { + LOG(INFO) << "deleting unused links..."; + removeUnusedLinks(state); + } + + /* While we're at it, vacuum the database. */ + // if (options.action == GCOptions::gcDeleteDead) { vacuumDB(); } +} + +void LocalStore::autoGC(bool sync) { + static auto fakeFreeSpaceFile = getEnv("_NIX_TEST_FREE_SPACE_FILE", ""); + + auto getAvail = [this]() -> uint64_t { + if (!fakeFreeSpaceFile.empty()) { + return std::stoll(readFile(fakeFreeSpaceFile)); + } + + struct statvfs st; + if (statvfs(realStoreDir.c_str(), &st) != 0) { + throw SysError("getting filesystem info about '%s'", realStoreDir); + } + + return (uint64_t)st.f_bavail * st.f_bsize; + }; + + std::shared_future<void> future; + + { + auto state(_state.lock()); + + if (state->gcRunning) { + future = state->gcFuture; + DLOG(INFO) << "waiting for auto-GC to finish"; + goto sync; + } + + auto now = std::chrono::steady_clock::now(); + + if (now < state->lastGCCheck + + std::chrono::seconds(settings.minFreeCheckInterval)) { + return; + } + + auto avail = getAvail(); + + state->lastGCCheck = now; + + if (avail >= settings.minFree || avail >= settings.maxFree) { + return; + } + + if (avail > state->availAfterGC * 0.97) { + 
return; + } + + state->gcRunning = true; + + std::promise<void> promise; + future = state->gcFuture = promise.get_future().share(); + + std::thread([promise{std::move(promise)}, this, avail, getAvail]() mutable { + try { + /* Wake up any threads waiting for the auto-GC to finish. */ + Finally wakeup([&]() { + auto state(_state.lock()); + state->gcRunning = false; + state->lastGCCheck = std::chrono::steady_clock::now(); + promise.set_value(); + }); + + GCOptions options; + options.maxFreed = settings.maxFree - avail; + + LOG(INFO) << "running auto-GC to free " << options.maxFreed << " bytes"; + + GCResults results; + + collectGarbage(options, results); + + _state.lock()->availAfterGC = getAvail(); + + } catch (...) { + // FIXME: we could propagate the exception to the + // future, but we don't really care. + ignoreException(); + } + }).detach(); + } + +sync: + // Wait for the future outside of the state lock. + if (sync) { + future.get(); + } +} + +} // namespace nix diff --git a/third_party/nix/src/libstore/globals.cc b/third_party/nix/src/libstore/globals.cc new file mode 100644 index 000000000000..f218534cc9f8 --- /dev/null +++ b/third_party/nix/src/libstore/globals.cc @@ -0,0 +1,200 @@ +#include "globals.hh" + +#include <algorithm> +#include <map> +#include <thread> + +#include <absl/strings/numbers.h> +#include <absl/strings/str_cat.h> +#include <absl/strings/str_split.h> +#include <dlfcn.h> + +#include "archive.hh" +#include "args.hh" +#include "util.hh" + +namespace nix { + +/* The default location of the daemon socket, relative to nixStateDir. + The socket is in a directory to allow you to control access to the + Nix daemon by setting the mode/ownership of the directory + appropriately. (This wouldn't work on the socket itself since it + must be deleted and recreated on startup.) 
*/ +#define DEFAULT_SOCKET_PATH "/daemon-socket/socket" + +Settings settings; + +static GlobalConfig::Register r1(&settings); + +Settings::Settings() + : nixPrefix(NIX_PREFIX), + nixStore(canonPath( + getEnv("NIX_STORE_DIR", getEnv("NIX_STORE", NIX_STORE_DIR)))), + nixDataDir(canonPath(getEnv("NIX_DATA_DIR", NIX_DATA_DIR))), + nixLogDir(canonPath(getEnv("NIX_LOG_DIR", NIX_LOG_DIR))), + nixStateDir(canonPath(getEnv("NIX_STATE_DIR", NIX_STATE_DIR))), + nixConfDir(canonPath(getEnv("NIX_CONF_DIR", NIX_CONF_DIR))), + nixLibexecDir(canonPath(getEnv("NIX_LIBEXEC_DIR", NIX_LIBEXEC_DIR))), + nixBinDir(canonPath(getEnv("NIX_BIN_DIR", NIX_BIN_DIR))), + nixManDir(canonPath(NIX_MAN_DIR)), + nixDaemonSocketFile(canonPath(nixStateDir + DEFAULT_SOCKET_PATH)) { + buildUsersGroup = getuid() == 0 ? "nixbld" : ""; + lockCPU = getEnv("NIX_AFFINITY_HACK", "1") == "1"; + + caFile = getEnv("NIX_SSL_CERT_FILE", getEnv("SSL_CERT_FILE", "")); + if (caFile.empty()) { + for (auto& fn : + {"/etc/ssl/certs/ca-certificates.crt", + "/nix/var/nix/profiles/default/etc/ssl/certs/ca-bundle.crt"}) { + if (pathExists(fn)) { + caFile = fn; + break; + } + } + } + + /* Backwards compatibility. */ + // TODO(tazjin): still? + auto s = getEnv("NIX_REMOTE_SYSTEMS"); + if (!s.empty()) { + Strings ss; + for (auto p : absl::StrSplit(s, absl::ByChar(':'))) { + ss.push_back(absl::StrCat("@", p)); + } + builders = concatStringsSep(" ", ss); + } + + sandboxPaths = + absl::StrSplit("/bin/sh=" SANDBOX_SHELL, absl::ByAnyChar(" \t\n\r")); +} + +void loadConfFile() { + globalConfig.applyConfigFile(settings.nixConfDir + "/nix.conf"); + + /* We only want to send overrides to the daemon, i.e. stuff from + ~/.nix/nix.conf or the command line. 
*/ + globalConfig.resetOverriden(); + + auto dirs = getConfigDirs(); + // Iterate over them in reverse so that the ones appearing first in the path + // take priority + for (auto dir = dirs.rbegin(); dir != dirs.rend(); dir++) { + globalConfig.applyConfigFile(*dir + "/nix/nix.conf"); + } +} + +unsigned int Settings::getDefaultCores() { + return std::max(1U, std::thread::hardware_concurrency()); +} + +StringSet Settings::getDefaultSystemFeatures() { + /* For backwards compatibility, accept some "features" that are + used in Nixpkgs to route builds to certain machines but don't + actually require anything special on the machines. */ + StringSet features{"nixos-test", "benchmark", "big-parallel"}; + +#if __linux__ + if (access("/dev/kvm", R_OK | W_OK) == 0) { + features.insert("kvm"); + } +#endif + + return features; +} + +const std::string nixVersion = PACKAGE_VERSION; + +template <> +void BaseSetting<SandboxMode>::set(const std::string& str) { + if (str == "true") { + value = smEnabled; + } else if (str == "relaxed") { + value = smRelaxed; + } else if (str == "false") { + value = smDisabled; + } else { + throw UsageError("option '%s' has invalid value '%s'", name, str); + } +} + +template <> +std::string BaseSetting<SandboxMode>::to_string() { + if (value == smEnabled) { + return "true"; + } + if (value == smRelaxed) { + return "relaxed"; + } else if (value == smDisabled) { + return "false"; + } else { + abort(); + } +} + +template <> +void BaseSetting<SandboxMode>::toJSON(JSONPlaceholder& out) { + AbstractSetting::toJSON(out); +} + +template <> +void BaseSetting<SandboxMode>::convertToArg(Args& args, + const std::string& category) { + args.mkFlag() + .longName(name) + .description("Enable sandboxing.") + .handler([=](const std::vector<std::string>& ss) { override(smEnabled); }) + .category(category); + args.mkFlag() + .longName("no-" + name) + .description("Disable sandboxing.") + .handler( + [=](const std::vector<std::string>& ss) { override(smDisabled); }) + 
.category(category); + args.mkFlag() + .longName("relaxed-" + name) + .description("Enable sandboxing, but allow builds to disable it.") + .handler([=](const std::vector<std::string>& ss) { override(smRelaxed); }) + .category(category); +} + +/* Parse the textual value of this setting. "auto" selects std::thread::hardware_concurrency(), clamped to at least 1; any other string must be accepted by absl::SimpleAtoi, otherwise a UsageError naming the setting is thrown. */ void MaxBuildJobsSetting::set(const std::string& str) { + if (str == "auto") { + value = std::max(1U, std::thread::hardware_concurrency()); + } else if (!absl::SimpleAtoi(str, &value)) { + throw UsageError( + "configuration setting '%s' should be 'auto' or an integer", name); + } +} + +/* Load every plugin listed in settings.pluginFiles. Each entry is first read as a directory, in which case every directory entry becomes a plugin file; if readDirectory() fails with ENOTDIR the entry itself is used as the plugin file, and any other SysError is rethrown. Each resulting file is opened with dlopen(RTLD_LAZY | RTLD_LOCAL); a failed dlopen throws Error carrying dlerror(). */ void initPlugins() { + for (const auto& pluginFile : settings.pluginFiles.get()) { + Paths pluginFiles; + try { + auto ents = readDirectory(pluginFile); + for (const auto& ent : ents) { + pluginFiles.emplace_back(pluginFile + "/" + ent.name); + } + } catch (SysError& e) { + if (e.errNo != ENOTDIR) { + throw; + } + pluginFiles.emplace_back(pluginFile); + } + for (const auto& file : pluginFiles) { + /* handle is purposefully leaked as there may be state in the + DSO needed by the action of the plugin. */ + void* handle = dlopen(file.c_str(), RTLD_LAZY | RTLD_LOCAL); + if (handle == nullptr) { + throw Error("could not dynamically open plugin file '%s': %s", file, + dlerror()); + } + } + } + + /* Since plugins can add settings, try to re-apply previously + unknown settings. 
*/ + globalConfig.reapplyUnknownSettings(); + globalConfig.warnUnknownSettings(); +} + +} // namespace nix diff --git a/third_party/nix/src/libstore/globals.hh b/third_party/nix/src/libstore/globals.hh new file mode 100644 index 000000000000..feff54879070 --- /dev/null +++ b/third_party/nix/src/libstore/globals.hh @@ -0,0 +1,473 @@ +#pragma once + +#include <limits> +#include <map> + +#include <sys/types.h> + +#include "config.hh" +#include "types.hh" +#include "util.hh" + +namespace nix { + +typedef enum { smEnabled, smRelaxed, smDisabled } SandboxMode; + +struct MaxBuildJobsSetting : public BaseSetting<unsigned int> { + MaxBuildJobsSetting(Config* options, unsigned int def, + const std::string& name, const std::string& description, + const std::set<std::string>& aliases = {}) + : BaseSetting<unsigned int>(def, name, description, aliases) { + options->addSetting(this); + } + + void set(const std::string& str) override; +}; + +class Settings : public Config { + static unsigned int getDefaultCores(); + + static StringSet getDefaultSystemFeatures(); + + public: + Settings(); + + Path nixPrefix; + + /* The directory where we store sources and derived files. */ + Path nixStore; + + Path nixDataDir; /* !!! fix */ + + /* The directory where we log various operations. */ + Path nixLogDir; + + /* The directory where state is stored. */ + Path nixStateDir; + + /* The directory where configuration files are stored. */ + Path nixConfDir; + + /* The directory where internal helper programs are stored. */ + Path nixLibexecDir; + + /* The directory where the main programs are stored. */ + Path nixBinDir; + + /* The directory where the man pages are stored. */ + Path nixManDir; + + /* File name of the socket the daemon listens to. 
*/ + Path nixDaemonSocketFile; + + Setting<std::string> storeUri{this, getEnv("NIX_REMOTE", "auto"), "store", + "The default Nix store to use."}; + + Setting<bool> keepFailed{ + this, false, "keep-failed", + "Whether to keep temporary directories of failed builds."}; + + Setting<bool> keepGoing{ + this, false, "keep-going", + "Whether to keep building derivations when another build fails."}; + + Setting<bool> tryFallback{ + this, + false, + "fallback", + "Whether to fall back to building when substitution fails.", + {"build-fallback"}}; + + /* Whether to show build log output in real time. */ + bool verboseBuild = true; + + Setting<size_t> logLines{ + this, 10, "log-lines", + "If verbose-build is false, the number of lines of the tail of " + "the log to show if a build fails."}; + + MaxBuildJobsSetting maxBuildJobs{this, + 1, + "max-jobs", + "Maximum number of parallel build jobs. " + "\"auto\" means use number of cores.", + {"build-max-jobs"}}; + + Setting<unsigned int> buildCores{ + this, + getDefaultCores(), + "cores", + "Number of CPU cores to utilize in parallel within a build, " + "i.e. by passing this number to Make via '-j'. 0 means that the " + "number of actual CPU cores on the local host ought to be " + "auto-detected.", + {"build-cores"}}; + + /* Read-only mode. Don't copy stuff to the store, don't change + the database. */ + bool readOnlyMode = false; + + Setting<std::string> thisSystem{this, SYSTEM, "system", + "The canonical Nix system name."}; + + Setting<time_t> maxSilentTime{ + this, + 0, + "max-silent-time", + "The maximum time in seconds that a builer can go without " + "producing any output on stdout/stderr before it is killed. " + "0 means infinity.", + {"build-max-silent-time"}}; + + Setting<time_t> buildTimeout{ + this, + 0, + "timeout", + "The maximum duration in seconds that a builder can run. 
" + "0 means infinity.", + {"build-timeout"}}; + + PathSetting buildHook{this, true, nixLibexecDir + "/nix/build-remote", + "build-hook", + "The path of the helper program that executes builds " + "to remote machines."}; + + Setting<std::string> builders{this, "@" + nixConfDir + "/machines", + "builders", + "A semicolon-separated list of build machines, " + "in the format of nix.machines."}; + + Setting<bool> buildersUseSubstitutes{ + this, false, "builders-use-substitutes", + "Whether build machines should use their own substitutes for obtaining " + "build dependencies if possible, rather than waiting for this host to " + "upload them."}; + + Setting<off_t> reservedSize{ + this, 8 * 1024 * 1024, "gc-reserved-space", + "Amount of reserved disk space for the garbage collector."}; + + Setting<bool> fsyncMetadata{this, true, "fsync-metadata", + "Whether SQLite should use fsync()."}; + + Setting<bool> useSQLiteWAL{this, true, "use-sqlite-wal", + "Whether SQLite should use WAL mode."}; + + Setting<bool> syncBeforeRegistering{ + this, false, "sync-before-registering", + "Whether to call sync() before registering a path as valid."}; + + Setting<bool> useSubstitutes{this, + true, + "substitute", + "Whether to use substitutes.", + {"build-use-substitutes"}}; + + Setting<std::string> buildUsersGroup{ + this, "", "build-users-group", + "The Unix group that contains the build users."}; + + Setting<bool> impersonateLinux26{ + this, + false, + "impersonate-linux-26", + "Whether to impersonate a Linux 2.6 machine on newer kernels.", + {"build-impersonate-linux-26"}}; + + Setting<bool> keepLog{this, + true, + "keep-build-log", + "Whether to store build logs.", + {"build-keep-log"}}; + + Setting<bool> compressLog{this, + true, + "compress-build-log", + "Whether to compress logs.", + {"build-compress-log"}}; + + Setting<unsigned long> maxLogSize{ + this, + 0, + "max-build-log-size", + "Maximum number of bytes a builder can write to stdout/stderr " + "before being killed (0 means no 
limit).", + {"build-max-log-size"}}; + + /* When buildRepeat > 0 and verboseBuild == true, whether to print + repeated builds (i.e. builds other than the first one) to + stderr. Hack to prevent Hydra logs from being polluted. */ + bool printRepeatedBuilds = true; + + Setting<unsigned int> pollInterval{ + this, 5, "build-poll-interval", + "How often (in seconds) to poll for locks."}; + + Setting<bool> checkRootReachability{ + this, false, "gc-check-reachability", + "Whether to check if new GC roots can in fact be found by the " + "garbage collector."}; + + Setting<bool> gcKeepOutputs{ + this, + false, + "keep-outputs", + "Whether the garbage collector should keep outputs of live derivations.", + {"gc-keep-outputs"}}; + + Setting<bool> gcKeepDerivations{ + this, + true, + "keep-derivations", + "Whether the garbage collector should keep derivers of live paths.", + {"gc-keep-derivations"}}; + + Setting<bool> autoOptimiseStore{this, false, "auto-optimise-store", + "Whether to automatically replace files with " + "identical contents with hard links."}; + + Setting<bool> envKeepDerivations{ + this, + false, + "keep-env-derivations", + "Whether to add derivations as a dependency of user environments " + "(to prevent them from being GCed).", + {"env-keep-derivations"}}; + + /* Whether to lock the Nix client and worker to the same CPU. */ + bool lockCPU; + + /* Whether to show a stack trace if Nix evaluation fails. */ + Setting<bool> showTrace{ + this, false, "show-trace", + "Whether to show a stack trace on evaluation errors."}; + + Setting<SandboxMode> sandboxMode { + this, +#if __linux__ + smEnabled +#else + smDisabled +#endif + , + "sandbox", + "Whether to enable sandboxed builds. 
Can be \"true\", \"false\" or " + "\"relaxed\".", + { + "build-use-chroot", "build-use-sandbox" + } + }; + + Setting<PathSet> sandboxPaths{ + this, + {}, + "sandbox-paths", + "The paths to make available inside the build sandbox.", + {"build-chroot-dirs", "build-sandbox-paths"}}; + + Setting<bool> sandboxFallback{ + this, true, "sandbox-fallback", + "Whether to disable sandboxing when the kernel doesn't allow it."}; + + Setting<PathSet> extraSandboxPaths{ + this, + {}, + "extra-sandbox-paths", + "Additional paths to make available inside the build sandbox.", + {"build-extra-chroot-dirs", "build-extra-sandbox-paths"}}; + + Setting<size_t> buildRepeat{ + this, + 0, + "repeat", + "The number of times to repeat a build in order to verify determinism.", + {"build-repeat"}}; + +#if __linux__ + Setting<std::string> sandboxShmSize{ + this, "50%", "sandbox-dev-shm-size", + "The size of /dev/shm in the build sandbox."}; + + Setting<Path> sandboxBuildDir{this, "/build", "sandbox-build-dir", + "The build directory inside the sandbox."}; +#endif + + Setting<PathSet> allowedImpureHostPrefixes{ + this, + {}, + "allowed-impure-host-deps", + "Which prefixes to allow derivations to ask for access to (primarily for " + "Darwin)."}; + + Setting<bool> runDiffHook{ + this, false, "run-diff-hook", + "Whether to run the program specified by the diff-hook setting " + "repeated builds produce a different result. 
Typically used to " + "plug in diffoscope."}; + + PathSetting diffHook{ + this, true, "", "diff-hook", + "A program that prints out the differences between the two paths " + "specified on its command line."}; + + Setting<bool> enforceDeterminism{ + this, true, "enforce-determinism", + "Whether to fail if repeated builds produce different output."}; + + Setting<Strings> trustedPublicKeys{ + this, + {"cache.nixos.org-1:6NCHdD59X431o0gWypbMrAURkbJ16ZPMQFGspcDShjY="}, + "trusted-public-keys", + "Trusted public keys for secure substitution.", + {"binary-cache-public-keys"}}; + + Setting<Strings> secretKeyFiles{ + this, + {}, + "secret-key-files", + "Secret keys with which to sign local builds."}; + + Setting<unsigned int> tarballTtl{ + this, 60 * 60, "tarball-ttl", + "How long downloaded files are considered up-to-date."}; + + Setting<bool> requireSigs{ + this, true, "require-sigs", + "Whether to check that any non-content-addressed path added to the " + "Nix store has a valid signature (that is, one signed using a key " + "listed in 'trusted-public-keys'."}; + + Setting<StringSet> extraPlatforms{ + this, + std::string{SYSTEM} == "x86_64-linux" ? StringSet{"i686-linux"} + : StringSet{}, + "extra-platforms", + "Additional platforms that can be built on the local system. " + "These may be supported natively (e.g. armv7 on some aarch64 CPUs " + "or using hacks like qemu-user."}; + + Setting<StringSet> systemFeatures{ + this, getDefaultSystemFeatures(), "system-features", + "Optional features that this system implements (like \"kvm\")."}; + + Setting<Strings> substituters{ + this, + nixStore == "/nix/store" ? Strings{"https://cache.nixos.org/"} + : Strings(), + "substituters", + "The URIs of substituters (such as https://cache.nixos.org/).", + {"binary-caches"}}; + + // FIXME: provide a way to add to option values. 
+ Setting<Strings> extraSubstituters{this, + {}, + "extra-substituters", + "Additional URIs of substituters.", + {"extra-binary-caches"}}; + + Setting<StringSet> trustedSubstituters{ + this, + {}, + "trusted-substituters", + "Disabled substituters that may be enabled via the substituters option " + "by untrusted users.", + {"trusted-binary-caches"}}; + + Setting<Strings> trustedUsers{this, + {"root"}, + "trusted-users", + "Which users or groups are trusted to ask the " + "daemon to do unsafe things."}; + + Setting<unsigned int> ttlNegativeNarInfoCache{ + this, 3600, "narinfo-cache-negative-ttl", + "The TTL in seconds for negative lookups in the disk cache i.e binary " + "cache lookups that " + "return an invalid path result"}; + + Setting<unsigned int> ttlPositiveNarInfoCache{ + this, 30 * 24 * 3600, "narinfo-cache-positive-ttl", + "The TTL in seconds for positive lookups in the disk cache i.e binary " + "cache lookups that " + "return a valid path result."}; + + /* ?Who we trust to use the daemon in safe ways */ + Setting<Strings> allowedUsers{ + this, + {"*"}, + "allowed-users", + "Which users or groups are allowed to connect to the daemon."}; + + Setting<bool> printMissing{ + this, true, "print-missing", + "Whether to print what paths need to be built or downloaded."}; + + Setting<std::string> preBuildHook{ + this, "", "pre-build-hook", + "A program to run just before a build to set derivation-specific build " + "settings."}; + + Setting<std::string> postBuildHook{ + this, "", "post-build-hook", + "A program to run just after each successful build."}; + + Setting<std::string> netrcFile{this, fmt("%s/%s", nixConfDir, "netrc"), + "netrc-file", + "Path to the netrc file used to obtain " + "usernames/passwords for downloads."}; + + /* Path to the SSL CA file used */ + Path caFile; + +#if __linux__ + Setting<bool> filterSyscalls{ + this, true, "filter-syscalls", + "Whether to prevent certain dangerous system calls, such as " + "creation of setuid/setgid files or 
adding ACLs or extended " + "attributes. Only disable this if you're aware of the " + "security implications."}; + + Setting<bool> allowNewPrivileges{ + this, false, "allow-new-privileges", + "Whether builders can acquire new privileges by calling programs with " + "setuid/setgid bits or with file capabilities."}; +#endif + + Setting<Strings> hashedMirrors{ + this, + {"http://tarballs.nixos.org/"}, + "hashed-mirrors", + "A list of servers used by builtins.fetchurl to fetch files by hash."}; + + Setting<uint64_t> minFree{this, 0, "min-free", + "Automatically run the garbage collector when free " + "disk space drops below the specified amount."}; + + Setting<uint64_t> maxFree{this, std::numeric_limits<uint64_t>::max(), + "max-free", + "Stop deleting garbage when free disk space is " + "above the specified amount."}; + + Setting<uint64_t> minFreeCheckInterval{ + this, 5, "min-free-check-interval", + "Number of seconds between checking free disk space."}; + + Setting<Paths> pluginFiles{ + this, + {}, + "plugin-files", + "Plugins to dynamically load at nix initialization time."}; +}; + +// FIXME: don't use a global variable. 
+extern Settings settings; + +/* This should be called after settings are initialized, but before + anything else */ +void initPlugins(); + +void loadConfFile(); + +extern const std::string nixVersion; + +} // namespace nix diff --git a/third_party/nix/src/libstore/http-binary-cache-store.cc b/third_party/nix/src/libstore/http-binary-cache-store.cc new file mode 100644 index 000000000000..c75d5f2860a7 --- /dev/null +++ b/third_party/nix/src/libstore/http-binary-cache-store.cc @@ -0,0 +1,169 @@ +#include <utility> + +#include <glog/logging.h> + +#include "binary-cache-store.hh" +#include "download.hh" +#include "globals.hh" +#include "nar-info-disk-cache.hh" + +namespace nix { + +MakeError(UploadToHTTP, Error); + +class HttpBinaryCacheStore : public BinaryCacheStore { + private: + Path cacheUri; + + struct State { + bool enabled = true; + std::chrono::steady_clock::time_point disabledUntil; + }; + + Sync<State> _state; + + public: + HttpBinaryCacheStore(const Params& params, Path _cacheUri) + : BinaryCacheStore(params), cacheUri(std::move(_cacheUri)) { + if (cacheUri.back() == '/') { + cacheUri.pop_back(); + } + + diskCache = getNarInfoDiskCache(); + } + + std::string getUri() override { return cacheUri; } + + void init() override { + // FIXME: do this lazily? 
+ if (!diskCache->cacheExists(cacheUri, wantMassQuery_, priority)) { + try { + BinaryCacheStore::init(); + } catch (UploadToHTTP&) { + throw Error("'%s' does not appear to be a binary cache", cacheUri); + } + diskCache->createCache(cacheUri, storeDir, wantMassQuery_, priority); + } + } + + protected: + void maybeDisable() { + auto state(_state.lock()); + if (state->enabled && settings.tryFallback) { + int t = 60; + LOG(WARNING) << "disabling binary cache '" << getUri() << "' for " << t + << " seconds"; + state->enabled = false; + state->disabledUntil = + std::chrono::steady_clock::now() + std::chrono::seconds(t); + } + } + + void checkEnabled() { + auto state(_state.lock()); + if (state->enabled) { + return; + } + if (std::chrono::steady_clock::now() > state->disabledUntil) { + state->enabled = true; + DLOG(INFO) << "re-enabling binary cache '" << getUri() << "'"; + return; + } + throw SubstituterDisabled("substituter '%s' is disabled", getUri()); + } + + bool fileExists(const std::string& path) override { + checkEnabled(); + + try { + DownloadRequest request(cacheUri + "/" + path); + request.head = true; + getDownloader()->download(request); + return true; + } catch (DownloadError& e) { + /* S3 buckets return 403 if a file doesn't exist and the + bucket is unlistable, so treat 403 as 404. 
*/ + if (e.error == Downloader::NotFound || e.error == Downloader::Forbidden) { + return false; + } + maybeDisable(); + throw; + } + } + + void upsertFile(const std::string& path, const std::string& data, + const std::string& mimeType) override { + auto req = DownloadRequest(cacheUri + "/" + path); + req.data = std::make_shared<std::string>(data); // FIXME: inefficient + req.mimeType = mimeType; + try { + getDownloader()->download(req); + } catch (DownloadError& e) { + throw UploadToHTTP("while uploading to HTTP binary cache at '%s': %s", + cacheUri, e.msg()); + } + } + + DownloadRequest makeRequest(const std::string& path) { + DownloadRequest request(cacheUri + "/" + path); + return request; + } + + void getFile(const std::string& path, Sink& sink) override { + checkEnabled(); + auto request(makeRequest(path)); + try { + getDownloader()->download(std::move(request), sink); + } catch (DownloadError& e) { + if (e.error == Downloader::NotFound || e.error == Downloader::Forbidden) { + throw NoSuchBinaryCacheFile( + "file '%s' does not exist in binary cache '%s'", path, getUri()); + } + maybeDisable(); + throw; + } + } + + void getFile( + const std::string& path, + Callback<std::shared_ptr<std::string>> callback) noexcept override { + checkEnabled(); + + auto request(makeRequest(path)); + + auto callbackPtr = + std::make_shared<decltype(callback)>(std::move(callback)); + + getDownloader()->enqueueDownload( + request, {[callbackPtr, this](std::future<DownloadResult> result) { + try { + (*callbackPtr)(result.get().data); + } catch (DownloadError& e) { + if (e.error == Downloader::NotFound || + e.error == Downloader::Forbidden) { + return (*callbackPtr)(std::shared_ptr<std::string>()); + } + maybeDisable(); + callbackPtr->rethrow(); + } catch (...) 
{ + callbackPtr->rethrow(); + } + }}); + } +}; + +static RegisterStoreImplementation regStore( + [](const std::string& uri, + const Store::Params& params) -> std::shared_ptr<Store> { + if (std::string(uri, 0, 7) != "http://" && + std::string(uri, 0, 8) != "https://" && + (getEnv("_NIX_FORCE_HTTP_BINARY_CACHE_STORE") != "1" || + std::string(uri, 0, 7) != "file://")) { + return nullptr; + } + auto store = std::make_shared<HttpBinaryCacheStore>(params, uri); + store->init(); + return store; + }); + +} // namespace nix diff --git a/third_party/nix/src/libstore/legacy-ssh-store.cc b/third_party/nix/src/libstore/legacy-ssh-store.cc new file mode 100644 index 000000000000..9e6870f0da6b --- /dev/null +++ b/third_party/nix/src/libstore/legacy-ssh-store.cc @@ -0,0 +1,273 @@ +#include "archive.hh" +#include "derivations.hh" +#include "glog/logging.h" +#include "pool.hh" +#include "remote-store.hh" +#include "serve-protocol.hh" +#include "ssh.hh" +#include "store-api.hh" +#include "worker-protocol.hh" + +namespace nix { + +static std::string uriScheme = "ssh://"; + +struct LegacySSHStore : public Store { + const Setting<int> maxConnections{ + this, 1, "max-connections", + "maximum number of concurrent SSH connections"}; + const Setting<Path> sshKey{this, "", "ssh-key", "path to an SSH private key"}; + const Setting<bool> compress{this, false, "compress", + "whether to compress the connection"}; + const Setting<Path> remoteProgram{ + this, "nix-store", "remote-program", + "path to the nix-store executable on the remote system"}; + const Setting<std::string> remoteStore{ + this, "", "remote-store", "URI of the store on the remote system"}; + + // Hack for getting remote build log output. 
+ const Setting<int> logFD{ + this, -1, "log-fd", "file descriptor to which SSH's stderr is connected"}; + + struct Connection { + std::unique_ptr<SSHMaster::Connection> sshConn; + FdSink to; + FdSource from; + int remoteVersion; + bool good = true; + }; + + std::string host; + + ref<Pool<Connection>> connections; + + SSHMaster master; + + LegacySSHStore(const std::string& host, const Params& params) + : Store(params), + host(host), + connections(make_ref<Pool<Connection>>( + std::max(1, (int)maxConnections), + [this]() { return openConnection(); }, + [](const ref<Connection>& r) { return r->good; })), + master(host, sshKey, + // Use SSH master only if using more than 1 connection. + connections->capacity() > 1, compress, logFD) {} + + ref<Connection> openConnection() { + auto conn = make_ref<Connection>(); + conn->sshConn = master.startCommand( + fmt("%s --serve --write", remoteProgram) + + (remoteStore.get().empty() + ? "" + : " --store " + shellEscape(remoteStore.get()))); + conn->to = FdSink(conn->sshConn->in.get()); + conn->from = FdSource(conn->sshConn->out.get()); + + try { + conn->to << SERVE_MAGIC_1 << SERVE_PROTOCOL_VERSION; + conn->to.flush(); + + unsigned int magic = readInt(conn->from); + if (magic != SERVE_MAGIC_2) { + throw Error("protocol mismatch with 'nix-store --serve' on '%s'", host); + } + conn->remoteVersion = readInt(conn->from); + if (GET_PROTOCOL_MAJOR(conn->remoteVersion) != 0x200) { + throw Error("unsupported 'nix-store --serve' protocol version on '%s'", + host); + } + + } catch (EndOfFile& e) { + throw Error("cannot connect to '%1%'", host); + } + + return conn; + }; + + std::string getUri() override { return uriScheme + host; } + + void queryPathInfoUncached( + const Path& path, + Callback<std::shared_ptr<ValidPathInfo>> callback) noexcept override { + try { + auto conn(connections->get()); + + DLOG(INFO) << "querying remote host '" << host << "' for info on '" + << path << "'"; + + conn->to << cmdQueryPathInfos << PathSet{path}; + 
conn->to.flush(); + + auto info = std::make_shared<ValidPathInfo>(); + conn->from >> info->path; + if (info->path.empty()) { + return callback(nullptr); + } + assert(path == info->path); + + PathSet references; + conn->from >> info->deriver; + info->references = readStorePaths<PathSet>(*this, conn->from); + readLongLong(conn->from); // download size + info->narSize = readLongLong(conn->from); + + if (GET_PROTOCOL_MINOR(conn->remoteVersion) >= 4) { + auto s = readString(conn->from); + info->narHash = s.empty() ? Hash() : Hash(s); + conn->from >> info->ca; + info->sigs = readStrings<StringSet>(conn->from); + } + + auto s = readString(conn->from); + assert(s.empty()); + + callback(std::move(info)); + } catch (...) { + callback.rethrow(); + } + } + + void addToStore(const ValidPathInfo& info, Source& source, RepairFlag repair, + CheckSigsFlag checkSigs, + std::shared_ptr<FSAccessor> accessor) override { + DLOG(INFO) << "adding path '" << info.path << "' to remote host '" << host + << "'"; + + auto conn(connections->get()); + + if (GET_PROTOCOL_MINOR(conn->remoteVersion) >= 5) { + conn->to << cmdAddToStoreNar << info.path << info.deriver + << info.narHash.to_string(Base16, false) << info.references + << info.registrationTime << info.narSize + << static_cast<uint64_t>(info.ultimate) << info.sigs << info.ca; + try { + copyNAR(source, conn->to); + } catch (...) { + conn->good = false; + throw; + } + conn->to.flush(); + + } else { + conn->to << cmdImportPaths << 1; + try { + copyNAR(source, conn->to); + } catch (...) 
{ + conn->good = false; + throw; + } + conn->to << exportMagic << info.path << info.references << info.deriver + << 0 << 0; + conn->to.flush(); + } + + if (readInt(conn->from) != 1) { + throw Error( + "failed to add path '%s' to remote host '%s', info.path, host"); + } + } + + void narFromPath(const Path& path, Sink& sink) override { + auto conn(connections->get()); + + conn->to << cmdDumpStorePath << path; + conn->to.flush(); + copyNAR(conn->from, sink); + } + + Path queryPathFromHashPart(const std::string& hashPart) override { + unsupported("queryPathFromHashPart"); + } + + Path addToStore(const std::string& name, const Path& srcPath, bool recursive, + HashType hashAlgo, PathFilter& filter, + RepairFlag repair) override { + unsupported("addToStore"); + } + + Path addTextToStore(const std::string& name, const std::string& s, + const PathSet& references, RepairFlag repair) override { + unsupported("addTextToStore"); + } + + BuildResult buildDerivation(const Path& drvPath, const BasicDerivation& drv, + BuildMode buildMode) override { + auto conn(connections->get()); + + conn->to << cmdBuildDerivation << drvPath << drv << settings.maxSilentTime + << settings.buildTimeout; + if (GET_PROTOCOL_MINOR(conn->remoteVersion) >= 2) { + conn->to << settings.maxLogSize; + } + if (GET_PROTOCOL_MINOR(conn->remoteVersion) >= 3) { + conn->to << settings.buildRepeat + << static_cast<uint64_t>(settings.enforceDeterminism); + } + + conn->to.flush(); + + BuildResult status; + status.status = (BuildResult::Status)readInt(conn->from); + conn->from >> status.errorMsg; + + if (GET_PROTOCOL_MINOR(conn->remoteVersion) >= 3) { + conn->from >> status.timesBuilt >> status.isNonDeterministic >> + status.startTime >> status.stopTime; + } + + return status; + } + + void ensurePath(const Path& path) override { unsupported("ensurePath"); } + + void computeFSClosure(const PathSet& paths, PathSet& out, + bool flipDirection = false, bool includeOutputs = false, + bool includeDerivers = false) override 
{ + if (flipDirection || includeDerivers) { + Store::computeFSClosure(paths, out, flipDirection, includeOutputs, + includeDerivers); + return; + } + + auto conn(connections->get()); + + conn->to << cmdQueryClosure << static_cast<uint64_t>(includeOutputs) + << paths; + conn->to.flush(); + + auto res = readStorePaths<PathSet>(*this, conn->from); + + out.insert(res.begin(), res.end()); + } + + PathSet queryValidPaths(const PathSet& paths, SubstituteFlag maybeSubstitute = + NoSubstitute) override { + auto conn(connections->get()); + + conn->to << cmdQueryValidPaths << 0u // lock + << maybeSubstitute << paths; + conn->to.flush(); + + return readStorePaths<PathSet>(*this, conn->from); + } + + void connect() override { auto conn(connections->get()); } + + unsigned int getProtocol() override { + auto conn(connections->get()); + return conn->remoteVersion; + } +}; + +static RegisterStoreImplementation regStore( + [](const std::string& uri, + const Store::Params& params) -> std::shared_ptr<Store> { + if (std::string(uri, 0, uriScheme.size()) != uriScheme) { + return nullptr; + } + return std::make_shared<LegacySSHStore>( + std::string(uri, uriScheme.size()), params); + }); + +} // namespace nix diff --git a/third_party/nix/src/libstore/local-binary-cache-store.cc b/third_party/nix/src/libstore/local-binary-cache-store.cc new file mode 100644 index 000000000000..88dd19a32069 --- /dev/null +++ b/third_party/nix/src/libstore/local-binary-cache-store.cc @@ -0,0 +1,93 @@ +#include <utility> + +#include <absl/strings/match.h> + +#include "binary-cache-store.hh" +#include "globals.hh" +#include "nar-info-disk-cache.hh" + +namespace nix { + +class LocalBinaryCacheStore : public BinaryCacheStore { + private: + Path binaryCacheDir; + + public: + LocalBinaryCacheStore(const Params& params, Path binaryCacheDir) + : BinaryCacheStore(params), binaryCacheDir(std::move(binaryCacheDir)) {} + + void init() override; + + std::string getUri() override { return "file://" + binaryCacheDir; } + + 
protected: + bool fileExists(const std::string& path) override; + + void upsertFile(const std::string& path, const std::string& data, + const std::string& mimeType) override; + + void getFile(const std::string& path, Sink& sink) override { + try { + readFile(binaryCacheDir + "/" + path, sink); + } catch (SysError& e) { + if (e.errNo == ENOENT) { + throw NoSuchBinaryCacheFile("file '%s' does not exist in binary cache", + path); + } + } + } + + PathSet queryAllValidPaths() override { + PathSet paths; + + for (auto& entry : readDirectory(binaryCacheDir)) { + if (entry.name.size() != 40 || !absl::EndsWith(entry.name, ".narinfo")) { + continue; + } + paths.insert(storeDir + "/" + + entry.name.substr(0, entry.name.size() - 8)); + } + + return paths; + } +}; + +void LocalBinaryCacheStore::init() { + createDirs(binaryCacheDir + "/nar"); + BinaryCacheStore::init(); +} + +static void atomicWrite(const Path& path, const std::string& s) { + Path tmp = path + ".tmp." + std::to_string(getpid()); + AutoDelete del(tmp, false); + writeFile(tmp, s); + if (rename(tmp.c_str(), path.c_str()) != 0) { + throw SysError(format("renaming '%1%' to '%2%'") % tmp % path); + } + del.cancel(); +} + +bool LocalBinaryCacheStore::fileExists(const std::string& path) { + return pathExists(binaryCacheDir + "/" + path); +} + +void LocalBinaryCacheStore::upsertFile(const std::string& path, + const std::string& data, + const std::string& mimeType) { + atomicWrite(binaryCacheDir + "/" + path, data); +} + +static RegisterStoreImplementation regStore( + [](const std::string& uri, + const Store::Params& params) -> std::shared_ptr<Store> { + if (getEnv("_NIX_FORCE_HTTP_BINARY_CACHE_STORE") == "1" || + std::string(uri, 0, 7) != "file://") { + return nullptr; + } + auto store = + std::make_shared<LocalBinaryCacheStore>(params, std::string(uri, 7)); + store->init(); + return store; + }); + +} // namespace nix diff --git a/third_party/nix/src/libstore/local-fs-store.cc 
b/third_party/nix/src/libstore/local-fs-store.cc new file mode 100644 index 000000000000..f11c84b182a6 --- /dev/null +++ b/third_party/nix/src/libstore/local-fs-store.cc @@ -0,0 +1,123 @@ +#include "archive.hh" +#include "compression.hh" +#include "derivations.hh" +#include "fs-accessor.hh" +#include "globals.hh" +#include "store-api.hh" + +namespace nix { + +LocalFSStore::LocalFSStore(const Params& params) : Store(params) {} + +struct LocalStoreAccessor : public FSAccessor { + ref<LocalFSStore> store; + + explicit LocalStoreAccessor(const ref<LocalFSStore>& store) : store(store) {} + + Path toRealPath(const Path& path) { + Path storePath = store->toStorePath(path); + if (!store->isValidPath(storePath)) { + throw InvalidPath(format("path '%1%' is not a valid store path") % + storePath); + } + return store->getRealStoreDir() + std::string(path, store->storeDir.size()); + } + + FSAccessor::Stat stat(const Path& path) override { + auto realPath = toRealPath(path); + + struct stat st; + if (lstat(realPath.c_str(), &st) != 0) { + if (errno == ENOENT || errno == ENOTDIR) { + return {Type::tMissing, 0, false}; + } + throw SysError(format("getting status of '%1%'") % path); + } + + if (!S_ISREG(st.st_mode) && !S_ISDIR(st.st_mode) && !S_ISLNK(st.st_mode)) { + throw Error(format("file '%1%' has unsupported type") % path); + } + + return {S_ISREG(st.st_mode) + ? Type::tRegular + : S_ISLNK(st.st_mode) ? Type::tSymlink : Type::tDirectory, + S_ISREG(st.st_mode) ? 
(uint64_t)st.st_size : 0, + S_ISREG(st.st_mode) && ((st.st_mode & S_IXUSR) != 0u)}; + } + + StringSet readDirectory(const Path& path) override { + auto realPath = toRealPath(path); + + auto entries = nix::readDirectory(realPath); + + StringSet res; + for (auto& entry : entries) { + res.insert(entry.name); + } + + return res; + } + + std::string readFile(const Path& path) override { + return nix::readFile(toRealPath(path)); + } + + std::string readLink(const Path& path) override { + return nix::readLink(toRealPath(path)); + } +}; + +ref<FSAccessor> LocalFSStore::getFSAccessor() { + return make_ref<LocalStoreAccessor>(ref<LocalFSStore>( + std::dynamic_pointer_cast<LocalFSStore>(shared_from_this()))); +} + +void LocalFSStore::narFromPath(const Path& path, Sink& sink) { + if (!isValidPath(path)) { + throw Error(format("path '%s' is not valid") % path); + } + dumpPath(getRealStoreDir() + std::string(path, storeDir.size()), sink); +} + +const std::string LocalFSStore::drvsLogDir = "drvs"; + +std::shared_ptr<std::string> LocalFSStore::getBuildLog(const Path& path_) { + auto path(path_); + + assertStorePath(path); + + if (!isDerivation(path)) { + try { + path = queryPathInfo(path)->deriver; + } catch (InvalidPath&) { + return nullptr; + } + if (path.empty()) { + return nullptr; + } + } + + std::string baseName = baseNameOf(path); + + for (int j = 0; j < 2; j++) { + Path logPath = + j == 0 ? 
fmt("%s/%s/%s/%s", logDir, drvsLogDir, + std::string(baseName, 0, 2), std::string(baseName, 2)) + : fmt("%s/%s/%s", logDir, drvsLogDir, baseName); + Path logBz2Path = logPath + ".bz2"; + + if (pathExists(logPath)) { + return std::make_shared<std::string>(readFile(logPath)); + } + if (pathExists(logBz2Path)) { + try { + return decompress("bzip2", readFile(logBz2Path)); + } catch (Error&) { + } + } + } + + return nullptr; +} + +} // namespace nix diff --git a/third_party/nix/src/libstore/local-store.cc b/third_party/nix/src/libstore/local-store.cc new file mode 100644 index 000000000000..ebc912969423 --- /dev/null +++ b/third_party/nix/src/libstore/local-store.cc @@ -0,0 +1,1517 @@ +#include "local-store.hh" + +#include <algorithm> +#include <cerrno> +#include <cstdio> +#include <cstring> +#include <ctime> +#include <iostream> + +#include <absl/strings/numbers.h> +#include <absl/strings/str_split.h> +#include <fcntl.h> +#include <glog/logging.h> +#include <grp.h> +#include <sched.h> +#include <sqlite3.h> +#include <sys/ioctl.h> +#include <sys/mount.h> +#include <sys/select.h> +#include <sys/stat.h> +#include <sys/statvfs.h> +#include <sys/time.h> +#include <sys/types.h> +#include <sys/xattr.h> +#include <unistd.h> +#include <utime.h> + +#include "archive.hh" +#include "derivations.hh" +#include "globals.hh" +#include "nar-info.hh" +#include "pathlocks.hh" +#include "worker-protocol.hh" + +namespace nix { + +LocalStore::LocalStore(const Params& params) + : Store(params), + LocalFSStore(params), + realStoreDir_{this, false, + rootDir != "" ? 
rootDir + "/nix/store" : storeDir, "real", + "physical path to the Nix store"}, + realStoreDir(realStoreDir_), + dbDir(stateDir + "/db"), + linksDir(realStoreDir + "/.links"), + reservedPath(dbDir + "/reserved"), + schemaPath(dbDir + "/schema"), + trashDir(realStoreDir + "/trash"), + tempRootsDir(stateDir + "/temproots"), + fnTempRoots(fmt("%s/%d", tempRootsDir, getpid())) { + auto state(_state.lock()); + + /* Create missing state directories if they don't already exist. */ + createDirs(realStoreDir); + makeStoreWritable(); + createDirs(linksDir); + Path profilesDir = stateDir + "/profiles"; + createDirs(profilesDir); + createDirs(tempRootsDir); + createDirs(dbDir); + Path gcRootsDir = stateDir + "/gcroots"; + if (!pathExists(gcRootsDir)) { + createDirs(gcRootsDir); + createSymlink(profilesDir, gcRootsDir + "/profiles"); + } + + for (auto& perUserDir : + {profilesDir + "/per-user", gcRootsDir + "/per-user"}) { + createDirs(perUserDir); + if (chmod(perUserDir.c_str(), 0755) == -1) { + throw SysError("could not set permissions on '%s' to 755", perUserDir); + } + } + + createUser(getUserName(), getuid()); + + /* Optionally, create directories and set permissions for a + multi-user install. 
*/ + if (getuid() == 0 && settings.buildUsersGroup != "") { + mode_t perm = 01775; + + struct group* gr = getgrnam(settings.buildUsersGroup.get().c_str()); + if (gr == nullptr) { + LOG(ERROR) << "warning: the group '" << settings.buildUsersGroup + << "' specified in 'build-users-group' does not exist"; + } else { + struct stat st; + if (stat(realStoreDir.c_str(), &st) != 0) { + throw SysError(format("getting attributes of path '%1%'") % + realStoreDir); + } + + if (st.st_uid != 0 || st.st_gid != gr->gr_gid || + (st.st_mode & ~S_IFMT) != perm) { + if (chown(realStoreDir.c_str(), 0, gr->gr_gid) == -1) { + throw SysError(format("changing ownership of path '%1%'") % + realStoreDir); + } + if (chmod(realStoreDir.c_str(), perm) == -1) { + throw SysError(format("changing permissions on path '%1%'") % + realStoreDir); + } + } + } + } + + /* Ensure that the store and its parents are not symlinks. */ + if (getEnv("NIX_IGNORE_SYMLINK_STORE") != "1") { + Path path = realStoreDir; + struct stat st; + while (path != "/") { + if (lstat(path.c_str(), &st) != 0) { + throw SysError(format("getting status of '%1%'") % path); + } + if (S_ISLNK(st.st_mode)) { + throw Error(format("the path '%1%' is a symlink; " + "this is not allowed for the Nix store and its " + "parent directories") % + path); + } + path = dirOf(path); + } + } + + /* We can't open a SQLite database if the disk is full. Since + this prevents the garbage collector from running when it's most + needed, we reserve some dummy space that we can free just + before doing a garbage collection. 
*/ + try { + struct stat st; + if (stat(reservedPath.c_str(), &st) == -1 || + st.st_size != settings.reservedSize) { + AutoCloseFD fd = + open(reservedPath.c_str(), O_WRONLY | O_CREAT | O_CLOEXEC, 0600); + int res = -1; +#if HAVE_POSIX_FALLOCATE + res = posix_fallocate(fd.get(), 0, settings.reservedSize); +#endif + if (res == -1) { + writeFull(fd.get(), std::string(settings.reservedSize, 'X')); + [[gnu::unused]] auto res2 = ftruncate(fd.get(), settings.reservedSize); + } + } + } catch (SysError& e) { /* don't care about errors */ + } + + /* Acquire the big fat lock in shared mode to make sure that no + schema upgrade is in progress. */ + Path globalLockPath = dbDir + "/big-lock"; + globalLock = openLockFile(globalLockPath, true); + + if (!lockFile(globalLock.get(), ltRead, false)) { + LOG(INFO) << "waiting for the big Nix store lock..."; + lockFile(globalLock.get(), ltRead, true); + } + + /* Check the current database schema and if necessary do an + upgrade. */ + int curSchema = getSchema(); + if (curSchema > nixSchemaVersion) { + throw Error( + format( + "current Nix store schema is version %1%, but I only support %2%") % + curSchema % nixSchemaVersion); + } + if (curSchema == 0) { /* new store */ + curSchema = nixSchemaVersion; + openDB(*state, true); + writeFile(schemaPath, (format("%1%") % nixSchemaVersion).str()); + } else if (curSchema < nixSchemaVersion) { + if (curSchema < 5) { + throw Error( + "Your Nix store has a database in Berkeley DB format,\n" + "which is no longer supported. To convert to the new format,\n" + "please upgrade Nix to version 0.12 first."); + } + + if (curSchema < 6) { + throw Error( + "Your Nix store has a database in flat file format,\n" + "which is no longer supported. 
To convert to the new format,\n" + "please upgrade Nix to version 1.11 first."); + } + + if (!lockFile(globalLock.get(), ltWrite, false)) { + LOG(INFO) << "waiting for exclusive access to the Nix store..."; + lockFile(globalLock.get(), ltWrite, true); + } + + /* Get the schema version again, because another process may + have performed the upgrade already. */ + curSchema = getSchema(); + + if (curSchema < 7) { + upgradeStore7(); + } + + openDB(*state, false); + + if (curSchema < 8) { + SQLiteTxn txn(state->db); + state->db.exec("alter table ValidPaths add column ultimate integer"); + state->db.exec("alter table ValidPaths add column sigs text"); + txn.commit(); + } + + if (curSchema < 9) { + SQLiteTxn txn(state->db); + state->db.exec("drop table FailedPaths"); + txn.commit(); + } + + if (curSchema < 10) { + SQLiteTxn txn(state->db); + state->db.exec("alter table ValidPaths add column ca text"); + txn.commit(); + } + + writeFile(schemaPath, (format("%1%") % nixSchemaVersion).str()); + + lockFile(globalLock.get(), ltRead, true); + } else { + openDB(*state, false); + } + + /* Prepare SQL statements. */ + state->stmtRegisterValidPath.create( + state->db, + "insert into ValidPaths (path, hash, registrationTime, deriver, narSize, " + "ultimate, sigs, ca) values (?, ?, ?, ?, ?, ?, ?, ?);"); + state->stmtUpdatePathInfo.create( + state->db, + "update ValidPaths set narSize = ?, hash = ?, ultimate = ?, sigs = ?, ca " + "= ? 
where path = ?;"); + state->stmtAddReference.create( + state->db, + "insert or replace into Refs (referrer, reference) values (?, ?);"); + state->stmtQueryPathInfo.create( + state->db, + "select id, hash, registrationTime, deriver, narSize, ultimate, sigs, ca " + "from ValidPaths where path = ?;"); + state->stmtQueryReferences.create(state->db, + "select path from Refs join ValidPaths on " + "reference = id where referrer = ?;"); + state->stmtQueryReferrers.create( + state->db, + "select path from Refs join ValidPaths on referrer = id where reference " + "= (select id from ValidPaths where path = ?);"); + state->stmtInvalidatePath.create(state->db, + "delete from ValidPaths where path = ?;"); + state->stmtAddDerivationOutput.create( + state->db, + "insert or replace into DerivationOutputs (drv, id, path) values (?, ?, " + "?);"); + state->stmtQueryValidDerivers.create( + state->db, + "select v.id, v.path from DerivationOutputs d join ValidPaths v on d.drv " + "= v.id where d.path = ?;"); + state->stmtQueryDerivationOutputs.create( + state->db, "select id, path from DerivationOutputs where drv = ?;"); + // Use "path >= ?" with limit 1 rather than "path like '?%'" to + // ensure efficient lookup. + state->stmtQueryPathFromHashPart.create( + state->db, "select path from ValidPaths where path >= ? limit 1;"); + state->stmtQueryValidPaths.create(state->db, "select path from ValidPaths"); +} + +LocalStore::~LocalStore() { + std::shared_future<void> future; + + { + auto state(_state.lock()); + if (state->gcRunning) { + future = state->gcFuture; + } + } + + if (future.valid()) { + LOG(INFO) << "waiting for auto-GC to finish on exit..."; + future.get(); + } + + try { + auto state(_state.lock()); + if (state->fdTempRoots) { + state->fdTempRoots = -1; + unlink(fnTempRoots.c_str()); + } + } catch (...) 
{ + ignoreException(); + } +} + +std::string LocalStore::getUri() { return "local"; } + +int LocalStore::getSchema() { + int curSchema = 0; + if (pathExists(schemaPath)) { + std::string s = readFile(schemaPath); + if (!absl::SimpleAtoi(s, &curSchema)) { + throw Error(format("'%1%' is corrupt") % schemaPath); + } + } + return curSchema; +} + +void LocalStore::openDB(State& state, bool create) { + if (access(dbDir.c_str(), R_OK | W_OK) != 0) { + throw SysError(format("Nix database directory '%1%' is not writable") % + dbDir); + } + + /* Open the Nix database. */ + std::string dbPath = dbDir + "/db.sqlite"; + auto& db(state.db); + if (sqlite3_open_v2(dbPath.c_str(), &db.db, + SQLITE_OPEN_READWRITE | (create ? SQLITE_OPEN_CREATE : 0), + nullptr) != SQLITE_OK) { + throw Error(format("cannot open Nix database '%1%'") % dbPath); + } + +#ifdef __CYGWIN__ + /* The cygwin version of sqlite3 has a patch which calls + SetDllDirectory("/usr/bin") on init. It was intended to fix extension + loading, which we don't use, and the effect of SetDllDirectory is + inherited by child processes, and causes libraries to be loaded from + /usr/bin instead of $PATH. This breaks quite a few things (e.g. + checkPhase on openssh), so we set it back to default behaviour. */ + SetDllDirectoryW(L""); +#endif + + if (sqlite3_busy_timeout(db, 60 * 60 * 1000) != SQLITE_OK) { + throwSQLiteError(db, "setting timeout"); + } + + db.exec("pragma foreign_keys = 1"); + + /* !!! check whether sqlite has been built with foreign key + support */ + + /* Whether SQLite should fsync(). "Normal" synchronous mode + should be safe enough. If the user asks for it, don't sync at + all. This can cause database corruption if the system + crashes. */ + std::string syncMode = settings.fsyncMetadata ? "normal" : "off"; + db.exec("pragma synchronous = " + syncMode); + + /* Set the SQLite journal mode. WAL mode is fastest, so it's the + default. */ + std::string mode = settings.useSQLiteWAL ? 
"wal" : "truncate"; + std::string prevMode; + { + SQLiteStmt stmt; + stmt.create(db, "pragma main.journal_mode;"); + if (sqlite3_step(stmt) != SQLITE_ROW) { + throwSQLiteError(db, "querying journal mode"); + } + prevMode = std::string((const char*)sqlite3_column_text(stmt, 0)); + } + if (prevMode != mode && + sqlite3_exec(db, ("pragma main.journal_mode = " + mode + ";").c_str(), + nullptr, nullptr, nullptr) != SQLITE_OK) { + throwSQLiteError(db, "setting journal mode"); + } + + /* Increase the auto-checkpoint interval to 40000 pages. This + seems enough to ensure that instantiating the NixOS system + derivation is done in a single fsync(). */ + if (mode == "wal" && sqlite3_exec(db, "pragma wal_autocheckpoint = 40000;", + nullptr, nullptr, nullptr) != SQLITE_OK) { + throwSQLiteError(db, "setting autocheckpoint interval"); + } + + /* Initialise the database schema, if necessary. */ + if (create) { + const char* schema = +#include "schema.sql.gen.hh" + ; + db.exec(schema); + } +} + +/* To improve purity, users may want to make the Nix store a read-only + bind mount. So make the Nix store writable for this process. */ +void LocalStore::makeStoreWritable() { +#if __linux__ + if (getuid() != 0) { + return; + } + /* Check if /nix/store is on a read-only mount. */ + struct statvfs stat; + if (statvfs(realStoreDir.c_str(), &stat) != 0) { + throw SysError("getting info about the Nix store mount point"); + } + + if ((stat.f_flag & ST_RDONLY) != 0u) { + if (unshare(CLONE_NEWNS) == -1) { + throw SysError("setting up a private mount namespace"); + } + + if (mount(nullptr, realStoreDir.c_str(), "none", MS_REMOUNT | MS_BIND, + nullptr) == -1) { + throw SysError(format("remounting %1% writable") % realStoreDir); + } + } +#endif +} + +const time_t mtimeStore = 1; /* 1 second into the epoch */ + +static void canonicaliseTimestampAndPermissions(const Path& path, + const struct stat& st) { + if (!S_ISLNK(st.st_mode)) { + /* Mask out all type related bits. 
*/ + mode_t mode = st.st_mode & ~S_IFMT; + + if (mode != 0444 && mode != 0555) { + mode = (st.st_mode & S_IFMT) | 0444 | + ((st.st_mode & S_IXUSR) != 0u ? 0111 : 0); + if (chmod(path.c_str(), mode) == -1) { + throw SysError(format("changing mode of '%1%' to %2$o") % path % mode); + } + } + } + + if (st.st_mtime != mtimeStore) { + struct timeval times[2]; + times[0].tv_sec = st.st_atime; + times[0].tv_usec = 0; + times[1].tv_sec = mtimeStore; + times[1].tv_usec = 0; +#if HAVE_LUTIMES + if (lutimes(path.c_str(), times) == -1) { + if (errno != ENOSYS || + (!S_ISLNK(st.st_mode) && utimes(path.c_str(), times) == -1)) { +#else + if (!S_ISLNK(st.st_mode) && utimes(path.c_str(), times) == -1) +#endif + throw SysError(format("changing modification time of '%1%'") % path); + } + } + } // namespace nix +} + +void canonicaliseTimestampAndPermissions(const Path& path) { + struct stat st; + if (lstat(path.c_str(), &st) != 0) { + throw SysError(format("getting attributes of path '%1%'") % path); + } + canonicaliseTimestampAndPermissions(path, st); +} + +static void canonicalisePathMetaData_(const Path& path, uid_t fromUid, + InodesSeen& inodesSeen) { + checkInterrupt(); + + struct stat st; + if (lstat(path.c_str(), &st) != 0) { + throw SysError(format("getting attributes of path '%1%'") % path); + } + + /* Really make sure that the path is of a supported type. */ + if (!(S_ISREG(st.st_mode) || S_ISDIR(st.st_mode) || S_ISLNK(st.st_mode))) { + throw Error(format("file '%1%' has an unsupported type") % path); + } + + /* Remove extended attributes / ACLs. 
*/ + ssize_t eaSize = llistxattr(path.c_str(), nullptr, 0); + + if (eaSize < 0) { + if (errno != ENOTSUP && errno != ENODATA) { + throw SysError("querying extended attributes of '%s'", path); + } + } else if (eaSize > 0) { + std::vector<char> eaBuf(eaSize); + + if ((eaSize = llistxattr(path.c_str(), eaBuf.data(), eaBuf.size())) < 0) { + throw SysError("querying extended attributes of '%s'", path); + } + + for (auto& eaName : + absl::StrSplit(std::string(eaBuf.data(), eaSize), + absl::ByString(std::string("\000", 1)))) { + /* Ignore SELinux security labels since these cannot be + removed even by root. */ + if (eaName == "security.selinux") { + continue; + } + if (lremovexattr(path.c_str(), std::string(eaName).c_str()) == -1) { + throw SysError("removing extended attribute '%s' from '%s'", eaName, + path); + } + } + } + + /* Fail if the file is not owned by the build user. This prevents + us from messing up the ownership/permissions of files + hard-linked into the output (e.g. "ln /etc/shadow $out/foo"). + However, ignore files that we chown'ed ourselves previously to + ensure that we don't fail on hard links within the same build + (i.e. "touch $out/foo; ln $out/foo $out/bar"). */ + if (fromUid != (uid_t)-1 && st.st_uid != fromUid) { + assert(!S_ISDIR(st.st_mode)); + if (inodesSeen.find(Inode(st.st_dev, st.st_ino)) == inodesSeen.end()) { + throw BuildError(format("invalid ownership on file '%1%'") % path); + } + mode_t mode = st.st_mode & ~S_IFMT; + assert(S_ISLNK(st.st_mode) || + (st.st_uid == geteuid() && (mode == 0444 || mode == 0555) && + st.st_mtime == mtimeStore)); + return; + } + + inodesSeen.insert(Inode(st.st_dev, st.st_ino)); + + canonicaliseTimestampAndPermissions(path, st); + + /* Change ownership to the current uid. If it's a symlink, use + lchown if available, otherwise don't bother. Wrong ownership + of a symlink doesn't matter, since the owning user can't change + the symlink and can't delete it because the directory is not + writable. 
The only exception is top-level paths in the Nix + store (since that directory is group-writable for the Nix build + users group); we check for this case below. */ + if (st.st_uid != geteuid()) { +#if HAVE_LCHOWN + if (lchown(path.c_str(), geteuid(), getegid()) == -1) { +#else + if (!S_ISLNK(st.st_mode) && chown(path.c_str(), geteuid(), getegid()) == -1) +#endif + throw SysError(format("changing owner of '%1%' to %2%") % path % + geteuid()); + } + } + + if (S_ISDIR(st.st_mode)) { + DirEntries entries = readDirectory(path); + for (auto& i : entries) { + canonicalisePathMetaData_(path + "/" + i.name, fromUid, inodesSeen); + } + } +} + +void canonicalisePathMetaData(const Path& path, uid_t fromUid, + InodesSeen& inodesSeen) { + canonicalisePathMetaData_(path, fromUid, inodesSeen); + + /* On platforms that don't have lchown(), the top-level path can't + be a symlink, since we can't change its ownership. */ + struct stat st; + if (lstat(path.c_str(), &st) != 0) { + throw SysError(format("getting attributes of path '%1%'") % path); + } + + if (st.st_uid != geteuid()) { + assert(S_ISLNK(st.st_mode)); + throw Error(format("wrong ownership of top-level store path '%1%'") % path); + } +} + +void canonicalisePathMetaData(const Path& path, uid_t fromUid) { + InodesSeen inodesSeen; + canonicalisePathMetaData(path, fromUid, inodesSeen); +} + +void LocalStore::checkDerivationOutputs(const Path& drvPath, + const Derivation& drv) { + std::string drvName = storePathToName(drvPath); + assert(isDerivation(drvName)); + drvName = std::string(drvName, 0, drvName.size() - drvExtension.size()); + + if (drv.isFixedOutput()) { + auto out = drv.outputs.find("out"); + if (out == drv.outputs.end()) { + throw Error( + format("derivation '%1%' does not have an output named 'out'") % + drvPath); + } + + bool recursive; + Hash h; + out->second.parseHashInfo(recursive, h); + Path outPath = makeFixedOutputPath(recursive, h, drvName); + + auto j = drv.env.find("out"); + if (out->second.path != outPath 
|| j == drv.env.end() || + j->second != outPath) { + throw Error( + format( + "derivation '%1%' has incorrect output '%2%', should be '%3%'") % + drvPath % out->second.path % outPath); + } + } + + else { + Derivation drvCopy(drv); + for (auto& i : drvCopy.outputs) { + i.second.path = ""; + drvCopy.env[i.first] = ""; + } + + Hash h = hashDerivationModulo(*this, drvCopy); + + for (auto& i : drv.outputs) { + Path outPath = makeOutputPath(i.first, h, drvName); + auto j = drv.env.find(i.first); + if (i.second.path != outPath || j == drv.env.end() || + j->second != outPath) { + throw Error(format("derivation '%1%' has incorrect output '%2%', " + "should be '%3%'") % + drvPath % i.second.path % outPath); + } + } + } +} + +uint64_t LocalStore::addValidPath(State& state, const ValidPathInfo& info, + bool checkOutputs) { + if (!info.ca.empty() && !info.isContentAddressed(*this)) { + throw Error( + "cannot add path '%s' to the Nix store because it claims to be " + "content-addressed but isn't", + info.path); + } + + state.stmtRegisterValidPath + .use()(info.path)(info.narHash.to_string(Base16))( + info.registrationTime == 0 ? time(nullptr) : info.registrationTime)( + info.deriver, !info.deriver.empty())(info.narSize, info.narSize != 0)( + info.ultimate ? 1 : 0, info.ultimate)( + concatStringsSep(" ", info.sigs), !info.sigs.empty())( + info.ca, !info.ca.empty()) + .exec(); + uint64_t id = sqlite3_last_insert_rowid(state.db); + + /* If this is a derivation, then store the derivation outputs in + the database. This is useful for the garbage collector: it can + efficiently query whether a path is an output of some + derivation. */ + if (isDerivation(info.path)) { + Derivation drv = readDerivation(realStoreDir + "/" + baseNameOf(info.path)); + + /* Verify that the output paths in the derivation are correct + (i.e., follow the scheme for computing output paths from + derivations). 
Note that if this throws an error, then the + DB transaction is rolled back, so the path validity + registration above is undone. */ + if (checkOutputs) { + checkDerivationOutputs(info.path, drv); + } + + for (auto& i : drv.outputs) { + state.stmtAddDerivationOutput.use()(id)(i.first)(i.second.path).exec(); + } + } + + { + auto state_(Store::state.lock()); + state_->pathInfoCache.upsert(storePathToHash(info.path), + std::make_shared<ValidPathInfo>(info)); + } + + return id; +} + +void LocalStore::queryPathInfoUncached( + const Path& path, + Callback<std::shared_ptr<ValidPathInfo>> callback) noexcept { + try { + auto info = std::make_shared<ValidPathInfo>(); + info->path = path; + + assertStorePath(path); + + callback(retrySQLite<std::shared_ptr<ValidPathInfo>>([&]() { + auto state(_state.lock()); + + /* Get the path info. */ + auto useQueryPathInfo(state->stmtQueryPathInfo.use()(path)); + + if (!useQueryPathInfo.next()) { + return std::shared_ptr<ValidPathInfo>(); + } + + info->id = useQueryPathInfo.getInt(0); + + try { + info->narHash = Hash(useQueryPathInfo.getStr(1)); + } catch (BadHash& e) { + throw Error("in valid-path entry for '%s': %s", path, e.what()); + } + + info->registrationTime = useQueryPathInfo.getInt(2); + + auto s = (const char*)sqlite3_column_text(state->stmtQueryPathInfo, 3); + if (s != nullptr) { + info->deriver = s; + } + + /* Note that narSize = NULL yields 0. */ + info->narSize = useQueryPathInfo.getInt(4); + + info->ultimate = useQueryPathInfo.getInt(5) == 1; + + s = (const char*)sqlite3_column_text(state->stmtQueryPathInfo, 6); + if (s != nullptr) { + info->sigs = absl::StrSplit(s, absl::ByChar(' ')); + } + + s = (const char*)sqlite3_column_text(state->stmtQueryPathInfo, 7); + if (s != nullptr) { + info->ca = s; + } + + /* Get the references. 
*/ + auto useQueryReferences(state->stmtQueryReferences.use()(info->id)); + + while (useQueryReferences.next()) { + info->references.insert(useQueryReferences.getStr(0)); + } + + return info; + })); + + } catch (...) { + callback.rethrow(); + } +} + +/* Update path info in the database. */ +void LocalStore::updatePathInfo(State& state, const ValidPathInfo& info) { + state.stmtUpdatePathInfo + .use()(info.narSize, info.narSize != 0)(info.narHash.to_string(Base16))( + info.ultimate ? 1 : 0, info.ultimate)( + concatStringsSep(" ", info.sigs), !info.sigs.empty())( + info.ca, !info.ca.empty())(info.path) + .exec(); +} + +uint64_t LocalStore::queryValidPathId(State& state, const Path& path) { + auto use(state.stmtQueryPathInfo.use()(path)); + if (!use.next()) { + throw Error(format("path '%1%' is not valid") % path); + } + return use.getInt(0); +} + +bool LocalStore::isValidPath_(State& state, const Path& path) { + return state.stmtQueryPathInfo.use()(path).next(); +} + +bool LocalStore::isValidPathUncached(const Path& path) { + return retrySQLite<bool>([&]() { + auto state(_state.lock()); + return isValidPath_(*state, path); + }); +} + +PathSet LocalStore::queryValidPaths(const PathSet& paths, + SubstituteFlag maybeSubstitute) { + PathSet res; + for (auto& i : paths) { + if (isValidPath(i)) { + res.insert(i); + } + } + return res; +} + +PathSet LocalStore::queryAllValidPaths() { + return retrySQLite<PathSet>([&]() { + auto state(_state.lock()); + auto use(state->stmtQueryValidPaths.use()); + PathSet res; + while (use.next()) { + res.insert(use.getStr(0)); + } + return res; + }); +} + +void LocalStore::queryReferrers(State& state, const Path& path, + PathSet& referrers) { + auto useQueryReferrers(state.stmtQueryReferrers.use()(path)); + + while (useQueryReferrers.next()) { + referrers.insert(useQueryReferrers.getStr(0)); + } +} + +void LocalStore::queryReferrers(const Path& path, PathSet& referrers) { + assertStorePath(path); + return retrySQLite<void>([&]() { + auto 
state(_state.lock()); + queryReferrers(*state, path, referrers); + }); +} + +PathSet LocalStore::queryValidDerivers(const Path& path) { + assertStorePath(path); + + return retrySQLite<PathSet>([&]() { + auto state(_state.lock()); + + auto useQueryValidDerivers(state->stmtQueryValidDerivers.use()(path)); + + PathSet derivers; + while (useQueryValidDerivers.next()) { + derivers.insert(useQueryValidDerivers.getStr(1)); + } + + return derivers; + }); +} + +PathSet LocalStore::queryDerivationOutputs(const Path& path) { + return retrySQLite<PathSet>([&]() { + auto state(_state.lock()); + + auto useQueryDerivationOutputs(state->stmtQueryDerivationOutputs.use()( + queryValidPathId(*state, path))); + + PathSet outputs; + while (useQueryDerivationOutputs.next()) { + outputs.insert(useQueryDerivationOutputs.getStr(1)); + } + + return outputs; + }); +} + +StringSet LocalStore::queryDerivationOutputNames(const Path& path) { + return retrySQLite<StringSet>([&]() { + auto state(_state.lock()); + + auto useQueryDerivationOutputs(state->stmtQueryDerivationOutputs.use()( + queryValidPathId(*state, path))); + + StringSet outputNames; + while (useQueryDerivationOutputs.next()) { + outputNames.insert(useQueryDerivationOutputs.getStr(0)); + } + + return outputNames; + }); +} + +Path LocalStore::queryPathFromHashPart(const std::string& hashPart) { + if (hashPart.size() != storePathHashLen) { + throw Error("invalid hash part"); + } + + Path prefix = storeDir + "/" + hashPart; + + return retrySQLite<Path>([&]() -> std::string { + auto state(_state.lock()); + + auto useQueryPathFromHashPart( + state->stmtQueryPathFromHashPart.use()(prefix)); + + if (!useQueryPathFromHashPart.next()) { + return ""; + } + + const char* s = + (const char*)sqlite3_column_text(state->stmtQueryPathFromHashPart, 0); + return (s != nullptr) && + prefix.compare(0, prefix.size(), s, prefix.size()) == 0 + ? 
s + : ""; + }); +} + +PathSet LocalStore::querySubstitutablePaths(const PathSet& paths) { + if (!settings.useSubstitutes) { + return PathSet(); + } + + auto remaining = paths; + PathSet res; + + for (auto& sub : getDefaultSubstituters()) { + if (remaining.empty()) { + break; + } + if (sub->storeDir != storeDir) { + continue; + } + if (!sub->wantMassQuery()) { + continue; + } + + auto valid = sub->queryValidPaths(remaining); + + PathSet remaining2; + for (auto& path : remaining) { + if (valid.count(path) != 0u) { + res.insert(path); + } else { + remaining2.insert(path); + } + } + + std::swap(remaining, remaining2); + } + + return res; +} + +void LocalStore::querySubstitutablePathInfos(const PathSet& paths, + SubstitutablePathInfos& infos) { + if (!settings.useSubstitutes) { + return; + } + for (auto& sub : getDefaultSubstituters()) { + if (sub->storeDir != storeDir) { + continue; + } + for (auto& path : paths) { + if (infos.count(path) != 0u) { + continue; + } + DLOG(INFO) << "checking substituter '" << sub->getUri() << "' for path '" + << path << "'"; + try { + auto info = sub->queryPathInfo(path); + auto narInfo = std::dynamic_pointer_cast<const NarInfo>( + std::shared_ptr<const ValidPathInfo>(info)); + infos[path] = SubstitutablePathInfo{info->deriver, info->references, + narInfo ? narInfo->fileSize : 0, + info->narSize}; + } catch (InvalidPath&) { + } catch (SubstituterDisabled&) { + } catch (Error& e) { + if (settings.tryFallback) { + LOG(ERROR) << e.what(); + } else { + throw; + } + } + } + } +} + +void LocalStore::registerValidPath(const ValidPathInfo& info) { + ValidPathInfos infos; + infos.push_back(info); + registerValidPaths(infos); +} + +void LocalStore::registerValidPaths(const ValidPathInfos& infos) { + /* SQLite will fsync by default, but the new valid paths may not + be fsync-ed. So some may want to fsync them before registering + the validity, at the expense of some speed of the path + registering operation. 
*/ + if (settings.syncBeforeRegistering) { + sync(); + } + + return retrySQLite<void>([&]() { + auto state(_state.lock()); + + SQLiteTxn txn(state->db); + PathSet paths; + + for (auto& i : infos) { + assert(i.narHash.type == htSHA256); + if (isValidPath_(*state, i.path)) { + updatePathInfo(*state, i); + } else { + addValidPath(*state, i, false); + } + paths.insert(i.path); + } + + for (auto& i : infos) { + auto referrer = queryValidPathId(*state, i.path); + for (auto& j : i.references) { + state->stmtAddReference.use()(referrer)(queryValidPathId(*state, j)) + .exec(); + } + } + + /* Check that the derivation outputs are correct. We can't do + this in addValidPath() above, because the references might + not be valid yet. */ + for (auto& i : infos) { + if (isDerivation(i.path)) { + // FIXME: inefficient; we already loaded the + // derivation in addValidPath(). + Derivation drv = + readDerivation(realStoreDir + "/" + baseNameOf(i.path)); + checkDerivationOutputs(i.path, drv); + } + } + + /* Do a topological sort of the paths. This will throw an + error if a cycle is detected and roll back the + transaction. Cycles can only occur when a derivation + has multiple outputs. */ + topoSortPaths(paths); + + txn.commit(); + }); +} + +/* Invalidate a path. The caller is responsible for checking that + there are no referrers. */ +void LocalStore::invalidatePath(State& state, const Path& path) { + LOG(INFO) << "invalidating path '" << path << "'"; + + state.stmtInvalidatePath.use()(path).exec(); + + /* Note that the foreign key constraints on the Refs table take + care of deleting the references entries for `path'. 
*/ + + { + auto state_(Store::state.lock()); + state_->pathInfoCache.erase(storePathToHash(path)); + } +} + +const PublicKeys& LocalStore::getPublicKeys() { + auto state(_state.lock()); + if (!state->publicKeys) { + state->publicKeys = std::make_unique<PublicKeys>(getDefaultPublicKeys()); + } + return *state->publicKeys; +} + +void LocalStore::addToStore(const ValidPathInfo& info, Source& source, + RepairFlag repair, CheckSigsFlag checkSigs, + std::shared_ptr<FSAccessor> accessor) { + if (!info.narHash) { + throw Error("cannot add path '%s' because it lacks a hash", info.path); + } + + if (requireSigs && (checkSigs != 0u) && + (info.checkSignatures(*this, getPublicKeys()) == 0u)) { + throw Error("cannot add path '%s' because it lacks a valid signature", + info.path); + } + + addTempRoot(info.path); + + if ((repair != 0u) || !isValidPath(info.path)) { + PathLocks outputLock; + + Path realPath = realStoreDir + "/" + baseNameOf(info.path); + + /* Lock the output path. But don't lock if we're being called + from a build hook (whose parent process already acquired a + lock on this path). */ + if (locksHeld.count(info.path) == 0u) { + outputLock.lockPaths({realPath}); + } + + if ((repair != 0u) || !isValidPath(info.path)) { + deletePath(realPath); + + /* While restoring the path from the NAR, compute the hash + of the NAR. 
*/ + HashSink hashSink(htSHA256); + + LambdaSource wrapperSource( + [&](unsigned char* data, size_t len) -> size_t { + size_t n = source.read(data, len); + hashSink(data, n); + return n; + }); + + restorePath(realPath, wrapperSource); + + auto hashResult = hashSink.finish(); + + if (hashResult.first != info.narHash) { + throw Error( + "hash mismatch importing path '%s';\n wanted: %s\n got: %s", + info.path, info.narHash.to_string(), hashResult.first.to_string()); + } + + if (hashResult.second != info.narSize) { + throw Error( + "size mismatch importing path '%s';\n wanted: %s\n got: %s", + info.path, info.narSize, hashResult.second); + } + + autoGC(); + + canonicalisePathMetaData(realPath, -1); + + optimisePath(realPath); // FIXME: combine with hashPath() + + registerValidPath(info); + } + + outputLock.setDeletion(true); + } +} + +Path LocalStore::addToStoreFromDump(const std::string& dump, + const std::string& name, bool recursive, + HashType hashAlgo, RepairFlag repair) { + Hash h = hashString(hashAlgo, dump); + + Path dstPath = makeFixedOutputPath(recursive, h, name); + + addTempRoot(dstPath); + + if ((repair != 0u) || !isValidPath(dstPath)) { + /* The first check above is an optimisation to prevent + unnecessary lock acquisition. */ + + Path realPath = realStoreDir + "/" + baseNameOf(dstPath); + + PathLocks outputLock({realPath}); + + if ((repair != 0u) || !isValidPath(dstPath)) { + deletePath(realPath); + + autoGC(); + + if (recursive) { + StringSource source(dump); + restorePath(realPath, source); + } else { + writeFile(realPath, dump); + } + + canonicalisePathMetaData(realPath, -1); + + /* Register the SHA-256 hash of the NAR serialisation of + the path in the database. We may just have computed it + above (if called with recursive == true and hashAlgo == + sha256); otherwise, compute it here. */ + HashResult hash; + if (recursive) { + hash.first = hashAlgo == htSHA256 ? 
h : hashString(htSHA256, dump); + hash.second = dump.size(); + } else { + hash = hashPath(htSHA256, realPath); + } + + optimisePath(realPath); // FIXME: combine with hashPath() + + ValidPathInfo info; + info.path = dstPath; + info.narHash = hash.first; + info.narSize = hash.second; + info.ca = makeFixedOutputCA(recursive, h); + registerValidPath(info); + } + + outputLock.setDeletion(true); + } + + return dstPath; +} + +Path LocalStore::addToStore(const std::string& name, const Path& _srcPath, + bool recursive, HashType hashAlgo, + PathFilter& filter, RepairFlag repair) { + Path srcPath(absPath(_srcPath)); + + /* Read the whole path into memory. This is not a very scalable + method for very large paths, but `copyPath' is mainly used for + small files. */ + StringSink sink; + if (recursive) { + dumpPath(srcPath, sink, filter); + } else { + sink.s = make_ref<std::string>(readFile(srcPath)); + } + + return addToStoreFromDump(*sink.s, name, recursive, hashAlgo, repair); +} + +Path LocalStore::addTextToStore(const std::string& name, const std::string& s, + const PathSet& references, RepairFlag repair) { + auto hash = hashString(htSHA256, s); + auto dstPath = makeTextPath(name, hash, references); + + addTempRoot(dstPath); + + if ((repair != 0u) || !isValidPath(dstPath)) { + Path realPath = realStoreDir + "/" + baseNameOf(dstPath); + + PathLocks outputLock({realPath}); + + if ((repair != 0u) || !isValidPath(dstPath)) { + deletePath(realPath); + + autoGC(); + + writeFile(realPath, s); + + canonicalisePathMetaData(realPath, -1); + + StringSink sink; + dumpString(s, sink); + auto narHash = hashString(htSHA256, *sink.s); + + optimisePath(realPath); + + ValidPathInfo info; + info.path = dstPath; + info.narHash = narHash; + info.narSize = sink.s->size(); + info.references = references; + info.ca = "text:" + hash.to_string(); + registerValidPath(info); + } + + outputLock.setDeletion(true); + } + + return dstPath; +} + +/* Create a temporary directory in the store that won't be + 
garbage-collected. */ +Path LocalStore::createTempDirInStore() { + Path tmpDir; + do { + /* There is a slight possibility that `tmpDir' gets deleted by + the GC between createTempDir() and addTempRoot(), so repeat + until `tmpDir' exists. */ + tmpDir = createTempDir(realStoreDir); + addTempRoot(tmpDir); + } while (!pathExists(tmpDir)); + return tmpDir; +} + +void LocalStore::invalidatePathChecked(const Path& path) { + assertStorePath(path); + + retrySQLite<void>([&]() { + auto state(_state.lock()); + + SQLiteTxn txn(state->db); + + if (isValidPath_(*state, path)) { + PathSet referrers; + queryReferrers(*state, path, referrers); + referrers.erase(path); /* ignore self-references */ + if (!referrers.empty()) { + throw PathInUse( + format("cannot delete path '%1%' because it is in use by %2%") % + path % showPaths(referrers)); + } + invalidatePath(*state, path); + } + + txn.commit(); + }); +} + +bool LocalStore::verifyStore(bool checkContents, RepairFlag repair) { + LOG(INFO) << "reading the Nix store..."; + + bool errors = false; + + /* Acquire the global GC lock to get a consistent snapshot of + existing and valid paths. */ + AutoCloseFD fdGCLock = openGCLock(ltWrite); + + PathSet store; + for (auto& i : readDirectory(realStoreDir)) { + store.insert(i.name); + } + + /* Check whether all valid paths actually exist. */ + LOG(INFO) << "checking path existence..."; + + PathSet validPaths2 = queryAllValidPaths(); + PathSet validPaths; + PathSet done; + + fdGCLock = -1; + + for (auto& i : validPaths2) { + verifyPath(i, store, done, validPaths, repair, errors); + } + + /* Optionally, check the content hashes (slow). */ + if (checkContents) { + LOG(INFO) << "checking hashes..."; + + Hash nullHash(htSHA256); + + for (auto& i : validPaths) { + try { + auto info = std::const_pointer_cast<ValidPathInfo>( + std::shared_ptr<const ValidPathInfo>(queryPathInfo(i))); + + /* Check the content hash (optionally - slow). 
*/ + DLOG(INFO) << "checking contents of '" << i << "'"; + HashResult current = hashPath(info->narHash.type, toRealPath(i)); + + if (info->narHash != nullHash && info->narHash != current.first) { + LOG(ERROR) << "path '" << i << "' was modified! expected hash '" + << info->narHash.to_string() << "', got '" + << current.first.to_string() << "'"; + if (repair != 0u) { + repairPath(i); + } else { + errors = true; + } + } else { + bool update = false; + + /* Fill in missing hashes. */ + if (info->narHash == nullHash) { + LOG(WARNING) << "fixing missing hash on '" << i << "'"; + info->narHash = current.first; + update = true; + } + + /* Fill in missing narSize fields (from old stores). */ + if (info->narSize == 0) { + LOG(ERROR) << "updating size field on '" << i << "' to " + << current.second; + info->narSize = current.second; + update = true; + } + + if (update) { + auto state(_state.lock()); + updatePathInfo(*state, *info); + } + } + + } catch (Error& e) { + /* It's possible that the path got GC'ed, so ignore + errors on invalid paths. */ + if (isValidPath(i)) { + LOG(ERROR) << e.msg(); + } else { + LOG(WARNING) << e.msg(); + } + errors = true; + } + } + } + + return errors; +} + +void LocalStore::verifyPath(const Path& path, const PathSet& store, + PathSet& done, PathSet& validPaths, + RepairFlag repair, bool& errors) { + checkInterrupt(); + + if (done.find(path) != done.end()) { + return; + } + done.insert(path); + + if (!isStorePath(path)) { + LOG(ERROR) << "path '" << path << "' is not in the Nix store"; + auto state(_state.lock()); + invalidatePath(*state, path); + return; + } + + if (store.find(baseNameOf(path)) == store.end()) { + /* Check any referrers first. If we can invalidate them + first, then we can invalidate this path as well. 
*/ + bool canInvalidate = true; + PathSet referrers; + queryReferrers(path, referrers); + for (auto& i : referrers) { + if (i != path) { + verifyPath(i, store, done, validPaths, repair, errors); + if (validPaths.find(i) != validPaths.end()) { + canInvalidate = false; + } + } + } + + if (canInvalidate) { + LOG(WARNING) << "path '" << path + << "' disappeared, removing from database..."; + auto state(_state.lock()); + invalidatePath(*state, path); + } else { + LOG(ERROR) << "path '" << path + << "' disappeared, but it still has valid referrers!"; + if (repair != 0u) { + try { + repairPath(path); + } catch (Error& e) { + LOG(WARNING) << e.msg(); + errors = true; + } + } else { + errors = true; + } + } + + return; + } + + validPaths.insert(path); +} + +unsigned int LocalStore::getProtocol() { return PROTOCOL_VERSION; } + +#if defined(FS_IOC_SETFLAGS) && defined(FS_IOC_GETFLAGS) && \ + defined(FS_IMMUTABLE_FL) + +static void makeMutable(const Path& path) { + checkInterrupt(); + + struct stat st = lstat(path); + + if (!S_ISDIR(st.st_mode) && !S_ISREG(st.st_mode)) { + return; + } + + if (S_ISDIR(st.st_mode)) { + for (auto& i : readDirectory(path)) { + makeMutable(path + "/" + i.name); + } + } + + /* The O_NOFOLLOW is important to prevent us from changing the + mutable bit on the target of a symlink (which would be a + security hole). */ + AutoCloseFD fd = open(path.c_str(), O_RDONLY | O_NOFOLLOW | O_CLOEXEC); + if (fd == -1) { + if (errno == ELOOP) { + return; + } // it's a symlink + throw SysError(format("opening file '%1%'") % path); + } + + unsigned int flags = 0, old; + + /* Silently ignore errors getting/setting the immutable flag so + that we work correctly on filesystems that don't support it. */ + if (ioctl(fd, FS_IOC_GETFLAGS, &flags)) { + return; + } + old = flags; + flags &= ~FS_IMMUTABLE_FL; + if (old == flags) { + return; + } + if (ioctl(fd, FS_IOC_SETFLAGS, &flags)) { + return; + } +} + +/* Upgrade from schema 6 (Nix 0.15) to schema 7 (Nix >= 1.3). 
*/ +void LocalStore::upgradeStore7() { + if (getuid() != 0) { + return; + } + printError( + "removing immutable bits from the Nix store (this may take a while)..."); + makeMutable(realStoreDir); +} + +#else + +void LocalStore::upgradeStore7() {} + +#endif + +void LocalStore::vacuumDB() { + auto state(_state.lock()); + state->db.exec("vacuum"); +} + +void LocalStore::addSignatures(const Path& storePath, const StringSet& sigs) { + retrySQLite<void>([&]() { + auto state(_state.lock()); + + SQLiteTxn txn(state->db); + + auto info = std::const_pointer_cast<ValidPathInfo>( + std::shared_ptr<const ValidPathInfo>(queryPathInfo(storePath))); + + info->sigs.insert(sigs.begin(), sigs.end()); + + updatePathInfo(*state, *info); + + txn.commit(); + }); +} + +void LocalStore::signPathInfo(ValidPathInfo& info) { + // FIXME: keep secret keys in memory. + + auto secretKeyFiles = settings.secretKeyFiles; + + for (auto& secretKeyFile : secretKeyFiles.get()) { + SecretKey secretKey(readFile(secretKeyFile)); + info.sign(secretKey); + } +} + +void LocalStore::createUser(const std::string& userName, uid_t userId) { + for (auto& dir : {fmt("%s/profiles/per-user/%s", stateDir, userName), + fmt("%s/gcroots/per-user/%s", stateDir, userName)}) { + createDirs(dir); + if (chmod(dir.c_str(), 0755) == -1) { + throw SysError("changing permissions of directory '%s'", dir); + } + if (chown(dir.c_str(), userId, getgid()) == -1) { + throw SysError("changing owner of directory '%s'", dir); + } + } +} + +} // namespace nix diff --git a/third_party/nix/src/libstore/local-store.hh b/third_party/nix/src/libstore/local-store.hh new file mode 100644 index 000000000000..178cadf92bf6 --- /dev/null +++ b/third_party/nix/src/libstore/local-store.hh @@ -0,0 +1,315 @@ +#pragma once + +#include <chrono> +#include <future> +#include <string> +#include <unordered_set> + +#include <absl/strings/str_split.h> + +#include "pathlocks.hh" +#include "sqlite.hh" +#include "store-api.hh" +#include "sync.hh" +#include "util.hh" 
+ +namespace nix { + +/* Nix store and database schema version. Version 1 (or 0) was Nix <= + 0.7. Version 2 was Nix 0.8 and 0.9. Version 3 is Nix 0.10. + Version 4 is Nix 0.11. Version 5 is Nix 0.12-0.16. Version 6 is + Nix 1.0. Version 7 is Nix 1.3. Version 10 is 2.0. */ +const int nixSchemaVersion = 10; + +struct Derivation; + +struct OptimiseStats { + unsigned long filesLinked = 0; + unsigned long long bytesFreed = 0; + unsigned long long blocksFreed = 0; +}; + +class LocalStore : public LocalFSStore { + private: + /* Lock file used for upgrading. */ + AutoCloseFD globalLock; + + struct State { + /* The SQLite database object. */ + SQLite db; + + /* Some precompiled SQLite statements. */ + SQLiteStmt stmtRegisterValidPath; + SQLiteStmt stmtUpdatePathInfo; + SQLiteStmt stmtAddReference; + SQLiteStmt stmtQueryPathInfo; + SQLiteStmt stmtQueryReferences; + SQLiteStmt stmtQueryReferrers; + SQLiteStmt stmtInvalidatePath; + SQLiteStmt stmtAddDerivationOutput; + SQLiteStmt stmtQueryValidDerivers; + SQLiteStmt stmtQueryDerivationOutputs; + SQLiteStmt stmtQueryPathFromHashPart; + SQLiteStmt stmtQueryValidPaths; + + /* The file to which we write our temporary roots. */ + AutoCloseFD fdTempRoots; + + /* The last time we checked whether to do an auto-GC, or an + auto-GC finished. */ + std::chrono::time_point<std::chrono::steady_clock> lastGCCheck; + + /* Whether auto-GC is running. If so, get gcFuture to wait for + the GC to finish. */ + bool gcRunning = false; + std::shared_future<void> gcFuture; + + /* How much disk space was available after the previous + auto-GC. If the current available disk space is below + minFree but not much below availAfterGC, then there is no + point in starting a new GC. 
*/ + uint64_t availAfterGC = std::numeric_limits<uint64_t>::max(); + + std::unique_ptr<PublicKeys> publicKeys; + }; + + Sync<State, std::recursive_mutex> _state; + + public: + PathSetting realStoreDir_; + + const Path realStoreDir; + const Path dbDir; + const Path linksDir; + const Path reservedPath; + const Path schemaPath; + const Path trashDir; + const Path tempRootsDir; + const Path fnTempRoots; + + private: + Setting<bool> requireSigs{ + (Store*)this, settings.requireSigs, "require-sigs", + "whether store paths should have a trusted signature on import"}; + + const PublicKeys& getPublicKeys(); + + public: + // Hack for build-remote.cc. + // TODO(tazjin): remove this when we've got gRPC + PathSet locksHeld = + absl::StrSplit(getEnv("NIX_HELD_LOCKS"), absl::ByAnyChar(" \t\n\r")); + + /* Initialise the local store, upgrading the schema if + necessary. */ + LocalStore(const Params& params); + + ~LocalStore(); + + /* Implementations of abstract store API methods. */ + + std::string getUri() override; + + bool isValidPathUncached(const Path& path) override; + + PathSet queryValidPaths(const PathSet& paths, SubstituteFlag maybeSubstitute = + NoSubstitute) override; + + PathSet queryAllValidPaths() override; + + void queryPathInfoUncached( + const Path& path, + Callback<std::shared_ptr<ValidPathInfo>> callback) noexcept override; + + void queryReferrers(const Path& path, PathSet& referrers) override; + + PathSet queryValidDerivers(const Path& path) override; + + PathSet queryDerivationOutputs(const Path& path) override; + + StringSet queryDerivationOutputNames(const Path& path) override; + + Path queryPathFromHashPart(const std::string& hashPart) override; + + PathSet querySubstitutablePaths(const PathSet& paths) override; + + void querySubstitutablePathInfos(const PathSet& paths, + SubstitutablePathInfos& infos) override; + + void addToStore(const ValidPathInfo& info, Source& source, RepairFlag repair, + CheckSigsFlag checkSigs, + std::shared_ptr<FSAccessor> 
accessor) override; + + Path addToStore(const std::string& name, const Path& srcPath, bool recursive, + HashType hashAlgo, PathFilter& filter, + RepairFlag repair) override; + + /* Like addToStore(), but the contents of the path are contained + in `dump', which is either a NAR serialisation (if recursive == + true) or simply the contents of a regular file (if recursive == + false). */ + Path addToStoreFromDump(const std::string& dump, const std::string& name, + bool recursive = true, HashType hashAlgo = htSHA256, + RepairFlag repair = NoRepair); + + Path addTextToStore(const std::string& name, const std::string& s, + const PathSet& references, RepairFlag repair) override; + + void buildPaths(const PathSet& paths, BuildMode buildMode) override; + + BuildResult buildDerivation(const Path& drvPath, const BasicDerivation& drv, + BuildMode buildMode) override; + + void ensurePath(const Path& path) override; + + void addTempRoot(const Path& path) override; + + void addIndirectRoot(const Path& path) override; + + void syncWithGC() override; + + private: + typedef std::shared_ptr<AutoCloseFD> FDPtr; + typedef std::list<FDPtr> FDs; + + void findTempRoots(FDs& fds, Roots& roots, bool censor); + + public: + Roots findRoots(bool censor) override; + + void collectGarbage(const GCOptions& options, GCResults& results) override; + + /* Optimise the disk space usage of the Nix store by hard-linking + files with the same contents. */ + void optimiseStore(OptimiseStats& stats); + + void optimiseStore() override; + + /* Optimise a single store path. */ + void optimisePath(const Path& path); + + bool verifyStore(bool checkContents, RepairFlag repair) override; + + /* Register the validity of a path, i.e., that `path' exists, that + the paths referenced by it exist, and in the case of an output + path of a derivation, that it has been produced by a successful + execution of the derivation (or something equivalent). Also + register the hash of the file system contents of the path.
The + hash must be a SHA-256 hash. */ + void registerValidPath(const ValidPathInfo& info); + + void registerValidPaths(const ValidPathInfos& infos); + + unsigned int getProtocol() override; + + void vacuumDB(); + + /* Repair the contents of the given path by redownloading it using + a substituter (if available). */ + void repairPath(const Path& path); + + void addSignatures(const Path& storePath, const StringSet& sigs) override; + + /* If free disk space in /nix/store is below minFree, delete + garbage until it exceeds maxFree. */ + void autoGC(bool sync = true); + + private: + int getSchema(); + + void openDB(State& state, bool create); + + void makeStoreWritable(); + + static uint64_t queryValidPathId(State& state, const Path& path); + + uint64_t addValidPath(State& state, const ValidPathInfo& info, + bool checkOutputs = true); + + void invalidatePath(State& state, const Path& path); + + /* Delete a path from the Nix store. */ + void invalidatePathChecked(const Path& path); + + void verifyPath(const Path& path, const PathSet& store, PathSet& done, + PathSet& validPaths, RepairFlag repair, bool& errors); + + static void updatePathInfo(State& state, const ValidPathInfo& info); + + void upgradeStore6(); + void upgradeStore7(); + PathSet queryValidPathsOld(); + ValidPathInfo queryPathInfoOld(const Path& path); + + struct GCState; + + static void deleteGarbage(GCState& state, const Path& path); + + void tryToDelete(GCState& state, const Path& path); + + bool canReachRoot(GCState& state, PathSet& visited, const Path& path); + + void deletePathRecursive(GCState& state, const Path& path); + + static bool isActiveTempFile(const GCState& state, const Path& path, + const std::string& suffix); + + AutoCloseFD openGCLock(LockType lockType); + + void findRoots(const Path& path, unsigned char type, Roots& roots); + + void findRootsNoTemp(Roots& roots, bool censor); + + void findRuntimeRoots(Roots& roots, bool censor); + + void removeUnusedLinks(const GCState& state); + + Path
createTempDirInStore(); + + void checkDerivationOutputs(const Path& drvPath, const Derivation& drv); + + typedef std::unordered_set<ino_t> InodeHash; + + InodeHash loadInodeHash(); + static Strings readDirectoryIgnoringInodes(const Path& path, + const InodeHash& inodeHash); + void optimisePath_(OptimiseStats& stats, const Path& path, + InodeHash& inodeHash); + + // Internal versions that are not wrapped in retry_sqlite. + static bool isValidPath_(State& state, const Path& path); + static void queryReferrers(State& state, const Path& path, + PathSet& referrers); + + /* Add signatures to a ValidPathInfo using the secret keys + specified by the ‘secret-key-files’ option. */ + static void signPathInfo(ValidPathInfo& info); + + Path getRealStoreDir() override { return realStoreDir; } + + void createUser(const std::string& userName, uid_t userId) override; + + friend class DerivationGoal; + friend class SubstitutionGoal; +}; + +typedef std::pair<dev_t, ino_t> Inode; +typedef std::set<Inode> InodesSeen; + +/* "Fix", or canonicalise, the meta-data of the files in a store path + after it has been built. In particular: + - the last modification date on each file is set to 1 (i.e., + 00:00:01 1/1/1970 UTC) + - the permissions are set to 444 or 555 (i.e., read-only with or + without execute permission; setuid bits etc. are cleared) + - the owner and group are set to the Nix user and group, if we're + running as root.
*/ +void canonicalisePathMetaData(const Path& path, uid_t fromUid, + InodesSeen& inodesSeen); +void canonicalisePathMetaData(const Path& path, uid_t fromUid); + +void canonicaliseTimestampAndPermissions(const Path& path); + +MakeError(PathInUse, Error); + +} // namespace nix diff --git a/third_party/nix/src/libstore/machines.cc b/third_party/nix/src/libstore/machines.cc new file mode 100644 index 000000000000..9ad20a25f8f0 --- /dev/null +++ b/third_party/nix/src/libstore/machines.cc @@ -0,0 +1,110 @@ +#include "machines.hh" + +#include <algorithm> + +#include <absl/strings/ascii.h> +#include <absl/strings/match.h> +#include <absl/strings/str_split.h> +#include <absl/strings/string_view.h> +#include <glog/logging.h> + +#include "globals.hh" +#include "util.hh" + +namespace nix { + +Machine::Machine(decltype(storeUri)& storeUri, + decltype(systemTypes)& systemTypes, decltype(sshKey)& sshKey, + decltype(maxJobs) maxJobs, decltype(speedFactor) speedFactor, + decltype(supportedFeatures)& supportedFeatures, + decltype(mandatoryFeatures)& mandatoryFeatures, + decltype(sshPublicHostKey)& sshPublicHostKey) + : storeUri( + // Backwards compatibility: if the URI is a hostname, + // prepend ssh://. + storeUri.find("://") != std::string::npos || + absl::StartsWith(storeUri, "local") || + absl::StartsWith(storeUri, "remote") || + absl::StartsWith(storeUri, "auto") || + absl::StartsWith(storeUri, "/") + ? 
storeUri + : "ssh://" + storeUri), + systemTypes(systemTypes), + sshKey(sshKey), + maxJobs(maxJobs), + speedFactor(std::max(1U, speedFactor)), + supportedFeatures(supportedFeatures), + mandatoryFeatures(mandatoryFeatures), + sshPublicHostKey(sshPublicHostKey) {} + +bool Machine::allSupported(const std::set<std::string>& features) const { + return std::all_of(features.begin(), features.end(), + [&](const std::string& feature) { + return (supportedFeatures.count(feature) != 0u) || + (mandatoryFeatures.count(feature) != 0u); + }); +} + +bool Machine::mandatoryMet(const std::set<std::string>& features) const { + return std::all_of( + mandatoryFeatures.begin(), mandatoryFeatures.end(), + [&](const std::string& feature) { return features.count(feature); }); +} + +void parseMachines(const std::string& s, Machines& machines) { + for (auto line : absl::StrSplit(s, absl::ByAnyChar("\n;"))) { + // Skip empty lines & comments + line = absl::StripAsciiWhitespace(line); + if (line.empty() || line[line.find_first_not_of(" \t")] == '#') { + continue; + } + + if (line[0] == '@') { + auto file = absl::StripAsciiWhitespace(line.substr(1)); + try { + parseMachines(readFile(file), machines); + } catch (const SysError& e) { + if (e.errNo != ENOENT) { + throw; + } + DLOG(INFO) << "cannot find machines file: " << file; + } + continue; + } + + std::vector<std::string> tokens = + absl::StrSplit(line, absl::ByAnyChar(" \t\n\r")); + auto sz = tokens.size(); + if (sz < 1) { + throw FormatError("bad machine specification '%s'", line); + } + + auto isSet = [&](size_t n) { + return tokens.size() > n && !tokens[n].empty() && tokens[n] != "-"; + }; + + // TODO(tazjin): what??? + machines.emplace_back( + tokens[0], + isSet(1) ? absl::StrSplit(tokens[1], absl::ByChar(',')) + : std::vector<std::string>{settings.thisSystem}, + isSet(2) ? tokens[2] : "", isSet(3) ? std::stoull(tokens[3]) : 1LL, + isSet(4) ? std::stoull(tokens[4]) : 1LL, + isSet(5) ? 
absl::StrSplit(tokens[5], absl::ByChar(',')) + : std::set<std::string>{}, + isSet(6) ? absl::StrSplit(tokens[6], absl::ByChar(',')) + : std::set<std::string>{}, + isSet(7) ? tokens[7] : ""); + } +} + +Machines getMachines() { + static auto machines = [&]() { + Machines machines; + parseMachines(settings.builders, machines); + return machines; + }(); + return machines; +} + +} // namespace nix diff --git a/third_party/nix/src/libstore/machines.hh b/third_party/nix/src/libstore/machines.hh new file mode 100644 index 000000000000..23712e676056 --- /dev/null +++ b/third_party/nix/src/libstore/machines.hh @@ -0,0 +1,36 @@ +#pragma once + +#include "types.hh" + +namespace nix { + +struct Machine { + const std::string storeUri; + const std::vector<std::string> systemTypes; + const std::string sshKey; + const unsigned int maxJobs; + const unsigned int speedFactor; + const std::set<std::string> supportedFeatures; + const std::set<std::string> mandatoryFeatures; + const std::string sshPublicHostKey; + bool enabled = true; + + bool allSupported(const std::set<std::string>& features) const; + + bool mandatoryMet(const std::set<std::string>& features) const; + + Machine(decltype(storeUri)& storeUri, decltype(systemTypes)& systemTypes, + decltype(sshKey)& sshKey, decltype(maxJobs) maxJobs, + decltype(speedFactor) speedFactor, + decltype(supportedFeatures)& supportedFeatures, + decltype(mandatoryFeatures)& mandatoryFeatures, + decltype(sshPublicHostKey)& sshPublicHostKey); +}; + +typedef std::vector<Machine> Machines; + +void parseMachines(const std::string& s, Machines& machines); + +Machines getMachines(); + +} // namespace nix diff --git a/third_party/nix/src/libstore/meson.build b/third_party/nix/src/libstore/meson.build new file mode 100644 index 000000000000..b8564f24f8b0 --- /dev/null +++ b/third_party/nix/src/libstore/meson.build @@ -0,0 +1,153 @@ +# Nix lib store build file +#============================================================================ + +src_inc += 
include_directories('.') + +libstore_src = files( + join_paths(meson.source_root(), 'src/libstore/binary-cache-store.cc'), + join_paths(meson.source_root(), 'src/libstore/build.cc'), + join_paths(meson.source_root(), 'src/libstore/crypto.cc'), + join_paths(meson.source_root(), 'src/libstore/derivations.cc'), + join_paths(meson.source_root(), 'src/libstore/download.cc'), + join_paths(meson.source_root(), 'src/libstore/export-import.cc'), + join_paths(meson.source_root(), 'src/libstore/gc.cc'), + join_paths(meson.source_root(), 'src/libstore/globals.cc'), + join_paths(meson.source_root(), 'src/libstore/http-binary-cache-store.cc'), + join_paths(meson.source_root(), 'src/libstore/legacy-ssh-store.cc'), + join_paths(meson.source_root(), 'src/libstore/local-binary-cache-store.cc'), + join_paths(meson.source_root(), 'src/libstore/local-fs-store.cc'), + join_paths(meson.source_root(), 'src/libstore/local-store.cc'), + join_paths(meson.source_root(), 'src/libstore/machines.cc'), + join_paths(meson.source_root(), 'src/libstore/misc.cc'), + join_paths(meson.source_root(), 'src/libstore/nar-accessor.cc'), + join_paths(meson.source_root(), 'src/libstore/nar-info.cc'), + join_paths(meson.source_root(), 'src/libstore/nar-info-disk-cache.cc'), + join_paths(meson.source_root(), 'src/libstore/optimise-store.cc'), + join_paths(meson.source_root(), 'src/libstore/parsed-derivations.cc'), + join_paths(meson.source_root(), 'src/libstore/pathlocks.cc'), + join_paths(meson.source_root(), 'src/libstore/profiles.cc'), + join_paths(meson.source_root(), 'src/libstore/references.cc'), + join_paths(meson.source_root(), 'src/libstore/remote-fs-accessor.cc'), + join_paths(meson.source_root(), 'src/libstore/remote-store.cc'), + join_paths(meson.source_root(), 'src/libstore/s3-binary-cache-store.cc'), + join_paths(meson.source_root(), 'src/libstore/sqlite.cc'), + join_paths(meson.source_root(), 'src/libstore/ssh.cc'), + join_paths(meson.source_root(), 'src/libstore/ssh-store.cc'), + 
join_paths(meson.source_root(), 'src/libstore/store-api.cc'), + join_paths(meson.source_root(), 'src/libstore/builtins/buildenv.cc'), + join_paths(meson.source_root(), 'src/libstore/builtins/fetchurl.cc')) + +libstore_headers = files( + join_paths(meson.source_root(), 'src/libstore/binary-cache-store.hh'), + join_paths(meson.source_root(), 'src/libstore/builtins.hh'), + join_paths(meson.source_root(), 'src/libstore/crypto.hh'), + join_paths(meson.source_root(), 'src/libstore/derivations.hh'), + join_paths(meson.source_root(), 'src/libstore/download.hh'), + join_paths(meson.source_root(), 'src/libstore/fs-accessor.hh'), + join_paths(meson.source_root(), 'src/libstore/globals.hh'), + join_paths(meson.source_root(), 'src/libstore/local-store.hh'), + join_paths(meson.source_root(), 'src/libstore/machines.hh'), + join_paths(meson.source_root(), 'src/libstore/nar-accessor.hh'), + join_paths(meson.source_root(), 'src/libstore/nar-info-disk-cache.hh'), + join_paths(meson.source_root(), 'src/libstore/nar-info.hh'), + join_paths(meson.source_root(), 'src/libstore/parsed-derivations.hh'), + join_paths(meson.source_root(), 'src/libstore/pathlocks.hh'), + join_paths(meson.source_root(), 'src/libstore/profiles.hh'), + join_paths(meson.source_root(), 'src/libstore/references.hh'), + join_paths(meson.source_root(), 'src/libstore/remote-fs-accessor.hh'), + join_paths(meson.source_root(), 'src/libstore/remote-store.hh'), + join_paths(meson.source_root(), 'src/libstore/s3-binary-cache-store.hh'), + join_paths(meson.source_root(), 'src/libstore/s3.hh'), + join_paths(meson.source_root(), 'src/libstore/serve-protocol.hh'), + join_paths(meson.source_root(), 'src/libstore/sqlite.hh'), + join_paths(meson.source_root(), 'src/libstore/ssh.hh'), + join_paths(meson.source_root(), 'src/libstore/store-api.hh'), + join_paths(meson.source_root(), 'src/libstore/worker-protocol.hh')) + +libstore_data = files( + join_paths(meson.source_root(), 'src/libstore/sandbox-defaults.sb'), + 
join_paths(meson.source_root(), 'src/libstore/sandbox-minimal.sb'), + join_paths(meson.source_root(), 'src/libstore/sandbox-network.sb')) + +# dependencies +#============================================================================ + +libstore_dep_list = [ + glog_dep, + libbz2_dep, + libcurl_dep, + libdl_dep, + pthread_dep, + sqlite3_dep, + libsodium_dep +] + absl_deps + +if sys_name.contains('linux') + libstore_dep_list += libseccomp_dep +endif + +if sys_name.contains('freebsd') + libstore_dep_list += libdl_dep +endif + +# Link args +#============================================================================ + +libstore_link_list = [ + libutil_lib] + +libstore_link_args = [] + +# compiler args +#============================================================================ + +libstore_cxx_args = [ + '-DNIX_PREFIX="@0@" '.format(prefix), + '-DNIX_STORE_DIR="@0@" '.format(nixstoredir), + '-DNIX_DATA_DIR="@0@" '.format(datadir), + '-DNIX_STATE_DIR="@0@" '.format(join_paths(localstatedir, 'nix')), + '-DNIX_LOG_DIR="@0@" '.format(join_paths(localstatedir, 'log/nix')), + '-DNIX_CONF_DIR="@0@" '.format(join_paths(sysconfdir, 'nix')), + '-DNIX_LIBEXEC_DIR="@0@" '.format(libexecdir), + '-DNIX_BIN_DIR="@0@" '.format(bindir), + '-DNIX_MAN_DIR="@0@" '.format(mandir), + '-DSANDBOX_SHELL="@0@" '.format(get_option('sandbox_shell')), + '-DLSOF="@0@" '.format(lsof)] + +# targets +#============================================================================ + +gen_header = ''' + echo 'R"foo(' >> "$1" + cat @INPUT@ >> "$1" + echo ')foo"' >> "$1" +''' + +libstore_src += custom_target( + 'schema.sql.gen.hh', + output : 'schema.sql.gen.hh', + input : 'schema.sql', + command : [bash, '-c', gen_header, 'sh', '@OUTPUT@']) + +# build +#============================================================================ + +libstore_lib = library( + 'nixstore', + install : true, + install_mode : 'rwxr-xr-x', + install_dir : libdir, + include_directories : src_inc, + link_with : 
libstore_link_list, + sources : libstore_src, + cpp_args : libstore_cxx_args, + link_args : libstore_link_args, + dependencies : libstore_dep_list) + +install_headers( + libstore_headers, + install_dir : join_paths(includedir, 'nix')) + +install_data( + libstore_data, + install_dir : join_paths(datadir, 'nix/sandbox')) diff --git a/third_party/nix/src/libstore/misc.cc b/third_party/nix/src/libstore/misc.cc new file mode 100644 index 000000000000..f95c4bec8b0c --- /dev/null +++ b/third_party/nix/src/libstore/misc.cc @@ -0,0 +1,329 @@ +#include <glog/logging.h> + +#include "derivations.hh" +#include "globals.hh" +#include "local-store.hh" +#include "parsed-derivations.hh" +#include "store-api.hh" +#include "thread-pool.hh" + +namespace nix { + +void Store::computeFSClosure(const PathSet& startPaths, PathSet& paths_, + bool flipDirection, bool includeOutputs, + bool includeDerivers) { + struct State { + size_t pending; + PathSet& paths; + std::exception_ptr exc; + }; + + Sync<State> state_(State{0, paths_, nullptr}); + + std::function<void(const Path&)> enqueue; + + std::condition_variable done; + + enqueue = [&](const Path& path) -> void { + { + auto state(state_.lock()); + if (state->exc) { + return; + } + if (state->paths.count(path) != 0u) { + return; + } + state->paths.insert(path); + state->pending++; + } + + queryPathInfo( + path, {[&, path](std::future<ref<ValidPathInfo>> fut) { + // FIXME: calls to isValidPath() should be async + + try { + auto info = fut.get(); + + if (flipDirection) { + PathSet referrers; + queryReferrers(path, referrers); + for (auto& ref : referrers) { + if (ref != path) { + enqueue(ref); + } + } + + if (includeOutputs) { + for (auto& i : queryValidDerivers(path)) { + enqueue(i); + } + } + + if (includeDerivers && isDerivation(path)) { + for (auto& i : queryDerivationOutputs(path)) { + if (isValidPath(i) && queryPathInfo(i)->deriver == path) { + enqueue(i); + } + } + } + + } else { + for (auto& ref : info->references) { + if (ref != path) 
{ + enqueue(ref); + } + } + + if (includeOutputs && isDerivation(path)) { + for (auto& i : queryDerivationOutputs(path)) { + if (isValidPath(i)) { + enqueue(i); + } + } + } + + if (includeDerivers && isValidPath(info->deriver)) { + enqueue(info->deriver); + } + } + + { + auto state(state_.lock()); + assert(state->pending); + if (--state->pending == 0u) { + done.notify_one(); + } + } + + } catch (...) { + auto state(state_.lock()); + if (!state->exc) { + state->exc = std::current_exception(); + } + assert(state->pending); + if (--state->pending == 0u) { + done.notify_one(); + } + }; + }}); + }; + + for (auto& startPath : startPaths) { + enqueue(startPath); + } + + { + auto state(state_.lock()); + while (state->pending != 0u) { + state.wait(done); + } + if (state->exc) { + std::rethrow_exception(state->exc); + } + } +} + +void Store::computeFSClosure(const Path& startPath, PathSet& paths_, + bool flipDirection, bool includeOutputs, + bool includeDerivers) { + computeFSClosure(PathSet{startPath}, paths_, flipDirection, includeOutputs, + includeDerivers); +} + +void Store::queryMissing(const PathSet& targets, PathSet& willBuild_, + PathSet& willSubstitute_, PathSet& unknown_, + unsigned long long& downloadSize_, + unsigned long long& narSize_) { + LOG(INFO) << "querying info about missing paths"; + + downloadSize_ = narSize_ = 0; + + ThreadPool pool; + + struct State { + PathSet done; + PathSet &unknown, &willSubstitute, &willBuild; + unsigned long long& downloadSize; + unsigned long long& narSize; + }; + + struct DrvState { + size_t left; + bool done = false; + PathSet outPaths; + explicit DrvState(size_t left) : left(left) {} + }; + + Sync<State> state_(State{PathSet(), unknown_, willSubstitute_, willBuild_, + downloadSize_, narSize_}); + + std::function<void(Path)> doPath; + + auto mustBuildDrv = [&](const Path& drvPath, const Derivation& drv) { + { + auto state(state_.lock()); + state->willBuild.insert(drvPath); + } + + for (auto& i : drv.inputDrvs) { + 
pool.enqueue( + std::bind(doPath, makeDrvPathWithOutputs(i.first, i.second))); + } + }; + + auto checkOutput = [&](const Path& drvPath, const ref<Derivation>& drv, + const Path& outPath, + const ref<Sync<DrvState>>& drvState_) { + if (drvState_->lock()->done) { + return; + } + + SubstitutablePathInfos infos; + querySubstitutablePathInfos({outPath}, infos); + + if (infos.empty()) { + drvState_->lock()->done = true; + mustBuildDrv(drvPath, *drv); + } else { + { + auto drvState(drvState_->lock()); + if (drvState->done) { + return; + } + assert(drvState->left); + drvState->left--; + drvState->outPaths.insert(outPath); + if (drvState->left == 0u) { + for (auto& path : drvState->outPaths) { + pool.enqueue(std::bind(doPath, path)); + } + } + } + } + }; + + doPath = [&](const Path& path) { + { + auto state(state_.lock()); + if (state->done.count(path) != 0u) { + return; + } + state->done.insert(path); + } + + DrvPathWithOutputs i2 = parseDrvPathWithOutputs(path); + + if (isDerivation(i2.first)) { + if (!isValidPath(i2.first)) { + // FIXME: we could try to substitute the derivation. 
+ auto state(state_.lock()); + state->unknown.insert(path); + return; + } + + Derivation drv = derivationFromPath(i2.first); + ParsedDerivation parsedDrv(i2.first, drv); + + PathSet invalid; + for (auto& j : drv.outputs) { + if (wantOutput(j.first, i2.second) && !isValidPath(j.second.path)) { + invalid.insert(j.second.path); + } + } + if (invalid.empty()) { + return; + } + + if (settings.useSubstitutes && parsedDrv.substitutesAllowed()) { + auto drvState = make_ref<Sync<DrvState>>(DrvState(invalid.size())); + for (auto& output : invalid) { + pool.enqueue(std::bind(checkOutput, i2.first, + make_ref<Derivation>(drv), output, drvState)); + } + } else { + mustBuildDrv(i2.first, drv); + } + + } else { + if (isValidPath(path)) { + return; + } + + SubstitutablePathInfos infos; + querySubstitutablePathInfos({path}, infos); + + if (infos.empty()) { + auto state(state_.lock()); + state->unknown.insert(path); + return; + } + + auto info = infos.find(path); + assert(info != infos.end()); + + { + auto state(state_.lock()); + state->willSubstitute.insert(path); + state->downloadSize += info->second.downloadSize; + state->narSize += info->second.narSize; + } + + for (auto& ref : info->second.references) { + pool.enqueue(std::bind(doPath, ref)); + } + } + }; + + for (auto& path : targets) { + pool.enqueue(std::bind(doPath, path)); + } + + pool.process(); +} + +Paths Store::topoSortPaths(const PathSet& paths) { + Paths sorted; + PathSet visited; + PathSet parents; + + std::function<void(const Path& path, const Path* parent)> dfsVisit; + + dfsVisit = [&](const Path& path, const Path* parent) { + if (parents.find(path) != parents.end()) { + throw BuildError( + format("cycle detected in the references of '%1%' from '%2%'") % + path % *parent); + } + + if (visited.find(path) != visited.end()) { + return; + } + visited.insert(path); + parents.insert(path); + + PathSet references; + try { + references = queryPathInfo(path)->references; + } catch (InvalidPath&) { + } + + for (auto& i : 
references) { + /* Don't traverse into paths that don't exist. That can + happen due to substitutes for non-existent paths. */ + if (i != path && paths.find(i) != paths.end()) { + dfsVisit(i, &path); + } + } + + sorted.push_front(path); + parents.erase(path); + }; + + for (auto& i : paths) { + dfsVisit(i, nullptr); + } + + return sorted; +} + +} // namespace nix diff --git a/third_party/nix/src/libstore/nar-accessor.cc b/third_party/nix/src/libstore/nar-accessor.cc new file mode 100644 index 000000000000..84bfb531ecd6 --- /dev/null +++ b/third_party/nix/src/libstore/nar-accessor.cc @@ -0,0 +1,268 @@ +#include "nar-accessor.hh" + +#include <algorithm> +#include <map> +#include <nlohmann/json.hpp> +#include <stack> +#include <utility> + +#include "archive.hh" +#include "json.hh" + +namespace nix { + +struct NarMember { + FSAccessor::Type type = FSAccessor::Type::tMissing; + + bool isExecutable = false; + + /* If this is a regular file, position of the contents of this + file in the NAR. */ + size_t start = 0, size = 0; + + std::string target; + + /* If this is a directory, all the children of the directory. 
*/ + std::map<std::string, NarMember> children; +}; + +struct NarAccessor : public FSAccessor { + std::shared_ptr<const std::string> nar; + + GetNarBytes getNarBytes; + + NarMember root; + + struct NarIndexer : ParseSink, StringSource { + NarAccessor& acc; + + std::stack<NarMember*> parents; + + std::string currentStart; + bool isExec = false; + + NarIndexer(NarAccessor& acc, const std::string& nar) + : StringSource(nar), acc(acc) {} + + void createMember(const Path& path, NarMember member) { + size_t level = std::count(path.begin(), path.end(), '/'); + while (parents.size() > level) { + parents.pop(); + } + + if (parents.empty()) { + acc.root = std::move(member); + parents.push(&acc.root); + } else { + if (parents.top()->type != FSAccessor::Type::tDirectory) { + throw Error("NAR file missing parent directory of path '%s'", path); + } + auto result = parents.top()->children.emplace(baseNameOf(path), + std::move(member)); + parents.push(&result.first->second); + } + } + + void createDirectory(const Path& path) override { + createMember(path, {FSAccessor::Type::tDirectory, false, 0, 0}); + } + + void createRegularFile(const Path& path) override { + createMember(path, {FSAccessor::Type::tRegular, false, 0, 0}); + } + + void isExecutable() override { parents.top()->isExecutable = true; } + + void preallocateContents(unsigned long long size) override { + currentStart = std::string(s, pos, 16); + assert(size <= std::numeric_limits<size_t>::max()); + parents.top()->size = (size_t)size; + parents.top()->start = pos; + } + + void receiveContents(unsigned char* data, unsigned int len) override { + // Sanity check + if (!currentStart.empty()) { + assert(len < 16 || currentStart == std::string((char*)data, 16)); + currentStart.clear(); + } + } + + void createSymlink(const Path& path, const std::string& target) override { + createMember(path, + NarMember{FSAccessor::Type::tSymlink, false, 0, 0, target}); + } + }; + + explicit NarAccessor(const ref<const std::string>& nar) : 
nar(nar) { + NarIndexer indexer(*this, *nar); + parseDump(indexer, indexer); + } + + NarAccessor(const std::string& listing, GetNarBytes getNarBytes) + : getNarBytes(std::move(getNarBytes)) { + using json = nlohmann::json; + + std::function<void(NarMember&, json&)> recurse; + + recurse = [&](NarMember& member, json& v) { + std::string type = v["type"]; + + if (type == "directory") { + member.type = FSAccessor::Type::tDirectory; + for (auto i = v["entries"].begin(); i != v["entries"].end(); ++i) { + const std::string& name = i.key(); + recurse(member.children[name], i.value()); + } + } else if (type == "regular") { + member.type = FSAccessor::Type::tRegular; + member.size = v["size"]; + member.isExecutable = v.value("executable", false); + member.start = v["narOffset"]; + } else if (type == "symlink") { + member.type = FSAccessor::Type::tSymlink; + member.target = v.value("target", ""); + } else { + return; + } + }; + + json v = json::parse(listing); + recurse(root, v); + } + + /* Look up the member at `path` (empty string = NAR root). Returns + nullptr if no such member exists. */ + NarMember* find(const Path& path) { + Path canon = path.empty() ? 
"" : canonPath(path); + NarMember* current = &root; + // Walk the canonicalised path, not the raw argument: only `canon` is + // guaranteed to consist of '/'-prefixed, non-empty components. + auto end = canon.end(); + for (auto it = canon.begin(); it != end;) { + // because it != end, the remaining component is non-empty so we need + // a directory + if (current->type != FSAccessor::Type::tDirectory) { + return nullptr; + } + + // skip slash (canonPath above ensures that this is always a slash) + assert(*it == '/'); + it += 1; + + // lookup current component + auto next = std::find(it, end, '/'); + auto child = current->children.find(std::string(it, next)); + if (child == current->children.end()) { + return nullptr; + } + current = &child->second; + + it = next; + } + + return current; + } + + NarMember& get(const Path& path) { + auto result = find(path); + if (result == nullptr) { + throw Error("NAR file does not contain path '%1%'", path); + } + return *result; + } + + Stat stat(const Path& path) override { + auto i = find(path); + if (i == nullptr) { + return {FSAccessor::Type::tMissing, 0, false}; + } + return {i->type, i->size, i->isExecutable, i->start}; + } + + StringSet readDirectory(const Path& path) override { + // Bind by reference: copying a NarMember copies its whole subtree. + auto& i = get(path); + + if (i.type != FSAccessor::Type::tDirectory) { + throw Error(format("path '%1%' inside NAR file is not a directory") % + path); + } + + StringSet res; + for (auto& child : i.children) { + res.insert(child.first); + } + + return res; + } + + std::string readFile(const Path& path) override { + auto& i = get(path); + if (i.type != FSAccessor::Type::tRegular) { + throw Error(format("path '%1%' inside NAR file is not a regular file") % + path); + } + + if (getNarBytes) { + return getNarBytes(i.start, i.size); + } + + assert(nar); + return std::string(*nar, i.start, i.size); + } + + std::string readLink(const Path& path) override { + auto& i = get(path); + if (i.type != FSAccessor::Type::tSymlink) { + throw Error(format("path '%1%' inside NAR file is not a symlink") % path); + } + return i.target; + } +}; + +ref<FSAccessor> makeNarAccessor(ref<const std::string> nar) { 
+ return make_ref<NarAccessor>(nar); +} + +ref<FSAccessor> makeLazyNarAccessor(const std::string& listing, + GetNarBytes getNarBytes) { + return make_ref<NarAccessor>(listing, getNarBytes); +} + +void listNar(JSONPlaceholder& res, const ref<FSAccessor>& accessor, + const Path& path, bool recurse) { + auto st = accessor->stat(path); + + auto obj = res.object(); + + switch (st.type) { + case FSAccessor::Type::tRegular: + obj.attr("type", "regular"); + obj.attr("size", st.fileSize); + if (st.isExecutable) { + obj.attr("executable", true); + } + if (st.narOffset != 0u) { + obj.attr("narOffset", st.narOffset); + } + break; + case FSAccessor::Type::tDirectory: + obj.attr("type", "directory"); + { + auto res2 = obj.object("entries"); + for (auto& name : accessor->readDirectory(path)) { + if (recurse) { + auto res3 = res2.placeholder(name); + listNar(res3, accessor, path + "/" + name, true); + } else { + res2.object(name); + } + } + } + break; + case FSAccessor::Type::tSymlink: + obj.attr("type", "symlink"); + obj.attr("target", accessor->readLink(path)); + break; + default: + throw Error("path '%s' does not exist in NAR", path); + } +} + +} // namespace nix diff --git a/third_party/nix/src/libstore/nar-accessor.hh b/third_party/nix/src/libstore/nar-accessor.hh new file mode 100644 index 000000000000..8b18d4a36e06 --- /dev/null +++ b/third_party/nix/src/libstore/nar-accessor.hh @@ -0,0 +1,29 @@ +#pragma once + +#include <functional> + +#include "fs-accessor.hh" + +namespace nix { + +/* Return an object that provides access to the contents of a NAR + file. */ +ref<FSAccessor> makeNarAccessor(ref<const std::string> nar); + +/* Create a NAR accessor from a NAR listing (in the format produced by + listNar()). The callback getNarBytes(offset, length) is used by the + readFile() method of the accessor to get the contents of files + inside the NAR. 
*/ +typedef std::function<std::string(uint64_t, uint64_t)> GetNarBytes; + +ref<FSAccessor> makeLazyNarAccessor(const std::string& listing, + GetNarBytes getNarBytes); + +class JSONPlaceholder; + +/* Write a JSON representation of the contents of a NAR (except file + contents). */ +void listNar(JSONPlaceholder& res, const ref<FSAccessor>& accessor, + const Path& path, bool recurse); + +} // namespace nix diff --git a/third_party/nix/src/libstore/nar-info-disk-cache.cc b/third_party/nix/src/libstore/nar-info-disk-cache.cc new file mode 100644 index 000000000000..0a7a0dc22c51 --- /dev/null +++ b/third_party/nix/src/libstore/nar-info-disk-cache.cc @@ -0,0 +1,288 @@ +#include "nar-info-disk-cache.hh" + +#include <absl/strings/str_cat.h> +#include <absl/strings/str_split.h> +#include <glog/logging.h> +#include <sqlite3.h> + +#include "globals.hh" +#include "sqlite.hh" +#include "sync.hh" + +namespace nix { + +static const char* schema = R"sql( + +create table if not exists BinaryCaches ( + id integer primary key autoincrement not null, + url text unique not null, + timestamp integer not null, + storeDir text not null, + wantMassQuery integer not null, + priority integer not null +); + +create table if not exists NARs ( + cache integer not null, + hashPart text not null, + namePart text, + url text, + compression text, + fileHash text, + fileSize integer, + narHash text, + narSize integer, + refs text, + deriver text, + sigs text, + ca text, + timestamp integer not null, + present integer not null, + primary key (cache, hashPart), + foreign key (cache) references BinaryCaches(id) on delete cascade +); + +create table if not exists LastPurge ( + dummy text primary key, + value integer +); + +)sql"; + +class NarInfoDiskCacheImpl : public NarInfoDiskCache { + public: + /* How often to purge expired entries from the cache. 
*/ + const int purgeInterval = 24 * 3600; + + struct Cache { + int id; + Path storeDir; + bool wantMassQuery; + int priority; + }; + + struct State { + SQLite db; + SQLiteStmt insertCache, queryCache, insertNAR, insertMissingNAR, queryNAR, + purgeCache; + std::map<std::string, Cache> caches; + }; + + Sync<State> _state; + + NarInfoDiskCacheImpl() { + auto state(_state.lock()); + + Path dbPath = getCacheDir() + "/nix/binary-cache-v6.sqlite"; + createDirs(dirOf(dbPath)); + + state->db = SQLite(dbPath); + + if (sqlite3_busy_timeout(state->db, 60 * 60 * 1000) != SQLITE_OK) { + throwSQLiteError(state->db, "setting timeout"); + } + + // We can always reproduce the cache. + state->db.exec("pragma synchronous = off"); + state->db.exec("pragma main.journal_mode = truncate"); + + state->db.exec(schema); + + state->insertCache.create( + state->db, + "insert or replace into BinaryCaches(url, timestamp, storeDir, " + "wantMassQuery, priority) values (?, ?, ?, ?, ?)"); + + state->queryCache.create(state->db, + "select id, storeDir, wantMassQuery, priority " + "from BinaryCaches where url = ?"); + + state->insertNAR.create( + state->db, + "insert or replace into NARs(cache, hashPart, namePart, url, " + "compression, fileHash, fileSize, narHash, " + "narSize, refs, deriver, sigs, ca, timestamp, present) values (?, ?, " + "?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, 1)"); + + state->insertMissingNAR.create( + state->db, + "insert or replace into NARs(cache, hashPart, timestamp, present) " + "values (?, ?, ?, 0)"); + + state->queryNAR.create( + state->db, + "select present, namePart, url, compression, fileHash, fileSize, " + "narHash, narSize, refs, deriver, sigs, ca from NARs where cache = ? " + "and hashPart = ? and ((present = 0 and timestamp > ?) or (present = 1 " + "and timestamp > ?))"); + + /* Periodically purge expired entries from the database. 
*/ + retrySQLite<void>([&]() { + auto now = time(nullptr); + + SQLiteStmt queryLastPurge(state->db, "select value from LastPurge"); + auto queryLastPurge_(queryLastPurge.use()); + + if (!queryLastPurge_.next() || + queryLastPurge_.getInt(0) < now - purgeInterval) { + SQLiteStmt(state->db, + "delete from NARs where ((present = 0 and timestamp < ?) or " + "(present = 1 and timestamp < ?))") + .use()(now - settings.ttlNegativeNarInfoCache)( + now - settings.ttlPositiveNarInfoCache) + .exec(); + + DLOG(INFO) << "deleted " << sqlite3_changes(state->db) + << " entries from the NAR info disk cache"; + + SQLiteStmt( + state->db, + "insert or replace into LastPurge(dummy, value) values ('', ?)") + .use()(now) + .exec(); + } + }); + } + + static Cache& getCache(State& state, const std::string& uri) { + auto i = state.caches.find(uri); + if (i == state.caches.end()) { + abort(); + } + return i->second; + } + + void createCache(const std::string& uri, const Path& storeDir, + bool wantMassQuery, int priority) override { + retrySQLite<void>([&]() { + auto state(_state.lock()); + + // FIXME: race + + state->insertCache + .use()(uri)(time(nullptr))(storeDir)( + static_cast<int64_t>(wantMassQuery))(priority) + .exec(); + assert(sqlite3_changes(state->db) == 1); + state->caches[uri] = Cache{(int)sqlite3_last_insert_rowid(state->db), + storeDir, wantMassQuery, priority}; + }); + } + + bool cacheExists(const std::string& uri, bool& wantMassQuery, + int& priority) override { + return retrySQLite<bool>([&]() { + auto state(_state.lock()); + + auto i = state->caches.find(uri); + if (i == state->caches.end()) { + auto queryCache(state->queryCache.use()(uri)); + if (!queryCache.next()) { + return false; + } + state->caches.emplace( + uri, Cache{(int)queryCache.getInt(0), queryCache.getStr(1), + queryCache.getInt(2) != 0, (int)queryCache.getInt(3)}); + } + + auto& cache(getCache(*state, uri)); + + wantMassQuery = cache.wantMassQuery; + priority = cache.priority; + + return true; + }); + } + 
+ std::pair<Outcome, std::shared_ptr<NarInfo>> lookupNarInfo( + const std::string& uri, const std::string& hashPart) override { + return retrySQLite<std::pair<Outcome, std::shared_ptr<NarInfo>>>( + [&]() -> std::pair<Outcome, std::shared_ptr<NarInfo>> { + auto state(_state.lock()); + + auto& cache(getCache(*state, uri)); + + auto now = time(nullptr); + + auto queryNAR(state->queryNAR.use()(cache.id)(hashPart)( + now - settings.ttlNegativeNarInfoCache)( + now - settings.ttlPositiveNarInfoCache)); + + if (!queryNAR.next()) { + return {oUnknown, nullptr}; + } + + if (queryNAR.getInt(0) == 0) { + return {oInvalid, nullptr}; + } + + auto narInfo = make_ref<NarInfo>(); + + auto namePart = queryNAR.getStr(1); + narInfo->path = cache.storeDir + "/" + hashPart + + (namePart.empty() ? "" : "-" + namePart); + narInfo->url = queryNAR.getStr(2); + narInfo->compression = queryNAR.getStr(3); + if (!queryNAR.isNull(4)) { + narInfo->fileHash = Hash(queryNAR.getStr(4)); + } + narInfo->fileSize = queryNAR.getInt(5); + narInfo->narHash = Hash(queryNAR.getStr(6)); + narInfo->narSize = queryNAR.getInt(7); + /* Skip empty tokens: an empty column means "no references", not a reference to the bare store directory. */ + for (auto r : absl::StrSplit(queryNAR.getStr(8), absl::ByChar(' '), absl::SkipEmpty())) { + narInfo->references.insert(absl::StrCat(cache.storeDir, "/", r)); + } + if (!queryNAR.isNull(9)) { + narInfo->deriver = cache.storeDir + "/" + queryNAR.getStr(9); + } + /* Likewise, an empty signature column must not produce an empty signature entry. */ + for (auto& sig : + absl::StrSplit(queryNAR.getStr(10), absl::ByChar(' '), absl::SkipEmpty())) { + narInfo->sigs.insert(std::string(sig)); + } + narInfo->ca = queryNAR.getStr(11); + + return {oValid, narInfo}; + }); + } + + void upsertNarInfo(const std::string& uri, const std::string& hashPart, + std::shared_ptr<ValidPathInfo> info) override { + retrySQLite<void>([&]() { + auto state(_state.lock()); + + auto& cache(getCache(*state, uri)); + + if (info) { + auto narInfo = std::dynamic_pointer_cast<NarInfo>(info); + + assert(hashPart == storePathToHash(info->path)); + + state->insertNAR + .use()(cache.id)(hashPart)(storePathToName(info->path))( + narInfo ? 
narInfo->url : "", narInfo != nullptr)( + narInfo ? narInfo->compression : "", narInfo != nullptr)( + narInfo && narInfo->fileHash ? narInfo->fileHash.to_string() + : "", + narInfo && narInfo->fileHash)( + narInfo ? narInfo->fileSize : 0, + narInfo != nullptr && + (narInfo->fileSize != 0u))(info->narHash.to_string())( + info->narSize)(concatStringsSep(" ", info->shortRefs()))( + !info->deriver.empty() ? baseNameOf(info->deriver) : "", + !info->deriver.empty())(concatStringsSep(" ", info->sigs))( + info->ca)(time(nullptr)) + .exec(); + + } else { + state->insertMissingNAR.use()(cache.id)(hashPart)(time(nullptr)).exec(); + } + }); + } +}; + +ref<NarInfoDiskCache> getNarInfoDiskCache() { + static ref<NarInfoDiskCache> cache = make_ref<NarInfoDiskCacheImpl>(); + return cache; +} + +} // namespace nix diff --git a/third_party/nix/src/libstore/nar-info-disk-cache.hh b/third_party/nix/src/libstore/nar-info-disk-cache.hh new file mode 100644 index 000000000000..65bb773c92f7 --- /dev/null +++ b/third_party/nix/src/libstore/nar-info-disk-cache.hh @@ -0,0 +1,30 @@ +#pragma once + +#include "nar-info.hh" +#include "ref.hh" + +namespace nix { + +class NarInfoDiskCache { + public: + typedef enum { oValid, oInvalid, oUnknown } Outcome; + + virtual void createCache(const std::string& uri, const Path& storeDir, + bool wantMassQuery, int priority) = 0; + + virtual bool cacheExists(const std::string& uri, bool& wantMassQuery, + int& priority) = 0; + + virtual std::pair<Outcome, std::shared_ptr<NarInfo>> lookupNarInfo( + const std::string& uri, const std::string& hashPart) = 0; + + virtual void upsertNarInfo(const std::string& uri, + const std::string& hashPart, + std::shared_ptr<ValidPathInfo> info) = 0; +}; + +/* Return a singleton cache object that can be used concurrently by + multiple threads. 
*/ +ref<NarInfoDiskCache> getNarInfoDiskCache(); + +} // namespace nix diff --git a/third_party/nix/src/libstore/nar-info.cc b/third_party/nix/src/libstore/nar-info.cc new file mode 100644 index 000000000000..1dc0b54cf1e6 --- /dev/null +++ b/third_party/nix/src/libstore/nar-info.cc @@ -0,0 +1,139 @@ +#include "nar-info.hh" + +#include <absl/strings/numbers.h> +#include <absl/strings/str_split.h> + +#include "globals.hh" + +namespace nix { + +NarInfo::NarInfo(const Store& store, const std::string& s, + const std::string& whence) { + auto corrupt = [&]() { + throw Error(format("NAR info file '%1%' is corrupt") % whence); + }; + + auto parseHashField = [&](const std::string& s) { + try { + return Hash(s); + } catch (BadHash&) { + corrupt(); + return Hash(); // never reached + } + }; + + size_t pos = 0; + while (pos < s.size()) { + size_t colon = s.find(':', pos); + if (colon == std::string::npos) { + corrupt(); + } + + std::string name(s, pos, colon - pos); + + size_t eol = s.find('\n', colon + 2); + if (eol == std::string::npos) { + corrupt(); + } + + std::string value(s, colon + 2, eol - colon - 2); + + if (name == "StorePath") { + if (!store.isStorePath(value)) { + corrupt(); + } + path = value; + } else if (name == "URL") { + url = value; + } else if (name == "Compression") { + compression = value; + } else if (name == "FileHash") { + fileHash = parseHashField(value); + } else if (name == "FileSize") { + if (!absl::SimpleAtoi(value, &fileSize)) { + corrupt(); + } + } else if (name == "NarHash") { + narHash = parseHashField(value); + } else if (name == "NarSize") { + if (!absl::SimpleAtoi(value, &narSize)) { + corrupt(); + } + } else if (name == "References") { + std::vector<std::string> refs = absl::StrSplit(value, absl::ByChar(' ')); + if (!references.empty()) { + corrupt(); + } + for (auto& r : refs) { + auto r2 = store.storeDir + "/" + r; + if (!store.isStorePath(r2)) { + corrupt(); + } + references.insert(r2); + } + } else if (name == "Deriver") { + if (value 
!= "unknown-deriver") { + auto p = store.storeDir + "/" + value; + if (!store.isStorePath(p)) { + corrupt(); + } + deriver = p; + } + } else if (name == "System") { + system = value; + } else if (name == "Sig") { + sigs.insert(value); + } else if (name == "CA") { + if (!ca.empty()) { + corrupt(); + } + ca = value; + } + + pos = eol + 1; + } + + if (compression.empty()) { + compression = "bzip2"; + } + + if (path.empty() || url.empty() || narSize == 0 || !narHash) { + corrupt(); + } +} + +std::string NarInfo::to_string() const { + std::string res; + res += "StorePath: " + path + "\n"; + res += "URL: " + url + "\n"; + assert(!compression.empty()); + res += "Compression: " + compression + "\n"; + assert(fileHash.type == htSHA256); + res += "FileHash: " + fileHash.to_string(Base32) + "\n"; + res += "FileSize: " + std::to_string(fileSize) + "\n"; + assert(narHash.type == htSHA256); + res += "NarHash: " + narHash.to_string(Base32) + "\n"; + res += "NarSize: " + std::to_string(narSize) + "\n"; + + res += "References: " + concatStringsSep(" ", shortRefs()) + "\n"; + + if (!deriver.empty()) { + res += "Deriver: " + baseNameOf(deriver) + "\n"; + } + + if (!system.empty()) { + res += "System: " + system + "\n"; + } + + for (const auto& sig : sigs) { + res += "Sig: " + sig + "\n"; + } + + if (!ca.empty()) { + res += "CA: " + ca + "\n"; + } + + return res; +} + +} // namespace nix diff --git a/third_party/nix/src/libstore/nar-info.hh b/third_party/nix/src/libstore/nar-info.hh new file mode 100644 index 000000000000..ce362e703f99 --- /dev/null +++ b/third_party/nix/src/libstore/nar-info.hh @@ -0,0 +1,23 @@ +#pragma once + +#include "hash.hh" +#include "store-api.hh" +#include "types.hh" + +namespace nix { + +struct NarInfo : ValidPathInfo { + std::string url; + std::string compression; + Hash fileHash; + uint64_t fileSize = 0; + std::string system; + + NarInfo() {} + NarInfo(const ValidPathInfo& info) : ValidPathInfo(info) {} + NarInfo(const Store& store, const std::string& s, 
const std::string& whence); + + std::string to_string() const; +}; + +} // namespace nix diff --git a/third_party/nix/src/libstore/nix-store.pc.in b/third_party/nix/src/libstore/nix-store.pc.in new file mode 100644 index 000000000000..6d67b1e03808 --- /dev/null +++ b/third_party/nix/src/libstore/nix-store.pc.in @@ -0,0 +1,9 @@ +prefix=@prefix@ +libdir=@libdir@ +includedir=@includedir@ + +Name: Nix +Description: Nix Package Manager +Version: @PACKAGE_VERSION@ +Libs: -L${libdir} -lnixstore -lnixutil +Cflags: -I${includedir}/nix -std=c++17 diff --git a/third_party/nix/src/libstore/optimise-store.cc b/third_party/nix/src/libstore/optimise-store.cc new file mode 100644 index 000000000000..7217d54ca9af --- /dev/null +++ b/third_party/nix/src/libstore/optimise-store.cc @@ -0,0 +1,296 @@ +#include <cerrno> +#include <cstdio> +#include <cstdlib> +#include <cstring> +#include <regex> +#include <utility> + +#include <sys/stat.h> +#include <sys/types.h> +#include <unistd.h> + +#include "globals.hh" +#include "glog/logging.h" +#include "local-store.hh" +#include "util.hh" + +namespace nix { + +static void makeWritable(const Path& path) { + struct stat st; + if (lstat(path.c_str(), &st) != 0) { + throw SysError(format("getting attributes of path '%1%'") % path); + } + if (chmod(path.c_str(), st.st_mode | S_IWUSR) == -1) { + throw SysError(format("changing writability of '%1%'") % path); + } +} + +struct MakeReadOnly { + Path path; + explicit MakeReadOnly(Path path) : path(std::move(path)) {} + ~MakeReadOnly() { + try { + /* This will make the path read-only. */ + if (!path.empty()) { + canonicaliseTimestampAndPermissions(path); + } + } catch (...) 
{ + ignoreException(); + } + } +}; + +LocalStore::InodeHash LocalStore::loadInodeHash() { + DLOG(INFO) << "loading hash inodes in memory"; + InodeHash inodeHash; + + AutoCloseDir dir(opendir(linksDir.c_str())); + if (!dir) { + throw SysError(format("opening directory '%1%'") % linksDir); + } + + struct dirent* dirent; + while (errno = 0, dirent = readdir(dir.get())) { /* sic */ + checkInterrupt(); + // We don't care if we hit non-hash files, anything goes + inodeHash.insert(dirent->d_ino); + } + if (errno) { + throw SysError(format("reading directory '%1%'") % linksDir); + } + + DLOG(INFO) << "loaded " << inodeHash.size() << " hash inodes"; + + return inodeHash; +} + +Strings LocalStore::readDirectoryIgnoringInodes(const Path& path, + const InodeHash& inodeHash) { + Strings names; + + AutoCloseDir dir(opendir(path.c_str())); + if (!dir) { + throw SysError(format("opening directory '%1%'") % path); + } + + struct dirent* dirent; + while (errno = 0, dirent = readdir(dir.get())) { /* sic */ + checkInterrupt(); + + if (inodeHash.count(dirent->d_ino) != 0u) { + DLOG(WARNING) << dirent->d_name << " is already linked"; + continue; + } + + std::string name = dirent->d_name; + if (name == "." || name == "..") { + continue; + } + names.push_back(name); + } + if (errno) { + throw SysError(format("reading directory '%1%'") % path); + } + + return names; +} + +void LocalStore::optimisePath_(OptimiseStats& stats, const Path& path, + InodeHash& inodeHash) { + checkInterrupt(); + + struct stat st; + if (lstat(path.c_str(), &st) != 0) { + throw SysError(format("getting attributes of path '%1%'") % path); + } + + if (S_ISDIR(st.st_mode)) { + Strings names = readDirectoryIgnoringInodes(path, inodeHash); + for (auto& i : names) { + optimisePath_(stats, path + "/" + i, inodeHash); + } + return; + } + + /* We can hard link regular files and maybe symlinks. 
*/ + if (!S_ISREG(st.st_mode) +#if CAN_LINK_SYMLINK + && !S_ISLNK(st.st_mode) +#endif + ) + return; + + /* Sometimes SNAFUs can cause files in the Nix store to be + modified, in particular when running programs as root under + NixOS (example: $fontconfig/var/cache being modified). Skip + those files. FIXME: check the modification time. */ + if (S_ISREG(st.st_mode) && ((st.st_mode & S_IWUSR) != 0u)) { + LOG(WARNING) << "skipping suspicious writable file '" << path << "'"; + return; + } + + /* This can still happen on top-level files. */ + if (st.st_nlink > 1 && (inodeHash.count(st.st_ino) != 0u)) { + DLOG(INFO) << path << " is already linked, with " << (st.st_nlink - 2) + << " other file(s)"; + return; + } + + /* Hash the file. Note that hashPath() returns the hash over the + NAR serialisation, which includes the execute bit on the file. + Thus, executable and non-executable files with the same + contents *won't* be linked (which is good because otherwise the + permissions would be screwed up). + + Also note that if `path' is a symlink, then we're hashing the + contents of the symlink (i.e. the result of readlink()), not + the contents of the target (which may not even exist). */ + Hash hash = hashPath(htSHA256, path).first; + LOG(INFO) << path << " has hash " << hash.to_string(); + + /* Check if this is a known hash. */ + Path linkPath = linksDir + "/" + hash.to_string(Base32, false); + +retry: + if (!pathExists(linkPath)) { + /* Nope, create a hard link in the links directory. */ + if (link(path.c_str(), linkPath.c_str()) == 0) { + inodeHash.insert(st.st_ino); + return; + } + + switch (errno) { + case EEXIST: + /* Fall through if another process created ‘linkPath’ before + we did. */ + break; + + case ENOSPC: + /* On ext4, that probably means the directory index is + full. When that happens, it's fine to ignore it: we + just effectively disable deduplication of this + file. 
*/ + /* Quote both paths, matching the SysError message used for the other link() failures. */ + LOG(WARNING) << "cannot link '" << linkPath << "' to '" << path << "': " + << strerror(errno); + + return; + + default: + throw SysError("cannot link '%1%' to '%2%'", linkPath, path); + } + } + + /* Yes! We've seen a file with the same contents. Replace the + current file with a hard link to that file. */ + struct stat stLink; + if (lstat(linkPath.c_str(), &stLink) != 0) { + throw SysError(format("getting attributes of path '%1%'") % linkPath); + } + + if (st.st_ino == stLink.st_ino) { + DLOG(INFO) << path << " is already linked to " << linkPath; + return; + } + + if (st.st_size != stLink.st_size) { + LOG(WARNING) << "removing corrupted link '" << linkPath << "'"; + unlink(linkPath.c_str()); + goto retry; + } + + DLOG(INFO) << "linking '" << path << "' to '" << linkPath << "'"; + + /* Make the containing directory writable, but only if it's not + the store itself (we don't want or need to mess with its + permissions). */ + bool mustToggle = dirOf(path) != realStoreDir; + if (mustToggle) { + makeWritable(dirOf(path)); + } + + /* When we're done, make the directory read-only again and reset + its timestamp back to 0. */ + MakeReadOnly makeReadOnly(mustToggle ? dirOf(path) : ""); + + Path tempLink = + (format("%1%/.tmp-link-%2%-%3%") % realStoreDir % getpid() % random()) + .str(); + + if (link(linkPath.c_str(), tempLink.c_str()) == -1) { + if (errno == EMLINK) { + /* Too many links to the same file (>= 32000 on most file + systems). This is likely to happen with empty files. + Just shrug and ignore. */ + if (st.st_size != 0) { + LOG(WARNING) << linkPath << " has maximum number of links"; + } + return; + } + throw SysError("cannot link '%1%' to '%2%'", tempLink, linkPath); + } + + /* Atomically replace the old file with the new hard link. 
*/ + if (rename(tempLink.c_str(), path.c_str()) == -1) { + if (unlink(tempLink.c_str()) == -1) { + LOG(ERROR) << "unable to unlink '" << tempLink << "'"; + } + if (errno == EMLINK) { + /* Some filesystems generate too many links on the rename, + rather than on the original link. (Probably it + temporarily increases the st_nlink field before + decreasing it again.) */ + DLOG(WARNING) << "'" << linkPath + << "' has reached maximum number of links"; + return; + } + throw SysError(format("cannot rename '%1%' to '%2%'") % tempLink % path); + } + + stats.filesLinked++; + stats.bytesFreed += st.st_size; + stats.blocksFreed += st.st_blocks; +} + +void LocalStore::optimiseStore(OptimiseStats& stats) { + PathSet paths = queryAllValidPaths(); + InodeHash inodeHash = loadInodeHash(); + + uint64_t done = 0; + + for (auto& i : paths) { + addTempRoot(i); + if (!isValidPath(i)) { + continue; + } /* path was GC'ed, probably */ + { + LOG(INFO) << "optimising path '" << i << "'"; + optimisePath_(stats, realStoreDir + "/" + baseNameOf(i), inodeHash); + } + done++; + } +} + +static std::string showBytes(unsigned long long bytes) { + return (format("%.2f MiB") % (bytes / (1024.0 * 1024.0))).str(); +} + +void LocalStore::optimiseStore() { + OptimiseStats stats; + + optimiseStore(stats); + + LOG(INFO) << showBytes(stats.bytesFreed) << " freed by hard-linking " + << stats.filesLinked << " files"; +} + +void LocalStore::optimisePath(const Path& path) { + OptimiseStats stats; + InodeHash inodeHash; + + if (settings.autoOptimiseStore) { + optimisePath_(stats, path, inodeHash); + } +} + +} // namespace nix diff --git a/third_party/nix/src/libstore/parsed-derivations.cc b/third_party/nix/src/libstore/parsed-derivations.cc new file mode 100644 index 000000000000..571f49ad59e2 --- /dev/null +++ b/third_party/nix/src/libstore/parsed-derivations.cc @@ -0,0 +1,127 @@ +#include "parsed-derivations.hh" + +#include <absl/strings/str_split.h> + +namespace nix { + 
+ParsedDerivation::ParsedDerivation(const Path& drvPath, BasicDerivation& drv) + : drvPath(drvPath), drv(drv) { + /* Parse the __json attribute, if any. */ + auto jsonAttr = drv.env.find("__json"); + if (jsonAttr != drv.env.end()) { + try { + structuredAttrs = nlohmann::json::parse(jsonAttr->second); + } catch (std::exception& e) { + throw Error("cannot process __json attribute of '%s': %s", drvPath, + e.what()); + } + } +} + +std::optional<std::string> ParsedDerivation::getStringAttr( + const std::string& name) const { + if (structuredAttrs) { + auto i = structuredAttrs->find(name); + if (i == structuredAttrs->end()) { + return {}; + } + if (!i->is_string()) { + throw Error("attribute '%s' of derivation '%s' must be a string", name, + drvPath); + } + return i->get<std::string>(); + + } else { + auto i = drv.env.find(name); + if (i == drv.env.end()) { + return {}; + } + return i->second; + } +} + +bool ParsedDerivation::getBoolAttr(const std::string& name, bool def) const { + if (structuredAttrs) { + auto i = structuredAttrs->find(name); + if (i == structuredAttrs->end()) { + return def; + } + if (!i->is_boolean()) { + throw Error("attribute '%s' of derivation '%s' must be a Boolean", name, + drvPath); + } + return i->get<bool>(); + + } else { + auto i = drv.env.find(name); + if (i == drv.env.end()) { + return def; + } + return i->second == "1"; + } +} + +std::optional<Strings> ParsedDerivation::getStringsAttr( + const std::string& name) const { + if (structuredAttrs) { + auto i = structuredAttrs->find(name); + if (i == structuredAttrs->end()) { + return {}; + } + if (!i->is_array()) { + throw Error("attribute '%s' of derivation '%s' must be a list of strings", + name, drvPath); + } + Strings res; + for (const auto& j : *i) { + if (!j.is_string()) { + throw Error( + "attribute '%s' of derivation '%s' must be a list of strings", name, + drvPath); + } + res.push_back(j.get<std::string>()); + } + return res; + + } else { + auto i = drv.env.find(name); + if (i == 
drv.env.end()) { + return {}; + } + /* Tokenise on whitespace and drop empty tokens, so repeated separators or an empty attribute do not yield "" entries. */ + return absl::StrSplit(i->second, absl::ByAnyChar(" \t\n\r"), absl::SkipEmpty()); + } +} + +StringSet ParsedDerivation::getRequiredSystemFeatures() const { + StringSet res; + for (auto& i : getStringsAttr("requiredSystemFeatures").value_or(Strings())) { + res.insert(i); + } + return res; +} + +bool ParsedDerivation::canBuildLocally() const { + if (drv.platform != settings.thisSystem.get() && + (settings.extraPlatforms.get().count(drv.platform) == 0u) && + !drv.isBuiltin()) { + return false; + } + + for (auto& feature : getRequiredSystemFeatures()) { + if (settings.systemFeatures.get().count(feature) == 0u) { + return false; + } + } + + return true; +} + +bool ParsedDerivation::willBuildLocally() const { + return getBoolAttr("preferLocalBuild") && canBuildLocally(); +} + +bool ParsedDerivation::substitutesAllowed() const { + return getBoolAttr("allowSubstitutes", true); +} + +} // namespace nix diff --git a/third_party/nix/src/libstore/parsed-derivations.hh b/third_party/nix/src/libstore/parsed-derivations.hh new file mode 100644 index 000000000000..7b2da3b566ac --- /dev/null +++ b/third_party/nix/src/libstore/parsed-derivations.hh @@ -0,0 +1,34 @@ +#include <nlohmann/json.hpp> + +#include "derivations.hh" + +namespace nix { + +class ParsedDerivation { + Path drvPath; + BasicDerivation& drv; + std::optional<nlohmann::json> structuredAttrs; + + public: + ParsedDerivation(const Path& drvPath, BasicDerivation& drv); + + const std::optional<nlohmann::json>& getStructuredAttrs() const { + return structuredAttrs; + } + + std::optional<std::string> getStringAttr(const std::string& name) const; + + bool getBoolAttr(const std::string& name, bool def = false) const; + + std::optional<Strings> getStringsAttr(const std::string& name) const; + + StringSet getRequiredSystemFeatures() const; + + bool canBuildLocally() const; + + bool willBuildLocally() const; + + bool substitutesAllowed() const; +}; + +} // namespace nix diff --git 
a/third_party/nix/src/libstore/pathlocks.cc b/third_party/nix/src/libstore/pathlocks.cc new file mode 100644 index 000000000000..642e64a62bac --- /dev/null +++ b/third_party/nix/src/libstore/pathlocks.cc @@ -0,0 +1,172 @@ +#include "pathlocks.hh" + +#include <cerrno> +#include <cstdlib> + +#include <fcntl.h> +#include <glog/logging.h> +#include <sys/file.h> +#include <sys/stat.h> +#include <sys/types.h> + +#include "sync.hh" +#include "util.hh" + +namespace nix { + +AutoCloseFD openLockFile(const Path& path, bool create) { + AutoCloseFD fd; + + fd = open(path.c_str(), O_CLOEXEC | O_RDWR | (create ? O_CREAT : 0), 0600); + if (!fd && (create || errno != ENOENT)) { + throw SysError(format("opening lock file '%1%'") % path); + } + + return fd; +} + +void deleteLockFile(const Path& path, int fd) { + /* Get rid of the lock file. Have to be careful not to introduce + races. Write a (meaningless) token to the file to indicate to + other processes waiting on this lock that the lock is stale + (deleted). */ + unlink(path.c_str()); + writeFull(fd, "d"); + /* Note that the result of unlink() is ignored; removing the lock + file is an optimisation, not a necessity. 
*/ +} + +bool lockFile(int fd, LockType lockType, bool wait) { + int type; + if (lockType == ltRead) { + type = LOCK_SH; + } else if (lockType == ltWrite) { + type = LOCK_EX; + } else if (lockType == ltNone) { + type = LOCK_UN; + } else { + abort(); + } + + if (wait) { + while (flock(fd, type) != 0) { + checkInterrupt(); + if (errno != EINTR) { + throw SysError(format("acquiring/releasing lock")); + } + return false; + } + } else { + while (flock(fd, type | LOCK_NB) != 0) { + checkInterrupt(); + if (errno == EWOULDBLOCK) { + return false; + } + if (errno != EINTR) { + throw SysError(format("acquiring/releasing lock")); + } + } + } + + return true; +} + +PathLocks::PathLocks() : deletePaths(false) {} + +PathLocks::PathLocks(const PathSet& paths, const std::string& waitMsg) + : deletePaths(false) { + lockPaths(paths, waitMsg); +} + +bool PathLocks::lockPaths(const PathSet& paths, const std::string& waitMsg, + bool wait) { + assert(fds.empty()); + + /* Note that `fds' is built incrementally so that the destructor + will only release those locks that we have already acquired. */ + + /* Acquire the lock for each path in sorted order. This ensures + that locks are always acquired in the same order, thus + preventing deadlocks. */ + for (auto& path : paths) { + checkInterrupt(); + Path lockPath = path + ".lock"; + + DLOG(INFO) << "locking path '" << path << "'"; + + AutoCloseFD fd; + + while (true) { + /* Open/create the lock file. */ + fd = openLockFile(lockPath, true); + + /* Acquire an exclusive lock. */ + if (!lockFile(fd.get(), ltWrite, false)) { + if (wait) { + if (!waitMsg.empty()) { + LOG(WARNING) << waitMsg; + } + lockFile(fd.get(), ltWrite, true); + } else { + /* Failed to lock this path; release all other + locks. */ + unlock(); + return false; + } + } + + DLOG(INFO) << "lock acquired on '" << lockPath << "'"; + + /* Check that the lock file hasn't become stale (i.e., + hasn't been unlinked). 
*/ + struct stat st; + if (fstat(fd.get(), &st) == -1) { + throw SysError(format("statting lock file '%1%'") % lockPath); + } + if (st.st_size != 0) { + /* This lock file has been unlinked, so we're holding + a lock on a deleted file. This means that other + processes may create and acquire a lock on + `lockPath', and proceed. So we must retry. */ + DLOG(INFO) << "open lock file '" << lockPath << "' has become stale"; + } else { + break; + } + } + + /* Release the descriptor from `fd' so that it isn't closed here; + it is now tracked in `fds' and closed later by unlock(). */ + fds.emplace_back(fd.release(), lockPath); + } + + return true; +} + +PathLocks::~PathLocks() { + try { + unlock(); + } catch (...) { + ignoreException(); + } +} + +void PathLocks::unlock() { + for (auto& i : fds) { + if (deletePaths) { + deleteLockFile(i.second, i.first); + } + + if (close(i.first) == -1) { + LOG(WARNING) << "cannot close lock file on '" << i.second << "'"; + } + + DLOG(INFO) << "lock released on '" << i.second << "'"; + } + + fds.clear(); +} + +void PathLocks::setDeletion(bool deletePaths) { + this->deletePaths = deletePaths; +} + +} // namespace nix diff --git a/third_party/nix/src/libstore/pathlocks.hh b/third_party/nix/src/libstore/pathlocks.hh new file mode 100644 index 000000000000..201e3f01b4df --- /dev/null +++ b/third_party/nix/src/libstore/pathlocks.hh @@ -0,0 +1,35 @@ +#pragma once + +#include "util.hh" + +namespace nix { + +/* Open (possibly create) a lock file and return the file descriptor. + -1 is returned if create is false and the lock could not be opened + because it doesn't exist. Any other error throws an exception. */ +AutoCloseFD openLockFile(const Path& path, bool create); + +/* Delete an open lock file. 
*/ +void deleteLockFile(const Path& path, int fd); + +enum LockType { ltRead, ltWrite, ltNone }; + +bool lockFile(int fd, LockType lockType, bool wait); + +class PathLocks { + private: + typedef std::pair<int, Path> FDPair; + std::list<FDPair> fds; + bool deletePaths; + + public: + PathLocks(); + PathLocks(const PathSet& paths, const std::string& waitMsg = ""); + bool lockPaths(const PathSet& _paths, const std::string& waitMsg = "", + bool wait = true); + ~PathLocks(); + void unlock(); + void setDeletion(bool deletePaths); +}; + +} // namespace nix diff --git a/third_party/nix/src/libstore/profiles.cc b/third_party/nix/src/libstore/profiles.cc new file mode 100644 index 000000000000..53e64879954b --- /dev/null +++ b/third_party/nix/src/libstore/profiles.cc @@ -0,0 +1,252 @@ +#include "profiles.hh" + +#include <cerrno> +#include <cstdio> + +#include <absl/strings/numbers.h> +#include <absl/strings/string_view.h> +#include <absl/strings/strip.h> +#include <glog/logging.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <unistd.h> + +#include "store-api.hh" +#include "util.hh" + +namespace nix { + +static bool cmpGensByNumber(const Generation& a, const Generation& b) { + return a.number < b.number; +} + +// Parse a generation out of the format +// `<profilename>-<generation>-link'. +static int parseName(absl::string_view profileName, absl::string_view name) { + // Consume the `<profilename>-' prefix and the `-link' suffix. 
+ if (!(absl::ConsumePrefix(&name, profileName) && + absl::ConsumePrefix(&name, "-") && + absl::ConsumeSuffix(&name, "-link"))) { + return -1; + } + + int n; + if (!absl::SimpleAtoi(name, &n) || n < 0) { + return -1; + } + + return n; +} + +Generations findGenerations(const Path& profile, int& curGen) { + Generations gens; + + Path profileDir = dirOf(profile); + std::string profileName = baseNameOf(profile); + + for (auto& i : readDirectory(profileDir)) { + int n; + if ((n = parseName(profileName, i.name)) != -1) { + Generation gen; + gen.path = profileDir + "/" + i.name; + gen.number = n; + struct stat st; + if (lstat(gen.path.c_str(), &st) != 0) { + throw SysError(format("statting '%1%'") % gen.path); + } + gen.creationTime = st.st_mtime; + gens.push_back(gen); + } + } + + gens.sort(cmpGensByNumber); + + curGen = pathExists(profile) ? parseName(profileName, readLink(profile)) : -1; + + return gens; +} + +static void makeName(const Path& profile, unsigned int num, Path& outLink) { + Path prefix = (format("%1%-%2%") % profile % num).str(); + outLink = prefix + "-link"; +} + +Path createGeneration(const ref<LocalFSStore>& store, const Path& profile, + const Path& outPath) { + /* The new generation number should be higher than all the + previous ones. */ + int dummy; + Generations gens = findGenerations(profile, dummy); + + unsigned int num; + if (!gens.empty()) { + Generation last = gens.back(); + + if (readLink(last.path) == outPath) { + /* We only create a new generation symlink if it differs + from the last one. + + This helps keeping gratuitous installs/rebuilds from piling + up uncontrolled numbers of generations, cluttering up the + UI like grub. */ + return last.path; + } + + num = gens.back().number; + } else { + num = 0; + } + + /* Create the new generation. 
Note that addPermRoot() blocks if + the garbage collector is running to prevent the stuff we've + built from moving from the temporary roots (which the GC knows) + to the permanent roots (of which the GC would have a stale + view). If we didn't do it this way, the GC might remove the + user environment etc. we've just built. */ + Path generation; + makeName(profile, num + 1, generation); + store->addPermRoot(outPath, generation, false, true); + + return generation; +} + +static void removeFile(const Path& path) { + if (remove(path.c_str()) == -1) { + throw SysError(format("cannot unlink '%1%'") % path); + } +} + +void deleteGeneration(const Path& profile, unsigned int gen) { + Path generation; + makeName(profile, gen, generation); + removeFile(generation); +} + +static void deleteGeneration2(const Path& profile, unsigned int gen, + bool dryRun) { + if (dryRun) { + LOG(INFO) << "would remove generation " << gen; + } else { + LOG(INFO) << "removing generation " << gen; + deleteGeneration(profile, gen); + } +} + +void deleteGenerations(const Path& profile, + const std::set<unsigned int>& gensToDelete, + bool dryRun) { + PathLocks lock; + lockProfile(lock, profile); + + int curGen; + Generations gens = findGenerations(profile, curGen); + + if (gensToDelete.find(curGen) != gensToDelete.end()) { + throw Error(format("cannot delete current generation of profile %1%'") % + profile); + } + + for (auto& i : gens) { + if (gensToDelete.find(i.number) == gensToDelete.end()) { + continue; + } + deleteGeneration2(profile, i.number, dryRun); + } +} + +void deleteGenerationsGreaterThan(const Path& profile, int max, bool dryRun) { + PathLocks lock; + lockProfile(lock, profile); + + int curGen; + bool fromCurGen = false; + Generations gens = findGenerations(profile, curGen); + for (auto i = gens.rbegin(); i != gens.rend(); ++i) { + if (i->number == curGen) { + fromCurGen = true; + max--; + continue; + } + if (fromCurGen) { + if (max != 0) { + max--; + continue; + } + 
deleteGeneration2(profile, i->number, dryRun); + } + } +} + +void deleteOldGenerations(const Path& profile, bool dryRun) { + PathLocks lock; + lockProfile(lock, profile); + + int curGen; + Generations gens = findGenerations(profile, curGen); + + for (auto& i : gens) { + if (i.number != curGen) { + deleteGeneration2(profile, i.number, dryRun); + } + } +} + +void deleteGenerationsOlderThan(const Path& profile, time_t t, bool dryRun) { + PathLocks lock; + lockProfile(lock, profile); + + int curGen; + Generations gens = findGenerations(profile, curGen); + + bool canDelete = false; + for (auto i = gens.rbegin(); i != gens.rend(); ++i) { + if (canDelete) { + assert(i->creationTime < t); + if (i->number != curGen) { + deleteGeneration2(profile, i->number, dryRun); + } + } else if (i->creationTime < t) { + /* We may now start deleting generations, but we don't + delete this generation yet, because this generation was + still the one that was active at the requested point in + time. */ + canDelete = true; + } + } +} + +void deleteGenerationsOlderThan(const Path& profile, + const std::string& timeSpec, bool dryRun) { + time_t curTime = time(nullptr); + std::string strDays = std::string(timeSpec, 0, timeSpec.size() - 1); + int days; + + if (!absl::SimpleAtoi(strDays, &days) || days < 1) { + throw Error(format("invalid number of days specifier '%1%'") % timeSpec); + } + + time_t oldTime = curTime - days * 24 * 3600; + + deleteGenerationsOlderThan(profile, oldTime, dryRun); +} + +void switchLink(const Path& link, Path target) { + /* Hacky. */ + if (dirOf(target) == dirOf(link)) { + target = baseNameOf(target); + } + + replaceSymlink(target, link); +} + +void lockProfile(PathLocks& lock, const Path& profile) { + lock.lockPaths({profile}, + (format("waiting for lock on profile '%1%'") % profile).str()); + lock.setDeletion(true); +} + +std::string optimisticLockProfile(const Path& profile) { + return pathExists(profile) ? 
readLink(profile) : ""; +} + +} // namespace nix diff --git a/third_party/nix/src/libstore/profiles.hh b/third_party/nix/src/libstore/profiles.hh new file mode 100644 index 000000000000..9f31083b32ee --- /dev/null +++ b/third_party/nix/src/libstore/profiles.hh @@ -0,0 +1,61 @@ +#pragma once + +#include <time.h> + +#include "pathlocks.hh" +#include "types.hh" + +namespace nix { + +struct Generation { + int number; + Path path; + time_t creationTime; + Generation() { number = -1; } + operator bool() const { return number != -1; } +}; + +typedef std::list<Generation> Generations; + +/* Returns the list of currently present generations for the specified + profile, sorted by generation number. */ +Generations findGenerations(const Path& profile, int& curGen); + +class LocalFSStore; + +Path createGeneration(const ref<LocalFSStore>& store, const Path& profile, + const Path& outPath); + +void deleteGeneration(const Path& profile, unsigned int gen); + +void deleteGenerations(const Path& profile, + const std::set<unsigned int>& gensToDelete, bool dryRun); + +void deleteGenerationsGreaterThan(const Path& profile, const int max, + bool dryRun); + +void deleteOldGenerations(const Path& profile, bool dryRun); + +void deleteGenerationsOlderThan(const Path& profile, time_t t, bool dryRun); + +void deleteGenerationsOlderThan(const Path& profile, + const std::string& timeSpec, bool dryRun); + +void switchLink(const Path& link, Path target); + +/* Ensure exclusive access to a profile. Any command that modifies + the profile first acquires this lock. */ +void lockProfile(PathLocks& lock, const Path& profile); + +/* Optimistic locking is used by long-running operations like `nix-env + -i'. Instead of acquiring the exclusive lock for the entire + duration of the operation, we just perform the operation + optimistically (without an exclusive lock), and check at the end + whether the profile changed while we were busy (i.e., the symlink + target changed). 
If so, the operation is restarted. Restarting is + generally cheap, since the build results are still in the Nix + store. Most of the time, only the user environment has to be + rebuilt. */ +std::string optimisticLockProfile(const Path& profile); + +} // namespace nix diff --git a/third_party/nix/src/libstore/references.cc b/third_party/nix/src/libstore/references.cc new file mode 100644 index 000000000000..6b9b7137c4e5 --- /dev/null +++ b/third_party/nix/src/libstore/references.cc @@ -0,0 +1,124 @@ +#include "references.hh" + +#include <cstdlib> +#include <map> + +#include <glog/logging.h> + +#include "archive.hh" +#include "hash.hh" +#include "util.hh" + +namespace nix { + +static unsigned int refLength = 32; /* characters */ + +static void search(const unsigned char* s, size_t len, StringSet& hashes, + StringSet& seen) { + static bool initialised = false; + static bool isBase32[256]; + if (!initialised) { + for (bool& i : isBase32) { + i = false; + } + for (char base32Char : base32Chars) { + isBase32[(unsigned char)base32Char] = true; + } + initialised = true; + } + + for (size_t i = 0; i + refLength <= len;) { + int j; + bool match = true; + for (j = refLength - 1; j >= 0; --j) { + if (!isBase32[(unsigned char)s[i + j]]) { + i += j + 1; + match = false; + break; + } + } + if (!match) { + continue; + } + std::string ref((const char*)s + i, refLength); + if (hashes.find(ref) != hashes.end()) { + DLOG(INFO) << "found reference to '" << ref << "' at offset " << i; + seen.insert(ref); + hashes.erase(ref); + } + ++i; + } +} + +struct RefScanSink : Sink { + HashSink hashSink; + StringSet hashes; + StringSet seen; + + std::string tail; + + RefScanSink() : hashSink(htSHA256) {} + + void operator()(const unsigned char* data, size_t len) override; +}; + +void RefScanSink::operator()(const unsigned char* data, size_t len) { + hashSink(data, len); + + /* It's possible that a reference spans the previous and current + fragment, so search in the concatenation of the tail of 
the + previous fragment and the start of the current fragment. */ + std::string s = + tail + std::string((const char*)data, len > refLength ? refLength : len); + search((const unsigned char*)s.data(), s.size(), hashes, seen); + + search(data, len, hashes, seen); + + size_t tailLen = len <= refLength ? len : refLength; + tail = std::string(tail, tail.size() < refLength - tailLen + ? 0 + : tail.size() - (refLength - tailLen)) + + std::string((const char*)data + len - tailLen, tailLen); +} + +PathSet scanForReferences(const std::string& path, const PathSet& refs, + HashResult& hash) { + RefScanSink sink; + std::map<std::string, Path> backMap; + + /* For efficiency (and a higher hit rate), just search for the + hash part of the file name. (This assumes that all references + have the form `HASH-bla'). */ + for (auto& i : refs) { + std::string baseName = baseNameOf(i); + std::string::size_type pos = baseName.find('-'); + if (pos == std::string::npos) { + throw Error(format("bad reference '%1%'") % i); + } + std::string s = std::string(baseName, 0, pos); + assert(s.size() == refLength); + assert(backMap.find(s) == backMap.end()); + // parseHash(htSHA256, s); + sink.hashes.insert(s); + backMap[s] = i; + } + + /* Look for the hashes in the NAR dump of the path. */ + dumpPath(path, sink); + + /* Map the hashes found back to their store paths. 
*/ + PathSet found; + for (auto& i : sink.seen) { + std::map<std::string, Path>::iterator j; + if ((j = backMap.find(i)) == backMap.end()) { + abort(); + } + found.insert(j->second); + } + + hash = sink.hashSink.finish(); + + return found; +} + +} // namespace nix diff --git a/third_party/nix/src/libstore/references.hh b/third_party/nix/src/libstore/references.hh new file mode 100644 index 000000000000..2229150e3359 --- /dev/null +++ b/third_party/nix/src/libstore/references.hh @@ -0,0 +1,11 @@ +#pragma once + +#include "hash.hh" +#include "types.hh" + +namespace nix { + +PathSet scanForReferences(const Path& path, const PathSet& refs, + HashResult& hash); + +} diff --git a/third_party/nix/src/libstore/remote-fs-accessor.cc b/third_party/nix/src/libstore/remote-fs-accessor.cc new file mode 100644 index 000000000000..ca478c213fde --- /dev/null +++ b/third_party/nix/src/libstore/remote-fs-accessor.cc @@ -0,0 +1,131 @@ +#include "remote-fs-accessor.hh" + +#include <fcntl.h> +#include <sys/stat.h> +#include <sys/types.h> + +#include "json.hh" +#include "nar-accessor.hh" + +namespace nix { + +RemoteFSAccessor::RemoteFSAccessor(const ref<Store>& store, + const Path& cacheDir) + : store(store), cacheDir(cacheDir) { + if (!cacheDir.empty()) { + createDirs(cacheDir); + } +} + +Path RemoteFSAccessor::makeCacheFile(const Path& storePath, + const std::string& ext) { + assert(!cacheDir.empty()); + return fmt("%s/%s.%s", cacheDir, storePathToHash(storePath), ext); +} + +void RemoteFSAccessor::addToCache(const Path& storePath, const std::string& nar, + const ref<FSAccessor>& narAccessor) { + nars.emplace(storePath, narAccessor); + + if (!cacheDir.empty()) { + try { + std::ostringstream str; + JSONPlaceholder jsonRoot(str); + listNar(jsonRoot, narAccessor, "", true); + writeFile(makeCacheFile(storePath, "ls"), str.str()); + + /* FIXME: do this asynchronously. */ + writeFile(makeCacheFile(storePath, "nar"), nar); + + } catch (...) 
{ + ignoreException(); + } + } +} + +std::pair<ref<FSAccessor>, Path> RemoteFSAccessor::fetch(const Path& path_) { + auto path = canonPath(path_); + + auto storePath = store->toStorePath(path); + std::string restPath = std::string(path, storePath.size()); + + if (!store->isValidPath(storePath)) { + throw InvalidPath(format("path '%1%' is not a valid store path") % + storePath); + } + + auto i = nars.find(storePath); + if (i != nars.end()) { + return {i->second, restPath}; + } + + StringSink sink; + std::string listing; + Path cacheFile; + + if (!cacheDir.empty() && + pathExists(cacheFile = makeCacheFile(storePath, "nar"))) { + try { + listing = nix::readFile(makeCacheFile(storePath, "ls")); + + auto narAccessor = makeLazyNarAccessor( + listing, [cacheFile](uint64_t offset, uint64_t length) { + AutoCloseFD fd = open(cacheFile.c_str(), O_RDONLY | O_CLOEXEC); + if (!fd) { + throw SysError("opening NAR cache file '%s'", cacheFile); + } + + if (lseek(fd.get(), offset, SEEK_SET) != (off_t)offset) { + throw SysError("seeking in '%s'", cacheFile); + } + + std::string buf(length, 0); + readFull(fd.get(), (unsigned char*)buf.data(), length); + + return buf; + }); + + nars.emplace(storePath, narAccessor); + return {narAccessor, restPath}; + + } catch (SysError&) { + } + + try { + *sink.s = nix::readFile(cacheFile); + + auto narAccessor = makeNarAccessor(sink.s); + nars.emplace(storePath, narAccessor); + return {narAccessor, restPath}; + + } catch (SysError&) { + } + } + + store->narFromPath(storePath, sink); + auto narAccessor = makeNarAccessor(sink.s); + addToCache(storePath, *sink.s, narAccessor); + return {narAccessor, restPath}; +} + +FSAccessor::Stat RemoteFSAccessor::stat(const Path& path) { + auto res = fetch(path); + return res.first->stat(res.second); +} + +StringSet RemoteFSAccessor::readDirectory(const Path& path) { + auto res = fetch(path); + return res.first->readDirectory(res.second); +} + +std::string RemoteFSAccessor::readFile(const Path& path) { + auto res = 
fetch(path); + return res.first->readFile(res.second); +} + +std::string RemoteFSAccessor::readLink(const Path& path) { + auto res = fetch(path); + return res.first->readLink(res.second); +} + +} // namespace nix diff --git a/third_party/nix/src/libstore/remote-fs-accessor.hh b/third_party/nix/src/libstore/remote-fs-accessor.hh new file mode 100644 index 000000000000..32c729f50dcf --- /dev/null +++ b/third_party/nix/src/libstore/remote-fs-accessor.hh @@ -0,0 +1,38 @@ +#pragma once + +#include "fs-accessor.hh" +#include "ref.hh" +#include "store-api.hh" + +namespace nix { + +class RemoteFSAccessor : public FSAccessor { + ref<Store> store; + + std::map<Path, ref<FSAccessor>> nars; + + Path cacheDir; + + std::pair<ref<FSAccessor>, Path> fetch(const Path& path_); + + friend class BinaryCacheStore; + + Path makeCacheFile(const Path& storePath, const std::string& ext); + + void addToCache(const Path& storePath, const std::string& nar, + const ref<FSAccessor>& narAccessor); + + public: + RemoteFSAccessor(const ref<Store>& store, + const /* FIXME: use std::optional */ Path& cacheDir = ""); + + Stat stat(const Path& path) override; + + StringSet readDirectory(const Path& path) override; + + std::string readFile(const Path& path) override; + + std::string readLink(const Path& path) override; +}; + +} // namespace nix diff --git a/third_party/nix/src/libstore/remote-store.cc b/third_party/nix/src/libstore/remote-store.cc new file mode 100644 index 000000000000..c5d1dac6cb6c --- /dev/null +++ b/third_party/nix/src/libstore/remote-store.cc @@ -0,0 +1,750 @@ +#include "remote-store.hh" + +#include <cerrno> +#include <cstring> + +#include <absl/strings/ascii.h> +#include <fcntl.h> +#include <glog/logging.h> +#include <sys/socket.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <sys/un.h> +#include <unistd.h> + +#include "affinity.hh" +#include "archive.hh" +#include "derivations.hh" +#include "finally.hh" +#include "globals.hh" +#include "pool.hh" +#include 
"prefork-compat.hh" +#include "serialise.hh" +#include "util.hh" +#include "worker-protocol.hh" + +namespace nix { + +Path readStorePath(Store& store, Source& from) { + Path path = readString(from); + store.assertStorePath(path); + return path; +} + +template <class T> +T readStorePaths(Store& store, Source& from) { + T paths = readStrings<T>(from); + for (auto& i : paths) { + store.assertStorePath(i); + } + return paths; +} + +template PathSet readStorePaths(Store& store, Source& from); +template Paths readStorePaths(Store& store, Source& from); + +/* TODO: Separate these store impls into different files, give them better names + */ +RemoteStore::RemoteStore(const Params& params) + : Store(params), + connections(make_ref<Pool<Connection>>( + std::max(1, (int)maxConnections), + [this]() { return openConnectionWrapper(); }, + [this](const ref<Connection>& r) { + return r->to.good() && r->from.good() && + std::chrono::duration_cast<std::chrono::seconds>( + std::chrono::steady_clock::now() - r->startTime) + .count() < maxConnectionAge; + })) {} + +ref<RemoteStore::Connection> RemoteStore::openConnectionWrapper() { + if (failed) { + throw Error("opening a connection to remote store '%s' previously failed", + getUri()); + } + try { + return openConnection(); + } catch (...) { + failed = true; + throw; + } +} + +UDSRemoteStore::UDSRemoteStore(const Params& params) + : Store(params), LocalFSStore(params), RemoteStore(params) {} + +UDSRemoteStore::UDSRemoteStore(std::string socket_path, const Params& params) + : Store(params), + LocalFSStore(params), + RemoteStore(params), + path(socket_path) {} + +std::string UDSRemoteStore::getUri() { + if (path) { + return std::string("unix://") + *path; + } + return "daemon"; +} + +ref<RemoteStore::Connection> UDSRemoteStore::openConnection() { + auto conn = make_ref<Connection>(); + + /* Connect to a daemon that does the privileged work for us. 
*/ + conn->fd = socket(PF_UNIX, + SOCK_STREAM +#ifdef SOCK_CLOEXEC + | SOCK_CLOEXEC +#endif + , + 0); + if (!conn->fd) { + throw SysError("cannot create Unix domain socket"); + } + closeOnExec(conn->fd.get()); + + std::string socketPath = path ? *path : settings.nixDaemonSocketFile; + + struct sockaddr_un addr; + addr.sun_family = AF_UNIX; + if (socketPath.size() + 1 >= sizeof(addr.sun_path)) { + throw Error(format("socket path '%1%' is too long") % socketPath); + } + strcpy(addr.sun_path, socketPath.c_str()); + + if (::connect(conn->fd.get(), (struct sockaddr*)&addr, sizeof(addr)) == -1) { + throw SysError(format("cannot connect to daemon at '%1%'") % socketPath); + } + + conn->from.fd = conn->fd.get(); + conn->to.fd = conn->fd.get(); + + conn->startTime = std::chrono::steady_clock::now(); + + initConnection(*conn); + + return conn; +} + +void RemoteStore::initConnection(Connection& conn) { + /* Send the magic greeting, check for the reply. */ + try { + conn.to << WORKER_MAGIC_1; + conn.to.flush(); + unsigned int magic = readInt(conn.from); + if (magic != WORKER_MAGIC_2) { + throw Error("protocol mismatch"); + } + + conn.from >> conn.daemonVersion; + if (GET_PROTOCOL_MAJOR(conn.daemonVersion) != + GET_PROTOCOL_MAJOR(PROTOCOL_VERSION)) { + throw Error("Nix daemon protocol version not supported"); + } + if (GET_PROTOCOL_MINOR(conn.daemonVersion) < 10) { + throw Error("the Nix daemon version is too old"); + } + conn.to << PROTOCOL_VERSION; + + if (GET_PROTOCOL_MINOR(conn.daemonVersion) >= 14) { + int cpu = sameMachine() && settings.lockCPU ? 
lockToCurrentCPU() : -1; + if (cpu != -1) { + conn.to << 1 << cpu; + } else { + conn.to << 0; + } + } + + if (GET_PROTOCOL_MINOR(conn.daemonVersion) >= 11) { + conn.to << 0u; + } + + auto ex = conn.processStderr(); + if (ex) { + std::rethrow_exception(ex); + } + } catch (Error& e) { + throw Error("cannot open connection to remote store '%s': %s", getUri(), + e.what()); + } + + setOptions(conn); +} + +void RemoteStore::setOptions(Connection& conn) { + conn.to << wopSetOptions << static_cast<uint64_t>(settings.keepFailed) + << static_cast<uint64_t>(settings.keepGoing) + // TODO(tazjin): Remove the verbosity stuff here. + << static_cast<uint64_t>(settings.tryFallback) << compat::kInfo + << settings.maxBuildJobs << settings.maxSilentTime + << 1u + // TODO(tazjin): what behaviour does this toggle remotely? + << (settings.verboseBuild ? compat::kError : compat::kVomit) + << 0 // obsolete log type + << 0 /* obsolete print build trace */ + << settings.buildCores + << static_cast<uint64_t>(settings.useSubstitutes); + + if (GET_PROTOCOL_MINOR(conn.daemonVersion) >= 12) { + std::map<std::string, Config::SettingInfo> overrides; + globalConfig.getSettings(overrides, true); + overrides.erase(settings.keepFailed.name); + overrides.erase(settings.keepGoing.name); + overrides.erase(settings.tryFallback.name); + overrides.erase(settings.maxBuildJobs.name); + overrides.erase(settings.maxSilentTime.name); + overrides.erase(settings.buildCores.name); + overrides.erase(settings.useSubstitutes.name); + overrides.erase(settings.showTrace.name); + conn.to << overrides.size(); + for (auto& i : overrides) { + conn.to << i.first << i.second.value; + } + } + + auto ex = conn.processStderr(); + if (ex) { + std::rethrow_exception(ex); + } +} + +/* A wrapper around Pool<RemoteStore::Connection>::Handle that marks + the connection as bad (causing it to be closed) if a non-daemon + exception is thrown before the handle is closed. 
Such an exception + causes a deviation from the expected protocol and therefore a + desynchronization between the client and daemon. */ +struct ConnectionHandle { + Pool<RemoteStore::Connection>::Handle handle; + bool daemonException = false; + + explicit ConnectionHandle(Pool<RemoteStore::Connection>::Handle&& handle) + : handle(std::move(handle)) {} + + ConnectionHandle(ConnectionHandle&& h) : handle(std::move(h.handle)) {} + + ~ConnectionHandle() { + if (!daemonException && (std::uncaught_exceptions() != 0)) { + handle.markBad(); + // TODO(tazjin): are these types of things supposed to be DEBUG? + DLOG(INFO) << "closing daemon connection because of an exception"; + } + } + + RemoteStore::Connection* operator->() { return &*handle; } + + void processStderr(Sink* sink = nullptr, Source* source = nullptr) { + auto ex = handle->processStderr(sink, source); + if (ex) { + daemonException = true; + std::rethrow_exception(ex); + } + } +}; + +ConnectionHandle RemoteStore::getConnection() { + return ConnectionHandle(connections->get()); +} + +bool RemoteStore::isValidPathUncached(const Path& path) { + auto conn(getConnection()); + conn->to << wopIsValidPath << path; + conn.processStderr(); + return readInt(conn->from) != 0u; +} + +PathSet RemoteStore::queryValidPaths(const PathSet& paths, + SubstituteFlag maybeSubstitute) { + auto conn(getConnection()); + if (GET_PROTOCOL_MINOR(conn->daemonVersion) < 12) { + PathSet res; + for (auto& i : paths) { + if (isValidPath(i)) { + res.insert(i); + } + } + return res; + } + conn->to << wopQueryValidPaths << paths; + conn.processStderr(); + return readStorePaths<PathSet>(*this, conn->from); +} + +PathSet RemoteStore::queryAllValidPaths() { + auto conn(getConnection()); + conn->to << wopQueryAllValidPaths; + conn.processStderr(); + return readStorePaths<PathSet>(*this, conn->from); +} + +PathSet RemoteStore::querySubstitutablePaths(const PathSet& paths) { + auto conn(getConnection()); + if (GET_PROTOCOL_MINOR(conn->daemonVersion) < 
12) { + PathSet res; + for (auto& i : paths) { + conn->to << wopHasSubstitutes << i; + conn.processStderr(); + if (readInt(conn->from) != 0u) { + res.insert(i); + } + } + return res; + } + conn->to << wopQuerySubstitutablePaths << paths; + conn.processStderr(); + return readStorePaths<PathSet>(*this, conn->from); +} + +void RemoteStore::querySubstitutablePathInfos(const PathSet& paths, + SubstitutablePathInfos& infos) { + if (paths.empty()) { + return; + } + + auto conn(getConnection()); + + if (GET_PROTOCOL_MINOR(conn->daemonVersion) < 12) { + for (auto& i : paths) { + SubstitutablePathInfo info; + conn->to << wopQuerySubstitutablePathInfo << i; + conn.processStderr(); + unsigned int reply = readInt(conn->from); + if (reply == 0) { + continue; + } + info.deriver = readString(conn->from); + if (!info.deriver.empty()) { + assertStorePath(info.deriver); + } + info.references = readStorePaths<PathSet>(*this, conn->from); + info.downloadSize = readLongLong(conn->from); + info.narSize = readLongLong(conn->from); + infos[i] = info; + } + + } else { + conn->to << wopQuerySubstitutablePathInfos << paths; + conn.processStderr(); + auto count = readNum<size_t>(conn->from); + for (size_t n = 0; n < count; n++) { + Path path = readStorePath(*this, conn->from); + SubstitutablePathInfo& info(infos[path]); + info.deriver = readString(conn->from); + if (!info.deriver.empty()) { + assertStorePath(info.deriver); + } + info.references = readStorePaths<PathSet>(*this, conn->from); + info.downloadSize = readLongLong(conn->from); + info.narSize = readLongLong(conn->from); + } + } +} + +void RemoteStore::queryPathInfoUncached( + const Path& path, + Callback<std::shared_ptr<ValidPathInfo>> callback) noexcept { + try { + std::shared_ptr<ValidPathInfo> info; + { + auto conn(getConnection()); + conn->to << wopQueryPathInfo << path; + try { + conn.processStderr(); + } catch (Error& e) { + // Ugly backwards compatibility hack. 
+ if (e.msg().find("is not valid") != std::string::npos) { + throw InvalidPath(e.what()); + } + throw; + } + if (GET_PROTOCOL_MINOR(conn->daemonVersion) >= 17) { + bool valid; + conn->from >> valid; + if (!valid) { + throw InvalidPath(format("path '%s' is not valid") % path); + } + } + info = std::make_shared<ValidPathInfo>(); + info->path = path; + info->deriver = readString(conn->from); + if (!info->deriver.empty()) { + assertStorePath(info->deriver); + } + info->narHash = Hash(readString(conn->from), htSHA256); + info->references = readStorePaths<PathSet>(*this, conn->from); + conn->from >> info->registrationTime >> info->narSize; + if (GET_PROTOCOL_MINOR(conn->daemonVersion) >= 16) { + conn->from >> info->ultimate; + info->sigs = readStrings<StringSet>(conn->from); + conn->from >> info->ca; + } + } + callback(std::move(info)); + } catch (...) { + callback.rethrow(); + } +} + +void RemoteStore::queryReferrers(const Path& path, PathSet& referrers) { + auto conn(getConnection()); + conn->to << wopQueryReferrers << path; + conn.processStderr(); + auto referrers2 = readStorePaths<PathSet>(*this, conn->from); + referrers.insert(referrers2.begin(), referrers2.end()); +} + +PathSet RemoteStore::queryValidDerivers(const Path& path) { + auto conn(getConnection()); + conn->to << wopQueryValidDerivers << path; + conn.processStderr(); + return readStorePaths<PathSet>(*this, conn->from); +} + +PathSet RemoteStore::queryDerivationOutputs(const Path& path) { + auto conn(getConnection()); + conn->to << wopQueryDerivationOutputs << path; + conn.processStderr(); + return readStorePaths<PathSet>(*this, conn->from); +} + +PathSet RemoteStore::queryDerivationOutputNames(const Path& path) { + auto conn(getConnection()); + conn->to << wopQueryDerivationOutputNames << path; + conn.processStderr(); + return readStrings<PathSet>(conn->from); +} + +Path RemoteStore::queryPathFromHashPart(const std::string& hashPart) { + auto conn(getConnection()); + conn->to << wopQueryPathFromHashPart << 
hashPart; + conn.processStderr(); + Path path = readString(conn->from); + if (!path.empty()) { + assertStorePath(path); + } + return path; +} + +void RemoteStore::addToStore(const ValidPathInfo& info, Source& source, + RepairFlag repair, CheckSigsFlag checkSigs, + std::shared_ptr<FSAccessor> accessor) { + auto conn(getConnection()); + + if (GET_PROTOCOL_MINOR(conn->daemonVersion) < 18) { + conn->to << wopImportPaths; + + auto source2 = sinkToSource([&](Sink& sink) { + sink << 1 // == path follows + ; + copyNAR(source, sink); + sink << exportMagic << info.path << info.references << info.deriver + << 0 // == no legacy signature + << 0 // == no path follows + ; + }); + + conn.processStderr(nullptr, source2.get()); + + auto importedPaths = readStorePaths<PathSet>(*this, conn->from); + assert(importedPaths.size() <= 1); + } + + else { + conn->to << wopAddToStoreNar << info.path << info.deriver + << info.narHash.to_string(Base16, false) << info.references + << info.registrationTime << info.narSize << info.ultimate + << info.sigs << info.ca << repair << !checkSigs; + bool tunnel = GET_PROTOCOL_MINOR(conn->daemonVersion) >= 21; + if (!tunnel) { + copyNAR(source, conn->to); + } + conn.processStderr(nullptr, tunnel ? &source : nullptr); + } +} + +Path RemoteStore::addToStore(const std::string& name, const Path& _srcPath, + bool recursive, HashType hashAlgo, + PathFilter& filter, RepairFlag repair) { + if (repair != 0u) { + throw Error( + "repairing is not supported when building through the Nix daemon"); + } + + auto conn(getConnection()); + + Path srcPath(absPath(_srcPath)); + + conn->to << wopAddToStore << name + << ((hashAlgo == htSHA256 && recursive) + ? 0 + : 1) /* backwards compatibility hack */ + << (recursive ? 
1 : 0) << printHashType(hashAlgo); + + try { + conn->to.written = 0; + conn->to.warn = true; + connections->incCapacity(); + { + Finally cleanup([&]() { connections->decCapacity(); }); + dumpPath(srcPath, conn->to, filter); + } + conn->to.warn = false; + conn.processStderr(); + } catch (SysError& e) { + /* Daemon closed while we were sending the path. Probably OOM + or I/O error. */ + if (e.errNo == EPIPE) { + try { + conn.processStderr(); + } catch (EndOfFile& e) { + } + } + throw; + } + + return readStorePath(*this, conn->from); +} + +Path RemoteStore::addTextToStore(const std::string& name, const std::string& s, + const PathSet& references, RepairFlag repair) { + if (repair != 0u) { + throw Error( + "repairing is not supported when building through the Nix daemon"); + } + + auto conn(getConnection()); + conn->to << wopAddTextToStore << name << s << references; + + conn.processStderr(); + return readStorePath(*this, conn->from); +} + +void RemoteStore::buildPaths(const PathSet& drvPaths, BuildMode buildMode) { + auto conn(getConnection()); + conn->to << wopBuildPaths; + if (GET_PROTOCOL_MINOR(conn->daemonVersion) >= 13) { + conn->to << drvPaths; + if (GET_PROTOCOL_MINOR(conn->daemonVersion) >= 15) { + conn->to << buildMode; + } else + /* Old daemons did not take a 'buildMode' parameter, so we + need to validate it here on the client side. */ + if (buildMode != bmNormal) { + throw Error( + "repairing or checking is not supported when building through the " + "Nix daemon"); + } + } else { + /* For backwards compatibility with old daemons, strip output + identifiers. 
*/ + PathSet drvPaths2; + for (auto& i : drvPaths) { + drvPaths2.insert(std::string(i, 0, i.find('!'))); + } + conn->to << drvPaths2; + } + conn.processStderr(); + readInt(conn->from); +} + +BuildResult RemoteStore::buildDerivation(const Path& drvPath, + const BasicDerivation& drv, + BuildMode buildMode) { + auto conn(getConnection()); + conn->to << wopBuildDerivation << drvPath << drv << buildMode; + conn.processStderr(); + BuildResult res; + unsigned int status; + conn->from >> status >> res.errorMsg; + res.status = (BuildResult::Status)status; + return res; +} + +void RemoteStore::ensurePath(const Path& path) { + auto conn(getConnection()); + conn->to << wopEnsurePath << path; + conn.processStderr(); + readInt(conn->from); +} + +void RemoteStore::addTempRoot(const Path& path) { + auto conn(getConnection()); + conn->to << wopAddTempRoot << path; + conn.processStderr(); + readInt(conn->from); +} + +void RemoteStore::addIndirectRoot(const Path& path) { + auto conn(getConnection()); + conn->to << wopAddIndirectRoot << path; + conn.processStderr(); + readInt(conn->from); +} + +void RemoteStore::syncWithGC() { + auto conn(getConnection()); + conn->to << wopSyncWithGC; + conn.processStderr(); + readInt(conn->from); +} + +Roots RemoteStore::findRoots(bool censor) { + auto conn(getConnection()); + conn->to << wopFindRoots; + conn.processStderr(); + auto count = readNum<size_t>(conn->from); + Roots result; + while ((count--) != 0u) { + Path link = readString(conn->from); + Path target = readStorePath(*this, conn->from); + result[target].emplace(link); + } + return result; +} + +void RemoteStore::collectGarbage(const GCOptions& options, GCResults& results) { + auto conn(getConnection()); + + conn->to << wopCollectGarbage << options.action << options.pathsToDelete + << static_cast<uint64_t>(options.ignoreLiveness) + << options.maxFreed + /* removed options */ + << 0 << 0 << 0; + + conn.processStderr(); + + results.paths = readStrings<PathSet>(conn->from); + 
results.bytesFreed = readLongLong(conn->from); + readLongLong(conn->from); // obsolete + + { + auto state_(Store::state.lock()); + state_->pathInfoCache.clear(); + } +} + +void RemoteStore::optimiseStore() { + auto conn(getConnection()); + conn->to << wopOptimiseStore; + conn.processStderr(); + readInt(conn->from); +} + +bool RemoteStore::verifyStore(bool checkContents, RepairFlag repair) { + auto conn(getConnection()); + conn->to << wopVerifyStore << static_cast<uint64_t>(checkContents) << repair; + conn.processStderr(); + return readInt(conn->from) != 0u; +} + +void RemoteStore::addSignatures(const Path& storePath, const StringSet& sigs) { + auto conn(getConnection()); + conn->to << wopAddSignatures << storePath << sigs; + conn.processStderr(); + readInt(conn->from); +} + +void RemoteStore::queryMissing(const PathSet& targets, PathSet& willBuild, + PathSet& willSubstitute, PathSet& unknown, + unsigned long long& downloadSize, + unsigned long long& narSize) { + { + auto conn(getConnection()); + if (GET_PROTOCOL_MINOR(conn->daemonVersion) < 19) { + // Don't hold the connection handle in the fallback case + // to prevent a deadlock. + goto fallback; + } + conn->to << wopQueryMissing << targets; + conn.processStderr(); + willBuild = readStorePaths<PathSet>(*this, conn->from); + willSubstitute = readStorePaths<PathSet>(*this, conn->from); + unknown = readStorePaths<PathSet>(*this, conn->from); + conn->from >> downloadSize >> narSize; + return; + } + +fallback: + return Store::queryMissing(targets, willBuild, willSubstitute, unknown, + downloadSize, narSize); +} + +void RemoteStore::connect() { auto conn(getConnection()); } + +unsigned int RemoteStore::getProtocol() { + auto conn(connections->get()); + return conn->daemonVersion; +} + +void RemoteStore::flushBadConnections() { connections->flushBad(); } + +RemoteStore::Connection::~Connection() { + try { + to.flush(); + } catch (...) 
{ + ignoreException(); + } +} + +std::exception_ptr RemoteStore::Connection::processStderr(Sink* sink, + Source* source) { + to.flush(); + + while (true) { + auto msg = readNum<uint64_t>(from); + + if (msg == STDERR_WRITE) { + std::string s = readString(from); + if (sink == nullptr) { + throw Error("no sink"); + } + (*sink)(s); + } + + else if (msg == STDERR_READ) { + if (source == nullptr) { + throw Error("no source"); + } + auto len = readNum<size_t>(from); + auto buf = std::make_unique<unsigned char[]>(len); + writeString(buf.get(), source->read(buf.get(), len), to); + to.flush(); + } + + else if (msg == STDERR_ERROR) { + std::string error = readString(from); + unsigned int status = readInt(from); + return std::make_exception_ptr(Error(status, error)); + } + + else if (msg == STDERR_NEXT) { + LOG(ERROR) << absl::StripTrailingAsciiWhitespace(readString(from)); + } + + else if (msg == STDERR_START_ACTIVITY) { + LOG(INFO) << readString(from); + } + + else if (msg == STDERR_LAST) { + break; + } + + else { + throw Error("got unknown message type %x from Nix daemon", msg); + } + } + + return nullptr; +} + +static std::string uriScheme = "unix://"; + +static RegisterStoreImplementation regStore( + [](const std::string& uri, + const Store::Params& params) -> std::shared_ptr<Store> { + if (std::string(uri, 0, uriScheme.size()) != uriScheme) { + return nullptr; + } + return std::make_shared<UDSRemoteStore>( + std::string(uri, uriScheme.size()), params); + }); + +} // namespace nix diff --git a/third_party/nix/src/libstore/remote-store.hh b/third_party/nix/src/libstore/remote-store.hh new file mode 100644 index 000000000000..e61e72892c34 --- /dev/null +++ b/third_party/nix/src/libstore/remote-store.hh @@ -0,0 +1,153 @@ +#pragma once + +#include <limits> +#include <string> + +#include "store-api.hh" + +namespace nix { + +class Pipe; +class Pid; +struct FdSink; +struct FdSource; +template <typename T> +class Pool; +struct ConnectionHandle; + +/* FIXME: RemoteStore is a 
misnomer - should be something like + DaemonStore. */ +class RemoteStore : public virtual Store { + public: + const Setting<int> maxConnections{ + (Store*)this, 1, "max-connections", + "maximum number of concurrent connections to the Nix daemon"}; + + const Setting<unsigned int> maxConnectionAge{ + (Store*)this, std::numeric_limits<unsigned int>::max(), + "max-connection-age", "number of seconds to reuse a connection"}; + + virtual bool sameMachine() = 0; + + RemoteStore(const Params& params); + + /* Implementations of abstract store API methods. */ + + bool isValidPathUncached(const Path& path) override; + + PathSet queryValidPaths(const PathSet& paths, SubstituteFlag maybeSubstitute = + NoSubstitute) override; + + PathSet queryAllValidPaths() override; + + void queryPathInfoUncached( + const Path& path, + Callback<std::shared_ptr<ValidPathInfo>> callback) noexcept override; + + void queryReferrers(const Path& path, PathSet& referrers) override; + + PathSet queryValidDerivers(const Path& path) override; + + PathSet queryDerivationOutputs(const Path& path) override; + + StringSet queryDerivationOutputNames(const Path& path) override; + + Path queryPathFromHashPart(const std::string& hashPart) override; + + PathSet querySubstitutablePaths(const PathSet& paths) override; + + void querySubstitutablePathInfos(const PathSet& paths, + SubstitutablePathInfos& infos) override; + + void addToStore(const ValidPathInfo& info, Source& source, RepairFlag repair, + CheckSigsFlag checkSigs, + std::shared_ptr<FSAccessor> accessor) override; + + Path addToStore(const std::string& name, const Path& srcPath, + bool recursive = true, HashType hashAlgo = htSHA256, + PathFilter& filter = defaultPathFilter, + RepairFlag repair = NoRepair) override; + + Path addTextToStore(const std::string& name, const std::string& s, + const PathSet& references, RepairFlag repair) override; + + void buildPaths(const PathSet& paths, BuildMode buildMode) override; + + BuildResult buildDerivation(const 
Path& drvPath, const BasicDerivation& drv, + BuildMode buildMode) override; + + void ensurePath(const Path& path) override; + + void addTempRoot(const Path& path) override; + + void addIndirectRoot(const Path& path) override; + + void syncWithGC() override; + + Roots findRoots(bool censor) override; + + void collectGarbage(const GCOptions& options, GCResults& results) override; + + void optimiseStore() override; + + bool verifyStore(bool checkContents, RepairFlag repair) override; + + void addSignatures(const Path& storePath, const StringSet& sigs) override; + + void queryMissing(const PathSet& targets, PathSet& willBuild, + PathSet& willSubstitute, PathSet& unknown, + unsigned long long& downloadSize, + unsigned long long& narSize) override; + + void connect() override; + + unsigned int getProtocol() override; + + void flushBadConnections(); + + protected: + struct Connection { + AutoCloseFD fd; + FdSink to; + FdSource from; + unsigned int daemonVersion; + std::chrono::time_point<std::chrono::steady_clock> startTime; + + virtual ~Connection(); + + std::exception_ptr processStderr(Sink* sink = 0, Source* source = 0); + }; + + ref<Connection> openConnectionWrapper(); + + virtual ref<Connection> openConnection() = 0; + + void initConnection(Connection& conn); + + ref<Pool<Connection>> connections; + + virtual void setOptions(Connection& conn); + + ConnectionHandle getConnection(); + + friend struct ConnectionHandle; + + private: + std::atomic_bool failed{false}; +}; + +class UDSRemoteStore : public LocalFSStore, public RemoteStore { + public: + UDSRemoteStore(const Params& params); + UDSRemoteStore(std::string path, const Params& params); + + std::string getUri() override; + + bool sameMachine() { return true; } + + private: + ref<RemoteStore::Connection> openConnection() override; + std::optional<std::string> path; +}; + +} // namespace nix diff --git a/third_party/nix/src/libstore/s3-binary-cache-store.cc b/third_party/nix/src/libstore/s3-binary-cache-store.cc new 
file mode 100644 index 000000000000..d713c5ce0151 --- /dev/null +++ b/third_party/nix/src/libstore/s3-binary-cache-store.cc @@ -0,0 +1,431 @@ +#if ENABLE_S3 + +#include "s3-binary-cache-store.hh" + +#include <absl/strings/ascii.h> +#include <absl/strings/match.h> +#include <aws/core/Aws.h> +#include <aws/core/VersionConfig.h> +#include <aws/core/auth/AWSCredentialsProvider.h> +#include <aws/core/auth/AWSCredentialsProviderChain.h> +#include <aws/core/client/ClientConfiguration.h> +#include <aws/core/client/DefaultRetryStrategy.h> +#include <aws/core/utils/logging/FormattedLogSystem.h> +#include <aws/core/utils/logging/LogMacros.h> +#include <aws/core/utils/threading/Executor.h> +#include <aws/s3/S3Client.h> +#include <aws/s3/model/GetObjectRequest.h> +#include <aws/s3/model/HeadObjectRequest.h> +#include <aws/s3/model/ListObjectsRequest.h> +#include <aws/s3/model/PutObjectRequest.h> +#include <aws/transfer/TransferManager.h> + +#include "compression.hh" +#include "download.hh" +#include "globals.hh" +#include "istringstream_nocopy.hh" +#include "nar-info-disk-cache.hh" +#include "nar-info.hh" +#include "s3.hh" + +using namespace Aws::Transfer; + +namespace nix { + +struct S3Error : public Error { + Aws::S3::S3Errors err; + S3Error(Aws::S3::S3Errors err, const FormatOrString& fs) + : Error(fs), err(err){}; +}; + +/* Helper: given an Outcome<R, E>, return R in case of success, or + throw an exception in case of an error. 
*/ +template <typename R, typename E> +R&& checkAws(const FormatOrString& fs, Aws::Utils::Outcome<R, E>&& outcome) { + if (!outcome.IsSuccess()) + throw S3Error(outcome.GetError().GetErrorType(), + fs.s + ": " + outcome.GetError().GetMessage()); + return outcome.GetResultWithOwnership(); +} + +class AwsLogger : public Aws::Utils::Logging::FormattedLogSystem { + using Aws::Utils::Logging::FormattedLogSystem::FormattedLogSystem; + + void ProcessFormattedStatement(Aws::String&& statement) override { + debug("AWS: %s", absl::StripTrailingAsciiWhitespace(statement)); + } +}; + +static void initAWS() { + static std::once_flag flag; + std::call_once(flag, []() { + Aws::SDKOptions options; + + /* We install our own OpenSSL locking function (see + shared.cc), so don't let aws-sdk-cpp override it. */ + options.cryptoOptions.initAndCleanupOpenSSL = false; + + if (verbosity >= lvlDebug) { + options.loggingOptions.logLevel = + verbosity == lvlDebug ? Aws::Utils::Logging::LogLevel::Debug + : Aws::Utils::Logging::LogLevel::Trace; + options.loggingOptions.logger_create_fn = [options]() { + return std::make_shared<AwsLogger>(options.loggingOptions.logLevel); + }; + } + + Aws::InitAPI(options); + }); +} + +S3Helper::S3Helper(const std::string& profile, const std::string& region, + const std::string& scheme, const std::string& endpoint) + : config(makeConfig(region, scheme, endpoint)), + client(make_ref<Aws::S3::S3Client>( + profile == "" + ? std::dynamic_pointer_cast<Aws::Auth::AWSCredentialsProvider>( + std::make_shared< + Aws::Auth::DefaultAWSCredentialsProviderChain>()) + : std::dynamic_pointer_cast<Aws::Auth::AWSCredentialsProvider>( + std::make_shared< + Aws::Auth::ProfileConfigFileAWSCredentialsProvider>( + profile.c_str())), + *config, +// FIXME: https://github.com/aws/aws-sdk-cpp/issues/759 +#if AWS_VERSION_MAJOR == 1 && AWS_VERSION_MINOR < 3 + false, +#else + Aws::Client::AWSAuthV4Signer::PayloadSigningPolicy::Never, +#endif + endpoint.empty())) { +} + +/* Log AWS retries. 
*/ +class RetryStrategy : public Aws::Client::DefaultRetryStrategy { + bool ShouldRetry(const Aws::Client::AWSError<Aws::Client::CoreErrors>& error, + long attemptedRetries) const override { + auto retry = + Aws::Client::DefaultRetryStrategy::ShouldRetry(error, attemptedRetries); + if (retry) + printError("AWS error '%s' (%s), will retry in %d ms", + error.GetExceptionName(), error.GetMessage(), + CalculateDelayBeforeNextRetry(error, attemptedRetries)); + return retry; + } +}; + +ref<Aws::Client::ClientConfiguration> S3Helper::makeConfig( + const std::string& region, const std::string& scheme, + const std::string& endpoint) { + initAWS(); + auto res = make_ref<Aws::Client::ClientConfiguration>(); + res->region = region; + if (!scheme.empty()) { + res->scheme = Aws::Http::SchemeMapper::FromString(scheme.c_str()); + } + if (!endpoint.empty()) { + res->endpointOverride = endpoint; + } + res->requestTimeoutMs = 600 * 1000; + res->connectTimeoutMs = 5 * 1000; + res->retryStrategy = std::make_shared<RetryStrategy>(); + res->caFile = settings.caFile; + return res; +} + +S3Helper::DownloadResult S3Helper::getObject(const std::string& bucketName, + const std::string& key) { + debug("fetching 's3://%s/%s'...", bucketName, key); + + auto request = + Aws::S3::Model::GetObjectRequest().WithBucket(bucketName).WithKey(key); + + request.SetResponseStreamFactory( + [&]() { return Aws::New<std::stringstream>("STRINGSTREAM"); }); + + DownloadResult res; + + auto now1 = std::chrono::steady_clock::now(); + + try { + auto result = checkAws(fmt("AWS error fetching '%s'", key), + client->GetObject(request)); + + res.data = + decompress(result.GetContentEncoding(), + dynamic_cast<std::stringstream&>(result.GetBody()).str()); + + } catch (S3Error& e) { + if (e.err != Aws::S3::S3Errors::NO_SUCH_KEY) { + throw; + } + } + + auto now2 = std::chrono::steady_clock::now(); + + res.durationMs = + std::chrono::duration_cast<std::chrono::milliseconds>(now2 - now1) + .count(); + + return res; +} + 
+struct S3BinaryCacheStoreImpl : public S3BinaryCacheStore { + const Setting<std::string> profile{ + this, "", "profile", "The name of the AWS configuration profile to use."}; + const Setting<std::string> region{ + this, Aws::Region::US_EAST_1, "region", {"aws-region"}}; + const Setting<std::string> scheme{ + this, "", "scheme", + "The scheme to use for S3 requests, https by default."}; + const Setting<std::string> endpoint{ + this, "", "endpoint", + "An optional override of the endpoint to use when talking to S3."}; + const Setting<std::string> narinfoCompression{ + this, "", "narinfo-compression", "compression method for .narinfo files"}; + const Setting<std::string> lsCompression{this, "", "ls-compression", + "compression method for .ls files"}; + const Setting<std::string> logCompression{ + this, "", "log-compression", "compression method for log/* files"}; + const Setting<bool> multipartUpload{this, false, "multipart-upload", + "whether to use multi-part uploads"}; + const Setting<uint64_t> bufferSize{ + this, 5 * 1024 * 1024, "buffer-size", + "size (in bytes) of each part in multi-part uploads"}; + + std::string bucketName; + + Stats stats; + + S3Helper s3Helper; + + S3BinaryCacheStoreImpl(const Params& params, const std::string& bucketName) + : S3BinaryCacheStore(params), + bucketName(bucketName), + s3Helper(profile, region, scheme, endpoint) { + diskCache = getNarInfoDiskCache(); + } + + std::string getUri() override { return "s3://" + bucketName; } + + void init() override { + if (!diskCache->cacheExists(getUri(), wantMassQuery_, priority)) { + BinaryCacheStore::init(); + + diskCache->createCache(getUri(), storeDir, wantMassQuery_, priority); + } + } + + const Stats& getS3Stats() override { return stats; } + + /* This is a specialisation of isValidPath() that optimistically + fetches the .narinfo file, rather than first checking for its + existence via a HEAD request. Since .narinfos are small, doing + a GET is unlikely to be slower than HEAD. 
*/ + bool isValidPathUncached(const Path& storePath) override { + try { + queryPathInfo(storePath); + return true; + } catch (InvalidPath& e) { + return false; + } + } + + bool fileExists(const std::string& path) override { + stats.head++; + + auto res = s3Helper.client->HeadObject(Aws::S3::Model::HeadObjectRequest() + .WithBucket(bucketName) + .WithKey(path)); + + if (!res.IsSuccess()) { + auto& error = res.GetError(); + if (error.GetErrorType() == Aws::S3::S3Errors::RESOURCE_NOT_FOUND || + error.GetErrorType() == Aws::S3::S3Errors::NO_SUCH_KEY + // If bucket listing is disabled, 404s turn into 403s + || error.GetErrorType() == Aws::S3::S3Errors::ACCESS_DENIED) + return false; + throw Error(format("AWS error fetching '%s': %s") % path % + error.GetMessage()); + } + + return true; + } + + std::shared_ptr<TransferManager> transferManager; + std::once_flag transferManagerCreated; + + void uploadFile(const std::string& path, const std::string& data, + const std::string& mimeType, + const std::string& contentEncoding) { + auto stream = std::make_shared<istringstream_nocopy>(data); + + auto maxThreads = std::thread::hardware_concurrency(); + + static std::shared_ptr<Aws::Utils::Threading::PooledThreadExecutor> + executor = + std::make_shared<Aws::Utils::Threading::PooledThreadExecutor>( + maxThreads); + + std::call_once(transferManagerCreated, [&]() { + if (multipartUpload) { + TransferManagerConfiguration transferConfig(executor.get()); + + transferConfig.s3Client = s3Helper.client; + transferConfig.bufferSize = bufferSize; + + transferConfig.uploadProgressCallback = + [](const TransferManager* transferManager, + const std::shared_ptr<const TransferHandle>& transferHandle) { + // FIXME: find a way to properly abort the multipart upload. 
+ // checkInterrupt(); + debug("upload progress ('%s'): '%d' of '%d' bytes", + transferHandle->GetKey(), + transferHandle->GetBytesTransferred(), + transferHandle->GetBytesTotalSize()); + }; + + transferManager = TransferManager::Create(transferConfig); + } + }); + + auto now1 = std::chrono::steady_clock::now(); + + if (transferManager) { + if (contentEncoding != "") + throw Error( + "setting a content encoding is not supported with S3 multi-part " + "uploads"); + + std::shared_ptr<TransferHandle> transferHandle = + transferManager->UploadFile(stream, bucketName, path, mimeType, + Aws::Map<Aws::String, Aws::String>(), + nullptr /*, contentEncoding */); + + transferHandle->WaitUntilFinished(); + + if (transferHandle->GetStatus() == TransferStatus::FAILED) + throw Error("AWS error: failed to upload 's3://%s/%s': %s", bucketName, + path, transferHandle->GetLastError().GetMessage()); + + if (transferHandle->GetStatus() != TransferStatus::COMPLETED) + throw Error( + "AWS error: transfer status of 's3://%s/%s' in unexpected state", + bucketName, path); + + } else { + auto request = Aws::S3::Model::PutObjectRequest() + .WithBucket(bucketName) + .WithKey(path); + + request.SetContentType(mimeType); + + if (contentEncoding != "") { + request.SetContentEncoding(contentEncoding); + } + + auto stream = std::make_shared<istringstream_nocopy>(data); + + request.SetBody(stream); + + auto result = checkAws(fmt("AWS error uploading '%s'", path), + s3Helper.client->PutObject(request)); + } + + auto now2 = std::chrono::steady_clock::now(); + + auto duration = + std::chrono::duration_cast<std::chrono::milliseconds>(now2 - now1) + .count(); + + printInfo(format("uploaded 's3://%1%/%2%' (%3% bytes) in %4% ms") % + bucketName % path % data.size() % duration); + + stats.putTimeMs += duration; + stats.putBytes += data.size(); + stats.put++; + } + + void upsertFile(const std::string& path, const std::string& data, + const std::string& mimeType) override { + if (narinfoCompression != "" && 
absl::EndsWith(path, ".narinfo")) + uploadFile(path, *compress(narinfoCompression, data), mimeType, + narinfoCompression); + else if (lsCompression != "" && absl::EndsWith(path, ".ls")) + uploadFile(path, *compress(lsCompression, data), mimeType, lsCompression); + else if (logCompression != "" && absl::StartsWith(path, "log/")) + uploadFile(path, *compress(logCompression, data), mimeType, + logCompression); + else + uploadFile(path, data, mimeType, ""); + } + + void getFile(const std::string& path, Sink& sink) override { + stats.get++; + + // FIXME: stream output to sink. + auto res = s3Helper.getObject(bucketName, path); + + stats.getBytes += res.data ? res.data->size() : 0; + stats.getTimeMs += res.durationMs; + + if (res.data) { + printTalkative("downloaded 's3://%s/%s' (%d bytes) in %d ms", bucketName, + path, res.data->size(), res.durationMs); + + sink((unsigned char*)res.data->data(), res.data->size()); + } else + throw NoSuchBinaryCacheFile( + "file '%s' does not exist in binary cache '%s'", path, getUri()); + } + + PathSet queryAllValidPaths() override { + PathSet paths; + std::string marker; + + do { + debug(format("listing bucket 's3://%s' from key '%s'...") % bucketName % + marker); + + auto res = checkAws( + format("AWS error listing bucket '%s'") % bucketName, + s3Helper.client->ListObjects(Aws::S3::Model::ListObjectsRequest() + .WithBucket(bucketName) + .WithDelimiter("/") + .WithMarker(marker))); + + auto& contents = res.GetContents(); + + debug(format("got %d keys, next marker '%s'") % contents.size() % + res.GetNextMarker()); + + for (auto object : contents) { + auto& key = object.GetKey(); + if (key.size() != 40 || !absl::EndsWith(key, ".narinfo")) { + continue; + } + paths.insert(storeDir + "/" + key.substr(0, key.size() - 8)); + } + + marker = res.GetNextMarker(); + } while (!marker.empty()); + + return paths; + } +}; + +static RegisterStoreImplementation regStore( + [](const std::string& uri, + const Store::Params& params) -> 
std::shared_ptr<Store> { + if (std::string(uri, 0, 5) != "s3://") { + return 0; + } + auto store = + std::make_shared<S3BinaryCacheStoreImpl>(params, std::string(uri, 5)); + store->init(); + return store; + }); + +} // namespace nix + +#endif diff --git a/third_party/nix/src/libstore/s3-binary-cache-store.hh b/third_party/nix/src/libstore/s3-binary-cache-store.hh new file mode 100644 index 000000000000..24cb67721a92 --- /dev/null +++ b/third_party/nix/src/libstore/s3-binary-cache-store.hh @@ -0,0 +1,27 @@ +#pragma once + +#include <atomic> + +#include "binary-cache-store.hh" + +namespace nix { + +class S3BinaryCacheStore : public BinaryCacheStore { + protected: + S3BinaryCacheStore(const Params& params) : BinaryCacheStore(params) {} + + public: + struct Stats { + std::atomic<uint64_t> put{0}; + std::atomic<uint64_t> putBytes{0}; + std::atomic<uint64_t> putTimeMs{0}; + std::atomic<uint64_t> get{0}; + std::atomic<uint64_t> getBytes{0}; + std::atomic<uint64_t> getTimeMs{0}; + std::atomic<uint64_t> head{0}; + }; + + virtual const Stats& getS3Stats() = 0; +}; + +} // namespace nix diff --git a/third_party/nix/src/libstore/s3.hh b/third_party/nix/src/libstore/s3.hh new file mode 100644 index 000000000000..09935fabed47 --- /dev/null +++ b/third_party/nix/src/libstore/s3.hh @@ -0,0 +1,42 @@ +#pragma once + +#if ENABLE_S3 + +#include "ref.hh" + +namespace Aws { +namespace Client { +class ClientConfiguration; +} +} // namespace Aws +namespace Aws { +namespace S3 { +class S3Client; +} +} // namespace Aws + +namespace nix { + +struct S3Helper { + ref<Aws::Client::ClientConfiguration> config; + ref<Aws::S3::S3Client> client; + + S3Helper(const std::string& profile, const std::string& region, + const std::string& scheme, const std::string& endpoint); + + ref<Aws::Client::ClientConfiguration> makeConfig(const std::string& region, + const std::string& scheme, + const std::string& endpoint); + + struct DownloadResult { + std::shared_ptr<std::string> data; + unsigned int durationMs; 
+ }; + + DownloadResult getObject(const std::string& bucketName, + const std::string& key); +}; + +} // namespace nix + +#endif diff --git a/third_party/nix/src/libstore/sandbox-defaults.sb b/third_party/nix/src/libstore/sandbox-defaults.sb new file mode 100644 index 000000000000..0299d1ee45d2 --- /dev/null +++ b/third_party/nix/src/libstore/sandbox-defaults.sb @@ -0,0 +1,87 @@ +(define TMPDIR (param "_GLOBAL_TMP_DIR")) + +(deny default) + +; Disallow creating setuid/setgid binaries, since that +; would allow breaking build user isolation. +(deny file-write-setugid) + +; Allow forking. +(allow process-fork) + +; Allow reading system information like #CPUs, etc. +(allow sysctl-read) + +; Allow POSIX semaphores and shared memory. +(allow ipc-posix*) + +; Allow socket creation. +(allow system-socket) + +; Allow sending signals within the sandbox. +(allow signal (target same-sandbox)) + +; Allow getpwuid. +(allow mach-lookup (global-name "com.apple.system.opendirectoryd.libinfo")) + +; Access to /tmp. +; The network-outbound/network-inbound ones are for unix domain sockets, which +; we allow access to in TMPDIR (but if we allow them more broadly, you could in +; theory escape the sandbox) +(allow file* process-exec network-outbound network-inbound + (literal "/tmp") (subpath TMPDIR)) + +; Some packages like to read the system version. +(allow file-read* (literal "/System/Library/CoreServices/SystemVersion.plist")) + +; Without this line clang cannot write to /dev/null, breaking some configure tests. +(allow file-read-metadata (literal "/dev")) + +; Many packages like to do local networking in their test suites, but let's only +; allow it if the package explicitly asks for it. +(if (param "_ALLOW_LOCAL_NETWORKING") + (begin + (allow network* (local ip) (local tcp) (local udp)) + + ; Allow access to /etc/resolv.conf (which is a symlink to + ; /private/var/run/resolv.conf). 
+ ; TODO: deduplicate with sandbox-network.sb + (allow file-read-metadata + (literal "/var") + (literal "/etc") + (literal "/etc/resolv.conf") + (literal "/private/etc/resolv.conf")) + + (allow file-read* + (literal "/private/var/run/resolv.conf")) + + ; Allow DNS lookups. This is even needed for localhost, which lots of tests rely on + (allow file-read-metadata (literal "/etc/hosts")) + (allow file-read* (literal "/private/etc/hosts")) + (allow network-outbound (remote unix-socket (path-literal "/private/var/run/mDNSResponder"))))) + +; Standard devices. +(allow file* + (literal "/dev/null") + (literal "/dev/random") + (literal "/dev/stdin") + (literal "/dev/stdout") + (literal "/dev/tty") + (literal "/dev/urandom") + (literal "/dev/zero") + (subpath "/dev/fd")) + +; Does nothing, but reduces build noise. +(allow file* (literal "/dev/dtracehelper")) + +; Allow access to zoneinfo since libSystem needs it. +(allow file-read* (subpath "/usr/share/zoneinfo")) + +(allow file-read* (subpath "/usr/share/locale")) + +; This is mostly to get more specific log messages when builds try to +; access something in /etc or /var. +(allow file-read-metadata + (literal "/etc") + (literal "/var") + (literal "/private/var/tmp")) diff --git a/third_party/nix/src/libstore/sandbox-minimal.sb b/third_party/nix/src/libstore/sandbox-minimal.sb new file mode 100644 index 000000000000..65f5108b3990 --- /dev/null +++ b/third_party/nix/src/libstore/sandbox-minimal.sb @@ -0,0 +1,5 @@ +(allow default) + +; Disallow creating setuid/setgid binaries, since that +; would allow breaking build user isolation. +(deny file-write-setugid) diff --git a/third_party/nix/src/libstore/sandbox-network.sb b/third_party/nix/src/libstore/sandbox-network.sb new file mode 100644 index 000000000000..56beec761fa8 --- /dev/null +++ b/third_party/nix/src/libstore/sandbox-network.sb @@ -0,0 +1,16 @@ +; Allow local and remote network traffic. 
+(allow network* (local ip) (remote ip)) + +; Allow access to /etc/resolv.conf (which is a symlink to +; /private/var/run/resolv.conf). +(allow file-read-metadata + (literal "/var") + (literal "/etc") + (literal "/etc/resolv.conf") + (literal "/private/etc/resolv.conf")) + +(allow file-read* + (literal "/private/var/run/resolv.conf")) + +; Allow DNS lookups. +(allow network-outbound (remote unix-socket (path-literal "/private/var/run/mDNSResponder"))) diff --git a/third_party/nix/src/libstore/schema.sql b/third_party/nix/src/libstore/schema.sql new file mode 100644 index 000000000000..09c71a2b8dd7 --- /dev/null +++ b/third_party/nix/src/libstore/schema.sql @@ -0,0 +1,42 @@ +create table if not exists ValidPaths ( + id integer primary key autoincrement not null, + path text unique not null, + hash text not null, + registrationTime integer not null, + deriver text, + narSize integer, + ultimate integer, -- null implies "false" + sigs text, -- space-separated + ca text -- if not null, an assertion that the path is content-addressed; see ValidPathInfo +); + +create table if not exists Refs ( + referrer integer not null, + reference integer not null, + primary key (referrer, reference), + foreign key (referrer) references ValidPaths(id) on delete cascade, + foreign key (reference) references ValidPaths(id) on delete restrict +); + +create index if not exists IndexReferrer on Refs(referrer); +create index if not exists IndexReference on Refs(reference); + +-- Paths can refer to themselves, causing a tuple (N, N) in the Refs +-- table. This causes a deletion of the corresponding row in +-- ValidPaths to cause a foreign key constraint violation (due to `on +-- delete restrict' on the `reference' column). Therefore, explicitly +-- get rid of self-references. 
+create trigger if not exists DeleteSelfRefs before delete on ValidPaths + begin + delete from Refs where referrer = old.id and reference = old.id; + end; + +create table if not exists DerivationOutputs ( + drv integer not null, + id text not null, -- symbolic output id, usually "out" + path text not null, + primary key (drv, id), + foreign key (drv) references ValidPaths(id) on delete cascade +); + +create index if not exists IndexDerivationOutputs on DerivationOutputs(path); diff --git a/third_party/nix/src/libstore/serve-protocol.hh b/third_party/nix/src/libstore/serve-protocol.hh new file mode 100644 index 000000000000..a07a7ef97425 --- /dev/null +++ b/third_party/nix/src/libstore/serve-protocol.hh @@ -0,0 +1,24 @@ +#pragma once + +namespace nix { + +#define SERVE_MAGIC_1 0x390c9deb +#define SERVE_MAGIC_2 0x5452eecb + +#define SERVE_PROTOCOL_VERSION 0x205 +#define GET_PROTOCOL_MAJOR(x) ((x)&0xff00) +#define GET_PROTOCOL_MINOR(x) ((x)&0x00ff) + +typedef enum { + cmdQueryValidPaths = 1, + cmdQueryPathInfos = 2, + cmdDumpStorePath = 3, + cmdImportPaths = 4, + cmdExportPaths = 5, + cmdBuildPaths = 6, + cmdQueryClosure = 7, + cmdBuildDerivation = 8, + cmdAddToStoreNar = 9, +} ServeCommand; + +} // namespace nix diff --git a/third_party/nix/src/libstore/sqlite.cc b/third_party/nix/src/libstore/sqlite.cc new file mode 100644 index 000000000000..dbdaa29d4fd9 --- /dev/null +++ b/third_party/nix/src/libstore/sqlite.cc @@ -0,0 +1,195 @@ +#include "sqlite.hh" + +#include <atomic> + +#include <glog/logging.h> +#include <sqlite3.h> + +#include "util.hh" + +namespace nix { + +[[noreturn]] void throwSQLiteError(sqlite3* db, const FormatOrString& fs) { + int err = sqlite3_errcode(db); + int exterr = sqlite3_extended_errcode(db); + + auto path = sqlite3_db_filename(db, nullptr); + if (path == nullptr) { + path = "(in-memory)"; + } + + if (err == SQLITE_BUSY || err == SQLITE_PROTOCOL) { + throw SQLiteBusy( + err == SQLITE_PROTOCOL + ? 
fmt("SQLite database '%s' is busy (SQLITE_PROTOCOL)", path) + : fmt("SQLite database '%s' is busy", path)); + } + throw SQLiteError("%s: %s (in '%s')", fs.s, sqlite3_errstr(exterr), path); +} + +SQLite::SQLite(const Path& path) { + if (sqlite3_open_v2(path.c_str(), &db, + SQLITE_OPEN_READWRITE | SQLITE_OPEN_CREATE, + nullptr) != SQLITE_OK) { + throw Error(format("cannot open SQLite database '%s'") % path); + } +} + +SQLite::~SQLite() { + try { + if ((db != nullptr) && sqlite3_close(db) != SQLITE_OK) { + throwSQLiteError(db, "closing database"); + } + } catch (...) { + ignoreException(); + } +} + +void SQLite::exec(const std::string& stmt) { + retrySQLite<void>([&]() { + if (sqlite3_exec(db, stmt.c_str(), nullptr, nullptr, nullptr) != + SQLITE_OK) { + throwSQLiteError(db, format("executing SQLite statement '%s'") % stmt); + } + }); +} + +void SQLiteStmt::create(sqlite3* db, const std::string& sql) { + checkInterrupt(); + assert(!stmt); + if (sqlite3_prepare_v2(db, sql.c_str(), -1, &stmt, nullptr) != SQLITE_OK) { + throwSQLiteError(db, fmt("creating statement '%s'", sql)); + } + this->db = db; + this->sql = sql; +} + +SQLiteStmt::~SQLiteStmt() { + try { + if ((stmt != nullptr) && sqlite3_finalize(stmt) != SQLITE_OK) { + throwSQLiteError(db, fmt("finalizing statement '%s'", sql)); + } + } catch (...) { + ignoreException(); + } +} + +SQLiteStmt::Use::Use(SQLiteStmt& stmt) : stmt(stmt) { + assert(stmt.stmt); + /* Note: sqlite3_reset() returns the error code for the most + recent call to sqlite3_step(). So ignore it. 
*/ + sqlite3_reset(stmt); +} + +SQLiteStmt::Use::~Use() { sqlite3_reset(stmt); } + +SQLiteStmt::Use& SQLiteStmt::Use::operator()(const std::string& value, + bool notNull) { + if (notNull) { + if (sqlite3_bind_text(stmt, curArg++, value.c_str(), -1, + SQLITE_TRANSIENT) != SQLITE_OK) { + throwSQLiteError(stmt.db, "binding argument"); + } + } else { + bind(); + } + return *this; +} + +SQLiteStmt::Use& SQLiteStmt::Use::operator()(int64_t value, bool notNull) { + if (notNull) { + if (sqlite3_bind_int64(stmt, curArg++, value) != SQLITE_OK) { + throwSQLiteError(stmt.db, "binding argument"); + } + } else { + bind(); + } + return *this; +} + +SQLiteStmt::Use& SQLiteStmt::Use::bind() { + if (sqlite3_bind_null(stmt, curArg++) != SQLITE_OK) { + throwSQLiteError(stmt.db, "binding argument"); + } + return *this; +} + +int SQLiteStmt::Use::step() { return sqlite3_step(stmt); } + +void SQLiteStmt::Use::exec() { + int r = step(); + assert(r != SQLITE_ROW); + if (r != SQLITE_DONE) { + throwSQLiteError(stmt.db, fmt("executing SQLite statement '%s'", stmt.sql)); + } +} + +bool SQLiteStmt::Use::next() { + int r = step(); + if (r != SQLITE_DONE && r != SQLITE_ROW) { + throwSQLiteError(stmt.db, fmt("executing SQLite query '%s'", stmt.sql)); + } + return r == SQLITE_ROW; +} + +std::string SQLiteStmt::Use::getStr(int col) { + auto s = (const char*)sqlite3_column_text(stmt, col); + assert(s); + return s; +} + +int64_t SQLiteStmt::Use::getInt(int col) { + // FIXME: detect nulls? 
+ return sqlite3_column_int64(stmt, col); +} + +bool SQLiteStmt::Use::isNull(int col) { + return sqlite3_column_type(stmt, col) == SQLITE_NULL; +} + +SQLiteTxn::SQLiteTxn(sqlite3* db) { + this->db = db; + if (sqlite3_exec(db, "begin;", nullptr, nullptr, nullptr) != SQLITE_OK) { + throwSQLiteError(db, "starting transaction"); + } + active = true; +} + +void SQLiteTxn::commit() { + if (sqlite3_exec(db, "commit;", nullptr, nullptr, nullptr) != SQLITE_OK) { + throwSQLiteError(db, "committing transaction"); + } + active = false; +} + +SQLiteTxn::~SQLiteTxn() { + try { + if (active && + sqlite3_exec(db, "rollback;", nullptr, nullptr, nullptr) != SQLITE_OK) { + throwSQLiteError(db, "aborting transaction"); + } + } catch (...) { + ignoreException(); + } +} + +void handleSQLiteBusy(const SQLiteBusy& e) { + static std::atomic<time_t> lastWarned{0}; + + time_t now = time(nullptr); + + if (now > lastWarned + 10) { + lastWarned = now; + LOG(ERROR) << e.what(); + } + + /* Sleep for a while since retrying the transaction right away + is likely to fail again. */ + checkInterrupt(); + struct timespec t; + t.tv_sec = 0; + t.tv_nsec = (random() % 100) * 1000 * 1000; /* <= 0.1s */ + nanosleep(&t, nullptr); +} + +} // namespace nix diff --git a/third_party/nix/src/libstore/sqlite.hh b/third_party/nix/src/libstore/sqlite.hh new file mode 100644 index 000000000000..298bb574ea2d --- /dev/null +++ b/third_party/nix/src/libstore/sqlite.hh @@ -0,0 +1,109 @@ +#pragma once + +#include <functional> +#include <string> + +#include "types.hh" + +class sqlite3; +class sqlite3_stmt; + +namespace nix { + +/* RAII wrapper to close a SQLite database automatically. 
*/ +struct SQLite { + sqlite3* db = 0; + SQLite() {} + SQLite(const Path& path); + SQLite(const SQLite& from) = delete; + SQLite& operator=(const SQLite& from) = delete; + SQLite& operator=(SQLite&& from) { + db = from.db; + from.db = 0; + return *this; + } + ~SQLite(); + operator sqlite3*() { return db; } + + void exec(const std::string& stmt); +}; + +/* RAII wrapper to create and destroy SQLite prepared statements. */ +struct SQLiteStmt { + sqlite3* db = 0; + sqlite3_stmt* stmt = 0; + std::string sql; + SQLiteStmt() {} + SQLiteStmt(sqlite3* db, const std::string& sql) { create(db, sql); } + void create(sqlite3* db, const std::string& s); + ~SQLiteStmt(); + operator sqlite3_stmt*() { return stmt; } + + /* Helper for binding / executing statements. */ + class Use { + friend struct SQLiteStmt; + + private: + SQLiteStmt& stmt; + unsigned int curArg = 1; + Use(SQLiteStmt& stmt); + + public: + ~Use(); + + /* Bind the next parameter. */ + Use& operator()(const std::string& value, bool notNull = true); + Use& operator()(int64_t value, bool notNull = true); + Use& bind(); // null + + int step(); + + /* Execute a statement that does not return rows. */ + void exec(); + + /* For statements that return 0 or more rows. Returns true iff + a row is available. */ + bool next(); + + std::string getStr(int col); + int64_t getInt(int col); + bool isNull(int col); + }; + + Use use() { return Use(*this); } +}; + +/* RAII helper that ensures transactions are aborted unless explicitly + committed. */ +struct SQLiteTxn { + bool active = false; + sqlite3* db; + + SQLiteTxn(sqlite3* db); + + void commit(); + + ~SQLiteTxn(); +}; + +MakeError(SQLiteError, Error); +MakeError(SQLiteBusy, SQLiteError); + +[[noreturn]] void throwSQLiteError(sqlite3* db, const FormatOrString& fs); + +void handleSQLiteBusy(const SQLiteBusy& e); + +/* Convenience function for retrying a SQLite transaction when the + database is busy. 
*/ +template <typename T> +T retrySQLite(std::function<T()> fun) { + while (true) { + try { + return fun(); + } catch (SQLiteBusy& e) { + handleSQLiteBusy(e); + } + } +} + +} // namespace nix diff --git a/third_party/nix/src/libstore/ssh-store.cc b/third_party/nix/src/libstore/ssh-store.cc new file mode 100644 index 000000000000..9c49babcf2b2 --- /dev/null +++ b/third_party/nix/src/libstore/ssh-store.cc @@ -0,0 +1,86 @@ +#include "archive.hh" +#include "pool.hh" +#include "remote-fs-accessor.hh" +#include "remote-store.hh" +#include "ssh.hh" +#include "store-api.hh" +#include "worker-protocol.hh" + +namespace nix { + +static std::string uriScheme = "ssh-ng://"; + +class SSHStore : public RemoteStore { + public: + const Setting<Path> sshKey{(Store*)this, "", "ssh-key", + "path to an SSH private key"}; + const Setting<bool> compress{(Store*)this, false, "compress", + "whether to compress the connection"}; + + SSHStore(const std::string& host, const Params& params) + : Store(params), + RemoteStore(params), + host(host), + master(host, sshKey, + // Use SSH master only if using more than 1 connection. + connections->capacity() > 1, compress) {} + + std::string getUri() override { return uriScheme + host; } + + bool sameMachine() override { return false; } + + void narFromPath(const Path& path, Sink& sink) override; + + ref<FSAccessor> getFSAccessor() override; + + private: + struct Connection : RemoteStore::Connection { + std::unique_ptr<SSHMaster::Connection> sshConn; + }; + + ref<RemoteStore::Connection> openConnection() override; + + std::string host; + + SSHMaster master; + + void setOptions(RemoteStore::Connection& conn) override{ + /* TODO Add a way to explicitly ask for some options to be + forwarded. One option: A way to query the daemon for its + settings, and then a series of params to SSHStore like + forward-cores or forward-overridden-cores that only + override the requested settings. 
+ */ + }; +}; + +void SSHStore::narFromPath(const Path& path, Sink& sink) { + auto conn(connections->get()); + conn->to << wopNarFromPath << path; + conn->processStderr(); + copyNAR(conn->from, sink); +} + +ref<FSAccessor> SSHStore::getFSAccessor() { + return make_ref<RemoteFSAccessor>(ref<Store>(shared_from_this())); +} + +ref<RemoteStore::Connection> SSHStore::openConnection() { + auto conn = make_ref<Connection>(); + conn->sshConn = master.startCommand("nix-daemon --stdio"); + conn->to = FdSink(conn->sshConn->in.get()); + conn->from = FdSource(conn->sshConn->out.get()); + initConnection(*conn); + return conn; +} + +static RegisterStoreImplementation regStore([](const std::string& uri, + const Store::Params& params) + -> std::shared_ptr<Store> { + if (std::string(uri, 0, uriScheme.size()) != uriScheme) { + return nullptr; + } + return std::make_shared<SSHStore>(std::string(uri, uriScheme.size()), params); +}); + +} // namespace nix diff --git a/third_party/nix/src/libstore/ssh.cc b/third_party/nix/src/libstore/ssh.cc new file mode 100644 index 000000000000..1b09eb42c6b9 --- /dev/null +++ b/third_party/nix/src/libstore/ssh.cc @@ -0,0 +1,159 @@ +#include "ssh.hh" + +#include <utility> + +#include <absl/strings/match.h> +#include <absl/strings/str_split.h> + +namespace nix { + +SSHMaster::SSHMaster(const std::string& host, std::string keyFile, + bool useMaster, bool compress, int logFD) + : host(host), + fakeSSH(host == "localhost"), + keyFile(std::move(keyFile)), + useMaster(useMaster && !fakeSSH), + compress(compress), + logFD(logFD) { + if (host.empty() || absl::StartsWith(host, "-")) { + throw Error("invalid SSH host name '%s'", host); + } +} + +void SSHMaster::addCommonSSHOpts(Strings& args) { + for (auto& i : + absl::StrSplit(getEnv("NIX_SSHOPTS"), absl::ByAnyChar(" \t\n\r"))) { + args.push_back(std::string(i)); + } + if (!keyFile.empty()) { + args.insert(args.end(), {"-i", keyFile}); + } + if (compress) { + args.push_back("-C"); + } +} + 
+std::unique_ptr<SSHMaster::Connection> SSHMaster::startCommand( + const std::string& command) { + Path socketPath = startMaster(); + + Pipe in; + Pipe out; + in.create(); + out.create(); + + auto conn = std::make_unique<Connection>(); + ProcessOptions options; + options.dieWithParent = false; + + conn->sshPid = startProcess( + [&]() { + restoreSignals(); + + close(in.writeSide.get()); + close(out.readSide.get()); + + if (dup2(in.readSide.get(), STDIN_FILENO) == -1) { + throw SysError("duping over stdin"); + } + if (dup2(out.writeSide.get(), STDOUT_FILENO) == -1) { + throw SysError("duping over stdout"); + } + if (logFD != -1 && dup2(logFD, STDERR_FILENO) == -1) { + throw SysError("duping over stderr"); + } + + Strings args; + + if (fakeSSH) { + args = {"bash", "-c"}; + } else { + args = {"ssh", host, "-x", "-a"}; + addCommonSSHOpts(args); + if (!socketPath.empty()) { + args.insert(args.end(), {"-S", socketPath}); + } + // TODO(tazjin): Abseil verbosity flag + /*if (verbosity >= lvlChatty) { + args.push_back("-v"); + }*/ + } + + args.push_back(command); + execvp(args.begin()->c_str(), stringsToCharPtrs(args).data()); + + // could not exec ssh/bash + throw SysError("unable to execute '%s'", args.front()); + }, + options); + + in.readSide = -1; + out.writeSide = -1; + + conn->out = std::move(out.readSide); + conn->in = std::move(in.writeSide); + + return conn; +} + +Path SSHMaster::startMaster() { + if (!useMaster) { + return ""; + } + + auto state(state_.lock()); + + if (state->sshMaster != -1) { + return state->socketPath; + } + + state->tmpDir = + std::make_unique<AutoDelete>(createTempDir("", "nix", true, true, 0700)); + + state->socketPath = (Path)*state->tmpDir + "/ssh.sock"; + + Pipe out; + out.create(); + + ProcessOptions options; + options.dieWithParent = false; + + state->sshMaster = startProcess( + [&]() { + restoreSignals(); + + close(out.readSide.get()); + + if (dup2(out.writeSide.get(), STDOUT_FILENO) == -1) { + throw SysError("duping over stdout"); + } 
+ + Strings args = {"ssh", host, + "-M", "-N", + "-S", state->socketPath, + "-o", "LocalCommand=echo started", + "-o", "PermitLocalCommand=yes"}; + // if (verbosity >= lvlChatty) { args.push_back("-v"); } + addCommonSSHOpts(args); + execvp(args.begin()->c_str(), stringsToCharPtrs(args).data()); + + throw SysError("unable to execute '%s'", args.front()); + }, + options); + + out.writeSide = -1; + + std::string reply; + try { + reply = readLine(out.readSide.get()); + } catch (EndOfFile& e) { + } + + if (reply != "started") { + throw Error("failed to start SSH master connection to '%s'", host); + } + + return state->socketPath; +} + +} // namespace nix diff --git a/third_party/nix/src/libstore/ssh.hh b/third_party/nix/src/libstore/ssh.hh new file mode 100644 index 000000000000..23952ccb1293 --- /dev/null +++ b/third_party/nix/src/libstore/ssh.hh @@ -0,0 +1,41 @@ +#pragma once + +#include "sync.hh" +#include "util.hh" + +namespace nix { + +class SSHMaster { + private: + const std::string host; + bool fakeSSH; + const std::string keyFile; + const bool useMaster; + const bool compress; + const int logFD; + + struct State { + Pid sshMaster; + std::unique_ptr<AutoDelete> tmpDir; + Path socketPath; + }; + + Sync<State> state_; + + void addCommonSSHOpts(Strings& args); + + public: + SSHMaster(const std::string& host, std::string keyFile, bool useMaster, + bool compress, int logFD = -1); + + struct Connection { + Pid sshPid; + AutoCloseFD out, in; + }; + + std::unique_ptr<Connection> startCommand(const std::string& command); + + Path startMaster(); +}; + +} // namespace nix diff --git a/third_party/nix/src/libstore/store-api.cc b/third_party/nix/src/libstore/store-api.cc new file mode 100644 index 000000000000..afdbc14e29a3 --- /dev/null +++ b/third_party/nix/src/libstore/store-api.cc @@ -0,0 +1,982 @@ +#include "store-api.hh" + +#include <future> +#include <utility> + +#include <absl/strings/match.h> +#include <absl/strings/numbers.h> +#include <absl/strings/str_split.h> 
+#include <glog/logging.h> + +#include "crypto.hh" +#include "derivations.hh" +#include "globals.hh" +#include "json.hh" +#include "nar-info-disk-cache.hh" +#include "thread-pool.hh" +#include "util.hh" + +namespace nix { + +bool Store::isInStore(const Path& path) const { + return isInDir(path, storeDir); +} + +bool Store::isStorePath(const Path& path) const { + return isInStore(path) && + path.size() >= storeDir.size() + 1 + storePathHashLen && + path.find('/', storeDir.size() + 1) == Path::npos; +} + +void Store::assertStorePath(const Path& path) const { + if (!isStorePath(path)) { + throw Error(format("path '%1%' is not in the Nix store") % path); + } +} + +Path Store::toStorePath(const Path& path) const { + if (!isInStore(path)) { + throw Error(format("path '%1%' is not in the Nix store") % path); + } + Path::size_type slash = path.find('/', storeDir.size() + 1); + if (slash == Path::npos) { + return path; + } + return Path(path, 0, slash); +} + +Path Store::followLinksToStore(const Path& _path) const { + Path path = absPath(_path); + while (!isInStore(path)) { + if (!isLink(path)) { + break; + } + std::string target = readLink(path); + path = absPath(target, dirOf(path)); + } + if (!isInStore(path)) { + throw Error(format("path '%1%' is not in the Nix store") % path); + } + return path; +} + +Path Store::followLinksToStorePath(const Path& path) const { + return toStorePath(followLinksToStore(path)); +} + +std::string storePathToName(const Path& path) { + auto base = baseNameOf(path); + assert(base.size() == storePathHashLen || + (base.size() > storePathHashLen && base[storePathHashLen] == '-')); + return base.size() == storePathHashLen + ? 
"" + : std::string(base, storePathHashLen + 1); +} + +std::string storePathToHash(const Path& path) { + auto base = baseNameOf(path); + assert(base.size() >= storePathHashLen); + return std::string(base, 0, storePathHashLen); +} + +void checkStoreName(const std::string& name) { + std::string validChars = "+-._?="; + + auto baseError = + format( + "The path name '%2%' is invalid: %3%. " + "Path names are alphanumeric and can include the symbols %1% " + "and must not begin with a period. " + "Note: If '%2%' is a source file and you cannot rename it on " + "disk, builtins.path { name = ... } can be used to give it an " + "alternative name.") % + validChars % name; + + /* Disallow names starting with a dot for possible security + reasons (e.g., "." and ".."). */ + if (std::string(name, 0, 1) == ".") { + throw Error(baseError % "it is illegal to start the name with a period"); + } + /* Disallow names longer than 211 characters. ext4’s max is 256, + but we need extra space for the hash and .chroot extensions. */ + if (name.length() > 211) { + throw Error(baseError % "name must be less than 212 characters"); + } + for (auto& i : name) { + if (!((i >= 'A' && i <= 'Z') || (i >= 'a' && i <= 'z') || + (i >= '0' && i <= '9') || validChars.find(i) != std::string::npos)) { + throw Error(baseError % (format("the '%1%' character is invalid") % i)); + } + } +} + +/* Store paths have the following form: + + <store>/<h>-<name> + + where + + <store> = the location of the Nix store, usually /nix/store + + <name> = a human readable name for the path, typically obtained + from the name attribute of the derivation, or the name of the + source file from which the store path is created. For derivation + outputs other than the default "out" output, the string "-<id>" + is suffixed to <name>. 
+ + <h> = base-32 representation of the first 160 bits of a SHA-256 + hash of <s>; the hash part of the store name + + <s> = the string "<type>:sha256:<h2>:<store>:<name>"; + note that it includes the location of the store as well as the + name to make sure that changes to either of those are reflected + in the hash (e.g. you won't get /nix/store/<h>-name1 and + /nix/store/<h>-name2 with equal hash parts). + + <type> = one of: + "text:<r1>:<r2>:...<rN>" + for plain text files written to the store using + addTextToStore(); <r1> ... <rN> are the references of the + path. + "source" + for paths copied to the store using addToStore() when recursive + = true and hashAlgo = "sha256" + "output:<id>" + for either the outputs created by derivations, OR paths copied + to the store using addToStore() with recursive != true or + hashAlgo != "sha256" (in that case "source" is used; it's + silly, but it's done that way for compatibility). <id> is the + name of the output (usually, "out"). + + <h2> = base-16 representation of a SHA-256 hash of: + if <type> = "text:...": + the string written to the resulting store path + if <type> = "source": + the serialisation of the path from which this store path is + copied, as returned by hashPath() + if <type> = "output:<id>": + for non-fixed derivation outputs: + the derivation (see hashDerivationModulo() in + primops.cc) + for paths copied by addToStore() or produced by fixed-output + derivations: + the string "fixed:out:<rec><algo>:<hash>:", where + <rec> = "r:" for recursive (path) hashes, or "" for flat + (file) hashes + <algo> = "md5", "sha1" or "sha256" + <hash> = base-16 representation of the path or flat hash of + the contents of the path (or expected contents of the + path for fixed-output derivations) + + It would have been nicer to handle fixed-output derivations under + "source", e.g. have something like "source:<rec><algo>", but we're + stuck with this for now... 
+ + The main reason for this way of computing names is to prevent name + collisions (for security). For instance, it shouldn't be feasible + to come up with a derivation whose output path collides with the + path for a copied source. The former would have a <s> starting with + "output:out:", while the latter would have a <s> starting with + "source:". +*/ + +Path Store::makeStorePath(const std::string& type, const Hash& hash, + const std::string& name) const { + /* e.g., "source:sha256:1abc...:/nix/store:foo.tar.gz" */ + std::string s = + type + ":" + hash.to_string(Base16) + ":" + storeDir + ":" + name; + + checkStoreName(name); + + return storeDir + "/" + + compressHash(hashString(htSHA256, s), 20).to_string(Base32, false) + + "-" + name; +} + +Path Store::makeOutputPath(const std::string& id, const Hash& hash, + const std::string& name) const { + return makeStorePath("output:" + id, hash, + name + (id == "out" ? "" : "-" + id)); +} + +Path Store::makeFixedOutputPath(bool recursive, const Hash& hash, + const std::string& name) const { + return hash.type == htSHA256 && recursive + ? makeStorePath("source", hash, name) + : makeStorePath( + "output:out", + hashString( + htSHA256, + "fixed:out:" + (recursive ? (std::string) "r:" : "") + + hash.to_string(Base16) + ":"), + name); +} + +Path Store::makeTextPath(const std::string& name, const Hash& hash, + const PathSet& references) const { + assert(hash.type == htSHA256); + /* Stuff the references (if any) into the type. This is a bit + hacky, but we can't put them in `s' since that would be + ambiguous. */ + std::string type = "text"; + for (auto& i : references) { + type += ":"; + type += i; + } + return makeStorePath(type, hash, name); +} + +std::pair<Path, Hash> Store::computeStorePathForPath(const std::string& name, + const Path& srcPath, + bool recursive, + HashType hashAlgo, + PathFilter& filter) const { + Hash h = recursive ? 
hashPath(hashAlgo, srcPath, filter).first + : hashFile(hashAlgo, srcPath); + Path dstPath = makeFixedOutputPath(recursive, h, name); + return std::pair<Path, Hash>(dstPath, h); +} + +Path Store::computeStorePathForText(const std::string& name, + const std::string& s, + const PathSet& references) const { + return makeTextPath(name, hashString(htSHA256, s), references); +} + +Store::Store(const Params& params) + : Config(params), state({(size_t)pathInfoCacheSize}) {} + +std::string Store::getUri() { return ""; } + +bool Store::isValidPath(const Path& storePath) { + assertStorePath(storePath); + + auto hashPart = storePathToHash(storePath); + + { + auto state_(state.lock()); + auto res = state_->pathInfoCache.get(hashPart); + if (res) { + stats.narInfoReadAverted++; + return *res != nullptr; + } + } + + if (diskCache) { + auto res = diskCache->lookupNarInfo(getUri(), hashPart); + if (res.first != NarInfoDiskCache::oUnknown) { + stats.narInfoReadAverted++; + auto state_(state.lock()); + state_->pathInfoCache.upsert( + hashPart, + res.first == NarInfoDiskCache::oInvalid ? nullptr : res.second); + return res.first == NarInfoDiskCache::oValid; + } + } + + bool valid = isValidPathUncached(storePath); + + if (diskCache && !valid) { + // FIXME: handle valid = true case. + diskCache->upsertNarInfo(getUri(), hashPart, nullptr); + } + + return valid; +} + +/* Default implementation for stores that only implement + queryPathInfoUncached(). */ +bool Store::isValidPathUncached(const Path& path) { + try { + queryPathInfo(path); + return true; + } catch (InvalidPath&) { + return false; + } +} + +ref<const ValidPathInfo> Store::queryPathInfo(const Path& storePath) { + std::promise<ref<ValidPathInfo>> promise; + + queryPathInfo(storePath, {[&](std::future<ref<ValidPathInfo>> result) { + try { + promise.set_value(result.get()); + } catch (...) 
{ + promise.set_exception(std::current_exception()); + } + }}); + + return promise.get_future().get(); +} + +void Store::queryPathInfo(const Path& storePath, + Callback<ref<ValidPathInfo>> callback) noexcept { + std::string hashPart; + + try { + assertStorePath(storePath); + + hashPart = storePathToHash(storePath); + + { + auto res = state.lock()->pathInfoCache.get(hashPart); + if (res) { + stats.narInfoReadAverted++; + if (!*res) { + throw InvalidPath(format("path '%s' is not valid") % storePath); + } + return callback(ref<ValidPathInfo>(*res)); + } + } + + if (diskCache) { + auto res = diskCache->lookupNarInfo(getUri(), hashPart); + if (res.first != NarInfoDiskCache::oUnknown) { + stats.narInfoReadAverted++; + { + auto state_(state.lock()); + state_->pathInfoCache.upsert( + hashPart, + res.first == NarInfoDiskCache::oInvalid ? nullptr : res.second); + if (res.first == NarInfoDiskCache::oInvalid || + (res.second->path != storePath && + !storePathToName(storePath).empty())) { + throw InvalidPath(format("path '%s' is not valid") % storePath); + } + } + return callback(ref<ValidPathInfo>(res.second)); + } + } + + } catch (...) { + return callback.rethrow(); + } + + auto callbackPtr = std::make_shared<decltype(callback)>(std::move(callback)); + + queryPathInfoUncached( + storePath, {[this, storePath, hashPart, callbackPtr]( + std::future<std::shared_ptr<ValidPathInfo>> fut) { + try { + auto info = fut.get(); + + if (diskCache) { + diskCache->upsertNarInfo(getUri(), hashPart, info); + } + + { + auto state_(state.lock()); + state_->pathInfoCache.upsert(hashPart, info); + } + + if (!info || (info->path != storePath && + !storePathToName(storePath).empty())) { + stats.narInfoMissing++; + throw InvalidPath("path '%s' is not valid", storePath); + } + + (*callbackPtr)(ref<ValidPathInfo>(info)); + } catch (...) 
{ + callbackPtr->rethrow(); + } + }}); +} + +PathSet Store::queryValidPaths(const PathSet& paths, + SubstituteFlag maybeSubstitute) { + struct State { + size_t left; + PathSet valid; + std::exception_ptr exc; + }; + + Sync<State> state_(State{paths.size(), PathSet()}); + + std::condition_variable wakeup; + ThreadPool pool; + + auto doQuery = [&](const Path& path) { + checkInterrupt(); + queryPathInfo( + path, {[path, &state_, &wakeup](std::future<ref<ValidPathInfo>> fut) { + auto state(state_.lock()); + try { + auto info = fut.get(); + state->valid.insert(path); + } catch (InvalidPath&) { + } catch (...) { + state->exc = std::current_exception(); + } + assert(state->left); + if (--state->left == 0u) { + wakeup.notify_one(); + } + }}); + }; + + for (auto& path : paths) { + pool.enqueue(std::bind(doQuery, path)); + } + + pool.process(); + + while (true) { + auto state(state_.lock()); + if (state->left == 0u) { + if (state->exc) { + std::rethrow_exception(state->exc); + } + return state->valid; + } + state.wait(wakeup); + } +} + +/* Return a string accepted by decodeValidPathInfo() that + registers the specified paths as valid. Note: it's the + responsibility of the caller to provide a closure. */ +std::string Store::makeValidityRegistration(const PathSet& paths, + bool showDerivers, bool showHash) { + std::string s = s; + + for (auto& i : paths) { + s += i + "\n"; + + auto info = queryPathInfo(i); + + if (showHash) { + s += info->narHash.to_string(Base16, false) + "\n"; + s += (format("%1%\n") % info->narSize).str(); + } + + Path deriver = showDerivers ? 
info->deriver : ""; + s += deriver + "\n"; + + s += (format("%1%\n") % info->references.size()).str(); + + for (auto& j : info->references) { + s += j + "\n"; + } + } + + return s; +} + +void Store::pathInfoToJSON(JSONPlaceholder& jsonOut, const PathSet& storePaths, + bool includeImpureInfo, bool showClosureSize, + AllowInvalidFlag allowInvalid) { + auto jsonList = jsonOut.list(); + + for (auto storePath : storePaths) { + auto jsonPath = jsonList.object(); + jsonPath.attr("path", storePath); + + try { + auto info = queryPathInfo(storePath); + storePath = info->path; + + jsonPath.attr("narHash", info->narHash.to_string()) + .attr("narSize", info->narSize); + + { + auto jsonRefs = jsonPath.list("references"); + for (auto& ref : info->references) { + jsonRefs.elem(ref); + } + } + + if (!info->ca.empty()) { + jsonPath.attr("ca", info->ca); + } + + std::pair<uint64_t, uint64_t> closureSizes; + + if (showClosureSize) { + closureSizes = getClosureSize(storePath); + jsonPath.attr("closureSize", closureSizes.first); + } + + if (includeImpureInfo) { + if (!info->deriver.empty()) { + jsonPath.attr("deriver", info->deriver); + } + + if (info->registrationTime != 0) { + jsonPath.attr("registrationTime", info->registrationTime); + } + + if (info->ultimate) { + jsonPath.attr("ultimate", info->ultimate); + } + + if (!info->sigs.empty()) { + auto jsonSigs = jsonPath.list("signatures"); + for (auto& sig : info->sigs) { + jsonSigs.elem(sig); + } + } + + auto narInfo = std::dynamic_pointer_cast<const NarInfo>( + std::shared_ptr<const ValidPathInfo>(info)); + + if (narInfo) { + if (!narInfo->url.empty()) { + jsonPath.attr("url", narInfo->url); + } + if (narInfo->fileHash) { + jsonPath.attr("downloadHash", narInfo->fileHash.to_string()); + } + if (narInfo->fileSize != 0u) { + jsonPath.attr("downloadSize", narInfo->fileSize); + } + if (showClosureSize) { + jsonPath.attr("closureDownloadSize", closureSizes.second); + } + } + } + + } catch (InvalidPath&) { + jsonPath.attr("valid", false); 
+ } + } +} + +std::pair<uint64_t, uint64_t> Store::getClosureSize(const Path& storePath) { + uint64_t totalNarSize = 0; + uint64_t totalDownloadSize = 0; + PathSet closure; + computeFSClosure(storePath, closure, false, false); + for (auto& p : closure) { + auto info = queryPathInfo(p); + totalNarSize += info->narSize; + auto narInfo = std::dynamic_pointer_cast<const NarInfo>( + std::shared_ptr<const ValidPathInfo>(info)); + if (narInfo) { + totalDownloadSize += narInfo->fileSize; + } + } + return {totalNarSize, totalDownloadSize}; +} + +const Store::Stats& Store::getStats() { + { + auto state_(state.lock()); + stats.pathInfoCacheSize = state_->pathInfoCache.size(); + } + return stats; +} + +void Store::buildPaths(const PathSet& paths, BuildMode buildMode) { + for (auto& path : paths) { + if (isDerivation(path)) { + unsupported("buildPaths"); + } + } + + if (queryValidPaths(paths).size() != paths.size()) { + unsupported("buildPaths"); + } +} + +void copyStorePath(ref<Store> srcStore, const ref<Store>& dstStore, + const Path& storePath, RepairFlag repair, + CheckSigsFlag checkSigs) { + auto srcUri = srcStore->getUri(); + auto dstUri = dstStore->getUri(); + + if (srcUri == "local" || srcUri == "daemon") { + LOG(INFO) << "copying path '" << storePath << "' to '" << dstUri << "'"; + } else { + if (dstUri == "local" || dstUri == "daemon") { + LOG(INFO) << "copying path '" << storePath << "' from '" << srcUri << "'"; + } else { + LOG(INFO) << "copying path '" << storePath << "' from '" << srcUri + << "' to '" << dstUri << "'"; + } + } + + auto info = srcStore->queryPathInfo(storePath); + + uint64_t total = 0; + + if (!info->narHash) { + StringSink sink; + srcStore->narFromPath({storePath}, sink); + auto info2 = make_ref<ValidPathInfo>(*info); + info2->narHash = hashString(htSHA256, *sink.s); + if (info->narSize == 0u) { + info2->narSize = sink.s->size(); + } + if (info->ultimate) { + info2->ultimate = false; + } + info = info2; + + StringSource source(*sink.s); + 
dstStore->addToStore(*info, source, repair, checkSigs); + return; + } + + if (info->ultimate) { + auto info2 = make_ref<ValidPathInfo>(*info); + info2->ultimate = false; + info = info2; + } + + auto source = sinkToSource( + [&](Sink& sink) { + LambdaSink wrapperSink([&](const unsigned char* data, size_t len) { + sink(data, len); + total += len; + }); + srcStore->narFromPath({storePath}, wrapperSink); + }, + [&]() { + throw EndOfFile("NAR for '%s' fetched from '%s' is incomplete", + storePath, srcStore->getUri()); + }); + + dstStore->addToStore(*info, *source, repair, checkSigs); +} + +void copyPaths(ref<Store> srcStore, ref<Store> dstStore, + const PathSet& storePaths, RepairFlag repair, + CheckSigsFlag checkSigs, SubstituteFlag substitute) { + PathSet valid = dstStore->queryValidPaths(storePaths, substitute); + + PathSet missing; + for (auto& path : storePaths) { + if (valid.count(path) == 0u) { + missing.insert(path); + } + } + + if (missing.empty()) { + return; + } + + LOG(INFO) << "copying " << missing.size() << " paths"; + + std::atomic<size_t> nrDone{0}; + std::atomic<size_t> nrFailed{0}; + std::atomic<uint64_t> bytesExpected{0}; + std::atomic<uint64_t> nrRunning{0}; + + ThreadPool pool; + + processGraph<Path>( + pool, PathSet(missing.begin(), missing.end()), + + [&](const Path& storePath) { + if (dstStore->isValidPath(storePath)) { + nrDone++; + return PathSet(); + } + + auto info = srcStore->queryPathInfo(storePath); + + bytesExpected += info->narSize; + + return info->references; + }, + + [&](const Path& storePath) { + checkInterrupt(); + + if (!dstStore->isValidPath(storePath)) { + MaintainCount<decltype(nrRunning)> mc(nrRunning); + try { + copyStorePath(srcStore, dstStore, storePath, repair, checkSigs); + } catch (Error& e) { + nrFailed++; + if (!settings.keepGoing) { + throw e; + } + LOG(ERROR) << "could not copy " << storePath << ": " << e.what(); + return; + } + } + + nrDone++; + }); +} + +void copyClosure(const ref<Store>& srcStore, const ref<Store>& 
dstStore, + const PathSet& storePaths, RepairFlag repair, + CheckSigsFlag checkSigs, SubstituteFlag substitute) { + PathSet closure; + srcStore->computeFSClosure({storePaths}, closure); + copyPaths(srcStore, dstStore, closure, repair, checkSigs, substitute); +} + +ValidPathInfo decodeValidPathInfo(std::istream& str, bool hashGiven) { + ValidPathInfo info; + getline(str, info.path); + if (str.eof()) { + info.path = ""; + return info; + } + if (hashGiven) { + std::string s; + getline(str, s); + info.narHash = Hash(s, htSHA256); + getline(str, s); + if (!absl::SimpleAtoi(s, &info.narSize)) { + throw Error("number expected"); + } + } + getline(str, info.deriver); + std::string s; + int n; + getline(str, s); + if (!absl::SimpleAtoi(s, &n)) { + throw Error("number expected"); + } + while ((n--) != 0) { + getline(str, s); + info.references.insert(s); + } + if (!str || str.eof()) { + throw Error("missing input"); + } + return info; +} + +std::string showPaths(const PathSet& paths) { + std::string s; + for (auto& i : paths) { + if (!s.empty()) { + s += ", "; + } + s += "'" + i + "'"; + } + return s; +} + +std::string ValidPathInfo::fingerprint() const { + if (narSize == 0 || !narHash) { + throw Error(format("cannot calculate fingerprint of path '%s' because its " + "size/hash is not known") % + path); + } + return "1;" + path + ";" + narHash.to_string(Base32) + ";" + + std::to_string(narSize) + ";" + concatStringsSep(",", references); +} + +void ValidPathInfo::sign(const SecretKey& secretKey) { + sigs.insert(secretKey.signDetached(fingerprint())); +} + +bool ValidPathInfo::isContentAddressed(const Store& store) const { + auto warn = [&]() { + LOG(ERROR) << "warning: path '" << path + << "' claims to be content-addressed but isn't"; + }; + + if (absl::StartsWith(ca, "text:")) { + Hash hash(std::string(ca, 5)); + if (store.makeTextPath(storePathToName(path), hash, references) == path) { + return true; + } + warn(); + + } + + else if (absl::StartsWith(ca, "fixed:")) { + bool 
recursive = ca.compare(6, 2, "r:") == 0; + Hash hash(std::string(ca, recursive ? 8 : 6)); + if (references.empty() && + store.makeFixedOutputPath(recursive, hash, storePathToName(path)) == + path) { + return true; + } + warn(); + } + + return false; +} + +size_t ValidPathInfo::checkSignatures(const Store& store, + const PublicKeys& publicKeys) const { + if (isContentAddressed(store)) { + return maxSigs; + } + + size_t good = 0; + for (auto& sig : sigs) { + if (checkSignature(publicKeys, sig)) { + good++; + } + } + return good; +} + +bool ValidPathInfo::checkSignature(const PublicKeys& publicKeys, + const std::string& sig) const { + return verifyDetached(fingerprint(), sig, publicKeys); +} + +Strings ValidPathInfo::shortRefs() const { + Strings refs; + for (auto& r : references) { + refs.push_back(baseNameOf(r)); + } + return refs; +} + +std::string makeFixedOutputCA(bool recursive, const Hash& hash) { + return "fixed:" + (recursive ? (std::string) "r:" : "") + hash.to_string(); +} + +void Store::addToStore(const ValidPathInfo& info, Source& narSource, + RepairFlag repair, CheckSigsFlag checkSigs, + std::shared_ptr<FSAccessor> accessor) { + addToStore(info, make_ref<std::string>(narSource.drain()), repair, checkSigs, + std::move(accessor)); +} + +void Store::addToStore(const ValidPathInfo& info, const ref<std::string>& nar, + RepairFlag repair, CheckSigsFlag checkSigs, + std::shared_ptr<FSAccessor> accessor) { + StringSource source(*nar); + addToStore(info, source, repair, checkSigs, std::move(accessor)); +} + +} // namespace nix + +#include "local-store.hh" +#include "remote-store.hh" + +namespace nix { + +RegisterStoreImplementation::Implementations* + RegisterStoreImplementation::implementations = nullptr; + +/* Split URI into protocol+hierarchy part and its parameter set. 
*/ +std::pair<std::string, Store::Params> splitUriAndParams( + const std::string& uri_) { + auto uri(uri_); + Store::Params params; + auto q = uri.find('?'); + if (q != std::string::npos) { + Strings parts = absl::StrSplit(uri.substr(q + 1), absl::ByChar('&')); + for (const auto& s : parts) { + auto e = s.find('='); + if (e != std::string::npos) { + auto value = s.substr(e + 1); + std::string decoded; + for (size_t i = 0; i < value.size();) { + if (value[i] == '%') { + if (i + 2 >= value.size()) { + throw Error("invalid URI parameter '%s'", value); + } + try { + decoded += std::stoul(std::string(value, i + 1, 2), nullptr, 16); + i += 3; + } catch (...) { + throw Error("invalid URI parameter '%s'", value); + } + } else { + decoded += value[i++]; + } + } + params[s.substr(0, e)] = decoded; + } + } + uri = uri_.substr(0, q); + } + return {uri, params}; +} + +ref<Store> openStore(const std::string& uri_, + const Store::Params& extraParams) { + auto [uri, uriParams] = splitUriAndParams(uri_); + auto params = extraParams; + params.insert(uriParams.begin(), uriParams.end()); + + for (const auto& fun : *RegisterStoreImplementation::implementations) { + auto store = fun(uri, params); + if (store) { + store->warnUnknownSettings(); + return ref<Store>(store); + } + } + + throw Error("don't know how to open Nix store '%s'", uri); +} + +StoreType getStoreType(const std::string& uri, const std::string& stateDir) { + if (uri == "daemon") { + return tDaemon; + } + if (uri == "local" || absl::StartsWith(uri, "/")) { + return tLocal; + } else if (uri.empty() || uri == "auto") { + if (access(stateDir.c_str(), R_OK | W_OK) == 0) { + return tLocal; + } + if (pathExists(settings.nixDaemonSocketFile)) { + return tDaemon; + } else { + return tLocal; + } + } else { + return tOther; + } +} + +static RegisterStoreImplementation regStore([](const std::string& uri, + const Store::Params& params) + -> std::shared_ptr<Store> { + switch (getStoreType(uri, get(params, "state", 
settings.nixStateDir))) { + case tDaemon: + return std::shared_ptr<Store>(std::make_shared<UDSRemoteStore>(params)); + case tLocal: { + Store::Params params2 = params; + if (absl::StartsWith(uri, "/")) { + params2["root"] = uri; + } + return std::shared_ptr<Store>(std::make_shared<LocalStore>(params2)); + } + default: + return nullptr; + } +}); + +std::list<ref<Store>> getDefaultSubstituters() { + static auto stores([]() { + std::list<ref<Store>> stores; + + StringSet done; + + auto addStore = [&](const std::string& uri) { + if (done.count(uri) != 0u) { + return; + } + done.insert(uri); + try { + stores.push_back(openStore(uri)); + } catch (Error& e) { + LOG(WARNING) << e.what(); + } + }; + + for (const auto& uri : settings.substituters.get()) { + addStore(uri); + } + + for (const auto& uri : settings.extraSubstituters.get()) { + addStore(uri); + } + + stores.sort([](ref<Store>& a, ref<Store>& b) { + return a->getPriority() < b->getPriority(); + }); + + return stores; + }()); + + return stores; +} + +} // namespace nix diff --git a/third_party/nix/src/libstore/store-api.hh b/third_party/nix/src/libstore/store-api.hh new file mode 100644 index 000000000000..cf2520c6ca98 --- /dev/null +++ b/third_party/nix/src/libstore/store-api.hh @@ -0,0 +1,772 @@ +#pragma once + +#include <atomic> +#include <limits> +#include <map> +#include <memory> +#include <string> +#include <unordered_map> +#include <unordered_set> + +#include "config.hh" +#include "crypto.hh" +#include "globals.hh" +#include "hash.hh" +#include "lru-cache.hh" +#include "serialise.hh" +#include "sync.hh" + +namespace nix { + +MakeError(SubstError, Error); +MakeError(BuildError, Error); /* denotes a permanent build failure */ +MakeError(InvalidPath, Error); +MakeError(Unsupported, Error); +MakeError(SubstituteGone, Error); +MakeError(SubstituterDisabled, Error); + +struct BasicDerivation; +struct Derivation; +class FSAccessor; +class NarInfoDiskCache; +class Store; +class JSONPlaceholder; + +enum RepairFlag : 
bool { NoRepair = false, Repair = true }; +enum CheckSigsFlag : bool { NoCheckSigs = false, CheckSigs = true }; +enum SubstituteFlag : bool { NoSubstitute = false, Substitute = true }; +enum AllowInvalidFlag : bool { DisallowInvalid = false, AllowInvalid = true }; + +/* Size of the hash part of store paths, in base-32 characters. */ +const size_t storePathHashLen = 32; // i.e. 160 bits + +/* Magic header of exportPath() output (obsolete). */ +const uint32_t exportMagic = 0x4558494e; + +typedef std::unordered_map<Path, std::unordered_set<std::string>> Roots; + +struct GCOptions { + /* Garbage collector operation: + + - `gcReturnLive': return the set of paths reachable from + (i.e. in the closure of) the roots. + + - `gcReturnDead': return the set of paths not reachable from + the roots. + + - `gcDeleteDead': actually delete the latter set. + + - `gcDeleteSpecific': delete the paths listed in + `pathsToDelete', insofar as they are not reachable. + */ + typedef enum { + gcReturnLive, + gcReturnDead, + gcDeleteDead, + gcDeleteSpecific, + } GCAction; + + GCAction action{gcDeleteDead}; + + /* If `ignoreLiveness' is set, then reachability from the roots is + ignored (dangerous!). However, the paths must still be + unreferenced *within* the store (i.e., there can be no other + store paths that depend on them). */ + bool ignoreLiveness{false}; + + /* For `gcDeleteSpecific', the paths to delete. */ + PathSet pathsToDelete; + + /* Stop after at least `maxFreed' bytes have been freed. */ + unsigned long long maxFreed{std::numeric_limits<unsigned long long>::max()}; +}; + +struct GCResults { + /* Depending on the action, the GC roots, or the paths that would + be or have been deleted. */ + PathSet paths; + + /* For `gcReturnDead', `gcDeleteDead' and `gcDeleteSpecific', the + number of bytes that would be or was freed. 
*/ + unsigned long long bytesFreed = 0; +}; + +struct SubstitutablePathInfo { + Path deriver; + PathSet references; + unsigned long long downloadSize; /* 0 = unknown or inapplicable */ + unsigned long long narSize; /* 0 = unknown */ +}; + +typedef std::map<Path, SubstitutablePathInfo> SubstitutablePathInfos; + +struct ValidPathInfo { + Path path; + Path deriver; + Hash narHash; + PathSet references; + time_t registrationTime = 0; + uint64_t narSize = 0; // 0 = unknown + uint64_t id; // internal use only + + /* Whether the path is ultimately trusted, that is, it's a + derivation output that was built locally. */ + bool ultimate = false; + + StringSet sigs; // note: not necessarily verified + + /* If non-empty, an assertion that the path is content-addressed, + i.e., that the store path is computed from a cryptographic hash + of the contents of the path, plus some other bits of data like + the "name" part of the path. Such a path doesn't need + signatures, since we don't have to trust anybody's claim that + the path is the output of a particular derivation. (In the + extensional store model, we have to trust that the *contents* + of an output path of a derivation were actually produced by + that derivation. In the intensional model, we have to trust + that a particular output path was produced by a derivation; the + path then implies the contents.) + + Ideally, the content-addressability assertion would just be a + Boolean, and the store path would be computed from + ‘storePathToName(path)’, ‘narHash’ and ‘references’. However, + 1) we've accumulated several types of content-addressed paths + over the years; and 2) fixed-output derivations support + multiple hash algorithms and serialisation methods (flat file + vs NAR). Thus, ‘ca’ has one of the following forms: + + * ‘text:sha256:<sha256 hash of file contents>’: For paths + computed by makeTextPath() / addTextToStore(). + + * ‘fixed:<r?>:<ht>:<h>’: For paths computed by + makeFixedOutputPath() / addToStore(). 
+ */ + std::string ca; + + bool operator==(const ValidPathInfo& i) const { + return path == i.path && narHash == i.narHash && references == i.references; + } + + /* Return a fingerprint of the store path to be used in binary + cache signatures. It contains the store path, the base-32 + SHA-256 hash of the NAR serialisation of the path, the size of + the NAR, and the sorted references. The size field is strictly + speaking superfluous, but might prevent endless/excessive data + attacks. */ + std::string fingerprint() const; + + void sign(const SecretKey& secretKey); + + /* Return true iff the path is verifiably content-addressed. */ + bool isContentAddressed(const Store& store) const; + + static const size_t maxSigs = std::numeric_limits<size_t>::max(); + + /* Return the number of signatures on this .narinfo that were + produced by one of the specified keys, or maxSigs if the path + is content-addressed. */ + size_t checkSignatures(const Store& store, + const PublicKeys& publicKeys) const; + + /* Verify a single signature. */ + bool checkSignature(const PublicKeys& publicKeys, + const std::string& sig) const; + + Strings shortRefs() const; + + virtual ~ValidPathInfo() {} +}; + +typedef std::list<ValidPathInfo> ValidPathInfos; + +enum BuildMode { bmNormal, bmRepair, bmCheck }; + +struct BuildResult { + /* Note: don't remove status codes, and only add new status codes + at the end of the list, to prevent client/server + incompatibilities in the nix-store --serve protocol. */ + enum Status { + Built = 0, + Substituted, + AlreadyValid, + PermanentFailure, + InputRejected, + OutputRejected, + TransientFailure, // possibly transient + CachedFailure, // no longer used + TimedOut, + MiscFailure, + DependencyFailed, + LogLimitExceeded, + NotDeterministic, + } status = MiscFailure; + std::string errorMsg; + + /* How many times this build was performed. */ + unsigned int timesBuilt = 0; + + /* If timesBuilt > 1, whether some builds did not produce the same + result. 
(Note that 'isNonDeterministic = false' does not mean + the build is deterministic, just that we don't have evidence of + non-determinism.) */ + bool isNonDeterministic = false; + + /* The start/stop times of the build (or one of the rounds, if it + was repeated). */ + time_t startTime = 0, stopTime = 0; + + bool success() { + return status == Built || status == Substituted || status == AlreadyValid; + } +}; + +class Store : public std::enable_shared_from_this<Store>, public Config { + public: + typedef std::map<std::string, std::string> Params; + + const PathSetting storeDir_{this, false, settings.nixStore, "store", + "path to the Nix store"}; + const Path storeDir = storeDir_; + + const Setting<int> pathInfoCacheSize{ + this, 65536, "path-info-cache-size", + "size of the in-memory store path information cache"}; + + const Setting<bool> isTrusted{ + this, false, "trusted", + "whether paths from this store can be used as substitutes even when they " + "lack trusted signatures"}; + + protected: + struct State { + LRUCache<std::string, std::shared_ptr<ValidPathInfo>> pathInfoCache; + }; + + Sync<State> state; + + std::shared_ptr<NarInfoDiskCache> diskCache; + + Store(const Params& params); + + public: + virtual ~Store() {} + + virtual std::string getUri() = 0; + + /* Return true if ‘path’ is in the Nix store (but not the Nix + store itself). */ + bool isInStore(const Path& path) const; + + /* Return true if ‘path’ is a store path, i.e. a direct child of + the Nix store. */ + bool isStorePath(const Path& path) const; + + /* Throw an exception if ‘path’ is not a store path. */ + void assertStorePath(const Path& path) const; + + /* Chop off the parts after the top-level store name, e.g., + /nix/store/abcd-foo/bar => /nix/store/abcd-foo. */ + Path toStorePath(const Path& path) const; + + /* Follow symlinks until we end up with a path in the Nix store. 
*/ + Path followLinksToStore(const Path& path) const; + + /* Same as followLinksToStore(), but apply toStorePath() to the + result. */ + Path followLinksToStorePath(const Path& path) const; + + /* Constructs a unique store path name. */ + Path makeStorePath(const std::string& type, const Hash& hash, + const std::string& name) const; + + Path makeOutputPath(const std::string& id, const Hash& hash, + const std::string& name) const; + + Path makeFixedOutputPath(bool recursive, const Hash& hash, + const std::string& name) const; + + Path makeTextPath(const std::string& name, const Hash& hash, + const PathSet& references) const; + + /* This is the preparatory part of addToStore(); it computes the + store path to which srcPath is to be copied. Returns the store + path and the cryptographic hash of the contents of srcPath. */ + std::pair<Path, Hash> computeStorePathForPath( + const std::string& name, const Path& srcPath, bool recursive = true, + HashType hashAlgo = htSHA256, + PathFilter& filter = defaultPathFilter) const; + + /* Preparatory part of addTextToStore(). + + !!! Computation of the path should take the references given to + addTextToStore() into account, otherwise we have a (relatively + minor) security hole: a caller can register a source file with + bogus references. If there are too many references, the path may + not be garbage collected when it has to be (not really a problem, + the caller could create a root anyway), or it may be garbage + collected when it shouldn't be (more serious). + + Hashing the references would solve this (bogus references would + simply yield a different store path, so other users wouldn't be + affected), but it has some backwards compatibility issues (the + hashing scheme changes), so I'm not doing that for now. */ + Path computeStorePathForText(const std::string& name, const std::string& s, + const PathSet& references) const; + + /* Check whether a path is valid. 
*/ + bool isValidPath(const Path& path); + + protected: + virtual bool isValidPathUncached(const Path& path); + + public: + /* Query which of the given paths is valid. Optionally, try to + substitute missing paths. */ + virtual PathSet queryValidPaths( + const PathSet& paths, SubstituteFlag maybeSubstitute = NoSubstitute); + + /* Query the set of all valid paths. Note that for some store + backends, the name part of store paths may be omitted + (i.e. you'll get /nix/store/<hash> rather than + /nix/store/<hash>-<name>). Use queryPathInfo() to obtain the + full store path. */ + virtual PathSet queryAllValidPaths() { unsupported("queryAllValidPaths"); } + + /* Query information about a valid path. It is permitted to omit + the name part of the store path. */ + ref<const ValidPathInfo> queryPathInfo(const Path& path); + + /* Asynchronous version of queryPathInfo(). */ + void queryPathInfo(const Path& path, + Callback<ref<ValidPathInfo>> callback) noexcept; + + protected: + virtual void queryPathInfoUncached( + const Path& path, + Callback<std::shared_ptr<ValidPathInfo>> callback) noexcept = 0; + + public: + /* Queries the set of incoming FS references for a store path. + The result is not cleared. */ + virtual void queryReferrers(const Path& path, PathSet& referrers) { + unsupported("queryReferrers"); + } + + /* Return all currently valid derivations that have `path' as an + output. (Note that the result of `queryDeriver()' is the + derivation that was actually used to produce `path', which may + not exist anymore.) */ + virtual PathSet queryValidDerivers(const Path& path) { return {}; }; + + /* Query the outputs of the derivation denoted by `path'. */ + virtual PathSet queryDerivationOutputs(const Path& path) { + unsupported("queryDerivationOutputs"); + } + + /* Query the output names of the derivation denoted by `path'. 
*/ + virtual StringSet queryDerivationOutputNames(const Path& path) { + unsupported("queryDerivationOutputNames"); + } + + /* Query the full store path given the hash part of a valid store + path, or "" if the path doesn't exist. */ + virtual Path queryPathFromHashPart(const std::string& hashPart) = 0; + + /* Query which of the given paths have substitutes. */ + virtual PathSet querySubstitutablePaths(const PathSet& paths) { return {}; }; + + /* Query substitute info (i.e. references, derivers and download + sizes) of a set of paths. If a path does not have substitute + info, it's omitted from the resulting ‘infos’ map. */ + virtual void querySubstitutablePathInfos(const PathSet& paths, + SubstitutablePathInfos& infos) { + return; + }; + + virtual bool wantMassQuery() { return false; } + + /* Import a path into the store. */ + virtual void addToStore(const ValidPathInfo& info, Source& narSource, + RepairFlag repair = NoRepair, + CheckSigsFlag checkSigs = CheckSigs, + std::shared_ptr<FSAccessor> accessor = 0); + + // FIXME: remove + virtual void addToStore(const ValidPathInfo& info, + const ref<std::string>& nar, + RepairFlag repair = NoRepair, + CheckSigsFlag checkSigs = CheckSigs, + std::shared_ptr<FSAccessor> accessor = 0); + + /* Copy the contents of a path to the store and register the + validity of the resulting path. The resulting path is returned. + The function object `filter' can be used to exclude files (see + libutil/archive.hh). */ + virtual Path addToStore(const std::string& name, const Path& srcPath, + bool recursive = true, HashType hashAlgo = htSHA256, + PathFilter& filter = defaultPathFilter, + RepairFlag repair = NoRepair) = 0; + + /* Like addToStore, but the contents written to the output path is + a regular file containing the given string. */ + virtual Path addTextToStore(const std::string& name, const std::string& s, + const PathSet& references, + RepairFlag repair = NoRepair) = 0; + + /* Write a NAR dump of a store path. 
*/ + virtual void narFromPath(const Path& path, Sink& sink) = 0; + + /* For each path, if it's a derivation, build it. Building a + derivation means ensuring that the output paths are valid. If + they are already valid, this is a no-op. Otherwise, validity + can be reached in two ways. First, if the output path is + substitutable, then build the path that way. Second, the + output paths can be created by running the builder, after + recursively building any sub-derivations. For inputs that are + not derivations, substitute them. */ + virtual void buildPaths(const PathSet& paths, BuildMode buildMode = bmNormal); + + /* Build a single non-materialized derivation (i.e. not from an + on-disk .drv file). Note that ‘drvPath’ is only used for + informational purposes. */ + virtual BuildResult buildDerivation(const Path& drvPath, + const BasicDerivation& drv, + BuildMode buildMode = bmNormal) = 0; + + /* Ensure that a path is valid. If it is not currently valid, it + may be made valid by running a substitute (if defined for the + path). */ + virtual void ensurePath(const Path& path) = 0; + + /* Add a store path as a temporary root of the garbage collector. + The root disappears as soon as we exit. */ + virtual void addTempRoot(const Path& path) { unsupported("addTempRoot"); } + + /* Add an indirect root, which is merely a symlink to `path' from + /nix/var/nix/gcroots/auto/<hash of `path'>. `path' is supposed + to be a symlink to a store path. The garbage collector will + automatically remove the indirect root when it finds that + `path' has disappeared. */ + virtual void addIndirectRoot(const Path& path) { + unsupported("addIndirectRoot"); + } + + /* Acquire the global GC lock, then immediately release it. This + function must be called after registering a new permanent root, + but before exiting. Otherwise, it is possible that a running + garbage collector doesn't see the new root and deletes the + stuff we've just built. 
By acquiring the lock briefly, we + ensure that either: + + - The collector is already running, and so we block until the + collector is finished. The collector will know about our + *temporary* locks, which should include whatever it is we + want to register as a permanent lock. + + - The collector isn't running, or it's just started but hasn't + acquired the GC lock yet. In that case we get and release + the lock right away, then exit. The collector scans the + permanent roots and sees ours. + + In either case the permanent root is seen by the collector. */ + virtual void syncWithGC(){}; + + /* Find the roots of the garbage collector. Each root is a pair + (link, storePath) where `link' is the path of the symlink + outside of the Nix store that points to `storePath'. If + 'censor' is true, privacy-sensitive information about roots + found in /proc is censored. */ + virtual Roots findRoots(bool censor) { unsupported("findRoots"); } + + /* Perform a garbage collection. */ + virtual void collectGarbage(const GCOptions& options, GCResults& results) { + unsupported("collectGarbage"); + } + + /* Return a string representing information about the path that + can be loaded into the database using `nix-store --load-db' or + `nix-store --register-validity'. */ + std::string makeValidityRegistration(const PathSet& paths, bool showDerivers, + bool showHash); + + /* Write a JSON representation of store path metadata, such as the + hash and the references. If ‘includeImpureInfo’ is true, + variable elements such as the registration time are + included. If ‘showClosureSize’ is true, the closure size of + each path is included. */ + void pathInfoToJSON(JSONPlaceholder& jsonOut, const PathSet& storePaths, + bool includeImpureInfo, bool showClosureSize, + AllowInvalidFlag allowInvalid = DisallowInvalid); + + /* Return the size of the closure of the specified path, that is, + the sum of the size of the NAR serialisation of each path in + the closure. 
*/ + std::pair<uint64_t, uint64_t> getClosureSize(const Path& storePath); + + /* Optimise the disk space usage of the Nix store by hard-linking files + with the same contents. */ + virtual void optimiseStore(){}; + + /* Check the integrity of the Nix store. Returns true if errors + remain. */ + virtual bool verifyStore(bool checkContents, RepairFlag repair = NoRepair) { + return false; + }; + + /* Return an object to access files in the Nix store. */ + virtual ref<FSAccessor> getFSAccessor() { unsupported("getFSAccessor"); } + + /* Add signatures to the specified store path. The signatures are + not verified. */ + virtual void addSignatures(const Path& storePath, const StringSet& sigs) { + unsupported("addSignatures"); + } + + /* Utility functions. */ + + /* Read a derivation, after ensuring its existence through + ensurePath(). */ + Derivation derivationFromPath(const Path& drvPath); + + /* Place in `paths_' the set of all store paths in the file system + closure of the given path(s); that is, all paths that can be directly + or indirectly reached from them. `paths_' is not cleared. If + `flipDirection' is true, the set of paths that can reach + the given path(s) is returned; that is, the closure under the + `referrers' relation instead of the `references' relation is + returned. */ + virtual void computeFSClosure(const PathSet& paths, PathSet& paths_, + bool flipDirection = false, + bool includeOutputs = false, + bool includeDerivers = false); + + void computeFSClosure(const Path& path, PathSet& paths_, + bool flipDirection = false, bool includeOutputs = false, + bool includeDerivers = false); + + /* Given a set of paths that are to be built, return the set of + derivations that will be built, and the set of output paths + that will be substituted. 
*/ + virtual void queryMissing(const PathSet& targets, PathSet& willBuild, + PathSet& willSubstitute, PathSet& unknown, + unsigned long long& downloadSize, + unsigned long long& narSize); + + /* Sort a set of paths topologically under the references + relation. If p refers to q, then p precedes q in this list. */ + Paths topoSortPaths(const PathSet& paths); + + /* Export multiple paths in the format expected by ‘nix-store + --import’. */ + void exportPaths(const Paths& paths, Sink& sink); + + void exportPath(const Path& path, Sink& sink); + + /* Import a sequence of NAR dumps created by exportPaths() into + the Nix store. Optionally, the contents of the NARs are + preloaded into the specified FS accessor to speed up subsequent + access. */ + Paths importPaths(Source& source, const std::shared_ptr<FSAccessor>& accessor, + CheckSigsFlag checkSigs = CheckSigs); + + struct Stats { + std::atomic<uint64_t> narInfoRead{0}; + std::atomic<uint64_t> narInfoReadAverted{0}; + std::atomic<uint64_t> narInfoMissing{0}; + std::atomic<uint64_t> narInfoWrite{0}; + std::atomic<uint64_t> pathInfoCacheSize{0}; + std::atomic<uint64_t> narRead{0}; + std::atomic<uint64_t> narReadBytes{0}; + std::atomic<uint64_t> narReadCompressedBytes{0}; + std::atomic<uint64_t> narWrite{0}; + std::atomic<uint64_t> narWriteAverted{0}; + std::atomic<uint64_t> narWriteBytes{0}; + std::atomic<uint64_t> narWriteCompressedBytes{0}; + std::atomic<uint64_t> narWriteCompressionTimeMs{0}; + }; + + const Stats& getStats(); + + /* Return the build log of the specified store path, if available, + or null otherwise. */ + virtual std::shared_ptr<std::string> getBuildLog(const Path& path) { + return nullptr; + } + + /* Hack to allow long-running processes like hydra-queue-runner to + occasionally flush their path info cache. */ + void clearPathInfoCache() { state.lock()->pathInfoCache.clear(); } + + /* Establish a connection to the store, for store types that have + a notion of connection. Otherwise this is a no-op. 
*/ + virtual void connect(){}; + + /* Get the protocol version of this store or its connection. */ + virtual unsigned int getProtocol() { return 0; }; + + /* Get the priority of the store, used to order substituters. In + particular, binary caches can specify a priority field in their + "nix-cache-info" file. Lower value means higher priority. */ + virtual int getPriority() { return 0; } + + virtual Path toRealPath(const Path& storePath) { return storePath; } + + virtual void createUser(const std::string& userName, uid_t userId) {} + + protected: + Stats stats; + + /* Helper for unsupported operations: always throws Unsupported, naming `op' and getUri(). */ + [[noreturn]] void unsupported(const std::string& op) { + throw Unsupported("operation '%s' is not supported by store '%s'", op, + getUri()); + } +}; + +class LocalFSStore : public virtual Store { + public: + // FIXME: the (Store*) cast works around a bug in gcc that causes + // it to emit the call to the Option constructor. Clang works fine + // either way. + const PathSetting rootDir{(Store*)this, true, "", "root", + "directory prefixed to all other paths"}; + const PathSetting stateDir{ + (Store*)this, false, + rootDir != "" ? rootDir + "/nix/var/nix" : settings.nixStateDir, "state", + "directory where Nix will store state"}; + const PathSetting logDir{ + (Store*)this, false, + rootDir != "" ? rootDir + "/nix/var/log/nix" : settings.nixLogDir, "log", + "directory where Nix will store state"}; /* NOTE(review): same description string as stateDir; presumably meant to mention logs -- confirm before changing the user-visible text */ + + const static std::string drvsLogDir; + + LocalFSStore(const Params& params); + + void narFromPath(const Path& path, Sink& sink) override; + ref<FSAccessor> getFSAccessor() override; + + /* Register a permanent GC root. 
*/ + Path addPermRoot(const Path& storePath, const Path& gcRoot, bool indirect, + bool allowOutsideRootsDir = false); + + virtual Path getRealStoreDir() { return storeDir; } + + Path toRealPath(const Path& storePath) override { + assert(isInStore(storePath)); + return getRealStoreDir() + "/" + + std::string(storePath, storeDir.size() + 1); + } + + std::shared_ptr<std::string> getBuildLog(const Path& path) override; +}; + +/* Extract the name part of the given store path. */ +std::string storePathToName(const Path& path); + +/* Extract the hash part of the given store path. */ +std::string storePathToHash(const Path& path); + +/* Check whether ‘name’ is a valid store path name part, i.e. contains + only the characters [a-zA-Z0-9\+\-\.\_\?\=] and doesn't start with + a dot. */ +void checkStoreName(const std::string& name); + +/* Copy a path from one store to another. */ +void copyStorePath(ref<Store> srcStore, const ref<Store>& dstStore, + const Path& storePath, RepairFlag repair = NoRepair, + CheckSigsFlag checkSigs = CheckSigs); + +/* Copy store paths from one store to another. The paths may be copied + in parallel. They are copied in a topologically sorted order + (i.e. if A is a reference of B, then A is copied before B), but + the set of store paths is not automatically closed; use + copyClosure() for that. */ +void copyPaths(ref<Store> srcStore, ref<Store> dstStore, + const PathSet& storePaths, RepairFlag repair = NoRepair, + CheckSigsFlag checkSigs = CheckSigs, + SubstituteFlag substitute = NoSubstitute); + +/* Copy the closure of the specified paths from one store to another. */ +void copyClosure(const ref<Store>& srcStore, const ref<Store>& dstStore, + const PathSet& storePaths, RepairFlag repair = NoRepair, + CheckSigsFlag checkSigs = CheckSigs, + SubstituteFlag substitute = NoSubstitute); + +/* Remove the temporary roots file for this process. Any temporary + root becomes garbage after this point unless it has been registered + as a (permanent) root. 
*/ +void removeTempRoots(); + +/* Return a Store object to access the Nix store denoted by + ‘uri’ (slight misnomer...). Supported values are: + + * ‘local’: The Nix store in /nix/store and database in + /nix/var/nix/db, accessed directly. + + * ‘daemon’: The Nix store accessed via a Unix domain socket + connection to nix-daemon. + + * ‘unix://<path>’: The Nix store accessed via a Unix domain socket + connection to nix-daemon, with the socket located at <path>. + + * ‘auto’ or ‘’: Equivalent to ‘local’ or ‘daemon’ depending on + whether the user has write access to the local Nix + store/database. + + * ‘file://<path>’: A binary cache stored in <path>. + + * ‘https://<path>’: A binary cache accessed via HTTP. + + * ‘s3://<path>’: A writable binary cache stored on Amazon's Simple + Storage Service. + + * ‘ssh://[user@]<host>’: A remote Nix store accessed by running + ‘nix-store --serve’ via SSH. + + You can pass parameters to the store implementation by appending + ‘?key=value&key=value&...’ to the URI. +*/ +ref<Store> openStore(const std::string& uri = settings.storeUri.get(), + const Store::Params& extraParams = Store::Params()); + +enum StoreType { tDaemon, tLocal, tOther }; + +StoreType getStoreType(const std::string& uri = settings.storeUri.get(), + const std::string& stateDir = settings.nixStateDir); + +/* Return the default substituter stores, defined by the + ‘substituters’ option and various legacy options. */ +std::list<ref<Store>> getDefaultSubstituters(); + +/* Store implementation registration. 
*/ +typedef std::function<std::shared_ptr<Store>(const std::string& uri, + const Store::Params& params)> + OpenStore; + +struct RegisterStoreImplementation { + typedef std::vector<OpenStore> Implementations; + static Implementations* implementations; + + RegisterStoreImplementation(OpenStore fun) { + if (!implementations) { + implementations = new Implementations; + } + implementations->push_back(fun); + } +}; + +/* Display a set of paths in human-readable form (i.e., between quotes + and separated by commas). */ +std::string showPaths(const PathSet& paths); + +ValidPathInfo decodeValidPathInfo(std::istream& str, bool hashGiven = false); + +/* Compute the content-addressability assertion (ValidPathInfo::ca) + for paths created by makeFixedOutputPath() / addToStore(). */ +std::string makeFixedOutputCA(bool recursive, const Hash& hash); + +/* Split URI into protocol+hierarchy part and its parameter set. */ +std::pair<std::string, Store::Params> splitUriAndParams(const std::string& uri); + +} // namespace nix diff --git a/third_party/nix/src/libstore/worker-protocol.hh b/third_party/nix/src/libstore/worker-protocol.hh new file mode 100644 index 000000000000..970d494acee1 --- /dev/null +++ b/third_party/nix/src/libstore/worker-protocol.hh @@ -0,0 +1,65 @@ +#pragma once + +namespace nix { + +#define WORKER_MAGIC_1 0x6e697863 +#define WORKER_MAGIC_2 0x6478696f + +#define PROTOCOL_VERSION 0x115 +#define GET_PROTOCOL_MAJOR(x) ((x)&0xff00) +#define GET_PROTOCOL_MINOR(x) ((x)&0x00ff) + +typedef enum { + wopIsValidPath = 1, + wopHasSubstitutes = 3, + wopQueryPathHash = 4, // obsolete + wopQueryReferences = 5, // obsolete + wopQueryReferrers = 6, + wopAddToStore = 7, + wopAddTextToStore = 8, + wopBuildPaths = 9, + wopEnsurePath = 10, + wopAddTempRoot = 11, + wopAddIndirectRoot = 12, + wopSyncWithGC = 13, + wopFindRoots = 14, + wopExportPath = 16, // obsolete + wopQueryDeriver = 18, // obsolete + wopSetOptions = 19, + wopCollectGarbage = 20, + wopQuerySubstitutablePathInfo = 
21, + wopQueryDerivationOutputs = 22, + wopQueryAllValidPaths = 23, + wopQueryFailedPaths = 24, + wopClearFailedPaths = 25, + wopQueryPathInfo = 26, + wopImportPaths = 27, // obsolete + wopQueryDerivationOutputNames = 28, + wopQueryPathFromHashPart = 29, + wopQuerySubstitutablePathInfos = 30, + wopQueryValidPaths = 31, + wopQuerySubstitutablePaths = 32, + wopQueryValidDerivers = 33, + wopOptimiseStore = 34, + wopVerifyStore = 35, + wopBuildDerivation = 36, + wopAddSignatures = 37, + wopNarFromPath = 38, + wopAddToStoreNar = 39, + wopQueryMissing = 40, +} WorkerOp; + +#define STDERR_NEXT 0x6f6c6d67 +#define STDERR_READ 0x64617461 // data needed from source +#define STDERR_WRITE 0x64617416 // data for sink +#define STDERR_LAST 0x616c7473 +#define STDERR_ERROR 0x63787470 +#define STDERR_START_ACTIVITY 0x53545254 +#define STDERR_STOP_ACTIVITY 0x53544f50 +#define STDERR_RESULT 0x52534c54 + +Path readStorePath(Store& store, Source& from); +template <class T> +T readStorePaths(Store& store, Source& from); + +} // namespace nix |