diff options
Diffstat (limited to 'third_party/nix/src')
189 files changed, 70199 insertions, 0 deletions
diff --git a/third_party/nix/src/build-remote/build-remote.cc b/third_party/nix/src/build-remote/build-remote.cc new file mode 100644 index 000000000000..2802b49ac8ea --- /dev/null +++ b/third_party/nix/src/build-remote/build-remote.cc @@ -0,0 +1,283 @@ +#include <algorithm> +#include <cstdlib> +#include <cstring> +#include <iomanip> +#include <memory> +#include <set> +#include <tuple> + +#include <glog/logging.h> +#if __APPLE__ +#include <sys/time.h> +#endif + +#include "derivations.hh" +#include "globals.hh" +#include "legacy.hh" +#include "local-store.hh" +#include "machines.hh" +#include "pathlocks.hh" +#include "serialise.hh" +#include "shared.hh" +#include "store-api.hh" + +using namespace nix; +using std::cin; + +static void handleAlarm(int sig) {} + +std::string escapeUri(std::string uri) { + std::replace(uri.begin(), uri.end(), '/', '_'); + return uri; +} + +static string currentLoad; + +static AutoCloseFD openSlotLock(const Machine& m, unsigned long long slot) { + return openLockFile(fmt("%s/%s-%d", currentLoad, escapeUri(m.storeUri), slot), + true); +} + +static bool allSupportedLocally(const std::set<std::string>& requiredFeatures) { + for (auto& feature : requiredFeatures) { + if (settings.systemFeatures.get().count(feature) == 0u) { + return false; + } + } + return true; +} + +static int _main(int argc, char* argv[]) { + { + /* Ensure we don't get any SSH passphrase or host key popups. */ + unsetenv("DISPLAY"); + unsetenv("SSH_ASKPASS"); + + if (argc != 2) { + throw UsageError("called without required arguments"); + } + + FdSource source(STDIN_FILENO); + + /* Read the parent's settings. */ + while (readInt(source) != 0u) { + auto name = readString(source); + auto value = readString(source); + settings.set(name, value); + } + + settings.maxBuildJobs.set("1"); // hack to make tests with local?root= work + + initPlugins(); + + auto store = openStore().cast<LocalStore>(); + + /* It would be more appropriate to use $XDG_RUNTIME_DIR, since + that gets cleared on reboot, but it wouldn't work on macOS. */ + currentLoad = store->stateDir + "/current-load"; + + std::shared_ptr<Store> sshStore; + AutoCloseFD bestSlotLock; + + auto machines = getMachines(); + DLOG(INFO) << "got " << machines.size() << " remote builders"; + + if (machines.empty()) { + std::cerr << "# decline-permanently\n"; + return 0; + } + + string drvPath; + string storeUri; + + while (true) { + try { + auto s = readString(source); + if (s != "try") { + return 0; + } + } catch (EndOfFile&) { + return 0; + } + + auto amWilling = readInt(source); + auto neededSystem = readString(source); + source >> drvPath; + auto requiredFeatures = readStrings<std::set<std::string>>(source); + + auto canBuildLocally = + (amWilling != 0u) && + (neededSystem == settings.thisSystem || + settings.extraPlatforms.get().count(neededSystem) > 0) && + allSupportedLocally(requiredFeatures); + + /* Error ignored here, will be caught later */ + mkdir(currentLoad.c_str(), 0777); + + while (true) { + bestSlotLock = -1; + AutoCloseFD lock = openLockFile(currentLoad + "/main-lock", true); + lockFile(lock.get(), ltWrite, true); + + bool rightType = false; + + Machine* bestMachine = nullptr; + unsigned long long bestLoad = 0; + for (auto& m : machines) { + DLOG(INFO) << "considering building on remote machine '" << m.storeUri + << "'"; + + if (m.enabled && + std::find(m.systemTypes.begin(), m.systemTypes.end(), + neededSystem) != m.systemTypes.end() && + m.allSupported(requiredFeatures) && + m.mandatoryMet(requiredFeatures)) { + rightType = true; + AutoCloseFD free; + unsigned long long load = 0; + for (unsigned long long slot = 0; slot < m.maxJobs; ++slot) { + auto slotLock = openSlotLock(m, slot); + if (lockFile(slotLock.get(), ltWrite, false)) { + if (!free) { + free = std::move(slotLock); + } + } else { + ++load; + } + } + if (!free) { + continue; + } + bool best = false; + if (!bestSlotLock) { + best = true; + } else if (load / m.speedFactor < + bestLoad / bestMachine->speedFactor) { + best = true; + } else if (load / m.speedFactor == + bestLoad / bestMachine->speedFactor) { + if (m.speedFactor > bestMachine->speedFactor) { + best = true; + } else if (m.speedFactor == bestMachine->speedFactor) { + if (load < bestLoad) { + best = true; + } + } + } + if (best) { + bestLoad = load; + bestSlotLock = std::move(free); + bestMachine = &m; + } + } + } + + if (!bestSlotLock) { + if (rightType && !canBuildLocally) { + std::cerr << "# postpone\n"; + } else { + std::cerr << "# decline\n"; + } + break; + } + +#if __APPLE__ + futimes(bestSlotLock.get(), NULL); +#else + futimens(bestSlotLock.get(), nullptr); +#endif + + lock = -1; + + try { + DLOG(INFO) << "connecting to '" << bestMachine->storeUri << "'"; + + Store::Params storeParams; + if (hasPrefix(bestMachine->storeUri, "ssh://")) { + storeParams["max-connections"] = "1"; + storeParams["log-fd"] = "4"; + if (!bestMachine->sshKey.empty()) { + storeParams["ssh-key"] = bestMachine->sshKey; + } + } + + sshStore = openStore(bestMachine->storeUri, storeParams); + sshStore->connect(); + storeUri = bestMachine->storeUri; + + } catch (std::exception& e) { + auto msg = chomp(drainFD(5, false)); + LOG(ERROR) << "cannot build on '" << bestMachine->storeUri + << "': " << e.what() << (msg.empty() ? "" : ": " + msg); + bestMachine->enabled = false; + continue; + } + + goto connected; + } + } + + connected: + close(5); + + std::cerr << "# accept\n" << storeUri << "\n"; + + auto inputs = readStrings<PathSet>(source); + auto outputs = readStrings<PathSet>(source); + + AutoCloseFD uploadLock = openLockFile( + currentLoad + "/" + escapeUri(storeUri) + ".upload-lock", true); + + { + DLOG(INFO) << "waiting for the upload lock to '" << storeUri << "'"; + + auto old = signal(SIGALRM, handleAlarm); + alarm(15 * 60); + if (!lockFile(uploadLock.get(), ltWrite, true)) { + LOG(ERROR) << "somebody is hogging the upload lock, continuing..."; + } + alarm(0); + signal(SIGALRM, old); + } + + auto substitute = + settings.buildersUseSubstitutes ? Substitute : NoSubstitute; + + { + DLOG(INFO) << "copying dependencies to '" << storeUri << "'"; + copyPaths(store, ref<Store>(sshStore), inputs, NoRepair, NoCheckSigs, + substitute); + } + + uploadLock = -1; + + BasicDerivation drv( + readDerivation(store->realStoreDir + "/" + baseNameOf(drvPath))); + drv.inputSrcs = inputs; + + auto result = sshStore->buildDerivation(drvPath, drv); + + if (!result.success()) { + throw Error("build of '%s' on '%s' failed: %s", drvPath, storeUri, + result.errorMsg); + } + + PathSet missing; + for (auto& path : outputs) { + if (!store->isValidPath(path)) { + missing.insert(path); + } + } + + if (!missing.empty()) { + DLOG(INFO) << "copying outputs from '" << storeUri << "'"; + store->locksHeld.insert(missing.begin(), missing.end()); /* FIXME: ugly */ + copyPaths(ref<Store>(sshStore), store, missing, NoRepair, NoCheckSigs, + NoSubstitute); + } + + return 0; + } +} + +static RegisterLegacyCommand s1("build-remote", _main); diff --git a/third_party/nix/src/cpptoml/LICENSE b/third_party/nix/src/cpptoml/LICENSE new file mode 100644 index 000000000000..8802c4fa5a36 --- /dev/null +++ b/third_party/nix/src/cpptoml/LICENSE @@ -0,0 +1,18 @@ +Copyright (c) 2014 Chase Geigle + +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software is furnished to do so, +subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR +COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/third_party/nix/src/cpptoml/cpptoml.h b/third_party/nix/src/cpptoml/cpptoml.h new file mode 100644 index 000000000000..5a00da3b4cda --- /dev/null +++ b/third_party/nix/src/cpptoml/cpptoml.h @@ -0,0 +1,3668 @@ +/** + * @file cpptoml.h + * @author Chase Geigle + * @date May 2013 + */ + +#ifndef CPPTOML_H +#define CPPTOML_H + +#include <algorithm> +#include <cassert> +#include <clocale> +#include <cstdint> +#include <cstring> +#include <fstream> +#include <iomanip> +#include <map> +#include <memory> +#include <sstream> +#include <stdexcept> +#include <string> +#include <unordered_map> +#include <vector> + +#if __cplusplus > 201103L +#define CPPTOML_DEPRECATED(reason) [[deprecated(reason)]] +#elif defined(__clang__) +#define CPPTOML_DEPRECATED(reason) __attribute__((deprecated(reason))) +#elif defined(__GNUG__) +#define CPPTOML_DEPRECATED(reason) __attribute__((deprecated)) +#elif defined(_MSC_VER) +#if _MSC_VER < 1910 +#define CPPTOML_DEPRECATED(reason) __declspec(deprecated) +#else +#define CPPTOML_DEPRECATED(reason) [[deprecated(reason)]] +#endif +#endif + +namespace cpptoml +{ +class writer; // forward declaration +class base; // forward declaration +#if defined(CPPTOML_USE_MAP) +// a std::map will ensure that entries a sorted, albeit at a slight +// performance penalty relative to the (default) unordered_map +using string_to_base_map = std::map<std::string, std::shared_ptr<base>>; +#else +// by default an unordered_map is used for best performance as the +// toml specification does not require entries to be sorted +using string_to_base_map + = std::unordered_map<std::string, std::shared_ptr<base>>; +#endif + +// if defined, `base` will retain type information in form of an enum class +// such that static_cast can be used instead of dynamic_cast +// #define CPPTOML_NO_RTTI + +template <class T> +class option +{ + public: + option() : empty_{true} + { + // nothing + } + + option(T value) : empty_{false}, value_(std::move(value)) + { + // nothing + } + + explicit operator bool() const + { + return !empty_; + } + + const T& operator*() const + { + return value_; + } + + const T* operator->() const + { + return &value_; + } + + template <class U> + T value_or(U&& alternative) const + { + if (!empty_) + return value_; + return static_cast<T>(std::forward<U>(alternative)); + } + + private: + bool empty_; + T value_; +}; + +struct local_date +{ + int year = 0; + int month = 0; + int day = 0; +}; + +struct local_time +{ + int hour = 0; + int minute = 0; + int second = 0; + int microsecond = 0; +}; + +struct zone_offset +{ + int hour_offset = 0; + int minute_offset = 0; +}; + +struct local_datetime : local_date, local_time +{ +}; + +struct offset_datetime : local_datetime, zone_offset +{ + static inline struct offset_datetime from_zoned(const struct tm& t) + { + offset_datetime dt; + dt.year = t.tm_year + 1900; + dt.month = t.tm_mon + 1; + dt.day = t.tm_mday; + dt.hour = t.tm_hour; + dt.minute = t.tm_min; + dt.second = t.tm_sec; + + char buf[16]; + strftime(buf, 16, "%z", &t); + + int offset = std::stoi(buf); + dt.hour_offset = offset / 100; + dt.minute_offset = offset % 100; + return dt; + } + + CPPTOML_DEPRECATED("from_local has been renamed to from_zoned") + static inline struct offset_datetime from_local(const struct tm& t) + { + return from_zoned(t); + } + + static inline struct offset_datetime from_utc(const struct tm& t) + { + offset_datetime dt; + dt.year = t.tm_year + 1900; + dt.month = t.tm_mon + 1; + dt.day = t.tm_mday; + dt.hour = t.tm_hour; + dt.minute = t.tm_min; + dt.second = t.tm_sec; + return dt; + } +}; + +CPPTOML_DEPRECATED("datetime has been renamed to offset_datetime") +typedef offset_datetime datetime; + +class fill_guard +{ + public: + fill_guard(std::ostream& os) : os_(os), fill_{os.fill()} + { + // nothing + } + + ~fill_guard() + { + os_.fill(fill_); + } + + private: + std::ostream& os_; + std::ostream::char_type fill_; +}; + +inline std::ostream& operator<<(std::ostream& os, const local_date& dt) +{ + fill_guard g{os}; + os.fill('0'); + + using std::setw; + os << setw(4) << dt.year << "-" << setw(2) << dt.month << "-" << setw(2) + << dt.day; + + return os; +} + +inline std::ostream& operator<<(std::ostream& os, const local_time& ltime) +{ + fill_guard g{os}; + os.fill('0'); + + using std::setw; + os << setw(2) << ltime.hour << ":" << setw(2) << ltime.minute << ":" + << setw(2) << ltime.second; + + if (ltime.microsecond > 0) + { + os << "."; + int power = 100000; + for (int curr_us = ltime.microsecond; curr_us; power /= 10) + { + auto num = curr_us / power; + os << num; + curr_us -= num * power; + } + } + + return os; +} + +inline std::ostream& operator<<(std::ostream& os, const zone_offset& zo) +{ + fill_guard g{os}; + os.fill('0'); + + using std::setw; + + if (zo.hour_offset != 0 || zo.minute_offset != 0) + { + if (zo.hour_offset > 0) + { + os << "+"; + } + else + { + os << "-"; + } + os << setw(2) << std::abs(zo.hour_offset) << ":" << setw(2) + << std::abs(zo.minute_offset); + } + else + { + os << "Z"; + } + + return os; +} + +inline std::ostream& operator<<(std::ostream& os, const local_datetime& dt) +{ + return os << static_cast<const local_date&>(dt) << "T" + << static_cast<const local_time&>(dt); +} + +inline std::ostream& operator<<(std::ostream& os, const offset_datetime& dt) +{ + return os << static_cast<const local_datetime&>(dt) + << static_cast<const zone_offset&>(dt); +} + +template <class T, class... Ts> +struct is_one_of; + +template <class T, class V> +struct is_one_of<T, V> : std::is_same<T, V> +{ +}; + +template <class T, class V, class... Ts> +struct is_one_of<T, V, Ts...> +{ + const static bool value + = std::is_same<T, V>::value || is_one_of<T, Ts...>::value; +}; + +template <class T> +class value; + +template <class T> +struct valid_value + : is_one_of<T, std::string, int64_t, double, bool, local_date, local_time, + local_datetime, offset_datetime> +{ +}; + +template <class T, class Enable = void> +struct value_traits; + +template <class T> +struct valid_value_or_string_convertible +{ + + const static bool value = valid_value<typename std::decay<T>::type>::value + || std::is_convertible<T, std::string>::value; +}; + +template <class T> +struct value_traits<T, typename std::enable_if< + valid_value_or_string_convertible<T>::value>::type> +{ + using value_type = typename std::conditional< + valid_value<typename std::decay<T>::type>::value, + typename std::decay<T>::type, std::string>::type; + + using type = value<value_type>; + + static value_type construct(T&& val) + { + return value_type(val); + } +}; + +template <class T> +struct value_traits< + T, + typename std::enable_if< + !valid_value_or_string_convertible<T>::value + && std::is_floating_point<typename std::decay<T>::type>::value>::type> +{ + using value_type = typename std::decay<T>::type; + + using type = value<double>; + + static value_type construct(T&& val) + { + return value_type(val); + } +}; + +template <class T> +struct value_traits< + T, typename std::enable_if< + !valid_value_or_string_convertible<T>::value + && !std::is_floating_point<typename std::decay<T>::type>::value + && std::is_signed<typename std::decay<T>::type>::value>::type> +{ + using value_type = int64_t; + + using type = value<int64_t>; + + static value_type construct(T&& val) + { + if (val < (std::numeric_limits<int64_t>::min)()) + throw std::underflow_error{"constructed value cannot be " + "represented by a 64-bit signed " + "integer"}; + + if (val > (std::numeric_limits<int64_t>::max)()) + throw std::overflow_error{"constructed value cannot be represented " + "by a 64-bit signed integer"}; + + return static_cast<int64_t>(val); + } +}; + +template <class T> +struct value_traits< + T, typename std::enable_if< + !valid_value_or_string_convertible<T>::value + && std::is_unsigned<typename std::decay<T>::type>::value>::type> +{ + using value_type = int64_t; + + using type = value<int64_t>; + + static value_type construct(T&& val) + { + if (val > static_cast<uint64_t>((std::numeric_limits<int64_t>::max)())) + throw std::overflow_error{"constructed value cannot be represented " + "by a 64-bit signed integer"}; + + return static_cast<int64_t>(val); + } +}; + +class array; +class table; +class table_array; + +template <class T> +struct array_of_trait +{ + using return_type = option<std::vector<T>>; +}; + +template <> +struct array_of_trait<array> +{ + using return_type = option<std::vector<std::shared_ptr<array>>>; +}; + +template <class T> +inline std::shared_ptr<typename value_traits<T>::type> make_value(T&& val); +inline std::shared_ptr<array> make_array(); + +namespace detail +{ +template <class T> +inline std::shared_ptr<T> make_element(); +} + +inline std::shared_ptr<table> make_table(); +inline std::shared_ptr<table_array> make_table_array(bool is_inline = false); + +#if defined(CPPTOML_NO_RTTI) +/// Base type used to store underlying data type explicitly if RTTI is disabled +enum class base_type +{ + NONE, + STRING, + LOCAL_TIME, + LOCAL_DATE, + LOCAL_DATETIME, + OFFSET_DATETIME, + INT, + FLOAT, + BOOL, + TABLE, + ARRAY, + TABLE_ARRAY +}; + +/// Type traits class to convert C++ types to enum member +template <class T> +struct base_type_traits; + +template <> +struct base_type_traits<std::string> +{ + static const base_type type = base_type::STRING; +}; + +template <> +struct base_type_traits<local_time> +{ + static const base_type type = base_type::LOCAL_TIME; +}; + +template <> +struct base_type_traits<local_date> +{ + static const base_type type = base_type::LOCAL_DATE; +}; + +template <> +struct base_type_traits<local_datetime> +{ + static const base_type type = base_type::LOCAL_DATETIME; +}; + +template <> +struct base_type_traits<offset_datetime> +{ + static const base_type type = base_type::OFFSET_DATETIME; +}; + +template <> +struct base_type_traits<int64_t> +{ + static const base_type type = base_type::INT; +}; + +template <> +struct base_type_traits<double> +{ + static const base_type type = base_type::FLOAT; +}; + +template <> +struct base_type_traits<bool> +{ + static const base_type type = base_type::BOOL; +}; + +template <> +struct base_type_traits<table> +{ + static const base_type type = base_type::TABLE; +}; + +template <> +struct base_type_traits<array> +{ + static const base_type type = base_type::ARRAY; +}; + +template <> +struct base_type_traits<table_array> +{ + static const base_type type = base_type::TABLE_ARRAY; +}; +#endif + +/** + * A generic base TOML value used for type erasure. + */ +class base : public std::enable_shared_from_this<base> +{ + public: + virtual ~base() = default; + + virtual std::shared_ptr<base> clone() const = 0; + + /** + * Determines if the given TOML element is a value. + */ + virtual bool is_value() const + { + return false; + } + + /** + * Determines if the given TOML element is a table. + */ + virtual bool is_table() const + { + return false; + } + + /** + * Converts the TOML element into a table. + */ + std::shared_ptr<table> as_table() + { + if (is_table()) + return std::static_pointer_cast<table>(shared_from_this()); + return nullptr; + } + /** + * Determines if the TOML element is an array of "leaf" elements. + */ + virtual bool is_array() const + { + return false; + } + + /** + * Converts the TOML element to an array. + */ + std::shared_ptr<array> as_array() + { + if (is_array()) + return std::static_pointer_cast<array>(shared_from_this()); + return nullptr; + } + + /** + * Determines if the given TOML element is an array of tables. + */ + virtual bool is_table_array() const + { + return false; + } + + /** + * Converts the TOML element into a table array. + */ + std::shared_ptr<table_array> as_table_array() + { + if (is_table_array()) + return std::static_pointer_cast<table_array>(shared_from_this()); + return nullptr; + } + + /** + * Attempts to coerce the TOML element into a concrete TOML value + * of type T. + */ + template <class T> + std::shared_ptr<value<T>> as(); + + template <class T> + std::shared_ptr<const value<T>> as() const; + + template <class Visitor, class... Args> + void accept(Visitor&& visitor, Args&&... args) const; + +#if defined(CPPTOML_NO_RTTI) + base_type type() const + { + return type_; + } + + protected: + base(const base_type t) : type_(t) + { + // nothing + } + + private: + const base_type type_ = base_type::NONE; + +#else + protected: + base() + { + // nothing + } +#endif +}; + +/** + * A concrete TOML value representing the "leaves" of the "tree". + */ +template <class T> +class value : public base +{ + struct make_shared_enabler + { + // nothing; this is a private key accessible only to friends + }; + + template <class U> + friend std::shared_ptr<typename value_traits<U>::type> + cpptoml::make_value(U&& val); + + public: + static_assert(valid_value<T>::value, "invalid value type"); + + std::shared_ptr<base> clone() const override; + + value(const make_shared_enabler&, const T& val) : value(val) + { + // nothing; note that users cannot actually invoke this function + // because they lack access to the make_shared_enabler. + } + + bool is_value() const override + { + return true; + } + + /** + * Gets the data associated with this value. + */ + T& get() + { + return data_; + } + + /** + * Gets the data associated with this value. Const version. + */ + const T& get() const + { + return data_; + } + + private: + T data_; + + /** + * Constructs a value from the given data. + */ +#if defined(CPPTOML_NO_RTTI) + value(const T& val) : base(base_type_traits<T>::type), data_(val) + { + } +#else + value(const T& val) : data_(val) + { + } +#endif + + value(const value& val) = delete; + value& operator=(const value& val) = delete; +}; + +template <class T> +std::shared_ptr<typename value_traits<T>::type> make_value(T&& val) +{ + using value_type = typename value_traits<T>::type; + using enabler = typename value_type::make_shared_enabler; + return std::make_shared<value_type>( + enabler{}, value_traits<T>::construct(std::forward<T>(val))); +} + +template <class T> +inline std::shared_ptr<value<T>> base::as() +{ +#if defined(CPPTOML_NO_RTTI) + if (type() == base_type_traits<T>::type) + return std::static_pointer_cast<value<T>>(shared_from_this()); + else + return nullptr; +#else + return std::dynamic_pointer_cast<value<T>>(shared_from_this()); +#endif +} + +// special case value<double> to allow getting an integer parameter as a +// double value +template <> +inline std::shared_ptr<value<double>> base::as() +{ +#if defined(CPPTOML_NO_RTTI) + if (type() == base_type::FLOAT) + return std::static_pointer_cast<value<double>>(shared_from_this()); + + if (type() == base_type::INT) + { + auto v = std::static_pointer_cast<value<int64_t>>(shared_from_this()); + return make_value<double>(static_cast<double>(v->get())); + } +#else + if (auto v = std::dynamic_pointer_cast<value<double>>(shared_from_this())) + return v; + + if (auto v = std::dynamic_pointer_cast<value<int64_t>>(shared_from_this())) + return make_value<double>(static_cast<double>(v->get())); +#endif + + return nullptr; +} + +template <class T> +inline std::shared_ptr<const value<T>> base::as() const +{ +#if defined(CPPTOML_NO_RTTI) + if (type() == base_type_traits<T>::type) + return std::static_pointer_cast<const value<T>>(shared_from_this()); + else + return nullptr; +#else + return std::dynamic_pointer_cast<const value<T>>(shared_from_this()); +#endif +} + +// special case value<double> to allow getting an integer parameter as a +// double value +template <> +inline std::shared_ptr<const value<double>> base::as() const +{ +#if defined(CPPTOML_NO_RTTI) + if (type() == base_type::FLOAT) + return std::static_pointer_cast<const value<double>>( + shared_from_this()); + + if (type() == base_type::INT) + { + auto v = as<int64_t>(); + // the below has to be a non-const value<double> due to a bug in + // libc++: https://llvm.org/bugs/show_bug.cgi?id=18843 + return make_value<double>(static_cast<double>(v->get())); + } +#else + if (auto v + = std::dynamic_pointer_cast<const value<double>>(shared_from_this())) + return v; + + if (auto v = as<int64_t>()) + { + // the below has to be a non-const value<double> due to a bug in + // libc++: https://llvm.org/bugs/show_bug.cgi?id=18843 + return make_value<double>(static_cast<double>(v->get())); + } +#endif + + return nullptr; +} + +/** + * Exception class for array insertion errors. + */ +class array_exception : public std::runtime_error +{ + public: + array_exception(const std::string& err) : std::runtime_error{err} + { + } +}; + +class array : public base +{ + public: + friend std::shared_ptr<array> make_array(); + + std::shared_ptr<base> clone() const override; + + virtual bool is_array() const override + { + return true; + } + + using size_type = std::size_t; + + /** + * arrays can be iterated over + */ + using iterator = std::vector<std::shared_ptr<base>>::iterator; + + /** + * arrays can be iterated over. Const version. + */ + using const_iterator = std::vector<std::shared_ptr<base>>::const_iterator; + + iterator begin() + { + return values_.begin(); + } + + const_iterator begin() const + { + return values_.begin(); + } + + iterator end() + { + return values_.end(); + } + + const_iterator end() const + { + return values_.end(); + } + + /** + * Obtains the array (vector) of base values. + */ + std::vector<std::shared_ptr<base>>& get() + { + return values_; + } + + /** + * Obtains the array (vector) of base values. Const version. + */ + const std::vector<std::shared_ptr<base>>& get() const + { + return values_; + } + + std::shared_ptr<base> at(size_t idx) const + { + return values_.at(idx); + } + + /** + * Obtains an array of value<T>s. Note that elements may be + * nullptr if they cannot be converted to a value<T>. + */ + template <class T> + std::vector<std::shared_ptr<value<T>>> array_of() const + { + std::vector<std::shared_ptr<value<T>>> result(values_.size()); + + std::transform(values_.begin(), values_.end(), result.begin(), + [&](std::shared_ptr<base> v) { return v->as<T>(); }); + + return result; + } + + /** + * Obtains a option<vector<T>>. The option will be empty if the array + * contains values that are not of type T. + */ + template <class T> + inline typename array_of_trait<T>::return_type get_array_of() const + { + std::vector<T> result; + result.reserve(values_.size()); + + for (const auto& val : values_) + { + if (auto v = val->as<T>()) + result.push_back(v->get()); + else + return {}; + } + + return {std::move(result)}; + } + + /** + * Obtains an array of arrays. Note that elements may be nullptr + * if they cannot be converted to a array. + */ + std::vector<std::shared_ptr<array>> nested_array() const + { + std::vector<std::shared_ptr<array>> result(values_.size()); + + std::transform(values_.begin(), values_.end(), result.begin(), + [&](std::shared_ptr<base> v) -> std::shared_ptr<array> { + if (v->is_array()) + return std::static_pointer_cast<array>(v); + return std::shared_ptr<array>{}; + }); + + return result; + } + + /** + * Add a value to the end of the array + */ + template <class T> + void push_back(const std::shared_ptr<value<T>>& val) + { + if (values_.empty() || values_[0]->as<T>()) + { + values_.push_back(val); + } + else + { + throw array_exception{"Arrays must be homogenous."}; + } + } + + /** + * Add an array to the end of the array + */ + void push_back(const std::shared_ptr<array>& val) + { + if (values_.empty() || values_[0]->is_array()) + { + values_.push_back(val); + } + else + { + throw array_exception{"Arrays must be homogenous."}; + } + } + + /** + * Convenience function for adding a simple element to the end + * of the array. + */ + template <class T> + void push_back(T&& val, typename value_traits<T>::type* = 0) + { + push_back(make_value(std::forward<T>(val))); + } + + /** + * Insert a value into the array + */ + template <class T> + iterator insert(iterator position, const std::shared_ptr<value<T>>& value) + { + if (values_.empty() || values_[0]->as<T>()) + { + return values_.insert(position, value); + } + else + { + throw array_exception{"Arrays must be homogenous."}; + } + } + + /** + * Insert an array into the array + */ + iterator insert(iterator position, const std::shared_ptr<array>& value) + { + if (values_.empty() || values_[0]->is_array()) + { + return values_.insert(position, value); + } + else + { + throw array_exception{"Arrays must be homogenous."}; + } + } + + /** + * Convenience function for inserting a simple element in the array + */ + template <class T> + iterator insert(iterator position, T&& val, + typename value_traits<T>::type* = 0) + { + return insert(position, make_value(std::forward<T>(val))); + } + + /** + * Erase an element from the array + */ + iterator erase(iterator position) + { + return values_.erase(position); + } + + /** + * Clear the array + */ + void clear() + { + values_.clear(); + } + + /** + * Reserve space for n values. + */ + void reserve(size_type n) + { + values_.reserve(n); + } + + private: +#if defined(CPPTOML_NO_RTTI) + array() : base(base_type::ARRAY) + { + // empty + } +#else + array() = default; +#endif + + template <class InputIterator> + array(InputIterator begin, InputIterator end) : values_{begin, end} + { + // nothing + } + + array(const array& obj) = delete; + array& operator=(const array& obj) = delete; + + std::vector<std::shared_ptr<base>> values_; +}; + +inline std::shared_ptr<array> make_array() +{ + struct make_shared_enabler : public array + { + make_shared_enabler() + { + // nothing + } + }; + + return std::make_shared<make_shared_enabler>(); +} + +namespace detail +{ +template <> +inline std::shared_ptr<array> make_element<array>() +{ + return make_array(); +} +} // namespace detail + +/** + * Obtains a option<vector<T>>. The option will be empty if the array + * contains values that are not of type T. + */ +template <> +inline typename array_of_trait<array>::return_type +array::get_array_of<array>() const +{ + std::vector<std::shared_ptr<array>> result; + result.reserve(values_.size()); + + for (const auto& val : values_) + { + if (auto v = val->as_array()) + result.push_back(v); + else + return {}; + } + + return {std::move(result)}; +} + +class table; + +class table_array : public base +{ + friend class table; + friend std::shared_ptr<table_array> make_table_array(bool); + + public: + std::shared_ptr<base> clone() const override; + + using size_type = std::size_t; + + /** + * arrays can be iterated over + */ + using iterator = std::vector<std::shared_ptr<table>>::iterator; + + /** + * arrays can be iterated over. Const version. + */ + using const_iterator = std::vector<std::shared_ptr<table>>::const_iterator; + + iterator begin() + { + return array_.begin(); + } + + const_iterator begin() const + { + return array_.begin(); + } + + iterator end() + { + return array_.end(); + } + + const_iterator end() const + { + return array_.end(); + } + + virtual bool is_table_array() const override + { + return true; + } + + std::vector<std::shared_ptr<table>>& get() + { + return array_; + } + + const std::vector<std::shared_ptr<table>>& get() const + { + return array_; + } + + /** + * Add a table to the end of the array + */ + void push_back(const std::shared_ptr<table>& val) + { + array_.push_back(val); + } + + /** + * Insert a table into the array + */ + iterator insert(iterator position, const std::shared_ptr<table>& value) + { + return array_.insert(position, value); + } + + /** + * Erase an element from the array + */ + iterator erase(iterator position) + { + return array_.erase(position); + } + + /** + * Clear the array + */ + void clear() + { + array_.clear(); + } + + /** + * Reserve space for n tables. + */ + void reserve(size_type n) + { + array_.reserve(n); + } + + /** + * Whether or not the table array is declared inline. This mostly + * matters for parsing, where statically defined arrays cannot be + * appended to using the array-of-table syntax. + */ + bool is_inline() const + { + return is_inline_; + } + + private: +#if defined(CPPTOML_NO_RTTI) + table_array(bool is_inline = false) + : base(base_type::TABLE_ARRAY), is_inline_(is_inline) + { + // nothing + } +#else + table_array(bool is_inline = false) : is_inline_(is_inline) + { + // nothing + } +#endif + + table_array(const table_array& obj) = delete; + table_array& operator=(const table_array& rhs) = delete; + + std::vector<std::shared_ptr<table>> array_; + const bool is_inline_ = false; +}; + +inline std::shared_ptr<table_array> make_table_array(bool is_inline) +{ + struct make_shared_enabler : public table_array + { + make_shared_enabler(bool mse_is_inline) : table_array(mse_is_inline) + { + // nothing + } + }; + + return std::make_shared<make_shared_enabler>(is_inline); +} + +namespace detail +{ +template <> +inline std::shared_ptr<table_array> make_element<table_array>() +{ + return make_table_array(true); +} +} // namespace detail + +// The below are overloads for fetching specific value types out of a value +// where special casting behavior (like bounds checking) is desired + +template <class T> +typename std::enable_if<!std::is_floating_point<T>::value + && std::is_signed<T>::value, + option<T>>::type +get_impl(const std::shared_ptr<base>& elem) +{ + if (auto v = elem->as<int64_t>()) + { + if (v->get() < (std::numeric_limits<T>::min)()) + throw std::underflow_error{ + "T cannot represent the value requested in get"}; + + if (v->get() > (std::numeric_limits<T>::max)()) + throw std::overflow_error{ + "T cannot represent the value requested in get"}; + + return {static_cast<T>(v->get())}; + } + else + { + return {}; + } +} + +template <class T> +typename std::enable_if<!std::is_same<T, bool>::value + && std::is_unsigned<T>::value, + option<T>>::type +get_impl(const std::shared_ptr<base>& elem) +{ + if (auto v = elem->as<int64_t>()) + { + if (v->get() < 0) + throw std::underflow_error{"T cannot store negative value in get"}; + + if (static_cast<uint64_t>(v->get()) > (std::numeric_limits<T>::max)()) + throw std::overflow_error{ + "T cannot represent the value requested in get"}; + + return {static_cast<T>(v->get())}; + } + else + { + return {}; + } +} + +template <class T> +typename std::enable_if<!std::is_integral<T>::value + || std::is_same<T, bool>::value, + option<T>>::type +get_impl(const std::shared_ptr<base>& elem) +{ + if (auto v = elem->as<T>()) + { + return {v->get()}; + } + else + { + return {}; + } +} + +/** + * Represents a TOML keytable. + */ +class table : public base +{ + public: + friend class table_array; + friend std::shared_ptr<table> make_table(); + + std::shared_ptr<base> clone() const override; + + /** + * tables can be iterated over. + */ + using iterator = string_to_base_map::iterator; + + /** + * tables can be iterated over. Const version. + */ + using const_iterator = string_to_base_map::const_iterator; + + iterator begin() + { + return map_.begin(); + } + + const_iterator begin() const + { + return map_.begin(); + } + + iterator end() + { + return map_.end(); + } + + const_iterator end() const + { + return map_.end(); + } + + bool is_table() const override + { + return true; + } + + bool empty() const + { + return map_.empty(); + } + + /** + * Determines if this key table contains the given key. + */ + bool contains(const std::string& key) const + { + return map_.find(key) != map_.end(); + } + + /** + * Determines if this key table contains the given key. Will + * resolve "qualified keys". Qualified keys are the full access + * path separated with dots like "grandparent.parent.child". + */ + bool contains_qualified(const std::string& key) const + { + return resolve_qualified(key); + } + + /** + * Obtains the base for a given key. + * @throw std::out_of_range if the key does not exist + */ + std::shared_ptr<base> get(const std::string& key) const + { + return map_.at(key); + } + + /** + * Obtains the base for a given key. Will resolve "qualified + * keys". Qualified keys are the full access path separated with + * dots like "grandparent.parent.child". + * + * @throw std::out_of_range if the key does not exist + */ + std::shared_ptr<base> get_qualified(const std::string& key) const + { + std::shared_ptr<base> p; + resolve_qualified(key, &p); + return p; + } + + /** + * Obtains a table for a given key, if possible. + */ + std::shared_ptr<table> get_table(const std::string& key) const + { + if (contains(key) && get(key)->is_table()) + return std::static_pointer_cast<table>(get(key)); + return nullptr; + } + + /** + * Obtains a table for a given key, if possible. Will resolve + * "qualified keys". + */ + std::shared_ptr<table> get_table_qualified(const std::string& key) const + { + if (contains_qualified(key) && get_qualified(key)->is_table()) + return std::static_pointer_cast<table>(get_qualified(key)); + return nullptr; + } + + /** + * Obtains an array for a given key. + */ + std::shared_ptr<array> get_array(const std::string& key) const + { + if (!contains(key)) + return nullptr; + return get(key)->as_array(); + } + + /** + * Obtains an array for a given key. Will resolve "qualified keys". + */ + std::shared_ptr<array> get_array_qualified(const std::string& key) const + { + if (!contains_qualified(key)) + return nullptr; + return get_qualified(key)->as_array(); + } + + /** + * Obtains a table_array for a given key, if possible. + */ + std::shared_ptr<table_array> get_table_array(const std::string& key) const + { + if (!contains(key)) + return nullptr; + return get(key)->as_table_array(); + } + + /** + * Obtains a table_array for a given key, if possible. Will resolve + * "qualified keys". + */ + std::shared_ptr<table_array> + get_table_array_qualified(const std::string& key) const + { + if (!contains_qualified(key)) + return nullptr; + return get_qualified(key)->as_table_array(); + } + + /** + * Helper function that attempts to get a value corresponding + * to the template parameter from a given key. + */ + template <class T> + option<T> get_as(const std::string& key) const + { + try + { + return get_impl<T>(get(key)); + } + catch (const std::out_of_range&) + { + return {}; + } + } + + /** + * Helper function that attempts to get a value corresponding + * to the template parameter from a given key. Will resolve "qualified + * keys". + */ + template <class T> + option<T> get_qualified_as(const std::string& key) const + { + try + { + return get_impl<T>(get_qualified(key)); + } + catch (const std::out_of_range&) + { + return {}; + } + } + + /** + * Helper function that attempts to get an array of values of a given + * type corresponding to the template parameter for a given key. + * + * If the key doesn't exist, doesn't exist as an array type, or one or + * more keys inside the array type are not of type T, an empty option + * is returned. Otherwise, an option containing a vector of the values + * is returned. + */ + template <class T> + inline typename array_of_trait<T>::return_type + get_array_of(const std::string& key) const + { + if (auto v = get_array(key)) + { + std::vector<T> result; + result.reserve(v->get().size()); + + for (const auto& b : v->get()) + { + if (auto val = b->as<T>()) + result.push_back(val->get()); + else + return {}; + } + return {std::move(result)}; + } + + return {}; + } + + /** + * Helper function that attempts to get an array of values of a given + * type corresponding to the template parameter for a given key. Will + * resolve "qualified keys". + * + * If the key doesn't exist, doesn't exist as an array type, or one or + * more keys inside the array type are not of type T, an empty option + * is returned. Otherwise, an option containing a vector of the values + * is returned. + */ + template <class T> + inline typename array_of_trait<T>::return_type + get_qualified_array_of(const std::string& key) const + { + if (auto v = get_array_qualified(key)) + { + std::vector<T> result; + result.reserve(v->get().size()); + + for (const auto& b : v->get()) + { + if (auto val = b->as<T>()) + result.push_back(val->get()); + else + return {}; + } + return {std::move(result)}; + } + + return {}; + } + + /** + * Adds an element to the keytable. + */ + void insert(const std::string& key, const std::shared_ptr<base>& value) + { + map_[key] = value; + } + + /** + * Convenience shorthand for adding a simple element to the + * keytable. + */ + template <class T> + void insert(const std::string& key, T&& val, + typename value_traits<T>::type* = 0) + { + insert(key, make_value(std::forward<T>(val))); + } + + /** + * Removes an element from the table. + */ + void erase(const std::string& key) + { + map_.erase(key); + } + + private: +#if defined(CPPTOML_NO_RTTI) + table() : base(base_type::TABLE) + { + // nothing + } +#else + table() + { + // nothing + } +#endif + + table(const table& obj) = delete; + table& operator=(const table& rhs) = delete; + + std::vector<std::string> split(const std::string& value, + char separator) const + { + std::vector<std::string> result; + std::string::size_type p = 0; + std::string::size_type q; + while ((q = value.find(separator, p)) != std::string::npos) + { + result.emplace_back(value, p, q - p); + p = q + 1; + } + result.emplace_back(value, p); + return result; + } + + // If output parameter p is specified, fill it with the pointer to the + // specified entry and throw std::out_of_range if it couldn't be found. + // + // Otherwise, just return true if the entry could be found or false + // otherwise and do not throw. + bool resolve_qualified(const std::string& key, + std::shared_ptr<base>* p = nullptr) const + { + auto parts = split(key, '.'); + auto last_key = parts.back(); + parts.pop_back(); + + auto cur_table = this; + for (const auto& part : parts) + { + cur_table = cur_table->get_table(part).get(); + if (!cur_table) + { + if (!p) + return false; + + throw std::out_of_range{key + " is not a valid key"}; + } + } + + if (!p) + return cur_table->map_.count(last_key) != 0; + + *p = cur_table->map_.at(last_key); + return true; + } + + string_to_base_map map_; +}; + +/** + * Helper function that attempts to get an array of arrays for a given + * key. + * + * If the key doesn't exist, doesn't exist as an array type, or one or + * more keys inside the array type are not of type T, an empty option + * is returned. Otherwise, an option containing a vector of the values + * is returned. + */ +template <> +inline typename array_of_trait<array>::return_type +table::get_array_of<array>(const std::string& key) const +{ + if (auto v = get_array(key)) + { + std::vector<std::shared_ptr<array>> result; + result.reserve(v->get().size()); + + for (const auto& b : v->get()) + { + if (auto val = b->as_array()) + result.push_back(val); + else + return {}; + } + + return {std::move(result)}; + } + + return {}; +} + +/** + * Helper function that attempts to get an array of arrays for a given + * key. Will resolve "qualified keys". + * + * If the key doesn't exist, doesn't exist as an array type, or one or + * more keys inside the array type are not of type T, an empty option + * is returned. Otherwise, an option containing a vector of the values + * is returned. + */ +template <> +inline typename array_of_trait<array>::return_type +table::get_qualified_array_of<array>(const std::string& key) const +{ + if (auto v = get_array_qualified(key)) + { + std::vector<std::shared_ptr<array>> result; + result.reserve(v->get().size()); + + for (const auto& b : v->get()) + { + if (auto val = b->as_array()) + result.push_back(val); + else + return {}; + } + + return {std::move(result)}; + } + + return {}; +} + +std::shared_ptr<table> make_table() +{ + struct make_shared_enabler : public table + { + make_shared_enabler() + { + // nothing + } + }; + + return std::make_shared<make_shared_enabler>(); +} + +namespace detail +{ +template <> +inline std::shared_ptr<table> make_element<table>() +{ + return make_table(); +} +} // namespace detail + +template <class T> +std::shared_ptr<base> value<T>::clone() const +{ + return make_value(data_); +} + +inline std::shared_ptr<base> array::clone() const +{ + auto result = make_array(); + result->reserve(values_.size()); + for (const auto& ptr : values_) + result->values_.push_back(ptr->clone()); + return result; +} + +inline std::shared_ptr<base> table_array::clone() const +{ + auto result = make_table_array(is_inline()); + result->reserve(array_.size()); + for (const auto& ptr : array_) + result->array_.push_back(ptr->clone()->as_table()); + return result; +} + +inline std::shared_ptr<base> table::clone() const +{ + auto result = make_table(); + for (const auto& pr : map_) + result->insert(pr.first, pr.second->clone()); + return result; +} + +/** + * Exception class for all TOML parsing errors. + */ +class parse_exception : public std::runtime_error +{ + public: + parse_exception(const std::string& err) : std::runtime_error{err} + { + } + + parse_exception(const std::string& err, std::size_t line_number) + : std::runtime_error{err + " at line " + std::to_string(line_number)} + { + } +}; + +inline bool is_number(char c) +{ + return c >= '0' && c <= '9'; +} + +inline bool is_hex(char c) +{ + return is_number(c) || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F'); +} + +/** + * Helper object for consuming expected characters. + */ +template <class OnError> +class consumer +{ + public: + consumer(std::string::iterator& it, const std::string::iterator& end, + OnError&& on_error) + : it_(it), end_(end), on_error_(std::forward<OnError>(on_error)) + { + // nothing + } + + void operator()(char c) + { + if (it_ == end_ || *it_ != c) + on_error_(); + ++it_; + } + + template <std::size_t N> + void operator()(const char (&str)[N]) + { + std::for_each(std::begin(str), std::end(str) - 1, + [&](char c) { (*this)(c); }); + } + + void eat_or(char a, char b) + { + if (it_ == end_ || (*it_ != a && *it_ != b)) + on_error_(); + ++it_; + } + + int eat_digits(int len) + { + int val = 0; + for (int i = 0; i < len; ++i) + { + if (!is_number(*it_) || it_ == end_) + on_error_(); + val = 10 * val + (*it_++ - '0'); + } + return val; + } + + void error() + { + on_error_(); + } + + private: + std::string::iterator& it_; + const std::string::iterator& end_; + OnError on_error_; +}; + +template <class OnError> +consumer<OnError> make_consumer(std::string::iterator& it, + const std::string::iterator& end, + OnError&& on_error) +{ + return consumer<OnError>(it, end, std::forward<OnError>(on_error)); +} + +// replacement for std::getline to handle incorrectly line-ended files +// https://stackoverflow.com/questions/6089231/getting-std-ifstream-to-handle-lf-cr-and-crlf +namespace detail +{ +inline std::istream& getline(std::istream& input, std::string& line) +{ + line.clear(); + + std::istream::sentry sentry{input, true}; + auto sb = input.rdbuf(); + + while (true) + { + auto c = sb->sbumpc(); + if (c == '\r') + { + if (sb->sgetc() == '\n') + c = sb->sbumpc(); + } + + if (c == '\n') + return input; + + if (c == std::istream::traits_type::eof()) + { + if (line.empty()) + input.setstate(std::ios::eofbit); + return input; + } + + line.push_back(static_cast<char>(c)); + } +} +} // namespace detail + +/** + * The parser class. + */ +class parser +{ + public: + /** + * Parsers are constructed from streams. + */ + parser(std::istream& stream) : input_(stream) + { + // nothing + } + + parser& operator=(const parser& parser) = delete; + + /** + * Parses the stream this parser was created on until EOF. + * @throw parse_exception if there are errors in parsing + */ + std::shared_ptr<table> parse() + { + std::shared_ptr<table> root = make_table(); + + table* curr_table = root.get(); + + while (detail::getline(input_, line_)) + { + line_number_++; + auto it = line_.begin(); + auto end = line_.end(); + consume_whitespace(it, end); + if (it == end || *it == '#') + continue; + if (*it == '[') + { + curr_table = root.get(); + parse_table(it, end, curr_table); + } + else + { + parse_key_value(it, end, curr_table); + consume_whitespace(it, end); + eol_or_comment(it, end); + } + } + return root; + } + + private: +#if defined _MSC_VER + __declspec(noreturn) +#elif defined __GNUC__ + __attribute__((noreturn)) +#endif + void throw_parse_exception(const std::string& err) + { + throw parse_exception{err, line_number_}; + } + + void parse_table(std::string::iterator& it, + const std::string::iterator& end, table*& curr_table) + { + // remove the beginning keytable marker + ++it; + if (it == end) + throw_parse_exception("Unexpected end of table"); + if (*it == '[') + parse_table_array(it, end, curr_table); + else + parse_single_table(it, end, curr_table); + } + + void parse_single_table(std::string::iterator& it, + const std::string::iterator& end, + table*& curr_table) + { + if (it == end || *it == ']') + throw_parse_exception("Table name cannot be empty"); + + std::string full_table_name; + bool inserted = false; + + auto key_end = [](char c) { return c == ']'; }; + + auto key_part_handler = [&](const std::string& part) { + if (part.empty()) + throw_parse_exception("Empty component of table name"); + + if (!full_table_name.empty()) + full_table_name += '.'; + full_table_name += part; + + if (curr_table->contains(part)) + { +#if !defined(__PGI) + auto b = curr_table->get(part); +#else + // Workaround for PGI compiler + std::shared_ptr<base> b = curr_table->get(part); +#endif + if (b->is_table()) + curr_table = static_cast<table*>(b.get()); + else if (b->is_table_array()) + curr_table = std::static_pointer_cast<table_array>(b) + ->get() + .back() + .get(); + else + throw_parse_exception("Key " + full_table_name + + "already exists as a value"); + } + else + { + inserted = true; + curr_table->insert(part, make_table()); + curr_table = static_cast<table*>(curr_table->get(part).get()); + } + }; + + key_part_handler(parse_key(it, end, key_end, key_part_handler)); + + if (it == end) + throw_parse_exception( + "Unterminated table declaration; did you forget a ']'?"); + + if (*it != ']') + { + std::string errmsg{"Unexpected character in table definition: "}; + errmsg += '"'; + errmsg += *it; + errmsg += '"'; + throw_parse_exception(errmsg); + } + + // table already existed + if (!inserted) + { + auto is_value + = [](const std::pair<const std::string&, + const std::shared_ptr<base>&>& p) { + return p.second->is_value(); + }; + + // if there are any values, we can't add values to this table + // since it has already been defined. If there aren't any + // values, then it was implicitly created by something like + // [a.b] + if (curr_table->empty() + || std::any_of(curr_table->begin(), curr_table->end(), + is_value)) + { + throw_parse_exception("Redefinition of table " + + full_table_name); + } + } + + ++it; + consume_whitespace(it, end); + eol_or_comment(it, end); + } + + void parse_table_array(std::string::iterator& it, + const std::string::iterator& end, table*& curr_table) + { + ++it; + if (it == end || *it == ']') + throw_parse_exception("Table array name cannot be empty"); + + auto key_end = [](char c) { return c == ']'; }; + + std::string full_ta_name; + auto key_part_handler = [&](const std::string& part) { + if (part.empty()) + throw_parse_exception("Empty component of table array name"); + + if (!full_ta_name.empty()) + full_ta_name += '.'; + full_ta_name += part; + + if (curr_table->contains(part)) + { +#if !defined(__PGI) + auto b = curr_table->get(part); +#else + // Workaround for PGI compiler + std::shared_ptr<base> b = curr_table->get(part); +#endif + + // if this is the end of the table array name, add an + // element to the table array that we just looked up, + // provided it was not declared inline + if (it != end && *it == ']') + { + if (!b->is_table_array()) + { + throw_parse_exception("Key " + full_ta_name + + " is not a table array"); + } + + auto v = b->as_table_array(); + + if (v->is_inline()) + { + throw_parse_exception("Static array " + full_ta_name + + " cannot be appended to"); + } + + v->get().push_back(make_table()); + curr_table = v->get().back().get(); + } + // otherwise, just keep traversing down the key name + else + { + if (b->is_table()) + curr_table = static_cast<table*>(b.get()); + else if (b->is_table_array()) + curr_table = std::static_pointer_cast<table_array>(b) + ->get() + .back() + .get(); + else + throw_parse_exception("Key " + full_ta_name + + " already exists as a value"); + } + } + else + { + // if this is the end of the table array name, add a new + // table array and a new table inside that array for us to + // add keys to next + if (it != end && *it == ']') + { + curr_table->insert(part, make_table_array()); + auto arr = std::static_pointer_cast<table_array>( + curr_table->get(part)); + arr->get().push_back(make_table()); + curr_table = arr->get().back().get(); + } + // otherwise, create the implicitly defined table and move + // down to it + else + { + curr_table->insert(part, make_table()); + curr_table + = static_cast<table*>(curr_table->get(part).get()); + } + } + }; + + key_part_handler(parse_key(it, end, key_end, key_part_handler)); + + // consume the last "]]" + auto eat = make_consumer(it, end, [this]() { + throw_parse_exception("Unterminated table array name"); + }); + eat(']'); + eat(']'); + + consume_whitespace(it, end); + eol_or_comment(it, end); + } + + void parse_key_value(std::string::iterator& it, std::string::iterator& end, + table* curr_table) + { + auto key_end = [](char c) { return c == '='; }; + + auto key_part_handler = [&](const std::string& part) { + // two cases: this key part exists already, in which case it must + // be a table, or it doesn't exist in which case we must create + // an implicitly defined table + if (curr_table->contains(part)) + { + auto val = curr_table->get(part); + if (val->is_table()) + { + curr_table = static_cast<table*>(val.get()); + } + else + { + throw_parse_exception("Key " + part + + " already exists as a value"); + } + } + else + { + auto newtable = make_table(); + curr_table->insert(part, newtable); + curr_table = newtable.get(); + } + }; + + auto key = parse_key(it, end, key_end, key_part_handler); + + if (curr_table->contains(key)) + throw_parse_exception("Key " + key + " already present"); + if (it == end || *it != '=') + throw_parse_exception("Value must follow after a '='"); + ++it; + consume_whitespace(it, end); + curr_table->insert(key, parse_value(it, end)); + consume_whitespace(it, end); + } + + template <class KeyEndFinder, class KeyPartHandler> + std::string + parse_key(std::string::iterator& it, const std::string::iterator& end, + KeyEndFinder&& key_end, KeyPartHandler&& key_part_handler) + { + // parse the key as a series of one or more simple-keys joined with '.' + while (it != end && !key_end(*it)) + { + auto part = parse_simple_key(it, end); + consume_whitespace(it, end); + + if (it == end || key_end(*it)) + { + return part; + } + + if (*it != '.') + { + std::string errmsg{"Unexpected character in key: "}; + errmsg += '"'; + errmsg += *it; + errmsg += '"'; + throw_parse_exception(errmsg); + } + + key_part_handler(part); + + // consume the dot + ++it; + } + + throw_parse_exception("Unexpected end of key"); + } + + std::string parse_simple_key(std::string::iterator& it, + const std::string::iterator& end) + { + consume_whitespace(it, end); + + if (it == end) + throw_parse_exception("Unexpected end of key (blank key?)"); + + if (*it == '"' || *it == '\'') + { + return string_literal(it, end, *it); + } + else + { + auto bke = std::find_if(it, end, [](char c) { + return c == '.' || c == '=' || c == ']'; + }); + return parse_bare_key(it, bke); + } + } + + std::string parse_bare_key(std::string::iterator& it, + const std::string::iterator& end) + { + if (it == end) + { + throw_parse_exception("Bare key missing name"); + } + + auto key_end = end; + --key_end; + consume_backwards_whitespace(key_end, it); + ++key_end; + std::string key{it, key_end}; + + if (std::find(it, key_end, '#') != key_end) + { + throw_parse_exception("Bare key " + key + " cannot contain #"); + } + + if (std::find_if(it, key_end, + [](char c) { return c == ' ' || c == '\t'; }) + != key_end) + { + throw_parse_exception("Bare key " + key + + " cannot contain whitespace"); + } + + if (std::find_if(it, key_end, + [](char c) { return c == '[' || c == ']'; }) + != key_end) + { + throw_parse_exception("Bare key " + key + + " cannot contain '[' or ']'"); + } + + it = end; + return key; + } + + enum class parse_type + { + STRING = 1, + LOCAL_TIME, + LOCAL_DATE, + LOCAL_DATETIME, + OFFSET_DATETIME, + INT, + FLOAT, + BOOL, + ARRAY, + INLINE_TABLE + }; + + std::shared_ptr<base> parse_value(std::string::iterator& it, + std::string::iterator& end) + { + parse_type type = determine_value_type(it, end); + switch (type) + { + case parse_type::STRING: + return parse_string(it, end); + case parse_type::LOCAL_TIME: + return parse_time(it, end); + case parse_type::LOCAL_DATE: + case parse_type::LOCAL_DATETIME: + case parse_type::OFFSET_DATETIME: + return parse_date(it, end); + case parse_type::INT: + case parse_type::FLOAT: + return parse_number(it, end); + case parse_type::BOOL: + return parse_bool(it, end); + case parse_type::ARRAY: + return parse_array(it, end); + case parse_type::INLINE_TABLE: + return parse_inline_table(it, end); + default: + throw_parse_exception("Failed to parse value"); + } + } + + parse_type determine_value_type(const std::string::iterator& it, + const std::string::iterator& end) + { + if (it == end) + { + throw_parse_exception("Failed to parse value type"); + } + if (*it == '"' || *it == '\'') + { + return parse_type::STRING; + } + else if (is_time(it, end)) + { + return parse_type::LOCAL_TIME; + } + else if (auto dtype = date_type(it, end)) + { + return *dtype; + } + else if (is_number(*it) || *it == '-' || *it == '+' + || (*it == 'i' && it + 1 != end && it[1] == 'n' + && it + 2 != end && it[2] == 'f') + || (*it == 'n' && it + 1 != end && it[1] == 'a' + && it + 2 != end && it[2] == 'n')) + { + return determine_number_type(it, end); + } + else if (*it == 't' || *it == 'f') + { + return parse_type::BOOL; + } + else if (*it == '[') + { + return parse_type::ARRAY; + } + else if (*it == '{') + { + return parse_type::INLINE_TABLE; + } + throw_parse_exception("Failed to parse value type"); + } + + parse_type determine_number_type(const std::string::iterator& it, + const std::string::iterator& end) + { + // determine if we are an integer or a float + auto check_it = it; + if (*check_it == '-' || *check_it == '+') + ++check_it; + + if (check_it == end) + throw_parse_exception("Malformed number"); + + if (*check_it == 'i' || *check_it == 'n') + return parse_type::FLOAT; + + while (check_it != end && is_number(*check_it)) + ++check_it; + if (check_it != end && *check_it == '.') + { + ++check_it; + while (check_it != end && is_number(*check_it)) + ++check_it; + return parse_type::FLOAT; + } + else + { + return parse_type::INT; + } + } + + std::shared_ptr<value<std::string>> parse_string(std::string::iterator& it, + std::string::iterator& end) + { + auto delim = *it; + assert(delim == '"' || delim == '\''); + + // end is non-const here because we have to be able to potentially + // parse multiple lines in a string, not just one + auto check_it = it; + ++check_it; + if (check_it != end && *check_it == delim) + { + ++check_it; + if (check_it != end && *check_it == delim) + { + it = ++check_it; + return parse_multiline_string(it, end, delim); + } + } + return make_value<std::string>(string_literal(it, end, delim)); + } + + std::shared_ptr<value<std::string>> + parse_multiline_string(std::string::iterator& it, + std::string::iterator& end, char delim) + { + std::stringstream ss; + + auto is_ws = [](char c) { return c == ' ' || c == '\t'; }; + + bool consuming = false; + std::shared_ptr<value<std::string>> ret; + + auto handle_line = [&](std::string::iterator& local_it, + std::string::iterator& local_end) { + if (consuming) + { + local_it = std::find_if_not(local_it, local_end, is_ws); + + // whole line is whitespace + if (local_it == local_end) + return; + } + + consuming = false; + + while (local_it != local_end) + { + // handle escaped characters + if (delim == '"' && *local_it == '\\') + { + auto check = local_it; + // check if this is an actual escape sequence or a + // whitespace escaping backslash + ++check; + consume_whitespace(check, local_end); + if (check == local_end) + { + consuming = true; + break; + } + + ss << parse_escape_code(local_it, local_end); + continue; + } + + // if we can end the string + if (std::distance(local_it, local_end) >= 3) + { + auto check = local_it; + // check for """ + if (*check++ == delim && *check++ == delim + && *check++ == delim) + { + local_it = check; + ret = make_value<std::string>(ss.str()); + break; + } + } + + ss << *local_it++; + } + }; + + // handle the remainder of the current line + handle_line(it, end); + if (ret) + return ret; + + // start eating lines + while (detail::getline(input_, line_)) + { + ++line_number_; + + it = line_.begin(); + end = line_.end(); + + handle_line(it, end); + + if (ret) + return ret; + + if (!consuming) + ss << std::endl; + } + + throw_parse_exception("Unterminated multi-line basic string"); + } + + std::string string_literal(std::string::iterator& it, + const std::string::iterator& end, char delim) + { + ++it; + std::string val; + while (it != end) + { + // handle escaped characters + if (delim == '"' && *it == '\\') + { + val += parse_escape_code(it, end); + } + else if (*it == delim) + { + ++it; + consume_whitespace(it, end); + return val; + } + else + { + val += *it++; + } + } + throw_parse_exception("Unterminated string literal"); + } + + std::string parse_escape_code(std::string::iterator& it, + const std::string::iterator& end) + { + ++it; + if (it == end) + throw_parse_exception("Invalid escape sequence"); + char value; + if (*it == 'b') + { + value = '\b'; + } + else if (*it == 't') + { + value = '\t'; + } + else if (*it == 'n') + { + value = '\n'; + } + else if (*it == 'f') + { + value = '\f'; + } + else if (*it == 'r') + { + value = '\r'; + } + else if (*it == '"') + { + value = '"'; + } + else if (*it == '\\') + { + value = '\\'; + } + else if (*it == 'u' || *it == 'U') + { + return parse_unicode(it, end); + } + else + { + throw_parse_exception("Invalid escape sequence"); + } + ++it; + return std::string(1, value); + } + + std::string parse_unicode(std::string::iterator& it, + const std::string::iterator& end) + { + bool large = *it++ == 'U'; + auto codepoint = parse_hex(it, end, large ? 0x10000000 : 0x1000); + + if ((codepoint > 0xd7ff && codepoint < 0xe000) || codepoint > 0x10ffff) + { + throw_parse_exception( + "Unicode escape sequence is not a Unicode scalar value"); + } + + std::string result; + // See Table 3-6 of the Unicode standard + if (codepoint <= 0x7f) + { + // 1-byte codepoints: 00000000 0xxxxxxx + // repr: 0xxxxxxx + result += static_cast<char>(codepoint & 0x7f); + } + else if (codepoint <= 0x7ff) + { + // 2-byte codepoints: 00000yyy yyxxxxxx + // repr: 110yyyyy 10xxxxxx + // + // 0x1f = 00011111 + // 0xc0 = 11000000 + // + result += static_cast<char>(0xc0 | ((codepoint >> 6) & 0x1f)); + // + // 0x80 = 10000000 + // 0x3f = 00111111 + // + result += static_cast<char>(0x80 | (codepoint & 0x3f)); + } + else if (codepoint <= 0xffff) + { + // 3-byte codepoints: zzzzyyyy yyxxxxxx + // repr: 1110zzzz 10yyyyyy 10xxxxxx + // + // 0xe0 = 11100000 + // 0x0f = 00001111 + // + result += static_cast<char>(0xe0 | ((codepoint >> 12) & 0x0f)); + result += static_cast<char>(0x80 | ((codepoint >> 6) & 0x1f)); + result += static_cast<char>(0x80 | (codepoint & 0x3f)); + } + else + { + // 4-byte codepoints: 000uuuuu zzzzyyyy yyxxxxxx + // repr: 11110uuu 10uuzzzz 10yyyyyy 10xxxxxx + // + // 0xf0 = 11110000 + // 0x07 = 00000111 + // + result += static_cast<char>(0xf0 | ((codepoint >> 18) & 0x07)); + result += static_cast<char>(0x80 | ((codepoint >> 12) & 0x3f)); + result += static_cast<char>(0x80 | ((codepoint >> 6) & 0x3f)); + result += static_cast<char>(0x80 | (codepoint & 0x3f)); + } + return result; + } + + uint32_t parse_hex(std::string::iterator& it, + const std::string::iterator& end, uint32_t place) + { + uint32_t value = 0; + while (place > 0) + { + if (it == end) + throw_parse_exception("Unexpected end of unicode sequence"); + + if (!is_hex(*it)) + throw_parse_exception("Invalid unicode escape sequence"); + + value += place * hex_to_digit(*it++); + place /= 16; + } + return value; + } + + uint32_t hex_to_digit(char c) + { + if (is_number(c)) + return static_cast<uint32_t>(c - '0'); + return 10 + + static_cast<uint32_t>(c + - ((c >= 'a' && c <= 'f') ? 'a' : 'A')); + } + + std::shared_ptr<base> parse_number(std::string::iterator& it, + const std::string::iterator& end) + { + auto check_it = it; + auto check_end = find_end_of_number(it, end); + + auto eat_sign = [&]() { + if (check_it != end && (*check_it == '-' || *check_it == '+')) + ++check_it; + }; + + auto check_no_leading_zero = [&]() { + if (check_it != end && *check_it == '0' && check_it + 1 != check_end + && check_it[1] != '.') + { + throw_parse_exception("Numbers may not have leading zeros"); + } + }; + + auto eat_digits = [&](bool (*check_char)(char)) { + auto beg = check_it; + while (check_it != end && check_char(*check_it)) + { + ++check_it; + if (check_it != end && *check_it == '_') + { + ++check_it; + if (check_it == end || !check_char(*check_it)) + throw_parse_exception("Malformed number"); + } + } + + if (check_it == beg) + throw_parse_exception("Malformed number"); + }; + + auto eat_hex = [&]() { eat_digits(&is_hex); }; + + auto eat_numbers = [&]() { eat_digits(&is_number); }; + + if (check_it != end && *check_it == '0' && check_it + 1 != check_end + && (check_it[1] == 'x' || check_it[1] == 'o' || check_it[1] == 'b')) + { + ++check_it; + char base = *check_it; + ++check_it; + if (base == 'x') + { + eat_hex(); + return parse_int(it, check_it, 16); + } + else if (base == 'o') + { + auto start = check_it; + eat_numbers(); + auto val = parse_int(start, check_it, 8, "0"); + it = start; + return val; + } + else // if (base == 'b') + { + auto start = check_it; + eat_numbers(); + auto val = parse_int(start, check_it, 2); + it = start; + return val; + } + } + + eat_sign(); + check_no_leading_zero(); + + if (check_it != end && check_it + 1 != end && check_it + 2 != end) + { + if (check_it[0] == 'i' && check_it[1] == 'n' && check_it[2] == 'f') + { + auto val = std::numeric_limits<double>::infinity(); + if (*it == '-') + val = -val; + it = check_it + 3; + return make_value(val); + } + else if (check_it[0] == 'n' && check_it[1] == 'a' + && check_it[2] == 'n') + { + auto val = std::numeric_limits<double>::quiet_NaN(); + if (*it == '-') + val = -val; + it = check_it + 3; + return make_value(val); + } + } + + eat_numbers(); + + if (check_it != end + && (*check_it == '.' || *check_it == 'e' || *check_it == 'E')) + { + bool is_exp = *check_it == 'e' || *check_it == 'E'; + + ++check_it; + if (check_it == end) + throw_parse_exception("Floats must have trailing digits"); + + auto eat_exp = [&]() { + eat_sign(); + check_no_leading_zero(); + eat_numbers(); + }; + + if (is_exp) + eat_exp(); + else + eat_numbers(); + + if (!is_exp && check_it != end + && (*check_it == 'e' || *check_it == 'E')) + { + ++check_it; + eat_exp(); + } + + return parse_float(it, check_it); + } + else + { + return parse_int(it, check_it); + } + } + + std::shared_ptr<value<int64_t>> parse_int(std::string::iterator& it, + const std::string::iterator& end, + int base = 10, + const char* prefix = "") + { + std::string v{it, end}; + v = prefix + v; + v.erase(std::remove(v.begin(), v.end(), '_'), v.end()); + it = end; + try + { + return make_value<int64_t>(std::stoll(v, nullptr, base)); + } + catch (const std::invalid_argument& ex) + { + throw_parse_exception("Malformed number (invalid argument: " + + std::string{ex.what()} + ")"); + } + catch (const std::out_of_range& ex) + { + throw_parse_exception("Malformed number (out of range: " + + std::string{ex.what()} + ")"); + } + } + + std::shared_ptr<value<double>> parse_float(std::string::iterator& it, + const std::string::iterator& end) + { + std::string v{it, end}; + v.erase(std::remove(v.begin(), v.end(), '_'), v.end()); + it = end; + char decimal_point = std::localeconv()->decimal_point[0]; + std::replace(v.begin(), v.end(), '.', decimal_point); + try + { + return make_value<double>(std::stod(v)); + } + catch (const std::invalid_argument& ex) + { + throw_parse_exception("Malformed number (invalid argument: " + + std::string{ex.what()} + ")"); + } + catch (const std::out_of_range& ex) + { + throw_parse_exception("Malformed number (out of range: " + + std::string{ex.what()} + ")"); + } + } + + std::shared_ptr<value<bool>> parse_bool(std::string::iterator& it, + const std::string::iterator& end) + { + auto eat = make_consumer(it, end, [this]() { + throw_parse_exception("Attempted to parse invalid boolean value"); + }); + + if (*it == 't') + { + eat("true"); + return make_value<bool>(true); + } + else if (*it == 'f') + { + eat("false"); + return make_value<bool>(false); + } + + eat.error(); + return nullptr; + } + + std::string::iterator find_end_of_number(std::string::iterator it, + std::string::iterator end) + { + auto ret = std::find_if(it, end, [](char c) { + return !is_number(c) && c != '_' && c != '.' && c != 'e' && c != 'E' + && c != '-' && c != '+' && c != 'x' && c != 'o' && c != 'b'; + }); + if (ret != end && ret + 1 != end && ret + 2 != end) + { + if ((ret[0] == 'i' && ret[1] == 'n' && ret[2] == 'f') + || (ret[0] == 'n' && ret[1] == 'a' && ret[2] == 'n')) + { + ret = ret + 3; + } + } + return ret; + } + + std::string::iterator find_end_of_date(std::string::iterator it, + std::string::iterator end) + { + auto end_of_date = std::find_if(it, end, [](char c) { + return !is_number(c) && c != '-'; + }); + if (end_of_date != end && *end_of_date == ' ' && end_of_date + 1 != end + && is_number(end_of_date[1])) + end_of_date++; + return std::find_if(end_of_date, end, [](char c) { + return !is_number(c) && c != 'T' && c != 'Z' && c != ':' + && c != '-' && c != '+' && c != '.'; + }); + } + + std::string::iterator find_end_of_time(std::string::iterator it, + std::string::iterator end) + { + return std::find_if(it, end, [](char c) { + return !is_number(c) && c != ':' && c != '.'; + }); + } + + local_time read_time(std::string::iterator& it, + const std::string::iterator& end) + { + auto time_end = find_end_of_time(it, end); + + auto eat = make_consumer( + it, time_end, [&]() { throw_parse_exception("Malformed time"); }); + + local_time ltime; + + ltime.hour = eat.eat_digits(2); + eat(':'); + ltime.minute = eat.eat_digits(2); + eat(':'); + ltime.second = eat.eat_digits(2); + + int power = 100000; + if (it != time_end && *it == '.') + { + ++it; + while (it != time_end && is_number(*it)) + { + ltime.microsecond += power * (*it++ - '0'); + power /= 10; + } + } + + if (it != time_end) + throw_parse_exception("Malformed time"); + + return ltime; + } + + std::shared_ptr<value<local_time>> + parse_time(std::string::iterator& it, const std::string::iterator& end) + { + return make_value(read_time(it, end)); + } + + std::shared_ptr<base> parse_date(std::string::iterator& it, + const std::string::iterator& end) + { + auto date_end = find_end_of_date(it, end); + + auto eat = make_consumer( + it, date_end, [&]() { throw_parse_exception("Malformed date"); }); + + local_date ldate; + ldate.year = eat.eat_digits(4); + eat('-'); + ldate.month = eat.eat_digits(2); + eat('-'); + ldate.day = eat.eat_digits(2); + + if (it == date_end) + return make_value(ldate); + + eat.eat_or('T', ' '); + + local_datetime ldt; + static_cast<local_date&>(ldt) = ldate; + static_cast<local_time&>(ldt) = read_time(it, date_end); + + if (it == date_end) + return make_value(ldt); + + offset_datetime dt; + static_cast<local_datetime&>(dt) = ldt; + + int hoff = 0; + int moff = 0; + if (*it == '+' || *it == '-') + { + auto plus = *it == '+'; + ++it; + + hoff = eat.eat_digits(2); + dt.hour_offset = (plus) ? hoff : -hoff; + eat(':'); + moff = eat.eat_digits(2); + dt.minute_offset = (plus) ? moff : -moff; + } + else if (*it == 'Z') + { + ++it; + } + + if (it != date_end) + throw_parse_exception("Malformed date"); + + return make_value(dt); + } + + std::shared_ptr<base> parse_array(std::string::iterator& it, + std::string::iterator& end) + { + // this gets ugly because of the "homogeneity" restriction: + // arrays can either be of only one type, or contain arrays + // (each of those arrays could be of different types, though) + // + // because of the latter portion, we don't really have a choice + // but to represent them as arrays of base values... + ++it; + + // ugh---have to read the first value to determine array type... + skip_whitespace_and_comments(it, end); + + // edge case---empty array + if (*it == ']') + { + ++it; + return make_array(); + } + + auto val_end = std::find_if( + it, end, [](char c) { return c == ',' || c == ']' || c == '#'; }); + parse_type type = determine_value_type(it, val_end); + switch (type) + { + case parse_type::STRING: + return parse_value_array<std::string>(it, end); + case parse_type::LOCAL_TIME: + return parse_value_array<local_time>(it, end); + case parse_type::LOCAL_DATE: + return parse_value_array<local_date>(it, end); + case parse_type::LOCAL_DATETIME: + return parse_value_array<local_datetime>(it, end); + case parse_type::OFFSET_DATETIME: + return parse_value_array<offset_datetime>(it, end); + case parse_type::INT: + return parse_value_array<int64_t>(it, end); + case parse_type::FLOAT: + return parse_value_array<double>(it, end); + case parse_type::BOOL: + return parse_value_array<bool>(it, end); + case parse_type::ARRAY: + return parse_object_array<array>(&parser::parse_array, '[', it, + end); + case parse_type::INLINE_TABLE: + return parse_object_array<table_array>( + &parser::parse_inline_table, '{', it, end); + default: + throw_parse_exception("Unable to parse array"); + } + } + + template <class Value> + std::shared_ptr<array> parse_value_array(std::string::iterator& it, + std::string::iterator& end) + { + auto arr = make_array(); + while (it != end && *it != ']') + { + auto val = parse_value(it, end); + if (auto v = val->as<Value>()) + arr->get().push_back(val); + else + throw_parse_exception("Arrays must be homogeneous"); + skip_whitespace_and_comments(it, end); + if (*it != ',') + break; + ++it; + skip_whitespace_and_comments(it, end); + } + if (it != end) + ++it; + return arr; + } + + template <class Object, class Function> + std::shared_ptr<Object> parse_object_array(Function&& fun, char delim, + std::string::iterator& it, + std::string::iterator& end) + { + auto arr = detail::make_element<Object>(); + + while (it != end && *it != ']') + { + if (*it != delim) + throw_parse_exception("Unexpected character in array"); + + arr->get().push_back(((*this).*fun)(it, end)); + skip_whitespace_and_comments(it, end); + + if (it == end || *it != ',') + break; + + ++it; + skip_whitespace_and_comments(it, end); + } + + if (it == end || *it != ']') + throw_parse_exception("Unterminated array"); + + ++it; + return arr; + } + + std::shared_ptr<table> parse_inline_table(std::string::iterator& it, + std::string::iterator& end) + { + auto tbl = make_table(); + do + { + ++it; + if (it == end) + throw_parse_exception("Unterminated inline table"); + + consume_whitespace(it, end); + if (it != end && *it != '}') + { + parse_key_value(it, end, tbl.get()); + consume_whitespace(it, end); + } + } while (*it == ','); + + if (it == end || *it != '}') + throw_parse_exception("Unterminated inline table"); + + ++it; + consume_whitespace(it, end); + + return tbl; + } + + void skip_whitespace_and_comments(std::string::iterator& start, + std::string::iterator& end) + { + consume_whitespace(start, end); + while (start == end || *start == '#') + { + if (!detail::getline(input_, line_)) + throw_parse_exception("Unclosed array"); + line_number_++; + start = line_.begin(); + end = line_.end(); + consume_whitespace(start, end); + } + } + + void consume_whitespace(std::string::iterator& it, + const std::string::iterator& end) + { + while (it != end && (*it == ' ' || *it == '\t')) + ++it; + } + + void consume_backwards_whitespace(std::string::iterator& back, + const std::string::iterator& front) + { + while (back != front && (*back == ' ' || *back == '\t')) + --back; + } + + void eol_or_comment(const std::string::iterator& it, + const std::string::iterator& end) + { + if (it != end && *it != '#') + throw_parse_exception("Unidentified trailing character '" + + std::string{*it} + + "'---did you forget a '#'?"); + } + + bool is_time(const std::string::iterator& it, + const std::string::iterator& end) + { + auto time_end = find_end_of_time(it, end); + auto len = std::distance(it, time_end); + + if (len < 8) + return false; + + if (it[2] != ':' || it[5] != ':') + return false; + + if (len > 8) + return it[8] == '.' && len > 9; + + return true; + } + + option<parse_type> date_type(const std::string::iterator& it, + const std::string::iterator& end) + { + auto date_end = find_end_of_date(it, end); + auto len = std::distance(it, date_end); + + if (len < 10) + return {}; + + if (it[4] != '-' || it[7] != '-') + return {}; + + if (len >= 19 && (it[10] == 'T' || it[10] == ' ') + && is_time(it + 11, date_end)) + { + // datetime type + auto time_end = find_end_of_time(it + 11, date_end); + if (time_end == date_end) + return {parse_type::LOCAL_DATETIME}; + else + return {parse_type::OFFSET_DATETIME}; + } + else if (len == 10) + { + // just a regular date + return {parse_type::LOCAL_DATE}; + } + + return {}; + } + + std::istream& input_; + std::string line_; + std::size_t line_number_ = 0; +}; + +/** + * Utility function to parse a file as a TOML file. Returns the root table. + * Throws a parse_exception if the file cannot be opened. + */ +inline std::shared_ptr<table> parse_file(const std::string& filename) +{ +#if defined(BOOST_NOWIDE_FSTREAM_INCLUDED_HPP) + boost::nowide::ifstream file{filename.c_str()}; +#elif defined(NOWIDE_FSTREAM_INCLUDED_HPP) + nowide::ifstream file{filename.c_str()}; +#else + std::ifstream file{filename}; +#endif + if (!file.is_open()) + throw parse_exception{filename + " could not be opened for parsing"}; + parser p{file}; + return p.parse(); +} + +template <class... Ts> +struct value_accept; + +template <> +struct value_accept<> +{ + template <class Visitor, class... Args> + static void accept(const base&, Visitor&&, Args&&...) + { + // nothing + } +}; + +template <class T, class... Ts> +struct value_accept<T, Ts...> +{ + template <class Visitor, class... Args> + static void accept(const base& b, Visitor&& visitor, Args&&... args) + { + if (auto v = b.as<T>()) + { + visitor.visit(*v, std::forward<Args>(args)...); + } + else + { + value_accept<Ts...>::accept(b, std::forward<Visitor>(visitor), + std::forward<Args>(args)...); + } + } +}; + +/** + * base implementation of accept() that calls visitor.visit() on the concrete + * class. + */ +template <class Visitor, class... Args> +void base::accept(Visitor&& visitor, Args&&... args) const +{ + if (is_value()) + { + using value_acceptor + = value_accept<std::string, int64_t, double, bool, local_date, + local_time, local_datetime, offset_datetime>; + value_acceptor::accept(*this, std::forward<Visitor>(visitor), + std::forward<Args>(args)...); + } + else if (is_table()) + { + visitor.visit(static_cast<const table&>(*this), + std::forward<Args>(args)...); + } + else if (is_array()) + { + visitor.visit(static_cast<const array&>(*this), + std::forward<Args>(args)...); + } + else if (is_table_array()) + { + visitor.visit(static_cast<const table_array&>(*this), + std::forward<Args>(args)...); + } +} + +/** + * Writer that can be passed to accept() functions of cpptoml objects and + * will output valid TOML to a stream. + */ +class toml_writer +{ + public: + /** + * Construct a toml_writer that will write to the given stream + */ + toml_writer(std::ostream& s, const std::string& indent_space = "\t") + : stream_(s), indent_(indent_space), has_naked_endline_(false) + { + // nothing + } + + public: + /** + * Output a base value of the TOML tree. + */ + template <class T> + void visit(const value<T>& v, bool = false) + { + write(v); + } + + /** + * Output a table element of the TOML tree + */ + void visit(const table& t, bool in_array = false) + { + write_table_header(in_array); + std::vector<std::string> values; + std::vector<std::string> tables; + + for (const auto& i : t) + { + if (i.second->is_table() || i.second->is_table_array()) + { + tables.push_back(i.first); + } + else + { + values.push_back(i.first); + } + } + + for (unsigned int i = 0; i < values.size(); ++i) + { + path_.push_back(values[i]); + + if (i > 0) + endline(); + + write_table_item_header(*t.get(values[i])); + t.get(values[i])->accept(*this, false); + path_.pop_back(); + } + + for (unsigned int i = 0; i < tables.size(); ++i) + { + path_.push_back(tables[i]); + + if (values.size() > 0 || i > 0) + endline(); + + write_table_item_header(*t.get(tables[i])); + t.get(tables[i])->accept(*this, false); + path_.pop_back(); + } + + endline(); + } + + /** + * Output an array element of the TOML tree + */ + void visit(const array& a, bool = false) + { + write("["); + + for (unsigned int i = 0; i < a.get().size(); ++i) + { + if (i > 0) + write(", "); + + if (a.get()[i]->is_array()) + { + a.get()[i]->as_array()->accept(*this, true); + } + else + { + a.get()[i]->accept(*this, true); + } + } + + write("]"); + } + + /** + * Output a table_array element of the TOML tree + */ + void visit(const table_array& t, bool = false) + { + for (unsigned int j = 0; j < t.get().size(); ++j) + { + if (j > 0) + endline(); + + t.get()[j]->accept(*this, true); + } + + endline(); + } + + /** + * Escape a string for output. + */ + static std::string escape_string(const std::string& str) + { + std::string res; + for (auto it = str.begin(); it != str.end(); ++it) + { + if (*it == '\b') + { + res += "\\b"; + } + else if (*it == '\t') + { + res += "\\t"; + } + else if (*it == '\n') + { + res += "\\n"; + } + else if (*it == '\f') + { + res += "\\f"; + } + else if (*it == '\r') + { + res += "\\r"; + } + else if (*it == '"') + { + res += "\\\""; + } + else if (*it == '\\') + { + res += "\\\\"; + } + else if (static_cast<uint32_t>(*it) <= UINT32_C(0x001f)) + { + res += "\\u"; + std::stringstream ss; + ss << std::hex << static_cast<uint32_t>(*it); + res += ss.str(); + } + else + { + res += *it; + } + } + return res; + } + + protected: + /** + * Write out a string. + */ + void write(const value<std::string>& v) + { + write("\""); + write(escape_string(v.get())); + write("\""); + } + + /** + * Write out a double. + */ + void write(const value<double>& v) + { + std::stringstream ss; + ss << std::showpoint + << std::setprecision(std::numeric_limits<double>::max_digits10) + << v.get(); + + auto double_str = ss.str(); + auto pos = double_str.find("e0"); + if (pos != std::string::npos) + double_str.replace(pos, 2, "e"); + pos = double_str.find("e-0"); + if (pos != std::string::npos) + double_str.replace(pos, 3, "e-"); + + stream_ << double_str; + has_naked_endline_ = false; + } + + /** + * Write out an integer, local_date, local_time, local_datetime, or + * offset_datetime. + */ + template <class T> + typename std::enable_if< + is_one_of<T, int64_t, local_date, local_time, local_datetime, + offset_datetime>::value>::type + write(const value<T>& v) + { + write(v.get()); + } + + /** + * Write out a boolean. + */ + void write(const value<bool>& v) + { + write((v.get() ? "true" : "false")); + } + + /** + * Write out the header of a table. + */ + void write_table_header(bool in_array = false) + { + if (!path_.empty()) + { + indent(); + + write("["); + + if (in_array) + { + write("["); + } + + for (unsigned int i = 0; i < path_.size(); ++i) + { + if (i > 0) + { + write("."); + } + + if (path_[i].find_first_not_of("ABCDEFGHIJKLMNOPQRSTUVWXYZabcde" + "fghijklmnopqrstuvwxyz0123456789" + "_-") + == std::string::npos) + { + write(path_[i]); + } + else + { + write("\""); + write(escape_string(path_[i])); + write("\""); + } + } + + if (in_array) + { + write("]"); + } + + write("]"); + endline(); + } + } + + /** + * Write out the identifier for an item in a table. + */ + void write_table_item_header(const base& b) + { + if (!b.is_table() && !b.is_table_array()) + { + indent(); + + if (path_.back().find_first_not_of("ABCDEFGHIJKLMNOPQRSTUVWXYZabcde" + "fghijklmnopqrstuvwxyz0123456789" + "_-") + == std::string::npos) + { + write(path_.back()); + } + else + { + write("\""); + write(escape_string(path_.back())); + write("\""); + } + + write(" = "); + } + } + + private: + /** + * Indent the proper number of tabs given the size of + * the path. + */ + void indent() + { + for (std::size_t i = 1; i < path_.size(); ++i) + write(indent_); + } + + /** + * Write a value out to the stream. + */ + template <class T> + void write(const T& v) + { + stream_ << v; + has_naked_endline_ = false; + } + + /** + * Write an endline out to the stream + */ + void endline() + { + if (!has_naked_endline_) + { + stream_ << "\n"; + has_naked_endline_ = true; + } + } + + private: + std::ostream& stream_; + const std::string indent_; + std::vector<std::string> path_; + bool has_naked_endline_; +}; + +inline std::ostream& operator<<(std::ostream& stream, const base& b) +{ + toml_writer writer{stream}; + b.accept(writer); + return stream; +} + +template <class T> +std::ostream& operator<<(std::ostream& stream, const value<T>& v) +{ + toml_writer writer{stream}; + v.accept(writer); + return stream; +} + +inline std::ostream& operator<<(std::ostream& stream, const table& t) +{ + toml_writer writer{stream}; + t.accept(writer); + return stream; +} + +inline std::ostream& operator<<(std::ostream& stream, const table_array& t) +{ + toml_writer writer{stream}; + t.accept(writer); + return stream; +} + +inline std::ostream& operator<<(std::ostream& stream, const array& a) +{ + toml_writer writer{stream}; + a.accept(writer); + return stream; +} +} // namespace cpptoml +#endif // CPPTOML_H diff --git a/third_party/nix/src/libexpr/attr-path.cc b/third_party/nix/src/libexpr/attr-path.cc new file mode 100644 index 000000000000..1815b5e510d3 --- /dev/null +++ b/third_party/nix/src/libexpr/attr-path.cc @@ -0,0 +1,107 @@ +#include "attr-path.hh" + +#include "eval-inline.hh" +#include "util.hh" + +namespace nix { + +static Strings parseAttrPath(const std::string& s) { + Strings res; + std::string cur; + string::const_iterator i = s.begin(); + while (i != s.end()) { + if (*i == '.') { + res.push_back(cur); + cur.clear(); + } else if (*i == '"') { + ++i; + while (true) { + if (i == s.end()) { + throw Error(format("missing closing quote in selection path '%1%'") % + s); + } + if (*i == '"') { + break; + } + cur.push_back(*i++); + } + } else { + cur.push_back(*i); + } + ++i; + } + if (!cur.empty()) { + res.push_back(cur); + } + return res; +} + +Value* findAlongAttrPath(EvalState& state, const std::string& attrPath, + Bindings& autoArgs, Value& vIn) { + Strings tokens = parseAttrPath(attrPath); + + Error attrError = + Error(format("attribute selection path '%1%' does not match expression") % + attrPath); + + Value* v = &vIn; + + for (auto& attr : tokens) { + /* Is i an index (integer) or a normal attribute name? */ + enum { apAttr, apIndex } apType = apAttr; + unsigned int attrIndex; + if (string2Int(attr, attrIndex)) { + apType = apIndex; + } + + /* Evaluate the expression. */ + Value* vNew = state.allocValue(); + state.autoCallFunction(autoArgs, *v, *vNew); + v = vNew; + state.forceValue(*v); + + /* It should evaluate to either a set or an expression, + according to what is specified in the attrPath. */ + + if (apType == apAttr) { + if (v->type != tAttrs) { + throw TypeError(format("the expression selected by the selection path " + "'%1%' should be a set but is %2%") % + attrPath % showType(*v)); + } + + if (attr.empty()) { + throw Error(format("empty attribute name in selection path '%1%'") % + attrPath); + } + + Bindings::iterator a = v->attrs->find(state.symbols.Create(attr)); + if (a == v->attrs->end()) { + throw Error( + format("attribute '%1%' in selection path '%2%' not found") % attr % + attrPath); + } + v = &*(a->second).value; + } + + else if (apType == apIndex) { + if (!v->isList()) { + throw TypeError(format("the expression selected by the selection path " + "'%1%' should be a list but is %2%") % + attrPath % showType(*v)); + } + + if (attrIndex >= v->listSize()) { + throw Error( + format("list index %1% in selection path '%2%' is out of range") % + attrIndex % attrPath); + } + + v = v->listElems()[attrIndex]; + } + } + + return v; +} + +} // namespace nix diff --git a/third_party/nix/src/libexpr/attr-path.hh b/third_party/nix/src/libexpr/attr-path.hh new file mode 100644 index 000000000000..889c2158d79e --- /dev/null +++ b/third_party/nix/src/libexpr/attr-path.hh @@ -0,0 +1,13 @@ +#pragma once + +#include <map> +#include <string> + +#include "eval.hh" + +namespace nix { + +Value* findAlongAttrPath(EvalState& state, const std::string& attrPath, + Bindings& autoArgs, Value& vIn); + +} diff --git a/third_party/nix/src/libexpr/attr-set.cc b/third_party/nix/src/libexpr/attr-set.cc new file mode 100644 index 000000000000..e961156487a9 --- /dev/null +++ b/third_party/nix/src/libexpr/attr-set.cc @@ -0,0 +1,75 @@ +#include "attr-set.hh" + +#include <new> + +#include <absl/container/btree_map.h> +#include <gc/gc_cpp.h> + +#include "eval-inline.hh" + +namespace nix { + +// TODO: using insert_or_assign might break existing Nix code because +// of the weird ordering situation. Need to investigate. +void Bindings::push_back(const Attr& attr) { + attributes_.insert_or_assign(attr.name, attr); +} + +size_t Bindings::size() { return attributes_.size(); } + +size_t Bindings::capacity() { return 0; } + +bool Bindings::empty() { return attributes_.empty(); } + +std::vector<const Attr*> Bindings::lexicographicOrder() { + std::vector<const Attr*> res; + res.reserve(attributes_.size()); + + for (const auto& [key, value] : attributes_) { + res.emplace_back(&value); + } + + return res; +} + +Bindings::iterator Bindings::find(const Symbol& name) { + return attributes_.find(name); +} + +Bindings::iterator Bindings::begin() { return attributes_.begin(); } + +Bindings::iterator Bindings::end() { return attributes_.end(); } + +void Bindings::merge(Bindings* other) { + // We want the values from the other attribute set to take + // precedence, but .merge() works the other way around. + // + // To work around that, we merge and then swap. + other->attributes_.merge(attributes_); + attributes_.swap(other->attributes_); +} + +Bindings* Bindings::NewGC() { return new (GC) Bindings; } + +void EvalState::mkAttrs(Value& v, size_t capacity) { + if (capacity == 0) { + v = vEmptySet; + return; + } + clearValue(v); + v.type = tAttrs; + v.attrs = Bindings::NewGC(); + nrAttrsets++; + nrAttrsInAttrsets += capacity; +} + +/* Create a new attribute named 'name' on an existing attribute set stored + in 'vAttrs' and return the newly allocated Value which is associated with + this attribute. */ +Value* EvalState::allocAttr(Value& vAttrs, const Symbol& name) { + Value* v = allocValue(); + vAttrs.attrs->push_back(Attr(name, v)); + return v; +} + +} // namespace nix diff --git a/third_party/nix/src/libexpr/attr-set.hh b/third_party/nix/src/libexpr/attr-set.hh new file mode 100644 index 000000000000..caddc6bebd61 --- /dev/null +++ b/third_party/nix/src/libexpr/attr-set.hh @@ -0,0 +1,76 @@ +// This file implements the underlying structure of Nix attribute sets. +#pragma once + +#include <absl/container/btree_map.h> + +#include "nixexpr.hh" +#include "symbol-table.hh" +#include "types.hh" // TODO(tazjin): audit this include + +namespace nix { // TODO(tazjin): ::expr + +class EvalState; +struct Value; + +/* Map one attribute name to its value. */ +struct Attr { + Symbol name; + Value* value; // TODO(tazjin): Who owns this? + Pos* pos; // TODO(tazjin): Who owns this? + Attr(Symbol name, Value* value, Pos* pos = &noPos) + : name(name), value(value), pos(pos){}; + Attr() : pos(&noPos){}; + bool operator<(const Attr& a) const { return name < a.name; } +}; + +// TODO: remove this, it only exists briefly while I get rid of the +// current Attr struct +inline bool operator==(const Attr& lhs, const Attr& rhs) { + return lhs.name == rhs.name; +} + +class Bindings { + public: + typedef absl::btree_map<Symbol, Attr>::iterator iterator; + + // Allocate a new attribute set that is visible to the garbage + // collector. + static Bindings* NewGC(); + + // Return the number of contained elements. + size_t size(); + + // Is this attribute set empty? + bool empty(); + + // TODO(tazjin): rename + // TODO(tazjin): does this need to copy? + void push_back(const Attr& attr); + + // Look up a specific element of the attribute set. + iterator find(const Symbol& name); + + // TODO + iterator begin(); + iterator end(); + + // Merge values from other into the current attribute + void merge(Bindings* other); + + // ??? + [[deprecated]] size_t capacity(); + + // oh no + // Attr& operator[](size_t pos); // { return attrs[pos]; } + + // TODO: can callers just iterate? + [[deprecated]] std::vector<const Attr*> lexicographicOrder(); + + // oh no + friend class EvalState; + + private: + absl::btree_map<Symbol, Attr> attributes_; +}; + +} // namespace nix diff --git a/third_party/nix/src/libexpr/common-eval-args.cc b/third_party/nix/src/libexpr/common-eval-args.cc new file mode 100644 index 000000000000..2059d5374195 --- /dev/null +++ b/third_party/nix/src/libexpr/common-eval-args.cc @@ -0,0 +1,64 @@ +#include "common-eval-args.hh" + +#include "download.hh" +#include "eval.hh" +#include "shared.hh" +#include "util.hh" + +namespace nix { + +MixEvalArgs::MixEvalArgs() { + mkFlag() + .longName("arg") + .description("argument to be passed to Nix functions") + .labels({"name", "expr"}) + .handler( + [&](std::vector<std::string> ss) { autoArgs[ss[0]] = 'E' + ss[1]; }); + + mkFlag() + .longName("argstr") + .description("string-valued argument to be passed to Nix functions") + .labels({"name", "string"}) + .handler( + [&](std::vector<std::string> ss) { autoArgs[ss[0]] = 'S' + ss[1]; }); + + mkFlag() + .shortName('I') + .longName("include") + .description( + "add a path to the list of locations used to look up <...> file " + "names") + .label("path") + .handler([&](const std::string& s) { searchPath.push_back(s); }); +} + +Bindings* MixEvalArgs::getAutoArgs(EvalState& state) { + Bindings* res = Bindings::NewGC(); + for (auto& i : autoArgs) { + Value* v = state.allocValue(); + if (i.second[0] == 'E') { + state.mkThunk_( + *v, state.parseExprFromString(string(i.second, 1), absPath("."))); + } else { + mkString(*v, string(i.second, 1)); + } + res->push_back(Attr(state.symbols.Create(i.first), v)); + } + return res; +} + +Path lookupFileArg(EvalState& state, std::string s) { + if (isUri(s)) { + CachedDownloadRequest request(s); + request.unpack = true; + return getDownloader()->downloadCached(state.store, request).path; + } + if (s.size() > 2 && s.at(0) == '<' && s.at(s.size() - 1) == '>') { + Path p = s.substr(1, s.size() - 2); + return state.findFile(p); + } else { + return absPath(s); + } +} + +} // namespace nix diff --git a/third_party/nix/src/libexpr/common-eval-args.hh b/third_party/nix/src/libexpr/common-eval-args.hh new file mode 100644 index 000000000000..dad30daf6b13 --- /dev/null +++ b/third_party/nix/src/libexpr/common-eval-args.hh @@ -0,0 +1,24 @@ +#pragma once + +#include "args.hh" + +namespace nix { + +class Store; +class EvalState; +class Bindings; + +struct MixEvalArgs : virtual Args { + MixEvalArgs(); + + Bindings* getAutoArgs(EvalState& state); + + Strings searchPath; + + private: + std::map<std::string, std::string> autoArgs; +}; + +Path lookupFileArg(EvalState& state, std::string s); + +} // namespace nix diff --git a/third_party/nix/src/libexpr/eval-inline.hh b/third_party/nix/src/libexpr/eval-inline.hh new file mode 100644 index 000000000000..ca0cf2a3607a --- /dev/null +++ b/third_party/nix/src/libexpr/eval-inline.hh @@ -0,0 +1,90 @@ +#pragma once + +#include "eval.hh" + +#define LocalNoInline(f) \ + static f __attribute__((noinline)); \ + f +#define LocalNoInlineNoReturn(f) \ + static f __attribute__((noinline, noreturn)); \ + f + +namespace nix { + +LocalNoInlineNoReturn(void throwEvalError(const char* s, const Pos& pos)) { + throw EvalError(format(s) % pos); +} + +LocalNoInlineNoReturn(void throwTypeError(const char* s, const Value& v)) { + throw TypeError(format(s) % showType(v)); +} + +LocalNoInlineNoReturn(void throwTypeError(const char* s, const Value& v, + const Pos& pos)) { + throw TypeError(format(s) % showType(v) % pos); +} + +void EvalState::forceValue(Value& v, const Pos& pos) { + if (v.type == tThunk) { + Env* env = v.thunk.env; + Expr* expr = v.thunk.expr; + try { + v.type = tBlackhole; + // checkInterrupt(); + expr->eval(*this, *env, v); + } catch (...) { + v.type = tThunk; + v.thunk.env = env; + v.thunk.expr = expr; + throw; + } + } else if (v.type == tApp) { + callFunction(*v.app.left, *v.app.right, v, noPos); + } else if (v.type == tBlackhole) { + throwEvalError("infinite recursion encountered, at %1%", pos); + } +} + +inline void EvalState::forceAttrs(Value& v) { + forceValue(v); + if (v.type != tAttrs) { + throwTypeError("value is %1% while a set was expected", v); + } +} + +inline void EvalState::forceAttrs(Value& v, const Pos& pos) { + forceValue(v); + if (v.type != tAttrs) { + throwTypeError("value is %1% while a set was expected, at %2%", v, pos); + } +} + +inline void EvalState::forceList(Value& v) { + forceValue(v); + if (!v.isList()) { + throwTypeError("value is %1% while a list was expected", v); + } +} + +inline void EvalState::forceList(Value& v, const Pos& pos) { + forceValue(v); + if (!v.isList()) { + throwTypeError("value is %1% while a list was expected, at %2%", v, pos); + } +} + +/* Note: Various places expect the allocated memory to be zeroed. */ +inline void* allocBytes(size_t n) { + void* p; +#if HAVE_BOEHMGC + p = GC_MALLOC(n); +#else + p = calloc(n, 1); +#endif + if (!p) { + throw std::bad_alloc(); + } + return p; +} + +} // namespace nix diff --git a/third_party/nix/src/libexpr/eval.cc b/third_party/nix/src/libexpr/eval.cc new file mode 100644 index 000000000000..0abf94c87445 --- /dev/null +++ b/third_party/nix/src/libexpr/eval.cc @@ -0,0 +1,2001 @@ +#include "eval.hh" + +#include <algorithm> +#include <chrono> +#include <cstring> +#include <fstream> +#include <iostream> +#include <new> + +#include <gc/gc.h> +#include <gc/gc_cpp.h> +#include <glog/logging.h> +#include <sys/resource.h> +#include <sys/time.h> +#include <unistd.h> + +#include "derivations.hh" +#include "download.hh" +#include "eval-inline.hh" +#include "function-trace.hh" +#include "globals.hh" +#include "hash.hh" +#include "json.hh" +#include "store-api.hh" +#include "util.hh" + +namespace nix { + +static char* dupString(const char* s) { + char* t; + t = GC_STRDUP(s); + if (t == nullptr) { + throw std::bad_alloc(); + } + return t; +} + +static void printValue(std::ostream& str, std::set<const Value*>& active, + const Value& v) { + checkInterrupt(); + + if (active.find(&v) != active.end()) { + str << "<CYCLE>"; + return; + } + active.insert(&v); + + switch (v.type) { + case tInt: + str << v.integer; + break; + case tBool: + str << (v.boolean ? "true" : "false"); + break; + case tString: + str << "\""; + for (const char* i = v.string.s; *i != 0; i++) { + if (*i == '\"' || *i == '\\') { + str << "\\" << *i; + } else if (*i == '\n') { + str << "\\n"; + } else if (*i == '\r') { + str << "\\r"; + } else if (*i == '\t') { + str << "\\t"; + } else { + str << *i; + } + } + str << "\""; + break; + case tPath: + str << v.path; // !!! escaping? + break; + case tNull: + str << "null"; + break; + case tAttrs: { + str << "{ "; + for (auto& i : v.attrs->lexicographicOrder()) { + str << i->name << " = "; + printValue(str, active, *i->value); + str << "; "; + } + str << "}"; + break; + } + case tList1: + case tList2: + case tListN: + str << "[ "; + for (unsigned int n = 0; n < v.listSize(); ++n) { + printValue(str, active, *v.listElems()[n]); + str << " "; + } + str << "]"; + break; + case tThunk: + case tApp: + str << "<CODE>"; + break; + case tLambda: + str << "<LAMBDA>"; + break; + case tPrimOp: + str << "<PRIMOP>"; + break; + case tPrimOpApp: + str << "<PRIMOP-APP>"; + break; + case tExternal: + str << *v.external; + break; + case tFloat: + str << v.fpoint; + break; + default: + throw Error("invalid value"); + } + + active.erase(&v); +} + +std::ostream& operator<<(std::ostream& str, const Value& v) { + std::set<const Value*> active; + printValue(str, active, v); + return str; +} + +const Value* getPrimOp(const Value& v) { + const Value* primOp = &v; + while (primOp->type == tPrimOpApp) { + primOp = primOp->primOpApp.left; + } + assert(primOp->type == tPrimOp); + return primOp; +} + +string showType(const Value& v) { + switch (v.type) { + case tInt: + return "an integer"; + case tBool: + return "a boolean"; + case tString: + return v.string.context != nullptr ? "a string with context" : "a string"; + case tPath: + return "a path"; + case tNull: + return "null"; + case tAttrs: + return "a set"; + case tList1: + case tList2: + case tListN: + return "a list"; + case tThunk: + return "a thunk"; + case tApp: + return "a function application"; + case tLambda: + return "a function"; + case tBlackhole: + return "a black hole"; + case tPrimOp: + return fmt("the built-in function '%s'", string(v.primOp->name)); + case tPrimOpApp: + return fmt("the partially applied built-in function '%s'", + string(getPrimOp(v)->primOp->name)); + case tExternal: + return v.external->showType(); + case tFloat: + return "a float"; + } + abort(); +} + +#if HAVE_BOEHMGC +/* Called when the Boehm GC runs out of memory. */ +static void* oomHandler(size_t requested) { + /* Convert this to a proper C++ exception. */ + throw std::bad_alloc(); +} +#endif + +static Symbol getName(const AttrName& name, EvalState& state, Env& env) { + if (name.symbol.set()) { + return name.symbol; + } + Value nameValue; + name.expr->eval(state, env, nameValue); + state.forceStringNoCtx(nameValue); + return state.symbols.Create(nameValue.string.s); +} + +static bool gcInitialised = false; + +void initGC() { + if (gcInitialised) { + return; + } + +#if HAVE_BOEHMGC + /* Initialise the Boehm garbage collector. */ + + /* Don't look for interior pointers. This reduces the odds of + misdetection a bit. */ + GC_set_all_interior_pointers(0); + + /* We don't have any roots in data segments, so don't scan from + there. */ + GC_set_no_dls(1); + + GC_INIT(); + + GC_set_oom_fn(oomHandler); + + /* Set the initial heap size to something fairly big (25% of + physical RAM, up to a maximum of 384 MiB) so that in most cases + we don't need to garbage collect at all. (Collection has a + fairly significant overhead.) The heap size can be overridden + through libgc's GC_INITIAL_HEAP_SIZE environment variable. We + should probably also provide a nix.conf setting for this. Note + that GC_expand_hp() causes a lot of virtual, but not physical + (resident) memory to be allocated. This might be a problem on + systems that don't overcommit. */ + if (getenv("GC_INITIAL_HEAP_SIZE") == nullptr) { + size_t size = 32 * 1024 * 1024; +#if HAVE_SYSCONF && defined(_SC_PAGESIZE) && defined(_SC_PHYS_PAGES) + size_t maxSize = 384 * 1024 * 1024; + long pageSize = sysconf(_SC_PAGESIZE); + long pages = sysconf(_SC_PHYS_PAGES); + if (pageSize != -1) { + size = (pageSize * pages) / 4; + } // 25% of RAM + if (size > maxSize) { + size = maxSize; + } +#endif + DLOG(INFO) << "setting initial heap size to " << size << " bytes"; + GC_expand_hp(size); + } + +#endif + + gcInitialised = true; +} + +/* Very hacky way to parse $NIX_PATH, which is colon-separated, but + can contain URLs (e.g. "nixpkgs=https://bla...:foo=https://"). */ +static Strings parseNixPath(const std::string& s) { + Strings res; + + auto p = s.begin(); + + while (p != s.end()) { + auto start = p; + auto start2 = p; + + while (p != s.end() && *p != ':') { + if (*p == '=') { + start2 = p + 1; + } + ++p; + } + + if (p == s.end()) { + if (p != start) { + res.push_back(std::string(start, p)); + } + break; + } + + if (*p == ':') { + if (isUri(std::string(start2, s.end()))) { + ++p; + while (p != s.end() && *p != ':') { + ++p; + } + } + res.push_back(std::string(start, p)); + if (p == s.end()) { + break; + } + } + + ++p; + } + + return res; +} + +EvalState::EvalState(const Strings& _searchPath, const ref<Store>& store) + : sWith(symbols.Create("<with>")), + sOutPath(symbols.Create("outPath")), + sDrvPath(symbols.Create("drvPath")), + sType(symbols.Create("type")), + sMeta(symbols.Create("meta")), + sName(symbols.Create("name")), + sValue(symbols.Create("value")), + sSystem(symbols.Create("system")), + sOverrides(symbols.Create("__overrides")), + sOutputs(symbols.Create("outputs")), + sOutputName(symbols.Create("outputName")), + sIgnoreNulls(symbols.Create("__ignoreNulls")), + sFile(symbols.Create("file")), + sLine(symbols.Create("line")), + sColumn(symbols.Create("column")), + sFunctor(symbols.Create("__functor")), + sToString(symbols.Create("__toString")), + sRight(symbols.Create("right")), + sWrong(symbols.Create("wrong")), + sStructuredAttrs(symbols.Create("__structuredAttrs")), + sBuilder(symbols.Create("builder")), + sArgs(symbols.Create("args")), + sOutputHash(symbols.Create("outputHash")), + sOutputHashAlgo(symbols.Create("outputHashAlgo")), + sOutputHashMode(symbols.Create("outputHashMode")), + repair(NoRepair), + store(store), + baseEnv(allocEnv(128)), + staticBaseEnv(false, nullptr) { + countCalls = getEnv("NIX_COUNT_CALLS", "0") != "0"; + + assert(gcInitialised); + + static_assert(sizeof(Env) <= 16, "environment must be <= 16 bytes"); + + /* Initialise the Nix expression search path. */ + if (!evalSettings.pureEval) { + Strings paths = parseNixPath(getEnv("NIX_PATH", "")); + for (auto& i : _searchPath) { + addToSearchPath(i); + } + for (auto& i : paths) { + addToSearchPath(i); + } + } + addToSearchPath("nix=" + + canonPath(settings.nixDataDir + "/nix/corepkgs", true)); + + if (evalSettings.restrictEval || evalSettings.pureEval) { + allowedPaths = PathSet(); + + for (auto& i : searchPath) { + auto r = resolveSearchPathElem(i); + if (!r.first) { + continue; + } + + auto path = r.second; + + if (store->isInStore(r.second)) { + PathSet closure; + store->computeFSClosure(store->toStorePath(r.second), closure); + for (auto& path : closure) { + allowedPaths->insert(path); + } + } else { + allowedPaths->insert(r.second); + } + } + } + + clearValue(vEmptySet); + vEmptySet.type = tAttrs; + vEmptySet.attrs = Bindings::NewGC(); + + createBaseEnv(); +} + +EvalState::~EvalState() = default; + +Path EvalState::checkSourcePath(const Path& path_) { + if (!allowedPaths) { + return path_; + } + + auto i = resolvedPaths.find(path_); + if (i != resolvedPaths.end()) { + return i->second; + } + + bool found = false; + + /* First canonicalize the path without symlinks, so we make sure an + * attacker can't append ../../... to a path that would be in allowedPaths + * and thus leak symlink targets. + */ + Path abspath = canonPath(path_); + + for (auto& i : *allowedPaths) { + if (isDirOrInDir(abspath, i)) { + found = true; + break; + } + } + + if (!found) { + throw RestrictedPathError( + "access to path '%1%' is forbidden in restricted mode", abspath); + } + + /* Resolve symlinks. */ + DLOG(INFO) << "checking access to '" << abspath << "'"; + Path path = canonPath(abspath, true); + + for (auto& i : *allowedPaths) { + if (isDirOrInDir(path, i)) { + resolvedPaths[path_] = path; + return path; + } + } + + throw RestrictedPathError( + "access to path '%1%' is forbidden in restricted mode", path); +} + +void EvalState::checkURI(const std::string& uri) { + if (!evalSettings.restrictEval) { + return; + } + + /* 'uri' should be equal to a prefix, or in a subdirectory of a + prefix. Thus, the prefix https://github.co does not permit + access to https://github.com. Note: this allows 'http://' and + 'https://' as prefixes for any http/https URI. */ + for (auto& prefix : evalSettings.allowedUris.get()) { + if (uri == prefix || + (uri.size() > prefix.size() && !prefix.empty() && + hasPrefix(uri, prefix) && + (prefix[prefix.size() - 1] == '/' || uri[prefix.size()] == '/'))) { + return; + } + } + + /* If the URI is a path, then check it against allowedPaths as + well. */ + if (hasPrefix(uri, "/")) { + checkSourcePath(uri); + return; + } + + if (hasPrefix(uri, "file://")) { + checkSourcePath(std::string(uri, 7)); + return; + } + + throw RestrictedPathError( + "access to URI '%s' is forbidden in restricted mode", uri); +} + +Path EvalState::toRealPath(const Path& path, const PathSet& context) { + // FIXME: check whether 'path' is in 'context'. + return !context.empty() && store->isInStore(path) ? store->toRealPath(path) + : path; +}; + +Value* EvalState::addConstant(const std::string& name, Value& v) { + Value* v2 = allocValue(); + *v2 = v; + staticBaseEnv.vars[symbols.Create(name)] = baseEnvDispl; + baseEnv.values[baseEnvDispl++] = v2; + std::string name2 = string(name, 0, 2) == "__" ? string(name, 2) : name; + baseEnv.values[0]->attrs->push_back(Attr(symbols.Create(name2), v2)); + return v2; +} + +Value* EvalState::addPrimOp(const std::string& name, size_t arity, + PrimOpFun primOp) { + if (arity == 0) { + Value v; + primOp(*this, noPos, nullptr, v); + return addConstant(name, v); + } + Value* v = allocValue(); + std::string name2 = string(name, 0, 2) == "__" ? string(name, 2) : name; + Symbol sym = symbols.Create(name2); + v->type = tPrimOp; + v->primOp = new PrimOp(primOp, arity, sym); + staticBaseEnv.vars[symbols.Create(name)] = baseEnvDispl; + baseEnv.values[baseEnvDispl++] = v; + baseEnv.values[0]->attrs->push_back(Attr(sym, v)); + return v; +} + +Value& EvalState::getBuiltin(const std::string& name) { + return *baseEnv.values[0]->attrs->find(symbols.Create(name))->second.value; +} + +/* Every "format" object (even temporary) takes up a few hundred bytes + of stack space, which is a real killer in the recursive + evaluator. So here are some helper functions for throwing + exceptions. */ + +LocalNoInlineNoReturn(void throwEvalError(const char* s, + const std::string& s2)) { + throw EvalError(format(s) % s2); +} + +LocalNoInlineNoReturn(void throwEvalError(const char* s, const std::string& s2, + const Pos& pos)) { + throw EvalError(format(s) % s2 % pos); +} + +LocalNoInlineNoReturn(void throwEvalError(const char* s, const std::string& s2, + const std::string& s3)) { + throw EvalError(format(s) % s2 % s3); +} + +LocalNoInlineNoReturn(void throwEvalError(const char* s, const std::string& s2, + const std::string& s3, + const Pos& pos)) { + throw EvalError(format(s) % s2 % s3 % pos); +} + +LocalNoInlineNoReturn(void throwEvalError(const char* s, const Symbol& sym, + const Pos& p1, const Pos& p2)) { + throw EvalError(format(s) % sym % p1 % p2); +} + +LocalNoInlineNoReturn(void throwTypeError(const char* s, const Pos& pos)) { + throw TypeError(format(s) % pos); +} + +LocalNoInlineNoReturn(void throwTypeError(const char* s, + const std::string& s1)) { + throw TypeError(format(s) % s1); +} + +LocalNoInlineNoReturn(void throwTypeError(const char* s, const ExprLambda& fun, + const Symbol& s2, const Pos& pos)) { + throw TypeError(format(s) % fun.showNamePos() % s2 % pos); +} + +LocalNoInlineNoReturn(void throwAssertionError(const char* s, + const std::string& s1, + const Pos& pos)) { + throw AssertionError(format(s) % s1 % pos); +} + +LocalNoInlineNoReturn(void throwUndefinedVarError(const char* s, + const std::string& s1, + const Pos& pos)) { + throw UndefinedVarError(format(s) % s1 % pos); +} + +LocalNoInline(void addErrorPrefix(Error& e, const char* s, + const std::string& s2)) { + e.addPrefix(format(s) % s2); +} + +LocalNoInline(void addErrorPrefix(Error& e, const char* s, + const ExprLambda& fun, const Pos& pos)) { + e.addPrefix(format(s) % fun.showNamePos() % pos); +} + +LocalNoInline(void addErrorPrefix(Error& e, const char* s, + const std::string& s2, const Pos& pos)) { + e.addPrefix(format(s) % s2 % pos); +} + +void mkString(Value& v, const char* s) { mkStringNoCopy(v, dupString(s)); } + +Value& mkString(Value& v, const std::string& s, const PathSet& context) { + mkString(v, s.c_str()); + if (!context.empty()) { + size_t n = 0; + v.string.context = + (const char**)allocBytes((context.size() + 1) * sizeof(char*)); + for (auto& i : context) { + v.string.context[n++] = dupString(i.c_str()); + } + v.string.context[n] = nullptr; + } + return v; +} + +void mkPath(Value& v, const char* s) { mkPathNoCopy(v, dupString(s)); } + +inline Value* EvalState::lookupVar(Env* env, const ExprVar& var, bool noEval) { + for (size_t l = var.level; l != 0u; --l, env = env->up) { + ; + } + + if (!var.fromWith) { + return env->values[var.displ]; + } + + while (true) { + if (env->type == Env::HasWithExpr) { + if (noEval) { + return nullptr; + } + Value* v = allocValue(); + evalAttrs(*env->up, (Expr*)env->values[0], *v); + env->values[0] = v; + env->type = Env::HasWithAttrs; + } + Bindings::iterator j = env->values[0]->attrs->find(var.name); + if (j != env->values[0]->attrs->end()) { + if (countCalls && (j->second.pos != nullptr)) { + attrSelects[*j->second.pos]++; + } + return j->second.value; + } + if (env->prevWith == 0u) { + throwUndefinedVarError("undefined variable '%1%' at %2%", var.name, + var.pos); + } + for (size_t l = env->prevWith; l != 0u; --l, env = env->up) { + } + } +} + +Value* EvalState::allocValue() { + nrValues++; + return new (GC) Value; +} + +Env& EvalState::allocEnv(size_t size) { + if (size > std::numeric_limits<decltype(Env::size)>::max()) { + throw Error("environment size %d is too big", size); + } + + nrEnvs++; + nrValuesInEnvs += size; + Env* env = (Env*)allocBytes(sizeof(Env) + size * sizeof(Value*)); + env->size = (decltype(Env::size))size; + env->type = Env::Plain; + + /* We assume that env->values has been cleared by the allocator; maybeThunk() + * and lookupVar fromWith expect this. */ + + return *env; +} + +void EvalState::mkList(Value& v, size_t size) { + clearValue(v); + if (size == 1) { + v.type = tList1; + } else if (size == 2) { + v.type = tList2; + } else { + v.type = tListN; + v.bigList.size = size; + v.bigList.elems = + size != 0u ? (Value**)allocBytes(size * sizeof(Value*)) : nullptr; + } + nrListElems += size; +} + +unsigned long nrThunks = 0; + +static inline void mkThunk(Value& v, Env& env, Expr* expr) { + v.type = tThunk; + v.thunk.env = &env; + v.thunk.expr = expr; + nrThunks++; +} + +void EvalState::mkThunk_(Value& v, Expr* expr) { mkThunk(v, baseEnv, expr); } + +void EvalState::mkPos(Value& v, Pos* pos) { + if ((pos != nullptr) && pos->file.set()) { + mkAttrs(v, 3); + mkString(*allocAttr(v, sFile), pos->file); + mkInt(*allocAttr(v, sLine), pos->line); + mkInt(*allocAttr(v, sColumn), pos->column); + } else { + mkNull(v); + } +} + +/* Create a thunk for the delayed computation of the given expression + in the given environment. But if the expression is a variable, + then look it up right away. This significantly reduces the number + of thunks allocated. */ +Value* Expr::maybeThunk(EvalState& state, Env& env) { + Value* v = state.allocValue(); + mkThunk(*v, env, this); + return v; +} + +unsigned long nrAvoided = 0; + +Value* ExprVar::maybeThunk(EvalState& state, Env& env) { + Value* v = state.lookupVar(&env, *this, true); + /* The value might not be initialised in the environment yet. + In that case, ignore it. */ + if (v != nullptr) { + nrAvoided++; + return v; + } + return Expr::maybeThunk(state, env); +} + +Value* ExprString::maybeThunk(EvalState& state, Env& env) { + nrAvoided++; + return &v; +} + +Value* ExprInt::maybeThunk(EvalState& state, Env& env) { + nrAvoided++; + return &v; +} + +Value* ExprFloat::maybeThunk(EvalState& state, Env& env) { + nrAvoided++; + return &v; +} + +Value* ExprPath::maybeThunk(EvalState& state, Env& env) { + nrAvoided++; + return &v; +} + +void EvalState::evalFile(const Path& path_, Value& v) { + auto path = checkSourcePath(path_); + + FileEvalCache::iterator i; + if ((i = fileEvalCache.find(path)) != fileEvalCache.end()) { + v = i->second; + return; + } + + Path path2 = resolveExprPath(path); + if ((i = fileEvalCache.find(path2)) != fileEvalCache.end()) { + v = i->second; + return; + } + + DLOG(INFO) << "evaluating file '" << path2 << "'"; + Expr* e = nullptr; + + auto j = fileParseCache.find(path2); + if (j != fileParseCache.end()) { + e = j->second; + } + + if (e == nullptr) { + e = parseExprFromFile(checkSourcePath(path2)); + } + + fileParseCache[path2] = e; + + try { + eval(e, v); + } catch (Error& e) { + addErrorPrefix(e, "while evaluating the file '%1%':\n", path2); + throw; + } + + fileEvalCache[path2] = v; + if (path != path2) { + fileEvalCache[path] = v; + } +} + +void EvalState::resetFileCache() { + fileEvalCache.clear(); + fileParseCache.clear(); +} + +void EvalState::eval(Expr* e, Value& v) { e->eval(*this, baseEnv, v); } + +inline bool EvalState::evalBool(Env& env, Expr* e) { + Value v; + e->eval(*this, env, v); + if (v.type != tBool) { + throwTypeError("value is %1% while a Boolean was expected", v); + } + return v.boolean; +} + +inline bool EvalState::evalBool(Env& env, Expr* e, const Pos& pos) { + Value v; + e->eval(*this, env, v); + if (v.type != tBool) { + throwTypeError("value is %1% while a Boolean was expected, at %2%", v, pos); + } + return v.boolean; +} + +inline void EvalState::evalAttrs(Env& env, Expr* e, Value& v) { + e->eval(*this, env, v); + if (v.type != tAttrs) { + throwTypeError("value is %1% while a set was expected", v); + } +} + +void Expr::eval(EvalState& state, Env& env, Value& v) { abort(); } + +void ExprInt::eval(EvalState& state, Env& env, Value& v) { v = this->v; } + +void ExprFloat::eval(EvalState& state, Env& env, Value& v) { v = this->v; } + +void ExprString::eval(EvalState& state, Env& env, Value& v) { v = this->v; } + +void ExprPath::eval(EvalState& state, Env& env, Value& v) { v = this->v; } + +void ExprAttrs::eval(EvalState& state, Env& env, Value& v) { + state.mkAttrs(v, attrs.size() + dynamicAttrs.size()); + Env* dynamicEnv = &env; + + if (recursive) { + /* Create a new environment that contains the attributes in + this `rec'. */ + Env& env2(state.allocEnv(attrs.size())); + env2.up = &env; + dynamicEnv = &env2; + + // TODO(tazjin): contains? + AttrDefs::iterator overrides = attrs.find(state.sOverrides); + bool hasOverrides = overrides != attrs.end(); + + /* The recursive attributes are evaluated in the new + environment, while the inherited attributes are evaluated + in the original environment. */ + size_t displ = 0; + for (auto& i : attrs) { + Value* vAttr; + if (hasOverrides && !i.second.inherited) { + vAttr = state.allocValue(); + mkThunk(*vAttr, env2, i.second.e); + } else { + vAttr = i.second.e->maybeThunk(state, i.second.inherited ? env : env2); + } + env2.values[displ++] = vAttr; + v.attrs->push_back(Attr(i.first, vAttr, &i.second.pos)); + } + + /* If the rec contains an attribute called `__overrides', then + evaluate it, and add the attributes in that set to the rec. + This allows overriding of recursive attributes, which is + otherwise not possible. (You can use the // operator to + replace an attribute, but other attributes in the rec will + still reference the original value, because that value has + been substituted into the bodies of the other attributes. + Hence we need __overrides.) */ + if (hasOverrides) { + Value* vOverrides = v.attrs->find(overrides->first)->second.value; + state.forceAttrs(*vOverrides); + Bindings* newBnds = Bindings::NewGC(); + for (auto& i : *v.attrs) { // TODO(tazjin): copy constructor? + newBnds->push_back(i.second); + } + for (auto& i : *vOverrides->attrs) { + auto j = attrs.find(i.second.name); + if (j != attrs.end()) { + newBnds->push_back(i.second); + env2.values[j->second.displ] = i.second.value; + } else { + newBnds->push_back(i.second); + } + } + v.attrs = newBnds; + } + } else { + // TODO(tazjin): insert range + for (auto& i : attrs) { + v.attrs->push_back( + Attr(i.first, i.second.e->maybeThunk(state, env), &i.second.pos)); + } + } + + /* Dynamic attrs apply *after* rec and __overrides. */ + for (auto& i : dynamicAttrs) { + Value nameVal; + i.nameExpr->eval(state, *dynamicEnv, nameVal); + state.forceValue(nameVal, i.pos); + if (nameVal.type == tNull) { + continue; + } + state.forceStringNoCtx(nameVal); + Symbol nameSym = state.symbols.Create(nameVal.string.s); + Bindings::iterator j = v.attrs->find(nameSym); + if (j != v.attrs->end()) { + throwEvalError("dynamic attribute '%1%' at %2% already defined at %3%", + nameSym, i.pos, *j->second.pos); + } + + i.valueExpr->setName(nameSym); + v.attrs->push_back( + Attr(nameSym, i.valueExpr->maybeThunk(state, *dynamicEnv), &i.pos)); + } +} + +void ExprLet::eval(EvalState& state, Env& env, Value& v) { + /* Create a new environment that contains the attributes in this + `let'. */ + Env& env2(state.allocEnv(attrs->attrs.size())); + env2.up = &env; + + /* The recursive attributes are evaluated in the new environment, + while the inherited attributes are evaluated in the original + environment. */ + size_t displ = 0; + for (auto& i : attrs->attrs) { + env2.values[displ++] = + i.second.e->maybeThunk(state, i.second.inherited ? env : env2); + } + + body->eval(state, env2, v); +} + +void ExprList::eval(EvalState& state, Env& env, Value& v) { + state.mkList(v, elems.size()); + for (size_t n = 0; n < elems.size(); ++n) { + v.listElems()[n] = elems[n]->maybeThunk(state, env); + } +} + +void ExprVar::eval(EvalState& state, Env& env, Value& v) { + Value* v2 = state.lookupVar(&env, *this, false); + state.forceValue(*v2, pos); + v = *v2; +} + +static std::string showAttrPath(EvalState& state, Env& env, + const AttrPath& attrPath) { + std::ostringstream out; + bool first = true; + for (auto& i : attrPath) { + if (!first) { + out << '.'; + } else { + first = false; + } + try { + out << getName(i, state, env); + } catch (Error& e) { + assert(!i.symbol.set()); + out << "\"${" << *i.expr << "}\""; + } + } + return out.str(); +} + +unsigned long nrLookups = 0; + +void ExprSelect::eval(EvalState& state, Env& env, Value& v) { + Value vTmp; + Pos* pos2 = nullptr; + Value* vAttrs = &vTmp; + + e->eval(state, env, vTmp); + + try { + for (auto& i : attrPath) { + nrLookups++; + Bindings::iterator j; + Symbol name = getName(i, state, env); + if (def != nullptr) { + state.forceValue(*vAttrs, pos); + if (vAttrs->type != tAttrs || + (j = vAttrs->attrs->find(name)) == vAttrs->attrs->end()) { + def->eval(state, env, v); + return; + } + } else { + state.forceAttrs(*vAttrs, pos); + if ((j = vAttrs->attrs->find(name)) == vAttrs->attrs->end()) { + throwEvalError("attribute '%1%' missing, at %2%", name, pos); + } + } + vAttrs = j->second.value; + pos2 = j->second.pos; + if (state.countCalls && (pos2 != nullptr)) { + state.attrSelects[*pos2]++; + } + } + + state.forceValue(*vAttrs, (pos2 != nullptr ? *pos2 : this->pos)); + + } catch (Error& e) { + if ((pos2 != nullptr) && pos2->file != state.sDerivationNix) { + addErrorPrefix(e, "while evaluating the attribute '%1%' at %2%:\n", + showAttrPath(state, env, attrPath), *pos2); + } + throw; + } + + v = *vAttrs; +} + +void ExprOpHasAttr::eval(EvalState& state, Env& env, Value& v) { + Value vTmp; + Value* vAttrs = &vTmp; + + e->eval(state, env, vTmp); + + for (auto& i : attrPath) { + state.forceValue(*vAttrs); + Bindings::iterator j; + Symbol name = getName(i, state, env); + if (vAttrs->type != tAttrs || + (j = vAttrs->attrs->find(name)) == vAttrs->attrs->end()) { + mkBool(v, false); + return; + } + vAttrs = j->second.value; + } + + mkBool(v, true); +} + +void ExprLambda::eval(EvalState& state, Env& env, Value& v) { + v.type = tLambda; + v.lambda.env = &env; + v.lambda.fun = this; +} + +void ExprApp::eval(EvalState& state, Env& env, Value& v) { + /* FIXME: vFun prevents GCC from doing tail call optimisation. */ + Value vFun; + e1->eval(state, env, vFun); + state.callFunction(vFun, *(e2->maybeThunk(state, env)), v, pos); +} + +void EvalState::callPrimOp(Value& fun, Value& arg, Value& v, const Pos& pos) { + /* Figure out the number of arguments still needed. */ + size_t argsDone = 0; + Value* primOp = &fun; + while (primOp->type == tPrimOpApp) { + argsDone++; + primOp = primOp->primOpApp.left; + } + assert(primOp->type == tPrimOp); + auto arity = primOp->primOp->arity; + auto argsLeft = arity - argsDone; + + if (argsLeft == 1) { + /* We have all the arguments, so call the primop. */ + + /* Put all the arguments in an array. */ + Value* vArgs[arity]; + auto n = arity - 1; + vArgs[n--] = &arg; + for (Value* arg = &fun; arg->type == tPrimOpApp; + arg = arg->primOpApp.left) { + vArgs[n--] = arg->primOpApp.right; + } + + /* And call the primop. */ + nrPrimOpCalls++; + if (countCalls) { + primOpCalls[primOp->primOp->name]++; + } + primOp->primOp->fun(*this, pos, vArgs, v); + } else { + Value* fun2 = allocValue(); + *fun2 = fun; + v.type = tPrimOpApp; + v.primOpApp.left = fun2; + v.primOpApp.right = &arg; + } +} + +void EvalState::callFunction(Value& fun, Value& arg, Value& v, const Pos& pos) { + auto trace = evalSettings.traceFunctionCalls + ? std::make_unique<FunctionCallTrace>(pos) + : nullptr; + + forceValue(fun, pos); + + if (fun.type == tPrimOp || fun.type == tPrimOpApp) { + callPrimOp(fun, arg, v, pos); + return; + } + + if (fun.type == tAttrs) { + auto found = fun.attrs->find(sFunctor); + if (found != fun.attrs->end()) { + /* fun may be allocated on the stack of the calling function, + * but for functors we may keep a reference, so heap-allocate + * a copy and use that instead. + */ + auto& fun2 = *allocValue(); + fun2 = fun; + /* !!! Should we use the attr pos here? */ + Value v2; + callFunction(*found->second.value, fun2, v2, pos); + return callFunction(v2, arg, v, pos); + } + } + + if (fun.type != tLambda) { + throwTypeError( + "attempt to call something which is not a function but %1%, at %2%", + fun, pos); + } + + ExprLambda& lambda(*fun.lambda.fun); + + auto size = (lambda.arg.empty() ? 0 : 1) + + (lambda.matchAttrs ? lambda.formals->formals.size() : 0); + Env& env2(allocEnv(size)); + env2.up = fun.lambda.env; + + size_t displ = 0; + + if (!lambda.matchAttrs) { + env2.values[displ++] = &arg; + + } else { + forceAttrs(arg, pos); + + if (!lambda.arg.empty()) { + env2.values[displ++] = &arg; + } + + /* For each formal argument, get the actual argument. If + there is no matching actual argument but the formal + argument has a default, use the default. */ + size_t attrsUsed = 0; + for (auto& i : lambda.formals->formals) { + Bindings::iterator j = arg.attrs->find(i.name); + if (j == arg.attrs->end()) { + if (i.def == nullptr) { + throwTypeError("%1% called without required argument '%2%', at %3%", + lambda, i.name, pos); + } + env2.values[displ++] = i.def->maybeThunk(*this, env2); + } else { + attrsUsed++; + env2.values[displ++] = j->second.value; + } + } + + /* Check that each actual argument is listed as a formal + argument (unless the attribute match specifies a `...'). */ + if (!lambda.formals->ellipsis && attrsUsed != arg.attrs->size()) { + /* Nope, so show the first unexpected argument to the + user. */ + for (auto& i : *arg.attrs) { + if (lambda.formals->argNames.find(i.second.name) == + lambda.formals->argNames.end()) { + throwTypeError("%1% called with unexpected argument '%2%', at %3%", + lambda, i.second.name, pos); + } + } + abort(); // can't happen + } + } + + nrFunctionCalls++; + if (countCalls) { + incrFunctionCall(&lambda); + } + + /* Evaluate the body. This is conditional on showTrace, because + catching exceptions makes this function not tail-recursive. */ + if (settings.showTrace) { + try { + lambda.body->eval(*this, env2, v); + } catch (Error& e) { + addErrorPrefix(e, "while evaluating %1%, called from %2%:\n", lambda, + pos); + throw; + } + } else { + fun.lambda.fun->body->eval(*this, env2, v); + } +} + +// Lifted out of callFunction() because it creates a temporary that +// prevents tail-call optimisation. +void EvalState::incrFunctionCall(ExprLambda* fun) { functionCalls[fun]++; } + +void EvalState::autoCallFunction(Bindings& args, Value& fun, Value& res) { + forceValue(fun); + + if (fun.type == tAttrs) { + auto found = fun.attrs->find(sFunctor); + if (found != fun.attrs->end()) { + Value* v = allocValue(); + callFunction(*found->second.value, fun, *v, noPos); + forceValue(*v); + return autoCallFunction(args, *v, res); + } + } + + if (fun.type != tLambda || !fun.lambda.fun->matchAttrs) { + res = fun; + return; + } + + Value* actualArgs = allocValue(); + mkAttrs(*actualArgs, fun.lambda.fun->formals->formals.size()); + + for (auto& i : fun.lambda.fun->formals->formals) { + Bindings::iterator j = args.find(i.name); + if (j != args.end()) { + actualArgs->attrs->push_back(j->second); + } else if (i.def == nullptr) { + throwTypeError( + "cannot auto-call a function that has an argument without a default " + "value ('%1%')", + i.name); + } + } + + callFunction(fun, *actualArgs, res, noPos); +} + +void ExprWith::eval(EvalState& state, Env& env, Value& v) { + Env& env2(state.allocEnv(1)); + env2.up = &env; + env2.prevWith = prevWith; + env2.type = Env::HasWithExpr; + env2.values[0] = (Value*)attrs; + + body->eval(state, env2, v); +} + +void ExprIf::eval(EvalState& state, Env& env, Value& v) { + (state.evalBool(env, cond) ? then : else_)->eval(state, env, v); +} + +void ExprAssert::eval(EvalState& state, Env& env, Value& v) { + if (!state.evalBool(env, cond, pos)) { + std::ostringstream out; + cond->show(out); + throwAssertionError("assertion %1% failed at %2%", out.str(), pos); + } + body->eval(state, env, v); +} + +void ExprOpNot::eval(EvalState& state, Env& env, Value& v) { + mkBool(v, !state.evalBool(env, e)); +} + +void ExprOpEq::eval(EvalState& state, Env& env, Value& v) { + Value v1; + e1->eval(state, env, v1); + Value v2; + e2->eval(state, env, v2); + mkBool(v, state.eqValues(v1, v2)); +} + +void ExprOpNEq::eval(EvalState& state, Env& env, Value& v) { + Value v1; + e1->eval(state, env, v1); + Value v2; + e2->eval(state, env, v2); + mkBool(v, !state.eqValues(v1, v2)); +} + +void ExprOpAnd::eval(EvalState& state, Env& env, Value& v) { + mkBool(v, state.evalBool(env, e1, pos) && state.evalBool(env, e2, pos)); +} + +void ExprOpOr::eval(EvalState& state, Env& env, Value& v) { + mkBool(v, state.evalBool(env, e1, pos) || state.evalBool(env, e2, pos)); +} + +void ExprOpImpl::eval(EvalState& state, Env& env, Value& v) { + mkBool(v, !state.evalBool(env, e1, pos) || state.evalBool(env, e2, pos)); +} + +void ExprOpUpdate::eval(EvalState& state, Env& env, Value& v) { + Value v1; + Value v2; + state.evalAttrs(env, e1, v1); + state.evalAttrs(env, e2, v2); + + state.nrOpUpdates++; + + if (v1.attrs->empty()) { + v = v2; + return; + } + if (v2.attrs->empty()) { + v = v1; + return; + } + + state.mkAttrs(v, /* capacity = */ 0); + + /* Merge the sets, preferring values from the second set. Make + sure to keep the resulting vector in sorted order. */ + v.attrs->merge(v1.attrs); + v.attrs->merge(v2.attrs); + + state.nrOpUpdateValuesCopied += v.attrs->size(); +} + +void ExprOpConcatLists::eval(EvalState& state, Env& env, Value& v) { + Value v1; + e1->eval(state, env, v1); + Value v2; + e2->eval(state, env, v2); + Value* lists[2] = {&v1, &v2}; + state.concatLists(v, 2, lists, pos); +} + +void EvalState::concatLists(Value& v, size_t nrLists, Value** lists, + const Pos& pos) { + nrListConcats++; + + Value* nonEmpty = nullptr; + size_t len = 0; + for (size_t n = 0; n < nrLists; ++n) { + forceList(*lists[n], pos); + auto l = lists[n]->listSize(); + len += l; + if (l != 0u) { + nonEmpty = lists[n]; + } + } + + if ((nonEmpty != nullptr) && len == nonEmpty->listSize()) { + v = *nonEmpty; + return; + } + + mkList(v, len); + auto out = v.listElems(); + for (size_t n = 0, pos = 0; n < nrLists; ++n) { + auto l = lists[n]->listSize(); + if (l != 0u) { + memcpy(out + pos, lists[n]->listElems(), l * sizeof(Value*)); + } + pos += l; + } +} + +void ExprConcatStrings::eval(EvalState& state, Env& env, Value& v) { + PathSet context; + std::ostringstream s; + NixInt n = 0; + NixFloat nf = 0; + + bool first = !forceString; + ValueType firstType = tString; + + for (auto& i : *es) { + Value vTmp; + i->eval(state, env, vTmp); + + /* If the first element is a path, then the result will also + be a path, we don't copy anything (yet - that's done later, + since paths are copied when they are used in a derivation), + and none of the strings are allowed to have contexts. */ + if (first) { + firstType = vTmp.type; + first = false; + } + + if (firstType == tInt) { + if (vTmp.type == tInt) { + n += vTmp.integer; + } else if (vTmp.type == tFloat) { + // Upgrade the type from int to float; + firstType = tFloat; + nf = n; + nf += vTmp.fpoint; + } else { + throwEvalError("cannot add %1% to an integer, at %2%", showType(vTmp), + pos); + } + } else if (firstType == tFloat) { + if (vTmp.type == tInt) { + nf += vTmp.integer; + } else if (vTmp.type == tFloat) { + nf += vTmp.fpoint; + } else { + throwEvalError("cannot add %1% to a float, at %2%", showType(vTmp), + pos); + } + } else { + s << state.coerceToString(pos, vTmp, context, false, + firstType == tString); + } + } + + if (firstType == tInt) { + mkInt(v, n); + } else if (firstType == tFloat) { + mkFloat(v, nf); + } else if (firstType == tPath) { + if (!context.empty()) { + throwEvalError( + "a string that refers to a store path cannot be appended to a path, " + "at %1%", + pos); + } + auto path = canonPath(s.str()); + mkPath(v, path.c_str()); + } else { + mkString(v, s.str(), context); + } +} + +void ExprPos::eval(EvalState& state, Env& env, Value& v) { + state.mkPos(v, &pos); +} + +void EvalState::forceValueDeep(Value& v) { + std::set<const Value*> seen; + + std::function<void(Value & v)> recurse; + + recurse = [&](Value& v) { + if (seen.find(&v) != seen.end()) { + return; + } + seen.insert(&v); + + forceValue(v); + + if (v.type == tAttrs) { + for (auto& i : *v.attrs) { + try { + recurse(*i.second.value); + } catch (Error& e) { + addErrorPrefix(e, "while evaluating the attribute '%1%' at %2%:\n", + i.second.name, *i.second.pos); + throw; + } + } + } else if (v.isList()) { + for (size_t n = 0; n < v.listSize(); ++n) { + recurse(*v.listElems()[n]); + } + } + }; + + recurse(v); +} + +NixInt EvalState::forceInt(Value& v, const Pos& pos) { + forceValue(v, pos); + if (v.type != tInt) { + throwTypeError("value is %1% while an integer was expected, at %2%", v, + pos); + } + return v.integer; +} + +NixFloat EvalState::forceFloat(Value& v, const Pos& pos) { + forceValue(v, pos); + if (v.type == tInt) { + return v.integer; + } + if (v.type != tFloat) { + throwTypeError("value is %1% while a float was expected, at %2%", v, pos); + } + return v.fpoint; +} + +bool EvalState::forceBool(Value& v, const Pos& pos) { + forceValue(v); + if (v.type != tBool) { + throwTypeError("value is %1% while a Boolean was expected, at %2%", v, pos); + } + return v.boolean; +} + +bool EvalState::isFunctor(Value& fun) { + return fun.type == tAttrs && fun.attrs->find(sFunctor) != fun.attrs->end(); +} + +void EvalState::forceFunction(Value& v, const Pos& pos) { + forceValue(v); + if (v.type != tLambda && v.type != tPrimOp && v.type != tPrimOpApp && + !isFunctor(v)) { + throwTypeError("value is %1% while a function was expected, at %2%", v, + pos); + } +} + +string EvalState::forceString(Value& v, const Pos& pos) { + forceValue(v, pos); + if (v.type != tString) { + if (pos) { + throwTypeError("value is %1% while a string was expected, at %2%", v, + pos); + } else { + throwTypeError("value is %1% while a string was expected", v); + } + } + return string(v.string.s); +} + +void copyContext(const Value& v, PathSet& context) { + if (v.string.context != nullptr) { + for (const char** p = v.string.context; *p != nullptr; ++p) { + context.insert(*p); + } + } +} + +string EvalState::forceString(Value& v, PathSet& context, const Pos& pos) { + std::string s = forceString(v, pos); + copyContext(v, context); + return s; +} + +string EvalState::forceStringNoCtx(Value& v, const Pos& pos) { + std::string s = forceString(v, pos); + if (v.string.context != nullptr) { + if (pos) { + throwEvalError( + "the string '%1%' is not allowed to refer to a store path (such as " + "'%2%'), at %3%", + v.string.s, v.string.context[0], pos); + } else { + throwEvalError( + "the string '%1%' is not allowed to refer to a store path (such as " + "'%2%')", + v.string.s, v.string.context[0]); + } + } + return s; +} + +bool EvalState::isDerivation(Value& v) { + if (v.type != tAttrs) { + return false; + } + Bindings::iterator i = v.attrs->find(sType); + if (i == v.attrs->end()) { + return false; + } + forceValue(*i->second.value); + if (i->second.value->type != tString) { + return false; + } + return strcmp(i->second.value->string.s, "derivation") == 0; +} + +std::optional<string> EvalState::tryAttrsToString(const Pos& pos, Value& v, + PathSet& context, + bool coerceMore, + bool copyToStore) { + auto i = v.attrs->find(sToString); + if (i != v.attrs->end()) { + Value v1; + callFunction(*i->second.value, v, v1, pos); + return coerceToString(pos, v1, context, coerceMore, copyToStore); + } + + return {}; +} + +string EvalState::coerceToString(const Pos& pos, Value& v, PathSet& context, + bool coerceMore, bool copyToStore) { + forceValue(v); + + std::string s; + + if (v.type == tString) { + copyContext(v, context); + return v.string.s; + } + + if (v.type == tPath) { + Path path(canonPath(v.path)); + return copyToStore ? copyPathToStore(context, path) : path; + } + + if (v.type == tAttrs) { + auto maybeString = + tryAttrsToString(pos, v, context, coerceMore, copyToStore); + if (maybeString) { + return *maybeString; + } + auto i = v.attrs->find(sOutPath); + if (i == v.attrs->end()) { + throwTypeError("cannot coerce a set to a string, at %1%", pos); + } + return coerceToString(pos, *i->second.value, context, coerceMore, + copyToStore); + } + + if (v.type == tExternal) { + return v.external->coerceToString(pos, context, coerceMore, copyToStore); + } + + if (coerceMore) { + /* Note that `false' is represented as an empty string for + shell scripting convenience, just like `null'. */ + if (v.type == tBool && v.boolean) { + return "1"; + } + if (v.type == tBool && !v.boolean) { + return ""; + } + if (v.type == tInt) { + return std::to_string(v.integer); + } + if (v.type == tFloat) { + return std::to_string(v.fpoint); + } + if (v.type == tNull) { + return ""; + } + + if (v.isList()) { + std::string result; + for (size_t n = 0; n < v.listSize(); ++n) { + result += coerceToString(pos, *v.listElems()[n], context, coerceMore, + copyToStore); + if (n < v.listSize() - 1 + /* !!! not quite correct */ + && (!v.listElems()[n]->isList() || + v.listElems()[n]->listSize() != 0)) { + result += " "; + } + } + return result; + } + } + + throwTypeError("cannot coerce %1% to a string, at %2%", v, pos); +} + +string EvalState::copyPathToStore(PathSet& context, const Path& path) { + if (nix::isDerivation(path)) { + throwEvalError("file names are not allowed to end in '%1%'", drvExtension); + } + + Path dstPath; + if (!srcToStore[path].empty()) { + dstPath = srcToStore[path]; + } else { + dstPath = + settings.readOnlyMode + ? store + ->computeStorePathForPath(baseNameOf(path), + checkSourcePath(path)) + .first + : store->addToStore(baseNameOf(path), checkSourcePath(path), true, + htSHA256, defaultPathFilter, repair); + srcToStore[path] = dstPath; + DLOG(INFO) << "copied source '" << path << "' -> '" << dstPath << "'"; + } + + context.insert(dstPath); + return dstPath; +} + +Path EvalState::coerceToPath(const Pos& pos, Value& v, PathSet& context) { + std::string path = coerceToString(pos, v, context, false, false); + if (path.empty() || path[0] != '/') { + throwEvalError("string '%1%' doesn't represent an absolute path, at %2%", + path, pos); + } + return path; +} + +bool EvalState::eqValues(Value& v1, Value& v2) { + forceValue(v1); + forceValue(v2); + + /* !!! Hack to support some old broken code that relies on pointer + equality tests between sets. (Specifically, builderDefs calls + uniqList on a list of sets.) Will remove this eventually. */ + if (&v1 == &v2) { + return true; + } + + // Special case type-compatibility between float and int + if (v1.type == tInt && v2.type == tFloat) { + return v1.integer == v2.fpoint; + } + if (v1.type == tFloat && v2.type == tInt) { + return v1.fpoint == v2.integer; + } + + // All other types are not compatible with each other. + if (v1.type != v2.type) { + return false; + } + + switch (v1.type) { + case tInt: + return v1.integer == v2.integer; + + case tBool: + return v1.boolean == v2.boolean; + + case tString: + return strcmp(v1.string.s, v2.string.s) == 0; + + case tPath: + return strcmp(v1.path, v2.path) == 0; + + case tNull: + return true; + + case tList1: + case tList2: + case tListN: + if (v1.listSize() != v2.listSize()) { + return false; + } + for (size_t n = 0; n < v1.listSize(); ++n) { + if (!eqValues(*v1.listElems()[n], *v2.listElems()[n])) { + return false; + } + } + return true; + + case tAttrs: { + /* If both sets denote a derivation (type = "derivation"), + then compare their outPaths. */ + if (isDerivation(v1) && isDerivation(v2)) { + Bindings::iterator i = v1.attrs->find(sOutPath); + Bindings::iterator j = v2.attrs->find(sOutPath); + if (i != v1.attrs->end() && j != v2.attrs->end()) { + return eqValues(*i->second.value, *j->second.value); + } + } + + if (v1.attrs->size() != v2.attrs->size()) { + return false; + } + + /* Otherwise, compare the attributes one by one. */ + Bindings::iterator i; + Bindings::iterator j; + for (i = v1.attrs->begin(), j = v2.attrs->begin(); i != v1.attrs->end(); + ++i, ++j) { + if (i->second.name != j->second.name || + !eqValues(*i->second.value, *j->second.value)) { + return false; + } + } + + return true; + } + + /* Functions are incomparable. */ + case tLambda: + case tPrimOp: + case tPrimOpApp: + return false; + + case tExternal: + return *v1.external == *v2.external; + + case tFloat: + return v1.fpoint == v2.fpoint; + + default: + throwEvalError("cannot compare %1% with %2%", showType(v1), showType(v2)); + } +} + +void EvalState::printStats() { + bool showStats = getEnv("NIX_SHOW_STATS", "0") != "0"; + + struct rusage buf; + getrusage(RUSAGE_SELF, &buf); + float cpuTime = buf.ru_utime.tv_sec + ((float)buf.ru_utime.tv_usec / 1000000); + + uint64_t bEnvs = nrEnvs * sizeof(Env) + nrValuesInEnvs * sizeof(Value*); + uint64_t bLists = nrListElems * sizeof(Value*); + uint64_t bValues = nrValues * sizeof(Value); + uint64_t bAttrsets = + nrAttrsets * sizeof(Bindings) + nrAttrsInAttrsets * sizeof(Attr); + +#if HAVE_BOEHMGC + GC_word heapSize; + GC_word totalBytes; + GC_get_heap_usage_safe(&heapSize, nullptr, nullptr, nullptr, &totalBytes); +#endif + if (showStats) { + auto outPath = getEnv("NIX_SHOW_STATS_PATH", "-"); + std::fstream fs; + if (outPath != "-") { + fs.open(outPath, std::fstream::out); + } + JSONObject topObj(outPath == "-" ? std::cerr : fs, true); + topObj.attr("cpuTime", cpuTime); + { + auto envs = topObj.object("envs"); + envs.attr("number", nrEnvs); + envs.attr("elements", nrValuesInEnvs); + envs.attr("bytes", bEnvs); + } + { + auto lists = topObj.object("list"); + lists.attr("elements", nrListElems); + lists.attr("bytes", bLists); + lists.attr("concats", nrListConcats); + } + { + auto values = topObj.object("values"); + values.attr("number", nrValues); + values.attr("bytes", bValues); + } + { + auto syms = topObj.object("symbols"); + syms.attr("number", symbols.Size()); + syms.attr("bytes", symbols.TotalSize()); + } + { + auto sets = topObj.object("sets"); + sets.attr("number", nrAttrsets); + sets.attr("bytes", bAttrsets); + sets.attr("elements", nrAttrsInAttrsets); + } + { + auto sizes = topObj.object("sizes"); + sizes.attr("Env", sizeof(Env)); + sizes.attr("Value", sizeof(Value)); + sizes.attr("Bindings", sizeof(Bindings)); + sizes.attr("Attr", sizeof(Attr)); + } + topObj.attr("nrOpUpdates", nrOpUpdates); + topObj.attr("nrOpUpdateValuesCopied", nrOpUpdateValuesCopied); + topObj.attr("nrThunks", nrThunks); + topObj.attr("nrAvoided", nrAvoided); + topObj.attr("nrLookups", nrLookups); + topObj.attr("nrPrimOpCalls", nrPrimOpCalls); + topObj.attr("nrFunctionCalls", nrFunctionCalls); +#if HAVE_BOEHMGC + { + auto gc = topObj.object("gc"); + gc.attr("heapSize", heapSize); + gc.attr("totalBytes", totalBytes); + } +#endif + + if (countCalls) { + { + auto obj = topObj.object("primops"); + for (auto& i : primOpCalls) { + obj.attr(i.first, i.second); + } + } + { + auto list = topObj.list("functions"); + for (auto& i : functionCalls) { + auto obj = list.object(); + if (i.first->name.set()) { + obj.attr("name", (const std::string&)i.first->name); + } else { + obj.attr("name", nullptr); + } + if (i.first->pos) { + obj.attr("file", (const std::string&)i.first->pos.file); + obj.attr("line", i.first->pos.line); + obj.attr("column", i.first->pos.column); + } + obj.attr("count", i.second); + } + } + { + auto list = topObj.list("attributes"); + for (auto& i : attrSelects) { + auto obj = list.object(); + if (i.first) { + obj.attr("file", (const std::string&)i.first.file); + obj.attr("line", i.first.line); + obj.attr("column", i.first.column); + } + obj.attr("count", i.second); + } + } + } + + // TODO(tazjin): what is this? commented out because .dump() is gone. + // if (getEnv("NIX_SHOW_SYMBOLS", "0") != "0") { + // auto list = topObj.list("symbols"); + // symbols.dump([&](const std::string& s) { list.elem(s); }); + // } + } +} + +size_t valueSize(Value& v) { + std::set<const void*> seen; + + auto doString = [&](const char* s) -> size_t { + if (seen.find(s) != seen.end()) { + return 0; + } + seen.insert(s); + return strlen(s) + 1; + }; + + std::function<size_t(Value & v)> doValue; + std::function<size_t(Env & v)> doEnv; + + doValue = [&](Value& v) -> size_t { + if (seen.find(&v) != seen.end()) { + return 0; + } + seen.insert(&v); + + size_t sz = sizeof(Value); + + switch (v.type) { + case tString: + sz += doString(v.string.s); + if (v.string.context != nullptr) { + for (const char** p = v.string.context; *p != nullptr; ++p) { + sz += doString(*p); + } + } + break; + case tPath: + sz += doString(v.path); + break; + case tAttrs: + if (seen.find(v.attrs) == seen.end()) { + seen.insert(v.attrs); + sz += sizeof(Bindings) + sizeof(Attr) * v.attrs->capacity(); + for (auto& i : *v.attrs) { + sz += doValue(*i.second.value); + } + } + break; + case tList1: + case tList2: + case tListN: + if (seen.find(v.listElems()) == seen.end()) { + seen.insert(v.listElems()); + sz += v.listSize() * sizeof(Value*); + for (size_t n = 0; n < v.listSize(); ++n) { + sz += doValue(*v.listElems()[n]); + } + } + break; + case tThunk: + sz += doEnv(*v.thunk.env); + break; + case tApp: + sz += doValue(*v.app.left); + sz += doValue(*v.app.right); + break; + case tLambda: + sz += doEnv(*v.lambda.env); + break; + case tPrimOpApp: + sz += doValue(*v.primOpApp.left); + sz += doValue(*v.primOpApp.right); + break; + case tExternal: + if (seen.find(v.external) != seen.end()) { + break; + } + seen.insert(v.external); + sz += v.external->valueSize(seen); + break; + default:; + } + + return sz; + }; + + doEnv = [&](Env& env) -> size_t { + if (seen.find(&env) != seen.end()) { + return 0; + } + seen.insert(&env); + + size_t sz = sizeof(Env) + sizeof(Value*) * env.size; + + if (env.type != Env::HasWithExpr) { + for (size_t i = 0; i < env.size; ++i) { + if (env.values[i] != nullptr) { + sz += doValue(*env.values[i]); + } + } + } + + if (env.up != nullptr) { + sz += doEnv(*env.up); + } + + return sz; + }; + + return doValue(v); +} + +string ExternalValueBase::coerceToString(const Pos& pos, PathSet& context, + bool copyMore, + bool copyToStore) const { + throw TypeError(format("cannot coerce %1% to a string, at %2%") % showType() % + pos); +} + +bool ExternalValueBase::operator==(const ExternalValueBase& b) const { + return false; +} + +std::ostream& operator<<(std::ostream& str, const ExternalValueBase& v) { + return v.print(str); +} + +EvalSettings evalSettings; + +static GlobalConfig::Register r1(&evalSettings); + +} // namespace nix diff --git a/third_party/nix/src/libexpr/eval.hh b/third_party/nix/src/libexpr/eval.hh new file mode 100644 index 000000000000..73e35c9b2ce9 --- /dev/null +++ b/third_party/nix/src/libexpr/eval.hh @@ -0,0 +1,357 @@ +#pragma once + +#include <map> +#include <optional> +#include <unordered_map> + +#include "attr-set.hh" +#include "config.hh" +#include "hash.hh" +#include "nixexpr.hh" +#include "symbol-table.hh" +#include "value.hh" + +namespace nix { + +class Store; +class EvalState; +enum RepairFlag : bool; + +typedef void (*PrimOpFun)(EvalState& state, const Pos& pos, Value** args, + Value& v); + +struct PrimOp { + PrimOpFun fun; + size_t arity; + Symbol name; + PrimOp(PrimOpFun fun, size_t arity, Symbol name) + : fun(fun), arity(arity), name(name) {} +}; + +struct Env { + Env* up; + unsigned short size; // used by โvalueSizeโ + unsigned short prevWith : 14; // nr of levels up to next `with' environment + enum { Plain = 0, HasWithExpr, HasWithAttrs } type : 2; + Value* values[0]; +}; + +Value& mkString(Value& v, const std::string& s, + const PathSet& context = PathSet()); + +void copyContext(const Value& v, PathSet& context); + +/* Cache for calls to addToStore(); maps source paths to the store + paths. */ +typedef std::map<Path, Path> SrcToStore; + +std::ostream& operator<<(std::ostream& str, const Value& v); + +typedef std::pair<std::string, std::string> SearchPathElem; +typedef std::list<SearchPathElem> SearchPath; + +/* Initialise the Boehm GC, if applicable. */ +void initGC(); + +class EvalState { + public: + SymbolTable symbols; + + const Symbol sWith, sOutPath, sDrvPath, sType, sMeta, sName, sValue, sSystem, + sOverrides, sOutputs, sOutputName, sIgnoreNulls, sFile, sLine, sColumn, + sFunctor, sToString, sRight, sWrong, sStructuredAttrs, sBuilder, sArgs, + sOutputHash, sOutputHashAlgo, sOutputHashMode; + Symbol sDerivationNix; + + /* If set, force copying files to the Nix store even if they + already exist there. */ + RepairFlag repair; + + /* The allowed filesystem paths in restricted or pure evaluation + mode. */ + std::optional<PathSet> allowedPaths; + + Value vEmptySet; + + const ref<Store> store; + + private: + SrcToStore srcToStore; + + /* A cache from path names to parse trees. */ +#if HAVE_BOEHMGC + typedef std::map<Path, Expr*, std::less<Path>, + traceable_allocator<std::pair<const Path, Expr*>>> + FileParseCache; +#else + typedef std::map<Path, Expr*> FileParseCache; +#endif + FileParseCache fileParseCache; + + /* A cache from path names to values. */ +#if HAVE_BOEHMGC + typedef std::map<Path, Value, std::less<Path>, + traceable_allocator<std::pair<const Path, Value>>> + FileEvalCache; +#else + typedef std::map<Path, Value> FileEvalCache; +#endif + FileEvalCache fileEvalCache; + + SearchPath searchPath; + + std::map<std::string, std::pair<bool, std::string>> searchPathResolved; + + /* Cache used by checkSourcePath(). */ + std::unordered_map<Path, Path> resolvedPaths; + + public: + EvalState(const Strings& _searchPath, const ref<Store>& store); + ~EvalState(); + + void addToSearchPath(const std::string& s); + + SearchPath getSearchPath() { return searchPath; } + + Path checkSourcePath(const Path& path); + + void checkURI(const std::string& uri); + + /* When using a diverted store and 'path' is in the Nix store, map + 'path' to the diverted location (e.g. /nix/store/foo is mapped + to /home/alice/my-nix/nix/store/foo). However, this is only + done if the context is not empty, since otherwise we're + probably trying to read from the actual /nix/store. This is + intended to distinguish between import-from-derivation and + sources stored in the actual /nix/store. */ + Path toRealPath(const Path& path, const PathSet& context); + + /* Parse a Nix expression from the specified file. */ + Expr* parseExprFromFile(const Path& path); + Expr* parseExprFromFile(const Path& path, StaticEnv& staticEnv); + + /* Parse a Nix expression from the specified string. */ + Expr* parseExprFromString(const std::string& s, const Path& basePath, + StaticEnv& staticEnv); + Expr* parseExprFromString(const std::string& s, const Path& basePath); + + Expr* parseStdin(); + + /* Evaluate an expression read from the given file to normal + form. */ + void evalFile(const Path& path, Value& v); + + void resetFileCache(); + + /* Look up a file in the search path. */ + Path findFile(const std::string& path); + Path findFile(SearchPath& searchPath, const std::string& path, + const Pos& pos = noPos); + + /* If the specified search path element is a URI, download it. */ + std::pair<bool, std::string> resolveSearchPathElem( + const SearchPathElem& elem); + + /* Evaluate an expression to normal form, storing the result in + value `v'. */ + void eval(Expr* e, Value& v); + + /* Evaluation the expression, then verify that it has the expected + type. */ + inline bool evalBool(Env& env, Expr* e); + inline bool evalBool(Env& env, Expr* e, const Pos& pos); + inline void evalAttrs(Env& env, Expr* e, Value& v); + + /* If `v' is a thunk, enter it and overwrite `v' with the result + of the evaluation of the thunk. If `v' is a delayed function + application, call the function and overwrite `v' with the + result. Otherwise, this is a no-op. */ + inline void forceValue(Value& v, const Pos& pos = noPos); + + /* Force a value, then recursively force list elements and + attributes. */ + void forceValueDeep(Value& v); + + /* Force `v', and then verify that it has the expected type. */ + NixInt forceInt(Value& v, const Pos& pos); + NixFloat forceFloat(Value& v, const Pos& pos); + bool forceBool(Value& v, const Pos& pos); + inline void forceAttrs(Value& v); + inline void forceAttrs(Value& v, const Pos& pos); + inline void forceList(Value& v); + inline void forceList(Value& v, const Pos& pos); + void forceFunction(Value& v, const Pos& pos); // either lambda or primop + std::string forceString(Value& v, const Pos& pos = noPos); + std::string forceString(Value& v, PathSet& context, const Pos& pos = noPos); + std::string forceStringNoCtx(Value& v, const Pos& pos = noPos); + + /* Return true iff the value `v' denotes a derivation (i.e. a + set with attribute `type = "derivation"'). */ + bool isDerivation(Value& v); + + std::optional<string> tryAttrsToString(const Pos& pos, Value& v, + PathSet& context, + bool coerceMore = false, + bool copyToStore = true); + + /* String coercion. Converts strings, paths and derivations to a + string. If `coerceMore' is set, also converts nulls, integers, + booleans and lists to a string. If `copyToStore' is set, + referenced paths are copied to the Nix store as a side effect. */ + std::string coerceToString(const Pos& pos, Value& v, PathSet& context, + bool coerceMore = false, bool copyToStore = true); + + std::string copyPathToStore(PathSet& context, const Path& path); + + /* Path coercion. Converts strings, paths and derivations to a + path. The result is guaranteed to be a canonicalised, absolute + path. Nothing is copied to the store. */ + Path coerceToPath(const Pos& pos, Value& v, PathSet& context); + + public: + /* The base environment, containing the builtin functions and + values. */ + Env& baseEnv; + + /* The same, but used during parsing to resolve variables. */ + StaticEnv staticBaseEnv; // !!! should be private + + private: + unsigned int baseEnvDispl = 0; + + void createBaseEnv(); + + Value* addConstant(const std::string& name, Value& v); + + Value* addPrimOp(const std::string& name, size_t arity, PrimOpFun primOp); + + public: + Value& getBuiltin(const std::string& name); + + private: + inline Value* lookupVar(Env* env, const ExprVar& var, bool noEval); + + friend struct ExprVar; + friend struct ExprAttrs; + friend struct ExprLet; + + Expr* parse(const char* text, const Path& path, const Path& basePath, + StaticEnv& staticEnv); + + public: + /* Do a deep equality test between two values. That is, list + elements and attributes are compared recursively. */ + bool eqValues(Value& v1, Value& v2); + + bool isFunctor(Value& fun); + + void callFunction(Value& fun, Value& arg, Value& v, const Pos& pos); + void callPrimOp(Value& fun, Value& arg, Value& v, const Pos& pos); + + /* Automatically call a function for which each argument has a + default value or has a binding in the `args' map. */ + void autoCallFunction(Bindings& args, Value& fun, Value& res); + + /* Allocation primitives. */ + Value* allocValue(); + Env& allocEnv(size_t size); + + Value* allocAttr(Value& vAttrs, const Symbol& name); + + void mkList(Value& v, size_t size); + void mkAttrs(Value& v, size_t capacity); + void mkThunk_(Value& v, Expr* expr); + void mkPos(Value& v, Pos* pos); + + void concatLists(Value& v, size_t nrLists, Value** lists, const Pos& pos); + + /* Print statistics. */ + void printStats(); + + void realiseContext(const PathSet& context); + + private: + unsigned long nrEnvs = 0; + unsigned long nrValuesInEnvs = 0; + unsigned long nrValues = 0; + unsigned long nrListElems = 0; + unsigned long nrAttrsets = 0; + unsigned long nrAttrsInAttrsets = 0; + unsigned long nrOpUpdates = 0; + unsigned long nrOpUpdateValuesCopied = 0; + unsigned long nrListConcats = 0; + unsigned long nrPrimOpCalls = 0; + unsigned long nrFunctionCalls = 0; + + bool countCalls; + + typedef std::map<Symbol, size_t> PrimOpCalls; + PrimOpCalls primOpCalls; + + typedef std::map<ExprLambda*, size_t> FunctionCalls; + FunctionCalls functionCalls; + + void incrFunctionCall(ExprLambda* fun); + + typedef std::map<Pos, size_t> AttrSelects; + AttrSelects attrSelects; + + friend struct ExprOpUpdate; + friend struct ExprOpConcatLists; + friend struct ExprSelect; + friend void prim_getAttr(EvalState& state, const Pos& pos, Value** args, + Value& v); +}; + +/* Return a string representing the type of the value `v'. */ +string showType(const Value& v); + +/* Decode a context string โ!<name>!<path>โ into a pair <path, + name>. */ +std::pair<string, string> decodeContext(const std::string& s); + +/* If `path' refers to a directory, then append "/default.nix". */ +Path resolveExprPath(Path path); + +struct InvalidPathError : EvalError { + Path path; + InvalidPathError(const Path& path); +#ifdef EXCEPTION_NEEDS_THROW_SPEC + ~InvalidPathError() noexcept {}; +#endif +}; + +struct EvalSettings : Config { + Setting<bool> enableNativeCode{this, false, + "allow-unsafe-native-code-during-evaluation", + "Whether builtin functions that allow " + "executing native code should be enabled."}; + + Setting<bool> restrictEval{ + this, false, "restrict-eval", + "Whether to restrict file system access to paths in $NIX_PATH, " + "and network access to the URI prefixes listed in 'allowed-uris'."}; + + Setting<bool> pureEval{this, false, "pure-eval", + "Whether to restrict file system and network access " + "to files specified by cryptographic hash."}; + + Setting<bool> enableImportFromDerivation{ + this, true, "allow-import-from-derivation", + "Whether the evaluator allows importing the result of a derivation."}; + + Setting<Strings> allowedUris{ + this, + {}, + "allowed-uris", + "Prefixes of URIs that builtin functions such as fetchurl and fetchGit " + "are allowed to fetch."}; + + Setting<bool> traceFunctionCalls{this, false, "trace-function-calls", + "Emit log messages for each function entry " + "and exit at the 'vomit' log level (-vvvv)"}; +}; + +extern EvalSettings evalSettings; + +} // namespace nix diff --git a/third_party/nix/src/libexpr/function-trace.cc b/third_party/nix/src/libexpr/function-trace.cc new file mode 100644 index 000000000000..8b0d60fab8be --- /dev/null +++ b/third_party/nix/src/libexpr/function-trace.cc @@ -0,0 +1,19 @@ +#include "function-trace.hh" + +#include <glog/logging.h> + +namespace nix { + +FunctionCallTrace::FunctionCallTrace(const Pos& pos) : pos(pos) { + auto duration = std::chrono::high_resolution_clock::now().time_since_epoch(); + auto ns = std::chrono::duration_cast<std::chrono::nanoseconds>(duration); + LOG(INFO) << "function-trace entered " << pos << " at " << ns.count(); +} + +FunctionCallTrace::~FunctionCallTrace() { + auto duration = std::chrono::high_resolution_clock::now().time_since_epoch(); + auto ns = std::chrono::duration_cast<std::chrono::nanoseconds>(duration); + LOG(INFO) << "function-trace exited " << pos << " at " << ns.count(); +} + +} // namespace nix diff --git a/third_party/nix/src/libexpr/function-trace.hh b/third_party/nix/src/libexpr/function-trace.hh new file mode 100644 index 000000000000..93d70862eda1 --- /dev/null +++ b/third_party/nix/src/libexpr/function-trace.hh @@ -0,0 +1,14 @@ +#pragma once + +#include <chrono> + +#include "eval.hh" + +namespace nix { + +struct FunctionCallTrace { + const Pos& pos; + FunctionCallTrace(const Pos& pos); + ~FunctionCallTrace(); +}; +} // namespace nix diff --git a/third_party/nix/src/libexpr/get-drvs.cc b/third_party/nix/src/libexpr/get-drvs.cc new file mode 100644 index 000000000000..6d4ad33be063 --- /dev/null +++ b/third_party/nix/src/libexpr/get-drvs.cc @@ -0,0 +1,444 @@ +#include "get-drvs.hh" + +#include <cstring> +#include <regex> +#include <utility> + +#include <glog/logging.h> + +#include "derivations.hh" +#include "eval-inline.hh" +#include "util.hh" + +namespace nix { + +DrvInfo::DrvInfo(EvalState& state, std::string attrPath, Bindings* attrs) + : state(&state), attrs(attrs), attrPath(std::move(attrPath)) {} + +DrvInfo::DrvInfo(EvalState& state, const ref<Store>& store, + const std::string& drvPathWithOutputs) + : state(&state), attrPath("") { + auto spec = parseDrvPathWithOutputs(drvPathWithOutputs); + + drvPath = spec.first; + + auto drv = store->derivationFromPath(drvPath); + + name = storePathToName(drvPath); + + if (spec.second.size() > 1) { + throw Error( + "building more than one derivation output is not supported, in '%s'", + drvPathWithOutputs); + } + + outputName = spec.second.empty() ? get(drv.env, "outputName", "out") + : *spec.second.begin(); + + auto i = drv.outputs.find(outputName); + if (i == drv.outputs.end()) { + throw Error("derivation '%s' does not have output '%s'", drvPath, + outputName); + } + + outPath = i->second.path; +} + +string DrvInfo::queryName() const { + if (name.empty() && (attrs != nullptr)) { + auto i = attrs->find(state->sName); + if (i == attrs->end()) { + throw TypeError("derivation name missing"); + } + name = state->forceStringNoCtx(*i->second.value); + } + return name; +} + +string DrvInfo::querySystem() const { + if (system.empty() && (attrs != nullptr)) { + auto i = attrs->find(state->sSystem); + system = i == attrs->end() + ? "unknown" + : state->forceStringNoCtx(*i->second.value, *i->second.pos); + } + return system; +} + +string DrvInfo::queryDrvPath() const { + if (drvPath.empty() && (attrs != nullptr)) { + Bindings::iterator i = attrs->find(state->sDrvPath); + PathSet context; + drvPath = i != attrs->end() ? state->coerceToPath(*i->second.pos, + *i->second.value, context) + : ""; + } + return drvPath; +} + +string DrvInfo::queryOutPath() const { + if (outPath.empty() && (attrs != nullptr)) { + Bindings::iterator i = attrs->find(state->sOutPath); + PathSet context; + outPath = i != attrs->end() ? state->coerceToPath(*i->second.pos, + *i->second.value, context) + : ""; + } + return outPath; +} + +DrvInfo::Outputs DrvInfo::queryOutputs(bool onlyOutputsToInstall) { + if (outputs.empty()) { + /* Get the โoutputsโ list. */ + Bindings::iterator i; + if ((attrs != nullptr) && + (i = attrs->find(state->sOutputs)) != attrs->end()) { + state->forceList(*i->second.value, *i->second.pos); + + /* For each output... */ + for (unsigned int j = 0; j < i->second.value->listSize(); ++j) { + /* Evaluate the corresponding set. */ + std::string name = state->forceStringNoCtx( + *i->second.value->listElems()[j], *i->second.pos); + Bindings::iterator out = attrs->find(state->symbols.Create(name)); + if (out == attrs->end()) { + continue; // FIXME: throw error? + } + state->forceAttrs(*out->second.value); + + /* And evaluate its โoutPathโ attribute. */ + Bindings::iterator outPath = + out->second.value->attrs->find(state->sOutPath); + if (outPath == out->second.value->attrs->end()) { + continue; // FIXME: throw error? + } + PathSet context; + outputs[name] = state->coerceToPath(*outPath->second.pos, + *outPath->second.value, context); + } + } else { + outputs["out"] = queryOutPath(); + } + } + if (!onlyOutputsToInstall || (attrs == nullptr)) { + return outputs; + } + + /* Check for `meta.outputsToInstall` and return `outputs` reduced to that. */ + const Value* outTI = queryMeta("outputsToInstall"); + if (outTI == nullptr) { + return outputs; + } + const auto errMsg = Error("this derivation has bad 'meta.outputsToInstall'"); + /* ^ this shows during `nix-env -i` right under the bad derivation */ + if (!outTI->isList()) { + throw errMsg; + } + Outputs result; + for (auto i = outTI->listElems(); i != outTI->listElems() + outTI->listSize(); + ++i) { + if ((*i)->type != tString) { + throw errMsg; + } + auto out = outputs.find((*i)->string.s); + if (out == outputs.end()) { + throw errMsg; + } + result.insert(*out); + } + return result; +} + +string DrvInfo::queryOutputName() const { + if (outputName.empty() && (attrs != nullptr)) { + Bindings::iterator i = attrs->find(state->sOutputName); + outputName = + i != attrs->end() ? state->forceStringNoCtx(*i->second.value) : ""; + } + return outputName; +} + +Bindings* DrvInfo::getMeta() { + if (meta != nullptr) { + return meta; + } + if (attrs == nullptr) { + return nullptr; + } + Bindings::iterator a = attrs->find(state->sMeta); + if (a == attrs->end()) { + return nullptr; + } + state->forceAttrs(*a->second.value, *a->second.pos); + meta = a->second.value->attrs; + return meta; +} + +StringSet DrvInfo::queryMetaNames() { + StringSet res; + if (getMeta() == nullptr) { + return res; + } + for (auto& i : *meta) { + res.insert(i.second.name); + } + return res; +} + +bool DrvInfo::checkMeta(Value& v) { + state->forceValue(v); + if (v.isList()) { + for (unsigned int n = 0; n < v.listSize(); ++n) { + if (!checkMeta(*v.listElems()[n])) { + return false; + } + } + return true; + } + if (v.type == tAttrs) { + Bindings::iterator i = v.attrs->find(state->sOutPath); + if (i != v.attrs->end()) { + return false; + } + for (auto& i : *v.attrs) { + if (!checkMeta(*i.second.value)) { + return false; + } + } + return true; + } else { + return v.type == tInt || v.type == tBool || v.type == tString || + v.type == tFloat; + } +} + +Value* DrvInfo::queryMeta(const std::string& name) { + if (getMeta() == nullptr) { + return nullptr; + } + Bindings::iterator a = meta->find(state->symbols.Create(name)); + if (a == meta->end() || !checkMeta(*a->second.value)) { + return nullptr; + } + return a->second.value; +} + +string DrvInfo::queryMetaString(const std::string& name) { + Value* v = queryMeta(name); + if ((v == nullptr) || v->type != tString) { + return ""; + } + return v->string.s; +} + +NixInt DrvInfo::queryMetaInt(const std::string& name, NixInt def) { + Value* v = queryMeta(name); + if (v == nullptr) { + return def; + } + if (v->type == tInt) { + return v->integer; + } + if (v->type == tString) { + /* Backwards compatibility with before we had support for + integer meta fields. */ + NixInt n; + if (string2Int(v->string.s, n)) { + return n; + } + } + return def; +} + +NixFloat DrvInfo::queryMetaFloat(const std::string& name, NixFloat def) { + Value* v = queryMeta(name); + if (v == nullptr) { + return def; + } + if (v->type == tFloat) { + return v->fpoint; + } + if (v->type == tString) { + /* Backwards compatibility with before we had support for + float meta fields. */ + NixFloat n; + if (string2Float(v->string.s, n)) { + return n; + } + } + return def; +} + +bool DrvInfo::queryMetaBool(const std::string& name, bool def) { + Value* v = queryMeta(name); + if (v == nullptr) { + return def; + } + if (v->type == tBool) { + return v->boolean; + } + if (v->type == tString) { + /* Backwards compatibility with before we had support for + Boolean meta fields. */ + if (strcmp(v->string.s, "true") == 0) { + return true; + } + if (strcmp(v->string.s, "false") == 0) { + return false; + } + } + return def; +} + +void DrvInfo::setMeta(const std::string& name, Value* v) { + getMeta(); + Bindings* old = meta; + meta = Bindings::NewGC(); + Symbol sym = state->symbols.Create(name); + if (old != nullptr) { + for (auto i : *old) { + if (i.second.name != sym) { + meta->push_back(i.second); + } + } + } + if (v != nullptr) { + meta->push_back(Attr(sym, v)); + } +} + +/* Cache for already considered attrsets. */ +using Done = set<Bindings*>; + +/* Evaluate value `v'. If it evaluates to a set of type `derivation', + then put information about it in `drvs' (unless it's already in `done'). + The result boolean indicates whether it makes sense + for the caller to recursively search for derivations in `v'. */ +static bool getDerivation(EvalState& state, Value& v, + const std::string& attrPath, DrvInfos& drvs, + Done& done, bool ignoreAssertionFailures) { + try { + state.forceValue(v); + if (!state.isDerivation(v)) { + return true; + } + + /* Remove spurious duplicates (e.g., a set like `rec { x = + derivation {...}; y = x;}'. */ + if (done.find(v.attrs) != done.end()) { + return false; + } + done.insert(v.attrs); + + DrvInfo drv(state, attrPath, v.attrs); + + drv.queryName(); + + drvs.push_back(drv); + + return false; + + } catch (AssertionError& e) { + if (ignoreAssertionFailures) { + return false; + } + throw; + } +} + +std::optional<DrvInfo> getDerivation(EvalState& state, Value& v, + bool ignoreAssertionFailures) { + Done done; + DrvInfos drvs; + getDerivation(state, v, "", drvs, done, ignoreAssertionFailures); + if (drvs.size() != 1) { + return {}; + } + return std::move(drvs.front()); +} + +static std::string addToPath(const std::string& s1, const std::string& s2) { + return s1.empty() ? s2 : s1 + "." + s2; +} + +static std::regex attrRegex("[A-Za-z_][A-Za-z0-9-_+]*"); + +static void getDerivations(EvalState& state, Value& vIn, + const std::string& pathPrefix, Bindings& autoArgs, + DrvInfos& drvs, Done& done, + bool ignoreAssertionFailures) { + Value v; + state.autoCallFunction(autoArgs, vIn, v); + + /* Process the expression. */ + if (!getDerivation(state, v, pathPrefix, drvs, done, + ignoreAssertionFailures)) { + ; + + } else if (v.type == tAttrs) { + /* !!! undocumented hackery to support combining channels in + nix-env.cc. */ + bool combineChannels = + v.attrs->find(state.symbols.Create("_combineChannels")) != + v.attrs->end(); + + /* Consider the attributes in sorted order to get more + deterministic behaviour in nix-env operations (e.g. when + there are names clashes between derivations, the derivation + bound to the attribute with the "lower" name should take + precedence). */ + for (auto& i : v.attrs->lexicographicOrder()) { + DLOG(INFO) << "evaluating attribute '" << i->name << "'"; + if (!std::regex_match(std::string(i->name), attrRegex)) { + continue; + } + std::string pathPrefix2 = addToPath(pathPrefix, i->name); + if (combineChannels) { + getDerivations(state, *i->value, pathPrefix2, autoArgs, drvs, done, + ignoreAssertionFailures); + } else if (getDerivation(state, *i->value, pathPrefix2, drvs, done, + ignoreAssertionFailures)) { + /* If the value of this attribute is itself a set, + should we recurse into it? => Only if it has a + `recurseForDerivations = true' attribute. */ + if (i->value->type == tAttrs) { + Bindings::iterator j = i->value->attrs->find( + state.symbols.Create("recurseForDerivations")); + if (j != i->value->attrs->end() && + state.forceBool(*j->second.value, *j->second.pos)) { + getDerivations(state, *i->value, pathPrefix2, autoArgs, drvs, done, + ignoreAssertionFailures); + } + } + } + } + } + + else if (v.isList()) { + for (unsigned int n = 0; n < v.listSize(); ++n) { + std::string pathPrefix2 = + addToPath(pathPrefix, (format("%1%") % n).str()); + if (getDerivation(state, *v.listElems()[n], pathPrefix2, drvs, done, + ignoreAssertionFailures)) { + getDerivations(state, *v.listElems()[n], pathPrefix2, autoArgs, drvs, + done, ignoreAssertionFailures); + } + } + } + + else { + throw TypeError( + "expression does not evaluate to a derivation (or a set or list of " + "those)"); + } +} + +void getDerivations(EvalState& state, Value& v, const std::string& pathPrefix, + Bindings& autoArgs, DrvInfos& drvs, + bool ignoreAssertionFailures) { + Done done; + getDerivations(state, v, pathPrefix, autoArgs, drvs, done, + ignoreAssertionFailures); +} + +} // namespace nix diff --git a/third_party/nix/src/libexpr/get-drvs.hh b/third_party/nix/src/libexpr/get-drvs.hh new file mode 100644 index 000000000000..3da23194616e --- /dev/null +++ b/third_party/nix/src/libexpr/get-drvs.hh @@ -0,0 +1,85 @@ +#pragma once + +#include <map> +#include <string> + +#include "eval.hh" + +namespace nix { + +struct DrvInfo { + public: + typedef std::map<string, Path> Outputs; + + private: + EvalState* state; + + mutable std::string name; + mutable std::string system; + mutable std::string drvPath; + mutable std::string outPath; + mutable std::string outputName; + Outputs outputs; + + bool failed = false; // set if we get an AssertionError + + Bindings *attrs = nullptr, *meta = nullptr; + + Bindings* getMeta(); + + bool checkMeta(Value& v); + + public: + std::string attrPath; /* path towards the derivation */ + + DrvInfo(EvalState& state) : state(&state){}; + DrvInfo(EvalState& state, std::string attrPath, Bindings* attrs); + DrvInfo(EvalState& state, const ref<Store>& store, + const std::string& drvPathWithOutputs); + + std::string queryName() const; + std::string querySystem() const; + std::string queryDrvPath() const; + std::string queryOutPath() const; + std::string queryOutputName() const; + /** Return the list of outputs. The "outputs to install" are determined by + * `meta.outputsToInstall`. */ + Outputs queryOutputs(bool onlyOutputsToInstall = false); + + StringSet queryMetaNames(); + Value* queryMeta(const std::string& name); + std::string queryMetaString(const std::string& name); + NixInt queryMetaInt(const std::string& name, NixInt def); + NixFloat queryMetaFloat(const std::string& name, NixFloat def); + bool queryMetaBool(const std::string& name, bool def); + void setMeta(const std::string& name, Value* v); + + /* + MetaInfo queryMetaInfo(EvalState & state) const; + MetaValue queryMetaInfo(EvalState & state, const std::string & name) const; + */ + + void setName(const std::string& s) { name = s; } + void setDrvPath(const std::string& s) { drvPath = s; } + void setOutPath(const std::string& s) { outPath = s; } + + void setFailed() { failed = true; }; + bool hasFailed() { return failed; }; +}; + +#if HAVE_BOEHMGC +typedef list<DrvInfo, traceable_allocator<DrvInfo> > DrvInfos; +#else +typedef list<DrvInfo> DrvInfos; +#endif + +/* If value `v' denotes a derivation, return a DrvInfo object + describing it. Otherwise return nothing. */ +std::optional<DrvInfo> getDerivation(EvalState& state, Value& v, + bool ignoreAssertionFailures); + +void getDerivations(EvalState& state, Value& v, const std::string& pathPrefix, + Bindings& autoArgs, DrvInfos& drvs, + bool ignoreAssertionFailures); + +} // namespace nix diff --git a/third_party/nix/src/libexpr/json-to-value.cc b/third_party/nix/src/libexpr/json-to-value.cc new file mode 100644 index 000000000000..d46e6f268e9f --- /dev/null +++ b/third_party/nix/src/libexpr/json-to-value.cc @@ -0,0 +1,185 @@ +#include "json-to-value.hh" + +#include <cstring> + +namespace nix { + +static void skipWhitespace(const char*& s) { + while (*s == ' ' || *s == '\t' || *s == '\n' || *s == '\r') { + s++; + } +} + +static std::string parseJSONString(const char*& s) { + std::string res; + if (*s++ != '"') { + throw JSONParseError("expected JSON string"); + } + while (*s != '"') { + if (*s == 0) { + throw JSONParseError("got end-of-string in JSON string"); + } + if (*s == '\\') { + s++; + if (*s == '"') { + res += '"'; + } else if (*s == '\\') { + res += '\\'; + } else if (*s == '/') { + res += '/'; + } else if (*s == '/') { + res += '/'; + } else if (*s == 'b') { + res += '\b'; + } else if (*s == 'f') { + res += '\f'; + } else if (*s == 'n') { + res += '\n'; + } else if (*s == 'r') { + res += '\r'; + } else if (*s == 't') { + res += '\t'; + } else if (*s == 'u') { + throw JSONParseError( + "\\u characters in JSON strings are currently not supported"); + } else { + throw JSONParseError("invalid escaped character in JSON string"); + } + s++; + } else { + res += *s++; + } + } + s++; + return res; +} + +static void parseJSON(EvalState& state, const char*& s, Value& v) { + skipWhitespace(s); + + if (*s == 0) { + throw JSONParseError("expected JSON value"); + } + + if (*s == '[') { + s++; + ValueVector values; + values.reserve(128); + skipWhitespace(s); + while (true) { + if (values.empty() && *s == ']') { + break; + } + Value* v2 = state.allocValue(); + parseJSON(state, s, *v2); + values.push_back(v2); + skipWhitespace(s); + if (*s == ']') { + break; + } + if (*s != ',') { + throw JSONParseError("expected ',' or ']' after JSON array element"); + } + s++; + } + s++; + state.mkList(v, values.size()); + for (size_t n = 0; n < values.size(); ++n) { + v.listElems()[n] = values[n]; + } + } + + else if (*s == '{') { + s++; + ValueMap attrs; + while (true) { + skipWhitespace(s); + if (attrs.empty() && *s == '}') { + break; + } + std::string name = parseJSONString(s); + skipWhitespace(s); + if (*s != ':') { + throw JSONParseError("expected ':' in JSON object"); + } + s++; + Value* v2 = state.allocValue(); + parseJSON(state, s, *v2); + attrs[state.symbols.Create(name)] = v2; + skipWhitespace(s); + if (*s == '}') { + break; + } + if (*s != ',') { + throw JSONParseError("expected ',' or '}' after JSON member"); + } + s++; + } + state.mkAttrs(v, attrs.size()); + for (auto& i : attrs) { + v.attrs->push_back(Attr(i.first, i.second)); + } + s++; + } + + else if (*s == '"') { + mkString(v, parseJSONString(s)); + } + + else if ((isdigit(*s) != 0) || *s == '-' || *s == '.') { + // Buffer into a std::string first, then use built-in C++ conversions + std::string tmp_number; + ValueType number_type = tInt; + + while ((isdigit(*s) != 0) || *s == '-' || *s == '.' || *s == 'e' || + *s == 'E') { + if (*s == '.' || *s == 'e' || *s == 'E') { + number_type = tFloat; + } + tmp_number += *s++; + } + + try { + if (number_type == tFloat) { + mkFloat(v, stod(tmp_number)); + } else { + mkInt(v, stol(tmp_number)); + } + } catch (std::invalid_argument& e) { + throw JSONParseError("invalid JSON number"); + } catch (std::out_of_range& e) { + throw JSONParseError("out-of-range JSON number"); + } + } + + else if (strncmp(s, "true", 4) == 0) { + s += 4; + mkBool(v, true); + } + + else if (strncmp(s, "false", 5) == 0) { + s += 5; + mkBool(v, false); + } + + else if (strncmp(s, "null", 4) == 0) { + s += 4; + mkNull(v); + } + + else { + throw JSONParseError("unrecognised JSON value"); + } +} + +void parseJSON(EvalState& state, const std::string& s_, Value& v) { + const char* s = s_.c_str(); + parseJSON(state, s, v); + skipWhitespace(s); + if (*s != 0) { + throw JSONParseError( + format("expected end-of-string while parsing JSON value: %1%") % s); + } +} + +} // namespace nix diff --git a/third_party/nix/src/libexpr/json-to-value.hh b/third_party/nix/src/libexpr/json-to-value.hh new file mode 100644 index 000000000000..f416dfbb3860 --- /dev/null +++ b/third_party/nix/src/libexpr/json-to-value.hh @@ -0,0 +1,13 @@ +#pragma once + +#include <string> + +#include "eval.hh" + +namespace nix { + +MakeError(JSONParseError, EvalError) + + void parseJSON(EvalState& state, const std::string& s, Value& v); + +} diff --git a/third_party/nix/src/libexpr/lexer.l b/third_party/nix/src/libexpr/lexer.l new file mode 100644 index 000000000000..9cdb2bd97ecf --- /dev/null +++ b/third_party/nix/src/libexpr/lexer.l @@ -0,0 +1,222 @@ +%option reentrant bison-bridge bison-locations +%option noyywrap +%option never-interactive +%option stack +%option nodefault +%option nounput noyy_top_state + + +%s DEFAULT +%x STRING +%x IND_STRING + + +%{ +#include <boost/lexical_cast.hpp> + +#include "nixexpr.hh" +#include "parser-tab.hh" + +using namespace nix; + +namespace nix { + + +static void initLoc(YYLTYPE * loc) +{ + loc->first_line = loc->last_line = 1; + loc->first_column = loc->last_column = 1; +} + + +static void adjustLoc(YYLTYPE * loc, const char * s, size_t len) +{ + loc->first_line = loc->last_line; + loc->first_column = loc->last_column; + + while (len--) { + switch (*s++) { + case '\r': + if (*s == '\n') /* cr/lf */ + s++; + /* fall through */ + case '\n': + ++loc->last_line; + loc->last_column = 1; + break; + default: + ++loc->last_column; + } + } +} + + +static Expr * unescapeStr(SymbolTable & symbols, const char * s, size_t length) +{ + std::string t; + t.reserve(length); + char c; + while ((c = *s++)) { + if (c == '\\') { + assert(*s); + c = *s++; + if (c == 'n') { t += '\n'; } + else if (c == 'r') { t += '\r'; } + else if (c == 't') { t += '\t'; } + else t += c; + } + else if (c == '\r') { + /* Normalise CR and CR/LF into LF. */ + t += '\n'; + if (*s == '\n') { s++; } /* cr/lf */ + } + else t += c; + } + return new ExprString(symbols.Create(t)); +} + + +} + +#define YY_USER_INIT initLoc(yylloc) +#define YY_USER_ACTION adjustLoc(yylloc, yytext, yyleng); + +#define PUSH_STATE(state) yy_push_state(state, yyscanner) +#define POP_STATE() yy_pop_state(yyscanner) + +%} + + +ANY .|\n +ID [a-zA-Z\_][a-zA-Z0-9\_\'\-]* +INT [0-9]+ +FLOAT (([1-9][0-9]*\.[0-9]*)|(0?\.[0-9]+))([Ee][+-]?[0-9]+)? +PATH [a-zA-Z0-9\.\_\-\+]*(\/[a-zA-Z0-9\.\_\-\+]+)+\/? +HPATH \~(\/[a-zA-Z0-9\.\_\-\+]+)+\/? +SPATH \<[a-zA-Z0-9\.\_\-\+]+(\/[a-zA-Z0-9\.\_\-\+]+)*\> +URI [a-zA-Z][a-zA-Z0-9\+\-\.]*\:[a-zA-Z0-9\%\/\?\:\@\&\=\+\$\,\-\_\.\!\~\*\']+ + + +%% + + +if { return IF; } +then { return THEN; } +else { return ELSE; } +assert { return ASSERT; } +with { return WITH; } +let { return LET; } +in { return IN; } +rec { return REC; } +inherit { return INHERIT; } +or { return OR_KW; } +\.\.\. { return ELLIPSIS; } + +\=\= { return EQ; } +\!\= { return NEQ; } +\<\= { return LEQ; } +\>\= { return GEQ; } +\&\& { return AND; } +\|\| { return OR; } +\-\> { return IMPL; } +\/\/ { return UPDATE; } +\+\+ { return CONCAT; } + +{ID} { yylval->id = strdup(yytext); return ID; } +{INT} { errno = 0; + try { + yylval->n = boost::lexical_cast<int64_t>(yytext); + } catch (const boost::bad_lexical_cast &) { + throw ParseError(format("invalid integer '%1%'") % yytext); + } + return INT; + } +{FLOAT} { errno = 0; + yylval->nf = strtod(yytext, 0); + if (errno != 0) + throw ParseError(format("invalid float '%1%'") % yytext); + return FLOAT; + } + +\$\{ { PUSH_STATE(DEFAULT); return DOLLAR_CURLY; } + +\} { /* State INITIAL only exists at the bottom of the stack and is + used as a marker. DEFAULT replaces it everywhere else. + Popping when in INITIAL state causes an empty stack exception, + so don't */ + if (YYSTATE != INITIAL) + POP_STATE(); + return '}'; + } +\{ { PUSH_STATE(DEFAULT); return '{'; } + +\" { PUSH_STATE(STRING); return '"'; } +<STRING>([^\$\"\\]|\$[^\{\"\\]|\\{ANY}|\$\\{ANY})*\$/\" | +<STRING>([^\$\"\\]|\$[^\{\"\\]|\\{ANY}|\$\\{ANY})+ { + /* It is impossible to match strings ending with '$' with one + regex because trailing contexts are only valid at the end + of a rule. (A sane but undocumented limitation.) */ + yylval->e = unescapeStr(data->symbols, yytext, yyleng); + return STR; + } +<STRING>\$\{ { PUSH_STATE(DEFAULT); return DOLLAR_CURLY; } +<STRING>\" { POP_STATE(); return '"'; } +<STRING>\$|\\|\$\\ { + /* This can only occur when we reach EOF, otherwise the above + (...|\$[^\{\"\\]|\\.|\$\\.)+ would have triggered. + This is technically invalid, but we leave the problem to the + parser who fails with exact location. */ + return STR; + } + +\'\'(\ *\n)? { PUSH_STATE(IND_STRING); return IND_STRING_OPEN; } +<IND_STRING>([^\$\']|\$[^\{\']|\'[^\'\$])+ { + yylval->e = new ExprIndStr(yytext); + return IND_STR; + } +<IND_STRING>\'\'\$ | +<IND_STRING>\$ { + yylval->e = new ExprIndStr("$"); + return IND_STR; + } +<IND_STRING>\'\'\' { + yylval->e = new ExprIndStr("''"); + return IND_STR; + } +<IND_STRING>\'\'\\{ANY} { + yylval->e = unescapeStr(data->symbols, yytext + 2, yyleng - 2); + return IND_STR; + } +<IND_STRING>\$\{ { PUSH_STATE(DEFAULT); return DOLLAR_CURLY; } +<IND_STRING>\'\' { POP_STATE(); return IND_STRING_CLOSE; } +<IND_STRING>\' { + yylval->e = new ExprIndStr("'"); + return IND_STR; + } + + +{PATH} { if (yytext[yyleng-1] == '/') + throw ParseError("path '%s' has a trailing slash", yytext); + yylval->path = strdup(yytext); + return PATH; + } +{HPATH} { if (yytext[yyleng-1] == '/') + throw ParseError("path '%s' has a trailing slash", yytext); + yylval->path = strdup(yytext); + return HPATH; + } +{SPATH} { yylval->path = strdup(yytext); return SPATH; } +{URI} { yylval->uri = strdup(yytext); return URI; } + +[ \t\r\n]+ /* eat up whitespace */ +\#[^\r\n]* /* single-line comments */ +\/\*([^*]|\*+[^*/])*\*+\/ /* long comments */ + +{ANY} { + /* Don't return a negative number, as this will cause + Bison to stop parsing without an error. */ + return (unsigned char) yytext[0]; + } + +%% + diff --git a/third_party/nix/src/libexpr/meson.build b/third_party/nix/src/libexpr/meson.build new file mode 100644 index 000000000000..ce50838b9744 --- /dev/null +++ b/third_party/nix/src/libexpr/meson.build @@ -0,0 +1,97 @@ +src_inc += include_directories('.', 'primops') + +libexpr_src = files( + join_paths(meson.source_root(), 'src/libexpr/primops/context.cc'), + join_paths(meson.source_root(), 'src/libexpr/primops/fetchGit.cc'), + join_paths(meson.source_root(), 'src/libexpr/primops/fetchMercurial.cc'), + join_paths(meson.source_root(), 'src/libexpr/primops/fromTOML.cc'), + + join_paths(meson.source_root(), 'src/libexpr/attr-path.cc'), + join_paths(meson.source_root(), 'src/libexpr/attr-set.cc'), + join_paths(meson.source_root(), 'src/libexpr/common-eval-args.cc'), + join_paths(meson.source_root(), 'src/libexpr/eval.cc'), + join_paths(meson.source_root(), 'src/libexpr/function-trace.cc'), + join_paths(meson.source_root(), 'src/libexpr/get-drvs.cc'), + join_paths(meson.source_root(), 'src/libexpr/json-to-value.cc'), + join_paths(meson.source_root(), 'src/libexpr/names.cc'), + join_paths(meson.source_root(), 'src/libexpr/nixexpr.cc'), + join_paths(meson.source_root(), 'src/libexpr/primops.cc'), + join_paths(meson.source_root(), 'src/libexpr/symbol-table.cc'), + join_paths(meson.source_root(), 'src/libexpr/value-to-json.cc'), + join_paths(meson.source_root(), 'src/libexpr/value-to-xml.cc'), +) + +libexpr_headers = files( + join_paths(meson.source_root(), 'src/libexpr/attr-path.hh'), + join_paths(meson.source_root(), 'src/libexpr/attr-set.hh'), + join_paths(meson.source_root(), 'src/libexpr/common-eval-args.hh'), + join_paths(meson.source_root(), 'src/libexpr/eval.hh'), + join_paths(meson.source_root(), 'src/libexpr/eval-inline.hh'), + join_paths(meson.source_root(), 'src/libexpr/function-trace.hh'), + join_paths(meson.source_root(), 'src/libexpr/get-drvs.hh'), + join_paths(meson.source_root(), 'src/libexpr/json-to-value.hh'), + join_paths(meson.source_root(), 'src/libexpr/names.hh'), + join_paths(meson.source_root(), 'src/libexpr/nixexpr.hh'), + join_paths(meson.source_root(), 'src/libexpr/primops.hh'), + join_paths(meson.source_root(), 'src/libexpr/symbol-table.hh'), + join_paths(meson.source_root(), 'src/libexpr/value.hh'), + join_paths(meson.source_root(), 'src/libexpr/value-to-json.hh'), + join_paths(meson.source_root(), 'src/libexpr/value-to-xml.hh')) + +libexpr_dep_list = [ + gc_dep, + glog_dep, + libdl_dep, + libsodium_dep, +] + absl_deps + +libexpr_link_list = [ + libutil_lib, + libstore_lib, + libmain_lib, +] + +libexpr_link_args = [ + '-lpthread', + '-lgccpp', +] + +libexpr_cxx_args = [] + +libexpr_src += custom_target( + 'parser_tab.[cchh]', + output : [ + 'parser-tab.cc', + 'parser-tab.hh'], + input : 'parser.y', + command : [ + bison, + '-v', + '--output=@OUTPUT0@', + '@INPUT@', + '-d']) + +libexpr_src += custom_target( + 'lexer_tab.[cchh]', + output : ['lexer-tab.cc', 'lexer-tab.hh'], + input : 'lexer.l', + command : [ + flex, + '--outfile=@OUTPUT0@', + '--header-file=@OUTPUT1@', + '@INPUT@']) + +libexpr_lib = library( + 'nixexpr', + install : true, + install_mode : 'rwxr-xr-x', + install_dir : libdir, + include_directories : src_inc, + link_with : libexpr_link_list, + sources : libexpr_src, + link_args : libexpr_link_args, + dependencies : libexpr_dep_list) + +install_headers( + libexpr_headers, + install_dir : join_paths(includedir, 'nix')) diff --git a/third_party/nix/src/libexpr/names.cc b/third_party/nix/src/libexpr/names.cc new file mode 100644 index 000000000000..ac8150532f25 --- /dev/null +++ b/third_party/nix/src/libexpr/names.cc @@ -0,0 +1,119 @@ +#include "names.hh" + +#include <memory> + +#include "util.hh" + +namespace nix { + +DrvName::DrvName() { name = ""; } + +/* Parse a derivation name. The `name' part of a derivation name is + everything up to but not including the first dash *not* followed by + a letter. The `version' part is the rest (excluding the separating + dash). E.g., `apache-httpd-2.0.48' is parsed to (`apache-httpd', + '2.0.48'). */ +DrvName::DrvName(const std::string& s) : hits(0) { + name = fullName = s; + for (unsigned int i = 0; i < s.size(); ++i) { + /* !!! isalpha/isdigit are affected by the locale. */ + if (s[i] == '-' && i + 1 < s.size() && (isalpha(s[i + 1]) == 0)) { + name = string(s, 0, i); + version = string(s, i + 1); + break; + } + } +} + +bool DrvName::matches(DrvName& n) { + if (name != "*") { + if (!regex) { + regex = std::make_unique<std::regex>(name, std::regex::extended); + } + if (!std::regex_match(n.name, *regex)) { + return false; + } + } + return !(!version.empty() && version != n.version); +} + +string nextComponent(string::const_iterator& p, + const string::const_iterator end) { + /* Skip any dots and dashes (component separators). */ + while (p != end && (*p == '.' || *p == '-')) { + ++p; + } + + if (p == end) { + return ""; + } + + /* If the first character is a digit, consume the longest sequence + of digits. Otherwise, consume the longest sequence of + non-digit, non-separator characters. */ + std::string s; + if (isdigit(*p) != 0) { + while (p != end && (isdigit(*p) != 0)) { + s += *p++; + } + } else { + while (p != end && ((isdigit(*p) == 0) && *p != '.' && *p != '-')) { + s += *p++; + } + } + + return s; +} + +static bool componentsLT(const std::string& c1, const std::string& c2) { + int n1; + int n2; + bool c1Num = string2Int(c1, n1); + bool c2Num = string2Int(c2, n2); + + if (c1Num && c2Num) { + return n1 < n2; + } + if (c1.empty() && c2Num) { + return true; + } else if (c1 == "pre" && c2 != "pre") { + return true; + } else if (c2 == "pre") { + return false; + /* Assume that `2.3a' < `2.3.1'. */ + } else if (c2Num) { + return true; + } else if (c1Num) { + return false; + } else { + return c1 < c2; + } +} + +int compareVersions(const std::string& v1, const std::string& v2) { + string::const_iterator p1 = v1.begin(); + string::const_iterator p2 = v2.begin(); + + while (p1 != v1.end() || p2 != v2.end()) { + std::string c1 = nextComponent(p1, v1.end()); + std::string c2 = nextComponent(p2, v2.end()); + if (componentsLT(c1, c2)) { + return -1; + } + if (componentsLT(c2, c1)) { + return 1; + } + } + + return 0; +} + +DrvNames drvNamesFromArgs(const Strings& opArgs) { + DrvNames result; + for (auto& i : opArgs) { + result.push_back(DrvName(i)); + } + return result; +} + +} // namespace nix diff --git a/third_party/nix/src/libexpr/names.hh b/third_party/nix/src/libexpr/names.hh new file mode 100644 index 000000000000..e2ddb5cb993e --- /dev/null +++ b/third_party/nix/src/libexpr/names.hh @@ -0,0 +1,31 @@ +#pragma once + +#include <memory> +#include <regex> + +#include "types.hh" + +namespace nix { + +struct DrvName { + std::string fullName; + std::string name; + std::string version; + unsigned int hits; + + DrvName(); + DrvName(const std::string& s); + bool matches(DrvName& n); + + private: + std::unique_ptr<std::regex> regex; +}; + +typedef list<DrvName> DrvNames; + +string nextComponent(string::const_iterator& p, + const string::const_iterator end); +int compareVersions(const std::string& v1, const std::string& v2); +DrvNames drvNamesFromArgs(const Strings& opArgs); + +} // namespace nix diff --git a/third_party/nix/src/libexpr/nix-expr.pc.in b/third_party/nix/src/libexpr/nix-expr.pc.in new file mode 100644 index 000000000000..80f7a492b1a1 --- /dev/null +++ b/third_party/nix/src/libexpr/nix-expr.pc.in @@ -0,0 +1,10 @@ +prefix=@prefix@ +libdir=@libdir@ +includedir=@includedir@ + +Name: Nix +Description: Nix Package Manager +Version: @PACKAGE_VERSION@ +Requires: nix-store bdw-gc +Libs: -L${libdir} -lnixexpr +Cflags: -I${includedir}/nix -std=c++17 diff --git a/third_party/nix/src/libexpr/nixexpr.cc b/third_party/nix/src/libexpr/nixexpr.cc new file mode 100644 index 000000000000..3d454d266ff0 --- /dev/null +++ b/third_party/nix/src/libexpr/nixexpr.cc @@ -0,0 +1,416 @@ +#include "nixexpr.hh" + +#include <cstdlib> + +#include "derivations.hh" +#include "util.hh" + +namespace nix { + +/* Displaying abstract syntax trees. */ + +std::ostream& operator<<(std::ostream& str, const Expr& e) { + e.show(str); + return str; +} + +static void showString(std::ostream& str, const std::string& s) { + str << '"'; + for (auto c : (string)s) { + if (c == '"' || c == '\\' || c == '$') { + str << "\\" << c; + } else if (c == '\n') { + str << "\\n"; + } else if (c == '\r') { + str << "\\r"; + } else if (c == '\t') { + str << "\\t"; + } else { + str << c; + } + } + str << '"'; +} + +static void showId(std::ostream& str, const std::string& s) { + if (s.empty()) { + str << "\"\""; + } else if (s == "if") { // FIXME: handle other keywords + str << '"' << s << '"'; + } else { + char c = s[0]; + if (!((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_')) { + showString(str, s); + return; + } + for (auto c : s) { + if (!((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || + (c >= '0' && c <= '9') || c == '_' || c == '\'' || c == '-')) { + showString(str, s); + return; + } + } + str << s; + } +} + +std::ostream& operator<<(std::ostream& str, const Symbol& sym) { + showId(str, *sym.s); + return str; +} + +void Expr::show(std::ostream& str) const { abort(); } + +void ExprInt::show(std::ostream& str) const { str << n; } + +void ExprFloat::show(std::ostream& str) const { str << nf; } + +void ExprString::show(std::ostream& str) const { showString(str, s); } + +void ExprPath::show(std::ostream& str) const { str << s; } + +void ExprVar::show(std::ostream& str) const { str << name; } + +void ExprSelect::show(std::ostream& str) const { + str << "(" << *e << ")." << showAttrPath(attrPath); + if (def != nullptr) { + str << " or (" << *def << ")"; + } +} + +void ExprOpHasAttr::show(std::ostream& str) const { + str << "((" << *e << ") ? " << showAttrPath(attrPath) << ")"; +} + +void ExprAttrs::show(std::ostream& str) const { + if (recursive) { + str << "rec "; + } + str << "{ "; + for (auto& i : attrs) { + if (i.second.inherited) { + str << "inherit " << i.first << " " + << "; "; + } else { + str << i.first << " = " << *i.second.e << "; "; + } + } + for (auto& i : dynamicAttrs) { + str << "\"${" << *i.nameExpr << "}\" = " << *i.valueExpr << "; "; + } + str << "}"; +} + +void ExprList::show(std::ostream& str) const { + str << "[ "; + for (auto& i : elems) { + str << "(" << *i << ") "; + } + str << "]"; +} + +void ExprLambda::show(std::ostream& str) const { + str << "("; + if (matchAttrs) { + str << "{ "; + bool first = true; + for (auto& i : formals->formals) { + if (first) { + first = false; + } else { + str << ", "; + } + str << i.name; + if (i.def != nullptr) { + str << " ? " << *i.def; + } + } + if (formals->ellipsis) { + if (!first) { + str << ", "; + } + str << "..."; + } + str << " }"; + if (!arg.empty()) { + str << " @ "; + } + } + if (!arg.empty()) { + str << arg; + } + str << ": " << *body << ")"; +} + +void ExprLet::show(std::ostream& str) const { + str << "(let "; + for (auto& i : attrs->attrs) { + if (i.second.inherited) { + str << "inherit " << i.first << "; "; + } else { + str << i.first << " = " << *i.second.e << "; "; + } + } + str << "in " << *body << ")"; +} + +void ExprWith::show(std::ostream& str) const { + str << "(with " << *attrs << "; " << *body << ")"; +} + +void ExprIf::show(std::ostream& str) const { + str << "(if " << *cond << " then " << *then << " else " << *else_ << ")"; +} + +void ExprAssert::show(std::ostream& str) const { + str << "assert " << *cond << "; " << *body; +} + +void ExprOpNot::show(std::ostream& str) const { str << "(! " << *e << ")"; } + +void ExprConcatStrings::show(std::ostream& str) const { + bool first = true; + str << "("; + for (auto& i : *es) { + if (first) { + first = false; + } else { + str << " + "; + } + str << *i; + } + str << ")"; +} + +void ExprPos::show(std::ostream& str) const { str << "__curPos"; } + +std::ostream& operator<<(std::ostream& str, const Pos& pos) { + if (!pos) { + str << "undefined position"; + } else { + str << (format(ANSI_BOLD "%1%" ANSI_NORMAL ":%2%:%3%") % (string)pos.file % + pos.line % pos.column) + .str(); + } + return str; +} + +string showAttrPath(const AttrPath& attrPath) { + std::ostringstream out; + bool first = true; + for (auto& i : attrPath) { + if (!first) { + out << '.'; + } else { + first = false; + } + if (i.symbol.set()) { + out << i.symbol; + } else { + out << "\"${" << *i.expr << "}\""; + } + } + return out.str(); +} + +Pos noPos; + +/* Computing levels/displacements for variables. */ + +void Expr::bindVars(const StaticEnv& env) { abort(); } + +void ExprInt::bindVars(const StaticEnv& env) {} + +void ExprFloat::bindVars(const StaticEnv& env) {} + +void ExprString::bindVars(const StaticEnv& env) {} + +void ExprPath::bindVars(const StaticEnv& env) {} + +void ExprVar::bindVars(const StaticEnv& env) { + /* Check whether the variable appears in the environment. If so, + set its level and displacement. */ + const StaticEnv* curEnv; + unsigned int level; + int withLevel = -1; + for (curEnv = &env, level = 0; curEnv != nullptr; + curEnv = curEnv->up, level++) { + if (curEnv->isWith) { + if (withLevel == -1) { + withLevel = level; + } + } else { + auto i = curEnv->vars.find(name); + if (i != curEnv->vars.end()) { + fromWith = false; + this->level = level; + displ = i->second; + return; + } + } + } + + /* Otherwise, the variable must be obtained from the nearest + enclosing `with'. If there is no `with', then we can issue an + "undefined variable" error now. */ + if (withLevel == -1) { + throw UndefinedVarError(format("undefined variable '%1%' at %2%") % name % + pos); + } + + fromWith = true; + this->level = withLevel; +} + +void ExprSelect::bindVars(const StaticEnv& env) { + e->bindVars(env); + if (def != nullptr) { + def->bindVars(env); + } + for (auto& i : attrPath) { + if (!i.symbol.set()) { + i.expr->bindVars(env); + } + } +} + +void ExprOpHasAttr::bindVars(const StaticEnv& env) { + e->bindVars(env); + for (auto& i : attrPath) { + if (!i.symbol.set()) { + i.expr->bindVars(env); + } + } +} + +void ExprAttrs::bindVars(const StaticEnv& env) { + const StaticEnv* dynamicEnv = &env; + StaticEnv newEnv(/* isWith = */ false, &env); + + if (recursive) { + dynamicEnv = &newEnv; + + unsigned int displ = 0; + for (auto& i : attrs) { + newEnv.vars[i.first] = i.second.displ = displ++; + } + + for (auto& i : attrs) { + i.second.e->bindVars(i.second.inherited ? env : newEnv); + } + } + + else { + for (auto& i : attrs) { + i.second.e->bindVars(env); + } + } + + for (auto& i : dynamicAttrs) { + i.nameExpr->bindVars(*dynamicEnv); + i.valueExpr->bindVars(*dynamicEnv); + } +} + +void ExprList::bindVars(const StaticEnv& env) { + for (auto& i : elems) { + i->bindVars(env); + } +} + +void ExprLambda::bindVars(const StaticEnv& env) { + StaticEnv newEnv(false, &env); + + unsigned int displ = 0; + + if (!arg.empty()) { + newEnv.vars[arg] = displ++; + } + + if (matchAttrs) { + for (auto& i : formals->formals) { + newEnv.vars[i.name] = displ++; + } + + for (auto& i : formals->formals) { + if (i.def != nullptr) { + i.def->bindVars(newEnv); + } + } + } + + body->bindVars(newEnv); +} + +void ExprLet::bindVars(const StaticEnv& env) { + StaticEnv newEnv(false, &env); + + unsigned int displ = 0; + for (auto& i : attrs->attrs) { + newEnv.vars[i.first] = i.second.displ = displ++; + } + + for (auto& i : attrs->attrs) { + i.second.e->bindVars(i.second.inherited ? env : newEnv); + } + + body->bindVars(newEnv); +} + +void ExprWith::bindVars(const StaticEnv& env) { + /* Does this `with' have an enclosing `with'? If so, record its + level so that `lookupVar' can look up variables in the previous + `with' if this one doesn't contain the desired attribute. */ + const StaticEnv* curEnv; + unsigned int level; + prevWith = 0; + for (curEnv = &env, level = 1; curEnv != nullptr; + curEnv = curEnv->up, level++) { + if (curEnv->isWith) { + prevWith = level; + break; + } + } + + attrs->bindVars(env); + StaticEnv newEnv(true, &env); + body->bindVars(newEnv); +} + +void ExprIf::bindVars(const StaticEnv& env) { + cond->bindVars(env); + then->bindVars(env); + else_->bindVars(env); +} + +void ExprAssert::bindVars(const StaticEnv& env) { + cond->bindVars(env); + body->bindVars(env); +} + +void ExprOpNot::bindVars(const StaticEnv& env) { e->bindVars(env); } + +void ExprConcatStrings::bindVars(const StaticEnv& env) { + for (auto& i : *es) { + i->bindVars(env); + } +} + +void ExprPos::bindVars(const StaticEnv& env) {} + +/* Storing function names. */ + +void Expr::setName(Symbol& name) {} + +void ExprLambda::setName(Symbol& name) { + this->name = name; + body->setName(name); +} + +string ExprLambda::showNamePos() const { + return (format("%1% at %2%") % + (name.set() ? "'" + (string)name + "'" : "anonymous function") % pos) + .str(); +} + +} // namespace nix diff --git a/third_party/nix/src/libexpr/nixexpr.hh b/third_party/nix/src/libexpr/nixexpr.hh new file mode 100644 index 000000000000..715dbd8d59f1 --- /dev/null +++ b/third_party/nix/src/libexpr/nixexpr.hh @@ -0,0 +1,323 @@ +#pragma once + +#include <map> + +#include "symbol-table.hh" +#include "types.hh" // TODO(tazjin): audit this include +#include "value.hh" + +namespace nix { + +MakeError(EvalError, Error) MakeError(ParseError, Error) + MakeError(AssertionError, EvalError) MakeError(ThrownError, AssertionError) + MakeError(Abort, EvalError) MakeError(TypeError, EvalError) + MakeError(UndefinedVarError, Error) + MakeError(RestrictedPathError, Error) + + /* Position objects. */ + + struct Pos { + Symbol file; + unsigned int line, column; + Pos() : line(0), column(0){}; + Pos(const Symbol& file, unsigned int line, unsigned int column) + : file(file), line(line), column(column){}; + operator bool() const { return line != 0; } + bool operator<(const Pos& p2) const { + if (!line) { + return p2.line; + } + if (!p2.line) { + return false; + } + int d = ((string)file).compare((string)p2.file); + if (d < 0) { + return true; + } + if (d > 0) { + return false; + } + if (line < p2.line) { + return true; + } + if (line > p2.line) { + return false; + } + return column < p2.column; + } +}; + +extern Pos noPos; + +std::ostream& operator<<(std::ostream& str, const Pos& pos); + +struct Env; +struct Value; +class EvalState; +struct StaticEnv; + +/* An attribute path is a sequence of attribute names. */ +struct AttrName { + Symbol symbol; + Expr* expr; + AttrName(const Symbol& s) : symbol(s){}; + AttrName(Expr* e) : expr(e){}; +}; + +typedef std::vector<AttrName> AttrPath; + +string showAttrPath(const AttrPath& attrPath); + +/* Abstract syntax of Nix expressions. */ + +struct Expr { + virtual ~Expr(){}; + virtual void show(std::ostream& str) const; + virtual void bindVars(const StaticEnv& env); + virtual void eval(EvalState& state, Env& env, Value& v); + virtual Value* maybeThunk(EvalState& state, Env& env); + virtual void setName(Symbol& name); +}; + +std::ostream& operator<<(std::ostream& str, const Expr& e); + +#define COMMON_METHODS \ + void show(std::ostream& str) const; \ + void eval(EvalState& state, Env& env, Value& v); \ + void bindVars(const StaticEnv& env); + +struct ExprInt : Expr { + NixInt n; + Value v; + ExprInt(NixInt n) : n(n) { mkInt(v, n); }; + COMMON_METHODS + Value* maybeThunk(EvalState& state, Env& env); +}; + +struct ExprFloat : Expr { + NixFloat nf; + Value v; + ExprFloat(NixFloat nf) : nf(nf) { mkFloat(v, nf); }; + COMMON_METHODS + Value* maybeThunk(EvalState& state, Env& env); +}; + +struct ExprString : Expr { + Symbol s; + Value v; + ExprString(const Symbol& s) : s(s) { mkString(v, s); }; + COMMON_METHODS + Value* maybeThunk(EvalState& state, Env& env); +}; + +/* Temporary class used during parsing of indented strings. */ +struct ExprIndStr : Expr { + std::string s; + ExprIndStr(const std::string& s) : s(s){}; +}; + +struct ExprPath : Expr { + std::string s; + Value v; + ExprPath(const std::string& s) : s(s) { mkPathNoCopy(v, this->s.c_str()); }; + COMMON_METHODS + Value* maybeThunk(EvalState& state, Env& env); +}; + +struct ExprVar : Expr { + Pos pos; + Symbol name; + + /* Whether the variable comes from an environment (e.g. a rec, let + or function argument) or from a "with". */ + bool fromWith; + + /* In the former case, the value is obtained by going `level' + levels up from the current environment and getting the + `displ'th value in that environment. In the latter case, the + value is obtained by getting the attribute named `name' from + the set stored in the environment that is `level' levels up + from the current one.*/ + unsigned int level; + unsigned int displ; + + ExprVar(const Symbol& name) : name(name){}; + ExprVar(const Pos& pos, const Symbol& name) : pos(pos), name(name){}; + COMMON_METHODS + Value* maybeThunk(EvalState& state, Env& env); +}; + +struct ExprSelect : Expr { + Pos pos; + Expr *e, *def; + AttrPath attrPath; + ExprSelect(const Pos& pos, Expr* e, const AttrPath& attrPath, Expr* def) + : pos(pos), e(e), def(def), attrPath(attrPath){}; + ExprSelect(const Pos& pos, Expr* e, const Symbol& name) + : pos(pos), e(e), def(0) { + attrPath.push_back(AttrName(name)); + }; + COMMON_METHODS +}; + +struct ExprOpHasAttr : Expr { + Expr* e; + AttrPath attrPath; + ExprOpHasAttr(Expr* e, const AttrPath& attrPath) : e(e), attrPath(attrPath){}; + COMMON_METHODS +}; + +struct ExprAttrs : Expr { + bool recursive; + struct AttrDef { + bool inherited; + Expr* e; + Pos pos; + unsigned int displ; // displacement + AttrDef(Expr* e, const Pos& pos, bool inherited = false) + : inherited(inherited), e(e), pos(pos){}; + AttrDef(){}; + }; + typedef std::map<Symbol, AttrDef> AttrDefs; + AttrDefs attrs; + struct DynamicAttrDef { + Expr *nameExpr, *valueExpr; + Pos pos; + DynamicAttrDef(Expr* nameExpr, Expr* valueExpr, const Pos& pos) + : nameExpr(nameExpr), valueExpr(valueExpr), pos(pos){}; + }; + typedef std::vector<DynamicAttrDef> DynamicAttrDefs; + DynamicAttrDefs dynamicAttrs; + ExprAttrs() : recursive(false){}; + COMMON_METHODS +}; + +struct ExprList : Expr { + std::vector<Expr*> elems; + ExprList(){}; + COMMON_METHODS +}; + +struct Formal { + Symbol name; + Expr* def; + Formal(const Symbol& name, Expr* def) : name(name), def(def){}; +}; + +struct Formals { + typedef std::list<Formal> Formals_; + Formals_ formals; + std::set<Symbol> argNames; // used during parsing + bool ellipsis; +}; + +struct ExprLambda : Expr { + Pos pos; + Symbol name; + Symbol arg; + bool matchAttrs; + Formals* formals; + Expr* body; + ExprLambda(const Pos& pos, const Symbol& arg, bool matchAttrs, + Formals* formals, Expr* body) + : pos(pos), + arg(arg), + matchAttrs(matchAttrs), + formals(formals), + body(body) { + if (!arg.empty() && formals && + formals->argNames.find(arg) != formals->argNames.end()) + throw ParseError( + format("duplicate formal function argument '%1%' at %2%") % arg % + pos); + }; + void setName(Symbol& name); + std::string showNamePos() const; + COMMON_METHODS +}; + +struct ExprLet : Expr { + ExprAttrs* attrs; + Expr* body; + ExprLet(ExprAttrs* attrs, Expr* body) : attrs(attrs), body(body){}; + COMMON_METHODS +}; + +struct ExprWith : Expr { + Pos pos; + Expr *attrs, *body; + size_t prevWith; + ExprWith(const Pos& pos, Expr* attrs, Expr* body) + : pos(pos), attrs(attrs), body(body){}; + COMMON_METHODS +}; + +struct ExprIf : Expr { + Expr *cond, *then, *else_; + ExprIf(Expr* cond, Expr* then, Expr* else_) + : cond(cond), then(then), else_(else_){}; + COMMON_METHODS +}; + +struct ExprAssert : Expr { + Pos pos; + Expr *cond, *body; + ExprAssert(const Pos& pos, Expr* cond, Expr* body) + : pos(pos), cond(cond), body(body){}; + COMMON_METHODS +}; + +struct ExprOpNot : Expr { + Expr* e; + ExprOpNot(Expr* e) : e(e){}; + COMMON_METHODS +}; + +#define MakeBinOp(name, s) \ + struct name : Expr { \ + Pos pos; \ + Expr *e1, *e2; \ + name(Expr* e1, Expr* e2) : e1(e1), e2(e2){}; \ + name(const Pos& pos, Expr* e1, Expr* e2) : pos(pos), e1(e1), e2(e2){}; \ + void show(std::ostream& str) const { \ + str << "(" << *e1 << " " s " " << *e2 << ")"; \ + } \ + void bindVars(const StaticEnv& env) { \ + e1->bindVars(env); \ + e2->bindVars(env); \ + } \ + void eval(EvalState& state, Env& env, Value& v); \ + }; + +MakeBinOp(ExprApp, "") MakeBinOp(ExprOpEq, "==") MakeBinOp(ExprOpNEq, "!=") + MakeBinOp(ExprOpAnd, "&&") MakeBinOp(ExprOpOr, "||") + MakeBinOp(ExprOpImpl, "->") MakeBinOp(ExprOpUpdate, "//") + MakeBinOp(ExprOpConcatLists, "++") + + struct ExprConcatStrings : Expr { + Pos pos; + bool forceString; + vector<Expr*>* es; + ExprConcatStrings(const Pos& pos, bool forceString, vector<Expr*>* es) + : pos(pos), forceString(forceString), es(es){}; + COMMON_METHODS +}; + +struct ExprPos : Expr { + Pos pos; + ExprPos(const Pos& pos) : pos(pos){}; + COMMON_METHODS +}; + +/* Static environments are used to map variable names onto (level, + displacement) pairs used to obtain the value of the variable at + runtime. */ +struct StaticEnv { + bool isWith; + const StaticEnv* up; + typedef std::map<Symbol, unsigned int> Vars; + Vars vars; + StaticEnv(bool isWith, const StaticEnv* up) : isWith(isWith), up(up){}; +}; + +} // namespace nix diff --git a/third_party/nix/src/libexpr/parser.y b/third_party/nix/src/libexpr/parser.y new file mode 100644 index 000000000000..bd62a7fd0f52 --- /dev/null +++ b/third_party/nix/src/libexpr/parser.y @@ -0,0 +1,703 @@ +%glr-parser +%pure-parser +%locations +%define parse.error verbose +%defines +/* %no-lines */ +%parse-param { void * scanner } +%parse-param { nix::ParseData * data } +%lex-param { void * scanner } +%lex-param { nix::ParseData * data } +%expect 1 +%expect-rr 1 + +%code requires { + +#ifndef BISON_HEADER +#define BISON_HEADER + +#include "util.hh" +#include "nixexpr.hh" +#include "eval.hh" +#include <glog/logging.h> + +namespace nix { + + struct ParseData + { + EvalState & state; + SymbolTable & symbols; + Expr * result; + Path basePath; + Symbol path; + std::string error; + Symbol sLetBody; + ParseData(EvalState & state) + : state(state) + , symbols(state.symbols) + , sLetBody(symbols.Create("<let-body>")) + { }; + }; + +} + +#define YY_DECL int yylex \ + (YYSTYPE * yylval_param, YYLTYPE * yylloc_param, yyscan_t yyscanner, nix::ParseData * data) + +#endif + +} + +%{ + +#include "parser-tab.hh" +#include "lexer-tab.hh" + +YY_DECL; + +using namespace nix; + + +namespace nix { + + +static void dupAttr(const AttrPath & attrPath, const Pos & pos, const Pos & prevPos) +{ + throw ParseError(format("attribute '%1%' at %2% already defined at %3%") + % showAttrPath(attrPath) % pos % prevPos); +} + + +static void dupAttr(Symbol attr, const Pos & pos, const Pos & prevPos) +{ + throw ParseError(format("attribute '%1%' at %2% already defined at %3%") + % attr % pos % prevPos); +} + + +static void addAttr(ExprAttrs * attrs, AttrPath & attrPath, + Expr * e, const Pos & pos) +{ + AttrPath::iterator i; + // All attrpaths have at least one attr + assert(!attrPath.empty()); + // Checking attrPath validity. + // =========================== + for (i = attrPath.begin(); i + 1 < attrPath.end(); i++) { + if (i->symbol.set()) { + ExprAttrs::AttrDefs::iterator j = attrs->attrs.find(i->symbol); + if (j != attrs->attrs.end()) { + if (!j->second.inherited) { + ExprAttrs * attrs2 = dynamic_cast<ExprAttrs *>(j->second.e); + if (!attrs2) { dupAttr(attrPath, pos, j->second.pos); } + attrs = attrs2; + } else + dupAttr(attrPath, pos, j->second.pos); + } else { + ExprAttrs * nested = new ExprAttrs; + attrs->attrs[i->symbol] = ExprAttrs::AttrDef(nested, pos); + attrs = nested; + } + } else { + ExprAttrs *nested = new ExprAttrs; + attrs->dynamicAttrs.push_back(ExprAttrs::DynamicAttrDef(i->expr, nested, pos)); + attrs = nested; + } + } + // Expr insertion. + // ========================== + if (i->symbol.set()) { + ExprAttrs::AttrDefs::iterator j = attrs->attrs.find(i->symbol); + if (j != attrs->attrs.end()) { + // This attr path is already defined. However, if both + // e and the expr pointed by the attr path are two attribute sets, + // we want to merge them. + // Otherwise, throw an error. + auto ae = dynamic_cast<ExprAttrs *>(e); + auto jAttrs = dynamic_cast<ExprAttrs *>(j->second.e); + if (jAttrs && ae) { + for (auto & ad : ae->attrs) { + auto j2 = jAttrs->attrs.find(ad.first); + if (j2 != jAttrs->attrs.end()) // Attr already defined in iAttrs, error. + dupAttr(ad.first, j2->second.pos, ad.second.pos); + jAttrs->attrs[ad.first] = ad.second; + } + } else { + dupAttr(attrPath, pos, j->second.pos); + } + } else { + // This attr path is not defined. Let's create it. + attrs->attrs[i->symbol] = ExprAttrs::AttrDef(e, pos); + e->setName(i->symbol); + } + } else { + attrs->dynamicAttrs.push_back(ExprAttrs::DynamicAttrDef(i->expr, e, pos)); + } +} + + +static void addFormal(const Pos & pos, Formals * formals, const Formal & formal) +{ + if (formals->argNames.find(formal.name) != formals->argNames.end()) + throw ParseError(format("duplicate formal function argument '%1%' at %2%") + % formal.name % pos); + formals->formals.push_front(formal); + formals->argNames.insert(formal.name); +} + + +static Expr * stripIndentation(const Pos & pos, SymbolTable & symbols, vector<Expr *> & es) +{ + if (es.empty()) { return new ExprString(symbols.Create("")); } + + /* Figure out the minimum indentation. Note that by design + whitespace-only final lines are not taken into account. (So + the " " in "\n ''" is ignored, but the " " in "\n foo''" is.) */ + bool atStartOfLine = true; /* = seen only whitespace in the current line */ + size_t minIndent = 1000000; + size_t curIndent = 0; + for (auto & i : es) { + ExprIndStr * e = dynamic_cast<ExprIndStr *>(i); + if (!e) { + /* Anti-quotations end the current start-of-line whitespace. */ + if (atStartOfLine) { + atStartOfLine = false; + if (curIndent < minIndent) { minIndent = curIndent; } + } + continue; + } + for (size_t j = 0; j < e->s.size(); ++j) { + if (atStartOfLine) { + if (e->s[j] == ' ') + curIndent++; + else if (e->s[j] == '\n') { + /* Empty line, doesn't influence minimum + indentation. */ + curIndent = 0; + } else { + atStartOfLine = false; + if (curIndent < minIndent) { minIndent = curIndent; } + } + } else if (e->s[j] == '\n') { + atStartOfLine = true; + curIndent = 0; + } + } + } + + /* Strip spaces from each line. */ + vector<Expr *> * es2 = new vector<Expr *>; + atStartOfLine = true; + size_t curDropped = 0; + size_t n = es.size(); + for (vector<Expr *>::iterator i = es.begin(); i != es.end(); ++i, --n) { + ExprIndStr * e = dynamic_cast<ExprIndStr *>(*i); + if (!e) { + atStartOfLine = false; + curDropped = 0; + es2->push_back(*i); + continue; + } + + std::string s2; + for (size_t j = 0; j < e->s.size(); ++j) { + if (atStartOfLine) { + if (e->s[j] == ' ') { + if (curDropped++ >= minIndent) + s2 += e->s[j]; + } + else if (e->s[j] == '\n') { + curDropped = 0; + s2 += e->s[j]; + } else { + atStartOfLine = false; + curDropped = 0; + s2 += e->s[j]; + } + } else { + s2 += e->s[j]; + if (e->s[j] == '\n') { atStartOfLine = true; } + } + } + + /* Remove the last line if it is empty and consists only of + spaces. */ + if (n == 1) { + string::size_type p = s2.find_last_of('\n'); + if (p != string::npos && s2.find_first_not_of(' ', p + 1) == string::npos) + s2 = string(s2, 0, p + 1); + } + + es2->push_back(new ExprString(symbols.Create(s2))); + } + + /* If this is a single string, then don't do a concatenation. */ + return es2->size() == 1 && dynamic_cast<ExprString *>((*es2)[0]) ? (*es2)[0] : new ExprConcatStrings(pos, true, es2); +} + + +static inline Pos makeCurPos(const YYLTYPE & loc, ParseData * data) +{ + return Pos(data->path, loc.first_line, loc.first_column); +} + +#define CUR_POS makeCurPos(*yylocp, data) + + +} + + +void yyerror(YYLTYPE * loc, yyscan_t scanner, ParseData * data, const char * error) +{ + data->error = (format("%1%, at %2%") + % error % makeCurPos(*loc, data)).str(); +} + + +%} + +%union { + // !!! We're probably leaking stuff here. + nix::Expr * e; + nix::ExprList * list; + nix::ExprAttrs * attrs; + nix::Formals * formals; + nix::Formal * formal; + nix::NixInt n; + nix::NixFloat nf; + const char * id; // !!! -> Symbol + char * path; + char * uri; + std::vector<nix::AttrName> * attrNames; + std::vector<nix::Expr *> * string_parts; +} + +%type <e> start expr expr_function expr_if expr_op +%type <e> expr_app expr_select expr_simple +%type <list> expr_list +%type <attrs> binds +%type <formals> formals +%type <formal> formal +%type <attrNames> attrs attrpath +%type <string_parts> string_parts_interpolated ind_string_parts +%type <e> string_parts string_attr +%type <id> attr +%token <id> ID ATTRPATH +%token <e> STR IND_STR +%token <n> INT +%token <nf> FLOAT +%token <path> PATH HPATH SPATH +%token <uri> URI +%token IF THEN ELSE ASSERT WITH LET IN REC INHERIT EQ NEQ AND OR IMPL OR_KW +%token DOLLAR_CURLY /* == ${ */ +%token IND_STRING_OPEN IND_STRING_CLOSE +%token ELLIPSIS + +%right IMPL +%left OR +%left AND +%nonassoc EQ NEQ +%nonassoc '<' '>' LEQ GEQ +%right UPDATE +%left NOT +%left '+' '-' +%left '*' '/' +%right CONCAT +%nonassoc '?' +%nonassoc NEGATE + +%% + +start: expr { data->result = $1; }; + +expr: expr_function; + +expr_function + : ID ':' expr_function + { $$ = new ExprLambda(CUR_POS, data->symbols.Create($1), false, 0, $3); } + | '{' formals '}' ':' expr_function + { $$ = new ExprLambda(CUR_POS, data->symbols.Create(""), true, $2, $5); } + | '{' formals '}' '@' ID ':' expr_function + { $$ = new ExprLambda(CUR_POS, data->symbols.Create($5), true, $2, $7); } + | ID '@' '{' formals '}' ':' expr_function + { $$ = new ExprLambda(CUR_POS, data->symbols.Create($1), true, $4, $7); } + | ASSERT expr ';' expr_function + { $$ = new ExprAssert(CUR_POS, $2, $4); } + | WITH expr ';' expr_function + { $$ = new ExprWith(CUR_POS, $2, $4); } + | LET binds IN expr_function + { if (!$2->dynamicAttrs.empty()) + throw ParseError(format("dynamic attributes not allowed in let at %1%") + % CUR_POS); + $$ = new ExprLet($2, $4); + } + | expr_if + ; + +expr_if + : IF expr THEN expr ELSE expr { $$ = new ExprIf($2, $4, $6); } + | expr_op + ; + +expr_op + : '!' expr_op %prec NOT { $$ = new ExprOpNot($2); } + | '-' expr_op %prec NEGATE { $$ = new ExprApp(CUR_POS, new ExprApp(new ExprVar(data->symbols.Create("__sub")), new ExprInt(0)), $2); } + | expr_op EQ expr_op { $$ = new ExprOpEq($1, $3); } + | expr_op NEQ expr_op { $$ = new ExprOpNEq($1, $3); } + | expr_op '<' expr_op { $$ = new ExprApp(CUR_POS, new ExprApp(new ExprVar(data->symbols.Create("__lessThan")), $1), $3); } + | expr_op LEQ expr_op { $$ = new ExprOpNot(new ExprApp(CUR_POS, new ExprApp(new ExprVar(data->symbols.Create("__lessThan")), $3), $1)); } + | expr_op '>' expr_op { $$ = new ExprApp(CUR_POS, new ExprApp(new ExprVar(data->symbols.Create("__lessThan")), $3), $1); } + | expr_op GEQ expr_op { $$ = new ExprOpNot(new ExprApp(CUR_POS, new ExprApp(new ExprVar(data->symbols.Create("__lessThan")), $1), $3)); } + | expr_op AND expr_op { $$ = new ExprOpAnd(CUR_POS, $1, $3); } + | expr_op OR expr_op { $$ = new ExprOpOr(CUR_POS, $1, $3); } + | expr_op IMPL expr_op { $$ = new ExprOpImpl(CUR_POS, $1, $3); } + | expr_op UPDATE expr_op { $$ = new ExprOpUpdate(CUR_POS, $1, $3); } + | expr_op '?' attrpath { $$ = new ExprOpHasAttr($1, *$3); } + | expr_op '+' expr_op + { $$ = new ExprConcatStrings(CUR_POS, false, new vector<Expr *>({$1, $3})); } + | expr_op '-' expr_op { $$ = new ExprApp(CUR_POS, new ExprApp(new ExprVar(data->symbols.Create("__sub")), $1), $3); } + | expr_op '*' expr_op { $$ = new ExprApp(CUR_POS, new ExprApp(new ExprVar(data->symbols.Create("__mul")), $1), $3); } + | expr_op '/' expr_op { $$ = new ExprApp(CUR_POS, new ExprApp(new ExprVar(data->symbols.Create("__div")), $1), $3); } + | expr_op CONCAT expr_op { $$ = new ExprOpConcatLists(CUR_POS, $1, $3); } + | expr_app + ; + +expr_app + : expr_app expr_select + { $$ = new ExprApp(CUR_POS, $1, $2); } + | expr_select { $$ = $1; } + ; + +expr_select + : expr_simple '.' attrpath + { $$ = new ExprSelect(CUR_POS, $1, *$3, 0); } + | expr_simple '.' attrpath OR_KW expr_select + { $$ = new ExprSelect(CUR_POS, $1, *$3, $5); } + | /* Backwards compatibility: because Nixpkgs has a rarely used + function named โorโ, allow stuff like โmap or [...]โ. */ + expr_simple OR_KW + { $$ = new ExprApp(CUR_POS, $1, new ExprVar(CUR_POS, data->symbols.Create("or"))); } + | expr_simple { $$ = $1; } + ; + +expr_simple + : ID { + if (strcmp($1, "__curPos") == 0) + $$ = new ExprPos(CUR_POS); + else + $$ = new ExprVar(CUR_POS, data->symbols.Create($1)); + } + | INT { $$ = new ExprInt($1); } + | FLOAT { $$ = new ExprFloat($1); } + | '"' string_parts '"' { $$ = $2; } + | IND_STRING_OPEN ind_string_parts IND_STRING_CLOSE { + $$ = stripIndentation(CUR_POS, data->symbols, *$2); + } + | PATH { $$ = new ExprPath(absPath($1, data->basePath)); } + | HPATH { $$ = new ExprPath(getHome() + string{$1 + 1}); } + | SPATH { + std::string path($1 + 1, strlen($1) - 2); + $$ = new ExprApp(CUR_POS, + new ExprApp(new ExprVar(data->symbols.Create("__findFile")), + new ExprVar(data->symbols.Create("__nixPath"))), + new ExprString(data->symbols.Create(path))); + } + | URI { $$ = new ExprString(data->symbols.Create($1)); } + | '(' expr ')' { $$ = $2; } + /* Let expressions `let {..., body = ...}' are just desugared + into `(rec {..., body = ...}).body'. */ + | LET '{' binds '}' + { $3->recursive = true; $$ = new ExprSelect(noPos, $3, data->symbols.Create("body")); } + | REC '{' binds '}' + { $3->recursive = true; $$ = $3; } + | '{' binds '}' + { $$ = $2; } + | '[' expr_list ']' { $$ = $2; } + ; + +string_parts + : STR + | string_parts_interpolated { $$ = new ExprConcatStrings(CUR_POS, true, $1); } + | { $$ = new ExprString(data->symbols.Create("")); } + ; + +string_parts_interpolated + : string_parts_interpolated STR { $$ = $1; $1->push_back($2); } + | string_parts_interpolated DOLLAR_CURLY expr '}' { $$ = $1; $1->push_back($3); } + | DOLLAR_CURLY expr '}' { $$ = new vector<Expr *>; $$->push_back($2); } + | STR DOLLAR_CURLY expr '}' { + $$ = new vector<Expr *>; + $$->push_back($1); + $$->push_back($3); + } + ; + +ind_string_parts + : ind_string_parts IND_STR { $$ = $1; $1->push_back($2); } + | ind_string_parts DOLLAR_CURLY expr '}' { $$ = $1; $1->push_back($3); } + | { $$ = new vector<Expr *>; } + ; + +binds + : binds attrpath '=' expr ';' { $$ = $1; addAttr($$, *$2, $4, makeCurPos(@2, data)); } + | binds INHERIT attrs ';' + { $$ = $1; + for (auto & i : *$3) { + if ($$->attrs.find(i.symbol) != $$->attrs.end()) + dupAttr(i.symbol, makeCurPos(@3, data), $$->attrs[i.symbol].pos); + Pos pos = makeCurPos(@3, data); + $$->attrs[i.symbol] = ExprAttrs::AttrDef(new ExprVar(CUR_POS, i.symbol), pos, true); + } + } + | binds INHERIT '(' expr ')' attrs ';' + { $$ = $1; + /* !!! Should ensure sharing of the expression in $4. */ + for (auto & i : *$6) { + if ($$->attrs.find(i.symbol) != $$->attrs.end()) + dupAttr(i.symbol, makeCurPos(@6, data), $$->attrs[i.symbol].pos); + $$->attrs[i.symbol] = ExprAttrs::AttrDef(new ExprSelect(CUR_POS, $4, i.symbol), makeCurPos(@6, data)); + } + } + | { $$ = new ExprAttrs; } + ; + +attrs + : attrs attr { $$ = $1; $1->push_back(AttrName(data->symbols.Create($2))); } + | attrs string_attr + { $$ = $1; + ExprString * str = dynamic_cast<ExprString *>($2); + if (str) { + $$->push_back(AttrName(str->s)); + delete str; + } else + throw ParseError(format("dynamic attributes not allowed in inherit at %1%") + % makeCurPos(@2, data)); + } + | { $$ = new AttrPath; } + ; + +attrpath + : attrpath '.' attr { $$ = $1; $1->push_back(AttrName(data->symbols.Create($3))); } + | attrpath '.' string_attr + { $$ = $1; + ExprString * str = dynamic_cast<ExprString *>($3); + if (str) { + $$->push_back(AttrName(str->s)); + delete str; + } else + $$->push_back(AttrName($3)); + } + | attr { $$ = new vector<AttrName>; $$->push_back(AttrName(data->symbols.Create($1))); } + | string_attr + { $$ = new vector<AttrName>; + ExprString *str = dynamic_cast<ExprString *>($1); + if (str) { + $$->push_back(AttrName(str->s)); + delete str; + } else + $$->push_back(AttrName($1)); + } + ; + +attr + : ID { $$ = $1; } + | OR_KW { $$ = "or"; } + ; + +string_attr + : '"' string_parts '"' { $$ = $2; } + | DOLLAR_CURLY expr '}' { $$ = $2; } + ; + +expr_list + : expr_list expr_select { $$ = $1; $1->elems.push_back($2); /* !!! dangerous */ } + | { $$ = new ExprList; } + ; + +formals + : formal ',' formals + { $$ = $3; addFormal(CUR_POS, $$, *$1); } + | formal + { $$ = new Formals; addFormal(CUR_POS, $$, *$1); $$->ellipsis = false; } + | + { $$ = new Formals; $$->ellipsis = false; } + | ELLIPSIS + { $$ = new Formals; $$->ellipsis = true; } + ; + +formal + : ID { $$ = new Formal(data->symbols.Create($1), 0); } + | ID '?' expr { $$ = new Formal(data->symbols.Create($1), $3); } + ; + +%% + + +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <unistd.h> + +#include "eval.hh" +#include "download.hh" +#include "store-api.hh" + + +namespace nix { + + +Expr * EvalState::parse(const char * text, + const Path & path, const Path & basePath, StaticEnv & staticEnv) +{ + yyscan_t scanner; + ParseData data(*this); + data.basePath = basePath; + data.path = data.symbols.Create(path); + + yylex_init(&scanner); + yy_scan_string(text, scanner); + int res = yyparse(scanner, &data); + yylex_destroy(scanner); + + if (res) { throw ParseError(data.error); } + + data.result->bindVars(staticEnv); + + return data.result; +} + + +Path resolveExprPath(Path path) +{ + assert(path[0] == '/'); + + /* If `path' is a symlink, follow it. This is so that relative + path references work. */ + struct stat st; + while (true) { + if (lstat(path.c_str(), &st)) + throw SysError(format("getting status of '%1%'") % path); + if (!S_ISLNK(st.st_mode)) { break; } + path = absPath(readLink(path), dirOf(path)); + } + + /* If `path' refers to a directory, append `/default.nix'. */ + if (S_ISDIR(st.st_mode)) + path = canonPath(path + "/default.nix"); + + return path; +} + + +Expr * EvalState::parseExprFromFile(const Path & path) +{ + return parseExprFromFile(path, staticBaseEnv); +} + + +Expr * EvalState::parseExprFromFile(const Path & path, StaticEnv & staticEnv) +{ + return parse(readFile(path).c_str(), path, dirOf(path), staticEnv); +} + + +Expr * EvalState::parseExprFromString(const std::string & s, const Path & basePath, StaticEnv & staticEnv) +{ + return parse(s.c_str(), "(string)", basePath, staticEnv); +} + + +Expr * EvalState::parseExprFromString(const std::string & s, const Path & basePath) +{ + return parseExprFromString(s, basePath, staticBaseEnv); +} + + +Expr * EvalState::parseStdin() +{ + //Activity act(*logger, lvlTalkative, format("parsing standard input")); + return parseExprFromString(drainFD(0), absPath(".")); +} + + +void EvalState::addToSearchPath(const std::string & s) +{ + size_t pos = s.find('='); + std::string prefix; + Path path; + if (pos == string::npos) { + path = s; + } else { + prefix = string(s, 0, pos); + path = string(s, pos + 1); + } + + searchPath.emplace_back(prefix, path); +} + + +Path EvalState::findFile(const std::string & path) +{ + return findFile(searchPath, path); +} + + +Path EvalState::findFile(SearchPath & searchPath, const std::string & path, const Pos & pos) +{ + for (auto & i : searchPath) { + std::string suffix; + if (i.first.empty()) + suffix = "/" + path; + else { + auto s = i.first.size(); + if (path.compare(0, s, i.first) != 0 || + (path.size() > s && path[s] != '/')) + continue; + suffix = path.size() == s ? "" : "/" + string(path, s); + } + auto r = resolveSearchPathElem(i); + if (!r.first) { continue; } + Path res = r.second + suffix; + if (pathExists(res)) { return canonPath(res); } + } + format f = format( + "file '%1%' was not found in the Nix search path (add it using $NIX_PATH or -I)" + + string(pos ? ", at %2%" : "")); + f.exceptions(boost::io::all_error_bits ^ boost::io::too_many_args_bit); + throw ThrownError(f % path % pos); +} + + +std::pair<bool, std::string> EvalState::resolveSearchPathElem(const SearchPathElem & elem) +{ + auto i = searchPathResolved.find(elem.second); + if (i != searchPathResolved.end()) { return i->second; } + + std::pair<bool, std::string> res; + + if (isUri(elem.second)) { + try { + CachedDownloadRequest request(elem.second); + request.unpack = true; + res = { true, getDownloader()->downloadCached(store, request).path }; + } catch (DownloadError & e) { + LOG(WARNING) << "Nix search path entry '" << elem.second << "' cannot be downloaded, ignoring"; + res = { false, "" }; + } + } else { + auto path = absPath(elem.second); + if (pathExists(path)) { + res = { true, path }; + } else { + LOG(WARNING) << "Nix search path entry '" << elem.second << "' does not exist, ignoring"; + res = { false, "" }; + } + } + + DLOG(INFO) << "resolved search path element '" << elem.second << "' to '" << res.second << "'"; + + searchPathResolved[elem.second] = res; + return res; +} + +} // namespace nix diff --git a/third_party/nix/src/libexpr/primops.cc b/third_party/nix/src/libexpr/primops.cc new file mode 100644 index 000000000000..713c31e4fa79 --- /dev/null +++ b/third_party/nix/src/libexpr/primops.cc @@ -0,0 +1,2456 @@ +#include "primops.hh" + +#include <algorithm> +#include <cstring> +#include <regex> + +#include <dlfcn.h> +#include <glog/logging.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <unistd.h> + +#include "archive.hh" +#include "derivations.hh" +#include "download.hh" +#include "eval-inline.hh" +#include "eval.hh" +#include "globals.hh" +#include "json-to-value.hh" +#include "json.hh" +#include "names.hh" +#include "store-api.hh" +#include "util.hh" +#include "value-to-json.hh" +#include "value-to-xml.hh" + +namespace nix { + +/************************************************************* + * Miscellaneous + *************************************************************/ + +/* Decode a context string โ!<name>!<path>โ into a pair <path, + name>. */ +std::pair<string, string> decodeContext(const std::string& s) { + if (s.at(0) == '!') { + size_t index = s.find('!', 1); + return std::pair<string, string>(string(s, index + 1), + string(s, 1, index - 1)); + } + return std::pair<string, string>(s.at(0) == '/' ? s : string(s, 1), ""); +} + +InvalidPathError::InvalidPathError(const Path& path) + : EvalError(format("path '%1%' is not valid") % path), path(path) {} + +void EvalState::realiseContext(const PathSet& context) { + PathSet drvs; + + for (auto& i : context) { + std::pair<string, string> decoded = decodeContext(i); + Path ctx = decoded.first; + assert(store->isStorePath(ctx)); + if (!store->isValidPath(ctx)) { + throw InvalidPathError(ctx); + } + if (!decoded.second.empty() && nix::isDerivation(ctx)) { + drvs.insert(decoded.first + "!" + decoded.second); + + /* Add the output of this derivation to the allowed + paths. */ + if (allowedPaths) { + auto drv = store->derivationFromPath(decoded.first); + auto i = drv.outputs.find(decoded.second); + if (i == drv.outputs.end()) { + throw Error("derivation '%s' does not have an output named '%s'", + decoded.first, decoded.second); + } + allowedPaths->insert(i->second.path); + } + } + } + + if (drvs.empty()) { + return; + } + + if (!evalSettings.enableImportFromDerivation) { + throw EvalError(format("attempted to realize '%1%' during evaluation but " + "'allow-import-from-derivation' is false") % + *(drvs.begin())); + } + + /* For performance, prefetch all substitute info. */ + PathSet willBuild; + PathSet willSubstitute; + PathSet unknown; + unsigned long long downloadSize; + unsigned long long narSize; + store->queryMissing(drvs, willBuild, willSubstitute, unknown, downloadSize, + narSize); + store->buildPaths(drvs); +} + +/* Load and evaluate an expression from path specified by the + argument. */ +static void prim_scopedImport(EvalState& state, const Pos& pos, Value** args, + Value& v) { + PathSet context; + Path path = state.coerceToPath(pos, *args[1], context); + + try { + state.realiseContext(context); + } catch (InvalidPathError& e) { + throw EvalError( + format("cannot import '%1%', since path '%2%' is not valid, at %3%") % + path % e.path % pos); + } + + Path realPath = state.checkSourcePath(state.toRealPath(path, context)); + + if (state.store->isStorePath(path) && state.store->isValidPath(path) && + isDerivation(path)) { + Derivation drv = readDerivation(realPath); + Value& w = *state.allocValue(); + state.mkAttrs(w, 3 + drv.outputs.size()); + Value* v2 = state.allocAttr(w, state.sDrvPath); + mkString(*v2, path, {"=" + path}); + v2 = state.allocAttr(w, state.sName); + mkString(*v2, drv.env["name"]); + Value* outputsVal = state.allocAttr(w, state.symbols.Create("outputs")); + state.mkList(*outputsVal, drv.outputs.size()); + unsigned int outputs_index = 0; + + for (const auto& o : drv.outputs) { + v2 = state.allocAttr(w, state.symbols.Create(o.first)); + mkString(*v2, o.second.path, {"!" + o.first + "!" + path}); + outputsVal->listElems()[outputs_index] = state.allocValue(); + mkString(*(outputsVal->listElems()[outputs_index++]), o.first); + } + Value fun; + state.evalFile( + settings.nixDataDir + "/nix/corepkgs/imported-drv-to-derivation.nix", + fun); + state.forceFunction(fun, pos); + mkApp(v, fun, w); + state.forceAttrs(v, pos); + } else { + state.forceAttrs(*args[0]); + if (args[0]->attrs->empty()) { + state.evalFile(realPath, v); + } else { + Env* env = &state.allocEnv(args[0]->attrs->size()); + env->up = &state.baseEnv; + + StaticEnv staticEnv(false, &state.staticBaseEnv); + + unsigned int displ = 0; + for (auto& attr : *args[0]->attrs) { + staticEnv.vars[attr.second.name] = displ; + env->values[displ++] = attr.second.value; + } + + DLOG(INFO) << "evaluating file '" << realPath << "'"; + Expr* e = state.parseExprFromFile(resolveExprPath(realPath), staticEnv); + + e->eval(state, *env, v); + } + } +} + +/* Want reasonable symbol names, so extern C */ +/* !!! Should we pass the Pos or the file name too? */ +extern "C" using ValueInitializer = void(*)(EvalState&, Value&); + +/* Load a ValueInitializer from a DSO and return whatever it initializes */ +void prim_importNative(EvalState& state, const Pos& pos, Value** args, + Value& v) { + PathSet context; + Path path = state.coerceToPath(pos, *args[0], context); + + try { + state.realiseContext(context); + } catch (InvalidPathError& e) { + throw EvalError( + format("cannot import '%1%', since path '%2%' is not valid, at %3%") % + path % e.path % pos); + } + + path = state.checkSourcePath(path); + + std::string sym = state.forceStringNoCtx(*args[1], pos); + + void* handle = dlopen(path.c_str(), RTLD_LAZY | RTLD_LOCAL); + if (handle == nullptr) { + throw EvalError(format("could not open '%1%': %2%") % path % dlerror()); + } + + dlerror(); + auto func = (ValueInitializer)dlsym(handle, sym.c_str()); + if (func == nullptr) { + char* message = dlerror(); + if (message != nullptr) { + throw EvalError(format("could not load symbol '%1%' from '%2%': %3%") % + sym % path % message); + } + throw EvalError(format("symbol '%1%' from '%2%' resolved to NULL when a " + "function pointer was expected") % + sym % path); + } + + (func)(state, v); + + /* We don't dlclose because v may be a primop referencing a function in the + * shared object file */ +} + +/* Execute a program and parse its output */ +void prim_exec(EvalState& state, const Pos& pos, Value** args, Value& v) { + state.forceList(*args[0], pos); + auto elems = args[0]->listElems(); + auto count = args[0]->listSize(); + if (count == 0) { + throw EvalError(format("at least one argument to 'exec' required, at %1%") % + pos); + } + PathSet context; + auto program = state.coerceToString(pos, *elems[0], context, false, false); + Strings commandArgs; + for (unsigned int i = 1; i < args[0]->listSize(); ++i) { + commandArgs.emplace_back( + state.coerceToString(pos, *elems[i], context, false, false)); + } + try { + state.realiseContext(context); + } catch (InvalidPathError& e) { + throw EvalError( + format("cannot execute '%1%', since path '%2%' is not valid, at %3%") % + program % e.path % pos); + } + + auto output = runProgram(program, true, commandArgs); + Expr* parsed; + try { + parsed = state.parseExprFromString(output, pos.file); + } catch (Error& e) { + e.addPrefix(format("While parsing the output from '%1%', at %2%\n") % + program % pos); + throw; + } + try { + state.eval(parsed, v); + } catch (Error& e) { + e.addPrefix(format("While evaluating the output from '%1%', at %2%\n") % + program % pos); + throw; + } +} + +/* Return a string representing the type of the expression. */ +static void prim_typeOf(EvalState& state, const Pos& pos, Value** args, + Value& v) { + state.forceValue(*args[0]); + std::string t; + switch (args[0]->type) { + case tInt: + t = "int"; + break; + case tBool: + t = "bool"; + break; + case tString: + t = "string"; + break; + case tPath: + t = "path"; + break; + case tNull: + t = "null"; + break; + case tAttrs: + t = "set"; + break; + case tList1: + case tList2: + case tListN: + t = "list"; + break; + case tLambda: + case tPrimOp: + case tPrimOpApp: + t = "lambda"; + break; + case tExternal: + t = args[0]->external->typeOf(); + break; + case tFloat: + t = "float"; + break; + default: + abort(); + } + mkString(v, state.symbols.Create(t)); +} + +/* Determine whether the argument is the null value. */ +static void prim_isNull(EvalState& state, const Pos& pos, Value** args, + Value& v) { + state.forceValue(*args[0]); + mkBool(v, args[0]->type == tNull); +} + +/* Determine whether the argument is a function. */ +static void prim_isFunction(EvalState& state, const Pos& pos, Value** args, + Value& v) { + state.forceValue(*args[0]); + bool res; + switch (args[0]->type) { + case tLambda: + case tPrimOp: + case tPrimOpApp: + res = true; + break; + default: + res = false; + break; + } + mkBool(v, res); +} + +/* Determine whether the argument is an integer. */ +static void prim_isInt(EvalState& state, const Pos& pos, Value** args, + Value& v) { + state.forceValue(*args[0]); + mkBool(v, args[0]->type == tInt); +} + +/* Determine whether the argument is a float. */ +static void prim_isFloat(EvalState& state, const Pos& pos, Value** args, + Value& v) { + state.forceValue(*args[0]); + mkBool(v, args[0]->type == tFloat); +} + +/* Determine whether the argument is a string. */ +static void prim_isString(EvalState& state, const Pos& pos, Value** args, + Value& v) { + state.forceValue(*args[0]); + mkBool(v, args[0]->type == tString); +} + +/* Determine whether the argument is a Boolean. */ +static void prim_isBool(EvalState& state, const Pos& pos, Value** args, + Value& v) { + state.forceValue(*args[0]); + mkBool(v, args[0]->type == tBool); +} + +/* Determine whether the argument is a path. */ +static void prim_isPath(EvalState& state, const Pos& pos, Value** args, + Value& v) { + state.forceValue(*args[0]); + mkBool(v, args[0]->type == tPath); +} + +struct CompareValues { + bool operator()(const Value* v1, const Value* v2) const { + if (v1->type == tFloat && v2->type == tInt) { + return v1->fpoint < v2->integer; + } + if (v1->type == tInt && v2->type == tFloat) { + return v1->integer < v2->fpoint; + } + if (v1->type != v2->type) { + throw EvalError(format("cannot compare %1% with %2%") % showType(*v1) % + showType(*v2)); + } + switch (v1->type) { + case tInt: + return v1->integer < v2->integer; + case tFloat: + return v1->fpoint < v2->fpoint; + case tString: + return strcmp(v1->string.s, v2->string.s) < 0; + case tPath: + return strcmp(v1->path, v2->path) < 0; + default: + throw EvalError(format("cannot compare %1% with %2%") % showType(*v1) % + showType(*v2)); + } + } +}; + +#if HAVE_BOEHMGC +typedef list<Value*, gc_allocator<Value*>> ValueList; +#else +typedef list<Value*> ValueList; +#endif + +static void prim_genericClosure(EvalState& state, const Pos& pos, Value** args, + Value& v) { + state.forceAttrs(*args[0], pos); + + /* Get the start set. */ + Bindings::iterator startSet = + args[0]->attrs->find(state.symbols.Create("startSet")); + if (startSet == args[0]->attrs->end()) { + throw EvalError(format("attribute 'startSet' required, at %1%") % pos); + } + state.forceList(*startSet->second.value, pos); + + ValueList workSet; + for (unsigned int n = 0; n < startSet->second.value->listSize(); ++n) { + workSet.push_back(startSet->second.value->listElems()[n]); + } + + /* Get the operator. */ + Bindings::iterator op = + args[0]->attrs->find(state.symbols.Create("operator")); + if (op == args[0]->attrs->end()) { + throw EvalError(format("attribute 'operator' required, at %1%") % pos); + } + state.forceValue(*op->second.value); + + /* Construct the closure by applying the operator to element of + `workSet', adding the result to `workSet', continuing until + no new elements are found. */ + ValueList res; + // `doneKeys' doesn't need to be a GC root, because its values are + // reachable from res. + set<Value*, CompareValues> doneKeys; + while (!workSet.empty()) { + Value* e = *(workSet.begin()); + workSet.pop_front(); + + state.forceAttrs(*e, pos); + + Bindings::iterator key = e->attrs->find(state.symbols.Create("key")); + if (key == e->attrs->end()) { + throw EvalError(format("attribute 'key' required, at %1%") % pos); + } + state.forceValue(*key->second.value); + + if (doneKeys.find(key->second.value) != doneKeys.end()) { + continue; + } + doneKeys.insert(key->second.value); + res.push_back(e); + + /* Call the `operator' function with `e' as argument. */ + Value call; + mkApp(call, *op->second.value, *e); + state.forceList(call, pos); + + /* Add the values returned by the operator to the work set. */ + for (unsigned int n = 0; n < call.listSize(); ++n) { + state.forceValue(*call.listElems()[n]); + workSet.push_back(call.listElems()[n]); + } + } + + /* Create the result list. */ + state.mkList(v, res.size()); + unsigned int n = 0; + for (auto& i : res) { + v.listElems()[n++] = i; + } +} + +static void prim_abort(EvalState& state, const Pos& pos, Value** args, + Value& v) { + PathSet context; + std::string s = state.coerceToString(pos, *args[0], context); + throw Abort( + format("evaluation aborted with the following error message: '%1%'") % s); +} + +static void prim_throw(EvalState& state, const Pos& pos, Value** args, + Value& v) { + PathSet context; + std::string s = state.coerceToString(pos, *args[0], context); + throw ThrownError(s); +} + +static void prim_addErrorContext(EvalState& state, const Pos& pos, Value** args, + Value& v) { + try { + state.forceValue(*args[1]); + v = *args[1]; + } catch (Error& e) { + PathSet context; + e.addPrefix(format("%1%\n") % state.coerceToString(pos, *args[0], context)); + throw; + } +} + +/* Try evaluating the argument. Success => {success=true; value=something;}, + * else => {success=false; value=false;} */ +static void prim_tryEval(EvalState& state, const Pos& pos, Value** args, + Value& v) { + state.mkAttrs(v, 2); + try { + state.forceValue(*args[0]); + v.attrs->push_back(Attr(state.sValue, args[0])); + mkBool(*state.allocAttr(v, state.symbols.Create("success")), true); + } catch (AssertionError& e) { + mkBool(*state.allocAttr(v, state.sValue), false); + mkBool(*state.allocAttr(v, state.symbols.Create("success")), false); + } +} + +/* Return an environment variable. Use with care. */ +static void prim_getEnv(EvalState& state, const Pos& pos, Value** args, + Value& v) { + std::string name = state.forceStringNoCtx(*args[0], pos); + mkString(v, evalSettings.restrictEval || evalSettings.pureEval + ? "" + : getEnv(name)); +} + +/* Evaluate the first argument, then return the second argument. */ +static void prim_seq(EvalState& state, const Pos& pos, Value** args, Value& v) { + state.forceValue(*args[0]); + state.forceValue(*args[1]); + v = *args[1]; +} + +/* Evaluate the first argument deeply (i.e. recursing into lists and + attrsets), then return the second argument. */ +static void prim_deepSeq(EvalState& state, const Pos& pos, Value** args, + Value& v) { + state.forceValueDeep(*args[0]); + state.forceValue(*args[1]); + v = *args[1]; +} + +/* Evaluate the first expression and print it on standard error. Then + return the second expression. Useful for debugging. */ +static void prim_trace(EvalState& state, const Pos& pos, Value** args, + Value& v) { + state.forceValue(*args[0]); + if (args[0]->type == tString) { + DLOG(INFO) << "trace: " << args[0]->string.s; + } else { + DLOG(INFO) << "trace: " << *args[0]; + } + state.forceValue(*args[1]); + v = *args[1]; +} + +void prim_valueSize(EvalState& state, const Pos& pos, Value** args, Value& v) { + /* We're not forcing the argument on purpose. */ + mkInt(v, valueSize(*args[0])); +} + +/************************************************************* + * Derivations + *************************************************************/ + +/* Construct (as a unobservable side effect) a Nix derivation + expression that performs the derivation described by the argument + set. Returns the original set extended with the following + attributes: `outPath' containing the primary output path of the + derivation; `drvPath' containing the path of the Nix expression; + and `type' set to `derivation' to indicate that this is a + derivation. */ +static void prim_derivationStrict(EvalState& state, const Pos& pos, + Value** args, Value& v) { + state.forceAttrs(*args[0], pos); + + /* Figure out the name first (for stack backtraces). */ + Bindings::iterator attr = args[0]->attrs->find(state.sName); + if (attr == args[0]->attrs->end()) { + throw EvalError(format("required attribute 'name' missing, at %1%") % pos); + } + std::string drvName; + Pos& posDrvName(*attr->second.pos); + try { + drvName = state.forceStringNoCtx(*attr->second.value, pos); + } catch (Error& e) { + e.addPrefix( + format("while evaluating the derivation attribute 'name' at %1%:\n") % + posDrvName); + throw; + } + + /* Check whether attributes should be passed as a JSON file. */ + std::ostringstream jsonBuf; + std::unique_ptr<JSONObject> jsonObject; + attr = args[0]->attrs->find(state.sStructuredAttrs); + if (attr != args[0]->attrs->end() && + state.forceBool(*attr->second.value, pos)) { + jsonObject = std::make_unique<JSONObject>(jsonBuf); + } + + /* Check whether null attributes should be ignored. */ + bool ignoreNulls = false; + attr = args[0]->attrs->find(state.sIgnoreNulls); + if (attr != args[0]->attrs->end()) { + ignoreNulls = state.forceBool(*attr->second.value, pos); + } + + /* Build the derivation expression by processing the attributes. */ + Derivation drv; + + PathSet context; + + std::optional<std::string> outputHash; + std::string outputHashAlgo; + bool outputHashRecursive = false; + + StringSet outputs; + outputs.insert("out"); + + for (auto& i : args[0]->attrs->lexicographicOrder()) { + if (i->name == state.sIgnoreNulls) { + continue; + } + const std::string& key = i->name; + + auto handleHashMode = [&](const std::string& s) { + if (s == "recursive") { + outputHashRecursive = true; + } else if (s == "flat") { + outputHashRecursive = false; + } else { + throw EvalError( + "invalid value '%s' for 'outputHashMode' attribute, at %s", s, + posDrvName); + } + }; + + auto handleOutputs = [&](const Strings& ss) { + outputs.clear(); + for (auto& j : ss) { + if (outputs.find(j) != outputs.end()) { + throw EvalError(format("duplicate derivation output '%1%', at %2%") % + j % posDrvName); + } + /* !!! Check whether j is a valid attribute + name. */ + /* Derivations cannot be named โdrvโ, because + then we'd have an attribute โdrvPathโ in + the resulting set. */ + if (j == "drv") { + throw EvalError( + format("invalid derivation output name 'drv', at %1%") % + posDrvName); + } + outputs.insert(j); + } + if (outputs.empty()) { + throw EvalError( + format("derivation cannot have an empty set of outputs, at %1%") % + posDrvName); + } + }; + + try { + if (ignoreNulls) { + state.forceValue(*i->value); + if (i->value->type == tNull) { + continue; + } + } + + /* The `args' attribute is special: it supplies the + command-line arguments to the builder. */ + if (i->name == state.sArgs) { + state.forceList(*i->value, pos); + for (unsigned int n = 0; n < i->value->listSize(); ++n) { + std::string s = state.coerceToString( + posDrvName, *i->value->listElems()[n], context, true); + drv.args.push_back(s); + } + } + + /* All other attributes are passed to the builder through + the environment. */ + else { + if (jsonObject) { + if (i->name == state.sStructuredAttrs) { + continue; + } + + auto placeholder(jsonObject->placeholder(key)); + printValueAsJSON(state, true, *i->value, placeholder, context); + + if (i->name == state.sBuilder) { + drv.builder = state.forceString(*i->value, context, posDrvName); + } else if (i->name == state.sSystem) { + drv.platform = state.forceStringNoCtx(*i->value, posDrvName); + } else if (i->name == state.sOutputHash) { + outputHash = state.forceStringNoCtx(*i->value, posDrvName); + } else if (i->name == state.sOutputHashAlgo) { + outputHashAlgo = state.forceStringNoCtx(*i->value, posDrvName); + } else if (i->name == state.sOutputHashMode) { + handleHashMode(state.forceStringNoCtx(*i->value, posDrvName)); + } else if (i->name == state.sOutputs) { + /* Require โoutputsโ to be a list of strings. */ + state.forceList(*i->value, posDrvName); + Strings ss; + for (unsigned int n = 0; n < i->value->listSize(); ++n) { + ss.emplace_back(state.forceStringNoCtx(*i->value->listElems()[n], + posDrvName)); + } + handleOutputs(ss); + } + + } else { + auto s = state.coerceToString(posDrvName, *i->value, context, true); + drv.env.emplace(key, s); + if (i->name == state.sBuilder) { + drv.builder = s; + } else if (i->name == state.sSystem) { + drv.platform = s; + } else if (i->name == state.sOutputHash) { + outputHash = s; + } else if (i->name == state.sOutputHashAlgo) { + outputHashAlgo = s; + } else if (i->name == state.sOutputHashMode) { + handleHashMode(s); + } else if (i->name == state.sOutputs) { + handleOutputs(tokenizeString<Strings>(s)); + } + } + } + + } catch (Error& e) { + e.addPrefix(format("while evaluating the attribute '%1%' of the " + "derivation '%2%' at %3%:\n") % + key % drvName % posDrvName); + throw; + } + } + + if (jsonObject) { + jsonObject.reset(); + drv.env.emplace("__json", jsonBuf.str()); + } + + /* Everything in the context of the strings in the derivation + attributes should be added as dependencies of the resulting + derivation. */ + for (auto& path : context) { + /* Paths marked with `=' denote that the path of a derivation + is explicitly passed to the builder. Since that allows the + builder to gain access to every path in the dependency + graph of the derivation (including all outputs), all paths + in the graph must be added to this derivation's list of + inputs to ensure that they are available when the builder + runs. */ + if (path.at(0) == '=') { + /* !!! This doesn't work if readOnlyMode is set. */ + PathSet refs; + state.store->computeFSClosure(string(path, 1), refs); + for (auto& j : refs) { + drv.inputSrcs.insert(j); + if (isDerivation(j)) { + drv.inputDrvs[j] = state.store->queryDerivationOutputNames(j); + } + } + } + + /* Handle derivation outputs of the form โ!<name>!<path>โ. */ + else if (path.at(0) == '!') { + std::pair<string, string> ctx = decodeContext(path); + drv.inputDrvs[ctx.first].insert(ctx.second); + } + + /* Otherwise it's a source file. */ + else { + drv.inputSrcs.insert(path); + } + } + + /* Do we have all required attributes? */ + if (drv.builder.empty()) { + throw EvalError(format("required attribute 'builder' missing, at %1%") % + posDrvName); + } + if (drv.platform.empty()) { + throw EvalError(format("required attribute 'system' missing, at %1%") % + posDrvName); + } + + /* Check whether the derivation name is valid. */ + checkStoreName(drvName); + if (isDerivation(drvName)) { + throw EvalError( + format("derivation names are not allowed to end in '%1%', at %2%") % + drvExtension % posDrvName); + } + + if (outputHash) { + /* Handle fixed-output derivations. */ + if (outputs.size() != 1 || *(outputs.begin()) != "out") { + throw Error(format("multiple outputs are not supported in fixed-output " + "derivations, at %1%") % + posDrvName); + } + + HashType ht = + outputHashAlgo.empty() ? htUnknown : parseHashType(outputHashAlgo); + Hash h(*outputHash, ht); + + Path outPath = + state.store->makeFixedOutputPath(outputHashRecursive, h, drvName); + if (!jsonObject) { + drv.env["out"] = outPath; + } + drv.outputs["out"] = DerivationOutput( + outPath, (outputHashRecursive ? "r:" : "") + printHashType(h.type), + h.to_string(Base16, false)); + } + + else { + /* Construct the "masked" store derivation, which is the final + one except that in the list of outputs, the output paths + are empty, and the corresponding environment variables have + an empty value. This ensures that changes in the set of + output names do get reflected in the hash. */ + for (auto& i : outputs) { + if (!jsonObject) { + drv.env[i] = ""; + } + drv.outputs[i] = DerivationOutput("", "", ""); + } + + /* Use the masked derivation expression to compute the output + path. */ + Hash h = hashDerivationModulo(*state.store, drv); + + for (auto& i : drv.outputs) { + if (i.second.path.empty()) { + Path outPath = state.store->makeOutputPath(i.first, h, drvName); + if (!jsonObject) { + drv.env[i.first] = outPath; + } + i.second.path = outPath; + } + } + } + + /* Write the resulting term into the Nix store directory. */ + Path drvPath = writeDerivation(state.store, drv, drvName, state.repair); + + DLOG(INFO) << "instantiated '" << drvName << "' -> '" << drvPath << "'"; + + /* Optimisation, but required in read-only mode! because in that + case we don't actually write store derivations, so we can't + read them later. */ + drvHashes[drvPath] = hashDerivationModulo(*state.store, drv); + + state.mkAttrs(v, 1 + drv.outputs.size()); + mkString(*state.allocAttr(v, state.sDrvPath), drvPath, {"=" + drvPath}); + for (auto& i : drv.outputs) { + mkString(*state.allocAttr(v, state.symbols.Create(i.first)), i.second.path, + {"!" + i.first + "!" + drvPath}); + } +} + +/* Return a placeholder string for the specified output that will be + substituted by the corresponding output path at build time. For + example, 'placeholder "out"' returns the string + /1rz4g4znpzjwh1xymhjpm42vipw92pr73vdgl6xs1hycac8kf2n9. At build + time, any occurence of this string in an derivation attribute will + be replaced with the concrete path in the Nix store of the output + โoutโ. */ +static void prim_placeholder(EvalState& state, const Pos& pos, Value** args, + Value& v) { + mkString(v, hashPlaceholder(state.forceStringNoCtx(*args[0], pos))); +} + +/************************************************************* + * Paths + *************************************************************/ + +/* Convert the argument to a path. !!! obsolete? */ +static void prim_toPath(EvalState& state, const Pos& pos, Value** args, + Value& v) { + PathSet context; + Path path = state.coerceToPath(pos, *args[0], context); + mkString(v, canonPath(path), context); +} + +/* Allow a valid store path to be used in an expression. This is + useful in some generated expressions such as in nix-push, which + generates a call to a function with an already existing store path + as argument. You don't want to use `toPath' here because it copies + the path to the Nix store, which yields a copy like + /nix/store/newhash-oldhash-oldname. In the past, `toPath' had + special case behaviour for store paths, but that created weird + corner cases. */ +static void prim_storePath(EvalState& state, const Pos& pos, Value** args, + Value& v) { + PathSet context; + Path path = state.checkSourcePath(state.coerceToPath(pos, *args[0], context)); + /* Resolve symlinks in โpathโ, unless โpathโ itself is a symlink + directly in the store. The latter condition is necessary so + e.g. nix-push does the right thing. */ + if (!state.store->isStorePath(path)) { + path = canonPath(path, true); + } + if (!state.store->isInStore(path)) { + throw EvalError(format("path '%1%' is not in the Nix store, at %2%") % + path % pos); + } + Path path2 = state.store->toStorePath(path); + if (!settings.readOnlyMode) { + state.store->ensurePath(path2); + } + context.insert(path2); + mkString(v, path, context); +} + +static void prim_pathExists(EvalState& state, const Pos& pos, Value** args, + Value& v) { + PathSet context; + Path path = state.coerceToPath(pos, *args[0], context); + try { + state.realiseContext(context); + } catch (InvalidPathError& e) { + throw EvalError(format("cannot check the existence of '%1%', since path " + "'%2%' is not valid, at %3%") % + path % e.path % pos); + } + + try { + mkBool(v, pathExists(state.checkSourcePath(path))); + } catch (SysError& e) { + /* Don't give away info from errors while canonicalising + โpathโ in restricted mode. */ + mkBool(v, false); + } catch (RestrictedPathError& e) { + mkBool(v, false); + } +} + +/* Return the base name of the given string, i.e., everything + following the last slash. */ +static void prim_baseNameOf(EvalState& state, const Pos& pos, Value** args, + Value& v) { + PathSet context; + mkString( + v, baseNameOf(state.coerceToString(pos, *args[0], context, false, false)), + context); +} + +/* Return the directory of the given path, i.e., everything before the + last slash. Return either a path or a string depending on the type + of the argument. */ +static void prim_dirOf(EvalState& state, const Pos& pos, Value** args, + Value& v) { + PathSet context; + Path dir = dirOf(state.coerceToString(pos, *args[0], context, false, false)); + if (args[0]->type == tPath) { + mkPath(v, dir.c_str()); + } else { + mkString(v, dir, context); + } +} + +/* Return the contents of a file as a string. */ +static void prim_readFile(EvalState& state, const Pos& pos, Value** args, + Value& v) { + PathSet context; + Path path = state.coerceToPath(pos, *args[0], context); + try { + state.realiseContext(context); + } catch (InvalidPathError& e) { + throw EvalError( + format("cannot read '%1%', since path '%2%' is not valid, at %3%") % + path % e.path % pos); + } + std::string s = + readFile(state.checkSourcePath(state.toRealPath(path, context))); + if (s.find((char)0) != string::npos) { + throw Error(format("the contents of the file '%1%' cannot be represented " + "as a Nix string") % + path); + } + mkString(v, s.c_str()); +} + +/* Find a file in the Nix search path. Used to implement <x> paths, + which are desugared to 'findFile __nixPath "x"'. */ +static void prim_findFile(EvalState& state, const Pos& pos, Value** args, + Value& v) { + state.forceList(*args[0], pos); + + SearchPath searchPath; + + for (unsigned int n = 0; n < args[0]->listSize(); ++n) { + Value& v2(*args[0]->listElems()[n]); + state.forceAttrs(v2, pos); + + std::string prefix; + Bindings::iterator i = v2.attrs->find(state.symbols.Create("prefix")); + if (i != v2.attrs->end()) { + prefix = state.forceStringNoCtx(*i->second.value, pos); + } + + i = v2.attrs->find(state.symbols.Create("path")); + if (i == v2.attrs->end()) { + throw EvalError(format("attribute 'path' missing, at %1%") % pos); + } + + PathSet context; + std::string path = + state.coerceToString(pos, *i->second.value, context, false, false); + + try { + state.realiseContext(context); + } catch (InvalidPathError& e) { + throw EvalError( + format("cannot find '%1%', since path '%2%' is not valid, at %3%") % + path % e.path % pos); + } + + searchPath.emplace_back(prefix, path); + } + + std::string path = state.forceStringNoCtx(*args[1], pos); + + mkPath(v, + state.checkSourcePath(state.findFile(searchPath, path, pos)).c_str()); +} + +/* Return the cryptographic hash of a file in base-16. */ +static void prim_hashFile(EvalState& state, const Pos& pos, Value** args, + Value& v) { + std::string type = state.forceStringNoCtx(*args[0], pos); + HashType ht = parseHashType(type); + if (ht == htUnknown) { + throw Error(format("unknown hash type '%1%', at %2%") % type % pos); + } + + PathSet context; // discarded + Path p = state.coerceToPath(pos, *args[1], context); + + mkString(v, hashFile(ht, state.checkSourcePath(p)).to_string(Base16, false), + context); +} + +/* Read a directory (without . or ..) */ +static void prim_readDir(EvalState& state, const Pos& pos, Value** args, + Value& v) { + PathSet ctx; + Path path = state.coerceToPath(pos, *args[0], ctx); + try { + state.realiseContext(ctx); + } catch (InvalidPathError& e) { + throw EvalError( + format("cannot read '%1%', since path '%2%' is not valid, at %3%") % + path % e.path % pos); + } + + DirEntries entries = readDirectory(state.checkSourcePath(path)); + state.mkAttrs(v, entries.size()); + + for (auto& ent : entries) { + Value* ent_val = state.allocAttr(v, state.symbols.Create(ent.name)); + if (ent.type == DT_UNKNOWN) { + ent.type = getFileType(path + "/" + ent.name); + } + mkStringNoCopy(*ent_val, + ent.type == DT_REG + ? "regular" + : ent.type == DT_DIR + ? "directory" + : ent.type == DT_LNK ? "symlink" : "unknown"); + } +} + +/************************************************************* + * Creating files + *************************************************************/ + +/* Convert the argument (which can be any Nix expression) to an XML + representation returned in a string. Not all Nix expressions can + be sensibly or completely represented (e.g., functions). */ +static void prim_toXML(EvalState& state, const Pos& pos, Value** args, + Value& v) { + std::ostringstream out; + PathSet context; + printValueAsXML(state, true, false, *args[0], out, context); + mkString(v, out.str(), context); +} + +/* Convert the argument (which can be any Nix expression) to a JSON + string. Not all Nix expressions can be sensibly or completely + represented (e.g., functions). */ +static void prim_toJSON(EvalState& state, const Pos& pos, Value** args, + Value& v) { + std::ostringstream out; + PathSet context; + printValueAsJSON(state, true, *args[0], out, context); + mkString(v, out.str(), context); +} + +/* Parse a JSON string to a value. */ +static void prim_fromJSON(EvalState& state, const Pos& pos, Value** args, + Value& v) { + std::string s = state.forceStringNoCtx(*args[0], pos); + parseJSON(state, s, v); +} + +/* Store a string in the Nix store as a source file that can be used + as an input by derivations. */ +static void prim_toFile(EvalState& state, const Pos& pos, Value** args, + Value& v) { + PathSet context; + std::string name = state.forceStringNoCtx(*args[0], pos); + std::string contents = state.forceString(*args[1], context, pos); + + PathSet refs; + + for (auto path : context) { + if (path.at(0) != '/') { + throw EvalError(format("in 'toFile': the file '%1%' cannot refer to " + "derivation outputs, at %2%") % + name % pos); + } + refs.insert(path); + } + + Path storePath = + settings.readOnlyMode + ? state.store->computeStorePathForText(name, contents, refs) + : state.store->addTextToStore(name, contents, refs, state.repair); + + /* Note: we don't need to add `context' to the context of the + result, since `storePath' itself has references to the paths + used in args[1]. */ + + mkString(v, storePath, {storePath}); +} + +static void addPath(EvalState& state, const Pos& pos, const std::string& name, + const Path& path_, Value* filterFun, bool recursive, + const Hash& expectedHash, Value& v) { + const auto path = evalSettings.pureEval && expectedHash + ? path_ + : state.checkSourcePath(path_); + PathFilter filter = filterFun != nullptr ? ([&](const Path& path) { + auto st = lstat(path); + + /* Call the filter function. The first argument is the path, + the second is a string indicating the type of the file. */ + Value arg1; + mkString(arg1, path); + + Value fun2; + state.callFunction(*filterFun, arg1, fun2, noPos); + + Value arg2; + mkString(arg2, S_ISREG(st.st_mode) + ? "regular" + : S_ISDIR(st.st_mode) + ? "directory" + : S_ISLNK(st.st_mode) + ? "symlink" + : "unknown" /* not supported, will fail! */); + + Value res; + state.callFunction(fun2, arg2, res, noPos); + + return state.forceBool(res, pos); + }) + : defaultPathFilter; + + Path expectedStorePath; + if (expectedHash) { + expectedStorePath = + state.store->makeFixedOutputPath(recursive, expectedHash, name); + } + Path dstPath; + if (!expectedHash || !state.store->isValidPath(expectedStorePath)) { + dstPath = settings.readOnlyMode + ? state.store + ->computeStorePathForPath(name, path, recursive, + htSHA256, filter) + .first + : state.store->addToStore(name, path, recursive, htSHA256, + filter, state.repair); + if (expectedHash && expectedStorePath != dstPath) { + throw Error(format("store path mismatch in (possibly filtered) path " + "added from '%1%'") % + path); + } + } else { + dstPath = expectedStorePath; + } + + mkString(v, dstPath, {dstPath}); +} + +static void prim_filterSource(EvalState& state, const Pos& pos, Value** args, + Value& v) { + PathSet context; + Path path = state.coerceToPath(pos, *args[1], context); + if (!context.empty()) { + throw EvalError(format("string '%1%' cannot refer to other paths, at %2%") % + path % pos); + } + + state.forceValue(*args[0]); + if (args[0]->type != tLambda) { + throw TypeError(format("first argument in call to 'filterSource' is not a " + "function but %1%, at %2%") % + showType(*args[0]) % pos); + } + + addPath(state, pos, baseNameOf(path), path, args[0], true, Hash(), v); +} + +static void prim_path(EvalState& state, const Pos& pos, Value** args, + Value& v) { + state.forceAttrs(*args[0], pos); + Path path; + std::string name; + Value* filterFun = nullptr; + auto recursive = true; + Hash expectedHash; + + for (auto& attr : *args[0]->attrs) { + const std::string& n(attr.second.name); + if (n == "path") { + PathSet context; + path = state.coerceToPath(*attr.second.pos, *attr.second.value, context); + if (!context.empty()) { + throw EvalError( + format("string '%1%' cannot refer to other paths, at %2%") % path % + *attr.second.pos); + } + } else if (attr.second.name == state.sName) { + name = state.forceStringNoCtx(*attr.second.value, *attr.second.pos); + } else if (n == "filter") { + state.forceValue(*attr.second.value); + filterFun = attr.second.value; + } else if (n == "recursive") { + recursive = state.forceBool(*attr.second.value, *attr.second.pos); + } else if (n == "sha256") { + expectedHash = + Hash(state.forceStringNoCtx(*attr.second.value, *attr.second.pos), + htSHA256); + } else { + throw EvalError( + format("unsupported argument '%1%' to 'addPath', at %2%") % + attr.second.name % *attr.second.pos); + } + } + if (path.empty()) { + throw EvalError(format("'path' required, at %1%") % pos); + } + if (name.empty()) { + name = baseNameOf(path); + } + + addPath(state, pos, name, path, filterFun, recursive, expectedHash, v); +} + +/************************************************************* + * Sets + *************************************************************/ + +/* Return the names of the attributes in a set as a sorted list of + strings. */ +static void prim_attrNames(EvalState& state, const Pos& pos, Value** args, + Value& v) { + state.forceAttrs(*args[0], pos); + + state.mkList(v, args[0]->attrs->size()); + + size_t n = 0; + for (auto& i : *args[0]->attrs) { + mkString(*(v.listElems()[n++] = state.allocValue()), i.second.name); + } + + std::sort(v.listElems(), v.listElems() + n, [](Value* v1, Value* v2) { + return strcmp(v1->string.s, v2->string.s) < 0; + }); +} + +/* Return the values of the attributes in a set as a list, in the same + order as attrNames. */ +static void prim_attrValues(EvalState& state, const Pos& pos, Value** args, + Value& v) { + state.forceAttrs(*args[0], pos); + + state.mkList(v, args[0]->attrs->size()); + + unsigned int n = 0; + for (auto& i : *args[0]->attrs) { + v.listElems()[n++] = (Value*)&i; + } + + std::sort(v.listElems(), v.listElems() + n, [](Value* v1, Value* v2) { + return (string)((Attr*)v1)->name < (string)((Attr*)v2)->name; + }); + + for (unsigned int i = 0; i < n; ++i) { + v.listElems()[i] = ((Attr*)v.listElems()[i])->value; + } +} + +/* Dynamic version of the `.' operator. */ +void prim_getAttr(EvalState& state, const Pos& pos, Value** args, Value& v) { + std::string attr = state.forceStringNoCtx(*args[0], pos); + state.forceAttrs(*args[1], pos); + // !!! Should we create a symbol here or just do a lookup? + Bindings::iterator i = args[1]->attrs->find(state.symbols.Create(attr)); + if (i == args[1]->attrs->end()) { + throw EvalError(format("attribute '%1%' missing, at %2%") % attr % pos); + } + // !!! add to stack trace? + if (state.countCalls && (i->second.pos != nullptr)) { + state.attrSelects[*i->second.pos]++; + } + state.forceValue(*i->second.value); + v = *i->second.value; +} + +/* Return position information of the specified attribute. */ +void prim_unsafeGetAttrPos(EvalState& state, const Pos& pos, Value** args, + Value& v) { + std::string attr = state.forceStringNoCtx(*args[0], pos); + state.forceAttrs(*args[1], pos); + Bindings::iterator i = args[1]->attrs->find(state.symbols.Create(attr)); + if (i == args[1]->attrs->end()) { + mkNull(v); + } else { + state.mkPos(v, i->second.pos); + } +} + +/* Dynamic version of the `?' operator. */ +static void prim_hasAttr(EvalState& state, const Pos& pos, Value** args, + Value& v) { + std::string attr = state.forceStringNoCtx(*args[0], pos); + state.forceAttrs(*args[1], pos); + mkBool(v, args[1]->attrs->find(state.symbols.Create(attr)) != + args[1]->attrs->end()); +} + +/* Determine whether the argument is a set. */ +static void prim_isAttrs(EvalState& state, const Pos& pos, Value** args, + Value& v) { + state.forceValue(*args[0]); + mkBool(v, args[0]->type == tAttrs); +} + +static void prim_removeAttrs(EvalState& state, const Pos& pos, Value** args, + Value& v) { + state.forceAttrs(*args[0], pos); + state.forceList(*args[1], pos); + + /* Get the attribute names to be removed. */ + std::set<Symbol> names; + for (unsigned int i = 0; i < args[1]->listSize(); ++i) { + state.forceStringNoCtx(*args[1]->listElems()[i], pos); + names.insert(state.symbols.Create(args[1]->listElems()[i]->string.s)); + } + + /* Copy all attributes not in that set. Note that we don't need + to sort v.attrs because it's a subset of an already sorted + vector. */ + state.mkAttrs(v, args[0]->attrs->size()); + for (auto& i : *args[0]->attrs) { + if (names.find(i.second.name) == names.end()) { + v.attrs->push_back(i.second); + } + } +} + +/* Builds a set from a list specifying (name, value) pairs. To be + precise, a list [{name = "name1"; value = value1;} ... {name = + "nameN"; value = valueN;}] is transformed to {name1 = value1; + ... nameN = valueN;}. In case of duplicate occurences of the same + name, the first takes precedence. */ +static void prim_listToAttrs(EvalState& state, const Pos& pos, Value** args, + Value& v) { + state.forceList(*args[0], pos); + + state.mkAttrs(v, args[0]->listSize()); + + std::set<Symbol> seen; + + for (unsigned int i = 0; i < args[0]->listSize(); ++i) { + Value& v2(*args[0]->listElems()[i]); + state.forceAttrs(v2, pos); + + Bindings::iterator j = v2.attrs->find(state.sName); + if (j == v2.attrs->end()) { + throw TypeError( + format( + "'name' attribute missing in a call to 'listToAttrs', at %1%") % + pos); + } + std::string name = state.forceStringNoCtx(*j->second.value, pos); + + Symbol sym = state.symbols.Create(name); + if (seen.find(sym) == seen.end()) { + Bindings::iterator j2 = + // TODO(tazjin): this line used to construct the symbol again: + // state.symbols.Create(state.sValue)); + // Why? + v2.attrs->find(state.sValue); + if (j2 == v2.attrs->end()) { + throw TypeError(format("'value' attribute missing in a call to " + "'listToAttrs', at %1%") % + pos); + } + + v.attrs->push_back(Attr(sym, j2->second.value, j2->second.pos)); + seen.insert(sym); + } + } +} + +/* Return the right-biased intersection of two sets as1 and as2, + i.e. a set that contains every attribute from as2 that is also a + member of as1. */ +static void prim_intersectAttrs(EvalState& state, const Pos& pos, Value** args, + Value& v) { + state.forceAttrs(*args[0], pos); + state.forceAttrs(*args[1], pos); + + state.mkAttrs(v, std::min(args[0]->attrs->size(), args[1]->attrs->size())); + + for (auto& i : *args[0]->attrs) { + Bindings::iterator j = args[1]->attrs->find(i.second.name); + if (j != args[1]->attrs->end()) { + v.attrs->push_back(j->second); + } + } +} + +/* Collect each attribute named `attr' from a list of attribute sets. + Sets that don't contain the named attribute are ignored. + + Example: + catAttrs "a" [{a = 1;} {b = 0;} {a = 2;}] + => [1 2] +*/ +static void prim_catAttrs(EvalState& state, const Pos& pos, Value** args, + Value& v) { + Symbol attrName = state.symbols.Create(state.forceStringNoCtx(*args[0], pos)); + state.forceList(*args[1], pos); + + Value* res[args[1]->listSize()]; + unsigned int found = 0; + + for (unsigned int n = 0; n < args[1]->listSize(); ++n) { + Value& v2(*args[1]->listElems()[n]); + state.forceAttrs(v2, pos); + Bindings::iterator i = v2.attrs->find(attrName); + if (i != v2.attrs->end()) { + res[found++] = i->second.value; + } + } + + state.mkList(v, found); + for (unsigned int n = 0; n < found; ++n) { + v.listElems()[n] = res[n]; + } +} + +/* Return a set containing the names of the formal arguments expected + by the function `f'. The value of each attribute is a Boolean + denoting whether the corresponding argument has a default value. For + instance, + + functionArgs ({ x, y ? 123}: ...) + => { x = false; y = true; } + + "Formal argument" here refers to the attributes pattern-matched by + the function. Plain lambdas are not included, e.g. + + functionArgs (x: ...) + => { } +*/ +static void prim_functionArgs(EvalState& state, const Pos& pos, Value** args, + Value& v) { + state.forceValue(*args[0]); + if (args[0]->type != tLambda) { + throw TypeError(format("'functionArgs' requires a function, at %1%") % pos); + } + + if (!args[0]->lambda.fun->matchAttrs) { + state.mkAttrs(v, 0); + return; + } + + state.mkAttrs(v, args[0]->lambda.fun->formals->formals.size()); + for (auto& i : args[0]->lambda.fun->formals->formals) { + // !!! should optimise booleans (allocate only once) + // TODO(tazjin): figure out what the above comment means + mkBool(*state.allocAttr(v, i.name), i.def != nullptr); + } +} + +/* Apply a function to every element of an attribute set. */ +static void prim_mapAttrs(EvalState& state, const Pos& pos, Value** args, + Value& v) { + state.forceAttrs(*args[1], pos); + + state.mkAttrs(v, args[1]->attrs->size()); + + for (auto& i : *args[1]->attrs) { + Value* vName = state.allocValue(); + Value* vFun2 = state.allocValue(); + mkString(*vName, i.second.name); + mkApp(*vFun2, *args[0], *vName); + mkApp(*state.allocAttr(v, i.second.name), *vFun2, *i.second.value); + } +} + +/************************************************************* + * Lists + *************************************************************/ + +/* Determine whether the argument is a list. */ +static void prim_isList(EvalState& state, const Pos& pos, Value** args, + Value& v) { + state.forceValue(*args[0]); + mkBool(v, args[0]->isList()); +} + +static void elemAt(EvalState& state, const Pos& pos, Value& list, int n, + Value& v) { + state.forceList(list, pos); + if (n < 0 || (unsigned int)n >= list.listSize()) { + throw Error(format("list index %1% is out of bounds, at %2%") % n % pos); + } + state.forceValue(*list.listElems()[n]); + v = *list.listElems()[n]; +} + +/* Return the n-1'th element of a list. */ +static void prim_elemAt(EvalState& state, const Pos& pos, Value** args, + Value& v) { + elemAt(state, pos, *args[0], state.forceInt(*args[1], pos), v); +} + +/* Return the first element of a list. */ +static void prim_head(EvalState& state, const Pos& pos, Value** args, + Value& v) { + elemAt(state, pos, *args[0], 0, v); +} + +/* Return a list consisting of everything but the first element of + a list. Warning: this function takes O(n) time, so you probably + don't want to use it! */ +static void prim_tail(EvalState& state, const Pos& pos, Value** args, + Value& v) { + state.forceList(*args[0], pos); + if (args[0]->listSize() == 0) { + throw Error(format("'tail' called on an empty list, at %1%") % pos); + } + state.mkList(v, args[0]->listSize() - 1); + for (unsigned int n = 0; n < v.listSize(); ++n) { + v.listElems()[n] = args[0]->listElems()[n + 1]; + } +} + +/* Apply a function to every element of a list. */ +static void prim_map(EvalState& state, const Pos& pos, Value** args, Value& v) { + state.forceList(*args[1], pos); + + state.mkList(v, args[1]->listSize()); + + for (unsigned int n = 0; n < v.listSize(); ++n) { + mkApp(*(v.listElems()[n] = state.allocValue()), *args[0], + *args[1]->listElems()[n]); + } +} + +/* Filter a list using a predicate; that is, return a list containing + every element from the list for which the predicate function + returns true. */ +static void prim_filter(EvalState& state, const Pos& pos, Value** args, + Value& v) { + state.forceFunction(*args[0], pos); + state.forceList(*args[1], pos); + + // FIXME: putting this on the stack is risky. + Value* vs[args[1]->listSize()]; + unsigned int k = 0; + + bool same = true; + for (unsigned int n = 0; n < args[1]->listSize(); ++n) { + Value res; + state.callFunction(*args[0], *args[1]->listElems()[n], res, noPos); + if (state.forceBool(res, pos)) { + vs[k++] = args[1]->listElems()[n]; + } else { + same = false; + } + } + + if (same) { + v = *args[1]; + } else { + state.mkList(v, k); + for (unsigned int n = 0; n < k; ++n) { + v.listElems()[n] = vs[n]; + } + } +} + +/* Return true if a list contains a given element. */ +static void prim_elem(EvalState& state, const Pos& pos, Value** args, + Value& v) { + bool res = false; + state.forceList(*args[1], pos); + for (unsigned int n = 0; n < args[1]->listSize(); ++n) { + if (state.eqValues(*args[0], *args[1]->listElems()[n])) { + res = true; + break; + } + } + mkBool(v, res); +} + +/* Concatenate a list of lists. */ +static void prim_concatLists(EvalState& state, const Pos& pos, Value** args, + Value& v) { + state.forceList(*args[0], pos); + state.concatLists(v, args[0]->listSize(), args[0]->listElems(), pos); +} + +/* Return the length of a list. This is an O(1) time operation. */ +static void prim_length(EvalState& state, const Pos& pos, Value** args, + Value& v) { + state.forceList(*args[0], pos); + mkInt(v, args[0]->listSize()); +} + +/* Reduce a list by applying a binary operator, from left to + right. The operator is applied strictly. */ +static void prim_foldlStrict(EvalState& state, const Pos& pos, Value** args, + Value& v) { + state.forceFunction(*args[0], pos); + state.forceList(*args[2], pos); + + if (args[2]->listSize() != 0u) { + Value* vCur = args[1]; + + for (unsigned int n = 0; n < args[2]->listSize(); ++n) { + Value vTmp; + state.callFunction(*args[0], *vCur, vTmp, pos); + vCur = n == args[2]->listSize() - 1 ? &v : state.allocValue(); + state.callFunction(vTmp, *args[2]->listElems()[n], *vCur, pos); + } + state.forceValue(v); + } else { + state.forceValue(*args[1]); + v = *args[1]; + } +} + +static void anyOrAll(bool any, EvalState& state, const Pos& pos, Value** args, + Value& v) { + state.forceFunction(*args[0], pos); + state.forceList(*args[1], pos); + + Value vTmp; + for (unsigned int n = 0; n < args[1]->listSize(); ++n) { + state.callFunction(*args[0], *args[1]->listElems()[n], vTmp, pos); + bool res = state.forceBool(vTmp, pos); + if (res == any) { + mkBool(v, any); + return; + } + } + + mkBool(v, !any); +} + +static void prim_any(EvalState& state, const Pos& pos, Value** args, Value& v) { + anyOrAll(true, state, pos, args, v); +} + +static void prim_all(EvalState& state, const Pos& pos, Value** args, Value& v) { + anyOrAll(false, state, pos, args, v); +} + +static void prim_genList(EvalState& state, const Pos& pos, Value** args, + Value& v) { + auto len = state.forceInt(*args[1], pos); + + if (len < 0) { + throw EvalError(format("cannot create list of size %1%, at %2%") % len % + pos); + } + + state.mkList(v, len); + + for (unsigned int n = 0; n < (unsigned int)len; ++n) { + Value* arg = state.allocValue(); + mkInt(*arg, n); + mkApp(*(v.listElems()[n] = state.allocValue()), *args[0], *arg); + } +} + +static void prim_lessThan(EvalState& state, const Pos& pos, Value** args, + Value& v); + +static void prim_sort(EvalState& state, const Pos& pos, Value** args, + Value& v) { + state.forceFunction(*args[0], pos); + state.forceList(*args[1], pos); + + auto len = args[1]->listSize(); + state.mkList(v, len); + for (unsigned int n = 0; n < len; ++n) { + state.forceValue(*args[1]->listElems()[n]); + v.listElems()[n] = args[1]->listElems()[n]; + } + + auto comparator = [&](Value* a, Value* b) { + /* Optimization: if the comparator is lessThan, bypass + callFunction. */ + if (args[0]->type == tPrimOp && args[0]->primOp->fun == prim_lessThan) { + return CompareValues()(a, b); + } + + Value vTmp1; + Value vTmp2; + state.callFunction(*args[0], *a, vTmp1, pos); + state.callFunction(vTmp1, *b, vTmp2, pos); + return state.forceBool(vTmp2, pos); + }; + + /* FIXME: std::sort can segfault if the comparator is not a strict + weak ordering. What to do? std::stable_sort() seems more + resilient, but no guarantees... */ + std::stable_sort(v.listElems(), v.listElems() + len, comparator); +} + +static void prim_partition(EvalState& state, const Pos& pos, Value** args, + Value& v) { + state.forceFunction(*args[0], pos); + state.forceList(*args[1], pos); + + auto len = args[1]->listSize(); + + ValueVector right; + ValueVector wrong; + + for (unsigned int n = 0; n < len; ++n) { + auto vElem = args[1]->listElems()[n]; + state.forceValue(*vElem); + Value res; + state.callFunction(*args[0], *vElem, res, pos); + if (state.forceBool(res, pos)) { + right.push_back(vElem); + } else { + wrong.push_back(vElem); + } + } + + state.mkAttrs(v, 2); + + Value* vRight = state.allocAttr(v, state.sRight); + auto rsize = right.size(); + state.mkList(*vRight, rsize); + if (rsize != 0u) { + memcpy(vRight->listElems(), right.data(), sizeof(Value*) * rsize); + } + + Value* vWrong = state.allocAttr(v, state.sWrong); + auto wsize = wrong.size(); + state.mkList(*vWrong, wsize); + if (wsize != 0u) { + memcpy(vWrong->listElems(), wrong.data(), sizeof(Value*) * wsize); + } +} + +/* concatMap = f: list: concatLists (map f list); */ +/* C++-version is to avoid allocating `mkApp', call `f' eagerly */ +static void prim_concatMap(EvalState& state, const Pos& pos, Value** args, + Value& v) { + state.forceFunction(*args[0], pos); + state.forceList(*args[1], pos); + auto nrLists = args[1]->listSize(); + + Value lists[nrLists]; + size_t len = 0; + + for (unsigned int n = 0; n < nrLists; ++n) { + Value* vElem = args[1]->listElems()[n]; + state.callFunction(*args[0], *vElem, lists[n], pos); + state.forceList(lists[n], pos); + len += lists[n].listSize(); + } + + state.mkList(v, len); + auto out = v.listElems(); + for (unsigned int n = 0, pos = 0; n < nrLists; ++n) { + auto l = lists[n].listSize(); + if (l != 0u) { + memcpy(out + pos, lists[n].listElems(), l * sizeof(Value*)); + } + pos += l; + } +} + +/************************************************************* + * Integer arithmetic + *************************************************************/ + +static void prim_add(EvalState& state, const Pos& pos, Value** args, Value& v) { + state.forceValue(*args[0], pos); + state.forceValue(*args[1], pos); + if (args[0]->type == tFloat || args[1]->type == tFloat) { + mkFloat(v, + state.forceFloat(*args[0], pos) + state.forceFloat(*args[1], pos)); + } else { + mkInt(v, state.forceInt(*args[0], pos) + state.forceInt(*args[1], pos)); + } +} + +static void prim_sub(EvalState& state, const Pos& pos, Value** args, Value& v) { + state.forceValue(*args[0], pos); + state.forceValue(*args[1], pos); + if (args[0]->type == tFloat || args[1]->type == tFloat) { + mkFloat(v, + state.forceFloat(*args[0], pos) - state.forceFloat(*args[1], pos)); + } else { + mkInt(v, state.forceInt(*args[0], pos) - state.forceInt(*args[1], pos)); + } +} + +static void prim_mul(EvalState& state, const Pos& pos, Value** args, Value& v) { + state.forceValue(*args[0], pos); + state.forceValue(*args[1], pos); + if (args[0]->type == tFloat || args[1]->type == tFloat) { + mkFloat(v, + state.forceFloat(*args[0], pos) * state.forceFloat(*args[1], pos)); + } else { + mkInt(v, state.forceInt(*args[0], pos) * state.forceInt(*args[1], pos)); + } +} + +static void prim_div(EvalState& state, const Pos& pos, Value** args, Value& v) { + state.forceValue(*args[0], pos); + state.forceValue(*args[1], pos); + + NixFloat f2 = state.forceFloat(*args[1], pos); + if (f2 == 0) { + throw EvalError(format("division by zero, at %1%") % pos); + } + + if (args[0]->type == tFloat || args[1]->type == tFloat) { + mkFloat(v, + state.forceFloat(*args[0], pos) / state.forceFloat(*args[1], pos)); + } else { + NixInt i1 = state.forceInt(*args[0], pos); + NixInt i2 = state.forceInt(*args[1], pos); + /* Avoid division overflow as it might raise SIGFPE. */ + if (i1 == std::numeric_limits<NixInt>::min() && i2 == -1) { + throw EvalError(format("overflow in integer division, at %1%") % pos); + } + mkInt(v, i1 / i2); + } +} + +static void prim_bitAnd(EvalState& state, const Pos& pos, Value** args, + Value& v) { + mkInt(v, state.forceInt(*args[0], pos) & state.forceInt(*args[1], pos)); +} + +static void prim_bitOr(EvalState& state, const Pos& pos, Value** args, + Value& v) { + mkInt(v, state.forceInt(*args[0], pos) | state.forceInt(*args[1], pos)); +} + +static void prim_bitXor(EvalState& state, const Pos& pos, Value** args, + Value& v) { + mkInt(v, state.forceInt(*args[0], pos) ^ state.forceInt(*args[1], pos)); +} + +static void prim_lessThan(EvalState& state, const Pos& pos, Value** args, + Value& v) { + state.forceValue(*args[0]); + state.forceValue(*args[1]); + CompareValues comp; + mkBool(v, comp(args[0], args[1])); +} + +/************************************************************* + * String manipulation + *************************************************************/ + +/* Convert the argument to a string. Paths are *not* copied to the + store, so `toString /foo/bar' yields `"/foo/bar"', not + `"/nix/store/whatever..."'. */ +static void prim_toString(EvalState& state, const Pos& pos, Value** args, + Value& v) { + PathSet context; + std::string s = state.coerceToString(pos, *args[0], context, true, false); + mkString(v, s, context); +} + +/* `substring start len str' returns the substring of `str' starting + at character position `min(start, stringLength str)' inclusive and + ending at `min(start + len, stringLength str)'. `start' must be + non-negative. */ +static void prim_substring(EvalState& state, const Pos& pos, Value** args, + Value& v) { + int start = state.forceInt(*args[0], pos); + int len = state.forceInt(*args[1], pos); + PathSet context; + std::string s = state.coerceToString(pos, *args[2], context); + + if (start < 0) { + throw EvalError(format("negative start position in 'substring', at %1%") % + pos); + } + + mkString(v, (unsigned int)start >= s.size() ? "" : string(s, start, len), + context); +} + +static void prim_stringLength(EvalState& state, const Pos& pos, Value** args, + Value& v) { + PathSet context; + std::string s = state.coerceToString(pos, *args[0], context); + mkInt(v, s.size()); +} + +/* Return the cryptographic hash of a string in base-16. */ +static void prim_hashString(EvalState& state, const Pos& pos, Value** args, + Value& v) { + std::string type = state.forceStringNoCtx(*args[0], pos); + HashType ht = parseHashType(type); + if (ht == htUnknown) { + throw Error(format("unknown hash type '%1%', at %2%") % type % pos); + } + + PathSet context; // discarded + std::string s = state.forceString(*args[1], context, pos); + + mkString(v, hashString(ht, s).to_string(Base16, false), context); +} + +/* Match a regular expression against a string and return either + โnullโ or a list containing substring matches. */ +static void prim_match(EvalState& state, const Pos& pos, Value** args, + Value& v) { + auto re = state.forceStringNoCtx(*args[0], pos); + + try { + std::regex regex(re, std::regex::extended); + + PathSet context; + const std::string str = state.forceString(*args[1], context, pos); + + std::smatch match; + if (!std::regex_match(str, match, regex)) { + mkNull(v); + return; + } + + // the first match is the whole string + const size_t len = match.size() - 1; + state.mkList(v, len); + for (size_t i = 0; i < len; ++i) { + if (!match[i + 1].matched) { + mkNull(*(v.listElems()[i] = state.allocValue())); + } else { + mkString(*(v.listElems()[i] = state.allocValue()), + match[i + 1].str().c_str()); + } + } + + } catch (std::regex_error& e) { + if (e.code() == std::regex_constants::error_space) { + // limit is _GLIBCXX_REGEX_STATE_LIMIT for libstdc++ + throw EvalError("memory limit exceeded by regular expression '%s', at %s", + re, pos); + } + throw EvalError("invalid regular expression '%s', at %s", re, pos); + } +} + +/* Split a std::string with a regular expression, and return a list of the + non-matching parts interleaved by the lists of the matching groups. */ +static void prim_split(EvalState& state, const Pos& pos, Value** args, + Value& v) { + auto re = state.forceStringNoCtx(*args[0], pos); + + try { + std::regex regex(re, std::regex::extended); + + PathSet context; + const std::string str = state.forceString(*args[1], context, pos); + + auto begin = std::sregex_iterator(str.begin(), str.end(), regex); + auto end = std::sregex_iterator(); + + // Any matches results are surrounded by non-matching results. + const size_t len = std::distance(begin, end); + state.mkList(v, 2 * len + 1); + size_t idx = 0; + Value* elem; + + if (len == 0) { + v.listElems()[idx++] = args[1]; + return; + } + + for (std::sregex_iterator i = begin; i != end; ++i) { + assert(idx <= 2 * len + 1 - 3); + std::smatch match = *i; + + // Add a string for non-matched characters. + elem = v.listElems()[idx++] = state.allocValue(); + mkString(*elem, match.prefix().str().c_str()); + + // Add a list for matched substrings. + const size_t slen = match.size() - 1; + elem = v.listElems()[idx++] = state.allocValue(); + + // Start at 1, beacause the first match is the whole string. + state.mkList(*elem, slen); + for (size_t si = 0; si < slen; ++si) { + if (!match[si + 1].matched) { + mkNull(*(elem->listElems()[si] = state.allocValue())); + } else { + mkString(*(elem->listElems()[si] = state.allocValue()), + match[si + 1].str().c_str()); + } + } + + // Add a string for non-matched suffix characters. + if (idx == 2 * len) { + elem = v.listElems()[idx++] = state.allocValue(); + mkString(*elem, match.suffix().str().c_str()); + } + } + assert(idx == 2 * len + 1); + + } catch (std::regex_error& e) { + if (e.code() == std::regex_constants::error_space) { + // limit is _GLIBCXX_REGEX_STATE_LIMIT for libstdc++ + throw EvalError("memory limit exceeded by regular expression '%s', at %s", + re, pos); + } + throw EvalError("invalid regular expression '%s', at %s", re, pos); + } +} + +static void prim_concatStringSep(EvalState& state, const Pos& pos, Value** args, + Value& v) { + PathSet context; + + auto sep = state.forceString(*args[0], context, pos); + state.forceList(*args[1], pos); + + std::string res; + res.reserve((args[1]->listSize() + 32) * sep.size()); + bool first = true; + + for (unsigned int n = 0; n < args[1]->listSize(); ++n) { + if (first) { + first = false; + } else { + res += sep; + } + + res += state.coerceToString(pos, *args[1]->listElems()[n], context); + } + + mkString(v, res, context); +} + +static void prim_replaceStrings(EvalState& state, const Pos& pos, Value** args, + Value& v) { + state.forceList(*args[0], pos); + state.forceList(*args[1], pos); + if (args[0]->listSize() != args[1]->listSize()) { + throw EvalError(format("'from' and 'to' arguments to 'replaceStrings' have " + "different lengths, at %1%") % + pos); + } + + vector<string> from; + from.reserve(args[0]->listSize()); + for (unsigned int n = 0; n < args[0]->listSize(); ++n) { + from.push_back(state.forceString(*args[0]->listElems()[n], pos)); + } + + vector<std::pair<string, PathSet>> to; + to.reserve(args[1]->listSize()); + for (unsigned int n = 0; n < args[1]->listSize(); ++n) { + PathSet ctx; + auto s = state.forceString(*args[1]->listElems()[n], ctx, pos); + to.emplace_back(std::move(s), std::move(ctx)); + } + + PathSet context; + auto s = state.forceString(*args[2], context, pos); + + std::string res; + // Loops one past last character to handle the case where 'from' contains an + // empty string. + for (size_t p = 0; p <= s.size();) { + bool found = false; + auto i = from.begin(); + auto j = to.begin(); + for (; i != from.end(); ++i, ++j) { + if (s.compare(p, i->size(), *i) == 0) { + found = true; + res += j->first; + if (i->empty()) { + if (p < s.size()) { + res += s[p]; + } + p++; + } else { + p += i->size(); + } + for (auto& path : j->second) { + context.insert(path); + } + j->second.clear(); + break; + } + } + if (!found) { + if (p < s.size()) { + res += s[p]; + } + p++; + } + } + + mkString(v, res, context); +} + +/************************************************************* + * Versions + *************************************************************/ + +static void prim_parseDrvName(EvalState& state, const Pos& pos, Value** args, + Value& v) { + std::string name = state.forceStringNoCtx(*args[0], pos); + DrvName parsed(name); + state.mkAttrs(v, 2); + mkString(*state.allocAttr(v, state.sName), parsed.name); + mkString(*state.allocAttr(v, state.symbols.Create("version")), + parsed.version); +} + +static void prim_compareVersions(EvalState& state, const Pos& pos, Value** args, + Value& v) { + std::string version1 = state.forceStringNoCtx(*args[0], pos); + std::string version2 = state.forceStringNoCtx(*args[1], pos); + mkInt(v, compareVersions(version1, version2)); +} + +static void prim_splitVersion(EvalState& state, const Pos& pos, Value** args, + Value& v) { + std::string version = state.forceStringNoCtx(*args[0], pos); + auto iter = version.cbegin(); + Strings components; + while (iter != version.cend()) { + auto component = nextComponent(iter, version.cend()); + if (component.empty()) { + break; + } + components.emplace_back(std::move(component)); + } + state.mkList(v, components.size()); + unsigned int n = 0; + for (auto& component : components) { + auto listElem = v.listElems()[n++] = state.allocValue(); + mkString(*listElem, component); + } +} + +/************************************************************* + * Networking + *************************************************************/ + +void fetch(EvalState& state, const Pos& pos, Value** args, Value& v, + const std::string& who, bool unpack, + const std::string& defaultName) { + CachedDownloadRequest request(""); + request.unpack = unpack; + request.name = defaultName; + + state.forceValue(*args[0]); + + if (args[0]->type == tAttrs) { + state.forceAttrs(*args[0], pos); + + for (auto& attr : *args[0]->attrs) { + std::string n(attr.second.name); + if (n == "url") { + request.uri = + state.forceStringNoCtx(*attr.second.value, *attr.second.pos); + } else if (n == "sha256") { + request.expectedHash = + Hash(state.forceStringNoCtx(*attr.second.value, *attr.second.pos), + htSHA256); + } else if (n == "name") { + request.name = + state.forceStringNoCtx(*attr.second.value, *attr.second.pos); + } else { + throw EvalError(format("unsupported argument '%1%' to '%2%', at %3%") % + attr.second.name % who % attr.second.pos); + } + } + + if (request.uri.empty()) { + throw EvalError(format("'url' argument required, at %1%") % pos); + } + + } else { + request.uri = state.forceStringNoCtx(*args[0], pos); + } + + state.checkURI(request.uri); + + if (evalSettings.pureEval && !request.expectedHash) { + throw Error("in pure evaluation mode, '%s' requires a 'sha256' argument", + who); + } + + auto res = getDownloader()->downloadCached(state.store, request); + + if (state.allowedPaths) { + state.allowedPaths->insert(res.path); + } + + mkString(v, res.storePath, PathSet({res.storePath})); +} + +static void prim_fetchurl(EvalState& state, const Pos& pos, Value** args, + Value& v) { + fetch(state, pos, args, v, "fetchurl", false, ""); +} + +static void prim_fetchTarball(EvalState& state, const Pos& pos, Value** args, + Value& v) { + fetch(state, pos, args, v, "fetchTarball", true, "source"); +} + +/************************************************************* + * Primop registration + *************************************************************/ + +RegisterPrimOp::PrimOps* RegisterPrimOp::primOps; + +RegisterPrimOp::RegisterPrimOp(const std::string& name, size_t arity, + PrimOpFun fun) { + if (primOps == nullptr) { + primOps = new PrimOps; + } + primOps->emplace_back(name, arity, fun); +} + +void EvalState::createBaseEnv() { + baseEnv.up = nullptr; + + /* Add global constants such as `true' to the base environment. */ + Value v; + + /* `builtins' must be first! */ + mkAttrs(v, 128); + addConstant("builtins", v); + + mkBool(v, true); + addConstant("true", v); + + mkBool(v, false); + addConstant("false", v); + + mkNull(v); + addConstant("null", v); + + auto vThrow = addPrimOp("throw", 1, prim_throw); + + auto addPurityError = [&](const std::string& name) { + Value* v2 = allocValue(); + mkString(*v2, fmt("'%s' is not allowed in pure evaluation mode", name)); + mkApp(v, *vThrow, *v2); + addConstant(name, v); + }; + + if (!evalSettings.pureEval) { + mkInt(v, time(nullptr)); + addConstant("__currentTime", v); + } + + if (!evalSettings.pureEval) { + mkString(v, settings.thisSystem); + addConstant("__currentSystem", v); + } + + mkString(v, nixVersion); + addConstant("__nixVersion", v); + + mkString(v, store->storeDir); + addConstant("__storeDir", v); + + /* Language version. This should be increased every time a new + language feature gets added. It's not necessary to increase it + when primops get added, because you can just use `builtins ? + primOp' to check. */ + mkInt(v, 5); + addConstant("__langVersion", v); + + // Miscellaneous + auto vScopedImport = addPrimOp("scopedImport", 2, prim_scopedImport); + Value* v2 = allocValue(); + mkAttrs(*v2, 0); + mkApp(v, *vScopedImport, *v2); + forceValue(v); + addConstant("import", v); + if (evalSettings.enableNativeCode) { + addPrimOp("__importNative", 2, prim_importNative); + addPrimOp("__exec", 1, prim_exec); + } + addPrimOp("__typeOf", 1, prim_typeOf); + addPrimOp("isNull", 1, prim_isNull); + addPrimOp("__isFunction", 1, prim_isFunction); + addPrimOp("__isString", 1, prim_isString); + addPrimOp("__isInt", 1, prim_isInt); + addPrimOp("__isFloat", 1, prim_isFloat); + addPrimOp("__isBool", 1, prim_isBool); + addPrimOp("__isPath", 1, prim_isPath); + addPrimOp("__genericClosure", 1, prim_genericClosure); + addPrimOp("abort", 1, prim_abort); + addPrimOp("__addErrorContext", 2, prim_addErrorContext); + addPrimOp("__tryEval", 1, prim_tryEval); + addPrimOp("__getEnv", 1, prim_getEnv); + + // Strictness + addPrimOp("__seq", 2, prim_seq); + addPrimOp("__deepSeq", 2, prim_deepSeq); + + // Debugging + addPrimOp("__trace", 2, prim_trace); + addPrimOp("__valueSize", 1, prim_valueSize); + + // Paths + addPrimOp("__toPath", 1, prim_toPath); + if (evalSettings.pureEval) { + addPurityError("__storePath"); + } else { + addPrimOp("__storePath", 1, prim_storePath); + } + addPrimOp("__pathExists", 1, prim_pathExists); + addPrimOp("baseNameOf", 1, prim_baseNameOf); + addPrimOp("dirOf", 1, prim_dirOf); + addPrimOp("__readFile", 1, prim_readFile); + addPrimOp("__readDir", 1, prim_readDir); + addPrimOp("__findFile", 2, prim_findFile); + addPrimOp("__hashFile", 2, prim_hashFile); + + // Creating files + addPrimOp("__toXML", 1, prim_toXML); + addPrimOp("__toJSON", 1, prim_toJSON); + addPrimOp("__fromJSON", 1, prim_fromJSON); + addPrimOp("__toFile", 2, prim_toFile); + addPrimOp("__filterSource", 2, prim_filterSource); + addPrimOp("__path", 1, prim_path); + + // Sets + addPrimOp("__attrNames", 1, prim_attrNames); + addPrimOp("__attrValues", 1, prim_attrValues); + addPrimOp("__getAttr", 2, prim_getAttr); + addPrimOp("__unsafeGetAttrPos", 2, prim_unsafeGetAttrPos); + addPrimOp("__hasAttr", 2, prim_hasAttr); + addPrimOp("__isAttrs", 1, prim_isAttrs); + addPrimOp("removeAttrs", 2, prim_removeAttrs); + addPrimOp("__listToAttrs", 1, prim_listToAttrs); + addPrimOp("__intersectAttrs", 2, prim_intersectAttrs); + addPrimOp("__catAttrs", 2, prim_catAttrs); + addPrimOp("__functionArgs", 1, prim_functionArgs); + addPrimOp("__mapAttrs", 2, prim_mapAttrs); + + // Lists + addPrimOp("__isList", 1, prim_isList); + addPrimOp("__elemAt", 2, prim_elemAt); + addPrimOp("__head", 1, prim_head); + addPrimOp("__tail", 1, prim_tail); + addPrimOp("map", 2, prim_map); + addPrimOp("__filter", 2, prim_filter); + addPrimOp("__elem", 2, prim_elem); + addPrimOp("__concatLists", 1, prim_concatLists); + addPrimOp("__length", 1, prim_length); + addPrimOp("__foldl'", 3, prim_foldlStrict); + addPrimOp("__any", 2, prim_any); + addPrimOp("__all", 2, prim_all); + addPrimOp("__genList", 2, prim_genList); + addPrimOp("__sort", 2, prim_sort); + addPrimOp("__partition", 2, prim_partition); + addPrimOp("__concatMap", 2, prim_concatMap); + + // Integer arithmetic + addPrimOp("__add", 2, prim_add); + addPrimOp("__sub", 2, prim_sub); + addPrimOp("__mul", 2, prim_mul); + addPrimOp("__div", 2, prim_div); + addPrimOp("__bitAnd", 2, prim_bitAnd); + addPrimOp("__bitOr", 2, prim_bitOr); + addPrimOp("__bitXor", 2, prim_bitXor); + addPrimOp("__lessThan", 2, prim_lessThan); + + // String manipulation + addPrimOp("toString", 1, prim_toString); + addPrimOp("__substring", 3, prim_substring); + addPrimOp("__stringLength", 1, prim_stringLength); + addPrimOp("__hashString", 2, prim_hashString); + addPrimOp("__match", 2, prim_match); + addPrimOp("__split", 2, prim_split); + addPrimOp("__concatStringsSep", 2, prim_concatStringSep); + addPrimOp("__replaceStrings", 3, prim_replaceStrings); + + // Versions + addPrimOp("__parseDrvName", 1, prim_parseDrvName); + addPrimOp("__compareVersions", 2, prim_compareVersions); + addPrimOp("__splitVersion", 1, prim_splitVersion); + + // Derivations + addPrimOp("derivationStrict", 1, prim_derivationStrict); + addPrimOp("placeholder", 1, prim_placeholder); + + // Networking + addPrimOp("__fetchurl", 1, prim_fetchurl); + addPrimOp("fetchTarball", 1, prim_fetchTarball); + + /* Add a wrapper around the derivation primop that computes the + `drvPath' and `outPath' attributes lazily. */ + std::string path = + canonPath(settings.nixDataDir + "/nix/corepkgs/derivation.nix", true); + sDerivationNix = symbols.Create(path); + evalFile(path, v); + addConstant("derivation", v); + + /* Add a value containing the current Nix expression search path. */ + mkList(v, searchPath.size()); + int n = 0; + for (auto& i : searchPath) { + v2 = v.listElems()[n++] = allocValue(); + mkAttrs(*v2, 2); + mkString(*allocAttr(*v2, symbols.Create("path")), i.second); + mkString(*allocAttr(*v2, symbols.Create("prefix")), i.first); + } + addConstant("__nixPath", v); + + if (RegisterPrimOp::primOps != nullptr) { + for (auto& primOp : *RegisterPrimOp::primOps) { + addPrimOp(std::get<0>(primOp), std::get<1>(primOp), std::get<2>(primOp)); + } + } +} + +} // namespace nix diff --git a/third_party/nix/src/libexpr/primops.hh b/third_party/nix/src/libexpr/primops.hh new file mode 100644 index 000000000000..6abd0508a09b --- /dev/null +++ b/third_party/nix/src/libexpr/primops.hh @@ -0,0 +1,26 @@ +#include <tuple> +#include <vector> + +#include "eval.hh" + +namespace nix { + +struct RegisterPrimOp { + typedef std::vector<std::tuple<std::string, size_t, PrimOpFun>> PrimOps; + static PrimOps* primOps; + /* You can register a constant by passing an arity of 0. fun + will get called during EvalState initialization, so there + may be primops not yet added and builtins is not yet sorted. */ + RegisterPrimOp(const std::string& name, size_t arity, PrimOpFun fun); +}; + +/* These primops are disabled without enableNativeCode, but plugins + may wish to use them in limited contexts without globally enabling + them. */ +/* Load a ValueInitializer from a DSO and return whatever it initializes */ +void prim_importNative(EvalState& state, const Pos& pos, Value** args, + Value& v); +/* Execute a program and parse its output */ +void prim_exec(EvalState& state, const Pos& pos, Value** args, Value& v); + +} // namespace nix diff --git a/third_party/nix/src/libexpr/primops/context.cc b/third_party/nix/src/libexpr/primops/context.cc new file mode 100644 index 000000000000..2ae8ba8aa99e --- /dev/null +++ b/third_party/nix/src/libexpr/primops/context.cc @@ -0,0 +1,199 @@ +#include "derivations.hh" +#include "eval-inline.hh" +#include "primops.hh" + +namespace nix { + +static void prim_unsafeDiscardStringContext(EvalState& state, const Pos& pos, + Value** args, Value& v) { + PathSet context; + std::string s = state.coerceToString(pos, *args[0], context); + mkString(v, s, PathSet()); +} + +static RegisterPrimOp r1("__unsafeDiscardStringContext", 1, + prim_unsafeDiscardStringContext); + +static void prim_hasContext(EvalState& state, const Pos& pos, Value** args, + Value& v) { + PathSet context; + state.forceString(*args[0], context, pos); + mkBool(v, !context.empty()); +} + +static RegisterPrimOp r2("__hasContext", 1, prim_hasContext); + +/* Sometimes we want to pass a derivation path (i.e. pkg.drvPath) to a + builder without causing the derivation to be built (for instance, + in the derivation that builds NARs in nix-push, when doing + source-only deployment). This primop marks the string context so + that builtins.derivation adds the path to drv.inputSrcs rather than + drv.inputDrvs. */ +static void prim_unsafeDiscardOutputDependency(EvalState& state, const Pos& pos, + Value** args, Value& v) { + PathSet context; + std::string s = state.coerceToString(pos, *args[0], context); + + PathSet context2; + for (auto& p : context) { + context2.insert(p.at(0) == '=' ? string(p, 1) : p); + } + + mkString(v, s, context2); +} + +static RegisterPrimOp r3("__unsafeDiscardOutputDependency", 1, + prim_unsafeDiscardOutputDependency); + +/* Extract the context of a string as a structured Nix value. + + The context is represented as an attribute set whose keys are the + paths in the context set and whose values are attribute sets with + the following keys: + path: True if the relevant path is in the context as a plain store + path (i.e. the kind of context you get when interpolating + a Nix path (e.g. ./.) into a string). False if missing. + allOutputs: True if the relevant path is a derivation and it is + in the context as a drv file with all of its outputs + (i.e. the kind of context you get when referencing + .drvPath of some derivation). False if missing. + outputs: If a non-empty list, the relevant path is a derivation + and the provided outputs are referenced in the context + (i.e. the kind of context you get when referencing + .outPath of some derivation). Empty list if missing. + Note that for a given path any combination of the above attributes + may be present. +*/ +static void prim_getContext(EvalState& state, const Pos& pos, Value** args, + Value& v) { + struct ContextInfo { + bool path = false; + bool allOutputs = false; + Strings outputs; + }; + PathSet context; + state.forceString(*args[0], context, pos); + auto contextInfos = std::map<Path, ContextInfo>(); + for (const auto& p : context) { + Path drv; + std::string output; + const Path* path = &p; + if (p.at(0) == '=') { + drv = string(p, 1); + path = &drv; + } else if (p.at(0) == '!') { + std::pair<string, string> ctx = decodeContext(p); + drv = ctx.first; + output = ctx.second; + path = &drv; + } + auto isPath = drv.empty(); + auto isAllOutputs = (!drv.empty()) && output.empty(); + + auto iter = contextInfos.find(*path); + if (iter == contextInfos.end()) { + contextInfos.emplace( + *path, + ContextInfo{isPath, isAllOutputs, + output.empty() ? Strings{} : Strings{std::move(output)}}); + } else { + if (isPath) + iter->second.path = true; + else if (isAllOutputs) + iter->second.allOutputs = true; + else + iter->second.outputs.emplace_back(std::move(output)); + } + } + + state.mkAttrs(v, contextInfos.size()); + + auto sPath = state.symbols.Create("path"); + auto sAllOutputs = state.symbols.Create("allOutputs"); + for (const auto& info : contextInfos) { + auto& infoVal = *state.allocAttr(v, state.symbols.Create(info.first)); + state.mkAttrs(infoVal, 3); + if (info.second.path) { + mkBool(*state.allocAttr(infoVal, sPath), true); + } + if (info.second.allOutputs) + mkBool(*state.allocAttr(infoVal, sAllOutputs), true); + if (!info.second.outputs.empty()) { + auto& outputsVal = *state.allocAttr(infoVal, state.sOutputs); + state.mkList(outputsVal, info.second.outputs.size()); + size_t i = 0; + for (const auto& output : info.second.outputs) { + mkString(*(outputsVal.listElems()[i++] = state.allocValue()), output); + } + } + } +} + +static RegisterPrimOp r4("__getContext", 1, prim_getContext); + +/* Append the given context to a given string. + + See the commentary above unsafeGetContext for details of the + context representation. +*/ +static void prim_appendContext(EvalState& state, const Pos& pos, Value** args, + Value& v) { + PathSet context; + auto orig = state.forceString(*args[0], context, pos); + + state.forceAttrs(*args[1], pos); + + auto sPath = state.symbols.Create("path"); + auto sAllOutputs = state.symbols.Create("allOutputs"); + for (const auto& attr_iter : *args[1]->attrs) { + const Attr* i = &attr_iter.second; // TODO(tazjin): get rid of this + if (!state.store->isStorePath(i->name)) + throw EvalError("Context key '%s' is not a store path, at %s", i->name, + i->pos); + if (!settings.readOnlyMode) { + state.store->ensurePath(i->name); + } + state.forceAttrs(*i->value, *i->pos); + auto iter = i->value->attrs->find(sPath); + if (iter != i->value->attrs->end()) { + if (state.forceBool(*iter->second.value, *iter->second.pos)) { + context.insert(i->name); + } + } + + iter = i->value->attrs->find(sAllOutputs); + if (iter != i->value->attrs->end()) { + if (state.forceBool(*iter->second.value, *iter->second.pos)) { + if (!isDerivation(i->name)) { + throw EvalError( + "Tried to add all-outputs context of %s, which is not a " + "derivation, to a string, at %s", + i->name, i->pos); + } + context.insert("=" + string(i->name)); + } + } + + iter = i->value->attrs->find(state.sOutputs); + if (iter != i->value->attrs->end()) { + state.forceList(*iter->second.value, *iter->second.pos); + if (iter->second.value->listSize() && !isDerivation(i->name)) { + throw EvalError( + "Tried to add derivation output context of %s, which is not a " + "derivation, to a string, at %s", + i->name, i->pos); + } + for (unsigned int n = 0; n < iter->second.value->listSize(); ++n) { + auto name = state.forceStringNoCtx(*iter->second.value->listElems()[n], + *iter->second.pos); + context.insert("!" + name + "!" + string(i->name)); + } + } + } + + mkString(v, orig, context); +} + +static RegisterPrimOp r5("__appendContext", 2, prim_appendContext); + +} // namespace nix diff --git a/third_party/nix/src/libexpr/primops/fetchGit.cc b/third_party/nix/src/libexpr/primops/fetchGit.cc new file mode 100644 index 000000000000..67641258d518 --- /dev/null +++ b/third_party/nix/src/libexpr/primops/fetchGit.cc @@ -0,0 +1,263 @@ +#include <nlohmann/json.hpp> +#include <regex> + +#include <glog/logging.h> +#include <sys/time.h> + +#include "download.hh" +#include "eval-inline.hh" +#include "hash.hh" +#include "pathlocks.hh" +#include "primops.hh" +#include "store-api.hh" + +using namespace std::string_literals; + +namespace nix { + +struct GitInfo { + Path storePath; + std::string rev; + std::string shortRev; + uint64_t revCount = 0; +}; + +std::regex revRegex("^[0-9a-fA-F]{40}$"); + +GitInfo exportGit(ref<Store> store, const std::string& uri, + std::optional<std::string> ref, std::string rev, + const std::string& name) { + if (evalSettings.pureEval && rev == "") + throw Error("in pure evaluation mode, 'fetchGit' requires a Git revision"); + + if (!ref && rev == "" && hasPrefix(uri, "/") && pathExists(uri + "/.git")) { + bool clean = true; + + try { + runProgram("git", true, + {"-C", uri, "diff-index", "--quiet", "HEAD", "--"}); + } catch (ExecError& e) { + if (!WIFEXITED(e.status) || WEXITSTATUS(e.status) != 1) { + throw; + } + clean = false; + } + + if (!clean) { + /* This is an unclean working tree. So copy all tracked + files. */ + + GitInfo gitInfo; + gitInfo.rev = "0000000000000000000000000000000000000000"; + gitInfo.shortRev = std::string(gitInfo.rev, 0, 7); + + auto files = tokenizeString<std::set<std::string>>( + runProgram("git", true, {"-C", uri, "ls-files", "-z"}), "\0"s); + + PathFilter filter = [&](const Path& p) -> bool { + assert(hasPrefix(p, uri)); + std::string file(p, uri.size() + 1); + + auto st = lstat(p); + + if (S_ISDIR(st.st_mode)) { + auto prefix = file + "/"; + auto i = files.lower_bound(prefix); + return i != files.end() && hasPrefix(*i, prefix); + } + + return files.count(file); + }; + + gitInfo.storePath = + store->addToStore("source", uri, true, htSHA256, filter); + + return gitInfo; + } + + // clean working tree, but no ref or rev specified. Use 'HEAD'. + rev = chomp(runProgram("git", true, {"-C", uri, "rev-parse", "HEAD"})); + ref = "HEAD"s; + } + + if (!ref) { + ref = "HEAD"s; + } + + if (rev != "" && !std::regex_match(rev, revRegex)) + throw Error("invalid Git revision '%s'", rev); + + deletePath(getCacheDir() + "/nix/git"); + + Path cacheDir = getCacheDir() + "/nix/gitv2/" + + hashString(htSHA256, uri).to_string(Base32, false); + + if (!pathExists(cacheDir)) { + createDirs(dirOf(cacheDir)); + runProgram("git", true, {"init", "--bare", cacheDir}); + } + + Path localRefFile; + if (ref->compare(0, 5, "refs/") == 0) + localRefFile = cacheDir + "/" + *ref; + else + localRefFile = cacheDir + "/refs/heads/" + *ref; + + bool doFetch; + time_t now = time(0); + /* If a rev was specified, we need to fetch if it's not in the + repo. */ + if (rev != "") { + try { + runProgram("git", true, {"-C", cacheDir, "cat-file", "-e", rev}); + doFetch = false; + } catch (ExecError& e) { + if (WIFEXITED(e.status)) { + doFetch = true; + } else { + throw; + } + } + } else { + /* If the local ref is older than โtarball-ttlโ seconds, do a + git fetch to update the local ref to the remote ref. */ + struct stat st; + doFetch = stat(localRefFile.c_str(), &st) != 0 || + (uint64_t)st.st_mtime + settings.tarballTtl <= (uint64_t)now; + } + if (doFetch) { + DLOG(INFO) << "fetching Git repository '" << uri << "'"; + + // FIXME: git stderr messes up our progress indicator, so + // we're using --quiet for now. Should process its stderr. + runProgram("git", true, + {"-C", cacheDir, "fetch", "--quiet", "--force", "--", uri, + fmt("%s:%s", *ref, *ref)}); + + struct timeval times[2]; + times[0].tv_sec = now; + times[0].tv_usec = 0; + times[1].tv_sec = now; + times[1].tv_usec = 0; + + utimes(localRefFile.c_str(), times); + } + + // FIXME: check whether rev is an ancestor of ref. + GitInfo gitInfo; + gitInfo.rev = rev != "" ? rev : chomp(readFile(localRefFile)); + gitInfo.shortRev = std::string(gitInfo.rev, 0, 7); + + DLOG(INFO) << "using revision " << gitInfo.rev << " of repo '" << uri << "'"; + + std::string storeLinkName = + hashString(htSHA512, name + std::string("\0"s) + gitInfo.rev) + .to_string(Base32, false); + Path storeLink = cacheDir + "/" + storeLinkName + ".link"; + PathLocks storeLinkLock({storeLink}, fmt("waiting for lock on '%1%'...", + storeLink)); // FIXME: broken + + try { + auto json = nlohmann::json::parse(readFile(storeLink)); + + assert(json["name"] == name && json["rev"] == gitInfo.rev); + + gitInfo.storePath = json["storePath"]; + + if (store->isValidPath(gitInfo.storePath)) { + gitInfo.revCount = json["revCount"]; + return gitInfo; + } + + } catch (SysError& e) { + if (e.errNo != ENOENT) { + throw; + } + } + + // FIXME: should pipe this, or find some better way to extract a + // revision. + auto tar = runProgram("git", true, {"-C", cacheDir, "archive", gitInfo.rev}); + + Path tmpDir = createTempDir(); + AutoDelete delTmpDir(tmpDir, true); + + runProgram("tar", true, {"x", "-C", tmpDir}, tar); + + gitInfo.storePath = store->addToStore(name, tmpDir); + + gitInfo.revCount = std::stoull(runProgram( + "git", true, {"-C", cacheDir, "rev-list", "--count", gitInfo.rev})); + + nlohmann::json json; + json["storePath"] = gitInfo.storePath; + json["uri"] = uri; + json["name"] = name; + json["rev"] = gitInfo.rev; + json["revCount"] = gitInfo.revCount; + + writeFile(storeLink, json.dump()); + + return gitInfo; +} + +static void prim_fetchGit(EvalState& state, const Pos& pos, Value** args, + Value& v) { + std::string url; + std::optional<std::string> ref; + std::string rev; + std::string name = "source"; + PathSet context; + + state.forceValue(*args[0]); + + if (args[0]->type == tAttrs) { + state.forceAttrs(*args[0], pos); + + for (auto& attr_iter : *args[0]->attrs) { + auto& attr = attr_iter.second; + std::string n(attr.name); + if (n == "url") + url = + state.coerceToString(*attr.pos, *attr.value, context, false, false); + else if (n == "ref") + ref = state.forceStringNoCtx(*attr.value, *attr.pos); + else if (n == "rev") + rev = state.forceStringNoCtx(*attr.value, *attr.pos); + else if (n == "name") + name = state.forceStringNoCtx(*attr.value, *attr.pos); + else + throw EvalError("unsupported argument '%s' to 'fetchGit', at %s", + attr.name, *attr.pos); + } + + if (url.empty()) + throw EvalError(format("'url' argument required, at %1%") % pos); + + } else { + url = state.coerceToString(pos, *args[0], context, false, false); + } + + // FIXME: git externals probably can be used to bypass the URI + // whitelist. Ah well. + state.checkURI(url); + + auto gitInfo = exportGit(state.store, url, ref, rev, name); + + state.mkAttrs(v, 8); + mkString(*state.allocAttr(v, state.sOutPath), gitInfo.storePath, + PathSet({gitInfo.storePath})); + mkString(*state.allocAttr(v, state.symbols.Create("rev")), gitInfo.rev); + mkString(*state.allocAttr(v, state.symbols.Create("shortRev")), + gitInfo.shortRev); + mkInt(*state.allocAttr(v, state.symbols.Create("revCount")), + gitInfo.revCount); + + if (state.allowedPaths) { + state.allowedPaths->insert(state.store->toRealPath(gitInfo.storePath)); + } +} + +static RegisterPrimOp r("fetchGit", 1, prim_fetchGit); + +} // namespace nix diff --git a/third_party/nix/src/libexpr/primops/fetchMercurial.cc b/third_party/nix/src/libexpr/primops/fetchMercurial.cc new file mode 100644 index 000000000000..9223f1c3ca99 --- /dev/null +++ b/third_party/nix/src/libexpr/primops/fetchMercurial.cc @@ -0,0 +1,237 @@ +#include <nlohmann/json.hpp> +#include <regex> + +#include <glog/logging.h> +#include <sys/time.h> + +#include "download.hh" +#include "eval-inline.hh" +#include "pathlocks.hh" +#include "primops.hh" +#include "store-api.hh" + +using namespace std::string_literals; + +namespace nix { + +struct HgInfo { + Path storePath; + std::string branch; + std::string rev; + uint64_t revCount = 0; +}; + +std::regex commitHashRegex("^[0-9a-fA-F]{40}$"); + +HgInfo exportMercurial(ref<Store> store, const std::string& uri, + std::string rev, const std::string& name) { + if (evalSettings.pureEval && rev == "") + throw Error( + "in pure evaluation mode, 'fetchMercurial' requires a Mercurial " + "revision"); + + if (rev == "" && hasPrefix(uri, "/") && pathExists(uri + "/.hg")) { + bool clean = runProgram("hg", true, + {"status", "-R", uri, "--modified", "--added", + "--removed"}) == ""; + + if (!clean) { + /* This is an unclean working tree. So copy all tracked + files. */ + + DLOG(INFO) << "copying unclean Mercurial working tree '" << uri << "'"; + + HgInfo hgInfo; + hgInfo.rev = "0000000000000000000000000000000000000000"; + hgInfo.branch = chomp(runProgram("hg", true, {"branch", "-R", uri})); + + auto files = tokenizeString<std::set<std::string>>( + runProgram("hg", true, + {"status", "-R", uri, "--clean", "--modified", "--added", + "--no-status", "--print0"}), + "\0"s); + + PathFilter filter = [&](const Path& p) -> bool { + assert(hasPrefix(p, uri)); + std::string file(p, uri.size() + 1); + + auto st = lstat(p); + + if (S_ISDIR(st.st_mode)) { + auto prefix = file + "/"; + auto i = files.lower_bound(prefix); + return i != files.end() && hasPrefix(*i, prefix); + } + + return files.count(file); + }; + + hgInfo.storePath = + store->addToStore("source", uri, true, htSHA256, filter); + + return hgInfo; + } + } + + if (rev == "") { + rev = "default"; + } + + Path cacheDir = fmt("%s/nix/hg/%s", getCacheDir(), + hashString(htSHA256, uri).to_string(Base32, false)); + + Path stampFile = fmt("%s/.hg/%s.stamp", cacheDir, + hashString(htSHA512, rev).to_string(Base32, false)); + + /* If we haven't pulled this repo less than โtarball-ttlโ seconds, + do so now. */ + time_t now = time(0); + struct stat st; + if (stat(stampFile.c_str(), &st) != 0 || + (uint64_t)st.st_mtime + settings.tarballTtl <= (uint64_t)now) { + /* Except that if this is a commit hash that we already have, + we don't have to pull again. */ + if (!(std::regex_match(rev, commitHashRegex) && pathExists(cacheDir) && + runProgram(RunOptions("hg", {"log", "-R", cacheDir, "-r", rev, + "--template", "1"}) + .killStderr(true)) + .second == "1")) { + DLOG(INFO) << "fetching Mercurial repository '" << uri << "'"; + + if (pathExists(cacheDir)) { + try { + runProgram("hg", true, {"pull", "-R", cacheDir, "--", uri}); + } catch (ExecError& e) { + std::string transJournal = cacheDir + "/.hg/store/journal"; + /* hg throws "abandoned transaction" error only if this file exists */ + if (pathExists(transJournal)) { + runProgram("hg", true, {"recover", "-R", cacheDir}); + runProgram("hg", true, {"pull", "-R", cacheDir, "--", uri}); + } else { + throw ExecError(e.status, + fmt("'hg pull' %s", statusToString(e.status))); + } + } + } else { + createDirs(dirOf(cacheDir)); + runProgram("hg", true, {"clone", "--noupdate", "--", uri, cacheDir}); + } + } + + writeFile(stampFile, ""); + } + + auto tokens = tokenizeString<std::vector<std::string>>( + runProgram("hg", true, + {"log", "-R", cacheDir, "-r", rev, "--template", + "{node} {rev} {branch}"})); + assert(tokens.size() == 3); + + HgInfo hgInfo; + hgInfo.rev = tokens[0]; + hgInfo.revCount = std::stoull(tokens[1]); + hgInfo.branch = tokens[2]; + + std::string storeLinkName = + hashString(htSHA512, name + std::string("\0"s) + hgInfo.rev) + .to_string(Base32, false); + Path storeLink = fmt("%s/.hg/%s.link", cacheDir, storeLinkName); + + try { + auto json = nlohmann::json::parse(readFile(storeLink)); + + assert(json["name"] == name && json["rev"] == hgInfo.rev); + + hgInfo.storePath = json["storePath"]; + + if (store->isValidPath(hgInfo.storePath)) { + DLOG(INFO) << "using cached Mercurial store path '" << hgInfo.storePath + << "'"; + return hgInfo; + } + + } catch (SysError& e) { + if (e.errNo != ENOENT) { + throw; + } + } + + Path tmpDir = createTempDir(); + AutoDelete delTmpDir(tmpDir, true); + + runProgram("hg", true, {"archive", "-R", cacheDir, "-r", rev, tmpDir}); + + deletePath(tmpDir + "/.hg_archival.txt"); + + hgInfo.storePath = store->addToStore(name, tmpDir); + + nlohmann::json json; + json["storePath"] = hgInfo.storePath; + json["uri"] = uri; + json["name"] = name; + json["branch"] = hgInfo.branch; + json["rev"] = hgInfo.rev; + json["revCount"] = hgInfo.revCount; + + writeFile(storeLink, json.dump()); + + return hgInfo; +} + +static void prim_fetchMercurial(EvalState& state, const Pos& pos, Value** args, + Value& v) { + std::string url; + std::string rev; + std::string name = "source"; + PathSet context; + + state.forceValue(*args[0]); + + if (args[0]->type == tAttrs) { + state.forceAttrs(*args[0], pos); + + for (auto& attr_iter : *args[0]->attrs) { + auto& attr = attr_iter.second; + std::string n(attr.name); + if (n == "url") + url = + state.coerceToString(*attr.pos, *attr.value, context, false, false); + else if (n == "rev") + rev = state.forceStringNoCtx(*attr.value, *attr.pos); + else if (n == "name") + name = state.forceStringNoCtx(*attr.value, *attr.pos); + else + throw EvalError("unsupported argument '%s' to 'fetchMercurial', at %s", + attr.name, *attr.pos); + } + + if (url.empty()) + throw EvalError(format("'url' argument required, at %1%") % pos); + + } else { + url = state.coerceToString(pos, *args[0], context, false, false); + } + + // FIXME: git externals probably can be used to bypass the URI + // whitelist. Ah well. + state.checkURI(url); + + auto hgInfo = exportMercurial(state.store, url, rev, name); + + state.mkAttrs(v, 8); + mkString(*state.allocAttr(v, state.sOutPath), hgInfo.storePath, + PathSet({hgInfo.storePath})); + mkString(*state.allocAttr(v, state.symbols.Create("branch")), hgInfo.branch); + mkString(*state.allocAttr(v, state.symbols.Create("rev")), hgInfo.rev); + mkString(*state.allocAttr(v, state.symbols.Create("shortRev")), + std::string(hgInfo.rev, 0, 12)); + mkInt(*state.allocAttr(v, state.symbols.Create("revCount")), hgInfo.revCount); + + if (state.allowedPaths) { + state.allowedPaths->insert(state.store->toRealPath(hgInfo.storePath)); + } +} + +static RegisterPrimOp r("fetchMercurial", 1, prim_fetchMercurial); + +} // namespace nix diff --git a/third_party/nix/src/libexpr/primops/fromTOML.cc b/third_party/nix/src/libexpr/primops/fromTOML.cc new file mode 100644 index 000000000000..cc7b3cfcc33e --- /dev/null +++ b/third_party/nix/src/libexpr/primops/fromTOML.cc @@ -0,0 +1,88 @@ +#include "cpptoml/cpptoml.h" +#include "eval-inline.hh" +#include "primops.hh" + +namespace nix { + +static void prim_fromTOML(EvalState& state, const Pos& pos, Value** args, + Value& v) { + using namespace cpptoml; + + auto toml = state.forceStringNoCtx(*args[0], pos); + + std::istringstream tomlStream(toml); + + std::function<void(Value&, std::shared_ptr<base>)> visit; + + visit = [&](Value& v, std::shared_ptr<base> t) { + if (auto t2 = t->as_table()) { + size_t size = 0; + for (auto& i : *t2) { + (void)i; + size++; + } + + state.mkAttrs(v, size); + + for (auto& i : *t2) { + auto& v2 = *state.allocAttr(v, state.symbols.Create(i.first)); + + if (auto i2 = i.second->as_table_array()) { + size_t size2 = i2->get().size(); + state.mkList(v2, size2); + for (size_t j = 0; j < size2; ++j) + visit(*(v2.listElems()[j] = state.allocValue()), i2->get()[j]); + } else + visit(v2, i.second); + } + } + + else if (auto t2 = t->as_array()) { + size_t size = t2->get().size(); + + state.mkList(v, size); + + for (size_t i = 0; i < size; ++i) + visit(*(v.listElems()[i] = state.allocValue()), t2->get()[i]); + } + + // Handle cases like 'a = [[{ a = true }]]', which IMHO should be + // parsed as a array containing an array containing a table, + // but instead are parsed as an array containing a table array + // containing a table. + else if (auto t2 = t->as_table_array()) { + size_t size = t2->get().size(); + + state.mkList(v, size); + + for (size_t j = 0; j < size; ++j) + visit(*(v.listElems()[j] = state.allocValue()), t2->get()[j]); + } + + else if (t->is_value()) { + if (auto val = t->as<int64_t>()) + mkInt(v, val->get()); + else if (auto val = t->as<NixFloat>()) + mkFloat(v, val->get()); + else if (auto val = t->as<bool>()) + mkBool(v, val->get()); + else if (auto val = t->as<std::string>()) + mkString(v, val->get()); + else + throw EvalError("unsupported value type in TOML"); + } + + else + abort(); + }; + + try { + visit(v, parser(tomlStream).parse()); + } catch (std::runtime_error& e) { + throw EvalError("while parsing a TOML string at %s: %s", pos, e.what()); + } +} + +static RegisterPrimOp r("fromTOML", 1, prim_fromTOML); + +} // namespace nix diff --git a/third_party/nix/src/libexpr/symbol-table.cc b/third_party/nix/src/libexpr/symbol-table.cc new file mode 100644 index 000000000000..6abaedc5173c --- /dev/null +++ b/third_party/nix/src/libexpr/symbol-table.cc @@ -0,0 +1,24 @@ +#include "symbol-table.hh" + +#include <absl/container/node_hash_set.h> +#include <absl/strings/string_view.h> + +namespace nix { + +Symbol SymbolTable::Create(absl::string_view sym) { + auto it = symbols_.emplace(sym); + const std::string* ptr = &(*it.first); + return Symbol(ptr); +} + +size_t SymbolTable::Size() const { return symbols_.size(); } + +size_t SymbolTable::TotalSize() const { + size_t n = 0; + for (auto& i : symbols_) { + n += i.size(); + } + return n; +} + +} // namespace nix diff --git a/third_party/nix/src/libexpr/symbol-table.hh b/third_party/nix/src/libexpr/symbol-table.hh new file mode 100644 index 000000000000..df696c9e1ac8 --- /dev/null +++ b/third_party/nix/src/libexpr/symbol-table.hh @@ -0,0 +1,66 @@ +#pragma once + +#include <absl/container/node_hash_set.h> +#include <absl/strings/string_view.h> + +namespace nix { // TODO(tazjin): ::expr + +// TODO(tazjin): Replace with a simpler struct, or get rid of. +class Symbol { + private: + const std::string* s; // pointer into SymbolTable + Symbol(const std::string* s) : s(s){}; + friend class SymbolTable; + + public: + Symbol() : s(0){}; + + bool operator==(const Symbol& s2) const { return s == s2.s; } + + bool operator!=(const Symbol& s2) const { return s != s2.s; } + + bool operator<(const Symbol& s2) const { return s < s2.s; } + + operator const std::string&() const { return *s; } + + bool set() const { return s; } + + bool empty() const { return s->empty(); } + + friend std::ostream& operator<<(std::ostream& str, const Symbol& sym); +}; + +// SymbolTable is a hash-set based symbol-interning mechanism. +// +// TODO(tazjin): Figure out which things use this. AttrSets, ...? +// Is it possible this only exists because AttrSet wasn't a map? +// +// Original comment: +// +// Symbol table used by the parser and evaluator to represent and look +// up identifiers and attributes efficiently. SymbolTable::create() +// converts a string into a symbol. Symbols have the property that +// they can be compared efficiently (using a pointer equality test), +// because the symbol table stores only one copy of each string. +class SymbolTable { + public: + // Create a new symbol in this table by emplacing the provided + // string into it. + // + // The symbol will reference an existing symbol if the symbol is + // already interned. + Symbol Create(absl::string_view sym); + + // Return the number of symbols interned. + size_t Size() const; + + // Return the total size (in bytes) + size_t TotalSize() const; + + private: + // flat_hash_set does not retain pointer stability on rehashing, + // hence "interned" strings/symbols are stored on the heap. + absl::node_hash_set<std::string> symbols_; +}; + +} // namespace nix diff --git a/third_party/nix/src/libexpr/value-to-json.cc b/third_party/nix/src/libexpr/value-to-json.cc new file mode 100644 index 000000000000..0b641a41b5ac --- /dev/null +++ b/third_party/nix/src/libexpr/value-to-json.cc @@ -0,0 +1,104 @@ +#include "value-to-json.hh" + +#include <cstdlib> +#include <iomanip> + +#include "eval-inline.hh" +#include "json.hh" +#include "util.hh" + +namespace nix { + +void printValueAsJSON(EvalState& state, bool strict, Value& v, + JSONPlaceholder& out, PathSet& context) { + checkInterrupt(); + + if (strict) { + state.forceValue(v); + } + + switch (v.type) { + case tInt: + out.write(v.integer); + break; + + case tBool: + out.write(v.boolean); + break; + + case tString: + copyContext(v, context); + out.write(v.string.s); + break; + + case tPath: + out.write(state.copyPathToStore(context, v.path)); + break; + + case tNull: + out.write(nullptr); + break; + + case tAttrs: { + auto maybeString = + state.tryAttrsToString(noPos, v, context, false, false); + if (maybeString) { + out.write(*maybeString); + break; + } + auto i = v.attrs->find(state.sOutPath); + if (i == v.attrs->end()) { + auto obj(out.object()); + StringSet names; + for (auto& j : *v.attrs) { + names.insert(j.second.name); + } + for (auto& j : names) { + auto [_, a] = *v.attrs->find(state.symbols.Create(j)); + auto placeholder(obj.placeholder(j)); + printValueAsJSON(state, strict, *a.value, placeholder, context); + } + } else { + printValueAsJSON(state, strict, *i->second.value, out, context); + } + break; + } + + case tList1: + case tList2: + case tListN: { + auto list(out.list()); + for (unsigned int n = 0; n < v.listSize(); ++n) { + auto placeholder(list.placeholder()); + printValueAsJSON(state, strict, *v.listElems()[n], placeholder, + context); + } + break; + } + + case tExternal: + v.external->printValueAsJSON(state, strict, out, context); + break; + + case tFloat: + out.write(v.fpoint); + break; + + default: + throw TypeError(format("cannot convert %1% to JSON") % showType(v)); + } +} + +void printValueAsJSON(EvalState& state, bool strict, Value& v, + std::ostream& str, PathSet& context) { + JSONPlaceholder out(str); + printValueAsJSON(state, strict, v, out, context); +} + +void ExternalValueBase::printValueAsJSON(EvalState& state, bool strict, + JSONPlaceholder& out, + PathSet& context) const { + throw TypeError(format("cannot convert %1% to JSON") % showType()); +} + +} // namespace nix diff --git a/third_party/nix/src/libexpr/value-to-json.hh b/third_party/nix/src/libexpr/value-to-json.hh new file mode 100644 index 000000000000..6ec36c829d37 --- /dev/null +++ b/third_party/nix/src/libexpr/value-to-json.hh @@ -0,0 +1,19 @@ +#pragma once + +#include <map> +#include <string> + +#include "eval.hh" +#include "nixexpr.hh" + +namespace nix { + +class JSONPlaceholder; + +void printValueAsJSON(EvalState& state, bool strict, Value& v, + JSONPlaceholder& out, PathSet& context); + +void printValueAsJSON(EvalState& state, bool strict, Value& v, + std::ostream& str, PathSet& context); + +} // namespace nix diff --git a/third_party/nix/src/libexpr/value-to-xml.cc b/third_party/nix/src/libexpr/value-to-xml.cc new file mode 100644 index 000000000000..149bf764f0f9 --- /dev/null +++ b/third_party/nix/src/libexpr/value-to-xml.cc @@ -0,0 +1,198 @@ +#include "value-to-xml.hh" + +#include <cstdlib> + +#include "eval-inline.hh" +#include "util.hh" +#include "xml-writer.hh" + +namespace nix { + +static XMLAttrs singletonAttrs(const std::string& name, + const std::string& value) { + XMLAttrs attrs; + attrs[name] = value; + return attrs; +} + +static void printValueAsXML(EvalState& state, bool strict, bool location, + Value& v, XMLWriter& doc, PathSet& context, + PathSet& drvsSeen); + +static void posToXML(XMLAttrs& xmlAttrs, const Pos& pos) { + xmlAttrs["path"] = pos.file; + xmlAttrs["line"] = (format("%1%") % pos.line).str(); + xmlAttrs["column"] = (format("%1%") % pos.column).str(); +} + +static void showAttrs(EvalState& state, bool strict, bool location, + Bindings& attrs, XMLWriter& doc, PathSet& context, + PathSet& drvsSeen) { + StringSet names; + + for (auto& i : attrs) { + names.insert(i.second.name); + } + + for (auto& i : names) { + auto& [_, a] = *attrs.find(state.symbols.Create(i)); + + XMLAttrs xmlAttrs; + xmlAttrs["name"] = i; + if (location && a.pos != &noPos) { + posToXML(xmlAttrs, *a.pos); + } + + XMLOpenElement elem(doc, "attr", xmlAttrs); + printValueAsXML(state, strict, location, *a.value, doc, context, drvsSeen); + } +} + +static void printValueAsXML(EvalState& state, bool strict, bool location, + Value& v, XMLWriter& doc, PathSet& context, + PathSet& drvsSeen) { + checkInterrupt(); + + if (strict) { + state.forceValue(v); + } + + switch (v.type) { + case tInt: + doc.writeEmptyElement( + "int", singletonAttrs("value", (format("%1%") % v.integer).str())); + break; + + case tBool: + doc.writeEmptyElement( + "bool", singletonAttrs("value", v.boolean ? "true" : "false")); + break; + + case tString: + /* !!! show the context? */ + copyContext(v, context); + doc.writeEmptyElement("string", singletonAttrs("value", v.string.s)); + break; + + case tPath: + doc.writeEmptyElement("path", singletonAttrs("value", v.path)); + break; + + case tNull: + doc.writeEmptyElement("null"); + break; + + case tAttrs: + if (state.isDerivation(v)) { + XMLAttrs xmlAttrs; + + Bindings::iterator a = + v.attrs->find(state.symbols.Create("derivation")); + + Path drvPath; + a = v.attrs->find(state.sDrvPath); + if (a != v.attrs->end()) { + if (strict) { + state.forceValue(*a->second.value); + } + if (a->second.value->type == tString) { + xmlAttrs["drvPath"] = drvPath = a->second.value->string.s; + } + } + + a = v.attrs->find(state.sOutPath); + if (a != v.attrs->end()) { + if (strict) { + state.forceValue(*a->second.value); + } + if (a->second.value->type == tString) { + xmlAttrs["outPath"] = a->second.value->string.s; + } + } + + XMLOpenElement _(doc, "derivation", xmlAttrs); + + if (!drvPath.empty() && drvsSeen.find(drvPath) == drvsSeen.end()) { + drvsSeen.insert(drvPath); + showAttrs(state, strict, location, *v.attrs, doc, context, drvsSeen); + } else { + doc.writeEmptyElement("repeated"); + } + } + + else { + XMLOpenElement _(doc, "attrs"); + showAttrs(state, strict, location, *v.attrs, doc, context, drvsSeen); + } + + break; + + case tList1: + case tList2: + case tListN: { + XMLOpenElement _(doc, "list"); + for (unsigned int n = 0; n < v.listSize(); ++n) { + printValueAsXML(state, strict, location, *v.listElems()[n], doc, + context, drvsSeen); + } + break; + } + + case tLambda: { + XMLAttrs xmlAttrs; + if (location) { + posToXML(xmlAttrs, v.lambda.fun->pos); + } + XMLOpenElement _(doc, "function", xmlAttrs); + + if (v.lambda.fun->matchAttrs) { + XMLAttrs attrs; + if (!v.lambda.fun->arg.empty()) { + attrs["name"] = v.lambda.fun->arg; + } + if (v.lambda.fun->formals->ellipsis) { + attrs["ellipsis"] = "1"; + } + XMLOpenElement _(doc, "attrspat", attrs); + for (auto& i : v.lambda.fun->formals->formals) { + doc.writeEmptyElement("attr", singletonAttrs("name", i.name)); + } + } else { + doc.writeEmptyElement("varpat", + singletonAttrs("name", v.lambda.fun->arg)); + } + + break; + } + + case tExternal: + v.external->printValueAsXML(state, strict, location, doc, context, + drvsSeen); + break; + + case tFloat: + doc.writeEmptyElement( + "float", singletonAttrs("value", (format("%1%") % v.fpoint).str())); + break; + + default: + doc.writeEmptyElement("unevaluated"); + } +} + +void ExternalValueBase::printValueAsXML(EvalState& state, bool strict, + bool location, XMLWriter& doc, + PathSet& context, + PathSet& drvsSeen) const { + doc.writeEmptyElement("unevaluated"); +} + +void printValueAsXML(EvalState& state, bool strict, bool location, Value& v, + std::ostream& out, PathSet& context) { + XMLWriter doc(true, out); + XMLOpenElement root(doc, "expr"); + PathSet drvsSeen; + printValueAsXML(state, strict, location, v, doc, context, drvsSeen); +} + +} // namespace nix diff --git a/third_party/nix/src/libexpr/value-to-xml.hh b/third_party/nix/src/libexpr/value-to-xml.hh new file mode 100644 index 000000000000..e7749dd7ae30 --- /dev/null +++ b/third_party/nix/src/libexpr/value-to-xml.hh @@ -0,0 +1,14 @@ +#pragma once + +#include <map> +#include <string> + +#include "eval.hh" +#include "nixexpr.hh" + +namespace nix { + +void printValueAsXML(EvalState& state, bool strict, bool location, Value& v, + std::ostream& out, PathSet& context); + +} diff --git a/third_party/nix/src/libexpr/value.hh b/third_party/nix/src/libexpr/value.hh new file mode 100644 index 000000000000..892c11220e42 --- /dev/null +++ b/third_party/nix/src/libexpr/value.hh @@ -0,0 +1,257 @@ +#pragma once + +#include "symbol-table.hh" +#include "types.hh" + +#if HAVE_BOEHMGC +#include <gc/gc_allocator.h> +#endif + +namespace nix { + +typedef enum { + tInt = 1, + tBool, + tString, + tPath, + tNull, + tAttrs, + tList1, + tList2, + tListN, + tThunk, + tApp, + tLambda, + tBlackhole, + tPrimOp, + tPrimOpApp, + tExternal, + tFloat +} ValueType; + +class Bindings; +struct Env; +struct Expr; +struct ExprLambda; +struct PrimOp; +struct PrimOp; +class Symbol; +struct Pos; +class EvalState; +class XMLWriter; +class JSONPlaceholder; + +typedef int64_t NixInt; +typedef double NixFloat; + +/* External values must descend from ExternalValueBase, so that + * type-agnostic nix functions (e.g. showType) can be implemented + */ +class ExternalValueBase { + friend std::ostream& operator<<(std::ostream& str, + const ExternalValueBase& v); + + protected: + /* Print out the value */ + virtual std::ostream& print(std::ostream& str) const = 0; + + public: + /* Return a simple string describing the type */ + virtual std::string showType() const = 0; + + /* Return a string to be used in builtins.typeOf */ + virtual std::string typeOf() const = 0; + + /* How much space does this value take up */ + virtual size_t valueSize(std::set<const void*>& seen) const = 0; + + /* Coerce the value to a string. Defaults to uncoercable, i.e. throws an + * error + */ + virtual std::string coerceToString(const Pos& pos, PathSet& context, + bool copyMore, bool copyToStore) const; + + /* Compare to another value of the same type. Defaults to uncomparable, + * i.e. always false. + */ + virtual bool operator==(const ExternalValueBase& b) const; + + /* Print the value as JSON. Defaults to unconvertable, i.e. throws an error */ + virtual void printValueAsJSON(EvalState& state, bool strict, + JSONPlaceholder& out, PathSet& context) const; + + /* Print the value as XML. Defaults to unevaluated */ + virtual void printValueAsXML(EvalState& state, bool strict, bool location, + XMLWriter& doc, PathSet& context, + PathSet& drvsSeen) const; + + virtual ~ExternalValueBase(){}; +}; + +std::ostream& operator<<(std::ostream& str, const ExternalValueBase& v); + +// Forward declaration of Value is required because the following +// types are mutually recursive. +// +// TODO(tazjin): Really, these types need some serious refactoring. +struct Value; + +/* Strings in the evaluator carry a so-called `context' which + is a list of strings representing store paths. This is to + allow users to write things like + + "--with-freetype2-library=" + freetype + "/lib" + + where `freetype' is a derivation (or a source to be copied + to the store). If we just concatenated the strings without + keeping track of the referenced store paths, then if the + string is used as a derivation attribute, the derivation + will not have the correct dependencies in its inputDrvs and + inputSrcs. + + The semantics of the context is as follows: when a string + with context C is used as a derivation attribute, then the + derivations in C will be added to the inputDrvs of the + derivation, and the other store paths in C will be added to + the inputSrcs of the derivations. + + For canonicity, the store paths should be in sorted order. */ +struct NixString { + const char* s; + const char** context; // must be in sorted order +}; + +struct NixBigList { + size_t size; + Value** elems; +}; + +struct NixThunk { + Env* env; + Expr* expr; +}; + +struct NixApp { + Value *left, *right; +}; + +struct NixLambda { + Env* env; + ExprLambda* fun; +}; + +struct NixPrimOpApp { + Value *left, *right; +}; + +struct Value { + ValueType type; + union { // TODO(tazjin): std::variant + NixInt integer; + bool boolean; + NixString string; + const char* path; + Bindings* attrs; + NixBigList bigList; + Value* smallList[2]; + NixThunk thunk; + NixApp app; // TODO(tazjin): "app"? + NixLambda lambda; + PrimOp* primOp; + NixPrimOpApp primOpApp; + ExternalValueBase* external; + NixFloat fpoint; + }; + + bool isList() const { + return type == tList1 || type == tList2 || type == tListN; + } + + Value** listElems() { + return type == tList1 || type == tList2 ? smallList : bigList.elems; + } + + const Value* const* listElems() const { + return type == tList1 || type == tList2 ? smallList : bigList.elems; + } + + size_t listSize() const { + return type == tList1 ? 1 : type == tList2 ? 2 : bigList.size; + } +}; + +/* After overwriting an app node, be sure to clear pointers in the + Value to ensure that the target isn't kept alive unnecessarily. */ +static inline void clearValue(Value& v) { v.app.left = v.app.right = 0; } + +static inline void mkInt(Value& v, NixInt n) { + clearValue(v); + v.type = tInt; + v.integer = n; +} + +static inline void mkFloat(Value& v, NixFloat n) { + clearValue(v); + v.type = tFloat; + v.fpoint = n; +} + +static inline void mkBool(Value& v, bool b) { + clearValue(v); + v.type = tBool; + v.boolean = b; +} + +static inline void mkNull(Value& v) { + clearValue(v); + v.type = tNull; +} + +static inline void mkApp(Value& v, Value& left, Value& right) { + v.type = tApp; + v.app.left = &left; + v.app.right = &right; +} + +static inline void mkPrimOpApp(Value& v, Value& left, Value& right) { + v.type = tPrimOpApp; + v.app.left = &left; + v.app.right = &right; +} + +static inline void mkStringNoCopy(Value& v, const char* s) { + v.type = tString; + v.string.s = s; + v.string.context = 0; +} + +static inline void mkString(Value& v, const Symbol& s) { + mkStringNoCopy(v, ((const std::string&)s).c_str()); +} + +void mkString(Value& v, const char* s); + +static inline void mkPathNoCopy(Value& v, const char* s) { + clearValue(v); + v.type = tPath; + v.path = s; +} + +void mkPath(Value& v, const char* s); + +/* Compute the size in bytes of the given value, including all values + and environments reachable from it. Static expressions (Exprs) are + not included. */ +size_t valueSize(Value& v); + +#if HAVE_BOEHMGC +typedef std::vector<Value*, gc_allocator<Value*> > ValueVector; +typedef std::map<Symbol, Value*, std::less<Symbol>, + gc_allocator<std::pair<const Symbol, Value*> > > + ValueMap; +#else +typedef std::vector<Value*> ValueVector; +typedef std::map<Symbol, Value*> ValueMap; +#endif + +} // namespace nix diff --git a/third_party/nix/src/libmain/common-args.cc b/third_party/nix/src/libmain/common-args.cc new file mode 100644 index 000000000000..3d9b8ebfaefd --- /dev/null +++ b/third_party/nix/src/libmain/common-args.cc @@ -0,0 +1,42 @@ +#include "common-args.hh" + +#include <glog/logging.h> + +#include "globals.hh" + +namespace nix { + +MixCommonArgs::MixCommonArgs(const string& programName) + : programName(programName) { + mkFlag() + .longName("option") + .labels({"name", "value"}) + .description("set a Nix configuration option (overriding nix.conf)") + .arity(2) + .handler([](std::vector<std::string> ss) { + try { + globalConfig.set(ss[0], ss[1]); + } catch (UsageError& e) { + LOG(WARNING) << e.what(); + } + }); + + mkFlag() + .longName("max-jobs") + .shortName('j') + .label("jobs") + .description("maximum number of parallel builds") + .handler([=](const std::string& s) { settings.set("max-jobs", s); }); + + std::string cat = "config"; + globalConfig.convertToArgs(*this, cat); + + // Backward compatibility hack: nix-env already had a --system flag. + if (programName == "nix-env") { + longFlags.erase("system"); + } + + hiddenCategories.insert(cat); +} + +} // namespace nix diff --git a/third_party/nix/src/libmain/common-args.hh b/third_party/nix/src/libmain/common-args.hh new file mode 100644 index 000000000000..d8917b836778 --- /dev/null +++ b/third_party/nix/src/libmain/common-args.hh @@ -0,0 +1,27 @@ +#pragma once + +#include "args.hh" + +namespace nix { + +struct MixCommonArgs : virtual Args { + string programName; + MixCommonArgs(const string& programName); +}; + +struct MixDryRun : virtual Args { + bool dryRun = false; + + MixDryRun() { + mkFlag(0, "dry-run", "show what this command would do without doing it", + &dryRun); + } +}; + +struct MixJSON : virtual Args { + bool json = false; + + MixJSON() { mkFlag(0, "json", "produce JSON output", &json); } +}; + +} // namespace nix diff --git a/third_party/nix/src/libmain/meson.build b/third_party/nix/src/libmain/meson.build new file mode 100644 index 000000000000..052430105957 --- /dev/null +++ b/third_party/nix/src/libmain/meson.build @@ -0,0 +1,40 @@ +src_inc += include_directories('.') + +libmain_src = files( + join_paths(meson.source_root(), 'src/libmain/common-args.cc'), + join_paths(meson.source_root(), 'src/libmain/shared.cc'), + join_paths(meson.source_root(), 'src/libmain/stack.cc')) + +libmain_headers = files( + join_paths(meson.source_root(), 'src/libmain/common-args.hh'), + join_paths(meson.source_root(), 'src/libmain/shared.hh')) + +libmain_dep_list = [ + glog_dep, + pthread_dep, + openssl_dep, + libsodium_dep, +] + +libmain_link_list = [ + libutil_lib, + libstore_lib, +] + +libmain_link_args = [] +libstore_cxx_args = [] + +libmain_lib = library( + 'nixmain', + install : true, + install_mode : 'rwxr-xr-x', + install_dir : libdir, + include_directories : src_inc, + link_with : libmain_link_list, + sources : libmain_src, + link_args : libmain_link_args, + dependencies : libmain_dep_list) + +install_headers( + libmain_headers, + install_dir : join_paths(includedir, 'nix')) diff --git a/third_party/nix/src/libmain/nix-main.pc.in b/third_party/nix/src/libmain/nix-main.pc.in new file mode 100644 index 000000000000..37b03dcd42c0 --- /dev/null +++ b/third_party/nix/src/libmain/nix-main.pc.in @@ -0,0 +1,9 @@ +prefix=@prefix@ +libdir=@libdir@ +includedir=@includedir@ + +Name: Nix +Description: Nix Package Manager +Version: @PACKAGE_VERSION@ +Libs: -L${libdir} -lnixmain +Cflags: -I${includedir}/nix -std=c++17 diff --git a/third_party/nix/src/libmain/shared.cc b/third_party/nix/src/libmain/shared.cc new file mode 100644 index 000000000000..f6c80cae30ef --- /dev/null +++ b/third_party/nix/src/libmain/shared.cc @@ -0,0 +1,406 @@ +#include "shared.hh" + +#include <algorithm> +#include <cctype> +#include <csignal> +#include <cstdlib> +#include <exception> +#include <iostream> +#include <mutex> +#include <utility> + +#include <glog/logging.h> +#include <openssl/crypto.h> +#include <sys/stat.h> +#include <sys/time.h> +#include <unistd.h> + +#include "globals.hh" +#include "store-api.hh" +#include "util.hh" + +namespace nix { + +static bool gcWarning = true; + +void printGCWarning() { + if (!gcWarning) { + return; + } + + static bool haveWarned = false; + if (!haveWarned) { + haveWarned = true; + LOG(WARNING) << "you did not specify '--add-root'; " + << "the result might be removed by the garbage collector"; + } +} + +void printMissing(const ref<Store>& store, const PathSet& paths) { + unsigned long long downloadSize; + unsigned long long narSize; + PathSet willBuild; + PathSet willSubstitute; + PathSet unknown; + store->queryMissing(paths, willBuild, willSubstitute, unknown, downloadSize, + narSize); + printMissing(store, willBuild, willSubstitute, unknown, downloadSize, + narSize); +} + +void printMissing(const ref<Store>& store, const PathSet& willBuild, + const PathSet& willSubstitute, const PathSet& unknown, + unsigned long long downloadSize, unsigned long long narSize) { + if (!willBuild.empty()) { + LOG(INFO) << "these derivations will be built:"; + Paths sorted = store->topoSortPaths(willBuild); + reverse(sorted.begin(), sorted.end()); + for (auto& i : sorted) { + LOG(INFO) << " " << i; + } + } + + if (!willSubstitute.empty()) { + LOG(INFO) << "these paths will be fetched (" + << (downloadSize / (1024.0 * 1024.0)) << " MiB download, " + << (narSize / (1024.0 * 1024.0)) << "MiB unpacked):"; + + for (auto& i : willSubstitute) { + LOG(INFO) << i; + } + } + + if (!unknown.empty()) { + LOG(INFO) << "don't know how to build these paths" + << (settings.readOnlyMode + ? " (may be caused by read-only store access)" + : "") + << ":"; + + for (auto& i : unknown) { + LOG(INFO) << i; + } + } +} + +string getArg(const string& opt, Strings::iterator& i, + const Strings::iterator& end) { + ++i; + if (i == end) { + throw UsageError(format("'%1%' requires an argument") % opt); + } + + return *i; +} + +#if OPENSSL_VERSION_NUMBER < 0x10101000L +/* OpenSSL is not thread-safe by default - it will randomly crash + unless the user supplies a mutex locking function. So let's do + that. */ +static std::vector<std::mutex> opensslLocks; + +static void opensslLockCallback(int mode, int type, const char* file, + int line) { + if (mode & CRYPTO_LOCK) + opensslLocks[type].lock(); + else + opensslLocks[type].unlock(); +} +#endif + +static void sigHandler(int signo) {} + +void initNix() { + /* Turn on buffering for cerr. */ +#if HAVE_PUBSETBUF + static char buf[1024]; + std::cerr.rdbuf()->pubsetbuf(buf, sizeof(buf)); +#endif + +#if OPENSSL_VERSION_NUMBER < 0x10101000L + /* Initialise OpenSSL locking. */ + opensslLocks = std::vector<std::mutex>(CRYPTO_num_locks()); + CRYPTO_set_locking_callback(opensslLockCallback); +#endif + + loadConfFile(); + + startSignalHandlerThread(); + + /* Reset SIGCHLD to its default. */ + struct sigaction act; + sigemptyset(&act.sa_mask); + act.sa_handler = SIG_DFL; + act.sa_flags = 0; + if (sigaction(SIGCHLD, &act, nullptr) != 0) { + throw SysError("resetting SIGCHLD"); + } + + /* Install a dummy SIGUSR1 handler for use with pthread_kill(). */ + act.sa_handler = sigHandler; + if (sigaction(SIGUSR1, &act, nullptr) != 0) { + throw SysError("handling SIGUSR1"); + } + +#if __APPLE__ + /* HACK: on darwin, we need canโt use sigprocmask with SIGWINCH. + * Instead, add a dummy sigaction handler, and signalHandlerThread + * can handle the rest. */ + struct sigaction sa; + sa.sa_handler = sigHandler; + if (sigaction(SIGWINCH, &sa, 0)) { + throw SysError("handling SIGWINCH"); + } +#endif + + /* Register a SIGSEGV handler to detect stack overflows. */ + detectStackOverflow(); + + /* There is no privacy in the Nix system ;-) At least not for + now. In particular, store objects should be readable by + everybody. */ + umask(0022); + + /* Initialise the PRNG. */ + struct timeval tv; + gettimeofday(&tv, nullptr); + srandom(tv.tv_usec); + + /* On macOS, don't use the per-session TMPDIR (as set e.g. by + sshd). This breaks build users because they don't have access + to the TMPDIR, in particular in โnix-store --serveโ. */ +#if __APPLE__ + if (getuid() == 0 && hasPrefix(getEnv("TMPDIR"), "/var/folders/")) + unsetenv("TMPDIR"); +#endif +} + +LegacyArgs::LegacyArgs( + const std::string& programName, + std::function<bool(Strings::iterator& arg, const Strings::iterator& end)> + parseArg) + : MixCommonArgs(programName), parseArg(std::move(parseArg)) { + mkFlag() + .longName("no-build-output") + .shortName('Q') + .description("do not show build output") + .set(&settings.verboseBuild, false); + + mkFlag() + .longName("keep-failed") + .shortName('K') + .description("keep temporary directories of failed builds") + .set(&(bool&)settings.keepFailed, true); + + mkFlag() + .longName("keep-going") + .shortName('k') + .description("keep going after a build fails") + .set(&(bool&)settings.keepGoing, true); + + mkFlag() + .longName("fallback") + .description("build from source if substitution fails") + .set(&(bool&)settings.tryFallback, true); + + auto intSettingAlias = [&](char shortName, const std::string& longName, + const std::string& description, + const std::string& dest) { + mkFlag<unsigned int>(shortName, longName, description, [=](unsigned int n) { + settings.set(dest, std::to_string(n)); + }); + }; + + intSettingAlias(0, "cores", + "maximum number of CPU cores to use inside a build", "cores"); + intSettingAlias(0, "max-silent-time", + "number of seconds of silence before a build is killed", + "max-silent-time"); + intSettingAlias(0, "timeout", "number of seconds before a build is killed", + "timeout"); + + mkFlag(0, "readonly-mode", "do not write to the Nix store", + &settings.readOnlyMode); + + mkFlag(0, "no-gc-warning", "disable warning about not using '--add-root'", + &gcWarning, false); + + mkFlag() + .longName("store") + .label("store-uri") + .description("URI of the Nix store to use") + .dest(&(std::string&)settings.storeUri); +} + +bool LegacyArgs::processFlag(Strings::iterator& pos, Strings::iterator end) { + if (MixCommonArgs::processFlag(pos, end)) { + return true; + } + bool res = parseArg(pos, end); + if (res) { + ++pos; + } + return res; +} + +bool LegacyArgs::processArgs(const Strings& args, bool finish) { + if (args.empty()) { + return true; + } + assert(args.size() == 1); + Strings ss(args); + auto pos = ss.begin(); + if (!parseArg(pos, ss.end())) { + throw UsageError(format("unexpected argument '%1%'") % args.front()); + } + return true; +} + +void parseCmdLine( + int argc, char** argv, + std::function<bool(Strings::iterator& arg, const Strings::iterator& end)> + parseArg) { + parseCmdLine(baseNameOf(argv[0]), argvToStrings(argc, argv), + std::move(parseArg)); +} + +void parseCmdLine( + const string& programName, const Strings& args, + std::function<bool(Strings::iterator& arg, const Strings::iterator& end)> + parseArg) { + LegacyArgs(programName, std::move(parseArg)).parseCmdline(args); +} + +void printVersion(const string& programName) { + std::cout << format("%1% (Nix) %2%") % programName % nixVersion << std::endl; + + // TODO(tazjin): figure out what the fuck this is + /*if (verbosity > lvlInfo) { + Strings cfg; +#if HAVE_BOEHMGC + cfg.push_back("gc"); +#endif +#if HAVE_SODIUM + cfg.push_back("signed-caches"); +#endif + std::cout << "Features: " << concatStringsSep(", ", cfg) << "\n"; + std::cout << "Configuration file: " << settings.nixConfDir + "/nix.conf" + << "\n"; + std::cout << "Store directory: " << settings.nixStore << "\n"; + std::cout << "State directory: " << settings.nixStateDir << "\n"; + } */ + throw Exit(); +} + +void showManPage(const string& name) { + restoreSignals(); + setenv("MANPATH", settings.nixManDir.c_str(), 1); + execlp("man", "man", name.c_str(), nullptr); + throw SysError(format("command 'man %1%' failed") % name.c_str()); +} + +int handleExceptions(const string& programName, + const std::function<void()>& fun) { + ReceiveInterrupts receiveInterrupts; // FIXME: need better place for this + + string error = ANSI_RED "error:" ANSI_NORMAL " "; + try { + try { + fun(); + } catch (...) { + /* Subtle: we have to make sure that any `interrupted' + condition is discharged before we reach printMsg() + below, since otherwise it will throw an (uncaught) + exception. */ + setInterruptThrown(); + throw; + } + } catch (Exit& e) { + return e.status; + } catch (UsageError& e) { + LOG(INFO) << e.what(); + LOG(INFO) << "Try '" << programName << " " + << " --help' for more information."; + return 1; + } catch (BaseError& e) { + LOG(ERROR) << error << (settings.showTrace ? e.prefix() : "") << e.msg(); + if (!e.prefix().empty() && !settings.showTrace) { + LOG(INFO) << "(use '--show-trace' to show detailed location information)"; + } + return e.status; + } catch (std::bad_alloc& e) { + LOG(ERROR) << error << "failed to allocate: " << e.what(); + return 1; + } catch (std::exception& e) { + LOG(ERROR) << error << e.what(); + return 1; + } + + return 0; +} + +RunPager::RunPager() { + if (isatty(STDOUT_FILENO) == 0) { + return; + } + char* pager = getenv("NIX_PAGER"); + if (pager == nullptr) { + pager = getenv("PAGER"); + } + if (pager && ((string)pager == "" || (string)pager == "cat")) { + return; + } + + Pipe toPager; + toPager.create(); + + pid = startProcess([&]() { + if (dup2(toPager.readSide.get(), STDIN_FILENO) == -1) { + throw SysError("dupping stdin"); + } + if (getenv("LESS") == nullptr) { + setenv("LESS", "FRSXMK", 1); + } + restoreSignals(); + if (pager != nullptr) { + execl("/bin/sh", "sh", "-c", pager, nullptr); + } + execlp("pager", "pager", nullptr); + execlp("less", "less", nullptr); + execlp("more", "more", nullptr); + throw SysError(format("executing '%1%'") % pager); + }); + + pid.setKillSignal(SIGINT); + + if (dup2(toPager.writeSide.get(), STDOUT_FILENO) == -1) { + throw SysError("dupping stdout"); + } +} + +RunPager::~RunPager() { + try { + if (pid != -1) { + std::cout.flush(); + close(STDOUT_FILENO); + pid.wait(); + } + } catch (...) { + ignoreException(); + } +} + +string showBytes(unsigned long long bytes) { + return (format("%.2f MiB") % (bytes / (1024.0 * 1024.0))).str(); +} + +PrintFreed::~PrintFreed() { + if (show) { + std::cout << format("%1% store paths deleted, %2% freed\n") % + results.paths.size() % showBytes(results.bytesFreed); + } +} + +Exit::~Exit() = default; + +} // namespace nix diff --git a/third_party/nix/src/libmain/shared.hh b/third_party/nix/src/libmain/shared.hh new file mode 100644 index 000000000000..da1c9cc66ac3 --- /dev/null +++ b/third_party/nix/src/libmain/shared.hh @@ -0,0 +1,133 @@ +#pragma once + +#include <locale> + +#include <signal.h> + +#include "args.hh" +#include "common-args.hh" +#include "util.hh" + +namespace nix { + +class Exit : public std::exception { + public: + int status; + Exit() : status(0) {} + Exit(int status) : status(status) {} + virtual ~Exit(); +}; + +int handleExceptions(const string& programName, + const std::function<void()>& fun); + +/* Don't forget to call initPlugins() after settings are initialized! */ +void initNix(); + +void parseCmdLine( + int argc, char** argv, + std::function<bool(Strings::iterator& arg, const Strings::iterator& end)> + parseArg); + +void parseCmdLine( + const string& programName, const Strings& args, + std::function<bool(Strings::iterator& arg, const Strings::iterator& end)> + parseArg); + +void printVersion(const string& programName); + +/* Ugh. No better place to put this. */ +void printGCWarning(); + +class Store; + +void printMissing(const ref<Store>& store, const PathSet& paths); + +void printMissing(const ref<Store>& store, const PathSet& willBuild, + const PathSet& willSubstitute, const PathSet& unknown, + unsigned long long downloadSize, unsigned long long narSize); + +string getArg(const string& opt, Strings::iterator& i, + const Strings::iterator& end); + +template <class N> +N getIntArg(const string& opt, Strings::iterator& i, + const Strings::iterator& end, bool allowUnit) { + ++i; + if (i == end) { + throw UsageError(format("'%1%' requires an argument") % opt); + } + string s = *i; + N multiplier = 1; + if (allowUnit && !s.empty()) { + char u = std::toupper(*s.rbegin()); + if (std::isalpha(u)) { + if (u == 'K') { + multiplier = 1ULL << 10; + } else if (u == 'M') { + multiplier = 1ULL << 20; + } else if (u == 'G') { + multiplier = 1ULL << 30; + } else if (u == 'T') { + multiplier = 1ULL << 40; + } else { + throw UsageError(format("invalid unit specifier '%1%'") % u); + } + + s.resize(s.size() - 1); + } + } + N n; + if (!string2Int(s, n)) + throw UsageError(format("'%1%' requires an integer argument") % opt); + return n * multiplier; +} + +struct LegacyArgs : public MixCommonArgs { + std::function<bool(Strings::iterator& arg, const Strings::iterator& end)> + parseArg; + + LegacyArgs( + const std::string& programName, + std::function<bool(Strings::iterator& arg, const Strings::iterator& end)> + parseArg); + + bool processFlag(Strings::iterator& pos, Strings::iterator end) override; + + bool processArgs(const Strings& args, bool finish) override; +}; + +/* Show the manual page for the specified program. */ +void showManPage(const string& name); + +/* The constructor of this class starts a pager if stdout is a + terminal and $PAGER is set. Stdout is redirected to the pager. */ +class RunPager { + public: + RunPager(); + ~RunPager(); + + private: + Pid pid; +}; + +extern volatile ::sig_atomic_t blockInt; + +/* GC helpers. */ + +string showBytes(unsigned long long bytes); + +struct GCResults; + +struct PrintFreed { + bool show; + const GCResults& results; + PrintFreed(bool show, const GCResults& results) + : show(show), results(results) {} + ~PrintFreed(); +}; + +/* Install a SIGSEGV handler to detect stack overflows. */ +void detectStackOverflow(); + +} // namespace nix diff --git a/third_party/nix/src/libmain/stack.cc b/third_party/nix/src/libmain/stack.cc new file mode 100644 index 000000000000..f100b39156e4 --- /dev/null +++ b/third_party/nix/src/libmain/stack.cc @@ -0,0 +1,75 @@ +#include <csignal> +#include <cstddef> +#include <cstdlib> +#include <cstring> + +#include <unistd.h> + +#include "types.hh" + +namespace nix { + +static void sigsegvHandler(int signo, siginfo_t* info, void* ctx) { + /* Detect stack overflows by comparing the faulting address with + the stack pointer. Unfortunately, getting the stack pointer is + not portable. */ + bool haveSP = true; + char* sp = nullptr; +#if defined(__x86_64__) && defined(REG_RSP) + sp = (char*)((ucontext_t*)ctx)->uc_mcontext.gregs[REG_RSP]; +#elif defined(REG_ESP) + sp = (char*)((ucontext_t*)ctx)->uc_mcontext.gregs[REG_ESP]; +#else + haveSP = false; +#endif + + if (haveSP) { + ptrdiff_t diff = (char*)info->si_addr - sp; + if (diff < 0) { + diff = -diff; + } + if (diff < 4096) { + char msg[] = "error: stack overflow (possible infinite recursion)\n"; + [[gnu::unused]] auto res = write(2, msg, strlen(msg)); + _exit(1); // maybe abort instead? + } + } + + /* Restore default behaviour (i.e. segfault and dump core). */ + struct sigaction act; + sigfillset(&act.sa_mask); + act.sa_handler = SIG_DFL; + act.sa_flags = 0; + if (sigaction(SIGSEGV, &act, nullptr) != 0) { + abort(); + } +} + +void detectStackOverflow() { +#if defined(SA_SIGINFO) && defined(SA_ONSTACK) + /* Install a SIGSEGV handler to detect stack overflows. This + requires an alternative stack, otherwise the signal cannot be + delivered when we're out of stack space. */ + stack_t stack; + stack.ss_size = 4096 * 4 + MINSIGSTKSZ; + static auto stackBuf = std::make_unique<std::vector<char>>(stack.ss_size); + stack.ss_sp = stackBuf->data(); + if (stack.ss_sp == nullptr) { + throw Error("cannot allocate alternative stack"); + } + stack.ss_flags = 0; + if (sigaltstack(&stack, nullptr) == -1) { + throw SysError("cannot set alternative stack"); + } + + struct sigaction act; + sigfillset(&act.sa_mask); + act.sa_sigaction = sigsegvHandler; + act.sa_flags = SA_SIGINFO | SA_ONSTACK; + if (sigaction(SIGSEGV, &act, nullptr) != 0) { + throw SysError("resetting SIGSEGV"); + } +#endif +} + +} // namespace nix diff --git a/third_party/nix/src/libstore/binary-cache-store.cc b/third_party/nix/src/libstore/binary-cache-store.cc new file mode 100644 index 000000000000..487c12b5313e --- /dev/null +++ b/third_party/nix/src/libstore/binary-cache-store.cc @@ -0,0 +1,382 @@ +#include "binary-cache-store.hh" + +#include <chrono> +#include <future> +#include <memory> + +#include "archive.hh" +#include "compression.hh" +#include "derivations.hh" +#include "fs-accessor.hh" +#include "globals.hh" +#include "glog/logging.h" +#include "json.hh" +#include "nar-accessor.hh" +#include "nar-info-disk-cache.hh" +#include "nar-info.hh" +#include "remote-fs-accessor.hh" +#include "sync.hh" + +namespace nix { + +BinaryCacheStore::BinaryCacheStore(const Params& params) : Store(params) { + if (secretKeyFile != "") { + secretKey = std::make_unique<SecretKey>(readFile(secretKeyFile)); + } + + StringSink sink; + sink << narVersionMagic1; + narMagic = *sink.s; +} + +void BinaryCacheStore::init() { + std::string cacheInfoFile = "nix-cache-info"; + + auto cacheInfo = getFile(cacheInfoFile); + if (!cacheInfo) { + upsertFile(cacheInfoFile, "StoreDir: " + storeDir + "\n", + "text/x-nix-cache-info"); + } else { + for (auto& line : tokenizeString<Strings>(*cacheInfo, "\n")) { + size_t colon = line.find(':'); + if (colon == std::string::npos) { + continue; + } + auto name = line.substr(0, colon); + auto value = trim(line.substr(colon + 1, std::string::npos)); + if (name == "StoreDir") { + if (value != storeDir) { + throw Error(format("binary cache '%s' is for Nix stores with prefix " + "'%s', not '%s'") % + getUri() % value % storeDir); + } + } else if (name == "WantMassQuery") { + wantMassQuery_ = value == "1"; + } else if (name == "Priority") { + string2Int(value, priority); + } + } + } +} + +void BinaryCacheStore::getFile( + const std::string& path, + Callback<std::shared_ptr<std::string>> callback) noexcept { + try { + callback(getFile(path)); + } catch (...) { + callback.rethrow(); + } +} + +void BinaryCacheStore::getFile(const std::string& path, Sink& sink) { + std::promise<std::shared_ptr<std::string>> promise; + getFile(path, {[&](std::future<std::shared_ptr<std::string>> result) { + try { + promise.set_value(result.get()); + } catch (...) { + promise.set_exception(std::current_exception()); + } + }}); + auto data = promise.get_future().get(); + sink((unsigned char*)data->data(), data->size()); +} + +std::shared_ptr<std::string> BinaryCacheStore::getFile( + const std::string& path) { + StringSink sink; + try { + getFile(path, sink); + } catch (NoSuchBinaryCacheFile&) { + return nullptr; + } + return sink.s; +} + +Path BinaryCacheStore::narInfoFileFor(const Path& storePath) { + assertStorePath(storePath); + return storePathToHash(storePath) + ".narinfo"; +} + +void BinaryCacheStore::writeNarInfo(const ref<NarInfo>& narInfo) { + auto narInfoFile = narInfoFileFor(narInfo->path); + + upsertFile(narInfoFile, narInfo->to_string(), "text/x-nix-narinfo"); + + auto hashPart = storePathToHash(narInfo->path); + + { + auto state_(state.lock()); + state_->pathInfoCache.upsert(hashPart, std::shared_ptr<NarInfo>(narInfo)); + } + + if (diskCache) { + diskCache->upsertNarInfo(getUri(), hashPart, + std::shared_ptr<NarInfo>(narInfo)); + } +} + +void BinaryCacheStore::addToStore(const ValidPathInfo& info, + const ref<std::string>& nar, + RepairFlag repair, CheckSigsFlag checkSigs, + std::shared_ptr<FSAccessor> accessor) { + if ((repair == 0u) && isValidPath(info.path)) { + return; + } + + /* Verify that all references are valid. This may do some .narinfo + reads, but typically they'll already be cached. */ + for (auto& ref : info.references) { + try { + if (ref != info.path) { + queryPathInfo(ref); + } + } catch (InvalidPath&) { + throw Error(format("cannot add '%s' to the binary cache because the " + "reference '%s' is not valid") % + info.path % ref); + } + } + + assert(nar->compare(0, narMagic.size(), narMagic) == 0); + + auto narInfo = make_ref<NarInfo>(info); + + narInfo->narSize = nar->size(); + narInfo->narHash = hashString(htSHA256, *nar); + + if (info.narHash && info.narHash != narInfo->narHash) { + throw Error( + format("refusing to copy corrupted path '%1%' to binary cache") % + info.path); + } + + auto accessor_ = std::dynamic_pointer_cast<RemoteFSAccessor>(accessor); + + /* Optionally write a JSON file containing a listing of the + contents of the NAR. */ + if (writeNARListing) { + std::ostringstream jsonOut; + + { + JSONObject jsonRoot(jsonOut); + jsonRoot.attr("version", 1); + + auto narAccessor = makeNarAccessor(nar); + + if (accessor_) { + accessor_->addToCache(info.path, *nar, narAccessor); + } + + { + auto res = jsonRoot.placeholder("root"); + listNar(res, narAccessor, "", true); + } + } + + upsertFile(storePathToHash(info.path) + ".ls", jsonOut.str(), + "application/json"); + } + + else { + if (accessor_) { + accessor_->addToCache(info.path, *nar, makeNarAccessor(nar)); + } + } + + /* Compress the NAR. */ + narInfo->compression = compression; + auto now1 = std::chrono::steady_clock::now(); + auto narCompressed = compress(compression, *nar, parallelCompression); + auto now2 = std::chrono::steady_clock::now(); + narInfo->fileHash = hashString(htSHA256, *narCompressed); + narInfo->fileSize = narCompressed->size(); + + auto duration = + std::chrono::duration_cast<std::chrono::milliseconds>(now2 - now1) + .count(); + DLOG(INFO) << "copying path '" << narInfo->path << "' (" << narInfo->narSize + << " bytes, compressed " + << ((1.0 - (double)narCompressed->size() / nar->size()) * 100.0) + << "% in " << duration << "ms) to binary cache"; + + /* Atomically write the NAR file. */ + narInfo->url = "nar/" + narInfo->fileHash.to_string(Base32, false) + ".nar" + + (compression == "xz" ? ".xz" + : compression == "bzip2" + ? ".bz2" + : compression == "br" ? ".br" : ""); + if ((repair != 0u) || !fileExists(narInfo->url)) { + stats.narWrite++; + upsertFile(narInfo->url, *narCompressed, "application/x-nix-nar"); + } else { + stats.narWriteAverted++; + } + + stats.narWriteBytes += nar->size(); + stats.narWriteCompressedBytes += narCompressed->size(); + stats.narWriteCompressionTimeMs += duration; + + /* Atomically write the NAR info file.*/ + if (secretKey) { + narInfo->sign(*secretKey); + } + + writeNarInfo(narInfo); + + stats.narInfoWrite++; +} + +bool BinaryCacheStore::isValidPathUncached(const Path& storePath) { + // FIXME: this only checks whether a .narinfo with a matching hash + // part exists. So โf4kb...-fooโ matches โf4kb...-barโ, even + // though they shouldn't. Not easily fixed. + return fileExists(narInfoFileFor(storePath)); +} + +void BinaryCacheStore::narFromPath(const Path& storePath, Sink& sink) { + auto info = queryPathInfo(storePath).cast<const NarInfo>(); + + uint64_t narSize = 0; + + LambdaSink wrapperSink([&](const unsigned char* data, size_t len) { + sink(data, len); + narSize += len; + }); + + auto decompressor = makeDecompressionSink(info->compression, wrapperSink); + + try { + getFile(info->url, *decompressor); + } catch (NoSuchBinaryCacheFile& e) { + throw SubstituteGone(e.what()); + } + + decompressor->finish(); + + stats.narRead++; + // stats.narReadCompressedBytes += nar->size(); // FIXME + stats.narReadBytes += narSize; +} + +void BinaryCacheStore::queryPathInfoUncached( + const Path& storePath, + Callback<std::shared_ptr<ValidPathInfo>> callback) noexcept { + auto uri = getUri(); + LOG(INFO) << "querying info about '" << storePath << "' on '" << uri << "'"; + + auto narInfoFile = narInfoFileFor(storePath); + + auto callbackPtr = std::make_shared<decltype(callback)>(std::move(callback)); + + getFile(narInfoFile, {[=](std::future<std::shared_ptr<std::string>> fut) { + try { + auto data = fut.get(); + + if (!data) { + return (*callbackPtr)(nullptr); + } + + stats.narInfoRead++; + + (*callbackPtr)( + (std::shared_ptr<ValidPathInfo>)std::make_shared<NarInfo>( + *this, *data, narInfoFile)); + + } catch (...) { + callbackPtr->rethrow(); + } + }}); +} + +Path BinaryCacheStore::addToStore(const string& name, const Path& srcPath, + bool recursive, HashType hashAlgo, + PathFilter& filter, RepairFlag repair) { + // FIXME: some cut&paste from LocalStore::addToStore(). + + /* Read the whole path into memory. This is not a very scalable + method for very large paths, but `copyPath' is mainly used for + small files. */ + StringSink sink; + Hash h; + if (recursive) { + dumpPath(srcPath, sink, filter); + h = hashString(hashAlgo, *sink.s); + } else { + auto s = readFile(srcPath); + dumpString(s, sink); + h = hashString(hashAlgo, s); + } + + ValidPathInfo info; + info.path = makeFixedOutputPath(recursive, h, name); + + addToStore(info, sink.s, repair, CheckSigs, nullptr); + + return info.path; +} + +Path BinaryCacheStore::addTextToStore(const string& name, const string& s, + const PathSet& references, + RepairFlag repair) { + ValidPathInfo info; + info.path = computeStorePathForText(name, s, references); + info.references = references; + + if ((repair != 0u) || !isValidPath(info.path)) { + StringSink sink; + dumpString(s, sink); + addToStore(info, sink.s, repair, CheckSigs, nullptr); + } + + return info.path; +} + +ref<FSAccessor> BinaryCacheStore::getFSAccessor() { + return make_ref<RemoteFSAccessor>(ref<Store>(shared_from_this()), + localNarCache); +} + +void BinaryCacheStore::addSignatures(const Path& storePath, + const StringSet& sigs) { + /* Note: this is inherently racy since there is no locking on + binary caches. In particular, with S3 this unreliable, even + when addSignatures() is called sequentially on a path, because + S3 might return an outdated cached version. */ + + auto narInfo = make_ref<NarInfo>((NarInfo&)*queryPathInfo(storePath)); + + narInfo->sigs.insert(sigs.begin(), sigs.end()); + + auto narInfoFile = narInfoFileFor(narInfo->path); + + writeNarInfo(narInfo); +} + +std::shared_ptr<std::string> BinaryCacheStore::getBuildLog(const Path& path) { + Path drvPath; + + if (isDerivation(path)) { + drvPath = path; + } else { + try { + auto info = queryPathInfo(path); + // FIXME: add a "Log" field to .narinfo + if (info->deriver.empty()) { + return nullptr; + } + drvPath = info->deriver; + } catch (InvalidPath&) { + return nullptr; + } + } + + auto logPath = "log/" + baseNameOf(drvPath); + + DLOG(INFO) << "fetching build log from binary cache '" << getUri() << "/" + << logPath << "'"; + + return getFile(logPath); +} + +} // namespace nix diff --git a/third_party/nix/src/libstore/binary-cache-store.hh b/third_party/nix/src/libstore/binary-cache-store.hh new file mode 100644 index 000000000000..f5bd66bbd6e1 --- /dev/null +++ b/third_party/nix/src/libstore/binary-cache-store.hh @@ -0,0 +1,114 @@ +#pragma once + +#include <atomic> + +#include "crypto.hh" +#include "pool.hh" +#include "store-api.hh" + +namespace nix { + +struct NarInfo; + +class BinaryCacheStore : public Store { + public: + const Setting<std::string> compression{ + this, "xz", "compression", + "NAR compression method ('xz', 'bzip2', or 'none')"}; + const Setting<bool> writeNARListing{ + this, false, "write-nar-listing", + "whether to write a JSON file listing the files in each NAR"}; + const Setting<Path> secretKeyFile{ + this, "", "secret-key", + "path to secret key used to sign the binary cache"}; + const Setting<Path> localNarCache{this, "", "local-nar-cache", + "path to a local cache of NARs"}; + const Setting<bool> parallelCompression{ + this, false, "parallel-compression", + "enable multi-threading compression, available for xz only currently"}; + + private: + std::unique_ptr<SecretKey> secretKey; + + protected: + BinaryCacheStore(const Params& params); + + public: + virtual bool fileExists(const std::string& path) = 0; + + virtual void upsertFile(const std::string& path, const std::string& data, + const std::string& mimeType) = 0; + + /* Note: subclasses must implement at least one of the two + following getFile() methods. */ + + /* Dump the contents of the specified file to a sink. */ + virtual void getFile(const std::string& path, Sink& sink); + + /* Fetch the specified file and call the specified callback with + the result. A subclass may implement this asynchronously. */ + virtual void getFile( + const std::string& path, + Callback<std::shared_ptr<std::string>> callback) noexcept; + + std::shared_ptr<std::string> getFile(const std::string& path); + + protected: + bool wantMassQuery_ = false; + int priority = 50; + + public: + virtual void init(); + + private: + std::string narMagic; + + std::string narInfoFileFor(const Path& storePath); + + void writeNarInfo(const ref<NarInfo>& narInfo); + + public: + bool isValidPathUncached(const Path& path) override; + + void queryPathInfoUncached( + const Path& path, + Callback<std::shared_ptr<ValidPathInfo>> callback) noexcept override; + + Path queryPathFromHashPart(const string& hashPart) override { + unsupported("queryPathFromHashPart"); + } + + bool wantMassQuery() override { return wantMassQuery_; } + + void addToStore(const ValidPathInfo& info, const ref<std::string>& nar, + RepairFlag repair, CheckSigsFlag checkSigs, + std::shared_ptr<FSAccessor> accessor) override; + + Path addToStore(const string& name, const Path& srcPath, bool recursive, + HashType hashAlgo, PathFilter& filter, + RepairFlag repair) override; + + Path addTextToStore(const string& name, const string& s, + const PathSet& references, RepairFlag repair) override; + + void narFromPath(const Path& path, Sink& sink) override; + + BuildResult buildDerivation(const Path& drvPath, const BasicDerivation& drv, + BuildMode buildMode) override { + unsupported("buildDerivation"); + } + + void ensurePath(const Path& path) override { unsupported("ensurePath"); } + + ref<FSAccessor> getFSAccessor() override; + + void addSignatures(const Path& storePath, const StringSet& sigs) override; + + std::shared_ptr<std::string> getBuildLog(const Path& path) override; + + int getPriority() override { return priority; } +}; + +MakeError(NoSuchBinaryCacheFile, Error); + +} // namespace nix diff --git a/third_party/nix/src/libstore/build.cc b/third_party/nix/src/libstore/build.cc new file mode 100644 index 000000000000..fe6463584759 --- /dev/null +++ b/third_party/nix/src/libstore/build.cc @@ -0,0 +1,4955 @@ +#include <algorithm> +#include <cerrno> +#include <chrono> +#include <climits> +#include <cstring> +#include <future> +#include <iostream> +#include <map> +#include <memory> +#include <queue> +#include <regex> +#include <sstream> +#include <thread> + +#include <fcntl.h> +#include <grp.h> +#include <netdb.h> +#include <pwd.h> +#include <sys/resource.h> +#include <sys/select.h> +#include <sys/socket.h> +#include <sys/stat.h> +#include <sys/time.h> +#include <sys/types.h> +#include <sys/utsname.h> +#include <sys/wait.h> +#include <termios.h> +#include <unistd.h> + +#include "affinity.hh" +#include "archive.hh" +#include "builtins.hh" +#include "compression.hh" +#include "download.hh" +#include "finally.hh" +#include "globals.hh" +#include "glog/logging.h" +#include "json.hh" +#include "local-store.hh" +#include "machines.hh" +#include "nar-info.hh" +#include "parsed-derivations.hh" +#include "pathlocks.hh" +#include "references.hh" +#include "util.hh" + +/* Includes required for chroot support. */ +#if __linux__ +#include <net/if.h> +#include <netinet/ip.h> +#include <sched.h> +#include <sys/ioctl.h> +#include <sys/mman.h> +#include <sys/mount.h> +#include <sys/param.h> +#include <sys/personality.h> +#include <sys/socket.h> +#include <sys/syscall.h> +#if HAVE_SECCOMP +#include <seccomp.h> +#endif +#define pivot_root(new_root, put_old) \ + (syscall(SYS_pivot_root, new_root, put_old)) +#endif + +#if HAVE_STATVFS +#include <sys/statvfs.h> +#endif + +#include <nlohmann/json.hpp> +#include <utility> + +namespace nix { + +using std::map; + +static string pathNullDevice = "/dev/null"; + +/* Forward definition. */ +class Worker; +struct HookInstance; + +/* A pointer to a goal. */ +class Goal; +class DerivationGoal; +using GoalPtr = std::shared_ptr<Goal>; +using WeakGoalPtr = std::weak_ptr<Goal>; + +struct CompareGoalPtrs { + bool operator()(const GoalPtr& a, const GoalPtr& b) const; +}; + +/* Set of goals. */ +typedef set<GoalPtr, CompareGoalPtrs> Goals; +using WeakGoals = list<WeakGoalPtr>; + +/* A map of paths to goals (and the other way around). */ +typedef map<Path, WeakGoalPtr> WeakGoalMap; + +class Goal : public std::enable_shared_from_this<Goal> { + public: + typedef enum { + ecBusy, + ecSuccess, + ecFailed, + ecNoSubstituters, + ecIncompleteClosure + } ExitCode; + + protected: + /* Backlink to the worker. */ + Worker& worker; + + /* Goals that this goal is waiting for. */ + Goals waitees; + + /* Goals waiting for this one to finish. Must use weak pointers + here to prevent cycles. */ + WeakGoals waiters; + + /* Number of goals we are/were waiting for that have failed. */ + unsigned int nrFailed; + + /* Number of substitution goals we are/were waiting for that + failed because there are no substituters. */ + unsigned int nrNoSubstituters; + + /* Number of substitution goals we are/were waiting for that + failed because othey had unsubstitutable references. */ + unsigned int nrIncompleteClosure; + + /* Name of this goal for debugging purposes. */ + string name; + + /* Whether the goal is finished. */ + ExitCode exitCode; + + explicit Goal(Worker& worker) : worker(worker) { + nrFailed = nrNoSubstituters = nrIncompleteClosure = 0; + exitCode = ecBusy; + } + + virtual ~Goal() { trace("goal destroyed"); } + + public: + virtual void work() = 0; + + void addWaitee(const GoalPtr& waitee); + + virtual void waiteeDone(GoalPtr waitee, ExitCode result); + + virtual void handleChildOutput(int fd, const string& data) { abort(); } + + virtual void handleEOF(int fd) { abort(); } + + void trace(const FormatOrString& fs); + + string getName() { return name; } + + ExitCode getExitCode() { return exitCode; } + + /* Callback in case of a timeout. It should wake up its waiters, + get rid of any running child processes that are being monitored + by the worker (important!), etc. */ + virtual void timedOut() = 0; + + virtual string key() = 0; + + protected: + virtual void amDone(ExitCode result); +}; + +bool CompareGoalPtrs::operator()(const GoalPtr& a, const GoalPtr& b) const { + string s1 = a->key(); + string s2 = b->key(); + return s1 < s2; +} + +using steady_time_point = std::chrono::time_point<std::chrono::steady_clock>; + +/* A mapping used to remember for each child process to what goal it + belongs, and file descriptors for receiving log data and output + path creation commands. */ +struct Child { + WeakGoalPtr goal; + Goal* goal2; // ugly hackery + set<int> fds; + bool respectTimeouts; + bool inBuildSlot; + steady_time_point lastOutput; /* time we last got output on stdout/stderr */ + steady_time_point timeStarted; +}; + +/* The worker class. */ +class Worker { + private: + /* Note: the worker should only have strong pointers to the + top-level goals. */ + + /* The top-level goals of the worker. */ + Goals topGoals; + + /* Goals that are ready to do some work. */ + WeakGoals awake; + + /* Goals waiting for a build slot. */ + WeakGoals wantingToBuild; + + /* Child processes currently running. */ + std::list<Child> children; + + /* Number of build slots occupied. This includes local builds and + substitutions but not remote builds via the build hook. */ + unsigned int nrLocalBuilds; + + /* Maps used to prevent multiple instantiations of a goal for the + same derivation / path. */ + WeakGoalMap derivationGoals; + WeakGoalMap substitutionGoals; + + /* Goals waiting for busy paths to be unlocked. */ + WeakGoals waitingForAnyGoal; + + /* Goals sleeping for a few seconds (polling a lock). */ + WeakGoals waitingForAWhile; + + /* Last time the goals in `waitingForAWhile' where woken up. */ + steady_time_point lastWokenUp; + + /* Cache for pathContentsGood(). */ + std::map<Path, bool> pathContentsGoodCache; + + public: + /* Set if at least one derivation had a BuildError (i.e. permanent + failure). */ + bool permanentFailure; + + /* Set if at least one derivation had a timeout. */ + bool timedOut; + + /* Set if at least one derivation fails with a hash mismatch. */ + bool hashMismatch; + + /* Set if at least one derivation is not deterministic in check mode. */ + bool checkMismatch; + + LocalStore& store; + + std::unique_ptr<HookInstance> hook; + + uint64_t expectedBuilds = 0; + uint64_t doneBuilds = 0; + uint64_t failedBuilds = 0; + uint64_t runningBuilds = 0; + + uint64_t expectedSubstitutions = 0; + uint64_t doneSubstitutions = 0; + uint64_t failedSubstitutions = 0; + uint64_t runningSubstitutions = 0; + uint64_t expectedDownloadSize = 0; + uint64_t doneDownloadSize = 0; + uint64_t expectedNarSize = 0; + uint64_t doneNarSize = 0; + + /* Whether to ask the build hook if it can build a derivation. If + it answers with "decline-permanently", we don't try again. */ + bool tryBuildHook = true; + + explicit Worker(LocalStore& store); + ~Worker(); + + /* Make a goal (with caching). */ + GoalPtr makeDerivationGoal(const Path& drvPath, + const StringSet& wantedOutputs, + BuildMode buildMode = bmNormal); + std::shared_ptr<DerivationGoal> makeBasicDerivationGoal( + const Path& drvPath, const BasicDerivation& drv, + BuildMode buildMode = bmNormal); + GoalPtr makeSubstitutionGoal(const Path& storePath, + RepairFlag repair = NoRepair); + + /* Remove a dead goal. */ + void removeGoal(const GoalPtr& goal); + + /* Wake up a goal (i.e., there is something for it to do). */ + void wakeUp(const GoalPtr& goal); + + /* Return the number of local build and substitution processes + currently running (but not remote builds via the build + hook). */ + unsigned int getNrLocalBuilds(); + + /* Registers a running child process. `inBuildSlot' means that + the process counts towards the jobs limit. */ + void childStarted(const GoalPtr& goal, const set<int>& fds, bool inBuildSlot, + bool respectTimeouts); + + /* Unregisters a running child process. `wakeSleepers' should be + false if there is no sense in waking up goals that are sleeping + because they can't run yet (e.g., there is no free build slot, + or the hook would still say `postpone'). */ + void childTerminated(Goal* goal, bool wakeSleepers = true); + + /* Put `goal' to sleep until a build slot becomes available (which + might be right away). */ + void waitForBuildSlot(const GoalPtr& goal); + + /* Wait for any goal to finish. Pretty indiscriminate way to + wait for some resource that some other goal is holding. */ + void waitForAnyGoal(GoalPtr goal); + + /* Wait for a few seconds and then retry this goal. Used when + waiting for a lock held by another process. This kind of + polling is inefficient, but POSIX doesn't really provide a way + to wait for multiple locks in the main select() loop. */ + void waitForAWhile(GoalPtr goal); + + /* Loop until the specified top-level goals have finished. */ + void run(const Goals& topGoals); + + /* Wait for input to become available. */ + void waitForInput(); + + unsigned int exitStatus(); + + /* Check whether the given valid path exists and has the right + contents. */ + bool pathContentsGood(const Path& path); + + void markContentsGood(const Path& path); +}; + +////////////////////////////////////////////////////////////////////// + +void addToWeakGoals(WeakGoals& goals, const GoalPtr& p) { + // FIXME: necessary? + // FIXME: O(n) + for (auto& i : goals) { + if (i.lock() == p) { + return; + } + } + goals.push_back(p); +} + +void Goal::addWaitee(const GoalPtr& waitee) { + waitees.insert(waitee); + addToWeakGoals(waitee->waiters, shared_from_this()); +} + +void Goal::waiteeDone(GoalPtr waitee, ExitCode result) { + assert(waitees.find(waitee) != waitees.end()); + waitees.erase(waitee); + + trace(format("waitee '%1%' done; %2% left") % waitee->name % waitees.size()); + + if (result == ecFailed || result == ecNoSubstituters || + result == ecIncompleteClosure) { + ++nrFailed; + } + + if (result == ecNoSubstituters) { + ++nrNoSubstituters; + } + + if (result == ecIncompleteClosure) { + ++nrIncompleteClosure; + } + + if (waitees.empty() || (result == ecFailed && !settings.keepGoing)) { + /* If we failed and keepGoing is not set, we remove all + remaining waitees. */ + for (auto& goal : waitees) { + WeakGoals waiters2; + for (auto& j : goal->waiters) { + if (j.lock() != shared_from_this()) { + waiters2.push_back(j); + } + } + goal->waiters = waiters2; + } + waitees.clear(); + + worker.wakeUp(shared_from_this()); + } +} + +void Goal::amDone(ExitCode result) { + trace("done"); + assert(exitCode == ecBusy); + assert(result == ecSuccess || result == ecFailed || + result == ecNoSubstituters || result == ecIncompleteClosure); + exitCode = result; + for (auto& i : waiters) { + GoalPtr goal = i.lock(); + if (goal) { + goal->waiteeDone(shared_from_this(), result); + } + } + waiters.clear(); + worker.removeGoal(shared_from_this()); +} + +void Goal::trace(const FormatOrString& fs) { + DLOG(INFO) << name << ": " << fs.s; +} + +////////////////////////////////////////////////////////////////////// + +/* Common initialisation performed in child processes. */ +static void commonChildInit(Pipe& logPipe) { + restoreSignals(); + + /* Put the child in a separate session (and thus a separate + process group) so that it has no controlling terminal (meaning + that e.g. ssh cannot open /dev/tty) and it doesn't receive + terminal signals. */ + if (setsid() == -1) { + throw SysError(format("creating a new session")); + } + + /* Dup the write side of the logger pipe into stderr. */ + if (dup2(logPipe.writeSide.get(), STDERR_FILENO) == -1) { + throw SysError("cannot pipe standard error into log file"); + } + + /* Dup stderr to stdout. */ + if (dup2(STDERR_FILENO, STDOUT_FILENO) == -1) { + throw SysError("cannot dup stderr into stdout"); + } + + /* Reroute stdin to /dev/null. */ + int fdDevNull = open(pathNullDevice.c_str(), O_RDWR); + if (fdDevNull == -1) { + throw SysError(format("cannot open '%1%'") % pathNullDevice); + } + if (dup2(fdDevNull, STDIN_FILENO) == -1) { + throw SysError("cannot dup null device into stdin"); + } + close(fdDevNull); +} + +void handleDiffHook(uid_t uid, uid_t gid, Path tryA, Path tryB, Path drvPath, + Path tmpDir) { + auto diffHook = settings.diffHook; + if (diffHook != "" && settings.runDiffHook) { + try { + RunOptions diffHookOptions( + diffHook, {std::move(tryA), std::move(tryB), std::move(drvPath), + std::move(tmpDir)}); + diffHookOptions.searchPath = true; + diffHookOptions.uid = uid; + diffHookOptions.gid = gid; + diffHookOptions.chdir = "/"; + + auto diffRes = runProgram(diffHookOptions); + if (!statusOk(diffRes.first)) { + throw ExecError(diffRes.first, + fmt("diff-hook program '%1%' %2%", diffHook, + statusToString(diffRes.first))); + } + + if (!diffRes.second.empty()) { + LOG(ERROR) << chomp(diffRes.second); + } + } catch (Error& error) { + LOG(ERROR) << "diff hook execution failed: " << error.what(); + } + } +} + +////////////////////////////////////////////////////////////////////// + +class UserLock { + private: + /* POSIX locks suck. If we have a lock on a file, and we open and + close that file again (without closing the original file + descriptor), we lose the lock. So we have to be *very* careful + not to open a lock file on which we are holding a lock. */ + static Sync<PathSet> lockedPaths_; + + Path fnUserLock; + AutoCloseFD fdUserLock; + + string user; + uid_t uid; + gid_t gid; + std::vector<gid_t> supplementaryGIDs; + + public: + UserLock(); + ~UserLock(); + + void kill(); + + string getUser() { return user; } + uid_t getUID() { + assert(uid); + return uid; + } + uid_t getGID() { + assert(gid); + return gid; + } + std::vector<gid_t> getSupplementaryGIDs() { return supplementaryGIDs; } + + bool enabled() { return uid != 0; } +}; + +Sync<PathSet> UserLock::lockedPaths_; + +UserLock::UserLock() { + assert(settings.buildUsersGroup != ""); + + /* Get the members of the build-users-group. */ + struct group* gr = getgrnam(settings.buildUsersGroup.get().c_str()); + if (gr == nullptr) { + throw Error( + format( + "the group '%1%' specified in 'build-users-group' does not exist") % + settings.buildUsersGroup); + } + gid = gr->gr_gid; + + /* Copy the result of getgrnam. */ + Strings users; + for (char** p = gr->gr_mem; *p != nullptr; ++p) { + DLOG(INFO) << "found build user " << *p; + users.push_back(*p); + } + + if (users.empty()) { + throw Error(format("the build users group '%1%' has no members") % + settings.buildUsersGroup); + } + + /* Find a user account that isn't currently in use for another + build. */ + for (auto& i : users) { + DLOG(INFO) << "trying user " << i; + + struct passwd* pw = getpwnam(i.c_str()); + if (pw == nullptr) { + throw Error(format("the user '%1%' in the group '%2%' does not exist") % + i % settings.buildUsersGroup); + } + + createDirs(settings.nixStateDir + "/userpool"); + + fnUserLock = + (format("%1%/userpool/%2%") % settings.nixStateDir % pw->pw_uid).str(); + + { + auto lockedPaths(lockedPaths_.lock()); + if (lockedPaths->count(fnUserLock) != 0u) { + /* We already have a lock on this one. */ + continue; + } + lockedPaths->insert(fnUserLock); + } + + try { + AutoCloseFD fd = + open(fnUserLock.c_str(), O_RDWR | O_CREAT | O_CLOEXEC, 0600); + if (!fd) { + throw SysError(format("opening user lock '%1%'") % fnUserLock); + } + + if (lockFile(fd.get(), ltWrite, false)) { + fdUserLock = std::move(fd); + user = i; + uid = pw->pw_uid; + + /* Sanity check... */ + if (uid == getuid() || uid == geteuid()) { + throw Error(format("the Nix user should not be a member of '%1%'") % + settings.buildUsersGroup); + } + +#if __linux__ + /* Get the list of supplementary groups of this build user. This + is usually either empty or contains a group such as "kvm". */ + supplementaryGIDs.resize(10); + int ngroups = supplementaryGIDs.size(); + int err = getgrouplist(pw->pw_name, pw->pw_gid, + supplementaryGIDs.data(), &ngroups); + if (err == -1) { + throw Error( + format("failed to get list of supplementary groups for '%1%'") % + pw->pw_name); + } + + supplementaryGIDs.resize(ngroups); +#endif + + return; + } + + } catch (...) { + lockedPaths_.lock()->erase(fnUserLock); + } + } + + throw Error(format("all build users are currently in use; " + "consider creating additional users and adding them to " + "the '%1%' group") % + settings.buildUsersGroup); +} + +UserLock::~UserLock() { + auto lockedPaths(lockedPaths_.lock()); + assert(lockedPaths->count(fnUserLock)); + lockedPaths->erase(fnUserLock); +} + +void UserLock::kill() { killUser(uid); } + +////////////////////////////////////////////////////////////////////// + +struct HookInstance { + /* Pipes for talking to the build hook. */ + Pipe toHook; + + /* Pipe for the hook's standard output/error. */ + Pipe fromHook; + + /* Pipe for the builder's standard output/error. */ + Pipe builderOut; + + /* The process ID of the hook. */ + Pid pid; + + FdSink sink; + + HookInstance(); + + ~HookInstance(); +}; + +HookInstance::HookInstance() { + DLOG(INFO) << "starting build hook " << settings.buildHook; + + /* Create a pipe to get the output of the child. */ + fromHook.create(); + + /* Create the communication pipes. */ + toHook.create(); + + /* Create a pipe to get the output of the builder. */ + builderOut.create(); + + /* Fork the hook. */ + pid = startProcess([&]() { + commonChildInit(fromHook); + + if (chdir("/") == -1) { + throw SysError("changing into /"); + } + + /* Dup the communication pipes. */ + if (dup2(toHook.readSide.get(), STDIN_FILENO) == -1) { + throw SysError("dupping to-hook read side"); + } + + /* Use fd 4 for the builder's stdout/stderr. */ + if (dup2(builderOut.writeSide.get(), 4) == -1) { + throw SysError("dupping builder's stdout/stderr"); + } + + /* Hack: pass the read side of that fd to allow build-remote + to read SSH error messages. */ + if (dup2(builderOut.readSide.get(), 5) == -1) { + throw SysError("dupping builder's stdout/stderr"); + } + + Strings args = { + baseNameOf(settings.buildHook), + // std::to_string(verbosity), // TODO(tazjin): what? + }; + + execv(settings.buildHook.get().c_str(), stringsToCharPtrs(args).data()); + + throw SysError("executing '%s'", settings.buildHook); + }); + + pid.setSeparatePG(true); + fromHook.writeSide = -1; + toHook.readSide = -1; + + sink = FdSink(toHook.writeSide.get()); + std::map<std::string, Config::SettingInfo> settings; + globalConfig.getSettings(settings); + for (auto& setting : settings) { + sink << 1 << setting.first << setting.second.value; + } + sink << 0; +} + +HookInstance::~HookInstance() { + try { + toHook.writeSide = -1; + if (pid != -1) { + pid.kill(); + } + } catch (...) { + ignoreException(); + } +} + +////////////////////////////////////////////////////////////////////// + +typedef map<std::string, std::string> StringRewrites; + +std::string rewriteStrings(std::string s, const StringRewrites& rewrites) { + for (auto& i : rewrites) { + size_t j = 0; + while ((j = s.find(i.first, j)) != string::npos) { + s.replace(j, i.first.size(), i.second); + } + } + return s; +} + +////////////////////////////////////////////////////////////////////// + +typedef enum { rpAccept, rpDecline, rpPostpone } HookReply; + +class SubstitutionGoal; + +class DerivationGoal : public Goal { + private: + /* Whether to use an on-disk .drv file. */ + bool useDerivation; + + /* The path of the derivation. */ + Path drvPath; + + /* The specific outputs that we need to build. Empty means all of + them. */ + StringSet wantedOutputs; + + /* Whether additional wanted outputs have been added. */ + bool needRestart = false; + + /* Whether to retry substituting the outputs after building the + inputs. */ + bool retrySubstitution; + + /* The derivation stored at drvPath. */ + std::unique_ptr<BasicDerivation> drv; + + std::unique_ptr<ParsedDerivation> parsedDrv; + + /* The remainder is state held during the build. */ + + /* Locks on the output paths. */ + PathLocks outputLocks; + + /* All input paths (that is, the union of FS closures of the + immediate input paths). */ + PathSet inputPaths; + + /* Referenceable paths (i.e., input and output paths). */ + PathSet allPaths; + + /* Outputs that are already valid. If we're repairing, these are + the outputs that are valid *and* not corrupt. */ + PathSet validPaths; + + /* Outputs that are corrupt or not valid. */ + PathSet missingPaths; + + /* User selected for running the builder. */ + std::unique_ptr<UserLock> buildUser; + + /* The process ID of the builder. */ + Pid pid; + + /* The temporary directory. */ + Path tmpDir; + + /* The path of the temporary directory in the sandbox. */ + Path tmpDirInSandbox; + + /* File descriptor for the log file. */ + AutoCloseFD fdLogFile; + std::shared_ptr<BufferedSink> logFileSink, logSink; + + /* Number of bytes received from the builder's stdout/stderr. */ + unsigned long logSize; + + /* The most recent log lines. */ + std::list<std::string> logTail; + + std::string currentLogLine; + size_t currentLogLinePos = 0; // to handle carriage return + + std::string currentHookLine; + + /* Pipe for the builder's standard output/error. */ + Pipe builderOut; + + /* Pipe for synchronising updates to the builder user namespace. */ + Pipe userNamespaceSync; + + /* The build hook. */ + std::unique_ptr<HookInstance> hook; + + /* Whether we're currently doing a chroot build. */ + bool useChroot = false; + + Path chrootRootDir; + + /* RAII object to delete the chroot directory. */ + std::shared_ptr<AutoDelete> autoDelChroot; + + /* Whether this is a fixed-output derivation. */ + bool fixedOutput; + + /* Whether to run the build in a private network namespace. */ + bool privateNetwork = false; + + using GoalState = void (DerivationGoal::*)(); + GoalState state; + + /* Stuff we need to pass to initChild(). */ + struct ChrootPath { + Path source; + bool optional; + explicit ChrootPath(Path source = "", bool optional = false) + : source(std::move(source)), optional(optional) {} + }; + typedef map<Path, ChrootPath> + DirsInChroot; // maps target path to source path + DirsInChroot dirsInChroot; + + typedef map<string, string> Environment; + Environment env; + +#if __APPLE__ + typedef string SandboxProfile; + SandboxProfile additionalSandboxProfile; +#endif + + /* Hash rewriting. */ + StringRewrites inputRewrites, outputRewrites; + typedef map<Path, Path> RedirectedOutputs; + RedirectedOutputs redirectedOutputs; + + BuildMode buildMode; + + /* If we're repairing without a chroot, there may be outputs that + are valid but corrupt. So we redirect these outputs to + temporary paths. */ + PathSet redirectedBadOutputs; + + BuildResult result; + + /* The current round, if we're building multiple times. */ + size_t curRound = 1; + + size_t nrRounds; + + /* Path registration info from the previous round, if we're + building multiple times. Since this contains the hash, it + allows us to compare whether two rounds produced the same + result. */ + std::map<Path, ValidPathInfo> prevInfos; + + const uid_t sandboxUid = 1000; + const gid_t sandboxGid = 100; + + const static Path homeDir; + + std::unique_ptr<MaintainCount<uint64_t>> mcExpectedBuilds, mcRunningBuilds; + + /* The remote machine on which we're building. */ + std::string machineName; + + public: + DerivationGoal(const Path& drvPath, StringSet wantedOutputs, Worker& worker, + BuildMode buildMode = bmNormal); + DerivationGoal(const Path& drvPath, const BasicDerivation& drv, + Worker& worker, BuildMode buildMode = bmNormal); + ~DerivationGoal() override; + + /* Whether we need to perform hash rewriting if there are valid output paths. + */ + bool needsHashRewrite(); + + void timedOut() override; + + string key() override { + /* Ensure that derivations get built in order of their name, + i.e. a derivation named "aardvark" always comes before + "baboon". And substitution goals always happen before + derivation goals (due to "b$"). */ + return "b$" + storePathToName(drvPath) + "$" + drvPath; + } + + void work() override; + + Path getDrvPath() { return drvPath; } + + /* Add wanted outputs to an already existing derivation goal. */ + void addWantedOutputs(const StringSet& outputs); + + BuildResult getResult() { return result; } + + private: + /* The states. */ + void getDerivation(); + void loadDerivation(); + void haveDerivation(); + void outputsSubstituted(); + void closureRepaired(); + void inputsRealised(); + void tryToBuild(); + void buildDone(); + + /* Is the build hook willing to perform the build? */ + HookReply tryBuildHook(); + + /* Start building a derivation. */ + void startBuilder(); + + /* Fill in the environment for the builder. */ + void initEnv(); + + /* Setup tmp dir location. */ + void initTmpDir(); + + /* Write a JSON file containing the derivation attributes. */ + void writeStructuredAttrs(); + + /* Make a file owned by the builder. */ + void chownToBuilder(const Path& path); + + /* Run the builder's process. */ + void runChild(); + + friend int childEntry(void* /*arg*/); + + /* Check that the derivation outputs all exist and register them + as valid. */ + void registerOutputs(); + + /* Check that an output meets the requirements specified by the + 'outputChecks' attribute (or the legacy + '{allowed,disallowed}{References,Requisites}' attributes). */ + void checkOutputs(const std::map<std::string, ValidPathInfo>& outputs); + + /* Open a log file and a pipe to it. */ + Path openLogFile(); + + /* Close the log file. */ + void closeLogFile(); + + /* Delete the temporary directory, if we have one. */ + void deleteTmpDir(bool force); + + /* Callback used by the worker to write to the log. */ + void handleChildOutput(int fd, const string& data) override; + void handleEOF(int fd) override; + void flushLine(); + + /* Return the set of (in)valid paths. */ + PathSet checkPathValidity(bool returnValid, bool checkHash); + + /* Abort the goal if `path' failed to build. */ + bool pathFailed(const Path& path); + + /* Forcibly kill the child process, if any. */ + void killChild(); + + Path addHashRewrite(const Path& path); + + void repairClosure(); + + void amDone(ExitCode result) override { Goal::amDone(result); } + + void done(BuildResult::Status status, const string& msg = ""); + + PathSet exportReferences(const PathSet& storePaths); +}; + +const Path DerivationGoal::homeDir = "/homeless-shelter"; + +DerivationGoal::DerivationGoal(const Path& drvPath, StringSet wantedOutputs, + Worker& worker, BuildMode buildMode) + : Goal(worker), + useDerivation(true), + drvPath(drvPath), + wantedOutputs(std::move(wantedOutputs)), + buildMode(buildMode) { + state = &DerivationGoal::getDerivation; + name = (format("building of '%1%'") % drvPath).str(); + trace("created"); + + mcExpectedBuilds = + std::make_unique<MaintainCount<uint64_t>>(worker.expectedBuilds); +} + +DerivationGoal::DerivationGoal(const Path& drvPath, const BasicDerivation& drv, + Worker& worker, BuildMode buildMode) + : Goal(worker), + useDerivation(false), + drvPath(drvPath), + buildMode(buildMode) { + this->drv = std::make_unique<BasicDerivation>(drv); + state = &DerivationGoal::haveDerivation; + name = (format("building of %1%") % showPaths(drv.outputPaths())).str(); + trace("created"); + + mcExpectedBuilds = + std::make_unique<MaintainCount<uint64_t>>(worker.expectedBuilds); + + /* Prevent the .chroot directory from being + garbage-collected. (See isActiveTempFile() in gc.cc.) */ + worker.store.addTempRoot(drvPath); +} + +DerivationGoal::~DerivationGoal() { + /* Careful: we should never ever throw an exception from a + destructor. */ + try { + killChild(); + } catch (...) { + ignoreException(); + } + try { + deleteTmpDir(false); + } catch (...) { + ignoreException(); + } + try { + closeLogFile(); + } catch (...) { + ignoreException(); + } +} + +inline bool DerivationGoal::needsHashRewrite() { +#if __linux__ + return !useChroot; +#else + /* Darwin requires hash rewriting even when sandboxing is enabled. */ + return true; +#endif +} + +void DerivationGoal::killChild() { + if (pid != -1) { + worker.childTerminated(this); + + if (buildUser) { + /* If we're using a build user, then there is a tricky + race condition: if we kill the build user before the + child has done its setuid() to the build user uid, then + it won't be killed, and we'll potentially lock up in + pid.wait(). So also send a conventional kill to the + child. */ + ::kill(-pid, SIGKILL); /* ignore the result */ + buildUser->kill(); + pid.wait(); + } else { + pid.kill(); + } + + assert(pid == -1); + } + + hook.reset(); +} + +void DerivationGoal::timedOut() { + killChild(); + done(BuildResult::TimedOut); +} + +void DerivationGoal::work() { (this->*state)(); } + +void DerivationGoal::addWantedOutputs(const StringSet& outputs) { + /* If we already want all outputs, there is nothing to do. */ + if (wantedOutputs.empty()) { + return; + } + + if (outputs.empty()) { + wantedOutputs.clear(); + needRestart = true; + } else { + for (auto& i : outputs) { + if (wantedOutputs.find(i) == wantedOutputs.end()) { + wantedOutputs.insert(i); + needRestart = true; + } + } + } +} + +void DerivationGoal::getDerivation() { + trace("init"); + + /* The first thing to do is to make sure that the derivation + exists. If it doesn't, it may be created through a + substitute. */ + if (buildMode == bmNormal && worker.store.isValidPath(drvPath)) { + loadDerivation(); + return; + } + + addWaitee(worker.makeSubstitutionGoal(drvPath)); + + state = &DerivationGoal::loadDerivation; +} + +void DerivationGoal::loadDerivation() { + trace("loading derivation"); + + if (nrFailed != 0) { + LOG(ERROR) << "cannot build missing derivation '" << drvPath << "'"; + done(BuildResult::MiscFailure); + return; + } + + /* `drvPath' should already be a root, but let's be on the safe + side: if the user forgot to make it a root, we wouldn't want + things being garbage collected while we're busy. */ + worker.store.addTempRoot(drvPath); + + assert(worker.store.isValidPath(drvPath)); + + /* Get the derivation. */ + drv = std::unique_ptr<BasicDerivation>( + new Derivation(worker.store.derivationFromPath(drvPath))); + + haveDerivation(); +} + +void DerivationGoal::haveDerivation() { + trace("have derivation"); + + retrySubstitution = false; + + for (auto& i : drv->outputs) { + worker.store.addTempRoot(i.second.path); + } + + /* Check what outputs paths are not already valid. */ + PathSet invalidOutputs = checkPathValidity(false, buildMode == bmRepair); + + /* If they are all valid, then we're done. */ + if (invalidOutputs.empty() && buildMode == bmNormal) { + done(BuildResult::AlreadyValid); + return; + } + + parsedDrv = std::make_unique<ParsedDerivation>(drvPath, *drv); + + /* We are first going to try to create the invalid output paths + through substitutes. If that doesn't work, we'll build + them. */ + if (settings.useSubstitutes && parsedDrv->substitutesAllowed()) { + for (auto& i : invalidOutputs) { + addWaitee(worker.makeSubstitutionGoal( + i, buildMode == bmRepair ? Repair : NoRepair)); + } + } + + if (waitees.empty()) { /* to prevent hang (no wake-up event) */ + outputsSubstituted(); + } else { + state = &DerivationGoal::outputsSubstituted; + } +} + +void DerivationGoal::outputsSubstituted() { + trace("all outputs substituted (maybe)"); + + if (nrFailed > 0 && nrFailed > nrNoSubstituters + nrIncompleteClosure && + !settings.tryFallback) { + done(BuildResult::TransientFailure, + (format("some substitutes for the outputs of derivation '%1%' failed " + "(usually happens due to networking issues); try '--fallback' " + "to build derivation from source ") % + drvPath) + .str()); + return; + } + + /* If the substitutes form an incomplete closure, then we should + build the dependencies of this derivation, but after that, we + can still use the substitutes for this derivation itself. */ + if (nrIncompleteClosure > 0) { + retrySubstitution = true; + } + + nrFailed = nrNoSubstituters = nrIncompleteClosure = 0; + + if (needRestart) { + needRestart = false; + haveDerivation(); + return; + } + + auto nrInvalid = checkPathValidity(false, buildMode == bmRepair).size(); + if (buildMode == bmNormal && nrInvalid == 0) { + done(BuildResult::Substituted); + return; + } + if (buildMode == bmRepair && nrInvalid == 0) { + repairClosure(); + return; + } + if (buildMode == bmCheck && nrInvalid > 0) { + throw Error(format("some outputs of '%1%' are not valid, so checking is " + "not possible") % + drvPath); + } + + /* Otherwise, at least one of the output paths could not be + produced using a substitute. So we have to build instead. */ + + /* Make sure checkPathValidity() from now on checks all + outputs. */ + wantedOutputs = PathSet(); + + /* The inputs must be built before we can build this goal. */ + if (useDerivation) { + for (auto& i : dynamic_cast<Derivation*>(drv.get())->inputDrvs) { + addWaitee(worker.makeDerivationGoal( + i.first, i.second, buildMode == bmRepair ? bmRepair : bmNormal)); + } + } + + for (auto& i : drv->inputSrcs) { + if (worker.store.isValidPath(i)) { + continue; + } + if (!settings.useSubstitutes) { + throw Error(format("dependency '%1%' of '%2%' does not exist, and " + "substitution is disabled") % + i % drvPath); + } + addWaitee(worker.makeSubstitutionGoal(i)); + } + + if (waitees.empty()) { /* to prevent hang (no wake-up event) */ + inputsRealised(); + } else { + state = &DerivationGoal::inputsRealised; + } +} + +void DerivationGoal::repairClosure() { + /* If we're repairing, we now know that our own outputs are valid. + Now check whether the other paths in the outputs closure are + good. If not, then start derivation goals for the derivations + that produced those outputs. */ + + /* Get the output closure. */ + PathSet outputClosure; + for (auto& i : drv->outputs) { + if (!wantOutput(i.first, wantedOutputs)) { + continue; + } + worker.store.computeFSClosure(i.second.path, outputClosure); + } + + /* Filter out our own outputs (which we have already checked). */ + for (auto& i : drv->outputs) { + outputClosure.erase(i.second.path); + } + + /* Get all dependencies of this derivation so that we know which + derivation is responsible for which path in the output + closure. */ + PathSet inputClosure; + if (useDerivation) { + worker.store.computeFSClosure(drvPath, inputClosure); + } + std::map<Path, Path> outputsToDrv; + for (auto& i : inputClosure) { + if (isDerivation(i)) { + Derivation drv = worker.store.derivationFromPath(i); + for (auto& j : drv.outputs) { + outputsToDrv[j.second.path] = i; + } + } + } + + /* Check each path (slow!). */ + PathSet broken; + for (auto& i : outputClosure) { + if (worker.pathContentsGood(i)) { + continue; + } + LOG(ERROR) << "found corrupted or missing path '" << i + << "' in the output closure of '" << drvPath << "'"; + Path drvPath2 = outputsToDrv[i]; + if (drvPath2.empty()) { + addWaitee(worker.makeSubstitutionGoal(i, Repair)); + } else { + addWaitee(worker.makeDerivationGoal(drvPath2, PathSet(), bmRepair)); + } + } + + if (waitees.empty()) { + done(BuildResult::AlreadyValid); + return; + } + + state = &DerivationGoal::closureRepaired; +} + +void DerivationGoal::closureRepaired() { + trace("closure repaired"); + if (nrFailed > 0) { + throw Error(format("some paths in the output closure of derivation '%1%' " + "could not be repaired") % + drvPath); + } + done(BuildResult::AlreadyValid); +} + +void DerivationGoal::inputsRealised() { + trace("all inputs realised"); + + if (nrFailed != 0) { + if (!useDerivation) { + throw Error(format("some dependencies of '%1%' are missing") % drvPath); + } + LOG(ERROR) << "cannot build derivation '" << drvPath << "': " << nrFailed + << " dependencies couldn't be built"; + done(BuildResult::DependencyFailed); + return; + } + + if (retrySubstitution) { + haveDerivation(); + return; + } + + /* Gather information necessary for computing the closure and/or + running the build hook. */ + + /* The outputs are referenceable paths. */ + for (auto& i : drv->outputs) { + DLOG(INFO) << "building path " << i.second.path; + allPaths.insert(i.second.path); + } + + /* Determine the full set of input paths. */ + + /* First, the input derivations. */ + if (useDerivation) { + for (auto& i : dynamic_cast<Derivation*>(drv.get())->inputDrvs) { + /* Add the relevant output closures of the input derivation + `i' as input paths. Only add the closures of output paths + that are specified as inputs. */ + assert(worker.store.isValidPath(i.first)); + Derivation inDrv = worker.store.derivationFromPath(i.first); + for (auto& j : i.second) { + if (inDrv.outputs.find(j) != inDrv.outputs.end()) { + worker.store.computeFSClosure(inDrv.outputs[j].path, inputPaths); + } else { + throw Error(format("derivation '%1%' requires non-existent output " + "'%2%' from input derivation '%3%'") % + drvPath % j % i.first); + } + } + } + } + + /* Second, the input sources. */ + worker.store.computeFSClosure(drv->inputSrcs, inputPaths); + + DLOG(INFO) << "added input paths " << showPaths(inputPaths); + + allPaths.insert(inputPaths.begin(), inputPaths.end()); + + /* Is this a fixed-output derivation? */ + fixedOutput = drv->isFixedOutput(); + + /* Don't repeat fixed-output derivations since they're already + verified by their output hash.*/ + nrRounds = fixedOutput ? 1 : settings.buildRepeat + 1; + + /* Okay, try to build. Note that here we don't wait for a build + slot to become available, since we don't need one if there is a + build hook. */ + state = &DerivationGoal::tryToBuild; + worker.wakeUp(shared_from_this()); + + result = BuildResult(); +} + +void DerivationGoal::tryToBuild() { + trace("trying to build"); + + /* Obtain locks on all output paths. The locks are automatically + released when we exit this function or Nix crashes. If we + can't acquire the lock, then continue; hopefully some other + goal can start a build, and if not, the main loop will sleep a + few seconds and then retry this goal. */ + PathSet lockFiles; + for (auto& outPath : drv->outputPaths()) { + lockFiles.insert(worker.store.toRealPath(outPath)); + } + + if (!outputLocks.lockPaths(lockFiles, "", false)) { + worker.waitForAWhile(shared_from_this()); + return; + } + + /* Now check again whether the outputs are valid. This is because + another process may have started building in parallel. After + it has finished and released the locks, we can (and should) + reuse its results. (Strictly speaking the first check can be + omitted, but that would be less efficient.) Note that since we + now hold the locks on the output paths, no other process can + build this derivation, so no further checks are necessary. */ + validPaths = checkPathValidity(true, buildMode == bmRepair); + if (buildMode != bmCheck && validPaths.size() == drv->outputs.size()) { + DLOG(INFO) << "skipping build of derivation '" << drvPath + << "', someone beat us to it"; + outputLocks.setDeletion(true); + done(BuildResult::AlreadyValid); + return; + } + + missingPaths = drv->outputPaths(); + if (buildMode != bmCheck) { + for (auto& i : validPaths) { + missingPaths.erase(i); + } + } + + /* If any of the outputs already exist but are not valid, delete + them. */ + for (auto& i : drv->outputs) { + Path path = i.second.path; + if (worker.store.isValidPath(path)) { + continue; + } + DLOG(INFO) << "removing invalid path " << path; + deletePath(worker.store.toRealPath(path)); + } + + /* Don't do a remote build if the derivation has the attribute + `preferLocalBuild' set. Also, check and repair modes are only + supported for local builds. */ + bool buildLocally = buildMode != bmNormal || parsedDrv->willBuildLocally(); + + auto started = [&]() { + auto msg = fmt(buildMode == bmRepair + ? "repairing outputs of '%s'" + : buildMode == bmCheck + ? "checking outputs of '%s'" + : nrRounds > 1 ? "building '%s' (round %d/%d)" + : "building '%s'", + drvPath, curRound, nrRounds); + + if (hook) { + msg += fmt(" on '%s'", machineName); + } + LOG(INFO) << msg << "[" << drvPath << "]"; + mcRunningBuilds = + std::make_unique<MaintainCount<uint64_t>>(worker.runningBuilds); + }; + + /* Is the build hook willing to accept this job? */ + if (!buildLocally) { + switch (tryBuildHook()) { + case rpAccept: + /* Yes, it has started doing so. Wait until we get + EOF from the hook. */ + result.startTime = time(nullptr); // inexact + state = &DerivationGoal::buildDone; + started(); + return; + case rpPostpone: + /* Not now; wait until at least one child finishes or + the wake-up timeout expires. */ + worker.waitForAWhile(shared_from_this()); + outputLocks.unlock(); + return; + case rpDecline: + /* We should do it ourselves. */ + break; + } + } + + /* Make sure that we are allowed to start a build. If this + derivation prefers to be done locally, do it even if + maxBuildJobs is 0. */ + unsigned int curBuilds = worker.getNrLocalBuilds(); + if (curBuilds >= settings.maxBuildJobs && !(buildLocally && curBuilds == 0)) { + worker.waitForBuildSlot(shared_from_this()); + outputLocks.unlock(); + return; + } + + try { + /* Okay, we have to build. */ + startBuilder(); + + } catch (BuildError& e) { + LOG(ERROR) << e.msg(); + outputLocks.unlock(); + buildUser.reset(); + worker.permanentFailure = true; + done(BuildResult::InputRejected, e.msg()); + return; + } + + /* This state will be reached when we get EOF on the child's + log pipe. */ + state = &DerivationGoal::buildDone; + + started(); +} + +void replaceValidPath(const Path& storePath, const Path& tmpPath) { + /* We can't atomically replace storePath (the original) with + tmpPath (the replacement), so we have to move it out of the + way first. We'd better not be interrupted here, because if + we're repairing (say) Glibc, we end up with a broken system. */ + Path oldPath = + (format("%1%.old-%2%-%3%") % storePath % getpid() % random()).str(); + if (pathExists(storePath)) { + rename(storePath.c_str(), oldPath.c_str()); + } + if (rename(tmpPath.c_str(), storePath.c_str()) == -1) { + throw SysError(format("moving '%1%' to '%2%'") % tmpPath % storePath); + } + deletePath(oldPath); +} + +MakeError(NotDeterministic, BuildError) + + void DerivationGoal::buildDone() { + trace("build done"); + + /* Release the build user at the end of this function. We don't do + it right away because we don't want another build grabbing this + uid and then messing around with our output. */ + Finally releaseBuildUser([&]() { buildUser.reset(); }); + + /* Since we got an EOF on the logger pipe, the builder is presumed + to have terminated. In fact, the builder could also have + simply have closed its end of the pipe, so just to be sure, + kill it. */ + int status = hook ? hook->pid.kill() : pid.kill(); + + DLOG(INFO) << "builder process for '" << drvPath << "' finished"; + + result.timesBuilt++; + result.stopTime = time(nullptr); + + /* So the child is gone now. */ + worker.childTerminated(this); + + /* Close the read side of the logger pipe. */ + if (hook) { + hook->builderOut.readSide = -1; + hook->fromHook.readSide = -1; + } else { + builderOut.readSide = -1; + } + + /* Close the log file. */ + closeLogFile(); + + /* When running under a build user, make sure that all processes + running under that uid are gone. This is to prevent a + malicious user from leaving behind a process that keeps files + open and modifies them after they have been chown'ed to + root. */ + if (buildUser) { + buildUser->kill(); + } + + bool diskFull = false; + + try { + /* Check the exit status. */ + if (!statusOk(status)) { + /* Heuristically check whether the build failure may have + been caused by a disk full condition. We have no way + of knowing whether the build actually got an ENOSPC. + So instead, check if the disk is (nearly) full now. If + so, we don't mark this build as a permanent failure. */ +#if HAVE_STATVFS + unsigned long long required = + 8ULL * 1024 * 1024; // FIXME: make configurable + struct statvfs st; + if (statvfs(worker.store.realStoreDir.c_str(), &st) == 0 && + (unsigned long long)st.f_bavail * st.f_bsize < required) { + diskFull = true; + } + if (statvfs(tmpDir.c_str(), &st) == 0 && + (unsigned long long)st.f_bavail * st.f_bsize < required) { + diskFull = true; + } +#endif + + deleteTmpDir(false); + + /* Move paths out of the chroot for easier debugging of + build failures. */ + if (useChroot && buildMode == bmNormal) { + for (auto& i : missingPaths) { + if (pathExists(chrootRootDir + i)) { + rename((chrootRootDir + i).c_str(), i.c_str()); + } + } + } + + std::string msg = + (format("builder for '%1%' %2%") % drvPath % statusToString(status)) + .str(); + + if (!settings.verboseBuild && !logTail.empty()) { + msg += (format("; last %d log lines:") % logTail.size()).str(); + for (auto& line : logTail) { + msg += "\n " + line; + } + } + + if (diskFull) { + msg += + "\nnote: build failure may have been caused by lack of free disk " + "space"; + } + + throw BuildError(msg); + } + + /* Compute the FS closure of the outputs and register them as + being valid. */ + registerOutputs(); + + if (settings.postBuildHook != "") { + LOG(INFO) << "running post-build-hook '" << settings.postBuildHook + << "' [" << drvPath << "]"; + auto outputPaths = drv->outputPaths(); + std::map<std::string, std::string> hookEnvironment = getEnv(); + + hookEnvironment.emplace("DRV_PATH", drvPath); + hookEnvironment.emplace("OUT_PATHS", + chomp(concatStringsSep(" ", outputPaths))); + + RunOptions opts(settings.postBuildHook, {}); + opts.environment = hookEnvironment; + + struct LogSink : Sink { + std::string currentLine; + + void operator()(const unsigned char* data, size_t len) override { + for (size_t i = 0; i < len; i++) { + auto c = data[i]; + + if (c == '\n') { + flushLine(); + } else { + currentLine += c; + } + } + } + + void flushLine() { + if (settings.verboseBuild) { + LOG(ERROR) << "post-build-hook: " << currentLine; + } + currentLine.clear(); + } + + ~LogSink() override { + if (!currentLine.empty()) { + currentLine += '\n'; + flushLine(); + } + } + }; + LogSink sink; + + opts.standardOut = &sink; + opts.mergeStderrToStdout = true; + runProgram2(opts); + } + + if (buildMode == bmCheck) { + done(BuildResult::Built); + return; + } + + /* Delete unused redirected outputs (when doing hash rewriting). */ + for (auto& i : redirectedOutputs) { + deletePath(i.second); + } + + /* Delete the chroot (if we were using one). */ + autoDelChroot.reset(); /* this runs the destructor */ + + deleteTmpDir(true); + + /* Repeat the build if necessary. */ + if (curRound++ < nrRounds) { + outputLocks.unlock(); + state = &DerivationGoal::tryToBuild; + worker.wakeUp(shared_from_this()); + return; + } + + /* It is now safe to delete the lock files, since all future + lockers will see that the output paths are valid; they will + not create new lock files with the same names as the old + (unlinked) lock files. */ + outputLocks.setDeletion(true); + outputLocks.unlock(); + + } catch (BuildError& e) { + LOG(ERROR) << e.msg(); + + outputLocks.unlock(); + + BuildResult::Status st = BuildResult::MiscFailure; + + if (hook && WIFEXITED(status) && WEXITSTATUS(status) == 101) { + st = BuildResult::TimedOut; + + } else if (hook && (!WIFEXITED(status) || WEXITSTATUS(status) != 100)) { + } + + else { + st = dynamic_cast<NotDeterministic*>(&e) != nullptr + ? BuildResult::NotDeterministic + : statusOk(status) + ? BuildResult::OutputRejected + : fixedOutput || diskFull ? BuildResult::TransientFailure + : BuildResult::PermanentFailure; + } + + done(st, e.msg()); + return; + } + + done(BuildResult::Built); +} + +HookReply DerivationGoal::tryBuildHook() { + if (!worker.tryBuildHook || !useDerivation) { + return rpDecline; + } + + if (!worker.hook) { + worker.hook = std::make_unique<HookInstance>(); + } + + try { + /* Send the request to the hook. */ + worker.hook->sink << "try" + << (worker.getNrLocalBuilds() < settings.maxBuildJobs ? 1 + : 0) + << drv->platform << drvPath + << parsedDrv->getRequiredSystemFeatures(); + worker.hook->sink.flush(); + + /* Read the first line of input, which should be a word indicating + whether the hook wishes to perform the build. */ + string reply; + while (true) { + string s = readLine(worker.hook->fromHook.readSide.get()); + if (string(s, 0, 2) == "# ") { + reply = string(s, 2); + break; + } + s += "\n"; + std::cerr << s; + } + + DLOG(INFO) << "hook reply is " << reply; + + if (reply == "decline") { + return rpDecline; + } + if (reply == "decline-permanently") { + worker.tryBuildHook = false; + worker.hook = nullptr; + return rpDecline; + } else if (reply == "postpone") { + return rpPostpone; + } else if (reply != "accept") { + throw Error(format("bad hook reply '%1%'") % reply); + } + } catch (SysError& e) { + if (e.errNo == EPIPE) { + LOG(ERROR) << "build hook died unexpectedly: " + << chomp(drainFD(worker.hook->fromHook.readSide.get())); + worker.hook = nullptr; + return rpDecline; + } + throw; + } + + hook = std::move(worker.hook); + + machineName = readLine(hook->fromHook.readSide.get()); + + /* Tell the hook all the inputs that have to be copied to the + remote system. */ + hook->sink << inputPaths; + + /* Tell the hooks the missing outputs that have to be copied back + from the remote system. */ + hook->sink << missingPaths; + + hook->sink = FdSink(); + hook->toHook.writeSide = -1; + + /* Create the log file and pipe. */ + Path logFile = openLogFile(); + + set<int> fds; + fds.insert(hook->fromHook.readSide.get()); + fds.insert(hook->builderOut.readSide.get()); + worker.childStarted(shared_from_this(), fds, false, false); + + return rpAccept; +} + +void chmod_(const Path& path, mode_t mode) { + if (chmod(path.c_str(), mode) == -1) { + throw SysError(format("setting permissions on '%1%'") % path); + } +} + +int childEntry(void* arg) { + ((DerivationGoal*)arg)->runChild(); + return 1; +} + +PathSet DerivationGoal::exportReferences(const PathSet& storePaths) { + PathSet paths; + + for (auto storePath : storePaths) { + /* Check that the store path is valid. */ + if (!worker.store.isInStore(storePath)) { + throw BuildError( + format("'exportReferencesGraph' contains a non-store path '%1%'") % + storePath); + } + + storePath = worker.store.toStorePath(storePath); + + if (inputPaths.count(storePath) == 0u) { + throw BuildError( + "cannot export references of path '%s' because it is not in the " + "input closure of the derivation", + storePath); + } + + worker.store.computeFSClosure(storePath, paths); + } + + /* If there are derivations in the graph, then include their + outputs as well. This is useful if you want to do things + like passing all build-time dependencies of some path to a + derivation that builds a NixOS DVD image. */ + PathSet paths2(paths); + + for (auto& j : paths2) { + if (isDerivation(j)) { + Derivation drv = worker.store.derivationFromPath(j); + for (auto& k : drv.outputs) { + worker.store.computeFSClosure(k.second.path, paths); + } + } + } + + return paths; +} + +static std::once_flag dns_resolve_flag; + +static void preloadNSS() { + /* builtin:fetchurl can trigger a DNS lookup, which with glibc can trigger a + dynamic library load of one of the glibc NSS libraries in a sandboxed + child, which will fail unless the library's already been loaded in the + parent. So we force a lookup of an invalid domain to force the NSS + machinery to + load its lookup libraries in the parent before any child gets a chance to. + */ + std::call_once(dns_resolve_flag, []() { + struct addrinfo* res = nullptr; + + if (getaddrinfo("this.pre-initializes.the.dns.resolvers.invalid.", "http", + nullptr, &res) != 0) { + if (res != nullptr) { + freeaddrinfo(res); + } + } + }); +} + +void DerivationGoal::startBuilder() { + /* Right platform? */ + if (!parsedDrv->canBuildLocally()) { + throw Error( + "a '%s' with features {%s} is required to build '%s', but I am a '%s' " + "with features {%s}", + drv->platform, + concatStringsSep(", ", parsedDrv->getRequiredSystemFeatures()), drvPath, + settings.thisSystem, concatStringsSep(", ", settings.systemFeatures)); + } + + if (drv->isBuiltin()) { + preloadNSS(); + } + +#if __APPLE__ + additionalSandboxProfile = + parsedDrv->getStringAttr("__sandboxProfile").value_or(""); +#endif + + /* Are we doing a chroot build? */ + { + auto noChroot = parsedDrv->getBoolAttr("__noChroot"); + if (settings.sandboxMode == smEnabled) { + if (noChroot) { + throw Error(format("derivation '%1%' has '__noChroot' set, " + "but that's not allowed when 'sandbox' is 'true'") % + drvPath); + } +#if __APPLE__ + if (additionalSandboxProfile != "") + throw Error( + format("derivation '%1%' specifies a sandbox profile, " + "but this is only allowed when 'sandbox' is 'relaxed'") % + drvPath); +#endif + useChroot = true; + } else if (settings.sandboxMode == smDisabled) { + useChroot = false; + } else if (settings.sandboxMode == smRelaxed) { + useChroot = !fixedOutput && !noChroot; + } + } + + if (worker.store.storeDir != worker.store.realStoreDir) { +#if __linux__ + useChroot = true; +#else + throw Error( + "building using a diverted store is not supported on this platform"); +#endif + } + + /* If `build-users-group' is not empty, then we have to build as + one of the members of that group. */ + if (settings.buildUsersGroup != "" && getuid() == 0) { +#if defined(__linux__) || defined(__APPLE__) + buildUser = std::make_unique<UserLock>(); + + /* Make sure that no other processes are executing under this + uid. */ + buildUser->kill(); +#else + /* Don't know how to block the creation of setuid/setgid + binaries on this platform. */ + throw Error( + "build users are not supported on this platform for security reasons"); +#endif + } + + /* Create a temporary directory where the build will take + place. */ + auto drvName = storePathToName(drvPath); + tmpDir = createTempDir("", "nix-build-" + drvName, false, false, 0700); + + chownToBuilder(tmpDir); + + /* Substitute output placeholders with the actual output paths. */ + for (auto& output : drv->outputs) { + inputRewrites[hashPlaceholder(output.first)] = output.second.path; + } + + /* Construct the environment passed to the builder. */ + initEnv(); + + writeStructuredAttrs(); + + /* Handle exportReferencesGraph(), if set. */ + if (!parsedDrv->getStructuredAttrs()) { + /* The `exportReferencesGraph' feature allows the references graph + to be passed to a builder. This attribute should be a list of + pairs [name1 path1 name2 path2 ...]. The references graph of + each `pathN' will be stored in a text file `nameN' in the + temporary build directory. The text files have the format used + by `nix-store --register-validity'. However, the deriver + fields are left empty. */ + string s = get(drv->env, "exportReferencesGraph"); + auto ss = tokenizeString<Strings>(s); + if (ss.size() % 2 != 0) { + throw BuildError( + format("odd number of tokens in 'exportReferencesGraph': '%1%'") % s); + } + for (auto i = ss.begin(); i != ss.end();) { + string fileName = *i++; + checkStoreName(fileName); /* !!! abuse of this function */ + Path storePath = *i++; + + /* Write closure info to <fileName>. */ + writeFile(tmpDir + "/" + fileName, + worker.store.makeValidityRegistration( + exportReferences({storePath}), false, false)); + } + } + + if (useChroot) { + /* Allow a user-configurable set of directories from the + host file system. */ + PathSet dirs = settings.sandboxPaths; + PathSet dirs2 = settings.extraSandboxPaths; + dirs.insert(dirs2.begin(), dirs2.end()); + + dirsInChroot.clear(); + + for (auto i : dirs) { + if (i.empty()) { + continue; + } + bool optional = false; + if (i[i.size() - 1] == '?') { + optional = true; + i.pop_back(); + } + size_t p = i.find('='); + if (p == string::npos) { + dirsInChroot[i] = ChrootPath(i, optional); + } else { + dirsInChroot[string(i, 0, p)] = ChrootPath(string(i, p + 1), optional); + } + } + dirsInChroot[tmpDirInSandbox] = ChrootPath(tmpDir); + + /* Add the closure of store paths to the chroot. */ + PathSet closure; + for (auto& i : dirsInChroot) { + try { + if (worker.store.isInStore(i.second.source)) { + worker.store.computeFSClosure( + worker.store.toStorePath(i.second.source), closure); + } + } catch (InvalidPath& e) { + } catch (Error& e) { + throw Error(format("while processing 'sandbox-paths': %s") % e.what()); + } + } + for (auto& i : closure) { + dirsInChroot[i] = ChrootPath(i); + } + + PathSet allowedPaths = settings.allowedImpureHostPrefixes; + + /* This works like the above, except on a per-derivation level */ + auto impurePaths = + parsedDrv->getStringsAttr("__impureHostDeps").value_or(Strings()); + + for (auto& i : impurePaths) { + bool found = false; + /* Note: we're not resolving symlinks here to prevent + giving a non-root user info about inaccessible + files. */ + Path canonI = canonPath(i); + /* If only we had a trie to do this more efficiently :) luckily, these are + * generally going to be pretty small */ + for (auto& a : allowedPaths) { + Path canonA = canonPath(a); + if (canonI == canonA || isInDir(canonI, canonA)) { + found = true; + break; + } + } + if (!found) { + throw Error(format("derivation '%1%' requested impure path '%2%', but " + "it was not in allowed-impure-host-deps") % + drvPath % i); + } + + dirsInChroot[i] = ChrootPath(i); + } + +#if __linux__ + /* Create a temporary directory in which we set up the chroot + environment using bind-mounts. We put it in the Nix store + to ensure that we can create hard-links to non-directory + inputs in the fake Nix store in the chroot (see below). */ + chrootRootDir = worker.store.toRealPath(drvPath) + ".chroot"; + deletePath(chrootRootDir); + + /* Clean up the chroot directory automatically. */ + autoDelChroot = std::make_shared<AutoDelete>(chrootRootDir); + + DLOG(INFO) << "setting up chroot environment in '" << chrootRootDir << "'"; + + if (mkdir(chrootRootDir.c_str(), 0750) == -1) { + throw SysError(format("cannot create '%1%'") % chrootRootDir); + } + + if (buildUser && + chown(chrootRootDir.c_str(), 0, buildUser->getGID()) == -1) { + throw SysError(format("cannot change ownership of '%1%'") % + chrootRootDir); + } + + /* Create a writable /tmp in the chroot. Many builders need + this. (Of course they should really respect $TMPDIR + instead.) */ + Path chrootTmpDir = chrootRootDir + "/tmp"; + createDirs(chrootTmpDir); + chmod_(chrootTmpDir, 01777); + + /* Create a /etc/passwd with entries for the build user and the + nobody account. The latter is kind of a hack to support + Samba-in-QEMU. */ + createDirs(chrootRootDir + "/etc"); + + writeFile(chrootRootDir + "/etc/passwd", + fmt("root:x:0:0:Nix build user:%3%:/noshell\n" + "nixbld:x:%1%:%2%:Nix build user:%3%:/noshell\n" + "nobody:x:65534:65534:Nobody:/:/noshell\n", + sandboxUid, sandboxGid, settings.sandboxBuildDir)); + + /* Declare the build user's group so that programs get a consistent + view of the system (e.g., "id -gn"). */ + writeFile(chrootRootDir + "/etc/group", (format("root:x:0:\n" + "nixbld:!:%1%:\n" + "nogroup:x:65534:\n") % + sandboxGid) + .str()); + + /* Create /etc/hosts with localhost entry. */ + if (!fixedOutput) { + writeFile(chrootRootDir + "/etc/hosts", + "127.0.0.1 localhost\n::1 localhost\n"); + } + + /* Make the closure of the inputs available in the chroot, + rather than the whole Nix store. This prevents any access + to undeclared dependencies. Directories are bind-mounted, + while other inputs are hard-linked (since only directories + can be bind-mounted). !!! As an extra security + precaution, make the fake Nix store only writable by the + build user. */ + Path chrootStoreDir = chrootRootDir + worker.store.storeDir; + createDirs(chrootStoreDir); + chmod_(chrootStoreDir, 01775); + + if (buildUser && + chown(chrootStoreDir.c_str(), 0, buildUser->getGID()) == -1) { + throw SysError(format("cannot change ownership of '%1%'") % + chrootStoreDir); + } + + for (auto& i : inputPaths) { + Path r = worker.store.toRealPath(i); + struct stat st; + if (lstat(r.c_str(), &st) != 0) { + throw SysError(format("getting attributes of path '%1%'") % i); + } + if (S_ISDIR(st.st_mode)) { + dirsInChroot[i] = ChrootPath(r); + } else { + Path p = chrootRootDir + i; + DLOG(INFO) << "linking '" << p << "' to '" << r << "'"; + if (link(r.c_str(), p.c_str()) == -1) { + /* Hard-linking fails if we exceed the maximum + link count on a file (e.g. 32000 of ext3), + which is quite possible after a `nix-store + --optimise'. */ + if (errno != EMLINK) { + throw SysError(format("linking '%1%' to '%2%'") % p % i); + } + StringSink sink; + dumpPath(r, sink); + StringSource source(*sink.s); + restorePath(p, source); + } + } + } + + /* If we're repairing, checking or rebuilding part of a + multiple-outputs derivation, it's possible that we're + rebuilding a path that is in settings.dirsInChroot + (typically the dependencies of /bin/sh). Throw them + out. */ + for (auto& i : drv->outputs) { + dirsInChroot.erase(i.second.path); + } + +#elif __APPLE__ + /* We don't really have any parent prep work to do (yet?) + All work happens in the child, instead. */ +#else + throw Error("sandboxing builds is not supported on this platform"); +#endif + } + + if (needsHashRewrite()) { + if (pathExists(homeDir)) { + throw Error(format("directory '%1%' exists; please remove it") % homeDir); + } + + /* We're not doing a chroot build, but we have some valid + output paths. Since we can't just overwrite or delete + them, we have to do hash rewriting: i.e. in the + environment/arguments passed to the build, we replace the + hashes of the valid outputs with unique dummy strings; + after the build, we discard the redirected outputs + corresponding to the valid outputs, and rewrite the + contents of the new outputs to replace the dummy strings + with the actual hashes. */ + if (!validPaths.empty()) { + for (auto& i : validPaths) { + addHashRewrite(i); + } + } + + /* If we're repairing, then we don't want to delete the + corrupt outputs in advance. So rewrite them as well. */ + if (buildMode == bmRepair) { + for (auto& i : missingPaths) { + if (worker.store.isValidPath(i) && pathExists(i)) { + addHashRewrite(i); + redirectedBadOutputs.insert(i); + } + } + } + } + + if (useChroot && settings.preBuildHook != "" && + (dynamic_cast<Derivation*>(drv.get()) != nullptr)) { + DLOG(INFO) << "executing pre-build hook '" << settings.preBuildHook << "'"; + auto args = + useChroot ? Strings({drvPath, chrootRootDir}) : Strings({drvPath}); + enum BuildHookState { stBegin, stExtraChrootDirs }; + auto state = stBegin; + auto lines = runProgram(settings.preBuildHook, false, args); + auto lastPos = std::string::size_type{0}; + for (auto nlPos = lines.find('\n'); nlPos != string::npos; + nlPos = lines.find('\n', lastPos)) { + auto line = std::string{lines, lastPos, nlPos - lastPos}; + lastPos = nlPos + 1; + if (state == stBegin) { + if (line == "extra-sandbox-paths" || line == "extra-chroot-dirs") { + state = stExtraChrootDirs; + } else { + throw Error(format("unknown pre-build hook command '%1%'") % line); + } + } else if (state == stExtraChrootDirs) { + if (line.empty()) { + state = stBegin; + } else { + auto p = line.find('='); + if (p == string::npos) { + dirsInChroot[line] = ChrootPath(line); + } else { + dirsInChroot[string(line, 0, p)] = ChrootPath(string(line, p + 1)); + } + } + } + } + } + + /* Run the builder. */ + DLOG(INFO) << "executing builder '" << drv->builder << "'"; + + /* Create the log file. */ + Path logFile = openLogFile(); + + /* Create a pipe to get the output of the builder. */ + // builderOut.create(); + + builderOut.readSide = posix_openpt(O_RDWR | O_NOCTTY); + if (!builderOut.readSide) { + throw SysError("opening pseudoterminal master"); + } + + std::string slaveName(ptsname(builderOut.readSide.get())); + + if (buildUser) { + if (chmod(slaveName.c_str(), 0600) != 0) { + throw SysError("changing mode of pseudoterminal slave"); + } + + if (chown(slaveName.c_str(), buildUser->getUID(), 0) != 0) { + throw SysError("changing owner of pseudoterminal slave"); + } + } else { + if (grantpt(builderOut.readSide.get()) != 0) { + throw SysError("granting access to pseudoterminal slave"); + } + } + +#if 0 + // Mount the pt in the sandbox so that the "tty" command works. + // FIXME: this doesn't work with the new devpts in the sandbox. + if (useChroot) + dirsInChroot[slaveName] = {slaveName, false}; +#endif + + if (unlockpt(builderOut.readSide.get()) != 0) { + throw SysError("unlocking pseudoterminal"); + } + + builderOut.writeSide = open(slaveName.c_str(), O_RDWR | O_NOCTTY); + if (!builderOut.writeSide) { + throw SysError("opening pseudoterminal slave"); + } + + // Put the pt into raw mode to prevent \n -> \r\n translation. + struct termios term; + if (tcgetattr(builderOut.writeSide.get(), &term) != 0) { + throw SysError("getting pseudoterminal attributes"); + } + + cfmakeraw(&term); + + if (tcsetattr(builderOut.writeSide.get(), TCSANOW, &term) != 0) { + throw SysError("putting pseudoterminal into raw mode"); + } + + result.startTime = time(nullptr); + + /* Fork a child to build the package. */ + ProcessOptions options; + +#if __linux__ + if (useChroot) { + /* Set up private namespaces for the build: + + - The PID namespace causes the build to start as PID 1. + Processes outside of the chroot are not visible to those + on the inside, but processes inside the chroot are + visible from the outside (though with different PIDs). + + - The private mount namespace ensures that all the bind + mounts we do will only show up in this process and its + children, and will disappear automatically when we're + done. + + - The private network namespace ensures that the builder + cannot talk to the outside world (or vice versa). It + only has a private loopback interface. (Fixed-output + derivations are not run in a private network namespace + to allow functions like fetchurl to work.) + + - The IPC namespace prevents the builder from communicating + with outside processes using SysV IPC mechanisms (shared + memory, message queues, semaphores). It also ensures + that all IPC objects are destroyed when the builder + exits. + + - The UTS namespace ensures that builders see a hostname of + localhost rather than the actual hostname. + + We use a helper process to do the clone() to work around + clone() being broken in multi-threaded programs due to + at-fork handlers not being run. Note that we use + CLONE_PARENT to ensure that the real builder is parented to + us. + */ + + if (!fixedOutput) { + privateNetwork = true; + } + + userNamespaceSync.create(); + + options.allowVfork = false; + + Pid helper = startProcess( + [&]() { + /* Drop additional groups here because we can't do it + after we've created the new user namespace. FIXME: + this means that if we're not root in the parent + namespace, we can't drop additional groups; they will + be mapped to nogroup in the child namespace. There does + not seem to be a workaround for this. (But who can tell + from reading user_namespaces(7)?) + See also https://lwn.net/Articles/621612/. */ + if (getuid() == 0 && setgroups(0, nullptr) == -1) { + throw SysError("setgroups failed"); + } + + size_t stackSize = 1 * 1024 * 1024; + char* stack = + (char*)mmap(nullptr, stackSize, PROT_WRITE | PROT_READ, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK, -1, 0); + if (stack == MAP_FAILED) { + throw SysError("allocating stack"); + } + + int flags = CLONE_NEWUSER | CLONE_NEWPID | CLONE_NEWNS | + CLONE_NEWIPC | CLONE_NEWUTS | CLONE_PARENT | SIGCHLD; + if (privateNetwork) { + flags |= CLONE_NEWNET; + } + + pid_t child = clone(childEntry, stack + stackSize, flags, this); + if (child == -1 && errno == EINVAL) { + /* Fallback for Linux < 2.13 where CLONE_NEWPID and + CLONE_PARENT are not allowed together. */ + flags &= ~CLONE_NEWPID; + child = clone(childEntry, stack + stackSize, flags, this); + } + if (child == -1 && (errno == EPERM || errno == EINVAL)) { + /* Some distros patch Linux to not allow unpriveleged + * user namespaces. If we get EPERM or EINVAL, try + * without CLONE_NEWUSER and see if that works. + */ + flags &= ~CLONE_NEWUSER; + child = clone(childEntry, stack + stackSize, flags, this); + } + /* Otherwise exit with EPERM so we can handle this in the + parent. This is only done when sandbox-fallback is set + to true (the default). */ + if (child == -1 && (errno == EPERM || errno == EINVAL) && + settings.sandboxFallback) { + _exit(1); + } + if (child == -1) { + throw SysError("cloning builder process"); + } + + writeFull(builderOut.writeSide.get(), std::to_string(child) + "\n"); + _exit(0); + }, + options); + + int res = helper.wait(); + if (res != 0 && settings.sandboxFallback) { + useChroot = false; + initTmpDir(); + goto fallback; + } else if (res != 0) { + throw Error("unable to start build process"); + } + + userNamespaceSync.readSide = -1; + + pid_t tmp; + if (!string2Int<pid_t>(readLine(builderOut.readSide.get()), tmp)) { + abort(); + } + pid = tmp; + + /* Set the UID/GID mapping of the builder's user namespace + such that the sandbox user maps to the build user, or to + the calling user (if build users are disabled). */ + uid_t hostUid = buildUser ? buildUser->getUID() : getuid(); + uid_t hostGid = buildUser ? buildUser->getGID() : getgid(); + + writeFile("/proc/" + std::to_string(pid) + "/uid_map", + (format("%d %d 1") % sandboxUid % hostUid).str()); + + writeFile("/proc/" + std::to_string(pid) + "/setgroups", "deny"); + + writeFile("/proc/" + std::to_string(pid) + "/gid_map", + (format("%d %d 1") % sandboxGid % hostGid).str()); + + /* Signal the builder that we've updated its user + namespace. */ + writeFull(userNamespaceSync.writeSide.get(), "1"); + userNamespaceSync.writeSide = -1; + + } else +#endif + { + fallback: + options.allowVfork = !buildUser && !drv->isBuiltin(); + pid = startProcess([&]() { runChild(); }, options); + } + + /* parent */ + pid.setSeparatePG(true); + builderOut.writeSide = -1; + worker.childStarted(shared_from_this(), {builderOut.readSide.get()}, true, + true); + + /* Check if setting up the build environment failed. */ + while (true) { + string msg = readLine(builderOut.readSide.get()); + if (string(msg, 0, 1) == "\1") { + if (msg.size() == 1) { + break; + } + throw Error(string(msg, 1)); + } + DLOG(INFO) << msg; + } +} + +void DerivationGoal::initTmpDir() { + /* In a sandbox, for determinism, always use the same temporary + directory. */ +#if __linux__ + tmpDirInSandbox = useChroot ? settings.sandboxBuildDir : tmpDir; +#else + tmpDirInSandbox = tmpDir; +#endif + + /* In non-structured mode, add all bindings specified in the + derivation via the environment, except those listed in the + passAsFile attribute. Those are passed as file names pointing + to temporary files containing the contents. Note that + passAsFile is ignored in structure mode because it's not + needed (attributes are not passed through the environment, so + there is no size constraint). */ + if (!parsedDrv->getStructuredAttrs()) { + auto passAsFile = tokenizeString<StringSet>(get(drv->env, "passAsFile")); + int fileNr = 0; + for (auto& i : drv->env) { + if (passAsFile.find(i.first) == passAsFile.end()) { + env[i.first] = i.second; + } else { + string fn = ".attr-" + std::to_string(fileNr++); + Path p = tmpDir + "/" + fn; + writeFile(p, rewriteStrings(i.second, inputRewrites)); + chownToBuilder(p); + env[i.first + "Path"] = tmpDirInSandbox + "/" + fn; + } + } + } + + /* For convenience, set an environment pointing to the top build + directory. */ + env["NIX_BUILD_TOP"] = tmpDirInSandbox; + + /* Also set TMPDIR and variants to point to this directory. */ + env["TMPDIR"] = env["TEMPDIR"] = env["TMP"] = env["TEMP"] = tmpDirInSandbox; + + /* Explicitly set PWD to prevent problems with chroot builds. In + particular, dietlibc cannot figure out the cwd because the + inode of the current directory doesn't appear in .. (because + getdents returns the inode of the mount point). */ + env["PWD"] = tmpDirInSandbox; +} + +void DerivationGoal::initEnv() { + env.clear(); + + /* Most shells initialise PATH to some default (/bin:/usr/bin:...) when + PATH is not set. We don't want this, so we fill it in with some dummy + value. */ + env["PATH"] = "/path-not-set"; + + /* Set HOME to a non-existing path to prevent certain programs from using + /etc/passwd (or NIS, or whatever) to locate the home directory (for + example, wget looks for ~/.wgetrc). I.e., these tools use /etc/passwd + if HOME is not set, but they will just assume that the settings file + they are looking for does not exist if HOME is set but points to some + non-existing path. */ + env["HOME"] = homeDir; + + /* Tell the builder where the Nix store is. Usually they + shouldn't care, but this is useful for purity checking (e.g., + the compiler or linker might only want to accept paths to files + in the store or in the build directory). */ + env["NIX_STORE"] = worker.store.storeDir; + + /* The maximum number of cores to utilize for parallel building. */ + env["NIX_BUILD_CORES"] = (format("%d") % settings.buildCores).str(); + + initTmpDir(); + + /* Compatibility hack with Nix <= 0.7: if this is a fixed-output + derivation, tell the builder, so that for instance `fetchurl' + can skip checking the output. On older Nixes, this environment + variable won't be set, so `fetchurl' will do the check. */ + if (fixedOutput) { + env["NIX_OUTPUT_CHECKED"] = "1"; + } + + /* *Only* if this is a fixed-output derivation, propagate the + values of the environment variables specified in the + `impureEnvVars' attribute to the builder. This allows for + instance environment variables for proxy configuration such as + `http_proxy' to be easily passed to downloaders like + `fetchurl'. Passing such environment variables from the caller + to the builder is generally impure, but the output of + fixed-output derivations is by definition pure (since we + already know the cryptographic hash of the output). */ + if (fixedOutput) { + for (auto& i : + parsedDrv->getStringsAttr("impureEnvVars").value_or(Strings())) { + env[i] = getEnv(i); + } + } + + /* Currently structured log messages piggyback on stderr, but we + may change that in the future. So tell the builder which file + descriptor to use for that. */ + env["NIX_LOG_FD"] = "2"; + + /* Trigger colored output in various tools. */ + env["TERM"] = "xterm-256color"; +} + +static std::regex shVarName("[A-Za-z_][A-Za-z0-9_]*"); + +void DerivationGoal::writeStructuredAttrs() { + auto& structuredAttrs = parsedDrv->getStructuredAttrs(); + if (!structuredAttrs) { + return; + } + + auto json = *structuredAttrs; + + /* Add an "outputs" object containing the output paths. */ + nlohmann::json outputs; + for (auto& i : drv->outputs) { + outputs[i.first] = rewriteStrings(i.second.path, inputRewrites); + } + json["outputs"] = outputs; + + /* Handle exportReferencesGraph. */ + auto e = json.find("exportReferencesGraph"); + if (e != json.end() && e->is_object()) { + for (auto i = e->begin(); i != e->end(); ++i) { + std::ostringstream str; + { + JSONPlaceholder jsonRoot(str, true); + PathSet storePaths; + for (auto& p : *i) { + storePaths.insert(p.get<std::string>()); + } + worker.store.pathInfoToJSON(jsonRoot, exportReferences(storePaths), + false, true); + } + json[i.key()] = nlohmann::json::parse(str.str()); // urgh + } + } + + writeFile(tmpDir + "/.attrs.json", + rewriteStrings(json.dump(), inputRewrites)); + chownToBuilder(tmpDir + "/.attrs.json"); + + /* As a convenience to bash scripts, write a shell file that + maps all attributes that are representable in bash - + namely, strings, integers, nulls, Booleans, and arrays and + objects consisting entirely of those values. (So nested + arrays or objects are not supported.) */ + + auto handleSimpleType = + [](const nlohmann::json& value) -> std::optional<std::string> { + if (value.is_string()) { + return shellEscape(value); + } + + if (value.is_number()) { + auto f = value.get<float>(); + if (std::ceil(f) == f) { + return std::to_string(value.get<int>()); + } + } + + if (value.is_null()) { + return std::string("''"); + } + + if (value.is_boolean()) { + return value.get<bool>() ? std::string("1") : std::string(""); + } + + return {}; + }; + + std::string jsonSh; + + for (auto i = json.begin(); i != json.end(); ++i) { + if (!std::regex_match(i.key(), shVarName)) { + continue; + } + + auto& value = i.value(); + + auto s = handleSimpleType(value); + if (s) { + jsonSh += fmt("declare %s=%s\n", i.key(), *s); + + } else if (value.is_array()) { + std::string s2; + bool good = true; + + for (auto i = value.begin(); i != value.end(); ++i) { + auto s3 = handleSimpleType(i.value()); + if (!s3) { + good = false; + break; + } + s2 += *s3; + s2 += ' '; + } + + if (good) { + jsonSh += fmt("declare -a %s=(%s)\n", i.key(), s2); + } + } + + else if (value.is_object()) { + std::string s2; + bool good = true; + + for (auto i = value.begin(); i != value.end(); ++i) { + auto s3 = handleSimpleType(i.value()); + if (!s3) { + good = false; + break; + } + s2 += fmt("[%s]=%s ", shellEscape(i.key()), *s3); + } + + if (good) { + jsonSh += fmt("declare -A %s=(%s)\n", i.key(), s2); + } + } + } + + writeFile(tmpDir + "/.attrs.sh", rewriteStrings(jsonSh, inputRewrites)); + chownToBuilder(tmpDir + "/.attrs.sh"); +} + +void DerivationGoal::chownToBuilder(const Path& path) { + if (!buildUser) { + return; + } + if (chown(path.c_str(), buildUser->getUID(), buildUser->getGID()) == -1) { + throw SysError(format("cannot change ownership of '%1%'") % path); + } +} + +void setupSeccomp() { +#if __linux__ + if (!settings.filterSyscalls) { + return; + } +#if HAVE_SECCOMP + scmp_filter_ctx ctx; + + if ((ctx = seccomp_init(SCMP_ACT_ALLOW)) == nullptr) { + throw SysError("unable to initialize seccomp mode 2"); + } + + Finally cleanup([&]() { seccomp_release(ctx); }); + + if (nativeSystem == "x86_64-linux" && + seccomp_arch_add(ctx, SCMP_ARCH_X86) != 0) { + throw SysError("unable to add 32-bit seccomp architecture"); + } + + if (nativeSystem == "x86_64-linux" && + seccomp_arch_add(ctx, SCMP_ARCH_X32) != 0) { + throw SysError("unable to add X32 seccomp architecture"); + } + + if (nativeSystem == "aarch64-linux" && + seccomp_arch_add(ctx, SCMP_ARCH_ARM) != 0) { + LOG(ERROR) << "unable to add ARM seccomp architecture; this may result in " + << "spurious build failures if running 32-bit ARM processes"; + } + + /* Prevent builders from creating setuid/setgid binaries. */ + for (int perm : {S_ISUID, S_ISGID}) { + if (seccomp_rule_add(ctx, SCMP_ACT_ERRNO(EPERM), SCMP_SYS(chmod), 1, + SCMP_A1(SCMP_CMP_MASKED_EQ, (scmp_datum_t)perm, + (scmp_datum_t)perm)) != 0) { + throw SysError("unable to add seccomp rule"); + } + + if (seccomp_rule_add(ctx, SCMP_ACT_ERRNO(EPERM), SCMP_SYS(fchmod), 1, + SCMP_A1(SCMP_CMP_MASKED_EQ, (scmp_datum_t)perm, + (scmp_datum_t)perm)) != 0) { + throw SysError("unable to add seccomp rule"); + } + + if (seccomp_rule_add(ctx, SCMP_ACT_ERRNO(EPERM), SCMP_SYS(fchmodat), 1, + SCMP_A2(SCMP_CMP_MASKED_EQ, (scmp_datum_t)perm, + (scmp_datum_t)perm)) != 0) { + throw SysError("unable to add seccomp rule"); + } + } + + /* Prevent builders from creating EAs or ACLs. Not all filesystems + support these, and they're not allowed in the Nix store because + they're not representable in the NAR serialisation. */ + if (seccomp_rule_add(ctx, SCMP_ACT_ERRNO(ENOTSUP), SCMP_SYS(setxattr), 0) != + 0 || + seccomp_rule_add(ctx, SCMP_ACT_ERRNO(ENOTSUP), SCMP_SYS(lsetxattr), 0) != + 0 || + seccomp_rule_add(ctx, SCMP_ACT_ERRNO(ENOTSUP), SCMP_SYS(fsetxattr), 0) != + 0) { + throw SysError("unable to add seccomp rule"); + } + + if (seccomp_attr_set(ctx, SCMP_FLTATR_CTL_NNP, + settings.allowNewPrivileges ? 0 : 1) != 0) { + throw SysError("unable to set 'no new privileges' seccomp attribute"); + } + + if (seccomp_load(ctx) != 0) { + throw SysError("unable to load seccomp BPF program"); + } +#else + throw Error( + "seccomp is not supported on this platform; " + "you can bypass this error by setting the option 'filter-syscalls' to " + "false, but note that untrusted builds can then create setuid binaries!"); +#endif +#endif +} + +void DerivationGoal::runChild() { + /* Warning: in the child we should absolutely not make any SQLite + calls! */ + + try { /* child */ + + commonChildInit(builderOut); + + try { + setupSeccomp(); + } catch (...) { + if (buildUser) { + throw; + } + } + + bool setUser = true; + + /* Make the contents of netrc available to builtin:fetchurl + (which may run under a different uid and/or in a sandbox). */ + std::string netrcData; + try { + if (drv->isBuiltin() && drv->builder == "builtin:fetchurl") { + netrcData = readFile(settings.netrcFile); + } + } catch (SysError&) { + } + +#if __linux__ + if (useChroot) { + userNamespaceSync.writeSide = -1; + + if (drainFD(userNamespaceSync.readSide.get()) != "1") { + throw Error("user namespace initialisation failed"); + } + + userNamespaceSync.readSide = -1; + + if (privateNetwork) { + /* Initialise the loopback interface. */ + AutoCloseFD fd(socket(PF_INET, SOCK_DGRAM, IPPROTO_IP)); + if (!fd) { + throw SysError("cannot open IP socket"); + } + + struct ifreq ifr; + strcpy(ifr.ifr_name, "lo"); + ifr.ifr_flags = IFF_UP | IFF_LOOPBACK | IFF_RUNNING; + if (ioctl(fd.get(), SIOCSIFFLAGS, &ifr) == -1) { + throw SysError("cannot set loopback interface flags"); + } + } + + /* Set the hostname etc. to fixed values. */ + char hostname[] = "localhost"; + if (sethostname(hostname, sizeof(hostname)) == -1) { + throw SysError("cannot set host name"); + } + char domainname[] = "(none)"; // kernel default + if (setdomainname(domainname, sizeof(domainname)) == -1) { + throw SysError("cannot set domain name"); + } + + /* Make all filesystems private. This is necessary + because subtrees may have been mounted as "shared" + (MS_SHARED). (Systemd does this, for instance.) Even + though we have a private mount namespace, mounting + filesystems on top of a shared subtree still propagates + outside of the namespace. Making a subtree private is + local to the namespace, though, so setting MS_PRIVATE + does not affect the outside world. */ + if (mount(nullptr, "/", nullptr, MS_REC | MS_PRIVATE, nullptr) == -1) { + throw SysError("unable to make '/' private mount"); + } + + /* Bind-mount chroot directory to itself, to treat it as a + different filesystem from /, as needed for pivot_root. */ + if (mount(chrootRootDir.c_str(), chrootRootDir.c_str(), nullptr, MS_BIND, + nullptr) == -1) { + throw SysError(format("unable to bind mount '%1%'") % chrootRootDir); + } + + /* Set up a nearly empty /dev, unless the user asked to + bind-mount the host /dev. */ + Strings ss; + if (dirsInChroot.find("/dev") == dirsInChroot.end()) { + createDirs(chrootRootDir + "/dev/shm"); + createDirs(chrootRootDir + "/dev/pts"); + ss.push_back("/dev/full"); + if ((settings.systemFeatures.get().count("kvm") != 0u) && + pathExists("/dev/kvm")) { + ss.push_back("/dev/kvm"); + } + ss.push_back("/dev/null"); + ss.push_back("/dev/random"); + ss.push_back("/dev/tty"); + ss.push_back("/dev/urandom"); + ss.push_back("/dev/zero"); + createSymlink("/proc/self/fd", chrootRootDir + "/dev/fd"); + createSymlink("/proc/self/fd/0", chrootRootDir + "/dev/stdin"); + createSymlink("/proc/self/fd/1", chrootRootDir + "/dev/stdout"); + createSymlink("/proc/self/fd/2", chrootRootDir + "/dev/stderr"); + } + + /* Fixed-output derivations typically need to access the + network, so give them access to /etc/resolv.conf and so + on. */ + if (fixedOutput) { + ss.push_back("/etc/resolv.conf"); + + // Only use nss functions to resolve hosts and + // services. Donโt use it for anything else that may + // be configured for this system. This limits the + // potential impurities introduced in fixed outputs. + writeFile(chrootRootDir + "/etc/nsswitch.conf", + "hosts: files dns\nservices: files\n"); + + ss.push_back("/etc/services"); + ss.push_back("/etc/hosts"); + if (pathExists("/var/run/nscd/socket")) { + ss.push_back("/var/run/nscd/socket"); + } + } + + for (auto& i : ss) { + dirsInChroot.emplace(i, i); + } + + /* Bind-mount all the directories from the "host" + filesystem that we want in the chroot + environment. */ + auto doBind = [&](const Path& source, const Path& target, + bool optional = false) { + DLOG(INFO) << "bind mounting '" << source << "' to '" << target << "'"; + struct stat st; + if (stat(source.c_str(), &st) == -1) { + if (optional && errno == ENOENT) { + return; + } + throw SysError("getting attributes of path '%1%'", source); + } + if (S_ISDIR(st.st_mode)) { + createDirs(target); + } else { + createDirs(dirOf(target)); + writeFile(target, ""); + } + if (mount(source.c_str(), target.c_str(), "", MS_BIND | MS_REC, + nullptr) == -1) { + throw SysError("bind mount from '%1%' to '%2%' failed", source, + target); + } + }; + + for (auto& i : dirsInChroot) { + if (i.second.source == "/proc") { + continue; + } // backwards compatibility + doBind(i.second.source, chrootRootDir + i.first, i.second.optional); + } + + /* Bind a new instance of procfs on /proc. */ + createDirs(chrootRootDir + "/proc"); + if (mount("none", (chrootRootDir + "/proc").c_str(), "proc", 0, + nullptr) == -1) { + throw SysError("mounting /proc"); + } + + /* Mount a new tmpfs on /dev/shm to ensure that whatever + the builder puts in /dev/shm is cleaned up automatically. */ + if (pathExists("/dev/shm") && + mount("none", (chrootRootDir + "/dev/shm").c_str(), "tmpfs", 0, + fmt("size=%s", settings.sandboxShmSize).c_str()) == -1) { + throw SysError("mounting /dev/shm"); + } + + /* Mount a new devpts on /dev/pts. Note that this + requires the kernel to be compiled with + CONFIG_DEVPTS_MULTIPLE_INSTANCES=y (which is the case + if /dev/ptx/ptmx exists). */ + if (pathExists("/dev/pts/ptmx") && + !pathExists(chrootRootDir + "/dev/ptmx") && + (dirsInChroot.count("/dev/pts") == 0u)) { + if (mount("none", (chrootRootDir + "/dev/pts").c_str(), "devpts", 0, + "newinstance,mode=0620") == 0) { + createSymlink("/dev/pts/ptmx", chrootRootDir + "/dev/ptmx"); + + /* Make sure /dev/pts/ptmx is world-writable. With some + Linux versions, it is created with permissions 0. */ + chmod_(chrootRootDir + "/dev/pts/ptmx", 0666); + } else { + if (errno != EINVAL) { + throw SysError("mounting /dev/pts"); + } + doBind("/dev/pts", chrootRootDir + "/dev/pts"); + doBind("/dev/ptmx", chrootRootDir + "/dev/ptmx"); + } + } + + /* Do the chroot(). */ + if (chdir(chrootRootDir.c_str()) == -1) { + throw SysError(format("cannot change directory to '%1%'") % + chrootRootDir); + } + + if (mkdir("real-root", 0) == -1) { + throw SysError("cannot create real-root directory"); + } + + if (pivot_root(".", "real-root") == -1) { + throw SysError(format("cannot pivot old root directory onto '%1%'") % + (chrootRootDir + "/real-root")); + } + + if (chroot(".") == -1) { + throw SysError(format("cannot change root directory to '%1%'") % + chrootRootDir); + } + + if (umount2("real-root", MNT_DETACH) == -1) { + throw SysError("cannot unmount real root filesystem"); + } + + if (rmdir("real-root") == -1) { + throw SysError("cannot remove real-root directory"); + } + + /* Switch to the sandbox uid/gid in the user namespace, + which corresponds to the build user or calling user in + the parent namespace. */ + if (setgid(sandboxGid) == -1) { + throw SysError("setgid failed"); + } + if (setuid(sandboxUid) == -1) { + throw SysError("setuid failed"); + } + + setUser = false; + } +#endif + + if (chdir(tmpDirInSandbox.c_str()) == -1) { + throw SysError(format("changing into '%1%'") % tmpDir); + } + + /* Close all other file descriptors. */ + closeMostFDs({STDIN_FILENO, STDOUT_FILENO, STDERR_FILENO}); + +#if __linux__ + /* Change the personality to 32-bit if we're doing an + i686-linux build on an x86_64-linux machine. */ + struct utsname utsbuf; + uname(&utsbuf); + if (drv->platform == "i686-linux" && + (settings.thisSystem == "x86_64-linux" || + ((strcmp(utsbuf.sysname, "Linux") == 0) && + (strcmp(utsbuf.machine, "x86_64") == 0)))) { + if (personality(PER_LINUX32) == -1) { + throw SysError("cannot set i686-linux personality"); + } + } + + /* Impersonate a Linux 2.6 machine to get some determinism in + builds that depend on the kernel version. */ + if ((drv->platform == "i686-linux" || drv->platform == "x86_64-linux") && + settings.impersonateLinux26) { + int cur = personality(0xffffffff); + if (cur != -1) { + personality(cur | 0x0020000 /* == UNAME26 */); + } + } + + /* Disable address space randomization for improved + determinism. */ + int cur = personality(0xffffffff); + if (cur != -1) { + personality(cur | ADDR_NO_RANDOMIZE); + } +#endif + + /* Disable core dumps by default. */ + struct rlimit limit = {0, RLIM_INFINITY}; + setrlimit(RLIMIT_CORE, &limit); + + // FIXME: set other limits to deterministic values? + + /* Fill in the environment. */ + Strings envStrs; + for (auto& i : env) { + envStrs.push_back( + rewriteStrings(i.first + "=" + i.second, inputRewrites)); + } + + /* If we are running in `build-users' mode, then switch to the + user we allocated above. Make sure that we drop all root + privileges. Note that above we have closed all file + descriptors except std*, so that's safe. Also note that + setuid() when run as root sets the real, effective and + saved UIDs. */ + if (setUser && buildUser) { + /* Preserve supplementary groups of the build user, to allow + admins to specify groups such as "kvm". */ + if (!buildUser->getSupplementaryGIDs().empty() && + setgroups(buildUser->getSupplementaryGIDs().size(), + buildUser->getSupplementaryGIDs().data()) == -1) { + throw SysError("cannot set supplementary groups of build user"); + } + + if (setgid(buildUser->getGID()) == -1 || + getgid() != buildUser->getGID() || getegid() != buildUser->getGID()) { + throw SysError("setgid failed"); + } + + if (setuid(buildUser->getUID()) == -1 || + getuid() != buildUser->getUID() || geteuid() != buildUser->getUID()) { + throw SysError("setuid failed"); + } + } + + /* Fill in the arguments. */ + Strings args; + + const char* builder = "invalid"; + + if (drv->isBuiltin()) { + ; + } +#if __APPLE__ + else if (getEnv("_NIX_TEST_NO_SANDBOX") == "") { + /* This has to appear before import statements. */ + std::string sandboxProfile = "(version 1)\n"; + + if (useChroot) { + /* Lots and lots and lots of file functions freak out if they can't stat + * their full ancestry */ + PathSet ancestry; + + /* We build the ancestry before adding all inputPaths to the store + because we know they'll all have the same parents (the store), and + there might be lots of inputs. This isn't + particularly efficient... I doubt it'll be a bottleneck in practice + */ + for (auto& i : dirsInChroot) { + Path cur = i.first; + while (cur.compare("/") != 0) { + cur = dirOf(cur); + ancestry.insert(cur); + } + } + + /* And we want the store in there regardless of how empty dirsInChroot. + We include the innermost path component this time, since it's + typically /nix/store and we care about that. */ + Path cur = worker.store.storeDir; + while (cur.compare("/") != 0) { + ancestry.insert(cur); + cur = dirOf(cur); + } + + /* Add all our input paths to the chroot */ + for (auto& i : inputPaths) { + dirsInChroot[i] = i; + } + + /* Violations will go to the syslog if you set this. Unfortunately the + * destination does not appear to be configurable */ + if (settings.darwinLogSandboxViolations) { + sandboxProfile += "(deny default)\n"; + } else { + sandboxProfile += "(deny default (with no-log))\n"; + } + + sandboxProfile += "(import \"sandbox-defaults.sb\")\n"; + + if (fixedOutput) { + sandboxProfile += "(import \"sandbox-network.sb\")\n"; + } + + /* Our rwx outputs */ + sandboxProfile += "(allow file-read* file-write* process-exec\n"; + for (auto& i : missingPaths) { + sandboxProfile += (format("\t(subpath \"%1%\")\n") % i.c_str()).str(); + } + /* Also add redirected outputs to the chroot */ + for (auto& i : redirectedOutputs) { + sandboxProfile += + (format("\t(subpath \"%1%\")\n") % i.second.c_str()).str(); + } + sandboxProfile += ")\n"; + + /* Our inputs (transitive dependencies and any impurities computed + above) + + without file-write* allowed, access() incorrectly returns EPERM + */ + sandboxProfile += "(allow file-read* file-write* process-exec\n"; + for (auto& i : dirsInChroot) { + if (i.first != i.second.source) + throw Error(format("can't map '%1%' to '%2%': mismatched impure " + "paths not supported on Darwin") % + i.first % i.second.source); + + string path = i.first; + struct stat st; + if (lstat(path.c_str(), &st)) { + if (i.second.optional && errno == ENOENT) { + continue; + } + throw SysError(format("getting attributes of path '%1%'") % path); + } + if (S_ISDIR(st.st_mode)) + sandboxProfile += (format("\t(subpath \"%1%\")\n") % path).str(); + else + sandboxProfile += (format("\t(literal \"%1%\")\n") % path).str(); + } + sandboxProfile += ")\n"; + + /* Allow file-read* on full directory hierarchy to self. Allows + * realpath() */ + sandboxProfile += "(allow file-read*\n"; + for (auto& i : ancestry) { + sandboxProfile += (format("\t(literal \"%1%\")\n") % i.c_str()).str(); + } + sandboxProfile += ")\n"; + + sandboxProfile += additionalSandboxProfile; + } else + sandboxProfile += "(import \"sandbox-minimal.sb\")\n"; + + debug("Generated sandbox profile:"); + debug(sandboxProfile); + + Path sandboxFile = tmpDir + "/.sandbox.sb"; + + writeFile(sandboxFile, sandboxProfile); + + bool allowLocalNetworking = + parsedDrv->getBoolAttr("__darwinAllowLocalNetworking"); + + /* The tmpDir in scope points at the temporary build directory for our + derivation. Some packages try different mechanisms to find temporary + directories, so we want to open up a broader place for them to dump + their files, if needed. */ + Path globalTmpDir = canonPath(getEnv("TMPDIR", "/tmp"), true); + + /* They don't like trailing slashes on subpath directives */ + if (globalTmpDir.back() == '/') { + globalTmpDir.pop_back(); + } + + builder = "/usr/bin/sandbox-exec"; + args.push_back("sandbox-exec"); + args.push_back("-f"); + args.push_back(sandboxFile); + args.push_back("-D"); + args.push_back("_GLOBAL_TMP_DIR=" + globalTmpDir); + args.push_back("-D"); + args.push_back("IMPORT_DIR=" + settings.nixDataDir + "/nix/sandbox/"); + if (allowLocalNetworking) { + args.push_back("-D"); + args.push_back(string("_ALLOW_LOCAL_NETWORKING=1")); + } + args.push_back(drv->builder); + } +#endif + else { + builder = drv->builder.c_str(); + string builderBasename = baseNameOf(drv->builder); + args.push_back(builderBasename); + } + + for (auto& i : drv->args) { + args.push_back(rewriteStrings(i, inputRewrites)); + } + + /* Indicate that we managed to set up the build environment. */ + writeFull(STDERR_FILENO, string("\1\n")); + + /* Execute the program. This should not return. */ + if (drv->isBuiltin()) { + try { + BasicDerivation drv2(*drv); + for (auto& e : drv2.env) { + e.second = rewriteStrings(e.second, inputRewrites); + } + + if (drv->builder == "builtin:fetchurl") { + builtinFetchurl(drv2, netrcData); + } else if (drv->builder == "builtin:buildenv") { + builtinBuildenv(drv2); + } else { + throw Error(format("unsupported builtin function '%1%'") % + string(drv->builder, 8)); + } + _exit(0); + } catch (std::exception& e) { + writeFull(STDERR_FILENO, "error: " + string(e.what()) + "\n"); + _exit(1); + } + } + + execve(builder, stringsToCharPtrs(args).data(), + stringsToCharPtrs(envStrs).data()); + + throw SysError(format("executing '%1%'") % drv->builder); + + } catch (std::exception& e) { + writeFull(STDERR_FILENO, "\1while setting up the build environment: " + + string(e.what()) + "\n"); + _exit(1); + } +} + +/* Parse a list of reference specifiers. Each element must either be + a store path, or the symbolic name of the output of the derivation + (such as `out'). */ +PathSet parseReferenceSpecifiers(Store& store, const BasicDerivation& drv, + const Strings& paths) { + PathSet result; + for (auto& i : paths) { + if (store.isStorePath(i)) { + result.insert(i); + } else if (drv.outputs.find(i) != drv.outputs.end()) { + result.insert(drv.outputs.find(i)->second.path); + } else { + throw BuildError( + format("derivation contains an illegal reference specifier '%1%'") % + i); + } + } + return result; +} + +void DerivationGoal::registerOutputs() { + /* When using a build hook, the build hook can register the output + as valid (by doing `nix-store --import'). If so we don't have + to do anything here. */ + if (hook) { + bool allValid = true; + for (auto& i : drv->outputs) { + if (!worker.store.isValidPath(i.second.path)) { + allValid = false; + } + } + if (allValid) { + return; + } + } + + std::map<std::string, ValidPathInfo> infos; + + /* Set of inodes seen during calls to canonicalisePathMetaData() + for this build's outputs. This needs to be shared between + outputs to allow hard links between outputs. */ + InodesSeen inodesSeen; + + Path checkSuffix = ".check"; + bool keepPreviousRound = settings.keepFailed || settings.runDiffHook; + + std::exception_ptr delayedException; + + /* Check whether the output paths were created, and grep each + output path to determine what other paths it references. Also make all + output paths read-only. */ + for (auto& i : drv->outputs) { + Path path = i.second.path; + if (missingPaths.find(path) == missingPaths.end()) { + continue; + } + + ValidPathInfo info; + + Path actualPath = path; + if (useChroot) { + actualPath = chrootRootDir + path; + if (pathExists(actualPath)) { + /* Move output paths from the chroot to the Nix store. */ + if (buildMode == bmRepair) { + replaceValidPath(path, actualPath); + } else if (buildMode != bmCheck && + rename(actualPath.c_str(), + worker.store.toRealPath(path).c_str()) == -1) { + throw SysError(format("moving build output '%1%' from the sandbox to " + "the Nix store") % + path); + } + } + if (buildMode != bmCheck) { + actualPath = worker.store.toRealPath(path); + } + } + + if (needsHashRewrite()) { + Path redirected = redirectedOutputs[path]; + if (buildMode == bmRepair && + redirectedBadOutputs.find(path) != redirectedBadOutputs.end() && + pathExists(redirected)) { + replaceValidPath(path, redirected); + } + if (buildMode == bmCheck && !redirected.empty()) { + actualPath = redirected; + } + } + + struct stat st; + if (lstat(actualPath.c_str(), &st) == -1) { + if (errno == ENOENT) { + throw BuildError( + format("builder for '%1%' failed to produce output path '%2%'") % + drvPath % path); + } + throw SysError(format("getting attributes of path '%1%'") % actualPath); + } + +#ifndef __CYGWIN__ + /* Check that the output is not group or world writable, as + that means that someone else can have interfered with the + build. Also, the output should be owned by the build + user. */ + if ((!S_ISLNK(st.st_mode) && ((st.st_mode & (S_IWGRP | S_IWOTH)) != 0u)) || + (buildUser && st.st_uid != buildUser->getUID())) { + throw BuildError(format("suspicious ownership or permission on '%1%'; " + "rejecting this build output") % + path); + } +#endif + + /* Apply hash rewriting if necessary. */ + bool rewritten = false; + if (!outputRewrites.empty()) { + LOG(WARNING) << "rewriting hashes in '" << path << "'; cross fingers"; + + /* Canonicalise first. This ensures that the path we're + rewriting doesn't contain a hard link to /etc/shadow or + something like that. */ + canonicalisePathMetaData(actualPath, buildUser ? buildUser->getUID() : -1, + inodesSeen); + + /* FIXME: this is in-memory. */ + StringSink sink; + dumpPath(actualPath, sink); + deletePath(actualPath); + sink.s = make_ref<std::string>(rewriteStrings(*sink.s, outputRewrites)); + StringSource source(*sink.s); + restorePath(actualPath, source); + + rewritten = true; + } + + /* Check that fixed-output derivations produced the right + outputs (i.e., the content hash should match the specified + hash). */ + if (fixedOutput) { + bool recursive; + Hash h; + i.second.parseHashInfo(recursive, h); + + if (!recursive) { + /* The output path should be a regular file without + execute permission. */ + if (!S_ISREG(st.st_mode) || (st.st_mode & S_IXUSR) != 0) { + throw BuildError( + format( + "output path '%1%' should be a non-executable regular file") % + path); + } + } + + /* Check the hash. In hash mode, move the path produced by + the derivation to its content-addressed location. */ + Hash h2 = recursive ? hashPath(h.type, actualPath).first + : hashFile(h.type, actualPath); + + Path dest = worker.store.makeFixedOutputPath(recursive, h2, + storePathToName(path)); + + if (h != h2) { + /* Throw an error after registering the path as + valid. */ + worker.hashMismatch = true; + delayedException = std::make_exception_ptr( + BuildError("hash mismatch in fixed-output derivation '%s':\n " + "wanted: %s\n got: %s", + dest, h.to_string(), h2.to_string())); + + Path actualDest = worker.store.toRealPath(dest); + + if (worker.store.isValidPath(dest)) { + std::rethrow_exception(delayedException); + } + + if (actualPath != actualDest) { + PathLocks outputLocks({actualDest}); + deletePath(actualDest); + if (rename(actualPath.c_str(), actualDest.c_str()) == -1) { + throw SysError(format("moving '%1%' to '%2%'") % actualPath % dest); + } + } + + path = dest; + actualPath = actualDest; + } else { + assert(path == dest); + } + + info.ca = makeFixedOutputCA(recursive, h2); + } + + /* Get rid of all weird permissions. This also checks that + all files are owned by the build user, if applicable. */ + canonicalisePathMetaData(actualPath, + buildUser && !rewritten ? buildUser->getUID() : -1, + inodesSeen); + + /* For this output path, find the references to other paths + contained in it. Compute the SHA-256 NAR hash at the same + time. The hash is stored in the database so that we can + verify later on whether nobody has messed with the store. */ + DLOG(INFO) << "scanning for references inside '" << path << "'"; + HashResult hash; + PathSet references = scanForReferences(actualPath, allPaths, hash); + + if (buildMode == bmCheck) { + if (!worker.store.isValidPath(path)) { + continue; + } + auto info = *worker.store.queryPathInfo(path); + if (hash.first != info.narHash) { + worker.checkMismatch = true; + if (settings.runDiffHook || settings.keepFailed) { + Path dst = worker.store.toRealPath(path + checkSuffix); + deletePath(dst); + if (rename(actualPath.c_str(), dst.c_str()) != 0) { + throw SysError(format("renaming '%1%' to '%2%'") % actualPath % + dst); + } + + handleDiffHook(buildUser ? buildUser->getUID() : getuid(), + buildUser ? buildUser->getGID() : getgid(), path, dst, + drvPath, tmpDir); + + throw NotDeterministic( + format("derivation '%1%' may not be deterministic: output '%2%' " + "differs from '%3%'") % + drvPath % path % dst); + } + throw NotDeterministic(format("derivation '%1%' may not be " + "deterministic: output '%2%' differs") % + drvPath % path); + } + + /* Since we verified the build, it's now ultimately + trusted. */ + if (!info.ultimate) { + info.ultimate = true; + worker.store.signPathInfo(info); + worker.store.registerValidPaths({info}); + } + + continue; + } + + /* For debugging, print out the referenced and unreferenced + paths. */ + for (auto& i : inputPaths) { + auto j = references.find(i); + if (j == references.end()) { + DLOG(INFO) << "unreferenced input: '" << i << "'"; + } else { + DLOG(INFO) << "referenced input: '" << i << "'"; + } + } + + if (curRound == nrRounds) { + worker.store.optimisePath( + actualPath); // FIXME: combine with scanForReferences() + worker.markContentsGood(path); + } + + info.path = path; + info.narHash = hash.first; + info.narSize = hash.second; + info.references = references; + info.deriver = drvPath; + info.ultimate = true; + worker.store.signPathInfo(info); + + if (!info.references.empty()) { + info.ca.clear(); + } + + infos[i.first] = info; + } + + if (buildMode == bmCheck) { + return; + } + + /* Apply output checks. */ + checkOutputs(infos); + + /* Compare the result with the previous round, and report which + path is different, if any.*/ + if (curRound > 1 && prevInfos != infos) { + assert(prevInfos.size() == infos.size()); + for (auto i = prevInfos.begin(), j = infos.begin(); i != prevInfos.end(); + ++i, ++j) { + if (!(*i == *j)) { + result.isNonDeterministic = true; + Path prev = i->second.path + checkSuffix; + bool prevExists = keepPreviousRound && pathExists(prev); + auto msg = + prevExists + ? fmt("output '%1%' of '%2%' differs from '%3%' from previous " + "round", + i->second.path, drvPath, prev) + : fmt("output '%1%' of '%2%' differs from previous round", + i->second.path, drvPath); + + handleDiffHook(buildUser ? buildUser->getUID() : getuid(), + buildUser ? buildUser->getGID() : getgid(), prev, + i->second.path, drvPath, tmpDir); + + if (settings.enforceDeterminism) { + throw NotDeterministic(msg); + } + + LOG(ERROR) << msg; + curRound = nrRounds; // we know enough, bail out early + } + } + } + + /* If this is the first round of several, then move the output out + of the way. */ + if (nrRounds > 1 && curRound == 1 && curRound < nrRounds && + keepPreviousRound) { + for (auto& i : drv->outputs) { + Path prev = i.second.path + checkSuffix; + deletePath(prev); + Path dst = i.second.path + checkSuffix; + if (rename(i.second.path.c_str(), dst.c_str()) != 0) { + throw SysError(format("renaming '%1%' to '%2%'") % i.second.path % dst); + } + } + } + + if (curRound < nrRounds) { + prevInfos = infos; + return; + } + + /* Remove the .check directories if we're done. FIXME: keep them + if the result was not determistic? */ + if (curRound == nrRounds) { + for (auto& i : drv->outputs) { + Path prev = i.second.path + checkSuffix; + deletePath(prev); + } + } + + /* Register each output path as valid, and register the sets of + paths referenced by each of them. If there are cycles in the + outputs, this will fail. */ + { + ValidPathInfos infos2; + for (auto& i : infos) { + infos2.push_back(i.second); + } + worker.store.registerValidPaths(infos2); + } + + /* In case of a fixed-output derivation hash mismatch, throw an + exception now that we have registered the output as valid. */ + if (delayedException) { + std::rethrow_exception(delayedException); + } +} + +void DerivationGoal::checkOutputs( + const std::map<Path, ValidPathInfo>& outputs) { + std::map<Path, const ValidPathInfo&> outputsByPath; + for (auto& output : outputs) { + outputsByPath.emplace(output.second.path, output.second); + } + + for (auto& output : outputs) { + auto& outputName = output.first; + auto& info = output.second; + + struct Checks { + bool ignoreSelfRefs = false; + std::optional<uint64_t> maxSize, maxClosureSize; + std::optional<Strings> allowedReferences, allowedRequisites, + disallowedReferences, disallowedRequisites; + }; + + /* Compute the closure and closure size of some output. This + is slightly tricky because some of its references (namely + other outputs) may not be valid yet. */ + auto getClosure = [&](const Path& path) { + uint64_t closureSize = 0; + PathSet pathsDone; + std::queue<Path> pathsLeft; + pathsLeft.push(path); + + while (!pathsLeft.empty()) { + auto path = pathsLeft.front(); + pathsLeft.pop(); + if (!pathsDone.insert(path).second) { + continue; + } + + auto i = outputsByPath.find(path); + if (i != outputsByPath.end()) { + closureSize += i->second.narSize; + for (auto& ref : i->second.references) { + pathsLeft.push(ref); + } + } else { + auto info = worker.store.queryPathInfo(path); + closureSize += info->narSize; + for (auto& ref : info->references) { + pathsLeft.push(ref); + } + } + } + + return std::make_pair(pathsDone, closureSize); + }; + + auto applyChecks = [&](const Checks& checks) { + if (checks.maxSize && info.narSize > *checks.maxSize) { + throw BuildError( + "path '%s' is too large at %d bytes; limit is %d bytes", info.path, + info.narSize, *checks.maxSize); + } + + if (checks.maxClosureSize) { + uint64_t closureSize = getClosure(info.path).second; + if (closureSize > *checks.maxClosureSize) { + throw BuildError( + "closure of path '%s' is too large at %d bytes; limit is %d " + "bytes", + info.path, closureSize, *checks.maxClosureSize); + } + } + + auto checkRefs = [&](const std::optional<Strings>& value, bool allowed, + bool recursive) { + if (!value) { + return; + } + + PathSet spec = parseReferenceSpecifiers(worker.store, *drv, *value); + + PathSet used = + recursive ? getClosure(info.path).first : info.references; + + if (recursive && checks.ignoreSelfRefs) { + used.erase(info.path); + } + + PathSet badPaths; + + for (auto& i : used) { + if (allowed) { + if (spec.count(i) == 0u) { + badPaths.insert(i); + } + } else { + if (spec.count(i) != 0u) { + badPaths.insert(i); + } + } + } + + if (!badPaths.empty()) { + string badPathsStr; + for (auto& i : badPaths) { + badPathsStr += "\n "; + badPathsStr += i; + } + throw BuildError( + "output '%s' is not allowed to refer to the following paths:%s", + info.path, badPathsStr); + } + }; + + checkRefs(checks.allowedReferences, true, false); + checkRefs(checks.allowedRequisites, true, true); + checkRefs(checks.disallowedReferences, false, false); + checkRefs(checks.disallowedRequisites, false, true); + }; + + if (auto structuredAttrs = parsedDrv->getStructuredAttrs()) { + auto outputChecks = structuredAttrs->find("outputChecks"); + if (outputChecks != structuredAttrs->end()) { + auto output = outputChecks->find(outputName); + + if (output != outputChecks->end()) { + Checks checks; + + auto maxSize = output->find("maxSize"); + if (maxSize != output->end()) { + checks.maxSize = maxSize->get<uint64_t>(); + } + + auto maxClosureSize = output->find("maxClosureSize"); + if (maxClosureSize != output->end()) { + checks.maxClosureSize = maxClosureSize->get<uint64_t>(); + } + + auto get = [&](const std::string& name) -> std::optional<Strings> { + auto i = output->find(name); + if (i != output->end()) { + Strings res; + for (auto& j : *i) { + if (!j.is_string()) { + throw Error( + "attribute '%s' of derivation '%s' must be a list of " + "strings", + name, drvPath); + } + res.push_back(j.get<std::string>()); + } + checks.disallowedRequisites = res; + return res; + } + return {}; + }; + + checks.allowedReferences = get("allowedReferences"); + checks.allowedRequisites = get("allowedRequisites"); + checks.disallowedReferences = get("disallowedReferences"); + checks.disallowedRequisites = get("disallowedRequisites"); + + applyChecks(checks); + } + } + } else { + // legacy non-structured-attributes case + Checks checks; + checks.ignoreSelfRefs = true; + checks.allowedReferences = parsedDrv->getStringsAttr("allowedReferences"); + checks.allowedRequisites = parsedDrv->getStringsAttr("allowedRequisites"); + checks.disallowedReferences = + parsedDrv->getStringsAttr("disallowedReferences"); + checks.disallowedRequisites = + parsedDrv->getStringsAttr("disallowedRequisites"); + applyChecks(checks); + } + } +} + +Path DerivationGoal::openLogFile() { + logSize = 0; + + if (!settings.keepLog) { + return ""; + } + + string baseName = baseNameOf(drvPath); + + /* Create a log file. */ + Path dir = fmt("%s/%s/%s/", worker.store.logDir, nix::LocalStore::drvsLogDir, + string(baseName, 0, 2)); + createDirs(dir); + + Path logFileName = fmt("%s/%s%s", dir, string(baseName, 2), + settings.compressLog ? ".bz2" : ""); + + fdLogFile = + open(logFileName.c_str(), O_CREAT | O_WRONLY | O_TRUNC | O_CLOEXEC, 0666); + if (!fdLogFile) { + throw SysError(format("creating log file '%1%'") % logFileName); + } + + logFileSink = std::make_shared<FdSink>(fdLogFile.get()); + + if (settings.compressLog) { + logSink = std::shared_ptr<CompressionSink>( + makeCompressionSink("bzip2", *logFileSink)); + } else { + logSink = logFileSink; + } + + return logFileName; +} + +void DerivationGoal::closeLogFile() { + auto logSink2 = std::dynamic_pointer_cast<CompressionSink>(logSink); + if (logSink2) { + logSink2->finish(); + } + if (logFileSink) { + logFileSink->flush(); + } + logSink = logFileSink = nullptr; + fdLogFile = -1; +} + +void DerivationGoal::deleteTmpDir(bool force) { + if (!tmpDir.empty()) { + /* Don't keep temporary directories for builtins because they + might have privileged stuff (like a copy of netrc). */ + if (settings.keepFailed && !force && !drv->isBuiltin()) { + LOG(INFO) << "note: keeping build directory '" << tmpDir << "'"; + chmod(tmpDir.c_str(), 0755); + } else { + deletePath(tmpDir); + } + tmpDir = ""; + } +} + +void DerivationGoal::handleChildOutput(int fd, const string& data) { + if ((hook && fd == hook->builderOut.readSide.get()) || + (!hook && fd == builderOut.readSide.get())) { + logSize += data.size(); + if (settings.maxLogSize && logSize > settings.maxLogSize) { + LOG(ERROR) << getName() + << " killed after writing more than %2% bytes of log output" + << settings.maxLogSize; + killChild(); + done(BuildResult::LogLimitExceeded); + return; + } + + for (auto c : data) { + if (c == '\r') { + currentLogLinePos = 0; + } else if (c == '\n') { + flushLine(); + } else { + if (currentLogLinePos >= currentLogLine.size()) { + currentLogLine.resize(currentLogLinePos + 1); + } + currentLogLine[currentLogLinePos++] = c; + } + } + + if (logSink) { + (*logSink)(data); + } + } + + if (hook && fd == hook->fromHook.readSide.get()) { + for (auto c : data) { + if (c == '\n') { + currentHookLine.clear(); + } else { + currentHookLine += c; + } + } + } +} + +void DerivationGoal::handleEOF(int fd) { + if (!currentLogLine.empty()) { + flushLine(); + } + worker.wakeUp(shared_from_this()); +} + +void DerivationGoal::flushLine() { + if (settings.verboseBuild && + (settings.printRepeatedBuilds || curRound == 1)) { + LOG(INFO) << currentLogLine; + } else { + logTail.push_back(currentLogLine); + if (logTail.size() > settings.logLines) { + logTail.pop_front(); + } + } + + currentLogLine = ""; + currentLogLinePos = 0; +} + +PathSet DerivationGoal::checkPathValidity(bool returnValid, bool checkHash) { + PathSet result; + for (auto& i : drv->outputs) { + if (!wantOutput(i.first, wantedOutputs)) { + continue; + } + bool good = worker.store.isValidPath(i.second.path) && + (!checkHash || worker.pathContentsGood(i.second.path)); + if (good == returnValid) { + result.insert(i.second.path); + } + } + return result; +} + +Path DerivationGoal::addHashRewrite(const Path& path) { + string h1 = string(path, worker.store.storeDir.size() + 1, 32); + string h2 = string(hashString(htSHA256, "rewrite:" + drvPath + ":" + path) + .to_string(Base32, false), + 0, 32); + Path p = worker.store.storeDir + "/" + h2 + + string(path, worker.store.storeDir.size() + 33); + deletePath(p); + assert(path.size() == p.size()); + inputRewrites[h1] = h2; + outputRewrites[h2] = h1; + redirectedOutputs[path] = p; + return p; +} + +void DerivationGoal::done(BuildResult::Status status, const string& msg) { + result.status = status; + result.errorMsg = msg; + amDone(result.success() ? ecSuccess : ecFailed); + if (result.status == BuildResult::TimedOut) { + worker.timedOut = true; + } + if (result.status == BuildResult::PermanentFailure) { + worker.permanentFailure = true; + } + + mcExpectedBuilds.reset(); + mcRunningBuilds.reset(); + + if (result.success()) { + if (status == BuildResult::Built) { + worker.doneBuilds++; + } + } else { + if (status != BuildResult::DependencyFailed) { + worker.failedBuilds++; + } + } +} + +////////////////////////////////////////////////////////////////////// + +class SubstitutionGoal : public Goal { + friend class Worker; + + private: + /* The store path that should be realised through a substitute. */ + Path storePath; + + /* The remaining substituters. */ + std::list<ref<Store>> subs; + + /* The current substituter. */ + std::shared_ptr<Store> sub; + + /* Whether a substituter failed. */ + bool substituterFailed = false; + + /* Path info returned by the substituter's query info operation. */ + std::shared_ptr<const ValidPathInfo> info; + + /* Pipe for the substituter's standard output. */ + Pipe outPipe; + + /* The substituter thread. */ + std::thread thr; + + std::promise<void> promise; + + /* Whether to try to repair a valid path. */ + RepairFlag repair; + + /* Location where we're downloading the substitute. Differs from + storePath when doing a repair. */ + Path destPath; + + std::unique_ptr<MaintainCount<uint64_t>> maintainExpectedSubstitutions, + maintainRunningSubstitutions, maintainExpectedNar, + maintainExpectedDownload; + + using GoalState = void (SubstitutionGoal::*)(); + GoalState state; + + public: + SubstitutionGoal(const Path& storePath, Worker& worker, + RepairFlag repair = NoRepair); + ~SubstitutionGoal() override; + + void timedOut() override { abort(); }; + + string key() override { + /* "a$" ensures substitution goals happen before derivation + goals. */ + return "a$" + storePathToName(storePath) + "$" + storePath; + } + + void work() override; + + /* The states. */ + void init(); + void tryNext(); + void gotInfo(); + void referencesValid(); + void tryToRun(); + void finished(); + + /* Callback used by the worker to write to the log. */ + void handleChildOutput(int fd, const string& data) override; + void handleEOF(int fd) override; + + Path getStorePath() { return storePath; } + + void amDone(ExitCode result) override { Goal::amDone(result); } +}; + +SubstitutionGoal::SubstitutionGoal(const Path& storePath, Worker& worker, + RepairFlag repair) + : Goal(worker), repair(repair) { + this->storePath = storePath; + state = &SubstitutionGoal::init; + name = (format("substitution of '%1%'") % storePath).str(); + trace("created"); + maintainExpectedSubstitutions = + std::make_unique<MaintainCount<uint64_t>>(worker.expectedSubstitutions); +} + +SubstitutionGoal::~SubstitutionGoal() { + try { + if (thr.joinable()) { + // FIXME: signal worker thread to quit. + thr.join(); + worker.childTerminated(this); + } + } catch (...) { + ignoreException(); + } +} + +void SubstitutionGoal::work() { (this->*state)(); } + +void SubstitutionGoal::init() { + trace("init"); + + worker.store.addTempRoot(storePath); + + /* If the path already exists we're done. */ + if ((repair == 0u) && worker.store.isValidPath(storePath)) { + amDone(ecSuccess); + return; + } + + if (settings.readOnlyMode) { + throw Error( + format( + "cannot substitute path '%1%' - no write access to the Nix store") % + storePath); + } + + subs = settings.useSubstitutes ? getDefaultSubstituters() + : std::list<ref<Store>>(); + + tryNext(); +} + +void SubstitutionGoal::tryNext() { + trace("trying next substituter"); + + if (subs.empty()) { + /* None left. Terminate this goal and let someone else deal + with it. */ + DLOG(WARNING) + << "path '" << storePath + << "' is required, but there is no substituter that can build it"; + + /* Hack: don't indicate failure if there were no substituters. + In that case the calling derivation should just do a + build. */ + amDone(substituterFailed ? ecFailed : ecNoSubstituters); + + if (substituterFailed) { + worker.failedSubstitutions++; + } + + return; + } + + sub = subs.front(); + subs.pop_front(); + + if (sub->storeDir != worker.store.storeDir) { + tryNext(); + return; + } + + try { + // FIXME: make async + info = sub->queryPathInfo(storePath); + } catch (InvalidPath&) { + tryNext(); + return; + } catch (SubstituterDisabled&) { + if (settings.tryFallback) { + tryNext(); + return; + } + throw; + } catch (Error& e) { + if (settings.tryFallback) { + LOG(ERROR) << e.what(); + tryNext(); + return; + } + throw; + } + + /* Update the total expected download size. */ + auto narInfo = std::dynamic_pointer_cast<const NarInfo>(info); + + maintainExpectedNar = std::make_unique<MaintainCount<uint64_t>>( + worker.expectedNarSize, info->narSize); + + maintainExpectedDownload = + narInfo && (narInfo->fileSize != 0u) + ? std::make_unique<MaintainCount<uint64_t>>( + worker.expectedDownloadSize, narInfo->fileSize) + : nullptr; + + /* Bail out early if this substituter lacks a valid + signature. LocalStore::addToStore() also checks for this, but + only after we've downloaded the path. */ + if (worker.store.requireSigs && !sub->isTrusted && + (info->checkSignatures(worker.store, worker.store.getPublicKeys()) == + 0u)) { + LOG(WARNING) << "substituter '" << sub->getUri() + << "' does not have a valid signature for path '" << storePath + << "'"; + tryNext(); + return; + } + + /* To maintain the closure invariant, we first have to realise the + paths referenced by this one. */ + for (auto& i : info->references) { + if (i != storePath) { /* ignore self-references */ + addWaitee(worker.makeSubstitutionGoal(i)); + } + } + + if (waitees.empty()) { /* to prevent hang (no wake-up event) */ + referencesValid(); + } else { + state = &SubstitutionGoal::referencesValid; + } +} + +void SubstitutionGoal::referencesValid() { + trace("all references realised"); + + if (nrFailed > 0) { + DLOG(WARNING) << "some references of path '" << storePath + << "' could not be realised"; + amDone(nrNoSubstituters > 0 || nrIncompleteClosure > 0 ? ecIncompleteClosure + : ecFailed); + return; + } + + for (auto& i : info->references) { + if (i != storePath) { /* ignore self-references */ + assert(worker.store.isValidPath(i)); + } + } + + state = &SubstitutionGoal::tryToRun; + worker.wakeUp(shared_from_this()); +} + +void SubstitutionGoal::tryToRun() { + trace("trying to run"); + + /* Make sure that we are allowed to start a build. Note that even + if maxBuildJobs == 0 (no local builds allowed), we still allow + a substituter to run. This is because substitutions cannot be + distributed to another machine via the build hook. */ + if (worker.getNrLocalBuilds() >= + std::max(1U, (unsigned int)settings.maxBuildJobs)) { + worker.waitForBuildSlot(shared_from_this()); + return; + } + + maintainRunningSubstitutions = + std::make_unique<MaintainCount<uint64_t>>(worker.runningSubstitutions); + + outPipe.create(); + + promise = std::promise<void>(); + + thr = std::thread([this]() { + try { + /* Wake up the worker loop when we're done. */ + Finally updateStats([this]() { outPipe.writeSide = -1; }); + + copyStorePath(ref<Store>(sub), + ref<Store>(worker.store.shared_from_this()), storePath, + repair, sub->isTrusted ? NoCheckSigs : CheckSigs); + + promise.set_value(); + } catch (...) { + promise.set_exception(std::current_exception()); + } + }); + + worker.childStarted(shared_from_this(), {outPipe.readSide.get()}, true, + false); + + state = &SubstitutionGoal::finished; +} + +void SubstitutionGoal::finished() { + trace("substitute finished"); + + thr.join(); + worker.childTerminated(this); + + try { + promise.get_future().get(); + } catch (std::exception& e) { + LOG(ERROR) << e.what(); + + /* Cause the parent build to fail unless --fallback is given, + or the substitute has disappeared. The latter case behaves + the same as the substitute never having existed in the + first place. */ + try { + throw; + } catch (SubstituteGone&) { + } catch (...) { + substituterFailed = true; + } + + /* Try the next substitute. */ + state = &SubstitutionGoal::tryNext; + worker.wakeUp(shared_from_this()); + return; + } + + worker.markContentsGood(storePath); + + DLOG(INFO) << "substitution of path '" << storePath << "' succeeded"; + + maintainRunningSubstitutions.reset(); + + maintainExpectedSubstitutions.reset(); + worker.doneSubstitutions++; + + if (maintainExpectedDownload) { + auto fileSize = maintainExpectedDownload->delta; + maintainExpectedDownload.reset(); + worker.doneDownloadSize += fileSize; + } + + worker.doneNarSize += maintainExpectedNar->delta; + maintainExpectedNar.reset(); + + amDone(ecSuccess); +} + +void SubstitutionGoal::handleChildOutput(int fd, const string& data) {} + +void SubstitutionGoal::handleEOF(int fd) { + if (fd == outPipe.readSide.get()) { + worker.wakeUp(shared_from_this()); + } +} + +////////////////////////////////////////////////////////////////////// + +static bool working = false; + +Worker::Worker(LocalStore& store) : store(store) { + /* Debugging: prevent recursive workers. */ + if (working) { + abort(); + } + working = true; + nrLocalBuilds = 0; + lastWokenUp = steady_time_point::min(); + permanentFailure = false; + timedOut = false; + hashMismatch = false; + checkMismatch = false; +} + +Worker::~Worker() { + working = false; + + /* Explicitly get rid of all strong pointers now. After this all + goals that refer to this worker should be gone. (Otherwise we + are in trouble, since goals may call childTerminated() etc. in + their destructors). */ + topGoals.clear(); + + assert(expectedSubstitutions == 0); + assert(expectedDownloadSize == 0); + assert(expectedNarSize == 0); +} + +GoalPtr Worker::makeDerivationGoal(const Path& path, + const StringSet& wantedOutputs, + BuildMode buildMode) { + GoalPtr goal = derivationGoals[path].lock(); + if (!goal) { + goal = + std::make_shared<DerivationGoal>(path, wantedOutputs, *this, buildMode); + derivationGoals[path] = goal; + wakeUp(goal); + } else { + (dynamic_cast<DerivationGoal*>(goal.get())) + ->addWantedOutputs(wantedOutputs); + } + return goal; +} + +std::shared_ptr<DerivationGoal> Worker::makeBasicDerivationGoal( + const Path& drvPath, const BasicDerivation& drv, BuildMode buildMode) { + auto goal = std::make_shared<DerivationGoal>(drvPath, drv, *this, buildMode); + wakeUp(goal); + return goal; +} + +GoalPtr Worker::makeSubstitutionGoal(const Path& path, RepairFlag repair) { + GoalPtr goal = substitutionGoals[path].lock(); + if (!goal) { + goal = std::make_shared<SubstitutionGoal>(path, *this, repair); + substitutionGoals[path] = goal; + wakeUp(goal); + } + return goal; +} + +static void removeGoal(const GoalPtr& goal, WeakGoalMap& goalMap) { + /* !!! inefficient */ + for (auto i = goalMap.begin(); i != goalMap.end();) { + if (i->second.lock() == goal) { + auto j = i; + ++j; + goalMap.erase(i); + i = j; + } else { + ++i; + } + } +} + +void Worker::removeGoal(const GoalPtr& goal) { + nix::removeGoal(goal, derivationGoals); + nix::removeGoal(goal, substitutionGoals); + if (topGoals.find(goal) != topGoals.end()) { + topGoals.erase(goal); + /* If a top-level goal failed, then kill all other goals + (unless keepGoing was set). */ + if (goal->getExitCode() == Goal::ecFailed && !settings.keepGoing) { + topGoals.clear(); + } + } + + /* Wake up goals waiting for any goal to finish. */ + for (auto& i : waitingForAnyGoal) { + GoalPtr goal = i.lock(); + if (goal) { + wakeUp(goal); + } + } + + waitingForAnyGoal.clear(); +} + +void Worker::wakeUp(const GoalPtr& goal) { + goal->trace("woken up"); + addToWeakGoals(awake, goal); +} + +unsigned Worker::getNrLocalBuilds() { return nrLocalBuilds; } + +void Worker::childStarted(const GoalPtr& goal, const set<int>& fds, + bool inBuildSlot, bool respectTimeouts) { + Child child; + child.goal = goal; + child.goal2 = goal.get(); + child.fds = fds; + child.timeStarted = child.lastOutput = steady_time_point::clock::now(); + child.inBuildSlot = inBuildSlot; + child.respectTimeouts = respectTimeouts; + children.emplace_back(child); + if (inBuildSlot) { + nrLocalBuilds++; + } +} + +void Worker::childTerminated(Goal* goal, bool wakeSleepers) { + auto i = + std::find_if(children.begin(), children.end(), + [&](const Child& child) { return child.goal2 == goal; }); + if (i == children.end()) { + return; + } + + if (i->inBuildSlot) { + assert(nrLocalBuilds > 0); + nrLocalBuilds--; + } + + children.erase(i); + + if (wakeSleepers) { + /* Wake up goals waiting for a build slot. */ + for (auto& j : wantingToBuild) { + GoalPtr goal = j.lock(); + if (goal) { + wakeUp(goal); + } + } + + wantingToBuild.clear(); + } +} + +void Worker::waitForBuildSlot(const GoalPtr& goal) { + DLOG(INFO) << "wait for build slot"; + if (getNrLocalBuilds() < settings.maxBuildJobs) { + wakeUp(goal); /* we can do it right away */ + } else { + addToWeakGoals(wantingToBuild, goal); + } +} + +void Worker::waitForAnyGoal(GoalPtr goal) { + DLOG(INFO) << "wait for any goal"; + addToWeakGoals(waitingForAnyGoal, std::move(goal)); +} + +void Worker::waitForAWhile(GoalPtr goal) { + DLOG(INFO) << "wait for a while"; + addToWeakGoals(waitingForAWhile, std::move(goal)); +} + +void Worker::run(const Goals& _topGoals) { + for (auto& i : _topGoals) { + topGoals.insert(i); + } + + DLOG(INFO) << "entered goal loop"; + + while (true) { + checkInterrupt(); + + store.autoGC(false); + + /* Call every wake goal (in the ordering established by + CompareGoalPtrs). */ + while (!awake.empty() && !topGoals.empty()) { + Goals awake2; + for (auto& i : awake) { + GoalPtr goal = i.lock(); + if (goal) { + awake2.insert(goal); + } + } + awake.clear(); + for (auto& goal : awake2) { + checkInterrupt(); + goal->work(); + if (topGoals.empty()) { + break; + } // stuff may have been cancelled + } + } + + if (topGoals.empty()) { + break; + } + + /* Wait for input. */ + if (!children.empty() || !waitingForAWhile.empty()) { + waitForInput(); + } else { + if (awake.empty() && 0 == settings.maxBuildJobs) { + throw Error( + "unable to start any build; either increase '--max-jobs' " + "or enable remote builds"); + } + assert(!awake.empty()); + } + } + + /* If --keep-going is not set, it's possible that the main goal + exited while some of its subgoals were still active. But if + --keep-going *is* set, then they must all be finished now. */ + assert(!settings.keepGoing || awake.empty()); + assert(!settings.keepGoing || wantingToBuild.empty()); + assert(!settings.keepGoing || children.empty()); +} + +void Worker::waitForInput() { + DLOG(INFO) << "waiting for children"; + + /* Process output from the file descriptors attached to the + children, namely log output and output path creation commands. + We also use this to detect child termination: if we get EOF on + the logger pipe of a build, we assume that the builder has + terminated. */ + + bool useTimeout = false; + struct timeval timeout; + timeout.tv_usec = 0; + auto before = steady_time_point::clock::now(); + + /* If we're monitoring for silence on stdout/stderr, or if there + is a build timeout, then wait for input until the first + deadline for any child. */ + auto nearest = steady_time_point::max(); // nearest deadline + if (settings.minFree.get() != 0) { + // Periodicallty wake up to see if we need to run the garbage collector. + nearest = before + std::chrono::seconds(10); + } + for (auto& i : children) { + if (!i.respectTimeouts) { + continue; + } + if (0 != settings.maxSilentTime) { + nearest = std::min( + nearest, i.lastOutput + std::chrono::seconds(settings.maxSilentTime)); + } + if (0 != settings.buildTimeout) { + nearest = std::min( + nearest, i.timeStarted + std::chrono::seconds(settings.buildTimeout)); + } + } + if (nearest != steady_time_point::max()) { + timeout.tv_sec = std::max( + 1L, + (long)std::chrono::duration_cast<std::chrono::seconds>(nearest - before) + .count()); + useTimeout = true; + } + + /* If we are polling goals that are waiting for a lock, then wake + up after a few seconds at most. */ + if (!waitingForAWhile.empty()) { + useTimeout = true; + if (lastWokenUp == steady_time_point::min()) { + DLOG(WARNING) << "waiting for locks or build slots..."; + } + if (lastWokenUp == steady_time_point::min() || lastWokenUp > before) { + lastWokenUp = before; + } + timeout.tv_sec = std::max( + 1L, + (long)std::chrono::duration_cast<std::chrono::seconds>( + lastWokenUp + std::chrono::seconds(settings.pollInterval) - before) + .count()); + } else { + lastWokenUp = steady_time_point::min(); + } + + if (useTimeout) { + DLOG(INFO) << "sleeping " << timeout.tv_sec << " seconds"; + } + + /* Use select() to wait for the input side of any logger pipe to + become `available'. Note that `available' (i.e., non-blocking) + includes EOF. */ + fd_set fds; + FD_ZERO(&fds); + int fdMax = 0; + for (auto& i : children) { + for (auto& j : i.fds) { + if (j >= FD_SETSIZE) { + throw Error("reached FD_SETSIZE limit"); + } + FD_SET(j, &fds); + if (j >= fdMax) { + fdMax = j + 1; + } + } + } + + if (select(fdMax, &fds, nullptr, nullptr, useTimeout ? &timeout : nullptr) == + -1) { + if (errno == EINTR) { + return; + } + throw SysError("waiting for input"); + } + + auto after = steady_time_point::clock::now(); + + /* Process all available file descriptors. FIXME: this is + O(children * fds). */ + decltype(children)::iterator i; + for (auto j = children.begin(); j != children.end(); j = i) { + i = std::next(j); + + checkInterrupt(); + + GoalPtr goal = j->goal.lock(); + assert(goal); + + set<int> fds2(j->fds); + std::vector<unsigned char> buffer(4096); + for (auto& k : fds2) { + if (FD_ISSET(k, &fds)) { + ssize_t rd = read(k, buffer.data(), buffer.size()); + // FIXME: is there a cleaner way to handle pt close + // than EIO? Is this even standard? + if (rd == 0 || (rd == -1 && errno == EIO)) { + DLOG(WARNING) << goal->getName() << ": got EOF"; + goal->handleEOF(k); + j->fds.erase(k); + } else if (rd == -1) { + if (errno != EINTR) { + throw SysError("%s: read failed", goal->getName()); + } + } else { + DLOG(INFO) << goal->getName() << ": read " << rd << " bytes"; + string data((char*)buffer.data(), rd); + j->lastOutput = after; + goal->handleChildOutput(k, data); + } + } + } + + if (goal->getExitCode() == Goal::ecBusy && 0 != settings.maxSilentTime && + j->respectTimeouts && + after - j->lastOutput >= std::chrono::seconds(settings.maxSilentTime)) { + LOG(ERROR) << goal->getName() << " timed out after " + << settings.maxSilentTime << " seconds of silence"; + goal->timedOut(); + } + + else if (goal->getExitCode() == Goal::ecBusy && + 0 != settings.buildTimeout && j->respectTimeouts && + after - j->timeStarted >= + std::chrono::seconds(settings.buildTimeout)) { + LOG(ERROR) << goal->getName() << " timed out after " + << settings.buildTimeout << " seconds"; + goal->timedOut(); + } + } + + if (!waitingForAWhile.empty() && + lastWokenUp + std::chrono::seconds(settings.pollInterval) <= after) { + lastWokenUp = after; + for (auto& i : waitingForAWhile) { + GoalPtr goal = i.lock(); + if (goal) { + wakeUp(goal); + } + } + waitingForAWhile.clear(); + } +} + +unsigned int Worker::exitStatus() { + /* + * 1100100 + * ^^^^ + * |||`- timeout + * ||`-- output hash mismatch + * |`--- build failure + * `---- not deterministic + */ + unsigned int mask = 0; + bool buildFailure = permanentFailure || timedOut || hashMismatch; + if (buildFailure) { + mask |= 0x04; // 100 + } + if (timedOut) { + mask |= 0x01; // 101 + } + if (hashMismatch) { + mask |= 0x02; // 102 + } + if (checkMismatch) { + mask |= 0x08; // 104 + } + + if (mask != 0u) { + mask |= 0x60; + } + return mask != 0u ? mask : 1; +} + +bool Worker::pathContentsGood(const Path& path) { + auto i = pathContentsGoodCache.find(path); + if (i != pathContentsGoodCache.end()) { + return i->second; + } + LOG(INFO) << "checking path '" << path << "'..."; + auto info = store.queryPathInfo(path); + bool res; + if (!pathExists(path)) { + res = false; + } else { + HashResult current = hashPath(info->narHash.type, path); + Hash nullHash(htSHA256); + res = info->narHash == nullHash || info->narHash == current.first; + } + pathContentsGoodCache[path] = res; + if (!res) { + LOG(ERROR) << "path '" << path << "' is corrupted or missing!"; + } + return res; +} + +void Worker::markContentsGood(const Path& path) { + pathContentsGoodCache[path] = true; +} + +////////////////////////////////////////////////////////////////////// + +static void primeCache(Store& store, const PathSet& paths) { + PathSet willBuild; + PathSet willSubstitute; + PathSet unknown; + unsigned long long downloadSize; + unsigned long long narSize; + store.queryMissing(paths, willBuild, willSubstitute, unknown, downloadSize, + narSize); + + if (!willBuild.empty() && 0 == settings.maxBuildJobs && + getMachines().empty()) { + throw Error( + "%d derivations need to be built, but neither local builds " + "('--max-jobs') " + "nor remote builds ('--builders') are enabled", + willBuild.size()); + } +} + +void LocalStore::buildPaths(const PathSet& drvPaths, BuildMode buildMode) { + Worker worker(*this); + + primeCache(*this, drvPaths); + + Goals goals; + for (auto& i : drvPaths) { + DrvPathWithOutputs i2 = parseDrvPathWithOutputs(i); + if (isDerivation(i2.first)) { + goals.insert(worker.makeDerivationGoal(i2.first, i2.second, buildMode)); + } else { + goals.insert(worker.makeSubstitutionGoal( + i, buildMode == bmRepair ? Repair : NoRepair)); + } + } + + worker.run(goals); + + PathSet failed; + for (auto& i : goals) { + if (i->getExitCode() != Goal::ecSuccess) { + auto* i2 = dynamic_cast<DerivationGoal*>(i.get()); + if (i2 != nullptr) { + failed.insert(i2->getDrvPath()); + } else { + failed.insert(dynamic_cast<SubstitutionGoal*>(i.get())->getStorePath()); + } + } + } + + if (!failed.empty()) { + throw Error(worker.exitStatus(), "build of %s failed", showPaths(failed)); + } +} + +BuildResult LocalStore::buildDerivation(const Path& drvPath, + const BasicDerivation& drv, + BuildMode buildMode) { + Worker worker(*this); + auto goal = worker.makeBasicDerivationGoal(drvPath, drv, buildMode); + + BuildResult result; + + try { + worker.run(Goals{goal}); + result = goal->getResult(); + } catch (Error& e) { + result.status = BuildResult::MiscFailure; + result.errorMsg = e.msg(); + } + + return result; +} + +void LocalStore::ensurePath(const Path& path) { + /* If the path is already valid, we're done. */ + if (isValidPath(path)) { + return; + } + + primeCache(*this, {path}); + + Worker worker(*this); + GoalPtr goal = worker.makeSubstitutionGoal(path); + Goals goals = {goal}; + + worker.run(goals); + + if (goal->getExitCode() != Goal::ecSuccess) { + throw Error(worker.exitStatus(), + "path '%s' does not exist and cannot be created", path); + } +} + +void LocalStore::repairPath(const Path& path) { + Worker worker(*this); + GoalPtr goal = worker.makeSubstitutionGoal(path, Repair); + Goals goals = {goal}; + + worker.run(goals); + + if (goal->getExitCode() != Goal::ecSuccess) { + /* Since substituting the path didn't work, if we have a valid + deriver, then rebuild the deriver. */ + auto deriver = queryPathInfo(path)->deriver; + if (!deriver.empty() && isValidPath(deriver)) { + goals.clear(); + goals.insert(worker.makeDerivationGoal(deriver, StringSet(), bmRepair)); + worker.run(goals); + } else { + throw Error(worker.exitStatus(), "cannot repair path '%s'", path); + } + } +} + +} // namespace nix diff --git a/third_party/nix/src/libstore/builtins.hh b/third_party/nix/src/libstore/builtins.hh new file mode 100644 index 000000000000..07601be0f50c --- /dev/null +++ b/third_party/nix/src/libstore/builtins.hh @@ -0,0 +1,11 @@ +#pragma once + +#include "derivations.hh" + +namespace nix { + +// TODO: make pluggable. +void builtinFetchurl(const BasicDerivation& drv, const std::string& netrcData); +void builtinBuildenv(const BasicDerivation& drv); + +} // namespace nix diff --git a/third_party/nix/src/libstore/builtins/buildenv.cc b/third_party/nix/src/libstore/builtins/buildenv.cc new file mode 100644 index 000000000000..77ad722fab15 --- /dev/null +++ b/third_party/nix/src/libstore/builtins/buildenv.cc @@ -0,0 +1,223 @@ +#include <algorithm> + +#include <fcntl.h> +#include <glog/logging.h> +#include <sys/stat.h> +#include <sys/types.h> + +#include "builtins.hh" + +namespace nix { + +typedef std::map<Path, int> Priorities; + +// FIXME: change into local variables. + +static Priorities priorities; + +static unsigned long symlinks; + +/* For each activated package, create symlinks */ +static void createLinks(const Path& srcDir, const Path& dstDir, int priority) { + DirEntries srcFiles; + + try { + srcFiles = readDirectory(srcDir); + } catch (SysError& e) { + if (e.errNo == ENOTDIR) { + LOG(ERROR) << "warning: not including '" << srcDir + << "' in the user environment because it's not a directory"; + return; + } + throw; + } + + for (const auto& ent : srcFiles) { + if (ent.name[0] == '.') /* not matched by glob */ + continue; + auto srcFile = srcDir + "/" + ent.name; + auto dstFile = dstDir + "/" + ent.name; + + struct stat srcSt; + try { + if (stat(srcFile.c_str(), &srcSt) == -1) + throw SysError("getting status of '%1%'", srcFile); + } catch (SysError& e) { + if (e.errNo == ENOENT || e.errNo == ENOTDIR) { + LOG(ERROR) << "warning: skipping dangling symlink '" << dstFile << "'"; + continue; + } + throw; + } + + /* The files below are special-cased to that they don't show up + * in user profiles, either because they are useless, or + * because they would cauase pointless collisions (e.g., each + * Python package brings its own + * `$out/lib/pythonX.Y/site-packages/easy-install.pth'.) + */ + if (hasSuffix(srcFile, "/propagated-build-inputs") || + hasSuffix(srcFile, "/nix-support") || + hasSuffix(srcFile, "/perllocal.pod") || + hasSuffix(srcFile, "/info/dir") || hasSuffix(srcFile, "/log")) + continue; + + else if (S_ISDIR(srcSt.st_mode)) { + struct stat dstSt; + auto res = lstat(dstFile.c_str(), &dstSt); + if (res == 0) { + if (S_ISDIR(dstSt.st_mode)) { + createLinks(srcFile, dstFile, priority); + continue; + } else if (S_ISLNK(dstSt.st_mode)) { + auto target = canonPath(dstFile, true); + if (!S_ISDIR(lstat(target).st_mode)) + throw Error("collision between '%1%' and non-directory '%2%'", + srcFile, target); + if (unlink(dstFile.c_str()) == -1) + throw SysError(format("unlinking '%1%'") % dstFile); + if (mkdir(dstFile.c_str(), 0755) == -1) + throw SysError(format("creating directory '%1%'")); + createLinks(target, dstFile, priorities[dstFile]); + createLinks(srcFile, dstFile, priority); + continue; + } + } else if (errno != ENOENT) + throw SysError(format("getting status of '%1%'") % dstFile); + } + + else { + struct stat dstSt; + auto res = lstat(dstFile.c_str(), &dstSt); + if (res == 0) { + if (S_ISLNK(dstSt.st_mode)) { + auto prevPriority = priorities[dstFile]; + if (prevPriority == priority) + throw Error( + "packages '%1%' and '%2%' have the same priority %3%; " + "use 'nix-env --set-flag priority NUMBER INSTALLED_PKGNAME' " + "to change the priority of one of the conflicting packages" + " (0 being the highest priority)", + srcFile, readLink(dstFile), priority); + if (prevPriority < priority) { + continue; + } + if (unlink(dstFile.c_str()) == -1) + throw SysError(format("unlinking '%1%'") % dstFile); + } else if (S_ISDIR(dstSt.st_mode)) + throw Error( + "collision between non-directory '%1%' and directory '%2%'", + srcFile, dstFile); + } else if (errno != ENOENT) + throw SysError(format("getting status of '%1%'") % dstFile); + } + + createSymlink(srcFile, dstFile); + priorities[dstFile] = priority; + symlinks++; + } +} + +typedef std::set<Path> FileProp; + +static FileProp done; +static FileProp postponed = FileProp{}; + +static Path out; + +static void addPkg(const Path& pkgDir, int priority) { + if (done.count(pkgDir)) { + return; + } + done.insert(pkgDir); + createLinks(pkgDir, out, priority); + + try { + for (const auto& p : tokenizeString<std::vector<string>>( + readFile(pkgDir + "/nix-support/propagated-user-env-packages"), + " \n")) + if (!done.count(p)) { + postponed.insert(p); + } + } catch (SysError& e) { + if (e.errNo != ENOENT && e.errNo != ENOTDIR) { + throw; + } + } +} + +struct Package { + Path path; + bool active; + int priority; + Package(Path path, bool active, int priority) + : path{path}, active{active}, priority{priority} {} +}; + +typedef std::vector<Package> Packages; + +void builtinBuildenv(const BasicDerivation& drv) { + auto getAttr = [&](const string& name) { + auto i = drv.env.find(name); + if (i == drv.env.end()) { + throw Error("attribute '%s' missing", name); + } + return i->second; + }; + + out = getAttr("out"); + createDirs(out); + + /* Convert the stuff we get from the environment back into a + * coherent data type. */ + Packages pkgs; + auto derivations = tokenizeString<Strings>(getAttr("derivations")); + while (!derivations.empty()) { + /* !!! We're trusting the caller to structure derivations env var correctly + */ + auto active = derivations.front(); + derivations.pop_front(); + auto priority = stoi(derivations.front()); + derivations.pop_front(); + auto outputs = stoi(derivations.front()); + derivations.pop_front(); + for (auto n = 0; n < outputs; n++) { + auto path = derivations.front(); + derivations.pop_front(); + pkgs.emplace_back(path, active != "false", priority); + } + } + + /* Symlink to the packages that have been installed explicitly by the + * user. Process in priority order to reduce unnecessary + * symlink/unlink steps. + */ + std::sort(pkgs.begin(), pkgs.end(), [](const Package& a, const Package& b) { + return a.priority < b.priority || + (a.priority == b.priority && a.path < b.path); + }); + for (const auto& pkg : pkgs) + if (pkg.active) { + addPkg(pkg.path, pkg.priority); + } + + /* Symlink to the packages that have been "propagated" by packages + * installed by the user (i.e., package X declares that it wants Y + * installed as well). We do these later because they have a lower + * priority in case of collisions. + */ + auto priorityCounter = 1000; + while (!postponed.empty()) { + auto pkgDirs = postponed; + postponed = FileProp{}; + for (const auto& pkgDir : pkgDirs) { + addPkg(pkgDir, priorityCounter++); + } + } + + LOG(INFO) << "created " << symlinks << " symlinks in user environment"; + + createSymlink(getAttr("manifest"), out + "/manifest.nix"); +} + +} // namespace nix diff --git a/third_party/nix/src/libstore/builtins/fetchurl.cc b/third_party/nix/src/libstore/builtins/fetchurl.cc new file mode 100644 index 000000000000..c3f38a943f57 --- /dev/null +++ b/third_party/nix/src/libstore/builtins/fetchurl.cc @@ -0,0 +1,80 @@ +#include <glog/logging.h> + +#include "archive.hh" +#include "builtins.hh" +#include "compression.hh" +#include "download.hh" +#include "store-api.hh" + +namespace nix { + +void builtinFetchurl(const BasicDerivation& drv, const std::string& netrcData) { + /* Make the host's netrc data available. Too bad curl requires + this to be stored in a file. It would be nice if we could just + pass a pointer to the data. */ + if (netrcData != "") { + settings.netrcFile = "netrc"; + writeFile(settings.netrcFile, netrcData, 0600); + } + + auto getAttr = [&](const string& name) { + auto i = drv.env.find(name); + if (i == drv.env.end()) + throw Error(format("attribute '%s' missing") % name); + return i->second; + }; + + Path storePath = getAttr("out"); + auto mainUrl = getAttr("url"); + bool unpack = get(drv.env, "unpack", "") == "1"; + + /* Note: have to use a fresh downloader here because we're in + a forked process. */ + auto downloader = makeDownloader(); + + auto fetch = [&](const std::string& url) { + auto source = sinkToSource([&](Sink& sink) { + /* No need to do TLS verification, because we check the hash of + the result anyway. */ + DownloadRequest request(url); + request.verifyTLS = false; + request.decompress = false; + + auto decompressor = makeDecompressionSink( + unpack && hasSuffix(mainUrl, ".xz") ? "xz" : "none", sink); + downloader->download(std::move(request), *decompressor); + decompressor->finish(); + }); + + if (unpack) + restorePath(storePath, *source); + else + writeFile(storePath, *source); + + auto executable = drv.env.find("executable"); + if (executable != drv.env.end() && executable->second == "1") { + if (chmod(storePath.c_str(), 0755) == -1) + throw SysError(format("making '%1%' executable") % storePath); + } + }; + + /* Try the hashed mirrors first. */ + if (getAttr("outputHashMode") == "flat") + for (auto hashedMirror : settings.hashedMirrors.get()) try { + if (!hasSuffix(hashedMirror, "/")) { + hashedMirror += '/'; + } + auto ht = parseHashType(getAttr("outputHashAlgo")); + auto h = Hash(getAttr("outputHash"), ht); + fetch(hashedMirror + printHashType(h.type) + "/" + + h.to_string(Base16, false)); + return; + } catch (Error& e) { + LOG(ERROR) << e.what(); + } + + /* Otherwise try the specified URL. */ + fetch(mainUrl); +} + +} // namespace nix diff --git a/third_party/nix/src/libstore/crypto.cc b/third_party/nix/src/libstore/crypto.cc new file mode 100644 index 000000000000..90580b8dc902 --- /dev/null +++ b/third_party/nix/src/libstore/crypto.cc @@ -0,0 +1,125 @@ +#include "crypto.hh" + +#include "globals.hh" +#include "util.hh" + +#if HAVE_SODIUM +#include <sodium.h> +#endif + +namespace nix { + +static std::pair<std::string, std::string> split(const string& s) { + size_t colon = s.find(':'); + if (colon == std::string::npos || colon == 0) { + return {"", ""}; + } + return {std::string(s, 0, colon), std::string(s, colon + 1)}; +} + +Key::Key(const string& s) { + auto ss = split(s); + + name = ss.first; + key = ss.second; + + if (name.empty() || key.empty()) { + throw Error("secret key is corrupt"); + } + + key = base64Decode(key); +} + +SecretKey::SecretKey(const string& s) : Key(s) { +#if HAVE_SODIUM + if (key.size() != crypto_sign_SECRETKEYBYTES) { + throw Error("secret key is not valid"); + } +#endif +} + +#if !HAVE_SODIUM +[[noreturn]] static void noSodium() { + throw Error( + "Nix was not compiled with libsodium, required for signed binary cache " + "support"); +} +#endif + +std::string SecretKey::signDetached(const std::string& data) const { +#if HAVE_SODIUM + unsigned char sig[crypto_sign_BYTES]; + unsigned long long sigLen; + crypto_sign_detached(sig, &sigLen, (unsigned char*)data.data(), data.size(), + (unsigned char*)key.data()); + return name + ":" + base64Encode(std::string((char*)sig, sigLen)); +#else + noSodium(); +#endif +} + +PublicKey SecretKey::toPublicKey() const { +#if HAVE_SODIUM + unsigned char pk[crypto_sign_PUBLICKEYBYTES]; + crypto_sign_ed25519_sk_to_pk(pk, (unsigned char*)key.data()); + return PublicKey(name, std::string((char*)pk, crypto_sign_PUBLICKEYBYTES)); +#else + noSodium(); +#endif +} + +PublicKey::PublicKey(const string& s) : Key(s) { +#if HAVE_SODIUM + if (key.size() != crypto_sign_PUBLICKEYBYTES) { + throw Error("public key is not valid"); + } +#endif +} + +bool verifyDetached(const std::string& data, const std::string& sig, + const PublicKeys& publicKeys) { +#if HAVE_SODIUM + auto ss = split(sig); + + auto key = publicKeys.find(ss.first); + if (key == publicKeys.end()) { + return false; + } + + auto sig2 = base64Decode(ss.second); + if (sig2.size() != crypto_sign_BYTES) { + throw Error("signature is not valid"); + } + + return crypto_sign_verify_detached( + (unsigned char*)sig2.data(), (unsigned char*)data.data(), + data.size(), (unsigned char*)key->second.key.data()) == 0; +#else + noSodium(); +#endif +} + +PublicKeys getDefaultPublicKeys() { + PublicKeys publicKeys; + + // FIXME: filter duplicates + + for (const auto& s : settings.trustedPublicKeys.get()) { + PublicKey key(s); + publicKeys.emplace(key.name, key); + } + + for (const auto& secretKeyFile : settings.secretKeyFiles.get()) { + try { + SecretKey secretKey(readFile(secretKeyFile)); + publicKeys.emplace(secretKey.name, secretKey.toPublicKey()); + } catch (SysError& e) { + /* Ignore unreadable key files. That's normal in a + multi-user installation. */ + } + } + + return publicKeys; +} + +} // namespace nix diff --git a/third_party/nix/src/libstore/crypto.hh b/third_party/nix/src/libstore/crypto.hh new file mode 100644 index 000000000000..ef578db6d055 --- /dev/null +++ b/third_party/nix/src/libstore/crypto.hh @@ -0,0 +1,49 @@ +#pragma once + +#include <map> + +#include "types.hh" + +namespace nix { + +struct Key { + std::string name; + std::string key; + + /* Construct Key from a string in the format + โ<name>:<key-in-base64>โ. */ + Key(const std::string& s); + + protected: + Key(const std::string& name, const std::string& key) : name(name), key(key) {} +}; + +struct PublicKey; + +struct SecretKey : Key { + SecretKey(const std::string& s); + + /* Return a detached signature of the given string. */ + std::string signDetached(const std::string& data) const; + + PublicKey toPublicKey() const; +}; + +struct PublicKey : Key { + PublicKey(const std::string& s); + + private: + PublicKey(const std::string& name, const std::string& key) : Key(name, key) {} + friend struct SecretKey; +}; + +typedef std::map<std::string, PublicKey> PublicKeys; + +/* Return true iff โsigโ is a correct signature over โdataโ using one + of the given public keys. */ +bool verifyDetached(const std::string& data, const std::string& sig, + const PublicKeys& publicKeys); + +PublicKeys getDefaultPublicKeys(); + +} // namespace nix diff --git a/third_party/nix/src/libstore/derivations.cc b/third_party/nix/src/libstore/derivations.cc new file mode 100644 index 000000000000..9c71d7209e37 --- /dev/null +++ b/third_party/nix/src/libstore/derivations.cc @@ -0,0 +1,428 @@ +#include "derivations.hh" + +#include "fs-accessor.hh" +#include "globals.hh" +#include "istringstream_nocopy.hh" +#include "store-api.hh" +#include "util.hh" +#include "worker-protocol.hh" + +namespace nix { + +void DerivationOutput::parseHashInfo(bool& recursive, Hash& hash) const { + recursive = false; + string algo = hashAlgo; + + if (string(algo, 0, 2) == "r:") { + recursive = true; + algo = string(algo, 2); + } + + HashType hashType = parseHashType(algo); + if (hashType == htUnknown) { + throw Error(format("unknown hash algorithm '%1%'") % algo); + } + + hash = Hash(this->hash, hashType); +} + +Path BasicDerivation::findOutput(const string& id) const { + auto i = outputs.find(id); + if (i == outputs.end()) { + throw Error(format("derivation has no output '%1%'") % id); + } + return i->second.path; +} + +bool BasicDerivation::isBuiltin() const { + return string(builder, 0, 8) == "builtin:"; +} + +Path writeDerivation(const ref<Store>& store, const Derivation& drv, + const string& name, RepairFlag repair) { + PathSet references; + references.insert(drv.inputSrcs.begin(), drv.inputSrcs.end()); + for (auto& i : drv.inputDrvs) { + references.insert(i.first); + } + /* Note that the outputs of a derivation are *not* references + (that can be missing (of course) and should not necessarily be + held during a garbage collection). */ + string suffix = name + drvExtension; + string contents = drv.unparse(); + return settings.readOnlyMode + ? store->computeStorePathForText(suffix, contents, references) + : store->addTextToStore(suffix, contents, references, repair); +} + +/* Read string `s' from stream `str'. */ +static void expect(std::istream& str, const string& s) { + char s2[s.size()]; + str.read(s2, s.size()); + if (string(s2, s.size()) != s) { + throw FormatError(format("expected string '%1%'") % s); + } +} + +/* Read a C-style string from stream `str'. */ +static string parseString(std::istream& str) { + string res; + expect(str, "\""); + int c; + while ((c = str.get()) != '"') { + if (c == '\\') { + c = str.get(); + if (c == 'n') { + res += '\n'; + } else if (c == 'r') { + res += '\r'; + } else if (c == 't') { + res += '\t'; + } else { + res += c; + } + } else { + res += c; + } + } + return res; +} + +static Path parsePath(std::istream& str) { + string s = parseString(str); + if (s.empty() || s[0] != '/') { + throw FormatError(format("bad path '%1%' in derivation") % s); + } + return s; +} + +static bool endOfList(std::istream& str) { + if (str.peek() == ',') { + str.get(); + return false; + } + if (str.peek() == ']') { + str.get(); + return true; + } + return false; +} + +static StringSet parseStrings(std::istream& str, bool arePaths) { + StringSet res; + while (!endOfList(str)) { + res.insert(arePaths ? parsePath(str) : parseString(str)); + } + return res; +} + +static Derivation parseDerivation(const string& s) { + Derivation drv; + istringstream_nocopy str(s); + expect(str, "Derive(["); + + /* Parse the list of outputs. */ + while (!endOfList(str)) { + DerivationOutput out; + expect(str, "("); + string id = parseString(str); + expect(str, ","); + out.path = parsePath(str); + expect(str, ","); + out.hashAlgo = parseString(str); + expect(str, ","); + out.hash = parseString(str); + expect(str, ")"); + drv.outputs[id] = out; + } + + /* Parse the list of input derivations. */ + expect(str, ",["); + while (!endOfList(str)) { + expect(str, "("); + Path drvPath = parsePath(str); + expect(str, ",["); + drv.inputDrvs[drvPath] = parseStrings(str, false); + expect(str, ")"); + } + + expect(str, ",["); + drv.inputSrcs = parseStrings(str, true); + expect(str, ","); + drv.platform = parseString(str); + expect(str, ","); + drv.builder = parseString(str); + + /* Parse the builder arguments. */ + expect(str, ",["); + while (!endOfList(str)) { + drv.args.push_back(parseString(str)); + } + + /* Parse the environment variables. */ + expect(str, ",["); + while (!endOfList(str)) { + expect(str, "("); + string name = parseString(str); + expect(str, ","); + string value = parseString(str); + expect(str, ")"); + drv.env[name] = value; + } + + expect(str, ")"); + return drv; +} + +Derivation readDerivation(const Path& drvPath) { + try { + return parseDerivation(readFile(drvPath)); + } catch (FormatError& e) { + throw Error(format("error parsing derivation '%1%': %2%") % drvPath % + e.msg()); + } +} + +Derivation Store::derivationFromPath(const Path& drvPath) { + assertStorePath(drvPath); + ensurePath(drvPath); + auto accessor = getFSAccessor(); + try { + return parseDerivation(accessor->readFile(drvPath)); + } catch (FormatError& e) { + throw Error(format("error parsing derivation '%1%': %2%") % drvPath % + e.msg()); + } +} + +static void printString(string& res, const string& s) { + res += '"'; + for (const char* i = s.c_str(); *i != 0; i++) { + if (*i == '\"' || *i == '\\') { + res += "\\"; + res += *i; + } else if (*i == '\n') { + res += "\\n"; + } else if (*i == '\r') { + res += "\\r"; + } else if (*i == '\t') { + res += "\\t"; + } else { + res += *i; + } + } + res += '"'; +} + +template <class ForwardIterator> +static void printStrings(string& res, ForwardIterator i, ForwardIterator j) { + res += '['; + bool first = true; + for (; i != j; ++i) { + if (first) { + first = false; + } else { + res += ','; + } + printString(res, *i); + } + res += ']'; +} + +string Derivation::unparse() const { + string s; + s.reserve(65536); + s += "Derive(["; + + bool first = true; + for (auto& i : outputs) { + if (first) { + first = false; + } else { + s += ','; + } + s += '('; + printString(s, i.first); + s += ','; + printString(s, i.second.path); + s += ','; + printString(s, i.second.hashAlgo); + s += ','; + printString(s, i.second.hash); + s += ')'; + } + + s += "],["; + first = true; + for (auto& i : inputDrvs) { + if (first) { + first = false; + } else { + s += ','; + } + s += '('; + printString(s, i.first); + s += ','; + printStrings(s, i.second.begin(), i.second.end()); + s += ')'; + } + + s += "],"; + printStrings(s, inputSrcs.begin(), inputSrcs.end()); + + s += ','; + printString(s, platform); + s += ','; + printString(s, builder); + s += ','; + printStrings(s, args.begin(), args.end()); + + s += ",["; + first = true; + for (auto& i : env) { + if (first) { + first = false; + } else { + s += ','; + } + s += '('; + printString(s, i.first); + s += ','; + printString(s, i.second); + s += ')'; + } + + s += "])"; + + return s; +} + +bool isDerivation(const string& fileName) { + return hasSuffix(fileName, drvExtension); +} + +bool BasicDerivation::isFixedOutput() const { + return outputs.size() == 1 && outputs.begin()->first == "out" && + !outputs.begin()->second.hash.empty(); +} + +DrvHashes drvHashes; + +/* Returns the hash of a derivation modulo fixed-output + subderivations. A fixed-output derivation is a derivation with one + output (`out') for which an expected hash and hash algorithm are + specified (using the `outputHash' and `outputHashAlgo' + attributes). We don't want changes to such derivations to + propagate upwards through the dependency graph, changing output + paths everywhere. + + For instance, if we change the url in a call to the `fetchurl' + function, we do not want to rebuild everything depending on it + (after all, (the hash of) the file being downloaded is unchanged). + So the *output paths* should not change. On the other hand, the + *derivation paths* should change to reflect the new dependency + graph. + + That's what this function does: it returns a hash which is just the + hash of the derivation ATerm, except that any input derivation + paths have been replaced by the result of a recursive call to this + function, and that for fixed-output derivations we return a hash of + its output path. */ +Hash hashDerivationModulo(Store& store, Derivation drv) { + /* Return a fixed hash for fixed-output derivations. */ + if (drv.isFixedOutput()) { + auto i = drv.outputs.begin(); + return hashString(htSHA256, "fixed:out:" + i->second.hashAlgo + ":" + + i->second.hash + ":" + i->second.path); + } + + /* For other derivations, replace the inputs paths with recursive + calls to this function.*/ + DerivationInputs inputs2; + for (auto& i : drv.inputDrvs) { + Hash h = drvHashes[i.first]; + if (!h) { + assert(store.isValidPath(i.first)); + Derivation drv2 = readDerivation(store.toRealPath(i.first)); + h = hashDerivationModulo(store, drv2); + drvHashes[i.first] = h; + } + inputs2[h.to_string(Base16, false)] = i.second; + } + drv.inputDrvs = inputs2; + + return hashString(htSHA256, drv.unparse()); +} + +DrvPathWithOutputs parseDrvPathWithOutputs(const string& s) { + size_t n = s.find('!'); + return n == std::string::npos + ? DrvPathWithOutputs(s, std::set<string>()) + : DrvPathWithOutputs( + string(s, 0, n), + tokenizeString<std::set<string> >(string(s, n + 1), ",")); +} + +Path makeDrvPathWithOutputs(const Path& drvPath, + const std::set<string>& outputs) { + return outputs.empty() ? drvPath + : drvPath + "!" + concatStringsSep(",", outputs); +} + +bool wantOutput(const string& output, const std::set<string>& wanted) { + return wanted.empty() || wanted.find(output) != wanted.end(); +} + +PathSet BasicDerivation::outputPaths() const { + PathSet paths; + for (auto& i : outputs) { + paths.insert(i.second.path); + } + return paths; +} + +Source& readDerivation(Source& in, Store& store, BasicDerivation& drv) { + drv.outputs.clear(); + auto nr = readNum<size_t>(in); + for (size_t n = 0; n < nr; n++) { + auto name = readString(in); + DerivationOutput o; + in >> o.path >> o.hashAlgo >> o.hash; + store.assertStorePath(o.path); + drv.outputs[name] = o; + } + + drv.inputSrcs = readStorePaths<PathSet>(store, in); + in >> drv.platform >> drv.builder; + drv.args = readStrings<Strings>(in); + + nr = readNum<size_t>(in); + for (size_t n = 0; n < nr; n++) { + auto key = readString(in); + auto value = readString(in); + drv.env[key] = value; + } + + return in; +} + +Sink& operator<<(Sink& out, const BasicDerivation& drv) { + out << drv.outputs.size(); + for (auto& i : drv.outputs) { + out << i.first << i.second.path << i.second.hashAlgo << i.second.hash; + } + out << drv.inputSrcs << drv.platform << drv.builder << drv.args; + out << drv.env.size(); + for (auto& i : drv.env) { + out << i.first << i.second; + } + return out; +} + +std::string hashPlaceholder(const std::string& outputName) { + // FIXME: memoize? + return "/" + hashString(htSHA256, "nix-output:" + outputName) + .to_string(Base32, false); +} + +} // namespace nix diff --git a/third_party/nix/src/libstore/derivations.hh b/third_party/nix/src/libstore/derivations.hh new file mode 100644 index 000000000000..52b951f5e9d9 --- /dev/null +++ b/third_party/nix/src/libstore/derivations.hh @@ -0,0 +1,106 @@ +#pragma once + +#include <map> + +#include "hash.hh" +#include "store-api.hh" +#include "types.hh" + +namespace nix { + +/* Extension of derivations in the Nix store. */ +const string drvExtension = ".drv"; + +/* Abstract syntax of derivations. */ + +struct DerivationOutput { + Path path; + string hashAlgo; /* hash used for expected hash computation */ + string hash; /* expected hash, may be null */ + DerivationOutput() {} + DerivationOutput(Path path, string hashAlgo, string hash) { + this->path = path; + this->hashAlgo = hashAlgo; + this->hash = hash; + } + void parseHashInfo(bool& recursive, Hash& hash) const; +}; + +typedef std::map<string, DerivationOutput> DerivationOutputs; + +/* For inputs that are sub-derivations, we specify exactly which + output IDs we are interested in. */ +typedef std::map<Path, StringSet> DerivationInputs; + +typedef std::map<string, string> StringPairs; + +struct BasicDerivation { + DerivationOutputs outputs; /* keyed on symbolic IDs */ + PathSet inputSrcs; /* inputs that are sources */ + string platform; + Path builder; + Strings args; + StringPairs env; + + virtual ~BasicDerivation(){}; + + /* Return the path corresponding to the output identifier `id' in + the given derivation. */ + Path findOutput(const string& id) const; + + bool isBuiltin() const; + + /* Return true iff this is a fixed-output derivation. */ + bool isFixedOutput() const; + + /* Return the output paths of a derivation. */ + PathSet outputPaths() const; +}; + +struct Derivation : BasicDerivation { + DerivationInputs inputDrvs; /* inputs that are sub-derivations */ + + /* Print a derivation. */ + std::string unparse() const; +}; + +class Store; + +/* Write a derivation to the Nix store, and return its path. */ +Path writeDerivation(const ref<Store>& store, const Derivation& drv, + const string& name, RepairFlag repair = NoRepair); + +/* Read a derivation from a file. */ +Derivation readDerivation(const Path& drvPath); + +/* Check whether a file name ends with the extension for + derivations. */ +bool isDerivation(const string& fileName); + +Hash hashDerivationModulo(Store& store, Derivation drv); + +/* Memoisation of hashDerivationModulo(). */ +typedef std::map<Path, Hash> DrvHashes; + +extern DrvHashes drvHashes; // FIXME: global, not thread-safe + +/* Split a string specifying a derivation and a set of outputs + (/nix/store/hash-foo!out1,out2,...) into the derivation path and + the outputs. */ +typedef std::pair<string, std::set<string> > DrvPathWithOutputs; +DrvPathWithOutputs parseDrvPathWithOutputs(const string& s); + +Path makeDrvPathWithOutputs(const Path& drvPath, + const std::set<string>& outputs); + +bool wantOutput(const string& output, const std::set<string>& wanted); + +struct Source; +struct Sink; + +Source& readDerivation(Source& in, Store& store, BasicDerivation& drv); +Sink& operator<<(Sink& out, const BasicDerivation& drv); + +std::string hashPlaceholder(const std::string& outputName); + +} // namespace nix diff --git a/third_party/nix/src/libstore/download.cc b/third_party/nix/src/libstore/download.cc new file mode 100644 index 000000000000..7476dd50b579 --- /dev/null +++ b/third_party/nix/src/libstore/download.cc @@ -0,0 +1,1022 @@ +#include "download.hh" + +#include "archive.hh" +#include "compression.hh" +#include "finally.hh" +#include "globals.hh" +#include "hash.hh" +#include "pathlocks.hh" +#include "s3.hh" +#include "store-api.hh" +#include "util.hh" + +#ifdef ENABLE_S3 +#include <aws/core/client/ClientConfiguration.h> +#endif + +#include <algorithm> +#include <cmath> +#include <cstring> +#include <iostream> +#include <queue> +#include <random> +#include <thread> + +#include <curl/curl.h> +#include <fcntl.h> +#include <glog/logging.h> +#include <unistd.h> + +using namespace std::string_literals; + +namespace nix { + +DownloadSettings downloadSettings; + +static GlobalConfig::Register r1(&downloadSettings); + +std::string resolveUri(const std::string& uri) { + if (uri.compare(0, 8, "channel:") == 0) { + return "https://nixos.org/channels/" + std::string(uri, 8) + + "/nixexprs.tar.xz"; + } + return uri; +} + +struct CurlDownloader : public Downloader { + CURLM* curlm = nullptr; + + std::random_device rd; + std::mt19937 mt19937; + + struct DownloadItem : public std::enable_shared_from_this<DownloadItem> { + CurlDownloader& downloader; + DownloadRequest request; + DownloadResult result; + bool done = false; // whether either the success or failure function has + // been called + Callback<DownloadResult> callback; + CURL* req = nullptr; + bool active = + false; // whether the handle has been added to the multi object + std::string status; + + unsigned int attempt = 0; + + /* Don't start this download until the specified time point + has been reached. */ + std::chrono::steady_clock::time_point embargo; + + struct curl_slist* requestHeaders = nullptr; + + std::string encoding; + + bool acceptRanges = false; + + curl_off_t writtenToSink = 0; + + DownloadItem(CurlDownloader& downloader, const DownloadRequest& request, + Callback<DownloadResult>&& callback) + : downloader(downloader), + request(request), + callback(std::move(callback)), + finalSink([this](const unsigned char* data, size_t len) { + if (this->request.dataCallback) { + long httpStatus = 0; + curl_easy_getinfo(req, CURLINFO_RESPONSE_CODE, &httpStatus); + + /* Only write data to the sink if this is a + successful response. */ + if (httpStatus == 0 || httpStatus == 200 || httpStatus == 201 || + httpStatus == 206) { + writtenToSink += len; + this->request.dataCallback((char*)data, len); + } + } else { + this->result.data->append((char*)data, len); + } + }) { + LOG(INFO) << (request.data ? "uploading '" : "downloading '") + << request.uri << "'"; + + if (!request.expectedETag.empty()) { + requestHeaders = curl_slist_append( + requestHeaders, ("If-None-Match: " + request.expectedETag).c_str()); + } + if (!request.mimeType.empty()) { + requestHeaders = curl_slist_append( + requestHeaders, ("Content-Type: " + request.mimeType).c_str()); + } + } + + ~DownloadItem() { + if (req != nullptr) { + if (active) { + curl_multi_remove_handle(downloader.curlm, req); + } + curl_easy_cleanup(req); + } + if (requestHeaders != nullptr) { + curl_slist_free_all(requestHeaders); + } + try { + if (!done) { + fail(DownloadError( + Interrupted, + format("download of '%s' was interrupted") % request.uri)); + } + } catch (...) { + ignoreException(); + } + } + + void failEx(const std::exception_ptr& ex) { + assert(!done); + done = true; + callback.rethrow(ex); + } + + template <class T> + void fail(const T& e) { + failEx(std::make_exception_ptr(e)); + } + + LambdaSink finalSink; + std::shared_ptr<CompressionSink> decompressionSink; + + std::exception_ptr writeException; + + size_t writeCallback(void* contents, size_t size, size_t nmemb) { + try { + size_t realSize = size * nmemb; + result.bodySize += realSize; + + if (!decompressionSink) { + decompressionSink = makeDecompressionSink(encoding, finalSink); + } + + (*decompressionSink)((unsigned char*)contents, realSize); + + return realSize; + } catch (...) { + writeException = std::current_exception(); + return 0; + } + } + + static size_t writeCallbackWrapper(void* contents, size_t size, + size_t nmemb, void* userp) { + return ((DownloadItem*)userp)->writeCallback(contents, size, nmemb); + } + + size_t headerCallback(void* contents, size_t size, size_t nmemb) { + size_t realSize = size * nmemb; + std::string line((char*)contents, realSize); + DLOG(INFO) << "got header for '" << request.uri << "': " << trim(line); + if (line.compare(0, 5, "HTTP/") == 0) { // new response starts + result.etag = ""; + auto ss = tokenizeString<vector<string>>(line, " "); + status = ss.size() >= 2 ? ss[1] : ""; + result.data = std::make_shared<std::string>(); + result.bodySize = 0; + acceptRanges = false; + encoding = ""; + } else { + auto i = line.find(':'); + if (i != string::npos) { + string name = toLower(trim(string(line, 0, i))); + if (name == "etag") { + result.etag = trim(string(line, i + 1)); + /* Hack to work around a GitHub bug: it sends + ETags, but ignores If-None-Match. So if we get + the expected ETag on a 200 response, then shut + down the connection because we already have the + data. */ + if (result.etag == request.expectedETag && status == "200") { + DLOG(INFO) + << "shutting down on 200 HTTP response with expected ETag"; + return 0; + } + } else if (name == "content-encoding") { + encoding = trim(string(line, i + 1)); + } else if (name == "accept-ranges" && + toLower(trim(std::string(line, i + 1))) == "bytes") { + acceptRanges = true; + } + } + } + return realSize; + } + + static size_t headerCallbackWrapper(void* contents, size_t size, + size_t nmemb, void* userp) { + return ((DownloadItem*)userp)->headerCallback(contents, size, nmemb); + } + + static int progressCallback(double dltotal, double dlnow) { + try { + // TODO(tazjin): this had activity nonsense, clean it up + } catch (nix::Interrupted&) { + assert(_isInterrupted); + } + return static_cast<int>(_isInterrupted); + } + + static int progressCallbackWrapper(void* userp, double dltotal, + double dlnow, double ultotal, + double ulnow) { + return ((DownloadItem*)userp)->progressCallback(dltotal, dlnow); + } + + static int debugCallback(CURL* handle, curl_infotype type, char* data, + size_t size, void* userptr) { + if (type == CURLINFO_TEXT) { + DLOG(INFO) << "curl: " << chomp(std::string(data, size)); + } + return 0; + } + + size_t readOffset = 0; + size_t readCallback(char* buffer, size_t size, size_t nitems) { + if (readOffset == request.data->length()) { + return 0; + } + auto count = std::min(size * nitems, request.data->length() - readOffset); + assert(count); + memcpy(buffer, request.data->data() + readOffset, count); + readOffset += count; + return count; + } + + static size_t readCallbackWrapper(char* buffer, size_t size, size_t nitems, + void* userp) { + return ((DownloadItem*)userp)->readCallback(buffer, size, nitems); + } + + void init() { + if (req == nullptr) { + req = curl_easy_init(); + } + + curl_easy_reset(req); + + // TODO(tazjin): Add an Abseil flag for this + // if (verbosity >= lvlVomit) { + // curl_easy_setopt(req, CURLOPT_VERBOSE, 1); + // curl_easy_setopt(req, CURLOPT_DEBUGFUNCTION, + // DownloadItem::debugCallback); + // } + + curl_easy_setopt(req, CURLOPT_URL, request.uri.c_str()); + curl_easy_setopt(req, CURLOPT_FOLLOWLOCATION, 1L); + curl_easy_setopt(req, CURLOPT_MAXREDIRS, 10); + curl_easy_setopt(req, CURLOPT_NOSIGNAL, 1); + curl_easy_setopt(req, CURLOPT_USERAGENT, + ("curl/" LIBCURL_VERSION " Nix/" + nixVersion + + (downloadSettings.userAgentSuffix != "" + ? " " + downloadSettings.userAgentSuffix.get() + : "")) + .c_str()); +#if LIBCURL_VERSION_NUM >= 0x072b00 + curl_easy_setopt(req, CURLOPT_PIPEWAIT, 1); +#endif +#if LIBCURL_VERSION_NUM >= 0x072f00 + if (downloadSettings.enableHttp2) { + curl_easy_setopt(req, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_2TLS); + } else { + curl_easy_setopt(req, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_1_1); + } +#endif + curl_easy_setopt(req, CURLOPT_WRITEFUNCTION, + DownloadItem::writeCallbackWrapper); + curl_easy_setopt(req, CURLOPT_WRITEDATA, this); + curl_easy_setopt(req, CURLOPT_HEADERFUNCTION, + DownloadItem::headerCallbackWrapper); + curl_easy_setopt(req, CURLOPT_HEADERDATA, this); + + curl_easy_setopt(req, CURLOPT_PROGRESSFUNCTION, progressCallbackWrapper); + curl_easy_setopt(req, CURLOPT_PROGRESSDATA, this); + curl_easy_setopt(req, CURLOPT_NOPROGRESS, 0); + + curl_easy_setopt(req, CURLOPT_HTTPHEADER, requestHeaders); + + if (request.head) { + curl_easy_setopt(req, CURLOPT_NOBODY, 1); + } + + if (request.data) { + curl_easy_setopt(req, CURLOPT_UPLOAD, 1L); + curl_easy_setopt(req, CURLOPT_READFUNCTION, readCallbackWrapper); + curl_easy_setopt(req, CURLOPT_READDATA, this); + curl_easy_setopt(req, CURLOPT_INFILESIZE_LARGE, + (curl_off_t)request.data->length()); + } + + if (request.verifyTLS) { + if (!settings.caFile.empty()) { + curl_easy_setopt(req, CURLOPT_CAINFO, settings.caFile.c_str()); + } + } else { + curl_easy_setopt(req, CURLOPT_SSL_VERIFYPEER, 0); + curl_easy_setopt(req, CURLOPT_SSL_VERIFYHOST, 0); + } + + curl_easy_setopt(req, CURLOPT_CONNECTTIMEOUT, + downloadSettings.connectTimeout.get()); + + curl_easy_setopt(req, CURLOPT_LOW_SPEED_LIMIT, 1L); + curl_easy_setopt(req, CURLOPT_LOW_SPEED_TIME, + downloadSettings.stalledDownloadTimeout.get()); + + /* If no file exist in the specified path, curl continues to work + anyway as if netrc support was disabled. */ + curl_easy_setopt(req, CURLOPT_NETRC_FILE, + settings.netrcFile.get().c_str()); + curl_easy_setopt(req, CURLOPT_NETRC, CURL_NETRC_OPTIONAL); + + if (writtenToSink != 0) { + curl_easy_setopt(req, CURLOPT_RESUME_FROM_LARGE, writtenToSink); + } + + result.data = std::make_shared<std::string>(); + result.bodySize = 0; + } + + void finish(CURLcode code) { + long httpStatus = 0; + curl_easy_getinfo(req, CURLINFO_RESPONSE_CODE, &httpStatus); + + char* effectiveUriCStr; + curl_easy_getinfo(req, CURLINFO_EFFECTIVE_URL, &effectiveUriCStr); + if (effectiveUriCStr != nullptr) { + result.effectiveUri = effectiveUriCStr; + } + + DLOG(INFO) << "finished " << request.verb() << " of " << request.uri + << "; curl status = " << code + << ", HTTP status = " << httpStatus + << ", body = " << result.bodySize << " bytes"; + + if (decompressionSink) { + try { + decompressionSink->finish(); + } catch (...) { + writeException = std::current_exception(); + } + } + + if (code == CURLE_WRITE_ERROR && result.etag == request.expectedETag) { + code = CURLE_OK; + httpStatus = 304; + } + + if (writeException) { + failEx(writeException); + + } else if (code == CURLE_OK && + (httpStatus == 200 || httpStatus == 201 || httpStatus == 204 || + httpStatus == 206 || httpStatus == 304 || + httpStatus == 226 /* FTP */ || + httpStatus == 0 /* other protocol */)) { + result.cached = httpStatus == 304; + done = true; + callback(std::move(result)); + } + + else { + // We treat most errors as transient, but won't retry when hopeless + Error err = Transient; + + if (httpStatus == 404 || httpStatus == 410 || + code == CURLE_FILE_COULDNT_READ_FILE) { + // The file is definitely not there + err = NotFound; + } else if (httpStatus == 401 || httpStatus == 403 || + httpStatus == 407) { + // Don't retry on authentication/authorization failures + err = Forbidden; + } else if (httpStatus >= 400 && httpStatus < 500 && httpStatus != 408 && + httpStatus != 429) { + // Most 4xx errors are client errors and are probably not worth + // retrying: + // * 408 means the server timed out waiting for us, so we try again + // * 429 means too many requests, so we retry (with a delay) + err = Misc; + } else if (httpStatus == 501 || httpStatus == 505 || + httpStatus == 511) { + // Let's treat most 5xx (server) errors as transient, except for a + // handful: + // * 501 not implemented + // * 505 http version not supported + // * 511 we're behind a captive portal + err = Misc; + } else { + // Don't bother retrying on certain cURL errors either + switch (code) { + case CURLE_FAILED_INIT: + case CURLE_URL_MALFORMAT: + case CURLE_NOT_BUILT_IN: + case CURLE_REMOTE_ACCESS_DENIED: + case CURLE_FILE_COULDNT_READ_FILE: + case CURLE_FUNCTION_NOT_FOUND: + case CURLE_ABORTED_BY_CALLBACK: + case CURLE_BAD_FUNCTION_ARGUMENT: + case CURLE_INTERFACE_FAILED: + case CURLE_UNKNOWN_OPTION: + case CURLE_SSL_CACERT_BADFILE: + case CURLE_TOO_MANY_REDIRECTS: + case CURLE_WRITE_ERROR: + case CURLE_UNSUPPORTED_PROTOCOL: + err = Misc; + break; + default: // Shut up warnings + break; + } + } + + attempt++; + + auto exc = + code == CURLE_ABORTED_BY_CALLBACK && _isInterrupted + ? DownloadError(Interrupted, fmt("%s of '%s' was interrupted", + request.verb(), request.uri)) + : httpStatus != 0 + ? DownloadError( + err, fmt("unable to %s '%s': HTTP error %d", + request.verb(), request.uri, httpStatus) + + (code == CURLE_OK + ? "" + : fmt(" (curl error: %s)", + curl_easy_strerror(code)))) + : DownloadError(err, fmt("unable to %s '%s': %s (%d)", + request.verb(), request.uri, + curl_easy_strerror(code), code)); + + /* If this is a transient error, then maybe retry the + download after a while. If we're writing to a + sink, we can only retry if the server supports + ranged requests. */ + if (err == Transient && attempt < request.tries && + (!this->request.dataCallback || writtenToSink == 0 || + (acceptRanges && encoding.empty()))) { + int ms = request.baseRetryTimeMs * + std::pow(2.0F, attempt - 1 + + std::uniform_real_distribution<>( + 0.0, 0.5)(downloader.mt19937)); + if (writtenToSink != 0) { + LOG(WARNING) << exc.what() << "; retrying from offset " + << writtenToSink << " in " << ms << "ms"; + } else { + LOG(WARNING) << exc.what() << "; retrying in " << ms << "ms"; + } + embargo = + std::chrono::steady_clock::now() + std::chrono::milliseconds(ms); + downloader.enqueueItem(shared_from_this()); + } else { + fail(exc); + } + } + } + }; + + struct State { + struct EmbargoComparator { + bool operator()(const std::shared_ptr<DownloadItem>& i1, + const std::shared_ptr<DownloadItem>& i2) { + return i1->embargo > i2->embargo; + } + }; + bool quit = false; + std::priority_queue<std::shared_ptr<DownloadItem>, + std::vector<std::shared_ptr<DownloadItem>>, + EmbargoComparator> + incoming; + }; + + Sync<State> state_; + + /* We can't use a std::condition_variable to wake up the curl + thread, because it only monitors file descriptors. So use a + pipe instead. */ + Pipe wakeupPipe; + + std::thread workerThread; + + CurlDownloader() : mt19937(rd()) { + static std::once_flag globalInit; + std::call_once(globalInit, curl_global_init, CURL_GLOBAL_ALL); + + curlm = curl_multi_init(); + +#if LIBCURL_VERSION_NUM >= 0x072b00 // Multiplex requires >= 7.43.0 + curl_multi_setopt(curlm, CURLMOPT_PIPELINING, CURLPIPE_MULTIPLEX); +#endif +#if LIBCURL_VERSION_NUM >= 0x071e00 // Max connections requires >= 7.30.0 + curl_multi_setopt(curlm, CURLMOPT_MAX_TOTAL_CONNECTIONS, + downloadSettings.httpConnections.get()); +#endif + + wakeupPipe.create(); + fcntl(wakeupPipe.readSide.get(), F_SETFL, O_NONBLOCK); + + workerThread = std::thread([&]() { workerThreadEntry(); }); + } + + ~CurlDownloader() override { + stopWorkerThread(); + + workerThread.join(); + + if (curlm != nullptr) { + curl_multi_cleanup(curlm); + } + } + + void stopWorkerThread() { + /* Signal the worker thread to exit. */ + { + auto state(state_.lock()); + state->quit = true; + } + writeFull(wakeupPipe.writeSide.get(), " ", false); + } + + void workerThreadMain() { + /* Cause this thread to be notified on SIGINT. */ + auto callback = createInterruptCallback([&]() { stopWorkerThread(); }); + + std::map<CURL*, std::shared_ptr<DownloadItem>> items; + + bool quit = false; + + std::chrono::steady_clock::time_point nextWakeup; + + while (!quit) { + checkInterrupt(); + + /* Let curl do its thing. */ + int running; + CURLMcode mc = curl_multi_perform(curlm, &running); + if (mc != CURLM_OK) { + throw nix::Error( + format("unexpected error from curl_multi_perform(): %s") % + curl_multi_strerror(mc)); + } + + /* Set the promises of any finished requests. */ + CURLMsg* msg; + int left; + while ((msg = curl_multi_info_read(curlm, &left)) != nullptr) { + if (msg->msg == CURLMSG_DONE) { + auto i = items.find(msg->easy_handle); + assert(i != items.end()); + i->second->finish(msg->data.result); + curl_multi_remove_handle(curlm, i->second->req); + i->second->active = false; + items.erase(i); + } + } + + /* Wait for activity, including wakeup events. */ + int numfds = 0; + struct curl_waitfd extraFDs[1]; + extraFDs[0].fd = wakeupPipe.readSide.get(); + extraFDs[0].events = CURL_WAIT_POLLIN; + extraFDs[0].revents = 0; + long maxSleepTimeMs = items.empty() ? 10000 : 100; + auto sleepTimeMs = + nextWakeup != std::chrono::steady_clock::time_point() + ? std::max( + 0, + (int)std::chrono::duration_cast<std::chrono::milliseconds>( + nextWakeup - std::chrono::steady_clock::now()) + .count()) + : maxSleepTimeMs; + DLOG(INFO) << "download thread waiting for " << sleepTimeMs << " ms"; + mc = curl_multi_wait(curlm, extraFDs, 1, sleepTimeMs, &numfds); + if (mc != CURLM_OK) { + throw nix::Error(format("unexpected error from curl_multi_wait(): %s") % + curl_multi_strerror(mc)); + } + + nextWakeup = std::chrono::steady_clock::time_point(); + + /* Add new curl requests from the incoming requests queue, + except for requests that are embargoed (waiting for a + retry timeout to expire). */ + if ((extraFDs[0].revents & CURL_WAIT_POLLIN) != 0) { + char buf[1024]; + auto res = read(extraFDs[0].fd, buf, sizeof(buf)); + if (res == -1 && errno != EINTR) { + throw SysError("reading curl wakeup socket"); + } + } + + std::vector<std::shared_ptr<DownloadItem>> incoming; + auto now = std::chrono::steady_clock::now(); + + { + auto state(state_.lock()); + while (!state->incoming.empty()) { + auto item = state->incoming.top(); + if (item->embargo <= now) { + incoming.push_back(item); + state->incoming.pop(); + } else { + if (nextWakeup == std::chrono::steady_clock::time_point() || + item->embargo < nextWakeup) { + nextWakeup = item->embargo; + } + break; + } + } + quit = state->quit; + } + + for (auto& item : incoming) { + DLOG(INFO) << "starting " << item->request.verb() << " of " + << item->request.uri; + item->init(); + curl_multi_add_handle(curlm, item->req); + item->active = true; + items[item->req] = item; + } + } + + DLOG(INFO) << "download thread shutting down"; + } + + void workerThreadEntry() { + try { + workerThreadMain(); + } catch (nix::Interrupted& e) { + } catch (std::exception& e) { + LOG(ERROR) << "unexpected error in download thread: " << e.what(); + } + + { + auto state(state_.lock()); + while (!state->incoming.empty()) { + state->incoming.pop(); + } + state->quit = true; + } + } + + void enqueueItem(const std::shared_ptr<DownloadItem>& item) { + if (item->request.data && !hasPrefix(item->request.uri, "http://") && + !hasPrefix(item->request.uri, "https://")) { + throw nix::Error("uploading to '%s' is not supported", item->request.uri); + } + + { + auto state(state_.lock()); + if (state->quit) { + throw nix::Error( + "cannot enqueue download request because the download thread is " + "shutting down"); + } + state->incoming.push(item); + } + writeFull(wakeupPipe.writeSide.get(), " "); + } + +#ifdef ENABLE_S3 + std::tuple<std::string, std::string, Store::Params> parseS3Uri( + std::string uri) { + auto [path, params] = splitUriAndParams(uri); + + auto slash = path.find('/', 5); // 5 is the length of "s3://" prefix + if (slash == std::string::npos) { + throw nix::Error("bad S3 URI '%s'", path); + } + + std::string bucketName(path, 5, slash - 5); + std::string key(path, slash + 1); + + return {bucketName, key, params}; + } +#endif + + void enqueueDownload(const DownloadRequest& request, + Callback<DownloadResult> callback) override { + /* Ugly hack to support s3:// URIs. */ + if (hasPrefix(request.uri, "s3://")) { + // FIXME: do this on a worker thread + try { +#ifdef ENABLE_S3 + auto [bucketName, key, params] = parseS3Uri(request.uri); + + std::string profile = get(params, "profile", ""); + std::string region = get(params, "region", Aws::Region::US_EAST_1); + std::string scheme = get(params, "scheme", ""); + std::string endpoint = get(params, "endpoint", ""); + + S3Helper s3Helper(profile, region, scheme, endpoint); + + // FIXME: implement ETag + auto s3Res = s3Helper.getObject(bucketName, key); + DownloadResult res; + if (!s3Res.data) + throw DownloadError( + NotFound, fmt("S3 object '%s' does not exist", request.uri)); + res.data = s3Res.data; + callback(std::move(res)); +#else + throw nix::Error( + "cannot download '%s' because Nix is not built with S3 support", + request.uri); +#endif + } catch (...) { + callback.rethrow(); + } + return; + } + + enqueueItem( + std::make_shared<DownloadItem>(*this, request, std::move(callback))); + } +}; + +ref<Downloader> getDownloader() { + static ref<Downloader> downloader = makeDownloader(); + return downloader; +} + +ref<Downloader> makeDownloader() { return make_ref<CurlDownloader>(); } + +std::future<DownloadResult> Downloader::enqueueDownload( + const DownloadRequest& request) { + auto promise = std::make_shared<std::promise<DownloadResult>>(); + enqueueDownload(request, {[promise](std::future<DownloadResult> fut) { + try { + promise->set_value(fut.get()); + } catch (...) { + promise->set_exception(std::current_exception()); + } + }}); + return promise->get_future(); +} + +DownloadResult Downloader::download(const DownloadRequest& request) { + return enqueueDownload(request).get(); +} + +void Downloader::download(DownloadRequest&& request, Sink& sink) { + /* Note: we can't call 'sink' via request.dataCallback, because + that would cause the sink to execute on the downloader + thread. If 'sink' is a coroutine, this will fail. Also, if the + sink is expensive (e.g. one that does decompression and writing + to the Nix store), it would stall the download thread too much. + Therefore we use a buffer to communicate data between the + download thread and the calling thread. */ + + struct State { + bool quit = false; + std::exception_ptr exc; + std::string data; + std::condition_variable avail, request; + }; + + auto _state = std::make_shared<Sync<State>>(); + + /* In case of an exception, wake up the download thread. FIXME: + abort the download request. */ + Finally finally([&]() { + auto state(_state->lock()); + state->quit = true; + state->request.notify_one(); + }); + + request.dataCallback = [_state](char* buf, size_t len) { + auto state(_state->lock()); + + if (state->quit) { + return; + } + + /* If the buffer is full, then go to sleep until the calling + thread wakes us up (i.e. when it has removed data from the + buffer). We don't wait forever to prevent stalling the + download thread. (Hopefully sleeping will throttle the + sender.) */ + if (state->data.size() > 1024 * 1024) { + DLOG(INFO) << "download buffer is full; going to sleep"; + state.wait_for(state->request, std::chrono::seconds(10)); + } + + /* Append data to the buffer and wake up the calling + thread. */ + state->data.append(buf, len); + state->avail.notify_one(); + }; + + enqueueDownload(request, {[_state](std::future<DownloadResult> fut) { + auto state(_state->lock()); + state->quit = true; + try { + fut.get(); + } catch (...) { + state->exc = std::current_exception(); + } + state->avail.notify_one(); + state->request.notify_one(); + }}); + + while (true) { + checkInterrupt(); + + std::string chunk; + + /* Grab data if available, otherwise wait for the download + thread to wake us up. */ + { + auto state(_state->lock()); + + while (state->data.empty()) { + if (state->quit) { + if (state->exc) { + std::rethrow_exception(state->exc); + } + return; + } + + state.wait(state->avail); + } + + chunk = std::move(state->data); + + state->request.notify_one(); + } + + /* Flush the data to the sink and wake up the download thread + if it's blocked on a full buffer. We don't hold the state + lock while doing this to prevent blocking the download + thread if sink() takes a long time. */ + sink((unsigned char*)chunk.data(), chunk.size()); + } +} + +CachedDownloadResult Downloader::downloadCached( + const ref<Store>& store, const CachedDownloadRequest& request) { + auto url = resolveUri(request.uri); + + auto name = request.name; + if (name.empty()) { + auto p = url.rfind('/'); + if (p != string::npos) { + name = string(url, p + 1); + } + } + + Path expectedStorePath; + if (request.expectedHash) { + expectedStorePath = + store->makeFixedOutputPath(request.unpack, request.expectedHash, name); + if (store->isValidPath(expectedStorePath)) { + CachedDownloadResult result; + result.storePath = expectedStorePath; + result.path = store->toRealPath(expectedStorePath); + return result; + } + } + + Path cacheDir = getCacheDir() + "/nix/tarballs"; + createDirs(cacheDir); + + string urlHash = hashString(htSHA256, name + std::string("\0"s) + url) + .to_string(Base32, false); + + Path dataFile = cacheDir + "/" + urlHash + ".info"; + Path fileLink = cacheDir + "/" + urlHash + "-file"; + + PathLocks lock({fileLink}, fmt("waiting for lock on '%1%'...", fileLink)); + + Path storePath; + + string expectedETag; + + bool skip = false; + + CachedDownloadResult result; + + if (pathExists(fileLink) && pathExists(dataFile)) { + storePath = readLink(fileLink); + store->addTempRoot(storePath); + if (store->isValidPath(storePath)) { + auto ss = tokenizeString<vector<string>>(readFile(dataFile), "\n"); + if (ss.size() >= 3 && ss[0] == url) { + time_t lastChecked; + if (string2Int(ss[2], lastChecked) && + (uint64_t)lastChecked + request.ttl >= (uint64_t)time(nullptr)) { + skip = true; + result.effectiveUri = request.uri; + result.etag = ss[1]; + } else if (!ss[1].empty()) { + DLOG(INFO) << "verifying previous ETag: " << ss[1]; + expectedETag = ss[1]; + } + } + } else { + storePath = ""; + } + } + + if (!skip) { + try { + DownloadRequest request2(url); + request2.expectedETag = expectedETag; + auto res = download(request2); + result.effectiveUri = res.effectiveUri; + result.etag = res.etag; + + if (!res.cached) { + ValidPathInfo info; + StringSink sink; + dumpString(*res.data, sink); + Hash hash = hashString( + request.expectedHash ? request.expectedHash.type : htSHA256, + *res.data); + info.path = store->makeFixedOutputPath(false, hash, name); + info.narHash = hashString(htSHA256, *sink.s); + info.narSize = sink.s->size(); + info.ca = makeFixedOutputCA(false, hash); + store->addToStore(info, sink.s, NoRepair, NoCheckSigs); + storePath = info.path; + } + + assert(!storePath.empty()); + replaceSymlink(storePath, fileLink); + + writeFile(dataFile, url + "\n" + res.etag + "\n" + + std::to_string(time(nullptr)) + "\n"); + } catch (DownloadError& e) { + if (storePath.empty()) { + throw; + } + LOG(WARNING) << e.msg() << "; using cached result"; + result.etag = expectedETag; + } + } + + if (request.unpack) { + Path unpackedLink = cacheDir + "/" + baseNameOf(storePath) + "-unpacked"; + PathLocks lock2({unpackedLink}, + fmt("waiting for lock on '%1%'...", unpackedLink)); + Path unpackedStorePath; + if (pathExists(unpackedLink)) { + unpackedStorePath = readLink(unpackedLink); + store->addTempRoot(unpackedStorePath); + if (!store->isValidPath(unpackedStorePath)) { + unpackedStorePath = ""; + } + } + if (unpackedStorePath.empty()) { + LOG(INFO) << "unpacking '" << url << "' ..."; + Path tmpDir = createTempDir(); + AutoDelete autoDelete(tmpDir, true); + // FIXME: this requires GNU tar for decompression. + runProgram("tar", true, + {"xf", store->toRealPath(storePath), "-C", tmpDir, + "--strip-components", "1"}); + unpackedStorePath = store->addToStore(name, tmpDir, true, htSHA256, + defaultPathFilter, NoRepair); + } + replaceSymlink(unpackedStorePath, unpackedLink); + storePath = unpackedStorePath; + } + + if (!expectedStorePath.empty() && storePath != expectedStorePath) { + unsigned int statusCode = 102; + Hash gotHash = + request.unpack + ? hashPath(request.expectedHash.type, store->toRealPath(storePath)) + .first + : hashFile(request.expectedHash.type, store->toRealPath(storePath)); + throw nix::Error(statusCode, + "hash mismatch in file downloaded from '%s':\n wanted: " + "%s\n got: %s", + url, request.expectedHash.to_string(), + gotHash.to_string()); + } + + result.storePath = storePath; + result.path = store->toRealPath(storePath); + return result; +} + +bool isUri(const string& s) { + if (s.compare(0, 8, "channel:") == 0) { + return true; + } + size_t pos = s.find("://"); + if (pos == string::npos) { + return false; + } + string scheme(s, 0, pos); + return scheme == "http" || scheme == "https" || scheme == "file" || + scheme == "channel" || scheme == "git" || scheme == "s3" || + scheme == "ssh"; +} + +} // namespace nix diff --git a/third_party/nix/src/libstore/download.hh b/third_party/nix/src/libstore/download.hh new file mode 100644 index 000000000000..9ddbdfc15955 --- /dev/null +++ b/third_party/nix/src/libstore/download.hh @@ -0,0 +1,133 @@ +#pragma once + +#include <future> +#include <string> + +#include "globals.hh" +#include "hash.hh" +#include "types.hh" + +namespace nix { + +struct DownloadSettings : Config { + Setting<bool> enableHttp2{this, true, "http2", + "Whether to enable HTTP/2 support."}; + + Setting<std::string> userAgentSuffix{ + this, "", "user-agent-suffix", + "String appended to the user agent in HTTP requests."}; + + Setting<size_t> httpConnections{this, + 25, + "http-connections", + "Number of parallel HTTP connections.", + {"binary-caches-parallel-connections"}}; + + Setting<unsigned long> connectTimeout{ + this, 0, "connect-timeout", + "Timeout for connecting to servers during downloads. 0 means use curl's " + "builtin default."}; + + Setting<unsigned long> stalledDownloadTimeout{ + this, 300, "stalled-download-timeout", + "Timeout (in seconds) for receiving data from servers during download. " + "Nix cancels idle downloads after this timeout's duration."}; + + Setting<unsigned int> tries{ + this, 5, "download-attempts", + "How often Nix will attempt to download a file before giving up."}; +}; + +extern DownloadSettings downloadSettings; + +struct DownloadRequest { + std::string uri; + std::string expectedETag; + bool verifyTLS = true; + bool head = false; + size_t tries = downloadSettings.tries; + unsigned int baseRetryTimeMs = 250; + bool decompress = true; + std::shared_ptr<std::string> data; + std::string mimeType; + std::function<void(char*, size_t)> dataCallback; + + DownloadRequest(const std::string& uri) : uri(uri) {} + + std::string verb() { return data ? "upload" : "download"; } +}; + +struct DownloadResult { + bool cached = false; + std::string etag; + std::string effectiveUri; + std::shared_ptr<std::string> data; + uint64_t bodySize = 0; +}; + +struct CachedDownloadRequest { + std::string uri; + bool unpack = false; + std::string name; + Hash expectedHash; + unsigned int ttl = settings.tarballTtl; + + CachedDownloadRequest(const std::string& uri) : uri(uri) {} +}; + +struct CachedDownloadResult { + // Note: 'storePath' may be different from 'path' when using a + // chroot store. + Path storePath; + Path path; + std::optional<std::string> etag; + std::string effectiveUri; +}; + +class Store; + +struct Downloader { + virtual ~Downloader() {} + + /* Enqueue a download request, returning a future to the result of + the download. The future may throw a DownloadError + exception. */ + virtual void enqueueDownload(const DownloadRequest& request, + Callback<DownloadResult> callback) = 0; + + std::future<DownloadResult> enqueueDownload(const DownloadRequest& request); + + /* Synchronously download a file. */ + DownloadResult download(const DownloadRequest& request); + + /* Download a file, writing its data to a sink. The sink will be + invoked on the thread of the caller. */ + void download(DownloadRequest&& request, Sink& sink); + + /* Check if the specified file is already in ~/.cache/nix/tarballs + and is more recent than โtarball-ttlโ seconds. Otherwise, + use the recorded ETag to verify if the server has a more + recent version, and if so, download it to the Nix store. */ + CachedDownloadResult downloadCached(const ref<Store>& store, + const CachedDownloadRequest& request); + + enum Error { NotFound, Forbidden, Misc, Transient, Interrupted }; +}; + +/* Return a shared Downloader object. Using this object is preferred + because it enables connection reuse and HTTP/2 multiplexing. */ +ref<Downloader> getDownloader(); + +/* Return a new Downloader object. */ +ref<Downloader> makeDownloader(); + +class DownloadError : public Error { + public: + Downloader::Error error; + DownloadError(Downloader::Error error, const FormatOrString& fs) + : Error(fs), error(error) {} +}; + +bool isUri(const string& s); + +} // namespace nix diff --git a/third_party/nix/src/libstore/export-import.cc b/third_party/nix/src/libstore/export-import.cc new file mode 100644 index 000000000000..077b0d539001 --- /dev/null +++ b/third_party/nix/src/libstore/export-import.cc @@ -0,0 +1,111 @@ +#include <algorithm> + +#include "archive.hh" +#include "store-api.hh" +#include "worker-protocol.hh" + +namespace nix { + +struct HashAndWriteSink : Sink { + Sink& writeSink; + HashSink hashSink; + explicit HashAndWriteSink(Sink& writeSink) + : writeSink(writeSink), hashSink(htSHA256) {} + void operator()(const unsigned char* data, size_t len) override { + writeSink(data, len); + hashSink(data, len); + } + Hash currentHash() { return hashSink.currentHash().first; } +}; + +void Store::exportPaths(const Paths& paths, Sink& sink) { + Paths sorted = topoSortPaths(PathSet(paths.begin(), paths.end())); + std::reverse(sorted.begin(), sorted.end()); + + std::string doneLabel("paths exported"); + // logger->incExpected(doneLabel, sorted.size()); + + for (auto& path : sorted) { + // Activity act(*logger, lvlInfo, format("exporting path '%s'") % path); + sink << 1; + exportPath(path, sink); + // logger->incProgress(doneLabel); + } + + sink << 0; +} + +void Store::exportPath(const Path& path, Sink& sink) { + auto info = queryPathInfo(path); + + HashAndWriteSink hashAndWriteSink(sink); + + narFromPath(path, hashAndWriteSink); + + /* Refuse to export paths that have changed. This prevents + filesystem corruption from spreading to other machines. + Don't complain if the stored hash is zero (unknown). */ + Hash hash = hashAndWriteSink.currentHash(); + if (hash != info->narHash && info->narHash != Hash(info->narHash.type)) { + throw Error(format("hash of path '%1%' has changed from '%2%' to '%3%'!") % + path % info->narHash.to_string() % hash.to_string()); + } + + hashAndWriteSink << exportMagic << path << info->references << info->deriver + << 0; +} + +Paths Store::importPaths(Source& source, + const std::shared_ptr<FSAccessor>& accessor, + CheckSigsFlag checkSigs) { + Paths res; + while (true) { + auto n = readNum<uint64_t>(source); + if (n == 0) { + break; + } + if (n != 1) { + throw Error( + "input doesn't look like something created by 'nix-store --export'"); + } + + /* Extract the NAR from the source. */ + TeeSink tee(source); + parseDump(tee, tee.source); + + uint32_t magic = readInt(source); + if (magic != exportMagic) { + throw Error("Nix archive cannot be imported; wrong format"); + } + + ValidPathInfo info; + + info.path = readStorePath(*this, source); + + // Activity act(*logger, lvlInfo, format("importing path '%s'") % + // info.path); + + info.references = readStorePaths<PathSet>(*this, source); + + info.deriver = readString(source); + if (!info.deriver.empty()) { + assertStorePath(info.deriver); + } + + info.narHash = hashString(htSHA256, *tee.source.data); + info.narSize = tee.source.data->size(); + + // Ignore optional legacy signature. + if (readInt(source) == 1) { + readString(source); + } + + addToStore(info, tee.source.data, NoRepair, checkSigs, accessor); + + res.push_back(info.path); + } + + return res; +} + +} // namespace nix diff --git a/third_party/nix/src/libstore/fs-accessor.hh b/third_party/nix/src/libstore/fs-accessor.hh new file mode 100644 index 000000000000..ad0d7f0ed9fd --- /dev/null +++ b/third_party/nix/src/libstore/fs-accessor.hh @@ -0,0 +1,31 @@ +#pragma once + +#include "types.hh" + +namespace nix { + +/* An abstract class for accessing a filesystem-like structure, such + as a (possibly remote) Nix store or the contents of a NAR file. */ +class FSAccessor { + public: + enum Type { tMissing, tRegular, tSymlink, tDirectory }; + + struct Stat { + Type type = tMissing; + uint64_t fileSize = 0; // regular files only + bool isExecutable = false; // regular files only + uint64_t narOffset = 0; // regular files only + }; + + virtual ~FSAccessor() {} + + virtual Stat stat(const Path& path) = 0; + + virtual StringSet readDirectory(const Path& path) = 0; + + virtual std::string readFile(const Path& path) = 0; + + virtual std::string readLink(const Path& path) = 0; +}; + +} // namespace nix diff --git a/third_party/nix/src/libstore/gc.cc b/third_party/nix/src/libstore/gc.cc new file mode 100644 index 000000000000..bc3393265e48 --- /dev/null +++ b/third_party/nix/src/libstore/gc.cc @@ -0,0 +1,1013 @@ +#include <algorithm> +#include <cerrno> +#include <climits> +#include <functional> +#include <queue> +#include <random> +#include <regex> + +#include <fcntl.h> +#include <sys/stat.h> +#include <sys/statvfs.h> +#include <sys/types.h> +#include <unistd.h> + +#include "derivations.hh" +#include "finally.hh" +#include "globals.hh" +#include "glog/logging.h" +#include "local-store.hh" + +namespace nix { + +static string gcLockName = "gc.lock"; +static string gcRootsDir = "gcroots"; + +/* Acquire the global GC lock. This is used to prevent new Nix + processes from starting after the temporary root files have been + read. To be precise: when they try to create a new temporary root + file, they will block until the garbage collector has finished / + yielded the GC lock. */ +AutoCloseFD LocalStore::openGCLock(LockType lockType) { + Path fnGCLock = (format("%1%/%2%") % stateDir % gcLockName).str(); + + DLOG(INFO) << "acquiring global GC lock " << fnGCLock; + + AutoCloseFD fdGCLock = + open(fnGCLock.c_str(), O_RDWR | O_CREAT | O_CLOEXEC, 0600); + if (!fdGCLock) { + throw SysError(format("opening global GC lock '%1%'") % fnGCLock); + } + + if (!lockFile(fdGCLock.get(), lockType, false)) { + LOG(ERROR) << "waiting for the big garbage collector lock..."; + lockFile(fdGCLock.get(), lockType, true); + } + + /* !!! Restrict read permission on the GC root. Otherwise any + process that can open the file for reading can DoS the + collector. */ + + return fdGCLock; +} + +static void makeSymlink(const Path& link, const Path& target) { + /* Create directories up to `gcRoot'. */ + createDirs(dirOf(link)); + + /* Create the new symlink. */ + Path tempLink = + (format("%1%.tmp-%2%-%3%") % link % getpid() % random()).str(); + createSymlink(target, tempLink); + + /* Atomically replace the old one. */ + if (rename(tempLink.c_str(), link.c_str()) == -1) { + throw SysError(format("cannot rename '%1%' to '%2%'") % tempLink % link); + } +} + +void LocalStore::syncWithGC() { AutoCloseFD fdGCLock = openGCLock(ltRead); } + +void LocalStore::addIndirectRoot(const Path& path) { + string hash = hashString(htSHA1, path).to_string(Base32, false); + Path realRoot = canonPath( + (format("%1%/%2%/auto/%3%") % stateDir % gcRootsDir % hash).str()); + makeSymlink(realRoot, path); +} + +Path LocalFSStore::addPermRoot(const Path& _storePath, const Path& _gcRoot, + bool indirect, bool allowOutsideRootsDir) { + Path storePath(canonPath(_storePath)); + Path gcRoot(canonPath(_gcRoot)); + assertStorePath(storePath); + + if (isInStore(gcRoot)) { + throw Error(format("creating a garbage collector root (%1%) in the Nix " + "store is forbidden " + "(are you running nix-build inside the store?)") % + gcRoot); + } + + if (indirect) { + /* Don't clobber the link if it already exists and doesn't + point to the Nix store. */ + if (pathExists(gcRoot) && + (!isLink(gcRoot) || !isInStore(readLink(gcRoot)))) { + throw Error(format("cannot create symlink '%1%'; already exists") % + gcRoot); + } + makeSymlink(gcRoot, storePath); + addIndirectRoot(gcRoot); + } + + else { + if (!allowOutsideRootsDir) { + Path rootsDir = + canonPath((format("%1%/%2%") % stateDir % gcRootsDir).str()); + + if (string(gcRoot, 0, rootsDir.size() + 1) != rootsDir + "/") { + throw Error(format("path '%1%' is not a valid garbage collector root; " + "it's not in the directory '%2%'") % + gcRoot % rootsDir); + } + } + + if (baseNameOf(gcRoot) == baseNameOf(storePath)) { + writeFile(gcRoot, ""); + } else { + makeSymlink(gcRoot, storePath); + } + } + + /* Check that the root can be found by the garbage collector. + !!! This can be very slow on machines that have many roots. + Instead of reading all the roots, it would be more efficient to + check if the root is in a directory in or linked from the + gcroots directory. */ + if (settings.checkRootReachability) { + Roots roots = findRoots(false); + if (roots[storePath].count(gcRoot) == 0) { + LOG(ERROR) << "warning: '" << gcRoot + << "' is not in a directory where the garbage " + << "collector looks for roots; therefore, '" << storePath + << "' might be removed by the garbage collector"; + } + } + + /* Grab the global GC root, causing us to block while a GC is in + progress. This prevents the set of permanent roots from + increasing while a GC is in progress. */ + syncWithGC(); + + return gcRoot; +} + +void LocalStore::addTempRoot(const Path& path) { + auto state(_state.lock()); + + /* Create the temporary roots file for this process. */ + if (!state->fdTempRoots) { + while (true) { + AutoCloseFD fdGCLock = openGCLock(ltRead); + + if (pathExists(fnTempRoots)) { + /* It *must* be stale, since there can be no two + processes with the same pid. */ + unlink(fnTempRoots.c_str()); + } + + state->fdTempRoots = openLockFile(fnTempRoots, true); + + fdGCLock = -1; + + DLOG(INFO) << "acquiring read lock on " << fnTempRoots; + lockFile(state->fdTempRoots.get(), ltRead, true); + + /* Check whether the garbage collector didn't get in our + way. */ + struct stat st; + if (fstat(state->fdTempRoots.get(), &st) == -1) { + throw SysError(format("statting '%1%'") % fnTempRoots); + } + if (st.st_size == 0) { + break; + } + + /* The garbage collector deleted this file before we could + get a lock. (It won't delete the file after we get a + lock.) Try again. */ + } + } + + /* Upgrade the lock to a write lock. This will cause us to block + if the garbage collector is holding our lock. */ + DLOG(INFO) << "acquiring write lock on " << fnTempRoots; + lockFile(state->fdTempRoots.get(), ltWrite, true); + + string s = path + '\0'; + writeFull(state->fdTempRoots.get(), s); + + /* Downgrade to a read lock. */ + DLOG(INFO) << "downgrading to read lock on " << fnTempRoots; + lockFile(state->fdTempRoots.get(), ltRead, true); +} + +static std::string censored = "{censored}"; + +void LocalStore::findTempRoots(FDs& fds, Roots& tempRoots, bool censor) { + /* Read the `temproots' directory for per-process temporary root + files. */ + for (auto& i : readDirectory(tempRootsDir)) { + Path path = tempRootsDir + "/" + i.name; + + pid_t pid = std::stoi(i.name); + + DLOG(INFO) << "reading temporary root file " << path; + FDPtr fd(new AutoCloseFD(open(path.c_str(), O_CLOEXEC | O_RDWR, 0666))); + if (!*fd) { + /* It's okay if the file has disappeared. */ + if (errno == ENOENT) { + continue; + } + throw SysError(format("opening temporary roots file '%1%'") % path); + } + + /* This should work, but doesn't, for some reason. */ + // FDPtr fd(new AutoCloseFD(openLockFile(path, false))); + // if (*fd == -1) { continue; } + + /* Try to acquire a write lock without blocking. This can + only succeed if the owning process has died. In that case + we don't care about its temporary roots. */ + if (lockFile(fd->get(), ltWrite, false)) { + LOG(ERROR) << "removing stale temporary roots file " << path; + unlink(path.c_str()); + writeFull(fd->get(), "d"); + continue; + } + + /* Acquire a read lock. This will prevent the owning process + from upgrading to a write lock, therefore it will block in + addTempRoot(). */ + DLOG(INFO) << "waiting for read lock on " << path; + lockFile(fd->get(), ltRead, true); + + /* Read the entire file. */ + string contents = readFile(fd->get()); + + /* Extract the roots. */ + string::size_type pos = 0; + string::size_type end; + + while ((end = contents.find((char)0, pos)) != string::npos) { + Path root(contents, pos, end - pos); + DLOG(INFO) << "got temporary root " << root; + assertStorePath(root); + tempRoots[root].emplace(censor ? censored : fmt("{temp:%d}", pid)); + pos = end + 1; + } + + fds.push_back(fd); /* keep open */ + } +} + +void LocalStore::findRoots(const Path& path, unsigned char type, Roots& roots) { + auto foundRoot = [&](const Path& path, const Path& target) { + Path storePath = toStorePath(target); + if (isStorePath(storePath) && isValidPath(storePath)) { + roots[storePath].emplace(path); + } else { + LOG(INFO) << "skipping invalid root from '" << path << "' to '" + << storePath << "'"; + } + }; + + try { + if (type == DT_UNKNOWN) { + type = getFileType(path); + } + + if (type == DT_DIR) { + for (auto& i : readDirectory(path)) { + findRoots(path + "/" + i.name, i.type, roots); + } + } + + else if (type == DT_LNK) { + Path target = readLink(path); + if (isInStore(target)) { + foundRoot(path, target); + } + + /* Handle indirect roots. */ + else { + target = absPath(target, dirOf(path)); + if (!pathExists(target)) { + if (isInDir(path, stateDir + "/" + gcRootsDir + "/auto")) { + LOG(INFO) << "removing stale link from '" << path << "' to '" + << target << "'"; + unlink(path.c_str()); + } + } else { + struct stat st2 = lstat(target); + if (!S_ISLNK(st2.st_mode)) { + return; + } + Path target2 = readLink(target); + if (isInStore(target2)) { + foundRoot(target, target2); + } + } + } + } + + else if (type == DT_REG) { + Path storePath = storeDir + "/" + baseNameOf(path); + if (isStorePath(storePath) && isValidPath(storePath)) { + roots[storePath].emplace(path); + } + } + + } + + catch (SysError& e) { + /* We only ignore permanent failures. */ + if (e.errNo == EACCES || e.errNo == ENOENT || e.errNo == ENOTDIR) { + LOG(INFO) << "cannot read potential root '" << path << "'"; + } else { + throw; + } + } +} + +void LocalStore::findRootsNoTemp(Roots& roots, bool censor) { + /* Process direct roots in {gcroots,profiles}. */ + findRoots(stateDir + "/" + gcRootsDir, DT_UNKNOWN, roots); + findRoots(stateDir + "/profiles", DT_UNKNOWN, roots); + + /* Add additional roots returned by different platforms-specific + heuristics. This is typically used to add running programs to + the set of roots (to prevent them from being garbage collected). */ + findRuntimeRoots(roots, censor); +} + +Roots LocalStore::findRoots(bool censor) { + Roots roots; + findRootsNoTemp(roots, censor); + + FDs fds; + findTempRoots(fds, roots, censor); + + return roots; +} + +static void readProcLink(const string& file, Roots& roots) { + /* 64 is the starting buffer size gnu readlink uses... */ + auto bufsiz = ssize_t{64}; +try_again: + char buf[bufsiz]; + auto res = readlink(file.c_str(), buf, bufsiz); + if (res == -1) { + if (errno == ENOENT || errno == EACCES || errno == ESRCH) { + return; + } + throw SysError("reading symlink"); + } + if (res == bufsiz) { + if (SSIZE_MAX / 2 < bufsiz) { + throw Error("stupidly long symlink"); + } + bufsiz *= 2; + goto try_again; + } + if (res > 0 && buf[0] == '/') { + roots[std::string(static_cast<char*>(buf), res)].emplace(file); + } +} + +static string quoteRegexChars(const string& raw) { + static auto specialRegex = std::regex(R"([.^$\\*+?()\[\]{}|])"); + return std::regex_replace(raw, specialRegex, R"(\$&)"); +} + +static void readFileRoots(const char* path, Roots& roots) { + try { + roots[readFile(path)].emplace(path); + } catch (SysError& e) { + if (e.errNo != ENOENT && e.errNo != EACCES) { + throw; + } + } +} + +void LocalStore::findRuntimeRoots(Roots& roots, bool censor) { + Roots unchecked; + + auto procDir = AutoCloseDir{opendir("/proc")}; + if (procDir) { + struct dirent* ent; + auto digitsRegex = std::regex(R"(^\d+$)"); + auto mapRegex = + std::regex(R"(^\s*\S+\s+\S+\s+\S+\s+\S+\s+\S+\s+(/\S+)\s*$)"); + auto storePathRegex = std::regex(quoteRegexChars(storeDir) + + R"(/[0-9a-z]+[0-9a-zA-Z\+\-\._\?=]*)"); + while (errno = 0, ent = readdir(procDir.get())) { + checkInterrupt(); + if (std::regex_match(ent->d_name, digitsRegex)) { + readProcLink(fmt("/proc/%s/exe", ent->d_name), unchecked); + readProcLink(fmt("/proc/%s/cwd", ent->d_name), unchecked); + + auto fdStr = fmt("/proc/%s/fd", ent->d_name); + auto fdDir = AutoCloseDir(opendir(fdStr.c_str())); + if (!fdDir) { + if (errno == ENOENT || errno == EACCES) { + continue; + } + throw SysError(format("opening %1%") % fdStr); + } + struct dirent* fd_ent; + while (errno = 0, fd_ent = readdir(fdDir.get())) { + if (fd_ent->d_name[0] != '.') { + readProcLink(fmt("%s/%s", fdStr, fd_ent->d_name), unchecked); + } + } + if (errno) { + if (errno == ESRCH) { + continue; + } + throw SysError(format("iterating /proc/%1%/fd") % ent->d_name); + } + fdDir.reset(); + + try { + auto mapFile = fmt("/proc/%s/maps", ent->d_name); + auto mapLines = tokenizeString<std::vector<string>>( + readFile(mapFile, true), "\n"); + for (const auto& line : mapLines) { + auto match = std::smatch{}; + if (std::regex_match(line, match, mapRegex)) { + unchecked[match[1]].emplace(mapFile); + } + } + + auto envFile = fmt("/proc/%s/environ", ent->d_name); + auto envString = readFile(envFile, true); + auto env_end = std::sregex_iterator{}; + for (auto i = std::sregex_iterator{envString.begin(), envString.end(), + storePathRegex}; + i != env_end; ++i) { + unchecked[i->str()].emplace(envFile); + } + } catch (SysError& e) { + if (errno == ENOENT || errno == EACCES || errno == ESRCH) { + continue; + } + throw; + } + } + } + if (errno) { + throw SysError("iterating /proc"); + } + } + +#if !defined(__linux__) + // lsof is really slow on OS X. This actually causes the gc-concurrent.sh test + // to fail. See: https://github.com/NixOS/nix/issues/3011 Because of this we + // disable lsof when running the tests. + if (getEnv("_NIX_TEST_NO_LSOF") == "") { + try { + std::regex lsofRegex(R"(^n(/.*)$)"); + auto lsofLines = tokenizeString<std::vector<string>>( + runProgram(LSOF, true, {"-n", "-w", "-F", "n"}), "\n"); + for (const auto& line : lsofLines) { + std::smatch match; + if (std::regex_match(line, match, lsofRegex)) + unchecked[match[1]].emplace("{lsof}"); + } + } catch (ExecError& e) { + /* lsof not installed, lsof failed */ + } + } +#endif + +#if defined(__linux__) + readFileRoots("/proc/sys/kernel/modprobe", unchecked); + readFileRoots("/proc/sys/kernel/fbsplash", unchecked); + readFileRoots("/proc/sys/kernel/poweroff_cmd", unchecked); +#endif + + for (auto& [target, links] : unchecked) { + if (isInStore(target)) { + Path path = toStorePath(target); + if (isStorePath(path) && isValidPath(path)) { + DLOG(INFO) << "got additional root " << path; + if (censor) { + roots[path].insert(censored); + } else { + roots[path].insert(links.begin(), links.end()); + } + } + } + } +} + +struct GCLimitReached {}; + +struct LocalStore::GCState { + GCOptions options; + GCResults& results; + PathSet roots; + PathSet tempRoots; + PathSet dead; + PathSet alive; + bool gcKeepOutputs; + bool gcKeepDerivations; + unsigned long long bytesInvalidated; + bool moveToTrash = true; + bool shouldDelete; + explicit GCState(GCResults& results_) + : results(results_), bytesInvalidated(0) {} +}; + +bool LocalStore::isActiveTempFile(const GCState& state, const Path& path, + const string& suffix) { + return hasSuffix(path, suffix) && + state.tempRoots.find(string(path, 0, path.size() - suffix.size())) != + state.tempRoots.end(); +} + +void LocalStore::deleteGarbage(GCState& state, const Path& path) { + unsigned long long bytesFreed; + deletePath(path, bytesFreed); + state.results.bytesFreed += bytesFreed; +} + +void LocalStore::deletePathRecursive(GCState& state, const Path& path) { + checkInterrupt(); + + unsigned long long size = 0; + + if (isStorePath(path) && isValidPath(path)) { + PathSet referrers; + queryReferrers(path, referrers); + for (auto& i : referrers) { + if (i != path) { + deletePathRecursive(state, i); + } + } + size = queryPathInfo(path)->narSize; + invalidatePathChecked(path); + } + + Path realPath = realStoreDir + "/" + baseNameOf(path); + + struct stat st; + if (lstat(realPath.c_str(), &st) != 0) { + if (errno == ENOENT) { + return; + } + throw SysError(format("getting status of %1%") % realPath); + } + + LOG(INFO) << "deleting '" << path << "'"; + + state.results.paths.insert(path); + + /* If the path is not a regular file or symlink, move it to the + trash directory. The move is to ensure that later (when we're + not holding the global GC lock) we can delete the path without + being afraid that the path has become alive again. Otherwise + delete it right away. */ + if (state.moveToTrash && S_ISDIR(st.st_mode)) { + // Estimate the amount freed using the narSize field. FIXME: + // if the path was not valid, need to determine the actual + // size. + try { + if (chmod(realPath.c_str(), st.st_mode | S_IWUSR) == -1) { + throw SysError(format("making '%1%' writable") % realPath); + } + Path tmp = trashDir + "/" + baseNameOf(path); + if (rename(realPath.c_str(), tmp.c_str()) != 0) { + throw SysError(format("unable to rename '%1%' to '%2%'") % realPath % + tmp); + } + state.bytesInvalidated += size; + } catch (SysError& e) { + if (e.errNo == ENOSPC) { + LOG(INFO) << "note: can't create move '" << realPath + << "': " << e.msg(); + deleteGarbage(state, realPath); + } + } + } else { + deleteGarbage(state, realPath); + } + + if (state.results.bytesFreed + state.bytesInvalidated > + state.options.maxFreed) { + LOG(INFO) << "deleted or invalidated more than " << state.options.maxFreed + << " bytes; stopping"; + throw GCLimitReached(); + } +} + +bool LocalStore::canReachRoot(GCState& state, PathSet& visited, + const Path& path) { + if (visited.count(path) != 0u) { + return false; + } + + if (state.alive.count(path) != 0u) { + return true; + } + + if (state.dead.count(path) != 0u) { + return false; + } + + if (state.roots.count(path) != 0u) { + DLOG(INFO) << "cannot delete '" << path << "' because it's a root"; + state.alive.insert(path); + return true; + } + + visited.insert(path); + + if (!isStorePath(path) || !isValidPath(path)) { + return false; + } + + PathSet incoming; + + /* Don't delete this path if any of its referrers are alive. */ + queryReferrers(path, incoming); + + /* If keep-derivations is set and this is a derivation, then + don't delete the derivation if any of the outputs are alive. */ + if (state.gcKeepDerivations && isDerivation(path)) { + PathSet outputs = queryDerivationOutputs(path); + for (auto& i : outputs) { + if (isValidPath(i) && queryPathInfo(i)->deriver == path) { + incoming.insert(i); + } + } + } + + /* If keep-outputs is set, then don't delete this path if there + are derivers of this path that are not garbage. */ + if (state.gcKeepOutputs) { + PathSet derivers = queryValidDerivers(path); + for (auto& i : derivers) { + incoming.insert(i); + } + } + + for (auto& i : incoming) { + if (i != path) { + if (canReachRoot(state, visited, i)) { + state.alive.insert(path); + return true; + } + } + } + + return false; +} + +void LocalStore::tryToDelete(GCState& state, const Path& path) { + checkInterrupt(); + + auto realPath = realStoreDir + "/" + baseNameOf(path); + if (realPath == linksDir || realPath == trashDir) { + return; + } + + // Activity act(*logger, lvlDebug, format("considering whether to delete + // '%1%'") % path); + + if (!isStorePath(path) || !isValidPath(path)) { + /* A lock file belonging to a path that we're building right + now isn't garbage. */ + if (isActiveTempFile(state, path, ".lock")) { + return; + } + + /* Don't delete .chroot directories for derivations that are + currently being built. */ + if (isActiveTempFile(state, path, ".chroot")) { + return; + } + + /* Don't delete .check directories for derivations that are + currently being built, because we may need to run + diff-hook. */ + if (isActiveTempFile(state, path, ".check")) { + return; + } + } + + PathSet visited; + + if (canReachRoot(state, visited, path)) { + DLOG(INFO) << "cannot delete '" << path << "' because it's still reachable"; + } else { + /* No path we visited was a root, so everything is garbage. + But we only delete โpathโ and its referrers here so that + โnix-store --deleteโ doesn't have the unexpected effect of + recursing into derivations and outputs. */ + state.dead.insert(visited.begin(), visited.end()); + if (state.shouldDelete) { + deletePathRecursive(state, path); + } + } +} + +/* Unlink all files in /nix/store/.links that have a link count of 1, + which indicates that there are no other links and so they can be + safely deleted. FIXME: race condition with optimisePath(): we + might see a link count of 1 just before optimisePath() increases + the link count. */ +void LocalStore::removeUnusedLinks(const GCState& state) { + AutoCloseDir dir(opendir(linksDir.c_str())); + if (!dir) { + throw SysError(format("opening directory '%1%'") % linksDir); + } + + long long actualSize = 0; + long long unsharedSize = 0; + + struct dirent* dirent; + while (errno = 0, dirent = readdir(dir.get())) { + checkInterrupt(); + string name = dirent->d_name; + if (name == "." || name == "..") { + continue; + } + Path path = linksDir + "/" + name; + + struct stat st; + if (lstat(path.c_str(), &st) == -1) { + throw SysError(format("statting '%1%'") % path); + } + + if (st.st_nlink != 1) { + actualSize += st.st_size; + unsharedSize += (st.st_nlink - 1) * st.st_size; + continue; + } + + LOG(INFO) << "deleting unused link " << path; + + if (unlink(path.c_str()) == -1) { + throw SysError(format("deleting '%1%'") % path); + } + + state.results.bytesFreed += st.st_size; + } + + struct stat st; + if (stat(linksDir.c_str(), &st) == -1) { + throw SysError(format("statting '%1%'") % linksDir); + } + + long long overhead = st.st_blocks * 512ULL; + + // TODO(tazjin): absl::StrFormat %.2f + LOG(INFO) << "note: currently hard linking saves " + << ((unsharedSize - actualSize - overhead) / (1024.0 * 1024.0)) + << " MiB"; +} + +void LocalStore::collectGarbage(const GCOptions& options, GCResults& results) { + GCState state(results); + state.options = options; + state.gcKeepOutputs = settings.gcKeepOutputs; + state.gcKeepDerivations = settings.gcKeepDerivations; + + /* Using `--ignore-liveness' with `--delete' can have unintended + consequences if `keep-outputs' or `keep-derivations' are true + (the garbage collector will recurse into deleting the outputs + or derivers, respectively). So disable them. */ + if (options.action == GCOptions::gcDeleteSpecific && options.ignoreLiveness) { + state.gcKeepOutputs = false; + state.gcKeepDerivations = false; + } + + state.shouldDelete = options.action == GCOptions::gcDeleteDead || + options.action == GCOptions::gcDeleteSpecific; + + if (state.shouldDelete) { + deletePath(reservedPath); + } + + /* Acquire the global GC root. This prevents + a) New roots from being added. + b) Processes from creating new temporary root files. */ + AutoCloseFD fdGCLock = openGCLock(ltWrite); + + /* Find the roots. Since we've grabbed the GC lock, the set of + permanent roots cannot increase now. */ + LOG(INFO) << "finding garbage collector roots..."; + Roots rootMap; + if (!options.ignoreLiveness) { + findRootsNoTemp(rootMap, true); + } + + for (auto& i : rootMap) { + state.roots.insert(i.first); + } + + /* Read the temporary roots. This acquires read locks on all + per-process temporary root files. So after this point no paths + can be added to the set of temporary roots. */ + FDs fds; + Roots tempRoots; + findTempRoots(fds, tempRoots, true); + for (auto& root : tempRoots) { + state.tempRoots.insert(root.first); + } + state.roots.insert(state.tempRoots.begin(), state.tempRoots.end()); + + /* After this point the set of roots or temporary roots cannot + increase, since we hold locks on everything. So everything + that is not reachable from `roots' is garbage. */ + + if (state.shouldDelete) { + if (pathExists(trashDir)) { + deleteGarbage(state, trashDir); + } + try { + createDirs(trashDir); + } catch (SysError& e) { + if (e.errNo == ENOSPC) { + LOG(INFO) << "note: can't create trash directory: " << e.msg(); + state.moveToTrash = false; + } + } + } + + /* Now either delete all garbage paths, or just the specified + paths (for gcDeleteSpecific). */ + + if (options.action == GCOptions::gcDeleteSpecific) { + for (auto& i : options.pathsToDelete) { + assertStorePath(i); + tryToDelete(state, i); + if (state.dead.find(i) == state.dead.end()) { + throw Error(format("cannot delete path '%1%' since it is still alive") % + i); + } + } + + } else if (options.maxFreed > 0) { + if (state.shouldDelete) { + LOG(INFO) << "deleting garbage..."; + } else { + LOG(ERROR) << "determining live/dead paths..."; + } + + try { + AutoCloseDir dir(opendir(realStoreDir.c_str())); + if (!dir) { + throw SysError(format("opening directory '%1%'") % realStoreDir); + } + + /* Read the store and immediately delete all paths that + aren't valid. When using --max-freed etc., deleting + invalid paths is preferred over deleting unreachable + paths, since unreachable paths could become reachable + again. We don't use readDirectory() here so that GCing + can start faster. */ + Paths entries; + struct dirent* dirent; + while (errno = 0, dirent = readdir(dir.get())) { + checkInterrupt(); + string name = dirent->d_name; + if (name == "." || name == "..") { + continue; + } + Path path = storeDir + "/" + name; + if (isStorePath(path) && isValidPath(path)) { + entries.push_back(path); + } else { + tryToDelete(state, path); + } + } + + dir.reset(); + + /* Now delete the unreachable valid paths. Randomise the + order in which we delete entries to make the collector + less biased towards deleting paths that come + alphabetically first (e.g. /nix/store/000...). This + matters when using --max-freed etc. */ + vector<Path> entries_(entries.begin(), entries.end()); + std::mt19937 gen(1); + std::shuffle(entries_.begin(), entries_.end(), gen); + + for (auto& i : entries_) { + tryToDelete(state, i); + } + + } catch (GCLimitReached& e) { + } + } + + if (state.options.action == GCOptions::gcReturnLive) { + state.results.paths = state.alive; + return; + } + + if (state.options.action == GCOptions::gcReturnDead) { + state.results.paths = state.dead; + return; + } + + /* Allow other processes to add to the store from here on. */ + fdGCLock = -1; + fds.clear(); + + /* Delete the trash directory. */ + LOG(INFO) << "deleting " << trashDir; + deleteGarbage(state, trashDir); + + /* Clean up the links directory. */ + if (options.action == GCOptions::gcDeleteDead || + options.action == GCOptions::gcDeleteSpecific) { + LOG(INFO) << "deleting unused links..."; + removeUnusedLinks(state); + } + + /* While we're at it, vacuum the database. */ + // if (options.action == GCOptions::gcDeleteDead) { vacuumDB(); } +} + +void LocalStore::autoGC(bool sync) { + static auto fakeFreeSpaceFile = getEnv("_NIX_TEST_FREE_SPACE_FILE", ""); + + auto getAvail = [this]() -> uint64_t { + if (!fakeFreeSpaceFile.empty()) { + return std::stoll(readFile(fakeFreeSpaceFile)); + } + + struct statvfs st; + if (statvfs(realStoreDir.c_str(), &st) != 0) { + throw SysError("getting filesystem info about '%s'", realStoreDir); + } + + return (uint64_t)st.f_bavail * st.f_bsize; + }; + + std::shared_future<void> future; + + { + auto state(_state.lock()); + + if (state->gcRunning) { + future = state->gcFuture; + DLOG(INFO) << "waiting for auto-GC to finish"; + goto sync; + } + + auto now = std::chrono::steady_clock::now(); + + if (now < state->lastGCCheck + + std::chrono::seconds(settings.minFreeCheckInterval)) { + return; + } + + auto avail = getAvail(); + + state->lastGCCheck = now; + + if (avail >= settings.minFree || avail >= settings.maxFree) { + return; + } + + if (avail > state->availAfterGC * 0.97) { + return; + } + + state->gcRunning = true; + + std::promise<void> promise; + future = state->gcFuture = promise.get_future().share(); + + std::thread([promise{std::move(promise)}, this, avail, getAvail]() mutable { + try { + /* Wake up any threads waiting for the auto-GC to finish. */ + Finally wakeup([&]() { + auto state(_state.lock()); + state->gcRunning = false; + state->lastGCCheck = std::chrono::steady_clock::now(); + promise.set_value(); + }); + + GCOptions options; + options.maxFreed = settings.maxFree - avail; + + LOG(INFO) << "running auto-GC to free " << options.maxFreed << " bytes"; + + GCResults results; + + collectGarbage(options, results); + + _state.lock()->availAfterGC = getAvail(); + + } catch (...) { + // FIXME: we could propagate the exception to the + // future, but we don't really care. + ignoreException(); + } + }).detach(); + } + +sync: + // Wait for the future outside of the state lock. + if (sync) { + future.get(); + } +} + +} // namespace nix diff --git a/third_party/nix/src/libstore/globals.cc b/third_party/nix/src/libstore/globals.cc new file mode 100644 index 000000000000..066de443653b --- /dev/null +++ b/third_party/nix/src/libstore/globals.cc @@ -0,0 +1,207 @@ +#include "globals.hh" + +#include <algorithm> +#include <map> +#include <thread> + +#include <dlfcn.h> + +#include "archive.hh" +#include "args.hh" +#include "util.hh" + +namespace nix { + +/* The default location of the daemon socket, relative to nixStateDir. + The socket is in a directory to allow you to control access to the + Nix daemon by setting the mode/ownership of the directory + appropriately. (This wouldn't work on the socket itself since it + must be deleted and recreated on startup.) */ +#define DEFAULT_SOCKET_PATH "/daemon-socket/socket" + +/* chroot-like behavior from Apple's sandbox */ +#if __APPLE__ +#define DEFAULT_ALLOWED_IMPURE_PREFIXES "/System/Library /usr/lib /dev /bin/sh" +#else +#define DEFAULT_ALLOWED_IMPURE_PREFIXES "" +#endif + +Settings settings; + +static GlobalConfig::Register r1(&settings); + +Settings::Settings() + : nixPrefix(NIX_PREFIX), + nixStore(canonPath( + getEnv("NIX_STORE_DIR", getEnv("NIX_STORE", NIX_STORE_DIR)))), + nixDataDir(canonPath(getEnv("NIX_DATA_DIR", NIX_DATA_DIR))), + nixLogDir(canonPath(getEnv("NIX_LOG_DIR", NIX_LOG_DIR))), + nixStateDir(canonPath(getEnv("NIX_STATE_DIR", NIX_STATE_DIR))), + nixConfDir(canonPath(getEnv("NIX_CONF_DIR", NIX_CONF_DIR))), + nixLibexecDir(canonPath(getEnv("NIX_LIBEXEC_DIR", NIX_LIBEXEC_DIR))), + nixBinDir(canonPath(getEnv("NIX_BIN_DIR", NIX_BIN_DIR))), + nixManDir(canonPath(NIX_MAN_DIR)), + nixDaemonSocketFile(canonPath(nixStateDir + DEFAULT_SOCKET_PATH)) { + buildUsersGroup = getuid() == 0 ? "nixbld" : ""; + lockCPU = getEnv("NIX_AFFINITY_HACK", "1") == "1"; + + caFile = getEnv("NIX_SSL_CERT_FILE", getEnv("SSL_CERT_FILE", "")); + if (caFile.empty()) { + for (auto& fn : + {"/etc/ssl/certs/ca-certificates.crt", + "/nix/var/nix/profiles/default/etc/ssl/certs/ca-bundle.crt"}) { + if (pathExists(fn)) { + caFile = fn; + break; + } + } + } + + /* Backwards compatibility. */ + auto s = getEnv("NIX_REMOTE_SYSTEMS"); + if (!s.empty()) { + Strings ss; + for (auto& p : tokenizeString<Strings>(s, ":")) { + ss.push_back("@" + p); + } + builders = concatStringsSep(" ", ss); + } + +#if defined(__linux__) && defined(SANDBOX_SHELL) + sandboxPaths = tokenizeString<StringSet>("/bin/sh=" SANDBOX_SHELL); +#endif + + allowedImpureHostPrefixes = + tokenizeString<StringSet>(DEFAULT_ALLOWED_IMPURE_PREFIXES); +} + +void loadConfFile() { + globalConfig.applyConfigFile(settings.nixConfDir + "/nix.conf"); + + /* We only want to send overrides to the daemon, i.e. stuff from + ~/.nix/nix.conf or the command line. */ + globalConfig.resetOverriden(); + + auto dirs = getConfigDirs(); + // Iterate over them in reverse so that the ones appearing first in the path + // take priority + for (auto dir = dirs.rbegin(); dir != dirs.rend(); dir++) { + globalConfig.applyConfigFile(*dir + "/nix/nix.conf"); + } +} + +unsigned int Settings::getDefaultCores() { + return std::max(1U, std::thread::hardware_concurrency()); +} + +StringSet Settings::getDefaultSystemFeatures() { + /* For backwards compatibility, accept some "features" that are + used in Nixpkgs to route builds to certain machines but don't + actually require anything special on the machines. */ + StringSet features{"nixos-test", "benchmark", "big-parallel"}; + +#if __linux__ + if (access("/dev/kvm", R_OK | W_OK) == 0) { + features.insert("kvm"); + } +#endif + + return features; +} + +const string nixVersion = PACKAGE_VERSION; + +template <> +void BaseSetting<SandboxMode>::set(const std::string& str) { + if (str == "true") { + value = smEnabled; + } else if (str == "relaxed") { + value = smRelaxed; + } else if (str == "false") { + value = smDisabled; + } else { + throw UsageError("option '%s' has invalid value '%s'", name, str); + } +} + +template <> +std::string BaseSetting<SandboxMode>::to_string() { + if (value == smEnabled) { + return "true"; + } + if (value == smRelaxed) { + return "relaxed"; + } else if (value == smDisabled) { + return "false"; + } else { + abort(); + } +} + +template <> +void BaseSetting<SandboxMode>::toJSON(JSONPlaceholder& out) { + AbstractSetting::toJSON(out); +} + +template <> +void BaseSetting<SandboxMode>::convertToArg(Args& args, + const std::string& category) { + args.mkFlag() + .longName(name) + .description("Enable sandboxing.") + .handler([=](const std::vector<std::string>& ss) { override(smEnabled); }) + .category(category); + args.mkFlag() + .longName("no-" + name) + .description("Disable sandboxing.") + .handler( + [=](const std::vector<std::string>& ss) { override(smDisabled); }) + .category(category); + args.mkFlag() + .longName("relaxed-" + name) + .description("Enable sandboxing, but allow builds to disable it.") + .handler([=](const std::vector<std::string>& ss) { override(smRelaxed); }) + .category(category); +} + +void MaxBuildJobsSetting::set(const std::string& str) { + if (str == "auto") { + value = std::max(1U, std::thread::hardware_concurrency()); + } else if (!string2Int(str, value)) { + throw UsageError( + "configuration setting '%s' should be 'auto' or an integer", name); + } +} + +void initPlugins() { + for (const auto& pluginFile : settings.pluginFiles.get()) { + Paths pluginFiles; + try { + auto ents = readDirectory(pluginFile); + for (const auto& ent : ents) { + pluginFiles.emplace_back(pluginFile + "/" + ent.name); + } + } catch (SysError& e) { + if (e.errNo != ENOTDIR) { + throw; + } + pluginFiles.emplace_back(pluginFile); + } + for (const auto& file : pluginFiles) { + /* handle is purposefully leaked as there may be state in the + DSO needed by the action of the plugin. */ + void* handle = dlopen(file.c_str(), RTLD_LAZY | RTLD_LOCAL); + if (handle == nullptr) { + throw Error("could not dynamically open plugin file '%s': %s", file, + dlerror()); + } + } + } + + /* Since plugins can add settings, try to re-apply previously + unknown settings. */ + globalConfig.reapplyUnknownSettings(); + globalConfig.warnUnknownSettings(); +} + +} // namespace nix diff --git a/third_party/nix/src/libstore/globals.hh b/third_party/nix/src/libstore/globals.hh new file mode 100644 index 000000000000..8e04b0937457 --- /dev/null +++ b/third_party/nix/src/libstore/globals.hh @@ -0,0 +1,479 @@ +#pragma once + +#include <limits> +#include <map> + +#include <sys/types.h> + +#include "config.hh" +#include "types.hh" +#include "util.hh" + +namespace nix { + +typedef enum { smEnabled, smRelaxed, smDisabled } SandboxMode; + +struct MaxBuildJobsSetting : public BaseSetting<unsigned int> { + MaxBuildJobsSetting(Config* options, unsigned int def, + const std::string& name, const std::string& description, + const std::set<std::string>& aliases = {}) + : BaseSetting<unsigned int>(def, name, description, aliases) { + options->addSetting(this); + } + + void set(const std::string& str) override; +}; + +class Settings : public Config { + static unsigned int getDefaultCores(); + + static StringSet getDefaultSystemFeatures(); + + public: + Settings(); + + Path nixPrefix; + + /* The directory where we store sources and derived files. */ + Path nixStore; + + Path nixDataDir; /* !!! fix */ + + /* The directory where we log various operations. */ + Path nixLogDir; + + /* The directory where state is stored. */ + Path nixStateDir; + + /* The directory where configuration files are stored. */ + Path nixConfDir; + + /* The directory where internal helper programs are stored. */ + Path nixLibexecDir; + + /* The directory where the main programs are stored. */ + Path nixBinDir; + + /* The directory where the man pages are stored. */ + Path nixManDir; + + /* File name of the socket the daemon listens to. */ + Path nixDaemonSocketFile; + + Setting<std::string> storeUri{this, getEnv("NIX_REMOTE", "auto"), "store", + "The default Nix store to use."}; + + Setting<bool> keepFailed{ + this, false, "keep-failed", + "Whether to keep temporary directories of failed builds."}; + + Setting<bool> keepGoing{ + this, false, "keep-going", + "Whether to keep building derivations when another build fails."}; + + Setting<bool> tryFallback{ + this, + false, + "fallback", + "Whether to fall back to building when substitution fails.", + {"build-fallback"}}; + + /* Whether to show build log output in real time. */ + bool verboseBuild = true; + + Setting<size_t> logLines{ + this, 10, "log-lines", + "If verbose-build is false, the number of lines of the tail of " + "the log to show if a build fails."}; + + MaxBuildJobsSetting maxBuildJobs{this, + 1, + "max-jobs", + "Maximum number of parallel build jobs. " + "\"auto\" means use number of cores.", + {"build-max-jobs"}}; + + Setting<unsigned int> buildCores{ + this, + getDefaultCores(), + "cores", + "Number of CPU cores to utilize in parallel within a build, " + "i.e. by passing this number to Make via '-j'. 0 means that the " + "number of actual CPU cores on the local host ought to be " + "auto-detected.", + {"build-cores"}}; + + /* Read-only mode. Don't copy stuff to the store, don't change + the database. */ + bool readOnlyMode = false; + + Setting<std::string> thisSystem{this, SYSTEM, "system", + "The canonical Nix system name."}; + + Setting<time_t> maxSilentTime{ + this, + 0, + "max-silent-time", + "The maximum time in seconds that a builer can go without " + "producing any output on stdout/stderr before it is killed. " + "0 means infinity.", + {"build-max-silent-time"}}; + + Setting<time_t> buildTimeout{ + this, + 0, + "timeout", + "The maximum duration in seconds that a builder can run. " + "0 means infinity.", + {"build-timeout"}}; + + PathSetting buildHook{this, true, nixLibexecDir + "/nix/build-remote", + "build-hook", + "The path of the helper program that executes builds " + "to remote machines."}; + + Setting<std::string> builders{this, "@" + nixConfDir + "/machines", + "builders", + "A semicolon-separated list of build machines, " + "in the format of nix.machines."}; + + Setting<bool> buildersUseSubstitutes{ + this, false, "builders-use-substitutes", + "Whether build machines should use their own substitutes for obtaining " + "build dependencies if possible, rather than waiting for this host to " + "upload them."}; + + Setting<off_t> reservedSize{ + this, 8 * 1024 * 1024, "gc-reserved-space", + "Amount of reserved disk space for the garbage collector."}; + + Setting<bool> fsyncMetadata{this, true, "fsync-metadata", + "Whether SQLite should use fsync()."}; + + Setting<bool> useSQLiteWAL{this, true, "use-sqlite-wal", + "Whether SQLite should use WAL mode."}; + + Setting<bool> syncBeforeRegistering{ + this, false, "sync-before-registering", + "Whether to call sync() before registering a path as valid."}; + + Setting<bool> useSubstitutes{this, + true, + "substitute", + "Whether to use substitutes.", + {"build-use-substitutes"}}; + + Setting<std::string> buildUsersGroup{ + this, "", "build-users-group", + "The Unix group that contains the build users."}; + + Setting<bool> impersonateLinux26{ + this, + false, + "impersonate-linux-26", + "Whether to impersonate a Linux 2.6 machine on newer kernels.", + {"build-impersonate-linux-26"}}; + + Setting<bool> keepLog{this, + true, + "keep-build-log", + "Whether to store build logs.", + {"build-keep-log"}}; + + Setting<bool> compressLog{this, + true, + "compress-build-log", + "Whether to compress logs.", + {"build-compress-log"}}; + + Setting<unsigned long> maxLogSize{ + this, + 0, + "max-build-log-size", + "Maximum number of bytes a builder can write to stdout/stderr " + "before being killed (0 means no limit).", + {"build-max-log-size"}}; + + /* When buildRepeat > 0 and verboseBuild == true, whether to print + repeated builds (i.e. builds other than the first one) to + stderr. Hack to prevent Hydra logs from being polluted. */ + bool printRepeatedBuilds = true; + + Setting<unsigned int> pollInterval{ + this, 5, "build-poll-interval", + "How often (in seconds) to poll for locks."}; + + Setting<bool> checkRootReachability{ + this, false, "gc-check-reachability", + "Whether to check if new GC roots can in fact be found by the " + "garbage collector."}; + + Setting<bool> gcKeepOutputs{ + this, + false, + "keep-outputs", + "Whether the garbage collector should keep outputs of live derivations.", + {"gc-keep-outputs"}}; + + Setting<bool> gcKeepDerivations{ + this, + true, + "keep-derivations", + "Whether the garbage collector should keep derivers of live paths.", + {"gc-keep-derivations"}}; + + Setting<bool> autoOptimiseStore{this, false, "auto-optimise-store", + "Whether to automatically replace files with " + "identical contents with hard links."}; + + Setting<bool> envKeepDerivations{ + this, + false, + "keep-env-derivations", + "Whether to add derivations as a dependency of user environments " + "(to prevent them from being GCed).", + {"env-keep-derivations"}}; + + /* Whether to lock the Nix client and worker to the same CPU. */ + bool lockCPU; + + /* Whether to show a stack trace if Nix evaluation fails. */ + Setting<bool> showTrace{ + this, false, "show-trace", + "Whether to show a stack trace on evaluation errors."}; + + Setting<SandboxMode> sandboxMode { + this, +#if __linux__ + smEnabled +#else + smDisabled +#endif + , + "sandbox", + "Whether to enable sandboxed builds. Can be \"true\", \"false\" or " + "\"relaxed\".", + { + "build-use-chroot", "build-use-sandbox" + } + }; + + Setting<PathSet> sandboxPaths{ + this, + {}, + "sandbox-paths", + "The paths to make available inside the build sandbox.", + {"build-chroot-dirs", "build-sandbox-paths"}}; + + Setting<bool> sandboxFallback{ + this, true, "sandbox-fallback", + "Whether to disable sandboxing when the kernel doesn't allow it."}; + + Setting<PathSet> extraSandboxPaths{ + this, + {}, + "extra-sandbox-paths", + "Additional paths to make available inside the build sandbox.", + {"build-extra-chroot-dirs", "build-extra-sandbox-paths"}}; + + Setting<size_t> buildRepeat{ + this, + 0, + "repeat", + "The number of times to repeat a build in order to verify determinism.", + {"build-repeat"}}; + +#if __linux__ + Setting<std::string> sandboxShmSize{ + this, "50%", "sandbox-dev-shm-size", + "The size of /dev/shm in the build sandbox."}; + + Setting<Path> sandboxBuildDir{this, "/build", "sandbox-build-dir", + "The build directory inside the sandbox."}; +#endif + + Setting<PathSet> allowedImpureHostPrefixes{ + this, + {}, + "allowed-impure-host-deps", + "Which prefixes to allow derivations to ask for access to (primarily for " + "Darwin)."}; + +#if __APPLE__ + Setting<bool> darwinLogSandboxViolations{ + this, false, "darwin-log-sandbox-violations", + "Whether to log Darwin sandbox access violations to the system log."}; +#endif + + Setting<bool> runDiffHook{ + this, false, "run-diff-hook", + "Whether to run the program specified by the diff-hook setting " + "repeated builds produce a different result. Typically used to " + "plug in diffoscope."}; + + PathSetting diffHook{ + this, true, "", "diff-hook", + "A program that prints out the differences between the two paths " + "specified on its command line."}; + + Setting<bool> enforceDeterminism{ + this, true, "enforce-determinism", + "Whether to fail if repeated builds produce different output."}; + + Setting<Strings> trustedPublicKeys{ + this, + {"cache.nixos.org-1:6NCHdD59X431o0gWypbMrAURkbJ16ZPMQFGspcDShjY="}, + "trusted-public-keys", + "Trusted public keys for secure substitution.", + {"binary-cache-public-keys"}}; + + Setting<Strings> secretKeyFiles{ + this, + {}, + "secret-key-files", + "Secret keys with which to sign local builds."}; + + Setting<unsigned int> tarballTtl{ + this, 60 * 60, "tarball-ttl", + "How long downloaded files are considered up-to-date."}; + + Setting<bool> requireSigs{ + this, true, "require-sigs", + "Whether to check that any non-content-addressed path added to the " + "Nix store has a valid signature (that is, one signed using a key " + "listed in 'trusted-public-keys'."}; + + Setting<StringSet> extraPlatforms{ + this, + std::string{SYSTEM} == "x86_64-linux" ? StringSet{"i686-linux"} + : StringSet{}, + "extra-platforms", + "Additional platforms that can be built on the local system. " + "These may be supported natively (e.g. armv7 on some aarch64 CPUs " + "or using hacks like qemu-user."}; + + Setting<StringSet> systemFeatures{ + this, getDefaultSystemFeatures(), "system-features", + "Optional features that this system implements (like \"kvm\")."}; + + Setting<Strings> substituters{ + this, + nixStore == "/nix/store" ? Strings{"https://cache.nixos.org/"} + : Strings(), + "substituters", + "The URIs of substituters (such as https://cache.nixos.org/).", + {"binary-caches"}}; + + // FIXME: provide a way to add to option values. + Setting<Strings> extraSubstituters{this, + {}, + "extra-substituters", + "Additional URIs of substituters.", + {"extra-binary-caches"}}; + + Setting<StringSet> trustedSubstituters{ + this, + {}, + "trusted-substituters", + "Disabled substituters that may be enabled via the substituters option " + "by untrusted users.", + {"trusted-binary-caches"}}; + + Setting<Strings> trustedUsers{this, + {"root"}, + "trusted-users", + "Which users or groups are trusted to ask the " + "daemon to do unsafe things."}; + + Setting<unsigned int> ttlNegativeNarInfoCache{ + this, 3600, "narinfo-cache-negative-ttl", + "The TTL in seconds for negative lookups in the disk cache i.e binary " + "cache lookups that " + "return an invalid path result"}; + + Setting<unsigned int> ttlPositiveNarInfoCache{ + this, 30 * 24 * 3600, "narinfo-cache-positive-ttl", + "The TTL in seconds for positive lookups in the disk cache i.e binary " + "cache lookups that " + "return a valid path result."}; + + /* ?Who we trust to use the daemon in safe ways */ + Setting<Strings> allowedUsers{ + this, + {"*"}, + "allowed-users", + "Which users or groups are allowed to connect to the daemon."}; + + Setting<bool> printMissing{ + this, true, "print-missing", + "Whether to print what paths need to be built or downloaded."}; + + Setting<std::string> preBuildHook{ + this, "", "pre-build-hook", + "A program to run just before a build to set derivation-specific build " + "settings."}; + + Setting<std::string> postBuildHook{ + this, "", "post-build-hook", + "A program to run just after each successful build."}; + + Setting<std::string> netrcFile{this, fmt("%s/%s", nixConfDir, "netrc"), + "netrc-file", + "Path to the netrc file used to obtain " + "usernames/passwords for downloads."}; + + /* Path to the SSL CA file used */ + Path caFile; + +#if __linux__ + Setting<bool> filterSyscalls{ + this, true, "filter-syscalls", + "Whether to prevent certain dangerous system calls, such as " + "creation of setuid/setgid files or adding ACLs or extended " + "attributes. Only disable this if you're aware of the " + "security implications."}; + + Setting<bool> allowNewPrivileges{ + this, false, "allow-new-privileges", + "Whether builders can acquire new privileges by calling programs with " + "setuid/setgid bits or with file capabilities."}; +#endif + + Setting<Strings> hashedMirrors{ + this, + {"http://tarballs.nixos.org/"}, + "hashed-mirrors", + "A list of servers used by builtins.fetchurl to fetch files by hash."}; + + Setting<uint64_t> minFree{this, 0, "min-free", + "Automatically run the garbage collector when free " + "disk space drops below the specified amount."}; + + Setting<uint64_t> maxFree{this, std::numeric_limits<uint64_t>::max(), + "max-free", + "Stop deleting garbage when free disk space is " + "above the specified amount."}; + + Setting<uint64_t> minFreeCheckInterval{ + this, 5, "min-free-check-interval", + "Number of seconds between checking free disk space."}; + + Setting<Paths> pluginFiles{ + this, + {}, + "plugin-files", + "Plugins to dynamically load at nix initialization time."}; +}; + +// FIXME: don't use a global variable. +extern Settings settings; + +/* This should be called after settings are initialized, but before + anything else */ +void initPlugins(); + +void loadConfFile(); + +extern const string nixVersion; + +} // namespace nix diff --git a/third_party/nix/src/libstore/http-binary-cache-store.cc b/third_party/nix/src/libstore/http-binary-cache-store.cc new file mode 100644 index 000000000000..1a46423b3542 --- /dev/null +++ b/third_party/nix/src/libstore/http-binary-cache-store.cc @@ -0,0 +1,169 @@ +#include <utility> + +#include <glog/logging.h> + +#include "binary-cache-store.hh" +#include "download.hh" +#include "globals.hh" +#include "nar-info-disk-cache.hh" + +namespace nix { + +MakeError(UploadToHTTP, Error); + +class HttpBinaryCacheStore : public BinaryCacheStore { + private: + Path cacheUri; + + struct State { + bool enabled = true; + std::chrono::steady_clock::time_point disabledUntil; + }; + + Sync<State> _state; + + public: + HttpBinaryCacheStore(const Params& params, Path _cacheUri) + : BinaryCacheStore(params), cacheUri(std::move(_cacheUri)) { + if (cacheUri.back() == '/') { + cacheUri.pop_back(); + } + + diskCache = getNarInfoDiskCache(); + } + + std::string getUri() override { return cacheUri; } + + void init() override { + // FIXME: do this lazily? + if (!diskCache->cacheExists(cacheUri, wantMassQuery_, priority)) { + try { + BinaryCacheStore::init(); + } catch (UploadToHTTP&) { + throw Error("'%s' does not appear to be a binary cache", cacheUri); + } + diskCache->createCache(cacheUri, storeDir, wantMassQuery_, priority); + } + } + + protected: + void maybeDisable() { + auto state(_state.lock()); + if (state->enabled && settings.tryFallback) { + int t = 60; + LOG(WARNING) << "disabling binary cache '" << getUri() << "' for " << t + << " seconds"; + state->enabled = false; + state->disabledUntil = + std::chrono::steady_clock::now() + std::chrono::seconds(t); + } + } + + void checkEnabled() { + auto state(_state.lock()); + if (state->enabled) { + return; + } + if (std::chrono::steady_clock::now() > state->disabledUntil) { + state->enabled = true; + DLOG(INFO) << "re-enabling binary cache '" << getUri() << "'"; + return; + } + throw SubstituterDisabled("substituter '%s' is disabled", getUri()); + } + + bool fileExists(const std::string& path) override { + checkEnabled(); + + try { + DownloadRequest request(cacheUri + "/" + path); + request.head = true; + getDownloader()->download(request); + return true; + } catch (DownloadError& e) { + /* S3 buckets return 403 if a file doesn't exist and the + bucket is unlistable, so treat 403 as 404. */ + if (e.error == Downloader::NotFound || e.error == Downloader::Forbidden) { + return false; + } + maybeDisable(); + throw; + } + } + + void upsertFile(const std::string& path, const std::string& data, + const std::string& mimeType) override { + auto req = DownloadRequest(cacheUri + "/" + path); + req.data = std::make_shared<string>(data); // FIXME: inefficient + req.mimeType = mimeType; + try { + getDownloader()->download(req); + } catch (DownloadError& e) { + throw UploadToHTTP("while uploading to HTTP binary cache at '%s': %s", + cacheUri, e.msg()); + } + } + + DownloadRequest makeRequest(const std::string& path) { + DownloadRequest request(cacheUri + "/" + path); + return request; + } + + void getFile(const std::string& path, Sink& sink) override { + checkEnabled(); + auto request(makeRequest(path)); + try { + getDownloader()->download(std::move(request), sink); + } catch (DownloadError& e) { + if (e.error == Downloader::NotFound || e.error == Downloader::Forbidden) { + throw NoSuchBinaryCacheFile( + "file '%s' does not exist in binary cache '%s'", path, getUri()); + } + maybeDisable(); + throw; + } + } + + void getFile( + const std::string& path, + Callback<std::shared_ptr<std::string>> callback) noexcept override { + checkEnabled(); + + auto request(makeRequest(path)); + + auto callbackPtr = + std::make_shared<decltype(callback)>(std::move(callback)); + + getDownloader()->enqueueDownload( + request, {[callbackPtr, this](std::future<DownloadResult> result) { + try { + (*callbackPtr)(result.get().data); + } catch (DownloadError& e) { + if (e.error == Downloader::NotFound || + e.error == Downloader::Forbidden) { + return (*callbackPtr)(std::shared_ptr<std::string>()); + } + maybeDisable(); + callbackPtr->rethrow(); + } catch (...) { + callbackPtr->rethrow(); + } + }}); + } +}; + +static RegisterStoreImplementation regStore( + [](const std::string& uri, + const Store::Params& params) -> std::shared_ptr<Store> { + if (std::string(uri, 0, 7) != "http://" && + std::string(uri, 0, 8) != "https://" && + (getEnv("_NIX_FORCE_HTTP_BINARY_CACHE_STORE") != "1" || + std::string(uri, 0, 7) != "file://")) { + return nullptr; + } + auto store = std::make_shared<HttpBinaryCacheStore>(params, uri); + store->init(); + return store; + }); + +} // namespace nix diff --git a/third_party/nix/src/libstore/legacy-ssh-store.cc b/third_party/nix/src/libstore/legacy-ssh-store.cc new file mode 100644 index 000000000000..9a84e0295657 --- /dev/null +++ b/third_party/nix/src/libstore/legacy-ssh-store.cc @@ -0,0 +1,273 @@ +#include "archive.hh" +#include "derivations.hh" +#include "glog/logging.h" +#include "pool.hh" +#include "remote-store.hh" +#include "serve-protocol.hh" +#include "ssh.hh" +#include "store-api.hh" +#include "worker-protocol.hh" + +namespace nix { + +static std::string uriScheme = "ssh://"; + +struct LegacySSHStore : public Store { + const Setting<int> maxConnections{ + this, 1, "max-connections", + "maximum number of concurrent SSH connections"}; + const Setting<Path> sshKey{this, "", "ssh-key", "path to an SSH private key"}; + const Setting<bool> compress{this, false, "compress", + "whether to compress the connection"}; + const Setting<Path> remoteProgram{ + this, "nix-store", "remote-program", + "path to the nix-store executable on the remote system"}; + const Setting<std::string> remoteStore{ + this, "", "remote-store", "URI of the store on the remote system"}; + + // Hack for getting remote build log output. + const Setting<int> logFD{ + this, -1, "log-fd", "file descriptor to which SSH's stderr is connected"}; + + struct Connection { + std::unique_ptr<SSHMaster::Connection> sshConn; + FdSink to; + FdSource from; + int remoteVersion; + bool good = true; + }; + + std::string host; + + ref<Pool<Connection>> connections; + + SSHMaster master; + + LegacySSHStore(const string& host, const Params& params) + : Store(params), + host(host), + connections(make_ref<Pool<Connection>>( + std::max(1, (int)maxConnections), + [this]() { return openConnection(); }, + [](const ref<Connection>& r) { return r->good; })), + master(host, sshKey, + // Use SSH master only if using more than 1 connection. + connections->capacity() > 1, compress, logFD) {} + + ref<Connection> openConnection() { + auto conn = make_ref<Connection>(); + conn->sshConn = master.startCommand( + fmt("%s --serve --write", remoteProgram) + + (remoteStore.get().empty() + ? "" + : " --store " + shellEscape(remoteStore.get()))); + conn->to = FdSink(conn->sshConn->in.get()); + conn->from = FdSource(conn->sshConn->out.get()); + + try { + conn->to << SERVE_MAGIC_1 << SERVE_PROTOCOL_VERSION; + conn->to.flush(); + + unsigned int magic = readInt(conn->from); + if (magic != SERVE_MAGIC_2) { + throw Error("protocol mismatch with 'nix-store --serve' on '%s'", host); + } + conn->remoteVersion = readInt(conn->from); + if (GET_PROTOCOL_MAJOR(conn->remoteVersion) != 0x200) { + throw Error("unsupported 'nix-store --serve' protocol version on '%s'", + host); + } + + } catch (EndOfFile& e) { + throw Error("cannot connect to '%1%'", host); + } + + return conn; + }; + + string getUri() override { return uriScheme + host; } + + void queryPathInfoUncached( + const Path& path, + Callback<std::shared_ptr<ValidPathInfo>> callback) noexcept override { + try { + auto conn(connections->get()); + + DLOG(INFO) << "querying remote host '" << host << "' for info on '" + << path << "'"; + + conn->to << cmdQueryPathInfos << PathSet{path}; + conn->to.flush(); + + auto info = std::make_shared<ValidPathInfo>(); + conn->from >> info->path; + if (info->path.empty()) { + return callback(nullptr); + } + assert(path == info->path); + + PathSet references; + conn->from >> info->deriver; + info->references = readStorePaths<PathSet>(*this, conn->from); + readLongLong(conn->from); // download size + info->narSize = readLongLong(conn->from); + + if (GET_PROTOCOL_MINOR(conn->remoteVersion) >= 4) { + auto s = readString(conn->from); + info->narHash = s.empty() ? Hash() : Hash(s); + conn->from >> info->ca; + info->sigs = readStrings<StringSet>(conn->from); + } + + auto s = readString(conn->from); + assert(s.empty()); + + callback(std::move(info)); + } catch (...) { + callback.rethrow(); + } + } + + void addToStore(const ValidPathInfo& info, Source& source, RepairFlag repair, + CheckSigsFlag checkSigs, + std::shared_ptr<FSAccessor> accessor) override { + DLOG(INFO) << "adding path '" << info.path << "' to remote host '" << host + << "'"; + + auto conn(connections->get()); + + if (GET_PROTOCOL_MINOR(conn->remoteVersion) >= 5) { + conn->to << cmdAddToStoreNar << info.path << info.deriver + << info.narHash.to_string(Base16, false) << info.references + << info.registrationTime << info.narSize + << static_cast<uint64_t>(info.ultimate) << info.sigs << info.ca; + try { + copyNAR(source, conn->to); + } catch (...) { + conn->good = false; + throw; + } + conn->to.flush(); + + } else { + conn->to << cmdImportPaths << 1; + try { + copyNAR(source, conn->to); + } catch (...) { + conn->good = false; + throw; + } + conn->to << exportMagic << info.path << info.references << info.deriver + << 0 << 0; + conn->to.flush(); + } + + if (readInt(conn->from) != 1) { + throw Error( + "failed to add path '%s' to remote host '%s', info.path, host"); + } + } + + void narFromPath(const Path& path, Sink& sink) override { + auto conn(connections->get()); + + conn->to << cmdDumpStorePath << path; + conn->to.flush(); + copyNAR(conn->from, sink); + } + + Path queryPathFromHashPart(const string& hashPart) override { + unsupported("queryPathFromHashPart"); + } + + Path addToStore(const string& name, const Path& srcPath, bool recursive, + HashType hashAlgo, PathFilter& filter, + RepairFlag repair) override { + unsupported("addToStore"); + } + + Path addTextToStore(const string& name, const string& s, + const PathSet& references, RepairFlag repair) override { + unsupported("addTextToStore"); + } + + BuildResult buildDerivation(const Path& drvPath, const BasicDerivation& drv, + BuildMode buildMode) override { + auto conn(connections->get()); + + conn->to << cmdBuildDerivation << drvPath << drv << settings.maxSilentTime + << settings.buildTimeout; + if (GET_PROTOCOL_MINOR(conn->remoteVersion) >= 2) { + conn->to << settings.maxLogSize; + } + if (GET_PROTOCOL_MINOR(conn->remoteVersion) >= 3) { + conn->to << settings.buildRepeat + << static_cast<uint64_t>(settings.enforceDeterminism); + } + + conn->to.flush(); + + BuildResult status; + status.status = (BuildResult::Status)readInt(conn->from); + conn->from >> status.errorMsg; + + if (GET_PROTOCOL_MINOR(conn->remoteVersion) >= 3) { + conn->from >> status.timesBuilt >> status.isNonDeterministic >> + status.startTime >> status.stopTime; + } + + return status; + } + + void ensurePath(const Path& path) override { unsupported("ensurePath"); } + + void computeFSClosure(const PathSet& paths, PathSet& out, + bool flipDirection = false, bool includeOutputs = false, + bool includeDerivers = false) override { + if (flipDirection || includeDerivers) { + Store::computeFSClosure(paths, out, flipDirection, includeOutputs, + includeDerivers); + return; + } + + auto conn(connections->get()); + + conn->to << cmdQueryClosure << static_cast<uint64_t>(includeOutputs) + << paths; + conn->to.flush(); + + auto res = readStorePaths<PathSet>(*this, conn->from); + + out.insert(res.begin(), res.end()); + } + + PathSet queryValidPaths(const PathSet& paths, SubstituteFlag maybeSubstitute = + NoSubstitute) override { + auto conn(connections->get()); + + conn->to << cmdQueryValidPaths << 0u // lock + << maybeSubstitute << paths; + conn->to.flush(); + + return readStorePaths<PathSet>(*this, conn->from); + } + + void connect() override { auto conn(connections->get()); } + + unsigned int getProtocol() override { + auto conn(connections->get()); + return conn->remoteVersion; + } +}; + +static RegisterStoreImplementation regStore( + [](const std::string& uri, + const Store::Params& params) -> std::shared_ptr<Store> { + if (std::string(uri, 0, uriScheme.size()) != uriScheme) { + return nullptr; + } + return std::make_shared<LegacySSHStore>( + std::string(uri, uriScheme.size()), params); + }); + +} // namespace nix diff --git a/third_party/nix/src/libstore/local-binary-cache-store.cc b/third_party/nix/src/libstore/local-binary-cache-store.cc new file mode 100644 index 000000000000..00bb21eb24fe --- /dev/null +++ b/third_party/nix/src/libstore/local-binary-cache-store.cc @@ -0,0 +1,91 @@ +#include <utility> + +#include "binary-cache-store.hh" +#include "globals.hh" +#include "nar-info-disk-cache.hh" + +namespace nix { + +class LocalBinaryCacheStore : public BinaryCacheStore { + private: + Path binaryCacheDir; + + public: + LocalBinaryCacheStore(const Params& params, Path binaryCacheDir) + : BinaryCacheStore(params), binaryCacheDir(std::move(binaryCacheDir)) {} + + void init() override; + + std::string getUri() override { return "file://" + binaryCacheDir; } + + protected: + bool fileExists(const std::string& path) override; + + void upsertFile(const std::string& path, const std::string& data, + const std::string& mimeType) override; + + void getFile(const std::string& path, Sink& sink) override { + try { + readFile(binaryCacheDir + "/" + path, sink); + } catch (SysError& e) { + if (e.errNo == ENOENT) { + throw NoSuchBinaryCacheFile("file '%s' does not exist in binary cache", + path); + } + } + } + + PathSet queryAllValidPaths() override { + PathSet paths; + + for (auto& entry : readDirectory(binaryCacheDir)) { + if (entry.name.size() != 40 || !hasSuffix(entry.name, ".narinfo")) { + continue; + } + paths.insert(storeDir + "/" + + entry.name.substr(0, entry.name.size() - 8)); + } + + return paths; + } +}; + +void LocalBinaryCacheStore::init() { + createDirs(binaryCacheDir + "/nar"); + BinaryCacheStore::init(); +} + +static void atomicWrite(const Path& path, const std::string& s) { + Path tmp = path + ".tmp." + std::to_string(getpid()); + AutoDelete del(tmp, false); + writeFile(tmp, s); + if (rename(tmp.c_str(), path.c_str()) != 0) { + throw SysError(format("renaming '%1%' to '%2%'") % tmp % path); + } + del.cancel(); +} + +bool LocalBinaryCacheStore::fileExists(const std::string& path) { + return pathExists(binaryCacheDir + "/" + path); +} + +void LocalBinaryCacheStore::upsertFile(const std::string& path, + const std::string& data, + const std::string& mimeType) { + atomicWrite(binaryCacheDir + "/" + path, data); +} + +static RegisterStoreImplementation regStore( + [](const std::string& uri, + const Store::Params& params) -> std::shared_ptr<Store> { + if (getEnv("_NIX_FORCE_HTTP_BINARY_CACHE_STORE") == "1" || + std::string(uri, 0, 7) != "file://") { + return nullptr; + } + auto store = + std::make_shared<LocalBinaryCacheStore>(params, std::string(uri, 7)); + store->init(); + return store; + }); + +} // namespace nix diff --git a/third_party/nix/src/libstore/local-fs-store.cc b/third_party/nix/src/libstore/local-fs-store.cc new file mode 100644 index 000000000000..2120afc0ce7c --- /dev/null +++ b/third_party/nix/src/libstore/local-fs-store.cc @@ -0,0 +1,122 @@ +#include "archive.hh" +#include "compression.hh" +#include "derivations.hh" +#include "fs-accessor.hh" +#include "globals.hh" +#include "store-api.hh" + +namespace nix { + +LocalFSStore::LocalFSStore(const Params& params) : Store(params) {} + +struct LocalStoreAccessor : public FSAccessor { + ref<LocalFSStore> store; + + explicit LocalStoreAccessor(const ref<LocalFSStore>& store) : store(store) {} + + Path toRealPath(const Path& path) { + Path storePath = store->toStorePath(path); + if (!store->isValidPath(storePath)) { + throw InvalidPath(format("path '%1%' is not a valid store path") % + storePath); + } + return store->getRealStoreDir() + std::string(path, store->storeDir.size()); + } + + FSAccessor::Stat stat(const Path& path) override { + auto realPath = toRealPath(path); + + struct stat st; + if (lstat(realPath.c_str(), &st) != 0) { + if (errno == ENOENT || errno == ENOTDIR) { + return {Type::tMissing, 0, false}; + } + throw SysError(format("getting status of '%1%'") % path); + } + + if (!S_ISREG(st.st_mode) && !S_ISDIR(st.st_mode) && !S_ISLNK(st.st_mode)) { + throw Error(format("file '%1%' has unsupported type") % path); + } + + return {S_ISREG(st.st_mode) + ? Type::tRegular + : S_ISLNK(st.st_mode) ? Type::tSymlink : Type::tDirectory, + S_ISREG(st.st_mode) ? (uint64_t)st.st_size : 0, + S_ISREG(st.st_mode) && ((st.st_mode & S_IXUSR) != 0u)}; + } + + StringSet readDirectory(const Path& path) override { + auto realPath = toRealPath(path); + + auto entries = nix::readDirectory(realPath); + + StringSet res; + for (auto& entry : entries) { + res.insert(entry.name); + } + + return res; + } + + std::string readFile(const Path& path) override { + return nix::readFile(toRealPath(path)); + } + + std::string readLink(const Path& path) override { + return nix::readLink(toRealPath(path)); + } +}; + +ref<FSAccessor> LocalFSStore::getFSAccessor() { + return make_ref<LocalStoreAccessor>(ref<LocalFSStore>( + std::dynamic_pointer_cast<LocalFSStore>(shared_from_this()))); +} + +void LocalFSStore::narFromPath(const Path& path, Sink& sink) { + if (!isValidPath(path)) { + throw Error(format("path '%s' is not valid") % path); + } + dumpPath(getRealStoreDir() + std::string(path, storeDir.size()), sink); +} + +const string LocalFSStore::drvsLogDir = "drvs"; + +std::shared_ptr<std::string> LocalFSStore::getBuildLog(const Path& path_) { + auto path(path_); + + assertStorePath(path); + + if (!isDerivation(path)) { + try { + path = queryPathInfo(path)->deriver; + } catch (InvalidPath&) { + return nullptr; + } + if (path.empty()) { + return nullptr; + } + } + + string baseName = baseNameOf(path); + + for (int j = 0; j < 2; j++) { + Path logPath = j == 0 ? fmt("%s/%s/%s/%s", logDir, drvsLogDir, + string(baseName, 0, 2), string(baseName, 2)) + : fmt("%s/%s/%s", logDir, drvsLogDir, baseName); + Path logBz2Path = logPath + ".bz2"; + + if (pathExists(logPath)) { + return std::make_shared<std::string>(readFile(logPath)); + } + if (pathExists(logBz2Path)) { + try { + return decompress("bzip2", readFile(logBz2Path)); + } catch (Error&) { + } + } + } + + return nullptr; +} + +} // namespace nix diff --git a/third_party/nix/src/libstore/local-store.cc b/third_party/nix/src/libstore/local-store.cc new file mode 100644 index 000000000000..1d162ad6be10 --- /dev/null +++ b/third_party/nix/src/libstore/local-store.cc @@ -0,0 +1,1534 @@ +#include "local-store.hh" + +#include <algorithm> +#include <cerrno> +#include <cstdio> +#include <cstring> +#include <ctime> +#include <iostream> + +#include <fcntl.h> +#include <glog/logging.h> +#include <grp.h> +#include <sys/select.h> +#include <sys/stat.h> +#include <sys/time.h> +#include <sys/types.h> +#include <unistd.h> +#include <utime.h> + +#include "archive.hh" +#include "derivations.hh" +#include "globals.hh" +#include "nar-info.hh" +#include "pathlocks.hh" +#include "worker-protocol.hh" + +#if __linux__ +#include <sched.h> +#include <sys/ioctl.h> +#include <sys/mount.h> +#include <sys/statvfs.h> +#include <sys/xattr.h> +#endif + +#ifdef __CYGWIN__ +#include <windows.h> +#endif + +#include <sqlite3.h> + +namespace nix { + +LocalStore::LocalStore(const Params& params) + : Store(params), + LocalFSStore(params), + realStoreDir_{this, false, + rootDir != "" ? rootDir + "/nix/store" : storeDir, "real", + "physical path to the Nix store"}, + realStoreDir(realStoreDir_), + dbDir(stateDir + "/db"), + linksDir(realStoreDir + "/.links"), + reservedPath(dbDir + "/reserved"), + schemaPath(dbDir + "/schema"), + trashDir(realStoreDir + "/trash"), + tempRootsDir(stateDir + "/temproots"), + fnTempRoots(fmt("%s/%d", tempRootsDir, getpid())) { + auto state(_state.lock()); + + /* Create missing state directories if they don't already exist. */ + createDirs(realStoreDir); + makeStoreWritable(); + createDirs(linksDir); + Path profilesDir = stateDir + "/profiles"; + createDirs(profilesDir); + createDirs(tempRootsDir); + createDirs(dbDir); + Path gcRootsDir = stateDir + "/gcroots"; + if (!pathExists(gcRootsDir)) { + createDirs(gcRootsDir); + createSymlink(profilesDir, gcRootsDir + "/profiles"); + } + + for (auto& perUserDir : + {profilesDir + "/per-user", gcRootsDir + "/per-user"}) { + createDirs(perUserDir); + if (chmod(perUserDir.c_str(), 0755) == -1) { + throw SysError("could not set permissions on '%s' to 755", perUserDir); + } + } + + createUser(getUserName(), getuid()); + + /* Optionally, create directories and set permissions for a + multi-user install. */ + if (getuid() == 0 && settings.buildUsersGroup != "") { + mode_t perm = 01775; + + struct group* gr = getgrnam(settings.buildUsersGroup.get().c_str()); + if (gr == nullptr) { + LOG(ERROR) << "warning: the group '" << settings.buildUsersGroup + << "' specified in 'build-users-group' does not exist"; + } else { + struct stat st; + if (stat(realStoreDir.c_str(), &st) != 0) { + throw SysError(format("getting attributes of path '%1%'") % + realStoreDir); + } + + if (st.st_uid != 0 || st.st_gid != gr->gr_gid || + (st.st_mode & ~S_IFMT) != perm) { + if (chown(realStoreDir.c_str(), 0, gr->gr_gid) == -1) { + throw SysError(format("changing ownership of path '%1%'") % + realStoreDir); + } + if (chmod(realStoreDir.c_str(), perm) == -1) { + throw SysError(format("changing permissions on path '%1%'") % + realStoreDir); + } + } + } + } + + /* Ensure that the store and its parents are not symlinks. */ + if (getEnv("NIX_IGNORE_SYMLINK_STORE") != "1") { + Path path = realStoreDir; + struct stat st; + while (path != "/") { + if (lstat(path.c_str(), &st) != 0) { + throw SysError(format("getting status of '%1%'") % path); + } + if (S_ISLNK(st.st_mode)) { + throw Error(format("the path '%1%' is a symlink; " + "this is not allowed for the Nix store and its " + "parent directories") % + path); + } + path = dirOf(path); + } + } + + /* We can't open a SQLite database if the disk is full. Since + this prevents the garbage collector from running when it's most + needed, we reserve some dummy space that we can free just + before doing a garbage collection. */ + try { + struct stat st; + if (stat(reservedPath.c_str(), &st) == -1 || + st.st_size != settings.reservedSize) { + AutoCloseFD fd = + open(reservedPath.c_str(), O_WRONLY | O_CREAT | O_CLOEXEC, 0600); + int res = -1; +#if HAVE_POSIX_FALLOCATE + res = posix_fallocate(fd.get(), 0, settings.reservedSize); +#endif + if (res == -1) { + writeFull(fd.get(), string(settings.reservedSize, 'X')); + [[gnu::unused]] auto res2 = ftruncate(fd.get(), settings.reservedSize); + } + } + } catch (SysError& e) { /* don't care about errors */ + } + + /* Acquire the big fat lock in shared mode to make sure that no + schema upgrade is in progress. */ + Path globalLockPath = dbDir + "/big-lock"; + globalLock = openLockFile(globalLockPath, true); + + if (!lockFile(globalLock.get(), ltRead, false)) { + LOG(INFO) << "waiting for the big Nix store lock..."; + lockFile(globalLock.get(), ltRead, true); + } + + /* Check the current database schema and if necessary do an + upgrade. */ + int curSchema = getSchema(); + if (curSchema > nixSchemaVersion) { + throw Error( + format( + "current Nix store schema is version %1%, but I only support %2%") % + curSchema % nixSchemaVersion); + } + if (curSchema == 0) { /* new store */ + curSchema = nixSchemaVersion; + openDB(*state, true); + writeFile(schemaPath, (format("%1%") % nixSchemaVersion).str()); + } else if (curSchema < nixSchemaVersion) { + if (curSchema < 5) { + throw Error( + "Your Nix store has a database in Berkeley DB format,\n" + "which is no longer supported. To convert to the new format,\n" + "please upgrade Nix to version 0.12 first."); + } + + if (curSchema < 6) { + throw Error( + "Your Nix store has a database in flat file format,\n" + "which is no longer supported. To convert to the new format,\n" + "please upgrade Nix to version 1.11 first."); + } + + if (!lockFile(globalLock.get(), ltWrite, false)) { + LOG(INFO) << "waiting for exclusive access to the Nix store..."; + lockFile(globalLock.get(), ltWrite, true); + } + + /* Get the schema version again, because another process may + have performed the upgrade already. */ + curSchema = getSchema(); + + if (curSchema < 7) { + upgradeStore7(); + } + + openDB(*state, false); + + if (curSchema < 8) { + SQLiteTxn txn(state->db); + state->db.exec("alter table ValidPaths add column ultimate integer"); + state->db.exec("alter table ValidPaths add column sigs text"); + txn.commit(); + } + + if (curSchema < 9) { + SQLiteTxn txn(state->db); + state->db.exec("drop table FailedPaths"); + txn.commit(); + } + + if (curSchema < 10) { + SQLiteTxn txn(state->db); + state->db.exec("alter table ValidPaths add column ca text"); + txn.commit(); + } + + writeFile(schemaPath, (format("%1%") % nixSchemaVersion).str()); + + lockFile(globalLock.get(), ltRead, true); + } else { + openDB(*state, false); + } + + /* Prepare SQL statements. */ + state->stmtRegisterValidPath.create( + state->db, + "insert into ValidPaths (path, hash, registrationTime, deriver, narSize, " + "ultimate, sigs, ca) values (?, ?, ?, ?, ?, ?, ?, ?);"); + state->stmtUpdatePathInfo.create( + state->db, + "update ValidPaths set narSize = ?, hash = ?, ultimate = ?, sigs = ?, ca " + "= ? where path = ?;"); + state->stmtAddReference.create( + state->db, + "insert or replace into Refs (referrer, reference) values (?, ?);"); + state->stmtQueryPathInfo.create( + state->db, + "select id, hash, registrationTime, deriver, narSize, ultimate, sigs, ca " + "from ValidPaths where path = ?;"); + state->stmtQueryReferences.create(state->db, + "select path from Refs join ValidPaths on " + "reference = id where referrer = ?;"); + state->stmtQueryReferrers.create( + state->db, + "select path from Refs join ValidPaths on referrer = id where reference " + "= (select id from ValidPaths where path = ?);"); + state->stmtInvalidatePath.create(state->db, + "delete from ValidPaths where path = ?;"); + state->stmtAddDerivationOutput.create( + state->db, + "insert or replace into DerivationOutputs (drv, id, path) values (?, ?, " + "?);"); + state->stmtQueryValidDerivers.create( + state->db, + "select v.id, v.path from DerivationOutputs d join ValidPaths v on d.drv " + "= v.id where d.path = ?;"); + state->stmtQueryDerivationOutputs.create( + state->db, "select id, path from DerivationOutputs where drv = ?;"); + // Use "path >= ?" with limit 1 rather than "path like '?%'" to + // ensure efficient lookup. + state->stmtQueryPathFromHashPart.create( + state->db, "select path from ValidPaths where path >= ? limit 1;"); + state->stmtQueryValidPaths.create(state->db, "select path from ValidPaths"); +} + +LocalStore::~LocalStore() { + std::shared_future<void> future; + + { + auto state(_state.lock()); + if (state->gcRunning) { + future = state->gcFuture; + } + } + + if (future.valid()) { + LOG(INFO) << "waiting for auto-GC to finish on exit..."; + future.get(); + } + + try { + auto state(_state.lock()); + if (state->fdTempRoots) { + state->fdTempRoots = -1; + unlink(fnTempRoots.c_str()); + } + } catch (...) { + ignoreException(); + } +} + +std::string LocalStore::getUri() { return "local"; } + +int LocalStore::getSchema() { + int curSchema = 0; + if (pathExists(schemaPath)) { + string s = readFile(schemaPath); + if (!string2Int(s, curSchema)) { + throw Error(format("'%1%' is corrupt") % schemaPath); + } + } + return curSchema; +} + +void LocalStore::openDB(State& state, bool create) { + if (access(dbDir.c_str(), R_OK | W_OK) != 0) { + throw SysError(format("Nix database directory '%1%' is not writable") % + dbDir); + } + + /* Open the Nix database. */ + string dbPath = dbDir + "/db.sqlite"; + auto& db(state.db); + if (sqlite3_open_v2(dbPath.c_str(), &db.db, + SQLITE_OPEN_READWRITE | (create ? SQLITE_OPEN_CREATE : 0), + nullptr) != SQLITE_OK) { + throw Error(format("cannot open Nix database '%1%'") % dbPath); + } + +#ifdef __CYGWIN__ + /* The cygwin version of sqlite3 has a patch which calls + SetDllDirectory("/usr/bin") on init. It was intended to fix extension + loading, which we don't use, and the effect of SetDllDirectory is + inherited by child processes, and causes libraries to be loaded from + /usr/bin instead of $PATH. This breaks quite a few things (e.g. + checkPhase on openssh), so we set it back to default behaviour. */ + SetDllDirectoryW(L""); +#endif + + if (sqlite3_busy_timeout(db, 60 * 60 * 1000) != SQLITE_OK) { + throwSQLiteError(db, "setting timeout"); + } + + db.exec("pragma foreign_keys = 1"); + + /* !!! check whether sqlite has been built with foreign key + support */ + + /* Whether SQLite should fsync(). "Normal" synchronous mode + should be safe enough. If the user asks for it, don't sync at + all. This can cause database corruption if the system + crashes. */ + string syncMode = settings.fsyncMetadata ? "normal" : "off"; + db.exec("pragma synchronous = " + syncMode); + + /* Set the SQLite journal mode. WAL mode is fastest, so it's the + default. */ + string mode = settings.useSQLiteWAL ? "wal" : "truncate"; + string prevMode; + { + SQLiteStmt stmt; + stmt.create(db, "pragma main.journal_mode;"); + if (sqlite3_step(stmt) != SQLITE_ROW) { + throwSQLiteError(db, "querying journal mode"); + } + prevMode = string((const char*)sqlite3_column_text(stmt, 0)); + } + if (prevMode != mode && + sqlite3_exec(db, ("pragma main.journal_mode = " + mode + ";").c_str(), + nullptr, nullptr, nullptr) != SQLITE_OK) { + throwSQLiteError(db, "setting journal mode"); + } + + /* Increase the auto-checkpoint interval to 40000 pages. This + seems enough to ensure that instantiating the NixOS system + derivation is done in a single fsync(). */ + if (mode == "wal" && sqlite3_exec(db, "pragma wal_autocheckpoint = 40000;", + nullptr, nullptr, nullptr) != SQLITE_OK) { + throwSQLiteError(db, "setting autocheckpoint interval"); + } + + /* Initialise the database schema, if necessary. */ + if (create) { + const char* schema = +#include "schema.sql.gen.hh" + ; + db.exec(schema); + } +} + +/* To improve purity, users may want to make the Nix store a read-only + bind mount. So make the Nix store writable for this process. */ +void LocalStore::makeStoreWritable() { +#if __linux__ + if (getuid() != 0) { + return; + } + /* Check if /nix/store is on a read-only mount. */ + struct statvfs stat; + if (statvfs(realStoreDir.c_str(), &stat) != 0) { + throw SysError("getting info about the Nix store mount point"); + } + + if ((stat.f_flag & ST_RDONLY) != 0u) { + if (unshare(CLONE_NEWNS) == -1) { + throw SysError("setting up a private mount namespace"); + } + + if (mount(nullptr, realStoreDir.c_str(), "none", MS_REMOUNT | MS_BIND, + nullptr) == -1) { + throw SysError(format("remounting %1% writable") % realStoreDir); + } + } +#endif +} + +const time_t mtimeStore = 1; /* 1 second into the epoch */ + +static void canonicaliseTimestampAndPermissions(const Path& path, + const struct stat& st) { + if (!S_ISLNK(st.st_mode)) { + /* Mask out all type related bits. */ + mode_t mode = st.st_mode & ~S_IFMT; + + if (mode != 0444 && mode != 0555) { + mode = (st.st_mode & S_IFMT) | 0444 | + ((st.st_mode & S_IXUSR) != 0u ? 0111 : 0); + if (chmod(path.c_str(), mode) == -1) { + throw SysError(format("changing mode of '%1%' to %2$o") % path % mode); + } + } + } + + if (st.st_mtime != mtimeStore) { + struct timeval times[2]; + times[0].tv_sec = st.st_atime; + times[0].tv_usec = 0; + times[1].tv_sec = mtimeStore; + times[1].tv_usec = 0; +#if HAVE_LUTIMES + if (lutimes(path.c_str(), times) == -1) { + if (errno != ENOSYS || + (!S_ISLNK(st.st_mode) && utimes(path.c_str(), times) == -1)) { +#else + if (!S_ISLNK(st.st_mode) && utimes(path.c_str(), times) == -1) +#endif + throw SysError(format("changing modification time of '%1%'") % path); + } + } + } // namespace nix +} + +void canonicaliseTimestampAndPermissions(const Path& path) { + struct stat st; + if (lstat(path.c_str(), &st) != 0) { + throw SysError(format("getting attributes of path '%1%'") % path); + } + canonicaliseTimestampAndPermissions(path, st); +} + +static void canonicalisePathMetaData_(const Path& path, uid_t fromUid, + InodesSeen& inodesSeen) { + checkInterrupt(); + +#if __APPLE__ + /* Remove flags, in particular UF_IMMUTABLE which would prevent + the file from being garbage-collected. FIXME: Use + setattrlist() to remove other attributes as well. */ + if (lchflags(path.c_str(), 0)) { + if (errno != ENOTSUP) + throw SysError(format("clearing flags of path '%1%'") % path); + } +#endif + + struct stat st; + if (lstat(path.c_str(), &st) != 0) { + throw SysError(format("getting attributes of path '%1%'") % path); + } + + /* Really make sure that the path is of a supported type. */ + if (!(S_ISREG(st.st_mode) || S_ISDIR(st.st_mode) || S_ISLNK(st.st_mode))) { + throw Error(format("file '%1%' has an unsupported type") % path); + } + +#if __linux__ + /* Remove extended attributes / ACLs. */ + ssize_t eaSize = llistxattr(path.c_str(), nullptr, 0); + + if (eaSize < 0) { + if (errno != ENOTSUP && errno != ENODATA) { + throw SysError("querying extended attributes of '%s'", path); + } + } else if (eaSize > 0) { + std::vector<char> eaBuf(eaSize); + + if ((eaSize = llistxattr(path.c_str(), eaBuf.data(), eaBuf.size())) < 0) { + throw SysError("querying extended attributes of '%s'", path); + } + + for (auto& eaName : tokenizeString<Strings>( + std::string(eaBuf.data(), eaSize), std::string("\000", 1))) { + /* Ignore SELinux security labels since these cannot be + removed even by root. */ + if (eaName == "security.selinux") { + continue; + } + if (lremovexattr(path.c_str(), eaName.c_str()) == -1) { + throw SysError("removing extended attribute '%s' from '%s'", eaName, + path); + } + } + } +#endif + + /* Fail if the file is not owned by the build user. This prevents + us from messing up the ownership/permissions of files + hard-linked into the output (e.g. "ln /etc/shadow $out/foo"). + However, ignore files that we chown'ed ourselves previously to + ensure that we don't fail on hard links within the same build + (i.e. "touch $out/foo; ln $out/foo $out/bar"). */ + if (fromUid != (uid_t)-1 && st.st_uid != fromUid) { + assert(!S_ISDIR(st.st_mode)); + if (inodesSeen.find(Inode(st.st_dev, st.st_ino)) == inodesSeen.end()) { + throw BuildError(format("invalid ownership on file '%1%'") % path); + } + mode_t mode = st.st_mode & ~S_IFMT; + assert(S_ISLNK(st.st_mode) || + (st.st_uid == geteuid() && (mode == 0444 || mode == 0555) && + st.st_mtime == mtimeStore)); + return; + } + + inodesSeen.insert(Inode(st.st_dev, st.st_ino)); + + canonicaliseTimestampAndPermissions(path, st); + + /* Change ownership to the current uid. If it's a symlink, use + lchown if available, otherwise don't bother. Wrong ownership + of a symlink doesn't matter, since the owning user can't change + the symlink and can't delete it because the directory is not + writable. The only exception is top-level paths in the Nix + store (since that directory is group-writable for the Nix build + users group); we check for this case below. */ + if (st.st_uid != geteuid()) { +#if HAVE_LCHOWN + if (lchown(path.c_str(), geteuid(), getegid()) == -1) { +#else + if (!S_ISLNK(st.st_mode) && chown(path.c_str(), geteuid(), getegid()) == -1) +#endif + throw SysError(format("changing owner of '%1%' to %2%") % path % + geteuid()); + } + } + + if (S_ISDIR(st.st_mode)) { + DirEntries entries = readDirectory(path); + for (auto& i : entries) { + canonicalisePathMetaData_(path + "/" + i.name, fromUid, inodesSeen); + } + } +} + +void canonicalisePathMetaData(const Path& path, uid_t fromUid, + InodesSeen& inodesSeen) { + canonicalisePathMetaData_(path, fromUid, inodesSeen); + + /* On platforms that don't have lchown(), the top-level path can't + be a symlink, since we can't change its ownership. */ + struct stat st; + if (lstat(path.c_str(), &st) != 0) { + throw SysError(format("getting attributes of path '%1%'") % path); + } + + if (st.st_uid != geteuid()) { + assert(S_ISLNK(st.st_mode)); + throw Error(format("wrong ownership of top-level store path '%1%'") % path); + } +} + +void canonicalisePathMetaData(const Path& path, uid_t fromUid) { + InodesSeen inodesSeen; + canonicalisePathMetaData(path, fromUid, inodesSeen); +} + +void LocalStore::checkDerivationOutputs(const Path& drvPath, + const Derivation& drv) { + string drvName = storePathToName(drvPath); + assert(isDerivation(drvName)); + drvName = string(drvName, 0, drvName.size() - drvExtension.size()); + + if (drv.isFixedOutput()) { + auto out = drv.outputs.find("out"); + if (out == drv.outputs.end()) { + throw Error( + format("derivation '%1%' does not have an output named 'out'") % + drvPath); + } + + bool recursive; + Hash h; + out->second.parseHashInfo(recursive, h); + Path outPath = makeFixedOutputPath(recursive, h, drvName); + + auto j = drv.env.find("out"); + if (out->second.path != outPath || j == drv.env.end() || + j->second != outPath) { + throw Error( + format( + "derivation '%1%' has incorrect output '%2%', should be '%3%'") % + drvPath % out->second.path % outPath); + } + } + + else { + Derivation drvCopy(drv); + for (auto& i : drvCopy.outputs) { + i.second.path = ""; + drvCopy.env[i.first] = ""; + } + + Hash h = hashDerivationModulo(*this, drvCopy); + + for (auto& i : drv.outputs) { + Path outPath = makeOutputPath(i.first, h, drvName); + auto j = drv.env.find(i.first); + if (i.second.path != outPath || j == drv.env.end() || + j->second != outPath) { + throw Error(format("derivation '%1%' has incorrect output '%2%', " + "should be '%3%'") % + drvPath % i.second.path % outPath); + } + } + } +} + +uint64_t LocalStore::addValidPath(State& state, const ValidPathInfo& info, + bool checkOutputs) { + if (!info.ca.empty() && !info.isContentAddressed(*this)) { + throw Error( + "cannot add path '%s' to the Nix store because it claims to be " + "content-addressed but isn't", + info.path); + } + + state.stmtRegisterValidPath + .use()(info.path)(info.narHash.to_string(Base16))( + info.registrationTime == 0 ? time(nullptr) : info.registrationTime)( + info.deriver, !info.deriver.empty())(info.narSize, info.narSize != 0)( + info.ultimate ? 1 : 0, info.ultimate)( + concatStringsSep(" ", info.sigs), !info.sigs.empty())( + info.ca, !info.ca.empty()) + .exec(); + uint64_t id = sqlite3_last_insert_rowid(state.db); + + /* If this is a derivation, then store the derivation outputs in + the database. This is useful for the garbage collector: it can + efficiently query whether a path is an output of some + derivation. */ + if (isDerivation(info.path)) { + Derivation drv = readDerivation(realStoreDir + "/" + baseNameOf(info.path)); + + /* Verify that the output paths in the derivation are correct + (i.e., follow the scheme for computing output paths from + derivations). Note that if this throws an error, then the + DB transaction is rolled back, so the path validity + registration above is undone. */ + if (checkOutputs) { + checkDerivationOutputs(info.path, drv); + } + + for (auto& i : drv.outputs) { + state.stmtAddDerivationOutput.use()(id)(i.first)(i.second.path).exec(); + } + } + + { + auto state_(Store::state.lock()); + state_->pathInfoCache.upsert(storePathToHash(info.path), + std::make_shared<ValidPathInfo>(info)); + } + + return id; +} + +void LocalStore::queryPathInfoUncached( + const Path& path, + Callback<std::shared_ptr<ValidPathInfo>> callback) noexcept { + try { + auto info = std::make_shared<ValidPathInfo>(); + info->path = path; + + assertStorePath(path); + + callback(retrySQLite<std::shared_ptr<ValidPathInfo>>([&]() { + auto state(_state.lock()); + + /* Get the path info. */ + auto useQueryPathInfo(state->stmtQueryPathInfo.use()(path)); + + if (!useQueryPathInfo.next()) { + return std::shared_ptr<ValidPathInfo>(); + } + + info->id = useQueryPathInfo.getInt(0); + + try { + info->narHash = Hash(useQueryPathInfo.getStr(1)); + } catch (BadHash& e) { + throw Error("in valid-path entry for '%s': %s", path, e.what()); + } + + info->registrationTime = useQueryPathInfo.getInt(2); + + auto s = (const char*)sqlite3_column_text(state->stmtQueryPathInfo, 3); + if (s != nullptr) { + info->deriver = s; + } + + /* Note that narSize = NULL yields 0. */ + info->narSize = useQueryPathInfo.getInt(4); + + info->ultimate = useQueryPathInfo.getInt(5) == 1; + + s = (const char*)sqlite3_column_text(state->stmtQueryPathInfo, 6); + if (s != nullptr) { + info->sigs = tokenizeString<StringSet>(s, " "); + } + + s = (const char*)sqlite3_column_text(state->stmtQueryPathInfo, 7); + if (s != nullptr) { + info->ca = s; + } + + /* Get the references. */ + auto useQueryReferences(state->stmtQueryReferences.use()(info->id)); + + while (useQueryReferences.next()) { + info->references.insert(useQueryReferences.getStr(0)); + } + + return info; + })); + + } catch (...) { + callback.rethrow(); + } +} + +/* Update path info in the database. */ +void LocalStore::updatePathInfo(State& state, const ValidPathInfo& info) { + state.stmtUpdatePathInfo + .use()(info.narSize, info.narSize != 0)(info.narHash.to_string(Base16))( + info.ultimate ? 1 : 0, info.ultimate)( + concatStringsSep(" ", info.sigs), !info.sigs.empty())( + info.ca, !info.ca.empty())(info.path) + .exec(); +} + +uint64_t LocalStore::queryValidPathId(State& state, const Path& path) { + auto use(state.stmtQueryPathInfo.use()(path)); + if (!use.next()) { + throw Error(format("path '%1%' is not valid") % path); + } + return use.getInt(0); +} + +bool LocalStore::isValidPath_(State& state, const Path& path) { + return state.stmtQueryPathInfo.use()(path).next(); +} + +bool LocalStore::isValidPathUncached(const Path& path) { + return retrySQLite<bool>([&]() { + auto state(_state.lock()); + return isValidPath_(*state, path); + }); +} + +PathSet LocalStore::queryValidPaths(const PathSet& paths, + SubstituteFlag maybeSubstitute) { + PathSet res; + for (auto& i : paths) { + if (isValidPath(i)) { + res.insert(i); + } + } + return res; +} + +PathSet LocalStore::queryAllValidPaths() { + return retrySQLite<PathSet>([&]() { + auto state(_state.lock()); + auto use(state->stmtQueryValidPaths.use()); + PathSet res; + while (use.next()) { + res.insert(use.getStr(0)); + } + return res; + }); +} + +void LocalStore::queryReferrers(State& state, const Path& path, + PathSet& referrers) { + auto useQueryReferrers(state.stmtQueryReferrers.use()(path)); + + while (useQueryReferrers.next()) { + referrers.insert(useQueryReferrers.getStr(0)); + } +} + +void LocalStore::queryReferrers(const Path& path, PathSet& referrers) { + assertStorePath(path); + return retrySQLite<void>([&]() { + auto state(_state.lock()); + queryReferrers(*state, path, referrers); + }); +} + +PathSet LocalStore::queryValidDerivers(const Path& path) { + assertStorePath(path); + + return retrySQLite<PathSet>([&]() { + auto state(_state.lock()); + + auto useQueryValidDerivers(state->stmtQueryValidDerivers.use()(path)); + + PathSet derivers; + while (useQueryValidDerivers.next()) { + derivers.insert(useQueryValidDerivers.getStr(1)); + } + + return derivers; + }); +} + +PathSet LocalStore::queryDerivationOutputs(const Path& path) { + return retrySQLite<PathSet>([&]() { + auto state(_state.lock()); + + auto useQueryDerivationOutputs(state->stmtQueryDerivationOutputs.use()( + queryValidPathId(*state, path))); + + PathSet outputs; + while (useQueryDerivationOutputs.next()) { + outputs.insert(useQueryDerivationOutputs.getStr(1)); + } + + return outputs; + }); +} + +StringSet LocalStore::queryDerivationOutputNames(const Path& path) { + return retrySQLite<StringSet>([&]() { + auto state(_state.lock()); + + auto useQueryDerivationOutputs(state->stmtQueryDerivationOutputs.use()( + queryValidPathId(*state, path))); + + StringSet outputNames; + while (useQueryDerivationOutputs.next()) { + outputNames.insert(useQueryDerivationOutputs.getStr(0)); + } + + return outputNames; + }); +} + +Path LocalStore::queryPathFromHashPart(const string& hashPart) { + if (hashPart.size() != storePathHashLen) { + throw Error("invalid hash part"); + } + + Path prefix = storeDir + "/" + hashPart; + + return retrySQLite<Path>([&]() -> std::string { + auto state(_state.lock()); + + auto useQueryPathFromHashPart( + state->stmtQueryPathFromHashPart.use()(prefix)); + + if (!useQueryPathFromHashPart.next()) { + return ""; + } + + const char* s = + (const char*)sqlite3_column_text(state->stmtQueryPathFromHashPart, 0); + return (s != nullptr) && + prefix.compare(0, prefix.size(), s, prefix.size()) == 0 + ? s + : ""; + }); +} + +PathSet LocalStore::querySubstitutablePaths(const PathSet& paths) { + if (!settings.useSubstitutes) { + return PathSet(); + } + + auto remaining = paths; + PathSet res; + + for (auto& sub : getDefaultSubstituters()) { + if (remaining.empty()) { + break; + } + if (sub->storeDir != storeDir) { + continue; + } + if (!sub->wantMassQuery()) { + continue; + } + + auto valid = sub->queryValidPaths(remaining); + + PathSet remaining2; + for (auto& path : remaining) { + if (valid.count(path) != 0u) { + res.insert(path); + } else { + remaining2.insert(path); + } + } + + std::swap(remaining, remaining2); + } + + return res; +} + +void LocalStore::querySubstitutablePathInfos(const PathSet& paths, + SubstitutablePathInfos& infos) { + if (!settings.useSubstitutes) { + return; + } + for (auto& sub : getDefaultSubstituters()) { + if (sub->storeDir != storeDir) { + continue; + } + for (auto& path : paths) { + if (infos.count(path) != 0u) { + continue; + } + DLOG(INFO) << "checking substituter '" << sub->getUri() << "' for path '" + << path << "'"; + try { + auto info = sub->queryPathInfo(path); + auto narInfo = std::dynamic_pointer_cast<const NarInfo>( + std::shared_ptr<const ValidPathInfo>(info)); + infos[path] = SubstitutablePathInfo{info->deriver, info->references, + narInfo ? narInfo->fileSize : 0, + info->narSize}; + } catch (InvalidPath&) { + } catch (SubstituterDisabled&) { + } catch (Error& e) { + if (settings.tryFallback) { + LOG(ERROR) << e.what(); + } else { + throw; + } + } + } + } +} + +void LocalStore::registerValidPath(const ValidPathInfo& info) { + ValidPathInfos infos; + infos.push_back(info); + registerValidPaths(infos); +} + +void LocalStore::registerValidPaths(const ValidPathInfos& infos) { + /* SQLite will fsync by default, but the new valid paths may not + be fsync-ed. So some may want to fsync them before registering + the validity, at the expense of some speed of the path + registering operation. */ + if (settings.syncBeforeRegistering) { + sync(); + } + + return retrySQLite<void>([&]() { + auto state(_state.lock()); + + SQLiteTxn txn(state->db); + PathSet paths; + + for (auto& i : infos) { + assert(i.narHash.type == htSHA256); + if (isValidPath_(*state, i.path)) { + updatePathInfo(*state, i); + } else { + addValidPath(*state, i, false); + } + paths.insert(i.path); + } + + for (auto& i : infos) { + auto referrer = queryValidPathId(*state, i.path); + for (auto& j : i.references) { + state->stmtAddReference.use()(referrer)(queryValidPathId(*state, j)) + .exec(); + } + } + + /* Check that the derivation outputs are correct. We can't do + this in addValidPath() above, because the references might + not be valid yet. */ + for (auto& i : infos) { + if (isDerivation(i.path)) { + // FIXME: inefficient; we already loaded the + // derivation in addValidPath(). + Derivation drv = + readDerivation(realStoreDir + "/" + baseNameOf(i.path)); + checkDerivationOutputs(i.path, drv); + } + } + + /* Do a topological sort of the paths. This will throw an + error if a cycle is detected and roll back the + transaction. Cycles can only occur when a derivation + has multiple outputs. */ + topoSortPaths(paths); + + txn.commit(); + }); +} + +/* Invalidate a path. The caller is responsible for checking that + there are no referrers. */ +void LocalStore::invalidatePath(State& state, const Path& path) { + LOG(INFO) << "invalidating path '" << path << "'"; + + state.stmtInvalidatePath.use()(path).exec(); + + /* Note that the foreign key constraints on the Refs table take + care of deleting the references entries for `path'. */ + + { + auto state_(Store::state.lock()); + state_->pathInfoCache.erase(storePathToHash(path)); + } +} + +const PublicKeys& LocalStore::getPublicKeys() { + auto state(_state.lock()); + if (!state->publicKeys) { + state->publicKeys = std::make_unique<PublicKeys>(getDefaultPublicKeys()); + } + return *state->publicKeys; +} + +void LocalStore::addToStore(const ValidPathInfo& info, Source& source, + RepairFlag repair, CheckSigsFlag checkSigs, + std::shared_ptr<FSAccessor> accessor) { + if (!info.narHash) { + throw Error("cannot add path '%s' because it lacks a hash", info.path); + } + + if (requireSigs && (checkSigs != 0u) && + (info.checkSignatures(*this, getPublicKeys()) == 0u)) { + throw Error("cannot add path '%s' because it lacks a valid signature", + info.path); + } + + addTempRoot(info.path); + + if ((repair != 0u) || !isValidPath(info.path)) { + PathLocks outputLock; + + Path realPath = realStoreDir + "/" + baseNameOf(info.path); + + /* Lock the output path. But don't lock if we're being called + from a build hook (whose parent process already acquired a + lock on this path). */ + if (locksHeld.count(info.path) == 0u) { + outputLock.lockPaths({realPath}); + } + + if ((repair != 0u) || !isValidPath(info.path)) { + deletePath(realPath); + + /* While restoring the path from the NAR, compute the hash + of the NAR. */ + HashSink hashSink(htSHA256); + + LambdaSource wrapperSource( + [&](unsigned char* data, size_t len) -> size_t { + size_t n = source.read(data, len); + hashSink(data, n); + return n; + }); + + restorePath(realPath, wrapperSource); + + auto hashResult = hashSink.finish(); + + if (hashResult.first != info.narHash) { + throw Error( + "hash mismatch importing path '%s';\n wanted: %s\n got: %s", + info.path, info.narHash.to_string(), hashResult.first.to_string()); + } + + if (hashResult.second != info.narSize) { + throw Error( + "size mismatch importing path '%s';\n wanted: %s\n got: %s", + info.path, info.narSize, hashResult.second); + } + + autoGC(); + + canonicalisePathMetaData(realPath, -1); + + optimisePath(realPath); // FIXME: combine with hashPath() + + registerValidPath(info); + } + + outputLock.setDeletion(true); + } +} + +Path LocalStore::addToStoreFromDump(const string& dump, const string& name, + bool recursive, HashType hashAlgo, + RepairFlag repair) { + Hash h = hashString(hashAlgo, dump); + + Path dstPath = makeFixedOutputPath(recursive, h, name); + + addTempRoot(dstPath); + + if ((repair != 0u) || !isValidPath(dstPath)) { + /* The first check above is an optimisation to prevent + unnecessary lock acquisition. */ + + Path realPath = realStoreDir + "/" + baseNameOf(dstPath); + + PathLocks outputLock({realPath}); + + if ((repair != 0u) || !isValidPath(dstPath)) { + deletePath(realPath); + + autoGC(); + + if (recursive) { + StringSource source(dump); + restorePath(realPath, source); + } else { + writeFile(realPath, dump); + } + + canonicalisePathMetaData(realPath, -1); + + /* Register the SHA-256 hash of the NAR serialisation of + the path in the database. We may just have computed it + above (if called with recursive == true and hashAlgo == + sha256); otherwise, compute it here. */ + HashResult hash; + if (recursive) { + hash.first = hashAlgo == htSHA256 ? h : hashString(htSHA256, dump); + hash.second = dump.size(); + } else { + hash = hashPath(htSHA256, realPath); + } + + optimisePath(realPath); // FIXME: combine with hashPath() + + ValidPathInfo info; + info.path = dstPath; + info.narHash = hash.first; + info.narSize = hash.second; + info.ca = makeFixedOutputCA(recursive, h); + registerValidPath(info); + } + + outputLock.setDeletion(true); + } + + return dstPath; +} + +Path LocalStore::addToStore(const string& name, const Path& _srcPath, + bool recursive, HashType hashAlgo, + PathFilter& filter, RepairFlag repair) { + Path srcPath(absPath(_srcPath)); + + /* Read the whole path into memory. This is not a very scalable + method for very large paths, but `copyPath' is mainly used for + small files. */ + StringSink sink; + if (recursive) { + dumpPath(srcPath, sink, filter); + } else { + sink.s = make_ref<std::string>(readFile(srcPath)); + } + + return addToStoreFromDump(*sink.s, name, recursive, hashAlgo, repair); +} + +Path LocalStore::addTextToStore(const string& name, const string& s, + const PathSet& references, RepairFlag repair) { + auto hash = hashString(htSHA256, s); + auto dstPath = makeTextPath(name, hash, references); + + addTempRoot(dstPath); + + if ((repair != 0u) || !isValidPath(dstPath)) { + Path realPath = realStoreDir + "/" + baseNameOf(dstPath); + + PathLocks outputLock({realPath}); + + if ((repair != 0u) || !isValidPath(dstPath)) { + deletePath(realPath); + + autoGC(); + + writeFile(realPath, s); + + canonicalisePathMetaData(realPath, -1); + + StringSink sink; + dumpString(s, sink); + auto narHash = hashString(htSHA256, *sink.s); + + optimisePath(realPath); + + ValidPathInfo info; + info.path = dstPath; + info.narHash = narHash; + info.narSize = sink.s->size(); + info.references = references; + info.ca = "text:" + hash.to_string(); + registerValidPath(info); + } + + outputLock.setDeletion(true); + } + + return dstPath; +} + +/* Create a temporary directory in the store that won't be + garbage-collected. */ +Path LocalStore::createTempDirInStore() { + Path tmpDir; + do { + /* There is a slight possibility that `tmpDir' gets deleted by + the GC between createTempDir() and addTempRoot(), so repeat + until `tmpDir' exists. */ + tmpDir = createTempDir(realStoreDir); + addTempRoot(tmpDir); + } while (!pathExists(tmpDir)); + return tmpDir; +} + +void LocalStore::invalidatePathChecked(const Path& path) { + assertStorePath(path); + + retrySQLite<void>([&]() { + auto state(_state.lock()); + + SQLiteTxn txn(state->db); + + if (isValidPath_(*state, path)) { + PathSet referrers; + queryReferrers(*state, path, referrers); + referrers.erase(path); /* ignore self-references */ + if (!referrers.empty()) { + throw PathInUse( + format("cannot delete path '%1%' because it is in use by %2%") % + path % showPaths(referrers)); + } + invalidatePath(*state, path); + } + + txn.commit(); + }); +} + +bool LocalStore::verifyStore(bool checkContents, RepairFlag repair) { + LOG(INFO) << "reading the Nix store..."; + + bool errors = false; + + /* Acquire the global GC lock to get a consistent snapshot of + existing and valid paths. */ + AutoCloseFD fdGCLock = openGCLock(ltWrite); + + PathSet store; + for (auto& i : readDirectory(realStoreDir)) { + store.insert(i.name); + } + + /* Check whether all valid paths actually exist. */ + LOG(INFO) << "checking path existence..."; + + PathSet validPaths2 = queryAllValidPaths(); + PathSet validPaths; + PathSet done; + + fdGCLock = -1; + + for (auto& i : validPaths2) { + verifyPath(i, store, done, validPaths, repair, errors); + } + + /* Optionally, check the content hashes (slow). */ + if (checkContents) { + LOG(INFO) << "checking hashes..."; + + Hash nullHash(htSHA256); + + for (auto& i : validPaths) { + try { + auto info = std::const_pointer_cast<ValidPathInfo>( + std::shared_ptr<const ValidPathInfo>(queryPathInfo(i))); + + /* Check the content hash (optionally - slow). */ + DLOG(INFO) << "checking contents of '" << i << "'"; + HashResult current = hashPath(info->narHash.type, toRealPath(i)); + + if (info->narHash != nullHash && info->narHash != current.first) { + LOG(ERROR) << "path '" << i << "' was modified! expected hash '" + << info->narHash.to_string() << "', got '" + << current.first.to_string() << "'"; + if (repair != 0u) { + repairPath(i); + } else { + errors = true; + } + } else { + bool update = false; + + /* Fill in missing hashes. */ + if (info->narHash == nullHash) { + LOG(WARNING) << "fixing missing hash on '" << i << "'"; + info->narHash = current.first; + update = true; + } + + /* Fill in missing narSize fields (from old stores). */ + if (info->narSize == 0) { + LOG(ERROR) << "updating size field on '" << i << "' to " + << current.second; + info->narSize = current.second; + update = true; + } + + if (update) { + auto state(_state.lock()); + updatePathInfo(*state, *info); + } + } + + } catch (Error& e) { + /* It's possible that the path got GC'ed, so ignore + errors on invalid paths. */ + if (isValidPath(i)) { + LOG(ERROR) << e.msg(); + } else { + LOG(WARNING) << e.msg(); + } + errors = true; + } + } + } + + return errors; +} + +void LocalStore::verifyPath(const Path& path, const PathSet& store, + PathSet& done, PathSet& validPaths, + RepairFlag repair, bool& errors) { + checkInterrupt(); + + if (done.find(path) != done.end()) { + return; + } + done.insert(path); + + if (!isStorePath(path)) { + LOG(ERROR) << "path '" << path << "' is not in the Nix store"; + auto state(_state.lock()); + invalidatePath(*state, path); + return; + } + + if (store.find(baseNameOf(path)) == store.end()) { + /* Check any referrers first. If we can invalidate them + first, then we can invalidate this path as well. */ + bool canInvalidate = true; + PathSet referrers; + queryReferrers(path, referrers); + for (auto& i : referrers) { + if (i != path) { + verifyPath(i, store, done, validPaths, repair, errors); + if (validPaths.find(i) != validPaths.end()) { + canInvalidate = false; + } + } + } + + if (canInvalidate) { + LOG(WARNING) << "path '" << path + << "' disappeared, removing from database..."; + auto state(_state.lock()); + invalidatePath(*state, path); + } else { + LOG(ERROR) << "path '" << path + << "' disappeared, but it still has valid referrers!"; + if (repair != 0u) { + try { + repairPath(path); + } catch (Error& e) { + LOG(WARNING) << e.msg(); + errors = true; + } + } else { + errors = true; + } + } + + return; + } + + validPaths.insert(path); +} + +unsigned int LocalStore::getProtocol() { return PROTOCOL_VERSION; } + +#if defined(FS_IOC_SETFLAGS) && defined(FS_IOC_GETFLAGS) && \ + defined(FS_IMMUTABLE_FL) + +static void makeMutable(const Path& path) { + checkInterrupt(); + + struct stat st = lstat(path); + + if (!S_ISDIR(st.st_mode) && !S_ISREG(st.st_mode)) { + return; + } + + if (S_ISDIR(st.st_mode)) { + for (auto& i : readDirectory(path)) { + makeMutable(path + "/" + i.name); + } + } + + /* The O_NOFOLLOW is important to prevent us from changing the + mutable bit on the target of a symlink (which would be a + security hole). */ + AutoCloseFD fd = open(path.c_str(), O_RDONLY | O_NOFOLLOW | O_CLOEXEC); + if (fd == -1) { + if (errno == ELOOP) { + return; + } // it's a symlink + throw SysError(format("opening file '%1%'") % path); + } + + unsigned int flags = 0, old; + + /* Silently ignore errors getting/setting the immutable flag so + that we work correctly on filesystems that don't support it. */ + if (ioctl(fd, FS_IOC_GETFLAGS, &flags)) { + return; + } + old = flags; + flags &= ~FS_IMMUTABLE_FL; + if (old == flags) { + return; + } + if (ioctl(fd, FS_IOC_SETFLAGS, &flags)) { + return; + } +} + +/* Upgrade from schema 6 (Nix 0.15) to schema 7 (Nix >= 1.3). */ +void LocalStore::upgradeStore7() { + if (getuid() != 0) { + return; + } + printError( + "removing immutable bits from the Nix store (this may take a while)..."); + makeMutable(realStoreDir); +} + +#else + +void LocalStore::upgradeStore7() {} + +#endif + +void LocalStore::vacuumDB() { + auto state(_state.lock()); + state->db.exec("vacuum"); +} + +void LocalStore::addSignatures(const Path& storePath, const StringSet& sigs) { + retrySQLite<void>([&]() { + auto state(_state.lock()); + + SQLiteTxn txn(state->db); + + auto info = std::const_pointer_cast<ValidPathInfo>( + std::shared_ptr<const ValidPathInfo>(queryPathInfo(storePath))); + + info->sigs.insert(sigs.begin(), sigs.end()); + + updatePathInfo(*state, *info); + + txn.commit(); + }); +} + +void LocalStore::signPathInfo(ValidPathInfo& info) { + // FIXME: keep secret keys in memory. + + auto secretKeyFiles = settings.secretKeyFiles; + + for (auto& secretKeyFile : secretKeyFiles.get()) { + SecretKey secretKey(readFile(secretKeyFile)); + info.sign(secretKey); + } +} + +void LocalStore::createUser(const std::string& userName, uid_t userId) { + for (auto& dir : {fmt("%s/profiles/per-user/%s", stateDir, userName), + fmt("%s/gcroots/per-user/%s", stateDir, userName)}) { + createDirs(dir); + if (chmod(dir.c_str(), 0755) == -1) { + throw SysError("changing permissions of directory '%s'", dir); + } + if (chown(dir.c_str(), userId, getgid()) == -1) { + throw SysError("changing owner of directory '%s'", dir); + } + } +} + +} // namespace nix diff --git a/third_party/nix/src/libstore/local-store.hh b/third_party/nix/src/libstore/local-store.hh new file mode 100644 index 000000000000..1a58df81613b --- /dev/null +++ b/third_party/nix/src/libstore/local-store.hh @@ -0,0 +1,311 @@ +#pragma once + +#include <chrono> +#include <future> +#include <string> +#include <unordered_set> + +#include "pathlocks.hh" +#include "sqlite.hh" +#include "store-api.hh" +#include "sync.hh" +#include "util.hh" + +namespace nix { + +/* Nix store and database schema version. Version 1 (or 0) was Nix <= + 0.7. Version 2 was Nix 0.8 and 0.9. Version 3 is Nix 0.10. + Version 4 is Nix 0.11. Version 5 is Nix 0.12-0.16. Version 6 is + Nix 1.0. Version 7 is Nix 1.3. Version 10 is 2.0. */ +const int nixSchemaVersion = 10; + +struct Derivation; + +struct OptimiseStats { + unsigned long filesLinked = 0; + unsigned long long bytesFreed = 0; + unsigned long long blocksFreed = 0; +}; + +class LocalStore : public LocalFSStore { + private: + /* Lock file used for upgrading. */ + AutoCloseFD globalLock; + + struct State { + /* The SQLite database object. */ + SQLite db; + + /* Some precompiled SQLite statements. */ + SQLiteStmt stmtRegisterValidPath; + SQLiteStmt stmtUpdatePathInfo; + SQLiteStmt stmtAddReference; + SQLiteStmt stmtQueryPathInfo; + SQLiteStmt stmtQueryReferences; + SQLiteStmt stmtQueryReferrers; + SQLiteStmt stmtInvalidatePath; + SQLiteStmt stmtAddDerivationOutput; + SQLiteStmt stmtQueryValidDerivers; + SQLiteStmt stmtQueryDerivationOutputs; + SQLiteStmt stmtQueryPathFromHashPart; + SQLiteStmt stmtQueryValidPaths; + + /* The file to which we write our temporary roots. */ + AutoCloseFD fdTempRoots; + + /* The last time we checked whether to do an auto-GC, or an + auto-GC finished. */ + std::chrono::time_point<std::chrono::steady_clock> lastGCCheck; + + /* Whether auto-GC is running. If so, get gcFuture to wait for + the GC to finish. */ + bool gcRunning = false; + std::shared_future<void> gcFuture; + + /* How much disk space was available after the previous + auto-GC. If the current available disk space is below + minFree but not much below availAfterGC, then there is no + point in starting a new GC. */ + uint64_t availAfterGC = std::numeric_limits<uint64_t>::max(); + + std::unique_ptr<PublicKeys> publicKeys; + }; + + Sync<State, std::recursive_mutex> _state; + + public: + PathSetting realStoreDir_; + + const Path realStoreDir; + const Path dbDir; + const Path linksDir; + const Path reservedPath; + const Path schemaPath; + const Path trashDir; + const Path tempRootsDir; + const Path fnTempRoots; + + private: + Setting<bool> requireSigs{ + (Store*)this, settings.requireSigs, "require-sigs", + "whether store paths should have a trusted signature on import"}; + + const PublicKeys& getPublicKeys(); + + public: + // Hack for build-remote.cc. + PathSet locksHeld = tokenizeString<PathSet>(getEnv("NIX_HELD_LOCKS")); + + /* Initialise the local store, upgrading the schema if + necessary. */ + LocalStore(const Params& params); + + ~LocalStore(); + + /* Implementations of abstract store API methods. */ + + std::string getUri() override; + + bool isValidPathUncached(const Path& path) override; + + PathSet queryValidPaths(const PathSet& paths, SubstituteFlag maybeSubstitute = + NoSubstitute) override; + + PathSet queryAllValidPaths() override; + + void queryPathInfoUncached( + const Path& path, + Callback<std::shared_ptr<ValidPathInfo>> callback) noexcept override; + + void queryReferrers(const Path& path, PathSet& referrers) override; + + PathSet queryValidDerivers(const Path& path) override; + + PathSet queryDerivationOutputs(const Path& path) override; + + StringSet queryDerivationOutputNames(const Path& path) override; + + Path queryPathFromHashPart(const string& hashPart) override; + + PathSet querySubstitutablePaths(const PathSet& paths) override; + + void querySubstitutablePathInfos(const PathSet& paths, + SubstitutablePathInfos& infos) override; + + void addToStore(const ValidPathInfo& info, Source& source, RepairFlag repair, + CheckSigsFlag checkSigs, + std::shared_ptr<FSAccessor> accessor) override; + + Path addToStore(const string& name, const Path& srcPath, bool recursive, + HashType hashAlgo, PathFilter& filter, + RepairFlag repair) override; + + /* Like addToStore(), but the contents of the path are contained + in `dump', which is either a NAR serialisation (if recursive == + true) or simply the contents of a regular file (if recursive == + false). */ + Path addToStoreFromDump(const string& dump, const string& name, + bool recursive = true, HashType hashAlgo = htSHA256, + RepairFlag repair = NoRepair); + + Path addTextToStore(const string& name, const string& s, + const PathSet& references, RepairFlag repair) override; + + void buildPaths(const PathSet& paths, BuildMode buildMode) override; + + BuildResult buildDerivation(const Path& drvPath, const BasicDerivation& drv, + BuildMode buildMode) override; + + void ensurePath(const Path& path) override; + + void addTempRoot(const Path& path) override; + + void addIndirectRoot(const Path& path) override; + + void syncWithGC() override; + + private: + typedef std::shared_ptr<AutoCloseFD> FDPtr; + typedef list<FDPtr> FDs; + + void findTempRoots(FDs& fds, Roots& roots, bool censor); + + public: + Roots findRoots(bool censor) override; + + void collectGarbage(const GCOptions& options, GCResults& results) override; + + /* Optimise the disk space usage of the Nix store by hard-linking + files with the same contents. */ + void optimiseStore(OptimiseStats& stats); + + void optimiseStore() override; + + /* Optimise a single store path. */ + void optimisePath(const Path& path); + + bool verifyStore(bool checkContents, RepairFlag repair) override; + + /* Register the validity of a path, i.e., that `path' exists, that + the paths referenced by it exists, and in the case of an output + path of a derivation, that it has been produced by a successful + execution of the derivation (or something equivalent). Also + register the hash of the file system contents of the path. The + hash must be a SHA-256 hash. */ + void registerValidPath(const ValidPathInfo& info); + + void registerValidPaths(const ValidPathInfos& infos); + + unsigned int getProtocol() override; + + void vacuumDB(); + + /* Repair the contents of the given path by redownloading it using + a substituter (if available). */ + void repairPath(const Path& path); + + void addSignatures(const Path& storePath, const StringSet& sigs) override; + + /* If free disk space in /nix/store if below minFree, delete + garbage until it exceeds maxFree. */ + void autoGC(bool sync = true); + + private: + int getSchema(); + + void openDB(State& state, bool create); + + void makeStoreWritable(); + + static uint64_t queryValidPathId(State& state, const Path& path); + + uint64_t addValidPath(State& state, const ValidPathInfo& info, + bool checkOutputs = true); + + void invalidatePath(State& state, const Path& path); + + /* Delete a path from the Nix store. */ + void invalidatePathChecked(const Path& path); + + void verifyPath(const Path& path, const PathSet& store, PathSet& done, + PathSet& validPaths, RepairFlag repair, bool& errors); + + static void updatePathInfo(State& state, const ValidPathInfo& info); + + void upgradeStore6(); + void upgradeStore7(); + PathSet queryValidPathsOld(); + ValidPathInfo queryPathInfoOld(const Path& path); + + struct GCState; + + static void deleteGarbage(GCState& state, const Path& path); + + void tryToDelete(GCState& state, const Path& path); + + bool canReachRoot(GCState& state, PathSet& visited, const Path& path); + + void deletePathRecursive(GCState& state, const Path& path); + + static bool isActiveTempFile(const GCState& state, const Path& path, + const string& suffix); + + AutoCloseFD openGCLock(LockType lockType); + + void findRoots(const Path& path, unsigned char type, Roots& roots); + + void findRootsNoTemp(Roots& roots, bool censor); + + void findRuntimeRoots(Roots& roots, bool censor); + + void removeUnusedLinks(const GCState& state); + + Path createTempDirInStore(); + + void checkDerivationOutputs(const Path& drvPath, const Derivation& drv); + + typedef std::unordered_set<ino_t> InodeHash; + + InodeHash loadInodeHash(); + static Strings readDirectoryIgnoringInodes(const Path& path, + const InodeHash& inodeHash); + void optimisePath_(OptimiseStats& stats, const Path& path, + InodeHash& inodeHash); + + // Internal versions that are not wrapped in retry_sqlite. + static bool isValidPath_(State& state, const Path& path); + static void queryReferrers(State& state, const Path& path, + PathSet& referrers); + + /* Add signatures to a ValidPathInfo using the secret keys + specified by the โsecret-key-filesโ option. */ + static void signPathInfo(ValidPathInfo& info); + + Path getRealStoreDir() override { return realStoreDir; } + + void createUser(const std::string& userName, uid_t userId) override; + + friend class DerivationGoal; + friend class SubstitutionGoal; +}; + +typedef std::pair<dev_t, ino_t> Inode; +typedef set<Inode> InodesSeen; + +/* "Fix", or canonicalise, the meta-data of the files in a store path + after it has been built. In particular: + - the last modification date on each file is set to 1 (i.e., + 00:00:01 1/1/1970 UTC) + - the permissions are set of 444 or 555 (i.e., read-only with or + without execute permission; setuid bits etc. are cleared) + - the owner and group are set to the Nix user and group, if we're + running as root. */ +void canonicalisePathMetaData(const Path& path, uid_t fromUid, + InodesSeen& inodesSeen); +void canonicalisePathMetaData(const Path& path, uid_t fromUid); + +void canonicaliseTimestampAndPermissions(const Path& path); + +MakeError(PathInUse, Error); + +} // namespace nix diff --git a/third_party/nix/src/libstore/machines.cc b/third_party/nix/src/libstore/machines.cc new file mode 100644 index 000000000000..5b0c7e72ddba --- /dev/null +++ b/third_party/nix/src/libstore/machines.cc @@ -0,0 +1,103 @@ +#include "machines.hh" + +#include <algorithm> + +#include <glog/logging.h> + +#include "globals.hh" +#include "util.hh" + +namespace nix { + +Machine::Machine(decltype(storeUri)& storeUri, + decltype(systemTypes)& systemTypes, decltype(sshKey)& sshKey, + decltype(maxJobs) maxJobs, decltype(speedFactor) speedFactor, + decltype(supportedFeatures)& supportedFeatures, + decltype(mandatoryFeatures)& mandatoryFeatures, + decltype(sshPublicHostKey)& sshPublicHostKey) + : storeUri( + // Backwards compatibility: if the URI is a hostname, + // prepend ssh://. + storeUri.find("://") != std::string::npos || + hasPrefix(storeUri, "local") || + hasPrefix(storeUri, "remote") || + hasPrefix(storeUri, "auto") || hasPrefix(storeUri, "/") + ? storeUri + : "ssh://" + storeUri), + systemTypes(systemTypes), + sshKey(sshKey), + maxJobs(maxJobs), + speedFactor(std::max(1U, speedFactor)), + supportedFeatures(supportedFeatures), + mandatoryFeatures(mandatoryFeatures), + sshPublicHostKey(sshPublicHostKey) {} + +bool Machine::allSupported(const std::set<string>& features) const { + return std::all_of(features.begin(), features.end(), + [&](const string& feature) { + return (supportedFeatures.count(feature) != 0u) || + (mandatoryFeatures.count(feature) != 0u); + }); +} + +bool Machine::mandatoryMet(const std::set<string>& features) const { + return std::all_of( + mandatoryFeatures.begin(), mandatoryFeatures.end(), + [&](const string& feature) { return features.count(feature); }); +} + +void parseMachines(const std::string& s, Machines& machines) { + for (auto line : tokenizeString<std::vector<string>>(s, "\n;")) { + trim(line); + line.erase(std::find(line.begin(), line.end(), '#'), line.end()); + if (line.empty()) { + continue; + } + + if (line[0] == '@') { + auto file = trim(std::string(line, 1)); + try { + parseMachines(readFile(file), machines); + } catch (const SysError& e) { + if (e.errNo != ENOENT) { + throw; + } + DLOG(INFO) << "cannot find machines file: " << file; + } + continue; + } + + auto tokens = tokenizeString<std::vector<string>>(line); + auto sz = tokens.size(); + if (sz < 1) { + throw FormatError("bad machine specification '%s'", line); + } + + auto isSet = [&](size_t n) { + return tokens.size() > n && !tokens[n].empty() && tokens[n] != "-"; + }; + + machines.emplace_back( + tokens[0], + isSet(1) ? tokenizeString<std::vector<string>>(tokens[1], ",") + : std::vector<string>{settings.thisSystem}, + isSet(2) ? tokens[2] : "", isSet(3) ? std::stoull(tokens[3]) : 1LL, + isSet(4) ? std::stoull(tokens[4]) : 1LL, + isSet(5) ? tokenizeString<std::set<string>>(tokens[5], ",") + : std::set<string>{}, + isSet(6) ? tokenizeString<std::set<string>>(tokens[6], ",") + : std::set<string>{}, + isSet(7) ? tokens[7] : ""); + } +} + +Machines getMachines() { + static auto machines = [&]() { + Machines machines; + parseMachines(settings.builders, machines); + return machines; + }(); + return machines; +} + +} // namespace nix diff --git a/third_party/nix/src/libstore/machines.hh b/third_party/nix/src/libstore/machines.hh new file mode 100644 index 000000000000..aef8054c250e --- /dev/null +++ b/third_party/nix/src/libstore/machines.hh @@ -0,0 +1,36 @@ +#pragma once + +#include "types.hh" + +namespace nix { + +struct Machine { + const string storeUri; + const std::vector<string> systemTypes; + const string sshKey; + const unsigned int maxJobs; + const unsigned int speedFactor; + const std::set<string> supportedFeatures; + const std::set<string> mandatoryFeatures; + const std::string sshPublicHostKey; + bool enabled = true; + + bool allSupported(const std::set<string>& features) const; + + bool mandatoryMet(const std::set<string>& features) const; + + Machine(decltype(storeUri)& storeUri, decltype(systemTypes)& systemTypes, + decltype(sshKey)& sshKey, decltype(maxJobs) maxJobs, + decltype(speedFactor) speedFactor, + decltype(supportedFeatures)& supportedFeatures, + decltype(mandatoryFeatures)& mandatoryFeatures, + decltype(sshPublicHostKey)& sshPublicHostKey); +}; + +typedef std::vector<Machine> Machines; + +void parseMachines(const std::string& s, Machines& machines); + +Machines getMachines(); + +} // namespace nix diff --git a/third_party/nix/src/libstore/meson.build b/third_party/nix/src/libstore/meson.build new file mode 100644 index 000000000000..47f6ddf25970 --- /dev/null +++ b/third_party/nix/src/libstore/meson.build @@ -0,0 +1,153 @@ +# Nix lib store build file +#============================================================================ + +src_inc += include_directories('.') + +libstore_src = files( + join_paths(meson.source_root(), 'src/libstore/binary-cache-store.cc'), + join_paths(meson.source_root(), 'src/libstore/build.cc'), + join_paths(meson.source_root(), 'src/libstore/crypto.cc'), + join_paths(meson.source_root(), 'src/libstore/derivations.cc'), + join_paths(meson.source_root(), 'src/libstore/download.cc'), + join_paths(meson.source_root(), 'src/libstore/export-import.cc'), + join_paths(meson.source_root(), 'src/libstore/gc.cc'), + join_paths(meson.source_root(), 'src/libstore/globals.cc'), + join_paths(meson.source_root(), 'src/libstore/http-binary-cache-store.cc'), + join_paths(meson.source_root(), 'src/libstore/legacy-ssh-store.cc'), + join_paths(meson.source_root(), 'src/libstore/local-binary-cache-store.cc'), + join_paths(meson.source_root(), 'src/libstore/local-fs-store.cc'), + join_paths(meson.source_root(), 'src/libstore/local-store.cc'), + join_paths(meson.source_root(), 'src/libstore/machines.cc'), + join_paths(meson.source_root(), 'src/libstore/misc.cc'), + join_paths(meson.source_root(), 'src/libstore/nar-accessor.cc'), + join_paths(meson.source_root(), 'src/libstore/nar-info.cc'), + join_paths(meson.source_root(), 'src/libstore/nar-info-disk-cache.cc'), + join_paths(meson.source_root(), 'src/libstore/optimise-store.cc'), + join_paths(meson.source_root(), 'src/libstore/parsed-derivations.cc'), + join_paths(meson.source_root(), 'src/libstore/pathlocks.cc'), + join_paths(meson.source_root(), 'src/libstore/profiles.cc'), + join_paths(meson.source_root(), 'src/libstore/references.cc'), + join_paths(meson.source_root(), 'src/libstore/remote-fs-accessor.cc'), + join_paths(meson.source_root(), 'src/libstore/remote-store.cc'), + join_paths(meson.source_root(), 'src/libstore/s3-binary-cache-store.cc'), + join_paths(meson.source_root(), 'src/libstore/sqlite.cc'), + join_paths(meson.source_root(), 'src/libstore/ssh.cc'), + join_paths(meson.source_root(), 'src/libstore/ssh-store.cc'), + join_paths(meson.source_root(), 'src/libstore/store-api.cc'), + join_paths(meson.source_root(), 'src/libstore/builtins/buildenv.cc'), + join_paths(meson.source_root(), 'src/libstore/builtins/fetchurl.cc')) + +libstore_headers = files( + join_paths(meson.source_root(), 'src/libstore/binary-cache-store.hh'), + join_paths(meson.source_root(), 'src/libstore/builtins.hh'), + join_paths(meson.source_root(), 'src/libstore/crypto.hh'), + join_paths(meson.source_root(), 'src/libstore/derivations.hh'), + join_paths(meson.source_root(), 'src/libstore/download.hh'), + join_paths(meson.source_root(), 'src/libstore/fs-accessor.hh'), + join_paths(meson.source_root(), 'src/libstore/globals.hh'), + join_paths(meson.source_root(), 'src/libstore/local-store.hh'), + join_paths(meson.source_root(), 'src/libstore/machines.hh'), + join_paths(meson.source_root(), 'src/libstore/nar-accessor.hh'), + join_paths(meson.source_root(), 'src/libstore/nar-info-disk-cache.hh'), + join_paths(meson.source_root(), 'src/libstore/nar-info.hh'), + join_paths(meson.source_root(), 'src/libstore/parsed-derivations.hh'), + join_paths(meson.source_root(), 'src/libstore/pathlocks.hh'), + join_paths(meson.source_root(), 'src/libstore/profiles.hh'), + join_paths(meson.source_root(), 'src/libstore/references.hh'), + join_paths(meson.source_root(), 'src/libstore/remote-fs-accessor.hh'), + join_paths(meson.source_root(), 'src/libstore/remote-store.hh'), + join_paths(meson.source_root(), 'src/libstore/s3-binary-cache-store.hh'), + join_paths(meson.source_root(), 'src/libstore/s3.hh'), + join_paths(meson.source_root(), 'src/libstore/serve-protocol.hh'), + join_paths(meson.source_root(), 'src/libstore/sqlite.hh'), + join_paths(meson.source_root(), 'src/libstore/ssh.hh'), + join_paths(meson.source_root(), 'src/libstore/store-api.hh'), + join_paths(meson.source_root(), 'src/libstore/worker-protocol.hh')) + +libstore_data = files( + join_paths(meson.source_root(), 'src/libstore/sandbox-defaults.sb'), + join_paths(meson.source_root(), 'src/libstore/sandbox-minimal.sb'), + join_paths(meson.source_root(), 'src/libstore/sandbox-network.sb')) + +# dependancies +#============================================================================ + +libstore_dep_list = [ + glog_dep, + libbz2_dep, + libcurl_dep, + libdl_dep, + pthread_dep, + sqlite3_dep, + libsodium_dep +] + +if sys_name.contains('linux') + libstore_dep_list += libseccomp_dep +endif + +if sys_name.contains('freebsd') + libstore_dep_list += libdl_dep +endif + +# Link args +#============================================================================ + +libstore_link_list = [ + libutil_lib] + +libstore_link_args = [] + +# compiler args +#============================================================================ + +libstore_cxx_args = [ + '-DNIX_PREFIX="@0@" '.format(prefix), + '-DNIX_STORE_DIR="@0@" '.format(nixstoredir), + '-DNIX_DATA_DIR="@0@" '.format(datadir), + '-DNIX_STATE_DIR="@0@" '.format(join_paths(localstatedir, 'nix')), + '-DNIX_LOG_DIR="@0@" '.format(join_paths(localstatedir, 'log/nix')), + '-DNIX_CONF_DIR="@0@" '.format(join_paths(sysconfdir, 'nix')), + '-DNIX_LIBEXEC_DIR="@0@" '.format(libexecdir), + '-DNIX_BIN_DIR="@0@" '.format(bindir), + '-DNIX_MAN_DIR="@0@" '.format(mandir), + '-DSANDBOX_SHELL="@0@" '.format(get_option('sandbox_shell')), + '-DLSOF="@0@" '.format(lsof)] + +# targets +#============================================================================ + +gen_header = ''' + echo 'R"foo(' >> "$1" + cat @INPUT@ >> "$1" + echo ')foo"' >> "$1" +''' + +libstore_src += custom_target( + 'schema.sql.gen.hh', + output : 'schema.sql.gen.hh', + input : 'schema.sql', + command : [bash, '-c', gen_header, 'sh', '@OUTPUT@']) + +# build +#============================================================================ + +libstore_lib = library( + 'nixstore', + install : true, + install_mode : 'rwxr-xr-x', + install_dir : libdir, + include_directories : src_inc, + link_with : libstore_link_list, + sources : libstore_src, + cpp_args : libstore_cxx_args, + link_args : libstore_link_args, + dependencies : libstore_dep_list) + +install_headers( + libstore_headers, + install_dir : join_paths(includedir, 'nix')) + +install_data( + libstore_data, + install_dir : join_paths(datadir, 'nix/sandbox')) diff --git a/third_party/nix/src/libstore/misc.cc b/third_party/nix/src/libstore/misc.cc new file mode 100644 index 000000000000..f95c4bec8b0c --- /dev/null +++ b/third_party/nix/src/libstore/misc.cc @@ -0,0 +1,329 @@ +#include <glog/logging.h> + +#include "derivations.hh" +#include "globals.hh" +#include "local-store.hh" +#include "parsed-derivations.hh" +#include "store-api.hh" +#include "thread-pool.hh" + +namespace nix { + +void Store::computeFSClosure(const PathSet& startPaths, PathSet& paths_, + bool flipDirection, bool includeOutputs, + bool includeDerivers) { + struct State { + size_t pending; + PathSet& paths; + std::exception_ptr exc; + }; + + Sync<State> state_(State{0, paths_, nullptr}); + + std::function<void(const Path&)> enqueue; + + std::condition_variable done; + + enqueue = [&](const Path& path) -> void { + { + auto state(state_.lock()); + if (state->exc) { + return; + } + if (state->paths.count(path) != 0u) { + return; + } + state->paths.insert(path); + state->pending++; + } + + queryPathInfo( + path, {[&, path](std::future<ref<ValidPathInfo>> fut) { + // FIXME: calls to isValidPath() should be async + + try { + auto info = fut.get(); + + if (flipDirection) { + PathSet referrers; + queryReferrers(path, referrers); + for (auto& ref : referrers) { + if (ref != path) { + enqueue(ref); + } + } + + if (includeOutputs) { + for (auto& i : queryValidDerivers(path)) { + enqueue(i); + } + } + + if (includeDerivers && isDerivation(path)) { + for (auto& i : queryDerivationOutputs(path)) { + if (isValidPath(i) && queryPathInfo(i)->deriver == path) { + enqueue(i); + } + } + } + + } else { + for (auto& ref : info->references) { + if (ref != path) { + enqueue(ref); + } + } + + if (includeOutputs && isDerivation(path)) { + for (auto& i : queryDerivationOutputs(path)) { + if (isValidPath(i)) { + enqueue(i); + } + } + } + + if (includeDerivers && isValidPath(info->deriver)) { + enqueue(info->deriver); + } + } + + { + auto state(state_.lock()); + assert(state->pending); + if (--state->pending == 0u) { + done.notify_one(); + } + } + + } catch (...) { + auto state(state_.lock()); + if (!state->exc) { + state->exc = std::current_exception(); + } + assert(state->pending); + if (--state->pending == 0u) { + done.notify_one(); + } + }; + }}); + }; + + for (auto& startPath : startPaths) { + enqueue(startPath); + } + + { + auto state(state_.lock()); + while (state->pending != 0u) { + state.wait(done); + } + if (state->exc) { + std::rethrow_exception(state->exc); + } + } +} + +void Store::computeFSClosure(const Path& startPath, PathSet& paths_, + bool flipDirection, bool includeOutputs, + bool includeDerivers) { + computeFSClosure(PathSet{startPath}, paths_, flipDirection, includeOutputs, + includeDerivers); +} + +void Store::queryMissing(const PathSet& targets, PathSet& willBuild_, + PathSet& willSubstitute_, PathSet& unknown_, + unsigned long long& downloadSize_, + unsigned long long& narSize_) { + LOG(INFO) << "querying info about missing paths"; + + downloadSize_ = narSize_ = 0; + + ThreadPool pool; + + struct State { + PathSet done; + PathSet &unknown, &willSubstitute, &willBuild; + unsigned long long& downloadSize; + unsigned long long& narSize; + }; + + struct DrvState { + size_t left; + bool done = false; + PathSet outPaths; + explicit DrvState(size_t left) : left(left) {} + }; + + Sync<State> state_(State{PathSet(), unknown_, willSubstitute_, willBuild_, + downloadSize_, narSize_}); + + std::function<void(Path)> doPath; + + auto mustBuildDrv = [&](const Path& drvPath, const Derivation& drv) { + { + auto state(state_.lock()); + state->willBuild.insert(drvPath); + } + + for (auto& i : drv.inputDrvs) { + pool.enqueue( + std::bind(doPath, makeDrvPathWithOutputs(i.first, i.second))); + } + }; + + auto checkOutput = [&](const Path& drvPath, const ref<Derivation>& drv, + const Path& outPath, + const ref<Sync<DrvState>>& drvState_) { + if (drvState_->lock()->done) { + return; + } + + SubstitutablePathInfos infos; + querySubstitutablePathInfos({outPath}, infos); + + if (infos.empty()) { + drvState_->lock()->done = true; + mustBuildDrv(drvPath, *drv); + } else { + { + auto drvState(drvState_->lock()); + if (drvState->done) { + return; + } + assert(drvState->left); + drvState->left--; + drvState->outPaths.insert(outPath); + if (drvState->left == 0u) { + for (auto& path : drvState->outPaths) { + pool.enqueue(std::bind(doPath, path)); + } + } + } + } + }; + + doPath = [&](const Path& path) { + { + auto state(state_.lock()); + if (state->done.count(path) != 0u) { + return; + } + state->done.insert(path); + } + + DrvPathWithOutputs i2 = parseDrvPathWithOutputs(path); + + if (isDerivation(i2.first)) { + if (!isValidPath(i2.first)) { + // FIXME: we could try to substitute the derivation. + auto state(state_.lock()); + state->unknown.insert(path); + return; + } + + Derivation drv = derivationFromPath(i2.first); + ParsedDerivation parsedDrv(i2.first, drv); + + PathSet invalid; + for (auto& j : drv.outputs) { + if (wantOutput(j.first, i2.second) && !isValidPath(j.second.path)) { + invalid.insert(j.second.path); + } + } + if (invalid.empty()) { + return; + } + + if (settings.useSubstitutes && parsedDrv.substitutesAllowed()) { + auto drvState = make_ref<Sync<DrvState>>(DrvState(invalid.size())); + for (auto& output : invalid) { + pool.enqueue(std::bind(checkOutput, i2.first, + make_ref<Derivation>(drv), output, drvState)); + } + } else { + mustBuildDrv(i2.first, drv); + } + + } else { + if (isValidPath(path)) { + return; + } + + SubstitutablePathInfos infos; + querySubstitutablePathInfos({path}, infos); + + if (infos.empty()) { + auto state(state_.lock()); + state->unknown.insert(path); + return; + } + + auto info = infos.find(path); + assert(info != infos.end()); + + { + auto state(state_.lock()); + state->willSubstitute.insert(path); + state->downloadSize += info->second.downloadSize; + state->narSize += info->second.narSize; + } + + for (auto& ref : info->second.references) { + pool.enqueue(std::bind(doPath, ref)); + } + } + }; + + for (auto& path : targets) { + pool.enqueue(std::bind(doPath, path)); + } + + pool.process(); +} + +Paths Store::topoSortPaths(const PathSet& paths) { + Paths sorted; + PathSet visited; + PathSet parents; + + std::function<void(const Path& path, const Path* parent)> dfsVisit; + + dfsVisit = [&](const Path& path, const Path* parent) { + if (parents.find(path) != parents.end()) { + throw BuildError( + format("cycle detected in the references of '%1%' from '%2%'") % + path % *parent); + } + + if (visited.find(path) != visited.end()) { + return; + } + visited.insert(path); + parents.insert(path); + + PathSet references; + try { + references = queryPathInfo(path)->references; + } catch (InvalidPath&) { + } + + for (auto& i : references) { + /* Don't traverse into paths that don't exist. That can + happen due to substitutes for non-existent paths. */ + if (i != path && paths.find(i) != paths.end()) { + dfsVisit(i, &path); + } + } + + sorted.push_front(path); + parents.erase(path); + }; + + for (auto& i : paths) { + dfsVisit(i, nullptr); + } + + return sorted; +} + +} // namespace nix diff --git a/third_party/nix/src/libstore/nar-accessor.cc b/third_party/nix/src/libstore/nar-accessor.cc new file mode 100644 index 000000000000..7f6e0377aed4 --- /dev/null +++ b/third_party/nix/src/libstore/nar-accessor.cc @@ -0,0 +1,268 @@ +#include "nar-accessor.hh" + +#include <algorithm> +#include <map> +#include <nlohmann/json.hpp> +#include <stack> +#include <utility> + +#include "archive.hh" +#include "json.hh" + +namespace nix { + +struct NarMember { + FSAccessor::Type type = FSAccessor::Type::tMissing; + + bool isExecutable = false; + + /* If this is a regular file, position of the contents of this + file in the NAR. */ + size_t start = 0, size = 0; + + std::string target; + + /* If this is a directory, all the children of the directory. */ + std::map<std::string, NarMember> children; +}; + +struct NarAccessor : public FSAccessor { + std::shared_ptr<const std::string> nar; + + GetNarBytes getNarBytes; + + NarMember root; + + struct NarIndexer : ParseSink, StringSource { + NarAccessor& acc; + + std::stack<NarMember*> parents; + + std::string currentStart; + bool isExec = false; + + NarIndexer(NarAccessor& acc, const std::string& nar) + : StringSource(nar), acc(acc) {} + + void createMember(const Path& path, NarMember member) { + size_t level = std::count(path.begin(), path.end(), '/'); + while (parents.size() > level) { + parents.pop(); + } + + if (parents.empty()) { + acc.root = std::move(member); + parents.push(&acc.root); + } else { + if (parents.top()->type != FSAccessor::Type::tDirectory) { + throw Error("NAR file missing parent directory of path '%s'", path); + } + auto result = parents.top()->children.emplace(baseNameOf(path), + std::move(member)); + parents.push(&result.first->second); + } + } + + void createDirectory(const Path& path) override { + createMember(path, {FSAccessor::Type::tDirectory, false, 0, 0}); + } + + void createRegularFile(const Path& path) override { + createMember(path, {FSAccessor::Type::tRegular, false, 0, 0}); + } + + void isExecutable() override { parents.top()->isExecutable = true; } + + void preallocateContents(unsigned long long size) override { + currentStart = string(s, pos, 16); + assert(size <= std::numeric_limits<size_t>::max()); + parents.top()->size = (size_t)size; + parents.top()->start = pos; + } + + void receiveContents(unsigned char* data, unsigned int len) override { + // Sanity check + if (!currentStart.empty()) { + assert(len < 16 || currentStart == string((char*)data, 16)); + currentStart.clear(); + } + } + + void createSymlink(const Path& path, const string& target) override { + createMember(path, + NarMember{FSAccessor::Type::tSymlink, false, 0, 0, target}); + } + }; + + explicit NarAccessor(const ref<const std::string>& nar) : nar(nar) { + NarIndexer indexer(*this, *nar); + parseDump(indexer, indexer); + } + + NarAccessor(const std::string& listing, GetNarBytes getNarBytes) + : getNarBytes(std::move(getNarBytes)) { + using json = nlohmann::json; + + std::function<void(NarMember&, json&)> recurse; + + recurse = [&](NarMember& member, json& v) { + std::string type = v["type"]; + + if (type == "directory") { + member.type = FSAccessor::Type::tDirectory; + for (auto i = v["entries"].begin(); i != v["entries"].end(); ++i) { + const std::string& name = i.key(); + recurse(member.children[name], i.value()); + } + } else if (type == "regular") { + member.type = FSAccessor::Type::tRegular; + member.size = v["size"]; + member.isExecutable = v.value("executable", false); + member.start = v["narOffset"]; + } else if (type == "symlink") { + member.type = FSAccessor::Type::tSymlink; + member.target = v.value("target", ""); + } else { + return; + } + }; + + json v = json::parse(listing); + recurse(root, v); + } + + NarMember* find(const Path& path) { + Path canon = path.empty() ? "" : canonPath(path); + NarMember* current = &root; + auto end = path.end(); + for (auto it = path.begin(); it != end;) { + // because it != end, the remaining component is non-empty so we need + // a directory + if (current->type != FSAccessor::Type::tDirectory) { + return nullptr; + } + + // skip slash (canonPath above ensures that this is always a slash) + assert(*it == '/'); + it += 1; + + // lookup current component + auto next = std::find(it, end, '/'); + auto child = current->children.find(std::string(it, next)); + if (child == current->children.end()) { + return nullptr; + } + current = &child->second; + + it = next; + } + + return current; + } + + NarMember& get(const Path& path) { + auto result = find(path); + if (result == nullptr) { + throw Error("NAR file does not contain path '%1%'", path); + } + return *result; + } + + Stat stat(const Path& path) override { + auto i = find(path); + if (i == nullptr) { + return {FSAccessor::Type::tMissing, 0, false}; + } + return {i->type, i->size, i->isExecutable, i->start}; + } + + StringSet readDirectory(const Path& path) override { + auto i = get(path); + + if (i.type != FSAccessor::Type::tDirectory) { + throw Error(format("path '%1%' inside NAR file is not a directory") % + path); + } + + StringSet res; + for (auto& child : i.children) { + res.insert(child.first); + } + + return res; + } + + std::string readFile(const Path& path) override { + auto i = get(path); + if (i.type != FSAccessor::Type::tRegular) { + throw Error(format("path '%1%' inside NAR file is not a regular file") % + path); + } + + if (getNarBytes) { + return getNarBytes(i.start, i.size); + } + + assert(nar); + return std::string(*nar, i.start, i.size); + } + + std::string readLink(const Path& path) override { + auto i = get(path); + if (i.type != FSAccessor::Type::tSymlink) { + throw Error(format("path '%1%' inside NAR file is not a symlink") % path); + } + return i.target; + } +}; + +ref<FSAccessor> makeNarAccessor(ref<const std::string> nar) { + return make_ref<NarAccessor>(nar); +} + +ref<FSAccessor> makeLazyNarAccessor(const std::string& listing, + GetNarBytes getNarBytes) { + return make_ref<NarAccessor>(listing, getNarBytes); +} + +void listNar(JSONPlaceholder& res, const ref<FSAccessor>& accessor, + const Path& path, bool recurse) { + auto st = accessor->stat(path); + + auto obj = res.object(); + + switch (st.type) { + case FSAccessor::Type::tRegular: + obj.attr("type", "regular"); + obj.attr("size", st.fileSize); + if (st.isExecutable) { + obj.attr("executable", true); + } + if (st.narOffset != 0u) { + obj.attr("narOffset", st.narOffset); + } + break; + case FSAccessor::Type::tDirectory: + obj.attr("type", "directory"); + { + auto res2 = obj.object("entries"); + for (auto& name : accessor->readDirectory(path)) { + if (recurse) { + auto res3 = res2.placeholder(name); + listNar(res3, accessor, path + "/" + name, true); + } else { + res2.object(name); + } + } + } + break; + case FSAccessor::Type::tSymlink: + obj.attr("type", "symlink"); + obj.attr("target", accessor->readLink(path)); + break; + default: + throw Error("path '%s' does not exist in NAR", path); + } +} + +} // namespace nix diff --git a/third_party/nix/src/libstore/nar-accessor.hh b/third_party/nix/src/libstore/nar-accessor.hh new file mode 100644 index 000000000000..8b18d4a36e06 --- /dev/null +++ b/third_party/nix/src/libstore/nar-accessor.hh @@ -0,0 +1,29 @@ +#pragma once + +#include <functional> + +#include "fs-accessor.hh" + +namespace nix { + +/* Return an object that provides access to the contents of a NAR + file. */ +ref<FSAccessor> makeNarAccessor(ref<const std::string> nar); + +/* Create a NAR accessor from a NAR listing (in the format produced by + listNar()). The callback getNarBytes(offset, length) is used by the + readFile() method of the accessor to get the contents of files + inside the NAR. */ +typedef std::function<std::string(uint64_t, uint64_t)> GetNarBytes; + +ref<FSAccessor> makeLazyNarAccessor(const std::string& listing, + GetNarBytes getNarBytes); + +class JSONPlaceholder; + +/* Write a JSON representation of the contents of a NAR (except file + contents). */ +void listNar(JSONPlaceholder& res, const ref<FSAccessor>& accessor, + const Path& path, bool recurse); + +} // namespace nix diff --git a/third_party/nix/src/libstore/nar-info-disk-cache.cc b/third_party/nix/src/libstore/nar-info-disk-cache.cc new file mode 100644 index 000000000000..19ef2c9d7461 --- /dev/null +++ b/third_party/nix/src/libstore/nar-info-disk-cache.cc @@ -0,0 +1,285 @@ +#include "nar-info-disk-cache.hh" + +#include <glog/logging.h> +#include <sqlite3.h> + +#include "globals.hh" +#include "sqlite.hh" +#include "sync.hh" + +namespace nix { + +static const char* schema = R"sql( + +create table if not exists BinaryCaches ( + id integer primary key autoincrement not null, + url text unique not null, + timestamp integer not null, + storeDir text not null, + wantMassQuery integer not null, + priority integer not null +); + +create table if not exists NARs ( + cache integer not null, + hashPart text not null, + namePart text, + url text, + compression text, + fileHash text, + fileSize integer, + narHash text, + narSize integer, + refs text, + deriver text, + sigs text, + ca text, + timestamp integer not null, + present integer not null, + primary key (cache, hashPart), + foreign key (cache) references BinaryCaches(id) on delete cascade +); + +create table if not exists LastPurge ( + dummy text primary key, + value integer +); + +)sql"; + +class NarInfoDiskCacheImpl : public NarInfoDiskCache { + public: + /* How often to purge expired entries from the cache. */ + const int purgeInterval = 24 * 3600; + + struct Cache { + int id; + Path storeDir; + bool wantMassQuery; + int priority; + }; + + struct State { + SQLite db; + SQLiteStmt insertCache, queryCache, insertNAR, insertMissingNAR, queryNAR, + purgeCache; + std::map<std::string, Cache> caches; + }; + + Sync<State> _state; + + NarInfoDiskCacheImpl() { + auto state(_state.lock()); + + Path dbPath = getCacheDir() + "/nix/binary-cache-v6.sqlite"; + createDirs(dirOf(dbPath)); + + state->db = SQLite(dbPath); + + if (sqlite3_busy_timeout(state->db, 60 * 60 * 1000) != SQLITE_OK) { + throwSQLiteError(state->db, "setting timeout"); + } + + // We can always reproduce the cache. + state->db.exec("pragma synchronous = off"); + state->db.exec("pragma main.journal_mode = truncate"); + + state->db.exec(schema); + + state->insertCache.create( + state->db, + "insert or replace into BinaryCaches(url, timestamp, storeDir, " + "wantMassQuery, priority) values (?, ?, ?, ?, ?)"); + + state->queryCache.create(state->db, + "select id, storeDir, wantMassQuery, priority " + "from BinaryCaches where url = ?"); + + state->insertNAR.create( + state->db, + "insert or replace into NARs(cache, hashPart, namePart, url, " + "compression, fileHash, fileSize, narHash, " + "narSize, refs, deriver, sigs, ca, timestamp, present) values (?, ?, " + "?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, 1)"); + + state->insertMissingNAR.create( + state->db, + "insert or replace into NARs(cache, hashPart, timestamp, present) " + "values (?, ?, ?, 0)"); + + state->queryNAR.create( + state->db, + "select present, namePart, url, compression, fileHash, fileSize, " + "narHash, narSize, refs, deriver, sigs, ca from NARs where cache = ? " + "and hashPart = ? and ((present = 0 and timestamp > ?) or (present = 1 " + "and timestamp > ?))"); + + /* Periodically purge expired entries from the database. */ + retrySQLite<void>([&]() { + auto now = time(nullptr); + + SQLiteStmt queryLastPurge(state->db, "select value from LastPurge"); + auto queryLastPurge_(queryLastPurge.use()); + + if (!queryLastPurge_.next() || + queryLastPurge_.getInt(0) < now - purgeInterval) { + SQLiteStmt(state->db, + "delete from NARs where ((present = 0 and timestamp < ?) or " + "(present = 1 and timestamp < ?))") + .use()(now - settings.ttlNegativeNarInfoCache)( + now - settings.ttlPositiveNarInfoCache) + .exec(); + + DLOG(INFO) << "deleted " << sqlite3_changes(state->db) + << " entries from the NAR info disk cache"; + + SQLiteStmt( + state->db, + "insert or replace into LastPurge(dummy, value) values ('', ?)") + .use()(now) + .exec(); + } + }); + } + + static Cache& getCache(State& state, const std::string& uri) { + auto i = state.caches.find(uri); + if (i == state.caches.end()) { + abort(); + } + return i->second; + } + + void createCache(const std::string& uri, const Path& storeDir, + bool wantMassQuery, int priority) override { + retrySQLite<void>([&]() { + auto state(_state.lock()); + + // FIXME: race + + state->insertCache + .use()(uri)(time(nullptr))(storeDir)( + static_cast<int64_t>(wantMassQuery))(priority) + .exec(); + assert(sqlite3_changes(state->db) == 1); + state->caches[uri] = Cache{(int)sqlite3_last_insert_rowid(state->db), + storeDir, wantMassQuery, priority}; + }); + } + + bool cacheExists(const std::string& uri, bool& wantMassQuery, + int& priority) override { + return retrySQLite<bool>([&]() { + auto state(_state.lock()); + + auto i = state->caches.find(uri); + if (i == state->caches.end()) { + auto queryCache(state->queryCache.use()(uri)); + if (!queryCache.next()) { + return false; + } + state->caches.emplace( + uri, Cache{(int)queryCache.getInt(0), queryCache.getStr(1), + queryCache.getInt(2) != 0, (int)queryCache.getInt(3)}); + } + + auto& cache(getCache(*state, uri)); + + wantMassQuery = cache.wantMassQuery; + priority = cache.priority; + + return true; + }); + } + + std::pair<Outcome, std::shared_ptr<NarInfo>> lookupNarInfo( + const std::string& uri, const std::string& hashPart) override { + return retrySQLite<std::pair<Outcome, std::shared_ptr<NarInfo>>>( + [&]() -> std::pair<Outcome, std::shared_ptr<NarInfo>> { + auto state(_state.lock()); + + auto& cache(getCache(*state, uri)); + + auto now = time(nullptr); + + auto queryNAR(state->queryNAR.use()(cache.id)(hashPart)( + now - settings.ttlNegativeNarInfoCache)( + now - settings.ttlPositiveNarInfoCache)); + + if (!queryNAR.next()) { + return {oUnknown, nullptr}; + } + + if (queryNAR.getInt(0) == 0) { + return {oInvalid, nullptr}; + } + + auto narInfo = make_ref<NarInfo>(); + + auto namePart = queryNAR.getStr(1); + narInfo->path = cache.storeDir + "/" + hashPart + + (namePart.empty() ? "" : "-" + namePart); + narInfo->url = queryNAR.getStr(2); + narInfo->compression = queryNAR.getStr(3); + if (!queryNAR.isNull(4)) { + narInfo->fileHash = Hash(queryNAR.getStr(4)); + } + narInfo->fileSize = queryNAR.getInt(5); + narInfo->narHash = Hash(queryNAR.getStr(6)); + narInfo->narSize = queryNAR.getInt(7); + for (auto& r : tokenizeString<Strings>(queryNAR.getStr(8), " ")) { + narInfo->references.insert(cache.storeDir + "/" + r); + } + if (!queryNAR.isNull(9)) { + narInfo->deriver = cache.storeDir + "/" + queryNAR.getStr(9); + } + for (auto& sig : tokenizeString<Strings>(queryNAR.getStr(10), " ")) { + narInfo->sigs.insert(sig); + } + narInfo->ca = queryNAR.getStr(11); + + return {oValid, narInfo}; + }); + } + + void upsertNarInfo(const std::string& uri, const std::string& hashPart, + std::shared_ptr<ValidPathInfo> info) override { + retrySQLite<void>([&]() { + auto state(_state.lock()); + + auto& cache(getCache(*state, uri)); + + if (info) { + auto narInfo = std::dynamic_pointer_cast<NarInfo>(info); + + assert(hashPart == storePathToHash(info->path)); + + state->insertNAR + .use()(cache.id)(hashPart)(storePathToName(info->path))( + narInfo ? narInfo->url : "", narInfo != nullptr)( + narInfo ? narInfo->compression : "", narInfo != nullptr)( + narInfo && narInfo->fileHash ? narInfo->fileHash.to_string() + : "", + narInfo && narInfo->fileHash)( + narInfo ? narInfo->fileSize : 0, + narInfo != nullptr && + (narInfo->fileSize != 0u))(info->narHash.to_string())( + info->narSize)(concatStringsSep(" ", info->shortRefs()))( + !info->deriver.empty() ? baseNameOf(info->deriver) : "", + !info->deriver.empty())(concatStringsSep(" ", info->sigs))( + info->ca)(time(nullptr)) + .exec(); + + } else { + state->insertMissingNAR.use()(cache.id)(hashPart)(time(nullptr)).exec(); + } + }); + } +}; + +ref<NarInfoDiskCache> getNarInfoDiskCache() { + static ref<NarInfoDiskCache> cache = make_ref<NarInfoDiskCacheImpl>(); + return cache; +} + +} // namespace nix diff --git a/third_party/nix/src/libstore/nar-info-disk-cache.hh b/third_party/nix/src/libstore/nar-info-disk-cache.hh new file mode 100644 index 000000000000..65bb773c92f7 --- /dev/null +++ b/third_party/nix/src/libstore/nar-info-disk-cache.hh @@ -0,0 +1,30 @@ +#pragma once + +#include "nar-info.hh" +#include "ref.hh" + +namespace nix { + +class NarInfoDiskCache { + public: + typedef enum { oValid, oInvalid, oUnknown } Outcome; + + virtual void createCache(const std::string& uri, const Path& storeDir, + bool wantMassQuery, int priority) = 0; + + virtual bool cacheExists(const std::string& uri, bool& wantMassQuery, + int& priority) = 0; + + virtual std::pair<Outcome, std::shared_ptr<NarInfo>> lookupNarInfo( + const std::string& uri, const std::string& hashPart) = 0; + + virtual void upsertNarInfo(const std::string& uri, + const std::string& hashPart, + std::shared_ptr<ValidPathInfo> info) = 0; +}; + +/* Return a singleton cache object that can be used concurrently by + multiple threads. */ +ref<NarInfoDiskCache> getNarInfoDiskCache(); + +} // namespace nix diff --git a/third_party/nix/src/libstore/nar-info.cc b/third_party/nix/src/libstore/nar-info.cc new file mode 100644 index 000000000000..91cf471554f3 --- /dev/null +++ b/third_party/nix/src/libstore/nar-info.cc @@ -0,0 +1,136 @@ +#include "nar-info.hh" + +#include "globals.hh" + +namespace nix { + +NarInfo::NarInfo(const Store& store, const std::string& s, + const std::string& whence) { + auto corrupt = [&]() { + throw Error(format("NAR info file '%1%' is corrupt") % whence); + }; + + auto parseHashField = [&](const string& s) { + try { + return Hash(s); + } catch (BadHash&) { + corrupt(); + return Hash(); // never reached + } + }; + + size_t pos = 0; + while (pos < s.size()) { + size_t colon = s.find(':', pos); + if (colon == std::string::npos) { + corrupt(); + } + + std::string name(s, pos, colon - pos); + + size_t eol = s.find('\n', colon + 2); + if (eol == std::string::npos) { + corrupt(); + } + + std::string value(s, colon + 2, eol - colon - 2); + + if (name == "StorePath") { + if (!store.isStorePath(value)) { + corrupt(); + } + path = value; + } else if (name == "URL") { + url = value; + } else if (name == "Compression") { + compression = value; + } else if (name == "FileHash") { + fileHash = parseHashField(value); + } else if (name == "FileSize") { + if (!string2Int(value, fileSize)) { + corrupt(); + } + } else if (name == "NarHash") { + narHash = parseHashField(value); + } else if (name == "NarSize") { + if (!string2Int(value, narSize)) { + corrupt(); + } + } else if (name == "References") { + auto refs = tokenizeString<Strings>(value, " "); + if (!references.empty()) { + corrupt(); + } + for (auto& r : refs) { + auto r2 = store.storeDir + "/" + r; + if (!store.isStorePath(r2)) { + corrupt(); + } + references.insert(r2); + } + } else if (name == "Deriver") { + if (value != "unknown-deriver") { + auto p = store.storeDir + "/" + value; + if (!store.isStorePath(p)) { + corrupt(); + } + deriver = p; + } + } else if (name == "System") { + system = value; + } else if (name == "Sig") { + sigs.insert(value); + } else if (name == "CA") { + if (!ca.empty()) { + corrupt(); + } + ca = value; + } + + pos = eol + 1; + } + + if (compression.empty()) { + compression = "bzip2"; + } + + if (path.empty() || url.empty() || narSize == 0 || !narHash) { + corrupt(); + } +} + +std::string NarInfo::to_string() const { + std::string res; + res += "StorePath: " + path + "\n"; + res += "URL: " + url + "\n"; + assert(!compression.empty()); + res += "Compression: " + compression + "\n"; + assert(fileHash.type == htSHA256); + res += "FileHash: " + fileHash.to_string(Base32) + "\n"; + res += "FileSize: " + std::to_string(fileSize) + "\n"; + assert(narHash.type == htSHA256); + res += "NarHash: " + narHash.to_string(Base32) + "\n"; + res += "NarSize: " + std::to_string(narSize) + "\n"; + + res += "References: " + concatStringsSep(" ", shortRefs()) + "\n"; + + if (!deriver.empty()) { + res += "Deriver: " + baseNameOf(deriver) + "\n"; + } + + if (!system.empty()) { + res += "System: " + system + "\n"; + } + + for (const auto& sig : sigs) { + res += "Sig: " + sig + "\n"; + } + + if (!ca.empty()) { + res += "CA: " + ca + "\n"; + } + + return res; +} + +} // namespace nix diff --git a/third_party/nix/src/libstore/nar-info.hh b/third_party/nix/src/libstore/nar-info.hh new file mode 100644 index 000000000000..ce362e703f99 --- /dev/null +++ b/third_party/nix/src/libstore/nar-info.hh @@ -0,0 +1,23 @@ +#pragma once + +#include "hash.hh" +#include "store-api.hh" +#include "types.hh" + +namespace nix { + +struct NarInfo : ValidPathInfo { + std::string url; + std::string compression; + Hash fileHash; + uint64_t fileSize = 0; + std::string system; + + NarInfo() {} + NarInfo(const ValidPathInfo& info) : ValidPathInfo(info) {} + NarInfo(const Store& store, const std::string& s, const std::string& whence); + + std::string to_string() const; +}; + +} // namespace nix diff --git a/third_party/nix/src/libstore/nix-store.pc.in b/third_party/nix/src/libstore/nix-store.pc.in new file mode 100644 index 000000000000..6d67b1e03808 --- /dev/null +++ b/third_party/nix/src/libstore/nix-store.pc.in @@ -0,0 +1,9 @@ +prefix=@prefix@ +libdir=@libdir@ +includedir=@includedir@ + +Name: Nix +Description: Nix Package Manager +Version: @PACKAGE_VERSION@ +Libs: -L${libdir} -lnixstore -lnixutil +Cflags: -I${includedir}/nix -std=c++17 diff --git a/third_party/nix/src/libstore/optimise-store.cc b/third_party/nix/src/libstore/optimise-store.cc new file mode 100644 index 000000000000..d7cf2bb74449 --- /dev/null +++ b/third_party/nix/src/libstore/optimise-store.cc @@ -0,0 +1,308 @@ +#include <cerrno> +#include <cstdio> +#include <cstdlib> +#include <cstring> +#include <regex> +#include <utility> + +#include <sys/stat.h> +#include <sys/types.h> +#include <unistd.h> + +#include "globals.hh" +#include "glog/logging.h" +#include "local-store.hh" +#include "util.hh" + +namespace nix { + +static void makeWritable(const Path& path) { + struct stat st; + if (lstat(path.c_str(), &st) != 0) { + throw SysError(format("getting attributes of path '%1%'") % path); + } + if (chmod(path.c_str(), st.st_mode | S_IWUSR) == -1) { + throw SysError(format("changing writability of '%1%'") % path); + } +} + +struct MakeReadOnly { + Path path; + explicit MakeReadOnly(Path path) : path(std::move(path)) {} + ~MakeReadOnly() { + try { + /* This will make the path read-only. */ + if (!path.empty()) { + canonicaliseTimestampAndPermissions(path); + } + } catch (...) { + ignoreException(); + } + } +}; + +LocalStore::InodeHash LocalStore::loadInodeHash() { + DLOG(INFO) << "loading hash inodes in memory"; + InodeHash inodeHash; + + AutoCloseDir dir(opendir(linksDir.c_str())); + if (!dir) { + throw SysError(format("opening directory '%1%'") % linksDir); + } + + struct dirent* dirent; + while (errno = 0, dirent = readdir(dir.get())) { /* sic */ + checkInterrupt(); + // We don't care if we hit non-hash files, anything goes + inodeHash.insert(dirent->d_ino); + } + if (errno) { + throw SysError(format("reading directory '%1%'") % linksDir); + } + + DLOG(INFO) << "loaded " << inodeHash.size() << " hash inodes"; + + return inodeHash; +} + +Strings LocalStore::readDirectoryIgnoringInodes(const Path& path, + const InodeHash& inodeHash) { + Strings names; + + AutoCloseDir dir(opendir(path.c_str())); + if (!dir) { + throw SysError(format("opening directory '%1%'") % path); + } + + struct dirent* dirent; + while (errno = 0, dirent = readdir(dir.get())) { /* sic */ + checkInterrupt(); + + if (inodeHash.count(dirent->d_ino) != 0u) { + DLOG(WARNING) << dirent->d_name << " is already linked"; + continue; + } + + string name = dirent->d_name; + if (name == "." || name == "..") { + continue; + } + names.push_back(name); + } + if (errno) { + throw SysError(format("reading directory '%1%'") % path); + } + + return names; +} + +void LocalStore::optimisePath_(OptimiseStats& stats, const Path& path, + InodeHash& inodeHash) { + checkInterrupt(); + + struct stat st; + if (lstat(path.c_str(), &st) != 0) { + throw SysError(format("getting attributes of path '%1%'") % path); + } + +#if __APPLE__ + /* HFS/macOS has some undocumented security feature disabling hardlinking for + special files within .app dirs. *.app/Contents/PkgInfo and + *.app/Contents/Resources/\*.lproj seem to be the only paths affected. See + https://github.com/NixOS/nix/issues/1443 for more discussion. */ + + if (std::regex_search(path, std::regex("\\.app/Contents/.+$"))) { + debug(format("'%1%' is not allowed to be linked in macOS") % path); + return; + } +#endif + + if (S_ISDIR(st.st_mode)) { + Strings names = readDirectoryIgnoringInodes(path, inodeHash); + for (auto& i : names) { + optimisePath_(stats, path + "/" + i, inodeHash); + } + return; + } + + /* We can hard link regular files and maybe symlinks. */ + if (!S_ISREG(st.st_mode) +#if CAN_LINK_SYMLINK + && !S_ISLNK(st.st_mode) +#endif + ) + return; + + /* Sometimes SNAFUs can cause files in the Nix store to be + modified, in particular when running programs as root under + NixOS (example: $fontconfig/var/cache being modified). Skip + those files. FIXME: check the modification time. */ + if (S_ISREG(st.st_mode) && ((st.st_mode & S_IWUSR) != 0u)) { + LOG(WARNING) << "skipping suspicious writable file '" << path << "'"; + return; + } + + /* This can still happen on top-level files. */ + if (st.st_nlink > 1 && (inodeHash.count(st.st_ino) != 0u)) { + DLOG(INFO) << path << " is already linked, with " << (st.st_nlink - 2) + << " other file(s)"; + return; + } + + /* Hash the file. Note that hashPath() returns the hash over the + NAR serialisation, which includes the execute bit on the file. + Thus, executable and non-executable files with the same + contents *won't* be linked (which is good because otherwise the + permissions would be screwed up). + + Also note that if `path' is a symlink, then we're hashing the + contents of the symlink (i.e. the result of readlink()), not + the contents of the target (which may not even exist). */ + Hash hash = hashPath(htSHA256, path).first; + LOG(INFO) << path << " has hash " << hash.to_string(); + + /* Check if this is a known hash. */ + Path linkPath = linksDir + "/" + hash.to_string(Base32, false); + +retry: + if (!pathExists(linkPath)) { + /* Nope, create a hard link in the links directory. */ + if (link(path.c_str(), linkPath.c_str()) == 0) { + inodeHash.insert(st.st_ino); + return; + } + + switch (errno) { + case EEXIST: + /* Fall through if another process created โlinkPathโ before + we did. */ + break; + + case ENOSPC: + /* On ext4, that probably means the directory index is + full. When that happens, it's fine to ignore it: we + just effectively disable deduplication of this + file. */ + LOG(WARNING) << "cannot link '" << linkPath << " to " << path << ": " + << strerror(errno); + + return; + + default: + throw SysError("cannot link '%1%' to '%2%'", linkPath, path); + } + } + + /* Yes! We've seen a file with the same contents. Replace the + current file with a hard link to that file. */ + struct stat stLink; + if (lstat(linkPath.c_str(), &stLink) != 0) { + throw SysError(format("getting attributes of path '%1%'") % linkPath); + } + + if (st.st_ino == stLink.st_ino) { + DLOG(INFO) << path << " is already linked to " << linkPath; + return; + } + + if (st.st_size != stLink.st_size) { + LOG(WARNING) << "removing corrupted link '" << linkPath << "'"; + unlink(linkPath.c_str()); + goto retry; + } + + DLOG(INFO) << "linking '" << path << "' to '" << linkPath << "'"; + + /* Make the containing directory writable, but only if it's not + the store itself (we don't want or need to mess with its + permissions). */ + bool mustToggle = dirOf(path) != realStoreDir; + if (mustToggle) { + makeWritable(dirOf(path)); + } + + /* When we're done, make the directory read-only again and reset + its timestamp back to 0. */ + MakeReadOnly makeReadOnly(mustToggle ? dirOf(path) : ""); + + Path tempLink = + (format("%1%/.tmp-link-%2%-%3%") % realStoreDir % getpid() % random()) + .str(); + + if (link(linkPath.c_str(), tempLink.c_str()) == -1) { + if (errno == EMLINK) { + /* Too many links to the same file (>= 32000 on most file + systems). This is likely to happen with empty files. + Just shrug and ignore. */ + if (st.st_size != 0) { + LOG(WARNING) << linkPath << " has maximum number of links"; + } + return; + } + throw SysError("cannot link '%1%' to '%2%'", tempLink, linkPath); + } + + /* Atomically replace the old file with the new hard link. */ + if (rename(tempLink.c_str(), path.c_str()) == -1) { + if (unlink(tempLink.c_str()) == -1) { + LOG(ERROR) << "unable to unlink '" << tempLink << "'"; + } + if (errno == EMLINK) { + /* Some filesystems generate too many links on the rename, + rather than on the original link. (Probably it + temporarily increases the st_nlink field before + decreasing it again.) */ + DLOG(WARNING) << "'" << linkPath + << "' has reached maximum number of links"; + return; + } + throw SysError(format("cannot rename '%1%' to '%2%'") % tempLink % path); + } + + stats.filesLinked++; + stats.bytesFreed += st.st_size; + stats.blocksFreed += st.st_blocks; +} + +void LocalStore::optimiseStore(OptimiseStats& stats) { + PathSet paths = queryAllValidPaths(); + InodeHash inodeHash = loadInodeHash(); + + uint64_t done = 0; + + for (auto& i : paths) { + addTempRoot(i); + if (!isValidPath(i)) { + continue; + } /* path was GC'ed, probably */ + { + LOG(INFO) << "optimising path '" << i << "'"; + optimisePath_(stats, realStoreDir + "/" + baseNameOf(i), inodeHash); + } + done++; + } +} + +static string showBytes(unsigned long long bytes) { + return (format("%.2f MiB") % (bytes / (1024.0 * 1024.0))).str(); +} + +void LocalStore::optimiseStore() { + OptimiseStats stats; + + optimiseStore(stats); + + LOG(INFO) << showBytes(stats.bytesFreed) << " freed by hard-linking " + << stats.filesLinked << " files"; +} + +void LocalStore::optimisePath(const Path& path) { + OptimiseStats stats; + InodeHash inodeHash; + + if (settings.autoOptimiseStore) { + optimisePath_(stats, path, inodeHash); + } +} + +} // namespace nix diff --git a/third_party/nix/src/libstore/parsed-derivations.cc b/third_party/nix/src/libstore/parsed-derivations.cc new file mode 100644 index 000000000000..1186203249de --- /dev/null +++ b/third_party/nix/src/libstore/parsed-derivations.cc @@ -0,0 +1,125 @@ +#include "parsed-derivations.hh" + +namespace nix { + +ParsedDerivation::ParsedDerivation(const Path& drvPath, BasicDerivation& drv) + : drvPath(drvPath), drv(drv) { + /* Parse the __json attribute, if any. */ + auto jsonAttr = drv.env.find("__json"); + if (jsonAttr != drv.env.end()) { + try { + structuredAttrs = nlohmann::json::parse(jsonAttr->second); + } catch (std::exception& e) { + throw Error("cannot process __json attribute of '%s': %s", drvPath, + e.what()); + } + } +} + +std::optional<std::string> ParsedDerivation::getStringAttr( + const std::string& name) const { + if (structuredAttrs) { + auto i = structuredAttrs->find(name); + if (i == structuredAttrs->end()) { + return {}; + } + if (!i->is_string()) { + throw Error("attribute '%s' of derivation '%s' must be a string", name, + drvPath); + } + return i->get<std::string>(); + + } else { + auto i = drv.env.find(name); + if (i == drv.env.end()) { + return {}; + } + return i->second; + } +} + +bool ParsedDerivation::getBoolAttr(const std::string& name, bool def) const { + if (structuredAttrs) { + auto i = structuredAttrs->find(name); + if (i == structuredAttrs->end()) { + return def; + } + if (!i->is_boolean()) { + throw Error("attribute '%s' of derivation '%s' must be a Boolean", name, + drvPath); + } + return i->get<bool>(); + + } else { + auto i = drv.env.find(name); + if (i == drv.env.end()) { + return def; + } + return i->second == "1"; + } +} + +std::optional<Strings> ParsedDerivation::getStringsAttr( + const std::string& name) const { + if (structuredAttrs) { + auto i = structuredAttrs->find(name); + if (i == structuredAttrs->end()) { + return {}; + } + if (!i->is_array()) { + throw Error("attribute '%s' of derivation '%s' must be a list of strings", + name, drvPath); + } + Strings res; + for (const auto& j : *i) { + if (!j.is_string()) { + throw Error( + "attribute '%s' of derivation '%s' must be a list of strings", name, + drvPath); + } + res.push_back(j.get<std::string>()); + } + return res; + + } else { + auto i = drv.env.find(name); + if (i == drv.env.end()) { + return {}; + } + return tokenizeString<Strings>(i->second); + } +} + +StringSet ParsedDerivation::getRequiredSystemFeatures() const { + StringSet res; + for (auto& i : getStringsAttr("requiredSystemFeatures").value_or(Strings())) { + res.insert(i); + } + return res; +} + +bool ParsedDerivation::canBuildLocally() const { + if (drv.platform != settings.thisSystem.get() && + (settings.extraPlatforms.get().count(drv.platform) == 0u) && + !drv.isBuiltin()) { + return false; + } + + for (auto& feature : getRequiredSystemFeatures()) { + if (settings.systemFeatures.get().count(feature) == 0u) { + return false; + } + } + + return true; +} + +bool ParsedDerivation::willBuildLocally() const { + return getBoolAttr("preferLocalBuild") && canBuildLocally(); +} + +bool ParsedDerivation::substitutesAllowed() const { + return getBoolAttr("allowSubstitutes", true); +} + +} // namespace nix diff --git a/third_party/nix/src/libstore/parsed-derivations.hh b/third_party/nix/src/libstore/parsed-derivations.hh new file mode 100644 index 000000000000..7b2da3b566ac --- /dev/null +++ b/third_party/nix/src/libstore/parsed-derivations.hh @@ -0,0 +1,34 @@ +#include <nlohmann/json.hpp> + +#include "derivations.hh" + +namespace nix { + +class ParsedDerivation { + Path drvPath; + BasicDerivation& drv; + std::optional<nlohmann::json> structuredAttrs; + + public: + ParsedDerivation(const Path& drvPath, BasicDerivation& drv); + + const std::optional<nlohmann::json>& getStructuredAttrs() const { + return structuredAttrs; + } + + std::optional<std::string> getStringAttr(const std::string& name) const; + + bool getBoolAttr(const std::string& name, bool def = false) const; + + std::optional<Strings> getStringsAttr(const std::string& name) const; + + StringSet getRequiredSystemFeatures() const; + + bool canBuildLocally() const; + + bool willBuildLocally() const; + + bool substitutesAllowed() const; +}; + +} // namespace nix diff --git a/third_party/nix/src/libstore/pathlocks.cc b/third_party/nix/src/libstore/pathlocks.cc new file mode 100644 index 000000000000..eeee5ee1e9c9 --- /dev/null +++ b/third_party/nix/src/libstore/pathlocks.cc @@ -0,0 +1,172 @@ +#include "pathlocks.hh" + +#include <cerrno> +#include <cstdlib> + +#include <fcntl.h> +#include <glog/logging.h> +#include <sys/file.h> +#include <sys/stat.h> +#include <sys/types.h> + +#include "sync.hh" +#include "util.hh" + +namespace nix { + +AutoCloseFD openLockFile(const Path& path, bool create) { + AutoCloseFD fd; + + fd = open(path.c_str(), O_CLOEXEC | O_RDWR | (create ? O_CREAT : 0), 0600); + if (!fd && (create || errno != ENOENT)) { + throw SysError(format("opening lock file '%1%'") % path); + } + + return fd; +} + +void deleteLockFile(const Path& path, int fd) { + /* Get rid of the lock file. Have to be careful not to introduce + races. Write a (meaningless) token to the file to indicate to + other processes waiting on this lock that the lock is stale + (deleted). */ + unlink(path.c_str()); + writeFull(fd, "d"); + /* Note that the result of unlink() is ignored; removing the lock + file is an optimisation, not a necessity. */ +} + +bool lockFile(int fd, LockType lockType, bool wait) { + int type; + if (lockType == ltRead) { + type = LOCK_SH; + } else if (lockType == ltWrite) { + type = LOCK_EX; + } else if (lockType == ltNone) { + type = LOCK_UN; + } else { + abort(); + } + + if (wait) { + while (flock(fd, type) != 0) { + checkInterrupt(); + if (errno != EINTR) { + throw SysError(format("acquiring/releasing lock")); + } + return false; + } + } else { + while (flock(fd, type | LOCK_NB) != 0) { + checkInterrupt(); + if (errno == EWOULDBLOCK) { + return false; + } + if (errno != EINTR) { + throw SysError(format("acquiring/releasing lock")); + } + } + } + + return true; +} + +PathLocks::PathLocks() : deletePaths(false) {} + +PathLocks::PathLocks(const PathSet& paths, const string& waitMsg) + : deletePaths(false) { + lockPaths(paths, waitMsg); +} + +bool PathLocks::lockPaths(const PathSet& paths, const string& waitMsg, + bool wait) { + assert(fds.empty()); + + /* Note that `fds' is built incrementally so that the destructor + will only release those locks that we have already acquired. */ + + /* Acquire the lock for each path in sorted order. This ensures + that locks are always acquired in the same order, thus + preventing deadlocks. */ + for (auto& path : paths) { + checkInterrupt(); + Path lockPath = path + ".lock"; + + DLOG(INFO) << "locking path '" << path << "'"; + + AutoCloseFD fd; + + while (true) { + /* Open/create the lock file. */ + fd = openLockFile(lockPath, true); + + /* Acquire an exclusive lock. */ + if (!lockFile(fd.get(), ltWrite, false)) { + if (wait) { + if (!waitMsg.empty()) { + LOG(WARNING) << waitMsg; + } + lockFile(fd.get(), ltWrite, true); + } else { + /* Failed to lock this path; release all other + locks. */ + unlock(); + return false; + } + } + + DLOG(INFO) << "lock acquired on '" << lockPath << "'"; + + /* Check that the lock file hasn't become stale (i.e., + hasn't been unlinked). */ + struct stat st; + if (fstat(fd.get(), &st) == -1) { + throw SysError(format("statting lock file '%1%'") % lockPath); + } + if (st.st_size != 0) { + /* This lock file has been unlinked, so we're holding + a lock on a deleted file. This means that other + processes may create and acquire a lock on + `lockPath', and proceed. So we must retry. */ + DLOG(INFO) << "open lock file '" << lockPath << "' has become stale"; + } else { + break; + } + } + + /* Use borrow so that the descriptor isn't closed. */ + fds.emplace_back(fd.release(), lockPath); + } + + return true; +} + +PathLocks::~PathLocks() { + try { + unlock(); + } catch (...) { + ignoreException(); + } +} + +void PathLocks::unlock() { + for (auto& i : fds) { + if (deletePaths) { + deleteLockFile(i.second, i.first); + } + + if (close(i.first) == -1) { + LOG(WARNING) << "cannot close lock file on '" << i.second << "'"; + } + + DLOG(INFO) << "lock released on '" << i.second << "'"; + } + + fds.clear(); +} + +void PathLocks::setDeletion(bool deletePaths) { + this->deletePaths = deletePaths; +} + +} // namespace nix diff --git a/third_party/nix/src/libstore/pathlocks.hh b/third_party/nix/src/libstore/pathlocks.hh new file mode 100644 index 000000000000..90184989cde3 --- /dev/null +++ b/third_party/nix/src/libstore/pathlocks.hh @@ -0,0 +1,35 @@ +#pragma once + +#include "util.hh" + +namespace nix { + +/* Open (possibly create) a lock file and return the file descriptor. + -1 is returned if create is false and the lock could not be opened + because it doesn't exist. Any other error throws an exception. */ +AutoCloseFD openLockFile(const Path& path, bool create); + +/* Delete an open lock file. */ +void deleteLockFile(const Path& path, int fd); + +enum LockType { ltRead, ltWrite, ltNone }; + +bool lockFile(int fd, LockType lockType, bool wait); + +class PathLocks { + private: + typedef std::pair<int, Path> FDPair; + list<FDPair> fds; + bool deletePaths; + + public: + PathLocks(); + PathLocks(const PathSet& paths, const string& waitMsg = ""); + bool lockPaths(const PathSet& _paths, const string& waitMsg = "", + bool wait = true); + ~PathLocks(); + void unlock(); + void setDeletion(bool deletePaths); +}; + +} // namespace nix diff --git a/third_party/nix/src/libstore/profiles.cc b/third_party/nix/src/libstore/profiles.cc new file mode 100644 index 000000000000..bdb90b7388f7 --- /dev/null +++ b/third_party/nix/src/libstore/profiles.cc @@ -0,0 +1,249 @@ +#include "profiles.hh" + +#include <cerrno> +#include <cstdio> + +#include <glog/logging.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <unistd.h> + +#include "store-api.hh" +#include "util.hh" + +namespace nix { + +static bool cmpGensByNumber(const Generation& a, const Generation& b) { + return a.number < b.number; +} + +/* Parse a generation name of the format + `<profilename>-<number>-link'. */ +static int parseName(const string& profileName, const string& name) { + if (string(name, 0, profileName.size() + 1) != profileName + "-") { + return -1; + } + string s = string(name, profileName.size() + 1); + string::size_type p = s.find("-link"); + if (p == string::npos) { + return -1; + } + int n; + if (string2Int(string(s, 0, p), n) && n >= 0) { + return n; + } + return -1; +} + +Generations findGenerations(const Path& profile, int& curGen) { + Generations gens; + + Path profileDir = dirOf(profile); + string profileName = baseNameOf(profile); + + for (auto& i : readDirectory(profileDir)) { + int n; + if ((n = parseName(profileName, i.name)) != -1) { + Generation gen; + gen.path = profileDir + "/" + i.name; + gen.number = n; + struct stat st; + if (lstat(gen.path.c_str(), &st) != 0) { + throw SysError(format("statting '%1%'") % gen.path); + } + gen.creationTime = st.st_mtime; + gens.push_back(gen); + } + } + + gens.sort(cmpGensByNumber); + + curGen = pathExists(profile) ? parseName(profileName, readLink(profile)) : -1; + + return gens; +} + +static void makeName(const Path& profile, unsigned int num, Path& outLink) { + Path prefix = (format("%1%-%2%") % profile % num).str(); + outLink = prefix + "-link"; +} + +Path createGeneration(const ref<LocalFSStore>& store, const Path& profile, + const Path& outPath) { + /* The new generation number should be higher than old the + previous ones. */ + int dummy; + Generations gens = findGenerations(profile, dummy); + + unsigned int num; + if (!gens.empty()) { + Generation last = gens.back(); + + if (readLink(last.path) == outPath) { + /* We only create a new generation symlink if it differs + from the last one. + + This helps keeping gratuitous installs/rebuilds from piling + up uncontrolled numbers of generations, cluttering up the + UI like grub. */ + return last.path; + } + + num = gens.back().number; + } else { + num = 0; + } + + /* Create the new generation. Note that addPermRoot() blocks if + the garbage collector is running to prevent the stuff we've + built from moving from the temporary roots (which the GC knows) + to the permanent roots (of which the GC would have a stale + view). If we didn't do it this way, the GC might remove the + user environment etc. we've just built. */ + Path generation; + makeName(profile, num + 1, generation); + store->addPermRoot(outPath, generation, false, true); + + return generation; +} + +static void removeFile(const Path& path) { + if (remove(path.c_str()) == -1) { + throw SysError(format("cannot unlink '%1%'") % path); + } +} + +void deleteGeneration(const Path& profile, unsigned int gen) { + Path generation; + makeName(profile, gen, generation); + removeFile(generation); +} + +static void deleteGeneration2(const Path& profile, unsigned int gen, + bool dryRun) { + if (dryRun) { + LOG(INFO) << "would remove generation " << gen; + } else { + LOG(INFO) << "removing generation " << gen; + deleteGeneration(profile, gen); + } +} + +void deleteGenerations(const Path& profile, + const std::set<unsigned int>& gensToDelete, + bool dryRun) { + PathLocks lock; + lockProfile(lock, profile); + + int curGen; + Generations gens = findGenerations(profile, curGen); + + if (gensToDelete.find(curGen) != gensToDelete.end()) { + throw Error(format("cannot delete current generation of profile %1%'") % + profile); + } + + for (auto& i : gens) { + if (gensToDelete.find(i.number) == gensToDelete.end()) { + continue; + } + deleteGeneration2(profile, i.number, dryRun); + } +} + +void deleteGenerationsGreaterThan(const Path& profile, int max, bool dryRun) { + PathLocks lock; + lockProfile(lock, profile); + + int curGen; + bool fromCurGen = false; + Generations gens = findGenerations(profile, curGen); + for (auto i = gens.rbegin(); i != gens.rend(); ++i) { + if (i->number == curGen) { + fromCurGen = true; + max--; + continue; + } + if (fromCurGen) { + if (max != 0) { + max--; + continue; + } + deleteGeneration2(profile, i->number, dryRun); + } + } +} + +void deleteOldGenerations(const Path& profile, bool dryRun) { + PathLocks lock; + lockProfile(lock, profile); + + int curGen; + Generations gens = findGenerations(profile, curGen); + + for (auto& i : gens) { + if (i.number != curGen) { + deleteGeneration2(profile, i.number, dryRun); + } + } +} + +void deleteGenerationsOlderThan(const Path& profile, time_t t, bool dryRun) { + PathLocks lock; + lockProfile(lock, profile); + + int curGen; + Generations gens = findGenerations(profile, curGen); + + bool canDelete = false; + for (auto i = gens.rbegin(); i != gens.rend(); ++i) { + if (canDelete) { + assert(i->creationTime < t); + if (i->number != curGen) { + deleteGeneration2(profile, i->number, dryRun); + } + } else if (i->creationTime < t) { + /* We may now start deleting generations, but we don't + delete this generation yet, because this generation was + still the one that was active at the requested point in + time. */ + canDelete = true; + } + } +} + +void deleteGenerationsOlderThan(const Path& profile, const string& timeSpec, + bool dryRun) { + time_t curTime = time(nullptr); + string strDays = string(timeSpec, 0, timeSpec.size() - 1); + int days; + + if (!string2Int(strDays, days) || days < 1) { + throw Error(format("invalid number of days specifier '%1%'") % timeSpec); + } + + time_t oldTime = curTime - days * 24 * 3600; + + deleteGenerationsOlderThan(profile, oldTime, dryRun); +} + +void switchLink(const Path& link, Path target) { + /* Hacky. */ + if (dirOf(target) == dirOf(link)) { + target = baseNameOf(target); + } + + replaceSymlink(target, link); +} + +void lockProfile(PathLocks& lock, const Path& profile) { + lock.lockPaths({profile}, + (format("waiting for lock on profile '%1%'") % profile).str()); + lock.setDeletion(true); +} + +string optimisticLockProfile(const Path& profile) { + return pathExists(profile) ? readLink(profile) : ""; +} + +} // namespace nix diff --git a/third_party/nix/src/libstore/profiles.hh b/third_party/nix/src/libstore/profiles.hh new file mode 100644 index 000000000000..ab31cc993af5 --- /dev/null +++ b/third_party/nix/src/libstore/profiles.hh @@ -0,0 +1,61 @@ +#pragma once + +#include <time.h> + +#include "pathlocks.hh" +#include "types.hh" + +namespace nix { + +struct Generation { + int number; + Path path; + time_t creationTime; + Generation() { number = -1; } + operator bool() const { return number != -1; } +}; + +typedef list<Generation> Generations; + +/* Returns the list of currently present generations for the specified + profile, sorted by generation number. */ +Generations findGenerations(const Path& profile, int& curGen); + +class LocalFSStore; + +Path createGeneration(const ref<LocalFSStore>& store, const Path& profile, + const Path& outPath); + +void deleteGeneration(const Path& profile, unsigned int gen); + +void deleteGenerations(const Path& profile, + const std::set<unsigned int>& gensToDelete, bool dryRun); + +void deleteGenerationsGreaterThan(const Path& profile, const int max, + bool dryRun); + +void deleteOldGenerations(const Path& profile, bool dryRun); + +void deleteGenerationsOlderThan(const Path& profile, time_t t, bool dryRun); + +void deleteGenerationsOlderThan(const Path& profile, const string& timeSpec, + bool dryRun); + +void switchLink(const Path& link, Path target); + +/* Ensure exclusive access to a profile. Any command that modifies + the profile first acquires this lock. */ +void lockProfile(PathLocks& lock, const Path& profile); + +/* Optimistic locking is used by long-running operations like `nix-env + -i'. Instead of acquiring the exclusive lock for the entire + duration of the operation, we just perform the operation + optimistically (without an exclusive lock), and check at the end + whether the profile changed while we were busy (i.e., the symlink + target changed). If so, the operation is restarted. Restarting is + generally cheap, since the build results are still in the Nix + store. Most of the time, only the user environment has to be + rebuilt. */ +string optimisticLockProfile(const Path& profile); + +} // namespace nix diff --git a/third_party/nix/src/libstore/references.cc b/third_party/nix/src/libstore/references.cc new file mode 100644 index 000000000000..c8cebaec08f3 --- /dev/null +++ b/third_party/nix/src/libstore/references.cc @@ -0,0 +1,124 @@ +#include "references.hh" + +#include <cstdlib> +#include <map> + +#include <glog/logging.h> + +#include "archive.hh" +#include "hash.hh" +#include "util.hh" + +namespace nix { + +static unsigned int refLength = 32; /* characters */ + +static void search(const unsigned char* s, size_t len, StringSet& hashes, + StringSet& seen) { + static bool initialised = false; + static bool isBase32[256]; + if (!initialised) { + for (bool& i : isBase32) { + i = false; + } + for (char base32Char : base32Chars) { + isBase32[(unsigned char)base32Char] = true; + } + initialised = true; + } + + for (size_t i = 0; i + refLength <= len;) { + int j; + bool match = true; + for (j = refLength - 1; j >= 0; --j) { + if (!isBase32[(unsigned char)s[i + j]]) { + i += j + 1; + match = false; + break; + } + } + if (!match) { + continue; + } + string ref((const char*)s + i, refLength); + if (hashes.find(ref) != hashes.end()) { + DLOG(INFO) << "found reference to '" << ref << "' at offset " << i; + seen.insert(ref); + hashes.erase(ref); + } + ++i; + } +} + +struct RefScanSink : Sink { + HashSink hashSink; + StringSet hashes; + StringSet seen; + + string tail; + + RefScanSink() : hashSink(htSHA256) {} + + void operator()(const unsigned char* data, size_t len) override; +}; + +void RefScanSink::operator()(const unsigned char* data, size_t len) { + hashSink(data, len); + + /* It's possible that a reference spans the previous and current + fragment, so search in the concatenation of the tail of the + previous fragment and the start of the current fragment. */ + string s = + tail + string((const char*)data, len > refLength ? refLength : len); + search((const unsigned char*)s.data(), s.size(), hashes, seen); + + search(data, len, hashes, seen); + + size_t tailLen = len <= refLength ? len : refLength; + tail = string(tail, tail.size() < refLength - tailLen + ? 0 + : tail.size() - (refLength - tailLen)) + + string((const char*)data + len - tailLen, tailLen); +} + +PathSet scanForReferences(const string& path, const PathSet& refs, + HashResult& hash) { + RefScanSink sink; + std::map<string, Path> backMap; + + /* For efficiency (and a higher hit rate), just search for the + hash part of the file name. (This assumes that all references + have the form `HASH-bla'). */ + for (auto& i : refs) { + string baseName = baseNameOf(i); + string::size_type pos = baseName.find('-'); + if (pos == string::npos) { + throw Error(format("bad reference '%1%'") % i); + } + string s = string(baseName, 0, pos); + assert(s.size() == refLength); + assert(backMap.find(s) == backMap.end()); + // parseHash(htSHA256, s); + sink.hashes.insert(s); + backMap[s] = i; + } + + /* Look for the hashes in the NAR dump of the path. */ + dumpPath(path, sink); + + /* Map the hashes found back to their store paths. */ + PathSet found; + for (auto& i : sink.seen) { + std::map<string, Path>::iterator j; + if ((j = backMap.find(i)) == backMap.end()) { + abort(); + } + found.insert(j->second); + } + + hash = sink.hashSink.finish(); + + return found; +} + +} // namespace nix diff --git a/third_party/nix/src/libstore/references.hh b/third_party/nix/src/libstore/references.hh new file mode 100644 index 000000000000..2229150e3359 --- /dev/null +++ b/third_party/nix/src/libstore/references.hh @@ -0,0 +1,11 @@ +#pragma once + +#include "hash.hh" +#include "types.hh" + +namespace nix { + +PathSet scanForReferences(const Path& path, const PathSet& refs, + HashResult& hash); + +} diff --git a/third_party/nix/src/libstore/remote-fs-accessor.cc b/third_party/nix/src/libstore/remote-fs-accessor.cc new file mode 100644 index 000000000000..ca478c213fde --- /dev/null +++ b/third_party/nix/src/libstore/remote-fs-accessor.cc @@ -0,0 +1,131 @@ +#include "remote-fs-accessor.hh" + +#include <fcntl.h> +#include <sys/stat.h> +#include <sys/types.h> + +#include "json.hh" +#include "nar-accessor.hh" + +namespace nix { + +RemoteFSAccessor::RemoteFSAccessor(const ref<Store>& store, + const Path& cacheDir) + : store(store), cacheDir(cacheDir) { + if (!cacheDir.empty()) { + createDirs(cacheDir); + } +} + +Path RemoteFSAccessor::makeCacheFile(const Path& storePath, + const std::string& ext) { + assert(!cacheDir.empty()); + return fmt("%s/%s.%s", cacheDir, storePathToHash(storePath), ext); +} + +void RemoteFSAccessor::addToCache(const Path& storePath, const std::string& nar, + const ref<FSAccessor>& narAccessor) { + nars.emplace(storePath, narAccessor); + + if (!cacheDir.empty()) { + try { + std::ostringstream str; + JSONPlaceholder jsonRoot(str); + listNar(jsonRoot, narAccessor, "", true); + writeFile(makeCacheFile(storePath, "ls"), str.str()); + + /* FIXME: do this asynchronously. */ + writeFile(makeCacheFile(storePath, "nar"), nar); + + } catch (...) { + ignoreException(); + } + } +} + +std::pair<ref<FSAccessor>, Path> RemoteFSAccessor::fetch(const Path& path_) { + auto path = canonPath(path_); + + auto storePath = store->toStorePath(path); + std::string restPath = std::string(path, storePath.size()); + + if (!store->isValidPath(storePath)) { + throw InvalidPath(format("path '%1%' is not a valid store path") % + storePath); + } + + auto i = nars.find(storePath); + if (i != nars.end()) { + return {i->second, restPath}; + } + + StringSink sink; + std::string listing; + Path cacheFile; + + if (!cacheDir.empty() && + pathExists(cacheFile = makeCacheFile(storePath, "nar"))) { + try { + listing = nix::readFile(makeCacheFile(storePath, "ls")); + + auto narAccessor = makeLazyNarAccessor( + listing, [cacheFile](uint64_t offset, uint64_t length) { + AutoCloseFD fd = open(cacheFile.c_str(), O_RDONLY | O_CLOEXEC); + if (!fd) { + throw SysError("opening NAR cache file '%s'", cacheFile); + } + + if (lseek(fd.get(), offset, SEEK_SET) != (off_t)offset) { + throw SysError("seeking in '%s'", cacheFile); + } + + std::string buf(length, 0); + readFull(fd.get(), (unsigned char*)buf.data(), length); + + return buf; + }); + + nars.emplace(storePath, narAccessor); + return {narAccessor, restPath}; + + } catch (SysError&) { + } + + try { + *sink.s = nix::readFile(cacheFile); + + auto narAccessor = makeNarAccessor(sink.s); + nars.emplace(storePath, narAccessor); + return {narAccessor, restPath}; + + } catch (SysError&) { + } + } + + store->narFromPath(storePath, sink); + auto narAccessor = makeNarAccessor(sink.s); + addToCache(storePath, *sink.s, narAccessor); + return {narAccessor, restPath}; +} + +FSAccessor::Stat RemoteFSAccessor::stat(const Path& path) { + auto res = fetch(path); + return res.first->stat(res.second); +} + +StringSet RemoteFSAccessor::readDirectory(const Path& path) { + auto res = fetch(path); + return res.first->readDirectory(res.second); +} + +std::string RemoteFSAccessor::readFile(const Path& path) { + auto res = fetch(path); + return res.first->readFile(res.second); +} + +std::string RemoteFSAccessor::readLink(const Path& path) { + auto res = fetch(path); + return res.first->readLink(res.second); +} + +} // namespace nix diff --git a/third_party/nix/src/libstore/remote-fs-accessor.hh b/third_party/nix/src/libstore/remote-fs-accessor.hh new file mode 100644 index 000000000000..32c729f50dcf --- /dev/null +++ b/third_party/nix/src/libstore/remote-fs-accessor.hh @@ -0,0 +1,38 @@ +#pragma once + +#include "fs-accessor.hh" +#include "ref.hh" +#include "store-api.hh" + +namespace nix { + +class RemoteFSAccessor : public FSAccessor { + ref<Store> store; + + std::map<Path, ref<FSAccessor>> nars; + + Path cacheDir; + + std::pair<ref<FSAccessor>, Path> fetch(const Path& path_); + + friend class BinaryCacheStore; + + Path makeCacheFile(const Path& storePath, const std::string& ext); + + void addToCache(const Path& storePath, const std::string& nar, + const ref<FSAccessor>& narAccessor); + + public: + RemoteFSAccessor(const ref<Store>& store, + const /* FIXME: use std::optional */ Path& cacheDir = ""); + + Stat stat(const Path& path) override; + + StringSet readDirectory(const Path& path) override; + + std::string readFile(const Path& path) override; + + std::string readLink(const Path& path) override; +}; + +} // namespace nix diff --git a/third_party/nix/src/libstore/remote-store.cc b/third_party/nix/src/libstore/remote-store.cc new file mode 100644 index 000000000000..c4215800da8d --- /dev/null +++ b/third_party/nix/src/libstore/remote-store.cc @@ -0,0 +1,749 @@ +#include "remote-store.hh" + +#include <cerrno> +#include <cstring> + +#include <fcntl.h> +#include <glog/logging.h> +#include <sys/socket.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <sys/un.h> +#include <unistd.h> + +#include "affinity.hh" +#include "archive.hh" +#include "derivations.hh" +#include "finally.hh" +#include "globals.hh" +#include "pool.hh" +#include "prefork-compat.hh" +#include "serialise.hh" +#include "util.hh" +#include "worker-protocol.hh" + +namespace nix { + +Path readStorePath(Store& store, Source& from) { + Path path = readString(from); + store.assertStorePath(path); + return path; +} + +template <class T> +T readStorePaths(Store& store, Source& from) { + T paths = readStrings<T>(from); + for (auto& i : paths) { + store.assertStorePath(i); + } + return paths; +} + +template PathSet readStorePaths(Store& store, Source& from); +template Paths readStorePaths(Store& store, Source& from); + +/* TODO: Separate these store impls into different files, give them better names + */ +RemoteStore::RemoteStore(const Params& params) + : Store(params), + connections(make_ref<Pool<Connection>>( + std::max(1, (int)maxConnections), + [this]() { return openConnectionWrapper(); }, + [this](const ref<Connection>& r) { + return r->to.good() && r->from.good() && + std::chrono::duration_cast<std::chrono::seconds>( + std::chrono::steady_clock::now() - r->startTime) + .count() < maxConnectionAge; + })) {} + +ref<RemoteStore::Connection> RemoteStore::openConnectionWrapper() { + if (failed) { + throw Error("opening a connection to remote store '%s' previously failed", + getUri()); + } + try { + return openConnection(); + } catch (...) { + failed = true; + throw; + } +} + +UDSRemoteStore::UDSRemoteStore(const Params& params) + : Store(params), LocalFSStore(params), RemoteStore(params) {} + +UDSRemoteStore::UDSRemoteStore(std::string socket_path, const Params& params) + : Store(params), + LocalFSStore(params), + RemoteStore(params), + path(socket_path) {} + +std::string UDSRemoteStore::getUri() { + if (path) { + return std::string("unix://") + *path; + } + return "daemon"; +} + +ref<RemoteStore::Connection> UDSRemoteStore::openConnection() { + auto conn = make_ref<Connection>(); + + /* Connect to a daemon that does the privileged work for us. */ + conn->fd = socket(PF_UNIX, + SOCK_STREAM +#ifdef SOCK_CLOEXEC + | SOCK_CLOEXEC +#endif + , + 0); + if (!conn->fd) { + throw SysError("cannot create Unix domain socket"); + } + closeOnExec(conn->fd.get()); + + string socketPath = path ? *path : settings.nixDaemonSocketFile; + + struct sockaddr_un addr; + addr.sun_family = AF_UNIX; + if (socketPath.size() + 1 >= sizeof(addr.sun_path)) { + throw Error(format("socket path '%1%' is too long") % socketPath); + } + strcpy(addr.sun_path, socketPath.c_str()); + + if (::connect(conn->fd.get(), (struct sockaddr*)&addr, sizeof(addr)) == -1) { + throw SysError(format("cannot connect to daemon at '%1%'") % socketPath); + } + + conn->from.fd = conn->fd.get(); + conn->to.fd = conn->fd.get(); + + conn->startTime = std::chrono::steady_clock::now(); + + initConnection(*conn); + + return conn; +} + +void RemoteStore::initConnection(Connection& conn) { + /* Send the magic greeting, check for the reply. */ + try { + conn.to << WORKER_MAGIC_1; + conn.to.flush(); + unsigned int magic = readInt(conn.from); + if (magic != WORKER_MAGIC_2) { + throw Error("protocol mismatch"); + } + + conn.from >> conn.daemonVersion; + if (GET_PROTOCOL_MAJOR(conn.daemonVersion) != + GET_PROTOCOL_MAJOR(PROTOCOL_VERSION)) { + throw Error("Nix daemon protocol version not supported"); + } + if (GET_PROTOCOL_MINOR(conn.daemonVersion) < 10) { + throw Error("the Nix daemon version is too old"); + } + conn.to << PROTOCOL_VERSION; + + if (GET_PROTOCOL_MINOR(conn.daemonVersion) >= 14) { + int cpu = sameMachine() && settings.lockCPU ? lockToCurrentCPU() : -1; + if (cpu != -1) { + conn.to << 1 << cpu; + } else { + conn.to << 0; + } + } + + if (GET_PROTOCOL_MINOR(conn.daemonVersion) >= 11) { + conn.to << 0u; + } + + auto ex = conn.processStderr(); + if (ex) { + std::rethrow_exception(ex); + } + } catch (Error& e) { + throw Error("cannot open connection to remote store '%s': %s", getUri(), + e.what()); + } + + setOptions(conn); +} + +void RemoteStore::setOptions(Connection& conn) { + conn.to << wopSetOptions << static_cast<uint64_t>(settings.keepFailed) + << static_cast<uint64_t>(settings.keepGoing) + // TODO(tazjin): Remove the verbosity stuff here. + << static_cast<uint64_t>(settings.tryFallback) << compat::kInfo + << settings.maxBuildJobs << settings.maxSilentTime + << 1u + // TODO(tazjin): what behaviour does this toggle remotely? + << (settings.verboseBuild ? compat::kError : compat::kVomit) + << 0 // obsolete log type + << 0 /* obsolete print build trace */ + << settings.buildCores + << static_cast<uint64_t>(settings.useSubstitutes); + + if (GET_PROTOCOL_MINOR(conn.daemonVersion) >= 12) { + std::map<std::string, Config::SettingInfo> overrides; + globalConfig.getSettings(overrides, true); + overrides.erase(settings.keepFailed.name); + overrides.erase(settings.keepGoing.name); + overrides.erase(settings.tryFallback.name); + overrides.erase(settings.maxBuildJobs.name); + overrides.erase(settings.maxSilentTime.name); + overrides.erase(settings.buildCores.name); + overrides.erase(settings.useSubstitutes.name); + overrides.erase(settings.showTrace.name); + conn.to << overrides.size(); + for (auto& i : overrides) { + conn.to << i.first << i.second.value; + } + } + + auto ex = conn.processStderr(); + if (ex) { + std::rethrow_exception(ex); + } +} + +/* A wrapper around Pool<RemoteStore::Connection>::Handle that marks + the connection as bad (causing it to be closed) if a non-daemon + exception is thrown before the handle is closed. Such an exception + causes a deviation from the expected protocol and therefore a + desynchronization between the client and daemon. */ +struct ConnectionHandle { + Pool<RemoteStore::Connection>::Handle handle; + bool daemonException = false; + + explicit ConnectionHandle(Pool<RemoteStore::Connection>::Handle&& handle) + : handle(std::move(handle)) {} + + ConnectionHandle(ConnectionHandle&& h) : handle(std::move(h.handle)) {} + + ~ConnectionHandle() { + if (!daemonException && (std::uncaught_exceptions() != 0)) { + handle.markBad(); + // TODO(tazjin): are these types of things supposed to be DEBUG? + DLOG(INFO) << "closing daemon connection because of an exception"; + } + } + + RemoteStore::Connection* operator->() { return &*handle; } + + void processStderr(Sink* sink = nullptr, Source* source = nullptr) { + auto ex = handle->processStderr(sink, source); + if (ex) { + daemonException = true; + std::rethrow_exception(ex); + } + } +}; + +ConnectionHandle RemoteStore::getConnection() { + return ConnectionHandle(connections->get()); +} + +bool RemoteStore::isValidPathUncached(const Path& path) { + auto conn(getConnection()); + conn->to << wopIsValidPath << path; + conn.processStderr(); + return readInt(conn->from) != 0u; +} + +PathSet RemoteStore::queryValidPaths(const PathSet& paths, + SubstituteFlag maybeSubstitute) { + auto conn(getConnection()); + if (GET_PROTOCOL_MINOR(conn->daemonVersion) < 12) { + PathSet res; + for (auto& i : paths) { + if (isValidPath(i)) { + res.insert(i); + } + } + return res; + } + conn->to << wopQueryValidPaths << paths; + conn.processStderr(); + return readStorePaths<PathSet>(*this, conn->from); +} + +PathSet RemoteStore::queryAllValidPaths() { + auto conn(getConnection()); + conn->to << wopQueryAllValidPaths; + conn.processStderr(); + return readStorePaths<PathSet>(*this, conn->from); +} + +PathSet RemoteStore::querySubstitutablePaths(const PathSet& paths) { + auto conn(getConnection()); + if (GET_PROTOCOL_MINOR(conn->daemonVersion) < 12) { + PathSet res; + for (auto& i : paths) { + conn->to << wopHasSubstitutes << i; + conn.processStderr(); + if (readInt(conn->from) != 0u) { + res.insert(i); + } + } + return res; + } + conn->to << wopQuerySubstitutablePaths << paths; + conn.processStderr(); + return readStorePaths<PathSet>(*this, conn->from); +} + +void RemoteStore::querySubstitutablePathInfos(const PathSet& paths, + SubstitutablePathInfos& infos) { + if (paths.empty()) { + return; + } + + auto conn(getConnection()); + + if (GET_PROTOCOL_MINOR(conn->daemonVersion) < 12) { + for (auto& i : paths) { + SubstitutablePathInfo info; + conn->to << wopQuerySubstitutablePathInfo << i; + conn.processStderr(); + unsigned int reply = readInt(conn->from); + if (reply == 0) { + continue; + } + info.deriver = readString(conn->from); + if (!info.deriver.empty()) { + assertStorePath(info.deriver); + } + info.references = readStorePaths<PathSet>(*this, conn->from); + info.downloadSize = readLongLong(conn->from); + info.narSize = readLongLong(conn->from); + infos[i] = info; + } + + } else { + conn->to << wopQuerySubstitutablePathInfos << paths; + conn.processStderr(); + auto count = readNum<size_t>(conn->from); + for (size_t n = 0; n < count; n++) { + Path path = readStorePath(*this, conn->from); + SubstitutablePathInfo& info(infos[path]); + info.deriver = readString(conn->from); + if (!info.deriver.empty()) { + assertStorePath(info.deriver); + } + info.references = readStorePaths<PathSet>(*this, conn->from); + info.downloadSize = readLongLong(conn->from); + info.narSize = readLongLong(conn->from); + } + } +} + +void RemoteStore::queryPathInfoUncached( + const Path& path, + Callback<std::shared_ptr<ValidPathInfo>> callback) noexcept { + try { + std::shared_ptr<ValidPathInfo> info; + { + auto conn(getConnection()); + conn->to << wopQueryPathInfo << path; + try { + conn.processStderr(); + } catch (Error& e) { + // Ugly backwards compatibility hack. + if (e.msg().find("is not valid") != std::string::npos) { + throw InvalidPath(e.what()); + } + throw; + } + if (GET_PROTOCOL_MINOR(conn->daemonVersion) >= 17) { + bool valid; + conn->from >> valid; + if (!valid) { + throw InvalidPath(format("path '%s' is not valid") % path); + } + } + info = std::make_shared<ValidPathInfo>(); + info->path = path; + info->deriver = readString(conn->from); + if (!info->deriver.empty()) { + assertStorePath(info->deriver); + } + info->narHash = Hash(readString(conn->from), htSHA256); + info->references = readStorePaths<PathSet>(*this, conn->from); + conn->from >> info->registrationTime >> info->narSize; + if (GET_PROTOCOL_MINOR(conn->daemonVersion) >= 16) { + conn->from >> info->ultimate; + info->sigs = readStrings<StringSet>(conn->from); + conn->from >> info->ca; + } + } + callback(std::move(info)); + } catch (...) { + callback.rethrow(); + } +} + +void RemoteStore::queryReferrers(const Path& path, PathSet& referrers) { + auto conn(getConnection()); + conn->to << wopQueryReferrers << path; + conn.processStderr(); + auto referrers2 = readStorePaths<PathSet>(*this, conn->from); + referrers.insert(referrers2.begin(), referrers2.end()); +} + +PathSet RemoteStore::queryValidDerivers(const Path& path) { + auto conn(getConnection()); + conn->to << wopQueryValidDerivers << path; + conn.processStderr(); + return readStorePaths<PathSet>(*this, conn->from); +} + +PathSet RemoteStore::queryDerivationOutputs(const Path& path) { + auto conn(getConnection()); + conn->to << wopQueryDerivationOutputs << path; + conn.processStderr(); + return readStorePaths<PathSet>(*this, conn->from); +} + +PathSet RemoteStore::queryDerivationOutputNames(const Path& path) { + auto conn(getConnection()); + conn->to << wopQueryDerivationOutputNames << path; + conn.processStderr(); + return readStrings<PathSet>(conn->from); +} + +Path RemoteStore::queryPathFromHashPart(const string& hashPart) { + auto conn(getConnection()); + conn->to << wopQueryPathFromHashPart << hashPart; + conn.processStderr(); + Path path = readString(conn->from); + if (!path.empty()) { + assertStorePath(path); + } + return path; +} + +void RemoteStore::addToStore(const ValidPathInfo& info, Source& source, + RepairFlag repair, CheckSigsFlag checkSigs, + std::shared_ptr<FSAccessor> accessor) { + auto conn(getConnection()); + + if (GET_PROTOCOL_MINOR(conn->daemonVersion) < 18) { + conn->to << wopImportPaths; + + auto source2 = sinkToSource([&](Sink& sink) { + sink << 1 // == path follows + ; + copyNAR(source, sink); + sink << exportMagic << info.path << info.references << info.deriver + << 0 // == no legacy signature + << 0 // == no path follows + ; + }); + + conn.processStderr(nullptr, source2.get()); + + auto importedPaths = readStorePaths<PathSet>(*this, conn->from); + assert(importedPaths.size() <= 1); + } + + else { + conn->to << wopAddToStoreNar << info.path << info.deriver + << info.narHash.to_string(Base16, false) << info.references + << info.registrationTime << info.narSize << info.ultimate + << info.sigs << info.ca << repair << !checkSigs; + bool tunnel = GET_PROTOCOL_MINOR(conn->daemonVersion) >= 21; + if (!tunnel) { + copyNAR(source, conn->to); + } + conn.processStderr(nullptr, tunnel ? &source : nullptr); + } +} + +Path RemoteStore::addToStore(const string& name, const Path& _srcPath, + bool recursive, HashType hashAlgo, + PathFilter& filter, RepairFlag repair) { + if (repair != 0u) { + throw Error( + "repairing is not supported when building through the Nix daemon"); + } + + auto conn(getConnection()); + + Path srcPath(absPath(_srcPath)); + + conn->to << wopAddToStore << name + << ((hashAlgo == htSHA256 && recursive) + ? 0 + : 1) /* backwards compatibility hack */ + << (recursive ? 1 : 0) << printHashType(hashAlgo); + + try { + conn->to.written = 0; + conn->to.warn = true; + connections->incCapacity(); + { + Finally cleanup([&]() { connections->decCapacity(); }); + dumpPath(srcPath, conn->to, filter); + } + conn->to.warn = false; + conn.processStderr(); + } catch (SysError& e) { + /* Daemon closed while we were sending the path. Probably OOM + or I/O error. */ + if (e.errNo == EPIPE) { + try { + conn.processStderr(); + } catch (EndOfFile& e) { + } + } + throw; + } + + return readStorePath(*this, conn->from); +} + +Path RemoteStore::addTextToStore(const string& name, const string& s, + const PathSet& references, RepairFlag repair) { + if (repair != 0u) { + throw Error( + "repairing is not supported when building through the Nix daemon"); + } + + auto conn(getConnection()); + conn->to << wopAddTextToStore << name << s << references; + + conn.processStderr(); + return readStorePath(*this, conn->from); +} + +void RemoteStore::buildPaths(const PathSet& drvPaths, BuildMode buildMode) { + auto conn(getConnection()); + conn->to << wopBuildPaths; + if (GET_PROTOCOL_MINOR(conn->daemonVersion) >= 13) { + conn->to << drvPaths; + if (GET_PROTOCOL_MINOR(conn->daemonVersion) >= 15) { + conn->to << buildMode; + } else + /* Old daemons did not take a 'buildMode' parameter, so we + need to validate it here on the client side. */ + if (buildMode != bmNormal) { + throw Error( + "repairing or checking is not supported when building through the " + "Nix daemon"); + } + } else { + /* For backwards compatibility with old daemons, strip output + identifiers. */ + PathSet drvPaths2; + for (auto& i : drvPaths) { + drvPaths2.insert(string(i, 0, i.find('!'))); + } + conn->to << drvPaths2; + } + conn.processStderr(); + readInt(conn->from); +} + +BuildResult RemoteStore::buildDerivation(const Path& drvPath, + const BasicDerivation& drv, + BuildMode buildMode) { + auto conn(getConnection()); + conn->to << wopBuildDerivation << drvPath << drv << buildMode; + conn.processStderr(); + BuildResult res; + unsigned int status; + conn->from >> status >> res.errorMsg; + res.status = (BuildResult::Status)status; + return res; +} + +void RemoteStore::ensurePath(const Path& path) { + auto conn(getConnection()); + conn->to << wopEnsurePath << path; + conn.processStderr(); + readInt(conn->from); +} + +void RemoteStore::addTempRoot(const Path& path) { + auto conn(getConnection()); + conn->to << wopAddTempRoot << path; + conn.processStderr(); + readInt(conn->from); +} + +void RemoteStore::addIndirectRoot(const Path& path) { + auto conn(getConnection()); + conn->to << wopAddIndirectRoot << path; + conn.processStderr(); + readInt(conn->from); +} + +void RemoteStore::syncWithGC() { + auto conn(getConnection()); + conn->to << wopSyncWithGC; + conn.processStderr(); + readInt(conn->from); +} + +Roots RemoteStore::findRoots(bool censor) { + auto conn(getConnection()); + conn->to << wopFindRoots; + conn.processStderr(); + auto count = readNum<size_t>(conn->from); + Roots result; + while ((count--) != 0u) { + Path link = readString(conn->from); + Path target = readStorePath(*this, conn->from); + result[target].emplace(link); + } + return result; +} + +void RemoteStore::collectGarbage(const GCOptions& options, GCResults& results) { + auto conn(getConnection()); + + conn->to << wopCollectGarbage << options.action << options.pathsToDelete + << static_cast<uint64_t>(options.ignoreLiveness) + << options.maxFreed + /* removed options */ + << 0 << 0 << 0; + + conn.processStderr(); + + results.paths = readStrings<PathSet>(conn->from); + results.bytesFreed = readLongLong(conn->from); + readLongLong(conn->from); // obsolete + + { + auto state_(Store::state.lock()); + state_->pathInfoCache.clear(); + } +} + +void RemoteStore::optimiseStore() { + auto conn(getConnection()); + conn->to << wopOptimiseStore; + conn.processStderr(); + readInt(conn->from); +} + +bool RemoteStore::verifyStore(bool checkContents, RepairFlag repair) { + auto conn(getConnection()); + conn->to << wopVerifyStore << static_cast<uint64_t>(checkContents) << repair; + conn.processStderr(); + return readInt(conn->from) != 0u; +} + +void RemoteStore::addSignatures(const Path& storePath, const StringSet& sigs) { + auto conn(getConnection()); + conn->to << wopAddSignatures << storePath << sigs; + conn.processStderr(); + readInt(conn->from); +} + +void RemoteStore::queryMissing(const PathSet& targets, PathSet& willBuild, + PathSet& willSubstitute, PathSet& unknown, + unsigned long long& downloadSize, + unsigned long long& narSize) { + { + auto conn(getConnection()); + if (GET_PROTOCOL_MINOR(conn->daemonVersion) < 19) { + // Don't hold the connection handle in the fallback case + // to prevent a deadlock. + goto fallback; + } + conn->to << wopQueryMissing << targets; + conn.processStderr(); + willBuild = readStorePaths<PathSet>(*this, conn->from); + willSubstitute = readStorePaths<PathSet>(*this, conn->from); + unknown = readStorePaths<PathSet>(*this, conn->from); + conn->from >> downloadSize >> narSize; + return; + } + +fallback: + return Store::queryMissing(targets, willBuild, willSubstitute, unknown, + downloadSize, narSize); +} + +void RemoteStore::connect() { auto conn(getConnection()); } + +unsigned int RemoteStore::getProtocol() { + auto conn(connections->get()); + return conn->daemonVersion; +} + +void RemoteStore::flushBadConnections() { connections->flushBad(); } + +RemoteStore::Connection::~Connection() { + try { + to.flush(); + } catch (...) { + ignoreException(); + } +} + +std::exception_ptr RemoteStore::Connection::processStderr(Sink* sink, + Source* source) { + to.flush(); + + while (true) { + auto msg = readNum<uint64_t>(from); + + if (msg == STDERR_WRITE) { + string s = readString(from); + if (sink == nullptr) { + throw Error("no sink"); + } + (*sink)(s); + } + + else if (msg == STDERR_READ) { + if (source == nullptr) { + throw Error("no source"); + } + auto len = readNum<size_t>(from); + auto buf = std::make_unique<unsigned char[]>(len); + writeString(buf.get(), source->read(buf.get(), len), to); + to.flush(); + } + + else if (msg == STDERR_ERROR) { + string error = readString(from); + unsigned int status = readInt(from); + return std::make_exception_ptr(Error(status, error)); + } + + else if (msg == STDERR_NEXT) { + LOG(ERROR) << chomp(readString(from)); + } + + else if (msg == STDERR_START_ACTIVITY) { + LOG(INFO) << readString(from); + } + + else if (msg == STDERR_LAST) { + break; + } + + else { + throw Error("got unknown message type %x from Nix daemon", msg); + } + } + + return nullptr; +} + +static std::string uriScheme = "unix://"; + +static RegisterStoreImplementation regStore( + [](const std::string& uri, + const Store::Params& params) -> std::shared_ptr<Store> { + if (std::string(uri, 0, uriScheme.size()) != uriScheme) { + return nullptr; + } + return std::make_shared<UDSRemoteStore>( + std::string(uri, uriScheme.size()), params); + }); + +} // namespace nix diff --git a/third_party/nix/src/libstore/remote-store.hh b/third_party/nix/src/libstore/remote-store.hh new file mode 100644 index 000000000000..dc44cd32ac47 --- /dev/null +++ b/third_party/nix/src/libstore/remote-store.hh @@ -0,0 +1,153 @@ +#pragma once + +#include <limits> +#include <string> + +#include "store-api.hh" + +namespace nix { + +class Pipe; +class Pid; +struct FdSink; +struct FdSource; +template <typename T> +class Pool; +struct ConnectionHandle; + +/* FIXME: RemoteStore is a misnomer - should be something like + DaemonStore. */ +class RemoteStore : public virtual Store { + public: + const Setting<int> maxConnections{ + (Store*)this, 1, "max-connections", + "maximum number of concurrent connections to the Nix daemon"}; + + const Setting<unsigned int> maxConnectionAge{ + (Store*)this, std::numeric_limits<unsigned int>::max(), + "max-connection-age", "number of seconds to reuse a connection"}; + + virtual bool sameMachine() = 0; + + RemoteStore(const Params& params); + + /* Implementations of abstract store API methods. */ + + bool isValidPathUncached(const Path& path) override; + + PathSet queryValidPaths(const PathSet& paths, SubstituteFlag maybeSubstitute = + NoSubstitute) override; + + PathSet queryAllValidPaths() override; + + void queryPathInfoUncached( + const Path& path, + Callback<std::shared_ptr<ValidPathInfo>> callback) noexcept override; + + void queryReferrers(const Path& path, PathSet& referrers) override; + + PathSet queryValidDerivers(const Path& path) override; + + PathSet queryDerivationOutputs(const Path& path) override; + + StringSet queryDerivationOutputNames(const Path& path) override; + + Path queryPathFromHashPart(const string& hashPart) override; + + PathSet querySubstitutablePaths(const PathSet& paths) override; + + void querySubstitutablePathInfos(const PathSet& paths, + SubstitutablePathInfos& infos) override; + + void addToStore(const ValidPathInfo& info, Source& source, RepairFlag repair, + CheckSigsFlag checkSigs, + std::shared_ptr<FSAccessor> accessor) override; + + Path addToStore(const string& name, const Path& srcPath, + bool recursive = true, HashType hashAlgo = htSHA256, + PathFilter& filter = defaultPathFilter, + RepairFlag repair = NoRepair) override; + + Path addTextToStore(const string& name, const string& s, + const PathSet& references, RepairFlag repair) override; + + void buildPaths(const PathSet& paths, BuildMode buildMode) override; + + BuildResult buildDerivation(const Path& drvPath, const BasicDerivation& drv, + BuildMode buildMode) override; + + void ensurePath(const Path& path) override; + + void addTempRoot(const Path& path) override; + + void addIndirectRoot(const Path& path) override; + + void syncWithGC() override; + + Roots findRoots(bool censor) override; + + void collectGarbage(const GCOptions& options, GCResults& results) override; + + void optimiseStore() override; + + bool verifyStore(bool checkContents, RepairFlag repair) override; + + void addSignatures(const Path& storePath, const StringSet& sigs) override; + + void queryMissing(const PathSet& targets, PathSet& willBuild, + PathSet& willSubstitute, PathSet& unknown, + unsigned long long& downloadSize, + unsigned long long& narSize) override; + + void connect() override; + + unsigned int getProtocol() override; + + void flushBadConnections(); + + protected: + struct Connection { + AutoCloseFD fd; + FdSink to; + FdSource from; + unsigned int daemonVersion; + std::chrono::time_point<std::chrono::steady_clock> startTime; + + virtual ~Connection(); + + std::exception_ptr processStderr(Sink* sink = 0, Source* source = 0); + }; + + ref<Connection> openConnectionWrapper(); + + virtual ref<Connection> openConnection() = 0; + + void initConnection(Connection& conn); + + ref<Pool<Connection>> connections; + + virtual void setOptions(Connection& conn); + + ConnectionHandle getConnection(); + + friend struct ConnectionHandle; + + private: + std::atomic_bool failed{false}; +}; + +class UDSRemoteStore : public LocalFSStore, public RemoteStore { + public: + UDSRemoteStore(const Params& params); + UDSRemoteStore(std::string path, const Params& params); + + std::string getUri() override; + + bool sameMachine() { return true; } + + private: + ref<RemoteStore::Connection> openConnection() override; + std::optional<std::string> path; +}; + +} // namespace nix diff --git a/third_party/nix/src/libstore/s3-binary-cache-store.cc b/third_party/nix/src/libstore/s3-binary-cache-store.cc new file mode 100644 index 000000000000..483ddc19a3aa --- /dev/null +++ b/third_party/nix/src/libstore/s3-binary-cache-store.cc @@ -0,0 +1,428 @@ +#if ENABLE_S3 + +#include "s3-binary-cache-store.hh" + +#include <aws/core/Aws.h> +#include <aws/core/VersionConfig.h> +#include <aws/core/auth/AWSCredentialsProvider.h> +#include <aws/core/auth/AWSCredentialsProviderChain.h> +#include <aws/core/client/ClientConfiguration.h> +#include <aws/core/client/DefaultRetryStrategy.h> +#include <aws/core/utils/logging/FormattedLogSystem.h> +#include <aws/core/utils/logging/LogMacros.h> +#include <aws/core/utils/threading/Executor.h> +#include <aws/s3/S3Client.h> +#include <aws/s3/model/GetObjectRequest.h> +#include <aws/s3/model/HeadObjectRequest.h> +#include <aws/s3/model/ListObjectsRequest.h> +#include <aws/s3/model/PutObjectRequest.h> +#include <aws/transfer/TransferManager.h> + +#include "compression.hh" +#include "download.hh" +#include "globals.hh" +#include "istringstream_nocopy.hh" +#include "nar-info-disk-cache.hh" +#include "nar-info.hh" +#include "s3.hh" + +using namespace Aws::Transfer; + +namespace nix { + +struct S3Error : public Error { + Aws::S3::S3Errors err; + S3Error(Aws::S3::S3Errors err, const FormatOrString& fs) + : Error(fs), err(err){}; +}; + +/* Helper: given an Outcome<R, E>, return R in case of success, or + throw an exception in case of an error. */ +template <typename R, typename E> +R&& checkAws(const FormatOrString& fs, Aws::Utils::Outcome<R, E>&& outcome) { + if (!outcome.IsSuccess()) + throw S3Error(outcome.GetError().GetErrorType(), + fs.s + ": " + outcome.GetError().GetMessage()); + return outcome.GetResultWithOwnership(); +} + +class AwsLogger : public Aws::Utils::Logging::FormattedLogSystem { + using Aws::Utils::Logging::FormattedLogSystem::FormattedLogSystem; + + void ProcessFormattedStatement(Aws::String&& statement) override { + debug("AWS: %s", chomp(statement)); + } +}; + +static void initAWS() { + static std::once_flag flag; + std::call_once(flag, []() { + Aws::SDKOptions options; + + /* We install our own OpenSSL locking function (see + shared.cc), so don't let aws-sdk-cpp override it. */ + options.cryptoOptions.initAndCleanupOpenSSL = false; + + if (verbosity >= lvlDebug) { + options.loggingOptions.logLevel = + verbosity == lvlDebug ? Aws::Utils::Logging::LogLevel::Debug + : Aws::Utils::Logging::LogLevel::Trace; + options.loggingOptions.logger_create_fn = [options]() { + return std::make_shared<AwsLogger>(options.loggingOptions.logLevel); + }; + } + + Aws::InitAPI(options); + }); +} + +S3Helper::S3Helper(const string& profile, const string& region, + const string& scheme, const string& endpoint) + : config(makeConfig(region, scheme, endpoint)), + client(make_ref<Aws::S3::S3Client>( + profile == "" + ? std::dynamic_pointer_cast<Aws::Auth::AWSCredentialsProvider>( + std::make_shared< + Aws::Auth::DefaultAWSCredentialsProviderChain>()) + : std::dynamic_pointer_cast<Aws::Auth::AWSCredentialsProvider>( + std::make_shared< + Aws::Auth::ProfileConfigFileAWSCredentialsProvider>( + profile.c_str())), + *config, +// FIXME: https://github.com/aws/aws-sdk-cpp/issues/759 +#if AWS_VERSION_MAJOR == 1 && AWS_VERSION_MINOR < 3 + false, +#else + Aws::Client::AWSAuthV4Signer::PayloadSigningPolicy::Never, +#endif + endpoint.empty())) { +} + +/* Log AWS retries. */ +class RetryStrategy : public Aws::Client::DefaultRetryStrategy { + bool ShouldRetry(const Aws::Client::AWSError<Aws::Client::CoreErrors>& error, + long attemptedRetries) const override { + auto retry = + Aws::Client::DefaultRetryStrategy::ShouldRetry(error, attemptedRetries); + if (retry) + printError("AWS error '%s' (%s), will retry in %d ms", + error.GetExceptionName(), error.GetMessage(), + CalculateDelayBeforeNextRetry(error, attemptedRetries)); + return retry; + } +}; + +ref<Aws::Client::ClientConfiguration> S3Helper::makeConfig( + const string& region, const string& scheme, const string& endpoint) { + initAWS(); + auto res = make_ref<Aws::Client::ClientConfiguration>(); + res->region = region; + if (!scheme.empty()) { + res->scheme = Aws::Http::SchemeMapper::FromString(scheme.c_str()); + } + if (!endpoint.empty()) { + res->endpointOverride = endpoint; + } + res->requestTimeoutMs = 600 * 1000; + res->connectTimeoutMs = 5 * 1000; + res->retryStrategy = std::make_shared<RetryStrategy>(); + res->caFile = settings.caFile; + return res; +} + +S3Helper::DownloadResult S3Helper::getObject(const std::string& bucketName, + const std::string& key) { + debug("fetching 's3://%s/%s'...", bucketName, key); + + auto request = + Aws::S3::Model::GetObjectRequest().WithBucket(bucketName).WithKey(key); + + request.SetResponseStreamFactory( + [&]() { return Aws::New<std::stringstream>("STRINGSTREAM"); }); + + DownloadResult res; + + auto now1 = std::chrono::steady_clock::now(); + + try { + auto result = checkAws(fmt("AWS error fetching '%s'", key), + client->GetObject(request)); + + res.data = + decompress(result.GetContentEncoding(), + dynamic_cast<std::stringstream&>(result.GetBody()).str()); + + } catch (S3Error& e) { + if (e.err != Aws::S3::S3Errors::NO_SUCH_KEY) { + throw; + } + } + + auto now2 = std::chrono::steady_clock::now(); + + res.durationMs = + std::chrono::duration_cast<std::chrono::milliseconds>(now2 - now1) + .count(); + + return res; +} + +struct S3BinaryCacheStoreImpl : public S3BinaryCacheStore { + const Setting<std::string> profile{ + this, "", "profile", "The name of the AWS configuration profile to use."}; + const Setting<std::string> region{ + this, Aws::Region::US_EAST_1, "region", {"aws-region"}}; + const Setting<std::string> scheme{ + this, "", "scheme", + "The scheme to use for S3 requests, https by default."}; + const Setting<std::string> endpoint{ + this, "", "endpoint", + "An optional override of the endpoint to use when talking to S3."}; + const Setting<std::string> narinfoCompression{ + this, "", "narinfo-compression", "compression method for .narinfo files"}; + const Setting<std::string> lsCompression{this, "", "ls-compression", + "compression method for .ls files"}; + const Setting<std::string> logCompression{ + this, "", "log-compression", "compression method for log/* files"}; + const Setting<bool> multipartUpload{this, false, "multipart-upload", + "whether to use multi-part uploads"}; + const Setting<uint64_t> bufferSize{ + this, 5 * 1024 * 1024, "buffer-size", + "size (in bytes) of each part in multi-part uploads"}; + + std::string bucketName; + + Stats stats; + + S3Helper s3Helper; + + S3BinaryCacheStoreImpl(const Params& params, const std::string& bucketName) + : S3BinaryCacheStore(params), + bucketName(bucketName), + s3Helper(profile, region, scheme, endpoint) { + diskCache = getNarInfoDiskCache(); + } + + std::string getUri() override { return "s3://" + bucketName; } + + void init() override { + if (!diskCache->cacheExists(getUri(), wantMassQuery_, priority)) { + BinaryCacheStore::init(); + + diskCache->createCache(getUri(), storeDir, wantMassQuery_, priority); + } + } + + const Stats& getS3Stats() override { return stats; } + + /* This is a specialisation of isValidPath() that optimistically + fetches the .narinfo file, rather than first checking for its + existence via a HEAD request. Since .narinfos are small, doing + a GET is unlikely to be slower than HEAD. */ + bool isValidPathUncached(const Path& storePath) override { + try { + queryPathInfo(storePath); + return true; + } catch (InvalidPath& e) { + return false; + } + } + + bool fileExists(const std::string& path) override { + stats.head++; + + auto res = s3Helper.client->HeadObject(Aws::S3::Model::HeadObjectRequest() + .WithBucket(bucketName) + .WithKey(path)); + + if (!res.IsSuccess()) { + auto& error = res.GetError(); + if (error.GetErrorType() == Aws::S3::S3Errors::RESOURCE_NOT_FOUND || + error.GetErrorType() == Aws::S3::S3Errors::NO_SUCH_KEY + // If bucket listing is disabled, 404s turn into 403s + || error.GetErrorType() == Aws::S3::S3Errors::ACCESS_DENIED) + return false; + throw Error(format("AWS error fetching '%s': %s") % path % + error.GetMessage()); + } + + return true; + } + + std::shared_ptr<TransferManager> transferManager; + std::once_flag transferManagerCreated; + + void uploadFile(const std::string& path, const std::string& data, + const std::string& mimeType, + const std::string& contentEncoding) { + auto stream = std::make_shared<istringstream_nocopy>(data); + + auto maxThreads = std::thread::hardware_concurrency(); + + static std::shared_ptr<Aws::Utils::Threading::PooledThreadExecutor> + executor = + std::make_shared<Aws::Utils::Threading::PooledThreadExecutor>( + maxThreads); + + std::call_once(transferManagerCreated, [&]() { + if (multipartUpload) { + TransferManagerConfiguration transferConfig(executor.get()); + + transferConfig.s3Client = s3Helper.client; + transferConfig.bufferSize = bufferSize; + + transferConfig.uploadProgressCallback = + [](const TransferManager* transferManager, + const std::shared_ptr<const TransferHandle>& transferHandle) { + // FIXME: find a way to properly abort the multipart upload. + // checkInterrupt(); + debug("upload progress ('%s'): '%d' of '%d' bytes", + transferHandle->GetKey(), + transferHandle->GetBytesTransferred(), + transferHandle->GetBytesTotalSize()); + }; + + transferManager = TransferManager::Create(transferConfig); + } + }); + + auto now1 = std::chrono::steady_clock::now(); + + if (transferManager) { + if (contentEncoding != "") + throw Error( + "setting a content encoding is not supported with S3 multi-part " + "uploads"); + + std::shared_ptr<TransferHandle> transferHandle = + transferManager->UploadFile(stream, bucketName, path, mimeType, + Aws::Map<Aws::String, Aws::String>(), + nullptr /*, contentEncoding */); + + transferHandle->WaitUntilFinished(); + + if (transferHandle->GetStatus() == TransferStatus::FAILED) + throw Error("AWS error: failed to upload 's3://%s/%s': %s", bucketName, + path, transferHandle->GetLastError().GetMessage()); + + if (transferHandle->GetStatus() != TransferStatus::COMPLETED) + throw Error( + "AWS error: transfer status of 's3://%s/%s' in unexpected state", + bucketName, path); + + } else { + auto request = Aws::S3::Model::PutObjectRequest() + .WithBucket(bucketName) + .WithKey(path); + + request.SetContentType(mimeType); + + if (contentEncoding != "") { + request.SetContentEncoding(contentEncoding); + } + + auto stream = std::make_shared<istringstream_nocopy>(data); + + request.SetBody(stream); + + auto result = checkAws(fmt("AWS error uploading '%s'", path), + s3Helper.client->PutObject(request)); + } + + auto now2 = std::chrono::steady_clock::now(); + + auto duration = + std::chrono::duration_cast<std::chrono::milliseconds>(now2 - now1) + .count(); + + printInfo(format("uploaded 's3://%1%/%2%' (%3% bytes) in %4% ms") % + bucketName % path % data.size() % duration); + + stats.putTimeMs += duration; + stats.putBytes += data.size(); + stats.put++; + } + + void upsertFile(const std::string& path, const std::string& data, + const std::string& mimeType) override { + if (narinfoCompression != "" && hasSuffix(path, ".narinfo")) + uploadFile(path, *compress(narinfoCompression, data), mimeType, + narinfoCompression); + else if (lsCompression != "" && hasSuffix(path, ".ls")) + uploadFile(path, *compress(lsCompression, data), mimeType, lsCompression); + else if (logCompression != "" && hasPrefix(path, "log/")) + uploadFile(path, *compress(logCompression, data), mimeType, + logCompression); + else + uploadFile(path, data, mimeType, ""); + } + + void getFile(const std::string& path, Sink& sink) override { + stats.get++; + + // FIXME: stream output to sink. + auto res = s3Helper.getObject(bucketName, path); + + stats.getBytes += res.data ? res.data->size() : 0; + stats.getTimeMs += res.durationMs; + + if (res.data) { + printTalkative("downloaded 's3://%s/%s' (%d bytes) in %d ms", bucketName, + path, res.data->size(), res.durationMs); + + sink((unsigned char*)res.data->data(), res.data->size()); + } else + throw NoSuchBinaryCacheFile( + "file '%s' does not exist in binary cache '%s'", path, getUri()); + } + + PathSet queryAllValidPaths() override { + PathSet paths; + std::string marker; + + do { + debug(format("listing bucket 's3://%s' from key '%s'...") % bucketName % + marker); + + auto res = checkAws( + format("AWS error listing bucket '%s'") % bucketName, + s3Helper.client->ListObjects(Aws::S3::Model::ListObjectsRequest() + .WithBucket(bucketName) + .WithDelimiter("/") + .WithMarker(marker))); + + auto& contents = res.GetContents(); + + debug(format("got %d keys, next marker '%s'") % contents.size() % + res.GetNextMarker()); + + for (auto object : contents) { + auto& key = object.GetKey(); + if (key.size() != 40 || !hasSuffix(key, ".narinfo")) { + continue; + } + paths.insert(storeDir + "/" + key.substr(0, key.size() - 8)); + } + + marker = res.GetNextMarker(); + } while (!marker.empty()); + + return paths; + } +}; + +static RegisterStoreImplementation regStore( + [](const std::string& uri, + const Store::Params& params) -> std::shared_ptr<Store> { + if (std::string(uri, 0, 5) != "s3://") { + return 0; + } + auto store = + std::make_shared<S3BinaryCacheStoreImpl>(params, std::string(uri, 5)); + store->init(); + return store; + }); + +} // namespace nix + +#endif diff --git a/third_party/nix/src/libstore/s3-binary-cache-store.hh b/third_party/nix/src/libstore/s3-binary-cache-store.hh new file mode 100644 index 000000000000..24cb67721a92 --- /dev/null +++ b/third_party/nix/src/libstore/s3-binary-cache-store.hh @@ -0,0 +1,27 @@ +#pragma once + +#include <atomic> + +#include "binary-cache-store.hh" + +namespace nix { + +class S3BinaryCacheStore : public BinaryCacheStore { + protected: + S3BinaryCacheStore(const Params& params) : BinaryCacheStore(params) {} + + public: + struct Stats { + std::atomic<uint64_t> put{0}; + std::atomic<uint64_t> putBytes{0}; + std::atomic<uint64_t> putTimeMs{0}; + std::atomic<uint64_t> get{0}; + std::atomic<uint64_t> getBytes{0}; + std::atomic<uint64_t> getTimeMs{0}; + std::atomic<uint64_t> head{0}; + }; + + virtual const Stats& getS3Stats() = 0; +}; + +} // namespace nix diff --git a/third_party/nix/src/libstore/s3.hh b/third_party/nix/src/libstore/s3.hh new file mode 100644 index 000000000000..09935fabed47 --- /dev/null +++ b/third_party/nix/src/libstore/s3.hh @@ -0,0 +1,42 @@ +#pragma once + +#if ENABLE_S3 + +#include "ref.hh" + +namespace Aws { +namespace Client { +class ClientConfiguration; +} +} // namespace Aws +namespace Aws { +namespace S3 { +class S3Client; +} +} // namespace Aws + +namespace nix { + +struct S3Helper { + ref<Aws::Client::ClientConfiguration> config; + ref<Aws::S3::S3Client> client; + + S3Helper(const std::string& profile, const std::string& region, + const std::string& scheme, const std::string& endpoint); + + ref<Aws::Client::ClientConfiguration> makeConfig(const std::string& region, + const std::string& scheme, + const std::string& endpoint); + + struct DownloadResult { + std::shared_ptr<std::string> data; + unsigned int durationMs; + }; + + DownloadResult getObject(const std::string& bucketName, + const std::string& key); +}; + +} // namespace nix + +#endif diff --git a/third_party/nix/src/libstore/sandbox-defaults.sb b/third_party/nix/src/libstore/sandbox-defaults.sb new file mode 100644 index 000000000000..0299d1ee45d2 --- /dev/null +++ b/third_party/nix/src/libstore/sandbox-defaults.sb @@ -0,0 +1,87 @@ +(define TMPDIR (param "_GLOBAL_TMP_DIR")) + +(deny default) + +; Disallow creating setuid/setgid binaries, since that +; would allow breaking build user isolation. +(deny file-write-setugid) + +; Allow forking. +(allow process-fork) + +; Allow reading system information like #CPUs, etc. +(allow sysctl-read) + +; Allow POSIX semaphores and shared memory. +(allow ipc-posix*) + +; Allow socket creation. +(allow system-socket) + +; Allow sending signals within the sandbox. +(allow signal (target same-sandbox)) + +; Allow getpwuid. +(allow mach-lookup (global-name "com.apple.system.opendirectoryd.libinfo")) + +; Access to /tmp. +; The network-outbound/network-inbound ones are for unix domain sockets, which +; we allow access to in TMPDIR (but if we allow them more broadly, you could in +; theory escape the sandbox) +(allow file* process-exec network-outbound network-inbound + (literal "/tmp") (subpath TMPDIR)) + +; Some packages like to read the system version. +(allow file-read* (literal "/System/Library/CoreServices/SystemVersion.plist")) + +; Without this line clang cannot write to /dev/null, breaking some configure tests. +(allow file-read-metadata (literal "/dev")) + +; Many packages like to do local networking in their test suites, but let's only +; allow it if the package explicitly asks for it. +(if (param "_ALLOW_LOCAL_NETWORKING") + (begin + (allow network* (local ip) (local tcp) (local udp)) + + ; Allow access to /etc/resolv.conf (which is a symlink to + ; /private/var/run/resolv.conf). + ; TODO: deduplicate with sandbox-network.sb + (allow file-read-metadata + (literal "/var") + (literal "/etc") + (literal "/etc/resolv.conf") + (literal "/private/etc/resolv.conf")) + + (allow file-read* + (literal "/private/var/run/resolv.conf")) + + ; Allow DNS lookups. This is even needed for localhost, which lots of tests rely on + (allow file-read-metadata (literal "/etc/hosts")) + (allow file-read* (literal "/private/etc/hosts")) + (allow network-outbound (remote unix-socket (path-literal "/private/var/run/mDNSResponder"))))) + +; Standard devices. +(allow file* + (literal "/dev/null") + (literal "/dev/random") + (literal "/dev/stdin") + (literal "/dev/stdout") + (literal "/dev/tty") + (literal "/dev/urandom") + (literal "/dev/zero") + (subpath "/dev/fd")) + +; Does nothing, but reduces build noise. +(allow file* (literal "/dev/dtracehelper")) + +; Allow access to zoneinfo since libSystem needs it. +(allow file-read* (subpath "/usr/share/zoneinfo")) + +(allow file-read* (subpath "/usr/share/locale")) + +; This is mostly to get more specific log messages when builds try to +; access something in /etc or /var. +(allow file-read-metadata + (literal "/etc") + (literal "/var") + (literal "/private/var/tmp")) diff --git a/third_party/nix/src/libstore/sandbox-minimal.sb b/third_party/nix/src/libstore/sandbox-minimal.sb new file mode 100644 index 000000000000..65f5108b3990 --- /dev/null +++ b/third_party/nix/src/libstore/sandbox-minimal.sb @@ -0,0 +1,5 @@ +(allow default) + +; Disallow creating setuid/setgid binaries, since that +; would allow breaking build user isolation. +(deny file-write-setugid) diff --git a/third_party/nix/src/libstore/sandbox-network.sb b/third_party/nix/src/libstore/sandbox-network.sb new file mode 100644 index 000000000000..56beec761fa8 --- /dev/null +++ b/third_party/nix/src/libstore/sandbox-network.sb @@ -0,0 +1,16 @@ +; Allow local and remote network traffic. +(allow network* (local ip) (remote ip)) + +; Allow access to /etc/resolv.conf (which is a symlink to +; /private/var/run/resolv.conf). +(allow file-read-metadata + (literal "/var") + (literal "/etc") + (literal "/etc/resolv.conf") + (literal "/private/etc/resolv.conf")) + +(allow file-read* + (literal "/private/var/run/resolv.conf")) + +; Allow DNS lookups. +(allow network-outbound (remote unix-socket (path-literal "/private/var/run/mDNSResponder"))) diff --git a/third_party/nix/src/libstore/schema.sql b/third_party/nix/src/libstore/schema.sql new file mode 100644 index 000000000000..09c71a2b8dd7 --- /dev/null +++ b/third_party/nix/src/libstore/schema.sql @@ -0,0 +1,42 @@ +create table if not exists ValidPaths ( + id integer primary key autoincrement not null, + path text unique not null, + hash text not null, + registrationTime integer not null, + deriver text, + narSize integer, + ultimate integer, -- null implies "false" + sigs text, -- space-separated + ca text -- if not null, an assertion that the path is content-addressed; see ValidPathInfo +); + +create table if not exists Refs ( + referrer integer not null, + reference integer not null, + primary key (referrer, reference), + foreign key (referrer) references ValidPaths(id) on delete cascade, + foreign key (reference) references ValidPaths(id) on delete restrict +); + +create index if not exists IndexReferrer on Refs(referrer); +create index if not exists IndexReference on Refs(reference); + +-- Paths can refer to themselves, causing a tuple (N, N) in the Refs +-- table. This causes a deletion of the corresponding row in +-- ValidPaths to cause a foreign key constraint violation (due to `on +-- delete restrict' on the `reference' column). Therefore, explicitly +-- get rid of self-references. +create trigger if not exists DeleteSelfRefs before delete on ValidPaths + begin + delete from Refs where referrer = old.id and reference = old.id; + end; + +create table if not exists DerivationOutputs ( + drv integer not null, + id text not null, -- symbolic output id, usually "out" + path text not null, + primary key (drv, id), + foreign key (drv) references ValidPaths(id) on delete cascade +); + +create index if not exists IndexDerivationOutputs on DerivationOutputs(path); diff --git a/third_party/nix/src/libstore/serve-protocol.hh b/third_party/nix/src/libstore/serve-protocol.hh new file mode 100644 index 000000000000..a07a7ef97425 --- /dev/null +++ b/third_party/nix/src/libstore/serve-protocol.hh @@ -0,0 +1,24 @@ +#pragma once + +namespace nix { + +#define SERVE_MAGIC_1 0x390c9deb +#define SERVE_MAGIC_2 0x5452eecb + +#define SERVE_PROTOCOL_VERSION 0x205 +#define GET_PROTOCOL_MAJOR(x) ((x)&0xff00) +#define GET_PROTOCOL_MINOR(x) ((x)&0x00ff) + +typedef enum { + cmdQueryValidPaths = 1, + cmdQueryPathInfos = 2, + cmdDumpStorePath = 3, + cmdImportPaths = 4, + cmdExportPaths = 5, + cmdBuildPaths = 6, + cmdQueryClosure = 7, + cmdBuildDerivation = 8, + cmdAddToStoreNar = 9, +} ServeCommand; + +} // namespace nix diff --git a/third_party/nix/src/libstore/sqlite.cc b/third_party/nix/src/libstore/sqlite.cc new file mode 100644 index 000000000000..2dea952d0275 --- /dev/null +++ b/third_party/nix/src/libstore/sqlite.cc @@ -0,0 +1,195 @@ +#include "sqlite.hh" + +#include <atomic> + +#include <glog/logging.h> +#include <sqlite3.h> + +#include "util.hh" + +namespace nix { + +[[noreturn]] void throwSQLiteError(sqlite3* db, const FormatOrString& fs) { + int err = sqlite3_errcode(db); + int exterr = sqlite3_extended_errcode(db); + + auto path = sqlite3_db_filename(db, nullptr); + if (path == nullptr) { + path = "(in-memory)"; + } + + if (err == SQLITE_BUSY || err == SQLITE_PROTOCOL) { + throw SQLiteBusy( + err == SQLITE_PROTOCOL + ? fmt("SQLite database '%s' is busy (SQLITE_PROTOCOL)", path) + : fmt("SQLite database '%s' is busy", path)); + } + throw SQLiteError("%s: %s (in '%s')", fs.s, sqlite3_errstr(exterr), path); +} + +SQLite::SQLite(const Path& path) { + if (sqlite3_open_v2(path.c_str(), &db, + SQLITE_OPEN_READWRITE | SQLITE_OPEN_CREATE, + nullptr) != SQLITE_OK) { + throw Error(format("cannot open SQLite database '%s'") % path); + } +} + +SQLite::~SQLite() { + try { + if ((db != nullptr) && sqlite3_close(db) != SQLITE_OK) { + throwSQLiteError(db, "closing database"); + } + } catch (...) { + ignoreException(); + } +} + +void SQLite::exec(const std::string& stmt) { + retrySQLite<void>([&]() { + if (sqlite3_exec(db, stmt.c_str(), nullptr, nullptr, nullptr) != + SQLITE_OK) { + throwSQLiteError(db, format("executing SQLite statement '%s'") % stmt); + } + }); +} + +void SQLiteStmt::create(sqlite3* db, const string& sql) { + checkInterrupt(); + assert(!stmt); + if (sqlite3_prepare_v2(db, sql.c_str(), -1, &stmt, nullptr) != SQLITE_OK) { + throwSQLiteError(db, fmt("creating statement '%s'", sql)); + } + this->db = db; + this->sql = sql; +} + +SQLiteStmt::~SQLiteStmt() { + try { + if ((stmt != nullptr) && sqlite3_finalize(stmt) != SQLITE_OK) { + throwSQLiteError(db, fmt("finalizing statement '%s'", sql)); + } + } catch (...) { + ignoreException(); + } +} + +SQLiteStmt::Use::Use(SQLiteStmt& stmt) : stmt(stmt) { + assert(stmt.stmt); + /* Note: sqlite3_reset() returns the error code for the most + recent call to sqlite3_step(). So ignore it. */ + sqlite3_reset(stmt); +} + +SQLiteStmt::Use::~Use() { sqlite3_reset(stmt); } + +SQLiteStmt::Use& SQLiteStmt::Use::operator()(const std::string& value, + bool notNull) { + if (notNull) { + if (sqlite3_bind_text(stmt, curArg++, value.c_str(), -1, + SQLITE_TRANSIENT) != SQLITE_OK) { + throwSQLiteError(stmt.db, "binding argument"); + } + } else { + bind(); + } + return *this; +} + +SQLiteStmt::Use& SQLiteStmt::Use::operator()(int64_t value, bool notNull) { + if (notNull) { + if (sqlite3_bind_int64(stmt, curArg++, value) != SQLITE_OK) { + throwSQLiteError(stmt.db, "binding argument"); + } + } else { + bind(); + } + return *this; +} + +SQLiteStmt::Use& SQLiteStmt::Use::bind() { + if (sqlite3_bind_null(stmt, curArg++) != SQLITE_OK) { + throwSQLiteError(stmt.db, "binding argument"); + } + return *this; +} + +int SQLiteStmt::Use::step() { return sqlite3_step(stmt); } + +void SQLiteStmt::Use::exec() { + int r = step(); + assert(r != SQLITE_ROW); + if (r != SQLITE_DONE) { + throwSQLiteError(stmt.db, fmt("executing SQLite statement '%s'", stmt.sql)); + } +} + +bool SQLiteStmt::Use::next() { + int r = step(); + if (r != SQLITE_DONE && r != SQLITE_ROW) { + throwSQLiteError(stmt.db, fmt("executing SQLite query '%s'", stmt.sql)); + } + return r == SQLITE_ROW; +} + +std::string SQLiteStmt::Use::getStr(int col) { + auto s = (const char*)sqlite3_column_text(stmt, col); + assert(s); + return s; +} + +int64_t SQLiteStmt::Use::getInt(int col) { + // FIXME: detect nulls? + return sqlite3_column_int64(stmt, col); +} + +bool SQLiteStmt::Use::isNull(int col) { + return sqlite3_column_type(stmt, col) == SQLITE_NULL; +} + +SQLiteTxn::SQLiteTxn(sqlite3* db) { + this->db = db; + if (sqlite3_exec(db, "begin;", nullptr, nullptr, nullptr) != SQLITE_OK) { + throwSQLiteError(db, "starting transaction"); + } + active = true; +} + +void SQLiteTxn::commit() { + if (sqlite3_exec(db, "commit;", nullptr, nullptr, nullptr) != SQLITE_OK) { + throwSQLiteError(db, "committing transaction"); + } + active = false; +} + +SQLiteTxn::~SQLiteTxn() { + try { + if (active && + sqlite3_exec(db, "rollback;", nullptr, nullptr, nullptr) != SQLITE_OK) { + throwSQLiteError(db, "aborting transaction"); + } + } catch (...) { + ignoreException(); + } +} + +void handleSQLiteBusy(const SQLiteBusy& e) { + static std::atomic<time_t> lastWarned{0}; + + time_t now = time(nullptr); + + if (now > lastWarned + 10) { + lastWarned = now; + LOG(ERROR) << e.what(); + } + + /* Sleep for a while since retrying the transaction right away + is likely to fail again. */ + checkInterrupt(); + struct timespec t; + t.tv_sec = 0; + t.tv_nsec = (random() % 100) * 1000 * 1000; /* <= 0.1s */ + nanosleep(&t, nullptr); +} + +} // namespace nix diff --git a/third_party/nix/src/libstore/sqlite.hh b/third_party/nix/src/libstore/sqlite.hh new file mode 100644 index 000000000000..298bb574ea2d --- /dev/null +++ b/third_party/nix/src/libstore/sqlite.hh @@ -0,0 +1,109 @@ +#pragma once + +#include <functional> +#include <string> + +#include "types.hh" + +class sqlite3; +class sqlite3_stmt; + +namespace nix { + +/* RAII wrapper to close a SQLite database automatically. */ +struct SQLite { + sqlite3* db = 0; + SQLite() {} + SQLite(const Path& path); + SQLite(const SQLite& from) = delete; + SQLite& operator=(const SQLite& from) = delete; + SQLite& operator=(SQLite&& from) { + db = from.db; + from.db = 0; + return *this; + } + ~SQLite(); + operator sqlite3*() { return db; } + + void exec(const std::string& stmt); +}; + +/* RAII wrapper to create and destroy SQLite prepared statements. */ +struct SQLiteStmt { + sqlite3* db = 0; + sqlite3_stmt* stmt = 0; + std::string sql; + SQLiteStmt() {} + SQLiteStmt(sqlite3* db, const std::string& sql) { create(db, sql); } + void create(sqlite3* db, const std::string& s); + ~SQLiteStmt(); + operator sqlite3_stmt*() { return stmt; } + + /* Helper for binding / executing statements. */ + class Use { + friend struct SQLiteStmt; + + private: + SQLiteStmt& stmt; + unsigned int curArg = 1; + Use(SQLiteStmt& stmt); + + public: + ~Use(); + + /* Bind the next parameter. */ + Use& operator()(const std::string& value, bool notNull = true); + Use& operator()(int64_t value, bool notNull = true); + Use& bind(); // null + + int step(); + + /* Execute a statement that does not return rows. */ + void exec(); + + /* For statements that return 0 or more rows. Returns true iff + a row is available. */ + bool next(); + + std::string getStr(int col); + int64_t getInt(int col); + bool isNull(int col); + }; + + Use use() { return Use(*this); } +}; + +/* RAII helper that ensures transactions are aborted unless explicitly + committed. */ +struct SQLiteTxn { + bool active = false; + sqlite3* db; + + SQLiteTxn(sqlite3* db); + + void commit(); + + ~SQLiteTxn(); +}; + +MakeError(SQLiteError, Error); +MakeError(SQLiteBusy, SQLiteError); + +[[noreturn]] void throwSQLiteError(sqlite3* db, const FormatOrString& fs); + +void handleSQLiteBusy(const SQLiteBusy& e); + +/* Convenience function for retrying a SQLite transaction when the + database is busy. */ +template <typename T> +T retrySQLite(std::function<T()> fun) { + while (true) { + try { + return fun(); + } catch (SQLiteBusy& e) { + handleSQLiteBusy(e); + } + } +} + +} // namespace nix diff --git a/third_party/nix/src/libstore/ssh-store.cc b/third_party/nix/src/libstore/ssh-store.cc new file mode 100644 index 000000000000..9c49babcf2b2 --- /dev/null +++ b/third_party/nix/src/libstore/ssh-store.cc @@ -0,0 +1,86 @@ +#include "archive.hh" +#include "pool.hh" +#include "remote-fs-accessor.hh" +#include "remote-store.hh" +#include "ssh.hh" +#include "store-api.hh" +#include "worker-protocol.hh" + +namespace nix { + +static std::string uriScheme = "ssh-ng://"; + +class SSHStore : public RemoteStore { + public: + const Setting<Path> sshKey{(Store*)this, "", "ssh-key", + "path to an SSH private key"}; + const Setting<bool> compress{(Store*)this, false, "compress", + "whether to compress the connection"}; + + SSHStore(const std::string& host, const Params& params) + : Store(params), + RemoteStore(params), + host(host), + master(host, sshKey, + // Use SSH master only if using more than 1 connection. + connections->capacity() > 1, compress) {} + + std::string getUri() override { return uriScheme + host; } + + bool sameMachine() override { return false; } + + void narFromPath(const Path& path, Sink& sink) override; + + ref<FSAccessor> getFSAccessor() override; + + private: + struct Connection : RemoteStore::Connection { + std::unique_ptr<SSHMaster::Connection> sshConn; + }; + + ref<RemoteStore::Connection> openConnection() override; + + std::string host; + + SSHMaster master; + + void setOptions(RemoteStore::Connection& conn) override{ + /* TODO Add a way to explicitly ask for some options to be + forwarded. One option: A way to query the daemon for its + settings, and then a series of params to SSHStore like + forward-cores or forward-overridden-cores that only + override the requested settings. + */ + }; +}; + +void SSHStore::narFromPath(const Path& path, Sink& sink) { + auto conn(connections->get()); + conn->to << wopNarFromPath << path; + conn->processStderr(); + copyNAR(conn->from, sink); +} + +ref<FSAccessor> SSHStore::getFSAccessor() { + return make_ref<RemoteFSAccessor>(ref<Store>(shared_from_this())); +} + +ref<RemoteStore::Connection> SSHStore::openConnection() { + auto conn = make_ref<Connection>(); + conn->sshConn = master.startCommand("nix-daemon --stdio"); + conn->to = FdSink(conn->sshConn->in.get()); + conn->from = FdSource(conn->sshConn->out.get()); + initConnection(*conn); + return conn; +} + +static RegisterStoreImplementation regStore([](const std::string& uri, + const Store::Params& params) + -> std::shared_ptr<Store> { + if (std::string(uri, 0, uriScheme.size()) != uriScheme) { + return nullptr; + } + return std::make_shared<SSHStore>(std::string(uri, uriScheme.size()), params); +}); + +} // namespace nix diff --git a/third_party/nix/src/libstore/ssh.cc b/third_party/nix/src/libstore/ssh.cc new file mode 100644 index 000000000000..06aaf285f1b5 --- /dev/null +++ b/third_party/nix/src/libstore/ssh.cc @@ -0,0 +1,155 @@ +#include "ssh.hh" + +#include <utility> + +namespace nix { + +SSHMaster::SSHMaster(const std::string& host, std::string keyFile, + bool useMaster, bool compress, int logFD) + : host(host), + fakeSSH(host == "localhost"), + keyFile(std::move(keyFile)), + useMaster(useMaster && !fakeSSH), + compress(compress), + logFD(logFD) { + if (host.empty() || hasPrefix(host, "-")) { + throw Error("invalid SSH host name '%s'", host); + } +} + +void SSHMaster::addCommonSSHOpts(Strings& args) { + for (auto& i : tokenizeString<Strings>(getEnv("NIX_SSHOPTS"))) { + args.push_back(i); + } + if (!keyFile.empty()) { + args.insert(args.end(), {"-i", keyFile}); + } + if (compress) { + args.push_back("-C"); + } +} + +std::unique_ptr<SSHMaster::Connection> SSHMaster::startCommand( + const std::string& command) { + Path socketPath = startMaster(); + + Pipe in; + Pipe out; + in.create(); + out.create(); + + auto conn = std::make_unique<Connection>(); + ProcessOptions options; + options.dieWithParent = false; + + conn->sshPid = startProcess( + [&]() { + restoreSignals(); + + close(in.writeSide.get()); + close(out.readSide.get()); + + if (dup2(in.readSide.get(), STDIN_FILENO) == -1) { + throw SysError("duping over stdin"); + } + if (dup2(out.writeSide.get(), STDOUT_FILENO) == -1) { + throw SysError("duping over stdout"); + } + if (logFD != -1 && dup2(logFD, STDERR_FILENO) == -1) { + throw SysError("duping over stderr"); + } + + Strings args; + + if (fakeSSH) { + args = {"bash", "-c"}; + } else { + args = {"ssh", host, "-x", "-a"}; + addCommonSSHOpts(args); + if (!socketPath.empty()) { + args.insert(args.end(), {"-S", socketPath}); + } + // TODO(tazjin): Abseil verbosity flag + /*if (verbosity >= lvlChatty) { + args.push_back("-v"); + }*/ + } + + args.push_back(command); + execvp(args.begin()->c_str(), stringsToCharPtrs(args).data()); + + // could not exec ssh/bash + throw SysError("unable to execute '%s'", args.front()); + }, + options); + + in.readSide = -1; + out.writeSide = -1; + + conn->out = std::move(out.readSide); + conn->in = std::move(in.writeSide); + + return conn; +} + +Path SSHMaster::startMaster() { + if (!useMaster) { + return ""; + } + + auto state(state_.lock()); + + if (state->sshMaster != -1) { + return state->socketPath; + } + + state->tmpDir = + std::make_unique<AutoDelete>(createTempDir("", "nix", true, true, 0700)); + + state->socketPath = (Path)*state->tmpDir + "/ssh.sock"; + + Pipe out; + out.create(); + + ProcessOptions options; + options.dieWithParent = false; + + state->sshMaster = startProcess( + [&]() { + restoreSignals(); + + close(out.readSide.get()); + + if (dup2(out.writeSide.get(), STDOUT_FILENO) == -1) { + throw SysError("duping over stdout"); + } + + Strings args = {"ssh", host, + "-M", "-N", + "-S", state->socketPath, + "-o", "LocalCommand=echo started", + "-o", "PermitLocalCommand=yes"}; + // if (verbosity >= lvlChatty) { args.push_back("-v"); } + addCommonSSHOpts(args); + execvp(args.begin()->c_str(), stringsToCharPtrs(args).data()); + + throw SysError("unable to execute '%s'", args.front()); + }, + options); + + out.writeSide = -1; + + std::string reply; + try { + reply = readLine(out.readSide.get()); + } catch (EndOfFile& e) { + } + + if (reply != "started") { + throw Error("failed to start SSH master connection to '%s'", host); + } + + return state->socketPath; +} + +} // namespace nix diff --git a/third_party/nix/src/libstore/ssh.hh b/third_party/nix/src/libstore/ssh.hh new file mode 100644 index 000000000000..23952ccb1293 --- /dev/null +++ b/third_party/nix/src/libstore/ssh.hh @@ -0,0 +1,41 @@ +#pragma once + +#include "sync.hh" +#include "util.hh" + +namespace nix { + +class SSHMaster { + private: + const std::string host; + bool fakeSSH; + const std::string keyFile; + const bool useMaster; + const bool compress; + const int logFD; + + struct State { + Pid sshMaster; + std::unique_ptr<AutoDelete> tmpDir; + Path socketPath; + }; + + Sync<State> state_; + + void addCommonSSHOpts(Strings& args); + + public: + SSHMaster(const std::string& host, std::string keyFile, bool useMaster, + bool compress, int logFD = -1); + + struct Connection { + Pid sshPid; + AutoCloseFD out, in; + }; + + std::unique_ptr<Connection> startCommand(const std::string& command); + + Path startMaster(); +}; + +} // namespace nix diff --git a/third_party/nix/src/libstore/store-api.cc b/third_party/nix/src/libstore/store-api.cc new file mode 100644 index 000000000000..50e9b3b6ea86 --- /dev/null +++ b/third_party/nix/src/libstore/store-api.cc @@ -0,0 +1,974 @@ +#include "store-api.hh" + +#include <future> +#include <utility> + +#include <glog/logging.h> + +#include "crypto.hh" +#include "derivations.hh" +#include "globals.hh" +#include "json.hh" +#include "nar-info-disk-cache.hh" +#include "thread-pool.hh" +#include "util.hh" + +namespace nix { + +bool Store::isInStore(const Path& path) const { + return isInDir(path, storeDir); +} + +bool Store::isStorePath(const Path& path) const { + return isInStore(path) && + path.size() >= storeDir.size() + 1 + storePathHashLen && + path.find('/', storeDir.size() + 1) == Path::npos; +} + +void Store::assertStorePath(const Path& path) const { + if (!isStorePath(path)) { + throw Error(format("path '%1%' is not in the Nix store") % path); + } +} + +Path Store::toStorePath(const Path& path) const { + if (!isInStore(path)) { + throw Error(format("path '%1%' is not in the Nix store") % path); + } + Path::size_type slash = path.find('/', storeDir.size() + 1); + if (slash == Path::npos) { + return path; + } + return Path(path, 0, slash); +} + +Path Store::followLinksToStore(const Path& _path) const { + Path path = absPath(_path); + while (!isInStore(path)) { + if (!isLink(path)) { + break; + } + string target = readLink(path); + path = absPath(target, dirOf(path)); + } + if (!isInStore(path)) { + throw Error(format("path '%1%' is not in the Nix store") % path); + } + return path; +} + +Path Store::followLinksToStorePath(const Path& path) const { + return toStorePath(followLinksToStore(path)); +} + +string storePathToName(const Path& path) { + auto base = baseNameOf(path); + assert(base.size() == storePathHashLen || + (base.size() > storePathHashLen && base[storePathHashLen] == '-')); + return base.size() == storePathHashLen ? "" + : string(base, storePathHashLen + 1); +} + +string storePathToHash(const Path& path) { + auto base = baseNameOf(path); + assert(base.size() >= storePathHashLen); + return string(base, 0, storePathHashLen); +} + +void checkStoreName(const string& name) { + string validChars = "+-._?="; + + auto baseError = + format( + "The path name '%2%' is invalid: %3%. " + "Path names are alphanumeric and can include the symbols %1% " + "and must not begin with a period. " + "Note: If '%2%' is a source file and you cannot rename it on " + "disk, builtins.path { name = ... } can be used to give it an " + "alternative name.") % + validChars % name; + + /* Disallow names starting with a dot for possible security + reasons (e.g., "." and ".."). */ + if (string(name, 0, 1) == ".") { + throw Error(baseError % "it is illegal to start the name with a period"); + } + /* Disallow names longer than 211 characters. ext4โs max is 256, + but we need extra space for the hash and .chroot extensions. */ + if (name.length() > 211) { + throw Error(baseError % "name must be less than 212 characters"); + } + for (auto& i : name) { + if (!((i >= 'A' && i <= 'Z') || (i >= 'a' && i <= 'z') || + (i >= '0' && i <= '9') || validChars.find(i) != string::npos)) { + throw Error(baseError % (format("the '%1%' character is invalid") % i)); + } + } +} + +/* Store paths have the following form: + + <store>/<h>-<name> + + where + + <store> = the location of the Nix store, usually /nix/store + + <name> = a human readable name for the path, typically obtained + from the name attribute of the derivation, or the name of the + source file from which the store path is created. For derivation + outputs other than the default "out" output, the string "-<id>" + is suffixed to <name>. + + <h> = base-32 representation of the first 160 bits of a SHA-256 + hash of <s>; the hash part of the store name + + <s> = the string "<type>:sha256:<h2>:<store>:<name>"; + note that it includes the location of the store as well as the + name to make sure that changes to either of those are reflected + in the hash (e.g. you won't get /nix/store/<h>-name1 and + /nix/store/<h>-name2 with equal hash parts). + + <type> = one of: + "text:<r1>:<r2>:...<rN>" + for plain text files written to the store using + addTextToStore(); <r1> ... <rN> are the references of the + path. + "source" + for paths copied to the store using addToStore() when recursive + = true and hashAlgo = "sha256" + "output:<id>" + for either the outputs created by derivations, OR paths copied + to the store using addToStore() with recursive != true or + hashAlgo != "sha256" (in that case "source" is used; it's + silly, but it's done that way for compatibility). <id> is the + name of the output (usually, "out"). + + <h2> = base-16 representation of a SHA-256 hash of: + if <type> = "text:...": + the string written to the resulting store path + if <type> = "source": + the serialisation of the path from which this store path is + copied, as returned by hashPath() + if <type> = "output:<id>": + for non-fixed derivation outputs: + the derivation (see hashDerivationModulo() in + primops.cc) + for paths copied by addToStore() or produced by fixed-output + derivations: + the string "fixed:out:<rec><algo>:<hash>:", where + <rec> = "r:" for recursive (path) hashes, or "" for flat + (file) hashes + <algo> = "md5", "sha1" or "sha256" + <hash> = base-16 representation of the path or flat hash of + the contents of the path (or expected contents of the + path for fixed-output derivations) + + It would have been nicer to handle fixed-output derivations under + "source", e.g. have something like "source:<rec><algo>", but we're + stuck with this for now... + + The main reason for this way of computing names is to prevent name + collisions (for security). For instance, it shouldn't be feasible + to come up with a derivation whose output path collides with the + path for a copied source. The former would have a <s> starting with + "output:out:", while the latter would have a <s> starting with + "source:". +*/ + +Path Store::makeStorePath(const string& type, const Hash& hash, + const string& name) const { + /* e.g., "source:sha256:1abc...:/nix/store:foo.tar.gz" */ + string s = type + ":" + hash.to_string(Base16) + ":" + storeDir + ":" + name; + + checkStoreName(name); + + return storeDir + "/" + + compressHash(hashString(htSHA256, s), 20).to_string(Base32, false) + + "-" + name; +} + +Path Store::makeOutputPath(const string& id, const Hash& hash, + const string& name) const { + return makeStorePath("output:" + id, hash, + name + (id == "out" ? "" : "-" + id)); +} + +Path Store::makeFixedOutputPath(bool recursive, const Hash& hash, + const string& name) const { + return hash.type == htSHA256 && recursive + ? makeStorePath("source", hash, name) + : makeStorePath( + "output:out", + hashString(htSHA256, + "fixed:out:" + (recursive ? (string) "r:" : "") + + hash.to_string(Base16) + ":"), + name); +} + +Path Store::makeTextPath(const string& name, const Hash& hash, + const PathSet& references) const { + assert(hash.type == htSHA256); + /* Stuff the references (if any) into the type. This is a bit + hacky, but we can't put them in `s' since that would be + ambiguous. */ + string type = "text"; + for (auto& i : references) { + type += ":"; + type += i; + } + return makeStorePath(type, hash, name); +} + +std::pair<Path, Hash> Store::computeStorePathForPath(const string& name, + const Path& srcPath, + bool recursive, + HashType hashAlgo, + PathFilter& filter) const { + Hash h = recursive ? hashPath(hashAlgo, srcPath, filter).first + : hashFile(hashAlgo, srcPath); + Path dstPath = makeFixedOutputPath(recursive, h, name); + return std::pair<Path, Hash>(dstPath, h); +} + +Path Store::computeStorePathForText(const string& name, const string& s, + const PathSet& references) const { + return makeTextPath(name, hashString(htSHA256, s), references); +} + +Store::Store(const Params& params) + : Config(params), state({(size_t)pathInfoCacheSize}) {} + +std::string Store::getUri() { return ""; } + +bool Store::isValidPath(const Path& storePath) { + assertStorePath(storePath); + + auto hashPart = storePathToHash(storePath); + + { + auto state_(state.lock()); + auto res = state_->pathInfoCache.get(hashPart); + if (res) { + stats.narInfoReadAverted++; + return *res != nullptr; + } + } + + if (diskCache) { + auto res = diskCache->lookupNarInfo(getUri(), hashPart); + if (res.first != NarInfoDiskCache::oUnknown) { + stats.narInfoReadAverted++; + auto state_(state.lock()); + state_->pathInfoCache.upsert( + hashPart, + res.first == NarInfoDiskCache::oInvalid ? nullptr : res.second); + return res.first == NarInfoDiskCache::oValid; + } + } + + bool valid = isValidPathUncached(storePath); + + if (diskCache && !valid) { + // FIXME: handle valid = true case. + diskCache->upsertNarInfo(getUri(), hashPart, nullptr); + } + + return valid; +} + +/* Default implementation for stores that only implement + queryPathInfoUncached(). */ +bool Store::isValidPathUncached(const Path& path) { + try { + queryPathInfo(path); + return true; + } catch (InvalidPath&) { + return false; + } +} + +ref<const ValidPathInfo> Store::queryPathInfo(const Path& storePath) { + std::promise<ref<ValidPathInfo>> promise; + + queryPathInfo(storePath, {[&](std::future<ref<ValidPathInfo>> result) { + try { + promise.set_value(result.get()); + } catch (...) { + promise.set_exception(std::current_exception()); + } + }}); + + return promise.get_future().get(); +} + +void Store::queryPathInfo(const Path& storePath, + Callback<ref<ValidPathInfo>> callback) noexcept { + std::string hashPart; + + try { + assertStorePath(storePath); + + hashPart = storePathToHash(storePath); + + { + auto res = state.lock()->pathInfoCache.get(hashPart); + if (res) { + stats.narInfoReadAverted++; + if (!*res) { + throw InvalidPath(format("path '%s' is not valid") % storePath); + } + return callback(ref<ValidPathInfo>(*res)); + } + } + + if (diskCache) { + auto res = diskCache->lookupNarInfo(getUri(), hashPart); + if (res.first != NarInfoDiskCache::oUnknown) { + stats.narInfoReadAverted++; + { + auto state_(state.lock()); + state_->pathInfoCache.upsert( + hashPart, + res.first == NarInfoDiskCache::oInvalid ? nullptr : res.second); + if (res.first == NarInfoDiskCache::oInvalid || + (res.second->path != storePath && + !storePathToName(storePath).empty())) { + throw InvalidPath(format("path '%s' is not valid") % storePath); + } + } + return callback(ref<ValidPathInfo>(res.second)); + } + } + + } catch (...) { + return callback.rethrow(); + } + + auto callbackPtr = std::make_shared<decltype(callback)>(std::move(callback)); + + queryPathInfoUncached( + storePath, {[this, storePath, hashPart, callbackPtr]( + std::future<std::shared_ptr<ValidPathInfo>> fut) { + try { + auto info = fut.get(); + + if (diskCache) { + diskCache->upsertNarInfo(getUri(), hashPart, info); + } + + { + auto state_(state.lock()); + state_->pathInfoCache.upsert(hashPart, info); + } + + if (!info || (info->path != storePath && + !storePathToName(storePath).empty())) { + stats.narInfoMissing++; + throw InvalidPath("path '%s' is not valid", storePath); + } + + (*callbackPtr)(ref<ValidPathInfo>(info)); + } catch (...) { + callbackPtr->rethrow(); + } + }}); +} + +PathSet Store::queryValidPaths(const PathSet& paths, + SubstituteFlag maybeSubstitute) { + struct State { + size_t left; + PathSet valid; + std::exception_ptr exc; + }; + + Sync<State> state_(State{paths.size(), PathSet()}); + + std::condition_variable wakeup; + ThreadPool pool; + + auto doQuery = [&](const Path& path) { + checkInterrupt(); + queryPathInfo( + path, {[path, &state_, &wakeup](std::future<ref<ValidPathInfo>> fut) { + auto state(state_.lock()); + try { + auto info = fut.get(); + state->valid.insert(path); + } catch (InvalidPath&) { + } catch (...) { + state->exc = std::current_exception(); + } + assert(state->left); + if (--state->left == 0u) { + wakeup.notify_one(); + } + }}); + }; + + for (auto& path : paths) { + pool.enqueue(std::bind(doQuery, path)); + } + + pool.process(); + + while (true) { + auto state(state_.lock()); + if (state->left == 0u) { + if (state->exc) { + std::rethrow_exception(state->exc); + } + return state->valid; + } + state.wait(wakeup); + } +} + +/* Return a string accepted by decodeValidPathInfo() that + registers the specified paths as valid. Note: it's the + responsibility of the caller to provide a closure. */ +string Store::makeValidityRegistration(const PathSet& paths, bool showDerivers, + bool showHash) { + string s = s; + + for (auto& i : paths) { + s += i + "\n"; + + auto info = queryPathInfo(i); + + if (showHash) { + s += info->narHash.to_string(Base16, false) + "\n"; + s += (format("%1%\n") % info->narSize).str(); + } + + Path deriver = showDerivers ? info->deriver : ""; + s += deriver + "\n"; + + s += (format("%1%\n") % info->references.size()).str(); + + for (auto& j : info->references) { + s += j + "\n"; + } + } + + return s; +} + +void Store::pathInfoToJSON(JSONPlaceholder& jsonOut, const PathSet& storePaths, + bool includeImpureInfo, bool showClosureSize, + AllowInvalidFlag allowInvalid) { + auto jsonList = jsonOut.list(); + + for (auto storePath : storePaths) { + auto jsonPath = jsonList.object(); + jsonPath.attr("path", storePath); + + try { + auto info = queryPathInfo(storePath); + storePath = info->path; + + jsonPath.attr("narHash", info->narHash.to_string()) + .attr("narSize", info->narSize); + + { + auto jsonRefs = jsonPath.list("references"); + for (auto& ref : info->references) { + jsonRefs.elem(ref); + } + } + + if (!info->ca.empty()) { + jsonPath.attr("ca", info->ca); + } + + std::pair<uint64_t, uint64_t> closureSizes; + + if (showClosureSize) { + closureSizes = getClosureSize(storePath); + jsonPath.attr("closureSize", closureSizes.first); + } + + if (includeImpureInfo) { + if (!info->deriver.empty()) { + jsonPath.attr("deriver", info->deriver); + } + + if (info->registrationTime != 0) { + jsonPath.attr("registrationTime", info->registrationTime); + } + + if (info->ultimate) { + jsonPath.attr("ultimate", info->ultimate); + } + + if (!info->sigs.empty()) { + auto jsonSigs = jsonPath.list("signatures"); + for (auto& sig : info->sigs) { + jsonSigs.elem(sig); + } + } + + auto narInfo = std::dynamic_pointer_cast<const NarInfo>( + std::shared_ptr<const ValidPathInfo>(info)); + + if (narInfo) { + if (!narInfo->url.empty()) { + jsonPath.attr("url", narInfo->url); + } + if (narInfo->fileHash) { + jsonPath.attr("downloadHash", narInfo->fileHash.to_string()); + } + if (narInfo->fileSize != 0u) { + jsonPath.attr("downloadSize", narInfo->fileSize); + } + if (showClosureSize) { + jsonPath.attr("closureDownloadSize", closureSizes.second); + } + } + } + + } catch (InvalidPath&) { + jsonPath.attr("valid", false); + } + } +} + +std::pair<uint64_t, uint64_t> Store::getClosureSize(const Path& storePath) { + uint64_t totalNarSize = 0; + uint64_t totalDownloadSize = 0; + PathSet closure; + computeFSClosure(storePath, closure, false, false); + for (auto& p : closure) { + auto info = queryPathInfo(p); + totalNarSize += info->narSize; + auto narInfo = std::dynamic_pointer_cast<const NarInfo>( + std::shared_ptr<const ValidPathInfo>(info)); + if (narInfo) { + totalDownloadSize += narInfo->fileSize; + } + } + return {totalNarSize, totalDownloadSize}; +} + +const Store::Stats& Store::getStats() { + { + auto state_(state.lock()); + stats.pathInfoCacheSize = state_->pathInfoCache.size(); + } + return stats; +} + +void Store::buildPaths(const PathSet& paths, BuildMode buildMode) { + for (auto& path : paths) { + if (isDerivation(path)) { + unsupported("buildPaths"); + } + } + + if (queryValidPaths(paths).size() != paths.size()) { + unsupported("buildPaths"); + } +} + +void copyStorePath(ref<Store> srcStore, const ref<Store>& dstStore, + const Path& storePath, RepairFlag repair, + CheckSigsFlag checkSigs) { + auto srcUri = srcStore->getUri(); + auto dstUri = dstStore->getUri(); + + if (srcUri == "local" || srcUri == "daemon") { + LOG(INFO) << "copying path '" << storePath << "' to '" << dstUri << "'"; + } else { + if (dstUri == "local" || dstUri == "daemon") { + LOG(INFO) << "copying path '" << storePath << "' from '" << srcUri << "'"; + } else { + LOG(INFO) << "copying path '" << storePath << "' from '" << srcUri + << "' to '" << dstUri << "'"; + } + } + + auto info = srcStore->queryPathInfo(storePath); + + uint64_t total = 0; + + if (!info->narHash) { + StringSink sink; + srcStore->narFromPath({storePath}, sink); + auto info2 = make_ref<ValidPathInfo>(*info); + info2->narHash = hashString(htSHA256, *sink.s); + if (info->narSize == 0u) { + info2->narSize = sink.s->size(); + } + if (info->ultimate) { + info2->ultimate = false; + } + info = info2; + + StringSource source(*sink.s); + dstStore->addToStore(*info, source, repair, checkSigs); + return; + } + + if (info->ultimate) { + auto info2 = make_ref<ValidPathInfo>(*info); + info2->ultimate = false; + info = info2; + } + + auto source = sinkToSource( + [&](Sink& sink) { + LambdaSink wrapperSink([&](const unsigned char* data, size_t len) { + sink(data, len); + total += len; + }); + srcStore->narFromPath({storePath}, wrapperSink); + }, + [&]() { + throw EndOfFile("NAR for '%s' fetched from '%s' is incomplete", + storePath, srcStore->getUri()); + }); + + dstStore->addToStore(*info, *source, repair, checkSigs); +} + +void copyPaths(ref<Store> srcStore, ref<Store> dstStore, + const PathSet& storePaths, RepairFlag repair, + CheckSigsFlag checkSigs, SubstituteFlag substitute) { + PathSet valid = dstStore->queryValidPaths(storePaths, substitute); + + PathSet missing; + for (auto& path : storePaths) { + if (valid.count(path) == 0u) { + missing.insert(path); + } + } + + if (missing.empty()) { + return; + } + + LOG(INFO) << "copying " << missing.size() << " paths"; + + std::atomic<size_t> nrDone{0}; + std::atomic<size_t> nrFailed{0}; + std::atomic<uint64_t> bytesExpected{0}; + std::atomic<uint64_t> nrRunning{0}; + + ThreadPool pool; + + processGraph<Path>( + pool, PathSet(missing.begin(), missing.end()), + + [&](const Path& storePath) { + if (dstStore->isValidPath(storePath)) { + nrDone++; + return PathSet(); + } + + auto info = srcStore->queryPathInfo(storePath); + + bytesExpected += info->narSize; + + return info->references; + }, + + [&](const Path& storePath) { + checkInterrupt(); + + if (!dstStore->isValidPath(storePath)) { + MaintainCount<decltype(nrRunning)> mc(nrRunning); + try { + copyStorePath(srcStore, dstStore, storePath, repair, checkSigs); + } catch (Error& e) { + nrFailed++; + if (!settings.keepGoing) { + throw e; + } + LOG(ERROR) << "could not copy " << storePath << ": " << e.what(); + return; + } + } + + nrDone++; + }); +} + +void copyClosure(const ref<Store>& srcStore, const ref<Store>& dstStore, + const PathSet& storePaths, RepairFlag repair, + CheckSigsFlag checkSigs, SubstituteFlag substitute) { + PathSet closure; + srcStore->computeFSClosure({storePaths}, closure); + copyPaths(srcStore, dstStore, closure, repair, checkSigs, substitute); +} + +ValidPathInfo decodeValidPathInfo(std::istream& str, bool hashGiven) { + ValidPathInfo info; + getline(str, info.path); + if (str.eof()) { + info.path = ""; + return info; + } + if (hashGiven) { + string s; + getline(str, s); + info.narHash = Hash(s, htSHA256); + getline(str, s); + if (!string2Int(s, info.narSize)) { + throw Error("number expected"); + } + } + getline(str, info.deriver); + string s; + int n; + getline(str, s); + if (!string2Int(s, n)) { + throw Error("number expected"); + } + while ((n--) != 0) { + getline(str, s); + info.references.insert(s); + } + if (!str || str.eof()) { + throw Error("missing input"); + } + return info; +} + +string showPaths(const PathSet& paths) { + string s; + for (auto& i : paths) { + if (!s.empty()) { + s += ", "; + } + s += "'" + i + "'"; + } + return s; +} + +std::string ValidPathInfo::fingerprint() const { + if (narSize == 0 || !narHash) { + throw Error(format("cannot calculate fingerprint of path '%s' because its " + "size/hash is not known") % + path); + } + return "1;" + path + ";" + narHash.to_string(Base32) + ";" + + std::to_string(narSize) + ";" + concatStringsSep(",", references); +} + +void ValidPathInfo::sign(const SecretKey& secretKey) { + sigs.insert(secretKey.signDetached(fingerprint())); +} + +bool ValidPathInfo::isContentAddressed(const Store& store) const { + auto warn = [&]() { + LOG(ERROR) << "warning: path '" << path + << "' claims to be content-addressed but isn't"; + }; + + if (hasPrefix(ca, "text:")) { + Hash hash(std::string(ca, 5)); + if (store.makeTextPath(storePathToName(path), hash, references) == path) { + return true; + } + warn(); + + } + + else if (hasPrefix(ca, "fixed:")) { + bool recursive = ca.compare(6, 2, "r:") == 0; + Hash hash(std::string(ca, recursive ? 8 : 6)); + if (references.empty() && + store.makeFixedOutputPath(recursive, hash, storePathToName(path)) == + path) { + return true; + } + warn(); + } + + return false; +} + +size_t ValidPathInfo::checkSignatures(const Store& store, + const PublicKeys& publicKeys) const { + if (isContentAddressed(store)) { + return maxSigs; + } + + size_t good = 0; + for (auto& sig : sigs) { + if (checkSignature(publicKeys, sig)) { + good++; + } + } + return good; +} + +bool ValidPathInfo::checkSignature(const PublicKeys& publicKeys, + const std::string& sig) const { + return verifyDetached(fingerprint(), sig, publicKeys); +} + +Strings ValidPathInfo::shortRefs() const { + Strings refs; + for (auto& r : references) { + refs.push_back(baseNameOf(r)); + } + return refs; +} + +std::string makeFixedOutputCA(bool recursive, const Hash& hash) { + return "fixed:" + (recursive ? (std::string) "r:" : "") + hash.to_string(); +} + +void Store::addToStore(const ValidPathInfo& info, Source& narSource, + RepairFlag repair, CheckSigsFlag checkSigs, + std::shared_ptr<FSAccessor> accessor) { + addToStore(info, make_ref<std::string>(narSource.drain()), repair, checkSigs, + std::move(accessor)); +} + +void Store::addToStore(const ValidPathInfo& info, const ref<std::string>& nar, + RepairFlag repair, CheckSigsFlag checkSigs, + std::shared_ptr<FSAccessor> accessor) { + StringSource source(*nar); + addToStore(info, source, repair, checkSigs, std::move(accessor)); +} + +} // namespace nix + +#include "local-store.hh" +#include "remote-store.hh" + +namespace nix { + +RegisterStoreImplementation::Implementations* + RegisterStoreImplementation::implementations = nullptr; + +/* Split URI into protocol+hierarchy part and its parameter set. */ +std::pair<std::string, Store::Params> splitUriAndParams( + const std::string& uri_) { + auto uri(uri_); + Store::Params params; + auto q = uri.find('?'); + if (q != std::string::npos) { + for (const auto& s : tokenizeString<Strings>(uri.substr(q + 1), "&")) { + auto e = s.find('='); + if (e != std::string::npos) { + auto value = s.substr(e + 1); + std::string decoded; + for (size_t i = 0; i < value.size();) { + if (value[i] == '%') { + if (i + 2 >= value.size()) { + throw Error("invalid URI parameter '%s'", value); + } + try { + decoded += std::stoul(std::string(value, i + 1, 2), nullptr, 16); + i += 3; + } catch (...) { + throw Error("invalid URI parameter '%s'", value); + } + } else { + decoded += value[i++]; + } + } + params[s.substr(0, e)] = decoded; + } + } + uri = uri_.substr(0, q); + } + return {uri, params}; +} + +ref<Store> openStore(const std::string& uri_, + const Store::Params& extraParams) { + auto [uri, uriParams] = splitUriAndParams(uri_); + auto params = extraParams; + params.insert(uriParams.begin(), uriParams.end()); + + for (const auto& fun : *RegisterStoreImplementation::implementations) { + auto store = fun(uri, params); + if (store) { + store->warnUnknownSettings(); + return ref<Store>(store); + } + } + + throw Error("don't know how to open Nix store '%s'", uri); +} + +StoreType getStoreType(const std::string& uri, const std::string& stateDir) { + if (uri == "daemon") { + return tDaemon; + } + if (uri == "local" || hasPrefix(uri, "/")) { + return tLocal; + } else if (uri.empty() || uri == "auto") { + if (access(stateDir.c_str(), R_OK | W_OK) == 0) { + return tLocal; + } + if (pathExists(settings.nixDaemonSocketFile)) { + return tDaemon; + } else { + return tLocal; + } + } else { + return tOther; + } +} + +static RegisterStoreImplementation regStore([](const std::string& uri, + const Store::Params& params) + -> std::shared_ptr<Store> { + switch (getStoreType(uri, get(params, "state", settings.nixStateDir))) { + case tDaemon: + return std::shared_ptr<Store>(std::make_shared<UDSRemoteStore>(params)); + case tLocal: { + Store::Params params2 = params; + if (hasPrefix(uri, "/")) { + params2["root"] = uri; + } + return std::shared_ptr<Store>(std::make_shared<LocalStore>(params2)); + } + default: + return nullptr; + } +}); + +std::list<ref<Store>> getDefaultSubstituters() { + static auto stores([]() { + std::list<ref<Store>> stores; + + StringSet done; + + auto addStore = [&](const std::string& uri) { + if (done.count(uri) != 0u) { + return; + } + done.insert(uri); + try { + stores.push_back(openStore(uri)); + } catch (Error& e) { + LOG(WARNING) << e.what(); + } + }; + + for (const auto& uri : settings.substituters.get()) { + addStore(uri); + } + + for (const auto& uri : settings.extraSubstituters.get()) { + addStore(uri); + } + + stores.sort([](ref<Store>& a, ref<Store>& b) { + return a->getPriority() < b->getPriority(); + }); + + return stores; + }()); + + return stores; +} + +} // namespace nix diff --git a/third_party/nix/src/libstore/store-api.hh b/third_party/nix/src/libstore/store-api.hh new file mode 100644 index 000000000000..242a3e65621e --- /dev/null +++ b/third_party/nix/src/libstore/store-api.hh @@ -0,0 +1,770 @@ +#pragma once + +#include <atomic> +#include <limits> +#include <map> +#include <memory> +#include <string> +#include <unordered_map> +#include <unordered_set> + +#include "config.hh" +#include "crypto.hh" +#include "globals.hh" +#include "hash.hh" +#include "lru-cache.hh" +#include "serialise.hh" +#include "sync.hh" + +namespace nix { + +MakeError(SubstError, Error) + MakeError(BuildError, Error) /* denotes a permanent build failure */ + MakeError(InvalidPath, Error) MakeError(Unsupported, Error) + MakeError(SubstituteGone, Error) MakeError(SubstituterDisabled, Error) + + struct BasicDerivation; +struct Derivation; +class FSAccessor; +class NarInfoDiskCache; +class Store; +class JSONPlaceholder; + +enum RepairFlag : bool { NoRepair = false, Repair = true }; +enum CheckSigsFlag : bool { NoCheckSigs = false, CheckSigs = true }; +enum SubstituteFlag : bool { NoSubstitute = false, Substitute = true }; +enum AllowInvalidFlag : bool { DisallowInvalid = false, AllowInvalid = true }; + +/* Size of the hash part of store paths, in base-32 characters. */ +const size_t storePathHashLen = 32; // i.e. 160 bits + +/* Magic header of exportPath() output (obsolete). */ +const uint32_t exportMagic = 0x4558494e; + +typedef std::unordered_map<Path, std::unordered_set<std::string>> Roots; + +struct GCOptions { + /* Garbage collector operation: + + - `gcReturnLive': return the set of paths reachable from + (i.e. in the closure of) the roots. + + - `gcReturnDead': return the set of paths not reachable from + the roots. + + - `gcDeleteDead': actually delete the latter set. + + - `gcDeleteSpecific': delete the paths listed in + `pathsToDelete', insofar as they are not reachable. + */ + typedef enum { + gcReturnLive, + gcReturnDead, + gcDeleteDead, + gcDeleteSpecific, + } GCAction; + + GCAction action{gcDeleteDead}; + + /* If `ignoreLiveness' is set, then reachability from the roots is + ignored (dangerous!). However, the paths must still be + unreferenced *within* the store (i.e., there can be no other + store paths that depend on them). */ + bool ignoreLiveness{false}; + + /* For `gcDeleteSpecific', the paths to delete. */ + PathSet pathsToDelete; + + /* Stop after at least `maxFreed' bytes have been freed. */ + unsigned long long maxFreed{std::numeric_limits<unsigned long long>::max()}; +}; + +struct GCResults { + /* Depending on the action, the GC roots, or the paths that would + be or have been deleted. */ + PathSet paths; + + /* For `gcReturnDead', `gcDeleteDead' and `gcDeleteSpecific', the + number of bytes that would be or was freed. */ + unsigned long long bytesFreed = 0; +}; + +struct SubstitutablePathInfo { + Path deriver; + PathSet references; + unsigned long long downloadSize; /* 0 = unknown or inapplicable */ + unsigned long long narSize; /* 0 = unknown */ +}; + +typedef std::map<Path, SubstitutablePathInfo> SubstitutablePathInfos; + +struct ValidPathInfo { + Path path; + Path deriver; + Hash narHash; + PathSet references; + time_t registrationTime = 0; + uint64_t narSize = 0; // 0 = unknown + uint64_t id; // internal use only + + /* Whether the path is ultimately trusted, that is, it's a + derivation output that was built locally. */ + bool ultimate = false; + + StringSet sigs; // note: not necessarily verified + + /* If non-empty, an assertion that the path is content-addressed, + i.e., that the store path is computed from a cryptographic hash + of the contents of the path, plus some other bits of data like + the "name" part of the path. Such a path doesn't need + signatures, since we don't have to trust anybody's claim that + the path is the output of a particular derivation. (In the + extensional store model, we have to trust that the *contents* + of an output path of a derivation were actually produced by + that derivation. In the intensional model, we have to trust + that a particular output path was produced by a derivation; the + path then implies the contents.) + + Ideally, the content-addressability assertion would just be a + Boolean, and the store path would be computed from + โstorePathToName(path)โ, โnarHashโ and โreferencesโ. However, + 1) we've accumulated several types of content-addressed paths + over the years; and 2) fixed-output derivations support + multiple hash algorithms and serialisation methods (flat file + vs NAR). Thus, โcaโ has one of the following forms: + + * โtext:sha256:<sha256 hash of file contents>โ: For paths + computed by makeTextPath() / addTextToStore(). + + * โfixed:<r?>:<ht>:<h>โ: For paths computed by + makeFixedOutputPath() / addToStore(). + */ + std::string ca; + + bool operator==(const ValidPathInfo& i) const { + return path == i.path && narHash == i.narHash && references == i.references; + } + + /* Return a fingerprint of the store path to be used in binary + cache signatures. It contains the store path, the base-32 + SHA-256 hash of the NAR serialisation of the path, the size of + the NAR, and the sorted references. The size field is strictly + speaking superfluous, but might prevent endless/excessive data + attacks. */ + std::string fingerprint() const; + + void sign(const SecretKey& secretKey); + + /* Return true iff the path is verifiably content-addressed. */ + bool isContentAddressed(const Store& store) const; + + static const size_t maxSigs = std::numeric_limits<size_t>::max(); + + /* Return the number of signatures on this .narinfo that were + produced by one of the specified keys, or maxSigs if the path + is content-addressed. */ + size_t checkSignatures(const Store& store, + const PublicKeys& publicKeys) const; + + /* Verify a single signature. */ + bool checkSignature(const PublicKeys& publicKeys, + const std::string& sig) const; + + Strings shortRefs() const; + + virtual ~ValidPathInfo() {} +}; + +typedef list<ValidPathInfo> ValidPathInfos; + +enum BuildMode { bmNormal, bmRepair, bmCheck }; + +struct BuildResult { + /* Note: don't remove status codes, and only add new status codes + at the end of the list, to prevent client/server + incompatibilities in the nix-store --serve protocol. */ + enum Status { + Built = 0, + Substituted, + AlreadyValid, + PermanentFailure, + InputRejected, + OutputRejected, + TransientFailure, // possibly transient + CachedFailure, // no longer used + TimedOut, + MiscFailure, + DependencyFailed, + LogLimitExceeded, + NotDeterministic, + } status = MiscFailure; + std::string errorMsg; + + /* How many times this build was performed. */ + unsigned int timesBuilt = 0; + + /* If timesBuilt > 1, whether some builds did not produce the same + result. (Note that 'isNonDeterministic = false' does not mean + the build is deterministic, just that we don't have evidence of + non-determinism.) */ + bool isNonDeterministic = false; + + /* The start/stop times of the build (or one of the rounds, if it + was repeated). */ + time_t startTime = 0, stopTime = 0; + + bool success() { + return status == Built || status == Substituted || status == AlreadyValid; + } +}; + +class Store : public std::enable_shared_from_this<Store>, public Config { + public: + typedef std::map<std::string, std::string> Params; + + const PathSetting storeDir_{this, false, settings.nixStore, "store", + "path to the Nix store"}; + const Path storeDir = storeDir_; + + const Setting<int> pathInfoCacheSize{ + this, 65536, "path-info-cache-size", + "size of the in-memory store path information cache"}; + + const Setting<bool> isTrusted{ + this, false, "trusted", + "whether paths from this store can be used as substitutes even when they " + "lack trusted signatures"}; + + protected: + struct State { + LRUCache<std::string, std::shared_ptr<ValidPathInfo>> pathInfoCache; + }; + + Sync<State> state; + + std::shared_ptr<NarInfoDiskCache> diskCache; + + Store(const Params& params); + + public: + virtual ~Store() {} + + virtual std::string getUri() = 0; + + /* Return true if โpathโ is in the Nix store (but not the Nix + store itself). */ + bool isInStore(const Path& path) const; + + /* Return true if โpathโ is a store path, i.e. a direct child of + the Nix store. */ + bool isStorePath(const Path& path) const; + + /* Throw an exception if โpathโ is not a store path. */ + void assertStorePath(const Path& path) const; + + /* Chop off the parts after the top-level store name, e.g., + /nix/store/abcd-foo/bar => /nix/store/abcd-foo. */ + Path toStorePath(const Path& path) const; + + /* Follow symlinks until we end up with a path in the Nix store. */ + Path followLinksToStore(const Path& path) const; + + /* Same as followLinksToStore(), but apply toStorePath() to the + result. */ + Path followLinksToStorePath(const Path& path) const; + + /* Constructs a unique store path name. */ + Path makeStorePath(const string& type, const Hash& hash, + const string& name) const; + + Path makeOutputPath(const string& id, const Hash& hash, + const string& name) const; + + Path makeFixedOutputPath(bool recursive, const Hash& hash, + const string& name) const; + + Path makeTextPath(const string& name, const Hash& hash, + const PathSet& references) const; + + /* This is the preparatory part of addToStore(); it computes the + store path to which srcPath is to be copied. Returns the store + path and the cryptographic hash of the contents of srcPath. */ + std::pair<Path, Hash> computeStorePathForPath( + const string& name, const Path& srcPath, bool recursive = true, + HashType hashAlgo = htSHA256, + PathFilter& filter = defaultPathFilter) const; + + /* Preparatory part of addTextToStore(). + + !!! Computation of the path should take the references given to + addTextToStore() into account, otherwise we have a (relatively + minor) security hole: a caller can register a source file with + bogus references. If there are too many references, the path may + not be garbage collected when it has to be (not really a problem, + the caller could create a root anyway), or it may be garbage + collected when it shouldn't be (more serious). + + Hashing the references would solve this (bogus references would + simply yield a different store path, so other users wouldn't be + affected), but it has some backwards compatibility issues (the + hashing scheme changes), so I'm not doing that for now. */ + Path computeStorePathForText(const string& name, const string& s, + const PathSet& references) const; + + /* Check whether a path is valid. */ + bool isValidPath(const Path& path); + + protected: + virtual bool isValidPathUncached(const Path& path); + + public: + /* Query which of the given paths is valid. Optionally, try to + substitute missing paths. */ + virtual PathSet queryValidPaths( + const PathSet& paths, SubstituteFlag maybeSubstitute = NoSubstitute); + + /* Query the set of all valid paths. Note that for some store + backends, the name part of store paths may be omitted + (i.e. you'll get /nix/store/<hash> rather than + /nix/store/<hash>-<name>). Use queryPathInfo() to obtain the + full store path. */ + virtual PathSet queryAllValidPaths() { unsupported("queryAllValidPaths"); } + + /* Query information about a valid path. It is permitted to omit + the name part of the store path. */ + ref<const ValidPathInfo> queryPathInfo(const Path& path); + + /* Asynchronous version of queryPathInfo(). */ + void queryPathInfo(const Path& path, + Callback<ref<ValidPathInfo>> callback) noexcept; + + protected: + virtual void queryPathInfoUncached( + const Path& path, + Callback<std::shared_ptr<ValidPathInfo>> callback) noexcept = 0; + + public: + /* Queries the set of incoming FS references for a store path. + The result is not cleared. */ + virtual void queryReferrers(const Path& path, PathSet& referrers) { + unsupported("queryReferrers"); + } + + /* Return all currently valid derivations that have `path' as an + output. (Note that the result of `queryDeriver()' is the + derivation that was actually used to produce `path', which may + not exist anymore.) */ + virtual PathSet queryValidDerivers(const Path& path) { return {}; }; + + /* Query the outputs of the derivation denoted by `path'. */ + virtual PathSet queryDerivationOutputs(const Path& path) { + unsupported("queryDerivationOutputs"); + } + + /* Query the output names of the derivation denoted by `path'. */ + virtual StringSet queryDerivationOutputNames(const Path& path) { + unsupported("queryDerivationOutputNames"); + } + + /* Query the full store path given the hash part of a valid store + path, or "" if the path doesn't exist. */ + virtual Path queryPathFromHashPart(const string& hashPart) = 0; + + /* Query which of the given paths have substitutes. */ + virtual PathSet querySubstitutablePaths(const PathSet& paths) { return {}; }; + + /* Query substitute info (i.e. references, derivers and download + sizes) of a set of paths. If a path does not have substitute + info, it's omitted from the resulting โinfosโ map. */ + virtual void querySubstitutablePathInfos(const PathSet& paths, + SubstitutablePathInfos& infos) { + return; + }; + + virtual bool wantMassQuery() { return false; } + + /* Import a path into the store. */ + virtual void addToStore(const ValidPathInfo& info, Source& narSource, + RepairFlag repair = NoRepair, + CheckSigsFlag checkSigs = CheckSigs, + std::shared_ptr<FSAccessor> accessor = 0); + + // FIXME: remove + virtual void addToStore(const ValidPathInfo& info, + const ref<std::string>& nar, + RepairFlag repair = NoRepair, + CheckSigsFlag checkSigs = CheckSigs, + std::shared_ptr<FSAccessor> accessor = 0); + + /* Copy the contents of a path to the store and register the + validity the resulting path. The resulting path is returned. + The function object `filter' can be used to exclude files (see + libutil/archive.hh). */ + virtual Path addToStore(const string& name, const Path& srcPath, + bool recursive = true, HashType hashAlgo = htSHA256, + PathFilter& filter = defaultPathFilter, + RepairFlag repair = NoRepair) = 0; + + /* Like addToStore, but the contents written to the output path is + a regular file containing the given string. */ + virtual Path addTextToStore(const string& name, const string& s, + const PathSet& references, + RepairFlag repair = NoRepair) = 0; + + /* Write a NAR dump of a store path. */ + virtual void narFromPath(const Path& path, Sink& sink) = 0; + + /* For each path, if it's a derivation, build it. Building a + derivation means ensuring that the output paths are valid. If + they are already valid, this is a no-op. Otherwise, validity + can be reached in two ways. First, if the output paths is + substitutable, then build the path that way. Second, the + output paths can be created by running the builder, after + recursively building any sub-derivations. For inputs that are + not derivations, substitute them. */ + virtual void buildPaths(const PathSet& paths, BuildMode buildMode = bmNormal); + + /* Build a single non-materialized derivation (i.e. not from an + on-disk .drv file). Note that โdrvPathโ is only used for + informational purposes. */ + virtual BuildResult buildDerivation(const Path& drvPath, + const BasicDerivation& drv, + BuildMode buildMode = bmNormal) = 0; + + /* Ensure that a path is valid. If it is not currently valid, it + may be made valid by running a substitute (if defined for the + path). */ + virtual void ensurePath(const Path& path) = 0; + + /* Add a store path as a temporary root of the garbage collector. + The root disappears as soon as we exit. */ + virtual void addTempRoot(const Path& path) { unsupported("addTempRoot"); } + + /* Add an indirect root, which is merely a symlink to `path' from + /nix/var/nix/gcroots/auto/<hash of `path'>. `path' is supposed + to be a symlink to a store path. The garbage collector will + automatically remove the indirect root when it finds that + `path' has disappeared. */ + virtual void addIndirectRoot(const Path& path) { + unsupported("addIndirectRoot"); + } + + /* Acquire the global GC lock, then immediately release it. This + function must be called after registering a new permanent root, + but before exiting. Otherwise, it is possible that a running + garbage collector doesn't see the new root and deletes the + stuff we've just built. By acquiring the lock briefly, we + ensure that either: + + - The collector is already running, and so we block until the + collector is finished. The collector will know about our + *temporary* locks, which should include whatever it is we + want to register as a permanent lock. + + - The collector isn't running, or it's just started but hasn't + acquired the GC lock yet. In that case we get and release + the lock right away, then exit. The collector scans the + permanent root and sees our's. + + In either case the permanent root is seen by the collector. */ + virtual void syncWithGC(){}; + + /* Find the roots of the garbage collector. Each root is a pair + (link, storepath) where `link' is the path of the symlink + outside of the Nix store that point to `storePath'. If + 'censor' is true, privacy-sensitive information about roots + found in /proc is censored. */ + virtual Roots findRoots(bool censor) { unsupported("findRoots"); } + + /* Perform a garbage collection. */ + virtual void collectGarbage(const GCOptions& options, GCResults& results) { + unsupported("collectGarbage"); + } + + /* Return a string representing information about the path that + can be loaded into the database using `nix-store --load-db' or + `nix-store --register-validity'. */ + string makeValidityRegistration(const PathSet& paths, bool showDerivers, + bool showHash); + + /* Write a JSON representation of store path metadata, such as the + hash and the references. If โincludeImpureInfoโ is true, + variable elements such as the registration time are + included. If โshowClosureSizeโ is true, the closure size of + each path is included. */ + void pathInfoToJSON(JSONPlaceholder& jsonOut, const PathSet& storePaths, + bool includeImpureInfo, bool showClosureSize, + AllowInvalidFlag allowInvalid = DisallowInvalid); + + /* Return the size of the closure of the specified path, that is, + the sum of the size of the NAR serialisation of each path in + the closure. */ + std::pair<uint64_t, uint64_t> getClosureSize(const Path& storePath); + + /* Optimise the disk space usage of the Nix store by hard-linking files + with the same contents. */ + virtual void optimiseStore(){}; + + /* Check the integrity of the Nix store. Returns true if errors + remain. */ + virtual bool verifyStore(bool checkContents, RepairFlag repair = NoRepair) { + return false; + }; + + /* Return an object to access files in the Nix store. */ + virtual ref<FSAccessor> getFSAccessor() { unsupported("getFSAccessor"); } + + /* Add signatures to the specified store path. The signatures are + not verified. */ + virtual void addSignatures(const Path& storePath, const StringSet& sigs) { + unsupported("addSignatures"); + } + + /* Utility functions. */ + + /* Read a derivation, after ensuring its existence through + ensurePath(). */ + Derivation derivationFromPath(const Path& drvPath); + + /* Place in `out' the set of all store paths in the file system + closure of `storePath'; that is, all paths than can be directly + or indirectly reached from it. `out' is not cleared. If + `flipDirection' is true, the set of paths that can reach + `storePath' is returned; that is, the closures under the + `referrers' relation instead of the `references' relation is + returned. */ + virtual void computeFSClosure(const PathSet& paths, PathSet& paths_, + bool flipDirection = false, + bool includeOutputs = false, + bool includeDerivers = false); + + void computeFSClosure(const Path& path, PathSet& paths_, + bool flipDirection = false, bool includeOutputs = false, + bool includeDerivers = false); + + /* Given a set of paths that are to be built, return the set of + derivations that will be built, and the set of output paths + that will be substituted. */ + virtual void queryMissing(const PathSet& targets, PathSet& willBuild, + PathSet& willSubstitute, PathSet& unknown, + unsigned long long& downloadSize, + unsigned long long& narSize); + + /* Sort a set of paths topologically under the references + relation. If p refers to q, then p precedes q in this list. */ + Paths topoSortPaths(const PathSet& paths); + + /* Export multiple paths in the format expected by โnix-store + --importโ. */ + void exportPaths(const Paths& paths, Sink& sink); + + void exportPath(const Path& path, Sink& sink); + + /* Import a sequence of NAR dumps created by exportPaths() into + the Nix store. Optionally, the contents of the NARs are + preloaded into the specified FS accessor to speed up subsequent + access. */ + Paths importPaths(Source& source, const std::shared_ptr<FSAccessor>& accessor, + CheckSigsFlag checkSigs = CheckSigs); + + struct Stats { + std::atomic<uint64_t> narInfoRead{0}; + std::atomic<uint64_t> narInfoReadAverted{0}; + std::atomic<uint64_t> narInfoMissing{0}; + std::atomic<uint64_t> narInfoWrite{0}; + std::atomic<uint64_t> pathInfoCacheSize{0}; + std::atomic<uint64_t> narRead{0}; + std::atomic<uint64_t> narReadBytes{0}; + std::atomic<uint64_t> narReadCompressedBytes{0}; + std::atomic<uint64_t> narWrite{0}; + std::atomic<uint64_t> narWriteAverted{0}; + std::atomic<uint64_t> narWriteBytes{0}; + std::atomic<uint64_t> narWriteCompressedBytes{0}; + std::atomic<uint64_t> narWriteCompressionTimeMs{0}; + }; + + const Stats& getStats(); + + /* Return the build log of the specified store path, if available, + or null otherwise. */ + virtual std::shared_ptr<std::string> getBuildLog(const Path& path) { + return nullptr; + } + + /* Hack to allow long-running processes like hydra-queue-runner to + occasionally flush their path info cache. */ + void clearPathInfoCache() { state.lock()->pathInfoCache.clear(); } + + /* Establish a connection to the store, for store types that have + a notion of connection. Otherwise this is a no-op. */ + virtual void connect(){}; + + /* Get the protocol version of this store or it's connection. */ + virtual unsigned int getProtocol() { return 0; }; + + /* Get the priority of the store, used to order substituters. In + particular, binary caches can specify a priority field in their + "nix-cache-info" file. Lower value means higher priority. */ + virtual int getPriority() { return 0; } + + virtual Path toRealPath(const Path& storePath) { return storePath; } + + virtual void createUser(const std::string& userName, uid_t userId) {} + + protected: + Stats stats; + + /* Unsupported methods. */ + [[noreturn]] void unsupported(const std::string& op) { + throw Unsupported("operation '%s' is not supported by store '%s'", op, + getUri()); + } +}; + +class LocalFSStore : public virtual Store { + public: + // FIXME: the (Store*) cast works around a bug in gcc that causes + // it to emit the call to the Option constructor. Clang works fine + // either way. + const PathSetting rootDir{(Store*)this, true, "", "root", + "directory prefixed to all other paths"}; + const PathSetting stateDir{ + (Store*)this, false, + rootDir != "" ? rootDir + "/nix/var/nix" : settings.nixStateDir, "state", + "directory where Nix will store state"}; + const PathSetting logDir{ + (Store*)this, false, + rootDir != "" ? rootDir + "/nix/var/log/nix" : settings.nixLogDir, "log", + "directory where Nix will store state"}; + + const static string drvsLogDir; + + LocalFSStore(const Params& params); + + void narFromPath(const Path& path, Sink& sink) override; + ref<FSAccessor> getFSAccessor() override; + + /* Register a permanent GC root. */ + Path addPermRoot(const Path& storePath, const Path& gcRoot, bool indirect, + bool allowOutsideRootsDir = false); + + virtual Path getRealStoreDir() { return storeDir; } + + Path toRealPath(const Path& storePath) override { + assert(isInStore(storePath)); + return getRealStoreDir() + "/" + + std::string(storePath, storeDir.size() + 1); + } + + std::shared_ptr<std::string> getBuildLog(const Path& path) override; +}; + +/* Extract the name part of the given store path. */ +string storePathToName(const Path& path); + +/* Extract the hash part of the given store path. */ +string storePathToHash(const Path& path); + +/* Check whether โnameโ is a valid store path name part, i.e. contains + only the characters [a-zA-Z0-9\+\-\.\_\?\=] and doesn't start with + a dot. */ +void checkStoreName(const string& name); + +/* Copy a path from one store to another. */ +void copyStorePath(ref<Store> srcStore, const ref<Store>& dstStore, + const Path& storePath, RepairFlag repair = NoRepair, + CheckSigsFlag checkSigs = CheckSigs); + +/* Copy store paths from one store to another. The paths may be copied + in parallel. They are copied in a topologically sorted order + (i.e. if A is a reference of B, then A is copied before B), but + the set of store paths is not automatically closed; use + copyClosure() for that. */ +void copyPaths(ref<Store> srcStore, ref<Store> dstStore, + const PathSet& storePaths, RepairFlag repair = NoRepair, + CheckSigsFlag checkSigs = CheckSigs, + SubstituteFlag substitute = NoSubstitute); + +/* Copy the closure of the specified paths from one store to another. */ +void copyClosure(const ref<Store>& srcStore, const ref<Store>& dstStore, + const PathSet& storePaths, RepairFlag repair = NoRepair, + CheckSigsFlag checkSigs = CheckSigs, + SubstituteFlag substitute = NoSubstitute); + +/* Remove the temporary roots file for this process. Any temporary + root becomes garbage after this point unless it has been registered + as a (permanent) root. */ +void removeTempRoots(); + +/* Return a Store object to access the Nix store denoted by + โuriโ (slight misnomer...). Supported values are: + + * โlocalโ: The Nix store in /nix/store and database in + /nix/var/nix/db, accessed directly. + + * โdaemonโ: The Nix store accessed via a Unix domain socket + connection to nix-daemon. + + * โunix://<path>โ: The Nix store accessed via a Unix domain socket + connection to nix-daemon, with the socket located at <path>. + + * โautoโ or โโ: Equivalent to โlocalโ or โdaemonโ depending on + whether the user has write access to the local Nix + store/database. + + * โfile://<path>โ: A binary cache stored in <path>. + + * โhttps://<path>โ: A binary cache accessed via HTTP. + + * โs3://<path>โ: A writable binary cache stored on Amazon's Simple + Storage Service. + + * โssh://[user@]<host>โ: A remote Nix store accessed by running + โnix-store --serveโ via SSH. + + You can pass parameters to the store implementation by appending + โ?key=value&key=value&...โ to the URI. +*/ +ref<Store> openStore(const std::string& uri = settings.storeUri.get(), + const Store::Params& extraParams = Store::Params()); + +enum StoreType { tDaemon, tLocal, tOther }; + +StoreType getStoreType(const std::string& uri = settings.storeUri.get(), + const std::string& stateDir = settings.nixStateDir); + +/* Return the default substituter stores, defined by the + โsubstitutersโ option and various legacy options. */ +std::list<ref<Store>> getDefaultSubstituters(); + +/* Store implementation registration. */ +typedef std::function<std::shared_ptr<Store>(const std::string& uri, + const Store::Params& params)> + OpenStore; + +struct RegisterStoreImplementation { + typedef std::vector<OpenStore> Implementations; + static Implementations* implementations; + + RegisterStoreImplementation(OpenStore fun) { + if (!implementations) { + implementations = new Implementations; + } + implementations->push_back(fun); + } +}; + +/* Display a set of paths in human-readable form (i.e., between quotes + and separated by commas). */ +string showPaths(const PathSet& paths); + +ValidPathInfo decodeValidPathInfo(std::istream& str, bool hashGiven = false); + +/* Compute the content-addressability assertion (ValidPathInfo::ca) + for paths created by makeFixedOutputPath() / addToStore(). */ +std::string makeFixedOutputCA(bool recursive, const Hash& hash); + +/* Split URI into protocol+hierarchy part and its parameter set. */ +std::pair<std::string, Store::Params> splitUriAndParams(const std::string& uri); + +} // namespace nix diff --git a/third_party/nix/src/libstore/worker-protocol.hh b/third_party/nix/src/libstore/worker-protocol.hh new file mode 100644 index 000000000000..970d494acee1 --- /dev/null +++ b/third_party/nix/src/libstore/worker-protocol.hh @@ -0,0 +1,65 @@ +#pragma once + +namespace nix { + +#define WORKER_MAGIC_1 0x6e697863 +#define WORKER_MAGIC_2 0x6478696f + +#define PROTOCOL_VERSION 0x115 +#define GET_PROTOCOL_MAJOR(x) ((x)&0xff00) +#define GET_PROTOCOL_MINOR(x) ((x)&0x00ff) + +typedef enum { + wopIsValidPath = 1, + wopHasSubstitutes = 3, + wopQueryPathHash = 4, // obsolete + wopQueryReferences = 5, // obsolete + wopQueryReferrers = 6, + wopAddToStore = 7, + wopAddTextToStore = 8, + wopBuildPaths = 9, + wopEnsurePath = 10, + wopAddTempRoot = 11, + wopAddIndirectRoot = 12, + wopSyncWithGC = 13, + wopFindRoots = 14, + wopExportPath = 16, // obsolete + wopQueryDeriver = 18, // obsolete + wopSetOptions = 19, + wopCollectGarbage = 20, + wopQuerySubstitutablePathInfo = 21, + wopQueryDerivationOutputs = 22, + wopQueryAllValidPaths = 23, + wopQueryFailedPaths = 24, + wopClearFailedPaths = 25, + wopQueryPathInfo = 26, + wopImportPaths = 27, // obsolete + wopQueryDerivationOutputNames = 28, + wopQueryPathFromHashPart = 29, + wopQuerySubstitutablePathInfos = 30, + wopQueryValidPaths = 31, + wopQuerySubstitutablePaths = 32, + wopQueryValidDerivers = 33, + wopOptimiseStore = 34, + wopVerifyStore = 35, + wopBuildDerivation = 36, + wopAddSignatures = 37, + wopNarFromPath = 38, + wopAddToStoreNar = 39, + wopQueryMissing = 40, +} WorkerOp; + +#define STDERR_NEXT 0x6f6c6d67 +#define STDERR_READ 0x64617461 // data needed from source +#define STDERR_WRITE 0x64617416 // data for sink +#define STDERR_LAST 0x616c7473 +#define STDERR_ERROR 0x63787470 +#define STDERR_START_ACTIVITY 0x53545254 +#define STDERR_STOP_ACTIVITY 0x53544f50 +#define STDERR_RESULT 0x52534c54 + +Path readStorePath(Store& store, Source& from); +template <class T> +T readStorePaths(Store& store, Source& from); + +} // namespace nix diff --git a/third_party/nix/src/libutil/affinity.cc b/third_party/nix/src/libutil/affinity.cc new file mode 100644 index 000000000000..7db8906eb0b4 --- /dev/null +++ b/third_party/nix/src/libutil/affinity.cc @@ -0,0 +1,60 @@ +#include "affinity.hh" + +#include <glog/logging.h> + +#include "types.hh" +#include "util.hh" + +#if __linux__ +#include <sched.h> +#endif + +namespace nix { + +#if __linux__ +static bool didSaveAffinity = false; +static cpu_set_t savedAffinity; +#endif + +void setAffinityTo(int cpu) { +#if __linux__ + if (sched_getaffinity(0, sizeof(cpu_set_t), &savedAffinity) == -1) { + return; + } + + didSaveAffinity = true; + DLOG(INFO) << "locking this thread to CPU " << cpu; + cpu_set_t newAffinity; + CPU_ZERO(&newAffinity); + CPU_SET(cpu, &newAffinity); + if (sched_setaffinity(0, sizeof(cpu_set_t), &newAffinity) == -1) { + LOG(ERROR) << "failed to lock thread to CPU " << cpu; + } +#endif +} + +int lockToCurrentCPU() { +#if __linux__ + int cpu = sched_getcpu(); + if (cpu != -1) { + setAffinityTo(cpu); + } + return cpu; +#else + return -1; +#endif +} + +void restoreAffinity() { +#if __linux__ + if (!didSaveAffinity) { + return; + } + + if (sched_setaffinity(0, sizeof(cpu_set_t), &savedAffinity) == -1) { + LOG(ERROR) << "failed to restore affinity"; + } +#endif +} + +} // namespace nix diff --git a/third_party/nix/src/libutil/affinity.hh b/third_party/nix/src/libutil/affinity.hh new file mode 100644 index 000000000000..5e5ef9b0de0d --- /dev/null +++ b/third_party/nix/src/libutil/affinity.hh @@ -0,0 +1,9 @@ +#pragma once + +namespace nix { + +void setAffinityTo(int cpu); +int lockToCurrentCPU(); +void restoreAffinity(); + +} // namespace nix diff --git a/third_party/nix/src/libutil/archive.cc b/third_party/nix/src/libutil/archive.cc new file mode 100644 index 000000000000..32bad07f22da --- /dev/null +++ b/third_party/nix/src/libutil/archive.cc @@ -0,0 +1,399 @@ +#include "archive.hh" + +#include <algorithm> +#include <cerrno> +#include <map> +#include <vector> + +#include <dirent.h> +#include <fcntl.h> +#include <strings.h> // for strcasecmp +#include <sys/stat.h> +#include <sys/types.h> +#include <unistd.h> + +#include "config.hh" +#include "glog/logging.h" +#include "util.hh" + +namespace nix { + +struct ArchiveSettings : Config { + Setting<bool> useCaseHack { + this, +#if __APPLE__ + true, +#else + false, +#endif + "use-case-hack", + "Whether to enable a Darwin-specific hack for dealing with file name " + "collisions." + }; +}; + +static ArchiveSettings archiveSettings; + +static GlobalConfig::Register r1(&archiveSettings); + +const std::string narVersionMagic1 = "nix-archive-1"; + +static string caseHackSuffix = "~nix~case~hack~"; + +PathFilter defaultPathFilter = [](const Path& /*unused*/) { return true; }; + +static void dumpContents(const Path& path, size_t size, Sink& sink) { + sink << "contents" << size; + + AutoCloseFD fd = open(path.c_str(), O_RDONLY | O_CLOEXEC); + if (!fd) { + throw SysError(format("opening file '%1%'") % path); + } + + std::vector<unsigned char> buf(65536); + size_t left = size; + + while (left > 0) { + auto n = std::min(left, buf.size()); + readFull(fd.get(), buf.data(), n); + left -= n; + sink(buf.data(), n); + } + + writePadding(size, sink); +} + +static void dump(const Path& path, Sink& sink, PathFilter& filter) { + checkInterrupt(); + + struct stat st; + if (lstat(path.c_str(), &st) != 0) { + throw SysError(format("getting attributes of path '%1%'") % path); + } + + sink << "("; + + if (S_ISREG(st.st_mode)) { + sink << "type" + << "regular"; + if ((st.st_mode & S_IXUSR) != 0u) { + sink << "executable" + << ""; + } + dumpContents(path, (size_t)st.st_size, sink); + } + + else if (S_ISDIR(st.st_mode)) { + sink << "type" + << "directory"; + + /* If we're on a case-insensitive system like macOS, undo + the case hack applied by restorePath(). */ + std::map<string, string> unhacked; + for (auto& i : readDirectory(path)) { + if (archiveSettings.useCaseHack) { + string name(i.name); + size_t pos = i.name.find(caseHackSuffix); + if (pos != string::npos) { + DLOG(INFO) << "removing case hack suffix from " << path << "/" + << i.name; + + name.erase(pos); + } + if (unhacked.find(name) != unhacked.end()) { + throw Error(format("file name collision in between '%1%' and '%2%'") % + (path + "/" + unhacked[name]) % (path + "/" + i.name)); + } + unhacked[name] = i.name; + } else { + unhacked[i.name] = i.name; + } + } + + for (auto& i : unhacked) { + if (filter(path + "/" + i.first)) { + sink << "entry" + << "(" + << "name" << i.first << "node"; + dump(path + "/" + i.second, sink, filter); + sink << ")"; + } + } + } + + else if (S_ISLNK(st.st_mode)) { + sink << "type" + << "symlink" + << "target" << readLink(path); + + } else { + throw Error(format("file '%1%' has an unsupported type") % path); + } + + sink << ")"; +} + +void dumpPath(const Path& path, Sink& sink, PathFilter& filter) { + sink << narVersionMagic1; + dump(path, sink, filter); +} + +void dumpString(const std::string& s, Sink& sink) { + sink << narVersionMagic1 << "(" + << "type" + << "regular" + << "contents" << s << ")"; +} + +static SerialisationError badArchive(const string& s) { + return SerialisationError("bad archive: " + s); +} + +#if 0 +static void skipGeneric(Source & source) +{ + if (readString(source) == "(") { + while (readString(source) != ")") + skipGeneric(source); + } +} +#endif + +static void parseContents(ParseSink& sink, Source& source, const Path& path) { + unsigned long long size = readLongLong(source); + + sink.preallocateContents(size); + + unsigned long long left = size; + std::vector<unsigned char> buf(65536); + + while (left != 0u) { + checkInterrupt(); + auto n = buf.size(); + if ((unsigned long long)n > left) { + n = left; + } + source(buf.data(), n); + sink.receiveContents(buf.data(), n); + left -= n; + } + + readPadding(size, source); +} + +struct CaseInsensitiveCompare { + bool operator()(const string& a, const string& b) const { + return strcasecmp(a.c_str(), b.c_str()) < 0; + } +}; + +static void parse(ParseSink& sink, Source& source, const Path& path) { + string s; + + s = readString(source); + if (s != "(") { + throw badArchive("expected open tag"); + } + + enum { tpUnknown, tpRegular, tpDirectory, tpSymlink } type = tpUnknown; + + std::map<Path, int, CaseInsensitiveCompare> names; + + while (true) { + checkInterrupt(); + + s = readString(source); + + if (s == ")") { + break; + } + + if (s == "type") { + if (type != tpUnknown) { + throw badArchive("multiple type fields"); + } + string t = readString(source); + + if (t == "regular") { + type = tpRegular; + sink.createRegularFile(path); + } + + else if (t == "directory") { + sink.createDirectory(path); + type = tpDirectory; + } + + else if (t == "symlink") { + type = tpSymlink; + } + + else { + throw badArchive("unknown file type " + t); + } + + } + + else if (s == "contents" && type == tpRegular) { + parseContents(sink, source, path); + } + + else if (s == "executable" && type == tpRegular) { + auto s = readString(source); + if (!s.empty()) { + throw badArchive("executable marker has non-empty value"); + } + sink.isExecutable(); + } + + else if (s == "entry" && type == tpDirectory) { + string name; + string prevName; + + s = readString(source); + if (s != "(") { + throw badArchive("expected open tag"); + } + + while (true) { + checkInterrupt(); + + s = readString(source); + + if (s == ")") { + break; + } + if (s == "name") { + name = readString(source); + if (name.empty() || name == "." || name == ".." || + name.find('/') != string::npos || + name.find((char)0) != string::npos) { + throw Error(format("NAR contains invalid file name '%1%'") % name); + } + if (name <= prevName) { + throw Error("NAR directory is not sorted"); + } + prevName = name; + if (archiveSettings.useCaseHack) { + auto i = names.find(name); + if (i != names.end()) { + DLOG(INFO) << "case collision between '" << i->first << "' and '" + << name << "'"; + name += caseHackSuffix; + name += std::to_string(++i->second); + } else { + names[name] = 0; + } + } + } else if (s == "node") { + if (s.empty()) { + throw badArchive("entry name missing"); + } + parse(sink, source, path + "/" + name); + } else { + throw badArchive("unknown field " + s); + } + } + } + + else if (s == "target" && type == tpSymlink) { + string target = readString(source); + sink.createSymlink(path, target); + } + + else { + throw badArchive("unknown field " + s); + } + } +} + +void parseDump(ParseSink& sink, Source& source) { + string version; + try { + version = readString(source, narVersionMagic1.size()); + } catch (SerialisationError& e) { + /* This generally means the integer at the start couldn't be + decoded. Ignore and throw the exception below. */ + } + if (version != narVersionMagic1) { + throw badArchive("input doesn't look like a Nix archive"); + } + parse(sink, source, ""); +} + +struct RestoreSink : ParseSink { + Path dstPath; + AutoCloseFD fd; + + void createDirectory(const Path& path) override { + Path p = dstPath + path; + if (mkdir(p.c_str(), 0777) == -1) { + throw SysError(format("creating directory '%1%'") % p); + } + }; + + void createRegularFile(const Path& path) override { + Path p = dstPath + path; + fd = open(p.c_str(), O_CREAT | O_EXCL | O_WRONLY | O_CLOEXEC, 0666); + if (!fd) { + throw SysError(format("creating file '%1%'") % p); + } + } + + void isExecutable() override { + struct stat st; + if (fstat(fd.get(), &st) == -1) { + throw SysError("fstat"); + } + if (fchmod(fd.get(), st.st_mode | (S_IXUSR | S_IXGRP | S_IXOTH)) == -1) { + throw SysError("fchmod"); + } + } + + void preallocateContents(unsigned long long len) override { +#if HAVE_POSIX_FALLOCATE + if (len != 0u) { + errno = posix_fallocate(fd.get(), 0, len); + /* Note that EINVAL may indicate that the underlying + filesystem doesn't support preallocation (e.g. on + OpenSolaris). Since preallocation is just an + optimisation, ignore it. */ + if (errno && errno != EINVAL && errno != EOPNOTSUPP && errno != ENOSYS) { + throw SysError(format("preallocating file of %1% bytes") % len); + } + } +#endif + } + + void receiveContents(unsigned char* data, unsigned int len) override { + writeFull(fd.get(), data, len); + } + + void createSymlink(const Path& path, const string& target) override { + Path p = dstPath + path; + nix::createSymlink(target, p); + } +}; + +void restorePath(const Path& path, Source& source) { + RestoreSink sink; + sink.dstPath = path; + parseDump(sink, source); +} + +void copyNAR(Source& source, Sink& sink) { + // FIXME: if 'source' is the output of dumpPath() followed by EOF, + // we should just forward all data directly without parsing. + + ParseSink parseSink; /* null sink; just parse the NAR */ + + LambdaSource wrapper([&](unsigned char* data, size_t len) { + auto n = source.read(data, len); + sink(data, n); + return n; + }); + + parseDump(parseSink, wrapper); +} + +} // namespace nix diff --git a/third_party/nix/src/libutil/archive.hh b/third_party/nix/src/libutil/archive.hh new file mode 100644 index 000000000000..9a656edae4a3 --- /dev/null +++ b/third_party/nix/src/libutil/archive.hh @@ -0,0 +1,77 @@ +#pragma once + +#include "serialise.hh" +#include "types.hh" + +namespace nix { + +/* dumpPath creates a Nix archive of the specified path. The format + is as follows: + + IF path points to a REGULAR FILE: + dump(path) = attrs( + [ ("type", "regular") + , ("contents", contents(path)) + ]) + + IF path points to a DIRECTORY: + dump(path) = attrs( + [ ("type", "directory") + , ("entries", concat(map(f, sort(entries(path))))) + ]) + where f(fn) = attrs( + [ ("name", fn) + , ("file", dump(path + "/" + fn)) + ]) + + where: + + attrs(as) = concat(map(attr, as)) + encN(0) + attrs((a, b)) = encS(a) + encS(b) + + encS(s) = encN(len(s)) + s + (padding until next 64-bit boundary) + + encN(n) = 64-bit little-endian encoding of n. + + contents(path) = the contents of a regular file. + + sort(strings) = lexicographic sort by 8-bit value (strcmp). + + entries(path) = the entries of a directory, without `.' and + `..'. + + `+' denotes string concatenation. */ + +void dumpPath(const Path& path, Sink& sink, + PathFilter& filter = defaultPathFilter); + +void dumpString(const std::string& s, Sink& sink); + +/* FIXME: fix this API, it sucks. */ +struct ParseSink { + virtual void createDirectory(const Path& path){}; + + virtual void createRegularFile(const Path& path){}; + virtual void isExecutable(){}; + virtual void preallocateContents(unsigned long long size){}; + virtual void receiveContents(unsigned char* data, unsigned int len){}; + + virtual void createSymlink(const Path& path, const string& target){}; +}; + +struct TeeSink : ParseSink { + TeeSource source; + + TeeSink(Source& source) : source(source) {} +}; + +void parseDump(ParseSink& sink, Source& source); + +void restorePath(const Path& path, Source& source); + +/* Read a NAR from 'source' and write it to 'sink'. */ +void copyNAR(Source& source, Sink& sink); + +extern const std::string narVersionMagic1; + +} // namespace nix diff --git a/third_party/nix/src/libutil/args.cc b/third_party/nix/src/libutil/args.cc new file mode 100644 index 000000000000..48fa715fdf0f --- /dev/null +++ b/third_party/nix/src/libutil/args.cc @@ -0,0 +1,219 @@ +#include "args.hh" + +#include "hash.hh" + +namespace nix { + +Args::FlagMaker Args::mkFlag() { return FlagMaker(*this); } + +Args::FlagMaker::~FlagMaker() { + assert(!flag->longName.empty()); + args.longFlags[flag->longName] = flag; + if (flag->shortName != 0) { + args.shortFlags[flag->shortName] = flag; + } +} + +void Args::parseCmdline(const Strings& _cmdline) { + Strings pendingArgs; + bool dashDash = false; + + Strings cmdline(_cmdline); + + for (auto pos = cmdline.begin(); pos != cmdline.end();) { + auto arg = *pos; + + /* Expand compound dash options (i.e., `-qlf' -> `-q -l -f', + `-j3` -> `-j 3`). */ + if (!dashDash && arg.length() > 2 && arg[0] == '-' && arg[1] != '-' && + (isalpha(arg[1]) != 0)) { + *pos = (string) "-" + arg[1]; + auto next = pos; + ++next; + for (unsigned int j = 2; j < arg.length(); j++) { + if (isalpha(arg[j]) != 0) { + cmdline.insert(next, (string) "-" + arg[j]); + } else { + cmdline.insert(next, string(arg, j)); + break; + } + } + arg = *pos; + } + + if (!dashDash && arg == "--") { + dashDash = true; + ++pos; + } else if (!dashDash && std::string(arg, 0, 1) == "-") { + if (!processFlag(pos, cmdline.end())) { + throw UsageError(format("unrecognised flag '%1%'") % arg); + } + } else { + pendingArgs.push_back(*pos++); + if (processArgs(pendingArgs, false)) { + pendingArgs.clear(); + } + } + } + + processArgs(pendingArgs, true); +} + +void Args::printHelp(const string& programName, std::ostream& out) { + std::cout << "Usage: " << programName << " <FLAGS>..."; + for (auto& exp : expectedArgs) { + std::cout << renderLabels({exp.label}); + // FIXME: handle arity > 1 + if (exp.arity == 0) { + std::cout << "..."; + } + if (exp.optional) { + std::cout << "?"; + } + } + std::cout << "\n"; + + auto s = description(); + if (!s.empty()) { + std::cout << "\nSummary: " << s << ".\n"; + } + + if (!longFlags.empty() != 0u) { + std::cout << "\n"; + std::cout << "Flags:\n"; + printFlags(out); + } +} + +void Args::printFlags(std::ostream& out) { + Table2 table; + for (auto& flag : longFlags) { + if (hiddenCategories.count(flag.second->category) != 0u) { + continue; + } + table.push_back(std::make_pair( + (flag.second->shortName != 0 + ? std::string("-") + flag.second->shortName + ", " + : " ") + + "--" + flag.first + renderLabels(flag.second->labels), + flag.second->description)); + } + printTable(out, table); +} + +bool Args::processFlag(Strings::iterator& pos, Strings::iterator end) { + assert(pos != end); + + auto process = [&](const std::string& name, const Flag& flag) -> bool { + ++pos; + std::vector<std::string> args; + for (size_t n = 0; n < flag.arity; ++n) { + if (pos == end) { + if (flag.arity == ArityAny) { + break; + } + throw UsageError(format("flag '%1%' requires %2% argument(s)") % name % + flag.arity); + } + args.push_back(*pos++); + } + flag.handler(std::move(args)); + return true; + }; + + if (string(*pos, 0, 2) == "--") { + auto i = longFlags.find(string(*pos, 2)); + if (i == longFlags.end()) { + return false; + } + return process("--" + i->first, *i->second); + } + + if (string(*pos, 0, 1) == "-" && pos->size() == 2) { + auto c = (*pos)[1]; + auto i = shortFlags.find(c); + if (i == shortFlags.end()) { + return false; + } + return process(std::string("-") + c, *i->second); + } + + return false; +} + +bool Args::processArgs(const Strings& args, bool finish) { + if (expectedArgs.empty()) { + if (!args.empty()) { + throw UsageError(format("unexpected argument '%1%'") % args.front()); + } + return true; + } + + auto& exp = expectedArgs.front(); + + bool res = false; + + if ((exp.arity == 0 && finish) || + (exp.arity > 0 && args.size() == exp.arity)) { + std::vector<std::string> ss; + for (auto& s : args) { + ss.push_back(s); + } + exp.handler(std::move(ss)); + expectedArgs.pop_front(); + res = true; + } + + if (finish && !expectedArgs.empty() && !expectedArgs.front().optional) { + throw UsageError("more arguments are required"); + } + + return res; +} + +Args::FlagMaker& Args::FlagMaker::mkHashTypeFlag(HashType* ht) { + arity(1); + label("type"); + description("hash algorithm ('md5', 'sha1', 'sha256', or 'sha512')"); + handler([ht](const std::string& s) { + *ht = parseHashType(s); + if (*ht == htUnknown) { + throw UsageError("unknown hash type '%1%'", s); + } + }); + return *this; +} + +Strings argvToStrings(int argc, char** argv) { + Strings args; + argc--; + argv++; + while ((argc--) != 0) { + args.push_back(*argv++); + } + return args; +} + +std::string renderLabels(const Strings& labels) { + std::string res; + for (auto label : labels) { + for (auto& c : label) { + c = std::toupper(c); + } + res += " <" + label + ">"; + } + return res; +} + +void printTable(std::ostream& out, const Table2& table) { + size_t max = 0; + for (auto& row : table) { + max = std::max(max, row.first.size()); + } + for (auto& row : table) { + out << " " << row.first << std::string(max - row.first.size() + 2, ' ') + << row.second << "\n"; + } +} + +} // namespace nix diff --git a/third_party/nix/src/libutil/args.hh b/third_party/nix/src/libutil/args.hh new file mode 100644 index 000000000000..20233d353424 --- /dev/null +++ b/third_party/nix/src/libutil/args.hh @@ -0,0 +1,217 @@ +#pragma once + +#include <iostream> +#include <map> +#include <memory> + +#include "util.hh" + +namespace nix { + +MakeError(UsageError, Error) + + enum HashType : char; + +class Args { + public: + /* Parse the command line, throwing a UsageError if something goes + wrong. */ + void parseCmdline(const Strings& cmdline); + + virtual void printHelp(const string& programName, std::ostream& out); + + virtual std::string description() { return ""; } + + protected: + static const size_t ArityAny = std::numeric_limits<size_t>::max(); + + /* Flags. */ + struct Flag { + typedef std::shared_ptr<Flag> ptr; + std::string longName; + char shortName = 0; + std::string description; + Strings labels; + size_t arity = 0; + std::function<void(std::vector<std::string>)> handler; + std::string category; + }; + + std::map<std::string, Flag::ptr> longFlags; + std::map<char, Flag::ptr> shortFlags; + + virtual bool processFlag(Strings::iterator& pos, Strings::iterator end); + + virtual void printFlags(std::ostream& out); + + /* Positional arguments. */ + struct ExpectedArg { + std::string label; + size_t arity; // 0 = any + bool optional; + std::function<void(std::vector<std::string>)> handler; + }; + + std::list<ExpectedArg> expectedArgs; + + virtual bool processArgs(const Strings& args, bool finish); + + std::set<std::string> hiddenCategories; + + public: + class FlagMaker { + Args& args; + Flag::ptr flag; + friend class Args; + FlagMaker(Args& args) : args(args), flag(std::make_shared<Flag>()){}; + + public: + ~FlagMaker(); + FlagMaker& longName(const std::string& s) { + flag->longName = s; + return *this; + }; + FlagMaker& shortName(char s) { + flag->shortName = s; + return *this; + }; + FlagMaker& description(const std::string& s) { + flag->description = s; + return *this; + }; + FlagMaker& label(const std::string& l) { + flag->arity = 1; + flag->labels = {l}; + return *this; + }; + FlagMaker& labels(const Strings& ls) { + flag->arity = ls.size(); + flag->labels = ls; + return *this; + }; + FlagMaker& arity(size_t arity) { + flag->arity = arity; + return *this; + }; + FlagMaker& handler(std::function<void(std::vector<std::string>)> handler) { + flag->handler = handler; + return *this; + }; + FlagMaker& handler(std::function<void()> handler) { + flag->handler = [handler](std::vector<std::string>) { handler(); }; + return *this; + }; + FlagMaker& handler(std::function<void(std::string)> handler) { + flag->arity = 1; + flag->handler = [handler](std::vector<std::string> ss) { + handler(std::move(ss[0])); + }; + return *this; + }; + FlagMaker& category(const std::string& s) { + flag->category = s; + return *this; + }; + + template <class T> + FlagMaker& dest(T* dest) { + flag->arity = 1; + flag->handler = [=](std::vector<std::string> ss) { *dest = ss[0]; }; + return *this; + } + + template <class T> + FlagMaker& set(T* dest, const T& val) { + flag->arity = 0; + flag->handler = [=](std::vector<std::string> ss) { *dest = val; }; + return *this; + } + + FlagMaker& mkHashTypeFlag(HashType* ht); + }; + + FlagMaker mkFlag(); + + /* Helper functions for constructing flags / positional + arguments. */ + + void mkFlag1(char shortName, const std::string& longName, + const std::string& label, const std::string& description, + std::function<void(std::string)> fun) { + mkFlag() + .shortName(shortName) + .longName(longName) + .labels({label}) + .description(description) + .arity(1) + .handler([=](std::vector<std::string> ss) { fun(ss[0]); }); + } + + void mkFlag(char shortName, const std::string& name, + const std::string& description, bool* dest) { + mkFlag(shortName, name, description, dest, true); + } + + template <class T> + void mkFlag(char shortName, const std::string& longName, + const std::string& description, T* dest, const T& value) { + mkFlag() + .shortName(shortName) + .longName(longName) + .description(description) + .handler([=](std::vector<std::string> ss) { *dest = value; }); + } + + template <class I> + void mkIntFlag(char shortName, const std::string& longName, + const std::string& description, I* dest) { + mkFlag<I>(shortName, longName, description, [=](I n) { *dest = n; }); + } + + template <class I> + void mkFlag(char shortName, const std::string& longName, + const std::string& description, std::function<void(I)> fun) { + mkFlag() + .shortName(shortName) + .longName(longName) + .labels({"N"}) + .description(description) + .arity(1) + .handler([=](std::vector<std::string> ss) { + I n; + if (!string2Int(ss[0], n)) + throw UsageError("flag '--%s' requires a integer argument", + longName); + fun(n); + }); + } + + /* Expect a string argument. */ + void expectArg(const std::string& label, string* dest, + bool optional = false) { + expectedArgs.push_back( + ExpectedArg{label, 1, optional, + [=](std::vector<std::string> ss) { *dest = ss[0]; }}); + } + + /* Expect 0 or more arguments. */ + void expectArgs(const std::string& label, std::vector<std::string>* dest) { + expectedArgs.push_back(ExpectedArg{ + label, 0, false, + [=](std::vector<std::string> ss) { *dest = std::move(ss); }}); + } + + friend class MultiCommand; +}; + +Strings argvToStrings(int argc, char** argv); + +/* Helper function for rendering argument labels. */ +std::string renderLabels(const Strings& labels); + +/* Helper function for printing 2-column tables. */ +typedef std::vector<std::pair<std::string, std::string>> Table2; + +void printTable(std::ostream& out, const Table2& table); + +} // namespace nix diff --git a/third_party/nix/src/libutil/compression.cc b/third_party/nix/src/libutil/compression.cc new file mode 100644 index 000000000000..d7084ab7f15d --- /dev/null +++ b/third_party/nix/src/libutil/compression.cc @@ -0,0 +1,405 @@ +#include "compression.hh" + +#include <cstdio> +#include <cstring> +#include <iostream> + +#include <brotli/decode.h> +#include <brotli/encode.h> +#include <bzlib.h> +#include <lzma.h> + +#include "finally.hh" +#include "glog/logging.h" +#include "util.hh" + +namespace nix { + +// Don't feed brotli too much at once. +struct ChunkedCompressionSink : CompressionSink { + uint8_t outbuf[32 * 1024]; + + void write(const unsigned char* data, size_t len) override { + const size_t CHUNK_SIZE = sizeof(outbuf) << 2; + while (len != 0u) { + size_t n = std::min(CHUNK_SIZE, len); + writeInternal(data, n); + data += n; + len -= n; + } + } + + virtual void writeInternal(const unsigned char* data, size_t len) = 0; +}; + +struct NoneSink : CompressionSink { + Sink& nextSink; + explicit NoneSink(Sink& nextSink) : nextSink(nextSink) {} + void finish() override { flush(); } + void write(const unsigned char* data, size_t len) override { + nextSink(data, len); + } +}; + +struct XzDecompressionSink : CompressionSink { + Sink& nextSink; + uint8_t outbuf[BUFSIZ]; + lzma_stream strm = LZMA_STREAM_INIT; + bool finished = false; + + explicit XzDecompressionSink(Sink& nextSink) : nextSink(nextSink) { + lzma_ret ret = lzma_stream_decoder(&strm, UINT64_MAX, LZMA_CONCATENATED); + if (ret != LZMA_OK) { + throw CompressionError("unable to initialise lzma decoder"); + } + + strm.next_out = outbuf; + strm.avail_out = sizeof(outbuf); + } + + ~XzDecompressionSink() override { lzma_end(&strm); } + + void finish() override { + CompressionSink::flush(); + write(nullptr, 0); + } + + void write(const unsigned char* data, size_t len) override { + strm.next_in = data; + strm.avail_in = len; + + while (!finished && ((data == nullptr) || (strm.avail_in != 0u))) { + checkInterrupt(); + + lzma_ret ret = lzma_code(&strm, data != nullptr ? LZMA_RUN : LZMA_FINISH); + if (ret != LZMA_OK && ret != LZMA_STREAM_END) { + throw CompressionError("error %d while decompressing xz file", ret); + } + + finished = ret == LZMA_STREAM_END; + + if (strm.avail_out < sizeof(outbuf) || strm.avail_in == 0) { + nextSink(outbuf, sizeof(outbuf) - strm.avail_out); + strm.next_out = outbuf; + strm.avail_out = sizeof(outbuf); + } + } + } +}; + +struct BzipDecompressionSink : ChunkedCompressionSink { + Sink& nextSink; + bz_stream strm; + bool finished = false; + + explicit BzipDecompressionSink(Sink& nextSink) : nextSink(nextSink) { + memset(&strm, 0, sizeof(strm)); + int ret = BZ2_bzDecompressInit(&strm, 0, 0); + if (ret != BZ_OK) { + throw CompressionError("unable to initialise bzip2 decoder"); + } + + strm.next_out = (char*)outbuf; + strm.avail_out = sizeof(outbuf); + } + + ~BzipDecompressionSink() override { BZ2_bzDecompressEnd(&strm); } + + void finish() override { + flush(); + write(nullptr, 0); + } + + void writeInternal(const unsigned char* data, size_t len) override { + assert(len <= std::numeric_limits<decltype(strm.avail_in)>::max()); + + strm.next_in = (char*)data; + strm.avail_in = len; + + while (strm.avail_in != 0u) { + checkInterrupt(); + + int ret = BZ2_bzDecompress(&strm); + if (ret != BZ_OK && ret != BZ_STREAM_END) { + throw CompressionError("error while decompressing bzip2 file"); + } + + finished = ret == BZ_STREAM_END; + + if (strm.avail_out < sizeof(outbuf) || strm.avail_in == 0) { + nextSink(outbuf, sizeof(outbuf) - strm.avail_out); + strm.next_out = (char*)outbuf; + strm.avail_out = sizeof(outbuf); + } + } + } +}; + +struct BrotliDecompressionSink : ChunkedCompressionSink { + Sink& nextSink; + BrotliDecoderState* state; + bool finished = false; + + explicit BrotliDecompressionSink(Sink& nextSink) : nextSink(nextSink) { + state = BrotliDecoderCreateInstance(nullptr, nullptr, nullptr); + if (state == nullptr) { + throw CompressionError("unable to initialize brotli decoder"); + } + } + + ~BrotliDecompressionSink() override { BrotliDecoderDestroyInstance(state); } + + void finish() override { + flush(); + writeInternal(nullptr, 0); + } + + void writeInternal(const unsigned char* data, size_t len) override { + const uint8_t* next_in = data; + size_t avail_in = len; + uint8_t* next_out = outbuf; + size_t avail_out = sizeof(outbuf); + + while (!finished && ((data == nullptr) || (avail_in != 0u))) { + checkInterrupt(); + + if (BrotliDecoderDecompressStream(state, &avail_in, &next_in, &avail_out, + &next_out, nullptr) == 0u) { + throw CompressionError("error while decompressing brotli file"); + } + + if (avail_out < sizeof(outbuf) || avail_in == 0) { + nextSink(outbuf, sizeof(outbuf) - avail_out); + next_out = outbuf; + avail_out = sizeof(outbuf); + } + + finished = (BrotliDecoderIsFinished(state) != 0); + } + } +}; + +ref<std::string> decompress(const std::string& method, const std::string& in) { + StringSink ssink; + auto sink = makeDecompressionSink(method, ssink); + (*sink)(in); + sink->finish(); + return ssink.s; +} + +ref<CompressionSink> makeDecompressionSink(const std::string& method, + Sink& nextSink) { + if (method == "none" || method.empty()) { + return make_ref<NoneSink>(nextSink); + } + if (method == "xz") { + return make_ref<XzDecompressionSink>(nextSink); + } else if (method == "bzip2") { + return make_ref<BzipDecompressionSink>(nextSink); + } else if (method == "br") { + return make_ref<BrotliDecompressionSink>(nextSink); + } else { + throw UnknownCompressionMethod("unknown compression method '%s'", method); + } +} + +struct XzCompressionSink : CompressionSink { + Sink& nextSink; + uint8_t outbuf[BUFSIZ]; + lzma_stream strm = LZMA_STREAM_INIT; + bool finished = false; + + XzCompressionSink(Sink& nextSink, bool parallel) : nextSink(nextSink) { + lzma_ret ret; + bool done = false; + + if (parallel) { +#ifdef HAVE_LZMA_MT + lzma_mt mt_options = {}; + mt_options.flags = 0; + mt_options.timeout = 300; // Using the same setting as the xz cmd line + mt_options.preset = LZMA_PRESET_DEFAULT; + mt_options.filters = NULL; + mt_options.check = LZMA_CHECK_CRC64; + mt_options.threads = lzma_cputhreads(); + mt_options.block_size = 0; + if (mt_options.threads == 0) { + mt_options.threads = 1; + } + // FIXME: maybe use lzma_stream_encoder_mt_memusage() to control the + // number of threads. + ret = lzma_stream_encoder_mt(&strm, &mt_options); + done = true; +#else + LOG(ERROR) << "parallel XZ compression requested but not supported, " + << "falling back to single-threaded compression"; +#endif + } + + if (!done) { + ret = lzma_easy_encoder(&strm, 6, LZMA_CHECK_CRC64); + } + + if (ret != LZMA_OK) { + throw CompressionError("unable to initialise lzma encoder"); + } + + // FIXME: apply the x86 BCJ filter? + + strm.next_out = outbuf; + strm.avail_out = sizeof(outbuf); + } + + ~XzCompressionSink() override { lzma_end(&strm); } + + void finish() override { + CompressionSink::flush(); + write(nullptr, 0); + } + + void write(const unsigned char* data, size_t len) override { + strm.next_in = data; + strm.avail_in = len; + + while (!finished && ((data == nullptr) || (strm.avail_in != 0u))) { + checkInterrupt(); + + lzma_ret ret = lzma_code(&strm, data != nullptr ? LZMA_RUN : LZMA_FINISH); + if (ret != LZMA_OK && ret != LZMA_STREAM_END) { + throw CompressionError("error %d while compressing xz file", ret); + } + + finished = ret == LZMA_STREAM_END; + + if (strm.avail_out < sizeof(outbuf) || strm.avail_in == 0) { + nextSink(outbuf, sizeof(outbuf) - strm.avail_out); + strm.next_out = outbuf; + strm.avail_out = sizeof(outbuf); + } + } + } +}; + +struct BzipCompressionSink : ChunkedCompressionSink { + Sink& nextSink; + bz_stream strm; + bool finished = false; + + explicit BzipCompressionSink(Sink& nextSink) : nextSink(nextSink) { + memset(&strm, 0, sizeof(strm)); + int ret = BZ2_bzCompressInit(&strm, 9, 0, 30); + if (ret != BZ_OK) { + throw CompressionError("unable to initialise bzip2 encoder"); + } + + strm.next_out = (char*)outbuf; + strm.avail_out = sizeof(outbuf); + } + + ~BzipCompressionSink() override { BZ2_bzCompressEnd(&strm); } + + void finish() override { + flush(); + writeInternal(nullptr, 0); + } + + void writeInternal(const unsigned char* data, size_t len) override { + assert(len <= std::numeric_limits<decltype(strm.avail_in)>::max()); + + strm.next_in = (char*)data; + strm.avail_in = len; + + while (!finished && ((data == nullptr) || (strm.avail_in != 0u))) { + checkInterrupt(); + + int ret = BZ2_bzCompress(&strm, data != nullptr ? BZ_RUN : BZ_FINISH); + if (ret != BZ_RUN_OK && ret != BZ_FINISH_OK && ret != BZ_STREAM_END) { + throw CompressionError("error %d while compressing bzip2 file", ret); + } + + finished = ret == BZ_STREAM_END; + + if (strm.avail_out < sizeof(outbuf) || strm.avail_in == 0) { + nextSink(outbuf, sizeof(outbuf) - strm.avail_out); + strm.next_out = (char*)outbuf; + strm.avail_out = sizeof(outbuf); + } + } + } +}; + +struct BrotliCompressionSink : ChunkedCompressionSink { + Sink& nextSink; + uint8_t outbuf[BUFSIZ]; + BrotliEncoderState* state; + bool finished = false; + + explicit BrotliCompressionSink(Sink& nextSink) : nextSink(nextSink) { + state = BrotliEncoderCreateInstance(nullptr, nullptr, nullptr); + if (state == nullptr) { + throw CompressionError("unable to initialise brotli encoder"); + } + } + + ~BrotliCompressionSink() override { BrotliEncoderDestroyInstance(state); } + + void finish() override { + flush(); + writeInternal(nullptr, 0); + } + + void writeInternal(const unsigned char* data, size_t len) override { + const uint8_t* next_in = data; + size_t avail_in = len; + uint8_t* next_out = outbuf; + size_t avail_out = sizeof(outbuf); + + while (!finished && ((data == nullptr) || (avail_in != 0u))) { + checkInterrupt(); + + if (BrotliEncoderCompressStream(state, + data != nullptr ? BROTLI_OPERATION_PROCESS + : BROTLI_OPERATION_FINISH, + &avail_in, &next_in, &avail_out, + &next_out, nullptr) == 0) { + throw CompressionError("error while compressing brotli compression"); + } + + if (avail_out < sizeof(outbuf) || avail_in == 0) { + nextSink(outbuf, sizeof(outbuf) - avail_out); + next_out = outbuf; + avail_out = sizeof(outbuf); + } + + finished = (BrotliEncoderIsFinished(state) != 0); + } + } +}; + +ref<CompressionSink> makeCompressionSink(const std::string& method, + Sink& nextSink, const bool parallel) { + if (method == "none") { + return make_ref<NoneSink>(nextSink); + } + if (method == "xz") { + return make_ref<XzCompressionSink>(nextSink, parallel); + } else if (method == "bzip2") { + return make_ref<BzipCompressionSink>(nextSink); + } else if (method == "br") { + return make_ref<BrotliCompressionSink>(nextSink); + } else { + throw UnknownCompressionMethod(format("unknown compression method '%s'") % + method); + } +} + +ref<std::string> compress(const std::string& method, const std::string& in, + const bool parallel) { + StringSink ssink; + auto sink = makeCompressionSink(method, ssink, parallel); + (*sink)(in); + sink->finish(); + return ssink.s; +} + +} // namespace nix diff --git a/third_party/nix/src/libutil/compression.hh b/third_party/nix/src/libutil/compression.hh new file mode 100644 index 000000000000..80b651e107d4 --- /dev/null +++ b/third_party/nix/src/libutil/compression.hh @@ -0,0 +1,31 @@ +#pragma once + +#include <string> + +#include "ref.hh" +#include "serialise.hh" +#include "types.hh" + +namespace nix { + +struct CompressionSink : BufferedSink { + virtual void finish() = 0; +}; + +ref<std::string> decompress(const std::string& method, const std::string& in); + +ref<CompressionSink> makeDecompressionSink(const std::string& method, + Sink& nextSink); + +ref<std::string> compress(const std::string& method, const std::string& in, + const bool parallel = false); + +ref<CompressionSink> makeCompressionSink(const std::string& method, + Sink& nextSink, + const bool parallel = false); + +MakeError(UnknownCompressionMethod, Error); + +MakeError(CompressionError, Error); + +} // namespace nix diff --git a/third_party/nix/src/libutil/config.cc b/third_party/nix/src/libutil/config.cc new file mode 100644 index 000000000000..828ee1811bdb --- /dev/null +++ b/third_party/nix/src/libutil/config.cc @@ -0,0 +1,365 @@ +#define GOOGLE_STRIP_LOG 0 +#include "config.hh" + +#include <utility> + +#include <glog/logging.h> + +#include "args.hh" +#include "json.hh" +// #include <glog/log_severity.h> + +namespace nix { + +bool Config::set(const std::string& name, const std::string& value) { + auto i = _settings.find(name); + if (i == _settings.end()) { + return false; + } + i->second.setting->set(value); + i->second.setting->overriden = true; + return true; +} + +void Config::addSetting(AbstractSetting* setting) { + _settings.emplace(setting->name, Config::SettingData(false, setting)); + for (auto& alias : setting->aliases) { + _settings.emplace(alias, Config::SettingData(true, setting)); + } + + bool set = false; + + auto i = unknownSettings.find(setting->name); + if (i != unknownSettings.end()) { + setting->set(i->second); + setting->overriden = true; + unknownSettings.erase(i); + set = true; + } + + for (auto& alias : setting->aliases) { + auto i = unknownSettings.find(alias); + if (i != unknownSettings.end()) { + if (set) { + LOG(WARNING) << "setting '" << alias + << "' is set, but it's an alias of '" << setting->name + << "', which is also set"; + } + + else { + setting->set(i->second); + setting->overriden = true; + unknownSettings.erase(i); + set = true; + } + } + } +} + +void AbstractConfig::warnUnknownSettings() { + for (auto& s : unknownSettings) { + LOG(WARNING) << "unknown setting: " << s.first; + } +} + +void AbstractConfig::reapplyUnknownSettings() { + auto unknownSettings2 = std::move(unknownSettings); + for (auto& s : unknownSettings2) { + set(s.first, s.second); + } +} + +void Config::getSettings(std::map<std::string, SettingInfo>& res, + bool overridenOnly) { + for (auto& opt : _settings) { + if (!opt.second.isAlias && + (!overridenOnly || opt.second.setting->overriden)) { + res.emplace(opt.first, SettingInfo{opt.second.setting->to_string(), + opt.second.setting->description}); + } + } +} + +void AbstractConfig::applyConfigFile(const Path& path) { + try { + string contents = readFile(path); + + unsigned int pos = 0; + + while (pos < contents.size()) { + string line; + while (pos < contents.size() && contents[pos] != '\n') { + line += contents[pos++]; + } + pos++; + + string::size_type hash = line.find('#'); + if (hash != string::npos) { + line = string(line, 0, hash); + } + + auto tokens = tokenizeString<vector<string> >(line); + if (tokens.empty()) { + continue; + } + + if (tokens.size() < 2) { + throw UsageError("illegal configuration line '%1%' in '%2%'", line, + path); + } + + auto include = false; + auto ignoreMissing = false; + if (tokens[0] == "include") { + include = true; + } else if (tokens[0] == "!include") { + include = true; + ignoreMissing = true; + } + + if (include) { + if (tokens.size() != 2) { + throw UsageError("illegal configuration line '%1%' in '%2%'", line, + path); + } + auto p = absPath(tokens[1], dirOf(path)); + if (pathExists(p)) { + applyConfigFile(p); + } else if (!ignoreMissing) { + throw Error("file '%1%' included from '%2%' not found", p, path); + } + continue; + } + + if (tokens[1] != "=") { + throw UsageError("illegal configuration line '%1%' in '%2%'", line, + path); + } + + string name = tokens[0]; + + auto i = tokens.begin(); + advance(i, 2); + + set(name, + concatStringsSep(" ", Strings(i, tokens.end()))); // FIXME: slow + }; + } catch (SysError&) { + } +} + +void Config::resetOverriden() { + for (auto& s : _settings) { + s.second.setting->overriden = false; + } +} + +void Config::toJSON(JSONObject& out) { + for (auto& s : _settings) { + if (!s.second.isAlias) { + JSONObject out2(out.object(s.first)); + out2.attr("description", s.second.setting->description); + JSONPlaceholder out3(out2.placeholder("value")); + s.second.setting->toJSON(out3); + } + } +} + +void Config::convertToArgs(Args& args, const std::string& category) { + for (auto& s : _settings) { + if (!s.second.isAlias) { + s.second.setting->convertToArg(args, category); + } + } +} + +AbstractSetting::AbstractSetting(std::string name, std::string description, + std::set<std::string> aliases) + : name(std::move(name)), + description(std::move(description)), + aliases(std::move(aliases)) {} + +void AbstractSetting::toJSON(JSONPlaceholder& out) { out.write(to_string()); } + +void AbstractSetting::convertToArg(Args& args, const std::string& category) {} + +template <typename T> +void BaseSetting<T>::toJSON(JSONPlaceholder& out) { + out.write(value); +} + +template <typename T> +void BaseSetting<T>::convertToArg(Args& args, const std::string& category) { + args.mkFlag() + .longName(name) + .description(description) + .arity(1) + .handler([=](std::vector<std::string> ss) { + overriden = true; + set(ss[0]); + }) + .category(category); +} + +template <> +void BaseSetting<std::string>::set(const std::string& str) { + value = str; +} + +template <> +std::string BaseSetting<std::string>::to_string() { + return value; +} + +template <typename T> +void BaseSetting<T>::set(const std::string& str) { + static_assert(std::is_integral<T>::value, "Integer required."); + if (!string2Int(str, value)) { + throw UsageError("setting '%s' has invalid value '%s'", name, str); + } +} + +template <typename T> +std::string BaseSetting<T>::to_string() { + static_assert(std::is_integral<T>::value, "Integer required."); + return std::to_string(value); +} + +template <> +void BaseSetting<bool>::set(const std::string& str) { + if (str == "true" || str == "yes" || str == "1") { + value = true; + } else if (str == "false" || str == "no" || str == "0") { + value = false; + } else { + throw UsageError("Boolean setting '%s' has invalid value '%s'", name, str); + } +} + +template <> +std::string BaseSetting<bool>::to_string() { + return value ? "true" : "false"; +} + +template <> +void BaseSetting<bool>::convertToArg(Args& args, const std::string& category) { + args.mkFlag() + .longName(name) + .description(description) + .handler([=](const std::vector<std::string>& ss) { override(true); }) + .category(category); + args.mkFlag() + .longName("no-" + name) + .description(description) + .handler([=](const std::vector<std::string>& ss) { override(false); }) + .category(category); +} + +template <> +void BaseSetting<Strings>::set(const std::string& str) { + value = tokenizeString<Strings>(str); +} + +template <> +std::string BaseSetting<Strings>::to_string() { + return concatStringsSep(" ", value); +} + +template <> +void BaseSetting<Strings>::toJSON(JSONPlaceholder& out) { + JSONList list(out.list()); + for (auto& s : value) { + list.elem(s); + } +} + +template <> +void BaseSetting<StringSet>::set(const std::string& str) { + value = tokenizeString<StringSet>(str); +} + +template <> +std::string BaseSetting<StringSet>::to_string() { + return concatStringsSep(" ", value); +} + +template <> +void BaseSetting<StringSet>::toJSON(JSONPlaceholder& out) { + JSONList list(out.list()); + for (auto& s : value) { + list.elem(s); + } +} + +template class BaseSetting<int>; +template class BaseSetting<unsigned int>; +template class BaseSetting<long>; +template class BaseSetting<unsigned long>; +template class BaseSetting<long long>; +template class BaseSetting<unsigned long long>; +template class BaseSetting<bool>; +template class BaseSetting<std::string>; +template class BaseSetting<Strings>; +template class BaseSetting<StringSet>; + +void PathSetting::set(const std::string& str) { + if (str.empty()) { + if (allowEmpty) { + value = ""; + } else { + throw UsageError("setting '%s' cannot be empty", name); + } + } else { + value = canonPath(str); + } +} + +bool GlobalConfig::set(const std::string& name, const std::string& value) { + for (auto& config : *configRegistrations) { + if (config->set(name, value)) { + return true; + } + } + + unknownSettings.emplace(name, value); + + return false; +} + +void GlobalConfig::getSettings(std::map<std::string, SettingInfo>& res, + bool overridenOnly) { + for (auto& config : *configRegistrations) { + config->getSettings(res, overridenOnly); + } +} + +void GlobalConfig::resetOverriden() { + for (auto& config : *configRegistrations) { + config->resetOverriden(); + } +} + +void GlobalConfig::toJSON(JSONObject& out) { + for (auto& config : *configRegistrations) { + config->toJSON(out); + } +} + +void GlobalConfig::convertToArgs(Args& args, const std::string& category) { + for (auto& config : *configRegistrations) { + config->convertToArgs(args, category); + } +} + +GlobalConfig globalConfig; + +GlobalConfig::ConfigRegistrations* GlobalConfig::configRegistrations; + +GlobalConfig::Register::Register(Config* config) { + if (configRegistrations == nullptr) { + configRegistrations = new ConfigRegistrations; + } + configRegistrations->emplace_back(config); +} + +} // namespace nix diff --git a/third_party/nix/src/libutil/config.hh b/third_party/nix/src/libutil/config.hh new file mode 100644 index 000000000000..f8055d4d3690 --- /dev/null +++ b/third_party/nix/src/libutil/config.hh @@ -0,0 +1,227 @@ +#include <map> +#include <set> + +#include "types.hh" + +#pragma once + +namespace nix { + +class Args; +class AbstractSetting; +class JSONPlaceholder; +class JSONObject; + +class AbstractConfig { + protected: + StringMap unknownSettings; + + AbstractConfig(const StringMap& initials = {}) : unknownSettings(initials) {} + + public: + virtual bool set(const std::string& name, const std::string& value) = 0; + + struct SettingInfo { + std::string value; + std::string description; + }; + + virtual void getSettings(std::map<std::string, SettingInfo>& res, + bool overridenOnly = false) = 0; + + void applyConfigFile(const Path& path); + + virtual void resetOverriden() = 0; + + virtual void toJSON(JSONObject& out) = 0; + + virtual void convertToArgs(Args& args, const std::string& category) = 0; + + void warnUnknownSettings(); + + void reapplyUnknownSettings(); +}; + +/* A class to simplify providing configuration settings. The typical + use is to inherit Config and add Setting<T> members: + + class MyClass : private Config + { + Setting<int> foo{this, 123, "foo", "the number of foos to use"}; + Setting<std::string> bar{this, "blabla", "bar", "the name of the bar"}; + + MyClass() : Config(readConfigFile("/etc/my-app.conf")) + { + std::cout << foo << "\n"; // will print 123 unless overriden + } + }; +*/ + +class Config : public AbstractConfig { + friend class AbstractSetting; + + public: + struct SettingData { + bool isAlias; + AbstractSetting* setting; + SettingData(bool isAlias, AbstractSetting* setting) + : isAlias(isAlias), setting(setting) {} + }; + + typedef std::map<std::string, SettingData> Settings; + + private: + Settings _settings; + + public: + Config(const StringMap& initials = {}) : AbstractConfig(initials) {} + + bool set(const std::string& name, const std::string& value) override; + + void addSetting(AbstractSetting* setting); + + void getSettings(std::map<std::string, SettingInfo>& res, + bool overridenOnly = false) override; + + void resetOverriden() override; + + void toJSON(JSONObject& out) override; + + void convertToArgs(Args& args, const std::string& category) override; +}; + +class AbstractSetting { + friend class Config; + + public: + const std::string name; + const std::string description; + const std::set<std::string> aliases; + + int created = 123; + + bool overriden = false; + + protected: + AbstractSetting(std::string name, std::string description, + std::set<std::string> aliases); + + virtual ~AbstractSetting() { + // Check against a gcc miscompilation causing our constructor + // not to run (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=80431). + assert(created == 123); + } + + virtual void set(const std::string& value) = 0; + + virtual std::string to_string() = 0; + + virtual void toJSON(JSONPlaceholder& out); + + virtual void convertToArg(Args& args, const std::string& category); + + bool isOverriden() { return overriden; } +}; + +/* A setting of type T. */ +template <typename T> +class BaseSetting : public AbstractSetting { + protected: + T value; + + public: + BaseSetting(const T& def, const std::string& name, + const std::string& description, + const std::set<std::string>& aliases = {}) + : AbstractSetting(name, description, aliases), value(def) {} + + operator const T&() const { return value; } + operator T&() { return value; } + const T& get() const { return value; } + bool operator==(const T& v2) const { return value == v2; } + bool operator!=(const T& v2) const { return value != v2; } + void operator=(const T& v) { assign(v); } + virtual void assign(const T& v) { value = v; } + + void set(const std::string& str) override; + + virtual void override(const T& v) { + overriden = true; + value = v; + } + + std::string to_string() override; + + void convertToArg(Args& args, const std::string& category) override; + + void toJSON(JSONPlaceholder& out) override; +}; + +template <typename T> +std::ostream& operator<<(std::ostream& str, const BaseSetting<T>& opt) { + str << (const T&)opt; + return str; +} + +template <typename T> +bool operator==(const T& v1, const BaseSetting<T>& v2) { + return v1 == (const T&)v2; +} + +template <typename T> +class Setting : public BaseSetting<T> { + public: + Setting(Config* options, const T& def, const std::string& name, + const std::string& description, + const std::set<std::string>& aliases = {}) + : BaseSetting<T>(def, name, description, aliases) { + options->addSetting(this); + } + + void operator=(const T& v) { this->assign(v); } +}; + +/* A special setting for Paths. These are automatically canonicalised + (e.g. "/foo//bar/" becomes "/foo/bar"). */ +class PathSetting : public BaseSetting<Path> { + bool allowEmpty; + + public: + PathSetting(Config* options, bool allowEmpty, const Path& def, + const std::string& name, const std::string& description, + const std::set<std::string>& aliases = {}) + : BaseSetting<Path>(def, name, description, aliases), + allowEmpty(allowEmpty) { + options->addSetting(this); + } + + void set(const std::string& str) override; + + Path operator+(const char* p) const { return value + p; } + + void operator=(const Path& v) { this->assign(v); } +}; + +struct GlobalConfig : public AbstractConfig { + typedef std::vector<Config*> ConfigRegistrations; + static ConfigRegistrations* configRegistrations; + + bool set(const std::string& name, const std::string& value) override; + + void getSettings(std::map<std::string, SettingInfo>& res, + bool overridenOnly = false) override; + + void resetOverriden() override; + + void toJSON(JSONObject& out) override; + + void convertToArgs(Args& args, const std::string& category) override; + + struct Register { + Register(Config* config); + }; +}; + +extern GlobalConfig globalConfig; + +} // namespace nix diff --git a/third_party/nix/src/libutil/finally.hh b/third_party/nix/src/libutil/finally.hh new file mode 100644 index 000000000000..8d3083b6a3ac --- /dev/null +++ b/third_party/nix/src/libutil/finally.hh @@ -0,0 +1,13 @@ +#pragma once + +#include <functional> + +/* A trivial class to run a function at the end of a scope. */ +class Finally { + private: + std::function<void()> fun; + + public: + Finally(std::function<void()> fun) : fun(fun) {} + ~Finally() { fun(); } +}; diff --git a/third_party/nix/src/libutil/hash.cc b/third_party/nix/src/libutil/hash.cc new file mode 100644 index 000000000000..81d8628e972b --- /dev/null +++ b/third_party/nix/src/libutil/hash.cc @@ -0,0 +1,371 @@ +#include "hash.hh" + +#include <cstring> +#include <iostream> + +#include <fcntl.h> +#include <openssl/md5.h> +#include <openssl/sha.h> +#include <sys/stat.h> +#include <sys/types.h> + +#include "archive.hh" +#include "istringstream_nocopy.hh" +#include "util.hh" + +namespace nix { + +void Hash::init() { + if (type == htMD5) { + hashSize = md5HashSize; + } else if (type == htSHA1) { + hashSize = sha1HashSize; + } else if (type == htSHA256) { + hashSize = sha256HashSize; + } else if (type == htSHA512) { + hashSize = sha512HashSize; + } else { + abort(); + } + assert(hashSize <= maxHashSize); + memset(hash, 0, maxHashSize); +} + +bool Hash::operator==(const Hash& h2) const { + if (hashSize != h2.hashSize) { + return false; + } + for (unsigned int i = 0; i < hashSize; i++) { + if (hash[i] != h2.hash[i]) { + return false; + } + } + return true; +} + +bool Hash::operator!=(const Hash& h2) const { return !(*this == h2); } + +bool Hash::operator<(const Hash& h) const { + if (hashSize < h.hashSize) { + return true; + } + if (hashSize > h.hashSize) { + return false; + } + for (unsigned int i = 0; i < hashSize; i++) { + if (hash[i] < h.hash[i]) { + return true; + } + if (hash[i] > h.hash[i]) { + return false; + } + } + return false; +} + +const string base16Chars = "0123456789abcdef"; + +static string printHash16(const Hash& hash) { + char buf[hash.hashSize * 2]; + for (unsigned int i = 0; i < hash.hashSize; i++) { + buf[i * 2] = base16Chars[hash.hash[i] >> 4]; + buf[i * 2 + 1] = base16Chars[hash.hash[i] & 0x0f]; + } + return string(buf, hash.hashSize * 2); +} + +// omitted: E O U T +const string base32Chars = "0123456789abcdfghijklmnpqrsvwxyz"; + +static string printHash32(const Hash& hash) { + assert(hash.hashSize); + size_t len = hash.base32Len(); + assert(len); + + string s; + s.reserve(len); + + for (int n = (int)len - 1; n >= 0; n--) { + unsigned int b = n * 5; + unsigned int i = b / 8; + unsigned int j = b % 8; + unsigned char c = + (hash.hash[i] >> j) | + (i >= hash.hashSize - 1 ? 0 : hash.hash[i + 1] << (8 - j)); + s.push_back(base32Chars[c & 0x1f]); + } + + return s; +} + +string printHash16or32(const Hash& hash) { + return hash.to_string(hash.type == htMD5 ? Base16 : Base32, false); +} + +std::string Hash::to_string(Base base, bool includeType) const { + std::string s; + if (base == SRI || includeType) { + s += printHashType(type); + s += base == SRI ? '-' : ':'; + } + switch (base) { + case Base16: + s += printHash16(*this); + break; + case Base32: + s += printHash32(*this); + break; + case Base64: + case SRI: + s += base64Encode(std::string((const char*)hash, hashSize)); + break; + } + return s; +} + +Hash::Hash(const std::string& s, HashType type) : type(type) { + size_t pos = 0; + bool isSRI = false; + + auto sep = s.find(':'); + if (sep == string::npos) { + sep = s.find('-'); + if (sep != string::npos) { + isSRI = true; + } else if (type == htUnknown) { + throw BadHash("hash '%s' does not include a type", s); + } + } + + if (sep != string::npos) { + string hts = string(s, 0, sep); + this->type = parseHashType(hts); + if (this->type == htUnknown) { + throw BadHash("unknown hash type '%s'", hts); + } + if (type != htUnknown && type != this->type) { + throw BadHash("hash '%s' should have type '%s'", s, printHashType(type)); + } + pos = sep + 1; + } + + init(); + + size_t size = s.size() - pos; + + if (!isSRI && size == base16Len()) { + auto parseHexDigit = [&](char c) { + if (c >= '0' && c <= '9') { + return c - '0'; + } + if (c >= 'A' && c <= 'F') { + return c - 'A' + 10; + } + if (c >= 'a' && c <= 'f') { + return c - 'a' + 10; + } + throw BadHash("invalid base-16 hash '%s'", s); + }; + + for (unsigned int i = 0; i < hashSize; i++) { + hash[i] = parseHexDigit(s[pos + i * 2]) << 4 | + parseHexDigit(s[pos + i * 2 + 1]); + } + } + + else if (!isSRI && size == base32Len()) { + for (unsigned int n = 0; n < size; ++n) { + char c = s[pos + size - n - 1]; + unsigned char digit; + for (digit = 0; digit < base32Chars.size(); ++digit) { /* !!! slow */ + if (base32Chars[digit] == c) { + break; + } + } + if (digit >= 32) { + throw BadHash("invalid base-32 hash '%s'", s); + } + unsigned int b = n * 5; + unsigned int i = b / 8; + unsigned int j = b % 8; + hash[i] |= digit << j; + + if (i < hashSize - 1) { + hash[i + 1] |= digit >> (8 - j); + } else { + if ((digit >> (8 - j)) != 0) { + throw BadHash("invalid base-32 hash '%s'", s); + } + } + } + } + + else if (isSRI || size == base64Len()) { + auto d = base64Decode(std::string(s, pos)); + if (d.size() != hashSize) { + throw BadHash("invalid %s hash '%s'", isSRI ? "SRI" : "base-64", s); + } + assert(hashSize); + memcpy(hash, d.data(), hashSize); + } + + else { + throw BadHash("hash '%s' has wrong length for hash type '%s'", s, + printHashType(type)); + } +} + +union Ctx { + MD5_CTX md5; + SHA_CTX sha1; + SHA256_CTX sha256; + SHA512_CTX sha512; +}; + +static void start(HashType ht, Ctx& ctx) { + if (ht == htMD5) { + MD5_Init(&ctx.md5); + } else if (ht == htSHA1) { + SHA1_Init(&ctx.sha1); + } else if (ht == htSHA256) { + SHA256_Init(&ctx.sha256); + } else if (ht == htSHA512) { + SHA512_Init(&ctx.sha512); + } +} + +static void update(HashType ht, Ctx& ctx, const unsigned char* bytes, + size_t len) { + if (ht == htMD5) { + MD5_Update(&ctx.md5, bytes, len); + } else if (ht == htSHA1) { + SHA1_Update(&ctx.sha1, bytes, len); + } else if (ht == htSHA256) { + SHA256_Update(&ctx.sha256, bytes, len); + } else if (ht == htSHA512) { + SHA512_Update(&ctx.sha512, bytes, len); + } +} + +static void finish(HashType ht, Ctx& ctx, unsigned char* hash) { + if (ht == htMD5) { + MD5_Final(hash, &ctx.md5); + } else if (ht == htSHA1) { + SHA1_Final(hash, &ctx.sha1); + } else if (ht == htSHA256) { + SHA256_Final(hash, &ctx.sha256); + } else if (ht == htSHA512) { + SHA512_Final(hash, &ctx.sha512); + } +} + +Hash hashString(HashType ht, const string& s) { + Ctx ctx; + Hash hash(ht); + start(ht, ctx); + update(ht, ctx, (const unsigned char*)s.data(), s.length()); + finish(ht, ctx, hash.hash); + return hash; +} + +Hash hashFile(HashType ht, const Path& path) { + Ctx ctx; + Hash hash(ht); + start(ht, ctx); + + AutoCloseFD fd = open(path.c_str(), O_RDONLY | O_CLOEXEC); + if (!fd) { + throw SysError(format("opening file '%1%'") % path); + } + + std::vector<unsigned char> buf(8192); + ssize_t n; + while ((n = read(fd.get(), buf.data(), buf.size())) != 0) { + checkInterrupt(); + if (n == -1) { + throw SysError(format("reading file '%1%'") % path); + } + update(ht, ctx, buf.data(), n); + } + + finish(ht, ctx, hash.hash); + return hash; +} + +HashSink::HashSink(HashType ht) : ht(ht) { + ctx = new Ctx; + bytes = 0; + start(ht, *ctx); +} + +HashSink::~HashSink() { + bufPos = 0; + delete ctx; +} + +void HashSink::write(const unsigned char* data, size_t len) { + bytes += len; + update(ht, *ctx, data, len); +} + +HashResult HashSink::finish() { + flush(); + Hash hash(ht); + nix::finish(ht, *ctx, hash.hash); + return HashResult(hash, bytes); +} + +HashResult HashSink::currentHash() { + flush(); + Ctx ctx2 = *ctx; + Hash hash(ht); + nix::finish(ht, ctx2, hash.hash); + return HashResult(hash, bytes); +} + +HashResult hashPath(HashType ht, const Path& path, PathFilter& filter) { + HashSink sink(ht); + dumpPath(path, sink, filter); + return sink.finish(); +} + +Hash compressHash(const Hash& hash, unsigned int newSize) { + Hash h; + h.hashSize = newSize; + for (unsigned int i = 0; i < hash.hashSize; ++i) { + h.hash[i % newSize] ^= hash.hash[i]; + } + return h; +} + +HashType parseHashType(const string& s) { + if (s == "md5") { + return htMD5; + } + if (s == "sha1") { + return htSHA1; + } else if (s == "sha256") { + return htSHA256; + } else if (s == "sha512") { + return htSHA512; + } else { + return htUnknown; + } +} + +string printHashType(HashType ht) { + if (ht == htMD5) { + return "md5"; + } + if (ht == htSHA1) { + return "sha1"; + } else if (ht == htSHA256) { + return "sha256"; + } else if (ht == htSHA512) { + return "sha512"; + } else { + abort(); + } +} + +} // namespace nix diff --git a/third_party/nix/src/libutil/hash.hh b/third_party/nix/src/libutil/hash.hh new file mode 100644 index 000000000000..a9002023fa31 --- /dev/null +++ b/third_party/nix/src/libutil/hash.hh @@ -0,0 +1,112 @@ +#pragma once + +#include "serialise.hh" +#include "types.hh" + +namespace nix { + +MakeError(BadHash, Error); + +enum HashType : char { htUnknown, htMD5, htSHA1, htSHA256, htSHA512 }; + +const int md5HashSize = 16; +const int sha1HashSize = 20; +const int sha256HashSize = 32; +const int sha512HashSize = 64; + +extern const string base32Chars; + +enum Base : int { Base64, Base32, Base16, SRI }; + +struct Hash { + static const unsigned int maxHashSize = 64; + unsigned int hashSize = 0; + unsigned char hash[maxHashSize] = {}; + + HashType type = htUnknown; + + /* Create an unset hash object. */ + Hash(){}; + + /* Create a zero-filled hash object. */ + Hash(HashType type) : type(type) { init(); }; + + /* Initialize the hash from a string representation, in the format + "[<type>:]<base16|base32|base64>" or "<type>-<base64>" (a + Subresource Integrity hash expression). If the 'type' argument + is htUnknown, then the hash type must be specified in the + string. */ + Hash(const std::string& s, HashType type = htUnknown); + + void init(); + + /* Check whether a hash is set. */ + operator bool() const { return type != htUnknown; } + + /* Check whether two hash are equal. */ + bool operator==(const Hash& h2) const; + + /* Check whether two hash are not equal. */ + bool operator!=(const Hash& h2) const; + + /* For sorting. */ + bool operator<(const Hash& h) const; + + /* Returns the length of a base-16 representation of this hash. */ + size_t base16Len() const { return hashSize * 2; } + + /* Returns the length of a base-32 representation of this hash. */ + size_t base32Len() const { return (hashSize * 8 - 1) / 5 + 1; } + + /* Returns the length of a base-64 representation of this hash. */ + size_t base64Len() const { return ((4 * hashSize / 3) + 3) & ~3; } + + /* Return a string representation of the hash, in base-16, base-32 + or base-64. By default, this is prefixed by the hash type + (e.g. "sha256:"). */ + std::string to_string(Base base = Base32, bool includeType = true) const; +}; + +/* Print a hash in base-16 if it's MD5, or base-32 otherwise. */ +string printHash16or32(const Hash& hash); + +/* Compute the hash of the given string. */ +Hash hashString(HashType ht, const string& s); + +/* Compute the hash of the given file. */ +Hash hashFile(HashType ht, const Path& path); + +/* Compute the hash of the given path. The hash is defined as + (essentially) hashString(ht, dumpPath(path)). */ +typedef std::pair<Hash, unsigned long long> HashResult; +HashResult hashPath(HashType ht, const Path& path, + PathFilter& filter = defaultPathFilter); + +/* Compress a hash to the specified number of bytes by cyclically + XORing bytes together. */ +Hash compressHash(const Hash& hash, unsigned int newSize); + +/* Parse a string representing a hash type. */ +HashType parseHashType(const string& s); + +/* And the reverse. */ +string printHashType(HashType ht); + +union Ctx; + +class HashSink : public BufferedSink { + private: + HashType ht; + Ctx* ctx; + unsigned long long bytes; + + public: + HashSink(HashType ht); + HashSink(const HashSink& h); + ~HashSink(); + void write(const unsigned char* data, size_t len); + HashResult finish(); + HashResult currentHash(); +}; + +} // namespace nix diff --git a/third_party/nix/src/libutil/istringstream_nocopy.hh b/third_party/nix/src/libutil/istringstream_nocopy.hh new file mode 100644 index 000000000000..997965630b6c --- /dev/null +++ b/third_party/nix/src/libutil/istringstream_nocopy.hh @@ -0,0 +1,82 @@ +/* This file provides a variant of std::istringstream that doesn't + copy its string argument. This is useful for large strings. The + caller must ensure that the string object is not destroyed while + it's referenced by this object. */ + +#pragma once + +#include <iostream> +#include <string> + +template <class CharT, class Traits = std::char_traits<CharT>, + class Allocator = std::allocator<CharT>> +class basic_istringbuf_nocopy : public std::basic_streambuf<CharT, Traits> { + public: + typedef std::basic_string<CharT, Traits, Allocator> string_type; + + typedef typename std::basic_streambuf<CharT, Traits>::off_type off_type; + + typedef typename std::basic_streambuf<CharT, Traits>::pos_type pos_type; + + typedef typename std::basic_streambuf<CharT, Traits>::int_type int_type; + + typedef typename std::basic_streambuf<CharT, Traits>::traits_type traits_type; + + private: + const string_type& s; + + off_type off; + + public: + basic_istringbuf_nocopy(const string_type& s) : s{s}, off{0} {} + + private: + pos_type seekoff(off_type off, std::ios_base::seekdir dir, + std::ios_base::openmode which) { + if (which & std::ios_base::in) { + this->off = + dir == std::ios_base::beg + ? off + : (dir == std::ios_base::end ? s.size() + off : this->off + off); + } + return pos_type(this->off); + } + + pos_type seekpos(pos_type pos, std::ios_base::openmode which) { + return seekoff(pos, std::ios_base::beg, which); + } + + std::streamsize showmanyc() { return s.size() - off; } + + int_type underflow() { + if (typename string_type::size_type(off) == s.size()) + return traits_type::eof(); + return traits_type::to_int_type(s[off]); + } + + int_type uflow() { + if (typename string_type::size_type(off) == s.size()) + return traits_type::eof(); + return traits_type::to_int_type(s[off++]); + } + + int_type pbackfail(int_type ch) { + if (off == 0 || (ch != traits_type::eof() && ch != s[off - 1])) + return traits_type::eof(); + + return traits_type::to_int_type(s[--off]); + } +}; + +template <class CharT, class Traits = std::char_traits<CharT>, + class Allocator = std::allocator<CharT>> +class basic_istringstream_nocopy : public std::basic_iostream<CharT, Traits> { + typedef basic_istringbuf_nocopy<CharT, Traits, Allocator> buf_type; + buf_type buf; + + public: + basic_istringstream_nocopy(const typename buf_type::string_type& s) + : std::basic_iostream<CharT, Traits>(&buf), buf(s){}; +}; + +typedef basic_istringstream_nocopy<char> istringstream_nocopy; diff --git a/third_party/nix/src/libutil/json.cc b/third_party/nix/src/libutil/json.cc new file mode 100644 index 000000000000..218fc264ba50 --- /dev/null +++ b/third_party/nix/src/libutil/json.cc @@ -0,0 +1,198 @@ +#include "json.hh" + +#include <cstring> +#include <iomanip> + +namespace nix { + +void toJSON(std::ostream& str, const char* start, const char* end) { + str << '"'; + for (auto i = start; i != end; i++) { + if (*i == '\"' || *i == '\\') { + str << '\\' << *i; + } else if (*i == '\n') { + str << "\\n"; + } else if (*i == '\r') { + str << "\\r"; + } else if (*i == '\t') { + str << "\\t"; + } else if (*i >= 0 && *i < 32) { + str << "\\u" << std::setfill('0') << std::setw(4) << std::hex + << (uint16_t)*i << std::dec; + } else { + str << *i; + } + } + str << '"'; +} + +void toJSON(std::ostream& str, const char* s) { + if (s == nullptr) { + str << "null"; + } else { + toJSON(str, s, s + strlen(s)); + } +} + +template <> +void toJSON<int>(std::ostream& str, const int& n) { + str << n; +} +template <> +void toJSON<unsigned int>(std::ostream& str, const unsigned int& n) { + str << n; +} +template <> +void toJSON<long>(std::ostream& str, const long& n) { + str << n; +} +template <> +void toJSON<unsigned long>(std::ostream& str, const unsigned long& n) { + str << n; +} +template <> +void toJSON<long long>(std::ostream& str, const long long& n) { + str << n; +} +template <> +void toJSON<unsigned long long>(std::ostream& str, + const unsigned long long& n) { + str << n; +} +template <> +void toJSON<float>(std::ostream& str, const float& n) { + str << n; +} +template <> +void toJSON<double>(std::ostream& str, const double& n) { + str << n; +} + +template <> +void toJSON<std::string>(std::ostream& str, const std::string& s) { + toJSON(str, s.c_str(), s.c_str() + s.size()); +} + +template <> +void toJSON<bool>(std::ostream& str, const bool& b) { + str << (b ? "true" : "false"); +} + +template <> +void toJSON<std::nullptr_t>(std::ostream& str, const std::nullptr_t& b) { + str << "null"; +} + +JSONWriter::JSONWriter(std::ostream& str, bool indent) + : state(new JSONState(str, indent)) { + state->stack++; +} + +JSONWriter::JSONWriter(JSONState* state) : state(state) { state->stack++; } + +JSONWriter::~JSONWriter() { + if (state != nullptr) { + assertActive(); + state->stack--; + if (state->stack == 0) { + delete state; + } + } +} + +void JSONWriter::comma() { + assertActive(); + if (first) { + first = false; + } else { + state->str << ','; + } + if (state->indent) { + indent(); + } +} + +void JSONWriter::indent() { + state->str << '\n' << std::string(state->depth * 2, ' '); +} + +void JSONList::open() { + state->depth++; + state->str << '['; +} + +JSONList::~JSONList() { + state->depth--; + if (state->indent && !first) { + indent(); + } + state->str << "]"; +} + +JSONList JSONList::list() { + comma(); + return JSONList(state); +} + +JSONObject JSONList::object() { + comma(); + return JSONObject(state); +} + +JSONPlaceholder JSONList::placeholder() { + comma(); + return JSONPlaceholder(state); +} + +void JSONObject::open() { + state->depth++; + state->str << '{'; +} + +JSONObject::~JSONObject() { + if (state != nullptr) { + state->depth--; + if (state->indent && !first) { + indent(); + } + state->str << "}"; + } +} + +void JSONObject::attr(const std::string& s) { + comma(); + toJSON(state->str, s); + state->str << ':'; + if (state->indent) { + state->str << ' '; + } +} + +JSONList JSONObject::list(const std::string& name) { + attr(name); + return JSONList(state); +} + +JSONObject JSONObject::object(const std::string& name) { + attr(name); + return JSONObject(state); +} + +JSONPlaceholder JSONObject::placeholder(const std::string& name) { + attr(name); + return JSONPlaceholder(state); +} + +JSONList JSONPlaceholder::list() { + assertValid(); + first = false; + return JSONList(state); +} + +JSONObject JSONPlaceholder::object() { + assertValid(); + first = false; + return JSONObject(state); +} + +} // namespace nix diff --git a/third_party/nix/src/libutil/json.hh b/third_party/nix/src/libutil/json.hh new file mode 100644 index 000000000000..a3843a8a8a7b --- /dev/null +++ b/third_party/nix/src/libutil/json.hh @@ -0,0 +1,142 @@ +#pragma once + +#include <cassert> +#include <iostream> +#include <vector> + +namespace nix { + +void toJSON(std::ostream& str, const char* start, const char* end); +void toJSON(std::ostream& str, const char* s); + +template <typename T> +void toJSON(std::ostream& str, const T& n); + +class JSONWriter { + protected: + struct JSONState { + std::ostream& str; + bool indent; + size_t depth = 0; + size_t stack = 0; + JSONState(std::ostream& str, bool indent) : str(str), indent(indent) {} + ~JSONState() { assert(stack == 0); } + }; + + JSONState* state; + + bool first = true; + + JSONWriter(std::ostream& str, bool indent); + + JSONWriter(JSONState* state); + + ~JSONWriter(); + + void assertActive() { assert(state->stack != 0); } + + void comma(); + + void indent(); +}; + +class JSONObject; +class JSONPlaceholder; + +class JSONList : JSONWriter { + private: + friend class JSONObject; + friend class JSONPlaceholder; + + void open(); + + JSONList(JSONState* state) : JSONWriter(state) { open(); } + + public: + JSONList(std::ostream& str, bool indent = false) : JSONWriter(str, indent) { + open(); + } + + ~JSONList(); + + template <typename T> + JSONList& elem(const T& v) { + comma(); + toJSON(state->str, v); + return *this; + } + + JSONList list(); + + JSONObject object(); + + JSONPlaceholder placeholder(); +}; + +class JSONObject : JSONWriter { + private: + friend class JSONList; + friend class JSONPlaceholder; + + void open(); + + JSONObject(JSONState* state) : JSONWriter(state) { open(); } + + void attr(const std::string& s); + + public: + JSONObject(std::ostream& str, bool indent = false) : JSONWriter(str, indent) { + open(); + } + + JSONObject(const JSONObject& obj) = delete; + + JSONObject(JSONObject&& obj) : JSONWriter(obj.state) { obj.state = 0; } + + ~JSONObject(); + + template <typename T> + JSONObject& attr(const std::string& name, const T& v) { + attr(name); + toJSON(state->str, v); + return *this; + } + + JSONList list(const std::string& name); + + JSONObject object(const std::string& name); + + JSONPlaceholder placeholder(const std::string& name); +}; + +class JSONPlaceholder : JSONWriter { + private: + friend class JSONList; + friend class JSONObject; + + JSONPlaceholder(JSONState* state) : JSONWriter(state) {} + + void assertValid() { + assertActive(); + assert(first); + } + + public: + JSONPlaceholder(std::ostream& str, bool indent = false) + : JSONWriter(str, indent) {} + + ~JSONPlaceholder() { assert(!first || std::uncaught_exception()); } + + template <typename T> + void write(const T& v) { + assertValid(); + first = false; + toJSON(state->str, v); + } + + JSONList list(); + + JSONObject object(); +}; + +} // namespace nix diff --git a/third_party/nix/src/libutil/lazy.hh b/third_party/nix/src/libutil/lazy.hh new file mode 100644 index 000000000000..b564b481fc38 --- /dev/null +++ b/third_party/nix/src/libutil/lazy.hh @@ -0,0 +1,45 @@ +#include <exception> +#include <functional> +#include <mutex> + +namespace nix { + +/* A helper class for lazily-initialized variables. + + Lazy<T> var([]() { return value; }); + + declares a variable of type T that is initialized to 'value' (in a + thread-safe way) on first use, that is, when var() is first + called. If the initialiser code throws an exception, then all + subsequent calls to var() will rethrow that exception. */ +template <typename T> +class Lazy { + typedef std::function<T()> Init; + + Init init; + + std::once_flag done; + + T value; + + std::exception_ptr ex; + + public: + Lazy(Init init) : init(init) {} + + const T& operator()() { + std::call_once(done, [&]() { + try { + value = init(); + } catch (...) { + ex = std::current_exception(); + } + }); + if (ex) { + std::rethrow_exception(ex); + } + return value; + } +}; + +} // namespace nix diff --git a/third_party/nix/src/libutil/lru-cache.hh b/third_party/nix/src/libutil/lru-cache.hh new file mode 100644 index 000000000000..f6fcdaf82ebd --- /dev/null +++ b/third_party/nix/src/libutil/lru-cache.hh @@ -0,0 +1,90 @@ +#pragma once + +#include <list> +#include <map> +#include <optional> + +namespace nix { + +/* A simple least-recently used cache. Not thread-safe. */ +template <typename Key, typename Value> +class LRUCache { + private: + size_t capacity; + + // Stupid wrapper to get around circular dependency between Data + // and LRU. + struct LRUIterator; + + using Data = std::map<Key, std::pair<LRUIterator, Value>>; + using LRU = std::list<typename Data::iterator>; + + struct LRUIterator { + typename LRU::iterator it; + }; + + Data data; + LRU lru; + + public: + LRUCache(size_t capacity) : capacity(capacity) {} + + /* Insert or upsert an item in the cache. */ + void upsert(const Key& key, const Value& value) { + if (capacity == 0) { + return; + } + + erase(key); + + if (data.size() >= capacity) { + /* Retire the oldest item. */ + auto oldest = lru.begin(); + data.erase(*oldest); + lru.erase(oldest); + } + + auto res = data.emplace(key, std::make_pair(LRUIterator(), value)); + assert(res.second); + auto& i(res.first); + + auto j = lru.insert(lru.end(), i); + + i->second.first.it = j; + } + + bool erase(const Key& key) { + auto i = data.find(key); + if (i == data.end()) { + return false; + } + lru.erase(i->second.first.it); + data.erase(i); + return true; + } + + /* Look up an item in the cache. If it exists, it becomes the most + recently used item. */ + std::optional<Value> get(const Key& key) { + auto i = data.find(key); + if (i == data.end()) { + return {}; + } + + /* Move this item to the back of the LRU list. */ + lru.erase(i->second.first.it); + auto j = lru.insert(lru.end(), i); + i->second.first.it = j; + + return i->second.second; + } + + size_t size() { return data.size(); } + + void clear() { + data.clear(); + lru.clear(); + } +}; + +} // namespace nix diff --git a/third_party/nix/src/libutil/meson.build b/third_party/nix/src/libutil/meson.build new file mode 100644 index 000000000000..a6494e6eac7c --- /dev/null +++ b/third_party/nix/src/libutil/meson.build @@ -0,0 +1,68 @@ +src_inc += include_directories('.') + +libutil_src = files( + join_paths(meson.source_root(), 'src/libutil/affinity.cc'), + join_paths(meson.source_root(), 'src/libutil/archive.cc'), + join_paths(meson.source_root(), 'src/libutil/args.cc'), + join_paths(meson.source_root(), 'src/libutil/compression.cc'), + join_paths(meson.source_root(), 'src/libutil/config.cc'), + join_paths(meson.source_root(), 'src/libutil/hash.cc'), + join_paths(meson.source_root(), 'src/libutil/json.cc'), + join_paths(meson.source_root(), 'src/libutil/serialise.cc'), + join_paths(meson.source_root(), 'src/libutil/thread-pool.cc'), + join_paths(meson.source_root(), 'src/libutil/util.cc'), + join_paths(meson.source_root(), 'src/libutil/xml-writer.cc'), +) + +libutil_headers = files( + join_paths(meson.source_root(), 'src/libutil/affinity.hh'), + join_paths(meson.source_root(), 'src/libutil/archive.hh'), + join_paths(meson.source_root(), 'src/libutil/args.hh'), + join_paths(meson.source_root(), 'src/libutil/compression.hh'), + join_paths(meson.source_root(), 'src/libutil/config.hh'), + join_paths(meson.source_root(), 'src/libutil/finally.hh'), + join_paths(meson.source_root(), 'src/libutil/hash.hh'), + join_paths(meson.source_root(), 'src/libutil/istringstream_nocopy.hh'), + join_paths(meson.source_root(), 'src/libutil/json.hh'), + join_paths(meson.source_root(), 'src/libutil/lazy.hh'), + join_paths(meson.source_root(), 'src/libutil/lru-cache.hh'), + join_paths(meson.source_root(), 'src/libutil/monitor-fd.hh'), + join_paths(meson.source_root(), 'src/libutil/pool.hh'), + join_paths(meson.source_root(), 'src/libutil/prefork-compat.hh'), + join_paths(meson.source_root(), 'src/libutil/ref.hh'), + join_paths(meson.source_root(), 'src/libutil/serialise.hh'), + join_paths(meson.source_root(), 'src/libutil/sync.hh'), + join_paths(meson.source_root(), 'src/libutil/thread-pool.hh'), + join_paths(meson.source_root(), 'src/libutil/types.hh'), + join_paths(meson.source_root(), 'src/libutil/util.hh'), + join_paths(meson.source_root(), 'src/libutil/xml-writer.hh'), +) + +libutil_dep_list = [ + glog_dep, + boost_dep, + libbz2_dep, + liblzma_dep, + libbrotli_dep, + openssl_dep, + pthread_dep, + libsodium_dep, +] + +libutil_link_list = [] +libutil_link_args = [] + +libutil_lib = library( + 'nixutil', + install : true, + install_mode : 'rwxr-xr-x', + install_dir : libdir, + include_directories : src_inc, + sources : libutil_src, + link_args : libutil_link_args, + # cpp_args : [ '-E' ], + dependencies : libutil_dep_list) + +install_headers( + libutil_headers, + install_dir : join_paths(includedir, 'nix')) diff --git a/third_party/nix/src/libutil/monitor-fd.hh b/third_party/nix/src/libutil/monitor-fd.hh new file mode 100644 index 000000000000..c818c5826116 --- /dev/null +++ b/third_party/nix/src/libutil/monitor-fd.hh @@ -0,0 +1,57 @@ +#pragma once + +#include <atomic> +#include <cstdlib> +#include <thread> + +#include <poll.h> +#include <signal.h> +#include <sys/types.h> +#include <unistd.h> + +namespace nix { + +class MonitorFdHup { + private: + std::thread thread; + + public: + MonitorFdHup(int fd) { + thread = std::thread([fd]() { + while (true) { + /* Wait indefinitely until a POLLHUP occurs. */ + struct pollfd fds[1]; + fds[0].fd = fd; + /* This shouldn't be necessary, but macOS doesn't seem to + like a zeroed out events field. + See rdar://37537852. + */ + fds[0].events = POLLHUP; + auto count = poll(fds, 1, -1); + if (count == -1) { + abort(); + } // can't happen + /* This shouldn't happen, but can on macOS due to a bug. + See rdar://37550628. + + This may eventually need a delay or further + coordination with the main thread if spinning proves + too harmful. + */ + if (count == 0) { + continue; + } + assert(fds[0].revents & POLLHUP); + triggerInterrupt(); + break; + } + }); + }; + + ~MonitorFdHup() { + pthread_cancel(thread.native_handle()); + thread.join(); + } +}; + +} // namespace nix diff --git a/third_party/nix/src/libutil/pool.hh b/third_party/nix/src/libutil/pool.hh new file mode 100644 index 000000000000..6d70795d35ac --- /dev/null +++ b/third_party/nix/src/libutil/pool.hh @@ -0,0 +1,174 @@ +#pragma once + +#include <cassert> +#include <functional> +#include <limits> +#include <list> +#include <memory> + +#include "ref.hh" +#include "sync.hh" + +namespace nix { + +/* This template class implements a simple pool manager of resources + of some type R, such as database connections. It is used as + follows: + + class Connection { ... }; + + Pool<Connection> pool; + + { + auto conn(pool.get()); + conn->exec("select ..."); + } + + Here, the Connection object referenced by โconnโ is automatically + returned to the pool when โconnโ goes out of scope. +*/ + +template <class R> +class Pool { + public: + /* A function that produces new instances of R on demand. */ + typedef std::function<ref<R>()> Factory; + + /* A function that checks whether an instance of R is still + usable. Unusable instances are removed from the pool. */ + typedef std::function<bool(const ref<R>&)> Validator; + + private: + Factory factory; + Validator validator; + + struct State { + size_t inUse = 0; + size_t max; + std::vector<ref<R>> idle; + }; + + Sync<State> state; + + std::condition_variable wakeup; + + public: + Pool( + size_t max = std::numeric_limits<size_t>::max(), + const Factory& factory = []() { return make_ref<R>(); }, + const Validator& validator = [](ref<R> r) { return true; }) + : factory(factory), validator(validator) { + auto state_(state.lock()); + state_->max = max; + } + + void incCapacity() { + auto state_(state.lock()); + state_->max++; + /* we could wakeup here, but this is only used when we're + * about to nest Pool usages, and we want to save the slot for + * the nested use if we can + */ + } + + void decCapacity() { + auto state_(state.lock()); + state_->max--; + } + + ~Pool() { + auto state_(state.lock()); + assert(!state_->inUse); + state_->max = 0; + state_->idle.clear(); + } + + class Handle { + private: + Pool& pool; + std::shared_ptr<R> r; + bool bad = false; + + friend Pool; + + Handle(Pool& pool, std::shared_ptr<R> r) : pool(pool), r(r) {} + + public: + Handle(Handle&& h) : pool(h.pool), r(h.r) { h.r.reset(); } + + Handle(const Handle& l) = delete; + + ~Handle() { + if (!r) { + return; + } + { + auto state_(pool.state.lock()); + if (!bad) { + state_->idle.push_back(ref<R>(r)); + } + assert(state_->inUse); + state_->inUse--; + } + pool.wakeup.notify_one(); + } + + R* operator->() { return &*r; } + R& operator*() { return *r; } + + void markBad() { bad = true; } + }; + + Handle get() { + { + auto state_(state.lock()); + + /* If we're over the maximum number of instance, we need + to wait until a slot becomes available. */ + while (state_->idle.empty() && state_->inUse >= state_->max) + state_.wait(wakeup); + + while (!state_->idle.empty()) { + auto p = state_->idle.back(); + state_->idle.pop_back(); + if (validator(p)) { + state_->inUse++; + return Handle(*this, p); + } + } + + state_->inUse++; + } + + /* We need to create a new instance. Because that might take a + while, we don't hold the lock in the meantime. */ + try { + Handle h(*this, factory()); + return h; + } catch (...) { + auto state_(state.lock()); + state_->inUse--; + wakeup.notify_one(); + throw; + } + } + + size_t count() { + auto state_(state.lock()); + return state_->idle.size() + state_->inUse; + } + + size_t capacity() { return state.lock()->max; } + + void flushBad() { + auto state_(state.lock()); + std::vector<ref<R>> left; + for (auto& p : state_->idle) + if (validator(p)) { + left.push_back(p); + } + std::swap(state_->idle, left); + } +}; + +} // namespace nix diff --git a/third_party/nix/src/libutil/prefork-compat.hh b/third_party/nix/src/libutil/prefork-compat.hh new file mode 100644 index 000000000000..ae9c25e5397b --- /dev/null +++ b/third_party/nix/src/libutil/prefork-compat.hh @@ -0,0 +1,21 @@ +// This file exists to preserve compatibility with the pre-fork +// version of Nix (2.3.4). +// +// During the refactoring, various structures are getting ripped out +// and replaced with the dummies below while code is being cleaned up. + +#ifndef NIX_SRC_LIBUTIL_PREFORK_COMPAT_H_ +#define NIX_SRC_LIBUTIL_PREFORK_COMPAT_H_ + +namespace nix::compat { + +// This is used in remote-store.cc for various things that expect the +// old logging protocol when talking over the wire. It will be removed +// hen the worker protocol is redone. +enum [[deprecated("old logging compat only")]] Verbosity{ + kError = 0, kWarn, kInfo, kTalkative, kChatty, kDebug, kVomit, +}; + +} // namespace nix::compat + +#endif // NIX_SRC_LIBUTIL_PREFORK_COMPAT_H_ diff --git a/third_party/nix/src/libutil/ref.hh b/third_party/nix/src/libutil/ref.hh new file mode 100644 index 000000000000..063e6b327c22 --- /dev/null +++ b/third_party/nix/src/libutil/ref.hh @@ -0,0 +1,65 @@ +#pragma once + +#include <exception> +#include <memory> +#include <stdexcept> + +namespace nix { + +/* A simple non-nullable reference-counted pointer. Actually a wrapper + around std::shared_ptr that prevents non-null constructions. */ +template <typename T> +class ref { + private: + std::shared_ptr<T> p; + + public: + ref<T>(const ref<T>& r) : p(r.p) {} + + explicit ref<T>(const std::shared_ptr<T>& p) : p(p) { + if (!p) { + throw std::invalid_argument("null pointer cast to ref"); + } + } + + explicit ref<T>(T* p) : p(p) { + if (!p) { + throw std::invalid_argument("null pointer cast to ref"); + } + } + + T* operator->() const { return &*p; } + + T& operator*() const { return *p; } + + operator std::shared_ptr<T>() const { return p; } + + std::shared_ptr<T> get_ptr() const { return p; } + + template <typename T2> + ref<T2> cast() const { + return ref<T2>(std::dynamic_pointer_cast<T2>(p)); + } + + template <typename T2> + std::shared_ptr<T2> dynamic_pointer_cast() const { + return std::dynamic_pointer_cast<T2>(p); + } + + template <typename T2> + operator ref<T2>() const { + return ref<T2>((std::shared_ptr<T2>)p); + } + + private: + template <typename T2, typename... Args> + friend ref<T2> make_ref(Args&&... args); +}; + +template <typename T, typename... Args> +inline ref<T> make_ref(Args&&... args) { + auto p = std::make_shared<T>(std::forward<Args>(args)...); + return ref<T>(p); +} + +} // namespace nix diff --git a/third_party/nix/src/libutil/serialise.cc b/third_party/nix/src/libutil/serialise.cc new file mode 100644 index 000000000000..34af4e840ab6 --- /dev/null +++ b/third_party/nix/src/libutil/serialise.cc @@ -0,0 +1,310 @@ +#include "serialise.hh" + +#include <boost/coroutine2/coroutine.hpp> +#include <cerrno> +#include <cstring> +#include <memory> +#include <utility> + +#include "glog/logging.h" +#include "util.hh" + +namespace nix { + +void BufferedSink::operator()(const unsigned char* data, size_t len) { + if (!buffer) { + buffer = decltype(buffer)(new unsigned char[bufSize]); + } + + while (len != 0u) { + /* Optimisation: bypass the buffer if the data exceeds the + buffer size. */ + if (bufPos + len >= bufSize) { + flush(); + write(data, len); + break; + } + /* Otherwise, copy the bytes to the buffer. Flush the buffer + when it's full. */ + size_t n = bufPos + len > bufSize ? bufSize - bufPos : len; + memcpy(buffer.get() + bufPos, data, n); + data += n; + bufPos += n; + len -= n; + if (bufPos == bufSize) { + flush(); + } + } +} + +void BufferedSink::flush() { + if (bufPos == 0) { + return; + } + size_t n = bufPos; + bufPos = 0; // don't trigger the assert() in ~BufferedSink() + write(buffer.get(), n); +} + +FdSink::~FdSink() { + try { + flush(); + } catch (...) { + ignoreException(); + } +} + +size_t threshold = 256 * 1024 * 1024; + +static void warnLargeDump() { + LOG(WARNING) + << "dumping very large path (> 256 MiB); this may run out of memory"; +} + +void FdSink::write(const unsigned char* data, size_t len) { + written += len; + static bool warned = false; + if (warn && !warned) { + if (written > threshold) { + warnLargeDump(); + warned = true; + } + } + try { + writeFull(fd, data, len); + } catch (SysError& e) { + _good = false; + throw; + } +} + +bool FdSink::good() { return _good; } + +void Source::operator()(unsigned char* data, size_t len) { + while (len != 0u) { + size_t n = read(data, len); + data += n; + len -= n; + } +} + +std::string Source::drain() { + std::string s; + std::vector<unsigned char> buf(8192); + while (true) { + size_t n; + try { + n = read(buf.data(), buf.size()); + s.append((char*)buf.data(), n); + } catch (EndOfFile&) { + break; + } + } + return s; +} + +size_t BufferedSource::read(unsigned char* data, size_t len) { + if (!buffer) { + buffer = decltype(buffer)(new unsigned char[bufSize]); + } + + if (bufPosIn == 0u) { + bufPosIn = readUnbuffered(buffer.get(), bufSize); + } + + /* Copy out the data in the buffer. */ + size_t n = len > bufPosIn - bufPosOut ? bufPosIn - bufPosOut : len; + memcpy(data, buffer.get() + bufPosOut, n); + bufPosOut += n; + if (bufPosIn == bufPosOut) { + bufPosIn = bufPosOut = 0; + } + return n; +} + +bool BufferedSource::hasData() { return bufPosOut < bufPosIn; } + +size_t FdSource::readUnbuffered(unsigned char* data, size_t len) { + ssize_t n; + do { + checkInterrupt(); + n = ::read(fd, (char*)data, len); + } while (n == -1 && errno == EINTR); + if (n == -1) { + _good = false; + throw SysError("reading from file"); + } + if (n == 0) { + _good = false; + throw EndOfFile("unexpected end-of-file"); + } + read += n; + return n; +} + +bool FdSource::good() { return _good; } + +size_t StringSource::read(unsigned char* data, size_t len) { + if (pos == s.size()) { + throw EndOfFile("end of string reached"); + } + size_t n = s.copy((char*)data, len, pos); + pos += n; + return n; +} + +#if BOOST_VERSION >= 106300 && BOOST_VERSION < 106600 +#error Coroutines are broken in this version of Boost! +#endif + +std::unique_ptr<Source> sinkToSource(const std::function<void(Sink&)>& fun, + const std::function<void()>& eof) { + struct SinkToSource : Source { + using coro_t = boost::coroutines2::coroutine<std::string>; + + std::function<void(Sink&)> fun; + std::function<void()> eof; + std::optional<coro_t::pull_type> coro; + bool started = false; + + SinkToSource(std::function<void(Sink&)> fun, std::function<void()> eof) + : fun(std::move(fun)), eof(std::move(eof)) {} + + std::string cur; + size_t pos = 0; + + size_t read(unsigned char* data, size_t len) override { + if (!coro) { + coro = coro_t::pull_type([&](coro_t::push_type& yield) { + LambdaSink sink([&](const unsigned char* data, size_t len) { + if (len != 0u) { + yield(std::string((const char*)data, len)); + } + }); + fun(sink); + }); + } + + if (!*coro) { + eof(); + abort(); + } + + if (pos == cur.size()) { + if (!cur.empty()) { + (*coro)(); + } + cur = coro->get(); + pos = 0; + } + + auto n = std::min(cur.size() - pos, len); + memcpy(data, (unsigned char*)cur.data() + pos, n); + pos += n; + + return n; + } + }; + + return std::make_unique<SinkToSource>(fun, eof); +} + +void writePadding(size_t len, Sink& sink) { + if ((len % 8) != 0u) { + unsigned char zero[8]; + memset(zero, 0, sizeof(zero)); + sink(zero, 8 - (len % 8)); + } +} + +void writeString(const unsigned char* buf, size_t len, Sink& sink) { + sink << len; + sink(buf, len); + writePadding(len, sink); +} + +Sink& operator<<(Sink& sink, const string& s) { + writeString((const unsigned char*)s.data(), s.size(), sink); + return sink; +} + +template <class T> +void writeStrings(const T& ss, Sink& sink) { + sink << ss.size(); + for (auto& i : ss) { + sink << i; + } +} + +Sink& operator<<(Sink& sink, const Strings& s) { + writeStrings(s, sink); + return sink; +} + +Sink& operator<<(Sink& sink, const StringSet& s) { + writeStrings(s, sink); + return sink; +} + +void readPadding(size_t len, Source& source) { + if ((len % 8) != 0u) { + unsigned char zero[8]; + size_t n = 8 - (len % 8); + source(zero, n); + for (unsigned int i = 0; i < n; i++) { + if (zero[i] != 0u) { + throw SerialisationError("non-zero padding"); + } + } + } +} + +size_t readString(unsigned char* buf, size_t max, Source& source) { + auto len = readNum<size_t>(source); + if (len > max) { + throw SerialisationError("string is too long"); + } + source(buf, len); + readPadding(len, source); + return len; +} + +string readString(Source& source, size_t max) { + auto len = readNum<size_t>(source); + if (len > max) { + throw SerialisationError("string is too long"); + } + std::string res(len, 0); + source((unsigned char*)res.data(), len); + readPadding(len, source); + return res; +} + +Source& operator>>(Source& in, string& s) { + s = readString(in); + return in; +} + +template <class T> +T readStrings(Source& source) { + auto count = readNum<size_t>(source); + T ss; + while (count--) { + ss.insert(ss.end(), readString(source)); + } + return ss; +} + +template Paths readStrings(Source& source); +template PathSet readStrings(Source& source); + +void StringSink::operator()(const unsigned char* data, size_t len) { + static bool warned = false; + if (!warned && s->size() > threshold) { + warnLargeDump(); + warned = true; + } + s->append((const char*)data, len); +} + +} // namespace nix diff --git a/third_party/nix/src/libutil/serialise.hh b/third_party/nix/src/libutil/serialise.hh new file mode 100644 index 000000000000..a5992b50ec6e --- /dev/null +++ b/third_party/nix/src/libutil/serialise.hh @@ -0,0 +1,287 @@ +#pragma once + +#include <memory> + +#include "types.hh" +#include "util.hh" + +namespace nix { + +/* Abstract destination of binary data. */ +struct Sink { + virtual ~Sink() {} + virtual void operator()(const unsigned char* data, size_t len) = 0; + virtual bool good() { return true; } + + void operator()(const std::string& s) { + (*this)((const unsigned char*)s.data(), s.size()); + } +}; + +/* A buffered abstract sink. */ +struct BufferedSink : Sink { + size_t bufSize, bufPos; + std::unique_ptr<unsigned char[]> buffer; + + BufferedSink(size_t bufSize = 32 * 1024) + : bufSize(bufSize), bufPos(0), buffer(nullptr) {} + + void operator()(const unsigned char* data, size_t len) override; + + void operator()(const std::string& s) { Sink::operator()(s); } + + void flush(); + + virtual void write(const unsigned char* data, size_t len) = 0; +}; + +/* Abstract source of binary data. */ +struct Source { + virtual ~Source() {} + + /* Store exactly โlenโ bytes in the buffer pointed to by โdataโ. + It blocks until all the requested data is available, or throws + an error if it is not going to be available. */ + void operator()(unsigned char* data, size_t len); + + /* Store up to โlenโ in the buffer pointed to by โdataโ, and + return the number of bytes stored. It blocks until at least + one byte is available. */ + virtual size_t read(unsigned char* data, size_t len) = 0; + + virtual bool good() { return true; } + + std::string drain(); +}; + +/* A buffered abstract source. */ +struct BufferedSource : Source { + size_t bufSize, bufPosIn, bufPosOut; + std::unique_ptr<unsigned char[]> buffer; + + BufferedSource(size_t bufSize = 32 * 1024) + : bufSize(bufSize), bufPosIn(0), bufPosOut(0), buffer(nullptr) {} + + size_t read(unsigned char* data, size_t len) override; + + bool hasData(); + + protected: + /* Underlying read call, to be overridden. */ + virtual size_t readUnbuffered(unsigned char* data, size_t len) = 0; +}; + +/* A sink that writes data to a file descriptor. */ +struct FdSink : BufferedSink { + int fd; + bool warn = false; + size_t written = 0; + + FdSink() : fd(-1) {} + FdSink(int fd) : fd(fd) {} + FdSink(FdSink&&) = default; + + FdSink& operator=(FdSink&& s) { + flush(); + fd = s.fd; + s.fd = -1; + warn = s.warn; + written = s.written; + return *this; + } + + ~FdSink(); + + void write(const unsigned char* data, size_t len) override; + + bool good() override; + + private: + bool _good = true; +}; + +/* A source that reads data from a file descriptor. */ +struct FdSource : BufferedSource { + int fd; + size_t read = 0; + + FdSource() : fd(-1) {} + FdSource(int fd) : fd(fd) {} + FdSource(FdSource&&) = default; + + FdSource& operator=(FdSource&& s) { + fd = s.fd; + s.fd = -1; + read = s.read; + return *this; + } + + bool good() override; + + protected: + size_t readUnbuffered(unsigned char* data, size_t len) override; + + private: + bool _good = true; +}; + +/* A sink that writes data to a string. */ +struct StringSink : Sink { + ref<std::string> s; + StringSink() : s(make_ref<std::string>()){}; + StringSink(ref<std::string> s) : s(s){}; + void operator()(const unsigned char* data, size_t len) override; +}; + +/* A source that reads data from a string. */ +struct StringSource : Source { + const string& s; + size_t pos; + StringSource(const string& _s) : s(_s), pos(0) {} + size_t read(unsigned char* data, size_t len) override; +}; + +/* Adapter class of a Source that saves all data read to `s'. */ +struct TeeSource : Source { + Source& orig; + ref<std::string> data; + TeeSource(Source& orig) : orig(orig), data(make_ref<std::string>()) {} + size_t read(unsigned char* data, size_t len) { + size_t n = orig.read(data, len); + this->data->append((const char*)data, n); + return n; + } +}; + +/* A reader that consumes the original Source until 'size'. */ +struct SizedSource : Source { + Source& orig; + size_t remain; + SizedSource(Source& orig, size_t size) : orig(orig), remain(size) {} + size_t read(unsigned char* data, size_t len) { + if (this->remain <= 0) { + throw EndOfFile("sized: unexpected end-of-file"); + } + len = std::min(len, this->remain); + size_t n = this->orig.read(data, len); + this->remain -= n; + return n; + } + + /* Consume the original source until no remain data is left to consume. */ + size_t drainAll() { + std::vector<unsigned char> buf(8192); + size_t sum = 0; + while (this->remain > 0) { + size_t n = read(buf.data(), buf.size()); + sum += n; + } + return sum; + } +}; + +/* Convert a function into a sink. */ +struct LambdaSink : Sink { + typedef std::function<void(const unsigned char*, size_t)> lambda_t; + + lambda_t lambda; + + LambdaSink(const lambda_t& lambda) : lambda(lambda) {} + + virtual void operator()(const unsigned char* data, size_t len) { + lambda(data, len); + } +}; + +/* Convert a function into a source. */ +struct LambdaSource : Source { + typedef std::function<size_t(unsigned char*, size_t)> lambda_t; + + lambda_t lambda; + + LambdaSource(const lambda_t& lambda) : lambda(lambda) {} + + size_t read(unsigned char* data, size_t len) override { + return lambda(data, len); + } +}; + +/* Convert a function that feeds data into a Sink into a Source. The + Source executes the function as a coroutine. */ +std::unique_ptr<Source> sinkToSource( + const std::function<void(Sink&)>& fun, + const std::function<void()>& eof = []() { + throw EndOfFile("coroutine has finished"); + }); + +void writePadding(size_t len, Sink& sink); +void writeString(const unsigned char* buf, size_t len, Sink& sink); + +inline Sink& operator<<(Sink& sink, uint64_t n) { + unsigned char buf[8]; + buf[0] = n & 0xff; + buf[1] = (n >> 8) & 0xff; + buf[2] = (n >> 16) & 0xff; + buf[3] = (n >> 24) & 0xff; + buf[4] = (n >> 32) & 0xff; + buf[5] = (n >> 40) & 0xff; + buf[6] = (n >> 48) & 0xff; + buf[7] = (unsigned char)(n >> 56) & 0xff; + sink(buf, sizeof(buf)); + return sink; +} + +Sink& operator<<(Sink& sink, const string& s); +Sink& operator<<(Sink& sink, const Strings& s); +Sink& operator<<(Sink& sink, const StringSet& s); + +MakeError(SerialisationError, Error) + + template <typename T> + T readNum(Source& source) { + unsigned char buf[8]; + source(buf, sizeof(buf)); + + uint64_t n = + ((unsigned long long)buf[0]) | ((unsigned long long)buf[1] << 8) | + ((unsigned long long)buf[2] << 16) | ((unsigned long long)buf[3] << 24) | + ((unsigned long long)buf[4] << 32) | ((unsigned long long)buf[5] << 40) | + ((unsigned long long)buf[6] << 48) | ((unsigned long long)buf[7] << 56); + + if (n > std::numeric_limits<T>::max()) + throw SerialisationError("serialised integer %d is too large for type '%s'", + n, typeid(T).name()); + + return (T)n; +} + +inline unsigned int readInt(Source& source) { + return readNum<unsigned int>(source); +} + +inline uint64_t readLongLong(Source& source) { + return readNum<uint64_t>(source); +} + +void readPadding(size_t len, Source& source); +size_t readString(unsigned char* buf, size_t max, Source& source); +string readString(Source& source, + size_t max = std::numeric_limits<size_t>::max()); +template <class T> +T readStrings(Source& source); + +Source& operator>>(Source& in, string& s); + +template <typename T> +Source& operator>>(Source& in, T& n) { + n = readNum<T>(in); + return in; +} + +template <typename T> +Source& operator>>(Source& in, bool& b) { + b = readNum<uint64_t>(in); + return in; +} + +} // namespace nix diff --git a/third_party/nix/src/libutil/sync.hh b/third_party/nix/src/libutil/sync.hh new file mode 100644 index 000000000000..b79d1176b9f3 --- /dev/null +++ b/third_party/nix/src/libutil/sync.hh @@ -0,0 +1,84 @@ +#pragma once + +#include <cassert> +#include <condition_variable> +#include <cstdlib> +#include <mutex> + +namespace nix { + +/* This template class ensures synchronized access to a value of type + T. It is used as follows: + + struct Data { int x; ... }; + + Sync<Data> data; + + { + auto data_(data.lock()); + data_->x = 123; + } + + Here, "data" is automatically unlocked when "data_" goes out of + scope. +*/ + +template <class T, class M = std::mutex> +class Sync { + private: + M mutex; + T data; + + public: + Sync() {} + Sync(const T& data) : data(data) {} + Sync(T&& data) noexcept : data(std::move(data)) {} + + class Lock { + private: + Sync* s; + std::unique_lock<M> lk; + friend Sync; + Lock(Sync* s) : s(s), lk(s->mutex) {} + + public: + Lock(Lock&& l) : s(l.s) { abort(); } + Lock(const Lock& l) = delete; + ~Lock() {} + T* operator->() { return &s->data; } + T& operator*() { return s->data; } + + void wait(std::condition_variable& cv) { + assert(s); + cv.wait(lk); + } + + template <class Rep, class Period> + std::cv_status wait_for( + std::condition_variable& cv, + const std::chrono::duration<Rep, Period>& duration) { + assert(s); + return cv.wait_for(lk, duration); + } + + template <class Rep, class Period, class Predicate> + bool wait_for(std::condition_variable& cv, + const std::chrono::duration<Rep, Period>& duration, + Predicate pred) { + assert(s); + return cv.wait_for(lk, duration, pred); + } + + template <class Clock, class Duration> + std::cv_status wait_until( + std::condition_variable& cv, + const std::chrono::time_point<Clock, Duration>& duration) { + assert(s); + return cv.wait_until(lk, duration); + } + }; + + Lock lock() { return Lock(this); } +}; + +} // namespace nix diff --git a/third_party/nix/src/libutil/thread-pool.cc b/third_party/nix/src/libutil/thread-pool.cc new file mode 100644 index 000000000000..879de446c294 --- /dev/null +++ b/third_party/nix/src/libutil/thread-pool.cc @@ -0,0 +1,162 @@ +#include "thread-pool.hh" + +#include "affinity.hh" +#include "glog/logging.h" + +namespace nix { + +ThreadPool::ThreadPool(size_t _maxThreads) : maxThreads(_maxThreads) { + restoreAffinity(); // FIXME + + if (maxThreads == 0u) { + maxThreads = std::thread::hardware_concurrency(); + if (maxThreads == 0u) { + maxThreads = 1; + } + } + + DLOG(INFO) << "starting pool of " << maxThreads - 1 << " threads"; +} + +ThreadPool::~ThreadPool() { shutdown(); } + +void ThreadPool::shutdown() { + std::vector<std::thread> workers; + { + auto state(state_.lock()); + quit = true; + std::swap(workers, state->workers); + } + + if (workers.empty()) { + return; + } + + DLOG(INFO) << "reaping " << workers.size() << " worker threads"; + + work.notify_all(); + + for (auto& thr : workers) { + thr.join(); + } +} + +void ThreadPool::enqueue(const work_t& t) { + auto state(state_.lock()); + if (quit) { + throw ThreadPoolShutDown( + "cannot enqueue a work item while the thread pool is shutting down"); + } + state->pending.push(t); + /* Note: process() also executes items, so count it as a worker. */ + if (state->pending.size() > state->workers.size() + 1 && + state->workers.size() + 1 < maxThreads) { + state->workers.emplace_back(&ThreadPool::doWork, this, false); + } + work.notify_one(); +} + +void ThreadPool::process() { + state_.lock()->draining = true; + + /* Do work until no more work is pending or active. */ + try { + doWork(true); + + auto state(state_.lock()); + + assert(quit); + + if (state->exception) { + std::rethrow_exception(state->exception); + } + + } catch (...) { + /* In the exceptional case, some workers may still be + active. They may be referencing the stack frame of the + caller. So wait for them to finish. (~ThreadPool also does + this, but it might be destroyed after objects referenced by + the work item lambdas.) */ + shutdown(); + throw; + } +} + +void ThreadPool::doWork(bool mainThread) { + if (!mainThread) { + interruptCheck = [&]() { return (bool)quit; }; + } + + bool didWork = false; + std::exception_ptr exc; + + while (true) { + work_t w; + { + auto state(state_.lock()); + + if (didWork) { + assert(state->active); + state->active--; + + if (exc) { + if (!state->exception) { + state->exception = exc; + // Tell the other workers to quit. + quit = true; + work.notify_all(); + } else { + /* Print the exception, since we can't + propagate it. */ + try { + std::rethrow_exception(exc); + } catch (std::exception& e) { + if ((dynamic_cast<Interrupted*>(&e) == nullptr) && + (dynamic_cast<ThreadPoolShutDown*>(&e) == nullptr)) { + ignoreException(); + } + } catch (...) { + } + } + } + } + + /* Wait until a work item is available or we're asked to + quit. */ + while (true) { + if (quit) { + return; + } + + if (!state->pending.empty()) { + break; + } + + /* If there are no active or pending items, and the + main thread is running process(), then no new items + can be added. So exit. */ + if ((state->active == 0u) && state->draining) { + quit = true; + work.notify_all(); + return; + } + + state.wait(work); + } + + w = std::move(state->pending.front()); + state->pending.pop(); + state->active++; + } + + try { + w(); + } catch (...) { + exc = std::current_exception(); + } + + didWork = true; + } +} + +} // namespace nix diff --git a/third_party/nix/src/libutil/thread-pool.hh b/third_party/nix/src/libutil/thread-pool.hh new file mode 100644 index 000000000000..72837ca1eecc --- /dev/null +++ b/third_party/nix/src/libutil/thread-pool.hh @@ -0,0 +1,138 @@ +#pragma once + +#include <atomic> +#include <functional> +#include <map> +#include <queue> +#include <thread> + +#include "sync.hh" +#include "util.hh" + +namespace nix { + +MakeError(ThreadPoolShutDown, Error) + + /* A simple thread pool that executes a queue of work items + (lambdas). */ + class ThreadPool { + public: + ThreadPool(size_t maxThreads = 0); + + ~ThreadPool(); + + // FIXME: use std::packaged_task? + typedef std::function<void()> work_t; + + /* Enqueue a function to be executed by the thread pool. */ + void enqueue(const work_t& t); + + /* Execute work items until the queue is empty. Note that work + items are allowed to add new items to the queue; this is + handled correctly. Queue processing stops prematurely if any + work item throws an exception. This exception is propagated to + the calling thread. If multiple work items throw an exception + concurrently, only one item is propagated; the others are + printed on stderr and otherwise ignored. */ + void process(); + + private: + size_t maxThreads; + + struct State { + std::queue<work_t> pending; + size_t active = 0; + std::exception_ptr exception; + std::vector<std::thread> workers; + bool draining = false; + }; + + std::atomic_bool quit{false}; + + Sync<State> state_; + + std::condition_variable work; + + void doWork(bool mainThread); + + void shutdown(); +}; + +/* Process in parallel a set of items of type T that have a partial + ordering between them. Thus, any item is only processed after all + its dependencies have been processed. */ +template <typename T> +void processGraph(ThreadPool& pool, const std::set<T>& nodes, + std::function<std::set<T>(const T&)> getEdges, + std::function<void(const T&)> processNode) { + struct Graph { + std::set<T> left; + std::map<T, std::set<T>> refs, rrefs; + }; + + Sync<Graph> graph_(Graph{nodes, {}, {}}); + + std::function<void(const T&)> worker; + + worker = [&](const T& node) { + { + auto graph(graph_.lock()); + auto i = graph->refs.find(node); + if (i == graph->refs.end()) { + goto getRefs; + } + goto doWork; + } + + getRefs : { + auto refs = getEdges(node); + refs.erase(node); + + { + auto graph(graph_.lock()); + for (auto& ref : refs) + if (graph->left.count(ref)) { + graph->refs[node].insert(ref); + graph->rrefs[ref].insert(node); + } + if (graph->refs[node].empty()) { + goto doWork; + } + } + } + + return; + + doWork: + processNode(node); + + /* Enqueue work for all nodes that were waiting on this one + and have no unprocessed dependencies. */ + { + auto graph(graph_.lock()); + for (auto& rref : graph->rrefs[node]) { + auto& refs(graph->refs[rref]); + auto i = refs.find(node); + assert(i != refs.end()); + refs.erase(i); + if (refs.empty()) { + pool.enqueue(std::bind(worker, rref)); + } + } + graph->left.erase(node); + graph->refs.erase(node); + graph->rrefs.erase(node); + } + }; + + for (auto& node : nodes) { + pool.enqueue(std::bind(worker, std::ref(node))); + } + + pool.process(); + + if (!graph_.lock()->left.empty()) + throw Error("graph processing incomplete (cyclic reference?)"); +} + +} // namespace nix diff --git a/third_party/nix/src/libutil/types.hh b/third_party/nix/src/libutil/types.hh new file mode 100644 index 000000000000..ac1b802ce0f1 --- /dev/null +++ b/third_party/nix/src/libutil/types.hh @@ -0,0 +1,120 @@ +#pragma once + +#include <boost/format.hpp> +#include <list> +#include <map> +#include <memory> +#include <set> +#include <string> + +#include "ref.hh" + +/* Before 4.7, gcc's std::exception uses empty throw() specifiers for + * its (virtual) destructor and what() in c++11 mode, in violation of spec + */ +#ifdef __GNUC__ +#if __GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 7) +#define EXCEPTION_NEEDS_THROW_SPEC +#endif +#endif + +namespace nix { + +/* Inherit some names from other namespaces for convenience. */ +using boost::format; +using std::list; +using std::set; +using std::string; +using std::vector; + +/* A variadic template that does nothing. Useful to call a function + for all variadic arguments but ignoring the result. */ +struct nop { + template <typename... T> + nop(T...) {} +}; + +struct FormatOrString { + string s; + FormatOrString(const string& s) : s(s){}; + FormatOrString(const format& f) : s(f.str()){}; + FormatOrString(const char* s) : s(s){}; +}; + +/* A helper for formatting strings. โfmt(format, a_0, ..., a_n)โ is + equivalent to โboost::format(format) % a_0 % ... % + ... a_nโ. However, โfmt(s)โ is equivalent to โsโ (so no %-expansion + takes place). */ + +inline std::string fmt(const std::string& s) { return s; } + +inline std::string fmt(const char* s) { return s; } + +inline std::string fmt(const FormatOrString& fs) { return fs.s; } + +template <typename... Args> +inline std::string fmt(const std::string& fs, Args... args) { + boost::format f(fs); + f.exceptions(boost::io::all_error_bits ^ boost::io::too_many_args_bit); + nop{boost::io::detail::feed(f, args)...}; + return f.str(); +} + +/* BaseError should generally not be caught, as it has Interrupted as + a subclass. Catch Error instead. */ +class BaseError : public std::exception { + protected: + string prefix_; // used for location traces etc. + string err; + + public: + unsigned int status = 1; // exit status + + template <typename... Args> + BaseError(unsigned int status, Args... args) + : err(fmt(args...)), status(status) {} + + template <typename... Args> + BaseError(Args... args) : err(fmt(args...)) {} + +#ifdef EXCEPTION_NEEDS_THROW_SPEC + ~BaseError() noexcept {}; + const char* what() const noexcept { return err.c_str(); } +#else + const char* what() const noexcept { return err.c_str(); } +#endif + + const string& msg() const { return err; } + const string& prefix() const { return prefix_; } + BaseError& addPrefix(const FormatOrString& fs); +}; + +#define MakeError(newClass, superClass) \ + class newClass : public superClass { \ + public: \ + using superClass::superClass; \ + }; + +MakeError(Error, BaseError) + + class SysError : public Error { + public: + int errNo; + + template <typename... Args> + SysError(Args... args) : Error(addErrno(fmt(args...))) {} + + private: + std::string addErrno(const std::string& s); +}; + +typedef list<string> Strings; +typedef set<string> StringSet; +typedef std::map<std::string, std::string> StringMap; + +/* Paths are just strings. */ +typedef string Path; +typedef list<Path> Paths; +typedef set<Path> PathSet; + +} // namespace nix diff --git a/third_party/nix/src/libutil/util.cc b/third_party/nix/src/libutil/util.cc new file mode 100644 index 000000000000..0ad2c51148c9 --- /dev/null +++ b/third_party/nix/src/libutil/util.cc @@ -0,0 +1,1528 @@ +#include "util.hh" + +#include <cctype> +#include <cerrno> +#include <climits> +#include <cstdio> +#include <cstdlib> +#include <cstring> +#include <future> +#include <iostream> +#include <sstream> +#include <thread> +#include <utility> + +#include <fcntl.h> +#include <grp.h> +#include <pwd.h> +#include <sys/ioctl.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <unistd.h> + +#include "affinity.hh" +#include "finally.hh" +#include "glog/logging.h" +#include "lazy.hh" +#include "serialise.hh" +#include "sync.hh" + +#ifdef __APPLE__ +#include <sys/syscall.h> +#endif + +#ifdef __linux__ +#include <sys/prctl.h> +#endif + +namespace nix { + +const std::string nativeSystem = SYSTEM; + +BaseError& BaseError::addPrefix(const FormatOrString& fs) { + prefix_ = fs.s + prefix_; + return *this; +} + +std::string SysError::addErrno(const std::string& s) { + errNo = errno; + return s + ": " + strerror(errNo); +} + +string getEnv(const string& key, const string& def) { + char* value = getenv(key.c_str()); + return value != nullptr ? string(value) : def; +} + +std::map<std::string, std::string> getEnv() { + std::map<std::string, std::string> env; + for (size_t i = 0; environ[i] != nullptr; ++i) { + auto s = environ[i]; + auto eq = strchr(s, '='); + if (eq == nullptr) { + // invalid env, just keep going + continue; + } + env.emplace(std::string(s, eq), std::string(eq + 1)); + } + return env; +} + +void clearEnv() { + for (auto& name : getEnv()) { + unsetenv(name.first.c_str()); + } +} + +void replaceEnv(const std::map<std::string, std::string>& newEnv) { + clearEnv(); + for (const auto& newEnvVar : newEnv) { + setenv(newEnvVar.first.c_str(), newEnvVar.second.c_str(), 1); + } +} + +Path absPath(Path path, Path dir) { + if (path[0] != '/') { + if (dir.empty()) { +#ifdef __GNU__ + /* GNU (aka. GNU/Hurd) doesn't have any limitation on path + lengths and doesn't define `PATH_MAX'. */ + char* buf = getcwd(NULL, 0); + if (buf == NULL) +#else + char buf[PATH_MAX]; + if (getcwd(buf, sizeof(buf)) == nullptr) { +#endif + throw SysError("cannot get cwd"); + } + dir = buf; +#ifdef __GNU__ + free(buf); +#endif + } + path = dir + "/" + path; +} +return canonPath(path); +} // namespace nix + +Path canonPath(const Path& path, bool resolveSymlinks) { + assert(!path.empty()); + + string s; + + if (path[0] != '/') { + throw Error(format("not an absolute path: '%1%'") % path); + } + + string::const_iterator i = path.begin(); + string::const_iterator end = path.end(); + string temp; + + /* Count the number of times we follow a symlink and stop at some + arbitrary (but high) limit to prevent infinite loops. */ + unsigned int followCount = 0; + unsigned int maxFollow = 1024; + + while (true) { + /* Skip slashes. */ + while (i != end && *i == '/') { + i++; + } + if (i == end) { + break; + } + + /* Ignore `.'. */ + if (*i == '.' && (i + 1 == end || i[1] == '/')) { + i++; + } + + /* If `..', delete the last component. */ + else if (*i == '.' && i + 1 < end && i[1] == '.' && + (i + 2 == end || i[2] == '/')) { + if (!s.empty()) { + s.erase(s.rfind('/')); + } + i += 2; + } + + /* Normal component; copy it. */ + else { + s += '/'; + while (i != end && *i != '/') { + s += *i++; + } + + /* If s points to a symlink, resolve it and restart (since + the symlink target might contain new symlinks). */ + if (resolveSymlinks && isLink(s)) { + if (++followCount >= maxFollow) { + throw Error(format("infinite symlink recursion in path '%1%'") % + path); + } + temp = absPath(readLink(s), dirOf(s)) + string(i, end); + i = temp.begin(); /* restart */ + end = temp.end(); + s = ""; + } + } + } + + return s.empty() ? "/" : s; +} + +Path dirOf(const Path& path) { + Path::size_type pos = path.rfind('/'); + if (pos == string::npos) { + return "."; + } + return pos == 0 ? "/" : Path(path, 0, pos); +} + +string baseNameOf(const Path& path) { + if (path.empty()) { + return ""; + } + + Path::size_type last = path.length() - 1; + if (path[last] == '/' && last > 0) { + last -= 1; + } + + Path::size_type pos = path.rfind('/', last); + if (pos == string::npos) { + pos = 0; + } else { + pos += 1; + } + + return string(path, pos, last - pos + 1); +} + +bool isInDir(const Path& path, const Path& dir) { + return path[0] == '/' && string(path, 0, dir.size()) == dir && + path.size() >= dir.size() + 2 && path[dir.size()] == '/'; +} + +bool isDirOrInDir(const Path& path, const Path& dir) { + return path == dir || isInDir(path, dir); +} + +struct stat lstat(const Path& path) { + struct stat st; + if (lstat(path.c_str(), &st) != 0) { + throw SysError(format("getting status of '%1%'") % path); + } + return st; +} + +bool pathExists(const Path& path) { + int res; + struct stat st; + res = lstat(path.c_str(), &st); + if (res == 0) { + return true; + } + if (errno != ENOENT && errno != ENOTDIR) { + throw SysError(format("getting status of %1%") % path); + } + return false; +} + +Path readLink(const Path& path) { + checkInterrupt(); + std::vector<char> buf; + for (ssize_t bufSize = PATH_MAX / 4; true; bufSize += bufSize / 2) { + buf.resize(bufSize); + ssize_t rlSize = readlink(path.c_str(), buf.data(), bufSize); + if (rlSize == -1) { + if (errno == EINVAL) { + throw Error("'%1%' is not a symlink", path); + } + throw SysError("reading symbolic link '%1%'", path); + + } else if (rlSize < bufSize) { + return string(buf.data(), rlSize); + } + } +} + +bool isLink(const Path& path) { + struct stat st = lstat(path); + return S_ISLNK(st.st_mode); +} + +DirEntries readDirectory(const Path& path) { + DirEntries entries; + entries.reserve(64); + + AutoCloseDir dir(opendir(path.c_str())); + if (!dir) { + throw SysError(format("opening directory '%1%'") % path); + } + + struct dirent* dirent; + while (errno = 0, dirent = readdir(dir.get())) { /* sic */ + checkInterrupt(); + string name = dirent->d_name; + if (name == "." || name == "..") { + continue; + } + entries.emplace_back(name, dirent->d_ino, +#ifdef HAVE_STRUCT_DIRENT_D_TYPE + dirent->d_type +#else + DT_UNKNOWN +#endif + ); + } + if (errno) { + throw SysError(format("reading directory '%1%'") % path); + } + + return entries; +} + +unsigned char getFileType(const Path& path) { + struct stat st = lstat(path); + if (S_ISDIR(st.st_mode)) { + return DT_DIR; + } + if (S_ISLNK(st.st_mode)) { + return DT_LNK; + } + if (S_ISREG(st.st_mode)) { + return DT_REG; + } + return DT_UNKNOWN; +} + +string readFile(int fd) { + struct stat st; + if (fstat(fd, &st) == -1) { + throw SysError("statting file"); + } + + std::vector<unsigned char> buf(st.st_size); + readFull(fd, buf.data(), st.st_size); + + return string((char*)buf.data(), st.st_size); +} + +string readFile(const Path& path, bool drain) { + AutoCloseFD fd = open(path.c_str(), O_RDONLY | O_CLOEXEC); + if (!fd) { + throw SysError(format("opening file '%1%'") % path); + } + return drain ? drainFD(fd.get()) : readFile(fd.get()); +} + +void readFile(const Path& path, Sink& sink) { + AutoCloseFD fd = open(path.c_str(), O_RDONLY | O_CLOEXEC); + if (!fd) { + throw SysError("opening file '%s'", path); + } + drainFD(fd.get(), sink); +} + +void writeFile(const Path& path, const string& s, mode_t mode) { + AutoCloseFD fd = + open(path.c_str(), O_WRONLY | O_TRUNC | O_CREAT | O_CLOEXEC, mode); + if (!fd) { + throw SysError(format("opening file '%1%'") % path); + } + writeFull(fd.get(), s); +} + +void writeFile(const Path& path, Source& source, mode_t mode) { + AutoCloseFD fd = + open(path.c_str(), O_WRONLY | O_TRUNC | O_CREAT | O_CLOEXEC, mode); + if (!fd) { + throw SysError(format("opening file '%1%'") % path); + } + + std::vector<unsigned char> buf(64 * 1024); + + while (true) { + try { + auto n = source.read(buf.data(), buf.size()); + writeFull(fd.get(), (unsigned char*)buf.data(), n); + } catch (EndOfFile&) { + break; + } + } +} + +string readLine(int fd) { + string s; + while (true) { + checkInterrupt(); + char ch; + // FIXME: inefficient + ssize_t rd = read(fd, &ch, 1); + if (rd == -1) { + if (errno != EINTR) { + throw SysError("reading a line"); + } + } else if (rd == 0) { + throw EndOfFile("unexpected EOF reading a line"); + } else { + if (ch == '\n') { + return s; + } + s += ch; + } + } +} + +void writeLine(int fd, string s) { + s += '\n'; + writeFull(fd, s); +} + +static void _deletePath(const Path& path, unsigned long long& bytesFreed) { + checkInterrupt(); + + struct stat st; + if (lstat(path.c_str(), &st) == -1) { + if (errno == ENOENT) { + return; + } + throw SysError(format("getting status of '%1%'") % path); + } + + if (!S_ISDIR(st.st_mode) && st.st_nlink == 1) { + bytesFreed += st.st_size; + } + + if (S_ISDIR(st.st_mode)) { + /* Make the directory accessible. */ + const auto PERM_MASK = S_IRUSR | S_IWUSR | S_IXUSR; + if ((st.st_mode & PERM_MASK) != PERM_MASK) { + if (chmod(path.c_str(), st.st_mode | PERM_MASK) == -1) { + throw SysError(format("chmod '%1%'") % path); + } + } + + for (auto& i : readDirectory(path)) { + _deletePath(path + "/" + i.name, bytesFreed); + } + } + + if (remove(path.c_str()) == -1) { + if (errno == ENOENT) { + return; + } + throw SysError(format("cannot unlink '%1%'") % path); + } +} + +void deletePath(const Path& path) { + unsigned long long dummy; + deletePath(path, dummy); +} + +void deletePath(const Path& path, unsigned long long& bytesFreed) { + // Activity act(*logger, lvlDebug, format("recursively deleting path '%1%'") % + // path); + bytesFreed = 0; + _deletePath(path, bytesFreed); +} + +static Path tempName(Path tmpRoot, const Path& prefix, bool includePid, + int& counter) { + tmpRoot = + canonPath(tmpRoot.empty() ? getEnv("TMPDIR", "/tmp") : tmpRoot, true); + if (includePid) { + return (format("%1%/%2%-%3%-%4%") % tmpRoot % prefix % getpid() % counter++) + .str(); + } + return (format("%1%/%2%-%3%") % tmpRoot % prefix % counter++).str(); +} + +Path createTempDir(const Path& tmpRoot, const Path& prefix, bool includePid, + bool useGlobalCounter, mode_t mode) { + static int globalCounter = 0; + int localCounter = 0; + int& counter(useGlobalCounter ? globalCounter : localCounter); + + while (true) { + checkInterrupt(); + Path tmpDir = tempName(tmpRoot, prefix, includePid, counter); + if (mkdir(tmpDir.c_str(), mode) == 0) { +#if __FreeBSD__ + /* Explicitly set the group of the directory. This is to + work around around problems caused by BSD's group + ownership semantics (directories inherit the group of + the parent). For instance, the group of /tmp on + FreeBSD is "wheel", so all directories created in /tmp + will be owned by "wheel"; but if the user is not in + "wheel", then "tar" will fail to unpack archives that + have the setgid bit set on directories. */ + if (chown(tmpDir.c_str(), (uid_t)-1, getegid()) != 0) + throw SysError(format("setting group of directory '%1%'") % tmpDir); +#endif + return tmpDir; + } + if (errno != EEXIST) { + throw SysError(format("creating directory '%1%'") % tmpDir); + } + } +} + +std::string getUserName() { + auto pw = getpwuid(geteuid()); + std::string name = pw != nullptr ? pw->pw_name : getEnv("USER", ""); + if (name.empty()) { + throw Error("cannot figure out user name"); + } + return name; +} + +static Lazy<Path> getHome2([]() { + Path homeDir = getEnv("HOME"); + if (homeDir.empty()) { + std::vector<char> buf(16384); + struct passwd pwbuf; + struct passwd* pw; + if (getpwuid_r(geteuid(), &pwbuf, buf.data(), buf.size(), &pw) != 0 || + (pw == nullptr) || (pw->pw_dir == nullptr) || (pw->pw_dir[0] == 0)) { + throw Error("cannot determine user's home directory"); + } + homeDir = pw->pw_dir; + } + return homeDir; +}); + +Path getHome() { return getHome2(); } + +Path getCacheDir() { + Path cacheDir = getEnv("XDG_CACHE_HOME"); + if (cacheDir.empty()) { + cacheDir = getHome() + "/.cache"; + } + return cacheDir; +} + +Path getConfigDir() { + Path configDir = getEnv("XDG_CONFIG_HOME"); + if (configDir.empty()) { + configDir = getHome() + "/.config"; + } + return configDir; +} + +std::vector<Path> getConfigDirs() { + Path configHome = getConfigDir(); + string configDirs = getEnv("XDG_CONFIG_DIRS"); + auto result = tokenizeString<std::vector<string>>(configDirs, ":"); + result.insert(result.begin(), configHome); + return result; +} + +Path getDataDir() { + Path dataDir = getEnv("XDG_DATA_HOME"); + if (dataDir.empty()) { + dataDir = getHome() + "/.local/share"; + } + return dataDir; +} + +Paths createDirs(const Path& path) { + Paths created; + if (path == "/") { + return created; + } + + struct stat st; + if (lstat(path.c_str(), &st) == -1) { + created = createDirs(dirOf(path)); + if (mkdir(path.c_str(), 0777) == -1 && errno != EEXIST) { + throw SysError(format("creating directory '%1%'") % path); + } + st = lstat(path); + created.push_back(path); + } + + if (S_ISLNK(st.st_mode) && stat(path.c_str(), &st) == -1) { + throw SysError(format("statting symlink '%1%'") % path); + } + + if (!S_ISDIR(st.st_mode)) { + throw Error(format("'%1%' is not a directory") % path); + } + + return created; +} + +void createSymlink(const Path& target, const Path& link) { + if (symlink(target.c_str(), link.c_str()) != 0) { + throw SysError(format("creating symlink from '%1%' to '%2%'") % link % + target); + } +} + +void replaceSymlink(const Path& target, const Path& link) { + for (unsigned int n = 0; true; n++) { + Path tmp = canonPath(fmt("%s/.%d_%s", dirOf(link), n, baseNameOf(link))); + + try { + createSymlink(target, tmp); + } catch (SysError& e) { + if (e.errNo == EEXIST) { + continue; + } + throw; + } + + if (rename(tmp.c_str(), link.c_str()) != 0) { + throw SysError(format("renaming '%1%' to '%2%'") % tmp % link); + } + + break; + } +} + +void readFull(int fd, unsigned char* buf, size_t count) { + while (count != 0u) { + checkInterrupt(); + ssize_t res = read(fd, (char*)buf, count); + if (res == -1) { + if (errno == EINTR) { + continue; + } + throw SysError("reading from file"); + } + if (res == 0) { + throw EndOfFile("unexpected end-of-file"); + } + count -= res; + buf += res; + } +} + +void writeFull(int fd, const unsigned char* buf, size_t count, + bool allowInterrupts) { + while (count != 0u) { + if (allowInterrupts) { + checkInterrupt(); + } + ssize_t res = write(fd, (char*)buf, count); + if (res == -1 && errno != EINTR) { + throw SysError("writing to file"); + } + if (res > 0) { + count -= res; + buf += res; + } + } +} + +void writeFull(int fd, const string& s, bool allowInterrupts) { + writeFull(fd, (const unsigned char*)s.data(), s.size(), allowInterrupts); +} + +string drainFD(int fd, bool block) { + StringSink sink; + drainFD(fd, sink, block); + return std::move(*sink.s); +} + +void drainFD(int fd, Sink& sink, bool block) { + int saved; + + Finally finally([&]() { + if (!block) { + if (fcntl(fd, F_SETFL, saved) == -1) { + throw SysError("making file descriptor blocking"); + } + } + }); + + if (!block) { + saved = fcntl(fd, F_GETFL); + if (fcntl(fd, F_SETFL, saved | O_NONBLOCK) == -1) { + throw SysError("making file descriptor non-blocking"); + } + } + + std::vector<unsigned char> buf(64 * 1024); + while (true) { + checkInterrupt(); + ssize_t rd = read(fd, buf.data(), buf.size()); + if (rd == -1) { + if (!block && (errno == EAGAIN || errno == EWOULDBLOCK)) { + break; + } + if (errno != EINTR) { + throw SysError("reading from file"); + } + } else if (rd == 0) { + break; + } else { + sink(buf.data(), rd); + } + } +} + +////////////////////////////////////////////////////////////////////// + +AutoDelete::AutoDelete() : del{false} {} + +AutoDelete::AutoDelete(string p, bool recursive) : path(std::move(p)) { + del = true; + this->recursive = recursive; +} + +AutoDelete::~AutoDelete() { + try { + if (del) { + if (recursive) { + deletePath(path); + } else { + if (remove(path.c_str()) == -1) { + throw SysError(format("cannot unlink '%1%'") % path); + } + } + } + } catch (...) { + ignoreException(); + } +} + +void AutoDelete::cancel() { del = false; } + +void AutoDelete::reset(const Path& p, bool recursive) { + path = p; + this->recursive = recursive; + del = true; +} + +////////////////////////////////////////////////////////////////////// + +AutoCloseFD::AutoCloseFD() : fd{-1} {} + +AutoCloseFD::AutoCloseFD(int fd) : fd{fd} {} + +AutoCloseFD::AutoCloseFD(AutoCloseFD&& that) : fd{that.fd} { that.fd = -1; } + +AutoCloseFD& AutoCloseFD::operator=(AutoCloseFD&& that) { + close(); + fd = that.fd; + that.fd = -1; + return *this; +} + +AutoCloseFD::~AutoCloseFD() { + try { + close(); + } catch (...) { + ignoreException(); + } +} + +int AutoCloseFD::get() const { return fd; } + +void AutoCloseFD::close() { + if (fd != -1) { + if (::close(fd) == -1) { /* This should never happen. */ + throw SysError(format("closing file descriptor %1%") % fd); + } + } +} + +AutoCloseFD::operator bool() const { return fd != -1; } + +int AutoCloseFD::release() { + int oldFD = fd; + fd = -1; + return oldFD; +} + +void Pipe::create() { + int fds[2]; +#if HAVE_PIPE2 + if (pipe2(fds, O_CLOEXEC) != 0) { + throw SysError("creating pipe"); + } +#else + if (pipe(fds) != 0) { + throw SysError("creating pipe"); + } + closeOnExec(fds[0]); + closeOnExec(fds[1]); +#endif + readSide = fds[0]; + writeSide = fds[1]; +} + +////////////////////////////////////////////////////////////////////// + +Pid::Pid() = default; + +Pid::Pid(pid_t pid) : pid(pid) {} + +Pid::~Pid() { + if (pid != -1) { + kill(); + } +} + +void Pid::operator=(pid_t pid) { + if (this->pid != -1 && this->pid != pid) { + kill(); + } + this->pid = pid; + killSignal = SIGKILL; // reset signal to default +} + +Pid::operator pid_t() { return pid; } + +int Pid::kill() { + assert(pid != -1); + + DLOG(INFO) << "killing process " << pid; + + /* Send the requested signal to the child. If it has its own + process group, send the signal to every process in the child + process group (which hopefully includes *all* its children). */ + if (::kill(separatePG ? -pid : pid, killSignal) != 0) { + /* On BSDs, killing a process group will return EPERM if all + processes in the group are zombies (or something like + that). So try to detect and ignore that situation. */ +#if __FreeBSD__ || __APPLE__ + if (errno != EPERM || ::kill(pid, 0) != 0) +#endif + LOG(ERROR) << SysError("killing process %d", pid).msg(); + } + + return wait(); +} + +int Pid::wait() { + assert(pid != -1); + while (true) { + int status; + int res = waitpid(pid, &status, 0); + if (res == pid) { + pid = -1; + return status; + } + if (errno != EINTR) { + throw SysError("cannot get child exit status"); + } + checkInterrupt(); + } +} + +void Pid::setSeparatePG(bool separatePG) { this->separatePG = separatePG; } + +void Pid::setKillSignal(int signal) { this->killSignal = signal; } + +pid_t Pid::release() { + pid_t p = pid; + pid = -1; + return p; +} + +void killUser(uid_t uid) { + DLOG(INFO) << "killing all processes running under UID " << uid; + + assert(uid != 0); /* just to be safe... */ + + /* The system call kill(-1, sig) sends the signal `sig' to all + users to which the current process can send signals. So we + fork a process, switch to uid, and send a mass kill. */ + + ProcessOptions options; + options.allowVfork = false; + + Pid pid = startProcess( + [&]() { + if (setuid(uid) == -1) { + throw SysError("setting uid"); + } + + while (true) { +#ifdef __APPLE__ + /* OSX's kill syscall takes a third parameter that, among + other things, determines if kill(-1, signo) affects the + calling process. In the OSX libc, it's set to true, + which means "follow POSIX", which we don't want here + */ + if (syscall(SYS_kill, -1, SIGKILL, false) == 0) { + break; + } +#else + if (kill(-1, SIGKILL) == 0) { + break; + } +#endif + if (errno == ESRCH) { + break; + } /* no more processes */ + if (errno != EINTR) { + throw SysError(format("cannot kill processes for uid '%1%'") % uid); + } + } + + _exit(0); + }, + options); + + int status = pid.wait(); + if (status != 0) { + throw Error(format("cannot kill processes for uid '%1%': %2%") % uid % + statusToString(status)); + } + + /* !!! We should really do some check to make sure that there are + no processes left running under `uid', but there is no portable + way to do so (I think). The most reliable way may be `ps -eo + uid | grep -q $uid'. */ +} + +////////////////////////////////////////////////////////////////////// + +/* Wrapper around vfork to prevent the child process from clobbering + the caller's stack frame in the parent. */ +static pid_t doFork(bool allowVfork, const std::function<void()>& fun) + __attribute__((noinline)); +static pid_t doFork(bool allowVfork, const std::function<void()>& fun) { +#ifdef __linux__ + pid_t pid = allowVfork ? vfork() : fork(); +#else + pid_t pid = fork(); +#endif + if (pid != 0) { + return pid; + } + fun(); + abort(); +} + +pid_t startProcess(std::function<void()> fun, const ProcessOptions& options) { + auto wrapper = [&]() { + try { +#if __linux__ + if (options.dieWithParent && prctl(PR_SET_PDEATHSIG, SIGKILL) == -1) { + throw SysError("setting death signal"); + } +#endif + restoreAffinity(); + fun(); + } catch (std::exception& e) { + try { + LOG(ERROR) << options.errorPrefix << e.what(); + } catch (...) { + } + } catch (...) { + } + if (options.runExitHandlers) { + exit(1); + } else { + _exit(1); + } + }; + + pid_t pid = doFork(options.allowVfork, wrapper); + if (pid == -1) { + throw SysError("unable to fork"); + } + + return pid; +} + +std::vector<char*> stringsToCharPtrs(const Strings& ss) { + std::vector<char*> res; + for (auto& s : ss) { + res.push_back((char*)s.c_str()); + } + res.push_back(nullptr); + return res; +} + +string runProgram(const Path& program, bool searchPath, const Strings& args, + const std::optional<std::string>& input) { + RunOptions opts(program, args); + opts.searchPath = searchPath; + opts.input = input; + + auto res = runProgram(opts); + + if (!statusOk(res.first)) { + throw ExecError(res.first, fmt("program '%1%' %2%", program, + statusToString(res.first))); + } + + return res.second; +} + +std::pair<int, std::string> runProgram(const RunOptions& options_) { + RunOptions options(options_); + StringSink sink; + options.standardOut = &sink; + + int status = 0; + + try { + runProgram2(options); + } catch (ExecError& e) { + status = e.status; + } + + return {status, std::move(*sink.s)}; +} + +void runProgram2(const RunOptions& options) { + checkInterrupt(); + + assert(!(options.standardIn && options.input)); + + std::unique_ptr<Source> source_; + Source* source = options.standardIn; + + if (options.input) { + source_ = std::make_unique<StringSource>(*options.input); + source = source_.get(); + } + + /* Create a pipe. */ + Pipe out; + Pipe in; + if (options.standardOut != nullptr) { + out.create(); + } + if (source != nullptr) { + in.create(); + } + + ProcessOptions processOptions; + // vfork implies that the environment of the main process and the fork will + // be shared (technically this is undefined, but in practice that's the + // case), so we can't use it if we alter the environment + if (options.environment) { + processOptions.allowVfork = false; + } + + /* Fork. */ + Pid pid = startProcess( + [&]() { + if (options.environment) { + replaceEnv(*options.environment); + } + if ((options.standardOut != nullptr) && + dup2(out.writeSide.get(), STDOUT_FILENO) == -1) { + throw SysError("dupping stdout"); + } + if (options.mergeStderrToStdout) { + if (dup2(STDOUT_FILENO, STDERR_FILENO) == -1) { + throw SysError("cannot dup stdout into stderr"); + } + } + if ((source != nullptr) && + dup2(in.readSide.get(), STDIN_FILENO) == -1) { + throw SysError("dupping stdin"); + } + + if (options.chdir && chdir((*options.chdir).c_str()) == -1) { + throw SysError("chdir failed"); + } + if (options.gid && setgid(*options.gid) == -1) { + throw SysError("setgid failed"); + } + /* Drop all other groups if we're setgid. */ + if (options.gid && setgroups(0, nullptr) == -1) { + throw SysError("setgroups failed"); + } + if (options.uid && setuid(*options.uid) == -1) { + throw SysError("setuid failed"); + } + + Strings args_(options.args); + args_.push_front(options.program); + + restoreSignals(); + + if (options.searchPath) { + execvp(options.program.c_str(), stringsToCharPtrs(args_).data()); + } else { + execv(options.program.c_str(), stringsToCharPtrs(args_).data()); + } + + throw SysError("executing '%1%'", options.program); + }, + processOptions); + + out.writeSide = -1; + + std::thread writerThread; + + std::promise<void> promise; + + Finally doJoin([&]() { + if (writerThread.joinable()) { + writerThread.join(); + } + }); + + if (source != nullptr) { + in.readSide = -1; + writerThread = std::thread([&]() { + try { + std::vector<unsigned char> buf(8 * 1024); + while (true) { + size_t n; + try { + n = source->read(buf.data(), buf.size()); + } catch (EndOfFile&) { + break; + } + writeFull(in.writeSide.get(), buf.data(), n); + } + promise.set_value(); + } catch (...) { + promise.set_exception(std::current_exception()); + } + in.writeSide = -1; + }); + } + + if (options.standardOut != nullptr) { + drainFD(out.readSide.get(), *options.standardOut); + } + + /* Wait for the child to finish. */ + int status = pid.wait(); + + /* Wait for the writer thread to finish. */ + if (source != nullptr) { + promise.get_future().get(); + } + + if (status != 0) { + throw ExecError(status, fmt("program '%1%' %2%", options.program, + statusToString(status))); + } +} + +void closeMostFDs(const set<int>& exceptions) { +#if __linux__ + try { + for (auto& s : readDirectory("/proc/self/fd")) { + auto fd = std::stoi(s.name); + if (exceptions.count(fd) == 0u) { + DLOG(INFO) << "closing leaked FD " << fd; + close(fd); + } + } + return; + } catch (SysError&) { + } +#endif + + int maxFD = 0; + maxFD = sysconf(_SC_OPEN_MAX); + for (int fd = 0; fd < maxFD; ++fd) { + if (exceptions.count(fd) == 0u) { + close(fd); + } /* ignore result */ + } +} + +void closeOnExec(int fd) { + int prev; + if ((prev = fcntl(fd, F_GETFD, 0)) == -1 || + fcntl(fd, F_SETFD, prev | FD_CLOEXEC) == -1) { + throw SysError("setting close-on-exec flag"); + } +} + +////////////////////////////////////////////////////////////////////// + +bool _isInterrupted = false; + +static thread_local bool interruptThrown = false; +thread_local std::function<bool()> interruptCheck; + +void setInterruptThrown() { interruptThrown = true; } + +void _interrupted() { + /* Block user interrupts while an exception is being handled. + Throwing an exception while another exception is being handled + kills the program! */ + if (!interruptThrown && (std::uncaught_exceptions() == 0)) { + interruptThrown = true; + throw Interrupted("interrupted by the user"); + } +} + +////////////////////////////////////////////////////////////////////// + +template <class C> +C tokenizeString(const string& s, const string& separators) { + C result; + string::size_type pos = s.find_first_not_of(separators, 0); + while (pos != string::npos) { + string::size_type end = s.find_first_of(separators, pos + 1); + if (end == string::npos) { + end = s.size(); + } + string token(s, pos, end - pos); + result.insert(result.end(), token); + pos = s.find_first_not_of(separators, end); + } + return result; +} + +template Strings tokenizeString(const string& s, const string& separators); +template StringSet tokenizeString(const string& s, const string& separators); +template vector<string> tokenizeString(const string& s, + const string& separators); + +string concatStringsSep(const string& sep, const Strings& ss) { + string s; + for (auto& i : ss) { + if (!s.empty()) { + s += sep; + } + s += i; + } + return s; +} + +string concatStringsSep(const string& sep, const StringSet& ss) { + string s; + for (auto& i : ss) { + if (!s.empty()) { + s += sep; + } + s += i; + } + return s; +} + +string chomp(const string& s) { + size_t i = s.find_last_not_of(" \n\r\t"); + return i == string::npos ? "" : string(s, 0, i + 1); +} + +string trim(const string& s, const string& whitespace) { + auto i = s.find_first_not_of(whitespace); + if (i == string::npos) { + return ""; + } + auto j = s.find_last_not_of(whitespace); + return string(s, i, j == string::npos ? j : j - i + 1); +} + +string replaceStrings(const std::string& s, const std::string& from, + const std::string& to) { + if (from.empty()) { + return s; + } + string res = s; + size_t pos = 0; + while ((pos = res.find(from, pos)) != std::string::npos) { + res.replace(pos, from.size(), to); + pos += to.size(); + } + return res; +} + +string statusToString(int status) { + if (!WIFEXITED(status) || WEXITSTATUS(status) != 0) { + if (WIFEXITED(status)) { + return (format("failed with exit code %1%") % WEXITSTATUS(status)).str(); + } + if (WIFSIGNALED(status)) { + int sig = WTERMSIG(status); +#if HAVE_STRSIGNAL + const char* description = strsignal(sig); + return (format("failed due to signal %1% (%2%)") % sig % description) + .str(); +#else + return (format("failed due to signal %1%") % sig).str(); +#endif + } else { + return "died abnormally"; + } + } else { + return "succeeded"; + } +} + +bool statusOk(int status) { + return WIFEXITED(status) && WEXITSTATUS(status) == 0; +} + +bool hasPrefix(const string& s, const string& prefix) { + return s.compare(0, prefix.size(), prefix) == 0; +} + +bool hasSuffix(const string& s, const string& suffix) { + return s.size() >= suffix.size() && + string(s, s.size() - suffix.size()) == suffix; +} + +std::string toLower(const std::string& s) { + std::string r(s); + for (auto& c : r) { + c = std::tolower(c); + } + return r; +} + +std::string shellEscape(const std::string& s) { + std::string r = "'"; + for (auto& i : s) { + if (i == '\'') { + r += "'\\''"; + } else { + r += i; + } + } + r += '\''; + return r; +} + +void ignoreException() { + try { + throw; + } catch (std::exception& e) { + LOG(ERROR) << "error (ignored): " << e.what(); + } +} + +std::string filterANSIEscapes(const std::string& s, bool filterAll, + unsigned int width) { + std::string t; + std::string e; + size_t w = 0; + auto i = s.begin(); + + while (w < (size_t)width && i != s.end()) { + if (*i == '\e') { + std::string e; + e += *i++; + char last = 0; + + if (i != s.end() && *i == '[') { + e += *i++; + // eat parameter bytes + while (i != s.end() && *i >= 0x30 && *i <= 0x3f) { + e += *i++; + } + // eat intermediate bytes + while (i != s.end() && *i >= 0x20 && *i <= 0x2f) { + e += *i++; + } + // eat final byte + if (i != s.end() && *i >= 0x40 && *i <= 0x7e) { + e += last = *i++; + } + } else { + if (i != s.end() && *i >= 0x40 && *i <= 0x5f) { + e += *i++; + } + } + + if (!filterAll && last == 'm') { + t += e; + } + } + + else if (*i == '\t') { + i++; + t += ' '; + w++; + while (w < (size_t)width && ((w % 8) != 0u)) { + t += ' '; + w++; + } + } + + else if (*i == '\r') { + // do nothing for now + i++; + + } else { + t += *i++; + w++; + } + } + + return t; +} + +static char base64Chars[] = + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; + +string base64Encode(const string& s) { + string res; + int data = 0; + int nbits = 0; + + for (char c : s) { + data = data << 8 | (unsigned char)c; + nbits += 8; + while (nbits >= 6) { + nbits -= 6; + res.push_back(base64Chars[data >> nbits & 0x3f]); + } + } + + if (nbits != 0) { + res.push_back(base64Chars[data << (6 - nbits) & 0x3f]); + } + while ((res.size() % 4) != 0u) { + res.push_back('='); + } + + return res; +} + +string base64Decode(const string& s) { + bool init = false; + char decode[256]; + if (!init) { + // FIXME: not thread-safe. + memset(decode, -1, sizeof(decode)); + for (int i = 0; i < 64; i++) { + decode[(int)base64Chars[i]] = i; + } + init = true; + } + + string res; + unsigned int d = 0; + unsigned int bits = 0; + + for (char c : s) { + if (c == '=') { + break; + } + if (c == '\n') { + continue; + } + + char digit = decode[(unsigned char)c]; + if (digit == -1) { + throw Error("invalid character in Base64 string"); + } + + bits += 6; + d = d << 6 | digit; + if (bits >= 8) { + res.push_back(d >> (bits - 8) & 0xff); + bits -= 8; + } + } + + return res; +} + +void callFailure(const std::function<void(std::exception_ptr exc)>& failure, + const std::exception_ptr& exc) { + try { + failure(exc); + } catch (std::exception& e) { + LOG(ERROR) << "uncaught exception: " << e.what(); + abort(); + } +} + +static Sync<std::pair<unsigned short, unsigned short>> windowSize{{0, 0}}; + +static void updateWindowSize() { + struct winsize ws; + if (ioctl(2, TIOCGWINSZ, &ws) == 0) { + auto windowSize_(windowSize.lock()); + windowSize_->first = ws.ws_row; + windowSize_->second = ws.ws_col; + } +} + +std::pair<unsigned short, unsigned short> getWindowSize() { + return *windowSize.lock(); +} + +static Sync<std::list<std::function<void()>>> _interruptCallbacks; + +static void signalHandlerThread(sigset_t set) { + while (true) { + int signal = 0; + sigwait(&set, &signal); + + if (signal == SIGINT || signal == SIGTERM || signal == SIGHUP) { + triggerInterrupt(); + + } else if (signal == SIGWINCH) { + updateWindowSize(); + } + } +} + +void triggerInterrupt() { + _isInterrupted = true; + + { + auto interruptCallbacks(_interruptCallbacks.lock()); + for (auto& callback : *interruptCallbacks) { + try { + callback(); + } catch (...) { + ignoreException(); + } + } + } +} + +static sigset_t savedSignalMask; + +void startSignalHandlerThread() { + updateWindowSize(); + + if (sigprocmask(SIG_BLOCK, nullptr, &savedSignalMask) != 0) { + throw SysError("quering signal mask"); + } + + sigset_t set; + sigemptyset(&set); + sigaddset(&set, SIGINT); + sigaddset(&set, SIGTERM); + sigaddset(&set, SIGHUP); + sigaddset(&set, SIGPIPE); + sigaddset(&set, SIGWINCH); + if (pthread_sigmask(SIG_BLOCK, &set, nullptr) != 0) { + throw SysError("blocking signals"); + } + + std::thread(signalHandlerThread, set).detach(); +} + +void restoreSignals() { + if (sigprocmask(SIG_SETMASK, &savedSignalMask, nullptr) != 0) { + throw SysError("restoring signals"); + } +} + +/* RAII helper to automatically deregister a callback. */ +struct InterruptCallbackImpl : InterruptCallback { + std::list<std::function<void()>>::iterator it; + ~InterruptCallbackImpl() override { _interruptCallbacks.lock()->erase(it); } +}; + +std::unique_ptr<InterruptCallback> createInterruptCallback( + const std::function<void()>& callback) { + auto interruptCallbacks(_interruptCallbacks.lock()); + interruptCallbacks->push_back(callback); + + auto res = std::make_unique<InterruptCallbackImpl>(); + res->it = interruptCallbacks->end(); + res->it--; + + return std::unique_ptr<InterruptCallback>(res.release()); +} + +} // namespace nix diff --git a/third_party/nix/src/libutil/util.hh b/third_party/nix/src/libutil/util.hh new file mode 100644 index 000000000000..5ce12f2ede43 --- /dev/null +++ b/third_party/nix/src/libutil/util.hh @@ -0,0 +1,493 @@ +#pragma once + +#include <cstdio> +#include <functional> +#include <future> +#include <limits> +#include <map> +#include <optional> +#include <sstream> + +#include <dirent.h> +#include <signal.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <unistd.h> + +#include "types.hh" + +#ifndef HAVE_STRUCT_DIRENT_D_TYPE +#define DT_UNKNOWN 0 +#define DT_REG 1 +#define DT_LNK 2 +#define DT_DIR 3 +#endif + +namespace nix { + +struct Sink; +struct Source; + +/* The system for which Nix is compiled. */ +extern const std::string nativeSystem; + +/* Return an environment variable. */ +string getEnv(const string& key, const string& def = ""); + +/* Get the entire environment. */ +std::map<std::string, std::string> getEnv(); + +/* Clear the environment. */ +void clearEnv(); + +/* Return an absolutized path, resolving paths relative to the + specified directory, or the current directory otherwise. The path + is also canonicalised. */ +Path absPath(Path path, Path dir = ""); + +/* Canonicalise a path by removing all `.' or `..' components and + double or trailing slashes. Optionally resolves all symlink + components such that each component of the resulting path is *not* + a symbolic link. */ +Path canonPath(const Path& path, bool resolveSymlinks = false); + +/* Return the directory part of the given canonical path, i.e., + everything before the final `/'. If the path is the root or an + immediate child thereof (e.g., `/foo'), this means an empty string + is returned. */ +Path dirOf(const Path& path); + +/* Return the base name of the given canonical path, i.e., everything + following the final `/'. */ +string baseNameOf(const Path& path); + +/* Check whether 'path' is a descendant of 'dir'. */ +bool isInDir(const Path& path, const Path& dir); + +/* Check whether 'path' is equal to 'dir' or a descendant of 'dir'. */ +bool isDirOrInDir(const Path& path, const Path& dir); + +/* Get status of `path'. */ +struct stat lstat(const Path& path); + +/* Return true iff the given path exists. */ +bool pathExists(const Path& path); + +/* Read the contents (target) of a symbolic link. The result is not + in any way canonicalised. */ +Path readLink(const Path& path); + +bool isLink(const Path& path); + +/* Read the contents of a directory. The entries `.' and `..' are + removed. */ +struct DirEntry { + string name; + ino_t ino; + unsigned char type; // one of DT_* + DirEntry(const string& name, ino_t ino, unsigned char type) + : name(name), ino(ino), type(type) {} +}; + +typedef vector<DirEntry> DirEntries; + +DirEntries readDirectory(const Path& path); + +unsigned char getFileType(const Path& path); + +/* Read the contents of a file into a string. */ +string readFile(int fd); +string readFile(const Path& path, bool drain = false); +void readFile(const Path& path, Sink& sink); + +/* Write a string to a file. */ +void writeFile(const Path& path, const string& s, mode_t mode = 0666); + +void writeFile(const Path& path, Source& source, mode_t mode = 0666); + +/* Read a line from a file descriptor. */ +string readLine(int fd); + +/* Write a line to a file descriptor. */ +void writeLine(int fd, string s); + +/* Delete a path; i.e., in the case of a directory, it is deleted + recursively. It's not an error if the path does not exist. The + second variant returns the number of bytes and blocks freed. */ +void deletePath(const Path& path); + +void deletePath(const Path& path, unsigned long long& bytesFreed); + +/* Create a temporary directory. */ +Path createTempDir(const Path& tmpRoot = "", const Path& prefix = "nix", + bool includePid = true, bool useGlobalCounter = true, + mode_t mode = 0755); + +std::string getUserName(); + +/* Return $HOME or the user's home directory from /etc/passwd. */ +Path getHome(); + +/* Return $XDG_CACHE_HOME or $HOME/.cache. */ +Path getCacheDir(); + +/* Return $XDG_CONFIG_HOME or $HOME/.config. */ +Path getConfigDir(); + +/* Return the directories to search for user configuration files */ +std::vector<Path> getConfigDirs(); + +/* Return $XDG_DATA_HOME or $HOME/.local/share. */ +Path getDataDir(); + +/* Create a directory and all its parents, if necessary. Returns the + list of created directories, in order of creation. */ +Paths createDirs(const Path& path); + +/* Create a symlink. */ +void createSymlink(const Path& target, const Path& link); + +/* Atomically create or replace a symlink. */ +void replaceSymlink(const Path& target, const Path& link); + +/* Wrappers arount read()/write() that read/write exactly the + requested number of bytes. */ +void readFull(int fd, unsigned char* buf, size_t count); +void writeFull(int fd, const unsigned char* buf, size_t count, + bool allowInterrupts = true); +void writeFull(int fd, const string& s, bool allowInterrupts = true); + +MakeError(EndOfFile, Error) + + /* Read a file descriptor until EOF occurs. */ + string drainFD(int fd, bool block = true); + +void drainFD(int fd, Sink& sink, bool block = true); + +/* Automatic cleanup of resources. */ + +class AutoDelete { + Path path; + bool del; + bool recursive; + + public: + AutoDelete(); + AutoDelete(Path p, bool recursive = true); + ~AutoDelete(); + void cancel(); + void reset(const Path& p, bool recursive = true); + operator Path() const { return path; } +}; + +class AutoCloseFD { + int fd; + void close(); + + public: + AutoCloseFD(); + AutoCloseFD(int fd); + AutoCloseFD(const AutoCloseFD& fd) = delete; + AutoCloseFD(AutoCloseFD&& that); + ~AutoCloseFD(); + AutoCloseFD& operator=(const AutoCloseFD& fd) = delete; + AutoCloseFD& operator=(AutoCloseFD&& that); + int get() const; + explicit operator bool() const; + int release(); +}; + +class Pipe { + public: + AutoCloseFD readSide, writeSide; + void create(); +}; + +struct DIRDeleter { + void operator()(DIR* dir) const { closedir(dir); } +}; + +typedef std::unique_ptr<DIR, DIRDeleter> AutoCloseDir; + +class Pid { + pid_t pid = -1; + bool separatePG = false; + int killSignal = SIGKILL; + + public: + Pid(); + Pid(pid_t pid); + ~Pid(); + void operator=(pid_t pid); + operator pid_t(); + int kill(); + int wait(); + + void setSeparatePG(bool separatePG); + void setKillSignal(int signal); + pid_t release(); +}; + +/* Kill all processes running under the specified uid by sending them + a SIGKILL. */ +void killUser(uid_t uid); + +/* Fork a process that runs the given function, and return the child + pid to the caller. */ +struct ProcessOptions { + string errorPrefix = "error: "; + bool dieWithParent = true; + bool runExitHandlers = false; + bool allowVfork = true; +}; + +pid_t startProcess(std::function<void()> fun, + const ProcessOptions& options = ProcessOptions()); + +/* Run a program and return its stdout in a string (i.e., like the + shell backtick operator). */ +string runProgram(const Path& program, bool searchPath = false, + const Strings& args = Strings(), + const std::optional<std::string>& input = {}); + +struct RunOptions { + std::optional<uid_t> uid; + std::optional<uid_t> gid; + std::optional<Path> chdir; + std::optional<std::map<std::string, std::string>> environment; + Path program; + bool searchPath = true; + Strings args; + std::optional<std::string> input; + Source* standardIn = nullptr; + Sink* standardOut = nullptr; + bool mergeStderrToStdout = false; + bool _killStderr = false; + + RunOptions(const Path& program, const Strings& args) + : program(program), args(args){}; + + RunOptions& killStderr(bool v) { + _killStderr = true; + return *this; + } +}; + +std::pair<int, std::string> runProgram(const RunOptions& options); + +void runProgram2(const RunOptions& options); + +class ExecError : public Error { + public: + int status; + + template <typename... Args> + ExecError(int status, Args... args) : Error(args...), status(status) {} +}; + +/* Convert a list of strings to a null-terminated vector of char + *'s. The result must not be accessed beyond the lifetime of the + list of strings. */ +std::vector<char*> stringsToCharPtrs(const Strings& ss); + +/* Close all file descriptors except those listed in the given set. + Good practice in child processes. */ +void closeMostFDs(const set<int>& exceptions); + +/* Set the close-on-exec flag for the given file descriptor. */ +void closeOnExec(int fd); + +/* User interruption. */ + +extern bool _isInterrupted; + +extern thread_local std::function<bool()> interruptCheck; + +void setInterruptThrown(); + +void _interrupted(); + +void inline checkInterrupt() { + if (_isInterrupted || (interruptCheck && interruptCheck())) _interrupted(); +} + +MakeError(Interrupted, BaseError) + + MakeError(FormatError, Error) + + /* String tokenizer. */ + template <class C> + C tokenizeString(const string& s, const string& separators = " \t\n\r"); + +/* Concatenate the given strings with a separator between the + elements. */ +string concatStringsSep(const string& sep, const Strings& ss); +string concatStringsSep(const string& sep, const StringSet& ss); + +/* Remove trailing whitespace from a string. */ +string chomp(const string& s); + +/* Remove whitespace from the start and end of a string. */ +string trim(const string& s, const string& whitespace = " \n\r\t"); + +/* Replace all occurrences of a string inside another string. */ +string replaceStrings(const std::string& s, const std::string& from, + const std::string& to); + +/* Convert the exit status of a child as returned by wait() into an + error string. */ +string statusToString(int status); + +bool statusOk(int status); + +/* Parse a string into an integer. */ +template <class N> +bool string2Int(const string& s, N& n) { + if (string(s, 0, 1) == "-" && !std::numeric_limits<N>::is_signed) + return false; + std::istringstream str(s); + str >> n; + return str && str.get() == EOF; +} + +/* Parse a string into a float. */ +template <class N> +bool string2Float(const string& s, N& n) { + std::istringstream str(s); + str >> n; + return str && str.get() == EOF; +} + +/* Return true iff `s' starts with `prefix'. */ +bool hasPrefix(const string& s, const string& prefix); + +/* Return true iff `s' ends in `suffix'. */ +bool hasSuffix(const string& s, const string& suffix); + +/* Convert a string to lower case. */ +std::string toLower(const std::string& s); + +/* Escape a string as a shell word. */ +std::string shellEscape(const std::string& s); + +/* Exception handling in destructors: print an error message, then + ignore the exception. */ +void ignoreException(); + +/* Some ANSI escape sequences. */ +#define ANSI_NORMAL "\e[0m" +#define ANSI_BOLD "\e[1m" +#define ANSI_FAINT "\e[2m" +#define ANSI_RED "\e[31;1m" +#define ANSI_GREEN "\e[32;1m" +#define ANSI_BLUE "\e[34;1m" + +/* Truncate a string to 'width' printable characters. If 'filterAll' + is true, all ANSI escape sequences are filtered out. Otherwise, + some escape sequences (such as colour setting) are copied but not + included in the character count. Also, tabs are expanded to + spaces. */ +std::string filterANSIEscapes( + const std::string& s, bool filterAll = false, + unsigned int width = std::numeric_limits<unsigned int>::max()); + +/* Base64 encoding/decoding. */ +string base64Encode(const string& s); +string base64Decode(const string& s); + +/* Get a value for the specified key from an associate container, or a + default value if the key doesn't exist. */ +template <class T> +string get(const T& map, const string& key, const string& def = "") { + auto i = map.find(key); + return i == map.end() ? def : i->second; +} + +/* A callback is a wrapper around a lambda that accepts a valid of + type T or an exception. (We abuse std::future<T> to pass the value or + exception.) */ +template <typename T> +class Callback { + std::function<void(std::future<T>)> fun; + std::atomic_flag done = ATOMIC_FLAG_INIT; + + public: + Callback(std::function<void(std::future<T>)> fun) : fun(fun) {} + + Callback(Callback&& callback) : fun(std::move(callback.fun)) { + auto prev = callback.done.test_and_set(); + if (prev) { + done.test_and_set(); + } + } + + void operator()(T&& t) noexcept { + auto prev = done.test_and_set(); + assert(!prev); + std::promise<T> promise; + promise.set_value(std::move(t)); + fun(promise.get_future()); + } + + void rethrow( + const std::exception_ptr& exc = std::current_exception()) noexcept { + auto prev = done.test_and_set(); + assert(!prev); + std::promise<T> promise; + promise.set_exception(exc); + fun(promise.get_future()); + } +}; + +/* Start a thread that handles various signals. Also block those signals + on the current thread (and thus any threads created by it). */ +void startSignalHandlerThread(); + +/* Restore default signal handling. */ +void restoreSignals(); + +struct InterruptCallback { + virtual ~InterruptCallback(){}; +}; + +/* Register a function that gets called on SIGINT (in a non-signal + context). */ +std::unique_ptr<InterruptCallback> createInterruptCallback( + const std::function<void()>& callback); + +void triggerInterrupt(); + +/* A RAII class that causes the current thread to receive SIGUSR1 when + the signal handler thread receives SIGINT. That is, this allows + SIGINT to be multiplexed to multiple threads. */ +struct ReceiveInterrupts { + pthread_t target; + std::unique_ptr<InterruptCallback> callback; + + ReceiveInterrupts() + : target(pthread_self()), callback(createInterruptCallback([&]() { + pthread_kill(target, SIGUSR1); + })) {} +}; + +/* A RAII helper that increments a counter on construction and + decrements it on destruction. */ +template <typename T> +struct MaintainCount { + T& counter; + long delta; + MaintainCount(T& counter, long delta = 1) : counter(counter), delta(delta) { + counter += delta; + } + ~MaintainCount() { counter -= delta; } +}; + +/* Return the number of rows and columns of the terminal. */ +std::pair<unsigned short, unsigned short> getWindowSize(); + +/* Used in various places. */ +typedef std::function<bool(const Path& path)> PathFilter; + +extern PathFilter defaultPathFilter; + +} // namespace nix diff --git a/third_party/nix/src/libutil/xml-writer.cc b/third_party/nix/src/libutil/xml-writer.cc new file mode 100644 index 000000000000..11cd632399ad --- /dev/null +++ b/third_party/nix/src/libutil/xml-writer.cc @@ -0,0 +1,92 @@ +#include "xml-writer.hh" + +#include <cassert> + +namespace nix { + +XMLWriter::XMLWriter(bool indent, std::ostream& output) + : output(output), indent(indent) { + output << "<?xml version='1.0' encoding='utf-8'?>" << std::endl; + closed = false; +} + +XMLWriter::~XMLWriter() { close(); } + +void XMLWriter::close() { + if (closed) { + return; + } + while (!pendingElems.empty()) { + closeElement(); + } + closed = true; +} + +void XMLWriter::indent_(size_t depth) { + if (!indent) { + return; + } + output << string(depth * 2, ' '); +} + +void XMLWriter::openElement(const string& name, const XMLAttrs& attrs) { + assert(!closed); + indent_(pendingElems.size()); + output << "<" << name; + writeAttrs(attrs); + output << ">"; + if (indent) { + output << std::endl; + } + pendingElems.push_back(name); +} + +void XMLWriter::closeElement() { + assert(!pendingElems.empty()); + indent_(pendingElems.size() - 1); + output << "</" << pendingElems.back() << ">"; + if (indent) { + output << std::endl; + } + pendingElems.pop_back(); + if (pendingElems.empty()) { + closed = true; + } +} + +void XMLWriter::writeEmptyElement(const string& name, const XMLAttrs& attrs) { + assert(!closed); + indent_(pendingElems.size()); + output << "<" << name; + writeAttrs(attrs); + output << " />"; + if (indent) { + output << std::endl; + } +} + +void XMLWriter::writeAttrs(const XMLAttrs& attrs) { + for (auto& i : attrs) { + output << " " << i.first << "=\""; + for (char c : i.second) { + if (c == '"') { + output << """; + } else if (c == '<') { + output << "<"; + } else if (c == '>') { + output << ">"; + } else if (c == '&') { + output << "&"; + /* Escape newlines to prevent attribute normalisation (see + XML spec, section 3.3.3. */ + } else if (c == '\n') { + output << "
"; + } else { + output << c; + } + } + output << "\""; + } +} + +} // namespace nix diff --git a/third_party/nix/src/libutil/xml-writer.hh b/third_party/nix/src/libutil/xml-writer.hh new file mode 100644 index 000000000000..3a2d9a66d8e1 --- /dev/null +++ b/third_party/nix/src/libutil/xml-writer.hh @@ -0,0 +1,56 @@ +#pragma once + +#include <iostream> +#include <list> +#include <map> +#include <string> + +namespace nix { + +using std::list; +using std::map; +using std::string; + +typedef map<string, string> XMLAttrs; + +class XMLWriter { + private: + std::ostream& output; + + bool indent; + bool closed; + + list<string> pendingElems; + + public: + XMLWriter(bool indent, std::ostream& output); + ~XMLWriter(); + + void close(); + + void openElement(const string& name, const XMLAttrs& attrs = XMLAttrs()); + void closeElement(); + + void writeEmptyElement(const string& name, + const XMLAttrs& attrs = XMLAttrs()); + + private: + void writeAttrs(const XMLAttrs& attrs); + + void indent_(size_t depth); +}; + +class XMLOpenElement { + private: + XMLWriter& writer; + + public: + XMLOpenElement(XMLWriter& writer, const string& name, + const XMLAttrs& attrs = XMLAttrs()) + : writer(writer) { + writer.openElement(name, attrs); + } + ~XMLOpenElement() { writer.closeElement(); } +}; + +} // namespace nix diff --git a/third_party/nix/src/meson.build b/third_party/nix/src/meson.build new file mode 100644 index 000000000000..59916a944aed --- /dev/null +++ b/third_party/nix/src/meson.build @@ -0,0 +1,47 @@ +# nix src build file +#============================================================================ + +src_dirs = [ + 'libutil', + 'libstore', + 'libmain', + 'libexpr', + 'nix', +] + +foreach dir : src_dirs + subdir(dir) +endforeach + + + +libstore_config = pkg.generate( + libstore_lib, + libraries : [ + libutil_lib], + version : meson.project_version(), + name : 'Nix', + subdirs : ['nix/'], + filebase : 'nix-store', + extra_cflags : '-std=c++17', + description : 'Nix Package Manager.') + +libmain_config = pkg.generate( + libmain_lib, + version : meson.project_version(), + name : 'Nix', + subdirs : ['nix/'], + filebase : 'nix-main', + extra_cflags : '-std=c++17', + description : 'Nix Package Manager.') + +libexpr_config = pkg.generate( + libexpr_lib, + libraries : [ + libstore_lib], + version : meson.project_version(), + name : 'Nix', + subdirs : ['nix/'], + filebase : 'nix-expr', + extra_cflags : '-std=c++17', + description : 'Nix Package Manager.') diff --git a/third_party/nix/src/nix-build/nix-build.cc b/third_party/nix/src/nix-build/nix-build.cc new file mode 100644 index 000000000000..66947051ba32 --- /dev/null +++ b/third_party/nix/src/nix-build/nix-build.cc @@ -0,0 +1,572 @@ +#include <cstring> +#include <fstream> +#include <iostream> +#include <regex> +#include <sstream> +#include <vector> + +#include <glog/logging.h> + +#include "affinity.hh" +#include "attr-path.hh" +#include "common-eval-args.hh" +#include "derivations.hh" +#include "eval-inline.hh" +#include "eval.hh" +#include "get-drvs.hh" +#include "globals.hh" +#include "legacy.hh" +#include "shared.hh" +#include "store-api.hh" +#include "util.hh" + +using namespace nix; +using namespace std::string_literals; + +/* Recreate the effect of the perl shellwords function, breaking up a + * string into arguments like a shell word, including escapes + */ +std::vector<string> shellwords(const string& s) { + std::regex whitespace("^(\\s+).*"); + auto begin = s.cbegin(); + std::vector<string> res; + std::string cur; + enum state { sBegin, sQuote }; + state st = sBegin; + auto it = begin; + for (; it != s.cend(); ++it) { + if (st == sBegin) { + std::smatch match; + if (regex_search(it, s.cend(), match, whitespace)) { + cur.append(begin, it); + res.push_back(cur); + cur.clear(); + it = match[1].second; + begin = it; + } + } + switch (*it) { + case '"': + cur.append(begin, it); + begin = it + 1; + st = st == sBegin ? sQuote : sBegin; + break; + case '\\': + /* perl shellwords mostly just treats the next char as part of the + * string with no special processing */ + cur.append(begin, it); + begin = ++it; + break; + } + } + cur.append(begin, it); + if (!cur.empty()) { + res.push_back(cur); + } + return res; +} + +static void _main(int argc, char** argv) { + auto dryRun = false; + auto runEnv = std::regex_search(argv[0], std::regex("nix-shell$")); + auto pure = false; + auto fromArgs = false; + auto packages = false; + // Same condition as bash uses for interactive shells + auto interactive = + (isatty(STDIN_FILENO) != 0) && (isatty(STDERR_FILENO) != 0); + Strings attrPaths; + Strings left; + RepairFlag repair = NoRepair; + Path gcRoot; + BuildMode buildMode = bmNormal; + bool readStdin = false; + + std::string envCommand; // interactive shell + Strings envExclude; + + auto myName = runEnv ? "nix-shell" : "nix-build"; + + auto inShebang = false; + std::string script; + std::vector<string> savedArgs; + + AutoDelete tmpDir(createTempDir("", myName)); + + std::string outLink = "./result"; + + // List of environment variables kept for --pure + std::set<string> keepVars{ + "HOME", "USER", "LOGNAME", "DISPLAY", "PATH", "TERM", + "IN_NIX_SHELL", "TZ", "PAGER", "NIX_BUILD_SHELL", "SHLVL"}; + + Strings args; + for (int i = 1; i < argc; ++i) { + args.push_back(argv[i]); + } + + // Heuristic to see if we're invoked as a shebang script, namely, + // if we have at least one argument, it's the name of an + // executable file, and it starts with "#!". + if (runEnv && argc > 1 && + !std::regex_search(argv[1], std::regex("nix-shell"))) { + script = argv[1]; + try { + auto lines = tokenizeString<Strings>(readFile(script), "\n"); + if (std::regex_search(lines.front(), std::regex("^#!"))) { + lines.pop_front(); + inShebang = true; + for (int i = 2; i < argc; ++i) { + savedArgs.emplace_back(argv[i]); + } + args.clear(); + for (auto line : lines) { + line = chomp(line); + std::smatch match; + if (std::regex_match(line, match, + std::regex("^#!\\s*nix-shell (.*)$"))) { + for (const auto& word : shellwords(match[1].str())) { + args.push_back(word); + } + } + } + } + } catch (SysError&) { + } + } + + struct MyArgs : LegacyArgs, MixEvalArgs { + using LegacyArgs::LegacyArgs; + }; + + MyArgs myArgs( + myName, [&](Strings::iterator& arg, const Strings::iterator& end) { + if (*arg == "--help") { + deletePath(tmpDir); + showManPage(myName); + } + + else if (*arg == "--version") { + printVersion(myName); + + } else if (*arg == "--add-drv-link" || *arg == "--indirect") { + ; // obsolete + + } else if (*arg == "--no-out-link" || *arg == "--no-link") { + outLink = (Path)tmpDir + "/result"; + + } else if (*arg == "--attr" || *arg == "-A") { + attrPaths.push_back(getArg(*arg, arg, end)); + + } else if (*arg == "--drv-link") { + getArg(*arg, arg, end); // obsolete + + } else if (*arg == "--out-link" || *arg == "-o") { + outLink = getArg(*arg, arg, end); + + } else if (*arg == "--add-root") { + gcRoot = getArg(*arg, arg, end); + + } else if (*arg == "--dry-run") { + dryRun = true; + + } else if (*arg == "--repair") { + repair = Repair; + buildMode = bmRepair; + } + + else if (*arg == "--run-env") { // obsolete + runEnv = true; + + } else if (*arg == "--command" || *arg == "--run") { + if (*arg == "--run") { + interactive = false; + } + envCommand = getArg(*arg, arg, end) + "\nexit"; + } + + else if (*arg == "--check") { + buildMode = bmCheck; + + } else if (*arg == "--exclude") { + envExclude.push_back(getArg(*arg, arg, end)); + + } else if (*arg == "--expr" || *arg == "-E") { + fromArgs = true; + + } else if (*arg == "--pure") { + pure = true; + } else if (*arg == "--impure") { + pure = false; + + } else if (*arg == "--packages" || *arg == "-p") { + packages = true; + + } else if (inShebang && *arg == "-i") { + auto interpreter = getArg(*arg, arg, end); + interactive = false; + auto execArgs = ""; + + // รberhack to support Perl. Perl examines the shebang and + // executes it unless it contains the string "perl" or "indir", + // or (undocumented) argv[0] does not contain "perl". Exploit + // the latter by doing "exec -a". + if (std::regex_search(interpreter, std::regex("perl"))) { + execArgs = "-a PERL"; + } + + std::ostringstream joined; + for (const auto& i : savedArgs) { + joined << shellEscape(i) << ' '; + } + + if (std::regex_search(interpreter, std::regex("ruby"))) { + // Hack for Ruby. Ruby also examines the shebang. It tries to + // read the shebang to understand which packages to read from. Since + // this is handled via nix-shell -p, we wrap our ruby script + // execution in ruby -e 'load' which ignores the shebangs. + envCommand = (format("exec %1% %2% -e 'load(\"%3%\")' -- %4%") % + execArgs % interpreter % script % joined.str()) + .str(); + } else { + envCommand = (format("exec %1% %2% %3% %4%") % execArgs % + interpreter % script % joined.str()) + .str(); + } + } + + else if (*arg == "--keep") { + keepVars.insert(getArg(*arg, arg, end)); + + } else if (*arg == "-") { + readStdin = true; + + } else if (*arg != "" && arg->at(0) == '-') { + return false; + + } else { + left.push_back(*arg); + } + + return true; + }); + + myArgs.parseCmdline(args); + + initPlugins(); + + if (packages && fromArgs) { + throw UsageError("'-p' and '-E' are mutually exclusive"); + } + + auto store = openStore(); + + auto state = std::make_unique<EvalState>(myArgs.searchPath, store); + state->repair = repair; + + Bindings& autoArgs = *myArgs.getAutoArgs(*state); + + if (packages) { + std::ostringstream joined; + joined << "with import <nixpkgs> { }; (pkgs.runCommandCC or " + "pkgs.runCommand) \"shell\" { buildInputs = [ "; + for (const auto& i : left) { + joined << '(' << i << ") "; + } + joined << "]; } \"\""; + fromArgs = true; + left = {joined.str()}; + } else if (!fromArgs) { + if (left.empty() && runEnv && pathExists("shell.nix")) { + left = {"shell.nix"}; + } + if (left.empty()) { + left = {"default.nix"}; + } + } + + if (runEnv) { + setenv("IN_NIX_SHELL", pure ? "pure" : "impure", 1); + } + + DrvInfos drvs; + + /* Parse the expressions. */ + std::vector<Expr*> exprs; + + if (readStdin) { + exprs = {state->parseStdin()}; + } else { + for (const auto& i : left) { + if (fromArgs) { + exprs.push_back(state->parseExprFromString(i, absPath("."))); + } else { + auto absolute = i; + try { + absolute = canonPath(absPath(i), true); + } catch (Error& e) { + }; + if (store->isStorePath(absolute) && + std::regex_match(absolute, std::regex(".*\\.drv(!.*)?"))) { + drvs.push_back(DrvInfo(*state, store, absolute)); + } else { + /* If we're in a #! script, interpret filenames + relative to the script. */ + exprs.push_back( + state->parseExprFromFile(resolveExprPath(state->checkSourcePath( + lookupFileArg(*state, inShebang && !packages + ? absPath(i, absPath(dirOf(script))) + : i))))); + } + } + } + } + + /* Evaluate them into derivations. */ + if (attrPaths.empty()) { + attrPaths = {""}; + } + + for (auto e : exprs) { + Value vRoot; + state->eval(e, vRoot); + + for (auto& i : attrPaths) { + Value& v(*findAlongAttrPath(*state, i, autoArgs, vRoot)); + state->forceValue(v); + getDerivations(*state, v, "", autoArgs, drvs, false); + } + } + + state->printStats(); + + auto buildPaths = [&](const PathSet& paths) { + /* Note: we do this even when !printMissing to efficiently + fetch binary cache data. */ + unsigned long long downloadSize; + unsigned long long narSize; + PathSet willBuild; + PathSet willSubstitute; + PathSet unknown; + store->queryMissing(paths, willBuild, willSubstitute, unknown, downloadSize, + narSize); + + if (settings.printMissing) { + printMissing(ref<Store>(store), willBuild, willSubstitute, unknown, + downloadSize, narSize); + } + + if (!dryRun) { + store->buildPaths(paths, buildMode); + } + }; + + if (runEnv) { + if (drvs.size() != 1) { + throw UsageError("nix-shell requires a single derivation"); + } + + auto& drvInfo = drvs.front(); + auto drv = store->derivationFromPath(drvInfo.queryDrvPath()); + + PathSet pathsToBuild; + + /* Figure out what bash shell to use. If $NIX_BUILD_SHELL + is not set, then build bashInteractive from + <nixpkgs>. */ + auto shell = getEnv("NIX_BUILD_SHELL", ""); + + if (shell.empty()) { + try { + auto expr = state->parseExprFromString( + "(import <nixpkgs> {}).bashInteractive", absPath(".")); + + Value v; + state->eval(expr, v); + + auto drv = getDerivation(*state, v, false); + if (!drv) { + throw Error( + "the 'bashInteractive' attribute in <nixpkgs> did not evaluate " + "to a derivation"); + } + + pathsToBuild.insert(drv->queryDrvPath()); + + shell = drv->queryOutPath() + "/bin/bash"; + + } catch (Error& e) { + LOG(WARNING) << e.what() << "; will use bash from your environment"; + shell = "bash"; + } + } + + // Build or fetch all dependencies of the derivation. + for (const auto& input : drv.inputDrvs) { + if (std::all_of(envExclude.cbegin(), envExclude.cend(), + [&](const string& exclude) { + return !std::regex_search(input.first, + std::regex(exclude)); + })) { + pathsToBuild.insert(makeDrvPathWithOutputs(input.first, input.second)); + } + } + for (const auto& src : drv.inputSrcs) { + pathsToBuild.insert(src); + } + + buildPaths(pathsToBuild); + + if (dryRun) { + return; + } + + // Set the environment. + auto env = getEnv(); + + auto tmp = getEnv("TMPDIR", getEnv("XDG_RUNTIME_DIR", "/tmp")); + + if (pure) { + decltype(env) newEnv; + for (auto& i : env) { + if (keepVars.count(i.first) != 0u) { + newEnv.emplace(i); + } + } + env = newEnv; + // NixOS hack: prevent /etc/bashrc from sourcing /etc/profile. + env["__ETC_PROFILE_SOURCED"] = "1"; + } + + env["NIX_BUILD_TOP"] = env["TMPDIR"] = env["TEMPDIR"] = env["TMP"] = + env["TEMP"] = tmp; + env["NIX_STORE"] = store->storeDir; + env["NIX_BUILD_CORES"] = std::to_string(settings.buildCores); + + auto passAsFile = tokenizeString<StringSet>(get(drv.env, "passAsFile", "")); + + bool keepTmp = false; + int fileNr = 0; + + for (auto& var : drv.env) { + if (passAsFile.count(var.first) != 0u) { + keepTmp = true; + string fn = ".attr-" + std::to_string(fileNr++); + Path p = (Path)tmpDir + "/" + fn; + writeFile(p, var.second); + env[var.first + "Path"] = p; + } else { + env[var.first] = var.second; + } + } + + restoreAffinity(); + + /* Run a shell using the derivation's environment. For + convenience, source $stdenv/setup to setup additional + environment variables and shell functions. Also don't + lose the current $PATH directories. */ + auto rcfile = (Path)tmpDir + "/rc"; + writeFile( + rcfile, + fmt((keepTmp ? "" : "rm -rf '%1%'; "s) + + "[ -n \"$PS1\" ] && [ -e ~/.bashrc ] && source ~/.bashrc; " + "%2%" + "dontAddDisableDepTrack=1; " + "[ -e $stdenv/setup ] && source $stdenv/setup; " + "%3%" + "PATH=\"%4%:$PATH\"; " + "SHELL=%5%; " + "set +e; " + R"s([ -n "$PS1" ] && PS1='\n\[\033[1;32m\][nix-shell:\w]\$\[\033[0m\] '; )s" + "if [ \"$(type -t runHook)\" = function ]; then runHook " + "shellHook; fi; " + "unset NIX_ENFORCE_PURITY; " + "shopt -u nullglob; " + "unset TZ; %6%" + "%7%", + (Path)tmpDir, (pure ? "" : "p=$PATH; "), + (pure ? "" : "PATH=$PATH:$p; unset p; "), dirOf(shell), shell, + (getenv("TZ") != nullptr + ? (string("export TZ='") + getenv("TZ") + "'; ") + : ""), + envCommand)); + + Strings envStrs; + for (auto& i : env) { + envStrs.push_back(i.first + "=" + i.second); + } + + auto args = interactive ? Strings{"bash", "--rcfile", rcfile} + : Strings{"bash", rcfile}; + + auto envPtrs = stringsToCharPtrs(envStrs); + + environ = envPtrs.data(); + + auto argPtrs = stringsToCharPtrs(args); + + restoreSignals(); + + execvp(shell.c_str(), argPtrs.data()); + + throw SysError("executing shell '%s'", shell); + } + + PathSet pathsToBuild; + + std::map<Path, Path> drvPrefixes; + std::map<Path, Path> resultSymlinks; + std::vector<Path> outPaths; + + for (auto& drvInfo : drvs) { + auto drvPath = drvInfo.queryDrvPath(); + auto outPath = drvInfo.queryOutPath(); + + auto outputName = drvInfo.queryOutputName(); + if (outputName.empty()) { + throw Error("derivation '%s' lacks an 'outputName' attribute", drvPath); + } + + pathsToBuild.insert(drvPath + "!" + outputName); + + std::string drvPrefix; + auto i = drvPrefixes.find(drvPath); + if (i != drvPrefixes.end()) { + drvPrefix = i->second; + } else { + drvPrefix = outLink; + if (!drvPrefixes.empty() != 0u) { + drvPrefix += fmt("-%d", drvPrefixes.size() + 1); + } + drvPrefixes[drvPath] = drvPrefix; + } + + std::string symlink = drvPrefix; + if (outputName != "out") { + symlink += "-" + outputName; + } + + resultSymlinks[symlink] = outPath; + outPaths.push_back(outPath); + } + + buildPaths(pathsToBuild); + + if (dryRun) { + return; + } + + for (auto& symlink : resultSymlinks) { + if (auto store2 = store.dynamic_pointer_cast<LocalFSStore>()) { + store2->addPermRoot(symlink.second, absPath(symlink.first), true); + } + } + + for (auto& path : outPaths) { + std::cout << path << '\n'; + } +} + +static RegisterLegacyCommand s1("nix-build", _main); +static RegisterLegacyCommand s2("nix-shell", _main); diff --git a/third_party/nix/src/nix-channel/nix-channel.cc b/third_party/nix/src/nix-channel/nix-channel.cc new file mode 100644 index 000000000000..00305151747a --- /dev/null +++ b/third_party/nix/src/nix-channel/nix-channel.cc @@ -0,0 +1,272 @@ +#include <regex> + +#include <fcntl.h> +#include <pwd.h> + +#include "download.hh" +#include "globals.hh" +#include "legacy.hh" +#include "shared.hh" +#include "store-api.hh" + +using namespace nix; + +typedef std::map<string, string> Channels; + +static Channels channels; +static Path channelsList; + +// Reads the list of channels. +static void readChannels() { + if (!pathExists(channelsList)) { + return; + } + auto channelsFile = readFile(channelsList); + + for (const auto& line : + tokenizeString<std::vector<string>>(channelsFile, "\n")) { + chomp(line); + if (std::regex_search(line, std::regex("^\\s*\\#"))) { + continue; + } + auto split = tokenizeString<std::vector<string>>(line, " "); + auto url = std::regex_replace(split[0], std::regex("/*$"), ""); + auto name = split.size() > 1 ? split[1] : baseNameOf(url); + channels[name] = url; + } +} + +// Writes the list of channels. +static void writeChannels() { + auto channelsFD = AutoCloseFD{open( + channelsList.c_str(), O_WRONLY | O_CLOEXEC | O_CREAT | O_TRUNC, 0644)}; + if (!channelsFD) { + throw SysError(format("opening '%1%' for writing") % channelsList); + } + for (const auto& channel : channels) { + writeFull(channelsFD.get(), channel.second + " " + channel.first + "\n"); + } +} + +// Adds a channel. +static void addChannel(const string& url, const string& name) { + if (!regex_search(url, std::regex("^(file|http|https)://"))) { + throw Error(format("invalid channel URL '%1%'") % url); + } + if (!regex_search(name, std::regex("^[a-zA-Z0-9_][a-zA-Z0-9_\\.-]*$"))) { + throw Error(format("invalid channel identifier '%1%'") % name); + } + readChannels(); + channels[name] = url; + writeChannels(); +} + +static Path profile; + +// Remove a channel. +static void removeChannel(const string& name) { + readChannels(); + channels.erase(name); + writeChannels(); + + runProgram(settings.nixBinDir + "/nix-env", true, + {"--profile", profile, "--uninstall", name}); +} + +static Path nixDefExpr; + +// Fetch Nix expressions and binary cache URLs from the subscribed channels. +static void update(const StringSet& channelNames) { + readChannels(); + + auto store = openStore(); + + // Download each channel. + Strings exprs; + for (const auto& channel : channels) { + auto name = channel.first; + auto url = channel.second; + if (!(channelNames.empty() || (channelNames.count(name) != 0u))) { + continue; + } + + // We want to download the url to a file to see if it's a tarball while also + // checking if we got redirected in the process, so that we can grab the + // various parts of a nix channel definition from a consistent location if + // the redirect changes mid-download. + CachedDownloadRequest request(url); + request.ttl = 0; + auto dl = getDownloader(); + auto result = dl->downloadCached(store, request); + auto filename = result.path; + url = chomp(result.effectiveUri); + + // If the URL contains a version number, append it to the name + // attribute (so that "nix-env -q" on the channels profile + // shows something useful). + auto cname = name; + std::smatch match; + auto urlBase = baseNameOf(url); + if (std::regex_search(urlBase, match, std::regex("(-\\d.*)$"))) { + cname = cname + (string)match[1]; + } + + std::string extraAttrs; + + bool unpacked = false; + if (std::regex_search(filename, std::regex("\\.tar\\.(gz|bz2|xz)$"))) { + runProgram(settings.nixBinDir + "/nix-build", false, + {"--no-out-link", "--expr", + "import <nix/unpack-channel.nix> " + "{ name = \"" + + cname + "\"; channelName = \"" + name + + "\"; src = builtins.storePath \"" + filename + "\"; }"}); + unpacked = true; + } + + if (!unpacked) { + // Download the channel tarball. + try { + filename = dl->downloadCached( + store, CachedDownloadRequest(url + "/nixexprs.tar.xz")) + .path; + } catch (DownloadError& e) { + filename = + dl->downloadCached(store, + CachedDownloadRequest(url + "/nixexprs.tar.bz2")) + .path; + } + chomp(filename); + } + + // Regardless of where it came from, add the expression representing this + // channel to accumulated expression + exprs.push_back("f: f { name = \"" + cname + "\"; channelName = \"" + name + + "\"; src = builtins.storePath \"" + filename + "\"; " + + extraAttrs + " }"); + } + + // Unpack the channel tarballs into the Nix store and install them + // into the channels profile. + std::cerr << "unpacking channels...\n"; + Strings envArgs{"--profile", profile, + "--file", "<nix/unpack-channel.nix>", + "--install", "--from-expression"}; + for (auto& expr : exprs) { + envArgs.push_back(std::move(expr)); + } + envArgs.push_back("--quiet"); + runProgram(settings.nixBinDir + "/nix-env", false, envArgs); + + // Make the channels appear in nix-env. + struct stat st; + if (lstat(nixDefExpr.c_str(), &st) == 0) { + if (S_ISLNK(st.st_mode)) { + // old-skool ~/.nix-defexpr + if (unlink(nixDefExpr.c_str()) == -1) { + throw SysError(format("unlinking %1%") % nixDefExpr); + } + } + } else if (errno != ENOENT) { + throw SysError(format("getting status of %1%") % nixDefExpr); + } + createDirs(nixDefExpr); + auto channelLink = nixDefExpr + "/channels"; + replaceSymlink(profile, channelLink); +} + +static int _main(int argc, char** argv) { + { + // Figure out the name of the `.nix-channels' file to use + auto home = getHome(); + channelsList = home + "/.nix-channels"; + nixDefExpr = home + "/.nix-defexpr"; + + // Figure out the name of the channels profile. + profile = fmt("%s/profiles/per-user/%s/channels", settings.nixStateDir, + getUserName()); + + enum { cNone, cAdd, cRemove, cList, cUpdate, cRollback } cmd = cNone; + std::vector<string> args; + parseCmdLine(argc, argv, + [&](Strings::iterator& arg, const Strings::iterator& end) { + if (*arg == "--help") { + showManPage("nix-channel"); + } else if (*arg == "--version") { + printVersion("nix-channel"); + } else if (*arg == "--add") { + cmd = cAdd; + } else if (*arg == "--remove") { + cmd = cRemove; + } else if (*arg == "--list") { + cmd = cList; + } else if (*arg == "--update") { + cmd = cUpdate; + } else if (*arg == "--rollback") { + cmd = cRollback; + } else { + args.push_back(std::move(*arg)); + } + return true; + }); + + initPlugins(); + + switch (cmd) { + case cNone: + throw UsageError("no command specified"); + case cAdd: + if (args.empty() || args.size() > 2) { + throw UsageError("'--add' requires one or two arguments"); + } + { + auto url = args[0]; + std::string name; + if (args.size() == 2) { + name = args[1]; + } else { + name = baseNameOf(url); + name = std::regex_replace(name, std::regex("-unstable$"), ""); + name = std::regex_replace(name, std::regex("-stable$"), ""); + } + addChannel(url, name); + } + break; + case cRemove: + if (args.size() != 1) { + throw UsageError("'--remove' requires one argument"); + } + removeChannel(args[0]); + break; + case cList: + if (!args.empty()) { + throw UsageError("'--list' expects no arguments"); + } + readChannels(); + for (const auto& channel : channels) { + std::cout << channel.first << ' ' << channel.second << '\n'; + } + break; + case cUpdate: + update(StringSet(args.begin(), args.end())); + break; + case cRollback: + if (args.size() > 1) { + throw UsageError("'--rollback' has at most one argument"); + } + Strings envArgs{"--profile", profile}; + if (args.size() == 1) { + envArgs.push_back("--switch-generation"); + envArgs.push_back(args[0]); + } else { + envArgs.push_back("--rollback"); + } + runProgram(settings.nixBinDir + "/nix-env", false, envArgs); + break; + } + + return 0; + } +} + +static RegisterLegacyCommand s1("nix-channel", _main); diff --git a/third_party/nix/src/nix-collect-garbage/nix-collect-garbage.cc b/third_party/nix/src/nix-collect-garbage/nix-collect-garbage.cc new file mode 100644 index 000000000000..0050bdda15fb --- /dev/null +++ b/third_party/nix/src/nix-collect-garbage/nix-collect-garbage.cc @@ -0,0 +1,105 @@ +#include <cerrno> +#include <iostream> + +#include <glog/logging.h> + +#include "globals.hh" +#include "legacy.hh" +#include "profiles.hh" +#include "shared.hh" +#include "store-api.hh" + +using namespace nix; + +std::string deleteOlderThan; +bool dryRun = false; + +/* If `-d' was specified, remove all old generations of all profiles. + * Of course, this makes rollbacks to before this point in time + * impossible. */ + +void removeOldGenerations(const std::string& dir) { + if (access(dir.c_str(), R_OK) != 0) { + return; + } + + bool canWrite = access(dir.c_str(), W_OK) == 0; + + for (auto& i : readDirectory(dir)) { + checkInterrupt(); + + auto path = dir + "/" + i.name; + auto type = i.type == DT_UNKNOWN ? getFileType(path) : i.type; + + if (type == DT_LNK && canWrite) { + std::string link; + try { + link = readLink(path); + } catch (SysError& e) { + if (e.errNo == ENOENT) { + continue; + } + } + if (link.find("link") != string::npos) { + LOG(INFO) << "removing old generations of profile " << path; + if (!deleteOlderThan.empty()) { + deleteGenerationsOlderThan(path, deleteOlderThan, dryRun); + } else { + deleteOldGenerations(path, dryRun); + } + } + } else if (type == DT_DIR) { + removeOldGenerations(path); + } + } +} + +static int _main(int argc, char** argv) { + { + bool removeOld = false; + + GCOptions options; + + parseCmdLine(argc, argv, + [&](Strings::iterator& arg, const Strings::iterator& end) { + if (*arg == "--help") { + showManPage("nix-collect-garbage"); + } else if (*arg == "--version") { + printVersion("nix-collect-garbage"); + } else if (*arg == "--delete-old" || *arg == "-d") { + removeOld = true; + } else if (*arg == "--delete-older-than") { + removeOld = true; + deleteOlderThan = getArg(*arg, arg, end); + } else if (*arg == "--dry-run") { + dryRun = true; + } else if (*arg == "--max-freed") { + auto maxFreed = getIntArg<long long>(*arg, arg, end, true); + options.maxFreed = maxFreed >= 0 ? maxFreed : 0; + } else { + return false; + } + return true; + }); + + initPlugins(); + + auto profilesDir = settings.nixStateDir + "/profiles"; + if (removeOld) { + removeOldGenerations(profilesDir); + } + + // Run the actual garbage collector. + if (!dryRun) { + auto store = openStore(); + options.action = GCOptions::gcDeleteDead; + GCResults results; + PrintFreed freed(true, results); + store->collectGarbage(options, results); + } + + return 0; + } +} + +static RegisterLegacyCommand s1("nix-collect-garbage", _main); diff --git a/third_party/nix/src/nix-copy-closure/nix-copy-closure.cc b/third_party/nix/src/nix-copy-closure/nix-copy-closure.cc new file mode 100644 index 000000000000..67765a8cf72c --- /dev/null +++ b/third_party/nix/src/nix-copy-closure/nix-copy-closure.cc @@ -0,0 +1,75 @@ +#include <glog/logging.h> + +#include "legacy.hh" +#include "shared.hh" +#include "store-api.hh" + +using namespace nix; + +static int _main(int argc, char** argv) { + { + auto gzip = false; + auto toMode = true; + auto includeOutputs = false; + auto dryRun = false; + auto useSubstitutes = NoSubstitute; + std::string sshHost; + PathSet storePaths; + + parseCmdLine( + argc, argv, [&](Strings::iterator& arg, const Strings::iterator& end) { + if (*arg == "--help") { + showManPage("nix-copy-closure"); + } else if (*arg == "--version") { + printVersion("nix-copy-closure"); + } else if (*arg == "--gzip" || *arg == "--bzip2" || *arg == "--xz") { + if (*arg != "--gzip") { + LOG(WARNING) << "'" << *arg + << "' is not implemented, falling back to gzip"; + } + gzip = true; + } else if (*arg == "--from") { + toMode = false; + } else if (*arg == "--to") { + toMode = true; + } else if (*arg == "--include-outputs") { + includeOutputs = true; + } else if (*arg == "--show-progress") { + LOG(WARNING) << "'--show-progress' is not implemented"; + } else if (*arg == "--dry-run") { + dryRun = true; + } else if (*arg == "--use-substitutes" || *arg == "-s") { + useSubstitutes = Substitute; + } else if (sshHost.empty()) { + sshHost = *arg; + } else { + storePaths.insert(*arg); + } + return true; + }); + + initPlugins(); + + if (sshHost.empty()) { + throw UsageError("no host name specified"); + } + + auto remoteUri = "ssh://" + sshHost + (gzip ? "?compress=true" : ""); + auto to = toMode ? openStore(remoteUri) : openStore(); + auto from = toMode ? openStore() : openStore(remoteUri); + + PathSet storePaths2; + for (auto& path : storePaths) { + storePaths2.insert(from->followLinksToStorePath(path)); + } + + PathSet closure; + from->computeFSClosure(storePaths2, closure, false, includeOutputs); + + copyPaths(from, to, closure, NoRepair, NoCheckSigs, useSubstitutes); + + return 0; + } +} + +static RegisterLegacyCommand s1("nix-copy-closure", _main); diff --git a/third_party/nix/src/nix-daemon/nix-daemon.cc b/third_party/nix/src/nix-daemon/nix-daemon.cc new file mode 100644 index 000000000000..9f5e8b7cbedb --- /dev/null +++ b/third_party/nix/src/nix-daemon/nix-daemon.cc @@ -0,0 +1,1181 @@ +#include <algorithm> +#include <cerrno> +#include <climits> +#include <csignal> +#include <cstring> + +#include <fcntl.h> +#include <glog/logging.h> +#include <grp.h> +#include <pwd.h> +#include <sys/socket.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <sys/un.h> +#include <sys/wait.h> +#include <unistd.h> + +#include "affinity.hh" +#include "archive.hh" +#include "derivations.hh" +#include "finally.hh" +#include "globals.hh" +#include "legacy.hh" +#include "local-store.hh" +#include "monitor-fd.hh" +#include "serialise.hh" +#include "shared.hh" +#include "util.hh" +#include "worker-protocol.hh" + +#if __APPLE__ || __FreeBSD__ +#include <sys/ucred.h> +#endif + +using namespace nix; + +#ifndef __linux__ +#define SPLICE_F_MOVE 0 +static ssize_t splice(int fd_in, void* off_in, int fd_out, void* off_out, + size_t len, unsigned int flags) { + /* We ignore most parameters, we just have them for conformance with the linux + * syscall */ + std::vector<char> buf(8192); + auto read_count = read(fd_in, buf.data(), buf.size()); + if (read_count == -1) { + return read_count; + } + auto write_count = decltype(read_count)(0); + while (write_count < read_count) { + auto res = + write(fd_out, buf.data() + write_count, read_count - write_count); + if (res == -1) { + return res; + } + write_count += res; + } + return read_count; +} +#endif + +static FdSource from(STDIN_FILENO); +static FdSink to(STDOUT_FILENO); + +/* Logger that forwards log messages to the client, *if* we're in a + state where the protocol allows it (i.e., when canSendStderr is + true). */ +struct TunnelLogger { + struct State { + bool canSendStderr = false; + std::vector<std::string> pendingMsgs; + }; + + Sync<State> state_; + + unsigned int clientVersion; + + explicit TunnelLogger(unsigned int clientVersion) + : clientVersion(clientVersion) {} + + void enqueueMsg(const std::string& s) { + auto state(state_.lock()); + + if (state->canSendStderr) { + assert(state->pendingMsgs.empty()); + try { + to(s); + to.flush(); + } catch (...) { + /* Write failed; that means that the other side is + gone. */ + state->canSendStderr = false; + throw; + } + } else { + state->pendingMsgs.push_back(s); + } + } + + void log(const FormatOrString& fs) { + StringSink buf; + buf << STDERR_NEXT << (fs.s + "\n"); + enqueueMsg(*buf.s); + } + + /* startWork() means that we're starting an operation for which we + want to send out stderr to the client. */ + void startWork() { + auto state(state_.lock()); + state->canSendStderr = true; + + for (auto& msg : state->pendingMsgs) { + to(msg); + } + + state->pendingMsgs.clear(); + + to.flush(); + } + + /* stopWork() means that we're done; stop sending stderr to the + client. */ + void stopWork(bool success = true, const string& msg = "", + unsigned int status = 0) { + auto state(state_.lock()); + + state->canSendStderr = false; + + if (success) { + to << STDERR_LAST; + } else { + to << STDERR_ERROR << msg; + if (status != 0) { + to << status; + } + } + } + + void startActivity(const std::string& s) { + DLOG(INFO) << "startActivity(" << s << ")"; + if (GET_PROTOCOL_MINOR(clientVersion) < 20) { + if (!s.empty()) { + LOG(INFO) << s; + } + return; + } + + StringSink buf; + buf << STDERR_START_ACTIVITY << s; + enqueueMsg(*buf.s); + } +}; + +struct TunnelSink : Sink { + Sink& to; + explicit TunnelSink(Sink& to) : to(to) {} + void operator()(const unsigned char* data, size_t len) override { + to << STDERR_WRITE; + writeString(data, len, to); + } +}; + +struct TunnelSource : BufferedSource { + Source& from; + explicit TunnelSource(Source& from) : from(from) {} + + protected: + size_t readUnbuffered(unsigned char* data, size_t len) override { + to << STDERR_READ << len; + to.flush(); + size_t n = readString(data, len, from); + if (n == 0) { + throw EndOfFile("unexpected end-of-file"); + } + return n; + } +}; + +/* If the NAR archive contains a single file at top-level, then save + the contents of the file to `s'. Otherwise barf. */ +struct RetrieveRegularNARSink : ParseSink { + bool regular{true}; + string s; + + RetrieveRegularNARSink() {} + + void createDirectory(const Path& path) override { regular = false; } + + void receiveContents(unsigned char* data, unsigned int len) override { + s.append((const char*)data, len); + } + + void createSymlink(const Path& path, const string& target) override { + regular = false; + } +}; + +static void performOp(TunnelLogger* logger, const ref<Store>& store, + bool trusted, unsigned int clientVersion, Source& from, + Sink& to, unsigned int op) { + switch (op) { + case wopIsValidPath: { + /* 'readStorePath' could raise an error leading to the connection + being closed. To be able to recover from an invalid path error, + call 'startWork' early, and do 'assertStorePath' afterwards so + that the 'Error' exception handler doesn't close the + connection. */ + Path path = readString(from); + logger->startWork(); + store->assertStorePath(path); + bool result = store->isValidPath(path); + logger->stopWork(); + to << static_cast<uint64_t>(result); + break; + } + + case wopQueryValidPaths: { + auto paths = readStorePaths<PathSet>(*store, from); + logger->startWork(); + PathSet res = store->queryValidPaths(paths); + logger->stopWork(); + to << res; + break; + } + + case wopHasSubstitutes: { + Path path = readStorePath(*store, from); + logger->startWork(); + PathSet res = store->querySubstitutablePaths({path}); + logger->stopWork(); + to << static_cast<uint64_t>(res.find(path) != res.end()); + break; + } + + case wopQuerySubstitutablePaths: { + auto paths = readStorePaths<PathSet>(*store, from); + logger->startWork(); + PathSet res = store->querySubstitutablePaths(paths); + logger->stopWork(); + to << res; + break; + } + + case wopQueryPathHash: { + Path path = readStorePath(*store, from); + logger->startWork(); + auto hash = store->queryPathInfo(path)->narHash; + logger->stopWork(); + to << hash.to_string(Base16, false); + break; + } + + case wopQueryReferences: + case wopQueryReferrers: + case wopQueryValidDerivers: + case wopQueryDerivationOutputs: { + Path path = readStorePath(*store, from); + logger->startWork(); + PathSet paths; + if (op == wopQueryReferences) { + paths = store->queryPathInfo(path)->references; + } else if (op == wopQueryReferrers) { + store->queryReferrers(path, paths); + } else if (op == wopQueryValidDerivers) { + paths = store->queryValidDerivers(path); + } else { + paths = store->queryDerivationOutputs(path); + } + logger->stopWork(); + to << paths; + break; + } + + case wopQueryDerivationOutputNames: { + Path path = readStorePath(*store, from); + logger->startWork(); + StringSet names; + names = store->queryDerivationOutputNames(path); + logger->stopWork(); + to << names; + break; + } + + case wopQueryDeriver: { + Path path = readStorePath(*store, from); + logger->startWork(); + auto deriver = store->queryPathInfo(path)->deriver; + logger->stopWork(); + to << deriver; + break; + } + + case wopQueryPathFromHashPart: { + string hashPart = readString(from); + logger->startWork(); + Path path = store->queryPathFromHashPart(hashPart); + logger->stopWork(); + to << path; + break; + } + + case wopAddToStore: { + bool fixed; + bool recursive; + std::string s; + std::string baseName; + from >> baseName >> fixed /* obsolete */ >> recursive >> s; + /* Compatibility hack. */ + if (!fixed) { + s = "sha256"; + recursive = true; + } + HashType hashAlgo = parseHashType(s); + + TeeSource savedNAR(from); + RetrieveRegularNARSink savedRegular; + + if (recursive) { + /* Get the entire NAR dump from the client and save it to + a string so that we can pass it to + addToStoreFromDump(). */ + ParseSink sink; /* null sink; just parse the NAR */ + parseDump(sink, savedNAR); + } else { + parseDump(savedRegular, from); + } + + logger->startWork(); + if (!savedRegular.regular) { + throw Error("regular file expected"); + } + + auto store2 = store.dynamic_pointer_cast<LocalStore>(); + if (!store2) { + throw Error("operation is only supported by LocalStore"); + } + + Path path = store2->addToStoreFromDump( + recursive ? *savedNAR.data : savedRegular.s, baseName, recursive, + hashAlgo); + logger->stopWork(); + + to << path; + break; + } + + case wopAddTextToStore: { + string suffix = readString(from); + string s = readString(from); + auto refs = readStorePaths<PathSet>(*store, from); + logger->startWork(); + Path path = store->addTextToStore(suffix, s, refs, NoRepair); + logger->stopWork(); + to << path; + break; + } + + case wopExportPath: { + Path path = readStorePath(*store, from); + readInt(from); // obsolete + logger->startWork(); + TunnelSink sink(to); + store->exportPath(path, sink); + logger->stopWork(); + to << 1; + break; + } + + case wopImportPaths: { + logger->startWork(); + TunnelSource source(from); + Paths paths = store->importPaths(source, nullptr, + trusted ? NoCheckSigs : CheckSigs); + logger->stopWork(); + to << paths; + break; + } + + case wopBuildPaths: { + auto drvs = readStorePaths<PathSet>(*store, from); + BuildMode mode = bmNormal; + if (GET_PROTOCOL_MINOR(clientVersion) >= 15) { + mode = (BuildMode)readInt(from); + + /* Repairing is not atomic, so disallowed for "untrusted" + clients. */ + if (mode == bmRepair && !trusted) { + throw Error( + "repairing is not allowed because you are not in " + "'trusted-users'"); + } + } + logger->startWork(); + store->buildPaths(drvs, mode); + logger->stopWork(); + to << 1; + break; + } + + case wopBuildDerivation: { + Path drvPath = readStorePath(*store, from); + BasicDerivation drv; + readDerivation(from, *store, drv); + auto buildMode = (BuildMode)readInt(from); + logger->startWork(); + if (!trusted) { + throw Error("you are not privileged to build derivations"); + } + auto res = store->buildDerivation(drvPath, drv, buildMode); + logger->stopWork(); + to << res.status << res.errorMsg; + break; + } + + case wopEnsurePath: { + Path path = readStorePath(*store, from); + logger->startWork(); + store->ensurePath(path); + logger->stopWork(); + to << 1; + break; + } + + case wopAddTempRoot: { + Path path = readStorePath(*store, from); + logger->startWork(); + store->addTempRoot(path); + logger->stopWork(); + to << 1; + break; + } + + case wopAddIndirectRoot: { + Path path = absPath(readString(from)); + logger->startWork(); + store->addIndirectRoot(path); + logger->stopWork(); + to << 1; + break; + } + + case wopSyncWithGC: { + logger->startWork(); + store->syncWithGC(); + logger->stopWork(); + to << 1; + break; + } + + case wopFindRoots: { + logger->startWork(); + Roots roots = store->findRoots(!trusted); + logger->stopWork(); + + size_t size = 0; + for (auto& i : roots) { + size += i.second.size(); + } + + to << size; + + for (auto& [target, links] : roots) { + for (auto& link : links) { + to << link << target; + } + } + + break; + } + + case wopCollectGarbage: { + GCOptions options; + options.action = (GCOptions::GCAction)readInt(from); + options.pathsToDelete = readStorePaths<PathSet>(*store, from); + from >> options.ignoreLiveness >> options.maxFreed; + // obsolete fields + readInt(from); + readInt(from); + readInt(from); + + GCResults results; + + logger->startWork(); + if (options.ignoreLiveness) { + throw Error("you are not allowed to ignore liveness"); + } + store->collectGarbage(options, results); + logger->stopWork(); + + to << results.paths << results.bytesFreed << 0 /* obsolete */; + + break; + } + + case wopSetOptions: { + settings.keepFailed = readInt(from) != 0u; + settings.keepGoing = readInt(from) != 0u; + settings.tryFallback = readInt(from) != 0u; + readInt(from); // obsolete verbosity + settings.maxBuildJobs.assign(readInt(from)); + settings.maxSilentTime = readInt(from); + readInt(from); // obsolete useBuildHook + settings.verboseBuild = 0 == readInt(from); + readInt(from); // obsolete logType + readInt(from); // obsolete printBuildTrace + settings.buildCores = readInt(from); + settings.useSubstitutes = readInt(from) != 0u; + + StringMap overrides; + if (GET_PROTOCOL_MINOR(clientVersion) >= 12) { + unsigned int n = readInt(from); + for (unsigned int i = 0; i < n; i++) { + string name = readString(from); + string value = readString(from); + overrides.emplace(name, value); + } + } + + logger->startWork(); + + for (auto& i : overrides) { + auto& name(i.first); + auto& value(i.second); + + auto setSubstituters = [&](Setting<Strings>& res) { + if (name != res.name && res.aliases.count(name) == 0) { + return false; + } + StringSet trusted = settings.trustedSubstituters; + for (auto& s : settings.substituters.get()) { + trusted.insert(s); + } + Strings subs; + auto ss = tokenizeString<Strings>(value); + for (auto& s : ss) { + if (trusted.count(s) != 0u) { + subs.push_back(s); + } else { + LOG(WARNING) << "ignoring untrusted substituter '" << s << "'"; + } + } + res = subs; + return true; + }; + + try { + if (name == "ssh-auth-sock") { // obsolete + ; + } else if (trusted || name == settings.buildTimeout.name || + name == "connect-timeout" || + (name == "builders" && value.empty())) { + settings.set(name, value); + } else if (setSubstituters(settings.substituters)) { + ; + } else if (setSubstituters(settings.extraSubstituters)) { + ; + } else { + LOG(WARNING) << "ignoring the user-specified setting '" << name + << "', because it is a " + << "restricted setting and you are not a trusted user"; + } + } catch (UsageError& e) { + LOG(WARNING) << e.what(); + } + } + + logger->stopWork(); + break; + } + + case wopQuerySubstitutablePathInfo: { + Path path = absPath(readString(from)); + logger->startWork(); + SubstitutablePathInfos infos; + store->querySubstitutablePathInfos({path}, infos); + logger->stopWork(); + auto i = infos.find(path); + if (i == infos.end()) { + to << 0; + } else { + to << 1 << i->second.deriver << i->second.references + << i->second.downloadSize << i->second.narSize; + } + break; + } + + case wopQuerySubstitutablePathInfos: { + auto paths = readStorePaths<PathSet>(*store, from); + logger->startWork(); + SubstitutablePathInfos infos; + store->querySubstitutablePathInfos(paths, infos); + logger->stopWork(); + to << infos.size(); + for (auto& i : infos) { + to << i.first << i.second.deriver << i.second.references + << i.second.downloadSize << i.second.narSize; + } + break; + } + + case wopQueryAllValidPaths: { + logger->startWork(); + PathSet paths = store->queryAllValidPaths(); + logger->stopWork(); + to << paths; + break; + } + + case wopQueryPathInfo: { + Path path = readStorePath(*store, from); + std::shared_ptr<const ValidPathInfo> info; + logger->startWork(); + try { + info = store->queryPathInfo(path); + } catch (InvalidPath&) { + if (GET_PROTOCOL_MINOR(clientVersion) < 17) { + throw; + } + } + logger->stopWork(); + if (info) { + if (GET_PROTOCOL_MINOR(clientVersion) >= 17) { + to << 1; + } + to << info->deriver << info->narHash.to_string(Base16, false) + << info->references << info->registrationTime << info->narSize; + if (GET_PROTOCOL_MINOR(clientVersion) >= 16) { + to << static_cast<uint64_t>(info->ultimate) << info->sigs << info->ca; + } + } else { + assert(GET_PROTOCOL_MINOR(clientVersion) >= 17); + to << 0; + } + break; + } + + case wopOptimiseStore: + logger->startWork(); + store->optimiseStore(); + logger->stopWork(); + to << 1; + break; + + case wopVerifyStore: { + bool checkContents; + bool repair; + from >> checkContents >> repair; + logger->startWork(); + if (repair && !trusted) { + throw Error("you are not privileged to repair paths"); + } + bool errors = store->verifyStore(checkContents, (RepairFlag)repair); + logger->stopWork(); + to << static_cast<uint64_t>(errors); + break; + } + + case wopAddSignatures: { + Path path = readStorePath(*store, from); + auto sigs = readStrings<StringSet>(from); + logger->startWork(); + if (!trusted) { + throw Error("you are not privileged to add signatures"); + } + store->addSignatures(path, sigs); + logger->stopWork(); + to << 1; + break; + } + + case wopNarFromPath: { + auto path = readStorePath(*store, from); + logger->startWork(); + logger->stopWork(); + dumpPath(path, to); + break; + } + + case wopAddToStoreNar: { + bool repair; + bool dontCheckSigs; + ValidPathInfo info; + info.path = readStorePath(*store, from); + from >> info.deriver; + if (!info.deriver.empty()) { + store->assertStorePath(info.deriver); + } + info.narHash = Hash(readString(from), htSHA256); + info.references = readStorePaths<PathSet>(*store, from); + from >> info.registrationTime >> info.narSize >> info.ultimate; + info.sigs = readStrings<StringSet>(from); + from >> info.ca >> repair >> dontCheckSigs; + if (!trusted && dontCheckSigs) { + dontCheckSigs = false; + } + if (!trusted) { + info.ultimate = false; + } + + std::string saved; + std::unique_ptr<Source> source; + if (GET_PROTOCOL_MINOR(clientVersion) >= 21) { + source = std::make_unique<TunnelSource>(from); + } else { + TeeSink tee(from); + parseDump(tee, tee.source); + saved = std::move(*tee.source.data); + source = std::make_unique<StringSource>(saved); + } + + logger->startWork(); + + // FIXME: race if addToStore doesn't read source? + store->addToStore(info, *source, (RepairFlag)repair, + dontCheckSigs ? NoCheckSigs : CheckSigs, nullptr); + + logger->stopWork(); + break; + } + + case wopQueryMissing: { + auto targets = readStorePaths<PathSet>(*store, from); + logger->startWork(); + PathSet willBuild; + PathSet willSubstitute; + PathSet unknown; + unsigned long long downloadSize; + unsigned long long narSize; + store->queryMissing(targets, willBuild, willSubstitute, unknown, + downloadSize, narSize); + logger->stopWork(); + to << willBuild << willSubstitute << unknown << downloadSize << narSize; + break; + } + + default: + throw Error(format("invalid operation %1%") % op); + } +} + +static void processConnection(bool trusted, const std::string& userName, + uid_t userId) { + MonitorFdHup monitor(from.fd); + + /* Exchange the greeting. */ + unsigned int magic = readInt(from); + if (magic != WORKER_MAGIC_1) { + throw Error("protocol mismatch"); + } + to << WORKER_MAGIC_2 << PROTOCOL_VERSION; + to.flush(); + unsigned int clientVersion = readInt(from); + + if (clientVersion < 0x10a) { + throw Error("the Nix client version is too old"); + } + + auto tunnelLogger = new TunnelLogger(clientVersion); + // logger = tunnelLogger; + + unsigned int opCount = 0; + + Finally finally([&]() { + _isInterrupted = false; + DLOG(INFO) << opCount << " operations"; + }); + + if (GET_PROTOCOL_MINOR(clientVersion) >= 14 && (readInt(from) != 0u)) { + setAffinityTo(readInt(from)); + } + + readInt(from); // obsolete reserveSpace + + /* Send startup error messages to the client. */ + tunnelLogger->startWork(); + + try { + /* If we can't accept clientVersion, then throw an error + *here* (not above). */ + +#if 0 + /* Prevent users from doing something very dangerous. */ + if (geteuid() == 0 && + querySetting("build-users-group", "") == "") + throw Error("if you run 'nix-daemon' as root, then you MUST set 'build-users-group'!"); +#endif + + /* Open the store. */ + Store::Params params; // FIXME: get params from somewhere + // Disable caching since the client already does that. + params["path-info-cache-size"] = "0"; + auto store = openStore(settings.storeUri, params); + + store->createUser(userName, userId); + + tunnelLogger->stopWork(); + to.flush(); + + /* Process client requests. */ + while (true) { + WorkerOp op; + try { + op = (WorkerOp)readInt(from); + } catch (Interrupted& e) { + break; + } catch (EndOfFile& e) { + break; + } + + opCount++; + + try { + performOp(tunnelLogger, store, trusted, clientVersion, from, to, op); + } catch (Error& e) { + /* If we're not in a state where we can send replies, then + something went wrong processing the input of the + client. This can happen especially if I/O errors occur + during addTextToStore() / importPath(). If that + happens, just send the error message and exit. */ + bool errorAllowed = tunnelLogger->state_.lock()->canSendStderr; + tunnelLogger->stopWork(false, e.msg(), e.status); + if (!errorAllowed) { + throw; + } + } catch (std::bad_alloc& e) { + tunnelLogger->stopWork(false, "Nix daemon out of memory", 1); + throw; + } + + to.flush(); + + assert(!tunnelLogger->state_.lock()->canSendStderr); + }; + + } catch (std::exception& e) { + tunnelLogger->stopWork(false, e.what(), 1); + to.flush(); + return; + } +} + +static void sigChldHandler(int sigNo) { + // Ensure we don't modify errno of whatever we've interrupted + auto saved_errno = errno; + /* Reap all dead children. */ + while (waitpid(-1, nullptr, WNOHANG) > 0) { + ; + } + errno = saved_errno; +} + +static void setSigChldAction(bool autoReap) { + struct sigaction act; + struct sigaction oact; + act.sa_handler = autoReap ? sigChldHandler : SIG_DFL; + sigfillset(&act.sa_mask); + act.sa_flags = 0; + if (sigaction(SIGCHLD, &act, &oact) != 0) { + throw SysError("setting SIGCHLD handler"); + } +} + +bool matchUser(const string& user, const string& group, const Strings& users) { + if (find(users.begin(), users.end(), "*") != users.end()) { + return true; + } + + if (find(users.begin(), users.end(), user) != users.end()) { + return true; + } + + for (auto& i : users) { + if (string(i, 0, 1) == "@") { + if (group == string(i, 1)) { + return true; + } + struct group* gr = getgrnam(i.c_str() + 1); + if (gr == nullptr) { + continue; + } + for (char** mem = gr->gr_mem; *mem != nullptr; mem++) { + if (user == string(*mem)) { + return true; + } + } + } + } + + return false; +} + +struct PeerInfo { + bool pidKnown; + pid_t pid; + bool uidKnown; + uid_t uid; + bool gidKnown; + gid_t gid; +}; + +/* Get the identity of the caller, if possible. */ +static PeerInfo getPeerInfo(int remote) { + PeerInfo peer = {false, 0, false, 0, false, 0}; + +#if defined(SO_PEERCRED) + + ucred cred; + socklen_t credLen = sizeof(cred); + if (getsockopt(remote, SOL_SOCKET, SO_PEERCRED, &cred, &credLen) == -1) { + throw SysError("getting peer credentials"); + } + peer = {true, cred.pid, true, cred.uid, true, cred.gid}; + +#elif defined(LOCAL_PEERCRED) + +#if !defined(SOL_LOCAL) +#define SOL_LOCAL 0 +#endif + + xucred cred; + socklen_t credLen = sizeof(cred); + if (getsockopt(remote, SOL_LOCAL, LOCAL_PEERCRED, &cred, &credLen) == -1) + throw SysError("getting peer credentials"); + peer = {false, 0, true, cred.cr_uid, false, 0}; + +#endif + + return peer; +} + +#define SD_LISTEN_FDS_START 3 + +static void daemonLoop(char** argv) { + if (chdir("/") == -1) { + throw SysError("cannot change current directory"); + } + + /* Get rid of children automatically; don't let them become + zombies. */ + setSigChldAction(true); + + AutoCloseFD fdSocket; + + /* Handle socket-based activation by systemd. */ + if (!getEnv("LISTEN_FDS").empty()) { + if (getEnv("LISTEN_PID") != std::to_string(getpid()) || + getEnv("LISTEN_FDS") != "1") { + throw Error("unexpected systemd environment variables"); + } + fdSocket = SD_LISTEN_FDS_START; + } + + /* Otherwise, create and bind to a Unix domain socket. */ + else { + /* Create and bind to a Unix domain socket. */ + fdSocket = socket(PF_UNIX, SOCK_STREAM, 0); + if (!fdSocket) { + throw SysError("cannot create Unix domain socket"); + } + + string socketPath = settings.nixDaemonSocketFile; + + createDirs(dirOf(socketPath)); + + /* Urgh, sockaddr_un allows path names of only 108 characters. + So chdir to the socket directory so that we can pass a + relative path name. */ + if (chdir(dirOf(socketPath).c_str()) == -1) { + throw SysError("cannot change current directory"); + } + Path socketPathRel = "./" + baseNameOf(socketPath); + + struct sockaddr_un addr; + addr.sun_family = AF_UNIX; + if (socketPathRel.size() >= sizeof(addr.sun_path)) { + throw Error(format("socket path '%1%' is too long") % socketPathRel); + } + strcpy(addr.sun_path, socketPathRel.c_str()); + + unlink(socketPath.c_str()); + + /* Make sure that the socket is created with 0666 permission + (everybody can connect --- provided they have access to the + directory containing the socket). */ + mode_t oldMode = umask(0111); + int res = bind(fdSocket.get(), (struct sockaddr*)&addr, sizeof(addr)); + umask(oldMode); + if (res == -1) { + throw SysError(format("cannot bind to socket '%1%'") % socketPath); + } + + if (chdir("/") == -1) { /* back to the root */ + throw SysError("cannot change current directory"); + } + + if (listen(fdSocket.get(), 5) == -1) { + throw SysError(format("cannot listen on socket '%1%'") % socketPath); + } + } + + closeOnExec(fdSocket.get()); + + /* Loop accepting connections. */ + while (true) { + try { + /* Accept a connection. */ + struct sockaddr_un remoteAddr; + socklen_t remoteAddrLen = sizeof(remoteAddr); + + AutoCloseFD remote = + accept(fdSocket.get(), (struct sockaddr*)&remoteAddr, &remoteAddrLen); + checkInterrupt(); + if (!remote) { + if (errno == EINTR) { + continue; + } + throw SysError("accepting connection"); + } + + closeOnExec(remote.get()); + + bool trusted = false; + PeerInfo peer = getPeerInfo(remote.get()); + + struct passwd* pw = peer.uidKnown ? getpwuid(peer.uid) : nullptr; + string user = pw != nullptr ? pw->pw_name : std::to_string(peer.uid); + + struct group* gr = peer.gidKnown ? getgrgid(peer.gid) : nullptr; + string group = gr != nullptr ? gr->gr_name : std::to_string(peer.gid); + + Strings trustedUsers = settings.trustedUsers; + Strings allowedUsers = settings.allowedUsers; + + if (matchUser(user, group, trustedUsers)) { + trusted = true; + } + + if ((!trusted && !matchUser(user, group, allowedUsers)) || + group == settings.buildUsersGroup) { + throw Error( + format("user '%1%' is not allowed to connect to the Nix daemon") % + user); + } + + LOG(INFO) << "accepted connection from pid " + << (peer.pidKnown ? std::to_string(peer.pid) : "<unknown>") + << ", user " << (peer.uidKnown ? user : "<unknown>") + << (trusted ? " (trusted)" : ""); + + /* Fork a child to handle the connection. */ + ProcessOptions options; + options.errorPrefix = "unexpected Nix daemon error: "; + options.dieWithParent = false; + options.runExitHandlers = true; + options.allowVfork = false; + startProcess( + [&]() { + fdSocket = -1; + + /* Background the daemon. */ + if (setsid() == -1) { + throw SysError(format("creating a new session")); + } + + /* Restore normal handling of SIGCHLD. */ + setSigChldAction(false); + + /* For debugging, stuff the pid into argv[1]. */ + if (peer.pidKnown && (argv[1] != nullptr)) { + string processName = std::to_string(peer.pid); + strncpy(argv[1], processName.c_str(), strlen(argv[1])); + } + + /* Handle the connection. */ + from.fd = remote.get(); + to.fd = remote.get(); + processConnection(trusted, user, peer.uid); + + exit(0); + }, + options); + + } catch (Interrupted& e) { + return; + } catch (Error& e) { + LOG(ERROR) << "error processing connection: " << e.msg(); + } + } +} + +static int _main(int argc, char** argv) { + { + auto stdio = false; + + parseCmdLine(argc, argv, + [&](Strings::iterator& arg, const Strings::iterator& end) { + if (*arg == "--daemon") { + ; /* ignored for backwards compatibility */ + } else if (*arg == "--help") { + showManPage("nix-daemon"); + } else if (*arg == "--version") { + printVersion("nix-daemon"); + } else if (*arg == "--stdio") { + stdio = true; + } else { + return false; + } + return true; + }); + + initPlugins(); + + if (stdio) { + if (getStoreType() == tDaemon) { + /* Forward on this connection to the real daemon */ + auto socketPath = settings.nixDaemonSocketFile; + auto s = socket(PF_UNIX, SOCK_STREAM, 0); + if (s == -1) { + throw SysError("creating Unix domain socket"); + } + + auto socketDir = dirOf(socketPath); + if (chdir(socketDir.c_str()) == -1) { + throw SysError(format("changing to socket directory '%1%'") % + socketDir); + } + + auto socketName = baseNameOf(socketPath); + auto addr = sockaddr_un{}; + addr.sun_family = AF_UNIX; + if (socketName.size() + 1 >= sizeof(addr.sun_path)) { + throw Error(format("socket name %1% is too long") % socketName); + } + strcpy(addr.sun_path, socketName.c_str()); + + if (connect(s, (struct sockaddr*)&addr, sizeof(addr)) == -1) { + throw SysError(format("cannot connect to daemon at %1%") % + socketPath); + } + + auto nfds = (s > STDIN_FILENO ? s : STDIN_FILENO) + 1; + while (true) { + fd_set fds; + FD_ZERO(&fds); + FD_SET(s, &fds); + FD_SET(STDIN_FILENO, &fds); + if (select(nfds, &fds, nullptr, nullptr, nullptr) == -1) { + throw SysError("waiting for data from client or server"); + } + if (FD_ISSET(s, &fds)) { + auto res = splice(s, nullptr, STDOUT_FILENO, nullptr, SSIZE_MAX, + SPLICE_F_MOVE); + if (res == -1) { + throw SysError("splicing data from daemon socket to stdout"); + } + if (res == 0) { + throw EndOfFile("unexpected EOF from daemon socket"); + } + } + if (FD_ISSET(STDIN_FILENO, &fds)) { + auto res = splice(STDIN_FILENO, nullptr, s, nullptr, SSIZE_MAX, + SPLICE_F_MOVE); + if (res == -1) { + throw SysError("splicing data from stdin to daemon socket"); + } + if (res == 0) { + return 0; + } + } + } + } else { + processConnection(true, "root", 0); + } + } else { + daemonLoop(argv); + } + + return 0; + } +} + +static RegisterLegacyCommand s1("nix-daemon", _main); diff --git a/third_party/nix/src/nix-env/nix-env.cc b/third_party/nix/src/nix-env/nix-env.cc new file mode 100644 index 000000000000..e5b76979245d --- /dev/null +++ b/third_party/nix/src/nix-env/nix-env.cc @@ -0,0 +1,1538 @@ +#include <algorithm> +#include <cerrno> +#include <ctime> +#include <iostream> +#include <sstream> + +#include <glog/logging.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <unistd.h> + +#include "attr-path.hh" +#include "common-eval-args.hh" +#include "derivations.hh" +#include "eval.hh" +#include "get-drvs.hh" +#include "globals.hh" +#include "json.hh" +#include "legacy.hh" +#include "names.hh" +#include "profiles.hh" +#include "shared.hh" +#include "store-api.hh" +#include "user-env.hh" +#include "util.hh" +#include "value-to-json.hh" +#include "xml-writer.hh" + +using namespace nix; +using std::cout; + +typedef enum { + srcNixExprDrvs, + srcNixExprs, + srcStorePaths, + srcProfile, + srcAttrPath, + srcUnknown +} InstallSourceType; + +struct InstallSourceInfo { + InstallSourceType type; + Path nixExprPath; /* for srcNixExprDrvs, srcNixExprs */ + Path profile; /* for srcProfile */ + string systemFilter; /* for srcNixExprDrvs */ + Bindings* autoArgs; +}; + +struct Globals { + InstallSourceInfo instSource; + Path profile; + std::shared_ptr<EvalState> state; + bool dryRun; + bool preserveInstalled; + bool removeAll; + string forceName; + bool prebuiltOnly; +}; + +using Operation = void (*)(Globals&, Strings, Strings); + +static string needArg(Strings::iterator& i, Strings& args, const string& arg) { + if (i == args.end()) { + throw UsageError(format("'%1%' requires an argument") % arg); + } + return *i++; +} + +static bool parseInstallSourceOptions(Globals& globals, Strings::iterator& i, + Strings& args, const string& arg) { + if (arg == "--from-expression" || arg == "-E") { + globals.instSource.type = srcNixExprs; + } else if (arg == "--from-profile") { + globals.instSource.type = srcProfile; + globals.instSource.profile = needArg(i, args, arg); + } else if (arg == "--attr" || arg == "-A") { + globals.instSource.type = srcAttrPath; + } else { + return false; + } + return true; +} + +static bool isNixExpr(const Path& path, struct stat& st) { + return S_ISREG(st.st_mode) || + (S_ISDIR(st.st_mode) && pathExists(path + "/default.nix")); +} + +static void getAllExprs(EvalState& state, const Path& path, StringSet& attrs, + Value& v) { + StringSet namesSorted; + for (auto& i : readDirectory(path)) { + namesSorted.insert(i.name); + } + + for (auto& i : namesSorted) { + /* Ignore the manifest.nix used by profiles. This is + necessary to prevent it from showing up in channels (which + are implemented using profiles). */ + if (i == "manifest.nix") { + continue; + } + + Path path2 = path + "/" + i; + + struct stat st; + if (stat(path2.c_str(), &st) == -1) { + continue; // ignore dangling symlinks in ~/.nix-defexpr + } + + if (isNixExpr(path2, st) && + (!S_ISREG(st.st_mode) || hasSuffix(path2, ".nix"))) { + /* Strip off the `.nix' filename suffix (if applicable), + otherwise the attribute cannot be selected with the + `-A' option. Useful if you want to stick a Nix + expression directly in ~/.nix-defexpr. */ + string attrName = i; + if (hasSuffix(attrName, ".nix")) { + attrName = string(attrName, 0, attrName.size() - 4); + } + if (attrs.find(attrName) != attrs.end()) { + LOG(WARNING) << "name collision in input Nix expressions, skipping '" + << path2 << "'"; + continue; + } + attrs.insert(attrName); + /* Load the expression on demand. */ + Value& vFun = state.getBuiltin("import"); + Value& vArg(*state.allocValue()); + mkString(vArg, path2); + mkApp(*state.allocAttr(v, state.symbols.Create(attrName)), vFun, vArg); + } else if (S_ISDIR(st.st_mode)) { + /* `path2' is a directory (with no default.nix in it); + recurse into it. */ + getAllExprs(state, path2, attrs, v); + } + } +} + +static void loadSourceExpr(EvalState& state, const Path& path, Value& v) { + struct stat st; + if (stat(path.c_str(), &st) == -1) { + throw SysError(format("getting information about '%1%'") % path); + } + + if (isNixExpr(path, st)) { + state.evalFile(path, v); + } + + /* The path is a directory. Put the Nix expressions in the + directory in a set, with the file name of each expression as + the attribute name. Recurse into subdirectories (but keep the + set flat, not nested, to make it easier for a user to have a + ~/.nix-defexpr directory that includes some system-wide + directory). */ + else if (S_ISDIR(st.st_mode)) { + state.mkAttrs(v, 1024); + state.mkList(*state.allocAttr(v, state.symbols.Create("_combineChannels")), + 0); + StringSet attrs; + getAllExprs(state, path, attrs, v); + } + + else { + throw Error("path '%s' is not a directory or a Nix expression", path); + } +} + +static void loadDerivations(EvalState& state, const Path& nixExprPath, + const string& systemFilter, Bindings& autoArgs, + const string& pathPrefix, DrvInfos& elems) { + Value vRoot; + loadSourceExpr(state, nixExprPath, vRoot); + + Value& v(*findAlongAttrPath(state, pathPrefix, autoArgs, vRoot)); + + getDerivations(state, v, pathPrefix, autoArgs, elems, true); + + /* Filter out all derivations not applicable to the current + system. */ + for (DrvInfos::iterator i = elems.begin(), j; i != elems.end(); i = j) { + j = i; + j++; + if (systemFilter != "*" && i->querySystem() != systemFilter) { + elems.erase(i); + } + } +} + +static long getPriority(EvalState& state, DrvInfo& drv) { + return drv.queryMetaInt("priority", 0); +} + +static long comparePriorities(EvalState& state, DrvInfo& drv1, DrvInfo& drv2) { + return getPriority(state, drv2) - getPriority(state, drv1); +} + +// FIXME: this function is rather slow since it checks a single path +// at a time. +static bool isPrebuilt(EvalState& state, DrvInfo& elem) { + Path path = elem.queryOutPath(); + if (state.store->isValidPath(path)) { + return true; + } + PathSet ps = state.store->querySubstitutablePaths({path}); + return ps.find(path) != ps.end(); +} + +static void checkSelectorUse(DrvNames& selectors) { + /* Check that all selectors have been used. */ + for (auto& i : selectors) { + if (i.hits == 0 && i.fullName != "*") { + throw Error(format("selector '%1%' matches no derivations") % i.fullName); + } + } +} + +static DrvInfos filterBySelector(EvalState& state, const DrvInfos& allElems, + const Strings& args, bool newestOnly) { + DrvNames selectors = drvNamesFromArgs(args); + if (selectors.empty()) { + selectors.push_back(DrvName("*")); + } + + DrvInfos elems; + set<unsigned int> done; + + for (auto& i : selectors) { + typedef list<std::pair<DrvInfo, unsigned int> > Matches; + Matches matches; + unsigned int n = 0; + for (auto j = allElems.begin(); j != allElems.end(); ++j, ++n) { + DrvName drvName(j->queryName()); + if (i.matches(drvName)) { + i.hits++; + matches.push_back(std::pair<DrvInfo, unsigned int>(*j, n)); + } + } + + /* If `newestOnly', if a selector matches multiple derivations + with the same name, pick the one matching the current + system. If there are still multiple derivations, pick the + one with the highest priority. If there are still multiple + derivations, pick the one with the highest version. + Finally, if there are still multiple derivations, + arbitrarily pick the first one. */ + if (newestOnly) { + /* Map from package names to derivations. */ + typedef map<string, std::pair<DrvInfo, unsigned int> > Newest; + Newest newest; + StringSet multiple; + + for (auto& j : matches) { + DrvName drvName(j.first.queryName()); + long d = 1; + + auto k = newest.find(drvName.name); + + if (k != newest.end()) { + d = j.first.querySystem() == k->second.first.querySystem() + ? 0 + : j.first.querySystem() == settings.thisSystem + ? 1 + : k->second.first.querySystem() == settings.thisSystem + ? -1 + : 0; + if (d == 0) { + d = comparePriorities(state, j.first, k->second.first); + } + if (d == 0) { + d = compareVersions(drvName.version, + DrvName(k->second.first.queryName()).version); + } + } + + if (d > 0) { + newest.erase(drvName.name); + newest.insert(Newest::value_type(drvName.name, j)); + multiple.erase(j.first.queryName()); + } else if (d == 0) { + multiple.insert(j.first.queryName()); + } + } + + matches.clear(); + for (auto& j : newest) { + if (multiple.find(j.second.first.queryName()) != multiple.end()) { + LOG(WARNING) << "warning: there are multiple derivations named '" + << j.second.first.queryName() + << "'; using the first one"; + } + matches.push_back(j.second); + } + } + + /* Insert only those elements in the final list that we + haven't inserted before. */ + for (auto& j : matches) { + if (done.find(j.second) == done.end()) { + done.insert(j.second); + elems.push_back(j.first); + } + } + } + + checkSelectorUse(selectors); + + return elems; +} + +static bool isPath(const string& s) { return s.find('/') != string::npos; } + +static void queryInstSources(EvalState& state, InstallSourceInfo& instSource, + const Strings& args, DrvInfos& elems, + bool newestOnly) { + InstallSourceType type = instSource.type; + if (type == srcUnknown && !args.empty() && isPath(args.front())) { + type = srcStorePaths; + } + + switch (type) { + /* Get the available user environment elements from the + derivations specified in a Nix expression, including only + those with names matching any of the names in `args'. */ + case srcUnknown: + case srcNixExprDrvs: { + /* Load the derivations from the (default or specified) + Nix expression. */ + DrvInfos allElems; + loadDerivations(state, instSource.nixExprPath, instSource.systemFilter, + *instSource.autoArgs, "", allElems); + + elems = filterBySelector(state, allElems, args, newestOnly); + + break; + } + + /* Get the available user environment elements from the Nix + expressions specified on the command line; these should be + functions that take the default Nix expression file as + argument, e.g., if the file is `./foo.nix', then the + argument `x: x.bar' is equivalent to `(x: x.bar) + (import ./foo.nix)' = `(import ./foo.nix).bar'. */ + case srcNixExprs: { + Value vArg; + loadSourceExpr(state, instSource.nixExprPath, vArg); + + for (auto& i : args) { + Expr* eFun = state.parseExprFromString(i, absPath(".")); + Value vFun; + Value vTmp; + state.eval(eFun, vFun); + mkApp(vTmp, vFun, vArg); + getDerivations(state, vTmp, "", *instSource.autoArgs, elems, true); + } + + break; + } + + /* The available user environment elements are specified as a + list of store paths (which may or may not be + derivations). */ + case srcStorePaths: { + for (auto& i : args) { + Path path = state.store->followLinksToStorePath(i); + + string name = baseNameOf(path); + string::size_type dash = name.find('-'); + if (dash != string::npos) { + name = string(name, dash + 1); + } + + DrvInfo elem(state, "", nullptr); + elem.setName(name); + + if (isDerivation(path)) { + elem.setDrvPath(path); + elem.setOutPath( + state.store->derivationFromPath(path).findOutput("out")); + if (name.size() >= drvExtension.size() && + string(name, name.size() - drvExtension.size()) == drvExtension) { + name = string(name, 0, name.size() - drvExtension.size()); + } + } else { + elem.setOutPath(path); + } + + elems.push_back(elem); + } + + break; + } + + /* Get the available user environment elements from another + user environment. These are then filtered as in the + `srcNixExprDrvs' case. */ + case srcProfile: { + elems = filterBySelector(state, queryInstalled(state, instSource.profile), + args, newestOnly); + break; + } + + case srcAttrPath: { + Value vRoot; + loadSourceExpr(state, instSource.nixExprPath, vRoot); + for (auto& i : args) { + Value& v(*findAlongAttrPath(state, i, *instSource.autoArgs, vRoot)); + getDerivations(state, v, "", *instSource.autoArgs, elems, true); + } + break; + } + } +} + +static void printMissing(EvalState& state, DrvInfos& elems) { + PathSet targets; + for (auto& i : elems) { + Path drvPath = i.queryDrvPath(); + if (!drvPath.empty()) { + targets.insert(drvPath); + } else { + targets.insert(i.queryOutPath()); + } + } + + printMissing(state.store, targets); +} + +static bool keep(DrvInfo& drv) { return drv.queryMetaBool("keep", false); } + +static void installDerivations(Globals& globals, const Strings& args, + const Path& profile) { + DLOG(INFO) << "installing derivations"; + + /* Get the set of user environment elements to be installed. */ + DrvInfos newElems; + DrvInfos newElemsTmp; + queryInstSources(*globals.state, globals.instSource, args, newElemsTmp, true); + + /* If --prebuilt-only is given, filter out source-only packages. */ + for (auto& i : newElemsTmp) { + if (!globals.prebuiltOnly || isPrebuilt(*globals.state, i)) { + newElems.push_back(i); + } + } + + StringSet newNames; + for (auto& i : newElems) { + /* `forceName' is a hack to get package names right in some + one-click installs, namely those where the name used in the + path is not the one we want (e.g., `java-front' versus + `java-front-0.9pre15899'). */ + if (!globals.forceName.empty()) { + i.setName(globals.forceName); + } + newNames.insert(DrvName(i.queryName()).name); + } + + while (true) { + string lockToken = optimisticLockProfile(profile); + + DrvInfos allElems(newElems); + + /* Add in the already installed derivations, unless they have + the same name as a to-be-installed element. */ + if (!globals.removeAll) { + DrvInfos installedElems = queryInstalled(*globals.state, profile); + + for (auto& i : installedElems) { + DrvName drvName(i.queryName()); + if (!globals.preserveInstalled && + newNames.find(drvName.name) != newNames.end() && !keep(i)) { + LOG(INFO) << "replacing old '" << i.queryName() << "'"; + } else { + allElems.push_back(i); + } + } + + for (auto& i : newElems) { + LOG(INFO) << "installing " << i.queryName(); + } + } + + printMissing(*globals.state, newElems); + + if (globals.dryRun) { + return; + } + + if (createUserEnv(*globals.state, allElems, profile, + settings.envKeepDerivations, lockToken)) { + break; + } + } +} + +static void opInstall(Globals& globals, Strings opFlags, Strings opArgs) { + for (auto i = opFlags.begin(); i != opFlags.end();) { + string arg = *i++; + if (parseInstallSourceOptions(globals, i, opFlags, arg)) { + ; + } else if (arg == "--preserve-installed" || arg == "-P") { + globals.preserveInstalled = true; + } else if (arg == "--remove-all" || arg == "-r") { + globals.removeAll = true; + } else { + throw UsageError(format("unknown flag '%1%'") % arg); + } + } + + installDerivations(globals, opArgs, globals.profile); +} + +typedef enum { utLt, utLeq, utEq, utAlways } UpgradeType; + +static void upgradeDerivations(Globals& globals, const Strings& args, + UpgradeType upgradeType) { + DLOG(INFO) << "upgrading derivations"; + + /* Upgrade works as follows: we take all currently installed + derivations, and for any derivation matching any selector, look + for a derivation in the input Nix expression that has the same + name and a higher version number. */ + + while (true) { + string lockToken = optimisticLockProfile(globals.profile); + + DrvInfos installedElems = queryInstalled(*globals.state, globals.profile); + + /* Fetch all derivations from the input file. */ + DrvInfos availElems; + queryInstSources(*globals.state, globals.instSource, args, availElems, + false); + + /* Go through all installed derivations. */ + DrvInfos newElems; + for (auto& i : installedElems) { + DrvName drvName(i.queryName()); + + try { + if (keep(i)) { + newElems.push_back(i); + continue; + } + + /* Find the derivation in the input Nix expression + with the same name that satisfies the version + constraints specified by upgradeType. If there are + multiple matches, take the one with the highest + priority. If there are still multiple matches, + take the one with the highest version. + Do not upgrade if it would decrease the priority. */ + auto bestElem = availElems.end(); + string bestVersion; + for (auto j = availElems.begin(); j != availElems.end(); ++j) { + if (comparePriorities(*globals.state, i, *j) > 0) { + continue; + } + DrvName newName(j->queryName()); + if (newName.name == drvName.name) { + int d = compareVersions(drvName.version, newName.version); + if ((upgradeType == utLt && d < 0) || + (upgradeType == utLeq && d <= 0) || + (upgradeType == utEq && d == 0) || upgradeType == utAlways) { + long d2 = -1; + if (bestElem != availElems.end()) { + d2 = comparePriorities(*globals.state, *bestElem, *j); + if (d2 == 0) { + d2 = compareVersions(bestVersion, newName.version); + } + } + if (d2 < 0 && + (!globals.prebuiltOnly || isPrebuilt(*globals.state, *j))) { + bestElem = j; + bestVersion = newName.version; + } + } + } + } + + if (bestElem != availElems.end() && + i.queryOutPath() != bestElem->queryOutPath()) { + const char* action = + compareVersions(drvName.version, bestVersion) <= 0 + ? "upgrading" + : "downgrading"; + LOG(INFO) << action << " '" << i.queryName() << "' to '" + << bestElem->queryName() << "'"; + newElems.push_back(*bestElem); + } else { + newElems.push_back(i); + } + + } catch (Error& e) { + e.addPrefix( + fmt("while trying to find an upgrade for '%s':\n", i.queryName())); + throw; + } + } + + printMissing(*globals.state, newElems); + + if (globals.dryRun) { + return; + } + + if (createUserEnv(*globals.state, newElems, globals.profile, + settings.envKeepDerivations, lockToken)) { + break; + } + } +} + +static void opUpgrade(Globals& globals, Strings opFlags, Strings opArgs) { + UpgradeType upgradeType = utLt; + for (auto i = opFlags.begin(); i != opFlags.end();) { + string arg = *i++; + if (parseInstallSourceOptions(globals, i, opFlags, arg)) { + ; + } else if (arg == "--lt") { + upgradeType = utLt; + } else if (arg == "--leq") { + upgradeType = utLeq; + } else if (arg == "--eq") { + upgradeType = utEq; + } else if (arg == "--always") { + upgradeType = utAlways; + } else { + throw UsageError(format("unknown flag '%1%'") % arg); + } + } + + upgradeDerivations(globals, opArgs, upgradeType); +} + +static void setMetaFlag(EvalState& state, DrvInfo& drv, const string& name, + const string& value) { + Value* v = state.allocValue(); + mkString(*v, value.c_str()); + drv.setMeta(name, v); +} + +static void opSetFlag(Globals& globals, Strings opFlags, Strings opArgs) { + if (!opFlags.empty()) { + throw UsageError(format("unknown flag '%1%'") % opFlags.front()); + } + if (opArgs.size() < 2) { + throw UsageError("not enough arguments to '--set-flag'"); + } + + auto arg = opArgs.begin(); + string flagName = *arg++; + string flagValue = *arg++; + DrvNames selectors = drvNamesFromArgs(Strings(arg, opArgs.end())); + + while (true) { + string lockToken = optimisticLockProfile(globals.profile); + + DrvInfos installedElems = queryInstalled(*globals.state, globals.profile); + + /* Update all matching derivations. */ + for (auto& i : installedElems) { + DrvName drvName(i.queryName()); + for (auto& j : selectors) { + if (j.matches(drvName)) { + LOG(INFO) << "setting flag on '" << i.queryName() << "'"; + j.hits++; + setMetaFlag(*globals.state, i, flagName, flagValue); + break; + } + } + } + + checkSelectorUse(selectors); + + /* Write the new user environment. */ + if (createUserEnv(*globals.state, installedElems, globals.profile, + settings.envKeepDerivations, lockToken)) { + break; + } + } +} + +static void opSet(Globals& globals, Strings opFlags, Strings opArgs) { + auto store2 = globals.state->store.dynamic_pointer_cast<LocalFSStore>(); + if (!store2) { + throw Error("--set is not supported for this Nix store"); + } + + for (auto i = opFlags.begin(); i != opFlags.end();) { + string arg = *i++; + if (parseInstallSourceOptions(globals, i, opFlags, arg)) { + ; + } else { + throw UsageError(format("unknown flag '%1%'") % arg); + } + } + + DrvInfos elems; + queryInstSources(*globals.state, globals.instSource, opArgs, elems, true); + + if (elems.size() != 1) { + throw Error("--set requires exactly one derivation"); + } + + DrvInfo& drv(elems.front()); + + if (!globals.forceName.empty()) { + drv.setName(globals.forceName); + } + + if (!drv.queryDrvPath().empty()) { + PathSet paths = {drv.queryDrvPath()}; + printMissing(globals.state->store, paths); + if (globals.dryRun) { + return; + } + globals.state->store->buildPaths( + paths, globals.state->repair != 0u ? bmRepair : bmNormal); + } else { + printMissing(globals.state->store, {drv.queryOutPath()}); + if (globals.dryRun) { + return; + } + globals.state->store->ensurePath(drv.queryOutPath()); + } + + DLOG(INFO) << "switching to new user environment"; + Path generation = createGeneration(ref<LocalFSStore>(store2), globals.profile, + drv.queryOutPath()); + switchLink(globals.profile, generation); +} + +static void uninstallDerivations(Globals& globals, Strings& selectors, + Path& profile) { + while (true) { + string lockToken = optimisticLockProfile(profile); + + DrvInfos installedElems = queryInstalled(*globals.state, profile); + DrvInfos newElems; + + for (auto& i : installedElems) { + DrvName drvName(i.queryName()); + bool found = false; + for (auto& j : selectors) { + /* !!! the repeated calls to followLinksToStorePath() + are expensive, should pre-compute them. */ + if ((isPath(j) && + i.queryOutPath() == + globals.state->store->followLinksToStorePath(j)) || + DrvName(j).matches(drvName)) { + LOG(INFO) << "uninstalling '" << i.queryName() << "'"; + found = true; + break; + } + } + if (!found) { + newElems.push_back(i); + } + } + + if (globals.dryRun) { + return; + } + + if (createUserEnv(*globals.state, newElems, profile, + settings.envKeepDerivations, lockToken)) { + break; + } + } +} + +static void opUninstall(Globals& globals, Strings opFlags, Strings opArgs) { + if (!opFlags.empty()) { + throw UsageError(format("unknown flag '%1%'") % opFlags.front()); + } + uninstallDerivations(globals, opArgs, globals.profile); +} + +static bool cmpChars(char a, char b) { return toupper(a) < toupper(b); } + +static bool cmpElemByName(const DrvInfo& a, const DrvInfo& b) { + auto a_name = a.queryName(); + auto b_name = b.queryName(); + return lexicographical_compare(a_name.begin(), a_name.end(), b_name.begin(), + b_name.end(), cmpChars); +} + +using Table = list<Strings>; + +void printTable(Table& table) { + auto nrColumns = !table.empty() ? table.front().size() : 0; + + vector<size_t> widths; + widths.resize(nrColumns); + + for (auto& i : table) { + assert(i.size() == nrColumns); + Strings::iterator j; + size_t column; + for (j = i.begin(), column = 0; j != i.end(); ++j, ++column) { + if (j->size() > widths[column]) { + widths[column] = j->size(); + } + } + } + + for (auto& i : table) { + Strings::iterator j; + size_t column; + for (j = i.begin(), column = 0; j != i.end(); ++j, ++column) { + string s = *j; + replace(s.begin(), s.end(), '\n', ' '); + cout << s; + if (column < nrColumns - 1) { + cout << string(widths[column] - s.size() + 2, ' '); + } + } + cout << std::endl; + } +} + +/* This function compares the version of an element against the + versions in the given set of elements. `cvLess' means that only + lower versions are in the set, `cvEqual' means that at most an + equal version is in the set, and `cvGreater' means that there is at + least one element with a higher version in the set. `cvUnavail' + means that there are no elements with the same name in the set. */ + +typedef enum { cvLess, cvEqual, cvGreater, cvUnavail } VersionDiff; + +static VersionDiff compareVersionAgainstSet(const DrvInfo& elem, + const DrvInfos& elems, + string& version) { + DrvName name(elem.queryName()); + + VersionDiff diff = cvUnavail; + version = "?"; + + for (auto& i : elems) { + DrvName name2(i.queryName()); + if (name.name == name2.name) { + int d = compareVersions(name.version, name2.version); + if (d < 0) { + diff = cvGreater; + version = name2.version; + } else if (diff != cvGreater && d == 0) { + diff = cvEqual; + version = name2.version; + } else if (diff != cvGreater && diff != cvEqual && d > 0) { + diff = cvLess; + if (version.empty() || compareVersions(version, name2.version) < 0) { + version = name2.version; + } + } + } + } + + return diff; +} + +static void queryJSON(Globals& globals, vector<DrvInfo>& elems) { + JSONObject topObj(cout, true); + for (auto& i : elems) { + JSONObject pkgObj = topObj.object(i.attrPath); + + auto drvName = DrvName(i.queryName()); + pkgObj.attr("name", drvName.fullName); + pkgObj.attr("pname", drvName.name); + pkgObj.attr("version", drvName.version); + pkgObj.attr("system", i.querySystem()); + + JSONObject metaObj = pkgObj.object("meta"); + StringSet metaNames = i.queryMetaNames(); + for (auto& j : metaNames) { + auto placeholder = metaObj.placeholder(j); + Value* v = i.queryMeta(j); + if (v == nullptr) { + LOG(ERROR) << "derivation '" << i.queryName() + << "' has invalid meta attribute '" << j << "'"; + placeholder.write(nullptr); + } else { + PathSet context; + printValueAsJSON(*globals.state, true, *v, placeholder, context); + } + } + } +} + +static void opQuery(Globals& globals, Strings opFlags, Strings opArgs) { + Strings remaining; + string attrPath; + + bool printStatus = false; + bool printName = true; + bool printAttrPath = false; + bool printSystem = false; + bool printDrvPath = false; + bool printOutPath = false; + bool printDescription = false; + bool printMeta = false; + bool compareVersions = false; + bool xmlOutput = false; + bool jsonOutput = false; + + enum { sInstalled, sAvailable } source = sInstalled; + + settings.readOnlyMode = true; /* makes evaluation a bit faster */ + + for (auto i = opFlags.begin(); i != opFlags.end();) { + string arg = *i++; + if (arg == "--status" || arg == "-s") { + printStatus = true; + } else if (arg == "--no-name") { + printName = false; + } else if (arg == "--system") { + printSystem = true; + } else if (arg == "--description") { + printDescription = true; + } else if (arg == "--compare-versions" || arg == "-c") { + compareVersions = true; + } else if (arg == "--drv-path") { + printDrvPath = true; + } else if (arg == "--out-path") { + printOutPath = true; + } else if (arg == "--meta") { + printMeta = true; + } else if (arg == "--installed") { + source = sInstalled; + } else if (arg == "--available" || arg == "-a") { + source = sAvailable; + } else if (arg == "--xml") { + xmlOutput = true; + } else if (arg == "--json") { + jsonOutput = true; + } else if (arg == "--attr-path" || arg == "-P") { + printAttrPath = true; + } else if (arg == "--attr" || arg == "-A") { + attrPath = needArg(i, opFlags, arg); + } else { + throw UsageError(format("unknown flag '%1%'") % arg); + } + } + + /* Obtain derivation information from the specified source. */ + DrvInfos availElems; + DrvInfos installedElems; + + if (source == sInstalled || compareVersions || printStatus) { + installedElems = queryInstalled(*globals.state, globals.profile); + } + + if (source == sAvailable || compareVersions) { + loadDerivations(*globals.state, globals.instSource.nixExprPath, + globals.instSource.systemFilter, + *globals.instSource.autoArgs, attrPath, availElems); + } + + DrvInfos elems_ = filterBySelector( + *globals.state, source == sInstalled ? installedElems : availElems, + opArgs, false); + + DrvInfos& otherElems(source == sInstalled ? availElems : installedElems); + + /* Sort them by name. */ + /* !!! */ + vector<DrvInfo> elems; + for (auto& i : elems_) { + elems.push_back(i); + } + sort(elems.begin(), elems.end(), cmpElemByName); + + /* We only need to know the installed paths when we are querying + the status of the derivation. */ + PathSet installed; /* installed paths */ + + if (printStatus) { + for (auto& i : installedElems) { + installed.insert(i.queryOutPath()); + } + } + + /* Query which paths have substitutes. */ + PathSet validPaths; + PathSet substitutablePaths; + if (printStatus || globals.prebuiltOnly) { + PathSet paths; + for (auto& i : elems) { + try { + paths.insert(i.queryOutPath()); + } catch (AssertionError& e) { + DLOG(WARNING) << "skipping derivation named '" << i.queryName() + << "' which gives an assertion failure"; + i.setFailed(); + } + } + validPaths = globals.state->store->queryValidPaths(paths); + substitutablePaths = globals.state->store->querySubstitutablePaths(paths); + } + + /* Print the desired columns, or XML output. */ + if (jsonOutput) { + queryJSON(globals, elems); + return; + } + + bool tty = isatty(STDOUT_FILENO) != 0; + RunPager pager; + + Table table; + std::ostringstream dummy; + XMLWriter xml(true, *(xmlOutput ? &cout : &dummy)); + XMLOpenElement xmlRoot(xml, "items"); + + for (auto& i : elems) { + try { + if (i.hasFailed()) { + continue; + } + + // Activity act(*logger, lvlDebug, format("outputting query result '%1%'") + // % i.attrPath); + + if (globals.prebuiltOnly && + validPaths.find(i.queryOutPath()) == validPaths.end() && + substitutablePaths.find(i.queryOutPath()) == + substitutablePaths.end()) { + continue; + } + + /* For table output. */ + Strings columns; + + /* For XML output. */ + XMLAttrs attrs; + + if (printStatus) { + Path outPath = i.queryOutPath(); + bool hasSubs = + substitutablePaths.find(outPath) != substitutablePaths.end(); + bool isInstalled = installed.find(outPath) != installed.end(); + bool isValid = validPaths.find(outPath) != validPaths.end(); + if (xmlOutput) { + attrs["installed"] = isInstalled ? "1" : "0"; + attrs["valid"] = isValid ? "1" : "0"; + attrs["substitutable"] = hasSubs ? "1" : "0"; + } else { + columns.push_back((string)(isInstalled ? "I" : "-") + + (isValid ? "P" : "-") + (hasSubs ? "S" : "-")); + } + } + + if (xmlOutput) { + attrs["attrPath"] = i.attrPath; + } else if (printAttrPath) { + columns.push_back(i.attrPath); + } + + if (xmlOutput) { + auto drvName = DrvName(i.queryName()); + attrs["name"] = drvName.fullName; + attrs["pname"] = drvName.name; + attrs["version"] = drvName.version; + } else if (printName) { + columns.push_back(i.queryName()); + } + + if (compareVersions) { + /* Compare this element against the versions of the + same named packages in either the set of available + elements, or the set of installed elements. !!! + This is O(N * M), should be O(N * lg M). */ + string version; + VersionDiff diff = compareVersionAgainstSet(i, otherElems, version); + + char ch; + switch (diff) { + case cvLess: + ch = '>'; + break; + case cvEqual: + ch = '='; + break; + case cvGreater: + ch = '<'; + break; + case cvUnavail: + ch = '-'; + break; + default: + abort(); + } + + if (xmlOutput) { + if (diff != cvUnavail) { + attrs["versionDiff"] = ch; + attrs["maxComparedVersion"] = version; + } + } else { + string column = (string) "" + ch + " " + version; + if (diff == cvGreater && tty) { + column = ANSI_RED + column + ANSI_NORMAL; + } + columns.push_back(column); + } + } + + if (xmlOutput) { + if (!i.querySystem().empty()) { + attrs["system"] = i.querySystem(); + } + } else if (printSystem) { + columns.push_back(i.querySystem()); + } + + if (printDrvPath) { + string drvPath = i.queryDrvPath(); + if (xmlOutput) { + if (!drvPath.empty()) { + attrs["drvPath"] = drvPath; + } + } else { + columns.push_back(drvPath.empty() ? "-" : drvPath); + } + } + + if (printOutPath && !xmlOutput) { + DrvInfo::Outputs outputs = i.queryOutputs(); + string s; + for (auto& j : outputs) { + if (!s.empty()) { + s += ';'; + } + if (j.first != "out") { + s += j.first; + s += "="; + } + s += j.second; + } + columns.push_back(s); + } + + if (printDescription) { + string descr = i.queryMetaString("description"); + if (xmlOutput) { + if (!descr.empty()) { + attrs["description"] = descr; + } + } else { + columns.push_back(descr); + } + } + + if (xmlOutput) { + if (printOutPath || printMeta) { + XMLOpenElement item(xml, "item", attrs); + if (printOutPath) { + DrvInfo::Outputs outputs = i.queryOutputs(); + for (auto& j : outputs) { + XMLAttrs attrs2; + attrs2["name"] = j.first; + attrs2["path"] = j.second; + xml.writeEmptyElement("output", attrs2); + } + } + if (printMeta) { + StringSet metaNames = i.queryMetaNames(); + for (auto& j : metaNames) { + XMLAttrs attrs2; + attrs2["name"] = j; + Value* v = i.queryMeta(j); + if (v == nullptr) { + LOG(ERROR) << "derivation '" << i.queryName() + << "' has invalid meta attribute '" << j << "'"; + } else { + if (v->type == tString) { + attrs2["type"] = "string"; + attrs2["value"] = v->string.s; + xml.writeEmptyElement("meta", attrs2); + } else if (v->type == tInt) { + attrs2["type"] = "int"; + attrs2["value"] = (format("%1%") % v->integer).str(); + xml.writeEmptyElement("meta", attrs2); + } else if (v->type == tFloat) { + attrs2["type"] = "float"; + attrs2["value"] = (format("%1%") % v->fpoint).str(); + xml.writeEmptyElement("meta", attrs2); + } else if (v->type == tBool) { + attrs2["type"] = "bool"; + attrs2["value"] = v->boolean ? "true" : "false"; + xml.writeEmptyElement("meta", attrs2); + } else if (v->isList()) { + attrs2["type"] = "strings"; + XMLOpenElement m(xml, "meta", attrs2); + for (unsigned int j = 0; j < v->listSize(); ++j) { + if (v->listElems()[j]->type != tString) { + continue; + } + XMLAttrs attrs3; + attrs3["value"] = v->listElems()[j]->string.s; + xml.writeEmptyElement("string", attrs3); + } + } else if (v->type == tAttrs) { + attrs2["type"] = "strings"; + XMLOpenElement m(xml, "meta", attrs2); + Bindings& attrs = *v->attrs; + for (auto& [name, a] : attrs) { + if (a.value->type != tString) { + continue; + } + XMLAttrs attrs3; + attrs3["type"] = name; + attrs3["value"] = a.value->string.s; + xml.writeEmptyElement("string", attrs3); + } + } + } + } + } + } else { + xml.writeEmptyElement("item", attrs); + } + } else { + table.push_back(columns); + } + + cout.flush(); + + } catch (AssertionError& e) { + DLOG(WARNING) << "skipping derivation named '" << i.queryName() + << "' which gives an assertion failure"; + } catch (Error& e) { + e.addPrefix( + fmt("while querying the derivation named '%1%':\n", i.queryName())); + throw; + } + } + + if (!xmlOutput) { + printTable(table); + } +} + +static void opSwitchProfile(Globals& globals, Strings opFlags, Strings opArgs) { + if (!opFlags.empty()) { + throw UsageError(format("unknown flag '%1%'") % opFlags.front()); + } + if (opArgs.size() != 1) { + throw UsageError(format("exactly one argument expected")); + } + + Path profile = absPath(opArgs.front()); + Path profileLink = getHome() + "/.nix-profile"; + + switchLink(profileLink, profile); +} + +static const int prevGen = -2; + +static void switchGeneration(Globals& globals, int dstGen) { + PathLocks lock; + lockProfile(lock, globals.profile); + + int curGen; + Generations gens = findGenerations(globals.profile, curGen); + + Generation dst; + for (auto& i : gens) { + if ((dstGen == prevGen && i.number < curGen) || + (dstGen >= 0 && i.number == dstGen)) { + dst = i; + } + } + + if (!dst) { + if (dstGen == prevGen) { + throw Error(format("no generation older than the current (%1%) exists") % + curGen); + } + throw Error(format("generation %1% does not exist") % dstGen); + } + + LOG(INFO) << "switching from generation " << curGen << " to " << dst.number; + + if (globals.dryRun) { + return; + } + + switchLink(globals.profile, dst.path); +} + +static void opSwitchGeneration(Globals& globals, Strings opFlags, + Strings opArgs) { + if (!opFlags.empty()) { + throw UsageError(format("unknown flag '%1%'") % opFlags.front()); + } + if (opArgs.size() != 1) { + throw UsageError(format("exactly one argument expected")); + } + + int dstGen; + if (!string2Int(opArgs.front(), dstGen)) { + throw UsageError(format("expected a generation number")); + } + + switchGeneration(globals, dstGen); +} + +static void opRollback(Globals& globals, Strings opFlags, Strings opArgs) { + if (!opFlags.empty()) { + throw UsageError(format("unknown flag '%1%'") % opFlags.front()); + } + if (!opArgs.empty()) { + throw UsageError(format("no arguments expected")); + } + + switchGeneration(globals, prevGen); +} + +static void opListGenerations(Globals& globals, Strings opFlags, + Strings opArgs) { + if (!opFlags.empty()) { + throw UsageError(format("unknown flag '%1%'") % opFlags.front()); + } + if (!opArgs.empty()) { + throw UsageError(format("no arguments expected")); + } + + PathLocks lock; + lockProfile(lock, globals.profile); + + int curGen; + Generations gens = findGenerations(globals.profile, curGen); + + RunPager pager; + + for (auto& i : gens) { + tm t; + if (localtime_r(&i.creationTime, &t) == nullptr) { + throw Error("cannot convert time"); + } + cout << format("%|4| %|4|-%|02|-%|02| %|02|:%|02|:%|02| %||\n") % + i.number % (t.tm_year + 1900) % (t.tm_mon + 1) % t.tm_mday % + t.tm_hour % t.tm_min % t.tm_sec % + (i.number == curGen ? "(current)" : ""); + } +} + +static void opDeleteGenerations(Globals& globals, Strings opFlags, + Strings opArgs) { + if (!opFlags.empty()) { + throw UsageError(format("unknown flag '%1%'") % opFlags.front()); + } + + if (opArgs.size() == 1 && opArgs.front() == "old") { + deleteOldGenerations(globals.profile, globals.dryRun); + } else if (opArgs.size() == 1 && opArgs.front().find('d') != string::npos) { + deleteGenerationsOlderThan(globals.profile, opArgs.front(), globals.dryRun); + } else if (opArgs.size() == 1 && opArgs.front().find('+') != string::npos) { + if (opArgs.front().size() < 2) { + throw Error(format("invalid number of generations โ%1%โ") % + opArgs.front()); + } + string str_max = string(opArgs.front(), 1, opArgs.front().size()); + int max; + if (!string2Int(str_max, max) || max == 0) { + throw Error(format("invalid number of generations to keep โ%1%โ") % + opArgs.front()); + } + deleteGenerationsGreaterThan(globals.profile, max, globals.dryRun); + } else { + std::set<unsigned int> gens; + for (auto& i : opArgs) { + unsigned int n; + if (!string2Int(i, n)) { + throw UsageError(format("invalid generation number '%1%'") % i); + } + gens.insert(n); + } + deleteGenerations(globals.profile, gens, globals.dryRun); + } +} + +static void opVersion(Globals& globals, Strings opFlags, Strings opArgs) { + printVersion("nix-env"); +} + +static int _main(int argc, char** argv) { + { + Strings opFlags; + Strings opArgs; + Operation op = nullptr; + RepairFlag repair = NoRepair; + string file; + + Globals globals; + + globals.instSource.type = srcUnknown; + globals.instSource.nixExprPath = getHome() + "/.nix-defexpr"; + globals.instSource.systemFilter = "*"; + + if (!pathExists(globals.instSource.nixExprPath)) { + try { + createDirs(globals.instSource.nixExprPath); + replaceSymlink(fmt("%s/profiles/per-user/%s/channels", + settings.nixStateDir, getUserName()), + globals.instSource.nixExprPath + "/channels"); + if (getuid() != 0) { + replaceSymlink( + fmt("%s/profiles/per-user/root/channels", settings.nixStateDir), + globals.instSource.nixExprPath + "/channels_root"); + } + } catch (Error&) { + } + } + + globals.dryRun = false; + globals.preserveInstalled = false; + globals.removeAll = false; + globals.prebuiltOnly = false; + + struct MyArgs : LegacyArgs, MixEvalArgs { + using LegacyArgs::LegacyArgs; + }; + + MyArgs myArgs(baseNameOf(argv[0]), [&](Strings::iterator& arg, + const Strings::iterator& end) { + Operation oldOp = op; + + if (*arg == "--help") { + showManPage("nix-env"); + } else if (*arg == "--version") { + op = opVersion; + } else if (*arg == "--install" || *arg == "-i") { + op = opInstall; + } else if (*arg == + "--force-name") { // undocumented flag for nix-install-package + globals.forceName = getArg(*arg, arg, end); + } else if (*arg == "--uninstall" || *arg == "-e") { + op = opUninstall; + } else if (*arg == "--upgrade" || *arg == "-u") { + op = opUpgrade; + } else if (*arg == "--set-flag") { + op = opSetFlag; + } else if (*arg == "--set") { + op = opSet; + } else if (*arg == "--query" || *arg == "-q") { + op = opQuery; + } else if (*arg == "--profile" || *arg == "-p") { + globals.profile = absPath(getArg(*arg, arg, end)); + } else if (*arg == "--file" || *arg == "-f") { + file = getArg(*arg, arg, end); + } else if (*arg == "--switch-profile" || *arg == "-S") { + op = opSwitchProfile; + } else if (*arg == "--switch-generation" || *arg == "-G") { + op = opSwitchGeneration; + } else if (*arg == "--rollback") { + op = opRollback; + } else if (*arg == "--list-generations") { + op = opListGenerations; + } else if (*arg == "--delete-generations") { + op = opDeleteGenerations; + } else if (*arg == "--dry-run") { + LOG(INFO) << "(dry run; not doing anything)"; + globals.dryRun = true; + } else if (*arg == "--system-filter") { + globals.instSource.systemFilter = getArg(*arg, arg, end); + } else if (*arg == "--prebuilt-only" || *arg == "-b") { + globals.prebuiltOnly = true; + } else if (*arg == "--repair") { + repair = Repair; + } else if (*arg != "" && arg->at(0) == '-') { + opFlags.push_back(*arg); + /* FIXME: hacky */ + if (*arg == "--from-profile" || + (op == opQuery && (*arg == "--attr" || *arg == "-A"))) { + opFlags.push_back(getArg(*arg, arg, end)); + } + } else { + opArgs.push_back(*arg); + } + + if ((oldOp != nullptr) && oldOp != op) { + throw UsageError("only one operation may be specified"); + } + + return true; + }); + + myArgs.parseCmdline(argvToStrings(argc, argv)); + + initPlugins(); + + if (op == nullptr) { + throw UsageError("no operation specified"); + } + + auto store = openStore(); + + globals.state = + std::shared_ptr<EvalState>(new EvalState(myArgs.searchPath, store)); + globals.state->repair = repair; + + if (!file.empty()) { + globals.instSource.nixExprPath = lookupFileArg(*globals.state, file); + } + + globals.instSource.autoArgs = myArgs.getAutoArgs(*globals.state); + + if (globals.profile.empty()) { + globals.profile = getEnv("NIX_PROFILE", ""); + } + + if (globals.profile.empty()) { + Path profileLink = getHome() + "/.nix-profile"; + try { + if (!pathExists(profileLink)) { + replaceSymlink(getuid() == 0 + ? settings.nixStateDir + "/profiles/default" + : fmt("%s/profiles/per-user/%s/profile", + settings.nixStateDir, getUserName()), + profileLink); + } + globals.profile = absPath(readLink(profileLink), dirOf(profileLink)); + } catch (Error&) { + globals.profile = profileLink; + } + } + + op(globals, opFlags, opArgs); + + globals.state->printStats(); + + return 0; + } +} + +static RegisterLegacyCommand s1("nix-env", _main); diff --git a/third_party/nix/src/nix-env/user-env.cc b/third_party/nix/src/nix-env/user-env.cc new file mode 100644 index 000000000000..2ca56ed96620 --- /dev/null +++ b/third_party/nix/src/nix-env/user-env.cc @@ -0,0 +1,166 @@ +#include "user-env.hh" + +#include <glog/logging.h> + +#include "derivations.hh" +#include "eval-inline.hh" +#include "eval.hh" +#include "globals.hh" +#include "profiles.hh" +#include "shared.hh" +#include "store-api.hh" +#include "util.hh" + +namespace nix { + +DrvInfos queryInstalled(EvalState& state, const Path& userEnv) { + DrvInfos elems; + Path manifestFile = userEnv + "/manifest.nix"; + if (pathExists(manifestFile)) { + Value v; + state.evalFile(manifestFile, v); + Bindings& bindings(*Bindings::NewGC()); + getDerivations(state, v, "", bindings, elems, false); + } + return elems; +} + +bool createUserEnv(EvalState& state, DrvInfos& elems, const Path& profile, + bool keepDerivations, const string& lockToken) { + /* Build the components in the user environment, if they don't + exist already. */ + PathSet drvsToBuild; + for (auto& i : elems) { + if (!i.queryDrvPath().empty()) { + drvsToBuild.insert(i.queryDrvPath()); + } + } + + DLOG(INFO) << "building user environment dependencies"; + state.store->buildPaths(drvsToBuild, + state.repair != 0u ? bmRepair : bmNormal); + + /* Construct the whole top level derivation. */ + PathSet references; + Value manifest; + state.mkList(manifest, elems.size()); + unsigned int n = 0; + for (auto& i : elems) { + /* Create a pseudo-derivation containing the name, system, + output paths, and optionally the derivation path, as well + as the meta attributes. */ + Path drvPath = keepDerivations ? i.queryDrvPath() : ""; + + Value& v(*state.allocValue()); + manifest.listElems()[n++] = &v; + state.mkAttrs(v, 16); + + mkString(*state.allocAttr(v, state.sType), "derivation"); + mkString(*state.allocAttr(v, state.sName), i.queryName()); + auto system = i.querySystem(); + if (!system.empty()) { + mkString(*state.allocAttr(v, state.sSystem), system); + } + mkString(*state.allocAttr(v, state.sOutPath), i.queryOutPath()); + if (!drvPath.empty()) { + mkString(*state.allocAttr(v, state.sDrvPath), i.queryDrvPath()); + } + + // Copy each output meant for installation. + DrvInfo::Outputs outputs = i.queryOutputs(true); + Value& vOutputs = *state.allocAttr(v, state.sOutputs); + state.mkList(vOutputs, outputs.size()); + unsigned int m = 0; + for (auto& j : outputs) { + mkString(*(vOutputs.listElems()[m++] = state.allocValue()), j.first); + Value& vOutputs = *state.allocAttr(v, state.symbols.Create(j.first)); + state.mkAttrs(vOutputs, 2); + mkString(*state.allocAttr(vOutputs, state.sOutPath), j.second); + + /* This is only necessary when installing store paths, e.g., + `nix-env -i /nix/store/abcd...-foo'. */ + state.store->addTempRoot(j.second); + state.store->ensurePath(j.second); + + references.insert(j.second); + } + + // Copy the meta attributes. + Value& vMeta = *state.allocAttr(v, state.sMeta); + state.mkAttrs(vMeta, 16); + StringSet metaNames = i.queryMetaNames(); + for (auto& j : metaNames) { + Value* v = i.queryMeta(j); + if (v == nullptr) { + continue; + } + vMeta.attrs->push_back(Attr(state.symbols.Create(j), v)); + } + + if (!drvPath.empty()) { + references.insert(drvPath); + } + } + + /* Also write a copy of the list of user environment elements to + the store; we need it for future modifications of the + environment. */ + Path manifestFile = state.store->addTextToStore( + "env-manifest.nix", (format("%1%") % manifest).str(), references); + + /* Get the environment builder expression. */ + Value envBuilder; + state.evalFile(state.findFile("nix/buildenv.nix"), envBuilder); + + /* Construct a Nix expression that calls the user environment + builder with the manifest as argument. */ + Value args; + Value topLevel; + state.mkAttrs(args, 3); + mkString(*state.allocAttr(args, state.symbols.Create("manifest")), + manifestFile, {manifestFile}); + args.attrs->push_back(Attr(state.symbols.Create("derivations"), &manifest)); + mkApp(topLevel, envBuilder, args); + + /* Evaluate it. */ + DLOG(INFO) << "evaluating user environment builder"; + state.forceValue(topLevel); + PathSet context; + Attr& aDrvPath(topLevel.attrs->find(state.sDrvPath)->second); + Path topLevelDrv = + state.coerceToPath(aDrvPath.pos != nullptr ? *(aDrvPath.pos) : noPos, + *(aDrvPath.value), context); + Attr& aOutPath(topLevel.attrs->find(state.sOutPath)->second); + Path topLevelOut = + state.coerceToPath(aOutPath.pos != nullptr ? *(aOutPath.pos) : noPos, + *(aOutPath.value), context); + + /* Realise the resulting store expression. */ + DLOG(INFO) << "building user environment"; + state.store->buildPaths({topLevelDrv}, + state.repair != 0u ? bmRepair : bmNormal); + + /* Switch the current user environment to the output path. */ + auto store2 = state.store.dynamic_pointer_cast<LocalFSStore>(); + + if (store2) { + PathLocks lock; + lockProfile(lock, profile); + + Path lockTokenCur = optimisticLockProfile(profile); + if (lockToken != lockTokenCur) { + LOG(WARNING) << "profile '" << profile + << "' changed while we were busy; restarting"; + return false; + } + + DLOG(INFO) << "switching to new user environment"; + Path generation = + createGeneration(ref<LocalFSStore>(store2), profile, topLevelOut); + switchLink(profile, generation); + } + + return true; +} + +} // namespace nix diff --git a/third_party/nix/src/nix-env/user-env.hh b/third_party/nix/src/nix-env/user-env.hh new file mode 100644 index 000000000000..6111b21c353d --- /dev/null +++ b/third_party/nix/src/nix-env/user-env.hh @@ -0,0 +1,12 @@ +#pragma once + +#include "get-drvs.hh" + +namespace nix { + +DrvInfos queryInstalled(EvalState& state, const Path& userEnv); + +bool createUserEnv(EvalState& state, DrvInfos& elems, const Path& profile, + bool keepDerivations, const string& lockToken); + +} // namespace nix diff --git a/third_party/nix/src/nix-instantiate/nix-instantiate.cc b/third_party/nix/src/nix-instantiate/nix-instantiate.cc new file mode 100644 index 000000000000..95e5341ade4a --- /dev/null +++ b/third_party/nix/src/nix-instantiate/nix-instantiate.cc @@ -0,0 +1,207 @@ +#include <iostream> +#include <map> + +#include "attr-path.hh" +#include "common-eval-args.hh" +#include "eval-inline.hh" +#include "eval.hh" +#include "get-drvs.hh" +#include "globals.hh" +#include "legacy.hh" +#include "shared.hh" +#include "store-api.hh" +#include "util.hh" +#include "value-to-json.hh" +#include "value-to-xml.hh" + +using namespace nix; + +static Path gcRoot; +static int rootNr = 0; +static bool indirectRoot = false; + +enum OutputKind { okPlain, okXML, okJSON }; + +void processExpr(EvalState& state, const Strings& attrPaths, bool parseOnly, + bool strict, Bindings& autoArgs, bool evalOnly, + OutputKind output, bool location, Expr* e) { + if (parseOnly) { + std::cout << format("%1%\n") % *e; + return; + } + + Value vRoot; + state.eval(e, vRoot); + + for (auto& i : attrPaths) { + Value& v(*findAlongAttrPath(state, i, autoArgs, vRoot)); + state.forceValue(v); + + PathSet context; + if (evalOnly) { + Value vRes; + if (autoArgs.empty()) { + vRes = v; + } else { + state.autoCallFunction(autoArgs, v, vRes); + } + if (output == okXML) { + printValueAsXML(state, strict, location, vRes, std::cout, context); + } else if (output == okJSON) { + printValueAsJSON(state, strict, vRes, std::cout, context); + } else { + if (strict) { + state.forceValueDeep(vRes); + } + std::cout << vRes << std::endl; + } + } else { + DrvInfos drvs; + getDerivations(state, v, "", autoArgs, drvs, false); + for (auto& i : drvs) { + Path drvPath = i.queryDrvPath(); + + /* What output do we want? */ + string outputName = i.queryOutputName(); + if (outputName.empty()) { + throw Error( + format("derivation '%1%' lacks an 'outputName' attribute ") % + drvPath); + } + + if (gcRoot.empty()) { + printGCWarning(); + } else { + Path rootName = indirectRoot ? absPath(gcRoot) : gcRoot; + if (++rootNr > 1) { + rootName += "-" + std::to_string(rootNr); + } + auto store2 = state.store.dynamic_pointer_cast<LocalFSStore>(); + if (store2) { + drvPath = store2->addPermRoot(drvPath, rootName, indirectRoot); + } + } + std::cout << format("%1%%2%\n") % drvPath % + (outputName != "out" ? "!" + outputName : ""); + } + } + } +} + +static int _main(int argc, char** argv) { + { + Strings files; + bool readStdin = false; + bool fromArgs = false; + bool findFile = false; + bool evalOnly = false; + bool parseOnly = false; + OutputKind outputKind = okPlain; + bool xmlOutputSourceLocation = true; + bool strict = false; + Strings attrPaths; + bool wantsReadWrite = false; + RepairFlag repair = NoRepair; + + struct MyArgs : LegacyArgs, MixEvalArgs { + using LegacyArgs::LegacyArgs; + }; + + MyArgs myArgs(baseNameOf(argv[0]), + [&](Strings::iterator& arg, const Strings::iterator& end) { + if (*arg == "--help") { + showManPage("nix-instantiate"); + } else if (*arg == "--version") { + printVersion("nix-instantiate"); + } else if (*arg == "-") { + readStdin = true; + } else if (*arg == "--expr" || *arg == "-E") { + fromArgs = true; + } else if (*arg == "--eval" || *arg == "--eval-only") { + evalOnly = true; + } else if (*arg == "--read-write-mode") { + wantsReadWrite = true; + } else if (*arg == "--parse" || *arg == "--parse-only") { + parseOnly = evalOnly = true; + } else if (*arg == "--find-file") { + findFile = true; + } else if (*arg == "--attr" || *arg == "-A") { + attrPaths.push_back(getArg(*arg, arg, end)); + } else if (*arg == "--add-root") { + gcRoot = getArg(*arg, arg, end); + } else if (*arg == "--indirect") { + indirectRoot = true; + } else if (*arg == "--xml") { + outputKind = okXML; + } else if (*arg == "--json") { + outputKind = okJSON; + } else if (*arg == "--no-location") { + xmlOutputSourceLocation = false; + } else if (*arg == "--strict") { + strict = true; + } else if (*arg == "--repair") { + repair = Repair; + } else if (*arg == "--dry-run") { + settings.readOnlyMode = true; + } else if (*arg != "" && arg->at(0) == '-') { + return false; + } else { + files.push_back(*arg); + } + return true; + }); + + myArgs.parseCmdline(argvToStrings(argc, argv)); + + initPlugins(); + + if (evalOnly && !wantsReadWrite) { + settings.readOnlyMode = true; + } + + auto store = openStore(); + + auto state = std::make_unique<EvalState>(myArgs.searchPath, store); + state->repair = repair; + + Bindings& autoArgs = *myArgs.getAutoArgs(*state); + + if (attrPaths.empty()) { + attrPaths = {""}; + } + + if (findFile) { + for (auto& i : files) { + Path p = state->findFile(i); + if (p.empty()) { + throw Error(format("unable to find '%1%'") % i); + } + std::cout << p << std::endl; + } + return 0; + } + + if (readStdin) { + Expr* e = state->parseStdin(); + processExpr(*state, attrPaths, parseOnly, strict, autoArgs, evalOnly, + outputKind, xmlOutputSourceLocation, e); + } else if (files.empty() && !fromArgs) { + files.push_back("./default.nix"); + } + + for (auto& i : files) { + Expr* e = fromArgs + ? state->parseExprFromString(i, absPath(".")) + : state->parseExprFromFile(resolveExprPath( + state->checkSourcePath(lookupFileArg(*state, i)))); + processExpr(*state, attrPaths, parseOnly, strict, autoArgs, evalOnly, + outputKind, xmlOutputSourceLocation, e); + } + + state->printStats(); + + return 0; + } +} + +static RegisterLegacyCommand s1("nix-instantiate", _main); diff --git a/third_party/nix/src/nix-prefetch-url/nix-prefetch-url.cc b/third_party/nix/src/nix-prefetch-url/nix-prefetch-url.cc new file mode 100644 index 000000000000..fa88dc9bc67e --- /dev/null +++ b/third_party/nix/src/nix-prefetch-url/nix-prefetch-url.cc @@ -0,0 +1,252 @@ +#include <iostream> + +#include <fcntl.h> +#include <glog/logging.h> +#include <sys/stat.h> +#include <sys/types.h> + +#include "attr-path.hh" +#include "common-eval-args.hh" +#include "download.hh" +#include "eval-inline.hh" +#include "eval.hh" +#include "finally.hh" +#include "hash.hh" +#include "legacy.hh" +#include "shared.hh" +#include "store-api.hh" + +using namespace nix; + +/* If โuriโ starts with โmirror://โ, then resolve it using the list of + mirrors defined in Nixpkgs. */ +string resolveMirrorUri(EvalState& state, string uri) { + if (string(uri, 0, 9) != "mirror://") { + return uri; + } + + string s(uri, 9); + auto p = s.find('/'); + if (p == string::npos) { + throw Error("invalid mirror URI"); + } + string mirrorName(s, 0, p); + + Value vMirrors; + state.eval( + state.parseExprFromString( + "import <nixpkgs/pkgs/build-support/fetchurl/mirrors.nix>", "."), + vMirrors); + state.forceAttrs(vMirrors); + + auto mirrorList = vMirrors.attrs->find(state.symbols.Create(mirrorName)); + if (mirrorList == vMirrors.attrs->end()) { + throw Error(format("unknown mirror name '%1%'") % mirrorName); + } + state.forceList(*mirrorList->second.value); + + if (mirrorList->second.value->listSize() < 1) { + throw Error(format("mirror URI '%1%' did not expand to anything") % uri); + } + + string mirror = state.forceString(*mirrorList->second.value->listElems()[0]); + return mirror + (hasSuffix(mirror, "/") ? "" : "/") + string(s, p + 1); +} + +static int _main(int argc, char** argv) { + { + HashType ht = htSHA256; + std::vector<string> args; + bool printPath = !getEnv("PRINT_PATH").empty(); + bool fromExpr = false; + string attrPath; + bool unpack = false; + string name; + + struct MyArgs : LegacyArgs, MixEvalArgs { + using LegacyArgs::LegacyArgs; + }; + + MyArgs myArgs(baseNameOf(argv[0]), + [&](Strings::iterator& arg, const Strings::iterator& end) { + if (*arg == "--help") { + showManPage("nix-prefetch-url"); + } else if (*arg == "--version") { + printVersion("nix-prefetch-url"); + } else if (*arg == "--type") { + string s = getArg(*arg, arg, end); + ht = parseHashType(s); + if (ht == htUnknown) { + throw UsageError(format("unknown hash type '%1%'") % s); + } + } else if (*arg == "--print-path") { + printPath = true; + } else if (*arg == "--attr" || *arg == "-A") { + fromExpr = true; + attrPath = getArg(*arg, arg, end); + } else if (*arg == "--unpack") { + unpack = true; + } else if (*arg == "--name") { + name = getArg(*arg, arg, end); + } else if (*arg != "" && arg->at(0) == '-') { + return false; + } else { + args.push_back(*arg); + } + return true; + }); + + myArgs.parseCmdline(argvToStrings(argc, argv)); + + initPlugins(); + + if (args.size() > 2) { + throw UsageError("too many arguments"); + } + + auto store = openStore(); + auto state = std::make_unique<EvalState>(myArgs.searchPath, store); + + Bindings& autoArgs = *myArgs.getAutoArgs(*state); + + /* If -A is given, get the URI from the specified Nix + expression. */ + string uri; + if (!fromExpr) { + if (args.empty()) { + throw UsageError("you must specify a URI"); + } + uri = args[0]; + } else { + Path path = + resolveExprPath(lookupFileArg(*state, args.empty() ? "." : args[0])); + Value vRoot; + state->evalFile(path, vRoot); + Value& v(*findAlongAttrPath(*state, attrPath, autoArgs, vRoot)); + state->forceAttrs(v); + + /* Extract the URI. */ + auto attr = v.attrs->find(state->symbols.Create("urls")); + if (attr == v.attrs->end()) { + throw Error("attribute set does not contain a 'urls' attribute"); + } + state->forceList(*attr->second.value); + if (attr->second.value->listSize() < 1) { + throw Error("'urls' list is empty"); + } + uri = state->forceString(*attr->second.value->listElems()[0]); + + /* Extract the hash mode. */ + attr = v.attrs->find(state->symbols.Create("outputHashMode")); + if (attr == v.attrs->end()) { + LOG(WARNING) << "this does not look like a fetchurl call"; + } else { + unpack = state->forceString(*attr->second.value) == "recursive"; + } + + /* Extract the name. */ + if (name.empty()) { + attr = v.attrs->find(state->symbols.Create("name")); + if (attr != v.attrs->end()) { + name = state->forceString(*attr->second.value); + } + } + } + + /* Figure out a name in the Nix store. */ + if (name.empty()) { + name = baseNameOf(uri); + } + if (name.empty()) { + throw Error(format("cannot figure out file name for '%1%'") % uri); + } + + /* If an expected hash is given, the file may already exist in + the store. */ + Hash hash; + Hash expectedHash(ht); + Path storePath; + if (args.size() == 2) { + expectedHash = Hash(args[1], ht); + storePath = store->makeFixedOutputPath(unpack, expectedHash, name); + if (store->isValidPath(storePath)) { + hash = expectedHash; + } else { + storePath.clear(); + } + } + + if (storePath.empty()) { + auto actualUri = resolveMirrorUri(*state, uri); + + AutoDelete tmpDir(createTempDir(), true); + Path tmpFile = (Path)tmpDir + "/tmp"; + + /* Download the file. */ + { + AutoCloseFD fd = + open(tmpFile.c_str(), O_WRONLY | O_CREAT | O_EXCL, 0600); + if (!fd) { + throw SysError("creating temporary file '%s'", tmpFile); + } + + FdSink sink(fd.get()); + + DownloadRequest req(actualUri); + req.decompress = false; + getDownloader()->download(std::move(req), sink); + } + + /* Optionally unpack the file. */ + if (unpack) { + LOG(INFO) << "unpacking..."; + Path unpacked = (Path)tmpDir + "/unpacked"; + createDirs(unpacked); + if (hasSuffix(baseNameOf(uri), ".zip")) { + runProgram("unzip", true, {"-qq", tmpFile, "-d", unpacked}); + } else { + // FIXME: this requires GNU tar for decompression. + runProgram("tar", true, {"xf", tmpFile, "-C", unpacked}); + } + + /* If the archive unpacks to a single file/directory, then use + that as the top-level. */ + auto entries = readDirectory(unpacked); + if (entries.size() == 1) { + tmpFile = unpacked + "/" + entries[0].name; + } else { + tmpFile = unpacked; + } + } + + /* FIXME: inefficient; addToStore() will also hash + this. */ + hash = unpack ? hashPath(ht, tmpFile).first : hashFile(ht, tmpFile); + + if (expectedHash != Hash(ht) && expectedHash != hash) { + throw Error(format("hash mismatch for '%1%'") % uri); + } + + /* Copy the file to the Nix store. FIXME: if RemoteStore + implemented addToStoreFromDump() and downloadFile() + supported a sink, we could stream the download directly + into the Nix store. */ + storePath = store->addToStore(name, tmpFile, unpack, ht); + + assert(storePath == store->makeFixedOutputPath(unpack, hash, name)); + } + + if (!printPath) { + LOG(INFO) << "path is '" << storePath << "'"; + } + + std::cout << printHash16or32(hash) << std::endl; + if (printPath) { + std::cout << storePath << std::endl; + } + + return 0; + } +} + +static RegisterLegacyCommand s1("nix-prefetch-url", _main); diff --git a/third_party/nix/src/nix-store/dotgraph.cc b/third_party/nix/src/nix-store/dotgraph.cc new file mode 100644 index 000000000000..0186a7b22ebd --- /dev/null +++ b/third_party/nix/src/nix-store/dotgraph.cc @@ -0,0 +1,141 @@ +#include "dotgraph.hh" + +#include <iostream> + +#include "store-api.hh" +#include "util.hh" + +using std::cout; + +namespace nix { + +static string dotQuote(const string& s) { return "\"" + s + "\""; } + +static string nextColour() { + static int n = 0; + static string colours[] = {"black", "red", "green", + "blue", "magenta", "burlywood"}; + return colours[n++ % (sizeof(colours) / sizeof(string))]; +} + +static string makeEdge(const string& src, const string& dst) { + format f = format("%1% -> %2% [color = %3%];\n") % dotQuote(src) % + dotQuote(dst) % dotQuote(nextColour()); + return f.str(); +} + +static string makeNode(const string& id, const string& label, + const string& colour) { + format f = format( + "%1% [label = %2%, shape = box, " + "style = filled, fillcolor = %3%];\n") % + dotQuote(id) % dotQuote(label) % dotQuote(colour); + return f.str(); +} + +static string symbolicName(const string& path) { + string p = baseNameOf(path); + return string(p, p.find('-') + 1); +} + +#if 0 +string pathLabel(const Path & nePath, const string & elemPath) +{ + return (string) nePath + "-" + elemPath; +} + + +void printClosure(const Path & nePath, const StoreExpr & fs) +{ + PathSet workList(fs.closure.roots); + PathSet doneSet; + + for (PathSet::iterator i = workList.begin(); i != workList.end(); ++i) { + cout << makeEdge(pathLabel(nePath, *i), nePath); + } + + while (!workList.empty()) { + Path path = *(workList.begin()); + workList.erase(path); + + if (doneSet.find(path) == doneSet.end()) { + doneSet.insert(path); + + ClosureElems::const_iterator elem = fs.closure.elems.find(path); + if (elem == fs.closure.elems.end()) + throw Error(format("bad closure, missing path '%1%'") % path); + + for (StringSet::const_iterator i = elem->second.refs.begin(); + i != elem->second.refs.end(); ++i) + { + workList.insert(*i); + cout << makeEdge(pathLabel(nePath, *i), pathLabel(nePath, path)); + } + + cout << makeNode(pathLabel(nePath, path), + symbolicName(path), "#ff0000"); + } + } +} +#endif + +void printDotGraph(const ref<Store>& store, const PathSet& roots) { + PathSet workList(roots); + PathSet doneSet; + + cout << "digraph G {\n"; + + while (!workList.empty()) { + Path path = *(workList.begin()); + workList.erase(path); + + if (doneSet.find(path) != doneSet.end()) { + continue; + } + doneSet.insert(path); + + cout << makeNode(path, symbolicName(path), "#ff0000"); + + for (auto& p : store->queryPathInfo(path)->references) { + if (p != path) { + workList.insert(p); + cout << makeEdge(p, path); + } + } + +#if 0 + StoreExpr ne = storeExprFromPath(path); + + string label, colour; + + if (ne.type == StoreExpr::neDerivation) { + for (PathSet::iterator i = ne.derivation.inputs.begin(); + i != ne.derivation.inputs.end(); ++i) + { + workList.insert(*i); + cout << makeEdge(*i, path); + } + + label = "derivation"; + colour = "#00ff00"; + for (StringPairs::iterator i = ne.derivation.env.begin(); + i != ne.derivation.env.end(); ++i) + if (i->first == "name") { label = i->second; } + } + + else if (ne.type == StoreExpr::neClosure) { + label = "<closure>"; + colour = "#00ffff"; + printClosure(path, ne); + } + + else abort(); + + cout << makeNode(path, label, colour); +#endif + } + + cout << "}\n"; +} + +} // namespace nix diff --git a/third_party/nix/src/nix-store/dotgraph.hh b/third_party/nix/src/nix-store/dotgraph.hh new file mode 100644 index 000000000000..20fc357b1bd4 --- /dev/null +++ b/third_party/nix/src/nix-store/dotgraph.hh @@ -0,0 +1,11 @@ +#pragma once + +#include "types.hh" + +namespace nix { + +class Store; + +void printDotGraph(const ref<Store>& store, const PathSet& roots); + +} // namespace nix diff --git a/third_party/nix/src/nix-store/graphml.cc b/third_party/nix/src/nix-store/graphml.cc new file mode 100644 index 000000000000..a1a16cc8f28a --- /dev/null +++ b/third_party/nix/src/nix-store/graphml.cc @@ -0,0 +1,80 @@ +#include "graphml.hh" + +#include <iostream> + +#include "derivations.hh" +#include "store-api.hh" +#include "util.hh" + +using std::cout; + +namespace nix { + +static inline const string& xmlQuote(const string& s) { + // Luckily, store paths shouldn't contain any character that needs to be + // quoted. + return s; +} + +static string symbolicName(const string& path) { + string p = baseNameOf(path); + return string(p, p.find('-') + 1); +} + +static string makeEdge(const string& src, const string& dst) { + return fmt(" <edge source=\"%1%\" target=\"%2%\"/>\n", xmlQuote(src), + xmlQuote(dst)); +} + +static string makeNode(const ValidPathInfo& info) { + return fmt( + " <node id=\"%1%\">\n" + " <data key=\"narSize\">%2%</data>\n" + " <data key=\"name\">%3%</data>\n" + " <data key=\"type\">%4%</data>\n" + " </node>\n", + info.path, info.narSize, symbolicName(info.path), + (isDerivation(info.path) ? "derivation" : "output-path")); +} + +void printGraphML(const ref<Store>& store, const PathSet& roots) { + PathSet workList(roots); + PathSet doneSet; + std::pair<PathSet::iterator, bool> ret; + + cout << "<?xml version='1.0' encoding='utf-8'?>\n" + << "<graphml xmlns='http://graphml.graphdrawing.org/xmlns'\n" + << " xmlns:xsi='http://www.w3.org/2001/XMLSchema-instance'\n" + << " " + "xsi:schemaLocation='http://graphml.graphdrawing.org/xmlns/1.0/" + "graphml.xsd'>\n" + << "<key id='narSize' for='node' attr.name='narSize' attr.type='int'/>" + << "<key id='name' for='node' attr.name='name' attr.type='string'/>" + << "<key id='type' for='node' attr.name='type' attr.type='string'/>" + << "<graph id='G' edgedefault='directed'>\n"; + + while (!workList.empty()) { + Path path = *(workList.begin()); + workList.erase(path); + + ret = doneSet.insert(path); + if (!ret.second) { + continue; + } + + ValidPathInfo info = *(store->queryPathInfo(path)); + cout << makeNode(info); + + for (auto& p : store->queryPathInfo(path)->references) { + if (p != path) { + workList.insert(p); + cout << makeEdge(path, p); + } + } + } + + cout << "</graph>\n"; + cout << "</graphml>\n"; +} + +} // namespace nix diff --git a/third_party/nix/src/nix-store/graphml.hh b/third_party/nix/src/nix-store/graphml.hh new file mode 100644 index 000000000000..199421acb264 --- /dev/null +++ b/third_party/nix/src/nix-store/graphml.hh @@ -0,0 +1,11 @@ +#pragma once + +#include "types.hh" + +namespace nix { + +class Store; + +void printGraphML(const ref<Store>& store, const PathSet& roots); + +} // namespace nix diff --git a/third_party/nix/src/nix-store/nix-store.cc b/third_party/nix/src/nix-store/nix-store.cc new file mode 100644 index 000000000000..40d6f01cd96c --- /dev/null +++ b/third_party/nix/src/nix-store/nix-store.cc @@ -0,0 +1,1295 @@ +#include <algorithm> +#include <cstdio> +#include <iostream> + +#include <fcntl.h> +#include <glog/logging.h> +#include <sys/stat.h> +#include <sys/types.h> + +#include "archive.hh" +#include "derivations.hh" +#include "dotgraph.hh" +#include "globals.hh" +#include "graphml.hh" +#include "legacy.hh" +#include "local-store.hh" +#include "monitor-fd.hh" +#include "serve-protocol.hh" +#include "shared.hh" +#include "util.hh" +#include "worker-protocol.hh" + +#if HAVE_SODIUM +#include <sodium.h> +#endif + +using namespace nix; +using std::cin; +using std::cout; + +// TODO(tazjin): clang-tidy's performance lints don't like this, but +// the automatic fixes fail (it seems that some of the ops want to own +// the args for whatever reason) +using Operation = void (*)(Strings, Strings); + +static Path gcRoot; +static int rootNr = 0; +static bool indirectRoot = false; +static bool noOutput = false; +static std::shared_ptr<Store> store; + +ref<LocalStore> ensureLocalStore() { + auto store2 = std::dynamic_pointer_cast<LocalStore>(store); + if (!store2) { + throw Error("you don't have sufficient rights to use this command"); + } + return ref<LocalStore>(store2); +} + +static Path useDeriver(Path path) { + if (isDerivation(path)) { + return path; + } + Path drvPath = store->queryPathInfo(path)->deriver; + if (drvPath.empty()) { + throw Error(format("deriver of path '%1%' is not known") % path); + } + return drvPath; +} + +/* Realise the given path. For a derivation that means build it; for + other paths it means ensure their validity. */ +static PathSet realisePath(Path path, bool build = true) { + DrvPathWithOutputs p = parseDrvPathWithOutputs(path); + + auto store2 = std::dynamic_pointer_cast<LocalFSStore>(store); + + if (isDerivation(p.first)) { + if (build) { + store->buildPaths({path}); + } + Derivation drv = store->derivationFromPath(p.first); + rootNr++; + + if (p.second.empty()) { + for (auto& i : drv.outputs) { + p.second.insert(i.first); + } + } + + PathSet outputs; + for (auto& j : p.second) { + auto i = drv.outputs.find(j); + if (i == drv.outputs.end()) { + throw Error( + format("derivation '%1%' does not have an output named '%2%'") % + p.first % j); + } + Path outPath = i->second.path; + if (store2) { + if (gcRoot.empty()) { + printGCWarning(); + } else { + Path rootName = gcRoot; + if (rootNr > 1) { + rootName += "-" + std::to_string(rootNr); + } + if (i->first != "out") { + rootName += "-" + i->first; + } + outPath = store2->addPermRoot(outPath, rootName, indirectRoot); + } + } + outputs.insert(outPath); + } + return outputs; + } + + if (build) { + store->ensurePath(path); + } else if (!store->isValidPath(path)) { + throw Error(format("path '%1%' does not exist and cannot be created") % + path); + } + if (store2) { + if (gcRoot.empty()) { + printGCWarning(); + } else { + Path rootName = gcRoot; + rootNr++; + if (rootNr > 1) { + rootName += "-" + std::to_string(rootNr); + } + path = store2->addPermRoot(path, rootName, indirectRoot); + } + } + return {path}; +} + +/* Realise the given paths. */ +static void opRealise(Strings opFlags, Strings opArgs) { + bool dryRun = false; + BuildMode buildMode = bmNormal; + bool ignoreUnknown = false; + + for (auto& i : opFlags) { + if (i == "--dry-run") { + dryRun = true; + } else if (i == "--repair") { + buildMode = bmRepair; + } else if (i == "--check") { + buildMode = bmCheck; + } else if (i == "--ignore-unknown") { + ignoreUnknown = true; + } else { + throw UsageError(format("unknown flag '%1%'") % i); + } + } + + Paths paths; + for (auto& i : opArgs) { + DrvPathWithOutputs p = parseDrvPathWithOutputs(i); + paths.push_back(makeDrvPathWithOutputs( + store->followLinksToStorePath(p.first), p.second)); + } + + unsigned long long downloadSize; + unsigned long long narSize; + PathSet willBuild; + PathSet willSubstitute; + PathSet unknown; + store->queryMissing(PathSet(paths.begin(), paths.end()), willBuild, + willSubstitute, unknown, downloadSize, narSize); + + if (ignoreUnknown) { + Paths paths2; + for (auto& i : paths) { + if (unknown.find(i) == unknown.end()) { + paths2.push_back(i); + } + } + paths = paths2; + unknown = PathSet(); + } + + if (settings.printMissing) { + printMissing(ref<Store>(store), willBuild, willSubstitute, unknown, + downloadSize, narSize); + } + + if (dryRun) { + return; + } + + /* Build all paths at the same time to exploit parallelism. */ + store->buildPaths(PathSet(paths.begin(), paths.end()), buildMode); + + if (!ignoreUnknown) { + for (auto& i : paths) { + PathSet paths = realisePath(i, false); + if (!noOutput) { + for (auto& j : paths) { + cout << format("%1%\n") % j; + } + } + } + } +} + +/* Add files to the Nix store and print the resulting paths. */ +static void opAdd(Strings opFlags, Strings opArgs) { + if (!opFlags.empty()) { + throw UsageError("unknown flag"); + } + + for (auto& i : opArgs) { + cout << format("%1%\n") % store->addToStore(baseNameOf(i), i); + } +} + +/* Preload the output of a fixed-output derivation into the Nix + store. */ +static void opAddFixed(Strings opFlags, Strings opArgs) { + bool recursive = false; + + for (auto& i : opFlags) { + if (i == "--recursive") { + recursive = true; + } else { + throw UsageError(format("unknown flag '%1%'") % i); + } + } + + if (opArgs.empty()) { + throw UsageError("first argument must be hash algorithm"); + } + + HashType hashAlgo = parseHashType(opArgs.front()); + opArgs.pop_front(); + + for (auto& i : opArgs) { + cout << format("%1%\n") % + store->addToStore(baseNameOf(i), i, recursive, hashAlgo); + } +} + +/* Hack to support caching in `nix-prefetch-url'. */ +static void opPrintFixedPath(Strings opFlags, Strings opArgs) { + bool recursive = false; + + for (auto i : opFlags) { + if (i == "--recursive") { + recursive = true; + } else { + throw UsageError(format("unknown flag '%1%'") % i); + } + } + + if (opArgs.size() != 3) { + throw UsageError(format("'--print-fixed-path' requires three arguments")); + } + + auto i = opArgs.begin(); + HashType hashAlgo = parseHashType(*i++); + string hash = *i++; + string name = *i++; + + cout << format("%1%\n") % + store->makeFixedOutputPath(recursive, Hash(hash, hashAlgo), name); +} + +static PathSet maybeUseOutputs(const Path& storePath, bool useOutput, + bool forceRealise) { + if (forceRealise) { + realisePath(storePath); + } + if (useOutput && isDerivation(storePath)) { + Derivation drv = store->derivationFromPath(storePath); + PathSet outputs; + for (auto& i : drv.outputs) { + outputs.insert(i.second.path); + } + return outputs; + } + return {storePath}; +} + +/* Some code to print a tree representation of a derivation dependency + graph. Topological sorting is used to keep the tree relatively + flat. */ + +const string treeConn = "+---"; +const string treeLine = "| "; +const string treeNull = " "; + +static void printTree(const Path& path, const string& firstPad, + const string& tailPad, PathSet& done) { + if (done.find(path) != done.end()) { + cout << format("%1%%2% [...]\n") % firstPad % path; + return; + } + done.insert(path); + + cout << format("%1%%2%\n") % firstPad % path; + + auto references = store->queryPathInfo(path)->references; + + /* Topologically sort under the relation A < B iff A \in + closure(B). That is, if derivation A is an (possibly indirect) + input of B, then A is printed first. This has the effect of + flattening the tree, preventing deeply nested structures. */ + Paths sorted = store->topoSortPaths(references); + reverse(sorted.begin(), sorted.end()); + + for (auto i = sorted.begin(); i != sorted.end(); ++i) { + auto j = i; + ++j; + printTree(*i, tailPad + treeConn, + j == sorted.end() ? tailPad + treeNull : tailPad + treeLine, + done); + } +} + +/* Perform various sorts of queries. */ +static void opQuery(Strings opFlags, Strings opArgs) { + enum QueryType { + qDefault, + qOutputs, + qRequisites, + qReferences, + qReferrers, + qReferrersClosure, + qDeriver, + qBinding, + qHash, + qSize, + qTree, + qGraph, + qGraphML, + qResolve, + qRoots + }; + QueryType query = qDefault; + bool useOutput = false; + bool includeOutputs = false; + bool forceRealise = false; + string bindingName; + + for (auto& i : opFlags) { + QueryType prev = query; + if (i == "--outputs") { + query = qOutputs; + } else if (i == "--requisites" || i == "-R") { + query = qRequisites; + } else if (i == "--references") { + query = qReferences; + } else if (i == "--referrers" || i == "--referers") { + query = qReferrers; + } else if (i == "--referrers-closure" || i == "--referers-closure") { + query = qReferrersClosure; + } else if (i == "--deriver" || i == "-d") { + query = qDeriver; + } else if (i == "--binding" || i == "-b") { + if (opArgs.empty()) { + throw UsageError("expected binding name"); + } + bindingName = opArgs.front(); + opArgs.pop_front(); + query = qBinding; + } else if (i == "--hash") { + query = qHash; + } else if (i == "--size") { + query = qSize; + } else if (i == "--tree") { + query = qTree; + } else if (i == "--graph") { + query = qGraph; + } else if (i == "--graphml") { + query = qGraphML; + } else if (i == "--resolve") { + query = qResolve; + } else if (i == "--roots") { + query = qRoots; + } else if (i == "--use-output" || i == "-u") { + useOutput = true; + } else if (i == "--force-realise" || i == "--force-realize" || i == "-f") { + forceRealise = true; + } else if (i == "--include-outputs") { + includeOutputs = true; + } else { + throw UsageError(format("unknown flag '%1%'") % i); + } + if (prev != qDefault && prev != query) { + throw UsageError(format("query type '%1%' conflicts with earlier flag") % + i); + } + } + + if (query == qDefault) { + query = qOutputs; + } + + RunPager pager; + + switch (query) { + case qOutputs: { + for (auto& i : opArgs) { + i = store->followLinksToStorePath(i); + if (forceRealise) { + realisePath(i); + } + Derivation drv = store->derivationFromPath(i); + for (auto& j : drv.outputs) { + cout << format("%1%\n") % j.second.path; + } + } + break; + } + + case qRequisites: + case qReferences: + case qReferrers: + case qReferrersClosure: { + PathSet paths; + for (auto& i : opArgs) { + PathSet ps = maybeUseOutputs(store->followLinksToStorePath(i), + useOutput, forceRealise); + for (auto& j : ps) { + if (query == qRequisites) { + store->computeFSClosure(j, paths, false, includeOutputs); + } else if (query == qReferences) { + for (auto& p : store->queryPathInfo(j)->references) { + paths.insert(p); + } + } else if (query == qReferrers) { + store->queryReferrers(j, paths); + } else if (query == qReferrersClosure) { + store->computeFSClosure(j, paths, true); + } + } + } + Paths sorted = store->topoSortPaths(paths); + for (auto i = sorted.rbegin(); i != sorted.rend(); ++i) { + cout << format("%s\n") % *i; + } + break; + } + + case qDeriver: + for (auto& i : opArgs) { + Path deriver = + store->queryPathInfo(store->followLinksToStorePath(i))->deriver; + cout << format("%1%\n") % + (deriver.empty() ? "unknown-deriver" : deriver); + } + break; + + case qBinding: + for (auto& i : opArgs) { + Path path = useDeriver(store->followLinksToStorePath(i)); + Derivation drv = store->derivationFromPath(path); + auto j = drv.env.find(bindingName); + if (j == drv.env.end()) { + throw Error( + format( + "derivation '%1%' has no environment binding named '%2%'") % + path % bindingName); + } + cout << format("%1%\n") % j->second; + } + break; + + case qHash: + case qSize: + for (auto& i : opArgs) { + PathSet paths = maybeUseOutputs(store->followLinksToStorePath(i), + useOutput, forceRealise); + for (auto& j : paths) { + auto info = store->queryPathInfo(j); + if (query == qHash) { + assert(info->narHash.type == htSHA256); + cout << fmt("%s\n", info->narHash.to_string(Base32)); + } else if (query == qSize) { + cout << fmt("%d\n", info->narSize); + } + } + } + break; + + case qTree: { + PathSet done; + for (auto& i : opArgs) { + printTree(store->followLinksToStorePath(i), "", "", done); + } + break; + } + + case qGraph: { + PathSet roots; + for (auto& i : opArgs) { + PathSet paths = maybeUseOutputs(store->followLinksToStorePath(i), + useOutput, forceRealise); + roots.insert(paths.begin(), paths.end()); + } + printDotGraph(ref<Store>(store), roots); + break; + } + + case qGraphML: { + PathSet roots; + for (auto& i : opArgs) { + PathSet paths = maybeUseOutputs(store->followLinksToStorePath(i), + useOutput, forceRealise); + roots.insert(paths.begin(), paths.end()); + } + printGraphML(ref<Store>(store), roots); + break; + } + + case qResolve: { + for (auto& i : opArgs) { + cout << format("%1%\n") % store->followLinksToStorePath(i); + } + break; + } + + case qRoots: { + PathSet referrers; + for (auto& i : opArgs) { + store->computeFSClosure( + maybeUseOutputs(store->followLinksToStorePath(i), useOutput, + forceRealise), + referrers, true, settings.gcKeepOutputs, + settings.gcKeepDerivations); + } + Roots roots = store->findRoots(false); + for (auto& [target, links] : roots) { + if (referrers.find(target) != referrers.end()) { + for (auto& link : links) { + cout << format("%1% -> %2%\n") % link % target; + } + } + } + break; + } + + default: + abort(); + } +} + +static void opPrintEnv(Strings opFlags, Strings opArgs) { + if (!opFlags.empty()) { + throw UsageError("unknown flag"); + } + if (opArgs.size() != 1) { + throw UsageError("'--print-env' requires one derivation store path"); + } + + Path drvPath = opArgs.front(); + Derivation drv = store->derivationFromPath(drvPath); + + /* Print each environment variable in the derivation in a format + that can be sourced by the shell. */ + for (auto& i : drv.env) { + cout << format("export %1%; %1%=%2%\n") % i.first % shellEscape(i.second); + } + + /* Also output the arguments. This doesn't preserve whitespace in + arguments. */ + cout << "export _args; _args='"; + bool first = true; + for (auto& i : drv.args) { + if (!first) { + cout << ' '; + } + first = false; + cout << shellEscape(i); + } + cout << "'\n"; +} + +static void opReadLog(Strings opFlags, Strings opArgs) { + if (!opFlags.empty()) { + throw UsageError("unknown flag"); + } + + RunPager pager; + + for (auto& i : opArgs) { + auto path = store->followLinksToStorePath(i); + auto log = store->getBuildLog(path); + if (!log) { + throw Error("build log of derivation '%s' is not available", path); + } + std::cout << *log; + } +} + +static void opDumpDB(Strings opFlags, Strings opArgs) { + if (!opFlags.empty()) { + throw UsageError("unknown flag"); + } + if (!opArgs.empty()) { + for (auto& i : opArgs) { + i = store->followLinksToStorePath(i); + } + for (auto& i : opArgs) { + cout << store->makeValidityRegistration({i}, true, true); + } + } else { + PathSet validPaths = store->queryAllValidPaths(); + for (auto& i : validPaths) { + cout << store->makeValidityRegistration({i}, true, true); + } + } +} + +static void registerValidity(bool reregister, bool hashGiven, + bool canonicalise) { + ValidPathInfos infos; + + while (true) { + ValidPathInfo info = decodeValidPathInfo(cin, hashGiven); + if (info.path.empty()) { + break; + } + if (!store->isValidPath(info.path) || reregister) { + /* !!! races */ + if (canonicalise) { + canonicalisePathMetaData(info.path, -1); + } + if (!hashGiven) { + HashResult hash = hashPath(htSHA256, info.path); + info.narHash = hash.first; + info.narSize = hash.second; + } + infos.push_back(info); + } + } + + ensureLocalStore()->registerValidPaths(infos); +} + +static void opLoadDB(Strings opFlags, Strings opArgs) { + if (!opFlags.empty()) { + throw UsageError("unknown flag"); + } + if (!opArgs.empty()) { + throw UsageError("no arguments expected"); + } + registerValidity(true, true, false); +} + +static void opRegisterValidity(Strings opFlags, Strings opArgs) { + bool reregister = false; // !!! maybe this should be the default + bool hashGiven = false; + + for (auto& i : opFlags) { + if (i == "--reregister") { + reregister = true; + } else if (i == "--hash-given") { + hashGiven = true; + } else { + throw UsageError(format("unknown flag '%1%'") % i); + } + } + + if (!opArgs.empty()) { + throw UsageError("no arguments expected"); + } + + registerValidity(reregister, hashGiven, true); +} + +static void opCheckValidity(Strings opFlags, Strings opArgs) { + bool printInvalid = false; + + for (auto& i : opFlags) { + if (i == "--print-invalid") { + printInvalid = true; + } else { + throw UsageError(format("unknown flag '%1%'") % i); + } + } + + for (auto& i : opArgs) { + Path path = store->followLinksToStorePath(i); + if (!store->isValidPath(path)) { + if (printInvalid) { + cout << format("%1%\n") % path; + } else { + throw Error(format("path '%1%' is not valid") % path); + } + } + } +} + +static void opGC(Strings opFlags, Strings opArgs) { + bool printRoots = false; + GCOptions options; + options.action = GCOptions::gcDeleteDead; + + GCResults results; + + /* Do what? */ + for (auto i = opFlags.begin(); i != opFlags.end(); ++i) { + if (*i == "--print-roots") { + printRoots = true; + } else if (*i == "--print-live") { + options.action = GCOptions::gcReturnLive; + } else if (*i == "--print-dead") { + options.action = GCOptions::gcReturnDead; + } else if (*i == "--delete") { + options.action = GCOptions::gcDeleteDead; + } else if (*i == "--max-freed") { + auto maxFreed = getIntArg<long long>(*i, i, opFlags.end(), true); + options.maxFreed = maxFreed >= 0 ? maxFreed : 0; + } else { + throw UsageError(format("bad sub-operation '%1%' in GC") % *i); + } + } + + if (!opArgs.empty()) { + throw UsageError("no arguments expected"); + } + + if (printRoots) { + Roots roots = store->findRoots(false); + std::set<std::pair<Path, Path>> roots2; + // Transpose and sort the roots. + for (auto& [target, links] : roots) { + for (auto& link : links) { + roots2.emplace(link, target); + } + } + for (auto& [link, target] : roots2) { + std::cout << link << " -> " << target << "\n"; + } + } + + else { + PrintFreed freed(options.action == GCOptions::gcDeleteDead, results); + store->collectGarbage(options, results); + + if (options.action != GCOptions::gcDeleteDead) { + for (auto& i : results.paths) { + cout << i << std::endl; + } + } + } +} + +/* Remove paths from the Nix store if possible (i.e., if they do not + have any remaining referrers and are not reachable from any GC + roots). */ +static void opDelete(Strings opFlags, Strings opArgs) { + GCOptions options; + options.action = GCOptions::gcDeleteSpecific; + + for (auto& i : opFlags) { + if (i == "--ignore-liveness") { + options.ignoreLiveness = true; + } else { + throw UsageError(format("unknown flag '%1%'") % i); + } + } + + for (auto& i : opArgs) { + options.pathsToDelete.insert(store->followLinksToStorePath(i)); + } + + GCResults results; + PrintFreed freed(true, results); + store->collectGarbage(options, results); +} + +/* Dump a path as a Nix archive. The archive is written to standard + output. */ +static void opDump(Strings opFlags, Strings opArgs) { + if (!opFlags.empty()) { + throw UsageError("unknown flag"); + } + if (opArgs.size() != 1) { + throw UsageError("only one argument allowed"); + } + + FdSink sink(STDOUT_FILENO); + string path = *opArgs.begin(); + dumpPath(path, sink); + sink.flush(); +} + +/* Restore a value from a Nix archive. The archive is read from + standard input. */ +static void opRestore(Strings opFlags, Strings opArgs) { + if (!opFlags.empty()) { + throw UsageError("unknown flag"); + } + if (opArgs.size() != 1) { + throw UsageError("only one argument allowed"); + } + + FdSource source(STDIN_FILENO); + restorePath(*opArgs.begin(), source); +} + +static void opExport(Strings opFlags, Strings opArgs) { + for (auto& i : opFlags) { + throw UsageError(format("unknown flag '%1%'") % i); + } + + for (auto& i : opArgs) { + i = store->followLinksToStorePath(i); + } + + FdSink sink(STDOUT_FILENO); + store->exportPaths(opArgs, sink); + sink.flush(); +} + +static void opImport(Strings opFlags, Strings opArgs) { + for (auto& i : opFlags) { + throw UsageError(format("unknown flag '%1%'") % i); + } + + if (!opArgs.empty()) { + throw UsageError("no arguments expected"); + } + + FdSource source(STDIN_FILENO); + Paths paths = store->importPaths(source, nullptr, NoCheckSigs); + + for (auto& i : paths) { + cout << format("%1%\n") % i << std::flush; + } +} + +/* Initialise the Nix databases. */ +static void opInit(Strings opFlags, Strings opArgs) { + if (!opFlags.empty()) { + throw UsageError("unknown flag"); + } + if (!opArgs.empty()) { + throw UsageError("no arguments expected"); + } + /* Doesn't do anything right now; database tables are initialised + automatically. */ +} + +/* Verify the consistency of the Nix environment. */ +static void opVerify(Strings opFlags, Strings opArgs) { + if (!opArgs.empty()) { + throw UsageError("no arguments expected"); + } + + bool checkContents = false; + RepairFlag repair = NoRepair; + + for (auto& i : opFlags) { + if (i == "--check-contents") { + checkContents = true; + } else if (i == "--repair") { + repair = Repair; + } else { + throw UsageError(format("unknown flag '%1%'") % i); + } + } + + if (store->verifyStore(checkContents, repair)) { + LOG(WARNING) << "not all errors were fixed"; + throw Exit(1); + } +} + +/* Verify whether the contents of the given store path have not changed. */ +static void opVerifyPath(Strings opFlags, Strings opArgs) { + if (!opFlags.empty()) { + throw UsageError("no flags expected"); + } + + int status = 0; + + for (auto& i : opArgs) { + Path path = store->followLinksToStorePath(i); + LOG(INFO) << "checking path '" << path << "'..."; + auto info = store->queryPathInfo(path); + HashSink sink(info->narHash.type); + store->narFromPath(path, sink); + auto current = sink.finish(); + if (current.first != info->narHash) { + LOG(ERROR) << "path '" << path << "' was modified! expected hash '" + << info->narHash.to_string() << "', got '" + << current.first.to_string() << "'"; + status = 1; + } + } + + throw Exit(status); +} + +/* Repair the contents of the given path by redownloading it using a + substituter (if available). */ +static void opRepairPath(Strings opFlags, Strings opArgs) { + if (!opFlags.empty()) { + throw UsageError("no flags expected"); + } + + for (auto& i : opArgs) { + Path path = store->followLinksToStorePath(i); + ensureLocalStore()->repairPath(path); + } +} + +/* Optimise the disk space usage of the Nix store by hard-linking + files with the same contents. */ +static void opOptimise(Strings opFlags, Strings opArgs) { + if (!opArgs.empty() || !opFlags.empty()) { + throw UsageError("no arguments expected"); + } + + store->optimiseStore(); +} + +/* Serve the nix store in a way usable by a restricted ssh user. */ +static void opServe(Strings opFlags, Strings opArgs) { + bool writeAllowed = false; + for (auto& i : opFlags) { + if (i == "--write") { + writeAllowed = true; + } else { + throw UsageError(format("unknown flag '%1%'") % i); + } + } + + if (!opArgs.empty()) { + throw UsageError("no arguments expected"); + } + + FdSource in(STDIN_FILENO); + FdSink out(STDOUT_FILENO); + + /* Exchange the greeting. */ + unsigned int magic = readInt(in); + if (magic != SERVE_MAGIC_1) { + throw Error("protocol mismatch"); + } + out << SERVE_MAGIC_2 << SERVE_PROTOCOL_VERSION; + out.flush(); + unsigned int clientVersion = readInt(in); + + auto getBuildSettings = [&]() { + // FIXME: changing options here doesn't work if we're + // building through the daemon. + settings.keepLog = false; + settings.useSubstitutes = false; + settings.maxSilentTime = readInt(in); + settings.buildTimeout = readInt(in); + if (GET_PROTOCOL_MINOR(clientVersion) >= 2) { + settings.maxLogSize = readNum<unsigned long>(in); + } + if (GET_PROTOCOL_MINOR(clientVersion) >= 3) { + settings.buildRepeat = readInt(in); + settings.enforceDeterminism = readInt(in) != 0u; + settings.runDiffHook = true; + } + settings.printRepeatedBuilds = false; + }; + + while (true) { + ServeCommand cmd; + try { + cmd = (ServeCommand)readInt(in); + } catch (EndOfFile& e) { + break; + } + + switch (cmd) { + case cmdQueryValidPaths: { + bool lock = readInt(in) != 0u; + bool substitute = readInt(in) != 0u; + auto paths = readStorePaths<PathSet>(*store, in); + if (lock && writeAllowed) { + for (auto& path : paths) { + store->addTempRoot(path); + } + } + + /* If requested, substitute missing paths. This + implements nix-copy-closure's --use-substitutes + flag. */ + if (substitute && writeAllowed) { + /* Filter out .drv files (we don't want to build anything). */ + PathSet paths2; + for (auto& path : paths) { + if (!isDerivation(path)) { + paths2.insert(path); + } + } + unsigned long long downloadSize; + unsigned long long narSize; + PathSet willBuild; + PathSet willSubstitute; + PathSet unknown; + store->queryMissing(PathSet(paths2.begin(), paths2.end()), willBuild, + willSubstitute, unknown, downloadSize, narSize); + /* FIXME: should use ensurePath(), but it only + does one path at a time. */ + if (!willSubstitute.empty()) { + try { + store->buildPaths(willSubstitute); + } catch (Error& e) { + LOG(WARNING) << e.msg(); + } + } + } + + out << store->queryValidPaths(paths); + break; + } + + case cmdQueryPathInfos: { + auto paths = readStorePaths<PathSet>(*store, in); + // !!! Maybe we want a queryPathInfos? + for (auto& i : paths) { + try { + auto info = store->queryPathInfo(i); + out << info->path << info->deriver << info->references; + // !!! Maybe we want compression? + out << info->narSize // downloadSize + << info->narSize; + if (GET_PROTOCOL_MINOR(clientVersion) >= 4) { + out << (info->narHash ? info->narHash.to_string() : "") + << info->ca << info->sigs; + } + } catch (InvalidPath&) { + } + } + out << ""; + break; + } + + case cmdDumpStorePath: + store->narFromPath(readStorePath(*store, in), out); + break; + + case cmdImportPaths: { + if (!writeAllowed) { + throw Error("importing paths is not allowed"); + } + store->importPaths(in, nullptr, + NoCheckSigs); // FIXME: should we skip sig checking? + out << 1; // indicate success + break; + } + + case cmdExportPaths: { + readInt(in); // obsolete + store->exportPaths(readStorePaths<Paths>(*store, in), out); + break; + } + + case cmdBuildPaths: { + if (!writeAllowed) { + throw Error("building paths is not allowed"); + } + auto paths = readStorePaths<PathSet>(*store, in); + + getBuildSettings(); + + try { + MonitorFdHup monitor(in.fd); + store->buildPaths(paths); + out << 0; + } catch (Error& e) { + assert(e.status); + out << e.status << e.msg(); + } + break; + } + + case cmdBuildDerivation: { /* Used by hydra-queue-runner. */ + + if (!writeAllowed) { + throw Error("building paths is not allowed"); + } + + Path drvPath = readStorePath(*store, in); // informational only + BasicDerivation drv; + readDerivation(in, *store, drv); + + getBuildSettings(); + + MonitorFdHup monitor(in.fd); + auto status = store->buildDerivation(drvPath, drv); + + out << status.status << status.errorMsg; + + if (GET_PROTOCOL_MINOR(clientVersion) >= 3) { + out << status.timesBuilt + << static_cast<uint64_t>(status.isNonDeterministic) + << status.startTime << status.stopTime; + } + + break; + } + + case cmdQueryClosure: { + bool includeOutputs = readInt(in) != 0u; + PathSet closure; + store->computeFSClosure(readStorePaths<PathSet>(*store, in), closure, + false, includeOutputs); + out << closure; + break; + } + + case cmdAddToStoreNar: { + if (!writeAllowed) { + throw Error("importing paths is not allowed"); + } + + ValidPathInfo info; + info.path = readStorePath(*store, in); + in >> info.deriver; + if (!info.deriver.empty()) { + store->assertStorePath(info.deriver); + } + info.narHash = Hash(readString(in), htSHA256); + info.references = readStorePaths<PathSet>(*store, in); + in >> info.registrationTime >> info.narSize >> info.ultimate; + info.sigs = readStrings<StringSet>(in); + in >> info.ca; + + if (info.narSize == 0) { + throw Error("narInfo is too old and missing the narSize field"); + } + + SizedSource sizedSource(in, info.narSize); + + store->addToStore(info, sizedSource, NoRepair, NoCheckSigs); + + // consume all the data that has been sent before continuing. + sizedSource.drainAll(); + + out << 1; // indicate success + + break; + } + + default: + throw Error(format("unknown serve command %1%") % cmd); + } + + out.flush(); + } +} + +static void opGenerateBinaryCacheKey(Strings opFlags, Strings opArgs) { + for (auto& i : opFlags) { + throw UsageError(format("unknown flag '%1%'") % i); + } + + if (opArgs.size() != 3) { + throw UsageError("three arguments expected"); + } + auto i = opArgs.begin(); + string keyName = *i++; + string secretKeyFile = *i++; + string publicKeyFile = *i++; + +#if HAVE_SODIUM + if (sodium_init() == -1) { + throw Error("could not initialise libsodium"); + } + + unsigned char pk[crypto_sign_PUBLICKEYBYTES]; + unsigned char sk[crypto_sign_SECRETKEYBYTES]; + if (crypto_sign_keypair(pk, sk) != 0) { + throw Error("key generation failed"); + } + + writeFile(publicKeyFile, + keyName + ":" + + base64Encode(string((char*)pk, crypto_sign_PUBLICKEYBYTES))); + umask(0077); + writeFile(secretKeyFile, + keyName + ":" + + base64Encode(string((char*)sk, crypto_sign_SECRETKEYBYTES))); +#else + throw Error( + "Nix was not compiled with libsodium, required for signed binary cache " + "support"); +#endif +} + +static void opVersion(Strings opFlags, Strings opArgs) { + printVersion("nix-store"); +} + +/* Scan the arguments; find the operation, set global flags, put all + other flags in a list, and put all other arguments in another + list. */ +static int _main(int argc, char** argv) { + { + Strings opFlags; + Strings opArgs; + Operation op = nullptr; + + parseCmdLine(argc, argv, + [&](Strings::iterator& arg, const Strings::iterator& end) { + Operation oldOp = op; + + if (*arg == "--help") { + showManPage("nix-store"); + } else if (*arg == "--version") { + op = opVersion; + } else if (*arg == "--realise" || *arg == "--realize" || + *arg == "-r") { + op = opRealise; + } else if (*arg == "--add" || *arg == "-A") { + op = opAdd; + } else if (*arg == "--add-fixed") { + op = opAddFixed; + } else if (*arg == "--print-fixed-path") { + op = opPrintFixedPath; + } else if (*arg == "--delete") { + op = opDelete; + } else if (*arg == "--query" || *arg == "-q") { + op = opQuery; + } else if (*arg == "--print-env") { + op = opPrintEnv; + } else if (*arg == "--read-log" || *arg == "-l") { + op = opReadLog; + } else if (*arg == "--dump-db") { + op = opDumpDB; + } else if (*arg == "--load-db") { + op = opLoadDB; + } else if (*arg == "--register-validity") { + op = opRegisterValidity; + } else if (*arg == "--check-validity") { + op = opCheckValidity; + } else if (*arg == "--gc") { + op = opGC; + } else if (*arg == "--dump") { + op = opDump; + } else if (*arg == "--restore") { + op = opRestore; + } else if (*arg == "--export") { + op = opExport; + } else if (*arg == "--import") { + op = opImport; + } else if (*arg == "--init") { + op = opInit; + } else if (*arg == "--verify") { + op = opVerify; + } else if (*arg == "--verify-path") { + op = opVerifyPath; + } else if (*arg == "--repair-path") { + op = opRepairPath; + } else if (*arg == "--optimise" || *arg == "--optimize") { + op = opOptimise; + } else if (*arg == "--serve") { + op = opServe; + } else if (*arg == "--generate-binary-cache-key") { + op = opGenerateBinaryCacheKey; + } else if (*arg == "--add-root") { + gcRoot = absPath(getArg(*arg, arg, end)); + } else if (*arg == "--indirect") { + indirectRoot = true; + } else if (*arg == "--no-output") { + noOutput = true; + } else if (*arg != "" && arg->at(0) == '-') { + opFlags.push_back(*arg); + if (*arg == "--max-freed" || *arg == "--max-links" || + *arg == "--max-atime") { /* !!! hack */ + opFlags.push_back(getArg(*arg, arg, end)); + } + } else { + opArgs.push_back(*arg); + } + + if ((oldOp != nullptr) && oldOp != op) { + throw UsageError("only one operation may be specified"); + } + + return true; + }); + + initPlugins(); + + if (op == nullptr) { + throw UsageError("no operation specified"); + } + + if (op != opDump && op != opRestore) { /* !!! hack */ + store = openStore(); + } + + op(opFlags, opArgs); + + return 0; + } +} + +static RegisterLegacyCommand s1("nix-store", _main); diff --git a/third_party/nix/src/nix/add-to-store.cc b/third_party/nix/src/nix/add-to-store.cc new file mode 100644 index 000000000000..b7fda9c9d390 --- /dev/null +++ b/third_party/nix/src/nix/add-to-store.cc @@ -0,0 +1,51 @@ +#include "archive.hh" +#include "command.hh" +#include "common-args.hh" +#include "store-api.hh" + +using namespace nix; + +struct CmdAddToStore : MixDryRun, StoreCommand { + Path path; + std::optional<std::string> namePart; + + CmdAddToStore() { + expectArg("path", &path); + + mkFlag() + .longName("name") + .shortName('n') + .description("name component of the store path") + .labels({"name"}) + .dest(&namePart); + } + + std::string name() override { return "add-to-store"; } + + std::string description() override { return "add a path to the Nix store"; } + + Examples examples() override { return {}; } + + void run(ref<Store> store) override { + if (!namePart) { + namePart = baseNameOf(path); + } + + StringSink sink; + dumpPath(path, sink); + + ValidPathInfo info; + info.narHash = hashString(htSHA256, *sink.s); + info.narSize = sink.s->size(); + info.path = store->makeFixedOutputPath(true, info.narHash, *namePart); + info.ca = makeFixedOutputCA(true, info.narHash); + + if (!dryRun) { + store->addToStore(info, sink.s); + } + + std::cout << fmt("%s\n", info.path); + } +}; + +static RegisterCommand r1(make_ref<CmdAddToStore>()); diff --git a/third_party/nix/src/nix/build.cc b/third_party/nix/src/nix/build.cc new file mode 100644 index 000000000000..9ab2d27e0fc9 --- /dev/null +++ b/third_party/nix/src/nix/build.cc @@ -0,0 +1,68 @@ +#include "command.hh" +#include "common-args.hh" +#include "shared.hh" +#include "store-api.hh" + +using namespace nix; + +struct CmdBuild : MixDryRun, InstallablesCommand { + Path outLink = "result"; + + CmdBuild() { + mkFlag() + .longName("out-link") + .shortName('o') + .description("path of the symlink to the build result") + .labels({"path"}) + .dest(&outLink); + + mkFlag() + .longName("no-link") + .description("do not create a symlink to the build result") + .set(&outLink, Path("")); + } + + std::string name() override { return "build"; } + + std::string description() override { + return "build a derivation or fetch a store path"; + } + + Examples examples() override { + return { + Example{"To build and run GNU Hello from NixOS 17.03:", + "nix build -f channel:nixos-17.03 hello; ./result/bin/hello"}, + Example{"To build the build.x86_64-linux attribute from release.nix:", + "nix build -f release.nix build.x86_64-linux"}, + }; + } + + void run(ref<Store> store) override { + auto buildables = build(store, dryRun ? DryRun : Build, installables); + + if (dryRun) { + return; + } + + for (size_t i = 0; i < buildables.size(); ++i) { + auto& b(buildables[i]); + + if (!outLink.empty()) { + for (auto& output : b.outputs) { + if (auto store2 = store.dynamic_pointer_cast<LocalFSStore>()) { + std::string symlink = outLink; + if (i != 0u) { + symlink += fmt("-%d", i); + } + if (output.first != "out") { + symlink += fmt("-%s", output.first); + } + store2->addPermRoot(output.second, absPath(symlink), true); + } + } + } + } + } +}; + +static RegisterCommand r1(make_ref<CmdBuild>()); diff --git a/third_party/nix/src/nix/cat.cc b/third_party/nix/src/nix/cat.cc new file mode 100644 index 000000000000..2d662a453f54 --- /dev/null +++ b/third_party/nix/src/nix/cat.cc @@ -0,0 +1,56 @@ +#include "command.hh" +#include "fs-accessor.hh" +#include "nar-accessor.hh" +#include "store-api.hh" + +using namespace nix; + +struct MixCat : virtual Args { + std::string path; + + void cat(const ref<FSAccessor>& accessor) { + auto st = accessor->stat(path); + if (st.type == FSAccessor::Type::tMissing) { + throw Error(format("path '%1%' does not exist") % path); + } + if (st.type != FSAccessor::Type::tRegular) { + throw Error(format("path '%1%' is not a regular file") % path); + } + + std::cout << accessor->readFile(path); + } +}; + +struct CmdCatStore : StoreCommand, MixCat { + CmdCatStore() { expectArg("path", &path); } + + std::string name() override { return "cat-store"; } + + std::string description() override { + return "print the contents of a store file on stdout"; + } + + void run(ref<Store> store) override { cat(store->getFSAccessor()); } +}; + +struct CmdCatNar : StoreCommand, MixCat { + Path narPath; + + CmdCatNar() { + expectArg("nar", &narPath); + expectArg("path", &path); + } + + std::string name() override { return "cat-nar"; } + + std::string description() override { + return "print the contents of a file inside a NAR file"; + } + + void run(ref<Store> store) override { + cat(makeNarAccessor(make_ref<std::string>(readFile(narPath)))); + } +}; + +static RegisterCommand r1(make_ref<CmdCatStore>()); +static RegisterCommand r2(make_ref<CmdCatNar>()); diff --git a/third_party/nix/src/nix/command.cc b/third_party/nix/src/nix/command.cc new file mode 100644 index 000000000000..439f22dc3b67 --- /dev/null +++ b/third_party/nix/src/nix/command.cc @@ -0,0 +1,155 @@ +#include "command.hh" + +#include <utility> + +#include "derivations.hh" +#include "store-api.hh" + +namespace nix { + +Commands* RegisterCommand::commands = nullptr; + +void Command::printHelp(const string& programName, std::ostream& out) { + Args::printHelp(programName, out); + + auto exs = examples(); + if (!exs.empty()) { + out << "\n"; + out << "Examples:\n"; + for (auto& ex : exs) { + out << "\n" + << " " << ex.description << "\n" // FIXME: wrap + << " $ " << ex.command << "\n"; + } + } +} + +MultiCommand::MultiCommand(Commands _commands) + : commands(std::move(_commands)) { + expectedArgs.push_back(ExpectedArg{ + "command", 1, true, [=](std::vector<std::string> ss) { + assert(!command); + auto i = commands.find(ss[0]); + if (i == commands.end()) { + throw UsageError("'%s' is not a recognised command", ss[0]); + } + command = i->second; + }}); +} + +void MultiCommand::printHelp(const string& programName, std::ostream& out) { + if (command) { + command->printHelp(programName + " " + command->name(), out); + return; + } + + out << "Usage: " << programName << " <COMMAND> <FLAGS>... <ARGS>...\n"; + + out << "\n"; + out << "Common flags:\n"; + printFlags(out); + + out << "\n"; + out << "Available commands:\n"; + + Table2 table; + for (auto& command : commands) { + auto descr = command.second->description(); + if (!descr.empty()) { + table.push_back(std::make_pair(command.second->name(), descr)); + } + } + printTable(out, table); + +#if 0 + out << "\n"; + out << "For full documentation, run 'man " << programName << "' or 'man " << programName << "-<COMMAND>'.\n"; +#endif +} + +bool MultiCommand::processFlag(Strings::iterator& pos, Strings::iterator end) { + if (Args::processFlag(pos, end)) { + return true; + } + if (command && command->processFlag(pos, end)) { + return true; + } + return false; +} + +bool MultiCommand::processArgs(const Strings& args, bool finish) { + if (command) { + return command->processArgs(args, finish); + } + return Args::processArgs(args, finish); +} + +StoreCommand::StoreCommand() = default; + +ref<Store> StoreCommand::getStore() { + if (!_store) { + _store = createStore(); + } + return ref<Store>(_store); +} + +ref<Store> StoreCommand::createStore() { return openStore(); } + +void StoreCommand::run() { run(getStore()); } + +StorePathsCommand::StorePathsCommand(bool recursive) : recursive(recursive) { + if (recursive) { + mkFlag() + .longName("no-recursive") + .description("apply operation to specified paths only") + .set(&this->recursive, false); + } else { + mkFlag() + .longName("recursive") + .shortName('r') + .description("apply operation to closure of the specified paths") + .set(&this->recursive, true); + } + + mkFlag(0, "all", "apply operation to the entire store", &all); +} + +void StorePathsCommand::run(ref<Store> store) { + Paths storePaths; + + if (all) { + if (!installables.empty() != 0u) { + throw UsageError("'--all' does not expect arguments"); + } + for (auto& p : store->queryAllValidPaths()) { + storePaths.push_back(p); + } + } + + else { + for (auto& p : toStorePaths(store, NoBuild, installables)) { + storePaths.push_back(p); + } + + if (recursive) { + PathSet closure; + store->computeFSClosure(PathSet(storePaths.begin(), storePaths.end()), + closure, false, false); + storePaths = Paths(closure.begin(), closure.end()); + } + } + + run(store, storePaths); +} + +void StorePathCommand::run(ref<Store> store) { + auto storePaths = toStorePaths(store, NoBuild, installables); + + if (storePaths.size() != 1) { + throw UsageError("this command requires exactly one store path"); + } + + run(store, *storePaths.begin()); +} + +} // namespace nix diff --git a/third_party/nix/src/nix/command.hh b/third_party/nix/src/nix/command.hh new file mode 100644 index 000000000000..4122bdd0e58f --- /dev/null +++ b/third_party/nix/src/nix/command.hh @@ -0,0 +1,193 @@ +#pragma once + +#include "args.hh" +#include "common-eval-args.hh" + +namespace nix { + +extern std::string programPath; + +struct Value; +class Bindings; +class EvalState; + +/* A command is an argument parser that can be executed by calling its + run() method. */ +struct Command : virtual Args { + virtual std::string name() = 0; + virtual void prepare(){}; + virtual void run() = 0; + + struct Example { + std::string description; + std::string command; + }; + + typedef std::list<Example> Examples; + + virtual Examples examples() { return Examples(); } + + void printHelp(const string& programName, std::ostream& out) override; +}; + +class Store; + +/* A command that require a Nix store. */ +struct StoreCommand : virtual Command { + StoreCommand(); + void run() override; + ref<Store> getStore(); + virtual ref<Store> createStore(); + virtual void run(ref<Store>) = 0; + + private: + std::shared_ptr<Store> _store; +}; + +struct Buildable { + Path drvPath; // may be empty + std::map<std::string, Path> outputs; +}; + +typedef std::vector<Buildable> Buildables; + +struct Installable { + virtual std::string what() = 0; + + virtual Buildables toBuildables() { + throw Error("argument '%s' cannot be built", what()); + } + + Buildable toBuildable(); + + virtual Value* toValue(EvalState& state) { + throw Error("argument '%s' cannot be evaluated", what()); + } +}; + +struct SourceExprCommand : virtual Args, StoreCommand, MixEvalArgs { + Path file; + + SourceExprCommand(); + + /* Return a value representing the Nix expression from which we + are installing. This is either the file specified by โ--fileโ, + or an attribute set constructed from $NIX_PATH, e.g. โ{ nixpkgs + = import ...; bla = import ...; }โ. */ + Value* getSourceExpr(EvalState& state); + + ref<EvalState> getEvalState(); + + private: + std::shared_ptr<EvalState> evalState; + + Value* vSourceExpr = 0; +}; + +enum RealiseMode { Build, NoBuild, DryRun }; + +/* A command that operates on a list of "installables", which can be + store paths, attribute paths, Nix expressions, etc. */ +struct InstallablesCommand : virtual Args, SourceExprCommand { + std::vector<std::shared_ptr<Installable>> installables; + + InstallablesCommand() { expectArgs("installables", &_installables); } + + void prepare() override; + + virtual bool useDefaultInstallables() { return true; } + + private: + std::vector<std::string> _installables; +}; + +struct InstallableCommand : virtual Args, SourceExprCommand { + std::shared_ptr<Installable> installable; + + InstallableCommand() { expectArg("installable", &_installable); } + + void prepare() override; + + private: + std::string _installable; +}; + +/* A command that operates on zero or more store paths. */ +struct StorePathsCommand : public InstallablesCommand { + private: + bool recursive = false; + bool all = false; + + public: + StorePathsCommand(bool recursive = false); + + using StoreCommand::run; + + virtual void run(ref<Store> store, Paths storePaths) = 0; + + void run(ref<Store> store) override; + + bool useDefaultInstallables() override { return !all; } +}; + +/* A command that operates on exactly one store path. */ +struct StorePathCommand : public InstallablesCommand { + using StoreCommand::run; + + virtual void run(ref<Store> store, const Path& storePath) = 0; + + void run(ref<Store> store) override; +}; + +typedef std::map<std::string, ref<Command>> Commands; + +/* An argument parser that supports multiple subcommands, + i.e. โ<command> <subcommand>โ. */ +class MultiCommand : virtual Args { + public: + Commands commands; + + std::shared_ptr<Command> command; + + MultiCommand(Commands commands); + + void printHelp(const string& programName, std::ostream& out) override; + + bool processFlag(Strings::iterator& pos, Strings::iterator end) override; + + bool processArgs(const Strings& args, bool finish) override; +}; + +/* A helper class for registering commands globally. */ +struct RegisterCommand { + static Commands* commands; + + RegisterCommand(ref<Command> command) { + if (!commands) { + commands = new Commands; + } + commands->emplace(command->name(), command); + } +}; + +std::shared_ptr<Installable> parseInstallable(SourceExprCommand& cmd, + const ref<Store>& store, + const std::string& installable, + bool useDefaultInstallables); + +Buildables build(const ref<Store>& store, RealiseMode mode, + const std::vector<std::shared_ptr<Installable>>& installables); + +PathSet toStorePaths( + const ref<Store>& store, RealiseMode mode, + const std::vector<std::shared_ptr<Installable>>& installables); + +Path toStorePath(const ref<Store>& store, RealiseMode mode, + const std::shared_ptr<Installable>& installable); + +PathSet toDerivations( + const ref<Store>& store, + const std::vector<std::shared_ptr<Installable>>& installables, + bool useDeriver = false); + +} // namespace nix diff --git a/third_party/nix/src/nix/copy.cc b/third_party/nix/src/nix/copy.cc new file mode 100644 index 000000000000..633fe399b48c --- /dev/null +++ b/third_party/nix/src/nix/copy.cc @@ -0,0 +1,86 @@ +#include <atomic> + +#include "command.hh" +#include "shared.hh" +#include "store-api.hh" +#include "sync.hh" +#include "thread-pool.hh" + +using namespace nix; + +struct CmdCopy : StorePathsCommand { + std::string srcUri, dstUri; + + CheckSigsFlag checkSigs = CheckSigs; + + SubstituteFlag substitute = NoSubstitute; + + CmdCopy() : StorePathsCommand(true) { + mkFlag() + .longName("from") + .labels({"store-uri"}) + .description("URI of the source Nix store") + .dest(&srcUri); + mkFlag() + .longName("to") + .labels({"store-uri"}) + .description("URI of the destination Nix store") + .dest(&dstUri); + + mkFlag() + .longName("no-check-sigs") + .description("do not require that paths are signed by trusted keys") + .set(&checkSigs, NoCheckSigs); + + mkFlag() + .longName("substitute-on-destination") + .shortName('s') + .description( + "whether to try substitutes on the destination store (only " + "supported by SSH)") + .set(&substitute, Substitute); + } + + std::string name() override { return "copy"; } + + std::string description() override { return "copy paths between Nix stores"; } + + Examples examples() override { + return { + Example{"To copy Firefox from the local store to a binary cache in " + "file:///tmp/cache:", + "nix copy --to file:///tmp/cache $(type -p firefox)"}, + Example{"To copy the entire current NixOS system closure to another " + "machine via SSH:", + "nix copy --to ssh://server /run/current-system"}, + Example{"To copy a closure from another machine via SSH:", + "nix copy --from ssh://server " + "/nix/store/a6cnl93nk1wxnq84brbbwr6hxw9gp2w9-blender-2.79-rc2"}, +#ifdef ENABLE_S3 + Example{"To copy Hello to an S3 binary cache:", + "nix copy --to s3://my-bucket?region=eu-west-1 nixpkgs.hello"}, + Example{"To copy Hello to an S3-compatible binary cache:", + "nix copy --to " + "s3://my-bucket?region=eu-west-1&endpoint=example.com " + "nixpkgs.hello"}, +#endif + }; + } + + ref<Store> createStore() override { + return srcUri.empty() ? StoreCommand::createStore() : openStore(srcUri); + } + + void run(ref<Store> srcStore, Paths storePaths) override { + if (srcUri.empty() && dstUri.empty()) { + throw UsageError("you must pass '--from' and/or '--to'"); + } + + ref<Store> dstStore = dstUri.empty() ? openStore() : openStore(dstUri); + + copyPaths(srcStore, dstStore, PathSet(storePaths.begin(), storePaths.end()), + NoRepair, checkSigs, substitute); + } +}; + +static RegisterCommand r1(make_ref<CmdCopy>()); diff --git a/third_party/nix/src/nix/doctor.cc b/third_party/nix/src/nix/doctor.cc new file mode 100644 index 000000000000..44b4bd605d6c --- /dev/null +++ b/third_party/nix/src/nix/doctor.cc @@ -0,0 +1,136 @@ +#include "command.hh" +#include "serve-protocol.hh" +#include "shared.hh" +#include "store-api.hh" +#include "worker-protocol.hh" + +using namespace nix; + +std::string formatProtocol(unsigned int proto) { + if (proto != 0u) { + auto major = GET_PROTOCOL_MAJOR(proto) >> 8; + auto minor = GET_PROTOCOL_MINOR(proto); + return (format("%1%.%2%") % major % minor).str(); + } + return "unknown"; +} + +struct CmdDoctor : StoreCommand { + bool success = true; + + std::string name() override { return "doctor"; } + + std::string description() override { + return "check your system for potential problems"; + } + + void run(ref<Store> store) override { + std::cout << "Store uri: " << store->getUri() << std::endl; + std::cout << std::endl; + + auto type = getStoreType(); + + if (type < tOther) { + success &= checkNixInPath(); + success &= checkProfileRoots(store); + } + success &= checkStoreProtocol(store->getProtocol()); + + if (!success) { + throw Exit(2); + } + } + + static bool checkNixInPath() { + PathSet dirs; + + for (auto& dir : tokenizeString<Strings>(getEnv("PATH"), ":")) { + if (pathExists(dir + "/nix-env")) { + dirs.insert(dirOf(canonPath(dir + "/nix-env", true))); + } + } + + if (dirs.size() != 1) { + std::cout << "Warning: multiple versions of nix found in PATH." + << std::endl; + std::cout << std::endl; + for (auto& dir : dirs) { + std::cout << " " << dir << std::endl; + } + std::cout << std::endl; + return false; + } + + return true; + } + + static bool checkProfileRoots(const ref<Store>& store) { + PathSet dirs; + + for (auto& dir : tokenizeString<Strings>(getEnv("PATH"), ":")) { + Path profileDir = dirOf(dir); + try { + Path userEnv = canonPath(profileDir, true); + + if (store->isStorePath(userEnv) && + hasSuffix(userEnv, "user-environment")) { + while (profileDir.find("/profiles/") == std::string::npos && + isLink(profileDir)) { + profileDir = absPath(readLink(profileDir), dirOf(profileDir)); + } + + if (profileDir.find("/profiles/") == std::string::npos) { + dirs.insert(dir); + } + } + } catch (SysError&) { + } + } + + if (!dirs.empty()) { + std::cout << "Warning: found profiles outside of " << settings.nixStateDir + << "/profiles." << std::endl; + std::cout << "The generation this profile points to might not have a " + "gcroot and could be" + << std::endl; + std::cout << "garbage collected, resulting in broken symlinks." + << std::endl; + std::cout << std::endl; + for (auto& dir : dirs) { + std::cout << " " << dir << std::endl; + } + std::cout << std::endl; + return false; + } + + return true; + } + + static bool checkStoreProtocol(unsigned int storeProto) { + unsigned int clientProto = GET_PROTOCOL_MAJOR(SERVE_PROTOCOL_VERSION) == + GET_PROTOCOL_MAJOR(storeProto) + ? SERVE_PROTOCOL_VERSION + : PROTOCOL_VERSION; + + if (clientProto != storeProto) { + std::cout << "Warning: protocol version of this client does not match " + "the store." + << std::endl; + std::cout << "While this is not necessarily a problem it's recommended " + "to keep the client in" + << std::endl; + std::cout << "sync with the daemon." << std::endl; + std::cout << std::endl; + std::cout << "Client protocol: " << formatProtocol(clientProto) + << std::endl; + std::cout << "Store protocol: " << formatProtocol(storeProto) + << std::endl; + std::cout << std::endl; + return false; + } + + return true; + } +}; + +static RegisterCommand r1(make_ref<CmdDoctor>()); diff --git a/third_party/nix/src/nix/dump-path.cc b/third_party/nix/src/nix/dump-path.cc new file mode 100644 index 000000000000..5e93a3a44b83 --- /dev/null +++ b/third_party/nix/src/nix/dump-path.cc @@ -0,0 +1,28 @@ +#include "command.hh" +#include "store-api.hh" + +using namespace nix; + +struct CmdDumpPath : StorePathCommand { + std::string name() override { return "dump-path"; } + + std::string description() override { + return "dump a store path to stdout (in NAR format)"; + } + + Examples examples() override { + return { + Example{"To get a NAR from the binary cache https://cache.nixos.org/:", + "nix dump-path --store https://cache.nixos.org/ " + "/nix/store/7crrmih8c52r8fbnqb933dxrsp44md93-glibc-2.25"}, + }; + } + + void run(ref<Store> store, const Path& storePath) override { + FdSink sink(STDOUT_FILENO); + store->narFromPath(storePath, sink); + sink.flush(); + } +}; + +static RegisterCommand r1(make_ref<CmdDumpPath>()); diff --git a/third_party/nix/src/nix/edit.cc b/third_party/nix/src/nix/edit.cc new file mode 100644 index 000000000000..330a845dc520 --- /dev/null +++ b/third_party/nix/src/nix/edit.cc @@ -0,0 +1,73 @@ +#include <glog/logging.h> +#include <unistd.h> + +#include "attr-path.hh" +#include "command.hh" +#include "eval.hh" +#include "shared.hh" + +using namespace nix; + +struct CmdEdit : InstallableCommand { + std::string name() override { return "edit"; } + + std::string description() override { + return "open the Nix expression of a Nix package in $EDITOR"; + } + + Examples examples() override { + return { + Example{"To open the Nix expression of the GNU Hello package:", + "nix edit nixpkgs.hello"}, + }; + } + + void run(ref<Store> store) override { + auto state = getEvalState(); + + auto v = installable->toValue(*state); + + Value* v2; + try { + auto dummyArgs = Bindings::NewGC(); + v2 = findAlongAttrPath(*state, "meta.position", *dummyArgs, *v); + } catch (Error&) { + throw Error("package '%s' has no source location information", + installable->what()); + } + + auto pos = state->forceString(*v2); + DLOG(INFO) << "position is " << pos; + + auto colon = pos.rfind(':'); + if (colon == std::string::npos) { + throw Error("cannot parse meta.position attribute '%s'", pos); + } + + std::string filename(pos, 0, colon); + int lineno; + try { + lineno = std::stoi(std::string(pos, colon + 1)); + } catch (std::invalid_argument& e) { + throw Error("cannot parse line number '%s'", pos); + } + + auto editor = getEnv("EDITOR", "cat"); + + auto args = tokenizeString<Strings>(editor); + + if (editor.find("emacs") != std::string::npos || + editor.find("nano") != std::string::npos || + editor.find("vim") != std::string::npos) { + args.push_back(fmt("+%d", lineno)); + } + + args.push_back(filename); + + execvp(args.front().c_str(), stringsToCharPtrs(args).data()); + + throw SysError("cannot run editor '%s'", editor); + } +}; + +static RegisterCommand r1(make_ref<CmdEdit>()); diff --git a/third_party/nix/src/nix/eval.cc b/third_party/nix/src/nix/eval.cc new file mode 100644 index 000000000000..661d587d7385 --- /dev/null +++ b/third_party/nix/src/nix/eval.cc @@ -0,0 +1,56 @@ +#include "eval.hh" + +#include "command.hh" +#include "common-args.hh" +#include "json.hh" +#include "shared.hh" +#include "store-api.hh" +#include "value-to-json.hh" + +using namespace nix; + +struct CmdEval : MixJSON, InstallableCommand { + bool raw = false; + + CmdEval() { mkFlag(0, "raw", "print strings unquoted", &raw); } + + std::string name() override { return "eval"; } + + std::string description() override { return "evaluate a Nix expression"; } + + Examples examples() override { + return { + Example{"To evaluate a Nix expression given on the command line:", + "nix eval '(1 + 2)'"}, + Example{"To evaluate a Nix expression from a file or URI:", + "nix eval -f channel:nixos-17.09 hello.name"}, + Example{"To get the current version of Nixpkgs:", + "nix eval --raw nixpkgs.lib.nixpkgsVersion"}, + Example{"To print the store path of the Hello package:", + "nix eval --raw nixpkgs.hello"}, + }; + } + + void run(ref<Store> store) override { + if (raw && json) { + throw UsageError("--raw and --json are mutually exclusive"); + } + + auto state = getEvalState(); + + auto v = installable->toValue(*state); + PathSet context; + + if (raw) { + std::cout << state->coerceToString(noPos, *v, context); + } else if (json) { + JSONPlaceholder jsonOut(std::cout); + printValueAsJSON(*state, true, *v, jsonOut, context); + } else { + state->forceValueDeep(*v); + std::cout << *v << "\n"; + } + } +}; + +static RegisterCommand r1(make_ref<CmdEval>()); diff --git a/third_party/nix/src/nix/hash.cc b/third_party/nix/src/nix/hash.cc new file mode 100644 index 000000000000..74ff45d0d84e --- /dev/null +++ b/third_party/nix/src/nix/hash.cc @@ -0,0 +1,145 @@ +#include "hash.hh" + +#include "command.hh" +#include "legacy.hh" +#include "shared.hh" + +using namespace nix; + +struct CmdHash : Command { + enum Mode { mFile, mPath }; + Mode mode; + Base base = SRI; + bool truncate = false; + HashType ht = htSHA256; + std::vector<std::string> paths; + + explicit CmdHash(Mode mode) : mode(mode) { + mkFlag(0, "sri", "print hash in SRI format", &base, SRI); + mkFlag(0, "base64", "print hash in base-64", &base, Base64); + mkFlag(0, "base32", "print hash in base-32 (Nix-specific)", &base, Base32); + mkFlag(0, "base16", "print hash in base-16", &base, Base16); + mkFlag().longName("type").mkHashTypeFlag(&ht); + expectArgs("paths", &paths); + } + + std::string name() override { + return mode == mFile ? "hash-file" : "hash-path"; + } + + std::string description() override { + return mode == mFile + ? "print cryptographic hash of a regular file" + : "print cryptographic hash of the NAR serialisation of a path"; + } + + void run() override { + for (const auto& path : paths) { + Hash h = mode == mFile ? hashFile(ht, path) : hashPath(ht, path).first; + if (truncate && h.hashSize > 20) { + h = compressHash(h, 20); + } + std::cout << format("%1%\n") % h.to_string(base, base == SRI); + } + } +}; + +static RegisterCommand r1(make_ref<CmdHash>(CmdHash::mFile)); +static RegisterCommand r2(make_ref<CmdHash>(CmdHash::mPath)); + +struct CmdToBase : Command { + Base base; + HashType ht = htUnknown; + std::vector<std::string> args; + + explicit CmdToBase(Base base) : base(base) { + mkFlag().longName("type").mkHashTypeFlag(&ht); + expectArgs("strings", &args); + } + + std::string name() override { + return base == Base16 + ? "to-base16" + : base == Base32 ? "to-base32" + : base == Base64 ? "to-base64" : "to-sri"; + } + + std::string description() override { + return fmt( + "convert a hash to %s representation", + base == Base16 + ? "base-16" + : base == Base32 ? "base-32" : base == Base64 ? "base-64" : "SRI"); + } + + void run() override { + for (const auto& s : args) { + std::cout << fmt("%s\n", Hash(s, ht).to_string(base, base == SRI)); + } + } +}; + +static RegisterCommand r3(make_ref<CmdToBase>(Base16)); +static RegisterCommand r4(make_ref<CmdToBase>(Base32)); +static RegisterCommand r5(make_ref<CmdToBase>(Base64)); +static RegisterCommand r6(make_ref<CmdToBase>(SRI)); + +/* Legacy nix-hash command. */ +static int compatNixHash(int argc, char** argv) { + HashType ht = htMD5; + bool flat = false; + bool base32 = false; + bool truncate = false; + enum { opHash, opTo32, opTo16 } op = opHash; + std::vector<std::string> ss; + + parseCmdLine(argc, argv, + [&](Strings::iterator& arg, const Strings::iterator& end) { + if (*arg == "--help") { + showManPage("nix-hash"); + } else if (*arg == "--version") { + printVersion("nix-hash"); + } else if (*arg == "--flat") { + flat = true; + } else if (*arg == "--base32") { + base32 = true; + } else if (*arg == "--truncate") { + truncate = true; + } else if (*arg == "--type") { + string s = getArg(*arg, arg, end); + ht = parseHashType(s); + if (ht == htUnknown) { + throw UsageError(format("unknown hash type '%1%'") % s); + } + } else if (*arg == "--to-base16") { + op = opTo16; + } else if (*arg == "--to-base32") { + op = opTo32; + } else if (*arg != "" && arg->at(0) == '-') { + return false; + } else { + ss.push_back(*arg); + } + return true; + }); + + if (op == opHash) { + CmdHash cmd(flat ? CmdHash::mFile : CmdHash::mPath); + cmd.ht = ht; + cmd.base = base32 ? Base32 : Base16; + cmd.truncate = truncate; + cmd.paths = ss; + cmd.run(); + } + + else { + CmdToBase cmd(op == opTo32 ? Base32 : Base16); + cmd.args = ss; + cmd.ht = ht; + cmd.run(); + } + + return 0; +} + +static RegisterLegacyCommand s1("nix-hash", compatNixHash); diff --git a/third_party/nix/src/nix/installables.cc b/third_party/nix/src/nix/installables.cc new file mode 100644 index 000000000000..38a3f0c8ecf6 --- /dev/null +++ b/third_party/nix/src/nix/installables.cc @@ -0,0 +1,344 @@ +#include <regex> +#include <utility> + +#include "attr-path.hh" +#include "command.hh" +#include "common-eval-args.hh" +#include "derivations.hh" +#include "eval-inline.hh" +#include "eval.hh" +#include "get-drvs.hh" +#include "shared.hh" +#include "store-api.hh" + +namespace nix { + +SourceExprCommand::SourceExprCommand() { + mkFlag() + .shortName('f') + .longName("file") + .label("file") + .description("evaluate FILE rather than the default") + .dest(&file); +} + +Value* SourceExprCommand::getSourceExpr(EvalState& state) { + if (vSourceExpr != nullptr) { + return vSourceExpr; + } + + auto sToplevel = state.symbols.Create("_toplevel"); + + vSourceExpr = state.allocValue(); + + if (!file.empty()) { + state.evalFile(lookupFileArg(state, file), *vSourceExpr); + } else { + /* Construct the installation source from $NIX_PATH. */ + + auto searchPath = state.getSearchPath(); + + state.mkAttrs(*vSourceExpr, 1024); + + mkBool(*state.allocAttr(*vSourceExpr, sToplevel), true); + + std::unordered_set<std::string> seen; + + auto addEntry = [&](const std::string& name) { + if (name.empty()) { + return; + } + if (!seen.insert(name).second) { + return; + } + Value* v1 = state.allocValue(); + mkPrimOpApp(*v1, state.getBuiltin("findFile"), + state.getBuiltin("nixPath")); + Value* v2 = state.allocValue(); + mkApp(*v2, *v1, mkString(*state.allocValue(), name)); + mkApp(*state.allocAttr(*vSourceExpr, state.symbols.Create(name)), + state.getBuiltin("import"), *v2); + }; + + for (auto& i : searchPath) { /* Hack to handle channels. */ + if (i.first.empty() && pathExists(i.second + "/manifest.nix")) { + for (auto& j : readDirectory(i.second)) { + if (j.name != "manifest.nix" && + pathExists(fmt("%s/%s/default.nix", i.second, j.name))) { + addEntry(j.name); + } + } + } else { + addEntry(i.first); + } + } + } + + return vSourceExpr; +} + +ref<EvalState> SourceExprCommand::getEvalState() { + if (!evalState) { + evalState = std::make_shared<EvalState>(searchPath, getStore()); + } + return ref<EvalState>(evalState); +} + +Buildable Installable::toBuildable() { + auto buildables = toBuildables(); + if (buildables.size() != 1) { + throw Error( + "installable '%s' evaluates to %d derivations, where only one is " + "expected", + what(), buildables.size()); + } + return std::move(buildables[0]); +} + +struct InstallableStorePath : Installable { + Path storePath; + + explicit InstallableStorePath(Path storePath) + : storePath(std::move(storePath)) {} + + std::string what() override { return storePath; } + + Buildables toBuildables() override { + return {{isDerivation(storePath) ? storePath : "", {{"out", storePath}}}}; + } +}; + +struct InstallableValue : Installable { + SourceExprCommand& cmd; + + explicit InstallableValue(SourceExprCommand& cmd) : cmd(cmd) {} + + Buildables toBuildables() override { + auto state = cmd.getEvalState(); + + auto v = toValue(*state); + + Bindings& autoArgs = *cmd.getAutoArgs(*state); + + DrvInfos drvs; + getDerivations(*state, *v, "", autoArgs, drvs, false); + + Buildables res; + + PathSet drvPaths; + + for (auto& drv : drvs) { + Buildable b{drv.queryDrvPath()}; + drvPaths.insert(b.drvPath); + + auto outputName = drv.queryOutputName(); + if (outputName.empty()) { + throw Error("derivation '%s' lacks an 'outputName' attribute", + b.drvPath); + } + + b.outputs.emplace(outputName, drv.queryOutPath()); + + res.push_back(std::move(b)); + } + + // Hack to recognize .all: if all drvs have the same drvPath, + // merge the buildables. + if (drvPaths.size() == 1) { + Buildable b{*drvPaths.begin()}; + for (auto& b2 : res) { + b.outputs.insert(b2.outputs.begin(), b2.outputs.end()); + } + return {b}; + } + return res; + } +}; + +struct InstallableExpr : InstallableValue { + std::string text; + + InstallableExpr(SourceExprCommand& cmd, std::string text) + : InstallableValue(cmd), text(std::move(text)) {} + + std::string what() override { return text; } + + Value* toValue(EvalState& state) override { + auto v = state.allocValue(); + state.eval(state.parseExprFromString(text, absPath(".")), *v); + return v; + } +}; + +struct InstallableAttrPath : InstallableValue { + std::string attrPath; + + InstallableAttrPath(SourceExprCommand& cmd, std::string attrPath) + : InstallableValue(cmd), attrPath(std::move(attrPath)) {} + + std::string what() override { return attrPath; } + + Value* toValue(EvalState& state) override { + auto source = cmd.getSourceExpr(state); + + Bindings& autoArgs = *cmd.getAutoArgs(state); + + Value* v = findAlongAttrPath(state, attrPath, autoArgs, *source); + state.forceValue(*v); + + return v; + } +}; + +// FIXME: extend +std::string attrRegex = R"([A-Za-z_][A-Za-z0-9-_+]*)"; +static std::regex attrPathRegex(fmt(R"(%1%(\.%1%)*)", attrRegex)); + +static std::vector<std::shared_ptr<Installable>> parseInstallables( + SourceExprCommand& cmd, const ref<Store>& store, + std::vector<std::string> ss, bool useDefaultInstallables) { + std::vector<std::shared_ptr<Installable>> result; + + if (ss.empty() && useDefaultInstallables) { + if (cmd.file.empty()) { + cmd.file = "."; + } + ss = {""}; + } + + for (auto& s : ss) { + if (s.compare(0, 1, "(") == 0) { + result.push_back(std::make_shared<InstallableExpr>(cmd, s)); + + } else if (s.find('/') != std::string::npos) { + auto path = store->toStorePath(store->followLinksToStore(s)); + + if (store->isStorePath(path)) { + result.push_back(std::make_shared<InstallableStorePath>(path)); + } + } + + else if (s.empty() || std::regex_match(s, attrPathRegex)) { + result.push_back(std::make_shared<InstallableAttrPath>(cmd, s)); + + } else { + throw UsageError("don't know what to do with argument '%s'", s); + } + } + + return result; +} + +std::shared_ptr<Installable> parseInstallable(SourceExprCommand& cmd, + const ref<Store>& store, + const std::string& installable, + bool useDefaultInstallables) { + auto installables = parseInstallables(cmd, store, {installable}, false); + assert(installables.size() == 1); + return installables.front(); +} + +Buildables build( + const ref<Store>& store, RealiseMode mode, + const std::vector<std::shared_ptr<Installable>>& installables) { + if (mode != Build) { + settings.readOnlyMode = true; + } + + Buildables buildables; + + PathSet pathsToBuild; + + for (auto& i : installables) { + for (auto& b : i->toBuildables()) { + if (!b.drvPath.empty()) { + StringSet outputNames; + for (auto& output : b.outputs) { + outputNames.insert(output.first); + } + pathsToBuild.insert(b.drvPath + "!" + + concatStringsSep(",", outputNames)); + } else { + for (auto& output : b.outputs) { + pathsToBuild.insert(output.second); + } + } + buildables.push_back(std::move(b)); + } + } + + if (mode == DryRun) { + printMissing(store, pathsToBuild); + } else if (mode == Build) { + store->buildPaths(pathsToBuild); + } + + return buildables; +} + +PathSet toStorePaths( + const ref<Store>& store, RealiseMode mode, + const std::vector<std::shared_ptr<Installable>>& installables) { + PathSet outPaths; + + for (auto& b : build(store, mode, installables)) { + for (auto& output : b.outputs) { + outPaths.insert(output.second); + } + } + + return outPaths; +} + +Path toStorePath(const ref<Store>& store, RealiseMode mode, + const std::shared_ptr<Installable>& installable) { + auto paths = toStorePaths(store, mode, {installable}); + + if (paths.size() != 1) { + throw Error("argument '%s' should evaluate to one store path", + installable->what()); + } + + return *paths.begin(); +} + +PathSet toDerivations( + const ref<Store>& store, + const std::vector<std::shared_ptr<Installable>>& installables, + bool useDeriver) { + PathSet drvPaths; + + for (auto& i : installables) { + for (auto& b : i->toBuildables()) { + if (b.drvPath.empty()) { + if (!useDeriver) { + throw Error("argument '%s' did not evaluate to a derivation", + i->what()); + } + for (auto& output : b.outputs) { + auto derivers = store->queryValidDerivers(output.second); + if (derivers.empty()) { + throw Error("'%s' does not have a known deriver", i->what()); + } + // FIXME: use all derivers? + drvPaths.insert(*derivers.begin()); + } + } else { + drvPaths.insert(b.drvPath); + } + } + } + + return drvPaths; +} + +void InstallablesCommand::prepare() { + installables = parseInstallables(*this, getStore(), _installables, + useDefaultInstallables()); +} + +void InstallableCommand::prepare() { + installable = parseInstallable(*this, getStore(), _installable, false); +} + +} // namespace nix diff --git a/third_party/nix/src/nix/legacy.cc b/third_party/nix/src/nix/legacy.cc new file mode 100644 index 000000000000..b851392b4276 --- /dev/null +++ b/third_party/nix/src/nix/legacy.cc @@ -0,0 +1,7 @@ +#include "legacy.hh" + +namespace nix { + +RegisterLegacyCommand::Commands* RegisterLegacyCommand::commands = nullptr; + +} diff --git a/third_party/nix/src/nix/legacy.hh b/third_party/nix/src/nix/legacy.hh new file mode 100644 index 000000000000..a9bc65c02e76 --- /dev/null +++ b/third_party/nix/src/nix/legacy.hh @@ -0,0 +1,23 @@ +#pragma once + +#include <functional> +#include <map> +#include <string> + +namespace nix { + +typedef std::function<void(int, char**)> MainFunction; + +struct RegisterLegacyCommand { + typedef std::map<std::string, MainFunction> Commands; + static Commands* commands; + + RegisterLegacyCommand(const std::string& name, MainFunction fun) { + if (!commands) { + commands = new Commands; + } + (*commands)[name] = fun; + } +}; + +} // namespace nix diff --git a/third_party/nix/src/nix/log.cc b/third_party/nix/src/nix/log.cc new file mode 100644 index 000000000000..63d3554cc2fb --- /dev/null +++ b/third_party/nix/src/nix/log.cc @@ -0,0 +1,63 @@ +#include <glog/logging.h> + +#include "command.hh" +#include "common-args.hh" +#include "shared.hh" +#include "store-api.hh" + +using namespace nix; + +struct CmdLog : InstallableCommand { + CmdLog() = default; + + std::string name() override { return "log"; } + + std::string description() override { + return "show the build log of the specified packages or paths, if " + "available"; + } + + Examples examples() override { + return { + Example{"To get the build log of GNU Hello:", "nix log nixpkgs.hello"}, + Example{ + "To get the build log of a specific path:", + "nix log " + "/nix/store/lmngj4wcm9rkv3w4dfhzhcyij3195hiq-thunderbird-52.2.1"}, + Example{"To get a build log from a specific binary cache:", + "nix log --store https://cache.nixos.org nixpkgs.hello"}, + }; + } + + void run(ref<Store> store) override { + settings.readOnlyMode = true; + + auto subs = getDefaultSubstituters(); + + subs.push_front(store); + + auto b = installable->toBuildable(); + + RunPager pager; + for (auto& sub : subs) { + auto log = !b.drvPath.empty() ? sub->getBuildLog(b.drvPath) : nullptr; + for (auto& output : b.outputs) { + if (log) { + break; + } + log = sub->getBuildLog(output.second); + } + if (!log) { + continue; + } + LOG(INFO) << "got build log for '" << installable->what() << "' from '" + << sub->getUri() << "'"; + std::cout << *log; + return; + } + + throw Error("build log of '%s' is not available", installable->what()); + } +}; + +static RegisterCommand r1(make_ref<CmdLog>()); diff --git a/third_party/nix/src/nix/ls.cc b/third_party/nix/src/nix/ls.cc new file mode 100644 index 000000000000..5b052f8fcc7c --- /dev/null +++ b/third_party/nix/src/nix/ls.cc @@ -0,0 +1,138 @@ +#include "command.hh" +#include "common-args.hh" +#include "fs-accessor.hh" +#include "json.hh" +#include "nar-accessor.hh" +#include "store-api.hh" + +using namespace nix; + +struct MixLs : virtual Args, MixJSON { + std::string path; + + bool recursive = false; + bool verbose = false; + bool showDirectory = false; + + MixLs() { + mkFlag('R', "recursive", "list subdirectories recursively", &recursive); + mkFlag('l', "long", "show more file information", &verbose); + mkFlag('d', "directory", "show directories rather than their contents", + &showDirectory); + } + + void listText(ref<FSAccessor> accessor) { + std::function<void(const FSAccessor::Stat&, const Path&, const std::string&, + bool)> + doPath; + + auto showFile = [&](const Path& curPath, const std::string& relPath) { + if (verbose) { + auto st = accessor->stat(curPath); + std::string tp = st.type == FSAccessor::Type::tRegular + ? (st.isExecutable ? "-r-xr-xr-x" : "-r--r--r--") + : st.type == FSAccessor::Type::tSymlink + ? "lrwxrwxrwx" + : "dr-xr-xr-x"; + std::cout << (format("%s %20d %s") % tp % st.fileSize % relPath); + if (st.type == FSAccessor::Type::tSymlink) { + std::cout << " -> " << accessor->readLink(curPath); + } + std::cout << "\n"; + if (recursive && st.type == FSAccessor::Type::tDirectory) { + doPath(st, curPath, relPath, false); + } + } else { + std::cout << relPath << "\n"; + if (recursive) { + auto st = accessor->stat(curPath); + if (st.type == FSAccessor::Type::tDirectory) { + doPath(st, curPath, relPath, false); + } + } + } + }; + + doPath = [&](const FSAccessor::Stat& st, const Path& curPath, + const std::string& relPath, bool showDirectory) { + if (st.type == FSAccessor::Type::tDirectory && !showDirectory) { + auto names = accessor->readDirectory(curPath); + for (auto& name : names) { + showFile(curPath + "/" + name, relPath + "/" + name); + } + } else { + showFile(curPath, relPath); + } + }; + + auto st = accessor->stat(path); + if (st.type == FSAccessor::Type::tMissing) { + throw Error(format("path '%1%' does not exist") % path); + } + doPath(st, path, + st.type == FSAccessor::Type::tDirectory ? "." : baseNameOf(path), + showDirectory); + } + + void list(const ref<FSAccessor>& accessor) { + if (path == "/") { + path = ""; + } + + if (json) { + JSONPlaceholder jsonRoot(std::cout); + listNar(jsonRoot, accessor, path, recursive); + } else { + listText(accessor); + } + } +}; + +struct CmdLsStore : StoreCommand, MixLs { + CmdLsStore() { expectArg("path", &path); } + + Examples examples() override { + return { + Example{"To list the contents of a store path in a binary cache:", + "nix ls-store --store https://cache.nixos.org/ -lR " + "/nix/store/0i2jd68mp5g6h2sa5k9c85rb80sn8hi9-hello-2.10"}, + }; + } + + std::string name() override { return "ls-store"; } + + std::string description() override { + return "show information about a store path"; + } + + void run(ref<Store> store) override { list(store->getFSAccessor()); } +}; + +struct CmdLsNar : Command, MixLs { + Path narPath; + + CmdLsNar() { + expectArg("nar", &narPath); + expectArg("path", &path); + } + + Examples examples() override { + return { + Example{"To list a specific file in a NAR:", + "nix ls-nar -l hello.nar /bin/hello"}, + }; + } + + std::string name() override { return "ls-nar"; } + + std::string description() override { + return "show information about the contents of a NAR file"; + } + + void run() override { + list(makeNarAccessor(make_ref<std::string>(readFile(narPath, true)))); + } +}; + +static RegisterCommand r1(make_ref<CmdLsStore>()); +static RegisterCommand r2(make_ref<CmdLsNar>()); diff --git a/third_party/nix/src/nix/main.cc b/third_party/nix/src/nix/main.cc new file mode 100644 index 000000000000..11ba3f8410c4 --- /dev/null +++ b/third_party/nix/src/nix/main.cc @@ -0,0 +1,187 @@ +#include <algorithm> + +#include <ifaddrs.h> +#include <netdb.h> +#include <netinet/in.h> +#include <sys/socket.h> +#include <sys/types.h> + +#include "command.hh" +#include "common-args.hh" +#include "download.hh" +#include "eval.hh" +#include "finally.hh" +#include "globals.hh" +#include "glog/logging.h" +#include "legacy.hh" +#include "shared.hh" +#include "store-api.hh" + +extern std::string chrootHelperName; + +void chrootHelper(int argc, char** argv); + +namespace nix { + +/* Check if we have a non-loopback/link-local network interface. */ +static bool haveInternet() { + struct ifaddrs* addrs; + + if (getifaddrs(&addrs) != 0) { + return true; + } + + Finally free([&]() { freeifaddrs(addrs); }); + + for (auto i = addrs; i != nullptr; i = i->ifa_next) { + if (i->ifa_addr == nullptr) { + continue; + } + if (i->ifa_addr->sa_family == AF_INET) { + if (ntohl(((sockaddr_in*)i->ifa_addr)->sin_addr.s_addr) != + INADDR_LOOPBACK) { + return true; + } + } else if (i->ifa_addr->sa_family == AF_INET6) { + if (!IN6_IS_ADDR_LOOPBACK(&((sockaddr_in6*)i->ifa_addr)->sin6_addr) && + !IN6_IS_ADDR_LINKLOCAL(&((sockaddr_in6*)i->ifa_addr)->sin6_addr)) { + return true; + } + } + } + + return false; +} + +std::string programPath; + +struct NixArgs : virtual MultiCommand, virtual MixCommonArgs { + bool printBuildLogs = false; + bool useNet = true; + + NixArgs() : MultiCommand(*RegisterCommand::commands), MixCommonArgs("nix") { + mkFlag() + .longName("help") + .description("show usage information") + .handler([&]() { showHelpAndExit(); }); + + mkFlag() + .longName("help-config") + .description("show configuration options") + .handler([&]() { + std::cout << "The following configuration options are available:\n\n"; + Table2 tbl; + std::map<std::string, Config::SettingInfo> settings; + globalConfig.getSettings(settings); + for (const auto& s : settings) { + tbl.emplace_back(s.first, s.second.description); + } + printTable(std::cout, tbl); + throw Exit(); + }); + + mkFlag() + .longName("print-build-logs") + .shortName('L') + .description("print full build logs on stderr") + .set(&printBuildLogs, true); + + mkFlag() + .longName("version") + .description("show version information") + .handler([&]() { printVersion(programName); }); + + mkFlag() + .longName("no-net") + .description( + "disable substituters and consider all previously downloaded files " + "up-to-date") + .handler([&]() { useNet = false; }); + } + + void printFlags(std::ostream& out) override { + Args::printFlags(out); + std::cout << "\n" + "In addition, most configuration settings can be overriden " + "using '--<name> <value>'.\n" + "Boolean settings can be overriden using '--<name>' or " + "'--no-<name>'. See 'nix\n" + "--help-config' for a list of configuration settings.\n"; + } + + void showHelpAndExit() { + printHelp(programName, std::cout); + std::cout + << "\nNote: this program is EXPERIMENTAL and subject to change.\n"; + throw Exit(); + } +}; + +void mainWrapped(int argc, char** argv) { + /* The chroot helper needs to be run before any threads have been + started. */ + if (argc > 0 && argv[0] == chrootHelperName) { + chrootHelper(argc, argv); + return; + } + + initNix(); + initGC(); + + programPath = argv[0]; + string programName = baseNameOf(programPath); + + { + auto legacy = (*RegisterLegacyCommand::commands)[programName]; + if (legacy) { + return legacy(argc, argv); + } + } + + settings.verboseBuild = false; + + NixArgs args; + + args.parseCmdline(argvToStrings(argc, argv)); + + initPlugins(); + + if (!args.command) { + args.showHelpAndExit(); + } + + if (args.useNet && !haveInternet()) { + LOG(WARNING) << "you don't have Internet access; " + << "disabling some network-dependent features"; + args.useNet = false; + } + + if (!args.useNet) { + // FIXME: should check for command line overrides only. + if (!settings.useSubstitutes.overriden) { + settings.useSubstitutes = false; + } + if (!settings.tarballTtl.overriden) { + settings.tarballTtl = std::numeric_limits<unsigned int>::max(); + } + if (!downloadSettings.tries.overriden) { + downloadSettings.tries = 0; + } + if (!downloadSettings.connectTimeout.overriden) { + downloadSettings.connectTimeout = 1; + } + } + + args.command->prepare(); + args.command->run(); +} + +} // namespace nix + +int main(int argc, char* argv[]) { + FLAGS_logtostderr = true; + google::InitGoogleLogging(argv[0]); + + return nix::handleExceptions(argv[0], + [&]() { nix::mainWrapped(argc, argv); }); +} diff --git a/third_party/nix/src/nix/meson.build b/third_party/nix/src/nix/meson.build new file mode 100644 index 000000000000..259de78e2476 --- /dev/null +++ b/third_party/nix/src/nix/meson.build @@ -0,0 +1,78 @@ +src_inc += include_directories('.') + +nix_src = files( + join_paths(meson.source_root(), 'src/nix/add-to-store.cc'), + join_paths(meson.source_root(), 'src/nix/build.cc'), + join_paths(meson.source_root(), 'src/nix/cat.cc'), + join_paths(meson.source_root(), 'src/nix/command.cc'), + join_paths(meson.source_root(), 'src/nix/copy.cc'), + join_paths(meson.source_root(), 'src/nix/doctor.cc'), + join_paths(meson.source_root(), 'src/nix/dump-path.cc'), + join_paths(meson.source_root(), 'src/nix/edit.cc'), + join_paths(meson.source_root(), 'src/nix/eval.cc'), + join_paths(meson.source_root(), 'src/nix/hash.cc'), + join_paths(meson.source_root(), 'src/nix/installables.cc'), + join_paths(meson.source_root(), 'src/nix/legacy.cc'), + join_paths(meson.source_root(), 'src/nix/log.cc'), + join_paths(meson.source_root(), 'src/nix/ls.cc'), + join_paths(meson.source_root(), 'src/nix/main.cc'), + join_paths(meson.source_root(), 'src/nix/optimise-store.cc'), + join_paths(meson.source_root(), 'src/nix/path-info.cc'), + join_paths(meson.source_root(), 'src/nix/ping-store.cc'), + join_paths(meson.source_root(), 'src/nix/repl.cc'), + join_paths(meson.source_root(), 'src/nix/run.cc'), + join_paths(meson.source_root(), 'src/nix/search.cc'), + join_paths(meson.source_root(), 'src/nix/show-config.cc'), + join_paths(meson.source_root(), 'src/nix/show-derivation.cc'), + join_paths(meson.source_root(), 'src/nix/sigs.cc'), + join_paths(meson.source_root(), 'src/nix/upgrade-nix.cc'), + join_paths(meson.source_root(), 'src/nix/verify.cc'), + join_paths(meson.source_root(), 'src/nix/why-depends.cc'), + + join_paths(meson.source_root(), 'src/build-remote/build-remote.cc'), + join_paths(meson.source_root(), 'src/nix-build/nix-build.cc'), + join_paths(meson.source_root(), 'src/nix-channel/nix-channel.cc'), + join_paths(meson.source_root(), 'src/nix-collect-garbage/nix-collect-garbage.cc'), + join_paths(meson.source_root(), 'src/nix-copy-closure/nix-copy-closure.cc'), + join_paths(meson.source_root(), 'src/nix-daemon/nix-daemon.cc'), + join_paths(meson.source_root(), 'src/nix-env/nix-env.cc'), + join_paths(meson.source_root(), 'src/nix-env/user-env.cc'), + join_paths(meson.source_root(), 'src/nix-instantiate/nix-instantiate.cc'), + join_paths(meson.source_root(), 'src/nix-prefetch-url/nix-prefetch-url.cc'), + join_paths(meson.source_root(), 'src/nix-store/dotgraph.cc'), + join_paths(meson.source_root(), 'src/nix-store/graphml.cc'), + join_paths(meson.source_root(), 'src/nix-store/nix-store.cc')) + +nix_headers = files ( + join_paths(meson.source_root(), 'src/nix/command.hh'), + join_paths(meson.source_root(), 'src/nix/legacy.hh'), + join_paths(meson.source_root(), 'src/nix-env/user-env.hh'), + join_paths(meson.source_root(), 'src/nix-store/dotgraph.hh'), + join_paths(meson.source_root(), 'src/nix-store/graphml.hh')) + +nix_dep_list = [ + boost_dep, + editline_dep, + gc_dep, + glog_dep, + libdl_dep, + libsodium_dep, + pthread_dep, +] + +nix_link_list = [ + libutil_lib, + libstore_lib, + libmain_lib, + libexpr_lib, +] + +nix_bin = executable( + 'nix', + install : true, + install_mode : 'rwxr-xr-x', + install_dir : bindir, + include_directories : src_inc, + sources : nix_src, + link_with : nix_link_list, + dependencies : nix_dep_list) diff --git a/third_party/nix/src/nix/optimise-store.cc b/third_party/nix/src/nix/optimise-store.cc new file mode 100644 index 000000000000..5fdcfb742465 --- /dev/null +++ b/third_party/nix/src/nix/optimise-store.cc @@ -0,0 +1,27 @@ +#include <atomic> + +#include "command.hh" +#include "shared.hh" +#include "store-api.hh" + +using namespace nix; + +struct CmdOptimiseStore : StoreCommand { + CmdOptimiseStore() = default; + + std::string name() override { return "optimise-store"; } + + std::string description() override { + return "replace identical files in the store by hard links"; + } + + Examples examples() override { + return { + Example{"To optimise the Nix store:", "nix optimise-store"}, + }; + } + + void run(ref<Store> store) override { store->optimiseStore(); } +}; + +static RegisterCommand r1(make_ref<CmdOptimiseStore>()); diff --git a/third_party/nix/src/nix/path-info.cc b/third_party/nix/src/nix/path-info.cc new file mode 100644 index 000000000000..e94dcc962831 --- /dev/null +++ b/third_party/nix/src/nix/path-info.cc @@ -0,0 +1,131 @@ +#include <algorithm> +#include <array> + +#include "command.hh" +#include "common-args.hh" +#include "json.hh" +#include "shared.hh" +#include "store-api.hh" + +using namespace nix; + +struct CmdPathInfo : StorePathsCommand, MixJSON { + bool showSize = false; + bool showClosureSize = false; + bool humanReadable = false; + bool showSigs = false; + + CmdPathInfo() { + mkFlag('s', "size", "print size of the NAR dump of each path", &showSize); + mkFlag('S', "closure-size", + "print sum size of the NAR dumps of the closure of each path", + &showClosureSize); + mkFlag('h', "human-readable", + "with -s and -S, print sizes like 1K 234M 5.67G etc.", + &humanReadable); + mkFlag(0, "sigs", "show signatures", &showSigs); + } + + std::string name() override { return "path-info"; } + + std::string description() override { + return "query information about store paths"; + } + + Examples examples() override { + return { + Example{"To show the closure sizes of every path in the current NixOS " + "system closure, sorted by size:", + "nix path-info -rS /run/current-system | sort -nk2"}, + Example{"To show a package's closure size and all its dependencies " + "with human readable sizes:", + "nix path-info -rsSh nixpkgs.rust"}, + Example{"To check the existence of a path in a binary cache:", + "nix path-info -r /nix/store/7qvk5c91...-geeqie-1.1 --store " + "https://cache.nixos.org/"}, + Example{"To print the 10 most recently added paths (using --json and " + "the jq(1) command):", + "nix path-info --json --all | jq -r " + "'sort_by(.registrationTime)[-11:-1][].path'"}, + Example{"To show the size of the entire Nix store:", + "nix path-info --json --all | jq 'map(.narSize) | add'"}, + Example{"To show every path whose closure is bigger than 1 GB, sorted " + "by closure size:", + "nix path-info --json --all -S | jq 'map(select(.closureSize > " + "1e9)) | sort_by(.closureSize) | map([.path, .closureSize])'"}, + }; + } + + void printSize(unsigned long long value) { + if (!humanReadable) { + std::cout << fmt("\t%11d", value); + return; + } + + static const std::array<char, 9> idents{ + {' ', 'K', 'M', 'G', 'T', 'P', 'E', 'Z', 'Y'}}; + size_t power = 0; + double res = value; + while (res > 1024 && power < idents.size()) { + ++power; + res /= 1024; + } + std::cout << fmt("\t%6.1f%c", res, idents.at(power)); + } + + void run(ref<Store> store, Paths storePaths) override { + size_t pathLen = 0; + for (auto& storePath : storePaths) { + pathLen = std::max(pathLen, storePath.size()); + } + + if (json) { + JSONPlaceholder jsonRoot(std::cout); + store->pathInfoToJSON(jsonRoot, + // FIXME: preserve order? + PathSet(storePaths.begin(), storePaths.end()), true, + showClosureSize, AllowInvalid); + } + + else { + for (auto storePath : storePaths) { + auto info = store->queryPathInfo(storePath); + storePath = info->path; // FIXME: screws up padding + + std::cout << storePath; + + if (showSize || showClosureSize || showSigs) { + std::cout << std::string( + std::max(0, (int)pathLen - (int)storePath.size()), ' '); + } + + if (showSize) { + printSize(info->narSize); + } + + if (showClosureSize) { + printSize(store->getClosureSize(storePath).first); + } + + if (showSigs) { + std::cout << '\t'; + Strings ss; + if (info->ultimate) { + ss.push_back("ultimate"); + } + if (!info->ca.empty()) { + ss.push_back("ca:" + info->ca); + } + for (auto& sig : info->sigs) { + ss.push_back(sig); + } + std::cout << concatStringsSep(" ", ss); + } + + std::cout << std::endl; + } + } + } +}; + +static RegisterCommand r1(make_ref<CmdPathInfo>()); diff --git a/third_party/nix/src/nix/ping-store.cc b/third_party/nix/src/nix/ping-store.cc new file mode 100644 index 000000000000..9e9ee15c8966 --- /dev/null +++ b/third_party/nix/src/nix/ping-store.cc @@ -0,0 +1,25 @@ +#include "command.hh" +#include "shared.hh" +#include "store-api.hh" + +using namespace nix; + +struct CmdPingStore : StoreCommand { + std::string name() override { return "ping-store"; } + + std::string description() override { + return "test whether a store can be opened"; + } + + Examples examples() override { + return { + Example{ + "To test whether connecting to a remote Nix store via SSH works:", + "nix ping-store --store ssh://mac1"}, + }; + } + + void run(ref<Store> store) override { store->connect(); } +}; + +static RegisterCommand r1(make_ref<CmdPingStore>()); diff --git a/third_party/nix/src/nix/repl.cc b/third_party/nix/src/nix/repl.cc new file mode 100644 index 000000000000..87caf9dd3fc2 --- /dev/null +++ b/third_party/nix/src/nix/repl.cc @@ -0,0 +1,837 @@ +#include <climits> +#include <csetjmp> +#include <cstdlib> +#include <cstring> +#include <iostream> +#include <utility> + +#include <glog/logging.h> + +#ifdef READLINE +#include <readline/history.h> +#include <readline/readline.h> +#else +// editline < 1.15.2 don't wrap their API for C++ usage +// (added in +// https://github.com/troglobit/editline/commit/91398ceb3427b730995357e9d120539fb9bb7461). +// This results in linker errors due to to name-mangling of editline C symbols. +// For compatibility with these versions, we wrap the API here +// (wrapping multiple times on newer versions is no problem). +extern "C" { +#include <editline.h> +} +#endif + +#include "affinity.hh" +#include "command.hh" +#include "common-eval-args.hh" +#include "derivations.hh" +#include "eval-inline.hh" +#include "eval.hh" +#include "finally.hh" +#include "get-drvs.hh" +#include "globals.hh" +#include "shared.hh" +#include "store-api.hh" + +namespace nix { + +#define ESC_RED "\033[31m" +#define ESC_GRE "\033[32m" +#define ESC_YEL "\033[33m" +#define ESC_BLU "\033[34;1m" +#define ESC_MAG "\033[35m" +#define ESC_CYA "\033[36m" +#define ESC_END "\033[0m" + +struct NixRepl { + string curDir; + EvalState state; + Bindings* autoArgs; + + Strings loadedFiles; + + const static int envSize = 32768; + StaticEnv staticEnv; + Env* env; + int displ; + StringSet varNames; + + const Path historyFile; + + NixRepl(const Strings& searchPath, const nix::ref<Store>& store); + ~NixRepl(); + void mainLoop(const std::vector<std::string>& files); + StringSet completePrefix(const string& prefix); + static bool getLine(string& input, const std::string& prompt); + Path getDerivationPath(Value& v); + bool processLine(string line); + void loadFile(const Path& path); + void initEnv(); + void reloadFiles(); + void addAttrsToScope(Value& attrs); + void addVarToScope(const Symbol& name, Value& v); + Expr* parseString(const string& s); + void evalString(string s, Value& v); + + using ValuesSeen = set<Value*>; + std::ostream& printValue(std::ostream& str, Value& v, unsigned int maxDepth); + std::ostream& printValue(std::ostream& str, Value& v, unsigned int maxDepth, + ValuesSeen& seen); +}; + +void printHelp() { + std::cout << "Usage: nix-repl [--help] [--version] [-I path] paths...\n" + << "\n" + << "nix-repl is a simple read-eval-print loop (REPL) for the Nix " + "package manager.\n" + << "\n" + << "Options:\n" + << " --help\n" + << " Prints out a summary of the command syntax and exits.\n" + << "\n" + << " --version\n" + << " Prints out the Nix version number on standard output " + "and exits.\n" + << "\n" + << " -I path\n" + << " Add a path to the Nix expression search path. This " + "option may be given\n" + << " multiple times. See the NIX_PATH environment variable " + "for information on\n" + << " the semantics of the Nix search path. Paths added " + "through -I take\n" + << " precedence over NIX_PATH.\n" + << "\n" + << " paths...\n" + << " A list of paths to files containing Nix expressions " + "which nix-repl will\n" + << " load and add to its scope.\n" + << "\n" + << " A path surrounded in < and > will be looked up in the " + "Nix expression search\n" + << " path, as in the Nix language itself.\n" + << "\n" + << " If an element of paths starts with http:// or " + "https://, it is interpreted\n" + << " as the URL of a tarball that will be downloaded and " + "unpacked to a temporary\n" + << " location. The tarball must include a single top-level " + "directory containing\n" + << " at least a file named default.nix.\n"; +} + +string removeWhitespace(string s) { + s = chomp(s); + size_t n = s.find_first_not_of(" \n\r\t"); + if (n != string::npos) { + s = string(s, n); + } + return s; +} + +NixRepl::NixRepl(const Strings& searchPath, const nix::ref<Store>& store) + : state(searchPath, store), + staticEnv(false, &state.staticBaseEnv), + historyFile(getDataDir() + "/nix/repl-history") { + curDir = absPath("."); +} + +NixRepl::~NixRepl() { write_history(historyFile.c_str()); } + +static NixRepl* curRepl; // ugly + +static char* completionCallback(char* s, int* match) { + auto possible = curRepl->completePrefix(s); + if (possible.size() == 1) { + *match = 1; + auto* res = strdup(possible.begin()->c_str() + strlen(s)); + if (res == nullptr) { + throw Error("allocation failure"); + } + return res; + } + if (possible.size() > 1) { + auto checkAllHaveSameAt = [&](size_t pos) { + auto& first = *possible.begin(); + for (auto& p : possible) { + if (p.size() <= pos || p[pos] != first[pos]) { + return false; + } + } + return true; + }; + size_t start = strlen(s); + size_t len = 0; + while (checkAllHaveSameAt(start + len)) { + ++len; + } + if (len > 0) { + *match = 1; + auto* res = strdup(std::string(*possible.begin(), start, len).c_str()); + if (res == nullptr) { + throw Error("allocation failure"); + } + return res; + } + } + + *match = 0; + return nullptr; +} + +static int listPossibleCallback(char* s, char*** avp) { + auto possible = curRepl->completePrefix(s); + + if (possible.size() > (INT_MAX / sizeof(char*))) { + throw Error("too many completions"); + } + + int ac = 0; + char** vp = nullptr; + + auto check = [&](auto* p) { + if (!p) { + if (vp) { + while (--ac >= 0) { + free(vp[ac]); + } + free(vp); + } + throw Error("allocation failure"); + } + return p; + }; + + vp = check((char**)malloc(possible.size() * sizeof(char*))); + + for (auto& p : possible) { + vp[ac++] = check(strdup(p.c_str())); + } + + *avp = vp; + + return ac; +} + +namespace { +// Used to communicate to NixRepl::getLine whether a signal occurred in +// ::readline. +volatile sig_atomic_t g_signal_received = 0; + +void sigintHandler(int signo) { g_signal_received = signo; } +} // namespace + +void NixRepl::mainLoop(const std::vector<std::string>& files) { + string error = ANSI_RED "error:" ANSI_NORMAL " "; + std::cout << "Welcome to Nix version " << nixVersion << ". Type :? for help." + << std::endl + << std::endl; + + for (auto& i : files) { + loadedFiles.push_back(i); + } + + reloadFiles(); + if (!loadedFiles.empty()) { + std::cout << std::endl; + } + + // Allow nix-repl specific settings in .inputrc + rl_readline_name = "nix-repl"; + createDirs(dirOf(historyFile)); +#ifndef READLINE + el_hist_size = 1000; +#endif + read_history(historyFile.c_str()); + curRepl = this; +#ifndef READLINE + rl_set_complete_func(completionCallback); + rl_set_list_possib_func(listPossibleCallback); +#endif + + std::string input; + + while (true) { + // When continuing input from previous lines, don't print a prompt, just + // align to the same number of chars as the prompt. + if (!getLine(input, input.empty() ? "nix-repl> " : " ")) { + break; + } + + try { + if (!removeWhitespace(input).empty() && !processLine(input)) { + return; + } + } catch (ParseError& e) { + if (e.msg().find("unexpected $end") != std::string::npos) { + // For parse errors on incomplete input, we continue waiting for the + // next line of input without clearing the input so far. + continue; + } + LOG(ERROR) << error << (settings.showTrace ? e.prefix() : "") << e.msg(); + + } catch (Error& e) { + LOG(ERROR) << error << (settings.showTrace ? e.prefix() : "") << e.msg(); + } catch (Interrupted& e) { + LOG(ERROR) << error << (settings.showTrace ? e.prefix() : "") << e.msg(); + } + + // We handled the current input fully, so we should clear it + // and read brand new input. + input.clear(); + std::cout << std::endl; + } +} + +bool NixRepl::getLine(string& input, const std::string& prompt) { + struct sigaction act; + struct sigaction old; + sigset_t savedSignalMask; + sigset_t set; + + auto setupSignals = [&]() { + act.sa_handler = sigintHandler; + sigfillset(&act.sa_mask); + act.sa_flags = 0; + if (sigaction(SIGINT, &act, &old) != 0) { + throw SysError("installing handler for SIGINT"); + } + + sigemptyset(&set); + sigaddset(&set, SIGINT); + if (sigprocmask(SIG_UNBLOCK, &set, &savedSignalMask) != 0) { + throw SysError("unblocking SIGINT"); + } + }; + auto restoreSignals = [&]() { + if (sigprocmask(SIG_SETMASK, &savedSignalMask, nullptr) != 0) { + throw SysError("restoring signals"); + } + + if (sigaction(SIGINT, &old, nullptr) != 0) { + throw SysError("restoring handler for SIGINT"); + } + }; + + setupSignals(); + char* s = readline(prompt.c_str()); + Finally doFree([&]() { free(s); }); + restoreSignals(); + + if (g_signal_received != 0) { + g_signal_received = 0; + input.clear(); + return true; + } + + if (s == nullptr) { + return false; + } + input += s; + input += '\n'; + return true; +} + +StringSet NixRepl::completePrefix(const string& prefix) { + StringSet completions; + + size_t start = prefix.find_last_of(" \n\r\t(){}[]"); + std::string prev; + std::string cur; + if (start == std::string::npos) { + prev = ""; + cur = prefix; + } else { + prev = std::string(prefix, 0, start + 1); + cur = std::string(prefix, start + 1); + } + + size_t slash; + size_t dot; + + if ((slash = cur.rfind('/')) != string::npos) { + try { + auto dir = std::string(cur, 0, slash); + auto prefix2 = std::string(cur, slash + 1); + for (auto& entry : readDirectory(dir.empty() ? "/" : dir)) { + if (entry.name[0] != '.' && hasPrefix(entry.name, prefix2)) { + completions.insert(prev + dir + "/" + entry.name); + } + } + } catch (Error&) { + } + } else if ((dot = cur.rfind('.')) == string::npos) { + /* This is a variable name; look it up in the current scope. */ + auto i = varNames.lower_bound(cur); + while (i != varNames.end()) { + if (string(*i, 0, cur.size()) != cur) { + break; + } + completions.insert(prev + *i); + i++; + } + } else { + try { + /* This is an expression that should evaluate to an + attribute set. Evaluate it to get the names of the + attributes. */ + string expr(cur, 0, dot); + string cur2 = string(cur, dot + 1); + + Expr* e = parseString(expr); + Value v; + e->eval(state, *env, v); + state.forceAttrs(v); + + for (auto& i : *v.attrs) { + string name = i.second.name; + if (string(name, 0, cur2.size()) != cur2) { + continue; + } + completions.insert(prev + expr + "." + name); + } + + } catch (ParseError& e) { + // Quietly ignore parse errors. + } catch (EvalError& e) { + // Quietly ignore evaluation errors. + } catch (UndefinedVarError& e) { + // Quietly ignore undefined variable errors. + } + } + + return completions; +} + +static int runProgram(const string& program, const Strings& args) { + Strings args2(args); + args2.push_front(program); + + Pid pid; + pid = fork(); + if (pid == -1) { + throw SysError("forking"); + } + if (pid == 0) { + restoreAffinity(); + execvp(program.c_str(), stringsToCharPtrs(args2).data()); + _exit(1); + } + + return pid.wait(); +} + +bool isVarName(const string& s) { + if (s.empty()) { + return false; + } + char c = s[0]; + if ((c >= '0' && c <= '9') || c == '-' || c == '\'') { + return false; + } + for (auto& i : s) { + if (!((i >= 'a' && i <= 'z') || (i >= 'A' && i <= 'Z') || + (i >= '0' && i <= '9') || i == '_' || i == '-' || i == '\'')) { + return false; + } + } + return true; +} + +Path NixRepl::getDerivationPath(Value& v) { + auto drvInfo = getDerivation(state, v, false); + if (!drvInfo) { + throw Error( + "expression does not evaluate to a derivation, so I can't build it"); + } + Path drvPath = drvInfo->queryDrvPath(); + if (drvPath.empty() || !state.store->isValidPath(drvPath)) { + throw Error("expression did not evaluate to a valid derivation"); + } + return drvPath; +} + +bool NixRepl::processLine(string line) { + if (line.empty()) { + return true; + } + + string command; + string arg; + + if (line[0] == ':') { + size_t p = line.find_first_of(" \n\r\t"); + command = string(line, 0, p); + if (p != string::npos) { + arg = removeWhitespace(string(line, p)); + } + } else { + arg = line; + } + + if (command == ":?" || command == ":help") { + std::cout << "The following commands are available:\n" + << "\n" + << " <expr> Evaluate and print expression\n" + << " <x> = <expr> Bind expression to variable\n" + << " :a <expr> Add attributes from resulting set to scope\n" + << " :b <expr> Build derivation\n" + << " :i <expr> Build derivation, then install result into " + "current profile\n" + << " :l <path> Load Nix expression and add it to scope\n" + << " :p <expr> Evaluate and print expression recursively\n" + << " :q Exit nix-repl\n" + << " :r Reload all files\n" + << " :s <expr> Build dependencies of derivation, then start " + "nix-shell\n" + << " :t <expr> Describe result of evaluation\n" + << " :u <expr> Build derivation, then start nix-shell\n"; + } + + else if (command == ":a" || command == ":add") { + Value v; + evalString(arg, v); + addAttrsToScope(v); + } + + else if (command == ":l" || command == ":load") { + state.resetFileCache(); + loadFile(arg); + } + + else if (command == ":r" || command == ":reload") { + state.resetFileCache(); + reloadFiles(); + } + + else if (command == ":t") { + Value v; + evalString(arg, v); + std::cout << showType(v) << std::endl; + + } else if (command == ":u") { + Value v; + Value f; + Value result; + evalString(arg, v); + evalString( + "drv: (import <nixpkgs> {}).runCommand \"shell\" { buildInputs = [ drv " + "]; } \"\"", + f); + state.callFunction(f, v, result, Pos()); + + Path drvPath = getDerivationPath(result); + runProgram(settings.nixBinDir + "/nix-shell", Strings{drvPath}); + } + + else if (command == ":b" || command == ":i" || command == ":s") { + Value v; + evalString(arg, v); + Path drvPath = getDerivationPath(v); + + if (command == ":b") { + /* We could do the build in this process using buildPaths(), + but doing it in a child makes it easier to recover from + problems / SIGINT. */ + if (runProgram(settings.nixBinDir + "/nix", + Strings{"build", "--no-link", drvPath}) == 0) { + Derivation drv = readDerivation(drvPath); + std::cout << std::endl + << "this derivation produced the following outputs:" + << std::endl; + for (auto& i : drv.outputs) { + std::cout << format(" %1% -> %2%") % i.first % i.second.path + << std::endl; + } + } + } else if (command == ":i") { + runProgram(settings.nixBinDir + "/nix-env", Strings{"-i", drvPath}); + } else { + runProgram(settings.nixBinDir + "/nix-shell", Strings{drvPath}); + } + } + + else if (command == ":p" || command == ":print") { + Value v; + evalString(arg, v); + printValue(std::cout, v, 1000000000) << std::endl; + } + + else if (command == ":q" || command == ":quit") { + return false; + + } else if (!command.empty()) { + throw Error(format("unknown command '%1%'") % command); + + } else { + size_t p = line.find('='); + string name; + if (p != string::npos && p < line.size() && line[p + 1] != '=' && + isVarName(name = removeWhitespace(string(line, 0, p)))) { + Expr* e = parseString(string(line, p + 1)); + Value& v(*state.allocValue()); + v.type = tThunk; + v.thunk.env = env; + v.thunk.expr = e; + addVarToScope(state.symbols.Create(name), v); + } else { + Value v; + evalString(line, v); + printValue(std::cout, v, 1) << std::endl; + } + } + + return true; +} + +void NixRepl::loadFile(const Path& path) { + loadedFiles.remove(path); + loadedFiles.push_back(path); + Value v; + Value v2; + state.evalFile(lookupFileArg(state, path), v); + state.autoCallFunction(*autoArgs, v, v2); + addAttrsToScope(v2); +} + +void NixRepl::initEnv() { + env = &state.allocEnv(envSize); + env->up = &state.baseEnv; + displ = 0; + staticEnv.vars.clear(); + + varNames.clear(); + for (auto& i : state.staticBaseEnv.vars) { + varNames.insert(i.first); + } +} + +void NixRepl::reloadFiles() { + initEnv(); + + Strings old = loadedFiles; + loadedFiles.clear(); + + bool first = true; + for (auto& i : old) { + if (!first) { + std::cout << std::endl; + } + first = false; + std::cout << format("Loading '%1%'...") % i << std::endl; + loadFile(i); + } +} + +void NixRepl::addAttrsToScope(Value& attrs) { + state.forceAttrs(attrs); + for (auto& i : *attrs.attrs) { + addVarToScope(i.second.name, *i.second.value); + } + std::cout << format("Added %1% variables.") % attrs.attrs->size() + << std::endl; +} + +void NixRepl::addVarToScope(const Symbol& name, Value& v) { + if (displ >= envSize) { + throw Error("environment full; cannot add more variables"); + } + staticEnv.vars[name] = displ; + env->values[displ++] = &v; + varNames.insert((string)name); +} + +Expr* NixRepl::parseString(const string& s) { + Expr* e = state.parseExprFromString(s, curDir, staticEnv); + return e; +} + +void NixRepl::evalString(string s, Value& v) { + Expr* e = parseString(std::move(s)); + e->eval(state, *env, v); + state.forceValue(v); +} + +std::ostream& NixRepl::printValue(std::ostream& str, Value& v, + unsigned int maxDepth) { + ValuesSeen seen; + return printValue(str, v, maxDepth, seen); +} + +std::ostream& printStringValue(std::ostream& str, const char* string) { + str << "\""; + for (const char* i = string; *i != 0; i++) { + if (*i == '\"' || *i == '\\') { + str << "\\" << *i; + } else if (*i == '\n') { + str << "\\n"; + } else if (*i == '\r') { + str << "\\r"; + } else if (*i == '\t') { + str << "\\t"; + } else { + str << *i; + } + } + str << "\""; + return str; +} + +// FIXME: lot of cut&paste from Nix's eval.cc. +std::ostream& NixRepl::printValue(std::ostream& str, Value& v, + unsigned int maxDepth, ValuesSeen& seen) { + str.flush(); + checkInterrupt(); + + state.forceValue(v); + + switch (v.type) { + case tInt: + str << ESC_CYA << v.integer << ESC_END; + break; + + case tBool: + str << ESC_CYA << (v.boolean ? "true" : "false") << ESC_END; + break; + + case tString: + str << ESC_YEL; + printStringValue(str, v.string.s); + str << ESC_END; + break; + + case tPath: + str << ESC_GRE << v.path << ESC_END; // !!! escaping? + break; + + case tNull: + str << ESC_CYA "null" ESC_END; + break; + + case tAttrs: { + seen.insert(&v); + + bool isDrv = state.isDerivation(v); + + if (isDrv) { + str << "ยซderivation "; + Bindings::iterator i = v.attrs->find(state.sDrvPath); + PathSet context; + Path drvPath = + i != v.attrs->end() + ? state.coerceToPath(*i->second.pos, *i->second.value, context) + : "???"; + str << drvPath << "ยป"; + } + + else if (maxDepth > 0) { + str << "{ "; + + typedef std::map<string, Value*> Sorted; + Sorted sorted; + for (auto& i : *v.attrs) { + sorted[i.second.name] = i.second.value; + } + + for (auto& i : sorted) { + if (isVarName(i.first)) { + str << i.first; + } else { + printStringValue(str, i.first.c_str()); + } + str << " = "; + if (seen.find(i.second) != seen.end()) { + str << "ยซrepeatedยป"; + } else { + try { + printValue(str, *i.second, maxDepth - 1, seen); + } catch (AssertionError& e) { + str << ESC_RED "ยซerror: " << e.msg() << "ยป" ESC_END; + } + } + str << "; "; + } + + str << "}"; + } else { + str << "{ ... }"; + } + + break; + } + + case tList1: + case tList2: + case tListN: + seen.insert(&v); + + str << "[ "; + if (maxDepth > 0) { + for (unsigned int n = 0; n < v.listSize(); ++n) { + if (seen.find(v.listElems()[n]) != seen.end()) { + str << "ยซrepeatedยป"; + } else { + try { + printValue(str, *v.listElems()[n], maxDepth - 1, seen); + } catch (AssertionError& e) { + str << ESC_RED "ยซerror: " << e.msg() << "ยป" ESC_END; + } + } + str << " "; + } + } else { + str << "... "; + } + + str << "]"; + break; + + case tLambda: { + std::ostringstream s; + s << v.lambda.fun->pos; + str << ESC_BLU "ยซlambda @ " << filterANSIEscapes(s.str()) << "ยป" ESC_END; + break; + } + + case tPrimOp: + str << ESC_MAG "ยซprimopยป" ESC_END; + break; + + case tPrimOpApp: + str << ESC_BLU "ยซprimop-appยป" ESC_END; + break; + + case tFloat: + str << v.fpoint; + break; + + default: + str << ESC_RED "ยซunknownยป" ESC_END; + break; + } + + return str; +} + +struct CmdRepl : StoreCommand, MixEvalArgs { + std::vector<std::string> files; + + CmdRepl() { expectArgs("files", &files); } + + std::string name() override { return "repl"; } + + std::string description() override { + return "start an interactive environment for evaluating Nix expressions"; + } + + void run(ref<Store> store) override { + auto repl = std::make_unique<NixRepl>(searchPath, openStore()); + repl->autoArgs = getAutoArgs(repl->state); + repl->mainLoop(files); + } +}; + +static RegisterCommand r1(make_ref<CmdRepl>()); + +} // namespace nix diff --git a/third_party/nix/src/nix/run.cc b/third_party/nix/src/nix/run.cc new file mode 100644 index 000000000000..fa471bd542a1 --- /dev/null +++ b/third_party/nix/src/nix/run.cc @@ -0,0 +1,280 @@ +#include "affinity.hh" +#include "command.hh" +#include "common-args.hh" +#include "derivations.hh" +#include "finally.hh" +#include "fs-accessor.hh" +#include "local-store.hh" +#include "shared.hh" +#include "store-api.hh" + +#if __linux__ +#include <sys/mount.h> +#endif + +#include <queue> + +using namespace nix; + +std::string chrootHelperName = "__run_in_chroot"; + +struct CmdRun : InstallablesCommand { + std::vector<std::string> command = {"bash"}; + StringSet keep, unset; + bool ignoreEnvironment = false; + + CmdRun() { + mkFlag() + .longName("command") + .shortName('c') + .description("command and arguments to be executed; defaults to 'bash'") + .labels({"command", "args"}) + .arity(ArityAny) + .handler([&](const std::vector<std::string>& ss) { + if (ss.empty()) { + throw UsageError("--command requires at least one argument"); + } + command = ss; + }); + + mkFlag() + .longName("ignore-environment") + .shortName('i') + .description( + "clear the entire environment (except those specified with --keep)") + .set(&ignoreEnvironment, true); + + mkFlag() + .longName("keep") + .shortName('k') + .description("keep specified environment variable") + .arity(1) + .labels({"name"}) + .handler([&](std::vector<std::string> ss) { keep.insert(ss.front()); }); + + mkFlag() + .longName("unset") + .shortName('u') + .description("unset specified environment variable") + .arity(1) + .labels({"name"}) + .handler( + [&](std::vector<std::string> ss) { unset.insert(ss.front()); }); + } + + std::string name() override { return "run"; } + + std::string description() override { + return "run a shell in which the specified packages are available"; + } + + Examples examples() override { + return { + Example{"To start a shell providing GNU Hello from NixOS 17.03:", + "nix run -f channel:nixos-17.03 hello"}, + Example{"To start a shell providing youtube-dl from your 'nixpkgs' " + "channel:", + "nix run nixpkgs.youtube-dl"}, + Example{"To run GNU Hello:", + "nix run nixpkgs.hello -c hello --greeting 'Hi everybody!'"}, + Example{"To run GNU Hello in a chroot store:", + "nix run --store ~/my-nix nixpkgs.hello -c hello"}, + }; + } + + void run(ref<Store> store) override { + auto outPaths = toStorePaths(store, Build, installables); + + auto accessor = store->getFSAccessor(); + + if (ignoreEnvironment) { + if (!unset.empty()) { + throw UsageError( + "--unset does not make sense with --ignore-environment"); + } + + std::map<std::string, std::string> kept; + for (auto& var : keep) { + auto s = getenv(var.c_str()); + if (s != nullptr) { + kept[var] = s; + } + } + + clearEnv(); + + for (auto& var : kept) { + setenv(var.first.c_str(), var.second.c_str(), 1); + } + + } else { + if (!keep.empty()) { + throw UsageError( + "--keep does not make sense without --ignore-environment"); + } + + for (auto& var : unset) { + unsetenv(var.c_str()); + } + } + + std::unordered_set<Path> done; + std::queue<Path> todo; + for (auto& path : outPaths) { + todo.push(path); + } + + auto unixPath = tokenizeString<Strings>(getEnv("PATH"), ":"); + + while (!todo.empty()) { + Path path = todo.front(); + todo.pop(); + if (!done.insert(path).second) { + continue; + } + + { unixPath.push_front(path + "/bin"); } + + auto propPath = path + "/nix-support/propagated-user-env-packages"; + if (accessor->stat(propPath).type == FSAccessor::tRegular) { + for (auto& p : tokenizeString<Paths>(readFile(propPath))) { + todo.push(p); + } + } + } + + setenv("PATH", concatStringsSep(":", unixPath).c_str(), 1); + + std::string cmd = *command.begin(); + Strings args; + for (auto& arg : command) { + args.push_back(arg); + } + + restoreSignals(); + + restoreAffinity(); + + /* If this is a diverted store (i.e. its "logical" location + (typically /nix/store) differs from its "physical" location + (e.g. /home/eelco/nix/store), then run the command in a + chroot. For non-root users, this requires running it in new + mount and user namespaces. Unfortunately, + unshare(CLONE_NEWUSER) doesn't work in a multithreaded + program (which "nix" is), so we exec() a single-threaded + helper program (chrootHelper() below) to do the work. */ + auto store2 = store.dynamic_pointer_cast<LocalStore>(); + + if (store2 && store->storeDir != store2->realStoreDir) { + Strings helperArgs = {chrootHelperName, store->storeDir, + store2->realStoreDir, cmd}; + for (auto& arg : args) { + helperArgs.push_back(arg); + } + + execv(readLink("/proc/self/exe").c_str(), + stringsToCharPtrs(helperArgs).data()); + + throw SysError("could not execute chroot helper"); + } + + execvp(cmd.c_str(), stringsToCharPtrs(args).data()); + + throw SysError("unable to exec '%s'", cmd); + } +}; + +static RegisterCommand r1(make_ref<CmdRun>()); + +void chrootHelper(int argc, char** argv) { + int p = 1; + std::string storeDir = argv[p++]; + std::string realStoreDir = argv[p++]; + std::string cmd = argv[p++]; + Strings args; + while (p < argc) { + args.push_back(argv[p++]); + } + +#if __linux__ + uid_t uid = getuid(); + uid_t gid = getgid(); + + if (unshare(CLONE_NEWUSER | CLONE_NEWNS) == -1) { + /* Try with just CLONE_NEWNS in case user namespaces are + specifically disabled. */ + if (unshare(CLONE_NEWNS) == -1) { + throw SysError("setting up a private mount namespace"); + } + } + + /* Bind-mount realStoreDir on /nix/store. If the latter mount + point doesn't already exists, we have to create a chroot + environment containing the mount point and bind mounts for the + children of /. Would be nice if we could use overlayfs here, + but that doesn't work in a user namespace yet (Ubuntu has a + patch for this: + https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1478578). */ + if (!pathExists(storeDir)) { + // FIXME: Use overlayfs? + + Path tmpDir = createTempDir(); + + createDirs(tmpDir + storeDir); + + if (mount(realStoreDir.c_str(), (tmpDir + storeDir).c_str(), "", MS_BIND, + nullptr) == -1) { + throw SysError("mounting '%s' on '%s'", realStoreDir, storeDir); + } + + for (const auto& entry : readDirectory("/")) { + auto src = "/" + entry.name; + auto st = lstat(src); + if (!S_ISDIR(st.st_mode)) { + continue; + } + Path dst = tmpDir + "/" + entry.name; + if (pathExists(dst)) { + continue; + } + if (mkdir(dst.c_str(), 0700) == -1) { + throw SysError("creating directory '%s'", dst); + } + if (mount(src.c_str(), dst.c_str(), "", MS_BIND | MS_REC, nullptr) == + -1) { + throw SysError("mounting '%s' on '%s'", src, dst); + } + } + + char* cwd = getcwd(nullptr, 0); + if (cwd == nullptr) { + throw SysError("getting current directory"); + } + Finally freeCwd([&]() { free(cwd); }); + + if (chroot(tmpDir.c_str()) == -1) { + throw SysError(format("chrooting into '%s'") % tmpDir); + } + + if (chdir(cwd) == -1) { + throw SysError(format("chdir to '%s' in chroot") % cwd); + } + } else if (mount(realStoreDir.c_str(), storeDir.c_str(), "", MS_BIND, + nullptr) == -1) { + throw SysError("mounting '%s' on '%s'", realStoreDir, storeDir); + } + + writeFile("/proc/self/setgroups", "deny"); + writeFile("/proc/self/uid_map", fmt("%d %d %d", uid, uid, 1)); + writeFile("/proc/self/gid_map", fmt("%d %d %d", gid, gid, 1)); + + execvp(cmd.c_str(), stringsToCharPtrs(args).data()); + + throw SysError("unable to exec '%s'", cmd); + +#else + throw Error( + "mounting the Nix store on '%s' is not supported on this platform", + storeDir); +#endif +} diff --git a/third_party/nix/src/nix/search.cc b/third_party/nix/src/nix/search.cc new file mode 100644 index 000000000000..9f03fc6f3202 --- /dev/null +++ b/third_party/nix/src/nix/search.cc @@ -0,0 +1,273 @@ +#include <fstream> +#include <regex> + +#include <glog/logging.h> + +#include "command.hh" +#include "common-args.hh" +#include "eval-inline.hh" +#include "eval.hh" +#include "get-drvs.hh" +#include "globals.hh" +#include "json-to-value.hh" +#include "json.hh" +#include "names.hh" +#include "shared.hh" + +using namespace nix; + +std::string wrap(const std::string& prefix, const std::string& s) { + return prefix + s + ANSI_NORMAL; +} + +std::string hilite(const std::string& s, const std::smatch& m, + const std::string& postfix) { + return m.empty() ? s + : std::string(m.prefix()) + ANSI_RED + std::string(m.str()) + + postfix + std::string(m.suffix()); +} + +struct CmdSearch : SourceExprCommand, MixJSON { + std::vector<std::string> res; + + bool writeCache = true; + bool useCache = true; + + CmdSearch() { + expectArgs("regex", &res); + + mkFlag() + .longName("update-cache") + .shortName('u') + .description("update the package search cache") + .handler([&]() { + writeCache = true; + useCache = false; + }); + + mkFlag() + .longName("no-cache") + .description("do not use or update the package search cache") + .handler([&]() { + writeCache = false; + useCache = false; + }); + } + + std::string name() override { return "search"; } + + std::string description() override { return "query available packages"; } + + Examples examples() override { + return {Example{"To show all available packages:", "nix search"}, + Example{"To show any packages containing 'blender' in its name or " + "description:", + "nix search blender"}, + Example{"To search for Firefox or Chromium:", + "nix search 'firefox|chromium'"}, + Example{"To search for git and frontend or gui:", + "nix search git 'frontend|gui'"}}; + } + + void run(ref<Store> store) override { + settings.readOnlyMode = true; + + // Empty search string should match all packages + // Use "^" here instead of ".*" due to differences in resulting highlighting + // (see #1893 -- libc++ claims empty search string is not in POSIX grammar) + if (res.empty()) { + res.emplace_back("^"); + } + + std::vector<std::regex> regexes; + regexes.reserve(res.size()); + + for (auto& re : res) { + regexes.emplace_back(re, std::regex::extended | std::regex::icase); + } + + auto state = getEvalState(); + + auto jsonOut = json ? std::make_unique<JSONObject>(std::cout) : nullptr; + + auto sToplevel = state->symbols.Create("_toplevel"); + auto sRecurse = state->symbols.Create("recurseForDerivations"); + + bool fromCache = false; + + std::map<std::string, std::string> results; + + std::function<void(Value*, std::string, bool, JSONObject*)> doExpr; + + doExpr = [&](Value* v, const std::string& attrPath, bool toplevel, + JSONObject* cache) { + DLOG(INFO) << "at attribute '" << attrPath << "'"; + + try { + uint found = 0; + + state->forceValue(*v); + + if (v->type == tLambda && toplevel) { + Value* v2 = state->allocValue(); + state->autoCallFunction(*Bindings::NewGC(), *v, *v2); + v = v2; + state->forceValue(*v); + } + + if (state->isDerivation(*v)) { + DrvInfo drv(*state, attrPath, v->attrs); + std::string description; + std::smatch attrPathMatch; + std::smatch descriptionMatch; + std::smatch nameMatch; + std::string name; + + DrvName parsed(drv.queryName()); + + for (auto& regex : regexes) { + std::regex_search(attrPath, attrPathMatch, regex); + + name = parsed.name; + std::regex_search(name, nameMatch, regex); + + description = drv.queryMetaString("description"); + std::replace(description.begin(), description.end(), '\n', ' '); + std::regex_search(description, descriptionMatch, regex); + + if (!attrPathMatch.empty() || !nameMatch.empty() || + !descriptionMatch.empty()) { + found++; + } + } + + if (found == res.size()) { + if (json) { + auto jsonElem = jsonOut->object(attrPath); + + jsonElem.attr("pkgName", parsed.name); + jsonElem.attr("version", parsed.version); + jsonElem.attr("description", description); + + } else { + auto name = hilite(parsed.name, nameMatch, "\e[0;2m") + + std::string(parsed.fullName, parsed.name.length()); + results[attrPath] = fmt( + "* %s (%s)\n %s\n", + wrap("\e[0;1m", hilite(attrPath, attrPathMatch, "\e[0;1m")), + wrap("\e[0;2m", hilite(name, nameMatch, "\e[0;2m")), + hilite(description, descriptionMatch, ANSI_NORMAL)); + } + } + + if (cache != nullptr) { + cache->attr("type", "derivation"); + cache->attr("name", drv.queryName()); + cache->attr("system", drv.querySystem()); + if (!description.empty()) { + auto meta(cache->object("meta")); + meta.attr("description", description); + } + } + } + + else if (v->type == tAttrs) { + if (!toplevel) { + auto attrs = v->attrs; + Bindings::iterator j = attrs->find(sRecurse); + if (j == attrs->end() || + !state->forceBool(*j->second.value, *j->second.pos)) { + DLOG(INFO) << "skip attribute '" << attrPath << "'"; + return; + } + } + + bool toplevel2 = false; + if (!fromCache) { + Bindings::iterator j = v->attrs->find(sToplevel); + toplevel2 = j != v->attrs->end() && + state->forceBool(*j->second.value, *j->second.pos); + } + + for (auto& i : *v->attrs) { + auto cache2 = + cache != nullptr + ? std::make_unique<JSONObject>(cache->object(i.second.name)) + : nullptr; + doExpr(i.second.value, + attrPath.empty() + ? (std::string)i.second.name + : attrPath + "." + (std::string)i.second.name, + toplevel2 || fromCache, cache2 ? cache2.get() : nullptr); + } + } + + } catch (AssertionError& e) { + } catch (Error& e) { + if (!toplevel) { + e.addPrefix(fmt("While evaluating the attribute '%s':\n", attrPath)); + throw; + } + } + }; + + Path jsonCacheFileName = getCacheDir() + "/nix/package-search.json"; + + if (useCache && pathExists(jsonCacheFileName)) { + LOG(WARNING) << "using cached results; pass '-u' to update the cache"; + + Value vRoot; + parseJSON(*state, readFile(jsonCacheFileName), vRoot); + + fromCache = true; + + doExpr(&vRoot, "", true, nullptr); + } + + else { + createDirs(dirOf(jsonCacheFileName)); + + Path tmpFile = fmt("%s.tmp.%d", jsonCacheFileName, getpid()); + + std::ofstream jsonCacheFile; + + try { + // iostream considered harmful + jsonCacheFile.exceptions(std::ofstream::failbit); + jsonCacheFile.open(tmpFile); + + auto cache = writeCache + ? std::make_unique<JSONObject>(jsonCacheFile, false) + : nullptr; + + doExpr(getSourceExpr(*state), "", true, cache.get()); + + } catch (std::exception&) { + /* Fun fact: catching std::ios::failure does not work + due to C++11 ABI shenanigans. + https://gcc.gnu.org/bugzilla/show_bug.cgi?id=66145 */ + if (!jsonCacheFile) { + throw Error("error writing to %s", tmpFile); + } + throw; + } + + if (writeCache && + rename(tmpFile.c_str(), jsonCacheFileName.c_str()) == -1) { + throw SysError("cannot rename '%s' to '%s'", tmpFile, + jsonCacheFileName); + } + } + + if (results.empty()) { + throw Error("no results for the given search term(s)!"); + } + + RunPager pager; + for (const auto& el : results) { + std::cout << el.second << "\n"; + } + } +}; + +static RegisterCommand r1(make_ref<CmdSearch>()); diff --git a/third_party/nix/src/nix/show-config.cc b/third_party/nix/src/nix/show-config.cc new file mode 100644 index 000000000000..51f02ed61bbe --- /dev/null +++ b/third_party/nix/src/nix/show-config.cc @@ -0,0 +1,31 @@ +#include "command.hh" +#include "common-args.hh" +#include "json.hh" +#include "shared.hh" +#include "store-api.hh" + +using namespace nix; + +struct CmdShowConfig : Command, MixJSON { + CmdShowConfig() = default; + + std::string name() override { return "show-config"; } + + std::string description() override { return "show the Nix configuration"; } + + void run() override { + if (json) { + // FIXME: use appropriate JSON types (bool, ints, etc). + JSONObject jsonObj(std::cout); + globalConfig.toJSON(jsonObj); + } else { + std::map<std::string, Config::SettingInfo> settings; + globalConfig.getSettings(settings); + for (auto& s : settings) { + std::cout << s.first + " = " + s.second.value + "\n"; + } + } + } +}; + +static RegisterCommand r1(make_ref<CmdShowConfig>()); diff --git a/third_party/nix/src/nix/show-derivation.cc b/third_party/nix/src/nix/show-derivation.cc new file mode 100644 index 000000000000..b821a9ee1205 --- /dev/null +++ b/third_party/nix/src/nix/show-derivation.cc @@ -0,0 +1,113 @@ +// FIXME: integrate this with nix path-info? + +#include "archive.hh" +#include "command.hh" +#include "common-args.hh" +#include "derivations.hh" +#include "json.hh" +#include "store-api.hh" + +using namespace nix; + +struct CmdShowDerivation : InstallablesCommand { + bool recursive = false; + + CmdShowDerivation() { + mkFlag() + .longName("recursive") + .shortName('r') + .description("include the dependencies of the specified derivations") + .set(&recursive, true); + } + + std::string name() override { return "show-derivation"; } + + std::string description() override { + return "show the contents of a store derivation"; + } + + Examples examples() override { + return { + Example{"To show the store derivation that results from evaluating the " + "Hello package:", + "nix show-derivation nixpkgs.hello"}, + Example{"To show the full derivation graph (if available) that " + "produced your NixOS system:", + "nix show-derivation -r /run/current-system"}, + }; + } + + void run(ref<Store> store) override { + auto drvPaths = toDerivations(store, installables, true); + + if (recursive) { + PathSet closure; + store->computeFSClosure(drvPaths, closure); + drvPaths = closure; + } + + { + JSONObject jsonRoot(std::cout, true); + + for (auto& drvPath : drvPaths) { + if (!isDerivation(drvPath)) { + continue; + } + + auto drvObj(jsonRoot.object(drvPath)); + + auto drv = readDerivation(drvPath); + + { + auto outputsObj(drvObj.object("outputs")); + for (auto& output : drv.outputs) { + auto outputObj(outputsObj.object(output.first)); + outputObj.attr("path", output.second.path); + if (!output.second.hash.empty()) { + outputObj.attr("hashAlgo", output.second.hashAlgo); + outputObj.attr("hash", output.second.hash); + } + } + } + + { + auto inputsList(drvObj.list("inputSrcs")); + for (auto& input : drv.inputSrcs) { + inputsList.elem(input); + } + } + + { + auto inputDrvsObj(drvObj.object("inputDrvs")); + for (auto& input : drv.inputDrvs) { + auto inputList(inputDrvsObj.list(input.first)); + for (auto& outputId : input.second) { + inputList.elem(outputId); + } + } + } + + drvObj.attr("platform", drv.platform); + drvObj.attr("builder", drv.builder); + + { + auto argsList(drvObj.list("args")); + for (auto& arg : drv.args) { + argsList.elem(arg); + } + } + + { + auto envObj(drvObj.object("env")); + for (auto& var : drv.env) { + envObj.attr(var.first, var.second); + } + } + } + } + + std::cout << "\n"; + } +}; + +static RegisterCommand r1(make_ref<CmdShowDerivation>()); diff --git a/third_party/nix/src/nix/sigs.cc b/third_party/nix/src/nix/sigs.cc new file mode 100644 index 000000000000..420296a8023f --- /dev/null +++ b/third_party/nix/src/nix/sigs.cc @@ -0,0 +1,145 @@ +#include <atomic> + +#include <glog/logging.h> + +#include "command.hh" +#include "shared.hh" +#include "store-api.hh" +#include "thread-pool.hh" + +using namespace nix; + +struct CmdCopySigs : StorePathsCommand { + Strings substituterUris; + + CmdCopySigs() { + mkFlag() + .longName("substituter") + .shortName('s') + .labels({"store-uri"}) + .description("use signatures from specified store") + .arity(1) + .handler([&](std::vector<std::string> ss) { + substituterUris.push_back(ss[0]); + }); + } + + std::string name() override { return "copy-sigs"; } + + std::string description() override { + return "copy path signatures from substituters (like binary caches)"; + } + + void run(ref<Store> store, Paths storePaths) override { + if (substituterUris.empty()) { + throw UsageError("you must specify at least one substituter using '-s'"); + } + + // FIXME: factor out commonality with MixVerify. + std::vector<ref<Store>> substituters; + for (auto& s : substituterUris) { + substituters.push_back(openStore(s)); + } + + ThreadPool pool; + + std::string doneLabel = "done"; + std::atomic<size_t> added{0}; + + // logger->setExpected(doneLabel, storePaths.size()); + + auto doPath = [&](const Path& storePath) { + // Activity act(*logger, lvlInfo, format("getting signatures for '%s'") % + // storePath); + + checkInterrupt(); + + auto info = store->queryPathInfo(storePath); + + StringSet newSigs; + + for (auto& store2 : substituters) { + try { + auto info2 = store2->queryPathInfo(storePath); + + /* Don't import signatures that don't match this + binary. */ + if (info->narHash != info2->narHash || + info->narSize != info2->narSize || + info->references != info2->references) { + continue; + } + + for (auto& sig : info2->sigs) { + if (info->sigs.count(sig) == 0u) { + newSigs.insert(sig); + } + } + } catch (InvalidPath&) { + } + } + + if (!newSigs.empty()) { + store->addSignatures(storePath, newSigs); + added += newSigs.size(); + } + + // logger->incProgress(doneLabel); + }; + + for (auto& storePath : storePaths) { + pool.enqueue(std::bind(doPath, storePath)); + } + + pool.process(); + + LOG(INFO) << "imported " << added << " signatures"; + } +}; + +static RegisterCommand r1(make_ref<CmdCopySigs>()); + +struct CmdSignPaths : StorePathsCommand { + Path secretKeyFile; + + CmdSignPaths() { + mkFlag() + .shortName('k') + .longName("key-file") + .label("file") + .description("file containing the secret signing key") + .dest(&secretKeyFile); + } + + std::string name() override { return "sign-paths"; } + + std::string description() override { return "sign the specified paths"; } + + void run(ref<Store> store, Paths storePaths) override { + if (secretKeyFile.empty()) { + throw UsageError("you must specify a secret key file using '-k'"); + } + + SecretKey secretKey(readFile(secretKeyFile)); + + size_t added{0}; + + for (auto& storePath : storePaths) { + auto info = store->queryPathInfo(storePath); + + auto info2(*info); + info2.sigs.clear(); + info2.sign(secretKey); + assert(!info2.sigs.empty()); + + if (info->sigs.count(*info2.sigs.begin()) == 0u) { + store->addSignatures(storePath, info2.sigs); + added++; + } + } + + LOG(INFO) << "added " << added << " signatures"; + } +}; + +static RegisterCommand r3(make_ref<CmdSignPaths>()); diff --git a/third_party/nix/src/nix/upgrade-nix.cc b/third_party/nix/src/nix/upgrade-nix.cc new file mode 100644 index 000000000000..08c411f4b4c5 --- /dev/null +++ b/third_party/nix/src/nix/upgrade-nix.cc @@ -0,0 +1,161 @@ +#include <glog/logging.h> + +#include "attr-path.hh" +#include "command.hh" +#include "common-args.hh" +#include "download.hh" +#include "eval.hh" +#include "names.hh" +#include "store-api.hh" + +using namespace nix; + +struct CmdUpgradeNix : MixDryRun, StoreCommand { + Path profileDir; + std::string storePathsUrl = + "https://github.com/NixOS/nixpkgs/raw/master/nixos/modules/installer/" + "tools/nix-fallback-paths.nix"; + + CmdUpgradeNix() { + mkFlag() + .longName("profile") + .shortName('p') + .labels({"profile-dir"}) + .description("the Nix profile to upgrade") + .dest(&profileDir); + + mkFlag() + .longName("nix-store-paths-url") + .labels({"url"}) + .description( + "URL of the file that contains the store paths of the latest Nix " + "release") + .dest(&storePathsUrl); + } + + std::string name() override { return "upgrade-nix"; } + + std::string description() override { + return "upgrade Nix to the latest stable version"; + } + + Examples examples() override { + return { + Example{"To upgrade Nix to the latest stable version:", + "nix upgrade-nix"}, + Example{ + "To upgrade Nix in a specific profile:", + "nix upgrade-nix -p /nix/var/nix/profiles/per-user/alice/profile"}, + }; + } + + void run(ref<Store> store) override { + evalSettings.pureEval = true; + + if (profileDir.empty()) { + profileDir = getProfileDir(store); + } + + LOG(INFO) << "upgrading Nix in profile '" << profileDir << "'"; + + Path storePath; + { + LOG(INFO) << "querying latest Nix version"; + storePath = getLatestNix(store); + } + + auto version = DrvName(storePathToName(storePath)).version; + + if (dryRun) { + LOG(ERROR) << "would upgrade to version " << version; + return; + } + + { + LOG(INFO) << "downloading '" << storePath << "'..."; + store->ensurePath(storePath); + } + + { + LOG(INFO) << "verifying that '" << storePath << "' works..."; + auto program = storePath + "/bin/nix-env"; + auto s = runProgram(program, false, {"--version"}); + if (s.find("Nix") == std::string::npos) { + throw Error("could not verify that '%s' works", program); + } + } + + { + LOG(INFO) << "installing '" << storePath << "' into profile '" + << profileDir << "'..."; + runProgram(settings.nixBinDir + "/nix-env", false, + {"--profile", profileDir, "-i", storePath, "--no-sandbox"}); + } + + LOG(INFO) << ANSI_GREEN << "upgrade to version " << version << " done" + << ANSI_NORMAL; + } + + /* Return the profile in which Nix is installed. */ + static Path getProfileDir(const ref<Store>& store) { + Path where; + + for (auto& dir : tokenizeString<Strings>(getEnv("PATH"), ":")) { + if (pathExists(dir + "/nix-env")) { + where = dir; + break; + } + } + + if (where.empty()) { + throw Error( + "couldn't figure out how Nix is installed, so I can't upgrade it"); + } + + LOG(INFO) << "found Nix in '" << where << "'"; + + if (hasPrefix(where, "/run/current-system")) { + throw Error("Nix on NixOS must be upgraded via 'nixos-rebuild'"); + } + + Path profileDir = dirOf(where); + + // Resolve profile to /nix/var/nix/profiles/<name> link. + while (canonPath(profileDir).find("/profiles/") == std::string::npos && + isLink(profileDir)) { + profileDir = readLink(profileDir); + } + + LOG(INFO) << "found profile '" << profileDir << "'"; + + Path userEnv = canonPath(profileDir, true); + + if (baseNameOf(where) != "bin" || !hasSuffix(userEnv, "user-environment")) { + throw Error("directory '%s' does not appear to be part of a Nix profile", + where); + } + + if (!store->isValidPath(userEnv)) { + throw Error("directory '%s' is not in the Nix store", userEnv); + } + + return profileDir; + } + + /* Return the store path of the latest stable Nix. */ + Path getLatestNix(const ref<Store>& store) { + // FIXME: use nixos.org? + auto req = DownloadRequest(storePathsUrl); + auto res = getDownloader()->download(req); + + auto state = std::make_unique<EvalState>(Strings(), store); + auto v = state->allocValue(); + state->eval(state->parseExprFromString(*res.data, "/no-such-path"), *v); + Bindings& bindings(*Bindings::NewGC()); + auto v2 = findAlongAttrPath(*state, settings.thisSystem, bindings, *v); + + return state->forceString(*v2); + } +}; + +static RegisterCommand r1(make_ref<CmdUpgradeNix>()); diff --git a/third_party/nix/src/nix/verify.cc b/third_party/nix/src/nix/verify.cc new file mode 100644 index 000000000000..a773b3a2c8de --- /dev/null +++ b/third_party/nix/src/nix/verify.cc @@ -0,0 +1,170 @@ +#include <atomic> + +#include <glog/logging.h> + +#include "command.hh" +#include "shared.hh" +#include "store-api.hh" +#include "sync.hh" +#include "thread-pool.hh" + +using namespace nix; + +struct CmdVerify : StorePathsCommand { + bool noContents = false; + bool noTrust = false; + Strings substituterUris; + size_t sigsNeeded = 0; + + CmdVerify() { + mkFlag(0, "no-contents", "do not verify the contents of each store path", + &noContents); + mkFlag(0, "no-trust", "do not verify whether each store path is trusted", + &noTrust); + mkFlag() + .longName("substituter") + .shortName('s') + .labels({"store-uri"}) + .description("use signatures from specified store") + .arity(1) + .handler([&](std::vector<std::string> ss) { + substituterUris.push_back(ss[0]); + }); + mkIntFlag('n', "sigs-needed", + "require that each path has at least N valid signatures", + &sigsNeeded); + } + + std::string name() override { return "verify"; } + + std::string description() override { + return "verify the integrity of store paths"; + } + + Examples examples() override { + return { + Example{"To verify the entire Nix store:", "nix verify --all"}, + Example{"To check whether each path in the closure of Firefox has at " + "least 2 signatures:", + "nix verify -r -n2 --no-contents $(type -p firefox)"}, + }; + } + + void run(ref<Store> store, Paths storePaths) override { + std::vector<ref<Store>> substituters; + for (auto& s : substituterUris) { + substituters.push_back(openStore(s)); + } + + auto publicKeys = getDefaultPublicKeys(); + + std::atomic<size_t> done{0}; + std::atomic<size_t> untrusted{0}; + std::atomic<size_t> corrupted{0}; + std::atomic<size_t> failed{0}; + std::atomic<size_t> active{0}; + + ThreadPool pool; + + auto doPath = [&](const Path& storePath) { + try { + checkInterrupt(); + + LOG(INFO) << "checking '" << storePath << "'"; + + MaintainCount<std::atomic<size_t>> mcActive(active); + + auto info = store->queryPathInfo(storePath); + + if (!noContents) { + HashSink sink(info->narHash.type); + store->narFromPath(info->path, sink); + + auto hash = sink.finish(); + + if (hash.first != info->narHash) { + corrupted++; + LOG(WARNING) << "path '" << info->path + << "' was modified! expected hash '" + << info->narHash.to_string() << "', got '" + << hash.first.to_string() << "'"; + } + } + + if (!noTrust) { + bool good = false; + + if (info->ultimate && (sigsNeeded == 0u)) { + good = true; + + } else { + StringSet sigsSeen; + size_t actualSigsNeeded = std::max(sigsNeeded, (size_t)1); + size_t validSigs = 0; + + auto doSigs = [&](const StringSet& sigs) { + for (const auto& sig : sigs) { + if (sigsSeen.count(sig) != 0u) { + continue; + } + sigsSeen.insert(sig); + if (validSigs < ValidPathInfo::maxSigs && + info->checkSignature(publicKeys, sig)) { + validSigs++; + } + } + }; + + if (info->isContentAddressed(*store)) { + validSigs = ValidPathInfo::maxSigs; + } + + doSigs(info->sigs); + + for (auto& store2 : substituters) { + if (validSigs >= actualSigsNeeded) { + break; + } + try { + auto info2 = store2->queryPathInfo(info->path); + if (info2->isContentAddressed(*store)) { + validSigs = ValidPathInfo::maxSigs; + } + doSigs(info2->sigs); + } catch (InvalidPath&) { + } catch (Error& e) { + LOG(ERROR) << e.what(); + } + } + + if (validSigs >= actualSigsNeeded) { + good = true; + } + } + + if (!good) { + untrusted++; + LOG(WARNING) << "path '" << info->path << "' is untrusted"; + } + } + + done++; + + } catch (Error& e) { + LOG(ERROR) << e.what(); + failed++; + } + }; + + for (auto& storePath : storePaths) { + pool.enqueue(std::bind(doPath, storePath)); + } + + pool.process(); + + throw Exit((corrupted != 0u ? 1 : 0) | (untrusted != 0u ? 2 : 0) | + (failed != 0u ? 4 : 0)); + } +}; + +static RegisterCommand r1(make_ref<CmdVerify>()); diff --git a/third_party/nix/src/nix/why-depends.cc b/third_party/nix/src/nix/why-depends.cc new file mode 100644 index 000000000000..bbbfd1fe0182 --- /dev/null +++ b/third_party/nix/src/nix/why-depends.cc @@ -0,0 +1,265 @@ +#include <queue> + +#include <glog/logging.h> + +#include "command.hh" +#include "fs-accessor.hh" +#include "shared.hh" +#include "store-api.hh" + +using namespace nix; + +static std::string hilite(const std::string& s, size_t pos, size_t len, + const std::string& colour = ANSI_RED) { + return std::string(s, 0, pos) + colour + std::string(s, pos, len) + + ANSI_NORMAL + std::string(s, pos + len); +} + +static std::string filterPrintable(const std::string& s) { + std::string res; + for (char c : s) { + res += isprint(c) != 0 ? c : '.'; + } + return res; +} + +struct CmdWhyDepends : SourceExprCommand { + std::string _package, _dependency; + bool all = false; + + CmdWhyDepends() { + expectArg("package", &_package); + expectArg("dependency", &_dependency); + + mkFlag() + .longName("all") + .shortName('a') + .description( + "show all edges in the dependency graph leading from 'package' to " + "'dependency', rather than just a shortest path") + .set(&all, true); + } + + std::string name() override { return "why-depends"; } + + std::string description() override { + return "show why a package has another package in its closure"; + } + + Examples examples() override { + return { + Example{"To show one path through the dependency graph leading from " + "Hello to Glibc:", + "nix why-depends nixpkgs.hello nixpkgs.glibc"}, + Example{ + "To show all files and paths in the dependency graph leading from " + "Thunderbird to libX11:", + "nix why-depends --all nixpkgs.thunderbird nixpkgs.xorg.libX11"}, + Example{"To show why Glibc depends on itself:", + "nix why-depends nixpkgs.glibc nixpkgs.glibc"}, + }; + } + + void run(ref<Store> store) override { + auto package = parseInstallable(*this, store, _package, false); + auto packagePath = toStorePath(store, Build, package); + auto dependency = parseInstallable(*this, store, _dependency, false); + auto dependencyPath = toStorePath(store, NoBuild, dependency); + auto dependencyPathHash = storePathToHash(dependencyPath); + + PathSet closure; + store->computeFSClosure({packagePath}, closure, false, false); + + if (closure.count(dependencyPath) == 0u) { + LOG(WARNING) << "'" << package->what() << "' does not depend on '" + << dependency->what() << "'"; + return; + } + + auto accessor = store->getFSAccessor(); + + auto const inf = std::numeric_limits<size_t>::max(); + + struct Node { + Path path; + PathSet refs; + PathSet rrefs; + size_t dist = inf; + Node* prev = nullptr; + bool queued = false; + bool visited = false; + }; + + std::map<Path, Node> graph; + + for (auto& path : closure) { + graph.emplace(path, Node{path, store->queryPathInfo(path)->references}); + } + + // Transpose the graph. + for (auto& node : graph) { + for (auto& ref : node.second.refs) { + graph[ref].rrefs.insert(node.first); + } + } + + /* Run Dijkstra's shortest path algorithm to get the distance + of every path in the closure to 'dependency'. */ + graph[dependencyPath].dist = 0; + + std::priority_queue<Node*> queue; + + queue.push(&graph.at(dependencyPath)); + + while (!queue.empty()) { + auto& node = *queue.top(); + queue.pop(); + + for (auto& rref : node.rrefs) { + auto& node2 = graph.at(rref); + auto dist = node.dist + 1; + if (dist < node2.dist) { + node2.dist = dist; + node2.prev = &node; + if (!node2.queued) { + node2.queued = true; + queue.push(&node2); + } + } + } + } + + /* Print the subgraph of nodes that have 'dependency' in their + closure (i.e., that have a non-infinite distance to + 'dependency'). Print every edge on a path between `package` + and `dependency`. */ + std::function<void(Node&, const string&, const string&)> printNode; + + const string treeConn = "โ โโโ"; + const string treeLast = "โโโโ"; + const string treeLine = "โ "; + const string treeNull = " "; + + struct BailOut {}; + + printNode = [&](Node& node, const string& firstPad, const string& tailPad) { + assert(node.dist != inf); + std::cout << fmt("%s%s%s%s" ANSI_NORMAL "\n", firstPad, + node.visited ? "\e[38;5;244m" : "", + !firstPad.empty() ? "=> " : "", node.path); + + if (node.path == dependencyPath && !all && + packagePath != dependencyPath) { + throw BailOut(); + } + + if (node.visited) { + return; + } + node.visited = true; + + /* Sort the references by distance to `dependency` to + ensure that the shortest path is printed first. */ + std::multimap<size_t, Node*> refs; + std::set<std::string> hashes; + + for (auto& ref : node.refs) { + if (ref == node.path && packagePath != dependencyPath) { + continue; + } + auto& node2 = graph.at(ref); + if (node2.dist == inf) { + continue; + } + refs.emplace(node2.dist, &node2); + hashes.insert(storePathToHash(node2.path)); + } + + /* For each reference, find the files and symlinks that + contain the reference. */ + std::map<std::string, Strings> hits; + + std::function<void(const Path&)> visitPath; + + visitPath = [&](const Path& p) { + auto st = accessor->stat(p); + + auto p2 = p == node.path ? "/" : std::string(p, node.path.size() + 1); + + auto getColour = [&](const std::string& hash) { + return hash == dependencyPathHash ? ANSI_GREEN : ANSI_BLUE; + }; + + if (st.type == FSAccessor::Type::tDirectory) { + auto names = accessor->readDirectory(p); + for (auto& name : names) { + visitPath(p + "/" + name); + } + } + + else if (st.type == FSAccessor::Type::tRegular) { + auto contents = accessor->readFile(p); + + for (auto& hash : hashes) { + auto pos = contents.find(hash); + if (pos != std::string::npos) { + size_t margin = 32; + auto pos2 = pos >= margin ? pos - margin : 0; + hits[hash].emplace_back(fmt( + "%s: โฆ%sโฆ\n", p2, + hilite( + filterPrintable(std::string( + contents, pos2, pos - pos2 + hash.size() + margin)), + pos - pos2, storePathHashLen, getColour(hash)))); + } + } + } + + else if (st.type == FSAccessor::Type::tSymlink) { + auto target = accessor->readLink(p); + + for (auto& hash : hashes) { + auto pos = target.find(hash); + if (pos != std::string::npos) { + hits[hash].emplace_back( + fmt("%s -> %s\n", p2, + hilite(target, pos, storePathHashLen, getColour(hash)))); + } + } + } + }; + + // FIXME: should use scanForReferences(). + + visitPath(node.path); + + RunPager pager; + for (auto& ref : refs) { + auto hash = storePathToHash(ref.second->path); + + bool last = all ? ref == *refs.rbegin() : true; + + for (auto& hit : hits[hash]) { + bool first = hit == *hits[hash].begin(); + std::cout << tailPad + << (first ? (last ? treeLast : treeConn) + : (last ? treeNull : treeLine)) + << hit; + if (!all) { + break; + } + } + + printNode(*ref.second, tailPad + (last ? treeNull : treeLine), + tailPad + (last ? treeNull : treeLine)); + } + }; + + try { + printNode(graph.at(packagePath), "", ""); + } catch (BailOut&) { + } + } +}; + +static RegisterCommand r1(make_ref<CmdWhyDepends>()); diff --git a/third_party/nix/src/nlohmann/json.hpp b/third_party/nix/src/nlohmann/json.hpp new file mode 100644 index 000000000000..c9af0bed36d6 --- /dev/null +++ b/third_party/nix/src/nlohmann/json.hpp @@ -0,0 +1,20406 @@ +/* + __ _____ _____ _____ + __| | __| | | | JSON for Modern C++ +| | |__ | | | | | | version 3.5.0 +|_____|_____|_____|_|___| https://github.com/nlohmann/json + +Licensed under the MIT License <http://opensource.org/licenses/MIT>. +SPDX-License-Identifier: MIT +Copyright (c) 2013-2018 Niels Lohmann <http://nlohmann.me>. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. +*/ + +#ifndef NLOHMANN_JSON_HPP +#define NLOHMANN_JSON_HPP + +#define NLOHMANN_JSON_VERSION_MAJOR 3 +#define NLOHMANN_JSON_VERSION_MINOR 5 +#define NLOHMANN_JSON_VERSION_PATCH 0 + +#include <algorithm> // all_of, find, for_each +#include <cassert> // assert +#include <ciso646> // and, not, or +#include <cstddef> // nullptr_t, ptrdiff_t, size_t +#include <functional> // hash, less +#include <initializer_list> // initializer_list +#include <iosfwd> // istream, ostream +#include <iterator> // random_access_iterator_tag +#include <numeric> // accumulate +#include <string> // string, stoi, to_string +#include <utility> // declval, forward, move, pair, swap + +// #include <nlohmann/json_fwd.hpp> +#ifndef NLOHMANN_JSON_FWD_HPP +#define NLOHMANN_JSON_FWD_HPP + +#include <cstdint> // int64_t, uint64_t +#include <map> // map +#include <memory> // allocator +#include <string> // string +#include <vector> // vector + +/*! +@brief namespace for Niels Lohmann +@see https://github.com/nlohmann +@since version 1.0.0 +*/ +namespace nlohmann +{ +/*! +@brief default JSONSerializer template argument + +This serializer ignores the template arguments and uses ADL +([argument-dependent lookup](https://en.cppreference.com/w/cpp/language/adl)) +for serialization. +*/ +template<typename T = void, typename SFINAE = void> +struct adl_serializer; + +template<template<typename U, typename V, typename... Args> class ObjectType = + std::map, + template<typename U, typename... Args> class ArrayType = std::vector, + class StringType = std::string, class BooleanType = bool, + class NumberIntegerType = std::int64_t, + class NumberUnsignedType = std::uint64_t, + class NumberFloatType = double, + template<typename U> class AllocatorType = std::allocator, + template<typename T, typename SFINAE = void> class JSONSerializer = + adl_serializer> +class basic_json; + +/*! +@brief JSON Pointer + +A JSON pointer defines a string syntax for identifying a specific value +within a JSON document. It can be used with functions `at` and +`operator[]`. Furthermore, JSON pointers are the base for JSON patches. + +@sa [RFC 6901](https://tools.ietf.org/html/rfc6901) + +@since version 2.0.0 +*/ +template<typename BasicJsonType> +class json_pointer; + +/*! +@brief default JSON class + +This type is the default specialization of the @ref basic_json class which +uses the standard template types. + +@since version 1.0.0 +*/ +using json = basic_json<>; +} // namespace nlohmann + +#endif + +// #include <nlohmann/detail/macro_scope.hpp> + + +// This file contains all internal macro definitions +// You MUST include macro_unscope.hpp at the end of json.hpp to undef all of them + +// exclude unsupported compilers +#if !defined(JSON_SKIP_UNSUPPORTED_COMPILER_CHECK) + #if defined(__clang__) + #if (__clang_major__ * 10000 + __clang_minor__ * 100 + __clang_patchlevel__) < 30400 + #error "unsupported Clang version - see https://github.com/nlohmann/json#supported-compilers" + #endif + #elif defined(__GNUC__) && !(defined(__ICC) || defined(__INTEL_COMPILER)) + #if (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) < 40800 + #error "unsupported GCC version - see https://github.com/nlohmann/json#supported-compilers" + #endif + #endif +#endif + +// disable float-equal warnings on GCC/clang +#if defined(__clang__) || defined(__GNUC__) || defined(__GNUG__) + #pragma GCC diagnostic push + #pragma GCC diagnostic ignored "-Wfloat-equal" +#endif + +// disable documentation warnings on clang +#if defined(__clang__) + #pragma GCC diagnostic push + #pragma GCC diagnostic ignored "-Wdocumentation" +#endif + +// allow for portable deprecation warnings +#if defined(__clang__) || defined(__GNUC__) || defined(__GNUG__) + #define JSON_DEPRECATED __attribute__((deprecated)) +#elif defined(_MSC_VER) + #define JSON_DEPRECATED __declspec(deprecated) +#else + #define JSON_DEPRECATED +#endif + +// allow to disable exceptions +#if (defined(__cpp_exceptions) || defined(__EXCEPTIONS) || defined(_CPPUNWIND)) && !defined(JSON_NOEXCEPTION) + #define JSON_THROW(exception) throw exception + #define JSON_TRY try + #define JSON_CATCH(exception) catch(exception) + #define JSON_INTERNAL_CATCH(exception) catch(exception) +#else + #define JSON_THROW(exception) std::abort() + #define JSON_TRY if(true) + #define JSON_CATCH(exception) if(false) + #define JSON_INTERNAL_CATCH(exception) if(false) +#endif + +// override exception macros +#if defined(JSON_THROW_USER) + #undef JSON_THROW + #define JSON_THROW JSON_THROW_USER +#endif +#if defined(JSON_TRY_USER) + #undef JSON_TRY + #define JSON_TRY JSON_TRY_USER +#endif +#if defined(JSON_CATCH_USER) + #undef JSON_CATCH + #define JSON_CATCH JSON_CATCH_USER + #undef JSON_INTERNAL_CATCH + #define JSON_INTERNAL_CATCH JSON_CATCH_USER +#endif +#if defined(JSON_INTERNAL_CATCH_USER) + #undef JSON_INTERNAL_CATCH + #define JSON_INTERNAL_CATCH JSON_INTERNAL_CATCH_USER +#endif + +// manual branch prediction +#if defined(__clang__) || defined(__GNUC__) || defined(__GNUG__) + #define JSON_LIKELY(x) __builtin_expect(!!(x), 1) + #define JSON_UNLIKELY(x) __builtin_expect(!!(x), 0) +#else + #define JSON_LIKELY(x) x + #define JSON_UNLIKELY(x) x +#endif + +// C++ language standard detection +#if (defined(__cplusplus) && __cplusplus >= 201703L) || (defined(_HAS_CXX17) && _HAS_CXX17 == 1) // fix for issue #464 + #define JSON_HAS_CPP_17 + #define JSON_HAS_CPP_14 +#elif (defined(__cplusplus) && __cplusplus >= 201402L) || (defined(_HAS_CXX14) && _HAS_CXX14 == 1) + #define JSON_HAS_CPP_14 +#endif + +/*! +@brief macro to briefly define a mapping between an enum and JSON +@def NLOHMANN_JSON_SERIALIZE_ENUM +@since version 3.4.0 +*/ +#define NLOHMANN_JSON_SERIALIZE_ENUM(ENUM_TYPE, ...) \ + template<typename BasicJsonType> \ + inline void to_json(BasicJsonType& j, const ENUM_TYPE& e) \ + { \ + static_assert(std::is_enum<ENUM_TYPE>::value, #ENUM_TYPE " must be an enum!"); \ + static const std::pair<ENUM_TYPE, BasicJsonType> m[] = __VA_ARGS__; \ + auto it = std::find_if(std::begin(m), std::end(m), \ + [e](const std::pair<ENUM_TYPE, BasicJsonType>& ej_pair) -> bool \ + { \ + return ej_pair.first == e; \ + }); \ + j = ((it != std::end(m)) ? it : std::begin(m))->second; \ + } \ + template<typename BasicJsonType> \ + inline void from_json(const BasicJsonType& j, ENUM_TYPE& e) \ + { \ + static_assert(std::is_enum<ENUM_TYPE>::value, #ENUM_TYPE " must be an enum!"); \ + static const std::pair<ENUM_TYPE, BasicJsonType> m[] = __VA_ARGS__; \ + auto it = std::find_if(std::begin(m), std::end(m), \ + [j](const std::pair<ENUM_TYPE, BasicJsonType>& ej_pair) -> bool \ + { \ + return ej_pair.second == j; \ + }); \ + e = ((it != std::end(m)) ? it : std::begin(m))->first; \ + } + +// Ugly macros to avoid uglier copy-paste when specializing basic_json. They +// may be removed in the future once the class is split. + +#define NLOHMANN_BASIC_JSON_TPL_DECLARATION \ + template<template<typename, typename, typename...> class ObjectType, \ + template<typename, typename...> class ArrayType, \ + class StringType, class BooleanType, class NumberIntegerType, \ + class NumberUnsignedType, class NumberFloatType, \ + template<typename> class AllocatorType, \ + template<typename, typename = void> class JSONSerializer> + +#define NLOHMANN_BASIC_JSON_TPL \ + basic_json<ObjectType, ArrayType, StringType, BooleanType, \ + NumberIntegerType, NumberUnsignedType, NumberFloatType, \ + AllocatorType, JSONSerializer> + +// #include <nlohmann/detail/meta/cpp_future.hpp> + + +#include <ciso646> // not +#include <cstddef> // size_t +#include <type_traits> // conditional, enable_if, false_type, integral_constant, is_constructible, is_integral, is_same, remove_cv, remove_reference, true_type + +namespace nlohmann +{ +namespace detail +{ +// alias templates to reduce boilerplate +template<bool B, typename T = void> +using enable_if_t = typename std::enable_if<B, T>::type; + +template<typename T> +using uncvref_t = typename std::remove_cv<typename std::remove_reference<T>::type>::type; + +// implementation of C++14 index_sequence and affiliates +// source: https://stackoverflow.com/a/32223343 +template<std::size_t... Ints> +struct index_sequence +{ + using type = index_sequence; + using value_type = std::size_t; + static constexpr std::size_t size() noexcept + { + return sizeof...(Ints); + } +}; + +template<class Sequence1, class Sequence2> +struct merge_and_renumber; + +template<std::size_t... I1, std::size_t... I2> +struct merge_and_renumber<index_sequence<I1...>, index_sequence<I2...>> + : index_sequence < I1..., (sizeof...(I1) + I2)... > {}; + +template<std::size_t N> +struct make_index_sequence + : merge_and_renumber < typename make_index_sequence < N / 2 >::type, + typename make_index_sequence < N - N / 2 >::type > {}; + +template<> struct make_index_sequence<0> : index_sequence<> {}; +template<> struct make_index_sequence<1> : index_sequence<0> {}; + +template<typename... Ts> +using index_sequence_for = make_index_sequence<sizeof...(Ts)>; + +// dispatch utility (taken from ranges-v3) +template<unsigned N> struct priority_tag : priority_tag < N - 1 > {}; +template<> struct priority_tag<0> {}; + +// taken from ranges-v3 +template<typename T> +struct static_const +{ + static constexpr T value{}; +}; + +template<typename T> +constexpr T static_const<T>::value; +} // namespace detail +} // namespace nlohmann + +// #include <nlohmann/detail/meta/type_traits.hpp> + + +#include <ciso646> // not +#include <limits> // numeric_limits +#include <type_traits> // false_type, is_constructible, is_integral, is_same, true_type +#include <utility> // declval + +// #include <nlohmann/json_fwd.hpp> + +// #include <nlohmann/detail/iterators/iterator_traits.hpp> + + +#include <iterator> // random_access_iterator_tag + +// #include <nlohmann/detail/meta/void_t.hpp> + + +namespace nlohmann +{ +namespace detail +{ +template <typename ...Ts> struct make_void +{ + using type = void; +}; +template <typename ...Ts> using void_t = typename make_void<Ts...>::type; +} // namespace detail +} // namespace nlohmann + +// #include <nlohmann/detail/meta/cpp_future.hpp> + + +namespace nlohmann +{ +namespace detail +{ +template <typename It, typename = void> +struct iterator_types {}; + +template <typename It> +struct iterator_types < + It, + void_t<typename It::difference_type, typename It::value_type, typename It::pointer, + typename It::reference, typename It::iterator_category >> +{ + using difference_type = typename It::difference_type; + using value_type = typename It::value_type; + using pointer = typename It::pointer; + using reference = typename It::reference; + using iterator_category = typename It::iterator_category; +}; + +// This is required as some compilers implement std::iterator_traits in a way that +// doesn't work with SFINAE. See https://github.com/nlohmann/json/issues/1341. +template <typename T, typename = void> +struct iterator_traits +{ +}; + +template <typename T> +struct iterator_traits < T, enable_if_t < !std::is_pointer<T>::value >> + : iterator_types<T> +{ +}; + +template <typename T> +struct iterator_traits<T*, enable_if_t<std::is_object<T>::value>> +{ + using iterator_category = std::random_access_iterator_tag; + using value_type = T; + using difference_type = ptrdiff_t; + using pointer = T*; + using reference = T&; +}; +} +} + +// #include <nlohmann/detail/meta/cpp_future.hpp> + +// #include <nlohmann/detail/meta/detected.hpp> + + +#include <type_traits> + +// #include <nlohmann/detail/meta/void_t.hpp> + + +// http://en.cppreference.com/w/cpp/experimental/is_detected +namespace nlohmann +{ +namespace detail +{ +struct nonesuch +{ + nonesuch() = delete; + ~nonesuch() = delete; + nonesuch(nonesuch const&) = delete; + void operator=(nonesuch const&) = delete; +}; + +template <class Default, + class AlwaysVoid, + template <class...> class Op, + class... Args> +struct detector +{ + using value_t = std::false_type; + using type = Default; +}; + +template <class Default, template <class...> class Op, class... Args> +struct detector<Default, void_t<Op<Args...>>, Op, Args...> +{ + using value_t = std::true_type; + using type = Op<Args...>; +}; + +template <template <class...> class Op, class... Args> +using is_detected = typename detector<nonesuch, void, Op, Args...>::value_t; + +template <template <class...> class Op, class... Args> +using detected_t = typename detector<nonesuch, void, Op, Args...>::type; + +template <class Default, template <class...> class Op, class... Args> +using detected_or = detector<Default, void, Op, Args...>; + +template <class Default, template <class...> class Op, class... Args> +using detected_or_t = typename detected_or<Default, Op, Args...>::type; + +template <class Expected, template <class...> class Op, class... Args> +using is_detected_exact = std::is_same<Expected, detected_t<Op, Args...>>; + +template <class To, template <class...> class Op, class... Args> +using is_detected_convertible = + std::is_convertible<detected_t<Op, Args...>, To>; +} // namespace detail +} // namespace nlohmann + +// #include <nlohmann/detail/macro_scope.hpp> + + +namespace nlohmann +{ +/*! +@brief detail namespace with internal helper functions + +This namespace collects functions that should not be exposed, +implementations of some @ref basic_json methods, and meta-programming helpers. + +@since version 2.1.0 +*/ +namespace detail +{ +///////////// +// helpers // +///////////// + +// Note to maintainers: +// +// Every trait in this file expects a non CV-qualified type. +// The only exceptions are in the 'aliases for detected' section +// (i.e. those of the form: decltype(T::member_function(std::declval<T>()))) +// +// In this case, T has to be properly CV-qualified to constraint the function arguments +// (e.g. to_json(BasicJsonType&, const T&)) + +template<typename> struct is_basic_json : std::false_type {}; + +NLOHMANN_BASIC_JSON_TPL_DECLARATION +struct is_basic_json<NLOHMANN_BASIC_JSON_TPL> : std::true_type {}; + +////////////////////////// +// aliases for detected // +////////////////////////// + +template <typename T> +using mapped_type_t = typename T::mapped_type; + +template <typename T> +using key_type_t = typename T::key_type; + +template <typename T> +using value_type_t = typename T::value_type; + +template <typename T> +using difference_type_t = typename T::difference_type; + +template <typename T> +using pointer_t = typename T::pointer; + +template <typename T> +using reference_t = typename T::reference; + +template <typename T> +using iterator_category_t = typename T::iterator_category; + +template <typename T> +using iterator_t = typename T::iterator; + +template <typename T, typename... Args> +using to_json_function = decltype(T::to_json(std::declval<Args>()...)); + +template <typename T, typename... Args> +using from_json_function = decltype(T::from_json(std::declval<Args>()...)); + +template <typename T, typename U> +using get_template_function = decltype(std::declval<T>().template get<U>()); + +// trait checking if JSONSerializer<T>::from_json(json const&, udt&) exists +template <typename BasicJsonType, typename T, typename = void> +struct has_from_json : std::false_type {}; + +template <typename BasicJsonType, typename T> +struct has_from_json<BasicJsonType, T, + enable_if_t<not is_basic_json<T>::value>> +{ + using serializer = typename BasicJsonType::template json_serializer<T, void>; + + static constexpr bool value = + is_detected_exact<void, from_json_function, serializer, + const BasicJsonType&, T&>::value; +}; + +// This trait checks if JSONSerializer<T>::from_json(json const&) exists +// this overload is used for non-default-constructible user-defined-types +template <typename BasicJsonType, typename T, typename = void> +struct has_non_default_from_json : std::false_type {}; + +template<typename BasicJsonType, typename T> +struct has_non_default_from_json<BasicJsonType, T, enable_if_t<not is_basic_json<T>::value>> +{ + using serializer = typename BasicJsonType::template json_serializer<T, void>; + + static constexpr bool value = + is_detected_exact<T, from_json_function, serializer, + const BasicJsonType&>::value; +}; + +// This trait checks if BasicJsonType::json_serializer<T>::to_json exists +// Do not evaluate the trait when T is a basic_json type, to avoid template instantiation infinite recursion. +template <typename BasicJsonType, typename T, typename = void> +struct has_to_json : std::false_type {}; + +template <typename BasicJsonType, typename T> +struct has_to_json<BasicJsonType, T, enable_if_t<not is_basic_json<T>::value>> +{ + using serializer = typename BasicJsonType::template json_serializer<T, void>; + + static constexpr bool value = + is_detected_exact<void, to_json_function, serializer, BasicJsonType&, + T>::value; +}; + + +/////////////////// +// is_ functions // +/////////////////// + +template <typename T, typename = void> +struct is_iterator_traits : std::false_type {}; + +template <typename T> +struct is_iterator_traits<iterator_traits<T>> +{ + private: + using traits = iterator_traits<T>; + + public: + static constexpr auto value = + is_detected<value_type_t, traits>::value && + is_detected<difference_type_t, traits>::value && + is_detected<pointer_t, traits>::value && + is_detected<iterator_category_t, traits>::value && + is_detected<reference_t, traits>::value; +}; + +// source: https://stackoverflow.com/a/37193089/4116453 + +template <typename T, typename = void> +struct is_complete_type : std::false_type {}; + +template <typename T> +struct is_complete_type<T, decltype(void(sizeof(T)))> : std::true_type {}; + +template <typename BasicJsonType, typename CompatibleObjectType, + typename = void> +struct is_compatible_object_type_impl : std::false_type {}; + +template <typename BasicJsonType, typename CompatibleObjectType> +struct is_compatible_object_type_impl < + BasicJsonType, CompatibleObjectType, + enable_if_t<is_detected<mapped_type_t, CompatibleObjectType>::value and + is_detected<key_type_t, CompatibleObjectType>::value >> +{ + + using object_t = typename BasicJsonType::object_t; + + // macOS's is_constructible does not play well with nonesuch... + static constexpr bool value = + std::is_constructible<typename object_t::key_type, + typename CompatibleObjectType::key_type>::value and + std::is_constructible<typename object_t::mapped_type, + typename CompatibleObjectType::mapped_type>::value; +}; + +template <typename BasicJsonType, typename CompatibleObjectType> +struct is_compatible_object_type + : is_compatible_object_type_impl<BasicJsonType, CompatibleObjectType> {}; + +template <typename BasicJsonType, typename ConstructibleObjectType, + typename = void> +struct is_constructible_object_type_impl : std::false_type {}; + +template <typename BasicJsonType, typename ConstructibleObjectType> +struct is_constructible_object_type_impl < + BasicJsonType, ConstructibleObjectType, + enable_if_t<is_detected<mapped_type_t, ConstructibleObjectType>::value and + is_detected<key_type_t, ConstructibleObjectType>::value >> +{ + using object_t = typename BasicJsonType::object_t; + + static constexpr bool value = + (std::is_constructible<typename ConstructibleObjectType::key_type, typename object_t::key_type>::value and + std::is_same<typename object_t::mapped_type, typename ConstructibleObjectType::mapped_type>::value) or + (has_from_json<BasicJsonType, typename ConstructibleObjectType::mapped_type>::value or + has_non_default_from_json<BasicJsonType, typename ConstructibleObjectType::mapped_type >::value); +}; + +template <typename BasicJsonType, typename ConstructibleObjectType> +struct is_constructible_object_type + : is_constructible_object_type_impl<BasicJsonType, + ConstructibleObjectType> {}; + +template <typename BasicJsonType, typename CompatibleStringType, + typename = void> +struct is_compatible_string_type_impl : std::false_type {}; + +template <typename BasicJsonType, typename CompatibleStringType> +struct is_compatible_string_type_impl < + BasicJsonType, CompatibleStringType, + enable_if_t<is_detected_exact<typename BasicJsonType::string_t::value_type, + value_type_t, CompatibleStringType>::value >> +{ + static constexpr auto value = + std::is_constructible<typename BasicJsonType::string_t, CompatibleStringType>::value; +}; + +template <typename BasicJsonType, typename ConstructibleStringType> +struct is_compatible_string_type + : is_compatible_string_type_impl<BasicJsonType, ConstructibleStringType> {}; + +template <typename BasicJsonType, typename ConstructibleStringType, + typename = void> +struct is_constructible_string_type_impl : std::false_type {}; + +template <typename BasicJsonType, typename ConstructibleStringType> +struct is_constructible_string_type_impl < + BasicJsonType, ConstructibleStringType, + enable_if_t<is_detected_exact<typename BasicJsonType::string_t::value_type, + value_type_t, ConstructibleStringType>::value >> +{ + static constexpr auto value = + std::is_constructible<ConstructibleStringType, + typename BasicJsonType::string_t>::value; +}; + +template <typename BasicJsonType, typename ConstructibleStringType> +struct is_constructible_string_type + : is_constructible_string_type_impl<BasicJsonType, ConstructibleStringType> {}; + +template <typename BasicJsonType, typename CompatibleArrayType, typename = void> +struct is_compatible_array_type_impl : std::false_type {}; + +template <typename BasicJsonType, typename CompatibleArrayType> +struct is_compatible_array_type_impl < + BasicJsonType, CompatibleArrayType, + enable_if_t<is_detected<value_type_t, CompatibleArrayType>::value and + is_detected<iterator_t, CompatibleArrayType>::value and +// This is needed because json_reverse_iterator has a ::iterator type... +// Therefore it is detected as a CompatibleArrayType. +// The real fix would be to have an Iterable concept. + not is_iterator_traits< + iterator_traits<CompatibleArrayType>>::value >> +{ + static constexpr bool value = + std::is_constructible<BasicJsonType, + typename CompatibleArrayType::value_type>::value; +}; + +template <typename BasicJsonType, typename CompatibleArrayType> +struct is_compatible_array_type + : is_compatible_array_type_impl<BasicJsonType, CompatibleArrayType> {}; + +template <typename BasicJsonType, typename ConstructibleArrayType, typename = void> +struct is_constructible_array_type_impl : std::false_type {}; + +template <typename BasicJsonType, typename ConstructibleArrayType> +struct is_constructible_array_type_impl < + BasicJsonType, ConstructibleArrayType, + enable_if_t<std::is_same<ConstructibleArrayType, + typename BasicJsonType::value_type>::value >> + : std::true_type {}; + +template <typename BasicJsonType, typename ConstructibleArrayType> +struct is_constructible_array_type_impl < + BasicJsonType, ConstructibleArrayType, + enable_if_t<not std::is_same<ConstructibleArrayType, + typename BasicJsonType::value_type>::value and + is_detected<value_type_t, ConstructibleArrayType>::value and + is_detected<iterator_t, ConstructibleArrayType>::value and + is_complete_type< + detected_t<value_type_t, ConstructibleArrayType>>::value >> +{ + static constexpr bool value = + // This is needed because json_reverse_iterator has a ::iterator type, + // furthermore, std::back_insert_iterator (and other iterators) have a base class `iterator`... + // Therefore it is detected as a ConstructibleArrayType. + // The real fix would be to have an Iterable concept. + not is_iterator_traits < + iterator_traits<ConstructibleArrayType >>::value and + + (std::is_same<typename ConstructibleArrayType::value_type, typename BasicJsonType::array_t::value_type>::value or + has_from_json<BasicJsonType, + typename ConstructibleArrayType::value_type>::value or + has_non_default_from_json < + BasicJsonType, typename ConstructibleArrayType::value_type >::value); +}; + +template <typename BasicJsonType, typename ConstructibleArrayType> +struct is_constructible_array_type + : is_constructible_array_type_impl<BasicJsonType, ConstructibleArrayType> {}; + +template <typename RealIntegerType, typename CompatibleNumberIntegerType, + typename = void> +struct is_compatible_integer_type_impl : std::false_type {}; + +template <typename RealIntegerType, typename CompatibleNumberIntegerType> +struct is_compatible_integer_type_impl < + RealIntegerType, CompatibleNumberIntegerType, + enable_if_t<std::is_integral<RealIntegerType>::value and + std::is_integral<CompatibleNumberIntegerType>::value and + not std::is_same<bool, CompatibleNumberIntegerType>::value >> +{ + // is there an assert somewhere on overflows? + using RealLimits = std::numeric_limits<RealIntegerType>; + using CompatibleLimits = std::numeric_limits<CompatibleNumberIntegerType>; + + static constexpr auto value = + std::is_constructible<RealIntegerType, + CompatibleNumberIntegerType>::value and + CompatibleLimits::is_integer and + RealLimits::is_signed == CompatibleLimits::is_signed; +}; + +template <typename RealIntegerType, typename CompatibleNumberIntegerType> +struct is_compatible_integer_type + : is_compatible_integer_type_impl<RealIntegerType, + CompatibleNumberIntegerType> {}; + +template <typename BasicJsonType, typename CompatibleType, typename = void> +struct is_compatible_type_impl: std::false_type {}; + +template <typename BasicJsonType, typename CompatibleType> +struct is_compatible_type_impl < + BasicJsonType, CompatibleType, + enable_if_t<is_complete_type<CompatibleType>::value >> +{ + static constexpr bool value = + has_to_json<BasicJsonType, CompatibleType>::value; +}; + +template <typename BasicJsonType, typename CompatibleType> +struct is_compatible_type + : is_compatible_type_impl<BasicJsonType, CompatibleType> {}; +} // namespace detail +} // namespace nlohmann + +// #include <nlohmann/detail/exceptions.hpp> + + +#include <exception> // exception +#include <stdexcept> // runtime_error +#include <string> // to_string + +// #include <nlohmann/detail/input/position_t.hpp> + + +#include <cstddef> // size_t + +namespace nlohmann +{ +namespace detail +{ +/// struct to capture the start position of the current token +struct position_t +{ + /// the total number of characters read + std::size_t chars_read_total = 0; + /// the number of characters read in the current line + std::size_t chars_read_current_line = 0; + /// the number of lines read + std::size_t lines_read = 0; + + /// conversion to size_t to preserve SAX interface + constexpr operator size_t() const + { + return chars_read_total; + } +}; + +} +} + + +namespace nlohmann +{ +namespace detail +{ +//////////////// +// exceptions // +//////////////// + +/*! +@brief general exception of the @ref basic_json class + +This class is an extension of `std::exception` objects with a member @a id for +exception ids. It is used as the base class for all exceptions thrown by the +@ref basic_json class. This class can hence be used as "wildcard" to catch +exceptions. + +Subclasses: +- @ref parse_error for exceptions indicating a parse error +- @ref invalid_iterator for exceptions indicating errors with iterators +- @ref type_error for exceptions indicating executing a member function with + a wrong type +- @ref out_of_range for exceptions indicating access out of the defined range +- @ref other_error for exceptions indicating other library errors + +@internal +@note To have nothrow-copy-constructible exceptions, we internally use + `std::runtime_error` which can cope with arbitrary-length error messages. + Intermediate strings are built with static functions and then passed to + the actual constructor. +@endinternal + +@liveexample{The following code shows how arbitrary library exceptions can be +caught.,exception} + +@since version 3.0.0 +*/ +class exception : public std::exception +{ + public: + /// returns the explanatory string + const char* what() const noexcept override + { + return m.what(); + } + + /// the id of the exception + const int id; + + protected: + exception(int id_, const char* what_arg) : id(id_), m(what_arg) {} + + static std::string name(const std::string& ename, int id_) + { + return "[json.exception." + ename + "." + std::to_string(id_) + "] "; + } + + private: + /// an exception object as storage for error messages + std::runtime_error m; +}; + +/*! +@brief exception indicating a parse error + +This exception is thrown by the library when a parse error occurs. Parse errors +can occur during the deserialization of JSON text, CBOR, MessagePack, as well +as when using JSON Patch. + +Member @a byte holds the byte index of the last read character in the input +file. + +Exceptions have ids 1xx. + +name / id | example message | description +------------------------------ | --------------- | ------------------------- +json.exception.parse_error.101 | parse error at 2: unexpected end of input; expected string literal | This error indicates a syntax error while deserializing a JSON text. The error message describes that an unexpected token (character) was encountered, and the member @a byte indicates the error position. +json.exception.parse_error.102 | parse error at 14: missing or wrong low surrogate | JSON uses the `\uxxxx` format to describe Unicode characters. Code points above above 0xFFFF are split into two `\uxxxx` entries ("surrogate pairs"). This error indicates that the surrogate pair is incomplete or contains an invalid code point. +json.exception.parse_error.103 | parse error: code points above 0x10FFFF are invalid | Unicode supports code points up to 0x10FFFF. Code points above 0x10FFFF are invalid. +json.exception.parse_error.104 | parse error: JSON patch must be an array of objects | [RFC 6902](https://tools.ietf.org/html/rfc6902) requires a JSON Patch document to be a JSON document that represents an array of objects. +json.exception.parse_error.105 | parse error: operation must have string member 'op' | An operation of a JSON Patch document must contain exactly one "op" member, whose value indicates the operation to perform. Its value must be one of "add", "remove", "replace", "move", "copy", or "test"; other values are errors. +json.exception.parse_error.106 | parse error: array index '01' must not begin with '0' | An array index in a JSON Pointer ([RFC 6901](https://tools.ietf.org/html/rfc6901)) may be `0` or any number without a leading `0`. +json.exception.parse_error.107 | parse error: JSON pointer must be empty or begin with '/' - was: 'foo' | A JSON Pointer must be a Unicode string containing a sequence of zero or more reference tokens, each prefixed by a `/` character. +json.exception.parse_error.108 | parse error: escape character '~' must be followed with '0' or '1' | In a JSON Pointer, only `~0` and `~1` are valid escape sequences. +json.exception.parse_error.109 | parse error: array index 'one' is not a number | A JSON Pointer array index must be a number. +json.exception.parse_error.110 | parse error at 1: cannot read 2 bytes from vector | When parsing CBOR or MessagePack, the byte vector ends before the complete value has been read. +json.exception.parse_error.112 | parse error at 1: error reading CBOR; last byte: 0xF8 | Not all types of CBOR or MessagePack are supported. This exception occurs if an unsupported byte was read. +json.exception.parse_error.113 | parse error at 2: expected a CBOR string; last byte: 0x98 | While parsing a map key, a value that is not a string has been read. +json.exception.parse_error.114 | parse error: Unsupported BSON record type 0x0F | The parsing of the corresponding BSON record type is not implemented (yet). + +@note For an input with n bytes, 1 is the index of the first character and n+1 + is the index of the terminating null byte or the end of file. This also + holds true when reading a byte vector (CBOR or MessagePack). + +@liveexample{The following code shows how a `parse_error` exception can be +caught.,parse_error} + +@sa @ref exception for the base class of the library exceptions +@sa @ref invalid_iterator for exceptions indicating errors with iterators +@sa @ref type_error for exceptions indicating executing a member function with + a wrong type +@sa @ref out_of_range for exceptions indicating access out of the defined range +@sa @ref other_error for exceptions indicating other library errors + +@since version 3.0.0 +*/ +class parse_error : public exception +{ + public: + /*! + @brief create a parse error exception + @param[in] id_ the id of the exception + @param[in] position the position where the error occurred (or with + chars_read_total=0 if the position cannot be + determined) + @param[in] what_arg the explanatory string + @return parse_error object + */ + static parse_error create(int id_, const position_t& pos, const std::string& what_arg) + { + std::string w = exception::name("parse_error", id_) + "parse error" + + position_string(pos) + ": " + what_arg; + return parse_error(id_, pos.chars_read_total, w.c_str()); + } + + static parse_error create(int id_, std::size_t byte_, const std::string& what_arg) + { + std::string w = exception::name("parse_error", id_) + "parse error" + + (byte_ != 0 ? (" at byte " + std::to_string(byte_)) : "") + + ": " + what_arg; + return parse_error(id_, byte_, w.c_str()); + } + + /*! + @brief byte index of the parse error + + The byte index of the last read character in the input file. + + @note For an input with n bytes, 1 is the index of the first character and + n+1 is the index of the terminating null byte or the end of file. + This also holds true when reading a byte vector (CBOR or MessagePack). + */ + const std::size_t byte; + + private: + parse_error(int id_, std::size_t byte_, const char* what_arg) + : exception(id_, what_arg), byte(byte_) {} + + static std::string position_string(const position_t& pos) + { + return " at line " + std::to_string(pos.lines_read + 1) + + ", column " + std::to_string(pos.chars_read_current_line); + } +}; + +/*! +@brief exception indicating errors with iterators + +This exception is thrown if iterators passed to a library function do not match +the expected semantics. + +Exceptions have ids 2xx. + +name / id | example message | description +----------------------------------- | --------------- | ------------------------- +json.exception.invalid_iterator.201 | iterators are not compatible | The iterators passed to constructor @ref basic_json(InputIT first, InputIT last) are not compatible, meaning they do not belong to the same container. Therefore, the range (@a first, @a last) is invalid. +json.exception.invalid_iterator.202 | iterator does not fit current value | In an erase or insert function, the passed iterator @a pos does not belong to the JSON value for which the function was called. It hence does not define a valid position for the deletion/insertion. +json.exception.invalid_iterator.203 | iterators do not fit current value | Either iterator passed to function @ref erase(IteratorType first, IteratorType last) does not belong to the JSON value from which values shall be erased. It hence does not define a valid range to delete values from. +json.exception.invalid_iterator.204 | iterators out of range | When an iterator range for a primitive type (number, boolean, or string) is passed to a constructor or an erase function, this range has to be exactly (@ref begin(), @ref end()), because this is the only way the single stored value is expressed. All other ranges are invalid. +json.exception.invalid_iterator.205 | iterator out of range | When an iterator for a primitive type (number, boolean, or string) is passed to an erase function, the iterator has to be the @ref begin() iterator, because it is the only way to address the stored value. All other iterators are invalid. +json.exception.invalid_iterator.206 | cannot construct with iterators from null | The iterators passed to constructor @ref basic_json(InputIT first, InputIT last) belong to a JSON null value and hence to not define a valid range. +json.exception.invalid_iterator.207 | cannot use key() for non-object iterators | The key() member function can only be used on iterators belonging to a JSON object, because other types do not have a concept of a key. +json.exception.invalid_iterator.208 | cannot use operator[] for object iterators | The operator[] to specify a concrete offset cannot be used on iterators belonging to a JSON object, because JSON objects are unordered. +json.exception.invalid_iterator.209 | cannot use offsets with object iterators | The offset operators (+, -, +=, -=) cannot be used on iterators belonging to a JSON object, because JSON objects are unordered. +json.exception.invalid_iterator.210 | iterators do not fit | The iterator range passed to the insert function are not compatible, meaning they do not belong to the same container. Therefore, the range (@a first, @a last) is invalid. +json.exception.invalid_iterator.211 | passed iterators may not belong to container | The iterator range passed to the insert function must not be a subrange of the container to insert to. +json.exception.invalid_iterator.212 | cannot compare iterators of different containers | When two iterators are compared, they must belong to the same container. +json.exception.invalid_iterator.213 | cannot compare order of object iterators | The order of object iterators cannot be compared, because JSON objects are unordered. +json.exception.invalid_iterator.214 | cannot get value | Cannot get value for iterator: Either the iterator belongs to a null value or it is an iterator to a primitive type (number, boolean, or string), but the iterator is different to @ref begin(). + +@liveexample{The following code shows how an `invalid_iterator` exception can be +caught.,invalid_iterator} + +@sa @ref exception for the base class of the library exceptions +@sa @ref parse_error for exceptions indicating a parse error +@sa @ref type_error for exceptions indicating executing a member function with + a wrong type +@sa @ref out_of_range for exceptions indicating access out of the defined range +@sa @ref other_error for exceptions indicating other library errors + +@since version 3.0.0 +*/ +class invalid_iterator : public exception +{ + public: + static invalid_iterator create(int id_, const std::string& what_arg) + { + std::string w = exception::name("invalid_iterator", id_) + what_arg; + return invalid_iterator(id_, w.c_str()); + } + + private: + invalid_iterator(int id_, const char* what_arg) + : exception(id_, what_arg) {} +}; + +/*! +@brief exception indicating executing a member function with a wrong type + +This exception is thrown in case of a type error; that is, a library function is +executed on a JSON value whose type does not match the expected semantics. + +Exceptions have ids 3xx. + +name / id | example message | description +----------------------------- | --------------- | ------------------------- +json.exception.type_error.301 | cannot create object from initializer list | To create an object from an initializer list, the initializer list must consist only of a list of pairs whose first element is a string. When this constraint is violated, an array is created instead. +json.exception.type_error.302 | type must be object, but is array | During implicit or explicit value conversion, the JSON type must be compatible to the target type. For instance, a JSON string can only be converted into string types, but not into numbers or boolean types. +json.exception.type_error.303 | incompatible ReferenceType for get_ref, actual type is object | To retrieve a reference to a value stored in a @ref basic_json object with @ref get_ref, the type of the reference must match the value type. For instance, for a JSON array, the @a ReferenceType must be @ref array_t&. +json.exception.type_error.304 | cannot use at() with string | The @ref at() member functions can only be executed for certain JSON types. +json.exception.type_error.305 | cannot use operator[] with string | The @ref operator[] member functions can only be executed for certain JSON types. +json.exception.type_error.306 | cannot use value() with string | The @ref value() member functions can only be executed for certain JSON types. +json.exception.type_error.307 | cannot use erase() with string | The @ref erase() member functions can only be executed for certain JSON types. +json.exception.type_error.308 | cannot use push_back() with string | The @ref push_back() and @ref operator+= member functions can only be executed for certain JSON types. +json.exception.type_error.309 | cannot use insert() with | The @ref insert() member functions can only be executed for certain JSON types. +json.exception.type_error.310 | cannot use swap() with number | The @ref swap() member functions can only be executed for certain JSON types. +json.exception.type_error.311 | cannot use emplace_back() with string | The @ref emplace_back() member function can only be executed for certain JSON types. +json.exception.type_error.312 | cannot use update() with string | The @ref update() member functions can only be executed for certain JSON types. +json.exception.type_error.313 | invalid value to unflatten | The @ref unflatten function converts an object whose keys are JSON Pointers back into an arbitrary nested JSON value. The JSON Pointers must not overlap, because then the resulting value would not be well defined. +json.exception.type_error.314 | only objects can be unflattened | The @ref unflatten function only works for an object whose keys are JSON Pointers. +json.exception.type_error.315 | values in object must be primitive | The @ref unflatten function only works for an object whose keys are JSON Pointers and whose values are primitive. +json.exception.type_error.316 | invalid UTF-8 byte at index 10: 0x7E | The @ref dump function only works with UTF-8 encoded strings; that is, if you assign a `std::string` to a JSON value, make sure it is UTF-8 encoded. | +json.exception.type_error.317 | JSON value cannot be serialized to requested format | The dynamic type of the object cannot be represented in the requested serialization format (e.g. a raw `true` or `null` JSON object cannot be serialized to BSON) | + +@liveexample{The following code shows how a `type_error` exception can be +caught.,type_error} + +@sa @ref exception for the base class of the library exceptions +@sa @ref parse_error for exceptions indicating a parse error +@sa @ref invalid_iterator for exceptions indicating errors with iterators +@sa @ref out_of_range for exceptions indicating access out of the defined range +@sa @ref other_error for exceptions indicating other library errors + +@since version 3.0.0 +*/ +class type_error : public exception +{ + public: + static type_error create(int id_, const std::string& what_arg) + { + std::string w = exception::name("type_error", id_) + what_arg; + return type_error(id_, w.c_str()); + } + + private: + type_error(int id_, const char* what_arg) : exception(id_, what_arg) {} +}; + +/*! +@brief exception indicating access out of the defined range + +This exception is thrown in case a library function is called on an input +parameter that exceeds the expected range, for instance in case of array +indices or nonexisting object keys. + +Exceptions have ids 4xx. + +name / id | example message | description +------------------------------- | --------------- | ------------------------- +json.exception.out_of_range.401 | array index 3 is out of range | The provided array index @a i is larger than @a size-1. +json.exception.out_of_range.402 | array index '-' (3) is out of range | The special array index `-` in a JSON Pointer never describes a valid element of the array, but the index past the end. That is, it can only be used to add elements at this position, but not to read it. +json.exception.out_of_range.403 | key 'foo' not found | The provided key was not found in the JSON object. +json.exception.out_of_range.404 | unresolved reference token 'foo' | A reference token in a JSON Pointer could not be resolved. +json.exception.out_of_range.405 | JSON pointer has no parent | The JSON Patch operations 'remove' and 'add' can not be applied to the root element of the JSON value. +json.exception.out_of_range.406 | number overflow parsing '10E1000' | A parsed number could not be stored as without changing it to NaN or INF. +json.exception.out_of_range.407 | number overflow serializing '9223372036854775808' | UBJSON and BSON only support integer numbers up to 9223372036854775807. | +json.exception.out_of_range.408 | excessive array size: 8658170730974374167 | The size (following `#`) of an UBJSON array or object exceeds the maximal capacity. | +json.exception.out_of_range.409 | BSON key cannot contain code point U+0000 (at byte 2) | Key identifiers to be serialized to BSON cannot contain code point U+0000, since the key is stored as zero-terminated c-string | + +@liveexample{The following code shows how an `out_of_range` exception can be +caught.,out_of_range} + +@sa @ref exception for the base class of the library exceptions +@sa @ref parse_error for exceptions indicating a parse error +@sa @ref invalid_iterator for exceptions indicating errors with iterators +@sa @ref type_error for exceptions indicating executing a member function with + a wrong type +@sa @ref other_error for exceptions indicating other library errors + +@since version 3.0.0 +*/ +class out_of_range : public exception +{ + public: + static out_of_range create(int id_, const std::string& what_arg) + { + std::string w = exception::name("out_of_range", id_) + what_arg; + return out_of_range(id_, w.c_str()); + } + + private: + out_of_range(int id_, const char* what_arg) : exception(id_, what_arg) {} +}; + +/*! +@brief exception indicating other library errors + +This exception is thrown in case of errors that cannot be classified with the +other exception types. + +Exceptions have ids 5xx. + +name / id | example message | description +------------------------------ | --------------- | ------------------------- +json.exception.other_error.501 | unsuccessful: {"op":"test","path":"/baz", "value":"bar"} | A JSON Patch operation 'test' failed. The unsuccessful operation is also printed. + +@sa @ref exception for the base class of the library exceptions +@sa @ref parse_error for exceptions indicating a parse error +@sa @ref invalid_iterator for exceptions indicating errors with iterators +@sa @ref type_error for exceptions indicating executing a member function with + a wrong type +@sa @ref out_of_range for exceptions indicating access out of the defined range + +@liveexample{The following code shows how an `other_error` exception can be +caught.,other_error} + +@since version 3.0.0 +*/ +class other_error : public exception +{ + public: + static other_error create(int id_, const std::string& what_arg) + { + std::string w = exception::name("other_error", id_) + what_arg; + return other_error(id_, w.c_str()); + } + + private: + other_error(int id_, const char* what_arg) : exception(id_, what_arg) {} +}; +} // namespace detail +} // namespace nlohmann + +// #include <nlohmann/detail/value_t.hpp> + + +#include <array> // array +#include <ciso646> // and +#include <cstddef> // size_t +#include <cstdint> // uint8_t + +namespace nlohmann +{ +namespace detail +{ +/////////////////////////// +// JSON type enumeration // +/////////////////////////// + +/*! +@brief the JSON type enumeration + +This enumeration collects the different JSON types. It is internally used to +distinguish the stored values, and the functions @ref basic_json::is_null(), +@ref basic_json::is_object(), @ref basic_json::is_array(), +@ref basic_json::is_string(), @ref basic_json::is_boolean(), +@ref basic_json::is_number() (with @ref basic_json::is_number_integer(), +@ref basic_json::is_number_unsigned(), and @ref basic_json::is_number_float()), +@ref basic_json::is_discarded(), @ref basic_json::is_primitive(), and +@ref basic_json::is_structured() rely on it. + +@note There are three enumeration entries (number_integer, number_unsigned, and +number_float), because the library distinguishes these three types for numbers: +@ref basic_json::number_unsigned_t is used for unsigned integers, +@ref basic_json::number_integer_t is used for signed integers, and +@ref basic_json::number_float_t is used for floating-point numbers or to +approximate integers which do not fit in the limits of their respective type. + +@sa @ref basic_json::basic_json(const value_t value_type) -- create a JSON +value with the default value for a given type + +@since version 1.0.0 +*/ +enum class value_t : std::uint8_t +{ + null, ///< null value + object, ///< object (unordered set of name/value pairs) + array, ///< array (ordered collection of values) + string, ///< string value + boolean, ///< boolean value + number_integer, ///< number value (signed integer) + number_unsigned, ///< number value (unsigned integer) + number_float, ///< number value (floating-point) + discarded ///< discarded by the the parser callback function +}; + +/*! +@brief comparison operator for JSON types + +Returns an ordering that is similar to Python: +- order: null < boolean < number < object < array < string +- furthermore, each type is not smaller than itself +- discarded values are not comparable + +@since version 1.0.0 +*/ +inline bool operator<(const value_t lhs, const value_t rhs) noexcept +{ + static constexpr std::array<std::uint8_t, 8> order = {{ + 0 /* null */, 3 /* object */, 4 /* array */, 5 /* string */, + 1 /* boolean */, 2 /* integer */, 2 /* unsigned */, 2 /* float */ + } + }; + + const auto l_index = static_cast<std::size_t>(lhs); + const auto r_index = static_cast<std::size_t>(rhs); + return l_index < order.size() and r_index < order.size() and order[l_index] < order[r_index]; +} +} // namespace detail +} // namespace nlohmann + +// #include <nlohmann/detail/conversions/from_json.hpp> + + +#include <algorithm> // transform +#include <array> // array +#include <ciso646> // and, not +#include <forward_list> // forward_list +#include <iterator> // inserter, front_inserter, end +#include <map> // map +#include <string> // string +#include <tuple> // tuple, make_tuple +#include <type_traits> // is_arithmetic, is_same, is_enum, underlying_type, is_convertible +#include <unordered_map> // unordered_map +#include <utility> // pair, declval +#include <valarray> // valarray + +// #include <nlohmann/detail/exceptions.hpp> + +// #include <nlohmann/detail/macro_scope.hpp> + +// #include <nlohmann/detail/meta/cpp_future.hpp> + +// #include <nlohmann/detail/meta/type_traits.hpp> + +// #include <nlohmann/detail/value_t.hpp> + + +namespace nlohmann +{ +namespace detail +{ +template<typename BasicJsonType> +void from_json(const BasicJsonType& j, typename std::nullptr_t& n) +{ + if (JSON_UNLIKELY(not j.is_null())) + { + JSON_THROW(type_error::create(302, "type must be null, but is " + std::string(j.type_name()))); + } + n = nullptr; +} + +// overloads for basic_json template parameters +template<typename BasicJsonType, typename ArithmeticType, + enable_if_t<std::is_arithmetic<ArithmeticType>::value and + not std::is_same<ArithmeticType, typename BasicJsonType::boolean_t>::value, + int> = 0> +void get_arithmetic_value(const BasicJsonType& j, ArithmeticType& val) +{ + switch (static_cast<value_t>(j)) + { + case value_t::number_unsigned: + { + val = static_cast<ArithmeticType>(*j.template get_ptr<const typename BasicJsonType::number_unsigned_t*>()); + break; + } + case value_t::number_integer: + { + val = static_cast<ArithmeticType>(*j.template get_ptr<const typename BasicJsonType::number_integer_t*>()); + break; + } + case value_t::number_float: + { + val = static_cast<ArithmeticType>(*j.template get_ptr<const typename BasicJsonType::number_float_t*>()); + break; + } + + default: + JSON_THROW(type_error::create(302, "type must be number, but is " + std::string(j.type_name()))); + } +} + +template<typename BasicJsonType> +void from_json(const BasicJsonType& j, typename BasicJsonType::boolean_t& b) +{ + if (JSON_UNLIKELY(not j.is_boolean())) + { + JSON_THROW(type_error::create(302, "type must be boolean, but is " + std::string(j.type_name()))); + } + b = *j.template get_ptr<const typename BasicJsonType::boolean_t*>(); +} + +template<typename BasicJsonType> +void from_json(const BasicJsonType& j, typename BasicJsonType::string_t& s) +{ + if (JSON_UNLIKELY(not j.is_string())) + { + JSON_THROW(type_error::create(302, "type must be string, but is " + std::string(j.type_name()))); + } + s = *j.template get_ptr<const typename BasicJsonType::string_t*>(); +} + +template < + typename BasicJsonType, typename ConstructibleStringType, + enable_if_t < + is_constructible_string_type<BasicJsonType, ConstructibleStringType>::value and + not std::is_same<typename BasicJsonType::string_t, + ConstructibleStringType>::value, + int > = 0 > +void from_json(const BasicJsonType& j, ConstructibleStringType& s) +{ + if (JSON_UNLIKELY(not j.is_string())) + { + JSON_THROW(type_error::create(302, "type must be string, but is " + std::string(j.type_name()))); + } + + s = *j.template get_ptr<const typename BasicJsonType::string_t*>(); +} + +template<typename BasicJsonType> +void from_json(const BasicJsonType& j, typename BasicJsonType::number_float_t& val) +{ + get_arithmetic_value(j, val); +} + +template<typename BasicJsonType> +void from_json(const BasicJsonType& j, typename BasicJsonType::number_unsigned_t& val) +{ + get_arithmetic_value(j, val); +} + +template<typename BasicJsonType> +void from_json(const BasicJsonType& j, typename BasicJsonType::number_integer_t& val) +{ + get_arithmetic_value(j, val); +} + +template<typename BasicJsonType, typename EnumType, + enable_if_t<std::is_enum<EnumType>::value, int> = 0> +void from_json(const BasicJsonType& j, EnumType& e) +{ + typename std::underlying_type<EnumType>::type val; + get_arithmetic_value(j, val); + e = static_cast<EnumType>(val); +} + +// forward_list doesn't have an insert method +template<typename BasicJsonType, typename T, typename Allocator, + enable_if_t<std::is_convertible<BasicJsonType, T>::value, int> = 0> +void from_json(const BasicJsonType& j, std::forward_list<T, Allocator>& l) +{ + if (JSON_UNLIKELY(not j.is_array())) + { + JSON_THROW(type_error::create(302, "type must be array, but is " + std::string(j.type_name()))); + } + std::transform(j.rbegin(), j.rend(), + std::front_inserter(l), [](const BasicJsonType & i) + { + return i.template get<T>(); + }); +} + +// valarray doesn't have an insert method +template<typename BasicJsonType, typename T, + enable_if_t<std::is_convertible<BasicJsonType, T>::value, int> = 0> +void from_json(const BasicJsonType& j, std::valarray<T>& l) +{ + if (JSON_UNLIKELY(not j.is_array())) + { + JSON_THROW(type_error::create(302, "type must be array, but is " + std::string(j.type_name()))); + } + l.resize(j.size()); + std::copy(j.m_value.array->begin(), j.m_value.array->end(), std::begin(l)); +} + +template<typename BasicJsonType> +void from_json_array_impl(const BasicJsonType& j, typename BasicJsonType::array_t& arr, priority_tag<3> /*unused*/) +{ + arr = *j.template get_ptr<const typename BasicJsonType::array_t*>(); +} + +template <typename BasicJsonType, typename T, std::size_t N> +auto from_json_array_impl(const BasicJsonType& j, std::array<T, N>& arr, + priority_tag<2> /*unused*/) +-> decltype(j.template get<T>(), void()) +{ + for (std::size_t i = 0; i < N; ++i) + { + arr[i] = j.at(i).template get<T>(); + } +} + +template<typename BasicJsonType, typename ConstructibleArrayType> +auto from_json_array_impl(const BasicJsonType& j, ConstructibleArrayType& arr, priority_tag<1> /*unused*/) +-> decltype( + arr.reserve(std::declval<typename ConstructibleArrayType::size_type>()), + j.template get<typename ConstructibleArrayType::value_type>(), + void()) +{ + using std::end; + + arr.reserve(j.size()); + std::transform(j.begin(), j.end(), + std::inserter(arr, end(arr)), [](const BasicJsonType & i) + { + // get<BasicJsonType>() returns *this, this won't call a from_json + // method when value_type is BasicJsonType + return i.template get<typename ConstructibleArrayType::value_type>(); + }); +} + +template <typename BasicJsonType, typename ConstructibleArrayType> +void from_json_array_impl(const BasicJsonType& j, ConstructibleArrayType& arr, + priority_tag<0> /*unused*/) +{ + using std::end; + + std::transform( + j.begin(), j.end(), std::inserter(arr, end(arr)), + [](const BasicJsonType & i) + { + // get<BasicJsonType>() returns *this, this won't call a from_json + // method when value_type is BasicJsonType + return i.template get<typename ConstructibleArrayType::value_type>(); + }); +} + +template <typename BasicJsonType, typename ConstructibleArrayType, + enable_if_t < + is_constructible_array_type<BasicJsonType, ConstructibleArrayType>::value and + not is_constructible_object_type<BasicJsonType, ConstructibleArrayType>::value and + not is_constructible_string_type<BasicJsonType, ConstructibleArrayType>::value and + not is_basic_json<ConstructibleArrayType>::value, + int > = 0 > + +auto from_json(const BasicJsonType& j, ConstructibleArrayType& arr) +-> decltype(from_json_array_impl(j, arr, priority_tag<3> {}), +j.template get<typename ConstructibleArrayType::value_type>(), +void()) +{ + if (JSON_UNLIKELY(not j.is_array())) + { + JSON_THROW(type_error::create(302, "type must be array, but is " + + std::string(j.type_name()))); + } + + from_json_array_impl(j, arr, priority_tag<3> {}); +} + +template<typename BasicJsonType, typename ConstructibleObjectType, + enable_if_t<is_constructible_object_type<BasicJsonType, ConstructibleObjectType>::value, int> = 0> +void from_json(const BasicJsonType& j, ConstructibleObjectType& obj) +{ + if (JSON_UNLIKELY(not j.is_object())) + { + JSON_THROW(type_error::create(302, "type must be object, but is " + std::string(j.type_name()))); + } + + auto inner_object = j.template get_ptr<const typename BasicJsonType::object_t*>(); + using value_type = typename ConstructibleObjectType::value_type; + std::transform( + inner_object->begin(), inner_object->end(), + std::inserter(obj, obj.begin()), + [](typename BasicJsonType::object_t::value_type const & p) + { + return value_type(p.first, p.second.template get<typename ConstructibleObjectType::mapped_type>()); + }); +} + +// overload for arithmetic types, not chosen for basic_json template arguments +// (BooleanType, etc..); note: Is it really necessary to provide explicit +// overloads for boolean_t etc. in case of a custom BooleanType which is not +// an arithmetic type? +template<typename BasicJsonType, typename ArithmeticType, + enable_if_t < + std::is_arithmetic<ArithmeticType>::value and + not std::is_same<ArithmeticType, typename BasicJsonType::number_unsigned_t>::value and + not std::is_same<ArithmeticType, typename BasicJsonType::number_integer_t>::value and + not std::is_same<ArithmeticType, typename BasicJsonType::number_float_t>::value and + not std::is_same<ArithmeticType, typename BasicJsonType::boolean_t>::value, + int> = 0> +void from_json(const BasicJsonType& j, ArithmeticType& val) +{ + switch (static_cast<value_t>(j)) + { + case value_t::number_unsigned: + { + val = static_cast<ArithmeticType>(*j.template get_ptr<const typename BasicJsonType::number_unsigned_t*>()); + break; + } + case value_t::number_integer: + { + val = static_cast<ArithmeticType>(*j.template get_ptr<const typename BasicJsonType::number_integer_t*>()); + break; + } + case value_t::number_float: + { + val = static_cast<ArithmeticType>(*j.template get_ptr<const typename BasicJsonType::number_float_t*>()); + break; + } + case value_t::boolean: + { + val = static_cast<ArithmeticType>(*j.template get_ptr<const typename BasicJsonType::boolean_t*>()); + break; + } + + default: + JSON_THROW(type_error::create(302, "type must be number, but is " + std::string(j.type_name()))); + } +} + +template<typename BasicJsonType, typename A1, typename A2> +void from_json(const BasicJsonType& j, std::pair<A1, A2>& p) +{ + p = {j.at(0).template get<A1>(), j.at(1).template get<A2>()}; +} + +template<typename BasicJsonType, typename Tuple, std::size_t... Idx> +void from_json_tuple_impl(const BasicJsonType& j, Tuple& t, index_sequence<Idx...> /*unused*/) +{ + t = std::make_tuple(j.at(Idx).template get<typename std::tuple_element<Idx, Tuple>::type>()...); +} + +template<typename BasicJsonType, typename... Args> +void from_json(const BasicJsonType& j, std::tuple<Args...>& t) +{ + from_json_tuple_impl(j, t, index_sequence_for<Args...> {}); +} + +template <typename BasicJsonType, typename Key, typename Value, typename Compare, typename Allocator, + typename = enable_if_t<not std::is_constructible< + typename BasicJsonType::string_t, Key>::value>> +void from_json(const BasicJsonType& j, std::map<Key, Value, Compare, Allocator>& m) +{ + if (JSON_UNLIKELY(not j.is_array())) + { + JSON_THROW(type_error::create(302, "type must be array, but is " + std::string(j.type_name()))); + } + for (const auto& p : j) + { + if (JSON_UNLIKELY(not p.is_array())) + { + JSON_THROW(type_error::create(302, "type must be array, but is " + std::string(p.type_name()))); + } + m.emplace(p.at(0).template get<Key>(), p.at(1).template get<Value>()); + } +} + +template <typename BasicJsonType, typename Key, typename Value, typename Hash, typename KeyEqual, typename Allocator, + typename = enable_if_t<not std::is_constructible< + typename BasicJsonType::string_t, Key>::value>> +void from_json(const BasicJsonType& j, std::unordered_map<Key, Value, Hash, KeyEqual, Allocator>& m) +{ + if (JSON_UNLIKELY(not j.is_array())) + { + JSON_THROW(type_error::create(302, "type must be array, but is " + std::string(j.type_name()))); + } + for (const auto& p : j) + { + if (JSON_UNLIKELY(not p.is_array())) + { + JSON_THROW(type_error::create(302, "type must be array, but is " + std::string(p.type_name()))); + } + m.emplace(p.at(0).template get<Key>(), p.at(1).template get<Value>()); + } +} + +struct from_json_fn +{ + template<typename BasicJsonType, typename T> + auto operator()(const BasicJsonType& j, T& val) const + noexcept(noexcept(from_json(j, val))) + -> decltype(from_json(j, val), void()) + { + return from_json(j, val); + } +}; +} // namespace detail + +/// namespace to hold default `from_json` function +/// to see why this is required: +/// http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2015/n4381.html +namespace +{ +constexpr const auto& from_json = detail::static_const<detail::from_json_fn>::value; +} // namespace +} // namespace nlohmann + +// #include <nlohmann/detail/conversions/to_json.hpp> + + +#include <ciso646> // or, and, not +#include <iterator> // begin, end +#include <tuple> // tuple, get +#include <type_traits> // is_same, is_constructible, is_floating_point, is_enum, underlying_type +#include <utility> // move, forward, declval, pair +#include <valarray> // valarray +#include <vector> // vector + +// #include <nlohmann/detail/meta/cpp_future.hpp> + +// #include <nlohmann/detail/meta/type_traits.hpp> + +// #include <nlohmann/detail/value_t.hpp> + +// #include <nlohmann/detail/iterators/iteration_proxy.hpp> + + +#include <cstddef> // size_t +#include <string> // string, to_string +#include <iterator> // input_iterator_tag +#include <tuple> // tuple_size, get, tuple_element + +// #include <nlohmann/detail/value_t.hpp> + +// #include <nlohmann/detail/meta/type_traits.hpp> + + +namespace nlohmann +{ +namespace detail +{ +template <typename IteratorType> class iteration_proxy_value +{ + public: + using difference_type = std::ptrdiff_t; + using value_type = iteration_proxy_value; + using pointer = value_type * ; + using reference = value_type & ; + using iterator_category = std::input_iterator_tag; + + private: + /// the iterator + IteratorType anchor; + /// an index for arrays (used to create key names) + std::size_t array_index = 0; + /// last stringified array index + mutable std::size_t array_index_last = 0; + /// a string representation of the array index + mutable std::string array_index_str = "0"; + /// an empty string (to return a reference for primitive values) + const std::string empty_str = ""; + + public: + explicit iteration_proxy_value(IteratorType it) noexcept : anchor(it) {} + + /// dereference operator (needed for range-based for) + iteration_proxy_value& operator*() + { + return *this; + } + + /// increment operator (needed for range-based for) + iteration_proxy_value& operator++() + { + ++anchor; + ++array_index; + + return *this; + } + + /// equality operator (needed for InputIterator) + bool operator==(const iteration_proxy_value& o) const noexcept + { + return anchor == o.anchor; + } + + /// inequality operator (needed for range-based for) + bool operator!=(const iteration_proxy_value& o) const noexcept + { + return anchor != o.anchor; + } + + /// return key of the iterator + const std::string& key() const + { + assert(anchor.m_object != nullptr); + + switch (anchor.m_object->type()) + { + // use integer array index as key + case value_t::array: + { + if (array_index != array_index_last) + { + array_index_str = std::to_string(array_index); + array_index_last = array_index; + } + return array_index_str; + } + + // use key from the object + case value_t::object: + return anchor.key(); + + // use an empty key for all primitive types + default: + return empty_str; + } + } + + /// return value of the iterator + typename IteratorType::reference value() const + { + return anchor.value(); + } +}; + +/// proxy class for the items() function +template<typename IteratorType> class iteration_proxy +{ + private: + /// the container to iterate + typename IteratorType::reference container; + + public: + /// construct iteration proxy from a container + explicit iteration_proxy(typename IteratorType::reference cont) noexcept + : container(cont) {} + + /// return iterator begin (needed for range-based for) + iteration_proxy_value<IteratorType> begin() noexcept + { + return iteration_proxy_value<IteratorType>(container.begin()); + } + + /// return iterator end (needed for range-based for) + iteration_proxy_value<IteratorType> end() noexcept + { + return iteration_proxy_value<IteratorType>(container.end()); + } +}; +// Structured Bindings Support +// For further reference see https://blog.tartanllama.xyz/structured-bindings/ +// And see https://github.com/nlohmann/json/pull/1391 +template <std::size_t N, typename IteratorType, enable_if_t<N == 0, int> = 0> +auto get(const nlohmann::detail::iteration_proxy_value<IteratorType>& i) -> decltype(i.key()) +{ + return i.key(); +} +// Structured Bindings Support +// For further reference see https://blog.tartanllama.xyz/structured-bindings/ +// And see https://github.com/nlohmann/json/pull/1391 +template <std::size_t N, typename IteratorType, enable_if_t<N == 1, int> = 0> +auto get(const nlohmann::detail::iteration_proxy_value<IteratorType>& i) -> decltype(i.value()) +{ + return i.value(); +} +} // namespace detail +} // namespace nlohmann + +// The Addition to the STD Namespace is required to add +// Structured Bindings Support to the iteration_proxy_value class +// For further reference see https://blog.tartanllama.xyz/structured-bindings/ +// And see https://github.com/nlohmann/json/pull/1391 +namespace std +{ +template <typename IteratorType> +class tuple_size<::nlohmann::detail::iteration_proxy_value<IteratorType>> + : public std::integral_constant<std::size_t, 2> {}; + +template <std::size_t N, typename IteratorType> +class tuple_element<N, ::nlohmann::detail::iteration_proxy_value<IteratorType >> +{ + public: + using type = decltype( + get<N>(std::declval < + ::nlohmann::detail::iteration_proxy_value<IteratorType >> ())); +}; +} + +namespace nlohmann +{ +namespace detail +{ +////////////////// +// constructors // +////////////////// + +template<value_t> struct external_constructor; + +template<> +struct external_constructor<value_t::boolean> +{ + template<typename BasicJsonType> + static void construct(BasicJsonType& j, typename BasicJsonType::boolean_t b) noexcept + { + j.m_type = value_t::boolean; + j.m_value = b; + j.assert_invariant(); + } +}; + +template<> +struct external_constructor<value_t::string> +{ + template<typename BasicJsonType> + static void construct(BasicJsonType& j, const typename BasicJsonType::string_t& s) + { + j.m_type = value_t::string; + j.m_value = s; + j.assert_invariant(); + } + + template<typename BasicJsonType> + static void construct(BasicJsonType& j, typename BasicJsonType::string_t&& s) + { + j.m_type = value_t::string; + j.m_value = std::move(s); + j.assert_invariant(); + } + + template<typename BasicJsonType, typename CompatibleStringType, + enable_if_t<not std::is_same<CompatibleStringType, typename BasicJsonType::string_t>::value, + int> = 0> + static void construct(BasicJsonType& j, const CompatibleStringType& str) + { + j.m_type = value_t::string; + j.m_value.string = j.template create<typename BasicJsonType::string_t>(str); + j.assert_invariant(); + } +}; + +template<> +struct external_constructor<value_t::number_float> +{ + template<typename BasicJsonType> + static void construct(BasicJsonType& j, typename BasicJsonType::number_float_t val) noexcept + { + j.m_type = value_t::number_float; + j.m_value = val; + j.assert_invariant(); + } +}; + +template<> +struct external_constructor<value_t::number_unsigned> +{ + template<typename BasicJsonType> + static void construct(BasicJsonType& j, typename BasicJsonType::number_unsigned_t val) noexcept + { + j.m_type = value_t::number_unsigned; + j.m_value = val; + j.assert_invariant(); + } +}; + +template<> +struct external_constructor<value_t::number_integer> +{ + template<typename BasicJsonType> + static void construct(BasicJsonType& j, typename BasicJsonType::number_integer_t val) noexcept + { + j.m_type = value_t::number_integer; + j.m_value = val; + j.assert_invariant(); + } +}; + +template<> +struct external_constructor<value_t::array> +{ + template<typename BasicJsonType> + static void construct(BasicJsonType& j, const typename BasicJsonType::array_t& arr) + { + j.m_type = value_t::array; + j.m_value = arr; + j.assert_invariant(); + } + + template<typename BasicJsonType> + static void construct(BasicJsonType& j, typename BasicJsonType::array_t&& arr) + { + j.m_type = value_t::array; + j.m_value = std::move(arr); + j.assert_invariant(); + } + + template<typename BasicJsonType, typename CompatibleArrayType, + enable_if_t<not std::is_same<CompatibleArrayType, typename BasicJsonType::array_t>::value, + int> = 0> + static void construct(BasicJsonType& j, const CompatibleArrayType& arr) + { + using std::begin; + using std::end; + j.m_type = value_t::array; + j.m_value.array = j.template create<typename BasicJsonType::array_t>(begin(arr), end(arr)); + j.assert_invariant(); + } + + template<typename BasicJsonType> + static void construct(BasicJsonType& j, const std::vector<bool>& arr) + { + j.m_type = value_t::array; + j.m_value = value_t::array; + j.m_value.array->reserve(arr.size()); + for (const bool x : arr) + { + j.m_value.array->push_back(x); + } + j.assert_invariant(); + } + + template<typename BasicJsonType, typename T, + enable_if_t<std::is_convertible<T, BasicJsonType>::value, int> = 0> + static void construct(BasicJsonType& j, const std::valarray<T>& arr) + { + j.m_type = value_t::array; + j.m_value = value_t::array; + j.m_value.array->resize(arr.size()); + std::copy(std::begin(arr), std::end(arr), j.m_value.array->begin()); + j.assert_invariant(); + } +}; + +template<> +struct external_constructor<value_t::object> +{ + template<typename BasicJsonType> + static void construct(BasicJsonType& j, const typename BasicJsonType::object_t& obj) + { + j.m_type = value_t::object; + j.m_value = obj; + j.assert_invariant(); + } + + template<typename BasicJsonType> + static void construct(BasicJsonType& j, typename BasicJsonType::object_t&& obj) + { + j.m_type = value_t::object; + j.m_value = std::move(obj); + j.assert_invariant(); + } + + template<typename BasicJsonType, typename CompatibleObjectType, + enable_if_t<not std::is_same<CompatibleObjectType, typename BasicJsonType::object_t>::value, int> = 0> + static void construct(BasicJsonType& j, const CompatibleObjectType& obj) + { + using std::begin; + using std::end; + + j.m_type = value_t::object; + j.m_value.object = j.template create<typename BasicJsonType::object_t>(begin(obj), end(obj)); + j.assert_invariant(); + } +}; + +///////////// +// to_json // +///////////// + +template<typename BasicJsonType, typename T, + enable_if_t<std::is_same<T, typename BasicJsonType::boolean_t>::value, int> = 0> +void to_json(BasicJsonType& j, T b) noexcept +{ + external_constructor<value_t::boolean>::construct(j, b); +} + +template<typename BasicJsonType, typename CompatibleString, + enable_if_t<std::is_constructible<typename BasicJsonType::string_t, CompatibleString>::value, int> = 0> +void to_json(BasicJsonType& j, const CompatibleString& s) +{ + external_constructor<value_t::string>::construct(j, s); +} + +template<typename BasicJsonType> +void to_json(BasicJsonType& j, typename BasicJsonType::string_t&& s) +{ + external_constructor<value_t::string>::construct(j, std::move(s)); +} + +template<typename BasicJsonType, typename FloatType, + enable_if_t<std::is_floating_point<FloatType>::value, int> = 0> +void to_json(BasicJsonType& j, FloatType val) noexcept +{ + external_constructor<value_t::number_float>::construct(j, static_cast<typename BasicJsonType::number_float_t>(val)); +} + +template<typename BasicJsonType, typename CompatibleNumberUnsignedType, + enable_if_t<is_compatible_integer_type<typename BasicJsonType::number_unsigned_t, CompatibleNumberUnsignedType>::value, int> = 0> +void to_json(BasicJsonType& j, CompatibleNumberUnsignedType val) noexcept +{ + external_constructor<value_t::number_unsigned>::construct(j, static_cast<typename BasicJsonType::number_unsigned_t>(val)); +} + +template<typename BasicJsonType, typename CompatibleNumberIntegerType, + enable_if_t<is_compatible_integer_type<typename BasicJsonType::number_integer_t, CompatibleNumberIntegerType>::value, int> = 0> +void to_json(BasicJsonType& j, CompatibleNumberIntegerType val) noexcept +{ + external_constructor<value_t::number_integer>::construct(j, static_cast<typename BasicJsonType::number_integer_t>(val)); +} + +template<typename BasicJsonType, typename EnumType, + enable_if_t<std::is_enum<EnumType>::value, int> = 0> +void to_json(BasicJsonType& j, EnumType e) noexcept +{ + using underlying_type = typename std::underlying_type<EnumType>::type; + external_constructor<value_t::number_integer>::construct(j, static_cast<underlying_type>(e)); +} + +template<typename BasicJsonType> +void to_json(BasicJsonType& j, const std::vector<bool>& e) +{ + external_constructor<value_t::array>::construct(j, e); +} + +template <typename BasicJsonType, typename CompatibleArrayType, + enable_if_t<is_compatible_array_type<BasicJsonType, + CompatibleArrayType>::value and + not is_compatible_object_type< + BasicJsonType, CompatibleArrayType>::value and + not is_compatible_string_type<BasicJsonType, CompatibleArrayType>::value and + not is_basic_json<CompatibleArrayType>::value, + int> = 0> +void to_json(BasicJsonType& j, const CompatibleArrayType& arr) +{ + external_constructor<value_t::array>::construct(j, arr); +} + +template<typename BasicJsonType, typename T, + enable_if_t<std::is_convertible<T, BasicJsonType>::value, int> = 0> +void to_json(BasicJsonType& j, const std::valarray<T>& arr) +{ + external_constructor<value_t::array>::construct(j, std::move(arr)); +} + +template<typename BasicJsonType> +void to_json(BasicJsonType& j, typename BasicJsonType::array_t&& arr) +{ + external_constructor<value_t::array>::construct(j, std::move(arr)); +} + +template<typename BasicJsonType, typename CompatibleObjectType, + enable_if_t<is_compatible_object_type<BasicJsonType, CompatibleObjectType>::value and not is_basic_json<CompatibleObjectType>::value, int> = 0> +void to_json(BasicJsonType& j, const CompatibleObjectType& obj) +{ + external_constructor<value_t::object>::construct(j, obj); +} + +template<typename BasicJsonType> +void to_json(BasicJsonType& j, typename BasicJsonType::object_t&& obj) +{ + external_constructor<value_t::object>::construct(j, std::move(obj)); +} + +template < + typename BasicJsonType, typename T, std::size_t N, + enable_if_t<not std::is_constructible<typename BasicJsonType::string_t, + const T(&)[N]>::value, + int> = 0 > +void to_json(BasicJsonType& j, const T(&arr)[N]) +{ + external_constructor<value_t::array>::construct(j, arr); +} + +template<typename BasicJsonType, typename... Args> +void to_json(BasicJsonType& j, const std::pair<Args...>& p) +{ + j = { p.first, p.second }; +} + +// for https://github.com/nlohmann/json/pull/1134 +template < typename BasicJsonType, typename T, + enable_if_t<std::is_same<T, iteration_proxy_value<typename BasicJsonType::iterator>>::value, int> = 0> +void to_json(BasicJsonType& j, const T& b) +{ + j = { {b.key(), b.value()} }; +} + +template<typename BasicJsonType, typename Tuple, std::size_t... Idx> +void to_json_tuple_impl(BasicJsonType& j, const Tuple& t, index_sequence<Idx...> /*unused*/) +{ + j = { std::get<Idx>(t)... }; +} + +template<typename BasicJsonType, typename... Args> +void to_json(BasicJsonType& j, const std::tuple<Args...>& t) +{ + to_json_tuple_impl(j, t, index_sequence_for<Args...> {}); +} + +struct to_json_fn +{ + template<typename BasicJsonType, typename T> + auto operator()(BasicJsonType& j, T&& val) const noexcept(noexcept(to_json(j, std::forward<T>(val)))) + -> decltype(to_json(j, std::forward<T>(val)), void()) + { + return to_json(j, std::forward<T>(val)); + } +}; +} // namespace detail + +/// namespace to hold default `to_json` function +namespace +{ +constexpr const auto& to_json = detail::static_const<detail::to_json_fn>::value; +} // namespace +} // namespace nlohmann + +// #include <nlohmann/detail/input/input_adapters.hpp> + + +#include <cassert> // assert +#include <cstddef> // size_t +#include <cstring> // strlen +#include <istream> // istream +#include <iterator> // begin, end, iterator_traits, random_access_iterator_tag, distance, next +#include <memory> // shared_ptr, make_shared, addressof +#include <numeric> // accumulate +#include <string> // string, char_traits +#include <type_traits> // enable_if, is_base_of, is_pointer, is_integral, remove_pointer +#include <utility> // pair, declval +#include <cstdio> //FILE * + +// #include <nlohmann/detail/macro_scope.hpp> + + +namespace nlohmann +{ +namespace detail +{ +/// the supported input formats +enum class input_format_t { json, cbor, msgpack, ubjson, bson }; + +//////////////////// +// input adapters // +//////////////////// + +/*! +@brief abstract input adapter interface + +Produces a stream of std::char_traits<char>::int_type characters from a +std::istream, a buffer, or some other input type. Accepts the return of +exactly one non-EOF character for future input. The int_type characters +returned consist of all valid char values as positive values (typically +unsigned char), plus an EOF value outside that range, specified by the value +of the function std::char_traits<char>::eof(). This value is typically -1, but +could be any arbitrary value which is not a valid char value. +*/ +struct input_adapter_protocol +{ + /// get a character [0,255] or std::char_traits<char>::eof(). + virtual std::char_traits<char>::int_type get_character() = 0; + virtual ~input_adapter_protocol() = default; +}; + +/// a type to simplify interfaces +using input_adapter_t = std::shared_ptr<input_adapter_protocol>; + +/*! +Input adapter for stdio file access. This adapter read only 1 byte and do not use any + buffer. This adapter is a very low level adapter. +*/ +class file_input_adapter : public input_adapter_protocol +{ + public: + explicit file_input_adapter(std::FILE* f) noexcept + : m_file(f) + {} + + std::char_traits<char>::int_type get_character() noexcept override + { + return std::fgetc(m_file); + } + private: + /// the file pointer to read from + std::FILE* m_file; +}; + + +/*! +Input adapter for a (caching) istream. Ignores a UFT Byte Order Mark at +beginning of input. Does not support changing the underlying std::streambuf +in mid-input. Maintains underlying std::istream and std::streambuf to support +subsequent use of standard std::istream operations to process any input +characters following those used in parsing the JSON input. Clears the +std::istream flags; any input errors (e.g., EOF) will be detected by the first +subsequent call for input from the std::istream. +*/ +class input_stream_adapter : public input_adapter_protocol +{ + public: + ~input_stream_adapter() override + { + // clear stream flags; we use underlying streambuf I/O, do not + // maintain ifstream flags, except eof + is.clear(is.rdstate() & std::ios::eofbit); + } + + explicit input_stream_adapter(std::istream& i) + : is(i), sb(*i.rdbuf()) + {} + + // delete because of pointer members + input_stream_adapter(const input_stream_adapter&) = delete; + input_stream_adapter& operator=(input_stream_adapter&) = delete; + input_stream_adapter(input_stream_adapter&&) = delete; + input_stream_adapter& operator=(input_stream_adapter&&) = delete; + + // std::istream/std::streambuf use std::char_traits<char>::to_int_type, to + // ensure that std::char_traits<char>::eof() and the character 0xFF do not + // end up as the same value, eg. 0xFFFFFFFF. + std::char_traits<char>::int_type get_character() override + { + auto res = sb.sbumpc(); + // set eof manually, as we don't use the istream interface. + if (res == EOF) + { + is.clear(is.rdstate() | std::ios::eofbit); + } + return res; + } + + private: + /// the associated input stream + std::istream& is; + std::streambuf& sb; +}; + +/// input adapter for buffer input +class input_buffer_adapter : public input_adapter_protocol +{ + public: + input_buffer_adapter(const char* b, const std::size_t l) noexcept + : cursor(b), limit(b + l) + {} + + // delete because of pointer members + input_buffer_adapter(const input_buffer_adapter&) = delete; + input_buffer_adapter& operator=(input_buffer_adapter&) = delete; + input_buffer_adapter(input_buffer_adapter&&) = delete; + input_buffer_adapter& operator=(input_buffer_adapter&&) = delete; + ~input_buffer_adapter() override = default; + + std::char_traits<char>::int_type get_character() noexcept override + { + if (JSON_LIKELY(cursor < limit)) + { + return std::char_traits<char>::to_int_type(*(cursor++)); + } + + return std::char_traits<char>::eof(); + } + + private: + /// pointer to the current character + const char* cursor; + /// pointer past the last character + const char* const limit; +}; + +template<typename WideStringType, size_t T> +struct wide_string_input_helper +{ + // UTF-32 + static void fill_buffer(const WideStringType& str, size_t& current_wchar, std::array<std::char_traits<char>::int_type, 4>& utf8_bytes, size_t& utf8_bytes_index, size_t& utf8_bytes_filled) + { + utf8_bytes_index = 0; + + if (current_wchar == str.size()) + { + utf8_bytes[0] = std::char_traits<char>::eof(); + utf8_bytes_filled = 1; + } + else + { + // get the current character + const auto wc = static_cast<int>(str[current_wchar++]); + + // UTF-32 to UTF-8 encoding + if (wc < 0x80) + { + utf8_bytes[0] = wc; + utf8_bytes_filled = 1; + } + else if (wc <= 0x7FF) + { + utf8_bytes[0] = 0xC0 | ((wc >> 6) & 0x1F); + utf8_bytes[1] = 0x80 | (wc & 0x3F); + utf8_bytes_filled = 2; + } + else if (wc <= 0xFFFF) + { + utf8_bytes[0] = 0xE0 | ((wc >> 12) & 0x0F); + utf8_bytes[1] = 0x80 | ((wc >> 6) & 0x3F); + utf8_bytes[2] = 0x80 | (wc & 0x3F); + utf8_bytes_filled = 3; + } + else if (wc <= 0x10FFFF) + { + utf8_bytes[0] = 0xF0 | ((wc >> 18) & 0x07); + utf8_bytes[1] = 0x80 | ((wc >> 12) & 0x3F); + utf8_bytes[2] = 0x80 | ((wc >> 6) & 0x3F); + utf8_bytes[3] = 0x80 | (wc & 0x3F); + utf8_bytes_filled = 4; + } + else + { + // unknown character + utf8_bytes[0] = wc; + utf8_bytes_filled = 1; + } + } + } +}; + +template<typename WideStringType> +struct wide_string_input_helper<WideStringType, 2> +{ + // UTF-16 + static void fill_buffer(const WideStringType& str, size_t& current_wchar, std::array<std::char_traits<char>::int_type, 4>& utf8_bytes, size_t& utf8_bytes_index, size_t& utf8_bytes_filled) + { + utf8_bytes_index = 0; + + if (current_wchar == str.size()) + { + utf8_bytes[0] = std::char_traits<char>::eof(); + utf8_bytes_filled = 1; + } + else + { + // get the current character + const auto wc = static_cast<int>(str[current_wchar++]); + + // UTF-16 to UTF-8 encoding + if (wc < 0x80) + { + utf8_bytes[0] = wc; + utf8_bytes_filled = 1; + } + else if (wc <= 0x7FF) + { + utf8_bytes[0] = 0xC0 | ((wc >> 6)); + utf8_bytes[1] = 0x80 | (wc & 0x3F); + utf8_bytes_filled = 2; + } + else if (0xD800 > wc or wc >= 0xE000) + { + utf8_bytes[0] = 0xE0 | ((wc >> 12)); + utf8_bytes[1] = 0x80 | ((wc >> 6) & 0x3F); + utf8_bytes[2] = 0x80 | (wc & 0x3F); + utf8_bytes_filled = 3; + } + else + { + if (current_wchar < str.size()) + { + const auto wc2 = static_cast<int>(str[current_wchar++]); + const int charcode = 0x10000 + (((wc & 0x3FF) << 10) | (wc2 & 0x3FF)); + utf8_bytes[0] = 0xf0 | (charcode >> 18); + utf8_bytes[1] = 0x80 | ((charcode >> 12) & 0x3F); + utf8_bytes[2] = 0x80 | ((charcode >> 6) & 0x3F); + utf8_bytes[3] = 0x80 | (charcode & 0x3F); + utf8_bytes_filled = 4; + } + else + { + // unknown character + ++current_wchar; + utf8_bytes[0] = wc; + utf8_bytes_filled = 1; + } + } + } + } +}; + +template<typename WideStringType> +class wide_string_input_adapter : public input_adapter_protocol +{ + public: + explicit wide_string_input_adapter(const WideStringType& w) noexcept + : str(w) + {} + + std::char_traits<char>::int_type get_character() noexcept override + { + // check if buffer needs to be filled + if (utf8_bytes_index == utf8_bytes_filled) + { + fill_buffer<sizeof(typename WideStringType::value_type)>(); + + assert(utf8_bytes_filled > 0); + assert(utf8_bytes_index == 0); + } + + // use buffer + assert(utf8_bytes_filled > 0); + assert(utf8_bytes_index < utf8_bytes_filled); + return utf8_bytes[utf8_bytes_index++]; + } + + private: + template<size_t T> + void fill_buffer() + { + wide_string_input_helper<WideStringType, T>::fill_buffer(str, current_wchar, utf8_bytes, utf8_bytes_index, utf8_bytes_filled); + } + + /// the wstring to process + const WideStringType& str; + + /// index of the current wchar in str + std::size_t current_wchar = 0; + + /// a buffer for UTF-8 bytes + std::array<std::char_traits<char>::int_type, 4> utf8_bytes = {{0, 0, 0, 0}}; + + /// index to the utf8_codes array for the next valid byte + std::size_t utf8_bytes_index = 0; + /// number of valid bytes in the utf8_codes array + std::size_t utf8_bytes_filled = 0; +}; + +class input_adapter +{ + public: + // native support + input_adapter(std::FILE* file) + : ia(std::make_shared<file_input_adapter>(file)) {} + /// input adapter for input stream + input_adapter(std::istream& i) + : ia(std::make_shared<input_stream_adapter>(i)) {} + + /// input adapter for input stream + input_adapter(std::istream&& i) + : ia(std::make_shared<input_stream_adapter>(i)) {} + + input_adapter(const std::wstring& ws) + : ia(std::make_shared<wide_string_input_adapter<std::wstring>>(ws)) {} + + input_adapter(const std::u16string& ws) + : ia(std::make_shared<wide_string_input_adapter<std::u16string>>(ws)) {} + + input_adapter(const std::u32string& ws) + : ia(std::make_shared<wide_string_input_adapter<std::u32string>>(ws)) {} + + /// input adapter for buffer + template<typename CharT, + typename std::enable_if< + std::is_pointer<CharT>::value and + std::is_integral<typename std::remove_pointer<CharT>::type>::value and + sizeof(typename std::remove_pointer<CharT>::type) == 1, + int>::type = 0> + input_adapter(CharT b, std::size_t l) + : ia(std::make_shared<input_buffer_adapter>(reinterpret_cast<const char*>(b), l)) {} + + // derived support + + /// input adapter for string literal + template<typename CharT, + typename std::enable_if< + std::is_pointer<CharT>::value and + std::is_integral<typename std::remove_pointer<CharT>::type>::value and + sizeof(typename std::remove_pointer<CharT>::type) == 1, + int>::type = 0> + input_adapter(CharT b) + : input_adapter(reinterpret_cast<const char*>(b), + std::strlen(reinterpret_cast<const char*>(b))) {} + + /// input adapter for iterator range with contiguous storage + template<class IteratorType, + typename std::enable_if< + std::is_same<typename iterator_traits<IteratorType>::iterator_category, std::random_access_iterator_tag>::value, + int>::type = 0> + input_adapter(IteratorType first, IteratorType last) + { +#ifndef NDEBUG + // assertion to check that the iterator range is indeed contiguous, + // see http://stackoverflow.com/a/35008842/266378 for more discussion + const auto is_contiguous = std::accumulate( + first, last, std::pair<bool, int>(true, 0), + [&first](std::pair<bool, int> res, decltype(*first) val) + { + res.first &= (val == *(std::next(std::addressof(*first), res.second++))); + return res; + }).first; + assert(is_contiguous); +#endif + + // assertion to check that each element is 1 byte long + static_assert( + sizeof(typename iterator_traits<IteratorType>::value_type) == 1, + "each element in the iterator range must have the size of 1 byte"); + + const auto len = static_cast<size_t>(std::distance(first, last)); + if (JSON_LIKELY(len > 0)) + { + // there is at least one element: use the address of first + ia = std::make_shared<input_buffer_adapter>(reinterpret_cast<const char*>(&(*first)), len); + } + else + { + // the address of first cannot be used: use nullptr + ia = std::make_shared<input_buffer_adapter>(nullptr, len); + } + } + + /// input adapter for array + template<class T, std::size_t N> + input_adapter(T (&array)[N]) + : input_adapter(std::begin(array), std::end(array)) {} + + /// input adapter for contiguous container + template<class ContiguousContainer, typename + std::enable_if<not std::is_pointer<ContiguousContainer>::value and + std::is_base_of<std::random_access_iterator_tag, typename iterator_traits<decltype(std::begin(std::declval<ContiguousContainer const>()))>::iterator_category>::value, + int>::type = 0> + input_adapter(const ContiguousContainer& c) + : input_adapter(std::begin(c), std::end(c)) {} + + operator input_adapter_t() + { + return ia; + } + + private: + /// the actual adapter + input_adapter_t ia = nullptr; +}; +} // namespace detail +} // namespace nlohmann + +// #include <nlohmann/detail/input/lexer.hpp> + + +#include <clocale> // localeconv +#include <cstddef> // size_t +#include <cstdlib> // strtof, strtod, strtold, strtoll, strtoull +#include <cstdio> // snprintf +#include <initializer_list> // initializer_list +#include <string> // char_traits, string +#include <vector> // vector + +// #include <nlohmann/detail/macro_scope.hpp> + +// #include <nlohmann/detail/input/input_adapters.hpp> + +// #include <nlohmann/detail/input/position_t.hpp> + + +namespace nlohmann +{ +namespace detail +{ +/////////// +// lexer // +/////////// + +/*! +@brief lexical analysis + +This class organizes the lexical analysis during JSON deserialization. +*/ +template<typename BasicJsonType> +class lexer +{ + using number_integer_t = typename BasicJsonType::number_integer_t; + using number_unsigned_t = typename BasicJsonType::number_unsigned_t; + using number_float_t = typename BasicJsonType::number_float_t; + using string_t = typename BasicJsonType::string_t; + + public: + /// token types for the parser + enum class token_type + { + uninitialized, ///< indicating the scanner is uninitialized + literal_true, ///< the `true` literal + literal_false, ///< the `false` literal + literal_null, ///< the `null` literal + value_string, ///< a string -- use get_string() for actual value + value_unsigned, ///< an unsigned integer -- use get_number_unsigned() for actual value + value_integer, ///< a signed integer -- use get_number_integer() for actual value + value_float, ///< an floating point number -- use get_number_float() for actual value + begin_array, ///< the character for array begin `[` + begin_object, ///< the character for object begin `{` + end_array, ///< the character for array end `]` + end_object, ///< the character for object end `}` + name_separator, ///< the name separator `:` + value_separator, ///< the value separator `,` + parse_error, ///< indicating a parse error + end_of_input, ///< indicating the end of the input buffer + literal_or_value ///< a literal or the begin of a value (only for diagnostics) + }; + + /// return name of values of type token_type (only used for errors) + static const char* token_type_name(const token_type t) noexcept + { + switch (t) + { + case token_type::uninitialized: + return "<uninitialized>"; + case token_type::literal_true: + return "true literal"; + case token_type::literal_false: + return "false literal"; + case token_type::literal_null: + return "null literal"; + case token_type::value_string: + return "string literal"; + case lexer::token_type::value_unsigned: + case lexer::token_type::value_integer: + case lexer::token_type::value_float: + return "number literal"; + case token_type::begin_array: + return "'['"; + case token_type::begin_object: + return "'{'"; + case token_type::end_array: + return "']'"; + case token_type::end_object: + return "'}'"; + case token_type::name_separator: + return "':'"; + case token_type::value_separator: + return "','"; + case token_type::parse_error: + return "<parse error>"; + case token_type::end_of_input: + return "end of input"; + case token_type::literal_or_value: + return "'[', '{', or a literal"; + // LCOV_EXCL_START + default: // catch non-enum values + return "unknown token"; + // LCOV_EXCL_STOP + } + } + + explicit lexer(detail::input_adapter_t&& adapter) + : ia(std::move(adapter)), decimal_point_char(get_decimal_point()) {} + + // delete because of pointer members + lexer(const lexer&) = delete; + lexer(lexer&&) = delete; + lexer& operator=(lexer&) = delete; + lexer& operator=(lexer&&) = delete; + ~lexer() = default; + + private: + ///////////////////// + // locales + ///////////////////// + + /// return the locale-dependent decimal point + static char get_decimal_point() noexcept + { + const auto loc = localeconv(); + assert(loc != nullptr); + return (loc->decimal_point == nullptr) ? '.' : *(loc->decimal_point); + } + + ///////////////////// + // scan functions + ///////////////////// + + /*! + @brief get codepoint from 4 hex characters following `\u` + + For input "\u c1 c2 c3 c4" the codepoint is: + (c1 * 0x1000) + (c2 * 0x0100) + (c3 * 0x0010) + c4 + = (c1 << 12) + (c2 << 8) + (c3 << 4) + (c4 << 0) + + Furthermore, the possible characters '0'..'9', 'A'..'F', and 'a'..'f' + must be converted to the integers 0x0..0x9, 0xA..0xF, 0xA..0xF, resp. The + conversion is done by subtracting the offset (0x30, 0x37, and 0x57) + between the ASCII value of the character and the desired integer value. + + @return codepoint (0x0000..0xFFFF) or -1 in case of an error (e.g. EOF or + non-hex character) + */ + int get_codepoint() + { + // this function only makes sense after reading `\u` + assert(current == 'u'); + int codepoint = 0; + + const auto factors = { 12, 8, 4, 0 }; + for (const auto factor : factors) + { + get(); + + if (current >= '0' and current <= '9') + { + codepoint += ((current - 0x30) << factor); + } + else if (current >= 'A' and current <= 'F') + { + codepoint += ((current - 0x37) << factor); + } + else if (current >= 'a' and current <= 'f') + { + codepoint += ((current - 0x57) << factor); + } + else + { + return -1; + } + } + + assert(0x0000 <= codepoint and codepoint <= 0xFFFF); + return codepoint; + } + + /*! + @brief check if the next byte(s) are inside a given range + + Adds the current byte and, for each passed range, reads a new byte and + checks if it is inside the range. If a violation was detected, set up an + error message and return false. Otherwise, return true. + + @param[in] ranges list of integers; interpreted as list of pairs of + inclusive lower and upper bound, respectively + + @pre The passed list @a ranges must have 2, 4, or 6 elements; that is, + 1, 2, or 3 pairs. This precondition is enforced by an assertion. + + @return true if and only if no range violation was detected + */ + bool next_byte_in_range(std::initializer_list<int> ranges) + { + assert(ranges.size() == 2 or ranges.size() == 4 or ranges.size() == 6); + add(current); + + for (auto range = ranges.begin(); range != ranges.end(); ++range) + { + get(); + if (JSON_LIKELY(*range <= current and current <= *(++range))) + { + add(current); + } + else + { + error_message = "invalid string: ill-formed UTF-8 byte"; + return false; + } + } + + return true; + } + + /*! + @brief scan a string literal + + This function scans a string according to Sect. 7 of RFC 7159. While + scanning, bytes are escaped and copied into buffer token_buffer. Then the + function returns successfully, token_buffer is *not* null-terminated (as it + may contain \0 bytes), and token_buffer.size() is the number of bytes in the + string. + + @return token_type::value_string if string could be successfully scanned, + token_type::parse_error otherwise + + @note In case of errors, variable error_message contains a textual + description. + */ + token_type scan_string() + { + // reset token_buffer (ignore opening quote) + reset(); + + // we entered the function by reading an open quote + assert(current == '\"'); + + while (true) + { + // get next character + switch (get()) + { + // end of file while parsing string + case std::char_traits<char>::eof(): + { + error_message = "invalid string: missing closing quote"; + return token_type::parse_error; + } + + // closing quote + case '\"': + { + return token_type::value_string; + } + + // escapes + case '\\': + { + switch (get()) + { + // quotation mark + case '\"': + add('\"'); + break; + // reverse solidus + case '\\': + add('\\'); + break; + // solidus + case '/': + add('/'); + break; + // backspace + case 'b': + add('\b'); + break; + // form feed + case 'f': + add('\f'); + break; + // line feed + case 'n': + add('\n'); + break; + // carriage return + case 'r': + add('\r'); + break; + // tab + case 't': + add('\t'); + break; + + // unicode escapes + case 'u': + { + const int codepoint1 = get_codepoint(); + int codepoint = codepoint1; // start with codepoint1 + + if (JSON_UNLIKELY(codepoint1 == -1)) + { + error_message = "invalid string: '\\u' must be followed by 4 hex digits"; + return token_type::parse_error; + } + + // check if code point is a high surrogate + if (0xD800 <= codepoint1 and codepoint1 <= 0xDBFF) + { + // expect next \uxxxx entry + if (JSON_LIKELY(get() == '\\' and get() == 'u')) + { + const int codepoint2 = get_codepoint(); + + if (JSON_UNLIKELY(codepoint2 == -1)) + { + error_message = "invalid string: '\\u' must be followed by 4 hex digits"; + return token_type::parse_error; + } + + // check if codepoint2 is a low surrogate + if (JSON_LIKELY(0xDC00 <= codepoint2 and codepoint2 <= 0xDFFF)) + { + // overwrite codepoint + codepoint = + // high surrogate occupies the most significant 22 bits + (codepoint1 << 10) + // low surrogate occupies the least significant 15 bits + + codepoint2 + // there is still the 0xD800, 0xDC00 and 0x10000 noise + // in the result so we have to subtract with: + // (0xD800 << 10) + DC00 - 0x10000 = 0x35FDC00 + - 0x35FDC00; + } + else + { + error_message = "invalid string: surrogate U+DC00..U+DFFF must be followed by U+DC00..U+DFFF"; + return token_type::parse_error; + } + } + else + { + error_message = "invalid string: surrogate U+DC00..U+DFFF must be followed by U+DC00..U+DFFF"; + return token_type::parse_error; + } + } + else + { + if (JSON_UNLIKELY(0xDC00 <= codepoint1 and codepoint1 <= 0xDFFF)) + { + error_message = "invalid string: surrogate U+DC00..U+DFFF must follow U+D800..U+DBFF"; + return token_type::parse_error; + } + } + + // result of the above calculation yields a proper codepoint + assert(0x00 <= codepoint and codepoint <= 0x10FFFF); + + // translate codepoint into bytes + if (codepoint < 0x80) + { + // 1-byte characters: 0xxxxxxx (ASCII) + add(codepoint); + } + else if (codepoint <= 0x7FF) + { + // 2-byte characters: 110xxxxx 10xxxxxx + add(0xC0 | (codepoint >> 6)); + add(0x80 | (codepoint & 0x3F)); + } + else if (codepoint <= 0xFFFF) + { + // 3-byte characters: 1110xxxx 10xxxxxx 10xxxxxx + add(0xE0 | (codepoint >> 12)); + add(0x80 | ((codepoint >> 6) & 0x3F)); + add(0x80 | (codepoint & 0x3F)); + } + else + { + // 4-byte characters: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx + add(0xF0 | (codepoint >> 18)); + add(0x80 | ((codepoint >> 12) & 0x3F)); + add(0x80 | ((codepoint >> 6) & 0x3F)); + add(0x80 | (codepoint & 0x3F)); + } + + break; + } + + // other characters after escape + default: + error_message = "invalid string: forbidden character after backslash"; + return token_type::parse_error; + } + + break; + } + + // invalid control characters + case 0x00: + { + error_message = "invalid string: control character U+0000 (NUL) must be escaped to \\u0000"; + return token_type::parse_error; + } + + case 0x01: + { + error_message = "invalid string: control character U+0001 (SOH) must be escaped to \\u0001"; + return token_type::parse_error; + } + + case 0x02: + { + error_message = "invalid string: control character U+0002 (STX) must be escaped to \\u0002"; + return token_type::parse_error; + } + + case 0x03: + { + error_message = "invalid string: control character U+0003 (ETX) must be escaped to \\u0003"; + return token_type::parse_error; + } + + case 0x04: + { + error_message = "invalid string: control character U+0004 (EOT) must be escaped to \\u0004"; + return token_type::parse_error; + } + + case 0x05: + { + error_message = "invalid string: control character U+0005 (ENQ) must be escaped to \\u0005"; + return token_type::parse_error; + } + + case 0x06: + { + error_message = "invalid string: control character U+0006 (ACK) must be escaped to \\u0006"; + return token_type::parse_error; + } + + case 0x07: + { + error_message = "invalid string: control character U+0007 (BEL) must be escaped to \\u0007"; + return token_type::parse_error; + } + + case 0x08: + { + error_message = "invalid string: control character U+0008 (BS) must be escaped to \\u0008 or \\b"; + return token_type::parse_error; + } + + case 0x09: + { + error_message = "invalid string: control character U+0009 (HT) must be escaped to \\u0009 or \\t"; + return token_type::parse_error; + } + + case 0x0A: + { + error_message = "invalid string: control character U+000A (LF) must be escaped to \\u000A or \\n"; + return token_type::parse_error; + } + + case 0x0B: + { + error_message = "invalid string: control character U+000B (VT) must be escaped to \\u000B"; + return token_type::parse_error; + } + + case 0x0C: + { + error_message = "invalid string: control character U+000C (FF) must be escaped to \\u000C or \\f"; + return token_type::parse_error; + } + + case 0x0D: + { + error_message = "invalid string: control character U+000D (CR) must be escaped to \\u000D or \\r"; + return token_type::parse_error; + } + + case 0x0E: + { + error_message = "invalid string: control character U+000E (SO) must be escaped to \\u000E"; + return token_type::parse_error; + } + + case 0x0F: + { + error_message = "invalid string: control character U+000F (SI) must be escaped to \\u000F"; + return token_type::parse_error; + } + + case 0x10: + { + error_message = "invalid string: control character U+0010 (DLE) must be escaped to \\u0010"; + return token_type::parse_error; + } + + case 0x11: + { + error_message = "invalid string: control character U+0011 (DC1) must be escaped to \\u0011"; + return token_type::parse_error; + } + + case 0x12: + { + error_message = "invalid string: control character U+0012 (DC2) must be escaped to \\u0012"; + return token_type::parse_error; + } + + case 0x13: + { + error_message = "invalid string: control character U+0013 (DC3) must be escaped to \\u0013"; + return token_type::parse_error; + } + + case 0x14: + { + error_message = "invalid string: control character U+0014 (DC4) must be escaped to \\u0014"; + return token_type::parse_error; + } + + case 0x15: + { + error_message = "invalid string: control character U+0015 (NAK) must be escaped to \\u0015"; + return token_type::parse_error; + } + + case 0x16: + { + error_message = "invalid string: control character U+0016 (SYN) must be escaped to \\u0016"; + return token_type::parse_error; + } + + case 0x17: + { + error_message = "invalid string: control character U+0017 (ETB) must be escaped to \\u0017"; + return token_type::parse_error; + } + + case 0x18: + { + error_message = "invalid string: control character U+0018 (CAN) must be escaped to \\u0018"; + return token_type::parse_error; + } + + case 0x19: + { + error_message = "invalid string: control character U+0019 (EM) must be escaped to \\u0019"; + return token_type::parse_error; + } + + case 0x1A: + { + error_message = "invalid string: control character U+001A (SUB) must be escaped to \\u001A"; + return token_type::parse_error; + } + + case 0x1B: + { + error_message = "invalid string: control character U+001B (ESC) must be escaped to \\u001B"; + return token_type::parse_error; + } + + case 0x1C: + { + error_message = "invalid string: control character U+001C (FS) must be escaped to \\u001C"; + return token_type::parse_error; + } + + case 0x1D: + { + error_message = "invalid string: control character U+001D (GS) must be escaped to \\u001D"; + return token_type::parse_error; + } + + case 0x1E: + { + error_message = "invalid string: control character U+001E (RS) must be escaped to \\u001E"; + return token_type::parse_error; + } + + case 0x1F: + { + error_message = "invalid string: control character U+001F (US) must be escaped to \\u001F"; + return token_type::parse_error; + } + + // U+0020..U+007F (except U+0022 (quote) and U+005C (backspace)) + case 0x20: + case 0x21: + case 0x23: + case 0x24: + case 0x25: + case 0x26: + case 0x27: + case 0x28: + case 0x29: + case 0x2A: + case 0x2B: + case 0x2C: + case 0x2D: + case 0x2E: + case 0x2F: + case 0x30: + case 0x31: + case 0x32: + case 0x33: + case 0x34: + case 0x35: + case 0x36: + case 0x37: + case 0x38: + case 0x39: + case 0x3A: + case 0x3B: + case 0x3C: + case 0x3D: + case 0x3E: + case 0x3F: + case 0x40: + case 0x41: + case 0x42: + case 0x43: + case 0x44: + case 0x45: + case 0x46: + case 0x47: + case 0x48: + case 0x49: + case 0x4A: + case 0x4B: + case 0x4C: + case 0x4D: + case 0x4E: + case 0x4F: + case 0x50: + case 0x51: + case 0x52: + case 0x53: + case 0x54: + case 0x55: + case 0x56: + case 0x57: + case 0x58: + case 0x59: + case 0x5A: + case 0x5B: + case 0x5D: + case 0x5E: + case 0x5F: + case 0x60: + case 0x61: + case 0x62: + case 0x63: + case 0x64: + case 0x65: + case 0x66: + case 0x67: + case 0x68: + case 0x69: + case 0x6A: + case 0x6B: + case 0x6C: + case 0x6D: + case 0x6E: + case 0x6F: + case 0x70: + case 0x71: + case 0x72: + case 0x73: + case 0x74: + case 0x75: + case 0x76: + case 0x77: + case 0x78: + case 0x79: + case 0x7A: + case 0x7B: + case 0x7C: + case 0x7D: + case 0x7E: + case 0x7F: + { + add(current); + break; + } + + // U+0080..U+07FF: bytes C2..DF 80..BF + case 0xC2: + case 0xC3: + case 0xC4: + case 0xC5: + case 0xC6: + case 0xC7: + case 0xC8: + case 0xC9: + case 0xCA: + case 0xCB: + case 0xCC: + case 0xCD: + case 0xCE: + case 0xCF: + case 0xD0: + case 0xD1: + case 0xD2: + case 0xD3: + case 0xD4: + case 0xD5: + case 0xD6: + case 0xD7: + case 0xD8: + case 0xD9: + case 0xDA: + case 0xDB: + case 0xDC: + case 0xDD: + case 0xDE: + case 0xDF: + { + if (JSON_UNLIKELY(not next_byte_in_range({0x80, 0xBF}))) + { + return token_type::parse_error; + } + break; + } + + // U+0800..U+0FFF: bytes E0 A0..BF 80..BF + case 0xE0: + { + if (JSON_UNLIKELY(not (next_byte_in_range({0xA0, 0xBF, 0x80, 0xBF})))) + { + return token_type::parse_error; + } + break; + } + + // U+1000..U+CFFF: bytes E1..EC 80..BF 80..BF + // U+E000..U+FFFF: bytes EE..EF 80..BF 80..BF + case 0xE1: + case 0xE2: + case 0xE3: + case 0xE4: + case 0xE5: + case 0xE6: + case 0xE7: + case 0xE8: + case 0xE9: + case 0xEA: + case 0xEB: + case 0xEC: + case 0xEE: + case 0xEF: + { + if (JSON_UNLIKELY(not (next_byte_in_range({0x80, 0xBF, 0x80, 0xBF})))) + { + return token_type::parse_error; + } + break; + } + + // U+D000..U+D7FF: bytes ED 80..9F 80..BF + case 0xED: + { + if (JSON_UNLIKELY(not (next_byte_in_range({0x80, 0x9F, 0x80, 0xBF})))) + { + return token_type::parse_error; + } + break; + } + + // U+10000..U+3FFFF F0 90..BF 80..BF 80..BF + case 0xF0: + { + if (JSON_UNLIKELY(not (next_byte_in_range({0x90, 0xBF, 0x80, 0xBF, 0x80, 0xBF})))) + { + return token_type::parse_error; + } + break; + } + + // U+40000..U+FFFFF F1..F3 80..BF 80..BF 80..BF + case 0xF1: + case 0xF2: + case 0xF3: + { + if (JSON_UNLIKELY(not (next_byte_in_range({0x80, 0xBF, 0x80, 0xBF, 0x80, 0xBF})))) + { + return token_type::parse_error; + } + break; + } + + // U+100000..U+10FFFF F4 80..8F 80..BF 80..BF + case 0xF4: + { + if (JSON_UNLIKELY(not (next_byte_in_range({0x80, 0x8F, 0x80, 0xBF, 0x80, 0xBF})))) + { + return token_type::parse_error; + } + break; + } + + // remaining bytes (80..C1 and F5..FF) are ill-formed + default: + { + error_message = "invalid string: ill-formed UTF-8 byte"; + return token_type::parse_error; + } + } + } + } + + static void strtof(float& f, const char* str, char** endptr) noexcept + { + f = std::strtof(str, endptr); + } + + static void strtof(double& f, const char* str, char** endptr) noexcept + { + f = std::strtod(str, endptr); + } + + static void strtof(long double& f, const char* str, char** endptr) noexcept + { + f = std::strtold(str, endptr); + } + + /*! + @brief scan a number literal + + This function scans a string according to Sect. 6 of RFC 7159. + + The function is realized with a deterministic finite state machine derived + from the grammar described in RFC 7159. Starting in state "init", the + input is read and used to determined the next state. Only state "done" + accepts the number. State "error" is a trap state to model errors. In the + table below, "anything" means any character but the ones listed before. + + state | 0 | 1-9 | e E | + | - | . | anything + ---------|----------|----------|----------|---------|---------|----------|----------- + init | zero | any1 | [error] | [error] | minus | [error] | [error] + minus | zero | any1 | [error] | [error] | [error] | [error] | [error] + zero | done | done | exponent | done | done | decimal1 | done + any1 | any1 | any1 | exponent | done | done | decimal1 | done + decimal1 | decimal2 | [error] | [error] | [error] | [error] | [error] | [error] + decimal2 | decimal2 | decimal2 | exponent | done | done | done | done + exponent | any2 | any2 | [error] | sign | sign | [error] | [error] + sign | any2 | any2 | [error] | [error] | [error] | [error] | [error] + any2 | any2 | any2 | done | done | done | done | done + + The state machine is realized with one label per state (prefixed with + "scan_number_") and `goto` statements between them. The state machine + contains cycles, but any cycle can be left when EOF is read. Therefore, + the function is guaranteed to terminate. + + During scanning, the read bytes are stored in token_buffer. This string is + then converted to a signed integer, an unsigned integer, or a + floating-point number. + + @return token_type::value_unsigned, token_type::value_integer, or + token_type::value_float if number could be successfully scanned, + token_type::parse_error otherwise + + @note The scanner is independent of the current locale. Internally, the + locale's decimal point is used instead of `.` to work with the + locale-dependent converters. + */ + token_type scan_number() // lgtm [cpp/use-of-goto] + { + // reset token_buffer to store the number's bytes + reset(); + + // the type of the parsed number; initially set to unsigned; will be + // changed if minus sign, decimal point or exponent is read + token_type number_type = token_type::value_unsigned; + + // state (init): we just found out we need to scan a number + switch (current) + { + case '-': + { + add(current); + goto scan_number_minus; + } + + case '0': + { + add(current); + goto scan_number_zero; + } + + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + { + add(current); + goto scan_number_any1; + } + + // LCOV_EXCL_START + default: + { + // all other characters are rejected outside scan_number() + assert(false); + } + // LCOV_EXCL_STOP + } + +scan_number_minus: + // state: we just parsed a leading minus sign + number_type = token_type::value_integer; + switch (get()) + { + case '0': + { + add(current); + goto scan_number_zero; + } + + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + { + add(current); + goto scan_number_any1; + } + + default: + { + error_message = "invalid number; expected digit after '-'"; + return token_type::parse_error; + } + } + +scan_number_zero: + // state: we just parse a zero (maybe with a leading minus sign) + switch (get()) + { + case '.': + { + add(decimal_point_char); + goto scan_number_decimal1; + } + + case 'e': + case 'E': + { + add(current); + goto scan_number_exponent; + } + + default: + goto scan_number_done; + } + +scan_number_any1: + // state: we just parsed a number 0-9 (maybe with a leading minus sign) + switch (get()) + { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + { + add(current); + goto scan_number_any1; + } + + case '.': + { + add(decimal_point_char); + goto scan_number_decimal1; + } + + case 'e': + case 'E': + { + add(current); + goto scan_number_exponent; + } + + default: + goto scan_number_done; + } + +scan_number_decimal1: + // state: we just parsed a decimal point + number_type = token_type::value_float; + switch (get()) + { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + { + add(current); + goto scan_number_decimal2; + } + + default: + { + error_message = "invalid number; expected digit after '.'"; + return token_type::parse_error; + } + } + +scan_number_decimal2: + // we just parsed at least one number after a decimal point + switch (get()) + { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + { + add(current); + goto scan_number_decimal2; + } + + case 'e': + case 'E': + { + add(current); + goto scan_number_exponent; + } + + default: + goto scan_number_done; + } + +scan_number_exponent: + // we just parsed an exponent + number_type = token_type::value_float; + switch (get()) + { + case '+': + case '-': + { + add(current); + goto scan_number_sign; + } + + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + { + add(current); + goto scan_number_any2; + } + + default: + { + error_message = + "invalid number; expected '+', '-', or digit after exponent"; + return token_type::parse_error; + } + } + +scan_number_sign: + // we just parsed an exponent sign + switch (get()) + { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + { + add(current); + goto scan_number_any2; + } + + default: + { + error_message = "invalid number; expected digit after exponent sign"; + return token_type::parse_error; + } + } + +scan_number_any2: + // we just parsed a number after the exponent or exponent sign + switch (get()) + { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + { + add(current); + goto scan_number_any2; + } + + default: + goto scan_number_done; + } + +scan_number_done: + // unget the character after the number (we only read it to know that + // we are done scanning a number) + unget(); + + char* endptr = nullptr; + errno = 0; + + // try to parse integers first and fall back to floats + if (number_type == token_type::value_unsigned) + { + const auto x = std::strtoull(token_buffer.data(), &endptr, 10); + + // we checked the number format before + assert(endptr == token_buffer.data() + token_buffer.size()); + + if (errno == 0) + { + value_unsigned = static_cast<number_unsigned_t>(x); + if (value_unsigned == x) + { + return token_type::value_unsigned; + } + } + } + else if (number_type == token_type::value_integer) + { + const auto x = std::strtoll(token_buffer.data(), &endptr, 10); + + // we checked the number format before + assert(endptr == token_buffer.data() + token_buffer.size()); + + if (errno == 0) + { + value_integer = static_cast<number_integer_t>(x); + if (value_integer == x) + { + return token_type::value_integer; + } + } + } + + // this code is reached if we parse a floating-point number or if an + // integer conversion above failed + strtof(value_float, token_buffer.data(), &endptr); + + // we checked the number format before + assert(endptr == token_buffer.data() + token_buffer.size()); + + return token_type::value_float; + } + + /*! + @param[in] literal_text the literal text to expect + @param[in] length the length of the passed literal text + @param[in] return_type the token type to return on success + */ + token_type scan_literal(const char* literal_text, const std::size_t length, + token_type return_type) + { + assert(current == literal_text[0]); + for (std::size_t i = 1; i < length; ++i) + { + if (JSON_UNLIKELY(get() != literal_text[i])) + { + error_message = "invalid literal"; + return token_type::parse_error; + } + } + return return_type; + } + + ///////////////////// + // input management + ///////////////////// + + /// reset token_buffer; current character is beginning of token + void reset() noexcept + { + token_buffer.clear(); + token_string.clear(); + token_string.push_back(std::char_traits<char>::to_char_type(current)); + } + + /* + @brief get next character from the input + + This function provides the interface to the used input adapter. It does + not throw in case the input reached EOF, but returns a + `std::char_traits<char>::eof()` in that case. Stores the scanned characters + for use in error messages. + + @return character read from the input + */ + std::char_traits<char>::int_type get() + { + ++position.chars_read_total; + ++position.chars_read_current_line; + + if (next_unget) + { + // just reset the next_unget variable and work with current + next_unget = false; + } + else + { + current = ia->get_character(); + } + + if (JSON_LIKELY(current != std::char_traits<char>::eof())) + { + token_string.push_back(std::char_traits<char>::to_char_type(current)); + } + + if (current == '\n') + { + ++position.lines_read; + ++position.chars_read_current_line = 0; + } + + return current; + } + + /*! + @brief unget current character (read it again on next get) + + We implement unget by setting variable next_unget to true. The input is not + changed - we just simulate ungetting by modifying chars_read_total, + chars_read_current_line, and token_string. The next call to get() will + behave as if the unget character is read again. + */ + void unget() + { + next_unget = true; + + --position.chars_read_total; + + // in case we "unget" a newline, we have to also decrement the lines_read + if (position.chars_read_current_line == 0) + { + if (position.lines_read > 0) + { + --position.lines_read; + } + } + else + { + --position.chars_read_current_line; + } + + if (JSON_LIKELY(current != std::char_traits<char>::eof())) + { + assert(token_string.size() != 0); + token_string.pop_back(); + } + } + + /// add a character to token_buffer + void add(int c) + { + token_buffer.push_back(std::char_traits<char>::to_char_type(c)); + } + + public: + ///////////////////// + // value getters + ///////////////////// + + /// return integer value + constexpr number_integer_t get_number_integer() const noexcept + { + return value_integer; + } + + /// return unsigned integer value + constexpr number_unsigned_t get_number_unsigned() const noexcept + { + return value_unsigned; + } + + /// return floating-point value + constexpr number_float_t get_number_float() const noexcept + { + return value_float; + } + + /// return current string value (implicitly resets the token; useful only once) + string_t& get_string() + { + return token_buffer; + } + + ///////////////////// + // diagnostics + ///////////////////// + + /// return position of last read token + constexpr position_t get_position() const noexcept + { + return position; + } + + /// return the last read token (for errors only). Will never contain EOF + /// (an arbitrary value that is not a valid char value, often -1), because + /// 255 may legitimately occur. May contain NUL, which should be escaped. + std::string get_token_string() const + { + // escape control characters + std::string result; + for (const auto c : token_string) + { + if ('\x00' <= c and c <= '\x1F') + { + // escape control characters + char cs[9]; + (std::snprintf)(cs, 9, "<U+%.4X>", static_cast<unsigned char>(c)); + result += cs; + } + else + { + // add character as is + result.push_back(c); + } + } + + return result; + } + + /// return syntax error message + constexpr const char* get_error_message() const noexcept + { + return error_message; + } + + ///////////////////// + // actual scanner + ///////////////////// + + /*! + @brief skip the UTF-8 byte order mark + @return true iff there is no BOM or the correct BOM has been skipped + */ + bool skip_bom() + { + if (get() == 0xEF) + { + // check if we completely parse the BOM + return get() == 0xBB and get() == 0xBF; + } + + // the first character is not the beginning of the BOM; unget it to + // process is later + unget(); + return true; + } + + token_type scan() + { + // initially, skip the BOM + if (position.chars_read_total == 0 and not skip_bom()) + { + error_message = "invalid BOM; must be 0xEF 0xBB 0xBF if given"; + return token_type::parse_error; + } + + // read next character and ignore whitespace + do + { + get(); + } + while (current == ' ' or current == '\t' or current == '\n' or current == '\r'); + + switch (current) + { + // structural characters + case '[': + return token_type::begin_array; + case ']': + return token_type::end_array; + case '{': + return token_type::begin_object; + case '}': + return token_type::end_object; + case ':': + return token_type::name_separator; + case ',': + return token_type::value_separator; + + // literals + case 't': + return scan_literal("true", 4, token_type::literal_true); + case 'f': + return scan_literal("false", 5, token_type::literal_false); + case 'n': + return scan_literal("null", 4, token_type::literal_null); + + // string + case '\"': + return scan_string(); + + // number + case '-': + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + return scan_number(); + + // end of input (the null byte is needed when parsing from + // string literals) + case '\0': + case std::char_traits<char>::eof(): + return token_type::end_of_input; + + // error + default: + error_message = "invalid literal"; + return token_type::parse_error; + } + } + + private: + /// input adapter + detail::input_adapter_t ia = nullptr; + + /// the current character + std::char_traits<char>::int_type current = std::char_traits<char>::eof(); + + /// whether the next get() call should just return current + bool next_unget = false; + + /// the start position of the current token + position_t position; + + /// raw input token string (for error messages) + std::vector<char> token_string {}; + + /// buffer for variable-length tokens (numbers, strings) + string_t token_buffer {}; + + /// a description of occurred lexer errors + const char* error_message = ""; + + // number values + number_integer_t value_integer = 0; + number_unsigned_t value_unsigned = 0; + number_float_t value_float = 0; + + /// the decimal point + const char decimal_point_char = '.'; +}; +} // namespace detail +} // namespace nlohmann + +// #include <nlohmann/detail/input/parser.hpp> + + +#include <cassert> // assert +#include <cmath> // isfinite +#include <cstdint> // uint8_t +#include <functional> // function +#include <string> // string +#include <utility> // move + +// #include <nlohmann/detail/exceptions.hpp> + +// #include <nlohmann/detail/macro_scope.hpp> + +// #include <nlohmann/detail/meta/is_sax.hpp> + + +#include <cstdint> // size_t +#include <utility> // declval + +// #include <nlohmann/detail/meta/detected.hpp> + +// #include <nlohmann/detail/meta/type_traits.hpp> + + +namespace nlohmann +{ +namespace detail +{ +template <typename T> +using null_function_t = decltype(std::declval<T&>().null()); + +template <typename T> +using boolean_function_t = + decltype(std::declval<T&>().boolean(std::declval<bool>())); + +template <typename T, typename Integer> +using number_integer_function_t = + decltype(std::declval<T&>().number_integer(std::declval<Integer>())); + +template <typename T, typename Unsigned> +using number_unsigned_function_t = + decltype(std::declval<T&>().number_unsigned(std::declval<Unsigned>())); + +template <typename T, typename Float, typename String> +using number_float_function_t = decltype(std::declval<T&>().number_float( + std::declval<Float>(), std::declval<const String&>())); + +template <typename T, typename String> +using string_function_t = + decltype(std::declval<T&>().string(std::declval<String&>())); + +template <typename T> +using start_object_function_t = + decltype(std::declval<T&>().start_object(std::declval<std::size_t>())); + +template <typename T, typename String> +using key_function_t = + decltype(std::declval<T&>().key(std::declval<String&>())); + +template <typename T> +using end_object_function_t = decltype(std::declval<T&>().end_object()); + +template <typename T> +using start_array_function_t = + decltype(std::declval<T&>().start_array(std::declval<std::size_t>())); + +template <typename T> +using end_array_function_t = decltype(std::declval<T&>().end_array()); + +template <typename T, typename Exception> +using parse_error_function_t = decltype(std::declval<T&>().parse_error( + std::declval<std::size_t>(), std::declval<const std::string&>(), + std::declval<const Exception&>())); + +template <typename SAX, typename BasicJsonType> +struct is_sax +{ + private: + static_assert(is_basic_json<BasicJsonType>::value, + "BasicJsonType must be of type basic_json<...>"); + + using number_integer_t = typename BasicJsonType::number_integer_t; + using number_unsigned_t = typename BasicJsonType::number_unsigned_t; + using number_float_t = typename BasicJsonType::number_float_t; + using string_t = typename BasicJsonType::string_t; + using exception_t = typename BasicJsonType::exception; + + public: + static constexpr bool value = + is_detected_exact<bool, null_function_t, SAX>::value && + is_detected_exact<bool, boolean_function_t, SAX>::value && + is_detected_exact<bool, number_integer_function_t, SAX, + number_integer_t>::value && + is_detected_exact<bool, number_unsigned_function_t, SAX, + number_unsigned_t>::value && + is_detected_exact<bool, number_float_function_t, SAX, number_float_t, + string_t>::value && + is_detected_exact<bool, string_function_t, SAX, string_t>::value && + is_detected_exact<bool, start_object_function_t, SAX>::value && + is_detected_exact<bool, key_function_t, SAX, string_t>::value && + is_detected_exact<bool, end_object_function_t, SAX>::value && + is_detected_exact<bool, start_array_function_t, SAX>::value && + is_detected_exact<bool, end_array_function_t, SAX>::value && + is_detected_exact<bool, parse_error_function_t, SAX, exception_t>::value; +}; + +template <typename SAX, typename BasicJsonType> +struct is_sax_static_asserts +{ + private: + static_assert(is_basic_json<BasicJsonType>::value, + "BasicJsonType must be of type basic_json<...>"); + + using number_integer_t = typename BasicJsonType::number_integer_t; + using number_unsigned_t = typename BasicJsonType::number_unsigned_t; + using number_float_t = typename BasicJsonType::number_float_t; + using string_t = typename BasicJsonType::string_t; + using exception_t = typename BasicJsonType::exception; + + public: + static_assert(is_detected_exact<bool, null_function_t, SAX>::value, + "Missing/invalid function: bool null()"); + static_assert(is_detected_exact<bool, boolean_function_t, SAX>::value, + "Missing/invalid function: bool boolean(bool)"); + static_assert(is_detected_exact<bool, boolean_function_t, SAX>::value, + "Missing/invalid function: bool boolean(bool)"); + static_assert( + is_detected_exact<bool, number_integer_function_t, SAX, + number_integer_t>::value, + "Missing/invalid function: bool number_integer(number_integer_t)"); + static_assert( + is_detected_exact<bool, number_unsigned_function_t, SAX, + number_unsigned_t>::value, + "Missing/invalid function: bool number_unsigned(number_unsigned_t)"); + static_assert(is_detected_exact<bool, number_float_function_t, SAX, + number_float_t, string_t>::value, + "Missing/invalid function: bool number_float(number_float_t, const string_t&)"); + static_assert( + is_detected_exact<bool, string_function_t, SAX, string_t>::value, + "Missing/invalid function: bool string(string_t&)"); + static_assert(is_detected_exact<bool, start_object_function_t, SAX>::value, + "Missing/invalid function: bool start_object(std::size_t)"); + static_assert(is_detected_exact<bool, key_function_t, SAX, string_t>::value, + "Missing/invalid function: bool key(string_t&)"); + static_assert(is_detected_exact<bool, end_object_function_t, SAX>::value, + "Missing/invalid function: bool end_object()"); + static_assert(is_detected_exact<bool, start_array_function_t, SAX>::value, + "Missing/invalid function: bool start_array(std::size_t)"); + static_assert(is_detected_exact<bool, end_array_function_t, SAX>::value, + "Missing/invalid function: bool end_array()"); + static_assert( + is_detected_exact<bool, parse_error_function_t, SAX, exception_t>::value, + "Missing/invalid function: bool parse_error(std::size_t, const " + "std::string&, const exception&)"); +}; +} // namespace detail +} // namespace nlohmann + +// #include <nlohmann/detail/input/input_adapters.hpp> + +// #include <nlohmann/detail/input/json_sax.hpp> + + +#include <cstddef> +#include <string> +#include <vector> + +// #include <nlohmann/detail/input/parser.hpp> + +// #include <nlohmann/detail/exceptions.hpp> + + +namespace nlohmann +{ + +/*! +@brief SAX interface + +This class describes the SAX interface used by @ref nlohmann::json::sax_parse. +Each function is called in different situations while the input is parsed. The +boolean return value informs the parser whether to continue processing the +input. +*/ +template<typename BasicJsonType> +struct json_sax +{ + /// type for (signed) integers + using number_integer_t = typename BasicJsonType::number_integer_t; + /// type for unsigned integers + using number_unsigned_t = typename BasicJsonType::number_unsigned_t; + /// type for floating-point numbers + using number_float_t = typename BasicJsonType::number_float_t; + /// type for strings + using string_t = typename BasicJsonType::string_t; + + /*! + @brief a null value was read + @return whether parsing should proceed + */ + virtual bool null() = 0; + + /*! + @brief a boolean value was read + @param[in] val boolean value + @return whether parsing should proceed + */ + virtual bool boolean(bool val) = 0; + + /*! + @brief an integer number was read + @param[in] val integer value + @return whether parsing should proceed + */ + virtual bool number_integer(number_integer_t val) = 0; + + /*! + @brief an unsigned integer number was read + @param[in] val unsigned integer value + @return whether parsing should proceed + */ + virtual bool number_unsigned(number_unsigned_t val) = 0; + + /*! + @brief an floating-point number was read + @param[in] val floating-point value + @param[in] s raw token value + @return whether parsing should proceed + */ + virtual bool number_float(number_float_t val, const string_t& s) = 0; + + /*! + @brief a string was read + @param[in] val string value + @return whether parsing should proceed + @note It is safe to move the passed string. + */ + virtual bool string(string_t& val) = 0; + + /*! + @brief the beginning of an object was read + @param[in] elements number of object elements or -1 if unknown + @return whether parsing should proceed + @note binary formats may report the number of elements + */ + virtual bool start_object(std::size_t elements) = 0; + + /*! + @brief an object key was read + @param[in] val object key + @return whether parsing should proceed + @note It is safe to move the passed string. + */ + virtual bool key(string_t& val) = 0; + + /*! + @brief the end of an object was read + @return whether parsing should proceed + */ + virtual bool end_object() = 0; + + /*! + @brief the beginning of an array was read + @param[in] elements number of array elements or -1 if unknown + @return whether parsing should proceed + @note binary formats may report the number of elements + */ + virtual bool start_array(std::size_t elements) = 0; + + /*! + @brief the end of an array was read + @return whether parsing should proceed + */ + virtual bool end_array() = 0; + + /*! + @brief a parse error occurred + @param[in] position the position in the input where the error occurs + @param[in] last_token the last read token + @param[in] ex an exception object describing the error + @return whether parsing should proceed (must return false) + */ + virtual bool parse_error(std::size_t position, + const std::string& last_token, + const detail::exception& ex) = 0; + + virtual ~json_sax() = default; +}; + + +namespace detail +{ +/*! +@brief SAX implementation to create a JSON value from SAX events + +This class implements the @ref json_sax interface and processes the SAX events +to create a JSON value which makes it basically a DOM parser. The structure or +hierarchy of the JSON value is managed by the stack `ref_stack` which contains +a pointer to the respective array or object for each recursion depth. + +After successful parsing, the value that is passed by reference to the +constructor contains the parsed value. + +@tparam BasicJsonType the JSON type +*/ +template<typename BasicJsonType> +class json_sax_dom_parser +{ + public: + using number_integer_t = typename BasicJsonType::number_integer_t; + using number_unsigned_t = typename BasicJsonType::number_unsigned_t; + using number_float_t = typename BasicJsonType::number_float_t; + using string_t = typename BasicJsonType::string_t; + + /*! + @param[in, out] r reference to a JSON value that is manipulated while + parsing + @param[in] allow_exceptions_ whether parse errors yield exceptions + */ + explicit json_sax_dom_parser(BasicJsonType& r, const bool allow_exceptions_ = true) + : root(r), allow_exceptions(allow_exceptions_) + {} + + bool null() + { + handle_value(nullptr); + return true; + } + + bool boolean(bool val) + { + handle_value(val); + return true; + } + + bool number_integer(number_integer_t val) + { + handle_value(val); + return true; + } + + bool number_unsigned(number_unsigned_t val) + { + handle_value(val); + return true; + } + + bool number_float(number_float_t val, const string_t& /*unused*/) + { + handle_value(val); + return true; + } + + bool string(string_t& val) + { + handle_value(val); + return true; + } + + bool start_object(std::size_t len) + { + ref_stack.push_back(handle_value(BasicJsonType::value_t::object)); + + if (JSON_UNLIKELY(len != std::size_t(-1) and len > ref_stack.back()->max_size())) + { + JSON_THROW(out_of_range::create(408, + "excessive object size: " + std::to_string(len))); + } + + return true; + } + + bool key(string_t& val) + { + // add null at given key and store the reference for later + object_element = &(ref_stack.back()->m_value.object->operator[](val)); + return true; + } + + bool end_object() + { + ref_stack.pop_back(); + return true; + } + + bool start_array(std::size_t len) + { + ref_stack.push_back(handle_value(BasicJsonType::value_t::array)); + + if (JSON_UNLIKELY(len != std::size_t(-1) and len > ref_stack.back()->max_size())) + { + JSON_THROW(out_of_range::create(408, + "excessive array size: " + std::to_string(len))); + } + + return true; + } + + bool end_array() + { + ref_stack.pop_back(); + return true; + } + + bool parse_error(std::size_t /*unused*/, const std::string& /*unused*/, + const detail::exception& ex) + { + errored = true; + if (allow_exceptions) + { + // determine the proper exception type from the id + switch ((ex.id / 100) % 100) + { + case 1: + JSON_THROW(*reinterpret_cast<const detail::parse_error*>(&ex)); + case 4: + JSON_THROW(*reinterpret_cast<const detail::out_of_range*>(&ex)); + // LCOV_EXCL_START + case 2: + JSON_THROW(*reinterpret_cast<const detail::invalid_iterator*>(&ex)); + case 3: + JSON_THROW(*reinterpret_cast<const detail::type_error*>(&ex)); + case 5: + JSON_THROW(*reinterpret_cast<const detail::other_error*>(&ex)); + default: + assert(false); + // LCOV_EXCL_STOP + } + } + return false; + } + + constexpr bool is_errored() const + { + return errored; + } + + private: + /*! + @invariant If the ref stack is empty, then the passed value will be the new + root. + @invariant If the ref stack contains a value, then it is an array or an + object to which we can add elements + */ + template<typename Value> + BasicJsonType* handle_value(Value&& v) + { + if (ref_stack.empty()) + { + root = BasicJsonType(std::forward<Value>(v)); + return &root; + } + + assert(ref_stack.back()->is_array() or ref_stack.back()->is_object()); + + if (ref_stack.back()->is_array()) + { + ref_stack.back()->m_value.array->emplace_back(std::forward<Value>(v)); + return &(ref_stack.back()->m_value.array->back()); + } + else + { + assert(object_element); + *object_element = BasicJsonType(std::forward<Value>(v)); + return object_element; + } + } + + /// the parsed JSON value + BasicJsonType& root; + /// stack to model hierarchy of values + std::vector<BasicJsonType*> ref_stack; + /// helper to hold the reference for the next object element + BasicJsonType* object_element = nullptr; + /// whether a syntax error occurred + bool errored = false; + /// whether to throw exceptions in case of errors + const bool allow_exceptions = true; +}; + +template<typename BasicJsonType> +class json_sax_dom_callback_parser +{ + public: + using number_integer_t = typename BasicJsonType::number_integer_t; + using number_unsigned_t = typename BasicJsonType::number_unsigned_t; + using number_float_t = typename BasicJsonType::number_float_t; + using string_t = typename BasicJsonType::string_t; + using parser_callback_t = typename BasicJsonType::parser_callback_t; + using parse_event_t = typename BasicJsonType::parse_event_t; + + json_sax_dom_callback_parser(BasicJsonType& r, + const parser_callback_t cb, + const bool allow_exceptions_ = true) + : root(r), callback(cb), allow_exceptions(allow_exceptions_) + { + keep_stack.push_back(true); + } + + bool null() + { + handle_value(nullptr); + return true; + } + + bool boolean(bool val) + { + handle_value(val); + return true; + } + + bool number_integer(number_integer_t val) + { + handle_value(val); + return true; + } + + bool number_unsigned(number_unsigned_t val) + { + handle_value(val); + return true; + } + + bool number_float(number_float_t val, const string_t& /*unused*/) + { + handle_value(val); + return true; + } + + bool string(string_t& val) + { + handle_value(val); + return true; + } + + bool start_object(std::size_t len) + { + // check callback for object start + const bool keep = callback(static_cast<int>(ref_stack.size()), parse_event_t::object_start, discarded); + keep_stack.push_back(keep); + + auto val = handle_value(BasicJsonType::value_t::object, true); + ref_stack.push_back(val.second); + + // check object limit + if (ref_stack.back()) + { + if (JSON_UNLIKELY(len != std::size_t(-1) and len > ref_stack.back()->max_size())) + { + JSON_THROW(out_of_range::create(408, + "excessive object size: " + std::to_string(len))); + } + } + + return true; + } + + bool key(string_t& val) + { + BasicJsonType k = BasicJsonType(val); + + // check callback for key + const bool keep = callback(static_cast<int>(ref_stack.size()), parse_event_t::key, k); + key_keep_stack.push_back(keep); + + // add discarded value at given key and store the reference for later + if (keep and ref_stack.back()) + { + object_element = &(ref_stack.back()->m_value.object->operator[](val) = discarded); + } + + return true; + } + + bool end_object() + { + if (ref_stack.back()) + { + if (not callback(static_cast<int>(ref_stack.size()) - 1, parse_event_t::object_end, *ref_stack.back())) + { + // discard object + *ref_stack.back() = discarded; + } + } + + assert(not ref_stack.empty()); + assert(not keep_stack.empty()); + ref_stack.pop_back(); + keep_stack.pop_back(); + + if (not ref_stack.empty() and ref_stack.back()) + { + // remove discarded value + if (ref_stack.back()->is_object()) + { + for (auto it = ref_stack.back()->begin(); it != ref_stack.back()->end(); ++it) + { + if (it->is_discarded()) + { + ref_stack.back()->erase(it); + break; + } + } + } + } + + return true; + } + + bool start_array(std::size_t len) + { + const bool keep = callback(static_cast<int>(ref_stack.size()), parse_event_t::array_start, discarded); + keep_stack.push_back(keep); + + auto val = handle_value(BasicJsonType::value_t::array, true); + ref_stack.push_back(val.second); + + // check array limit + if (ref_stack.back()) + { + if (JSON_UNLIKELY(len != std::size_t(-1) and len > ref_stack.back()->max_size())) + { + JSON_THROW(out_of_range::create(408, + "excessive array size: " + std::to_string(len))); + } + } + + return true; + } + + bool end_array() + { + bool keep = true; + + if (ref_stack.back()) + { + keep = callback(static_cast<int>(ref_stack.size()) - 1, parse_event_t::array_end, *ref_stack.back()); + if (not keep) + { + // discard array + *ref_stack.back() = discarded; + } + } + + assert(not ref_stack.empty()); + assert(not keep_stack.empty()); + ref_stack.pop_back(); + keep_stack.pop_back(); + + // remove discarded value + if (not keep and not ref_stack.empty()) + { + if (ref_stack.back()->is_array()) + { + ref_stack.back()->m_value.array->pop_back(); + } + } + + return true; + } + + bool parse_error(std::size_t /*unused*/, const std::string& /*unused*/, + const detail::exception& ex) + { + errored = true; + if (allow_exceptions) + { + // determine the proper exception type from the id + switch ((ex.id / 100) % 100) + { + case 1: + JSON_THROW(*reinterpret_cast<const detail::parse_error*>(&ex)); + case 4: + JSON_THROW(*reinterpret_cast<const detail::out_of_range*>(&ex)); + // LCOV_EXCL_START + case 2: + JSON_THROW(*reinterpret_cast<const detail::invalid_iterator*>(&ex)); + case 3: + JSON_THROW(*reinterpret_cast<const detail::type_error*>(&ex)); + case 5: + JSON_THROW(*reinterpret_cast<const detail::other_error*>(&ex)); + default: + assert(false); + // LCOV_EXCL_STOP + } + } + return false; + } + + constexpr bool is_errored() const + { + return errored; + } + + private: + /*! + @param[in] v value to add to the JSON value we build during parsing + @param[in] skip_callback whether we should skip calling the callback + function; this is required after start_array() and + start_object() SAX events, because otherwise we would call the + callback function with an empty array or object, respectively. + + @invariant If the ref stack is empty, then the passed value will be the new + root. + @invariant If the ref stack contains a value, then it is an array or an + object to which we can add elements + + @return pair of boolean (whether value should be kept) and pointer (to the + passed value in the ref_stack hierarchy; nullptr if not kept) + */ + template<typename Value> + std::pair<bool, BasicJsonType*> handle_value(Value&& v, const bool skip_callback = false) + { + assert(not keep_stack.empty()); + + // do not handle this value if we know it would be added to a discarded + // container + if (not keep_stack.back()) + { + return {false, nullptr}; + } + + // create value + auto value = BasicJsonType(std::forward<Value>(v)); + + // check callback + const bool keep = skip_callback or callback(static_cast<int>(ref_stack.size()), parse_event_t::value, value); + + // do not handle this value if we just learnt it shall be discarded + if (not keep) + { + return {false, nullptr}; + } + + if (ref_stack.empty()) + { + root = std::move(value); + return {true, &root}; + } + + // skip this value if we already decided to skip the parent + // (https://github.com/nlohmann/json/issues/971#issuecomment-413678360) + if (not ref_stack.back()) + { + return {false, nullptr}; + } + + // we now only expect arrays and objects + assert(ref_stack.back()->is_array() or ref_stack.back()->is_object()); + + if (ref_stack.back()->is_array()) + { + ref_stack.back()->m_value.array->push_back(std::move(value)); + return {true, &(ref_stack.back()->m_value.array->back())}; + } + else + { + // check if we should store an element for the current key + assert(not key_keep_stack.empty()); + const bool store_element = key_keep_stack.back(); + key_keep_stack.pop_back(); + + if (not store_element) + { + return {false, nullptr}; + } + + assert(object_element); + *object_element = std::move(value); + return {true, object_element}; + } + } + + /// the parsed JSON value + BasicJsonType& root; + /// stack to model hierarchy of values + std::vector<BasicJsonType*> ref_stack; + /// stack to manage which values to keep + std::vector<bool> keep_stack; + /// stack to manage which object keys to keep + std::vector<bool> key_keep_stack; + /// helper to hold the reference for the next object element + BasicJsonType* object_element = nullptr; + /// whether a syntax error occurred + bool errored = false; + /// callback function + const parser_callback_t callback = nullptr; + /// whether to throw exceptions in case of errors + const bool allow_exceptions = true; + /// a discarded value for the callback + BasicJsonType discarded = BasicJsonType::value_t::discarded; +}; + +template<typename BasicJsonType> +class json_sax_acceptor +{ + public: + using number_integer_t = typename BasicJsonType::number_integer_t; + using number_unsigned_t = typename BasicJsonType::number_unsigned_t; + using number_float_t = typename BasicJsonType::number_float_t; + using string_t = typename BasicJsonType::string_t; + + bool null() + { + return true; + } + + bool boolean(bool /*unused*/) + { + return true; + } + + bool number_integer(number_integer_t /*unused*/) + { + return true; + } + + bool number_unsigned(number_unsigned_t /*unused*/) + { + return true; + } + + bool number_float(number_float_t /*unused*/, const string_t& /*unused*/) + { + return true; + } + + bool string(string_t& /*unused*/) + { + return true; + } + + bool start_object(std::size_t /*unused*/ = std::size_t(-1)) + { + return true; + } + + bool key(string_t& /*unused*/) + { + return true; + } + + bool end_object() + { + return true; + } + + bool start_array(std::size_t /*unused*/ = std::size_t(-1)) + { + return true; + } + + bool end_array() + { + return true; + } + + bool parse_error(std::size_t /*unused*/, const std::string& /*unused*/, const detail::exception& /*unused*/) + { + return false; + } +}; +} // namespace detail + +} // namespace nlohmann + +// #include <nlohmann/detail/input/lexer.hpp> + +// #include <nlohmann/detail/value_t.hpp> + + +namespace nlohmann +{ +namespace detail +{ +//////////// +// parser // +//////////// + +/*! +@brief syntax analysis + +This class implements a recursive decent parser. +*/ +template<typename BasicJsonType> +class parser +{ + using number_integer_t = typename BasicJsonType::number_integer_t; + using number_unsigned_t = typename BasicJsonType::number_unsigned_t; + using number_float_t = typename BasicJsonType::number_float_t; + using string_t = typename BasicJsonType::string_t; + using lexer_t = lexer<BasicJsonType>; + using token_type = typename lexer_t::token_type; + + public: + enum class parse_event_t : uint8_t + { + /// the parser read `{` and started to process a JSON object + object_start, + /// the parser read `}` and finished processing a JSON object + object_end, + /// the parser read `[` and started to process a JSON array + array_start, + /// the parser read `]` and finished processing a JSON array + array_end, + /// the parser read a key of a value in an object + key, + /// the parser finished reading a JSON value + value + }; + + using parser_callback_t = + std::function<bool(int depth, parse_event_t event, BasicJsonType& parsed)>; + + /// a parser reading from an input adapter + explicit parser(detail::input_adapter_t&& adapter, + const parser_callback_t cb = nullptr, + const bool allow_exceptions_ = true) + : callback(cb), m_lexer(std::move(adapter)), allow_exceptions(allow_exceptions_) + { + // read first token + get_token(); + } + + /*! + @brief public parser interface + + @param[in] strict whether to expect the last token to be EOF + @param[in,out] result parsed JSON value + + @throw parse_error.101 in case of an unexpected token + @throw parse_error.102 if to_unicode fails or surrogate error + @throw parse_error.103 if to_unicode fails + */ + void parse(const bool strict, BasicJsonType& result) + { + if (callback) + { + json_sax_dom_callback_parser<BasicJsonType> sdp(result, callback, allow_exceptions); + sax_parse_internal(&sdp); + result.assert_invariant(); + + // in strict mode, input must be completely read + if (strict and (get_token() != token_type::end_of_input)) + { + sdp.parse_error(m_lexer.get_position(), + m_lexer.get_token_string(), + parse_error::create(101, m_lexer.get_position(), + exception_message(token_type::end_of_input, "value"))); + } + + // in case of an error, return discarded value + if (sdp.is_errored()) + { + result = value_t::discarded; + return; + } + + // set top-level value to null if it was discarded by the callback + // function + if (result.is_discarded()) + { + result = nullptr; + } + } + else + { + json_sax_dom_parser<BasicJsonType> sdp(result, allow_exceptions); + sax_parse_internal(&sdp); + result.assert_invariant(); + + // in strict mode, input must be completely read + if (strict and (get_token() != token_type::end_of_input)) + { + sdp.parse_error(m_lexer.get_position(), + m_lexer.get_token_string(), + parse_error::create(101, m_lexer.get_position(), + exception_message(token_type::end_of_input, "value"))); + } + + // in case of an error, return discarded value + if (sdp.is_errored()) + { + result = value_t::discarded; + return; + } + } + } + + /*! + @brief public accept interface + + @param[in] strict whether to expect the last token to be EOF + @return whether the input is a proper JSON text + */ + bool accept(const bool strict = true) + { + json_sax_acceptor<BasicJsonType> sax_acceptor; + return sax_parse(&sax_acceptor, strict); + } + + template <typename SAX> + bool sax_parse(SAX* sax, const bool strict = true) + { + (void)detail::is_sax_static_asserts<SAX, BasicJsonType> {}; + const bool result = sax_parse_internal(sax); + + // strict mode: next byte must be EOF + if (result and strict and (get_token() != token_type::end_of_input)) + { + return sax->parse_error(m_lexer.get_position(), + m_lexer.get_token_string(), + parse_error::create(101, m_lexer.get_position(), + exception_message(token_type::end_of_input, "value"))); + } + + return result; + } + + private: + template <typename SAX> + bool sax_parse_internal(SAX* sax) + { + // stack to remember the hierarchy of structured values we are parsing + // true = array; false = object + std::vector<bool> states; + // value to avoid a goto (see comment where set to true) + bool skip_to_state_evaluation = false; + + while (true) + { + if (not skip_to_state_evaluation) + { + // invariant: get_token() was called before each iteration + switch (last_token) + { + case token_type::begin_object: + { + if (JSON_UNLIKELY(not sax->start_object(std::size_t(-1)))) + { + return false; + } + + // closing } -> we are done + if (get_token() == token_type::end_object) + { + if (JSON_UNLIKELY(not sax->end_object())) + { + return false; + } + break; + } + + // parse key + if (JSON_UNLIKELY(last_token != token_type::value_string)) + { + return sax->parse_error(m_lexer.get_position(), + m_lexer.get_token_string(), + parse_error::create(101, m_lexer.get_position(), + exception_message(token_type::value_string, "object key"))); + } + if (JSON_UNLIKELY(not sax->key(m_lexer.get_string()))) + { + return false; + } + + // parse separator (:) + if (JSON_UNLIKELY(get_token() != token_type::name_separator)) + { + return sax->parse_error(m_lexer.get_position(), + m_lexer.get_token_string(), + parse_error::create(101, m_lexer.get_position(), + exception_message(token_type::name_separator, "object separator"))); + } + + // remember we are now inside an object + states.push_back(false); + + // parse values + get_token(); + continue; + } + + case token_type::begin_array: + { + if (JSON_UNLIKELY(not sax->start_array(std::size_t(-1)))) + { + return false; + } + + // closing ] -> we are done + if (get_token() == token_type::end_array) + { + if (JSON_UNLIKELY(not sax->end_array())) + { + return false; + } + break; + } + + // remember we are now inside an array + states.push_back(true); + + // parse values (no need to call get_token) + continue; + } + + case token_type::value_float: + { + const auto res = m_lexer.get_number_float(); + + if (JSON_UNLIKELY(not std::isfinite(res))) + { + return sax->parse_error(m_lexer.get_position(), + m_lexer.get_token_string(), + out_of_range::create(406, "number overflow parsing '" + m_lexer.get_token_string() + "'")); + } + else + { + if (JSON_UNLIKELY(not sax->number_float(res, m_lexer.get_string()))) + { + return false; + } + break; + } + } + + case token_type::literal_false: + { + if (JSON_UNLIKELY(not sax->boolean(false))) + { + return false; + } + break; + } + + case token_type::literal_null: + { + if (JSON_UNLIKELY(not sax->null())) + { + return false; + } + break; + } + + case token_type::literal_true: + { + if (JSON_UNLIKELY(not sax->boolean(true))) + { + return false; + } + break; + } + + case token_type::value_integer: + { + if (JSON_UNLIKELY(not sax->number_integer(m_lexer.get_number_integer()))) + { + return false; + } + break; + } + + case token_type::value_string: + { + if (JSON_UNLIKELY(not sax->string(m_lexer.get_string()))) + { + return false; + } + break; + } + + case token_type::value_unsigned: + { + if (JSON_UNLIKELY(not sax->number_unsigned(m_lexer.get_number_unsigned()))) + { + return false; + } + break; + } + + case token_type::parse_error: + { + // using "uninitialized" to avoid "expected" message + return sax->parse_error(m_lexer.get_position(), + m_lexer.get_token_string(), + parse_error::create(101, m_lexer.get_position(), + exception_message(token_type::uninitialized, "value"))); + } + + default: // the last token was unexpected + { + return sax->parse_error(m_lexer.get_position(), + m_lexer.get_token_string(), + parse_error::create(101, m_lexer.get_position(), + exception_message(token_type::literal_or_value, "value"))); + } + } + } + else + { + skip_to_state_evaluation = false; + } + + // we reached this line after we successfully parsed a value + if (states.empty()) + { + // empty stack: we reached the end of the hierarchy: done + return true; + } + else + { + if (states.back()) // array + { + // comma -> next value + if (get_token() == token_type::value_separator) + { + // parse a new value + get_token(); + continue; + } + + // closing ] + if (JSON_LIKELY(last_token == token_type::end_array)) + { + if (JSON_UNLIKELY(not sax->end_array())) + { + return false; + } + + // We are done with this array. Before we can parse a + // new value, we need to evaluate the new state first. + // By setting skip_to_state_evaluation to false, we + // are effectively jumping to the beginning of this if. + assert(not states.empty()); + states.pop_back(); + skip_to_state_evaluation = true; + continue; + } + else + { + return sax->parse_error(m_lexer.get_position(), + m_lexer.get_token_string(), + parse_error::create(101, m_lexer.get_position(), + exception_message(token_type::end_array, "array"))); + } + } + else // object + { + // comma -> next value + if (get_token() == token_type::value_separator) + { + // parse key + if (JSON_UNLIKELY(get_token() != token_type::value_string)) + { + return sax->parse_error(m_lexer.get_position(), + m_lexer.get_token_string(), + parse_error::create(101, m_lexer.get_position(), + exception_message(token_type::value_string, "object key"))); + } + else + { + if (JSON_UNLIKELY(not sax->key(m_lexer.get_string()))) + { + return false; + } + } + + // parse separator (:) + if (JSON_UNLIKELY(get_token() != token_type::name_separator)) + { + return sax->parse_error(m_lexer.get_position(), + m_lexer.get_token_string(), + parse_error::create(101, m_lexer.get_position(), + exception_message(token_type::name_separator, "object separator"))); + } + + // parse values + get_token(); + continue; + } + + // closing } + if (JSON_LIKELY(last_token == token_type::end_object)) + { + if (JSON_UNLIKELY(not sax->end_object())) + { + return false; + } + + // We are done with this object. Before we can parse a + // new value, we need to evaluate the new state first. + // By setting skip_to_state_evaluation to false, we + // are effectively jumping to the beginning of this if. + assert(not states.empty()); + states.pop_back(); + skip_to_state_evaluation = true; + continue; + } + else + { + return sax->parse_error(m_lexer.get_position(), + m_lexer.get_token_string(), + parse_error::create(101, m_lexer.get_position(), + exception_message(token_type::end_object, "object"))); + } + } + } + } + } + + /// get next token from lexer + token_type get_token() + { + return (last_token = m_lexer.scan()); + } + + std::string exception_message(const token_type expected, const std::string& context) + { + std::string error_msg = "syntax error "; + + if (not context.empty()) + { + error_msg += "while parsing " + context + " "; + } + + error_msg += "- "; + + if (last_token == token_type::parse_error) + { + error_msg += std::string(m_lexer.get_error_message()) + "; last read: '" + + m_lexer.get_token_string() + "'"; + } + else + { + error_msg += "unexpected " + std::string(lexer_t::token_type_name(last_token)); + } + + if (expected != token_type::uninitialized) + { + error_msg += "; expected " + std::string(lexer_t::token_type_name(expected)); + } + + return error_msg; + } + + private: + /// callback function + const parser_callback_t callback = nullptr; + /// the type of the last read token + token_type last_token = token_type::uninitialized; + /// the lexer + lexer_t m_lexer; + /// whether to throw exceptions in case of errors + const bool allow_exceptions = true; +}; +} // namespace detail +} // namespace nlohmann + +// #include <nlohmann/detail/iterators/primitive_iterator.hpp> + + +#include <cstddef> // ptrdiff_t +#include <limits> // numeric_limits + +namespace nlohmann +{ +namespace detail +{ +/* +@brief an iterator for primitive JSON types + +This class models an iterator for primitive JSON types (boolean, number, +string). It's only purpose is to allow the iterator/const_iterator classes +to "iterate" over primitive values. Internally, the iterator is modeled by +a `difference_type` variable. Value begin_value (`0`) models the begin, +end_value (`1`) models past the end. +*/ +class primitive_iterator_t +{ + private: + using difference_type = std::ptrdiff_t; + static constexpr difference_type begin_value = 0; + static constexpr difference_type end_value = begin_value + 1; + + /// iterator as signed integer type + difference_type m_it = (std::numeric_limits<std::ptrdiff_t>::min)(); + + public: + constexpr difference_type get_value() const noexcept + { + return m_it; + } + + /// set iterator to a defined beginning + void set_begin() noexcept + { + m_it = begin_value; + } + + /// set iterator to a defined past the end + void set_end() noexcept + { + m_it = end_value; + } + + /// return whether the iterator can be dereferenced + constexpr bool is_begin() const noexcept + { + return m_it == begin_value; + } + + /// return whether the iterator is at end + constexpr bool is_end() const noexcept + { + return m_it == end_value; + } + + friend constexpr bool operator==(primitive_iterator_t lhs, primitive_iterator_t rhs) noexcept + { + return lhs.m_it == rhs.m_it; + } + + friend constexpr bool operator<(primitive_iterator_t lhs, primitive_iterator_t rhs) noexcept + { + return lhs.m_it < rhs.m_it; + } + + primitive_iterator_t operator+(difference_type n) noexcept + { + auto result = *this; + result += n; + return result; + } + + friend constexpr difference_type operator-(primitive_iterator_t lhs, primitive_iterator_t rhs) noexcept + { + return lhs.m_it - rhs.m_it; + } + + primitive_iterator_t& operator++() noexcept + { + ++m_it; + return *this; + } + + primitive_iterator_t const operator++(int) noexcept + { + auto result = *this; + ++m_it; + return result; + } + + primitive_iterator_t& operator--() noexcept + { + --m_it; + return *this; + } + + primitive_iterator_t const operator--(int) noexcept + { + auto result = *this; + --m_it; + return result; + } + + primitive_iterator_t& operator+=(difference_type n) noexcept + { + m_it += n; + return *this; + } + + primitive_iterator_t& operator-=(difference_type n) noexcept + { + m_it -= n; + return *this; + } +}; +} // namespace detail +} // namespace nlohmann + +// #include <nlohmann/detail/iterators/internal_iterator.hpp> + + +// #include <nlohmann/detail/iterators/primitive_iterator.hpp> + + +namespace nlohmann +{ +namespace detail +{ +/*! +@brief an iterator value + +@note This structure could easily be a union, but MSVC currently does not allow +unions members with complex constructors, see https://github.com/nlohmann/json/pull/105. +*/ +template<typename BasicJsonType> struct internal_iterator +{ + /// iterator for JSON objects + typename BasicJsonType::object_t::iterator object_iterator {}; + /// iterator for JSON arrays + typename BasicJsonType::array_t::iterator array_iterator {}; + /// generic iterator for all other types + primitive_iterator_t primitive_iterator {}; +}; +} // namespace detail +} // namespace nlohmann + +// #include <nlohmann/detail/iterators/iter_impl.hpp> + + +#include <ciso646> // not +#include <iterator> // iterator, random_access_iterator_tag, bidirectional_iterator_tag, advance, next +#include <type_traits> // conditional, is_const, remove_const + +// #include <nlohmann/detail/exceptions.hpp> + +// #include <nlohmann/detail/iterators/internal_iterator.hpp> + +// #include <nlohmann/detail/iterators/primitive_iterator.hpp> + +// #include <nlohmann/detail/macro_scope.hpp> + +// #include <nlohmann/detail/meta/cpp_future.hpp> + +// #include <nlohmann/detail/value_t.hpp> + + +namespace nlohmann +{ +namespace detail +{ +// forward declare, to be able to friend it later on +template<typename IteratorType> class iteration_proxy; +template<typename IteratorType> class iteration_proxy_value; + +/*! +@brief a template for a bidirectional iterator for the @ref basic_json class +This class implements a both iterators (iterator and const_iterator) for the +@ref basic_json class. +@note An iterator is called *initialized* when a pointer to a JSON value has + been set (e.g., by a constructor or a copy assignment). If the iterator is + default-constructed, it is *uninitialized* and most methods are undefined. + **The library uses assertions to detect calls on uninitialized iterators.** +@requirement The class satisfies the following concept requirements: +- +[BidirectionalIterator](https://en.cppreference.com/w/cpp/named_req/BidirectionalIterator): + The iterator that can be moved can be moved in both directions (i.e. + incremented and decremented). +@since version 1.0.0, simplified in version 2.0.9, change to bidirectional + iterators in version 3.0.0 (see https://github.com/nlohmann/json/issues/593) +*/ +template<typename BasicJsonType> +class iter_impl +{ + /// allow basic_json to access private members + friend iter_impl<typename std::conditional<std::is_const<BasicJsonType>::value, typename std::remove_const<BasicJsonType>::type, const BasicJsonType>::type>; + friend BasicJsonType; + friend iteration_proxy<iter_impl>; + friend iteration_proxy_value<iter_impl>; + + using object_t = typename BasicJsonType::object_t; + using array_t = typename BasicJsonType::array_t; + // make sure BasicJsonType is basic_json or const basic_json + static_assert(is_basic_json<typename std::remove_const<BasicJsonType>::type>::value, + "iter_impl only accepts (const) basic_json"); + + public: + + /// The std::iterator class template (used as a base class to provide typedefs) is deprecated in C++17. + /// The C++ Standard has never required user-defined iterators to derive from std::iterator. + /// A user-defined iterator should provide publicly accessible typedefs named + /// iterator_category, value_type, difference_type, pointer, and reference. + /// Note that value_type is required to be non-const, even for constant iterators. + using iterator_category = std::bidirectional_iterator_tag; + + /// the type of the values when the iterator is dereferenced + using value_type = typename BasicJsonType::value_type; + /// a type to represent differences between iterators + using difference_type = typename BasicJsonType::difference_type; + /// defines a pointer to the type iterated over (value_type) + using pointer = typename std::conditional<std::is_const<BasicJsonType>::value, + typename BasicJsonType::const_pointer, + typename BasicJsonType::pointer>::type; + /// defines a reference to the type iterated over (value_type) + using reference = + typename std::conditional<std::is_const<BasicJsonType>::value, + typename BasicJsonType::const_reference, + typename BasicJsonType::reference>::type; + + /// default constructor + iter_impl() = default; + + /*! + @brief constructor for a given JSON instance + @param[in] object pointer to a JSON object for this iterator + @pre object != nullptr + @post The iterator is initialized; i.e. `m_object != nullptr`. + */ + explicit iter_impl(pointer object) noexcept : m_object(object) + { + assert(m_object != nullptr); + + switch (m_object->m_type) + { + case value_t::object: + { + m_it.object_iterator = typename object_t::iterator(); + break; + } + + case value_t::array: + { + m_it.array_iterator = typename array_t::iterator(); + break; + } + + default: + { + m_it.primitive_iterator = primitive_iterator_t(); + break; + } + } + } + + /*! + @note The conventional copy constructor and copy assignment are implicitly + defined. Combined with the following converting constructor and + assignment, they support: (1) copy from iterator to iterator, (2) + copy from const iterator to const iterator, and (3) conversion from + iterator to const iterator. However conversion from const iterator + to iterator is not defined. + */ + + /*! + @brief converting constructor + @param[in] other non-const iterator to copy from + @note It is not checked whether @a other is initialized. + */ + iter_impl(const iter_impl<typename std::remove_const<BasicJsonType>::type>& other) noexcept + : m_object(other.m_object), m_it(other.m_it) {} + + /*! + @brief converting assignment + @param[in,out] other non-const iterator to copy from + @return const/non-const iterator + @note It is not checked whether @a other is initialized. + */ + iter_impl& operator=(const iter_impl<typename std::remove_const<BasicJsonType>::type>& other) noexcept + { + m_object = other.m_object; + m_it = other.m_it; + return *this; + } + + private: + /*! + @brief set the iterator to the first value + @pre The iterator is initialized; i.e. `m_object != nullptr`. + */ + void set_begin() noexcept + { + assert(m_object != nullptr); + + switch (m_object->m_type) + { + case value_t::object: + { + m_it.object_iterator = m_object->m_value.object->begin(); + break; + } + + case value_t::array: + { + m_it.array_iterator = m_object->m_value.array->begin(); + break; + } + + case value_t::null: + { + // set to end so begin()==end() is true: null is empty + m_it.primitive_iterator.set_end(); + break; + } + + default: + { + m_it.primitive_iterator.set_begin(); + break; + } + } + } + + /*! + @brief set the iterator past the last value + @pre The iterator is initialized; i.e. `m_object != nullptr`. + */ + void set_end() noexcept + { + assert(m_object != nullptr); + + switch (m_object->m_type) + { + case value_t::object: + { + m_it.object_iterator = m_object->m_value.object->end(); + break; + } + + case value_t::array: + { + m_it.array_iterator = m_object->m_value.array->end(); + break; + } + + default: + { + m_it.primitive_iterator.set_end(); + break; + } + } + } + + public: + /*! + @brief return a reference to the value pointed to by the iterator + @pre The iterator is initialized; i.e. `m_object != nullptr`. + */ + reference operator*() const + { + assert(m_object != nullptr); + + switch (m_object->m_type) + { + case value_t::object: + { + assert(m_it.object_iterator != m_object->m_value.object->end()); + return m_it.object_iterator->second; + } + + case value_t::array: + { + assert(m_it.array_iterator != m_object->m_value.array->end()); + return *m_it.array_iterator; + } + + case value_t::null: + JSON_THROW(invalid_iterator::create(214, "cannot get value")); + + default: + { + if (JSON_LIKELY(m_it.primitive_iterator.is_begin())) + { + return *m_object; + } + + JSON_THROW(invalid_iterator::create(214, "cannot get value")); + } + } + } + + /*! + @brief dereference the iterator + @pre The iterator is initialized; i.e. `m_object != nullptr`. + */ + pointer operator->() const + { + assert(m_object != nullptr); + + switch (m_object->m_type) + { + case value_t::object: + { + assert(m_it.object_iterator != m_object->m_value.object->end()); + return &(m_it.object_iterator->second); + } + + case value_t::array: + { + assert(m_it.array_iterator != m_object->m_value.array->end()); + return &*m_it.array_iterator; + } + + default: + { + if (JSON_LIKELY(m_it.primitive_iterator.is_begin())) + { + return m_object; + } + + JSON_THROW(invalid_iterator::create(214, "cannot get value")); + } + } + } + + /*! + @brief post-increment (it++) + @pre The iterator is initialized; i.e. `m_object != nullptr`. + */ + iter_impl const operator++(int) + { + auto result = *this; + ++(*this); + return result; + } + + /*! + @brief pre-increment (++it) + @pre The iterator is initialized; i.e. `m_object != nullptr`. + */ + iter_impl& operator++() + { + assert(m_object != nullptr); + + switch (m_object->m_type) + { + case value_t::object: + { + std::advance(m_it.object_iterator, 1); + break; + } + + case value_t::array: + { + std::advance(m_it.array_iterator, 1); + break; + } + + default: + { + ++m_it.primitive_iterator; + break; + } + } + + return *this; + } + + /*! + @brief post-decrement (it--) + @pre The iterator is initialized; i.e. `m_object != nullptr`. + */ + iter_impl const operator--(int) + { + auto result = *this; + --(*this); + return result; + } + + /*! + @brief pre-decrement (--it) + @pre The iterator is initialized; i.e. `m_object != nullptr`. + */ + iter_impl& operator--() + { + assert(m_object != nullptr); + + switch (m_object->m_type) + { + case value_t::object: + { + std::advance(m_it.object_iterator, -1); + break; + } + + case value_t::array: + { + std::advance(m_it.array_iterator, -1); + break; + } + + default: + { + --m_it.primitive_iterator; + break; + } + } + + return *this; + } + + /*! + @brief comparison: equal + @pre The iterator is initialized; i.e. `m_object != nullptr`. + */ + bool operator==(const iter_impl& other) const + { + // if objects are not the same, the comparison is undefined + if (JSON_UNLIKELY(m_object != other.m_object)) + { + JSON_THROW(invalid_iterator::create(212, "cannot compare iterators of different containers")); + } + + assert(m_object != nullptr); + + switch (m_object->m_type) + { + case value_t::object: + return (m_it.object_iterator == other.m_it.object_iterator); + + case value_t::array: + return (m_it.array_iterator == other.m_it.array_iterator); + + default: + return (m_it.primitive_iterator == other.m_it.primitive_iterator); + } + } + + /*! + @brief comparison: not equal + @pre The iterator is initialized; i.e. `m_object != nullptr`. + */ + bool operator!=(const iter_impl& other) const + { + return not operator==(other); + } + + /*! + @brief comparison: smaller + @pre The iterator is initialized; i.e. `m_object != nullptr`. + */ + bool operator<(const iter_impl& other) const + { + // if objects are not the same, the comparison is undefined + if (JSON_UNLIKELY(m_object != other.m_object)) + { + JSON_THROW(invalid_iterator::create(212, "cannot compare iterators of different containers")); + } + + assert(m_object != nullptr); + + switch (m_object->m_type) + { + case value_t::object: + JSON_THROW(invalid_iterator::create(213, "cannot compare order of object iterators")); + + case value_t::array: + return (m_it.array_iterator < other.m_it.array_iterator); + + default: + return (m_it.primitive_iterator < other.m_it.primitive_iterator); + } + } + + /*! + @brief comparison: less than or equal + @pre The iterator is initialized; i.e. `m_object != nullptr`. + */ + bool operator<=(const iter_impl& other) const + { + return not other.operator < (*this); + } + + /*! + @brief comparison: greater than + @pre The iterator is initialized; i.e. `m_object != nullptr`. + */ + bool operator>(const iter_impl& other) const + { + return not operator<=(other); + } + + /*! + @brief comparison: greater than or equal + @pre The iterator is initialized; i.e. `m_object != nullptr`. + */ + bool operator>=(const iter_impl& other) const + { + return not operator<(other); + } + + /*! + @brief add to iterator + @pre The iterator is initialized; i.e. `m_object != nullptr`. + */ + iter_impl& operator+=(difference_type i) + { + assert(m_object != nullptr); + + switch (m_object->m_type) + { + case value_t::object: + JSON_THROW(invalid_iterator::create(209, "cannot use offsets with object iterators")); + + case value_t::array: + { + std::advance(m_it.array_iterator, i); + break; + } + + default: + { + m_it.primitive_iterator += i; + break; + } + } + + return *this; + } + + /*! + @brief subtract from iterator + @pre The iterator is initialized; i.e. `m_object != nullptr`. + */ + iter_impl& operator-=(difference_type i) + { + return operator+=(-i); + } + + /*! + @brief add to iterator + @pre The iterator is initialized; i.e. `m_object != nullptr`. + */ + iter_impl operator+(difference_type i) const + { + auto result = *this; + result += i; + return result; + } + + /*! + @brief addition of distance and iterator + @pre The iterator is initialized; i.e. `m_object != nullptr`. + */ + friend iter_impl operator+(difference_type i, const iter_impl& it) + { + auto result = it; + result += i; + return result; + } + + /*! + @brief subtract from iterator + @pre The iterator is initialized; i.e. `m_object != nullptr`. + */ + iter_impl operator-(difference_type i) const + { + auto result = *this; + result -= i; + return result; + } + + /*! + @brief return difference + @pre The iterator is initialized; i.e. `m_object != nullptr`. + */ + difference_type operator-(const iter_impl& other) const + { + assert(m_object != nullptr); + + switch (m_object->m_type) + { + case value_t::object: + JSON_THROW(invalid_iterator::create(209, "cannot use offsets with object iterators")); + + case value_t::array: + return m_it.array_iterator - other.m_it.array_iterator; + + default: + return m_it.primitive_iterator - other.m_it.primitive_iterator; + } + } + + /*! + @brief access to successor + @pre The iterator is initialized; i.e. `m_object != nullptr`. + */ + reference operator[](difference_type n) const + { + assert(m_object != nullptr); + + switch (m_object->m_type) + { + case value_t::object: + JSON_THROW(invalid_iterator::create(208, "cannot use operator[] for object iterators")); + + case value_t::array: + return *std::next(m_it.array_iterator, n); + + case value_t::null: + JSON_THROW(invalid_iterator::create(214, "cannot get value")); + + default: + { + if (JSON_LIKELY(m_it.primitive_iterator.get_value() == -n)) + { + return *m_object; + } + + JSON_THROW(invalid_iterator::create(214, "cannot get value")); + } + } + } + + /*! + @brief return the key of an object iterator + @pre The iterator is initialized; i.e. `m_object != nullptr`. + */ + const typename object_t::key_type& key() const + { + assert(m_object != nullptr); + + if (JSON_LIKELY(m_object->is_object())) + { + return m_it.object_iterator->first; + } + + JSON_THROW(invalid_iterator::create(207, "cannot use key() for non-object iterators")); + } + + /*! + @brief return the value of an iterator + @pre The iterator is initialized; i.e. `m_object != nullptr`. + */ + reference value() const + { + return operator*(); + } + + private: + /// associated JSON instance + pointer m_object = nullptr; + /// the actual iterator of the associated instance + internal_iterator<typename std::remove_const<BasicJsonType>::type> m_it; +}; +} // namespace detail +} // namespace nlohmann +// #include <nlohmann/detail/iterators/iteration_proxy.hpp> + +// #include <nlohmann/detail/iterators/json_reverse_iterator.hpp> + + +#include <cstddef> // ptrdiff_t +#include <iterator> // reverse_iterator +#include <utility> // declval + +namespace nlohmann +{ +namespace detail +{ +////////////////////// +// reverse_iterator // +////////////////////// + +/*! +@brief a template for a reverse iterator class + +@tparam Base the base iterator type to reverse. Valid types are @ref +iterator (to create @ref reverse_iterator) and @ref const_iterator (to +create @ref const_reverse_iterator). + +@requirement The class satisfies the following concept requirements: +- +[BidirectionalIterator](https://en.cppreference.com/w/cpp/named_req/BidirectionalIterator): + The iterator that can be moved can be moved in both directions (i.e. + incremented and decremented). +- [OutputIterator](https://en.cppreference.com/w/cpp/named_req/OutputIterator): + It is possible to write to the pointed-to element (only if @a Base is + @ref iterator). + +@since version 1.0.0 +*/ +template<typename Base> +class json_reverse_iterator : public std::reverse_iterator<Base> +{ + public: + using difference_type = std::ptrdiff_t; + /// shortcut to the reverse iterator adapter + using base_iterator = std::reverse_iterator<Base>; + /// the reference type for the pointed-to element + using reference = typename Base::reference; + + /// create reverse iterator from iterator + explicit json_reverse_iterator(const typename base_iterator::iterator_type& it) noexcept + : base_iterator(it) {} + + /// create reverse iterator from base class + explicit json_reverse_iterator(const base_iterator& it) noexcept : base_iterator(it) {} + + /// post-increment (it++) + json_reverse_iterator const operator++(int) + { + return static_cast<json_reverse_iterator>(base_iterator::operator++(1)); + } + + /// pre-increment (++it) + json_reverse_iterator& operator++() + { + return static_cast<json_reverse_iterator&>(base_iterator::operator++()); + } + + /// post-decrement (it--) + json_reverse_iterator const operator--(int) + { + return static_cast<json_reverse_iterator>(base_iterator::operator--(1)); + } + + /// pre-decrement (--it) + json_reverse_iterator& operator--() + { + return static_cast<json_reverse_iterator&>(base_iterator::operator--()); + } + + /// add to iterator + json_reverse_iterator& operator+=(difference_type i) + { + return static_cast<json_reverse_iterator&>(base_iterator::operator+=(i)); + } + + /// add to iterator + json_reverse_iterator operator+(difference_type i) const + { + return static_cast<json_reverse_iterator>(base_iterator::operator+(i)); + } + + /// subtract from iterator + json_reverse_iterator operator-(difference_type i) const + { + return static_cast<json_reverse_iterator>(base_iterator::operator-(i)); + } + + /// return difference + difference_type operator-(const json_reverse_iterator& other) const + { + return base_iterator(*this) - base_iterator(other); + } + + /// access to successor + reference operator[](difference_type n) const + { + return *(this->operator+(n)); + } + + /// return the key of an object iterator + auto key() const -> decltype(std::declval<Base>().key()) + { + auto it = --this->base(); + return it.key(); + } + + /// return the value of an iterator + reference value() const + { + auto it = --this->base(); + return it.operator * (); + } +}; +} // namespace detail +} // namespace nlohmann + +// #include <nlohmann/detail/output/output_adapters.hpp> + + +#include <algorithm> // copy +#include <cstddef> // size_t +#include <ios> // streamsize +#include <iterator> // back_inserter +#include <memory> // shared_ptr, make_shared +#include <ostream> // basic_ostream +#include <string> // basic_string +#include <vector> // vector + +namespace nlohmann +{ +namespace detail +{ +/// abstract output adapter interface +template<typename CharType> struct output_adapter_protocol +{ + virtual void write_character(CharType c) = 0; + virtual void write_characters(const CharType* s, std::size_t length) = 0; + virtual ~output_adapter_protocol() = default; +}; + +/// a type to simplify interfaces +template<typename CharType> +using output_adapter_t = std::shared_ptr<output_adapter_protocol<CharType>>; + +/// output adapter for byte vectors +template<typename CharType> +class output_vector_adapter : public output_adapter_protocol<CharType> +{ + public: + explicit output_vector_adapter(std::vector<CharType>& vec) noexcept + : v(vec) + {} + + void write_character(CharType c) override + { + v.push_back(c); + } + + void write_characters(const CharType* s, std::size_t length) override + { + std::copy(s, s + length, std::back_inserter(v)); + } + + private: + std::vector<CharType>& v; +}; + +/// output adapter for output streams +template<typename CharType> +class output_stream_adapter : public output_adapter_protocol<CharType> +{ + public: + explicit output_stream_adapter(std::basic_ostream<CharType>& s) noexcept + : stream(s) + {} + + void write_character(CharType c) override + { + stream.put(c); + } + + void write_characters(const CharType* s, std::size_t length) override + { + stream.write(s, static_cast<std::streamsize>(length)); + } + + private: + std::basic_ostream<CharType>& stream; +}; + +/// output adapter for basic_string +template<typename CharType, typename StringType = std::basic_string<CharType>> +class output_string_adapter : public output_adapter_protocol<CharType> +{ + public: + explicit output_string_adapter(StringType& s) noexcept + : str(s) + {} + + void write_character(CharType c) override + { + str.push_back(c); + } + + void write_characters(const CharType* s, std::size_t length) override + { + str.append(s, length); + } + + private: + StringType& str; +}; + +template<typename CharType, typename StringType = std::basic_string<CharType>> +class output_adapter +{ + public: + output_adapter(std::vector<CharType>& vec) + : oa(std::make_shared<output_vector_adapter<CharType>>(vec)) {} + + output_adapter(std::basic_ostream<CharType>& s) + : oa(std::make_shared<output_stream_adapter<CharType>>(s)) {} + + output_adapter(StringType& s) + : oa(std::make_shared<output_string_adapter<CharType, StringType>>(s)) {} + + operator output_adapter_t<CharType>() + { + return oa; + } + + private: + output_adapter_t<CharType> oa = nullptr; +}; +} // namespace detail +} // namespace nlohmann + +// #include <nlohmann/detail/input/binary_reader.hpp> + + +#include <algorithm> // generate_n +#include <array> // array +#include <cassert> // assert +#include <cmath> // ldexp +#include <cstddef> // size_t +#include <cstdint> // uint8_t, uint16_t, uint32_t, uint64_t +#include <cstdio> // snprintf +#include <cstring> // memcpy +#include <iterator> // back_inserter +#include <limits> // numeric_limits +#include <string> // char_traits, string +#include <utility> // make_pair, move + +// #include <nlohmann/detail/input/input_adapters.hpp> + +// #include <nlohmann/detail/input/json_sax.hpp> + +// #include <nlohmann/detail/exceptions.hpp> + +// #include <nlohmann/detail/macro_scope.hpp> + +// #include <nlohmann/detail/meta/is_sax.hpp> + +// #include <nlohmann/detail/value_t.hpp> + + +namespace nlohmann +{ +namespace detail +{ +/////////////////// +// binary reader // +/////////////////// + +/*! +@brief deserialization of CBOR, MessagePack, and UBJSON values +*/ +template<typename BasicJsonType, typename SAX = json_sax_dom_parser<BasicJsonType>> +class binary_reader +{ + using number_integer_t = typename BasicJsonType::number_integer_t; + using number_unsigned_t = typename BasicJsonType::number_unsigned_t; + using number_float_t = typename BasicJsonType::number_float_t; + using string_t = typename BasicJsonType::string_t; + using json_sax_t = SAX; + + public: + /*! + @brief create a binary reader + + @param[in] adapter input adapter to read from + */ + explicit binary_reader(input_adapter_t adapter) : ia(std::move(adapter)) + { + (void)detail::is_sax_static_asserts<SAX, BasicJsonType> {}; + assert(ia); + } + + /*! + @param[in] format the binary format to parse + @param[in] sax_ a SAX event processor + @param[in] strict whether to expect the input to be consumed completed + + @return + */ + bool sax_parse(const input_format_t format, + json_sax_t* sax_, + const bool strict = true) + { + sax = sax_; + bool result = false; + + switch (format) + { + case input_format_t::bson: + result = parse_bson_internal(); + break; + + case input_format_t::cbor: + result = parse_cbor_internal(); + break; + + case input_format_t::msgpack: + result = parse_msgpack_internal(); + break; + + case input_format_t::ubjson: + result = parse_ubjson_internal(); + break; + + // LCOV_EXCL_START + default: + assert(false); + // LCOV_EXCL_STOP + } + + // strict mode: next byte must be EOF + if (result and strict) + { + if (format == input_format_t::ubjson) + { + get_ignore_noop(); + } + else + { + get(); + } + + if (JSON_UNLIKELY(current != std::char_traits<char>::eof())) + { + return sax->parse_error(chars_read, get_token_string(), + parse_error::create(110, chars_read, exception_message(format, "expected end of input; last byte: 0x" + get_token_string(), "value"))); + } + } + + return result; + } + + /*! + @brief determine system byte order + + @return true if and only if system's byte order is little endian + + @note from http://stackoverflow.com/a/1001328/266378 + */ + static constexpr bool little_endianess(int num = 1) noexcept + { + return (*reinterpret_cast<char*>(&num) == 1); + } + + private: + ////////// + // BSON // + ////////// + + /*! + @brief Reads in a BSON-object and passes it to the SAX-parser. + @return whether a valid BSON-value was passed to the SAX parser + */ + bool parse_bson_internal() + { + std::int32_t document_size; + get_number<std::int32_t, true>(input_format_t::bson, document_size); + + if (JSON_UNLIKELY(not sax->start_object(std::size_t(-1)))) + { + return false; + } + + if (JSON_UNLIKELY(not parse_bson_element_list(/*is_array*/false))) + { + return false; + } + + return sax->end_object(); + } + + /*! + @brief Parses a C-style string from the BSON input. + @param[in, out] result A reference to the string variable where the read + string is to be stored. + @return `true` if the \x00-byte indicating the end of the string was + encountered before the EOF; false` indicates an unexpected EOF. + */ + bool get_bson_cstr(string_t& result) + { + auto out = std::back_inserter(result); + while (true) + { + get(); + if (JSON_UNLIKELY(not unexpect_eof(input_format_t::bson, "cstring"))) + { + return false; + } + if (current == 0x00) + { + return true; + } + *out++ = static_cast<char>(current); + } + + return true; + } + + /*! + @brief Parses a zero-terminated string of length @a len from the BSON + input. + @param[in] len The length (including the zero-byte at the end) of the + string to be read. + @param[in, out] result A reference to the string variable where the read + string is to be stored. + @tparam NumberType The type of the length @a len + @pre len >= 1 + @return `true` if the string was successfully parsed + */ + template<typename NumberType> + bool get_bson_string(const NumberType len, string_t& result) + { + if (JSON_UNLIKELY(len < 1)) + { + auto last_token = get_token_string(); + return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read, exception_message(input_format_t::bson, "string length must be at least 1, is " + std::to_string(len), "string"))); + } + + return get_string(input_format_t::bson, len - static_cast<NumberType>(1), result) and get() != std::char_traits<char>::eof(); + } + + /*! + @brief Read a BSON document element of the given @a element_type. + @param[in] element_type The BSON element type, c.f. http://bsonspec.org/spec.html + @param[in] element_type_parse_position The position in the input stream, + where the `element_type` was read. + @warning Not all BSON element types are supported yet. An unsupported + @a element_type will give rise to a parse_error.114: + Unsupported BSON record type 0x... + @return whether a valid BSON-object/array was passed to the SAX parser + */ + bool parse_bson_element_internal(const int element_type, + const std::size_t element_type_parse_position) + { + switch (element_type) + { + case 0x01: // double + { + double number; + return get_number<double, true>(input_format_t::bson, number) and sax->number_float(static_cast<number_float_t>(number), ""); + } + + case 0x02: // string + { + std::int32_t len; + string_t value; + return get_number<std::int32_t, true>(input_format_t::bson, len) and get_bson_string(len, value) and sax->string(value); + } + + case 0x03: // object + { + return parse_bson_internal(); + } + + case 0x04: // array + { + return parse_bson_array(); + } + + case 0x08: // boolean + { + return sax->boolean(get() != 0); + } + + case 0x0A: // null + { + return sax->null(); + } + + case 0x10: // int32 + { + std::int32_t value; + return get_number<std::int32_t, true>(input_format_t::bson, value) and sax->number_integer(value); + } + + case 0x12: // int64 + { + std::int64_t value; + return get_number<std::int64_t, true>(input_format_t::bson, value) and sax->number_integer(value); + } + + default: // anything else not supported (yet) + { + char cr[3]; + (std::snprintf)(cr, sizeof(cr), "%.2hhX", static_cast<unsigned char>(element_type)); + return sax->parse_error(element_type_parse_position, std::string(cr), parse_error::create(114, element_type_parse_position, "Unsupported BSON record type 0x" + std::string(cr))); + } + } + } + + /*! + @brief Read a BSON element list (as specified in the BSON-spec) + + The same binary layout is used for objects and arrays, hence it must be + indicated with the argument @a is_array which one is expected + (true --> array, false --> object). + + @param[in] is_array Determines if the element list being read is to be + treated as an object (@a is_array == false), or as an + array (@a is_array == true). + @return whether a valid BSON-object/array was passed to the SAX parser + */ + bool parse_bson_element_list(const bool is_array) + { + string_t key; + while (int element_type = get()) + { + if (JSON_UNLIKELY(not unexpect_eof(input_format_t::bson, "element list"))) + { + return false; + } + + const std::size_t element_type_parse_position = chars_read; + if (JSON_UNLIKELY(not get_bson_cstr(key))) + { + return false; + } + + if (not is_array) + { + if (not sax->key(key)) + { + return false; + } + } + + if (JSON_UNLIKELY(not parse_bson_element_internal(element_type, element_type_parse_position))) + { + return false; + } + + // get_bson_cstr only appends + key.clear(); + } + + return true; + } + + /*! + @brief Reads an array from the BSON input and passes it to the SAX-parser. + @return whether a valid BSON-array was passed to the SAX parser + */ + bool parse_bson_array() + { + std::int32_t document_size; + get_number<std::int32_t, true>(input_format_t::bson, document_size); + + if (JSON_UNLIKELY(not sax->start_array(std::size_t(-1)))) + { + return false; + } + + if (JSON_UNLIKELY(not parse_bson_element_list(/*is_array*/true))) + { + return false; + } + + return sax->end_array(); + } + + ////////// + // CBOR // + ////////// + + /*! + @param[in] get_char whether a new character should be retrieved from the + input (true, default) or whether the last read + character should be considered instead + + @return whether a valid CBOR value was passed to the SAX parser + */ + bool parse_cbor_internal(const bool get_char = true) + { + switch (get_char ? get() : current) + { + // EOF + case std::char_traits<char>::eof(): + return unexpect_eof(input_format_t::cbor, "value"); + + // Integer 0x00..0x17 (0..23) + case 0x00: + case 0x01: + case 0x02: + case 0x03: + case 0x04: + case 0x05: + case 0x06: + case 0x07: + case 0x08: + case 0x09: + case 0x0A: + case 0x0B: + case 0x0C: + case 0x0D: + case 0x0E: + case 0x0F: + case 0x10: + case 0x11: + case 0x12: + case 0x13: + case 0x14: + case 0x15: + case 0x16: + case 0x17: + return sax->number_unsigned(static_cast<number_unsigned_t>(current)); + + case 0x18: // Unsigned integer (one-byte uint8_t follows) + { + uint8_t number; + return get_number(input_format_t::cbor, number) and sax->number_unsigned(number); + } + + case 0x19: // Unsigned integer (two-byte uint16_t follows) + { + uint16_t number; + return get_number(input_format_t::cbor, number) and sax->number_unsigned(number); + } + + case 0x1A: // Unsigned integer (four-byte uint32_t follows) + { + uint32_t number; + return get_number(input_format_t::cbor, number) and sax->number_unsigned(number); + } + + case 0x1B: // Unsigned integer (eight-byte uint64_t follows) + { + uint64_t number; + return get_number(input_format_t::cbor, number) and sax->number_unsigned(number); + } + + // Negative integer -1-0x00..-1-0x17 (-1..-24) + case 0x20: + case 0x21: + case 0x22: + case 0x23: + case 0x24: + case 0x25: + case 0x26: + case 0x27: + case 0x28: + case 0x29: + case 0x2A: + case 0x2B: + case 0x2C: + case 0x2D: + case 0x2E: + case 0x2F: + case 0x30: + case 0x31: + case 0x32: + case 0x33: + case 0x34: + case 0x35: + case 0x36: + case 0x37: + return sax->number_integer(static_cast<int8_t>(0x20 - 1 - current)); + + case 0x38: // Negative integer (one-byte uint8_t follows) + { + uint8_t number; + return get_number(input_format_t::cbor, number) and sax->number_integer(static_cast<number_integer_t>(-1) - number); + } + + case 0x39: // Negative integer -1-n (two-byte uint16_t follows) + { + uint16_t number; + return get_number(input_format_t::cbor, number) and sax->number_integer(static_cast<number_integer_t>(-1) - number); + } + + case 0x3A: // Negative integer -1-n (four-byte uint32_t follows) + { + uint32_t number; + return get_number(input_format_t::cbor, number) and sax->number_integer(static_cast<number_integer_t>(-1) - number); + } + + case 0x3B: // Negative integer -1-n (eight-byte uint64_t follows) + { + uint64_t number; + return get_number(input_format_t::cbor, number) and sax->number_integer(static_cast<number_integer_t>(-1) + - static_cast<number_integer_t>(number)); + } + + // UTF-8 string (0x00..0x17 bytes follow) + case 0x60: + case 0x61: + case 0x62: + case 0x63: + case 0x64: + case 0x65: + case 0x66: + case 0x67: + case 0x68: + case 0x69: + case 0x6A: + case 0x6B: + case 0x6C: + case 0x6D: + case 0x6E: + case 0x6F: + case 0x70: + case 0x71: + case 0x72: + case 0x73: + case 0x74: + case 0x75: + case 0x76: + case 0x77: + case 0x78: // UTF-8 string (one-byte uint8_t for n follows) + case 0x79: // UTF-8 string (two-byte uint16_t for n follow) + case 0x7A: // UTF-8 string (four-byte uint32_t for n follow) + case 0x7B: // UTF-8 string (eight-byte uint64_t for n follow) + case 0x7F: // UTF-8 string (indefinite length) + { + string_t s; + return get_cbor_string(s) and sax->string(s); + } + + // array (0x00..0x17 data items follow) + case 0x80: + case 0x81: + case 0x82: + case 0x83: + case 0x84: + case 0x85: + case 0x86: + case 0x87: + case 0x88: + case 0x89: + case 0x8A: + case 0x8B: + case 0x8C: + case 0x8D: + case 0x8E: + case 0x8F: + case 0x90: + case 0x91: + case 0x92: + case 0x93: + case 0x94: + case 0x95: + case 0x96: + case 0x97: + return get_cbor_array(static_cast<std::size_t>(current & 0x1F)); + + case 0x98: // array (one-byte uint8_t for n follows) + { + uint8_t len; + return get_number(input_format_t::cbor, len) and get_cbor_array(static_cast<std::size_t>(len)); + } + + case 0x99: // array (two-byte uint16_t for n follow) + { + uint16_t len; + return get_number(input_format_t::cbor, len) and get_cbor_array(static_cast<std::size_t>(len)); + } + + case 0x9A: // array (four-byte uint32_t for n follow) + { + uint32_t len; + return get_number(input_format_t::cbor, len) and get_cbor_array(static_cast<std::size_t>(len)); + } + + case 0x9B: // array (eight-byte uint64_t for n follow) + { + uint64_t len; + return get_number(input_format_t::cbor, len) and get_cbor_array(static_cast<std::size_t>(len)); + } + + case 0x9F: // array (indefinite length) + return get_cbor_array(std::size_t(-1)); + + // map (0x00..0x17 pairs of data items follow) + case 0xA0: + case 0xA1: + case 0xA2: + case 0xA3: + case 0xA4: + case 0xA5: + case 0xA6: + case 0xA7: + case 0xA8: + case 0xA9: + case 0xAA: + case 0xAB: + case 0xAC: + case 0xAD: + case 0xAE: + case 0xAF: + case 0xB0: + case 0xB1: + case 0xB2: + case 0xB3: + case 0xB4: + case 0xB5: + case 0xB6: + case 0xB7: + return get_cbor_object(static_cast<std::size_t>(current & 0x1F)); + + case 0xB8: // map (one-byte uint8_t for n follows) + { + uint8_t len; + return get_number(input_format_t::cbor, len) and get_cbor_object(static_cast<std::size_t>(len)); + } + + case 0xB9: // map (two-byte uint16_t for n follow) + { + uint16_t len; + return get_number(input_format_t::cbor, len) and get_cbor_object(static_cast<std::size_t>(len)); + } + + case 0xBA: // map (four-byte uint32_t for n follow) + { + uint32_t len; + return get_number(input_format_t::cbor, len) and get_cbor_object(static_cast<std::size_t>(len)); + } + + case 0xBB: // map (eight-byte uint64_t for n follow) + { + uint64_t len; + return get_number(input_format_t::cbor, len) and get_cbor_object(static_cast<std::size_t>(len)); + } + + case 0xBF: // map (indefinite length) + return get_cbor_object(std::size_t(-1)); + + case 0xF4: // false + return sax->boolean(false); + + case 0xF5: // true + return sax->boolean(true); + + case 0xF6: // null + return sax->null(); + + case 0xF9: // Half-Precision Float (two-byte IEEE 754) + { + const int byte1_raw = get(); + if (JSON_UNLIKELY(not unexpect_eof(input_format_t::cbor, "number"))) + { + return false; + } + const int byte2_raw = get(); + if (JSON_UNLIKELY(not unexpect_eof(input_format_t::cbor, "number"))) + { + return false; + } + + const auto byte1 = static_cast<unsigned char>(byte1_raw); + const auto byte2 = static_cast<unsigned char>(byte2_raw); + + // code from RFC 7049, Appendix D, Figure 3: + // As half-precision floating-point numbers were only added + // to IEEE 754 in 2008, today's programming platforms often + // still only have limited support for them. It is very + // easy to include at least decoding support for them even + // without such support. An example of a small decoder for + // half-precision floating-point numbers in the C language + // is shown in Fig. 3. + const int half = (byte1 << 8) + byte2; + const double val = [&half] + { + const int exp = (half >> 10) & 0x1F; + const int mant = half & 0x3FF; + assert(0 <= exp and exp <= 32); + assert(0 <= mant and mant <= 1024); + switch (exp) + { + case 0: + return std::ldexp(mant, -24); + case 31: + return (mant == 0) + ? std::numeric_limits<double>::infinity() + : std::numeric_limits<double>::quiet_NaN(); + default: + return std::ldexp(mant + 1024, exp - 25); + } + }(); + return sax->number_float((half & 0x8000) != 0 + ? static_cast<number_float_t>(-val) + : static_cast<number_float_t>(val), ""); + } + + case 0xFA: // Single-Precision Float (four-byte IEEE 754) + { + float number; + return get_number(input_format_t::cbor, number) and sax->number_float(static_cast<number_float_t>(number), ""); + } + + case 0xFB: // Double-Precision Float (eight-byte IEEE 754) + { + double number; + return get_number(input_format_t::cbor, number) and sax->number_float(static_cast<number_float_t>(number), ""); + } + + default: // anything else (0xFF is handled inside the other types) + { + auto last_token = get_token_string(); + return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read, exception_message(input_format_t::cbor, "invalid byte: 0x" + last_token, "value"))); + } + } + } + + /*! + @brief reads a CBOR string + + This function first reads starting bytes to determine the expected + string length and then copies this number of bytes into a string. + Additionally, CBOR's strings with indefinite lengths are supported. + + @param[out] result created string + + @return whether string creation completed + */ + bool get_cbor_string(string_t& result) + { + if (JSON_UNLIKELY(not unexpect_eof(input_format_t::cbor, "string"))) + { + return false; + } + + switch (current) + { + // UTF-8 string (0x00..0x17 bytes follow) + case 0x60: + case 0x61: + case 0x62: + case 0x63: + case 0x64: + case 0x65: + case 0x66: + case 0x67: + case 0x68: + case 0x69: + case 0x6A: + case 0x6B: + case 0x6C: + case 0x6D: + case 0x6E: + case 0x6F: + case 0x70: + case 0x71: + case 0x72: + case 0x73: + case 0x74: + case 0x75: + case 0x76: + case 0x77: + { + return get_string(input_format_t::cbor, current & 0x1F, result); + } + + case 0x78: // UTF-8 string (one-byte uint8_t for n follows) + { + uint8_t len; + return get_number(input_format_t::cbor, len) and get_string(input_format_t::cbor, len, result); + } + + case 0x79: // UTF-8 string (two-byte uint16_t for n follow) + { + uint16_t len; + return get_number(input_format_t::cbor, len) and get_string(input_format_t::cbor, len, result); + } + + case 0x7A: // UTF-8 string (four-byte uint32_t for n follow) + { + uint32_t len; + return get_number(input_format_t::cbor, len) and get_string(input_format_t::cbor, len, result); + } + + case 0x7B: // UTF-8 string (eight-byte uint64_t for n follow) + { + uint64_t len; + return get_number(input_format_t::cbor, len) and get_string(input_format_t::cbor, len, result); + } + + case 0x7F: // UTF-8 string (indefinite length) + { + while (get() != 0xFF) + { + string_t chunk; + if (not get_cbor_string(chunk)) + { + return false; + } + result.append(chunk); + } + return true; + } + + default: + { + auto last_token = get_token_string(); + return sax->parse_error(chars_read, last_token, parse_error::create(113, chars_read, exception_message(input_format_t::cbor, "expected length specification (0x60-0x7B) or indefinite string type (0x7F); last byte: 0x" + last_token, "string"))); + } + } + } + + /*! + @param[in] len the length of the array or std::size_t(-1) for an + array of indefinite size + @return whether array creation completed + */ + bool get_cbor_array(const std::size_t len) + { + if (JSON_UNLIKELY(not sax->start_array(len))) + { + return false; + } + + if (len != std::size_t(-1)) + { + for (std::size_t i = 0; i < len; ++i) + { + if (JSON_UNLIKELY(not parse_cbor_internal())) + { + return false; + } + } + } + else + { + while (get() != 0xFF) + { + if (JSON_UNLIKELY(not parse_cbor_internal(false))) + { + return false; + } + } + } + + return sax->end_array(); + } + + /*! + @param[in] len the length of the object or std::size_t(-1) for an + object of indefinite size + @return whether object creation completed + */ + bool get_cbor_object(const std::size_t len) + { + if (not JSON_UNLIKELY(sax->start_object(len))) + { + return false; + } + + string_t key; + if (len != std::size_t(-1)) + { + for (std::size_t i = 0; i < len; ++i) + { + get(); + if (JSON_UNLIKELY(not get_cbor_string(key) or not sax->key(key))) + { + return false; + } + + if (JSON_UNLIKELY(not parse_cbor_internal())) + { + return false; + } + key.clear(); + } + } + else + { + while (get() != 0xFF) + { + if (JSON_UNLIKELY(not get_cbor_string(key) or not sax->key(key))) + { + return false; + } + + if (JSON_UNLIKELY(not parse_cbor_internal())) + { + return false; + } + key.clear(); + } + } + + return sax->end_object(); + } + + ///////////// + // MsgPack // + ///////////// + + /*! + @return whether a valid MessagePack value was passed to the SAX parser + */ + bool parse_msgpack_internal() + { + switch (get()) + { + // EOF + case std::char_traits<char>::eof(): + return unexpect_eof(input_format_t::msgpack, "value"); + + // positive fixint + case 0x00: + case 0x01: + case 0x02: + case 0x03: + case 0x04: + case 0x05: + case 0x06: + case 0x07: + case 0x08: + case 0x09: + case 0x0A: + case 0x0B: + case 0x0C: + case 0x0D: + case 0x0E: + case 0x0F: + case 0x10: + case 0x11: + case 0x12: + case 0x13: + case 0x14: + case 0x15: + case 0x16: + case 0x17: + case 0x18: + case 0x19: + case 0x1A: + case 0x1B: + case 0x1C: + case 0x1D: + case 0x1E: + case 0x1F: + case 0x20: + case 0x21: + case 0x22: + case 0x23: + case 0x24: + case 0x25: + case 0x26: + case 0x27: + case 0x28: + case 0x29: + case 0x2A: + case 0x2B: + case 0x2C: + case 0x2D: + case 0x2E: + case 0x2F: + case 0x30: + case 0x31: + case 0x32: + case 0x33: + case 0x34: + case 0x35: + case 0x36: + case 0x37: + case 0x38: + case 0x39: + case 0x3A: + case 0x3B: + case 0x3C: + case 0x3D: + case 0x3E: + case 0x3F: + case 0x40: + case 0x41: + case 0x42: + case 0x43: + case 0x44: + case 0x45: + case 0x46: + case 0x47: + case 0x48: + case 0x49: + case 0x4A: + case 0x4B: + case 0x4C: + case 0x4D: + case 0x4E: + case 0x4F: + case 0x50: + case 0x51: + case 0x52: + case 0x53: + case 0x54: + case 0x55: + case 0x56: + case 0x57: + case 0x58: + case 0x59: + case 0x5A: + case 0x5B: + case 0x5C: + case 0x5D: + case 0x5E: + case 0x5F: + case 0x60: + case 0x61: + case 0x62: + case 0x63: + case 0x64: + case 0x65: + case 0x66: + case 0x67: + case 0x68: + case 0x69: + case 0x6A: + case 0x6B: + case 0x6C: + case 0x6D: + case 0x6E: + case 0x6F: + case 0x70: + case 0x71: + case 0x72: + case 0x73: + case 0x74: + case 0x75: + case 0x76: + case 0x77: + case 0x78: + case 0x79: + case 0x7A: + case 0x7B: + case 0x7C: + case 0x7D: + case 0x7E: + case 0x7F: + return sax->number_unsigned(static_cast<number_unsigned_t>(current)); + + // fixmap + case 0x80: + case 0x81: + case 0x82: + case 0x83: + case 0x84: + case 0x85: + case 0x86: + case 0x87: + case 0x88: + case 0x89: + case 0x8A: + case 0x8B: + case 0x8C: + case 0x8D: + case 0x8E: + case 0x8F: + return get_msgpack_object(static_cast<std::size_t>(current & 0x0F)); + + // fixarray + case 0x90: + case 0x91: + case 0x92: + case 0x93: + case 0x94: + case 0x95: + case 0x96: + case 0x97: + case 0x98: + case 0x99: + case 0x9A: + case 0x9B: + case 0x9C: + case 0x9D: + case 0x9E: + case 0x9F: + return get_msgpack_array(static_cast<std::size_t>(current & 0x0F)); + + // fixstr + case 0xA0: + case 0xA1: + case 0xA2: + case 0xA3: + case 0xA4: + case 0xA5: + case 0xA6: + case 0xA7: + case 0xA8: + case 0xA9: + case 0xAA: + case 0xAB: + case 0xAC: + case 0xAD: + case 0xAE: + case 0xAF: + case 0xB0: + case 0xB1: + case 0xB2: + case 0xB3: + case 0xB4: + case 0xB5: + case 0xB6: + case 0xB7: + case 0xB8: + case 0xB9: + case 0xBA: + case 0xBB: + case 0xBC: + case 0xBD: + case 0xBE: + case 0xBF: + { + string_t s; + return get_msgpack_string(s) and sax->string(s); + } + + case 0xC0: // nil + return sax->null(); + + case 0xC2: // false + return sax->boolean(false); + + case 0xC3: // true + return sax->boolean(true); + + case 0xCA: // float 32 + { + float number; + return get_number(input_format_t::msgpack, number) and sax->number_float(static_cast<number_float_t>(number), ""); + } + + case 0xCB: // float 64 + { + double number; + return get_number(input_format_t::msgpack, number) and sax->number_float(static_cast<number_float_t>(number), ""); + } + + case 0xCC: // uint 8 + { + uint8_t number; + return get_number(input_format_t::msgpack, number) and sax->number_unsigned(number); + } + + case 0xCD: // uint 16 + { + uint16_t number; + return get_number(input_format_t::msgpack, number) and sax->number_unsigned(number); + } + + case 0xCE: // uint 32 + { + uint32_t number; + return get_number(input_format_t::msgpack, number) and sax->number_unsigned(number); + } + + case 0xCF: // uint 64 + { + uint64_t number; + return get_number(input_format_t::msgpack, number) and sax->number_unsigned(number); + } + + case 0xD0: // int 8 + { + int8_t number; + return get_number(input_format_t::msgpack, number) and sax->number_integer(number); + } + + case 0xD1: // int 16 + { + int16_t number; + return get_number(input_format_t::msgpack, number) and sax->number_integer(number); + } + + case 0xD2: // int 32 + { + int32_t number; + return get_number(input_format_t::msgpack, number) and sax->number_integer(number); + } + + case 0xD3: // int 64 + { + int64_t number; + return get_number(input_format_t::msgpack, number) and sax->number_integer(number); + } + + case 0xD9: // str 8 + case 0xDA: // str 16 + case 0xDB: // str 32 + { + string_t s; + return get_msgpack_string(s) and sax->string(s); + } + + case 0xDC: // array 16 + { + uint16_t len; + return get_number(input_format_t::msgpack, len) and get_msgpack_array(static_cast<std::size_t>(len)); + } + + case 0xDD: // array 32 + { + uint32_t len; + return get_number(input_format_t::msgpack, len) and get_msgpack_array(static_cast<std::size_t>(len)); + } + + case 0xDE: // map 16 + { + uint16_t len; + return get_number(input_format_t::msgpack, len) and get_msgpack_object(static_cast<std::size_t>(len)); + } + + case 0xDF: // map 32 + { + uint32_t len; + return get_number(input_format_t::msgpack, len) and get_msgpack_object(static_cast<std::size_t>(len)); + } + + // negative fixint + case 0xE0: + case 0xE1: + case 0xE2: + case 0xE3: + case 0xE4: + case 0xE5: + case 0xE6: + case 0xE7: + case 0xE8: + case 0xE9: + case 0xEA: + case 0xEB: + case 0xEC: + case 0xED: + case 0xEE: + case 0xEF: + case 0xF0: + case 0xF1: + case 0xF2: + case 0xF3: + case 0xF4: + case 0xF5: + case 0xF6: + case 0xF7: + case 0xF8: + case 0xF9: + case 0xFA: + case 0xFB: + case 0xFC: + case 0xFD: + case 0xFE: + case 0xFF: + return sax->number_integer(static_cast<int8_t>(current)); + + default: // anything else + { + auto last_token = get_token_string(); + return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read, exception_message(input_format_t::msgpack, "invalid byte: 0x" + last_token, "value"))); + } + } + } + + /*! + @brief reads a MessagePack string + + This function first reads starting bytes to determine the expected + string length and then copies this number of bytes into a string. + + @param[out] result created string + + @return whether string creation completed + */ + bool get_msgpack_string(string_t& result) + { + if (JSON_UNLIKELY(not unexpect_eof(input_format_t::msgpack, "string"))) + { + return false; + } + + switch (current) + { + // fixstr + case 0xA0: + case 0xA1: + case 0xA2: + case 0xA3: + case 0xA4: + case 0xA5: + case 0xA6: + case 0xA7: + case 0xA8: + case 0xA9: + case 0xAA: + case 0xAB: + case 0xAC: + case 0xAD: + case 0xAE: + case 0xAF: + case 0xB0: + case 0xB1: + case 0xB2: + case 0xB3: + case 0xB4: + case 0xB5: + case 0xB6: + case 0xB7: + case 0xB8: + case 0xB9: + case 0xBA: + case 0xBB: + case 0xBC: + case 0xBD: + case 0xBE: + case 0xBF: + { + return get_string(input_format_t::msgpack, current & 0x1F, result); + } + + case 0xD9: // str 8 + { + uint8_t len; + return get_number(input_format_t::msgpack, len) and get_string(input_format_t::msgpack, len, result); + } + + case 0xDA: // str 16 + { + uint16_t len; + return get_number(input_format_t::msgpack, len) and get_string(input_format_t::msgpack, len, result); + } + + case 0xDB: // str 32 + { + uint32_t len; + return get_number(input_format_t::msgpack, len) and get_string(input_format_t::msgpack, len, result); + } + + default: + { + auto last_token = get_token_string(); + return sax->parse_error(chars_read, last_token, parse_error::create(113, chars_read, exception_message(input_format_t::msgpack, "expected length specification (0xA0-0xBF, 0xD9-0xDB); last byte: 0x" + last_token, "string"))); + } + } + } + + /*! + @param[in] len the length of the array + @return whether array creation completed + */ + bool get_msgpack_array(const std::size_t len) + { + if (JSON_UNLIKELY(not sax->start_array(len))) + { + return false; + } + + for (std::size_t i = 0; i < len; ++i) + { + if (JSON_UNLIKELY(not parse_msgpack_internal())) + { + return false; + } + } + + return sax->end_array(); + } + + /*! + @param[in] len the length of the object + @return whether object creation completed + */ + bool get_msgpack_object(const std::size_t len) + { + if (JSON_UNLIKELY(not sax->start_object(len))) + { + return false; + } + + string_t key; + for (std::size_t i = 0; i < len; ++i) + { + get(); + if (JSON_UNLIKELY(not get_msgpack_string(key) or not sax->key(key))) + { + return false; + } + + if (JSON_UNLIKELY(not parse_msgpack_internal())) + { + return false; + } + key.clear(); + } + + return sax->end_object(); + } + + //////////// + // UBJSON // + //////////// + + /*! + @param[in] get_char whether a new character should be retrieved from the + input (true, default) or whether the last read + character should be considered instead + + @return whether a valid UBJSON value was passed to the SAX parser + */ + bool parse_ubjson_internal(const bool get_char = true) + { + return get_ubjson_value(get_char ? get_ignore_noop() : current); + } + + /*! + @brief reads a UBJSON string + + This function is either called after reading the 'S' byte explicitly + indicating a string, or in case of an object key where the 'S' byte can be + left out. + + @param[out] result created string + @param[in] get_char whether a new character should be retrieved from the + input (true, default) or whether the last read + character should be considered instead + + @return whether string creation completed + */ + bool get_ubjson_string(string_t& result, const bool get_char = true) + { + if (get_char) + { + get(); // TODO: may we ignore N here? + } + + if (JSON_UNLIKELY(not unexpect_eof(input_format_t::ubjson, "value"))) + { + return false; + } + + switch (current) + { + case 'U': + { + uint8_t len; + return get_number(input_format_t::ubjson, len) and get_string(input_format_t::ubjson, len, result); + } + + case 'i': + { + int8_t len; + return get_number(input_format_t::ubjson, len) and get_string(input_format_t::ubjson, len, result); + } + + case 'I': + { + int16_t len; + return get_number(input_format_t::ubjson, len) and get_string(input_format_t::ubjson, len, result); + } + + case 'l': + { + int32_t len; + return get_number(input_format_t::ubjson, len) and get_string(input_format_t::ubjson, len, result); + } + + case 'L': + { + int64_t len; + return get_number(input_format_t::ubjson, len) and get_string(input_format_t::ubjson, len, result); + } + + default: + auto last_token = get_token_string(); + return sax->parse_error(chars_read, last_token, parse_error::create(113, chars_read, exception_message(input_format_t::ubjson, "expected length type specification (U, i, I, l, L); last byte: 0x" + last_token, "string"))); + } + } + + /*! + @param[out] result determined size + @return whether size determination completed + */ + bool get_ubjson_size_value(std::size_t& result) + { + switch (get_ignore_noop()) + { + case 'U': + { + uint8_t number; + if (JSON_UNLIKELY(not get_number(input_format_t::ubjson, number))) + { + return false; + } + result = static_cast<std::size_t>(number); + return true; + } + + case 'i': + { + int8_t number; + if (JSON_UNLIKELY(not get_number(input_format_t::ubjson, number))) + { + return false; + } + result = static_cast<std::size_t>(number); + return true; + } + + case 'I': + { + int16_t number; + if (JSON_UNLIKELY(not get_number(input_format_t::ubjson, number))) + { + return false; + } + result = static_cast<std::size_t>(number); + return true; + } + + case 'l': + { + int32_t number; + if (JSON_UNLIKELY(not get_number(input_format_t::ubjson, number))) + { + return false; + } + result = static_cast<std::size_t>(number); + return true; + } + + case 'L': + { + int64_t number; + if (JSON_UNLIKELY(not get_number(input_format_t::ubjson, number))) + { + return false; + } + result = static_cast<std::size_t>(number); + return true; + } + + default: + { + auto last_token = get_token_string(); + return sax->parse_error(chars_read, last_token, parse_error::create(113, chars_read, exception_message(input_format_t::ubjson, "expected length type specification (U, i, I, l, L) after '#'; last byte: 0x" + last_token, "size"))); + } + } + } + + /*! + @brief determine the type and size for a container + + In the optimized UBJSON format, a type and a size can be provided to allow + for a more compact representation. + + @param[out] result pair of the size and the type + + @return whether pair creation completed + */ + bool get_ubjson_size_type(std::pair<std::size_t, int>& result) + { + result.first = string_t::npos; // size + result.second = 0; // type + + get_ignore_noop(); + + if (current == '$') + { + result.second = get(); // must not ignore 'N', because 'N' maybe the type + if (JSON_UNLIKELY(not unexpect_eof(input_format_t::ubjson, "type"))) + { + return false; + } + + get_ignore_noop(); + if (JSON_UNLIKELY(current != '#')) + { + if (JSON_UNLIKELY(not unexpect_eof(input_format_t::ubjson, "value"))) + { + return false; + } + auto last_token = get_token_string(); + return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read, exception_message(input_format_t::ubjson, "expected '#' after type information; last byte: 0x" + last_token, "size"))); + } + + return get_ubjson_size_value(result.first); + } + else if (current == '#') + { + return get_ubjson_size_value(result.first); + } + return true; + } + + /*! + @param prefix the previously read or set type prefix + @return whether value creation completed + */ + bool get_ubjson_value(const int prefix) + { + switch (prefix) + { + case std::char_traits<char>::eof(): // EOF + return unexpect_eof(input_format_t::ubjson, "value"); + + case 'T': // true + return sax->boolean(true); + case 'F': // false + return sax->boolean(false); + + case 'Z': // null + return sax->null(); + + case 'U': + { + uint8_t number; + return get_number(input_format_t::ubjson, number) and sax->number_unsigned(number); + } + + case 'i': + { + int8_t number; + return get_number(input_format_t::ubjson, number) and sax->number_integer(number); + } + + case 'I': + { + int16_t number; + return get_number(input_format_t::ubjson, number) and sax->number_integer(number); + } + + case 'l': + { + int32_t number; + return get_number(input_format_t::ubjson, number) and sax->number_integer(number); + } + + case 'L': + { + int64_t number; + return get_number(input_format_t::ubjson, number) and sax->number_integer(number); + } + + case 'd': + { + float number; + return get_number(input_format_t::ubjson, number) and sax->number_float(static_cast<number_float_t>(number), ""); + } + + case 'D': + { + double number; + return get_number(input_format_t::ubjson, number) and sax->number_float(static_cast<number_float_t>(number), ""); + } + + case 'C': // char + { + get(); + if (JSON_UNLIKELY(not unexpect_eof(input_format_t::ubjson, "char"))) + { + return false; + } + if (JSON_UNLIKELY(current > 127)) + { + auto last_token = get_token_string(); + return sax->parse_error(chars_read, last_token, parse_error::create(113, chars_read, exception_message(input_format_t::ubjson, "byte after 'C' must be in range 0x00..0x7F; last byte: 0x" + last_token, "char"))); + } + string_t s(1, static_cast<char>(current)); + return sax->string(s); + } + + case 'S': // string + { + string_t s; + return get_ubjson_string(s) and sax->string(s); + } + + case '[': // array + return get_ubjson_array(); + + case '{': // object + return get_ubjson_object(); + + default: // anything else + { + auto last_token = get_token_string(); + return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read, exception_message(input_format_t::ubjson, "invalid byte: 0x" + last_token, "value"))); + } + } + } + + /*! + @return whether array creation completed + */ + bool get_ubjson_array() + { + std::pair<std::size_t, int> size_and_type; + if (JSON_UNLIKELY(not get_ubjson_size_type(size_and_type))) + { + return false; + } + + if (size_and_type.first != string_t::npos) + { + if (JSON_UNLIKELY(not sax->start_array(size_and_type.first))) + { + return false; + } + + if (size_and_type.second != 0) + { + if (size_and_type.second != 'N') + { + for (std::size_t i = 0; i < size_and_type.first; ++i) + { + if (JSON_UNLIKELY(not get_ubjson_value(size_and_type.second))) + { + return false; + } + } + } + } + else + { + for (std::size_t i = 0; i < size_and_type.first; ++i) + { + if (JSON_UNLIKELY(not parse_ubjson_internal())) + { + return false; + } + } + } + } + else + { + if (JSON_UNLIKELY(not sax->start_array(std::size_t(-1)))) + { + return false; + } + + while (current != ']') + { + if (JSON_UNLIKELY(not parse_ubjson_internal(false))) + { + return false; + } + get_ignore_noop(); + } + } + + return sax->end_array(); + } + + /*! + @return whether object creation completed + */ + bool get_ubjson_object() + { + std::pair<std::size_t, int> size_and_type; + if (JSON_UNLIKELY(not get_ubjson_size_type(size_and_type))) + { + return false; + } + + string_t key; + if (size_and_type.first != string_t::npos) + { + if (JSON_UNLIKELY(not sax->start_object(size_and_type.first))) + { + return false; + } + + if (size_and_type.second != 0) + { + for (std::size_t i = 0; i < size_and_type.first; ++i) + { + if (JSON_UNLIKELY(not get_ubjson_string(key) or not sax->key(key))) + { + return false; + } + if (JSON_UNLIKELY(not get_ubjson_value(size_and_type.second))) + { + return false; + } + key.clear(); + } + } + else + { + for (std::size_t i = 0; i < size_and_type.first; ++i) + { + if (JSON_UNLIKELY(not get_ubjson_string(key) or not sax->key(key))) + { + return false; + } + if (JSON_UNLIKELY(not parse_ubjson_internal())) + { + return false; + } + key.clear(); + } + } + } + else + { + if (JSON_UNLIKELY(not sax->start_object(std::size_t(-1)))) + { + return false; + } + + while (current != '}') + { + if (JSON_UNLIKELY(not get_ubjson_string(key, false) or not sax->key(key))) + { + return false; + } + if (JSON_UNLIKELY(not parse_ubjson_internal())) + { + return false; + } + get_ignore_noop(); + key.clear(); + } + } + + return sax->end_object(); + } + + /////////////////////// + // Utility functions // + /////////////////////// + + /*! + @brief get next character from the input + + This function provides the interface to the used input adapter. It does + not throw in case the input reached EOF, but returns a -'ve valued + `std::char_traits<char>::eof()` in that case. + + @return character read from the input + */ + int get() + { + ++chars_read; + return (current = ia->get_character()); + } + + /*! + @return character read from the input after ignoring all 'N' entries + */ + int get_ignore_noop() + { + do + { + get(); + } + while (current == 'N'); + + return current; + } + + /* + @brief read a number from the input + + @tparam NumberType the type of the number + @param[in] format the current format (for diagnostics) + @param[out] result number of type @a NumberType + + @return whether conversion completed + + @note This function needs to respect the system's endianess, because + bytes in CBOR, MessagePack, and UBJSON are stored in network order + (big endian) and therefore need reordering on little endian systems. + */ + template<typename NumberType, bool InputIsLittleEndian = false> + bool get_number(const input_format_t format, NumberType& result) + { + // step 1: read input into array with system's byte order + std::array<uint8_t, sizeof(NumberType)> vec; + for (std::size_t i = 0; i < sizeof(NumberType); ++i) + { + get(); + if (JSON_UNLIKELY(not unexpect_eof(format, "number"))) + { + return false; + } + + // reverse byte order prior to conversion if necessary + if (is_little_endian && !InputIsLittleEndian) + { + vec[sizeof(NumberType) - i - 1] = static_cast<uint8_t>(current); + } + else + { + vec[i] = static_cast<uint8_t>(current); // LCOV_EXCL_LINE + } + } + + // step 2: convert array into number of type T and return + std::memcpy(&result, vec.data(), sizeof(NumberType)); + return true; + } + + /*! + @brief create a string by reading characters from the input + + @tparam NumberType the type of the number + @param[in] format the current format (for diagnostics) + @param[in] len number of characters to read + @param[out] result string created by reading @a len bytes + + @return whether string creation completed + + @note We can not reserve @a len bytes for the result, because @a len + may be too large. Usually, @ref unexpect_eof() detects the end of + the input before we run out of string memory. + */ + template<typename NumberType> + bool get_string(const input_format_t format, + const NumberType len, + string_t& result) + { + bool success = true; + std::generate_n(std::back_inserter(result), len, [this, &success, &format]() + { + get(); + if (JSON_UNLIKELY(not unexpect_eof(format, "string"))) + { + success = false; + } + return static_cast<char>(current); + }); + return success; + } + + /*! + @param[in] format the current format (for diagnostics) + @param[in] context further context information (for diagnostics) + @return whether the last read character is not EOF + */ + bool unexpect_eof(const input_format_t format, const char* context) const + { + if (JSON_UNLIKELY(current == std::char_traits<char>::eof())) + { + return sax->parse_error(chars_read, "<end of file>", + parse_error::create(110, chars_read, exception_message(format, "unexpected end of input", context))); + } + return true; + } + + /*! + @return a string representation of the last read byte + */ + std::string get_token_string() const + { + char cr[3]; + (std::snprintf)(cr, 3, "%.2hhX", static_cast<unsigned char>(current)); + return std::string{cr}; + } + + /*! + @param[in] format the current format + @param[in] detail a detailed error message + @param[in] context further contect information + @return a message string to use in the parse_error exceptions + */ + std::string exception_message(const input_format_t format, + const std::string& detail, + const std::string& context) const + { + std::string error_msg = "syntax error while parsing "; + + switch (format) + { + case input_format_t::cbor: + error_msg += "CBOR"; + break; + + case input_format_t::msgpack: + error_msg += "MessagePack"; + break; + + case input_format_t::ubjson: + error_msg += "UBJSON"; + break; + + case input_format_t::bson: + error_msg += "BSON"; + break; + + // LCOV_EXCL_START + default: + assert(false); + // LCOV_EXCL_STOP + } + + return error_msg + " " + context + ": " + detail; + } + + private: + /// input adapter + input_adapter_t ia = nullptr; + + /// the current character + int current = std::char_traits<char>::eof(); + + /// the number of characters read + std::size_t chars_read = 0; + + /// whether we can assume little endianess + const bool is_little_endian = little_endianess(); + + /// the SAX parser + json_sax_t* sax = nullptr; +}; +} // namespace detail +} // namespace nlohmann + +// #include <nlohmann/detail/output/binary_writer.hpp> + + +#include <algorithm> // reverse +#include <array> // array +#include <cstdint> // uint8_t, uint16_t, uint32_t, uint64_t +#include <cstring> // memcpy +#include <limits> // numeric_limits + +// #include <nlohmann/detail/input/binary_reader.hpp> + +// #include <nlohmann/detail/output/output_adapters.hpp> + + +namespace nlohmann +{ +namespace detail +{ +/////////////////// +// binary writer // +/////////////////// + +/*! +@brief serialization to CBOR and MessagePack values +*/ +template<typename BasicJsonType, typename CharType> +class binary_writer +{ + using string_t = typename BasicJsonType::string_t; + + public: + /*! + @brief create a binary writer + + @param[in] adapter output adapter to write to + */ + explicit binary_writer(output_adapter_t<CharType> adapter) : oa(adapter) + { + assert(oa); + } + + /*! + @param[in] j JSON value to serialize + @pre j.type() == value_t::object + */ + void write_bson(const BasicJsonType& j) + { + switch (j.type()) + { + case value_t::object: + { + write_bson_object(*j.m_value.object); + break; + } + + default: + { + JSON_THROW(type_error::create(317, "to serialize to BSON, top-level type must be object, but is " + std::string(j.type_name()))); + } + } + } + + /*! + @param[in] j JSON value to serialize + */ + void write_cbor(const BasicJsonType& j) + { + switch (j.type()) + { + case value_t::null: + { + oa->write_character(to_char_type(0xF6)); + break; + } + + case value_t::boolean: + { + oa->write_character(j.m_value.boolean + ? to_char_type(0xF5) + : to_char_type(0xF4)); + break; + } + + case value_t::number_integer: + { + if (j.m_value.number_integer >= 0) + { + // CBOR does not differentiate between positive signed + // integers and unsigned integers. Therefore, we used the + // code from the value_t::number_unsigned case here. + if (j.m_value.number_integer <= 0x17) + { + write_number(static_cast<uint8_t>(j.m_value.number_integer)); + } + else if (j.m_value.number_integer <= (std::numeric_limits<uint8_t>::max)()) + { + oa->write_character(to_char_type(0x18)); + write_number(static_cast<uint8_t>(j.m_value.number_integer)); + } + else if (j.m_value.number_integer <= (std::numeric_limits<uint16_t>::max)()) + { + oa->write_character(to_char_type(0x19)); + write_number(static_cast<uint16_t>(j.m_value.number_integer)); + } + else if (j.m_value.number_integer <= (std::numeric_limits<uint32_t>::max)()) + { + oa->write_character(to_char_type(0x1A)); + write_number(static_cast<uint32_t>(j.m_value.number_integer)); + } + else + { + oa->write_character(to_char_type(0x1B)); + write_number(static_cast<uint64_t>(j.m_value.number_integer)); + } + } + else + { + // The conversions below encode the sign in the first + // byte, and the value is converted to a positive number. + const auto positive_number = -1 - j.m_value.number_integer; + if (j.m_value.number_integer >= -24) + { + write_number(static_cast<uint8_t>(0x20 + positive_number)); + } + else if (positive_number <= (std::numeric_limits<uint8_t>::max)()) + { + oa->write_character(to_char_type(0x38)); + write_number(static_cast<uint8_t>(positive_number)); + } + else if (positive_number <= (std::numeric_limits<uint16_t>::max)()) + { + oa->write_character(to_char_type(0x39)); + write_number(static_cast<uint16_t>(positive_number)); + } + else if (positive_number <= (std::numeric_limits<uint32_t>::max)()) + { + oa->write_character(to_char_type(0x3A)); + write_number(static_cast<uint32_t>(positive_number)); + } + else + { + oa->write_character(to_char_type(0x3B)); + write_number(static_cast<uint64_t>(positive_number)); + } + } + break; + } + + case value_t::number_unsigned: + { + if (j.m_value.number_unsigned <= 0x17) + { + write_number(static_cast<uint8_t>(j.m_value.number_unsigned)); + } + else if (j.m_value.number_unsigned <= (std::numeric_limits<uint8_t>::max)()) + { + oa->write_character(to_char_type(0x18)); + write_number(static_cast<uint8_t>(j.m_value.number_unsigned)); + } + else if (j.m_value.number_unsigned <= (std::numeric_limits<uint16_t>::max)()) + { + oa->write_character(to_char_type(0x19)); + write_number(static_cast<uint16_t>(j.m_value.number_unsigned)); + } + else if (j.m_value.number_unsigned <= (std::numeric_limits<uint32_t>::max)()) + { + oa->write_character(to_char_type(0x1A)); + write_number(static_cast<uint32_t>(j.m_value.number_unsigned)); + } + else + { + oa->write_character(to_char_type(0x1B)); + write_number(static_cast<uint64_t>(j.m_value.number_unsigned)); + } + break; + } + + case value_t::number_float: + { + oa->write_character(get_cbor_float_prefix(j.m_value.number_float)); + write_number(j.m_value.number_float); + break; + } + + case value_t::string: + { + // step 1: write control byte and the string length + const auto N = j.m_value.string->size(); + if (N <= 0x17) + { + write_number(static_cast<uint8_t>(0x60 + N)); + } + else if (N <= (std::numeric_limits<uint8_t>::max)()) + { + oa->write_character(to_char_type(0x78)); + write_number(static_cast<uint8_t>(N)); + } + else if (N <= (std::numeric_limits<uint16_t>::max)()) + { + oa->write_character(to_char_type(0x79)); + write_number(static_cast<uint16_t>(N)); + } + else if (N <= (std::numeric_limits<uint32_t>::max)()) + { + oa->write_character(to_char_type(0x7A)); + write_number(static_cast<uint32_t>(N)); + } + // LCOV_EXCL_START + else if (N <= (std::numeric_limits<uint64_t>::max)()) + { + oa->write_character(to_char_type(0x7B)); + write_number(static_cast<uint64_t>(N)); + } + // LCOV_EXCL_STOP + + // step 2: write the string + oa->write_characters( + reinterpret_cast<const CharType*>(j.m_value.string->c_str()), + j.m_value.string->size()); + break; + } + + case value_t::array: + { + // step 1: write control byte and the array size + const auto N = j.m_value.array->size(); + if (N <= 0x17) + { + write_number(static_cast<uint8_t>(0x80 + N)); + } + else if (N <= (std::numeric_limits<uint8_t>::max)()) + { + oa->write_character(to_char_type(0x98)); + write_number(static_cast<uint8_t>(N)); + } + else if (N <= (std::numeric_limits<uint16_t>::max)()) + { + oa->write_character(to_char_type(0x99)); + write_number(static_cast<uint16_t>(N)); + } + else if (N <= (std::numeric_limits<uint32_t>::max)()) + { + oa->write_character(to_char_type(0x9A)); + write_number(static_cast<uint32_t>(N)); + } + // LCOV_EXCL_START + else if (N <= (std::numeric_limits<uint64_t>::max)()) + { + oa->write_character(to_char_type(0x9B)); + write_number(static_cast<uint64_t>(N)); + } + // LCOV_EXCL_STOP + + // step 2: write each element + for (const auto& el : *j.m_value.array) + { + write_cbor(el); + } + break; + } + + case value_t::object: + { + // step 1: write control byte and the object size + const auto N = j.m_value.object->size(); + if (N <= 0x17) + { + write_number(static_cast<uint8_t>(0xA0 + N)); + } + else if (N <= (std::numeric_limits<uint8_t>::max)()) + { + oa->write_character(to_char_type(0xB8)); + write_number(static_cast<uint8_t>(N)); + } + else if (N <= (std::numeric_limits<uint16_t>::max)()) + { + oa->write_character(to_char_type(0xB9)); + write_number(static_cast<uint16_t>(N)); + } + else if (N <= (std::numeric_limits<uint32_t>::max)()) + { + oa->write_character(to_char_type(0xBA)); + write_number(static_cast<uint32_t>(N)); + } + // LCOV_EXCL_START + else if (N <= (std::numeric_limits<uint64_t>::max)()) + { + oa->write_character(to_char_type(0xBB)); + write_number(static_cast<uint64_t>(N)); + } + // LCOV_EXCL_STOP + + // step 2: write each element + for (const auto& el : *j.m_value.object) + { + write_cbor(el.first); + write_cbor(el.second); + } + break; + } + + default: + break; + } + } + + /*! + @param[in] j JSON value to serialize + */ + void write_msgpack(const BasicJsonType& j) + { + switch (j.type()) + { + case value_t::null: // nil + { + oa->write_character(to_char_type(0xC0)); + break; + } + + case value_t::boolean: // true and false + { + oa->write_character(j.m_value.boolean + ? to_char_type(0xC3) + : to_char_type(0xC2)); + break; + } + + case value_t::number_integer: + { + if (j.m_value.number_integer >= 0) + { + // MessagePack does not differentiate between positive + // signed integers and unsigned integers. Therefore, we used + // the code from the value_t::number_unsigned case here. + if (j.m_value.number_unsigned < 128) + { + // positive fixnum + write_number(static_cast<uint8_t>(j.m_value.number_integer)); + } + else if (j.m_value.number_unsigned <= (std::numeric_limits<uint8_t>::max)()) + { + // uint 8 + oa->write_character(to_char_type(0xCC)); + write_number(static_cast<uint8_t>(j.m_value.number_integer)); + } + else if (j.m_value.number_unsigned <= (std::numeric_limits<uint16_t>::max)()) + { + // uint 16 + oa->write_character(to_char_type(0xCD)); + write_number(static_cast<uint16_t>(j.m_value.number_integer)); + } + else if (j.m_value.number_unsigned <= (std::numeric_limits<uint32_t>::max)()) + { + // uint 32 + oa->write_character(to_char_type(0xCE)); + write_number(static_cast<uint32_t>(j.m_value.number_integer)); + } + else if (j.m_value.number_unsigned <= (std::numeric_limits<uint64_t>::max)()) + { + // uint 64 + oa->write_character(to_char_type(0xCF)); + write_number(static_cast<uint64_t>(j.m_value.number_integer)); + } + } + else + { + if (j.m_value.number_integer >= -32) + { + // negative fixnum + write_number(static_cast<int8_t>(j.m_value.number_integer)); + } + else if (j.m_value.number_integer >= (std::numeric_limits<int8_t>::min)() and + j.m_value.number_integer <= (std::numeric_limits<int8_t>::max)()) + { + // int 8 + oa->write_character(to_char_type(0xD0)); + write_number(static_cast<int8_t>(j.m_value.number_integer)); + } + else if (j.m_value.number_integer >= (std::numeric_limits<int16_t>::min)() and + j.m_value.number_integer <= (std::numeric_limits<int16_t>::max)()) + { + // int 16 + oa->write_character(to_char_type(0xD1)); + write_number(static_cast<int16_t>(j.m_value.number_integer)); + } + else if (j.m_value.number_integer >= (std::numeric_limits<int32_t>::min)() and + j.m_value.number_integer <= (std::numeric_limits<int32_t>::max)()) + { + // int 32 + oa->write_character(to_char_type(0xD2)); + write_number(static_cast<int32_t>(j.m_value.number_integer)); + } + else if (j.m_value.number_integer >= (std::numeric_limits<int64_t>::min)() and + j.m_value.number_integer <= (std::numeric_limits<int64_t>::max)()) + { + // int 64 + oa->write_character(to_char_type(0xD3)); + write_number(static_cast<int64_t>(j.m_value.number_integer)); + } + } + break; + } + + case value_t::number_unsigned: + { + if (j.m_value.number_unsigned < 128) + { + // positive fixnum + write_number(static_cast<uint8_t>(j.m_value.number_integer)); + } + else if (j.m_value.number_unsigned <= (std::numeric_limits<uint8_t>::max)()) + { + // uint 8 + oa->write_character(to_char_type(0xCC)); + write_number(static_cast<uint8_t>(j.m_value.number_integer)); + } + else if (j.m_value.number_unsigned <= (std::numeric_limits<uint16_t>::max)()) + { + // uint 16 + oa->write_character(to_char_type(0xCD)); + write_number(static_cast<uint16_t>(j.m_value.number_integer)); + } + else if (j.m_value.number_unsigned <= (std::numeric_limits<uint32_t>::max)()) + { + // uint 32 + oa->write_character(to_char_type(0xCE)); + write_number(static_cast<uint32_t>(j.m_value.number_integer)); + } + else if (j.m_value.number_unsigned <= (std::numeric_limits<uint64_t>::max)()) + { + // uint 64 + oa->write_character(to_char_type(0xCF)); + write_number(static_cast<uint64_t>(j.m_value.number_integer)); + } + break; + } + + case value_t::number_float: + { + oa->write_character(get_msgpack_float_prefix(j.m_value.number_float)); + write_number(j.m_value.number_float); + break; + } + + case value_t::string: + { + // step 1: write control byte and the string length + const auto N = j.m_value.string->size(); + if (N <= 31) + { + // fixstr + write_number(static_cast<uint8_t>(0xA0 | N)); + } + else if (N <= (std::numeric_limits<uint8_t>::max)()) + { + // str 8 + oa->write_character(to_char_type(0xD9)); + write_number(static_cast<uint8_t>(N)); + } + else if (N <= (std::numeric_limits<uint16_t>::max)()) + { + // str 16 + oa->write_character(to_char_type(0xDA)); + write_number(static_cast<uint16_t>(N)); + } + else if (N <= (std::numeric_limits<uint32_t>::max)()) + { + // str 32 + oa->write_character(to_char_type(0xDB)); + write_number(static_cast<uint32_t>(N)); + } + + // step 2: write the string + oa->write_characters( + reinterpret_cast<const CharType*>(j.m_value.string->c_str()), + j.m_value.string->size()); + break; + } + + case value_t::array: + { + // step 1: write control byte and the array size + const auto N = j.m_value.array->size(); + if (N <= 15) + { + // fixarray + write_number(static_cast<uint8_t>(0x90 | N)); + } + else if (N <= (std::numeric_limits<uint16_t>::max)()) + { + // array 16 + oa->write_character(to_char_type(0xDC)); + write_number(static_cast<uint16_t>(N)); + } + else if (N <= (std::numeric_limits<uint32_t>::max)()) + { + // array 32 + oa->write_character(to_char_type(0xDD)); + write_number(static_cast<uint32_t>(N)); + } + + // step 2: write each element + for (const auto& el : *j.m_value.array) + { + write_msgpack(el); + } + break; + } + + case value_t::object: + { + // step 1: write control byte and the object size + const auto N = j.m_value.object->size(); + if (N <= 15) + { + // fixmap + write_number(static_cast<uint8_t>(0x80 | (N & 0xF))); + } + else if (N <= (std::numeric_limits<uint16_t>::max)()) + { + // map 16 + oa->write_character(to_char_type(0xDE)); + write_number(static_cast<uint16_t>(N)); + } + else if (N <= (std::numeric_limits<uint32_t>::max)()) + { + // map 32 + oa->write_character(to_char_type(0xDF)); + write_number(static_cast<uint32_t>(N)); + } + + // step 2: write each element + for (const auto& el : *j.m_value.object) + { + write_msgpack(el.first); + write_msgpack(el.second); + } + break; + } + + default: + break; + } + } + + /*! + @param[in] j JSON value to serialize + @param[in] use_count whether to use '#' prefixes (optimized format) + @param[in] use_type whether to use '$' prefixes (optimized format) + @param[in] add_prefix whether prefixes need to be used for this value + */ + void write_ubjson(const BasicJsonType& j, const bool use_count, + const bool use_type, const bool add_prefix = true) + { + switch (j.type()) + { + case value_t::null: + { + if (add_prefix) + { + oa->write_character(to_char_type('Z')); + } + break; + } + + case value_t::boolean: + { + if (add_prefix) + { + oa->write_character(j.m_value.boolean + ? to_char_type('T') + : to_char_type('F')); + } + break; + } + + case value_t::number_integer: + { + write_number_with_ubjson_prefix(j.m_value.number_integer, add_prefix); + break; + } + + case value_t::number_unsigned: + { + write_number_with_ubjson_prefix(j.m_value.number_unsigned, add_prefix); + break; + } + + case value_t::number_float: + { + write_number_with_ubjson_prefix(j.m_value.number_float, add_prefix); + break; + } + + case value_t::string: + { + if (add_prefix) + { + oa->write_character(to_char_type('S')); + } + write_number_with_ubjson_prefix(j.m_value.string->size(), true); + oa->write_characters( + reinterpret_cast<const CharType*>(j.m_value.string->c_str()), + j.m_value.string->size()); + break; + } + + case value_t::array: + { + if (add_prefix) + { + oa->write_character(to_char_type('[')); + } + + bool prefix_required = true; + if (use_type and not j.m_value.array->empty()) + { + assert(use_count); + const CharType first_prefix = ubjson_prefix(j.front()); + const bool same_prefix = std::all_of(j.begin() + 1, j.end(), + [this, first_prefix](const BasicJsonType & v) + { + return ubjson_prefix(v) == first_prefix; + }); + + if (same_prefix) + { + prefix_required = false; + oa->write_character(to_char_type('$')); + oa->write_character(first_prefix); + } + } + + if (use_count) + { + oa->write_character(to_char_type('#')); + write_number_with_ubjson_prefix(j.m_value.array->size(), true); + } + + for (const auto& el : *j.m_value.array) + { + write_ubjson(el, use_count, use_type, prefix_required); + } + + if (not use_count) + { + oa->write_character(to_char_type(']')); + } + + break; + } + + case value_t::object: + { + if (add_prefix) + { + oa->write_character(to_char_type('{')); + } + + bool prefix_required = true; + if (use_type and not j.m_value.object->empty()) + { + assert(use_count); + const CharType first_prefix = ubjson_prefix(j.front()); + const bool same_prefix = std::all_of(j.begin(), j.end(), + [this, first_prefix](const BasicJsonType & v) + { + return ubjson_prefix(v) == first_prefix; + }); + + if (same_prefix) + { + prefix_required = false; + oa->write_character(to_char_type('$')); + oa->write_character(first_prefix); + } + } + + if (use_count) + { + oa->write_character(to_char_type('#')); + write_number_with_ubjson_prefix(j.m_value.object->size(), true); + } + + for (const auto& el : *j.m_value.object) + { + write_number_with_ubjson_prefix(el.first.size(), true); + oa->write_characters( + reinterpret_cast<const CharType*>(el.first.c_str()), + el.first.size()); + write_ubjson(el.second, use_count, use_type, prefix_required); + } + + if (not use_count) + { + oa->write_character(to_char_type('}')); + } + + break; + } + + default: + break; + } + } + + private: + ////////// + // BSON // + ////////// + + /*! + @return The size of a BSON document entry header, including the id marker + and the entry name size (and its null-terminator). + */ + static std::size_t calc_bson_entry_header_size(const string_t& name) + { + const auto it = name.find(static_cast<typename string_t::value_type>(0)); + if (JSON_UNLIKELY(it != BasicJsonType::string_t::npos)) + { + JSON_THROW(out_of_range::create(409, + "BSON key cannot contain code point U+0000 (at byte " + std::to_string(it) + ")")); + } + + return /*id*/ 1ul + name.size() + /*zero-terminator*/1u; + } + + /*! + @brief Writes the given @a element_type and @a name to the output adapter + */ + void write_bson_entry_header(const string_t& name, + const std::uint8_t element_type) + { + oa->write_character(to_char_type(element_type)); // boolean + oa->write_characters( + reinterpret_cast<const CharType*>(name.c_str()), + name.size() + 1u); + } + + /*! + @brief Writes a BSON element with key @a name and boolean value @a value + */ + void write_bson_boolean(const string_t& name, + const bool value) + { + write_bson_entry_header(name, 0x08); + oa->write_character(value ? to_char_type(0x01) : to_char_type(0x00)); + } + + /*! + @brief Writes a BSON element with key @a name and double value @a value + */ + void write_bson_double(const string_t& name, + const double value) + { + write_bson_entry_header(name, 0x01); + write_number<double, true>(value); + } + + /*! + @return The size of the BSON-encoded string in @a value + */ + static std::size_t calc_bson_string_size(const string_t& value) + { + return sizeof(std::int32_t) + value.size() + 1ul; + } + + /*! + @brief Writes a BSON element with key @a name and string value @a value + */ + void write_bson_string(const string_t& name, + const string_t& value) + { + write_bson_entry_header(name, 0x02); + + write_number<std::int32_t, true>(static_cast<std::int32_t>(value.size() + 1ul)); + oa->write_characters( + reinterpret_cast<const CharType*>(value.c_str()), + value.size() + 1); + } + + /*! + @brief Writes a BSON element with key @a name and null value + */ + void write_bson_null(const string_t& name) + { + write_bson_entry_header(name, 0x0A); + } + + /*! + @return The size of the BSON-encoded integer @a value + */ + static std::size_t calc_bson_integer_size(const std::int64_t value) + { + if ((std::numeric_limits<std::int32_t>::min)() <= value and value <= (std::numeric_limits<std::int32_t>::max)()) + { + return sizeof(std::int32_t); + } + else + { + return sizeof(std::int64_t); + } + } + + /*! + @brief Writes a BSON element with key @a name and integer @a value + */ + void write_bson_integer(const string_t& name, + const std::int64_t value) + { + if ((std::numeric_limits<std::int32_t>::min)() <= value and value <= (std::numeric_limits<std::int32_t>::max)()) + { + write_bson_entry_header(name, 0x10); // int32 + write_number<std::int32_t, true>(static_cast<std::int32_t>(value)); + } + else + { + write_bson_entry_header(name, 0x12); // int64 + write_number<std::int64_t, true>(static_cast<std::int64_t>(value)); + } + } + + /*! + @return The size of the BSON-encoded unsigned integer in @a j + */ + static constexpr std::size_t calc_bson_unsigned_size(const std::uint64_t value) noexcept + { + return (value <= static_cast<std::uint64_t>((std::numeric_limits<std::int32_t>::max)())) + ? sizeof(std::int32_t) + : sizeof(std::int64_t); + } + + /*! + @brief Writes a BSON element with key @a name and unsigned @a value + */ + void write_bson_unsigned(const string_t& name, + const std::uint64_t value) + { + if (value <= static_cast<std::uint64_t>((std::numeric_limits<std::int32_t>::max)())) + { + write_bson_entry_header(name, 0x10 /* int32 */); + write_number<std::int32_t, true>(static_cast<std::int32_t>(value)); + } + else if (value <= static_cast<std::uint64_t>((std::numeric_limits<std::int64_t>::max)())) + { + write_bson_entry_header(name, 0x12 /* int64 */); + write_number<std::int64_t, true>(static_cast<std::int64_t>(value)); + } + else + { + JSON_THROW(out_of_range::create(407, "integer number " + std::to_string(value) + " cannot be represented by BSON as it does not fit int64")); + } + } + + /*! + @brief Writes a BSON element with key @a name and object @a value + */ + void write_bson_object_entry(const string_t& name, + const typename BasicJsonType::object_t& value) + { + write_bson_entry_header(name, 0x03); // object + write_bson_object(value); + } + + /*! + @return The size of the BSON-encoded array @a value + */ + static std::size_t calc_bson_array_size(const typename BasicJsonType::array_t& value) + { + std::size_t embedded_document_size = 0ul; + std::size_t array_index = 0ul; + + for (const auto& el : value) + { + embedded_document_size += calc_bson_element_size(std::to_string(array_index++), el); + } + + return sizeof(std::int32_t) + embedded_document_size + 1ul; + } + + /*! + @brief Writes a BSON element with key @a name and array @a value + */ + void write_bson_array(const string_t& name, + const typename BasicJsonType::array_t& value) + { + write_bson_entry_header(name, 0x04); // array + write_number<std::int32_t, true>(static_cast<std::int32_t>(calc_bson_array_size(value))); + + std::size_t array_index = 0ul; + + for (const auto& el : value) + { + write_bson_element(std::to_string(array_index++), el); + } + + oa->write_character(to_char_type(0x00)); + } + + /*! + @brief Calculates the size necessary to serialize the JSON value @a j with its @a name + @return The calculated size for the BSON document entry for @a j with the given @a name. + */ + static std::size_t calc_bson_element_size(const string_t& name, + const BasicJsonType& j) + { + const auto header_size = calc_bson_entry_header_size(name); + switch (j.type()) + { + case value_t::object: + return header_size + calc_bson_object_size(*j.m_value.object); + + case value_t::array: + return header_size + calc_bson_array_size(*j.m_value.array); + + case value_t::boolean: + return header_size + 1ul; + + case value_t::number_float: + return header_size + 8ul; + + case value_t::number_integer: + return header_size + calc_bson_integer_size(j.m_value.number_integer); + + case value_t::number_unsigned: + return header_size + calc_bson_unsigned_size(j.m_value.number_unsigned); + + case value_t::string: + return header_size + calc_bson_string_size(*j.m_value.string); + + case value_t::null: + return header_size + 0ul; + + // LCOV_EXCL_START + default: + assert(false); + return 0ul; + // LCOV_EXCL_STOP + }; + } + + /*! + @brief Serializes the JSON value @a j to BSON and associates it with the + key @a name. + @param name The name to associate with the JSON entity @a j within the + current BSON document + @return The size of the BSON entry + */ + void write_bson_element(const string_t& name, + const BasicJsonType& j) + { + switch (j.type()) + { + case value_t::object: + return write_bson_object_entry(name, *j.m_value.object); + + case value_t::array: + return write_bson_array(name, *j.m_value.array); + + case value_t::boolean: + return write_bson_boolean(name, j.m_value.boolean); + + case value_t::number_float: + return write_bson_double(name, j.m_value.number_float); + + case value_t::number_integer: + return write_bson_integer(name, j.m_value.number_integer); + + case value_t::number_unsigned: + return write_bson_unsigned(name, j.m_value.number_unsigned); + + case value_t::string: + return write_bson_string(name, *j.m_value.string); + + case value_t::null: + return write_bson_null(name); + + // LCOV_EXCL_START + default: + assert(false); + return; + // LCOV_EXCL_STOP + }; + } + + /*! + @brief Calculates the size of the BSON serialization of the given + JSON-object @a j. + @param[in] j JSON value to serialize + @pre j.type() == value_t::object + */ + static std::size_t calc_bson_object_size(const typename BasicJsonType::object_t& value) + { + std::size_t document_size = std::accumulate(value.begin(), value.end(), 0ul, + [](size_t result, const typename BasicJsonType::object_t::value_type & el) + { + return result += calc_bson_element_size(el.first, el.second); + }); + + return sizeof(std::int32_t) + document_size + 1ul; + } + + /*! + @param[in] j JSON value to serialize + @pre j.type() == value_t::object + */ + void write_bson_object(const typename BasicJsonType::object_t& value) + { + write_number<std::int32_t, true>(static_cast<std::int32_t>(calc_bson_object_size(value))); + + for (const auto& el : value) + { + write_bson_element(el.first, el.second); + } + + oa->write_character(to_char_type(0x00)); + } + + ////////// + // CBOR // + ////////// + + static constexpr CharType get_cbor_float_prefix(float /*unused*/) + { + return to_char_type(0xFA); // Single-Precision Float + } + + static constexpr CharType get_cbor_float_prefix(double /*unused*/) + { + return to_char_type(0xFB); // Double-Precision Float + } + + ///////////// + // MsgPack // + ///////////// + + static constexpr CharType get_msgpack_float_prefix(float /*unused*/) + { + return to_char_type(0xCA); // float 32 + } + + static constexpr CharType get_msgpack_float_prefix(double /*unused*/) + { + return to_char_type(0xCB); // float 64 + } + + //////////// + // UBJSON // + //////////// + + // UBJSON: write number (floating point) + template<typename NumberType, typename std::enable_if< + std::is_floating_point<NumberType>::value, int>::type = 0> + void write_number_with_ubjson_prefix(const NumberType n, + const bool add_prefix) + { + if (add_prefix) + { + oa->write_character(get_ubjson_float_prefix(n)); + } + write_number(n); + } + + // UBJSON: write number (unsigned integer) + template<typename NumberType, typename std::enable_if< + std::is_unsigned<NumberType>::value, int>::type = 0> + void write_number_with_ubjson_prefix(const NumberType n, + const bool add_prefix) + { + if (n <= static_cast<uint64_t>((std::numeric_limits<int8_t>::max)())) + { + if (add_prefix) + { + oa->write_character(to_char_type('i')); // int8 + } + write_number(static_cast<uint8_t>(n)); + } + else if (n <= (std::numeric_limits<uint8_t>::max)()) + { + if (add_prefix) + { + oa->write_character(to_char_type('U')); // uint8 + } + write_number(static_cast<uint8_t>(n)); + } + else if (n <= static_cast<uint64_t>((std::numeric_limits<int16_t>::max)())) + { + if (add_prefix) + { + oa->write_character(to_char_type('I')); // int16 + } + write_number(static_cast<int16_t>(n)); + } + else if (n <= static_cast<uint64_t>((std::numeric_limits<int32_t>::max)())) + { + if (add_prefix) + { + oa->write_character(to_char_type('l')); // int32 + } + write_number(static_cast<int32_t>(n)); + } + else if (n <= static_cast<uint64_t>((std::numeric_limits<int64_t>::max)())) + { + if (add_prefix) + { + oa->write_character(to_char_type('L')); // int64 + } + write_number(static_cast<int64_t>(n)); + } + else + { + JSON_THROW(out_of_range::create(407, "integer number " + std::to_string(n) + " cannot be represented by UBJSON as it does not fit int64")); + } + } + + // UBJSON: write number (signed integer) + template<typename NumberType, typename std::enable_if< + std::is_signed<NumberType>::value and + not std::is_floating_point<NumberType>::value, int>::type = 0> + void write_number_with_ubjson_prefix(const NumberType n, + const bool add_prefix) + { + if ((std::numeric_limits<int8_t>::min)() <= n and n <= (std::numeric_limits<int8_t>::max)()) + { + if (add_prefix) + { + oa->write_character(to_char_type('i')); // int8 + } + write_number(static_cast<int8_t>(n)); + } + else if (static_cast<int64_t>((std::numeric_limits<uint8_t>::min)()) <= n and n <= static_cast<int64_t>((std::numeric_limits<uint8_t>::max)())) + { + if (add_prefix) + { + oa->write_character(to_char_type('U')); // uint8 + } + write_number(static_cast<uint8_t>(n)); + } + else if ((std::numeric_limits<int16_t>::min)() <= n and n <= (std::numeric_limits<int16_t>::max)()) + { + if (add_prefix) + { + oa->write_character(to_char_type('I')); // int16 + } + write_number(static_cast<int16_t>(n)); + } + else if ((std::numeric_limits<int32_t>::min)() <= n and n <= (std::numeric_limits<int32_t>::max)()) + { + if (add_prefix) + { + oa->write_character(to_char_type('l')); // int32 + } + write_number(static_cast<int32_t>(n)); + } + else if ((std::numeric_limits<int64_t>::min)() <= n and n <= (std::numeric_limits<int64_t>::max)()) + { + if (add_prefix) + { + oa->write_character(to_char_type('L')); // int64 + } + write_number(static_cast<int64_t>(n)); + } + // LCOV_EXCL_START + else + { + JSON_THROW(out_of_range::create(407, "integer number " + std::to_string(n) + " cannot be represented by UBJSON as it does not fit int64")); + } + // LCOV_EXCL_STOP + } + + /*! + @brief determine the type prefix of container values + + @note This function does not need to be 100% accurate when it comes to + integer limits. In case a number exceeds the limits of int64_t, + this will be detected by a later call to function + write_number_with_ubjson_prefix. Therefore, we return 'L' for any + value that does not fit the previous limits. + */ + CharType ubjson_prefix(const BasicJsonType& j) const noexcept + { + switch (j.type()) + { + case value_t::null: + return 'Z'; + + case value_t::boolean: + return j.m_value.boolean ? 'T' : 'F'; + + case value_t::number_integer: + { + if ((std::numeric_limits<int8_t>::min)() <= j.m_value.number_integer and j.m_value.number_integer <= (std::numeric_limits<int8_t>::max)()) + { + return 'i'; + } + if ((std::numeric_limits<uint8_t>::min)() <= j.m_value.number_integer and j.m_value.number_integer <= (std::numeric_limits<uint8_t>::max)()) + { + return 'U'; + } + if ((std::numeric_limits<int16_t>::min)() <= j.m_value.number_integer and j.m_value.number_integer <= (std::numeric_limits<int16_t>::max)()) + { + return 'I'; + } + if ((std::numeric_limits<int32_t>::min)() <= j.m_value.number_integer and j.m_value.number_integer <= (std::numeric_limits<int32_t>::max)()) + { + return 'l'; + } + // no check and assume int64_t (see note above) + return 'L'; + } + + case value_t::number_unsigned: + { + if (j.m_value.number_unsigned <= (std::numeric_limits<int8_t>::max)()) + { + return 'i'; + } + if (j.m_value.number_unsigned <= (std::numeric_limits<uint8_t>::max)()) + { + return 'U'; + } + if (j.m_value.number_unsigned <= (std::numeric_limits<int16_t>::max)()) + { + return 'I'; + } + if (j.m_value.number_unsigned <= (std::numeric_limits<int32_t>::max)()) + { + return 'l'; + } + // no check and assume int64_t (see note above) + return 'L'; + } + + case value_t::number_float: + return get_ubjson_float_prefix(j.m_value.number_float); + + case value_t::string: + return 'S'; + + case value_t::array: + return '['; + + case value_t::object: + return '{'; + + default: // discarded values + return 'N'; + } + } + + static constexpr CharType get_ubjson_float_prefix(float /*unused*/) + { + return 'd'; // float 32 + } + + static constexpr CharType get_ubjson_float_prefix(double /*unused*/) + { + return 'D'; // float 64 + } + + /////////////////////// + // Utility functions // + /////////////////////// + + /* + @brief write a number to output input + @param[in] n number of type @a NumberType + @tparam NumberType the type of the number + @tparam OutputIsLittleEndian Set to true if output data is + required to be little endian + + @note This function needs to respect the system's endianess, because bytes + in CBOR, MessagePack, and UBJSON are stored in network order (big + endian) and therefore need reordering on little endian systems. + */ + template<typename NumberType, bool OutputIsLittleEndian = false> + void write_number(const NumberType n) + { + // step 1: write number to array of length NumberType + std::array<CharType, sizeof(NumberType)> vec; + std::memcpy(vec.data(), &n, sizeof(NumberType)); + + // step 2: write array to output (with possible reordering) + if (is_little_endian and not OutputIsLittleEndian) + { + // reverse byte order prior to conversion if necessary + std::reverse(vec.begin(), vec.end()); + } + + oa->write_characters(vec.data(), sizeof(NumberType)); + } + + public: + // The following to_char_type functions are implement the conversion + // between uint8_t and CharType. In case CharType is not unsigned, + // such a conversion is required to allow values greater than 128. + // See <https://github.com/nlohmann/json/issues/1286> for a discussion. + template < typename C = CharType, + enable_if_t < std::is_signed<C>::value and std::is_signed<char>::value > * = nullptr > + static constexpr CharType to_char_type(std::uint8_t x) noexcept + { + return *reinterpret_cast<char*>(&x); + } + + template < typename C = CharType, + enable_if_t < std::is_signed<C>::value and std::is_unsigned<char>::value > * = nullptr > + static CharType to_char_type(std::uint8_t x) noexcept + { + static_assert(sizeof(std::uint8_t) == sizeof(CharType), "size of CharType must be equal to std::uint8_t"); + static_assert(std::is_pod<CharType>::value, "CharType must be POD"); + CharType result; + std::memcpy(&result, &x, sizeof(x)); + return result; + } + + template<typename C = CharType, + enable_if_t<std::is_unsigned<C>::value>* = nullptr> + static constexpr CharType to_char_type(std::uint8_t x) noexcept + { + return x; + } + + template < typename InputCharType, typename C = CharType, + enable_if_t < + std::is_signed<C>::value and + std::is_signed<char>::value and + std::is_same<char, typename std::remove_cv<InputCharType>::type>::value + > * = nullptr > + static constexpr CharType to_char_type(InputCharType x) noexcept + { + return x; + } + + private: + /// whether we can assume little endianess + const bool is_little_endian = binary_reader<BasicJsonType>::little_endianess(); + + /// the output + output_adapter_t<CharType> oa = nullptr; +}; +} // namespace detail +} // namespace nlohmann + +// #include <nlohmann/detail/output/serializer.hpp> + + +#include <algorithm> // reverse, remove, fill, find, none_of +#include <array> // array +#include <cassert> // assert +#include <ciso646> // and, or +#include <clocale> // localeconv, lconv +#include <cmath> // labs, isfinite, isnan, signbit +#include <cstddef> // size_t, ptrdiff_t +#include <cstdint> // uint8_t +#include <cstdio> // snprintf +#include <limits> // numeric_limits +#include <string> // string +#include <type_traits> // is_same + +// #include <nlohmann/detail/exceptions.hpp> + +// #include <nlohmann/detail/conversions/to_chars.hpp> + + +#include <cassert> // assert +#include <ciso646> // or, and, not +#include <cmath> // signbit, isfinite +#include <cstdint> // intN_t, uintN_t +#include <cstring> // memcpy, memmove + +namespace nlohmann +{ +namespace detail +{ + +/*! +@brief implements the Grisu2 algorithm for binary to decimal floating-point +conversion. + +This implementation is a slightly modified version of the reference +implementation which may be obtained from +http://florian.loitsch.com/publications (bench.tar.gz). + +The code is distributed under the MIT license, Copyright (c) 2009 Florian Loitsch. + +For a detailed description of the algorithm see: + +[1] Loitsch, "Printing Floating-Point Numbers Quickly and Accurately with + Integers", Proceedings of the ACM SIGPLAN 2010 Conference on Programming + Language Design and Implementation, PLDI 2010 +[2] Burger, Dybvig, "Printing Floating-Point Numbers Quickly and Accurately", + Proceedings of the ACM SIGPLAN 1996 Conference on Programming Language + Design and Implementation, PLDI 1996 +*/ +namespace dtoa_impl +{ + +template <typename Target, typename Source> +Target reinterpret_bits(const Source source) +{ + static_assert(sizeof(Target) == sizeof(Source), "size mismatch"); + + Target target; + std::memcpy(&target, &source, sizeof(Source)); + return target; +} + +struct diyfp // f * 2^e +{ + static constexpr int kPrecision = 64; // = q + + uint64_t f = 0; + int e = 0; + + constexpr diyfp(uint64_t f_, int e_) noexcept : f(f_), e(e_) {} + + /*! + @brief returns x - y + @pre x.e == y.e and x.f >= y.f + */ + static diyfp sub(const diyfp& x, const diyfp& y) noexcept + { + assert(x.e == y.e); + assert(x.f >= y.f); + + return {x.f - y.f, x.e}; + } + + /*! + @brief returns x * y + @note The result is rounded. (Only the upper q bits are returned.) + */ + static diyfp mul(const diyfp& x, const diyfp& y) noexcept + { + static_assert(kPrecision == 64, "internal error"); + + // Computes: + // f = round((x.f * y.f) / 2^q) + // e = x.e + y.e + q + + // Emulate the 64-bit * 64-bit multiplication: + // + // p = u * v + // = (u_lo + 2^32 u_hi) (v_lo + 2^32 v_hi) + // = (u_lo v_lo ) + 2^32 ((u_lo v_hi ) + (u_hi v_lo )) + 2^64 (u_hi v_hi ) + // = (p0 ) + 2^32 ((p1 ) + (p2 )) + 2^64 (p3 ) + // = (p0_lo + 2^32 p0_hi) + 2^32 ((p1_lo + 2^32 p1_hi) + (p2_lo + 2^32 p2_hi)) + 2^64 (p3 ) + // = (p0_lo ) + 2^32 (p0_hi + p1_lo + p2_lo ) + 2^64 (p1_hi + p2_hi + p3) + // = (p0_lo ) + 2^32 (Q ) + 2^64 (H ) + // = (p0_lo ) + 2^32 (Q_lo + 2^32 Q_hi ) + 2^64 (H ) + // + // (Since Q might be larger than 2^32 - 1) + // + // = (p0_lo + 2^32 Q_lo) + 2^64 (Q_hi + H) + // + // (Q_hi + H does not overflow a 64-bit int) + // + // = p_lo + 2^64 p_hi + + const uint64_t u_lo = x.f & 0xFFFFFFFF; + const uint64_t u_hi = x.f >> 32; + const uint64_t v_lo = y.f & 0xFFFFFFFF; + const uint64_t v_hi = y.f >> 32; + + const uint64_t p0 = u_lo * v_lo; + const uint64_t p1 = u_lo * v_hi; + const uint64_t p2 = u_hi * v_lo; + const uint64_t p3 = u_hi * v_hi; + + const uint64_t p0_hi = p0 >> 32; + const uint64_t p1_lo = p1 & 0xFFFFFFFF; + const uint64_t p1_hi = p1 >> 32; + const uint64_t p2_lo = p2 & 0xFFFFFFFF; + const uint64_t p2_hi = p2 >> 32; + + uint64_t Q = p0_hi + p1_lo + p2_lo; + + // The full product might now be computed as + // + // p_hi = p3 + p2_hi + p1_hi + (Q >> 32) + // p_lo = p0_lo + (Q << 32) + // + // But in this particular case here, the full p_lo is not required. + // Effectively we only need to add the highest bit in p_lo to p_hi (and + // Q_hi + 1 does not overflow). + + Q += uint64_t{1} << (64 - 32 - 1); // round, ties up + + const uint64_t h = p3 + p2_hi + p1_hi + (Q >> 32); + + return {h, x.e + y.e + 64}; + } + + /*! + @brief normalize x such that the significand is >= 2^(q-1) + @pre x.f != 0 + */ + static diyfp normalize(diyfp x) noexcept + { + assert(x.f != 0); + + while ((x.f >> 63) == 0) + { + x.f <<= 1; + x.e--; + } + + return x; + } + + /*! + @brief normalize x such that the result has the exponent E + @pre e >= x.e and the upper e - x.e bits of x.f must be zero. + */ + static diyfp normalize_to(const diyfp& x, const int target_exponent) noexcept + { + const int delta = x.e - target_exponent; + + assert(delta >= 0); + assert(((x.f << delta) >> delta) == x.f); + + return {x.f << delta, target_exponent}; + } +}; + +struct boundaries +{ + diyfp w; + diyfp minus; + diyfp plus; +}; + +/*! +Compute the (normalized) diyfp representing the input number 'value' and its +boundaries. + +@pre value must be finite and positive +*/ +template <typename FloatType> +boundaries compute_boundaries(FloatType value) +{ + assert(std::isfinite(value)); + assert(value > 0); + + // Convert the IEEE representation into a diyfp. + // + // If v is denormal: + // value = 0.F * 2^(1 - bias) = ( F) * 2^(1 - bias - (p-1)) + // If v is normalized: + // value = 1.F * 2^(E - bias) = (2^(p-1) + F) * 2^(E - bias - (p-1)) + + static_assert(std::numeric_limits<FloatType>::is_iec559, + "internal error: dtoa_short requires an IEEE-754 floating-point implementation"); + + constexpr int kPrecision = std::numeric_limits<FloatType>::digits; // = p (includes the hidden bit) + constexpr int kBias = std::numeric_limits<FloatType>::max_exponent - 1 + (kPrecision - 1); + constexpr int kMinExp = 1 - kBias; + constexpr uint64_t kHiddenBit = uint64_t{1} << (kPrecision - 1); // = 2^(p-1) + + using bits_type = typename std::conditional< kPrecision == 24, uint32_t, uint64_t >::type; + + const uint64_t bits = reinterpret_bits<bits_type>(value); + const uint64_t E = bits >> (kPrecision - 1); + const uint64_t F = bits & (kHiddenBit - 1); + + const bool is_denormal = (E == 0); + const diyfp v = is_denormal + ? diyfp(F, kMinExp) + : diyfp(F + kHiddenBit, static_cast<int>(E) - kBias); + + // Compute the boundaries m- and m+ of the floating-point value + // v = f * 2^e. + // + // Determine v- and v+, the floating-point predecessor and successor if v, + // respectively. + // + // v- = v - 2^e if f != 2^(p-1) or e == e_min (A) + // = v - 2^(e-1) if f == 2^(p-1) and e > e_min (B) + // + // v+ = v + 2^e + // + // Let m- = (v- + v) / 2 and m+ = (v + v+) / 2. All real numbers _strictly_ + // between m- and m+ round to v, regardless of how the input rounding + // algorithm breaks ties. + // + // ---+-------------+-------------+-------------+-------------+--- (A) + // v- m- v m+ v+ + // + // -----------------+------+------+-------------+-------------+--- (B) + // v- m- v m+ v+ + + const bool lower_boundary_is_closer = (F == 0 and E > 1); + const diyfp m_plus = diyfp(2 * v.f + 1, v.e - 1); + const diyfp m_minus = lower_boundary_is_closer + ? diyfp(4 * v.f - 1, v.e - 2) // (B) + : diyfp(2 * v.f - 1, v.e - 1); // (A) + + // Determine the normalized w+ = m+. + const diyfp w_plus = diyfp::normalize(m_plus); + + // Determine w- = m- such that e_(w-) = e_(w+). + const diyfp w_minus = diyfp::normalize_to(m_minus, w_plus.e); + + return {diyfp::normalize(v), w_minus, w_plus}; +} + +// Given normalized diyfp w, Grisu needs to find a (normalized) cached +// power-of-ten c, such that the exponent of the product c * w = f * 2^e lies +// within a certain range [alpha, gamma] (Definition 3.2 from [1]) +// +// alpha <= e = e_c + e_w + q <= gamma +// +// or +// +// f_c * f_w * 2^alpha <= f_c 2^(e_c) * f_w 2^(e_w) * 2^q +// <= f_c * f_w * 2^gamma +// +// Since c and w are normalized, i.e. 2^(q-1) <= f < 2^q, this implies +// +// 2^(q-1) * 2^(q-1) * 2^alpha <= c * w * 2^q < 2^q * 2^q * 2^gamma +// +// or +// +// 2^(q - 2 + alpha) <= c * w < 2^(q + gamma) +// +// The choice of (alpha,gamma) determines the size of the table and the form of +// the digit generation procedure. Using (alpha,gamma)=(-60,-32) works out well +// in practice: +// +// The idea is to cut the number c * w = f * 2^e into two parts, which can be +// processed independently: An integral part p1, and a fractional part p2: +// +// f * 2^e = ( (f div 2^-e) * 2^-e + (f mod 2^-e) ) * 2^e +// = (f div 2^-e) + (f mod 2^-e) * 2^e +// = p1 + p2 * 2^e +// +// The conversion of p1 into decimal form requires a series of divisions and +// modulos by (a power of) 10. These operations are faster for 32-bit than for +// 64-bit integers, so p1 should ideally fit into a 32-bit integer. This can be +// achieved by choosing +// +// -e >= 32 or e <= -32 := gamma +// +// In order to convert the fractional part +// +// p2 * 2^e = p2 / 2^-e = d[-1] / 10^1 + d[-2] / 10^2 + ... +// +// into decimal form, the fraction is repeatedly multiplied by 10 and the digits +// d[-i] are extracted in order: +// +// (10 * p2) div 2^-e = d[-1] +// (10 * p2) mod 2^-e = d[-2] / 10^1 + ... +// +// The multiplication by 10 must not overflow. It is sufficient to choose +// +// 10 * p2 < 16 * p2 = 2^4 * p2 <= 2^64. +// +// Since p2 = f mod 2^-e < 2^-e, +// +// -e <= 60 or e >= -60 := alpha + +constexpr int kAlpha = -60; +constexpr int kGamma = -32; + +struct cached_power // c = f * 2^e ~= 10^k +{ + uint64_t f; + int e; + int k; +}; + +/*! +For a normalized diyfp w = f * 2^e, this function returns a (normalized) cached +power-of-ten c = f_c * 2^e_c, such that the exponent of the product w * c +satisfies (Definition 3.2 from [1]) + + alpha <= e_c + e + q <= gamma. +*/ +inline cached_power get_cached_power_for_binary_exponent(int e) +{ + // Now + // + // alpha <= e_c + e + q <= gamma (1) + // ==> f_c * 2^alpha <= c * 2^e * 2^q + // + // and since the c's are normalized, 2^(q-1) <= f_c, + // + // ==> 2^(q - 1 + alpha) <= c * 2^(e + q) + // ==> 2^(alpha - e - 1) <= c + // + // If c were an exakt power of ten, i.e. c = 10^k, one may determine k as + // + // k = ceil( log_10( 2^(alpha - e - 1) ) ) + // = ceil( (alpha - e - 1) * log_10(2) ) + // + // From the paper: + // "In theory the result of the procedure could be wrong since c is rounded, + // and the computation itself is approximated [...]. In practice, however, + // this simple function is sufficient." + // + // For IEEE double precision floating-point numbers converted into + // normalized diyfp's w = f * 2^e, with q = 64, + // + // e >= -1022 (min IEEE exponent) + // -52 (p - 1) + // -52 (p - 1, possibly normalize denormal IEEE numbers) + // -11 (normalize the diyfp) + // = -1137 + // + // and + // + // e <= +1023 (max IEEE exponent) + // -52 (p - 1) + // -11 (normalize the diyfp) + // = 960 + // + // This binary exponent range [-1137,960] results in a decimal exponent + // range [-307,324]. One does not need to store a cached power for each + // k in this range. For each such k it suffices to find a cached power + // such that the exponent of the product lies in [alpha,gamma]. + // This implies that the difference of the decimal exponents of adjacent + // table entries must be less than or equal to + // + // floor( (gamma - alpha) * log_10(2) ) = 8. + // + // (A smaller distance gamma-alpha would require a larger table.) + + // NB: + // Actually this function returns c, such that -60 <= e_c + e + 64 <= -34. + + constexpr int kCachedPowersSize = 79; + constexpr int kCachedPowersMinDecExp = -300; + constexpr int kCachedPowersDecStep = 8; + + static constexpr cached_power kCachedPowers[] = + { + { 0xAB70FE17C79AC6CA, -1060, -300 }, + { 0xFF77B1FCBEBCDC4F, -1034, -292 }, + { 0xBE5691EF416BD60C, -1007, -284 }, + { 0x8DD01FAD907FFC3C, -980, -276 }, + { 0xD3515C2831559A83, -954, -268 }, + { 0x9D71AC8FADA6C9B5, -927, -260 }, + { 0xEA9C227723EE8BCB, -901, -252 }, + { 0xAECC49914078536D, -874, -244 }, + { 0x823C12795DB6CE57, -847, -236 }, + { 0xC21094364DFB5637, -821, -228 }, + { 0x9096EA6F3848984F, -794, -220 }, + { 0xD77485CB25823AC7, -768, -212 }, + { 0xA086CFCD97BF97F4, -741, -204 }, + { 0xEF340A98172AACE5, -715, -196 }, + { 0xB23867FB2A35B28E, -688, -188 }, + { 0x84C8D4DFD2C63F3B, -661, -180 }, + { 0xC5DD44271AD3CDBA, -635, -172 }, + { 0x936B9FCEBB25C996, -608, -164 }, + { 0xDBAC6C247D62A584, -582, -156 }, + { 0xA3AB66580D5FDAF6, -555, -148 }, + { 0xF3E2F893DEC3F126, -529, -140 }, + { 0xB5B5ADA8AAFF80B8, -502, -132 }, + { 0x87625F056C7C4A8B, -475, -124 }, + { 0xC9BCFF6034C13053, -449, -116 }, + { 0x964E858C91BA2655, -422, -108 }, + { 0xDFF9772470297EBD, -396, -100 }, + { 0xA6DFBD9FB8E5B88F, -369, -92 }, + { 0xF8A95FCF88747D94, -343, -84 }, + { 0xB94470938FA89BCF, -316, -76 }, + { 0x8A08F0F8BF0F156B, -289, -68 }, + { 0xCDB02555653131B6, -263, -60 }, + { 0x993FE2C6D07B7FAC, -236, -52 }, + { 0xE45C10C42A2B3B06, -210, -44 }, + { 0xAA242499697392D3, -183, -36 }, + { 0xFD87B5F28300CA0E, -157, -28 }, + { 0xBCE5086492111AEB, -130, -20 }, + { 0x8CBCCC096F5088CC, -103, -12 }, + { 0xD1B71758E219652C, -77, -4 }, + { 0x9C40000000000000, -50, 4 }, + { 0xE8D4A51000000000, -24, 12 }, + { 0xAD78EBC5AC620000, 3, 20 }, + { 0x813F3978F8940984, 30, 28 }, + { 0xC097CE7BC90715B3, 56, 36 }, + { 0x8F7E32CE7BEA5C70, 83, 44 }, + { 0xD5D238A4ABE98068, 109, 52 }, + { 0x9F4F2726179A2245, 136, 60 }, + { 0xED63A231D4C4FB27, 162, 68 }, + { 0xB0DE65388CC8ADA8, 189, 76 }, + { 0x83C7088E1AAB65DB, 216, 84 }, + { 0xC45D1DF942711D9A, 242, 92 }, + { 0x924D692CA61BE758, 269, 100 }, + { 0xDA01EE641A708DEA, 295, 108 }, + { 0xA26DA3999AEF774A, 322, 116 }, + { 0xF209787BB47D6B85, 348, 124 }, + { 0xB454E4A179DD1877, 375, 132 }, + { 0x865B86925B9BC5C2, 402, 140 }, + { 0xC83553C5C8965D3D, 428, 148 }, + { 0x952AB45CFA97A0B3, 455, 156 }, + { 0xDE469FBD99A05FE3, 481, 164 }, + { 0xA59BC234DB398C25, 508, 172 }, + { 0xF6C69A72A3989F5C, 534, 180 }, + { 0xB7DCBF5354E9BECE, 561, 188 }, + { 0x88FCF317F22241E2, 588, 196 }, + { 0xCC20CE9BD35C78A5, 614, 204 }, + { 0x98165AF37B2153DF, 641, 212 }, + { 0xE2A0B5DC971F303A, 667, 220 }, + { 0xA8D9D1535CE3B396, 694, 228 }, + { 0xFB9B7CD9A4A7443C, 720, 236 }, + { 0xBB764C4CA7A44410, 747, 244 }, + { 0x8BAB8EEFB6409C1A, 774, 252 }, + { 0xD01FEF10A657842C, 800, 260 }, + { 0x9B10A4E5E9913129, 827, 268 }, + { 0xE7109BFBA19C0C9D, 853, 276 }, + { 0xAC2820D9623BF429, 880, 284 }, + { 0x80444B5E7AA7CF85, 907, 292 }, + { 0xBF21E44003ACDD2D, 933, 300 }, + { 0x8E679C2F5E44FF8F, 960, 308 }, + { 0xD433179D9C8CB841, 986, 316 }, + { 0x9E19DB92B4E31BA9, 1013, 324 }, + }; + + // This computation gives exactly the same results for k as + // k = ceil((kAlpha - e - 1) * 0.30102999566398114) + // for |e| <= 1500, but doesn't require floating-point operations. + // NB: log_10(2) ~= 78913 / 2^18 + assert(e >= -1500); + assert(e <= 1500); + const int f = kAlpha - e - 1; + const int k = (f * 78913) / (1 << 18) + static_cast<int>(f > 0); + + const int index = (-kCachedPowersMinDecExp + k + (kCachedPowersDecStep - 1)) / kCachedPowersDecStep; + assert(index >= 0); + assert(index < kCachedPowersSize); + static_cast<void>(kCachedPowersSize); // Fix warning. + + const cached_power cached = kCachedPowers[index]; + assert(kAlpha <= cached.e + e + 64); + assert(kGamma >= cached.e + e + 64); + + return cached; +} + +/*! +For n != 0, returns k, such that pow10 := 10^(k-1) <= n < 10^k. +For n == 0, returns 1 and sets pow10 := 1. +*/ +inline int find_largest_pow10(const uint32_t n, uint32_t& pow10) +{ + // LCOV_EXCL_START + if (n >= 1000000000) + { + pow10 = 1000000000; + return 10; + } + // LCOV_EXCL_STOP + else if (n >= 100000000) + { + pow10 = 100000000; + return 9; + } + else if (n >= 10000000) + { + pow10 = 10000000; + return 8; + } + else if (n >= 1000000) + { + pow10 = 1000000; + return 7; + } + else if (n >= 100000) + { + pow10 = 100000; + return 6; + } + else if (n >= 10000) + { + pow10 = 10000; + return 5; + } + else if (n >= 1000) + { + pow10 = 1000; + return 4; + } + else if (n >= 100) + { + pow10 = 100; + return 3; + } + else if (n >= 10) + { + pow10 = 10; + return 2; + } + else + { + pow10 = 1; + return 1; + } +} + +inline void grisu2_round(char* buf, int len, uint64_t dist, uint64_t delta, + uint64_t rest, uint64_t ten_k) +{ + assert(len >= 1); + assert(dist <= delta); + assert(rest <= delta); + assert(ten_k > 0); + + // <--------------------------- delta ----> + // <---- dist ---------> + // --------------[------------------+-------------------]-------------- + // M- w M+ + // + // ten_k + // <------> + // <---- rest ----> + // --------------[------------------+----+--------------]-------------- + // w V + // = buf * 10^k + // + // ten_k represents a unit-in-the-last-place in the decimal representation + // stored in buf. + // Decrement buf by ten_k while this takes buf closer to w. + + // The tests are written in this order to avoid overflow in unsigned + // integer arithmetic. + + while (rest < dist + and delta - rest >= ten_k + and (rest + ten_k < dist or dist - rest > rest + ten_k - dist)) + { + assert(buf[len - 1] != '0'); + buf[len - 1]--; + rest += ten_k; + } +} + +/*! +Generates V = buffer * 10^decimal_exponent, such that M- <= V <= M+. +M- and M+ must be normalized and share the same exponent -60 <= e <= -32. +*/ +inline void grisu2_digit_gen(char* buffer, int& length, int& decimal_exponent, + diyfp M_minus, diyfp w, diyfp M_plus) +{ + static_assert(kAlpha >= -60, "internal error"); + static_assert(kGamma <= -32, "internal error"); + + // Generates the digits (and the exponent) of a decimal floating-point + // number V = buffer * 10^decimal_exponent in the range [M-, M+]. The diyfp's + // w, M- and M+ share the same exponent e, which satisfies alpha <= e <= gamma. + // + // <--------------------------- delta ----> + // <---- dist ---------> + // --------------[------------------+-------------------]-------------- + // M- w M+ + // + // Grisu2 generates the digits of M+ from left to right and stops as soon as + // V is in [M-,M+]. + + assert(M_plus.e >= kAlpha); + assert(M_plus.e <= kGamma); + + uint64_t delta = diyfp::sub(M_plus, M_minus).f; // (significand of (M+ - M-), implicit exponent is e) + uint64_t dist = diyfp::sub(M_plus, w ).f; // (significand of (M+ - w ), implicit exponent is e) + + // Split M+ = f * 2^e into two parts p1 and p2 (note: e < 0): + // + // M+ = f * 2^e + // = ((f div 2^-e) * 2^-e + (f mod 2^-e)) * 2^e + // = ((p1 ) * 2^-e + (p2 )) * 2^e + // = p1 + p2 * 2^e + + const diyfp one(uint64_t{1} << -M_plus.e, M_plus.e); + + auto p1 = static_cast<uint32_t>(M_plus.f >> -one.e); // p1 = f div 2^-e (Since -e >= 32, p1 fits into a 32-bit int.) + uint64_t p2 = M_plus.f & (one.f - 1); // p2 = f mod 2^-e + + // 1) + // + // Generate the digits of the integral part p1 = d[n-1]...d[1]d[0] + + assert(p1 > 0); + + uint32_t pow10; + const int k = find_largest_pow10(p1, pow10); + + // 10^(k-1) <= p1 < 10^k, pow10 = 10^(k-1) + // + // p1 = (p1 div 10^(k-1)) * 10^(k-1) + (p1 mod 10^(k-1)) + // = (d[k-1] ) * 10^(k-1) + (p1 mod 10^(k-1)) + // + // M+ = p1 + p2 * 2^e + // = d[k-1] * 10^(k-1) + (p1 mod 10^(k-1)) + p2 * 2^e + // = d[k-1] * 10^(k-1) + ((p1 mod 10^(k-1)) * 2^-e + p2) * 2^e + // = d[k-1] * 10^(k-1) + ( rest) * 2^e + // + // Now generate the digits d[n] of p1 from left to right (n = k-1,...,0) + // + // p1 = d[k-1]...d[n] * 10^n + d[n-1]...d[0] + // + // but stop as soon as + // + // rest * 2^e = (d[n-1]...d[0] * 2^-e + p2) * 2^e <= delta * 2^e + + int n = k; + while (n > 0) + { + // Invariants: + // M+ = buffer * 10^n + (p1 + p2 * 2^e) (buffer = 0 for n = k) + // pow10 = 10^(n-1) <= p1 < 10^n + // + const uint32_t d = p1 / pow10; // d = p1 div 10^(n-1) + const uint32_t r = p1 % pow10; // r = p1 mod 10^(n-1) + // + // M+ = buffer * 10^n + (d * 10^(n-1) + r) + p2 * 2^e + // = (buffer * 10 + d) * 10^(n-1) + (r + p2 * 2^e) + // + assert(d <= 9); + buffer[length++] = static_cast<char>('0' + d); // buffer := buffer * 10 + d + // + // M+ = buffer * 10^(n-1) + (r + p2 * 2^e) + // + p1 = r; + n--; + // + // M+ = buffer * 10^n + (p1 + p2 * 2^e) + // pow10 = 10^n + // + + // Now check if enough digits have been generated. + // Compute + // + // p1 + p2 * 2^e = (p1 * 2^-e + p2) * 2^e = rest * 2^e + // + // Note: + // Since rest and delta share the same exponent e, it suffices to + // compare the significands. + const uint64_t rest = (uint64_t{p1} << -one.e) + p2; + if (rest <= delta) + { + // V = buffer * 10^n, with M- <= V <= M+. + + decimal_exponent += n; + + // We may now just stop. But instead look if the buffer could be + // decremented to bring V closer to w. + // + // pow10 = 10^n is now 1 ulp in the decimal representation V. + // The rounding procedure works with diyfp's with an implicit + // exponent of e. + // + // 10^n = (10^n * 2^-e) * 2^e = ulp * 2^e + // + const uint64_t ten_n = uint64_t{pow10} << -one.e; + grisu2_round(buffer, length, dist, delta, rest, ten_n); + + return; + } + + pow10 /= 10; + // + // pow10 = 10^(n-1) <= p1 < 10^n + // Invariants restored. + } + + // 2) + // + // The digits of the integral part have been generated: + // + // M+ = d[k-1]...d[1]d[0] + p2 * 2^e + // = buffer + p2 * 2^e + // + // Now generate the digits of the fractional part p2 * 2^e. + // + // Note: + // No decimal point is generated: the exponent is adjusted instead. + // + // p2 actually represents the fraction + // + // p2 * 2^e + // = p2 / 2^-e + // = d[-1] / 10^1 + d[-2] / 10^2 + ... + // + // Now generate the digits d[-m] of p1 from left to right (m = 1,2,...) + // + // p2 * 2^e = d[-1]d[-2]...d[-m] * 10^-m + // + 10^-m * (d[-m-1] / 10^1 + d[-m-2] / 10^2 + ...) + // + // using + // + // 10^m * p2 = ((10^m * p2) div 2^-e) * 2^-e + ((10^m * p2) mod 2^-e) + // = ( d) * 2^-e + ( r) + // + // or + // 10^m * p2 * 2^e = d + r * 2^e + // + // i.e. + // + // M+ = buffer + p2 * 2^e + // = buffer + 10^-m * (d + r * 2^e) + // = (buffer * 10^m + d) * 10^-m + 10^-m * r * 2^e + // + // and stop as soon as 10^-m * r * 2^e <= delta * 2^e + + assert(p2 > delta); + + int m = 0; + for (;;) + { + // Invariant: + // M+ = buffer * 10^-m + 10^-m * (d[-m-1] / 10 + d[-m-2] / 10^2 + ...) * 2^e + // = buffer * 10^-m + 10^-m * (p2 ) * 2^e + // = buffer * 10^-m + 10^-m * (1/10 * (10 * p2) ) * 2^e + // = buffer * 10^-m + 10^-m * (1/10 * ((10*p2 div 2^-e) * 2^-e + (10*p2 mod 2^-e)) * 2^e + // + assert(p2 <= UINT64_MAX / 10); + p2 *= 10; + const uint64_t d = p2 >> -one.e; // d = (10 * p2) div 2^-e + const uint64_t r = p2 & (one.f - 1); // r = (10 * p2) mod 2^-e + // + // M+ = buffer * 10^-m + 10^-m * (1/10 * (d * 2^-e + r) * 2^e + // = buffer * 10^-m + 10^-m * (1/10 * (d + r * 2^e)) + // = (buffer * 10 + d) * 10^(-m-1) + 10^(-m-1) * r * 2^e + // + assert(d <= 9); + buffer[length++] = static_cast<char>('0' + d); // buffer := buffer * 10 + d + // + // M+ = buffer * 10^(-m-1) + 10^(-m-1) * r * 2^e + // + p2 = r; + m++; + // + // M+ = buffer * 10^-m + 10^-m * p2 * 2^e + // Invariant restored. + + // Check if enough digits have been generated. + // + // 10^-m * p2 * 2^e <= delta * 2^e + // p2 * 2^e <= 10^m * delta * 2^e + // p2 <= 10^m * delta + delta *= 10; + dist *= 10; + if (p2 <= delta) + { + break; + } + } + + // V = buffer * 10^-m, with M- <= V <= M+. + + decimal_exponent -= m; + + // 1 ulp in the decimal representation is now 10^-m. + // Since delta and dist are now scaled by 10^m, we need to do the + // same with ulp in order to keep the units in sync. + // + // 10^m * 10^-m = 1 = 2^-e * 2^e = ten_m * 2^e + // + const uint64_t ten_m = one.f; + grisu2_round(buffer, length, dist, delta, p2, ten_m); + + // By construction this algorithm generates the shortest possible decimal + // number (Loitsch, Theorem 6.2) which rounds back to w. + // For an input number of precision p, at least + // + // N = 1 + ceil(p * log_10(2)) + // + // decimal digits are sufficient to identify all binary floating-point + // numbers (Matula, "In-and-Out conversions"). + // This implies that the algorithm does not produce more than N decimal + // digits. + // + // N = 17 for p = 53 (IEEE double precision) + // N = 9 for p = 24 (IEEE single precision) +} + +/*! +v = buf * 10^decimal_exponent +len is the length of the buffer (number of decimal digits) +The buffer must be large enough, i.e. >= max_digits10. +*/ +inline void grisu2(char* buf, int& len, int& decimal_exponent, + diyfp m_minus, diyfp v, diyfp m_plus) +{ + assert(m_plus.e == m_minus.e); + assert(m_plus.e == v.e); + + // --------(-----------------------+-----------------------)-------- (A) + // m- v m+ + // + // --------------------(-----------+-----------------------)-------- (B) + // m- v m+ + // + // First scale v (and m- and m+) such that the exponent is in the range + // [alpha, gamma]. + + const cached_power cached = get_cached_power_for_binary_exponent(m_plus.e); + + const diyfp c_minus_k(cached.f, cached.e); // = c ~= 10^-k + + // The exponent of the products is = v.e + c_minus_k.e + q and is in the range [alpha,gamma] + const diyfp w = diyfp::mul(v, c_minus_k); + const diyfp w_minus = diyfp::mul(m_minus, c_minus_k); + const diyfp w_plus = diyfp::mul(m_plus, c_minus_k); + + // ----(---+---)---------------(---+---)---------------(---+---)---- + // w- w w+ + // = c*m- = c*v = c*m+ + // + // diyfp::mul rounds its result and c_minus_k is approximated too. w, w- and + // w+ are now off by a small amount. + // In fact: + // + // w - v * 10^k < 1 ulp + // + // To account for this inaccuracy, add resp. subtract 1 ulp. + // + // --------+---[---------------(---+---)---------------]---+-------- + // w- M- w M+ w+ + // + // Now any number in [M-, M+] (bounds included) will round to w when input, + // regardless of how the input rounding algorithm breaks ties. + // + // And digit_gen generates the shortest possible such number in [M-, M+]. + // Note that this does not mean that Grisu2 always generates the shortest + // possible number in the interval (m-, m+). + const diyfp M_minus(w_minus.f + 1, w_minus.e); + const diyfp M_plus (w_plus.f - 1, w_plus.e ); + + decimal_exponent = -cached.k; // = -(-k) = k + + grisu2_digit_gen(buf, len, decimal_exponent, M_minus, w, M_plus); +} + +/*! +v = buf * 10^decimal_exponent +len is the length of the buffer (number of decimal digits) +The buffer must be large enough, i.e. >= max_digits10. +*/ +template <typename FloatType> +void grisu2(char* buf, int& len, int& decimal_exponent, FloatType value) +{ + static_assert(diyfp::kPrecision >= std::numeric_limits<FloatType>::digits + 3, + "internal error: not enough precision"); + + assert(std::isfinite(value)); + assert(value > 0); + + // If the neighbors (and boundaries) of 'value' are always computed for double-precision + // numbers, all float's can be recovered using strtod (and strtof). However, the resulting + // decimal representations are not exactly "short". + // + // The documentation for 'std::to_chars' (https://en.cppreference.com/w/cpp/utility/to_chars) + // says "value is converted to a string as if by std::sprintf in the default ("C") locale" + // and since sprintf promotes float's to double's, I think this is exactly what 'std::to_chars' + // does. + // On the other hand, the documentation for 'std::to_chars' requires that "parsing the + // representation using the corresponding std::from_chars function recovers value exactly". That + // indicates that single precision floating-point numbers should be recovered using + // 'std::strtof'. + // + // NB: If the neighbors are computed for single-precision numbers, there is a single float + // (7.0385307e-26f) which can't be recovered using strtod. The resulting double precision + // value is off by 1 ulp. +#if 0 + const boundaries w = compute_boundaries(static_cast<double>(value)); +#else + const boundaries w = compute_boundaries(value); +#endif + + grisu2(buf, len, decimal_exponent, w.minus, w.w, w.plus); +} + +/*! +@brief appends a decimal representation of e to buf +@return a pointer to the element following the exponent. +@pre -1000 < e < 1000 +*/ +inline char* append_exponent(char* buf, int e) +{ + assert(e > -1000); + assert(e < 1000); + + if (e < 0) + { + e = -e; + *buf++ = '-'; + } + else + { + *buf++ = '+'; + } + + auto k = static_cast<uint32_t>(e); + if (k < 10) + { + // Always print at least two digits in the exponent. + // This is for compatibility with printf("%g"). + *buf++ = '0'; + *buf++ = static_cast<char>('0' + k); + } + else if (k < 100) + { + *buf++ = static_cast<char>('0' + k / 10); + k %= 10; + *buf++ = static_cast<char>('0' + k); + } + else + { + *buf++ = static_cast<char>('0' + k / 100); + k %= 100; + *buf++ = static_cast<char>('0' + k / 10); + k %= 10; + *buf++ = static_cast<char>('0' + k); + } + + return buf; +} + +/*! +@brief prettify v = buf * 10^decimal_exponent + +If v is in the range [10^min_exp, 10^max_exp) it will be printed in fixed-point +notation. Otherwise it will be printed in exponential notation. + +@pre min_exp < 0 +@pre max_exp > 0 +*/ +inline char* format_buffer(char* buf, int len, int decimal_exponent, + int min_exp, int max_exp) +{ + assert(min_exp < 0); + assert(max_exp > 0); + + const int k = len; + const int n = len + decimal_exponent; + + // v = buf * 10^(n-k) + // k is the length of the buffer (number of decimal digits) + // n is the position of the decimal point relative to the start of the buffer. + + if (k <= n and n <= max_exp) + { + // digits[000] + // len <= max_exp + 2 + + std::memset(buf + k, '0', static_cast<size_t>(n - k)); + // Make it look like a floating-point number (#362, #378) + buf[n + 0] = '.'; + buf[n + 1] = '0'; + return buf + (n + 2); + } + + if (0 < n and n <= max_exp) + { + // dig.its + // len <= max_digits10 + 1 + + assert(k > n); + + std::memmove(buf + (n + 1), buf + n, static_cast<size_t>(k - n)); + buf[n] = '.'; + return buf + (k + 1); + } + + if (min_exp < n and n <= 0) + { + // 0.[000]digits + // len <= 2 + (-min_exp - 1) + max_digits10 + + std::memmove(buf + (2 + -n), buf, static_cast<size_t>(k)); + buf[0] = '0'; + buf[1] = '.'; + std::memset(buf + 2, '0', static_cast<size_t>(-n)); + return buf + (2 + (-n) + k); + } + + if (k == 1) + { + // dE+123 + // len <= 1 + 5 + + buf += 1; + } + else + { + // d.igitsE+123 + // len <= max_digits10 + 1 + 5 + + std::memmove(buf + 2, buf + 1, static_cast<size_t>(k - 1)); + buf[1] = '.'; + buf += 1 + k; + } + + *buf++ = 'e'; + return append_exponent(buf, n - 1); +} + +} // namespace dtoa_impl + +/*! +@brief generates a decimal representation of the floating-point number value in [first, last). + +The format of the resulting decimal representation is similar to printf's %g +format. Returns an iterator pointing past-the-end of the decimal representation. + +@note The input number must be finite, i.e. NaN's and Inf's are not supported. +@note The buffer must be large enough. +@note The result is NOT null-terminated. +*/ +template <typename FloatType> +char* to_chars(char* first, const char* last, FloatType value) +{ + static_cast<void>(last); // maybe unused - fix warning + assert(std::isfinite(value)); + + // Use signbit(value) instead of (value < 0) since signbit works for -0. + if (std::signbit(value)) + { + value = -value; + *first++ = '-'; + } + + if (value == 0) // +-0 + { + *first++ = '0'; + // Make it look like a floating-point number (#362, #378) + *first++ = '.'; + *first++ = '0'; + return first; + } + + assert(last - first >= std::numeric_limits<FloatType>::max_digits10); + + // Compute v = buffer * 10^decimal_exponent. + // The decimal digits are stored in the buffer, which needs to be interpreted + // as an unsigned decimal integer. + // len is the length of the buffer, i.e. the number of decimal digits. + int len = 0; + int decimal_exponent = 0; + dtoa_impl::grisu2(first, len, decimal_exponent, value); + + assert(len <= std::numeric_limits<FloatType>::max_digits10); + + // Format the buffer like printf("%.*g", prec, value) + constexpr int kMinExp = -4; + // Use digits10 here to increase compatibility with version 2. + constexpr int kMaxExp = std::numeric_limits<FloatType>::digits10; + + assert(last - first >= kMaxExp + 2); + assert(last - first >= 2 + (-kMinExp - 1) + std::numeric_limits<FloatType>::max_digits10); + assert(last - first >= std::numeric_limits<FloatType>::max_digits10 + 6); + + return dtoa_impl::format_buffer(first, len, decimal_exponent, kMinExp, kMaxExp); +} + +} // namespace detail +} // namespace nlohmann + +// #include <nlohmann/detail/macro_scope.hpp> + +// #include <nlohmann/detail/meta/cpp_future.hpp> + +// #include <nlohmann/detail/output/binary_writer.hpp> + +// #include <nlohmann/detail/output/output_adapters.hpp> + +// #include <nlohmann/detail/value_t.hpp> + + +namespace nlohmann +{ +namespace detail +{ +/////////////////// +// serialization // +/////////////////// + +/// how to treat decoding errors +enum class error_handler_t +{ + strict, ///< throw a type_error exception in case of invalid UTF-8 + replace, ///< replace invalid UTF-8 sequences with U+FFFD + ignore ///< ignore invalid UTF-8 sequences +}; + +template<typename BasicJsonType> +class serializer +{ + using string_t = typename BasicJsonType::string_t; + using number_float_t = typename BasicJsonType::number_float_t; + using number_integer_t = typename BasicJsonType::number_integer_t; + using number_unsigned_t = typename BasicJsonType::number_unsigned_t; + static constexpr uint8_t UTF8_ACCEPT = 0; + static constexpr uint8_t UTF8_REJECT = 1; + + public: + /*! + @param[in] s output stream to serialize to + @param[in] ichar indentation character to use + @param[in] error_handler_ how to react on decoding errors + */ + serializer(output_adapter_t<char> s, const char ichar, + error_handler_t error_handler_ = error_handler_t::strict) + : o(std::move(s)) + , loc(std::localeconv()) + , thousands_sep(loc->thousands_sep == nullptr ? '\0' : * (loc->thousands_sep)) + , decimal_point(loc->decimal_point == nullptr ? '\0' : * (loc->decimal_point)) + , indent_char(ichar) + , indent_string(512, indent_char) + , error_handler(error_handler_) + {} + + // delete because of pointer members + serializer(const serializer&) = delete; + serializer& operator=(const serializer&) = delete; + serializer(serializer&&) = delete; + serializer& operator=(serializer&&) = delete; + ~serializer() = default; + + /*! + @brief internal implementation of the serialization function + + This function is called by the public member function dump and organizes + the serialization internally. The indentation level is propagated as + additional parameter. In case of arrays and objects, the function is + called recursively. + + - strings and object keys are escaped using `escape_string()` + - integer numbers are converted implicitly via `operator<<` + - floating-point numbers are converted to a string using `"%g"` format + + @param[in] val value to serialize + @param[in] pretty_print whether the output shall be pretty-printed + @param[in] indent_step the indent level + @param[in] current_indent the current indent level (only used internally) + */ + void dump(const BasicJsonType& val, const bool pretty_print, + const bool ensure_ascii, + const unsigned int indent_step, + const unsigned int current_indent = 0) + { + switch (val.m_type) + { + case value_t::object: + { + if (val.m_value.object->empty()) + { + o->write_characters("{}", 2); + return; + } + + if (pretty_print) + { + o->write_characters("{\n", 2); + + // variable to hold indentation for recursive calls + const auto new_indent = current_indent + indent_step; + if (JSON_UNLIKELY(indent_string.size() < new_indent)) + { + indent_string.resize(indent_string.size() * 2, ' '); + } + + // first n-1 elements + auto i = val.m_value.object->cbegin(); + for (std::size_t cnt = 0; cnt < val.m_value.object->size() - 1; ++cnt, ++i) + { + o->write_characters(indent_string.c_str(), new_indent); + o->write_character('\"'); + dump_escaped(i->first, ensure_ascii); + o->write_characters("\": ", 3); + dump(i->second, true, ensure_ascii, indent_step, new_indent); + o->write_characters(",\n", 2); + } + + // last element + assert(i != val.m_value.object->cend()); + assert(std::next(i) == val.m_value.object->cend()); + o->write_characters(indent_string.c_str(), new_indent); + o->write_character('\"'); + dump_escaped(i->first, ensure_ascii); + o->write_characters("\": ", 3); + dump(i->second, true, ensure_ascii, indent_step, new_indent); + + o->write_character('\n'); + o->write_characters(indent_string.c_str(), current_indent); + o->write_character('}'); + } + else + { + o->write_character('{'); + + // first n-1 elements + auto i = val.m_value.object->cbegin(); + for (std::size_t cnt = 0; cnt < val.m_value.object->size() - 1; ++cnt, ++i) + { + o->write_character('\"'); + dump_escaped(i->first, ensure_ascii); + o->write_characters("\":", 2); + dump(i->second, false, ensure_ascii, indent_step, current_indent); + o->write_character(','); + } + + // last element + assert(i != val.m_value.object->cend()); + assert(std::next(i) == val.m_value.object->cend()); + o->write_character('\"'); + dump_escaped(i->first, ensure_ascii); + o->write_characters("\":", 2); + dump(i->second, false, ensure_ascii, indent_step, current_indent); + + o->write_character('}'); + } + + return; + } + + case value_t::array: + { + if (val.m_value.array->empty()) + { + o->write_characters("[]", 2); + return; + } + + if (pretty_print) + { + o->write_characters("[\n", 2); + + // variable to hold indentation for recursive calls + const auto new_indent = current_indent + indent_step; + if (JSON_UNLIKELY(indent_string.size() < new_indent)) + { + indent_string.resize(indent_string.size() * 2, ' '); + } + + // first n-1 elements + for (auto i = val.m_value.array->cbegin(); + i != val.m_value.array->cend() - 1; ++i) + { + o->write_characters(indent_string.c_str(), new_indent); + dump(*i, true, ensure_ascii, indent_step, new_indent); + o->write_characters(",\n", 2); + } + + // last element + assert(not val.m_value.array->empty()); + o->write_characters(indent_string.c_str(), new_indent); + dump(val.m_value.array->back(), true, ensure_ascii, indent_step, new_indent); + + o->write_character('\n'); + o->write_characters(indent_string.c_str(), current_indent); + o->write_character(']'); + } + else + { + o->write_character('['); + + // first n-1 elements + for (auto i = val.m_value.array->cbegin(); + i != val.m_value.array->cend() - 1; ++i) + { + dump(*i, false, ensure_ascii, indent_step, current_indent); + o->write_character(','); + } + + // last element + assert(not val.m_value.array->empty()); + dump(val.m_value.array->back(), false, ensure_ascii, indent_step, current_indent); + + o->write_character(']'); + } + + return; + } + + case value_t::string: + { + o->write_character('\"'); + dump_escaped(*val.m_value.string, ensure_ascii); + o->write_character('\"'); + return; + } + + case value_t::boolean: + { + if (val.m_value.boolean) + { + o->write_characters("true", 4); + } + else + { + o->write_characters("false", 5); + } + return; + } + + case value_t::number_integer: + { + dump_integer(val.m_value.number_integer); + return; + } + + case value_t::number_unsigned: + { + dump_integer(val.m_value.number_unsigned); + return; + } + + case value_t::number_float: + { + dump_float(val.m_value.number_float); + return; + } + + case value_t::discarded: + { + o->write_characters("<discarded>", 11); + return; + } + + case value_t::null: + { + o->write_characters("null", 4); + return; + } + } + } + + private: + /*! + @brief dump escaped string + + Escape a string by replacing certain special characters by a sequence of an + escape character (backslash) and another character and other control + characters by a sequence of "\u" followed by a four-digit hex + representation. The escaped string is written to output stream @a o. + + @param[in] s the string to escape + @param[in] ensure_ascii whether to escape non-ASCII characters with + \uXXXX sequences + + @complexity Linear in the length of string @a s. + */ + void dump_escaped(const string_t& s, const bool ensure_ascii) + { + uint32_t codepoint; + uint8_t state = UTF8_ACCEPT; + std::size_t bytes = 0; // number of bytes written to string_buffer + + // number of bytes written at the point of the last valid byte + std::size_t bytes_after_last_accept = 0; + std::size_t undumped_chars = 0; + + for (std::size_t i = 0; i < s.size(); ++i) + { + const auto byte = static_cast<uint8_t>(s[i]); + + switch (decode(state, codepoint, byte)) + { + case UTF8_ACCEPT: // decode found a new code point + { + switch (codepoint) + { + case 0x08: // backspace + { + string_buffer[bytes++] = '\\'; + string_buffer[bytes++] = 'b'; + break; + } + + case 0x09: // horizontal tab + { + string_buffer[bytes++] = '\\'; + string_buffer[bytes++] = 't'; + break; + } + + case 0x0A: // newline + { + string_buffer[bytes++] = '\\'; + string_buffer[bytes++] = 'n'; + break; + } + + case 0x0C: // formfeed + { + string_buffer[bytes++] = '\\'; + string_buffer[bytes++] = 'f'; + break; + } + + case 0x0D: // carriage return + { + string_buffer[bytes++] = '\\'; + string_buffer[bytes++] = 'r'; + break; + } + + case 0x22: // quotation mark + { + string_buffer[bytes++] = '\\'; + string_buffer[bytes++] = '\"'; + break; + } + + case 0x5C: // reverse solidus + { + string_buffer[bytes++] = '\\'; + string_buffer[bytes++] = '\\'; + break; + } + + default: + { + // escape control characters (0x00..0x1F) or, if + // ensure_ascii parameter is used, non-ASCII characters + if ((codepoint <= 0x1F) or (ensure_ascii and (codepoint >= 0x7F))) + { + if (codepoint <= 0xFFFF) + { + (std::snprintf)(string_buffer.data() + bytes, 7, "\\u%04x", + static_cast<uint16_t>(codepoint)); + bytes += 6; + } + else + { + (std::snprintf)(string_buffer.data() + bytes, 13, "\\u%04x\\u%04x", + static_cast<uint16_t>(0xD7C0 + (codepoint >> 10)), + static_cast<uint16_t>(0xDC00 + (codepoint & 0x3FF))); + bytes += 12; + } + } + else + { + // copy byte to buffer (all previous bytes + // been copied have in default case above) + string_buffer[bytes++] = s[i]; + } + break; + } + } + + // write buffer and reset index; there must be 13 bytes + // left, as this is the maximal number of bytes to be + // written ("\uxxxx\uxxxx\0") for one code point + if (string_buffer.size() - bytes < 13) + { + o->write_characters(string_buffer.data(), bytes); + bytes = 0; + } + + // remember the byte position of this accept + bytes_after_last_accept = bytes; + undumped_chars = 0; + break; + } + + case UTF8_REJECT: // decode found invalid UTF-8 byte + { + switch (error_handler) + { + case error_handler_t::strict: + { + std::string sn(3, '\0'); + (std::snprintf)(&sn[0], sn.size(), "%.2X", byte); + JSON_THROW(type_error::create(316, "invalid UTF-8 byte at index " + std::to_string(i) + ": 0x" + sn)); + } + + case error_handler_t::ignore: + case error_handler_t::replace: + { + // in case we saw this character the first time, we + // would like to read it again, because the byte + // may be OK for itself, but just not OK for the + // previous sequence + if (undumped_chars > 0) + { + --i; + } + + // reset length buffer to the last accepted index; + // thus removing/ignoring the invalid characters + bytes = bytes_after_last_accept; + + if (error_handler == error_handler_t::replace) + { + // add a replacement character + if (ensure_ascii) + { + string_buffer[bytes++] = '\\'; + string_buffer[bytes++] = 'u'; + string_buffer[bytes++] = 'f'; + string_buffer[bytes++] = 'f'; + string_buffer[bytes++] = 'f'; + string_buffer[bytes++] = 'd'; + } + else + { + string_buffer[bytes++] = detail::binary_writer<BasicJsonType, char>::to_char_type('\xEF'); + string_buffer[bytes++] = detail::binary_writer<BasicJsonType, char>::to_char_type('\xBF'); + string_buffer[bytes++] = detail::binary_writer<BasicJsonType, char>::to_char_type('\xBD'); + } + bytes_after_last_accept = bytes; + } + + undumped_chars = 0; + + // continue processing the string + state = UTF8_ACCEPT; + break; + } + } + break; + } + + default: // decode found yet incomplete multi-byte code point + { + if (not ensure_ascii) + { + // code point will not be escaped - copy byte to buffer + string_buffer[bytes++] = s[i]; + } + ++undumped_chars; + break; + } + } + } + + // we finished processing the string + if (JSON_LIKELY(state == UTF8_ACCEPT)) + { + // write buffer + if (bytes > 0) + { + o->write_characters(string_buffer.data(), bytes); + } + } + else + { + // we finish reading, but do not accept: string was incomplete + switch (error_handler) + { + case error_handler_t::strict: + { + std::string sn(3, '\0'); + (std::snprintf)(&sn[0], sn.size(), "%.2X", static_cast<uint8_t>(s.back())); + JSON_THROW(type_error::create(316, "incomplete UTF-8 string; last byte: 0x" + sn)); + } + + case error_handler_t::ignore: + { + // write all accepted bytes + o->write_characters(string_buffer.data(), bytes_after_last_accept); + break; + } + + case error_handler_t::replace: + { + // write all accepted bytes + o->write_characters(string_buffer.data(), bytes_after_last_accept); + // add a replacement character + if (ensure_ascii) + { + o->write_characters("\\ufffd", 6); + } + else + { + o->write_characters("\xEF\xBF\xBD", 3); + } + break; + } + } + } + } + + /*! + @brief dump an integer + + Dump a given integer to output stream @a o. Works internally with + @a number_buffer. + + @param[in] x integer number (signed or unsigned) to dump + @tparam NumberType either @a number_integer_t or @a number_unsigned_t + */ + template<typename NumberType, detail::enable_if_t< + std::is_same<NumberType, number_unsigned_t>::value or + std::is_same<NumberType, number_integer_t>::value, + int> = 0> + void dump_integer(NumberType x) + { + // special case for "0" + if (x == 0) + { + o->write_character('0'); + return; + } + + const bool is_negative = std::is_same<NumberType, number_integer_t>::value and not (x >= 0); // see issue #755 + std::size_t i = 0; + + while (x != 0) + { + // spare 1 byte for '\0' + assert(i < number_buffer.size() - 1); + + const auto digit = std::labs(static_cast<long>(x % 10)); + number_buffer[i++] = static_cast<char>('0' + digit); + x /= 10; + } + + if (is_negative) + { + // make sure there is capacity for the '-' + assert(i < number_buffer.size() - 2); + number_buffer[i++] = '-'; + } + + std::reverse(number_buffer.begin(), number_buffer.begin() + i); + o->write_characters(number_buffer.data(), i); + } + + /*! + @brief dump a floating-point number + + Dump a given floating-point number to output stream @a o. Works internally + with @a number_buffer. + + @param[in] x floating-point number to dump + */ + void dump_float(number_float_t x) + { + // NaN / inf + if (not std::isfinite(x)) + { + o->write_characters("null", 4); + return; + } + + // If number_float_t is an IEEE-754 single or double precision number, + // use the Grisu2 algorithm to produce short numbers which are + // guaranteed to round-trip, using strtof and strtod, resp. + // + // NB: The test below works if <long double> == <double>. + static constexpr bool is_ieee_single_or_double + = (std::numeric_limits<number_float_t>::is_iec559 and std::numeric_limits<number_float_t>::digits == 24 and std::numeric_limits<number_float_t>::max_exponent == 128) or + (std::numeric_limits<number_float_t>::is_iec559 and std::numeric_limits<number_float_t>::digits == 53 and std::numeric_limits<number_float_t>::max_exponent == 1024); + + dump_float(x, std::integral_constant<bool, is_ieee_single_or_double>()); + } + + void dump_float(number_float_t x, std::true_type /*is_ieee_single_or_double*/) + { + char* begin = number_buffer.data(); + char* end = ::nlohmann::detail::to_chars(begin, begin + number_buffer.size(), x); + + o->write_characters(begin, static_cast<size_t>(end - begin)); + } + + void dump_float(number_float_t x, std::false_type /*is_ieee_single_or_double*/) + { + // get number of digits for a float -> text -> float round-trip + static constexpr auto d = std::numeric_limits<number_float_t>::max_digits10; + + // the actual conversion + std::ptrdiff_t len = (std::snprintf)(number_buffer.data(), number_buffer.size(), "%.*g", d, x); + + // negative value indicates an error + assert(len > 0); + // check if buffer was large enough + assert(static_cast<std::size_t>(len) < number_buffer.size()); + + // erase thousands separator + if (thousands_sep != '\0') + { + const auto end = std::remove(number_buffer.begin(), + number_buffer.begin() + len, thousands_sep); + std::fill(end, number_buffer.end(), '\0'); + assert((end - number_buffer.begin()) <= len); + len = (end - number_buffer.begin()); + } + + // convert decimal point to '.' + if (decimal_point != '\0' and decimal_point != '.') + { + const auto dec_pos = std::find(number_buffer.begin(), number_buffer.end(), decimal_point); + if (dec_pos != number_buffer.end()) + { + *dec_pos = '.'; + } + } + + o->write_characters(number_buffer.data(), static_cast<std::size_t>(len)); + + // determine if need to append ".0" + const bool value_is_int_like = + std::none_of(number_buffer.begin(), number_buffer.begin() + len + 1, + [](char c) + { + return (c == '.' or c == 'e'); + }); + + if (value_is_int_like) + { + o->write_characters(".0", 2); + } + } + + /*! + @brief check whether a string is UTF-8 encoded + + The function checks each byte of a string whether it is UTF-8 encoded. The + result of the check is stored in the @a state parameter. The function must + be called initially with state 0 (accept). State 1 means the string must + be rejected, because the current byte is not allowed. If the string is + completely processed, but the state is non-zero, the string ended + prematurely; that is, the last byte indicated more bytes should have + followed. + + @param[in,out] state the state of the decoding + @param[in,out] codep codepoint (valid only if resulting state is UTF8_ACCEPT) + @param[in] byte next byte to decode + @return new state + + @note The function has been edited: a std::array is used. + + @copyright Copyright (c) 2008-2009 Bjoern Hoehrmann <bjoern@hoehrmann.de> + @sa http://bjoern.hoehrmann.de/utf-8/decoder/dfa/ + */ + static uint8_t decode(uint8_t& state, uint32_t& codep, const uint8_t byte) noexcept + { + static const std::array<uint8_t, 400> utf8d = + { + { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 00..1F + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 20..3F + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 40..5F + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 60..7F + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, // 80..9F + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // A0..BF + 8, 8, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // C0..DF + 0xA, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x4, 0x3, 0x3, // E0..EF + 0xB, 0x6, 0x6, 0x6, 0x5, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, // F0..FF + 0x0, 0x1, 0x2, 0x3, 0x5, 0x8, 0x7, 0x1, 0x1, 0x1, 0x4, 0x6, 0x1, 0x1, 0x1, 0x1, // s0..s0 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, // s1..s2 + 1, 2, 1, 1, 1, 1, 1, 2, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, // s3..s4 + 1, 2, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 1, 3, 1, 1, 1, 1, 1, 1, // s5..s6 + 1, 3, 1, 1, 1, 1, 1, 3, 1, 3, 1, 1, 1, 1, 1, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 // s7..s8 + } + }; + + const uint8_t type = utf8d[byte]; + + codep = (state != UTF8_ACCEPT) + ? (byte & 0x3fu) | (codep << 6) + : static_cast<uint32_t>(0xff >> type) & (byte); + + state = utf8d[256u + state * 16u + type]; + return state; + } + + private: + /// the output of the serializer + output_adapter_t<char> o = nullptr; + + /// a (hopefully) large enough character buffer + std::array<char, 64> number_buffer{{}}; + + /// the locale + const std::lconv* loc = nullptr; + /// the locale's thousand separator character + const char thousands_sep = '\0'; + /// the locale's decimal point character + const char decimal_point = '\0'; + + /// string buffer + std::array<char, 512> string_buffer{{}}; + + /// the indentation character + const char indent_char; + /// the indentation string + string_t indent_string; + + /// error_handler how to react on decoding errors + const error_handler_t error_handler; +}; +} // namespace detail +} // namespace nlohmann + +// #include <nlohmann/detail/json_ref.hpp> + + +#include <initializer_list> +#include <utility> + +// #include <nlohmann/detail/meta/type_traits.hpp> + + +namespace nlohmann +{ +namespace detail +{ +template<typename BasicJsonType> +class json_ref +{ + public: + using value_type = BasicJsonType; + + json_ref(value_type&& value) + : owned_value(std::move(value)), value_ref(&owned_value), is_rvalue(true) + {} + + json_ref(const value_type& value) + : value_ref(const_cast<value_type*>(&value)), is_rvalue(false) + {} + + json_ref(std::initializer_list<json_ref> init) + : owned_value(init), value_ref(&owned_value), is_rvalue(true) + {} + + template < + class... Args, + enable_if_t<std::is_constructible<value_type, Args...>::value, int> = 0 > + json_ref(Args && ... args) + : owned_value(std::forward<Args>(args)...), value_ref(&owned_value), + is_rvalue(true) {} + + // class should be movable only + json_ref(json_ref&&) = default; + json_ref(const json_ref&) = delete; + json_ref& operator=(const json_ref&) = delete; + json_ref& operator=(json_ref&&) = delete; + ~json_ref() = default; + + value_type moved_or_copied() const + { + if (is_rvalue) + { + return std::move(*value_ref); + } + return *value_ref; + } + + value_type const& operator*() const + { + return *static_cast<value_type const*>(value_ref); + } + + value_type const* operator->() const + { + return static_cast<value_type const*>(value_ref); + } + + private: + mutable value_type owned_value = nullptr; + value_type* value_ref = nullptr; + const bool is_rvalue; +}; +} // namespace detail +} // namespace nlohmann + +// #include <nlohmann/detail/json_pointer.hpp> + + +#include <cassert> // assert +#include <numeric> // accumulate +#include <string> // string +#include <vector> // vector + +// #include <nlohmann/detail/macro_scope.hpp> + +// #include <nlohmann/detail/exceptions.hpp> + +// #include <nlohmann/detail/value_t.hpp> + + +namespace nlohmann +{ +template<typename BasicJsonType> +class json_pointer +{ + // allow basic_json to access private members + NLOHMANN_BASIC_JSON_TPL_DECLARATION + friend class basic_json; + + public: + /*! + @brief create JSON pointer + + Create a JSON pointer according to the syntax described in + [Section 3 of RFC6901](https://tools.ietf.org/html/rfc6901#section-3). + + @param[in] s string representing the JSON pointer; if omitted, the empty + string is assumed which references the whole JSON value + + @throw parse_error.107 if the given JSON pointer @a s is nonempty and does + not begin with a slash (`/`); see example below + + @throw parse_error.108 if a tilde (`~`) in the given JSON pointer @a s is + not followed by `0` (representing `~`) or `1` (representing `/`); see + example below + + @liveexample{The example shows the construction several valid JSON pointers + as well as the exceptional behavior.,json_pointer} + + @since version 2.0.0 + */ + explicit json_pointer(const std::string& s = "") + : reference_tokens(split(s)) + {} + + /*! + @brief return a string representation of the JSON pointer + + @invariant For each JSON pointer `ptr`, it holds: + @code {.cpp} + ptr == json_pointer(ptr.to_string()); + @endcode + + @return a string representation of the JSON pointer + + @liveexample{The example shows the result of `to_string`., + json_pointer__to_string} + + @since version 2.0.0 + */ + std::string to_string() const + { + return std::accumulate(reference_tokens.begin(), reference_tokens.end(), + std::string{}, + [](const std::string & a, const std::string & b) + { + return a + "/" + escape(b); + }); + } + + /// @copydoc to_string() + operator std::string() const + { + return to_string(); + } + + /*! + @param[in] s reference token to be converted into an array index + + @return integer representation of @a s + + @throw out_of_range.404 if string @a s could not be converted to an integer + */ + static int array_index(const std::string& s) + { + std::size_t processed_chars = 0; + const int res = std::stoi(s, &processed_chars); + + // check if the string was completely read + if (JSON_UNLIKELY(processed_chars != s.size())) + { + JSON_THROW(detail::out_of_range::create(404, "unresolved reference token '" + s + "'")); + } + + return res; + } + + private: + /*! + @brief remove and return last reference pointer + @throw out_of_range.405 if JSON pointer has no parent + */ + std::string pop_back() + { + if (JSON_UNLIKELY(is_root())) + { + JSON_THROW(detail::out_of_range::create(405, "JSON pointer has no parent")); + } + + auto last = reference_tokens.back(); + reference_tokens.pop_back(); + return last; + } + + /// return whether pointer points to the root document + bool is_root() const noexcept + { + return reference_tokens.empty(); + } + + json_pointer top() const + { + if (JSON_UNLIKELY(is_root())) + { + JSON_THROW(detail::out_of_range::create(405, "JSON pointer has no parent")); + } + + json_pointer result = *this; + result.reference_tokens = {reference_tokens[0]}; + return result; + } + + /*! + @brief create and return a reference to the pointed to value + + @complexity Linear in the number of reference tokens. + + @throw parse_error.109 if array index is not a number + @throw type_error.313 if value cannot be unflattened + */ + BasicJsonType& get_and_create(BasicJsonType& j) const + { + using size_type = typename BasicJsonType::size_type; + auto result = &j; + + // in case no reference tokens exist, return a reference to the JSON value + // j which will be overwritten by a primitive value + for (const auto& reference_token : reference_tokens) + { + switch (result->m_type) + { + case detail::value_t::null: + { + if (reference_token == "0") + { + // start a new array if reference token is 0 + result = &result->operator[](0); + } + else + { + // start a new object otherwise + result = &result->operator[](reference_token); + } + break; + } + + case detail::value_t::object: + { + // create an entry in the object + result = &result->operator[](reference_token); + break; + } + + case detail::value_t::array: + { + // create an entry in the array + JSON_TRY + { + result = &result->operator[](static_cast<size_type>(array_index(reference_token))); + } + JSON_CATCH(std::invalid_argument&) + { + JSON_THROW(detail::parse_error::create(109, 0, "array index '" + reference_token + "' is not a number")); + } + break; + } + + /* + The following code is only reached if there exists a reference + token _and_ the current value is primitive. In this case, we have + an error situation, because primitive values may only occur as + single value; that is, with an empty list of reference tokens. + */ + default: + JSON_THROW(detail::type_error::create(313, "invalid value to unflatten")); + } + } + + return *result; + } + + /*! + @brief return a reference to the pointed to value + + @note This version does not throw if a value is not present, but tries to + create nested values instead. For instance, calling this function + with pointer `"/this/that"` on a null value is equivalent to calling + `operator[]("this").operator[]("that")` on that value, effectively + changing the null value to an object. + + @param[in] ptr a JSON value + + @return reference to the JSON value pointed to by the JSON pointer + + @complexity Linear in the length of the JSON pointer. + + @throw parse_error.106 if an array index begins with '0' + @throw parse_error.109 if an array index was not a number + @throw out_of_range.404 if the JSON pointer can not be resolved + */ + BasicJsonType& get_unchecked(BasicJsonType* ptr) const + { + using size_type = typename BasicJsonType::size_type; + for (const auto& reference_token : reference_tokens) + { + // convert null values to arrays or objects before continuing + if (ptr->m_type == detail::value_t::null) + { + // check if reference token is a number + const bool nums = + std::all_of(reference_token.begin(), reference_token.end(), + [](const char x) + { + return (x >= '0' and x <= '9'); + }); + + // change value to array for numbers or "-" or to object otherwise + *ptr = (nums or reference_token == "-") + ? detail::value_t::array + : detail::value_t::object; + } + + switch (ptr->m_type) + { + case detail::value_t::object: + { + // use unchecked object access + ptr = &ptr->operator[](reference_token); + break; + } + + case detail::value_t::array: + { + // error condition (cf. RFC 6901, Sect. 4) + if (JSON_UNLIKELY(reference_token.size() > 1 and reference_token[0] == '0')) + { + JSON_THROW(detail::parse_error::create(106, 0, + "array index '" + reference_token + + "' must not begin with '0'")); + } + + if (reference_token == "-") + { + // explicitly treat "-" as index beyond the end + ptr = &ptr->operator[](ptr->m_value.array->size()); + } + else + { + // convert array index to number; unchecked access + JSON_TRY + { + ptr = &ptr->operator[]( + static_cast<size_type>(array_index(reference_token))); + } + JSON_CATCH(std::invalid_argument&) + { + JSON_THROW(detail::parse_error::create(109, 0, "array index '" + reference_token + "' is not a number")); + } + } + break; + } + + default: + JSON_THROW(detail::out_of_range::create(404, "unresolved reference token '" + reference_token + "'")); + } + } + + return *ptr; + } + + /*! + @throw parse_error.106 if an array index begins with '0' + @throw parse_error.109 if an array index was not a number + @throw out_of_range.402 if the array index '-' is used + @throw out_of_range.404 if the JSON pointer can not be resolved + */ + BasicJsonType& get_checked(BasicJsonType* ptr) const + { + using size_type = typename BasicJsonType::size_type; + for (const auto& reference_token : reference_tokens) + { + switch (ptr->m_type) + { + case detail::value_t::object: + { + // note: at performs range check + ptr = &ptr->at(reference_token); + break; + } + + case detail::value_t::array: + { + if (JSON_UNLIKELY(reference_token == "-")) + { + // "-" always fails the range check + JSON_THROW(detail::out_of_range::create(402, + "array index '-' (" + std::to_string(ptr->m_value.array->size()) + + ") is out of range")); + } + + // error condition (cf. RFC 6901, Sect. 4) + if (JSON_UNLIKELY(reference_token.size() > 1 and reference_token[0] == '0')) + { + JSON_THROW(detail::parse_error::create(106, 0, + "array index '" + reference_token + + "' must not begin with '0'")); + } + + // note: at performs range check + JSON_TRY + { + ptr = &ptr->at(static_cast<size_type>(array_index(reference_token))); + } + JSON_CATCH(std::invalid_argument&) + { + JSON_THROW(detail::parse_error::create(109, 0, "array index '" + reference_token + "' is not a number")); + } + break; + } + + default: + JSON_THROW(detail::out_of_range::create(404, "unresolved reference token '" + reference_token + "'")); + } + } + + return *ptr; + } + + /*! + @brief return a const reference to the pointed to value + + @param[in] ptr a JSON value + + @return const reference to the JSON value pointed to by the JSON + pointer + + @throw parse_error.106 if an array index begins with '0' + @throw parse_error.109 if an array index was not a number + @throw out_of_range.402 if the array index '-' is used + @throw out_of_range.404 if the JSON pointer can not be resolved + */ + const BasicJsonType& get_unchecked(const BasicJsonType* ptr) const + { + using size_type = typename BasicJsonType::size_type; + for (const auto& reference_token : reference_tokens) + { + switch (ptr->m_type) + { + case detail::value_t::object: + { + // use unchecked object access + ptr = &ptr->operator[](reference_token); + break; + } + + case detail::value_t::array: + { + if (JSON_UNLIKELY(reference_token == "-")) + { + // "-" cannot be used for const access + JSON_THROW(detail::out_of_range::create(402, + "array index '-' (" + std::to_string(ptr->m_value.array->size()) + + ") is out of range")); + } + + // error condition (cf. RFC 6901, Sect. 4) + if (JSON_UNLIKELY(reference_token.size() > 1 and reference_token[0] == '0')) + { + JSON_THROW(detail::parse_error::create(106, 0, + "array index '" + reference_token + + "' must not begin with '0'")); + } + + // use unchecked array access + JSON_TRY + { + ptr = &ptr->operator[]( + static_cast<size_type>(array_index(reference_token))); + } + JSON_CATCH(std::invalid_argument&) + { + JSON_THROW(detail::parse_error::create(109, 0, "array index '" + reference_token + "' is not a number")); + } + break; + } + + default: + JSON_THROW(detail::out_of_range::create(404, "unresolved reference token '" + reference_token + "'")); + } + } + + return *ptr; + } + + /*! + @throw parse_error.106 if an array index begins with '0' + @throw parse_error.109 if an array index was not a number + @throw out_of_range.402 if the array index '-' is used + @throw out_of_range.404 if the JSON pointer can not be resolved + */ + const BasicJsonType& get_checked(const BasicJsonType* ptr) const + { + using size_type = typename BasicJsonType::size_type; + for (const auto& reference_token : reference_tokens) + { + switch (ptr->m_type) + { + case detail::value_t::object: + { + // note: at performs range check + ptr = &ptr->at(reference_token); + break; + } + + case detail::value_t::array: + { + if (JSON_UNLIKELY(reference_token == "-")) + { + // "-" always fails the range check + JSON_THROW(detail::out_of_range::create(402, + "array index '-' (" + std::to_string(ptr->m_value.array->size()) + + ") is out of range")); + } + + // error condition (cf. RFC 6901, Sect. 4) + if (JSON_UNLIKELY(reference_token.size() > 1 and reference_token[0] == '0')) + { + JSON_THROW(detail::parse_error::create(106, 0, + "array index '" + reference_token + + "' must not begin with '0'")); + } + + // note: at performs range check + JSON_TRY + { + ptr = &ptr->at(static_cast<size_type>(array_index(reference_token))); + } + JSON_CATCH(std::invalid_argument&) + { + JSON_THROW(detail::parse_error::create(109, 0, "array index '" + reference_token + "' is not a number")); + } + break; + } + + default: + JSON_THROW(detail::out_of_range::create(404, "unresolved reference token '" + reference_token + "'")); + } + } + + return *ptr; + } + + /*! + @brief split the string input to reference tokens + + @note This function is only called by the json_pointer constructor. + All exceptions below are documented there. + + @throw parse_error.107 if the pointer is not empty or begins with '/' + @throw parse_error.108 if character '~' is not followed by '0' or '1' + */ + static std::vector<std::string> split(const std::string& reference_string) + { + std::vector<std::string> result; + + // special case: empty reference string -> no reference tokens + if (reference_string.empty()) + { + return result; + } + + // check if nonempty reference string begins with slash + if (JSON_UNLIKELY(reference_string[0] != '/')) + { + JSON_THROW(detail::parse_error::create(107, 1, + "JSON pointer must be empty or begin with '/' - was: '" + + reference_string + "'")); + } + + // extract the reference tokens: + // - slash: position of the last read slash (or end of string) + // - start: position after the previous slash + for ( + // search for the first slash after the first character + std::size_t slash = reference_string.find_first_of('/', 1), + // set the beginning of the first reference token + start = 1; + // we can stop if start == 0 (if slash == std::string::npos) + start != 0; + // set the beginning of the next reference token + // (will eventually be 0 if slash == std::string::npos) + start = (slash == std::string::npos) ? 0 : slash + 1, + // find next slash + slash = reference_string.find_first_of('/', start)) + { + // use the text between the beginning of the reference token + // (start) and the last slash (slash). + auto reference_token = reference_string.substr(start, slash - start); + + // check reference tokens are properly escaped + for (std::size_t pos = reference_token.find_first_of('~'); + pos != std::string::npos; + pos = reference_token.find_first_of('~', pos + 1)) + { + assert(reference_token[pos] == '~'); + + // ~ must be followed by 0 or 1 + if (JSON_UNLIKELY(pos == reference_token.size() - 1 or + (reference_token[pos + 1] != '0' and + reference_token[pos + 1] != '1'))) + { + JSON_THROW(detail::parse_error::create(108, 0, "escape character '~' must be followed with '0' or '1'")); + } + } + + // finally, store the reference token + unescape(reference_token); + result.push_back(reference_token); + } + + return result; + } + + /*! + @brief replace all occurrences of a substring by another string + + @param[in,out] s the string to manipulate; changed so that all + occurrences of @a f are replaced with @a t + @param[in] f the substring to replace with @a t + @param[in] t the string to replace @a f + + @pre The search string @a f must not be empty. **This precondition is + enforced with an assertion.** + + @since version 2.0.0 + */ + static void replace_substring(std::string& s, const std::string& f, + const std::string& t) + { + assert(not f.empty()); + for (auto pos = s.find(f); // find first occurrence of f + pos != std::string::npos; // make sure f was found + s.replace(pos, f.size(), t), // replace with t, and + pos = s.find(f, pos + t.size())) // find next occurrence of f + {} + } + + /// escape "~" to "~0" and "/" to "~1" + static std::string escape(std::string s) + { + replace_substring(s, "~", "~0"); + replace_substring(s, "/", "~1"); + return s; + } + + /// unescape "~1" to tilde and "~0" to slash (order is important!) + static void unescape(std::string& s) + { + replace_substring(s, "~1", "/"); + replace_substring(s, "~0", "~"); + } + + /*! + @param[in] reference_string the reference string to the current value + @param[in] value the value to consider + @param[in,out] result the result object to insert values to + + @note Empty objects or arrays are flattened to `null`. + */ + static void flatten(const std::string& reference_string, + const BasicJsonType& value, + BasicJsonType& result) + { + switch (value.m_type) + { + case detail::value_t::array: + { + if (value.m_value.array->empty()) + { + // flatten empty array as null + result[reference_string] = nullptr; + } + else + { + // iterate array and use index as reference string + for (std::size_t i = 0; i < value.m_value.array->size(); ++i) + { + flatten(reference_string + "/" + std::to_string(i), + value.m_value.array->operator[](i), result); + } + } + break; + } + + case detail::value_t::object: + { + if (value.m_value.object->empty()) + { + // flatten empty object as null + result[reference_string] = nullptr; + } + else + { + // iterate object and use keys as reference string + for (const auto& element : *value.m_value.object) + { + flatten(reference_string + "/" + escape(element.first), element.second, result); + } + } + break; + } + + default: + { + // add primitive value with its reference string + result[reference_string] = value; + break; + } + } + } + + /*! + @param[in] value flattened JSON + + @return unflattened JSON + + @throw parse_error.109 if array index is not a number + @throw type_error.314 if value is not an object + @throw type_error.315 if object values are not primitive + @throw type_error.313 if value cannot be unflattened + */ + static BasicJsonType + unflatten(const BasicJsonType& value) + { + if (JSON_UNLIKELY(not value.is_object())) + { + JSON_THROW(detail::type_error::create(314, "only objects can be unflattened")); + } + + BasicJsonType result; + + // iterate the JSON object values + for (const auto& element : *value.m_value.object) + { + if (JSON_UNLIKELY(not element.second.is_primitive())) + { + JSON_THROW(detail::type_error::create(315, "values in object must be primitive")); + } + + // assign value to reference pointed to by JSON pointer; Note that if + // the JSON pointer is "" (i.e., points to the whole value), function + // get_and_create returns a reference to result itself. An assignment + // will then create a primitive value. + json_pointer(element.first).get_and_create(result) = element.second; + } + + return result; + } + + friend bool operator==(json_pointer const& lhs, + json_pointer const& rhs) noexcept + { + return (lhs.reference_tokens == rhs.reference_tokens); + } + + friend bool operator!=(json_pointer const& lhs, + json_pointer const& rhs) noexcept + { + return not (lhs == rhs); + } + + /// the reference tokens + std::vector<std::string> reference_tokens; +}; +} // namespace nlohmann + +// #include <nlohmann/adl_serializer.hpp> + + +#include <utility> + +// #include <nlohmann/detail/conversions/from_json.hpp> + +// #include <nlohmann/detail/conversions/to_json.hpp> + + +namespace nlohmann +{ + +template<typename, typename> +struct adl_serializer +{ + /*! + @brief convert a JSON value to any value type + + This function is usually called by the `get()` function of the + @ref basic_json class (either explicit or via conversion operators). + + @param[in] j JSON value to read from + @param[in,out] val value to write to + */ + template<typename BasicJsonType, typename ValueType> + static auto from_json(BasicJsonType&& j, ValueType& val) noexcept( + noexcept(::nlohmann::from_json(std::forward<BasicJsonType>(j), val))) + -> decltype(::nlohmann::from_json(std::forward<BasicJsonType>(j), val), void()) + { + ::nlohmann::from_json(std::forward<BasicJsonType>(j), val); + } + + /*! + @brief convert any value type to a JSON value + + This function is usually called by the constructors of the @ref basic_json + class. + + @param[in,out] j JSON value to write to + @param[in] val value to read from + */ + template <typename BasicJsonType, typename ValueType> + static auto to_json(BasicJsonType& j, ValueType&& val) noexcept( + noexcept(::nlohmann::to_json(j, std::forward<ValueType>(val)))) + -> decltype(::nlohmann::to_json(j, std::forward<ValueType>(val)), void()) + { + ::nlohmann::to_json(j, std::forward<ValueType>(val)); + } +}; + +} // namespace nlohmann + + +/*! +@brief namespace for Niels Lohmann +@see https://github.com/nlohmann +@since version 1.0.0 +*/ +namespace nlohmann +{ + +/*! +@brief a class to store JSON values + +@tparam ObjectType type for JSON objects (`std::map` by default; will be used +in @ref object_t) +@tparam ArrayType type for JSON arrays (`std::vector` by default; will be used +in @ref array_t) +@tparam StringType type for JSON strings and object keys (`std::string` by +default; will be used in @ref string_t) +@tparam BooleanType type for JSON booleans (`bool` by default; will be used +in @ref boolean_t) +@tparam NumberIntegerType type for JSON integer numbers (`int64_t` by +default; will be used in @ref number_integer_t) +@tparam NumberUnsignedType type for JSON unsigned integer numbers (@c +`uint64_t` by default; will be used in @ref number_unsigned_t) +@tparam NumberFloatType type for JSON floating-point numbers (`double` by +default; will be used in @ref number_float_t) +@tparam AllocatorType type of the allocator to use (`std::allocator` by +default) +@tparam JSONSerializer the serializer to resolve internal calls to `to_json()` +and `from_json()` (@ref adl_serializer by default) + +@requirement The class satisfies the following concept requirements: +- Basic + - [DefaultConstructible](https://en.cppreference.com/w/cpp/named_req/DefaultConstructible): + JSON values can be default constructed. The result will be a JSON null + value. + - [MoveConstructible](https://en.cppreference.com/w/cpp/named_req/MoveConstructible): + A JSON value can be constructed from an rvalue argument. + - [CopyConstructible](https://en.cppreference.com/w/cpp/named_req/CopyConstructible): + A JSON value can be copy-constructed from an lvalue expression. + - [MoveAssignable](https://en.cppreference.com/w/cpp/named_req/MoveAssignable): + A JSON value van be assigned from an rvalue argument. + - [CopyAssignable](https://en.cppreference.com/w/cpp/named_req/CopyAssignable): + A JSON value can be copy-assigned from an lvalue expression. + - [Destructible](https://en.cppreference.com/w/cpp/named_req/Destructible): + JSON values can be destructed. +- Layout + - [StandardLayoutType](https://en.cppreference.com/w/cpp/named_req/StandardLayoutType): + JSON values have + [standard layout](https://en.cppreference.com/w/cpp/language/data_members#Standard_layout): + All non-static data members are private and standard layout types, the + class has no virtual functions or (virtual) base classes. +- Library-wide + - [EqualityComparable](https://en.cppreference.com/w/cpp/named_req/EqualityComparable): + JSON values can be compared with `==`, see @ref + operator==(const_reference,const_reference). + - [LessThanComparable](https://en.cppreference.com/w/cpp/named_req/LessThanComparable): + JSON values can be compared with `<`, see @ref + operator<(const_reference,const_reference). + - [Swappable](https://en.cppreference.com/w/cpp/named_req/Swappable): + Any JSON lvalue or rvalue of can be swapped with any lvalue or rvalue of + other compatible types, using unqualified function call @ref swap(). + - [NullablePointer](https://en.cppreference.com/w/cpp/named_req/NullablePointer): + JSON values can be compared against `std::nullptr_t` objects which are used + to model the `null` value. +- Container + - [Container](https://en.cppreference.com/w/cpp/named_req/Container): + JSON values can be used like STL containers and provide iterator access. + - [ReversibleContainer](https://en.cppreference.com/w/cpp/named_req/ReversibleContainer); + JSON values can be used like STL containers and provide reverse iterator + access. + +@invariant The member variables @a m_value and @a m_type have the following +relationship: +- If `m_type == value_t::object`, then `m_value.object != nullptr`. +- If `m_type == value_t::array`, then `m_value.array != nullptr`. +- If `m_type == value_t::string`, then `m_value.string != nullptr`. +The invariants are checked by member function assert_invariant(). + +@internal +@note ObjectType trick from http://stackoverflow.com/a/9860911 +@endinternal + +@see [RFC 7159: The JavaScript Object Notation (JSON) Data Interchange +Format](http://rfc7159.net/rfc7159) + +@since version 1.0.0 + +@nosubgrouping +*/ +NLOHMANN_BASIC_JSON_TPL_DECLARATION +class basic_json +{ + private: + template<detail::value_t> friend struct detail::external_constructor; + friend ::nlohmann::json_pointer<basic_json>; + friend ::nlohmann::detail::parser<basic_json>; + friend ::nlohmann::detail::serializer<basic_json>; + template<typename BasicJsonType> + friend class ::nlohmann::detail::iter_impl; + template<typename BasicJsonType, typename CharType> + friend class ::nlohmann::detail::binary_writer; + template<typename BasicJsonType, typename SAX> + friend class ::nlohmann::detail::binary_reader; + template<typename BasicJsonType> + friend class ::nlohmann::detail::json_sax_dom_parser; + template<typename BasicJsonType> + friend class ::nlohmann::detail::json_sax_dom_callback_parser; + + /// workaround type for MSVC + using basic_json_t = NLOHMANN_BASIC_JSON_TPL; + + // convenience aliases for types residing in namespace detail; + using lexer = ::nlohmann::detail::lexer<basic_json>; + using parser = ::nlohmann::detail::parser<basic_json>; + + using primitive_iterator_t = ::nlohmann::detail::primitive_iterator_t; + template<typename BasicJsonType> + using internal_iterator = ::nlohmann::detail::internal_iterator<BasicJsonType>; + template<typename BasicJsonType> + using iter_impl = ::nlohmann::detail::iter_impl<BasicJsonType>; + template<typename Iterator> + using iteration_proxy = ::nlohmann::detail::iteration_proxy<Iterator>; + template<typename Base> using json_reverse_iterator = ::nlohmann::detail::json_reverse_iterator<Base>; + + template<typename CharType> + using output_adapter_t = ::nlohmann::detail::output_adapter_t<CharType>; + + using binary_reader = ::nlohmann::detail::binary_reader<basic_json>; + template<typename CharType> using binary_writer = ::nlohmann::detail::binary_writer<basic_json, CharType>; + + using serializer = ::nlohmann::detail::serializer<basic_json>; + + public: + using value_t = detail::value_t; + /// JSON Pointer, see @ref nlohmann::json_pointer + using json_pointer = ::nlohmann::json_pointer<basic_json>; + template<typename T, typename SFINAE> + using json_serializer = JSONSerializer<T, SFINAE>; + /// how to treat decoding errors + using error_handler_t = detail::error_handler_t; + /// helper type for initializer lists of basic_json values + using initializer_list_t = std::initializer_list<detail::json_ref<basic_json>>; + + using input_format_t = detail::input_format_t; + /// SAX interface type, see @ref nlohmann::json_sax + using json_sax_t = json_sax<basic_json>; + + //////////////// + // exceptions // + //////////////// + + /// @name exceptions + /// Classes to implement user-defined exceptions. + /// @{ + + /// @copydoc detail::exception + using exception = detail::exception; + /// @copydoc detail::parse_error + using parse_error = detail::parse_error; + /// @copydoc detail::invalid_iterator + using invalid_iterator = detail::invalid_iterator; + /// @copydoc detail::type_error + using type_error = detail::type_error; + /// @copydoc detail::out_of_range + using out_of_range = detail::out_of_range; + /// @copydoc detail::other_error + using other_error = detail::other_error; + + /// @} + + + ///////////////////// + // container types // + ///////////////////// + + /// @name container types + /// The canonic container types to use @ref basic_json like any other STL + /// container. + /// @{ + + /// the type of elements in a basic_json container + using value_type = basic_json; + + /// the type of an element reference + using reference = value_type&; + /// the type of an element const reference + using const_reference = const value_type&; + + /// a type to represent differences between iterators + using difference_type = std::ptrdiff_t; + /// a type to represent container sizes + using size_type = std::size_t; + + /// the allocator type + using allocator_type = AllocatorType<basic_json>; + + /// the type of an element pointer + using pointer = typename std::allocator_traits<allocator_type>::pointer; + /// the type of an element const pointer + using const_pointer = typename std::allocator_traits<allocator_type>::const_pointer; + + /// an iterator for a basic_json container + using iterator = iter_impl<basic_json>; + /// a const iterator for a basic_json container + using const_iterator = iter_impl<const basic_json>; + /// a reverse iterator for a basic_json container + using reverse_iterator = json_reverse_iterator<typename basic_json::iterator>; + /// a const reverse iterator for a basic_json container + using const_reverse_iterator = json_reverse_iterator<typename basic_json::const_iterator>; + + /// @} + + + /*! + @brief returns the allocator associated with the container + */ + static allocator_type get_allocator() + { + return allocator_type(); + } + + /*! + @brief returns version information on the library + + This function returns a JSON object with information about the library, + including the version number and information on the platform and compiler. + + @return JSON object holding version information + key | description + ----------- | --------------- + `compiler` | Information on the used compiler. It is an object with the following keys: `c++` (the used C++ standard), `family` (the compiler family; possible values are `clang`, `icc`, `gcc`, `ilecpp`, `msvc`, `pgcpp`, `sunpro`, and `unknown`), and `version` (the compiler version). + `copyright` | The copyright line for the library as string. + `name` | The name of the library as string. + `platform` | The used platform as string. Possible values are `win32`, `linux`, `apple`, `unix`, and `unknown`. + `url` | The URL of the project as string. + `version` | The version of the library. It is an object with the following keys: `major`, `minor`, and `patch` as defined by [Semantic Versioning](http://semver.org), and `string` (the version string). + + @liveexample{The following code shows an example output of the `meta()` + function.,meta} + + @exceptionsafety Strong guarantee: if an exception is thrown, there are no + changes to any JSON value. + + @complexity Constant. + + @since 2.1.0 + */ + static basic_json meta() + { + basic_json result; + + result["copyright"] = "(C) 2013-2017 Niels Lohmann"; + result["name"] = "JSON for Modern C++"; + result["url"] = "https://github.com/nlohmann/json"; + result["version"]["string"] = + std::to_string(NLOHMANN_JSON_VERSION_MAJOR) + "." + + std::to_string(NLOHMANN_JSON_VERSION_MINOR) + "." + + std::to_string(NLOHMANN_JSON_VERSION_PATCH); + result["version"]["major"] = NLOHMANN_JSON_VERSION_MAJOR; + result["version"]["minor"] = NLOHMANN_JSON_VERSION_MINOR; + result["version"]["patch"] = NLOHMANN_JSON_VERSION_PATCH; + +#ifdef _WIN32 + result["platform"] = "win32"; +#elif defined __linux__ + result["platform"] = "linux"; +#elif defined __APPLE__ + result["platform"] = "apple"; +#elif defined __unix__ + result["platform"] = "unix"; +#else + result["platform"] = "unknown"; +#endif + +#if defined(__ICC) || defined(__INTEL_COMPILER) + result["compiler"] = {{"family", "icc"}, {"version", __INTEL_COMPILER}}; +#elif defined(__clang__) + result["compiler"] = {{"family", "clang"}, {"version", __clang_version__}}; +#elif defined(__GNUC__) || defined(__GNUG__) + result["compiler"] = {{"family", "gcc"}, {"version", std::to_string(__GNUC__) + "." + std::to_string(__GNUC_MINOR__) + "." + std::to_string(__GNUC_PATCHLEVEL__)}}; +#elif defined(__HP_cc) || defined(__HP_aCC) + result["compiler"] = "hp" +#elif defined(__IBMCPP__) + result["compiler"] = {{"family", "ilecpp"}, {"version", __IBMCPP__}}; +#elif defined(_MSC_VER) + result["compiler"] = {{"family", "msvc"}, {"version", _MSC_VER}}; +#elif defined(__PGI) + result["compiler"] = {{"family", "pgcpp"}, {"version", __PGI}}; +#elif defined(__SUNPRO_CC) + result["compiler"] = {{"family", "sunpro"}, {"version", __SUNPRO_CC}}; +#else + result["compiler"] = {{"family", "unknown"}, {"version", "unknown"}}; +#endif + +#ifdef __cplusplus + result["compiler"]["c++"] = std::to_string(__cplusplus); +#else + result["compiler"]["c++"] = "unknown"; +#endif + return result; + } + + + /////////////////////////// + // JSON value data types // + /////////////////////////// + + /// @name JSON value data types + /// The data types to store a JSON value. These types are derived from + /// the template arguments passed to class @ref basic_json. + /// @{ + +#if defined(JSON_HAS_CPP_14) + // Use transparent comparator if possible, combined with perfect forwarding + // on find() and count() calls prevents unnecessary string construction. + using object_comparator_t = std::less<>; +#else + using object_comparator_t = std::less<StringType>; +#endif + + /*! + @brief a type for an object + + [RFC 7159](http://rfc7159.net/rfc7159) describes JSON objects as follows: + > An object is an unordered collection of zero or more name/value pairs, + > where a name is a string and a value is a string, number, boolean, null, + > object, or array. + + To store objects in C++, a type is defined by the template parameters + described below. + + @tparam ObjectType the container to store objects (e.g., `std::map` or + `std::unordered_map`) + @tparam StringType the type of the keys or names (e.g., `std::string`). + The comparison function `std::less<StringType>` is used to order elements + inside the container. + @tparam AllocatorType the allocator to use for objects (e.g., + `std::allocator`) + + #### Default type + + With the default values for @a ObjectType (`std::map`), @a StringType + (`std::string`), and @a AllocatorType (`std::allocator`), the default + value for @a object_t is: + + @code {.cpp} + std::map< + std::string, // key_type + basic_json, // value_type + std::less<std::string>, // key_compare + std::allocator<std::pair<const std::string, basic_json>> // allocator_type + > + @endcode + + #### Behavior + + The choice of @a object_t influences the behavior of the JSON class. With + the default type, objects have the following behavior: + + - When all names are unique, objects will be interoperable in the sense + that all software implementations receiving that object will agree on + the name-value mappings. + - When the names within an object are not unique, it is unspecified which + one of the values for a given key will be chosen. For instance, + `{"key": 2, "key": 1}` could be equal to either `{"key": 1}` or + `{"key": 2}`. + - Internally, name/value pairs are stored in lexicographical order of the + names. Objects will also be serialized (see @ref dump) in this order. + For instance, `{"b": 1, "a": 2}` and `{"a": 2, "b": 1}` will be stored + and serialized as `{"a": 2, "b": 1}`. + - When comparing objects, the order of the name/value pairs is irrelevant. + This makes objects interoperable in the sense that they will not be + affected by these differences. For instance, `{"b": 1, "a": 2}` and + `{"a": 2, "b": 1}` will be treated as equal. + + #### Limits + + [RFC 7159](http://rfc7159.net/rfc7159) specifies: + > An implementation may set limits on the maximum depth of nesting. + + In this class, the object's limit of nesting is not explicitly constrained. + However, a maximum depth of nesting may be introduced by the compiler or + runtime environment. A theoretical limit can be queried by calling the + @ref max_size function of a JSON object. + + #### Storage + + Objects are stored as pointers in a @ref basic_json type. That is, for any + access to object values, a pointer of type `object_t*` must be + dereferenced. + + @sa @ref array_t -- type for an array value + + @since version 1.0.0 + + @note The order name/value pairs are added to the object is *not* + preserved by the library. Therefore, iterating an object may return + name/value pairs in a different order than they were originally stored. In + fact, keys will be traversed in alphabetical order as `std::map` with + `std::less` is used by default. Please note this behavior conforms to [RFC + 7159](http://rfc7159.net/rfc7159), because any order implements the + specified "unordered" nature of JSON objects. + */ + using object_t = ObjectType<StringType, + basic_json, + object_comparator_t, + AllocatorType<std::pair<const StringType, + basic_json>>>; + + /*! + @brief a type for an array + + [RFC 7159](http://rfc7159.net/rfc7159) describes JSON arrays as follows: + > An array is an ordered sequence of zero or more values. + + To store objects in C++, a type is defined by the template parameters + explained below. + + @tparam ArrayType container type to store arrays (e.g., `std::vector` or + `std::list`) + @tparam AllocatorType allocator to use for arrays (e.g., `std::allocator`) + + #### Default type + + With the default values for @a ArrayType (`std::vector`) and @a + AllocatorType (`std::allocator`), the default value for @a array_t is: + + @code {.cpp} + std::vector< + basic_json, // value_type + std::allocator<basic_json> // allocator_type + > + @endcode + + #### Limits + + [RFC 7159](http://rfc7159.net/rfc7159) specifies: + > An implementation may set limits on the maximum depth of nesting. + + In this class, the array's limit of nesting is not explicitly constrained. + However, a maximum depth of nesting may be introduced by the compiler or + runtime environment. A theoretical limit can be queried by calling the + @ref max_size function of a JSON array. + + #### Storage + + Arrays are stored as pointers in a @ref basic_json type. That is, for any + access to array values, a pointer of type `array_t*` must be dereferenced. + + @sa @ref object_t -- type for an object value + + @since version 1.0.0 + */ + using array_t = ArrayType<basic_json, AllocatorType<basic_json>>; + + /*! + @brief a type for a string + + [RFC 7159](http://rfc7159.net/rfc7159) describes JSON strings as follows: + > A string is a sequence of zero or more Unicode characters. + + To store objects in C++, a type is defined by the template parameter + described below. Unicode values are split by the JSON class into + byte-sized characters during deserialization. + + @tparam StringType the container to store strings (e.g., `std::string`). + Note this container is used for keys/names in objects, see @ref object_t. + + #### Default type + + With the default values for @a StringType (`std::string`), the default + value for @a string_t is: + + @code {.cpp} + std::string + @endcode + + #### Encoding + + Strings are stored in UTF-8 encoding. Therefore, functions like + `std::string::size()` or `std::string::length()` return the number of + bytes in the string rather than the number of characters or glyphs. + + #### String comparison + + [RFC 7159](http://rfc7159.net/rfc7159) states: + > Software implementations are typically required to test names of object + > members for equality. Implementations that transform the textual + > representation into sequences of Unicode code units and then perform the + > comparison numerically, code unit by code unit, are interoperable in the + > sense that implementations will agree in all cases on equality or + > inequality of two strings. For example, implementations that compare + > strings with escaped characters unconverted may incorrectly find that + > `"a\\b"` and `"a\u005Cb"` are not equal. + + This implementation is interoperable as it does compare strings code unit + by code unit. + + #### Storage + + String values are stored as pointers in a @ref basic_json type. That is, + for any access to string values, a pointer of type `string_t*` must be + dereferenced. + + @since version 1.0.0 + */ + using string_t = StringType; + + /*! + @brief a type for a boolean + + [RFC 7159](http://rfc7159.net/rfc7159) implicitly describes a boolean as a + type which differentiates the two literals `true` and `false`. + + To store objects in C++, a type is defined by the template parameter @a + BooleanType which chooses the type to use. + + #### Default type + + With the default values for @a BooleanType (`bool`), the default value for + @a boolean_t is: + + @code {.cpp} + bool + @endcode + + #### Storage + + Boolean values are stored directly inside a @ref basic_json type. + + @since version 1.0.0 + */ + using boolean_t = BooleanType; + + /*! + @brief a type for a number (integer) + + [RFC 7159](http://rfc7159.net/rfc7159) describes numbers as follows: + > The representation of numbers is similar to that used in most + > programming languages. A number is represented in base 10 using decimal + > digits. It contains an integer component that may be prefixed with an + > optional minus sign, which may be followed by a fraction part and/or an + > exponent part. Leading zeros are not allowed. (...) Numeric values that + > cannot be represented in the grammar below (such as Infinity and NaN) + > are not permitted. + + This description includes both integer and floating-point numbers. + However, C++ allows more precise storage if it is known whether the number + is a signed integer, an unsigned integer or a floating-point number. + Therefore, three different types, @ref number_integer_t, @ref + number_unsigned_t and @ref number_float_t are used. + + To store integer numbers in C++, a type is defined by the template + parameter @a NumberIntegerType which chooses the type to use. + + #### Default type + + With the default values for @a NumberIntegerType (`int64_t`), the default + value for @a number_integer_t is: + + @code {.cpp} + int64_t + @endcode + + #### Default behavior + + - The restrictions about leading zeros is not enforced in C++. Instead, + leading zeros in integer literals lead to an interpretation as octal + number. Internally, the value will be stored as decimal number. For + instance, the C++ integer literal `010` will be serialized to `8`. + During deserialization, leading zeros yield an error. + - Not-a-number (NaN) values will be serialized to `null`. + + #### Limits + + [RFC 7159](http://rfc7159.net/rfc7159) specifies: + > An implementation may set limits on the range and precision of numbers. + + When the default type is used, the maximal integer number that can be + stored is `9223372036854775807` (INT64_MAX) and the minimal integer number + that can be stored is `-9223372036854775808` (INT64_MIN). Integer numbers + that are out of range will yield over/underflow when used in a + constructor. During deserialization, too large or small integer numbers + will be automatically be stored as @ref number_unsigned_t or @ref + number_float_t. + + [RFC 7159](http://rfc7159.net/rfc7159) further states: + > Note that when such software is used, numbers that are integers and are + > in the range \f$[-2^{53}+1, 2^{53}-1]\f$ are interoperable in the sense + > that implementations will agree exactly on their numeric values. + + As this range is a subrange of the exactly supported range [INT64_MIN, + INT64_MAX], this class's integer type is interoperable. + + #### Storage + + Integer number values are stored directly inside a @ref basic_json type. + + @sa @ref number_float_t -- type for number values (floating-point) + + @sa @ref number_unsigned_t -- type for number values (unsigned integer) + + @since version 1.0.0 + */ + using number_integer_t = NumberIntegerType; + + /*! + @brief a type for a number (unsigned) + + [RFC 7159](http://rfc7159.net/rfc7159) describes numbers as follows: + > The representation of numbers is similar to that used in most + > programming languages. A number is represented in base 10 using decimal + > digits. It contains an integer component that may be prefixed with an + > optional minus sign, which may be followed by a fraction part and/or an + > exponent part. Leading zeros are not allowed. (...) Numeric values that + > cannot be represented in the grammar below (such as Infinity and NaN) + > are not permitted. + + This description includes both integer and floating-point numbers. + However, C++ allows more precise storage if it is known whether the number + is a signed integer, an unsigned integer or a floating-point number. + Therefore, three different types, @ref number_integer_t, @ref + number_unsigned_t and @ref number_float_t are used. + + To store unsigned integer numbers in C++, a type is defined by the + template parameter @a NumberUnsignedType which chooses the type to use. + + #### Default type + + With the default values for @a NumberUnsignedType (`uint64_t`), the + default value for @a number_unsigned_t is: + + @code {.cpp} + uint64_t + @endcode + + #### Default behavior + + - The restrictions about leading zeros is not enforced in C++. Instead, + leading zeros in integer literals lead to an interpretation as octal + number. Internally, the value will be stored as decimal number. For + instance, the C++ integer literal `010` will be serialized to `8`. + During deserialization, leading zeros yield an error. + - Not-a-number (NaN) values will be serialized to `null`. + + #### Limits + + [RFC 7159](http://rfc7159.net/rfc7159) specifies: + > An implementation may set limits on the range and precision of numbers. + + When the default type is used, the maximal integer number that can be + stored is `18446744073709551615` (UINT64_MAX) and the minimal integer + number that can be stored is `0`. Integer numbers that are out of range + will yield over/underflow when used in a constructor. During + deserialization, too large or small integer numbers will be automatically + be stored as @ref number_integer_t or @ref number_float_t. + + [RFC 7159](http://rfc7159.net/rfc7159) further states: + > Note that when such software is used, numbers that are integers and are + > in the range \f$[-2^{53}+1, 2^{53}-1]\f$ are interoperable in the sense + > that implementations will agree exactly on their numeric values. + + As this range is a subrange (when considered in conjunction with the + number_integer_t type) of the exactly supported range [0, UINT64_MAX], + this class's integer type is interoperable. + + #### Storage + + Integer number values are stored directly inside a @ref basic_json type. + + @sa @ref number_float_t -- type for number values (floating-point) + @sa @ref number_integer_t -- type for number values (integer) + + @since version 2.0.0 + */ + using number_unsigned_t = NumberUnsignedType; + + /*! + @brief a type for a number (floating-point) + + [RFC 7159](http://rfc7159.net/rfc7159) describes numbers as follows: + > The representation of numbers is similar to that used in most + > programming languages. A number is represented in base 10 using decimal + > digits. It contains an integer component that may be prefixed with an + > optional minus sign, which may be followed by a fraction part and/or an + > exponent part. Leading zeros are not allowed. (...) Numeric values that + > cannot be represented in the grammar below (such as Infinity and NaN) + > are not permitted. + + This description includes both integer and floating-point numbers. + However, C++ allows more precise storage if it is known whether the number + is a signed integer, an unsigned integer or a floating-point number. + Therefore, three different types, @ref number_integer_t, @ref + number_unsigned_t and @ref number_float_t are used. + + To store floating-point numbers in C++, a type is defined by the template + parameter @a NumberFloatType which chooses the type to use. + + #### Default type + + With the default values for @a NumberFloatType (`double`), the default + value for @a number_float_t is: + + @code {.cpp} + double + @endcode + + #### Default behavior + + - The restrictions about leading zeros is not enforced in C++. Instead, + leading zeros in floating-point literals will be ignored. Internally, + the value will be stored as decimal number. For instance, the C++ + floating-point literal `01.2` will be serialized to `1.2`. During + deserialization, leading zeros yield an error. + - Not-a-number (NaN) values will be serialized to `null`. + + #### Limits + + [RFC 7159](http://rfc7159.net/rfc7159) states: + > This specification allows implementations to set limits on the range and + > precision of numbers accepted. Since software that implements IEEE + > 754-2008 binary64 (double precision) numbers is generally available and + > widely used, good interoperability can be achieved by implementations + > that expect no more precision or range than these provide, in the sense + > that implementations will approximate JSON numbers within the expected + > precision. + + This implementation does exactly follow this approach, as it uses double + precision floating-point numbers. Note values smaller than + `-1.79769313486232e+308` and values greater than `1.79769313486232e+308` + will be stored as NaN internally and be serialized to `null`. + + #### Storage + + Floating-point number values are stored directly inside a @ref basic_json + type. + + @sa @ref number_integer_t -- type for number values (integer) + + @sa @ref number_unsigned_t -- type for number values (unsigned integer) + + @since version 1.0.0 + */ + using number_float_t = NumberFloatType; + + /// @} + + private: + + /// helper for exception-safe object creation + template<typename T, typename... Args> + static T* create(Args&& ... args) + { + AllocatorType<T> alloc; + using AllocatorTraits = std::allocator_traits<AllocatorType<T>>; + + auto deleter = [&](T * object) + { + AllocatorTraits::deallocate(alloc, object, 1); + }; + std::unique_ptr<T, decltype(deleter)> object(AllocatorTraits::allocate(alloc, 1), deleter); + AllocatorTraits::construct(alloc, object.get(), std::forward<Args>(args)...); + assert(object != nullptr); + return object.release(); + } + + //////////////////////// + // JSON value storage // + //////////////////////// + + /*! + @brief a JSON value + + The actual storage for a JSON value of the @ref basic_json class. This + union combines the different storage types for the JSON value types + defined in @ref value_t. + + JSON type | value_t type | used type + --------- | --------------- | ------------------------ + object | object | pointer to @ref object_t + array | array | pointer to @ref array_t + string | string | pointer to @ref string_t + boolean | boolean | @ref boolean_t + number | number_integer | @ref number_integer_t + number | number_unsigned | @ref number_unsigned_t + number | number_float | @ref number_float_t + null | null | *no value is stored* + + @note Variable-length types (objects, arrays, and strings) are stored as + pointers. The size of the union should not exceed 64 bits if the default + value types are used. + + @since version 1.0.0 + */ + union json_value + { + /// object (stored with pointer to save storage) + object_t* object; + /// array (stored with pointer to save storage) + array_t* array; + /// string (stored with pointer to save storage) + string_t* string; + /// boolean + boolean_t boolean; + /// number (integer) + number_integer_t number_integer; + /// number (unsigned integer) + number_unsigned_t number_unsigned; + /// number (floating-point) + number_float_t number_float; + + /// default constructor (for null values) + json_value() = default; + /// constructor for booleans + json_value(boolean_t v) noexcept : boolean(v) {} + /// constructor for numbers (integer) + json_value(number_integer_t v) noexcept : number_integer(v) {} + /// constructor for numbers (unsigned) + json_value(number_unsigned_t v) noexcept : number_unsigned(v) {} + /// constructor for numbers (floating-point) + json_value(number_float_t v) noexcept : number_float(v) {} + /// constructor for empty values of a given type + json_value(value_t t) + { + switch (t) + { + case value_t::object: + { + object = create<object_t>(); + break; + } + + case value_t::array: + { + array = create<array_t>(); + break; + } + + case value_t::string: + { + string = create<string_t>(""); + break; + } + + case value_t::boolean: + { + boolean = boolean_t(false); + break; + } + + case value_t::number_integer: + { + number_integer = number_integer_t(0); + break; + } + + case value_t::number_unsigned: + { + number_unsigned = number_unsigned_t(0); + break; + } + + case value_t::number_float: + { + number_float = number_float_t(0.0); + break; + } + + case value_t::null: + { + object = nullptr; // silence warning, see #821 + break; + } + + default: + { + object = nullptr; // silence warning, see #821 + if (JSON_UNLIKELY(t == value_t::null)) + { + JSON_THROW(other_error::create(500, "961c151d2e87f2686a955a9be24d316f1362bf21 3.5.0")); // LCOV_EXCL_LINE + } + break; + } + } + } + + /// constructor for strings + json_value(const string_t& value) + { + string = create<string_t>(value); + } + + /// constructor for rvalue strings + json_value(string_t&& value) + { + string = create<string_t>(std::move(value)); + } + + /// constructor for objects + json_value(const object_t& value) + { + object = create<object_t>(value); + } + + /// constructor for rvalue objects + json_value(object_t&& value) + { + object = create<object_t>(std::move(value)); + } + + /// constructor for arrays + json_value(const array_t& value) + { + array = create<array_t>(value); + } + + /// constructor for rvalue arrays + json_value(array_t&& value) + { + array = create<array_t>(std::move(value)); + } + + void destroy(value_t t) noexcept + { + switch (t) + { + case value_t::object: + { + AllocatorType<object_t> alloc; + std::allocator_traits<decltype(alloc)>::destroy(alloc, object); + std::allocator_traits<decltype(alloc)>::deallocate(alloc, object, 1); + break; + } + + case value_t::array: + { + AllocatorType<array_t> alloc; + std::allocator_traits<decltype(alloc)>::destroy(alloc, array); + std::allocator_traits<decltype(alloc)>::deallocate(alloc, array, 1); + break; + } + + case value_t::string: + { + AllocatorType<string_t> alloc; + std::allocator_traits<decltype(alloc)>::destroy(alloc, string); + std::allocator_traits<decltype(alloc)>::deallocate(alloc, string, 1); + break; + } + + default: + { + break; + } + } + } + }; + + /*! + @brief checks the class invariants + + This function asserts the class invariants. It needs to be called at the + end of every constructor to make sure that created objects respect the + invariant. Furthermore, it has to be called each time the type of a JSON + value is changed, because the invariant expresses a relationship between + @a m_type and @a m_value. + */ + void assert_invariant() const noexcept + { + assert(m_type != value_t::object or m_value.object != nullptr); + assert(m_type != value_t::array or m_value.array != nullptr); + assert(m_type != value_t::string or m_value.string != nullptr); + } + + public: + ////////////////////////// + // JSON parser callback // + ////////////////////////// + + /*! + @brief parser event types + + The parser callback distinguishes the following events: + - `object_start`: the parser read `{` and started to process a JSON object + - `key`: the parser read a key of a value in an object + - `object_end`: the parser read `}` and finished processing a JSON object + - `array_start`: the parser read `[` and started to process a JSON array + - `array_end`: the parser read `]` and finished processing a JSON array + - `value`: the parser finished reading a JSON value + + @image html callback_events.png "Example when certain parse events are triggered" + + @sa @ref parser_callback_t for more information and examples + */ + using parse_event_t = typename parser::parse_event_t; + + /*! + @brief per-element parser callback type + + With a parser callback function, the result of parsing a JSON text can be + influenced. When passed to @ref parse, it is called on certain events + (passed as @ref parse_event_t via parameter @a event) with a set recursion + depth @a depth and context JSON value @a parsed. The return value of the + callback function is a boolean indicating whether the element that emitted + the callback shall be kept or not. + + We distinguish six scenarios (determined by the event type) in which the + callback function can be called. The following table describes the values + of the parameters @a depth, @a event, and @a parsed. + + parameter @a event | description | parameter @a depth | parameter @a parsed + ------------------ | ----------- | ------------------ | ------------------- + parse_event_t::object_start | the parser read `{` and started to process a JSON object | depth of the parent of the JSON object | a JSON value with type discarded + parse_event_t::key | the parser read a key of a value in an object | depth of the currently parsed JSON object | a JSON string containing the key + parse_event_t::object_end | the parser read `}` and finished processing a JSON object | depth of the parent of the JSON object | the parsed JSON object + parse_event_t::array_start | the parser read `[` and started to process a JSON array | depth of the parent of the JSON array | a JSON value with type discarded + parse_event_t::array_end | the parser read `]` and finished processing a JSON array | depth of the parent of the JSON array | the parsed JSON array + parse_event_t::value | the parser finished reading a JSON value | depth of the value | the parsed JSON value + + @image html callback_events.png "Example when certain parse events are triggered" + + Discarding a value (i.e., returning `false`) has different effects + depending on the context in which function was called: + + - Discarded values in structured types are skipped. That is, the parser + will behave as if the discarded value was never read. + - In case a value outside a structured type is skipped, it is replaced + with `null`. This case happens if the top-level element is skipped. + + @param[in] depth the depth of the recursion during parsing + + @param[in] event an event of type parse_event_t indicating the context in + the callback function has been called + + @param[in,out] parsed the current intermediate parse result; note that + writing to this value has no effect for parse_event_t::key events + + @return Whether the JSON value which called the function during parsing + should be kept (`true`) or not (`false`). In the latter case, it is either + skipped completely or replaced by an empty discarded object. + + @sa @ref parse for examples + + @since version 1.0.0 + */ + using parser_callback_t = typename parser::parser_callback_t; + + ////////////////// + // constructors // + ////////////////// + + /// @name constructors and destructors + /// Constructors of class @ref basic_json, copy/move constructor, copy + /// assignment, static functions creating objects, and the destructor. + /// @{ + + /*! + @brief create an empty value with a given type + + Create an empty JSON value with a given type. The value will be default + initialized with an empty value which depends on the type: + + Value type | initial value + ----------- | ------------- + null | `null` + boolean | `false` + string | `""` + number | `0` + object | `{}` + array | `[]` + + @param[in] v the type of the value to create + + @complexity Constant. + + @exceptionsafety Strong guarantee: if an exception is thrown, there are no + changes to any JSON value. + + @liveexample{The following code shows the constructor for different @ref + value_t values,basic_json__value_t} + + @sa @ref clear() -- restores the postcondition of this constructor + + @since version 1.0.0 + */ + basic_json(const value_t v) + : m_type(v), m_value(v) + { + assert_invariant(); + } + + /*! + @brief create a null object + + Create a `null` JSON value. It either takes a null pointer as parameter + (explicitly creating `null`) or no parameter (implicitly creating `null`). + The passed null pointer itself is not read -- it is only used to choose + the right constructor. + + @complexity Constant. + + @exceptionsafety No-throw guarantee: this constructor never throws + exceptions. + + @liveexample{The following code shows the constructor with and without a + null pointer parameter.,basic_json__nullptr_t} + + @since version 1.0.0 + */ + basic_json(std::nullptr_t = nullptr) noexcept + : basic_json(value_t::null) + { + assert_invariant(); + } + + /*! + @brief create a JSON value + + This is a "catch all" constructor for all compatible JSON types; that is, + types for which a `to_json()` method exists. The constructor forwards the + parameter @a val to that method (to `json_serializer<U>::to_json` method + with `U = uncvref_t<CompatibleType>`, to be exact). + + Template type @a CompatibleType includes, but is not limited to, the + following types: + - **arrays**: @ref array_t and all kinds of compatible containers such as + `std::vector`, `std::deque`, `std::list`, `std::forward_list`, + `std::array`, `std::valarray`, `std::set`, `std::unordered_set`, + `std::multiset`, and `std::unordered_multiset` with a `value_type` from + which a @ref basic_json value can be constructed. + - **objects**: @ref object_t and all kinds of compatible associative + containers such as `std::map`, `std::unordered_map`, `std::multimap`, + and `std::unordered_multimap` with a `key_type` compatible to + @ref string_t and a `value_type` from which a @ref basic_json value can + be constructed. + - **strings**: @ref string_t, string literals, and all compatible string + containers can be used. + - **numbers**: @ref number_integer_t, @ref number_unsigned_t, + @ref number_float_t, and all convertible number types such as `int`, + `size_t`, `int64_t`, `float` or `double` can be used. + - **boolean**: @ref boolean_t / `bool` can be used. + + See the examples below. + + @tparam CompatibleType a type such that: + - @a CompatibleType is not derived from `std::istream`, + - @a CompatibleType is not @ref basic_json (to avoid hijacking copy/move + constructors), + - @a CompatibleType is not a different @ref basic_json type (i.e. with different template arguments) + - @a CompatibleType is not a @ref basic_json nested type (e.g., + @ref json_pointer, @ref iterator, etc ...) + - @ref @ref json_serializer<U> has a + `to_json(basic_json_t&, CompatibleType&&)` method + + @tparam U = `uncvref_t<CompatibleType>` + + @param[in] val the value to be forwarded to the respective constructor + + @complexity Usually linear in the size of the passed @a val, also + depending on the implementation of the called `to_json()` + method. + + @exceptionsafety Depends on the called constructor. For types directly + supported by the library (i.e., all types for which no `to_json()` function + was provided), strong guarantee holds: if an exception is thrown, there are + no changes to any JSON value. + + @liveexample{The following code shows the constructor with several + compatible types.,basic_json__CompatibleType} + + @since version 2.1.0 + */ + template <typename CompatibleType, + typename U = detail::uncvref_t<CompatibleType>, + detail::enable_if_t< + not detail::is_basic_json<U>::value and detail::is_compatible_type<basic_json_t, U>::value, int> = 0> + basic_json(CompatibleType && val) noexcept(noexcept( + JSONSerializer<U>::to_json(std::declval<basic_json_t&>(), + std::forward<CompatibleType>(val)))) + { + JSONSerializer<U>::to_json(*this, std::forward<CompatibleType>(val)); + assert_invariant(); + } + + /*! + @brief create a JSON value from an existing one + + This is a constructor for existing @ref basic_json types. + It does not hijack copy/move constructors, since the parameter has different + template arguments than the current ones. + + The constructor tries to convert the internal @ref m_value of the parameter. + + @tparam BasicJsonType a type such that: + - @a BasicJsonType is a @ref basic_json type. + - @a BasicJsonType has different template arguments than @ref basic_json_t. + + @param[in] val the @ref basic_json value to be converted. + + @complexity Usually linear in the size of the passed @a val, also + depending on the implementation of the called `to_json()` + method. + + @exceptionsafety Depends on the called constructor. For types directly + supported by the library (i.e., all types for which no `to_json()` function + was provided), strong guarantee holds: if an exception is thrown, there are + no changes to any JSON value. + + @since version 3.2.0 + */ + template <typename BasicJsonType, + detail::enable_if_t< + detail::is_basic_json<BasicJsonType>::value and not std::is_same<basic_json, BasicJsonType>::value, int> = 0> + basic_json(const BasicJsonType& val) + { + using other_boolean_t = typename BasicJsonType::boolean_t; + using other_number_float_t = typename BasicJsonType::number_float_t; + using other_number_integer_t = typename BasicJsonType::number_integer_t; + using other_number_unsigned_t = typename BasicJsonType::number_unsigned_t; + using other_string_t = typename BasicJsonType::string_t; + using other_object_t = typename BasicJsonType::object_t; + using other_array_t = typename BasicJsonType::array_t; + + switch (val.type()) + { + case value_t::boolean: + JSONSerializer<other_boolean_t>::to_json(*this, val.template get<other_boolean_t>()); + break; + case value_t::number_float: + JSONSerializer<other_number_float_t>::to_json(*this, val.template get<other_number_float_t>()); + break; + case value_t::number_integer: + JSONSerializer<other_number_integer_t>::to_json(*this, val.template get<other_number_integer_t>()); + break; + case value_t::number_unsigned: + JSONSerializer<other_number_unsigned_t>::to_json(*this, val.template get<other_number_unsigned_t>()); + break; + case value_t::string: + JSONSerializer<other_string_t>::to_json(*this, val.template get_ref<const other_string_t&>()); + break; + case value_t::object: + JSONSerializer<other_object_t>::to_json(*this, val.template get_ref<const other_object_t&>()); + break; + case value_t::array: + JSONSerializer<other_array_t>::to_json(*this, val.template get_ref<const other_array_t&>()); + break; + case value_t::null: + *this = nullptr; + break; + case value_t::discarded: + m_type = value_t::discarded; + break; + } + assert_invariant(); + } + + /*! + @brief create a container (array or object) from an initializer list + + Creates a JSON value of type array or object from the passed initializer + list @a init. In case @a type_deduction is `true` (default), the type of + the JSON value to be created is deducted from the initializer list @a init + according to the following rules: + + 1. If the list is empty, an empty JSON object value `{}` is created. + 2. If the list consists of pairs whose first element is a string, a JSON + object value is created where the first elements of the pairs are + treated as keys and the second elements are as values. + 3. In all other cases, an array is created. + + The rules aim to create the best fit between a C++ initializer list and + JSON values. The rationale is as follows: + + 1. The empty initializer list is written as `{}` which is exactly an empty + JSON object. + 2. C++ has no way of describing mapped types other than to list a list of + pairs. As JSON requires that keys must be of type string, rule 2 is the + weakest constraint one can pose on initializer lists to interpret them + as an object. + 3. In all other cases, the initializer list could not be interpreted as + JSON object type, so interpreting it as JSON array type is safe. + + With the rules described above, the following JSON values cannot be + expressed by an initializer list: + + - the empty array (`[]`): use @ref array(initializer_list_t) + with an empty initializer list in this case + - arrays whose elements satisfy rule 2: use @ref + array(initializer_list_t) with the same initializer list + in this case + + @note When used without parentheses around an empty initializer list, @ref + basic_json() is called instead of this function, yielding the JSON null + value. + + @param[in] init initializer list with JSON values + + @param[in] type_deduction internal parameter; when set to `true`, the type + of the JSON value is deducted from the initializer list @a init; when set + to `false`, the type provided via @a manual_type is forced. This mode is + used by the functions @ref array(initializer_list_t) and + @ref object(initializer_list_t). + + @param[in] manual_type internal parameter; when @a type_deduction is set + to `false`, the created JSON value will use the provided type (only @ref + value_t::array and @ref value_t::object are valid); when @a type_deduction + is set to `true`, this parameter has no effect + + @throw type_error.301 if @a type_deduction is `false`, @a manual_type is + `value_t::object`, but @a init contains an element which is not a pair + whose first element is a string. In this case, the constructor could not + create an object. If @a type_deduction would have be `true`, an array + would have been created. See @ref object(initializer_list_t) + for an example. + + @complexity Linear in the size of the initializer list @a init. + + @exceptionsafety Strong guarantee: if an exception is thrown, there are no + changes to any JSON value. + + @liveexample{The example below shows how JSON values are created from + initializer lists.,basic_json__list_init_t} + + @sa @ref array(initializer_list_t) -- create a JSON array + value from an initializer list + @sa @ref object(initializer_list_t) -- create a JSON object + value from an initializer list + + @since version 1.0.0 + */ + basic_json(initializer_list_t init, + bool type_deduction = true, + value_t manual_type = value_t::array) + { + // check if each element is an array with two elements whose first + // element is a string + bool is_an_object = std::all_of(init.begin(), init.end(), + [](const detail::json_ref<basic_json>& element_ref) + { + return (element_ref->is_array() and element_ref->size() == 2 and (*element_ref)[0].is_string()); + }); + + // adjust type if type deduction is not wanted + if (not type_deduction) + { + // if array is wanted, do not create an object though possible + if (manual_type == value_t::array) + { + is_an_object = false; + } + + // if object is wanted but impossible, throw an exception + if (JSON_UNLIKELY(manual_type == value_t::object and not is_an_object)) + { + JSON_THROW(type_error::create(301, "cannot create object from initializer list")); + } + } + + if (is_an_object) + { + // the initializer list is a list of pairs -> create object + m_type = value_t::object; + m_value = value_t::object; + + std::for_each(init.begin(), init.end(), [this](const detail::json_ref<basic_json>& element_ref) + { + auto element = element_ref.moved_or_copied(); + m_value.object->emplace( + std::move(*((*element.m_value.array)[0].m_value.string)), + std::move((*element.m_value.array)[1])); + }); + } + else + { + // the initializer list describes an array -> create array + m_type = value_t::array; + m_value.array = create<array_t>(init.begin(), init.end()); + } + + assert_invariant(); + } + + /*! + @brief explicitly create an array from an initializer list + + Creates a JSON array value from a given initializer list. That is, given a + list of values `a, b, c`, creates the JSON value `[a, b, c]`. If the + initializer list is empty, the empty array `[]` is created. + + @note This function is only needed to express two edge cases that cannot + be realized with the initializer list constructor (@ref + basic_json(initializer_list_t, bool, value_t)). These cases + are: + 1. creating an array whose elements are all pairs whose first element is a + string -- in this case, the initializer list constructor would create an + object, taking the first elements as keys + 2. creating an empty array -- passing the empty initializer list to the + initializer list constructor yields an empty object + + @param[in] init initializer list with JSON values to create an array from + (optional) + + @return JSON array value + + @complexity Linear in the size of @a init. + + @exceptionsafety Strong guarantee: if an exception is thrown, there are no + changes to any JSON value. + + @liveexample{The following code shows an example for the `array` + function.,array} + + @sa @ref basic_json(initializer_list_t, bool, value_t) -- + create a JSON value from an initializer list + @sa @ref object(initializer_list_t) -- create a JSON object + value from an initializer list + + @since version 1.0.0 + */ + static basic_json array(initializer_list_t init = {}) + { + return basic_json(init, false, value_t::array); + } + + /*! + @brief explicitly create an object from an initializer list + + Creates a JSON object value from a given initializer list. The initializer + lists elements must be pairs, and their first elements must be strings. If + the initializer list is empty, the empty object `{}` is created. + + @note This function is only added for symmetry reasons. In contrast to the + related function @ref array(initializer_list_t), there are + no cases which can only be expressed by this function. That is, any + initializer list @a init can also be passed to the initializer list + constructor @ref basic_json(initializer_list_t, bool, value_t). + + @param[in] init initializer list to create an object from (optional) + + @return JSON object value + + @throw type_error.301 if @a init is not a list of pairs whose first + elements are strings. In this case, no object can be created. When such a + value is passed to @ref basic_json(initializer_list_t, bool, value_t), + an array would have been created from the passed initializer list @a init. + See example below. + + @complexity Linear in the size of @a init. + + @exceptionsafety Strong guarantee: if an exception is thrown, there are no + changes to any JSON value. + + @liveexample{The following code shows an example for the `object` + function.,object} + + @sa @ref basic_json(initializer_list_t, bool, value_t) -- + create a JSON value from an initializer list + @sa @ref array(initializer_list_t) -- create a JSON array + value from an initializer list + + @since version 1.0.0 + */ + static basic_json object(initializer_list_t init = {}) + { + return basic_json(init, false, value_t::object); + } + + /*! + @brief construct an array with count copies of given value + + Constructs a JSON array value by creating @a cnt copies of a passed value. + In case @a cnt is `0`, an empty array is created. + + @param[in] cnt the number of JSON copies of @a val to create + @param[in] val the JSON value to copy + + @post `std::distance(begin(),end()) == cnt` holds. + + @complexity Linear in @a cnt. + + @exceptionsafety Strong guarantee: if an exception is thrown, there are no + changes to any JSON value. + + @liveexample{The following code shows examples for the @ref + basic_json(size_type\, const basic_json&) + constructor.,basic_json__size_type_basic_json} + + @since version 1.0.0 + */ + basic_json(size_type cnt, const basic_json& val) + : m_type(value_t::array) + { + m_value.array = create<array_t>(cnt, val); + assert_invariant(); + } + + /*! + @brief construct a JSON container given an iterator range + + Constructs the JSON value with the contents of the range `[first, last)`. + The semantics depends on the different types a JSON value can have: + - In case of a null type, invalid_iterator.206 is thrown. + - In case of other primitive types (number, boolean, or string), @a first + must be `begin()` and @a last must be `end()`. In this case, the value is + copied. Otherwise, invalid_iterator.204 is thrown. + - In case of structured types (array, object), the constructor behaves as + similar versions for `std::vector` or `std::map`; that is, a JSON array + or object is constructed from the values in the range. + + @tparam InputIT an input iterator type (@ref iterator or @ref + const_iterator) + + @param[in] first begin of the range to copy from (included) + @param[in] last end of the range to copy from (excluded) + + @pre Iterators @a first and @a last must be initialized. **This + precondition is enforced with an assertion (see warning).** If + assertions are switched off, a violation of this precondition yields + undefined behavior. + + @pre Range `[first, last)` is valid. Usually, this precondition cannot be + checked efficiently. Only certain edge cases are detected; see the + description of the exceptions below. A violation of this precondition + yields undefined behavior. + + @warning A precondition is enforced with a runtime assertion that will + result in calling `std::abort` if this precondition is not met. + Assertions can be disabled by defining `NDEBUG` at compile time. + See https://en.cppreference.com/w/cpp/error/assert for more + information. + + @throw invalid_iterator.201 if iterators @a first and @a last are not + compatible (i.e., do not belong to the same JSON value). In this case, + the range `[first, last)` is undefined. + @throw invalid_iterator.204 if iterators @a first and @a last belong to a + primitive type (number, boolean, or string), but @a first does not point + to the first element any more. In this case, the range `[first, last)` is + undefined. See example code below. + @throw invalid_iterator.206 if iterators @a first and @a last belong to a + null value. In this case, the range `[first, last)` is undefined. + + @complexity Linear in distance between @a first and @a last. + + @exceptionsafety Strong guarantee: if an exception is thrown, there are no + changes to any JSON value. + + @liveexample{The example below shows several ways to create JSON values by + specifying a subrange with iterators.,basic_json__InputIt_InputIt} + + @since version 1.0.0 + */ + template<class InputIT, typename std::enable_if< + std::is_same<InputIT, typename basic_json_t::iterator>::value or + std::is_same<InputIT, typename basic_json_t::const_iterator>::value, int>::type = 0> + basic_json(InputIT first, InputIT last) + { + assert(first.m_object != nullptr); + assert(last.m_object != nullptr); + + // make sure iterator fits the current value + if (JSON_UNLIKELY(first.m_object != last.m_object)) + { + JSON_THROW(invalid_iterator::create(201, "iterators are not compatible")); + } + + // copy type from first iterator + m_type = first.m_object->m_type; + + // check if iterator range is complete for primitive values + switch (m_type) + { + case value_t::boolean: + case value_t::number_float: + case value_t::number_integer: + case value_t::number_unsigned: + case value_t::string: + { + if (JSON_UNLIKELY(not first.m_it.primitive_iterator.is_begin() + or not last.m_it.primitive_iterator.is_end())) + { + JSON_THROW(invalid_iterator::create(204, "iterators out of range")); + } + break; + } + + default: + break; + } + + switch (m_type) + { + case value_t::number_integer: + { + m_value.number_integer = first.m_object->m_value.number_integer; + break; + } + + case value_t::number_unsigned: + { + m_value.number_unsigned = first.m_object->m_value.number_unsigned; + break; + } + + case value_t::number_float: + { + m_value.number_float = first.m_object->m_value.number_float; + break; + } + + case value_t::boolean: + { + m_value.boolean = first.m_object->m_value.boolean; + break; + } + + case value_t::string: + { + m_value = *first.m_object->m_value.string; + break; + } + + case value_t::object: + { + m_value.object = create<object_t>(first.m_it.object_iterator, + last.m_it.object_iterator); + break; + } + + case value_t::array: + { + m_value.array = create<array_t>(first.m_it.array_iterator, + last.m_it.array_iterator); + break; + } + + default: + JSON_THROW(invalid_iterator::create(206, "cannot construct with iterators from " + + std::string(first.m_object->type_name()))); + } + + assert_invariant(); + } + + + /////////////////////////////////////// + // other constructors and destructor // + /////////////////////////////////////// + + /// @private + basic_json(const detail::json_ref<basic_json>& ref) + : basic_json(ref.moved_or_copied()) + {} + + /*! + @brief copy constructor + + Creates a copy of a given JSON value. + + @param[in] other the JSON value to copy + + @post `*this == other` + + @complexity Linear in the size of @a other. + + @exceptionsafety Strong guarantee: if an exception is thrown, there are no + changes to any JSON value. + + @requirement This function helps `basic_json` satisfying the + [Container](https://en.cppreference.com/w/cpp/named_req/Container) + requirements: + - The complexity is linear. + - As postcondition, it holds: `other == basic_json(other)`. + + @liveexample{The following code shows an example for the copy + constructor.,basic_json__basic_json} + + @since version 1.0.0 + */ + basic_json(const basic_json& other) + : m_type(other.m_type) + { + // check of passed value is valid + other.assert_invariant(); + + switch (m_type) + { + case value_t::object: + { + m_value = *other.m_value.object; + break; + } + + case value_t::array: + { + m_value = *other.m_value.array; + break; + } + + case value_t::string: + { + m_value = *other.m_value.string; + break; + } + + case value_t::boolean: + { + m_value = other.m_value.boolean; + break; + } + + case value_t::number_integer: + { + m_value = other.m_value.number_integer; + break; + } + + case value_t::number_unsigned: + { + m_value = other.m_value.number_unsigned; + break; + } + + case value_t::number_float: + { + m_value = other.m_value.number_float; + break; + } + + default: + break; + } + + assert_invariant(); + } + + /*! + @brief move constructor + + Move constructor. Constructs a JSON value with the contents of the given + value @a other using move semantics. It "steals" the resources from @a + other and leaves it as JSON null value. + + @param[in,out] other value to move to this object + + @post `*this` has the same value as @a other before the call. + @post @a other is a JSON null value. + + @complexity Constant. + + @exceptionsafety No-throw guarantee: this constructor never throws + exceptions. + + @requirement This function helps `basic_json` satisfying the + [MoveConstructible](https://en.cppreference.com/w/cpp/named_req/MoveConstructible) + requirements. + + @liveexample{The code below shows the move constructor explicitly called + via std::move.,basic_json__moveconstructor} + + @since version 1.0.0 + */ + basic_json(basic_json&& other) noexcept + : m_type(std::move(other.m_type)), + m_value(std::move(other.m_value)) + { + // check that passed value is valid + other.assert_invariant(); + + // invalidate payload + other.m_type = value_t::null; + other.m_value = {}; + + assert_invariant(); + } + + /*! + @brief copy assignment + + Copy assignment operator. Copies a JSON value via the "copy and swap" + strategy: It is expressed in terms of the copy constructor, destructor, + and the `swap()` member function. + + @param[in] other value to copy from + + @complexity Linear. + + @requirement This function helps `basic_json` satisfying the + [Container](https://en.cppreference.com/w/cpp/named_req/Container) + requirements: + - The complexity is linear. + + @liveexample{The code below shows and example for the copy assignment. It + creates a copy of value `a` which is then swapped with `b`. Finally\, the + copy of `a` (which is the null value after the swap) is + destroyed.,basic_json__copyassignment} + + @since version 1.0.0 + */ + basic_json& operator=(basic_json other) noexcept ( + std::is_nothrow_move_constructible<value_t>::value and + std::is_nothrow_move_assignable<value_t>::value and + std::is_nothrow_move_constructible<json_value>::value and + std::is_nothrow_move_assignable<json_value>::value + ) + { + // check that passed value is valid + other.assert_invariant(); + + using std::swap; + swap(m_type, other.m_type); + swap(m_value, other.m_value); + + assert_invariant(); + return *this; + } + + /*! + @brief destructor + + Destroys the JSON value and frees all allocated memory. + + @complexity Linear. + + @requirement This function helps `basic_json` satisfying the + [Container](https://en.cppreference.com/w/cpp/named_req/Container) + requirements: + - The complexity is linear. + - All stored elements are destroyed and all memory is freed. + + @since version 1.0.0 + */ + ~basic_json() noexcept + { + assert_invariant(); + m_value.destroy(m_type); + } + + /// @} + + public: + /////////////////////// + // object inspection // + /////////////////////// + + /// @name object inspection + /// Functions to inspect the type of a JSON value. + /// @{ + + /*! + @brief serialization + + Serialization function for JSON values. The function tries to mimic + Python's `json.dumps()` function, and currently supports its @a indent + and @a ensure_ascii parameters. + + @param[in] indent If indent is nonnegative, then array elements and object + members will be pretty-printed with that indent level. An indent level of + `0` will only insert newlines. `-1` (the default) selects the most compact + representation. + @param[in] indent_char The character to use for indentation if @a indent is + greater than `0`. The default is ` ` (space). + @param[in] ensure_ascii If @a ensure_ascii is true, all non-ASCII characters + in the output are escaped with `\uXXXX` sequences, and the result consists + of ASCII characters only. + @param[in] error_handler how to react on decoding errors; there are three + possible values: `strict` (throws and exception in case a decoding error + occurs; default), `replace` (replace invalid UTF-8 sequences with U+FFFD), + and `ignore` (ignore invalid UTF-8 sequences during serialization). + + @return string containing the serialization of the JSON value + + @throw type_error.316 if a string stored inside the JSON value is not + UTF-8 encoded + + @complexity Linear. + + @exceptionsafety Strong guarantee: if an exception is thrown, there are no + changes in the JSON value. + + @liveexample{The following example shows the effect of different @a indent\, + @a indent_char\, and @a ensure_ascii parameters to the result of the + serialization.,dump} + + @see https://docs.python.org/2/library/json.html#json.dump + + @since version 1.0.0; indentation character @a indent_char, option + @a ensure_ascii and exceptions added in version 3.0.0; error + handlers added in version 3.4.0. + */ + string_t dump(const int indent = -1, + const char indent_char = ' ', + const bool ensure_ascii = false, + const error_handler_t error_handler = error_handler_t::strict) const + { + string_t result; + serializer s(detail::output_adapter<char, string_t>(result), indent_char, error_handler); + + if (indent >= 0) + { + s.dump(*this, true, ensure_ascii, static_cast<unsigned int>(indent)); + } + else + { + s.dump(*this, false, ensure_ascii, 0); + } + + return result; + } + + /*! + @brief return the type of the JSON value (explicit) + + Return the type of the JSON value as a value from the @ref value_t + enumeration. + + @return the type of the JSON value + Value type | return value + ------------------------- | ------------------------- + null | value_t::null + boolean | value_t::boolean + string | value_t::string + number (integer) | value_t::number_integer + number (unsigned integer) | value_t::number_unsigned + number (floating-point) | value_t::number_float + object | value_t::object + array | value_t::array + discarded | value_t::discarded + + @complexity Constant. + + @exceptionsafety No-throw guarantee: this member function never throws + exceptions. + + @liveexample{The following code exemplifies `type()` for all JSON + types.,type} + + @sa @ref operator value_t() -- return the type of the JSON value (implicit) + @sa @ref type_name() -- return the type as string + + @since version 1.0.0 + */ + constexpr value_t type() const noexcept + { + return m_type; + } + + /*! + @brief return whether type is primitive + + This function returns true if and only if the JSON type is primitive + (string, number, boolean, or null). + + @return `true` if type is primitive (string, number, boolean, or null), + `false` otherwise. + + @complexity Constant. + + @exceptionsafety No-throw guarantee: this member function never throws + exceptions. + + @liveexample{The following code exemplifies `is_primitive()` for all JSON + types.,is_primitive} + + @sa @ref is_structured() -- returns whether JSON value is structured + @sa @ref is_null() -- returns whether JSON value is `null` + @sa @ref is_string() -- returns whether JSON value is a string + @sa @ref is_boolean() -- returns whether JSON value is a boolean + @sa @ref is_number() -- returns whether JSON value is a number + + @since version 1.0.0 + */ + constexpr bool is_primitive() const noexcept + { + return is_null() or is_string() or is_boolean() or is_number(); + } + + /*! + @brief return whether type is structured + + This function returns true if and only if the JSON type is structured + (array or object). + + @return `true` if type is structured (array or object), `false` otherwise. + + @complexity Constant. + + @exceptionsafety No-throw guarantee: this member function never throws + exceptions. + + @liveexample{The following code exemplifies `is_structured()` for all JSON + types.,is_structured} + + @sa @ref is_primitive() -- returns whether value is primitive + @sa @ref is_array() -- returns whether value is an array + @sa @ref is_object() -- returns whether value is an object + + @since version 1.0.0 + */ + constexpr bool is_structured() const noexcept + { + return is_array() or is_object(); + } + + /*! + @brief return whether value is null + + This function returns true if and only if the JSON value is null. + + @return `true` if type is null, `false` otherwise. + + @complexity Constant. + + @exceptionsafety No-throw guarantee: this member function never throws + exceptions. + + @liveexample{The following code exemplifies `is_null()` for all JSON + types.,is_null} + + @since version 1.0.0 + */ + constexpr bool is_null() const noexcept + { + return (m_type == value_t::null); + } + + /*! + @brief return whether value is a boolean + + This function returns true if and only if the JSON value is a boolean. + + @return `true` if type is boolean, `false` otherwise. + + @complexity Constant. + + @exceptionsafety No-throw guarantee: this member function never throws + exceptions. + + @liveexample{The following code exemplifies `is_boolean()` for all JSON + types.,is_boolean} + + @since version 1.0.0 + */ + constexpr bool is_boolean() const noexcept + { + return (m_type == value_t::boolean); + } + + /*! + @brief return whether value is a number + + This function returns true if and only if the JSON value is a number. This + includes both integer (signed and unsigned) and floating-point values. + + @return `true` if type is number (regardless whether integer, unsigned + integer or floating-type), `false` otherwise. + + @complexity Constant. + + @exceptionsafety No-throw guarantee: this member function never throws + exceptions. + + @liveexample{The following code exemplifies `is_number()` for all JSON + types.,is_number} + + @sa @ref is_number_integer() -- check if value is an integer or unsigned + integer number + @sa @ref is_number_unsigned() -- check if value is an unsigned integer + number + @sa @ref is_number_float() -- check if value is a floating-point number + + @since version 1.0.0 + */ + constexpr bool is_number() const noexcept + { + return is_number_integer() or is_number_float(); + } + + /*! + @brief return whether value is an integer number + + This function returns true if and only if the JSON value is a signed or + unsigned integer number. This excludes floating-point values. + + @return `true` if type is an integer or unsigned integer number, `false` + otherwise. + + @complexity Constant. + + @exceptionsafety No-throw guarantee: this member function never throws + exceptions. + + @liveexample{The following code exemplifies `is_number_integer()` for all + JSON types.,is_number_integer} + + @sa @ref is_number() -- check if value is a number + @sa @ref is_number_unsigned() -- check if value is an unsigned integer + number + @sa @ref is_number_float() -- check if value is a floating-point number + + @since version 1.0.0 + */ + constexpr bool is_number_integer() const noexcept + { + return (m_type == value_t::number_integer or m_type == value_t::number_unsigned); + } + + /*! + @brief return whether value is an unsigned integer number + + This function returns true if and only if the JSON value is an unsigned + integer number. This excludes floating-point and signed integer values. + + @return `true` if type is an unsigned integer number, `false` otherwise. + + @complexity Constant. + + @exceptionsafety No-throw guarantee: this member function never throws + exceptions. + + @liveexample{The following code exemplifies `is_number_unsigned()` for all + JSON types.,is_number_unsigned} + + @sa @ref is_number() -- check if value is a number + @sa @ref is_number_integer() -- check if value is an integer or unsigned + integer number + @sa @ref is_number_float() -- check if value is a floating-point number + + @since version 2.0.0 + */ + constexpr bool is_number_unsigned() const noexcept + { + return (m_type == value_t::number_unsigned); + } + + /*! + @brief return whether value is a floating-point number + + This function returns true if and only if the JSON value is a + floating-point number. This excludes signed and unsigned integer values. + + @return `true` if type is a floating-point number, `false` otherwise. + + @complexity Constant. + + @exceptionsafety No-throw guarantee: this member function never throws + exceptions. + + @liveexample{The following code exemplifies `is_number_float()` for all + JSON types.,is_number_float} + + @sa @ref is_number() -- check if value is number + @sa @ref is_number_integer() -- check if value is an integer number + @sa @ref is_number_unsigned() -- check if value is an unsigned integer + number + + @since version 1.0.0 + */ + constexpr bool is_number_float() const noexcept + { + return (m_type == value_t::number_float); + } + + /*! + @brief return whether value is an object + + This function returns true if and only if the JSON value is an object. + + @return `true` if type is object, `false` otherwise. + + @complexity Constant. + + @exceptionsafety No-throw guarantee: this member function never throws + exceptions. + + @liveexample{The following code exemplifies `is_object()` for all JSON + types.,is_object} + + @since version 1.0.0 + */ + constexpr bool is_object() const noexcept + { + return (m_type == value_t::object); + } + + /*! + @brief return whether value is an array + + This function returns true if and only if the JSON value is an array. + + @return `true` if type is array, `false` otherwise. + + @complexity Constant. + + @exceptionsafety No-throw guarantee: this member function never throws + exceptions. + + @liveexample{The following code exemplifies `is_array()` for all JSON + types.,is_array} + + @since version 1.0.0 + */ + constexpr bool is_array() const noexcept + { + return (m_type == value_t::array); + } + + /*! + @brief return whether value is a string + + This function returns true if and only if the JSON value is a string. + + @return `true` if type is string, `false` otherwise. + + @complexity Constant. + + @exceptionsafety No-throw guarantee: this member function never throws + exceptions. + + @liveexample{The following code exemplifies `is_string()` for all JSON + types.,is_string} + + @since version 1.0.0 + */ + constexpr bool is_string() const noexcept + { + return (m_type == value_t::string); + } + + /*! + @brief return whether value is discarded + + This function returns true if and only if the JSON value was discarded + during parsing with a callback function (see @ref parser_callback_t). + + @note This function will always be `false` for JSON values after parsing. + That is, discarded values can only occur during parsing, but will be + removed when inside a structured value or replaced by null in other cases. + + @return `true` if type is discarded, `false` otherwise. + + @complexity Constant. + + @exceptionsafety No-throw guarantee: this member function never throws + exceptions. + + @liveexample{The following code exemplifies `is_discarded()` for all JSON + types.,is_discarded} + + @since version 1.0.0 + */ + constexpr bool is_discarded() const noexcept + { + return (m_type == value_t::discarded); + } + + /*! + @brief return the type of the JSON value (implicit) + + Implicitly return the type of the JSON value as a value from the @ref + value_t enumeration. + + @return the type of the JSON value + + @complexity Constant. + + @exceptionsafety No-throw guarantee: this member function never throws + exceptions. + + @liveexample{The following code exemplifies the @ref value_t operator for + all JSON types.,operator__value_t} + + @sa @ref type() -- return the type of the JSON value (explicit) + @sa @ref type_name() -- return the type as string + + @since version 1.0.0 + */ + constexpr operator value_t() const noexcept + { + return m_type; + } + + /// @} + + private: + ////////////////// + // value access // + ////////////////// + + /// get a boolean (explicit) + boolean_t get_impl(boolean_t* /*unused*/) const + { + if (JSON_LIKELY(is_boolean())) + { + return m_value.boolean; + } + + JSON_THROW(type_error::create(302, "type must be boolean, but is " + std::string(type_name()))); + } + + /// get a pointer to the value (object) + object_t* get_impl_ptr(object_t* /*unused*/) noexcept + { + return is_object() ? m_value.object : nullptr; + } + + /// get a pointer to the value (object) + constexpr const object_t* get_impl_ptr(const object_t* /*unused*/) const noexcept + { + return is_object() ? m_value.object : nullptr; + } + + /// get a pointer to the value (array) + array_t* get_impl_ptr(array_t* /*unused*/) noexcept + { + return is_array() ? m_value.array : nullptr; + } + + /// get a pointer to the value (array) + constexpr const array_t* get_impl_ptr(const array_t* /*unused*/) const noexcept + { + return is_array() ? m_value.array : nullptr; + } + + /// get a pointer to the value (string) + string_t* get_impl_ptr(string_t* /*unused*/) noexcept + { + return is_string() ? m_value.string : nullptr; + } + + /// get a pointer to the value (string) + constexpr const string_t* get_impl_ptr(const string_t* /*unused*/) const noexcept + { + return is_string() ? m_value.string : nullptr; + } + + /// get a pointer to the value (boolean) + boolean_t* get_impl_ptr(boolean_t* /*unused*/) noexcept + { + return is_boolean() ? &m_value.boolean : nullptr; + } + + /// get a pointer to the value (boolean) + constexpr const boolean_t* get_impl_ptr(const boolean_t* /*unused*/) const noexcept + { + return is_boolean() ? &m_value.boolean : nullptr; + } + + /// get a pointer to the value (integer number) + number_integer_t* get_impl_ptr(number_integer_t* /*unused*/) noexcept + { + return is_number_integer() ? &m_value.number_integer : nullptr; + } + + /// get a pointer to the value (integer number) + constexpr const number_integer_t* get_impl_ptr(const number_integer_t* /*unused*/) const noexcept + { + return is_number_integer() ? &m_value.number_integer : nullptr; + } + + /// get a pointer to the value (unsigned number) + number_unsigned_t* get_impl_ptr(number_unsigned_t* /*unused*/) noexcept + { + return is_number_unsigned() ? &m_value.number_unsigned : nullptr; + } + + /// get a pointer to the value (unsigned number) + constexpr const number_unsigned_t* get_impl_ptr(const number_unsigned_t* /*unused*/) const noexcept + { + return is_number_unsigned() ? &m_value.number_unsigned : nullptr; + } + + /// get a pointer to the value (floating-point number) + number_float_t* get_impl_ptr(number_float_t* /*unused*/) noexcept + { + return is_number_float() ? &m_value.number_float : nullptr; + } + + /// get a pointer to the value (floating-point number) + constexpr const number_float_t* get_impl_ptr(const number_float_t* /*unused*/) const noexcept + { + return is_number_float() ? &m_value.number_float : nullptr; + } + + /*! + @brief helper function to implement get_ref() + + This function helps to implement get_ref() without code duplication for + const and non-const overloads + + @tparam ThisType will be deduced as `basic_json` or `const basic_json` + + @throw type_error.303 if ReferenceType does not match underlying value + type of the current JSON + */ + template<typename ReferenceType, typename ThisType> + static ReferenceType get_ref_impl(ThisType& obj) + { + // delegate the call to get_ptr<>() + auto ptr = obj.template get_ptr<typename std::add_pointer<ReferenceType>::type>(); + + if (JSON_LIKELY(ptr != nullptr)) + { + return *ptr; + } + + JSON_THROW(type_error::create(303, "incompatible ReferenceType for get_ref, actual type is " + std::string(obj.type_name()))); + } + + public: + /// @name value access + /// Direct access to the stored value of a JSON value. + /// @{ + + /*! + @brief get special-case overload + + This overloads avoids a lot of template boilerplate, it can be seen as the + identity method + + @tparam BasicJsonType == @ref basic_json + + @return a copy of *this + + @complexity Constant. + + @since version 2.1.0 + */ + template<typename BasicJsonType, detail::enable_if_t< + std::is_same<typename std::remove_const<BasicJsonType>::type, basic_json_t>::value, + int> = 0> + basic_json get() const + { + return *this; + } + + /*! + @brief get special-case overload + + This overloads converts the current @ref basic_json in a different + @ref basic_json type + + @tparam BasicJsonType == @ref basic_json + + @return a copy of *this, converted into @tparam BasicJsonType + + @complexity Depending on the implementation of the called `from_json()` + method. + + @since version 3.2.0 + */ + template<typename BasicJsonType, detail::enable_if_t< + not std::is_same<BasicJsonType, basic_json>::value and + detail::is_basic_json<BasicJsonType>::value, int> = 0> + BasicJsonType get() const + { + return *this; + } + + /*! + @brief get a value (explicit) + + Explicit type conversion between the JSON value and a compatible value + which is [CopyConstructible](https://en.cppreference.com/w/cpp/named_req/CopyConstructible) + and [DefaultConstructible](https://en.cppreference.com/w/cpp/named_req/DefaultConstructible). + The value is converted by calling the @ref json_serializer<ValueType> + `from_json()` method. + + The function is equivalent to executing + @code {.cpp} + ValueType ret; + JSONSerializer<ValueType>::from_json(*this, ret); + return ret; + @endcode + + This overloads is chosen if: + - @a ValueType is not @ref basic_json, + - @ref json_serializer<ValueType> has a `from_json()` method of the form + `void from_json(const basic_json&, ValueType&)`, and + - @ref json_serializer<ValueType> does not have a `from_json()` method of + the form `ValueType from_json(const basic_json&)` + + @tparam ValueTypeCV the provided value type + @tparam ValueType the returned value type + + @return copy of the JSON value, converted to @a ValueType + + @throw what @ref json_serializer<ValueType> `from_json()` method throws + + @liveexample{The example below shows several conversions from JSON values + to other types. There a few things to note: (1) Floating-point numbers can + be converted to integers\, (2) A JSON array can be converted to a standard + `std::vector<short>`\, (3) A JSON object can be converted to C++ + associative containers such as `std::unordered_map<std::string\, + json>`.,get__ValueType_const} + + @since version 2.1.0 + */ + template<typename ValueTypeCV, typename ValueType = detail::uncvref_t<ValueTypeCV>, + detail::enable_if_t < + not detail::is_basic_json<ValueType>::value and + detail::has_from_json<basic_json_t, ValueType>::value and + not detail::has_non_default_from_json<basic_json_t, ValueType>::value, + int> = 0> + ValueType get() const noexcept(noexcept( + JSONSerializer<ValueType>::from_json(std::declval<const basic_json_t&>(), std::declval<ValueType&>()))) + { + // we cannot static_assert on ValueTypeCV being non-const, because + // there is support for get<const basic_json_t>(), which is why we + // still need the uncvref + static_assert(not std::is_reference<ValueTypeCV>::value, + "get() cannot be used with reference types, you might want to use get_ref()"); + static_assert(std::is_default_constructible<ValueType>::value, + "types must be DefaultConstructible when used with get()"); + + ValueType ret; + JSONSerializer<ValueType>::from_json(*this, ret); + return ret; + } + + /*! + @brief get a value (explicit); special case + + Explicit type conversion between the JSON value and a compatible value + which is **not** [CopyConstructible](https://en.cppreference.com/w/cpp/named_req/CopyConstructible) + and **not** [DefaultConstructible](https://en.cppreference.com/w/cpp/named_req/DefaultConstructible). + The value is converted by calling the @ref json_serializer<ValueType> + `from_json()` method. + + The function is equivalent to executing + @code {.cpp} + return JSONSerializer<ValueTypeCV>::from_json(*this); + @endcode + + This overloads is chosen if: + - @a ValueType is not @ref basic_json and + - @ref json_serializer<ValueType> has a `from_json()` method of the form + `ValueType from_json(const basic_json&)` + + @note If @ref json_serializer<ValueType> has both overloads of + `from_json()`, this one is chosen. + + @tparam ValueTypeCV the provided value type + @tparam ValueType the returned value type + + @return copy of the JSON value, converted to @a ValueType + + @throw what @ref json_serializer<ValueType> `from_json()` method throws + + @since version 2.1.0 + */ + template<typename ValueTypeCV, typename ValueType = detail::uncvref_t<ValueTypeCV>, + detail::enable_if_t<not std::is_same<basic_json_t, ValueType>::value and + detail::has_non_default_from_json<basic_json_t, ValueType>::value, + int> = 0> + ValueType get() const noexcept(noexcept( + JSONSerializer<ValueTypeCV>::from_json(std::declval<const basic_json_t&>()))) + { + static_assert(not std::is_reference<ValueTypeCV>::value, + "get() cannot be used with reference types, you might want to use get_ref()"); + return JSONSerializer<ValueTypeCV>::from_json(*this); + } + + /*! + @brief get a value (explicit) + + Explicit type conversion between the JSON value and a compatible value. + The value is filled into the input parameter by calling the @ref json_serializer<ValueType> + `from_json()` method. + + The function is equivalent to executing + @code {.cpp} + ValueType v; + JSONSerializer<ValueType>::from_json(*this, v); + @endcode + + This overloads is chosen if: + - @a ValueType is not @ref basic_json, + - @ref json_serializer<ValueType> has a `from_json()` method of the form + `void from_json(const basic_json&, ValueType&)`, and + + @tparam ValueType the input parameter type. + + @return the input parameter, allowing chaining calls. + + @throw what @ref json_serializer<ValueType> `from_json()` method throws + + @liveexample{The example below shows several conversions from JSON values + to other types. There a few things to note: (1) Floating-point numbers can + be converted to integers\, (2) A JSON array can be converted to a standard + `std::vector<short>`\, (3) A JSON object can be converted to C++ + associative containers such as `std::unordered_map<std::string\, + json>`.,get_to} + + @since version 3.3.0 + */ + template<typename ValueType, + detail::enable_if_t < + not detail::is_basic_json<ValueType>::value and + detail::has_from_json<basic_json_t, ValueType>::value, + int> = 0> + ValueType & get_to(ValueType& v) const noexcept(noexcept( + JSONSerializer<ValueType>::from_json(std::declval<const basic_json_t&>(), v))) + { + JSONSerializer<ValueType>::from_json(*this, v); + return v; + } + + + /*! + @brief get a pointer value (implicit) + + Implicit pointer access to the internally stored JSON value. No copies are + made. + + @warning Writing data to the pointee of the result yields an undefined + state. + + @tparam PointerType pointer type; must be a pointer to @ref array_t, @ref + object_t, @ref string_t, @ref boolean_t, @ref number_integer_t, + @ref number_unsigned_t, or @ref number_float_t. Enforced by a static + assertion. + + @return pointer to the internally stored JSON value if the requested + pointer type @a PointerType fits to the JSON value; `nullptr` otherwise + + @complexity Constant. + + @liveexample{The example below shows how pointers to internal values of a + JSON value can be requested. Note that no type conversions are made and a + `nullptr` is returned if the value and the requested pointer type does not + match.,get_ptr} + + @since version 1.0.0 + */ + template<typename PointerType, typename std::enable_if< + std::is_pointer<PointerType>::value, int>::type = 0> + auto get_ptr() noexcept -> decltype(std::declval<basic_json_t&>().get_impl_ptr(std::declval<PointerType>())) + { + // delegate the call to get_impl_ptr<>() + return get_impl_ptr(static_cast<PointerType>(nullptr)); + } + + /*! + @brief get a pointer value (implicit) + @copydoc get_ptr() + */ + template<typename PointerType, typename std::enable_if< + std::is_pointer<PointerType>::value and + std::is_const<typename std::remove_pointer<PointerType>::type>::value, int>::type = 0> + constexpr auto get_ptr() const noexcept -> decltype(std::declval<const basic_json_t&>().get_impl_ptr(std::declval<PointerType>())) + { + // delegate the call to get_impl_ptr<>() const + return get_impl_ptr(static_cast<PointerType>(nullptr)); + } + + /*! + @brief get a pointer value (explicit) + + Explicit pointer access to the internally stored JSON value. No copies are + made. + + @warning The pointer becomes invalid if the underlying JSON object + changes. + + @tparam PointerType pointer type; must be a pointer to @ref array_t, @ref + object_t, @ref string_t, @ref boolean_t, @ref number_integer_t, + @ref number_unsigned_t, or @ref number_float_t. + + @return pointer to the internally stored JSON value if the requested + pointer type @a PointerType fits to the JSON value; `nullptr` otherwise + + @complexity Constant. + + @liveexample{The example below shows how pointers to internal values of a + JSON value can be requested. Note that no type conversions are made and a + `nullptr` is returned if the value and the requested pointer type does not + match.,get__PointerType} + + @sa @ref get_ptr() for explicit pointer-member access + + @since version 1.0.0 + */ + template<typename PointerType, typename std::enable_if< + std::is_pointer<PointerType>::value, int>::type = 0> + auto get() noexcept -> decltype(std::declval<basic_json_t&>().template get_ptr<PointerType>()) + { + // delegate the call to get_ptr + return get_ptr<PointerType>(); + } + + /*! + @brief get a pointer value (explicit) + @copydoc get() + */ + template<typename PointerType, typename std::enable_if< + std::is_pointer<PointerType>::value, int>::type = 0> + constexpr auto get() const noexcept -> decltype(std::declval<const basic_json_t&>().template get_ptr<PointerType>()) + { + // delegate the call to get_ptr + return get_ptr<PointerType>(); + } + + /*! + @brief get a reference value (implicit) + + Implicit reference access to the internally stored JSON value. No copies + are made. + + @warning Writing data to the referee of the result yields an undefined + state. + + @tparam ReferenceType reference type; must be a reference to @ref array_t, + @ref object_t, @ref string_t, @ref boolean_t, @ref number_integer_t, or + @ref number_float_t. Enforced by static assertion. + + @return reference to the internally stored JSON value if the requested + reference type @a ReferenceType fits to the JSON value; throws + type_error.303 otherwise + + @throw type_error.303 in case passed type @a ReferenceType is incompatible + with the stored JSON value; see example below + + @complexity Constant. + + @liveexample{The example shows several calls to `get_ref()`.,get_ref} + + @since version 1.1.0 + */ + template<typename ReferenceType, typename std::enable_if< + std::is_reference<ReferenceType>::value, int>::type = 0> + ReferenceType get_ref() + { + // delegate call to get_ref_impl + return get_ref_impl<ReferenceType>(*this); + } + + /*! + @brief get a reference value (implicit) + @copydoc get_ref() + */ + template<typename ReferenceType, typename std::enable_if< + std::is_reference<ReferenceType>::value and + std::is_const<typename std::remove_reference<ReferenceType>::type>::value, int>::type = 0> + ReferenceType get_ref() const + { + // delegate call to get_ref_impl + return get_ref_impl<ReferenceType>(*this); + } + + /*! + @brief get a value (implicit) + + Implicit type conversion between the JSON value and a compatible value. + The call is realized by calling @ref get() const. + + @tparam ValueType non-pointer type compatible to the JSON value, for + instance `int` for JSON integer numbers, `bool` for JSON booleans, or + `std::vector` types for JSON arrays. The character type of @ref string_t + as well as an initializer list of this type is excluded to avoid + ambiguities as these types implicitly convert to `std::string`. + + @return copy of the JSON value, converted to type @a ValueType + + @throw type_error.302 in case passed type @a ValueType is incompatible + to the JSON value type (e.g., the JSON value is of type boolean, but a + string is requested); see example below + + @complexity Linear in the size of the JSON value. + + @liveexample{The example below shows several conversions from JSON values + to other types. There a few things to note: (1) Floating-point numbers can + be converted to integers\, (2) A JSON array can be converted to a standard + `std::vector<short>`\, (3) A JSON object can be converted to C++ + associative containers such as `std::unordered_map<std::string\, + json>`.,operator__ValueType} + + @since version 1.0.0 + */ + template < typename ValueType, typename std::enable_if < + not std::is_pointer<ValueType>::value and + not std::is_same<ValueType, detail::json_ref<basic_json>>::value and + not std::is_same<ValueType, typename string_t::value_type>::value and + not detail::is_basic_json<ValueType>::value + +#ifndef _MSC_VER // fix for issue #167 operator<< ambiguity under VS2015 + and not std::is_same<ValueType, std::initializer_list<typename string_t::value_type>>::value +#if defined(JSON_HAS_CPP_17) && defined(_MSC_VER) and _MSC_VER <= 1914 + and not std::is_same<ValueType, typename std::string_view>::value +#endif +#endif + and detail::is_detected<detail::get_template_function, const basic_json_t&, ValueType>::value + , int >::type = 0 > + operator ValueType() const + { + // delegate the call to get<>() const + return get<ValueType>(); + } + + /// @} + + + //////////////////// + // element access // + //////////////////// + + /// @name element access + /// Access to the JSON value. + /// @{ + + /*! + @brief access specified array element with bounds checking + + Returns a reference to the element at specified location @a idx, with + bounds checking. + + @param[in] idx index of the element to access + + @return reference to the element at index @a idx + + @throw type_error.304 if the JSON value is not an array; in this case, + calling `at` with an index makes no sense. See example below. + @throw out_of_range.401 if the index @a idx is out of range of the array; + that is, `idx >= size()`. See example below. + + @exceptionsafety Strong guarantee: if an exception is thrown, there are no + changes in the JSON value. + + @complexity Constant. + + @since version 1.0.0 + + @liveexample{The example below shows how array elements can be read and + written using `at()`. It also demonstrates the different exceptions that + can be thrown.,at__size_type} + */ + reference at(size_type idx) + { + // at only works for arrays + if (JSON_LIKELY(is_array())) + { + JSON_TRY + { + return m_value.array->at(idx); + } + JSON_CATCH (std::out_of_range&) + { + // create better exception explanation + JSON_THROW(out_of_range::create(401, "array index " + std::to_string(idx) + " is out of range")); + } + } + else + { + JSON_THROW(type_error::create(304, "cannot use at() with " + std::string(type_name()))); + } + } + + /*! + @brief access specified array element with bounds checking + + Returns a const reference to the element at specified location @a idx, + with bounds checking. + + @param[in] idx index of the element to access + + @return const reference to the element at index @a idx + + @throw type_error.304 if the JSON value is not an array; in this case, + calling `at` with an index makes no sense. See example below. + @throw out_of_range.401 if the index @a idx is out of range of the array; + that is, `idx >= size()`. See example below. + + @exceptionsafety Strong guarantee: if an exception is thrown, there are no + changes in the JSON value. + + @complexity Constant. + + @since version 1.0.0 + + @liveexample{The example below shows how array elements can be read using + `at()`. It also demonstrates the different exceptions that can be thrown., + at__size_type_const} + */ + const_reference at(size_type idx) const + { + // at only works for arrays + if (JSON_LIKELY(is_array())) + { + JSON_TRY + { + return m_value.array->at(idx); + } + JSON_CATCH (std::out_of_range&) + { + // create better exception explanation + JSON_THROW(out_of_range::create(401, "array index " + std::to_string(idx) + " is out of range")); + } + } + else + { + JSON_THROW(type_error::create(304, "cannot use at() with " + std::string(type_name()))); + } + } + + /*! + @brief access specified object element with bounds checking + + Returns a reference to the element at with specified key @a key, with + bounds checking. + + @param[in] key key of the element to access + + @return reference to the element at key @a key + + @throw type_error.304 if the JSON value is not an object; in this case, + calling `at` with a key makes no sense. See example below. + @throw out_of_range.403 if the key @a key is is not stored in the object; + that is, `find(key) == end()`. See example below. + + @exceptionsafety Strong guarantee: if an exception is thrown, there are no + changes in the JSON value. + + @complexity Logarithmic in the size of the container. + + @sa @ref operator[](const typename object_t::key_type&) for unchecked + access by reference + @sa @ref value() for access by value with a default value + + @since version 1.0.0 + + @liveexample{The example below shows how object elements can be read and + written using `at()`. It also demonstrates the different exceptions that + can be thrown.,at__object_t_key_type} + */ + reference at(const typename object_t::key_type& key) + { + // at only works for objects + if (JSON_LIKELY(is_object())) + { + JSON_TRY + { + return m_value.object->at(key); + } + JSON_CATCH (std::out_of_range&) + { + // create better exception explanation + JSON_THROW(out_of_range::create(403, "key '" + key + "' not found")); + } + } + else + { + JSON_THROW(type_error::create(304, "cannot use at() with " + std::string(type_name()))); + } + } + + /*! + @brief access specified object element with bounds checking + + Returns a const reference to the element at with specified key @a key, + with bounds checking. + + @param[in] key key of the element to access + + @return const reference to the element at key @a key + + @throw type_error.304 if the JSON value is not an object; in this case, + calling `at` with a key makes no sense. See example below. + @throw out_of_range.403 if the key @a key is is not stored in the object; + that is, `find(key) == end()`. See example below. + + @exceptionsafety Strong guarantee: if an exception is thrown, there are no + changes in the JSON value. + + @complexity Logarithmic in the size of the container. + + @sa @ref operator[](const typename object_t::key_type&) for unchecked + access by reference + @sa @ref value() for access by value with a default value + + @since version 1.0.0 + + @liveexample{The example below shows how object elements can be read using + `at()`. It also demonstrates the different exceptions that can be thrown., + at__object_t_key_type_const} + */ + const_reference at(const typename object_t::key_type& key) const + { + // at only works for objects + if (JSON_LIKELY(is_object())) + { + JSON_TRY + { + return m_value.object->at(key); + } + JSON_CATCH (std::out_of_range&) + { + // create better exception explanation + JSON_THROW(out_of_range::create(403, "key '" + key + "' not found")); + } + } + else + { + JSON_THROW(type_error::create(304, "cannot use at() with " + std::string(type_name()))); + } + } + + /*! + @brief access specified array element + + Returns a reference to the element at specified location @a idx. + + @note If @a idx is beyond the range of the array (i.e., `idx >= size()`), + then the array is silently filled up with `null` values to make `idx` a + valid reference to the last stored element. + + @param[in] idx index of the element to access + + @return reference to the element at index @a idx + + @throw type_error.305 if the JSON value is not an array or null; in that + cases, using the [] operator with an index makes no sense. + + @complexity Constant if @a idx is in the range of the array. Otherwise + linear in `idx - size()`. + + @liveexample{The example below shows how array elements can be read and + written using `[]` operator. Note the addition of `null` + values.,operatorarray__size_type} + + @since version 1.0.0 + */ + reference operator[](size_type idx) + { + // implicitly convert null value to an empty array + if (is_null()) + { + m_type = value_t::array; + m_value.array = create<array_t>(); + assert_invariant(); + } + + // operator[] only works for arrays + if (JSON_LIKELY(is_array())) + { + // fill up array with null values if given idx is outside range + if (idx >= m_value.array->size()) + { + m_value.array->insert(m_value.array->end(), + idx - m_value.array->size() + 1, + basic_json()); + } + + return m_value.array->operator[](idx); + } + + JSON_THROW(type_error::create(305, "cannot use operator[] with a numeric argument with " + std::string(type_name()))); + } + + /*! + @brief access specified array element + + Returns a const reference to the element at specified location @a idx. + + @param[in] idx index of the element to access + + @return const reference to the element at index @a idx + + @throw type_error.305 if the JSON value is not an array; in that case, + using the [] operator with an index makes no sense. + + @complexity Constant. + + @liveexample{The example below shows how array elements can be read using + the `[]` operator.,operatorarray__size_type_const} + + @since version 1.0.0 + */ + const_reference operator[](size_type idx) const + { + // const operator[] only works for arrays + if (JSON_LIKELY(is_array())) + { + return m_value.array->operator[](idx); + } + + JSON_THROW(type_error::create(305, "cannot use operator[] with a numeric argument with " + std::string(type_name()))); + } + + /*! + @brief access specified object element + + Returns a reference to the element at with specified key @a key. + + @note If @a key is not found in the object, then it is silently added to + the object and filled with a `null` value to make `key` a valid reference. + In case the value was `null` before, it is converted to an object. + + @param[in] key key of the element to access + + @return reference to the element at key @a key + + @throw type_error.305 if the JSON value is not an object or null; in that + cases, using the [] operator with a key makes no sense. + + @complexity Logarithmic in the size of the container. + + @liveexample{The example below shows how object elements can be read and + written using the `[]` operator.,operatorarray__key_type} + + @sa @ref at(const typename object_t::key_type&) for access by reference + with range checking + @sa @ref value() for access by value with a default value + + @since version 1.0.0 + */ + reference operator[](const typename object_t::key_type& key) + { + // implicitly convert null value to an empty object + if (is_null()) + { + m_type = value_t::object; + m_value.object = create<object_t>(); + assert_invariant(); + } + + // operator[] only works for objects + if (JSON_LIKELY(is_object())) + { + return m_value.object->operator[](key); + } + + JSON_THROW(type_error::create(305, "cannot use operator[] with a string argument with " + std::string(type_name()))); + } + + /*! + @brief read-only access specified object element + + Returns a const reference to the element at with specified key @a key. No + bounds checking is performed. + + @warning If the element with key @a key does not exist, the behavior is + undefined. + + @param[in] key key of the element to access + + @return const reference to the element at key @a key + + @pre The element with key @a key must exist. **This precondition is + enforced with an assertion.** + + @throw type_error.305 if the JSON value is not an object; in that case, + using the [] operator with a key makes no sense. + + @complexity Logarithmic in the size of the container. + + @liveexample{The example below shows how object elements can be read using + the `[]` operator.,operatorarray__key_type_const} + + @sa @ref at(const typename object_t::key_type&) for access by reference + with range checking + @sa @ref value() for access by value with a default value + + @since version 1.0.0 + */ + const_reference operator[](const typename object_t::key_type& key) const + { + // const operator[] only works for objects + if (JSON_LIKELY(is_object())) + { + assert(m_value.object->find(key) != m_value.object->end()); + return m_value.object->find(key)->second; + } + + JSON_THROW(type_error::create(305, "cannot use operator[] with a string argument with " + std::string(type_name()))); + } + + /*! + @brief access specified object element + + Returns a reference to the element at with specified key @a key. + + @note If @a key is not found in the object, then it is silently added to + the object and filled with a `null` value to make `key` a valid reference. + In case the value was `null` before, it is converted to an object. + + @param[in] key key of the element to access + + @return reference to the element at key @a key + + @throw type_error.305 if the JSON value is not an object or null; in that + cases, using the [] operator with a key makes no sense. + + @complexity Logarithmic in the size of the container. + + @liveexample{The example below shows how object elements can be read and + written using the `[]` operator.,operatorarray__key_type} + + @sa @ref at(const typename object_t::key_type&) for access by reference + with range checking + @sa @ref value() for access by value with a default value + + @since version 1.1.0 + */ + template<typename T> + reference operator[](T* key) + { + // implicitly convert null to object + if (is_null()) + { + m_type = value_t::object; + m_value = value_t::object; + assert_invariant(); + } + + // at only works for objects + if (JSON_LIKELY(is_object())) + { + return m_value.object->operator[](key); + } + + JSON_THROW(type_error::create(305, "cannot use operator[] with a string argument with " + std::string(type_name()))); + } + + /*! + @brief read-only access specified object element + + Returns a const reference to the element at with specified key @a key. No + bounds checking is performed. + + @warning If the element with key @a key does not exist, the behavior is + undefined. + + @param[in] key key of the element to access + + @return const reference to the element at key @a key + + @pre The element with key @a key must exist. **This precondition is + enforced with an assertion.** + + @throw type_error.305 if the JSON value is not an object; in that case, + using the [] operator with a key makes no sense. + + @complexity Logarithmic in the size of the container. + + @liveexample{The example below shows how object elements can be read using + the `[]` operator.,operatorarray__key_type_const} + + @sa @ref at(const typename object_t::key_type&) for access by reference + with range checking + @sa @ref value() for access by value with a default value + + @since version 1.1.0 + */ + template<typename T> + const_reference operator[](T* key) const + { + // at only works for objects + if (JSON_LIKELY(is_object())) + { + assert(m_value.object->find(key) != m_value.object->end()); + return m_value.object->find(key)->second; + } + + JSON_THROW(type_error::create(305, "cannot use operator[] with a string argument with " + std::string(type_name()))); + } + + /*! + @brief access specified object element with default value + + Returns either a copy of an object's element at the specified key @a key + or a given default value if no element with key @a key exists. + + The function is basically equivalent to executing + @code {.cpp} + try { + return at(key); + } catch(out_of_range) { + return default_value; + } + @endcode + + @note Unlike @ref at(const typename object_t::key_type&), this function + does not throw if the given key @a key was not found. + + @note Unlike @ref operator[](const typename object_t::key_type& key), this + function does not implicitly add an element to the position defined by @a + key. This function is furthermore also applicable to const objects. + + @param[in] key key of the element to access + @param[in] default_value the value to return if @a key is not found + + @tparam ValueType type compatible to JSON values, for instance `int` for + JSON integer numbers, `bool` for JSON booleans, or `std::vector` types for + JSON arrays. Note the type of the expected value at @a key and the default + value @a default_value must be compatible. + + @return copy of the element at key @a key or @a default_value if @a key + is not found + + @throw type_error.306 if the JSON value is not an object; in that case, + using `value()` with a key makes no sense. + + @complexity Logarithmic in the size of the container. + + @liveexample{The example below shows how object elements can be queried + with a default value.,basic_json__value} + + @sa @ref at(const typename object_t::key_type&) for access by reference + with range checking + @sa @ref operator[](const typename object_t::key_type&) for unchecked + access by reference + + @since version 1.0.0 + */ + template<class ValueType, typename std::enable_if< + std::is_convertible<basic_json_t, ValueType>::value, int>::type = 0> + ValueType value(const typename object_t::key_type& key, const ValueType& default_value) const + { + // at only works for objects + if (JSON_LIKELY(is_object())) + { + // if key is found, return value and given default value otherwise + const auto it = find(key); + if (it != end()) + { + return *it; + } + + return default_value; + } + + JSON_THROW(type_error::create(306, "cannot use value() with " + std::string(type_name()))); + } + + /*! + @brief overload for a default value of type const char* + @copydoc basic_json::value(const typename object_t::key_type&, const ValueType&) const + */ + string_t value(const typename object_t::key_type& key, const char* default_value) const + { + return value(key, string_t(default_value)); + } + + /*! + @brief access specified object element via JSON Pointer with default value + + Returns either a copy of an object's element at the specified key @a key + or a given default value if no element with key @a key exists. + + The function is basically equivalent to executing + @code {.cpp} + try { + return at(ptr); + } catch(out_of_range) { + return default_value; + } + @endcode + + @note Unlike @ref at(const json_pointer&), this function does not throw + if the given key @a key was not found. + + @param[in] ptr a JSON pointer to the element to access + @param[in] default_value the value to return if @a ptr found no value + + @tparam ValueType type compatible to JSON values, for instance `int` for + JSON integer numbers, `bool` for JSON booleans, or `std::vector` types for + JSON arrays. Note the type of the expected value at @a key and the default + value @a default_value must be compatible. + + @return copy of the element at key @a key or @a default_value if @a key + is not found + + @throw type_error.306 if the JSON value is not an object; in that case, + using `value()` with a key makes no sense. + + @complexity Logarithmic in the size of the container. + + @liveexample{The example below shows how object elements can be queried + with a default value.,basic_json__value_ptr} + + @sa @ref operator[](const json_pointer&) for unchecked access by reference + + @since version 2.0.2 + */ + template<class ValueType, typename std::enable_if< + std::is_convertible<basic_json_t, ValueType>::value, int>::type = 0> + ValueType value(const json_pointer& ptr, const ValueType& default_value) const + { + // at only works for objects + if (JSON_LIKELY(is_object())) + { + // if pointer resolves a value, return it or use default value + JSON_TRY + { + return ptr.get_checked(this); + } + JSON_INTERNAL_CATCH (out_of_range&) + { + return default_value; + } + } + + JSON_THROW(type_error::create(306, "cannot use value() with " + std::string(type_name()))); + } + + /*! + @brief overload for a default value of type const char* + @copydoc basic_json::value(const json_pointer&, ValueType) const + */ + string_t value(const json_pointer& ptr, const char* default_value) const + { + return value(ptr, string_t(default_value)); + } + + /*! + @brief access the first element + + Returns a reference to the first element in the container. For a JSON + container `c`, the expression `c.front()` is equivalent to `*c.begin()`. + + @return In case of a structured type (array or object), a reference to the + first element is returned. In case of number, string, or boolean values, a + reference to the value is returned. + + @complexity Constant. + + @pre The JSON value must not be `null` (would throw `std::out_of_range`) + or an empty array or object (undefined behavior, **guarded by + assertions**). + @post The JSON value remains unchanged. + + @throw invalid_iterator.214 when called on `null` value + + @liveexample{The following code shows an example for `front()`.,front} + + @sa @ref back() -- access the last element + + @since version 1.0.0 + */ + reference front() + { + return *begin(); + } + + /*! + @copydoc basic_json::front() + */ + const_reference front() const + { + return *cbegin(); + } + + /*! + @brief access the last element + + Returns a reference to the last element in the container. For a JSON + container `c`, the expression `c.back()` is equivalent to + @code {.cpp} + auto tmp = c.end(); + --tmp; + return *tmp; + @endcode + + @return In case of a structured type (array or object), a reference to the + last element is returned. In case of number, string, or boolean values, a + reference to the value is returned. + + @complexity Constant. + + @pre The JSON value must not be `null` (would throw `std::out_of_range`) + or an empty array or object (undefined behavior, **guarded by + assertions**). + @post The JSON value remains unchanged. + + @throw invalid_iterator.214 when called on a `null` value. See example + below. + + @liveexample{The following code shows an example for `back()`.,back} + + @sa @ref front() -- access the first element + + @since version 1.0.0 + */ + reference back() + { + auto tmp = end(); + --tmp; + return *tmp; + } + + /*! + @copydoc basic_json::back() + */ + const_reference back() const + { + auto tmp = cend(); + --tmp; + return *tmp; + } + + /*! + @brief remove element given an iterator + + Removes the element specified by iterator @a pos. The iterator @a pos must + be valid and dereferenceable. Thus the `end()` iterator (which is valid, + but is not dereferenceable) cannot be used as a value for @a pos. + + If called on a primitive type other than `null`, the resulting JSON value + will be `null`. + + @param[in] pos iterator to the element to remove + @return Iterator following the last removed element. If the iterator @a + pos refers to the last element, the `end()` iterator is returned. + + @tparam IteratorType an @ref iterator or @ref const_iterator + + @post Invalidates iterators and references at or after the point of the + erase, including the `end()` iterator. + + @throw type_error.307 if called on a `null` value; example: `"cannot use + erase() with null"` + @throw invalid_iterator.202 if called on an iterator which does not belong + to the current JSON value; example: `"iterator does not fit current + value"` + @throw invalid_iterator.205 if called on a primitive type with invalid + iterator (i.e., any iterator which is not `begin()`); example: `"iterator + out of range"` + + @complexity The complexity depends on the type: + - objects: amortized constant + - arrays: linear in distance between @a pos and the end of the container + - strings: linear in the length of the string + - other types: constant + + @liveexample{The example shows the result of `erase()` for different JSON + types.,erase__IteratorType} + + @sa @ref erase(IteratorType, IteratorType) -- removes the elements in + the given range + @sa @ref erase(const typename object_t::key_type&) -- removes the element + from an object at the given key + @sa @ref erase(const size_type) -- removes the element from an array at + the given index + + @since version 1.0.0 + */ + template<class IteratorType, typename std::enable_if< + std::is_same<IteratorType, typename basic_json_t::iterator>::value or + std::is_same<IteratorType, typename basic_json_t::const_iterator>::value, int>::type + = 0> + IteratorType erase(IteratorType pos) + { + // make sure iterator fits the current value + if (JSON_UNLIKELY(this != pos.m_object)) + { + JSON_THROW(invalid_iterator::create(202, "iterator does not fit current value")); + } + + IteratorType result = end(); + + switch (m_type) + { + case value_t::boolean: + case value_t::number_float: + case value_t::number_integer: + case value_t::number_unsigned: + case value_t::string: + { + if (JSON_UNLIKELY(not pos.m_it.primitive_iterator.is_begin())) + { + JSON_THROW(invalid_iterator::create(205, "iterator out of range")); + } + + if (is_string()) + { + AllocatorType<string_t> alloc; + std::allocator_traits<decltype(alloc)>::destroy(alloc, m_value.string); + std::allocator_traits<decltype(alloc)>::deallocate(alloc, m_value.string, 1); + m_value.string = nullptr; + } + + m_type = value_t::null; + assert_invariant(); + break; + } + + case value_t::object: + { + result.m_it.object_iterator = m_value.object->erase(pos.m_it.object_iterator); + break; + } + + case value_t::array: + { + result.m_it.array_iterator = m_value.array->erase(pos.m_it.array_iterator); + break; + } + + default: + JSON_THROW(type_error::create(307, "cannot use erase() with " + std::string(type_name()))); + } + + return result; + } + + /*! + @brief remove elements given an iterator range + + Removes the element specified by the range `[first; last)`. The iterator + @a first does not need to be dereferenceable if `first == last`: erasing + an empty range is a no-op. + + If called on a primitive type other than `null`, the resulting JSON value + will be `null`. + + @param[in] first iterator to the beginning of the range to remove + @param[in] last iterator past the end of the range to remove + @return Iterator following the last removed element. If the iterator @a + second refers to the last element, the `end()` iterator is returned. + + @tparam IteratorType an @ref iterator or @ref const_iterator + + @post Invalidates iterators and references at or after the point of the + erase, including the `end()` iterator. + + @throw type_error.307 if called on a `null` value; example: `"cannot use + erase() with null"` + @throw invalid_iterator.203 if called on iterators which does not belong + to the current JSON value; example: `"iterators do not fit current value"` + @throw invalid_iterator.204 if called on a primitive type with invalid + iterators (i.e., if `first != begin()` and `last != end()`); example: + `"iterators out of range"` + + @complexity The complexity depends on the type: + - objects: `log(size()) + std::distance(first, last)` + - arrays: linear in the distance between @a first and @a last, plus linear + in the distance between @a last and end of the container + - strings: linear in the length of the string + - other types: constant + + @liveexample{The example shows the result of `erase()` for different JSON + types.,erase__IteratorType_IteratorType} + + @sa @ref erase(IteratorType) -- removes the element at a given position + @sa @ref erase(const typename object_t::key_type&) -- removes the element + from an object at the given key + @sa @ref erase(const size_type) -- removes the element from an array at + the given index + + @since version 1.0.0 + */ + template<class IteratorType, typename std::enable_if< + std::is_same<IteratorType, typename basic_json_t::iterator>::value or + std::is_same<IteratorType, typename basic_json_t::const_iterator>::value, int>::type + = 0> + IteratorType erase(IteratorType first, IteratorType last) + { + // make sure iterator fits the current value + if (JSON_UNLIKELY(this != first.m_object or this != last.m_object)) + { + JSON_THROW(invalid_iterator::create(203, "iterators do not fit current value")); + } + + IteratorType result = end(); + + switch (m_type) + { + case value_t::boolean: + case value_t::number_float: + case value_t::number_integer: + case value_t::number_unsigned: + case value_t::string: + { + if (JSON_LIKELY(not first.m_it.primitive_iterator.is_begin() + or not last.m_it.primitive_iterator.is_end())) + { + JSON_THROW(invalid_iterator::create(204, "iterators out of range")); + } + + if (is_string()) + { + AllocatorType<string_t> alloc; + std::allocator_traits<decltype(alloc)>::destroy(alloc, m_value.string); + std::allocator_traits<decltype(alloc)>::deallocate(alloc, m_value.string, 1); + m_value.string = nullptr; + } + + m_type = value_t::null; + assert_invariant(); + break; + } + + case value_t::object: + { + result.m_it.object_iterator = m_value.object->erase(first.m_it.object_iterator, + last.m_it.object_iterator); + break; + } + + case value_t::array: + { + result.m_it.array_iterator = m_value.array->erase(first.m_it.array_iterator, + last.m_it.array_iterator); + break; + } + + default: + JSON_THROW(type_error::create(307, "cannot use erase() with " + std::string(type_name()))); + } + + return result; + } + + /*! + @brief remove element from a JSON object given a key + + Removes elements from a JSON object with the key value @a key. + + @param[in] key value of the elements to remove + + @return Number of elements removed. If @a ObjectType is the default + `std::map` type, the return value will always be `0` (@a key was not + found) or `1` (@a key was found). + + @post References and iterators to the erased elements are invalidated. + Other references and iterators are not affected. + + @throw type_error.307 when called on a type other than JSON object; + example: `"cannot use erase() with null"` + + @complexity `log(size()) + count(key)` + + @liveexample{The example shows the effect of `erase()`.,erase__key_type} + + @sa @ref erase(IteratorType) -- removes the element at a given position + @sa @ref erase(IteratorType, IteratorType) -- removes the elements in + the given range + @sa @ref erase(const size_type) -- removes the element from an array at + the given index + + @since version 1.0.0 + */ + size_type erase(const typename object_t::key_type& key) + { + // this erase only works for objects + if (JSON_LIKELY(is_object())) + { + return m_value.object->erase(key); + } + + JSON_THROW(type_error::create(307, "cannot use erase() with " + std::string(type_name()))); + } + + /*! + @brief remove element from a JSON array given an index + + Removes element from a JSON array at the index @a idx. + + @param[in] idx index of the element to remove + + @throw type_error.307 when called on a type other than JSON object; + example: `"cannot use erase() with null"` + @throw out_of_range.401 when `idx >= size()`; example: `"array index 17 + is out of range"` + + @complexity Linear in distance between @a idx and the end of the container. + + @liveexample{The example shows the effect of `erase()`.,erase__size_type} + + @sa @ref erase(IteratorType) -- removes the element at a given position + @sa @ref erase(IteratorType, IteratorType) -- removes the elements in + the given range + @sa @ref erase(const typename object_t::key_type&) -- removes the element + from an object at the given key + + @since version 1.0.0 + */ + void erase(const size_type idx) + { + // this erase only works for arrays + if (JSON_LIKELY(is_array())) + { + if (JSON_UNLIKELY(idx >= size())) + { + JSON_THROW(out_of_range::create(401, "array index " + std::to_string(idx) + " is out of range")); + } + + m_value.array->erase(m_value.array->begin() + static_cast<difference_type>(idx)); + } + else + { + JSON_THROW(type_error::create(307, "cannot use erase() with " + std::string(type_name()))); + } + } + + /// @} + + + //////////// + // lookup // + //////////// + + /// @name lookup + /// @{ + + /*! + @brief find an element in a JSON object + + Finds an element in a JSON object with key equivalent to @a key. If the + element is not found or the JSON value is not an object, end() is + returned. + + @note This method always returns @ref end() when executed on a JSON type + that is not an object. + + @param[in] key key value of the element to search for. + + @return Iterator to an element with key equivalent to @a key. If no such + element is found or the JSON value is not an object, past-the-end (see + @ref end()) iterator is returned. + + @complexity Logarithmic in the size of the JSON object. + + @liveexample{The example shows how `find()` is used.,find__key_type} + + @since version 1.0.0 + */ + template<typename KeyT> + iterator find(KeyT&& key) + { + auto result = end(); + + if (is_object()) + { + result.m_it.object_iterator = m_value.object->find(std::forward<KeyT>(key)); + } + + return result; + } + + /*! + @brief find an element in a JSON object + @copydoc find(KeyT&&) + */ + template<typename KeyT> + const_iterator find(KeyT&& key) const + { + auto result = cend(); + + if (is_object()) + { + result.m_it.object_iterator = m_value.object->find(std::forward<KeyT>(key)); + } + + return result; + } + + /*! + @brief returns the number of occurrences of a key in a JSON object + + Returns the number of elements with key @a key. If ObjectType is the + default `std::map` type, the return value will always be `0` (@a key was + not found) or `1` (@a key was found). + + @note This method always returns `0` when executed on a JSON type that is + not an object. + + @param[in] key key value of the element to count + + @return Number of elements with key @a key. If the JSON value is not an + object, the return value will be `0`. + + @complexity Logarithmic in the size of the JSON object. + + @liveexample{The example shows how `count()` is used.,count} + + @since version 1.0.0 + */ + template<typename KeyT> + size_type count(KeyT&& key) const + { + // return 0 for all nonobject types + return is_object() ? m_value.object->count(std::forward<KeyT>(key)) : 0; + } + + /// @} + + + /////////////// + // iterators // + /////////////// + + /// @name iterators + /// @{ + + /*! + @brief returns an iterator to the first element + + Returns an iterator to the first element. + + @image html range-begin-end.svg "Illustration from cppreference.com" + + @return iterator to the first element + + @complexity Constant. + + @requirement This function helps `basic_json` satisfying the + [Container](https://en.cppreference.com/w/cpp/named_req/Container) + requirements: + - The complexity is constant. + + @liveexample{The following code shows an example for `begin()`.,begin} + + @sa @ref cbegin() -- returns a const iterator to the beginning + @sa @ref end() -- returns an iterator to the end + @sa @ref cend() -- returns a const iterator to the end + + @since version 1.0.0 + */ + iterator begin() noexcept + { + iterator result(this); + result.set_begin(); + return result; + } + + /*! + @copydoc basic_json::cbegin() + */ + const_iterator begin() const noexcept + { + return cbegin(); + } + + /*! + @brief returns a const iterator to the first element + + Returns a const iterator to the first element. + + @image html range-begin-end.svg "Illustration from cppreference.com" + + @return const iterator to the first element + + @complexity Constant. + + @requirement This function helps `basic_json` satisfying the + [Container](https://en.cppreference.com/w/cpp/named_req/Container) + requirements: + - The complexity is constant. + - Has the semantics of `const_cast<const basic_json&>(*this).begin()`. + + @liveexample{The following code shows an example for `cbegin()`.,cbegin} + + @sa @ref begin() -- returns an iterator to the beginning + @sa @ref end() -- returns an iterator to the end + @sa @ref cend() -- returns a const iterator to the end + + @since version 1.0.0 + */ + const_iterator cbegin() const noexcept + { + const_iterator result(this); + result.set_begin(); + return result; + } + + /*! + @brief returns an iterator to one past the last element + + Returns an iterator to one past the last element. + + @image html range-begin-end.svg "Illustration from cppreference.com" + + @return iterator one past the last element + + @complexity Constant. + + @requirement This function helps `basic_json` satisfying the + [Container](https://en.cppreference.com/w/cpp/named_req/Container) + requirements: + - The complexity is constant. + + @liveexample{The following code shows an example for `end()`.,end} + + @sa @ref cend() -- returns a const iterator to the end + @sa @ref begin() -- returns an iterator to the beginning + @sa @ref cbegin() -- returns a const iterator to the beginning + + @since version 1.0.0 + */ + iterator end() noexcept + { + iterator result(this); + result.set_end(); + return result; + } + + /*! + @copydoc basic_json::cend() + */ + const_iterator end() const noexcept + { + return cend(); + } + + /*! + @brief returns a const iterator to one past the last element + + Returns a const iterator to one past the last element. + + @image html range-begin-end.svg "Illustration from cppreference.com" + + @return const iterator one past the last element + + @complexity Constant. + + @requirement This function helps `basic_json` satisfying the + [Container](https://en.cppreference.com/w/cpp/named_req/Container) + requirements: + - The complexity is constant. + - Has the semantics of `const_cast<const basic_json&>(*this).end()`. + + @liveexample{The following code shows an example for `cend()`.,cend} + + @sa @ref end() -- returns an iterator to the end + @sa @ref begin() -- returns an iterator to the beginning + @sa @ref cbegin() -- returns a const iterator to the beginning + + @since version 1.0.0 + */ + const_iterator cend() const noexcept + { + const_iterator result(this); + result.set_end(); + return result; + } + + /*! + @brief returns an iterator to the reverse-beginning + + Returns an iterator to the reverse-beginning; that is, the last element. + + @image html range-rbegin-rend.svg "Illustration from cppreference.com" + + @complexity Constant. + + @requirement This function helps `basic_json` satisfying the + [ReversibleContainer](https://en.cppreference.com/w/cpp/named_req/ReversibleContainer) + requirements: + - The complexity is constant. + - Has the semantics of `reverse_iterator(end())`. + + @liveexample{The following code shows an example for `rbegin()`.,rbegin} + + @sa @ref crbegin() -- returns a const reverse iterator to the beginning + @sa @ref rend() -- returns a reverse iterator to the end + @sa @ref crend() -- returns a const reverse iterator to the end + + @since version 1.0.0 + */ + reverse_iterator rbegin() noexcept + { + return reverse_iterator(end()); + } + + /*! + @copydoc basic_json::crbegin() + */ + const_reverse_iterator rbegin() const noexcept + { + return crbegin(); + } + + /*! + @brief returns an iterator to the reverse-end + + Returns an iterator to the reverse-end; that is, one before the first + element. + + @image html range-rbegin-rend.svg "Illustration from cppreference.com" + + @complexity Constant. + + @requirement This function helps `basic_json` satisfying the + [ReversibleContainer](https://en.cppreference.com/w/cpp/named_req/ReversibleContainer) + requirements: + - The complexity is constant. + - Has the semantics of `reverse_iterator(begin())`. + + @liveexample{The following code shows an example for `rend()`.,rend} + + @sa @ref crend() -- returns a const reverse iterator to the end + @sa @ref rbegin() -- returns a reverse iterator to the beginning + @sa @ref crbegin() -- returns a const reverse iterator to the beginning + + @since version 1.0.0 + */ + reverse_iterator rend() noexcept + { + return reverse_iterator(begin()); + } + + /*! + @copydoc basic_json::crend() + */ + const_reverse_iterator rend() const noexcept + { + return crend(); + } + + /*! + @brief returns a const reverse iterator to the last element + + Returns a const iterator to the reverse-beginning; that is, the last + element. + + @image html range-rbegin-rend.svg "Illustration from cppreference.com" + + @complexity Constant. + + @requirement This function helps `basic_json` satisfying the + [ReversibleContainer](https://en.cppreference.com/w/cpp/named_req/ReversibleContainer) + requirements: + - The complexity is constant. + - Has the semantics of `const_cast<const basic_json&>(*this).rbegin()`. + + @liveexample{The following code shows an example for `crbegin()`.,crbegin} + + @sa @ref rbegin() -- returns a reverse iterator to the beginning + @sa @ref rend() -- returns a reverse iterator to the end + @sa @ref crend() -- returns a const reverse iterator to the end + + @since version 1.0.0 + */ + const_reverse_iterator crbegin() const noexcept + { + return const_reverse_iterator(cend()); + } + + /*! + @brief returns a const reverse iterator to one before the first + + Returns a const reverse iterator to the reverse-end; that is, one before + the first element. + + @image html range-rbegin-rend.svg "Illustration from cppreference.com" + + @complexity Constant. + + @requirement This function helps `basic_json` satisfying the + [ReversibleContainer](https://en.cppreference.com/w/cpp/named_req/ReversibleContainer) + requirements: + - The complexity is constant. + - Has the semantics of `const_cast<const basic_json&>(*this).rend()`. + + @liveexample{The following code shows an example for `crend()`.,crend} + + @sa @ref rend() -- returns a reverse iterator to the end + @sa @ref rbegin() -- returns a reverse iterator to the beginning + @sa @ref crbegin() -- returns a const reverse iterator to the beginning + + @since version 1.0.0 + */ + const_reverse_iterator crend() const noexcept + { + return const_reverse_iterator(cbegin()); + } + + public: + /*! + @brief wrapper to access iterator member functions in range-based for + + This function allows to access @ref iterator::key() and @ref + iterator::value() during range-based for loops. In these loops, a + reference to the JSON values is returned, so there is no access to the + underlying iterator. + + For loop without iterator_wrapper: + + @code{cpp} + for (auto it = j_object.begin(); it != j_object.end(); ++it) + { + std::cout << "key: " << it.key() << ", value:" << it.value() << '\n'; + } + @endcode + + Range-based for loop without iterator proxy: + + @code{cpp} + for (auto it : j_object) + { + // "it" is of type json::reference and has no key() member + std::cout << "value: " << it << '\n'; + } + @endcode + + Range-based for loop with iterator proxy: + + @code{cpp} + for (auto it : json::iterator_wrapper(j_object)) + { + std::cout << "key: " << it.key() << ", value:" << it.value() << '\n'; + } + @endcode + + @note When iterating over an array, `key()` will return the index of the + element as string (see example). + + @param[in] ref reference to a JSON value + @return iteration proxy object wrapping @a ref with an interface to use in + range-based for loops + + @liveexample{The following code shows how the wrapper is used,iterator_wrapper} + + @exceptionsafety Strong guarantee: if an exception is thrown, there are no + changes in the JSON value. + + @complexity Constant. + + @note The name of this function is not yet final and may change in the + future. + + @deprecated This stream operator is deprecated and will be removed in + future 4.0.0 of the library. Please use @ref items() instead; + that is, replace `json::iterator_wrapper(j)` with `j.items()`. + */ + JSON_DEPRECATED + static iteration_proxy<iterator> iterator_wrapper(reference ref) noexcept + { + return ref.items(); + } + + /*! + @copydoc iterator_wrapper(reference) + */ + JSON_DEPRECATED + static iteration_proxy<const_iterator> iterator_wrapper(const_reference ref) noexcept + { + return ref.items(); + } + + /*! + @brief helper to access iterator member functions in range-based for + + This function allows to access @ref iterator::key() and @ref + iterator::value() during range-based for loops. In these loops, a + reference to the JSON values is returned, so there is no access to the + underlying iterator. + + For loop without `items()` function: + + @code{cpp} + for (auto it = j_object.begin(); it != j_object.end(); ++it) + { + std::cout << "key: " << it.key() << ", value:" << it.value() << '\n'; + } + @endcode + + Range-based for loop without `items()` function: + + @code{cpp} + for (auto it : j_object) + { + // "it" is of type json::reference and has no key() member + std::cout << "value: " << it << '\n'; + } + @endcode + + Range-based for loop with `items()` function: + + @code{cpp} + for (auto& el : j_object.items()) + { + std::cout << "key: " << el.key() << ", value:" << el.value() << '\n'; + } + @endcode + + The `items()` function also allows to use + [structured bindings](https://en.cppreference.com/w/cpp/language/structured_binding) + (C++17): + + @code{cpp} + for (auto& [key, val] : j_object.items()) + { + std::cout << "key: " << key << ", value:" << val << '\n'; + } + @endcode + + @note When iterating over an array, `key()` will return the index of the + element as string (see example). For primitive types (e.g., numbers), + `key()` returns an empty string. + + @return iteration proxy object wrapping @a ref with an interface to use in + range-based for loops + + @liveexample{The following code shows how the function is used.,items} + + @exceptionsafety Strong guarantee: if an exception is thrown, there are no + changes in the JSON value. + + @complexity Constant. + + @since version 3.1.0, structured bindings support since 3.5.0. + */ + iteration_proxy<iterator> items() noexcept + { + return iteration_proxy<iterator>(*this); + } + + /*! + @copydoc items() + */ + iteration_proxy<const_iterator> items() const noexcept + { + return iteration_proxy<const_iterator>(*this); + } + + /// @} + + + ////////////// + // capacity // + ////////////// + + /// @name capacity + /// @{ + + /*! + @brief checks whether the container is empty. + + Checks if a JSON value has no elements (i.e. whether its @ref size is `0`). + + @return The return value depends on the different types and is + defined as follows: + Value type | return value + ----------- | ------------- + null | `true` + boolean | `false` + string | `false` + number | `false` + object | result of function `object_t::empty()` + array | result of function `array_t::empty()` + + @liveexample{The following code uses `empty()` to check if a JSON + object contains any elements.,empty} + + @complexity Constant, as long as @ref array_t and @ref object_t satisfy + the Container concept; that is, their `empty()` functions have constant + complexity. + + @iterators No changes. + + @exceptionsafety No-throw guarantee: this function never throws exceptions. + + @note This function does not return whether a string stored as JSON value + is empty - it returns whether the JSON container itself is empty which is + false in the case of a string. + + @requirement This function helps `basic_json` satisfying the + [Container](https://en.cppreference.com/w/cpp/named_req/Container) + requirements: + - The complexity is constant. + - Has the semantics of `begin() == end()`. + + @sa @ref size() -- returns the number of elements + + @since version 1.0.0 + */ + bool empty() const noexcept + { + switch (m_type) + { + case value_t::null: + { + // null values are empty + return true; + } + + case value_t::array: + { + // delegate call to array_t::empty() + return m_value.array->empty(); + } + + case value_t::object: + { + // delegate call to object_t::empty() + return m_value.object->empty(); + } + + default: + { + // all other types are nonempty + return false; + } + } + } + + /*! + @brief returns the number of elements + + Returns the number of elements in a JSON value. + + @return The return value depends on the different types and is + defined as follows: + Value type | return value + ----------- | ------------- + null | `0` + boolean | `1` + string | `1` + number | `1` + object | result of function object_t::size() + array | result of function array_t::size() + + @liveexample{The following code calls `size()` on the different value + types.,size} + + @complexity Constant, as long as @ref array_t and @ref object_t satisfy + the Container concept; that is, their size() functions have constant + complexity. + + @iterators No changes. + + @exceptionsafety No-throw guarantee: this function never throws exceptions. + + @note This function does not return the length of a string stored as JSON + value - it returns the number of elements in the JSON value which is 1 in + the case of a string. + + @requirement This function helps `basic_json` satisfying the + [Container](https://en.cppreference.com/w/cpp/named_req/Container) + requirements: + - The complexity is constant. + - Has the semantics of `std::distance(begin(), end())`. + + @sa @ref empty() -- checks whether the container is empty + @sa @ref max_size() -- returns the maximal number of elements + + @since version 1.0.0 + */ + size_type size() const noexcept + { + switch (m_type) + { + case value_t::null: + { + // null values are empty + return 0; + } + + case value_t::array: + { + // delegate call to array_t::size() + return m_value.array->size(); + } + + case value_t::object: + { + // delegate call to object_t::size() + return m_value.object->size(); + } + + default: + { + // all other types have size 1 + return 1; + } + } + } + + /*! + @brief returns the maximum possible number of elements + + Returns the maximum number of elements a JSON value is able to hold due to + system or library implementation limitations, i.e. `std::distance(begin(), + end())` for the JSON value. + + @return The return value depends on the different types and is + defined as follows: + Value type | return value + ----------- | ------------- + null | `0` (same as `size()`) + boolean | `1` (same as `size()`) + string | `1` (same as `size()`) + number | `1` (same as `size()`) + object | result of function `object_t::max_size()` + array | result of function `array_t::max_size()` + + @liveexample{The following code calls `max_size()` on the different value + types. Note the output is implementation specific.,max_size} + + @complexity Constant, as long as @ref array_t and @ref object_t satisfy + the Container concept; that is, their `max_size()` functions have constant + complexity. + + @iterators No changes. + + @exceptionsafety No-throw guarantee: this function never throws exceptions. + + @requirement This function helps `basic_json` satisfying the + [Container](https://en.cppreference.com/w/cpp/named_req/Container) + requirements: + - The complexity is constant. + - Has the semantics of returning `b.size()` where `b` is the largest + possible JSON value. + + @sa @ref size() -- returns the number of elements + + @since version 1.0.0 + */ + size_type max_size() const noexcept + { + switch (m_type) + { + case value_t::array: + { + // delegate call to array_t::max_size() + return m_value.array->max_size(); + } + + case value_t::object: + { + // delegate call to object_t::max_size() + return m_value.object->max_size(); + } + + default: + { + // all other types have max_size() == size() + return size(); + } + } + } + + /// @} + + + /////////////// + // modifiers // + /////////////// + + /// @name modifiers + /// @{ + + /*! + @brief clears the contents + + Clears the content of a JSON value and resets it to the default value as + if @ref basic_json(value_t) would have been called with the current value + type from @ref type(): + + Value type | initial value + ----------- | ------------- + null | `null` + boolean | `false` + string | `""` + number | `0` + object | `{}` + array | `[]` + + @post Has the same effect as calling + @code {.cpp} + *this = basic_json(type()); + @endcode + + @liveexample{The example below shows the effect of `clear()` to different + JSON types.,clear} + + @complexity Linear in the size of the JSON value. + + @iterators All iterators, pointers and references related to this container + are invalidated. + + @exceptionsafety No-throw guarantee: this function never throws exceptions. + + @sa @ref basic_json(value_t) -- constructor that creates an object with the + same value than calling `clear()` + + @since version 1.0.0 + */ + void clear() noexcept + { + switch (m_type) + { + case value_t::number_integer: + { + m_value.number_integer = 0; + break; + } + + case value_t::number_unsigned: + { + m_value.number_unsigned = 0; + break; + } + + case value_t::number_float: + { + m_value.number_float = 0.0; + break; + } + + case value_t::boolean: + { + m_value.boolean = false; + break; + } + + case value_t::string: + { + m_value.string->clear(); + break; + } + + case value_t::array: + { + m_value.array->clear(); + break; + } + + case value_t::object: + { + m_value.object->clear(); + break; + } + + default: + break; + } + } + + /*! + @brief add an object to an array + + Appends the given element @a val to the end of the JSON value. If the + function is called on a JSON null value, an empty array is created before + appending @a val. + + @param[in] val the value to add to the JSON array + + @throw type_error.308 when called on a type other than JSON array or + null; example: `"cannot use push_back() with number"` + + @complexity Amortized constant. + + @liveexample{The example shows how `push_back()` and `+=` can be used to + add elements to a JSON array. Note how the `null` value was silently + converted to a JSON array.,push_back} + + @since version 1.0.0 + */ + void push_back(basic_json&& val) + { + // push_back only works for null objects or arrays + if (JSON_UNLIKELY(not(is_null() or is_array()))) + { + JSON_THROW(type_error::create(308, "cannot use push_back() with " + std::string(type_name()))); + } + + // transform null object into an array + if (is_null()) + { + m_type = value_t::array; + m_value = value_t::array; + assert_invariant(); + } + + // add element to array (move semantics) + m_value.array->push_back(std::move(val)); + // invalidate object + val.m_type = value_t::null; + } + + /*! + @brief add an object to an array + @copydoc push_back(basic_json&&) + */ + reference operator+=(basic_json&& val) + { + push_back(std::move(val)); + return *this; + } + + /*! + @brief add an object to an array + @copydoc push_back(basic_json&&) + */ + void push_back(const basic_json& val) + { + // push_back only works for null objects or arrays + if (JSON_UNLIKELY(not(is_null() or is_array()))) + { + JSON_THROW(type_error::create(308, "cannot use push_back() with " + std::string(type_name()))); + } + + // transform null object into an array + if (is_null()) + { + m_type = value_t::array; + m_value = value_t::array; + assert_invariant(); + } + + // add element to array + m_value.array->push_back(val); + } + + /*! + @brief add an object to an array + @copydoc push_back(basic_json&&) + */ + reference operator+=(const basic_json& val) + { + push_back(val); + return *this; + } + + /*! + @brief add an object to an object + + Inserts the given element @a val to the JSON object. If the function is + called on a JSON null value, an empty object is created before inserting + @a val. + + @param[in] val the value to add to the JSON object + + @throw type_error.308 when called on a type other than JSON object or + null; example: `"cannot use push_back() with number"` + + @complexity Logarithmic in the size of the container, O(log(`size()`)). + + @liveexample{The example shows how `push_back()` and `+=` can be used to + add elements to a JSON object. Note how the `null` value was silently + converted to a JSON object.,push_back__object_t__value} + + @since version 1.0.0 + */ + void push_back(const typename object_t::value_type& val) + { + // push_back only works for null objects or objects + if (JSON_UNLIKELY(not(is_null() or is_object()))) + { + JSON_THROW(type_error::create(308, "cannot use push_back() with " + std::string(type_name()))); + } + + // transform null object into an object + if (is_null()) + { + m_type = value_t::object; + m_value = value_t::object; + assert_invariant(); + } + + // add element to array + m_value.object->insert(val); + } + + /*! + @brief add an object to an object + @copydoc push_back(const typename object_t::value_type&) + */ + reference operator+=(const typename object_t::value_type& val) + { + push_back(val); + return *this; + } + + /*! + @brief add an object to an object + + This function allows to use `push_back` with an initializer list. In case + + 1. the current value is an object, + 2. the initializer list @a init contains only two elements, and + 3. the first element of @a init is a string, + + @a init is converted into an object element and added using + @ref push_back(const typename object_t::value_type&). Otherwise, @a init + is converted to a JSON value and added using @ref push_back(basic_json&&). + + @param[in] init an initializer list + + @complexity Linear in the size of the initializer list @a init. + + @note This function is required to resolve an ambiguous overload error, + because pairs like `{"key", "value"}` can be both interpreted as + `object_t::value_type` or `std::initializer_list<basic_json>`, see + https://github.com/nlohmann/json/issues/235 for more information. + + @liveexample{The example shows how initializer lists are treated as + objects when possible.,push_back__initializer_list} + */ + void push_back(initializer_list_t init) + { + if (is_object() and init.size() == 2 and (*init.begin())->is_string()) + { + basic_json&& key = init.begin()->moved_or_copied(); + push_back(typename object_t::value_type( + std::move(key.get_ref<string_t&>()), (init.begin() + 1)->moved_or_copied())); + } + else + { + push_back(basic_json(init)); + } + } + + /*! + @brief add an object to an object + @copydoc push_back(initializer_list_t) + */ + reference operator+=(initializer_list_t init) + { + push_back(init); + return *this; + } + + /*! + @brief add an object to an array + + Creates a JSON value from the passed parameters @a args to the end of the + JSON value. If the function is called on a JSON null value, an empty array + is created before appending the value created from @a args. + + @param[in] args arguments to forward to a constructor of @ref basic_json + @tparam Args compatible types to create a @ref basic_json object + + @throw type_error.311 when called on a type other than JSON array or + null; example: `"cannot use emplace_back() with number"` + + @complexity Amortized constant. + + @liveexample{The example shows how `push_back()` can be used to add + elements to a JSON array. Note how the `null` value was silently converted + to a JSON array.,emplace_back} + + @since version 2.0.8 + */ + template<class... Args> + void emplace_back(Args&& ... args) + { + // emplace_back only works for null objects or arrays + if (JSON_UNLIKELY(not(is_null() or is_array()))) + { + JSON_THROW(type_error::create(311, "cannot use emplace_back() with " + std::string(type_name()))); + } + + // transform null object into an array + if (is_null()) + { + m_type = value_t::array; + m_value = value_t::array; + assert_invariant(); + } + + // add element to array (perfect forwarding) + m_value.array->emplace_back(std::forward<Args>(args)...); + } + + /*! + @brief add an object to an object if key does not exist + + Inserts a new element into a JSON object constructed in-place with the + given @a args if there is no element with the key in the container. If the + function is called on a JSON null value, an empty object is created before + appending the value created from @a args. + + @param[in] args arguments to forward to a constructor of @ref basic_json + @tparam Args compatible types to create a @ref basic_json object + + @return a pair consisting of an iterator to the inserted element, or the + already-existing element if no insertion happened, and a bool + denoting whether the insertion took place. + + @throw type_error.311 when called on a type other than JSON object or + null; example: `"cannot use emplace() with number"` + + @complexity Logarithmic in the size of the container, O(log(`size()`)). + + @liveexample{The example shows how `emplace()` can be used to add elements + to a JSON object. Note how the `null` value was silently converted to a + JSON object. Further note how no value is added if there was already one + value stored with the same key.,emplace} + + @since version 2.0.8 + */ + template<class... Args> + std::pair<iterator, bool> emplace(Args&& ... args) + { + // emplace only works for null objects or arrays + if (JSON_UNLIKELY(not(is_null() or is_object()))) + { + JSON_THROW(type_error::create(311, "cannot use emplace() with " + std::string(type_name()))); + } + + // transform null object into an object + if (is_null()) + { + m_type = value_t::object; + m_value = value_t::object; + assert_invariant(); + } + + // add element to array (perfect forwarding) + auto res = m_value.object->emplace(std::forward<Args>(args)...); + // create result iterator and set iterator to the result of emplace + auto it = begin(); + it.m_it.object_iterator = res.first; + + // return pair of iterator and boolean + return {it, res.second}; + } + + /// Helper for insertion of an iterator + /// @note: This uses std::distance to support GCC 4.8, + /// see https://github.com/nlohmann/json/pull/1257 + template<typename... Args> + iterator insert_iterator(const_iterator pos, Args&& ... args) + { + iterator result(this); + assert(m_value.array != nullptr); + + auto insert_pos = std::distance(m_value.array->begin(), pos.m_it.array_iterator); + m_value.array->insert(pos.m_it.array_iterator, std::forward<Args>(args)...); + result.m_it.array_iterator = m_value.array->begin() + insert_pos; + + // This could have been written as: + // result.m_it.array_iterator = m_value.array->insert(pos.m_it.array_iterator, cnt, val); + // but the return value of insert is missing in GCC 4.8, so it is written this way instead. + + return result; + } + + /*! + @brief inserts element + + Inserts element @a val before iterator @a pos. + + @param[in] pos iterator before which the content will be inserted; may be + the end() iterator + @param[in] val element to insert + @return iterator pointing to the inserted @a val. + + @throw type_error.309 if called on JSON values other than arrays; + example: `"cannot use insert() with string"` + @throw invalid_iterator.202 if @a pos is not an iterator of *this; + example: `"iterator does not fit current value"` + + @complexity Constant plus linear in the distance between @a pos and end of + the container. + + @liveexample{The example shows how `insert()` is used.,insert} + + @since version 1.0.0 + */ + iterator insert(const_iterator pos, const basic_json& val) + { + // insert only works for arrays + if (JSON_LIKELY(is_array())) + { + // check if iterator pos fits to this JSON value + if (JSON_UNLIKELY(pos.m_object != this)) + { + JSON_THROW(invalid_iterator::create(202, "iterator does not fit current value")); + } + + // insert to array and return iterator + return insert_iterator(pos, val); + } + + JSON_THROW(type_error::create(309, "cannot use insert() with " + std::string(type_name()))); + } + + /*! + @brief inserts element + @copydoc insert(const_iterator, const basic_json&) + */ + iterator insert(const_iterator pos, basic_json&& val) + { + return insert(pos, val); + } + + /*! + @brief inserts elements + + Inserts @a cnt copies of @a val before iterator @a pos. + + @param[in] pos iterator before which the content will be inserted; may be + the end() iterator + @param[in] cnt number of copies of @a val to insert + @param[in] val element to insert + @return iterator pointing to the first element inserted, or @a pos if + `cnt==0` + + @throw type_error.309 if called on JSON values other than arrays; example: + `"cannot use insert() with string"` + @throw invalid_iterator.202 if @a pos is not an iterator of *this; + example: `"iterator does not fit current value"` + + @complexity Linear in @a cnt plus linear in the distance between @a pos + and end of the container. + + @liveexample{The example shows how `insert()` is used.,insert__count} + + @since version 1.0.0 + */ + iterator insert(const_iterator pos, size_type cnt, const basic_json& val) + { + // insert only works for arrays + if (JSON_LIKELY(is_array())) + { + // check if iterator pos fits to this JSON value + if (JSON_UNLIKELY(pos.m_object != this)) + { + JSON_THROW(invalid_iterator::create(202, "iterator does not fit current value")); + } + + // insert to array and return iterator + return insert_iterator(pos, cnt, val); + } + + JSON_THROW(type_error::create(309, "cannot use insert() with " + std::string(type_name()))); + } + + /*! + @brief inserts elements + + Inserts elements from range `[first, last)` before iterator @a pos. + + @param[in] pos iterator before which the content will be inserted; may be + the end() iterator + @param[in] first begin of the range of elements to insert + @param[in] last end of the range of elements to insert + + @throw type_error.309 if called on JSON values other than arrays; example: + `"cannot use insert() with string"` + @throw invalid_iterator.202 if @a pos is not an iterator of *this; + example: `"iterator does not fit current value"` + @throw invalid_iterator.210 if @a first and @a last do not belong to the + same JSON value; example: `"iterators do not fit"` + @throw invalid_iterator.211 if @a first or @a last are iterators into + container for which insert is called; example: `"passed iterators may not + belong to container"` + + @return iterator pointing to the first element inserted, or @a pos if + `first==last` + + @complexity Linear in `std::distance(first, last)` plus linear in the + distance between @a pos and end of the container. + + @liveexample{The example shows how `insert()` is used.,insert__range} + + @since version 1.0.0 + */ + iterator insert(const_iterator pos, const_iterator first, const_iterator last) + { + // insert only works for arrays + if (JSON_UNLIKELY(not is_array())) + { + JSON_THROW(type_error::create(309, "cannot use insert() with " + std::string(type_name()))); + } + + // check if iterator pos fits to this JSON value + if (JSON_UNLIKELY(pos.m_object != this)) + { + JSON_THROW(invalid_iterator::create(202, "iterator does not fit current value")); + } + + // check if range iterators belong to the same JSON object + if (JSON_UNLIKELY(first.m_object != last.m_object)) + { + JSON_THROW(invalid_iterator::create(210, "iterators do not fit")); + } + + if (JSON_UNLIKELY(first.m_object == this)) + { + JSON_THROW(invalid_iterator::create(211, "passed iterators may not belong to container")); + } + + // insert to array and return iterator + return insert_iterator(pos, first.m_it.array_iterator, last.m_it.array_iterator); + } + + /*! + @brief inserts elements + + Inserts elements from initializer list @a ilist before iterator @a pos. + + @param[in] pos iterator before which the content will be inserted; may be + the end() iterator + @param[in] ilist initializer list to insert the values from + + @throw type_error.309 if called on JSON values other than arrays; example: + `"cannot use insert() with string"` + @throw invalid_iterator.202 if @a pos is not an iterator of *this; + example: `"iterator does not fit current value"` + + @return iterator pointing to the first element inserted, or @a pos if + `ilist` is empty + + @complexity Linear in `ilist.size()` plus linear in the distance between + @a pos and end of the container. + + @liveexample{The example shows how `insert()` is used.,insert__ilist} + + @since version 1.0.0 + */ + iterator insert(const_iterator pos, initializer_list_t ilist) + { + // insert only works for arrays + if (JSON_UNLIKELY(not is_array())) + { + JSON_THROW(type_error::create(309, "cannot use insert() with " + std::string(type_name()))); + } + + // check if iterator pos fits to this JSON value + if (JSON_UNLIKELY(pos.m_object != this)) + { + JSON_THROW(invalid_iterator::create(202, "iterator does not fit current value")); + } + + // insert to array and return iterator + return insert_iterator(pos, ilist.begin(), ilist.end()); + } + + /*! + @brief inserts elements + + Inserts elements from range `[first, last)`. + + @param[in] first begin of the range of elements to insert + @param[in] last end of the range of elements to insert + + @throw type_error.309 if called on JSON values other than objects; example: + `"cannot use insert() with string"` + @throw invalid_iterator.202 if iterator @a first or @a last does does not + point to an object; example: `"iterators first and last must point to + objects"` + @throw invalid_iterator.210 if @a first and @a last do not belong to the + same JSON value; example: `"iterators do not fit"` + + @complexity Logarithmic: `O(N*log(size() + N))`, where `N` is the number + of elements to insert. + + @liveexample{The example shows how `insert()` is used.,insert__range_object} + + @since version 3.0.0 + */ + void insert(const_iterator first, const_iterator last) + { + // insert only works for objects + if (JSON_UNLIKELY(not is_object())) + { + JSON_THROW(type_error::create(309, "cannot use insert() with " + std::string(type_name()))); + } + + // check if range iterators belong to the same JSON object + if (JSON_UNLIKELY(first.m_object != last.m_object)) + { + JSON_THROW(invalid_iterator::create(210, "iterators do not fit")); + } + + // passed iterators must belong to objects + if (JSON_UNLIKELY(not first.m_object->is_object())) + { + JSON_THROW(invalid_iterator::create(202, "iterators first and last must point to objects")); + } + + m_value.object->insert(first.m_it.object_iterator, last.m_it.object_iterator); + } + + /*! + @brief updates a JSON object from another object, overwriting existing keys + + Inserts all values from JSON object @a j and overwrites existing keys. + + @param[in] j JSON object to read values from + + @throw type_error.312 if called on JSON values other than objects; example: + `"cannot use update() with string"` + + @complexity O(N*log(size() + N)), where N is the number of elements to + insert. + + @liveexample{The example shows how `update()` is used.,update} + + @sa https://docs.python.org/3.6/library/stdtypes.html#dict.update + + @since version 3.0.0 + */ + void update(const_reference j) + { + // implicitly convert null value to an empty object + if (is_null()) + { + m_type = value_t::object; + m_value.object = create<object_t>(); + assert_invariant(); + } + + if (JSON_UNLIKELY(not is_object())) + { + JSON_THROW(type_error::create(312, "cannot use update() with " + std::string(type_name()))); + } + if (JSON_UNLIKELY(not j.is_object())) + { + JSON_THROW(type_error::create(312, "cannot use update() with " + std::string(j.type_name()))); + } + + for (auto it = j.cbegin(); it != j.cend(); ++it) + { + m_value.object->operator[](it.key()) = it.value(); + } + } + + /*! + @brief updates a JSON object from another object, overwriting existing keys + + Inserts all values from from range `[first, last)` and overwrites existing + keys. + + @param[in] first begin of the range of elements to insert + @param[in] last end of the range of elements to insert + + @throw type_error.312 if called on JSON values other than objects; example: + `"cannot use update() with string"` + @throw invalid_iterator.202 if iterator @a first or @a last does does not + point to an object; example: `"iterators first and last must point to + objects"` + @throw invalid_iterator.210 if @a first and @a last do not belong to the + same JSON value; example: `"iterators do not fit"` + + @complexity O(N*log(size() + N)), where N is the number of elements to + insert. + + @liveexample{The example shows how `update()` is used__range.,update} + + @sa https://docs.python.org/3.6/library/stdtypes.html#dict.update + + @since version 3.0.0 + */ + void update(const_iterator first, const_iterator last) + { + // implicitly convert null value to an empty object + if (is_null()) + { + m_type = value_t::object; + m_value.object = create<object_t>(); + assert_invariant(); + } + + if (JSON_UNLIKELY(not is_object())) + { + JSON_THROW(type_error::create(312, "cannot use update() with " + std::string(type_name()))); + } + + // check if range iterators belong to the same JSON object + if (JSON_UNLIKELY(first.m_object != last.m_object)) + { + JSON_THROW(invalid_iterator::create(210, "iterators do not fit")); + } + + // passed iterators must belong to objects + if (JSON_UNLIKELY(not first.m_object->is_object() + or not last.m_object->is_object())) + { + JSON_THROW(invalid_iterator::create(202, "iterators first and last must point to objects")); + } + + for (auto it = first; it != last; ++it) + { + m_value.object->operator[](it.key()) = it.value(); + } + } + + /*! + @brief exchanges the values + + Exchanges the contents of the JSON value with those of @a other. Does not + invoke any move, copy, or swap operations on individual elements. All + iterators and references remain valid. The past-the-end iterator is + invalidated. + + @param[in,out] other JSON value to exchange the contents with + + @complexity Constant. + + @liveexample{The example below shows how JSON values can be swapped with + `swap()`.,swap__reference} + + @since version 1.0.0 + */ + void swap(reference other) noexcept ( + std::is_nothrow_move_constructible<value_t>::value and + std::is_nothrow_move_assignable<value_t>::value and + std::is_nothrow_move_constructible<json_value>::value and + std::is_nothrow_move_assignable<json_value>::value + ) + { + std::swap(m_type, other.m_type); + std::swap(m_value, other.m_value); + assert_invariant(); + } + + /*! + @brief exchanges the values + + Exchanges the contents of a JSON array with those of @a other. Does not + invoke any move, copy, or swap operations on individual elements. All + iterators and references remain valid. The past-the-end iterator is + invalidated. + + @param[in,out] other array to exchange the contents with + + @throw type_error.310 when JSON value is not an array; example: `"cannot + use swap() with string"` + + @complexity Constant. + + @liveexample{The example below shows how arrays can be swapped with + `swap()`.,swap__array_t} + + @since version 1.0.0 + */ + void swap(array_t& other) + { + // swap only works for arrays + if (JSON_LIKELY(is_array())) + { + std::swap(*(m_value.array), other); + } + else + { + JSON_THROW(type_error::create(310, "cannot use swap() with " + std::string(type_name()))); + } + } + + /*! + @brief exchanges the values + + Exchanges the contents of a JSON object with those of @a other. Does not + invoke any move, copy, or swap operations on individual elements. All + iterators and references remain valid. The past-the-end iterator is + invalidated. + + @param[in,out] other object to exchange the contents with + + @throw type_error.310 when JSON value is not an object; example: + `"cannot use swap() with string"` + + @complexity Constant. + + @liveexample{The example below shows how objects can be swapped with + `swap()`.,swap__object_t} + + @since version 1.0.0 + */ + void swap(object_t& other) + { + // swap only works for objects + if (JSON_LIKELY(is_object())) + { + std::swap(*(m_value.object), other); + } + else + { + JSON_THROW(type_error::create(310, "cannot use swap() with " + std::string(type_name()))); + } + } + + /*! + @brief exchanges the values + + Exchanges the contents of a JSON string with those of @a other. Does not + invoke any move, copy, or swap operations on individual elements. All + iterators and references remain valid. The past-the-end iterator is + invalidated. + + @param[in,out] other string to exchange the contents with + + @throw type_error.310 when JSON value is not a string; example: `"cannot + use swap() with boolean"` + + @complexity Constant. + + @liveexample{The example below shows how strings can be swapped with + `swap()`.,swap__string_t} + + @since version 1.0.0 + */ + void swap(string_t& other) + { + // swap only works for strings + if (JSON_LIKELY(is_string())) + { + std::swap(*(m_value.string), other); + } + else + { + JSON_THROW(type_error::create(310, "cannot use swap() with " + std::string(type_name()))); + } + } + + /// @} + + public: + ////////////////////////////////////////// + // lexicographical comparison operators // + ////////////////////////////////////////// + + /// @name lexicographical comparison operators + /// @{ + + /*! + @brief comparison: equal + + Compares two JSON values for equality according to the following rules: + - Two JSON values are equal if (1) they are from the same type and (2) + their stored values are the same according to their respective + `operator==`. + - Integer and floating-point numbers are automatically converted before + comparison. Note than two NaN values are always treated as unequal. + - Two JSON null values are equal. + + @note Floating-point inside JSON values numbers are compared with + `json::number_float_t::operator==` which is `double::operator==` by + default. To compare floating-point while respecting an epsilon, an alternative + [comparison function](https://github.com/mariokonrad/marnav/blob/master/src/marnav/math/floatingpoint.hpp#L34-#L39) + could be used, for instance + @code {.cpp} + template<typename T, typename = typename std::enable_if<std::is_floating_point<T>::value, T>::type> + inline bool is_same(T a, T b, T epsilon = std::numeric_limits<T>::epsilon()) noexcept + { + return std::abs(a - b) <= epsilon; + } + @endcode + + @note NaN values never compare equal to themselves or to other NaN values. + + @param[in] lhs first JSON value to consider + @param[in] rhs second JSON value to consider + @return whether the values @a lhs and @a rhs are equal + + @exceptionsafety No-throw guarantee: this function never throws exceptions. + + @complexity Linear. + + @liveexample{The example demonstrates comparing several JSON + types.,operator__equal} + + @since version 1.0.0 + */ + friend bool operator==(const_reference lhs, const_reference rhs) noexcept + { + const auto lhs_type = lhs.type(); + const auto rhs_type = rhs.type(); + + if (lhs_type == rhs_type) + { + switch (lhs_type) + { + case value_t::array: + return (*lhs.m_value.array == *rhs.m_value.array); + + case value_t::object: + return (*lhs.m_value.object == *rhs.m_value.object); + + case value_t::null: + return true; + + case value_t::string: + return (*lhs.m_value.string == *rhs.m_value.string); + + case value_t::boolean: + return (lhs.m_value.boolean == rhs.m_value.boolean); + + case value_t::number_integer: + return (lhs.m_value.number_integer == rhs.m_value.number_integer); + + case value_t::number_unsigned: + return (lhs.m_value.number_unsigned == rhs.m_value.number_unsigned); + + case value_t::number_float: + return (lhs.m_value.number_float == rhs.m_value.number_float); + + default: + return false; + } + } + else if (lhs_type == value_t::number_integer and rhs_type == value_t::number_float) + { + return (static_cast<number_float_t>(lhs.m_value.number_integer) == rhs.m_value.number_float); + } + else if (lhs_type == value_t::number_float and rhs_type == value_t::number_integer) + { + return (lhs.m_value.number_float == static_cast<number_float_t>(rhs.m_value.number_integer)); + } + else if (lhs_type == value_t::number_unsigned and rhs_type == value_t::number_float) + { + return (static_cast<number_float_t>(lhs.m_value.number_unsigned) == rhs.m_value.number_float); + } + else if (lhs_type == value_t::number_float and rhs_type == value_t::number_unsigned) + { + return (lhs.m_value.number_float == static_cast<number_float_t>(rhs.m_value.number_unsigned)); + } + else if (lhs_type == value_t::number_unsigned and rhs_type == value_t::number_integer) + { + return (static_cast<number_integer_t>(lhs.m_value.number_unsigned) == rhs.m_value.number_integer); + } + else if (lhs_type == value_t::number_integer and rhs_type == value_t::number_unsigned) + { + return (lhs.m_value.number_integer == static_cast<number_integer_t>(rhs.m_value.number_unsigned)); + } + + return false; + } + + /*! + @brief comparison: equal + @copydoc operator==(const_reference, const_reference) + */ + template<typename ScalarType, typename std::enable_if< + std::is_scalar<ScalarType>::value, int>::type = 0> + friend bool operator==(const_reference lhs, const ScalarType rhs) noexcept + { + return (lhs == basic_json(rhs)); + } + + /*! + @brief comparison: equal + @copydoc operator==(const_reference, const_reference) + */ + template<typename ScalarType, typename std::enable_if< + std::is_scalar<ScalarType>::value, int>::type = 0> + friend bool operator==(const ScalarType lhs, const_reference rhs) noexcept + { + return (basic_json(lhs) == rhs); + } + + /*! + @brief comparison: not equal + + Compares two JSON values for inequality by calculating `not (lhs == rhs)`. + + @param[in] lhs first JSON value to consider + @param[in] rhs second JSON value to consider + @return whether the values @a lhs and @a rhs are not equal + + @complexity Linear. + + @exceptionsafety No-throw guarantee: this function never throws exceptions. + + @liveexample{The example demonstrates comparing several JSON + types.,operator__notequal} + + @since version 1.0.0 + */ + friend bool operator!=(const_reference lhs, const_reference rhs) noexcept + { + return not (lhs == rhs); + } + + /*! + @brief comparison: not equal + @copydoc operator!=(const_reference, const_reference) + */ + template<typename ScalarType, typename std::enable_if< + std::is_scalar<ScalarType>::value, int>::type = 0> + friend bool operator!=(const_reference lhs, const ScalarType rhs) noexcept + { + return (lhs != basic_json(rhs)); + } + + /*! + @brief comparison: not equal + @copydoc operator!=(const_reference, const_reference) + */ + template<typename ScalarType, typename std::enable_if< + std::is_scalar<ScalarType>::value, int>::type = 0> + friend bool operator!=(const ScalarType lhs, const_reference rhs) noexcept + { + return (basic_json(lhs) != rhs); + } + + /*! + @brief comparison: less than + + Compares whether one JSON value @a lhs is less than another JSON value @a + rhs according to the following rules: + - If @a lhs and @a rhs have the same type, the values are compared using + the default `<` operator. + - Integer and floating-point numbers are automatically converted before + comparison + - In case @a lhs and @a rhs have different types, the values are ignored + and the order of the types is considered, see + @ref operator<(const value_t, const value_t). + + @param[in] lhs first JSON value to consider + @param[in] rhs second JSON value to consider + @return whether @a lhs is less than @a rhs + + @complexity Linear. + + @exceptionsafety No-throw guarantee: this function never throws exceptions. + + @liveexample{The example demonstrates comparing several JSON + types.,operator__less} + + @since version 1.0.0 + */ + friend bool operator<(const_reference lhs, const_reference rhs) noexcept + { + const auto lhs_type = lhs.type(); + const auto rhs_type = rhs.type(); + + if (lhs_type == rhs_type) + { + switch (lhs_type) + { + case value_t::array: + return (*lhs.m_value.array) < (*rhs.m_value.array); + + case value_t::object: + return *lhs.m_value.object < *rhs.m_value.object; + + case value_t::null: + return false; + + case value_t::string: + return *lhs.m_value.string < *rhs.m_value.string; + + case value_t::boolean: + return lhs.m_value.boolean < rhs.m_value.boolean; + + case value_t::number_integer: + return lhs.m_value.number_integer < rhs.m_value.number_integer; + + case value_t::number_unsigned: + return lhs.m_value.number_unsigned < rhs.m_value.number_unsigned; + + case value_t::number_float: + return lhs.m_value.number_float < rhs.m_value.number_float; + + default: + return false; + } + } + else if (lhs_type == value_t::number_integer and rhs_type == value_t::number_float) + { + return static_cast<number_float_t>(lhs.m_value.number_integer) < rhs.m_value.number_float; + } + else if (lhs_type == value_t::number_float and rhs_type == value_t::number_integer) + { + return lhs.m_value.number_float < static_cast<number_float_t>(rhs.m_value.number_integer); + } + else if (lhs_type == value_t::number_unsigned and rhs_type == value_t::number_float) + { + return static_cast<number_float_t>(lhs.m_value.number_unsigned) < rhs.m_value.number_float; + } + else if (lhs_type == value_t::number_float and rhs_type == value_t::number_unsigned) + { + return lhs.m_value.number_float < static_cast<number_float_t>(rhs.m_value.number_unsigned); + } + else if (lhs_type == value_t::number_integer and rhs_type == value_t::number_unsigned) + { + return lhs.m_value.number_integer < static_cast<number_integer_t>(rhs.m_value.number_unsigned); + } + else if (lhs_type == value_t::number_unsigned and rhs_type == value_t::number_integer) + { + return static_cast<number_integer_t>(lhs.m_value.number_unsigned) < rhs.m_value.number_integer; + } + + // We only reach this line if we cannot compare values. In that case, + // we compare types. Note we have to call the operator explicitly, + // because MSVC has problems otherwise. + return operator<(lhs_type, rhs_type); + } + + /*! + @brief comparison: less than + @copydoc operator<(const_reference, const_reference) + */ + template<typename ScalarType, typename std::enable_if< + std::is_scalar<ScalarType>::value, int>::type = 0> + friend bool operator<(const_reference lhs, const ScalarType rhs) noexcept + { + return (lhs < basic_json(rhs)); + } + + /*! + @brief comparison: less than + @copydoc operator<(const_reference, const_reference) + */ + template<typename ScalarType, typename std::enable_if< + std::is_scalar<ScalarType>::value, int>::type = 0> + friend bool operator<(const ScalarType lhs, const_reference rhs) noexcept + { + return (basic_json(lhs) < rhs); + } + + /*! + @brief comparison: less than or equal + + Compares whether one JSON value @a lhs is less than or equal to another + JSON value by calculating `not (rhs < lhs)`. + + @param[in] lhs first JSON value to consider + @param[in] rhs second JSON value to consider + @return whether @a lhs is less than or equal to @a rhs + + @complexity Linear. + + @exceptionsafety No-throw guarantee: this function never throws exceptions. + + @liveexample{The example demonstrates comparing several JSON + types.,operator__greater} + + @since version 1.0.0 + */ + friend bool operator<=(const_reference lhs, const_reference rhs) noexcept + { + return not (rhs < lhs); + } + + /*! + @brief comparison: less than or equal + @copydoc operator<=(const_reference, const_reference) + */ + template<typename ScalarType, typename std::enable_if< + std::is_scalar<ScalarType>::value, int>::type = 0> + friend bool operator<=(const_reference lhs, const ScalarType rhs) noexcept + { + return (lhs <= basic_json(rhs)); + } + + /*! + @brief comparison: less than or equal + @copydoc operator<=(const_reference, const_reference) + */ + template<typename ScalarType, typename std::enable_if< + std::is_scalar<ScalarType>::value, int>::type = 0> + friend bool operator<=(const ScalarType lhs, const_reference rhs) noexcept + { + return (basic_json(lhs) <= rhs); + } + + /*! + @brief comparison: greater than + + Compares whether one JSON value @a lhs is greater than another + JSON value by calculating `not (lhs <= rhs)`. + + @param[in] lhs first JSON value to consider + @param[in] rhs second JSON value to consider + @return whether @a lhs is greater than to @a rhs + + @complexity Linear. + + @exceptionsafety No-throw guarantee: this function never throws exceptions. + + @liveexample{The example demonstrates comparing several JSON + types.,operator__lessequal} + + @since version 1.0.0 + */ + friend bool operator>(const_reference lhs, const_reference rhs) noexcept + { + return not (lhs <= rhs); + } + + /*! + @brief comparison: greater than + @copydoc operator>(const_reference, const_reference) + */ + template<typename ScalarType, typename std::enable_if< + std::is_scalar<ScalarType>::value, int>::type = 0> + friend bool operator>(const_reference lhs, const ScalarType rhs) noexcept + { + return (lhs > basic_json(rhs)); + } + + /*! + @brief comparison: greater than + @copydoc operator>(const_reference, const_reference) + */ + template<typename ScalarType, typename std::enable_if< + std::is_scalar<ScalarType>::value, int>::type = 0> + friend bool operator>(const ScalarType lhs, const_reference rhs) noexcept + { + return (basic_json(lhs) > rhs); + } + + /*! + @brief comparison: greater than or equal + + Compares whether one JSON value @a lhs is greater than or equal to another + JSON value by calculating `not (lhs < rhs)`. + + @param[in] lhs first JSON value to consider + @param[in] rhs second JSON value to consider + @return whether @a lhs is greater than or equal to @a rhs + + @complexity Linear. + + @exceptionsafety No-throw guarantee: this function never throws exceptions. + + @liveexample{The example demonstrates comparing several JSON + types.,operator__greaterequal} + + @since version 1.0.0 + */ + friend bool operator>=(const_reference lhs, const_reference rhs) noexcept + { + return not (lhs < rhs); + } + + /*! + @brief comparison: greater than or equal + @copydoc operator>=(const_reference, const_reference) + */ + template<typename ScalarType, typename std::enable_if< + std::is_scalar<ScalarType>::value, int>::type = 0> + friend bool operator>=(const_reference lhs, const ScalarType rhs) noexcept + { + return (lhs >= basic_json(rhs)); + } + + /*! + @brief comparison: greater than or equal + @copydoc operator>=(const_reference, const_reference) + */ + template<typename ScalarType, typename std::enable_if< + std::is_scalar<ScalarType>::value, int>::type = 0> + friend bool operator>=(const ScalarType lhs, const_reference rhs) noexcept + { + return (basic_json(lhs) >= rhs); + } + + /// @} + + /////////////////// + // serialization // + /////////////////// + + /// @name serialization + /// @{ + + /*! + @brief serialize to stream + + Serialize the given JSON value @a j to the output stream @a o. The JSON + value will be serialized using the @ref dump member function. + + - The indentation of the output can be controlled with the member variable + `width` of the output stream @a o. For instance, using the manipulator + `std::setw(4)` on @a o sets the indentation level to `4` and the + serialization result is the same as calling `dump(4)`. + + - The indentation character can be controlled with the member variable + `fill` of the output stream @a o. For instance, the manipulator + `std::setfill('\\t')` sets indentation to use a tab character rather than + the default space character. + + @param[in,out] o stream to serialize to + @param[in] j JSON value to serialize + + @return the stream @a o + + @throw type_error.316 if a string stored inside the JSON value is not + UTF-8 encoded + + @complexity Linear. + + @liveexample{The example below shows the serialization with different + parameters to `width` to adjust the indentation level.,operator_serialize} + + @since version 1.0.0; indentation character added in version 3.0.0 + */ + friend std::ostream& operator<<(std::ostream& o, const basic_json& j) + { + // read width member and use it as indentation parameter if nonzero + const bool pretty_print = (o.width() > 0); + const auto indentation = (pretty_print ? o.width() : 0); + + // reset width to 0 for subsequent calls to this stream + o.width(0); + + // do the actual serialization + serializer s(detail::output_adapter<char>(o), o.fill()); + s.dump(j, pretty_print, false, static_cast<unsigned int>(indentation)); + return o; + } + + /*! + @brief serialize to stream + @deprecated This stream operator is deprecated and will be removed in + future 4.0.0 of the library. Please use + @ref operator<<(std::ostream&, const basic_json&) + instead; that is, replace calls like `j >> o;` with `o << j;`. + @since version 1.0.0; deprecated since version 3.0.0 + */ + JSON_DEPRECATED + friend std::ostream& operator>>(const basic_json& j, std::ostream& o) + { + return o << j; + } + + /// @} + + + ///////////////////// + // deserialization // + ///////////////////// + + /// @name deserialization + /// @{ + + /*! + @brief deserialize from a compatible input + + This function reads from a compatible input. Examples are: + - an array of 1-byte values + - strings with character/literal type with size of 1 byte + - input streams + - container with contiguous storage of 1-byte values. Compatible container + types include `std::vector`, `std::string`, `std::array`, + `std::valarray`, and `std::initializer_list`. Furthermore, C-style + arrays can be used with `std::begin()`/`std::end()`. User-defined + containers can be used as long as they implement random-access iterators + and a contiguous storage. + + @pre Each element of the container has a size of 1 byte. Violating this + precondition yields undefined behavior. **This precondition is enforced + with a static assertion.** + + @pre The container storage is contiguous. Violating this precondition + yields undefined behavior. **This precondition is enforced with an + assertion.** + @pre Each element of the container has a size of 1 byte. Violating this + precondition yields undefined behavior. **This precondition is enforced + with a static assertion.** + + @warning There is no way to enforce all preconditions at compile-time. If + the function is called with a noncompliant container and with + assertions switched off, the behavior is undefined and will most + likely yield segmentation violation. + + @param[in] i input to read from + @param[in] cb a parser callback function of type @ref parser_callback_t + which is used to control the deserialization by filtering unwanted values + (optional) + @param[in] allow_exceptions whether to throw exceptions in case of a + parse error (optional, true by default) + + @return result of the deserialization + + @throw parse_error.101 if a parse error occurs; example: `""unexpected end + of input; expected string literal""` + @throw parse_error.102 if to_unicode fails or surrogate error + @throw parse_error.103 if to_unicode fails + + @complexity Linear in the length of the input. The parser is a predictive + LL(1) parser. The complexity can be higher if the parser callback function + @a cb has a super-linear complexity. + + @note A UTF-8 byte order mark is silently ignored. + + @liveexample{The example below demonstrates the `parse()` function reading + from an array.,parse__array__parser_callback_t} + + @liveexample{The example below demonstrates the `parse()` function with + and without callback function.,parse__string__parser_callback_t} + + @liveexample{The example below demonstrates the `parse()` function with + and without callback function.,parse__istream__parser_callback_t} + + @liveexample{The example below demonstrates the `parse()` function reading + from a contiguous container.,parse__contiguouscontainer__parser_callback_t} + + @since version 2.0.3 (contiguous containers) + */ + static basic_json parse(detail::input_adapter&& i, + const parser_callback_t cb = nullptr, + const bool allow_exceptions = true) + { + basic_json result; + parser(i, cb, allow_exceptions).parse(true, result); + return result; + } + + static bool accept(detail::input_adapter&& i) + { + return parser(i).accept(true); + } + + /*! + @brief generate SAX events + + The SAX event lister must follow the interface of @ref json_sax. + + This function reads from a compatible input. Examples are: + - an array of 1-byte values + - strings with character/literal type with size of 1 byte + - input streams + - container with contiguous storage of 1-byte values. Compatible container + types include `std::vector`, `std::string`, `std::array`, + `std::valarray`, and `std::initializer_list`. Furthermore, C-style + arrays can be used with `std::begin()`/`std::end()`. User-defined + containers can be used as long as they implement random-access iterators + and a contiguous storage. + + @pre Each element of the container has a size of 1 byte. Violating this + precondition yields undefined behavior. **This precondition is enforced + with a static assertion.** + + @pre The container storage is contiguous. Violating this precondition + yields undefined behavior. **This precondition is enforced with an + assertion.** + @pre Each element of the container has a size of 1 byte. Violating this + precondition yields undefined behavior. **This precondition is enforced + with a static assertion.** + + @warning There is no way to enforce all preconditions at compile-time. If + the function is called with a noncompliant container and with + assertions switched off, the behavior is undefined and will most + likely yield segmentation violation. + + @param[in] i input to read from + @param[in,out] sax SAX event listener + @param[in] format the format to parse (JSON, CBOR, MessagePack, or UBJSON) + @param[in] strict whether the input has to be consumed completely + + @return return value of the last processed SAX event + + @throw parse_error.101 if a parse error occurs; example: `""unexpected end + of input; expected string literal""` + @throw parse_error.102 if to_unicode fails or surrogate error + @throw parse_error.103 if to_unicode fails + + @complexity Linear in the length of the input. The parser is a predictive + LL(1) parser. The complexity can be higher if the SAX consumer @a sax has + a super-linear complexity. + + @note A UTF-8 byte order mark is silently ignored. + + @liveexample{The example below demonstrates the `sax_parse()` function + reading from string and processing the events with a user-defined SAX + event consumer.,sax_parse} + + @since version 3.2.0 + */ + template <typename SAX> + static bool sax_parse(detail::input_adapter&& i, SAX* sax, + input_format_t format = input_format_t::json, + const bool strict = true) + { + assert(sax); + switch (format) + { + case input_format_t::json: + return parser(std::move(i)).sax_parse(sax, strict); + default: + return detail::binary_reader<basic_json, SAX>(std::move(i)).sax_parse(format, sax, strict); + } + } + + /*! + @brief deserialize from an iterator range with contiguous storage + + This function reads from an iterator range of a container with contiguous + storage of 1-byte values. Compatible container types include + `std::vector`, `std::string`, `std::array`, `std::valarray`, and + `std::initializer_list`. Furthermore, C-style arrays can be used with + `std::begin()`/`std::end()`. User-defined containers can be used as long + as they implement random-access iterators and a contiguous storage. + + @pre The iterator range is contiguous. Violating this precondition yields + undefined behavior. **This precondition is enforced with an assertion.** + @pre Each element in the range has a size of 1 byte. Violating this + precondition yields undefined behavior. **This precondition is enforced + with a static assertion.** + + @warning There is no way to enforce all preconditions at compile-time. If + the function is called with noncompliant iterators and with + assertions switched off, the behavior is undefined and will most + likely yield segmentation violation. + + @tparam IteratorType iterator of container with contiguous storage + @param[in] first begin of the range to parse (included) + @param[in] last end of the range to parse (excluded) + @param[in] cb a parser callback function of type @ref parser_callback_t + which is used to control the deserialization by filtering unwanted values + (optional) + @param[in] allow_exceptions whether to throw exceptions in case of a + parse error (optional, true by default) + + @return result of the deserialization + + @throw parse_error.101 in case of an unexpected token + @throw parse_error.102 if to_unicode fails or surrogate error + @throw parse_error.103 if to_unicode fails + + @complexity Linear in the length of the input. The parser is a predictive + LL(1) parser. The complexity can be higher if the parser callback function + @a cb has a super-linear complexity. + + @note A UTF-8 byte order mark is silently ignored. + + @liveexample{The example below demonstrates the `parse()` function reading + from an iterator range.,parse__iteratortype__parser_callback_t} + + @since version 2.0.3 + */ + template<class IteratorType, typename std::enable_if< + std::is_base_of< + std::random_access_iterator_tag, + typename std::iterator_traits<IteratorType>::iterator_category>::value, int>::type = 0> + static basic_json parse(IteratorType first, IteratorType last, + const parser_callback_t cb = nullptr, + const bool allow_exceptions = true) + { + basic_json result; + parser(detail::input_adapter(first, last), cb, allow_exceptions).parse(true, result); + return result; + } + + template<class IteratorType, typename std::enable_if< + std::is_base_of< + std::random_access_iterator_tag, + typename std::iterator_traits<IteratorType>::iterator_category>::value, int>::type = 0> + static bool accept(IteratorType first, IteratorType last) + { + return parser(detail::input_adapter(first, last)).accept(true); + } + + template<class IteratorType, class SAX, typename std::enable_if< + std::is_base_of< + std::random_access_iterator_tag, + typename std::iterator_traits<IteratorType>::iterator_category>::value, int>::type = 0> + static bool sax_parse(IteratorType first, IteratorType last, SAX* sax) + { + return parser(detail::input_adapter(first, last)).sax_parse(sax); + } + + /*! + @brief deserialize from stream + @deprecated This stream operator is deprecated and will be removed in + version 4.0.0 of the library. Please use + @ref operator>>(std::istream&, basic_json&) + instead; that is, replace calls like `j << i;` with `i >> j;`. + @since version 1.0.0; deprecated since version 3.0.0 + */ + JSON_DEPRECATED + friend std::istream& operator<<(basic_json& j, std::istream& i) + { + return operator>>(i, j); + } + + /*! + @brief deserialize from stream + + Deserializes an input stream to a JSON value. + + @param[in,out] i input stream to read a serialized JSON value from + @param[in,out] j JSON value to write the deserialized input to + + @throw parse_error.101 in case of an unexpected token + @throw parse_error.102 if to_unicode fails or surrogate error + @throw parse_error.103 if to_unicode fails + + @complexity Linear in the length of the input. The parser is a predictive + LL(1) parser. + + @note A UTF-8 byte order mark is silently ignored. + + @liveexample{The example below shows how a JSON value is constructed by + reading a serialization from a stream.,operator_deserialize} + + @sa parse(std::istream&, const parser_callback_t) for a variant with a + parser callback function to filter values while parsing + + @since version 1.0.0 + */ + friend std::istream& operator>>(std::istream& i, basic_json& j) + { + parser(detail::input_adapter(i)).parse(false, j); + return i; + } + + /// @} + + /////////////////////////// + // convenience functions // + /////////////////////////// + + /*! + @brief return the type as string + + Returns the type name as string to be used in error messages - usually to + indicate that a function was called on a wrong JSON type. + + @return a string representation of a the @a m_type member: + Value type | return value + ----------- | ------------- + null | `"null"` + boolean | `"boolean"` + string | `"string"` + number | `"number"` (for all number types) + object | `"object"` + array | `"array"` + discarded | `"discarded"` + + @exceptionsafety No-throw guarantee: this function never throws exceptions. + + @complexity Constant. + + @liveexample{The following code exemplifies `type_name()` for all JSON + types.,type_name} + + @sa @ref type() -- return the type of the JSON value + @sa @ref operator value_t() -- return the type of the JSON value (implicit) + + @since version 1.0.0, public since 2.1.0, `const char*` and `noexcept` + since 3.0.0 + */ + const char* type_name() const noexcept + { + { + switch (m_type) + { + case value_t::null: + return "null"; + case value_t::object: + return "object"; + case value_t::array: + return "array"; + case value_t::string: + return "string"; + case value_t::boolean: + return "boolean"; + case value_t::discarded: + return "discarded"; + default: + return "number"; + } + } + } + + + private: + ////////////////////// + // member variables // + ////////////////////// + + /// the type of the current element + value_t m_type = value_t::null; + + /// the value of the current element + json_value m_value = {}; + + ////////////////////////////////////////// + // binary serialization/deserialization // + ////////////////////////////////////////// + + /// @name binary serialization/deserialization support + /// @{ + + public: + /*! + @brief create a CBOR serialization of a given JSON value + + Serializes a given JSON value @a j to a byte vector using the CBOR (Concise + Binary Object Representation) serialization format. CBOR is a binary + serialization format which aims to be more compact than JSON itself, yet + more efficient to parse. + + The library uses the following mapping from JSON values types to + CBOR types according to the CBOR specification (RFC 7049): + + JSON value type | value/range | CBOR type | first byte + --------------- | ------------------------------------------ | ---------------------------------- | --------------- + null | `null` | Null | 0xF6 + boolean | `true` | True | 0xF5 + boolean | `false` | False | 0xF4 + number_integer | -9223372036854775808..-2147483649 | Negative integer (8 bytes follow) | 0x3B + number_integer | -2147483648..-32769 | Negative integer (4 bytes follow) | 0x3A + number_integer | -32768..-129 | Negative integer (2 bytes follow) | 0x39 + number_integer | -128..-25 | Negative integer (1 byte follow) | 0x38 + number_integer | -24..-1 | Negative integer | 0x20..0x37 + number_integer | 0..23 | Integer | 0x00..0x17 + number_integer | 24..255 | Unsigned integer (1 byte follow) | 0x18 + number_integer | 256..65535 | Unsigned integer (2 bytes follow) | 0x19 + number_integer | 65536..4294967295 | Unsigned integer (4 bytes follow) | 0x1A + number_integer | 4294967296..18446744073709551615 | Unsigned integer (8 bytes follow) | 0x1B + number_unsigned | 0..23 | Integer | 0x00..0x17 + number_unsigned | 24..255 | Unsigned integer (1 byte follow) | 0x18 + number_unsigned | 256..65535 | Unsigned integer (2 bytes follow) | 0x19 + number_unsigned | 65536..4294967295 | Unsigned integer (4 bytes follow) | 0x1A + number_unsigned | 4294967296..18446744073709551615 | Unsigned integer (8 bytes follow) | 0x1B + number_float | *any value* | Double-Precision Float | 0xFB + string | *length*: 0..23 | UTF-8 string | 0x60..0x77 + string | *length*: 23..255 | UTF-8 string (1 byte follow) | 0x78 + string | *length*: 256..65535 | UTF-8 string (2 bytes follow) | 0x79 + string | *length*: 65536..4294967295 | UTF-8 string (4 bytes follow) | 0x7A + string | *length*: 4294967296..18446744073709551615 | UTF-8 string (8 bytes follow) | 0x7B + array | *size*: 0..23 | array | 0x80..0x97 + array | *size*: 23..255 | array (1 byte follow) | 0x98 + array | *size*: 256..65535 | array (2 bytes follow) | 0x99 + array | *size*: 65536..4294967295 | array (4 bytes follow) | 0x9A + array | *size*: 4294967296..18446744073709551615 | array (8 bytes follow) | 0x9B + object | *size*: 0..23 | map | 0xA0..0xB7 + object | *size*: 23..255 | map (1 byte follow) | 0xB8 + object | *size*: 256..65535 | map (2 bytes follow) | 0xB9 + object | *size*: 65536..4294967295 | map (4 bytes follow) | 0xBA + object | *size*: 4294967296..18446744073709551615 | map (8 bytes follow) | 0xBB + + @note The mapping is **complete** in the sense that any JSON value type + can be converted to a CBOR value. + + @note If NaN or Infinity are stored inside a JSON number, they are + serialized properly. This behavior differs from the @ref dump() + function which serializes NaN or Infinity to `null`. + + @note The following CBOR types are not used in the conversion: + - byte strings (0x40..0x5F) + - UTF-8 strings terminated by "break" (0x7F) + - arrays terminated by "break" (0x9F) + - maps terminated by "break" (0xBF) + - date/time (0xC0..0xC1) + - bignum (0xC2..0xC3) + - decimal fraction (0xC4) + - bigfloat (0xC5) + - tagged items (0xC6..0xD4, 0xD8..0xDB) + - expected conversions (0xD5..0xD7) + - simple values (0xE0..0xF3, 0xF8) + - undefined (0xF7) + - half and single-precision floats (0xF9-0xFA) + - break (0xFF) + + @param[in] j JSON value to serialize + @return MessagePack serialization as byte vector + + @complexity Linear in the size of the JSON value @a j. + + @liveexample{The example shows the serialization of a JSON value to a byte + vector in CBOR format.,to_cbor} + + @sa http://cbor.io + @sa @ref from_cbor(detail::input_adapter&&, const bool, const bool) for the + analogous deserialization + @sa @ref to_msgpack(const basic_json&) for the related MessagePack format + @sa @ref to_ubjson(const basic_json&, const bool, const bool) for the + related UBJSON format + + @since version 2.0.9 + */ + static std::vector<uint8_t> to_cbor(const basic_json& j) + { + std::vector<uint8_t> result; + to_cbor(j, result); + return result; + } + + static void to_cbor(const basic_json& j, detail::output_adapter<uint8_t> o) + { + binary_writer<uint8_t>(o).write_cbor(j); + } + + static void to_cbor(const basic_json& j, detail::output_adapter<char> o) + { + binary_writer<char>(o).write_cbor(j); + } + + /*! + @brief create a MessagePack serialization of a given JSON value + + Serializes a given JSON value @a j to a byte vector using the MessagePack + serialization format. MessagePack is a binary serialization format which + aims to be more compact than JSON itself, yet more efficient to parse. + + The library uses the following mapping from JSON values types to + MessagePack types according to the MessagePack specification: + + JSON value type | value/range | MessagePack type | first byte + --------------- | --------------------------------- | ---------------- | ---------- + null | `null` | nil | 0xC0 + boolean | `true` | true | 0xC3 + boolean | `false` | false | 0xC2 + number_integer | -9223372036854775808..-2147483649 | int64 | 0xD3 + number_integer | -2147483648..-32769 | int32 | 0xD2 + number_integer | -32768..-129 | int16 | 0xD1 + number_integer | -128..-33 | int8 | 0xD0 + number_integer | -32..-1 | negative fixint | 0xE0..0xFF + number_integer | 0..127 | positive fixint | 0x00..0x7F + number_integer | 128..255 | uint 8 | 0xCC + number_integer | 256..65535 | uint 16 | 0xCD + number_integer | 65536..4294967295 | uint 32 | 0xCE + number_integer | 4294967296..18446744073709551615 | uint 64 | 0xCF + number_unsigned | 0..127 | positive fixint | 0x00..0x7F + number_unsigned | 128..255 | uint 8 | 0xCC + number_unsigned | 256..65535 | uint 16 | 0xCD + number_unsigned | 65536..4294967295 | uint 32 | 0xCE + number_unsigned | 4294967296..18446744073709551615 | uint 64 | 0xCF + number_float | *any value* | float 64 | 0xCB + string | *length*: 0..31 | fixstr | 0xA0..0xBF + string | *length*: 32..255 | str 8 | 0xD9 + string | *length*: 256..65535 | str 16 | 0xDA + string | *length*: 65536..4294967295 | str 32 | 0xDB + array | *size*: 0..15 | fixarray | 0x90..0x9F + array | *size*: 16..65535 | array 16 | 0xDC + array | *size*: 65536..4294967295 | array 32 | 0xDD + object | *size*: 0..15 | fix map | 0x80..0x8F + object | *size*: 16..65535 | map 16 | 0xDE + object | *size*: 65536..4294967295 | map 32 | 0xDF + + @note The mapping is **complete** in the sense that any JSON value type + can be converted to a MessagePack value. + + @note The following values can **not** be converted to a MessagePack value: + - strings with more than 4294967295 bytes + - arrays with more than 4294967295 elements + - objects with more than 4294967295 elements + + @note The following MessagePack types are not used in the conversion: + - bin 8 - bin 32 (0xC4..0xC6) + - ext 8 - ext 32 (0xC7..0xC9) + - float 32 (0xCA) + - fixext 1 - fixext 16 (0xD4..0xD8) + + @note Any MessagePack output created @ref to_msgpack can be successfully + parsed by @ref from_msgpack. + + @note If NaN or Infinity are stored inside a JSON number, they are + serialized properly. This behavior differs from the @ref dump() + function which serializes NaN or Infinity to `null`. + + @param[in] j JSON value to serialize + @return MessagePack serialization as byte vector + + @complexity Linear in the size of the JSON value @a j. + + @liveexample{The example shows the serialization of a JSON value to a byte + vector in MessagePack format.,to_msgpack} + + @sa http://msgpack.org + @sa @ref from_msgpack for the analogous deserialization + @sa @ref to_cbor(const basic_json& for the related CBOR format + @sa @ref to_ubjson(const basic_json&, const bool, const bool) for the + related UBJSON format + + @since version 2.0.9 + */ + static std::vector<uint8_t> to_msgpack(const basic_json& j) + { + std::vector<uint8_t> result; + to_msgpack(j, result); + return result; + } + + static void to_msgpack(const basic_json& j, detail::output_adapter<uint8_t> o) + { + binary_writer<uint8_t>(o).write_msgpack(j); + } + + static void to_msgpack(const basic_json& j, detail::output_adapter<char> o) + { + binary_writer<char>(o).write_msgpack(j); + } + + /*! + @brief create a UBJSON serialization of a given JSON value + + Serializes a given JSON value @a j to a byte vector using the UBJSON + (Universal Binary JSON) serialization format. UBJSON aims to be more compact + than JSON itself, yet more efficient to parse. + + The library uses the following mapping from JSON values types to + UBJSON types according to the UBJSON specification: + + JSON value type | value/range | UBJSON type | marker + --------------- | --------------------------------- | ----------- | ------ + null | `null` | null | `Z` + boolean | `true` | true | `T` + boolean | `false` | false | `F` + number_integer | -9223372036854775808..-2147483649 | int64 | `L` + number_integer | -2147483648..-32769 | int32 | `l` + number_integer | -32768..-129 | int16 | `I` + number_integer | -128..127 | int8 | `i` + number_integer | 128..255 | uint8 | `U` + number_integer | 256..32767 | int16 | `I` + number_integer | 32768..2147483647 | int32 | `l` + number_integer | 2147483648..9223372036854775807 | int64 | `L` + number_unsigned | 0..127 | int8 | `i` + number_unsigned | 128..255 | uint8 | `U` + number_unsigned | 256..32767 | int16 | `I` + number_unsigned | 32768..2147483647 | int32 | `l` + number_unsigned | 2147483648..9223372036854775807 | int64 | `L` + number_float | *any value* | float64 | `D` + string | *with shortest length indicator* | string | `S` + array | *see notes on optimized format* | array | `[` + object | *see notes on optimized format* | map | `{` + + @note The mapping is **complete** in the sense that any JSON value type + can be converted to a UBJSON value. + + @note The following values can **not** be converted to a UBJSON value: + - strings with more than 9223372036854775807 bytes (theoretical) + - unsigned integer numbers above 9223372036854775807 + + @note The following markers are not used in the conversion: + - `Z`: no-op values are not created. + - `C`: single-byte strings are serialized with `S` markers. + + @note Any UBJSON output created @ref to_ubjson can be successfully parsed + by @ref from_ubjson. + + @note If NaN or Infinity are stored inside a JSON number, they are + serialized properly. This behavior differs from the @ref dump() + function which serializes NaN or Infinity to `null`. + + @note The optimized formats for containers are supported: Parameter + @a use_size adds size information to the beginning of a container and + removes the closing marker. Parameter @a use_type further checks + whether all elements of a container have the same type and adds the + type marker to the beginning of the container. The @a use_type + parameter must only be used together with @a use_size = true. Note + that @a use_size = true alone may result in larger representations - + the benefit of this parameter is that the receiving side is + immediately informed on the number of elements of the container. + + @param[in] j JSON value to serialize + @param[in] use_size whether to add size annotations to container types + @param[in] use_type whether to add type annotations to container types + (must be combined with @a use_size = true) + @return UBJSON serialization as byte vector + + @complexity Linear in the size of the JSON value @a j. + + @liveexample{The example shows the serialization of a JSON value to a byte + vector in UBJSON format.,to_ubjson} + + @sa http://ubjson.org + @sa @ref from_ubjson(detail::input_adapter&&, const bool, const bool) for the + analogous deserialization + @sa @ref to_cbor(const basic_json& for the related CBOR format + @sa @ref to_msgpack(const basic_json&) for the related MessagePack format + + @since version 3.1.0 + */ + static std::vector<uint8_t> to_ubjson(const basic_json& j, + const bool use_size = false, + const bool use_type = false) + { + std::vector<uint8_t> result; + to_ubjson(j, result, use_size, use_type); + return result; + } + + static void to_ubjson(const basic_json& j, detail::output_adapter<uint8_t> o, + const bool use_size = false, const bool use_type = false) + { + binary_writer<uint8_t>(o).write_ubjson(j, use_size, use_type); + } + + static void to_ubjson(const basic_json& j, detail::output_adapter<char> o, + const bool use_size = false, const bool use_type = false) + { + binary_writer<char>(o).write_ubjson(j, use_size, use_type); + } + + + /*! + @brief Serializes the given JSON object `j` to BSON and returns a vector + containing the corresponding BSON-representation. + + BSON (Binary JSON) is a binary format in which zero or more ordered key/value pairs are + stored as a single entity (a so-called document). + + The library uses the following mapping from JSON values types to BSON types: + + JSON value type | value/range | BSON type | marker + --------------- | --------------------------------- | ----------- | ------ + null | `null` | null | 0x0A + boolean | `true`, `false` | boolean | 0x08 + number_integer | -9223372036854775808..-2147483649 | int64 | 0x12 + number_integer | -2147483648..2147483647 | int32 | 0x10 + number_integer | 2147483648..9223372036854775807 | int64 | 0x12 + number_unsigned | 0..2147483647 | int32 | 0x10 + number_unsigned | 2147483648..9223372036854775807 | int64 | 0x12 + number_unsigned | 9223372036854775808..18446744073709551615| -- | -- + number_float | *any value* | double | 0x01 + string | *any value* | string | 0x02 + array | *any value* | document | 0x04 + object | *any value* | document | 0x03 + + @warning The mapping is **incomplete**, since only JSON-objects (and things + contained therein) can be serialized to BSON. + Also, integers larger than 9223372036854775807 cannot be serialized to BSON, + and the keys may not contain U+0000, since they are serialized a + zero-terminated c-strings. + + @throw out_of_range.407 if `j.is_number_unsigned() && j.get<std::uint64_t>() > 9223372036854775807` + @throw out_of_range.409 if a key in `j` contains a NULL (U+0000) + @throw type_error.317 if `!j.is_object()` + + @pre The input `j` is required to be an object: `j.is_object() == true`. + + @note Any BSON output created via @ref to_bson can be successfully parsed + by @ref from_bson. + + @param[in] j JSON value to serialize + @return BSON serialization as byte vector + + @complexity Linear in the size of the JSON value @a j. + + @liveexample{The example shows the serialization of a JSON value to a byte + vector in BSON format.,to_bson} + + @sa http://bsonspec.org/spec.html + @sa @ref from_bson(detail::input_adapter&&, const bool strict) for the + analogous deserialization + @sa @ref to_ubjson(const basic_json&, const bool, const bool) for the + related UBJSON format + @sa @ref to_cbor(const basic_json&) for the related CBOR format + @sa @ref to_msgpack(const basic_json&) for the related MessagePack format + */ + static std::vector<uint8_t> to_bson(const basic_json& j) + { + std::vector<uint8_t> result; + to_bson(j, result); + return result; + } + + /*! + @brief Serializes the given JSON object `j` to BSON and forwards the + corresponding BSON-representation to the given output_adapter `o`. + @param j The JSON object to convert to BSON. + @param o The output adapter that receives the binary BSON representation. + @pre The input `j` shall be an object: `j.is_object() == true` + @sa @ref to_bson(const basic_json&) + */ + static void to_bson(const basic_json& j, detail::output_adapter<uint8_t> o) + { + binary_writer<uint8_t>(o).write_bson(j); + } + + /*! + @copydoc to_bson(const basic_json&, detail::output_adapter<uint8_t>) + */ + static void to_bson(const basic_json& j, detail::output_adapter<char> o) + { + binary_writer<char>(o).write_bson(j); + } + + + /*! + @brief create a JSON value from an input in CBOR format + + Deserializes a given input @a i to a JSON value using the CBOR (Concise + Binary Object Representation) serialization format. + + The library maps CBOR types to JSON value types as follows: + + CBOR type | JSON value type | first byte + ---------------------- | --------------- | ---------- + Integer | number_unsigned | 0x00..0x17 + Unsigned integer | number_unsigned | 0x18 + Unsigned integer | number_unsigned | 0x19 + Unsigned integer | number_unsigned | 0x1A + Unsigned integer | number_unsigned | 0x1B + Negative integer | number_integer | 0x20..0x37 + Negative integer | number_integer | 0x38 + Negative integer | number_integer | 0x39 + Negative integer | number_integer | 0x3A + Negative integer | number_integer | 0x3B + Negative integer | number_integer | 0x40..0x57 + UTF-8 string | string | 0x60..0x77 + UTF-8 string | string | 0x78 + UTF-8 string | string | 0x79 + UTF-8 string | string | 0x7A + UTF-8 string | string | 0x7B + UTF-8 string | string | 0x7F + array | array | 0x80..0x97 + array | array | 0x98 + array | array | 0x99 + array | array | 0x9A + array | array | 0x9B + array | array | 0x9F + map | object | 0xA0..0xB7 + map | object | 0xB8 + map | object | 0xB9 + map | object | 0xBA + map | object | 0xBB + map | object | 0xBF + False | `false` | 0xF4 + True | `true` | 0xF5 + Null | `null` | 0xF6 + Half-Precision Float | number_float | 0xF9 + Single-Precision Float | number_float | 0xFA + Double-Precision Float | number_float | 0xFB + + @warning The mapping is **incomplete** in the sense that not all CBOR + types can be converted to a JSON value. The following CBOR types + are not supported and will yield parse errors (parse_error.112): + - byte strings (0x40..0x5F) + - date/time (0xC0..0xC1) + - bignum (0xC2..0xC3) + - decimal fraction (0xC4) + - bigfloat (0xC5) + - tagged items (0xC6..0xD4, 0xD8..0xDB) + - expected conversions (0xD5..0xD7) + - simple values (0xE0..0xF3, 0xF8) + - undefined (0xF7) + + @warning CBOR allows map keys of any type, whereas JSON only allows + strings as keys in object values. Therefore, CBOR maps with keys + other than UTF-8 strings are rejected (parse_error.113). + + @note Any CBOR output created @ref to_cbor can be successfully parsed by + @ref from_cbor. + + @param[in] i an input in CBOR format convertible to an input adapter + @param[in] strict whether to expect the input to be consumed until EOF + (true by default) + @param[in] allow_exceptions whether to throw exceptions in case of a + parse error (optional, true by default) + + @return deserialized JSON value + + @throw parse_error.110 if the given input ends prematurely or the end of + file was not reached when @a strict was set to true + @throw parse_error.112 if unsupported features from CBOR were + used in the given input @a v or if the input is not valid CBOR + @throw parse_error.113 if a string was expected as map key, but not found + + @complexity Linear in the size of the input @a i. + + @liveexample{The example shows the deserialization of a byte vector in CBOR + format to a JSON value.,from_cbor} + + @sa http://cbor.io + @sa @ref to_cbor(const basic_json&) for the analogous serialization + @sa @ref from_msgpack(detail::input_adapter&&, const bool, const bool) for the + related MessagePack format + @sa @ref from_ubjson(detail::input_adapter&&, const bool, const bool) for the + related UBJSON format + + @since version 2.0.9; parameter @a start_index since 2.1.1; changed to + consume input adapters, removed start_index parameter, and added + @a strict parameter since 3.0.0; added @a allow_exceptions parameter + since 3.2.0 + */ + static basic_json from_cbor(detail::input_adapter&& i, + const bool strict = true, + const bool allow_exceptions = true) + { + basic_json result; + detail::json_sax_dom_parser<basic_json> sdp(result, allow_exceptions); + const bool res = binary_reader(detail::input_adapter(i)).sax_parse(input_format_t::cbor, &sdp, strict); + return res ? result : basic_json(value_t::discarded); + } + + /*! + @copydoc from_cbor(detail::input_adapter&&, const bool, const bool) + */ + template<typename A1, typename A2, + detail::enable_if_t<std::is_constructible<detail::input_adapter, A1, A2>::value, int> = 0> + static basic_json from_cbor(A1 && a1, A2 && a2, + const bool strict = true, + const bool allow_exceptions = true) + { + basic_json result; + detail::json_sax_dom_parser<basic_json> sdp(result, allow_exceptions); + const bool res = binary_reader(detail::input_adapter(std::forward<A1>(a1), std::forward<A2>(a2))).sax_parse(input_format_t::cbor, &sdp, strict); + return res ? result : basic_json(value_t::discarded); + } + + /*! + @brief create a JSON value from an input in MessagePack format + + Deserializes a given input @a i to a JSON value using the MessagePack + serialization format. + + The library maps MessagePack types to JSON value types as follows: + + MessagePack type | JSON value type | first byte + ---------------- | --------------- | ---------- + positive fixint | number_unsigned | 0x00..0x7F + fixmap | object | 0x80..0x8F + fixarray | array | 0x90..0x9F + fixstr | string | 0xA0..0xBF + nil | `null` | 0xC0 + false | `false` | 0xC2 + true | `true` | 0xC3 + float 32 | number_float | 0xCA + float 64 | number_float | 0xCB + uint 8 | number_unsigned | 0xCC + uint 16 | number_unsigned | 0xCD + uint 32 | number_unsigned | 0xCE + uint 64 | number_unsigned | 0xCF + int 8 | number_integer | 0xD0 + int 16 | number_integer | 0xD1 + int 32 | number_integer | 0xD2 + int 64 | number_integer | 0xD3 + str 8 | string | 0xD9 + str 16 | string | 0xDA + str 32 | string | 0xDB + array 16 | array | 0xDC + array 32 | array | 0xDD + map 16 | object | 0xDE + map 32 | object | 0xDF + negative fixint | number_integer | 0xE0-0xFF + + @warning The mapping is **incomplete** in the sense that not all + MessagePack types can be converted to a JSON value. The following + MessagePack types are not supported and will yield parse errors: + - bin 8 - bin 32 (0xC4..0xC6) + - ext 8 - ext 32 (0xC7..0xC9) + - fixext 1 - fixext 16 (0xD4..0xD8) + + @note Any MessagePack output created @ref to_msgpack can be successfully + parsed by @ref from_msgpack. + + @param[in] i an input in MessagePack format convertible to an input + adapter + @param[in] strict whether to expect the input to be consumed until EOF + (true by default) + @param[in] allow_exceptions whether to throw exceptions in case of a + parse error (optional, true by default) + + @return deserialized JSON value + + @throw parse_error.110 if the given input ends prematurely or the end of + file was not reached when @a strict was set to true + @throw parse_error.112 if unsupported features from MessagePack were + used in the given input @a i or if the input is not valid MessagePack + @throw parse_error.113 if a string was expected as map key, but not found + + @complexity Linear in the size of the input @a i. + + @liveexample{The example shows the deserialization of a byte vector in + MessagePack format to a JSON value.,from_msgpack} + + @sa http://msgpack.org + @sa @ref to_msgpack(const basic_json&) for the analogous serialization + @sa @ref from_cbor(detail::input_adapter&&, const bool, const bool) for the + related CBOR format + @sa @ref from_ubjson(detail::input_adapter&&, const bool, const bool) for + the related UBJSON format + @sa @ref from_bson(detail::input_adapter&&, const bool, const bool) for + the related BSON format + + @since version 2.0.9; parameter @a start_index since 2.1.1; changed to + consume input adapters, removed start_index parameter, and added + @a strict parameter since 3.0.0; added @a allow_exceptions parameter + since 3.2.0 + */ + static basic_json from_msgpack(detail::input_adapter&& i, + const bool strict = true, + const bool allow_exceptions = true) + { + basic_json result; + detail::json_sax_dom_parser<basic_json> sdp(result, allow_exceptions); + const bool res = binary_reader(detail::input_adapter(i)).sax_parse(input_format_t::msgpack, &sdp, strict); + return res ? result : basic_json(value_t::discarded); + } + + /*! + @copydoc from_msgpack(detail::input_adapter&&, const bool, const bool) + */ + template<typename A1, typename A2, + detail::enable_if_t<std::is_constructible<detail::input_adapter, A1, A2>::value, int> = 0> + static basic_json from_msgpack(A1 && a1, A2 && a2, + const bool strict = true, + const bool allow_exceptions = true) + { + basic_json result; + detail::json_sax_dom_parser<basic_json> sdp(result, allow_exceptions); + const bool res = binary_reader(detail::input_adapter(std::forward<A1>(a1), std::forward<A2>(a2))).sax_parse(input_format_t::msgpack, &sdp, strict); + return res ? result : basic_json(value_t::discarded); + } + + /*! + @brief create a JSON value from an input in UBJSON format + + Deserializes a given input @a i to a JSON value using the UBJSON (Universal + Binary JSON) serialization format. + + The library maps UBJSON types to JSON value types as follows: + + UBJSON type | JSON value type | marker + ----------- | --------------------------------------- | ------ + no-op | *no value, next value is read* | `N` + null | `null` | `Z` + false | `false` | `F` + true | `true` | `T` + float32 | number_float | `d` + float64 | number_float | `D` + uint8 | number_unsigned | `U` + int8 | number_integer | `i` + int16 | number_integer | `I` + int32 | number_integer | `l` + int64 | number_integer | `L` + string | string | `S` + char | string | `C` + array | array (optimized values are supported) | `[` + object | object (optimized values are supported) | `{` + + @note The mapping is **complete** in the sense that any UBJSON value can + be converted to a JSON value. + + @param[in] i an input in UBJSON format convertible to an input adapter + @param[in] strict whether to expect the input to be consumed until EOF + (true by default) + @param[in] allow_exceptions whether to throw exceptions in case of a + parse error (optional, true by default) + + @return deserialized JSON value + + @throw parse_error.110 if the given input ends prematurely or the end of + file was not reached when @a strict was set to true + @throw parse_error.112 if a parse error occurs + @throw parse_error.113 if a string could not be parsed successfully + + @complexity Linear in the size of the input @a i. + + @liveexample{The example shows the deserialization of a byte vector in + UBJSON format to a JSON value.,from_ubjson} + + @sa http://ubjson.org + @sa @ref to_ubjson(const basic_json&, const bool, const bool) for the + analogous serialization + @sa @ref from_cbor(detail::input_adapter&&, const bool, const bool) for the + related CBOR format + @sa @ref from_msgpack(detail::input_adapter&&, const bool, const bool) for + the related MessagePack format + @sa @ref from_bson(detail::input_adapter&&, const bool, const bool) for + the related BSON format + + @since version 3.1.0; added @a allow_exceptions parameter since 3.2.0 + */ + static basic_json from_ubjson(detail::input_adapter&& i, + const bool strict = true, + const bool allow_exceptions = true) + { + basic_json result; + detail::json_sax_dom_parser<basic_json> sdp(result, allow_exceptions); + const bool res = binary_reader(detail::input_adapter(i)).sax_parse(input_format_t::ubjson, &sdp, strict); + return res ? result : basic_json(value_t::discarded); + } + + /*! + @copydoc from_ubjson(detail::input_adapter&&, const bool, const bool) + */ + template<typename A1, typename A2, + detail::enable_if_t<std::is_constructible<detail::input_adapter, A1, A2>::value, int> = 0> + static basic_json from_ubjson(A1 && a1, A2 && a2, + const bool strict = true, + const bool allow_exceptions = true) + { + basic_json result; + detail::json_sax_dom_parser<basic_json> sdp(result, allow_exceptions); + const bool res = binary_reader(detail::input_adapter(std::forward<A1>(a1), std::forward<A2>(a2))).sax_parse(input_format_t::ubjson, &sdp, strict); + return res ? result : basic_json(value_t::discarded); + } + + /*! + @brief Create a JSON value from an input in BSON format + + Deserializes a given input @a i to a JSON value using the BSON (Binary JSON) + serialization format. + + The library maps BSON record types to JSON value types as follows: + + BSON type | BSON marker byte | JSON value type + --------------- | ---------------- | --------------------------- + double | 0x01 | number_float + string | 0x02 | string + document | 0x03 | object + array | 0x04 | array + binary | 0x05 | still unsupported + undefined | 0x06 | still unsupported + ObjectId | 0x07 | still unsupported + boolean | 0x08 | boolean + UTC Date-Time | 0x09 | still unsupported + null | 0x0A | null + Regular Expr. | 0x0B | still unsupported + DB Pointer | 0x0C | still unsupported + JavaScript Code | 0x0D | still unsupported + Symbol | 0x0E | still unsupported + JavaScript Code | 0x0F | still unsupported + int32 | 0x10 | number_integer + Timestamp | 0x11 | still unsupported + 128-bit decimal float | 0x13 | still unsupported + Max Key | 0x7F | still unsupported + Min Key | 0xFF | still unsupported + + @warning The mapping is **incomplete**. The unsupported mappings + are indicated in the table above. + + @param[in] i an input in BSON format convertible to an input adapter + @param[in] strict whether to expect the input to be consumed until EOF + (true by default) + @param[in] allow_exceptions whether to throw exceptions in case of a + parse error (optional, true by default) + + @return deserialized JSON value + + @throw parse_error.114 if an unsupported BSON record type is encountered + + @complexity Linear in the size of the input @a i. + + @liveexample{The example shows the deserialization of a byte vector in + BSON format to a JSON value.,from_bson} + + @sa http://bsonspec.org/spec.html + @sa @ref to_bson(const basic_json&) for the analogous serialization + @sa @ref from_cbor(detail::input_adapter&&, const bool, const bool) for the + related CBOR format + @sa @ref from_msgpack(detail::input_adapter&&, const bool, const bool) for + the related MessagePack format + @sa @ref from_ubjson(detail::input_adapter&&, const bool, const bool) for the + related UBJSON format + */ + static basic_json from_bson(detail::input_adapter&& i, + const bool strict = true, + const bool allow_exceptions = true) + { + basic_json result; + detail::json_sax_dom_parser<basic_json> sdp(result, allow_exceptions); + const bool res = binary_reader(detail::input_adapter(i)).sax_parse(input_format_t::bson, &sdp, strict); + return res ? result : basic_json(value_t::discarded); + } + + /*! + @copydoc from_bson(detail::input_adapter&&, const bool, const bool) + */ + template<typename A1, typename A2, + detail::enable_if_t<std::is_constructible<detail::input_adapter, A1, A2>::value, int> = 0> + static basic_json from_bson(A1 && a1, A2 && a2, + const bool strict = true, + const bool allow_exceptions = true) + { + basic_json result; + detail::json_sax_dom_parser<basic_json> sdp(result, allow_exceptions); + const bool res = binary_reader(detail::input_adapter(std::forward<A1>(a1), std::forward<A2>(a2))).sax_parse(input_format_t::bson, &sdp, strict); + return res ? result : basic_json(value_t::discarded); + } + + + + /// @} + + ////////////////////////// + // JSON Pointer support // + ////////////////////////// + + /// @name JSON Pointer functions + /// @{ + + /*! + @brief access specified element via JSON Pointer + + Uses a JSON pointer to retrieve a reference to the respective JSON value. + No bound checking is performed. Similar to @ref operator[](const typename + object_t::key_type&), `null` values are created in arrays and objects if + necessary. + + In particular: + - If the JSON pointer points to an object key that does not exist, it + is created an filled with a `null` value before a reference to it + is returned. + - If the JSON pointer points to an array index that does not exist, it + is created an filled with a `null` value before a reference to it + is returned. All indices between the current maximum and the given + index are also filled with `null`. + - The special value `-` is treated as a synonym for the index past the + end. + + @param[in] ptr a JSON pointer + + @return reference to the element pointed to by @a ptr + + @complexity Constant. + + @throw parse_error.106 if an array index begins with '0' + @throw parse_error.109 if an array index was not a number + @throw out_of_range.404 if the JSON pointer can not be resolved + + @liveexample{The behavior is shown in the example.,operatorjson_pointer} + + @since version 2.0.0 + */ + reference operator[](const json_pointer& ptr) + { + return ptr.get_unchecked(this); + } + + /*! + @brief access specified element via JSON Pointer + + Uses a JSON pointer to retrieve a reference to the respective JSON value. + No bound checking is performed. The function does not change the JSON + value; no `null` values are created. In particular, the the special value + `-` yields an exception. + + @param[in] ptr JSON pointer to the desired element + + @return const reference to the element pointed to by @a ptr + + @complexity Constant. + + @throw parse_error.106 if an array index begins with '0' + @throw parse_error.109 if an array index was not a number + @throw out_of_range.402 if the array index '-' is used + @throw out_of_range.404 if the JSON pointer can not be resolved + + @liveexample{The behavior is shown in the example.,operatorjson_pointer_const} + + @since version 2.0.0 + */ + const_reference operator[](const json_pointer& ptr) const + { + return ptr.get_unchecked(this); + } + + /*! + @brief access specified element via JSON Pointer + + Returns a reference to the element at with specified JSON pointer @a ptr, + with bounds checking. + + @param[in] ptr JSON pointer to the desired element + + @return reference to the element pointed to by @a ptr + + @throw parse_error.106 if an array index in the passed JSON pointer @a ptr + begins with '0'. See example below. + + @throw parse_error.109 if an array index in the passed JSON pointer @a ptr + is not a number. See example below. + + @throw out_of_range.401 if an array index in the passed JSON pointer @a ptr + is out of range. See example below. + + @throw out_of_range.402 if the array index '-' is used in the passed JSON + pointer @a ptr. As `at` provides checked access (and no elements are + implicitly inserted), the index '-' is always invalid. See example below. + + @throw out_of_range.403 if the JSON pointer describes a key of an object + which cannot be found. See example below. + + @throw out_of_range.404 if the JSON pointer @a ptr can not be resolved. + See example below. + + @exceptionsafety Strong guarantee: if an exception is thrown, there are no + changes in the JSON value. + + @complexity Constant. + + @since version 2.0.0 + + @liveexample{The behavior is shown in the example.,at_json_pointer} + */ + reference at(const json_pointer& ptr) + { + return ptr.get_checked(this); + } + + /*! + @brief access specified element via JSON Pointer + + Returns a const reference to the element at with specified JSON pointer @a + ptr, with bounds checking. + + @param[in] ptr JSON pointer to the desired element + + @return reference to the element pointed to by @a ptr + + @throw parse_error.106 if an array index in the passed JSON pointer @a ptr + begins with '0'. See example below. + + @throw parse_error.109 if an array index in the passed JSON pointer @a ptr + is not a number. See example below. + + @throw out_of_range.401 if an array index in the passed JSON pointer @a ptr + is out of range. See example below. + + @throw out_of_range.402 if the array index '-' is used in the passed JSON + pointer @a ptr. As `at` provides checked access (and no elements are + implicitly inserted), the index '-' is always invalid. See example below. + + @throw out_of_range.403 if the JSON pointer describes a key of an object + which cannot be found. See example below. + + @throw out_of_range.404 if the JSON pointer @a ptr can not be resolved. + See example below. + + @exceptionsafety Strong guarantee: if an exception is thrown, there are no + changes in the JSON value. + + @complexity Constant. + + @since version 2.0.0 + + @liveexample{The behavior is shown in the example.,at_json_pointer_const} + */ + const_reference at(const json_pointer& ptr) const + { + return ptr.get_checked(this); + } + + /*! + @brief return flattened JSON value + + The function creates a JSON object whose keys are JSON pointers (see [RFC + 6901](https://tools.ietf.org/html/rfc6901)) and whose values are all + primitive. The original JSON value can be restored using the @ref + unflatten() function. + + @return an object that maps JSON pointers to primitive values + + @note Empty objects and arrays are flattened to `null` and will not be + reconstructed correctly by the @ref unflatten() function. + + @complexity Linear in the size the JSON value. + + @liveexample{The following code shows how a JSON object is flattened to an + object whose keys consist of JSON pointers.,flatten} + + @sa @ref unflatten() for the reverse function + + @since version 2.0.0 + */ + basic_json flatten() const + { + basic_json result(value_t::object); + json_pointer::flatten("", *this, result); + return result; + } + + /*! + @brief unflatten a previously flattened JSON value + + The function restores the arbitrary nesting of a JSON value that has been + flattened before using the @ref flatten() function. The JSON value must + meet certain constraints: + 1. The value must be an object. + 2. The keys must be JSON pointers (see + [RFC 6901](https://tools.ietf.org/html/rfc6901)) + 3. The mapped values must be primitive JSON types. + + @return the original JSON from a flattened version + + @note Empty objects and arrays are flattened by @ref flatten() to `null` + values and can not unflattened to their original type. Apart from + this example, for a JSON value `j`, the following is always true: + `j == j.flatten().unflatten()`. + + @complexity Linear in the size the JSON value. + + @throw type_error.314 if value is not an object + @throw type_error.315 if object values are not primitive + + @liveexample{The following code shows how a flattened JSON object is + unflattened into the original nested JSON object.,unflatten} + + @sa @ref flatten() for the reverse function + + @since version 2.0.0 + */ + basic_json unflatten() const + { + return json_pointer::unflatten(*this); + } + + /// @} + + ////////////////////////// + // JSON Patch functions // + ////////////////////////// + + /// @name JSON Patch functions + /// @{ + + /*! + @brief applies a JSON patch + + [JSON Patch](http://jsonpatch.com) defines a JSON document structure for + expressing a sequence of operations to apply to a JSON) document. With + this function, a JSON Patch is applied to the current JSON value by + executing all operations from the patch. + + @param[in] json_patch JSON patch document + @return patched document + + @note The application of a patch is atomic: Either all operations succeed + and the patched document is returned or an exception is thrown. In + any case, the original value is not changed: the patch is applied + to a copy of the value. + + @throw parse_error.104 if the JSON patch does not consist of an array of + objects + + @throw parse_error.105 if the JSON patch is malformed (e.g., mandatory + attributes are missing); example: `"operation add must have member path"` + + @throw out_of_range.401 if an array index is out of range. + + @throw out_of_range.403 if a JSON pointer inside the patch could not be + resolved successfully in the current JSON value; example: `"key baz not + found"` + + @throw out_of_range.405 if JSON pointer has no parent ("add", "remove", + "move") + + @throw other_error.501 if "test" operation was unsuccessful + + @complexity Linear in the size of the JSON value and the length of the + JSON patch. As usually only a fraction of the JSON value is affected by + the patch, the complexity can usually be neglected. + + @liveexample{The following code shows how a JSON patch is applied to a + value.,patch} + + @sa @ref diff -- create a JSON patch by comparing two JSON values + + @sa [RFC 6902 (JSON Patch)](https://tools.ietf.org/html/rfc6902) + @sa [RFC 6901 (JSON Pointer)](https://tools.ietf.org/html/rfc6901) + + @since version 2.0.0 + */ + basic_json patch(const basic_json& json_patch) const + { + // make a working copy to apply the patch to + basic_json result = *this; + + // the valid JSON Patch operations + enum class patch_operations {add, remove, replace, move, copy, test, invalid}; + + const auto get_op = [](const std::string & op) + { + if (op == "add") + { + return patch_operations::add; + } + if (op == "remove") + { + return patch_operations::remove; + } + if (op == "replace") + { + return patch_operations::replace; + } + if (op == "move") + { + return patch_operations::move; + } + if (op == "copy") + { + return patch_operations::copy; + } + if (op == "test") + { + return patch_operations::test; + } + + return patch_operations::invalid; + }; + + // wrapper for "add" operation; add value at ptr + const auto operation_add = [&result](json_pointer & ptr, basic_json val) + { + // adding to the root of the target document means replacing it + if (ptr.is_root()) + { + result = val; + } + else + { + // make sure the top element of the pointer exists + json_pointer top_pointer = ptr.top(); + if (top_pointer != ptr) + { + result.at(top_pointer); + } + + // get reference to parent of JSON pointer ptr + const auto last_path = ptr.pop_back(); + basic_json& parent = result[ptr]; + + switch (parent.m_type) + { + case value_t::null: + case value_t::object: + { + // use operator[] to add value + parent[last_path] = val; + break; + } + + case value_t::array: + { + if (last_path == "-") + { + // special case: append to back + parent.push_back(val); + } + else + { + const auto idx = json_pointer::array_index(last_path); + if (JSON_UNLIKELY(static_cast<size_type>(idx) > parent.size())) + { + // avoid undefined behavior + JSON_THROW(out_of_range::create(401, "array index " + std::to_string(idx) + " is out of range")); + } + + // default case: insert add offset + parent.insert(parent.begin() + static_cast<difference_type>(idx), val); + } + break; + } + + // LCOV_EXCL_START + default: + { + // if there exists a parent it cannot be primitive + assert(false); + } + // LCOV_EXCL_STOP + } + } + }; + + // wrapper for "remove" operation; remove value at ptr + const auto operation_remove = [&result](json_pointer & ptr) + { + // get reference to parent of JSON pointer ptr + const auto last_path = ptr.pop_back(); + basic_json& parent = result.at(ptr); + + // remove child + if (parent.is_object()) + { + // perform range check + auto it = parent.find(last_path); + if (JSON_LIKELY(it != parent.end())) + { + parent.erase(it); + } + else + { + JSON_THROW(out_of_range::create(403, "key '" + last_path + "' not found")); + } + } + else if (parent.is_array()) + { + // note erase performs range check + parent.erase(static_cast<size_type>(json_pointer::array_index(last_path))); + } + }; + + // type check: top level value must be an array + if (JSON_UNLIKELY(not json_patch.is_array())) + { + JSON_THROW(parse_error::create(104, 0, "JSON patch must be an array of objects")); + } + + // iterate and apply the operations + for (const auto& val : json_patch) + { + // wrapper to get a value for an operation + const auto get_value = [&val](const std::string & op, + const std::string & member, + bool string_type) -> basic_json & + { + // find value + auto it = val.m_value.object->find(member); + + // context-sensitive error message + const auto error_msg = (op == "op") ? "operation" : "operation '" + op + "'"; + + // check if desired value is present + if (JSON_UNLIKELY(it == val.m_value.object->end())) + { + JSON_THROW(parse_error::create(105, 0, error_msg + " must have member '" + member + "'")); + } + + // check if result is of type string + if (JSON_UNLIKELY(string_type and not it->second.is_string())) + { + JSON_THROW(parse_error::create(105, 0, error_msg + " must have string member '" + member + "'")); + } + + // no error: return value + return it->second; + }; + + // type check: every element of the array must be an object + if (JSON_UNLIKELY(not val.is_object())) + { + JSON_THROW(parse_error::create(104, 0, "JSON patch must be an array of objects")); + } + + // collect mandatory members + const std::string op = get_value("op", "op", true); + const std::string path = get_value(op, "path", true); + json_pointer ptr(path); + + switch (get_op(op)) + { + case patch_operations::add: + { + operation_add(ptr, get_value("add", "value", false)); + break; + } + + case patch_operations::remove: + { + operation_remove(ptr); + break; + } + + case patch_operations::replace: + { + // the "path" location must exist - use at() + result.at(ptr) = get_value("replace", "value", false); + break; + } + + case patch_operations::move: + { + const std::string from_path = get_value("move", "from", true); + json_pointer from_ptr(from_path); + + // the "from" location must exist - use at() + basic_json v = result.at(from_ptr); + + // The move operation is functionally identical to a + // "remove" operation on the "from" location, followed + // immediately by an "add" operation at the target + // location with the value that was just removed. + operation_remove(from_ptr); + operation_add(ptr, v); + break; + } + + case patch_operations::copy: + { + const std::string from_path = get_value("copy", "from", true); + const json_pointer from_ptr(from_path); + + // the "from" location must exist - use at() + basic_json v = result.at(from_ptr); + + // The copy is functionally identical to an "add" + // operation at the target location using the value + // specified in the "from" member. + operation_add(ptr, v); + break; + } + + case patch_operations::test: + { + bool success = false; + JSON_TRY + { + // check if "value" matches the one at "path" + // the "path" location must exist - use at() + success = (result.at(ptr) == get_value("test", "value", false)); + } + JSON_INTERNAL_CATCH (out_of_range&) + { + // ignore out of range errors: success remains false + } + + // throw an exception if test fails + if (JSON_UNLIKELY(not success)) + { + JSON_THROW(other_error::create(501, "unsuccessful: " + val.dump())); + } + + break; + } + + case patch_operations::invalid: + { + // op must be "add", "remove", "replace", "move", "copy", or + // "test" + JSON_THROW(parse_error::create(105, 0, "operation value '" + op + "' is invalid")); + } + } + } + + return result; + } + + /*! + @brief creates a diff as a JSON patch + + Creates a [JSON Patch](http://jsonpatch.com) so that value @a source can + be changed into the value @a target by calling @ref patch function. + + @invariant For two JSON values @a source and @a target, the following code + yields always `true`: + @code {.cpp} + source.patch(diff(source, target)) == target; + @endcode + + @note Currently, only `remove`, `add`, and `replace` operations are + generated. + + @param[in] source JSON value to compare from + @param[in] target JSON value to compare against + @param[in] path helper value to create JSON pointers + + @return a JSON patch to convert the @a source to @a target + + @complexity Linear in the lengths of @a source and @a target. + + @liveexample{The following code shows how a JSON patch is created as a + diff for two JSON values.,diff} + + @sa @ref patch -- apply a JSON patch + @sa @ref merge_patch -- apply a JSON Merge Patch + + @sa [RFC 6902 (JSON Patch)](https://tools.ietf.org/html/rfc6902) + + @since version 2.0.0 + */ + static basic_json diff(const basic_json& source, const basic_json& target, + const std::string& path = "") + { + // the patch + basic_json result(value_t::array); + + // if the values are the same, return empty patch + if (source == target) + { + return result; + } + + if (source.type() != target.type()) + { + // different types: replace value + result.push_back( + { + {"op", "replace"}, {"path", path}, {"value", target} + }); + } + else + { + switch (source.type()) + { + case value_t::array: + { + // first pass: traverse common elements + std::size_t i = 0; + while (i < source.size() and i < target.size()) + { + // recursive call to compare array values at index i + auto temp_diff = diff(source[i], target[i], path + "/" + std::to_string(i)); + result.insert(result.end(), temp_diff.begin(), temp_diff.end()); + ++i; + } + + // i now reached the end of at least one array + // in a second pass, traverse the remaining elements + + // remove my remaining elements + const auto end_index = static_cast<difference_type>(result.size()); + while (i < source.size()) + { + // add operations in reverse order to avoid invalid + // indices + result.insert(result.begin() + end_index, object( + { + {"op", "remove"}, + {"path", path + "/" + std::to_string(i)} + })); + ++i; + } + + // add other remaining elements + while (i < target.size()) + { + result.push_back( + { + {"op", "add"}, + {"path", path + "/" + std::to_string(i)}, + {"value", target[i]} + }); + ++i; + } + + break; + } + + case value_t::object: + { + // first pass: traverse this object's elements + for (auto it = source.cbegin(); it != source.cend(); ++it) + { + // escape the key name to be used in a JSON patch + const auto key = json_pointer::escape(it.key()); + + if (target.find(it.key()) != target.end()) + { + // recursive call to compare object values at key it + auto temp_diff = diff(it.value(), target[it.key()], path + "/" + key); + result.insert(result.end(), temp_diff.begin(), temp_diff.end()); + } + else + { + // found a key that is not in o -> remove it + result.push_back(object( + { + {"op", "remove"}, {"path", path + "/" + key} + })); + } + } + + // second pass: traverse other object's elements + for (auto it = target.cbegin(); it != target.cend(); ++it) + { + if (source.find(it.key()) == source.end()) + { + // found a key that is not in this -> add it + const auto key = json_pointer::escape(it.key()); + result.push_back( + { + {"op", "add"}, {"path", path + "/" + key}, + {"value", it.value()} + }); + } + } + + break; + } + + default: + { + // both primitive type: replace value + result.push_back( + { + {"op", "replace"}, {"path", path}, {"value", target} + }); + break; + } + } + } + + return result; + } + + /// @} + + //////////////////////////////// + // JSON Merge Patch functions // + //////////////////////////////// + + /// @name JSON Merge Patch functions + /// @{ + + /*! + @brief applies a JSON Merge Patch + + The merge patch format is primarily intended for use with the HTTP PATCH + method as a means of describing a set of modifications to a target + resource's content. This function applies a merge patch to the current + JSON value. + + The function implements the following algorithm from Section 2 of + [RFC 7396 (JSON Merge Patch)](https://tools.ietf.org/html/rfc7396): + + ``` + define MergePatch(Target, Patch): + if Patch is an Object: + if Target is not an Object: + Target = {} // Ignore the contents and set it to an empty Object + for each Name/Value pair in Patch: + if Value is null: + if Name exists in Target: + remove the Name/Value pair from Target + else: + Target[Name] = MergePatch(Target[Name], Value) + return Target + else: + return Patch + ``` + + Thereby, `Target` is the current object; that is, the patch is applied to + the current value. + + @param[in] apply_patch the patch to apply + + @complexity Linear in the lengths of @a patch. + + @liveexample{The following code shows how a JSON Merge Patch is applied to + a JSON document.,merge_patch} + + @sa @ref patch -- apply a JSON patch + @sa [RFC 7396 (JSON Merge Patch)](https://tools.ietf.org/html/rfc7396) + + @since version 3.0.0 + */ + void merge_patch(const basic_json& apply_patch) + { + if (apply_patch.is_object()) + { + if (not is_object()) + { + *this = object(); + } + for (auto it = apply_patch.begin(); it != apply_patch.end(); ++it) + { + if (it.value().is_null()) + { + erase(it.key()); + } + else + { + operator[](it.key()).merge_patch(it.value()); + } + } + } + else + { + *this = apply_patch; + } + } + + /// @} +}; +} // namespace nlohmann + +/////////////////////// +// nonmember support // +/////////////////////// + +// specialization of std::swap, and std::hash +namespace std +{ + +/// hash value for JSON objects +template<> +struct hash<nlohmann::json> +{ + /*! + @brief return a hash value for a JSON object + + @since version 1.0.0 + */ + std::size_t operator()(const nlohmann::json& j) const + { + // a naive hashing via the string representation + const auto& h = hash<nlohmann::json::string_t>(); + return h(j.dump()); + } +}; + +/// specialization for std::less<value_t> +/// @note: do not remove the space after '<', +/// see https://github.com/nlohmann/json/pull/679 +template<> +struct less< ::nlohmann::detail::value_t> +{ + /*! + @brief compare two value_t enum values + @since version 3.0.0 + */ + bool operator()(nlohmann::detail::value_t lhs, + nlohmann::detail::value_t rhs) const noexcept + { + return nlohmann::detail::operator<(lhs, rhs); + } +}; + +/*! +@brief exchanges the values of two JSON objects + +@since version 1.0.0 +*/ +template<> +inline void swap<nlohmann::json>(nlohmann::json& j1, nlohmann::json& j2) noexcept( + is_nothrow_move_constructible<nlohmann::json>::value and + is_nothrow_move_assignable<nlohmann::json>::value +) +{ + j1.swap(j2); +} + +} // namespace std + +/*! +@brief user-defined string literal for JSON values + +This operator implements a user-defined string literal for JSON objects. It +can be used by adding `"_json"` to a string literal and returns a JSON object +if no parse error occurred. + +@param[in] s a string representation of a JSON object +@param[in] n the length of string @a s +@return a JSON object + +@since version 1.0.0 +*/ +inline nlohmann::json operator "" _json(const char* s, std::size_t n) +{ + return nlohmann::json::parse(s, s + n); +} + +/*! +@brief user-defined string literal for JSON pointer + +This operator implements a user-defined string literal for JSON Pointers. It +can be used by adding `"_json_pointer"` to a string literal and returns a JSON pointer +object if no parse error occurred. + +@param[in] s a string representation of a JSON Pointer +@param[in] n the length of string @a s +@return a JSON pointer object + +@since version 2.0.0 +*/ +inline nlohmann::json::json_pointer operator "" _json_pointer(const char* s, std::size_t n) +{ + return nlohmann::json::json_pointer(std::string(s, n)); +} + +// #include <nlohmann/detail/macro_unscope.hpp> + + +// restore GCC/clang diagnostic settings +#if defined(__clang__) || defined(__GNUC__) || defined(__GNUG__) + #pragma GCC diagnostic pop +#endif +#if defined(__clang__) + #pragma GCC diagnostic pop +#endif + +// clean up +#undef JSON_INTERNAL_CATCH +#undef JSON_CATCH +#undef JSON_THROW +#undef JSON_TRY +#undef JSON_LIKELY +#undef JSON_UNLIKELY +#undef JSON_DEPRECATED +#undef JSON_HAS_CPP_14 +#undef JSON_HAS_CPP_17 +#undef NLOHMANN_BASIC_JSON_TPL_DECLARATION +#undef NLOHMANN_BASIC_JSON_TPL + + +#endif |