author     Vincent Ambo <tazjin@google.com>    2020-05-17T14·52+0100
committer  Vincent Ambo <tazjin@google.com>    2020-05-17T14·52+0100
commit     7994fd1d545cc5c876d6f21db7ddf9185d23dad6 (patch)
tree       32dd695785378c5b9c8be97fc583e9dfc62cb105 /third_party/nix/src/libstore/s3-binary-cache-store.cc
parent     cf8cd640c1adf74a3706efbcb0ea4625da106fb2 (diff)
parent     90b3b31dc27f31e9b11653a636025d29ddb087a3 (diff)
Add 'third_party/nix/' from commit 'be66c7a6b24e3c3c6157fd37b86c7203d14acf10' r/724
git-subtree-dir: third_party/nix
git-subtree-mainline: cf8cd640c1adf74a3706efbcb0ea4625da106fb2
git-subtree-split: be66c7a6b24e3c3c6157fd37b86c7203d14acf10
Diffstat (limited to 'third_party/nix/src/libstore/s3-binary-cache-store.cc')
-rw-r--r--    third_party/nix/src/libstore/s3-binary-cache-store.cc    431
1 file changed, 431 insertions, 0 deletions
diff --git a/third_party/nix/src/libstore/s3-binary-cache-store.cc b/third_party/nix/src/libstore/s3-binary-cache-store.cc
new file mode 100644
index 000000000000..cd547a964850
--- /dev/null
+++ b/third_party/nix/src/libstore/s3-binary-cache-store.cc
@@ -0,0 +1,431 @@
+#if ENABLE_S3
+
+#include "s3.hh"
+#include "s3-binary-cache-store.hh"
+#include "nar-info.hh"
+#include "nar-info-disk-cache.hh"
+#include "globals.hh"
+#include "compression.hh"
+#include "download.hh"
+#include "istringstream_nocopy.hh"
+
+#include <aws/core/Aws.h>
+#include <aws/core/VersionConfig.h>
+#include <aws/core/auth/AWSCredentialsProvider.h>
+#include <aws/core/auth/AWSCredentialsProviderChain.h>
+#include <aws/core/client/ClientConfiguration.h>
+#include <aws/core/client/DefaultRetryStrategy.h>
+#include <aws/core/utils/logging/FormattedLogSystem.h>
+#include <aws/core/utils/logging/LogMacros.h>
+#include <aws/core/utils/threading/Executor.h>
+#include <aws/s3/S3Client.h>
+#include <aws/s3/model/GetObjectRequest.h>
+#include <aws/s3/model/HeadObjectRequest.h>
+#include <aws/s3/model/ListObjectsRequest.h>
+#include <aws/s3/model/PutObjectRequest.h>
+#include <aws/transfer/TransferManager.h>
+
+using namespace Aws::Transfer;
+
+namespace nix {
+
+struct S3Error : public Error
+{
+    Aws::S3::S3Errors err;
+    S3Error(Aws::S3::S3Errors err, const FormatOrString & fs)
+        : Error(fs), err(err) { };
+};
+
+/* Helper: given an Outcome<R, E>, return R in case of success, or
+   throw an exception in case of an error. */
+template<typename R, typename E>
+R && checkAws(const FormatOrString & fs, Aws::Utils::Outcome<R, E> && outcome)
+{
+    if (!outcome.IsSuccess())
+        throw S3Error(
+            outcome.GetError().GetErrorType(),
+            fs.s + ": " + outcome.GetError().GetMessage());
+    return outcome.GetResultWithOwnership();
+}
+
+class AwsLogger : public Aws::Utils::Logging::FormattedLogSystem
+{
+    using Aws::Utils::Logging::FormattedLogSystem::FormattedLogSystem;
+
+    void ProcessFormattedStatement(Aws::String && statement) override
+    {
+        debug("AWS: %s", chomp(statement));
+    }
+};
+
+static void initAWS()
+{
+    static std::once_flag flag;
+    std::call_once(flag, []() {
+        Aws::SDKOptions options;
+
+        /* We install our own OpenSSL locking function (see
+           shared.cc), so don't let aws-sdk-cpp override it. */
+        options.cryptoOptions.initAndCleanupOpenSSL = false;
+
+        if (verbosity >= lvlDebug) {
+            options.loggingOptions.logLevel =
+                verbosity == lvlDebug
+                ? Aws::Utils::Logging::LogLevel::Debug
+                : Aws::Utils::Logging::LogLevel::Trace;
+            options.loggingOptions.logger_create_fn = [options]() {
+                return std::make_shared<AwsLogger>(options.loggingOptions.logLevel);
+            };
+        }
+
+        Aws::InitAPI(options);
+    });
+}
+
+S3Helper::S3Helper(const string & profile, const string & region, const string & scheme, const string & endpoint)
+    : config(makeConfig(region, scheme, endpoint))
+    , client(make_ref<Aws::S3::S3Client>(
+            profile == ""
+            ? std::dynamic_pointer_cast<Aws::Auth::AWSCredentialsProvider>(
+                std::make_shared<Aws::Auth::DefaultAWSCredentialsProviderChain>())
+            : std::dynamic_pointer_cast<Aws::Auth::AWSCredentialsProvider>(
+                std::make_shared<Aws::Auth::ProfileConfigFileAWSCredentialsProvider>(profile.c_str())),
+            *config,
+            // FIXME: https://github.com/aws/aws-sdk-cpp/issues/759
+#if AWS_VERSION_MAJOR == 1 && AWS_VERSION_MINOR < 3
+            false,
+#else
+            Aws::Client::AWSAuthV4Signer::PayloadSigningPolicy::Never,
+#endif
+            endpoint.empty()))
+{
+}
+
+/* Log AWS retries. */
+class RetryStrategy : public Aws::Client::DefaultRetryStrategy
+{
+    bool ShouldRetry(const Aws::Client::AWSError<Aws::Client::CoreErrors>& error, long attemptedRetries) const override
+    {
+        auto retry = Aws::Client::DefaultRetryStrategy::ShouldRetry(error, attemptedRetries);
+        if (retry)
+            printError("AWS error '%s' (%s), will retry in %d ms",
+                error.GetExceptionName(), error.GetMessage(),
+                CalculateDelayBeforeNextRetry(error, attemptedRetries));
+        return retry;
+    }
+};
+
+ref<Aws::Client::ClientConfiguration> S3Helper::makeConfig(const string & region, const string & scheme, const string & endpoint)
+{
+    initAWS();
+    auto res = make_ref<Aws::Client::ClientConfiguration>();
+    res->region = region;
+    if (!scheme.empty()) {
+        res->scheme = Aws::Http::SchemeMapper::FromString(scheme.c_str());
+    }
+    if (!endpoint.empty()) {
+        res->endpointOverride = endpoint;
+    }
+    res->requestTimeoutMs = 600 * 1000;
+    res->connectTimeoutMs = 5 * 1000;
+    res->retryStrategy = std::make_shared<RetryStrategy>();
+    res->caFile = settings.caFile;
+    return res;
+}
+
+S3Helper::DownloadResult S3Helper::getObject(
+    const std::string & bucketName, const std::string & key)
+{
+    debug("fetching 's3://%s/%s'...", bucketName, key);
+
+    auto request =
+        Aws::S3::Model::GetObjectRequest()
+        .WithBucket(bucketName)
+        .WithKey(key);
+
+    request.SetResponseStreamFactory([&]() {
+        return Aws::New<std::stringstream>("STRINGSTREAM");
+    });
+
+    DownloadResult res;
+
+    auto now1 = std::chrono::steady_clock::now();
+
+    try {
+
+        auto result = checkAws(fmt("AWS error fetching '%s'", key),
+            client->GetObject(request));
+
+        res.data = decompress(result.GetContentEncoding(),
+            dynamic_cast<std::stringstream &>(result.GetBody()).str());
+
+    } catch (S3Error & e) {
+        if (e.err != Aws::S3::S3Errors::NO_SUCH_KEY) throw;
+    }
+
+    auto now2 = std::chrono::steady_clock::now();
+
+    res.durationMs = std::chrono::duration_cast<std::chrono::milliseconds>(now2 - now1).count();
+
+    return res;
+}
+
+struct S3BinaryCacheStoreImpl : public S3BinaryCacheStore
+{
+    const Setting<std::string> profile{this, "", "profile", "The name of the AWS configuration profile to use."};
+    const Setting<std::string> region{this, Aws::Region::US_EAST_1, "region", {"aws-region"}};
+    const Setting<std::string> scheme{this, "", "scheme", "The scheme to use for S3 requests, https by default."};
+    const Setting<std::string> endpoint{this, "", "endpoint", "An optional override of the endpoint to use when talking to S3."};
+    const Setting<std::string> narinfoCompression{this, "", "narinfo-compression", "compression method for .narinfo files"};
+    const Setting<std::string> lsCompression{this, "", "ls-compression", "compression method for .ls files"};
+    const Setting<std::string> logCompression{this, "", "log-compression", "compression method for log/* files"};
+    const Setting<bool> multipartUpload{
+        this, false, "multipart-upload", "whether to use multi-part uploads"};
+    const Setting<uint64_t> bufferSize{
+        this, 5 * 1024 * 1024, "buffer-size", "size (in bytes) of each part in multi-part uploads"};
+
+    std::string bucketName;
+
+    Stats stats;
+
+    S3Helper s3Helper;
+
+    S3BinaryCacheStoreImpl(
+        const Params & params, const std::string & bucketName)
+        : S3BinaryCacheStore(params)
+        , bucketName(bucketName)
+        , s3Helper(profile, region, scheme, endpoint)
+    {
+        diskCache = getNarInfoDiskCache();
+    }
+
+    std::string getUri() override
+    {
+        return "s3://" + bucketName;
+    }
+
+    void init() override
+    {
+        if (!diskCache->cacheExists(getUri(), wantMassQuery_, priority)) {
+
+            BinaryCacheStore::init();
+
+            diskCache->createCache(getUri(), storeDir, wantMassQuery_, priority);
+        }
+    }
+
+    const Stats & getS3Stats() override
+    {
+        return stats;
+    }
+
+    /* This is a specialisation of isValidPath() that optimistically
+       fetches the .narinfo file, rather than first checking for its
+       existence via a HEAD request. Since .narinfos are small, doing
+       a GET is unlikely to be slower than HEAD. */
+    bool isValidPathUncached(const Path & storePath) override
+    {
+        try {
+            queryPathInfo(storePath);
+            return true;
+        } catch (InvalidPath & e) {
+            return false;
+        }
+    }
+
+    bool fileExists(const std::string & path) override
+    {
+        stats.head++;
+
+        auto res = s3Helper.client->HeadObject(
+            Aws::S3::Model::HeadObjectRequest()
+            .WithBucket(bucketName)
+            .WithKey(path));
+
+        if (!res.IsSuccess()) {
+            auto & error = res.GetError();
+            if (error.GetErrorType() == Aws::S3::S3Errors::RESOURCE_NOT_FOUND
+                || error.GetErrorType() == Aws::S3::S3Errors::NO_SUCH_KEY
+                // If bucket listing is disabled, 404s turn into 403s
+                || error.GetErrorType() == Aws::S3::S3Errors::ACCESS_DENIED)
+                return false;
+            throw Error(format("AWS error fetching '%s': %s") % path % error.GetMessage());
+        }
+
+        return true;
+    }
+
+    std::shared_ptr<TransferManager> transferManager;
+    std::once_flag transferManagerCreated;
+
+    void uploadFile(const std::string & path, const std::string & data,
+        const std::string & mimeType,
+        const std::string & contentEncoding)
+    {
+        auto stream = std::make_shared<istringstream_nocopy>(data);
+
+        auto maxThreads = std::thread::hardware_concurrency();
+
+        static std::shared_ptr<Aws::Utils::Threading::PooledThreadExecutor>
+            executor = std::make_shared<Aws::Utils::Threading::PooledThreadExecutor>(maxThreads);
+
+        std::call_once(transferManagerCreated, [&]()
+        {
+            if (multipartUpload) {
+                TransferManagerConfiguration transferConfig(executor.get());
+
+                transferConfig.s3Client = s3Helper.client;
+                transferConfig.bufferSize = bufferSize;
+
+                transferConfig.uploadProgressCallback =
+                    [](const TransferManager *transferManager,
+                        const std::shared_ptr<const TransferHandle>
+                        &transferHandle)
+                    {
+                        //FIXME: find a way to properly abort the multipart upload.
+                        //checkInterrupt();
+                        debug("upload progress ('%s'): '%d' of '%d' bytes",
+                            transferHandle->GetKey(),
+                            transferHandle->GetBytesTransferred(),
+                            transferHandle->GetBytesTotalSize());
+                    };
+
+                transferManager = TransferManager::Create(transferConfig);
+            }
+        });
+
+        auto now1 = std::chrono::steady_clock::now();
+
+        if (transferManager) {
+
+            if (contentEncoding != "")
+                throw Error("setting a content encoding is not supported with S3 multi-part uploads");
+
+            std::shared_ptr<TransferHandle> transferHandle =
+                transferManager->UploadFile(
+                    stream, bucketName, path, mimeType,
+                    Aws::Map<Aws::String, Aws::String>(),
+                    nullptr /*, contentEncoding */);
+
+            transferHandle->WaitUntilFinished();
+
+            if (transferHandle->GetStatus() == TransferStatus::FAILED)
+                throw Error("AWS error: failed to upload 's3://%s/%s': %s",
+                    bucketName, path, transferHandle->GetLastError().GetMessage());
+
+            if (transferHandle->GetStatus() != TransferStatus::COMPLETED)
+                throw Error("AWS error: transfer status of 's3://%s/%s' in unexpected state",
+                    bucketName, path);
+
+        } else {
+
+            auto request =
+                Aws::S3::Model::PutObjectRequest()
+                .WithBucket(bucketName)
+                .WithKey(path);
+
+            request.SetContentType(mimeType);
+
+            if (contentEncoding != "")
+                request.SetContentEncoding(contentEncoding);
+
+            auto stream = std::make_shared<istringstream_nocopy>(data);
+
+            request.SetBody(stream);
+
+            auto result = checkAws(fmt("AWS error uploading '%s'", path),
+                s3Helper.client->PutObject(request));
+        }
+
+        auto now2 = std::chrono::steady_clock::now();
+
+        auto duration =
+            std::chrono::duration_cast<std::chrono::milliseconds>(now2 - now1)
+            .count();
+
+        printInfo(format("uploaded 's3://%1%/%2%' (%3% bytes) in %4% ms") %
+            bucketName % path % data.size() % duration);
+
+        stats.putTimeMs += duration;
+        stats.putBytes += data.size();
+        stats.put++;
+    }
+
+    void upsertFile(const std::string & path, const std::string & data,
+        const std::string & mimeType) override
+    {
+        if (narinfoCompression != "" && hasSuffix(path, ".narinfo"))
+            uploadFile(path, *compress(narinfoCompression, data), mimeType, narinfoCompression);
+        else if (lsCompression != "" && hasSuffix(path, ".ls"))
+            uploadFile(path, *compress(lsCompression, data), mimeType, lsCompression);
+        else if (logCompression != "" && hasPrefix(path, "log/"))
+            uploadFile(path, *compress(logCompression, data), mimeType, logCompression);
+        else
+            uploadFile(path, data, mimeType, "");
+    }
+
+    void getFile(const std::string & path, Sink & sink) override
+    {
+        stats.get++;
+
+        // FIXME: stream output to sink.
+        auto res = s3Helper.getObject(bucketName, path);
+
+        stats.getBytes += res.data ? res.data->size() : 0;
+        stats.getTimeMs += res.durationMs;
+
+        if (res.data) {
+            printTalkative("downloaded 's3://%s/%s' (%d bytes) in %d ms",
+                bucketName, path, res.data->size(), res.durationMs);
+
+            sink((unsigned char *) res.data->data(), res.data->size());
+        } else
+            throw NoSuchBinaryCacheFile("file '%s' does not exist in binary cache '%s'", path, getUri());
+    }
+
+    PathSet queryAllValidPaths() override
+    {
+        PathSet paths;
+        std::string marker;
+
+        do {
+            debug(format("listing bucket 's3://%s' from key '%s'...") % bucketName % marker);
+
+            auto res = checkAws(format("AWS error listing bucket '%s'") % bucketName,
+                s3Helper.client->ListObjects(
+                    Aws::S3::Model::ListObjectsRequest()
+                    .WithBucket(bucketName)
+                    .WithDelimiter("/")
+                    .WithMarker(marker)));
+
+            auto & contents = res.GetContents();
+
+            debug(format("got %d keys, next marker '%s'")
+                % contents.size() % res.GetNextMarker());
+
+            for (auto object : contents) {
+                auto & key = object.GetKey();
+                if (key.size() != 40 || !hasSuffix(key, ".narinfo")) continue;
+                paths.insert(storeDir + "/" + key.substr(0, key.size() - 8));
+            }
+
+            marker = res.GetNextMarker();
+        } while (!marker.empty());
+
+        return paths;
+    }
+
+};
+
+static RegisterStoreImplementation regStore([](
+    const std::string & uri, const Store::Params & params)
+    -> std::shared_ptr<Store>
+{
+    if (std::string(uri, 0, 5) != "s3://") return 0;
+    auto store = std::make_shared<S3BinaryCacheStoreImpl>(params, std::string(uri, 5));
+    store->init();
+    return store;
+});
+
+}
+
+#endif
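Two small string conventions carry a lot of weight in the code above: regStore accepts only URIs that begin with "s3://" and treats the remainder as the bucket name, and queryAllValidPaths maps a bucket key of the form "<32-character hash>.narinfo" (40 characters total) back to a store path by stripping the 8-character ".narinfo" suffix. The following is a minimal, self-contained sketch of just that string handling, with no AWS or Nix dependencies; the bucket name "example-cache" and the sample key are made up for illustration, and "/nix/store" is only the usual default store directory.

```cpp
#include <cassert>
#include <iostream>
#include <string>

int main()
{
    // regStore: accept only URIs starting with "s3://" and treat the rest
    // as the bucket name.
    std::string uri = "s3://example-cache";  // hypothetical store URI
    assert(std::string(uri, 0, 5) == "s3://");
    std::string bucketName(uri, 5);          // "example-cache"

    // queryAllValidPaths: a listing key "<32-char hash>.narinfo" (40 chars
    // total) is mapped back to a store path by dropping the 8-character
    // ".narinfo" suffix and prefixing the store directory.
    std::string storeDir = "/nix/store";     // usual default store dir
    std::string key = "0123456789abcdefghijklmnopqrstuv.narinfo";  // fake key
    assert(key.size() == 40);
    std::string path = storeDir + "/" + key.substr(0, key.size() - 8);

    std::cout << bucketName << "\n" << path << "\n";
}
```

The same prefix-and-suffix arithmetic explains the `key.size() != 40` filter in queryAllValidPaths: anything that is not exactly a 32-character hash part plus ".narinfo" is skipped.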