From 3fe455cd4ada32990f67af640becd4cf8ae6117c Mon Sep 17 00:00:00 2001 From: edef Date: Sun, 12 Nov 2023 14:32:38 +0000 Subject: chore(3p/nixpkgs/clickhouse): 23.3.13.6 -> 23.10.3.5 Change-Id: I3e4c43690fcaf50965152bf40e1ca2b027010fcf Reviewed-on: https://cl.tvl.fyi/c/depot/+/9997 Reviewed-by: flokli Tested-by: BuildkiteCI --- third_party/overlays/clickhouse/.skip-tree | 1 + third_party/overlays/clickhouse/default.nix | 203 +++++++++++++++++++++ ...ouse-support-reading-arrow-LargeListArray.patch | 49 +++-- third_party/overlays/tvl.nix | 7 +- 4 files changed, 234 insertions(+), 26 deletions(-) create mode 100644 third_party/overlays/clickhouse/.skip-tree create mode 100644 third_party/overlays/clickhouse/default.nix diff --git a/third_party/overlays/clickhouse/.skip-tree b/third_party/overlays/clickhouse/.skip-tree new file mode 100644 index 000000000000..2b09755665cf --- /dev/null +++ b/third_party/overlays/clickhouse/.skip-tree @@ -0,0 +1 @@ +this needs to be callPackage'd diff --git a/third_party/overlays/clickhouse/default.nix b/third_party/overlays/clickhouse/default.nix new file mode 100644 index 000000000000..978a115f5c3d --- /dev/null +++ b/third_party/overlays/clickhouse/default.nix @@ -0,0 +1,203 @@ +{ lib +, llvmPackages +, fetchFromGitHub +, fetchpatch +, cmake +, ninja +, python3 +, perl +, nasm +, yasm +, nixosTests +, darwin +, findutils + + # currently for BLAKE3 hash function +, rustSupport ? true + +, corrosion +, rustc +, cargo +, rustPlatform +}: + +let + inherit (llvmPackages) stdenv; + mkDerivation = ( + if stdenv.isDarwin + then darwin.apple_sdk_11_0.llvmPackages_16.stdenv + else llvmPackages.stdenv + ).mkDerivation; +in +mkDerivation rec { + pname = "clickhouse"; + version = "23.10.3.5"; + + src = fetchFromGitHub rec { + owner = "ClickHouse"; + repo = "ClickHouse"; + rev = "v${version}-stable"; + fetchSubmodules = true; + name = "clickhouse-${rev}.tar.gz"; + hash = "sha256-H3nIhBydLBxSesGrvqmwHmBoQGCGQlWgVVUudKLLkIY="; + postFetch = '' + # delete files that make the source too big + rm -rf $out/contrib/llvm-project/llvm/test + rm -rf $out/contrib/llvm-project/clang/test + rm -rf $out/contrib/croaring/benchmarks + + # fix case insensitivity on macos https://github.com/NixOS/nixpkgs/issues/39308 + rm -rf $out/contrib/sysroot/linux-* + rm -rf $out/contrib/liburing/man + + # compress to not exceed the 2GB output limit + # try to make a deterministic tarball + tar -I 'gzip -n' \ + --sort=name \ + --mtime=1970-01-01 \ + --owner=0 --group=0 \ + --numeric-owner --mode=go=rX,u+rw,a-s \ + --transform='s@^@source/@S' \ + -cf temp -C "$out" . + rm -r "$out" + mv temp "$out" + ''; + }; + + strictDeps = true; + nativeBuildInputs = [ + cmake + ninja + python3 + perl + llvmPackages.lld + ] ++ lib.optionals stdenv.isx86_64 [ + nasm + yasm + ] ++ lib.optionals stdenv.isDarwin [ + llvmPackages.bintools + findutils + darwin.bootstrap_cmds + ] ++ lib.optionals rustSupport [ + rustc + cargo + rustPlatform.cargoSetupHook + ]; + + # their vendored version is too old and missing this patch: https://github.com/corrosion-rs/corrosion/pull/205 + corrosionSrc = + if rustSupport then + fetchFromGitHub + { + owner = "corrosion-rs"; + repo = "corrosion"; + rev = "v0.3.5"; + hash = "sha256-r/jrck4RiQynH1+Hx4GyIHpw/Kkr8dHe1+vTHg+fdRs="; + } else null; + corrosionDeps = + if rustSupport then + rustPlatform.fetchCargoTarball + { + src = corrosionSrc; + name = "corrosion-deps"; + preBuild = "cd generator"; + hash = "sha256-dhUgpwSjE9NZ2mCkhGiydI51LIOClA5wwk1O3mnnbM8="; + } else null; + rustDeps = + if rustSupport then + rustPlatform.fetchCargoTarball + { + inherit src; + name = "rust-deps"; + preBuild = "cd rust"; + hash = "sha256-fWDAGm19b7uZv8aBdBoieY5c6POd8IxFXbGdtONpZbw="; + } else null; + + dontCargoSetupPostUnpack = true; + postUnpack = lib.optionalString rustSupport '' + pushd source + + rm -rf contrib/corrosion + cp -r --no-preserve=mode $corrosionSrc contrib/corrosion + + pushd contrib/corrosion/generator + cargoDeps="$corrosionDeps" cargoSetupPostUnpackHook + corrosionDepsCopy="$cargoDepsCopy" + popd + + pushd rust + cargoDeps="$rustDeps" cargoSetupPostUnpackHook + rustDepsCopy="$cargoDepsCopy" + cat .cargo/config >> .cargo/config.toml.in + cat .cargo/config >> skim/.cargo/config.toml.in + rm .cargo/config + popd + + popd + ''; + + postPatch = '' + patchShebangs src/ + + substituteInPlace src/Storages/System/StorageSystemLicenses.sh \ + --replace 'git rev-parse --show-toplevel' '$src' + substituteInPlace utils/check-style/check-duplicate-includes.sh \ + --replace 'git rev-parse --show-toplevel' '$src' + substituteInPlace utils/check-style/check-ungrouped-includes.sh \ + --replace 'git rev-parse --show-toplevel' '$src' + substituteInPlace utils/list-licenses/list-licenses.sh \ + --replace 'git rev-parse --show-toplevel' '$src' + substituteInPlace utils/check-style/check-style \ + --replace 'git rev-parse --show-toplevel' '$src' + '' + lib.optionalString stdenv.isDarwin '' + sed -i 's|gfind|find|' cmake/tools.cmake + sed -i 's|ggrep|grep|' cmake/tools.cmake + '' + lib.optionalString rustSupport '' + + pushd contrib/corrosion/generator + cargoDepsCopy="$corrosionDepsCopy" cargoSetupPostPatchHook + popd + + pushd rust + cargoDepsCopy="$rustDepsCopy" cargoSetupPostPatchHook + popd + + cargoSetupPostPatchHook() { true; } + ''; + + cmakeFlags = [ + "-DENABLE_TESTS=OFF" + "-DCOMPILER_CACHE=disabled" + "-DENABLE_EMBEDDED_COMPILER=ON" + ]; + + # https://github.com/ClickHouse/ClickHouse/issues/49988 + hardeningDisable = [ "fortify" ]; + + postInstall = '' + rm -rf $out/share/clickhouse-test + + sed -i -e '\!/var/log/clickhouse-server/clickhouse-server\.log!d' \ + $out/etc/clickhouse-server/config.xml + substituteInPlace $out/etc/clickhouse-server/config.xml \ + --replace "/var/log/clickhouse-server/clickhouse-server.err.log" "1" + substituteInPlace $out/etc/clickhouse-server/config.xml \ + --replace "trace" "warning" + ''; + + # Builds in 7+h with 2 cores, and ~20m with a big-parallel builder. + requiredSystemFeatures = [ "big-parallel" ]; + + passthru.tests.clickhouse = nixosTests.clickhouse; + + meta = with lib; { + homepage = "https://clickhouse.com"; + description = "Column-oriented database management system"; + license = licenses.asl20; + maintainers = with maintainers; [ orivej ]; + + # not supposed to work on 32-bit https://github.com/ClickHouse/ClickHouse/pull/23959#issuecomment-835343685 + platforms = lib.filter (x: (lib.systems.elaborate x).is64bit) (platforms.linux ++ platforms.darwin); + broken = stdenv.buildPlatform != stdenv.hostPlatform; + }; +} diff --git a/third_party/overlays/patches/clickhouse-support-reading-arrow-LargeListArray.patch b/third_party/overlays/patches/clickhouse-support-reading-arrow-LargeListArray.patch index 59231dbbc011..9e79aa7267da 100644 --- a/third_party/overlays/patches/clickhouse-support-reading-arrow-LargeListArray.patch +++ b/third_party/overlays/patches/clickhouse-support-reading-arrow-LargeListArray.patch @@ -1,41 +1,40 @@ -From 26e65e4addc990cc09b59b587792ac4a454e5cdd Mon Sep 17 00:00:00 2001 +From cdea2e8ad98995202ce81c9c030f2ae64d73b05a Mon Sep 17 00:00:00 2001 From: edef Date: Mon, 30 Oct 2023 08:08:10 +0000 -Subject: [PATCH] [backport] Support reading arrow::LargeListArray +Subject: [PATCH] Support reading arrow::LargeListArray --- - .../Formats/Impl/ArrowColumnToCHColumn.cpp | 35 ++++++++++++++----- - 1 file changed, 26 insertions(+), 9 deletions(-) + .../Formats/Impl/ArrowColumnToCHColumn.cpp | 33 +++++++++++++++---- + 1 file changed, 26 insertions(+), 7 deletions(-) diff --git a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp -index 54a6c8493ea..94cf59fd357 100644 +index 6f9d49498f2..b93846cd4eb 100644 --- a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp +++ b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp -@@ -336,7 +336,22 @@ static ColumnPtr readByteMapFromArrowColumn(std::shared_ptr +@@ -436,6 +436,22 @@ static ColumnPtr readByteMapFromArrowColumn(std::shared_ptr return nullmap_column; } --static ColumnPtr readOffsetsFromArrowListColumn(std::shared_ptr & arrow_column) -+template ++template +struct ArrowOffsetArray; + -+template<> ++template <> +struct ArrowOffsetArray +{ + using type = arrow::Int32Array; +}; + -+template<> ++template <> +struct ArrowOffsetArray +{ + using type = arrow::Int64Array; +}; + -+template static ColumnPtr readOffsetsFromArrowListColumn(std::shared_ptr & arrow_column) ++template + static ColumnPtr readOffsetsFromArrowListColumn(std::shared_ptr & arrow_column) { auto offsets_column = ColumnUInt64::create(); - ColumnArray::Offsets & offsets_data = assert_cast &>(*offsets_column).getData(); -@@ -346,9 +361,9 @@ static ColumnPtr readOffsetsFromArrowListColumn(std::shared_ptrnum_chunks(); chunk_i < num_chunks; ++chunk_i) { @@ -43,16 +42,16 @@ index 54a6c8493ea..94cf59fd357 100644 + ArrowListArray & list_chunk = dynamic_cast(*(arrow_column->chunk(chunk_i))); auto arrow_offsets_array = list_chunk.offsets(); - auto & arrow_offsets = dynamic_cast(*arrow_offsets_array); -+ auto & arrow_offsets = dynamic_cast::type &>(*arrow_offsets_array); ++ auto & arrow_offsets = dynamic_cast::type &>(*arrow_offsets_array); /* - * It seems like arrow::ListArray::values() (nested column data) might or might not be shared across chunks. -@@ -498,13 +513,13 @@ static ColumnPtr readColumnWithIndexesData(std::shared_ptr + * CH uses element size as "offsets", while arrow uses actual offsets as offsets. +@@ -602,13 +618,14 @@ static ColumnPtr readColumnWithIndexesData(std::shared_ptr } } --static std::shared_ptr getNestedArrowColumn(std::shared_ptr & arrow_column) -+template static std::shared_ptr getNestedArrowColumn(std::shared_ptr & arrow_column) ++template + static std::shared_ptr getNestedArrowColumn(std::shared_ptr & arrow_column) { arrow::ArrayVector array_vector; array_vector.reserve(arrow_column->num_chunks()); @@ -63,13 +62,13 @@ index 54a6c8493ea..94cf59fd357 100644 /* * It seems like arrow::ListArray::values() (nested column data) might or might not be shared across chunks. -@@ -636,12 +651,12 @@ static ColumnWithTypeAndName readColumnFromArrowColumn( - if (map_type_hint) - nested_type_hint = assert_cast(map_type_hint->getNestedType().get())->getNestedType(); +@@ -819,12 +836,12 @@ static ColumnWithTypeAndName readColumnFromArrowColumn( + key_type_hint = map_type_hint->getKeyType(); + } } - auto arrow_nested_column = getNestedArrowColumn(arrow_column); + auto arrow_nested_column = getNestedArrowColumn(arrow_column); - auto nested_column = readColumnFromArrowColumn(arrow_nested_column, column_name, format_name, false, dictionary_infos, allow_null_type, skip_columns_with_unsupported_types, skipped, nested_type_hint); + auto nested_column = readColumnFromArrowColumn(arrow_nested_column, column_name, format_name, false, dictionary_infos, allow_null_type, skip_columns_with_unsupported_types, skipped, date_time_overflow_behavior, nested_type_hint, true); if (skipped) return {}; @@ -78,7 +77,7 @@ index 54a6c8493ea..94cf59fd357 100644 const auto * tuple_column = assert_cast(nested_column.column.get()); const auto * tuple_type = assert_cast(nested_column.type.get()); -@@ -650,7 +665,9 @@ static ColumnWithTypeAndName readColumnFromArrowColumn( +@@ -846,7 +863,9 @@ static ColumnWithTypeAndName readColumnFromArrowColumn( return {std::move(map_column), std::move(map_type), column_name}; } case arrow::Type::LIST: @@ -88,13 +87,13 @@ index 54a6c8493ea..94cf59fd357 100644 DataTypePtr nested_type_hint; if (type_hint) { -@@ -658,11 +675,11 @@ static ColumnWithTypeAndName readColumnFromArrowColumn( +@@ -854,11 +873,11 @@ static ColumnWithTypeAndName readColumnFromArrowColumn( if (array_type_hint) nested_type_hint = array_type_hint->getNestedType(); } - auto arrow_nested_column = getNestedArrowColumn(arrow_column); + auto arrow_nested_column = is_large ? getNestedArrowColumn(arrow_column) : getNestedArrowColumn(arrow_column); - auto nested_column = readColumnFromArrowColumn(arrow_nested_column, column_name, format_name, false, dictionary_infos, allow_null_type, skip_columns_with_unsupported_types, skipped, nested_type_hint); + auto nested_column = readColumnFromArrowColumn(arrow_nested_column, column_name, format_name, false, dictionary_infos, allow_null_type, skip_columns_with_unsupported_types, skipped, date_time_overflow_behavior, nested_type_hint); if (skipped) return {}; - auto offsets_column = readOffsetsFromArrowListColumn(arrow_column); diff --git a/third_party/overlays/tvl.nix b/third_party/overlays/tvl.nix index 600161b66143..fb2a89e7fd42 100644 --- a/third_party/overlays/tvl.nix +++ b/third_party/overlays/tvl.nix @@ -148,7 +148,12 @@ depot.nix.readTree.drvTargets { }; }; - clickhouse = super.clickhouse.overrideAttrs (old: { + # we're vendoring this for now, since the version upgrade has a lot of changes + # we can't merge it upstream yet because the Darwin build is broken + # https://github.com/NixOS/nixpkgs/pull/267033 + clickhouse = (super.callPackage ./clickhouse { + llvmPackages = super.llvmPackages_16; + }).overrideAttrs (old: { patches = old.patches or [ ] ++ [ # https://github.com/ClickHouse/ClickHouse/pull/56118 ./patches/clickhouse-support-reading-arrow-LargeListArray.patch -- cgit 1.4.1