diff options
| field | value | date |
|---|---|---|
| author | Vincent Ambo <mail@tazj.in> | 2023-01-20T08·54+0300 |
| committer | tazjin <tazjin@tvl.su> | 2023-01-21T17·49+0000 |
| commit | ee0c0ee95103fa10e227a1976149d20e6944001c (patch) | |
| tree | 9348ea54fc9ccedb01355f80c9d7c4bf55aa4e9c /corp/russian/data-import/default.nix | |
| parent | 1a84e3e6d800fed1709364174d4a615b1d146ad0 (diff) | |
chore(corp/data-import): make OR data archive available in env r/5728
Change-Id: Idacf42743051eae0cf7010f952a4f91af17ad708
Reviewed-on: https://cl.tvl.fyi/c/depot/+/7892
Reviewed-by: tazjin <tazjin@tvl.su>
Tested-by: BuildkiteCI
Diffstat (limited to 'corp/russian/data-import/default.nix')
-rw-r--r-- | corp/russian/data-import/default.nix | 23 |
1 file changed, 15 insertions, 8 deletions
```diff
diff --git a/corp/russian/data-import/default.nix b/corp/russian/data-import/default.nix
index c2fc1bf1cb5e..cf358874dce6 100644
--- a/corp/russian/data-import/default.nix
+++ b/corp/russian/data-import/default.nix
@@ -9,22 +9,29 @@ let
   # mirrored input data from OpenCorpora, as of 2023-01-17.
   #
   # This data is licensed under CC-BY-SA.
-  inputDataArchive = pkgs.fetchurl {
+  openCorporaArchive = pkgs.fetchurl {
     name = "dict.opcorpora.xml.bz";
-    url = "https://tazj.in/blobs/dict.opcorpora.xml.bz2";
+    url = "https://tazj.in/blobs/opencorpora-20230117.xml.bz2";
     sha256 = "04n5g43fkfc93z6xlwf2qfdrfdfl562pc2ivdb3cbbbsy56gkqg6";
   };
 
-  inputData = pkgs.runCommand "dict.opcorpora.xml" { } ''
-    ${pkgs.bzip2}/bin/bunzip2 -k -c ${inputDataArchive} > $out
+  openCorpora = pkgs.runCommand "dict.opcorpora.xml" { } ''
+    ${pkgs.bzip2}/bin/bunzip2 -k -c ${openCorporaArchive} > $out
   '';
 
+  openRussianArchive = pkgs.fetchzip {
+    name = "openrussian-20230117";
+    url = "https://tazj.in/blobs/openrussian-20230117.tar.xz";
+    sha256 = "06jl7i23cx58a0n2626hb82xlzimixvnxp7lxdw0g664kv9bmw25";
+  };
+
   # development shell with native deps
   shell = pkgs.mkShell {
     inherit buildInputs;
 
-    # make OPENCORPORA_DATA available in the environment
-    OPENCORPORA_DATA = inputData;
+    # make datasets available in the environment
+    OPENCORPORA_DATA = openCorpora;
+    OPENRUSSIAN_DATA = openRussianArchive;
   };
 in
 
@@ -33,11 +40,11 @@ lib.fix (self: depot.third_party.naersk.buildPackage {
   inherit buildInputs;
 
   passthru = depot.nix.readTree.drvTargets {
-    inherit shell inputData;
+    inherit shell openCorpora;
 
     # target that actually builds an entire database
     database = pkgs.runCommand "tvl-russian-db.sqlite" { } ''
-      ${self}/bin/data-import ${inputData} $out
+      ${self}/bin/data-import ${openCorpora} $out
     '';
   };
 })
```