From c8918334142be3cf79e3555467a00545ea5fea07 Mon Sep 17 00:00:00 2001 From: Vincent Ambo Date: Wed, 18 Jan 2023 18:35:18 +0300 Subject: feat(corp/data-import): build morphology database in derivation This makes the actual imported database of the ~whole Russian language (all lemmas, grammemes, forms etc.) a Nix build target which is built in CI. This still needs schema normalisation (it's fairly directly mapped to the raw data), but it's already starting to be a useful data set. This also happens to be a pretty cool demonstration of the power of Nix. You can do `nix-build -A corp.russian.data-import.database` and out comes a perfectly valid SQLite database with a valid external data import! Change-Id: I5d6d15e67d0e4a7ff590fad06252be34f5d561fd Reviewed-on: https://cl.tvl.fyi/c/depot/+/7866 Reviewed-by: tazjin Tested-by: BuildkiteCI --- corp/russian/data-import/default.nix | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) (limited to 'corp/russian') diff --git a/corp/russian/data-import/default.nix b/corp/russian/data-import/default.nix index b4cdc50c25..c2fc1bf1cb 100644 --- a/corp/russian/data-import/default.nix +++ b/corp/russian/data-import/default.nix @@ -1,4 +1,4 @@ -{ depot, pkgs, ... }: +{ depot, lib, pkgs, ... }: let buildInputs = with pkgs; [ @@ -26,14 +26,18 @@ let # make OPENCORPORA_DATA available in the environment OPENCORPORA_DATA = inputData; }; + in -depot.third_party.naersk.buildPackage { +lib.fix (self: depot.third_party.naersk.buildPackage { src = depot.third_party.gitignoreSource ./.; inherit buildInputs; - passthru = { - inherit shell; - + passthru = depot.nix.readTree.drvTargets { + inherit shell inputData; + # target that actually builds an entire database + database = pkgs.runCommand "tvl-russian-db.sqlite" { } '' + ${self}/bin/data-import ${inputData} $out + ''; }; -} +}) -- cgit 1.4.1