about summary refs log tree commit diff
path: root/corp
diff options
context:
space:
mode:
author Vincent Ambo <mail@tazj.in> 2023-01-18T15·35+0300
committer tazjin <tazjin@tvl.su> 2023-01-18T15·44+0000
commit c8918334142be3cf79e3555467a00545ea5fea07 (patch)
tree 682b7465c8482076e5d3fd0d999283780b5b604a /corp
parent 0ed6583edcdaac73f4ed7fb30b09d8fb95c25f9d (diff)
feat(corp/data-import): build morphology database in derivation r/5693
This makes the imported database of approximately the whole Russian
language (all lemmas, grammemes, forms, etc.) a Nix build target that
is built in CI.

This still needs schema normalisation (the schema currently maps fairly
directly onto the raw data), but it's already starting to be a useful data set.

This also happens to be a pretty cool demonstration of the power of
Nix. You can do `nix-build -A corp.russian.data-import.database` and
out comes a perfectly valid SQLite database with a valid external data
import!

Change-Id: I5d6d15e67d0e4a7ff590fad06252be34f5d561fd
Reviewed-on: https://cl.tvl.fyi/c/depot/+/7866
Reviewed-by: tazjin <tazjin@tvl.su>
Tested-by: BuildkiteCI
Diffstat (limited to 'corp')
-rw-r--r-- corp/russian/data-import/default.nix 16
1 file changed, 10 insertions, 6 deletions
diff --git a/corp/russian/data-import/default.nix b/corp/russian/data-import/default.nix
index b4cdc50c25c1..c2fc1bf1cb5e 100644
--- a/corp/russian/data-import/default.nix
+++ b/corp/russian/data-import/default.nix
@@ -1,4 +1,4 @@
-{ depot, pkgs, ... }:
+{ depot, lib, pkgs, ... }:
 
 let
   buildInputs = with pkgs; [
@@ -26,14 +26,18 @@ let
     # make OPENCORPORA_DATA available in the environment
     OPENCORPORA_DATA = inputData;
   };
+
 in
-depot.third_party.naersk.buildPackage {
+lib.fix (self: depot.third_party.naersk.buildPackage {
   src = depot.third_party.gitignoreSource ./.;
   inherit buildInputs;
 
-  passthru = {
-    inherit shell;
-
+  passthru = depot.nix.readTree.drvTargets {
+    inherit shell inputData;
 
+    # target that actually builds an entire database
+    database = pkgs.runCommand "tvl-russian-db.sqlite" { } ''
+      ${self}/bin/data-import ${inputData} $out
+    '';
   };
-}
+})