about summary refs log tree commit diff
path: root/corp/russian/data-import/src/db_setup.rs
diff options
context:
space:
mode:
authorVincent Ambo <mail@tazj.in>2023-01-21T15·00+0300
committertazjin <tazjin@tvl.su>2023-01-21T17·49+0000
commit8eeb5d3bccf831681b2cad5c3b322e6a08f596df (patch)
tree4e02105c8ab39a4466b29e98c16aa9e2c69ac9b4 /corp/russian/data-import/src/db_setup.rs
parent429c0d00c4cd07ea90c85bf1ec2f2c742d970420 (diff)
feat(corp/data-import): add import of OR 'words_forms' table r/5730
This is the table containing the full set of morphological forms for all
the words from the lemmata table (although they don't call it that).

Change-Id: I6f5be673c5f59f11e36bd8c8c935844a7d4fd170
Reviewed-on: https://cl.tvl.fyi/c/depot/+/7894
Tested-by: BuildkiteCI
Reviewed-by: tazjin <tazjin@tvl.su>
Diffstat (limited to 'corp/russian/data-import/src/db_setup.rs')
-rw-r--r--corp/russian/data-import/src/db_setup.rs39
1 files changed, 38 insertions, 1 deletions
diff --git a/corp/russian/data-import/src/db_setup.rs b/corp/russian/data-import/src/db_setup.rs
index 5fe64717ad..4644edf094 100644
--- a/corp/russian/data-import/src/db_setup.rs
+++ b/corp/russian/data-import/src/db_setup.rs
@@ -6,7 +6,7 @@
 //! introduce things like foreign key constraints between tables that
 //! represent relations.
 
-use super::{bail, Ensure};
+use super::Ensure;
 use crate::oc_parser::*;
 use crate::or_parser;
 use log::{debug, info};
@@ -181,6 +181,16 @@ CREATE TABLE or_words (
     word_type TEXT,
     level TEXT
 ) STRICT;
+
+-- Word forms imported from the OpenRussian 'words_forms' table; each row
+-- belongs to one parent word via word_id.
+-- NOTE(review): assumes or_words declares an `id` column -- confirm.
+CREATE TABLE or_words_forms (
+    id INTEGER PRIMARY KEY,
+    word_id INTEGER NOT NULL,
+    form_type TEXT,
+    position TEXT,
+    form TEXT,
+    form_bare TEXT,
+    FOREIGN KEY(word_id) REFERENCES or_words(id)
+) STRICT;
 "#,
     )
     .ensure("setting up OpenRussian table schema failed");
@@ -215,3 +225,30 @@ VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7)
 
     info!("inserted {} OpenRussian words", count);
 }
+
+/// Inserts every word form yielded by `forms` into the `or_words_forms`
+/// table using `conn`.
+///
+/// The insert statement is prepared once (through the connection's statement
+/// cache) and re-executed for each form. Failures while preparing or
+/// executing are handed to `Ensure::ensure` together with a descriptive
+/// message. Once the iterator is exhausted, the number of processed forms is
+/// logged at info level.
+pub fn insert_or_word_forms<I: Iterator<Item = or_parser::WordForm>>(conn: &Connection, forms: I) {
+    const INSERT_FORM: &str = "
+INSERT INTO or_words_forms (id, word_id, form_type, position, form, form_bare)
+VALUES (?1, ?2, ?3, ?4, ?5, ?6)
+";
+
+    let mut insert = conn
+        .prepare_cached(INSERT_FORM)
+        .ensure("failed to prepare OR word forms statement");
+
+    let mut inserted = 0;
+    for entry in forms {
+        // Parameter order must match the column list in INSERT_FORM.
+        let row = (
+            entry.id,
+            entry.word_id,
+            entry.form_type,
+            entry.position,
+            entry.form,
+            entry.form_bare,
+        );
+        insert.execute(row).ensure("failed to insert OR word form");
+        inserted += 1;
+    }
+
+    info!("inserted {} OpenRussian word forms", inserted);
+}