about summary refs log tree commit diff
path: root/corp/russian/data-import/src/main.rs
diff options
context:
space:
mode:
Diffstat (limited to 'corp/russian/data-import/src/main.rs')
-rw-r--r--corp/russian/data-import/src/main.rs36
1 files changed, 27 insertions, 9 deletions
diff --git a/corp/russian/data-import/src/main.rs b/corp/russian/data-import/src/main.rs
index 9f2f5089a6..502351cf9d 100644
--- a/corp/russian/data-import/src/main.rs
+++ b/corp/russian/data-import/src/main.rs
@@ -55,11 +55,13 @@
 //!   between the lemmas "два" and "второй".
 
 use log::{error, info};
+use rusqlite::{Connection, Result};
 use std::env;
 use std::fmt::Display;
 use std::fs::File;
-use std::io::{BufReader, BufWriter, Write};
+use std::io::BufReader;
 
+mod db_setup;
 mod oc_parser;
 
 fn main() {
@@ -77,18 +79,34 @@ fn main() {
 
     let mut parser = oc_parser::OpenCorporaParser::new(BufReader::new(input_file));
 
-    let mut out = BufWriter::new(std::io::stdout().lock());
+    let conn = Connection::open("out.db").ensure("failed to open DB connection");
+
+    db_setup::initial_schema(&conn);
+
+    // afterwards:
+    // add actual IDs to grammemes
+    // properly reference keys internally
+    // add foreign key constraint on lemma_grammemes.grammeme
+
+    let mut tx = conn
+        .unchecked_transaction()
+        .ensure("failed to start transaction");
+    let mut count = 0;
 
     while let Some(elem) = parser.next_element() {
-        if let oc_parser::OcElement::Lemma(lemma) = elem {
-            if lemma.lemma.word == "тяжёлый" {
-                writeln!(out, "{:?}", lemma).ensure("writing output failed");
-                break;
-            }
+        // commit every 1000 things
+        if count % 1000 == 0 {
+            tx.commit().ensure("transaction failed");
+            tx = conn
+                .unchecked_transaction()
+                .ensure("failed to start new transaction");
+            info!("transaction committed at watermark {}", count);
         }
-    }
 
-    out.flush().ensure("flushing the out buffer failed");
+        db_setup::insert_oc_element(&tx, elem);
+
+        count += 1;
+    }
 }
 
 /// It's like `expect`, but through `log::error`.