about summary refs log tree commit diff
path: root/corp/russian/data-import/src/or_parser.rs
diff options
context:
space:
mode:
author Vincent Ambo <mail@tazj.in> 2023-01-21T18·17+0300
committer tazjin <tazjin@tvl.su> 2023-01-22T16·13+0000
commit ed8dd4acd71161893a06dd25567852b1855ac1ab (patch)
tree 5dd998379e20c6601ef734b2fd15ef99a65ef3c6 /corp/russian/data-import/src/or_parser.rs
parent 2b308c64b94a14592f928a7d2511fc74c8846eb3 (diff)
feat(corp/data-import): add import of OR 'translations' table r/5732
The original dataset contains translations into different languages,
but only the English ones are imported here.

Note that translations are for lemmata only.

Change-Id: Ifb9c32c25fda44c38ad899efca9d205c520c0fa3
Reviewed-on: https://cl.tvl.fyi/c/depot/+/7895
Reviewed-by: tazjin <tazjin@tvl.su>
Tested-by: BuildkiteCI
Diffstat (limited to '')
-rw-r--r-- corp/russian/data-import/src/or_parser.rs 17
1 files changed, 17 insertions, 0 deletions
diff --git a/corp/russian/data-import/src/or_parser.rs b/corp/russian/data-import/src/or_parser.rs
index 28e4f14d31..eace850c24 100644
--- a/corp/russian/data-import/src/or_parser.rs
+++ b/corp/russian/data-import/src/or_parser.rs
@@ -44,6 +44,19 @@ pub struct WordForm {
     pub form_bare: String,
 }
 
+/// A translation from the `translations` table (one record of `translations.csv`).
+#[derive(Debug, Deserialize)]
+pub struct Translation {
+    pub id: usize, // presumably this row's own identifier — TODO confirm against the dataset
+    pub lang: String, // language of the translation; exact encoding (e.g. a language code) is not visible here
+    pub word_id: usize, // presumably the id of the translated word (lemma) — TODO confirm
+    pub position: String, // NOTE(review): meaning not evident from this file; kept as raw text
+    pub tl: String, // unknown — NOTE(review): possibly the translated text itself (cf. `example_tl`); confirm
+    pub example_ru: String, // presumably a usage example in Russian — TODO confirm
+    pub example_tl: String, // presumably the same example in the translation language — TODO confirm
+    pub info: String, // additional information; semantics not visible here
+}
+
 pub struct OpenRussianParser {
     or_directory: PathBuf,
 }
@@ -65,6 +78,10 @@ impl OpenRussianParser {
         self.parser_for("words_forms.csv")
     }
 
+    pub fn translations(&self) -> DynIter<Translation> {
+        self.parser_for("translations.csv") // delegates to `parser_for`, yielding `Translation` records from this file
+    }
+
     fn parser_for<T: serde::de::DeserializeOwned + 'static>(
         &self,
         file_name: &str,