From ed8dd4acd71161893a06dd25567852b1855ac1ab Mon Sep 17 00:00:00 2001
From: Vincent Ambo
Date: Sat, 21 Jan 2023 21:17:58 +0300
Subject: feat(corp/data-import): add import of OR 'translations' table

The original dataset contains translations into different languages,
but only the English ones are imported here.

Note that translations are for lemmata only.

Change-Id: Ifb9c32c25fda44c38ad899efca9d205c520c0fa3
Reviewed-on: https://cl.tvl.fyi/c/depot/+/7895
Reviewed-by: tazjin
Tested-by: BuildkiteCI
---
 corp/russian/data-import/src/or_parser.rs | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

(limited to 'corp/russian/data-import/src/or_parser.rs')

diff --git a/corp/russian/data-import/src/or_parser.rs b/corp/russian/data-import/src/or_parser.rs
index 28e4f14d3169..eace850c24eb 100644
--- a/corp/russian/data-import/src/or_parser.rs
+++ b/corp/russian/data-import/src/or_parser.rs
@@ -44,6 +44,19 @@ pub struct WordForm {
     pub form_bare: String,
 }
 
+/// A translation from the `translations` table.
+#[derive(Debug, Deserialize)]
+pub struct Translation {
+    pub id: usize,
+    pub lang: String,
+    pub word_id: usize,
+    pub position: String,
+    pub tl: String, // unknown
+    pub example_ru: String,
+    pub example_tl: String,
+    pub info: String,
+}
+
 pub struct OpenRussianParser {
     or_directory: PathBuf,
 }
@@ -65,6 +78,10 @@ impl OpenRussianParser {
         self.parser_for("words_forms.csv")
     }
 
+    pub fn translations(&self) -> DynIter {
+        self.parser_for("translations.csv")
+    }
+
     fn parser_for(
         &self,
         file_name: &str,
-- 
cgit 1.4.1