about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r--corp/russian/data-import/src/mappings.rs13
1 files changed, 13 insertions, 0 deletions
diff --git a/corp/russian/data-import/src/mappings.rs b/corp/russian/data-import/src/mappings.rs
index 8a581ff86b..985088a566 100644
--- a/corp/russian/data-import/src/mappings.rs
+++ b/corp/russian/data-import/src/mappings.rs
@@ -1,5 +1,18 @@
 //! Manual mapping of some data structures in OC/OR corpora.
 
+/// Maps the *names* of OpenRussian word types (the `word_type` field
+/// in the `or_words` table) to the *set* of OpenCorpora grammemes
+/// commonly attached to lemmata of this type in OC.
+///
+/// Each entry is a `(word type name, grammemes)` pair. Unmappable word
+/// types are omitted; many words also have an empty word type.
+pub const WORD_TYPES_GRAMMEME_MAP: &'static [(&'static str, &'static [&'static str])] = &[
+    ("adjective", &["ADJF"]),
+    ("adverb", &["ADVB"]),
+    ("noun", &["NOUN"]),
+    ("verb", &["INFN"]), // or "VERB" ... NOTE(review): confirm which grammeme OC lemmata carry
+];
+
 /// Maps the *names* of OpenRussian grammemes (the `form_type` fields
 /// in the `or_word_forms` table) to the *set* of OpenCorpora
 /// grammemes attached to the corresponding lemma in the `oc_lemmas`