From 485c3cc912a5713a22cd655c0e35d77d686e3ccc Mon Sep 17 00:00:00 2001 From: Vincent Ambo Date: Wed, 18 Jan 2023 03:22:53 +0300 Subject: feat(corp/data-import): parse lemmas from OpenCorpora dump Change-Id: I1e4efcfc8e555f61578b563411d5e6ed9590d8e8 Reviewed-on: https://cl.tvl.fyi/c/depot/+/7860 Reviewed-by: tazjin Tested-by: BuildkiteCI --- corp/russian/data-import/src/main.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'corp/russian/data-import/src/main.rs') diff --git a/corp/russian/data-import/src/main.rs b/corp/russian/data-import/src/main.rs index 336cc3d14f9f..9f2f5089a603 100644 --- a/corp/russian/data-import/src/main.rs +++ b/corp/russian/data-import/src/main.rs @@ -80,11 +80,11 @@ fn main() { let mut out = BufWriter::new(std::io::stdout().lock()); while let Some(elem) = parser.next_element() { - match elem { - oc_parser::OcElement::Grammeme(g) => { - writeln!(out, "{:?}", g).ensure("writing element failed") + if let oc_parser::OcElement::Lemma(lemma) = elem { + if lemma.lemma.word == "тяжёлый" { + writeln!(out, "{:?}", lemma).ensure("writing output failed"); + break; } - oc_parser::OcElement::Lemma(_) => continue, } } -- cgit 1.4.1