From 429c0d00c4cd07ea90c85bf1ec2f2c742d970420 Mon Sep 17 00:00:00 2001 From: Vincent Ambo Date: Fri, 20 Jan 2023 13:31:12 +0300 Subject: feat(corp/data-import): add import of OpenRussian 'words' table This is actually the lemmata table of this corpus, not the forms of all words (they're in a separate table). Change-Id: I89a2c2817ccce840f47406fa2a636f4ed3f49154 Reviewed-on: https://cl.tvl.fyi/c/depot/+/7893 Reviewed-by: tazjin Tested-by: BuildkiteCI --- corp/russian/data-import/Cargo.lock | 115 ++++++++++++++++++++++++++++++++++++ 1 file changed, 115 insertions(+) (limited to 'corp/russian/data-import/Cargo.lock') diff --git a/corp/russian/data-import/Cargo.lock b/corp/russian/data-import/Cargo.lock index 125b62d43e90..cd85e058108f 100644 --- a/corp/russian/data-import/Cargo.lock +++ b/corp/russian/data-import/Cargo.lock @@ -28,6 +28,18 @@ version = "1.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" +[[package]] +name = "bstr" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba3569f383e8f1598449f1a423e72e99569137b47740b1da11ef19af3d5c3223" +dependencies = [ + "lazy_static", + "memchr", + "regex-automata", + "serde", +] + [[package]] name = "cc" version = "1.0.78" @@ -40,13 +52,37 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" +[[package]] +name = "csv" +version = "1.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22813a6dc45b335f9bade10bf7271dc477e81113e89eb251a0bc2a8a81c536e1" +dependencies = [ + "bstr", + "csv-core", + "itoa", + "ryu", + "serde", +] + +[[package]] +name = "csv-core" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b2466559f260f48ad25fe6317b3c8dac77b5bdb5763ac7d9d6103530663bc90" +dependencies = [ + "memchr", +] + [[package]] name = "data-import" version = "0.1.0" dependencies = [ + "csv", "env_logger", "log", "rusqlite", + "serde", "xml-rs", ] @@ -162,6 +198,18 @@ dependencies = [ "windows-sys", ] +[[package]] +name = "itoa" +version = "0.4.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b71991ff56294aa922b450139ee08b3bfc70982c6b2c7562771375cf73542dd4" + +[[package]] +name = "lazy_static" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" + [[package]] name = "libc" version = "0.2.139" @@ -211,6 +259,24 @@ version = "0.3.26" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6ac9a59f73473f1b8d852421e59e64809f025994837ef743615c6d0c5b305160" +[[package]] +name = "proc-macro2" +version = "1.0.50" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ef7d57beacfaf2d8aee5937dab7b7f28de3cb8b1828479bb5de2a7106f2bae2" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8856d8364d252a14d474036ea1358d63c9e6965c8e5c1885c18f73d70bff9c7b" +dependencies = [ + "proc-macro2", +] + [[package]] name = "regex" version = "1.7.1" @@ -222,6 +288,12 @@ dependencies = [ "regex-syntax", ] +[[package]] +name = "regex-automata" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132" + [[package]] name = "regex-syntax" version = "0.6.28" @@ -256,12 +328,49 @@ dependencies = [ "windows-sys", ] +[[package]] +name = "ryu" +version = "1.0.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7b4b9743ed687d4b4bcedf9ff5eaa7398495ae14e61cba0a295704edbc7decde" + +[[package]] +name = "serde" +version = "1.0.152" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bb7d1f0d3021d347a83e556fc4683dea2ea09d87bccdf88ff5c12545d89d5efb" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.152" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "af487d118eecd09402d70a5d72551860e788df87b464af30e5ea6a38c75c541e" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "smallvec" version = "1.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a507befe795404456341dfab10cef66ead4c041f62b8b11bbb92bffe5d0953e0" +[[package]] +name = "syn" +version = "1.0.107" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1f4064b5b16e03ae50984a5a8ed5d4f8803e6bc1fd170a3cda91a1be4b18e3f5" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + [[package]] name = "termcolor" version = "1.2.0" @@ -271,6 +380,12 @@ dependencies = [ "winapi-util", ] +[[package]] +name = "unicode-ident" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "84a22b9f218b40614adcb3f4ff08b703773ad44fa9423e4e0d346d5db86e4ebc" + [[package]] name = "vcpkg" version = "0.2.15" -- cgit 1.4.1