From 6c98805500ad7b023ca7fc8dc4eefcc0842db028 Mon Sep 17 00:00:00 2001
From: Griffin Smith
Date: Mon, 13 Jul 2020 15:09:41 -0400
Subject: feat(owothia): Remove stopwords

Remove a (currently rather brief) list of stopwords from the verbs
considered for owo'ing. The specific case I wanted to remove here is the
verb "'s" from a contraction (there have been a fair number of "'s me
owo") but I've added a few more just for good measure.

Change-Id: I53994663db1f8767ec5dc3ddc58dd35fb6cd6d70
Reviewed-on: https://cl.tvl.fyi/c/depot/+/1134
Reviewed-by: glittershark
Tested-by: BuildkiteCI
---
 users/glittershark/owothia/src/Main.hs | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/users/glittershark/owothia/src/Main.hs b/users/glittershark/owothia/src/Main.hs
index 9c6ec529e15f..b8cbfd919a46 100644
--- a/users/glittershark/owothia/src/Main.hs
+++ b/users/glittershark/owothia/src/Main.hs
@@ -21,8 +21,21 @@ data Config = Config
   deriving anyclass (FromEnv)
 makeLenses ''Config
 
+stopWord :: Text -> Bool
+stopWord "'s" = True
+stopWord "is" = True
+stopWord "are" = True
+stopWord "am" = True
+stopWord "were" = True
+stopWord "was" = True
+stopWord "be" = True
+stopWord _ = False
+
 verbs :: POSTagger Tag -> Text -> [Text]
-verbs tagger s = mapMaybe pickVerb $ tag tagger s >>= \(TaggedSent ps) -> ps
+verbs tagger s
+  = filter (not . stopWord)
+  . mapMaybe pickVerb
+  $ tag tagger s >>= \(TaggedSent ps) -> ps
   where
     pickVerb (POS Conll.VB (Token verb)) = Just verb
     pickVerb (POS Conll.VBD (Token verb)) = Just verb
-- 
cgit 1.4.1