about summary refs log tree commit diff
path: root/users/glittershark/owothia/src
diff options
context:
space:
mode:
author Griffin Smith <grfn@gws.fyi> 2020-07-13T19·09-0400
committer glittershark <grfn@gws.fyi> 2020-07-13T19·13+0000
commit 6c98805500ad7b023ca7fc8dc4eefcc0842db028 (patch)
tree afec96a48726cf6e449ae53247b403f064d66528 /users/glittershark/owothia/src
parent f808afc89c1554bcac00c0e9ef8a753a0d662705 (diff)
feat(owothia): Remove stopwords r/1275
Remove a (currently rather brief) list of stopwords from the verbs
considered for owo'ing. The specific case I wanted to remove here is the
verb "'s" from a contraction (there have been a fair number of "'s me
owo") but I've added a few more just for good measure.

Change-Id: I53994663db1f8767ec5dc3ddc58dd35fb6cd6d70
Reviewed-on: https://cl.tvl.fyi/c/depot/+/1134
Reviewed-by: glittershark <grfn@gws.fyi>
Tested-by: BuildkiteCI
Diffstat (limited to 'users/glittershark/owothia/src')
-rw-r--r-- users/glittershark/owothia/src/Main.hs 15
1 files changed, 14 insertions, 1 deletions
diff --git a/users/glittershark/owothia/src/Main.hs b/users/glittershark/owothia/src/Main.hs
index 9c6ec529e15f..b8cbfd919a46 100644
--- a/users/glittershark/owothia/src/Main.hs
+++ b/users/glittershark/owothia/src/Main.hs
@@ -21,8 +21,21 @@ data Config = Config
   deriving anyclass (FromEnv)
 makeLenses ''Config
 
+stopWord :: Text -> Bool
+stopWord "'s"   = True
+stopWord "is"   = True
+stopWord "are"  = True
+stopWord "am"   = True
+stopWord "were" = True
+stopWord "was"  = True
+stopWord "be"   = True
+stopWord _      = False
+
 verbs :: POSTagger Tag -> Text -> [Text]
-verbs tagger s = mapMaybe pickVerb $ tag tagger s >>= \(TaggedSent ps) -> ps
+verbs tagger s
+  = filter (not . stopWord)
+  . mapMaybe pickVerb
+  $ tag tagger s >>= \(TaggedSent ps) -> ps
   where
     pickVerb (POS Conll.VB (Token verb)) = Just verb
     pickVerb (POS Conll.VBD (Token verb)) = Just verb