feat(xanthous): Add a generator for random english syllables r/2656

Add a new "speech" generator module, with the beginnings of the vague definition of the phonotactics of the language (there's one in here for English based on the wikipedia article for English phonology, but it's less than ideal as it has generated words like "sprurlkt") and the ability to generate random syllables of a language by picking an onset, nucleus, and coda from the list for that language (within a range of the number of allowed of each syllable part). This will be used down the road to automatically generate utterances from various non-english-speaking creatures (so the accuracy is less important, just that it "feels real"). Change-Id: I7b81375ec595239c05c5c800cbde1a2a900e38ac Reviewed-on: https://cl.tvl.fyi/c/depot/+/3202 Reviewed-by: grfn <grfn@gws.fyi> Tested-by: BuildkiteCI
author: Griffin Smith <grfn@gws.fyi> 2021-06-13T01·11-0400
committer: grfn <grfn@gws.fyi> 2021-06-13T01·24+0000
commit: 2cfe4069bb9b46778d737f5ca535edcb9993f356 (patch)
tree: 5b1d72f34c42fd0d941792923172a94d26468174 /users/grfn/xanthous/src/Xanthous/Generators/Speech.hs
parent: 006e5231e526b3b1e9d06644bd1d2de9d5decb1e (diff)
1 files changed, 108 insertions, 0 deletions
diff --git a/users/grfn/xanthous/src/Xanthous/Generators/Speech.hs b/users/grfn/xanthous/src/Xanthous/Generators/Speech.hs
new file mode 100644
index 0000000000..76830c4d91
--- /dev/null
+++ b/users/grfn/xanthous/src/Xanthous/Generators/Speech.hs
@@ -0,0 +1,108 @@
+{-# LANGUAGE TemplateHaskell #-}
+--------------------------------------------------------------------------------
+module Xanthous.Generators.Speech
+  ( -- * Abstract phonotactics
+    Phonotactics(..)
+    -- ** Lenses
+  , onsets
+  , nuclei
+  , codas
+  , numOnsets
+  , numNuclei
+  , numCodas
+
+    -- ** Definitions for languages
+  , english
+
+    -- * Language generation
+  , syllable
+
+  ) where
+--------------------------------------------------------------------------------
+import           Xanthous.Prelude hiding (replicateM)
+import           Data.Interval (Interval)
+import qualified Data.Interval as Interval
+import           Control.Monad.Random.Class (MonadRandom)
+import           Xanthous.Random (chooseRange, choose, ChooseElement (ChooseElement))
+import           Control.Monad (replicateM)
+--------------------------------------------------------------------------------
+
+newtype Phoneme = Phoneme Text
+  deriving stock (Show, Eq, Generic)
+  deriving anyclass (NFData)
+  deriving newtype (IsString, Semigroup, Monoid)
+
+data Phonotactics = Phonotactics
+  { _onsets    :: [Phoneme]
+  , _nuclei    :: [Phoneme]
+  , _codas     :: [Phoneme]
+  , _numOnsets :: Interval Word
+  , _numNuclei :: Interval Word
+  , _numCodas  :: Interval Word
+  }
+  deriving stock (Show, Eq, Generic)
+  deriving anyclass (NFData)
+makeLenses ''Phonotactics
+
+syllable :: MonadRandom m => Phonotactics -> m Text
+syllable phonotactics = do
+  let genPart num choices = do
+        n <- fromIntegral . fromMaybe 0 <$> chooseRange (phonotactics ^. num)
+        fmap (fromMaybe mempty . mconcat)
+          . replicateM n
+          . choose . ChooseElement
+          $ phonotactics ^. choices
+
+  (Phoneme onset) <- genPart numOnsets onsets
+  (Phoneme nucleus) <- genPart numNuclei nuclei
+  (Phoneme coda) <- genPart numCodas codas
+
+  pure $ onset <> nucleus <> coda
+
+--------------------------------------------------------------------------------
+
+-- <https://en.wikipedia.org/wiki/English_phonology#Phonotactics>
+english :: Phonotactics
+english = Phonotactics
+  { _onsets = [ "pl" , "bl" , "kl" , "gl" , "pr" , "br" , "tr" , "dr" , "kr"
+              , "gr" , "tw" , "dw" , "gw" , "kw" , "pw"
+
+              , "fl" , "sl" , {- "thl", -} "shl" {- , "vl" -}
+              , "p", "b", "t", "d", "k", "ɡ", "m", "n", "f", "v", "th", "s"
+              , "z", "h", "l", "w"
+
+              , "sp", "st", "sk"
+
+              , "sm", "sn"
+
+              , "sf", "sθ"
+
+              , "spl", "skl", "spr", "str", "skr", "skw", "sm", "sp", "st", "sk"
+              ]
+  , _nuclei = [ "a", "e", "i", "o", "u", "ur", "ar", "or", "ear", "are", "ure"
+              , "oa", "ee", "oo", "ei", "ie", "oi", "ou"
+              ]
+  , _codas = [ "m", "n", "ng", "p", "t", "tsh", "k", "f", "sh", "s", "th", "x"
+             , "v", "z", "zh", "l", "r", "w"
+
+             , "lk", "lb", "lt", "ld", "ltsh", "ldsh", "lk"
+             , "rp", "rb", "rt", "rd", "rtsh", "rdsh", "rk", "rɡ"
+             , "lf", "lv", "lth", "ls", "lz", "lsh", "lth"
+             , "rf", "rv", "rth", "rs", "rz", "rth"
+             , "lm", "ln"
+             , "rm", "rn", "rl"
+             , "mp", "nt", "nd", "nth", "nsh", "ŋk"
+             , "mf", "ms", "mth", "nf", "nth", "ns", "nz", "ŋθ"
+             , "ft", "sp", "st", "sk"
+             , "fth"
+             , "pt", "kt"
+             , "pth", "ps", "th", "ts", "dth", "dz", "ks"
+             , "lpt", "lps", "lfth", "lts", "lst", "lkt", "lks"
+             , "rmth", "rpt", "rps", "rts", "rst", "rkt"
+             , "mpt", "mps", "ndth", "ŋkt", "ŋks", "ŋkth"
+             , "ksth", "kst"
+             ]
+  , _numOnsets = Interval.singleton 1
+  , _numNuclei = Interval.singleton 1
+  , _numCodas  = Interval.singleton 1
+  }
author	Griffin Smith <grfn@gws.fyi>	2021-06-13T01·11-0400
committer	grfn <grfn@gws.fyi>	2021-06-13T01·24+0000
commit	2cfe4069bb9b46778d737f5ca535edcb9993f356 (patch)
tree	5b1d72f34c42fd0d941792923172a94d26468174 /users/grfn/xanthous/src/Xanthous/Generators/Speech.hs
parent	006e5231e526b3b1e9d06644bd1d2de9d5decb1e (diff)