about summary refs log tree commit diff
path: root/users/grfn/xanthous/src/Xanthous
diff options
context:
space:
mode:
authorGriffin Smith <grfn@gws.fyi>2021-06-13T14·20-0400
committergrfn <grfn@gws.fyi>2021-06-14T13·04+0000
commitefbb135cfc0511ffcd350def9e9e5efcfc7e887c (patch)
treec6bb00ca465c9d65e056b603c74f0cacaf0eb39d /users/grfn/xanthous/src/Xanthous
parent2cfe4069bb9b46778d737f5ca535edcb9993f356 (diff)
feat(xanthous): Extend speech generation to arbitrary words r/2657
Wrap up the Phonotactics type into a Language type including both the
phonotactics and a distribution of the number of syllables per word, so
we can generate arbitrary words in addition to just arbitrary syllables

Change-Id: I8a37ce9c0eec019c9b84d21b0f2b3b9f5fd319eb
Reviewed-on: https://cl.tvl.fyi/c/depot/+/3203
Reviewed-by: grfn <grfn@gws.fyi>
Tested-by: BuildkiteCI
Diffstat (limited to 'users/grfn/xanthous/src/Xanthous')
-rw-r--r--users/grfn/xanthous/src/Xanthous/Generators/Speech.hs121
-rw-r--r--users/grfn/xanthous/src/Xanthous/Random.hs5
2 files changed, 102 insertions, 24 deletions
diff --git a/users/grfn/xanthous/src/Xanthous/Generators/Speech.hs b/users/grfn/xanthous/src/Xanthous/Generators/Speech.hs
index 76830c4d9116..8abc00b6a2fc 100644
--- a/users/grfn/xanthous/src/Xanthous/Generators/Speech.hs
+++ b/users/grfn/xanthous/src/Xanthous/Generators/Speech.hs
@@ -1,9 +1,16 @@
 {-# LANGUAGE TemplateHaskell #-}
+{-# LANGUAGE OverloadedLists #-}
 --------------------------------------------------------------------------------
 module Xanthous.Generators.Speech
-  ( -- * Abstract phonotactics
-    Phonotactics(..)
+  ( -- * Language definition
+    Language(..)
     -- ** Lenses
+  , phonotactics
+  , syllablesPerWord
+
+    -- ** Phonotactics
+  , Phonotactics(..)
+    -- *** Lenses
   , onsets
   , nuclei
   , codas
@@ -11,39 +18,55 @@ module Xanthous.Generators.Speech
   , numNuclei
   , numCodas
 
-    -- ** Definitions for languages
-  , english
-
     -- * Language generation
   , syllable
+  , word
+
+    -- * Languages
+  , english
+  , gormlak
 
   ) where
 --------------------------------------------------------------------------------
 import           Xanthous.Prelude hiding (replicateM)
-import           Data.Interval (Interval)
+import           Data.Interval (Interval, (<=..<=))
 import qualified Data.Interval as Interval
 import           Control.Monad.Random.Class (MonadRandom)
-import           Xanthous.Random (chooseRange, choose, ChooseElement (ChooseElement))
+import           Xanthous.Random (chooseRange, choose, ChooseElement (..), Weighted (Weighted))
 import           Control.Monad (replicateM)
+import           Test.QuickCheck (Arbitrary, CoArbitrary, Function)
+import           Test.QuickCheck.Instances.Text ()
+import           Data.List.NonEmpty (NonEmpty)
 --------------------------------------------------------------------------------
 
 newtype Phoneme = Phoneme Text
   deriving stock (Show, Eq, Generic)
-  deriving anyclass (NFData)
-  deriving newtype (IsString, Semigroup, Monoid)
-
+  deriving anyclass (NFData, CoArbitrary, Function)
+  deriving newtype (IsString, Semigroup, Monoid, Arbitrary)
+
+-- | The phonotactics of a language
+--
+-- The phonotactics of a language represent the restriction on the phonemes in
+-- the syllables of a language.
+--
+-- Syllables in a language consist of an onset, a nucleus, and a coda (the
+-- nucleus and the coda together representing the "rhyme" of the syllable).
 data Phonotactics = Phonotactics
-  { _onsets    :: [Phoneme]
-  , _nuclei    :: [Phoneme]
-  , _codas     :: [Phoneme]
-  , _numOnsets :: Interval Word
-  , _numNuclei :: Interval Word
-  , _numCodas  :: Interval Word
+  { _onsets    :: [Phoneme] -- ^ The permissible onsets, or consonant clusters
+                           --   at the beginning of a syllable
+  , _nuclei    :: [Phoneme] -- ^ The permissible nuclei, or vowel clusters in
+                           --   the middle of a syllable
+  , _codas     :: [Phoneme] -- ^ The permissible codas, or consonant clusters at
+                           --   the end of a syllable
+  , _numOnsets :: Interval Word -- ^ The range of number of allowable onsets
+  , _numNuclei :: Interval Word -- ^ The range of number of allowable nuclei
+  , _numCodas  :: Interval Word -- ^ The range of number of allowable codas
   }
   deriving stock (Show, Eq, Generic)
   deriving anyclass (NFData)
 makeLenses ''Phonotactics
 
+-- | Randomly generate a syllable with the given 'Phonotactics'
 syllable :: MonadRandom m => Phonotactics -> m Text
 syllable phonotactics = do
   let genPart num choices = do
@@ -59,11 +82,28 @@ syllable phonotactics = do
 
   pure $ onset <> nucleus <> coda
 
+-- | A definition for a language
+--
+-- Currently this provides enough information to generate multi-syllabic words,
+-- but in the future will likely also include grammar-related things.
+data Language = Language
+  { _phonotactics :: Phonotactics
+  , _syllablesPerWord :: Weighted Int NonEmpty Int
+  }
+  deriving stock (Show, Eq, Generic)
+  deriving anyclass (NFData)
+makeLenses ''Language
+
+word :: MonadRandom m => Language -> m Text
+word lang = do
+  numSyllables <- choose $ lang ^. syllablesPerWord
+  mconcat <$> replicateM numSyllables (syllable $ lang ^. phonotactics)
+
 --------------------------------------------------------------------------------
 
 -- <https://en.wikipedia.org/wiki/English_phonology#Phonotactics>
-english :: Phonotactics
-english = Phonotactics
+englishPhonotactics :: Phonotactics
+englishPhonotactics = Phonotactics
   { _onsets = [ "pl" , "bl" , "kl" , "gl" , "pr" , "br" , "tr" , "dr" , "kr"
               , "gr" , "tw" , "dw" , "gw" , "kw" , "pw"
 
@@ -75,7 +115,7 @@ english = Phonotactics
 
               , "sm", "sn"
 
-              , "sf", "sθ"
+              , "sf", "sth"
 
               , "spl", "skl", "spr", "str", "skr", "skw", "sm", "sp", "st", "sk"
               ]
@@ -91,18 +131,51 @@ english = Phonotactics
              , "rf", "rv", "rth", "rs", "rz", "rth"
              , "lm", "ln"
              , "rm", "rn", "rl"
-             , "mp", "nt", "nd", "nth", "nsh", "ŋk"
-             , "mf", "ms", "mth", "nf", "nth", "ns", "nz", "ŋθ"
+             , "mp", "nt", "nd", "nth", "nsh", "nk"
+             , "mf", "ms", "mth", "nf", "nth", "ns", "nz", "nth"
              , "ft", "sp", "st", "sk"
              , "fth"
              , "pt", "kt"
              , "pth", "ps", "th", "ts", "dth", "dz", "ks"
              , "lpt", "lps", "lfth", "lts", "lst", "lkt", "lks"
              , "rmth", "rpt", "rps", "rts", "rst", "rkt"
-             , "mpt", "mps", "ndth", "ŋkt", "ŋks", "ŋkth"
+             , "mpt", "mps", "ndth", "nkt", "nks", "nkth"
              , "ksth", "kst"
              ]
-  , _numOnsets = Interval.singleton 1
+  , _numOnsets = 0 <=..<= 1
   , _numNuclei = Interval.singleton 1
-  , _numCodas  = Interval.singleton 1
+  , _numCodas  = 0 <=..<= 1
+  }
+
+english :: Language
+english = Language
+  { _phonotactics = englishPhonotactics
+  , _syllablesPerWord = Weighted [(20, 1),
+                                  (7,  2),
+                                  (2,  3),
+                                  (1,  4)]
+  }
+
+gormlakPhonotactics :: Phonotactics
+gormlakPhonotactics = Phonotactics
+ { _onsets = [ "h", "l", "g", "b", "m", "n", "ng"
+             , "gl", "bl", "fl"
+             ]
+ , _numOnsets = Interval.singleton 1
+ , _nuclei = [ "a", "o", "aa", "u" ]
+ , _numNuclei = Interval.singleton 1
+ , _codas = [ "r", "l", "g", "m", "n"
+            , "rl", "gl", "ml", "rm"
+            , "n", "k"
+            ]
+ , _numCodas = Interval.singleton 1
+ }
+
+gormlak :: Language
+gormlak = Language
+  { _phonotactics = gormlakPhonotactics
+  , _syllablesPerWord = Weighted [ (5, 2)
+                                 , (5, 1)
+                                 , (1, 3)
+                                 ]
   }
diff --git a/users/grfn/xanthous/src/Xanthous/Random.hs b/users/grfn/xanthous/src/Xanthous/Random.hs
index d97dcb9e1175..72bdb63d2c61 100644
--- a/users/grfn/xanthous/src/Xanthous/Random.hs
+++ b/users/grfn/xanthous/src/Xanthous/Random.hs
@@ -1,5 +1,6 @@
 --------------------------------------------------------------------------------
 {-# LANGUAGE UndecidableInstances #-}
+{-# LANGUAGE StandaloneDeriving #-}
 {-# OPTIONS_GHC -fno-warn-orphans #-}
 --------------------------------------------------------------------------------
 module Xanthous.Random
@@ -74,6 +75,10 @@ instance Choose (a, a) where
 newtype Weighted w t a = Weighted (t (w, a))
   deriving (Functor, Foldable) via (t `Compose` (,) w)
 
+deriving newtype instance Eq (t (w, a)) => Eq (Weighted w t a)
+deriving newtype instance Show (t (w, a)) => Show (Weighted w t a)
+deriving newtype instance NFData (t (w, a)) => NFData (Weighted w t a)
+
 instance Traversable t => Traversable (Weighted w t) where
   traverse f (Weighted twa) = Weighted <$> (traverse . traverse) f twa