about summary refs log tree commit diff
path: root/users/Profpatsch/netencode
diff options
context:
space:
mode:
authorProfpatsch <mail@profpatsch.de>2023-01-08T22·41+0100
committerProfpatsch <mail@profpatsch.de>2023-01-08T23·10+0000
commitcd40585ea4481625ed8c198ee56ce2e453a1cd9c (patch)
treed01e057271c64df72624e022b67265141b4d13e5 /users/Profpatsch/netencode
parent8cdefc5b253109d319267b68f0f45c0d3f021d17 (diff)
feat(users/Profpatsch/netencode): Add initial Haskell parser r/5632
A simple categorical parser that does not implement Monad, and does
not contain an `m` and some rudementary error message handling.

In the future I’d probably want to wrap everything in an additional
`m`, so that subparsers can somehow use `Selective` to throw errors
from within `m` that contain the parsing context if at all possible.
Hard to do without Monad, I have to say. Not even stuff like `StateT`
works without the inner `m` implementing `Monad`.

Change-Id: I1366eda606ddfb019637b09c82d8b0e30bd4e318
Reviewed-on: https://cl.tvl.fyi/c/depot/+/7797
Tested-by: BuildkiteCI
Reviewed-by: Profpatsch <mail@profpatsch.de>
Diffstat (limited to 'users/Profpatsch/netencode')
-rw-r--r--users/Profpatsch/netencode/Netencode.hs75
-rw-r--r--users/Profpatsch/netencode/Netencode/Parse.hs112
-rw-r--r--users/Profpatsch/netencode/default.nix1
-rw-r--r--users/Profpatsch/netencode/netencode.cabal7
4 files changed, 158 insertions, 37 deletions
diff --git a/users/Profpatsch/netencode/Netencode.hs b/users/Profpatsch/netencode/Netencode.hs
index dfc57ce8dc..36d3907ffc 100644
--- a/users/Profpatsch/netencode/Netencode.hs
+++ b/users/Profpatsch/netencode/Netencode.hs
@@ -1,6 +1,7 @@
 {-# LANGUAGE AllowAmbiguousTypes #-}
 {-# LANGUAGE DeriveFunctor #-}
 {-# LANGUAGE DerivingStrategies #-}
+{-# LANGUAGE GHC2021 #-}
 {-# LANGUAGE GeneralizedNewtypeDeriving #-}
 {-# LANGUAGE LambdaCase #-}
 {-# LANGUAGE OverloadedStrings #-}
@@ -13,57 +14,56 @@
 module Netencode where
 
 import Control.Applicative (many)
-import qualified Data.Attoparsec.ByteString as Atto
-import qualified Data.Attoparsec.ByteString.Char8 as Atto.Char
-import qualified Data.ByteString as ByteString
+import Data.Attoparsec.ByteString qualified as Atto
+import Data.Attoparsec.ByteString.Char8 qualified as Atto.Char
+import Data.ByteString qualified as ByteString
 import Data.ByteString.Builder (Builder)
-import qualified Data.ByteString.Builder as Builder
-import qualified Data.ByteString.Lazy as ByteString.Lazy
+import Data.ByteString.Builder qualified as Builder
+import Data.ByteString.Lazy qualified as ByteString.Lazy
 import Data.Coerce (coerce)
 import Data.Fix (Fix (Fix))
-import qualified Data.Fix as Fix
+import Data.Fix qualified as Fix
 import Data.Functor.Classes (Eq1 (liftEq))
 import Data.Int (Int16, Int32, Int64, Int8)
 import Data.List.NonEmpty (nonEmpty)
 import Data.Map.NonEmpty (NEMap)
-import qualified Data.Map.NonEmpty as NEMap
-import qualified Data.Semigroup as Semi
+import Data.Map.NonEmpty qualified as NEMap
+import Data.Maybe (fromMaybe)
+import Data.Semigroup qualified as Semi
 import Data.String (IsString)
 import Data.Word (Word16, Word32, Word64)
 import GHC.Exts (fromString)
-import qualified Hedgehog as Hedge
-import qualified Hedgehog.Gen as Gen
-import qualified Hedgehog.Range as Range
+import Hedgehog qualified as Hedge
+import Hedgehog.Gen qualified as Gen
+import Hedgehog.Range qualified as Range
 import MyPrelude
 import Text.Show.Deriving
 import Prelude hiding (sum)
-import Data.Maybe (fromMaybe)
-
 
 -- | Netencode type base functor.
 --
 -- Recursive elements have a @rec@.
 data TF rec
-  = Unit
-  -- ^ Unit value
-  | N1 Bool
-  -- ^ Boolean (2^1)
-  | N3 Word8
-  -- ^ Byte (2^3)
-  | N6 Word64
-  -- ^ 64-bit Natural (2^6)
-  | I6 Int64
-  -- ^ 64-bit Integer (2^6)
-  | Text Text
-  -- ^ Unicode Text
-  | Bytes ByteString
-  -- ^ Arbitrary Bytestring
-  | Sum (Tag Text rec)
-  -- ^ A constructor of a(n open) Sum
-  | Record (NEMap Text rec)
-  -- ^ Record
-  | List [rec]
-  -- ^ List
+  = -- | Unit value
+    Unit
+  | -- | Boolean (2^1)
+    N1 Bool
+  | -- | Byte (2^3)
+    N3 Word8
+  | -- | 64-bit Natural (2^6)
+    N6 Word64
+  | -- | 64-bit Integer (2^6)
+    I6 Int64
+  | -- | Unicode Text
+    Text Text
+  | -- | Arbitrary Bytestring
+    Bytes ByteString
+  | -- | A constructor of a(n open) Sum
+    Sum (Tag Text rec)
+  | -- | Record
+    Record (NEMap Text rec)
+  | -- | List
+    List [rec]
   deriving stock (Show, Eq, Functor)
 
 instance Eq1 TF where
@@ -90,7 +90,7 @@ $(Text.Show.Deriving.deriveShow1 ''Tag)
 $(Text.Show.Deriving.deriveShow1 ''TF)
 
 -- | The Netencode type
-newtype T = T (Fix TF)
+newtype T = T {unT :: Fix TF}
   deriving stock (Eq, Show)
 
 -- | Create a unit
@@ -291,7 +291,8 @@ netencodeParserF inner = do
           Nothing -> fail "record is not allowed to have 0 elements"
           Just tags ->
             pure $
-              tags <&> (\t -> (t & tagTag, t & tagVal))
+              tags
+                <&> (\t -> (t & tagTag, t & tagVal))
                 -- later keys are preferred if they are duplicates, according to the standard
                 & NEMap.fromList
       _ <- Atto.Char.char '}' Atto.<?> "record did not end with }"
@@ -421,7 +422,9 @@ genNetencode =
       record
         <$> ( let k = Gen.text (Range.linear 3 10) Gen.lower
                   v = genNetencode
-               in NEMap.insertMap <$> k <*> v
+               in NEMap.insertMap
+                    <$> k
+                    <*> v
                     <*> ( (Gen.map (Range.linear 0 3)) $
                             (,) <$> k <*> v
                         )
diff --git a/users/Profpatsch/netencode/Netencode/Parse.hs b/users/Profpatsch/netencode/Netencode/Parse.hs
new file mode 100644
index 0000000000..de313571f7
--- /dev/null
+++ b/users/Profpatsch/netencode/Netencode/Parse.hs
@@ -0,0 +1,112 @@
+{-# LANGUAGE ApplicativeDo #-}
+{-# LANGUAGE DataKinds #-}
+{-# LANGUAGE DerivingVia #-}
+{-# LANGUAGE GHC2021 #-}
+{-# LANGUAGE LambdaCase #-}
+{-# LANGUAGE OverloadedRecordDot #-}
+{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE QuasiQuotes #-}
+{-# LANGUAGE RecordWildCards #-}
+{-# LANGUAGE NoFieldSelectors #-}
+{-# OPTIONS_GHC -Wall #-}
+
+module Netencode.Parse where
+
+import Control.Category qualified
+import Control.Selective (Selective)
+import Data.Error.Tree
+import Data.Fix (Fix (..))
+import Data.Functor.Compose
+import Data.List qualified as List
+import Data.Map.NonEmpty (NEMap)
+import Data.Map.NonEmpty qualified as NEMap
+import Data.Semigroupoid qualified as Semigroupiod
+import Data.Semigroupoid qualified as Semigroupoid
+import Data.Text qualified as Text
+import MyPrelude
+import Netencode qualified
+import Prelude hiding (log)
+
+newtype Parse from to
+  = -- TODO: the way @Context = [Text]@ has to be forwarded to everything is kinda shitty.
+    -- This is essentially just a difference list, and can probably be treated as a function in the output?
+    Parse (([Text], from) -> Validation (NonEmpty ErrorTree) ([Text], to))
+  deriving
+    (Functor, Applicative, Selective)
+    via ( Compose
+            ( Compose
+                ((->) ([Text], from))
+                (Validation (NonEmpty ErrorTree))
+            )
+            ((,) [Text])
+        )
+
+runParse :: Error -> Parse from to -> from -> Either ErrorTree to
+runParse errMsg parser t =
+  (["$"], t)
+    & runParse' parser
+    <&> snd
+    & first (nestedMultiError errMsg)
+    & validationToEither
+
+runParse' :: Parse from to -> ([Text], from) -> Validation (NonEmpty ErrorTree) ([Text], to)
+runParse' (Parse f) from = f from
+
+instance Semigroupoid Parse where
+  o p2 p1 = Parse $ \from -> case runParse' p1 from of
+    Failure err -> Failure err
+    Success to1 -> runParse' p2 to1
+
+instance Category Parse where
+  (.) = Semigroupoid.o
+  id = Parse $ \t -> Success t
+
+parseEither :: (([Text], from) -> Either ErrorTree ([Text], to)) -> Parse from to
+parseEither f = Parse $ \from -> f from & eitherToListValidation
+
+tAs :: (Netencode.TF (Fix Netencode.TF) -> Either ([Text] -> ErrorTree) to) -> Parse Netencode.T to
+tAs f = parseEither ((\(context, Netencode.T (Fix tf)) -> f tf & bimap ($ context) (context,)))
+
+key :: Text -> Parse (NEMap Text to) to
+key name = parseEither $ \(context, rec) ->
+  rec
+    & NEMap.lookup name
+    & annotate (errorTreeContext (showContext context) [fmt|Key "{name}" does not exist|])
+    <&> (addContext name context,)
+
+showContext :: [Text] -> Text
+showContext context = context & List.reverse & Text.intercalate "."
+
+addContext :: a -> [a] -> [a]
+addContext = (:)
+
+asText :: Parse Netencode.T Text
+asText = tAs $ \case
+  Netencode.Text t -> pure t
+  other -> typeError "of text" other
+
+asBytes :: Parse Netencode.T ByteString
+asBytes = tAs $ \case
+  Netencode.Bytes b -> pure b
+  other -> typeError "of bytes" other
+
+asRecord :: Parse Netencode.T (NEMap Text (Netencode.T))
+asRecord = tAs $ \case
+  Netencode.Record rec -> pure (rec <&> Netencode.T)
+  other -> typeError "a record" other
+
+typeError :: Text -> Netencode.TF ignored -> (Either ([Text] -> ErrorTree) b)
+typeError should is = do
+  let otherS = is <&> (\_ -> ("…" :: String)) & show
+  Left $ \context -> errorTreeContext (showContext context) [fmt|Value is not {should}, but a {otherS}|]
+
+orThrowParseError ::
+  Parse (Either Error to) to
+orThrowParseError = Parse $ \case
+  (context, Left err) ->
+    err
+      & singleError
+      & errorTreeContext (showContext context)
+      & singleton
+      & Failure
+  (context, Right to) -> Success (context, to)
diff --git a/users/Profpatsch/netencode/default.nix b/users/Profpatsch/netencode/default.nix
index 00fadf6953..cb3dfaee45 100644
--- a/users/Profpatsch/netencode/default.nix
+++ b/users/Profpatsch/netencode/default.nix
@@ -18,6 +18,7 @@ let
     src = depot.users.Profpatsch.exactSource ./. [
       ./netencode.cabal
       ./Netencode.hs
+      ./Netencode/Parse.hs
     ];
 
     libraryHaskellDepends = [
diff --git a/users/Profpatsch/netencode/netencode.cabal b/users/Profpatsch/netencode/netencode.cabal
index 1bd1d6052a..4e418d6dd8 100644
--- a/users/Profpatsch/netencode/netencode.cabal
+++ b/users/Profpatsch/netencode/netencode.cabal
@@ -5,7 +5,9 @@ author:             Profpatsch
 maintainer:         mail@profpatsch.de
 
 library
-    exposed-modules:          Netencode
+    exposed-modules:
+        Netencode,
+        Netencode.Parse
 
     build-depends:
         base >=4.15 && <5,
@@ -16,5 +18,8 @@ library
         data-fix,
         bytestring,
         attoparsec,
+        text,
+        semigroupoids,
+        selective
 
     default-language: Haskell2010