diff options
author | Profpatsch <mail@profpatsch.de> | 2023-01-08T22·41+0100 |
---|---|---|
committer | Profpatsch <mail@profpatsch.de> | 2023-01-08T23·10+0000 |
commit | cd40585ea4481625ed8c198ee56ce2e453a1cd9c (patch) | |
tree | d01e057271c64df72624e022b67265141b4d13e5 /users/Profpatsch/netencode | |
parent | 8cdefc5b253109d319267b68f0f45c0d3f021d17 (diff) |
feat(users/Profpatsch/netencode): Add initial Haskell parser r/5632
A simple categorical parser that does not implement Monad, and does not contain an `m` and some rudementary error message handling. In the future I’d probably want to wrap everything in an additional `m`, so that subparsers can somehow use `Selective` to throw errors from within `m` that contain the parsing context if at all possible. Hard to do without Monad, I have to say. Not even stuff like `StateT` works without the inner `m` implementing `Monad`. Change-Id: I1366eda606ddfb019637b09c82d8b0e30bd4e318 Reviewed-on: https://cl.tvl.fyi/c/depot/+/7797 Tested-by: BuildkiteCI Reviewed-by: Profpatsch <mail@profpatsch.de>
Diffstat (limited to 'users/Profpatsch/netencode')
-rw-r--r-- | users/Profpatsch/netencode/Netencode.hs | 75 | ||||
-rw-r--r-- | users/Profpatsch/netencode/Netencode/Parse.hs | 112 | ||||
-rw-r--r-- | users/Profpatsch/netencode/default.nix | 1 | ||||
-rw-r--r-- | users/Profpatsch/netencode/netencode.cabal | 7 |
4 files changed, 158 insertions, 37 deletions
diff --git a/users/Profpatsch/netencode/Netencode.hs b/users/Profpatsch/netencode/Netencode.hs index dfc57ce8dc27..36d3907ffc0a 100644 --- a/users/Profpatsch/netencode/Netencode.hs +++ b/users/Profpatsch/netencode/Netencode.hs @@ -1,6 +1,7 @@ {-# LANGUAGE AllowAmbiguousTypes #-} {-# LANGUAGE DeriveFunctor #-} {-# LANGUAGE DerivingStrategies #-} +{-# LANGUAGE GHC2021 #-} {-# LANGUAGE GeneralizedNewtypeDeriving #-} {-# LANGUAGE LambdaCase #-} {-# LANGUAGE OverloadedStrings #-} @@ -13,57 +14,56 @@ module Netencode where import Control.Applicative (many) -import qualified Data.Attoparsec.ByteString as Atto -import qualified Data.Attoparsec.ByteString.Char8 as Atto.Char -import qualified Data.ByteString as ByteString +import Data.Attoparsec.ByteString qualified as Atto +import Data.Attoparsec.ByteString.Char8 qualified as Atto.Char +import Data.ByteString qualified as ByteString import Data.ByteString.Builder (Builder) -import qualified Data.ByteString.Builder as Builder -import qualified Data.ByteString.Lazy as ByteString.Lazy +import Data.ByteString.Builder qualified as Builder +import Data.ByteString.Lazy qualified as ByteString.Lazy import Data.Coerce (coerce) import Data.Fix (Fix (Fix)) -import qualified Data.Fix as Fix +import Data.Fix qualified as Fix import Data.Functor.Classes (Eq1 (liftEq)) import Data.Int (Int16, Int32, Int64, Int8) import Data.List.NonEmpty (nonEmpty) import Data.Map.NonEmpty (NEMap) -import qualified Data.Map.NonEmpty as NEMap -import qualified Data.Semigroup as Semi +import Data.Map.NonEmpty qualified as NEMap +import Data.Maybe (fromMaybe) +import Data.Semigroup qualified as Semi import Data.String (IsString) import Data.Word (Word16, Word32, Word64) import GHC.Exts (fromString) -import qualified Hedgehog as Hedge -import qualified Hedgehog.Gen as Gen -import qualified Hedgehog.Range as Range +import Hedgehog qualified as Hedge +import Hedgehog.Gen qualified as Gen +import Hedgehog.Range qualified as Range import MyPrelude import Text.Show.Deriving import Prelude hiding (sum) -import Data.Maybe (fromMaybe) - -- | Netencode type base functor. -- -- Recursive elements have a @rec@. data TF rec - = Unit - -- ^ Unit value - | N1 Bool - -- ^ Boolean (2^1) - | N3 Word8 - -- ^ Byte (2^3) - | N6 Word64 - -- ^ 64-bit Natural (2^6) - | I6 Int64 - -- ^ 64-bit Integer (2^6) - | Text Text - -- ^ Unicode Text - | Bytes ByteString - -- ^ Arbitrary Bytestring - | Sum (Tag Text rec) - -- ^ A constructor of a(n open) Sum - | Record (NEMap Text rec) - -- ^ Record - | List [rec] - -- ^ List + = -- | Unit value + Unit + | -- | Boolean (2^1) + N1 Bool + | -- | Byte (2^3) + N3 Word8 + | -- | 64-bit Natural (2^6) + N6 Word64 + | -- | 64-bit Integer (2^6) + I6 Int64 + | -- | Unicode Text + Text Text + | -- | Arbitrary Bytestring + Bytes ByteString + | -- | A constructor of a(n open) Sum + Sum (Tag Text rec) + | -- | Record + Record (NEMap Text rec) + | -- | List + List [rec] deriving stock (Show, Eq, Functor) instance Eq1 TF where @@ -90,7 +90,7 @@ $(Text.Show.Deriving.deriveShow1 ''Tag) $(Text.Show.Deriving.deriveShow1 ''TF) -- | The Netencode type -newtype T = T (Fix TF) +newtype T = T {unT :: Fix TF} deriving stock (Eq, Show) -- | Create a unit @@ -291,7 +291,8 @@ netencodeParserF inner = do Nothing -> fail "record is not allowed to have 0 elements" Just tags -> pure $ - tags <&> (\t -> (t & tagTag, t & tagVal)) + tags + <&> (\t -> (t & tagTag, t & tagVal)) -- later keys are preferred if they are duplicates, according to the standard & NEMap.fromList _ <- Atto.Char.char '}' Atto.<?> "record did not end with }" @@ -421,7 +422,9 @@ genNetencode = record <$> ( let k = Gen.text (Range.linear 3 10) Gen.lower v = genNetencode - in NEMap.insertMap <$> k <*> v + in NEMap.insertMap + <$> k + <*> v <*> ( (Gen.map (Range.linear 0 3)) $ (,) <$> k <*> v ) diff --git a/users/Profpatsch/netencode/Netencode/Parse.hs b/users/Profpatsch/netencode/Netencode/Parse.hs new file mode 100644 index 000000000000..de313571f713 --- /dev/null +++ b/users/Profpatsch/netencode/Netencode/Parse.hs @@ -0,0 +1,112 @@ +{-# LANGUAGE ApplicativeDo #-} +{-# LANGUAGE DataKinds #-} +{-# LANGUAGE DerivingVia #-} +{-# LANGUAGE GHC2021 #-} +{-# LANGUAGE LambdaCase #-} +{-# LANGUAGE OverloadedRecordDot #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE QuasiQuotes #-} +{-# LANGUAGE RecordWildCards #-} +{-# LANGUAGE NoFieldSelectors #-} +{-# OPTIONS_GHC -Wall #-} + +module Netencode.Parse where + +import Control.Category qualified +import Control.Selective (Selective) +import Data.Error.Tree +import Data.Fix (Fix (..)) +import Data.Functor.Compose +import Data.List qualified as List +import Data.Map.NonEmpty (NEMap) +import Data.Map.NonEmpty qualified as NEMap +import Data.Semigroupoid qualified as Semigroupiod +import Data.Semigroupoid qualified as Semigroupoid +import Data.Text qualified as Text +import MyPrelude +import Netencode qualified +import Prelude hiding (log) + +newtype Parse from to + = -- TODO: the way @Context = [Text]@ has to be forwarded to everything is kinda shitty. + -- This is essentially just a difference list, and can probably be treated as a function in the output? + Parse (([Text], from) -> Validation (NonEmpty ErrorTree) ([Text], to)) + deriving + (Functor, Applicative, Selective) + via ( Compose + ( Compose + ((->) ([Text], from)) + (Validation (NonEmpty ErrorTree)) + ) + ((,) [Text]) + ) + +runParse :: Error -> Parse from to -> from -> Either ErrorTree to +runParse errMsg parser t = + (["$"], t) + & runParse' parser + <&> snd + & first (nestedMultiError errMsg) + & validationToEither + +runParse' :: Parse from to -> ([Text], from) -> Validation (NonEmpty ErrorTree) ([Text], to) +runParse' (Parse f) from = f from + +instance Semigroupoid Parse where + o p2 p1 = Parse $ \from -> case runParse' p1 from of + Failure err -> Failure err + Success to1 -> runParse' p2 to1 + +instance Category Parse where + (.) = Semigroupoid.o + id = Parse $ \t -> Success t + +parseEither :: (([Text], from) -> Either ErrorTree ([Text], to)) -> Parse from to +parseEither f = Parse $ \from -> f from & eitherToListValidation + +tAs :: (Netencode.TF (Fix Netencode.TF) -> Either ([Text] -> ErrorTree) to) -> Parse Netencode.T to +tAs f = parseEither ((\(context, Netencode.T (Fix tf)) -> f tf & bimap ($ context) (context,))) + +key :: Text -> Parse (NEMap Text to) to +key name = parseEither $ \(context, rec) -> + rec + & NEMap.lookup name + & annotate (errorTreeContext (showContext context) [fmt|Key "{name}" does not exist|]) + <&> (addContext name context,) + +showContext :: [Text] -> Text +showContext context = context & List.reverse & Text.intercalate "." + +addContext :: a -> [a] -> [a] +addContext = (:) + +asText :: Parse Netencode.T Text +asText = tAs $ \case + Netencode.Text t -> pure t + other -> typeError "of text" other + +asBytes :: Parse Netencode.T ByteString +asBytes = tAs $ \case + Netencode.Bytes b -> pure b + other -> typeError "of bytes" other + +asRecord :: Parse Netencode.T (NEMap Text (Netencode.T)) +asRecord = tAs $ \case + Netencode.Record rec -> pure (rec <&> Netencode.T) + other -> typeError "a record" other + +typeError :: Text -> Netencode.TF ignored -> (Either ([Text] -> ErrorTree) b) +typeError should is = do + let otherS = is <&> (\_ -> ("…" :: String)) & show + Left $ \context -> errorTreeContext (showContext context) [fmt|Value is not {should}, but a {otherS}|] + +orThrowParseError :: + Parse (Either Error to) to +orThrowParseError = Parse $ \case + (context, Left err) -> + err + & singleError + & errorTreeContext (showContext context) + & singleton + & Failure + (context, Right to) -> Success (context, to) diff --git a/users/Profpatsch/netencode/default.nix b/users/Profpatsch/netencode/default.nix index 00fadf695357..cb3dfaee4529 100644 --- a/users/Profpatsch/netencode/default.nix +++ b/users/Profpatsch/netencode/default.nix @@ -18,6 +18,7 @@ let src = depot.users.Profpatsch.exactSource ./. [ ./netencode.cabal ./Netencode.hs + ./Netencode/Parse.hs ]; libraryHaskellDepends = [ diff --git a/users/Profpatsch/netencode/netencode.cabal b/users/Profpatsch/netencode/netencode.cabal index 1bd1d6052ab2..4e418d6dd887 100644 --- a/users/Profpatsch/netencode/netencode.cabal +++ b/users/Profpatsch/netencode/netencode.cabal @@ -5,7 +5,9 @@ author: Profpatsch maintainer: mail@profpatsch.de library - exposed-modules: Netencode + exposed-modules: + Netencode, + Netencode.Parse build-depends: base >=4.15 && <5, @@ -16,5 +18,8 @@ library data-fix, bytestring, attoparsec, + text, + semigroupoids, + selective default-language: Haskell2010 |