From cd40585ea4481625ed8c198ee56ce2e453a1cd9c Mon Sep 17 00:00:00 2001 From: Profpatsch Date: Sun, 8 Jan 2023 23:41:17 +0100 Subject: feat(users/Profpatsch/netencode): Add initial Haskell parser MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A simple categorical parser that does not implement Monad, and does not contain an `m` and some rudementary error message handling. In the future I’d probably want to wrap everything in an additional `m`, so that subparsers can somehow use `Selective` to throw errors from within `m` that contain the parsing context if at all possible. Hard to do without Monad, I have to say. Not even stuff like `StateT` works without the inner `m` implementing `Monad`. Change-Id: I1366eda606ddfb019637b09c82d8b0e30bd4e318 Reviewed-on: https://cl.tvl.fyi/c/depot/+/7797 Tested-by: BuildkiteCI Reviewed-by: Profpatsch --- users/Profpatsch/my-prelude/MyPrelude.hs | 15 ++-- users/Profpatsch/my-prelude/my-prelude.cabal | 3 +- users/Profpatsch/netencode/Netencode.hs | 75 ++++++++--------- users/Profpatsch/netencode/Netencode/Parse.hs | 112 ++++++++++++++++++++++++++ users/Profpatsch/netencode/default.nix | 1 + users/Profpatsch/netencode/netencode.cabal | 7 +- users/Profpatsch/shell.nix | 1 + 7 files changed, 169 insertions(+), 45 deletions(-) create mode 100644 users/Profpatsch/netencode/Netencode/Parse.hs (limited to 'users') diff --git a/users/Profpatsch/my-prelude/MyPrelude.hs b/users/Profpatsch/my-prelude/MyPrelude.hs index 4ef59c05ff..1be248d091 100644 --- a/users/Profpatsch/my-prelude/MyPrelude.hs +++ b/users/Profpatsch/my-prelude/MyPrelude.hs @@ -1,4 +1,3 @@ -{-# LANGUAGE GHC2021 #-} {-# LANGUAGE ImplicitParams #-} {-# LANGUAGE LambdaCase #-} {-# LANGUAGE MagicHash #-} @@ -52,10 +51,10 @@ module MyPrelude when, unless, guard, - ExceptT, + ExceptT (..), runExceptT, - MonadError, - throwError, + MonadThrow, + throwM, MonadIO, liftIO, MonadReader, @@ -79,6 +78,8 @@ module MyPrelude traverseFold, traverseFold1, traverseFoldDefault, + MonadTrans, + lift, -- * Data types Coercible, @@ -145,15 +146,15 @@ where import Control.Applicative ((<|>)) import Control.Category (Category, (>>>)) import Control.Monad (guard, join, unless, when) +import Control.Monad.Catch (MonadThrow (throwM)) import Control.Monad.Except - ( ExceptT, - MonadError, + ( ExceptT (..), runExceptT, - throwError, ) import Control.Monad.IO.Class (MonadIO, liftIO) import Control.Monad.Identity (Identity (Identity)) import Control.Monad.Reader (MonadReader, asks) +import Control.Monad.Trans (MonadTrans (lift)) import Data.Bifunctor (Bifunctor, bimap, first, second) import Data.ByteString ( ByteString, diff --git a/users/Profpatsch/my-prelude/my-prelude.cabal b/users/Profpatsch/my-prelude/my-prelude.cabal index 8c41e179b2..fd02578013 100644 --- a/users/Profpatsch/my-prelude/my-prelude.cabal +++ b/users/Profpatsch/my-prelude/my-prelude.cabal @@ -30,6 +30,7 @@ library , profunctors , containers , error + , exceptions , bytestring , mtl , hspec @@ -38,4 +39,4 @@ library , nicify-lib , ansi-terminal , vector - default-language: Haskell2010 + default-language: GHC2021 diff --git a/users/Profpatsch/netencode/Netencode.hs b/users/Profpatsch/netencode/Netencode.hs index dfc57ce8dc..36d3907ffc 100644 --- a/users/Profpatsch/netencode/Netencode.hs +++ b/users/Profpatsch/netencode/Netencode.hs @@ -1,6 +1,7 @@ {-# LANGUAGE AllowAmbiguousTypes #-} {-# LANGUAGE DeriveFunctor #-} {-# LANGUAGE DerivingStrategies #-} +{-# LANGUAGE GHC2021 #-} {-# LANGUAGE GeneralizedNewtypeDeriving #-} {-# LANGUAGE LambdaCase #-} {-# LANGUAGE OverloadedStrings #-} @@ -13,57 +14,56 @@ module Netencode where import Control.Applicative (many) -import qualified Data.Attoparsec.ByteString as Atto -import qualified Data.Attoparsec.ByteString.Char8 as Atto.Char -import qualified Data.ByteString as ByteString +import Data.Attoparsec.ByteString qualified as Atto +import Data.Attoparsec.ByteString.Char8 qualified as Atto.Char +import Data.ByteString qualified as ByteString import Data.ByteString.Builder (Builder) -import qualified Data.ByteString.Builder as Builder -import qualified Data.ByteString.Lazy as ByteString.Lazy +import Data.ByteString.Builder qualified as Builder +import Data.ByteString.Lazy qualified as ByteString.Lazy import Data.Coerce (coerce) import Data.Fix (Fix (Fix)) -import qualified Data.Fix as Fix +import Data.Fix qualified as Fix import Data.Functor.Classes (Eq1 (liftEq)) import Data.Int (Int16, Int32, Int64, Int8) import Data.List.NonEmpty (nonEmpty) import Data.Map.NonEmpty (NEMap) -import qualified Data.Map.NonEmpty as NEMap -import qualified Data.Semigroup as Semi +import Data.Map.NonEmpty qualified as NEMap +import Data.Maybe (fromMaybe) +import Data.Semigroup qualified as Semi import Data.String (IsString) import Data.Word (Word16, Word32, Word64) import GHC.Exts (fromString) -import qualified Hedgehog as Hedge -import qualified Hedgehog.Gen as Gen -import qualified Hedgehog.Range as Range +import Hedgehog qualified as Hedge +import Hedgehog.Gen qualified as Gen +import Hedgehog.Range qualified as Range import MyPrelude import Text.Show.Deriving import Prelude hiding (sum) -import Data.Maybe (fromMaybe) - -- | Netencode type base functor. -- -- Recursive elements have a @rec@. data TF rec - = Unit - -- ^ Unit value - | N1 Bool - -- ^ Boolean (2^1) - | N3 Word8 - -- ^ Byte (2^3) - | N6 Word64 - -- ^ 64-bit Natural (2^6) - | I6 Int64 - -- ^ 64-bit Integer (2^6) - | Text Text - -- ^ Unicode Text - | Bytes ByteString - -- ^ Arbitrary Bytestring - | Sum (Tag Text rec) - -- ^ A constructor of a(n open) Sum - | Record (NEMap Text rec) - -- ^ Record - | List [rec] - -- ^ List + = -- | Unit value + Unit + | -- | Boolean (2^1) + N1 Bool + | -- | Byte (2^3) + N3 Word8 + | -- | 64-bit Natural (2^6) + N6 Word64 + | -- | 64-bit Integer (2^6) + I6 Int64 + | -- | Unicode Text + Text Text + | -- | Arbitrary Bytestring + Bytes ByteString + | -- | A constructor of a(n open) Sum + Sum (Tag Text rec) + | -- | Record + Record (NEMap Text rec) + | -- | List + List [rec] deriving stock (Show, Eq, Functor) instance Eq1 TF where @@ -90,7 +90,7 @@ $(Text.Show.Deriving.deriveShow1 ''Tag) $(Text.Show.Deriving.deriveShow1 ''TF) -- | The Netencode type -newtype T = T (Fix TF) +newtype T = T {unT :: Fix TF} deriving stock (Eq, Show) -- | Create a unit @@ -291,7 +291,8 @@ netencodeParserF inner = do Nothing -> fail "record is not allowed to have 0 elements" Just tags -> pure $ - tags <&> (\t -> (t & tagTag, t & tagVal)) + tags + <&> (\t -> (t & tagTag, t & tagVal)) -- later keys are preferred if they are duplicates, according to the standard & NEMap.fromList _ <- Atto.Char.char '}' Atto. "record did not end with }" @@ -421,7 +422,9 @@ genNetencode = record <$> ( let k = Gen.text (Range.linear 3 10) Gen.lower v = genNetencode - in NEMap.insertMap <$> k <*> v + in NEMap.insertMap + <$> k + <*> v <*> ( (Gen.map (Range.linear 0 3)) $ (,) <$> k <*> v ) diff --git a/users/Profpatsch/netencode/Netencode/Parse.hs b/users/Profpatsch/netencode/Netencode/Parse.hs new file mode 100644 index 0000000000..de313571f7 --- /dev/null +++ b/users/Profpatsch/netencode/Netencode/Parse.hs @@ -0,0 +1,112 @@ +{-# LANGUAGE ApplicativeDo #-} +{-# LANGUAGE DataKinds #-} +{-# LANGUAGE DerivingVia #-} +{-# LANGUAGE GHC2021 #-} +{-# LANGUAGE LambdaCase #-} +{-# LANGUAGE OverloadedRecordDot #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE QuasiQuotes #-} +{-# LANGUAGE RecordWildCards #-} +{-# LANGUAGE NoFieldSelectors #-} +{-# OPTIONS_GHC -Wall #-} + +module Netencode.Parse where + +import Control.Category qualified +import Control.Selective (Selective) +import Data.Error.Tree +import Data.Fix (Fix (..)) +import Data.Functor.Compose +import Data.List qualified as List +import Data.Map.NonEmpty (NEMap) +import Data.Map.NonEmpty qualified as NEMap +import Data.Semigroupoid qualified as Semigroupiod +import Data.Semigroupoid qualified as Semigroupoid +import Data.Text qualified as Text +import MyPrelude +import Netencode qualified +import Prelude hiding (log) + +newtype Parse from to + = -- TODO: the way @Context = [Text]@ has to be forwarded to everything is kinda shitty. + -- This is essentially just a difference list, and can probably be treated as a function in the output? + Parse (([Text], from) -> Validation (NonEmpty ErrorTree) ([Text], to)) + deriving + (Functor, Applicative, Selective) + via ( Compose + ( Compose + ((->) ([Text], from)) + (Validation (NonEmpty ErrorTree)) + ) + ((,) [Text]) + ) + +runParse :: Error -> Parse from to -> from -> Either ErrorTree to +runParse errMsg parser t = + (["$"], t) + & runParse' parser + <&> snd + & first (nestedMultiError errMsg) + & validationToEither + +runParse' :: Parse from to -> ([Text], from) -> Validation (NonEmpty ErrorTree) ([Text], to) +runParse' (Parse f) from = f from + +instance Semigroupoid Parse where + o p2 p1 = Parse $ \from -> case runParse' p1 from of + Failure err -> Failure err + Success to1 -> runParse' p2 to1 + +instance Category Parse where + (.) = Semigroupoid.o + id = Parse $ \t -> Success t + +parseEither :: (([Text], from) -> Either ErrorTree ([Text], to)) -> Parse from to +parseEither f = Parse $ \from -> f from & eitherToListValidation + +tAs :: (Netencode.TF (Fix Netencode.TF) -> Either ([Text] -> ErrorTree) to) -> Parse Netencode.T to +tAs f = parseEither ((\(context, Netencode.T (Fix tf)) -> f tf & bimap ($ context) (context,))) + +key :: Text -> Parse (NEMap Text to) to +key name = parseEither $ \(context, rec) -> + rec + & NEMap.lookup name + & annotate (errorTreeContext (showContext context) [fmt|Key "{name}" does not exist|]) + <&> (addContext name context,) + +showContext :: [Text] -> Text +showContext context = context & List.reverse & Text.intercalate "." + +addContext :: a -> [a] -> [a] +addContext = (:) + +asText :: Parse Netencode.T Text +asText = tAs $ \case + Netencode.Text t -> pure t + other -> typeError "of text" other + +asBytes :: Parse Netencode.T ByteString +asBytes = tAs $ \case + Netencode.Bytes b -> pure b + other -> typeError "of bytes" other + +asRecord :: Parse Netencode.T (NEMap Text (Netencode.T)) +asRecord = tAs $ \case + Netencode.Record rec -> pure (rec <&> Netencode.T) + other -> typeError "a record" other + +typeError :: Text -> Netencode.TF ignored -> (Either ([Text] -> ErrorTree) b) +typeError should is = do + let otherS = is <&> (\_ -> ("…" :: String)) & show + Left $ \context -> errorTreeContext (showContext context) [fmt|Value is not {should}, but a {otherS}|] + +orThrowParseError :: + Parse (Either Error to) to +orThrowParseError = Parse $ \case + (context, Left err) -> + err + & singleError + & errorTreeContext (showContext context) + & singleton + & Failure + (context, Right to) -> Success (context, to) diff --git a/users/Profpatsch/netencode/default.nix b/users/Profpatsch/netencode/default.nix index 00fadf6953..cb3dfaee45 100644 --- a/users/Profpatsch/netencode/default.nix +++ b/users/Profpatsch/netencode/default.nix @@ -18,6 +18,7 @@ let src = depot.users.Profpatsch.exactSource ./. [ ./netencode.cabal ./Netencode.hs + ./Netencode/Parse.hs ]; libraryHaskellDepends = [ diff --git a/users/Profpatsch/netencode/netencode.cabal b/users/Profpatsch/netencode/netencode.cabal index 1bd1d6052a..4e418d6dd8 100644 --- a/users/Profpatsch/netencode/netencode.cabal +++ b/users/Profpatsch/netencode/netencode.cabal @@ -5,7 +5,9 @@ author: Profpatsch maintainer: mail@profpatsch.de library - exposed-modules: Netencode + exposed-modules: + Netencode, + Netencode.Parse build-depends: base >=4.15 && <5, @@ -16,5 +18,8 @@ library data-fix, bytestring, attoparsec, + text, + semigroupoids, + selective default-language: Haskell2010 diff --git a/users/Profpatsch/shell.nix b/users/Profpatsch/shell.nix index cde599a8b4..499109ec37 100644 --- a/users/Profpatsch/shell.nix +++ b/users/Profpatsch/shell.nix @@ -37,6 +37,7 @@ pkgs.mkShell { h.hspec-expectations-pretty-diff depot.users.Profpatsch.my-prelude depot.users.Profpatsch.netencode.netencode-hs + depot.users.Profpatsch.arglib.netencode.haskell depot.users.Profpatsch.execline.exec-helpers-hs ])) -- cgit 1.4.1