1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
|
{-# LANGUAGE AllowAmbiguousTypes #-}
{-# LANGUAGE QuasiQuotes #-}
{-# LANGUAGE TemplateHaskell #-}
module Netencode where
import Control.Applicative (many)
import Data.Attoparsec.ByteString qualified as Atto
import Data.Attoparsec.ByteString.Char8 qualified as Atto.Char
import Data.ByteString qualified as ByteString
import Data.ByteString.Builder (Builder)
import Data.ByteString.Builder qualified as Builder
import Data.ByteString.Lazy qualified as ByteString.Lazy
import Data.Fix (Fix (Fix))
import Data.Fix qualified as Fix
import Data.Functor.Classes (Eq1 (liftEq))
import Data.Int (Int16, Int32, Int64, Int8)
import Data.Map.NonEmpty (NEMap)
import Data.Map.NonEmpty qualified as NEMap
import Data.Semigroup qualified as Semi
import Data.String (IsString)
import Data.Word (Word16, Word32, Word64)
import GHC.Exts (fromString)
import Hedgehog qualified as Hedge
import Hedgehog.Gen qualified as Gen
import Hedgehog.Range qualified as Range
import PossehlAnalyticsPrelude
import Text.Show.Deriving
import Prelude hiding (sum)
-- | Netencode type base functor.
--
-- Recursive elements have a @rec@.
data TF rec
= -- | Unit value
Unit
| -- | Boolean (2^1)
N1 Bool
| -- | Byte (2^3)
N3 Word8
| -- | 64-bit Natural (2^6)
N6 Word64
| -- | 64-bit Integer (2^6)
I6 Int64
| -- | Unicode Text
Text Text
| -- | Arbitrary Bytestring
Bytes ByteString
| -- | A constructor of a(n open) Sum
Sum (Tag Text rec)
| -- | Record
Record (NEMap Text rec)
| -- | List
List [rec]
deriving stock (Show, Eq, Functor)
instance Eq1 TF where
liftEq _ Unit Unit = True
liftEq _ (N1 b) (N1 b') = b == b'
liftEq _ (N3 w8) (N3 w8') = w8 == w8'
liftEq _ (N6 w64) (N6 w64') = w64 == w64'
liftEq _ (I6 i64) (I6 i64') = i64 == i64'
liftEq _ (Text t) (Text t') = t == t'
liftEq _ (Bytes b) (Bytes b') = b == b'
liftEq eq (Sum t) (Sum t') = eq (t.tagVal) (t'.tagVal)
liftEq eq (Record m) (Record m') = liftEq eq m m'
liftEq eq (List xs) (List xs') = liftEq eq xs xs'
liftEq _ _ _ = False
-- | A tagged value
data Tag tag val = Tag
{ tagTag :: tag,
tagVal :: val
}
deriving stock (Show, Eq, Functor)
$(Text.Show.Deriving.deriveShow1 ''Tag)
$(Text.Show.Deriving.deriveShow1 ''TF)
-- | The Netencode type
newtype T = T {unT :: Fix TF}
deriving stock (Eq, Show)
-- | Create a unit
unit :: T
unit = T $ Fix Unit
-- | Create a boolean
n1 :: Bool -> T
n1 = T . Fix . N1
-- | Create a byte
n3 :: Word8 -> T
n3 = T . Fix . N3
-- | Create a 64-bit natural
n6 :: Word64 -> T
n6 = T . Fix . N6
-- | Create a 64-bit integer
i6 :: Int64 -> T
i6 = T . Fix . I6
-- | Create a UTF-8 unicode text
text :: Text -> T
text = T . Fix . Text
-- | Create an arbitrary bytestring
bytes :: ByteString -> T
bytes = T . Fix . Bytes
-- | Create a tagged value from a tag name and a value
tag :: Text -> T -> T
tag key val = T $ Fix $ Sum $ coerce @(Tag Text T) @(Tag Text (Fix TF)) $ Tag key val
-- | Create a record from a non-empty map
record :: NEMap Text T -> T
record = T . Fix . Record . coerce @(NEMap Text T) @(NEMap Text (Fix TF))
-- | Create a list
list :: [T] -> T
list = T . Fix . List . coerce @[T] @([Fix TF])
-- | Stable encoding of a netencode value. Record keys will be sorted lexicographically ascending.
netencodeEncodeStable :: T -> Builder
netencodeEncodeStable (T fix) = Fix.foldFix (netencodeEncodeStableF id) fix
-- | Stable encoding of a netencode functor value. Record keys will be sorted lexicographically ascending.
--
-- The given function is used for encoding the recursive values.
netencodeEncodeStableF :: (rec -> Builder) -> TF rec -> Builder
netencodeEncodeStableF inner tf = builder go
where
-- TODO: directly pass in BL?
innerBL = fromBuilder . inner
go = case tf of
Unit -> "u,"
N1 False -> "n1:0,"
N1 True -> "n1:1,"
N3 w8 -> "n3:" <> fromBuilder (Builder.word8Dec w8) <> ","
N6 w64 -> "n6:" <> fromBuilder (Builder.word64Dec w64) <> ","
I6 i64 -> "i6:" <> fromBuilder (Builder.int64Dec i64) <> ","
Text t ->
let b = fromText t
in "t" <> builderLenDec b <> ":" <> b <> ","
Bytes b -> "b" <> builderLenDec (fromByteString b) <> ":" <> fromByteString b <> ","
Sum (Tag key val) -> encTag key val
Record m ->
-- NEMap uses Map internally, and that folds in lexicographic ascending order over the key.
-- Since these are `Text` in our case, this is stable.
let mBuilder = m & NEMap.foldMapWithKey encTag
in "{" <> builderLenDec mBuilder <> ":" <> mBuilder <> "}"
List xs ->
let xsBuilder = xs <&> innerBL & mconcat
in "[" <> builderLenDec xsBuilder <> ":" <> xsBuilder <> "]"
where
encTag key val =
let bKey = fromText key
in "<" <> builderLenDec bKey <> ":" <> bKey <> "|" <> innerBL val
-- | A builder that knows its own size in bytes
newtype BL = BL (Builder, Semi.Sum Natural)
deriving newtype (Monoid, Semigroup)
instance IsString BL where
fromString s =
BL
( fromString @Builder s,
fromString @ByteString s
& ByteString.length
& intToNatural
& fromMaybe 0
& Semi.Sum
)
-- | Retrieve the builder
builder :: BL -> Builder
builder (BL (b, _)) = b
-- | Retrieve the bytestring length
builderLen :: BL -> Natural
builderLen (BL (_, len)) = Semi.getSum $ len
-- | Take a 'BL' and create a new 'BL' that represents the length as a decimal integer
builderLenDec :: BL -> BL
builderLenDec (BL (_, len)) =
let b = Builder.intDec $ (len & Semi.getSum & fromIntegral @Natural @Int)
in b & fromBuilder
-- | Create a 'BL' from a 'Builder'.
--
-- Not efficient, goes back to a lazy bytestring to get the length
fromBuilder :: Builder -> BL
fromBuilder b =
BL
( b,
b
& Builder.toLazyByteString
& ByteString.Lazy.length
& fromIntegral @Int64 @Natural
& Semi.Sum
)
-- | Create a 'BL' from a 'ByteString'.
fromByteString :: ByteString -> BL
fromByteString b =
BL
( Builder.byteString b,
b
& ByteString.length
& fromIntegral @Int @Natural
& Semi.Sum
)
-- | Create a 'BL' from a 'Text'.
fromText :: Text -> BL
fromText t = t & textToBytesUtf8 & fromByteString
-- | Parser for a netencode value.
netencodeParser :: Atto.Parser T
netencodeParser = T <$> go
where
go = Fix <$> netencodeParserF go
-- | Parser for one level of a netencode value. Requires a parser for the recursion.
netencodeParserF :: Atto.Parser rec -> Atto.Parser (TF rec)
netencodeParserF inner = do
typeTag <- Atto.Char.anyChar
case typeTag of
't' -> Text <$> textParser
'b' -> Bytes <$> bytesParser
'u' -> unitParser
'<' -> Sum <$> tagParser
'{' -> Record <$> recordParser
'[' -> List <$> listParser
'n' -> naturalParser
'i' -> I6 <$> intParser
c -> fail ([c] <> " is not a valid netencode tag")
where
bytesParser = do
len <- boundedDecimalFail Atto.<?> "bytes is missing a digit specifying the length"
_ <- Atto.Char.char ':' Atto.<?> "bytes did not have : after length"
bytes' <- Atto.take len
_ <- Atto.Char.char ',' Atto.<?> "bytes did not end with ,"
pure bytes'
textParser = do
len <- boundedDecimalFail Atto.<?> "text is missing a digit specifying the length"
_ <- Atto.Char.char ':' Atto.<?> "text did not have : after length"
text' <-
Atto.take len <&> bytesToTextUtf8 >>= \case
Left err -> fail [fmt|cannot decode text as utf8: {err & prettyError}|]
Right t -> pure t
_ <- Atto.Char.char ',' Atto.<?> "text did not end with ,"
pure text'
unitParser = do
_ <- Atto.Char.char ',' Atto.<?> "unit did not end with ,"
pure $ Unit
tagParser = do
len <- boundedDecimalFail Atto.<?> "tag is missing a digit specifying the length"
_ <- Atto.Char.char ':' Atto.<?> "tag did not have : after length"
tagTag <-
Atto.take len <&> bytesToTextUtf8 >>= \case
Left err -> fail [fmt|cannot decode tag key as utf8: {err & prettyError}|]
Right t -> pure t
_ <- Atto.Char.char '|' Atto.<?> "tag was missing the key/value separator (|)"
tagVal <- inner
pure $ Tag {..}
recordParser = do
-- TODO: the record does not use its inner length because we are descending into the inner parsers.
-- This is a smell! In theory it can be used to skip parsing the whole inner keys.
_len <- boundedDecimalFail Atto.<?> "record is missing a digit specifying the length"
_ <- Atto.Char.char ':' Atto.<?> "record did not have : after length"
record' <-
many (Atto.Char.char '<' >> tagParser) <&> nonEmpty >>= \case
Nothing -> fail "record is not allowed to have 0 elements"
Just tags ->
pure $
tags
<&> (\t -> (t.tagTag, t.tagVal))
-- later keys are preferred if they are duplicates, according to the standard
& NEMap.fromList
_ <- Atto.Char.char '}' Atto.<?> "record did not end with }"
pure record'
listParser = do
-- TODO: the list does not use its inner length because we are descending into the inner parsers.
-- This is a smell! In theory it can be used to skip parsing the whole inner keys.
_len <- boundedDecimalFail Atto.<?> "list is missing a digit specifying the length"
_ <- Atto.Char.char ':' Atto.<?> "list did not have : after length"
-- TODO: allow empty lists?
list' <- many inner
_ <- Atto.Char.char ']' Atto.<?> "list did not end with ]"
pure list'
intParser = do
let p :: forall parseSize. (Bounded parseSize, Integral parseSize) => (Integer -> Atto.Parser Int64)
p n = do
_ <- Atto.Char.char ':' Atto.<?> [fmt|i{n & show} did not have : after length|]
isNegative <- Atto.option False (Atto.Char.char '-' <&> \_c -> True)
int <-
boundedDecimal @parseSize >>= \case
Nothing -> fail [fmt|cannot parse into i{n & show}, the number is too big (would overflow)|]
Just i ->
pure $
if isNegative
then -- TODO: this should alread be done in the decimal parser, @minBound@ cannot be parsed cause it’s one more than @(-maxBound)@!
(-i)
else i
_ <- Atto.Char.char ',' Atto.<?> [fmt|i{n & show} did not end with ,|]
pure $ fromIntegral @parseSize @Int64 int
digit <- Atto.Char.digit
case digit of
-- TODO: separate parser for i1 and i2 that makes sure the boundaries are right!
'1' -> p @Int8 1
'2' -> p @Int8 2
'3' -> p @Int8 3
'4' -> p @Int16 4
'5' -> p @Int32 5
'6' -> p @Int64 6
'7' -> fail [fmt|i parser only supports numbers up to size 6, was 7|]
'8' -> fail [fmt|i parser only supports numbers up to size 6, was 8|]
'9' -> fail [fmt|i parser only supports numbers up to size 6, was 9|]
o -> fail [fmt|i number with length {o & show} not possible|]
naturalParser = do
let p :: forall parseSize finalSize. (Bounded parseSize, Integral parseSize, Num finalSize) => (Integer -> Atto.Parser finalSize)
p n = do
_ <- Atto.Char.char ':' Atto.<?> [fmt|n{n & show} did not have : after length|]
int <-
boundedDecimal @parseSize >>= \case
Nothing -> fail [fmt|cannot parse into n{n & show}, the number is too big (would overflow)|]
Just i -> pure i
_ <- Atto.Char.char ',' Atto.<?> [fmt|n{n & show} did not end with ,|]
pure $ fromIntegral @parseSize @finalSize int
let b n = do
_ <- Atto.Char.char ':' Atto.<?> [fmt|n{n & show} did not have : after length|]
bool <-
(Atto.Char.char '0' >> pure False)
<|> (Atto.Char.char '1' >> pure True)
_ <- Atto.Char.char ',' Atto.<?> [fmt|n{n & show} did not end with ,|]
pure bool
digit <- Atto.Char.digit
case digit of
-- TODO: separate parser for n1 and n2 that makes sure the boundaries are right!
'1' -> N1 <$> b 1
'2' -> N3 <$> p @Word8 @Word8 2
'3' -> N3 <$> p @Word8 @Word8 3
'4' -> N6 <$> p @Word16 @Word64 4
'5' -> N6 <$> p @Word32 @Word64 5
'6' -> N6 <$> p @Word64 @Word64 6
'7' -> fail [fmt|n parser only supports numbers up to size 6, was 7|]
'8' -> fail [fmt|n parser only supports numbers up to size 6, was 8|]
'9' -> fail [fmt|n parser only supports numbers up to size 6, was 9|]
o -> fail [fmt|n number with length {o & show} not possible|]
-- | Parser for a bounded decimal that does not overflow the decimal.
--
-- via https://www.extrema.is/blog/2021/10/20/parsing-bounded-integers
boundedDecimal :: forall a. (Bounded a, Integral a) => Atto.Parser (Maybe a)
boundedDecimal = do
i :: Integer <- decimal
pure $
if (i :: Integer) > fromIntegral (maxBound :: a)
then Nothing
else Just $ fromIntegral i
where
-- Copied from @Attoparsec.Text@ and adjusted to bytestring
decimal :: (Integral a2) => Atto.Parser a2
decimal = ByteString.foldl' step 0 <$> Atto.Char.takeWhile1 Atto.Char.isDigit
where
step a c = a * 10 + fromIntegral (c - 48)
{-# SPECIALIZE boundedDecimal :: Atto.Parser (Maybe Int) #-}
{-# SPECIALIZE boundedDecimal :: Atto.Parser (Maybe Int64) #-}
{-# SPECIALIZE boundedDecimal :: Atto.Parser (Maybe Word8) #-}
{-# SPECIALIZE boundedDecimal :: Atto.Parser (Maybe Word64) #-}
-- | 'boundedDecimal', but fail the parser if the decimal overflows.
boundedDecimalFail :: Atto.Parser Int
boundedDecimalFail =
boundedDecimal >>= \case
Nothing -> fail "decimal out of range"
Just a -> pure a
-- | Hedgehog generator for a netencode value.
genNetencode :: Hedge.MonadGen m => m T
genNetencode =
Gen.recursive
Gen.choice
[ -- these are bundled into one Gen, so that scalar elements get chosen less frequently, and the generator produces nicely nested examples
Gen.frequency
[ (1, pure unit),
(1, n1 <$> Gen.bool),
(1, n3 <$> Gen.element [0, 1, 5]),
(1, n6 <$> Gen.element [0, 1, 5]),
(1, i6 <$> Gen.element [-1, 1, 5]),
(2, text <$> Gen.text (Range.linear 1 10) Gen.lower),
(2, bytes <$> Gen.bytes (Range.linear 1 10))
]
]
[ do
key <- Gen.text (Range.linear 3 10) Gen.lower
val <- genNetencode
pure $ tag key val,
record
<$> ( let k = Gen.text (Range.linear 3 10) Gen.lower
v = genNetencode
in NEMap.insertMap
<$> k
<*> v
<*> ( (Gen.map (Range.linear 0 3)) $
(,) <$> k <*> v
)
)
]
-- | Hedgehog property: encoding a netencode value and parsing it again returns the same result.
prop_netencodeRoundtrip :: Hedge.Property
prop_netencodeRoundtrip = Hedge.property $ do
enc <- Hedge.forAll genNetencode
( Atto.parseOnly
netencodeParser
( netencodeEncodeStable enc
& Builder.toLazyByteString
& toStrictBytes
)
)
Hedge.=== (Right enc)
|