From a5f2b446aa6e33b533cf9e16c325cf2360f69693 Mon Sep 17 00:00:00 2001 From: sterni Date: Thu, 1 Apr 2021 11:15:57 +0200 Subject: feat(sterni/nix/url): implement urldecoding We use builtins.split directly as it should be a bit more efficient than lib.splitStrings. Also its returning of a list for every regex match is useful to update the state while parsing the tokens: * The tokens are obtained by splitting the string at every '%' * Every time we see a boundary (that is a list in the returned list of builtins.split), we know that the first two chars of the next string are a percent-encoded character. One implementation flaw is that it will currently crash if it encounters malformed URLs (since int.fromHex crashes if it encounters any non-hex-digit characters) and accepts some malformed urlencoding like "foo %A". Change-Id: I90d08d7a71b16b4f4a4879214abd7aeff46c20c8 Reviewed-on: https://cl.tvl.fyi/c/depot/+/2744 Tested-by: BuildkiteCI Reviewed-by: sterni --- users/sterni/nix/url/default.nix | 39 ++++++++++++++++++++++++++++++++++ users/sterni/nix/url/tests/default.nix | 7 ++++++ 2 files changed, 46 insertions(+) (limited to 'users/sterni') diff --git a/users/sterni/nix/url/default.nix b/users/sterni/nix/url/default.nix index ce7ed9b834..37bd0de66a 100644 --- a/users/sterni/nix/url/default.nix +++ b/users/sterni/nix/url/default.nix @@ -6,6 +6,7 @@ let char int string + flow ; reserved = c: builtins.elem c [ @@ -35,8 +36,46 @@ let else percentEncode c; in lib.concatStrings (builtins.map tr chars); + decode = s: + let + tokens = builtins.split "%" s; + decodeStep = + { result ? "" + , inPercent ? 
false + }: s: + flow.cond [ + [ + (builtins.isList s) + { + inherit result; + inPercent = true; + } + ] + [ + inPercent + { + inPercent = false; + # first two characters came after an % + # the rest is the string until the next % + result = result + + char.chr (int.fromHex (string.take 2 s)) + + (string.drop 2 s); + } + ] + [ + (!inPercent) + { + result = result + s; + } + ] + ]; + + in + (builtins.foldl' decodeStep {} tokens).result; + in { inherit encode + decode ; } diff --git a/users/sterni/nix/url/tests/default.nix b/users/sterni/nix/url/tests/default.nix index f58cf12a02..7cf53cde15 100644 --- a/users/sterni/nix/url/tests/default.nix +++ b/users/sterni/nix/url/tests/default.nix @@ -16,6 +16,10 @@ let assertEq "encode ${builtins.toJSON left} == ${builtins.toJSON right}" (url.encode args left) right; + checkDecoding = { left, right }: + assertEq "${builtins.toJSON left} == decode ${builtins.toJSON right}" + (url.decode left) right; + unreserved = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789.-_~"; encodeExpected = [ @@ -31,6 +35,9 @@ let testEncode = it "checks url.encode" (builtins.map (checkEncoding {}) encodeExpected); + testDecode = it "checks url.decode" + (builtins.map checkDecoding encodeExpected); + testLeaveReserved = it "checks that leaveReserved is like id for valid URLs" (builtins.map (x: checkEncoding { leaveReserved = true; } { left = x; right = x; }) [ "ftp://ftp.is.co.za/rfc/rfc1808.txt" -- cgit 1.4.1