about summary refs log tree commit diff
path: root/users/sterni/nix/url
diff options
context:
space:
mode:
author sterni <sternenseemann@systemli.org> 2021-04-01T09·15+0200
committer sterni <sternenseemann@systemli.org> 2021-04-01T13·09+0000
commit a5f2b446aa6e33b533cf9e16c325cf2360f69693 (patch)
tree f536e8833cccac28a8f7e691542c420406f4ae8e /users/sterni/nix/url
parent 28b2c79c1459c5ced640cd3e1594267a2bf86101 (diff)
feat(sterni/nix/url): implement urldecoding r/2385
We use builtins.split directly as it should be a bit more efficient than
lib.splitString. Also, the fact that it returns a list for every regex
match is useful for updating the state while parsing the tokens:

* The tokens are obtained by splitting the string at every '%'
* Every time we see a boundary (that is, a list in the list returned
  by builtins.split), we know that the first two chars of the next
  string are the hex digits of a percent-encoded character.

One implementation flaw is that it will currently crash if it encounters
malformed URLs (since int.fromHex crashes if it encounters any non-hex-digit
characters), while it still silently accepts some malformed urlencoding
like "foo %A".

Change-Id: I90d08d7a71b16b4f4a4879214abd7aeff46c20c8
Reviewed-on: https://cl.tvl.fyi/c/depot/+/2744
Tested-by: BuildkiteCI
Reviewed-by: sterni <sternenseemann@systemli.org>
Diffstat (limited to 'users/sterni/nix/url')
-rw-r--r-- users/sterni/nix/url/default.nix 39
-rw-r--r-- users/sterni/nix/url/tests/default.nix 7
2 files changed, 46 insertions, 0 deletions
diff --git a/users/sterni/nix/url/default.nix b/users/sterni/nix/url/default.nix
index ce7ed9b83437..37bd0de66ac9 100644
--- a/users/sterni/nix/url/default.nix
+++ b/users/sterni/nix/url/default.nix
@@ -6,6 +6,7 @@ let
     char
     int
     string
+    flow
     ;
 
   reserved = c: builtins.elem c [
@@ -35,8 +36,46 @@ let
         else percentEncode c;
     in lib.concatStrings (builtins.map tr chars);
 
+  decode = s:
+    let
+      tokens = builtins.split "%" s;
+      decodeStep =
+        { result ? ""
+        , inPercent ? false
+        }: s:
+        flow.cond [
+          [
+            (builtins.isList s)
+            {
+              inherit result;
+              inPercent = true;
+            }
+          ]
+          [
+            inPercent
+            {
+              inPercent = false;
+              # first two characters came after an %
+              # the rest is the string until the next %
+              result = result
+                + char.chr (int.fromHex (string.take 2 s))
+                + (string.drop 2 s);
+            }
+          ]
+          [
+            (!inPercent)
+            {
+              result = result + s;
+            }
+          ]
+        ];
+
+    in
+      (builtins.foldl' decodeStep {} tokens).result;
+
 in {
   inherit
     encode
+    decode
     ;
 }
diff --git a/users/sterni/nix/url/tests/default.nix b/users/sterni/nix/url/tests/default.nix
index f58cf12a02b2..7cf53cde1555 100644
--- a/users/sterni/nix/url/tests/default.nix
+++ b/users/sterni/nix/url/tests/default.nix
@@ -16,6 +16,10 @@ let
     assertEq "encode ${builtins.toJSON left} == ${builtins.toJSON right}"
       (url.encode args left) right;
 
+  checkDecoding = { left, right }:
+  assertEq "${builtins.toJSON left} == decode ${builtins.toJSON right}"
+    (url.decode left) right;
+
   unreserved = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789.-_~";
 
   encodeExpected = [
@@ -31,6 +35,9 @@ let
   testEncode = it "checks url.encode"
     (builtins.map (checkEncoding {}) encodeExpected);
 
+  testDecode = it "checks url.decode"
+    (builtins.map checkDecoding encodeExpected);
+
   testLeaveReserved = it "checks that leaveReserved is like id for valid URLs"
     (builtins.map (x: checkEncoding { leaveReserved = true; } { left = x; right = x; }) [
       "ftp://ftp.is.co.za/rfc/rfc1808.txt"