From a5f2b446aa6e33b533cf9e16c325cf2360f69693 Mon Sep 17 00:00:00 2001 From: sterni Date: Thu, 1 Apr 2021 11:15:57 +0200 Subject: feat(sterni/nix/url): implement urldecoding We use builtins.split directly as it should be a bit more efficient than lib.splitStrings. Also, the fact that it returns a list for every regex match is useful for updating the state while parsing the tokens: * The tokens are obtained by splitting the string at every '%' * Every time we see a boundary (that is, a list in the returned list of builtins.split), we know that the first two chars of the next string are a percent-encoded character. One implementation flaw is that it will currently crash if it encounters malformed URLs (since int.fromHex crashes if it encounters any non-hex-digit characters) and accepts some malformed urlencoding like "foo %A". Change-Id: I90d08d7a71b16b4f4a4879214abd7aeff46c20c8 Reviewed-on: https://cl.tvl.fyi/c/depot/+/2744 Tested-by: BuildkiteCI Reviewed-by: sterni --- users/sterni/nix/url/default.nix | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) (limited to 'users/sterni/nix/url/default.nix') diff --git a/users/sterni/nix/url/default.nix b/users/sterni/nix/url/default.nix index ce7ed9b83437..37bd0de66ac9 100644 --- a/users/sterni/nix/url/default.nix +++ b/users/sterni/nix/url/default.nix @@ -6,6 +6,7 @@ let char int string + flow ; reserved = c: builtins.elem c [ @@ -35,8 +36,46 @@ let else percentEncode c; in lib.concatStrings (builtins.map tr chars); + decode = s: + let + tokens = builtins.split "%" s; + decodeStep = + { result ? "" + , inPercent ? 
false + }: s: + flow.cond [ + [ + (builtins.isList s) + { + inherit result; + inPercent = true; + } + ] + [ + inPercent + { + inPercent = false; + # first two characters came after an % + # the rest is the string until the next % + result = result + + char.chr (int.fromHex (string.take 2 s)) + + (string.drop 2 s); + } + ] + [ + (!inPercent) + { + result = result + s; + } + ] + ]; + + in + (builtins.foldl' decodeStep {} tokens).result; + in { inherit encode + decode ; } -- cgit 1.4.1