about summary refs log tree commit diff
path: root/users/wpcarro/slx.js
diff options
context:
space:
mode:
authorWilliam Carroll <wpcarro@gmail.com>2023-01-19T18·09-0800
committerclbot <clbot@tvl.fyi>2023-01-19T18·12+0000
commit509e356bb8fcba2264368ca1e973e270ab614f98 (patch)
tree24f141931b5c1708e522701de9907cc5803fd774 /users/wpcarro/slx.js
parent0dfe460fbb8cda0831fbcf4d9e42948c2bb88afa (diff)
feat(wpcarro/slx.js): Support JavaScript simple-select impl r/5704
See README.md

Change-Id: I6a50e34398c42aabe3cceba160be006f1867eca4
Reviewed-on: https://cl.tvl.fyi/c/depot/+/7874
Reviewed-by: wpcarro <wpcarro@gmail.com>
Autosubmit: wpcarro <wpcarro@gmail.com>
Tested-by: BuildkiteCI
Diffstat (limited to 'users/wpcarro/slx.js')
-rw-r--r--users/wpcarro/slx.js/README.md55
-rw-r--r--users/wpcarro/slx.js/default.nix11
-rw-r--r--users/wpcarro/slx.js/index.js360
3 files changed, 426 insertions, 0 deletions
diff --git a/users/wpcarro/slx.js/README.md b/users/wpcarro/slx.js/README.md
new file mode 100644
index 000000000000..3fbebc470633
--- /dev/null
+++ b/users/wpcarro/slx.js/README.md
@@ -0,0 +1,55 @@
+# slx.js
+
+Filter tabular data in the browser using an ergonomic query language.
+
+## Status
+
+This project is usable today (I use it in my projects), but it's currently alpha
+status. See the wish list for remaining features.
+
+## Installation
+
+`slx.js` is available via CDN:
+
+```html
+<script src="https://cdn.jsdelivr.net/gh/wpcarro/slx.js/index.js" async></script>
+```
+
+## Usage
+
+`slx.js` hasn't been properly benchmarked, but in my personal projects it works
+fine with thousands of records.
+
+```javascript
+const cast = [
+  { first: "Graham", last: "Chapman" },
+  { first: "John", last: "Cleese" },
+  { first: "Terry", last: "Gilliam" },
+  { first: "Eric", last: "Idle" },
+  { first: "Terry", last: "Jones" },
+  { first: "Michael", last: "Palin" },
+];
+
+const config = {
+    // Match values case sensitively when filtering.
+    caseSensitive: false,
+    // Coerce values into regular expressions (instead of strings) when they're defined as atoms.
+    preferRegex: true,
+    // The key in the JS object that hosts the Date type against which we filter.
+    dateKey: 'Date',
+};
+
+console.log(select('last:^C.+$', cast, config));
+// [{ first: "Graham", last: "Chapman" }, { first: "John", last: "Cleese" }]
+```
+
+## Wish List
+
+- Support explicit grouping with parentheses (e.g. `title:once (director:Tarantino OR director:Coen)`).
+- Proper benchmarking (see "Usage" section).
+- Something something documentation.
+- Something something testing.
+
+## See also:
+
+- [`slx`](https://github.com/wpcarro/slx)
diff --git a/users/wpcarro/slx.js/default.nix b/users/wpcarro/slx.js/default.nix
new file mode 100644
index 000000000000..bf903e77aadd
--- /dev/null
+++ b/users/wpcarro/slx.js/default.nix
@@ -0,0 +1,11 @@
+# Builds a text file named "source.txt" whose contents interpolate the result
+# of `depot.third_party.gitignoreSource ./.`, and attaches an extra CI step
+# under `meta.ci.extraSteps.github`.
+{ pkgs, depot, ... }:
+
+(pkgs.writeText "source.txt" ''
+  ${depot.third_party.gitignoreSource ./.}
+'').overrideAttrs (_: {
+  # NOTE(review): presumably this pushes the history filtered to
+  # users/wpcarro/slx.js onto the `canon` branch of the GitHub remote below;
+  # confirm against the definition of `filteredGitPush`.
+  meta.ci.extraSteps.github = depot.tools.releases.filteredGitPush {
+    filter = ":/users/wpcarro/slx.js";
+    remote = "git@github.com:wpcarro/slx.js.git";
+    ref = "refs/heads/canon";
+  };
+})
diff --git a/users/wpcarro/slx.js/index.js b/users/wpcarro/slx.js/index.js
new file mode 100644
index 000000000000..7eba2dca91bf
--- /dev/null
+++ b/users/wpcarro/slx.js/index.js
@@ -0,0 +1,360 @@
+function select(query, xs, config) {
+    const predicate = compile(parse(query), config);
+    return xs.filter(predicate);
+}
+
+function compile(ast, config) {
+    if (ast.type === 'CONJUNCTION') {
+        const lhs = compile(ast.lhs);
+        const rhs = compile(ast.rhs);
+
+        if (ast.joint === 'AND') {
+            return function(x) {
+                return lhs(x) && rhs(x);
+            };
+        }
+        if (ast.joint === 'OR') {
+            return function(x) {
+                return lhs(x) || rhs(x);
+            };
+        }
+    }
+    if (ast.type === 'DATE_SELECTION') {
+        if (ast.key === 'before') {
+            return function(row) {
+                let t = new Date();
+                if (ast.val === 'yesterday') {
+                    t.setDate(t.getDate() - 1);
+                    console.log(t);
+                }
+                // MM/DD/YYYY
+                else {
+                    t = new Date(ast.val);
+                }
+                return row[config.dateKey] < t;
+            };
+        }
+        if (ast.key === 'after') {
+            return function(row) {
+                let t = new Date();
+                if (ast.val === 'yesterday') {
+                    t.setDate(t.getDate() - 1);
+                    console.log(t);
+                }
+                // MM/DD/YYYY
+                else {
+                    t = new Date(ast.val);
+                }
+                return row[config.dateKey] > t;
+            };
+        }
+    }
+    if (ast.type === 'SELECTION') {
+        const f = compile(ast.val);
+        return function(row) {
+            return ast.negate ? !f(row[ast.key]) : f(row[ast.key]);
+        };
+    }
+    if (ast.type === 'MATCH_ALL') {
+        if (ast.matchType === 'STRING') {
+            return function(row) {
+                return Object.values(row).some(x => {
+                    if (config.caseSensitive) {
+                        return x === ast.val;
+                    } else {
+                        return x.toLowerCase() === ast.val.toLowerCase();
+                    }
+                })
+            };
+        }
+        if (ast.matchType === 'REGEX') {
+            return function(row) {
+                return Object.values(row).some(x => ast.val.test(x));
+            };
+        }
+    }
+    if (ast.type === 'STRING') {
+        return function(x) {
+            if (config.caseSensitive) {
+                return x === ast.val;
+            } else {
+                return x.toLowerCase() === ast.val.toLowerCase();
+            }
+        };
+    }
+    if (ast.type === 'REGEX') {
+        return function(x) {
+            return ast.val.test(x);
+        };
+    }
+}
+
+// A "selection" without a "$column:" prefix should fuzzy-search all columns.
+//
+// conjunction -> selection ( ( "AND" | "OR" )? selection )* ;
+// selection   -> "-"? COLUMN ":" ( regex | string ) | regex ;
+// regex       -> [_-a-zA-Z0-9] | "/" [ _-a-zA-Z0-9] "/" | string ;
+// string      -> "\"" [ _-a-zA-Z0-9] "\"" ;
+
+// Whatever characters are valid for a JS regex.
+// `tokenize` tests this against one character at a time to decide whether
+// that character can start or continue an ATOM token.
+const ATOM_REGEX = /[-_.\[\]a-zA-Z0-9*+^$]/;
+
+// Splits the query string `x` into an array of [type, value] pairs.
+//
+// Token types: 'WHITESPACE', 'NEGATE', 'COLON', 'LPAREN', 'RPAREN' (value is
+// null) and 'ATOM', 'REGEX', 'STRING' (value is the token's text).
+//
+// Throws a 'Tokenize Error: ...' string when a /regex/ or "string" literal is
+// not terminated. Characters that match none of the branches below are
+// skipped without producing a token.
+function tokenize(x) {
+    const result = [];
+    let i = 0;
+    while (i < x.length) {
+        // A run of one or more spaces collapses into one WHITESPACE token.
+        // Only ' ' is recognised here; other whitespace characters fall
+        // through to the final `else` and are dropped.
+        if (x[i] === ' ') {
+            i += 1;
+            while (i < x.length && x[i] === ' ') {
+                i += 1;
+            }
+            result.push(['WHITESPACE', null]);
+            continue;
+        }
+        // This check must come before the ATOM check: ATOM_REGEX also accepts
+        // '-', so a '-' at the start of a token is always NEGATE, while a '-'
+        // inside an atom is consumed by the atom loop below.
+        if (x[i] === '-') {
+            result.push(['NEGATE', null]);
+            i += 1;
+            continue;
+        }
+        // Greedily collect consecutive atom characters into one ATOM token.
+        if (ATOM_REGEX.test(x[i])) {
+            let curr = x[i];
+            i += 1;
+            while (i < x.length && ATOM_REGEX.test(x[i])) {
+                curr += x[i];
+                i += 1;
+            }
+            result.push(['ATOM', curr]);
+            continue;
+        }
+        if (x[i] === ':') {
+            result.push(['COLON', null]);
+            i += 1;
+            continue;
+        }
+        if (x[i] === '(') {
+            result.push(['LPAREN', null]);
+            i += 1;
+            continue;
+        }
+        if (x[i] === ')') {
+            result.push(['RPAREN', null]);
+            i += 1;
+            continue;
+        }
+        // /regex/ literal: everything up to the next '/' is the pattern text.
+        // There is no escape handling, so the pattern cannot contain a '/'.
+        if (x[i] === '/') {
+            let start = i;
+            let curr = '';
+            i += 1;
+            while (i < x.length && x[i] !== '/') {
+                curr += x[i];
+                i += 1;
+            }
+            // Ran off the end of the input without finding the closing '/'.
+            if (i >= x.length) {
+                throw `Tokenize Error: EOL while attempting to tokenize the regex beginning at column: ${start}`;
+            }
+            // Always true here: the loop above only stops on '/' or at the
+            // end of the input, and the latter has just thrown.
+            if (x[i] === '/') {
+                result.push(['REGEX', curr]);
+                i += 1;
+            }
+            continue;
+        }
+        // "string" literal: everything up to the next unescaped '"'.
+        if (x[i] === '"') {
+            let start = i;
+            let curr = '';
+            i += 1;
+            while (i < x.length && x[i] !== '"') {
+                // A backslash followed by '"' contributes a literal '"' and
+                // does not terminate the string.
+                if (x[i] === '\\' && x[i + 1] === '"') {
+                    curr += '\"';
+                    i += 2;
+                } else {
+                    curr += x[i];
+                    i += 1;
+                }
+            }
+            // Ran off the end of the input without finding the closing '"'.
+            if (i >= x.length) {
+                throw `Tokenize Error: EOL while attempting to tokenize the string starting at column: ${start}`;
+            }
+            if (x[i] === '"') {
+                result.push(['STRING', curr]);
+                i += 1;
+            }
+            continue;
+        }
+        // Any other character is skipped.
+        else {
+            i += 1;
+        }
+    }
+    return result;
+}
+
+function expect(f, expectation, p) {
+    const [type, val] = p.tokens[p.i];
+    if (f(type, val)) {
+        p.i += 1;
+    } else {
+        throw `Parse Error: expected ${expectation}, but got ${p.tokens[p.i]}; ${JSON.stringify(p)}`
+    }
+}
+
+function matches(f, p) {
+    const [type, val] = p.tokens[p.i];
+    if (f(type, val)) {
+        return true;
+    }
+    return false;
+}
+
+function match(f, expectation, p) {
+    const [type, val] = p.tokens[p.i];
+    if (f(type, val)) {
+        p.i += 1;
+        return val;
+    }
+    throw `Parse Error: expected ${expectation}, but got: ${p.tokens[p.i]}; ${JSON.stringify(p)}`;
+}
+
+function skipWhitespace(p) {
+    while (p.i < p.tokens.length && matches((type, _) => type === 'WHITESPACE', p)) {
+        p.i += 1;
+    }
+}
+
+function parser(tokens) {
+    return { i: 0, tokens };
+}
+
+function parse(x) {
+    const tokens = tokenize(x);
+    const p = parser(tokens);
+    return conjunction(p);
+}
+
+function conjunction(p) {
+    skipWhitespace(p);
+
+    const lhs = selection(p);
+    skipWhitespace(p);
+
+    if (p.i >= p.tokens.length) {
+        return lhs;
+    }
+
+    let joint = 'AND';
+    if (matches((type, val) => type === 'ATOM' && val === 'AND', p)) {
+        joint = 'AND';
+        p.i += 1;
+    } else if (matches((type, val) => type === 'ATOM' && val === 'OR', p)) {
+        joint = 'OR';
+        p.i += 1;
+    }
+    skipWhitespace(p);
+    let rhs = conjunction(p);
+
+    return {
+        type: 'CONJUNCTION',
+        joint,
+        lhs,
+        rhs,
+    };
+}
+
+function peekType(n, p) {
+    if (p.i + n < p.tokens.length) {
+        return p.tokens[p.i + n][0];
+    }
+    return null;
+}
+
+function selection(p) {
+    // column:value OR -column:value
+    if ((peekType(0, p) === 'ATOM' && peekType(1, p) === 'COLON') ||
+        (peekType(0, p) === 'NEGATE' && peekType(1, p) === 'ATOM' && peekType(2, p) === 'COLON')) {
+
+        let negate = false;
+        if (p.tokens[p.i][0] === 'NEGATE') {
+            negate = true;
+            p.i += 1;
+        }
+
+        const key = match((type, _) => type === 'ATOM', 'a column label', p);
+        expect((type, val) => type === 'COLON', 'a colon', p);
+
+        if (key === 'before' || key === 'after') {
+            const val = date(p);
+            return {
+                type: 'DATE_SELECTION',
+                key,
+                val,
+            };
+        } else {
+            const val = value(p);
+            return {
+                type: 'SELECTION',
+                negate,
+                key,
+                val,
+            };
+        }
+    } else {
+        return matchAll(p, config);
+    }
+}
+
+function matchAll(p, config) {
+    const [type, val] = p.tokens[p.i];
+
+    // Cast atoms into strings or regexes depending on the current config.
+    if (type === 'ATOM') {
+        p.i += 1;
+        if (config.preferRegex) {
+            const regex = config.caseSensitive ? new RegExp(val) : new RegExp(val, "i");
+            return { type: 'MATCH_ALL', matchType: 'REGEX', val: regex };
+        } else {
+            return { type: 'MATCH_ALL', matchType: 'STRING', val }
+        }
+    }
+    if (type === 'STRING') {
+        p.i += 1;
+        return { type: 'MATCH_ALL', matchType: 'STRING', val };
+    }
+    if (type === 'REGEX') {
+        p.i += 1;
+        const regex = config.caseSensitive ? new RegExp(val) : new RegExp(val, "i");
+        return { type: 'MATCH_ALL', matchType: 'REGEX', val: regex };
+    }
+    throw `Parse Error: Expected a regular expression or a string, but got: ${p.tokens[p.i]}; ${JSON.stringify(p)}`;
+}
+
+function value(p, config) {
+    const [type, val] = p.tokens[p.i];
+
+    // Cast atoms into strings or regexes depending on the current config.
+    if (type === 'ATOM') {
+        p.i += 1;
+        if (config.preferRegex) {
+            const regex = config.caseSensitive ? new RegExp(val) : new RegExp(val, "i");
+            return { type: 'REGEX', val: regex };
+        } else {
+            return { type: 'STRING', val }
+        }
+    }
+    if (type === 'STRING') {
+        p.i += 1;
+        return { type, val };
+    }
+    if (type === 'REGEX') {
+        p.i += 1;
+        const regex = config.caseSensitive ? new RegExp(val) : new RegExp(val, "i");
+        return { type, val: regex };
+    }
+    throw `Parse Error: Expected a regular expression or a string, but got: ${p.tokens[p.i]}; ${JSON.stringify(p)}`;
+}
+
+function date(p) {
+    const [type, val] = p.tokens[p.i];
+    p.i += 1;
+
+    return val;
+}