From 509e356bb8fcba2264368ca1e973e270ab614f98 Mon Sep 17 00:00:00 2001 From: William Carroll Date: Thu, 19 Jan 2023 10:09:13 -0800 Subject: feat(wpcarro/slx.js): Support JavaScript simple-select impl See README.md Change-Id: I6a50e34398c42aabe3cceba160be006f1867eca4 Reviewed-on: https://cl.tvl.fyi/c/depot/+/7874 Reviewed-by: wpcarro Autosubmit: wpcarro Tested-by: BuildkiteCI --- users/wpcarro/scratch/simple-select/index.html | 9 - users/wpcarro/scratch/simple-select/index.js | 372 ------------------------- users/wpcarro/slx.js/README.md | 55 ++++ users/wpcarro/slx.js/default.nix | 11 + users/wpcarro/slx.js/index.js | 360 ++++++++++++++++++++++++ 5 files changed, 426 insertions(+), 381 deletions(-) delete mode 100644 users/wpcarro/scratch/simple-select/index.html delete mode 100644 users/wpcarro/scratch/simple-select/index.js create mode 100644 users/wpcarro/slx.js/README.md create mode 100644 users/wpcarro/slx.js/default.nix create mode 100644 users/wpcarro/slx.js/index.js (limited to 'users/wpcarro') diff --git a/users/wpcarro/scratch/simple-select/index.html b/users/wpcarro/scratch/simple-select/index.html deleted file mode 100644 index f7d4576f79a1..000000000000 --- a/users/wpcarro/scratch/simple-select/index.html +++ /dev/null @@ -1,9 +0,0 @@ - - - - - - - - - diff --git a/users/wpcarro/scratch/simple-select/index.js b/users/wpcarro/scratch/simple-select/index.js deleted file mode 100644 index 15a35ab74334..000000000000 --- a/users/wpcarro/scratch/simple-select/index.js +++ /dev/null @@ -1,372 +0,0 @@ -const state = { - // Match values case sensitively when filtering. - caseSensitive: false, - // Coerce values into regular expressions (instead of strings) when they're defined as atoms. - preferRegex: true, - // The key in the JS object that hosts the Date type against which we filter. - dateKey: 'Date', -}; - -// TODO(wpcarro): Support filtering by date (before, after). -// TODO(wpcarro): Support grouping with parentheses. 
- -function select(query, xs) { - const predicate = compile(parse(query)); - return xs.filter(predicate); -} - -function compile(ast) { - if (ast.type === 'CONJUNCTION') { - const lhs = compile(ast.lhs); - const rhs = compile(ast.rhs); - - if (ast.joint === 'AND') { - return function(x) { - return lhs(x) && rhs(x); - }; - } - if (ast.joint === 'OR') { - return function(x) { - return lhs(x) || rhs(x); - }; - } - } - if (ast.type === 'DATE_SELECTION') { - if (ast.key === 'before') { - return function(row) { - let t = new Date(); - if (ast.val === 'yesterday') { - t.setDate(t.getDate() - 1); - console.log(t); - } - // MM/DD/YYYY - else { - t = new Date(ast.val); - } - return row[state.dateKey] < t; - }; - } - if (ast.key === 'after') { - return function(row) { - let t = new Date(); - if (ast.val === 'yesterday') { - t.setDate(t.getDate() - 1); - console.log(t); - } - // MM/DD/YYYY - else { - t = new Date(ast.val); - } - return row[state.dateKey] > t; - }; - } - } - if (ast.type === 'SELECTION') { - const f = compile(ast.val); - return function(row) { - return ast.negate ? !f(row[ast.key]) : f(row[ast.key]); - }; - } - if (ast.type === 'MATCH_ALL') { - if (ast.matchType === 'STRING') { - return function(row) { - return Object.values(row).some(x => { - if (state.caseSensitive) { - return x === ast.val; - } else { - return x.toLowerCase() === ast.val.toLowerCase(); - } - }) - }; - } - if (ast.matchType === 'REGEX') { - return function(row) { - return Object.values(row).some(x => ast.val.test(x)); - }; - } - } - if (ast.type === 'STRING') { - return function(x) { - if (state.caseSensitive) { - return x === ast.val; - } else { - return x.toLowerCase() === ast.val.toLowerCase(); - } - }; - } - if (ast.type === 'REGEX') { - return function(x) { - return ast.val.test(x); - }; - } -} - -// A "selection" without a "$column:" prefix should fuzzy-search all columns. -// -// conjunction -> selection ( ( "AND" | "OR" )? selection )* ; -// selection -> "-"? 
COLUMN ":" ( regex | string ) | regex ; -// regex -> [_-a-zA-Z0-9] | "/" [ _-a-zA-Z0-9] "/" | string ; -// string -> "\"" [ _-a-zA-Z0-9] "\"" ; - -// Whatever characters are valid for a JS regex. -const ATOM_REGEX = /[-_.\[\]a-zA-Z0-9*+^$]/; - -function tokenize(x) { - const result = []; - let i = 0; - while (i < x.length) { - if (x[i] === ' ') { - i += 1; - while (i < x.length && x[i] === ' ') { - i += 1; - } - result.push(['WHITESPACE', null]); - continue; - } - if (x[i] === '-') { - result.push(['NEGATE', null]); - i += 1; - continue; - } - if (ATOM_REGEX.test(x[i])) { - let curr = x[i]; - i += 1; - while (i < x.length && ATOM_REGEX.test(x[i])) { - curr += x[i]; - i += 1; - } - result.push(['ATOM', curr]); - continue; - } - if (x[i] === ':') { - result.push(['COLON', null]); - i += 1; - continue; - } - if (x[i] === '(') { - result.push(['LPAREN', null]); - i += 1; - continue; - } - if (x[i] === ')') { - result.push(['RPAREN', null]); - i += 1; - continue; - } - if (x[i] === '/') { - let start = i; - let curr = ''; - i += 1; - while (i < x.length && x[i] !== '/') { - curr += x[i]; - i += 1; - } - // error - if (i >= x.length) { - throw `Tokenize Error: EOL while attempting to tokenize the regex beginning at column: ${start}`; - } - if (x[i] === '/') { - result.push(['REGEX', curr]); - i += 1; - } - continue; - } - if (x[i] === '"') { - let start = i; - let curr = ''; - i += 1; - while (i < x.length && x[i] !== '"') { - // continue on \" - if (x[i] === '\\' && x[i + 1] === '"') { - curr += '\"'; - i += 2; - } else { - curr += x[i]; - i += 1; - } - } - if (i >= x.length) { - throw `Tokenize Error: EOL while attempting to tokenize the string starting at column: ${start}`; - } - if (x[i] === '"') { - result.push(['STRING', curr]); - i += 1; - } - continue; - } - else { - i += 1; - } - } - return result; -} - -function expect(f, expectation, p) { - const [type, val] = p.tokens[p.i]; - if (f(type, val)) { - p.i += 1; - } else { - throw `Parse Error: expected 
${expectation}, but got ${p.tokens[p.i]}; ${JSON.stringify(p)}` - } -} - -function matches(f, p) { - const [type, val] = p.tokens[p.i]; - if (f(type, val)) { - return true; - } - return false; -} - -function match(f, expectation, p) { - const [type, val] = p.tokens[p.i]; - if (f(type, val)) { - p.i += 1; - return val; - } - throw `Parse Error: expected ${expectation}, but got: ${p.tokens[p.i]}; ${JSON.stringify(p)}`; -} - -function skipWhitespace(p) { - while (p.i < p.tokens.length && matches((type, _) => type === 'WHITESPACE', p)) { - p.i += 1; - } -} - -function parser(tokens) { - return { i: 0, tokens }; -} - -function parse(x) { - const tokens = tokenize(x); - const p = parser(tokens); - return conjunction(p); -} - -function conjunction(p) { - skipWhitespace(p); - - const lhs = selection(p); - skipWhitespace(p); - - if (p.i >= p.tokens.length) { - return lhs; - } - - let joint = 'AND'; - if (matches((type, val) => type === 'ATOM' && val === 'AND', p)) { - joint = 'AND'; - p.i += 1; - } else if (matches((type, val) => type === 'ATOM' && val === 'OR', p)) { - joint = 'OR'; - p.i += 1; - } - skipWhitespace(p); - let rhs = conjunction(p); - - return { - type: 'CONJUNCTION', - joint, - lhs, - rhs, - }; -} - -function peekType(n, p) { - if (p.i + n < p.tokens.length) { - return p.tokens[p.i + n][0]; - } - return null; -} - -function selection(p) { - // column:value OR -column:value - if ((peekType(0, p) === 'ATOM' && peekType(1, p) === 'COLON') || - (peekType(0, p) === 'NEGATE' && peekType(1, p) === 'ATOM' && peekType(2, p) === 'COLON')) { - - let negate = false; - if (p.tokens[p.i][0] === 'NEGATE') { - negate = true; - p.i += 1; - } - - const key = match((type, _) => type === 'ATOM', 'a column label', p); - expect((type, val) => type === 'COLON', 'a colon', p); - - if (key === 'before' || key === 'after') { - const val = date(p); - return { - type: 'DATE_SELECTION', - key, - val, - }; - } else { - const val = value(p); - return { - type: 'SELECTION', - negate, - 
key, - val, - }; - } - } else { - return matchAll(p); - } -} - -function matchAll(p) { - const [type, val] = p.tokens[p.i]; - - // Cast atoms into strings or regexes depending on the current state. - if (type === 'ATOM') { - p.i += 1; - if (state.preferRegex) { - const regex = state.caseSensitive ? new RegExp(val) : new RegExp(val, "i"); - return { type: 'MATCH_ALL', matchType: 'REGEX', val: regex }; - } else { - return { type: 'MATCH_ALL', matchType: 'STRING', val } - } - } - if (type === 'STRING') { - p.i += 1; - return { type: 'MATCH_ALL', matchType: 'STRING', val }; - } - if (type === 'REGEX') { - p.i += 1; - const regex = state.caseSensitive ? new RegExp(val) : new RegExp(val, "i"); - return { type: 'MATCH_ALL', matchType: 'REGEX', val: regex }; - } - throw `Parse Error: Expected a regular expression or a string, but got: ${p.tokens[p.i]}; ${JSON.stringify(p)}`; -} - -function value(p) { - const [type, val] = p.tokens[p.i]; - - // Cast atoms into strings or regexes depending on the current state. - if (type === 'ATOM') { - p.i += 1; - if (state.preferRegex) { - const regex = state.caseSensitive ? new RegExp(val) : new RegExp(val, "i"); - return { type: 'REGEX', val: regex }; - } else { - return { type: 'STRING', val } - } - } - if (type === 'STRING') { - p.i += 1; - return { type, val }; - } - if (type === 'REGEX') { - p.i += 1; - const regex = state.caseSensitive ? new RegExp(val) : new RegExp(val, "i"); - return { type, val: regex }; - } - throw `Parse Error: Expected a regular expression or a string, but got: ${p.tokens[p.i]}; ${JSON.stringify(p)}`; -} - -function date(p) { - const [type, val] = p.tokens[p.i]; - p.i += 1; - - return val; -} diff --git a/users/wpcarro/slx.js/README.md b/users/wpcarro/slx.js/README.md new file mode 100644 index 000000000000..3fbebc470633 --- /dev/null +++ b/users/wpcarro/slx.js/README.md @@ -0,0 +1,55 @@ +# slx.js + +Filter tabular data in the browser using an ergonomic query language. 
+ +## Status + +This project is usable today (I use it in my projects), but it's currently alpha +status. See the wish list for remaining features. + +## Installation + +`slx.js` is available via CDN: + +```shell + +``` + +## Usage + +`slx.js` hasn't been properly benchmarked, but in my personal projects, it works +fine with `O(1,000)s` of records. + +```javascript +const cast = [ + { first: "Graham", last: "Chapman" }, + { first: "John", last: "Cleese" }, + { first: "Terry", last: "Gilliam" }, + { first: "Eric", last: "Idle" }, + { first: "Terry", last: "Jones" }, + { first: "Michael", last: "Palin" }, +]; + +const config = { + // Match values case sensitively when filtering. + caseSensitive: false, + // Coerce values into regular expressions (instead of strings) when they're defined as atoms. + preferRegex: true, + // The key in the JS object that hosts the Date type against which we filter. + dateKey: 'Date', +}; + +console.log(select('last:^C.+$', cast, config)); +// [{ first: "Graham", last: "Chapman" }, { first: "John", last: "Cleese" }] +``` + +## Wish List + +- Support explicit grouping with parentheses (e.g. `title:once (director:Tarantino OR director:Coen)`). +- Proper benchmarking (see "Usage" section). +- Something something documentation. +- Something something testing. + +## See also: + +- [`slx`](https://github.com/wpcarro/slx) diff --git a/users/wpcarro/slx.js/default.nix b/users/wpcarro/slx.js/default.nix new file mode 100644 index 000000000000..bf903e77aadd --- /dev/null +++ b/users/wpcarro/slx.js/default.nix @@ -0,0 +1,11 @@ +{ pkgs, depot, ... 
// NOTE: patch context that shared this span with the JavaScript below (tail of the
// default.nix hunk and the diff header for index.js), kept verbatim on the next line:
// }: + +(pkgs.writeText "source.txt" '' + ${depot.third_party.gitignoreSource ./.} +'').overrideAttrs (_: { + meta.ci.extraSteps.github = depot.tools.releases.filteredGitPush { + filter = ":/users/wpcarro/slx.js"; + remote = "git@github.com:wpcarro/slx.js.git"; + ref = "refs/heads/canon"; + }; +}) diff --git a/users/wpcarro/slx.js/index.js b/users/wpcarro/slx.js/index.js new file mode 100644 index 000000000000..7eba2dca91bf --- /dev/null +++ b/users/wpcarro/slx.js/index.js @@ -0,0 +1,360 @@

/**
 * Return the rows of `xs` that satisfy `query`.
 *
 * @param {string} query - Query text; handed to `parse`.
 * @param {Array<Object>} xs - Rows to filter.
 * @param {Object} [config] - Options read by the compiler: `caseSensitive`, `dateKey`.
 * @returns {Array<Object>} The rows for which the compiled predicate is truthy.
 */
function select(query, xs, config) {
  // `config` is passed to both stages. An extra argument is ignored by a
  // one-parameter `parse`, so this is safe either way.
  const predicate = compile(parse(query, config), config);
  return xs.filter(predicate);
}

/**
 * Turn an AST node into a predicate function.
 *
 * CONJUNCTION, DATE_SELECTION, SELECTION and MATCH_ALL nodes yield a predicate
 * over a row object; STRING and REGEX nodes yield a predicate over a single
 * cell value (they are what SELECTION applies to `row[ast.key]`).
 *
 * @param {Object} ast - Node with a `type` field, as produced by the parser.
 * @param {Object} [config] - `caseSensitive` (default false) controls literal
 *   string comparison; `dateKey` (default 'Date') names the row property that
 *   date selections compare against.
 * @returns {function(*): boolean} The predicate for `ast`.
 * @throws {string} A "Compile Error: ..." string for a node this function does
 *   not know how to compile (thrown as a plain string, like the other errors
 *   in this file).
 */
function compile(ast, config) {
  const { caseSensitive = false, dateKey = 'Date' } = config || {};

  // Builds the cell-level equality test shared by STRING and MATCH_ALL/STRING.
  // Non-string cells (numbers, Dates, missing columns) simply do not match
  // instead of blowing up on `.toLowerCase()`.
  const literalMatcher = () => {
    if (caseSensitive) {
      return (cell) => cell === ast.val;
    }
    const needle = ast.val.toLowerCase();
    return (cell) => typeof cell === 'string' && cell.toLowerCase() === needle;
  };

  if (ast.type === 'CONJUNCTION') {
    // The config must travel with the recursion: leaf nodes read it.
    const lhs = compile(ast.lhs, config);
    const rhs = compile(ast.rhs, config);
    if (ast.joint === 'AND') {
      return (row) => lhs(row) && rhs(row);
    }
    if (ast.joint === 'OR') {
      return (row) => lhs(row) || rhs(row);
    }
  }

  if (ast.type === 'DATE_SELECTION' && (ast.key === 'before' || ast.key === 'after')) {
    // A literal date is parsed once; 'yesterday' stays relative to the moment
    // the predicate is called.
    const fixed = ast.val === 'yesterday' ? null : new Date(ast.val);
    const threshold = () => {
      if (fixed !== null) {
        return fixed;
      }
      const t = new Date();
      t.setDate(t.getDate() - 1);
      return t;
    };
    if (ast.key === 'before') {
      return (row) => row[dateKey] < threshold();
    }
    return (row) => row[dateKey] > threshold();
  }

  if (ast.type === 'SELECTION') {
    const f = compile(ast.val, config);
    return (row) => (ast.negate ? !f(row[ast.key]) : f(row[ast.key]));
  }

  if (ast.type === 'MATCH_ALL') {
    if (ast.matchType === 'STRING') {
      const matchesCell = literalMatcher();
      return (row) => Object.values(row).some(matchesCell);
    }
    if (ast.matchType === 'REGEX') {
      return (row) => Object.values(row).some((cell) => ast.val.test(cell));
    }
  }

  if (ast.type === 'STRING') {
    return literalMatcher();
  }

  if (ast.type === 'REGEX') {
    return (cell) => ast.val.test(cell);
  }

  // Falling off the end used to return undefined, which only surfaced later as
  // "undefined is not a function" inside Array.prototype.filter.
  throw `Compile Error: unsupported AST node: ${JSON.stringify(ast)}`;
}

// A "selection" without a "$column:" prefix should fuzzy-search all columns.
//
// conjunction -> selection ( ( "AND" | "OR" )? selection )* ;
// selection -> "-"? COLUMN ":" ( regex | string ) | regex ;
// regex -> [_-a-zA-Z0-9] | "/" [ _-a-zA-Z0-9] "/" | string ;
// string -> "\"" [ _-a-zA-Z0-9] "\"" ;

// Whatever characters are valid for a JS regex.
// Characters that may appear in an unquoted atom: identifier characters plus the
// regex metacharacters listed in the class. The regex has no /g flag, so
// `.test` keeps no state between calls.
const ATOM_REGEX = /[-_.\[\]a-zA-Z0-9*+^$]/;

// Tokens that are exactly one character long. '-' is looked up before atoms are
// scanned, so a leading '-' is always NEGATE even though ATOM_REGEX accepts '-'
// in the middle of an atom.
const SINGLE_CHAR_TOKENS = new Map([
  ['-', 'NEGATE'],
  [':', 'COLON'],
  ['(', 'LPAREN'],
  [')', 'RPAREN'],
]);

/**
 * Split a query string into `[type, value]` token pairs.
 *
 * Token types: WHITESPACE, NEGATE, COLON, LPAREN, RPAREN (value `null`), and
 * ATOM, REGEX, STRING (value is the token text without its delimiters).
 *
 * @param {string} x - The query text.
 * @returns {Array<[string, (string|null)]>} Tokens in input order.
 * @throws {string} "Tokenize Error: ..." when a `/regex/` or `"string"` is not
 *   closed before the end of input.
 */
function tokenize(x) {
  const result = [];
  let i = 0;

  while (i < x.length) {
    const c = x[i];

    // A run of spaces collapses into a single WHITESPACE token.
    if (c === ' ') {
      while (i < x.length && x[i] === ' ') {
        i += 1;
      }
      result.push(['WHITESPACE', null]);
      continue;
    }

    if (SINGLE_CHAR_TOKENS.has(c)) {
      result.push([SINGLE_CHAR_TOKENS.get(c), null]);
      i += 1;
      continue;
    }

    if (ATOM_REGEX.test(c)) {
      const start = i;
      while (i < x.length && ATOM_REGEX.test(x[i])) {
        i += 1;
      }
      result.push(['ATOM', x.slice(start, i)]);
      continue;
    }

    if (c === '/') {
      const start = i;
      let curr = '';
      i += 1;
      while (i < x.length && x[i] !== '/') {
        // A backslash escapes the next character, so "\/" does not close the
        // literal. The pair is kept as-is because it is regex source text.
        if (x[i] === '\\' && i + 1 < x.length) {
          curr += x[i] + x[i + 1];
          i += 2;
        } else {
          curr += x[i];
          i += 1;
        }
      }
      if (i >= x.length) {
        throw `Tokenize Error: EOL while attempting to tokenize the regex beginning at column: ${start}`;
      }
      result.push(['REGEX', curr]);
      i += 1; // closing '/'
      continue;
    }

    if (c === '"') {
      const start = i;
      let curr = '';
      i += 1;
      while (i < x.length && x[i] !== '"') {
        // \" inside a string stands for a literal double quote.
        if (x[i] === '\\' && x[i + 1] === '"') {
          curr += '"';
          i += 2;
        } else {
          curr += x[i];
          i += 1;
        }
      }
      if (i >= x.length) {
        throw `Tokenize Error: EOL while attempting to tokenize the string starting at column: ${start}`;
      }
      result.push(['STRING', curr]);
      i += 1; // closing '"'
      continue;
    }

    // Any other character produces no token and is skipped.
    i += 1;
  }

  return result;
}

/**
 * Consume the current token if `f(type, val)` accepts it.
 *
 * @param {function(string, (string|null)): boolean} f - Token predicate.
 * @param {string} expectation - Human-readable description used in the error.
 * @param {{i: number, tokens: Array}} p - Parser state; `p.i` is advanced on success.
 * @throws {string} "Parse Error: ..." when the token is rejected or the input
 *   is exhausted.
 */
function expect(f, expectation, p) {
  if (p.i >= p.tokens.length) {
    throw `Parse Error: expected ${expectation}, but got end of input; ${JSON.stringify(p)}`;
  }
  const [type, val] = p.tokens[p.i];
  if (!f(type, val)) {
    throw `Parse Error: expected ${expectation}, but got ${p.tokens[p.i]}; ${JSON.stringify(p)}`;
  }
  p.i += 1;
}

/**
 * Test the current token against `f` without consuming it.
 *
 * @param {function(string, (string|null)): boolean} f - Token predicate.
 * @param {{i: number, tokens: Array}} p - Parser state (not modified).
 * @returns {boolean} `true` if there is a current token and `f` accepts it;
 *   `false` otherwise, including at end of input.
 */
function matches(f, p) {
  if (p.i >= p.tokens.length) {
    return false;
  }
  const [type, val] = p.tokens[p.i];
  return Boolean(f(type, val));
}
// Text used in error messages for the token under the cursor.
function describeToken(p) {
  return p.i < p.tokens.length ? `${p.tokens[p.i]}` : 'end of input';
}

// Compile regex source text, case-insensitively unless config.caseSensitive is set.
// An invalid pattern makes the RegExp constructor throw its usual SyntaxError.
function buildRegex(source, config) {
  const { caseSensitive = false } = config || {};
  return caseSensitive ? new RegExp(source) : new RegExp(source, 'i');
}

/**
 * Consume the current token if `f(type, val)` accepts it and return its value.
 *
 * @param {function(string, (string|null)): boolean} f - Token predicate.
 * @param {string} expectation - Human-readable description used in the error.
 * @param {{i: number, tokens: Array}} p - Parser state; `p.i` is advanced on success.
 * @returns {(string|null)} The value of the consumed token.
 * @throws {string} "Parse Error: ..." when the token is rejected or the input
 *   is exhausted.
 */
function match(f, expectation, p) {
  if (p.i < p.tokens.length) {
    const [type, val] = p.tokens[p.i];
    if (f(type, val)) {
      p.i += 1;
      return val;
    }
  }
  throw `Parse Error: expected ${expectation}, but got: ${describeToken(p)}; ${JSON.stringify(p)}`;
}

/** Advance `p.i` past any WHITESPACE tokens. */
function skipWhitespace(p) {
  // peekType is bounds-checked, so this stops cleanly at end of input.
  while (peekType(0, p) === 'WHITESPACE') {
    p.i += 1;
  }
}

/** Create parser state: a cursor `i` over `tokens`. */
function parser(tokens) {
  return { i: 0, tokens };
}

/**
 * Parse query text into an AST.
 *
 * @param {string} x - The query text.
 * @param {Object} [config] - `preferRegex` (default true) decides whether bare
 *   atoms become regexes or literal strings; `caseSensitive` (default false)
 *   decides whether regexes get the "i" flag.
 * @returns {Object} The root AST node.
 * @throws {string} "Parse Error: ..." on malformed input.
 */
function parse(x, config) {
  const tokens = tokenize(x);
  const p = parser(tokens);
  return conjunction(p, config);
}

/**
 * conjunction -> selection ( ( "AND" | "OR" )? selection )*
 *
 * Parsed right-recursively: everything after the first selection becomes the
 * `rhs` of one CONJUNCTION node. Two selections with no keyword between them
 * are joined with AND.
 */
function conjunction(p, config) {
  skipWhitespace(p);
  const lhs = selection(p, config);
  skipWhitespace(p);

  if (p.i >= p.tokens.length) {
    return lhs;
  }

  let joint = 'AND';
  if (peekType(0, p) === 'ATOM') {
    const word = p.tokens[p.i][1];
    if (word === 'AND' || word === 'OR') {
      joint = word;
      p.i += 1;
    }
  }
  skipWhitespace(p);
  const rhs = conjunction(p, config);

  return {
    type: 'CONJUNCTION',
    joint,
    lhs,
    rhs,
  };
}

/** Type of the token `n` positions ahead of the cursor, or `null` past the end. */
function peekType(n, p) {
  if (p.i + n < p.tokens.length) {
    return p.tokens[p.i + n][0];
  }
  return null;
}

/**
 * selection -> "-"? COLUMN ":" value | matchAll
 *
 * `before:` and `after:` are treated as date selections; any other column
 * yields a SELECTION node. Input that does not start with `column:` (or
 * `-column:`) is parsed as a match-all term.
 */
function selection(p, config) {
  const negate = peekType(0, p) === 'NEGATE';
  const offset = negate ? 1 : 0;
  const isColumnSelection =
    peekType(offset, p) === 'ATOM' && peekType(offset + 1, p) === 'COLON';

  if (!isColumnSelection) {
    return matchAll(p, config);
  }

  if (negate) {
    p.i += 1;
  }
  const key = match((type, _) => type === 'ATOM', 'a column label', p);
  match((type, _) => type === 'COLON', 'a colon', p);

  if (key === 'before' || key === 'after') {
    // NOTE(review): a leading "-" is consumed but not recorded on date
    // selections, so "-before:x" parses the same as "before:x".
    const val = date(p);
    return {
      type: 'DATE_SELECTION',
      key,
      val,
    };
  }

  const val = value(p, config);
  return {
    type: 'SELECTION',
    negate,
    key,
    val,
  };
}

/**
 * Parse a term that is matched against every column of a row.
 *
 * Accepts the same tokens as `value` and wraps the result in a MATCH_ALL node
 * whose `matchType` is 'REGEX' or 'STRING'.
 */
function matchAll(p, config) {
  const { type, val } = value(p, config);
  return { type: 'MATCH_ALL', matchType: type, val };
}

/**
 * Parse the value side of a selection.
 *
 * ATOM tokens become a REGEX node when `config.preferRegex` is on (the
 * default) and a STRING node otherwise; STRING and REGEX tokens keep their
 * type. Regexes are case-insensitive unless `config.caseSensitive` is set.
 *
 * @returns {{type: string, val: (string|RegExp)}}
 * @throws {string} "Parse Error: ..." for any other token or at end of input.
 */
function value(p, config) {
  const [type, val] = p.i < p.tokens.length ? p.tokens[p.i] : [null, null];

  if (type === 'ATOM') {
    p.i += 1;
    const { preferRegex = true } = config || {};
    if (preferRegex) {
      return { type: 'REGEX', val: buildRegex(val, config) };
    }
    return { type: 'STRING', val };
  }
  if (type === 'STRING') {
    p.i += 1;
    return { type, val };
  }
  if (type === 'REGEX') {
    p.i += 1;
    return { type, val: buildRegex(val, config) };
  }
  throw `Parse Error: Expected a regular expression or a string, but got: ${describeToken(p)}; ${JSON.stringify(p)}`;
}

/**
 * Consume one token and return its value as the date text.
 *
 * NOTE(review): the token type is not checked, so whatever follows the colon
 * is accepted — confirm whether this should be limited to ATOM/STRING.
 *
 * @throws {string} "Parse Error: ..." at end of input.
 */
function date(p) {
  if (p.i >= p.tokens.length) {
    throw `Parse Error: expected a date, but got: end of input; ${JSON.stringify(p)}`;
  }
  const [, val] = p.tokens[p.i];
  p.i += 1;

  return val;
}

// Patch trailer that shared this span with the JavaScript above, kept verbatim:
// -- cgit 1.4.1