From 9ea76fdf1ac08cd9b594ad37f6b963e78f818efc Mon Sep 17 00:00:00 2001
From: Profpatsch
Date: Sat, 16 Jan 2021 14:09:26 +0100
Subject: feat(users/Profpatsch): add a tree-sitter parser for nix

Uses the rust library to set up a simple nix parsing expression, which
reads a nix file and prints the sexp tree.

Change-Id: I32dc9c7b39aa0f7ffa2b99348d6c2269e5fe1a6a
Reviewed-on: https://cl.tvl.fyi/c/depot/+/2402
Tested-by: BuildkiteCI
Reviewed-by: Profpatsch
---
 users/Profpatsch/rust-crates.nix |  53 +++++++++++++++++
 users/Profpatsch/tree-sitter.nix | 120 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 173 insertions(+)
 create mode 100644 users/Profpatsch/rust-crates.nix
 create mode 100644 users/Profpatsch/tree-sitter.nix

diff --git a/users/Profpatsch/rust-crates.nix b/users/Profpatsch/rust-crates.nix
new file mode 100644
index 000000000000..ebe6d73ceba9
--- /dev/null
+++ b/users/Profpatsch/rust-crates.nix
@@ -0,0 +1,53 @@
+{ depot, pkgs, ... }:
+rec {
+  cfg-if = pkgs.buildRustCrate {
+    pname = "cfg-if";
+    crateName = "cfg-if";
+    version = "1.0.0";
+    sha256 = "1fzidq152hnxhg4lj6r2gv4jpnn8yivp27z6q6xy7w6v0dp6bai9";
+  };
+
+  cc = pkgs.buildRustCrate {
+    pname = "cc";
+    crateName = "cc";
+    version = "1.0.66";
+    sha256 = "12q71z6ck8wlqrwgi25x3lrryyks9djymswn9b1c6qq0i01jpc1p";
+  };
+
+  regex-syntax = pkgs.buildRustCrate {
+    pname = "regex-syntax";
+    crateName = "regex-syntax";
+    version = "0.6.22";
+    sha256 = "0r00n2dgyixacl1sczqp18gxf0xh7x272hcdp62412lypba2gqyg";
+  };
+
+  regex = pkgs.buildRustCrate {
+    pname = "regex";
+    crateName = "regex";
+    version = "1.4.3";
+    features = [ "std" ];
+    dependencies = [ regex-syntax ];
+    sha256 = "0w0b4bh0ng20lf5y8raaxmxj46ikjqpgwy1iggzpby9lhv9vydkp";
+  };
+
+  libloading = pkgs.buildRustCrate {
+    pname = "libloading";
+    crateName = "libloading";
+    version = "0.6.7";
+    dependencies = [ cfg-if ];
+    sha256 = "111d8zsizswnxiqn43vcgnc2ym9spsx1i6pcfp35ca3yw2ixq95j";
+  };
+
+  tree-sitter = pkgs.buildRustCrate {
+    pname = "tree_sitter";
+    crateName = "tree-sitter";
+    # buildRustCrate isn’t really smart enough to detect the subdir
+    libPath = "binding_rust/lib.rs";
+    # and the build.rs is also not where buildRustCrate would find it
+    build = "binding_rust/build.rs";
+    version = "0.17.1";
+    dependencies = [ regex ];
+    buildDependencies = [ cc ];
+    sha256 = "0jwwbvs4icpra7m1ycvnyri5h3sbw4qrfvgnnvnk72h4w93qhzhr";
+  };
+}
diff --git a/users/Profpatsch/tree-sitter.nix b/users/Profpatsch/tree-sitter.nix
new file mode 100644
index 000000000000..dd1dc05a6401
--- /dev/null
+++ b/users/Profpatsch/tree-sitter.nix
@@ -0,0 +1,120 @@
+{ depot, pkgs, lib, ... }:
+
+let
+  print-ast = depot.users.Profpatsch.writers.rustSimple {
+    name = "print-ast";
+    dependencies = with depot.users.Profpatsch.rust-crates; [
+      libloading
+      tree-sitter
+    ];
+    buildInputs = [
+      pkgs.tree-sitter
+    ];
+  } ''
+    extern crate libloading;
+    extern crate tree_sitter;
+    use std::mem;
+    use libloading::{Library, Symbol};
+    use tree_sitter::{Language, Parser};
+
+    /// Load the shared lib FILE and return the language under SYMBOL-NAME.
+    /// Inspired by the rust source of emacs-tree-sitter.
+    ///
+    /// Errors if FILE cannot be loaded or SYMBOL-NAME cannot be looked up in it.
+    fn _load_language(file: String, symbol_name: String) -> Result<Language, libloading::Error> {
+      let lib = Library::new(file)?;
+      // SAFETY: the real type of the symbol cannot be checked here; the caller must
+      // name a function that takes no arguments and returns a tree-sitter Language.
+      let tree_sitter_lang: Symbol<'_, unsafe extern "C" fn() -> _> =
+        unsafe { lib.get(symbol_name.as_bytes())? };
+      // SAFETY: sound only under the same assumption about the symbol as above.
+      let language: Language = unsafe { tree_sitter_lang() };
+      // Avoid segmentation fault by not unloading the lib, as language is a static piece of data.
+      // TODO: Attach an Rc to Language instead.
+      mem::forget(lib);
+      Ok(language)
+    }
+
+    /// Usage: print-ast SHARED-LIB SYMBOL-NAME FILE
+    ///
+    /// Parses FILE with the language loaded from SHARED-LIB and prints the
+    /// resulting syntax tree as an s-expression to stdout.
+    fn main() {
+      let usage = "usage: print-ast SHARED-LIB SYMBOL-NAME FILE";
+      // Skip the program name; the remaining arguments are positional.
+      let mut args = std::env::args().skip(1);
+      let so = args.next().expect(usage);
+      let symbol_name = args.next().expect(usage);
+      let file = args.next().expect(usage);
+      let mut parser = Parser::new();
+      let lang = _load_language(so, symbol_name).unwrap();
+      parser.set_language(lang).unwrap();
+      // Read the whole input up front; a failed read must not be parsed as an empty file.
+      let bytes = std::fs::read(file).unwrap();
+      print!("{}", parser.parse(&bytes, None).unwrap().root_node().to_sexp());
+    }
+
+
+  '';
+
+  tree-sitter-nix = buildTreeSitterGrammar {
+    language = "tree-sitter-nix";
+    source = pkgs.fetchFromGitHub {
+      owner = "cstrahan";
+      repo = "tree-sitter-nix";
+      rev = "791b5ff0e4f0da358cbb941788b78d436a2ca621";
+      sha256 = "1y5b3wh3fcmbgq8r2i97likzfp1zp02m58zacw5a1cjqs5raqz66";
+    };
+  };
+
+  parse-nix-file = depot.nix.writeExecline "parse-nix-file" { readNArgs = 1; } [
+    print-ast "${tree-sitter-nix}/parser" "tree_sitter_nix" "$1"
+  ];
+
+  # copied from nixpkgs
+  buildTreeSitterGrammar =
+      {
+        # language name
+        language
+        # source for the language grammar
+      , source
+      }:
+
+      pkgs.stdenv.mkDerivation {
+
+        pname = "${language}-grammar";
+        inherit (pkgs.tree-sitter) version;
+
+        src = source;
+
+        buildInputs = [ pkgs.tree-sitter ];
+
+        dontUnpack = true;
+        configurePhase= ":";
+        buildPhase = ''
+          runHook preBuild
+          # The external scanner is optional: only compile and link it if the grammar ships one.
+          scanner_cc="$src/src/scanner.cc"
+          scanner_o=""
+          if [ -f "$scanner_cc" ]; then
+            $CXX -I$src/src/ -c "$scanner_cc"
+            scanner_o="scanner.o"
+          fi
+          $CC -I$src/src/ -shared -o parser -Os $scanner_o $src/src/parser.c -lstdc++
+          runHook postBuild
+        '';
+        installPhase = ''
+          runHook preInstall
+          mkdir $out
+          mv parser $out/
+          runHook postInstall
+        '';
+      };
+
+in {
+  inherit
+    print-ast
+    tree-sitter-nix
+    parse-nix-file
+    ;
+}
-- 
cgit 1.4.1