about summary refs log tree commit diff
path: root/users/Profpatsch
diff options
context:
space:
mode:
author Profpatsch <mail@profpatsch.de> 2021-01-16T13·09+0100
committer Profpatsch <mail@profpatsch.de> 2021-01-17T07·51+0000
commit 9ea76fdf1ac08cd9b594ad37f6b963e78f818efc (patch)
tree 51040e3772d4fec98a3472e4354e7e5b8ab8d666 /users/Profpatsch
parent e4976c49dc54a6bce0271dc291a27b721259f27b (diff)
feat(users/Profpatsch): add a tree-sitter parser for nix r/2117
Uses the rust library to set up a simple nix parsing expression, which
reads a nix file and prints the sexp tree.

Change-Id: I32dc9c7b39aa0f7ffa2b99348d6c2269e5fe1a6a
Reviewed-on: https://cl.tvl.fyi/c/depot/+/2402
Tested-by: BuildkiteCI
Reviewed-by: Profpatsch <mail@profpatsch.de>
Diffstat (limited to 'users/Profpatsch')
-rw-r--r-- users/Profpatsch/rust-crates.nix 53
-rw-r--r-- users/Profpatsch/tree-sitter.nix 109
2 files changed, 162 insertions, 0 deletions
diff --git a/users/Profpatsch/rust-crates.nix b/users/Profpatsch/rust-crates.nix
new file mode 100644
index 000000000000..ebe6d73ceba9
--- /dev/null
+++ b/users/Profpatsch/rust-crates.nix
@@ -0,0 +1,53 @@
+{ depot, pkgs, ... }:
+rec { # rec, because crates refer to each other in their dependency lists
+  cfg-if = pkgs.buildRustCrate { # dependency of libloading
+    crateName = "cfg-if";
+    pname = "cfg-if";
+    version = "1.0.0";
+    sha256 = "1fzidq152hnxhg4lj6r2gv4jpnn8yivp27z6q6xy7w6v0dp6bai9";
+  };
+
+  cc = pkgs.buildRustCrate { # build-time dependency of tree-sitter
+    crateName = "cc";
+    pname = "cc";
+    version = "1.0.66";
+    sha256 = "12q71z6ck8wlqrwgi25x3lrryyks9djymswn9b1c6qq0i01jpc1p";
+  };
+
+  regex-syntax = pkgs.buildRustCrate { # dependency of regex
+    crateName = "regex-syntax";
+    pname = "regex-syntax";
+    version = "0.6.22";
+    sha256 = "0r00n2dgyixacl1sczqp18gxf0xh7x272hcdp62412lypba2gqyg";
+  };
+
+  regex = pkgs.buildRustCrate { # dependency of tree-sitter
+    crateName = "regex";
+    pname = "regex";
+    version = "1.4.3";
+    sha256 = "0w0b4bh0ng20lf5y8raaxmxj46ikjqpgwy1iggzpby9lhv9vydkp";
+    dependencies = [ regex-syntax ];
+    features = [ "std" ];
+  };
+
+  libloading = pkgs.buildRustCrate { # used by print-ast to load the grammar
+    crateName = "libloading";
+    pname = "libloading";
+    version = "0.6.7";
+    sha256 = "111d8zsizswnxiqn43vcgnc2ym9spsx1i6pcfp35ca3yw2ixq95j";
+    dependencies = [ cfg-if ];
+  };
+
+  tree-sitter = pkgs.buildRustCrate { # used by print-ast to parse
+    crateName = "tree-sitter";
+    pname = "tree_sitter";
+    version = "0.17.1";
+    sha256 = "0jwwbvs4icpra7m1ycvnyri5h3sbw4qrfvgnnvnk72h4w93qhzhr";
+    # lib.rs lives in a subdirectory that buildRustCrate does not detect by itself
+    libPath = "binding_rust/lib.rs";
+    # build.rs is not where buildRustCrate would look for it either
+    build = "binding_rust/build.rs";
+    buildDependencies = [ cc ];
+    dependencies = [ regex ];
+  };
+}
diff --git a/users/Profpatsch/tree-sitter.nix b/users/Profpatsch/tree-sitter.nix
new file mode 100644
index 000000000000..dd1dc05a6401
--- /dev/null
+++ b/users/Profpatsch/tree-sitter.nix
@@ -0,0 +1,109 @@
+{ depot, pkgs, lib, ... }:
+
+let
+  # Rust program that parses a file with a tree-sitter grammar and prints the tree as an s-expression.
+  print-ast = depot.users.Profpatsch.writers.rustSimple {
+    name = "print-ast";
+    dependencies = with depot.users.Profpatsch.rust-crates; [
+      libloading
+      tree-sitter
+    ];
+    buildInputs = [
+      pkgs.tree-sitter
+    ];
+  } ''
+    extern crate libloading;
+    extern crate tree_sitter;
+    use std::mem;
+    use libloading::{Library, Symbol};
+    use tree_sitter::{Language, Parser};
+
+    /// Load the shared lib FILE and return the language under SYMBOL-NAME.
+    /// Inspired by the rust source of emacs-tree-sitter.
+    fn _load_language(file: String, symbol_name: String) -> Result<Language, libloading::Error> {
+        let lib = Library::new(file)?;
+        // SAFETY: assumes SYMBOL-NAME is a grammar entry point (`extern "C" fn() -> Language`); unchecked.
+        let tree_sitter_lang: Symbol<'_, unsafe extern "C" fn() -> _> =
+            unsafe { lib.get(symbol_name.as_bytes())? };
+        let language: Language = unsafe { tree_sitter_lang() };
+        // Avoid segmentation fault by not unloading the lib, as language is a static piece of data.
+        // TODO: Attach an Rc<Library> to Language instead.
+        mem::forget(lib);
+        Ok(language)
+    }
+
+    /// Usage: print-ast SHARED-LIB SYMBOL-NAME FILE
+    /// Panics with a message if an argument is missing or any step fails.
+    fn main() {
+        let mut args = std::env::args().skip(1);
+        let so = args.next().expect("missing argument: shared library");
+        let symbol_name = args.next().expect("missing argument: symbol name");
+        let file = args.next().expect("missing argument: file to parse");
+        let lang = _load_language(so, symbol_name).unwrap();
+        let mut parser = Parser::new();
+        parser.set_language(lang).unwrap();
+        // Read the whole file; a failed read aborts instead of parsing partial data.
+        let bytes = std::fs::read(file).unwrap();
+        print!("{}", parser.parse(&bytes, None).unwrap().root_node().to_sexp());
+    }
+  '';
+
+  tree-sitter-nix = buildTreeSitterGrammar { # shared-object parser for the Nix grammar
+    source = pkgs.fetchFromGitHub { # grammar sources, pinned to a fixed revision
+      repo = "tree-sitter-nix";
+      owner = "cstrahan";
+      sha256 = "1y5b3wh3fcmbgq8r2i97likzfp1zp02m58zacw5a1cjqs5raqz66";
+      rev = "791b5ff0e4f0da358cbb941788b78d436a2ca621";
+    };
+    language = "tree-sitter-nix"; # ends up in the pname of the derivation
+  };
+
+  parse-nix-file = depot.nix.writeExecline "parse-nix-file" { readNArgs = 1; }
+    # print-ast SHARED-LIB SYMBOL-NAME FILE; "$1" is presumably the one argument bound via readNArgs
+    [ print-ast "${tree-sitter-nix}/parser" "tree_sitter_nix" "$1" ];
+
+  # copied from nixpkgs
+  # Builds the grammar in `source` into a shared object installed at `$out/parser`.
+  buildTreeSitterGrammar =
+      {
+        # language name
+        language
+        # source for the language grammar
+      , source
+      }:
+
+      pkgs.stdenv.mkDerivation {
+        pname = "${language}-grammar";
+        inherit (pkgs.tree-sitter) version;
+        src = source;
+        buildInputs = [ pkgs.tree-sitter ];
+
+        # the sources are compiled straight from $src, so nothing to unpack or configure
+        dontUnpack = true;
+        configurePhase = ":";
+        buildPhase = ''
+          runHook preBuild
+          # the scanner is optional: compile and link it only if the grammar has one
+          scanner_o=""
+          if [ -f "$src/src/scanner.cc" ]; then
+            $CXX -I$src/src/ -c "$src/src/scanner.cc"
+            scanner_o="scanner.o"
+          fi
+          $CC -I$src/src/ -shared -o parser -Os $scanner_o $src/src/parser.c -lstdc++
+          runHook postBuild
+        '';
+        installPhase = ''
+          runHook preInstall
+          mkdir $out
+          mv parser $out/
+          runHook postInstall
+        '';
+      };
+
+in
+# Attributes exported by this file; buildTreeSitterGrammar is a local helper and is not exported.
+{
+  inherit print-ast;
+  inherit tree-sitter-nix;
+  inherit parse-nix-file;
+}