about summary refs log tree commit diff
diff options
context:
space:
mode:
authorEelco Dolstra <e.dolstra@tudelft.nl>2004-01-30T15·21+0000
committerEelco Dolstra <e.dolstra@tudelft.nl>2004-01-30T15·21+0000
commitc5baaafae69394082817ede9e6eb3910c4601a72 (patch)
treebfc2599717c4274e16ba02959617254873c0c007
parentabd1878b26200ba3fa75592637aa87e04f52100d (diff)
* Replaced the SDF parser by a substantially faster Bison/Flex
  parser (roughly 80x faster).

  The absolutely latest version of Bison (1.875c) is required for
  reentrant GLR support, as well as a recent version of Flex (say,
  2.5.31).  Note that most Unix distributions ship with the
  prehistoric Flex 2.5.4, which doesn't support reentrancy.

-rw-r--r--src/libexpr/Makefile.am18
-rw-r--r--src/libexpr/eval.cc2
-rw-r--r--src/libexpr/lexer.l78
-rw-r--r--src/libexpr/nix.sdf131
-rw-r--r--src/libexpr/parser.cc154
-rw-r--r--src/libexpr/parser.y128
6 files changed, 260 insertions, 251 deletions
diff --git a/src/libexpr/Makefile.am b/src/libexpr/Makefile.am
index a11dbbda6c26..66a3008ed507 100644
--- a/src/libexpr/Makefile.am
+++ b/src/libexpr/Makefile.am
@@ -1,20 +1,22 @@
 noinst_LIBRARIES = libexpr.a
 
 libexpr_a_SOURCES = nixexpr.cc nixexpr.hh parser.cc parser.hh \
- eval.cc eval.hh primops.cc primops.hh nix.sdf
+ eval.cc eval.hh primops.cc primops.hh \
+ lexer-tab.c lexer-tab.h parser-tab.c parser-tab.h
 
 AM_CXXFLAGS = \
  -I.. -I../../externals/inst/include -I../libutil -I../libstore
+AM_CFLAGS = \
+ -I../../externals/inst/include
 
 
-# Parse table generation.
+# Parser generation.
 
-parser.o: parse-table.h
+parser-tab.c parser-tab.h: parser.y
+	../grammartest/inst/bin/bison -v -o parser-tab.c parser.y -d
 
-parse-table.h: nix.tbl
-	../bin2c/bin2c nixParseTable < $< > $@ || (rm $@ && exit 1)
+lexer-tab.c lexer-tab.h: lexer.l
+	flex --outfile lexer-tab.c --header-file=lexer-tab.h lexer.l 
 
-%.tbl: %.sdf
-	../../externals/inst/bin/sdf2table -s -i $< -o $@
 
-CLEANFILES = parse-table.h nix.tbl
+CLEANFILES =
diff --git a/src/libexpr/eval.cc b/src/libexpr/eval.cc
index 0470deee9c0c..77cab55d034c 100644
--- a/src/libexpr/eval.cc
+++ b/src/libexpr/eval.cc
@@ -137,6 +137,8 @@ Expr evalExpr2(EvalState & state, Expr e)
     /* Any encountered variables must be undeclared or primops. */
     if (atMatch(m, e) >> "Var" >> s1) {
         if (s1 == "null") return primNull(state);
+        if (s1 == "true") return ATmake("Bool(True)");
+        if (s1 == "false") return ATmake("Bool(False)");
         return e;
     }
 
diff --git a/src/libexpr/lexer.l b/src/libexpr/lexer.l
new file mode 100644
index 000000000000..705b31b41a77
--- /dev/null
+++ b/src/libexpr/lexer.l
@@ -0,0 +1,78 @@
+%option reentrant bison-bridge bison-locations
+%option noyywrap
+%option never-interactive
+
+
+%{
+#include <string.h>
+#include <aterm2.h>
+#include "parser-tab.h"
+
+static void initLoc(YYLTYPE * loc)
+{
+    loc->first_line = 1;
+    loc->first_column = 1;
+}
+
+static void adjustLoc(YYLTYPE * loc, const char * s, size_t len)
+{
+    while (len--) {
+       switch (*s++) {
+       case '\n': 
+           ++loc->first_line;
+           loc->first_column = 1;
+           break;
+       default:
+           ++loc->first_column;
+       }
+    }
+}
+
+#define YY_USER_INIT initLoc(yylloc)
+#define YY_USER_ACTION adjustLoc(yylloc, yytext, yyleng);
+
+%}
+
+
+ID          [a-zA-Z\_][a-zA-Z0-9\_\']*
+INT         [0-9]+
+STR         \"[^\n\"]*\"
+PATH        [a-zA-Z0-9\.\_\-\+]*(\/[a-zA-Z0-9\.\_\-\+]+)+
+URI         [a-zA-Z][a-zA-Z0-9\+\-\.]*\:[a-zA-Z0-9\%\/\?\:\@\&\=\+\$\,\-\_\.\!\~\*\']*
+
+
+%%
+
+
+if          { return IF; }
+then        { return THEN; }
+else        { return ELSE; }
+assert      { return ASSERT; }
+let         { return LET; }
+rec         { return REC; }
+
+\=\=        { return EQ; }
+\!\=        { return NEQ; }
+\&\&        { return AND; }
+\|\|        { return OR; }
+\-\>        { return IMPL; }
+
+{ID}        { yylval->t = ATmake("<str>", yytext); return ID; /* !!! alloc */ }
+{INT}       { return INT; }
+{STR}       { int len = strlen(yytext);
+              yytext[len - 1] = 0;
+              yylval->t = ATmake("<str>", yytext + 1);
+              yytext[len - 1] = '\"';
+              return STR; /* !!! alloc */
+            }
+{PATH}      { yylval->t = ATmake("<str>", yytext); return PATH; /* !!! alloc */ }
+{URI}       { yylval->t = ATmake("<str>", yytext); return URI; /* !!! alloc */ }
+
+[ \t\n]+    /* eat up whitespace */
+\#[^\n]*    /* single-line comments */
+\/\*(.|\n)*\*\/  /* long comments */
+
+.           return yytext[0];
+
+
+%%
diff --git a/src/libexpr/nix.sdf b/src/libexpr/nix.sdf
deleted file mode 100644
index 36fbef39cb28..000000000000
--- a/src/libexpr/nix.sdf
+++ /dev/null
@@ -1,131 +0,0 @@
-definition
-
-module Main
-imports Fix
-
-
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-%% Top level syntax.
-
-module Fix
-imports Fix-Exprs Fix-Layout
-
-
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-%% Expressions.
-
-module Fix-Exprs
-imports Fix-Lexicals
-exports
-  sorts Expr Formal Bind Binds ExprList
-  context-free syntax
-
-    Id -> Expr {cons("Var")}
-
-    Int -> Expr {cons("Int")}
-
-    Str -> Expr {cons("Str")}
-
-    Uri -> Expr {cons("Uri")}
-
-    Path -> Expr {cons("Path")}
-
-    "(" Expr ")" -> Expr {bracket}
-
-    Expr Expr -> Expr {cons("Call"), left}
-
-    "{" {Formal ","}* "}" ":" Expr -> Expr {cons("Function")}
-    Id -> Formal {cons("NoDefFormal")}
-    Id "?" Expr -> Formal {cons("DefFormal")}
-
-    "assert" Expr ";" Expr -> Expr {cons("Assert")}
-
-    "rec" "{" Binds "}" -> Expr {cons("Rec")}
-    "let" "{" Binds "}" -> Expr {cons("LetRec")}
-    "{" Binds "}" -> Expr {cons("Attrs")}
-
-    Bind* -> Binds
-    Id "=" Expr ";" -> Bind {cons("Bind")}
-
-    "[" ExprList "]" -> Expr {cons("List")}
-    "" -> ExprList {cons("ExprNil")}
-    Expr ExprList -> ExprList {cons("ExprCons")}
-
-    Expr "." Id -> Expr {cons("Select")}
-
-    "if" Expr "then" Expr "else" Expr -> Expr {cons("If")}
-
-    Expr "==" Expr -> Expr {cons("OpEq"), non-assoc}
-    Expr "!=" Expr -> Expr {cons("OpNEq"), non-assoc}
-
-    "!" Expr -> Expr {cons("OpNot")}
-    Expr "&&" Expr -> Expr {cons("OpAnd"), right}
-    Expr "||" Expr -> Expr {cons("OpOr"), right}
-    Expr "->" Expr -> Expr {cons("OpImpl"), right}
-
-  context-free priorities
-
-    Expr "." Id -> Expr
-  > Expr ExprList -> ExprList
-  > Expr Expr -> Expr
-  > "!" Expr -> Expr
-  > Expr "==" Expr -> Expr
-  > Expr "!=" Expr -> Expr
-  > Expr "&&" Expr -> Expr
-  > Expr "||" Expr -> Expr
-  > Expr "->" Expr -> Expr
-  > "assert" Expr ";" Expr -> Expr
-  > "{" {Formal ","}* "}" ":" Expr -> Expr
-
-
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-%% Lexical syntax.
-
-module Fix-Lexicals
-exports
-  sorts Id Int Str Path PathComp Uri
-  lexical syntax
-    [a-zA-Z\_][a-zA-Z0-9\_\']* -> Id
-    "rec" -> Id {reject}
-    "let" -> Id {reject}
-    "if" -> Id {reject}
-    "then" -> Id {reject}
-    "else" -> Id {reject}
-    "assert" -> Id {reject}
-
-    [0-9]+ -> Int
-
-    "\"" ~[\n\"]* "\"" -> Str
-
-    "." ("/" PathComp)+ -> Path
-    ".." ("/" PathComp)+ -> Path
-    ("/" PathComp)+ -> Path
-    [a-zA-Z0-9\.\_\-\+]+ -> PathComp
-
-    [a-zA-Z] [a-zA-Z0-9\+\-\.]* ":" [a-zA-Z0-9\%\/\?\:\@\&\=\+\$\,\-\_\.\!\~\*\']* -> Uri
-
-  lexical restrictions
-    Id -/- [a-zA-Z0-9\_\']
-    Int -/- [0-9]
-    Path -/- [a-zA-Z0-9\.\_\-\+\/]
-    Uri -/- [a-zA-Z0-9\%\/\?\:\@\&\=\+\$\,\-\_\.\!\~\*\']
-
-
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-%% Layout.
-
-module Fix-Layout
-exports
-  sorts HashComment Asterisk Comment
-  lexical syntax
-    [\ \t\n] -> LAYOUT
-    HashComment -> LAYOUT
-    Comment -> LAYOUT
-    "#" ~[\n]* -> HashComment
-    "/*" ( ~[\*] | Asterisk )* "*/" -> Comment
-    [\*] -> Asterisk
-  lexical restrictions
-    Asterisk -/- [\/]
-    HashComment -/- ~[\n]
-  context-free restrictions
-    LAYOUT? -/- [\ \t\n]
diff --git a/src/libexpr/parser.cc b/src/libexpr/parser.cc
index 1e0ef0c45be8..167c34bd83e4 100644
--- a/src/libexpr/parser.cc
+++ b/src/libexpr/parser.cc
@@ -5,133 +5,63 @@
 #include <fcntl.h>
 #include <unistd.h>
 
-extern "C" {
-#include <sglr.h>
-#include <asfix2.h>
-}
-
 #include "aterm.hh"
 #include "parser.hh"
-#include "parse-table.h"
 
 
-/* Cleanup cleans up an imploded parse tree into an actual abstract
-   syntax tree that we can evaluate.  It removes quotes around
-   strings, converts integer literals into actual integers, and
-   absolutises paths relative to the directory containing the input
-   file. */
-struct Cleanup : TermFun
+struct ParseData 
 {
+    Expr result;
     string basePath;
+    string location;
+    string error;
+};
 
-    virtual ATerm operator () (ATerm e)
-    {
-        checkInterrupt();
-        
-        ATMatcher m;
-        string s;
-
-        if (atMatch(m, e) >> "Str" >> s)
-            return ATmake("Str(<str>)",
-                string(s, 1, s.size() - 2).c_str());
-
-        if (atMatch(m, e) >> "Path" >> s)
-            return ATmake("Path(<str>)", absPath(s, basePath).c_str());
-
-        if (atMatch(m, e) >> "Int" >> s) {
-            istringstream s2(s);
-            int n;
-            s2 >> n;
-            return ATmake("Int(<int>)", n);
-        }
-
-        if (atMatch(m, e) >> "Var" >> "true")
-            return ATmake("Bool(True)");
-        
-        if (atMatch(m, e) >> "Var" >> "false")
-            return ATmake("Bool(False)");
-
-        if (atMatch(m, e) >> "ExprNil")
-            return (ATerm) ATempty;
+extern "C" {
 
-        ATerm e1;
-        ATermList e2;
-        if (atMatch(m, e) >> "ExprCons" >> e1 >> e2)
-            return (ATerm) ATinsert(e2, e1);
+#include "parser-tab.h"
+#include "lexer-tab.h"
+    
+    /* Callbacks for getting from C to C++.  Due to a (small) bug in the
+       GLR code of Bison we cannot currently compile the parser as C++
+       code. */
+   
+    void setParseResult(ParseData * data, ATerm t)
+    {
+        data->result = t;
+    }
 
-        return e;
+    ATerm absParsedPath(ParseData * data, ATerm t)
+    {
+        return string2ATerm(absPath(aterm2String(t), data->basePath).c_str());
     }
-};
+    
+    void parseError(ParseData * data, char * error, int line, int column)
+    {
+        data->error = (format("%1%, at line %2%, column %3%, of %4%")
+            % error % line % column % data->location).str();
+    }
+        
+    int yyparse(yyscan_t scanner, ParseData * data);
+}
 
 
 static Expr parse(const char * text, const string & location,
     const Path & basePath)
 {
-    /* Initialise the SDF libraries. */
-    static bool initialised = false;
-    static ATerm parseTable = 0;
-    static language lang = 0;
-
-    if (!initialised) {
-        PT_initMEPTApi();
-        PT_initAsFix2Api();
-        SGinitParser(ATfalse);
-
-        ATprotect(&parseTable);
-        parseTable = ATreadFromBinaryString(
-            (char *) nixParseTable, sizeof nixParseTable);
-        if (!parseTable)
-            throw Error(format("cannot construct parse table term"));
-
-        ATprotect(&lang);
-        lang = ATmake("Nix");
-        if (!SGopenLanguageFromTerm("nix-parse", lang, parseTable))
-            throw Error(format("cannot open language"));
-
-        SG_STARTSYMBOL_ON();
-        SG_OUTPUT_ON();
-        SG_ASFIX2ME_ON();
-        SG_AMBIGUITY_ERROR_ON();
-        SG_FILTER_OFF();
-
-        initialised = true;
-    }
-
-    /* Parse it. */
-    ATerm result = SGparseString(lang, "Expr", (char *) text);
-    if (!result)
-        throw SysError(format("parse failed in `%1%'") % location);
-    if (SGisParseError(result))
-        throw Error(format("parse error in `%1%': %2%")
-            % location % result);
-
-    /* Implode it. */
-    PT_ParseTree tree = PT_makeParseTreeFromTerm(result);
-    if (!tree)
-        throw Error(format("cannot create parse tree"));
+    yyscan_t scanner;
+    ParseData data;
+    data.basePath = basePath;
+    data.location = location;
+
+    yylex_init(&scanner);
+    yy_scan_string(text, scanner);
+    int res = yyparse(scanner, &data);
+    yylex_destroy(scanner);
     
-    ATerm imploded = PT_implodeParseTree(tree,
-        ATtrue,
-        ATtrue,
-        ATtrue,
-        ATtrue,
-        ATtrue,
-        ATtrue,
-        ATfalse,
-        ATtrue,
-        ATtrue,
-        ATtrue,
-        ATfalse);
-    if (!imploded)
-        throw Error(format("cannot implode parse tree"));
-
-    printMsg(lvlVomit, format("imploded parse tree of `%1%': %2%")
-        % location % imploded);
-
-    /* Finally, clean it up. */
-    Cleanup cleanup;
-    cleanup.basePath = basePath;
-    return bottomupRewrite(cleanup, imploded);
+    if (res) throw Error(data.error);
+
+    return data.result;
 }
 
 
@@ -171,7 +101,7 @@ Expr parseExprFromFile(Path path)
     readFull(fd, (unsigned char *) text, st.st_size);
     text[st.st_size] = 0;
 
-    return parse(text, path, dirOf(path));
+    return parse(text, "`" + path + "'", dirOf(path));
 }
 
 
diff --git a/src/libexpr/parser.y b/src/libexpr/parser.y
new file mode 100644
index 000000000000..45e6a98e843a
--- /dev/null
+++ b/src/libexpr/parser.y
@@ -0,0 +1,128 @@
+%glr-parser
+%pure-parser
+%locations
+%error-verbose
+%parse-param { yyscan_t scanner }
+%parse-param { void * data }
+%lex-param { yyscan_t scanner }
+
+%{
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <aterm2.h>
+
+#include "parser-tab.h"
+#include "lexer-tab.h"
+
+void setParseResult(void * data, ATerm t);
+void parseError(void * data, char * error, int line, int column);
+ATerm absParsedPath(void * data, ATerm t);
+
+void yyerror(YYLTYPE * loc, yyscan_t scanner, void * data, char * s)
+{
+    parseError(data, s, loc->first_line, loc->first_column);
+}
+ 
+%}
+
+%union {
+  ATerm t;
+  ATermList ts;
+}
+
+%type <t> start expr expr_function expr_assert expr_op
+%type <t> expr_app expr_select expr_simple bind formal
+%type <ts> binds expr_list formals
+%token <t> ID INT STR PATH URI
+%token IF THEN ELSE ASSERT LET REC EQ NEQ AND OR IMPL
+
+%nonassoc IMPL
+%left OR
+%left AND
+%nonassoc EQ NEQ
+%left NEG
+
+%%
+
+start: expr { setParseResult(data, $1); };
+
+expr: expr_function;
+
+expr_function
+  : '{' formals '}' ':' expr_function
+    { $$ = ATmake("Function(<term>, <term>)", $2, $5); }
+  | expr_assert
+  ;
+
+expr_assert
+  : ASSERT expr ';' expr_assert
+    { $$ = ATmake("Assert(<term>, <term>)", $2, $4); }
+  | expr_op
+  ;
+
+expr_op
+  : '!' expr_op %prec NEG { $$ = ATmake("OpNot(<term>)", $2); }
+  | expr_op EQ expr_op { $$ = ATmake("OpEq(<term>, <term>)", $1, $3); }
+  | expr_op NEQ expr_op { $$ = ATmake("OpNEq(<term>, <term>)", $1, $3); }
+  | expr_op AND expr_op { $$ = ATmake("OpAnd(<term>, <term>)", $1, $3); }
+  | expr_op OR expr_op { $$ = ATmake("OpOr(<term>, <term>)", $1, $3); }
+  | expr_op IMPL expr_op { $$ = ATmake("OpImpl(<term>, <term>)", $1, $3); }
+  | expr_app
+  ;
+
+expr_app
+  : expr_app expr_select
+    { $$ = ATmake("Call(<term>, <term>)", $1, $2); }
+  | expr_select { $$ = $1; }
+  ;
+
+expr_select
+  : expr_select '.' ID
+    { $$ = ATmake("Select(<term>, <term>)", $1, $3); }
+  | expr_simple { $$ = $1; }
+  ;
+
+expr_simple
+  : ID { $$ = ATmake("Var(<term>)", $1); }
+  | STR { $$ = ATmake("Str(<term>)", $1); }
+  | PATH { $$ = ATmake("Path(<term>)", absParsedPath(data, $1)); }
+  | URI { $$ = ATmake("Uri(<term>)", $1); }
+  | '(' expr ')' { $$ = $2; }
+  | LET '{' binds '}' { $$ = ATmake("LetRec(<term>)", $3); }
+  | REC '{' binds '}' { $$ = ATmake("Rec(<term>)", $3); }
+  | '{' binds '}' { $$ = ATmake("Attrs(<term>)", $2); }
+  | '[' expr_list ']' { $$ = ATmake("List(<term>)", $2); }
+  | IF expr THEN expr ELSE expr
+    { $$ = ATmake("If(<term>, <term>, <term>)", $2, $4, $6); }
+  ;
+
+binds
+  : binds bind { $$ = ATinsert($1, $2); }
+  | { $$ = ATempty; }
+  ;
+
+bind
+  : ID '=' expr ';'
+    { $$ = ATmake("Bind(<term>, <term>)", $1, $3); }
+  ;
+
+expr_list
+  : expr_select expr_list { $$ = ATinsert($2, $1); }
+    /* yes, this is right-recursive, but it doesn't matter since
+       otherwise we would need ATreverse which requires unbounded
+       stack space */
+  | { $$ = ATempty; }
+  ;
+
+formals
+  : formal ',' formals { $$ = ATinsert($3, $1); } /* idem - right recursive */
+  | formal { $$ = ATinsert(ATempty, $1); }
+  ;
+
+formal
+  : ID { $$ = ATmake("NoDefFormal(<term>)", $1); }
+  | ID '?' expr { $$ = ATmake("DefFormal(<term>, <term>)", $1, $3); }
+  ;
+  
+%%