From b2ba62170cc8359d2f8bbbd9dbacf331b98151fe Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Sat, 23 Oct 2010 21:11:59 +0000 Subject: * Optimise string constants by putting them in the symbol table. --- src/libexpr/eval.cc | 10 ++++++- src/libexpr/lexer.l | 8 +++--- src/libexpr/nixexpr.hh | 4 +-- src/libexpr/parser.y | 72 ++++++++++++++++++++++++++++---------------------- 4 files changed, 55 insertions(+), 39 deletions(-) diff --git a/src/libexpr/eval.cc b/src/libexpr/eval.cc index 72a3bf9b9c52..8c33fd22486c 100644 --- a/src/libexpr/eval.cc +++ b/src/libexpr/eval.cc @@ -249,6 +249,14 @@ void mkString(Value & v, const string & s, const PathSet & context) } +void mkString(Value & v, const Symbol & s) +{ + v.type = tString; + v.string.s = ((string) s).c_str(); + v.string.context = 0; +} + + void mkPath(Value & v, const char * s) { clearValue(v); @@ -429,7 +437,7 @@ void ExprInt::eval(EvalState & state, Env & env, Value & v) void ExprString::eval(EvalState & state, Env & env, Value & v) { - mkString(v, s.c_str()); + mkString(v, s); } diff --git a/src/libexpr/lexer.l b/src/libexpr/lexer.l index f29f9b684332..5b27e2582d2b 100644 --- a/src/libexpr/lexer.l +++ b/src/libexpr/lexer.l @@ -46,7 +46,7 @@ static void adjustLoc(YYLTYPE * loc, const char * s, size_t len) } -static Expr * unescapeStr(const char * s) +static Expr * unescapeStr(SymbolTable & symbols, const char * s) { string t; char c; @@ -66,7 +66,7 @@ static Expr * unescapeStr(const char * s) } else t += c; } - return new ExprString(t); + return new ExprString(symbols.create(t)); } @@ -119,7 +119,7 @@ inherit { return INHERIT; } "$\"" will be consumed as part of a string, rather than a "$" followed by the string terminator. Disallow "$\"" for now. */ - yylval->e = unescapeStr(yytext); + yylval->e = unescapeStr(data->symbols, yytext); return STR; } \$\{ { BEGIN(INITIAL); return DOLLAR_CURLY; } @@ -140,7 +140,7 @@ inherit { return INHERIT; } return IND_STR; } \'\'\\. { - yylval->e = unescapeStr(yytext + 2); + yylval->e = unescapeStr(data->symbols, yytext + 2); return IND_STR; } \$\{ { BEGIN(INITIAL); return DOLLAR_CURLY; } diff --git a/src/libexpr/nixexpr.hh b/src/libexpr/nixexpr.hh index 5c0071dca2ce..1aa3df3689a6 100644 --- a/src/libexpr/nixexpr.hh +++ b/src/libexpr/nixexpr.hh @@ -65,8 +65,8 @@ struct ExprInt : Expr struct ExprString : Expr { - string s; - ExprString(const string & s) : s(s) { }; + Symbol s; + ExprString(const Symbol & s) : s(s) { }; COMMON_METHODS }; diff --git a/src/libexpr/parser.y b/src/libexpr/parser.y index b0c54339b051..3a72a4ade257 100644 --- a/src/libexpr/parser.y +++ b/src/libexpr/parser.y @@ -7,19 +7,44 @@ %parse-param { yyscan_t scanner } %parse-param { ParseData * data } %lex-param { yyscan_t scanner } +%lex-param { ParseData * data } - -%{ -/* Newer versions of Bison copy the declarations below to - parser-tab.hh, which sucks bigtime since lexer.l doesn't want that - stuff. So allow it to be excluded. */ -#ifndef BISON_HEADER_HACK -#define BISON_HEADER_HACK - +%code requires { + +#ifndef BISON_HEADER +#define BISON_HEADER + #include "util.hh" #include "nixexpr.hh" +namespace nix { + + struct ParseData + { + SymbolTable & symbols; + Expr * result; + Path basePath; + Path path; + string error; + Symbol sLetBody; + ParseData(SymbolTable & symbols) + : symbols(symbols) + , sLetBody(symbols.create("")) + { }; + }; + +} + +#define YY_DECL int yylex \ + (YYSTYPE * yylval_param, YYLTYPE * yylloc_param, yyscan_t yyscanner, nix::ParseData * data) + +#endif + +} + +%{ + #include "parser-tab.hh" #include "lexer-tab.hh" #define YYSTYPE YYSTYPE // workaround a bug in Bison 2.4 @@ -28,27 +53,13 @@ #include #include +YY_DECL; using namespace nix; namespace nix { - -struct ParseData -{ - SymbolTable & symbols; - Expr * result; - Path basePath; - Path path; - string error; - Symbol sLetBody; - ParseData(SymbolTable & symbols) - : symbols(symbols) - , sLetBody(symbols.create("")) - { }; -}; - static string showAttrPath(const vector & attrPath) { @@ -113,9 +124,9 @@ static void addFormal(const Pos & pos, Formals * formals, const Formal & formal) } -static Expr * stripIndentation(vector & es) +static Expr * stripIndentation(SymbolTable & symbols, vector & es) { - if (es.empty()) return new ExprString(""); + if (es.empty()) return new ExprString(symbols.create("")); /* Figure out the minimum indentation. Note that by design whitespace-only final lines are not taken into account. (So @@ -195,7 +206,7 @@ static Expr * stripIndentation(vector & es) s2 = string(s2, 0, p + 1); } - es2->push_back(new ExprString(s2)); + es2->push_back(new ExprString(symbols.create(s2))); } return new ExprConcatStrings(es2); @@ -224,9 +235,6 @@ void yyerror(YYLTYPE * loc, yyscan_t scanner, ParseData * data, const char * err } -#endif - - %} %union { @@ -337,15 +345,15 @@ expr_simple | INT { $$ = new ExprInt($1); } | '"' string_parts '"' { /* For efficiency, and to simplify parse trees a bit. */ - if ($2->empty()) $$ = new ExprString(""); + if ($2->empty()) $$ = new ExprString(data->symbols.create("")); else if ($2->size() == 1) $$ = $2->front(); else $$ = new ExprConcatStrings($2); } | IND_STRING_OPEN ind_string_parts IND_STRING_CLOSE { - $$ = stripIndentation(*$2); + $$ = stripIndentation(data->symbols, *$2); } | PATH { $$ = new ExprPath(absPath($1, data->basePath)); } - | URI { $$ = new ExprString($1); } + | URI { $$ = new ExprString(data->symbols.create($1)); } | '(' expr ')' { $$ = $2; } /* Let expressions `let {..., body = ...}' are just desugared into `(rec {..., body = ...}).body'. */ -- cgit 1.4.1