about summary refs log tree commit diff
path: root/src/libexpr/lexer.l
diff options
context:
space:
mode:
Diffstat (limited to 'src/libexpr/lexer.l')
-rw-r--r--src/libexpr/lexer.l24
1 files changed, 14 insertions, 10 deletions
diff --git a/src/libexpr/lexer.l b/src/libexpr/lexer.l
index 828356bbf447..29ca327c1e4e 100644
--- a/src/libexpr/lexer.l
+++ b/src/libexpr/lexer.l
@@ -49,9 +49,10 @@ static void adjustLoc(YYLTYPE * loc, const char * s, size_t len)
 }
 
 
-static Expr * unescapeStr(SymbolTable & symbols, const char * s)
+static Expr * unescapeStr(SymbolTable & symbols, const char * s, size_t length)
 {
     string t;
+    t.reserve(length);
     char c;
     while ((c = *s++)) {
         if (c == '\\') {
@@ -84,13 +85,14 @@ static Expr * unescapeStr(SymbolTable & symbols, const char * s)
 %}
 
 
+ANY         .|\n
 ID          [a-zA-Z\_][a-zA-Z0-9\_\'\-]*
 INT         [0-9]+
 FLOAT       (([1-9][0-9]*\.[0-9]*)|(0?\.[0-9]+))([Ee][+-]?[0-9]+)?
 PATH        [a-zA-Z0-9\.\_\-\+]*(\/[a-zA-Z0-9\.\_\-\+]+)+\/?
 HPATH       \~(\/[a-zA-Z0-9\.\_\-\+]+)+\/?
 SPATH       \<[a-zA-Z0-9\.\_\-\+]+(\/[a-zA-Z0-9\.\_\-\+]+)*\>
-URI         [a-zA-Z][a-zA-Z0-9\+\-\.]*\:\/\/[a-zA-Z0-9\%\/\?\:\@\&\=\+\$\,\-\_\.\!\~\*\']+|channel\:[a-zA-Z0-9\%\/\?\:\@\&\=\+\$\,\-\_\.\!\~\*\']+
+URI         [a-zA-Z][a-zA-Z0-9\+\-\.]*\:[a-zA-Z0-9\%\/\?\:\@\&\=\+\$\,\-\_\.\!\~\*\']+
 
 
 %%
@@ -145,12 +147,12 @@ or          { return OR_KW; }
 <INITIAL,INSIDE_DOLLAR_CURLY>\" {
                 PUSH_STATE(STRING); return '"';
               }
-<STRING>([^\$\"\\]|\$[^\{\"\\]|\\.|\$\\.)*\$/\" |
-<STRING>([^\$\"\\]|\$[^\{\"\\]|\\.|\$\\.)+ {
+<STRING>([^\$\"\\]|\$[^\{\"\\]|\\{ANY}|\$\\{ANY})*\$/\" |
+<STRING>([^\$\"\\]|\$[^\{\"\\]|\\{ANY}|\$\\{ANY})+ {
                 /* It is impossible to match strings ending with '$' with one
                    regex because trailing contexts are only valid at the end
                    of a rule. (A sane but undocumented limitation.) */
-                yylval->e = unescapeStr(data->symbols, yytext);
+                yylval->e = unescapeStr(data->symbols, yytext, yyleng);
                 return STR;
               }
 <STRING>\$\{  { PUSH_STATE(INSIDE_DOLLAR_CURLY); return DOLLAR_CURLY; }
@@ -177,8 +179,8 @@ or          { return OR_KW; }
                    yylval->e = new ExprIndStr("''");
                    return IND_STR;
                  }
-<IND_STRING>\'\'\\. {
-                   yylval->e = unescapeStr(data->symbols, yytext + 2);
+<IND_STRING>\'\'\\{ANY} {
+                   yylval->e = unescapeStr(data->symbols, yytext + 2, yyleng - 2);
                    return IND_STR;
                  }
 <IND_STRING>\$\{ { PUSH_STATE(INSIDE_DOLLAR_CURLY); return DOLLAR_CURLY; }
@@ -207,11 +209,13 @@ or          { return OR_KW; }
 \#[^\r\n]*    /* single-line comments */
 \/\*([^*]|\*+[^*/])*\*+\/  /* long comments */
 
-.           return yytext[0];
+{ANY}       {
+              /* Don't return a negative number, as this will cause
+                 Bison to stop parsing without an error. */
+              return (unsigned char) yytext[0];
+            }
 
 }
 
-<<EOF>> { data->atEnd = true; return 0; }
-
 %%