about summary refs log tree commit diff
diff options
context:
space:
mode:
author Eelco Dolstra <e.dolstra@tudelft.nl> 2007-11-30T16·48+0000
committer Eelco Dolstra <e.dolstra@tudelft.nl> 2007-11-30T16·48+0000
commit 6d6c68c0d29310b6eca35f58b1e68f495d6cd33a (patch)
tree eec6c3c138951f1f0cb3f9b4b1f3b3c177c31afb
parent 633518628f48fb9c06bfd570eeca6f62696aba05 (diff)
* Added a new kind of multi-line string literal delimited by two
  single quotes.  Example (from NixOS):

    job = ''
      start on network-interfaces

      start script

        rm -f /var/run/opengl-driver
        ${if videoDriver == "nvidia"        
          then "ln -sf ${nvidiaDrivers} /var/run/opengl-driver"
          else if cfg.driSupport
          then "ln -sf ${mesa} /var/run/opengl-driver"
          else ""
        }

        rm -f /var/log/slim.log

      end script
    '';

  This style has two big advantages:

  - \, ' and " aren't special, only '' and ${.  So you get a lot less
    escaping in shell scripts / configuration files in Nixpkgs/NixOS.
    The delimiter '' is rare in scripts (and can usually be written as
    "").  ${ is also fairly rare.

    Other delimiters such as <<...>>, {{...}} and <|...|> were also
    considered but this one appears to have the fewest drawbacks
    (thanks Martin).

  - Indentation is intelligently stripped so that multi-line strings
    can follow the nesting structure of the containing Nix
    expression.  E.g. in the example above 6 spaces are stripped from
    the start of each line.  This prevents unnecessary indentation in
    generated files (which sometimes even breaks things).

  See tests/lang/eval-okay-ind-string.nix for some examples.


-rw-r--r-- doc/manual/release-notes.xml 4
-rw-r--r-- src/libexpr/lexer.l 15
-rw-r--r-- src/libexpr/nixexpr-ast.def 3
-rw-r--r-- src/libexpr/parser.y 107
-rw-r--r-- tests/lang/eval-okay-ind-string.exp 1
-rw-r--r-- tests/lang/eval-okay-ind-string.nix 107
6 files changed, 234 insertions, 3 deletions
diff --git a/doc/manual/release-notes.xml b/doc/manual/release-notes.xml
index 727a3e4a6179..ef2905d8ad89 100644
--- a/doc/manual/release-notes.xml
+++ b/doc/manual/release-notes.xml
@@ -131,6 +131,10 @@
   <listitem><para>TODO: chroot support.</para></listitem>
   
 
+  <listitem><para>TODO: <literal>''</literal>-style string
+  literals.</para></listitem>
+  
+
 </itemizedlist>
 
 </section>
diff --git a/src/libexpr/lexer.l b/src/libexpr/lexer.l
index 9f0f0b335f57..23a14324f32c 100644
--- a/src/libexpr/lexer.l
+++ b/src/libexpr/lexer.l
@@ -4,6 +4,7 @@
 
 
 %x STRING
+%x IND_STRING
 
 
 %{
@@ -122,6 +123,14 @@ inherit     { return INHERIT; }
 <STRING>\"  { BEGIN(INITIAL); return '"'; }
 <STRING>.   return yytext[0]; /* just in case: shouldn't be reached */
 
+\'\'(\ *\n)?     { /* opening delimiter; spaces plus a newline directly after it are consumed by this same match */ BEGIN(IND_STRING); return IND_STRING_OPEN; }
+<IND_STRING>([^\$\']|\$[^\{\']|\'[^\'])+ { /* literal text: a run of (any char but dollar or quote), (dollar + a char that is neither open-curly nor quote), or (quote + a non-quote char) */
+                   yylval->t = makeIndStr(toATerm(yytext)); /* wrapped as IndStr so the parser can tell literal text from anti-quoted expressions */
+                   return IND_STR;
+                 }
+<IND_STRING>\$\{ { /* start of an anti-quotation: the expression is lexed in INITIAL; the parser calls backToIndString() after the closing curly */ BEGIN(INITIAL); return DOLLAR_CURLY; }
+<IND_STRING>\'\' { /* closing delimiter: return to normal lexing */ BEGIN(INITIAL); return IND_STRING_CLOSE; }
+<IND_STRING>.    return yytext[0]; /* just in case: shouldn't be reached */
 
 {PATH}      { yylval->t = toATerm(yytext); return PATH; /* !!! alloc */ }
 {URI}       { yylval->t = toATerm(yytext); return URI; /* !!! alloc */ }
@@ -148,4 +157,10 @@ void backToString(yyscan_t scanner)
     BEGIN(STRING);
 }
 
+void backToIndString(yyscan_t scanner) /* Put `scanner' back into the IND_STRING start condition; the ind_string_parts grammar rule calls this after the '}' that ends an anti-quotation. */
+{
+    struct yyguts_t * yyg = (struct yyguts_t *) scanner; /* not referenced by name below; presumably the BEGIN macro expands to code that uses `yyg' -- confirm against the generated scanner */
+    BEGIN(IND_STRING); /* the dollar-curly rule switched to INITIAL; undo that so the rest of the literal is lexed as string text */
+}
+
 }
diff --git a/src/libexpr/nixexpr-ast.def b/src/libexpr/nixexpr-ast.def
index c7029e927047..a06d34311ae1 100644
--- a/src/libexpr/nixexpr-ast.def
+++ b/src/libexpr/nixexpr-ast.def
@@ -46,6 +46,9 @@ Int | int | Expr |
 Str | string ATermList | Expr |
 Str | string | Expr | ObsoleteStr
 
+# Internal to the parser, doesn't occur in ASTs.
+IndStr | string | Expr |
+
 # A path is a reference to a file system object that is to be copied
 # to the Nix store when used as a derivation attribute.  When it is
 # concatenated to a string (i.e., `str + path'), it is also copied and
diff --git a/src/libexpr/parser.y b/src/libexpr/parser.y
index 82b24cd07379..cd3ba88aa658 100644
--- a/src/libexpr/parser.y
+++ b/src/libexpr/parser.y
@@ -68,9 +68,100 @@ static Expr fixAttrs(int recursive, ATermList as)
 }
 
 
+/* Strip the common leading indentation from a ''...'' string literal.
+
+   `es' holds the parts of the literal in order: IndStr nodes carry
+   literal text, anything else is the expression of a ${...}
+   anti-quotation.  Every IndStr is rewritten to a Str with the
+   minimum indentation removed from the start of each line, and the
+   parts are returned wrapped in a ConcatStrings node.  An empty list
+   yields the empty Str.  Only ' ' counts as indentation; any other
+   character (including a tab) is treated as content. */
+static Expr stripIndentation(ATermList es)
+{
+    if (es == ATempty) return makeStr("");
+
+    /* Pass 1: figure out the minimum indentation.  Note that by design
+       lines consisting only of spaces are not taken into account: the
+       minimum is only updated when a non-space character or an
+       anti-quotation is seen.  (So the " " in "\n ''" is ignored, but
+       the " " in "\n foo''" is not.) */
+    bool atStartOfLine = true; /* = seen only whitespace in the current line */
+    unsigned int minIndent = 1000000; /* sentinel: stays this large if no line has content */
+    unsigned int curIndent = 0;
+    ATerm e;
+    for (ATermIterator i(es); i; ++i) {
+        if (!matchIndStr(*i, e)) {
+            /* Anti-quotations end the current start-of-line whitespace. */
+            if (atStartOfLine) {
+                atStartOfLine = false;
+                if (curIndent < minIndent) minIndent = curIndent;
+            }
+            continue;
+        }
+        string s = aterm2String(e);
+        for (string::size_type j = 0; j < s.size(); ++j) {
+            if (atStartOfLine) {
+                if (s[j] == ' ')
+                    curIndent++;
+                else if (s[j] == '\n') {
+                    /* Empty line, doesn't influence minimum
+                       indentation. */
+                    curIndent = 0;
+                } else {
+                    atStartOfLine = false;
+                    if (curIndent < minIndent) minIndent = curIndent;
+                }
+            } else if (s[j] == '\n') {
+                atStartOfLine = true;
+                curIndent = 0;
+            }
+        }
+    }
+
+    /* Pass 2: strip up to minIndent spaces from the start of each
+       line.  The result list is built in reverse and flipped at the
+       end. */
+    ATermList es2 = ATempty;
+    atStartOfLine = true;
+    unsigned int curDropped = 0; /* leading spaces seen so far on the current line */
+    unsigned int n = ATgetLength(es); /* counts down; n == 1 on the last part */
+    for (ATermIterator i(es); i; ++i, --n) {
+        if (!matchIndStr(*i, e)) {
+            /* Anti-quotations are passed through unchanged and count
+               as content on the current line. */
+            atStartOfLine = false;
+            curDropped = 0;
+            es2 = ATinsert(es2, *i);
+            continue;
+        }
+
+        string s = aterm2String(e);
+        string s2;
+        for (string::size_type j = 0; j < s.size(); ++j) {
+            if (atStartOfLine) {
+                if (s[j] == ' ') {
+                    /* Drop the first minIndent spaces, keep the rest. */
+                    if (curDropped++ >= minIndent)
+                        s2 += s[j];
+                }
+                else if (s[j] == '\n') {
+                    curDropped = 0;
+                    s2 += s[j];
+                } else {
+                    atStartOfLine = false;
+                    curDropped = 0;
+                    s2 += s[j];
+                }
+            } else {
+                s2 += s[j];
+                if (s[j] == '\n') atStartOfLine = true;
+            }
+        }
+
+        /* Remove the last line if it is empty and consists only of
+           spaces.  `p' must be string::size_type: an unsigned int
+           truncates string::npos where size_t is wider, which makes
+           the npos test always true and lets `p + 1' wrap to 0, so a
+           final part made only of spaces with no newline would be
+           erased. */
+        if (n == 1) {
+            string::size_type p = s2.find_last_of('\n');
+            if (p != string::npos && s2.find_first_not_of(' ', p + 1) == string::npos)
+                s2 = string(s2, 0, p + 1);
+        }
+
+        es2 = ATinsert(es2, makeStr(s2));
+    }
+
+    return makeConcatStrings(ATreverse(es2));
+}
+
+
 void backToString(yyscan_t scanner);
+void backToIndString(yyscan_t scanner);
+
 
- 
 static Pos makeCurPos(YYLTYPE * loc, ParseData * data)
 {
     return makePos(toATerm(data->path),
@@ -121,10 +212,11 @@ static void freeAndUnprotect(void * p)
 
 %type <t> start expr expr_function expr_if expr_op
 %type <t> expr_app expr_select expr_simple bind inheritsrc formal
-%type <ts> binds ids expr_list formals string_parts
-%token <t> ID INT STR PATH URI
+%type <ts> binds ids expr_list formals string_parts ind_string_parts
+%token <t> ID INT STR IND_STR PATH URI
 %token IF THEN ELSE ASSERT WITH LET IN REC INHERIT EQ NEQ AND OR IMPL
 %token DOLLAR_CURLY /* == ${ */
+%token IND_STRING_OPEN IND_STRING_CLOSE
 
 %nonassoc IMPL
 %left OR
@@ -199,6 +291,9 @@ expr_simple
       else if (ATgetNext($2) == ATempty) $$ = ATgetFirst($2);
       else $$ = makeConcatStrings(ATreverse($2));
   }
+  | IND_STRING_OPEN ind_string_parts IND_STRING_CLOSE {
+      $$ = stripIndentation(ATreverse($2));
+  }
   | PATH { $$ = makePath(toATerm(absPath(aterm2String($1), data->basePath))); }
   | URI { $$ = makeStr($1, ATempty); }
   | '(' expr ')' { $$ = $2; }
@@ -219,6 +314,12 @@ string_parts
   | { $$ = ATempty; }
   ;
 
+ind_string_parts /* parts of a ''...'' literal; expr_simple applies ATreverse to this list before passing it to stripIndentation */
+  : ind_string_parts IND_STR { $$ = ATinsert($1, $2); } /* literal text chunk from the lexer */
+  | ind_string_parts DOLLAR_CURLY expr '}' { backToIndString(scanner); $$ = ATinsert($1, $3); } /* anti-quotation: store the expression and switch the lexer back to IND_STRING */
+  | { $$ = ATempty; } /* empty literal / start of the list */
+  ;
+
 binds
   : binds bind { $$ = ATinsert($1, $2); }
   | { $$ = ATempty; }
diff --git a/tests/lang/eval-okay-ind-string.exp b/tests/lang/eval-okay-ind-string.exp
new file mode 100644
index 000000000000..b13217edd191
--- /dev/null
+++ b/tests/lang/eval-okay-ind-string.exp
@@ -0,0 +1 @@
+Str("This is an indented multi-line string\nliteral.  An amount of whitespace at\nthe start of each line matching the minimum\nindentation of all lines in the string\nliteral together will be removed.  Thus,\nin this case four spaces will be\nstripped from each line, even though\n  THIS LINE is indented six spaces.\n\nAlso, empty lines don't count in the\ndetermination of the indentation level (the\nprevious empty line has indentation 0, but\nit doesn't matter).\nIf the string starts with whitespace\n  followed by a newline, it's stripped, but\n  that's not the case here. Two spaces are\n  stripped because of the \"  \" at the start. \nThis line is indented\na bit further.\nAnti-quotations, like so, are\nalso allowed.\n  The \\ is not special here.\n' can be followed by any character except another ', e.g. 'x'.\nLikewise for $, e.g. $$ or $varName.\nBut ' followed by ' is special, as is $ followed by {.\nIf you want them, use anti-quotations: '', ${.\n   Tabs are not interpreted as whitespace (since we can't guess\n   what tab settings are intended), so don't use them.\n\tThis line starts with a space and a tab, so only one\n   space will be stripped from each line.\nAlso note that if the last line (just before the closing ' ')\nconsists only of whitespace, it's ignored.  But here there is\nsome non-whitespace stuff, so the line isn't removed. \nThis shows a hacky way to preserve an empty line after the start.\nBut there's no reason to do so: you could just repeat the empty\nline.\n  Similarly you can force an indentation level,\n  in this case to 2 spaces.  This works because the anti-quote\n  is significant (not whitespace).\nstart on network-interfaces\n\nstart script\n\n  rm -f /var/run/opengl-driver\n  ln -sf 123 /var/run/opengl-driver\n\n  rm -f /var/log/slim.log\n   \nend script\n\nenv SLIM_CFGFILE=abc\nenv SLIM_THEMESDIR=def\nenv FONTCONFIG_FILE=/etc/fonts/fonts.conf  \t\t\t\t# !!! 
cleanup\nenv XKB_BINDIR=foo/bin         \t\t\t\t# Needed for the Xkb extension.\nenv LD_LIBRARY_PATH=libX11/lib:libXext/lib:/usr/lib/          # related to xorg-sys-opengl - needed to load libglx for (AI)GLX support (for compiz)\n\nenv XORG_DRI_DRIVER_PATH=nvidiaDrivers/X11R6/lib/modules/drivers/ \n\nexec slim/bin/slim\n",[])
diff --git a/tests/lang/eval-okay-ind-string.nix b/tests/lang/eval-okay-ind-string.nix
new file mode 100644
index 000000000000..7d5d61091588
--- /dev/null
+++ b/tests/lang/eval-okay-ind-string.nix
@@ -0,0 +1,107 @@
+let
+
+  s1 = ''
+    This is an indented multi-line string
+    literal.  An amount of whitespace at
+    the start of each line matching the minimum
+    indentation of all lines in the string
+    literal together will be removed.  Thus,
+    in this case four spaces will be
+    stripped from each line, even though
+      THIS LINE is indented six spaces.
+
+    Also, empty lines don't count in the
+    determination of the indentation level (the
+    previous empty line has indentation 0, but
+    it doesn't matter).
+  '';
+
+  s2 = ''  If the string starts with whitespace
+    followed by a newline, it's stripped, but
+    that's not the case here. Two spaces are
+    stripped because of the "  " at the start. 
+  '';
+
+  s3 = ''
+      This line is indented
+      a bit further.
+        ''; # indentation of last line doesn't count if it's empty
+
+  s4 = ''
+    Anti-quotations, like ${if true then "so" else "not so"}, are
+    also allowed.
+  '';
+
+  s5 = ''
+      The \ is not special here.
+    ' can be followed by any character except another ', e.g. 'x'.
+    Likewise for $, e.g. $$ or $varName.
+    But ' followed by ' is special, as is $ followed by {.
+    If you want them, use anti-quotations: ${"''"}, ${"\${"}.
+  '';
+
+  s6 = ''  
+    Tabs are not interpreted as whitespace (since we can't guess
+    what tab settings are intended), so don't use them.
+ 	This line starts with a space and a tab, so only one
+    space will be stripped from each line.
+  '';
+
+  s7 = ''
+    Also note that if the last line (just before the closing ' ')
+    consists only of whitespace, it's ignored.  But here there is
+    some non-whitespace stuff, so the line isn't removed. '';
+
+  s8 = ''    ${""}
+    This shows a hacky way to preserve an empty line after the start.
+    But there's no reason to do so: you could just repeat the empty
+    line.
+  '';
+
+  s9 = ''
+  ${""}  Similarly you can force an indentation level,
+    in this case to 2 spaces.  This works because the anti-quote
+    is significant (not whitespace).
+  '';
+
+  s10 = ''
+  '';
+
+  s11 = '''';
+
+  s12 = ''   '';
+
+  s13 = ''
+    start on network-interfaces
+
+    start script
+    
+      rm -f /var/run/opengl-driver
+      ${if true
+        then "ln -sf 123 /var/run/opengl-driver"
+        else if true
+        then "ln -sf 456 /var/run/opengl-driver"
+        else ""
+      }
+
+      rm -f /var/log/slim.log
+       
+    end script
+
+    env SLIM_CFGFILE=${"abc"}
+    env SLIM_THEMESDIR=${"def"}
+    env FONTCONFIG_FILE=/etc/fonts/fonts.conf  				# !!! cleanup
+    env XKB_BINDIR=${"foo"}/bin         				# Needed for the Xkb extension.
+    env LD_LIBRARY_PATH=${"libX11"}/lib:${"libXext"}/lib:/usr/lib/          # related to xorg-sys-opengl - needed to load libglx for (AI)GLX support (for compiz)
+
+    ${if true
+      then "env XORG_DRI_DRIVER_PATH=${"nvidiaDrivers"}/X11R6/lib/modules/drivers/"
+    else if true
+      then "env XORG_DRI_DRIVER_PATH=${"mesa"}/lib/modules/dri"
+      else ""
+    } 
+
+    exec ${"slim"}/bin/slim
+  '';
+
+in s1 + s2 + s3 + s4 + s5 + s6 + s7 + s8 + s9 + s10 + s11 + s12 + s13