diff options
author | Eelco Dolstra <eelco.dolstra@logicblox.com> | 2014-11-25T10·47+0100 |
---|---|---|
committer | Eelco Dolstra <eelco.dolstra@logicblox.com> | 2014-11-25T10·47+0100 |
commit | 976df480c918f050608f7a23a4a21415c43475c3 (patch) | |
tree | 41463834cb5e30bd50c719f6ccaa8ecdb3a8a976 /src/libutil | |
parent | 4e340a983f928973d3915455d46a4bbadbc3269c (diff) |
Add a primop for regular expression pattern matching
The function ‘builtins.match’ takes a POSIX extended regular expression and an arbitrary string. It returns ‘null’ if the string does not match the regular expression. Otherwise, it returns a list containing substring matches corresponding to parenthesis groups in the regex. The regex must match the entire string (i.e. there is an implied "^<pat>$" around the regex). For example:

    match "foo" "foobar" => null
    match "foo" "foo" => []
    match "f(o+)(.*)" "foooobar" => ["oooo" "bar"]
    match "(.*/)?([^/]*)" "/dir/file.nix" => ["/dir/" "file.nix"]
    match "(.*/)?([^/]*)" "file.nix" => [null "file.nix"]

The following example finds all regular files with extension .nix or .patch underneath the current directory:

    let
      findFiles = pat: dir: concatLists (mapAttrsToList (name: type:
        if type == "directory" then
          findFiles pat (dir + "/" + name)
        else if type == "regular" && match pat name != null then
          [(dir + "/" + name)]
        else []) (readDir dir));
    in findFiles ".*\\.(nix|patch)" (toString ./.)

Diffstat (limited to 'src/libutil')
-rw-r--r-- | src/libutil/regex.cc | 23 | ||||
-rw-r--r-- | src/libutil/regex.hh | 9 |
2 files changed, 28 insertions, 4 deletions
```diff
diff --git a/src/libutil/regex.cc b/src/libutil/regex.cc
index 36c8458cee08..84274b3e1da9 100644
--- a/src/libutil/regex.cc
+++ b/src/libutil/regex.cc
@@ -1,13 +1,16 @@
 #include "regex.hh"
 #include "types.hh"
 
+#include <algorithm>
+
 namespace nix {
 
-Regex::Regex(const string & pattern)
+Regex::Regex(const string & pattern, bool subs)
 {
     /* Patterns must match the entire string. */
-    int err = regcomp(&preg, ("^(" + pattern + ")$").c_str(), REG_NOSUB | REG_EXTENDED);
-    if (err) throw Error(format("compiling pattern ‘%1%’: %2%") % pattern % showError(err));
+    int err = regcomp(&preg, ("^(" + pattern + ")$").c_str(), (subs ? 0 : REG_NOSUB) | REG_EXTENDED);
+    if (err) throw RegexError(format("compiling pattern ‘%1%’: %2%") % pattern % showError(err));
+    nrParens = subs ? std::count(pattern.begin(), pattern.end(), '(') : 0;
 }
 
 Regex::~Regex()
@@ -23,6 +26,20 @@ bool Regex::matches(const string & s)
     throw Error(format("matching string ‘%1%’: %2%") % s % showError(err));
 }
 
+bool Regex::matches(const string & s, Subs & subs)
+{
+    regmatch_t pmatch[nrParens + 2];
+    int err = regexec(&preg, s.c_str(), nrParens + 2, pmatch, 0);
+    if (err == 0) {
+        for (unsigned int n = 2; n < nrParens + 2; ++n)
+            if (pmatch[n].rm_eo != -1)
+                subs[n - 2] = string(s, pmatch[n].rm_so, pmatch[n].rm_eo - pmatch[n].rm_so);
+        return true;
+    }
+    else if (err == REG_NOMATCH) return false;
+    throw Error(format("matching string ‘%1%’: %2%") % s % showError(err));
+}
+
 string Regex::showError(int err)
 {
     char buf[256];
diff --git a/src/libutil/regex.hh b/src/libutil/regex.hh
index aa012b721cb7..53e31f4edc4a 100644
--- a/src/libutil/regex.hh
+++ b/src/libutil/regex.hh
@@ -5,16 +5,23 @@
 #include <sys/types.h>
 #include <regex.h>
 
+#include <map>
+
 namespace nix {
 
+MakeError(RegexError, Error)
+
 class Regex
 {
 public:
-    Regex(const string & pattern);
+    Regex(const string & pattern, bool subs = false);
     ~Regex();
     bool matches(const string & s);
+    typedef std::map<unsigned int, string> Subs;
+    bool matches(const string & s, Subs & subs);
 private:
+    unsigned nrParens;
     regex_t preg;
     string showError(int err);
 };
 
```