about summary refs log tree commit diff
path: root/third_party/git/grep.c
diff options
context:
space:
mode:
author Vincent Ambo <mail@tazj.in> 2020-11-21T18·20+0100
committer Vincent Ambo <mail@tazj.in> 2020-11-21T18·45+0100
commit f4609b896fac842433bd495c166d5987852a6a73 (patch)
tree 95511c465c54c4f5d27e5d39ce187e2a1dd82bd3 /third_party/git/grep.c
parent 082c006c04343a78d87b6c6ab3608c25d6213c3f (diff)
merge(3p/git): Merge git subtree at v2.29.2 r/1890
This also bumps the stable nixpkgs to 20.09 as of 2020-11-21, because
there is some breakage in the git build related to the netrc
credentials helper which someone has taken care of in nixpkgs.

The stable channel is not used for anything other than git, so this
should be fine.

Change-Id: I3575a19dab09e1e9556cf8231d717de9890484fb
Diffstat (limited to 'third_party/git/grep.c')
-rw-r--r-- third_party/git/grep.c 275
1 files changed, 143 insertions, 132 deletions
diff --git a/third_party/git/grep.c b/third_party/git/grep.c
index cd952ef5d3..54af9f813e 100644
--- a/third_party/git/grep.c
+++ b/third_party/git/grep.c
@@ -16,6 +16,20 @@ static int grep_source_is_binary(struct grep_source *gs,
 
 static struct grep_opt grep_defaults;
 
+#ifdef USE_LIBPCRE2
+static pcre2_general_context *pcre2_global_context;
+
+static void *pcre2_malloc(PCRE2_SIZE size, MAYBE_UNUSED void *memory_data)
+{
+	return malloc(size);
+}
+
+static void pcre2_free(void *pointer, MAYBE_UNUSED void *memory_data)
+{
+	free(pointer);
+}
+#endif
+
 static const char *color_grep_slots[] = {
 	[GREP_COLOR_CONTEXT]	    = "context",
 	[GREP_COLOR_FILENAME]	    = "filename",
@@ -150,12 +164,28 @@ int grep_config(const char *var, const char *value, void *cb)
  * Initialize one instance of grep_opt and copy the
  * default values from the template we read the configuration
  * information in an earlier call to git_config(grep_config).
+ *
+ * If using PCRE, make sure that the library is configured
+ * to use the same allocator as Git (e.g. nedmalloc on Windows).
+ *
+ * Any allocated memory needs to be released in grep_destroy().
  */
 void grep_init(struct grep_opt *opt, struct repository *repo, const char *prefix)
 {
 	struct grep_opt *def = &grep_defaults;
 	int i;
 
+#if defined(USE_LIBPCRE2)
+	if (!pcre2_global_context)
+		pcre2_global_context = pcre2_general_context_create(
+					pcre2_malloc, pcre2_free, NULL);
+#endif
+
+#ifdef USE_LIBPCRE1
+	pcre_malloc = malloc;
+	pcre_free = free;
+#endif
+
 	memset(opt, 0, sizeof(*opt));
 	opt->repo = repo;
 	opt->prefix = prefix;
@@ -178,6 +208,13 @@ void grep_init(struct grep_opt *opt, struct repository *repo, const char *prefix
 		color_set(opt->colors[i], def->colors[i]);
 }
 
+void grep_destroy(void)
+{
+#ifdef USE_LIBPCRE2
+	pcre2_general_context_free(pcre2_global_context);
+#endif
+}
+
 static void grep_set_pattern_type_option(enum grep_pattern_type pattern_type, struct grep_opt *opt)
 {
 	/*
@@ -368,31 +405,20 @@ static int is_fixed(const char *s, size_t len)
 	return 1;
 }
 
-static int has_null(const char *s, size_t len)
-{
-	/*
-	 * regcomp cannot accept patterns with NULs so when using it
-	 * we consider any pattern containing a NUL fixed.
-	 */
-	if (memchr(s, 0, len))
-		return 1;
-
-	return 0;
-}
-
 #ifdef USE_LIBPCRE1
 static void compile_pcre1_regexp(struct grep_pat *p, const struct grep_opt *opt)
 {
 	const char *error;
 	int erroffset;
 	int options = PCRE_MULTILINE;
+	int study_options = 0;
 
 	if (opt->ignore_case) {
-		if (has_non_ascii(p->pattern))
+		if (!opt->ignore_locale && has_non_ascii(p->pattern))
 			p->pcre1_tables = pcre_maketables();
 		options |= PCRE_CASELESS;
 	}
-	if (is_utf8_locale() && has_non_ascii(p->pattern))
+	if (!opt->ignore_locale && is_utf8_locale() && has_non_ascii(p->pattern))
 		options |= PCRE_UTF8;
 
 	p->pcre1_regexp = pcre_compile(p->pattern, options, &error, &erroffset,
@@ -400,44 +426,31 @@ static void compile_pcre1_regexp(struct grep_pat *p, const struct grep_opt *opt)
 	if (!p->pcre1_regexp)
 		compile_regexp_failed(p, error);
 
-	p->pcre1_extra_info = pcre_study(p->pcre1_regexp, GIT_PCRE_STUDY_JIT_COMPILE, &error);
-	if (!p->pcre1_extra_info && error)
-		die("%s", error);
-
-#ifdef GIT_PCRE1_USE_JIT
+#if defined(PCRE_CONFIG_JIT) && !defined(NO_LIBPCRE1_JIT)
 	pcre_config(PCRE_CONFIG_JIT, &p->pcre1_jit_on);
-	if (p->pcre1_jit_on == 1) {
-		p->pcre1_jit_stack = pcre_jit_stack_alloc(1, 1024 * 1024);
-		if (!p->pcre1_jit_stack)
-			die("Couldn't allocate PCRE JIT stack");
-		pcre_assign_jit_stack(p->pcre1_extra_info, NULL, p->pcre1_jit_stack);
-	} else if (p->pcre1_jit_on != 0) {
-		BUG("The pcre1_jit_on variable should be 0 or 1, not %d",
-		    p->pcre1_jit_on);
-	}
+	if (opt->debug)
+		fprintf(stderr, "pcre1_jit_on=%d\n", p->pcre1_jit_on);
+
+	if (p->pcre1_jit_on)
+		study_options = PCRE_STUDY_JIT_COMPILE;
 #endif
+
+	p->pcre1_extra_info = pcre_study(p->pcre1_regexp, study_options, &error);
+	if (!p->pcre1_extra_info && error)
+		die("%s", error);
 }
 
 static int pcre1match(struct grep_pat *p, const char *line, const char *eol,
 		regmatch_t *match, int eflags)
 {
-	int ovector[30], ret, flags = 0;
+	int ovector[30], ret, flags = PCRE_NO_UTF8_CHECK;
 
 	if (eflags & REG_NOTBOL)
 		flags |= PCRE_NOTBOL;
 
-#ifdef GIT_PCRE1_USE_JIT
-	if (p->pcre1_jit_on) {
-		ret = pcre_jit_exec(p->pcre1_regexp, p->pcre1_extra_info, line,
-				    eol - line, 0, flags, ovector,
-				    ARRAY_SIZE(ovector), p->pcre1_jit_stack);
-	} else
-#endif
-	{
-		ret = pcre_exec(p->pcre1_regexp, p->pcre1_extra_info, line,
-				eol - line, 0, flags, ovector,
-				ARRAY_SIZE(ovector));
-	}
+	ret = pcre_exec(p->pcre1_regexp, p->pcre1_extra_info, line,
+			eol - line, 0, flags, ovector,
+			ARRAY_SIZE(ovector));
 
 	if (ret < 0 && ret != PCRE_ERROR_NOMATCH)
 		die("pcre_exec failed with error code %d", ret);
@@ -453,15 +466,12 @@ static int pcre1match(struct grep_pat *p, const char *line, const char *eol,
 static void free_pcre1_regexp(struct grep_pat *p)
 {
 	pcre_free(p->pcre1_regexp);
-#ifdef GIT_PCRE1_USE_JIT
-	if (p->pcre1_jit_on) {
+#ifdef PCRE_CONFIG_JIT
+	if (p->pcre1_jit_on)
 		pcre_free_study(p->pcre1_extra_info);
-		pcre_jit_stack_free(p->pcre1_jit_stack);
-	} else
+	else
 #endif
-	{
 		pcre_free(p->pcre1_extra_info);
-	}
 	pcre_free((void *)p->pcre1_tables);
 }
 #else /* !USE_LIBPCRE1 */
@@ -488,7 +498,6 @@ static void compile_pcre2_pattern(struct grep_pat *p, const struct grep_opt *opt
 	PCRE2_UCHAR errbuf[256];
 	PCRE2_SIZE erroffset;
 	int options = PCRE2_MULTILINE;
-	const uint8_t *character_tables = NULL;
 	int jitret;
 	int patinforet;
 	size_t jitsizearg;
@@ -497,15 +506,20 @@ static void compile_pcre2_pattern(struct grep_pat *p, const struct grep_opt *opt
 
 	p->pcre2_compile_context = NULL;
 
+	/* pcre2_global_context is initialized in grep_init() */
 	if (opt->ignore_case) {
-		if (has_non_ascii(p->pattern)) {
-			character_tables = pcre2_maketables(NULL);
+		if (!opt->ignore_locale && has_non_ascii(p->pattern)) {
+			if (!pcre2_global_context)
+				BUG("pcre2_global_context uninitialized");
+			p->pcre2_tables = pcre2_maketables(pcre2_global_context);
 			p->pcre2_compile_context = pcre2_compile_context_create(NULL);
-			pcre2_set_character_tables(p->pcre2_compile_context, character_tables);
+			pcre2_set_character_tables(p->pcre2_compile_context,
+							p->pcre2_tables);
 		}
 		options |= PCRE2_CASELESS;
 	}
-	if (is_utf8_locale() && has_non_ascii(p->pattern))
+	if (!opt->ignore_locale && is_utf8_locale() && has_non_ascii(p->pattern) &&
+	    !(!opt->ignore_case && (p->fixed || p->is_fixed)))
 		options |= PCRE2_UTF;
 
 	p->pcre2_pattern = pcre2_compile((PCRE2_SPTR)p->pattern,
@@ -522,7 +536,9 @@ static void compile_pcre2_pattern(struct grep_pat *p, const struct grep_opt *opt
 	}
 
 	pcre2_config(PCRE2_CONFIG_JIT, &p->pcre2_jit_on);
-	if (p->pcre2_jit_on == 1) {
+	if (opt->debug)
+		fprintf(stderr, "pcre2_jit_on=%d\n", p->pcre2_jit_on);
+	if (p->pcre2_jit_on) {
 		jitret = pcre2_jit_compile(p->pcre2_pattern, PCRE2_JIT_COMPLETE);
 		if (jitret)
 			die("Couldn't JIT the PCRE2 pattern '%s', got '%d'\n", p->pattern, jitret);
@@ -547,19 +563,11 @@ static void compile_pcre2_pattern(struct grep_pat *p, const struct grep_opt *opt
 			BUG("pcre2_pattern_info() failed: %d", patinforet);
 		if (jitsizearg == 0) {
 			p->pcre2_jit_on = 0;
+			if (opt->debug)
+				fprintf(stderr, "pcre2_jit_on=%d: (*NO_JIT) in regex\n",
+					p->pcre2_jit_on);
 			return;
 		}
-
-		p->pcre2_jit_stack = pcre2_jit_stack_create(1, 1024 * 1024, NULL);
-		if (!p->pcre2_jit_stack)
-			die("Couldn't allocate PCRE2 JIT stack");
-		p->pcre2_match_context = pcre2_match_context_create(NULL);
-		if (!p->pcre2_match_context)
-			die("Couldn't allocate PCRE2 match context");
-		pcre2_jit_stack_assign(p->pcre2_match_context, NULL, p->pcre2_jit_stack);
-	} else if (p->pcre2_jit_on != 0) {
-		BUG("The pcre2_jit_on variable should be 0 or 1, not %d",
-		    p->pcre2_jit_on);
 	}
 }
 
@@ -603,8 +611,7 @@ static void free_pcre2_pattern(struct grep_pat *p)
 	pcre2_compile_context_free(p->pcre2_compile_context);
 	pcre2_code_free(p->pcre2_pattern);
 	pcre2_match_data_free(p->pcre2_match_data);
-	pcre2_jit_stack_free(p->pcre2_jit_stack);
-	pcre2_match_context_free(p->pcre2_match_context);
+	free((void *)p->pcre2_tables);
 }
 #else /* !USE_LIBPCRE2 */
 static void compile_pcre2_pattern(struct grep_pat *p, const struct grep_opt *opt)
@@ -626,7 +633,6 @@ static int pcre2match(struct grep_pat *p, const char *line, const char *eol,
 static void free_pcre2_pattern(struct grep_pat *p)
 {
 }
-#endif /* !USE_LIBPCRE2 */
 
 static void compile_fixed_regexp(struct grep_pat *p, struct grep_opt *opt)
 {
@@ -647,46 +653,66 @@ static void compile_fixed_regexp(struct grep_pat *p, struct grep_opt *opt)
 		compile_regexp_failed(p, errbuf);
 	}
 }
+#endif /* !USE_LIBPCRE2 */
 
 static void compile_regexp(struct grep_pat *p, struct grep_opt *opt)
 {
-	int ascii_only;
 	int err;
 	int regflags = REG_NEWLINE;
 
 	p->word_regexp = opt->word_regexp;
 	p->ignore_case = opt->ignore_case;
-	ascii_only     = !has_non_ascii(p->pattern);
+	p->fixed = opt->fixed;
 
-	/*
-	 * Even when -F (fixed) asks us to do a non-regexp search, we
-	 * may not be able to correctly case-fold when -i
-	 * (ignore-case) is asked (in which case, we'll synthesize a
-	 * regexp to match the pattern that matches regexp special
-	 * characters literally, while ignoring case differences).  On
-	 * the other hand, even without -F, if the pattern does not
-	 * have any regexp special characters and there is no need for
-	 * case-folding search, we can internally turn it into a
-	 * simple string match using kws.  p->fixed tells us if we
-	 * want to use kws.
-	 */
-	if (opt->fixed ||
-	    has_null(p->pattern, p->patternlen) ||
-	    is_fixed(p->pattern, p->patternlen))
-		p->fixed = !p->ignore_case || ascii_only;
-
-	if (p->fixed) {
-		p->kws = kwsalloc(p->ignore_case ? tolower_trans_tbl : NULL);
-		kwsincr(p->kws, p->pattern, p->patternlen);
-		kwsprep(p->kws);
-		return;
-	} else if (opt->fixed) {
-		/*
-		 * We come here when the pattern has the non-ascii
-		 * characters we cannot case-fold, and asked to
-		 * ignore-case.
-		 */
+	if (memchr(p->pattern, 0, p->patternlen) && !opt->pcre2)
+		die(_("given pattern contains NULL byte (via -f <file>). This is only supported with -P under PCRE v2"));
+
+	p->is_fixed = is_fixed(p->pattern, p->patternlen);
+#ifdef USE_LIBPCRE2
+       if (!p->fixed && !p->is_fixed) {
+	       const char *no_jit = "(*NO_JIT)";
+	       const int no_jit_len = strlen(no_jit);
+	       if (starts_with(p->pattern, no_jit) &&
+		   is_fixed(p->pattern + no_jit_len,
+			    p->patternlen - no_jit_len))
+		       p->is_fixed = 1;
+       }
+#endif
+	if (p->fixed || p->is_fixed) {
+#ifdef USE_LIBPCRE2
+		opt->pcre2 = 1;
+		if (p->is_fixed) {
+			compile_pcre2_pattern(p, opt);
+		} else {
+			/*
+			 * E.g. t7811-grep-open.sh relies on the
+			 * pattern being restored.
+			 */
+			char *old_pattern = p->pattern;
+			size_t old_patternlen = p->patternlen;
+			struct strbuf sb = STRBUF_INIT;
+
+			/*
+			 * There is the PCRE2_LITERAL flag, but it's
+			 * only in PCRE v2 10.30 and later. Needing to
+			 * ifdef our way around that and dealing with
+			 * it + PCRE2_MULTILINE being an error is more
+			 * complex than just quoting this ourselves.
+			*/
+			strbuf_add(&sb, "\\Q", 2);
+			strbuf_add(&sb, p->pattern, p->patternlen);
+			strbuf_add(&sb, "\\E", 2);
+
+			p->pattern = sb.buf;
+			p->patternlen = sb.len;
+			compile_pcre2_pattern(p, opt);
+			p->pattern = old_pattern;
+			p->patternlen = old_patternlen;
+			strbuf_release(&sb);
+		}
+#else /* !USE_LIBPCRE2 */
 		compile_fixed_regexp(p, opt);
+#endif /* !USE_LIBPCRE2 */
 		return;
 	}
 
@@ -1053,9 +1079,7 @@ void free_grep_patterns(struct grep_opt *opt)
 		case GREP_PATTERN: /* atom */
 		case GREP_PATTERN_HEAD:
 		case GREP_PATTERN_BODY:
-			if (p->kws)
-				kwsfree(p->kws);
-			else if (p->pcre1_regexp)
+			if (p->pcre1_regexp)
 				free_pcre1_regexp(p);
 			else if (p->pcre2_pattern)
 				free_pcre2_pattern(p);
@@ -1115,29 +1139,12 @@ static void show_name(struct grep_opt *opt, const char *name)
 	opt->output(opt, opt->null_following_name ? "\0" : "\n", 1);
 }
 
-static int fixmatch(struct grep_pat *p, char *line, char *eol,
-		    regmatch_t *match)
-{
-	struct kwsmatch kwsm;
-	size_t offset = kwsexec(p->kws, line, eol - line, &kwsm);
-	if (offset == -1) {
-		match->rm_so = match->rm_eo = -1;
-		return REG_NOMATCH;
-	} else {
-		match->rm_so = offset;
-		match->rm_eo = match->rm_so + kwsm.size[0];
-		return 0;
-	}
-}
-
 static int patmatch(struct grep_pat *p, char *line, char *eol,
 		    regmatch_t *match, int eflags)
 {
 	int hit;
 
-	if (p->fixed)
-		hit = !fixmatch(p, line, eol, match);
-	else if (p->pcre1_regexp)
+	if (p->pcre1_regexp)
 		hit = !pcre1match(p, line, eol, match, eflags);
 	else if (p->pcre2_pattern)
 		hit = !pcre2match(p, line, eol, match, eflags);
@@ -1533,11 +1540,6 @@ static inline void grep_attr_unlock(void)
 		pthread_mutex_unlock(&grep_attr_mutex);
 }
 
-/*
- * Same as git_attr_mutex, but protecting the thread-unsafe object db access.
- */
-pthread_mutex_t grep_read_mutex;
-
 static int match_funcname(struct grep_opt *opt, struct grep_source *gs, char *bol, char *eol)
 {
 	xdemitconf_t *xecfg = opt->priv;
@@ -1734,13 +1736,20 @@ static int fill_textconv_grep(struct repository *r,
 	}
 
 	/*
-	 * fill_textconv is not remotely thread-safe; it may load objects
-	 * behind the scenes, and it modifies the global diff tempfile
-	 * structure.
+	 * fill_textconv is not remotely thread-safe; it modifies the global
+	 * diff tempfile structure, writes to the_repo's odb and might
+	 * internally call thread-unsafe functions such as the
+	 * prepare_packed_git() lazy initializer. Because of the last two, we
+	 * must ensure mutual exclusion between this call and the object reading
+	 * API, thus we use obj_read_lock() here.
+	 *
+	 * TODO: allowing text conversion to run in parallel with object
+	 * reading operations might increase performance in the multithreaded
+	 * non-worktree git-grep with --textconv.
 	 */
-	grep_read_lock();
+	obj_read_lock();
 	size = fill_textconv(r, driver, df, &buf);
-	grep_read_unlock();
+	obj_read_unlock();
 	free_filespec(df);
 
 	/*
@@ -1806,10 +1815,15 @@ static int grep_source_1(struct grep_opt *opt, struct grep_source *gs, int colle
 		grep_source_load_driver(gs, opt->repo->index);
 		/*
 		 * We might set up the shared textconv cache data here, which
-		 * is not thread-safe.
+		 * is not thread-safe. Also, get_oid_with_context() and
+		 * parse_object() might be internally called. As they are not
+		 * currently thread-safe and might be racy with object reading,
+		 * obj_read_lock() must be called.
 		 */
 		grep_attr_lock();
+		obj_read_lock();
 		textconv = userdiff_get_textconv(opt->repo, gs->driver);
+		obj_read_unlock();
 		grep_attr_unlock();
 	}
 
@@ -2109,10 +2123,7 @@ static int grep_source_load_oid(struct grep_source *gs)
 {
 	enum object_type type;
 
-	grep_read_lock();
 	gs->buf = read_object_file(gs->identifier, &type, &gs->size);
-	grep_read_unlock();
-
 	if (!gs->buf)
 		return error(_("'%s': unable to read %s"),
 			     gs->name,