about summary refs log tree commit diff
path: root/third_party/git/grep.c
diff options
context:
space:
mode:
Diffstat (limited to 'third_party/git/grep.c')
-rw-r--r-- third_party/git/grep.c 275
1 file changed, 132 insertions, 143 deletions
diff --git a/third_party/git/grep.c b/third_party/git/grep.c
index 13232a904a..cd952ef5d3 100644
--- a/third_party/git/grep.c
+++ b/third_party/git/grep.c
@@ -16,20 +16,6 @@ static int grep_source_is_binary(struct grep_source *gs,
 
 static struct grep_opt grep_defaults;
 
-#ifdef USE_LIBPCRE2
-static pcre2_general_context *pcre2_global_context;
-
-static void *pcre2_malloc(PCRE2_SIZE size, MAYBE_UNUSED void *memory_data)
-{
-	return malloc(size);
-}
-
-static void pcre2_free(void *pointer, MAYBE_UNUSED void *memory_data)
-{
-	free(pointer);
-}
-#endif
-
 static const char *color_grep_slots[] = {
 	[GREP_COLOR_CONTEXT]	    = "context",
 	[GREP_COLOR_FILENAME]	    = "filename",
@@ -164,28 +150,12 @@ int grep_config(const char *var, const char *value, void *cb)
  * Initialize one instance of grep_opt and copy the
  * default values from the template we read the configuration
  * information in an earlier call to git_config(grep_config).
- *
- * If using PCRE, make sure that the library is configured
- * to use the same allocator as Git (e.g. nedmalloc on Windows).
- *
- * Any allocated memory needs to be released in grep_destroy().
  */
 void grep_init(struct grep_opt *opt, struct repository *repo, const char *prefix)
 {
 	struct grep_opt *def = &grep_defaults;
 	int i;
 
-#if defined(USE_LIBPCRE2)
-	if (!pcre2_global_context)
-		pcre2_global_context = pcre2_general_context_create(
-					pcre2_malloc, pcre2_free, NULL);
-#endif
-
-#ifdef USE_LIBPCRE1
-	pcre_malloc = malloc;
-	pcre_free = free;
-#endif
-
 	memset(opt, 0, sizeof(*opt));
 	opt->repo = repo;
 	opt->prefix = prefix;
@@ -208,13 +178,6 @@ void grep_init(struct grep_opt *opt, struct repository *repo, const char *prefix
 		color_set(opt->colors[i], def->colors[i]);
 }
 
-void grep_destroy(void)
-{
-#ifdef USE_LIBPCRE2
-	pcre2_general_context_free(pcre2_global_context);
-#endif
-}
-
 static void grep_set_pattern_type_option(enum grep_pattern_type pattern_type, struct grep_opt *opt)
 {
 	/*
@@ -405,20 +368,31 @@ static int is_fixed(const char *s, size_t len)
 	return 1;
 }
 
+static int has_null(const char *s, size_t len)
+{
+	/*
+	 * regcomp cannot accept patterns with NULs so when using it
+	 * we consider any pattern containing a NUL fixed.
+	 */
+	if (memchr(s, 0, len))
+		return 1;
+
+	return 0;
+}
+
 #ifdef USE_LIBPCRE1
 static void compile_pcre1_regexp(struct grep_pat *p, const struct grep_opt *opt)
 {
 	const char *error;
 	int erroffset;
 	int options = PCRE_MULTILINE;
-	int study_options = 0;
 
 	if (opt->ignore_case) {
-		if (!opt->ignore_locale && has_non_ascii(p->pattern))
+		if (has_non_ascii(p->pattern))
 			p->pcre1_tables = pcre_maketables();
 		options |= PCRE_CASELESS;
 	}
-	if (!opt->ignore_locale && is_utf8_locale() && has_non_ascii(p->pattern))
+	if (is_utf8_locale() && has_non_ascii(p->pattern))
 		options |= PCRE_UTF8;
 
 	p->pcre1_regexp = pcre_compile(p->pattern, options, &error, &erroffset,
@@ -426,31 +400,44 @@ static void compile_pcre1_regexp(struct grep_pat *p, const struct grep_opt *opt)
 	if (!p->pcre1_regexp)
 		compile_regexp_failed(p, error);
 
-#if defined(PCRE_CONFIG_JIT) && !defined(NO_LIBPCRE1_JIT)
-	pcre_config(PCRE_CONFIG_JIT, &p->pcre1_jit_on);
-	if (opt->debug)
-		fprintf(stderr, "pcre1_jit_on=%d\n", p->pcre1_jit_on);
-
-	if (p->pcre1_jit_on)
-		study_options = PCRE_STUDY_JIT_COMPILE;
-#endif
-
-	p->pcre1_extra_info = pcre_study(p->pcre1_regexp, study_options, &error);
+	p->pcre1_extra_info = pcre_study(p->pcre1_regexp, GIT_PCRE_STUDY_JIT_COMPILE, &error);
 	if (!p->pcre1_extra_info && error)
 		die("%s", error);
+
+#ifdef GIT_PCRE1_USE_JIT
+	pcre_config(PCRE_CONFIG_JIT, &p->pcre1_jit_on);
+	if (p->pcre1_jit_on == 1) {
+		p->pcre1_jit_stack = pcre_jit_stack_alloc(1, 1024 * 1024);
+		if (!p->pcre1_jit_stack)
+			die("Couldn't allocate PCRE JIT stack");
+		pcre_assign_jit_stack(p->pcre1_extra_info, NULL, p->pcre1_jit_stack);
+	} else if (p->pcre1_jit_on != 0) {
+		BUG("The pcre1_jit_on variable should be 0 or 1, not %d",
+		    p->pcre1_jit_on);
+	}
+#endif
 }
 
 static int pcre1match(struct grep_pat *p, const char *line, const char *eol,
 		regmatch_t *match, int eflags)
 {
-	int ovector[30], ret, flags = PCRE_NO_UTF8_CHECK;
+	int ovector[30], ret, flags = 0;
 
 	if (eflags & REG_NOTBOL)
 		flags |= PCRE_NOTBOL;
 
-	ret = pcre_exec(p->pcre1_regexp, p->pcre1_extra_info, line,
-			eol - line, 0, flags, ovector,
-			ARRAY_SIZE(ovector));
+#ifdef GIT_PCRE1_USE_JIT
+	if (p->pcre1_jit_on) {
+		ret = pcre_jit_exec(p->pcre1_regexp, p->pcre1_extra_info, line,
+				    eol - line, 0, flags, ovector,
+				    ARRAY_SIZE(ovector), p->pcre1_jit_stack);
+	} else
+#endif
+	{
+		ret = pcre_exec(p->pcre1_regexp, p->pcre1_extra_info, line,
+				eol - line, 0, flags, ovector,
+				ARRAY_SIZE(ovector));
+	}
 
 	if (ret < 0 && ret != PCRE_ERROR_NOMATCH)
 		die("pcre_exec failed with error code %d", ret);
@@ -466,12 +453,15 @@ static int pcre1match(struct grep_pat *p, const char *line, const char *eol,
 static void free_pcre1_regexp(struct grep_pat *p)
 {
 	pcre_free(p->pcre1_regexp);
-#ifdef PCRE_CONFIG_JIT
-	if (p->pcre1_jit_on)
+#ifdef GIT_PCRE1_USE_JIT
+	if (p->pcre1_jit_on) {
 		pcre_free_study(p->pcre1_extra_info);
-	else
+		pcre_jit_stack_free(p->pcre1_jit_stack);
+	} else
 #endif
+	{
 		pcre_free(p->pcre1_extra_info);
+	}
 	pcre_free((void *)p->pcre1_tables);
 }
 #else /* !USE_LIBPCRE1 */
@@ -498,6 +488,7 @@ static void compile_pcre2_pattern(struct grep_pat *p, const struct grep_opt *opt
 	PCRE2_UCHAR errbuf[256];
 	PCRE2_SIZE erroffset;
 	int options = PCRE2_MULTILINE;
+	const uint8_t *character_tables = NULL;
 	int jitret;
 	int patinforet;
 	size_t jitsizearg;
@@ -506,20 +497,15 @@ static void compile_pcre2_pattern(struct grep_pat *p, const struct grep_opt *opt
 
 	p->pcre2_compile_context = NULL;
 
-	/* pcre2_global_context is initialized in append_grep_pattern */
 	if (opt->ignore_case) {
-		if (!opt->ignore_locale && has_non_ascii(p->pattern)) {
-			if (!pcre2_global_context)
-				BUG("pcre2_global_context uninitialized");
-			p->pcre2_tables = pcre2_maketables(pcre2_global_context);
+		if (has_non_ascii(p->pattern)) {
+			character_tables = pcre2_maketables(NULL);
 			p->pcre2_compile_context = pcre2_compile_context_create(NULL);
-			pcre2_set_character_tables(p->pcre2_compile_context,
-							p->pcre2_tables);
+			pcre2_set_character_tables(p->pcre2_compile_context, character_tables);
 		}
 		options |= PCRE2_CASELESS;
 	}
-	if (!opt->ignore_locale && is_utf8_locale() && has_non_ascii(p->pattern) &&
-	    !(!opt->ignore_case && (p->fixed || p->is_fixed)))
+	if (is_utf8_locale() && has_non_ascii(p->pattern))
 		options |= PCRE2_UTF;
 
 	p->pcre2_pattern = pcre2_compile((PCRE2_SPTR)p->pattern,
@@ -536,9 +522,7 @@ static void compile_pcre2_pattern(struct grep_pat *p, const struct grep_opt *opt
 	}
 
 	pcre2_config(PCRE2_CONFIG_JIT, &p->pcre2_jit_on);
-	if (opt->debug)
-		fprintf(stderr, "pcre2_jit_on=%d\n", p->pcre2_jit_on);
-	if (p->pcre2_jit_on) {
+	if (p->pcre2_jit_on == 1) {
 		jitret = pcre2_jit_compile(p->pcre2_pattern, PCRE2_JIT_COMPLETE);
 		if (jitret)
 			die("Couldn't JIT the PCRE2 pattern '%s', got '%d'\n", p->pattern, jitret);
@@ -563,11 +547,19 @@ static void compile_pcre2_pattern(struct grep_pat *p, const struct grep_opt *opt
 			BUG("pcre2_pattern_info() failed: %d", patinforet);
 		if (jitsizearg == 0) {
 			p->pcre2_jit_on = 0;
-			if (opt->debug)
-				fprintf(stderr, "pcre2_jit_on=%d: (*NO_JIT) in regex\n",
-					p->pcre2_jit_on);
 			return;
 		}
+
+		p->pcre2_jit_stack = pcre2_jit_stack_create(1, 1024 * 1024, NULL);
+		if (!p->pcre2_jit_stack)
+			die("Couldn't allocate PCRE2 JIT stack");
+		p->pcre2_match_context = pcre2_match_context_create(NULL);
+		if (!p->pcre2_match_context)
+			die("Couldn't allocate PCRE2 match context");
+		pcre2_jit_stack_assign(p->pcre2_match_context, NULL, p->pcre2_jit_stack);
+	} else if (p->pcre2_jit_on != 0) {
+		BUG("The pcre2_jit_on variable should be 0 or 1, not %d",
+		    p->pcre2_jit_on);
 	}
 }
 
@@ -611,7 +603,8 @@ static void free_pcre2_pattern(struct grep_pat *p)
 	pcre2_compile_context_free(p->pcre2_compile_context);
 	pcre2_code_free(p->pcre2_pattern);
 	pcre2_match_data_free(p->pcre2_match_data);
-	free((void *)p->pcre2_tables);
+	pcre2_jit_stack_free(p->pcre2_jit_stack);
+	pcre2_match_context_free(p->pcre2_match_context);
 }
 #else /* !USE_LIBPCRE2 */
 static void compile_pcre2_pattern(struct grep_pat *p, const struct grep_opt *opt)
@@ -633,6 +626,7 @@ static int pcre2match(struct grep_pat *p, const char *line, const char *eol,
 static void free_pcre2_pattern(struct grep_pat *p)
 {
 }
+#endif /* !USE_LIBPCRE2 */
 
 static void compile_fixed_regexp(struct grep_pat *p, struct grep_opt *opt)
 {
@@ -653,66 +647,46 @@ static void compile_fixed_regexp(struct grep_pat *p, struct grep_opt *opt)
 		compile_regexp_failed(p, errbuf);
 	}
 }
-#endif /* !USE_LIBPCRE2 */
 
 static void compile_regexp(struct grep_pat *p, struct grep_opt *opt)
 {
+	int ascii_only;
 	int err;
 	int regflags = REG_NEWLINE;
 
 	p->word_regexp = opt->word_regexp;
 	p->ignore_case = opt->ignore_case;
-	p->fixed = opt->fixed;
-
-	if (memchr(p->pattern, 0, p->patternlen) && !opt->pcre2)
-		die(_("given pattern contains NULL byte (via -f <file>). This is only supported with -P under PCRE v2"));
+	ascii_only     = !has_non_ascii(p->pattern);
 
-	p->is_fixed = is_fixed(p->pattern, p->patternlen);
-#ifdef USE_LIBPCRE2
-       if (!p->fixed && !p->is_fixed) {
-	       const char *no_jit = "(*NO_JIT)";
-	       const int no_jit_len = strlen(no_jit);
-	       if (starts_with(p->pattern, no_jit) &&
-		   is_fixed(p->pattern + no_jit_len,
-			    p->patternlen - no_jit_len))
-		       p->is_fixed = 1;
-       }
-#endif
-	if (p->fixed || p->is_fixed) {
-#ifdef USE_LIBPCRE2
-		opt->pcre2 = 1;
-		if (p->is_fixed) {
-			compile_pcre2_pattern(p, opt);
-		} else {
-			/*
-			 * E.g. t7811-grep-open.sh relies on the
-			 * pattern being restored.
-			 */
-			char *old_pattern = p->pattern;
-			size_t old_patternlen = p->patternlen;
-			struct strbuf sb = STRBUF_INIT;
-
-			/*
-			 * There is the PCRE2_LITERAL flag, but it's
-			 * only in PCRE v2 10.30 and later. Needing to
-			 * ifdef our way around that and dealing with
-			 * it + PCRE2_MULTILINE being an error is more
-			 * complex than just quoting this ourselves.
-			*/
-			strbuf_add(&sb, "\\Q", 2);
-			strbuf_add(&sb, p->pattern, p->patternlen);
-			strbuf_add(&sb, "\\E", 2);
-
-			p->pattern = sb.buf;
-			p->patternlen = sb.len;
-			compile_pcre2_pattern(p, opt);
-			p->pattern = old_pattern;
-			p->patternlen = old_patternlen;
-			strbuf_release(&sb);
-		}
-#else /* !USE_LIBPCRE2 */
+	/*
+	 * Even when -F (fixed) asks us to do a non-regexp search, we
+	 * may not be able to correctly case-fold when -i
+	 * (ignore-case) is asked (in which case, we'll synthesize a
+	 * regexp to match the pattern that matches regexp special
+	 * characters literally, while ignoring case differences).  On
+	 * the other hand, even without -F, if the pattern does not
+	 * have any regexp special characters and there is no need for
+	 * case-folding search, we can internally turn it into a
+	 * simple string match using kws.  p->fixed tells us if we
+	 * want to use kws.
+	 */
+	if (opt->fixed ||
+	    has_null(p->pattern, p->patternlen) ||
+	    is_fixed(p->pattern, p->patternlen))
+		p->fixed = !p->ignore_case || ascii_only;
+
+	if (p->fixed) {
+		p->kws = kwsalloc(p->ignore_case ? tolower_trans_tbl : NULL);
+		kwsincr(p->kws, p->pattern, p->patternlen);
+		kwsprep(p->kws);
+		return;
+	} else if (opt->fixed) {
+		/*
+		 * We come here when the pattern has non-ASCII
+		 * characters that we cannot case-fold, and we were
+		 * asked to ignore case.
+		 */
 		compile_fixed_regexp(p, opt);
-#endif /* !USE_LIBPCRE2 */
 		return;
 	}
 
@@ -1079,7 +1053,9 @@ void free_grep_patterns(struct grep_opt *opt)
 		case GREP_PATTERN: /* atom */
 		case GREP_PATTERN_HEAD:
 		case GREP_PATTERN_BODY:
-			if (p->pcre1_regexp)
+			if (p->kws)
+				kwsfree(p->kws);
+			else if (p->pcre1_regexp)
 				free_pcre1_regexp(p);
 			else if (p->pcre2_pattern)
 				free_pcre2_pattern(p);
@@ -1139,12 +1115,29 @@ static void show_name(struct grep_opt *opt, const char *name)
 	opt->output(opt, opt->null_following_name ? "\0" : "\n", 1);
 }
 
+static int fixmatch(struct grep_pat *p, char *line, char *eol,
+		    regmatch_t *match)
+{
+	struct kwsmatch kwsm;
+	size_t offset = kwsexec(p->kws, line, eol - line, &kwsm);
+	if (offset == -1) {
+		match->rm_so = match->rm_eo = -1;
+		return REG_NOMATCH;
+	} else {
+		match->rm_so = offset;
+		match->rm_eo = match->rm_so + kwsm.size[0];
+		return 0;
+	}
+}
+
 static int patmatch(struct grep_pat *p, char *line, char *eol,
 		    regmatch_t *match, int eflags)
 {
 	int hit;
 
-	if (p->pcre1_regexp)
+	if (p->fixed)
+		hit = !fixmatch(p, line, eol, match);
+	else if (p->pcre1_regexp)
 		hit = !pcre1match(p, line, eol, match, eflags);
 	else if (p->pcre2_pattern)
 		hit = !pcre2match(p, line, eol, match, eflags);
@@ -1540,6 +1533,11 @@ static inline void grep_attr_unlock(void)
 		pthread_mutex_unlock(&grep_attr_mutex);
 }
 
+/*
+ * Same as grep_attr_mutex, but protecting the thread-unsafe object db access.
+ */
+pthread_mutex_t grep_read_mutex;
+
 static int match_funcname(struct grep_opt *opt, struct grep_source *gs, char *bol, char *eol)
 {
 	xdemitconf_t *xecfg = opt->priv;
@@ -1736,20 +1734,13 @@ static int fill_textconv_grep(struct repository *r,
 	}
 
 	/*
-	 * fill_textconv is not remotely thread-safe; it modifies the global
-	 * diff tempfile structure, writes to the_repo's odb and might
-	 * internally call thread-unsafe functions such as the
-	 * prepare_packed_git() lazy-initializator. Because of the last two, we
-	 * must ensure mutual exclusion between this call and the object reading
-	 * API, thus we use obj_read_lock() here.
-	 *
-	 * TODO: allowing text conversion to run in parallel with object
-	 * reading operations might increase performance in the multithreaded
-	 * non-worktreee git-grep with --textconv.
+	 * fill_textconv is not remotely thread-safe; it may load objects
+	 * behind the scenes, and it modifies the global diff tempfile
+	 * structure.
 	 */
-	obj_read_lock();
+	grep_read_lock();
 	size = fill_textconv(r, driver, df, &buf);
-	obj_read_unlock();
+	grep_read_unlock();
 	free_filespec(df);
 
 	/*
@@ -1815,15 +1806,10 @@ static int grep_source_1(struct grep_opt *opt, struct grep_source *gs, int colle
 		grep_source_load_driver(gs, opt->repo->index);
 		/*
 		 * We might set up the shared textconv cache data here, which
-		 * is not thread-safe. Also, get_oid_with_context() and
-		 * parse_object() might be internally called. As they are not
-		 * currenty thread-safe and might be racy with object reading,
-		 * obj_read_lock() must be called.
+		 * is not thread-safe.
 		 */
 		grep_attr_lock();
-		obj_read_lock();
 		textconv = userdiff_get_textconv(opt->repo, gs->driver);
-		obj_read_unlock();
 		grep_attr_unlock();
 	}
 
@@ -2123,7 +2109,10 @@ static int grep_source_load_oid(struct grep_source *gs)
 {
 	enum object_type type;
 
+	grep_read_lock();
 	gs->buf = read_object_file(gs->identifier, &type, &gs->size);
+	grep_read_unlock();
+
 	if (!gs->buf)
 		return error(_("'%s': unable to read %s"),
 			     gs->name,