about summary refs log tree commit diff
path: root/third_party/git/blame.c
diff options
context:
space:
mode:
authorVincent Ambo <mail@tazj.in>2020-11-21T18·20+0100
committerVincent Ambo <mail@tazj.in>2020-11-21T18·45+0100
commitf4609b896fac842433bd495c166d5987852a6a73 (patch)
tree95511c465c54c4f5d27e5d39ce187e2a1dd82bd3 /third_party/git/blame.c
parent082c006c04343a78d87b6c6ab3608c25d6213c3f (diff)
merge(3p/git): Merge git subtree at v2.29.2 r/1890
This also bumps the stable nixpkgs to 20.09 as of 2020-11-21, because
there is some breakage in the git build related to the netrc
credentials helper which someone has taken care of in nixpkgs.

The stable channel is not used for anything other than git, so this
should be fine.

Change-Id: I3575a19dab09e1e9556cf8231d717de9890484fb
Diffstat (limited to 'third_party/git/blame.c')
-rw-r--r--third_party/git/blame.c167
1 files changed, 146 insertions, 21 deletions
diff --git a/third_party/git/blame.c b/third_party/git/blame.c
index 36a2e7ef119d..686845b2b43d 100644
--- a/third_party/git/blame.c
+++ b/third_party/git/blame.c
@@ -9,6 +9,8 @@
 #include "blame.h"
 #include "alloc.h"
 #include "commit-slab.h"
+#include "bloom.h"
+#include "commit-graph.h"
 
 define_commit_slab(blame_suspects, struct blame_origin *);
 static struct blame_suspects blame_suspects;
@@ -144,7 +146,7 @@ static void append_merge_parents(struct repository *r,
 
 	while (!strbuf_getwholeline_fd(&line, merge_head, '\n')) {
 		struct object_id oid;
-		if (line.len < GIT_SHA1_HEXSZ || get_oid_hex(line.buf, &oid))
+		if (get_oid_hex(line.buf, &oid))
 			die("unknown line in '%s': %s",
 			    git_path_merge_head(r), line.buf);
 		tail = append_parent(r, tail, &oid);
@@ -417,14 +419,15 @@ static void get_fingerprint(struct fingerprint *result,
 		/* Ignore whitespace pairs */
 		if (hash == 0)
 			continue;
-		hashmap_entry_init(entry, hash);
+		hashmap_entry_init(&entry->entry, hash);
 
-		found_entry = hashmap_get(&result->map, entry, NULL);
+		found_entry = hashmap_get_entry(&result->map, entry,
+						/* member name */ entry, NULL);
 		if (found_entry) {
 			found_entry->count += 1;
 		} else {
 			entry->count = 1;
-			hashmap_add(&result->map, entry);
+			hashmap_add(&result->map, &entry->entry);
 			++entry;
 		}
 	}
@@ -432,7 +435,7 @@ static void get_fingerprint(struct fingerprint *result,
 
 static void free_fingerprint(struct fingerprint *f)
 {
-	hashmap_free(&f->map, 0);
+	hashmap_free(&f->map);
 	free(f->entries);
 }
 
@@ -449,10 +452,10 @@ static int fingerprint_similarity(struct fingerprint *a, struct fingerprint *b)
 	struct hashmap_iter iter;
 	const struct fingerprint_entry *entry_a, *entry_b;
 
-	hashmap_iter_init(&b->map, &iter);
-
-	while ((entry_b = hashmap_iter_next(&iter))) {
-		if ((entry_a = hashmap_get(&a->map, entry_b, NULL))) {
+	hashmap_for_each_entry(&b->map, &iter, entry_b,
+				entry /* member name */) {
+		entry_a = hashmap_get_entry(&a->map, entry_b, entry, NULL);
+		if (entry_a) {
 			intersection += entry_a->count < entry_b->count ?
 					entry_a->count : entry_b->count;
 		}
@@ -470,10 +473,12 @@ static void fingerprint_subtract(struct fingerprint *a, struct fingerprint *b)
 
 	hashmap_iter_init(&b->map, &iter);
 
-	while ((entry_b = hashmap_iter_next(&iter))) {
-		if ((entry_a = hashmap_get(&a->map, entry_b, NULL))) {
+	hashmap_for_each_entry(&b->map, &iter, entry_b,
+				entry /* member name */) {
+		entry_a = hashmap_get_entry(&a->map, entry_b, entry, NULL);
+		if (entry_a) {
 			if (entry_a->count <= entry_b->count)
-				hashmap_remove(&a->map, entry_b, NULL);
+				hashmap_remove(&a->map, &entry_b->entry, NULL);
 			else
 				entry_a->count -= entry_b->count;
 		}
@@ -1179,6 +1184,7 @@ void blame_coalesce(struct blame_scoreboard *sb)
 	for (ent = sb->ent; ent && (next = ent->next); ent = next) {
 		if (ent->suspect == next->suspect &&
 		    ent->s_lno + ent->num_lines == next->s_lno &&
+		    ent->lno + ent->num_lines == next->lno &&
 		    ent->ignored == next->ignored &&
 		    ent->unblamable == next->unblamable) {
 			ent->num_lines += next->num_lines;
@@ -1243,13 +1249,74 @@ static int fill_blob_sha1_and_mode(struct repository *r,
 	return -1;
 }
 
+struct blame_bloom_data {
+	/*
+	 * Changed-path Bloom filter keys. These can help prevent
+	 * computing diffs against first parents, but we need to
+	 * expand the list as code is moved or files are renamed.
+	 */
+	struct bloom_filter_settings *settings;
+	struct bloom_key **keys;
+	int nr;
+	int alloc;
+};
+
+static int bloom_count_queries = 0;
+static int bloom_count_no = 0;
+static int maybe_changed_path(struct repository *r,
+			      struct blame_origin *origin,
+			      struct blame_bloom_data *bd)
+{
+	int i;
+	struct bloom_filter *filter;
+
+	if (!bd)
+		return 1;
+
+	if (commit_graph_generation(origin->commit) == GENERATION_NUMBER_INFINITY)
+		return 1;
+
+	filter = get_bloom_filter(r, origin->commit);
+
+	if (!filter)
+		return 1;
+
+	bloom_count_queries++;
+	for (i = 0; i < bd->nr; i++) {
+		if (bloom_filter_contains(filter,
+					  bd->keys[i],
+					  bd->settings))
+			return 1;
+	}
+
+	bloom_count_no++;
+	return 0;
+}
+
+static void add_bloom_key(struct blame_bloom_data *bd,
+			  const char *path)
+{
+	if (!bd)
+		return;
+
+	if (bd->nr >= bd->alloc) {
+		bd->alloc *= 2;
+		REALLOC_ARRAY(bd->keys, bd->alloc);
+	}
+
+	bd->keys[bd->nr] = xmalloc(sizeof(struct bloom_key));
+	fill_bloom_key(path, strlen(path), bd->keys[bd->nr], bd->settings);
+	bd->nr++;
+}
+
 /*
  * We have an origin -- check if the same path exists in the
  * parent and return an origin structure to represent it.
  */
 static struct blame_origin *find_origin(struct repository *r,
 					struct commit *parent,
-					struct blame_origin *origin)
+					struct blame_origin *origin,
+					struct blame_bloom_data *bd)
 {
 	struct blame_origin *porigin;
 	struct diff_options diff_opts;
@@ -1283,10 +1350,18 @@ static struct blame_origin *find_origin(struct repository *r,
 
 	if (is_null_oid(&origin->commit->object.oid))
 		do_diff_cache(get_commit_tree_oid(parent), &diff_opts);
-	else
-		diff_tree_oid(get_commit_tree_oid(parent),
-			      get_commit_tree_oid(origin->commit),
-			      "", &diff_opts);
+	else {
+		int compute_diff = 1;
+		if (origin->commit->parents &&
+		    oideq(&parent->object.oid,
+			  &origin->commit->parents->item->object.oid))
+			compute_diff = maybe_changed_path(r, origin, bd);
+
+		if (compute_diff)
+			diff_tree_oid(get_commit_tree_oid(parent),
+				      get_commit_tree_oid(origin->commit),
+				      "", &diff_opts);
+	}
 	diffcore_std(&diff_opts);
 
 	if (!diff_queued_diff.nr) {
@@ -1338,7 +1413,8 @@ static struct blame_origin *find_origin(struct repository *r,
  */
 static struct blame_origin *find_rename(struct repository *r,
 					struct commit *parent,
-					struct blame_origin *origin)
+					struct blame_origin *origin,
+					struct blame_bloom_data *bd)
 {
 	struct blame_origin *porigin = NULL;
 	struct diff_options diff_opts;
@@ -1363,6 +1439,7 @@ static struct blame_origin *find_rename(struct repository *r,
 		struct diff_filepair *p = diff_queued_diff.queue[i];
 		if ((p->status == 'R' || p->status == 'C') &&
 		    !strcmp(p->two->path, origin->path)) {
+			add_bloom_key(bd, p->one->path);
 			porigin = get_origin(parent, p->one->path);
 			oidcpy(&porigin->blob_oid, &p->one->oid);
 			porigin->mode = p->one->mode;
@@ -2329,6 +2406,11 @@ static void distribute_blame(struct blame_scoreboard *sb, struct blame_entry *bl
 
 #define MAXSG 16
 
+typedef struct blame_origin *(*blame_find_alg)(struct repository *,
+					       struct commit *,
+					       struct blame_origin *,
+					       struct blame_bloom_data *);
+
 static void pass_blame(struct blame_scoreboard *sb, struct blame_origin *origin, int opt)
 {
 	struct rev_info *revs = sb->revs;
@@ -2353,8 +2435,7 @@ static void pass_blame(struct blame_scoreboard *sb, struct blame_origin *origin,
 	 * common cases, then we look for renames in the second pass.
 	 */
 	for (pass = 0; pass < 2 - sb->no_whole_file_rename; pass++) {
-		struct blame_origin *(*find)(struct repository *, struct commit *, struct blame_origin *);
-		find = pass ? find_rename : find_origin;
+		blame_find_alg find = pass ? find_rename : find_origin;
 
 		for (i = 0, sg = first_scapegoat(revs, commit, sb->reverse);
 		     i < num_sg && sg;
@@ -2366,7 +2447,7 @@ static void pass_blame(struct blame_scoreboard *sb, struct blame_origin *origin,
 				continue;
 			if (parse_commit(p))
 				continue;
-			porigin = find(sb->repo, p, origin);
+			porigin = find(sb->repo, p, origin, sb->bloom_data);
 			if (!porigin)
 				continue;
 			if (oideq(&porigin->blob_oid, &origin->blob_oid)) {
@@ -2806,3 +2887,47 @@ struct blame_entry *blame_entry_prepend(struct blame_entry *head,
 	blame_origin_incref(o);
 	return new_head;
 }
+
+void setup_blame_bloom_data(struct blame_scoreboard *sb,
+			    const char *path)
+{
+	struct blame_bloom_data *bd;
+	struct bloom_filter_settings *bs;
+
+	if (!sb->repo->objects->commit_graph)
+		return;
+
+	bs = get_bloom_filter_settings(sb->repo);
+	if (!bs)
+		return;
+
+	bd = xmalloc(sizeof(struct blame_bloom_data));
+
+	bd->settings = bs;
+
+	bd->alloc = 4;
+	bd->nr = 0;
+	ALLOC_ARRAY(bd->keys, bd->alloc);
+
+	add_bloom_key(bd, path);
+
+	sb->bloom_data = bd;
+}
+
+void cleanup_scoreboard(struct blame_scoreboard *sb)
+{
+	if (sb->bloom_data) {
+		int i;
+		for (i = 0; i < sb->bloom_data->nr; i++) {
+			free(sb->bloom_data->keys[i]->hashes);
+			free(sb->bloom_data->keys[i]);
+		}
+		free(sb->bloom_data->keys);
+		FREE_AND_NULL(sb->bloom_data);
+
+		trace2_data_intmax("blame", sb->repo,
+				   "bloom/queries", bloom_count_queries);
+		trace2_data_intmax("blame", sb->repo,
+				   "bloom/response-no", bloom_count_no);
+	}
+}