about summary refs log tree commit diff
path: root/third_party/git/blame.c
diff options
context:
space:
mode:
Diffstat (limited to 'third_party/git/blame.c')
-rw-r--r--third_party/git/blame.c167
1 files changed, 146 insertions, 21 deletions
diff --git a/third_party/git/blame.c b/third_party/git/blame.c
index 36a2e7ef119d..686845b2b43d 100644
--- a/third_party/git/blame.c
+++ b/third_party/git/blame.c
@@ -9,6 +9,8 @@
 #include "blame.h"
 #include "alloc.h"
 #include "commit-slab.h"
+#include "bloom.h"
+#include "commit-graph.h"
 
 define_commit_slab(blame_suspects, struct blame_origin *);
 static struct blame_suspects blame_suspects;
@@ -144,7 +146,7 @@ static void append_merge_parents(struct repository *r,
 
 	while (!strbuf_getwholeline_fd(&line, merge_head, '\n')) {
 		struct object_id oid;
-		if (line.len < GIT_SHA1_HEXSZ || get_oid_hex(line.buf, &oid))
+		if (get_oid_hex(line.buf, &oid))
 			die("unknown line in '%s': %s",
 			    git_path_merge_head(r), line.buf);
 		tail = append_parent(r, tail, &oid);
@@ -417,14 +419,15 @@ static void get_fingerprint(struct fingerprint *result,
 		/* Ignore whitespace pairs */
 		if (hash == 0)
 			continue;
-		hashmap_entry_init(entry, hash);
+		hashmap_entry_init(&entry->entry, hash);
 
-		found_entry = hashmap_get(&result->map, entry, NULL);
+		found_entry = hashmap_get_entry(&result->map, entry,
+						/* member name */ entry, NULL);
 		if (found_entry) {
 			found_entry->count += 1;
 		} else {
 			entry->count = 1;
-			hashmap_add(&result->map, entry);
+			hashmap_add(&result->map, &entry->entry);
 			++entry;
 		}
 	}
@@ -432,7 +435,7 @@ static void get_fingerprint(struct fingerprint *result,
 
 static void free_fingerprint(struct fingerprint *f)
 {
-	hashmap_free(&f->map, 0);
+	hashmap_free(&f->map);
 	free(f->entries);
 }
 
@@ -449,10 +452,10 @@ static int fingerprint_similarity(struct fingerprint *a, struct fingerprint *b)
 	struct hashmap_iter iter;
 	const struct fingerprint_entry *entry_a, *entry_b;
 
-	hashmap_iter_init(&b->map, &iter);
-
-	while ((entry_b = hashmap_iter_next(&iter))) {
-		if ((entry_a = hashmap_get(&a->map, entry_b, NULL))) {
+	hashmap_for_each_entry(&b->map, &iter, entry_b,
+				entry /* member name */) {
+		entry_a = hashmap_get_entry(&a->map, entry_b, entry, NULL);
+		if (entry_a) {
 			intersection += entry_a->count < entry_b->count ?
 					entry_a->count : entry_b->count;
 		}
@@ -470,10 +473,12 @@ static void fingerprint_subtract(struct fingerprint *a, struct fingerprint *b)
 
 	hashmap_iter_init(&b->map, &iter);
 
-	while ((entry_b = hashmap_iter_next(&iter))) {
-		if ((entry_a = hashmap_get(&a->map, entry_b, NULL))) {
+	hashmap_for_each_entry(&b->map, &iter, entry_b,
+				entry /* member name */) {
+		entry_a = hashmap_get_entry(&a->map, entry_b, entry, NULL);
+		if (entry_a) {
 			if (entry_a->count <= entry_b->count)
-				hashmap_remove(&a->map, entry_b, NULL);
+				hashmap_remove(&a->map, &entry_b->entry, NULL);
 			else
 				entry_a->count -= entry_b->count;
 		}
@@ -1179,6 +1184,7 @@ void blame_coalesce(struct blame_scoreboard *sb)
 	for (ent = sb->ent; ent && (next = ent->next); ent = next) {
 		if (ent->suspect == next->suspect &&
 		    ent->s_lno + ent->num_lines == next->s_lno &&
+		    ent->lno + ent->num_lines == next->lno &&
 		    ent->ignored == next->ignored &&
 		    ent->unblamable == next->unblamable) {
 			ent->num_lines += next->num_lines;
@@ -1243,13 +1249,74 @@ static int fill_blob_sha1_and_mode(struct repository *r,
 	return -1;
 }
 
+struct blame_bloom_data {
+	/*
+	 * Changed-path Bloom filter keys. These can help prevent
+	 * computing diffs against first parents, but we need to
+	 * expand the list as code is moved or files are renamed.
+	 */
+	struct bloom_filter_settings *settings;
+	struct bloom_key **keys;
+	int nr;
+	int alloc;
+};
+
+static int bloom_count_queries = 0;
+static int bloom_count_no = 0;
+static int maybe_changed_path(struct repository *r,
+			      struct blame_origin *origin,
+			      struct blame_bloom_data *bd)
+{
+	int i;
+	struct bloom_filter *filter;
+
+	if (!bd)
+		return 1;
+
+	if (commit_graph_generation(origin->commit) == GENERATION_NUMBER_INFINITY)
+		return 1;
+
+	filter = get_bloom_filter(r, origin->commit);
+
+	if (!filter)
+		return 1;
+
+	bloom_count_queries++;
+	for (i = 0; i < bd->nr; i++) {
+		if (bloom_filter_contains(filter,
+					  bd->keys[i],
+					  bd->settings))
+			return 1;
+	}
+
+	bloom_count_no++;
+	return 0;
+}
+
+static void add_bloom_key(struct blame_bloom_data *bd,
+			  const char *path)
+{
+	if (!bd)
+		return;
+
+	if (bd->nr >= bd->alloc) {
+		bd->alloc *= 2;
+		REALLOC_ARRAY(bd->keys, bd->alloc);
+	}
+
+	bd->keys[bd->nr] = xmalloc(sizeof(struct bloom_key));
+	fill_bloom_key(path, strlen(path), bd->keys[bd->nr], bd->settings);
+	bd->nr++;
+}
+
 /*
  * We have an origin -- check if the same path exists in the
  * parent and return an origin structure to represent it.
  */
 static struct blame_origin *find_origin(struct repository *r,
 					struct commit *parent,
-					struct blame_origin *origin)
+					struct blame_origin *origin,
+					struct blame_bloom_data *bd)
 {
 	struct blame_origin *porigin;
 	struct diff_options diff_opts;
@@ -1283,10 +1350,18 @@ static struct blame_origin *find_origin(struct repository *r,
 
 	if (is_null_oid(&origin->commit->object.oid))
 		do_diff_cache(get_commit_tree_oid(parent), &diff_opts);
-	else
-		diff_tree_oid(get_commit_tree_oid(parent),
-			      get_commit_tree_oid(origin->commit),
-			      "", &diff_opts);
+	else {
+		int compute_diff = 1;
+		if (origin->commit->parents &&
+		    oideq(&parent->object.oid,
+			  &origin->commit->parents->item->object.oid))
+			compute_diff = maybe_changed_path(r, origin, bd);
+
+		if (compute_diff)
+			diff_tree_oid(get_commit_tree_oid(parent),
+				      get_commit_tree_oid(origin->commit),
+				      "", &diff_opts);
+	}
 	diffcore_std(&diff_opts);
 
 	if (!diff_queued_diff.nr) {
@@ -1338,7 +1413,8 @@ static struct blame_origin *find_origin(struct repository *r,
  */
 static struct blame_origin *find_rename(struct repository *r,
 					struct commit *parent,
-					struct blame_origin *origin)
+					struct blame_origin *origin,
+					struct blame_bloom_data *bd)
 {
 	struct blame_origin *porigin = NULL;
 	struct diff_options diff_opts;
@@ -1363,6 +1439,7 @@ static struct blame_origin *find_rename(struct repository *r,
 		struct diff_filepair *p = diff_queued_diff.queue[i];
 		if ((p->status == 'R' || p->status == 'C') &&
 		    !strcmp(p->two->path, origin->path)) {
+			add_bloom_key(bd, p->one->path);
 			porigin = get_origin(parent, p->one->path);
 			oidcpy(&porigin->blob_oid, &p->one->oid);
 			porigin->mode = p->one->mode;
@@ -2329,6 +2406,11 @@ static void distribute_blame(struct blame_scoreboard *sb, struct blame_entry *bl
 
 #define MAXSG 16
 
+typedef struct blame_origin *(*blame_find_alg)(struct repository *,
+					       struct commit *,
+					       struct blame_origin *,
+					       struct blame_bloom_data *);
+
 static void pass_blame(struct blame_scoreboard *sb, struct blame_origin *origin, int opt)
 {
 	struct rev_info *revs = sb->revs;
@@ -2353,8 +2435,7 @@ static void pass_blame(struct blame_scoreboard *sb, struct blame_origin *origin,
 	 * common cases, then we look for renames in the second pass.
 	 */
 	for (pass = 0; pass < 2 - sb->no_whole_file_rename; pass++) {
-		struct blame_origin *(*find)(struct repository *, struct commit *, struct blame_origin *);
-		find = pass ? find_rename : find_origin;
+		blame_find_alg find = pass ? find_rename : find_origin;
 
 		for (i = 0, sg = first_scapegoat(revs, commit, sb->reverse);
 		     i < num_sg && sg;
@@ -2366,7 +2447,7 @@ static void pass_blame(struct blame_scoreboard *sb, struct blame_origin *origin,
 				continue;
 			if (parse_commit(p))
 				continue;
-			porigin = find(sb->repo, p, origin);
+			porigin = find(sb->repo, p, origin, sb->bloom_data);
 			if (!porigin)
 				continue;
 			if (oideq(&porigin->blob_oid, &origin->blob_oid)) {
@@ -2806,3 +2887,47 @@ struct blame_entry *blame_entry_prepend(struct blame_entry *head,
 	blame_origin_incref(o);
 	return new_head;
 }
+
+void setup_blame_bloom_data(struct blame_scoreboard *sb,
+			    const char *path)
+{
+	struct blame_bloom_data *bd;
+	struct bloom_filter_settings *bs;
+
+	if (!sb->repo->objects->commit_graph)
+		return;
+
+	bs = get_bloom_filter_settings(sb->repo);
+	if (!bs)
+		return;
+
+	bd = xmalloc(sizeof(struct blame_bloom_data));
+
+	bd->settings = bs;
+
+	bd->alloc = 4;
+	bd->nr = 0;
+	ALLOC_ARRAY(bd->keys, bd->alloc);
+
+	add_bloom_key(bd, path);
+
+	sb->bloom_data = bd;
+}
+
+void cleanup_scoreboard(struct blame_scoreboard *sb)
+{
+	if (sb->bloom_data) {
+		int i;
+		for (i = 0; i < sb->bloom_data->nr; i++) {
+			free(sb->bloom_data->keys[i]->hashes);
+			free(sb->bloom_data->keys[i]);
+		}
+		free(sb->bloom_data->keys);
+		FREE_AND_NULL(sb->bloom_data);
+
+		trace2_data_intmax("blame", sb->repo,
+				   "bloom/queries", bloom_count_queries);
+		trace2_data_intmax("blame", sb->repo,
+				   "bloom/response-no", bloom_count_no);
+	}
+}