diff options
Diffstat (limited to 'third_party/git/blame.c')
-rw-r--r-- | third_party/git/blame.c | 167 |
1 files changed, 146 insertions, 21 deletions
diff --git a/third_party/git/blame.c b/third_party/git/blame.c index 36a2e7ef119d..686845b2b43d 100644 --- a/third_party/git/blame.c +++ b/third_party/git/blame.c @@ -9,6 +9,8 @@ #include "blame.h" #include "alloc.h" #include "commit-slab.h" +#include "bloom.h" +#include "commit-graph.h" define_commit_slab(blame_suspects, struct blame_origin *); static struct blame_suspects blame_suspects; @@ -144,7 +146,7 @@ static void append_merge_parents(struct repository *r, while (!strbuf_getwholeline_fd(&line, merge_head, '\n')) { struct object_id oid; - if (line.len < GIT_SHA1_HEXSZ || get_oid_hex(line.buf, &oid)) + if (get_oid_hex(line.buf, &oid)) die("unknown line in '%s': %s", git_path_merge_head(r), line.buf); tail = append_parent(r, tail, &oid); @@ -417,14 +419,15 @@ static void get_fingerprint(struct fingerprint *result, /* Ignore whitespace pairs */ if (hash == 0) continue; - hashmap_entry_init(entry, hash); + hashmap_entry_init(&entry->entry, hash); - found_entry = hashmap_get(&result->map, entry, NULL); + found_entry = hashmap_get_entry(&result->map, entry, + /* member name */ entry, NULL); if (found_entry) { found_entry->count += 1; } else { entry->count = 1; - hashmap_add(&result->map, entry); + hashmap_add(&result->map, &entry->entry); ++entry; } } @@ -432,7 +435,7 @@ static void get_fingerprint(struct fingerprint *result, static void free_fingerprint(struct fingerprint *f) { - hashmap_free(&f->map, 0); + hashmap_free(&f->map); free(f->entries); } @@ -449,10 +452,10 @@ static int fingerprint_similarity(struct fingerprint *a, struct fingerprint *b) struct hashmap_iter iter; const struct fingerprint_entry *entry_a, *entry_b; - hashmap_iter_init(&b->map, &iter); - - while ((entry_b = hashmap_iter_next(&iter))) { - if ((entry_a = hashmap_get(&a->map, entry_b, NULL))) { + hashmap_for_each_entry(&b->map, &iter, entry_b, + entry /* member name */) { + entry_a = hashmap_get_entry(&a->map, entry_b, entry, NULL); + if (entry_a) { intersection += entry_a->count < entry_b->count ? entry_a->count : entry_b->count; } @@ -470,10 +473,12 @@ static void fingerprint_subtract(struct fingerprint *a, struct fingerprint *b) hashmap_iter_init(&b->map, &iter); - while ((entry_b = hashmap_iter_next(&iter))) { - if ((entry_a = hashmap_get(&a->map, entry_b, NULL))) { + hashmap_for_each_entry(&b->map, &iter, entry_b, + entry /* member name */) { + entry_a = hashmap_get_entry(&a->map, entry_b, entry, NULL); + if (entry_a) { if (entry_a->count <= entry_b->count) - hashmap_remove(&a->map, entry_b, NULL); + hashmap_remove(&a->map, &entry_b->entry, NULL); else entry_a->count -= entry_b->count; } @@ -1179,6 +1184,7 @@ void blame_coalesce(struct blame_scoreboard *sb) for (ent = sb->ent; ent && (next = ent->next); ent = next) { if (ent->suspect == next->suspect && ent->s_lno + ent->num_lines == next->s_lno && + ent->lno + ent->num_lines == next->lno && ent->ignored == next->ignored && ent->unblamable == next->unblamable) { ent->num_lines += next->num_lines; @@ -1243,13 +1249,74 @@ static int fill_blob_sha1_and_mode(struct repository *r, return -1; } +struct blame_bloom_data { + /* + * Changed-path Bloom filter keys. These can help prevent + * computing diffs against first parents, but we need to + * expand the list as code is moved or files are renamed. + */ + struct bloom_filter_settings *settings; + struct bloom_key **keys; + int nr; + int alloc; +}; + +static int bloom_count_queries = 0; +static int bloom_count_no = 0; +static int maybe_changed_path(struct repository *r, + struct blame_origin *origin, + struct blame_bloom_data *bd) +{ + int i; + struct bloom_filter *filter; + + if (!bd) + return 1; + + if (commit_graph_generation(origin->commit) == GENERATION_NUMBER_INFINITY) + return 1; + + filter = get_bloom_filter(r, origin->commit); + + if (!filter) + return 1; + + bloom_count_queries++; + for (i = 0; i < bd->nr; i++) { + if (bloom_filter_contains(filter, + bd->keys[i], + bd->settings)) + return 1; + } + + bloom_count_no++; + return 0; +} + +static void add_bloom_key(struct blame_bloom_data *bd, + const char *path) +{ + if (!bd) + return; + + if (bd->nr >= bd->alloc) { + bd->alloc *= 2; + REALLOC_ARRAY(bd->keys, bd->alloc); + } + + bd->keys[bd->nr] = xmalloc(sizeof(struct bloom_key)); + fill_bloom_key(path, strlen(path), bd->keys[bd->nr], bd->settings); + bd->nr++; +} + /* * We have an origin -- check if the same path exists in the * parent and return an origin structure to represent it. */ static struct blame_origin *find_origin(struct repository *r, struct commit *parent, - struct blame_origin *origin) + struct blame_origin *origin, + struct blame_bloom_data *bd) { struct blame_origin *porigin; struct diff_options diff_opts; @@ -1283,10 +1350,18 @@ static struct blame_origin *find_origin(struct repository *r, if (is_null_oid(&origin->commit->object.oid)) do_diff_cache(get_commit_tree_oid(parent), &diff_opts); - else - diff_tree_oid(get_commit_tree_oid(parent), - get_commit_tree_oid(origin->commit), - "", &diff_opts); + else { + int compute_diff = 1; + if (origin->commit->parents && + oideq(&parent->object.oid, + &origin->commit->parents->item->object.oid)) + compute_diff = maybe_changed_path(r, origin, bd); + + if (compute_diff) + diff_tree_oid(get_commit_tree_oid(parent), + get_commit_tree_oid(origin->commit), + "", &diff_opts); + } diffcore_std(&diff_opts); if (!diff_queued_diff.nr) { @@ -1338,7 +1413,8 @@ static struct blame_origin *find_origin(struct repository *r, */ static struct blame_origin *find_rename(struct repository *r, struct commit *parent, - struct blame_origin *origin) + struct blame_origin *origin, + struct blame_bloom_data *bd) { struct blame_origin *porigin = NULL; struct diff_options diff_opts; @@ -1363,6 +1439,7 @@ static struct blame_origin *find_rename(struct repository *r, struct diff_filepair *p = diff_queued_diff.queue[i]; if ((p->status == 'R' || p->status == 'C') && !strcmp(p->two->path, origin->path)) { + add_bloom_key(bd, p->one->path); porigin = get_origin(parent, p->one->path); oidcpy(&porigin->blob_oid, &p->one->oid); porigin->mode = p->one->mode; @@ -2329,6 +2406,11 @@ static void distribute_blame(struct blame_scoreboard *sb, struct blame_entry *bl #define MAXSG 16 +typedef struct blame_origin *(*blame_find_alg)(struct repository *, + struct commit *, + struct blame_origin *, + struct blame_bloom_data *); + static void pass_blame(struct blame_scoreboard *sb, struct blame_origin *origin, int opt) { struct rev_info *revs = sb->revs; @@ -2353,8 +2435,7 @@ static void pass_blame(struct blame_scoreboard *sb, struct blame_origin *origin, * common cases, then we look for renames in the second pass. */ for (pass = 0; pass < 2 - sb->no_whole_file_rename; pass++) { - struct blame_origin *(*find)(struct repository *, struct commit *, struct blame_origin *); - find = pass ? find_rename : find_origin; + blame_find_alg find = pass ? find_rename : find_origin; for (i = 0, sg = first_scapegoat(revs, commit, sb->reverse); i < num_sg && sg; @@ -2366,7 +2447,7 @@ static void pass_blame(struct blame_scoreboard *sb, struct blame_origin *origin, continue; if (parse_commit(p)) continue; - porigin = find(sb->repo, p, origin); + porigin = find(sb->repo, p, origin, sb->bloom_data); if (!porigin) continue; if (oideq(&porigin->blob_oid, &origin->blob_oid)) { @@ -2806,3 +2887,47 @@ struct blame_entry *blame_entry_prepend(struct blame_entry *head, blame_origin_incref(o); return new_head; } + +void setup_blame_bloom_data(struct blame_scoreboard *sb, + const char *path) +{ + struct blame_bloom_data *bd; + struct bloom_filter_settings *bs; + + if (!sb->repo->objects->commit_graph) + return; + + bs = get_bloom_filter_settings(sb->repo); + if (!bs) + return; + + bd = xmalloc(sizeof(struct blame_bloom_data)); + + bd->settings = bs; + + bd->alloc = 4; + bd->nr = 0; + ALLOC_ARRAY(bd->keys, bd->alloc); + + add_bloom_key(bd, path); + + sb->bloom_data = bd; +} + +void cleanup_scoreboard(struct blame_scoreboard *sb) +{ + if (sb->bloom_data) { + int i; + for (i = 0; i < sb->bloom_data->nr; i++) { + free(sb->bloom_data->keys[i]->hashes); + free(sb->bloom_data->keys[i]); + } + free(sb->bloom_data->keys); + FREE_AND_NULL(sb->bloom_data); + + trace2_data_intmax("blame", sb->repo, + "bloom/queries", bloom_count_queries); + trace2_data_intmax("blame", sb->repo, + "bloom/response-no", bloom_count_no); + } +} |