diff options
Diffstat (limited to 'third_party/git/builtin/pack-objects.c')
-rw-r--r-- | third_party/git/builtin/pack-objects.c | 336 |
1 files changed, 102 insertions, 234 deletions
diff --git a/third_party/git/builtin/pack-objects.c b/third_party/git/builtin/pack-objects.c index 02aa6ee4808a..76ce9069467e 100644 --- a/third_party/git/builtin/pack-objects.c +++ b/third_party/git/builtin/pack-objects.c @@ -92,11 +92,10 @@ static struct progress *progress_state; static struct packed_git *reuse_packfile; static uint32_t reuse_packfile_objects; -static struct bitmap *reuse_packfile_bitmap; +static off_t reuse_packfile_offset; static int use_bitmap_index_default = 1; static int use_bitmap_index = -1; -static int allow_pack_reuse = 1; static enum { WRITE_BITMAP_FALSE = 0, WRITE_BITMAP_QUIET, @@ -164,7 +163,7 @@ static void *get_delta(struct object_entry *entry) delta_buf = diff_delta(base_buf, base_size, buf, size, &delta_size, 0); /* - * We successfully computed this delta once but dropped it for + * We succesfully computed this delta once but dropped it for * memory reasons. Something is very wrong if this time we * recompute and create a different delta. */ @@ -304,8 +303,7 @@ static unsigned long write_no_reuse_object(struct hashfile *f, struct object_ent if (!usable_delta) { if (oe_type(entry) == OBJ_BLOB && oe_size_greater_than(&to_pack, entry, big_file_threshold) && - (st = open_istream(the_repository, &entry->idx.oid, &type, - &size, NULL)) != NULL) + (st = open_istream(&entry->idx.oid, &type, &size, NULL)) != NULL) buf = NULL; else { buf = read_object_file(&entry->idx.oid, &type, &size); @@ -612,12 +610,12 @@ static int mark_tagged(const char *path, const struct object_id *oid, int flag, void *cb_data) { struct object_id peeled; - struct object_entry *entry = packlist_find(&to_pack, oid); + struct object_entry *entry = packlist_find(&to_pack, oid, NULL); if (entry) entry->tagged = 1; if (!peel_ref(path, &peeled)) { - entry = packlist_find(&to_pack, &peeled); + entry = packlist_find(&to_pack, &peeled, NULL); if (entry) entry->tagged = 1; } @@ -786,186 +784,57 @@ static struct object_entry **compute_write_order(void) return wo; } - -/* - * A reused set of objects. All objects in a chunk have the same - * relative position in the original packfile and the generated - * packfile. - */ - -static struct reused_chunk { - /* The offset of the first object of this chunk in the original - * packfile. */ - off_t original; - /* The offset of the first object of this chunk in the generated - * packfile minus "original". */ - off_t difference; -} *reused_chunks; -static int reused_chunks_nr; -static int reused_chunks_alloc; - -static void record_reused_object(off_t where, off_t offset) +static off_t write_reused_pack(struct hashfile *f) { - if (reused_chunks_nr && reused_chunks[reused_chunks_nr-1].difference == offset) - return; + unsigned char buffer[8192]; + off_t to_write, total; + int fd; - ALLOC_GROW(reused_chunks, reused_chunks_nr + 1, - reused_chunks_alloc); - reused_chunks[reused_chunks_nr].original = where; - reused_chunks[reused_chunks_nr].difference = offset; - reused_chunks_nr++; -} + if (!is_pack_valid(reuse_packfile)) + die(_("packfile is invalid: %s"), reuse_packfile->pack_name); -/* - * Binary search to find the chunk that "where" is in. Note - * that we're not looking for an exact match, just the first - * chunk that contains it (which implicitly ends at the start - * of the next chunk. - */ -static off_t find_reused_offset(off_t where) -{ - int lo = 0, hi = reused_chunks_nr; - while (lo < hi) { - int mi = lo + ((hi - lo) / 2); - if (where == reused_chunks[mi].original) - return reused_chunks[mi].difference; - if (where < reused_chunks[mi].original) - hi = mi; - else - lo = mi + 1; - } + fd = git_open(reuse_packfile->pack_name); + if (fd < 0) + die_errno(_("unable to open packfile for reuse: %s"), + reuse_packfile->pack_name); - /* - * The first chunk starts at zero, so we can't have gone below - * there. - */ - assert(lo); - return reused_chunks[lo-1].difference; -} + if (lseek(fd, sizeof(struct pack_header), SEEK_SET) == -1) + die_errno(_("unable to seek in reused packfile")); -static void write_reused_pack_one(size_t pos, struct hashfile *out, - struct pack_window **w_curs) -{ - off_t offset, next, cur; - enum object_type type; - unsigned long size; + if (reuse_packfile_offset < 0) + reuse_packfile_offset = reuse_packfile->pack_size - the_hash_algo->rawsz; - offset = reuse_packfile->revindex[pos].offset; - next = reuse_packfile->revindex[pos + 1].offset; + total = to_write = reuse_packfile_offset - sizeof(struct pack_header); - record_reused_object(offset, offset - hashfile_total(out)); + while (to_write) { + int read_pack = xread(fd, buffer, sizeof(buffer)); - cur = offset; - type = unpack_object_header(reuse_packfile, w_curs, &cur, &size); - assert(type >= 0); + if (read_pack <= 0) + die_errno(_("unable to read from reused packfile")); - if (type == OBJ_OFS_DELTA) { - off_t base_offset; - off_t fixup; - - unsigned char header[MAX_PACK_OBJECT_HEADER]; - unsigned len; - - base_offset = get_delta_base(reuse_packfile, w_curs, &cur, type, offset); - assert(base_offset != 0); - - /* Convert to REF_DELTA if we must... */ - if (!allow_ofs_delta) { - int base_pos = find_revindex_position(reuse_packfile, base_offset); - struct object_id base_oid; - - nth_packed_object_id(&base_oid, reuse_packfile, - reuse_packfile->revindex[base_pos].nr); - - len = encode_in_pack_object_header(header, sizeof(header), - OBJ_REF_DELTA, size); - hashwrite(out, header, len); - hashwrite(out, base_oid.hash, 20); - copy_pack_data(out, reuse_packfile, w_curs, cur, next - cur); - return; - } - - /* Otherwise see if we need to rewrite the offset... */ - fixup = find_reused_offset(offset) - - find_reused_offset(base_offset); - if (fixup) { - unsigned char ofs_header[10]; - unsigned i, ofs_len; - off_t ofs = offset - base_offset - fixup; - - len = encode_in_pack_object_header(header, sizeof(header), - OBJ_OFS_DELTA, size); + if (read_pack > to_write) + read_pack = to_write; - i = sizeof(ofs_header) - 1; - ofs_header[i] = ofs & 127; - while (ofs >>= 7) - ofs_header[--i] = 128 | (--ofs & 127); - - ofs_len = sizeof(ofs_header) - i; - - hashwrite(out, header, len); - hashwrite(out, ofs_header + sizeof(ofs_header) - ofs_len, ofs_len); - copy_pack_data(out, reuse_packfile, w_curs, cur, next - cur); - return; - } - - /* ...otherwise we have no fixup, and can write it verbatim */ - } - - copy_pack_data(out, reuse_packfile, w_curs, offset, next - offset); -} - -static size_t write_reused_pack_verbatim(struct hashfile *out, - struct pack_window **w_curs) -{ - size_t pos = 0; - - while (pos < reuse_packfile_bitmap->word_alloc && - reuse_packfile_bitmap->words[pos] == (eword_t)~0) - pos++; - - if (pos) { - off_t to_write; - - written = (pos * BITS_IN_EWORD); - to_write = reuse_packfile->revindex[written].offset - - sizeof(struct pack_header); - - /* We're recording one chunk, not one object. */ - record_reused_object(sizeof(struct pack_header), 0); - hashflush(out); - copy_pack_data(out, reuse_packfile, w_curs, - sizeof(struct pack_header), to_write); + hashwrite(f, buffer, read_pack); + to_write -= read_pack; + /* + * We don't know the actual number of objects written, + * only how many bytes written, how many bytes total, and + * how many objects total. So we can fake it by pretending all + * objects we are writing are the same size. This gives us a + * smooth progress meter, and at the end it matches the true + * answer. + */ + written = reuse_packfile_objects * + (((double)(total - to_write)) / total); display_progress(progress_state, written); } - return pos; -} - -static void write_reused_pack(struct hashfile *f) -{ - size_t i = 0; - uint32_t offset; - struct pack_window *w_curs = NULL; - - if (allow_ofs_delta) - i = write_reused_pack_verbatim(f, &w_curs); - - for (; i < reuse_packfile_bitmap->word_alloc; ++i) { - eword_t word = reuse_packfile_bitmap->words[i]; - size_t pos = (i * BITS_IN_EWORD); - - for (offset = 0; offset < BITS_IN_EWORD; ++offset) { - if ((word >> offset) == 0) - break; - - offset += ewah_bit_ctz64(word >> offset); - write_reused_pack_one(pos + offset, f, &w_curs); - display_progress(progress_state, ++written); - } - } - unuse_pack(&w_curs); + close(fd); + written = reuse_packfile_objects; + display_progress(progress_state, written); + return reuse_packfile_offset - sizeof(struct pack_header); } static const char no_split_warning[] = N_( @@ -998,9 +867,11 @@ static void write_pack_file(void) offset = write_pack_header(f, nr_remaining); if (reuse_packfile) { + off_t packfile_size; assert(pack_to_stdout); - write_reused_pack(f); - offset = hashfile_total(f); + + packfile_size = write_reused_pack(f); + offset += packfile_size; } nr_written = 0; @@ -1125,15 +996,12 @@ static int no_try_delta(const char *path) * few lines later when we want to add the new entry. */ static int have_duplicate_entry(const struct object_id *oid, - int exclude) + int exclude, + uint32_t *index_pos) { struct object_entry *entry; - if (reuse_packfile_bitmap && - bitmap_walk_contains(bitmap_git, reuse_packfile_bitmap, oid)) - return 1; - - entry = packlist_find(&to_pack, oid); + entry = packlist_find(&to_pack, oid, index_pos); if (!entry) return 0; @@ -1273,12 +1141,13 @@ static void create_object_entry(const struct object_id *oid, uint32_t hash, int exclude, int no_try_delta, + uint32_t index_pos, struct packed_git *found_pack, off_t found_offset) { struct object_entry *entry; - entry = packlist_alloc(&to_pack, oid); + entry = packlist_alloc(&to_pack, oid->hash, index_pos); entry->hash = hash; oe_set_type(entry, type); if (exclude) @@ -1302,10 +1171,11 @@ static int add_object_entry(const struct object_id *oid, enum object_type type, { struct packed_git *found_pack = NULL; off_t found_offset = 0; + uint32_t index_pos; display_progress(progress_state, ++nr_seen); - if (have_duplicate_entry(oid, exclude)) + if (have_duplicate_entry(oid, exclude, &index_pos)) return 0; if (!want_object_in_pack(oid, exclude, &found_pack, &found_offset)) { @@ -1320,7 +1190,7 @@ static int add_object_entry(const struct object_id *oid, enum object_type type, create_object_entry(oid, type, pack_name_hash(name), exclude, name && no_try_delta(name), - found_pack, found_offset); + index_pos, found_pack, found_offset); return 1; } @@ -1329,15 +1199,17 @@ static int add_object_entry_from_bitmap(const struct object_id *oid, int flags, uint32_t name_hash, struct packed_git *pack, off_t offset) { + uint32_t index_pos; + display_progress(progress_state, ++nr_seen); - if (have_duplicate_entry(oid, 0)) + if (have_duplicate_entry(oid, 0, &index_pos)) return 0; if (!want_object_in_pack(oid, 0, &pack, &offset)) return 0; - create_object_entry(oid, type, name_hash, 0, 0, pack, offset); + create_object_entry(oid, type, name_hash, 0, 0, index_pos, pack, offset); return 1; } @@ -1619,17 +1491,23 @@ static void cleanup_preferred_base(void) * deltify other objects against, in order to avoid * circular deltas. */ -static int can_reuse_delta(const struct object_id *base_oid, +static int can_reuse_delta(const unsigned char *base_sha1, struct object_entry *delta, struct object_entry **base_out) { struct object_entry *base; + struct object_id base_oid; + + if (!base_sha1) + return 0; + + oidread(&base_oid, base_sha1); /* * First see if we're already sending the base (or it's explicitly in * our "excluded" list). */ - base = packlist_find(&to_pack, base_oid); + base = packlist_find(&to_pack, &base_oid, NULL); if (base) { if (!in_same_island(&delta->idx.oid, &base->idx.oid)) return 0; @@ -1642,9 +1520,9 @@ static int can_reuse_delta(const struct object_id *base_oid, * even if it was buried too deep in history to make it into the * packing list. */ - if (thin && bitmap_has_oid_in_uninteresting(bitmap_git, base_oid)) { + if (thin && bitmap_has_oid_in_uninteresting(bitmap_git, &base_oid)) { if (use_delta_islands) { - if (!in_same_island(&delta->idx.oid, base_oid)) + if (!in_same_island(&delta->idx.oid, &base_oid)) return 0; } *base_out = NULL; @@ -1661,8 +1539,7 @@ static void check_object(struct object_entry *entry) if (IN_PACK(entry)) { struct packed_git *p = IN_PACK(entry); struct pack_window *w_curs = NULL; - int have_base = 0; - struct object_id base_ref; + const unsigned char *base_ref = NULL; struct object_entry *base_entry; unsigned long used, used_0; unsigned long avail; @@ -1703,13 +1580,9 @@ static void check_object(struct object_entry *entry) unuse_pack(&w_curs); return; case OBJ_REF_DELTA: - if (reuse_delta && !entry->preferred_base) { - oidread(&base_ref, - use_pack(p, &w_curs, - entry->in_pack_offset + used, - NULL)); - have_base = 1; - } + if (reuse_delta && !entry->preferred_base) + base_ref = use_pack(p, &w_curs, + entry->in_pack_offset + used, NULL); entry->in_pack_header_size = used + the_hash_algo->rawsz; break; case OBJ_OFS_DELTA: @@ -1739,15 +1612,13 @@ static void check_object(struct object_entry *entry) revidx = find_pack_revindex(p, ofs); if (!revidx) goto give_up; - if (!nth_packed_object_id(&base_ref, p, revidx->nr)) - have_base = 1; + base_ref = nth_packed_object_sha1(p, revidx->nr); } entry->in_pack_header_size = used + used_0; break; } - if (have_base && - can_reuse_delta(&base_ref, entry, &base_entry)) { + if (can_reuse_delta(base_ref, entry, &base_entry)) { oe_set_type(entry, entry->in_pack_type); SET_SIZE(entry, in_pack_size); /* delta size */ SET_DELTA_SIZE(entry, in_pack_size); @@ -1757,7 +1628,7 @@ static void check_object(struct object_entry *entry) entry->delta_sibling_idx = base_entry->delta_child_idx; SET_DELTA_CHILD(base_entry, entry); } else { - SET_DELTA_EXT(entry, &base_ref); + SET_DELTA_EXT(entry, base_ref); } unuse_pack(&w_curs); @@ -2471,6 +2342,15 @@ static void find_deltas(struct object_entry **list, unsigned *list_size, free(array); } +static void try_to_free_from_threads(size_t size) +{ + packing_data_lock(&to_pack); + release_pack_memory(size); + packing_data_unlock(&to_pack); +} + +static try_to_free_t old_try_to_free_routine; + /* * The main object list is split into smaller lists, each is handed to * one worker. @@ -2511,10 +2391,12 @@ static void init_threaded_search(void) pthread_mutex_init(&cache_mutex, NULL); pthread_mutex_init(&progress_mutex, NULL); pthread_cond_init(&progress_cond, NULL); + old_try_to_free_routine = set_try_to_free_routine(try_to_free_from_threads); } static void cleanup_threaded_search(void) { + set_try_to_free_routine(old_try_to_free_routine); pthread_cond_destroy(&progress_cond); pthread_mutex_destroy(&cache_mutex); pthread_mutex_destroy(&progress_mutex); @@ -2686,13 +2568,6 @@ static void ll_find_deltas(struct object_entry **list, unsigned list_size, free(p); } -static int obj_is_packed(const struct object_id *oid) -{ - return packlist_find(&to_pack, oid) || - (reuse_packfile_bitmap && - bitmap_walk_contains(bitmap_git, reuse_packfile_bitmap, oid)); -} - static void add_tag_chain(const struct object_id *oid) { struct tag *tag; @@ -2704,7 +2579,7 @@ static void add_tag_chain(const struct object_id *oid) * it was included via bitmaps, we would not have parsed it * previously). */ - if (obj_is_packed(oid)) + if (packlist_find(&to_pack, oid, NULL)) return; tag = lookup_tag(the_repository, oid); @@ -2728,7 +2603,7 @@ static int add_ref_tag(const char *path, const struct object_id *oid, int flag, if (starts_with(path, "refs/tags/") && /* is a tag? */ !peel_ref(path, &peeled) && /* peelable? */ - obj_is_packed(&peeled)) /* object packed? */ + packlist_find(&to_pack, &peeled, NULL)) /* object packed? */ add_tag_chain(oid); return 0; } @@ -2796,7 +2671,6 @@ static void prepare_pack(int window, int depth) if (nr_deltas && n > 1) { unsigned nr_done = 0; - if (progress) progress_state = start_progress(_("Compressing objects"), nr_deltas); @@ -2841,8 +2715,8 @@ static int git_pack_config(const char *k, const char *v, void *cb) use_bitmap_index_default = git_config_bool(k, v); return 0; } - if (!strcmp(k, "pack.allowpackreuse")) { - allow_pack_reuse = git_config_bool(k, v); + if (!strcmp(k, "pack.usesparse")) { + sparse = git_config_bool(k, v); return 0; } if (!strcmp(k, "pack.threads")) { @@ -2929,7 +2803,7 @@ static void show_object(struct object *obj, const char *name, void *data) for (p = strchr(name, '/'); p; p = strchr(p + 1, '/')) depth++; - ent = packlist_find(&to_pack, &obj->oid); + ent = packlist_find(&to_pack, &obj->oid, NULL); if (ent && depth > oe_tree_depth(&to_pack, ent)) oe_set_tree_depth(&to_pack, ent, depth); } @@ -3055,7 +2929,7 @@ static void add_objects_in_unpacked_packs(void) in_pack.alloc); for (i = 0; i < p->num_objects; i++) { - nth_packed_object_id(&oid, p, i); + nth_packed_object_oid(&oid, p, i); o = lookup_unknown_object(&oid); if (!(o->flags & OBJECT_ADDED)) mark_in_pack_object(o, p, &in_pack); @@ -3159,8 +3033,8 @@ static void loosen_unused_packed_objects(void) die(_("cannot open pack index")); for (i = 0; i < p->num_objects; i++) { - nth_packed_object_id(&oid, p, i); - if (!packlist_find(&to_pack, &oid) && + nth_packed_object_oid(&oid, p, i); + if (!packlist_find(&to_pack, &oid, NULL) && !has_sha1_pack_kept_or_nonlocal(&oid) && !loosened_object_can_be_discarded(&oid, p->mtime)) if (force_object_loose(&oid, p->mtime)) @@ -3176,8 +3050,8 @@ static void loosen_unused_packed_objects(void) */ static int pack_options_allow_reuse(void) { - return allow_pack_reuse && - pack_to_stdout && + return pack_to_stdout && + allow_ofs_delta && !ignore_packed_keep_on_disk && !ignore_packed_keep_in_core && (!local || !have_non_local_packs) && @@ -3186,7 +3060,7 @@ static int pack_options_allow_reuse(void) static int get_object_list_from_bitmap(struct rev_info *revs) { - if (!(bitmap_git = prepare_bitmap_walk(revs, &filter_options))) + if (!(bitmap_git = prepare_bitmap_walk(revs))) return -1; if (pack_options_allow_reuse() && @@ -3194,14 +3068,13 @@ static int get_object_list_from_bitmap(struct rev_info *revs) bitmap_git, &reuse_packfile, &reuse_packfile_objects, - &reuse_packfile_bitmap)) { + &reuse_packfile_offset)) { assert(reuse_packfile_objects); nr_result += reuse_packfile_objects; display_progress(progress_state, nr_result); } - traverse_bitmap_commit_list(bitmap_git, revs, - &add_object_entry_from_bitmap); + traverse_bitmap_commit_list(bitmap_git, &add_object_entry_from_bitmap); return 0; } @@ -3470,10 +3343,6 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix) read_replace_refs = 0; sparse = git_env_bool("GIT_TEST_PACK_SPARSE", 0); - prepare_repo_settings(the_repository); - if (!sparse && the_repository->settings.pack_use_sparse != -1) - sparse = the_repository->settings.pack_use_sparse; - reset_pack_idx_option(&pack_idx_opts); git_config(git_pack_config, NULL); @@ -3565,6 +3434,7 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix) if (filter_options.choice) { if (!pack_to_stdout) die(_("cannot use --filter without --stdout")); + use_bitmap_index = 0; } /* @@ -3655,9 +3525,7 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix) if (progress) fprintf_ln(stderr, _("Total %"PRIu32" (delta %"PRIu32")," - " reused %"PRIu32" (delta %"PRIu32")," - " pack-reused %"PRIu32), - written, written_delta, reused, reused_delta, - reuse_packfile_objects); + " reused %"PRIu32" (delta %"PRIu32")"), + written, written_delta, reused, reused_delta); return 0; } |