From 1b593e1ea4d2af0f6444d9a7788d5d99abd6fde5 Mon Sep 17 00:00:00 2001 From: Vincent Ambo Date: Sat, 11 Jan 2020 23:36:56 +0000 Subject: Squashed 'third_party/git/' content from commit cb71568594 git-subtree-dir: third_party/git git-subtree-split: cb715685942260375e1eb8153b0768a376e4ece7 --- builtin/pack-objects.c | 3531 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 3531 insertions(+) create mode 100644 builtin/pack-objects.c (limited to 'builtin/pack-objects.c') diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c new file mode 100644 index 000000000000..76ce9069467e --- /dev/null +++ b/builtin/pack-objects.c @@ -0,0 +1,3531 @@ +#include "builtin.h" +#include "cache.h" +#include "repository.h" +#include "config.h" +#include "attr.h" +#include "object.h" +#include "blob.h" +#include "commit.h" +#include "tag.h" +#include "tree.h" +#include "delta.h" +#include "pack.h" +#include "pack-revindex.h" +#include "csum-file.h" +#include "tree-walk.h" +#include "diff.h" +#include "revision.h" +#include "list-objects.h" +#include "list-objects-filter.h" +#include "list-objects-filter-options.h" +#include "pack-objects.h" +#include "progress.h" +#include "refs.h" +#include "streaming.h" +#include "thread-utils.h" +#include "pack-bitmap.h" +#include "delta-islands.h" +#include "reachable.h" +#include "sha1-array.h" +#include "argv-array.h" +#include "list.h" +#include "packfile.h" +#include "object-store.h" +#include "dir.h" +#include "midx.h" +#include "trace2.h" + +#define IN_PACK(obj) oe_in_pack(&to_pack, obj) +#define SIZE(obj) oe_size(&to_pack, obj) +#define SET_SIZE(obj,size) oe_set_size(&to_pack, obj, size) +#define DELTA_SIZE(obj) oe_delta_size(&to_pack, obj) +#define DELTA(obj) oe_delta(&to_pack, obj) +#define DELTA_CHILD(obj) oe_delta_child(&to_pack, obj) +#define DELTA_SIBLING(obj) oe_delta_sibling(&to_pack, obj) +#define SET_DELTA(obj, val) oe_set_delta(&to_pack, obj, val) +#define SET_DELTA_EXT(obj, oid) oe_set_delta_ext(&to_pack, obj, oid) +#define SET_DELTA_SIZE(obj, val) oe_set_delta_size(&to_pack, obj, val) +#define SET_DELTA_CHILD(obj, val) oe_set_delta_child(&to_pack, obj, val) +#define SET_DELTA_SIBLING(obj, val) oe_set_delta_sibling(&to_pack, obj, val) + +static const char *pack_usage[] = { + N_("git pack-objects --stdout [...] [< | < ]"), + N_("git pack-objects [...] [< | < ]"), + NULL +}; + +/* + * Objects we are going to pack are collected in the `to_pack` structure. + * It contains an array (dynamically expanded) of the object data, and a map + * that can resolve SHA1s to their position in the array. + */ +static struct packing_data to_pack; + +static struct pack_idx_entry **written_list; +static uint32_t nr_result, nr_written, nr_seen; +static struct bitmap_index *bitmap_git; +static uint32_t write_layer; + +static int non_empty; +static int reuse_delta = 1, reuse_object = 1; +static int keep_unreachable, unpack_unreachable, include_tag; +static timestamp_t unpack_unreachable_expiration; +static int pack_loose_unreachable; +static int local; +static int have_non_local_packs; +static int incremental; +static int ignore_packed_keep_on_disk; +static int ignore_packed_keep_in_core; +static int allow_ofs_delta; +static struct pack_idx_option pack_idx_opts; +static const char *base_name; +static int progress = 1; +static int window = 10; +static unsigned long pack_size_limit; +static int depth = 50; +static int delta_search_threads; +static int pack_to_stdout; +static int sparse; +static int thin; +static int num_preferred_base; +static struct progress *progress_state; + +static struct packed_git *reuse_packfile; +static uint32_t reuse_packfile_objects; +static off_t reuse_packfile_offset; + +static int use_bitmap_index_default = 1; +static int use_bitmap_index = -1; +static enum { + WRITE_BITMAP_FALSE = 0, + WRITE_BITMAP_QUIET, + WRITE_BITMAP_TRUE, +} write_bitmap_index; +static uint16_t write_bitmap_options = BITMAP_OPT_HASH_CACHE; + +static int exclude_promisor_objects; + +static int use_delta_islands; + +static unsigned long delta_cache_size = 0; +static unsigned long max_delta_cache_size = DEFAULT_DELTA_CACHE_SIZE; +static unsigned long cache_max_small_delta_size = 1000; + +static unsigned long window_memory_limit = 0; + +static struct list_objects_filter_options filter_options; + +enum missing_action { + MA_ERROR = 0, /* fail if any missing objects are encountered */ + MA_ALLOW_ANY, /* silently allow ALL missing objects */ + MA_ALLOW_PROMISOR, /* silently allow all missing PROMISOR objects */ +}; +static enum missing_action arg_missing_action; +static show_object_fn fn_show_object; + +/* + * stats + */ +static uint32_t written, written_delta; +static uint32_t reused, reused_delta; + +/* + * Indexed commits + */ +static struct commit **indexed_commits; +static unsigned int indexed_commits_nr; +static unsigned int indexed_commits_alloc; + +static void index_commit_for_bitmap(struct commit *commit) +{ + if (indexed_commits_nr >= indexed_commits_alloc) { + indexed_commits_alloc = (indexed_commits_alloc + 32) * 2; + REALLOC_ARRAY(indexed_commits, indexed_commits_alloc); + } + + indexed_commits[indexed_commits_nr++] = commit; +} + +static void *get_delta(struct object_entry *entry) +{ + unsigned long size, base_size, delta_size; + void *buf, *base_buf, *delta_buf; + enum object_type type; + + buf = read_object_file(&entry->idx.oid, &type, &size); + if (!buf) + die(_("unable to read %s"), oid_to_hex(&entry->idx.oid)); + base_buf = read_object_file(&DELTA(entry)->idx.oid, &type, + &base_size); + if (!base_buf) + die("unable to read %s", + oid_to_hex(&DELTA(entry)->idx.oid)); + delta_buf = diff_delta(base_buf, base_size, + buf, size, &delta_size, 0); + /* + * We succesfully computed this delta once but dropped it for + * memory reasons. Something is very wrong if this time we + * recompute and create a different delta. + */ + if (!delta_buf || delta_size != DELTA_SIZE(entry)) + BUG("delta size changed"); + free(buf); + free(base_buf); + return delta_buf; +} + +static unsigned long do_compress(void **pptr, unsigned long size) +{ + git_zstream stream; + void *in, *out; + unsigned long maxsize; + + git_deflate_init(&stream, pack_compression_level); + maxsize = git_deflate_bound(&stream, size); + + in = *pptr; + out = xmalloc(maxsize); + *pptr = out; + + stream.next_in = in; + stream.avail_in = size; + stream.next_out = out; + stream.avail_out = maxsize; + while (git_deflate(&stream, Z_FINISH) == Z_OK) + ; /* nothing */ + git_deflate_end(&stream); + + free(in); + return stream.total_out; +} + +static unsigned long write_large_blob_data(struct git_istream *st, struct hashfile *f, + const struct object_id *oid) +{ + git_zstream stream; + unsigned char ibuf[1024 * 16]; + unsigned char obuf[1024 * 16]; + unsigned long olen = 0; + + git_deflate_init(&stream, pack_compression_level); + + for (;;) { + ssize_t readlen; + int zret = Z_OK; + readlen = read_istream(st, ibuf, sizeof(ibuf)); + if (readlen == -1) + die(_("unable to read %s"), oid_to_hex(oid)); + + stream.next_in = ibuf; + stream.avail_in = readlen; + while ((stream.avail_in || readlen == 0) && + (zret == Z_OK || zret == Z_BUF_ERROR)) { + stream.next_out = obuf; + stream.avail_out = sizeof(obuf); + zret = git_deflate(&stream, readlen ? 0 : Z_FINISH); + hashwrite(f, obuf, stream.next_out - obuf); + olen += stream.next_out - obuf; + } + if (stream.avail_in) + die(_("deflate error (%d)"), zret); + if (readlen == 0) { + if (zret != Z_STREAM_END) + die(_("deflate error (%d)"), zret); + break; + } + } + git_deflate_end(&stream); + return olen; +} + +/* + * we are going to reuse the existing object data as is. make + * sure it is not corrupt. + */ +static int check_pack_inflate(struct packed_git *p, + struct pack_window **w_curs, + off_t offset, + off_t len, + unsigned long expect) +{ + git_zstream stream; + unsigned char fakebuf[4096], *in; + int st; + + memset(&stream, 0, sizeof(stream)); + git_inflate_init(&stream); + do { + in = use_pack(p, w_curs, offset, &stream.avail_in); + stream.next_in = in; + stream.next_out = fakebuf; + stream.avail_out = sizeof(fakebuf); + st = git_inflate(&stream, Z_FINISH); + offset += stream.next_in - in; + } while (st == Z_OK || st == Z_BUF_ERROR); + git_inflate_end(&stream); + return (st == Z_STREAM_END && + stream.total_out == expect && + stream.total_in == len) ? 0 : -1; +} + +static void copy_pack_data(struct hashfile *f, + struct packed_git *p, + struct pack_window **w_curs, + off_t offset, + off_t len) +{ + unsigned char *in; + unsigned long avail; + + while (len) { + in = use_pack(p, w_curs, offset, &avail); + if (avail > len) + avail = (unsigned long)len; + hashwrite(f, in, avail); + offset += avail; + len -= avail; + } +} + +/* Return 0 if we will bust the pack-size limit */ +static unsigned long write_no_reuse_object(struct hashfile *f, struct object_entry *entry, + unsigned long limit, int usable_delta) +{ + unsigned long size, datalen; + unsigned char header[MAX_PACK_OBJECT_HEADER], + dheader[MAX_PACK_OBJECT_HEADER]; + unsigned hdrlen; + enum object_type type; + void *buf; + struct git_istream *st = NULL; + const unsigned hashsz = the_hash_algo->rawsz; + + if (!usable_delta) { + if (oe_type(entry) == OBJ_BLOB && + oe_size_greater_than(&to_pack, entry, big_file_threshold) && + (st = open_istream(&entry->idx.oid, &type, &size, NULL)) != NULL) + buf = NULL; + else { + buf = read_object_file(&entry->idx.oid, &type, &size); + if (!buf) + die(_("unable to read %s"), + oid_to_hex(&entry->idx.oid)); + } + /* + * make sure no cached delta data remains from a + * previous attempt before a pack split occurred. + */ + FREE_AND_NULL(entry->delta_data); + entry->z_delta_size = 0; + } else if (entry->delta_data) { + size = DELTA_SIZE(entry); + buf = entry->delta_data; + entry->delta_data = NULL; + type = (allow_ofs_delta && DELTA(entry)->idx.offset) ? + OBJ_OFS_DELTA : OBJ_REF_DELTA; + } else { + buf = get_delta(entry); + size = DELTA_SIZE(entry); + type = (allow_ofs_delta && DELTA(entry)->idx.offset) ? + OBJ_OFS_DELTA : OBJ_REF_DELTA; + } + + if (st) /* large blob case, just assume we don't compress well */ + datalen = size; + else if (entry->z_delta_size) + datalen = entry->z_delta_size; + else + datalen = do_compress(&buf, size); + + /* + * The object header is a byte of 'type' followed by zero or + * more bytes of length. + */ + hdrlen = encode_in_pack_object_header(header, sizeof(header), + type, size); + + if (type == OBJ_OFS_DELTA) { + /* + * Deltas with relative base contain an additional + * encoding of the relative offset for the delta + * base from this object's position in the pack. + */ + off_t ofs = entry->idx.offset - DELTA(entry)->idx.offset; + unsigned pos = sizeof(dheader) - 1; + dheader[pos] = ofs & 127; + while (ofs >>= 7) + dheader[--pos] = 128 | (--ofs & 127); + if (limit && hdrlen + sizeof(dheader) - pos + datalen + hashsz >= limit) { + if (st) + close_istream(st); + free(buf); + return 0; + } + hashwrite(f, header, hdrlen); + hashwrite(f, dheader + pos, sizeof(dheader) - pos); + hdrlen += sizeof(dheader) - pos; + } else if (type == OBJ_REF_DELTA) { + /* + * Deltas with a base reference contain + * additional bytes for the base object ID. + */ + if (limit && hdrlen + hashsz + datalen + hashsz >= limit) { + if (st) + close_istream(st); + free(buf); + return 0; + } + hashwrite(f, header, hdrlen); + hashwrite(f, DELTA(entry)->idx.oid.hash, hashsz); + hdrlen += hashsz; + } else { + if (limit && hdrlen + datalen + hashsz >= limit) { + if (st) + close_istream(st); + free(buf); + return 0; + } + hashwrite(f, header, hdrlen); + } + if (st) { + datalen = write_large_blob_data(st, f, &entry->idx.oid); + close_istream(st); + } else { + hashwrite(f, buf, datalen); + free(buf); + } + + return hdrlen + datalen; +} + +/* Return 0 if we will bust the pack-size limit */ +static off_t write_reuse_object(struct hashfile *f, struct object_entry *entry, + unsigned long limit, int usable_delta) +{ + struct packed_git *p = IN_PACK(entry); + struct pack_window *w_curs = NULL; + struct revindex_entry *revidx; + off_t offset; + enum object_type type = oe_type(entry); + off_t datalen; + unsigned char header[MAX_PACK_OBJECT_HEADER], + dheader[MAX_PACK_OBJECT_HEADER]; + unsigned hdrlen; + const unsigned hashsz = the_hash_algo->rawsz; + unsigned long entry_size = SIZE(entry); + + if (DELTA(entry)) + type = (allow_ofs_delta && DELTA(entry)->idx.offset) ? + OBJ_OFS_DELTA : OBJ_REF_DELTA; + hdrlen = encode_in_pack_object_header(header, sizeof(header), + type, entry_size); + + offset = entry->in_pack_offset; + revidx = find_pack_revindex(p, offset); + datalen = revidx[1].offset - offset; + if (!pack_to_stdout && p->index_version > 1 && + check_pack_crc(p, &w_curs, offset, datalen, revidx->nr)) { + error(_("bad packed object CRC for %s"), + oid_to_hex(&entry->idx.oid)); + unuse_pack(&w_curs); + return write_no_reuse_object(f, entry, limit, usable_delta); + } + + offset += entry->in_pack_header_size; + datalen -= entry->in_pack_header_size; + + if (!pack_to_stdout && p->index_version == 1 && + check_pack_inflate(p, &w_curs, offset, datalen, entry_size)) { + error(_("corrupt packed object for %s"), + oid_to_hex(&entry->idx.oid)); + unuse_pack(&w_curs); + return write_no_reuse_object(f, entry, limit, usable_delta); + } + + if (type == OBJ_OFS_DELTA) { + off_t ofs = entry->idx.offset - DELTA(entry)->idx.offset; + unsigned pos = sizeof(dheader) - 1; + dheader[pos] = ofs & 127; + while (ofs >>= 7) + dheader[--pos] = 128 | (--ofs & 127); + if (limit && hdrlen + sizeof(dheader) - pos + datalen + hashsz >= limit) { + unuse_pack(&w_curs); + return 0; + } + hashwrite(f, header, hdrlen); + hashwrite(f, dheader + pos, sizeof(dheader) - pos); + hdrlen += sizeof(dheader) - pos; + reused_delta++; + } else if (type == OBJ_REF_DELTA) { + if (limit && hdrlen + hashsz + datalen + hashsz >= limit) { + unuse_pack(&w_curs); + return 0; + } + hashwrite(f, header, hdrlen); + hashwrite(f, DELTA(entry)->idx.oid.hash, hashsz); + hdrlen += hashsz; + reused_delta++; + } else { + if (limit && hdrlen + datalen + hashsz >= limit) { + unuse_pack(&w_curs); + return 0; + } + hashwrite(f, header, hdrlen); + } + copy_pack_data(f, p, &w_curs, offset, datalen); + unuse_pack(&w_curs); + reused++; + return hdrlen + datalen; +} + +/* Return 0 if we will bust the pack-size limit */ +static off_t write_object(struct hashfile *f, + struct object_entry *entry, + off_t write_offset) +{ + unsigned long limit; + off_t len; + int usable_delta, to_reuse; + + if (!pack_to_stdout) + crc32_begin(f); + + /* apply size limit if limited packsize and not first object */ + if (!pack_size_limit || !nr_written) + limit = 0; + else if (pack_size_limit <= write_offset) + /* + * the earlier object did not fit the limit; avoid + * mistaking this with unlimited (i.e. limit = 0). + */ + limit = 1; + else + limit = pack_size_limit - write_offset; + + if (!DELTA(entry)) + usable_delta = 0; /* no delta */ + else if (!pack_size_limit) + usable_delta = 1; /* unlimited packfile */ + else if (DELTA(entry)->idx.offset == (off_t)-1) + usable_delta = 0; /* base was written to another pack */ + else if (DELTA(entry)->idx.offset) + usable_delta = 1; /* base already exists in this pack */ + else + usable_delta = 0; /* base could end up in another pack */ + + if (!reuse_object) + to_reuse = 0; /* explicit */ + else if (!IN_PACK(entry)) + to_reuse = 0; /* can't reuse what we don't have */ + else if (oe_type(entry) == OBJ_REF_DELTA || + oe_type(entry) == OBJ_OFS_DELTA) + /* check_object() decided it for us ... */ + to_reuse = usable_delta; + /* ... but pack split may override that */ + else if (oe_type(entry) != entry->in_pack_type) + to_reuse = 0; /* pack has delta which is unusable */ + else if (DELTA(entry)) + to_reuse = 0; /* we want to pack afresh */ + else + to_reuse = 1; /* we have it in-pack undeltified, + * and we do not need to deltify it. + */ + + if (!to_reuse) + len = write_no_reuse_object(f, entry, limit, usable_delta); + else + len = write_reuse_object(f, entry, limit, usable_delta); + if (!len) + return 0; + + if (usable_delta) + written_delta++; + written++; + if (!pack_to_stdout) + entry->idx.crc32 = crc32_end(f); + return len; +} + +enum write_one_status { + WRITE_ONE_SKIP = -1, /* already written */ + WRITE_ONE_BREAK = 0, /* writing this will bust the limit; not written */ + WRITE_ONE_WRITTEN = 1, /* normal */ + WRITE_ONE_RECURSIVE = 2 /* already scheduled to be written */ +}; + +static enum write_one_status write_one(struct hashfile *f, + struct object_entry *e, + off_t *offset) +{ + off_t size; + int recursing; + + /* + * we set offset to 1 (which is an impossible value) to mark + * the fact that this object is involved in "write its base + * first before writing a deltified object" recursion. + */ + recursing = (e->idx.offset == 1); + if (recursing) { + warning(_("recursive delta detected for object %s"), + oid_to_hex(&e->idx.oid)); + return WRITE_ONE_RECURSIVE; + } else if (e->idx.offset || e->preferred_base) { + /* offset is non zero if object is written already. */ + return WRITE_ONE_SKIP; + } + + /* if we are deltified, write out base object first. */ + if (DELTA(e)) { + e->idx.offset = 1; /* now recurse */ + switch (write_one(f, DELTA(e), offset)) { + case WRITE_ONE_RECURSIVE: + /* we cannot depend on this one */ + SET_DELTA(e, NULL); + break; + default: + break; + case WRITE_ONE_BREAK: + e->idx.offset = recursing; + return WRITE_ONE_BREAK; + } + } + + e->idx.offset = *offset; + size = write_object(f, e, *offset); + if (!size) { + e->idx.offset = recursing; + return WRITE_ONE_BREAK; + } + written_list[nr_written++] = &e->idx; + + /* make sure off_t is sufficiently large not to wrap */ + if (signed_add_overflows(*offset, size)) + die(_("pack too large for current definition of off_t")); + *offset += size; + return WRITE_ONE_WRITTEN; +} + +static int mark_tagged(const char *path, const struct object_id *oid, int flag, + void *cb_data) +{ + struct object_id peeled; + struct object_entry *entry = packlist_find(&to_pack, oid, NULL); + + if (entry) + entry->tagged = 1; + if (!peel_ref(path, &peeled)) { + entry = packlist_find(&to_pack, &peeled, NULL); + if (entry) + entry->tagged = 1; + } + return 0; +} + +static inline void add_to_write_order(struct object_entry **wo, + unsigned int *endp, + struct object_entry *e) +{ + if (e->filled || oe_layer(&to_pack, e) != write_layer) + return; + wo[(*endp)++] = e; + e->filled = 1; +} + +static void add_descendants_to_write_order(struct object_entry **wo, + unsigned int *endp, + struct object_entry *e) +{ + int add_to_order = 1; + while (e) { + if (add_to_order) { + struct object_entry *s; + /* add this node... */ + add_to_write_order(wo, endp, e); + /* all its siblings... */ + for (s = DELTA_SIBLING(e); s; s = DELTA_SIBLING(s)) { + add_to_write_order(wo, endp, s); + } + } + /* drop down a level to add left subtree nodes if possible */ + if (DELTA_CHILD(e)) { + add_to_order = 1; + e = DELTA_CHILD(e); + } else { + add_to_order = 0; + /* our sibling might have some children, it is next */ + if (DELTA_SIBLING(e)) { + e = DELTA_SIBLING(e); + continue; + } + /* go back to our parent node */ + e = DELTA(e); + while (e && !DELTA_SIBLING(e)) { + /* we're on the right side of a subtree, keep + * going up until we can go right again */ + e = DELTA(e); + } + if (!e) { + /* done- we hit our original root node */ + return; + } + /* pass it off to sibling at this level */ + e = DELTA_SIBLING(e); + } + }; +} + +static void add_family_to_write_order(struct object_entry **wo, + unsigned int *endp, + struct object_entry *e) +{ + struct object_entry *root; + + for (root = e; DELTA(root); root = DELTA(root)) + ; /* nothing */ + add_descendants_to_write_order(wo, endp, root); +} + +static void compute_layer_order(struct object_entry **wo, unsigned int *wo_end) +{ + unsigned int i, last_untagged; + struct object_entry *objects = to_pack.objects; + + for (i = 0; i < to_pack.nr_objects; i++) { + if (objects[i].tagged) + break; + add_to_write_order(wo, wo_end, &objects[i]); + } + last_untagged = i; + + /* + * Then fill all the tagged tips. + */ + for (; i < to_pack.nr_objects; i++) { + if (objects[i].tagged) + add_to_write_order(wo, wo_end, &objects[i]); + } + + /* + * And then all remaining commits and tags. + */ + for (i = last_untagged; i < to_pack.nr_objects; i++) { + if (oe_type(&objects[i]) != OBJ_COMMIT && + oe_type(&objects[i]) != OBJ_TAG) + continue; + add_to_write_order(wo, wo_end, &objects[i]); + } + + /* + * And then all the trees. + */ + for (i = last_untagged; i < to_pack.nr_objects; i++) { + if (oe_type(&objects[i]) != OBJ_TREE) + continue; + add_to_write_order(wo, wo_end, &objects[i]); + } + + /* + * Finally all the rest in really tight order + */ + for (i = last_untagged; i < to_pack.nr_objects; i++) { + if (!objects[i].filled && oe_layer(&to_pack, &objects[i]) == write_layer) + add_family_to_write_order(wo, wo_end, &objects[i]); + } +} + +static struct object_entry **compute_write_order(void) +{ + uint32_t max_layers = 1; + unsigned int i, wo_end; + + struct object_entry **wo; + struct object_entry *objects = to_pack.objects; + + for (i = 0; i < to_pack.nr_objects; i++) { + objects[i].tagged = 0; + objects[i].filled = 0; + SET_DELTA_CHILD(&objects[i], NULL); + SET_DELTA_SIBLING(&objects[i], NULL); + } + + /* + * Fully connect delta_child/delta_sibling network. + * Make sure delta_sibling is sorted in the original + * recency order. + */ + for (i = to_pack.nr_objects; i > 0;) { + struct object_entry *e = &objects[--i]; + if (!DELTA(e)) + continue; + /* Mark me as the first child */ + e->delta_sibling_idx = DELTA(e)->delta_child_idx; + SET_DELTA_CHILD(DELTA(e), e); + } + + /* + * Mark objects that are at the tip of tags. + */ + for_each_tag_ref(mark_tagged, NULL); + + if (use_delta_islands) + max_layers = compute_pack_layers(&to_pack); + + ALLOC_ARRAY(wo, to_pack.nr_objects); + wo_end = 0; + + for (; write_layer < max_layers; ++write_layer) + compute_layer_order(wo, &wo_end); + + if (wo_end != to_pack.nr_objects) + die(_("ordered %u objects, expected %"PRIu32), + wo_end, to_pack.nr_objects); + + return wo; +} + +static off_t write_reused_pack(struct hashfile *f) +{ + unsigned char buffer[8192]; + off_t to_write, total; + int fd; + + if (!is_pack_valid(reuse_packfile)) + die(_("packfile is invalid: %s"), reuse_packfile->pack_name); + + fd = git_open(reuse_packfile->pack_name); + if (fd < 0) + die_errno(_("unable to open packfile for reuse: %s"), + reuse_packfile->pack_name); + + if (lseek(fd, sizeof(struct pack_header), SEEK_SET) == -1) + die_errno(_("unable to seek in reused packfile")); + + if (reuse_packfile_offset < 0) + reuse_packfile_offset = reuse_packfile->pack_size - the_hash_algo->rawsz; + + total = to_write = reuse_packfile_offset - sizeof(struct pack_header); + + while (to_write) { + int read_pack = xread(fd, buffer, sizeof(buffer)); + + if (read_pack <= 0) + die_errno(_("unable to read from reused packfile")); + + if (read_pack > to_write) + read_pack = to_write; + + hashwrite(f, buffer, read_pack); + to_write -= read_pack; + + /* + * We don't know the actual number of objects written, + * only how many bytes written, how many bytes total, and + * how many objects total. So we can fake it by pretending all + * objects we are writing are the same size. This gives us a + * smooth progress meter, and at the end it matches the true + * answer. + */ + written = reuse_packfile_objects * + (((double)(total - to_write)) / total); + display_progress(progress_state, written); + } + + close(fd); + written = reuse_packfile_objects; + display_progress(progress_state, written); + return reuse_packfile_offset - sizeof(struct pack_header); +} + +static const char no_split_warning[] = N_( +"disabling bitmap writing, packs are split due to pack.packSizeLimit" +); + +static void write_pack_file(void) +{ + uint32_t i = 0, j; + struct hashfile *f; + off_t offset; + uint32_t nr_remaining = nr_result; + time_t last_mtime = 0; + struct object_entry **write_order; + + if (progress > pack_to_stdout) + progress_state = start_progress(_("Writing objects"), nr_result); + ALLOC_ARRAY(written_list, to_pack.nr_objects); + write_order = compute_write_order(); + + do { + struct object_id oid; + char *pack_tmp_name = NULL; + + if (pack_to_stdout) + f = hashfd_throughput(1, "", progress_state); + else + f = create_tmp_packfile(&pack_tmp_name); + + offset = write_pack_header(f, nr_remaining); + + if (reuse_packfile) { + off_t packfile_size; + assert(pack_to_stdout); + + packfile_size = write_reused_pack(f); + offset += packfile_size; + } + + nr_written = 0; + for (; i < to_pack.nr_objects; i++) { + struct object_entry *e = write_order[i]; + if (write_one(f, e, &offset) == WRITE_ONE_BREAK) + break; + display_progress(progress_state, written); + } + + /* + * Did we write the wrong # entries in the header? + * If so, rewrite it like in fast-import + */ + if (pack_to_stdout) { + finalize_hashfile(f, oid.hash, CSUM_HASH_IN_STREAM | CSUM_CLOSE); + } else if (nr_written == nr_remaining) { + finalize_hashfile(f, oid.hash, CSUM_HASH_IN_STREAM | CSUM_FSYNC | CSUM_CLOSE); + } else { + int fd = finalize_hashfile(f, oid.hash, 0); + fixup_pack_header_footer(fd, oid.hash, pack_tmp_name, + nr_written, oid.hash, offset); + close(fd); + if (write_bitmap_index) { + if (write_bitmap_index != WRITE_BITMAP_QUIET) + warning(_(no_split_warning)); + write_bitmap_index = 0; + } + } + + if (!pack_to_stdout) { + struct stat st; + struct strbuf tmpname = STRBUF_INIT; + + /* + * Packs are runtime accessed in their mtime + * order since newer packs are more likely to contain + * younger objects. So if we are creating multiple + * packs then we should modify the mtime of later ones + * to preserve this property. + */ + if (stat(pack_tmp_name, &st) < 0) { + warning_errno(_("failed to stat %s"), pack_tmp_name); + } else if (!last_mtime) { + last_mtime = st.st_mtime; + } else { + struct utimbuf utb; + utb.actime = st.st_atime; + utb.modtime = --last_mtime; + if (utime(pack_tmp_name, &utb) < 0) + warning_errno(_("failed utime() on %s"), pack_tmp_name); + } + + strbuf_addf(&tmpname, "%s-", base_name); + + if (write_bitmap_index) { + bitmap_writer_set_checksum(oid.hash); + bitmap_writer_build_type_index( + &to_pack, written_list, nr_written); + } + + finish_tmp_packfile(&tmpname, pack_tmp_name, + written_list, nr_written, + &pack_idx_opts, oid.hash); + + if (write_bitmap_index) { + strbuf_addf(&tmpname, "%s.bitmap", oid_to_hex(&oid)); + + stop_progress(&progress_state); + + bitmap_writer_show_progress(progress); + bitmap_writer_reuse_bitmaps(&to_pack); + bitmap_writer_select_commits(indexed_commits, indexed_commits_nr, -1); + bitmap_writer_build(&to_pack); + bitmap_writer_finish(written_list, nr_written, + tmpname.buf, write_bitmap_options); + write_bitmap_index = 0; + } + + strbuf_release(&tmpname); + free(pack_tmp_name); + puts(oid_to_hex(&oid)); + } + + /* mark written objects as written to previous pack */ + for (j = 0; j < nr_written; j++) { + written_list[j]->offset = (off_t)-1; + } + nr_remaining -= nr_written; + } while (nr_remaining && i < to_pack.nr_objects); + + free(written_list); + free(write_order); + stop_progress(&progress_state); + if (written != nr_result) + die(_("wrote %"PRIu32" objects while expecting %"PRIu32), + written, nr_result); + trace2_data_intmax("pack-objects", the_repository, + "write_pack_file/wrote", nr_result); +} + +static int no_try_delta(const char *path) +{ + static struct attr_check *check; + + if (!check) + check = attr_check_initl("delta", NULL); + git_check_attr(the_repository->index, path, check); + if (ATTR_FALSE(check->items[0].value)) + return 1; + return 0; +} + +/* + * When adding an object, check whether we have already added it + * to our packing list. If so, we can skip. However, if we are + * being asked to excludei t, but the previous mention was to include + * it, make sure to adjust its flags and tweak our numbers accordingly. + * + * As an optimization, we pass out the index position where we would have + * found the item, since that saves us from having to look it up again a + * few lines later when we want to add the new entry. + */ +static int have_duplicate_entry(const struct object_id *oid, + int exclude, + uint32_t *index_pos) +{ + struct object_entry *entry; + + entry = packlist_find(&to_pack, oid, index_pos); + if (!entry) + return 0; + + if (exclude) { + if (!entry->preferred_base) + nr_result--; + entry->preferred_base = 1; + } + + return 1; +} + +static int want_found_object(int exclude, struct packed_git *p) +{ + if (exclude) + return 1; + if (incremental) + return 0; + + /* + * When asked to do --local (do not include an object that appears in a + * pack we borrow from elsewhere) or --honor-pack-keep (do not include + * an object that appears in a pack marked with .keep), finding a pack + * that matches the criteria is sufficient for us to decide to omit it. + * However, even if this pack does not satisfy the criteria, we need to + * make sure no copy of this object appears in _any_ pack that makes us + * to omit the object, so we need to check all the packs. + * + * We can however first check whether these options can possible matter; + * if they do not matter we know we want the object in generated pack. + * Otherwise, we signal "-1" at the end to tell the caller that we do + * not know either way, and it needs to check more packs. + */ + if (!ignore_packed_keep_on_disk && + !ignore_packed_keep_in_core && + (!local || !have_non_local_packs)) + return 1; + + if (local && !p->pack_local) + return 0; + if (p->pack_local && + ((ignore_packed_keep_on_disk && p->pack_keep) || + (ignore_packed_keep_in_core && p->pack_keep_in_core))) + return 0; + + /* we don't know yet; keep looking for more packs */ + return -1; +} + +/* + * Check whether we want the object in the pack (e.g., we do not want + * objects found in non-local stores if the "--local" option was used). + * + * If the caller already knows an existing pack it wants to take the object + * from, that is passed in *found_pack and *found_offset; otherwise this + * function finds if there is any pack that has the object and returns the pack + * and its offset in these variables. + */ +static int want_object_in_pack(const struct object_id *oid, + int exclude, + struct packed_git **found_pack, + off_t *found_offset) +{ + int want; + struct list_head *pos; + struct multi_pack_index *m; + + if (!exclude && local && has_loose_object_nonlocal(oid)) + return 0; + + /* + * If we already know the pack object lives in, start checks from that + * pack - in the usual case when neither --local was given nor .keep files + * are present we will determine the answer right now. + */ + if (*found_pack) { + want = want_found_object(exclude, *found_pack); + if (want != -1) + return want; + } + + for (m = get_multi_pack_index(the_repository); m; m = m->next) { + struct pack_entry e; + if (fill_midx_entry(the_repository, oid, &e, m)) { + struct packed_git *p = e.p; + off_t offset; + + if (p == *found_pack) + offset = *found_offset; + else + offset = find_pack_entry_one(oid->hash, p); + + if (offset) { + if (!*found_pack) { + if (!is_pack_valid(p)) + continue; + *found_offset = offset; + *found_pack = p; + } + want = want_found_object(exclude, p); + if (want != -1) + return want; + } + } + } + + list_for_each(pos, get_packed_git_mru(the_repository)) { + struct packed_git *p = list_entry(pos, struct packed_git, mru); + off_t offset; + + if (p == *found_pack) + offset = *found_offset; + else + offset = find_pack_entry_one(oid->hash, p); + + if (offset) { + if (!*found_pack) { + if (!is_pack_valid(p)) + continue; + *found_offset = offset; + *found_pack = p; + } + want = want_found_object(exclude, p); + if (!exclude && want > 0) + list_move(&p->mru, + get_packed_git_mru(the_repository)); + if (want != -1) + return want; + } + } + + return 1; +} + +static void create_object_entry(const struct object_id *oid, + enum object_type type, + uint32_t hash, + int exclude, + int no_try_delta, + uint32_t index_pos, + struct packed_git *found_pack, + off_t found_offset) +{ + struct object_entry *entry; + + entry = packlist_alloc(&to_pack, oid->hash, index_pos); + entry->hash = hash; + oe_set_type(entry, type); + if (exclude) + entry->preferred_base = 1; + else + nr_result++; + if (found_pack) { + oe_set_in_pack(&to_pack, entry, found_pack); + entry->in_pack_offset = found_offset; + } + + entry->no_try_delta = no_try_delta; +} + +static const char no_closure_warning[] = N_( +"disabling bitmap writing, as some objects are not being packed" +); + +static int add_object_entry(const struct object_id *oid, enum object_type type, + const char *name, int exclude) +{ + struct packed_git *found_pack = NULL; + off_t found_offset = 0; + uint32_t index_pos; + + display_progress(progress_state, ++nr_seen); + + if (have_duplicate_entry(oid, exclude, &index_pos)) + return 0; + + if (!want_object_in_pack(oid, exclude, &found_pack, &found_offset)) { + /* The pack is missing an object, so it will not have closure */ + if (write_bitmap_index) { + if (write_bitmap_index != WRITE_BITMAP_QUIET) + warning(_(no_closure_warning)); + write_bitmap_index = 0; + } + return 0; + } + + create_object_entry(oid, type, pack_name_hash(name), + exclude, name && no_try_delta(name), + index_pos, found_pack, found_offset); + return 1; +} + +static int add_object_entry_from_bitmap(const struct object_id *oid, + enum object_type type, + int flags, uint32_t name_hash, + struct packed_git *pack, off_t offset) +{ + uint32_t index_pos; + + display_progress(progress_state, ++nr_seen); + + if (have_duplicate_entry(oid, 0, &index_pos)) + return 0; + + if (!want_object_in_pack(oid, 0, &pack, &offset)) + return 0; + + create_object_entry(oid, type, name_hash, 0, 0, index_pos, pack, offset); + return 1; +} + +struct pbase_tree_cache { + struct object_id oid; + int ref; + int temporary; + void *tree_data; + unsigned long tree_size; +}; + +static struct pbase_tree_cache *(pbase_tree_cache[256]); +static int pbase_tree_cache_ix(const struct object_id *oid) +{ + return oid->hash[0] % ARRAY_SIZE(pbase_tree_cache); +} +static int pbase_tree_cache_ix_incr(int ix) +{ + return (ix+1) % ARRAY_SIZE(pbase_tree_cache); +} + +static struct pbase_tree { + struct pbase_tree *next; + /* This is a phony "cache" entry; we are not + * going to evict it or find it through _get() + * mechanism -- this is for the toplevel node that + * would almost always change with any commit. + */ + struct pbase_tree_cache pcache; +} *pbase_tree; + +static struct pbase_tree_cache *pbase_tree_get(const struct object_id *oid) +{ + struct pbase_tree_cache *ent, *nent; + void *data; + unsigned long size; + enum object_type type; + int neigh; + int my_ix = pbase_tree_cache_ix(oid); + int available_ix = -1; + + /* pbase-tree-cache acts as a limited hashtable. + * your object will be found at your index or within a few + * slots after that slot if it is cached. + */ + for (neigh = 0; neigh < 8; neigh++) { + ent = pbase_tree_cache[my_ix]; + if (ent && oideq(&ent->oid, oid)) { + ent->ref++; + return ent; + } + else if (((available_ix < 0) && (!ent || !ent->ref)) || + ((0 <= available_ix) && + (!ent && pbase_tree_cache[available_ix]))) + available_ix = my_ix; + if (!ent) + break; + my_ix = pbase_tree_cache_ix_incr(my_ix); + } + + /* Did not find one. Either we got a bogus request or + * we need to read and perhaps cache. + */ + data = read_object_file(oid, &type, &size); + if (!data) + return NULL; + if (type != OBJ_TREE) { + free(data); + return NULL; + } + + /* We need to either cache or return a throwaway copy */ + + if (available_ix < 0) + ent = NULL; + else { + ent = pbase_tree_cache[available_ix]; + my_ix = available_ix; + } + + if (!ent) { + nent = xmalloc(sizeof(*nent)); + nent->temporary = (available_ix < 0); + } + else { + /* evict and reuse */ + free(ent->tree_data); + nent = ent; + } + oidcpy(&nent->oid, oid); + nent->tree_data = data; + nent->tree_size = size; + nent->ref = 1; + if (!nent->temporary) + pbase_tree_cache[my_ix] = nent; + return nent; +} + +static void pbase_tree_put(struct pbase_tree_cache *cache) +{ + if (!cache->temporary) { + cache->ref--; + return; + } + free(cache->tree_data); + free(cache); +} + +static int name_cmp_len(const char *name) +{ + int i; + for (i = 0; name[i] && name[i] != '\n' && name[i] != '/'; i++) + ; + return i; +} + +static void add_pbase_object(struct tree_desc *tree, + const char *name, + int cmplen, + const char *fullname) +{ + struct name_entry entry; + int cmp; + + while (tree_entry(tree,&entry)) { + if (S_ISGITLINK(entry.mode)) + continue; + cmp = tree_entry_len(&entry) != cmplen ? 1 : + memcmp(name, entry.path, cmplen); + if (cmp > 0) + continue; + if (cmp < 0) + return; + if (name[cmplen] != '/') { + add_object_entry(&entry.oid, + object_type(entry.mode), + fullname, 1); + return; + } + if (S_ISDIR(entry.mode)) { + struct tree_desc sub; + struct pbase_tree_cache *tree; + const char *down = name+cmplen+1; + int downlen = name_cmp_len(down); + + tree = pbase_tree_get(&entry.oid); + if (!tree) + return; + init_tree_desc(&sub, tree->tree_data, tree->tree_size); + + add_pbase_object(&sub, down, downlen, fullname); + pbase_tree_put(tree); + } + } +} + +static unsigned *done_pbase_paths; +static int done_pbase_paths_num; +static int done_pbase_paths_alloc; +static int done_pbase_path_pos(unsigned hash) +{ + int lo = 0; + int hi = done_pbase_paths_num; + while (lo < hi) { + int mi = lo + (hi - lo) / 2; + if (done_pbase_paths[mi] == hash) + return mi; + if (done_pbase_paths[mi] < hash) + hi = mi; + else + lo = mi + 1; + } + return -lo-1; +} + +static int check_pbase_path(unsigned hash) +{ + int pos = done_pbase_path_pos(hash); + if (0 <= pos) + return 1; + pos = -pos - 1; + ALLOC_GROW(done_pbase_paths, + done_pbase_paths_num + 1, + done_pbase_paths_alloc); + done_pbase_paths_num++; + if (pos < done_pbase_paths_num) + MOVE_ARRAY(done_pbase_paths + pos + 1, done_pbase_paths + pos, + done_pbase_paths_num - pos - 1); + done_pbase_paths[pos] = hash; + return 0; +} + +static void add_preferred_base_object(const char *name) +{ + struct pbase_tree *it; + int cmplen; + unsigned hash = pack_name_hash(name); + + if (!num_preferred_base || check_pbase_path(hash)) + return; + + cmplen = name_cmp_len(name); + for (it = pbase_tree; it; it = it->next) { + if (cmplen == 0) { + add_object_entry(&it->pcache.oid, OBJ_TREE, NULL, 1); + } + else { + struct tree_desc tree; + init_tree_desc(&tree, it->pcache.tree_data, it->pcache.tree_size); + add_pbase_object(&tree, name, cmplen, name); + } + } +} + +static void add_preferred_base(struct object_id *oid) +{ + struct pbase_tree *it; + void *data; + unsigned long size; + struct object_id tree_oid; + + if (window <= num_preferred_base++) + return; + + data = read_object_with_reference(the_repository, oid, + tree_type, &size, &tree_oid); + if (!data) + return; + + for (it = pbase_tree; it; it = it->next) { + if (oideq(&it->pcache.oid, &tree_oid)) { + free(data); + return; + } + } + + it = xcalloc(1, sizeof(*it)); + it->next = pbase_tree; + pbase_tree = it; + + oidcpy(&it->pcache.oid, &tree_oid); + it->pcache.tree_data = data; + it->pcache.tree_size = size; +} + +static void cleanup_preferred_base(void) +{ + struct pbase_tree *it; + unsigned i; + + it = pbase_tree; + pbase_tree = NULL; + while (it) { + struct pbase_tree *tmp = it; + it = tmp->next; + free(tmp->pcache.tree_data); + free(tmp); + } + + for (i = 0; i < ARRAY_SIZE(pbase_tree_cache); i++) { + if (!pbase_tree_cache[i]) + continue; + free(pbase_tree_cache[i]->tree_data); + FREE_AND_NULL(pbase_tree_cache[i]); + } + + FREE_AND_NULL(done_pbase_paths); + done_pbase_paths_num = done_pbase_paths_alloc = 0; +} + +/* + * Return 1 iff the object specified by "delta" can be sent + * literally as a delta against the base in "base_sha1". If + * so, then *base_out will point to the entry in our packing + * list, or NULL if we must use the external-base list. + * + * Depth value does not matter - find_deltas() will + * never consider reused delta as the base object to + * deltify other objects against, in order to avoid + * circular deltas. + */ +static int can_reuse_delta(const unsigned char *base_sha1, + struct object_entry *delta, + struct object_entry **base_out) +{ + struct object_entry *base; + struct object_id base_oid; + + if (!base_sha1) + return 0; + + oidread(&base_oid, base_sha1); + + /* + * First see if we're already sending the base (or it's explicitly in + * our "excluded" list). + */ + base = packlist_find(&to_pack, &base_oid, NULL); + if (base) { + if (!in_same_island(&delta->idx.oid, &base->idx.oid)) + return 0; + *base_out = base; + return 1; + } + + /* + * Otherwise, reachability bitmaps may tell us if the receiver has it, + * even if it was buried too deep in history to make it into the + * packing list. + */ + if (thin && bitmap_has_oid_in_uninteresting(bitmap_git, &base_oid)) { + if (use_delta_islands) { + if (!in_same_island(&delta->idx.oid, &base_oid)) + return 0; + } + *base_out = NULL; + return 1; + } + + return 0; +} + +static void check_object(struct object_entry *entry) +{ + unsigned long canonical_size; + + if (IN_PACK(entry)) { + struct packed_git *p = IN_PACK(entry); + struct pack_window *w_curs = NULL; + const unsigned char *base_ref = NULL; + struct object_entry *base_entry; + unsigned long used, used_0; + unsigned long avail; + off_t ofs; + unsigned char *buf, c; + enum object_type type; + unsigned long in_pack_size; + + buf = use_pack(p, &w_curs, entry->in_pack_offset, &avail); + + /* + * We want in_pack_type even if we do not reuse delta + * since non-delta representations could still be reused. + */ + used = unpack_object_header_buffer(buf, avail, + &type, + &in_pack_size); + if (used == 0) + goto give_up; + + if (type < 0) + BUG("invalid type %d", type); + entry->in_pack_type = type; + + /* + * Determine if this is a delta and if so whether we can + * reuse it or not. Otherwise let's find out as cheaply as + * possible what the actual type and size for this object is. + */ + switch (entry->in_pack_type) { + default: + /* Not a delta hence we've already got all we need. */ + oe_set_type(entry, entry->in_pack_type); + SET_SIZE(entry, in_pack_size); + entry->in_pack_header_size = used; + if (oe_type(entry) < OBJ_COMMIT || oe_type(entry) > OBJ_BLOB) + goto give_up; + unuse_pack(&w_curs); + return; + case OBJ_REF_DELTA: + if (reuse_delta && !entry->preferred_base) + base_ref = use_pack(p, &w_curs, + entry->in_pack_offset + used, NULL); + entry->in_pack_header_size = used + the_hash_algo->rawsz; + break; + case OBJ_OFS_DELTA: + buf = use_pack(p, &w_curs, + entry->in_pack_offset + used, NULL); + used_0 = 0; + c = buf[used_0++]; + ofs = c & 127; + while (c & 128) { + ofs += 1; + if (!ofs || MSB(ofs, 7)) { + error(_("delta base offset overflow in pack for %s"), + oid_to_hex(&entry->idx.oid)); + goto give_up; + } + c = buf[used_0++]; + ofs = (ofs << 7) + (c & 127); + } + ofs = entry->in_pack_offset - ofs; + if (ofs <= 0 || ofs >= entry->in_pack_offset) { + error(_("delta base offset out of bound for %s"), + oid_to_hex(&entry->idx.oid)); + goto give_up; + } + if (reuse_delta && !entry->preferred_base) { + struct revindex_entry *revidx; + revidx = find_pack_revindex(p, ofs); + if (!revidx) + goto give_up; + base_ref = nth_packed_object_sha1(p, revidx->nr); + } + entry->in_pack_header_size = used + used_0; + break; + } + + if (can_reuse_delta(base_ref, entry, &base_entry)) { + oe_set_type(entry, entry->in_pack_type); + SET_SIZE(entry, in_pack_size); /* delta size */ + SET_DELTA_SIZE(entry, in_pack_size); + + if (base_entry) { + SET_DELTA(entry, base_entry); + entry->delta_sibling_idx = base_entry->delta_child_idx; + SET_DELTA_CHILD(base_entry, entry); + } else { + SET_DELTA_EXT(entry, base_ref); + } + + unuse_pack(&w_curs); + return; + } + + if (oe_type(entry)) { + off_t delta_pos; + + /* + * This must be a delta and we already know what the + * final object type is. Let's extract the actual + * object size from the delta header. + */ + delta_pos = entry->in_pack_offset + entry->in_pack_header_size; + canonical_size = get_size_from_delta(p, &w_curs, delta_pos); + if (canonical_size == 0) + goto give_up; + SET_SIZE(entry, canonical_size); + unuse_pack(&w_curs); + return; + } + + /* + * No choice but to fall back to the recursive delta walk + * with oid_object_info() to find about the object type + * at this point... + */ + give_up: + unuse_pack(&w_curs); + } + + oe_set_type(entry, + oid_object_info(the_repository, &entry->idx.oid, &canonical_size)); + if (entry->type_valid) { + SET_SIZE(entry, canonical_size); + } else { + /* + * Bad object type is checked in prepare_pack(). This is + * to permit a missing preferred base object to be ignored + * as a preferred base. Doing so can result in a larger + * pack file, but the transfer will still take place. + */ + } +} + +static int pack_offset_sort(const void *_a, const void *_b) +{ + const struct object_entry *a = *(struct object_entry **)_a; + const struct object_entry *b = *(struct object_entry **)_b; + const struct packed_git *a_in_pack = IN_PACK(a); + const struct packed_git *b_in_pack = IN_PACK(b); + + /* avoid filesystem trashing with loose objects */ + if (!a_in_pack && !b_in_pack) + return oidcmp(&a->idx.oid, &b->idx.oid); + + if (a_in_pack < b_in_pack) + return -1; + if (a_in_pack > b_in_pack) + return 1; + return a->in_pack_offset < b->in_pack_offset ? -1 : + (a->in_pack_offset > b->in_pack_offset); +} + +/* + * Drop an on-disk delta we were planning to reuse. Naively, this would + * just involve blanking out the "delta" field, but we have to deal + * with some extra book-keeping: + * + * 1. Removing ourselves from the delta_sibling linked list. + * + * 2. Updating our size/type to the non-delta representation. These were + * either not recorded initially (size) or overwritten with the delta type + * (type) when check_object() decided to reuse the delta. + * + * 3. Resetting our delta depth, as we are now a base object. + */ +static void drop_reused_delta(struct object_entry *entry) +{ + unsigned *idx = &to_pack.objects[entry->delta_idx - 1].delta_child_idx; + struct object_info oi = OBJECT_INFO_INIT; + enum object_type type; + unsigned long size; + + while (*idx) { + struct object_entry *oe = &to_pack.objects[*idx - 1]; + + if (oe == entry) + *idx = oe->delta_sibling_idx; + else + idx = &oe->delta_sibling_idx; + } + SET_DELTA(entry, NULL); + entry->depth = 0; + + oi.sizep = &size; + oi.typep = &type; + if (packed_object_info(the_repository, IN_PACK(entry), entry->in_pack_offset, &oi) < 0) { + /* + * We failed to get the info from this pack for some reason; + * fall back to oid_object_info, which may find another copy. + * And if that fails, the error will be recorded in oe_type(entry) + * and dealt with in prepare_pack(). + */ + oe_set_type(entry, + oid_object_info(the_repository, &entry->idx.oid, &size)); + } else { + oe_set_type(entry, type); + } + SET_SIZE(entry, size); +} + +/* + * Follow the chain of deltas from this entry onward, throwing away any links + * that cause us to hit a cycle (as determined by the DFS state flags in + * the entries). + * + * We also detect too-long reused chains that would violate our --depth + * limit. + */ +static void break_delta_chains(struct object_entry *entry) +{ + /* + * The actual depth of each object we will write is stored as an int, + * as it cannot exceed our int "depth" limit. But before we break + * changes based no that limit, we may potentially go as deep as the + * number of objects, which is elsewhere bounded to a uint32_t. + */ + uint32_t total_depth; + struct object_entry *cur, *next; + + for (cur = entry, total_depth = 0; + cur; + cur = DELTA(cur), total_depth++) { + if (cur->dfs_state == DFS_DONE) { + /* + * We've already seen this object and know it isn't + * part of a cycle. We do need to append its depth + * to our count. + */ + total_depth += cur->depth; + break; + } + + /* + * We break cycles before looping, so an ACTIVE state (or any + * other cruft which made its way into the state variable) + * is a bug. + */ + if (cur->dfs_state != DFS_NONE) + BUG("confusing delta dfs state in first pass: %d", + cur->dfs_state); + + /* + * Now we know this is the first time we've seen the object. If + * it's not a delta, we're done traversing, but we'll mark it + * done to save time on future traversals. + */ + if (!DELTA(cur)) { + cur->dfs_state = DFS_DONE; + break; + } + + /* + * Mark ourselves as active and see if the next step causes + * us to cycle to another active object. It's important to do + * this _before_ we loop, because it impacts where we make the + * cut, and thus how our total_depth counter works. + * E.g., We may see a partial loop like: + * + * A -> B -> C -> D -> B + * + * Cutting B->C breaks the cycle. But now the depth of A is + * only 1, and our total_depth counter is at 3. The size of the + * error is always one less than the size of the cycle we + * broke. Commits C and D were "lost" from A's chain. + * + * If we instead cut D->B, then the depth of A is correct at 3. + * We keep all commits in the chain that we examined. + */ + cur->dfs_state = DFS_ACTIVE; + if (DELTA(cur)->dfs_state == DFS_ACTIVE) { + drop_reused_delta(cur); + cur->dfs_state = DFS_DONE; + break; + } + } + + /* + * And now that we've gone all the way to the bottom of the chain, we + * need to clear the active flags and set the depth fields as + * appropriate. Unlike the loop above, which can quit when it drops a + * delta, we need to keep going to look for more depth cuts. So we need + * an extra "next" pointer to keep going after we reset cur->delta. + */ + for (cur = entry; cur; cur = next) { + next = DELTA(cur); + + /* + * We should have a chain of zero or more ACTIVE states down to + * a final DONE. We can quit after the DONE, because either it + * has no bases, or we've already handled them in a previous + * call. + */ + if (cur->dfs_state == DFS_DONE) + break; + else if (cur->dfs_state != DFS_ACTIVE) + BUG("confusing delta dfs state in second pass: %d", + cur->dfs_state); + + /* + * If the total_depth is more than depth, then we need to snip + * the chain into two or more smaller chains that don't exceed + * the maximum depth. Most of the resulting chains will contain + * (depth + 1) entries (i.e., depth deltas plus one base), and + * the last chain (i.e., the one containing entry) will contain + * whatever entries are left over, namely + * (total_depth % (depth + 1)) of them. + * + * Since we are iterating towards decreasing depth, we need to + * decrement total_depth as we go, and we need to write to the + * entry what its final depth will be after all of the + * snipping. Since we're snipping into chains of length (depth + * + 1) entries, the final depth of an entry will be its + * original depth modulo (depth + 1). Any time we encounter an + * entry whose final depth is supposed to be zero, we snip it + * from its delta base, thereby making it so. + */ + cur->depth = (total_depth--) % (depth + 1); + if (!cur->depth) + drop_reused_delta(cur); + + cur->dfs_state = DFS_DONE; + } +} + +static void get_object_details(void) +{ + uint32_t i; + struct object_entry **sorted_by_offset; + + if (progress) + progress_state = start_progress(_("Counting objects"), + to_pack.nr_objects); + + sorted_by_offset = xcalloc(to_pack.nr_objects, sizeof(struct object_entry *)); + for (i = 0; i < to_pack.nr_objects; i++) + sorted_by_offset[i] = to_pack.objects + i; + QSORT(sorted_by_offset, to_pack.nr_objects, pack_offset_sort); + + for (i = 0; i < to_pack.nr_objects; i++) { + struct object_entry *entry = sorted_by_offset[i]; + check_object(entry); + if (entry->type_valid && + oe_size_greater_than(&to_pack, entry, big_file_threshold)) + entry->no_try_delta = 1; + display_progress(progress_state, i + 1); + } + stop_progress(&progress_state); + + /* + * This must happen in a second pass, since we rely on the delta + * information for the whole list being completed. + */ + for (i = 0; i < to_pack.nr_objects; i++) + break_delta_chains(&to_pack.objects[i]); + + free(sorted_by_offset); +} + +/* + * We search for deltas in a list sorted by type, by filename hash, and then + * by size, so that we see progressively smaller and smaller files. + * That's because we prefer deltas to be from the bigger file + * to the smaller -- deletes are potentially cheaper, but perhaps + * more importantly, the bigger file is likely the more recent + * one. The deepest deltas are therefore the oldest objects which are + * less susceptible to be accessed often. + */ +static int type_size_sort(const void *_a, const void *_b) +{ + const struct object_entry *a = *(struct object_entry **)_a; + const struct object_entry *b = *(struct object_entry **)_b; + const enum object_type a_type = oe_type(a); + const enum object_type b_type = oe_type(b); + const unsigned long a_size = SIZE(a); + const unsigned long b_size = SIZE(b); + + if (a_type > b_type) + return -1; + if (a_type < b_type) + return 1; + if (a->hash > b->hash) + return -1; + if (a->hash < b->hash) + return 1; + if (a->preferred_base > b->preferred_base) + return -1; + if (a->preferred_base < b->preferred_base) + return 1; + if (use_delta_islands) { + const int island_cmp = island_delta_cmp(&a->idx.oid, &b->idx.oid); + if (island_cmp) + return island_cmp; + } + if (a_size > b_size) + return -1; + if (a_size < b_size) + return 1; + return a < b ? -1 : (a > b); /* newest first */ +} + +struct unpacked { + struct object_entry *entry; + void *data; + struct delta_index *index; + unsigned depth; +}; + +static int delta_cacheable(unsigned long src_size, unsigned long trg_size, + unsigned long delta_size) +{ + if (max_delta_cache_size && delta_cache_size + delta_size > max_delta_cache_size) + return 0; + + if (delta_size < cache_max_small_delta_size) + return 1; + + /* cache delta, if objects are large enough compared to delta size */ + if ((src_size >> 20) + (trg_size >> 21) > (delta_size >> 10)) + return 1; + + return 0; +} + +/* Protect delta_cache_size */ +static pthread_mutex_t cache_mutex; +#define cache_lock() pthread_mutex_lock(&cache_mutex) +#define cache_unlock() pthread_mutex_unlock(&cache_mutex) + +/* + * Protect object list partitioning (e.g. struct thread_param) and + * progress_state + */ +static pthread_mutex_t progress_mutex; +#define progress_lock() pthread_mutex_lock(&progress_mutex) +#define progress_unlock() pthread_mutex_unlock(&progress_mutex) + +/* + * Access to struct object_entry is unprotected since each thread owns + * a portion of the main object list. Just don't access object entries + * ahead in the list because they can be stolen and would need + * progress_mutex for protection. + */ + +/* + * Return the size of the object without doing any delta + * reconstruction (so non-deltas are true object sizes, but deltas + * return the size of the delta data). + */ +unsigned long oe_get_size_slow(struct packing_data *pack, + const struct object_entry *e) +{ + struct packed_git *p; + struct pack_window *w_curs; + unsigned char *buf; + enum object_type type; + unsigned long used, avail, size; + + if (e->type_ != OBJ_OFS_DELTA && e->type_ != OBJ_REF_DELTA) { + packing_data_lock(&to_pack); + if (oid_object_info(the_repository, &e->idx.oid, &size) < 0) + die(_("unable to get size of %s"), + oid_to_hex(&e->idx.oid)); + packing_data_unlock(&to_pack); + return size; + } + + p = oe_in_pack(pack, e); + if (!p) + BUG("when e->type is a delta, it must belong to a pack"); + + packing_data_lock(&to_pack); + w_curs = NULL; + buf = use_pack(p, &w_curs, e->in_pack_offset, &avail); + used = unpack_object_header_buffer(buf, avail, &type, &size); + if (used == 0) + die(_("unable to parse object header of %s"), + oid_to_hex(&e->idx.oid)); + + unuse_pack(&w_curs); + packing_data_unlock(&to_pack); + return size; +} + +static int try_delta(struct unpacked *trg, struct unpacked *src, + unsigned max_depth, unsigned long *mem_usage) +{ + struct object_entry *trg_entry = trg->entry; + struct object_entry *src_entry = src->entry; + unsigned long trg_size, src_size, delta_size, sizediff, max_size, sz; + unsigned ref_depth; + enum object_type type; + void *delta_buf; + + /* Don't bother doing diffs between different types */ + if (oe_type(trg_entry) != oe_type(src_entry)) + return -1; + + /* + * We do not bother to try a delta that we discarded on an + * earlier try, but only when reusing delta data. Note that + * src_entry that is marked as the preferred_base should always + * be considered, as even if we produce a suboptimal delta against + * it, we will still save the transfer cost, as we already know + * the other side has it and we won't send src_entry at all. + */ + if (reuse_delta && IN_PACK(trg_entry) && + IN_PACK(trg_entry) == IN_PACK(src_entry) && + !src_entry->preferred_base && + trg_entry->in_pack_type != OBJ_REF_DELTA && + trg_entry->in_pack_type != OBJ_OFS_DELTA) + return 0; + + /* Let's not bust the allowed depth. */ + if (src->depth >= max_depth) + return 0; + + /* Now some size filtering heuristics. */ + trg_size = SIZE(trg_entry); + if (!DELTA(trg_entry)) { + max_size = trg_size/2 - the_hash_algo->rawsz; + ref_depth = 1; + } else { + max_size = DELTA_SIZE(trg_entry); + ref_depth = trg->depth; + } + max_size = (uint64_t)max_size * (max_depth - src->depth) / + (max_depth - ref_depth + 1); + if (max_size == 0) + return 0; + src_size = SIZE(src_entry); + sizediff = src_size < trg_size ? trg_size - src_size : 0; + if (sizediff >= max_size) + return 0; + if (trg_size < src_size / 32) + return 0; + + if (!in_same_island(&trg->entry->idx.oid, &src->entry->idx.oid)) + return 0; + + /* Load data if not already done */ + if (!trg->data) { + packing_data_lock(&to_pack); + trg->data = read_object_file(&trg_entry->idx.oid, &type, &sz); + packing_data_unlock(&to_pack); + if (!trg->data) + die(_("object %s cannot be read"), + oid_to_hex(&trg_entry->idx.oid)); + if (sz != trg_size) + die(_("object %s inconsistent object length (%"PRIuMAX" vs %"PRIuMAX")"), + oid_to_hex(&trg_entry->idx.oid), (uintmax_t)sz, + (uintmax_t)trg_size); + *mem_usage += sz; + } + if (!src->data) { + packing_data_lock(&to_pack); + src->data = read_object_file(&src_entry->idx.oid, &type, &sz); + packing_data_unlock(&to_pack); + if (!src->data) { + if (src_entry->preferred_base) { + static int warned = 0; + if (!warned++) + warning(_("object %s cannot be read"), + oid_to_hex(&src_entry->idx.oid)); + /* + * Those objects are not included in the + * resulting pack. Be resilient and ignore + * them if they can't be read, in case the + * pack could be created nevertheless. + */ + return 0; + } + die(_("object %s cannot be read"), + oid_to_hex(&src_entry->idx.oid)); + } + if (sz != src_size) + die(_("object %s inconsistent object length (%"PRIuMAX" vs %"PRIuMAX")"), + oid_to_hex(&src_entry->idx.oid), (uintmax_t)sz, + (uintmax_t)src_size); + *mem_usage += sz; + } + if (!src->index) { + src->index = create_delta_index(src->data, src_size); + if (!src->index) { + static int warned = 0; + if (!warned++) + warning(_("suboptimal pack - out of memory")); + return 0; + } + *mem_usage += sizeof_delta_index(src->index); + } + + delta_buf = create_delta(src->index, trg->data, trg_size, &delta_size, max_size); + if (!delta_buf) + return 0; + + if (DELTA(trg_entry)) { + /* Prefer only shallower same-sized deltas. */ + if (delta_size == DELTA_SIZE(trg_entry) && + src->depth + 1 >= trg->depth) { + free(delta_buf); + return 0; + } + } + + /* + * Handle memory allocation outside of the cache + * accounting lock. Compiler will optimize the strangeness + * away when NO_PTHREADS is defined. + */ + free(trg_entry->delta_data); + cache_lock(); + if (trg_entry->delta_data) { + delta_cache_size -= DELTA_SIZE(trg_entry); + trg_entry->delta_data = NULL; + } + if (delta_cacheable(src_size, trg_size, delta_size)) { + delta_cache_size += delta_size; + cache_unlock(); + trg_entry->delta_data = xrealloc(delta_buf, delta_size); + } else { + cache_unlock(); + free(delta_buf); + } + + SET_DELTA(trg_entry, src_entry); + SET_DELTA_SIZE(trg_entry, delta_size); + trg->depth = src->depth + 1; + + return 1; +} + +static unsigned int check_delta_limit(struct object_entry *me, unsigned int n) +{ + struct object_entry *child = DELTA_CHILD(me); + unsigned int m = n; + while (child) { + const unsigned int c = check_delta_limit(child, n + 1); + if (m < c) + m = c; + child = DELTA_SIBLING(child); + } + return m; +} + +static unsigned long free_unpacked(struct unpacked *n) +{ + unsigned long freed_mem = sizeof_delta_index(n->index); + free_delta_index(n->index); + n->index = NULL; + if (n->data) { + freed_mem += SIZE(n->entry); + FREE_AND_NULL(n->data); + } + n->entry = NULL; + n->depth = 0; + return freed_mem; +} + +static void find_deltas(struct object_entry **list, unsigned *list_size, + int window, int depth, unsigned *processed) +{ + uint32_t i, idx = 0, count = 0; + struct unpacked *array; + unsigned long mem_usage = 0; + + array = xcalloc(window, sizeof(struct unpacked)); + + for (;;) { + struct object_entry *entry; + struct unpacked *n = array + idx; + int j, max_depth, best_base = -1; + + progress_lock(); + if (!*list_size) { + progress_unlock(); + break; + } + entry = *list++; + (*list_size)--; + if (!entry->preferred_base) { + (*processed)++; + display_progress(progress_state, *processed); + } + progress_unlock(); + + mem_usage -= free_unpacked(n); + n->entry = entry; + + while (window_memory_limit && + mem_usage > window_memory_limit && + count > 1) { + const uint32_t tail = (idx + window - count) % window; + mem_usage -= free_unpacked(array + tail); + count--; + } + + /* We do not compute delta to *create* objects we are not + * going to pack. + */ + if (entry->preferred_base) + goto next; + + /* + * If the current object is at pack edge, take the depth the + * objects that depend on the current object into account + * otherwise they would become too deep. + */ + max_depth = depth; + if (DELTA_CHILD(entry)) { + max_depth -= check_delta_limit(entry, 0); + if (max_depth <= 0) + goto next; + } + + j = window; + while (--j > 0) { + int ret; + uint32_t other_idx = idx + j; + struct unpacked *m; + if (other_idx >= window) + other_idx -= window; + m = array + other_idx; + if (!m->entry) + break; + ret = try_delta(n, m, max_depth, &mem_usage); + if (ret < 0) + break; + else if (ret > 0) + best_base = other_idx; + } + + /* + * If we decided to cache the delta data, then it is best + * to compress it right away. First because we have to do + * it anyway, and doing it here while we're threaded will + * save a lot of time in the non threaded write phase, + * as well as allow for caching more deltas within + * the same cache size limit. + * ... + * But only if not writing to stdout, since in that case + * the network is most likely throttling writes anyway, + * and therefore it is best to go to the write phase ASAP + * instead, as we can afford spending more time compressing + * between writes at that moment. + */ + if (entry->delta_data && !pack_to_stdout) { + unsigned long size; + + size = do_compress(&entry->delta_data, DELTA_SIZE(entry)); + if (size < (1U << OE_Z_DELTA_BITS)) { + entry->z_delta_size = size; + cache_lock(); + delta_cache_size -= DELTA_SIZE(entry); + delta_cache_size += entry->z_delta_size; + cache_unlock(); + } else { + FREE_AND_NULL(entry->delta_data); + entry->z_delta_size = 0; + } + } + + /* if we made n a delta, and if n is already at max + * depth, leaving it in the window is pointless. we + * should evict it first. + */ + if (DELTA(entry) && max_depth <= n->depth) + continue; + + /* + * Move the best delta base up in the window, after the + * currently deltified object, to keep it longer. It will + * be the first base object to be attempted next. + */ + if (DELTA(entry)) { + struct unpacked swap = array[best_base]; + int dist = (window + idx - best_base) % window; + int dst = best_base; + while (dist--) { + int src = (dst + 1) % window; + array[dst] = array[src]; + dst = src; + } + array[dst] = swap; + } + + next: + idx++; + if (count + 1 < window) + count++; + if (idx >= window) + idx = 0; + } + + for (i = 0; i < window; ++i) { + free_delta_index(array[i].index); + free(array[i].data); + } + free(array); +} + +static void try_to_free_from_threads(size_t size) +{ + packing_data_lock(&to_pack); + release_pack_memory(size); + packing_data_unlock(&to_pack); +} + +static try_to_free_t old_try_to_free_routine; + +/* + * The main object list is split into smaller lists, each is handed to + * one worker. + * + * The main thread waits on the condition that (at least) one of the workers + * has stopped working (which is indicated in the .working member of + * struct thread_params). + * + * When a work thread has completed its work, it sets .working to 0 and + * signals the main thread and waits on the condition that .data_ready + * becomes 1. + * + * The main thread steals half of the work from the worker that has + * most work left to hand it to the idle worker. + */ + +struct thread_params { + pthread_t thread; + struct object_entry **list; + unsigned list_size; + unsigned remaining; + int window; + int depth; + int working; + int data_ready; + pthread_mutex_t mutex; + pthread_cond_t cond; + unsigned *processed; +}; + +static pthread_cond_t progress_cond; + +/* + * Mutex and conditional variable can't be statically-initialized on Windows. + */ +static void init_threaded_search(void) +{ + pthread_mutex_init(&cache_mutex, NULL); + pthread_mutex_init(&progress_mutex, NULL); + pthread_cond_init(&progress_cond, NULL); + old_try_to_free_routine = set_try_to_free_routine(try_to_free_from_threads); +} + +static void cleanup_threaded_search(void) +{ + set_try_to_free_routine(old_try_to_free_routine); + pthread_cond_destroy(&progress_cond); + pthread_mutex_destroy(&cache_mutex); + pthread_mutex_destroy(&progress_mutex); +} + +static void *threaded_find_deltas(void *arg) +{ + struct thread_params *me = arg; + + progress_lock(); + while (me->remaining) { + progress_unlock(); + + find_deltas(me->list, &me->remaining, + me->window, me->depth, me->processed); + + progress_lock(); + me->working = 0; + pthread_cond_signal(&progress_cond); + progress_unlock(); + + /* + * We must not set ->data_ready before we wait on the + * condition because the main thread may have set it to 1 + * before we get here. In order to be sure that new + * work is available if we see 1 in ->data_ready, it + * was initialized to 0 before this thread was spawned + * and we reset it to 0 right away. + */ + pthread_mutex_lock(&me->mutex); + while (!me->data_ready) + pthread_cond_wait(&me->cond, &me->mutex); + me->data_ready = 0; + pthread_mutex_unlock(&me->mutex); + + progress_lock(); + } + progress_unlock(); + /* leave ->working 1 so that this doesn't get more work assigned */ + return NULL; +} + +static void ll_find_deltas(struct object_entry **list, unsigned list_size, + int window, int depth, unsigned *processed) +{ + struct thread_params *p; + int i, ret, active_threads = 0; + + init_threaded_search(); + + if (delta_search_threads <= 1) { + find_deltas(list, &list_size, window, depth, processed); + cleanup_threaded_search(); + return; + } + if (progress > pack_to_stdout) + fprintf_ln(stderr, _("Delta compression using up to %d threads"), + delta_search_threads); + p = xcalloc(delta_search_threads, sizeof(*p)); + + /* Partition the work amongst work threads. */ + for (i = 0; i < delta_search_threads; i++) { + unsigned sub_size = list_size / (delta_search_threads - i); + + /* don't use too small segments or no deltas will be found */ + if (sub_size < 2*window && i+1 < delta_search_threads) + sub_size = 0; + + p[i].window = window; + p[i].depth = depth; + p[i].processed = processed; + p[i].working = 1; + p[i].data_ready = 0; + + /* try to split chunks on "path" boundaries */ + while (sub_size && sub_size < list_size && + list[sub_size]->hash && + list[sub_size]->hash == list[sub_size-1]->hash) + sub_size++; + + p[i].list = list; + p[i].list_size = sub_size; + p[i].remaining = sub_size; + + list += sub_size; + list_size -= sub_size; + } + + /* Start work threads. */ + for (i = 0; i < delta_search_threads; i++) { + if (!p[i].list_size) + continue; + pthread_mutex_init(&p[i].mutex, NULL); + pthread_cond_init(&p[i].cond, NULL); + ret = pthread_create(&p[i].thread, NULL, + threaded_find_deltas, &p[i]); + if (ret) + die(_("unable to create thread: %s"), strerror(ret)); + active_threads++; + } + + /* + * Now let's wait for work completion. Each time a thread is done + * with its work, we steal half of the remaining work from the + * thread with the largest number of unprocessed objects and give + * it to that newly idle thread. This ensure good load balancing + * until the remaining object list segments are simply too short + * to be worth splitting anymore. + */ + while (active_threads) { + struct thread_params *target = NULL; + struct thread_params *victim = NULL; + unsigned sub_size = 0; + + progress_lock(); + for (;;) { + for (i = 0; !target && i < delta_search_threads; i++) + if (!p[i].working) + target = &p[i]; + if (target) + break; + pthread_cond_wait(&progress_cond, &progress_mutex); + } + + for (i = 0; i < delta_search_threads; i++) + if (p[i].remaining > 2*window && + (!victim || victim->remaining < p[i].remaining)) + victim = &p[i]; + if (victim) { + sub_size = victim->remaining / 2; + list = victim->list + victim->list_size - sub_size; + while (sub_size && list[0]->hash && + list[0]->hash == list[-1]->hash) { + list++; + sub_size--; + } + if (!sub_size) { + /* + * It is possible for some "paths" to have + * so many objects that no hash boundary + * might be found. Let's just steal the + * exact half in that case. + */ + sub_size = victim->remaining / 2; + list -= sub_size; + } + target->list = list; + victim->list_size -= sub_size; + victim->remaining -= sub_size; + } + target->list_size = sub_size; + target->remaining = sub_size; + target->working = 1; + progress_unlock(); + + pthread_mutex_lock(&target->mutex); + target->data_ready = 1; + pthread_cond_signal(&target->cond); + pthread_mutex_unlock(&target->mutex); + + if (!sub_size) { + pthread_join(target->thread, NULL); + pthread_cond_destroy(&target->cond); + pthread_mutex_destroy(&target->mutex); + active_threads--; + } + } + cleanup_threaded_search(); + free(p); +} + +static void add_tag_chain(const struct object_id *oid) +{ + struct tag *tag; + + /* + * We catch duplicates already in add_object_entry(), but we'd + * prefer to do this extra check to avoid having to parse the + * tag at all if we already know that it's being packed (e.g., if + * it was included via bitmaps, we would not have parsed it + * previously). + */ + if (packlist_find(&to_pack, oid, NULL)) + return; + + tag = lookup_tag(the_repository, oid); + while (1) { + if (!tag || parse_tag(tag) || !tag->tagged) + die(_("unable to pack objects reachable from tag %s"), + oid_to_hex(oid)); + + add_object_entry(&tag->object.oid, OBJ_TAG, NULL, 0); + + if (tag->tagged->type != OBJ_TAG) + return; + + tag = (struct tag *)tag->tagged; + } +} + +static int add_ref_tag(const char *path, const struct object_id *oid, int flag, void *cb_data) +{ + struct object_id peeled; + + if (starts_with(path, "refs/tags/") && /* is a tag? */ + !peel_ref(path, &peeled) && /* peelable? */ + packlist_find(&to_pack, &peeled, NULL)) /* object packed? */ + add_tag_chain(oid); + return 0; +} + +static void prepare_pack(int window, int depth) +{ + struct object_entry **delta_list; + uint32_t i, nr_deltas; + unsigned n; + + if (use_delta_islands) + resolve_tree_islands(the_repository, progress, &to_pack); + + get_object_details(); + + /* + * If we're locally repacking then we need to be doubly careful + * from now on in order to make sure no stealth corruption gets + * propagated to the new pack. Clients receiving streamed packs + * should validate everything they get anyway so no need to incur + * the additional cost here in that case. + */ + if (!pack_to_stdout) + do_check_packed_object_crc = 1; + + if (!to_pack.nr_objects || !window || !depth) + return; + + ALLOC_ARRAY(delta_list, to_pack.nr_objects); + nr_deltas = n = 0; + + for (i = 0; i < to_pack.nr_objects; i++) { + struct object_entry *entry = to_pack.objects + i; + + if (DELTA(entry)) + /* This happens if we decided to reuse existing + * delta from a pack. "reuse_delta &&" is implied. + */ + continue; + + if (!entry->type_valid || + oe_size_less_than(&to_pack, entry, 50)) + continue; + + if (entry->no_try_delta) + continue; + + if (!entry->preferred_base) { + nr_deltas++; + if (oe_type(entry) < 0) + die(_("unable to get type of object %s"), + oid_to_hex(&entry->idx.oid)); + } else { + if (oe_type(entry) < 0) { + /* + * This object is not found, but we + * don't have to include it anyway. + */ + continue; + } + } + + delta_list[n++] = entry; + } + + if (nr_deltas && n > 1) { + unsigned nr_done = 0; + if (progress) + progress_state = start_progress(_("Compressing objects"), + nr_deltas); + QSORT(delta_list, n, type_size_sort); + ll_find_deltas(delta_list, n, window+1, depth, &nr_done); + stop_progress(&progress_state); + if (nr_done != nr_deltas) + die(_("inconsistency with delta count")); + } + free(delta_list); +} + +static int git_pack_config(const char *k, const char *v, void *cb) +{ + if (!strcmp(k, "pack.window")) { + window = git_config_int(k, v); + return 0; + } + if (!strcmp(k, "pack.windowmemory")) { + window_memory_limit = git_config_ulong(k, v); + return 0; + } + if (!strcmp(k, "pack.depth")) { + depth = git_config_int(k, v); + return 0; + } + if (!strcmp(k, "pack.deltacachesize")) { + max_delta_cache_size = git_config_int(k, v); + return 0; + } + if (!strcmp(k, "pack.deltacachelimit")) { + cache_max_small_delta_size = git_config_int(k, v); + return 0; + } + if (!strcmp(k, "pack.writebitmaphashcache")) { + if (git_config_bool(k, v)) + write_bitmap_options |= BITMAP_OPT_HASH_CACHE; + else + write_bitmap_options &= ~BITMAP_OPT_HASH_CACHE; + } + if (!strcmp(k, "pack.usebitmaps")) { + use_bitmap_index_default = git_config_bool(k, v); + return 0; + } + if (!strcmp(k, "pack.usesparse")) { + sparse = git_config_bool(k, v); + return 0; + } + if (!strcmp(k, "pack.threads")) { + delta_search_threads = git_config_int(k, v); + if (delta_search_threads < 0) + die(_("invalid number of threads specified (%d)"), + delta_search_threads); + if (!HAVE_THREADS && delta_search_threads != 1) { + warning(_("no threads support, ignoring %s"), k); + delta_search_threads = 0; + } + return 0; + } + if (!strcmp(k, "pack.indexversion")) { + pack_idx_opts.version = git_config_int(k, v); + if (pack_idx_opts.version > 2) + die(_("bad pack.indexversion=%"PRIu32), + pack_idx_opts.version); + return 0; + } + return git_default_config(k, v, cb); +} + +static void read_object_list_from_stdin(void) +{ + char line[GIT_MAX_HEXSZ + 1 + PATH_MAX + 2]; + struct object_id oid; + const char *p; + + for (;;) { + if (!fgets(line, sizeof(line), stdin)) { + if (feof(stdin)) + break; + if (!ferror(stdin)) + die("BUG: fgets returned NULL, not EOF, not error!"); + if (errno != EINTR) + die_errno("fgets"); + clearerr(stdin); + continue; + } + if (line[0] == '-') { + if (get_oid_hex(line+1, &oid)) + die(_("expected edge object ID, got garbage:\n %s"), + line); + add_preferred_base(&oid); + continue; + } + if (parse_oid_hex(line, &oid, &p)) + die(_("expected object ID, got garbage:\n %s"), line); + + add_preferred_base_object(p + 1); + add_object_entry(&oid, OBJ_NONE, p + 1, 0); + } +} + +/* Remember to update object flag allocation in object.h */ +#define OBJECT_ADDED (1u<<20) + +static void show_commit(struct commit *commit, void *data) +{ + add_object_entry(&commit->object.oid, OBJ_COMMIT, NULL, 0); + commit->object.flags |= OBJECT_ADDED; + + if (write_bitmap_index) + index_commit_for_bitmap(commit); + + if (use_delta_islands) + propagate_island_marks(commit); +} + +static void show_object(struct object *obj, const char *name, void *data) +{ + add_preferred_base_object(name); + add_object_entry(&obj->oid, obj->type, name, 0); + obj->flags |= OBJECT_ADDED; + + if (use_delta_islands) { + const char *p; + unsigned depth; + struct object_entry *ent; + + /* the empty string is a root tree, which is depth 0 */ + depth = *name ? 1 : 0; + for (p = strchr(name, '/'); p; p = strchr(p + 1, '/')) + depth++; + + ent = packlist_find(&to_pack, &obj->oid, NULL); + if (ent && depth > oe_tree_depth(&to_pack, ent)) + oe_set_tree_depth(&to_pack, ent, depth); + } +} + +static void show_object__ma_allow_any(struct object *obj, const char *name, void *data) +{ + assert(arg_missing_action == MA_ALLOW_ANY); + + /* + * Quietly ignore ALL missing objects. This avoids problems with + * staging them now and getting an odd error later. + */ + if (!has_object_file(&obj->oid)) + return; + + show_object(obj, name, data); +} + +static void show_object__ma_allow_promisor(struct object *obj, const char *name, void *data) +{ + assert(arg_missing_action == MA_ALLOW_PROMISOR); + + /* + * Quietly ignore EXPECTED missing objects. This avoids problems with + * staging them now and getting an odd error later. + */ + if (!has_object_file(&obj->oid) && is_promisor_object(&obj->oid)) + return; + + show_object(obj, name, data); +} + +static int option_parse_missing_action(const struct option *opt, + const char *arg, int unset) +{ + assert(arg); + assert(!unset); + + if (!strcmp(arg, "error")) { + arg_missing_action = MA_ERROR; + fn_show_object = show_object; + return 0; + } + + if (!strcmp(arg, "allow-any")) { + arg_missing_action = MA_ALLOW_ANY; + fetch_if_missing = 0; + fn_show_object = show_object__ma_allow_any; + return 0; + } + + if (!strcmp(arg, "allow-promisor")) { + arg_missing_action = MA_ALLOW_PROMISOR; + fetch_if_missing = 0; + fn_show_object = show_object__ma_allow_promisor; + return 0; + } + + die(_("invalid value for --missing")); + return 0; +} + +static void show_edge(struct commit *commit) +{ + add_preferred_base(&commit->object.oid); +} + +struct in_pack_object { + off_t offset; + struct object *object; +}; + +struct in_pack { + unsigned int alloc; + unsigned int nr; + struct in_pack_object *array; +}; + +static void mark_in_pack_object(struct object *object, struct packed_git *p, struct in_pack *in_pack) +{ + in_pack->array[in_pack->nr].offset = find_pack_entry_one(object->oid.hash, p); + in_pack->array[in_pack->nr].object = object; + in_pack->nr++; +} + +/* + * Compare the objects in the offset order, in order to emulate the + * "git rev-list --objects" output that produced the pack originally. + */ +static int ofscmp(const void *a_, const void *b_) +{ + struct in_pack_object *a = (struct in_pack_object *)a_; + struct in_pack_object *b = (struct in_pack_object *)b_; + + if (a->offset < b->offset) + return -1; + else if (a->offset > b->offset) + return 1; + else + return oidcmp(&a->object->oid, &b->object->oid); +} + +static void add_objects_in_unpacked_packs(void) +{ + struct packed_git *p; + struct in_pack in_pack; + uint32_t i; + + memset(&in_pack, 0, sizeof(in_pack)); + + for (p = get_all_packs(the_repository); p; p = p->next) { + struct object_id oid; + struct object *o; + + if (!p->pack_local || p->pack_keep || p->pack_keep_in_core) + continue; + if (open_pack_index(p)) + die(_("cannot open pack index")); + + ALLOC_GROW(in_pack.array, + in_pack.nr + p->num_objects, + in_pack.alloc); + + for (i = 0; i < p->num_objects; i++) { + nth_packed_object_oid(&oid, p, i); + o = lookup_unknown_object(&oid); + if (!(o->flags & OBJECT_ADDED)) + mark_in_pack_object(o, p, &in_pack); + o->flags |= OBJECT_ADDED; + } + } + + if (in_pack.nr) { + QSORT(in_pack.array, in_pack.nr, ofscmp); + for (i = 0; i < in_pack.nr; i++) { + struct object *o = in_pack.array[i].object; + add_object_entry(&o->oid, o->type, "", 0); + } + } + free(in_pack.array); +} + +static int add_loose_object(const struct object_id *oid, const char *path, + void *data) +{ + enum object_type type = oid_object_info(the_repository, oid, NULL); + + if (type < 0) { + warning(_("loose object at %s could not be examined"), path); + return 0; + } + + add_object_entry(oid, type, "", 0); + return 0; +} + +/* + * We actually don't even have to worry about reachability here. + * add_object_entry will weed out duplicates, so we just add every + * loose object we find. + */ +static void add_unreachable_loose_objects(void) +{ + for_each_loose_file_in_objdir(get_object_directory(), + add_loose_object, + NULL, NULL, NULL); +} + +static int has_sha1_pack_kept_or_nonlocal(const struct object_id *oid) +{ + static struct packed_git *last_found = (void *)1; + struct packed_git *p; + + p = (last_found != (void *)1) ? last_found : + get_all_packs(the_repository); + + while (p) { + if ((!p->pack_local || p->pack_keep || + p->pack_keep_in_core) && + find_pack_entry_one(oid->hash, p)) { + last_found = p; + return 1; + } + if (p == last_found) + p = get_all_packs(the_repository); + else + p = p->next; + if (p == last_found) + p = p->next; + } + return 0; +} + +/* + * Store a list of sha1s that are should not be discarded + * because they are either written too recently, or are + * reachable from another object that was. + * + * This is filled by get_object_list. + */ +static struct oid_array recent_objects; + +static int loosened_object_can_be_discarded(const struct object_id *oid, + timestamp_t mtime) +{ + if (!unpack_unreachable_expiration) + return 0; + if (mtime > unpack_unreachable_expiration) + return 0; + if (oid_array_lookup(&recent_objects, oid) >= 0) + return 0; + return 1; +} + +static void loosen_unused_packed_objects(void) +{ + struct packed_git *p; + uint32_t i; + struct object_id oid; + + for (p = get_all_packs(the_repository); p; p = p->next) { + if (!p->pack_local || p->pack_keep || p->pack_keep_in_core) + continue; + + if (open_pack_index(p)) + die(_("cannot open pack index")); + + for (i = 0; i < p->num_objects; i++) { + nth_packed_object_oid(&oid, p, i); + if (!packlist_find(&to_pack, &oid, NULL) && + !has_sha1_pack_kept_or_nonlocal(&oid) && + !loosened_object_can_be_discarded(&oid, p->mtime)) + if (force_object_loose(&oid, p->mtime)) + die(_("unable to force loose object")); + } + } +} + +/* + * This tracks any options which pack-reuse code expects to be on, or which a + * reader of the pack might not understand, and which would therefore prevent + * blind reuse of what we have on disk. + */ +static int pack_options_allow_reuse(void) +{ + return pack_to_stdout && + allow_ofs_delta && + !ignore_packed_keep_on_disk && + !ignore_packed_keep_in_core && + (!local || !have_non_local_packs) && + !incremental; +} + +static int get_object_list_from_bitmap(struct rev_info *revs) +{ + if (!(bitmap_git = prepare_bitmap_walk(revs))) + return -1; + + if (pack_options_allow_reuse() && + !reuse_partial_packfile_from_bitmap( + bitmap_git, + &reuse_packfile, + &reuse_packfile_objects, + &reuse_packfile_offset)) { + assert(reuse_packfile_objects); + nr_result += reuse_packfile_objects; + display_progress(progress_state, nr_result); + } + + traverse_bitmap_commit_list(bitmap_git, &add_object_entry_from_bitmap); + return 0; +} + +static void record_recent_object(struct object *obj, + const char *name, + void *data) +{ + oid_array_append(&recent_objects, &obj->oid); +} + +static void record_recent_commit(struct commit *commit, void *data) +{ + oid_array_append(&recent_objects, &commit->object.oid); +} + +static void get_object_list(int ac, const char **av) +{ + struct rev_info revs; + struct setup_revision_opt s_r_opt = { + .allow_exclude_promisor_objects = 1, + }; + char line[1000]; + int flags = 0; + int save_warning; + + repo_init_revisions(the_repository, &revs, NULL); + save_commit_buffer = 0; + setup_revisions(ac, av, &revs, &s_r_opt); + + /* make sure shallows are read */ + is_repository_shallow(the_repository); + + save_warning = warn_on_object_refname_ambiguity; + warn_on_object_refname_ambiguity = 0; + + while (fgets(line, sizeof(line), stdin) != NULL) { + int len = strlen(line); + if (len && line[len - 1] == '\n') + line[--len] = 0; + if (!len) + break; + if (*line == '-') { + if (!strcmp(line, "--not")) { + flags ^= UNINTERESTING; + write_bitmap_index = 0; + continue; + } + if (starts_with(line, "--shallow ")) { + struct object_id oid; + if (get_oid_hex(line + 10, &oid)) + die("not an SHA-1 '%s'", line + 10); + register_shallow(the_repository, &oid); + use_bitmap_index = 0; + continue; + } + die(_("not a rev '%s'"), line); + } + if (handle_revision_arg(line, &revs, flags, REVARG_CANNOT_BE_FILENAME)) + die(_("bad revision '%s'"), line); + } + + warn_on_object_refname_ambiguity = save_warning; + + if (use_bitmap_index && !get_object_list_from_bitmap(&revs)) + return; + + if (use_delta_islands) + load_delta_islands(the_repository, progress); + + if (prepare_revision_walk(&revs)) + die(_("revision walk setup failed")); + mark_edges_uninteresting(&revs, show_edge, sparse); + + if (!fn_show_object) + fn_show_object = show_object; + traverse_commit_list_filtered(&filter_options, &revs, + show_commit, fn_show_object, NULL, + NULL); + + if (unpack_unreachable_expiration) { + revs.ignore_missing_links = 1; + if (add_unseen_recent_objects_to_traversal(&revs, + unpack_unreachable_expiration)) + die(_("unable to add recent objects")); + if (prepare_revision_walk(&revs)) + die(_("revision walk setup failed")); + traverse_commit_list(&revs, record_recent_commit, + record_recent_object, NULL); + } + + if (keep_unreachable) + add_objects_in_unpacked_packs(); + if (pack_loose_unreachable) + add_unreachable_loose_objects(); + if (unpack_unreachable) + loosen_unused_packed_objects(); + + oid_array_clear(&recent_objects); +} + +static void add_extra_kept_packs(const struct string_list *names) +{ + struct packed_git *p; + + if (!names->nr) + return; + + for (p = get_all_packs(the_repository); p; p = p->next) { + const char *name = basename(p->pack_name); + int i; + + if (!p->pack_local) + continue; + + for (i = 0; i < names->nr; i++) + if (!fspathcmp(name, names->items[i].string)) + break; + + if (i < names->nr) { + p->pack_keep_in_core = 1; + ignore_packed_keep_in_core = 1; + continue; + } + } +} + +static int option_parse_index_version(const struct option *opt, + const char *arg, int unset) +{ + char *c; + const char *val = arg; + + BUG_ON_OPT_NEG(unset); + + pack_idx_opts.version = strtoul(val, &c, 10); + if (pack_idx_opts.version > 2) + die(_("unsupported index version %s"), val); + if (*c == ',' && c[1]) + pack_idx_opts.off32_limit = strtoul(c+1, &c, 0); + if (*c || pack_idx_opts.off32_limit & 0x80000000) + die(_("bad index version '%s'"), val); + return 0; +} + +static int option_parse_unpack_unreachable(const struct option *opt, + const char *arg, int unset) +{ + if (unset) { + unpack_unreachable = 0; + unpack_unreachable_expiration = 0; + } + else { + unpack_unreachable = 1; + if (arg) + unpack_unreachable_expiration = approxidate(arg); + } + return 0; +} + +int cmd_pack_objects(int argc, const char **argv, const char *prefix) +{ + int use_internal_rev_list = 0; + int shallow = 0; + int all_progress_implied = 0; + struct argv_array rp = ARGV_ARRAY_INIT; + int rev_list_unpacked = 0, rev_list_all = 0, rev_list_reflog = 0; + int rev_list_index = 0; + struct string_list keep_pack_list = STRING_LIST_INIT_NODUP; + struct option pack_objects_options[] = { + OPT_SET_INT('q', "quiet", &progress, + N_("do not show progress meter"), 0), + OPT_SET_INT(0, "progress", &progress, + N_("show progress meter"), 1), + OPT_SET_INT(0, "all-progress", &progress, + N_("show progress meter during object writing phase"), 2), + OPT_BOOL(0, "all-progress-implied", + &all_progress_implied, + N_("similar to --all-progress when progress meter is shown")), + { OPTION_CALLBACK, 0, "index-version", NULL, N_("[,]"), + N_("write the pack index file in the specified idx format version"), + PARSE_OPT_NONEG, option_parse_index_version }, + OPT_MAGNITUDE(0, "max-pack-size", &pack_size_limit, + N_("maximum size of each output pack file")), + OPT_BOOL(0, "local", &local, + N_("ignore borrowed objects from alternate object store")), + OPT_BOOL(0, "incremental", &incremental, + N_("ignore packed objects")), + OPT_INTEGER(0, "window", &window, + N_("limit pack window by objects")), + OPT_MAGNITUDE(0, "window-memory", &window_memory_limit, + N_("limit pack window by memory in addition to object limit")), + OPT_INTEGER(0, "depth", &depth, + N_("maximum length of delta chain allowed in the resulting pack")), + OPT_BOOL(0, "reuse-delta", &reuse_delta, + N_("reuse existing deltas")), + OPT_BOOL(0, "reuse-object", &reuse_object, + N_("reuse existing objects")), + OPT_BOOL(0, "delta-base-offset", &allow_ofs_delta, + N_("use OFS_DELTA objects")), + OPT_INTEGER(0, "threads", &delta_search_threads, + N_("use threads when searching for best delta matches")), + OPT_BOOL(0, "non-empty", &non_empty, + N_("do not create an empty pack output")), + OPT_BOOL(0, "revs", &use_internal_rev_list, + N_("read revision arguments from standard input")), + OPT_SET_INT_F(0, "unpacked", &rev_list_unpacked, + N_("limit the objects to those that are not yet packed"), + 1, PARSE_OPT_NONEG), + OPT_SET_INT_F(0, "all", &rev_list_all, + N_("include objects reachable from any reference"), + 1, PARSE_OPT_NONEG), + OPT_SET_INT_F(0, "reflog", &rev_list_reflog, + N_("include objects referred by reflog entries"), + 1, PARSE_OPT_NONEG), + OPT_SET_INT_F(0, "indexed-objects", &rev_list_index, + N_("include objects referred to by the index"), + 1, PARSE_OPT_NONEG), + OPT_BOOL(0, "stdout", &pack_to_stdout, + N_("output pack to stdout")), + OPT_BOOL(0, "include-tag", &include_tag, + N_("include tag objects that refer to objects to be packed")), + OPT_BOOL(0, "keep-unreachable", &keep_unreachable, + N_("keep unreachable objects")), + OPT_BOOL(0, "pack-loose-unreachable", &pack_loose_unreachable, + N_("pack loose unreachable objects")), + { OPTION_CALLBACK, 0, "unpack-unreachable", NULL, N_("time"), + N_("unpack unreachable objects newer than