diff options
Diffstat (limited to 'third_party/git/list-objects-filter.c')
-rw-r--r-- | third_party/git/list-objects-filter.c | 711 |
1 files changed, 711 insertions, 0 deletions
diff --git a/third_party/git/list-objects-filter.c b/third_party/git/list-objects-filter.c new file mode 100644 index 000000000000..1e8d4e763da0 --- /dev/null +++ b/third_party/git/list-objects-filter.c @@ -0,0 +1,711 @@ +#include "cache.h" +#include "dir.h" +#include "tag.h" +#include "commit.h" +#include "tree.h" +#include "blob.h" +#include "diff.h" +#include "tree-walk.h" +#include "revision.h" +#include "list-objects.h" +#include "list-objects-filter.h" +#include "list-objects-filter-options.h" +#include "oidmap.h" +#include "oidset.h" +#include "object-store.h" + +/* Remember to update object flag allocation in object.h */ +/* + * FILTER_SHOWN_BUT_REVISIT -- we set this bit on tree objects + * that have been shown, but should be revisited if they appear + * in the traversal (until we mark it SEEN). This is a way to + * let us silently de-dup calls to show() in the caller. This + * is subtly different from the "revision.h:SHOWN" and the + * "sha1-name.c:ONELINE_SEEN" bits. And also different from + * the non-de-dup usage in pack-bitmap.c + */ +#define FILTER_SHOWN_BUT_REVISIT (1<<21) + +struct subfilter { + struct filter *filter; + struct oidset seen; + struct oidset omits; + struct object_id skip_tree; + unsigned is_skipping_tree : 1; +}; + +struct filter { + enum list_objects_filter_result (*filter_object_fn)( + struct repository *r, + enum list_objects_filter_situation filter_situation, + struct object *obj, + const char *pathname, + const char *filename, + struct oidset *omits, + void *filter_data); + + /* + * Optional. If this function is supplied and the filter needs + * to collect omits, then this function is called once before + * free_fn is called. + * + * This is required because the following two conditions hold: + * + * a. A tree filter can add and remove objects as an object + * graph is traversed. + * b. A combine filter's omit set is the union of all its + * subfilters, which may include tree: filters. + * + * As such, the omits sets must be separate sets, and can only + * be unioned after the traversal is completed. + */ + void (*finalize_omits_fn)(struct oidset *omits, void *filter_data); + + void (*free_fn)(void *filter_data); + + void *filter_data; + + /* If non-NULL, the filter collects a list of the omitted OIDs here. */ + struct oidset *omits; +}; + +static enum list_objects_filter_result filter_blobs_none( + struct repository *r, + enum list_objects_filter_situation filter_situation, + struct object *obj, + const char *pathname, + const char *filename, + struct oidset *omits, + void *filter_data_) +{ + switch (filter_situation) { + default: + BUG("unknown filter_situation: %d", filter_situation); + + case LOFS_BEGIN_TREE: + assert(obj->type == OBJ_TREE); + /* always include all tree objects */ + return LOFR_MARK_SEEN | LOFR_DO_SHOW; + + case LOFS_END_TREE: + assert(obj->type == OBJ_TREE); + return LOFR_ZERO; + + case LOFS_BLOB: + assert(obj->type == OBJ_BLOB); + assert((obj->flags & SEEN) == 0); + + if (omits) + oidset_insert(omits, &obj->oid); + return LOFR_MARK_SEEN; /* but not LOFR_DO_SHOW (hard omit) */ + } +} + +static void filter_blobs_none__init( + struct list_objects_filter_options *filter_options, + struct filter *filter) +{ + filter->filter_object_fn = filter_blobs_none; + filter->free_fn = free; +} + +/* + * A filter for list-objects to omit ALL trees and blobs from the traversal. + * Can OPTIONALLY collect a list of the omitted OIDs. + */ +struct filter_trees_depth_data { + /* + * Maps trees to the minimum depth at which they were seen. It is not + * necessary to re-traverse a tree at deeper or equal depths than it has + * already been traversed. + * + * We can't use LOFR_MARK_SEEN for tree objects since this will prevent + * it from being traversed at shallower depths. + */ + struct oidmap seen_at_depth; + + unsigned long exclude_depth; + unsigned long current_depth; +}; + +struct seen_map_entry { + struct oidmap_entry base; + size_t depth; +}; + +/* Returns 1 if the oid was in the omits set before it was invoked. */ +static int filter_trees_update_omits( + struct object *obj, + struct oidset *omits, + int include_it) +{ + if (!omits) + return 0; + + if (include_it) + return oidset_remove(omits, &obj->oid); + else + return oidset_insert(omits, &obj->oid); +} + +static enum list_objects_filter_result filter_trees_depth( + struct repository *r, + enum list_objects_filter_situation filter_situation, + struct object *obj, + const char *pathname, + const char *filename, + struct oidset *omits, + void *filter_data_) +{ + struct filter_trees_depth_data *filter_data = filter_data_; + struct seen_map_entry *seen_info; + int include_it = filter_data->current_depth < + filter_data->exclude_depth; + int filter_res; + int already_seen; + + /* + * Note that we do not use _MARK_SEEN in order to allow re-traversal in + * case we encounter a tree or blob again at a shallower depth. + */ + + switch (filter_situation) { + default: + BUG("unknown filter_situation: %d", filter_situation); + + case LOFS_END_TREE: + assert(obj->type == OBJ_TREE); + filter_data->current_depth--; + return LOFR_ZERO; + + case LOFS_BLOB: + filter_trees_update_omits(obj, omits, include_it); + return include_it ? LOFR_MARK_SEEN | LOFR_DO_SHOW : LOFR_ZERO; + + case LOFS_BEGIN_TREE: + seen_info = oidmap_get( + &filter_data->seen_at_depth, &obj->oid); + if (!seen_info) { + seen_info = xcalloc(1, sizeof(*seen_info)); + oidcpy(&seen_info->base.oid, &obj->oid); + seen_info->depth = filter_data->current_depth; + oidmap_put(&filter_data->seen_at_depth, seen_info); + already_seen = 0; + } else { + already_seen = + filter_data->current_depth >= seen_info->depth; + } + + if (already_seen) { + filter_res = LOFR_SKIP_TREE; + } else { + int been_omitted = filter_trees_update_omits( + obj, omits, include_it); + seen_info->depth = filter_data->current_depth; + + if (include_it) + filter_res = LOFR_DO_SHOW; + else if (omits && !been_omitted) + /* + * Must update omit information of children + * recursively; they have not been omitted yet. + */ + filter_res = LOFR_ZERO; + else + filter_res = LOFR_SKIP_TREE; + } + + filter_data->current_depth++; + return filter_res; + } +} + +static void filter_trees_free(void *filter_data) { + struct filter_trees_depth_data *d = filter_data; + if (!d) + return; + oidmap_free(&d->seen_at_depth, 1); + free(d); +} + +static void filter_trees_depth__init( + struct list_objects_filter_options *filter_options, + struct filter *filter) +{ + struct filter_trees_depth_data *d = xcalloc(1, sizeof(*d)); + oidmap_init(&d->seen_at_depth, 0); + d->exclude_depth = filter_options->tree_exclude_depth; + d->current_depth = 0; + + filter->filter_data = d; + filter->filter_object_fn = filter_trees_depth; + filter->free_fn = filter_trees_free; +} + +/* + * A filter for list-objects to omit large blobs. + * And to OPTIONALLY collect a list of the omitted OIDs. + */ +struct filter_blobs_limit_data { + unsigned long max_bytes; +}; + +static enum list_objects_filter_result filter_blobs_limit( + struct repository *r, + enum list_objects_filter_situation filter_situation, + struct object *obj, + const char *pathname, + const char *filename, + struct oidset *omits, + void *filter_data_) +{ + struct filter_blobs_limit_data *filter_data = filter_data_; + unsigned long object_length; + enum object_type t; + + switch (filter_situation) { + default: + BUG("unknown filter_situation: %d", filter_situation); + + case LOFS_BEGIN_TREE: + assert(obj->type == OBJ_TREE); + /* always include all tree objects */ + return LOFR_MARK_SEEN | LOFR_DO_SHOW; + + case LOFS_END_TREE: + assert(obj->type == OBJ_TREE); + return LOFR_ZERO; + + case LOFS_BLOB: + assert(obj->type == OBJ_BLOB); + assert((obj->flags & SEEN) == 0); + + t = oid_object_info(r, &obj->oid, &object_length); + if (t != OBJ_BLOB) { /* probably OBJ_NONE */ + /* + * We DO NOT have the blob locally, so we cannot + * apply the size filter criteria. Be conservative + * and force show it (and let the caller deal with + * the ambiguity). + */ + goto include_it; + } + + if (object_length < filter_data->max_bytes) + goto include_it; + + if (omits) + oidset_insert(omits, &obj->oid); + return LOFR_MARK_SEEN; /* but not LOFR_DO_SHOW (hard omit) */ + } + +include_it: + if (omits) + oidset_remove(omits, &obj->oid); + return LOFR_MARK_SEEN | LOFR_DO_SHOW; +} + +static void filter_blobs_limit__init( + struct list_objects_filter_options *filter_options, + struct filter *filter) +{ + struct filter_blobs_limit_data *d = xcalloc(1, sizeof(*d)); + d->max_bytes = filter_options->blob_limit_value; + + filter->filter_data = d; + filter->filter_object_fn = filter_blobs_limit; + filter->free_fn = free; +} + +/* + * A filter driven by a sparse-checkout specification to only + * include blobs that a sparse checkout would populate. + * + * The sparse-checkout spec can be loaded from a blob with the + * given OID or from a local pathname. We allow an OID because + * the repo may be bare or we may be doing the filtering on the + * server. + */ +struct frame { + /* + * default_match is the usual default include/exclude value that + * should be inherited as we recurse into directories based + * upon pattern matching of the directory itself or of a + * containing directory. + */ + enum pattern_match_result default_match; + + /* + * 1 if the directory (recursively) contains any provisionally + * omitted objects. + * + * 0 if everything (recursively) contained in this directory + * has been explicitly included (SHOWN) in the result and + * the directory may be short-cut later in the traversal. + */ + unsigned child_prov_omit : 1; +}; + +struct filter_sparse_data { + struct pattern_list pl; + + size_t nr, alloc; + struct frame *array_frame; +}; + +static enum list_objects_filter_result filter_sparse( + struct repository *r, + enum list_objects_filter_situation filter_situation, + struct object *obj, + const char *pathname, + const char *filename, + struct oidset *omits, + void *filter_data_) +{ + struct filter_sparse_data *filter_data = filter_data_; + int dtype; + struct frame *frame; + enum pattern_match_result match; + + switch (filter_situation) { + default: + BUG("unknown filter_situation: %d", filter_situation); + + case LOFS_BEGIN_TREE: + assert(obj->type == OBJ_TREE); + dtype = DT_DIR; + match = path_matches_pattern_list(pathname, strlen(pathname), + filename, &dtype, &filter_data->pl, + r->index); + if (match == UNDECIDED) + match = filter_data->array_frame[filter_data->nr - 1].default_match; + + ALLOC_GROW(filter_data->array_frame, filter_data->nr + 1, + filter_data->alloc); + filter_data->array_frame[filter_data->nr].default_match = match; + filter_data->array_frame[filter_data->nr].child_prov_omit = 0; + filter_data->nr++; + + /* + * A directory with this tree OID may appear in multiple + * places in the tree. (Think of a directory move or copy, + * with no other changes, so the OID is the same, but the + * full pathnames of objects within this directory are new + * and may match is_excluded() patterns differently.) + * So we cannot mark this directory as SEEN (yet), since + * that will prevent process_tree() from revisiting this + * tree object with other pathname prefixes. + * + * Only _DO_SHOW the tree object the first time we visit + * this tree object. + * + * We always show all tree objects. A future optimization + * may want to attempt to narrow this. + */ + if (obj->flags & FILTER_SHOWN_BUT_REVISIT) + return LOFR_ZERO; + obj->flags |= FILTER_SHOWN_BUT_REVISIT; + return LOFR_DO_SHOW; + + case LOFS_END_TREE: + assert(obj->type == OBJ_TREE); + assert(filter_data->nr > 1); + + frame = &filter_data->array_frame[--filter_data->nr]; + + /* + * Tell our parent directory if any of our children were + * provisionally omitted. + */ + filter_data->array_frame[filter_data->nr - 1].child_prov_omit |= + frame->child_prov_omit; + + /* + * If there are NO provisionally omitted child objects (ALL child + * objects in this folder were INCLUDED), then we can mark the + * folder as SEEN (so we will not have to revisit it again). + */ + if (!frame->child_prov_omit) + return LOFR_MARK_SEEN; + return LOFR_ZERO; + + case LOFS_BLOB: + assert(obj->type == OBJ_BLOB); + assert((obj->flags & SEEN) == 0); + + frame = &filter_data->array_frame[filter_data->nr - 1]; + + dtype = DT_REG; + match = path_matches_pattern_list(pathname, strlen(pathname), + filename, &dtype, &filter_data->pl, + r->index); + if (match == UNDECIDED) + match = frame->default_match; + if (match == MATCHED) { + if (omits) + oidset_remove(omits, &obj->oid); + return LOFR_MARK_SEEN | LOFR_DO_SHOW; + } + + /* + * Provisionally omit it. We've already established that + * this pathname is not in the sparse-checkout specification + * with the CURRENT pathname, so we *WANT* to omit this blob. + * + * However, a pathname elsewhere in the tree may also + * reference this same blob, so we cannot reject it yet. + * Leave the LOFR_ bits unset so that if the blob appears + * again in the traversal, we will be asked again. + */ + if (omits) + oidset_insert(omits, &obj->oid); + + /* + * Remember that at least 1 blob in this tree was + * provisionally omitted. This prevents us from short + * cutting the tree in future iterations. + */ + frame->child_prov_omit = 1; + return LOFR_ZERO; + } +} + + +static void filter_sparse_free(void *filter_data) +{ + struct filter_sparse_data *d = filter_data; + free(d->array_frame); + free(d); +} + +static void filter_sparse_oid__init( + struct list_objects_filter_options *filter_options, + struct filter *filter) +{ + struct filter_sparse_data *d = xcalloc(1, sizeof(*d)); + struct object_context oc; + struct object_id sparse_oid; + + if (get_oid_with_context(the_repository, + filter_options->sparse_oid_name, + GET_OID_BLOB, &sparse_oid, &oc)) + die(_("unable to access sparse blob in '%s'"), + filter_options->sparse_oid_name); + if (add_patterns_from_blob_to_list(&sparse_oid, "", 0, &d->pl) < 0) + die(_("unable to parse sparse filter data in %s"), + oid_to_hex(&sparse_oid)); + + ALLOC_GROW(d->array_frame, d->nr + 1, d->alloc); + d->array_frame[d->nr].default_match = 0; /* default to include */ + d->array_frame[d->nr].child_prov_omit = 0; + d->nr++; + + filter->filter_data = d; + filter->filter_object_fn = filter_sparse; + filter->free_fn = filter_sparse_free; +} + +/* A filter which only shows objects shown by all sub-filters. */ +struct combine_filter_data { + struct subfilter *sub; + size_t nr; +}; + +static enum list_objects_filter_result process_subfilter( + struct repository *r, + enum list_objects_filter_situation filter_situation, + struct object *obj, + const char *pathname, + const char *filename, + struct subfilter *sub) +{ + enum list_objects_filter_result result; + + /* + * Check and update is_skipping_tree before oidset_contains so + * that is_skipping_tree gets unset even when the object is + * marked as seen. As of this writing, no filter uses + * LOFR_MARK_SEEN on trees that also uses LOFR_SKIP_TREE, so the + * ordering is only theoretically important. Be cautious if you + * change the order of the below checks and more filters have + * been added! + */ + if (sub->is_skipping_tree) { + if (filter_situation == LOFS_END_TREE && + oideq(&obj->oid, &sub->skip_tree)) + sub->is_skipping_tree = 0; + else + return LOFR_ZERO; + } + if (oidset_contains(&sub->seen, &obj->oid)) + return LOFR_ZERO; + + result = list_objects_filter__filter_object( + r, filter_situation, obj, pathname, filename, sub->filter); + + if (result & LOFR_MARK_SEEN) + oidset_insert(&sub->seen, &obj->oid); + + if (result & LOFR_SKIP_TREE) { + sub->is_skipping_tree = 1; + sub->skip_tree = obj->oid; + } + + return result; +} + +static enum list_objects_filter_result filter_combine( + struct repository *r, + enum list_objects_filter_situation filter_situation, + struct object *obj, + const char *pathname, + const char *filename, + struct oidset *omits, + void *filter_data) +{ + struct combine_filter_data *d = filter_data; + enum list_objects_filter_result combined_result = + LOFR_DO_SHOW | LOFR_MARK_SEEN | LOFR_SKIP_TREE; + size_t sub; + + for (sub = 0; sub < d->nr; sub++) { + enum list_objects_filter_result sub_result = process_subfilter( + r, filter_situation, obj, pathname, filename, + &d->sub[sub]); + if (!(sub_result & LOFR_DO_SHOW)) + combined_result &= ~LOFR_DO_SHOW; + if (!(sub_result & LOFR_MARK_SEEN)) + combined_result &= ~LOFR_MARK_SEEN; + if (!d->sub[sub].is_skipping_tree) + combined_result &= ~LOFR_SKIP_TREE; + } + + return combined_result; +} + +static void filter_combine__free(void *filter_data) +{ + struct combine_filter_data *d = filter_data; + size_t sub; + for (sub = 0; sub < d->nr; sub++) { + list_objects_filter__free(d->sub[sub].filter); + oidset_clear(&d->sub[sub].seen); + if (d->sub[sub].omits.set.size) + BUG("expected oidset to be cleared already"); + } + free(d->sub); +} + +static void add_all(struct oidset *dest, struct oidset *src) { + struct oidset_iter iter; + struct object_id *src_oid; + + oidset_iter_init(src, &iter); + while ((src_oid = oidset_iter_next(&iter)) != NULL) + oidset_insert(dest, src_oid); +} + +static void filter_combine__finalize_omits( + struct oidset *omits, + void *filter_data) +{ + struct combine_filter_data *d = filter_data; + size_t sub; + + for (sub = 0; sub < d->nr; sub++) { + add_all(omits, &d->sub[sub].omits); + oidset_clear(&d->sub[sub].omits); + } +} + +static void filter_combine__init( + struct list_objects_filter_options *filter_options, + struct filter* filter) +{ + struct combine_filter_data *d = xcalloc(1, sizeof(*d)); + size_t sub; + + d->nr = filter_options->sub_nr; + d->sub = xcalloc(d->nr, sizeof(*d->sub)); + for (sub = 0; sub < d->nr; sub++) + d->sub[sub].filter = list_objects_filter__init( + filter->omits ? &d->sub[sub].omits : NULL, + &filter_options->sub[sub]); + + filter->filter_data = d; + filter->filter_object_fn = filter_combine; + filter->free_fn = filter_combine__free; + filter->finalize_omits_fn = filter_combine__finalize_omits; +} + +typedef void (*filter_init_fn)( + struct list_objects_filter_options *filter_options, + struct filter *filter); + +/* + * Must match "enum list_objects_filter_choice". + */ +static filter_init_fn s_filters[] = { + NULL, + filter_blobs_none__init, + filter_blobs_limit__init, + filter_trees_depth__init, + filter_sparse_oid__init, + filter_combine__init, +}; + +struct filter *list_objects_filter__init( + struct oidset *omitted, + struct list_objects_filter_options *filter_options) +{ + struct filter *filter; + filter_init_fn init_fn; + + assert((sizeof(s_filters) / sizeof(s_filters[0])) == LOFC__COUNT); + + if (filter_options->choice >= LOFC__COUNT) + BUG("invalid list-objects filter choice: %d", + filter_options->choice); + + init_fn = s_filters[filter_options->choice]; + if (!init_fn) + return NULL; + + filter = xcalloc(1, sizeof(*filter)); + filter->omits = omitted; + init_fn(filter_options, filter); + return filter; +} + +enum list_objects_filter_result list_objects_filter__filter_object( + struct repository *r, + enum list_objects_filter_situation filter_situation, + struct object *obj, + const char *pathname, + const char *filename, + struct filter *filter) +{ + if (filter && (obj->flags & NOT_USER_GIVEN)) + return filter->filter_object_fn(r, filter_situation, obj, + pathname, filename, + filter->omits, + filter->filter_data); + /* + * No filter is active or user gave object explicitly. In this case, + * always show the object (except when LOFS_END_TREE, since this tree + * had already been shown when LOFS_BEGIN_TREE). + */ + if (filter_situation == LOFS_END_TREE) + return 0; + return LOFR_MARK_SEEN | LOFR_DO_SHOW; +} + +void list_objects_filter__free(struct filter *filter) +{ + if (!filter) + return; + if (filter->finalize_omits_fn && filter->omits) + filter->finalize_omits_fn(filter->omits, filter->filter_data); + filter->free_fn(filter->filter_data); + free(filter); +} |