about summary refs log tree commit diff
path: root/third_party/git/bloom.h
diff options
context:
space:
mode:
Diffstat (limited to 'third_party/git/bloom.h')
-rw-r--r--third_party/git/bloom.h112
1 files changed, 112 insertions, 0 deletions
diff --git a/third_party/git/bloom.h b/third_party/git/bloom.h
new file mode 100644
index 0000000000..adde6dfe21
--- /dev/null
+++ b/third_party/git/bloom.h
@@ -0,0 +1,112 @@
+#ifndef BLOOM_H
+#define BLOOM_H
+
+struct commit;
+struct repository;
+
+struct bloom_filter_settings {
+	/*
+	 * The version of the hashing technique being used.
+	 * We currently only support version = 1 which is
+	 * the seeded murmur3 hashing technique implemented
+	 * in bloom.c.
+	 */
+	uint32_t hash_version;
+
+	/*
+	 * The number of times a path is hashed, i.e. the
+	 * number of bit positions tht cumulatively
+	 * determine whether a path is present in the
+	 * Bloom filter.
+	 */
+	uint32_t num_hashes;
+
+	/*
+	 * The minimum number of bits per entry in the Bloom
+	 * filter. If the filter contains 'n' entries, then
+	 * filter size is the minimum number of 8-bit words
+	 * that contain n*b bits.
+	 */
+	uint32_t bits_per_entry;
+
+	/*
+	 * The maximum number of changed paths per commit
+	 * before declaring a Bloom filter to be too-large.
+	 *
+	 * Not written to the commit-graph file.
+	 */
+	uint32_t max_changed_paths;
+};
+
+#define DEFAULT_BLOOM_MAX_CHANGES 512
+#define DEFAULT_BLOOM_FILTER_SETTINGS { 1, 7, 10, DEFAULT_BLOOM_MAX_CHANGES }
+#define BITS_PER_WORD 8
+#define BLOOMDATA_CHUNK_HEADER_SIZE 3 * sizeof(uint32_t)
+
+/*
+ * A bloom_filter struct represents a data segment to
+ * use when testing hash values. The 'len' member
+ * dictates how many entries are stored in
+ * 'data'.
+ */
+struct bloom_filter {
+	unsigned char *data;
+	size_t len;
+};
+
+/*
+ * A bloom_key represents the k hash values for a
+ * given string. These can be precomputed and
+ * stored in a bloom_key for re-use when testing
+ * against a bloom_filter. The number of hashes is
+ * given by the Bloom filter settings and is the same
+ * for all Bloom filters and keys interacting with
+ * the loaded version of the commit graph file and
+ * the Bloom data chunks.
+ */
+struct bloom_key {
+	uint32_t *hashes;
+};
+
+/*
+ * Calculate the murmur3 32-bit hash value for the given data
+ * using the given seed.
+ * Produces a uniformly distributed hash value.
+ * Not considered to be cryptographically secure.
+ * Implemented as described in https://en.wikipedia.org/wiki/MurmurHash#Algorithm
+ */
+uint32_t murmur3_seeded(uint32_t seed, const char *data, size_t len);
+
+void fill_bloom_key(const char *data,
+		    size_t len,
+		    struct bloom_key *key,
+		    const struct bloom_filter_settings *settings);
+void clear_bloom_key(struct bloom_key *key);
+
+void add_key_to_filter(const struct bloom_key *key,
+		       struct bloom_filter *filter,
+		       const struct bloom_filter_settings *settings);
+
+void init_bloom_filters(void);
+
+enum bloom_filter_computed {
+	BLOOM_NOT_COMPUTED = (1 << 0),
+	BLOOM_COMPUTED     = (1 << 1),
+	BLOOM_TRUNC_LARGE  = (1 << 2),
+	BLOOM_TRUNC_EMPTY  = (1 << 3),
+};
+
+struct bloom_filter *get_or_compute_bloom_filter(struct repository *r,
+						 struct commit *c,
+						 int compute_if_not_present,
+						 const struct bloom_filter_settings *settings,
+						 enum bloom_filter_computed *computed);
+
+#define get_bloom_filter(r, c) get_or_compute_bloom_filter( \
+	(r), (c), 0, NULL, NULL)
+
+int bloom_filter_contains(const struct bloom_filter *filter,
+			  const struct bloom_key *key,
+			  const struct bloom_filter_settings *settings);
+
+#endif