summary refs log tree commit diff
path: root/pack-objects.c
diff options
context:
space:
mode:
authorVicent Marti <tanoku@gmail.com>2013-10-24 14:01:06 -0400
committerJunio C Hamano <gitster@pobox.com>2013-10-24 15:44:48 -0700
commit2834bc27c16e4ae103768dd6dda530f0e46116af (patch)
tree5c26ff9d0bdf39c34dc7cb1c56d96d730167fb48 /pack-objects.c
parent92e5c77c3791567e299cc858a7ddfed410e2cec5 (diff)
pack-objects: refactor the packing list
The hash table that stores the packing list for a given `pack-objects`
run was tightly coupled to the pack-objects code.

In this commit, we refactor the hash table and the underlying storage
array into a `packing_data` struct. The functionality for accessing and
adding entries to the packing list is hence accessible from other parts
of Git besides the `pack-objects` builtin.

This refactoring is a requirement for further patches in this series
that will require accessing the commit packing list from outside of
`pack-objects`.

The hash table implementation has been minimally altered: we now
use table sizes which are always a power of two, to ensure a uniform
index distribution in the array.

Signed-off-by: Vicent Marti <tanoku@gmail.com>
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
Diffstat (limited to 'pack-objects.c')
-rw-r--r--pack-objects.c111
1 files changed, 111 insertions, 0 deletions
diff --git a/pack-objects.c b/pack-objects.c
new file mode 100644
index 0000000000..d01d851ce9
--- /dev/null
+++ b/pack-objects.c
@@ -0,0 +1,111 @@
+#include "cache.h"
+#include "object.h"
+#include "pack.h"
+#include "pack-objects.h"
+
+static uint32_t locate_object_entry_hash(struct packing_data *pdata,
+					 const unsigned char *sha1,
+					 int *found)
+{
+	uint32_t i, hash, mask = (pdata->index_size - 1);
+
+	memcpy(&hash, sha1, sizeof(uint32_t));
+	i = hash & mask;
+
+	while (pdata->index[i] > 0) {
+		uint32_t pos = pdata->index[i] - 1;
+
+		if (!hashcmp(sha1, pdata->objects[pos].idx.sha1)) {
+			*found = 1;
+			return i;
+		}
+
+		i = (i + 1) & mask;
+	}
+
+	*found = 0;
+	return i;
+}
+
+static inline uint32_t closest_pow2(uint32_t v)
+{
+	v = v - 1;
+	v |= v >> 1;
+	v |= v >> 2;
+	v |= v >> 4;
+	v |= v >> 8;
+	v |= v >> 16;
+	return v + 1;
+}
+
+static void rehash_objects(struct packing_data *pdata)
+{
+	uint32_t i;
+	struct object_entry *entry;
+
+	pdata->index_size = closest_pow2(pdata->nr_objects * 3);
+	if (pdata->index_size < 1024)
+		pdata->index_size = 1024;
+
+	pdata->index = xrealloc(pdata->index, sizeof(uint32_t) * pdata->index_size);
+	memset(pdata->index, 0, sizeof(int) * pdata->index_size);
+
+	entry = pdata->objects;
+
+	for (i = 0; i < pdata->nr_objects; i++) {
+		int found;
+		uint32_t ix = locate_object_entry_hash(pdata, entry->idx.sha1, &found);
+
+		if (found)
+			die("BUG: Duplicate object in hash");
+
+		pdata->index[ix] = i + 1;
+		entry++;
+	}
+}
+
+struct object_entry *packlist_find(struct packing_data *pdata,
+				   const unsigned char *sha1,
+				   uint32_t *index_pos)
+{
+	uint32_t i;
+	int found;
+
+	if (!pdata->index_size)
+		return NULL;
+
+	i = locate_object_entry_hash(pdata, sha1, &found);
+
+	if (index_pos)
+		*index_pos = i;
+
+	if (!found)
+		return NULL;
+
+	return &pdata->objects[pdata->index[i] - 1];
+}
+
+struct object_entry *packlist_alloc(struct packing_data *pdata,
+				    const unsigned char *sha1,
+				    uint32_t index_pos)
+{
+	struct object_entry *new_entry;
+
+	if (pdata->nr_objects >= pdata->nr_alloc) {
+		pdata->nr_alloc = (pdata->nr_alloc  + 1024) * 3 / 2;
+		pdata->objects = xrealloc(pdata->objects,
+					  pdata->nr_alloc * sizeof(*new_entry));
+	}
+
+	new_entry = pdata->objects + pdata->nr_objects++;
+
+	memset(new_entry, 0, sizeof(*new_entry));
+	hashcpy(new_entry->idx.sha1, sha1);
+
+	if (pdata->index_size * 3 <= pdata->nr_objects * 4)
+		rehash_objects(pdata);
+	else
+		pdata->index[index_pos] = pdata->nr_objects;
+
+	return new_entry;
+}