From 9027f53cb5051bf83a0254e7f8aeb5d1a206de0b Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 25 Oct 2007 11:23:26 -0700 Subject: Do linear-time/space rename logic for exact renames This implements a smarter rename detector for exact renames, which rather than doing a pairwise comparison (time O(m*n)) will just hash the files into a hash-table (size O(n+m)), and only do pairwise comparisons to renames that have the same hash (time O(n+m) except for unrealistic hash collissions, which we just cull aggressively). Admittedly the exact rename case is not nearly as interesting as the generic case, but it's an important case none-the-less. A similar general approach should work for the generic case too, but even then you do need to handle the exact renames/copies separately (to avoid the inevitable added cost factor that comes from the _size_ of the file), so this is worth doing. In the expectation that we will indeed do the same hashing trick for the general rename case, this code uses a generic hash-table implementation that can be used for other things too. In fact, we might be able to consolidate some of our existing hash tables with the new generic code in hash.[ch]. Signed-off-by: Linus Torvalds Signed-off-by: Junio C Hamano --- hash.h | 43 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) create mode 100644 hash.h (limited to 'hash.h') diff --git a/hash.h b/hash.h new file mode 100644 index 0000000000..a8b0fbb5b5 --- /dev/null +++ b/hash.h @@ -0,0 +1,43 @@ +#ifndef HASH_H +#define HASH_H + +/* + * These are some simple generic hash table helper functions. + * Not necessarily suitable for all users, but good for things + * where you want to just keep track of a list of things, and + * have a good hash to use on them. + * + * It keeps the hash table at roughly 50-75% free, so the memory + * cost of the hash table itself is roughly + * + * 3 * 2*sizeof(void *) * nr_of_objects + * + * bytes. + * + * FIXME: on 64-bit architectures, we waste memory. It would be + * good to have just 32-bit pointers, requiring a special allocator + * for hashed entries or something. + */ +struct hash_table_entry { + unsigned int hash; + void *ptr; +}; + +struct hash_table { + unsigned int size, nr; + struct hash_table_entry *array; +}; + +extern void *lookup_hash(unsigned int hash, struct hash_table *table); +extern void **insert_hash(unsigned int hash, void *ptr, struct hash_table *table); +extern int for_each_hash(struct hash_table *table, int (*fn)(void *)); +extern void free_hash(struct hash_table *table); + +static inline void init_hash(struct hash_table *table) +{ + table->size = 0; + table->nr = 0; + table->array = NULL; +} + +#endif -- cgit v1.2.3 From d1f128b0509dce8b492c233b36131f096fd71274 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 6 Mar 2008 12:46:09 -0800 Subject: Add 'const' where appropriate to index handling functions This is in an effort to make the source index of 'unpack_trees()' as being const, and thus making the compiler help us verify that we only access it for reading. The constification also extended to some of the hashing helpers that get called indirectly. Signed-off-by: Linus Torvalds Signed-off-by: Junio C Hamano --- hash.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'hash.h') diff --git a/hash.h b/hash.h index a8b0fbb5b5..69e33a47b9 100644 --- a/hash.h +++ b/hash.h @@ -28,9 +28,9 @@ struct hash_table { struct hash_table_entry *array; }; -extern void *lookup_hash(unsigned int hash, struct hash_table *table); +extern void *lookup_hash(unsigned int hash, const struct hash_table *table); extern void **insert_hash(unsigned int hash, void *ptr, struct hash_table *table); -extern int for_each_hash(struct hash_table *table, int (*fn)(void *)); +extern int for_each_hash(const struct hash_table *table, int (*fn)(void *)); extern void free_hash(struct hash_table *table); static inline void init_hash(struct hash_table *table) -- cgit v1.2.3