From bc8d6b9b904350c72017a3c92fdacbfba5b1be3f Mon Sep 17 00:00:00 2001 From: Jens Lehmann Date: Thu, 7 Nov 2013 15:33:43 +0100 Subject: submodule: don't access the .gitmodules cache entry after removing it Commit 5fee995244e introduced the stage_updated_gitmodules() function to add submodule configuration updates to the index. It assumed that even after calling remove_cache_entry_at() the same cache entry would still be valid. This was true in the old days, as cache entries could never be freed, but that is not so sure in the present as there is ongoing work to free removed cache entries, which makes this code segfault. Fix that by calling add_file_to_cache() instead of open coding it. Also remove the "could not find .gitmodules in index" warning, as that won't happen in regular use cases (and by then just silently adding it to the index we do the right thing). Thanks-to: Karsten Blees Signed-off-by: Jens Lehmann Signed-off-by: Junio C Hamano --- submodule.c | 25 +------------------------ 1 file changed, 1 insertion(+), 24 deletions(-) diff --git a/submodule.c b/submodule.c index 1905d75b2b..e3884877f2 100644 --- a/submodule.c +++ b/submodule.c @@ -116,30 +116,7 @@ int remove_path_from_gitmodules(const char *path) void stage_updated_gitmodules(void) { - struct strbuf buf = STRBUF_INIT; - struct stat st; - int pos; - struct cache_entry *ce; - int namelen = strlen(".gitmodules"); - - pos = cache_name_pos(".gitmodules", namelen); - if (pos < 0) { - warning(_("could not find .gitmodules in index")); - return; - } - ce = active_cache[pos]; - ce->ce_flags = namelen; - if (strbuf_read_file(&buf, ".gitmodules", 0) < 0) - die(_("reading updated .gitmodules failed")); - if (lstat(".gitmodules", &st) < 0) - die_errno(_("unable to stat updated .gitmodules")); - fill_stat_cache_info(ce, &st); - ce->ce_mode = ce_mode_from_stat(ce, st.st_mode); - if (remove_cache_entry_at(pos) < 0) - die(_("unable to remove .gitmodules from index")); - if (write_sha1_file(buf.buf, buf.len, blob_type, ce->sha1)) - die(_("adding updated .gitmodules failed")); - if (add_cache_entry(ce, ADD_CACHE_OK_TO_ADD|ADD_CACHE_OK_TO_REPLACE)) + if (add_file_to_cache(".gitmodules", 0)) die(_("staging updated .gitmodules failed")); } -- cgit v1.2.3 From 6a364ced497e407ab3ffb2554d4ef2c78f801832 Mon Sep 17 00:00:00 2001 From: Karsten Blees Date: Thu, 14 Nov 2013 20:17:54 +0100 Subject: add a hashtable implementation that supports O(1) removal The existing hashtable implementation (in hash.[ch]) uses open addressing (i.e. resolve hash collisions by distributing entries across the table). Thus, removal is difficult to implement with less than O(n) complexity. Resolving collisions of entries with identical hashes (e.g. via chaining) is left to the client code. Add a hashtable implementation that supports O(1) removal and is slightly easier to use due to builtin entry chaining. Supports all basic operations init, free, get, add, remove and iteration. Also includes ready-to-use hash functions based on the public domain FNV-1 algorithm (http://www.isthe.com/chongo/tech/comp/fnv). The per-entry data structure (hashmap_entry) is piggybacked in front of the client's data structure to save memory. See test-hashmap.c for usage examples. The hashtable is resized by a factor of four when 80% full. With these settings, average memory consumption is about 2/3 of hash.[ch], and insertion is about twice as fast due to less frequent resizing. Lookups are also slightly faster, because entries are strictly confined to their bucket (i.e. no data of other buckets needs to be traversed). Signed-off-by: Karsten Blees Signed-off-by: Junio C Hamano --- Documentation/technical/api-hashmap.txt | 235 ++++++++++++++++++++++ Makefile | 3 + hashmap.c | 228 +++++++++++++++++++++ hashmap.h | 71 +++++++ t/t0011-hashmap.sh | 240 ++++++++++++++++++++++ test-hashmap.c | 340 ++++++++++++++++++++++++++++++++ 6 files changed, 1117 insertions(+) create mode 100644 Documentation/technical/api-hashmap.txt create mode 100644 hashmap.c create mode 100644 hashmap.h create mode 100755 t/t0011-hashmap.sh create mode 100644 test-hashmap.c diff --git a/Documentation/technical/api-hashmap.txt b/Documentation/technical/api-hashmap.txt new file mode 100644 index 0000000000..b2280f1b0d --- /dev/null +++ b/Documentation/technical/api-hashmap.txt @@ -0,0 +1,235 @@ +hashmap API +=========== + +The hashmap API is a generic implementation of hash-based key-value mappings. + +Data Structures +--------------- + +`struct hashmap`:: + + The hash table structure. ++ +The `size` member keeps track of the total number of entries. The `cmpfn` +member is a function used to compare two entries for equality. The `table` and +`tablesize` members store the hash table and its size, respectively. + +`struct hashmap_entry`:: + + An opaque structure representing an entry in the hash table, which must + be used as first member of user data structures. Ideally it should be + followed by an int-sized member to prevent unused memory on 64-bit + systems due to alignment. ++ +The `hash` member is the entry's hash code and the `next` member points to the +next entry in case of collisions (i.e. if multiple entries map to the same +bucket). + +`struct hashmap_iter`:: + + An iterator structure, to be used with hashmap_iter_* functions. + +Types +----- + +`int (*hashmap_cmp_fn)(const void *entry, const void *entry_or_key, const void *keydata)`:: + + User-supplied function to test two hashmap entries for equality. Shall + return 0 if the entries are equal. ++ +This function is always called with non-NULL `entry` / `entry_or_key` +parameters that have the same hash code. When looking up an entry, the `key` +and `keydata` parameters to hashmap_get and hashmap_remove are always passed +as second and third argument, respectively. Otherwise, `keydata` is NULL. + +Functions +--------- + +`unsigned int strhash(const char *buf)`:: +`unsigned int strihash(const char *buf)`:: +`unsigned int memhash(const void *buf, size_t len)`:: +`unsigned int memihash(const void *buf, size_t len)`:: + + Ready-to-use hash functions for strings, using the FNV-1 algorithm (see + http://www.isthe.com/chongo/tech/comp/fnv). ++ +`strhash` and `strihash` take 0-terminated strings, while `memhash` and +`memihash` operate on arbitrary-length memory. ++ +`strihash` and `memihash` are case insensitive versions. + +`void hashmap_init(struct hashmap *map, hashmap_cmp_fn equals_function, size_t initial_size)`:: + + Initializes a hashmap structure. ++ +`map` is the hashmap to initialize. ++ +The `equals_function` can be specified to compare two entries for equality. +If NULL, entries are considered equal if their hash codes are equal. ++ +If the total number of entries is known in advance, the `initial_size` +parameter may be used to preallocate a sufficiently large table and thus +prevent expensive resizing. If 0, the table is dynamically resized. + +`void hashmap_free(struct hashmap *map, int free_entries)`:: + + Frees a hashmap structure and allocated memory. ++ +`map` is the hashmap to free. ++ +If `free_entries` is true, each hashmap_entry in the map is freed as well +(using stdlib's free()). + +`void hashmap_entry_init(void *entry, int hash)`:: + + Initializes a hashmap_entry structure. ++ +`entry` points to the entry to initialize. ++ +`hash` is the hash code of the entry. + +`void *hashmap_get(const struct hashmap *map, const void *key, const void *keydata)`:: + + Returns the hashmap entry for the specified key, or NULL if not found. ++ +`map` is the hashmap structure. ++ +`key` is a hashmap_entry structure (or user data structure that starts with +hashmap_entry) that has at least been initialized with the proper hash code +(via `hashmap_entry_init`). ++ +If an entry with matching hash code is found, `key` and `keydata` are passed +to `hashmap_cmp_fn` to decide whether the entry matches the key. + +`void *hashmap_get_next(const struct hashmap *map, const void *entry)`:: + + Returns the next equal hashmap entry, or NULL if not found. This can be + used to iterate over duplicate entries (see `hashmap_add`). ++ +`map` is the hashmap structure. ++ +`entry` is the hashmap_entry to start the search from, obtained via a previous +call to `hashmap_get` or `hashmap_get_next`. + +`void hashmap_add(struct hashmap *map, void *entry)`:: + + Adds a hashmap entry. This allows to add duplicate entries (i.e. + separate values with the same key according to hashmap_cmp_fn). ++ +`map` is the hashmap structure. ++ +`entry` is the entry to add. + +`void *hashmap_put(struct hashmap *map, void *entry)`:: + + Adds or replaces a hashmap entry. If the hashmap contains duplicate + entries equal to the specified entry, only one of them will be replaced. ++ +`map` is the hashmap structure. ++ +`entry` is the entry to add or replace. ++ +Returns the replaced entry, or NULL if not found (i.e. the entry was added). + +`void *hashmap_remove(struct hashmap *map, const void *key, const void *keydata)`:: + + Removes a hashmap entry matching the specified key. If the hashmap + contains duplicate entries equal to the specified key, only one of + them will be removed. ++ +`map` is the hashmap structure. ++ +`key` is a hashmap_entry structure (or user data structure that starts with +hashmap_entry) that has at least been initialized with the proper hash code +(via `hashmap_entry_init`). ++ +If an entry with matching hash code is found, `key` and `keydata` are +passed to `hashmap_cmp_fn` to decide whether the entry matches the key. ++ +Returns the removed entry, or NULL if not found. + +`void hashmap_iter_init(struct hashmap *map, struct hashmap_iter *iter)`:: +`void *hashmap_iter_next(struct hashmap_iter *iter)`:: +`void *hashmap_iter_first(struct hashmap *map, struct hashmap_iter *iter)`:: + + Used to iterate over all entries of a hashmap. ++ +`hashmap_iter_init` initializes a `hashmap_iter` structure. ++ +`hashmap_iter_next` returns the next hashmap_entry, or NULL if there are no +more entries. ++ +`hashmap_iter_first` is a combination of both (i.e. initializes the iterator +and returns the first entry, if any). + +Usage example +------------- + +Here's a simple usage example that maps long keys to double values. +[source,c] +------------ +struct hashmap map; + +struct long2double { + struct hashmap_entry ent; /* must be the first member! */ + long key; + double value; +}; + +static int long2double_cmp(const struct long2double *e1, const struct long2double *e2, const void *unused) +{ + return !(e1->key == e2->key); +} + +void long2double_init(void) +{ + hashmap_init(&map, (hashmap_cmp_fn) long2double_cmp, 0); +} + +void long2double_free(void) +{ + hashmap_free(&map, 1); +} + +static struct long2double *find_entry(long key) +{ + struct long2double k; + hashmap_entry_init(&k, memhash(&key, sizeof(long))); + k.key = key; + return hashmap_get(&map, &k, NULL); +} + +double get_value(long key) +{ + struct long2double *e = find_entry(key); + return e ? e->value : 0; +} + +void set_value(long key, double value) +{ + struct long2double *e = find_entry(key); + if (!e) { + e = malloc(sizeof(struct long2double)); + hashmap_entry_init(e, memhash(&key, sizeof(long))); + e->key = key; + hashmap_add(&map, e); + } + e->value = value; +} +------------ + +Using variable-sized keys +------------------------- + +The `hashmap_entry_get` and `hashmap_entry_remove` functions expect an ordinary +`hashmap_entry` structure as key to find the correct entry. If the key data is +variable-sized (e.g. a FLEX_ARRAY string) or quite large, it is undesirable +to create a full-fledged entry structure on the heap and copy all the key data +into the structure. + +In this case, the `keydata` parameter can be used to pass +variable-sized key data directly to the comparison function, and the `key` +parameter can be a stripped-down, fixed size entry structure allocated on the +stack. + +See test-hashmap.c for an example using arbitrary-length strings as keys. diff --git a/Makefile b/Makefile index 4fde227f1f..d8d3d6705b 100644 --- a/Makefile +++ b/Makefile @@ -557,6 +557,7 @@ TEST_PROGRAMS_NEED_X += test-date TEST_PROGRAMS_NEED_X += test-delta TEST_PROGRAMS_NEED_X += test-dump-cache-tree TEST_PROGRAMS_NEED_X += test-genrandom +TEST_PROGRAMS_NEED_X += test-hashmap TEST_PROGRAMS_NEED_X += test-index-version TEST_PROGRAMS_NEED_X += test-line-buffer TEST_PROGRAMS_NEED_X += test-match-trees @@ -677,6 +678,7 @@ LIB_H += gpg-interface.h LIB_H += graph.h LIB_H += grep.h LIB_H += hash.h +LIB_H += hashmap.h LIB_H += help.h LIB_H += http.h LIB_H += kwset.h @@ -808,6 +810,7 @@ LIB_OBJS += gpg-interface.o LIB_OBJS += graph.o LIB_OBJS += grep.o LIB_OBJS += hash.o +LIB_OBJS += hashmap.o LIB_OBJS += help.o LIB_OBJS += hex.o LIB_OBJS += ident.o diff --git a/hashmap.c b/hashmap.c new file mode 100644 index 0000000000..d1b8056d8d --- /dev/null +++ b/hashmap.c @@ -0,0 +1,228 @@ +/* + * Generic implementation of hash-based key value mappings. + */ +#include "cache.h" +#include "hashmap.h" + +#define FNV32_BASE ((unsigned int) 0x811c9dc5) +#define FNV32_PRIME ((unsigned int) 0x01000193) + +unsigned int strhash(const char *str) +{ + unsigned int c, hash = FNV32_BASE; + while ((c = (unsigned char) *str++)) + hash = (hash * FNV32_PRIME) ^ c; + return hash; +} + +unsigned int strihash(const char *str) +{ + unsigned int c, hash = FNV32_BASE; + while ((c = (unsigned char) *str++)) { + if (c >= 'a' && c <= 'z') + c -= 'a' - 'A'; + hash = (hash * FNV32_PRIME) ^ c; + } + return hash; +} + +unsigned int memhash(const void *buf, size_t len) +{ + unsigned int hash = FNV32_BASE; + unsigned char *ucbuf = (unsigned char *) buf; + while (len--) { + unsigned int c = *ucbuf++; + hash = (hash * FNV32_PRIME) ^ c; + } + return hash; +} + +unsigned int memihash(const void *buf, size_t len) +{ + unsigned int hash = FNV32_BASE; + unsigned char *ucbuf = (unsigned char *) buf; + while (len--) { + unsigned int c = *ucbuf++; + if (c >= 'a' && c <= 'z') + c -= 'a' - 'A'; + hash = (hash * FNV32_PRIME) ^ c; + } + return hash; +} + +#define HASHMAP_INITIAL_SIZE 64 +/* grow / shrink by 2^2 */ +#define HASHMAP_RESIZE_BITS 2 +/* load factor in percent */ +#define HASHMAP_LOAD_FACTOR 80 + +static void alloc_table(struct hashmap *map, unsigned int size) +{ + map->tablesize = size; + map->table = xcalloc(size, sizeof(struct hashmap_entry *)); + + /* calculate resize thresholds for new size */ + map->grow_at = (unsigned int) ((uint64_t) size * HASHMAP_LOAD_FACTOR / 100); + if (size <= HASHMAP_INITIAL_SIZE) + map->shrink_at = 0; + else + /* + * The shrink-threshold must be slightly smaller than + * (grow-threshold / resize-factor) to prevent erratic resizing, + * thus we divide by (resize-factor + 1). + */ + map->shrink_at = map->grow_at / ((1 << HASHMAP_RESIZE_BITS) + 1); +} + +static inline int entry_equals(const struct hashmap *map, + const struct hashmap_entry *e1, const struct hashmap_entry *e2, + const void *keydata) +{ + return (e1 == e2) || (e1->hash == e2->hash && !map->cmpfn(e1, e2, keydata)); +} + +static inline unsigned int bucket(const struct hashmap *map, + const struct hashmap_entry *key) +{ + return key->hash & (map->tablesize - 1); +} + +static void rehash(struct hashmap *map, unsigned int newsize) +{ + unsigned int i, oldsize = map->tablesize; + struct hashmap_entry **oldtable = map->table; + + alloc_table(map, newsize); + for (i = 0; i < oldsize; i++) { + struct hashmap_entry *e = oldtable[i]; + while (e) { + struct hashmap_entry *next = e->next; + unsigned int b = bucket(map, e); + e->next = map->table[b]; + map->table[b] = e; + e = next; + } + } + free(oldtable); +} + +static inline struct hashmap_entry **find_entry_ptr(const struct hashmap *map, + const struct hashmap_entry *key, const void *keydata) +{ + struct hashmap_entry **e = &map->table[bucket(map, key)]; + while (*e && !entry_equals(map, *e, key, keydata)) + e = &(*e)->next; + return e; +} + +static int always_equal(const void *unused1, const void *unused2, const void *unused3) +{ + return 0; +} + +void hashmap_init(struct hashmap *map, hashmap_cmp_fn equals_function, + size_t initial_size) +{ + unsigned int size = HASHMAP_INITIAL_SIZE; + map->size = 0; + map->cmpfn = equals_function ? equals_function : always_equal; + + /* calculate initial table size and allocate the table */ + initial_size = (unsigned int) ((uint64_t) initial_size * 100 + / HASHMAP_LOAD_FACTOR); + while (initial_size > size) + size <<= HASHMAP_RESIZE_BITS; + alloc_table(map, size); +} + +void hashmap_free(struct hashmap *map, int free_entries) +{ + if (!map || !map->table) + return; + if (free_entries) { + struct hashmap_iter iter; + struct hashmap_entry *e; + hashmap_iter_init(map, &iter); + while ((e = hashmap_iter_next(&iter))) + free(e); + } + free(map->table); + memset(map, 0, sizeof(*map)); +} + +void *hashmap_get(const struct hashmap *map, const void *key, const void *keydata) +{ + return *find_entry_ptr(map, key, keydata); +} + +void *hashmap_get_next(const struct hashmap *map, const void *entry) +{ + struct hashmap_entry *e = ((struct hashmap_entry *) entry)->next; + for (; e; e = e->next) + if (entry_equals(map, entry, e, NULL)) + return e; + return NULL; +} + +void hashmap_add(struct hashmap *map, void *entry) +{ + unsigned int b = bucket(map, entry); + + /* add entry */ + ((struct hashmap_entry *) entry)->next = map->table[b]; + map->table[b] = entry; + + /* fix size and rehash if appropriate */ + map->size++; + if (map->size > map->grow_at) + rehash(map, map->tablesize << HASHMAP_RESIZE_BITS); +} + +void *hashmap_remove(struct hashmap *map, const void *key, const void *keydata) +{ + struct hashmap_entry *old; + struct hashmap_entry **e = find_entry_ptr(map, key, keydata); + if (!*e) + return NULL; + + /* remove existing entry */ + old = *e; + *e = old->next; + old->next = NULL; + + /* fix size and rehash if appropriate */ + map->size--; + if (map->size < map->shrink_at) + rehash(map, map->tablesize >> HASHMAP_RESIZE_BITS); + return old; +} + +void *hashmap_put(struct hashmap *map, void *entry) +{ + struct hashmap_entry *old = hashmap_remove(map, entry, NULL); + hashmap_add(map, entry); + return old; +} + +void hashmap_iter_init(struct hashmap *map, struct hashmap_iter *iter) +{ + iter->map = map; + iter->tablepos = 0; + iter->next = NULL; +} + +void *hashmap_iter_next(struct hashmap_iter *iter) +{ + struct hashmap_entry *current = iter->next; + for (;;) { + if (current) { + iter->next = current->next; + return current; + } + + if (iter->tablepos >= iter->map->tablesize) + return NULL; + + current = iter->map->table[iter->tablepos++]; + } +} diff --git a/hashmap.h b/hashmap.h new file mode 100644 index 0000000000..f5b3b61073 --- /dev/null +++ b/hashmap.h @@ -0,0 +1,71 @@ +#ifndef HASHMAP_H +#define HASHMAP_H + +/* + * Generic implementation of hash-based key-value mappings. + * See Documentation/technical/api-hashmap.txt. + */ + +/* FNV-1 functions */ + +extern unsigned int strhash(const char *buf); +extern unsigned int strihash(const char *buf); +extern unsigned int memhash(const void *buf, size_t len); +extern unsigned int memihash(const void *buf, size_t len); + +/* data structures */ + +struct hashmap_entry { + struct hashmap_entry *next; + unsigned int hash; +}; + +typedef int (*hashmap_cmp_fn)(const void *entry, const void *entry_or_key, + const void *keydata); + +struct hashmap { + struct hashmap_entry **table; + hashmap_cmp_fn cmpfn; + unsigned int size, tablesize, grow_at, shrink_at; +}; + +struct hashmap_iter { + struct hashmap *map; + struct hashmap_entry *next; + unsigned int tablepos; +}; + +/* hashmap functions */ + +extern void hashmap_init(struct hashmap *map, hashmap_cmp_fn equals_function, + size_t initial_size); +extern void hashmap_free(struct hashmap *map, int free_entries); + +/* hashmap_entry functions */ + +static inline void hashmap_entry_init(void *entry, int hash) +{ + struct hashmap_entry *e = entry; + e->hash = hash; + e->next = NULL; +} +extern void *hashmap_get(const struct hashmap *map, const void *key, + const void *keydata); +extern void *hashmap_get_next(const struct hashmap *map, const void *entry); +extern void hashmap_add(struct hashmap *map, void *entry); +extern void *hashmap_put(struct hashmap *map, void *entry); +extern void *hashmap_remove(struct hashmap *map, const void *key, + const void *keydata); + +/* hashmap_iter functions */ + +extern void hashmap_iter_init(struct hashmap *map, struct hashmap_iter *iter); +extern void *hashmap_iter_next(struct hashmap_iter *iter); +static inline void *hashmap_iter_first(struct hashmap *map, + struct hashmap_iter *iter) +{ + hashmap_iter_init(map, iter); + return hashmap_iter_next(iter); +} + +#endif diff --git a/t/t0011-hashmap.sh b/t/t0011-hashmap.sh new file mode 100755 index 0000000000..391e2b6492 --- /dev/null +++ b/t/t0011-hashmap.sh @@ -0,0 +1,240 @@ +#!/bin/sh + +test_description='test hashmap and string hash functions' +. ./test-lib.sh + +test_hashmap() { + echo "$1" | test-hashmap $3 > actual && + echo "$2" > expect && + test_cmp expect actual +} + +test_expect_success 'hash functions' ' + +test_hashmap "hash key1" "2215982743 2215982743 116372151 116372151" && +test_hashmap "hash key2" "2215982740 2215982740 116372148 116372148" && +test_hashmap "hash fooBarFrotz" "1383912807 1383912807 3189766727 3189766727" && +test_hashmap "hash foobarfrotz" "2862305959 2862305959 3189766727 3189766727" + +' + +test_expect_success 'put' ' + +test_hashmap "put key1 value1 +put key2 value2 +put fooBarFrotz value3 +put foobarfrotz value4 +size" "NULL +NULL +NULL +NULL +64 4" + +' + +test_expect_success 'put (case insensitive)' ' + +test_hashmap "put key1 value1 +put key2 value2 +put fooBarFrotz value3 +size" "NULL +NULL +NULL +64 3" ignorecase + +' + +test_expect_success 'replace' ' + +test_hashmap "put key1 value1 +put key1 value2 +put fooBarFrotz value3 +put fooBarFrotz value4 +size" "NULL +value1 +NULL +value3 +64 2" + +' + +test_expect_success 'replace (case insensitive)' ' + +test_hashmap "put key1 value1 +put Key1 value2 +put fooBarFrotz value3 +put foobarfrotz value4 +size" "NULL +value1 +NULL +value3 +64 2" ignorecase + +' + +test_expect_success 'get' ' + +test_hashmap "put key1 value1 +put key2 value2 +put fooBarFrotz value3 +put foobarfrotz value4 +get key1 +get key2 +get fooBarFrotz +get notInMap" "NULL +NULL +NULL +NULL +value1 +value2 +value3 +NULL" + +' + +test_expect_success 'get (case insensitive)' ' + +test_hashmap "put key1 value1 +put key2 value2 +put fooBarFrotz value3 +get Key1 +get keY2 +get foobarfrotz +get notInMap" "NULL +NULL +NULL +value1 +value2 +value3 +NULL" ignorecase + +' + +test_expect_success 'add' ' + +test_hashmap "add key1 value1 +add key1 value2 +add fooBarFrotz value3 +add fooBarFrotz value4 +get key1 +get fooBarFrotz +get notInMap" "value2 +value1 +value4 +value3 +NULL" + +' + +test_expect_success 'add (case insensitive)' ' + +test_hashmap "add key1 value1 +add Key1 value2 +add fooBarFrotz value3 +add foobarfrotz value4 +get key1 +get Foobarfrotz +get notInMap" "value2 +value1 +value4 +value3 +NULL" ignorecase + +' + +test_expect_success 'remove' ' + +test_hashmap "put key1 value1 +put key2 value2 +put fooBarFrotz value3 +remove key1 +remove key2 +remove notInMap +size" "NULL +NULL +NULL +value1 +value2 +NULL +64 1" + +' + +test_expect_success 'remove (case insensitive)' ' + +test_hashmap "put key1 value1 +put key2 value2 +put fooBarFrotz value3 +remove Key1 +remove keY2 +remove notInMap +size" "NULL +NULL +NULL +value1 +value2 +NULL +64 1" ignorecase + +' + +test_expect_success 'iterate' ' + +test_hashmap "put key1 value1 +put key2 value2 +put fooBarFrotz value3 +iterate" "NULL +NULL +NULL +key2 value2 +key1 value1 +fooBarFrotz value3" + +' + +test_expect_success 'iterate (case insensitive)' ' + +test_hashmap "put key1 value1 +put key2 value2 +put fooBarFrotz value3 +iterate" "NULL +NULL +NULL +fooBarFrotz value3 +key2 value2 +key1 value1" ignorecase + +' + +test_expect_success 'grow / shrink' ' + + rm -f in && + rm -f expect && + for n in $(test_seq 51) + do + echo put key$n value$n >> in && + echo NULL >> expect + done && + echo size >> in && + echo 64 51 >> expect && + echo put key52 value52 >> in && + echo NULL >> expect + echo size >> in && + echo 256 52 >> expect && + for n in $(test_seq 12) + do + echo remove key$n >> in && + echo value$n >> expect + done && + echo size >> in && + echo 256 40 >> expect && + echo remove key40 >> in && + echo value40 >> expect && + echo size >> in && + echo 64 39 >> expect && + cat in | test-hashmap > out && + test_cmp expect out + +' + +test_done diff --git a/test-hashmap.c b/test-hashmap.c new file mode 100644 index 0000000000..581d2964e4 --- /dev/null +++ b/test-hashmap.c @@ -0,0 +1,340 @@ +#include "cache.h" +#include "hashmap.h" +#include + +struct test_entry +{ + struct hashmap_entry ent; + /* key and value as two \0-terminated strings */ + char key[FLEX_ARRAY]; +}; + +static const char *get_value(const struct test_entry *e) +{ + return e->key + strlen(e->key) + 1; +} + +static int test_entry_cmp(const struct test_entry *e1, + const struct test_entry *e2, const char* key) +{ + return strcmp(e1->key, key ? key : e2->key); +} + +static int test_entry_cmp_icase(const struct test_entry *e1, + const struct test_entry *e2, const char* key) +{ + return strcasecmp(e1->key, key ? key : e2->key); +} + +static struct test_entry *alloc_test_entry(int hash, char *key, int klen, + char *value, int vlen) +{ + struct test_entry *entry = malloc(sizeof(struct test_entry) + klen + + vlen + 2); + hashmap_entry_init(entry, hash); + memcpy(entry->key, key, klen + 1); + memcpy(entry->key + klen + 1, value, vlen + 1); + return entry; +} + +#define HASH_METHOD_FNV 0 +#define HASH_METHOD_I 1 +#define HASH_METHOD_IDIV10 2 +#define HASH_METHOD_0 3 +#define HASH_METHOD_X2 4 +#define TEST_SPARSE 8 +#define TEST_ADD 16 +#define TEST_SIZE 100000 + +static unsigned int hash(unsigned int method, unsigned int i, const char *key) +{ + unsigned int hash; + switch (method & 3) + { + case HASH_METHOD_FNV: + hash = strhash(key); + break; + case HASH_METHOD_I: + hash = i; + break; + case HASH_METHOD_IDIV10: + hash = i / 10; + break; + case HASH_METHOD_0: + hash = 0; + break; + } + + if (method & HASH_METHOD_X2) + hash = 2 * hash; + return hash; +} + +/* + * Test performance of hashmap.[ch] + * Usage: time echo "perfhashmap method rounds" | test-hashmap + */ +static void perf_hashmap(unsigned int method, unsigned int rounds) +{ + struct hashmap map; + char buf[16]; + struct test_entry **entries; + unsigned int *hashes; + unsigned int i, j; + + entries = malloc(TEST_SIZE * sizeof(struct test_entry *)); + hashes = malloc(TEST_SIZE * sizeof(int)); + for (i = 0; i < TEST_SIZE; i++) { + snprintf(buf, sizeof(buf), "%i", i); + entries[i] = alloc_test_entry(0, buf, strlen(buf), "", 0); + hashes[i] = hash(method, i, entries[i]->key); + } + + if (method & TEST_ADD) { + /* test adding to the map */ + for (j = 0; j < rounds; j++) { + hashmap_init(&map, (hashmap_cmp_fn) test_entry_cmp, 0); + + /* add entries */ + for (i = 0; i < TEST_SIZE; i++) { + hashmap_entry_init(entries[i], hashes[i]); + hashmap_add(&map, entries[i]); + } + + hashmap_free(&map, 0); + } + } else { + /* test map lookups */ + hashmap_init(&map, (hashmap_cmp_fn) test_entry_cmp, 0); + + /* fill the map (sparsely if specified) */ + j = (method & TEST_SPARSE) ? TEST_SIZE / 10 : TEST_SIZE; + for (i = 0; i < j; i++) { + hashmap_entry_init(entries[i], hashes[i]); + hashmap_add(&map, entries[i]); + } + + for (j = 0; j < rounds; j++) { + for (i = 0; i < TEST_SIZE; i++) { + struct hashmap_entry key; + hashmap_entry_init(&key, hashes[i]); + hashmap_get(&map, &key, entries[i]->key); + } + } + + hashmap_free(&map, 0); + } +} + +struct hash_entry +{ + struct hash_entry *next; + char key[FLEX_ARRAY]; +}; + +/* + * Test performance of hash.[ch] + * Usage: time echo "perfhash method rounds" | test-hashmap + */ +static void perf_hash(unsigned int method, unsigned int rounds) +{ + struct hash_table map; + char buf[16]; + struct hash_entry **entries, **res, *entry; + unsigned int *hashes; + unsigned int i, j; + + entries = malloc(TEST_SIZE * sizeof(struct hash_entry *)); + hashes = malloc(TEST_SIZE * sizeof(int)); + for (i = 0; i < TEST_SIZE; i++) { + snprintf(buf, sizeof(buf), "%i", i); + entries[i] = malloc(sizeof(struct hash_entry) + strlen(buf) + 1); + strcpy(entries[i]->key, buf); + hashes[i] = hash(method, i, entries[i]->key); + } + + if (method & TEST_ADD) { + /* test adding to the map */ + for (j = 0; j < rounds; j++) { + init_hash(&map); + + /* add entries */ + for (i = 0; i < TEST_SIZE; i++) { + res = (struct hash_entry **) insert_hash( + hashes[i], entries[i], &map); + if (res) { + entries[i]->next = *res; + *res = entries[i]; + } else { + entries[i]->next = NULL; + } + } + + free_hash(&map); + } + } else { + /* test map lookups */ + init_hash(&map); + + /* fill the map (sparsely if specified) */ + j = (method & TEST_SPARSE) ? TEST_SIZE / 10 : TEST_SIZE; + for (i = 0; i < j; i++) { + res = (struct hash_entry **) insert_hash(hashes[i], + entries[i], &map); + if (res) { + entries[i]->next = *res; + *res = entries[i]; + } else { + entries[i]->next = NULL; + } + } + + for (j = 0; j < rounds; j++) { + for (i = 0; i < TEST_SIZE; i++) { + entry = lookup_hash(hashes[i], &map); + while (entry) { + if (!strcmp(entries[i]->key, entry->key)) + break; + entry = entry->next; + } + } + } + + free_hash(&map); + + } +} + +#define DELIM " \t\r\n" + +/* + * Read stdin line by line and print result of commands to stdout: + * + * hash key -> strhash(key) memhash(key) strihash(key) memihash(key) + * put key value -> NULL / old value + * get key -> NULL / value + * remove key -> NULL / old value + * iterate -> key1 value1\nkey2 value2\n... + * size -> tablesize numentries + * + * perfhashmap method rounds -> test hashmap.[ch] performance + * perfhash method rounds -> test hash.[ch] performance + */ +int main(int argc, char *argv[]) +{ + char line[1024]; + struct hashmap map; + int icase; + + /* init hash map */ + icase = argc > 1 && !strcmp("ignorecase", argv[1]); + hashmap_init(&map, (hashmap_cmp_fn) (icase ? test_entry_cmp_icase + : test_entry_cmp), 0); + + /* process commands from stdin */ + while (fgets(line, sizeof(line), stdin)) { + char *cmd, *p1 = NULL, *p2 = NULL; + int l1 = 0, l2 = 0, hash = 0; + struct test_entry *entry; + + /* break line into command and up to two parameters */ + cmd = strtok(line, DELIM); + /* ignore empty lines */ + if (!cmd || *cmd == '#') + continue; + + p1 = strtok(NULL, DELIM); + if (p1) { + l1 = strlen(p1); + hash = icase ? strihash(p1) : strhash(p1); + p2 = strtok(NULL, DELIM); + if (p2) + l2 = strlen(p2); + } + + if (!strcmp("hash", cmd) && l1) { + + /* print results of different hash functions */ + printf("%u %u %u %u\n", strhash(p1), memhash(p1, l1), + strihash(p1), memihash(p1, l1)); + + } else if (!strcmp("add", cmd) && l1 && l2) { + + /* create entry with key = p1, value = p2 */ + entry = alloc_test_entry(hash, p1, l1, p2, l2); + + /* add to hashmap */ + hashmap_add(&map, entry); + + } else if (!strcmp("put", cmd) && l1 && l2) { + + /* create entry with key = p1, value = p2 */ + entry = alloc_test_entry(hash, p1, l1, p2, l2); + + /* add / replace entry */ + entry = hashmap_put(&map, entry); + + /* print and free replaced entry, if any */ + puts(entry ? get_value(entry) : "NULL"); + free(entry); + + } else if (!strcmp("get", cmd) && l1) { + + /* setup static key */ + struct hashmap_entry key; + hashmap_entry_init(&key, hash); + + /* lookup entry in hashmap */ + entry = hashmap_get(&map, &key, p1); + + /* print result */ + if (!entry) + puts("NULL"); + while (entry) { + puts(get_value(entry)); + entry = hashmap_get_next(&map, entry); + } + + } else if (!strcmp("remove", cmd) && l1) { + + /* setup static key */ + struct hashmap_entry key; + hashmap_entry_init(&key, hash); + + /* remove entry from hashmap */ + entry = hashmap_remove(&map, &key, p1); + + /* print result and free entry*/ + puts(entry ? get_value(entry) : "NULL"); + free(entry); + + } else if (!strcmp("iterate", cmd)) { + + struct hashmap_iter iter; + hashmap_iter_init(&map, &iter); + while ((entry = hashmap_iter_next(&iter))) + printf("%s %s\n", entry->key, get_value(entry)); + + } else if (!strcmp("size", cmd)) { + + /* print table sizes */ + printf("%u %u\n", map.tablesize, map.size); + + } else if (!strcmp("perfhashmap", cmd) && l1 && l2) { + + perf_hashmap(atoi(p1), atoi(p2)); + + } else if (!strcmp("perfhash", cmd) && l1 && l2) { + + perf_hash(atoi(p1), atoi(p2)); + + } else { + + printf("Unknown command %s\n", cmd); + + } + } + + hashmap_free(&map, 1); + return 0; +} -- cgit v1.2.3 From 29d8a834b5fcb1de057ed8563339b104e0251717 Mon Sep 17 00:00:00 2001 From: Karsten Blees Date: Thu, 14 Nov 2013 20:18:35 +0100 Subject: buitin/describe.c: use new hash map implementation Signed-off-by: Karsten Blees Signed-off-by: Junio C Hamano --- builtin/describe.c | 53 ++++++++++++++++++++++++----------------------------- 1 file changed, 24 insertions(+), 29 deletions(-) diff --git a/builtin/describe.c b/builtin/describe.c index b9d3603704..104202898e 100644 --- a/builtin/describe.c +++ b/builtin/describe.c @@ -6,7 +6,7 @@ #include "exec_cmd.h" #include "parse-options.h" #include "diff.h" -#include "hash.h" +#include "hashmap.h" #include "argv-array.h" #define SEEN (1u<<0) @@ -25,7 +25,7 @@ static int longformat; static int first_parent; static int abbrev = -1; /* unspecified */ static int max_candidates = 10; -static struct hash_table names; +static struct hashmap names; static int have_util; static const char *pattern; static int always; @@ -38,7 +38,7 @@ static const char *diff_index_args[] = { struct commit_name { - struct commit_name *next; + struct hashmap_entry entry; unsigned char peeled[20]; struct tag *tag; unsigned prio:2; /* annotated tag = 2, tag = 1, head = 0 */ @@ -50,6 +50,12 @@ static const char *prio_names[] = { "head", "lightweight", "annotated", }; +static int commit_name_cmp(const struct commit_name *cn1, + const struct commit_name *cn2, const void *peeled) +{ + return hashcmp(cn1->peeled, peeled ? peeled : cn2->peeled); +} + static inline unsigned int hash_sha1(const unsigned char *sha1) { unsigned int hash; @@ -59,21 +65,9 @@ static inline unsigned int hash_sha1(const unsigned char *sha1) static inline struct commit_name *find_commit_name(const unsigned char *peeled) { - struct commit_name *n = lookup_hash(hash_sha1(peeled), &names); - while (n && !!hashcmp(peeled, n->peeled)) - n = n->next; - return n; -} - -static int set_util(void *chain, void *data) -{ - struct commit_name *n; - for (n = chain; n; n = n->next) { - struct commit *c = lookup_commit_reference_gently(n->peeled, 1); - if (c) - c->util = n; - } - return 0; + struct commit_name key; + hashmap_entry_init(&key, hash_sha1(peeled)); + return hashmap_get(&names, &key, peeled); } static int replace_name(struct commit_name *e, @@ -118,16 +112,10 @@ static void add_to_known_names(const char *path, struct tag *tag = NULL; if (replace_name(e, prio, sha1, &tag)) { if (!e) { - void **pos; e = xmalloc(sizeof(struct commit_name)); hashcpy(e->peeled, peeled); - pos = insert_hash(hash_sha1(peeled), e, &names); - if (pos) { - e->next = *pos; - *pos = e; - } else { - e->next = NULL; - } + hashmap_entry_init(e, hash_sha1(peeled)); + hashmap_add(&names, e); e->path = NULL; } e->tag = tag; @@ -292,7 +280,14 @@ static void describe(const char *arg, int last_one) fprintf(stderr, _("searching to describe %s\n"), arg); if (!have_util) { - for_each_hash(&names, set_util, NULL); + struct hashmap_iter iter; + struct commit *c; + struct commit_name *n = hashmap_iter_first(&names, &iter); + for (; n; n = hashmap_iter_next(&iter)) { + c = lookup_commit_reference_gently(n->peeled, 1); + if (c) + c->util = n; + } have_util = 1; } @@ -463,9 +458,9 @@ int cmd_describe(int argc, const char **argv, const char *prefix) return cmd_name_rev(args.argc, args.argv, prefix); } - init_hash(&names); + hashmap_init(&names, (hashmap_cmp_fn) commit_name_cmp, 0); for_each_rawref(get_name, NULL); - if (!names.nr && !always) + if (!names.size && !always) die(_("No names found, cannot describe anything.")); if (argc == 0) { -- cgit v1.2.3 From 48f6407ffec1db18f5ef3316f0d761965bb17f1e Mon Sep 17 00:00:00 2001 From: Karsten Blees Date: Thu, 14 Nov 2013 20:19:04 +0100 Subject: diffcore-rename.c: move code around to prepare for the next patch No actual code changes, just move hash_filespec up and outdent part of find_identical_files. Signed-off-by: Karsten Blees Signed-off-by: Junio C Hamano --- diffcore-rename.c | 98 +++++++++++++++++++++++++++---------------------------- 1 file changed, 49 insertions(+), 49 deletions(-) diff --git a/diffcore-rename.c b/diffcore-rename.c index 6c7a72fbe7..008a60c09d 100644 --- a/diffcore-rename.c +++ b/diffcore-rename.c @@ -248,6 +248,18 @@ struct file_similarity { struct file_similarity *next; }; +static unsigned int hash_filespec(struct diff_filespec *filespec) +{ + unsigned int hash; + if (!filespec->sha1_valid) { + if (diff_populate_filespec(filespec, 0)) + return 0; + hash_sha1_file(filespec->data, filespec->size, "blob", filespec->sha1); + } + memcpy(&hash, filespec->sha1, sizeof(hash)); + return hash; +} + static int find_identical_files(struct file_similarity *src, struct file_similarity *dst, struct diff_options *options) @@ -258,46 +270,46 @@ static int find_identical_files(struct file_similarity *src, * Walk over all the destinations ... */ do { - struct diff_filespec *target = dst->filespec; - struct file_similarity *p, *best; - int i = 100, best_score = -1; - - /* - * .. to find the best source match - */ - best = NULL; - for (p = src; p; p = p->next) { - int score; - struct diff_filespec *source = p->filespec; - - /* False hash collision? */ - if (hashcmp(source->sha1, target->sha1)) - continue; - /* Non-regular files? If so, the modes must match! */ - if (!S_ISREG(source->mode) || !S_ISREG(target->mode)) { - if (source->mode != target->mode) - continue; - } - /* Give higher scores to sources that haven't been used already */ - score = !source->rename_used; - if (source->rename_used && options->detect_rename != DIFF_DETECT_COPY) - continue; - score += basename_same(source, target); - if (score > best_score) { - best = p; - best_score = score; - if (score == 2) - break; - } + struct diff_filespec *target = dst->filespec; + struct file_similarity *p, *best; + int i = 100, best_score = -1; - /* Too many identical alternatives? Pick one */ - if (!--i) - break; + /* + * .. to find the best source match + */ + best = NULL; + for (p = src; p; p = p->next) { + int score; + struct diff_filespec *source = p->filespec; + + /* False hash collision? */ + if (hashcmp(source->sha1, target->sha1)) + continue; + /* Non-regular files? If so, the modes must match! */ + if (!S_ISREG(source->mode) || !S_ISREG(target->mode)) { + if (source->mode != target->mode) + continue; } - if (best) { - record_rename_pair(dst->index, best->index, MAX_SCORE); - renames++; + /* Give higher scores to sources that haven't been used already */ + score = !source->rename_used; + if (source->rename_used && options->detect_rename != DIFF_DETECT_COPY) + continue; + score += basename_same(source, target); + if (score > best_score) { + best = p; + best_score = score; + if (score == 2) + break; } + + /* Too many identical alternatives? Pick one */ + if (!--i) + break; + } + if (best) { + record_rename_pair(dst->index, best->index, MAX_SCORE); + renames++; + } } while ((dst = dst->next) != NULL); return renames; } @@ -343,18 +355,6 @@ static int find_same_files(void *ptr, void *data) return ret; } -static unsigned int hash_filespec(struct diff_filespec *filespec) -{ - unsigned int hash; - if (!filespec->sha1_valid) { - if (diff_populate_filespec(filespec, 0)) - return 0; - hash_sha1_file(filespec->data, filespec->size, "blob", filespec->sha1); - } - memcpy(&hash, filespec->sha1, sizeof(hash)); - return hash; -} - static void insert_file_table(struct hash_table *table, int src_dst, int index, struct diff_filespec *filespec) { void **pos; -- cgit v1.2.3 From 7c85f8acb2282e3ed108c46b59fd5daa78bf17db Mon Sep 17 00:00:00 2001 From: Karsten Blees Date: Thu, 14 Nov 2013 20:19:34 +0100 Subject: diffcore-rename.c: simplify finding exact renames The find_exact_renames function currently only uses the hash table for grouping, i.e.: 1. add sources 2. add destinations 3. iterate all buckets, per bucket: 4. split sources from destinations 5. iterate destinations, per destination: 6. iterate sources to find best match This can be simplified by utilizing the lookup functionality of the hash table, i.e.: 1. add sources 2. iterate destinations, per destination: 3. lookup sources matching the current destination 4. iterate sources to find best match This saves several iterations and file_similarity allocations for the destinations. Signed-off-by: Karsten Blees Signed-off-by: Junio C Hamano --- diffcore-rename.c | 75 +++++++++++++++---------------------------------------- 1 file changed, 20 insertions(+), 55 deletions(-) diff --git a/diffcore-rename.c b/diffcore-rename.c index 008a60c09d..cfeb408ea7 100644 --- a/diffcore-rename.c +++ b/diffcore-rename.c @@ -243,7 +243,7 @@ static int score_compare(const void *a_, const void *b_) } struct file_similarity { - int src_dst, index; + int index; struct diff_filespec *filespec; struct file_similarity *next; }; @@ -260,25 +260,21 @@ static unsigned int hash_filespec(struct diff_filespec *filespec) return hash; } -static int find_identical_files(struct file_similarity *src, - struct file_similarity *dst, +static int find_identical_files(struct hash_table *srcs, + int dst_index, struct diff_options *options) { int renames = 0; - /* - * Walk over all the destinations ... - */ - do { - struct diff_filespec *target = dst->filespec; + struct diff_filespec *target = rename_dst[dst_index].two; struct file_similarity *p, *best; int i = 100, best_score = -1; /* - * .. to find the best source match + * Find the best source match for specified destination. */ best = NULL; - for (p = src; p; p = p->next) { + for (p = lookup_hash(hash_filespec(target), srcs); p; p = p->next) { int score; struct diff_filespec *source = p->filespec; @@ -307,61 +303,28 @@ static int find_identical_files(struct file_similarity *src, break; } if (best) { - record_rename_pair(dst->index, best->index, MAX_SCORE); + record_rename_pair(dst_index, best->index, MAX_SCORE); renames++; } - } while ((dst = dst->next) != NULL); return renames; } -static void free_similarity_list(struct file_similarity *p) +static int free_similarity_list(void *p, void *unused) { while (p) { struct file_similarity *entry = p; - p = p->next; + p = entry->next; free(entry); } + return 0; } -static int find_same_files(void *ptr, void *data) -{ - int ret; - struct file_similarity *p = ptr; - struct file_similarity *src = NULL, *dst = NULL; - struct diff_options *options = data; - - /* Split the hash list up into sources and destinations */ - do { - struct file_similarity *entry = p; - p = p->next; - if (entry->src_dst < 0) { - entry->next = src; - src = entry; - } else { - entry->next = dst; - dst = entry; - } - } while (p); - - /* - * If we have both sources *and* destinations, see if - * we can match them up - */ - ret = (src && dst) ? find_identical_files(src, dst, options) : 0; - - /* Free the hashes and return the number of renames found */ - free_similarity_list(src); - free_similarity_list(dst); - return ret; -} - -static void insert_file_table(struct hash_table *table, int src_dst, int index, struct diff_filespec *filespec) +static void insert_file_table(struct hash_table *table, int index, struct diff_filespec *filespec) { void **pos; unsigned int hash; struct file_similarity *entry = xmalloc(sizeof(*entry)); - entry->src_dst = src_dst; entry->index = index; entry->filespec = filespec; entry->next = NULL; @@ -385,24 +348,26 @@ static void insert_file_table(struct hash_table *table, int src_dst, int index, */ static int find_exact_renames(struct diff_options *options) { - int i; + int i, renames = 0; struct hash_table file_table; + /* Add all sources to the hash table */ init_hash(&file_table); - preallocate_hash(&file_table, rename_src_nr + rename_dst_nr); + preallocate_hash(&file_table, rename_src_nr); for (i = 0; i < rename_src_nr; i++) - insert_file_table(&file_table, -1, i, rename_src[i].p->one); + insert_file_table(&file_table, i, rename_src[i].p->one); + /* Walk the destinations and find best source match */ for (i = 0; i < rename_dst_nr; i++) - insert_file_table(&file_table, 1, i, rename_dst[i].two); + renames += find_identical_files(&file_table, i, options); - /* Find the renames */ - i = for_each_hash(&file_table, find_same_files, options); + /* Free source file_similarity chains */ + for_each_hash(&file_table, free_similarity_list, options); /* .. and free the hash data structure */ free_hash(&file_table); - return i; + return renames; } #define NUM_CANDIDATE_PER_DST 4 -- cgit v1.2.3 From f79d9c581437d8f48e10506e62c34b9718c7f3f7 Mon Sep 17 00:00:00 2001 From: Karsten Blees Date: Thu, 14 Nov 2013 20:20:26 +0100 Subject: diffcore-rename.c: use new hash map implementation Signed-off-by: Karsten Blees Signed-off-by: Junio C Hamano --- diffcore-rename.c | 48 +++++++++++++----------------------------------- 1 file changed, 13 insertions(+), 35 deletions(-) diff --git a/diffcore-rename.c b/diffcore-rename.c index cfeb408ea7..9b4f068eb3 100644 --- a/diffcore-rename.c +++ b/diffcore-rename.c @@ -4,7 +4,7 @@ #include "cache.h" #include "diff.h" #include "diffcore.h" -#include "hash.h" +#include "hashmap.h" #include "progress.h" /* Table of rename/copy destinations */ @@ -243,9 +243,9 @@ static int score_compare(const void *a_, const void *b_) } struct file_similarity { + struct hashmap_entry entry; int index; struct diff_filespec *filespec; - struct file_similarity *next; }; static unsigned int hash_filespec(struct diff_filespec *filespec) @@ -260,21 +260,22 @@ static unsigned int hash_filespec(struct diff_filespec *filespec) return hash; } -static int find_identical_files(struct hash_table *srcs, +static int find_identical_files(struct hashmap *srcs, int dst_index, struct diff_options *options) { int renames = 0; struct diff_filespec *target = rename_dst[dst_index].two; - struct file_similarity *p, *best; + struct file_similarity *p, *best, dst; int i = 100, best_score = -1; /* * Find the best source match for specified destination. */ best = NULL; - for (p = lookup_hash(hash_filespec(target), srcs); p; p = p->next) { + hashmap_entry_init(&dst, hash_filespec(target)); + for (p = hashmap_get(srcs, &dst, NULL); p; p = hashmap_get_next(srcs, p)) { int score; struct diff_filespec *source = p->filespec; @@ -309,34 +310,15 @@ static int find_identical_files(struct hash_table *srcs, return renames; } -static int free_similarity_list(void *p, void *unused) +static void insert_file_table(struct hashmap *table, int index, struct diff_filespec *filespec) { - while (p) { - struct file_similarity *entry = p; - p = entry->next; - free(entry); - } - return 0; -} - -static void insert_file_table(struct hash_table *table, int index, struct diff_filespec *filespec) -{ - void **pos; - unsigned int hash; struct file_similarity *entry = xmalloc(sizeof(*entry)); entry->index = index; entry->filespec = filespec; - entry->next = NULL; - - hash = hash_filespec(filespec); - pos = insert_hash(hash, entry, table); - /* We already had an entry there? */ - if (pos) { - entry->next = *pos; - *pos = entry; - } + hashmap_entry_init(entry, hash_filespec(filespec)); + hashmap_add(table, entry); } /* @@ -349,11 +331,10 @@ static void insert_file_table(struct hash_table *table, int index, struct diff_f static int find_exact_renames(struct diff_options *options) { int i, renames = 0; - struct hash_table file_table; + struct hashmap file_table; /* Add all sources to the hash table */ - init_hash(&file_table); - preallocate_hash(&file_table, rename_src_nr); + hashmap_init(&file_table, NULL, rename_src_nr); for (i = 0; i < rename_src_nr; i++) insert_file_table(&file_table, i, rename_src[i].p->one); @@ -361,11 +342,8 @@ static int find_exact_renames(struct diff_options *options) for (i = 0; i < rename_dst_nr; i++) renames += find_identical_files(&file_table, i, options); - /* Free source file_similarity chains */ - for_each_hash(&file_table, free_similarity_list, options); - - /* .. and free the hash data structure */ - free_hash(&file_table); + /* Free the hash data structure and entries */ + hashmap_free(&file_table, 1); return renames; } -- cgit v1.2.3 From e05881a4577cedca4d7456b3cb1f8bbd6f480df2 Mon Sep 17 00:00:00 2001 From: Karsten Blees Date: Thu, 14 Nov 2013 20:20:58 +0100 Subject: name-hash.c: use new hash map implementation for directories Signed-off-by: Karsten Blees Signed-off-by: Junio C Hamano --- cache.h | 3 ++- name-hash.c | 77 +++++++++++++++---------------------------------------------- 2 files changed, 20 insertions(+), 60 deletions(-) diff --git a/cache.h b/cache.h index 5e3fc72fd4..f9b13ecb3f 100644 --- a/cache.h +++ b/cache.h @@ -4,6 +4,7 @@ #include "git-compat-util.h" #include "strbuf.h" #include "hash.h" +#include "hashmap.h" #include "advice.h" #include "gettext.h" #include "convert.h" @@ -278,7 +279,7 @@ struct index_state { unsigned name_hash_initialized : 1, initialized : 1; struct hash_table name_hash; - struct hash_table dir_hash; + struct hashmap dir_hash; }; extern struct index_state the_index; diff --git a/name-hash.c b/name-hash.c index e5b6e1ad23..c75fadf33b 100644 --- a/name-hash.c +++ b/name-hash.c @@ -8,49 +8,28 @@ #define NO_THE_INDEX_COMPATIBILITY_MACROS #include "cache.h" -/* - * This removes bit 5 if bit 6 is set. - * - * That will make US-ASCII characters hash to their upper-case - * equivalent. We could easily do this one whole word at a time, - * but that's for future worries. - */ -static inline unsigned char icase_hash(unsigned char c) -{ - return c & ~((c & 0x40) >> 1); -} - -static unsigned int hash_name(const char *name, int namelen) -{ - unsigned int hash = 0x123; - - while (namelen--) { - unsigned char c = *name++; - c = icase_hash(c); - hash = hash*101 + c; - } - return hash; -} - struct dir_entry { - struct dir_entry *next; + struct hashmap_entry ent; struct dir_entry *parent; struct cache_entry *ce; int nr; unsigned int namelen; }; +static int dir_entry_cmp(const struct dir_entry *e1, + const struct dir_entry *e2, const char *name) +{ + return e1->namelen != e2->namelen || strncasecmp(e1->ce->name, + name ? name : e2->ce->name, e1->namelen); +} + static struct dir_entry *find_dir_entry(struct index_state *istate, const char *name, unsigned int namelen) { - unsigned int hash = hash_name(name, namelen); - struct dir_entry *dir; - - for (dir = lookup_hash(hash, &istate->dir_hash); dir; dir = dir->next) - if (dir->namelen == namelen && - !strncasecmp(dir->ce->name, name, namelen)) - return dir; - return NULL; + struct dir_entry key; + hashmap_entry_init(&key, memihash(name, namelen)); + key.namelen = namelen; + return hashmap_get(&istate->dir_hash, &key, name); } static struct dir_entry *hash_dir_entry(struct index_state *istate, @@ -84,18 +63,11 @@ static struct dir_entry *hash_dir_entry(struct index_state *istate, dir = find_dir_entry(istate, ce->name, namelen); if (!dir) { /* not found, create it and add to hash table */ - void **pdir; - unsigned int hash = hash_name(ce->name, namelen); - dir = xcalloc(1, sizeof(struct dir_entry)); + hashmap_entry_init(dir, memihash(ce->name, namelen)); dir->namelen = namelen; dir->ce = ce; - - pdir = insert_hash(hash, dir, &istate->dir_hash); - if (pdir) { - dir->next = *pdir; - *pdir = dir; - } + hashmap_add(&istate->dir_hash, dir); /* recursively add missing parent directories */ dir->parent = hash_dir_entry(istate, ce, namelen); @@ -134,7 +106,7 @@ static void hash_index_entry(struct index_state *istate, struct cache_entry *ce) return; ce->ce_flags |= CE_HASHED; ce->next = NULL; - hash = hash_name(ce->name, ce_namelen(ce)); + hash = memihash(ce->name, ce_namelen(ce)); pos = insert_hash(hash, ce, &istate->name_hash); if (pos) { ce->next = *pos; @@ -153,6 +125,7 @@ static void lazy_init_name_hash(struct index_state *istate) return; if (istate->cache_nr) preallocate_hash(&istate->name_hash, istate->cache_nr); + hashmap_init(&istate->dir_hash, (hashmap_cmp_fn) dir_entry_cmp, 0); for (nr = 0; nr < istate->cache_nr; nr++) hash_index_entry(istate, istate->cache[nr]); istate->name_hash_initialized = 1; @@ -247,7 +220,7 @@ struct cache_entry *index_dir_exists(struct index_state *istate, const char *nam struct cache_entry *index_file_exists(struct index_state *istate, const char *name, int namelen, int icase) { - unsigned int hash = hash_name(name, namelen); + unsigned int hash = memihash(name, namelen); struct cache_entry *ce; lazy_init_name_hash(istate); @@ -270,26 +243,12 @@ struct cache_entry *index_name_exists(struct index_state *istate, const char *na return index_file_exists(istate, name, namelen, icase); } -static int free_dir_entry(void *entry, void *unused) -{ - struct dir_entry *dir = entry; - while (dir) { - struct dir_entry *next = dir->next; - free(dir); - dir = next; - } - return 0; -} - void free_name_hash(struct index_state *istate) { if (!istate->name_hash_initialized) return; istate->name_hash_initialized = 0; - if (ignore_case) - /* free directory entries */ - for_each_hash(&istate->dir_hash, free_dir_entry, NULL); free_hash(&istate->name_hash); - free_hash(&istate->dir_hash); + hashmap_free(&istate->dir_hash, 1); } -- cgit v1.2.3 From 1c8cca190a1029d16450e61fbc4ce6f85a867f30 Mon Sep 17 00:00:00 2001 From: Karsten Blees Date: Thu, 14 Nov 2013 20:21:26 +0100 Subject: name-hash.c: remove unreferenced directory entries The new hashmap implementation supports remove, so remove and free directory entries that are no longer referenced by active cache entries. Signed-off-by: Karsten Blees Signed-off-by: Junio C Hamano --- name-hash.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/name-hash.c b/name-hash.c index c75fadf33b..effe96db0b 100644 --- a/name-hash.c +++ b/name-hash.c @@ -86,15 +86,16 @@ static void add_dir_entry(struct index_state *istate, struct cache_entry *ce) static void remove_dir_entry(struct index_state *istate, struct cache_entry *ce) { /* - * Release reference to the directory entry (and parents if 0). - * - * Note: we do not remove / free the entry because there's no - * hash.[ch]::remove_hash and dir->next may point to other entries - * that are still valid, so we must not free the memory. + * Release reference to the directory entry. If 0, remove and continue + * with parent directory. */ struct dir_entry *dir = hash_dir_entry(istate, ce, ce_namelen(ce)); - while (dir && dir->nr && !(--dir->nr)) - dir = dir->parent; + while (dir && !(--dir->nr)) { + struct dir_entry *parent = dir->parent; + hashmap_remove(&istate->dir_hash, dir, NULL); + free(dir); + dir = parent; + } } static void hash_index_entry(struct index_state *istate, struct cache_entry *ce) -- cgit v1.2.3 From 8b013788a14b96b8d20b1f6bc76a42f9733aefad Mon Sep 17 00:00:00 2001 From: Karsten Blees Date: Thu, 14 Nov 2013 20:21:58 +0100 Subject: name-hash.c: use new hash map implementation for cache entries Note: the "ce->next = NULL;" in unpack-trees.c::do_add_entry can safely be removed, as ce->next (now ce->ent.next) is always properly initialized in name-hash.c::hash_index_entry. Signed-off-by: Karsten Blees Signed-off-by: Junio C Hamano --- cache.h | 8 +++++--- name-hash.c | 24 ++++++++---------------- unpack-trees.c | 1 - 3 files changed, 13 insertions(+), 20 deletions(-) diff --git a/cache.h b/cache.h index f9b13ecb3f..85210b1bd2 100644 --- a/cache.h +++ b/cache.h @@ -131,12 +131,12 @@ struct stat_data { }; struct cache_entry { + struct hashmap_entry ent; struct stat_data ce_stat_data; unsigned int ce_mode; unsigned int ce_flags; unsigned int ce_namelen; unsigned char sha1[20]; - struct cache_entry *next; char name[FLEX_ARRAY]; /* more */ }; @@ -203,7 +203,9 @@ static inline void copy_cache_entry(struct cache_entry *dst, unsigned int state = dst->ce_flags & CE_STATE_MASK; /* Don't copy hash chain and name */ - memcpy(dst, src, offsetof(struct cache_entry, next)); + memcpy(&dst->ce_stat_data, &src->ce_stat_data, + offsetof(struct cache_entry, name) - + offsetof(struct cache_entry, ce_stat_data)); /* Restore the hash state */ dst->ce_flags = (dst->ce_flags & ~CE_STATE_MASK) | state; @@ -278,7 +280,7 @@ struct index_state { struct cache_time timestamp; unsigned name_hash_initialized : 1, initialized : 1; - struct hash_table name_hash; + struct hashmap name_hash; struct hashmap dir_hash; }; diff --git a/name-hash.c b/name-hash.c index effe96db0b..488eccf2f9 100644 --- a/name-hash.c +++ b/name-hash.c @@ -100,19 +100,11 @@ static void remove_dir_entry(struct index_state *istate, struct cache_entry *ce) static void hash_index_entry(struct index_state *istate, struct cache_entry *ce) { - void **pos; - unsigned int hash; - if (ce->ce_flags & CE_HASHED) return; ce->ce_flags |= CE_HASHED; - ce->next = NULL; - hash = memihash(ce->name, ce_namelen(ce)); - pos = insert_hash(hash, ce, &istate->name_hash); - if (pos) { - ce->next = *pos; - *pos = ce; - } + hashmap_entry_init(ce, memihash(ce->name, ce_namelen(ce))); + hashmap_add(&istate->name_hash, ce); if (ignore_case && !(ce->ce_flags & CE_UNHASHED)) add_dir_entry(istate, ce); @@ -124,8 +116,7 @@ static void lazy_init_name_hash(struct index_state *istate) if (istate->name_hash_initialized) return; - if (istate->cache_nr) - preallocate_hash(&istate->name_hash, istate->cache_nr); + hashmap_init(&istate->name_hash, NULL, istate->cache_nr); hashmap_init(&istate->dir_hash, (hashmap_cmp_fn) dir_entry_cmp, 0); for (nr = 0; nr < istate->cache_nr; nr++) hash_index_entry(istate, istate->cache[nr]); @@ -221,18 +212,19 @@ struct cache_entry *index_dir_exists(struct index_state *istate, const char *nam struct cache_entry *index_file_exists(struct index_state *istate, const char *name, int namelen, int icase) { - unsigned int hash = memihash(name, namelen); struct cache_entry *ce; + struct hashmap_entry key; lazy_init_name_hash(istate); - ce = lookup_hash(hash, &istate->name_hash); + hashmap_entry_init(&key, memihash(name, namelen)); + ce = hashmap_get(&istate->name_hash, &key, NULL); while (ce) { if (!(ce->ce_flags & CE_UNHASHED)) { if (same_name(ce, name, namelen, icase)) return ce; } - ce = ce->next; + ce = hashmap_get_next(&istate->name_hash, ce); } return NULL; } @@ -250,6 +242,6 @@ void free_name_hash(struct index_state *istate) return; istate->name_hash_initialized = 0; - free_hash(&istate->name_hash); + hashmap_free(&istate->name_hash, 0); hashmap_free(&istate->dir_hash, 1); } diff --git a/unpack-trees.c b/unpack-trees.c index 35cb05e92b..86f5f8837e 100644 --- a/unpack-trees.c +++ b/unpack-trees.c @@ -110,7 +110,6 @@ static void do_add_entry(struct unpack_trees_options *o, struct cache_entry *ce, if (set & CE_REMOVE) set |= CE_WT_REMOVE; - ce->next = NULL; ce->ce_flags = (ce->ce_flags & ~clear) | set; add_index_entry(&o->result, ce, ADD_CACHE_OK_TO_ADD | ADD_CACHE_OK_TO_REPLACE); -- cgit v1.2.3 From 419a597f6408d5b5cffe3e278258f58b47d15ad3 Mon Sep 17 00:00:00 2001 From: Karsten Blees Date: Thu, 14 Nov 2013 20:22:27 +0100 Subject: name-hash.c: remove cache entries instead of marking them CE_UNHASHED The new hashmap implementation supports remove, so really remove unused cache entries from the name hashmap instead of just marking them. The CE_UNHASHED flag and CE_STATE_MASK are no longer needed. Keep the CE_HASHED flag to prevent adding entries twice. Signed-off-by: Karsten Blees Signed-off-by: Junio C Hamano --- cache.h | 6 ++---- name-hash.c | 46 ++++++++++++++++++++++------------------------ read-cache.c | 2 +- unpack-trees.c | 2 +- 4 files changed, 26 insertions(+), 30 deletions(-) diff --git a/cache.h b/cache.h index 85210b1bd2..1f11e24cd0 100644 --- a/cache.h +++ b/cache.h @@ -160,7 +160,6 @@ struct cache_entry { #define CE_ADDED (1 << 19) #define CE_HASHED (1 << 20) -#define CE_UNHASHED (1 << 21) #define CE_WT_REMOVE (1 << 22) /* remove in work directory */ #define CE_CONFLICTED (1 << 23) @@ -196,11 +195,10 @@ struct pathspec; * Copy the sha1 and stat state of a cache entry from one to * another. But we never change the name, or the hash state! */ -#define CE_STATE_MASK (CE_HASHED | CE_UNHASHED) static inline void copy_cache_entry(struct cache_entry *dst, const struct cache_entry *src) { - unsigned int state = dst->ce_flags & CE_STATE_MASK; + unsigned int state = dst->ce_flags & CE_HASHED; /* Don't copy hash chain and name */ memcpy(&dst->ce_stat_data, &src->ce_stat_data, @@ -208,7 +206,7 @@ static inline void copy_cache_entry(struct cache_entry *dst, offsetof(struct cache_entry, ce_stat_data)); /* Restore the hash state */ - dst->ce_flags = (dst->ce_flags & ~CE_STATE_MASK) | state; + dst->ce_flags = (dst->ce_flags & ~CE_HASHED) | state; } static inline unsigned create_ce_flags(unsigned stage) diff --git a/name-hash.c b/name-hash.c index 488eccf2f9..9a3bd3f9a6 100644 --- a/name-hash.c +++ b/name-hash.c @@ -106,17 +106,29 @@ static void hash_index_entry(struct index_state *istate, struct cache_entry *ce) hashmap_entry_init(ce, memihash(ce->name, ce_namelen(ce))); hashmap_add(&istate->name_hash, ce); - if (ignore_case && !(ce->ce_flags & CE_UNHASHED)) + if (ignore_case) add_dir_entry(istate, ce); } +static int cache_entry_cmp(const struct cache_entry *ce1, + const struct cache_entry *ce2, const void *remove) +{ + /* + * For remove_name_hash, find the exact entry (pointer equality); for + * index_name_exists, find all entries with matching hash code and + * decide whether the entry matches in same_name. + */ + return remove ? !(ce1 == ce2) : 0; +} + static void lazy_init_name_hash(struct index_state *istate) { int nr; if (istate->name_hash_initialized) return; - hashmap_init(&istate->name_hash, NULL, istate->cache_nr); + hashmap_init(&istate->name_hash, (hashmap_cmp_fn) cache_entry_cmp, + istate->cache_nr); hashmap_init(&istate->dir_hash, (hashmap_cmp_fn) dir_entry_cmp, 0); for (nr = 0; nr < istate->cache_nr; nr++) hash_index_entry(istate, istate->cache[nr]); @@ -125,31 +137,19 @@ static void lazy_init_name_hash(struct index_state *istate) void add_name_hash(struct index_state *istate, struct cache_entry *ce) { - /* if already hashed, add reference to directory entries */ - if (ignore_case && (ce->ce_flags & CE_STATE_MASK) == CE_STATE_MASK) - add_dir_entry(istate, ce); - - ce->ce_flags &= ~CE_UNHASHED; if (istate->name_hash_initialized) hash_index_entry(istate, ce); } -/* - * We don't actually *remove* it, we can just mark it invalid so that - * we won't find it in lookups. - * - * Not only would we have to search the lists (simple enough), but - * we'd also have to rehash other hash buckets in case this makes the - * hash bucket empty (common). So it's much better to just mark - * it. - */ void remove_name_hash(struct index_state *istate, struct cache_entry *ce) { - /* if already hashed, release reference to directory entries */ - if (ignore_case && (ce->ce_flags & CE_STATE_MASK) == CE_HASHED) - remove_dir_entry(istate, ce); + if (!istate->name_hash_initialized || !(ce->ce_flags & CE_HASHED)) + return; + ce->ce_flags &= ~CE_HASHED; + hashmap_remove(&istate->name_hash, ce, ce); - ce->ce_flags |= CE_UNHASHED; + if (ignore_case) + remove_dir_entry(istate, ce); } static int slow_same_name(const char *name1, int len1, const char *name2, int len2) @@ -220,10 +220,8 @@ struct cache_entry *index_file_exists(struct index_state *istate, const char *na hashmap_entry_init(&key, memihash(name, namelen)); ce = hashmap_get(&istate->name_hash, &key, NULL); while (ce) { - if (!(ce->ce_flags & CE_UNHASHED)) { - if (same_name(ce, name, namelen, icase)) - return ce; - } + if (same_name(ce, name, namelen, icase)) + return ce; ce = hashmap_get_next(&istate->name_hash, ce); } return NULL; diff --git a/read-cache.c b/read-cache.c index 33dd676ccb..00af9addd9 100644 --- a/read-cache.c +++ b/read-cache.c @@ -58,7 +58,7 @@ void rename_index_entry_at(struct index_state *istate, int nr, const char *new_n new = xmalloc(cache_entry_size(namelen)); copy_cache_entry(new, old); - new->ce_flags &= ~CE_STATE_MASK; + new->ce_flags &= ~CE_HASHED; new->ce_namelen = namelen; memcpy(new->name, new_name, namelen + 1); diff --git a/unpack-trees.c b/unpack-trees.c index 86f5f8837e..36f3a7d06c 100644 --- a/unpack-trees.c +++ b/unpack-trees.c @@ -105,7 +105,7 @@ void setup_unpack_trees_porcelain(struct unpack_trees_options *opts, static void do_add_entry(struct unpack_trees_options *o, struct cache_entry *ce, unsigned int set, unsigned int clear) { - clear |= CE_HASHED | CE_UNHASHED; + clear |= CE_HASHED; if (set & CE_REMOVE) set |= CE_WT_REMOVE; -- cgit v1.2.3 From efc684245b81ae0fb8f0afbd06dc1c3101c4e5a0 Mon Sep 17 00:00:00 2001 From: Karsten Blees Date: Thu, 14 Nov 2013 20:23:12 +0100 Subject: remove old hash.[ch] implementation Signed-off-by: Karsten Blees Signed-off-by: Junio C Hamano --- Documentation/technical/api-hash.txt | 52 ----------------- Makefile | 2 - cache.h | 1 - hash.c | 110 ----------------------------------- hash.h | 50 ---------------- test-hashmap.c | 84 -------------------------- 6 files changed, 299 deletions(-) delete mode 100644 Documentation/technical/api-hash.txt delete mode 100644 hash.c delete mode 100644 hash.h diff --git a/Documentation/technical/api-hash.txt b/Documentation/technical/api-hash.txt deleted file mode 100644 index e5061e0677..0000000000 --- a/Documentation/technical/api-hash.txt +++ /dev/null @@ -1,52 +0,0 @@ -hash API -======== - -The hash API is a collection of simple hash table functions. Users are expected -to implement their own hashing. - -Data Structures ---------------- - -`struct hash_table`:: - - The hash table structure. The `array` member points to the hash table - entries. The `size` member counts the total number of valid and invalid - entries in the table. The `nr` member keeps track of the number of - valid entries. - -`struct hash_table_entry`:: - - An opaque structure representing an entry in the hash table. The `hash` - member is the entry's hash key and the `ptr` member is the entry's - value. - -Functions ---------- - -`init_hash`:: - - Initialize the hash table. - -`free_hash`:: - - Release memory associated with the hash table. - -`insert_hash`:: - - Insert a pointer into the hash table. If an entry with that hash - already exists, a pointer to the existing entry's value is returned. - Otherwise NULL is returned. This allows callers to implement - chaining, etc. - -`lookup_hash`:: - - Lookup an entry in the hash table. If an entry with that hash exists - the entry's value is returned. Otherwise NULL is returned. - -`for_each_hash`:: - - Call a function for each entry in the hash table. The function is - expected to take the entry's value as its only argument and return an - int. If the function returns a negative int the loop is aborted - immediately. Otherwise, the return value is accumulated and the sum - returned upon completion of the loop. diff --git a/Makefile b/Makefile index d8d3d6705b..f495dd4c13 100644 --- a/Makefile +++ b/Makefile @@ -677,7 +677,6 @@ LIB_H += git-compat-util.h LIB_H += gpg-interface.h LIB_H += graph.h LIB_H += grep.h -LIB_H += hash.h LIB_H += hashmap.h LIB_H += help.h LIB_H += http.h @@ -809,7 +808,6 @@ LIB_OBJS += gettext.o LIB_OBJS += gpg-interface.o LIB_OBJS += graph.o LIB_OBJS += grep.o -LIB_OBJS += hash.o LIB_OBJS += hashmap.o LIB_OBJS += help.o LIB_OBJS += hex.o diff --git a/cache.h b/cache.h index 1f11e24cd0..407145c364 100644 --- a/cache.h +++ b/cache.h @@ -3,7 +3,6 @@ #include "git-compat-util.h" #include "strbuf.h" -#include "hash.h" #include "hashmap.h" #include "advice.h" #include "gettext.h" diff --git a/hash.c b/hash.c deleted file mode 100644 index 749ecfe484..0000000000 --- a/hash.c +++ /dev/null @@ -1,110 +0,0 @@ -/* - * Some generic hashing helpers. - */ -#include "cache.h" -#include "hash.h" - -/* - * Look up a hash entry in the hash table. Return the pointer to - * the existing entry, or the empty slot if none existed. The caller - * can then look at the (*ptr) to see whether it existed or not. - */ -static struct hash_table_entry *lookup_hash_entry(unsigned int hash, const struct hash_table *table) -{ - unsigned int size = table->size, nr = hash % size; - struct hash_table_entry *array = table->array; - - while (array[nr].ptr) { - if (array[nr].hash == hash) - break; - nr++; - if (nr >= size) - nr = 0; - } - return array + nr; -} - - -/* - * Insert a new hash entry pointer into the table. - * - * If that hash entry already existed, return the pointer to - * the existing entry (and the caller can create a list of the - * pointers or do anything else). If it didn't exist, return - * NULL (and the caller knows the pointer has been inserted). - */ -static void **insert_hash_entry(unsigned int hash, void *ptr, struct hash_table *table) -{ - struct hash_table_entry *entry = lookup_hash_entry(hash, table); - - if (!entry->ptr) { - entry->ptr = ptr; - entry->hash = hash; - table->nr++; - return NULL; - } - return &entry->ptr; -} - -static void grow_hash_table(struct hash_table *table) -{ - unsigned int i; - unsigned int old_size = table->size, new_size; - struct hash_table_entry *old_array = table->array, *new_array; - - new_size = alloc_nr(old_size); - new_array = xcalloc(sizeof(struct hash_table_entry), new_size); - table->size = new_size; - table->array = new_array; - table->nr = 0; - for (i = 0; i < old_size; i++) { - unsigned int hash = old_array[i].hash; - void *ptr = old_array[i].ptr; - if (ptr) - insert_hash_entry(hash, ptr, table); - } - free(old_array); -} - -void *lookup_hash(unsigned int hash, const struct hash_table *table) -{ - if (!table->array) - return NULL; - return lookup_hash_entry(hash, table)->ptr; -} - -void **insert_hash(unsigned int hash, void *ptr, struct hash_table *table) -{ - unsigned int nr = table->nr; - if (nr >= table->size/2) - grow_hash_table(table); - return insert_hash_entry(hash, ptr, table); -} - -int for_each_hash(const struct hash_table *table, int (*fn)(void *, void *), void *data) -{ - int sum = 0; - unsigned int i; - unsigned int size = table->size; - struct hash_table_entry *array = table->array; - - for (i = 0; i < size; i++) { - void *ptr = array->ptr; - array++; - if (ptr) { - int val = fn(ptr, data); - if (val < 0) - return val; - sum += val; - } - } - return sum; -} - -void free_hash(struct hash_table *table) -{ - free(table->array); - table->array = NULL; - table->size = 0; - table->nr = 0; -} diff --git a/hash.h b/hash.h deleted file mode 100644 index 1d43ac0ba0..0000000000 --- a/hash.h +++ /dev/null @@ -1,50 +0,0 @@ -#ifndef HASH_H -#define HASH_H - -/* - * These are some simple generic hash table helper functions. - * Not necessarily suitable for all users, but good for things - * where you want to just keep track of a list of things, and - * have a good hash to use on them. - * - * It keeps the hash table at roughly 50-75% free, so the memory - * cost of the hash table itself is roughly - * - * 3 * 2*sizeof(void *) * nr_of_objects - * - * bytes. - * - * FIXME: on 64-bit architectures, we waste memory. It would be - * good to have just 32-bit pointers, requiring a special allocator - * for hashed entries or something. - */ -struct hash_table_entry { - unsigned int hash; - void *ptr; -}; - -struct hash_table { - unsigned int size, nr; - struct hash_table_entry *array; -}; - -extern void *lookup_hash(unsigned int hash, const struct hash_table *table); -extern void **insert_hash(unsigned int hash, void *ptr, struct hash_table *table); -extern int for_each_hash(const struct hash_table *table, int (*fn)(void *, void *), void *data); -extern void free_hash(struct hash_table *table); - -static inline void init_hash(struct hash_table *table) -{ - table->size = 0; - table->nr = 0; - table->array = NULL; -} - -static inline void preallocate_hash(struct hash_table *table, unsigned int elts) -{ - assert(table->size == 0 && table->nr == 0 && table->array == NULL); - table->size = elts * 2; - table->array = xcalloc(sizeof(struct hash_table_entry), table->size); -} - -#endif diff --git a/test-hashmap.c b/test-hashmap.c index 581d2964e4..7e86f886d8 100644 --- a/test-hashmap.c +++ b/test-hashmap.c @@ -126,85 +126,6 @@ static void perf_hashmap(unsigned int method, unsigned int rounds) } } -struct hash_entry -{ - struct hash_entry *next; - char key[FLEX_ARRAY]; -}; - -/* - * Test performance of hash.[ch] - * Usage: time echo "perfhash method rounds" | test-hashmap - */ -static void perf_hash(unsigned int method, unsigned int rounds) -{ - struct hash_table map; - char buf[16]; - struct hash_entry **entries, **res, *entry; - unsigned int *hashes; - unsigned int i, j; - - entries = malloc(TEST_SIZE * sizeof(struct hash_entry *)); - hashes = malloc(TEST_SIZE * sizeof(int)); - for (i = 0; i < TEST_SIZE; i++) { - snprintf(buf, sizeof(buf), "%i", i); - entries[i] = malloc(sizeof(struct hash_entry) + strlen(buf) + 1); - strcpy(entries[i]->key, buf); - hashes[i] = hash(method, i, entries[i]->key); - } - - if (method & TEST_ADD) { - /* test adding to the map */ - for (j = 0; j < rounds; j++) { - init_hash(&map); - - /* add entries */ - for (i = 0; i < TEST_SIZE; i++) { - res = (struct hash_entry **) insert_hash( - hashes[i], entries[i], &map); - if (res) { - entries[i]->next = *res; - *res = entries[i]; - } else { - entries[i]->next = NULL; - } - } - - free_hash(&map); - } - } else { - /* test map lookups */ - init_hash(&map); - - /* fill the map (sparsely if specified) */ - j = (method & TEST_SPARSE) ? TEST_SIZE / 10 : TEST_SIZE; - for (i = 0; i < j; i++) { - res = (struct hash_entry **) insert_hash(hashes[i], - entries[i], &map); - if (res) { - entries[i]->next = *res; - *res = entries[i]; - } else { - entries[i]->next = NULL; - } - } - - for (j = 0; j < rounds; j++) { - for (i = 0; i < TEST_SIZE; i++) { - entry = lookup_hash(hashes[i], &map); - while (entry) { - if (!strcmp(entries[i]->key, entry->key)) - break; - entry = entry->next; - } - } - } - - free_hash(&map); - - } -} - #define DELIM " \t\r\n" /* @@ -218,7 +139,6 @@ static void perf_hash(unsigned int method, unsigned int rounds) * size -> tablesize numentries * * perfhashmap method rounds -> test hashmap.[ch] performance - * perfhash method rounds -> test hash.[ch] performance */ int main(int argc, char *argv[]) { @@ -324,10 +244,6 @@ int main(int argc, char *argv[]) perf_hashmap(atoi(p1), atoi(p2)); - } else if (!strcmp("perfhash", cmd) && l1 && l2) { - - perf_hash(atoi(p1), atoi(p2)); - } else { printf("Unknown command %s\n", cmd); -- cgit v1.2.3 From e837af61345344448346afef79d53a5ff95bb0b3 Mon Sep 17 00:00:00 2001 From: Karsten Blees Date: Thu, 14 Nov 2013 20:23:42 +0100 Subject: fix 'git update-index --verbose --again' output 'git update-index --verbose' consistently reports paths relative to the work-tree root. The only exception is the '--again' option, which reports paths relative to the current working directory. Change do_reupdate to use non-prefixed paths. Signed-off-by: Karsten Blees Signed-off-by: Junio C Hamano --- builtin/update-index.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/builtin/update-index.c b/builtin/update-index.c index e3a10d706d..d180d80c04 100644 --- a/builtin/update-index.c +++ b/builtin/update-index.c @@ -579,7 +579,7 @@ static int do_reupdate(int ac, const char **av, * or worse yet 'allow_replace', active_nr may decrease. */ save_nr = active_nr; - update_one(ce->name + prefix_length, prefix, prefix_length); + update_one(ce->name, NULL, 0); if (save_nr != active_nr) goto redo; } -- cgit v1.2.3 From 6bb69077b74b792ddef66e372c86e66dea8e449b Mon Sep 17 00:00:00 2001 From: Karsten Blees Date: Thu, 14 Nov 2013 20:24:08 +0100 Subject: builtin/update-index.c: cleanup update_one do_reupdate calls update_one with a cache_entry.name, there's no need for the extra sanitation / normalization that happens in prefix_path. cmd_update_index calls update_one with an already prefixed path, no need to prefix_path twice. Remove the extra prefix_path from update_one. Also remove the now unused 'prefix' and 'prefix_length' parameters. As of d089eba "setup: sanitize absolute and funny paths in get_pathspec()", prefix_path uncoditionally returns a copy, even if the passed in path isn't changed. Lets unconditionally free() the result. Signed-off-by: Karsten Blees Signed-off-by: Junio C Hamano --- builtin/update-index.c | 36 +++++++++++++++--------------------- 1 file changed, 15 insertions(+), 21 deletions(-) diff --git a/builtin/update-index.c b/builtin/update-index.c index d180d80c04..c8f0d5f47c 100644 --- a/builtin/update-index.c +++ b/builtin/update-index.c @@ -274,36 +274,32 @@ static void chmod_path(int flip, const char *path) die("git update-index: cannot chmod %cx '%s'", flip, path); } -static void update_one(const char *path, const char *prefix, int prefix_length) +static void update_one(const char *path) { - const char *p = prefix_path(prefix, prefix_length, path); - if (!verify_path(p)) { + if (!verify_path(path)) { fprintf(stderr, "Ignoring path %s\n", path); - goto free_return; + return; } if (mark_valid_only) { - if (mark_ce_flags(p, CE_VALID, mark_valid_only == MARK_FLAG)) + if (mark_ce_flags(path, CE_VALID, mark_valid_only == MARK_FLAG)) die("Unable to mark file %s", path); - goto free_return; + return; } if (mark_skip_worktree_only) { - if (mark_ce_flags(p, CE_SKIP_WORKTREE, mark_skip_worktree_only == MARK_FLAG)) + if (mark_ce_flags(path, CE_SKIP_WORKTREE, mark_skip_worktree_only == MARK_FLAG)) die("Unable to mark file %s", path); - goto free_return; + return; } if (force_remove) { - if (remove_file_from_cache(p)) + if (remove_file_from_cache(path)) die("git update-index: unable to remove %s", path); report("remove '%s'", path); - goto free_return; + return; } - if (process_path(p)) + if (process_path(path)) die("Unable to process path %s", path); report("add '%s'", path); - free_return: - if (p < path || p > path + strlen(path)) - free((char *)p); } static void read_index_info(int line_termination) @@ -579,7 +575,7 @@ static int do_reupdate(int ac, const char **av, * or worse yet 'allow_replace', active_nr may decrease. */ save_nr = active_nr; - update_one(ce->name, NULL, 0); + update_one(ce->name); if (save_nr != active_nr) goto redo; } @@ -836,11 +832,10 @@ int cmd_update_index(int argc, const char **argv, const char *prefix) setup_work_tree(); p = prefix_path(prefix, prefix_length, path); - update_one(p, NULL, 0); + update_one(p); if (set_executable_bit) chmod_path(set_executable_bit, p); - if (p < path || p > path + strlen(path)) - free((char *)p); + free((char *)p); ctx.argc--; ctx.argv++; break; @@ -879,11 +874,10 @@ int cmd_update_index(int argc, const char **argv, const char *prefix) strbuf_swap(&buf, &nbuf); } p = prefix_path(prefix, prefix_length, buf.buf); - update_one(p, NULL, 0); + update_one(p); if (set_executable_bit) chmod_path(set_executable_bit, p); - if (p < buf.buf || p > buf.buf + buf.len) - free((char *)p); + free((char *)p); } strbuf_release(&nbuf); strbuf_release(&buf); -- cgit v1.2.3 From 5699d17ee0949e6c01311a03dcfce485fcdd9b1a Mon Sep 17 00:00:00 2001 From: Karsten Blees Date: Thu, 14 Nov 2013 20:24:37 +0100 Subject: read-cache.c: fix memory leaks caused by removed cache entries When cache_entry structs are removed from index_state.cache, they are not properly freed. Freeing those entries wasn't possible before because we couldn't remove them from index_state.name_hash. Now that we _do_ remove the entries from name_hash, we can also free them. Add 'free(cache_entry)' to all call sites of name-hash.c::remove_name_hash in read-cache.c (we could free() directly in remove_name_hash(), but name-hash.c isn't concerned with cache_entry allocation at all). Accessing a cache_entry after removing it from the index is now no longer allowed, as the memory has been freed. The following functions need minor fixes (typically by copying ce->name before use): - builtin/rm.c::cmd_rm - builtin/update-index.c::do_reupdate - read-cache.c::read_index_unmerged - resolve-undo.c::unmerge_index_entry_at Signed-off-by: Karsten Blees Signed-off-by: Junio C Hamano --- builtin/rm.c | 2 +- builtin/update-index.c | 5 ++++- read-cache.c | 8 ++++++-- resolve-undo.c | 7 +++++-- 4 files changed, 16 insertions(+), 6 deletions(-) diff --git a/builtin/rm.c b/builtin/rm.c index 3a0e0eaab7..171f37c1cc 100644 --- a/builtin/rm.c +++ b/builtin/rm.c @@ -311,7 +311,7 @@ int cmd_rm(int argc, const char **argv, const char *prefix) if (!match_pathspec_depth(&pathspec, ce->name, ce_namelen(ce), 0, seen)) continue; ALLOC_GROW(list.entry, list.nr + 1, list.alloc); - list.entry[list.nr].name = ce->name; + list.entry[list.nr].name = xstrdup(ce->name); list.entry[list.nr].is_submodule = S_ISGITLINK(ce->ce_mode); if (list.entry[list.nr++].is_submodule && !is_staging_gitmodules_ok()) diff --git a/builtin/update-index.c b/builtin/update-index.c index c8f0d5f47c..00313f373a 100644 --- a/builtin/update-index.c +++ b/builtin/update-index.c @@ -559,6 +559,7 @@ static int do_reupdate(int ac, const char **av, const struct cache_entry *ce = active_cache[pos]; struct cache_entry *old = NULL; int save_nr; + char *path; if (ce_stage(ce) || !ce_path_match(ce, &pathspec)) continue; @@ -575,7 +576,9 @@ static int do_reupdate(int ac, const char **av, * or worse yet 'allow_replace', active_nr may decrease. */ save_nr = active_nr; - update_one(ce->name); + path = xstrdup(ce->name); + update_one(path); + free(path); if (save_nr != active_nr) goto redo; } diff --git a/read-cache.c b/read-cache.c index 00af9addd9..3f735f3c8e 100644 --- a/read-cache.c +++ b/read-cache.c @@ -47,6 +47,7 @@ static void replace_index_entry(struct index_state *istate, int nr, struct cache struct cache_entry *old = istate->cache[nr]; remove_name_hash(istate, old); + free(old); set_index_entry(istate, nr, ce); istate->cache_changed = 1; } @@ -478,6 +479,7 @@ int remove_index_entry_at(struct index_state *istate, int pos) record_resolve_undo(istate, ce); remove_name_hash(istate, ce); + free(ce); istate->cache_changed = 1; istate->cache_nr--; if (pos >= istate->cache_nr) @@ -499,8 +501,10 @@ void remove_marked_cache_entries(struct index_state *istate) unsigned int i, j; for (i = j = 0; i < istate->cache_nr; i++) { - if (ce_array[i]->ce_flags & CE_REMOVE) + if (ce_array[i]->ce_flags & CE_REMOVE) { remove_name_hash(istate, ce_array[i]); + free(ce_array[i]); + } else ce_array[j++] = ce_array[i]; } @@ -1894,7 +1898,7 @@ int read_index_unmerged(struct index_state *istate) new_ce->ce_mode = ce->ce_mode; if (add_index_entry(istate, new_ce, 0)) return error("%s: cannot drop to stage #0", - ce->name); + new_ce->name); i = index_name_pos(istate, new_ce->name, len); } return unmerged; diff --git a/resolve-undo.c b/resolve-undo.c index c09b00664e..49ebaaf8d8 100644 --- a/resolve-undo.c +++ b/resolve-undo.c @@ -119,6 +119,7 @@ int unmerge_index_entry_at(struct index_state *istate, int pos) struct string_list_item *item; struct resolve_undo_info *ru; int i, err = 0, matched; + char *name; if (!istate->resolve_undo) return pos; @@ -138,20 +139,22 @@ int unmerge_index_entry_at(struct index_state *istate, int pos) if (!ru) return pos; matched = ce->ce_flags & CE_MATCHED; + name = xstrdup(ce->name); remove_index_entry_at(istate, pos); for (i = 0; i < 3; i++) { struct cache_entry *nce; if (!ru->mode[i]) continue; nce = make_cache_entry(ru->mode[i], ru->sha1[i], - ce->name, i + 1, 0); + name, i + 1, 0); if (matched) nce->ce_flags |= CE_MATCHED; if (add_index_entry(istate, nce, ADD_CACHE_OK_TO_ADD)) { err = 1; - error("cannot unmerge '%s'", ce->name); + error("cannot unmerge '%s'", name); } } + free(name); if (err) return pos; free(ru); -- cgit v1.2.3 From f7988c15ad6801383384ee56970a52f4e485f0cb Mon Sep 17 00:00:00 2001 From: Jonathan Nieder Date: Fri, 13 Dec 2013 18:05:26 -0800 Subject: .gitignore: test-hashmap is a generated file Prevent the "test-hashmap" program from being accidentally tracked with "git add" or cluttering "git status" output. Signed-off-by: Jonathan Nieder Signed-off-by: Junio C Hamano --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 66199edd4a..cb1db5492c 100644 --- a/.gitignore +++ b/.gitignore @@ -185,6 +185,7 @@ /test-dump-cache-tree /test-scrap-cache-tree /test-genrandom +/test-hashmap /test-index-version /test-line-buffer /test-match-trees -- cgit v1.2.3 From e8fa59b9081d90299aa5b41412bf93e8856a612b Mon Sep 17 00:00:00 2001 From: Jonathan Nieder Date: Fri, 13 Dec 2013 18:06:40 -0800 Subject: test-hashmap.c: drop unnecessary #includes Per Documentation/CodingGuidelines most C files in git start with a #include of git-compat-util.h or another header file that includes it, such as cache.h or builtin.h. This file doesn't need anything beyond "git-compat-util.h", so use that. Remove a #include of the system header since it is already included by "git-compat-util.h". Signed-off-by: Jonathan Nieder Signed-off-by: Junio C Hamano --- test-hashmap.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/test-hashmap.c b/test-hashmap.c index 7e86f886d8..f5183fb9e8 100644 --- a/test-hashmap.c +++ b/test-hashmap.c @@ -1,6 +1,5 @@ -#include "cache.h" +#include "git-compat-util.h" #include "hashmap.h" -#include struct test_entry { -- cgit v1.2.3 From b6aad994737458177ddf68939719f90e7909f656 Mon Sep 17 00:00:00 2001 From: Karsten Blees Date: Wed, 18 Dec 2013 14:41:27 +0100 Subject: hashmap.h: use 'unsigned int' for hash-codes everywhere Signed-off-by: Karsten Blees Signed-off-by: Junio C Hamano --- Documentation/technical/api-hashmap.txt | 2 +- hashmap.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/technical/api-hashmap.txt b/Documentation/technical/api-hashmap.txt index b2280f1b0d..42ca2347ed 100644 --- a/Documentation/technical/api-hashmap.txt +++ b/Documentation/technical/api-hashmap.txt @@ -80,7 +80,7 @@ prevent expensive resizing. If 0, the table is dynamically resized. If `free_entries` is true, each hashmap_entry in the map is freed as well (using stdlib's free()). -`void hashmap_entry_init(void *entry, int hash)`:: +`void hashmap_entry_init(void *entry, unsigned int hash)`:: Initializes a hashmap_entry structure. + diff --git a/hashmap.h b/hashmap.h index f5b3b61073..a816ad47b1 100644 --- a/hashmap.h +++ b/hashmap.h @@ -43,7 +43,7 @@ extern void hashmap_free(struct hashmap *map, int free_entries); /* hashmap_entry functions */ -static inline void hashmap_entry_init(void *entry, int hash) +static inline void hashmap_entry_init(void *entry, unsigned int hash) { struct hashmap_entry *e = entry; e->hash = hash; -- cgit v1.2.3 From 7b359ea6b3333a87fd3fa8b84913f2b75ed244ad Mon Sep 17 00:00:00 2001 From: Eric Sunshine Date: Thu, 2 Jan 2014 16:57:12 -0500 Subject: name-hash: retire unused index_name_exists() db5360f3f496 (name-hash: refactor polymorphic index_name_exists(); 2013-09-17) split index_name_exists() into index_file_exists() and index_dir_exists() but retained index_name_exists() as a thin wrapper to avoid disturbing possible in-flight topics. Since this change landed in 'master' some time ago and there are no in-flight topics referencing index_name_exists(), retire it. Signed-off-by: Eric Sunshine Signed-off-by: Junio C Hamano --- cache.h | 2 -- name-hash.c | 9 +-------- 2 files changed, 1 insertion(+), 10 deletions(-) diff --git a/cache.h b/cache.h index 407145c364..353c687c19 100644 --- a/cache.h +++ b/cache.h @@ -316,7 +316,6 @@ extern void free_name_hash(struct index_state *istate); #define ce_modified(ce, st, options) ie_modified(&the_index, (ce), (st), (options)) #define cache_dir_exists(name, namelen) index_dir_exists(&the_index, (name), (namelen)) #define cache_file_exists(name, namelen, igncase) index_file_exists(&the_index, (name), (namelen), (igncase)) -#define cache_name_exists(name, namelen, igncase) index_name_exists(&the_index, (name), (namelen), (igncase)) #define cache_name_is_other(name, namelen) index_name_is_other(&the_index, (name), (namelen)) #define resolve_undo_clear() resolve_undo_clear_index(&the_index) #define unmerge_cache_entry_at(at) unmerge_index_entry_at(&the_index, at) @@ -467,7 +466,6 @@ extern int unmerged_index(const struct index_state *); extern int verify_path(const char *path); extern struct cache_entry *index_dir_exists(struct index_state *istate, const char *name, int namelen); extern struct cache_entry *index_file_exists(struct index_state *istate, const char *name, int namelen, int igncase); -extern struct cache_entry *index_name_exists(struct index_state *istate, const char *name, int namelen, int igncase); extern int index_name_pos(const struct index_state *, const char *name, int namelen); #define ADD_CACHE_OK_TO_ADD 1 /* Ok to add */ #define ADD_CACHE_OK_TO_REPLACE 2 /* Ok to replace file/directory */ diff --git a/name-hash.c b/name-hash.c index 9a3bd3f9a6..97444d0201 100644 --- a/name-hash.c +++ b/name-hash.c @@ -115,7 +115,7 @@ static int cache_entry_cmp(const struct cache_entry *ce1, { /* * For remove_name_hash, find the exact entry (pointer equality); for - * index_name_exists, find all entries with matching hash code and + * index_file_exists, find all entries with matching hash code and * decide whether the entry matches in same_name. */ return remove ? !(ce1 == ce2) : 0; @@ -227,13 +227,6 @@ struct cache_entry *index_file_exists(struct index_state *istate, const char *na return NULL; } -struct cache_entry *index_name_exists(struct index_state *istate, const char *name, int namelen, int icase) -{ - if (namelen > 0 && name[namelen - 1] == '/') - return index_dir_exists(istate, name, namelen - 1); - return index_file_exists(istate, name, namelen, icase); -} - void free_name_hash(struct index_state *istate) { if (!istate->name_hash_initialized) -- cgit v1.2.3