diff options
Diffstat (limited to 'sha1_file.c')
-rw-r--r-- | sha1_file.c | 580 |
1 files changed, 391 insertions, 189 deletions
diff --git a/sha1_file.c b/sha1_file.c index 9152974642..b114cc922d 100644 --- a/sha1_file.c +++ b/sha1_file.c @@ -7,6 +7,7 @@ * creation etc. */ #include "cache.h" +#include "string-list.h" #include "delta.h" #include "pack.h" #include "blob.h" @@ -20,6 +21,7 @@ #include "sha1-lookup.h" #include "bulk-checkin.h" #include "streaming.h" +#include "dir.h" #ifndef O_NOATIME #if defined(__linux__) && (defined(__i386__) || defined(__PPC__)) @@ -122,8 +124,13 @@ int safe_create_leading_directories(char *path) } } else if (mkdir(path, 0777)) { - *pos = '/'; - return -1; + if (errno == EEXIST && + !stat(path, &st) && S_ISDIR(st.st_mode)) { + ; /* somebody created it since we checked */ + } else { + *pos = '/'; + return -1; + } } else if (adjust_shared_perm(path)) { *pos = '/'; @@ -246,7 +253,7 @@ static int git_open_noatime(const char *name); * SHA1, an extra slash for the first level indirection, and the * terminating NUL. */ -static int link_alt_odb_entry(const char * entry, int len, const char * relative_base, int depth) +static int link_alt_odb_entry(const char *entry, const char *relative_base, int depth) { const char *objdir = get_object_directory(); struct alternate_object_database *ent; @@ -258,7 +265,7 @@ static int link_alt_odb_entry(const char * entry, int len, const char * relative strbuf_addstr(&pathbuf, real_path(relative_base)); strbuf_addch(&pathbuf, '/'); } - strbuf_add(&pathbuf, entry, len); + strbuf_addstr(&pathbuf, entry); normalize_path_copy(pathbuf.buf, pathbuf.buf); @@ -316,10 +323,12 @@ static int link_alt_odb_entry(const char * entry, int len, const char * relative return 0; } -static void link_alt_odb_entries(const char *alt, const char *ep, int sep, +static void link_alt_odb_entries(const char *alt, int len, int sep, const char *relative_base, int depth) { - const char *cp, *last; + struct string_list entries = STRING_LIST_INIT_NODUP; + char *alt_copy; + int i; if (depth > 5) { error("%s: ignoring alternate object stores, nesting too deep.", @@ -327,30 +336,21 @@ static void link_alt_odb_entries(const char *alt, const char *ep, int sep, return; } - last = alt; - while (last < ep) { - cp = last; - if (cp < ep && *cp == '#') { - while (cp < ep && *cp != sep) - cp++; - last = cp + 1; + alt_copy = xmemdupz(alt, len); + string_list_split_in_place(&entries, alt_copy, sep, -1); + for (i = 0; i < entries.nr; i++) { + const char *entry = entries.items[i].string; + if (entry[0] == '\0' || entry[0] == '#') continue; + if (!is_absolute_path(entry) && depth) { + error("%s: ignoring relative alternate object store %s", + relative_base, entry); + } else { + link_alt_odb_entry(entry, relative_base, depth); } - while (cp < ep && *cp != sep) - cp++; - if (last != cp) { - if (!is_absolute_path(last) && depth) { - error("%s: ignoring relative alternate object store %s", - relative_base, last); - } else { - link_alt_odb_entry(last, cp - last, - relative_base, depth); - } - } - while (cp < ep && *cp == sep) - cp++; - last = cp; } + string_list_clear(&entries, 0); + free(alt_copy); } void read_info_alternates(const char * relative_base, int depth) @@ -377,7 +377,7 @@ void read_info_alternates(const char * relative_base, int depth) map = xmmap(NULL, mapsz, PROT_READ, MAP_PRIVATE, fd, 0); close(fd); - link_alt_odb_entries(map, map + mapsz, '\n', relative_base, depth); + link_alt_odb_entries(map, mapsz, '\n', relative_base, depth); munmap(map, mapsz); } @@ -391,7 +391,7 @@ void add_to_alternates_file(const char *reference) if (commit_lock_file(lock)) die("could not close alternates file"); if (alt_odb_tail) - link_alt_odb_entries(alt, alt + strlen(alt), '\n', NULL, 0); + link_alt_odb_entries(alt, strlen(alt), '\n', NULL, 0); } void foreach_alt_odb(alt_odb_fn fn, void *cb) @@ -415,7 +415,7 @@ void prepare_alt_odb(void) if (!alt) alt = ""; alt_odb_tail = &alt_odb_list; - link_alt_odb_entries(alt, alt + strlen(alt), PATH_SEP, NULL, 0); + link_alt_odb_entries(alt, strlen(alt), PATH_SEP, NULL, 0); read_info_alternates(get_object_directory(), 0); } @@ -1006,6 +1006,63 @@ void install_packed_git(struct packed_git *pack) packed_git = pack; } +void (*report_garbage)(const char *desc, const char *path); + +static void report_helper(const struct string_list *list, + int seen_bits, int first, int last) +{ + const char *msg; + switch (seen_bits) { + case 0: + msg = "no corresponding .idx nor .pack"; + break; + case 1: + msg = "no corresponding .idx"; + break; + case 2: + msg = "no corresponding .pack"; + break; + default: + return; + } + for (; first < last; first++) + report_garbage(msg, list->items[first].string); +} + +static void report_pack_garbage(struct string_list *list) +{ + int i, baselen = -1, first = 0, seen_bits = 0; + + if (!report_garbage) + return; + + sort_string_list(list); + + for (i = 0; i < list->nr; i++) { + const char *path = list->items[i].string; + if (baselen != -1 && + strncmp(path, list->items[first].string, baselen)) { + report_helper(list, seen_bits, first, i); + baselen = -1; + seen_bits = 0; + } + if (baselen == -1) { + const char *dot = strrchr(path, '.'); + if (!dot) { + report_garbage("garbage found", path); + continue; + } + baselen = dot - path + 1; + first = i; + } + if (!strcmp(path + baselen, "pack")) + seen_bits |= 1; + else if (!strcmp(path + baselen, "idx")) + seen_bits |= 2; + } + report_helper(list, seen_bits, first, list->nr); +} + static void prepare_packed_git_one(char *objdir, int local) { /* Ensure that this buffer is large enough so that we can @@ -1015,6 +1072,7 @@ static void prepare_packed_git_one(char *objdir, int local) int len; DIR *dir; struct dirent *de; + struct string_list garbage = STRING_LIST_INIT_DUP; sprintf(path, "%s/pack", objdir); len = strlen(path); @@ -1030,29 +1088,49 @@ static void prepare_packed_git_one(char *objdir, int local) int namelen = strlen(de->d_name); struct packed_git *p; - if (!has_extension(de->d_name, ".idx")) + if (len + namelen + 1 > sizeof(path)) { + if (report_garbage) { + struct strbuf sb = STRBUF_INIT; + strbuf_addf(&sb, "%.*s/%s", len - 1, path, de->d_name); + report_garbage("path too long", sb.buf); + strbuf_release(&sb); + } continue; + } - if (len + namelen + 1 > sizeof(path)) + if (is_dot_or_dotdot(de->d_name)) continue; - /* Don't reopen a pack we already have. */ strcpy(path + len, de->d_name); - for (p = packed_git; p; p = p->next) { - if (!memcmp(path, p->pack_name, len + namelen - 4)) - break; + + if (has_extension(de->d_name, ".idx")) { + /* Don't reopen a pack we already have. */ + for (p = packed_git; p; p = p->next) { + if (!memcmp(path, p->pack_name, len + namelen - 4)) + break; + } + if (p == NULL && + /* + * See if it really is a valid .idx file with + * corresponding .pack file that we can map. + */ + (p = add_packed_git(path, len + namelen, local)) != NULL) + install_packed_git(p); } - if (p) - continue; - /* See if it really is a valid .idx file with corresponding - * .pack file that we can map. - */ - p = add_packed_git(path, len + namelen, local); - if (!p) + + if (!report_garbage) continue; - install_packed_git(p); + + if (has_extension(de->d_name, ".idx") || + has_extension(de->d_name, ".pack") || + has_extension(de->d_name, ".keep")) + string_list_append(&garbage, path); + else + report_garbage("garbage found", path); } closedir(dir); + report_pack_garbage(&garbage); + string_list_clear(&garbage, 0); } static int sort_pack(const void *a_, const void *b_) @@ -1193,6 +1271,10 @@ int check_sha1_signature(const unsigned char *sha1, void *map, char buf[1024 * 16]; ssize_t readlen = read_istream(st, buf, sizeof(buf)); + if (readlen < 0) { + close_istream(st); + return -1; + } if (!readlen) break; git_SHA1_Update(&c, buf, readlen); @@ -1566,50 +1648,6 @@ static off_t get_delta_base(struct packed_git *p, return base_offset; } -/* forward declaration for a mutually recursive function */ -static int packed_object_info(struct packed_git *p, off_t offset, - unsigned long *sizep, int *rtype); - -static int packed_delta_info(struct packed_git *p, - struct pack_window **w_curs, - off_t curpos, - enum object_type type, - off_t obj_offset, - unsigned long *sizep) -{ - off_t base_offset; - - base_offset = get_delta_base(p, w_curs, &curpos, type, obj_offset); - if (!base_offset) - return OBJ_BAD; - type = packed_object_info(p, base_offset, NULL, NULL); - if (type <= OBJ_NONE) { - struct revindex_entry *revidx; - const unsigned char *base_sha1; - revidx = find_pack_revindex(p, base_offset); - if (!revidx) - return OBJ_BAD; - base_sha1 = nth_packed_object_sha1(p, revidx->nr); - mark_bad_packed_object(p, base_sha1); - type = sha1_object_info(base_sha1, NULL); - if (type <= OBJ_NONE) - return OBJ_BAD; - } - - /* We choose to only get the type of the base object and - * ignore potentially corrupt pack file that expects the delta - * based on a base with a wrong size. This saves tons of - * inflate() calls. - */ - if (sizep) { - *sizep = get_size_from_delta(p, w_curs, curpos); - if (*sizep == 0) - type = OBJ_BAD; - } - - return type; -} - int unpack_object_header(struct packed_git *p, struct pack_window **w_curs, off_t *curpos, @@ -1636,6 +1674,25 @@ int unpack_object_header(struct packed_git *p, return type; } +static int retry_bad_packed_offset(struct packed_git *p, off_t obj_offset) +{ + int type; + struct revindex_entry *revidx; + const unsigned char *sha1; + revidx = find_pack_revindex(p, obj_offset); + if (!revidx) + return OBJ_BAD; + sha1 = nth_packed_object_sha1(p, revidx->nr); + mark_bad_packed_object(p, sha1); + type = sha1_object_info(sha1, NULL); + if (type <= OBJ_NONE) + return OBJ_BAD; + return type; +} + + +#define POI_STACK_PREALLOC 64 + static int packed_object_info(struct packed_git *p, off_t obj_offset, unsigned long *sizep, int *rtype) { @@ -1643,31 +1700,89 @@ static int packed_object_info(struct packed_git *p, off_t obj_offset, unsigned long size; off_t curpos = obj_offset; enum object_type type; + off_t small_poi_stack[POI_STACK_PREALLOC]; + off_t *poi_stack = small_poi_stack; + int poi_stack_nr = 0, poi_stack_alloc = POI_STACK_PREALLOC; type = unpack_object_header(p, &w_curs, &curpos, &size); + if (rtype) *rtype = type; /* representation type */ + if (sizep) { + if (type == OBJ_OFS_DELTA || type == OBJ_REF_DELTA) { + off_t tmp_pos = curpos; + off_t base_offset = get_delta_base(p, &w_curs, &tmp_pos, + type, obj_offset); + if (!base_offset) { + type = OBJ_BAD; + goto out; + } + *sizep = get_size_from_delta(p, &w_curs, tmp_pos); + if (*sizep == 0) { + type = OBJ_BAD; + goto out; + } + } else { + *sizep = size; + } + } + + while (type == OBJ_OFS_DELTA || type == OBJ_REF_DELTA) { + off_t base_offset; + /* Push the object we're going to leave behind */ + if (poi_stack_nr >= poi_stack_alloc && poi_stack == small_poi_stack) { + poi_stack_alloc = alloc_nr(poi_stack_nr); + poi_stack = xmalloc(sizeof(off_t)*poi_stack_alloc); + memcpy(poi_stack, small_poi_stack, sizeof(off_t)*poi_stack_nr); + } else { + ALLOC_GROW(poi_stack, poi_stack_nr+1, poi_stack_alloc); + } + poi_stack[poi_stack_nr++] = obj_offset; + /* If parsing the base offset fails, just unwind */ + base_offset = get_delta_base(p, &w_curs, &curpos, type, obj_offset); + if (!base_offset) + goto unwind; + curpos = obj_offset = base_offset; + type = unpack_object_header(p, &w_curs, &curpos, &size); + if (type <= OBJ_NONE) { + /* If getting the base itself fails, we first + * retry the base, otherwise unwind */ + type = retry_bad_packed_offset(p, base_offset); + if (type > OBJ_NONE) + goto out; + goto unwind; + } + } + switch (type) { - case OBJ_OFS_DELTA: - case OBJ_REF_DELTA: - type = packed_delta_info(p, &w_curs, curpos, - type, obj_offset, sizep); - break; + case OBJ_BAD: case OBJ_COMMIT: case OBJ_TREE: case OBJ_BLOB: case OBJ_TAG: - if (sizep) - *sizep = size; break; default: error("unknown object type %i at offset %"PRIuMAX" in %s", type, (uintmax_t)obj_offset, p->pack_name); type = OBJ_BAD; } + +out: + if (poi_stack != small_poi_stack) + free(poi_stack); unuse_pack(&w_curs); return type; + +unwind: + while (poi_stack_nr) { + obj_offset = poi_stack[--poi_stack_nr]; + type = retry_bad_packed_offset(p, obj_offset); + if (type > OBJ_NONE) + goto out; + } + type = OBJ_BAD; + goto out; } static void *unpack_compressed_entry(struct packed_git *p, @@ -1729,32 +1844,51 @@ static unsigned long pack_entry_hash(struct packed_git *p, off_t base_offset) return hash % MAX_DELTA_CACHE; } -static int in_delta_base_cache(struct packed_git *p, off_t base_offset) +static struct delta_base_cache_entry * +get_delta_base_cache_entry(struct packed_git *p, off_t base_offset) { unsigned long hash = pack_entry_hash(p, base_offset); - struct delta_base_cache_entry *ent = delta_base_cache + hash; + return delta_base_cache + hash; +} + +static int eq_delta_base_cache_entry(struct delta_base_cache_entry *ent, + struct packed_git *p, off_t base_offset) +{ return (ent->data && ent->p == p && ent->base_offset == base_offset); } +static int in_delta_base_cache(struct packed_git *p, off_t base_offset) +{ + struct delta_base_cache_entry *ent; + ent = get_delta_base_cache_entry(p, base_offset); + return eq_delta_base_cache_entry(ent, p, base_offset); +} + +static void clear_delta_base_cache_entry(struct delta_base_cache_entry *ent) +{ + ent->data = NULL; + ent->lru.next->prev = ent->lru.prev; + ent->lru.prev->next = ent->lru.next; + delta_base_cached -= ent->size; +} + static void *cache_or_unpack_entry(struct packed_git *p, off_t base_offset, unsigned long *base_size, enum object_type *type, int keep_cache) { + struct delta_base_cache_entry *ent; void *ret; - unsigned long hash = pack_entry_hash(p, base_offset); - struct delta_base_cache_entry *ent = delta_base_cache + hash; - ret = ent->data; - if (!ret || ent->p != p || ent->base_offset != base_offset) + ent = get_delta_base_cache_entry(p, base_offset); + + if (!eq_delta_base_cache_entry(ent, p, base_offset)) return unpack_entry(p, base_offset, type, base_size); - if (!keep_cache) { - ent->data = NULL; - ent->lru.next->prev = ent->lru.prev; - ent->lru.prev->next = ent->lru.next; - delta_base_cached -= ent->size; - } else { + ret = ent->data; + + if (!keep_cache) + clear_delta_base_cache_entry(ent); + else ret = xmemdupz(ent->data, ent->size); - } *type = ent->type; *base_size = ent->size; return ret; @@ -1818,68 +1952,6 @@ static void add_delta_base_cache(struct packed_git *p, off_t base_offset, static void *read_object(const unsigned char *sha1, enum object_type *type, unsigned long *size); -static void *unpack_delta_entry(struct packed_git *p, - struct pack_window **w_curs, - off_t curpos, - unsigned long delta_size, - off_t obj_offset, - enum object_type *type, - unsigned long *sizep) -{ - void *delta_data, *result, *base; - unsigned long base_size; - off_t base_offset; - - base_offset = get_delta_base(p, w_curs, &curpos, *type, obj_offset); - if (!base_offset) { - error("failed to validate delta base reference " - "at offset %"PRIuMAX" from %s", - (uintmax_t)curpos, p->pack_name); - return NULL; - } - unuse_pack(w_curs); - base = cache_or_unpack_entry(p, base_offset, &base_size, type, 0); - if (!base) { - /* - * We're probably in deep shit, but let's try to fetch - * the required base anyway from another pack or loose. - * This is costly but should happen only in the presence - * of a corrupted pack, and is better than failing outright. - */ - struct revindex_entry *revidx; - const unsigned char *base_sha1; - revidx = find_pack_revindex(p, base_offset); - if (!revidx) - return NULL; - base_sha1 = nth_packed_object_sha1(p, revidx->nr); - error("failed to read delta base object %s" - " at offset %"PRIuMAX" from %s", - sha1_to_hex(base_sha1), (uintmax_t)base_offset, - p->pack_name); - mark_bad_packed_object(p, base_sha1); - base = read_object(base_sha1, type, &base_size); - if (!base) - return NULL; - } - - delta_data = unpack_compressed_entry(p, w_curs, curpos, delta_size); - if (!delta_data) { - error("failed to unpack compressed delta " - "at offset %"PRIuMAX" from %s", - (uintmax_t)curpos, p->pack_name); - free(base); - return NULL; - } - result = patch_delta(base, base_size, - delta_data, delta_size, - sizep); - if (!result) - die("failed to apply delta"); - free(delta_data); - add_delta_base_cache(p, base_offset, base, base_size, *type); - return result; -} - static void write_pack_access_log(struct packed_git *p, off_t obj_offset) { static FILE *log_file; @@ -1900,48 +1972,178 @@ static void write_pack_access_log(struct packed_git *p, off_t obj_offset) int do_check_packed_object_crc; +#define UNPACK_ENTRY_STACK_PREALLOC 64 +struct unpack_entry_stack_ent { + off_t obj_offset; + off_t curpos; + unsigned long size; +}; + void *unpack_entry(struct packed_git *p, off_t obj_offset, - enum object_type *type, unsigned long *sizep) + enum object_type *final_type, unsigned long *final_size) { struct pack_window *w_curs = NULL; off_t curpos = obj_offset; - void *data; + void *data = NULL; + unsigned long size; + enum object_type type; + struct unpack_entry_stack_ent small_delta_stack[UNPACK_ENTRY_STACK_PREALLOC]; + struct unpack_entry_stack_ent *delta_stack = small_delta_stack; + int delta_stack_nr = 0, delta_stack_alloc = UNPACK_ENTRY_STACK_PREALLOC; + int base_from_cache = 0; if (log_pack_access) write_pack_access_log(p, obj_offset); - if (do_check_packed_object_crc && p->index_version > 1) { - struct revindex_entry *revidx = find_pack_revindex(p, obj_offset); - unsigned long len = revidx[1].offset - obj_offset; - if (check_pack_crc(p, &w_curs, obj_offset, len, revidx->nr)) { - const unsigned char *sha1 = - nth_packed_object_sha1(p, revidx->nr); - error("bad packed object CRC for %s", - sha1_to_hex(sha1)); - mark_bad_packed_object(p, sha1); - unuse_pack(&w_curs); - return NULL; + /* PHASE 1: drill down to the innermost base object */ + for (;;) { + off_t base_offset; + int i; + struct delta_base_cache_entry *ent; + + if (do_check_packed_object_crc && p->index_version > 1) { + struct revindex_entry *revidx = find_pack_revindex(p, obj_offset); + unsigned long len = revidx[1].offset - obj_offset; + if (check_pack_crc(p, &w_curs, obj_offset, len, revidx->nr)) { + const unsigned char *sha1 = + nth_packed_object_sha1(p, revidx->nr); + error("bad packed object CRC for %s", + sha1_to_hex(sha1)); + mark_bad_packed_object(p, sha1); + unuse_pack(&w_curs); + return NULL; + } + } + + ent = get_delta_base_cache_entry(p, curpos); + if (eq_delta_base_cache_entry(ent, p, curpos)) { + type = ent->type; + data = ent->data; + size = ent->size; + clear_delta_base_cache_entry(ent); + base_from_cache = 1; + break; } + + type = unpack_object_header(p, &w_curs, &curpos, &size); + if (type != OBJ_OFS_DELTA && type != OBJ_REF_DELTA) + break; + + base_offset = get_delta_base(p, &w_curs, &curpos, type, obj_offset); + if (!base_offset) { + error("failed to validate delta base reference " + "at offset %"PRIuMAX" from %s", + (uintmax_t)curpos, p->pack_name); + /* bail to phase 2, in hopes of recovery */ + data = NULL; + break; + } + + /* push object, proceed to base */ + if (delta_stack_nr >= delta_stack_alloc + && delta_stack == small_delta_stack) { + delta_stack_alloc = alloc_nr(delta_stack_nr); + delta_stack = xmalloc(sizeof(*delta_stack)*delta_stack_alloc); + memcpy(delta_stack, small_delta_stack, + sizeof(*delta_stack)*delta_stack_nr); + } else { + ALLOC_GROW(delta_stack, delta_stack_nr+1, delta_stack_alloc); + } + i = delta_stack_nr++; + delta_stack[i].obj_offset = obj_offset; + delta_stack[i].curpos = curpos; + delta_stack[i].size = size; + + curpos = obj_offset = base_offset; } - *type = unpack_object_header(p, &w_curs, &curpos, sizep); - switch (*type) { + /* PHASE 2: handle the base */ + switch (type) { case OBJ_OFS_DELTA: case OBJ_REF_DELTA: - data = unpack_delta_entry(p, &w_curs, curpos, *sizep, - obj_offset, type, sizep); + if (data) + die("BUG in unpack_entry: left loop at a valid delta"); break; case OBJ_COMMIT: case OBJ_TREE: case OBJ_BLOB: case OBJ_TAG: - data = unpack_compressed_entry(p, &w_curs, curpos, *sizep); + if (!base_from_cache) + data = unpack_compressed_entry(p, &w_curs, curpos, size); break; default: data = NULL; error("unknown object type %i at offset %"PRIuMAX" in %s", - *type, (uintmax_t)obj_offset, p->pack_name); + type, (uintmax_t)obj_offset, p->pack_name); } + + /* PHASE 3: apply deltas in order */ + + /* invariants: + * 'data' holds the base data, or NULL if there was corruption + */ + while (delta_stack_nr) { + void *delta_data; + void *base = data; + unsigned long delta_size, base_size = size; + int i; + + data = NULL; + + if (base) + add_delta_base_cache(p, obj_offset, base, base_size, type); + + if (!base) { + /* + * We're probably in deep shit, but let's try to fetch + * the required base anyway from another pack or loose. + * This is costly but should happen only in the presence + * of a corrupted pack, and is better than failing outright. + */ + struct revindex_entry *revidx; + const unsigned char *base_sha1; + revidx = find_pack_revindex(p, obj_offset); + if (revidx) { + base_sha1 = nth_packed_object_sha1(p, revidx->nr); + error("failed to read delta base object %s" + " at offset %"PRIuMAX" from %s", + sha1_to_hex(base_sha1), (uintmax_t)obj_offset, + p->pack_name); + mark_bad_packed_object(p, base_sha1); + base = read_object(base_sha1, &type, &base_size); + } + } + + i = --delta_stack_nr; + obj_offset = delta_stack[i].obj_offset; + curpos = delta_stack[i].curpos; + delta_size = delta_stack[i].size; + + if (!base) + continue; + + delta_data = unpack_compressed_entry(p, &w_curs, curpos, delta_size); + + if (!delta_data) { + error("failed to unpack compressed delta " + "at offset %"PRIuMAX" from %s", + (uintmax_t)curpos, p->pack_name); + data = NULL; + continue; + } + + data = patch_delta(base, base_size, + delta_data, delta_size, + &size); + if (!data) + die("failed to apply delta"); + + free(delta_data); + } + + *final_type = type; + *final_size = size; + unuse_pack(&w_curs); return data; } |