diff options
Diffstat (limited to 'sha1_file.c')
-rw-r--r-- | sha1_file.c | 399 |
1 files changed, 188 insertions, 211 deletions
diff --git a/sha1_file.c b/sha1_file.c index 32268d11d0..4f06a0e450 100644 --- a/sha1_file.c +++ b/sha1_file.c @@ -18,6 +18,7 @@ #include "refs.h" #include "pack-revindex.h" #include "sha1-lookup.h" +#include "bulk-checkin.h" #ifndef O_NOATIME #if defined(__linux__) && (defined(__i386__) || defined(__PPC__)) @@ -53,6 +54,8 @@ static struct cached_object empty_tree = { 0 }; +static struct packed_git *last_found_pack; + static struct cached_object *find_cached_object(const unsigned char *sha1) { int i; @@ -248,27 +251,30 @@ static int link_alt_odb_entry(const char * entry, int len, const char * relative const char *objdir = get_object_directory(); struct alternate_object_database *ent; struct alternate_object_database *alt; - /* 43 = 40-byte + 2 '/' + terminating NUL */ - int pfxlen = len; - int entlen = pfxlen + 43; - int base_len = -1; + int pfxlen, entlen; + struct strbuf pathbuf = STRBUF_INIT; if (!is_absolute_path(entry) && relative_base) { - /* Relative alt-odb */ - if (base_len < 0) - base_len = strlen(relative_base) + 1; - entlen += base_len; - pfxlen += base_len; + strbuf_addstr(&pathbuf, real_path(relative_base)); + strbuf_addch(&pathbuf, '/'); } - ent = xmalloc(sizeof(*ent) + entlen); + strbuf_add(&pathbuf, entry, len); - if (!is_absolute_path(entry) && relative_base) { - memcpy(ent->base, relative_base, base_len - 1); - ent->base[base_len - 1] = '/'; - memcpy(ent->base + base_len, entry, len); - } - else - memcpy(ent->base, entry, pfxlen); + normalize_path_copy(pathbuf.buf, pathbuf.buf); + + pfxlen = strlen(pathbuf.buf); + + /* + * The trailing slash after the directory name is given by + * this function at the end. Remove duplicates. + */ + while (pfxlen && pathbuf.buf[pfxlen-1] == '/') + pfxlen -= 1; + + entlen = pfxlen + 43; /* '/' + 2 hex + '/' + 38 hex + NUL */ + ent = xmalloc(sizeof(*ent) + entlen); + memcpy(ent->base, pathbuf.buf, pfxlen); + strbuf_release(&pathbuf); ent->name = ent->base + pfxlen + 1; ent->base[pfxlen + 3] = '/'; @@ -716,6 +722,8 @@ void free_pack_by_name(const char *pack_name) close_pack_index(p); free(p->bad_object_sha1); *pp = p->next; + if (last_found_pack == p) + last_found_pack = NULL; free(p); return; } @@ -1186,7 +1194,7 @@ static int open_sha1_file(const unsigned char *sha1) return -1; } -static void *map_sha1_file(const unsigned char *sha1, unsigned long *size) +void *map_sha1_file(const unsigned char *sha1, unsigned long *size) { void *map; int fd; @@ -1198,6 +1206,11 @@ static void *map_sha1_file(const unsigned char *sha1, unsigned long *size) if (!fstat(fd, &st)) { *size = xsize_t(st.st_size); + if (!*size) { + /* mmap() is forbidden on empty files */ + error("object file %s is empty", sha1_file_name(sha1)); + return NULL; + } map = xmmap(NULL, *size, PROT_READ, MAP_PRIVATE, fd, 0); } close(fd); @@ -1217,14 +1230,34 @@ static int experimental_loose_object(unsigned char *map) unsigned int word; /* - * Is it a zlib-compressed buffer? If so, the first byte - * must be 0x78 (15-bit window size, deflated), and the - * first 16-bit word is evenly divisible by 31. If so, - * we are looking at the official format, not the experimental - * one. + * We must determine if the buffer contains the standard + * zlib-deflated stream or the experimental format based + * on the in-pack object format. Compare the header byte + * for each format: + * + * RFC1950 zlib w/ deflate : 0www1000 : 0 <= www <= 7 + * Experimental pack-based : Stttssss : ttt = 1,2,3,4 + * + * If bit 7 is clear and bits 0-3 equal 8, the buffer MUST be + * in standard loose-object format, UNLESS it is a Git-pack + * format object *exactly* 8 bytes in size when inflated. + * + * However, RFC1950 also specifies that the 1st 16-bit word + * must be divisible by 31 - this checksum tells us our buffer + * is in the standard format, giving a false positive only if + * the 1st word of the Git-pack format object happens to be + * divisible by 31, ie: + * ((byte0 * 256) + byte1) % 31 = 0 + * => 0ttt10000www1000 % 31 = 0 + * + * As it happens, this case can only arise for www=3 & ttt=1 + * - ie, a Commit object, which would have to be 8 bytes in + * size. As no Commit can be that small, we find that the + * combination of these two criteria (bitmask & checksum) + * can always correctly determine the buffer format. */ word = (map[0] << 8) + map[1]; - if (map[0] == 0x78 && !(word % 31)) + if ((map[0] & 0x8F) == 0x08 && !(word % 31)) return 0; else return 1; @@ -1244,7 +1277,8 @@ unsigned long unpack_object_header_buffer(const unsigned char *buf, while (c & 0x80) { if (len <= used || bitsizeof(long) <= shift) { error("bad object header"); - return 0; + size = used = 0; + break; } c = buf[used++]; size += (c & 0x7f) << shift; @@ -1254,7 +1288,7 @@ unsigned long unpack_object_header_buffer(const unsigned char *buf, return used; } -static int unpack_sha1_header(git_zstream *stream, unsigned char *map, unsigned long mapsize, void *buffer, unsigned long bufsiz) +int unpack_sha1_header(git_zstream *stream, unsigned char *map, unsigned long mapsize, void *buffer, unsigned long bufsiz) { unsigned long size, used; static const char valid_loose_object_type[8] = { @@ -1346,7 +1380,7 @@ static void *unpack_sha1_rest(git_zstream *stream, void *buffer, unsigned long s * too permissive for what we want to check. So do an anal * object header parse by hand. */ -static int parse_sha1_header(const char *hdr, unsigned long *sizep) +int parse_sha1_header(const char *hdr, unsigned long *sizep) { char type[10]; int i; @@ -1485,7 +1519,7 @@ static off_t get_delta_base(struct packed_git *p, /* forward declaration for a mutually recursive function */ static int packed_object_info(struct packed_git *p, off_t offset, - unsigned long *sizep); + unsigned long *sizep, int *rtype); static int packed_delta_info(struct packed_git *p, struct pack_window **w_curs, @@ -1499,7 +1533,7 @@ static int packed_delta_info(struct packed_git *p, base_offset = get_delta_base(p, w_curs, &curpos, type, obj_offset); if (!base_offset) return OBJ_BAD; - type = packed_object_info(p, base_offset, NULL); + type = packed_object_info(p, base_offset, NULL, NULL); if (type <= OBJ_NONE) { struct revindex_entry *revidx; const unsigned char *base_sha1; @@ -1527,10 +1561,10 @@ static int packed_delta_info(struct packed_git *p, return type; } -static int unpack_object_header(struct packed_git *p, - struct pack_window **w_curs, - off_t *curpos, - unsigned long *sizep) +int unpack_object_header(struct packed_git *p, + struct pack_window **w_curs, + off_t *curpos, + unsigned long *sizep) { unsigned char *base; unsigned long left; @@ -1553,63 +1587,8 @@ static int unpack_object_header(struct packed_git *p, return type; } -const char *packed_object_info_detail(struct packed_git *p, - off_t obj_offset, - unsigned long *size, - unsigned long *store_size, - unsigned int *delta_chain_length, - unsigned char *base_sha1) -{ - struct pack_window *w_curs = NULL; - off_t curpos; - unsigned long dummy; - unsigned char *next_sha1; - enum object_type type; - struct revindex_entry *revidx; - - *delta_chain_length = 0; - curpos = obj_offset; - type = unpack_object_header(p, &w_curs, &curpos, size); - - revidx = find_pack_revindex(p, obj_offset); - *store_size = revidx[1].offset - obj_offset; - - for (;;) { - switch (type) { - default: - die("pack %s contains unknown object type %d", - p->pack_name, type); - case OBJ_COMMIT: - case OBJ_TREE: - case OBJ_BLOB: - case OBJ_TAG: - unuse_pack(&w_curs); - return typename(type); - case OBJ_OFS_DELTA: - obj_offset = get_delta_base(p, &w_curs, &curpos, type, obj_offset); - if (!obj_offset) - die("pack %s contains bad delta base reference of type %s", - p->pack_name, typename(type)); - if (*delta_chain_length == 0) { - revidx = find_pack_revindex(p, obj_offset); - hashcpy(base_sha1, nth_packed_object_sha1(p, revidx->nr)); - } - break; - case OBJ_REF_DELTA: - next_sha1 = use_pack(p, &w_curs, curpos, NULL); - if (*delta_chain_length == 0) - hashcpy(base_sha1, next_sha1); - obj_offset = find_pack_entry_one(next_sha1, p); - break; - } - (*delta_chain_length)++; - curpos = obj_offset; - type = unpack_object_header(p, &w_curs, &curpos, &dummy); - } -} - static int packed_object_info(struct packed_git *p, off_t obj_offset, - unsigned long *sizep) + unsigned long *sizep, int *rtype) { struct pack_window *w_curs = NULL; unsigned long size; @@ -1617,6 +1596,8 @@ static int packed_object_info(struct packed_git *p, off_t obj_offset, enum object_type type; type = unpack_object_header(p, &w_curs, &curpos, &size); + if (rtype) + *rtype = type; /* representation type */ switch (type) { case OBJ_OFS_DELTA: @@ -1699,6 +1680,13 @@ static unsigned long pack_entry_hash(struct packed_git *p, off_t base_offset) return hash % MAX_DELTA_CACHE; } +static int in_delta_base_cache(struct packed_git *p, off_t base_offset) +{ + unsigned long hash = pack_entry_hash(p, base_offset); + struct delta_base_cache_entry *ent = delta_base_cache + hash; + return (ent->data && ent->p == p && ent->base_offset == base_offset); +} + static void *cache_or_unpack_entry(struct packed_git *p, off_t base_offset, unsigned long *base_size, enum object_type *type, int keep_cache) { @@ -1843,6 +1831,24 @@ static void *unpack_delta_entry(struct packed_git *p, return result; } +static void write_pack_access_log(struct packed_git *p, off_t obj_offset) +{ + static FILE *log_file; + + if (!log_file) { + log_file = fopen(log_pack_access, "w"); + if (!log_file) { + error("cannot open pack access log '%s' for writing: %s", + log_pack_access, strerror(errno)); + log_pack_access = NULL; + return; + } + } + fprintf(log_file, "%s %"PRIuMAX"\n", + p->pack_name, (uintmax_t)obj_offset); + fflush(log_file); +} + int do_check_packed_object_crc; void *unpack_entry(struct packed_git *p, off_t obj_offset, @@ -1852,6 +1858,9 @@ void *unpack_entry(struct packed_git *p, off_t obj_offset, off_t curpos = obj_offset; void *data; + if (log_pack_access) + write_pack_access_log(p, obj_offset); + if (do_check_packed_object_crc && p->index_version > 1) { struct revindex_entry *revidx = find_pack_revindex(p, obj_offset); unsigned long len = revidx[1].offset - obj_offset; @@ -1989,7 +1998,7 @@ off_t find_pack_entry_one(const unsigned char *sha1, return 0; } -static int is_pack_valid(struct packed_git *p) +int is_pack_valid(struct packed_git *p) { /* An already open pack is known to be valid. */ if (p->pack_fd != -1) @@ -2010,54 +2019,58 @@ static int is_pack_valid(struct packed_git *p) return !open_packed_git(p); } +static int fill_pack_entry(const unsigned char *sha1, + struct pack_entry *e, + struct packed_git *p) +{ + off_t offset; + + if (p->num_bad_objects) { + unsigned i; + for (i = 0; i < p->num_bad_objects; i++) + if (!hashcmp(sha1, p->bad_object_sha1 + 20 * i)) + return 0; + } + + offset = find_pack_entry_one(sha1, p); + if (!offset) + return 0; + + /* + * We are about to tell the caller where they can locate the + * requested object. We better make sure the packfile is + * still here and can be accessed before supplying that + * answer, as it may have been deleted since the index was + * loaded! + */ + if (!is_pack_valid(p)) { + warning("packfile %s cannot be accessed", p->pack_name); + return 0; + } + e->offset = offset; + e->p = p; + hashcpy(e->sha1, sha1); + return 1; +} + static int find_pack_entry(const unsigned char *sha1, struct pack_entry *e) { - static struct packed_git *last_found = (void *)1; struct packed_git *p; - off_t offset; prepare_packed_git(); if (!packed_git) return 0; - p = (last_found == (void *)1) ? packed_git : last_found; - do { - if (p->num_bad_objects) { - unsigned i; - for (i = 0; i < p->num_bad_objects; i++) - if (!hashcmp(sha1, p->bad_object_sha1 + 20 * i)) - goto next; - } + if (last_found_pack && fill_pack_entry(sha1, e, last_found_pack)) + return 1; - offset = find_pack_entry_one(sha1, p); - if (offset) { - /* - * We are about to tell the caller where they can - * locate the requested object. We better make - * sure the packfile is still here and can be - * accessed before supplying that answer, as - * it may have been deleted since the index - * was loaded! - */ - if (!is_pack_valid(p)) { - error("packfile %s cannot be accessed", p->pack_name); - goto next; - } - e->offset = offset; - e->p = p; - hashcpy(e->sha1, sha1); - last_found = p; - return 1; - } + for (p = packed_git; p; p = p->next) { + if (p == last_found_pack || !fill_pack_entry(sha1, e, p)) + continue; - next: - if (p == last_found) - p = packed_git; - else - p = p->next; - if (p == last_found) - p = p->next; - } while (p); + last_found_pack = p; + return 1; + } return 0; } @@ -2097,24 +2110,28 @@ static int sha1_loose_object_info(const unsigned char *sha1, unsigned long *size return status; } -int sha1_object_info(const unsigned char *sha1, unsigned long *sizep) +/* returns enum object_type or negative */ +int sha1_object_info_extended(const unsigned char *sha1, struct object_info *oi) { struct cached_object *co; struct pack_entry e; - int status; + int status, rtype; co = find_cached_object(sha1); if (co) { - if (sizep) - *sizep = co->size; + if (oi->sizep) + *(oi->sizep) = co->size; + oi->whence = OI_CACHED; return co->type; } if (!find_pack_entry(sha1, &e)) { /* Most likely it's a loose object. */ - status = sha1_loose_object_info(sha1, sizep); - if (status >= 0) + status = sha1_loose_object_info(sha1, oi->sizep); + if (status >= 0) { + oi->whence = OI_LOOSE; return status; + } /* Not a loose object; someone else may have just packed it. */ reprepare_packed_git(); @@ -2122,15 +2139,31 @@ int sha1_object_info(const unsigned char *sha1, unsigned long *sizep) return status; } - status = packed_object_info(e.p, e.offset, sizep); + status = packed_object_info(e.p, e.offset, oi->sizep, &rtype); if (status < 0) { mark_bad_packed_object(e.p, sha1); - status = sha1_object_info(sha1, sizep); + status = sha1_object_info_extended(sha1, oi); + } else if (in_delta_base_cache(e.p, e.offset)) { + oi->whence = OI_DBCACHED; + } else { + oi->whence = OI_PACKED; + oi->u.packed.offset = e.offset; + oi->u.packed.pack = e.p; + oi->u.packed.is_delta = (rtype == OBJ_REF_DELTA || + rtype == OBJ_OFS_DELTA); } return status; } +int sha1_object_info(const unsigned char *sha1, unsigned long *sizep) +{ + struct object_info oi; + + oi.sizep = sizep; + return sha1_object_info_extended(sha1, &oi); +} + static void *read_packed_sha1(const unsigned char *sha1, enum object_type *type, unsigned long *size) { @@ -2432,15 +2465,15 @@ static int write_loose_object(const unsigned char *sha1, char *hdr, int hdrlen, git_SHA_CTX c; unsigned char parano_sha1[20]; char *filename; - static char tmpfile[PATH_MAX]; + static char tmp_file[PATH_MAX]; filename = sha1_file_name(sha1); - fd = create_tmpfile(tmpfile, sizeof(tmpfile), filename); + fd = create_tmpfile(tmp_file, sizeof(tmp_file), filename); if (fd < 0) { if (errno == EACCES) return error("insufficient permission for adding an object to repository database %s\n", get_object_directory()); else - return error("unable to create temporary sha1 filename %s: %s\n", tmpfile, strerror(errno)); + return error("unable to create temporary sha1 filename %s: %s\n", tmp_file, strerror(errno)); } /* Set it up */ @@ -2485,12 +2518,12 @@ static int write_loose_object(const unsigned char *sha1, char *hdr, int hdrlen, struct utimbuf utb; utb.actime = mtime; utb.modtime = mtime; - if (utime(tmpfile, &utb) < 0) + if (utime(tmp_file, &utb) < 0) warning("failed utime() on %s: %s", - tmpfile, strerror(errno)); + tmp_file, strerror(errno)); } - return move_temp_to_file(tmpfile, filename); + return move_temp_to_file(tmp_file, filename); } int write_sha1_file(const void *buf, unsigned long len, const char *type, unsigned char *returnsha1) @@ -2598,7 +2631,7 @@ static int index_mem(unsigned char *sha1, void *buf, size_t size, if ((type == OBJ_BLOB) && path) { struct strbuf nbuf = STRBUF_INIT; if (convert_to_git(path, buf, size, &nbuf, - write_object ? safe_crlf : 0)) { + write_object ? safe_crlf : SAFE_CRLF_FALSE)) { buf = strbuf_detach(&nbuf, &size); re_allocated = 1; } @@ -2661,82 +2694,25 @@ static int index_core(unsigned char *sha1, int fd, size_t size, } /* - * This creates one packfile per large blob, because the caller - * immediately wants the result sha1, and fast-import can report the - * object name via marks mechanism only by closing the created - * packfile. + * This creates one packfile per large blob unless bulk-checkin + * machinery is "plugged". * * This also bypasses the usual "convert-to-git" dance, and that is on * purpose. We could write a streaming version of the converting * functions and insert that before feeding the data to fast-import - * (or equivalent in-core API described above), but the primary - * motivation for trying to stream from the working tree file and to - * avoid mmaping it in core is to deal with large binary blobs, and - * by definition they do _not_ want to get any conversion. + * (or equivalent in-core API described above). However, that is + * somewhat complicated, as we do not know the size of the filter + * result, which we need to know beforehand when writing a git object. + * Since the primary motivation for trying to stream from the working + * tree file and to avoid mmaping it in core is to deal with large + * binary blobs, they generally do not want to get any conversion, and + * callers should avoid this code path when filters are requested. */ static int index_stream(unsigned char *sha1, int fd, size_t size, enum object_type type, const char *path, unsigned flags) { - struct child_process fast_import; - char export_marks[512]; - const char *argv[] = { "fast-import", "--quiet", export_marks, NULL }; - char tmpfile[512]; - char fast_import_cmd[512]; - char buf[512]; - int len, tmpfd; - - strcpy(tmpfile, git_path("hashstream_XXXXXX")); - tmpfd = git_mkstemp_mode(tmpfile, 0600); - if (tmpfd < 0) - die_errno("cannot create tempfile: %s", tmpfile); - if (close(tmpfd)) - die_errno("cannot close tempfile: %s", tmpfile); - sprintf(export_marks, "--export-marks=%s", tmpfile); - - memset(&fast_import, 0, sizeof(fast_import)); - fast_import.in = -1; - fast_import.argv = argv; - fast_import.git_cmd = 1; - if (start_command(&fast_import)) - die_errno("index-stream: git fast-import failed"); - - len = sprintf(fast_import_cmd, "blob\nmark :1\ndata %lu\n", - (unsigned long) size); - write_or_whine(fast_import.in, fast_import_cmd, len, - "index-stream: feeding fast-import"); - while (size) { - char buf[10240]; - size_t sz = size < sizeof(buf) ? size : sizeof(buf); - size_t actual; - - actual = read_in_full(fd, buf, sz); - if (actual < 0) - die_errno("index-stream: reading input"); - if (write_in_full(fast_import.in, buf, actual) != actual) - die_errno("index-stream: feeding fast-import"); - size -= actual; - } - if (close(fast_import.in)) - die_errno("index-stream: closing fast-import"); - if (finish_command(&fast_import)) - die_errno("index-stream: finishing fast-import"); - - tmpfd = open(tmpfile, O_RDONLY); - if (tmpfd < 0) - die_errno("index-stream: cannot open fast-import mark"); - len = read(tmpfd, buf, sizeof(buf)); - if (len < 0) - die_errno("index-stream: reading fast-import mark"); - if (close(tmpfd) < 0) - die_errno("index-stream: closing fast-import mark"); - if (unlink(tmpfile)) - die_errno("index-stream: unlinking fast-import mark"); - if (len != 44 || - memcmp(":1 ", buf, 3) || - get_sha1_hex(buf + 3, sha1)) - die_errno("index-stream: unexpected fast-import mark: <%s>", buf); - return 0; + return index_bulk_checkin(sha1, fd, size, type, path, flags); } int index_fd(unsigned char *sha1, int fd, struct stat *st, @@ -2747,7 +2723,8 @@ int index_fd(unsigned char *sha1, int fd, struct stat *st, if (!S_ISREG(st->st_mode)) ret = index_pipe(sha1, fd, type, path, flags); - else if (size <= big_file_threshold || type != OBJ_BLOB) + else if (size <= big_file_threshold || type != OBJ_BLOB || + (path && would_convert_to_git(path, NULL, 0, 0))) ret = index_core(sha1, fd, size, type, path, flags); else ret = index_stream(sha1, fd, size, type, path, flags); |