summaryrefslogtreecommitdiff
path: root/sha1_file.c
diff options
context:
space:
mode:
Diffstat (limited to 'sha1_file.c')
-rw-r--r--sha1_file.c151
1 files changed, 103 insertions, 48 deletions
diff --git a/sha1_file.c b/sha1_file.c
index 88f2151ff3..3c4f1652f1 100644
--- a/sha1_file.c
+++ b/sha1_file.c
@@ -19,6 +19,7 @@
#include "pack-revindex.h"
#include "sha1-lookup.h"
#include "bulk-checkin.h"
+#include "streaming.h"
#ifndef O_NOATIME
#if defined(__linux__) && (defined(__i386__) || defined(__PPC__))
@@ -54,6 +55,8 @@ static struct cached_object empty_tree = {
0
};
+static struct packed_git *last_found_pack;
+
static struct cached_object *find_cached_object(const unsigned char *sha1)
{
int i;
@@ -720,6 +723,8 @@ void free_pack_by_name(const char *pack_name)
close_pack_index(p);
free(p->bad_object_sha1);
*pp = p->next;
+ if (last_found_pack == p)
+ last_found_pack = NULL;
free(p);
return;
}
@@ -1142,10 +1147,47 @@ static const struct packed_git *has_packed_and_bad(const unsigned char *sha1)
return NULL;
}
-int check_sha1_signature(const unsigned char *sha1, void *map, unsigned long size, const char *type)
+/*
+ * With an in-core object data in "map", rehash it to make sure the
+ * object name actually matches "sha1" to detect object corruption.
+ * With "map" == NULL, try reading the object named with "sha1" using
+ * the streaming interface and rehash it to do the same.
+ */
+int check_sha1_signature(const unsigned char *sha1, void *map,
+ unsigned long size, const char *type)
{
unsigned char real_sha1[20];
- hash_sha1_file(map, size, type, real_sha1);
+ enum object_type obj_type;
+ struct git_istream *st;
+ git_SHA_CTX c;
+ char hdr[32];
+ int hdrlen;
+
+ if (map) {
+ hash_sha1_file(map, size, type, real_sha1);
+ return hashcmp(sha1, real_sha1) ? -1 : 0;
+ }
+
+ st = open_istream(sha1, &obj_type, &size, NULL);
+ if (!st)
+ return -1;
+
+ /* Generate the header */
+ hdrlen = sprintf(hdr, "%s %lu", typename(obj_type), size) + 1;
+
+ /* Sha1.. */
+ git_SHA1_Init(&c);
+ git_SHA1_Update(&c, hdr, hdrlen);
+ for (;;) {
+ char buf[1024 * 16];
+ ssize_t readlen = read_istream(st, buf, sizeof(buf));
+
+ if (!readlen)
+ break;
+ git_SHA1_Update(&c, buf, readlen);
+ }
+ git_SHA1_Final(real_sha1, &c);
+ close_istream(st);
return hashcmp(sha1, real_sha1) ? -1 : 0;
}
@@ -1202,6 +1244,11 @@ void *map_sha1_file(const unsigned char *sha1, unsigned long *size)
if (!fstat(fd, &st)) {
*size = xsize_t(st.st_size);
+ if (!*size) {
+ /* mmap() is forbidden on empty files */
+ error("object file %s is empty", sha1_file_name(sha1));
+ return NULL;
+ }
map = xmmap(NULL, *size, PROT_READ, MAP_PRIVATE, fd, 0);
}
close(fd);
@@ -2010,54 +2057,58 @@ int is_pack_valid(struct packed_git *p)
return !open_packed_git(p);
}
+static int fill_pack_entry(const unsigned char *sha1,
+ struct pack_entry *e,
+ struct packed_git *p)
+{
+ off_t offset;
+
+ if (p->num_bad_objects) {
+ unsigned i;
+ for (i = 0; i < p->num_bad_objects; i++)
+ if (!hashcmp(sha1, p->bad_object_sha1 + 20 * i))
+ return 0;
+ }
+
+ offset = find_pack_entry_one(sha1, p);
+ if (!offset)
+ return 0;
+
+ /*
+ * We are about to tell the caller where they can locate the
+ * requested object. We better make sure the packfile is
+ * still here and can be accessed before supplying that
+ * answer, as it may have been deleted since the index was
+ * loaded!
+ */
+ if (!is_pack_valid(p)) {
+ warning("packfile %s cannot be accessed", p->pack_name);
+ return 0;
+ }
+ e->offset = offset;
+ e->p = p;
+ hashcpy(e->sha1, sha1);
+ return 1;
+}
+
static int find_pack_entry(const unsigned char *sha1, struct pack_entry *e)
{
- static struct packed_git *last_found = (void *)1;
struct packed_git *p;
- off_t offset;
prepare_packed_git();
if (!packed_git)
return 0;
- p = (last_found == (void *)1) ? packed_git : last_found;
- do {
- if (p->num_bad_objects) {
- unsigned i;
- for (i = 0; i < p->num_bad_objects; i++)
- if (!hashcmp(sha1, p->bad_object_sha1 + 20 * i))
- goto next;
- }
+ if (last_found_pack && fill_pack_entry(sha1, e, last_found_pack))
+ return 1;
- offset = find_pack_entry_one(sha1, p);
- if (offset) {
- /*
- * We are about to tell the caller where they can
- * locate the requested object. We better make
- * sure the packfile is still here and can be
- * accessed before supplying that answer, as
- * it may have been deleted since the index
- * was loaded!
- */
- if (!is_pack_valid(p)) {
- warning("packfile %s cannot be accessed", p->pack_name);
- goto next;
- }
- e->offset = offset;
- e->p = p;
- hashcpy(e->sha1, sha1);
- last_found = p;
- return 1;
- }
+ for (p = packed_git; p; p = p->next) {
+ if (p == last_found_pack || !fill_pack_entry(sha1, e, p))
+ continue;
- next:
- if (p == last_found)
- p = packed_git;
- else
- p = p->next;
- if (p == last_found)
- p = p->next;
- } while (p);
+ last_found_pack = p;
+ return 1;
+ }
return 0;
}
@@ -2366,7 +2417,7 @@ int move_temp_to_file(const char *tmpfile, const char *filename)
unlink_or_warn(tmpfile);
if (ret) {
if (ret != EEXIST) {
- return error("unable to write sha1 filename %s: %s\n", filename, strerror(ret));
+ return error("unable to write sha1 filename %s: %s", filename, strerror(ret));
}
/* FIXME!!! Collision check here ? */
}
@@ -2458,9 +2509,9 @@ static int write_loose_object(const unsigned char *sha1, char *hdr, int hdrlen,
fd = create_tmpfile(tmp_file, sizeof(tmp_file), filename);
if (fd < 0) {
if (errno == EACCES)
- return error("insufficient permission for adding an object to repository database %s\n", get_object_directory());
+ return error("insufficient permission for adding an object to repository database %s", get_object_directory());
else
- return error("unable to create temporary sha1 filename %s: %s\n", tmp_file, strerror(errno));
+ return error("unable to create temporary file: %s", strerror(errno));
}
/* Set it up */
@@ -2687,10 +2738,13 @@ static int index_core(unsigned char *sha1, int fd, size_t size,
* This also bypasses the usual "convert-to-git" dance, and that is on
* purpose. We could write a streaming version of the converting
* functions and insert that before feeding the data to fast-import
- * (or equivalent in-core API described above), but the primary
- * motivation for trying to stream from the working tree file and to
- * avoid mmaping it in core is to deal with large binary blobs, and
- * by definition they do _not_ want to get any conversion.
+ * (or equivalent in-core API described above). However, that is
+ * somewhat complicated, as we do not know the size of the filter
+ * result, which we need to know beforehand when writing a git object.
+ * Since the primary motivation for trying to stream from the working
+ * tree file and to avoid mmaping it in core is to deal with large
+ * binary blobs, they generally do not want to get any conversion, and
+ * callers should avoid this code path when filters are requested.
*/
static int index_stream(unsigned char *sha1, int fd, size_t size,
enum object_type type, const char *path,
@@ -2707,7 +2761,8 @@ int index_fd(unsigned char *sha1, int fd, struct stat *st,
if (!S_ISREG(st->st_mode))
ret = index_pipe(sha1, fd, type, path, flags);
- else if (size <= big_file_threshold || type != OBJ_BLOB)
+ else if (size <= big_file_threshold || type != OBJ_BLOB ||
+ (path && would_convert_to_git(path, NULL, 0, 0)))
ret = index_core(sha1, fd, size, type, path, flags);
else
ret = index_stream(sha1, fd, size, type, path, flags);