summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Junio C Hamano <gitster@pobox.com> 2021-06-14 13:33:26 +0900
committer: Junio C Hamano <gitster@pobox.com> 2021-06-14 13:33:26 +0900
commit: 0dd2fd18f80baa94d4518dc487e1dc790a59ebe2 (patch)
tree: 1cf3212944ff676c193d948a70ec5dbc720f3256
parent: Merge branch 'jk/clone-clean-upon-transport-error' (diff)
parent: read-cache: delete unused hashing methods (diff)
download: tgif-0dd2fd18f80baa94d4518dc487e1dc790a59ebe2.tar.xz
Merge branch 'ds/write-index-with-hashfile-api'
Use the hashfile API in the codepath that writes the index file to
reduce code duplication.

* ds/write-index-with-hashfile-api:
  read-cache: delete unused hashing methods
  read-cache: use hashfile instead of git_hash_ctx
  csum-file.h: increase hashfile buffer size
  hashfile: use write_in_full()
-rw-r--r-- chunk-format.c 12
-rw-r--r-- csum-file.c 94
-rw-r--r-- csum-file.h 4
-rw-r--r-- read-cache.c 191
4 files changed, 134 insertions, 167 deletions
diff --git a/chunk-format.c b/chunk-format.c
index da191e59a2..1c3dca62e2 100644
--- a/chunk-format.c
+++ b/chunk-format.c
@@ -58,9 +58,11 @@ void add_chunk(struct chunkfile *cf,
int write_chunkfile(struct chunkfile *cf, void *data)
{
- int i;
+ int i, result = 0;
uint64_t cur_offset = hashfile_total(cf->f);
+ trace2_region_enter("chunkfile", "write", the_repository);
+
/* Add the table of contents to the current offset */
cur_offset += (cf->chunks_nr + 1) * CHUNK_TOC_ENTRY_SIZE;
@@ -77,10 +79,10 @@ int write_chunkfile(struct chunkfile *cf, void *data)
for (i = 0; i < cf->chunks_nr; i++) {
off_t start_offset = hashfile_total(cf->f);
- int result = cf->chunks[i].write_fn(cf->f, data);
+ result = cf->chunks[i].write_fn(cf->f, data);
if (result)
- return result;
+ goto cleanup;
if (hashfile_total(cf->f) - start_offset != cf->chunks[i].size)
BUG("expected to write %"PRId64" bytes to chunk %"PRIx32", but wrote %"PRId64" instead",
@@ -88,7 +90,9 @@ int write_chunkfile(struct chunkfile *cf, void *data)
hashfile_total(cf->f) - start_offset);
}
- return 0;
+cleanup:
+ trace2_region_leave("chunkfile", "write", the_repository);
+ return result;
}
int read_table_of_contents(struct chunkfile *cf,
diff --git a/csum-file.c b/csum-file.c
index 7510950fa3..3487d28ed7 100644
--- a/csum-file.c
+++ b/csum-file.c
@@ -11,35 +11,33 @@
#include "progress.h"
#include "csum-file.h"
+static void verify_buffer_or_die(struct hashfile *f,
+ const void *buf,
+ unsigned int count)
+{
+ ssize_t ret = read_in_full(f->check_fd, f->check_buffer, count);
+
+ if (ret < 0)
+ die_errno("%s: sha1 file read error", f->name);
+ if (ret != count)
+ die("%s: sha1 file truncated", f->name);
+ if (memcmp(buf, f->check_buffer, count))
+ die("sha1 file '%s' validation error", f->name);
+}
+
static void flush(struct hashfile *f, const void *buf, unsigned int count)
{
- if (0 <= f->check_fd && count) {
- unsigned char check_buffer[8192];
- ssize_t ret = read_in_full(f->check_fd, check_buffer, count);
-
- if (ret < 0)
- die_errno("%s: sha1 file read error", f->name);
- if (ret != count)
- die("%s: sha1 file truncated", f->name);
- if (memcmp(buf, check_buffer, count))
- die("sha1 file '%s' validation error", f->name);
- }
+ if (0 <= f->check_fd && count)
+ verify_buffer_or_die(f, buf, count);
- for (;;) {
- int ret = xwrite(f->fd, buf, count);
- if (ret > 0) {
- f->total += ret;
- display_throughput(f->tp, f->total);
- buf = (char *) buf + ret;
- count -= ret;
- if (count)
- continue;
- return;
- }
- if (!ret)
+ if (write_in_full(f->fd, buf, count) < 0) {
+ if (errno == ENOSPC)
die("sha1 file '%s' write error. Out of diskspace", f->name);
die_errno("sha1 file '%s' write error", f->name);
}
+
+ f->total += count;
+ display_throughput(f->tp, f->total);
}
void hashflush(struct hashfile *f)
@@ -53,6 +51,13 @@ void hashflush(struct hashfile *f)
}
}
+static void free_hashfile(struct hashfile *f)
+{
+ free(f->buffer);
+ free(f->check_buffer);
+ free(f);
+}
+
int finalize_hashfile(struct hashfile *f, unsigned char *result, unsigned int flags)
{
int fd;
@@ -82,20 +87,20 @@ int finalize_hashfile(struct hashfile *f, unsigned char *result, unsigned int fl
if (close(f->check_fd))
die_errno("%s: sha1 file error on close", f->name);
}
- free(f);
+ free_hashfile(f);
return fd;
}
void hashwrite(struct hashfile *f, const void *buf, unsigned int count)
{
while (count) {
- unsigned left = sizeof(f->buffer) - f->offset;
+ unsigned left = f->buffer_len - f->offset;
unsigned nr = count > left ? left : count;
if (f->do_crc)
f->crc32 = crc32(f->crc32, buf, nr);
- if (nr == sizeof(f->buffer)) {
+ if (nr == f->buffer_len) {
/*
* Flush a full batch worth of data directly
* from the input, skipping the memcpy() to
@@ -121,11 +126,6 @@ void hashwrite(struct hashfile *f, const void *buf, unsigned int count)
}
}
-struct hashfile *hashfd(int fd, const char *name)
-{
- return hashfd_throughput(fd, name, NULL);
-}
-
struct hashfile *hashfd_check(const char *name)
{
int sink, check;
@@ -139,10 +139,14 @@ struct hashfile *hashfd_check(const char *name)
die_errno("unable to open '%s'", name);
f = hashfd(sink, name);
f->check_fd = check;
+ f->check_buffer = xmalloc(f->buffer_len);
+
return f;
}
-struct hashfile *hashfd_throughput(int fd, const char *name, struct progress *tp)
+static struct hashfile *hashfd_internal(int fd, const char *name,
+ struct progress *tp,
+ size_t buffer_len)
{
struct hashfile *f = xmalloc(sizeof(*f));
f->fd = fd;
@@ -153,9 +157,35 @@ struct hashfile *hashfd_throughput(int fd, const char *name, struct progress *tp
f->name = name;
f->do_crc = 0;
the_hash_algo->init_fn(&f->ctx);
+
+ f->buffer_len = buffer_len;
+ f->buffer = xmalloc(buffer_len);
+ f->check_buffer = NULL;
+
return f;
}
+struct hashfile *hashfd(int fd, const char *name)
+{
+ /*
+ * Since we are not going to use a progress meter to
+ * measure the rate of data passing through this hashfile,
+ * use a larger buffer size to reduce write() calls.
+ */
+ return hashfd_internal(fd, name, NULL, 128 * 1024);
+}
+
+struct hashfile *hashfd_throughput(int fd, const char *name, struct progress *tp)
+{
+ /*
+ * Since we are expecting to report progress of the
+ * write into this hashfile, use a smaller buffer
+ * size so the progress indicators arrive at a more
+ * frequent rate.
+ */
+ return hashfd_internal(fd, name, tp, 8 * 1024);
+}
+
void hashfile_checkpoint(struct hashfile *f, struct hashfile_checkpoint *checkpoint)
{
hashflush(f);
diff --git a/csum-file.h b/csum-file.h
index e54d53d1d0..3044bd19ab 100644
--- a/csum-file.h
+++ b/csum-file.h
@@ -16,7 +16,9 @@ struct hashfile {
const char *name;
int do_crc;
uint32_t crc32;
- unsigned char buffer[8192];
+ size_t buffer_len;
+ unsigned char *buffer;
+ unsigned char *check_buffer;
};
/* Checkpoint */
diff --git a/read-cache.c b/read-cache.c
index 1b3c2eb408..77961a3885 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -26,6 +26,7 @@
#include "thread-utils.h"
#include "progress.h"
#include "sparse-index.h"
+#include "csum-file.h"
/* Mask for the name length in ce_flags in the on-disk index */
@@ -2521,80 +2522,23 @@ int repo_index_has_changes(struct repository *repo,
}
}
-#define WRITE_BUFFER_SIZE (128 * 1024)
-static unsigned char write_buffer[WRITE_BUFFER_SIZE];
-static unsigned long write_buffer_len;
-
-static int ce_write_flush(git_hash_ctx *context, int fd)
+static int write_index_ext_header(struct hashfile *f,
+ git_hash_ctx *eoie_f,
+ unsigned int ext,
+ unsigned int sz)
{
- unsigned int buffered = write_buffer_len;
- if (buffered) {
- the_hash_algo->update_fn(context, write_buffer, buffered);
- if (write_in_full(fd, write_buffer, buffered) < 0)
- return -1;
- write_buffer_len = 0;
- }
- return 0;
-}
+ hashwrite_be32(f, ext);
+ hashwrite_be32(f, sz);
-static int ce_write(git_hash_ctx *context, int fd, void *data, unsigned int len)
-{
- while (len) {
- unsigned int buffered = write_buffer_len;
- unsigned int partial = WRITE_BUFFER_SIZE - buffered;
- if (partial > len)
- partial = len;
- memcpy(write_buffer + buffered, data, partial);
- buffered += partial;
- if (buffered == WRITE_BUFFER_SIZE) {
- write_buffer_len = buffered;
- if (ce_write_flush(context, fd))
- return -1;
- buffered = 0;
- }
- write_buffer_len = buffered;
- len -= partial;
- data = (char *) data + partial;
+ if (eoie_f) {
+ ext = htonl(ext);
+ sz = htonl(sz);
+ the_hash_algo->update_fn(eoie_f, &ext, sizeof(ext));
+ the_hash_algo->update_fn(eoie_f, &sz, sizeof(sz));
}
return 0;
}
-static int write_index_ext_header(git_hash_ctx *context, git_hash_ctx *eoie_context,
- int fd, unsigned int ext, unsigned int sz)
-{
- ext = htonl(ext);
- sz = htonl(sz);
- if (eoie_context) {
- the_hash_algo->update_fn(eoie_context, &ext, 4);
- the_hash_algo->update_fn(eoie_context, &sz, 4);
- }
- return ((ce_write(context, fd, &ext, 4) < 0) ||
- (ce_write(context, fd, &sz, 4) < 0)) ? -1 : 0;
-}
-
-static int ce_flush(git_hash_ctx *context, int fd, unsigned char *hash)
-{
- unsigned int left = write_buffer_len;
-
- if (left) {
- write_buffer_len = 0;
- the_hash_algo->update_fn(context, write_buffer, left);
- }
-
- /* Flush first if not enough space for hash signature */
- if (left + the_hash_algo->rawsz > WRITE_BUFFER_SIZE) {
- if (write_in_full(fd, write_buffer, left) < 0)
- return -1;
- left = 0;
- }
-
- /* Append the hash signature at the end */
- the_hash_algo->final_fn(write_buffer + left, context);
- hashcpy(hash, write_buffer + left);
- left += the_hash_algo->rawsz;
- return (write_in_full(fd, write_buffer, left) < 0) ? -1 : 0;
-}
-
static void ce_smudge_racily_clean_entry(struct index_state *istate,
struct cache_entry *ce)
{
@@ -2673,11 +2617,10 @@ static void copy_cache_entry_to_ondisk(struct ondisk_cache_entry *ondisk,
}
}
-static int ce_write_entry(git_hash_ctx *c, int fd, struct cache_entry *ce,
+static int ce_write_entry(struct hashfile *f, struct cache_entry *ce,
struct strbuf *previous_name, struct ondisk_cache_entry *ondisk)
{
int size;
- int result;
unsigned int saved_namelen;
int stripped_name = 0;
static unsigned char padding[8] = { 0x00 };
@@ -2693,11 +2636,9 @@ static int ce_write_entry(git_hash_ctx *c, int fd, struct cache_entry *ce,
if (!previous_name) {
int len = ce_namelen(ce);
copy_cache_entry_to_ondisk(ondisk, ce);
- result = ce_write(c, fd, ondisk, size);
- if (!result)
- result = ce_write(c, fd, ce->name, len);
- if (!result)
- result = ce_write(c, fd, padding, align_padding_size(size, len));
+ hashwrite(f, ondisk, size);
+ hashwrite(f, ce->name, len);
+ hashwrite(f, padding, align_padding_size(size, len));
} else {
int common, to_remove, prefix_size;
unsigned char to_remove_vi[16];
@@ -2711,13 +2652,10 @@ static int ce_write_entry(git_hash_ctx *c, int fd, struct cache_entry *ce,
prefix_size = encode_varint(to_remove, to_remove_vi);
copy_cache_entry_to_ondisk(ondisk, ce);
- result = ce_write(c, fd, ondisk, size);
- if (!result)
- result = ce_write(c, fd, to_remove_vi, prefix_size);
- if (!result)
- result = ce_write(c, fd, ce->name + common, ce_namelen(ce) - common);
- if (!result)
- result = ce_write(c, fd, padding, 1);
+ hashwrite(f, ondisk, size);
+ hashwrite(f, to_remove_vi, prefix_size);
+ hashwrite(f, ce->name + common, ce_namelen(ce) - common);
+ hashwrite(f, padding, 1);
strbuf_splice(previous_name, common, to_remove,
ce->name + common, ce_namelen(ce) - common);
@@ -2727,7 +2665,7 @@ static int ce_write_entry(git_hash_ctx *c, int fd, struct cache_entry *ce,
ce->ce_flags &= ~CE_STRIP_NAME;
}
- return result;
+ return 0;
}
/*
@@ -2839,8 +2777,8 @@ static int do_write_index(struct index_state *istate, struct tempfile *tempfile,
int strip_extensions)
{
uint64_t start = getnanotime();
- int newfd = tempfile->fd;
- git_hash_ctx c, eoie_c;
+ struct hashfile *f;
+ git_hash_ctx *eoie_c = NULL;
struct cache_header hdr;
int i, err = 0, removed, extended, hdr_version;
struct cache_entry **cache = istate->cache;
@@ -2854,6 +2792,8 @@ static int do_write_index(struct index_state *istate, struct tempfile *tempfile,
struct index_entry_offset_table *ieot = NULL;
int nr, nr_threads;
+ f = hashfd(tempfile->fd, tempfile->filename.buf);
+
for (i = removed = extended = 0; i < entries; i++) {
if (cache[i]->ce_flags & CE_REMOVE)
removed++;
@@ -2882,9 +2822,7 @@ static int do_write_index(struct index_state *istate, struct tempfile *tempfile,
hdr.hdr_version = htonl(hdr_version);
hdr.hdr_entries = htonl(entries - removed);
- the_hash_algo->init_fn(&c);
- if (ce_write(&c, newfd, &hdr, sizeof(hdr)) < 0)
- return -1;
+ hashwrite(f, &hdr, sizeof(hdr));
if (!HAVE_THREADS || git_config_get_index_threads(&nr_threads))
nr_threads = 1;
@@ -2919,12 +2857,8 @@ static int do_write_index(struct index_state *istate, struct tempfile *tempfile,
}
}
- offset = lseek(newfd, 0, SEEK_CUR);
- if (offset < 0) {
- free(ieot);
- return -1;
- }
- offset += write_buffer_len;
+ offset = hashfile_total(f);
+
nr = 0;
previous_name = (hdr_version == 4) ? &previous_name_buf : NULL;
@@ -2959,14 +2893,10 @@ static int do_write_index(struct index_state *istate, struct tempfile *tempfile,
if (previous_name)
previous_name->buf[0] = 0;
nr = 0;
- offset = lseek(newfd, 0, SEEK_CUR);
- if (offset < 0) {
- free(ieot);
- return -1;
- }
- offset += write_buffer_len;
+
+ offset = hashfile_total(f);
}
- if (ce_write_entry(&c, newfd, ce, previous_name, (struct ondisk_cache_entry *)&ondisk) < 0)
+ if (ce_write_entry(f, ce, previous_name, (struct ondisk_cache_entry *)&ondisk) < 0)
err = -1;
if (err)
@@ -2985,14 +2915,16 @@ static int do_write_index(struct index_state *istate, struct tempfile *tempfile,
return err;
}
- /* Write extension data here */
- offset = lseek(newfd, 0, SEEK_CUR);
- if (offset < 0) {
- free(ieot);
- return -1;
+ offset = hashfile_total(f);
+
+ /*
+ * The extension headers must be hashed on their own for the
+ * EOIE extension. Create a hash context here to compute that hash.
+ */
+ if (offset && record_eoie()) {
+ CALLOC_ARRAY(eoie_c, 1);
+ the_hash_algo->init_fn(eoie_c);
}
- offset += write_buffer_len;
- the_hash_algo->init_fn(&eoie_c);
/*
* Lets write out CACHE_EXT_INDEXENTRYOFFSETTABLE first so that we
@@ -3005,8 +2937,8 @@ static int do_write_index(struct index_state *istate, struct tempfile *tempfile,
struct strbuf sb = STRBUF_INIT;
write_ieot_extension(&sb, ieot);
- err = write_index_ext_header(&c, &eoie_c, newfd, CACHE_EXT_INDEXENTRYOFFSETTABLE, sb.len) < 0
- || ce_write(&c, newfd, sb.buf, sb.len) < 0;
+ err = write_index_ext_header(f, eoie_c, CACHE_EXT_INDEXENTRYOFFSETTABLE, sb.len) < 0;
+ hashwrite(f, sb.buf, sb.len);
strbuf_release(&sb);
free(ieot);
if (err)
@@ -3018,9 +2950,9 @@ static int do_write_index(struct index_state *istate, struct tempfile *tempfile,
struct strbuf sb = STRBUF_INIT;
err = write_link_extension(&sb, istate) < 0 ||
- write_index_ext_header(&c, &eoie_c, newfd, CACHE_EXT_LINK,
- sb.len) < 0 ||
- ce_write(&c, newfd, sb.buf, sb.len) < 0;
+ write_index_ext_header(f, eoie_c, CACHE_EXT_LINK,
+ sb.len) < 0;
+ hashwrite(f, sb.buf, sb.len);
strbuf_release(&sb);
if (err)
return -1;
@@ -3029,8 +2961,8 @@ static int do_write_index(struct index_state *istate, struct tempfile *tempfile,
struct strbuf sb = STRBUF_INIT;
cache_tree_write(&sb, istate->cache_tree);
- err = write_index_ext_header(&c, &eoie_c, newfd, CACHE_EXT_TREE, sb.len) < 0
- || ce_write(&c, newfd, sb.buf, sb.len) < 0;
+ err = write_index_ext_header(f, eoie_c, CACHE_EXT_TREE, sb.len) < 0;
+ hashwrite(f, sb.buf, sb.len);
strbuf_release(&sb);
if (err)
return -1;
@@ -3039,9 +2971,9 @@ static int do_write_index(struct index_state *istate, struct tempfile *tempfile,
struct strbuf sb = STRBUF_INIT;
resolve_undo_write(&sb, istate->resolve_undo);
- err = write_index_ext_header(&c, &eoie_c, newfd, CACHE_EXT_RESOLVE_UNDO,
- sb.len) < 0
- || ce_write(&c, newfd, sb.buf, sb.len) < 0;
+ err = write_index_ext_header(f, eoie_c, CACHE_EXT_RESOLVE_UNDO,
+ sb.len) < 0;
+ hashwrite(f, sb.buf, sb.len);
strbuf_release(&sb);
if (err)
return -1;
@@ -3050,9 +2982,9 @@ static int do_write_index(struct index_state *istate, struct tempfile *tempfile,
struct strbuf sb = STRBUF_INIT;
write_untracked_extension(&sb, istate->untracked);
- err = write_index_ext_header(&c, &eoie_c, newfd, CACHE_EXT_UNTRACKED,
- sb.len) < 0 ||
- ce_write(&c, newfd, sb.buf, sb.len) < 0;
+ err = write_index_ext_header(f, eoie_c, CACHE_EXT_UNTRACKED,
+ sb.len) < 0;
+ hashwrite(f, sb.buf, sb.len);
strbuf_release(&sb);
if (err)
return -1;
@@ -3061,14 +2993,14 @@ static int do_write_index(struct index_state *istate, struct tempfile *tempfile,
struct strbuf sb = STRBUF_INIT;
write_fsmonitor_extension(&sb, istate);
- err = write_index_ext_header(&c, &eoie_c, newfd, CACHE_EXT_FSMONITOR, sb.len) < 0
- || ce_write(&c, newfd, sb.buf, sb.len) < 0;
+ err = write_index_ext_header(f, eoie_c, CACHE_EXT_FSMONITOR, sb.len) < 0;
+ hashwrite(f, sb.buf, sb.len);
strbuf_release(&sb);
if (err)
return -1;
}
if (istate->sparse_index) {
- if (write_index_ext_header(&c, &eoie_c, newfd, CACHE_EXT_SPARSE_DIRECTORIES, 0) < 0)
+ if (write_index_ext_header(f, eoie_c, CACHE_EXT_SPARSE_DIRECTORIES, 0) < 0)
return -1;
}
@@ -3078,19 +3010,18 @@ static int do_write_index(struct index_state *istate, struct tempfile *tempfile,
* read. Write it out regardless of the strip_extensions parameter as we need it
* when loading the shared index.
*/
- if (offset && record_eoie()) {
+ if (eoie_c) {
struct strbuf sb = STRBUF_INIT;
- write_eoie_extension(&sb, &eoie_c, offset);
- err = write_index_ext_header(&c, NULL, newfd, CACHE_EXT_ENDOFINDEXENTRIES, sb.len) < 0
- || ce_write(&c, newfd, sb.buf, sb.len) < 0;
+ write_eoie_extension(&sb, eoie_c, offset);
+ err = write_index_ext_header(f, NULL, CACHE_EXT_ENDOFINDEXENTRIES, sb.len) < 0;
+ hashwrite(f, sb.buf, sb.len);
strbuf_release(&sb);
if (err)
return -1;
}
- if (ce_flush(&c, newfd, istate->oid.hash))
- return -1;
+ finalize_hashfile(f, istate->oid.hash, CSUM_HASH_IN_STREAM);
if (close_tempfile_gently(tempfile)) {
error(_("could not close '%s'"), get_tempfile_path(tempfile));
return -1;