summary refs log tree commit diff
path: root/read-cache.c
diff options
context:
space:
mode:
Diffstat (limited to 'read-cache.c')
-rw-r--r-- read-cache.c 151
1 files changed, 147 insertions, 4 deletions
diff --git a/read-cache.c b/read-cache.c
index e447751823..0d0081a11b 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -887,9 +887,32 @@ static int has_file_name(struct index_state *istate,
return retval;
}
+
+/*
+ * Like strcmp(), but also store in *first_change the offset of the first
+ * differing byte (the length, if the strings are equal); NULL skips that.
+ */
+int strcmp_offset(const char *s1, const char *s2, size_t *first_change)
+{
+ size_t k;
+
+ if (!first_change) /* caller does not want the offset */
+ return strcmp(s1, s2);
+
+ for (k = 0; s1[k] == s2[k]; k++)
+ if (s1[k] == '\0') /* both strings ended together: equal */
+ break;
+
+ *first_change = k;
+ return (unsigned char)s1[k] - (unsigned char)s2[k];
+}
+
/*
* Do we have another file with a pathname that is a proper
* subset of the name we're trying to add?
+ *
+ * That is, is there another file in the index with a path
+ * that matches a sub-directory in the given entry?
*/
static int has_dir_name(struct index_state *istate,
const struct cache_entry *ce, int pos, int ok_to_replace)
@@ -898,9 +921,51 @@ static int has_dir_name(struct index_state *istate,
int stage = ce_stage(ce);
const char *name = ce->name;
const char *slash = name + ce_namelen(ce);
+ size_t len_eq_last;
+ int cmp_last = 0;
+
+ /*
+ * We are frequently called during an iteration on a sorted
+ * list of pathnames and while building a new index. Therefore,
+ * there is a high probability that this entry will eventually
+ * be appended to the index, rather than inserted in the middle.
+ * If we can confirm that, we can avoid binary searches on the
+ * components of the pathname.
+ *
+ * Compare the entry's full path with the last path in the index.
+ */
+ if (istate->cache_nr > 0) {
+ cmp_last = strcmp_offset(name,
+ istate->cache[istate->cache_nr - 1]->name,
+ &len_eq_last);
+ if (cmp_last > 0) {
+ if (len_eq_last == 0) {
+ /*
+ * The entry sorts AFTER the last one in the
+ * index and their paths have no common prefix,
+ * so there cannot be a F/D conflict.
+ */
+ return retval;
+ } else {
+ /*
+ * The entry sorts AFTER the last one in the
+ * index, but has a common prefix. Fall through
+ * to the loop below to dissect the entry's path
+ * and see where the difference is.
+ */
+ }
+ } else if (cmp_last == 0) {
+ /*
+ * The entry exactly matches the last one in the
+ * index, but because of multiple-stage and CE_REMOVE
+ * items, we fall through and let the regular search
+ * code handle it.
+ */
+ }
+ }
for (;;) {
- int len;
+ size_t len;
for (;;) {
if (*--slash == '/')
@@ -910,6 +975,67 @@ static int has_dir_name(struct index_state *istate,
}
len = slash - name;
+ if (cmp_last > 0) {
+ /*
+ * (len + 1) is a directory boundary (including
+ * the trailing slash). And since the loop is
+ * decrementing "slash", the first iteration is
+ * the longest directory prefix; subsequent
+ * iterations consider parent directories.
+ */
+
+ if (len + 1 <= len_eq_last) {
+ /*
+ * The directory prefix (including the trailing
+ * slash) also appears as a prefix in the last
+ * entry, so the remainder cannot collide (because
+ * strcmp said the whole path was greater).
+ *
+ * EQ: last: xxx/A
+ * this: xxx/B
+ *
+ * LT: last: xxx/file_A
+ * this: xxx/file_B
+ */
+ return retval;
+ }
+
+ if (len > len_eq_last) {
+ /*
+ * This part of the directory prefix (excluding
+ * the trailing slash) is longer than the known
+ * equal portions, so this sub-directory cannot
+ * collide with a file.
+ *
+ * GT: last: xxxA
+ * this: xxxB/file
+ */
+ return retval;
+ }
+
+ if (istate->cache_nr > 0 &&
+ ce_namelen(istate->cache[istate->cache_nr - 1]) > len) {
+ /*
+ * The directory prefix lines up with part of
+ * a longer file or directory name, but sorts
+ * after it, so this sub-directory cannot
+ * collide with a file.
+ *
+ * last: xxx/yy-file (because '-' sorts before '/')
+ * this: xxx/yy/abc
+ */
+ return retval;
+ }
+
+ /*
+ * This is a possible collision. Fall through and
+ * let the regular search code handle it.
+ *
+ * last: xxx
+ * this: xxx/file
+ */
+ }
+
pos = index_name_stage_pos(istate, name, len, stage);
if (pos >= 0) {
/*
@@ -1001,7 +1127,16 @@ static int add_index_entry_with_check(struct index_state *istate, struct cache_e
if (!(option & ADD_CACHE_KEEP_CACHE_TREE))
cache_tree_invalidate_path(istate, ce->name);
- pos = index_name_stage_pos(istate, ce->name, ce_namelen(ce), ce_stage(ce));
+
+ /*
+ * If this entry's path sorts after the last entry in the index,
+ * we can avoid searching for it.
+ */
+ if (istate->cache_nr > 0 &&
+ strcmp(ce->name, istate->cache[istate->cache_nr - 1]->name) > 0)
+ pos = -istate->cache_nr - 1;
+ else
+ pos = index_name_stage_pos(istate, ce->name, ce_namelen(ce), ce_stage(ce));
/* existing match? Just replace it. */
if (pos >= 0) {
@@ -1371,6 +1506,9 @@ struct ondisk_cache_entry_extended {
ondisk_cache_entry_extended_size(ce_namelen(ce)) : \
ondisk_cache_entry_size(ce_namelen(ce)))
+/* Allow fsck to force verification of the index checksum. */
+int verify_index_checksum;
+
static int verify_hdr(struct cache_header *hdr, unsigned long size)
{
git_SHA_CTX c;
@@ -1382,6 +1520,10 @@ static int verify_hdr(struct cache_header *hdr, unsigned long size)
hdr_version = ntohl(hdr->hdr_version);
if (hdr_version < INDEX_FORMAT_LB || INDEX_FORMAT_UB < hdr_version)
return error("bad index version %d", hdr_version);
+
+ if (!verify_index_checksum)
+ return 0;
+
git_SHA1_Init(&c);
git_SHA1_Update(&c, hdr, size - 20);
git_SHA1_Final(sha1, &c);
@@ -1682,9 +1824,10 @@ unmap:
*/
static void freshen_shared_index(char *base_sha1_hex, int warn)
{
- const char *shared_index = git_path("sharedindex.%s", base_sha1_hex);
+ char *shared_index = git_pathdup("sharedindex.%s", base_sha1_hex); /* released by free() below */
if (!check_and_freshen_file(shared_index, 1) && warn) /* failure is reported only when warn is set */
warning("could not freshen shared index '%s'", shared_index);
+ free(shared_index);
}
int read_index_from(struct index_state *istate, const char *path)
@@ -2229,7 +2372,7 @@ static int should_delete_shared_index(const char *shared_index_path)
if (!expiration)
return 0;
if (stat(shared_index_path, &st))
- return error_errno(_("could not stat '%s"), shared_index_path);
+ return error_errno(_("could not stat '%s'"), shared_index_path);
if (st.st_mtime > expiration)
return 0;