summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--builtin/fsck.c120
-rwxr-xr-xt/t1450-fsck.sh29
2 files changed, 124 insertions, 25 deletions
diff --git a/builtin/fsck.c b/builtin/fsck.c
index 3e67203f9c..75e836e2fd 100644
--- a/builtin/fsck.c
+++ b/builtin/fsck.c
@@ -205,8 +205,6 @@ static void check_reachable_object(struct object *obj)
if (!(obj->flags & HAS_OBJ)) {
if (has_sha1_pack(obj->oid.hash))
return; /* it is in pack - forget about it */
- if (connectivity_only && has_object_file(&obj->oid))
- return;
printf("missing %s %s\n", typename(obj->type),
describe_object(obj));
errors_found |= ERROR_REACHABLE;
@@ -584,6 +582,79 @@ static int fsck_cache_tree(struct cache_tree *it)
return err;
}
+static void mark_object_for_connectivity(const unsigned char *sha1)
+{
+ struct object *obj = lookup_object(sha1);
+
+ /*
+ * Setting the object type here isn't strictly necessary for a
+ * connectivity check. In most cases, our walk will expect a certain
+ * type (e.g., a tree referencing a blob) and will use lookup_blob() to
+ * assign the type. But doing it here has two advantages:
+ *
+ * 1. When the fsck_walk code looks at objects that _don't_ come from
+ * links (e.g., the tip of a ref), it may complain about the
+ * "unknown object type".
+ *
+ * 2. This serves as a nice cross-check that the graph links are
+ * sane. So --connectivity-only does not check that the bits of
+ * blobs are not corrupted, but it _does_ check that 100644 tree
+ * entries point to blobs, and so forth.
+ *
+ * Unfortunately we can't just use parse_object() here, because the
+ * whole point of --connectivity-only is to avoid reading the object
+ * data more than necessary.
+ */
+ if (!obj || obj->type == OBJ_NONE) {
+ enum object_type type = sha1_object_info(sha1, NULL);
+ switch (type) {
+ case OBJ_BAD:
+ error("%s: unable to read object type",
+ sha1_to_hex(sha1));
+ break;
+ case OBJ_COMMIT:
+ obj = (struct object *)lookup_commit(sha1);
+ break;
+ case OBJ_TREE:
+ obj = (struct object *)lookup_tree(sha1);
+ break;
+ case OBJ_BLOB:
+ obj = (struct object *)lookup_blob(sha1);
+ break;
+ case OBJ_TAG:
+ obj = (struct object *)lookup_tag(sha1);
+ break;
+ default:
+ error("%s: unknown object type %d",
+ sha1_to_hex(sha1), type);
+ }
+
+ if (!obj || obj->type == OBJ_NONE) {
+ errors_found |= ERROR_OBJECT;
+ return;
+ }
+ }
+
+ obj->flags |= HAS_OBJ;
+}
+
+static int mark_loose_for_connectivity(const unsigned char *sha1,
+ const char *path,
+ void *data)
+{
+ mark_object_for_connectivity(sha1);
+ return 0;
+}
+
+static int mark_packed_for_connectivity(const unsigned char *sha1,
+ struct packed_git *pack,
+ uint32_t pos,
+ void *data)
+{
+ mark_object_for_connectivity(sha1);
+ return 0;
+}
+
static char const * const fsck_usage[] = {
N_("git fsck [<options>] [<object>...]"),
NULL
@@ -640,38 +711,41 @@ int cmd_fsck(int argc, const char **argv, const char *prefix)
git_config(fsck_config, NULL);
fsck_head_link();
- if (!connectivity_only) {
+ if (connectivity_only) {
+ for_each_loose_object(mark_loose_for_connectivity, NULL, 0);
+ for_each_packed_object(mark_packed_for_connectivity, NULL, 0);
+ } else {
fsck_object_dir(get_object_directory());
prepare_alt_odb();
for (alt = alt_odb_list; alt; alt = alt->next)
fsck_object_dir(alt->path);
- }
- if (check_full) {
- struct packed_git *p;
- uint32_t total = 0, count = 0;
- struct progress *progress = NULL;
+ if (check_full) {
+ struct packed_git *p;
+ uint32_t total = 0, count = 0;
+ struct progress *progress = NULL;
+
+ prepare_packed_git();
- prepare_packed_git();
+ if (show_progress) {
+ for (p = packed_git; p; p = p->next) {
+ if (open_pack_index(p))
+ continue;
+ total += p->num_objects;
+ }
- if (show_progress) {
+ progress = start_progress(_("Checking objects"), total);
+ }
for (p = packed_git; p; p = p->next) {
- if (open_pack_index(p))
- continue;
- total += p->num_objects;
+ /* verify gives error messages itself */
+ if (verify_pack(p, fsck_obj_buffer,
+ progress, count))
+ errors_found |= ERROR_PACK;
+ count += p->num_objects;
}
-
- progress = start_progress(_("Checking objects"), total);
- }
- for (p = packed_git; p; p = p->next) {
- /* verify gives error messages itself */
- if (verify_pack(p, fsck_obj_buffer,
- progress, count))
- errors_found |= ERROR_PACK;
- count += p->num_objects;
+ stop_progress(&progress);
}
- stop_progress(&progress);
}
heads = 0;
diff --git a/t/t1450-fsck.sh b/t/t1450-fsck.sh
index e88ec7747b..4d1c3ba664 100755
--- a/t/t1450-fsck.sh
+++ b/t/t1450-fsck.sh
@@ -523,9 +523,21 @@ test_expect_success 'fsck --connectivity-only' '
touch empty &&
git add empty &&
test_commit empty &&
+
+ # Drop the index now; we want to be sure that we
+ # recursively notice the broken objects
+ # because they are reachable from refs, not because
+ # they are in the index.
+ rm -f .git/index &&
+
+ # corrupt the blob, but in a way that we can still identify
+ # its type. That lets us see that --connectivity-only is
+ # not actually looking at the contents, but leaves it
+ # free to examine the type if it chooses.
empty=.git/objects/e6/9de29bb2d1d6434b8b29ae775ad8c2e48c5391 &&
- rm -f $empty &&
- echo invalid >$empty &&
+ blob=$(echo unrelated | git hash-object -w --stdin) &&
+ mv $(sha1_file $blob) $empty &&
+
test_must_fail git fsck --strict &&
git fsck --strict --connectivity-only &&
tree=$(git rev-parse HEAD:) &&
@@ -537,6 +549,19 @@ test_expect_success 'fsck --connectivity-only' '
)
'
+test_expect_success 'fsck --connectivity-only with explicit head' '
+ rm -rf connectivity-only &&
+ git init connectivity-only &&
+ (
+ cd connectivity-only &&
+ test_commit foo &&
+ rm -f .git/index &&
+ tree=$(git rev-parse HEAD^{tree}) &&
+ remove_object $(git rev-parse HEAD:foo.t) &&
+ test_must_fail git fsck --connectivity-only $tree
+ )
+'
+
remove_loose_object () {
sha1="$(git rev-parse "$1")" &&
remainder=${sha1#??} &&