summaryrefslogtreecommitdiff
path: root/refs/packed-backend.c
diff options
context:
space:
mode:
authorLibravatar Michael Haggerty <mhagger@alum.mit.edu>2017-10-28 11:16:02 +0200
committerLibravatar Junio C Hamano <gitster@pobox.com>2017-10-30 09:45:15 +0900
commit7c6bd25c7d65e777b5bb26ad5c8248fb3607e6d5 (patch)
treed2801cff3122ec0eca892bad37040c22e20fa0ba /refs/packed-backend.c
parentt1409: check that `packed-refs` is not rewritten unnecessarily (diff)
downloadtgif-7c6bd25c7d65e777b5bb26ad5c8248fb3607e6d5.tar.xz
files-backend: don't rewrite the `packed-refs` file unnecessarily
Even when we are deleting references, we needn't overwrite the `packed-refs` file if the references that we are deleting only exist as loose references. Implement this optimization as follows: * Add a function `is_packed_transaction_needed()`, which checks whether a given packed-refs transaction actually needs to be carried out (i.e., it returns false if the transaction obviously wouldn't have any effect). This function must be called while holding the `packed-refs` lock to avoid races. * Change `files_transaction_prepare()` to check whether the packed-refs transaction is actually needed. If not, squelch it, but continue holding the `packed-refs` lock until the end of the transaction to avoid races. This fixes a mild regression caused by dc39e09942 (files_ref_store: use a transaction to update packed refs, 2017-09-08). Before that commit, unnecessary rewrites of `packed-refs` were suppressed by `repack_without_refs()`. But the transaction-based writing introduced by that commit didn't perform that optimization. Note that the pre-dc39e09942 code still had to *read* the whole `packed-refs` file to determine that the rewrite could be skipped, so the performance for the cases that the write could be elided was `O(N)` in the number of packed references both before and after dc39e09942. But after that commit the constant factor increased. This commit reimplements the optimization of eliding unnecessary `packed-refs` rewrites. That, plus the fact that since cfa2e29c34 (packed_ref_store: get rid of the `ref_cache` entirely, 2017-03-17) we don't necessarily have to read the whole `packed-refs` file at all, means that deletes of one or a few loose references can now be done with `O(n lg N)` effort, where `n` is the number of loose references being deleted and `N` is the total number of packed references. This commit fixes two tests in t1409. Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu> Signed-off-by: Junio C Hamano <gitster@pobox.com>
Diffstat (limited to 'refs/packed-backend.c')
-rw-r--r--refs/packed-backend.c94
1 files changed, 94 insertions, 0 deletions
diff --git a/refs/packed-backend.c b/refs/packed-backend.c
index 0279aeceea..0b0a17ca8e 100644
--- a/refs/packed-backend.c
+++ b/refs/packed-backend.c
@@ -754,6 +754,100 @@ error:
return -1;
}
+int is_packed_transaction_needed(struct ref_store *ref_store,
+ struct ref_transaction *transaction)
+{
+ struct packed_ref_store *refs = packed_downcast(
+ ref_store,
+ REF_STORE_READ,
+ "is_packed_transaction_needed");
+ struct strbuf referent = STRBUF_INIT;
+ size_t i;
+ int ret;
+
+ if (!is_lock_file_locked(&refs->lock))
+ BUG("is_packed_transaction_needed() called while unlocked");
+
+ /*
+ * We're only going to bother returning false for the common,
+ * trivial case that references are only being deleted, their
+ * old values are not being checked, and the old `packed-refs`
+ * file doesn't contain any of those reference(s). This gives
+ * false positives for some other cases that could
+ * theoretically be optimized away:
+ *
+ * 1. It could be that the old value is being verified without
+ * setting a new value. In this case, we could verify the
+ * old value here and skip the update if it agrees. If it
+ * disagrees, we could either let the update go through
+ * (the actual commit would re-detect and report the
+ * problem), or come up with a way of reporting such an
+ * error to *our* caller.
+ *
+ * 2. It could be that a new value is being set, but that it
+ * is identical to the current packed value of the
+ * reference.
+ *
+ * Neither of these cases will come up in the current code,
+ * because the only caller of this function passes to it a
+ * transaction that only includes `delete` updates with no
+ * `old_id`. Even if that ever changes, false positives only
+ * cause an optimization to be missed; they do not affect
+ * correctness.
+ */
+
+ /*
+ * Start with the cheap checks that don't require old
+ * reference values to be read:
+ */
+ for (i = 0; i < transaction->nr; i++) {
+ struct ref_update *update = transaction->updates[i];
+
+ if (update->flags & REF_HAVE_OLD)
+ /* Have to check the old value -> needed. */
+ return 1;
+
+ if ((update->flags & REF_HAVE_NEW) && !is_null_oid(&update->new_oid))
+ /* Have to set a new value -> needed. */
+ return 1;
+ }
+
+ /*
+ * The transaction isn't checking any old values nor is it
+ * setting any nonzero new values, so it still might be able
+ * to be skipped. Now do the more expensive check: the update
+ * is needed if any of the updates is a delete, and the old
+ * `packed-refs` file contains a value for that reference.
+ */
+ ret = 0;
+ for (i = 0; i < transaction->nr; i++) {
+ struct ref_update *update = transaction->updates[i];
+ unsigned int type;
+ struct object_id oid;
+
+ if (!(update->flags & REF_HAVE_NEW))
+ /*
+ * This reference isn't being deleted -> not
+ * needed.
+ */
+ continue;
+
+ if (!refs_read_raw_ref(ref_store, update->refname,
+ oid.hash, &referent, &type) ||
+ errno != ENOENT) {
+ /*
+ * We have to actually delete that reference
+ * -> this transaction is needed.
+ */
+ ret = 1;
+ break;
+ }
+ }
+
+ strbuf_release(&referent);
+ return ret;
+}
+
struct packed_transaction_backend_data {
/* True iff the transaction owns the packed-refs lock. */
int own_lock;