From 8bf14444c3989aa0f5b01ba67dfb4b592e6c30d3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= Date: Thu, 14 Mar 2019 00:54:35 +0100 Subject: gc: remove redundant check for gc_auto_threshold MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Checking gc_auto_threshold in too_many_loose_objects() was added in 17815501a8 ("git-gc --auto: run "repack -A -d -l" as necessary.", 2007-09-17) when need_to_gc() itself was also reliant on gc_auto_pack_limit before its early return: gc_auto_threshold <= 0 && gc_auto_pack_limit <= 0 When that check was simplified to just checking "gc_auto_threshold <= 0" in b14d255ba8 ("builtin-gc.c: allow disabling all auto-gc'ing by assigning 0 to gc.auto", 2008-03-19) this unreachable code should have been removed. We only call too_many_loose_objects() from within need_to_gc() itself, which will return if this condition holds, and in cmd_gc() which will return before ever getting to "auto_gc && too_many_loose_objects()" if "auto_gc && !need_to_gc()" is true earlier in the function. Signed-off-by: Ævar Arnfjörð Bjarmason Signed-off-by: Junio C Hamano --- builtin/gc.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/builtin/gc.c b/builtin/gc.c index 020f725acc..8c2312681c 100644 --- a/builtin/gc.c +++ b/builtin/gc.c @@ -157,9 +157,6 @@ static int too_many_loose_objects(void) int num_loose = 0; int needed = 0; - if (gc_auto_threshold <= 0) - return 0; - dir = opendir(git_path("objects/17")); if (!dir) return 0; -- cgit v1.2.3 From e5cdbd5f7048d5dc9d84a7d1f2c467f0ebf4ac0b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= Date: Fri, 15 Mar 2019 16:59:53 +0100 Subject: gc: convert to using the_hash_algo MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There's been a lot of changing of the hardcoded "40" values to the_hash_algo->hexsz, but we've so far missed this one where we hardcoded 38 for the loose object file length. This is because a SHA-1 like abcde[...] gets turned into objects/ab/cde[...]. There's no reason to suppose the same won't be the case for SHA-256, and reading between the lines in hash-function-transition.txt the format is planned to be the same. In the future we may want to further modify this code for the hash function transition. There's a potential pathological case here where we'll only consider the loose objects for the currently active hash, but objects for that hash will share a directory storage with the other hash. Thus we could theoretically have e.g. 1k SHA-1 loose objects, and 1 million SHA-256 objects. Then not notice that we need to pack them because we're currently using SHA-1, even though our FS may be straining under the stress of such humongous directories. So assuming that "gc" eventually learns to pack up both SHA-1 and SHA-256 objects regardless of what the current the_hash_algo is, perhaps this check should be changed to consider all files in objects/17/ matching [0-9a-f] 38 or 62 characters in length (i.e. both SHA-1 and SHA-256). But none of that is something we need to worry about now, and supporting both 38 and 62 characters depending on "the_hash_algo" removes another case of SHA-1 hardcoding. As noted in [1] I'm making no effort to somehow remove the hardcoding for "2" as in "use the first two hexdigits for the directory name". There's no indication that that'll ever change, and somehow generalizing it here would be a drop in the ocean, so there's no point in doing that. It also couldn't be done without coming up with some generalized version of the magical "objects/17" directory. See [2] for a discussion of that directory. 1. https://public-inbox.org/git/874l84ber7.fsf@evledraar.gmail.com/ 2. https://public-inbox.org/git/87k1mta9x5.fsf@evledraar.gmail.com/ Signed-off-by: Ævar Arnfjörð Bjarmason Signed-off-by: Junio C Hamano --- builtin/gc.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/builtin/gc.c b/builtin/gc.c index 8c2312681c..733bd7bdf4 100644 --- a/builtin/gc.c +++ b/builtin/gc.c @@ -156,6 +156,7 @@ static int too_many_loose_objects(void) int auto_threshold; int num_loose = 0; int needed = 0; + const unsigned hexsz_loose = the_hash_algo->hexsz - 2; dir = opendir(git_path("objects/17")); if (!dir) @@ -163,8 +164,8 @@ static int too_many_loose_objects(void) auto_threshold = DIV_ROUND_UP(gc_auto_threshold, 256); while ((ent = readdir(dir)) != NULL) { - if (strspn(ent->d_name, "0123456789abcdef") != 38 || - ent->d_name[38] != '\0') + if (strspn(ent->d_name, "0123456789abcdef") != hexsz_loose || + ent->d_name[hexsz_loose] != '\0') continue; if (++num_loose > auto_threshold) { needed = 1; -- cgit v1.2.3 From cd8eb3a094c51482c80a97589e4aff53af2c7c63 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= Date: Fri, 15 Mar 2019 16:59:54 +0100 Subject: gc: refactor a "call me once" pattern MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Change an idiom we're using to ensure that gc_before_repack() only does work once (see 62aad1849f ("gc --auto: do not lock refs in the background", 2014-05-25)) to be more obvious. Nothing except this function cares about the "pack_refs" and "prune_reflogs" variables, so let's not leave the reader wondering if they're being zero'd out for later use somewhere else. Signed-off-by: Jeff King Signed-off-by: Ævar Arnfjörð Bjarmason Signed-off-by: Junio C Hamano --- builtin/gc.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/builtin/gc.c b/builtin/gc.c index 733bd7bdf4..ae716a00d4 100644 --- a/builtin/gc.c +++ b/builtin/gc.c @@ -489,14 +489,20 @@ done: static void gc_before_repack(void) { + /* + * We may be called twice, as both the pre- and + * post-daemonized phases will call us, but running these + * commands more than once is pointless and wasteful. + */ + static int done = 0; + if (done++) + return; + if (pack_refs && run_command_v_opt(pack_refs_cmd.argv, RUN_GIT_CMD)) die(FAILED_RUN, pack_refs_cmd.argv[0]); if (prune_reflogs && run_command_v_opt(reflog.argv, RUN_GIT_CMD)) die(FAILED_RUN, reflog.argv[0]); - - pack_refs = 0; - prune_reflogs = 0; } int cmd_gc(int argc, const char **argv, const char *prefix) -- cgit v1.2.3 From a65bf78c4db0dda04ce726250ed84d23f109433f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= Date: Fri, 15 Mar 2019 16:59:55 +0100 Subject: reflog tests: make use of "test_config" idiom MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Change a couple of tests that weren't using the helper to use it. This makes the trailing "--unset" unnecessary. Signed-off-by: Ævar Arnfjörð Bjarmason Signed-off-by: Junio C Hamano --- t/t1410-reflog.sh | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/t/t1410-reflog.sh b/t/t1410-reflog.sh index ae8a448e34..42f5ac9ed9 100755 --- a/t/t1410-reflog.sh +++ b/t/t1410-reflog.sh @@ -232,24 +232,21 @@ test_expect_success '--expire=never' ' ' test_expect_success 'gc.reflogexpire=never' ' + test_config gc.reflogexpire never && + test_config gc.reflogexpireunreachable never && - git config gc.reflogexpire never && - git config gc.reflogexpireunreachable never && git reflog expire --verbose --all && git reflog refs/heads/master >output && test_line_count = 4 output ' test_expect_success 'gc.reflogexpire=false' ' + test_config gc.reflogexpire false && + test_config gc.reflogexpireunreachable false && - git config gc.reflogexpire false && - git config gc.reflogexpireunreachable false && git reflog expire --verbose --all && git reflog refs/heads/master >output && - test_line_count = 4 output && - - git config --unset gc.reflogexpire && - git config --unset gc.reflogexpireunreachable + test_line_count = 4 output ' -- cgit v1.2.3 From fe66776db0b841d4d4cf64f078e1ffd82193442a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= Date: Fri, 15 Mar 2019 16:59:56 +0100 Subject: reflog tests: test for the "points nowhere" warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The "git reflog expire" command when given an unknown reference has since 4264dc15e1 ("git reflog expire", 2006-12-19) when this command was implemented emit an error, but this has never been tested for. Let's test for it, also under gc.reflogExpire{Unreachable,}=never in case a future change is tempted to take shortcuts in the presence of such config. Signed-off-by: Ævar Arnfjörð Bjarmason Signed-off-by: Junio C Hamano --- t/t1410-reflog.sh | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/t/t1410-reflog.sh b/t/t1410-reflog.sh index 42f5ac9ed9..e8f8ac9785 100755 --- a/t/t1410-reflog.sh +++ b/t/t1410-reflog.sh @@ -250,6 +250,16 @@ test_expect_success 'gc.reflogexpire=false' ' ' +test_expect_success 'git reflog expire unknown reference' ' + test_config gc.reflogexpire never && + test_config gc.reflogexpireunreachable never && + + test_must_fail git reflog expire master@{123} 2>stderr && + test_i18ngrep "points nowhere" stderr && + test_must_fail git reflog expire does-not-exist 2>stderr && + test_i18ngrep "points nowhere" stderr +' + test_expect_success 'checkout should not delete log for packed ref' ' test $(git reflog master | wc -l) = 4 && git branch foo && -- cgit v1.2.3 From 978f4307638917127704716b754aed0953452bf4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= Date: Thu, 28 Mar 2019 17:14:33 +0100 Subject: reflog tests: assert lack of early exit with expiry="never" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When gc.reflogExpire and gc.reflogExpireUnreachable are set to "never" and --stale-fix isn't in effect we *could* exit early without pointlessly looping over all the reflogs. However, as an earlier change to add a test for the "points nowhere" warning shows even in such a mode we might want to print out a warning. So while it's conceivable to implement this, I don't think it's worth it. It's going to be too easy to inadvertently add some flag that'll make the expiry happen anyway, and even with "never" we'd like to see all the lines we're going to keep. So let's assert that we're going to loop over all the references even when this configuration is in effect. Signed-off-by: Ævar Arnfjörð Bjarmason Signed-off-by: Junio C Hamano --- t/t1410-reflog.sh | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/t/t1410-reflog.sh b/t/t1410-reflog.sh index e8f8ac9785..79f731db37 100755 --- a/t/t1410-reflog.sh +++ b/t/t1410-reflog.sh @@ -235,7 +235,9 @@ test_expect_success 'gc.reflogexpire=never' ' test_config gc.reflogexpire never && test_config gc.reflogexpireunreachable never && - git reflog expire --verbose --all && + git reflog expire --verbose --all >output && + test_line_count = 9 output && + git reflog refs/heads/master >output && test_line_count = 4 output ' -- cgit v1.2.3 From bf3d70fe93109616eb339e12dc9a7969b0bf025b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= Date: Thu, 28 Mar 2019 17:14:34 +0100 Subject: gc: handle & check gc.reflogExpire config MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Don't redundantly run "git reflog expire --all" when gc.reflogExpire and gc.reflogExpireUnreachable are set to "never", and die immediately if those configuration valuer are bad. As an earlier "assert lack of early exit" change to the tests for "git reflog expire" shows, an early check of gc.reflogExpire{Unreachable,} isn't wanted in general for "git reflog expire", but it makes sense for "gc" because: 1) Similarly to 8ab5aa4bd8 ("parseopt: handle malformed --expire arguments more nicely", 2018-04-21) we'll now die early if the config variables are set to invalid values. We run "pack-refs" before "reflog expire", which can take a while, only to then die on an invalid gc.reflogExpire{Unreachable,} configuration. 2) Not invoking the command at all means it won't show up in trace output, which makes what's going on more obvious when the two are set to "never". 3) As a later change documents we lock the refs when looping over the refs to expire, even in cases where we end up doing nothing due to this config. For the reasons noted in the earlier "assert lack of early exit" change I don't think it's worth it to bend over backwards in "git reflog expire" itself to carefully detect if we'll really do nothing given the combination of all its possible options and skip that locking, but that's easy to detect here in "gc" where we'll only run "reflog expire" in a relatively simple mode. Signed-off-by: Ævar Arnfjörð Bjarmason Signed-off-by: Junio C Hamano --- builtin/gc.c | 17 +++++++++++++++++ t/t6500-gc.sh | 19 +++++++++++++++++++ 2 files changed, 36 insertions(+) diff --git a/builtin/gc.c b/builtin/gc.c index ae716a00d4..8943bcc300 100644 --- a/builtin/gc.c +++ b/builtin/gc.c @@ -116,6 +116,19 @@ static void process_log_file_on_signal(int signo) raise(signo); } +static int gc_config_is_timestamp_never(const char *var) +{ + const char *value; + timestamp_t expire; + + if (!git_config_get_value(var, &value) && value) { + if (parse_expiry_date(value, &expire)) + die(_("failed to parse '%s' value '%s'"), var, value); + return expire == 0; + } + return 0; +} + static void gc_config(void) { const char *value; @@ -127,6 +140,10 @@ static void gc_config(void) pack_refs = git_config_bool("gc.packrefs", value); } + if (gc_config_is_timestamp_never("gc.reflogexpire") && + gc_config_is_timestamp_never("gc.reflogexpireunreachable")) + prune_reflogs = 0; + git_config_get_int("gc.aggressivewindow", &aggressive_window); git_config_get_int("gc.aggressivedepth", &aggressive_depth); git_config_get_int("gc.auto", &gc_auto_threshold); diff --git a/t/t6500-gc.sh b/t/t6500-gc.sh index 4684d06552..7411bf7fec 100755 --- a/t/t6500-gc.sh +++ b/t/t6500-gc.sh @@ -120,6 +120,25 @@ test_expect_success 'gc --quiet' ' test_must_be_empty stderr ' +test_expect_success 'gc.reflogExpire{Unreachable,}=never skips "expire" via "gc"' ' + test_config gc.reflogExpire never && + test_config gc.reflogExpireUnreachable never && + + GIT_TRACE=$(pwd)/trace.out git gc && + + # Check that git-pack-refs is run as a sanity check (done via + # gc_before_repack()) but that git-expire is not. + grep -E "^trace: (built-in|exec|run_command): git pack-refs --" trace.out && + ! grep -E "^trace: (built-in|exec|run_command): git reflog expire --" trace.out +' + +test_expect_success 'one of gc.reflogExpire{Unreachable,}=never does not skip "expire" via "gc"' ' + >trace.out && + test_config gc.reflogExpire never && + GIT_TRACE=$(pwd)/trace.out git gc && + grep -E "^trace: (built-in|exec|run_command): git reflog expire --" trace.out +' + run_and_wait_for_auto_gc () { # We read stdout from gc for the side effect of waiting until the # background gc process exits, closing its fd 9. Furthermore, the -- cgit v1.2.3