summaryrefslogtreecommitdiff
path: root/builtin/gc.c
diff options
context:
space:
mode:
authorLibravatar Nguyễn Thái Ngọc Duy <pclouds@gmail.com>2018-04-15 17:36:17 +0200
committerLibravatar Junio C Hamano <gitster@pobox.com>2018-04-16 13:52:29 +0900
commit9806f5a7bf3d02247c2c500ef74f56213cd7b07a (patch)
treed81f3de59393e27d084d010e68c9f6f1b10fefa3 /builtin/gc.c
parentgc: handle a corner case in gc.bigPackThreshold (diff)
downloadtgif-9806f5a7bf3d02247c2c500ef74f56213cd7b07a.tar.xz
gc --auto: exclude base pack if not enough mem to "repack -ad"
pack-objects could be a big memory hog especially on large repos, everybody knows that. The suggestion to stick a .keep file on the giant base pack to avoid this problem is also known for a long time. Recent patches add an option to do just this, but it has to be either configured or activated manually. This patch lets `git gc --auto` activate this mode automatically when it thinks `repack -ad` will use a lot of memory and start affecting the system due to swapping or flushing OS cache. gc --auto decides to do this based on an estimation of pack-objects memory usage, which is quite accurate at least for the heap part, and whether that fits in half of system memory (the assumption here is for desktop environment where there are many other applications running). This mechanism only kicks in if gc.bigBasePackThreshold is not configured. If it is, it is assumed that the user already knows what they want. Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
Diffstat (limited to 'builtin/gc.c')
-rw-r--r--builtin/gc.c98
1 files changed, 97 insertions, 1 deletions
diff --git a/builtin/gc.c b/builtin/gc.c
index 23c17ba7e9..3c7c93e961 100644
--- a/builtin/gc.c
+++ b/builtin/gc.c
@@ -22,6 +22,10 @@
#include "commit.h"
#include "packfile.h"
#include "object-store.h"
+#include "pack.h"
+#include "pack-objects.h"
+#include "blob.h"
+#include "tree.h"
#define FAILED_RUN "failed to run %s"
@@ -42,6 +46,7 @@ static const char *gc_log_expire = "1.day.ago";
static const char *prune_expire = "2.weeks.ago";
static const char *prune_worktrees_expire = "3.months.ago";
static unsigned long big_pack_threshold;
+static unsigned long max_delta_cache_size = DEFAULT_DELTA_CACHE_SIZE;
static struct argv_array pack_refs_cmd = ARGV_ARRAY_INIT;
static struct argv_array reflog = ARGV_ARRAY_INIT;
@@ -130,6 +135,7 @@ static void gc_config(void)
git_config_get_expiry("gc.logexpiry", &gc_log_expire);
git_config_get_ulong("gc.bigpackthreshold", &big_pack_threshold);
+ git_config_get_ulong("pack.deltacachesize", &max_delta_cache_size);
git_config(git_default_config, NULL);
}
@@ -169,7 +175,8 @@ static int too_many_loose_objects(void)
return needed;
}
-static void find_base_packs(struct string_list *packs, unsigned long limit)
+static struct packed_git *find_base_packs(struct string_list *packs,
+ unsigned long limit)
{
struct packed_git *p, *base = NULL;
@@ -186,6 +193,8 @@ static void find_base_packs(struct string_list *packs, unsigned long limit)
if (base)
string_list_append(packs, base->pack_name);
+
+ return base;
}
static int too_many_packs(void)
@@ -210,6 +219,79 @@ static int too_many_packs(void)
return gc_auto_pack_limit < cnt;
}
+static uint64_t total_ram(void)
+{
+#if defined(HAVE_SYSINFO)
+ struct sysinfo si;
+
+ if (!sysinfo(&si))
+ return si.totalram;
+#elif defined(HAVE_BSD_SYSCTL) && (defined(HW_MEMSIZE) || defined(HW_PHYSMEM))
+ int64_t physical_memory;
+ int mib[2];
+ size_t length;
+
+ mib[0] = CTL_HW;
+# if defined(HW_MEMSIZE)
+ mib[1] = HW_MEMSIZE;
+# else
+ mib[1] = HW_PHYSMEM;
+# endif
+ length = sizeof(int64_t);
+ if (!sysctl(mib, 2, &physical_memory, &length, NULL, 0))
+ return physical_memory;
+#elif defined(GIT_WINDOWS_NATIVE)
+ MEMORYSTATUSEX memInfo;
+
+ memInfo.dwLength = sizeof(MEMORYSTATUSEX);
+ if (GlobalMemoryStatusEx(&memInfo))
+ return memInfo.ullTotalPhys;
+#endif
+ return 0;
+}
+
+static uint64_t estimate_repack_memory(struct packed_git *pack)
+{
+ unsigned long nr_objects = approximate_object_count();
+ size_t os_cache, heap;
+
+ if (!pack || !nr_objects)
+ return 0;
+
+ /*
+ * First we have to scan through at least one pack.
+ * Assume enough room in OS file cache to keep the entire pack
+ * or we may accidentally evict data of other processes from
+ * the cache.
+ */
+ os_cache = pack->pack_size + pack->index_size;
+ /* then pack-objects needs lots more for book keeping */
+ heap = sizeof(struct object_entry) * nr_objects;
+ /*
+ * internal rev-list --all --objects takes up some memory too,
+ * let's say half of it is for blobs
+ */
+ heap += sizeof(struct blob) * nr_objects / 2;
+ /*
+ * and the other half is for trees (commits and tags are
+ * usually insignificant)
+ */
+ heap += sizeof(struct tree) * nr_objects / 2;
+ /* and then obj_hash[], underestimated in fact */
+ heap += sizeof(struct object *) * nr_objects;
+ /* revindex is used also */
+ heap += sizeof(struct revindex_entry) * nr_objects;
+ /*
+ * read_sha1_file() (either at delta calculation phase, or
+ * writing phase) also fills up the delta base cache
+ */
+ heap += delta_base_cache_limit;
+ /* and of course pack-objects has its own delta cache */
+ heap += max_delta_cache_size;
+
+ return os_cache + heap;
+}
+
static int keep_one_pack(struct string_list_item *item, void *data)
{
argv_array_pushf(&repack, "--keep-pack=%s", basename(item->string));
@@ -260,6 +342,20 @@ static int need_to_gc(void)
string_list_clear(&keep_pack, 0);
find_base_packs(&keep_pack, 0);
}
+ } else {
+ struct packed_git *p = find_base_packs(&keep_pack, 0);
+ uint64_t mem_have, mem_want;
+
+ mem_have = total_ram();
+ mem_want = estimate_repack_memory(p);
+
+ /*
+ * Only allow 1/2 of memory for pack-objects, leave
+ * the rest for the OS and other processes in the
+ * system.
+ */
+ if (!mem_have || mem_want < mem_have / 2)
+ string_list_clear(&keep_pack, 0);
}
add_repack_all_option(&keep_pack);