summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLibravatar Junio C Hamano <gitster@pobox.com>2014-02-27 14:01:48 -0800
committerLibravatar Junio C Hamano <gitster@pobox.com>2014-02-27 14:01:48 -0800
commit0f9e62e0847c075678a7a5a748567d1e881d16f8 (patch)
tree8ef8989069ae40eef891b0964fc2cb8036a74e48
parentMerge branch 'dk/blame-janitorial' (diff)
parentewah: unconditionally ntohll ewah data (diff)
downloadtgif-0f9e62e0847c075678a7a5a748567d1e881d16f8.tar.xz
Merge branch 'jk/pack-bitmap'
Borrow the bitmap index into packfiles from JGit to speed up enumeration of objects involved in a commit range without having to fully traverse the history. * jk/pack-bitmap: (26 commits) ewah: unconditionally ntohll ewah data ewah: support platforms that require aligned reads read-cache: use get_be32 instead of hand-rolled ntoh_l block-sha1: factor out get_be and put_be wrappers do not discard revindex when re-preparing packfiles pack-bitmap: implement optional name_hash cache t/perf: add tests for pack bitmaps t: add basic bitmap functionality tests count-objects: recognize .bitmap in garbage-checking repack: consider bitmaps when performing repacks repack: handle optional files created by pack-objects repack: turn exts array into array-of-struct repack: stop using magic number for ARRAY_SIZE(exts) pack-objects: implement bitmap writing rev-list: add bitmap mode to speed up object lists pack-objects: use bitmaps when packing objects pack-objects: split add_object_entry pack-bitmap: add support for bitmap indexes documentation: add documentation for the bitmap format ewah: compressed bitmap implementation ...
-rw-r--r--Documentation/config.txt25
-rw-r--r--Documentation/git-repack.txt9
-rw-r--r--Documentation/git-rev-list.txt1
-rw-r--r--Documentation/rev-list-options.txt8
-rw-r--r--Documentation/technical/bitmap-format.txt164
-rw-r--r--Makefile16
-rw-r--r--block-sha1/sha1.c32
-rw-r--r--builtin/pack-objects.c451
-rw-r--r--builtin/repack.c41
-rw-r--r--builtin/rev-list.c39
-rw-r--r--cache.h1
-rw-r--r--compat/bswap.h112
-rw-r--r--ewah/bitmap.c221
-rw-r--r--ewah/ewah_bitmap.c714
-rw-r--r--ewah/ewah_io.c204
-rw-r--r--ewah/ewah_rlw.c115
-rw-r--r--ewah/ewok.h233
-rw-r--r--ewah/ewok_rlw.h114
-rw-r--r--khash.h338
-rw-r--r--pack-bitmap-write.c552
-rw-r--r--pack-bitmap.c1073
-rw-r--r--pack-bitmap.h64
-rw-r--r--pack-objects.c111
-rw-r--r--pack-objects.h68
-rw-r--r--pack-revindex.c43
-rw-r--r--pack-revindex.h9
-rw-r--r--pack-write.c2
-rw-r--r--read-cache.c44
-rw-r--r--revision.c4
-rw-r--r--revision.h2
-rw-r--r--sha1_file.c6
-rwxr-xr-xt/perf/p5310-pack-bitmaps.sh57
-rwxr-xr-xt/t5310-pack-bitmaps.sh139
33 files changed, 4736 insertions, 276 deletions
diff --git a/Documentation/config.txt b/Documentation/config.txt
index 7eec746950..040197b10e 100644
--- a/Documentation/config.txt
+++ b/Documentation/config.txt
@@ -1870,6 +1870,31 @@ pack.packSizeLimit::
Common unit suffixes of 'k', 'm', or 'g' are
supported.
+pack.useBitmaps::
+ When true, git will use pack bitmaps (if available) when packing
+ to stdout (e.g., during the server side of a fetch). Defaults to
+ true. You should not generally need to turn this off unless
+ you are debugging pack bitmaps.
+
+pack.writebitmaps::
+ When true, git will write a bitmap index when packing all
+ objects to disk (e.g., when `git repack -a` is run). This
+ index can speed up the "counting objects" phase of subsequent
+ packs created for clones and fetches, at the cost of some disk
+ space and extra time spent on the initial repack. Defaults to
+ false.
+
+pack.writeBitmapHashCache::
+ When true, git will include a "hash cache" section in the bitmap
+ index (if one is written). This cache can be used to feed git's
+ delta heuristics, potentially leading to better deltas between
+ bitmapped and non-bitmapped objects (e.g., when serving a fetch
+ between an older, bitmapped pack and objects that have been
+ pushed since the last gc). The downside is that it consumes 4
+ bytes per object of disk space, and that JGit's bitmap
+ implementation does not understand it, causing it to complain if
+ Git and JGit are used on the same repository. Defaults to false.
+
pager.<cmd>::
If the value is boolean, turns on or off pagination of the
output of a particular Git subcommand when writing to a tty.
diff --git a/Documentation/git-repack.txt b/Documentation/git-repack.txt
index 509cf73e50..002cfd5eb9 100644
--- a/Documentation/git-repack.txt
+++ b/Documentation/git-repack.txt
@@ -9,7 +9,7 @@ git-repack - Pack unpacked objects in a repository
SYNOPSIS
--------
[verse]
-'git repack' [-a] [-A] [-d] [-f] [-F] [-l] [-n] [-q] [--window=<n>] [--depth=<n>]
+'git repack' [-a] [-A] [-d] [-f] [-F] [-l] [-n] [-q] [-b] [--window=<n>] [--depth=<n>]
DESCRIPTION
-----------
@@ -110,6 +110,13 @@ other objects in that pack they already have locally.
The default is unlimited, unless the config variable
`pack.packSizeLimit` is set.
+-b::
+--write-bitmap-index::
+ Write a reachability bitmap index as part of the repack. This
+ only makes sense when used with `-a` or `-A`, as the bitmaps
+ must be able to refer to all reachable objects. This option
+ overrides the setting of `pack.writebitmaps`.
+
Configuration
-------------
diff --git a/Documentation/git-rev-list.txt b/Documentation/git-rev-list.txt
index 045b37b82e..7a1585def0 100644
--- a/Documentation/git-rev-list.txt
+++ b/Documentation/git-rev-list.txt
@@ -55,6 +55,7 @@ SYNOPSIS
[ \--reverse ]
[ \--walk-reflogs ]
[ \--no-walk ] [ \--do-walk ]
+ [ \--use-bitmap-index ]
<commit>... [ \-- <paths>... ]
DESCRIPTION
diff --git a/Documentation/rev-list-options.txt b/Documentation/rev-list-options.txt
index 03533af715..9a3da3646e 100644
--- a/Documentation/rev-list-options.txt
+++ b/Documentation/rev-list-options.txt
@@ -257,6 +257,14 @@ See also linkgit:git-reflog[1].
Output excluded boundary commits. Boundary commits are
prefixed with `-`.
+ifdef::git-rev-list[]
+--use-bitmap-index::
+
+ Try to speed up the traversal using the pack bitmap index (if
+ one is available). Note that when traversing with `--objects`,
+ trees and blobs will not have their associated path printed.
+endif::git-rev-list[]
+
--
History Simplification
diff --git a/Documentation/technical/bitmap-format.txt b/Documentation/technical/bitmap-format.txt
new file mode 100644
index 0000000000..f8c18a0f7a
--- /dev/null
+++ b/Documentation/technical/bitmap-format.txt
@@ -0,0 +1,164 @@
+GIT bitmap v1 format
+====================
+
+ - A header appears at the beginning:
+
+ 4-byte signature: {'B', 'I', 'T', 'M'}
+
+ 2-byte version number (network byte order)
+ The current implementation only supports version 1
+ of the bitmap index (the same one as JGit).
+
+ 2-byte flags (network byte order)
+
+ The following flags are supported:
+
+ - BITMAP_OPT_FULL_DAG (0x1) REQUIRED
+ This flag must always be present. It implies that the bitmap
+ index has been generated for a packfile with full closure
+ (i.e. where every single object in the packfile can find
+ its parent links inside the same packfile). This is a
+ requirement for the bitmap index format, also present in JGit,
+ that greatly reduces the complexity of the implementation.
+
+ - BITMAP_OPT_HASH_CACHE (0x4)
+ If present, the end of the bitmap file contains
+ `N` 32-bit name-hash values, one per object in the
+ pack. The format and meaning of the name-hash is
+ described below.
+
+ 4-byte entry count (network byte order)
+
+ The total count of entries (bitmapped commits) in this bitmap index.
+
+ 20-byte checksum
+
+ The SHA1 checksum of the pack this bitmap index belongs to.
+
+ - 4 EWAH bitmaps that act as type indexes
+
+ Type indexes are serialized after the hash cache in the shape
+ of four EWAH bitmaps stored consecutively (see Appendix A for
+ the serialization format of an EWAH bitmap).
+
+ There is a bitmap for each Git object type, stored in the following
+ order:
+
+ - Commits
+ - Trees
+ - Blobs
+ - Tags
+
+ In each bitmap, the `n`th bit is set to true if the `n`th object
+ in the packfile is of that type.
+
+ The obvious consequence is that the OR of all 4 bitmaps will result
+ in a full set (all bits set), and the AND of all 4 bitmaps will
+ result in an empty bitmap (no bits set).
+
+ - N entries with compressed bitmaps, one for each indexed commit
+
+ Where `N` is the total amount of entries in this bitmap index.
+ Each entry contains the following:
+
+ - 4-byte object position (network byte order)
+ The position **in the index for the packfile** where the
+ bitmap for this commit is found.
+
+ - 1-byte XOR-offset
+ The xor offset used to compress this bitmap. For an entry
+ in position `x`, a XOR offset of `y` means that the actual
+ bitmap representing this commit is composed by XORing the
+ bitmap for this entry with the bitmap in entry `x-y` (i.e.
+ the bitmap `y` entries before this one).
+
+ Note that this compression can be recursive. In order to
+ XOR this entry with a previous one, the previous entry needs
+ to be decompressed first, and so on.
+
+ The hard-limit for this offset is 160 (an entry can only be
+ xor'ed against one of the 160 entries preceding it). This
+ number is always positive, and hence entries are always xor'ed
+ with **previous** bitmaps, not bitmaps that will come afterwards
+ in the index.
+
+ - 1-byte flags for this bitmap
+ At the moment the only available flag is `0x1`, which hints
+ that this bitmap can be re-used when rebuilding bitmap indexes
+ for the repository.
+
+ - The compressed bitmap itself, see Appendix A.
+
+== Appendix A: Serialization format for an EWAH bitmap
+
+Ewah bitmaps are serialized in the same protocol as the JAVAEWAH
+library, making them backwards compatible with the JGit
+implementation:
+
+ - 4-byte number of bits of the resulting UNCOMPRESSED bitmap
+
+ - 4-byte number of words of the COMPRESSED bitmap, when stored
+
+ - N x 8-byte words, as specified by the previous field
+
+ This is the actual content of the compressed bitmap.
+
+ - 4-byte position of the current RLW for the compressed
+ bitmap
+
+All words are stored in network byte order for their corresponding
+sizes.
+
+The compressed bitmap is stored in a form of run-length encoding, as
+follows. It consists of a concatenation of an arbitrary number of
+chunks. Each chunk consists of one or more 64-bit words
+
+ H L_1 L_2 L_3 .... L_M
+
+H is called RLW (run length word). It consists of (from lower to higher
+order bits):
+
+ - 1 bit: the repeated bit B
+
+ - 32 bits: repetition count K (unsigned)
+
+ - 31 bits: literal word count M (unsigned)
+
+The bitstream represented by the above chunk is then:
+
+ - K repetitions of B
+
+ - The bits stored in `L_1` through `L_M`. Within a word, bits at
+ lower order come earlier in the stream than those at higher
+ order.
+
+The next word after `L_M` (if any) must again be a RLW, for the next
+chunk. For efficient appending to the bitstream, the EWAH stores a
+pointer to the last RLW in the stream.
+
+
+== Appendix B: Optional Bitmap Sections
+
+These sections may or may not be present in the `.bitmap` file; their
+presence is indicated by the header flags section described above.
+
+Name-hash cache
+---------------
+
+If the BITMAP_OPT_HASH_CACHE flag is set, the end of the bitmap contains
+a cache of 32-bit values, one per object in the pack. The value at
+position `i` is the hash of the pathname at which the `i`th object
+(counting in index order) in the pack can be found. This can be fed
+into the delta heuristics to compare objects with similar pathnames.
+
+The hash algorithm used is:
+
+ hash = 0;
+ while ((c = *name++))
+ if (!isspace(c))
+ hash = (hash >> 2) + (c << 24);
+
+Note that this hashing scheme is tied to the BITMAP_OPT_HASH_CACHE flag.
+If implementations want to choose a different hashing scheme, they are
+free to do so, but MUST allocate a new header flag (because comparing
+hashes made under two different schemes would be pointless).
diff --git a/Makefile b/Makefile
index c3213bd54b..d4ce53a71b 100644
--- a/Makefile
+++ b/Makefile
@@ -664,6 +664,8 @@ LIB_H += diff.h
LIB_H += diffcore.h
LIB_H += dir.h
LIB_H += exec_cmd.h
+LIB_H += ewah/ewok.h
+LIB_H += ewah/ewok_rlw.h
LIB_H += fetch-pack.h
LIB_H += fmt-merge-msg.h
LIB_H += fsck.h
@@ -691,8 +693,10 @@ LIB_H += notes-merge.h
LIB_H += notes-utils.h
LIB_H += notes.h
LIB_H += object.h
+LIB_H += pack-objects.h
LIB_H += pack-revindex.h
LIB_H += pack.h
+LIB_H += pack-bitmap.h
LIB_H += parse-options.h
LIB_H += patch-ids.h
LIB_H += pathspec.h
@@ -796,6 +800,10 @@ LIB_OBJS += dir.o
LIB_OBJS += editor.o
LIB_OBJS += entry.o
LIB_OBJS += environment.o
+LIB_OBJS += ewah/bitmap.o
+LIB_OBJS += ewah/ewah_bitmap.o
+LIB_OBJS += ewah/ewah_io.o
+LIB_OBJS += ewah/ewah_rlw.o
LIB_OBJS += exec_cmd.o
LIB_OBJS += fetch-pack.o
LIB_OBJS += fsck.o
@@ -827,7 +835,10 @@ LIB_OBJS += notes-cache.o
LIB_OBJS += notes-merge.o
LIB_OBJS += notes-utils.o
LIB_OBJS += object.o
+LIB_OBJS += pack-bitmap.o
+LIB_OBJS += pack-bitmap-write.o
LIB_OBJS += pack-check.o
+LIB_OBJS += pack-objects.o
LIB_OBJS += pack-revindex.o
LIB_OBJS += pack-write.o
LIB_OBJS += pager.o
@@ -2480,8 +2491,9 @@ profile-clean:
$(RM) $(addsuffix *.gcno,$(addprefix $(PROFILE_DIR)/, $(object_dirs)))
clean: profile-clean coverage-clean
- $(RM) *.o *.res block-sha1/*.o ppc/*.o compat/*.o compat/*/*.o xdiff/*.o vcs-svn/*.o \
- builtin/*.o $(LIB_FILE) $(XDIFF_LIB) $(VCSSVN_LIB)
+ $(RM) *.o *.res block-sha1/*.o ppc/*.o compat/*.o compat/*/*.o
+ $(RM) xdiff/*.o vcs-svn/*.o ewah/*.o builtin/*.o
+ $(RM) $(LIB_FILE) $(XDIFF_LIB) $(VCSSVN_LIB)
$(RM) $(ALL_PROGRAMS) $(SCRIPT_LIB) $(BUILT_INS) git$X
$(RM) $(TEST_PROGRAMS) $(NO_INSTALL)
$(RM) -r bin-wrappers $(dep_dirs)
diff --git a/block-sha1/sha1.c b/block-sha1/sha1.c
index e1a1eb6097..22b125cf8c 100644
--- a/block-sha1/sha1.c
+++ b/block-sha1/sha1.c
@@ -62,38 +62,6 @@
#define setW(x, val) (W(x) = (val))
#endif
-/*
- * Performance might be improved if the CPU architecture is OK with
- * unaligned 32-bit loads and a fast ntohl() is available.
- * Otherwise fall back to byte loads and shifts which is portable,
- * and is faster on architectures with memory alignment issues.
- */
-
-#if defined(__i386__) || defined(__x86_64__) || \
- defined(_M_IX86) || defined(_M_X64) || \
- defined(__ppc__) || defined(__ppc64__) || \
- defined(__powerpc__) || defined(__powerpc64__) || \
- defined(__s390__) || defined(__s390x__)
-
-#define get_be32(p) ntohl(*(unsigned int *)(p))
-#define put_be32(p, v) do { *(unsigned int *)(p) = htonl(v); } while (0)
-
-#else
-
-#define get_be32(p) ( \
- (*((unsigned char *)(p) + 0) << 24) | \
- (*((unsigned char *)(p) + 1) << 16) | \
- (*((unsigned char *)(p) + 2) << 8) | \
- (*((unsigned char *)(p) + 3) << 0) )
-#define put_be32(p, v) do { \
- unsigned int __v = (v); \
- *((unsigned char *)(p) + 0) = __v >> 24; \
- *((unsigned char *)(p) + 1) = __v >> 16; \
- *((unsigned char *)(p) + 2) = __v >> 8; \
- *((unsigned char *)(p) + 3) = __v >> 0; } while (0)
-
-#endif
-
/* This "rolls" over the 512-bit array */
#define W(x) (array[(x)&15])
diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index 541667f102..c73337931b 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -14,10 +14,12 @@
#include "diff.h"
#include "revision.h"
#include "list-objects.h"
+#include "pack-objects.h"
#include "progress.h"
#include "refs.h"
#include "streaming.h"
#include "thread-utils.h"
+#include "pack-bitmap.h"
static const char *pack_usage[] = {
N_("git pack-objects --stdout [options...] [< ref-list | < object-list]"),
@@ -25,42 +27,15 @@ static const char *pack_usage[] = {
NULL
};
-struct object_entry {
- struct pack_idx_entry idx;
- unsigned long size; /* uncompressed size */
- struct packed_git *in_pack; /* already in pack */
- off_t in_pack_offset;
- struct object_entry *delta; /* delta base object */
- struct object_entry *delta_child; /* deltified objects who bases me */
- struct object_entry *delta_sibling; /* other deltified objects who
- * uses the same base as me
- */
- void *delta_data; /* cached delta (uncompressed) */
- unsigned long delta_size; /* delta data size (uncompressed) */
- unsigned long z_delta_size; /* delta data size (compressed) */
- enum object_type type;
- enum object_type in_pack_type; /* could be delta */
- uint32_t hash; /* name hint hash */
- unsigned char in_pack_header_size;
- unsigned preferred_base:1; /*
- * we do not pack this, but is available
- * to be used as the base object to delta
- * objects against.
- */
- unsigned no_try_delta:1;
- unsigned tagged:1; /* near the very tip of refs */
- unsigned filled:1; /* assigned write-order */
-};
-
/*
- * Objects we are going to pack are collected in objects array (dynamically
- * expanded). nr_objects & nr_alloc controls this array. They are stored
- * in the order we see -- typically rev-list --objects order that gives us
- * nice "minimum seek" order.
+ * Objects we are going to pack are collected in the `to_pack` structure.
+ * It contains an array (dynamically expanded) of the object data, and a map
+ * that can resolve SHA1s to their position in the array.
*/
-static struct object_entry *objects;
+static struct packing_data to_pack;
+
static struct pack_idx_entry **written_list;
-static uint32_t nr_objects, nr_alloc, nr_result, nr_written;
+static uint32_t nr_result, nr_written;
static int non_empty;
static int reuse_delta = 1, reuse_object = 1;
@@ -83,6 +58,14 @@ static struct progress *progress_state;
static int pack_compression_level = Z_DEFAULT_COMPRESSION;
static int pack_compression_seen;
+static struct packed_git *reuse_packfile;
+static uint32_t reuse_packfile_objects;
+static off_t reuse_packfile_offset;
+
+static int use_bitmap_index = 1;
+static int write_bitmap_index;
+static uint16_t write_bitmap_options;
+
static unsigned long delta_cache_size = 0;
static unsigned long max_delta_cache_size = 256 * 1024 * 1024;
static unsigned long cache_max_small_delta_size = 1000;
@@ -90,20 +73,28 @@ static unsigned long cache_max_small_delta_size = 1000;
static unsigned long window_memory_limit = 0;
/*
- * The object names in objects array are hashed with this hashtable,
- * to help looking up the entry by object name.
- * This hashtable is built after all the objects are seen.
- */
-static int *object_ix;
-static int object_ix_hashsz;
-static struct object_entry *locate_object_entry(const unsigned char *sha1);
-
-/*
* stats
*/
static uint32_t written, written_delta;
static uint32_t reused, reused_delta;
+/*
+ * Indexed commits
+ */
+static struct commit **indexed_commits;
+static unsigned int indexed_commits_nr;
+static unsigned int indexed_commits_alloc;
+
+static void index_commit_for_bitmap(struct commit *commit)
+{
+ if (indexed_commits_nr >= indexed_commits_alloc) {
+ indexed_commits_alloc = (indexed_commits_alloc + 32) * 2;
+ indexed_commits = xrealloc(indexed_commits,
+ indexed_commits_alloc * sizeof(struct commit *));
+ }
+
+ indexed_commits[indexed_commits_nr++] = commit;
+}
static void *get_delta(struct object_entry *entry)
{
@@ -553,12 +544,12 @@ static int mark_tagged(const char *path, const unsigned char *sha1, int flag,
void *cb_data)
{
unsigned char peeled[20];
- struct object_entry *entry = locate_object_entry(sha1);
+ struct object_entry *entry = packlist_find(&to_pack, sha1, NULL);
if (entry)
entry->tagged = 1;
if (!peel_ref(path, peeled)) {
- entry = locate_object_entry(peeled);
+ entry = packlist_find(&to_pack, peeled, NULL);
if (entry)
entry->tagged = 1;
}
@@ -633,9 +624,10 @@ static struct object_entry **compute_write_order(void)
{
unsigned int i, wo_end, last_untagged;
- struct object_entry **wo = xmalloc(nr_objects * sizeof(*wo));
+ struct object_entry **wo = xmalloc(to_pack.nr_objects * sizeof(*wo));
+ struct object_entry *objects = to_pack.objects;
- for (i = 0; i < nr_objects; i++) {
+ for (i = 0; i < to_pack.nr_objects; i++) {
objects[i].tagged = 0;
objects[i].filled = 0;
objects[i].delta_child = NULL;
@@ -647,7 +639,7 @@ static struct object_entry **compute_write_order(void)
* Make sure delta_sibling is sorted in the original
* recency order.
*/
- for (i = nr_objects; i > 0;) {
+ for (i = to_pack.nr_objects; i > 0;) {
struct object_entry *e = &objects[--i];
if (!e->delta)
continue;
@@ -665,7 +657,7 @@ static struct object_entry **compute_write_order(void)
* Give the objects in the original recency order until
* we see a tagged tip.
*/
- for (i = wo_end = 0; i < nr_objects; i++) {
+ for (i = wo_end = 0; i < to_pack.nr_objects; i++) {
if (objects[i].tagged)
break;
add_to_write_order(wo, &wo_end, &objects[i]);
@@ -675,7 +667,7 @@ static struct object_entry **compute_write_order(void)
/*
* Then fill all the tagged tips.
*/
- for (; i < nr_objects; i++) {
+ for (; i < to_pack.nr_objects; i++) {
if (objects[i].tagged)
add_to_write_order(wo, &wo_end, &objects[i]);
}
@@ -683,7 +675,7 @@ static struct object_entry **compute_write_order(void)
/*
* And then all remaining commits and tags.
*/
- for (i = last_untagged; i < nr_objects; i++) {
+ for (i = last_untagged; i < to_pack.nr_objects; i++) {
if (objects[i].type != OBJ_COMMIT &&
objects[i].type != OBJ_TAG)
continue;
@@ -693,7 +685,7 @@ static struct object_entry **compute_write_order(void)
/*
* And then all the trees.
*/
- for (i = last_untagged; i < nr_objects; i++) {
+ for (i = last_untagged; i < to_pack.nr_objects; i++) {
if (objects[i].type != OBJ_TREE)
continue;
add_to_write_order(wo, &wo_end, &objects[i]);
@@ -702,17 +694,57 @@ static struct object_entry **compute_write_order(void)
/*
* Finally all the rest in really tight order
*/
- for (i = last_untagged; i < nr_objects; i++) {
+ for (i = last_untagged; i < to_pack.nr_objects; i++) {
if (!objects[i].filled)
add_family_to_write_order(wo, &wo_end, &objects[i]);
}
- if (wo_end != nr_objects)
- die("ordered %u objects, expected %"PRIu32, wo_end, nr_objects);
+ if (wo_end != to_pack.nr_objects)
+ die("ordered %u objects, expected %"PRIu32, wo_end, to_pack.nr_objects);
return wo;
}
+static off_t write_reused_pack(struct sha1file *f)
+{
+ unsigned char buffer[8192];
+ off_t to_write;
+ int fd;
+
+ if (!is_pack_valid(reuse_packfile))
+ die("packfile is invalid: %s", reuse_packfile->pack_name);
+
+ fd = git_open_noatime(reuse_packfile->pack_name);
+ if (fd < 0)
+ die_errno("unable to open packfile for reuse: %s",
+ reuse_packfile->pack_name);
+
+ if (lseek(fd, sizeof(struct pack_header), SEEK_SET) == -1)
+ die_errno("unable to seek in reused packfile");
+
+ if (reuse_packfile_offset < 0)
+ reuse_packfile_offset = reuse_packfile->pack_size - 20;
+
+ to_write = reuse_packfile_offset - sizeof(struct pack_header);
+
+ while (to_write) {
+ int read_pack = xread(fd, buffer, sizeof(buffer));
+
+ if (read_pack <= 0)
+ die_errno("unable to read from reused packfile");
+
+ if (read_pack > to_write)
+ read_pack = to_write;
+
+ sha1write(f, buffer, read_pack);
+ to_write -= read_pack;
+ }
+
+ close(fd);
+ written += reuse_packfile_objects;
+ return reuse_packfile_offset - sizeof(struct pack_header);
+}
+
static void write_pack_file(void)
{
uint32_t i = 0, j;
@@ -724,7 +756,7 @@ static void write_pack_file(void)
if (progress > pack_to_stdout)
progress_state = start_progress("Writing objects", nr_result);
- written_list = xmalloc(nr_objects * sizeof(*written_list));
+ written_list = xmalloc(to_pack.nr_objects * sizeof(*written_list));
write_order = compute_write_order();
do {
@@ -737,8 +769,17 @@ static void write_pack_file(void)
f = create_tmp_packfile(&pack_tmp_name);
offset = write_pack_header(f, nr_remaining);
+
+ if (reuse_packfile) {
+ off_t packfile_size;
+ assert(pack_to_stdout);
+
+ packfile_size = write_reused_pack(f);
+ offset += packfile_size;
+ }
+
nr_written = 0;
- for (; i < nr_objects; i++) {
+ for (; i < to_pack.nr_objects; i++) {
struct object_entry *e = write_order[i];
if (write_one(f, e, &offset) == WRITE_ONE_BREAK)
break;
@@ -789,9 +830,31 @@ static void write_pack_file(void)
if (sizeof(tmpname) <= strlen(base_name) + 50)
die("pack base name '%s' too long", base_name);
snprintf(tmpname, sizeof(tmpname), "%s-", base_name);
+
+ if (write_bitmap_index) {
+ bitmap_writer_set_checksum(sha1);
+ bitmap_writer_build_type_index(written_list, nr_written);
+ }
+
finish_tmp_packfile(tmpname, pack_tmp_name,
written_list, nr_written,
&pack_idx_opts, sha1);
+
+ if (write_bitmap_index) {
+ char *end_of_name_prefix = strrchr(tmpname, 0);
+ sprintf(end_of_name_prefix, "%s.bitmap", sha1_to_hex(sha1));
+
+ stop_progress(&progress_state);
+
+ bitmap_writer_show_progress(progress);
+ bitmap_writer_reuse_bitmaps(&to_pack);
+ bitmap_writer_select_commits(indexed_commits, indexed_commits_nr, -1);
+ bitmap_writer_build(&to_pack);
+ bitmap_writer_finish(written_list, nr_written,
+ tmpname, write_bitmap_options);
+ write_bitmap_index = 0;
+ }
+
free(pack_tmp_name);
puts(sha1_to_hex(sha1));
}
@@ -801,7 +864,7 @@ static void write_pack_file(void)
written_list[j]->offset = (off_t)-1;
}
nr_remaining -= nr_written;
- } while (nr_remaining && i < nr_objects);
+ } while (nr_remaining && i < to_pack.nr_objects);
free(written_list);
free(write_order);
@@ -811,73 +874,6 @@ static void write_pack_file(void)
written, nr_result);
}
-static int locate_object_entry_hash(const unsigned char *sha1)
-{
- int i;
- unsigned int ui;
- memcpy(&ui, sha1, sizeof(unsigned int));
- i = ui % object_ix_hashsz;
- while (0 < object_ix[i]) {
- if (!hashcmp(sha1, objects[object_ix[i] - 1].idx.sha1))
- return i;
- if (++i == object_ix_hashsz)
- i = 0;
- }
- return -1 - i;
-}
-
-static struct object_entry *locate_object_entry(const unsigned char *sha1)
-{
- int i;
-
- if (!object_ix_hashsz)
- return NULL;
-
- i = locate_object_entry_hash(sha1);
- if (0 <= i)
- return &objects[object_ix[i]-1];
- return NULL;
-}
-
-static void rehash_objects(void)
-{
- uint32_t i;
- struct object_entry *oe;
-
- object_ix_hashsz = nr_objects * 3;
- if (object_ix_hashsz < 1024)
- object_ix_hashsz = 1024;
- object_ix = xrealloc(object_ix, sizeof(int) * object_ix_hashsz);
- memset(object_ix, 0, sizeof(int) * object_ix_hashsz);
- for (i = 0, oe = objects; i < nr_objects; i++, oe++) {
- int ix = locate_object_entry_hash(oe->idx.sha1);
- if (0 <= ix)
- continue;
- ix = -1 - ix;
- object_ix[ix] = i + 1;
- }
-}
-
-static uint32_t name_hash(const char *name)
-{
- uint32_t c, hash = 0;
-
- if (!name)
- return 0;
-
- /*
- * This effectively just creates a sortable number from the
- * last sixteen non-whitespace characters. Last characters
- * count "most", so things that end in ".c" sort together.
- */
- while ((c = *name++) != 0) {
- if (isspace(c))
- continue;
- hash = (hash >> 2) + (c << 24);
- }
- return hash;
-}
-
static void setup_delta_attr_check(struct git_attr_check *check)
{
static struct git_attr *attr_delta;
@@ -900,42 +896,69 @@ static int no_try_delta(const char *path)
return 0;
}
-static int add_object_entry(const unsigned char *sha1, enum object_type type,
- const char *name, int exclude)
+/*
+ * When adding an object, check whether we have already added it
+ * to our packing list. If so, we can skip. However, if we are
+ * being asked to excludei t, but the previous mention was to include
+ * it, make sure to adjust its flags and tweak our numbers accordingly.
+ *
+ * As an optimization, we pass out the index position where we would have
+ * found the item, since that saves us from having to look it up again a
+ * few lines later when we want to add the new entry.
+ */
+static int have_duplicate_entry(const unsigned char *sha1,
+ int exclude,
+ uint32_t *index_pos)
{
struct object_entry *entry;
- struct packed_git *p, *found_pack = NULL;
- off_t found_offset = 0;
- int ix;
- uint32_t hash = name_hash(name);
-
- ix = nr_objects ? locate_object_entry_hash(sha1) : -1;
- if (ix >= 0) {
- if (exclude) {
- e