From 7109c889f11b39a2c5a5122e3726be7ffce09faf Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Mon, 29 Oct 2007 11:53:55 -0700 Subject: sha1_file.c: avoid gcc signed overflow warnings With the recent gcc, we get: sha1_file.c: In check_packed_git_: sha1_file.c:527: warning: assuming signed overflow does not occur when assuming that (X + c) < X is always false sha1_file.c:527: warning: assuming signed overflow does not occur when assuming that (X + c) < X is always false for a piece of code that tries to make sure that off_t is large enough to hold more than 2^32 offset. The test tried to make sure these do not wrap-around: /* make sure we can deal with large pack offsets */ off_t x = 0x7fffffffUL, y = 0xffffffffUL; if (x > (x + 1) || y > (y + 1)) { but gcc assumes it can do whatever optimization it wants for a signed overflow (undefined behaviour) and warns about this construct. Follow Linus's suggestion to check sizeof(off_t) instead to work around the problem. Signed-off-by: Junio C Hamano --- sha1_file.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) (limited to 'sha1_file.c') diff --git a/sha1_file.c b/sha1_file.c index 9978a58da6..95b5a403d8 100644 --- a/sha1_file.c +++ b/sha1_file.c @@ -521,13 +521,15 @@ static int check_packed_git_idx(const char *path, struct packed_git *p) munmap(idx_map, idx_size); return error("wrong index v2 file size in %s", path); } - if (idx_size != min_size) { - /* make sure we can deal with large pack offsets */ - off_t x = 0x7fffffffUL, y = 0xffffffffUL; - if (x > (x + 1) || y > (y + 1)) { - munmap(idx_map, idx_size); - return error("pack too large for current definition of off_t in %s", path); - } + if (idx_size != min_size && + /* + * make sure we can deal with large pack offsets. + * 31-bit signed offset won't be enough, neither + * 32-bit unsigned one will be. + */ + (sizeof(off_t) <= 4)) { + munmap(idx_map, idx_size); + return error("pack too large for current definition of off_t in %s", path); } } -- cgit v1.2.3 From 85dadc389468211a2fc0006ce8ea524490d417f5 Mon Sep 17 00:00:00 2001 From: Johannes Sixt Date: Tue, 13 Nov 2007 21:05:00 +0100 Subject: Use is_absolute_path() in sha1_file.c. There are some places that test for an absolute path. Use the helper function to ease porting. Signed-off-by: Johannes Sixt Signed-off-by: Junio C Hamano --- sha1_file.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'sha1_file.c') diff --git a/sha1_file.c b/sha1_file.c index f007874cbb..560c0e0d08 100644 --- a/sha1_file.c +++ b/sha1_file.c @@ -86,7 +86,7 @@ int safe_create_leading_directories(char *path) char *pos = path; struct stat st; - if (*pos == '/') + if (is_absolute_path(path)) pos++; while (pos) { @@ -253,7 +253,7 @@ static int link_alt_odb_entry(const char * entry, int len, const char * relative int entlen = pfxlen + 43; int base_len = -1; - if (*entry != '/' && relative_base) { + if (!is_absolute_path(entry) && relative_base) { /* Relative alt-odb */ if (base_len < 0) base_len = strlen(relative_base) + 1; @@ -262,7 +262,7 @@ static int link_alt_odb_entry(const char * entry, int len, const char * relative } ent = xmalloc(sizeof(*ent) + entlen); - if (*entry != '/' && relative_base) { + if (!is_absolute_path(entry) && relative_base) { memcpy(ent->base, relative_base, base_len - 1); ent->base[base_len - 1] = '/'; memcpy(ent->base + base_len, entry, len); @@ -333,7 +333,7 @@ static void link_alt_odb_entries(const char *alt, const char *ep, int sep, while (cp < ep && *cp != sep) cp++; if (last != cp) { - if ((*last != '/') && depth) { + if (!is_absolute_path(last) && depth) { error("%s: ignoring relative alternate object store %s", relative_base, last); } else { -- cgit v1.2.3 From 9e42d6a1c53dadd409fab11cc76e0eba9ec15365 Mon Sep 17 00:00:00 2001 From: Steffen Prohaska Date: Wed, 21 Nov 2007 21:27:19 +0100 Subject: sha1_file.c: Fix size_t related printf format warnings The old way of fixing warnings did not succeed on MinGW. MinGW does not support C99 printf format strings for size_t [1]. But gcc on MinGW issues warnings if C99 printf format is not used. Hence, the old stragegy to avoid warnings fails. [1] http://www.mingw.org/MinGWiki/index.php/C99 This commits passes arguments of type size_t through a tiny helper functions that casts to the type expected by the format string. Signed-off-by: Steffen Prohaska Signed-off-by: Junio C Hamano --- sha1_file.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'sha1_file.c') diff --git a/sha1_file.c b/sha1_file.c index 560c0e0d08..b0c24356ae 100644 --- a/sha1_file.c +++ b/sha1_file.c @@ -25,8 +25,10 @@ #ifdef NO_C99_FORMAT #define SZ_FMT "lu" +static unsigned long sz_fmt(size_t s) { return (unsigned long)s; } #else #define SZ_FMT "zu" +static size_t sz_fmt(size_t s) { return s; } #endif const unsigned char null_sha1[20]; @@ -423,9 +425,9 @@ void pack_report(void) "pack_report: getpagesize() = %10" SZ_FMT "\n" "pack_report: core.packedGitWindowSize = %10" SZ_FMT "\n" "pack_report: core.packedGitLimit = %10" SZ_FMT "\n", - (size_t) getpagesize(), - packed_git_window_size, - packed_git_limit); + sz_fmt(getpagesize()), + sz_fmt(packed_git_window_size), + sz_fmt(packed_git_limit)); fprintf(stderr, "pack_report: pack_used_ctr = %10u\n" "pack_report: pack_mmap_calls = %10u\n" @@ -435,7 +437,7 @@ void pack_report(void) pack_used_ctr, pack_mmap_calls, pack_open_windows, peak_pack_open_windows, - pack_mapped, peak_pack_mapped); + sz_fmt(pack_mapped), sz_fmt(peak_pack_mapped)); } static int check_packed_git_idx(const char *path, struct packed_git *p) -- cgit v1.2.3 From 790296fd8863d649054096191d33bacbbf5c2115 Mon Sep 17 00:00:00 2001 From: Jim Meyering Date: Thu, 3 Jan 2008 15:18:07 +0100 Subject: Fix grammar nits in documentation and in code comments. Signed-off-by: Jim Meyering Signed-off-by: Junio C Hamano --- sha1_file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'sha1_file.c') diff --git a/sha1_file.c b/sha1_file.c index b0c24356ae..6583797ce5 100644 --- a/sha1_file.c +++ b/sha1_file.c @@ -148,7 +148,7 @@ static void fill_sha1_path(char *pathbuf, const unsigned char *sha1) /* * NOTE! This returns a statically allocated buffer, so you have to be - * careful about using it. Do a "xstrdup()" if you need to save the + * careful about using it. Do an "xstrdup()" if you need to save the * filename. * * Also note that this returns the location for creating. Reading -- cgit v1.2.3 From c9ced051c3afa6f3da7f59b0dcb92787b2b5c702 Mon Sep 17 00:00:00 2001 From: "Shawn O. Pearce" Date: Thu, 17 Jan 2008 22:57:00 -0500 Subject: Fix random fast-import errors when compiled with NO_MMAP fast-import was relying on the fact that on most systems mmap() and write() are synchronized by the filesystem's buffer cache. We were relying on the ability to mmap() 20 bytes beyond the current end of the file, then later fill in those bytes with a future write() call, then read them through the previously obtained mmap() address. This isn't always true with some implementations of NFS, but it is especially not true with our NO_MMAP=YesPlease build time option used on some platforms. If fast-import was built with NO_MMAP=YesPlease we used the malloc()+pread() emulation and the subsequent write() call does not update the trailing 20 bytes of a previously obtained "mmap()" (aka malloc'd) address. Under NO_MMAP that behavior causes unpack_entry() in sha1_file.c to be unable to read an object header (or data) that has been unlucky enough to be written to the packfile at a location such that it is in the trailing 20 bytes of a window previously opened on that same packfile. This bug has gone unnoticed for a very long time as it is highly data dependent. Not only does the object have to be placed at the right position, but it also needs to be positioned behind some other object that has been accessed due to a branch cache invalidation. In other words the stars had to align just right, and if you did run into this bug you probably should also have purchased a lottery ticket. Fortunately the workaround is a lot easier than the bug explanation. Before we allow unpack_entry() to read data from a pack window that has also (possibly) been modified through write() we force all existing windows on that packfile to be closed. By closing the windows we ensure that any new access via the emulated mmap() will reread the packfile, updating to the current file content. This comes at a slight performance degredation as we cannot reuse previously cached windows when we update the packfile. But it is a fairly minor difference as the window closes happen at only two points: - When the packfile is finalized and its .idx is generated: At this stage we are getting ready to update the refs and any data access into the packfile is going to be random, and is going after only the branch tips (to ensure they are valid). Our existing windows (if any) are not likely to be positioned at useful locations to access those final tip commits so we probably were closing them before anyway. - When the branch cache missed and we need to reload: At this point fast-import is getting change commands for the next commit and it needs to go re-read a tree object it previously had written out to the packfile. What windows we had (if any) are not likely to cover the tree in question so we probably were closing them before anyway. We do try to avoid unnecessarily closing windows in the second case by checking to see if the packfile size has increased since the last time we called unpack_entry() on that packfile. If the size has not changed then we have not written additional data, and any existing window is still vaild. This nicely handles the cases where fast-import is going through a branch cache reload and needs to read many trees at once. During such an event we are not likely to be updating the packfile so we do not cycle the windows between reads. With this change in place t9301-fast-export.sh (which was broken by c3b0dec509fe136c5417422f31898b5a4e2d5e02) finally works again. Signed-off-by: Shawn O. Pearce Signed-off-by: Junio C Hamano --- sha1_file.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'sha1_file.c') diff --git a/sha1_file.c b/sha1_file.c index 6583797ce5..66a4e00fa8 100644 --- a/sha1_file.c +++ b/sha1_file.c @@ -611,6 +611,22 @@ void release_pack_memory(size_t need, int fd) ; /* nothing */ } +void close_pack_windows(struct packed_git *p) +{ + while (p->windows) { + struct pack_window *w = p->windows; + + if (w->inuse_cnt) + die("pack '%s' still has open windows to it", + p->pack_name); + munmap(w->base, w->len); + pack_mapped -= w->len; + pack_open_windows--; + p->windows = w->next; + free(w); + } +} + void unuse_pack(struct pack_window **w_cursor) { struct pack_window *w = *w_cursor; -- cgit v1.2.3 From 21e5ad50fc5e7277c74cfbb3cf6502468e840f86 Mon Sep 17 00:00:00 2001 From: Steffen Prohaska Date: Wed, 6 Feb 2008 12:25:58 +0100 Subject: safecrlf: Add mechanism to warn about irreversible crlf conversions CRLF conversion bears a slight chance of corrupting data. autocrlf=true will convert CRLF to LF during commit and LF to CRLF during checkout. A file that contains a mixture of LF and CRLF before the commit cannot be recreated by git. For text files this is the right thing to do: it corrects line endings such that we have only LF line endings in the repository. But for binary files that are accidentally classified as text the conversion can corrupt data. If you recognize such corruption early you can easily fix it by setting the conversion type explicitly in .gitattributes. Right after committing you still have the original file in your work tree and this file is not yet corrupted. You can explicitly tell git that this file is binary and git will handle the file appropriately. Unfortunately, the desired effect of cleaning up text files with mixed line endings and the undesired effect of corrupting binary files cannot be distinguished. In both cases CRLFs are removed in an irreversible way. For text files this is the right thing to do because CRLFs are line endings, while for binary files converting CRLFs corrupts data. This patch adds a mechanism that can either warn the user about an irreversible conversion or can even refuse to convert. The mechanism is controlled by the variable core.safecrlf, with the following values: - false: disable safecrlf mechanism - warn: warn about irreversible conversions - true: refuse irreversible conversions The default is to warn. Users are only affected by this default if core.autocrlf is set. But the current default of git is to leave core.autocrlf unset, so users will not see warnings unless they deliberately chose to activate the autocrlf mechanism. The safecrlf mechanism's details depend on the git command. The general principles when safecrlf is active (not false) are: - we warn/error out if files in the work tree can modified in an irreversible way without giving the user a chance to backup the original file. - for read-only operations that do not modify files in the work tree we do not not print annoying warnings. There are exceptions. Even though... - "git add" itself does not touch the files in the work tree, the next checkout would, so the safety triggers; - "git apply" to update a text file with a patch does touch the files in the work tree, but the operation is about text files and CRLF conversion is about fixing the line ending inconsistencies, so the safety does not trigger; - "git diff" itself does not touch the files in the work tree, it is often run to inspect the changes you intend to next "git add". To catch potential problems early, safety triggers. The concept of a safety check was originally proposed in a similar way by Linus Torvalds. Thanks to Dimitry Potapov for insisting on getting the naked LF/autocrlf=true case right. Signed-off-by: Steffen Prohaska --- sha1_file.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'sha1_file.c') diff --git a/sha1_file.c b/sha1_file.c index 66a4e00fa8..41799492f9 100644 --- a/sha1_file.c +++ b/sha1_file.c @@ -2358,7 +2358,8 @@ int index_fd(unsigned char *sha1, int fd, struct stat *st, int write_object, if ((type == OBJ_BLOB) && S_ISREG(st->st_mode)) { struct strbuf nbuf; strbuf_init(&nbuf, 0); - if (convert_to_git(path, buf, size, &nbuf)) { + if (convert_to_git(path, buf, size, &nbuf, + write_object ? safe_crlf : 0)) { munmap(buf, size); buf = strbuf_detach(&nbuf, &size); re_allocated = 1; -- cgit v1.2.3 From 346245a1bb6272dd370ba2f7b9bf86d3df5fed9a Mon Sep 17 00:00:00 2001 From: Jeff King Date: Wed, 13 Feb 2008 06:25:04 -0500 Subject: hard-code the empty tree object Now any commands may reference the empty tree object by its sha1 (4b825dc642cb6eb9a060e54bf8d69288fbee4904). This is useful for showing some diffs, especially for initial commits. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- sha1_file.c | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'sha1_file.c') diff --git a/sha1_file.c b/sha1_file.c index 66a4e00fa8..f7b75b2acc 100644 --- a/sha1_file.c +++ b/sha1_file.c @@ -1845,6 +1845,15 @@ static struct cached_object { } *cached_objects; static int cached_object_nr, cached_object_alloc; +static struct cached_object empty_tree = { + /* empty tree sha1: 4b825dc642cb6eb9a060e54bf8d69288fbee4904 */ + "\x4b\x82\x5d\xc6\x42\xcb\x6e\xb9\xa0\x60" + "\xe5\x4b\xf8\xd6\x92\x88\xfb\xee\x49\x04", + OBJ_TREE, + "", + 0 +}; + static struct cached_object *find_cached_object(const unsigned char *sha1) { int i; @@ -1854,6 +1863,8 @@ static struct cached_object *find_cached_object(const unsigned char *sha1) if (!hashcmp(co->sha1, sha1)) return co; } + if (!hashcmp(sha1, empty_tree.sha1)) + return &empty_tree; return NULL; } -- cgit v1.2.3 From 50974ec99408b2d814360863e72a5eca613889c8 Mon Sep 17 00:00:00 2001 From: Martin Koegler Date: Mon, 18 Feb 2008 21:47:52 +0100 Subject: read_object_with_reference: don't read beyond the buffer Signed-off-by: Martin Koegler Signed-off-by: Junio C Hamano --- sha1_file.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'sha1_file.c') diff --git a/sha1_file.c b/sha1_file.c index 66a4e00fa8..0ca7f0dbc6 100644 --- a/sha1_file.c +++ b/sha1_file.c @@ -1943,7 +1943,8 @@ void *read_object_with_reference(const unsigned char *sha1, } ref_length = strlen(ref_type); - if (memcmp(buffer, ref_type, ref_length) || + if (ref_length + 40 > isize || + memcmp(buffer, ref_type, ref_length) || get_sha1_hex((char *) buffer + ref_length, actual_sha1)) { free(buffer); return NULL; -- cgit v1.2.3