From dc944b65f1d13e258edc88a7608e2fe957ec334e Mon Sep 17 00:00:00 2001 From: Jeff King Date: Fri, 19 May 2017 08:54:43 -0400 Subject: get_sha1_with_context: dynamically allocate oc->path When a sha1 lookup returns the tree path via "struct object_context", it just copies it into a fixed-size buffer. This means the result can be truncated, and it means our "struct object_context" consumes a lot of stack space. Instead, let's allocate a string on the heap. Because most callers don't care about this information, we'll avoid doing it by default (so they don't all have to start calling free() on the result). There are basically two options for the caller to signal to us that it's interested: 1. By setting a pointer to storage in the object_context. 2. By passing a flag in another parameter. Doing (1) would match the way that sha1_object_info_extended() works. But it would mean that every caller would have to initialize the object_context, which they don't currently have to do. This patch does (2), and adds a new bit to the function's flags field. All of the callers that look at the "path" field are updated to pass the new flag. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/cat-file.c | 4 +++- builtin/grep.c | 4 +++- builtin/log.c | 10 +++++++--- 3 files changed, 13 insertions(+), 5 deletions(-) (limited to 'builtin') diff --git a/builtin/cat-file.c b/builtin/cat-file.c index 1890d7a639..421709517c 100644 --- a/builtin/cat-file.c +++ b/builtin/cat-file.c @@ -61,7 +61,8 @@ static int cat_one_file(int opt, const char *exp_type, const char *obj_name, if (unknown_type) flags |= LOOKUP_UNKNOWN_OBJECT; - if (get_sha1_with_context(obj_name, 0, oid.hash, &obj_context)) + if (get_sha1_with_context(obj_name, GET_SHA1_RECORD_PATH, + oid.hash, &obj_context)) die("Not a valid object name %s", obj_name); if (!path) @@ -165,6 +166,7 @@ static int cat_one_file(int opt, const char *exp_type, const char *obj_name, die("git cat-file %s: bad file", obj_name); write_or_die(1, buf, size); + free(obj_context.path); return 0; } diff --git a/builtin/grep.c b/builtin/grep.c index 3ffb5b4e81..254c1c7849 100644 --- a/builtin/grep.c +++ b/builtin/grep.c @@ -1190,7 +1190,8 @@ int cmd_grep(int argc, const char **argv, const char *prefix) break; } - if (get_sha1_with_context(arg, 0, oid.hash, &oc)) { + if (get_sha1_with_context(arg, GET_SHA1_RECORD_PATH, + oid.hash, &oc)) { if (seen_dashdash) die(_("unable to resolve revision: %s"), arg); break; @@ -1200,6 +1201,7 @@ int cmd_grep(int argc, const char **argv, const char *prefix) if (!seen_dashdash) verify_non_filename(prefix, arg); add_object_array_with_path(object, arg, &list, oc.mode, oc.path); + free(oc.path); } /* diff --git a/builtin/log.c b/builtin/log.c index b3b10cc1ed..1db016bcd9 100644 --- a/builtin/log.c +++ b/builtin/log.c @@ -483,16 +483,20 @@ static int show_blob_object(const struct object_id *oid, struct rev_info *rev, c !DIFF_OPT_TST(&rev->diffopt, ALLOW_TEXTCONV)) return stream_blob_to_fd(1, oid, NULL, 0); - if (get_sha1_with_context(obj_name, 0, oidc.hash, &obj_context)) + if (get_sha1_with_context(obj_name, GET_SHA1_RECORD_PATH, + oidc.hash, &obj_context)) die(_("Not a valid object name %s"), obj_name); - if (!obj_context.path[0] || - !textconv_object(obj_context.path, obj_context.mode, &oidc, 1, &buf, &size)) + if (!obj_context.path || + !textconv_object(obj_context.path, obj_context.mode, &oidc, 1, &buf, &size)) { + free(obj_context.path); return stream_blob_to_fd(1, oid, NULL, 0); + } if (!buf) die(_("git show %s: bad file"), obj_name); write_or_die(1, buf, size); + free(obj_context.path); return 0; } -- cgit v1.2.3 From 42f5ba5bb6648c16a3c90a0110fbdb430e590a1b Mon Sep 17 00:00:00 2001 From: Jeff King Date: Fri, 19 May 2017 08:57:30 -0400 Subject: diff: pass whole pending entry in blobinfo When diffing blobs directly, git-diff picks the blobs out of the rev_info's pending array and copies the relevant bits to a custom "struct blobinfo". But the pending array entry already has all of this information (and more, which we'll use in future patches). Let's just pass the original entry instead. In practice, these two blobs are probably adjacent in the revs->pending array, and we could just pass the whole array. But the current code is careful to pick each blob out separately and put it into another array, so we'll continue to do so and make our own array-of-pointers. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/diff.c | 38 +++++++++++++++----------------------- 1 file changed, 15 insertions(+), 23 deletions(-) (limited to 'builtin') diff --git a/builtin/diff.c b/builtin/diff.c index d184aafab9..8b276ae575 100644 --- a/builtin/diff.c +++ b/builtin/diff.c @@ -20,12 +20,6 @@ #define DIFF_NO_INDEX_EXPLICIT 1 #define DIFF_NO_INDEX_IMPLICIT 2 -struct blobinfo { - struct object_id oid; - const char *name; - unsigned mode; -}; - static const char builtin_diff_usage[] = "git diff [] [ []] [--] [...]"; @@ -65,7 +59,7 @@ static void stuff_change(struct diff_options *opt, static int builtin_diff_b_f(struct rev_info *revs, int argc, const char **argv, - struct blobinfo *blob) + struct object_array_entry **blob) { /* Blob vs file in the working tree*/ struct stat st; @@ -84,12 +78,12 @@ static int builtin_diff_b_f(struct rev_info *revs, diff_set_mnemonic_prefix(&revs->diffopt, "o/", "w/"); - if (blob[0].mode == S_IFINVALID) - blob[0].mode = canon_mode(st.st_mode); + if (blob[0]->mode == S_IFINVALID) + blob[0]->mode = canon_mode(st.st_mode); stuff_change(&revs->diffopt, - blob[0].mode, canon_mode(st.st_mode), - &blob[0].oid, &null_oid, + blob[0]->mode, canon_mode(st.st_mode), + &blob[0]->item->oid, &null_oid, 1, 0, path, path); diffcore_std(&revs->diffopt); @@ -99,24 +93,24 @@ static int builtin_diff_b_f(struct rev_info *revs, static int builtin_diff_blobs(struct rev_info *revs, int argc, const char **argv, - struct blobinfo *blob) + struct object_array_entry **blob) { unsigned mode = canon_mode(S_IFREG | 0644); if (argc > 1) usage(builtin_diff_usage); - if (blob[0].mode == S_IFINVALID) - blob[0].mode = mode; + if (blob[0]->mode == S_IFINVALID) + blob[0]->mode = mode; - if (blob[1].mode == S_IFINVALID) - blob[1].mode = mode; + if (blob[1]->mode == S_IFINVALID) + blob[1]->mode = mode; stuff_change(&revs->diffopt, - blob[0].mode, blob[1].mode, - &blob[0].oid, &blob[1].oid, + blob[0]->mode, blob[1]->mode, + &blob[0]->item->oid, &blob[1]->item->oid, 1, 1, - blob[0].name, blob[1].name); + blob[0]->name, blob[1]->name); diffcore_std(&revs->diffopt); diff_flush(&revs->diffopt); return 0; @@ -259,7 +253,7 @@ int cmd_diff(int argc, const char **argv, const char *prefix) struct rev_info rev; struct object_array ent = OBJECT_ARRAY_INIT; int blobs = 0, paths = 0; - struct blobinfo blob[2]; + struct object_array_entry *blob[2]; int nongit = 0, no_index = 0; int result = 0; @@ -408,9 +402,7 @@ int cmd_diff(int argc, const char **argv, const char *prefix) } else if (obj->type == OBJ_BLOB) { if (2 <= blobs) die(_("more than two blobs given: '%s'"), name); - hashcpy(blob[blobs].oid.hash, obj->oid.hash); - blob[blobs].name = name; - blob[blobs].mode = entry->mode; + blob[blobs] = entry; blobs++; } else { -- cgit v1.2.3 From d04ec74b17138733463c0ca1024fdbb42be6096a Mon Sep 17 00:00:00 2001 From: Jeff King Date: Fri, 19 May 2017 08:58:05 -0400 Subject: diff: use the word "path" instead of "name" for blobs The stuff_change() function makes diff_filespecs out of blobs. The term we generally use for filespecs is "path", not "name", so let's be consistent here. That will make things less confusing when the next patch starts caring about the path/name distinction inside the pending object array. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/diff.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'builtin') diff --git a/builtin/diff.c b/builtin/diff.c index 8b276ae575..4c0811d6fc 100644 --- a/builtin/diff.c +++ b/builtin/diff.c @@ -29,8 +29,8 @@ static void stuff_change(struct diff_options *opt, const struct object_id *new_oid, int old_oid_valid, int new_oid_valid, - const char *old_name, - const char *new_name) + const char *old_path, + const char *new_path) { struct diff_filespec *one, *two; @@ -41,16 +41,16 @@ static void stuff_change(struct diff_options *opt, if (DIFF_OPT_TST(opt, REVERSE_DIFF)) { SWAP(old_mode, new_mode); SWAP(old_oid, new_oid); - SWAP(old_name, new_name); + SWAP(old_path, new_path); } if (opt->prefix && - (strncmp(old_name, opt->prefix, opt->prefix_length) || - strncmp(new_name, opt->prefix, opt->prefix_length))) + (strncmp(old_path, opt->prefix, opt->prefix_length) || + strncmp(new_path, opt->prefix, opt->prefix_length))) return; - one = alloc_filespec(old_name); - two = alloc_filespec(new_name); + one = alloc_filespec(old_path); + two = alloc_filespec(new_path); fill_filespec(one, old_oid->hash, old_oid_valid, old_mode); fill_filespec(two, new_oid->hash, new_oid_valid, new_mode); -- cgit v1.2.3 From 158b06caee5f9ea2987f444b8e49bd2d678b187d Mon Sep 17 00:00:00 2001 From: Jeff King Date: Fri, 19 May 2017 08:59:15 -0400 Subject: diff: use pending "path" if it is available There's a subtle distinction between "name" and "path" for a blob that we resolve: the name is what the user told us on the command line, and the path is what we traversed when finding the blob within a tree (if we did so). When we diff blobs directly, we use "name", but "path" is more likely to be useful to the user (it will find the correct .gitattributes, and give them a saner diff header). We still have to fall back to using the name for some cases (i.e., any blob reference that isn't of the form tree:path). That's the best we can do in such a case. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/diff.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'builtin') diff --git a/builtin/diff.c b/builtin/diff.c index 4c0811d6fc..1a1149eed4 100644 --- a/builtin/diff.c +++ b/builtin/diff.c @@ -23,6 +23,11 @@ static const char builtin_diff_usage[] = "git diff [] [ []] [--] [...]"; +static const char *blob_path(struct object_array_entry *entry) +{ + return entry->path ? entry->path : entry->name; +} + static void stuff_change(struct diff_options *opt, unsigned old_mode, unsigned new_mode, const struct object_id *old_oid, @@ -110,7 +115,7 @@ static int builtin_diff_blobs(struct rev_info *revs, blob[0]->mode, blob[1]->mode, &blob[0]->item->oid, &blob[1]->item->oid, 1, 1, - blob[0]->name, blob[1]->name); + blob_path(blob[0]), blob_path(blob[1])); diffcore_std(&revs->diffopt); diff_flush(&revs->diffopt); return 0; -- cgit v1.2.3 From 30d005c02014680403b5d35ef274047ab91fa5bd Mon Sep 17 00:00:00 2001 From: Jeff King Date: Fri, 19 May 2017 08:59:34 -0400 Subject: diff: use blob path for blob/file diffs When we diff a blob against a working tree file like: git diff HEAD:Makefile Makefile we always use the working tree filename for both sides of the diff. In most cases that's fine, as the two would be the same anyway, as above. And until recently, we used the "name" for the blob, not the path, which would have the messy "HEAD:" on the beginning. But when they don't match, like: git diff HEAD:old_path new_path it makes sense to show both names. This patch uses the blob's path field if it's available, and otherwise falls back to using the filename (in preference to the blob's name, which is likely to be garbage like a raw sha1). Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/diff.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'builtin') diff --git a/builtin/diff.c b/builtin/diff.c index 1a1149eed4..5e7c6428c9 100644 --- a/builtin/diff.c +++ b/builtin/diff.c @@ -90,7 +90,8 @@ static int builtin_diff_b_f(struct rev_info *revs, blob[0]->mode, canon_mode(st.st_mode), &blob[0]->item->oid, &null_oid, 1, 0, - path, path); + blob[0]->path ? blob[0]->path : path, + path); diffcore_std(&revs->diffopt); diff_flush(&revs->diffopt); return 0; -- cgit v1.2.3