From 021ba32a7bca954235e31338c4f27b221a1807de Mon Sep 17 00:00:00 2001 From: Jeff King Date: Fri, 10 Apr 2020 15:43:41 -0400 Subject: remote: drop auto-strlen behavior of make_branch() and make_rewrite() The make_branch() and make_rewrite() functions can take a NUL-terminated string or a ptr/len pair. They use a sentinel value of "0" for the len to tell the difference between the two. However, when parsing config like: [branch ""] merge = whatever whose key flattens to: branch..merge we might actually have a zero-length branch name. This is obviously nonsense, but the current code would consider it as a NUL-terminated string and use the branch name ".merge". We could use a better sentinel value here (like "-1"), but that gets in the way of moving to size_t, which is a more appropriate type for a ptr/len combo. Let's instead just drop this feature and have the callers (of which there are only two total) use strlen() themselves. This simplifies the code, and lets us move to using size_t. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- remote.c | 35 ++++++++++++----------------------- 1 file changed, 12 insertions(+), 23 deletions(-) diff --git a/remote.c b/remote.c index c43196ec06..35bc3f9f2d 100644 --- a/remote.c +++ b/remote.c @@ -174,54 +174,43 @@ static void add_merge(struct branch *branch, const char *name) branch->merge_name[branch->merge_nr++] = name; } -static struct branch *make_branch(const char *name, int len) +static struct branch *make_branch(const char *name, size_t len) { struct branch *ret; int i; for (i = 0; i < branches_nr; i++) { - if (len ? (!strncmp(name, branches[i]->name, len) && - !branches[i]->name[len]) : - !strcmp(name, branches[i]->name)) + if (!strncmp(name, branches[i]->name, len) && + !branches[i]->name[len]) return branches[i]; } ALLOC_GROW(branches, branches_nr + 1, branches_alloc); ret = xcalloc(1, sizeof(struct branch)); branches[branches_nr++] = ret; - if (len) - ret->name = xstrndup(name, len); - else - ret->name = xstrdup(name); + ret->name = xstrndup(name, len); ret->refname = xstrfmt("refs/heads/%s", ret->name); return ret; } -static struct rewrite *make_rewrite(struct rewrites *r, const char *base, int len) +static struct rewrite *make_rewrite(struct rewrites *r, + const char *base, size_t len) { struct rewrite *ret; int i; for (i = 0; i < r->rewrite_nr; i++) { - if (len - ? (len == r->rewrite[i]->baselen && - !strncmp(base, r->rewrite[i]->base, len)) - : !strcmp(base, r->rewrite[i]->base)) + if (len == r->rewrite[i]->baselen && + !strncmp(base, r->rewrite[i]->base, len)) return r->rewrite[i]; } ALLOC_GROW(r->rewrite, r->rewrite_nr + 1, r->rewrite_alloc); ret = xcalloc(1, sizeof(struct rewrite)); r->rewrite[r->rewrite_nr++] = ret; - if (len) { - ret->base = xstrndup(base, len); - ret->baselen = len; - } - else { - ret->base = xstrdup(base); - ret->baselen = strlen(base); - } + ret->base = xstrndup(base, len); + ret->baselen = len; return ret; } @@ -470,7 +459,7 @@ static void read_config(void) const char *head_ref = resolve_ref_unsafe("HEAD", 0, NULL, &flag); if (head_ref && (flag & REF_ISSYMREF) && skip_prefix(head_ref, "refs/heads/", &head_ref)) { - current_branch = make_branch(head_ref, 0); + current_branch = make_branch(head_ref, strlen(head_ref)); } } git_config(handle_config, NULL); @@ -1584,7 +1573,7 @@ struct branch *branch_get(const char *name) if (!name || !*name || !strcmp(name, "HEAD")) ret = current_branch; else - ret = make_branch(name, 0); + ret = make_branch(name, strlen(name)); set_merge(ret); return ret; } -- cgit v1.2.3 From f5914f4b6bcdb517733c761fe5ba9d94471eb01d Mon Sep 17 00:00:00 2001 From: Jeff King Date: Fri, 10 Apr 2020 15:44:28 -0400 Subject: parse_config_key(): return subsection len as size_t We return the length to a subset of a string using an "int *" out-parameter. This is fine most of the time, as we'd expect config keys to be relatively short, but it could behave oddly if we had a gigantic config key. A more appropriate type is size_t. Let's switch over, which lets our callers use size_t as appropriate (they are bound by our type because they must pass the out-parameter as a pointer). This is mostly just a cleanup to make it clear this code handles long strings correctly. In practice, our config parser already chokes on long key names (because of a similar int/size_t mixup!). When doing an int/size_t conversion, we have to be careful that nobody was trying to assign a negative value to the variable. I manually confirmed that for each case here. They tend to just feed the result to xmemdupz() or similar; in a few cases I adjusted the parameter types for helper functions to make sure the size_t is preserved. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- archive-tar.c | 4 ++-- builtin/help.c | 2 +- builtin/reflog.c | 2 +- config.c | 4 ++-- config.h | 2 +- convert.c | 2 +- fsck.c | 2 +- ll-merge.c | 2 +- promisor-remote.c | 2 +- remote.c | 2 +- submodule-config.c | 3 ++- userdiff.c | 4 ++-- 12 files changed, 16 insertions(+), 15 deletions(-) diff --git a/archive-tar.c b/archive-tar.c index 5a77701a15..5ceec3684b 100644 --- a/archive-tar.c +++ b/archive-tar.c @@ -364,7 +364,7 @@ static struct archiver **tar_filters; static int nr_tar_filters; static int alloc_tar_filters; -static struct archiver *find_tar_filter(const char *name, int len) +static struct archiver *find_tar_filter(const char *name, size_t len) { int i; for (i = 0; i < nr_tar_filters; i++) { @@ -380,7 +380,7 @@ static int tar_filter_config(const char *var, const char *value, void *data) struct archiver *ar; const char *name; const char *type; - int namelen; + size_t namelen; if (parse_config_key(var, "tar", &name, &namelen, &type) < 0 || !name) return 0; diff --git a/builtin/help.c b/builtin/help.c index e5590d7787..c024110531 100644 --- a/builtin/help.c +++ b/builtin/help.c @@ -242,7 +242,7 @@ static int add_man_viewer_cmd(const char *name, static int add_man_viewer_info(const char *var, const char *value) { const char *name, *subkey; - int namelen; + size_t namelen; if (parse_config_key(var, "man", &name, &namelen, &subkey) < 0 || !name) return 0; diff --git a/builtin/reflog.c b/builtin/reflog.c index 81dfd563c0..52ecf6d43c 100644 --- a/builtin/reflog.c +++ b/builtin/reflog.c @@ -459,7 +459,7 @@ static struct reflog_expire_cfg *find_cfg_ent(const char *pattern, size_t len) static int reflog_expire_config(const char *var, const char *value, void *cb) { const char *pattern, *key; - int pattern_len; + size_t pattern_len; timestamp_t expire; int slot; struct reflog_expire_cfg *ent; diff --git a/config.c b/config.c index d17d2bd9dc..ff7998df46 100644 --- a/config.c +++ b/config.c @@ -309,7 +309,7 @@ int git_config_include(const char *var, const char *value, void *data) { struct config_include_data *inc = data; const char *cond, *key; - int cond_len; + size_t cond_len; int ret; /* @@ -3238,7 +3238,7 @@ int config_error_nonbool(const char *var) int parse_config_key(const char *var, const char *section, - const char **subsection, int *subsection_len, + const char **subsection, size_t *subsection_len, const char **key) { const char *dot; diff --git a/config.h b/config.h index 9b3773f778..d57df283b3 100644 --- a/config.h +++ b/config.h @@ -359,7 +359,7 @@ int git_config_include(const char *name, const char *value, void *data); */ int parse_config_key(const char *var, const char *section, - const char **subsection, int *subsection_len, + const char **subsection, size_t *subsection_len, const char **key); /** diff --git a/convert.c b/convert.c index 5aa87d45e3..572449825c 100644 --- a/convert.c +++ b/convert.c @@ -1018,7 +1018,7 @@ static int apply_filter(const char *path, const char *src, size_t len, static int read_convert_config(const char *var, const char *value, void *cb) { const char *key, *name; - int namelen; + size_t namelen; struct convert_driver *drv; /* diff --git a/fsck.c b/fsck.c index 640d813d84..41031e459b 100644 --- a/fsck.c +++ b/fsck.c @@ -920,7 +920,7 @@ static int fsck_gitmodules_fn(const char *var, const char *value, void *vdata) { struct fsck_gitmodules_data *data = vdata; const char *subsection, *key; - int subsection_len; + size_t subsection_len; char *name; if (parse_config_key(var, "submodule", &subsection, &subsection_len, &key) < 0 || diff --git a/ll-merge.c b/ll-merge.c index d65a8971db..1ec0b959e0 100644 --- a/ll-merge.c +++ b/ll-merge.c @@ -247,7 +247,7 @@ static int read_merge_config(const char *var, const char *value, void *cb) { struct ll_merge_driver *fn; const char *key, *name; - int namelen; + size_t namelen; if (!strcmp(var, "merge.default")) return git_config_string(&default_ll_merge, var, value); diff --git a/promisor-remote.c b/promisor-remote.c index 9f338c945f..ff8721fd56 100644 --- a/promisor-remote.c +++ b/promisor-remote.c @@ -101,7 +101,7 @@ static void promisor_remote_move_to_tail(struct promisor_remote *r, static int promisor_remote_config(const char *var, const char *value, void *data) { const char *name; - int namelen; + size_t namelen; const char *subkey; if (!strcmp(var, "core.partialclonefilter")) diff --git a/remote.c b/remote.c index 35bc3f9f2d..534c6426f1 100644 --- a/remote.c +++ b/remote.c @@ -305,7 +305,7 @@ static void read_branches_file(struct remote *remote) static int handle_config(const char *key, const char *value, void *cb) { const char *name; - int namelen; + size_t namelen; const char *subkey; struct remote *remote; struct branch *branch; diff --git a/submodule-config.c b/submodule-config.c index 4d1c92d582..e175dfbc38 100644 --- a/submodule-config.c +++ b/submodule-config.c @@ -225,7 +225,8 @@ static int name_and_item_from_var(const char *var, struct strbuf *name, struct strbuf *item) { const char *subsection, *key; - int subsection_len, parse; + size_t subsection_len; + int parse; parse = parse_config_key(var, "submodule", &subsection, &subsection_len, &key); if (parse < 0 || !subsection) diff --git a/userdiff.c b/userdiff.c index efbe05e5a5..30ab42df8e 100644 --- a/userdiff.c +++ b/userdiff.c @@ -222,7 +222,7 @@ static struct userdiff_driver driver_false = { { NULL, 0 } }; -static struct userdiff_driver *userdiff_find_by_namelen(const char *k, int len) +static struct userdiff_driver *userdiff_find_by_namelen(const char *k, size_t len) { int i; for (i = 0; i < ndrivers; i++) { @@ -266,7 +266,7 @@ int userdiff_config(const char *k, const char *v) { struct userdiff_driver *drv; const char *name, *type; - int namelen; + size_t namelen; if (parse_config_key(k, "diff", &name, &namelen, &type) || !name) return 0; -- cgit v1.2.3 From 6c7e6963c1b6d9a5344345d55dd537e4ab45158f Mon Sep 17 00:00:00 2001 From: Jeff King Date: Fri, 10 Apr 2020 15:44:45 -0400 Subject: config: drop useless length variable in write_pair() We compute the length of a subset of a string, but then use that length only to feed a "%.*s" printf placeholder for the same string. We can just use "%s" to achieve the same thing. The variable became useless in cb891a5989 (Use a strbuf for building up section header and key/value pair strings., 2007-12-14), which swapped out a write() which _did_ use the length for a strbuf_addf() call. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- config.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/config.c b/config.c index ff7998df46..7ea588a7e0 100644 --- a/config.c +++ b/config.c @@ -2545,7 +2545,6 @@ static ssize_t write_pair(int fd, const char *key, const char *value, { int i; ssize_t ret; - int length = strlen(key + store->baselen + 1); const char *quote = ""; struct strbuf sb = STRBUF_INIT; @@ -2564,8 +2563,7 @@ static ssize_t write_pair(int fd, const char *key, const char *value, if (i && value[i - 1] == ' ') quote = "\""; - strbuf_addf(&sb, "\t%.*s = %s", - length, key + store->baselen + 1, quote); + strbuf_addf(&sb, "\t%s = %s", key + store->baselen + 1, quote); for (i = 0; value[i]; i++) switch (value[i]) { -- cgit v1.2.3 From f011a9654dc2a2e3238985ba7767c1058e7cb3c2 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Fri, 10 Apr 2020 15:46:07 -0400 Subject: git_config_parse_key(): return baselen as size_t As with the recent change to parse_config_key(), the best type to return a string length is a size_t, as it won't cause integer truncation for a gigantic key. And as with that change, this is mostly a clarity / hygiene issue for now, as our config parser would choke on such a large key anyway. There are a few ripple effects within the config code, as callers switch to using size_t. I also adjusted a few related variables that iterate over strings. The most unexpected change is that a call to strbuf_addf() had to switch to strbuf_add(). We can't use a size_t with "%.*s", because printf precisions must have type "int" (we could cast, of course, but that would miss the point of using size_t in the first place). Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- config.c | 17 ++++++++++------- config.h | 2 +- 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/config.c b/config.c index 7ea588a7e0..c48bb35dc0 100644 --- a/config.c +++ b/config.c @@ -358,12 +358,13 @@ static inline int iskeychar(int c) * * store_key - pointer to char* which will hold a copy of the key with * lowercase section and variable name - * baselen - pointer to int which will hold the length of the + * baselen - pointer to size_t which will hold the length of the * section + subsection part, can be NULL */ -static int git_config_parse_key_1(const char *key, char **store_key, int *baselen_, int quiet) +static int git_config_parse_key_1(const char *key, char **store_key, size_t *baselen_, int quiet) { - int i, dot, baselen; + size_t i, baselen; + int dot; const char *last_dot = strrchr(key, '.'); /* @@ -425,7 +426,7 @@ out_free_ret_1: return -CONFIG_INVALID_KEY; } -int git_config_parse_key(const char *key, char **store_key, int *baselen) +int git_config_parse_key(const char *key, char **store_key, size_t *baselen) { return git_config_parse_key_1(key, store_key, baselen, 0); } @@ -2383,7 +2384,7 @@ void git_die_config(const char *key, const char *err, ...) */ struct config_store_data { - int baselen; + size_t baselen; char *key; int do_not_match; regex_t *value_regex; @@ -2509,7 +2510,7 @@ static struct strbuf store_create_section(const char *key, const struct config_store_data *store) { const char *dot; - int i; + size_t i; struct strbuf sb = STRBUF_INIT; dot = memchr(key, '.', store->baselen); @@ -2522,7 +2523,9 @@ static struct strbuf store_create_section(const char *key, } strbuf_addstr(&sb, "\"]\n"); } else { - strbuf_addf(&sb, "[%.*s]\n", store->baselen, key); + strbuf_addch(&sb, '['); + strbuf_add(&sb, key, store->baselen); + strbuf_addstr(&sb, "]\n"); } return sb; diff --git a/config.h b/config.h index d57df283b3..060874488f 100644 --- a/config.h +++ b/config.h @@ -254,7 +254,7 @@ int git_config_set_gently(const char *, const char *); */ void git_config_set(const char *, const char *); -int git_config_parse_key(const char *, char **, int *); +int git_config_parse_key(const char *, char **, size_t *); int git_config_key_is_valid(const char *key); int git_config_set_multivar_gently(const char *, const char *, const char *, int); void git_config_set_multivar(const char *, const char *, const char *, int); -- cgit v1.2.3 From 6a9c235eb4ba32701450aff2c989792b818e02b6 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Fri, 10 Apr 2020 15:47:51 -0400 Subject: config: use size_t to store parsed variable baselen Most of the config parsing infrastructure is limited in what it can parse only by the size of memory, because it parses character by character, building up strbufs for keys, values, etc. One exception is the "baselen" value we keep in git_parse_source(), which is an int. That stores the length of the section.subsection base, to which we can then append individual key names (by truncating back to the baselen with strbuf_setlen(), and then appending characters for the key name). But because it's an int, if we see an absurdly long section or subsection, we may overflow the integer, wrapping negative. That negative value is then implicitly cast to a size_t when we pass it to strbuf_setlen(), creating a very large value and triggering a BUG. For example: $ { printf '[foo "' perl -e 'print "a" x 2**31' echo '"]bar = value' } >huge $ git config --file=huge --list fatal: BUG: strbuf_setlen() beyond buffer While this is obviously a silly case that we don't care about supporting, it's worth fixing it by switching to a size_t for a few reasons: - we should try to avoid hitting BUG assertions at all - avoiding integer truncation or overflow sets a good example and makes it easier to audit the code for more important issues - the BUG outcome is what happens in _this_ instance, because we wrap negative. If we used a 2**32 subsection, we'd wrap to a small positive value and actually generate wrong output (the subsection of our key would be truncated). Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- config.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config.c b/config.c index c48bb35dc0..1c25c94863 100644 --- a/config.c +++ b/config.c @@ -729,7 +729,7 @@ static int git_parse_source(config_fn_t fn, void *data, const struct config_options *opts) { int comment = 0; - int baselen = 0; + size_t baselen = 0; struct strbuf *var = &cf->var; int error_return = 0; char *error_msg = NULL; -- cgit v1.2.3 From 348482dede19296e5caa33ed73010278767c2348 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Fri, 10 Apr 2020 15:50:07 -0400 Subject: config: reject parsing of files over INT_MAX While the last few commits have made it possible for the config parser to handle config files up to the limits of size_t, the rest of the code isn't really ready for this. In particular, we often feed the keys as strings into printf "%s" format specifiers. And because the printf family of functions must return an int to specify the result, they complain. Here are two concrete examples (using glibc; we're in uncharted territory here so results may vary): Generate a gigantic .gitmodules file like this: git submodule add /some/other/repo foo { printf '[submodule "' perl -e 'print "a" x 2**31' echo '"]path = foo' } >.gitmodules git commit -m 'huge gitmodule' then try this: $ git show BUG: strbuf.c:397: your vsnprintf is broken (returned -1) The problem is that we end up calling: strbuf_addf(&sb, "submodule.%s.ignore", submodule_name); which relies on vsnprintf(), and that function has no way to report back a size larger than INT_MAX. Taking that same file, try this: git config --file=.gitmodules --list --name-only On my system it produces an output with exactly 4GB of spaces. I confirmed in a debugger that we reach the config callback with the key intact: it's 2147483663 bytes and full of a's. But when we print it with this call: printf("%s%c", key_, term); we just get the spaces. So given the fact that these are insane cases which we have no need to support, the weird behavior from feeding the results to printf even if the code is careful, and the possibility of uncareful code introducing its own integer truncation issues, let's just declare INT_MAX as a limit for parsing config files. We'll enforce the limit in get_next_char(), which generalizes over all sources (blobs, files, etc) and covers any element we're parsing (whether section, key, value, etc). For simplicity, the limit is over the length of the _whole_ file, so you couldn't have two 1GB values in the same file. This should be perfectly fine, as the expected size for config files is generally kilobytes at most. With this patch both cases above will yield: fatal: bad config line 1 in file .gitmodules That's not an amazing error message, but the parser isn't set up to provide specific messages (it just breaks out of the parsing loop and gives that generic error even if see a syntactic issue). And we really wouldn't expect to see this case outside of somebody maliciously probing the limits of the config system. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- config.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/config.c b/config.c index 1c25c94863..8db9c77098 100644 --- a/config.c +++ b/config.c @@ -37,6 +37,7 @@ struct config_source { enum config_error_action default_error_action; int linenr; int eof; + size_t total_len; struct strbuf value; struct strbuf var; unsigned subsection_case_sensitive : 1; @@ -524,6 +525,19 @@ static int get_next_char(void) c = '\r'; } } + + if (c != EOF && ++cf->total_len > INT_MAX) { + /* + * This is an absurdly long config file; refuse to parse + * further in order to protect downstream code from integer + * overflows. Note that we can't return an error specifically, + * but we can mark EOF and put trash in the return value, + * which will trigger a parse error. + */ + cf->eof = 1; + return 0; + } + if (c == '\n') cf->linenr++; if (c == EOF) { @@ -1540,6 +1554,7 @@ static int do_config_from(struct config_source *top, config_fn_t fn, void *data, top->prev = cf; top->linenr = 1; top->eof = 0; + top->total_len = 0; strbuf_init(&top->value, 1024); strbuf_init(&top->var, 1024); cf = top; -- cgit v1.2.3