diff options
Diffstat (limited to 'builtin/apply.c')
-rw-r--r-- | builtin/apply.c | 3775 |
1 files changed, 3775 insertions, 0 deletions
diff --git a/builtin/apply.c b/builtin/apply.c new file mode 100644 index 0000000000..7ca90472c1 --- /dev/null +++ b/builtin/apply.c @@ -0,0 +1,3775 @@ +/* + * apply.c + * + * Copyright (C) Linus Torvalds, 2005 + * + * This applies patches on top of some (arbitrary) version of the SCM. + * + */ +#include "cache.h" +#include "cache-tree.h" +#include "quote.h" +#include "blob.h" +#include "delta.h" +#include "builtin.h" +#include "string-list.h" +#include "dir.h" +#include "parse-options.h" + +/* + * --check turns on checking that the working tree matches the + * files that are being modified, but doesn't apply the patch + * --stat does just a diffstat, and doesn't actually apply + * --numstat does numeric diffstat, and doesn't actually apply + * --index-info shows the old and new index info for paths if available. + * --index updates the cache as well. + * --cached updates only the cache without ever touching the working tree. + */ +static const char *prefix; +static int prefix_length = -1; +static int newfd = -1; + +static int unidiff_zero; +static int p_value = 1; +static int p_value_known; +static int check_index; +static int update_index; +static int cached; +static int diffstat; +static int numstat; +static int summary; +static int check; +static int apply = 1; +static int apply_in_reverse; +static int apply_with_reject; +static int apply_verbosely; +static int no_add; +static const char *fake_ancestor; +static int line_termination = '\n'; +static unsigned int p_context = UINT_MAX; +static const char * const apply_usage[] = { + "git apply [options] [<patch>...]", + NULL +}; + +static enum ws_error_action { + nowarn_ws_error, + warn_on_ws_error, + die_on_ws_error, + correct_ws_error, +} ws_error_action = warn_on_ws_error; +static int whitespace_error; +static int squelch_whitespace_errors = 5; +static int applied_after_fixing_ws; + +static enum ws_ignore { + ignore_ws_none, + ignore_ws_change, +} ws_ignore_action = ignore_ws_none; + + +static const char *patch_input_file; +static const char *root; +static int root_len; +static int read_stdin = 1; +static int options; + +static void parse_whitespace_option(const char *option) +{ + if (!option) { + ws_error_action = warn_on_ws_error; + return; + } + if (!strcmp(option, "warn")) { + ws_error_action = warn_on_ws_error; + return; + } + if (!strcmp(option, "nowarn")) { + ws_error_action = nowarn_ws_error; + return; + } + if (!strcmp(option, "error")) { + ws_error_action = die_on_ws_error; + return; + } + if (!strcmp(option, "error-all")) { + ws_error_action = die_on_ws_error; + squelch_whitespace_errors = 0; + return; + } + if (!strcmp(option, "strip") || !strcmp(option, "fix")) { + ws_error_action = correct_ws_error; + return; + } + die("unrecognized whitespace option '%s'", option); +} + +static void parse_ignorewhitespace_option(const char *option) +{ + if (!option || !strcmp(option, "no") || + !strcmp(option, "false") || !strcmp(option, "never") || + !strcmp(option, "none")) { + ws_ignore_action = ignore_ws_none; + return; + } + if (!strcmp(option, "change")) { + ws_ignore_action = ignore_ws_change; + return; + } + die("unrecognized whitespace ignore option '%s'", option); +} + +static void set_default_whitespace_mode(const char *whitespace_option) +{ + if (!whitespace_option && !apply_default_whitespace) + ws_error_action = (apply ? warn_on_ws_error : nowarn_ws_error); +} + +/* + * For "diff-stat" like behaviour, we keep track of the biggest change + * we've seen, and the longest filename. That allows us to do simple + * scaling. + */ +static int max_change, max_len; + +/* + * Various "current state", notably line numbers and what + * file (and how) we're patching right now.. The "is_xxxx" + * things are flags, where -1 means "don't know yet". + */ +static int linenr = 1; + +/* + * This represents one "hunk" from a patch, starting with + * "@@ -oldpos,oldlines +newpos,newlines @@" marker. The + * patch text is pointed at by patch, and its byte length + * is stored in size. leading and trailing are the number + * of context lines. + */ +struct fragment { + unsigned long leading, trailing; + unsigned long oldpos, oldlines; + unsigned long newpos, newlines; + const char *patch; + int size; + int rejected; + int linenr; + struct fragment *next; +}; + +/* + * When dealing with a binary patch, we reuse "leading" field + * to store the type of the binary hunk, either deflated "delta" + * or deflated "literal". + */ +#define binary_patch_method leading +#define BINARY_DELTA_DEFLATED 1 +#define BINARY_LITERAL_DEFLATED 2 + +/* + * This represents a "patch" to a file, both metainfo changes + * such as creation/deletion, filemode and content changes represented + * as a series of fragments. + */ +struct patch { + char *new_name, *old_name, *def_name; + unsigned int old_mode, new_mode; + int is_new, is_delete; /* -1 = unknown, 0 = false, 1 = true */ + int rejected; + unsigned ws_rule; + unsigned long deflate_origlen; + int lines_added, lines_deleted; + int score; + unsigned int is_toplevel_relative:1; + unsigned int inaccurate_eof:1; + unsigned int is_binary:1; + unsigned int is_copy:1; + unsigned int is_rename:1; + unsigned int recount:1; + struct fragment *fragments; + char *result; + size_t resultsize; + char old_sha1_prefix[41]; + char new_sha1_prefix[41]; + struct patch *next; +}; + +/* + * A line in a file, len-bytes long (includes the terminating LF, + * except for an incomplete line at the end if the file ends with + * one), and its contents hashes to 'hash'. + */ +struct line { + size_t len; + unsigned hash : 24; + unsigned flag : 8; +#define LINE_COMMON 1 +}; + +/* + * This represents a "file", which is an array of "lines". + */ +struct image { + char *buf; + size_t len; + size_t nr; + size_t alloc; + struct line *line_allocated; + struct line *line; +}; + +/* + * Records filenames that have been touched, in order to handle + * the case where more than one patches touch the same file. + */ + +static struct string_list fn_table; + +static uint32_t hash_line(const char *cp, size_t len) +{ + size_t i; + uint32_t h; + for (i = 0, h = 0; i < len; i++) { + if (!isspace(cp[i])) { + h = h * 3 + (cp[i] & 0xff); + } + } + return h; +} + +/* + * Compare lines s1 of length n1 and s2 of length n2, ignoring + * whitespace difference. Returns 1 if they match, 0 otherwise + */ +static int fuzzy_matchlines(const char *s1, size_t n1, + const char *s2, size_t n2) +{ + const char *last1 = s1 + n1 - 1; + const char *last2 = s2 + n2 - 1; + int result = 0; + + if (n1 < 0 || n2 < 0) + return 0; + + /* ignore line endings */ + while ((*last1 == '\r') || (*last1 == '\n')) + last1--; + while ((*last2 == '\r') || (*last2 == '\n')) + last2--; + + /* skip leading whitespace */ + while (isspace(*s1) && (s1 <= last1)) + s1++; + while (isspace(*s2) && (s2 <= last2)) + s2++; + /* early return if both lines are empty */ + if ((s1 > last1) && (s2 > last2)) + return 1; + while (!result) { + result = *s1++ - *s2++; + /* + * Skip whitespace inside. We check for whitespace on + * both buffers because we don't want "a b" to match + * "ab" + */ + if (isspace(*s1) && isspace(*s2)) { + while (isspace(*s1) && s1 <= last1) + s1++; + while (isspace(*s2) && s2 <= last2) + s2++; + } + /* + * If we reached the end on one side only, + * lines don't match + */ + if ( + ((s2 > last2) && (s1 <= last1)) || + ((s1 > last1) && (s2 <= last2))) + return 0; + if ((s1 > last1) && (s2 > last2)) + break; + } + + return !result; +} + +static void add_line_info(struct image *img, const char *bol, size_t len, unsigned flag) +{ + ALLOC_GROW(img->line_allocated, img->nr + 1, img->alloc); + img->line_allocated[img->nr].len = len; + img->line_allocated[img->nr].hash = hash_line(bol, len); + img->line_allocated[img->nr].flag = flag; + img->nr++; +} + +static void prepare_image(struct image *image, char *buf, size_t len, + int prepare_linetable) +{ + const char *cp, *ep; + + memset(image, 0, sizeof(*image)); + image->buf = buf; + image->len = len; + + if (!prepare_linetable) + return; + + ep = image->buf + image->len; + cp = image->buf; + while (cp < ep) { + const char *next; + for (next = cp; next < ep && *next != '\n'; next++) + ; + if (next < ep) + next++; + add_line_info(image, cp, next - cp, 0); + cp = next; + } + image->line = image->line_allocated; +} + +static void clear_image(struct image *image) +{ + free(image->buf); + image->buf = NULL; + image->len = 0; +} + +static void say_patch_name(FILE *output, const char *pre, + struct patch *patch, const char *post) +{ + fputs(pre, output); + if (patch->old_name && patch->new_name && + strcmp(patch->old_name, patch->new_name)) { + quote_c_style(patch->old_name, NULL, output, 0); + fputs(" => ", output); + quote_c_style(patch->new_name, NULL, output, 0); + } else { + const char *n = patch->new_name; + if (!n) + n = patch->old_name; + quote_c_style(n, NULL, output, 0); + } + fputs(post, output); +} + +#define CHUNKSIZE (8192) +#define SLOP (16) + +static void read_patch_file(struct strbuf *sb, int fd) +{ + if (strbuf_read(sb, fd, 0) < 0) + die_errno("git apply: failed to read"); + + /* + * Make sure that we have some slop in the buffer + * so that we can do speculative "memcmp" etc, and + * see to it that it is NUL-filled. + */ + strbuf_grow(sb, SLOP); + memset(sb->buf + sb->len, 0, SLOP); +} + +static unsigned long linelen(const char *buffer, unsigned long size) +{ + unsigned long len = 0; + while (size--) { + len++; + if (*buffer++ == '\n') + break; + } + return len; +} + +static int is_dev_null(const char *str) +{ + return !memcmp("/dev/null", str, 9) && isspace(str[9]); +} + +#define TERM_SPACE 1 +#define TERM_TAB 2 + +static int name_terminate(const char *name, int namelen, int c, int terminate) +{ + if (c == ' ' && !(terminate & TERM_SPACE)) + return 0; + if (c == '\t' && !(terminate & TERM_TAB)) + return 0; + + return 1; +} + +/* remove double slashes to make --index work with such filenames */ +static char *squash_slash(char *name) +{ + int i = 0, j = 0; + + if (!name) + return NULL; + + while (name[i]) { + if ((name[j++] = name[i++]) == '/') + while (name[i] == '/') + i++; + } + name[j] = '\0'; + return name; +} + +static char *find_name(const char *line, char *def, int p_value, int terminate) +{ + int len; + const char *start = NULL; + + if (p_value == 0) + start = line; + + if (*line == '"') { + struct strbuf name = STRBUF_INIT; + + /* + * Proposed "new-style" GNU patch/diff format; see + * http://marc.theaimsgroup.com/?l=git&m=112927316408690&w=2 + */ + if (!unquote_c_style(&name, line, NULL)) { + char *cp; + + for (cp = name.buf; p_value; p_value--) { + cp = strchr(cp, '/'); + if (!cp) + break; + cp++; + } + if (cp) { + /* name can later be freed, so we need + * to memmove, not just return cp + */ + strbuf_remove(&name, 0, cp - name.buf); + free(def); + if (root) + strbuf_insert(&name, 0, root, root_len); + return squash_slash(strbuf_detach(&name, NULL)); + } + } + strbuf_release(&name); + } + + for (;;) { + char c = *line; + + if (isspace(c)) { + if (c == '\n') + break; + if (name_terminate(start, line-start, c, terminate)) + break; + } + line++; + if (c == '/' && !--p_value) + start = line; + } + if (!start) + return squash_slash(def); + len = line - start; + if (!len) + return squash_slash(def); + + /* + * Generally we prefer the shorter name, especially + * if the other one is just a variation of that with + * something else tacked on to the end (ie "file.orig" + * or "file~"). + */ + if (def) { + int deflen = strlen(def); + if (deflen < len && !strncmp(start, def, deflen)) + return squash_slash(def); + free(def); + } + + if (root) { + char *ret = xmalloc(root_len + len + 1); + strcpy(ret, root); + memcpy(ret + root_len, start, len); + ret[root_len + len] = '\0'; + return squash_slash(ret); + } + + return squash_slash(xmemdupz(start, len)); +} + +static int count_slashes(const char *cp) +{ + int cnt = 0; + char ch; + + while ((ch = *cp++)) + if (ch == '/') + cnt++; + return cnt; +} + +/* + * Given the string after "--- " or "+++ ", guess the appropriate + * p_value for the given patch. + */ +static int guess_p_value(const char *nameline) +{ + char *name, *cp; + int val = -1; + + if (is_dev_null(nameline)) + return -1; + name = find_name(nameline, NULL, 0, TERM_SPACE | TERM_TAB); + if (!name) + return -1; + cp = strchr(name, '/'); + if (!cp) + val = 0; + else if (prefix) { + /* + * Does it begin with "a/$our-prefix" and such? Then this is + * very likely to apply to our directory. + */ + if (!strncmp(name, prefix, prefix_length)) + val = count_slashes(prefix); + else { + cp++; + if (!strncmp(cp, prefix, prefix_length)) + val = count_slashes(prefix) + 1; + } + } + free(name); + return val; +} + +/* + * Does the ---/+++ line has the POSIX timestamp after the last HT? + * GNU diff puts epoch there to signal a creation/deletion event. Is + * this such a timestamp? + */ +static int has_epoch_timestamp(const char *nameline) +{ + /* + * We are only interested in epoch timestamp; any non-zero + * fraction cannot be one, hence "(\.0+)?" in the regexp below. + * For the same reason, the date must be either 1969-12-31 or + * 1970-01-01, and the seconds part must be "00". + */ + const char stamp_regexp[] = + "^(1969-12-31|1970-01-01)" + " " + "[0-2][0-9]:[0-5][0-9]:00(\\.0+)?" + " " + "([-+][0-2][0-9][0-5][0-9])\n"; + const char *timestamp = NULL, *cp; + static regex_t *stamp; + regmatch_t m[10]; + int zoneoffset; + int hourminute; + int status; + + for (cp = nameline; *cp != '\n'; cp++) { + if (*cp == '\t') + timestamp = cp + 1; + } + if (!timestamp) + return 0; + if (!stamp) { + stamp = xmalloc(sizeof(*stamp)); + if (regcomp(stamp, stamp_regexp, REG_EXTENDED)) { + warning("Cannot prepare timestamp regexp %s", + stamp_regexp); + return 0; + } + } + + status = regexec(stamp, timestamp, ARRAY_SIZE(m), m, 0); + if (status) { + if (status != REG_NOMATCH) + warning("regexec returned %d for input: %s", + status, timestamp); + return 0; + } + + zoneoffset = strtol(timestamp + m[3].rm_so + 1, NULL, 10); + zoneoffset = (zoneoffset / 100) * 60 + (zoneoffset % 100); + if (timestamp[m[3].rm_so] == '-') + zoneoffset = -zoneoffset; + + /* + * YYYY-MM-DD hh:mm:ss must be from either 1969-12-31 + * (west of GMT) or 1970-01-01 (east of GMT) + */ + if ((zoneoffset < 0 && memcmp(timestamp, "1969-12-31", 10)) || + (0 <= zoneoffset && memcmp(timestamp, "1970-01-01", 10))) + return 0; + + hourminute = (strtol(timestamp + 11, NULL, 10) * 60 + + strtol(timestamp + 14, NULL, 10) - + zoneoffset); + + return ((zoneoffset < 0 && hourminute == 1440) || + (0 <= zoneoffset && !hourminute)); +} + +/* + * Get the name etc info from the ---/+++ lines of a traditional patch header + * + * FIXME! The end-of-filename heuristics are kind of screwy. For existing + * files, we can happily check the index for a match, but for creating a + * new file we should try to match whatever "patch" does. I have no idea. + */ +static void parse_traditional_patch(const char *first, const char *second, struct patch *patch) +{ + char *name; + + first += 4; /* skip "--- " */ + second += 4; /* skip "+++ " */ + if (!p_value_known) { + int p, q; + p = guess_p_value(first); + q = guess_p_value(second); + if (p < 0) p = q; + if (0 <= p && p == q) { + p_value = p; + p_value_known = 1; + } + } + if (is_dev_null(first)) { + patch->is_new = 1; + patch->is_delete = 0; + name = find_name(second, NULL, p_value, TERM_SPACE | TERM_TAB); + patch->new_name = name; + } else if (is_dev_null(second)) { + patch->is_new = 0; + patch->is_delete = 1; + name = find_name(first, NULL, p_value, TERM_SPACE | TERM_TAB); + patch->old_name = name; + } else { + name = find_name(first, NULL, p_value, TERM_SPACE | TERM_TAB); + name = find_name(second, name, p_value, TERM_SPACE | TERM_TAB); + if (has_epoch_timestamp(first)) { + patch->is_new = 1; + patch->is_delete = 0; + patch->new_name = name; + } else if (has_epoch_timestamp(second)) { + patch->is_new = 0; + patch->is_delete = 1; + patch->old_name = name; + } else { + patch->old_name = patch->new_name = name; + } + } + if (!name) + die("unable to find filename in patch at line %d", linenr); +} + +static int gitdiff_hdrend(const char *line, struct patch *patch) +{ + return -1; +} + +/* + * We're anal about diff header consistency, to make + * sure that we don't end up having strange ambiguous + * patches floating around. + * + * As a result, gitdiff_{old|new}name() will check + * their names against any previous information, just + * to make sure.. + */ +static char *gitdiff_verify_name(const char *line, int isnull, char *orig_name, const char *oldnew) +{ + if (!orig_name && !isnull) + return find_name(line, NULL, p_value, TERM_TAB); + + if (orig_name) { + int len; + const char *name; + char *another; + name = orig_name; + len = strlen(name); + if (isnull) + die("git apply: bad git-diff - expected /dev/null, got %s on line %d", name, linenr); + another = find_name(line, NULL, p_value, TERM_TAB); + if (!another || memcmp(another, name, len + 1)) + die("git apply: bad git-diff - inconsistent %s filename on line %d", oldnew, linenr); + free(another); + return orig_name; + } + else { + /* expect "/dev/null" */ + if (memcmp("/dev/null", line, 9) || line[9] != '\n') + die("git apply: bad git-diff - expected /dev/null on line %d", linenr); + return NULL; + } +} + +static int gitdiff_oldname(const char *line, struct patch *patch) +{ + patch->old_name = gitdiff_verify_name(line, patch->is_new, patch->old_name, "old"); + return 0; +} + +static int gitdiff_newname(const char *line, struct patch *patch) +{ + patch->new_name = gitdiff_verify_name(line, patch->is_delete, patch->new_name, "new"); + return 0; +} + +static int gitdiff_oldmode(const char *line, struct patch *patch) +{ + patch->old_mode = strtoul(line, NULL, 8); + return 0; +} + +static int gitdiff_newmode(const char *line, struct patch *patch) +{ + patch->new_mode = strtoul(line, NULL, 8); + return 0; +} + +static int gitdiff_delete(const char *line, struct patch *patch) +{ + patch->is_delete = 1; + patch->old_name = patch->def_name; + return gitdiff_oldmode(line, patch); +} + +static int gitdiff_newfile(const char *line, struct patch *patch) +{ + patch->is_new = 1; + patch->new_name = patch->def_name; + return gitdiff_newmode(line, patch); +} + +static int gitdiff_copysrc(const char *line, struct patch *patch) +{ + patch->is_copy = 1; + patch->old_name = find_name(line, NULL, 0, 0); + return 0; +} + +static int gitdiff_copydst(const char *line, struct patch *patch) +{ + patch->is_copy = 1; + patch->new_name = find_name(line, NULL, 0, 0); + return 0; +} + +static int gitdiff_renamesrc(const char *line, struct patch *patch) +{ + patch->is_rename = 1; + patch->old_name = find_name(line, NULL, 0, 0); + return 0; +} + +static int gitdiff_renamedst(const char *line, struct patch *patch) +{ + patch->is_rename = 1; + patch->new_name = find_name(line, NULL, 0, 0); + return 0; +} + +static int gitdiff_similarity(const char *line, struct patch *patch) +{ + if ((patch->score = strtoul(line, NULL, 10)) == ULONG_MAX) + patch->score = 0; + return 0; +} + +static int gitdiff_dissimilarity(const char *line, struct patch *patch) +{ + if ((patch->score = strtoul(line, NULL, 10)) == ULONG_MAX) + patch->score = 0; + return 0; +} + +static int gitdiff_index(const char *line, struct patch *patch) +{ + /* + * index line is N hexadecimal, "..", N hexadecimal, + * and optional space with octal mode. + */ + const char *ptr, *eol; + int len; + + ptr = strchr(line, '.'); + if (!ptr || ptr[1] != '.' || 40 < ptr - line) + return 0; + len = ptr - line; + memcpy(patch->old_sha1_prefix, line, len); + patch->old_sha1_prefix[len] = 0; + + line = ptr + 2; + ptr = strchr(line, ' '); + eol = strchr(line, '\n'); + + if (!ptr || eol < ptr) + ptr = eol; + len = ptr - line; + + if (40 < len) + return 0; + memcpy(patch->new_sha1_prefix, line, len); + patch->new_sha1_prefix[len] = 0; + if (*ptr == ' ') + patch->old_mode = strtoul(ptr+1, NULL, 8); + return 0; +} + +/* + * This is normal for a diff that doesn't change anything: we'll fall through + * into the next diff. Tell the parser to break out. + */ +static int gitdiff_unrecognized(const char *line, struct patch *patch) +{ + return -1; +} + +static const char *stop_at_slash(const char *line, int llen) +{ + int nslash = p_value; + int i; + + for (i = 0; i < llen; i++) { + int ch = line[i]; + if (ch == '/' && --nslash <= 0) + return &line[i]; + } + return NULL; +} + +/* + * This is to extract the same name that appears on "diff --git" + * line. We do not find and return anything if it is a rename + * patch, and it is OK because we will find the name elsewhere. + * We need to reliably find name only when it is mode-change only, + * creation or deletion of an empty file. In any of these cases, + * both sides are the same name under a/ and b/ respectively. + */ +static char *git_header_name(char *line, int llen) +{ + const char *name; + const char *second = NULL; + size_t len; + + line += strlen("diff --git "); + llen -= strlen("diff --git "); + + if (*line == '"') { + const char *cp; + struct strbuf first = STRBUF_INIT; + struct strbuf sp = STRBUF_INIT; + + if (unquote_c_style(&first, line, &second)) + goto free_and_fail1; + + /* advance to the first slash */ + cp = stop_at_slash(first.buf, first.len); + /* we do not accept absolute paths */ + if (!cp || cp == first.buf) + goto free_and_fail1; + strbuf_remove(&first, 0, cp + 1 - first.buf); + + /* + * second points at one past closing dq of name. + * find the second name. + */ + while ((second < line + llen) && isspace(*second)) + second++; + + if (line + llen <= second) + goto free_and_fail1; + if (*second == '"') { + if (unquote_c_style(&sp, second, NULL)) + goto free_and_fail1; + cp = stop_at_slash(sp.buf, sp.len); + if (!cp || cp == sp.buf) + goto free_and_fail1; + /* They must match, otherwise ignore */ + if (strcmp(cp + 1, first.buf)) + goto free_and_fail1; + strbuf_release(&sp); + return strbuf_detach(&first, NULL); + } + + /* unquoted second */ + cp = stop_at_slash(second, line + llen - second); + if (!cp || cp == second) + goto free_and_fail1; + cp++; + if (line + llen - cp != first.len + 1 || + memcmp(first.buf, cp, first.len)) + goto free_and_fail1; + return strbuf_detach(&first, NULL); + + free_and_fail1: + strbuf_release(&first); + strbuf_release(&sp); + return NULL; + } + + /* unquoted first name */ + name = stop_at_slash(line, llen); + if (!name || name == line) + return NULL; + name++; + + /* + * since the first name is unquoted, a dq if exists must be + * the beginning of the second name. + */ + for (second = name; second < line + llen; second++) { + if (*second == '"') { + struct strbuf sp = STRBUF_INIT; + const char *np; + + if (unquote_c_style(&sp, second, NULL)) + goto free_and_fail2; + + np = stop_at_slash(sp.buf, sp.len); + if (!np || np == sp.buf) + goto free_and_fail2; + np++; + + len = sp.buf + sp.len - np; + if (len < second - name && + !strncmp(np, name, len) && + isspace(name[len])) { + /* Good */ + strbuf_remove(&sp, 0, np - sp.buf); + return strbuf_detach(&sp, NULL); + } + + free_and_fail2: + strbuf_release(&sp); + return NULL; + } + } + + /* + * Accept a name only if it shows up twice, exactly the same + * form. + */ + for (len = 0 ; ; len++) { + switch (name[len]) { + default: + continue; + case '\n': + return NULL; + case '\t': case ' ': + second = name+len; + for (;;) { + char c = *second++; + if (c == '\n') + return NULL; + if (c == '/') + break; + } + if (second[len] == '\n' && !memcmp(name, second, len)) { + return xmemdupz(name, len); + } + } + } +} + +/* Verify that we recognize the lines following a git header */ +static int parse_git_header(char *line, int len, unsigned int size, struct patch *patch) +{ + unsigned long offset; + + /* A git diff has explicit new/delete information, so we don't guess */ + patch->is_new = 0; + patch->is_delete = 0; + + /* + * Some things may not have the old name in the + * rest of the headers anywhere (pure mode changes, + * or removing or adding empty files), so we get + * the default name from the header. + */ + patch->def_name = git_header_name(line, len); + if (patch->def_name && root) { + char *s = xmalloc(root_len + strlen(patch->def_name) + 1); + strcpy(s, root); + strcpy(s + root_len, patch->def_name); + free(patch->def_name); + patch->def_name = s; + } + + line += len; + size -= len; + linenr++; + for (offset = len ; size > 0 ; offset += len, size -= len, line += len, linenr++) { + static const struct opentry { + const char *str; + int (*fn)(const char *, struct patch *); + } optable[] = { + { "@@ -", gitdiff_hdrend }, + { "--- ", gitdiff_oldname }, + { "+++ ", gitdiff_newname }, + { "old mode ", gitdiff_oldmode }, + { "new mode ", gitdiff_newmode }, + { "deleted file mode ", gitdiff_delete }, + { "new file mode ", gitdiff_newfile }, + { "copy from ", gitdiff_copysrc }, + { "copy to ", gitdiff_copydst }, + { "rename old ", gitdiff_renamesrc }, + { "rename new ", gitdiff_renamedst }, + { "rename from ", gitdiff_renamesrc }, + { "rename to ", gitdiff_renamedst }, + { "similarity index ", gitdiff_similarity }, + { "dissimilarity index ", gitdiff_dissimilarity }, + { "index ", gitdiff_index }, + { "", gitdiff_unrecognized }, + }; + int i; + + len = linelen(line, size); + if (!len || line[len-1] != '\n') + break; + for (i = 0; i < ARRAY_SIZE(optable); i++) { + const struct opentry *p = optable + i; + int oplen = strlen(p->str); + if (len < oplen || memcmp(p->str, line, oplen)) + continue; + if (p->fn(line + oplen, patch) < 0) + return offset; + break; + } + } + + return offset; +} + +static int parse_num(const char *line, unsigned long *p) +{ + char *ptr; + + if (!isdigit(*line)) + return 0; + *p = strtoul(line, &ptr, 10); + return ptr - line; +} + +static int parse_range(const char *line, int len, int offset, const char *expect, + unsigned long *p1, unsigned long *p2) +{ + int digits, ex; + + if (offset < 0 || offset >= len) + return -1; + line += offset; + len -= offset; + + digits = parse_num(line, p1); + if (!digits) + return -1; + + offset += digits; + line += digits; + len -= digits; + + *p2 = 1; + if (*line == ',') { + digits = parse_num(line+1, p2); + if (!digits) + return -1; + + offset += digits+1; + line += digits+1; + len -= digits+1; + } + + ex = strlen(expect); + if (ex > len) + return -1; + if (memcmp(line, expect, ex)) + return -1; + + return offset + ex; +} + +static void recount_diff(char *line, int size, struct fragment *fragment) +{ + int oldlines = 0, newlines = 0, ret = 0; + + if (size < 1) { + warning("recount: ignore empty hunk"); + return; + } + + for (;;) { + int len = linelen(line, size); + size -= len; + line += len; + + if (size < 1) + break; + + switch (*line) { + case ' ': case '\n': + newlines++; + /* fall through */ + case '-': + oldlines++; + continue; + case '+': + newlines++; + continue; + case '\\': + continue; + case '@': + ret = size < 3 || prefixcmp(line, "@@ "); + break; + case 'd': + ret = size < 5 || prefixcmp(line, "diff "); + break; + default: + ret = -1; + break; + } + if (ret) { + warning("recount: unexpected line: %.*s", + (int)linelen(line, size), line); + return; + } + break; + } + fragment->oldlines = oldlines; + fragment->newlines = newlines; +} + +/* + * Parse a unified diff fragment header of the + * form "@@ -a,b +c,d @@" + */ +static int parse_fragment_header(char *line, int len, struct fragment *fragment) +{ + int offset; + + if (!len || line[len-1] != '\n') + return -1; + + /* Figure out the number of lines in a fragment */ + offset = parse_range(line, len, 4, " +", &fragment->oldpos, &fragment->oldlines); + offset = parse_range(line, len, offset, " @@", &fragment->newpos, &fragment->newlines); + + return offset; +} + +static int find_header(char *line, unsigned long size, int *hdrsize, struct patch *patch) +{ + unsigned long offset, len; + + patch->is_toplevel_relative = 0; + patch->is_rename = patch->is_copy = 0; + patch->is_new = patch->is_delete = -1; + patch->old_mode = patch->new_mode = 0; + patch->old_name = patch->new_name = NULL; + for (offset = 0; size > 0; offset += len, size -= len, line += len, linenr++) { + unsigned long nextlen; + + len = linelen(line, size); + if (!len) + break; + + /* Testing this early allows us to take a few shortcuts.. */ + if (len < 6) + continue; + + /* + * Make sure we don't find any unconnected patch fragments. + * That's a sign that we didn't find a header, and that a + * patch has become corrupted/broken up. + */ + if (!memcmp("@@ -", line, 4)) { + struct fragment dummy; + if (parse_fragment_header(line, len, &dummy) < 0) + continue; + die("patch fragment without header at line %d: %.*s", + linenr, (int)len-1, line); + } + + if (size < len + 6) + break; + + /* + * Git patch? It might not have a real patch, just a rename + * or mode change, so we handle that specially + */ + if (!memcmp("diff --git ", line, 11)) { + int git_hdr_len = parse_git_header(line, len, size, patch); + if (git_hdr_len <= len) + continue; + if (!patch->old_name && !patch->new_name) { + if (!patch->def_name) + die("git diff header lacks filename information when removing " + "%d leading pathname components (line %d)" , p_value, linenr); + patch->old_name = patch->new_name = patch->def_name; + } + patch->is_toplevel_relative = 1; + *hdrsize = git_hdr_len; + return offset; + } + + /* --- followed by +++ ? */ + if (memcmp("--- ", line, 4) || memcmp("+++ ", line + len, 4)) + continue; + + /* + * We only accept unified patches, so we want it to + * at least have "@@ -a,b +c,d @@\n", which is 14 chars + * minimum ("@@ -0,0 +1 @@\n" is the shortest). + */ + nextlen = linelen(line + len, size - len); + if (size < nextlen + 14 || memcmp("@@ -", line + len + nextlen, 4)) + continue; + + /* Ok, we'll consider it a patch */ + parse_traditional_patch(line, line+len, patch); + *hdrsize = len + nextlen; + linenr += 2; + return offset; + } + return -1; +} + +static void record_ws_error(unsigned result, const char *line, int len, int linenr) +{ + char *err; + + if (!result) + return; + + whitespace_error++; + if (squelch_whitespace_errors && + squelch_whitespace_errors < whitespace_error) + return; + + err = whitespace_error_string(result); + fprintf(stderr, "%s:%d: %s.\n%.*s\n", + patch_input_file, linenr, err, len, line); + free(err); +} + +static void check_whitespace(const char *line, int len, unsigned ws_rule) +{ + unsigned result = ws_chec |