summaryrefslogtreecommitdiff
path: root/builtin/apply.c
diff options
context:
space:
mode:
Diffstat (limited to 'builtin/apply.c')
-rw-r--r--builtin/apply.c3670
1 files changed, 3670 insertions, 0 deletions
diff --git a/builtin/apply.c b/builtin/apply.c
new file mode 100644
index 0000000000..3af4ae0c26
--- /dev/null
+++ b/builtin/apply.c
@@ -0,0 +1,3670 @@
+/*
+ * apply.c
+ *
+ * Copyright (C) Linus Torvalds, 2005
+ *
+ * This applies patches on top of some (arbitrary) version of the SCM.
+ *
+ */
+#include "cache.h"
+#include "cache-tree.h"
+#include "quote.h"
+#include "blob.h"
+#include "delta.h"
+#include "builtin.h"
+#include "string-list.h"
+#include "dir.h"
+#include "parse-options.h"
+
+/*
+ * --check turns on checking that the working tree matches the
+ * files that are being modified, but doesn't apply the patch
+ * --stat does just a diffstat, and doesn't actually apply
+ * --numstat does numeric diffstat, and doesn't actually apply
+ * --index-info shows the old and new index info for paths if available.
+ * --index updates the cache as well.
+ * --cached updates only the cache without ever touching the working tree.
+ */
+static const char *prefix;
+static int prefix_length = -1;
+static int newfd = -1;
+
+static int unidiff_zero;
+static int p_value = 1;
+static int p_value_known;
+static int check_index;
+static int update_index;
+static int cached;
+static int diffstat;
+static int numstat;
+static int summary;
+static int check;
+static int apply = 1;
+static int apply_in_reverse;
+static int apply_with_reject;
+static int apply_verbosely;
+static int no_add;
+static const char *fake_ancestor;
+static int line_termination = '\n';
+static unsigned int p_context = UINT_MAX;
+static const char * const apply_usage[] = {
+ "git apply [options] [<patch>...]",
+ NULL
+};
+
+static enum ws_error_action {
+ nowarn_ws_error,
+ warn_on_ws_error,
+ die_on_ws_error,
+ correct_ws_error,
+} ws_error_action = warn_on_ws_error;
+static int whitespace_error;
+static int squelch_whitespace_errors = 5;
+static int applied_after_fixing_ws;
+
+static enum ws_ignore {
+ ignore_ws_none,
+ ignore_ws_change,
+} ws_ignore_action = ignore_ws_none;
+
+
+static const char *patch_input_file;
+static const char *root;
+static int root_len;
+static int read_stdin = 1;
+static int options;
+
+static void parse_whitespace_option(const char *option)
+{
+ if (!option) {
+ ws_error_action = warn_on_ws_error;
+ return;
+ }
+ if (!strcmp(option, "warn")) {
+ ws_error_action = warn_on_ws_error;
+ return;
+ }
+ if (!strcmp(option, "nowarn")) {
+ ws_error_action = nowarn_ws_error;
+ return;
+ }
+ if (!strcmp(option, "error")) {
+ ws_error_action = die_on_ws_error;
+ return;
+ }
+ if (!strcmp(option, "error-all")) {
+ ws_error_action = die_on_ws_error;
+ squelch_whitespace_errors = 0;
+ return;
+ }
+ if (!strcmp(option, "strip") || !strcmp(option, "fix")) {
+ ws_error_action = correct_ws_error;
+ return;
+ }
+ die("unrecognized whitespace option '%s'", option);
+}
+
+static void parse_ignorewhitespace_option(const char *option)
+{
+ if (!option || !strcmp(option, "no") ||
+ !strcmp(option, "false") || !strcmp(option, "never") ||
+ !strcmp(option, "none")) {
+ ws_ignore_action = ignore_ws_none;
+ return;
+ }
+ if (!strcmp(option, "change")) {
+ ws_ignore_action = ignore_ws_change;
+ return;
+ }
+ die("unrecognized whitespace ignore option '%s'", option);
+}
+
+static void set_default_whitespace_mode(const char *whitespace_option)
+{
+ if (!whitespace_option && !apply_default_whitespace)
+ ws_error_action = (apply ? warn_on_ws_error : nowarn_ws_error);
+}
+
+/*
+ * For "diff-stat" like behaviour, we keep track of the biggest change
+ * we've seen, and the longest filename. That allows us to do simple
+ * scaling.
+ */
+static int max_change, max_len;
+
+/*
+ * Various "current state", notably line numbers and what
+ * file (and how) we're patching right now.. The "is_xxxx"
+ * things are flags, where -1 means "don't know yet".
+ */
+static int linenr = 1;
+
+/*
+ * This represents one "hunk" from a patch, starting with
+ * "@@ -oldpos,oldlines +newpos,newlines @@" marker. The
+ * patch text is pointed at by patch, and its byte length
+ * is stored in size. leading and trailing are the number
+ * of context lines.
+ */
+struct fragment {
+ unsigned long leading, trailing;
+ unsigned long oldpos, oldlines;
+ unsigned long newpos, newlines;
+ const char *patch;
+ int size;
+ int rejected;
+ int linenr;
+ struct fragment *next;
+};
+
+/*
+ * When dealing with a binary patch, we reuse "leading" field
+ * to store the type of the binary hunk, either deflated "delta"
+ * or deflated "literal".
+ */
+#define binary_patch_method leading
+#define BINARY_DELTA_DEFLATED 1
+#define BINARY_LITERAL_DEFLATED 2
+
+/*
+ * This represents a "patch" to a file, both metainfo changes
+ * such as creation/deletion, filemode and content changes represented
+ * as a series of fragments.
+ */
+struct patch {
+ char *new_name, *old_name, *def_name;
+ unsigned int old_mode, new_mode;
+ int is_new, is_delete; /* -1 = unknown, 0 = false, 1 = true */
+ int rejected;
+ unsigned ws_rule;
+ unsigned long deflate_origlen;
+ int lines_added, lines_deleted;
+ int score;
+ unsigned int is_toplevel_relative:1;
+ unsigned int inaccurate_eof:1;
+ unsigned int is_binary:1;
+ unsigned int is_copy:1;
+ unsigned int is_rename:1;
+ unsigned int recount:1;
+ struct fragment *fragments;
+ char *result;
+ size_t resultsize;
+ char old_sha1_prefix[41];
+ char new_sha1_prefix[41];
+ struct patch *next;
+};
+
+/*
+ * A line in a file, len-bytes long (includes the terminating LF,
+ * except for an incomplete line at the end if the file ends with
+ * one), and its contents hashes to 'hash'.
+ */
+struct line {
+ size_t len;
+ unsigned hash : 24;
+ unsigned flag : 8;
+#define LINE_COMMON 1
+};
+
+/*
+ * This represents a "file", which is an array of "lines".
+ */
+struct image {
+ char *buf;
+ size_t len;
+ size_t nr;
+ size_t alloc;
+ struct line *line_allocated;
+ struct line *line;
+};
+
+/*
+ * Records filenames that have been touched, in order to handle
+ * the case where more than one patches touch the same file.
+ */
+
+static struct string_list fn_table;
+
+static uint32_t hash_line(const char *cp, size_t len)
+{
+ size_t i;
+ uint32_t h;
+ for (i = 0, h = 0; i < len; i++) {
+ if (!isspace(cp[i])) {
+ h = h * 3 + (cp[i] & 0xff);
+ }
+ }
+ return h;
+}
+
+/*
+ * Compare lines s1 of length n1 and s2 of length n2, ignoring
+ * whitespace difference. Returns 1 if they match, 0 otherwise
+ */
+static int fuzzy_matchlines(const char *s1, size_t n1,
+ const char *s2, size_t n2)
+{
+ const char *last1 = s1 + n1 - 1;
+ const char *last2 = s2 + n2 - 1;
+ int result = 0;
+
+ if (n1 < 0 || n2 < 0)
+ return 0;
+
+ /* ignore line endings */
+ while ((*last1 == '\r') || (*last1 == '\n'))
+ last1--;
+ while ((*last2 == '\r') || (*last2 == '\n'))
+ last2--;
+
+ /* skip leading whitespace */
+ while (isspace(*s1) && (s1 <= last1))
+ s1++;
+ while (isspace(*s2) && (s2 <= last2))
+ s2++;
+ /* early return if both lines are empty */
+ if ((s1 > last1) && (s2 > last2))
+ return 1;
+ while (!result) {
+ result = *s1++ - *s2++;
+ /*
+ * Skip whitespace inside. We check for whitespace on
+ * both buffers because we don't want "a b" to match
+ * "ab"
+ */
+ if (isspace(*s1) && isspace(*s2)) {
+ while (isspace(*s1) && s1 <= last1)
+ s1++;
+ while (isspace(*s2) && s2 <= last2)
+ s2++;
+ }
+ /*
+ * If we reached the end on one side only,
+ * lines don't match
+ */
+ if (
+ ((s2 > last2) && (s1 <= last1)) ||
+ ((s1 > last1) && (s2 <= last2)))
+ return 0;
+ if ((s1 > last1) && (s2 > last2))
+ break;
+ }
+
+ return !result;
+}
+
+static void add_line_info(struct image *img, const char *bol, size_t len, unsigned flag)
+{
+ ALLOC_GROW(img->line_allocated, img->nr + 1, img->alloc);
+ img->line_allocated[img->nr].len = len;
+ img->line_allocated[img->nr].hash = hash_line(bol, len);
+ img->line_allocated[img->nr].flag = flag;
+ img->nr++;
+}
+
+static void prepare_image(struct image *image, char *buf, size_t len,
+ int prepare_linetable)
+{
+ const char *cp, *ep;
+
+ memset(image, 0, sizeof(*image));
+ image->buf = buf;
+ image->len = len;
+
+ if (!prepare_linetable)
+ return;
+
+ ep = image->buf + image->len;
+ cp = image->buf;
+ while (cp < ep) {
+ const char *next;
+ for (next = cp; next < ep && *next != '\n'; next++)
+ ;
+ if (next < ep)
+ next++;
+ add_line_info(image, cp, next - cp, 0);
+ cp = next;
+ }
+ image->line = image->line_allocated;
+}
+
+static void clear_image(struct image *image)
+{
+ free(image->buf);
+ image->buf = NULL;
+ image->len = 0;
+}
+
+static void say_patch_name(FILE *output, const char *pre,
+ struct patch *patch, const char *post)
+{
+ fputs(pre, output);
+ if (patch->old_name && patch->new_name &&
+ strcmp(patch->old_name, patch->new_name)) {
+ quote_c_style(patch->old_name, NULL, output, 0);
+ fputs(" => ", output);
+ quote_c_style(patch->new_name, NULL, output, 0);
+ } else {
+ const char *n = patch->new_name;
+ if (!n)
+ n = patch->old_name;
+ quote_c_style(n, NULL, output, 0);
+ }
+ fputs(post, output);
+}
+
+#define CHUNKSIZE (8192)
+#define SLOP (16)
+
+static void read_patch_file(struct strbuf *sb, int fd)
+{
+ if (strbuf_read(sb, fd, 0) < 0)
+ die_errno("git apply: failed to read");
+
+ /*
+ * Make sure that we have some slop in the buffer
+ * so that we can do speculative "memcmp" etc, and
+ * see to it that it is NUL-filled.
+ */
+ strbuf_grow(sb, SLOP);
+ memset(sb->buf + sb->len, 0, SLOP);
+}
+
+static unsigned long linelen(const char *buffer, unsigned long size)
+{
+ unsigned long len = 0;
+ while (size--) {
+ len++;
+ if (*buffer++ == '\n')
+ break;
+ }
+ return len;
+}
+
+static int is_dev_null(const char *str)
+{
+ return !memcmp("/dev/null", str, 9) && isspace(str[9]);
+}
+
+#define TERM_SPACE 1
+#define TERM_TAB 2
+
+static int name_terminate(const char *name, int namelen, int c, int terminate)
+{
+ if (c == ' ' && !(terminate & TERM_SPACE))
+ return 0;
+ if (c == '\t' && !(terminate & TERM_TAB))
+ return 0;
+
+ return 1;
+}
+
+/* remove double slashes to make --index work with such filenames */
+static char *squash_slash(char *name)
+{
+ int i = 0, j = 0;
+
+ if (!name)
+ return NULL;
+
+ while (name[i]) {
+ if ((name[j++] = name[i++]) == '/')
+ while (name[i] == '/')
+ i++;
+ }
+ name[j] = '\0';
+ return name;
+}
+
+static char *find_name(const char *line, char *def, int p_value, int terminate)
+{
+ int len;
+ const char *start = NULL;
+
+ if (p_value == 0)
+ start = line;
+
+ if (*line == '"') {
+ struct strbuf name = STRBUF_INIT;
+
+ /*
+ * Proposed "new-style" GNU patch/diff format; see
+ * http://marc.theaimsgroup.com/?l=git&m=112927316408690&w=2
+ */
+ if (!unquote_c_style(&name, line, NULL)) {
+ char *cp;
+
+ for (cp = name.buf; p_value; p_value--) {
+ cp = strchr(cp, '/');
+ if (!cp)
+ break;
+ cp++;
+ }
+ if (cp) {
+ /* name can later be freed, so we need
+ * to memmove, not just return cp
+ */
+ strbuf_remove(&name, 0, cp - name.buf);
+ free(def);
+ if (root)
+ strbuf_insert(&name, 0, root, root_len);
+ return squash_slash(strbuf_detach(&name, NULL));
+ }
+ }
+ strbuf_release(&name);
+ }
+
+ for (;;) {
+ char c = *line;
+
+ if (isspace(c)) {
+ if (c == '\n')
+ break;
+ if (name_terminate(start, line-start, c, terminate))
+ break;
+ }
+ line++;
+ if (c == '/' && !--p_value)
+ start = line;
+ }
+ if (!start)
+ return squash_slash(def);
+ len = line - start;
+ if (!len)
+ return squash_slash(def);
+
+ /*
+ * Generally we prefer the shorter name, especially
+ * if the other one is just a variation of that with
+ * something else tacked on to the end (ie "file.orig"
+ * or "file~").
+ */
+ if (def) {
+ int deflen = strlen(def);
+ if (deflen < len && !strncmp(start, def, deflen))
+ return squash_slash(def);
+ free(def);
+ }
+
+ if (root) {
+ char *ret = xmalloc(root_len + len + 1);
+ strcpy(ret, root);
+ memcpy(ret + root_len, start, len);
+ ret[root_len + len] = '\0';
+ return squash_slash(ret);
+ }
+
+ return squash_slash(xmemdupz(start, len));
+}
+
+static int count_slashes(const char *cp)
+{
+ int cnt = 0;
+ char ch;
+
+ while ((ch = *cp++))
+ if (ch == '/')
+ cnt++;
+ return cnt;
+}
+
+/*
+ * Given the string after "--- " or "+++ ", guess the appropriate
+ * p_value for the given patch.
+ */
+static int guess_p_value(const char *nameline)
+{
+ char *name, *cp;
+ int val = -1;
+
+ if (is_dev_null(nameline))
+ return -1;
+ name = find_name(nameline, NULL, 0, TERM_SPACE | TERM_TAB);
+ if (!name)
+ return -1;
+ cp = strchr(name, '/');
+ if (!cp)
+ val = 0;
+ else if (prefix) {
+ /*
+ * Does it begin with "a/$our-prefix" and such? Then this is
+ * very likely to apply to our directory.
+ */
+ if (!strncmp(name, prefix, prefix_length))
+ val = count_slashes(prefix);
+ else {
+ cp++;
+ if (!strncmp(cp, prefix, prefix_length))
+ val = count_slashes(prefix) + 1;
+ }
+ }
+ free(name);
+ return val;
+}
+
+/*
+ * Does the ---/+++ line has the POSIX timestamp after the last HT?
+ * GNU diff puts epoch there to signal a creation/deletion event. Is
+ * this such a timestamp?
+ */
+static int has_epoch_timestamp(const char *nameline)
+{
+ /*
+ * We are only interested in epoch timestamp; any non-zero
+ * fraction cannot be one, hence "(\.0+)?" in the regexp below.
+ * For the same reason, the date must be either 1969-12-31 or
+ * 1970-01-01, and the seconds part must be "00".
+ */
+ const char stamp_regexp[] =
+ "^(1969-12-31|1970-01-01)"
+ " "
+ "[0-2][0-9]:[0-5][0-9]:00(\\.0+)?"
+ " "
+ "([-+][0-2][0-9][0-5][0-9])\n";
+ const char *timestamp = NULL, *cp;
+ static regex_t *stamp;
+ regmatch_t m[10];
+ int zoneoffset;
+ int hourminute;
+ int status;
+
+ for (cp = nameline; *cp != '\n'; cp++) {
+ if (*cp == '\t')
+ timestamp = cp + 1;
+ }
+ if (!timestamp)
+ return 0;
+ if (!stamp) {
+ stamp = xmalloc(sizeof(*stamp));
+ if (regcomp(stamp, stamp_regexp, REG_EXTENDED)) {
+ warning("Cannot prepare timestamp regexp %s",
+ stamp_regexp);
+ return 0;
+ }
+ }
+
+ status = regexec(stamp, timestamp, ARRAY_SIZE(m), m, 0);
+ if (status) {
+ if (status != REG_NOMATCH)
+ warning("regexec returned %d for input: %s",
+ status, timestamp);
+ return 0;
+ }
+
+ zoneoffset = strtol(timestamp + m[3].rm_so + 1, NULL, 10);
+ zoneoffset = (zoneoffset / 100) * 60 + (zoneoffset % 100);
+ if (timestamp[m[3].rm_so] == '-')
+ zoneoffset = -zoneoffset;
+
+ /*
+ * YYYY-MM-DD hh:mm:ss must be from either 1969-12-31
+ * (west of GMT) or 1970-01-01 (east of GMT)
+ */
+ if ((zoneoffset < 0 && memcmp(timestamp, "1969-12-31", 10)) ||
+ (0 <= zoneoffset && memcmp(timestamp, "1970-01-01", 10)))
+ return 0;
+
+ hourminute = (strtol(timestamp + 11, NULL, 10) * 60 +
+ strtol(timestamp + 14, NULL, 10) -
+ zoneoffset);
+
+ return ((zoneoffset < 0 && hourminute == 1440) ||
+ (0 <= zoneoffset && !hourminute));
+}
+
+/*
+ * Get the name etc info from the ---/+++ lines of a traditional patch header
+ *
+ * FIXME! The end-of-filename heuristics are kind of screwy. For existing
+ * files, we can happily check the index for a match, but for creating a
+ * new file we should try to match whatever "patch" does. I have no idea.
+ */
+static void parse_traditional_patch(const char *first, const char *second, struct patch *patch)
+{
+ char *name;
+
+ first += 4; /* skip "--- " */
+ second += 4; /* skip "+++ " */
+ if (!p_value_known) {
+ int p, q;
+ p = guess_p_value(first);
+ q = guess_p_value(second);
+ if (p < 0) p = q;
+ if (0 <= p && p == q) {
+ p_value = p;
+ p_value_known = 1;
+ }
+ }
+ if (is_dev_null(first)) {
+ patch->is_new = 1;
+ patch->is_delete = 0;
+ name = find_name(second, NULL, p_value, TERM_SPACE | TERM_TAB);
+ patch->new_name = name;
+ } else if (is_dev_null(second)) {
+ patch->is_new = 0;
+ patch->is_delete = 1;
+ name = find_name(first, NULL, p_value, TERM_SPACE | TERM_TAB);
+ patch->old_name = name;
+ } else {
+ name = find_name(first, NULL, p_value, TERM_SPACE | TERM_TAB);
+ name = find_name(second, name, p_value, TERM_SPACE | TERM_TAB);
+ if (has_epoch_timestamp(first)) {
+ patch->is_new = 1;
+ patch->is_delete = 0;
+ patch->new_name = name;
+ } else if (has_epoch_timestamp(second)) {
+ patch->is_new = 0;
+ patch->is_delete = 1;
+ patch->old_name = name;
+ } else {
+ patch->old_name = patch->new_name = name;
+ }
+ }
+ if (!name)
+ die("unable to find filename in patch at line %d", linenr);
+}
+
+static int gitdiff_hdrend(const char *line, struct patch *patch)
+{
+ return -1;
+}
+
+/*
+ * We're anal about diff header consistency, to make
+ * sure that we don't end up having strange ambiguous
+ * patches floating around.
+ *
+ * As a result, gitdiff_{old|new}name() will check
+ * their names against any previous information, just
+ * to make sure..
+ */
+static char *gitdiff_verify_name(const char *line, int isnull, char *orig_name, const char *oldnew)
+{
+ if (!orig_name && !isnull)
+ return find_name(line, NULL, p_value, TERM_TAB);
+
+ if (orig_name) {
+ int len;
+ const char *name;
+ char *another;
+ name = orig_name;
+ len = strlen(name);
+ if (isnull)
+ die("git apply: bad git-diff - expected /dev/null, got %s on line %d", name, linenr);
+ another = find_name(line, NULL, p_value, TERM_TAB);
+ if (!another || memcmp(another, name, len + 1))
+ die("git apply: bad git-diff - inconsistent %s filename on line %d", oldnew, linenr);
+ free(another);
+ return orig_name;
+ }
+ else {
+ /* expect "/dev/null" */
+ if (memcmp("/dev/null", line, 9) || line[9] != '\n')
+ die("git apply: bad git-diff - expected /dev/null on line %d", linenr);
+ return NULL;
+ }
+}
+
+static int gitdiff_oldname(const char *line, struct patch *patch)
+{
+ patch->old_name = gitdiff_verify_name(line, patch->is_new, patch->old_name, "old");
+ return 0;
+}
+
+static int gitdiff_newname(const char *line, struct patch *patch)
+{
+ patch->new_name = gitdiff_verify_name(line, patch->is_delete, patch->new_name, "new");
+ return 0;
+}
+
+static int gitdiff_oldmode(const char *line, struct patch *patch)
+{
+ patch->old_mode = strtoul(line, NULL, 8);
+ return 0;
+}
+
+static int gitdiff_newmode(const char *line, struct patch *patch)
+{
+ patch->new_mode = strtoul(line, NULL, 8);
+ return 0;
+}
+
+static int gitdiff_delete(const char *line, struct patch *patch)
+{
+ patch->is_delete = 1;
+ patch->old_name = patch->def_name;
+ return gitdiff_oldmode(line, patch);
+}
+
+static int gitdiff_newfile(const char *line, struct patch *patch)
+{
+ patch->is_new = 1;
+ patch->new_name = patch->def_name;
+ return gitdiff_newmode(line, patch);
+}
+
+static int gitdiff_copysrc(const char *line, struct patch *patch)
+{
+ patch->is_copy = 1;
+ patch->old_name = find_name(line, NULL, 0, 0);
+ return 0;
+}
+
+static int gitdiff_copydst(const char *line, struct patch *patch)
+{
+ patch->is_copy = 1;
+ patch->new_name = find_name(line, NULL, 0, 0);
+ return 0;
+}
+
+static int gitdiff_renamesrc(const char *line, struct patch *patch)
+{
+ patch->is_rename = 1;
+ patch->old_name = find_name(line, NULL, 0, 0);
+ return 0;
+}
+
+static int gitdiff_renamedst(const char *line, struct patch *patch)
+{
+ patch->is_rename = 1;
+ patch->new_name = find_name(line, NULL, 0, 0);
+ return 0;
+}
+
+static int gitdiff_similarity(const char *line, struct patch *patch)
+{
+ if ((patch->score = strtoul(line, NULL, 10)) == ULONG_MAX)
+ patch->score = 0;
+ return 0;
+}
+
+static int gitdiff_dissimilarity(const char *line, struct patch *patch)
+{
+ if ((patch->score = strtoul(line, NULL, 10)) == ULONG_MAX)
+ patch->score = 0;
+ return 0;
+}
+
+static int gitdiff_index(const char *line, struct patch *patch)
+{
+ /*
+ * index line is N hexadecimal, "..", N hexadecimal,
+ * and optional space with octal mode.
+ */
+ const char *ptr, *eol;
+ int len;
+
+ ptr = strchr(line, '.');
+ if (!ptr || ptr[1] != '.' || 40 < ptr - line)
+ return 0;
+ len = ptr - line;
+ memcpy(patch->old_sha1_prefix, line, len);
+ patch->old_sha1_prefix[len] = 0;
+
+ line = ptr + 2;
+ ptr = strchr(line, ' ');
+ eol = strchr(line, '\n');
+
+ if (!ptr || eol < ptr)
+ ptr = eol;
+ len = ptr - line;
+
+ if (40 < len)
+ return 0;
+ memcpy(patch->new_sha1_prefix, line, len);
+ patch->new_sha1_prefix[len] = 0;
+ if (*ptr == ' ')
+ patch->old_mode = strtoul(ptr+1, NULL, 8);
+ return 0;
+}
+
+/*
+ * This is normal for a diff that doesn't change anything: we'll fall through
+ * into the next diff. Tell the parser to break out.
+ */
+static int gitdiff_unrecognized(const char *line, struct patch *patch)
+{
+ return -1;
+}
+
+static const char *stop_at_slash(const char *line, int llen)
+{
+ int nslash = p_value;
+ int i;
+
+ for (i = 0; i < llen; i++) {
+ int ch = line[i];
+ if (ch == '/' && --nslash <= 0)
+ return &line[i];
+ }
+ return NULL;
+}
+
+/*
+ * This is to extract the same name that appears on "diff --git"
+ * line. We do not find and return anything if it is a rename
+ * patch, and it is OK because we will find the name elsewhere.
+ * We need to reliably find name only when it is mode-change only,
+ * creation or deletion of an empty file. In any of these cases,
+ * both sides are the same name under a/ and b/ respectively.
+ */
+static char *git_header_name(char *line, int llen)
+{
+ const char *name;
+ const char *second = NULL;
+ size_t len;
+
+ line += strlen("diff --git ");
+ llen -= strlen("diff --git ");
+
+ if (*line == '"') {
+ const char *cp;
+ struct strbuf first = STRBUF_INIT;
+ struct strbuf sp = STRBUF_INIT;
+
+ if (unquote_c_style(&first, line, &second))
+ goto free_and_fail1;
+
+ /* advance to the first slash */
+ cp = stop_at_slash(first.buf, first.len);
+ /* we do not accept absolute paths */
+ if (!cp || cp == first.buf)
+ goto free_and_fail1;
+ strbuf_remove(&first, 0, cp + 1 - first.buf);
+
+ /*
+ * second points at one past closing dq of name.
+ * find the second name.
+ */
+ while ((second < line + llen) && isspace(*second))
+ second++;
+
+ if (line + llen <= second)
+ goto free_and_fail1;
+ if (*second == '"') {
+ if (unquote_c_style(&sp, second, NULL))
+ goto free_and_fail1;
+ cp = stop_at_slash(sp.buf, sp.len);
+ if (!cp || cp == sp.buf)
+ goto free_and_fail1;
+ /* They must match, otherwise ignore */
+ if (strcmp(cp + 1, first.buf))
+ goto free_and_fail1;
+ strbuf_release(&sp);
+ return strbuf_detach(&first, NULL);
+ }
+
+ /* unquoted second */
+ cp = stop_at_slash(second, line + llen - second);
+ if (!cp || cp == second)
+ goto free_and_fail1;
+ cp++;
+ if (line + llen - cp != first.len + 1 ||
+ memcmp(first.buf, cp, first.len))
+ goto free_and_fail1;
+ return strbuf_detach(&first, NULL);
+
+ free_and_fail1:
+ strbuf_release(&first);
+ strbuf_release(&sp);
+ return NULL;
+ }
+
+ /* unquoted first name */
+ name = stop_at_slash(line, llen);
+ if (!name || name == line)
+ return NULL;
+ name++;
+
+ /*
+ * since the first name is unquoted, a dq if exists must be
+ * the beginning of the second name.
+ */
+ for (second = name; second < line + llen; second++) {
+ if (*second == '"') {
+ struct strbuf sp = STRBUF_INIT;
+ const char *np;
+
+ if (unquote_c_style(&sp, second, NULL))
+ goto free_and_fail2;
+
+ np = stop_at_slash(sp.buf, sp.len);
+ if (!np || np == sp.buf)
+ goto free_and_fail2;
+ np++;
+
+ len = sp.buf + sp.len - np;
+ if (len < second - name &&
+ !strncmp(np, name, len) &&
+ isspace(name[len])) {
+ /* Good */
+ strbuf_remove(&sp, 0, np - sp.buf);
+ return strbuf_detach(&sp, NULL);
+ }
+
+ free_and_fail2:
+ strbuf_release(&sp);
+ return NULL;
+ }
+ }
+
+ /*
+ * Accept a name only if it shows up twice, exactly the same
+ * form.
+ */
+ for (len = 0 ; ; len++) {
+ switch (name[len]) {
+ default:
+ continue;
+ case '\n':
+ return NULL;
+ case '\t': case ' ':
+ second = name+len;
+ for (;;) {
+ char c = *second++;
+ if (c == '\n')
+ return NULL;
+ if (c == '/')
+ break;
+ }
+ if (second[len] == '\n' && !memcmp(name, second, len)) {
+ return xmemdupz(name, len);
+ }
+ }
+ }
+}
+
+/* Verify that we recognize the lines following a git header */
+static int parse_git_header(char *line, int len, unsigned int size, struct patch *patch)
+{
+ unsigned long offset;
+
+ /* A git diff has explicit new/delete information, so we don't guess */
+ patch->is_new = 0;
+ patch->is_delete = 0;
+
+ /*
+ * Some things may not have the old name in the
+ * rest of the headers anywhere (pure mode changes,
+ * or removing or adding empty files), so we get
+ * the default name from the header.
+ */
+ patch->def_name = git_header_name(line, len);
+ if (patch->def_name && root) {
+ char *s = xmalloc(root_len + strlen(patch->def_name) + 1);
+ strcpy(s, root);
+ strcpy(s + root_len, patch->def_name);
+ free(patch->def_name);
+ patch->def_name = s;
+ }
+
+ line += len;
+ size -= len;
+ linenr++;
+ for (offset = len ; size > 0 ; offset += len, size -= len, line += len, linenr++) {
+ static const struct opentry {
+ const char *str;
+ int (*fn)(const char *, struct patch *);
+ } optable[] = {
+ { "@@ -", gitdiff_hdrend },
+ { "--- ", gitdiff_oldname },
+ { "+++ ", gitdiff_newname },
+ { "old mode ", gitdiff_oldmode },
+ { "new mode ", gitdiff_newmode },
+ { "deleted file mode ", gitdiff_delete },
+ { "new file mode ", gitdiff_newfile },
+ { "copy from ", gitdiff_copysrc },
+ { "copy to ", gitdiff_copydst },
+ { "rename old ", gitdiff_renamesrc },
+ { "rename new ", gitdiff_renamedst },
+ { "rename from ", gitdiff_renamesrc },
+ { "rename to ", gitdiff_renamedst },
+ { "similarity index ", gitdiff_similarity },
+ { "dissimilarity index ", gitdiff_dissimilarity },
+ { "index ", gitdiff_index },
+ { "", gitdiff_unrecognized },
+ };
+ int i;
+
+ len = linelen(line, size);
+ if (!len || line[len-1] != '\n')
+ break;
+ for (i = 0; i < ARRAY_SIZE(optable); i++) {
+ const struct opentry *p = optable + i;
+ int oplen = strlen(p->str);
+ if (len < oplen || memcmp(p->str, line, oplen))
+ continue;
+ if (p->fn(line + oplen, patch) < 0)
+ return offset;
+ break;
+ }
+ }
+
+ return offset;
+}
+
+static int parse_num(const char *line, unsigned long *p)
+{
+ char *ptr;
+
+ if (!isdigit(*line))
+ return 0;
+ *p = strtoul(line, &ptr, 10);
+ return ptr - line;
+}
+
+static int parse_range(const char *line, int len, int offset, const char *expect,
+ unsigned long *p1, unsigned long *p2)
+{
+ int digits, ex;
+
+ if (offset < 0 || offset >= len)
+ return -1;
+ line += offset;
+ len -= offset;
+
+ digits = parse_num(line, p1);
+ if (!digits)
+ return -1;
+
+ offset += digits;
+ line += digits;
+ len -= digits;
+
+ *p2 = 1;
+ if (*line == ',') {
+ digits = parse_num(line+1, p2);
+ if (!digits)
+ return -1;
+
+ offset += digits+1;
+ line += digits+1;
+ len -= digits+1;
+ }
+
+ ex = strlen(expect);
+ if (ex > len)
+ return -1;
+ if (memcmp(line, expect, ex))
+ return -1;
+
+ return offset + ex;
+}
+
+static void recount_diff(char *line, int size, struct fragment *fragment)
+{
+ int oldlines = 0, newlines = 0, ret = 0;
+
+ if (size < 1) {
+ warning("recount: ignore empty hunk");
+ return;
+ }
+
+ for (;;) {
+ int len = linelen(line, size);
+ size -= len;
+ line += len;
+
+ if (size < 1)
+ break;
+
+ switch (*line) {
+ case ' ': case '\n':
+ newlines++;
+ /* fall through */
+ case '-':
+ oldlines++;
+ continue;
+ case '+':
+ newlines++;
+ continue;
+ case '\\':
+ continue;
+ case '@':
+ ret = size < 3 || prefixcmp(line, "@@ ");
+ break;
+ case 'd':
+ ret = size < 5 || prefixcmp(line, "diff ");
+ break;
+ default:
+ ret = -1;
+ break;
+ }
+ if (ret) {
+ warning("recount: unexpected line: %.*s",
+ (int)linelen(line, size), line);
+ return;
+ }
+ break;
+ }
+ fragment->oldlines = oldlines;
+ fragment->newlines = newlines;
+}
+
+/*
+ * Parse a unified diff fragment header of the
+ * form "@@ -a,b +c,d @@"
+ */
+static int parse_fragment_header(char *line, int len, struct fragment *fragment)
+{
+ int offset;
+
+ if (!len || line[len-1] != '\n')
+ return -1;
+
+ /* Figure out the number of lines in a fragment */
+ offset = parse_range(line, len, 4, " +", &fragment->oldpos, &fragment->oldlines);
+ offset = parse_range(line, len, offset, " @@", &fragment->newpos, &fragment->newlines);
+
+ return offset;
+}
+
+static int find_header(char *line, unsigned long size, int *hdrsize, struct patch *patch)
+{
+ unsigned long offset, len;
+
+ patch->is_toplevel_relative = 0;
+ patch->is_rename = patch->is_copy = 0;
+ patch->is_new = patch->is_delete = -1;
+ patch->old_mode = patch->new_mode = 0;
+ patch->old_name = patch->new_name = NULL;
+ for (offset = 0; size > 0; offset += len, size -= len, line += len, linenr++) {
+ unsigned long nextlen;
+
+ len = linelen(line, size);
+ if (!len)
+ break;
+
+ /* Testing this early allows us to take a few shortcuts.. */
+ if (len < 6)
+ continue;
+
+ /*
+ * Make sure we don't find any unconnected patch fragments.
+ * That's a sign that we didn't find a header, and that a
+ * patch has become corrupted/broken up.
+ */
+ if (!memcmp("@@ -", line, 4)) {
+ struct fragment dummy;
+ if (parse_fragment_header(line, len, &dummy) < 0)
+ continue;
+ die("patch fragment without header at line %d: %.*s",
+ linenr, (int)len-1, line);
+ }
+
+ if (size < len + 6)
+ break;
+
+ /*
+ * Git patch? It might not have a real patch, just a rename
+ * or mode change, so we handle that specially
+ */
+ if (!memcmp("diff --git ", line, 11)) {
+ int git_hdr_len = parse_git_header(line, len, size, patch);
+ if (git_hdr_len <= len)
+ continue;
+ if (!patch->old_name && !patch->new_name) {
+ if (!patch->def_name)
+ die("git diff header lacks filename information when removing "
+ "%d leading pathname components (line %d)" , p_value, linenr);
+ patch->old_name = patch->new_name = patch->def_name;
+ }
+ patch->is_toplevel_relative = 1;
+ *hdrsize = git_hdr_len;
+ return offset;
+ }
+
+ /* --- followed by +++ ? */
+ if (memcmp("--- ", line, 4) || memcmp("+++ ", line + len, 4))
+ continue;
+
+ /*
+ * We only accept unified patches, so we want it to
+ * at least have "@@ -a,b +c,d @@\n", which is 14 chars
+ * minimum ("@@ -0,0 +1 @@\n" is the shortest).
+ */
+ nextlen = linelen(line + len, size - len);
+ if (size < nextlen + 14 || memcmp("@@ -", line + len + nextlen, 4))
+ continue;
+
+ /* Ok, we'll consider it a patch */
+ parse_traditional_patch(line, line+len, patch);
+ *hdrsize = len + nextlen;
+ linenr += 2;
+ return offset;
+ }
+ return -1;
+}
+
+static void record_ws_error(unsigned result, const char *line, int len, int linenr)
+{
+ char *err;
+
+ if (!result)
+ return;
+
+ whitespace_error++;
+ if (squelch_whitespace_errors &&
+ squelch_whitespace_errors < whitespace_error)
+ return;
+
+ err = whitespace_error_string(result);
+ fprintf(stderr, "%s:%d: %s.\n%.*s\n",
+ patch_input_file, linenr, err, len, line);
+ free(err);
+}
+
+static void check_whitespace(const char *line, int len, unsigned ws_rule)
+{
+ unsigned result = ws_check(line + 1, len - 1, ws_rule);
+
+ record_ws_error(result, line + 1, len - 2, linenr);
+}
+
+/*
+ * Parse a unified diff. Note that this really needs to parse each
+ * fragment separately, since the only way to know the difference
+ * between a "---" that is part of a patch, and a "---" that starts
+ * the next patch is to look at the line counts..
+ */