diff options
Diffstat (limited to 'xdiff')
-rw-r--r-- | xdiff/xdiff.h | 18 | ||||
-rw-r--r-- | xdiff/xdiffi.c | 128 | ||||
-rw-r--r-- | xdiff/xemit.c | 21 | ||||
-rw-r--r-- | xdiff/xhistogram.c | 135 | ||||
-rw-r--r-- | xdiff/xinclude.h | 8 | ||||
-rw-r--r-- | xdiff/xpatience.c | 2 | ||||
-rw-r--r-- | xdiff/xutils.c | 25 |
7 files changed, 188 insertions, 149 deletions
diff --git a/xdiff/xdiff.h b/xdiff/xdiff.h index c1937a2911..032e3a9f41 100644 --- a/xdiff/xdiff.h +++ b/xdiff/xdiff.h @@ -52,14 +52,6 @@ extern "C" { #define XDL_EMIT_FUNCNAMES (1 << 0) #define XDL_EMIT_FUNCCONTEXT (1 << 2) -#define XDL_MMB_READONLY (1 << 0) - -#define XDL_MMF_ATOMIC (1 << 0) - -#define XDL_BDOP_INS 1 -#define XDL_BDOP_CPY 2 -#define XDL_BDOP_INSB 3 - /* merge simplification levels */ #define XDL_MERGE_MINIMAL 0 #define XDL_MERGE_EAGER 1 @@ -94,7 +86,11 @@ typedef struct s_xpparam { typedef struct s_xdemitcb { void *priv; - int (*outf)(void *, mmbuffer_t *, int); + int (*out_hunk)(void *, + long old_begin, long old_nr, + long new_begin, long new_nr, + const char *func, long funclen); + int (*out_line)(void *, mmbuffer_t *, int); } xdemitcb_t; typedef long (*find_func_t)(const char *line, long line_len, char *buffer, long buffer_size, void *priv); @@ -117,9 +113,9 @@ typedef struct s_bdiffparam { } bdiffparam_t; -#define xdl_malloc(x) malloc(x) +#define xdl_malloc(x) xmalloc(x) #define xdl_free(ptr) free(ptr) -#define xdl_realloc(ptr,x) realloc(ptr,x) +#define xdl_realloc(ptr,x) xrealloc(ptr,x) void *xdl_mmfile_first(mmfile_t *mmf, long *size); long xdl_mmfile_size(mmfile_t *mmf); diff --git a/xdiff/xdiffi.c b/xdiff/xdiffi.c index 0de1ef463b..bd035139f9 100644 --- a/xdiff/xdiffi.c +++ b/xdiff/xdiffi.c @@ -22,42 +22,25 @@ #include "xinclude.h" - - #define XDL_MAX_COST_MIN 256 #define XDL_HEUR_MIN_COST 256 #define XDL_LINE_MAX (long)((1UL << (CHAR_BIT * sizeof(long) - 1)) - 1) #define XDL_SNAKE_CNT 20 #define XDL_K_HEUR 4 - - typedef struct s_xdpsplit { long i1, i2; int min_lo, min_hi; } xdpsplit_t; - - - -static long xdl_split(unsigned long const *ha1, long off1, long lim1, - unsigned long const *ha2, long off2, long lim2, - long *kvdf, long *kvdb, int need_min, xdpsplit_t *spl, - xdalgoenv_t *xenv); -static xdchange_t *xdl_add_change(xdchange_t *xscr, long i1, long i2, long chg1, long chg2); - - - - - /* * See "An O(ND) Difference Algorithm and its Variations", by Eugene Myers. * Basically considers a "box" (off1, off2, lim1, lim2) and scan from both * the forward diagonal starting from (off1, off2) and the backward diagonal * starting from (lim1, lim2). If the K values on the same diagonal crosses - * returns the furthest point of reach. We might end up having to expensive - * cases using this algorithm is full, so a little bit of heuristic is needed - * to cut the search and to return a suboptimal point. + * returns the furthest point of reach. We might encounter expensive edge cases + * using this algorithm, so a little bit of heuristic is needed to cut the + * search and to return a suboptimal point. */ static long xdl_split(unsigned long const *ha1, long off1, long lim1, unsigned long const *ha2, long off2, long lim2, @@ -80,11 +63,13 @@ static long xdl_split(unsigned long const *ha1, long off1, long lim1, int got_snake = 0; /* - * We need to extent the diagonal "domain" by one. If the next + * We need to extend the diagonal "domain" by one. If the next * values exits the box boundaries we need to change it in the - * opposite direction because (max - min) must be a power of two. + * opposite direction because (max - min) must be a power of + * two. + * * Also we initialize the external K value to -1 so that we can - * avoid extra conditions check inside the core loop. + * avoid extra conditions in the check inside the core loop. */ if (fmin > dmin) kvdf[--fmin - 1] = -1; @@ -115,11 +100,13 @@ static long xdl_split(unsigned long const *ha1, long off1, long lim1, } /* - * We need to extent the diagonal "domain" by one. If the next + * We need to extend the diagonal "domain" by one. If the next * values exits the box boundaries we need to change it in the - * opposite direction because (max - min) must be a power of two. + * opposite direction because (max - min) must be a power of + * two. + * * Also we initialize the external K value to -1 so that we can - * avoid extra conditions check inside the core loop. + * avoid extra conditions in the check inside the core loop. */ if (bmin > dmin) kvdb[--bmin - 1] = XDL_LINE_MAX; @@ -155,7 +142,7 @@ static long xdl_split(unsigned long const *ha1, long off1, long lim1, /* * If the edit cost is above the heuristic trigger and if * we got a good snake, we sample current diagonals to see - * if some of the, have reached an "interesting" path. Our + * if some of them have reached an "interesting" path. Our * measure is a function of the distance from the diagonal * corner (i1 + i2) penalized with the distance from the * mid diagonal itself. If this value is above the current @@ -213,8 +200,9 @@ static long xdl_split(unsigned long const *ha1, long off1, long lim1, } /* - * Enough is enough. We spent too much time here and now we collect - * the furthest reaching path using the (i1 + i2) measure. + * Enough is enough. We spent too much time here and now we + * collect the furthest reaching path using the (i1 + i2) + * measure. */ if (ec >= xenv->mxcost) { long fbest, fbest1, bbest, bbest1; @@ -261,9 +249,9 @@ static long xdl_split(unsigned long const *ha1, long off1, long lim1, /* - * Rule: "Divide et Impera". Recursively split the box in sub-boxes by calling - * the box splitting function. Note that the real job (marking changed lines) - * is done in the two boundary reaching checks. + * Rule: "Divide et Impera" (divide & conquer). Recursively split the box in + * sub-boxes by calling the box splitting function. Note that the real job + * (marking changed lines) is done in the two boundary reaching checks. */ int xdl_recs_cmp(diffdata_t *dd1, long off1, long lim1, diffdata_t *dd2, long off2, long lim2, @@ -340,7 +328,9 @@ int xdl_do_diff(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp, } /* - * Allocate and setup K vectors to be used by the differential algorithm. + * Allocate and setup K vectors to be used by the differential + * algorithm. + * * One is to store the forward path and one to store the backward path. */ ndiags = xe->xdf1.nreff + xe->xdf2.nreff + 3; @@ -411,8 +401,8 @@ static int recs_match(xrecord_t *rec1, xrecord_t *rec2, long flags) /* * If a line is indented more than this, get_indent() just returns this value. * This avoids having to do absurd amounts of work for data that are not - * human-readable text, and also ensures that the output of get_indent fits within - * an int. + * human-readable text, and also ensures that the output of get_indent fits + * within an int. */ #define MAX_INDENT 200 @@ -446,9 +436,9 @@ static int get_indent(xrecord_t *rec) } /* - * If more than this number of consecutive blank rows are found, just return this - * value. This avoids requiring O(N^2) work for pathological cases, and also - * ensures that the output of score_split fits in an int. + * If more than this number of consecutive blank rows are found, just return + * this value. This avoids requiring O(N^2) work for pathological cases, and + * also ensures that the output of score_split fits in an int. */ #define MAX_BLANKS 20 @@ -460,8 +450,8 @@ struct split_measurement { int end_of_file; /* - * How much is the line immediately following the split indented (or -1 if - * the line is blank): + * How much is the line immediately following the split indented (or -1 + * if the line is blank): */ int indent; @@ -471,8 +461,8 @@ struct split_measurement { int pre_blank; /* - * How much is the nearest non-blank line above the split indented (or -1 - * if there is no such line)? + * How much is the nearest non-blank line above the split indented (or + * -1 if there is no such line)? */ int pre_indent; @@ -592,14 +582,19 @@ static void measure_split(const xdfile_t *xdf, long split, #define INDENT_WEIGHT 60 /* + * How far do we slide a hunk at most? + */ +#define INDENT_HEURISTIC_MAX_SLIDING 100 + +/* * Compute a badness score for the hypothetical split whose measurements are - * stored in m. The weight factors were determined empirically using the tools and - * corpus described in + * stored in m. The weight factors were determined empirically using the tools + * and corpus described in * * https://github.com/mhagger/diff-slider-tools * - * Also see that project if you want to improve the weights based on, for example, - * a larger or more diverse corpus. + * Also see that project if you want to improve the weights based on, for + * example, a larger or more diverse corpus. */ static void score_add_split(const struct split_measurement *m, struct split_score *s) { @@ -821,13 +816,16 @@ int xdl_change_compact(xdfile_t *xdf, xdfile_t *xdfo, long flags) { group_init(xdfo, &go); while (1) { - /* If the group is empty in the to-be-compacted file, skip it: */ + /* + * If the group is empty in the to-be-compacted file, skip it: + */ if (g.end == g.start) goto next; /* * Now shift the change up and then down as far as possible in - * each direction. If it bumps into any other changes, merge them. + * each direction. If it bumps into any other changes, merge + * them. */ do { groupsize = g.end - g.start; @@ -870,17 +868,17 @@ int xdl_change_compact(xdfile_t *xdf, xdfile_t *xdfo, long flags) { * If the group can be shifted, then we can possibly use this * freedom to produce a more intuitive diff. * - * The group is currently shifted as far down as possible, so the - * heuristics below only have to handle upwards shifts. + * The group is currently shifted as far down as possible, so + * the heuristics below only have to handle upwards shifts. */ if (g.end == earliest_end) { /* no shifting was possible */ } else if (end_matching_other != -1) { /* - * Move the possibly merged group of changes back to line - * up with the last group of changes from the other file - * that it can align with. + * Move the possibly merged group of changes back to + * line up with the last group of changes from the + * other file that it can align with. */ while (go.end == go.start) { if (group_slide_up(xdf, &g, flags)) @@ -891,19 +889,25 @@ int xdl_change_compact(xdfile_t *xdf, xdfile_t *xdfo, long flags) { } else if (flags & XDF_INDENT_HEURISTIC) { /* * Indent heuristic: a group of pure add/delete lines - * implies two splits, one between the end of the "before" - * context and the start of the group, and another between - * the end of the group and the beginning of the "after" - * context. Some splits are aesthetically better and some - * are worse. We compute a badness "score" for each split, - * and add the scores for the two splits to define a - * "score" for each position that the group can be shifted - * to. Then we pick the shift with the lowest score. + * implies two splits, one between the end of the + * "before" context and the start of the group, and + * another between the end of the group and the + * beginning of the "after" context. Some splits are + * aesthetically better and some are worse. We compute + * a badness "score" for each split, and add the scores + * for the two splits to define a "score" for each + * position that the group can be shifted to. Then we + * pick the shift with the lowest score. */ long shift, best_shift = -1; struct split_score best_score; - for (shift = earliest_end; shift <= g.end; shift++) { + shift = earliest_end; + if (g.end - groupsize - 1 > shift) + shift = g.end - groupsize - 1; + if (g.end - INDENT_HEURISTIC_MAX_SLIDING > shift) + shift = g.end - INDENT_HEURISTIC_MAX_SLIDING; + for (; shift <= g.end; shift++) { struct split_measurement m; struct split_score score = {0, 0}; diff --git a/xdiff/xemit.c b/xdiff/xemit.c index 7778dc2b19..9d7d6c5087 100644 --- a/xdiff/xemit.c +++ b/xdiff/xemit.c @@ -172,10 +172,12 @@ int xdl_emit_diff(xdfenv_t *xe, xdchange_t *xscr, xdemitcb_t *ecb, struct func_line func_line = { 0 }; for (xch = xscr; xch; xch = xche->next) { + xdchange_t *xchp = xch; xche = xdl_get_hunk(&xch, xecfg); if (!xch) break; +pre_context_calculation: s1 = XDL_MAX(xch->i1 - xecfg->ctxlen, 0); s2 = XDL_MAX(xch->i2 - xecfg->ctxlen, 0); @@ -210,8 +212,23 @@ int xdl_emit_diff(xdfenv_t *xe, xdchange_t *xscr, xdemitcb_t *ecb, if (fs1 < 0) fs1 = 0; if (fs1 < s1) { - s2 -= s1 - fs1; + s2 = XDL_MAX(s2 - (s1 - fs1), 0); s1 = fs1; + + /* + * Did we extend context upwards into an + * ignored change? + */ + while (xchp != xch && + xchp->i1 + xchp->chg1 <= s1 && + xchp->i2 + xchp->chg2 <= s2) + xchp = xchp->next; + + /* If so, show it after all. */ + if (xchp != xch) { + xch = xchp; + goto pre_context_calculation; + } } } @@ -232,7 +249,7 @@ int xdl_emit_diff(xdfenv_t *xe, xdchange_t *xscr, xdemitcb_t *ecb, if (fe1 < 0) fe1 = xe->xdf1.nrec; if (fe1 > e1) { - e2 += fe1 - e1; + e2 = XDL_MIN(e2 + (fe1 - e1), xe->xdf2.nrec); e1 = fe1; } diff --git a/xdiff/xhistogram.c b/xdiff/xhistogram.c index 73210cb6f3..c7b35a9667 100644 --- a/xdiff/xhistogram.c +++ b/xdiff/xhistogram.c @@ -42,8 +42,6 @@ */ #include "xinclude.h" -#include "xtypes.h" -#include "xdiff.h" #define MAX_PTR UINT_MAX #define MAX_CNT UINT_MAX @@ -233,54 +231,31 @@ static int try_lcs(struct histindex *index, struct region *lcs, int b_ptr, return b_next; } -static int find_lcs(struct histindex *index, struct region *lcs, - int line1, int count1, int line2, int count2) { - int b_ptr; - - if (scanA(index, line1, count1)) - return -1; - - index->cnt = index->max_chain_length + 1; - - for (b_ptr = line2; b_ptr <= LINE_END(2); ) - b_ptr = try_lcs(index, lcs, b_ptr, line1, count1, line2, count2); - - return index->has_common && index->max_chain_length < index->cnt; -} - -static int fall_back_to_classic_diff(struct histindex *index, +static int fall_back_to_classic_diff(xpparam_t const *xpp, xdfenv_t *env, int line1, int count1, int line2, int count2) { - xpparam_t xpp; - xpp.flags = index->xpp->flags & ~XDF_DIFF_ALGORITHM_MASK; + xpparam_t xpparam; + xpparam.flags = xpp->flags & ~XDF_DIFF_ALGORITHM_MASK; - return xdl_fall_back_diff(index->env, &xpp, + return xdl_fall_back_diff(env, &xpparam, line1, count1, line2, count2); } -static int histogram_diff(xpparam_t const *xpp, xdfenv_t *env, - int line1, int count1, int line2, int count2) +static inline void free_index(struct histindex *index) { - struct histindex index; - struct region lcs; - int sz; - int result = -1; - - if (count1 <= 0 && count2 <= 0) - return 0; - - if (LINE_END(1) >= MAX_PTR) - return -1; + xdl_free(index->records); + xdl_free(index->line_map); + xdl_free(index->next_ptrs); + xdl_cha_free(&index->rcha); +} - if (!count1) { - while(count2--) - env->xdf2.rchg[line2++ - 1] = 1; - return 0; - } else if (!count2) { - while(count1--) - env->xdf1.rchg[line1++ - 1] = 1; - return 0; - } +static int find_lcs(xpparam_t const *xpp, xdfenv_t *env, + struct region *lcs, + int line1, int count1, int line2, int count2) +{ + int b_ptr; + int sz, ret = -1; + struct histindex index; memset(&index, 0, sizeof(index)); @@ -318,9 +293,55 @@ static int histogram_diff(xpparam_t const *xpp, xdfenv_t *env, index.ptr_shift = line1; index.max_chain_length = 64; + if (scanA(&index, line1, count1)) + goto cleanup; + + index.cnt = index.max_chain_length + 1; + + for (b_ptr = line2; b_ptr <= LINE_END(2); ) + b_ptr = try_lcs(&index, lcs, b_ptr, line1, count1, line2, count2); + + if (index.has_common && index.max_chain_length < index.cnt) + ret = 1; + else + ret = 0; + +cleanup: + free_index(&index); + return ret; +} + +static int histogram_diff(xpparam_t const *xpp, xdfenv_t *env, + int line1, int count1, int line2, int count2) +{ + struct region lcs; + int lcs_found; + int result; +redo: + result = -1; + + if (count1 <= 0 && count2 <= 0) + return 0; + + if (LINE_END(1) >= MAX_PTR) + return -1; + + if (!count1) { + while(count2--) + env->xdf2.rchg[line2++ - 1] = 1; + return 0; + } else if (!count2) { + while(count1--) + env->xdf1.rchg[line1++ - 1] = 1; + return 0; + } + memset(&lcs, 0, sizeof(lcs)); - if (find_lcs(&index, &lcs, line1, count1, line2, count2)) - result = fall_back_to_classic_diff(&index, line1, count1, line2, count2); + lcs_found = find_lcs(xpp, env, &lcs, line1, count1, line2, count2); + if (lcs_found < 0) + goto out; + else if (lcs_found) + result = fall_back_to_classic_diff(xpp, env, line1, count1, line2, count2); else { if (lcs.begin1 == 0 && lcs.begin2 == 0) { while (count1--) @@ -333,21 +354,21 @@ static int histogram_diff(xpparam_t const *xpp, xdfenv_t *env, line1, lcs.begin1 - line1, line2, lcs.begin2 - line2); if (result) - goto cleanup; - result = histogram_diff(xpp, env, - lcs.end1 + 1, LINE_END(1) - lcs.end1, - lcs.end2 + 1, LINE_END(2) - lcs.end2); - if (result) - goto cleanup; + goto out; + /* + * result = histogram_diff(xpp, env, + * lcs.end1 + 1, LINE_END(1) - lcs.end1, + * lcs.end2 + 1, LINE_END(2) - lcs.end2); + * but let's optimize tail recursion ourself: + */ + count1 = LINE_END(1) - lcs.end1; + line1 = lcs.end1 + 1; + count2 = LINE_END(2) - lcs.end2; + line2 = lcs.end2 + 1; + goto redo; } } - -cleanup: - xdl_free(index.records); - xdl_free(index.line_map); - xdl_free(index.next_ptrs); - xdl_cha_free(&index.rcha); - +out: return result; } diff --git a/xdiff/xinclude.h b/xdiff/xinclude.h index f35c4485df..a4285ac0eb 100644 --- a/xdiff/xinclude.h +++ b/xdiff/xinclude.h @@ -23,13 +23,7 @@ #if !defined(XINCLUDE_H) #define XINCLUDE_H -#include <ctype.h> -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <string.h> -#include <limits.h> - +#include "git-compat-util.h" #include "xmacros.h" #include "xdiff.h" #include "xtypes.h" diff --git a/xdiff/xpatience.c b/xdiff/xpatience.c index f3573d9f00..3c5601b602 100644 --- a/xdiff/xpatience.c +++ b/xdiff/xpatience.c @@ -20,8 +20,6 @@ * */ #include "xinclude.h" -#include "xtypes.h" -#include "xdiff.h" /* * The basic idea of patience diff is to find lines that are unique in diff --git a/xdiff/xutils.c b/xdiff/xutils.c index 88e5995535..cfa6e2220f 100644 --- a/xdiff/xutils.c +++ b/xdiff/xutils.c @@ -20,13 +20,9 @@ * */ -#include <limits.h> -#include <assert.h> #include "xinclude.h" - - long xdl_bogosqrt(long n) { long i; @@ -54,7 +50,7 @@ int xdl_emit_diffrec(char const *rec, long size, char const *pre, long psize, mb[2].size = strlen(mb[2].ptr); i++; } - if (ecb->outf(ecb->priv, mb, i) < 0) { + if (ecb->out_line(ecb->priv, mb, i) < 0) { return -1; } @@ -344,8 +340,9 @@ int xdl_num_out(char *out, long val) { return str - out; } -int xdl_emit_hunk_hdr(long s1, long c1, long s2, long c2, - const char *func, long funclen, xdemitcb_t *ecb) { +static int xdl_format_hunk_hdr(long s1, long c1, long s2, long c2, + const char *func, long funclen, + xdemitcb_t *ecb) { int nb = 0; mmbuffer_t mb; char buf[128]; @@ -387,9 +384,21 @@ int xdl_emit_hunk_hdr(long s1, long c1, long s2, long c2, mb.ptr = buf; mb.size = nb; - if (ecb->outf(ecb->priv, &mb, 1) < 0) + if (ecb->out_line(ecb->priv, &mb, 1) < 0) return -1; + return 0; +} +int xdl_emit_hunk_hdr(long s1, long c1, long s2, long c2, + const char *func, long funclen, + xdemitcb_t *ecb) { + if (!ecb->out_hunk) + return xdl_format_hunk_hdr(s1, c1, s2, c2, func, funclen, ecb); + if (ecb->out_hunk(ecb->priv, + c1 ? s1 : s1 - 1, c1, + c2 ? s2 : s2 - 1, c2, + func, funclen) < 0) + return -1; return 0; } |