summaryrefslogtreecommitdiff
path: root/xdiff
diff options
context:
space:
mode:
Diffstat (limited to 'xdiff')
-rw-r--r--xdiff/xdiff.h18
-rw-r--r--xdiff/xdiffi.c128
-rw-r--r--xdiff/xemit.c21
-rw-r--r--xdiff/xhistogram.c135
-rw-r--r--xdiff/xinclude.h8
-rw-r--r--xdiff/xpatience.c2
-rw-r--r--xdiff/xutils.c25
7 files changed, 188 insertions, 149 deletions
diff --git a/xdiff/xdiff.h b/xdiff/xdiff.h
index c1937a2911..032e3a9f41 100644
--- a/xdiff/xdiff.h
+++ b/xdiff/xdiff.h
@@ -52,14 +52,6 @@ extern "C" {
#define XDL_EMIT_FUNCNAMES (1 << 0)
#define XDL_EMIT_FUNCCONTEXT (1 << 2)
-#define XDL_MMB_READONLY (1 << 0)
-
-#define XDL_MMF_ATOMIC (1 << 0)
-
-#define XDL_BDOP_INS 1
-#define XDL_BDOP_CPY 2
-#define XDL_BDOP_INSB 3
-
/* merge simplification levels */
#define XDL_MERGE_MINIMAL 0
#define XDL_MERGE_EAGER 1
@@ -94,7 +86,11 @@ typedef struct s_xpparam {
typedef struct s_xdemitcb {
void *priv;
- int (*outf)(void *, mmbuffer_t *, int);
+ int (*out_hunk)(void *,
+ long old_begin, long old_nr,
+ long new_begin, long new_nr,
+ const char *func, long funclen);
+ int (*out_line)(void *, mmbuffer_t *, int);
} xdemitcb_t;
typedef long (*find_func_t)(const char *line, long line_len, char *buffer, long buffer_size, void *priv);
@@ -117,9 +113,9 @@ typedef struct s_bdiffparam {
} bdiffparam_t;
-#define xdl_malloc(x) malloc(x)
+#define xdl_malloc(x) xmalloc(x)
#define xdl_free(ptr) free(ptr)
-#define xdl_realloc(ptr,x) realloc(ptr,x)
+#define xdl_realloc(ptr,x) xrealloc(ptr,x)
void *xdl_mmfile_first(mmfile_t *mmf, long *size);
long xdl_mmfile_size(mmfile_t *mmf);
diff --git a/xdiff/xdiffi.c b/xdiff/xdiffi.c
index 0de1ef463b..bd035139f9 100644
--- a/xdiff/xdiffi.c
+++ b/xdiff/xdiffi.c
@@ -22,42 +22,25 @@
#include "xinclude.h"
-
-
#define XDL_MAX_COST_MIN 256
#define XDL_HEUR_MIN_COST 256
#define XDL_LINE_MAX (long)((1UL << (CHAR_BIT * sizeof(long) - 1)) - 1)
#define XDL_SNAKE_CNT 20
#define XDL_K_HEUR 4
-
-
typedef struct s_xdpsplit {
long i1, i2;
int min_lo, min_hi;
} xdpsplit_t;
-
-
-
-static long xdl_split(unsigned long const *ha1, long off1, long lim1,
- unsigned long const *ha2, long off2, long lim2,
- long *kvdf, long *kvdb, int need_min, xdpsplit_t *spl,
- xdalgoenv_t *xenv);
-static xdchange_t *xdl_add_change(xdchange_t *xscr, long i1, long i2, long chg1, long chg2);
-
-
-
-
-
/*
* See "An O(ND) Difference Algorithm and its Variations", by Eugene Myers.
* Basically considers a "box" (off1, off2, lim1, lim2) and scan from both
* the forward diagonal starting from (off1, off2) and the backward diagonal
* starting from (lim1, lim2). If the K values on the same diagonal crosses
- * returns the furthest point of reach. We might end up having to expensive
- * cases using this algorithm is full, so a little bit of heuristic is needed
- * to cut the search and to return a suboptimal point.
+ * returns the furthest point of reach. We might encounter expensive edge cases
+ * using this algorithm, so a little bit of heuristic is needed to cut the
+ * search and to return a suboptimal point.
*/
static long xdl_split(unsigned long const *ha1, long off1, long lim1,
unsigned long const *ha2, long off2, long lim2,
@@ -80,11 +63,13 @@ static long xdl_split(unsigned long const *ha1, long off1, long lim1,
int got_snake = 0;
/*
- * We need to extent the diagonal "domain" by one. If the next
+ * We need to extend the diagonal "domain" by one. If the next
* values exits the box boundaries we need to change it in the
- * opposite direction because (max - min) must be a power of two.
+ * opposite direction because (max - min) must be a power of
+ * two.
+ *
* Also we initialize the external K value to -1 so that we can
- * avoid extra conditions check inside the core loop.
+ * avoid extra conditions in the check inside the core loop.
*/
if (fmin > dmin)
kvdf[--fmin - 1] = -1;
@@ -115,11 +100,13 @@ static long xdl_split(unsigned long const *ha1, long off1, long lim1,
}
/*
- * We need to extent the diagonal "domain" by one. If the next
+ * We need to extend the diagonal "domain" by one. If the next
* values exits the box boundaries we need to change it in the
- * opposite direction because (max - min) must be a power of two.
+ * opposite direction because (max - min) must be a power of
+ * two.
+ *
* Also we initialize the external K value to -1 so that we can
- * avoid extra conditions check inside the core loop.
+ * avoid extra conditions in the check inside the core loop.
*/
if (bmin > dmin)
kvdb[--bmin - 1] = XDL_LINE_MAX;
@@ -155,7 +142,7 @@ static long xdl_split(unsigned long const *ha1, long off1, long lim1,
/*
* If the edit cost is above the heuristic trigger and if
* we got a good snake, we sample current diagonals to see
- * if some of the, have reached an "interesting" path. Our
+ * if some of them have reached an "interesting" path. Our
* measure is a function of the distance from the diagonal
* corner (i1 + i2) penalized with the distance from the
* mid diagonal itself. If this value is above the current
@@ -213,8 +200,9 @@ static long xdl_split(unsigned long const *ha1, long off1, long lim1,
}
/*
- * Enough is enough. We spent too much time here and now we collect
- * the furthest reaching path using the (i1 + i2) measure.
+ * Enough is enough. We spent too much time here and now we
+ * collect the furthest reaching path using the (i1 + i2)
+ * measure.
*/
if (ec >= xenv->mxcost) {
long fbest, fbest1, bbest, bbest1;
@@ -261,9 +249,9 @@ static long xdl_split(unsigned long const *ha1, long off1, long lim1,
/*
- * Rule: "Divide et Impera". Recursively split the box in sub-boxes by calling
- * the box splitting function. Note that the real job (marking changed lines)
- * is done in the two boundary reaching checks.
+ * Rule: "Divide et Impera" (divide & conquer). Recursively split the box in
+ * sub-boxes by calling the box splitting function. Note that the real job
+ * (marking changed lines) is done in the two boundary reaching checks.
*/
int xdl_recs_cmp(diffdata_t *dd1, long off1, long lim1,
diffdata_t *dd2, long off2, long lim2,
@@ -340,7 +328,9 @@ int xdl_do_diff(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp,
}
/*
- * Allocate and setup K vectors to be used by the differential algorithm.
+ * Allocate and setup K vectors to be used by the differential
+ * algorithm.
+ *
* One is to store the forward path and one to store the backward path.
*/
ndiags = xe->xdf1.nreff + xe->xdf2.nreff + 3;
@@ -411,8 +401,8 @@ static int recs_match(xrecord_t *rec1, xrecord_t *rec2, long flags)
/*
* If a line is indented more than this, get_indent() just returns this value.
* This avoids having to do absurd amounts of work for data that are not
- * human-readable text, and also ensures that the output of get_indent fits within
- * an int.
+ * human-readable text, and also ensures that the output of get_indent fits
+ * within an int.
*/
#define MAX_INDENT 200
@@ -446,9 +436,9 @@ static int get_indent(xrecord_t *rec)
}
/*
- * If more than this number of consecutive blank rows are found, just return this
- * value. This avoids requiring O(N^2) work for pathological cases, and also
- * ensures that the output of score_split fits in an int.
+ * If more than this number of consecutive blank rows are found, just return
+ * this value. This avoids requiring O(N^2) work for pathological cases, and
+ * also ensures that the output of score_split fits in an int.
*/
#define MAX_BLANKS 20
@@ -460,8 +450,8 @@ struct split_measurement {
int end_of_file;
/*
- * How much is the line immediately following the split indented (or -1 if
- * the line is blank):
+ * How much is the line immediately following the split indented (or -1
+ * if the line is blank):
*/
int indent;
@@ -471,8 +461,8 @@ struct split_measurement {
int pre_blank;
/*
- * How much is the nearest non-blank line above the split indented (or -1
- * if there is no such line)?
+ * How much is the nearest non-blank line above the split indented (or
+ * -1 if there is no such line)?
*/
int pre_indent;
@@ -592,14 +582,19 @@ static void measure_split(const xdfile_t *xdf, long split,
#define INDENT_WEIGHT 60
/*
+ * How far do we slide a hunk at most?
+ */
+#define INDENT_HEURISTIC_MAX_SLIDING 100
+
+/*
* Compute a badness score for the hypothetical split whose measurements are
- * stored in m. The weight factors were determined empirically using the tools and
- * corpus described in
+ * stored in m. The weight factors were determined empirically using the tools
+ * and corpus described in
*
* https://github.com/mhagger/diff-slider-tools
*
- * Also see that project if you want to improve the weights based on, for example,
- * a larger or more diverse corpus.
+ * Also see that project if you want to improve the weights based on, for
+ * example, a larger or more diverse corpus.
*/
static void score_add_split(const struct split_measurement *m, struct split_score *s)
{
@@ -821,13 +816,16 @@ int xdl_change_compact(xdfile_t *xdf, xdfile_t *xdfo, long flags) {
group_init(xdfo, &go);
while (1) {
- /* If the group is empty in the to-be-compacted file, skip it: */
+ /*
+ * If the group is empty in the to-be-compacted file, skip it:
+ */
if (g.end == g.start)
goto next;
/*
* Now shift the change up and then down as far as possible in
- * each direction. If it bumps into any other changes, merge them.
+ * each direction. If it bumps into any other changes, merge
+ * them.
*/
do {
groupsize = g.end - g.start;
@@ -870,17 +868,17 @@ int xdl_change_compact(xdfile_t *xdf, xdfile_t *xdfo, long flags) {
* If the group can be shifted, then we can possibly use this
* freedom to produce a more intuitive diff.
*
- * The group is currently shifted as far down as possible, so the
- * heuristics below only have to handle upwards shifts.
+ * The group is currently shifted as far down as possible, so
+ * the heuristics below only have to handle upwards shifts.
*/
if (g.end == earliest_end) {
/* no shifting was possible */
} else if (end_matching_other != -1) {
/*
- * Move the possibly merged group of changes back to line
- * up with the last group of changes from the other file
- * that it can align with.
+ * Move the possibly merged group of changes back to
+ * line up with the last group of changes from the
+ * other file that it can align with.
*/
while (go.end == go.start) {
if (group_slide_up(xdf, &g, flags))
@@ -891,19 +889,25 @@ int xdl_change_compact(xdfile_t *xdf, xdfile_t *xdfo, long flags) {
} else if (flags & XDF_INDENT_HEURISTIC) {
/*
* Indent heuristic: a group of pure add/delete lines
- * implies two splits, one between the end of the "before"
- * context and the start of the group, and another between
- * the end of the group and the beginning of the "after"
- * context. Some splits are aesthetically better and some
- * are worse. We compute a badness "score" for each split,
- * and add the scores for the two splits to define a
- * "score" for each position that the group can be shifted
- * to. Then we pick the shift with the lowest score.
+ * implies two splits, one between the end of the
+ * "before" context and the start of the group, and
+ * another between the end of the group and the
+ * beginning of the "after" context. Some splits are
+ * aesthetically better and some are worse. We compute
+ * a badness "score" for each split, and add the scores
+ * for the two splits to define a "score" for each
+ * position that the group can be shifted to. Then we
+ * pick the shift with the lowest score.
*/
long shift, best_shift = -1;
struct split_score best_score;
- for (shift = earliest_end; shift <= g.end; shift++) {
+ shift = earliest_end;
+ if (g.end - groupsize - 1 > shift)
+ shift = g.end - groupsize - 1;
+ if (g.end - INDENT_HEURISTIC_MAX_SLIDING > shift)
+ shift = g.end - INDENT_HEURISTIC_MAX_SLIDING;
+ for (; shift <= g.end; shift++) {
struct split_measurement m;
struct split_score score = {0, 0};
diff --git a/xdiff/xemit.c b/xdiff/xemit.c
index 7778dc2b19..9d7d6c5087 100644
--- a/xdiff/xemit.c
+++ b/xdiff/xemit.c
@@ -172,10 +172,12 @@ int xdl_emit_diff(xdfenv_t *xe, xdchange_t *xscr, xdemitcb_t *ecb,
struct func_line func_line = { 0 };
for (xch = xscr; xch; xch = xche->next) {
+ xdchange_t *xchp = xch;
xche = xdl_get_hunk(&xch, xecfg);
if (!xch)
break;
+pre_context_calculation:
s1 = XDL_MAX(xch->i1 - xecfg->ctxlen, 0);
s2 = XDL_MAX(xch->i2 - xecfg->ctxlen, 0);
@@ -210,8 +212,23 @@ int xdl_emit_diff(xdfenv_t *xe, xdchange_t *xscr, xdemitcb_t *ecb,
if (fs1 < 0)
fs1 = 0;
if (fs1 < s1) {
- s2 -= s1 - fs1;
+ s2 = XDL_MAX(s2 - (s1 - fs1), 0);
s1 = fs1;
+
+ /*
+ * Did we extend context upwards into an
+ * ignored change?
+ */
+ while (xchp != xch &&
+ xchp->i1 + xchp->chg1 <= s1 &&
+ xchp->i2 + xchp->chg2 <= s2)
+ xchp = xchp->next;
+
+ /* If so, show it after all. */
+ if (xchp != xch) {
+ xch = xchp;
+ goto pre_context_calculation;
+ }
}
}
@@ -232,7 +249,7 @@ int xdl_emit_diff(xdfenv_t *xe, xdchange_t *xscr, xdemitcb_t *ecb,
if (fe1 < 0)
fe1 = xe->xdf1.nrec;
if (fe1 > e1) {
- e2 += fe1 - e1;
+ e2 = XDL_MIN(e2 + (fe1 - e1), xe->xdf2.nrec);
e1 = fe1;
}
diff --git a/xdiff/xhistogram.c b/xdiff/xhistogram.c
index 73210cb6f3..c7b35a9667 100644
--- a/xdiff/xhistogram.c
+++ b/xdiff/xhistogram.c
@@ -42,8 +42,6 @@
*/
#include "xinclude.h"
-#include "xtypes.h"
-#include "xdiff.h"
#define MAX_PTR UINT_MAX
#define MAX_CNT UINT_MAX
@@ -233,54 +231,31 @@ static int try_lcs(struct histindex *index, struct region *lcs, int b_ptr,
return b_next;
}
-static int find_lcs(struct histindex *index, struct region *lcs,
- int line1, int count1, int line2, int count2) {
- int b_ptr;
-
- if (scanA(index, line1, count1))
- return -1;
-
- index->cnt = index->max_chain_length + 1;
-
- for (b_ptr = line2; b_ptr <= LINE_END(2); )
- b_ptr = try_lcs(index, lcs, b_ptr, line1, count1, line2, count2);
-
- return index->has_common && index->max_chain_length < index->cnt;
-}
-
-static int fall_back_to_classic_diff(struct histindex *index,
+static int fall_back_to_classic_diff(xpparam_t const *xpp, xdfenv_t *env,
int line1, int count1, int line2, int count2)
{
- xpparam_t xpp;
- xpp.flags = index->xpp->flags & ~XDF_DIFF_ALGORITHM_MASK;
+ xpparam_t xpparam;
+ xpparam.flags = xpp->flags & ~XDF_DIFF_ALGORITHM_MASK;
- return xdl_fall_back_diff(index->env, &xpp,
+ return xdl_fall_back_diff(env, &xpparam,
line1, count1, line2, count2);
}
-static int histogram_diff(xpparam_t const *xpp, xdfenv_t *env,
- int line1, int count1, int line2, int count2)
+static inline void free_index(struct histindex *index)
{
- struct histindex index;
- struct region lcs;
- int sz;
- int result = -1;
-
- if (count1 <= 0 && count2 <= 0)
- return 0;
-
- if (LINE_END(1) >= MAX_PTR)
- return -1;
+ xdl_free(index->records);
+ xdl_free(index->line_map);
+ xdl_free(index->next_ptrs);
+ xdl_cha_free(&index->rcha);
+}
- if (!count1) {
- while(count2--)
- env->xdf2.rchg[line2++ - 1] = 1;
- return 0;
- } else if (!count2) {
- while(count1--)
- env->xdf1.rchg[line1++ - 1] = 1;
- return 0;
- }
+static int find_lcs(xpparam_t const *xpp, xdfenv_t *env,
+ struct region *lcs,
+ int line1, int count1, int line2, int count2)
+{
+ int b_ptr;
+ int sz, ret = -1;
+ struct histindex index;
memset(&index, 0, sizeof(index));
@@ -318,9 +293,55 @@ static int histogram_diff(xpparam_t const *xpp, xdfenv_t *env,
index.ptr_shift = line1;
index.max_chain_length = 64;
+ if (scanA(&index, line1, count1))
+ goto cleanup;
+
+ index.cnt = index.max_chain_length + 1;
+
+ for (b_ptr = line2; b_ptr <= LINE_END(2); )
+ b_ptr = try_lcs(&index, lcs, b_ptr, line1, count1, line2, count2);
+
+ if (index.has_common && index.max_chain_length < index.cnt)
+ ret = 1;
+ else
+ ret = 0;
+
+cleanup:
+ free_index(&index);
+ return ret;
+}
+
+static int histogram_diff(xpparam_t const *xpp, xdfenv_t *env,
+ int line1, int count1, int line2, int count2)
+{
+ struct region lcs;
+ int lcs_found;
+ int result;
+redo:
+ result = -1;
+
+ if (count1 <= 0 && count2 <= 0)
+ return 0;
+
+ if (LINE_END(1) >= MAX_PTR)
+ return -1;
+
+ if (!count1) {
+ while(count2--)
+ env->xdf2.rchg[line2++ - 1] = 1;
+ return 0;
+ } else if (!count2) {
+ while(count1--)
+ env->xdf1.rchg[line1++ - 1] = 1;
+ return 0;
+ }
+
memset(&lcs, 0, sizeof(lcs));
- if (find_lcs(&index, &lcs, line1, count1, line2, count2))
- result = fall_back_to_classic_diff(&index, line1, count1, line2, count2);
+ lcs_found = find_lcs(xpp, env, &lcs, line1, count1, line2, count2);
+ if (lcs_found < 0)
+ goto out;
+ else if (lcs_found)
+ result = fall_back_to_classic_diff(xpp, env, line1, count1, line2, count2);
else {
if (lcs.begin1 == 0 && lcs.begin2 == 0) {
while (count1--)
@@ -333,21 +354,21 @@ static int histogram_diff(xpparam_t const *xpp, xdfenv_t *env,
line1, lcs.begin1 - line1,
line2, lcs.begin2 - line2);
if (result)
- goto cleanup;
- result = histogram_diff(xpp, env,
- lcs.end1 + 1, LINE_END(1) - lcs.end1,
- lcs.end2 + 1, LINE_END(2) - lcs.end2);
- if (result)
- goto cleanup;
+ goto out;
+ /*
+ * result = histogram_diff(xpp, env,
+ * lcs.end1 + 1, LINE_END(1) - lcs.end1,
+ * lcs.end2 + 1, LINE_END(2) - lcs.end2);
+ * but let's optimize tail recursion ourself:
+ */
+ count1 = LINE_END(1) - lcs.end1;
+ line1 = lcs.end1 + 1;
+ count2 = LINE_END(2) - lcs.end2;
+ line2 = lcs.end2 + 1;
+ goto redo;
}
}
-
-cleanup:
- xdl_free(index.records);
- xdl_free(index.line_map);
- xdl_free(index.next_ptrs);
- xdl_cha_free(&index.rcha);
-
+out:
return result;
}
diff --git a/xdiff/xinclude.h b/xdiff/xinclude.h
index f35c4485df..a4285ac0eb 100644
--- a/xdiff/xinclude.h
+++ b/xdiff/xinclude.h
@@ -23,13 +23,7 @@
#if !defined(XINCLUDE_H)
#define XINCLUDE_H
-#include <ctype.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <string.h>
-#include <limits.h>
-
+#include "git-compat-util.h"
#include "xmacros.h"
#include "xdiff.h"
#include "xtypes.h"
diff --git a/xdiff/xpatience.c b/xdiff/xpatience.c
index f3573d9f00..3c5601b602 100644
--- a/xdiff/xpatience.c
+++ b/xdiff/xpatience.c
@@ -20,8 +20,6 @@
*
*/
#include "xinclude.h"
-#include "xtypes.h"
-#include "xdiff.h"
/*
* The basic idea of patience diff is to find lines that are unique in
diff --git a/xdiff/xutils.c b/xdiff/xutils.c
index 88e5995535..cfa6e2220f 100644
--- a/xdiff/xutils.c
+++ b/xdiff/xutils.c
@@ -20,13 +20,9 @@
*
*/
-#include <limits.h>
-#include <assert.h>
#include "xinclude.h"
-
-
long xdl_bogosqrt(long n) {
long i;
@@ -54,7 +50,7 @@ int xdl_emit_diffrec(char const *rec, long size, char const *pre, long psize,
mb[2].size = strlen(mb[2].ptr);
i++;
}
- if (ecb->outf(ecb->priv, mb, i) < 0) {
+ if (ecb->out_line(ecb->priv, mb, i) < 0) {
return -1;
}
@@ -344,8 +340,9 @@ int xdl_num_out(char *out, long val) {
return str - out;
}
-int xdl_emit_hunk_hdr(long s1, long c1, long s2, long c2,
- const char *func, long funclen, xdemitcb_t *ecb) {
+static int xdl_format_hunk_hdr(long s1, long c1, long s2, long c2,
+ const char *func, long funclen,
+ xdemitcb_t *ecb) {
int nb = 0;
mmbuffer_t mb;
char buf[128];
@@ -387,9 +384,21 @@ int xdl_emit_hunk_hdr(long s1, long c1, long s2, long c2,
mb.ptr = buf;
mb.size = nb;
- if (ecb->outf(ecb->priv, &mb, 1) < 0)
+ if (ecb->out_line(ecb->priv, &mb, 1) < 0)
return -1;
+ return 0;
+}
+int xdl_emit_hunk_hdr(long s1, long c1, long s2, long c2,
+ const char *func, long funclen,
+ xdemitcb_t *ecb) {
+ if (!ecb->out_hunk)
+ return xdl_format_hunk_hdr(s1, c1, s2, c2, func, funclen, ecb);
+ if (ecb->out_hunk(ecb->priv,
+ c1 ? s1 : s1 - 1, c1,
+ c2 ? s2 : s2 - 1, c2,
+ func, funclen) < 0)
+ return -1;
return 0;
}