summaryrefslogtreecommitdiff
path: root/xdiff
diff options
context:
space:
mode:
Diffstat (limited to 'xdiff')
-rw-r--r--xdiff/xdiff.h4
-rw-r--r--xdiff/xdiffi.c146
-rw-r--r--xdiff/xemit.c17
-rw-r--r--xdiff/xhistogram.c2
-rw-r--r--xdiff/xpatience.c16
5 files changed, 128 insertions, 57 deletions
diff --git a/xdiff/xdiff.h b/xdiff/xdiff.h
index 032e3a9f41..7a04605146 100644
--- a/xdiff/xdiff.h
+++ b/xdiff/xdiff.h
@@ -79,6 +79,10 @@ typedef struct s_mmbuffer {
typedef struct s_xpparam {
unsigned long flags;
+ /* -I<regex> */
+ regex_t **ignore_regex;
+ size_t ignore_regex_nr;
+
/* See Documentation/diff-options.txt. */
char **anchors;
size_t anchors_nr;
diff --git a/xdiff/xdiffi.c b/xdiff/xdiffi.c
index 1f1f4a3c78..380eb728ed 100644
--- a/xdiff/xdiffi.c
+++ b/xdiff/xdiffi.c
@@ -38,9 +38,9 @@ typedef struct s_xdpsplit {
* Basically considers a "box" (off1, off2, lim1, lim2) and scan from both
* the forward diagonal starting from (off1, off2) and the backward diagonal
* starting from (lim1, lim2). If the K values on the same diagonal crosses
- * returns the furthest point of reach. We might end up having to expensive
- * cases using this algorithm is full, so a little bit of heuristic is needed
- * to cut the search and to return a suboptimal point.
+ * returns the furthest point of reach. We might encounter expensive edge cases
+ * using this algorithm, so a little bit of heuristic is needed to cut the
+ * search and to return a suboptimal point.
*/
static long xdl_split(unsigned long const *ha1, long off1, long lim1,
unsigned long const *ha2, long off2, long lim2,
@@ -63,11 +63,13 @@ static long xdl_split(unsigned long const *ha1, long off1, long lim1,
int got_snake = 0;
/*
- * We need to extent the diagonal "domain" by one. If the next
+ * We need to extend the diagonal "domain" by one. If the next
* values exits the box boundaries we need to change it in the
- * opposite direction because (max - min) must be a power of two.
+ * opposite direction because (max - min) must be a power of
+ * two.
+ *
* Also we initialize the external K value to -1 so that we can
- * avoid extra conditions check inside the core loop.
+ * avoid extra conditions in the check inside the core loop.
*/
if (fmin > dmin)
kvdf[--fmin - 1] = -1;
@@ -98,11 +100,13 @@ static long xdl_split(unsigned long const *ha1, long off1, long lim1,
}
/*
- * We need to extent the diagonal "domain" by one. If the next
+ * We need to extend the diagonal "domain" by one. If the next
* values exits the box boundaries we need to change it in the
- * opposite direction because (max - min) must be a power of two.
+ * opposite direction because (max - min) must be a power of
+ * two.
+ *
* Also we initialize the external K value to -1 so that we can
- * avoid extra conditions check inside the core loop.
+ * avoid extra conditions in the check inside the core loop.
*/
if (bmin > dmin)
kvdb[--bmin - 1] = XDL_LINE_MAX;
@@ -138,7 +142,7 @@ static long xdl_split(unsigned long const *ha1, long off1, long lim1,
/*
* If the edit cost is above the heuristic trigger and if
* we got a good snake, we sample current diagonals to see
- * if some of the, have reached an "interesting" path. Our
+ * if some of them have reached an "interesting" path. Our
* measure is a function of the distance from the diagonal
* corner (i1 + i2) penalized with the distance from the
* mid diagonal itself. If this value is above the current
@@ -196,8 +200,9 @@ static long xdl_split(unsigned long const *ha1, long off1, long lim1,
}
/*
- * Enough is enough. We spent too much time here and now we collect
- * the furthest reaching path using the (i1 + i2) measure.
+ * Enough is enough. We spent too much time here and now we
+ * collect the furthest reaching path using the (i1 + i2)
+ * measure.
*/
if (ec >= xenv->mxcost) {
long fbest, fbest1, bbest, bbest1;
@@ -244,9 +249,9 @@ static long xdl_split(unsigned long const *ha1, long off1, long lim1,
/*
- * Rule: "Divide et Impera". Recursively split the box in sub-boxes by calling
- * the box splitting function. Note that the real job (marking changed lines)
- * is done in the two boundary reaching checks.
+ * Rule: "Divide et Impera" (divide & conquer). Recursively split the box in
+ * sub-boxes by calling the box splitting function. Note that the real job
+ * (marking changed lines) is done in the two boundary reaching checks.
*/
int xdl_recs_cmp(diffdata_t *dd1, long off1, long lim1,
diffdata_t *dd2, long off2, long lim2,
@@ -323,7 +328,9 @@ int xdl_do_diff(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp,
}
/*
- * Allocate and setup K vectors to be used by the differential algorithm.
+ * Allocate and setup K vectors to be used by the differential
+ * algorithm.
+ *
* One is to store the forward path and one to store the backward path.
*/
ndiags = xe->xdf1.nreff + xe->xdf2.nreff + 3;
@@ -394,8 +401,8 @@ static int recs_match(xrecord_t *rec1, xrecord_t *rec2, long flags)
/*
* If a line is indented more than this, get_indent() just returns this value.
* This avoids having to do absurd amounts of work for data that are not
- * human-readable text, and also ensures that the output of get_indent fits within
- * an int.
+ * human-readable text, and also ensures that the output of get_indent fits
+ * within an int.
*/
#define MAX_INDENT 200
@@ -429,9 +436,9 @@ static int get_indent(xrecord_t *rec)
}
/*
- * If more than this number of consecutive blank rows are found, just return this
- * value. This avoids requiring O(N^2) work for pathological cases, and also
- * ensures that the output of score_split fits in an int.
+ * If more than this number of consecutive blank rows are found, just return
+ * this value. This avoids requiring O(N^2) work for pathological cases, and
+ * also ensures that the output of score_split fits in an int.
*/
#define MAX_BLANKS 20
@@ -443,8 +450,8 @@ struct split_measurement {
int end_of_file;
/*
- * How much is the line immediately following the split indented (or -1 if
- * the line is blank):
+ * How much is the line immediately following the split indented (or -1
+ * if the line is blank):
*/
int indent;
@@ -454,8 +461,8 @@ struct split_measurement {
int pre_blank;
/*
- * How much is the nearest non-blank line above the split indented (or -1
- * if there is no such line)?
+ * How much is the nearest non-blank line above the split indented (or
+ * -1 if there is no such line)?
*/
int pre_indent;
@@ -581,13 +588,13 @@ static void measure_split(const xdfile_t *xdf, long split,
/*
* Compute a badness score for the hypothetical split whose measurements are
- * stored in m. The weight factors were determined empirically using the tools and
- * corpus described in
+ * stored in m. The weight factors were determined empirically using the tools
+ * and corpus described in
*
* https://github.com/mhagger/diff-slider-tools
*
- * Also see that project if you want to improve the weights based on, for example,
- * a larger or more diverse corpus.
+ * Also see that project if you want to improve the weights based on, for
+ * example, a larger or more diverse corpus.
*/
static void score_add_split(const struct split_measurement *m, struct split_score *s)
{
@@ -809,13 +816,16 @@ int xdl_change_compact(xdfile_t *xdf, xdfile_t *xdfo, long flags) {
group_init(xdfo, &go);
while (1) {
- /* If the group is empty in the to-be-compacted file, skip it: */
+ /*
+ * If the group is empty in the to-be-compacted file, skip it:
+ */
if (g.end == g.start)
goto next;
/*
* Now shift the change up and then down as far as possible in
- * each direction. If it bumps into any other changes, merge them.
+ * each direction. If it bumps into any other changes, merge
+ * them.
*/
do {
groupsize = g.end - g.start;
@@ -858,17 +868,17 @@ int xdl_change_compact(xdfile_t *xdf, xdfile_t *xdfo, long flags) {
* If the group can be shifted, then we can possibly use this
* freedom to produce a more intuitive diff.
*
- * The group is currently shifted as far down as possible, so the
- * heuristics below only have to handle upwards shifts.
+ * The group is currently shifted as far down as possible, so
+ * the heuristics below only have to handle upwards shifts.
*/
if (g.end == earliest_end) {
/* no shifting was possible */
} else if (end_matching_other != -1) {
/*
- * Move the possibly merged group of changes back to line
- * up with the last group of changes from the other file
- * that it can align with.
+ * Move the possibly merged group of changes back to
+ * line up with the last group of changes from the
+ * other file that it can align with.
*/
while (go.end == go.start) {
if (group_slide_up(xdf, &g, flags))
@@ -879,14 +889,15 @@ int xdl_change_compact(xdfile_t *xdf, xdfile_t *xdfo, long flags) {
} else if (flags & XDF_INDENT_HEURISTIC) {
/*
* Indent heuristic: a group of pure add/delete lines
- * implies two splits, one between the end of the "before"
- * context and the start of the group, and another between
- * the end of the group and the beginning of the "after"
- * context. Some splits are aesthetically better and some
- * are worse. We compute a badness "score" for each split,
- * and add the scores for the two splits to define a
- * "score" for each position that the group can be shifted
- * to. Then we pick the shift with the lowest score.
+ * implies two splits, one between the end of the
+ * "before" context and the start of the group, and
+ * another between the end of the group and the
+ * beginning of the "after" context. Some splits are
+ * aesthetically better and some are worse. We compute
+ * a badness "score" for each split, and add the scores
+ * for the two splits to define a "score" for each
+ * position that the group can be shifted to. Then we
+ * pick the shift with the lowest score.
*/
long shift, best_shift = -1;
struct split_score best_score;
@@ -987,7 +998,7 @@ static int xdl_call_hunk_func(xdfenv_t *xe, xdchange_t *xscr, xdemitcb_t *ecb,
return 0;
}
-static void xdl_mark_ignorable(xdchange_t *xscr, xdfenv_t *xe, long flags)
+static void xdl_mark_ignorable_lines(xdchange_t *xscr, xdfenv_t *xe, long flags)
{
xdchange_t *xch;
@@ -1008,6 +1019,46 @@ static void xdl_mark_ignorable(xdchange_t *xscr, xdfenv_t *xe, long flags)
}
}
+static int record_matches_regex(xrecord_t *rec, xpparam_t const *xpp) {
+ regmatch_t regmatch;
+ int i;
+
+ for (i = 0; i < xpp->ignore_regex_nr; i++)
+ if (!regexec_buf(xpp->ignore_regex[i], rec->ptr, rec->size, 1,
+ &regmatch, 0))
+ return 1;
+
+ return 0;
+}
+
+static void xdl_mark_ignorable_regex(xdchange_t *xscr, const xdfenv_t *xe,
+ xpparam_t const *xpp)
+{
+ xdchange_t *xch;
+
+ for (xch = xscr; xch; xch = xch->next) {
+ xrecord_t **rec;
+ int ignore = 1;
+ long i;
+
+ /*
+ * Do not override --ignore-blank-lines.
+ */
+ if (xch->ignore)
+ continue;
+
+ rec = &xe->xdf1.recs[xch->i1];
+ for (i = 0; i < xch->chg1 && ignore; i++)
+ ignore = record_matches_regex(rec[i], xpp);
+
+ rec = &xe->xdf2.recs[xch->i2];
+ for (i = 0; i < xch->chg2 && ignore; i++)
+ ignore = record_matches_regex(rec[i], xpp);
+
+ xch->ignore = ignore;
+ }
+}
+
int xdl_diff(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp,
xdemitconf_t const *xecfg, xdemitcb_t *ecb) {
xdchange_t *xscr;
@@ -1027,7 +1078,10 @@ int xdl_diff(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp,
}
if (xscr) {
if (xpp->flags & XDF_IGNORE_BLANK_LINES)
- xdl_mark_ignorable(xscr, &xe, xpp->flags);
+ xdl_mark_ignorable_lines(xscr, &xe, xpp->flags);
+
+ if (xpp->ignore_regex)
+ xdl_mark_ignorable_regex(xscr, &xe, xpp);
if (ef(&xe, xscr, ecb, xecfg) < 0) {
diff --git a/xdiff/xemit.c b/xdiff/xemit.c
index 30713ae9a9..9d7d6c5087 100644
--- a/xdiff/xemit.c
+++ b/xdiff/xemit.c
@@ -172,10 +172,12 @@ int xdl_emit_diff(xdfenv_t *xe, xdchange_t *xscr, xdemitcb_t *ecb,
struct func_line func_line = { 0 };
for (xch = xscr; xch; xch = xche->next) {
+ xdchange_t *xchp = xch;
xche = xdl_get_hunk(&xch, xecfg);
if (!xch)
break;
+pre_context_calculation:
s1 = XDL_MAX(xch->i1 - xecfg->ctxlen, 0);
s2 = XDL_MAX(xch->i2 - xecfg->ctxlen, 0);
@@ -212,6 +214,21 @@ int xdl_emit_diff(xdfenv_t *xe, xdchange_t *xscr, xdemitcb_t *ecb,
if (fs1 < s1) {
s2 = XDL_MAX(s2 - (s1 - fs1), 0);
s1 = fs1;
+
+ /*
+ * Did we extend context upwards into an
+ * ignored change?
+ */
+ while (xchp != xch &&
+ xchp->i1 + xchp->chg1 <= s1 &&
+ xchp->i2 + xchp->chg2 <= s2)
+ xchp = xchp->next;
+
+ /* If so, show it after all. */
+ if (xchp != xch) {
+ xch = xchp;
+ goto pre_context_calculation;
+ }
}
}
diff --git a/xdiff/xhistogram.c b/xdiff/xhistogram.c
index c7b35a9667..e694bfd9e3 100644
--- a/xdiff/xhistogram.c
+++ b/xdiff/xhistogram.c
@@ -235,6 +235,8 @@ static int fall_back_to_classic_diff(xpparam_t const *xpp, xdfenv_t *env,
int line1, int count1, int line2, int count2)
{
xpparam_t xpparam;
+
+ memset(&xpparam, 0, sizeof(xpparam));
xpparam.flags = xpp->flags & ~XDF_DIFF_ALGORITHM_MASK;
return xdl_fall_back_diff(env, &xpparam,
diff --git a/xdiff/xpatience.c b/xdiff/xpatience.c
index 3c5601b602..c5d48e80ae 100644
--- a/xdiff/xpatience.c
+++ b/xdiff/xpatience.c
@@ -90,7 +90,7 @@ static void insert_record(xpparam_t const *xpp, int line, struct hashmap *map,
{
xrecord_t **records = pass == 1 ?
map->env->xdf1.recs : map->env->xdf2.recs;
- xrecord_t *record = records[line - 1], *other;
+ xrecord_t *record = records[line - 1];
/*
* After xdl_prepare_env() (or more precisely, due to
* xdl_classify_record()), the "ha" member of the records (AKA lines)
@@ -104,11 +104,7 @@ static void insert_record(xpparam_t const *xpp, int line, struct hashmap *map,
int index = (int)((record->ha << 1) % map->alloc);
while (map->entries[index].line1) {
- other = map->env->xdf1.recs[map->entries[index].line1 - 1];
- if (map->entries[index].hash != record->ha ||
- !xdl_recmatch(record->ptr, record->size,
- other->ptr, other->size,
- map->xpp->flags)) {
+ if (map->entries[index].hash != record->ha) {
if (++index >= map->alloc)
index = 0;
continue;
@@ -253,8 +249,7 @@ static int match(struct hashmap *map, int line1, int line2)
{
xrecord_t *record1 = map->env->xdf1.recs[line1 - 1];
xrecord_t *record2 = map->env->xdf2.recs[line2 - 1];
- return xdl_recmatch(record1->ptr, record1->size,
- record2->ptr, record2->size, map->xpp->flags);
+ return record1->ha == record2->ha;
}
static int patience_diff(mmfile_t *file1, mmfile_t *file2,
@@ -289,9 +284,6 @@ static int walk_common_sequence(struct hashmap *map, struct entry *first,
/* Recurse */
if (next1 > line1 || next2 > line2) {
- struct hashmap submap;
-
- memset(&submap, 0, sizeof(submap));
if (patience_diff(map->file1, map->file2,
map->xpp, map->env,
line1, next1 - line1,
@@ -318,6 +310,8 @@ static int fall_back_to_classic_diff(struct hashmap *map,
int line1, int count1, int line2, int count2)
{
xpparam_t xpp;
+
+ memset(&xpp, 0, sizeof(xpp));
xpp.flags = map->xpp->flags & ~XDF_DIFF_ALGORITHM_MASK;
return xdl_fall_back_diff(map->env, &xpp,