summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Linus Torvalds <torvalds@osdl.org> 2006-03-12 22:26:34 -0800
committer: Junio C Hamano <junkio@cox.net> 2006-03-12 23:02:00 -0800
commit: 90bd932c811f4ecd1d8cbceffdf6a69a5ca838b7 (patch)
tree: e216ba53a838787fa7931a23a0b335cb709aaba1
parent: diffcore-delta: tweak hashbase value. (diff)
download: tgif-90bd932c811f4ecd1d8cbceffdf6a69a5ca838b7.tar.xz
Fix up diffcore-rename scoring
The "score" calculation for diffcore-rename was totally broken.

It scaled "score" as

    score = src_copied * MAX_SCORE / dst->size;

which means that you got a 100% similarity score even if src and dest
were different, if just every byte of dst was copied from src, even if
source was much larger than dst (e.g. we had copied 85% of the bytes, but
_deleted_ the remaining 15%).

That's clearly bogus. We should do the score calculation relative not to
the destination size, but to the max size of the two.

This seems to fix it.

Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
-rw-r--r-- diffcore-rename.c 12
1 files changed, 5 insertions, 7 deletions
diff --git a/diffcore-rename.c b/diffcore-rename.c
index ed99fe2cc0..e992698720 100644
--- a/diffcore-rename.c
+++ b/diffcore-rename.c
@@ -133,7 +133,7 @@ static int estimate_similarity(struct diff_filespec *src,
* match than anything else; the destination does not even
* call into this function in that case.
*/
- unsigned long delta_size, base_size, src_copied, literal_added;
+ unsigned long max_size, delta_size, base_size, src_copied, literal_added;
unsigned long delta_limit;
int score;
@@ -144,9 +144,9 @@ static int estimate_similarity(struct diff_filespec *src,
if (!S_ISREG(src->mode) || !S_ISREG(dst->mode))
return 0;
- delta_size = ((src->size < dst->size) ?
- (dst->size - src->size) : (src->size - dst->size));
+ max_size = ((src->size > dst->size) ? src->size : dst->size);
base_size = ((src->size < dst->size) ? src->size : dst->size);
+ delta_size = max_size - base_size;
/* We would not consider edits that change the file size so
* drastically. delta_size must be smaller than
@@ -174,12 +174,10 @@ static int estimate_similarity(struct diff_filespec *src,
/* How similar are they?
* what percentage of material in dst are from source?
*/
- if (dst->size < src_copied)
- score = MAX_SCORE;
- else if (!dst->size)
+ if (!dst->size)
score = 0; /* should not happen */
else
- score = src_copied * MAX_SCORE / dst->size;
+ score = src_copied * MAX_SCORE / max_size;
return score;
}