summaryrefslogtreecommitdiff
path: root/diffcore-delta.c
blob: d03787be65be82b0fcd6dc62dae77013b2576c80 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
#include "cache.h"
#include "diff.h"
#include "diffcore.h"

/*
 * One "extended line" of a buffer as recorded by hash_lines(): how many
 * input bytes the line spans and the hash computed over it.  An entry
 * whose fields are both zero terminates an array of these.
 */
struct linehash {
	unsigned long bytes;	/* bytes consumed from the buffer for this line */
	unsigned long hash;	/* value returned by hash_extended_line() */
};

/*
 * Hash one "extended line" starting at *buf_p, looking at no more than
 * `left` bytes.
 *
 * An extended line is zero or more whitespace bytes (LF included),
 * then one non-whitespace byte, then zero or more non-LF bytes, and it
 * is terminated by a LF (or by running out of input).  Any byte above
 * ' ' counts as non-whitespace; whitespace never contributes to the
 * hash.
 *
 * On return *buf_p points just past the consumed bytes and the hash is
 * returned.  If a NUL byte is met the input is treated as binary:
 * *buf_p is set to NULL and 0 is returned.
 */
static unsigned long hash_extended_line(const unsigned char **buf_p,
					unsigned long left)
{
	const unsigned char *cursor = *buf_p;
	unsigned long remaining = left;
	unsigned long acc = 0;
	int in_body = 0;	/* set once the first non-whitespace byte is seen */

	while (remaining > 0) {
		const unsigned ch = *cursor++;

		remaining--;
		if (ch == 0) {
			/* NUL byte: signal "binary" to the caller. */
			*buf_p = NULL;
			return 0;
		}
		if (!in_body) {
			/* Still skipping the leading whitespace (LF included). */
			if (ch > ' ') {
				acc = ch;
				in_body = 1;
			}
			continue;
		}
		if (ch == '\n')
			break;
		if (ch > ' ')
			acc = acc * 11 + ch;
	}

	*buf_p = cursor;
	return acc;
}

static int linehash_compare(const void *a_, const void *b_)
{
	struct linehash *a = (struct linehash *) a_;
	struct linehash *b = (struct linehash *) b_;
	if (a->hash < b->hash) return -1;
	if (a->hash > b->hash) return 1;
	return 0;
}

/*
 * Split buf[0..size) into extended lines and return a newly allocated
 * array with one struct linehash per line, sorted by ascending hash.
 * The array ends with a terminator entry whose bytes and hash are both
 * zero.  The caller owns the array and releases it with free().
 *
 * Returns NULL, with nothing left allocated, when hash_extended_line()
 * reports a NUL byte (the buffer is treated as binary).
 */
static struct linehash *hash_lines(const unsigned char *buf,
				   unsigned long size)
{
	const unsigned char *eobuf = buf + size;
	struct linehash *line = NULL;
	int alloc = 0, used = 0;

	while (buf < eobuf) {
		const unsigned char *ptr = buf;
		unsigned long hash = hash_extended_line(&buf, eobuf - ptr);

		if (!buf) {
			/* NUL byte seen: give up on this buffer. */
			free(line);
			return NULL;
		}
		if (alloc <= used) {
			alloc = alloc_nr(alloc);
			line = xrealloc(line, sizeof(*line) * alloc);
		}
		line[used].bytes = buf - ptr;
		line[used].hash = hash;
		used++;
	}

	/*
	 * An empty buffer leaves line == NULL, and passing a null base to
	 * qsort() is undefined even when the element count is zero.
	 * Fewer than two entries need no sorting anyway.
	 */
	if (used > 1)
		qsort(line, used, sizeof(*line), linehash_compare);

	/* Terminate the list */
	if (alloc <= used)
		line = xrealloc(line, sizeof(*line) * (used + 1));
	line[used].bytes = line[used].hash = 0;
	return line;
}

/*
 * Estimate how much of dst is made of lines that also appear in src.
 *
 * Both buffers are turned into hash-sorted line arrays by hash_lines(),
 * which are then walked in parallel like a merge:
 *  - equal hashes: the src line's byte count is added to *src_copied;
 *  - a dst line with no matching src hash: its byte count is added to
 *    *literal_added;
 *  - a src line with no matching dst hash is skipped.
 *
 * delta_limit is accepted for the caller's benefit but is not consulted
 * by this implementation.
 *
 * Returns 0 on success.  Returns -1, leaving *src_copied and
 * *literal_added untouched, when hash_lines() returns NULL for either
 * buffer (it does so when it meets a NUL byte).
 */
int diffcore_count_changes(void *src, unsigned long src_size,
			   void *dst, unsigned long dst_size,
			   unsigned long delta_limit,
			   unsigned long *src_copied,
			   unsigned long *literal_added)
{
	struct linehash *src_lines, *dst_lines;
	struct linehash *s, *d;
	unsigned long sc = 0, la = 0;

	src_lines = hash_lines(src, src_size);
	if (!src_lines)
		return -1;
	dst_lines = hash_lines(dst, dst_size);
	if (!dst_lines) {
		free(src_lines);
		return -1;
	}

	/*
	 * Walk with separate cursors so the array bases stay available
	 * for free() below.
	 */
	s = src_lines;
	d = dst_lines;
	while (s->bytes && d->bytes) {
		int cmp = linehash_compare(s, d);
		if (!cmp) {
			sc += s->bytes;
			s++;
			d++;
			continue;
		}
		if (cmp < 0) {
			s++;
			continue;
		}
		la += d->bytes;
		d++;
	}
	/* Whatever remains in dst has no counterpart in src. */
	while (d->bytes) {
		la += d->bytes;
		d++;
	}

	free(src_lines);
	free(dst_lines);

	*src_copied = sc;
	*literal_added = la;
	return 0;
}