summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--mergesort.c121
-rw-r--r--t/helper/test-mergesort.c368
-rwxr-xr-xt/perf/p0071-sort.sh40
-rwxr-xr-xt/t0071-sort.sh11
4 files changed, 473 insertions, 67 deletions
diff --git a/mergesort.c b/mergesort.c
index e5fdf2ee4a..6216835566 100644
--- a/mergesort.c
+++ b/mergesort.c
@@ -1,73 +1,84 @@
#include "cache.h"
#include "mergesort.h"
-struct mergesort_sublist {
- void *ptr;
- unsigned long len;
-};
-
-static void *get_nth_next(void *list, unsigned long n,
- void *(*get_next_fn)(const void *))
+/* Combine two sorted lists. Take from `list` on equality. */
+static void *llist_merge(void *list, void *other,
+ void *(*get_next_fn)(const void *),
+ void (*set_next_fn)(void *, void *),
+ int (*compare_fn)(const void *, const void *))
{
- while (n-- && list)
- list = get_next_fn(list);
- return list;
-}
+ void *result = list, *tail;
-static void *pop_item(struct mergesort_sublist *l,
- void *(*get_next_fn)(const void *))
-{
- void *p = l->ptr;
- l->ptr = get_next_fn(l->ptr);
- l->len = l->ptr ? (l->len - 1) : 0;
- return p;
+ if (compare_fn(list, other) > 0) {
+ result = other;
+ goto other;
+ }
+ for (;;) {
+ do {
+ tail = list;
+ list = get_next_fn(list);
+ if (!list) {
+ set_next_fn(tail, other);
+ return result;
+ }
+ } while (compare_fn(list, other) <= 0);
+ set_next_fn(tail, other);
+ other:
+ do {
+ tail = other;
+ other = get_next_fn(other);
+ if (!other) {
+ set_next_fn(tail, list);
+ return result;
+ }
+ } while (compare_fn(list, other) > 0);
+ set_next_fn(tail, list);
+ }
}
+/*
+ * Perform an iterative mergesort using an array of sublists.
+ *
+ * n is the number of items.
+ * ranks[i] is undefined if n & 2^i == 0, and assumed empty.
+ * ranks[i] contains a sublist of length 2^i otherwise.
+ *
+ * The number of bits in a void pointer limits the number of objects
+ * that can be created, and thus the number of array elements necessary
+ * to be able to sort any valid list.
+ *
+ * Adding an item to this array is like incrementing a binary number;
+ * positional values for set bits correspond to sublist lengths.
+ */
void *llist_mergesort(void *list,
void *(*get_next_fn)(const void *),
void (*set_next_fn)(void *, void *),
int (*compare_fn)(const void *, const void *))
{
- unsigned long l;
-
- if (!list)
- return NULL;
- for (l = 1; ; l *= 2) {
- void *curr;
- struct mergesort_sublist p, q;
+ void *ranks[bitsizeof(void *)];
+ size_t n = 0;
+ int i;
- p.ptr = list;
- q.ptr = get_nth_next(p.ptr, l, get_next_fn);
- if (!q.ptr)
- break;
- p.len = q.len = l;
+ while (list) {
+ void *next = get_next_fn(list);
+ if (next)
+ set_next_fn(list, NULL);
+ for (i = 0; n & (1 << i); i++)
+ list = llist_merge(ranks[i], list, get_next_fn,
+ set_next_fn, compare_fn);
+ n++;
+ ranks[i] = list;
+ list = next;
+ }
- if (compare_fn(p.ptr, q.ptr) > 0)
- list = curr = pop_item(&q, get_next_fn);
+ for (i = 0; n; i++, n >>= 1) {
+ if (!(n & 1))
+ continue;
+ if (list)
+ list = llist_merge(ranks[i], list, get_next_fn,
+ set_next_fn, compare_fn);
else
- list = curr = pop_item(&p, get_next_fn);
-
- while (p.ptr) {
- while (p.len || q.len) {
- void *prev = curr;
-
- if (!p.len)
- curr = pop_item(&q, get_next_fn);
- else if (!q.len)
- curr = pop_item(&p, get_next_fn);
- else if (compare_fn(p.ptr, q.ptr) > 0)
- curr = pop_item(&q, get_next_fn);
- else
- curr = pop_item(&p, get_next_fn);
- set_next_fn(prev, curr);
- }
- p.ptr = q.ptr;
- p.len = l;
- q.ptr = get_nth_next(p.ptr, l, get_next_fn);
- q.len = q.ptr ? l : 0;
-
- }
- set_next_fn(curr, NULL);
+ list = ranks[i];
}
return list;
}
diff --git a/t/helper/test-mergesort.c b/t/helper/test-mergesort.c
index c5cffaa4b7..ebf68f7de8 100644
--- a/t/helper/test-mergesort.c
+++ b/t/helper/test-mergesort.c
@@ -2,6 +2,12 @@
#include "cache.h"
#include "mergesort.h"
+static uint32_t minstd_rand(uint32_t *state)
+{
+ *state = (uint64_t)*state * 48271 % 2147483647;
+ return *state;
+}
+
struct line {
char *text;
struct line *next;
@@ -23,14 +29,12 @@ static int compare_strings(const void *a, const void *b)
return strcmp(x->text, y->text);
}
-int cmd__mergesort(int argc, const char **argv)
+static int sort_stdin(void)
{
struct line *line, *p = NULL, *lines = NULL;
struct strbuf sb = STRBUF_INIT;
- for (;;) {
- if (strbuf_getwholeline(&sb, stdin, '\n'))
- break;
+ while (!strbuf_getline(&sb, stdin)) {
line = xmalloc(sizeof(struct line));
line->text = strbuf_detach(&sb, NULL);
if (p) {
@@ -46,8 +50,362 @@ int cmd__mergesort(int argc, const char **argv)
lines = llist_mergesort(lines, get_next, set_next, compare_strings);
while (lines) {
- printf("%s", lines->text);
+ puts(lines->text);
lines = lines->next;
}
return 0;
}
+
+static void dist_sawtooth(int *arr, int n, int m)
+{
+ int i;
+ for (i = 0; i < n; i++)
+ arr[i] = i % m;
+}
+
+static void dist_rand(int *arr, int n, int m)
+{
+ int i;
+ uint32_t seed = 1;
+ for (i = 0; i < n; i++)
+ arr[i] = minstd_rand(&seed) % m;
+}
+
+static void dist_stagger(int *arr, int n, int m)
+{
+ int i;
+ for (i = 0; i < n; i++)
+ arr[i] = (i * m + i) % n;
+}
+
+static void dist_plateau(int *arr, int n, int m)
+{
+ int i;
+ for (i = 0; i < n; i++)
+ arr[i] = (i < m) ? i : m;
+}
+
+static void dist_shuffle(int *arr, int n, int m)
+{
+ int i, j, k;
+ uint32_t seed = 1;
+ for (i = j = 0, k = 1; i < n; i++)
+ arr[i] = minstd_rand(&seed) % m ? (j += 2) : (k += 2);
+}
+
+#define DIST(name) { #name, dist_##name }
+
+static struct dist {
+ const char *name;
+ void (*fn)(int *arr, int n, int m);
+} dist[] = {
+ DIST(sawtooth),
+ DIST(rand),
+ DIST(stagger),
+ DIST(plateau),
+ DIST(shuffle),
+};
+
+static const struct dist *get_dist_by_name(const char *name)
+{
+ int i;
+ for (i = 0; i < ARRAY_SIZE(dist); i++) {
+ if (!strcmp(dist[i].name, name))
+ return &dist[i];
+ }
+ return NULL;
+}
+
+static void mode_copy(int *arr, int n)
+{
+ /* nothing */
+}
+
+static void mode_reverse(int *arr, int n)
+{
+ int i, j;
+ for (i = 0, j = n - 1; i < j; i++, j--)
+ SWAP(arr[i], arr[j]);
+}
+
+static void mode_reverse_1st_half(int *arr, int n)
+{
+ mode_reverse(arr, n / 2);
+}
+
+static void mode_reverse_2nd_half(int *arr, int n)
+{
+ int half = n / 2;
+ mode_reverse(arr + half, n - half);
+}
+
+static int compare_ints(const void *av, const void *bv)
+{
+ const int *ap = av, *bp = bv;
+ int a = *ap, b = *bp;
+ return (a > b) - (a < b);
+}
+
+static void mode_sort(int *arr, int n)
+{
+ QSORT(arr, n, compare_ints);
+}
+
+static void mode_dither(int *arr, int n)
+{
+ int i;
+ for (i = 0; i < n; i++)
+ arr[i] += i % 5;
+}
+
+static void unriffle(int *arr, int n, int *tmp)
+{
+ int i, j;
+ COPY_ARRAY(tmp, arr, n);
+ for (i = j = 0; i < n; i += 2)
+ arr[j++] = tmp[i];
+ for (i = 1; i < n; i += 2)
+ arr[j++] = tmp[i];
+}
+
+static void unriffle_recursively(int *arr, int n, int *tmp)
+{
+ if (n > 1) {
+ int half = n / 2;
+ unriffle(arr, n, tmp);
+ unriffle_recursively(arr, half, tmp);
+ unriffle_recursively(arr + half, n - half, tmp);
+ }
+}
+
+static void mode_unriffle(int *arr, int n)
+{
+ int *tmp;
+ ALLOC_ARRAY(tmp, n);
+ unriffle_recursively(arr, n, tmp);
+ free(tmp);
+}
+
+static unsigned int prev_pow2(unsigned int n)
+{
+ unsigned int pow2 = 1;
+ while (pow2 * 2 < n)
+ pow2 *= 2;
+ return pow2;
+}
+
+static void unriffle_recursively_skewed(int *arr, int n, int *tmp)
+{
+ if (n > 1) {
+ int pow2 = prev_pow2(n);
+ int rest = n - pow2;
+ unriffle(arr + pow2 - rest, rest * 2, tmp);
+ unriffle_recursively_skewed(arr, pow2, tmp);
+ unriffle_recursively_skewed(arr + pow2, rest, tmp);
+ }
+}
+
+static void mode_unriffle_skewed(int *arr, int n)
+{
+ int *tmp;
+ ALLOC_ARRAY(tmp, n);
+ unriffle_recursively_skewed(arr, n, tmp);
+ free(tmp);
+}
+
+#define MODE(name) { #name, mode_##name }
+
+static struct mode {
+ const char *name;
+ void (*fn)(int *arr, int n);
+} mode[] = {
+ MODE(copy),
+ MODE(reverse),
+ MODE(reverse_1st_half),
+ MODE(reverse_2nd_half),
+ MODE(sort),
+ MODE(dither),
+ MODE(unriffle),
+ MODE(unriffle_skewed),
+};
+
+static const struct mode *get_mode_by_name(const char *name)
+{
+ int i;
+ for (i = 0; i < ARRAY_SIZE(mode); i++) {
+ if (!strcmp(mode[i].name, name))
+ return &mode[i];
+ }
+ return NULL;
+}
+
+static int generate(int argc, const char **argv)
+{
+ const struct dist *dist = NULL;
+ const struct mode *mode = NULL;
+ int i, n, m, *arr;
+
+ if (argc != 4)
+ return 1;
+
+ dist = get_dist_by_name(argv[0]);
+ mode = get_mode_by_name(argv[1]);
+ n = strtol(argv[2], NULL, 10);
+ m = strtol(argv[3], NULL, 10);
+ if (!dist || !mode)
+ return 1;
+
+ ALLOC_ARRAY(arr, n);
+ dist->fn(arr, n, m);
+ mode->fn(arr, n);
+ for (i = 0; i < n; i++)
+ printf("%08x\n", arr[i]);
+ free(arr);
+ return 0;
+}
+
+static struct stats {
+ int get_next, set_next, compare;
+} stats;
+
+struct number {
+ int value, rank;
+ struct number *next;
+};
+
+static void *get_next_number(const void *a)
+{
+ stats.get_next++;
+ return ((const struct number *)a)->next;
+}
+
+static void set_next_number(void *a, void *b)
+{
+ stats.set_next++;
+ ((struct number *)a)->next = b;
+}
+
+static int compare_numbers(const void *av, const void *bv)
+{
+ const struct number *an = av, *bn = bv;
+ int a = an->value, b = bn->value;
+ stats.compare++;
+ return (a > b) - (a < b);
+}
+
+static void clear_numbers(struct number *list)
+{
+ while (list) {
+ struct number *next = list->next;
+ free(list);
+ list = next;
+ }
+}
+
+static int test(const struct dist *dist, const struct mode *mode, int n, int m)
+{
+ int *arr;
+ size_t i;
+ struct number *curr, *list, **tail;
+ int is_sorted = 1;
+ int is_stable = 1;
+ const char *verdict;
+ int result = -1;
+
+ ALLOC_ARRAY(arr, n);
+ dist->fn(arr, n, m);
+ mode->fn(arr, n);
+ for (i = 0, tail = &list; i < n; i++) {
+ curr = xmalloc(sizeof(*curr));
+ curr->value = arr[i];
+ curr->rank = i;
+ *tail = curr;
+ tail = &curr->next;
+ }
+ *tail = NULL;
+
+ stats.get_next = stats.set_next = stats.compare = 0;
+ list = llist_mergesort(list, get_next_number, set_next_number,
+ compare_numbers);
+
+ QSORT(arr, n, compare_ints);
+ for (i = 0, curr = list; i < n && curr; i++, curr = curr->next) {
+ if (arr[i] != curr->value)
+ is_sorted = 0;
+ if (curr->next && curr->value == curr->next->value &&
+ curr->rank >= curr->next->rank)
+ is_stable = 0;
+ }
+ if (i < n) {
+ verdict = "too short";
+ } else if (curr) {
+ verdict = "too long";
+ } else if (!is_sorted) {
+ verdict = "not sorted";
+ } else if (!is_stable) {
+ verdict = "unstable";
+ } else {
+ verdict = "OK";
+ result = 0;
+ }
+
+ printf("%-9s %-16s %8d %8d %8d %8d %8d %s\n",
+ dist->name, mode->name, n, m, stats.get_next, stats.set_next,
+ stats.compare, verdict);
+
+ clear_numbers(list);
+ free(arr);
+
+ return result;
+}
+
+/*
+ * A version of the qsort certification program from "Engineering a Sort
+ * Function" by Bentley and McIlroy, Software—Practice and Experience,
+ * Volume 23, Issue 11, 1249–1265 (November 1993).
+ */
+static int run_tests(int argc, const char **argv)
+{
+ const char *argv_default[] = { "100", "1023", "1024", "1025" };
+ if (!argc)
+ return run_tests(ARRAY_SIZE(argv_default), argv_default);
+ printf("%-9s %-16s %8s %8s %8s %8s %8s %s\n",
+ "distribut", "mode", "n", "m", "get_next", "set_next",
+ "compare", "verdict");
+ while (argc--) {
+ int i, j, m, n = strtol(*argv++, NULL, 10);
+ for (i = 0; i < ARRAY_SIZE(dist); i++) {
+ for (j = 0; j < ARRAY_SIZE(mode); j++) {
+ for (m = 1; m < 2 * n; m *= 2) {
+ if (test(&dist[i], &mode[j], n, m))
+ return 1;
+ }
+ }
+ }
+ }
+ return 0;
+}
+
+int cmd__mergesort(int argc, const char **argv)
+{
+ int i;
+ const char *sep;
+
+ if (argc == 6 && !strcmp(argv[1], "generate"))
+ return generate(argc - 2, argv + 2);
+ if (argc == 2 && !strcmp(argv[1], "sort"))
+ return sort_stdin();
+ if (argc > 1 && !strcmp(argv[1], "test"))
+ return run_tests(argc - 2, argv + 2);
+ fprintf(stderr, "usage: test-tool mergesort generate <distribution> <mode> <n> <m>\n");
+ fprintf(stderr, " or: test-tool mergesort sort\n");
+ fprintf(stderr, " or: test-tool mergesort test [<n>...]\n");
+ fprintf(stderr, "\n");
+ for (i = 0, sep = "distributions: "; i < ARRAY_SIZE(dist); i++, sep = ", ")
+ fprintf(stderr, "%s%s", sep, dist[i].name);
+ fprintf(stderr, "\n");
+ for (i = 0, sep = "modes: "; i < ARRAY_SIZE(mode); i++, sep = ", ")
+ fprintf(stderr, "%s%s", sep, mode[i].name);
+ fprintf(stderr, "\n");
+ return 129;
+}
diff --git a/t/perf/p0071-sort.sh b/t/perf/p0071-sort.sh
index 6e924f5fa3..ed366e2e12 100755
--- a/t/perf/p0071-sort.sh
+++ b/t/perf/p0071-sort.sh
@@ -11,16 +11,42 @@ test_expect_success 'setup' '
git cat-file --batch >unsorted
'
-test_perf 'sort(1)' '
- sort <unsorted >expect
+test_perf 'sort(1) unsorted' '
+ sort <unsorted >sorted
'
-test_perf 'string_list_sort()' '
- test-tool string-list sort <unsorted >actual
+test_expect_success 'reverse' '
+ sort -r <unsorted >reversed
'
-test_expect_success 'string_list_sort() sorts like sort(1)' '
- test_cmp_bin expect actual
-'
+for file in sorted reversed
+do
+ test_perf "sort(1) $file" "
+ sort <$file >actual
+ "
+done
+
+for file in unsorted sorted reversed
+do
+
+ test_perf "string_list_sort() $file" "
+ test-tool string-list sort <$file >actual
+ "
+
+ test_expect_success "string_list_sort() $file sorts like sort(1)" "
+ test_cmp_bin sorted actual
+ "
+done
+
+for file in unsorted sorted reversed
+do
+ test_perf "llist_mergesort() $file" "
+ test-tool mergesort sort <$file >actual
+ "
+
+ test_expect_success "llist_mergesort() $file sorts like sort(1)" "
+ test_cmp_bin sorted actual
+ "
+done
test_done
diff --git a/t/t0071-sort.sh b/t/t0071-sort.sh
new file mode 100755
index 0000000000..a8ab174879
--- /dev/null
+++ b/t/t0071-sort.sh
@@ -0,0 +1,11 @@
+#!/bin/sh
+
+test_description='verify sort functions'
+
+. ./test-lib.sh
+
+test_expect_success 'llist_mergesort()' '
+ test-tool mergesort test
+'
+
+test_done