diff options
-rw-r--r-- | mergesort.c | 121 | ||||
-rw-r--r-- | t/helper/test-mergesort.c | 368 | ||||
-rwxr-xr-x | t/perf/p0071-sort.sh | 40 | ||||
-rwxr-xr-x | t/t0071-sort.sh | 11 |
4 files changed, 473 insertions, 67 deletions
diff --git a/mergesort.c b/mergesort.c index e5fdf2ee4a..6216835566 100644 --- a/mergesort.c +++ b/mergesort.c @@ -1,73 +1,84 @@ #include "cache.h" #include "mergesort.h" -struct mergesort_sublist { - void *ptr; - unsigned long len; -}; - -static void *get_nth_next(void *list, unsigned long n, - void *(*get_next_fn)(const void *)) +/* Combine two sorted lists. Take from `list` on equality. */ +static void *llist_merge(void *list, void *other, + void *(*get_next_fn)(const void *), + void (*set_next_fn)(void *, void *), + int (*compare_fn)(const void *, const void *)) { - while (n-- && list) - list = get_next_fn(list); - return list; -} + void *result = list, *tail; -static void *pop_item(struct mergesort_sublist *l, - void *(*get_next_fn)(const void *)) -{ - void *p = l->ptr; - l->ptr = get_next_fn(l->ptr); - l->len = l->ptr ? (l->len - 1) : 0; - return p; + if (compare_fn(list, other) > 0) { + result = other; + goto other; + } + for (;;) { + do { + tail = list; + list = get_next_fn(list); + if (!list) { + set_next_fn(tail, other); + return result; + } + } while (compare_fn(list, other) <= 0); + set_next_fn(tail, other); + other: + do { + tail = other; + other = get_next_fn(other); + if (!other) { + set_next_fn(tail, list); + return result; + } + } while (compare_fn(list, other) > 0); + set_next_fn(tail, list); + } } +/* + * Perform an iterative mergesort using an array of sublists. + * + * n is the number of items. + * ranks[i] is undefined if n & 2^i == 0, and assumed empty. + * ranks[i] contains a sublist of length 2^i otherwise. + * + * The number of bits in a void pointer limits the number of objects + * that can be created, and thus the number of array elements necessary + * to be able to sort any valid list. + * + * Adding an item to this array is like incrementing a binary number; + * positional values for set bits correspond to sublist lengths. + */ void *llist_mergesort(void *list, void *(*get_next_fn)(const void *), void (*set_next_fn)(void *, void *), int (*compare_fn)(const void *, const void *)) { - unsigned long l; - - if (!list) - return NULL; - for (l = 1; ; l *= 2) { - void *curr; - struct mergesort_sublist p, q; + void *ranks[bitsizeof(void *)]; + size_t n = 0; + int i; - p.ptr = list; - q.ptr = get_nth_next(p.ptr, l, get_next_fn); - if (!q.ptr) - break; - p.len = q.len = l; + while (list) { + void *next = get_next_fn(list); + if (next) + set_next_fn(list, NULL); + for (i = 0; n & (1 << i); i++) + list = llist_merge(ranks[i], list, get_next_fn, + set_next_fn, compare_fn); + n++; + ranks[i] = list; + list = next; + } - if (compare_fn(p.ptr, q.ptr) > 0) - list = curr = pop_item(&q, get_next_fn); + for (i = 0; n; i++, n >>= 1) { + if (!(n & 1)) + continue; + if (list) + list = llist_merge(ranks[i], list, get_next_fn, + set_next_fn, compare_fn); else - list = curr = pop_item(&p, get_next_fn); - - while (p.ptr) { - while (p.len || q.len) { - void *prev = curr; - - if (!p.len) - curr = pop_item(&q, get_next_fn); - else if (!q.len) - curr = pop_item(&p, get_next_fn); - else if (compare_fn(p.ptr, q.ptr) > 0) - curr = pop_item(&q, get_next_fn); - else - curr = pop_item(&p, get_next_fn); - set_next_fn(prev, curr); - } - p.ptr = q.ptr; - p.len = l; - q.ptr = get_nth_next(p.ptr, l, get_next_fn); - q.len = q.ptr ? l : 0; - - } - set_next_fn(curr, NULL); + list = ranks[i]; } return list; } diff --git a/t/helper/test-mergesort.c b/t/helper/test-mergesort.c index c5cffaa4b7..ebf68f7de8 100644 --- a/t/helper/test-mergesort.c +++ b/t/helper/test-mergesort.c @@ -2,6 +2,12 @@ #include "cache.h" #include "mergesort.h" +static uint32_t minstd_rand(uint32_t *state) +{ + *state = (uint64_t)*state * 48271 % 2147483647; + return *state; +} + struct line { char *text; struct line *next; @@ -23,14 +29,12 @@ static int compare_strings(const void *a, const void *b) return strcmp(x->text, y->text); } -int cmd__mergesort(int argc, const char **argv) +static int sort_stdin(void) { struct line *line, *p = NULL, *lines = NULL; struct strbuf sb = STRBUF_INIT; - for (;;) { - if (strbuf_getwholeline(&sb, stdin, '\n')) - break; + while (!strbuf_getline(&sb, stdin)) { line = xmalloc(sizeof(struct line)); line->text = strbuf_detach(&sb, NULL); if (p) { @@ -46,8 +50,362 @@ int cmd__mergesort(int argc, const char **argv) lines = llist_mergesort(lines, get_next, set_next, compare_strings); while (lines) { - printf("%s", lines->text); + puts(lines->text); lines = lines->next; } return 0; } + +static void dist_sawtooth(int *arr, int n, int m) +{ + int i; + for (i = 0; i < n; i++) + arr[i] = i % m; +} + +static void dist_rand(int *arr, int n, int m) +{ + int i; + uint32_t seed = 1; + for (i = 0; i < n; i++) + arr[i] = minstd_rand(&seed) % m; +} + +static void dist_stagger(int *arr, int n, int m) +{ + int i; + for (i = 0; i < n; i++) + arr[i] = (i * m + i) % n; +} + +static void dist_plateau(int *arr, int n, int m) +{ + int i; + for (i = 0; i < n; i++) + arr[i] = (i < m) ? i : m; +} + +static void dist_shuffle(int *arr, int n, int m) +{ + int i, j, k; + uint32_t seed = 1; + for (i = j = 0, k = 1; i < n; i++) + arr[i] = minstd_rand(&seed) % m ? (j += 2) : (k += 2); +} + +#define DIST(name) { #name, dist_##name } + +static struct dist { + const char *name; + void (*fn)(int *arr, int n, int m); +} dist[] = { + DIST(sawtooth), + DIST(rand), + DIST(stagger), + DIST(plateau), + DIST(shuffle), +}; + +static const struct dist *get_dist_by_name(const char *name) +{ + int i; + for (i = 0; i < ARRAY_SIZE(dist); i++) { + if (!strcmp(dist[i].name, name)) + return &dist[i]; + } + return NULL; +} + +static void mode_copy(int *arr, int n) +{ + /* nothing */ +} + +static void mode_reverse(int *arr, int n) +{ + int i, j; + for (i = 0, j = n - 1; i < j; i++, j--) + SWAP(arr[i], arr[j]); +} + +static void mode_reverse_1st_half(int *arr, int n) +{ + mode_reverse(arr, n / 2); +} + +static void mode_reverse_2nd_half(int *arr, int n) +{ + int half = n / 2; + mode_reverse(arr + half, n - half); +} + +static int compare_ints(const void *av, const void *bv) +{ + const int *ap = av, *bp = bv; + int a = *ap, b = *bp; + return (a > b) - (a < b); +} + +static void mode_sort(int *arr, int n) +{ + QSORT(arr, n, compare_ints); +} + +static void mode_dither(int *arr, int n) +{ + int i; + for (i = 0; i < n; i++) + arr[i] += i % 5; +} + +static void unriffle(int *arr, int n, int *tmp) +{ + int i, j; + COPY_ARRAY(tmp, arr, n); + for (i = j = 0; i < n; i += 2) + arr[j++] = tmp[i]; + for (i = 1; i < n; i += 2) + arr[j++] = tmp[i]; +} + +static void unriffle_recursively(int *arr, int n, int *tmp) +{ + if (n > 1) { + int half = n / 2; + unriffle(arr, n, tmp); + unriffle_recursively(arr, half, tmp); + unriffle_recursively(arr + half, n - half, tmp); + } +} + +static void mode_unriffle(int *arr, int n) +{ + int *tmp; + ALLOC_ARRAY(tmp, n); + unriffle_recursively(arr, n, tmp); + free(tmp); +} + +static unsigned int prev_pow2(unsigned int n) +{ + unsigned int pow2 = 1; + while (pow2 * 2 < n) + pow2 *= 2; + return pow2; +} + +static void unriffle_recursively_skewed(int *arr, int n, int *tmp) +{ + if (n > 1) { + int pow2 = prev_pow2(n); + int rest = n - pow2; + unriffle(arr + pow2 - rest, rest * 2, tmp); + unriffle_recursively_skewed(arr, pow2, tmp); + unriffle_recursively_skewed(arr + pow2, rest, tmp); + } +} + +static void mode_unriffle_skewed(int *arr, int n) +{ + int *tmp; + ALLOC_ARRAY(tmp, n); + unriffle_recursively_skewed(arr, n, tmp); + free(tmp); +} + +#define MODE(name) { #name, mode_##name } + +static struct mode { + const char *name; + void (*fn)(int *arr, int n); +} mode[] = { + MODE(copy), + MODE(reverse), + MODE(reverse_1st_half), + MODE(reverse_2nd_half), + MODE(sort), + MODE(dither), + MODE(unriffle), + MODE(unriffle_skewed), +}; + +static const struct mode *get_mode_by_name(const char *name) +{ + int i; + for (i = 0; i < ARRAY_SIZE(mode); i++) { + if (!strcmp(mode[i].name, name)) + return &mode[i]; + } + return NULL; +} + +static int generate(int argc, const char **argv) +{ + const struct dist *dist = NULL; + const struct mode *mode = NULL; + int i, n, m, *arr; + + if (argc != 4) + return 1; + + dist = get_dist_by_name(argv[0]); + mode = get_mode_by_name(argv[1]); + n = strtol(argv[2], NULL, 10); + m = strtol(argv[3], NULL, 10); + if (!dist || !mode) + return 1; + + ALLOC_ARRAY(arr, n); + dist->fn(arr, n, m); + mode->fn(arr, n); + for (i = 0; i < n; i++) + printf("%08x\n", arr[i]); + free(arr); + return 0; +} + +static struct stats { + int get_next, set_next, compare; +} stats; + +struct number { + int value, rank; + struct number *next; +}; + +static void *get_next_number(const void *a) +{ + stats.get_next++; + return ((const struct number *)a)->next; +} + +static void set_next_number(void *a, void *b) +{ + stats.set_next++; + ((struct number *)a)->next = b; +} + +static int compare_numbers(const void *av, const void *bv) +{ + const struct number *an = av, *bn = bv; + int a = an->value, b = bn->value; + stats.compare++; + return (a > b) - (a < b); +} + +static void clear_numbers(struct number *list) +{ + while (list) { + struct number *next = list->next; + free(list); + list = next; + } +} + +static int test(const struct dist *dist, const struct mode *mode, int n, int m) +{ + int *arr; + size_t i; + struct number *curr, *list, **tail; + int is_sorted = 1; + int is_stable = 1; + const char *verdict; + int result = -1; + + ALLOC_ARRAY(arr, n); + dist->fn(arr, n, m); + mode->fn(arr, n); + for (i = 0, tail = &list; i < n; i++) { + curr = xmalloc(sizeof(*curr)); + curr->value = arr[i]; + curr->rank = i; + *tail = curr; + tail = &curr->next; + } + *tail = NULL; + + stats.get_next = stats.set_next = stats.compare = 0; + list = llist_mergesort(list, get_next_number, set_next_number, + compare_numbers); + + QSORT(arr, n, compare_ints); + for (i = 0, curr = list; i < n && curr; i++, curr = curr->next) { + if (arr[i] != curr->value) + is_sorted = 0; + if (curr->next && curr->value == curr->next->value && + curr->rank >= curr->next->rank) + is_stable = 0; + } + if (i < n) { + verdict = "too short"; + } else if (curr) { + verdict = "too long"; + } else if (!is_sorted) { + verdict = "not sorted"; + } else if (!is_stable) { + verdict = "unstable"; + } else { + verdict = "OK"; + result = 0; + } + + printf("%-9s %-16s %8d %8d %8d %8d %8d %s\n", + dist->name, mode->name, n, m, stats.get_next, stats.set_next, + stats.compare, verdict); + + clear_numbers(list); + free(arr); + + return result; +} + +/* + * A version of the qsort certification program from "Engineering a Sort + * Function" by Bentley and McIlroy, Software—Practice and Experience, + * Volume 23, Issue 11, 1249–1265 (November 1993). + */ +static int run_tests(int argc, const char **argv) +{ + const char *argv_default[] = { "100", "1023", "1024", "1025" }; + if (!argc) + return run_tests(ARRAY_SIZE(argv_default), argv_default); + printf("%-9s %-16s %8s %8s %8s %8s %8s %s\n", + "distribut", "mode", "n", "m", "get_next", "set_next", + "compare", "verdict"); + while (argc--) { + int i, j, m, n = strtol(*argv++, NULL, 10); + for (i = 0; i < ARRAY_SIZE(dist); i++) { + for (j = 0; j < ARRAY_SIZE(mode); j++) { + for (m = 1; m < 2 * n; m *= 2) { + if (test(&dist[i], &mode[j], n, m)) + return 1; + } + } + } + } + return 0; +} + +int cmd__mergesort(int argc, const char **argv) +{ + int i; + const char *sep; + + if (argc == 6 && !strcmp(argv[1], "generate")) + return generate(argc - 2, argv + 2); + if (argc == 2 && !strcmp(argv[1], "sort")) + return sort_stdin(); + if (argc > 1 && !strcmp(argv[1], "test")) + return run_tests(argc - 2, argv + 2); + fprintf(stderr, "usage: test-tool mergesort generate <distribution> <mode> <n> <m>\n"); + fprintf(stderr, " or: test-tool mergesort sort\n"); + fprintf(stderr, " or: test-tool mergesort test [<n>...]\n"); + fprintf(stderr, "\n"); + for (i = 0, sep = "distributions: "; i < ARRAY_SIZE(dist); i++, sep = ", ") + fprintf(stderr, "%s%s", sep, dist[i].name); + fprintf(stderr, "\n"); + for (i = 0, sep = "modes: "; i < ARRAY_SIZE(mode); i++, sep = ", ") + fprintf(stderr, "%s%s", sep, mode[i].name); + fprintf(stderr, "\n"); + return 129; +} diff --git a/t/perf/p0071-sort.sh b/t/perf/p0071-sort.sh index 6e924f5fa3..ed366e2e12 100755 --- a/t/perf/p0071-sort.sh +++ b/t/perf/p0071-sort.sh @@ -11,16 +11,42 @@ test_expect_success 'setup' ' git cat-file --batch >unsorted ' -test_perf 'sort(1)' ' - sort <unsorted >expect +test_perf 'sort(1) unsorted' ' + sort <unsorted >sorted ' -test_perf 'string_list_sort()' ' - test-tool string-list sort <unsorted >actual +test_expect_success 'reverse' ' + sort -r <unsorted >reversed ' -test_expect_success 'string_list_sort() sorts like sort(1)' ' - test_cmp_bin expect actual -' +for file in sorted reversed +do + test_perf "sort(1) $file" " + sort <$file >actual + " +done + +for file in unsorted sorted reversed +do + + test_perf "string_list_sort() $file" " + test-tool string-list sort <$file >actual + " + + test_expect_success "string_list_sort() $file sorts like sort(1)" " + test_cmp_bin sorted actual + " +done + +for file in unsorted sorted reversed +do + test_perf "llist_mergesort() $file" " + test-tool mergesort sort <$file >actual + " + + test_expect_success "llist_mergesort() $file sorts like sort(1)" " + test_cmp_bin sorted actual + " +done test_done diff --git a/t/t0071-sort.sh b/t/t0071-sort.sh new file mode 100755 index 0000000000..a8ab174879 --- /dev/null +++ b/t/t0071-sort.sh @@ -0,0 +1,11 @@ +#!/bin/sh + +test_description='verify sort functions' + +. ./test-lib.sh + +test_expect_success 'llist_mergesort()' ' + test-tool mergesort test +' + +test_done |