From 04ee8b875b96c096c479132adca02fa734c3fa96 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Scharfe?= Date: Sun, 22 Jan 2017 18:51:11 +0100 Subject: compat: add qsort_s() The function qsort_s() was introduced with C11 Annex K; it provides the ability to pass a context pointer to the comparison function, supports the convention of using a NULL pointer for an empty array and performs a few safety checks. Add an implementation based on compat/qsort.c for platforms that lack a native standards-compliant qsort_s() (i.e. basically everyone). It doesn't perform the full range of possible checks: It uses size_t instead of rsize_t and doesn't check nmemb and size against RSIZE_MAX because we probably don't have the restricted size type defined. For the same reason it returns int instead of errno_t. Signed-off-by: Rene Scharfe Signed-off-by: Junio C Hamano --- Makefile | 8 +++++++ compat/qsort_s.c | 69 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ git-compat-util.h | 6 +++++ 3 files changed, 83 insertions(+) create mode 100644 compat/qsort_s.c diff --git a/Makefile b/Makefile index f53fcc90d7..8c549cecf0 100644 --- a/Makefile +++ b/Makefile @@ -279,6 +279,9 @@ all:: # is a simplified version of the merge sort used in glibc. This is # recommended if Git triggers O(n^2) behavior in your platform's qsort(). # +# Define HAVE_ISO_QSORT_S if your platform provides a qsort_s() that's +# compatible with the one described in C11 Annex K. +# # Define UNRELIABLE_FSTAT if your system's fstat does not return the same # information on a not yet closed file that lstat would return for the same # file after it was closed. @@ -1423,6 +1426,11 @@ ifdef INTERNAL_QSORT COMPAT_CFLAGS += -DINTERNAL_QSORT COMPAT_OBJS += compat/qsort.o endif +ifdef HAVE_ISO_QSORT_S + COMPAT_CFLAGS += -DHAVE_ISO_QSORT_S +else + COMPAT_OBJS += compat/qsort_s.o +endif ifdef RUNTIME_PREFIX COMPAT_CFLAGS += -DRUNTIME_PREFIX endif diff --git a/compat/qsort_s.c b/compat/qsort_s.c new file mode 100644 index 0000000000..52d1f0a73d --- /dev/null +++ b/compat/qsort_s.c @@ -0,0 +1,69 @@ +#include "../git-compat-util.h" + +/* + * A merge sort implementation, simplified from the qsort implementation + * by Mike Haertel, which is a part of the GNU C Library. + * Added context pointer, safety checks and return value. + */ + +static void msort_with_tmp(void *b, size_t n, size_t s, + int (*cmp)(const void *, const void *, void *), + char *t, void *ctx) +{ + char *tmp; + char *b1, *b2; + size_t n1, n2; + + if (n <= 1) + return; + + n1 = n / 2; + n2 = n - n1; + b1 = b; + b2 = (char *)b + (n1 * s); + + msort_with_tmp(b1, n1, s, cmp, t, ctx); + msort_with_tmp(b2, n2, s, cmp, t, ctx); + + tmp = t; + + while (n1 > 0 && n2 > 0) { + if (cmp(b1, b2, ctx) <= 0) { + memcpy(tmp, b1, s); + tmp += s; + b1 += s; + --n1; + } else { + memcpy(tmp, b2, s); + tmp += s; + b2 += s; + --n2; + } + } + if (n1 > 0) + memcpy(tmp, b1, n1 * s); + memcpy(b, t, (n - n2) * s); +} + +int git_qsort_s(void *b, size_t n, size_t s, + int (*cmp)(const void *, const void *, void *), void *ctx) +{ + const size_t size = st_mult(n, s); + char buf[1024]; + + if (!n) + return 0; + if (!b || !cmp) + return -1; + + if (size < sizeof(buf)) { + /* The temporary array fits on the small on-stack buffer. */ + msort_with_tmp(b, n, s, cmp, buf, ctx); + } else { + /* It's somewhat large, so malloc it. */ + char *tmp = xmalloc(size); + msort_with_tmp(b, n, s, cmp, tmp, ctx); + free(tmp); + } + return 0; +} diff --git a/git-compat-util.h b/git-compat-util.h index 87237b092b..f706744e6a 100644 --- a/git-compat-util.h +++ b/git-compat-util.h @@ -988,6 +988,12 @@ static inline void sane_qsort(void *base, size_t nmemb, size_t size, qsort(base, nmemb, size, compar); } +#ifndef HAVE_ISO_QSORT_S +int git_qsort_s(void *base, size_t nmemb, size_t size, + int (*compar)(const void *, const void *, void *), void *ctx); +#define qsort_s git_qsort_s +#endif + #ifndef REG_STARTEND #error "Git requires REG_STARTEND support. Compile with NO_REGEX=NeedsStartEnd" #endif -- cgit v1.2.3 From 3ca869940994866796edf9e53e52d80f82c3c04c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Scharfe?= Date: Sun, 22 Jan 2017 18:52:13 +0100 Subject: add QSORT_S Add the macro QSORT_S, a convenient wrapper for qsort_s() that infers the size of the array elements and dies on error. Basically all possible errors are programming mistakes (passing NULL as base of a non-empty array, passing NULL as comparison function, out-of-bounds accesses), so terminating the program should be acceptable for most callers. Signed-off-by: Rene Scharfe Signed-off-by: Junio C Hamano --- git-compat-util.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/git-compat-util.h b/git-compat-util.h index f706744e6a..f46d40e4a4 100644 --- a/git-compat-util.h +++ b/git-compat-util.h @@ -994,6 +994,11 @@ int git_qsort_s(void *base, size_t nmemb, size_t size, #define qsort_s git_qsort_s #endif +#define QSORT_S(base, n, compar, ctx) do { \ + if (qsort_s((base), (n), sizeof(*(base)), compar, ctx)) \ + die("BUG: qsort_s() failed"); \ +} while (0) + #ifndef REG_STARTEND #error "Git requires REG_STARTEND support. Compile with NO_REGEX=NeedsStartEnd" #endif -- cgit v1.2.3 From 564e94e6190a1a41d8516eaf58b268785ed0a7a7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Scharfe?= Date: Sun, 22 Jan 2017 18:53:57 +0100 Subject: perf: add basic sort performance test Add a sort command to test-string-list that reads lines from stdin, stores them in a string_list and then sorts it. Use it in a simple perf test script to measure the performance of string_list_sort(). Signed-off-by: Rene Scharfe Signed-off-by: Junio C Hamano --- t/helper/test-string-list.c | 25 +++++++++++++++++++++++++ t/perf/p0071-sort.sh | 26 ++++++++++++++++++++++++++ 2 files changed, 51 insertions(+) create mode 100755 t/perf/p0071-sort.sh diff --git a/t/helper/test-string-list.c b/t/helper/test-string-list.c index 4a68967bd1..c502fa16d3 100644 --- a/t/helper/test-string-list.c +++ b/t/helper/test-string-list.c @@ -97,6 +97,31 @@ int cmd_main(int argc, const char **argv) return 0; } + if (argc == 2 && !strcmp(argv[1], "sort")) { + struct string_list list = STRING_LIST_INIT_NODUP; + struct strbuf sb = STRBUF_INIT; + struct string_list_item *item; + + strbuf_read(&sb, 0, 0); + + /* + * Split by newline, but don't create a string_list item + * for the empty string after the last separator. + */ + if (sb.buf[sb.len - 1] == '\n') + strbuf_setlen(&sb, sb.len - 1); + string_list_split_in_place(&list, sb.buf, '\n', -1); + + string_list_sort(&list); + + for_each_string_list_item(item, &list) + puts(item->string); + + string_list_clear(&list, 0); + strbuf_release(&sb); + return 0; + } + fprintf(stderr, "%s: unknown function name: %s\n", argv[0], argv[1] ? argv[1] : "(there was none)"); return 1; diff --git a/t/perf/p0071-sort.sh b/t/perf/p0071-sort.sh new file mode 100755 index 0000000000..7c9a35a646 --- /dev/null +++ b/t/perf/p0071-sort.sh @@ -0,0 +1,26 @@ +#!/bin/sh + +test_description='Basic sort performance tests' +. ./perf-lib.sh + +test_perf_default_repo + +test_expect_success 'setup' ' + git ls-files --stage "*.[ch]" "*.sh" | + cut -f2 -d" " | + git cat-file --batch >unsorted +' + +test_perf 'sort(1)' ' + sort expect +' + +test_perf 'string_list_sort()' ' + test-string-list sort actual +' + +test_expect_success 'string_list_sort() sorts like sort(1)' ' + test_cmp_bin expect actual +' + +test_done -- cgit v1.2.3 From 5ebd9472a490b12ff941b8085b1b0932a755ffcc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Scharfe?= Date: Sun, 22 Jan 2017 18:57:09 +0100 Subject: string-list: use QSORT_S in string_list_sort() Pass the comparison function to cmp_items() via the context parameter of qsort_s() instead of using a global variable. That allows calling string_list_sort() from multiple parallel threads. Our qsort_s() in compat/ is slightly slower than qsort(1) from glibc 2.24 for sorting lots of lines: Test HEAD^ HEAD --------------------------------------------------------------------- 0071.2: sort(1) 0.10(0.22+0.01) 0.09(0.21+0.00) -10.0% 0071.3: string_list_sort() 0.16(0.15+0.01) 0.17(0.15+0.00) +6.3% GNU sort(1) version 8.26 is significantly faster because it uses multiple parallel threads; with the unportable option --parallel=1 it becomes slower: Test HEAD^ HEAD -------------------------------------------------------------------- 0071.2: sort(1) 0.21(0.18+0.01) 0.20(0.18+0.01) -4.8% 0071.3: string_list_sort() 0.16(0.13+0.02) 0.17(0.15+0.01) +6.3% There is some instability -- the numbers for the sort(1) check shouldn't be affected by this patch. Anyway, the performance of our qsort_s() implementation is apparently good enough, at least for this test. Signed-off-by: Rene Scharfe Signed-off-by: Junio C Hamano --- string-list.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/string-list.c b/string-list.c index 8c83cac189..45016ad86d 100644 --- a/string-list.c +++ b/string-list.c @@ -211,21 +211,18 @@ struct string_list_item *string_list_append(struct string_list *list, list->strdup_strings ? xstrdup(string) : (char *)string); } -/* Yuck */ -static compare_strings_fn compare_for_qsort; - -/* Only call this from inside string_list_sort! */ -static int cmp_items(const void *a, const void *b) +static int cmp_items(const void *a, const void *b, void *ctx) { + compare_strings_fn cmp = ctx; const struct string_list_item *one = a; const struct string_list_item *two = b; - return compare_for_qsort(one->string, two->string); + return cmp(one->string, two->string); } void string_list_sort(struct string_list *list) { - compare_for_qsort = list->cmp ? list->cmp : strcmp; - QSORT(list->items, list->nr, cmp_items); + QSORT_S(list->items, list->nr, cmp_items, + list->cmp ? list->cmp : strcmp); } struct string_list_item *unsorted_string_list_lookup(struct string_list *list, -- cgit v1.2.3 From 83fc4d64fec779d73b18494461613ef911236daf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Scharfe?= Date: Sun, 22 Jan 2017 18:58:07 +0100 Subject: ref-filter: use QSORT_S in ref_array_sort() Pass the array of sort keys to compare_refs() via the context parameter of qsort_s() instead of using a global variable; that's cleaner and simpler. If ref_array_sort() is to be called from multiple parallel threads then care still needs to be taken that the global variable used_atom is not modified concurrently. Signed-off-by: Rene Scharfe Signed-off-by: Junio C Hamano --- ref-filter.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/ref-filter.c b/ref-filter.c index f5f7a70c6d..f39e6617aa 100644 --- a/ref-filter.c +++ b/ref-filter.c @@ -1555,8 +1555,7 @@ static int cmp_ref_sorting(struct ref_sorting *s, struct ref_array_item *a, stru return (s->reverse) ? -cmp : cmp; } -static struct ref_sorting *ref_sorting; -static int compare_refs(const void *a_, const void *b_) +static int compare_refs(const void *a_, const void *b_, void *ref_sorting) { struct ref_array_item *a = *((struct ref_array_item **)a_); struct ref_array_item *b = *((struct ref_array_item **)b_); @@ -1572,8 +1571,7 @@ static int compare_refs(const void *a_, const void *b_) void ref_array_sort(struct ref_sorting *sorting, struct ref_array *array) { - ref_sorting = sorting; - QSORT(array->items, array->nr, compare_refs); + QSORT_S(array->items, array->nr, compare_refs, sorting); } static void append_literal(const char *cp, const char *ep, struct ref_formatting_state *state) -- cgit v1.2.3