summaryrefslogtreecommitdiff
path: root/t/helper
diff options
context:
space:
mode:
Diffstat (limited to 't/helper')
-rw-r--r--t/helper/test-tool.c1
-rw-r--r--t/helper/test-tool.h1
-rw-r--r--t/helper/test-xml-encode.c80
3 files changed, 82 insertions, 0 deletions
diff --git a/t/helper/test-tool.c b/t/helper/test-tool.c
index bfb195b1a8..4b4b397d93 100644
--- a/t/helper/test-tool.c
+++ b/t/helper/test-tool.c
@@ -49,6 +49,7 @@ static struct test_cmd cmds[] = {
{ "submodule-nested-repo-config", cmd__submodule_nested_repo_config },
{ "subprocess", cmd__subprocess },
{ "urlmatch-normalization", cmd__urlmatch_normalization },
+ { "xml-encode", cmd__xml_encode },
{ "wildmatch", cmd__wildmatch },
#ifdef GIT_WINDOWS_NATIVE
{ "windows-named-pipe", cmd__windows_named_pipe },
diff --git a/t/helper/test-tool.h b/t/helper/test-tool.h
index 042f12464b..c0ab65e370 100644
--- a/t/helper/test-tool.h
+++ b/t/helper/test-tool.h
@@ -45,6 +45,7 @@ int cmd__submodule_config(int argc, const char **argv);
int cmd__submodule_nested_repo_config(int argc, const char **argv);
int cmd__subprocess(int argc, const char **argv);
int cmd__urlmatch_normalization(int argc, const char **argv);
+int cmd__xml_encode(int argc, const char **argv);
int cmd__wildmatch(int argc, const char **argv);
#ifdef GIT_WINDOWS_NATIVE
int cmd__windows_named_pipe(int argc, const char **argv);
diff --git a/t/helper/test-xml-encode.c b/t/helper/test-xml-encode.c
new file mode 100644
index 0000000000..a648bbd961
--- /dev/null
+++ b/t/helper/test-xml-encode.c
@@ -0,0 +1,80 @@
+#include "test-tool.h"
+
+static const char *utf8_replace_character = "�";
+
+/*
+ * Encodes (possibly incorrect) UTF-8 on <stdin> to <stdout>, to be embedded
+ * in an XML file.
+ */
+int cmd__xml_encode(int argc, const char **argv)
+{
+ unsigned char buf[1024], tmp[4], *tmp2 = NULL;
+ ssize_t cur = 0, len = 1, remaining = 0;
+ unsigned char ch;
+
+ for (;;) {
+ if (++cur == len) {
+ len = xread(0, buf, sizeof(buf));
+ if (!len)
+ return 0;
+ if (len < 0)
+ die_errno("Could not read <stdin>");
+ cur = 0;
+ }
+ ch = buf[cur];
+
+ if (tmp2) {
+ if ((ch & 0xc0) != 0x80) {
+ fputs(utf8_replace_character, stdout);
+ tmp2 = NULL;
+ cur--;
+ continue;
+ }
+ *tmp2 = ch;
+ tmp2++;
+ if (--remaining == 0) {
+ fwrite(tmp, tmp2 - tmp, 1, stdout);
+ tmp2 = NULL;
+ }
+ continue;
+ }
+
+ if (!(ch & 0x80)) {
+ /* 0xxxxxxx */
+ if (ch == '&')
+ fputs("&amp;", stdout);
+ else if (ch == '\'')
+ fputs("&apos;", stdout);
+ else if (ch == '"')
+ fputs("&quot;", stdout);
+ else if (ch == '<')
+ fputs("&lt;", stdout);
+ else if (ch == '>')
+ fputs("&gt;", stdout);
+ else if (ch >= 0x20)
+ fputc(ch, stdout);
+ else if (ch == 0x09 || ch == 0x0a || ch == 0x0d)
+ fprintf(stdout, "&#x%02x;", ch);
+ else
+ fputs(utf8_replace_character, stdout);
+ } else if ((ch & 0xe0) == 0xc0) {
+ /* 110XXXXx 10xxxxxx */
+ tmp[0] = ch;
+ remaining = 1;
+ tmp2 = tmp + 1;
+ } else if ((ch & 0xf0) == 0xe0) {
+ /* 1110XXXX 10Xxxxxx 10xxxxxx */
+ tmp[0] = ch;
+ remaining = 2;
+ tmp2 = tmp + 1;
+ } else if ((ch & 0xf8) == 0xf0) {
+ /* 11110XXX 10XXxxxx 10xxxxxx 10xxxxxx */
+ tmp[0] = ch;
+ remaining = 3;
+ tmp2 = tmp + 1;
+ } else
+ fputs(utf8_replace_character, stdout);
+ }
+
+ return 0;
+}