diff options
Diffstat (limited to 't/helper')
-rw-r--r-- | t/helper/test-tool.c | 1 | ||||
-rw-r--r-- | t/helper/test-tool.h | 1 | ||||
-rw-r--r-- | t/helper/test-xml-encode.c | 80 |
3 files changed, 82 insertions, 0 deletions
diff --git a/t/helper/test-tool.c b/t/helper/test-tool.c index bfb195b1a8..4b4b397d93 100644 --- a/t/helper/test-tool.c +++ b/t/helper/test-tool.c @@ -49,6 +49,7 @@ static struct test_cmd cmds[] = { { "submodule-nested-repo-config", cmd__submodule_nested_repo_config }, { "subprocess", cmd__subprocess }, { "urlmatch-normalization", cmd__urlmatch_normalization }, + { "xml-encode", cmd__xml_encode }, { "wildmatch", cmd__wildmatch }, #ifdef GIT_WINDOWS_NATIVE { "windows-named-pipe", cmd__windows_named_pipe }, diff --git a/t/helper/test-tool.h b/t/helper/test-tool.h index 042f12464b..c0ab65e370 100644 --- a/t/helper/test-tool.h +++ b/t/helper/test-tool.h @@ -45,6 +45,7 @@ int cmd__submodule_config(int argc, const char **argv); int cmd__submodule_nested_repo_config(int argc, const char **argv); int cmd__subprocess(int argc, const char **argv); int cmd__urlmatch_normalization(int argc, const char **argv); +int cmd__xml_encode(int argc, const char **argv); int cmd__wildmatch(int argc, const char **argv); #ifdef GIT_WINDOWS_NATIVE int cmd__windows_named_pipe(int argc, const char **argv); diff --git a/t/helper/test-xml-encode.c b/t/helper/test-xml-encode.c new file mode 100644 index 0000000000..a648bbd961 --- /dev/null +++ b/t/helper/test-xml-encode.c @@ -0,0 +1,80 @@ +#include "test-tool.h" + +static const char *utf8_replace_character = "�"; + +/* + * Encodes (possibly incorrect) UTF-8 on <stdin> to <stdout>, to be embedded + * in an XML file. + */ +int cmd__xml_encode(int argc, const char **argv) +{ + unsigned char buf[1024], tmp[4], *tmp2 = NULL; + ssize_t cur = 0, len = 1, remaining = 0; + unsigned char ch; + + for (;;) { + if (++cur == len) { + len = xread(0, buf, sizeof(buf)); + if (!len) + return 0; + if (len < 0) + die_errno("Could not read <stdin>"); + cur = 0; + } + ch = buf[cur]; + + if (tmp2) { + if ((ch & 0xc0) != 0x80) { + fputs(utf8_replace_character, stdout); + tmp2 = NULL; + cur--; + continue; + } + *tmp2 = ch; + tmp2++; + if (--remaining == 0) { + fwrite(tmp, tmp2 - tmp, 1, stdout); + tmp2 = NULL; + } + continue; + } + + if (!(ch & 0x80)) { + /* 0xxxxxxx */ + if (ch == '&') + fputs("&", stdout); + else if (ch == '\'') + fputs("'", stdout); + else if (ch == '"') + fputs(""", stdout); + else if (ch == '<') + fputs("<", stdout); + else if (ch == '>') + fputs(">", stdout); + else if (ch >= 0x20) + fputc(ch, stdout); + else if (ch == 0x09 || ch == 0x0a || ch == 0x0d) + fprintf(stdout, "&#x%02x;", ch); + else + fputs(utf8_replace_character, stdout); + } else if ((ch & 0xe0) == 0xc0) { + /* 110XXXXx 10xxxxxx */ + tmp[0] = ch; + remaining = 1; + tmp2 = tmp + 1; + } else if ((ch & 0xf0) == 0xe0) { + /* 1110XXXX 10Xxxxxx 10xxxxxx */ + tmp[0] = ch; + remaining = 2; + tmp2 = tmp + 1; + } else if ((ch & 0xf8) == 0xf0) { + /* 11110XXX 10XXxxxx 10xxxxxx 10xxxxxx */ + tmp[0] = ch; + remaining = 3; + tmp2 = tmp + 1; + } else + fputs(utf8_replace_character, stdout); + } + + return 0; +} |