diff options
author | Vincent Ambo <Vincent Ambo> | 2020-01-11T23·36+0000 |
---|---|---|
committer | Vincent Ambo <Vincent Ambo> | 2020-01-11T23·36+0000 |
commit | 1b593e1ea4d2af0f6444d9a7788d5d99abd6fde5 (patch) | |
tree | e3accb9beed5c4c1b5a05c99db71ab2841f0ed04 /t/helper/test-xml-encode.c |
Squashed 'third_party/git/' content from commit cb71568594
git-subtree-dir: third_party/git git-subtree-split: cb715685942260375e1eb8153b0768a376e4ece7
Diffstat (limited to 't/helper/test-xml-encode.c')
-rw-r--r-- | t/helper/test-xml-encode.c | 80 |
1 files changed, 80 insertions, 0 deletions
diff --git a/t/helper/test-xml-encode.c b/t/helper/test-xml-encode.c new file mode 100644 index 000000000000..a648bbd961c2 --- /dev/null +++ b/t/helper/test-xml-encode.c @@ -0,0 +1,80 @@ +#include "test-tool.h" + +static const char *utf8_replace_character = "�"; + +/* + * Encodes (possibly incorrect) UTF-8 on <stdin> to <stdout>, to be embedded + * in an XML file. + */ +int cmd__xml_encode(int argc, const char **argv) +{ + unsigned char buf[1024], tmp[4], *tmp2 = NULL; + ssize_t cur = 0, len = 1, remaining = 0; + unsigned char ch; + + for (;;) { + if (++cur == len) { + len = xread(0, buf, sizeof(buf)); + if (!len) + return 0; + if (len < 0) + die_errno("Could not read <stdin>"); + cur = 0; + } + ch = buf[cur]; + + if (tmp2) { + if ((ch & 0xc0) != 0x80) { + fputs(utf8_replace_character, stdout); + tmp2 = NULL; + cur--; + continue; + } + *tmp2 = ch; + tmp2++; + if (--remaining == 0) { + fwrite(tmp, tmp2 - tmp, 1, stdout); + tmp2 = NULL; + } + continue; + } + + if (!(ch & 0x80)) { + /* 0xxxxxxx */ + if (ch == '&') + fputs("&", stdout); + else if (ch == '\'') + fputs("'", stdout); + else if (ch == '"') + fputs(""", stdout); + else if (ch == '<') + fputs("<", stdout); + else if (ch == '>') + fputs(">", stdout); + else if (ch >= 0x20) + fputc(ch, stdout); + else if (ch == 0x09 || ch == 0x0a || ch == 0x0d) + fprintf(stdout, "&#x%02x;", ch); + else + fputs(utf8_replace_character, stdout); + } else if ((ch & 0xe0) == 0xc0) { + /* 110XXXXx 10xxxxxx */ + tmp[0] = ch; + remaining = 1; + tmp2 = tmp + 1; + } else if ((ch & 0xf0) == 0xe0) { + /* 1110XXXX 10Xxxxxx 10xxxxxx */ + tmp[0] = ch; + remaining = 2; + tmp2 = tmp + 1; + } else if ((ch & 0xf8) == 0xf0) { + /* 11110XXX 10XXxxxx 10xxxxxx 10xxxxxx */ + tmp[0] = ch; + remaining = 3; + tmp2 = tmp + 1; + } else + fputs(utf8_replace_character, stdout); + } + + return 0; +} |