#include "test-tool.h"

/*
 * Written in place of any input that cannot be passed through: stray or
 * truncated UTF-8 sequences, invalid lead bytes and disallowed control
 * characters. Spelled as a character reference to U+FFFD (REPLACEMENT
 * CHARACTER) so that this source file stays plain ASCII.
 */
static const char *utf8_replace_character = "&#xfffd;";

/*
 * Encodes (possibly incorrect) UTF-8 on <stdin> to <stdout>, to be embedded
 * in an XML file.
 *
 * - The five characters that are significant in XML markup (& ' " < >) are
 *   written as their predefined entity references.
 * - TAB, LF and CR are written as hexadecimal character references; every
 *   other byte below 0x20 is replaced by utf8_replace_character.
 * - Two-, three- and four-byte sequences are collected in a small buffer and
 *   copied through verbatim once all continuation bytes have arrived. Only
 *   the lead/continuation bit patterns are checked; overlong forms and
 *   surrogates are not rejected.
 * - A sequence that is cut short, either by a non-continuation byte or by
 *   the end of the input, is replaced by utf8_replace_character.
 *
 * argc and argv are not used. Returns 0 once the input is exhausted; a read
 * error is reported through die_errno().
 */
int cmd__xml_encode(int argc, const char **argv)
{
	/*
	 * tmp holds the multi-byte sequence currently being assembled and
	 * tmp2 points at its next free slot; tmp2 == NULL means that no
	 * sequence is pending. remaining counts the continuation bytes that
	 * are still expected.
	 */
	unsigned char buf[1024], tmp[4], *tmp2 = NULL;
	/* cur = 0, len = 1 makes the very first "++cur == len" trigger a read */
	ssize_t cur = 0, len = 1, remaining = 0;
	unsigned char ch;

	for (;;) {
		if (++cur == len) {
			len = xread(0, buf, sizeof(buf));
			if (!len) {
				/*
				 * The input ended in the middle of a
				 * multi-byte sequence: flag it instead of
				 * dropping the partial bytes silently.
				 */
				if (tmp2)
					fputs(utf8_replace_character, stdout);
				return 0;
			}
			if (len < 0)
				die_errno("Could not read <stdin>");
			cur = 0;
		}
		ch = buf[cur];

		if (tmp2) {
			if ((ch & 0xc0) != 0x80) {
				/*
				 * Not a continuation byte: give up on the
				 * pending sequence and step back so that this
				 * byte is looked at again as the start of a
				 * new character.
				 */
				fputs(utf8_replace_character, stdout);
				tmp2 = NULL;
				cur--;
				continue;
			}
			*tmp2 = ch;
			tmp2++;
			if (--remaining == 0) {
				/* sequence complete: pass it through as-is */
				fwrite(tmp, tmp2 - tmp, 1, stdout);
				tmp2 = NULL;
			}
			continue;
		}

		if (!(ch & 0x80)) {
			/* 0xxxxxxx */
			if (ch == '&')
				fputs("&amp;", stdout);
			else if (ch == '\'')
				fputs("&apos;", stdout);
			else if (ch == '"')
				fputs("&quot;", stdout);
			else if (ch == '<')
				fputs("&lt;", stdout);
			else if (ch == '>')
				fputs("&gt;", stdout);
			else if (ch >= 0x20)
				fputc(ch, stdout);
			else if (ch == 0x09 || ch == 0x0a || ch == 0x0d)
				fprintf(stdout, "&#x%02x;", ch);
			else
				fputs(utf8_replace_character, stdout);
		} else if ((ch & 0xe0) == 0xc0) {
			/* 110XXXXx 10xxxxxx */
			tmp[0] = ch;
			remaining = 1;
			tmp2 = tmp + 1;
		} else if ((ch & 0xf0) == 0xe0) {
			/* 1110XXXX 10Xxxxxx 10xxxxxx */
			tmp[0] = ch;
			remaining = 2;
			tmp2 = tmp + 1;
		} else if ((ch & 0xf8) == 0xf0) {
			/* 11110XXX 10XXxxxx 10xxxxxx 10xxxxxx */
			tmp[0] = ch;
			remaining = 3;
			tmp2 = tmp + 1;
		} else {
			/* stray continuation byte or invalid lead byte */
			fputs(utf8_replace_character, stdout);
		}
	}

	return 0;
}