about summary refs log tree commit diff
path: root/t/helper/test-xml-encode.c
blob: a648bbd961c2361ea8fa2385cb3f63a002e634c3 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
#include "test-tool.h"

static const char *utf8_replace_character = "�";

/*
 * Encodes (possibly incorrect) UTF-8 on <stdin> to <stdout>, to be embedded
 * in an XML file.
 */
int cmd__xml_encode(int argc, const char **argv)
{
	unsigned char buf[1024], tmp[4], *tmp2 = NULL;
	ssize_t cur = 0, len = 1, remaining = 0;
	unsigned char ch;

	for (;;) {
		if (++cur == len) {
			len = xread(0, buf, sizeof(buf));
			if (!len)
				return 0;
			if (len < 0)
				die_errno("Could not read <stdin>");
			cur = 0;
		}
		ch = buf[cur];

		if (tmp2) {
			if ((ch & 0xc0) != 0x80) {
				fputs(utf8_replace_character, stdout);
				tmp2 = NULL;
				cur--;
				continue;
			}
			*tmp2 = ch;
			tmp2++;
			if (--remaining == 0) {
				fwrite(tmp, tmp2 - tmp, 1, stdout);
				tmp2 = NULL;
			}
			continue;
		}

		if (!(ch & 0x80)) {
			/* 0xxxxxxx */
			if (ch == '&')
				fputs("&amp;", stdout);
			else if (ch == '\'')
				fputs("&apos;", stdout);
			else if (ch == '"')
				fputs("&quot;", stdout);
			else if (ch == '<')
				fputs("&lt;", stdout);
			else if (ch == '>')
				fputs("&gt;", stdout);
			else if (ch >= 0x20)
				fputc(ch, stdout);
			else if (ch == 0x09 || ch == 0x0a || ch == 0x0d)
				fprintf(stdout, "&#x%02x;", ch);
			else
				fputs(utf8_replace_character, stdout);
		} else if ((ch & 0xe0) == 0xc0) {
			/* 110XXXXx 10xxxxxx */
			tmp[0] = ch;
			remaining = 1;
			tmp2 = tmp + 1;
		} else if ((ch & 0xf0) == 0xe0) {
			/* 1110XXXX 10Xxxxxx 10xxxxxx */
			tmp[0] = ch;
			remaining = 2;
			tmp2 = tmp + 1;
		} else if ((ch & 0xf8) == 0xf0) {
			/* 11110XXX 10XXxxxx 10xxxxxx 10xxxxxx */
			tmp[0] = ch;
			remaining = 3;
			tmp2 = tmp + 1;
		} else
			fputs(utf8_replace_character, stdout);
	}

	return 0;
}