about summary refs log tree commit diff
path: root/contrib/update-unicode/update_unicode.sh
blob: aa90865befa428e0b0972197bc0e79d008fd7053 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
#!/bin/sh
# Regenerate unicode-width.h at the top level of the git work tree.
#
# Steps:
#   1. Download UnicodeData.txt and EastAsianWidth.txt with wget.
#   2. Clone the "uniset" tool on first use (pinned to commit 4b186196dd)
#      and build it.
#   3. Run uniset to produce the zero_width[] and double_width[] interval
#      tables and write them to unicode-width.h.
#
# Requires: git, wget, autoreconf, make and a C compiler.
#
#See http://www.unicode.org/reports/tr44/
#
#Me Enclosing_Mark  an enclosing combining mark
#Mn Nonspacing_Mark a nonspacing combining mark (zero advance width)
#Cf Format          a format control character
#

# Every path below is relative to the directory holding this script, so
# stop right away if we cannot enter it instead of downloading and
# building in some unrelated directory.
cd "$(dirname "$0")" || exit 1

# Resolve the work tree root separately so that a failing git (for
# example when run outside a repository) is noticed rather than silently
# producing the path "/unicode-width.h".
toplevel=$(git rev-parse --show-toplevel) || exit 1
UNICODEWIDTH_H=$toplevel/unicode-width.h

wget -N http://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt \
	http://www.unicode.org/Public/UCD/latest/ucd/EastAsianWidth.txt &&
if ! test -d uniset; then
	git clone https://github.com/depp/uniset.git &&
	( cd uniset && git checkout 4b186196dd )
fi &&
(
	cd uniset &&
	# Only bootstrap the build system when the binary has not been
	# built yet; "make" alone handles later rebuilds.
	if ! test -x uniset; then
		autoreconf -i &&
		./configure --enable-warnings=-Werror CFLAGS='-O0 -ggdb'
	fi &&
	make
) &&
UNICODE_DIR=. && export UNICODE_DIR &&
# Collect both tables before touching the header: a command substitution
# placed inside the here-document would let "cat" succeed (and overwrite
# the header with empty tables) even if uniset failed.
zero_width=$(uniset/uniset --32 cat:Me,Mn,Cf + U+1160..U+11FF - U+00AD) &&
double_width=$(uniset/uniset --32 eaw:F,W) &&
cat >"$UNICODEWIDTH_H" <<-EOF
static const struct interval zero_width[] = {
	$zero_width
};
static const struct interval double_width[] = {
	$double_width
};
EOF