# test4.ucm   [plain text]


# *******************************************************************************
# * Copyright (C) 2001-2003, International Business Machines
# * Corporation and others.  All Rights Reserved.
# *******************************************************************************
#
# test4.ucm
#
# Test file for MBCS conversion with four-byte codepage data.

# Converter header: name, maximum/minimum bytes per character (4 and 1),
# and the converter class.
<code_set_name>     "test4"
<mb_cur_max>        4
<mb_cur_min>        1
<uconv_class>       "MBCS"

# Both <subchar> and <subchar1> are single-byte sequences, which does not
# make sense but works. <subchar1> was added for new tests; <subchar> was
# left as it was so that the old tests do not have to change.
# -- markus 20031028
<subchar>           \xff
<subchar1>          \xe1

# State table: one <icu:state> row per state, states 0 through 3 in order.
# An entry "b:s" continues with state s after byte b; an entry without
# ":s" ends a byte sequence (ICU .ucm state-table syntax).
# State 0: 00, 05-09, e1 and ff are complete single-byte sequences;
#          01 leads into state 1.
# State 1: only 02 is accepted, leading into state 2.
# State 2: only 03 is accepted, leading into state 3.
# State 3: final bytes 0a-0f and ff; ".p" marks 0a-0f as results that may be
#          surrogate pairs (exercised by the \x01\x02\x03\x0a..\x0f mappings
#          in the CHARMAP section).
<icu:state>         0, 1:1, 5-9, e1, ff
<icu:state>         2:2
<icu:state>         3:3
<icu:state>         a-f.p, ff

CHARMAP

# Each mapping line is: <Unicode code point(s)>  byte sequence  |precision flag
# Flags used in this file (ICU .ucm format):
#   |0  roundtrip mapping
#   |1  fallback, Unicode to bytes only
#   |2  <subchar1> mapping (code point is unmappable; substitute <subchar1>)
#   |3  reverse fallback, bytes to Unicode only

# fromUnicode result is a zero byte from a code point other than U+0000
<U20ac>     \x00 |0

# fallback from a non-zero code point to the zero byte;
# possible with an extension table
<U20ad>     \x00 |1

# nothing special: plain single-byte roundtrip
<U0005>     \x05 |0

# toUnicode result is fallback direct (bytes-to-Unicode fallback only)
<U0006>     \x06 |3

# toUnicode result is a direct non-BMP code point:
# one roundtrip (\x07) and one reverse fallback (\x08)
<U101234>   \x07 |0
<Ufebcd>    \x08 |3

# unassigned: \x09

# toUnicode result goes through the surrogate-pair (.p) state:
# test a real pair, a single unit, and an unassigned sequence,
# first as roundtrips (|0) ...
<U23456>    \x01\x02\x03\x0a |0
<U000b>     \x01\x02\x03\x0b |0
# unassigned: \x01\x02\x03\x0c
# ... then the same three cases as reverse fallbacks (|3)
<U34567>    \x01\x02\x03\x0d |3
<U000e>     \x01\x02\x03\x0e |3
# unassigned: \x01\x02\x03\x0f

# <subchar1> non-mapping: U+50005 is listed with the <subchar1> byte \xe1
# and flag |2
<U50005>    \xe1 |2
# add a multi-code-point mapping that turns the Unicode side of the line
# above (U+50005) into a prefix of a longer input sequence
<U50005><U60006> \x06 |1

# many bytes in one mapping, and many bytes per UChar
<U30ab><U309a> \x01\x02\x03\x0a\x01\x02\x03\x0b\x01\x02\x03\x0c\x01\x02\x03\x0d\x01\x02\x03\x0e\x01\x02\x03\x0f\x01\x02\x03\x0a\x05\x06\x07 |0

# many UChars in one mapping, and many UChars per byte
<U304b><U309a><U304d><U309a><U304f><U309a><U3051><U309a><U3053><U309a><U30ab><U309a><U30ad><U309a><U30af><U309a><U30b1><U309a><U0300> \x08\x09 |0

END CHARMAP