# Copyright (c) 2002-2003, International Business Machines Corporation and # others. All Rights Reserved. # # file: line.txt # # Line Breaking Rules for ICU rules based break iteration. # Implement default line breaking as defined by Unicode TR 14. # # # Character Classes defined by Unicode TR 14. # These are generated by a script from the Unicode LineBreak derived # properties file. # ############ Start of Script-Generated Definitions ####################### $LF = [ \u000A]; $IN = [ \u2024-\u2026]; $SY = [ \u002F]; $EX = [ \u0021 \u003F \u2762-\u2763 \uFE56-\uFE57 \uFF01 \uFF1F]; $BA = [ \u0009 \u007C \u00AD \u058A \u0F0B \u1361 \u1680 \u17D5 \u2000-\u2006 \u2008-\u200A \u2010 \u2012-\u2013 \u2027 \u205F]; $IS = [ \u002C \u002E \u003A-\u003B \u0589]; $BB = [ \u00B4 \u02C8 \u02CC \u1806]; $SA = [ \u0E01-\u0E30 \u0E32-\u0E33 \u0E40-\u0E46 \u0E81-\u0E82 \u0E84 \u0E87-\u0E88 \u0E8A \u0E8D \u0E94-\u0E97 \u0E99-\u0E9F \u0EA1-\u0EA3 \u0EA5 \u0EA7 \u0EAA-\u0EAB \u0EAD-\u0EB0 \u0EB2-\u0EB3 \u0EBD \u0EC0-\u0EC4 \u0EC6 \u0EDC-\u0EDD \u1000-\u1021 \u1023-\u1027 \u1029-\u102A \u1050-\u1055 \u1780-\u17B3]; $CB = [ \uFFFC]; $XX = [ \uE000-\uF8FF \U000F0000-\U000FFFFD \U00100000-\U0010FFFD]; $HY = [ \u002D]; $AI = [ \u00A1 \u00A7-\u00A8 \u00AA \u00B2-\u00B3 \u00B6-\u00BA \u00BC-\u00BF \u00C6 \u00D0 \u00D7-\u00D8 \u00DE-\u00E1 \u00E6 \u00E8-\u00EA \u00EC-\u00ED \u00F0 \u00F2-\u00F3 \u00F7-\u00FA \u00FC \u00FE \u0101 \u0111 \u0113 \u011B \u0126-\u0127 \u012B \u0131-\u0133 \u0138 \u013F-\u0142 \u0144 \u0148-\u014A \u014D \u0152-\u0153 \u0166-\u0167 \u016B \u01CE \u01D0 \u01D2 \u01D4 \u01D6 \u01D8 \u01DA \u01DC \u0251 \u0261 \u02C7 \u02C9-\u02CB \u02CD \u02D0 \u02D8-\u02DB \u02DD \u0391-\u03A1 \u03A3-\u03A9 \u03B1-\u03C1 \u03C3-\u03C9 \u0401 \u0410-\u044F \u0451 \u2015-\u2016 \u2020-\u2021 \u203B \u2074 \u207F \u2081-\u2084 \u2105 \u2113 \u2121-\u2122 \u212B \u2140 \u2154-\u2155 \u215B \u215E \u2160-\u216B \u2170-\u2179 \u2190-\u2199 \u21D2 \u21D4 \u2200 \u2202-\u2203 \u2207-\u2208 
\u220B \u220F \u2211 \u2215 \u221A \u221D-\u2220 \u2223 \u2225 \u2227-\u222C \u222E \u2234-\u2237 \u223C-\u223D \u2248 \u224C \u2252 \u2260-\u2261 \u2264-\u2267 \u226A-\u226B \u226E-\u226F \u2282-\u2283 \u2286-\u2287 \u2295 \u2299 \u22A5 \u22BF \u2312 \u2460-\u24BF \u24D0-\u24E9 \u24EB-\u24FE \u2500-\u254B \u2550-\u2574 \u2580-\u258F \u2592-\u2595 \u25A0-\u25A1 \u25A3-\u25A9 \u25B2-\u25B3 \u25B6-\u25B7 \u25BC-\u25BD \u25C0-\u25C1 \u25C6-\u25C8 \u25CB \u25CE-\u25D1 \u25E2-\u25E5 \u25EF \u2605-\u2606 \u2609 \u260E-\u260F \u2616-\u2617 \u261C \u261E \u2640 \u2642 \u2660-\u2661 \u2663-\u2665 \u2667-\u266A \u266C-\u266D \u266F \uFFFD]; $ZW = [ \u200B]; $SG = [ \uD800-\uDFFF]; $AL = [ \u0023 \u0026 \u002A \u003C-\u003E \u0040-\u005A \u005E-\u007A \u007E \u00A6 \u00A9 \u00AC \u00AE-\u00AF \u00B5 \u00C0-\u00C5 \u00C7-\u00CF \u00D1-\u00D6 \u00D9-\u00DD \u00E2-\u00E5 \u00E7 \u00EB \u00EE-\u00EF \u00F1 \u00F4-\u00F6 \u00FB \u00FD \u00FF-\u0100 \u0102-\u0110 \u0112 \u0114-\u011A \u011C-\u0125 \u0128-\u012A \u012C-\u0130 \u0134-\u0137 \u0139-\u013E \u0143 \u0145-\u0147 \u014B-\u014C \u014E-\u0151 \u0154-\u0165 \u0168-\u016A \u016C-\u01CD \u01CF \u01D1 \u01D3 \u01D5 \u01D7 \u01D9 \u01DB \u01DD-\u0220 \u0222-\u0233 \u0250 \u0252-\u0260 \u0262-\u02AD \u02B0-\u02C6 \u02CE-\u02CF \u02D1-\u02D7 \u02DC \u02DE-\u02EE \u0374-\u0375 \u037A \u037E \u0384-\u038A \u038C \u038E-\u0390 \u03AA-\u03B0 \u03C2 \u03CA-\u03CE \u03D0-\u03F6 \u0400 \u0402-\u040F \u0450 \u0452-\u0482 \u048A-\u04CE \u04D0-\u04F5 \u04F8-\u04F9 \u0500-\u050F \u0531-\u0556 \u0559-\u055F \u0561-\u0587 \u05BE \u05C0 \u05C3 \u05D0-\u05EA \u05F0-\u05F4 \u060C \u061B \u061F \u0621-\u063A \u0640-\u064A \u066A-\u066F \u0671-\u06D5 \u06E5-\u06E6 \u06E9 \u06FA-\u06FE \u0700-\u070D \u0710 \u0712-\u072C \u0780-\u07A5 \u07B1 \u0905-\u0939 \u093D \u0950 \u0958-\u0961 \u0964-\u0965 \u0970 \u0985-\u098C \u098F-\u0990 \u0993-\u09A8 \u09AA-\u09B0 \u09B2 \u09B6-\u09B9 \u09DC-\u09DD \u09DF-\u09E1 \u09F0-\u09F1 \u09F4-\u09FA \u0A05-\u0A0A 
\u0A0F-\u0A10 \u0A13-\u0A28 \u0A2A-\u0A30 \u0A32-\u0A33 \u0A35-\u0A36 \u0A38-\u0A39 \u0A59-\u0A5C \u0A5E \u0A72-\u0A74 \u0A85-\u0A8B \u0A8D \u0A8F-\u0A91 \u0A93-\u0AA8 \u0AAA-\u0AB0 \u0AB2-\u0AB3 \u0AB5-\u0AB9 \u0ABD \u0AD0 \u0AE0 \u0B05-\u0B0C \u0B0F-\u0B10 \u0B13-\u0B28 \u0B2A-\u0B30 \u0B32-\u0B33 \u0B36-\u0B39 \u0B3D \u0B5C-\u0B5D \u0B5F-\u0B61 \u0B70 \u0B83 \u0B85-\u0B8A \u0B8E-\u0B90 \u0B92-\u0B95 \u0B99-\u0B9A \u0B9C \u0B9E-\u0B9F \u0BA3-\u0BA4 \u0BA8-\u0BAA \u0BAE-\u0BB5 \u0BB7-\u0BB9 \u0BF0-\u0BF2 \u0C05-\u0C0C \u0C0E-\u0C10 \u0C12-\u0C28 \u0C2A-\u0C33 \u0C35-\u0C39 \u0C60-\u0C61 \u0C85-\u0C8C \u0C8E-\u0C90 \u0C92-\u0CA8 \u0CAA-\u0CB3 \u0CB5-\u0CB9 \u0CDE \u0CE0-\u0CE1 \u0D05-\u0D0C \u0D0E-\u0D10 \u0D12-\u0D28 \u0D2A-\u0D39 \u0D60-\u0D61 \u0D85-\u0D96 \u0D9A-\u0DB1 \u0DB3-\u0DBB \u0DBD \u0DC0-\u0DC6 \u0DF4 \u0E4F \u0F00-\u0F0A \u0F0D-\u0F17 \u0F1A-\u0F1F \u0F2A-\u0F34 \u0F36 \u0F38 \u0F40-\u0F47 \u0F49-\u0F6A \u0F85 \u0F88-\u0F8B \u0FBE-\u0FC5 \u0FC7-\u0FCC \u0FCF \u104A-\u104F \u10A0-\u10C5 \u10D0-\u10F8 \u10FB \u1200-\u1206 \u1208-\u1246 \u1248 \u124A-\u124D \u1250-\u1256 \u1258 \u125A-\u125D \u1260-\u1286 \u1288 \u128A-\u128D \u1290-\u12AE \u12B0 \u12B2-\u12B5 \u12B8-\u12BE \u12C0 \u12C2-\u12C5 \u12C8-\u12CE \u12D0-\u12D6 \u12D8-\u12EE \u12F0-\u130E \u1310 \u1312-\u1315 \u1318-\u131E \u1320-\u1346 \u1348-\u135A \u1362-\u1368 \u1372-\u137C \u13A0-\u13F4 \u1401-\u1676 \u1681-\u169A \u16A0-\u16F0 \u1700-\u170C \u170E-\u1711 \u1720-\u1731 \u1735-\u1736 \u1740-\u1751 \u1760-\u176C \u176E-\u1770 \u17DC \u1800-\u1805 \u1807-\u180A \u1820-\u1877 \u1880-\u18A8 \u1E00-\u1E9B \u1EA0-\u1EF9 \u1F00-\u1F15 \u1F18-\u1F1D \u1F20-\u1F45 \u1F48-\u1F4D \u1F50-\u1F57 \u1F59 \u1F5B \u1F5D \u1F5F-\u1F7D \u1F80-\u1FB4 \u1FB6-\u1FC4 \u1FC6-\u1FD3 \u1FD6-\u1FDB \u1FDD-\u1FEF \u1FF2-\u1FF4 \u1FF6-\u1FFE \u2017 \u2022-\u2023 \u2038 \u203D-\u2043 \u2047-\u2052 \u2057 \u2061-\u2063 \u2070-\u2071 \u2075-\u207C \u2080 \u2085-\u208C \u2100-\u2102 \u2104 \u2106-\u2108 \u210A-\u2112 
\u2114-\u2115 \u2117-\u2120 \u2123-\u2125 \u2127-\u212A \u212C-\u213A \u213D-\u213F \u2141-\u214B \u2153 \u2156-\u215A \u215C-\u215D \u215F \u216C-\u216F \u217A-\u2183 \u219A-\u21D1 \u21D3 \u21D5-\u21FF \u2201 \u2204-\u2206 \u2209-\u220A \u220C-\u220E \u2210 \u2214 \u2216-\u2219 \u221B-\u221C \u2221-\u2222 \u2224 \u2226 \u222D \u222F-\u2233 \u2238-\u223B \u223E-\u2247 \u2249-\u224B \u224D-\u2251 \u2253-\u225F \u2262-\u2263 \u2268-\u2269 \u226C-\u226D \u2270-\u2281 \u2284-\u2285 \u2288-\u2294 \u2296-\u2298 \u229A-\u22A4 \u22A6-\u22BE \u22C0-\u2311 \u2313-\u2328 \u232B-\u23B3 \u23B7-\u23CE \u2400-\u2426 \u2440-\u244A \u24C0-\u24CF \u24EA \u254C-\u254F \u2575-\u257F \u2590-\u2591 \u2596-\u259F \u25A2 \u25AA-\u25B1 \u25B4-\u25B5 \u25B8-\u25BB \u25BE-\u25BF \u25C2-\u25C5 \u25C9-\u25CA \u25CC-\u25CD \u25D2-\u25E1 \u25E6-\u25EE \u25F0-\u2604 \u2607-\u2608 \u260A-\u260D \u2610-\u2613 \u2619-\u261B \u261D \u261F-\u263F \u2641 \u2643-\u265F \u2662 \u2666 \u266B \u266E \u2670-\u267D \u2680-\u2689 \u2701-\u2704 \u2706-\u2709 \u270C-\u2727 \u2729-\u274B \u274D \u274F-\u2752 \u2756 \u2758-\u275A \u2761 \u2764-\u2767 \u2776-\u2794 \u2798-\u27AF \u27B1-\u27BE \u27D0-\u27E5 \u27F0-\u2982 \u2999-\u29D7 \u29DC-\u29FB \u29FE-\u2AFF \uFB00-\uFB06 \uFB13-\uFB17 \uFB1D \uFB1F-\uFB36 \uFB38-\uFB3C \uFB3E \uFB40-\uFB41 \uFB43-\uFB44 \uFB46-\uFBB1 \uFBD3-\uFD3D \uFD50-\uFD8F \uFD92-\uFDC7 \uFDF0-\uFDFB \uFE70-\uFE74 \uFE76-\uFEFC \uFF66 \uFF71-\uFF9D \uFFA0-\uFFBE \uFFC2-\uFFC7 \uFFCA-\uFFCF \uFFD2-\uFFD7 \uFFDA-\uFFDC \uFFE8-\uFFEE \U00010300-\U0001031E \U00010320-\U00010323 \U00010330-\U0001034A \U00010400-\U00010425 \U00010428-\U0001044D \U0001D000-\U0001D0F5 \U0001D100-\U0001D126 \U0001D12A-\U0001D164 \U0001D16A-\U0001D16C \U0001D183-\U0001D184 \U0001D18C-\U0001D1A9 \U0001D1AE-\U0001D1DD \U0001D400-\U0001D454 \U0001D456-\U0001D49C \U0001D49E-\U0001D49F \U0001D4A2 \U0001D4A5-\U0001D4A6 \U0001D4A9-\U0001D4AC \U0001D4AE-\U0001D4B9 \U0001D4BB \U0001D4BD-\U0001D4C0 \U0001D4C2-\U0001D4C3 
\U0001D4C5-\U0001D505 \U0001D507-\U0001D50A \U0001D50D-\U0001D514 \U0001D516-\U0001D51C \U0001D51E-\U0001D539 \U0001D53B-\U0001D53E \U0001D540-\U0001D544 \U0001D546 \U0001D54A-\U0001D550 \U0001D552-\U0001D6A3 \U0001D6A8-\U0001D7C9]; $OP = [ \u0028 \u005B \u007B \u0F3A \u0F3C \u169B \u201A \u201E \u2045 \u207D \u208D \u2329 \u23B4 \u2768 \u276A \u276C \u276E \u2770 \u2772 \u2774 \u27E6 \u27E8 \u27EA \u2983 \u2985 \u2987 \u2989 \u298B \u298D \u298F \u2991 \u2993 \u2995 \u2997 \u29D8 \u29DA \u29FC \u3008 \u300A \u300C \u300E \u3010 \u3014 \u3016 \u3018 \u301A \u301D \uFD3E \uFE35 \uFE37 \uFE39 \uFE3B \uFE3D \uFE3F \uFE41 \uFE43 \uFE59 \uFE5B \uFE5D \uFF08 \uFF3B \uFF5B \uFF5F \uFF62]; $BK = [ \u000C \u2028-\u2029]; $PO = [ \u0025 \u00A2 \u00B0 \u2030-\u2037 \u20A7 \u2103 \u2109 \u2126 \uFDFC \uFE6A \uFF05 \uFFE0]; $NS = [ \u0E5A-\u0E5B \u17D4 \u17D6-\u17DA \u203C \u2044 \u3005 \u301C \u303B-\u303C \u3041 \u3043 \u3045 \u3047 \u3049 \u3063 \u3083 \u3085 \u3087 \u308E \u3095-\u3096 \u309B-\u309E \u30A0-\u30A1 \u30A3 \u30A5 \u30A7 \u30A9 \u30C3 \u30E3 \u30E5 \u30E7 \u30EE \u30F5-\u30F6 \u30FB \u30FD \u31F0-\u31FF \uFE54-\uFE55 \uFF1A-\uFF1B \uFF65 \uFF67-\uFF70 \uFF9E-\uFF9F]; $CL = [ \u0029 \u005D \u007D \u0F3B \u0F3D \u169C \u2046 \u207E \u208E \u232A \u23B5 \u2769 \u276B \u276D \u276F \u2771 \u2773 \u2775 \u27E7 \u27E9 \u27EB \u2984 \u2986 \u2988 \u298A \u298C \u298E \u2990 \u2992 \u2994 \u2996 \u2998 \u29D9 \u29DB \u29FD \u3001-\u3002 \u3009 \u300B \u300D \u300F \u3011 \u3015 \u3017 \u3019 \u301B \u301E-\u301F \uFD3F \uFE36 \uFE38 \uFE3A \uFE3C \uFE3E \uFE40 \uFE42 \uFE44 \uFE50 \uFE52 \uFE5A \uFE5C \uFE5E \uFF09 \uFF0C \uFF0E \uFF3D \uFF5D \uFF60-\uFF61 \uFF63-\uFF64]; $NU = [ \u0030-\u0039 \u0660-\u0669 \u06F0-\u06F9 \u0966-\u096F \u09E6-\u09EF \u0A66-\u0A6F \u0AE6-\u0AEF \u0B66-\u0B6F \u0BE7-\u0BEF \u0C66-\u0C6F \u0CE6-\u0CEF \u0D66-\u0D6F \u0E50-\u0E59 \u0ED0-\u0ED9 \u0F20-\u0F29 \u1040-\u1049 \u1369-\u1371 \u17E0-\u17E9 \u1810-\u1819 \U0001D7CE-\U0001D7FF]; $CM 
= [ \u0000-\u0008 \u000B \u000E-\u001F \u007F-\u009F \u0300-\u034F \u0360-\u036F \u0483-\u0486 \u0488-\u0489 \u0591-\u05A1 \u05A3-\u05B9 \u05BB-\u05BD \u05BF \u05C1-\u05C2 \u05C4 \u064B-\u0655 \u0670 \u06D6-\u06E4 \u06E7-\u06E8 \u06EA-\u06ED \u070F \u0711 \u0730-\u074A \u07A6-\u07B0 \u0901-\u0903 \u093C \u093E-\u094D \u0951-\u0954 \u0962-\u0963 \u0981-\u0983 \u09BC \u09BE-\u09C4 \u09C7-\u09C8 \u09CB-\u09CD \u09D7 \u09E2-\u09E3 \u0A02 \u0A3C \u0A3E-\u0A42 \u0A47-\u0A48 \u0A4B-\u0A4D \u0A70-\u0A71 \u0A81-\u0A83 \u0ABC \u0ABE-\u0AC5 \u0AC7-\u0AC9 \u0ACB-\u0ACD \u0B01-\u0B03 \u0B3C \u0B3E-\u0B43 \u0B47-\u0B48 \u0B4B-\u0B4D \u0B56-\u0B57 \u0B82 \u0BBE-\u0BC2 \u0BC6-\u0BC8 \u0BCA-\u0BCD \u0BD7 \u0C01-\u0C03 \u0C3E-\u0C44 \u0C46-\u0C48 \u0C4A-\u0C4D \u0C55-\u0C56 \u0C82-\u0C83 \u0CBE-\u0CC4 \u0CC6-\u0CC8 \u0CCA-\u0CCD \u0CD5-\u0CD6 \u0D02-\u0D03 \u0D3E-\u0D43 \u0D46-\u0D48 \u0D4A-\u0D4D \u0D57 \u0D82-\u0D83 \u0DCA \u0DCF-\u0DD4 \u0DD6 \u0DD8-\u0DDF \u0DF2-\u0DF3 \u0E31 \u0E34-\u0E3A \u0E47-\u0E4E \u0EB1 \u0EB4-\u0EB9 \u0EBB-\u0EBC \u0EC8-\u0ECD \u0F18-\u0F19 \u0F35 \u0F37 \u0F39 \u0F3E-\u0F3F \u0F71-\u0F84 \u0F86-\u0F87 \u0F90-\u0F97 \u0F99-\u0FBC \u0FC6 \u102C-\u1032 \u1036-\u1039 \u1056-\u1059 \u1160-\u11A2 \u11A8-\u11F9 \u1712-\u1714 \u1732-\u1734 \u1752-\u1753 \u1772-\u1773 \u17B4-\u17D3 \u180B-\u180E \u18A9 \u200C-\u200F \u202A-\u202E \u206A-\u206F \u20D0-\u20EA \u302A-\u302F \u3099-\u309A \uFB1E \uFE00-\uFE0F \uFE20-\uFE23 \uFFF9-\uFFFB \U0001D165-\U0001D169 \U0001D16D-\U0001D182 \U0001D185-\U0001D18B \U0001D1AA-\U0001D1AD \U000E0001 \U000E0020-\U000E007F]; $PR = [ \u0024 \u002B \u005C \u00A3-\u00A5 \u00B1 \u09F2-\u09F3 \u0E3F \u17DB \u20A0-\u20A6 \u20A8-\u20B1 \u2116 \u2212-\u2213 \uFE69 \uFF04 \uFFE1 \uFFE5-\uFFE6]; $B2 = [ \u2014]; $ID = [ \u1100-\u1159 \u115F \u2E80-\u2E99 \u2E9B-\u2EF3 \u2F00-\u2FD5 \u2FF0-\u2FFB \u3000 \u3003-\u3004 \u3006-\u3007 \u3012-\u3013 \u3020-\u3029 \u3030-\u303A \u303D-\u303F \u3042 \u3044 \u3046 \u3048 \u304A-\u3062 \u3064-\u3082 
\u3084 \u3086 \u3088-\u308D \u308F-\u3094 \u309F \u30A2 \u30A4 \u30A6 \u30A8 \u30AA-\u30C2 \u30C4-\u30E2 \u30E4 \u30E6 \u30E8-\u30ED \u30EF-\u30F4 \u30F7-\u30FA \u30FC \u30FE-\u30FF \u3105-\u312C \u3131-\u318E \u3190-\u31B7 \u3200-\u321C \u3220-\u3243 \u3251-\u327B \u327F-\u32CB \u32D0-\u32FE \u3300-\u3376 \u337B-\u33DD \u33E0-\u33FE \u3400-\u4DB5 \u4E00-\u9FA5 \uA000-\uA48C \uA490-\uA4C6 \uAC00-\uD7A3 \uF900-\uFA2D \uFA30-\uFA6A \uFE30-\uFE34 \uFE45-\uFE46 \uFE49-\uFE4F \uFE51 \uFE58 \uFE5F-\uFE66 \uFE68 \uFE6B \uFF02-\uFF03 \uFF06-\uFF07 \uFF0A-\uFF0B \uFF0D \uFF0F-\uFF19 \uFF1C-\uFF1E \uFF20-\uFF3A \uFF3C \uFF3E-\uFF5A \uFF5C \uFF5E \uFFE2-\uFFE4 \U00020000-\U0002A6D6 \U0002F800-\U0002FA1D]; $SP = [ \u0020]; $QU = [ \u0022 \u0027 \u00AB \u00BB \u2018-\u2019 \u201B-\u201D \u201F \u2039-\u203A \u23B6 \u275B-\u275E]; $CR = [ \u000D]; $GL = [ \u00A0 \u0F0C \u2007 \u2011 \u202F \u2060 \uFEFF]; ############ End of Script-Generated Definitions ####################### # # Thai Dictionary related definitions and rules # $dictionary = [\u0e01-\u0e2e \u0e30-\u0e3a \u0e40-\u0e44 \u0e47-\u0e4e]; # this rule breaks the iterator with mixed Thai and English $paiyannoi = [\u0e2f]; $maiyamok = [\u0e46]; $thai_etc = $paiyannoi \u0e25 $paiyannoi; # # Character classes from TR 29. Needed for finding characters. # # $Extend is all combining characters, and none of the other cruft that # TR14 puts into $CM, which is its concept of combining marks. 
# $Extend = # From UNIDATA/DerivedCoreProperties.txt [\u0300-\u034E \u0360-\u036F \u0483-\u0486 \u0488-\u0489 \u0591-\u05A1 \u05A3-\u05B9 \u05BB-\u05BD \u05BF \u05C1-\u05C2 \u05C4 \u064B-\u0655 \u0670 \u06D6-\u06DC \u06DE \u06DF-\u06E4 \u06E7-\u06E8 \u06EA-\u06ED \u0711 \u0730-\u074A \u07A6-\u07B0 \u0901-\u0902 \u0903 \u093C \u093E-\u0940 \u0941-\u0948 \u0949-\u094C \u0951-\u0954 \u0962-\u0963 \u0981 \u0982-\u0983 \u09BC \u09BE-\u09C0 \u09C1-\u09C4 \u09C7-\u09C8 \u09CB-\u09CC \u09D7 \u09E2-\u09E3 \u0A02 \u0A3C \u0A3E-\u0A40 \u0A41-\u0A42 \u0A47-\u0A48 \u0A4B-\u0A4C \u0A70-\u0A71 \u0A81-\u0A82 \u0A83 \u0ABC \u0ABE-\u0AC0 \u0AC1-\u0AC5 \u0AC7-\u0AC8 \u0AC9 \u0ACB-\u0ACC \u0B01 \u0B02-\u0B03 \u0B3C \u0B3E \u0B3F \u0B40 \u0B41-\u0B43 \u0B47-\u0B48 \u0B4B-\u0B4C \u0B56 \u0B57 \u0B82 \u0BBE-\u0BBF \u0BC0 \u0BC1-\u0BC2 \u0BC6-\u0BC8 \u0BCA-\u0BCC \u0BD7 \u0C01-\u0C03 \u0C3E-\u0C40 \u0C41-\u0C44 \u0C46-\u0C48 \u0C4A-\u0C4C \u0C55-\u0C56 \u0C82-\u0C83 \u0CBE \u0CBF \u0CC0-\u0CC4 \u0CC6 \u0CC7-\u0CC8 \u0CCA-\u0CCB \u0CCC \u0CD5-\u0CD6 \u0D02-\u0D03 \u0D3E-\u0D40 \u0D41-\u0D43 \u0D46-\u0D48 \u0D4A-\u0D4C \u0D57 \u0D82-\u0D83 \u0DCF-\u0DD1 \u0DD2-\u0DD4 \u0DD6 \u0DD8-\u0DDF \u0DF2-\u0DF3 \u0E31 \u0E34-\u0E39 \u0E47-\u0E4E \u0EB1 \u0EB4-\u0EB9 \u0EBB-\u0EBC \u0EC8-\u0ECD \u0F18-\u0F19 \u0F35 \u0F37 \u0F39 \u0F3E-\u0F3F \u0F71-\u0F7E \u0F7F \u0F80-\u0F84 \u0F86-\u0F87 \u0F90-\u0F97 \u0F99-\u0FBC \u0FC6 \u102C \u102D-\u1030 \u1031 \u1032 \u1036-\u1037 \u1038 \u1056-\u1057 \u1058-\u1059 \u1712-\u1714 \u1732-\u1734 \u1752-\u1753 \u1772-\u1773 \u17B4-\u17B6 \u17B7-\u17BD \u17BE-\u17C5 \u17C6 \u17C7-\u17C8 \u17C9-\u17D1 \u17D3 \u180B-\u180D \u18A9 \u20D0-\u20DC \u20DD-\u20E0 \u20E1 \u20E2-\u20E4 \u20E5-\u20EA \u302A-\u302F \u3099-\u309A \uFB1E \uFE00-\uFE0F \uFE20-\uFE23 \uFF9E-\uFF9F \U0001D165-\U0001D166 \U0001D167-\U0001D169 \U0001D16D-\U0001D172 \U0001D17B-\U0001D182 \U0001D185-\U0001D18B \U0001D1AA-\U0001D1AD]; # # Rule LB1. 
#   By default, treat AI (characters with ambiguous East Asian width) and
#   SA (South East Asian: Thai, Lao, Khmer) as $AL (Alphabetic).
#   $SA characters that are also in $dictionary are excluded here; they are
#   matched through $ThaiRange further down instead.
#
$ALPlus = $AL | $AI | [$SA - $dictionary];

#
#  Combining Marks.   X $CM*  behaves as if it were X.  Rule LB6.
#   TODO:  This is going to produce some odd results, because of the non-combining
#          chars that are included in $CM.  Use $Extend instead, where possible.
#   NOTE(review): $ALcm and $IDcm absorb a trailing $CM*, while every other *cm
#          variable below absorbs a trailing $Extend*.  Confirm the split is intended.
#
$ALcm = $ALPlus $CM*;
$IDcm = $ID $CM*;
$NUcm = $NU $Extend*;
$HYcm = $HY $Extend*;
$SPcm = $SP $Extend*;
$QUcm = $QU $Extend*;
$POcm = $PO $Extend*;
$OPcm = $OP $Extend*;
$BAcm = $BA $Extend*;
$BBcm = $BB $Extend*;
$NScm = $NS $Extend*;
$GLcm = $GL $Extend*;
$B2cm = $B2 $Extend*;
$INcm = $IN $Extend*;

#  New Lines.  Always break after, never break before.
#              Rule LB 3
#
#  Endings.    NewLine or Zero Width Space, or both.  Rules 4, 5
#              Because we never break before these things, $Endings
#              appears at the end of each line break rule.
#
#              $NLF              one hard line terminator; CR LF counts as a single unit.
#              $Endings          every part is optional, so it may match nothing at all.
#              $EndingsMandatory requires at least a $ZW or an $NLF after any spaces.
#
$NLF = $BK | $CR | $LF | $CR $LF;
$Endings = $SPcm* $ZW* $NLF?;
$EndingsMandatory = $SPcm* $NLF | $SPcm* $ZW $NLF?;

#
#  Openings    Sequences that can precede Words, and that should not be separated from them.
#              Rules LB 9, 10
#              Each repetition is an optional quote (plus spaces), then an opening
#              punctuation character (plus spaces).
#
$Openings = (($QUcm $SPcm*)? $OPcm $SPcm*)*;

#
#  Closings    Sequences that follow words, and that should not be separated from them.
#              Rule LB 8, 11, 15
#              Each repetition is either optional spaces followed by $CL / $EX / $IS / $SY,
#              or a bare $BAcm, $HYcm, $NScm or $maiyamok.
$Closings = ($SPcm*( ($CL ($SPcm* $NScm)? | $EX | $IS | $SY) $Extend*) | $BAcm | $HYcm | $NScm | $maiyamok)*;

#
#  Words.  Includes mixed Alpha-numerics.
#          Rules 11a, 16, 17, 19, more or less.
#          Note that $Dashes (outer "*") and the first alternative of $Word15
#          (every part optional) can each match the empty string.
#
$NumberInterior = $IDcm | ($NUcm | $ALcm | $IS $NUcm)+;
$Number = $PR? ($OPcm | $HYcm)? $NumberInterior $CL? $POcm?;   # Fancy Number 18
$Word = (($IDcm | ($ALcm | $NUcm)+) ($POcm? | $INcm?));        # Alpha-numeric. 16, 17
$Dashes = (($B2cm $SPcm*)*);                                   # Dashes 11a
$ThaiRange = $dictionary+ | $thai_etc;
$WordLikeThing = $Number | $Word | $Dashes | $ThaiRange;
$Word15 = ($BBcm* ($WordLikeThing)? ($BAcm | $HYcm | $NScm)*) |   # Rule 15. Stuff sticks around words.
          [^[:Cc:] $BK $CR $LF $ZW $SP $GL] $Extend* |            # Allow characters that don't meet the
          [^$BK $CR $LF $ZW $SP $GL ];                            # more elaborate definitions for WORD to be glued.

$GluedWord = ($GLcm | $QUcm)? $Word15 (($GLcm | $QUcm) $Word15)*;   # "Glue" will stick anything below it together.
                                                                    # Rules 13, 14

#
#  The actual rules, a combination of everything defined above.
#
#  The first two rules differ only in their tail: an optional $paiyannoi followed by
#  $EndingsMandatory, versus a plain $Endings.
#  The third rule ends at a $paiyannoi that is followed by something other than the
#  "\u0e25 $paiyannoi" continuation defined as $thai_etc.
#  NOTE(review): the "/" presumably marks the boundary position, with the parenthesized
#  part acting as look-ahead context -- confirm against the RBBI rule syntax documentation.
#
$Openings $GluedWord $Closings $paiyannoi? $EndingsMandatory;
$Openings $GluedWord $Closings $Endings;
$Openings $GluedWord $Closings $paiyannoi / ([^\u0e25 $Extend] | \u0e25[^$paiyannoi $Extend]);

#  Older form of the $paiyannoi rule above, kept for reference only:
#"$word($nbsp+$word)*$paiyannoi/([^[\u0e25$_ignore_]]|"
#    + "\u0e25[^$paiyannoi$_ignore_]);"

#
#  Reverse Rules.
#
#  Back up to a hard break or a space that will cause a boundary.
#  Not all spaces cause line breaks.  $SpaceGlue represents a sequence
#  containing a space that may inhibit a break from occurring.
#
#  NOTE(review): the sequences below appear to be written in back-to-front text order
#  (e.g. "$LF $CR" for a CR LF pair, and "$SP ... $OP" in $SpaceGlue), and the leading
#  "!" on the last rule appears to be what marks it as a reverse rule -- confirm
#  against the RBBI rule syntax documentation.
#
$SpaceGlue = ([$ZW $CL $IS $NS $OP] ($Extend* $SP)) | (($Extend* $SP)+ $OP);
$ClumpingChars = [^$SP $BK $CR $LF];

!. . $ClumpingChars* ($SpaceGlue $ClumpingChars*)* (. | $LF $CR);