#
#   Copyright (C) 2002-2011, International Business Machines Corporation and others.
#       All Rights Reserved.
#
#   file:  char.txt
#
#   ICU Character Break Rules, also known as Grapheme Cluster Boundaries
#      See Unicode Standard Annex #29.
#      These rules are based on UAX #29 Revision 19 for Unicode Version 6.1
#
#   Reading guide:
#     - '#' starts a comment that runs to the end of the line, so every
#       rule must sit on its own line, outside any comment.
#     - Each rule lists a sequence of character classes that must stay
#       together; a boundary is only reported at the end of a match.
#     - A '/' inside a rule marks the boundary position; what follows it
#       is look-ahead context that is not part of the matched segment.
#

#
#  Character Class Definitions.
#  Each variable is the set of code points carrying the named
#  Grapheme_Cluster_Break property value.
#
$CR          = [\p{Grapheme_Cluster_Break = CR}];
$LF          = [\p{Grapheme_Cluster_Break = LF}];
$Control     = [\p{Grapheme_Cluster_Break = Control}];
# TODO: Restore if the Prepend set becomes non-empty again: $Prepend = [\p{Grapheme_Cluster_Break = Prepend}];
$Extend      = [\p{Grapheme_Cluster_Break = Extend}];
$SpacingMark = [\p{Grapheme_Cluster_Break = SpacingMark}];

#
# Korean Syllable Definitions
# (Hangul leading consonant, vowel, trailing consonant, and the
#  precomposed LV / LVT syllable types.)
#
$L   = [\p{Grapheme_Cluster_Break = L}];
$V   = [\p{Grapheme_Cluster_Break = V}];
$T   = [\p{Grapheme_Cluster_Break = T}];

$LV  = [\p{Grapheme_Cluster_Break = LV}];
$LVT = [\p{Grapheme_Cluster_Break = LVT}];

#
# Emoji regional indicators
# Given as a literal code point range (U+1F1E6..U+1F1FF, 26 code points)
# rather than through a property lookup.
#
$RI = [\U0001F1E6-\U0001F1FF];

## -------------------------------------------------
# Enable rule chaining: a match may be extended by another rule whose
# first character overlaps the last character of the current match.
!!chain;

# Rules used when iterating forwards through the text.
!!forward;

# CR LF is a single cluster.
$CR $LF;

# Hangul syllable sequences are not split.
$L ($L | $V | $LV | $LVT);
($LV | $V) ($V | $T);
($LVT | $T) $T;

# Extend and SpacingMark characters attach to whatever precedes them,
# unless that is a Control, CR or LF.
[^$Control $CR $LF] $Extend;

[^$Control $CR $LF] $SpacingMark;
# TODO: Restore if the Prepend set becomes non-empty again: $Prepend [^$Control $CR $LF];

# Regional indicators.
# First rule: a pair followed by a further $RI ends at the '/' position,
# i.e. directly after the pair.  Second rule: a plain pair stays together.
# NOTE(review): presumably this keeps long runs of indicators grouped two
# at a time despite !!chain -- confirm against the intended flag handling.
$RI $RI / $RI;
$RI $RI;

## -------------------------------------------------

# Rules used when iterating backwards; each sequence is the forward
# sequence written in reverse order.
!!reverse;

$LF $CR;
($L | $V | $LV | $LVT) $L;
($V | $T) ($LV | $V);
$T ($LVT | $T);

$Extend      [^$Control $CR $LF];
$SpacingMark [^$Control $CR $LF];
# TODO: Restore if the Prepend set becomes non-empty again: [^$Control $CR $LF] $Prepend;

# NOTE(review): the look-ahead context here is two $RI, not one as in the
# forward section; the reason is not stated in this file -- confirm before
# changing either rule.
$RI $RI / $RI $RI;
$RI $RI;

## -------------------------------------------------

# No safe-reverse rules are supplied.
!!safe_reverse;

## -------------------------------------------------

# No safe-forward rules are supplied.
!!safe_forward;