#
# Copyright (C) 2016 and later: Unicode, Inc. and others.
# License & terms of use: http://www.unicode.org/copyright.html
# Copyright (c) 2016, International Business Machines Corporation and others. All Rights Reserved.

# file: grapheme.txt
#
# Reference Grapheme Break rules for intltest rbbi/RBBIMonkeyTest
#
#
# Note: Rule syntax and the monkey test itself are still a work in progress.
#       They are expected to change with review and the addition of support for rule tailoring.
#
# Layout of this file:
#   1. Header settings            ("key = value;")
#   2. Named character sets       ("Name = [set expression];")
#   3. Break rules                ("RuleName: pattern;")
# NOTE(review): the rule names GB3 ... GB999 look like the grapheme cluster
#       boundary rule numbers of UAX #29 - confirm against the current spec
#       before renaming or renumbering anything.

# Header settings: which kind of break iteration these rules describe, and the locale.
type = grapheme; # one of grapheme | word | line | sentence
locale = en;

#
# Basic character classes.
# Each name is bound to the set of characters carrying the corresponding
# Grapheme_Cluster_Break property value.
#
CR = [\p{Grapheme_Cluster_Break = CR}];
LF = [\p{Grapheme_Cluster_Break = LF}];

Control = [[\p{Grapheme_Cluster_Break = Control}]];
Extend = [[\p{Grapheme_Cluster_Break = Extend}]];
ZWJ = [\p{Grapheme_Cluster_Break = ZWJ}];
Regional_Indicator = [\p{Grapheme_Cluster_Break = Regional_Indicator}];
Prepend = [\p{Grapheme_Cluster_Break = Prepend}];
SpacingMark = [\p{Grapheme_Cluster_Break = SpacingMark}];

#
# Korean Syllable Definitions
#
L = [\p{Grapheme_Cluster_Break = L}];
V = [\p{Grapheme_Cluster_Break = V}];
T = [\p{Grapheme_Cluster_Break = T}];
LV = [\p{Grapheme_Cluster_Break = LV}];
LVT = [\p{Grapheme_Cluster_Break = LVT}];

# Emoji definitions
Extended_Pict = [:ExtPict:];

# Indic Sequences
#   Virama_ and LinkingConsonant are both restricted to the same six scripts
#   (Gujr, Telu, Mlym, Orya, Beng, Deva) and then intersected ("&") with the
#   Indic_Syllabic_Category values Virama and Consonant respectively.
#   ExtCccZwj is Extend with the ccc=0 characters removed, plus ZWJ.
Virama_ = [[\p{Gujr}\p{sc=Telu}\p{sc=Mlym}\p{sc=Orya}\p{sc=Beng}\p{sc=Deva}] & [\p{Indic_Syllabic_Category=Virama}]];
LinkingConsonant = [[\p{Gujr}\p{sc=Telu}\p{sc=Mlym}\p{sc=Orya}\p{sc=Beng}\p{sc=Deva}] & [\p{Indic_Syllabic_Category=Consonant}]];
ExtCccZwj = [[Extend-[\p{ccc=0}]] ZWJ];

#
# Break rules.
# NOTE(review): "÷" presumably marks a position where a break is allowed and
#       "." presumably matches any single character, by analogy with UAX #29
#       notation; rules without "÷" would then keep the matched sequence
#       together - confirm against the RBBIMonkeyTest rule parser.
#

# Line terminators and controls.
GB3: CR LF;
GB4: (Control | CR | LF) ÷;
GB5: . ÷ (Control | CR | LF);

# Hangul syllable sequences, built from the Korean definitions above.
GB6: L (L | V | LV | LVT);
GB7: (LV | V) (V | T);
GB8: (LVT | T) T;

# Emoji ZWJ sequences, Indic conjuncts, then the general extend/spacing/prepend rules.
# NOTE(review): GB11 and GB9c are listed ahead of GB9, out of numeric order.
#       Rule order is presumably significant here - do not reorder without
#       confirming how the monkey test applies the rules.
GB11: Extended_Pict Extend* ZWJ Extended_Pict;
GB9c: LinkingConsonant ExtCccZwj* Virama_ ExtCccZwj* LinkingConsonant;
GB9: . (Extend | ZWJ);
GB9a: . SpacingMark;
GB9b: Prepend .;

# Regional Indicators, split into pairs.
#      Note that a pair of RIs that is not followed by a third RI will fall into
#      the normal rules for Extend, etc.
#
GB12: Regional_Indicator Regional_Indicator ÷ Regional_Indicator;
GB13: Regional_Indicator Regional_Indicator;

# Final catch-all rule: applies to any character not handled by a rule above.
GB999: . ÷;