comparison modules/javafx.web/src/main/native/Source/ThirdParty/icu/source/common/ucase.h @ 11038:20a8447c71c6

8207159: Update ICU to version 62.1
Reviewed-by: mbilla, kcr, ghb
author arajkumar
date Fri, 24 Aug 2018 15:06:40 +0530
parents fee4ef5c87df
children
comparison
equal deleted inserted replaced
0:a94527429f66 1:b8b500f022e2
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
1 /* 3 /*
2 ******************************************************************************* 4 *******************************************************************************
3 * 5 *
4 * Copyright (C) 2004-2012, International Business Machines 6 * Copyright (C) 2004-2012, International Business Machines
5 * Corporation and others. All Rights Reserved. 7 * Corporation and others. All Rights Reserved.
6 * 8 *
7 ******************************************************************************* 9 *******************************************************************************
8 * file name: ucase.h 10 * file name: ucase.h
9 * encoding: US-ASCII 11 * encoding: UTF-8
10 * tab size: 8 (not used) 12 * tab size: 8 (not used)
11 * indentation:4 13 * indentation:4
12 * 14 *
13 * created on: 2004aug30 15 * created on: 2004aug30
14 * created by: Markus W. Scherer 16 * created by: Markus W. Scherer
22 #include "unicode/utypes.h" 24 #include "unicode/utypes.h"
23 #include "unicode/uset.h" 25 #include "unicode/uset.h"
24 #include "putilimp.h" 26 #include "putilimp.h"
25 #include "uset_imp.h" 27 #include "uset_imp.h"
26 #include "udataswp.h" 28 #include "udataswp.h"
29 #include "utrie2.h"
27 30
28 #ifdef __cplusplus 31 #ifdef __cplusplus
29 U_NAMESPACE_BEGIN 32 U_NAMESPACE_BEGIN
30 33
31 class UnicodeString; 34 class UnicodeString;
33 U_NAMESPACE_END 36 U_NAMESPACE_END
34 #endif 37 #endif
35 38
36 /* library API -------------------------------------------------------------- */ 39 /* library API -------------------------------------------------------------- */
37 40
38 U_CDECL_BEGIN
39
40 struct UCaseProps;
41 typedef struct UCaseProps UCaseProps;
42
43 U_CDECL_END
44
45 U_CAPI const UCaseProps * U_EXPORT2
46 ucase_getSingleton(void);
47
48 U_CFUNC void U_EXPORT2 41 U_CFUNC void U_EXPORT2
49 ucase_addPropertyStarts(const UCaseProps *csp, const USetAdder *sa, UErrorCode *pErrorCode); 42 ucase_addPropertyStarts(const USetAdder *sa, UErrorCode *pErrorCode);
50 43
51 /** 44 /**
52 * Requires non-NULL locale ID but otherwise does the equivalent of 45 * Requires non-NULL locale ID but otherwise does the equivalent of
53 * checking for language codes as if uloc_getLanguage() were called: 46 * checking for language codes as if uloc_getLanguage() were called:
54 * Accepts both 2- and 3-letter codes and accepts case variants. 47 * Accepts both 2- and 3-letter codes and accepts case variants.
55 */ 48 */
56 U_CFUNC int32_t 49 U_CFUNC int32_t
57 ucase_getCaseLocale(const char *locale, int32_t *locCache); 50 ucase_getCaseLocale(const char *locale);
58 51
59 /* Casing locale types for ucase_getCaseLocale */ 52 /* Casing locale types for ucase_getCaseLocale */
60 enum { 53 enum {
61 UCASE_LOC_UNKNOWN, 54 UCASE_LOC_UNKNOWN,
62 UCASE_LOC_ROOT, 55 UCASE_LOC_ROOT,
63 UCASE_LOC_TURKISH, 56 UCASE_LOC_TURKISH,
64 UCASE_LOC_LITHUANIAN, 57 UCASE_LOC_LITHUANIAN,
58 UCASE_LOC_GREEK,
65 UCASE_LOC_DUTCH 59 UCASE_LOC_DUTCH
66 }; 60 };
67 61
68 /** 62 /**
69 * Bit mask for getting just the options from a string compare options word 63 * Bit mask for getting just the options from a string compare options word
70 * that are relevant for case-insensitive string comparison. 64 * that are relevant for case-insensitive string comparison.
71 * See uchar.h. Also include _STRNCMP_STYLE and U_COMPARE_CODE_POINT_ORDER. 65 * See stringoptions.h. Also include _STRNCMP_STYLE and U_COMPARE_CODE_POINT_ORDER.
72 * @internal 66 * @internal
73 */ 67 */
74 #define _STRCASECMP_OPTIONS_MASK 0xffff 68 #define _STRCASECMP_OPTIONS_MASK 0xffff
75 69
76 /** 70 /**
77 * Bit mask for getting just the options from a string compare options word 71 * Bit mask for getting just the options from a string compare options word
78 * that are relevant for case folding (of a single string or code point). 72 * that are relevant for case folding (of a single string or code point).
79 * See uchar.h. 73 *
74 * Currently only bit 0 for U_FOLD_CASE_EXCLUDE_SPECIAL_I.
75 * It is conceivable that at some point we might use one more bit for using uppercase sharp s.
76 * It is conceivable that at some point we might want the option to use only simple case foldings
77 * when operating on strings.
78 *
79 * See stringoptions.h.
80 * @internal 80 * @internal
81 */ 81 */
82 #define _FOLD_CASE_OPTIONS_MASK 0xff 82 #define _FOLD_CASE_OPTIONS_MASK 7
83 83
84 /* single-code point functions */ 84 /* single-code point functions */
85 85
86 U_CAPI UChar32 U_EXPORT2 86 U_CAPI UChar32 U_EXPORT2
87 ucase_tolower(const UCaseProps *csp, UChar32 c); 87 ucase_tolower(UChar32 c);
88 88
89 U_CAPI UChar32 U_EXPORT2 89 U_CAPI UChar32 U_EXPORT2
90 ucase_toupper(const UCaseProps *csp, UChar32 c); 90 ucase_toupper(UChar32 c);
91 91
92 U_CAPI UChar32 U_EXPORT2 92 U_CAPI UChar32 U_EXPORT2
93 ucase_totitle(const UCaseProps *csp, UChar32 c); 93 ucase_totitle(UChar32 c);
94 94
95 U_CAPI UChar32 U_EXPORT2 95 U_CAPI UChar32 U_EXPORT2
96 ucase_fold(const UCaseProps *csp, UChar32 c, uint32_t options); 96 ucase_fold(UChar32 c, uint32_t options);
97 97
98 /** 98 /**
99 * Adds all simple case mappings and the full case folding for c to sa, 99 * Adds all simple case mappings and the full case folding for c to sa,
100 * and also adds special case closure mappings. 100 * and also adds special case closure mappings.
101 * c itself is not added. 101 * c itself is not added.
103 * - for s include long s 103 * - for s include long s
104 * - for sharp s include ss 104 * - for sharp s include ss
105 * - for k include the Kelvin sign 105 * - for k include the Kelvin sign
106 */ 106 */
107 U_CFUNC void U_EXPORT2 107 U_CFUNC void U_EXPORT2
108 ucase_addCaseClosure(const UCaseProps *csp, UChar32 c, const USetAdder *sa); 108 ucase_addCaseClosure(UChar32 c, const USetAdder *sa);
109 109
110 /** 110 /**
111 * Maps the string to single code points and adds the associated case closure 111 * Maps the string to single code points and adds the associated case closure
112 * mappings. 112 * mappings.
113 * The string is mapped to code points if it is their full case folding string. 113 * The string is mapped to code points if it is their full case folding string.
118 * It must be length>=0. 118 * It must be length>=0.
119 * 119 *
120 * @return TRUE if the string was found 120 * @return TRUE if the string was found
121 */ 121 */
122 U_CFUNC UBool U_EXPORT2 122 U_CFUNC UBool U_EXPORT2
123 ucase_addStringCaseClosure(const UCaseProps *csp, const UChar *s, int32_t length, const USetAdder *sa); 123 ucase_addStringCaseClosure(const UChar *s, int32_t length, const USetAdder *sa);
124 124
125 #ifdef __cplusplus 125 #ifdef __cplusplus
126 U_NAMESPACE_BEGIN 126 U_NAMESPACE_BEGIN
127 127
128 /** 128 /**
147 int32_t unfoldStringWidth; 147 int32_t unfoldStringWidth;
148 int32_t currentRow; 148 int32_t currentRow;
149 int32_t rowCpIndex; 149 int32_t rowCpIndex;
150 }; 150 };
151 151
152 /**
153 * Fast case mapping data for ASCII/Latin.
154 * Linear arrays of delta bytes: 0=no mapping; EXC=exception.
155 * Deltas must not cross the ASCII boundary, or else they cannot be easily used
156 * in simple UTF-8 code.
157 */
158 namespace LatinCase {
159
160 /** Case mapping/folding data for code points up to U+017F. */
161 constexpr UChar LIMIT = 0x180;
162 /** U+017F case-folds and uppercases crossing the ASCII boundary. */
163 constexpr UChar LONG_S = 0x17f;
164 /** Exception: Complex mapping, or too-large delta. */
165 constexpr int8_t EXC = -0x80;
166
167 /** Deltas for lowercasing for most locales, and default case folding. */
168 extern const int8_t TO_LOWER_NORMAL[LIMIT];
169 /** Deltas for lowercasing for tr/az/lt, and Turkic case folding. */
170 extern const int8_t TO_LOWER_TR_LT[LIMIT];
171
172 /** Deltas for uppercasing for most locales. */
173 extern const int8_t TO_UPPER_NORMAL[LIMIT];
174 /** Deltas for uppercasing for tr/az. */
175 extern const int8_t TO_UPPER_TR[LIMIT];
176
177 } // namespace LatinCase
178
152 U_NAMESPACE_END 179 U_NAMESPACE_END
153 #endif 180 #endif
154 181
155 /** @return UCASE_NONE, UCASE_LOWER, UCASE_UPPER, UCASE_TITLE */ 182 /** @return UCASE_NONE, UCASE_LOWER, UCASE_UPPER, UCASE_TITLE */
156 U_CAPI int32_t U_EXPORT2 183 U_CAPI int32_t U_EXPORT2
157 ucase_getType(const UCaseProps *csp, UChar32 c); 184 ucase_getType(UChar32 c);
158 185
159 /** @return same as ucase_getType(), or <0 if c is case-ignorable */ 186 /** @return like ucase_getType() but also sets UCASE_IGNORABLE if c is case-ignorable */
160 U_CAPI int32_t U_EXPORT2 187 U_CAPI int32_t U_EXPORT2
161 ucase_getTypeOrIgnorable(const UCaseProps *csp, UChar32 c); 188 ucase_getTypeOrIgnorable(UChar32 c);
162 189
163 U_CAPI UBool U_EXPORT2 190 U_CAPI UBool U_EXPORT2
164 ucase_isSoftDotted(const UCaseProps *csp, UChar32 c); 191 ucase_isSoftDotted(UChar32 c);
165 192
166 U_CAPI UBool U_EXPORT2 193 U_CAPI UBool U_EXPORT2
167 ucase_isCaseSensitive(const UCaseProps *csp, UChar32 c); 194 ucase_isCaseSensitive(UChar32 c);
168 195
169 /* string case mapping functions */ 196 /* string case mapping functions */
170 197
171 U_CDECL_BEGIN 198 U_CDECL_BEGIN
172 199
235 * See UCaseContextIterator for details. 262 * See UCaseContextIterator for details.
236 * If iter==NULL then a context-independent result is returned. 263 * If iter==NULL then a context-independent result is returned.
237 * @param context Pointer to be passed into iter. 264 * @param context Pointer to be passed into iter.
238 * @param pString If the mapping result is a string, then the pointer is 265 * @param pString If the mapping result is a string, then the pointer is
239 * written to *pString. 266 * written to *pString.
240 * @param locale Locale ID for locale-dependent mappings. 267 * @param caseLocale Case locale value from ucase_getCaseLocale().
241 * @param locCache Initialize to 0; may be used to cache the result of parsing
242 * the locale ID for subsequent calls.
243 * Can be NULL.
244 * @return Output code point or string length, see UCASE_MAX_STRING_LENGTH. 268 * @return Output code point or string length, see UCASE_MAX_STRING_LENGTH.
245 * 269 *
246 * @see UCaseContextIterator 270 * @see UCaseContextIterator
247 * @see UCASE_MAX_STRING_LENGTH 271 * @see UCASE_MAX_STRING_LENGTH
248 * @internal 272 * @internal
249 */ 273 */
250 U_CAPI int32_t U_EXPORT2 274 U_CAPI int32_t U_EXPORT2
251 ucase_toFullLower(const UCaseProps *csp, UChar32 c, 275 ucase_toFullLower(UChar32 c,
252 UCaseContextIterator *iter, void *context, 276 UCaseContextIterator *iter, void *context,
253 const UChar **pString, 277 const UChar **pString,
254 const char *locale, int32_t *locCache); 278 int32_t caseLocale);
255 279
256 U_CAPI int32_t U_EXPORT2 280 U_CAPI int32_t U_EXPORT2
257 ucase_toFullUpper(const UCaseProps *csp, UChar32 c, 281 ucase_toFullUpper(UChar32 c,
258 UCaseContextIterator *iter, void *context, 282 UCaseContextIterator *iter, void *context,
259 const UChar **pString, 283 const UChar **pString,
260 const char *locale, int32_t *locCache); 284 int32_t caseLocale);
261 285
262 U_CAPI int32_t U_EXPORT2 286 U_CAPI int32_t U_EXPORT2
263 ucase_toFullTitle(const UCaseProps *csp, UChar32 c, 287 ucase_toFullTitle(UChar32 c,
264 UCaseContextIterator *iter, void *context, 288 UCaseContextIterator *iter, void *context,
265 const UChar **pString, 289 const UChar **pString,
266 const char *locale, int32_t *locCache); 290 int32_t caseLocale);
267 291
268 U_CAPI int32_t U_EXPORT2 292 U_CAPI int32_t U_EXPORT2
269 ucase_toFullFolding(const UCaseProps *csp, UChar32 c, 293 ucase_toFullFolding(UChar32 c,
270 const UChar **pString, 294 const UChar **pString,
271 uint32_t options); 295 uint32_t options);
272 296
273 U_CFUNC int32_t U_EXPORT2 297 U_CFUNC int32_t U_EXPORT2
274 ucase_hasBinaryProperty(UChar32 c, UProperty which); 298 ucase_hasBinaryProperty(UChar32 c, UProperty which);
278 302
279 /** 303 /**
280 * @internal 304 * @internal
281 */ 305 */
282 typedef int32_t U_CALLCONV 306 typedef int32_t U_CALLCONV
283 UCaseMapFull(const UCaseProps *csp, UChar32 c, 307 UCaseMapFull(UChar32 c,
284 UCaseContextIterator *iter, void *context, 308 UCaseContextIterator *iter, void *context,
285 const UChar **pString, 309 const UChar **pString,
286 const char *locale, int32_t *locCache); 310 int32_t caseLocale);
287 311
288 U_CDECL_END 312 U_CDECL_END
289 313
290 /* file definitions --------------------------------------------------------- */ 314 /* file definitions --------------------------------------------------------- */
291 315
310 UCASE_IX_TOP=16 334 UCASE_IX_TOP=16
311 }; 335 };
312 336
313 /* definitions for 16-bit case properties word ------------------------------ */ 337 /* definitions for 16-bit case properties word ------------------------------ */
314 338
339 U_CFUNC const UTrie2 * U_EXPORT2
340 ucase_getTrie();
341
315 /* 2-bit constants for types of cased characters */ 342 /* 2-bit constants for types of cased characters */
316 #define UCASE_TYPE_MASK 3 343 #define UCASE_TYPE_MASK 3
317 enum { 344 enum {
318 UCASE_NONE, 345 UCASE_NONE,
319 UCASE_LOWER, 346 UCASE_LOWER,
322 }; 349 };
323 350
324 #define UCASE_GET_TYPE(props) ((props)&UCASE_TYPE_MASK) 351 #define UCASE_GET_TYPE(props) ((props)&UCASE_TYPE_MASK)
325 #define UCASE_GET_TYPE_AND_IGNORABLE(props) ((props)&7) 352 #define UCASE_GET_TYPE_AND_IGNORABLE(props) ((props)&7)
326 353
354 #define UCASE_IS_UPPER_OR_TITLE(props) ((props)&2)
355
327 #define UCASE_IGNORABLE 4 356 #define UCASE_IGNORABLE 4
328 #define UCASE_SENSITIVE 8 357 #define UCASE_EXCEPTION 8
329 #define UCASE_EXCEPTION 0x10 358 #define UCASE_SENSITIVE 0x10
359
360 #define UCASE_HAS_EXCEPTION(props) ((props)&UCASE_EXCEPTION)
330 361
331 #define UCASE_DOT_MASK 0x60 362 #define UCASE_DOT_MASK 0x60
332 enum { 363 enum {
333 UCASE_NO_DOT=0, /* normal characters with cc=0 */ 364 UCASE_NO_DOT=0, /* normal characters with cc=0 */
334 UCASE_SOFT_DOTTED=0x20, /* soft-dotted characters with cc=0 */ 365 UCASE_SOFT_DOTTED=0x20, /* soft-dotted characters with cc=0 */
346 # define UCASE_GET_DELTA(props) ((int16_t)(props)>>UCASE_DELTA_SHIFT) 377 # define UCASE_GET_DELTA(props) ((int16_t)(props)>>UCASE_DELTA_SHIFT)
347 #else 378 #else
348 # define UCASE_GET_DELTA(props) (int16_t)(((props)&0x8000) ? (((props)>>UCASE_DELTA_SHIFT)|0xfe00) : ((uint16_t)(props)>>UCASE_DELTA_SHIFT)) 379 # define UCASE_GET_DELTA(props) (int16_t)(((props)&0x8000) ? (((props)>>UCASE_DELTA_SHIFT)|0xfe00) : ((uint16_t)(props)>>UCASE_DELTA_SHIFT))
349 #endif 380 #endif
350 381
351 /* exception: bits 15..5 are an unsigned 11-bit index into the exceptions array */ 382 /* exception: bits 15..4 are an unsigned 12-bit index into the exceptions array */
352 #define UCASE_EXC_SHIFT 5 383 #define UCASE_EXC_SHIFT 4
353 #define UCASE_EXC_MASK 0xffe0 384 #define UCASE_EXC_MASK 0xfff0
354 #define UCASE_MAX_EXCEPTIONS ((UCASE_EXC_MASK>>UCASE_EXC_SHIFT)+1) 385 #define UCASE_MAX_EXCEPTIONS ((UCASE_EXC_MASK>>UCASE_EXC_SHIFT)+1)
355 386
356 /* definitions for 16-bit main exceptions word ------------------------------ */ 387 /* definitions for 16-bit main exceptions word ------------------------------ */
357 388
358 /* first 8 bits indicate values in optional slots */ 389 /* first 8 bits indicate values in optional slots */
359 enum { 390 enum {
360 UCASE_EXC_LOWER, 391 UCASE_EXC_LOWER,
361 UCASE_EXC_FOLD, 392 UCASE_EXC_FOLD,
362 UCASE_EXC_UPPER, 393 UCASE_EXC_UPPER,
363 UCASE_EXC_TITLE, 394 UCASE_EXC_TITLE,
364 UCASE_EXC_4, /* reserved */ 395 UCASE_EXC_DELTA,
365 UCASE_EXC_5, /* reserved */ 396 UCASE_EXC_5, /* reserved */
366 UCASE_EXC_CLOSURE, 397 UCASE_EXC_CLOSURE,
367 UCASE_EXC_FULL_MAPPINGS, 398 UCASE_EXC_FULL_MAPPINGS,
368 UCASE_EXC_ALL_SLOTS /* one past the last slot */ 399 UCASE_EXC_ALL_SLOTS /* one past the last slot */
369 }; 400 };
370 401
371 /* each slot is 2 uint16_t instead of 1 */ 402 /* each slot is 2 uint16_t instead of 1 */
372 #define UCASE_EXC_DOUBLE_SLOTS 0x100 403 #define UCASE_EXC_DOUBLE_SLOTS 0x100
373 404
374 /* reserved: exception bits 11..9 */ 405 enum {
406 UCASE_EXC_NO_SIMPLE_CASE_FOLDING=0x200,
407 UCASE_EXC_DELTA_IS_NEGATIVE=0x400,
408 UCASE_EXC_SENSITIVE=0x800
409 };
375 410
376 /* UCASE_EXC_DOT_MASK=UCASE_DOT_MASK<<UCASE_EXC_DOT_SHIFT */ 411 /* UCASE_EXC_DOT_MASK=UCASE_DOT_MASK<<UCASE_EXC_DOT_SHIFT */
377 #define UCASE_EXC_DOT_SHIFT 7 412 #define UCASE_EXC_DOT_SHIFT 7
378 413
379 /* normally stored in the main word, but pushed out for larger exception indexes */ 414 /* normally stored in the main word, but pushed out for larger exception indexes */