comparison modules/javafx.web/src/main/native/Source/ThirdParty/icu/source/i18n/ucoleitr.cpp @ 11038:20a8447c71c6

8207159: Update ICU to version 62.1 Reviewed-by: mbilla, kcr, ghb
author arajkumar
date Fri, 24 Aug 2018 15:06:40 +0530
parents fee4ef5c87df
children
comparison
equal deleted inserted replaced
0:24aa240622b6 1:9b9129e445dc
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
1 /* 3 /*
2 ****************************************************************************** 4 ******************************************************************************
3 * Copyright (C) 2001-2011, International Business Machines 5 * Copyright (C) 2001-2016, International Business Machines
4 * Corporation and others. All Rights Reserved. 6 * Corporation and others. All Rights Reserved.
5 ****************************************************************************** 7 ******************************************************************************
6 * 8 *
7 * File ucoleitr.cpp 9 * File ucoleitr.cpp
8 * 10 *
9 * Modification History: 11 * Modification History:
10 * 12 *
11 * Date Name Description 13 * Date Name Description
12 * 02/15/2001 synwee Modified all methods to process its own function 14 * 02/15/2001 synwee Modified all methods to process its own function
13 * instead of calling the equivalent c++ api (coleitr.h) 15 * instead of calling the equivalent c++ api (coleitr.h)
16 * 2012-2014 markus Rewritten in C++ again.
14 ******************************************************************************/ 17 ******************************************************************************/
15 18
16 #include "unicode/utypes.h" 19 #include "unicode/utypes.h"
17 20
18 #if !UCONFIG_NO_COLLATION 21 #if !UCONFIG_NO_COLLATION
19 22
23 #include "unicode/coleitr.h"
24 #include "unicode/tblcoll.h"
20 #include "unicode/ucoleitr.h" 25 #include "unicode/ucoleitr.h"
21 #include "unicode/ustring.h" 26 #include "unicode/ustring.h"
22 #include "unicode/sortkey.h" 27 #include "unicode/sortkey.h"
23 #include "unicode/uobject.h" 28 #include "unicode/uobject.h"
24 #include "ucol_imp.h"
25 #include "cmemory.h" 29 #include "cmemory.h"
30 #include "usrchimp.h"
26 31
27 U_NAMESPACE_USE 32 U_NAMESPACE_USE
28 33
29 #define BUFFER_LENGTH 100 34 #define BUFFER_LENGTH 100
30 35
31 #define DEFAULT_BUFFER_SIZE 16 36 #define DEFAULT_BUFFER_SIZE 16
32 #define BUFFER_GROW 8 37 #define BUFFER_GROW 8
33 38
34 #define ARRAY_SIZE(array) (sizeof array / sizeof array[0]) 39 #define ARRAY_COPY(dst, src, count) uprv_memcpy((void *) (dst), (void *) (src), (size_t)(count) * sizeof (src)[0])
35 40
36 #define ARRAY_COPY(dst, src, count) uprv_memcpy((void *) (dst), (void *) (src), (count) * sizeof (src)[0]) 41 #define NEW_ARRAY(type, count) (type *) uprv_malloc((size_t)(count) * sizeof(type))
37
38 #define NEW_ARRAY(type, count) (type *) uprv_malloc((count) * sizeof(type))
39
40 #define GROW_ARRAY(array, newSize) uprv_realloc((void *) (array), (newSize) * sizeof (array)[0])
41 42
42 #define DELETE_ARRAY(array) uprv_free((void *) (array)) 43 #define DELETE_ARRAY(array) uprv_free((void *) (array))
43
44 typedef struct icu::collIterate collIterator;
45 44
46 struct RCEI 45 struct RCEI
47 { 46 {
48 uint32_t ce; 47 uint32_t ce;
49 int32_t low; 48 int32_t low;
60 int32_t bufferSize; 59 int32_t bufferSize;
61 60
62 RCEBuffer(); 61 RCEBuffer();
63 ~RCEBuffer(); 62 ~RCEBuffer();
64 63
65 UBool empty() const; 64 UBool isEmpty() const;
66 void put(uint32_t ce, int32_t ixLow, int32_t ixHigh); 65 void put(uint32_t ce, int32_t ixLow, int32_t ixHigh, UErrorCode &errorCode);
67 const RCEI *get(); 66 const RCEI *get();
68 }; 67 };
69 68
70 RCEBuffer::RCEBuffer() 69 RCEBuffer::RCEBuffer()
71 { 70 {
72 buffer = defaultBuffer; 71 buffer = defaultBuffer;
73 bufferIndex = 0; 72 bufferIndex = 0;
74 bufferSize = DEFAULT_BUFFER_SIZE; 73 bufferSize = UPRV_LENGTHOF(defaultBuffer);
75 } 74 }
76 75
77 RCEBuffer::~RCEBuffer() 76 RCEBuffer::~RCEBuffer()
78 { 77 {
79 if (buffer != defaultBuffer) { 78 if (buffer != defaultBuffer) {
80 DELETE_ARRAY(buffer); 79 DELETE_ARRAY(buffer);
81 } 80 }
82 } 81 }
83 82
84 UBool RCEBuffer::empty() const 83 UBool RCEBuffer::isEmpty() const
85 { 84 {
86 return bufferIndex <= 0; 85 return bufferIndex <= 0;
87 } 86 }
88 87
89 void RCEBuffer::put(uint32_t ce, int32_t ixLow, int32_t ixHigh) 88 void RCEBuffer::put(uint32_t ce, int32_t ixLow, int32_t ixHigh, UErrorCode &errorCode)
90 { 89 {
90 if (U_FAILURE(errorCode)) {
91 return;
92 }
91 if (bufferIndex >= bufferSize) { 93 if (bufferIndex >= bufferSize) {
92 RCEI *newBuffer = NEW_ARRAY(RCEI, bufferSize + BUFFER_GROW); 94 RCEI *newBuffer = NEW_ARRAY(RCEI, bufferSize + BUFFER_GROW);
95 if (newBuffer == NULL) {
96 errorCode = U_MEMORY_ALLOCATION_ERROR;
97 return;
98 }
93 99
94 ARRAY_COPY(newBuffer, buffer, bufferSize); 100 ARRAY_COPY(newBuffer, buffer, bufferSize);
95 101
96 if (buffer != defaultBuffer) { 102 if (buffer != defaultBuffer) {
97 DELETE_ARRAY(buffer); 103 DELETE_ARRAY(buffer);
115 } 121 }
116 122
117 return NULL; 123 return NULL;
118 } 124 }
119 125
120 struct PCEI
121 {
122 uint64_t ce;
123 int32_t low;
124 int32_t high;
125 };
126
127 struct PCEBuffer
128 {
129 PCEI defaultBuffer[DEFAULT_BUFFER_SIZE];
130 PCEI *buffer;
131 int32_t bufferIndex;
132 int32_t bufferSize;
133
134 PCEBuffer();
135 ~PCEBuffer();
136
137 void reset();
138 UBool empty() const;
139 void put(uint64_t ce, int32_t ixLow, int32_t ixHigh);
140 const PCEI *get();
141 };
142
143 PCEBuffer::PCEBuffer() 126 PCEBuffer::PCEBuffer()
144 { 127 {
145 buffer = defaultBuffer; 128 buffer = defaultBuffer;
146 bufferIndex = 0; 129 bufferIndex = 0;
147 bufferSize = DEFAULT_BUFFER_SIZE; 130 bufferSize = UPRV_LENGTHOF(defaultBuffer);
148 } 131 }
149 132
150 PCEBuffer::~PCEBuffer() 133 PCEBuffer::~PCEBuffer()
151 { 134 {
152 if (buffer != defaultBuffer) { 135 if (buffer != defaultBuffer) {
157 void PCEBuffer::reset() 140 void PCEBuffer::reset()
158 { 141 {
159 bufferIndex = 0; 142 bufferIndex = 0;
160 } 143 }
161 144
162 UBool PCEBuffer::empty() const 145 UBool PCEBuffer::isEmpty() const
163 { 146 {
164 return bufferIndex <= 0; 147 return bufferIndex <= 0;
165 } 148 }
166 149
167 void PCEBuffer::put(uint64_t ce, int32_t ixLow, int32_t ixHigh) 150 void PCEBuffer::put(uint64_t ce, int32_t ixLow, int32_t ixHigh, UErrorCode &errorCode)
168 { 151 {
152 if (U_FAILURE(errorCode)) {
153 return;
154 }
169 if (bufferIndex >= bufferSize) { 155 if (bufferIndex >= bufferSize) {
170 PCEI *newBuffer = NEW_ARRAY(PCEI, bufferSize + BUFFER_GROW); 156 PCEI *newBuffer = NEW_ARRAY(PCEI, bufferSize + BUFFER_GROW);
157 if (newBuffer == NULL) {
158 errorCode = U_MEMORY_ALLOCATION_ERROR;
159 return;
160 }
171 161
172 ARRAY_COPY(newBuffer, buffer, bufferSize); 162 ARRAY_COPY(newBuffer, buffer, bufferSize);
173 163
174 if (buffer != defaultBuffer) { 164 if (buffer != defaultBuffer) {
175 DELETE_ARRAY(buffer); 165 DELETE_ARRAY(buffer);
193 } 183 }
194 184
195 return NULL; 185 return NULL;
196 } 186 }
197 187
198 /* 188 UCollationPCE::UCollationPCE(UCollationElements *elems) { init(elems); }
199 * This inherits from UObject so that 189
200 * it can be allocated by new and the 190 UCollationPCE::UCollationPCE(CollationElementIterator *iter) { init(iter); }
201 * constructor for PCEBuffer is called. 191
202 */ 192 void UCollationPCE::init(UCollationElements *elems) {
203 struct UCollationPCE : public UObject 193 init(CollationElementIterator::fromUCollationElements(elems));
204 { 194 }
205 PCEBuffer pceBuffer; 195
206 UCollationStrength strength; 196 void UCollationPCE::init(CollationElementIterator *iter)
207 UBool toShift; 197 {
208 UBool isShifted; 198 cei = iter;
209 uint32_t variableTop; 199 init(*iter->rbc_);
210 200 }
211 UCollationPCE(UCollationElements *elems); 201
212 ~UCollationPCE(); 202 void UCollationPCE::init(const Collator &coll)
213
214 void init(const UCollator *coll);
215
216 virtual UClassID getDynamicClassID() const;
217 static UClassID getStaticClassID();
218 };
219
220 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(UCollationPCE)
221
222 UCollationPCE::UCollationPCE(UCollationElements *elems)
223 {
224 init(elems->iteratordata_.coll);
225 }
226
227 void UCollationPCE::init(const UCollator *coll)
228 { 203 {
229 UErrorCode status = U_ZERO_ERROR; 204 UErrorCode status = U_ZERO_ERROR;
230 205
231 strength = ucol_getStrength(coll); 206 strength = coll.getAttribute(UCOL_STRENGTH, status);
232 toShift = ucol_getAttribute(coll, UCOL_ALTERNATE_HANDLING, &status) == UCOL_SHIFTED; 207 toShift = coll.getAttribute(UCOL_ALTERNATE_HANDLING, status) == UCOL_SHIFTED;
233 isShifted = FALSE; 208 isShifted = FALSE;
234 variableTop = coll->variableTopValue << 16; 209 variableTop = coll.getVariableTop(status);
235 } 210 }
236 211
237 UCollationPCE::~UCollationPCE() 212 UCollationPCE::~UCollationPCE()
238 { 213 {
239 // nothing to do 214 // nothing to do
240 } 215 }
241 216
242 217 uint64_t UCollationPCE::processCE(uint32_t ce)
243 U_NAMESPACE_END
244
245
246 inline uint64_t processCE(UCollationElements *elems, uint32_t ce)
247 { 218 {
248 uint64_t primary = 0, secondary = 0, tertiary = 0, quaternary = 0; 219 uint64_t primary = 0, secondary = 0, tertiary = 0, quaternary = 0;
249 220
250 // This is clean, but somewhat slow... 221 // This is clean, but somewhat slow...
251 // We could apply the mask to ce and then 222 // We could apply the mask to ce and then
252 // just get all three orders... 223 // just get all three orders...
253 switch(elems->pce->strength) { 224 switch(strength) {
254 default: 225 default:
255 tertiary = ucol_tertiaryOrder(ce); 226 tertiary = ucol_tertiaryOrder(ce);
256 /* note fall-through */ 227 U_FALLTHROUGH;
257 228
258 case UCOL_SECONDARY: 229 case UCOL_SECONDARY:
259 secondary = ucol_secondaryOrder(ce); 230 secondary = ucol_secondaryOrder(ce);
260 /* note fall-through */ 231 U_FALLTHROUGH;
261 232
262 case UCOL_PRIMARY: 233 case UCOL_PRIMARY:
263 primary = ucol_primaryOrder(ce); 234 primary = ucol_primaryOrder(ce);
264 } 235 }
265 236
269 // **** So we can lay out the 64 bits as: 24.12.12.16. **** 240 // **** So we can lay out the 64 bits as: 24.12.12.16. ****
270 // **** Another complication with continuations is that **** 241 // **** Another complication with continuations is that ****
271 // **** the *second* CE is marked as a continuation, so **** 242 // **** the *second* CE is marked as a continuation, so ****
272 // **** we always have to peek ahead to know how long **** 243 // **** we always have to peek ahead to know how long ****
273 // **** the primary is... **** 244 // **** the primary is... ****
274 if ((elems->pce->toShift && elems->pce->variableTop > ce && primary != 0) 245 if ((toShift && variableTop > ce && primary != 0)
275 || (elems->pce->isShifted && primary == 0)) { 246 || (isShifted && primary == 0)) {
276 247
277 if (primary == 0) { 248 if (primary == 0) {
278 return UCOL_IGNORABLE; 249 return UCOL_IGNORABLE;
279 } 250 }
280 251
281 if (elems->pce->strength >= UCOL_QUATERNARY) { 252 if (strength >= UCOL_QUATERNARY) {
282 quaternary = primary; 253 quaternary = primary;
283 } 254 }
284 255
285 primary = secondary = tertiary = 0; 256 primary = secondary = tertiary = 0;
286 elems->pce->isShifted = TRUE; 257 isShifted = TRUE;
287 } else { 258 } else {
288 if (elems->pce->strength >= UCOL_QUATERNARY) { 259 if (strength >= UCOL_QUATERNARY) {
289 quaternary = 0xFFFF; 260 quaternary = 0xFFFF;
290 } 261 }
291 262
292 elems->pce->isShifted = FALSE; 263 isShifted = FALSE;
293 } 264 }
294 265
295 return primary << 48 | secondary << 32 | tertiary << 16 | quaternary; 266 return primary << 48 | secondary << 32 | tertiary << 16 | quaternary;
296 } 267 }
297 268
298 U_CAPI void U_EXPORT2 269 U_NAMESPACE_END
299 uprv_init_pce(const UCollationElements *elems)
300 {
301 if (elems->pce != NULL) {
302 elems->pce->init(elems->iteratordata_.coll);
303 }
304 }
305
306
307 270
308 /* public methods ---------------------------------------------------- */ 271 /* public methods ---------------------------------------------------- */
309 272
310 U_CAPI UCollationElements* U_EXPORT2 273 U_CAPI UCollationElements* U_EXPORT2
311 ucol_openElements(const UCollator *coll, 274 ucol_openElements(const UCollator *coll,
314 UErrorCode *status) 277 UErrorCode *status)
315 { 278 {
316 if (U_FAILURE(*status)) { 279 if (U_FAILURE(*status)) {
317 return NULL; 280 return NULL;
318 } 281 }
319 282 if (coll == NULL || (text == NULL && textLength != 0)) {
320 UCollationElements *result = new UCollationElements; 283 *status = U_ILLEGAL_ARGUMENT_ERROR;
321 if (result == NULL) { 284 return NULL;
285 }
286 const RuleBasedCollator *rbc = RuleBasedCollator::rbcFromUCollator(coll);
287 if (rbc == NULL) {
288 *status = U_UNSUPPORTED_ERROR; // coll is a Collator but not a RuleBasedCollator
289 return NULL;
290 }
291
292 UnicodeString s((UBool)(textLength < 0), text, textLength);
293 CollationElementIterator *cei = rbc->createCollationElementIterator(s);
294 if (cei == NULL) {
322 *status = U_MEMORY_ALLOCATION_ERROR; 295 *status = U_MEMORY_ALLOCATION_ERROR;
323 return NULL; 296 return NULL;
324 } 297 }
325 298
326 result->reset_ = TRUE; 299 return cei->toUCollationElements();
327 result->isWritable = FALSE;
328 result->pce = NULL;
329
330 if (text == NULL) {
331 textLength = 0;
332 }
333 uprv_init_collIterate(coll, text, textLength, &result->iteratordata_, status);
334
335 return result;
336 } 300 }
337 301
338 302
339 U_CAPI void U_EXPORT2 303 U_CAPI void U_EXPORT2
340 ucol_closeElements(UCollationElements *elems) 304 ucol_closeElements(UCollationElements *elems)
341 { 305 {
342 if (elems != NULL) { 306 delete CollationElementIterator::fromUCollationElements(elems);
343 collIterate *ci = &elems->iteratordata_;
344
345 if (ci->extendCEs) {
346 uprv_free(ci->extendCEs);
347 }
348
349 if (ci->offsetBuffer) {
350 uprv_free(ci->offsetBuffer);
351 }
352
353 if (elems->isWritable && elems->iteratordata_.string != NULL)
354 {
355 uprv_free((UChar *)elems->iteratordata_.string);
356 }
357
358 if (elems->pce != NULL) {
359 delete elems->pce;
360 }
361
362 delete elems;
363 }
364 } 307 }
365 308
366 U_CAPI void U_EXPORT2 309 U_CAPI void U_EXPORT2
367 ucol_reset(UCollationElements *elems) 310 ucol_reset(UCollationElements *elems)
368 { 311 {
369 collIterate *ci = &(elems->iteratordata_); 312 CollationElementIterator::fromUCollationElements(elems)->reset();
370 elems->reset_ = TRUE;
371 ci->pos = ci->string;
372 if ((ci->flags & UCOL_ITER_HASLEN) == 0 || ci->endp == NULL) {
373 ci->endp = ci->string + u_strlen(ci->string);
374 }
375 ci->CEpos = ci->toReturn = ci->CEs;
376 ci->flags = (ci->flags & UCOL_FORCE_HAN_IMPLICIT) | UCOL_ITER_HASLEN;
377 if (ci->coll->normalizationMode == UCOL_ON) {
378 ci->flags |= UCOL_ITER_NORM;
379 }
380
381 ci->writableBuffer.remove();
382 ci->fcdPosition = NULL;
383
384 //ci->offsetReturn = ci->offsetStore = NULL;
385 ci->offsetRepeatCount = ci->offsetRepeatValue = 0;
386 }
387
388 U_CAPI void U_EXPORT2
389 ucol_forceHanImplicit(UCollationElements *elems, UErrorCode *status)
390 {
391 if (U_FAILURE(*status)) {
392 return;
393 }
394
395 if (elems == NULL) {
396 *status = U_ILLEGAL_ARGUMENT_ERROR;
397 return;
398 }
399
400 elems->iteratordata_.flags |= UCOL_FORCE_HAN_IMPLICIT;
401 } 313 }
402 314
403 U_CAPI int32_t U_EXPORT2 315 U_CAPI int32_t U_EXPORT2
404 ucol_next(UCollationElements *elems, 316 ucol_next(UCollationElements *elems,
405 UErrorCode *status) 317 UErrorCode *status)
406 { 318 {
407 int32_t result;
408 if (U_FAILURE(*status)) { 319 if (U_FAILURE(*status)) {
409 return UCOL_NULLORDER; 320 return UCOL_NULLORDER;
410 } 321 }
411 322
412 elems->reset_ = FALSE; 323 return CollationElementIterator::fromUCollationElements(elems)->next(*status);
413 324 }
414 result = (int32_t)ucol_getNextCE(elems->iteratordata_.coll, 325
415 &elems->iteratordata_, 326 U_NAMESPACE_BEGIN
416 status); 327
417 328 int64_t
418 if (result == UCOL_NO_MORE_CES) { 329 UCollationPCE::nextProcessed(
419 result = UCOL_NULLORDER;
420 }
421 return result;
422 }
423
424 U_CAPI int64_t U_EXPORT2
425 ucol_nextProcessed(UCollationElements *elems,
426 int32_t *ixLow, 330 int32_t *ixLow,
427 int32_t *ixHigh, 331 int32_t *ixHigh,
428 UErrorCode *status) 332 UErrorCode *status)
429 { 333 {
430 const UCollator *coll = elems->iteratordata_.coll;
431 int64_t result = UCOL_IGNORABLE; 334 int64_t result = UCOL_IGNORABLE;
432 uint32_t low = 0, high = 0; 335 uint32_t low = 0, high = 0;
433 336
434 if (U_FAILURE(*status)) { 337 if (U_FAILURE(*status)) {
435 return UCOL_PROCESSED_NULLORDER; 338 return UCOL_PROCESSED_NULLORDER;
436 } 339 }
437 340
438 if (elems->pce == NULL) { 341 pceBuffer.reset();
439 elems->pce = new UCollationPCE(elems);
440 } else {
441 elems->pce->pceBuffer.reset();
442 }
443
444 elems->reset_ = FALSE;
445 342
446 do { 343 do {
447 low = ucol_getOffset(elems); 344 low = cei->getOffset();
448 uint32_t ce = (uint32_t) ucol_getNextCE(coll, &elems->iteratordata_, status); 345 int32_t ce = cei->next(*status);
449 high = ucol_getOffset(elems); 346 high = cei->getOffset();
450 347
451 if (ce == UCOL_NO_MORE_CES) { 348 if (ce == UCOL_NULLORDER) {
452 result = UCOL_PROCESSED_NULLORDER; 349 result = UCOL_PROCESSED_NULLORDER;
453 break; 350 break;
454 } 351 }
455 352
456 result = processCE(elems, ce); 353 result = processCE((uint32_t)ce);
457 } while (result == UCOL_IGNORABLE); 354 } while (result == UCOL_IGNORABLE);
458 355
459 if (ixLow != NULL) { 356 if (ixLow != NULL) {
460 *ixLow = low; 357 *ixLow = low;
461 } 358 }
465 } 362 }
466 363
467 return result; 364 return result;
468 } 365 }
469 366
367 U_NAMESPACE_END
368
470 U_CAPI int32_t U_EXPORT2 369 U_CAPI int32_t U_EXPORT2
471 ucol_previous(UCollationElements *elems, 370 ucol_previous(UCollationElements *elems,
472 UErrorCode *status) 371 UErrorCode *status)
473 { 372 {
474 if(U_FAILURE(*status)) { 373 if(U_FAILURE(*status)) {
475 return UCOL_NULLORDER; 374 return UCOL_NULLORDER;
476 } 375 }
477 else 376 return CollationElementIterator::fromUCollationElements(elems)->previous(*status);
478 { 377 }
479 int32_t result; 378
480 379 U_NAMESPACE_BEGIN
481 if (elems->reset_ && (elems->iteratordata_.pos == elems->iteratordata_.string)) { 380
482 if (elems->iteratordata_.endp == NULL) { 381 int64_t
483 elems->iteratordata_.endp = elems->iteratordata_.string + 382 UCollationPCE::previousProcessed(
484 u_strlen(elems->iteratordata_.string);
485 elems->iteratordata_.flags |= UCOL_ITER_HASLEN;
486 }
487 elems->iteratordata_.pos = elems->iteratordata_.endp;
488 elems->iteratordata_.fcdPosition = elems->iteratordata_.endp;
489 }
490
491 elems->reset_ = FALSE;
492
493 result = (int32_t)ucol_getPrevCE(elems->iteratordata_.coll,
494 &(elems->iteratordata_),
495 status);
496
497 if (result == UCOL_NO_MORE_CES) {
498 result = UCOL_NULLORDER;
499 }
500
501 return result;
502 }
503 }
504
505 U_CAPI int64_t U_EXPORT2
506 ucol_previousProcessed(UCollationElements *elems,
507 int32_t *ixLow, 383 int32_t *ixLow,
508 int32_t *ixHigh, 384 int32_t *ixHigh,
509 UErrorCode *status) 385 UErrorCode *status)
510 { 386 {
511 const UCollator *coll = elems->iteratordata_.coll;
512 int64_t result = UCOL_IGNORABLE; 387 int64_t result = UCOL_IGNORABLE;
513 // int64_t primary = 0, secondary = 0, tertiary = 0, quaternary = 0;
514 // UCollationStrength strength = ucol_getStrength(coll);
515 // UBool toShift = ucol_getAttribute(coll, UCOL_ALTERNATE_HANDLING, status) == UCOL_SHIFTED;
516 // uint32_t variableTop = coll->variableTopValue;
517 int32_t low = 0, high = 0; 388 int32_t low = 0, high = 0;
518 389
519 if (U_FAILURE(*status)) { 390 if (U_FAILURE(*status)) {
520 return UCOL_PROCESSED_NULLORDER; 391 return UCOL_PROCESSED_NULLORDER;
521 } 392 }
522 393
523 if (elems->reset_ && 394 // pceBuffer.reset();
524 (elems->iteratordata_.pos == elems->iteratordata_.string)) { 395
525 if (elems->iteratordata_.endp == NULL) { 396 while (pceBuffer.isEmpty()) {
526 elems->iteratordata_.endp = elems->iteratordata_.string +
527 u_strlen(elems->iteratordata_.string);
528 elems->iteratordata_.flags |= UCOL_ITER_HASLEN;
529 }
530
531 elems->iteratordata_.pos = elems->iteratordata_.endp;
532 elems->iteratordata_.fcdPosition = elems->iteratordata_.endp;
533 }
534
535 if (elems->pce == NULL) {
536 elems->pce = new UCollationPCE(elems);
537 } else {
538 //elems->pce->pceBuffer.reset();
539 }
540
541 elems->reset_ = FALSE;
542
543 while (elems->pce->pceBuffer.empty()) {
544 // buffer raw CEs up to non-ignorable primary 397 // buffer raw CEs up to non-ignorable primary
545 RCEBuffer rceb; 398 RCEBuffer rceb;
546 uint32_t ce; 399 int32_t ce;
547 400
548 // **** do we need to reset rceb, or will it always be empty at this point **** 401 // **** do we need to reset rceb, or will it always be empty at this point ****
549 do { 402 do {
550 high = ucol_getOffset(elems); 403 high = cei->getOffset();
551 ce = ucol_getPrevCE(coll, &elems->iteratordata_, status); 404 ce = cei->previous(*status);
552 low = ucol_getOffset(elems); 405 low = cei->getOffset();
553 406
554 if (ce == UCOL_NO_MORE_CES) { 407 if (ce == UCOL_NULLORDER) {
555 if (! rceb.empty()) { 408 if (!rceb.isEmpty()) {
556 break; 409 break;
557 } 410 }
558 411
559 goto finish; 412 goto finish;
560 } 413 }
561 414
562 rceb.put(ce, low, high); 415 rceb.put((uint32_t)ce, low, high, *status);
563 } while ((ce & UCOL_PRIMARYMASK) == 0); 416 } while (U_SUCCESS(*status) && ((ce & UCOL_PRIMARYORDERMASK) == 0 || isContinuation(ce)));
564 417
565 // process the raw CEs 418 // process the raw CEs
566 while (! rceb.empty()) { 419 while (U_SUCCESS(*status) && !rceb.isEmpty()) {
567 const RCEI *rcei = rceb.get(); 420 const RCEI *rcei = rceb.get();
568 421
569 result = processCE(elems, rcei->ce); 422 result = processCE(rcei->ce);
570 423
571 if (result != UCOL_IGNORABLE) { 424 if (result != UCOL_IGNORABLE) {
572 elems->pce->pceBuffer.put(result, rcei->low, rcei->high); 425 pceBuffer.put(result, rcei->low, rcei->high, *status);
573 } 426 }
574 } 427 }
428 if (U_FAILURE(*status)) {
429 return UCOL_PROCESSED_NULLORDER;
430 }
575 } 431 }
576 432
577 finish: 433 finish:
578 if (elems->pce->pceBuffer.empty()) { 434 if (pceBuffer.isEmpty()) {
579 // **** Is -1 the right value for ixLow, ixHigh? **** 435 // **** Is -1 the right value for ixLow, ixHigh? ****
580 if (ixLow != NULL) { 436 if (ixLow != NULL) {
581 *ixLow = -1; 437 *ixLow = -1;
582 } 438 }
583 439
586 ; 442 ;
587 } 443 }
588 return UCOL_PROCESSED_NULLORDER; 444 return UCOL_PROCESSED_NULLORDER;
589 } 445 }
590 446
591 const PCEI *pcei = elems->pce->pceBuffer.get(); 447 const PCEI *pcei = pceBuffer.get();
592 448
593 if (ixLow != NULL) { 449 if (ixLow != NULL) {
594 *ixLow = pcei->low; 450 *ixLow = pcei->low;
595 } 451 }
596 452
599 } 455 }
600 456
601 return pcei->ce; 457 return pcei->ce;
602 } 458 }
603 459
460 U_NAMESPACE_END
461
604 U_CAPI int32_t U_EXPORT2 462 U_CAPI int32_t U_EXPORT2
605 ucol_getMaxExpansion(const UCollationElements *elems, 463 ucol_getMaxExpansion(const UCollationElements *elems,
606 int32_t order) 464 int32_t order)
607 { 465 {
608 uint8_t result; 466 return CollationElementIterator::fromUCollationElements(elems)->getMaxExpansion(order);
609 467
610 #if 0 468 // TODO: The old code masked the order according to strength and then did a binary search.
611 UCOL_GETMAXEXPANSION(elems->iteratordata_.coll, (uint32_t)order, result); 469 // However this was probably at least partially broken because of the following comment.
612 #else 470 // Still, it might have found a match when this version may not.
613 const UCollator *coll = elems->iteratordata_.coll;
614 const uint32_t *start;
615 const uint32_t *limit;
616 const uint32_t *mid;
617 uint32_t strengthMask = 0;
618 uint32_t mOrder = (uint32_t) order;
619
620 switch (coll->strength)
621 {
622 default:
623 strengthMask |= UCOL_TERTIARYORDERMASK;
624 /* fall through */
625
626 case UCOL_SECONDARY:
627 strengthMask |= UCOL_SECONDARYORDERMASK;
628 /* fall through */
629
630 case UCOL_PRIMARY:
631 strengthMask |= UCOL_PRIMARYORDERMASK;
632 }
633
634 mOrder &= strengthMask;
635 start = (coll)->endExpansionCE;
636 limit = (coll)->lastEndExpansionCE;
637
638 while (start < limit - 1) {
639 mid = start + ((limit - start) >> 1);
640 if (mOrder <= (*mid & strengthMask)) {
641 limit = mid;
642 } else {
643 start = mid;
644 }
645 }
646 471
647 // FIXME: with a masked search, there might be more than one hit, 472 // FIXME: with a masked search, there might be more than one hit,
648 // so we need to look forward and backward from the match to find all 473 // so we need to look forward and backward from the match to find all
649 // of the hits... 474 // of the hits...
650 if ((*start & strengthMask) == mOrder) {
651 result = *((coll)->expansionCESize + (start - (coll)->endExpansionCE));
652 } else if ((*limit & strengthMask) == mOrder) {
653 result = *(coll->expansionCESize + (limit - coll->endExpansionCE));
654 } else if ((mOrder & 0xFFFF) == 0x00C0) {
655 result = 2;
656 } else {
657 result = 1;
658 }
659 #endif
660
661 return result;
662 } 475 }
663 476
664 U_CAPI void U_EXPORT2 477 U_CAPI void U_EXPORT2
665 ucol_setText( UCollationElements *elems, 478 ucol_setText( UCollationElements *elems,
666 const UChar *text, 479 const UChar *text,
669 { 482 {
670 if (U_FAILURE(*status)) { 483 if (U_FAILURE(*status)) {
671 return; 484 return;
672 } 485 }
673 486
674 if (elems->isWritable && elems->iteratordata_.string != NULL) 487 if ((text == NULL && textLength != 0)) {
675 { 488 *status = U_ILLEGAL_ARGUMENT_ERROR;
676 uprv_free((UChar *)elems->iteratordata_.string); 489 return;
677 } 490 }
678 491 UnicodeString s((UBool)(textLength < 0), text, textLength);
679 if (text == NULL) { 492 return CollationElementIterator::fromUCollationElements(elems)->setText(s, *status);
680 textLength = 0;
681 }
682
683 elems->isWritable = FALSE;
684
685 /* free offset buffer to avoid memory leak before initializing. */
686 ucol_freeOffsetBuffer(&(elems->iteratordata_));
687 /* Ensure that previously allocated extendCEs is freed before setting to NULL. */
688 if (elems->iteratordata_.extendCEs != NULL) {
689 uprv_free(elems->iteratordata_.extendCEs);
690 }
691 uprv_init_collIterate(elems->iteratordata_.coll, text, textLength,
692 &elems->iteratordata_, status);
693
694 elems->reset_ = TRUE;
695 } 493 }
696 494
697 U_CAPI int32_t U_EXPORT2 495 U_CAPI int32_t U_EXPORT2
698 ucol_getOffset(const UCollationElements *elems) 496 ucol_getOffset(const UCollationElements *elems)
699 { 497 {
700 const collIterate *ci = &(elems->iteratordata_); 498 return CollationElementIterator::fromUCollationElements(elems)->getOffset();
701
702 if (ci->offsetRepeatCount > 0 && ci->offsetRepeatValue != 0) {
703 return ci->offsetRepeatValue;
704 }
705
706 if (ci->offsetReturn != NULL) {
707 return *ci->offsetReturn;
708 }
709
710 // while processing characters in normalization buffer getOffset will
711 // return the next non-normalized character.
712 // should be inline with the old implementation since the old codes uses
713 // nextDecomp in normalizer which also decomposes the string till the
714 // first base character is found.
715 if (ci->flags & UCOL_ITER_INNORMBUF) {
716 if (ci->fcdPosition == NULL) {
717 return 0;
718 }
719 return (int32_t)(ci->fcdPosition - ci->string);
720 }
721 else {
722 return (int32_t)(ci->pos - ci->string);
723 }
724 } 499 }
725 500
726 U_CAPI void U_EXPORT2 501 U_CAPI void U_EXPORT2
727 ucol_setOffset(UCollationElements *elems, 502 ucol_setOffset(UCollationElements *elems,
728 int32_t offset, 503 int32_t offset,
730 { 505 {
731 if (U_FAILURE(*status)) { 506 if (U_FAILURE(*status)) {
732 return; 507 return;
733 } 508 }
734 509
735 // this methods will clean up any use of the writable buffer and points to 510 CollationElementIterator::fromUCollationElements(elems)->setOffset(offset, *status);
736 // the original string
737 collIterate *ci = &(elems->iteratordata_);
738 ci->pos = ci->string + offset;
739 ci->CEpos = ci->toReturn = ci->CEs;
740 if (ci->flags & UCOL_ITER_INNORMBUF) {
741 ci->flags = ci->origFlags;
742 }
743 if ((ci->flags & UCOL_ITER_HASLEN) == 0) {
744 ci->endp = ci->string + u_strlen(ci->string);
745 ci->flags |= UCOL_ITER_HASLEN;
746 }
747 ci->fcdPosition = NULL;
748 elems->reset_ = FALSE;
749
750 ci->offsetReturn = NULL;
751 ci->offsetStore = ci->offsetBuffer;
752 ci->offsetRepeatCount = ci->offsetRepeatValue = 0;
753 } 511 }
754 512
755 U_CAPI int32_t U_EXPORT2 513 U_CAPI int32_t U_EXPORT2
756 ucol_primaryOrder (int32_t order) 514 ucol_primaryOrder (int32_t order)
757 { 515 {
758 order &= UCOL_PRIMARYMASK; 516 return (order >> 16) & 0xffff;
759 return (order >> UCOL_PRIMARYORDERSHIFT);
760 } 517 }
761 518
762 U_CAPI int32_t U_EXPORT2 519 U_CAPI int32_t U_EXPORT2
763 ucol_secondaryOrder (int32_t order) 520 ucol_secondaryOrder (int32_t order)
764 { 521 {
765 order &= UCOL_SECONDARYMASK; 522 return (order >> 8) & 0xff;
766 return (order >> UCOL_SECONDARYORDERSHIFT);
767 } 523 }
768 524
769 U_CAPI int32_t U_EXPORT2 525 U_CAPI int32_t U_EXPORT2
770 ucol_tertiaryOrder (int32_t order) 526 ucol_tertiaryOrder (int32_t order)
771 { 527 {
772 return (order & UCOL_TERTIARYMASK); 528 return order & 0xff;
773 }
774
775
776 void ucol_freeOffsetBuffer(collIterate *s) {
777 if (s != NULL && s->offsetBuffer != NULL) {
778 uprv_free(s->offsetBuffer);
779 s->offsetBuffer = NULL;
780 s->offsetBufferSize = 0;
781 }
782 } 529 }
783 530
784 #endif /* #if !UCONFIG_NO_COLLATION */ 531 #endif /* #if !UCONFIG_NO_COLLATION */