annotate modules/javafx.web/src/main/native/Source/ThirdParty/icu/source/i18n/ucoleitr.cpp @ 10550:fee4ef5c87df

8178360: Build and integrate ICU from source Reviewed-by: kcr, arajkumar
author ghb
date Tue, 04 Jul 2017 09:09:49 +0530
parents
children 20a8447c71c6
rev   line source
ghb@10550 1 /*
ghb@10550 2 ******************************************************************************
ghb@10550 3 * Copyright (C) 2001-2011, International Business Machines
ghb@10550 4 * Corporation and others. All Rights Reserved.
ghb@10550 5 ******************************************************************************
ghb@10550 6 *
ghb@10550 7 * File ucoleitr.cpp
ghb@10550 8 *
ghb@10550 9 * Modification History:
ghb@10550 10 *
ghb@10550 11 * Date Name Description
ghb@10550 12 * 02/15/2001 synwee Modified all methods to process its own function
ghb@10550 13 * instead of calling the equivalent c++ api (coleitr.h)
ghb@10550 14 ******************************************************************************/
ghb@10550 15
ghb@10550 16 #include "unicode/utypes.h"
ghb@10550 17
ghb@10550 18 #if !UCONFIG_NO_COLLATION
ghb@10550 19
ghb@10550 20 #include "unicode/ucoleitr.h"
ghb@10550 21 #include "unicode/ustring.h"
ghb@10550 22 #include "unicode/sortkey.h"
ghb@10550 23 #include "unicode/uobject.h"
ghb@10550 24 #include "ucol_imp.h"
ghb@10550 25 #include "cmemory.h"
ghb@10550 26
ghb@10550 27 U_NAMESPACE_USE
ghb@10550 28
/* Not referenced in the visible part of this file; kept for compatibility. */
#define BUFFER_LENGTH 100

/* Capacity of the inline storage in RCEBuffer/PCEBuffer, and the number of
 * entries added each time one of those buffers has to grow. */
#define DEFAULT_BUFFER_SIZE 16
#define BUFFER_GROW 8

/*
 * Array helpers. Arguments and expansions are parenthesized so the macros
 * keep their meaning when they are given non-trivial argument expressions or
 * are used inside larger expressions.
 */
#define ARRAY_SIZE(array) (sizeof(array) / sizeof((array)[0]))

#define ARRAY_COPY(dst, src, count) uprv_memcpy((void *) (dst), (void *) (src), (count) * sizeof (src)[0])

#define NEW_ARRAY(type, count) ((type *) uprv_malloc((count) * sizeof(type)))

#define GROW_ARRAY(array, newSize) uprv_realloc((void *) (array), (newSize) * sizeof (array)[0])

#define DELETE_ARRAY(array) uprv_free((void *) (array))
ghb@10550 43
ghb@10550 44 typedef struct icu::collIterate collIterator;
ghb@10550 45
/*
 * One entry of an RCEBuffer: a 32-bit collation element value plus the
 * low/high index pair that was handed to RCEBuffer::put() with it.
 */
struct RCEI
{
    uint32_t ce;    /* collation element value */
    int32_t  low;   /* low index stored with the element */
    int32_t  high;  /* high index stored with the element */
};
ghb@10550 52
ghb@10550 53 U_NAMESPACE_BEGIN
ghb@10550 54
ghb@10550 55 struct RCEBuffer
ghb@10550 56 {
ghb@10550 57 RCEI defaultBuffer[DEFAULT_BUFFER_SIZE];
ghb@10550 58 RCEI *buffer;
ghb@10550 59 int32_t bufferIndex;
ghb@10550 60 int32_t bufferSize;
ghb@10550 61
ghb@10550 62 RCEBuffer();
ghb@10550 63 ~RCEBuffer();
ghb@10550 64
ghb@10550 65 UBool empty() const;
ghb@10550 66 void put(uint32_t ce, int32_t ixLow, int32_t ixHigh);
ghb@10550 67 const RCEI *get();
ghb@10550 68 };
ghb@10550 69
ghb@10550 70 RCEBuffer::RCEBuffer()
ghb@10550 71 {
ghb@10550 72 buffer = defaultBuffer;
ghb@10550 73 bufferIndex = 0;
ghb@10550 74 bufferSize = DEFAULT_BUFFER_SIZE;
ghb@10550 75 }
ghb@10550 76
ghb@10550 77 RCEBuffer::~RCEBuffer()
ghb@10550 78 {
ghb@10550 79 if (buffer != defaultBuffer) {
ghb@10550 80 DELETE_ARRAY(buffer);
ghb@10550 81 }
ghb@10550 82 }
ghb@10550 83
ghb@10550 84 UBool RCEBuffer::empty() const
ghb@10550 85 {
ghb@10550 86 return bufferIndex <= 0;
ghb@10550 87 }
ghb@10550 88
ghb@10550 89 void RCEBuffer::put(uint32_t ce, int32_t ixLow, int32_t ixHigh)
ghb@10550 90 {
ghb@10550 91 if (bufferIndex >= bufferSize) {
ghb@10550 92 RCEI *newBuffer = NEW_ARRAY(RCEI, bufferSize + BUFFER_GROW);
ghb@10550 93
ghb@10550 94 ARRAY_COPY(newBuffer, buffer, bufferSize);
ghb@10550 95
ghb@10550 96 if (buffer != defaultBuffer) {
ghb@10550 97 DELETE_ARRAY(buffer);
ghb@10550 98 }
ghb@10550 99
ghb@10550 100 buffer = newBuffer;
ghb@10550 101 bufferSize += BUFFER_GROW;
ghb@10550 102 }
ghb@10550 103
ghb@10550 104 buffer[bufferIndex].ce = ce;
ghb@10550 105 buffer[bufferIndex].low = ixLow;
ghb@10550 106 buffer[bufferIndex].high = ixHigh;
ghb@10550 107
ghb@10550 108 bufferIndex += 1;
ghb@10550 109 }
ghb@10550 110
ghb@10550 111 const RCEI *RCEBuffer::get()
ghb@10550 112 {
ghb@10550 113 if (bufferIndex > 0) {
ghb@10550 114 return &buffer[--bufferIndex];
ghb@10550 115 }
ghb@10550 116
ghb@10550 117 return NULL;
ghb@10550 118 }
ghb@10550 119
/*
 * One entry of a PCEBuffer: a 64-bit collation element value plus the
 * low/high index pair that was handed to PCEBuffer::put() with it.
 */
struct PCEI
{
    uint64_t ce;    /* 64-bit collation element value */
    int32_t  low;   /* low index stored with the element */
    int32_t  high;  /* high index stored with the element */
};
ghb@10550 126
ghb@10550 127 struct PCEBuffer
ghb@10550 128 {
ghb@10550 129 PCEI defaultBuffer[DEFAULT_BUFFER_SIZE];
ghb@10550 130 PCEI *buffer;
ghb@10550 131 int32_t bufferIndex;
ghb@10550 132 int32_t bufferSize;
ghb@10550 133
ghb@10550 134 PCEBuffer();
ghb@10550 135 ~PCEBuffer();
ghb@10550 136
ghb@10550 137 void reset();
ghb@10550 138 UBool empty() const;
ghb@10550 139 void put(uint64_t ce, int32_t ixLow, int32_t ixHigh);
ghb@10550 140 const PCEI *get();
ghb@10550 141 };
ghb@10550 142
ghb@10550 143 PCEBuffer::PCEBuffer()
ghb@10550 144 {
ghb@10550 145 buffer = defaultBuffer;
ghb@10550 146 bufferIndex = 0;
ghb@10550 147 bufferSize = DEFAULT_BUFFER_SIZE;
ghb@10550 148 }
ghb@10550 149
ghb@10550 150 PCEBuffer::~PCEBuffer()
ghb@10550 151 {
ghb@10550 152 if (buffer != defaultBuffer) {
ghb@10550 153 DELETE_ARRAY(buffer);
ghb@10550 154 }
ghb@10550 155 }
ghb@10550 156
ghb@10550 157 void PCEBuffer::reset()
ghb@10550 158 {
ghb@10550 159 bufferIndex = 0;
ghb@10550 160 }
ghb@10550 161
ghb@10550 162 UBool PCEBuffer::empty() const
ghb@10550 163 {
ghb@10550 164 return bufferIndex <= 0;
ghb@10550 165 }
ghb@10550 166
ghb@10550 167 void PCEBuffer::put(uint64_t ce, int32_t ixLow, int32_t ixHigh)
ghb@10550 168 {
ghb@10550 169 if (bufferIndex >= bufferSize) {
ghb@10550 170 PCEI *newBuffer = NEW_ARRAY(PCEI, bufferSize + BUFFER_GROW);
ghb@10550 171
ghb@10550 172 ARRAY_COPY(newBuffer, buffer, bufferSize);
ghb@10550 173
ghb@10550 174 if (buffer != defaultBuffer) {
ghb@10550 175 DELETE_ARRAY(buffer);
ghb@10550 176 }
ghb@10550 177
ghb@10550 178 buffer = newBuffer;
ghb@10550 179 bufferSize += BUFFER_GROW;
ghb@10550 180 }
ghb@10550 181
ghb@10550 182 buffer[bufferIndex].ce = ce;
ghb@10550 183 buffer[bufferIndex].low = ixLow;
ghb@10550 184 buffer[bufferIndex].high = ixHigh;
ghb@10550 185
ghb@10550 186 bufferIndex += 1;
ghb@10550 187 }
ghb@10550 188
ghb@10550 189 const PCEI *PCEBuffer::get()
ghb@10550 190 {
ghb@10550 191 if (bufferIndex > 0) {
ghb@10550 192 return &buffer[--bufferIndex];
ghb@10550 193 }
ghb@10550 194
ghb@10550 195 return NULL;
ghb@10550 196 }
ghb@10550 197
ghb@10550 198 /*
ghb@10550 199 * This inherits from UObject so that
ghb@10550 200 * it can be allocated by new and the
ghb@10550 201 * constructor for PCEBuffer is called.
ghb@10550 202 */
ghb@10550 203 struct UCollationPCE : public UObject
ghb@10550 204 {
ghb@10550 205 PCEBuffer pceBuffer;
ghb@10550 206 UCollationStrength strength;
ghb@10550 207 UBool toShift;
ghb@10550 208 UBool isShifted;
ghb@10550 209 uint32_t variableTop;
ghb@10550 210
ghb@10550 211 UCollationPCE(UCollationElements *elems);
ghb@10550 212 ~UCollationPCE();
ghb@10550 213
ghb@10550 214 void init(const UCollator *coll);
ghb@10550 215
ghb@10550 216 virtual UClassID getDynamicClassID() const;
ghb@10550 217 static UClassID getStaticClassID();
ghb@10550 218 };
ghb@10550 219
ghb@10550 220 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(UCollationPCE)
ghb@10550 221
ghb@10550 222 UCollationPCE::UCollationPCE(UCollationElements *elems)
ghb@10550 223 {
ghb@10550 224 init(elems->iteratordata_.coll);
ghb@10550 225 }
ghb@10550 226
ghb@10550 227 void UCollationPCE::init(const UCollator *coll)
ghb@10550 228 {
ghb@10550 229 UErrorCode status = U_ZERO_ERROR;
ghb@10550 230
ghb@10550 231 strength = ucol_getStrength(coll);
ghb@10550 232 toShift = ucol_getAttribute(coll, UCOL_ALTERNATE_HANDLING, &status) == UCOL_SHIFTED;
ghb@10550 233 isShifted = FALSE;
ghb@10550 234 variableTop = coll->variableTopValue << 16;
ghb@10550 235 }
ghb@10550 236
ghb@10550 237 UCollationPCE::~UCollationPCE()
ghb@10550 238 {
ghb@10550 239 // nothing to do
ghb@10550 240 }
ghb@10550 241
ghb@10550 242
ghb@10550 243 U_NAMESPACE_END
ghb@10550 244
ghb@10550 245
ghb@10550 246 inline uint64_t processCE(UCollationElements *elems, uint32_t ce)
ghb@10550 247 {
ghb@10550 248 uint64_t primary = 0, secondary = 0, tertiary = 0, quaternary = 0;
ghb@10550 249
ghb@10550 250 // This is clean, but somewhat slow...
ghb@10550 251 // We could apply the mask to ce and then
ghb@10550 252 // just get all three orders...
ghb@10550 253 switch(elems->pce->strength) {
ghb@10550 254 default:
ghb@10550 255 tertiary = ucol_tertiaryOrder(ce);
ghb@10550 256 /* note fall-through */
ghb@10550 257
ghb@10550 258 case UCOL_SECONDARY:
ghb@10550 259 secondary = ucol_secondaryOrder(ce);
ghb@10550 260 /* note fall-through */
ghb@10550 261
ghb@10550 262 case UCOL_PRIMARY:
ghb@10550 263 primary = ucol_primaryOrder(ce);
ghb@10550 264 }
ghb@10550 265
ghb@10550 266 // **** This should probably handle continuations too. ****
ghb@10550 267 // **** That means that we need 24 bits for the primary ****
ghb@10550 268 // **** instead of the 16 that we're currently using. ****
ghb@10550 269 // **** So we can lay out the 64 bits as: 24.12.12.16. ****
ghb@10550 270 // **** Another complication with continuations is that ****
ghb@10550 271 // **** the *second* CE is marked as a continuation, so ****
ghb@10550 272 // **** we always have to peek ahead to know how long ****
ghb@10550 273 // **** the primary is... ****
ghb@10550 274 if ((elems->pce->toShift && elems->pce->variableTop > ce && primary != 0)
ghb@10550 275 || (elems->pce->isShifted && primary == 0)) {
ghb@10550 276
ghb@10550 277 if (primary == 0) {
ghb@10550 278 return UCOL_IGNORABLE;
ghb@10550 279 }
ghb@10550 280
ghb@10550 281 if (elems->pce->strength >= UCOL_QUATERNARY) {
ghb@10550 282 quaternary = primary;
ghb@10550 283 }
ghb@10550 284
ghb@10550 285 primary = secondary = tertiary = 0;
ghb@10550 286 elems->pce->isShifted = TRUE;
ghb@10550 287 } else {
ghb@10550 288 if (elems->pce->strength >= UCOL_QUATERNARY) {
ghb@10550 289 quaternary = 0xFFFF;
ghb@10550 290 }
ghb@10550 291
ghb@10550 292 elems->pce->isShifted = FALSE;
ghb@10550 293 }
ghb@10550 294
ghb@10550 295 return primary << 48 | secondary << 32 | tertiary << 16 | quaternary;
ghb@10550 296 }
ghb@10550 297
ghb@10550 298 U_CAPI void U_EXPORT2
ghb@10550 299 uprv_init_pce(const UCollationElements *elems)
ghb@10550 300 {
ghb@10550 301 if (elems->pce != NULL) {
ghb@10550 302 elems->pce->init(elems->iteratordata_.coll);
ghb@10550 303 }
ghb@10550 304 }
ghb@10550 305
ghb@10550 306
ghb@10550 307
ghb@10550 308 /* public methods ---------------------------------------------------- */
ghb@10550 309
ghb@10550 310 U_CAPI UCollationElements* U_EXPORT2
ghb@10550 311 ucol_openElements(const UCollator *coll,
ghb@10550 312 const UChar *text,
ghb@10550 313 int32_t textLength,
ghb@10550 314 UErrorCode *status)
ghb@10550 315 {
ghb@10550 316 if (U_FAILURE(*status)) {
ghb@10550 317 return NULL;
ghb@10550 318 }
ghb@10550 319
ghb@10550 320 UCollationElements *result = new UCollationElements;
ghb@10550 321 if (result == NULL) {
ghb@10550 322 *status = U_MEMORY_ALLOCATION_ERROR;
ghb@10550 323 return NULL;
ghb@10550 324 }
ghb@10550 325
ghb@10550 326 result->reset_ = TRUE;
ghb@10550 327 result->isWritable = FALSE;
ghb@10550 328 result->pce = NULL;
ghb@10550 329
ghb@10550 330 if (text == NULL) {
ghb@10550 331 textLength = 0;
ghb@10550 332 }
ghb@10550 333 uprv_init_collIterate(coll, text, textLength, &result->iteratordata_, status);
ghb@10550 334
ghb@10550 335 return result;
ghb@10550 336 }
ghb@10550 337
ghb@10550 338
ghb@10550 339 U_CAPI void U_EXPORT2
ghb@10550 340 ucol_closeElements(UCollationElements *elems)
ghb@10550 341 {
ghb@10550 342 if (elems != NULL) {
ghb@10550 343 collIterate *ci = &elems->iteratordata_;
ghb@10550 344
ghb@10550 345 if (ci->extendCEs) {
ghb@10550 346 uprv_free(ci->extendCEs);
ghb@10550 347 }
ghb@10550 348
ghb@10550 349 if (ci->offsetBuffer) {
ghb@10550 350 uprv_free(ci->offsetBuffer);
ghb@10550 351 }
ghb@10550 352
ghb@10550 353 if (elems->isWritable && elems->iteratordata_.string != NULL)
ghb@10550 354 {
ghb@10550 355 uprv_free((UChar *)elems->iteratordata_.string);
ghb@10550 356 }
ghb@10550 357
ghb@10550 358 if (elems->pce != NULL) {
ghb@10550 359 delete elems->pce;
ghb@10550 360 }
ghb@10550 361
ghb@10550 362 delete elems;
ghb@10550 363 }
ghb@10550 364 }
ghb@10550 365
ghb@10550 366 U_CAPI void U_EXPORT2
ghb@10550 367 ucol_reset(UCollationElements *elems)
ghb@10550 368 {
ghb@10550 369 collIterate *ci = &(elems->iteratordata_);
ghb@10550 370 elems->reset_ = TRUE;
ghb@10550 371 ci->pos = ci->string;
ghb@10550 372 if ((ci->flags & UCOL_ITER_HASLEN) == 0 || ci->endp == NULL) {
ghb@10550 373 ci->endp = ci->string + u_strlen(ci->string);
ghb@10550 374 }
ghb@10550 375 ci->CEpos = ci->toReturn = ci->CEs;
ghb@10550 376 ci->flags = (ci->flags & UCOL_FORCE_HAN_IMPLICIT) | UCOL_ITER_HASLEN;
ghb@10550 377 if (ci->coll->normalizationMode == UCOL_ON) {
ghb@10550 378 ci->flags |= UCOL_ITER_NORM;
ghb@10550 379 }
ghb@10550 380
ghb@10550 381 ci->writableBuffer.remove();
ghb@10550 382 ci->fcdPosition = NULL;
ghb@10550 383
ghb@10550 384 //ci->offsetReturn = ci->offsetStore = NULL;
ghb@10550 385 ci->offsetRepeatCount = ci->offsetRepeatValue = 0;
ghb@10550 386 }
ghb@10550 387
ghb@10550 388 U_CAPI void U_EXPORT2
ghb@10550 389 ucol_forceHanImplicit(UCollationElements *elems, UErrorCode *status)
ghb@10550 390 {
ghb@10550 391 if (U_FAILURE(*status)) {
ghb@10550 392 return;
ghb@10550 393 }
ghb@10550 394
ghb@10550 395 if (elems == NULL) {
ghb@10550 396 *status = U_ILLEGAL_ARGUMENT_ERROR;
ghb@10550 397 return;
ghb@10550 398 }
ghb@10550 399
ghb@10550 400 elems->iteratordata_.flags |= UCOL_FORCE_HAN_IMPLICIT;
ghb@10550 401 }
ghb@10550 402
ghb@10550 403 U_CAPI int32_t U_EXPORT2
ghb@10550 404 ucol_next(UCollationElements *elems,
ghb@10550 405 UErrorCode *status)
ghb@10550 406 {
ghb@10550 407 int32_t result;
ghb@10550 408 if (U_FAILURE(*status)) {
ghb@10550 409 return UCOL_NULLORDER;
ghb@10550 410 }
ghb@10550 411
ghb@10550 412 elems->reset_ = FALSE;
ghb@10550 413
ghb@10550 414 result = (int32_t)ucol_getNextCE(elems->iteratordata_.coll,
ghb@10550 415 &elems->iteratordata_,
ghb@10550 416 status);
ghb@10550 417
ghb@10550 418 if (result == UCOL_NO_MORE_CES) {
ghb@10550 419 result = UCOL_NULLORDER;
ghb@10550 420 }
ghb@10550 421 return result;
ghb@10550 422 }
ghb@10550 423
ghb@10550 424 U_CAPI int64_t U_EXPORT2
ghb@10550 425 ucol_nextProcessed(UCollationElements *elems,
ghb@10550 426 int32_t *ixLow,
ghb@10550 427 int32_t *ixHigh,
ghb@10550 428 UErrorCode *status)
ghb@10550 429 {
ghb@10550 430 const UCollator *coll = elems->iteratordata_.coll;
ghb@10550 431 int64_t result = UCOL_IGNORABLE;
ghb@10550 432 uint32_t low = 0, high = 0;
ghb@10550 433
ghb@10550 434 if (U_FAILURE(*status)) {
ghb@10550 435 return UCOL_PROCESSED_NULLORDER;
ghb@10550 436 }
ghb@10550 437
ghb@10550 438 if (elems->pce == NULL) {
ghb@10550 439 elems->pce = new UCollationPCE(elems);
ghb@10550 440 } else {
ghb@10550 441 elems->pce->pceBuffer.reset();
ghb@10550 442 }
ghb@10550 443
ghb@10550 444 elems->reset_ = FALSE;
ghb@10550 445
ghb@10550 446 do {
ghb@10550 447 low = ucol_getOffset(elems);
ghb@10550 448 uint32_t ce = (uint32_t) ucol_getNextCE(coll, &elems->iteratordata_, status);
ghb@10550 449 high = ucol_getOffset(elems);
ghb@10550 450
ghb@10550 451 if (ce == UCOL_NO_MORE_CES) {
ghb@10550 452 result = UCOL_PROCESSED_NULLORDER;
ghb@10550 453 break;
ghb@10550 454 }
ghb@10550 455
ghb@10550 456 result = processCE(elems, ce);
ghb@10550 457 } while (result == UCOL_IGNORABLE);
ghb@10550 458
ghb@10550 459 if (ixLow != NULL) {
ghb@10550 460 *ixLow = low;
ghb@10550 461 }
ghb@10550 462
ghb@10550 463 if (ixHigh != NULL) {
ghb@10550 464 *ixHigh = high;
ghb@10550 465 }
ghb@10550 466
ghb@10550 467 return result;
ghb@10550 468 }
ghb@10550 469
ghb@10550 470 U_CAPI int32_t U_EXPORT2
ghb@10550 471 ucol_previous(UCollationElements *elems,
ghb@10550 472 UErrorCode *status)
ghb@10550 473 {
ghb@10550 474 if(U_FAILURE(*status)) {
ghb@10550 475 return UCOL_NULLORDER;
ghb@10550 476 }
ghb@10550 477 else
ghb@10550 478 {
ghb@10550 479 int32_t result;
ghb@10550 480
ghb@10550 481 if (elems->reset_ && (elems->iteratordata_.pos == elems->iteratordata_.string)) {
ghb@10550 482 if (elems->iteratordata_.endp == NULL) {
ghb@10550 483 elems->iteratordata_.endp = elems->iteratordata_.string +
ghb@10550 484 u_strlen(elems->iteratordata_.string);
ghb@10550 485 elems->iteratordata_.flags |= UCOL_ITER_HASLEN;
ghb@10550 486 }
ghb@10550 487 elems->iteratordata_.pos = elems->iteratordata_.endp;
ghb@10550 488 elems->iteratordata_.fcdPosition = elems->iteratordata_.endp;
ghb@10550 489 }
ghb@10550 490
ghb@10550 491 elems->reset_ = FALSE;
ghb@10550 492
ghb@10550 493 result = (int32_t)ucol_getPrevCE(elems->iteratordata_.coll,
ghb@10550 494 &(elems->iteratordata_),
ghb@10550 495 status);
ghb@10550 496
ghb@10550 497 if (result == UCOL_NO_MORE_CES) {
ghb@10550 498 result = UCOL_NULLORDER;
ghb@10550 499 }
ghb@10550 500
ghb@10550 501 return result;
ghb@10550 502 }
ghb@10550 503 }
ghb@10550 504
ghb@10550 505 U_CAPI int64_t U_EXPORT2
ghb@10550 506 ucol_previousProcessed(UCollationElements *elems,
ghb@10550 507 int32_t *ixLow,
ghb@10550 508 int32_t *ixHigh,
ghb@10550 509 UErrorCode *status)
ghb@10550 510 {
ghb@10550 511 const UCollator *coll = elems->iteratordata_.coll;
ghb@10550 512 int64_t result = UCOL_IGNORABLE;
ghb@10550 513 // int64_t primary = 0, secondary = 0, tertiary = 0, quaternary = 0;
ghb@10550 514 // UCollationStrength strength = ucol_getStrength(coll);
ghb@10550 515 // UBool toShift = ucol_getAttribute(coll, UCOL_ALTERNATE_HANDLING, status) == UCOL_SHIFTED;
ghb@10550 516 // uint32_t variableTop = coll->variableTopValue;
ghb@10550 517 int32_t low = 0, high = 0;
ghb@10550 518
ghb@10550 519 if (U_FAILURE(*status)) {
ghb@10550 520 return UCOL_PROCESSED_NULLORDER;
ghb@10550 521 }
ghb@10550 522
ghb@10550 523 if (elems->reset_ &&
ghb@10550 524 (elems->iteratordata_.pos == elems->iteratordata_.string)) {
ghb@10550 525 if (elems->iteratordata_.endp == NULL) {
ghb@10550 526 elems->iteratordata_.endp = elems->iteratordata_.string +
ghb@10550 527 u_strlen(elems->iteratordata_.string);
ghb@10550 528 elems->iteratordata_.flags |= UCOL_ITER_HASLEN;
ghb@10550 529 }
ghb@10550 530
ghb@10550 531 elems->iteratordata_.pos = elems->iteratordata_.endp;
ghb@10550 532 elems->iteratordata_.fcdPosition = elems->iteratordata_.endp;
ghb@10550 533 }
ghb@10550 534
ghb@10550 535 if (elems->pce == NULL) {
ghb@10550 536 elems->pce = new UCollationPCE(elems);
ghb@10550 537 } else {
ghb@10550 538 //elems->pce->pceBuffer.reset();
ghb@10550 539 }
ghb@10550 540
ghb@10550 541 elems->reset_ = FALSE;
ghb@10550 542
ghb@10550 543 while (elems->pce->pceBuffer.empty()) {
ghb@10550 544 // buffer raw CEs up to non-ignorable primary
ghb@10550 545 RCEBuffer rceb;
ghb@10550 546 uint32_t ce;
ghb@10550 547
ghb@10550 548 // **** do we need to reset rceb, or will it always be empty at this point ****
ghb@10550 549 do {
ghb@10550 550 high = ucol_getOffset(elems);
ghb@10550 551 ce = ucol_getPrevCE(coll, &elems->iteratordata_, status);
ghb@10550 552 low = ucol_getOffset(elems);
ghb@10550 553
ghb@10550 554 if (ce == UCOL_NO_MORE_CES) {
ghb@10550 555 if (! rceb.empty()) {
ghb@10550 556 break;
ghb@10550 557 }
ghb@10550 558
ghb@10550 559 goto finish;
ghb@10550 560 }
ghb@10550 561
ghb@10550 562 rceb.put(ce, low, high);
ghb@10550 563 } while ((ce & UCOL_PRIMARYMASK) == 0);
ghb@10550 564
ghb@10550 565 // process the raw CEs
ghb@10550 566 while (! rceb.empty()) {
ghb@10550 567 const RCEI *rcei = rceb.get();
ghb@10550 568
ghb@10550 569 result = processCE(elems, rcei->ce);
ghb@10550 570
ghb@10550 571 if (result != UCOL_IGNORABLE) {
ghb@10550 572 elems->pce->pceBuffer.put(result, rcei->low, rcei->high);
ghb@10550 573 }
ghb@10550 574 }
ghb@10550 575 }
ghb@10550 576
ghb@10550 577 finish:
ghb@10550 578 if (elems->pce->pceBuffer.empty()) {
ghb@10550 579 // **** Is -1 the right value for ixLow, ixHigh? ****
ghb@10550 580 if (ixLow != NULL) {
ghb@10550 581 *ixLow = -1;
ghb@10550 582 }
ghb@10550 583
ghb@10550 584 if (ixHigh != NULL) {
ghb@10550 585 *ixHigh = -1
ghb@10550 586 ;
ghb@10550 587 }
ghb@10550 588 return UCOL_PROCESSED_NULLORDER;
ghb@10550 589 }
ghb@10550 590
ghb@10550 591 const PCEI *pcei = elems->pce->pceBuffer.get();
ghb@10550 592
ghb@10550 593 if (ixLow != NULL) {
ghb@10550 594 *ixLow = pcei->low;
ghb@10550 595 }
ghb@10550 596
ghb@10550 597 if (ixHigh != NULL) {
ghb@10550 598 *ixHigh = pcei->high;
ghb@10550 599 }
ghb@10550 600
ghb@10550 601 return pcei->ce;
ghb@10550 602 }
ghb@10550 603
ghb@10550 604 U_CAPI int32_t U_EXPORT2
ghb@10550 605 ucol_getMaxExpansion(const UCollationElements *elems,
ghb@10550 606 int32_t order)
ghb@10550 607 {
ghb@10550 608 uint8_t result;
ghb@10550 609
ghb@10550 610 #if 0
ghb@10550 611 UCOL_GETMAXEXPANSION(elems->iteratordata_.coll, (uint32_t)order, result);
ghb@10550 612 #else
ghb@10550 613 const UCollator *coll = elems->iteratordata_.coll;
ghb@10550 614 const uint32_t *start;
ghb@10550 615 const uint32_t *limit;
ghb@10550 616 const uint32_t *mid;
ghb@10550 617 uint32_t strengthMask = 0;
ghb@10550 618 uint32_t mOrder = (uint32_t) order;
ghb@10550 619
ghb@10550 620 switch (coll->strength)
ghb@10550 621 {
ghb@10550 622 default:
ghb@10550 623 strengthMask |= UCOL_TERTIARYORDERMASK;
ghb@10550 624 /* fall through */
ghb@10550 625
ghb@10550 626 case UCOL_SECONDARY:
ghb@10550 627 strengthMask |= UCOL_SECONDARYORDERMASK;
ghb@10550 628 /* fall through */
ghb@10550 629
ghb@10550 630 case UCOL_PRIMARY:
ghb@10550 631 strengthMask |= UCOL_PRIMARYORDERMASK;
ghb@10550 632 }
ghb@10550 633
ghb@10550 634 mOrder &= strengthMask;
ghb@10550 635 start = (coll)->endExpansionCE;
ghb@10550 636 limit = (coll)->lastEndExpansionCE;
ghb@10550 637
ghb@10550 638 while (start < limit - 1) {
ghb@10550 639 mid = start + ((limit - start) >> 1);
ghb@10550 640 if (mOrder <= (*mid & strengthMask)) {
ghb@10550 641 limit = mid;
ghb@10550 642 } else {
ghb@10550 643 start = mid;
ghb@10550 644 }
ghb@10550 645 }
ghb@10550 646
ghb@10550 647 // FIXME: with a masked search, there might be more than one hit,
ghb@10550 648 // so we need to look forward and backward from the match to find all
ghb@10550 649 // of the hits...
ghb@10550 650 if ((*start & strengthMask) == mOrder) {
ghb@10550 651 result = *((coll)->expansionCESize + (start - (coll)->endExpansionCE));
ghb@10550 652 } else if ((*limit & strengthMask) == mOrder) {
ghb@10550 653 result = *(coll->expansionCESize + (limit - coll->endExpansionCE));
ghb@10550 654 } else if ((mOrder & 0xFFFF) == 0x00C0) {
ghb@10550 655 result = 2;
ghb@10550 656 } else {
ghb@10550 657 result = 1;
ghb@10550 658 }
ghb@10550 659 #endif
ghb@10550 660
ghb@10550 661 return result;
ghb@10550 662 }
ghb@10550 663
ghb@10550 664 U_CAPI void U_EXPORT2
ghb@10550 665 ucol_setText( UCollationElements *elems,
ghb@10550 666 const UChar *text,
ghb@10550 667 int32_t textLength,
ghb@10550 668 UErrorCode *status)
ghb@10550 669 {
ghb@10550 670 if (U_FAILURE(*status)) {
ghb@10550 671 return;
ghb@10550 672 }
ghb@10550 673
ghb@10550 674 if (elems->isWritable && elems->iteratordata_.string != NULL)
ghb@10550 675 {
ghb@10550 676 uprv_free((UChar *)elems->iteratordata_.string);
ghb@10550 677 }
ghb@10550 678
ghb@10550 679 if (text == NULL) {
ghb@10550 680 textLength = 0;
ghb@10550 681 }
ghb@10550 682
ghb@10550 683 elems->isWritable = FALSE;
ghb@10550 684
ghb@10550 685 /* free offset buffer to avoid memory leak before initializing. */
ghb@10550 686 ucol_freeOffsetBuffer(&(elems->iteratordata_));
ghb@10550 687 /* Ensure that previously allocated extendCEs is freed before setting to NULL. */
ghb@10550 688 if (elems->iteratordata_.extendCEs != NULL) {
ghb@10550 689 uprv_free(elems->iteratordata_.extendCEs);
ghb@10550 690 }
ghb@10550 691 uprv_init_collIterate(elems->iteratordata_.coll, text, textLength,
ghb@10550 692 &elems->iteratordata_, status);
ghb@10550 693
ghb@10550 694 elems->reset_ = TRUE;
ghb@10550 695 }
ghb@10550 696
ghb@10550 697 U_CAPI int32_t U_EXPORT2
ghb@10550 698 ucol_getOffset(const UCollationElements *elems)
ghb@10550 699 {
ghb@10550 700 const collIterate *ci = &(elems->iteratordata_);
ghb@10550 701
ghb@10550 702 if (ci->offsetRepeatCount > 0 && ci->offsetRepeatValue != 0) {
ghb@10550 703 return ci->offsetRepeatValue;
ghb@10550 704 }
ghb@10550 705
ghb@10550 706 if (ci->offsetReturn != NULL) {
ghb@10550 707 return *ci->offsetReturn;
ghb@10550 708 }
ghb@10550 709
ghb@10550 710 // while processing characters in normalization buffer getOffset will
ghb@10550 711 // return the next non-normalized character.
ghb@10550 712 // should be inline with the old implementation since the old codes uses
ghb@10550 713 // nextDecomp in normalizer which also decomposes the string till the
ghb@10550 714 // first base character is found.
ghb@10550 715 if (ci->flags & UCOL_ITER_INNORMBUF) {
ghb@10550 716 if (ci->fcdPosition == NULL) {
ghb@10550 717 return 0;
ghb@10550 718 }
ghb@10550 719 return (int32_t)(ci->fcdPosition - ci->string);
ghb@10550 720 }
ghb@10550 721 else {
ghb@10550 722 return (int32_t)(ci->pos - ci->string);
ghb@10550 723 }
ghb@10550 724 }
ghb@10550 725
ghb@10550 726 U_CAPI void U_EXPORT2
ghb@10550 727 ucol_setOffset(UCollationElements *elems,
ghb@10550 728 int32_t offset,
ghb@10550 729 UErrorCode *status)
ghb@10550 730 {
ghb@10550 731 if (U_FAILURE(*status)) {
ghb@10550 732 return;
ghb@10550 733 }
ghb@10550 734
ghb@10550 735 // this methods will clean up any use of the writable buffer and points to
ghb@10550 736 // the original string
ghb@10550 737 collIterate *ci = &(elems->iteratordata_);
ghb@10550 738 ci->pos = ci->string + offset;
ghb@10550 739 ci->CEpos = ci->toReturn = ci->CEs;
ghb@10550 740 if (ci->flags & UCOL_ITER_INNORMBUF) {
ghb@10550 741 ci->flags = ci->origFlags;
ghb@10550 742 }
ghb@10550 743 if ((ci->flags & UCOL_ITER_HASLEN) == 0) {
ghb@10550 744 ci->endp = ci->string + u_strlen(ci->string);
ghb@10550 745 ci->flags |= UCOL_ITER_HASLEN;
ghb@10550 746 }
ghb@10550 747 ci->fcdPosition = NULL;
ghb@10550 748 elems->reset_ = FALSE;
ghb@10550 749
ghb@10550 750 ci->offsetReturn = NULL;
ghb@10550 751 ci->offsetStore = ci->offsetBuffer;
ghb@10550 752 ci->offsetRepeatCount = ci->offsetRepeatValue = 0;
ghb@10550 753 }
ghb@10550 754
ghb@10550 755 U_CAPI int32_t U_EXPORT2
ghb@10550 756 ucol_primaryOrder (int32_t order)
ghb@10550 757 {
ghb@10550 758 order &= UCOL_PRIMARYMASK;
ghb@10550 759 return (order >> UCOL_PRIMARYORDERSHIFT);
ghb@10550 760 }
ghb@10550 761
ghb@10550 762 U_CAPI int32_t U_EXPORT2
ghb@10550 763 ucol_secondaryOrder (int32_t order)
ghb@10550 764 {
ghb@10550 765 order &= UCOL_SECONDARYMASK;
ghb@10550 766 return (order >> UCOL_SECONDARYORDERSHIFT);
ghb@10550 767 }
ghb@10550 768
ghb@10550 769 U_CAPI int32_t U_EXPORT2
ghb@10550 770 ucol_tertiaryOrder (int32_t order)
ghb@10550 771 {
ghb@10550 772 return (order & UCOL_TERTIARYMASK);
ghb@10550 773 }
ghb@10550 774
ghb@10550 775
ghb@10550 776 void ucol_freeOffsetBuffer(collIterate *s) {
ghb@10550 777 if (s != NULL && s->offsetBuffer != NULL) {
ghb@10550 778 uprv_free(s->offsetBuffer);
ghb@10550 779 s->offsetBuffer = NULL;
ghb@10550 780 s->offsetBufferSize = 0;
ghb@10550 781 }
ghb@10550 782 }
ghb@10550 783
ghb@10550 784 #endif /* #if !UCONFIG_NO_COLLATION */