annotate modules/javafx.web/src/main/native/Source/ThirdParty/icu/source/i18n/uspoof_conf.cpp @ 11038:20a8447c71c6

8207159: Update ICU to version 62.1 Reviewed-by: mbilla, kcr, ghb
author arajkumar
date Fri, 24 Aug 2018 15:06:40 +0530
parents fee4ef5c87df
children
rev   line source
arajkumar@11038 1 // © 2016 and later: Unicode, Inc. and others.
arajkumar@11038 2 // License & terms of use: http://www.unicode.org/copyright.html
ghb@10550 3 /*
ghb@10550 4 ******************************************************************************
ghb@10550 5 *
arajkumar@11038 6 * Copyright (C) 2008-2015, International Business Machines
ghb@10550 7 * Corporation and others. All Rights Reserved.
ghb@10550 8 *
ghb@10550 9 ******************************************************************************
ghb@10550 10 * file name: uspoof_conf.cpp
arajkumar@11038 11 * encoding: UTF-8
ghb@10550 12 * tab size: 8 (not used)
ghb@10550 13 * indentation:4
ghb@10550 14 *
ghb@10550 15 * created on: 2009Jan05 (refactoring earlier files)
ghb@10550 16 * created by: Andy Heninger
ghb@10550 17 *
ghb@10550 18 * Internal classes for compiling confusable data into its binary (runtime) form.
ghb@10550 19 */
ghb@10550 20
ghb@10550 21 #include "unicode/utypes.h"
ghb@10550 22 #include "unicode/uspoof.h"
ghb@10550 23 #if !UCONFIG_NO_REGULAR_EXPRESSIONS
ghb@10550 24 #if !UCONFIG_NO_NORMALIZATION
ghb@10550 25
ghb@10550 26 #include "unicode/unorm.h"
ghb@10550 27 #include "unicode/uregex.h"
ghb@10550 28 #include "unicode/ustring.h"
ghb@10550 29 #include "cmemory.h"
ghb@10550 30 #include "uspoof_impl.h"
ghb@10550 31 #include "uhash.h"
ghb@10550 32 #include "uvector.h"
ghb@10550 33 #include "uassert.h"
ghb@10550 34 #include "uarrsort.h"
ghb@10550 35 #include "uspoof_conf.h"
ghb@10550 36
ghb@10550 37 U_NAMESPACE_USE
ghb@10550 38
ghb@10550 39
ghb@10550 40 //---------------------------------------------------------------------
ghb@10550 41 //
ghb@10550 42 // buildConfusableData Compile the source confusable data, as defined by
ghb@10550 43 // the Unicode data file confusables.txt, into the binary
ghb@10550 44 // structures used by the confusable detector.
ghb@10550 45 //
ghb@10550 46 // The binary structures are described in uspoof_impl.h
ghb@10550 47 //
arajkumar@11038 48 // 1. Parse the data, making a hash table mapping from a UChar32 to a String.
ghb@10550 49 //
ghb@10550 50 // 2. Sort all of the strings encountered by length, since they will need to
ghb@10550 51 // be stored in that order in the final string table.
arajkumar@11038 52 // TODO: Sorting these strings by length is no longer needed since the removal of
arajkumar@11038 53 // the string lengths table. This logic can be removed to save processing time
arajkumar@11038 54 // when building confusables data.
ghb@10550 55 //
ghb@10550 56 // 3. Build a list of keys (UChar32s) from the mapping table. Sort the
ghb@10550 57 // list because that will be the ordering of our runtime table.
ghb@10550 58 //
ghb@10550 59 // 4. Generate the run time string table. This is generated before the key & value
ghb@10550 60 // tables because we need the string indexes when building those tables.
ghb@10550 61 //
ghb@10550 62 // 5. Build the run-time key and value tables. These are parallel tables, and are built
ghb@10550 63 // at the same time
ghb@10550 64 //
ghb@10550 65
ghb@10550 66 SPUString::SPUString(UnicodeString *s) {
ghb@10550 67 fStr = s;
arajkumar@11038 68 fCharOrStrTableIndex = 0;
ghb@10550 69 }
ghb@10550 70
ghb@10550 71
ghb@10550 72 SPUString::~SPUString() {
ghb@10550 73 delete fStr;
ghb@10550 74 }
ghb@10550 75
ghb@10550 76
ghb@10550 77 SPUStringPool::SPUStringPool(UErrorCode &status) : fVec(NULL), fHash(NULL) {
ghb@10550 78 fVec = new UVector(status);
arajkumar@11038 79 if (fVec == NULL) {
arajkumar@11038 80 status = U_MEMORY_ALLOCATION_ERROR;
arajkumar@11038 81 return;
arajkumar@11038 82 }
ghb@10550 83 fHash = uhash_open(uhash_hashUnicodeString, // key hash function
ghb@10550 84 uhash_compareUnicodeString, // Key Comparator
ghb@10550 85 NULL, // Value Comparator
ghb@10550 86 &status);
ghb@10550 87 }
ghb@10550 88
ghb@10550 89
ghb@10550 90 SPUStringPool::~SPUStringPool() {
ghb@10550 91 int i;
ghb@10550 92 for (i=fVec->size()-1; i>=0; i--) {
ghb@10550 93 SPUString *s = static_cast<SPUString *>(fVec->elementAt(i));
ghb@10550 94 delete s;
ghb@10550 95 }
ghb@10550 96 delete fVec;
ghb@10550 97 uhash_close(fHash);
ghb@10550 98 }
ghb@10550 99
ghb@10550 100
ghb@10550 101 int32_t SPUStringPool::size() {
ghb@10550 102 return fVec->size();
ghb@10550 103 }
ghb@10550 104
ghb@10550 105 SPUString *SPUStringPool::getByIndex(int32_t index) {
ghb@10550 106 SPUString *retString = (SPUString *)fVec->elementAt(index);
ghb@10550 107 return retString;
ghb@10550 108 }
ghb@10550 109
ghb@10550 110
ghb@10550 111 // Comparison function for ordering strings in the string pool.
ghb@10550 112 // Compare by length first, then, within a group of the same length,
ghb@10550 113 // by code point order.
ghb@10550 114 // Conforms to the type signature for a USortComparator in uvector.h
ghb@10550 115
ghb@10550 116 static int8_t U_CALLCONV SPUStringCompare(UHashTok left, UHashTok right) {
ghb@10550 117 const SPUString *sL = const_cast<const SPUString *>(
ghb@10550 118 static_cast<SPUString *>(left.pointer));
ghb@10550 119 const SPUString *sR = const_cast<const SPUString *>(
ghb@10550 120 static_cast<SPUString *>(right.pointer));
ghb@10550 121 int32_t lenL = sL->fStr->length();
ghb@10550 122 int32_t lenR = sR->fStr->length();
ghb@10550 123 if (lenL < lenR) {
ghb@10550 124 return -1;
ghb@10550 125 } else if (lenL > lenR) {
ghb@10550 126 return 1;
ghb@10550 127 } else {
ghb@10550 128 return sL->fStr->compare(*(sR->fStr));
ghb@10550 129 }
ghb@10550 130 }
ghb@10550 131
ghb@10550 132 void SPUStringPool::sort(UErrorCode &status) {
ghb@10550 133 fVec->sort(SPUStringCompare, status);
ghb@10550 134 }
ghb@10550 135
ghb@10550 136
ghb@10550 137 SPUString *SPUStringPool::addString(UnicodeString *src, UErrorCode &status) {
ghb@10550 138 SPUString *hashedString = static_cast<SPUString *>(uhash_get(fHash, src));
ghb@10550 139 if (hashedString != NULL) {
ghb@10550 140 delete src;
ghb@10550 141 } else {
ghb@10550 142 hashedString = new SPUString(src);
arajkumar@11038 143 if (hashedString == NULL) {
arajkumar@11038 144 status = U_MEMORY_ALLOCATION_ERROR;
arajkumar@11038 145 return NULL;
arajkumar@11038 146 }
ghb@10550 147 uhash_put(fHash, src, hashedString, &status);
ghb@10550 148 fVec->addElement(hashedString, status);
ghb@10550 149 }
ghb@10550 150 return hashedString;
ghb@10550 151 }
ghb@10550 152
ghb@10550 153
ghb@10550 154
ghb@10550 155 ConfusabledataBuilder::ConfusabledataBuilder(SpoofImpl *spImpl, UErrorCode &status) :
ghb@10550 156 fSpoofImpl(spImpl),
ghb@10550 157 fInput(NULL),
arajkumar@11038 158 fTable(NULL),
ghb@10550 159 fKeySet(NULL),
ghb@10550 160 fKeyVec(NULL),
ghb@10550 161 fValueVec(NULL),
ghb@10550 162 fStringTable(NULL),
ghb@10550 163 stringPool(NULL),
ghb@10550 164 fParseLine(NULL),
ghb@10550 165 fParseHexNum(NULL),
ghb@10550 166 fLineNum(0)
ghb@10550 167 {
ghb@10550 168 if (U_FAILURE(status)) {
ghb@10550 169 return;
ghb@10550 170 }
arajkumar@11038 171
arajkumar@11038 172 fTable = uhash_open(uhash_hashLong, uhash_compareLong, NULL, &status);
arajkumar@11038 173
arajkumar@11038 174 fKeySet = new UnicodeSet();
arajkumar@11038 175 if (fKeySet == NULL) {
arajkumar@11038 176 status = U_MEMORY_ALLOCATION_ERROR;
arajkumar@11038 177 return;
arajkumar@11038 178 }
arajkumar@11038 179
arajkumar@11038 180 fKeyVec = new UVector(status);
arajkumar@11038 181 if (fKeyVec == NULL) {
arajkumar@11038 182 status = U_MEMORY_ALLOCATION_ERROR;
arajkumar@11038 183 return;
arajkumar@11038 184 }
arajkumar@11038 185
arajkumar@11038 186 fValueVec = new UVector(status);
arajkumar@11038 187 if (fValueVec == NULL) {
arajkumar@11038 188 status = U_MEMORY_ALLOCATION_ERROR;
arajkumar@11038 189 return;
arajkumar@11038 190 }
arajkumar@11038 191
ghb@10550 192 stringPool = new SPUStringPool(status);
arajkumar@11038 193 if (stringPool == NULL) {
arajkumar@11038 194 status = U_MEMORY_ALLOCATION_ERROR;
arajkumar@11038 195 return;
arajkumar@11038 196 }
ghb@10550 197 }
ghb@10550 198
ghb@10550 199
ghb@10550 200 ConfusabledataBuilder::~ConfusabledataBuilder() {
ghb@10550 201 uprv_free(fInput);
ghb@10550 202 uregex_close(fParseLine);
ghb@10550 203 uregex_close(fParseHexNum);
arajkumar@11038 204 uhash_close(fTable);
ghb@10550 205 delete fKeySet;
ghb@10550 206 delete fKeyVec;
ghb@10550 207 delete fStringTable;
ghb@10550 208 delete fValueVec;
ghb@10550 209 delete stringPool;
ghb@10550 210 }
ghb@10550 211
ghb@10550 212
ghb@10550 213 void ConfusabledataBuilder::buildConfusableData(SpoofImpl * spImpl, const char * confusables,
ghb@10550 214 int32_t confusablesLen, int32_t *errorType, UParseError *pe, UErrorCode &status) {
ghb@10550 215
ghb@10550 216 if (U_FAILURE(status)) {
ghb@10550 217 return;
ghb@10550 218 }
ghb@10550 219 ConfusabledataBuilder builder(spImpl, status);
ghb@10550 220 builder.build(confusables, confusablesLen, status);
ghb@10550 221 if (U_FAILURE(status) && errorType != NULL) {
ghb@10550 222 *errorType = USPOOF_SINGLE_SCRIPT_CONFUSABLE;
ghb@10550 223 pe->line = builder.fLineNum;
ghb@10550 224 }
ghb@10550 225 }
ghb@10550 226
ghb@10550 227
ghb@10550 228 void ConfusabledataBuilder::build(const char * confusables, int32_t confusablesLen,
ghb@10550 229 UErrorCode &status) {
ghb@10550 230
ghb@10550 231 // Convert the user input data from UTF-8 to UChar (UTF-16)
ghb@10550 232 int32_t inputLen = 0;
ghb@10550 233 if (U_FAILURE(status)) {
ghb@10550 234 return;
ghb@10550 235 }
ghb@10550 236 u_strFromUTF8(NULL, 0, &inputLen, confusables, confusablesLen, &status);
ghb@10550 237 if (status != U_BUFFER_OVERFLOW_ERROR) {
ghb@10550 238 return;
ghb@10550 239 }
ghb@10550 240 status = U_ZERO_ERROR;
ghb@10550 241 fInput = static_cast<UChar *>(uprv_malloc((inputLen+1) * sizeof(UChar)));
ghb@10550 242 if (fInput == NULL) {
ghb@10550 243 status = U_MEMORY_ALLOCATION_ERROR;
ghb@10550 244 return;
ghb@10550 245 }
ghb@10550 246 u_strFromUTF8(fInput, inputLen+1, NULL, confusables, confusablesLen, &status);
ghb@10550 247
ghb@10550 248
ghb@10550 249 // Regular Expression to parse a line from Confusables.txt. The expression will match
ghb@10550 250 // any line. What was matched is determined by examining which capture groups have a match.
ghb@10550 251 // Capture Group 1: the source char
ghb@10550 252 // Capture Group 2: the replacement chars
arajkumar@11038 253 // Capture Group 3-6 the table type, SL, SA, ML, or MA (deprecated)
ghb@10550 254 // Capture Group 7: A blank or comment only line.
ghb@10550 255 // Capture Group 8: A syntactically invalid line. Anything that didn't match before.
ghb@10550 256 // Example Line from the confusables.txt source file:
ghb@10550 257 // "1D702 ; 006E 0329 ; SL # MATHEMATICAL ITALIC SMALL ETA ... "
ghb@10550 258 UnicodeString pattern(
ghb@10550 259 "(?m)^[ \\t]*([0-9A-Fa-f]+)[ \\t]+;" // Match the source char
ghb@10550 260 "[ \\t]*([0-9A-Fa-f]+" // Match the replacement char(s)
ghb@10550 261 "(?:[ \\t]+[0-9A-Fa-f]+)*)[ \\t]*;" // (continued)
ghb@10550 262 "\\s*(?:(SL)|(SA)|(ML)|(MA))" // Match the table type
ghb@10550 263 "[ \\t]*(?:#.*?)?$" // Match any trailing #comment
ghb@10550 264 "|^([ \\t]*(?:#.*?)?)$" // OR match empty lines or lines with only a #comment
ghb@10550 265 "|^(.*?)$", -1, US_INV); // OR match any line, which catches illegal lines.
ghb@10550 266 // TODO: Why are we using the regex C API here? C++ would just take UnicodeString...
ghb@10550 267 fParseLine = uregex_open(pattern.getBuffer(), pattern.length(), 0, NULL, &status);
ghb@10550 268
ghb@10550 269 // Regular expression for parsing a hex number out of a space-separated list of them.
ghb@10550 270 // Capture group 1 gets the number, with spaces removed.
ghb@10550 271 pattern = UNICODE_STRING_SIMPLE("\\s*([0-9A-F]+)");
ghb@10550 272 fParseHexNum = uregex_open(pattern.getBuffer(), pattern.length(), 0, NULL, &status);
ghb@10550 273
ghb@10550 274 // Zap any Byte Order Mark at the start of input. Changing it to a space is benign
ghb@10550 275 // given the syntax of the input.
ghb@10550 276 if (*fInput == 0xfeff) {
ghb@10550 277 *fInput = 0x20;
ghb@10550 278 }
ghb@10550 279
ghb@10550 280 // Parse the input, one line per iteration of this loop.
ghb@10550 281 uregex_setText(fParseLine, fInput, inputLen, &status);
ghb@10550 282 while (uregex_findNext(fParseLine, &status)) {
ghb@10550 283 fLineNum++;
ghb@10550 284 if (uregex_start(fParseLine, 7, &status) >= 0) {
ghb@10550 285 // this was a blank or comment line.
ghb@10550 286 continue;
ghb@10550 287 }
ghb@10550 288 if (uregex_start(fParseLine, 8, &status) >= 0) {
ghb@10550 289 // input file syntax error.
ghb@10550 290 status = U_PARSE_ERROR;
ghb@10550 291 return;
ghb@10550 292 }
ghb@10550 293
ghb@10550 294 // We have a good input line. Extract the key character and mapping string, and
ghb@10550 295 // put them into the appropriate mapping table.
ghb@10550 296 UChar32 keyChar = SpoofImpl::ScanHex(fInput, uregex_start(fParseLine, 1, &status),
ghb@10550 297 uregex_end(fParseLine, 1, &status), status);
ghb@10550 298
ghb@10550 299 int32_t mapStringStart = uregex_start(fParseLine, 2, &status);
ghb@10550 300 int32_t mapStringLength = uregex_end(fParseLine, 2, &status) - mapStringStart;
ghb@10550 301 uregex_setText(fParseHexNum, &fInput[mapStringStart], mapStringLength, &status);
ghb@10550 302
ghb@10550 303 UnicodeString *mapString = new UnicodeString();
ghb@10550 304 if (mapString == NULL) {
ghb@10550 305 status = U_MEMORY_ALLOCATION_ERROR;
ghb@10550 306 return;
ghb@10550 307 }
ghb@10550 308 while (uregex_findNext(fParseHexNum, &status)) {
ghb@10550 309 UChar32 c = SpoofImpl::ScanHex(&fInput[mapStringStart], uregex_start(fParseHexNum, 1, &status),
ghb@10550 310 uregex_end(fParseHexNum, 1, &status), status);
ghb@10550 311 mapString->append(c);
ghb@10550 312 }
ghb@10550 313 U_ASSERT(mapString->length() >= 1);
ghb@10550 314
ghb@10550 315 // Put the map (value) string into the string pool
ghb@10550 316 // This a little like a Java intern() - any duplicates will be eliminated.
ghb@10550 317 SPUString *smapString = stringPool->addString(mapString, status);
ghb@10550 318
arajkumar@11038 319 // Add the UChar32 -> string mapping to the table.
arajkumar@11038 320 // For Unicode 8, the SL, SA and ML tables have been discontinued.
arajkumar@11038 321 // All input data from confusables.txt is tagged MA.
arajkumar@11038 322 uhash_iput(fTable, keyChar, smapString, &status);
arajkumar@11038 323 if (U_FAILURE(status)) { return; }
ghb@10550 324 fKeySet->add(keyChar);
ghb@10550 325 }
ghb@10550 326
ghb@10550 327 // Input data is now all parsed and collected.
ghb@10550 328 // Now create the run-time binary form of the data.
ghb@10550 329 //
ghb@10550 330 // This is done in two steps. First the data is assembled into vectors and strings,
ghb@10550 331 // for ease of construction, then the contents of these collections are dumped
ghb@10550 332 // into the actual raw-bytes data storage.
ghb@10550 333
ghb@10550 334 // Build up the string array, and record the index of each string therein
ghb@10550 335 // in the (build time only) string pool.
ghb@10550 336 // Strings of length one are not entered into the strings array.
ghb@10550 337 // (Strings in the table are sorted by length)
ghb@10550 338 stringPool->sort(status);
ghb@10550 339 fStringTable = new UnicodeString();
ghb@10550 340 int32_t poolSize = stringPool->size();
ghb@10550 341 int32_t i;
ghb@10550 342 for (i=0; i<poolSize; i++) {
ghb@10550 343 SPUString *s = stringPool->getByIndex(i);
ghb@10550 344 int32_t strLen = s->fStr->length();
ghb@10550 345 int32_t strIndex = fStringTable->length();
ghb@10550 346 if (strLen == 1) {
ghb@10550 347 // strings of length one do not get an entry in the string table.
ghb@10550 348 // Keep the single string character itself here, which is the same
ghb@10550 349 // convention that is used in the final run-time string table index.
arajkumar@11038 350 s->fCharOrStrTableIndex = s->fStr->charAt(0);
ghb@10550 351 } else {
arajkumar@11038 352 s->fCharOrStrTableIndex = strIndex;
ghb@10550 353 fStringTable->append(*(s->fStr));
ghb@10550 354 }
ghb@10550 355 }
ghb@10550 356
ghb@10550 357 // Construct the compile-time Key and Value tables
ghb@10550 358 //
ghb@10550 359 // For each key code point, check which mapping tables it applies to,
ghb@10550 360 // and create the final data for the key & value structures.
ghb@10550 361 //
ghb@10550 362 // The four logical mapping tables are conflated into one combined table.
ghb@10550 363 // If multiple logical tables have the same mapping for some key, they
ghb@10550 364 // share a single entry in the combined table.
ghb@10550 365 // If more than one mapping exists for the same key code point, multiple
ghb@10550 366 // entries will be created in the table
ghb@10550 367
ghb@10550 368 for (int32_t range=0; range<fKeySet->getRangeCount(); range++) {
ghb@10550 369 // It is an oddity of the UnicodeSet API that simply enumerating the contained
ghb@10550 370 // code points requires a nested loop.
ghb@10550 371 for (UChar32 keyChar=fKeySet->getRangeStart(range);
ghb@10550 372 keyChar <= fKeySet->getRangeEnd(range); keyChar++) {
arajkumar@11038 373 SPUString *targetMapping = static_cast<SPUString *>(uhash_iget(fTable, keyChar));
arajkumar@11038 374 U_ASSERT(targetMapping != NULL);
arajkumar@11038 375
arajkumar@11038 376 // Set an error code if trying to consume a long string. Otherwise,
arajkumar@11038 377 // codePointAndLengthToKey will abort on a U_ASSERT.
arajkumar@11038 378 if (targetMapping->fStr->length() > 256) {
arajkumar@11038 379 status = U_ILLEGAL_ARGUMENT_ERROR;
arajkumar@11038 380 return;
arajkumar@11038 381 }
arajkumar@11038 382
arajkumar@11038 383 int32_t key = ConfusableDataUtils::codePointAndLengthToKey(keyChar,
arajkumar@11038 384 targetMapping->fStr->length());
arajkumar@11038 385 int32_t value = targetMapping->fCharOrStrTableIndex;
arajkumar@11038 386
arajkumar@11038 387 fKeyVec->addElement(key, status);
arajkumar@11038 388 fValueVec->addElement(value, status);
ghb@10550 389 }
ghb@10550 390 }
ghb@10550 391
ghb@10550 392 // Put the assembled data into the flat runtime array
ghb@10550 393 outputData(status);
ghb@10550 394
ghb@10550 395 // All of the intermediate allocated data belongs to the ConfusabledataBuilder
ghb@10550 396 // object (this), and is deleted in the destructor.
ghb@10550 397 return;
ghb@10550 398 }
ghb@10550 399
ghb@10550 400 //
ghb@10550 401 // outputData The confusable data has been compiled and stored in intermediate
ghb@10550 402 // collections and strings. Copy it from there to the final flat
ghb@10550 403 // binary array.
ghb@10550 404 //
ghb@10550 405 // Note that as each section is added to the output data, the
ghb@10550 406 // expand (reserveSpace() function will likely relocate it in memory.
ghb@10550 407 // Be careful with pointers.
ghb@10550 408 //
ghb@10550 409 void ConfusabledataBuilder::outputData(UErrorCode &status) {
ghb@10550 410
ghb@10550 411 U_ASSERT(fSpoofImpl->fSpoofData->fDataOwned == TRUE);
ghb@10550 412
ghb@10550 413 // The Key Table
ghb@10550 414 // While copying the keys to the runtime array,
ghb@10550 415 // also sanity check that they are sorted.
ghb@10550 416
ghb@10550 417 int32_t numKeys = fKeyVec->size();
ghb@10550 418 int32_t *keys =
ghb@10550 419 static_cast<int32_t *>(fSpoofImpl->fSpoofData->reserveSpace(numKeys*sizeof(int32_t), status));
ghb@10550 420 if (U_FAILURE(status)) {
ghb@10550 421 return;
ghb@10550 422 }
ghb@10550 423 int i;
arajkumar@11038 424 UChar32 previousCodePoint = 0;
ghb@10550 425 for (i=0; i<numKeys; i++) {
ghb@10550 426 int32_t key = fKeyVec->elementAti(i);
arajkumar@11038 427 UChar32 codePoint = ConfusableDataUtils::keyToCodePoint(key);
arajkumar@11038 428 (void)previousCodePoint; // Suppress unused variable warning.
arajkumar@11038 429 // strictly greater because there can be only one entry per code point
arajkumar@11038 430 U_ASSERT(codePoint > previousCodePoint);
ghb@10550 431 keys[i] = key;
arajkumar@11038 432 previousCodePoint = codePoint;
ghb@10550 433 }
ghb@10550 434 SpoofDataHeader *rawData = fSpoofImpl->fSpoofData->fRawData;
ghb@10550 435 rawData->fCFUKeys = (int32_t)((char *)keys - (char *)rawData);
ghb@10550 436 rawData->fCFUKeysSize = numKeys;
ghb@10550 437 fSpoofImpl->fSpoofData->fCFUKeys = keys;
ghb@10550 438
ghb@10550 439
ghb@10550 440 // The Value Table, parallels the key table
ghb@10550 441 int32_t numValues = fValueVec->size();
ghb@10550 442 U_ASSERT(numKeys == numValues);
ghb@10550 443 uint16_t *values =
ghb@10550 444 static_cast<uint16_t *>(fSpoofImpl->fSpoofData->reserveSpace(numKeys*sizeof(uint16_t), status));
ghb@10550 445 if (U_FAILURE(status)) {
ghb@10550 446 return;
ghb@10550 447 }
ghb@10550 448 for (i=0; i<numValues; i++) {
ghb@10550 449 uint32_t value = static_cast<uint32_t>(fValueVec->elementAti(i));
ghb@10550 450 U_ASSERT(value < 0xffff);
ghb@10550 451 values[i] = static_cast<uint16_t>(value);
ghb@10550 452 }
ghb@10550 453 rawData = fSpoofImpl->fSpoofData->fRawData;
ghb@10550 454 rawData->fCFUStringIndex = (int32_t)((char *)values - (char *)rawData);
ghb@10550 455 rawData->fCFUStringIndexSize = numValues;
ghb@10550 456 fSpoofImpl->fSpoofData->fCFUValues = values;
ghb@10550 457
ghb@10550 458 // The Strings Table.
ghb@10550 459
ghb@10550 460 uint32_t stringsLength = fStringTable->length();
ghb@10550 461 // Reserve an extra space so the string will be nul-terminated. This is
ghb@10550 462 // only a convenience, for when debugging; it is not needed otherwise.
ghb@10550 463 UChar *strings =
ghb@10550 464 static_cast<UChar *>(fSpoofImpl->fSpoofData->reserveSpace(stringsLength*sizeof(UChar)+2, status));
ghb@10550 465 if (U_FAILURE(status)) {
ghb@10550 466 return;
ghb@10550 467 }
ghb@10550 468 fStringTable->extract(strings, stringsLength+1, status);
ghb@10550 469 rawData = fSpoofImpl->fSpoofData->fRawData;
ghb@10550 470 U_ASSERT(rawData->fCFUStringTable == 0);
ghb@10550 471 rawData->fCFUStringTable = (int32_t)((char *)strings - (char *)rawData);
ghb@10550 472 rawData->fCFUStringTableLen = stringsLength;
ghb@10550 473 fSpoofImpl->fSpoofData->fCFUStrings = strings;
ghb@10550 474 }
ghb@10550 475
ghb@10550 476 #endif
ghb@10550 477 #endif // !UCONFIG_NO_REGULAR_EXPRESSIONS
ghb@10550 478