annotate modules/javafx.web/src/main/native/Source/ThirdParty/icu/source/common/cstring.cpp @ 11038:20a8447c71c6

8207159: Update ICU to version 62.1 Reviewed-by: mbilla, kcr, ghb
author arajkumar
date Fri, 24 Aug 2018 15:06:40 +0530
parents modules/javafx.web/src/main/native/Source/ThirdParty/icu/source/common/cstring.c@fee4ef5c87df
children
rev   line source
arajkumar@11038 1 // © 2016 and later: Unicode, Inc. and others.
arajkumar@11038 2 // License & terms of use: http://www.unicode.org/copyright.html
ghb@10550 3 /*
ghb@10550 4 ******************************************************************************
ghb@10550 5 *
ghb@10550 6 * Copyright (C) 1997-2011, International Business Machines
ghb@10550 7 * Corporation and others. All Rights Reserved.
ghb@10550 8 *
ghb@10550 9 ******************************************************************************
ghb@10550 10 *
ghb@10550 11 * File CSTRING.C
ghb@10550 12 *
ghb@10550 13 * @author Helena Shih
ghb@10550 14 *
ghb@10550 15 * Modification History:
ghb@10550 16 *
ghb@10550 17 * Date Name Description
ghb@10550 18 * 6/18/98 hshih Created
ghb@10550 19 * 09/08/98 stephen Added include for ctype, for Mac Port
ghb@10550 20 * 11/15/99 helena Integrated S/390 IEEE changes.
ghb@10550 21 ******************************************************************************
ghb@10550 22 */
ghb@10550 23
ghb@10550 24
ghb@10550 25
ghb@10550 26 #include <stdlib.h>
ghb@10550 27 #include <stdio.h>
ghb@10550 28 #include "unicode/utypes.h"
ghb@10550 29 #include "cmemory.h"
ghb@10550 30 #include "cstring.h"
ghb@10550 31 #include "uassert.h"
ghb@10550 32
ghb@10550 33 /*
ghb@10550 34 * We hardcode case conversion for invariant characters to match our expectation
ghb@10550 35 * and the compiler execution charset.
ghb@10550 36 * This prevents problems on systems
ghb@10550 37 * - with non-default casing behavior, like Turkish system locales where
ghb@10550 38 * tolower('I') maps to dotless i and toupper('i') maps to dotted I
ghb@10550 39 * - where there are no lowercase Latin characters at all, or using different
ghb@10550 40 * codes (some old EBCDIC codepages)
ghb@10550 41 *
ghb@10550 42 * This works because the compiler usually runs on a platform where the execution
ghb@10550 43 * charset includes all of the invariant characters at their expected
ghb@10550 44 * code positions, so that the char * string literals in ICU code match
ghb@10550 45 * the char literals here.
ghb@10550 46 *
ghb@10550 47 * Note that the set of lowercase Latin letters is discontiguous in EBCDIC
ghb@10550 48 * and the set of uppercase Latin letters is discontiguous as well.
ghb@10550 49 */
ghb@10550 50
ghb@10550 51 U_CAPI UBool U_EXPORT2
ghb@10550 52 uprv_isASCIILetter(char c) {
ghb@10550 53 #if U_CHARSET_FAMILY==U_EBCDIC_FAMILY
ghb@10550 54 return
ghb@10550 55 ('a'<=c && c<='i') || ('j'<=c && c<='r') || ('s'<=c && c<='z') ||
ghb@10550 56 ('A'<=c && c<='I') || ('J'<=c && c<='R') || ('S'<=c && c<='Z');
ghb@10550 57 #else
ghb@10550 58 return ('a'<=c && c<='z') || ('A'<=c && c<='Z');
ghb@10550 59 #endif
ghb@10550 60 }
ghb@10550 61
ghb@10550 62 U_CAPI char U_EXPORT2
ghb@10550 63 uprv_toupper(char c) {
ghb@10550 64 #if U_CHARSET_FAMILY==U_EBCDIC_FAMILY
ghb@10550 65 if(('a'<=c && c<='i') || ('j'<=c && c<='r') || ('s'<=c && c<='z')) {
ghb@10550 66 c=(char)(c+('A'-'a'));
ghb@10550 67 }
ghb@10550 68 #else
ghb@10550 69 if('a'<=c && c<='z') {
ghb@10550 70 c=(char)(c+('A'-'a'));
ghb@10550 71 }
ghb@10550 72 #endif
ghb@10550 73 return c;
ghb@10550 74 }
ghb@10550 75
ghb@10550 76
#if 0
/*
 * Disabled: cstring.h maps uprv_tolower() onto either uprv_asciitolower()
 * or uprv_ebcdictolower(), which keeps the amount of code that needs test
 * coverage smaller.
 *
 * This generic definition would probably work for most charset families,
 * not only ASCII and EBCDIC, because its #else branch relies only on the
 * character literals of the execution charset.
 */
U_CAPI char U_EXPORT2
uprv_tolower(char c) {
#if U_CHARSET_FAMILY==U_EBCDIC_FAMILY
    const UBool isUpper =
        (c>='A' && c<='I') || (c>='J' && c<='R') || (c>='S' && c<='Z');
#else
    const UBool isUpper = (c>='A' && c<='Z');
#endif
    return isUpper ? (char)(c+('a'-'A')) : c;
}
#endif
ghb@10550 101
ghb@10550 102 U_CAPI char U_EXPORT2
ghb@10550 103 uprv_asciitolower(char c) {
ghb@10550 104 if(0x41<=c && c<=0x5a) {
ghb@10550 105 c=(char)(c+0x20);
ghb@10550 106 }
ghb@10550 107 return c;
ghb@10550 108 }
ghb@10550 109
ghb@10550 110 U_CAPI char U_EXPORT2
ghb@10550 111 uprv_ebcdictolower(char c) {
ghb@10550 112 if( (0xc1<=(uint8_t)c && (uint8_t)c<=0xc9) ||
ghb@10550 113 (0xd1<=(uint8_t)c && (uint8_t)c<=0xd9) ||
ghb@10550 114 (0xe2<=(uint8_t)c && (uint8_t)c<=0xe9)
ghb@10550 115 ) {
ghb@10550 116 c=(char)(c-0x40);
ghb@10550 117 }
ghb@10550 118 return c;
ghb@10550 119 }
ghb@10550 120
ghb@10550 121
ghb@10550 122 U_CAPI char* U_EXPORT2
ghb@10550 123 T_CString_toLowerCase(char* str)
ghb@10550 124 {
ghb@10550 125 char* origPtr = str;
ghb@10550 126
ghb@10550 127 if (str) {
ghb@10550 128 do
ghb@10550 129 *str = (char)uprv_tolower(*str);
ghb@10550 130 while (*(str++));
ghb@10550 131 }
ghb@10550 132
ghb@10550 133 return origPtr;
ghb@10550 134 }
ghb@10550 135
ghb@10550 136 U_CAPI char* U_EXPORT2
ghb@10550 137 T_CString_toUpperCase(char* str)
ghb@10550 138 {
ghb@10550 139 char* origPtr = str;
ghb@10550 140
ghb@10550 141 if (str) {
ghb@10550 142 do
ghb@10550 143 *str = (char)uprv_toupper(*str);
ghb@10550 144 while (*(str++));
ghb@10550 145 }
ghb@10550 146
ghb@10550 147 return origPtr;
ghb@10550 148 }
ghb@10550 149
ghb@10550 150 /*
ghb@10550 151 * Takes a int32_t and fills in a char* string with that number "radix"-based.
ghb@10550 152 * Does not handle negative values (makes an empty string for them).
ghb@10550 153 * Writes at most 12 chars ("-2147483647" plus NUL).
ghb@10550 154 * Returns the length of the string (not including the NUL).
ghb@10550 155 */
ghb@10550 156 U_CAPI int32_t U_EXPORT2
ghb@10550 157 T_CString_integerToString(char* buffer, int32_t v, int32_t radix)
ghb@10550 158 {
ghb@10550 159 char tbuf[30];
ghb@10550 160 int32_t tbx = sizeof(tbuf);
ghb@10550 161 uint8_t digit;
ghb@10550 162 int32_t length = 0;
ghb@10550 163 uint32_t uval;
ghb@10550 164
ghb@10550 165 U_ASSERT(radix>=2 && radix<=16);
ghb@10550 166 uval = (uint32_t) v;
ghb@10550 167 if(v<0 && radix == 10) {
ghb@10550 168 /* Only in base 10 do we conside numbers to be signed. */
ghb@10550 169 uval = (uint32_t)(-v);
ghb@10550 170 buffer[length++] = '-';
ghb@10550 171 }
ghb@10550 172
ghb@10550 173 tbx = sizeof(tbuf)-1;
ghb@10550 174 tbuf[tbx] = 0; /* We are generating the digits backwards. Null term the end. */
ghb@10550 175 do {
ghb@10550 176 digit = (uint8_t)(uval % radix);
ghb@10550 177 tbuf[--tbx] = (char)(T_CString_itosOffset(digit));
ghb@10550 178 uval = uval / radix;
ghb@10550 179 } while (uval != 0);
ghb@10550 180
ghb@10550 181 /* copy converted number into user buffer */
ghb@10550 182 uprv_strcpy(buffer+length, tbuf+tbx);
ghb@10550 183 length += sizeof(tbuf) - tbx -1;
ghb@10550 184 return length;
ghb@10550 185 }
ghb@10550 186
ghb@10550 187
ghb@10550 188
ghb@10550 189 /*
ghb@10550 190 * Takes a int64_t and fills in a char* string with that number "radix"-based.
ghb@10550 191 * Writes at most 21: chars ("-9223372036854775807" plus NUL).
ghb@10550 192 * Returns the length of the string, not including the terminating NULL.
ghb@10550 193 */
ghb@10550 194 U_CAPI int32_t U_EXPORT2
ghb@10550 195 T_CString_int64ToString(char* buffer, int64_t v, uint32_t radix)
ghb@10550 196 {
ghb@10550 197 char tbuf[30];
ghb@10550 198 int32_t tbx = sizeof(tbuf);
ghb@10550 199 uint8_t digit;
ghb@10550 200 int32_t length = 0;
ghb@10550 201 uint64_t uval;
ghb@10550 202
ghb@10550 203 U_ASSERT(radix>=2 && radix<=16);
ghb@10550 204 uval = (uint64_t) v;
ghb@10550 205 if(v<0 && radix == 10) {
ghb@10550 206 /* Only in base 10 do we conside numbers to be signed. */
ghb@10550 207 uval = (uint64_t)(-v);
ghb@10550 208 buffer[length++] = '-';
ghb@10550 209 }
ghb@10550 210
ghb@10550 211 tbx = sizeof(tbuf)-1;
ghb@10550 212 tbuf[tbx] = 0; /* We are generating the digits backwards. Null term the end. */
ghb@10550 213 do {
ghb@10550 214 digit = (uint8_t)(uval % radix);
ghb@10550 215 tbuf[--tbx] = (char)(T_CString_itosOffset(digit));
ghb@10550 216 uval = uval / radix;
ghb@10550 217 } while (uval != 0);
ghb@10550 218
ghb@10550 219 /* copy converted number into user buffer */
ghb@10550 220 uprv_strcpy(buffer+length, tbuf+tbx);
ghb@10550 221 length += sizeof(tbuf) - tbx -1;
ghb@10550 222 return length;
ghb@10550 223 }
ghb@10550 224
ghb@10550 225
ghb@10550 226 U_CAPI int32_t U_EXPORT2
ghb@10550 227 T_CString_stringToInteger(const char *integerString, int32_t radix)
ghb@10550 228 {
ghb@10550 229 char *end;
ghb@10550 230 return uprv_strtoul(integerString, &end, radix);
ghb@10550 231
ghb@10550 232 }
ghb@10550 233
ghb@10550 234 U_CAPI int U_EXPORT2
ghb@10550 235 uprv_stricmp(const char *str1, const char *str2) {
ghb@10550 236 if(str1==NULL) {
ghb@10550 237 if(str2==NULL) {
ghb@10550 238 return 0;
ghb@10550 239 } else {
ghb@10550 240 return -1;
ghb@10550 241 }
ghb@10550 242 } else if(str2==NULL) {
ghb@10550 243 return 1;
ghb@10550 244 } else {
ghb@10550 245 /* compare non-NULL strings lexically with lowercase */
ghb@10550 246 int rc;
ghb@10550 247 unsigned char c1, c2;
ghb@10550 248
ghb@10550 249 for(;;) {
ghb@10550 250 c1=(unsigned char)*str1;
ghb@10550 251 c2=(unsigned char)*str2;
ghb@10550 252 if(c1==0) {
ghb@10550 253 if(c2==0) {
ghb@10550 254 return 0;
ghb@10550 255 } else {
ghb@10550 256 return -1;
ghb@10550 257 }
ghb@10550 258 } else if(c2==0) {
ghb@10550 259 return 1;
ghb@10550 260 } else {
ghb@10550 261 /* compare non-zero characters with lowercase */
ghb@10550 262 rc=(int)(unsigned char)uprv_tolower(c1)-(int)(unsigned char)uprv_tolower(c2);
ghb@10550 263 if(rc!=0) {
ghb@10550 264 return rc;
ghb@10550 265 }
ghb@10550 266 }
ghb@10550 267 ++str1;
ghb@10550 268 ++str2;
ghb@10550 269 }
ghb@10550 270 }
ghb@10550 271 }
ghb@10550 272
ghb@10550 273 U_CAPI int U_EXPORT2
ghb@10550 274 uprv_strnicmp(const char *str1, const char *str2, uint32_t n) {
ghb@10550 275 if(str1==NULL) {
ghb@10550 276 if(str2==NULL) {
ghb@10550 277 return 0;
ghb@10550 278 } else {
ghb@10550 279 return -1;
ghb@10550 280 }
ghb@10550 281 } else if(str2==NULL) {
ghb@10550 282 return 1;
ghb@10550 283 } else {
ghb@10550 284 /* compare non-NULL strings lexically with lowercase */
ghb@10550 285 int rc;
ghb@10550 286 unsigned char c1, c2;
ghb@10550 287
ghb@10550 288 for(; n--;) {
ghb@10550 289 c1=(unsigned char)*str1;
ghb@10550 290 c2=(unsigned char)*str2;
ghb@10550 291 if(c1==0) {
ghb@10550 292 if(c2==0) {
ghb@10550 293 return 0;
ghb@10550 294 } else {
ghb@10550 295 return -1;
ghb@10550 296 }
ghb@10550 297 } else if(c2==0) {
ghb@10550 298 return 1;
ghb@10550 299 } else {
ghb@10550 300 /* compare non-zero characters with lowercase */
ghb@10550 301 rc=(int)(unsigned char)uprv_tolower(c1)-(int)(unsigned char)uprv_tolower(c2);
ghb@10550 302 if(rc!=0) {
ghb@10550 303 return rc;
ghb@10550 304 }
ghb@10550 305 }
ghb@10550 306 ++str1;
ghb@10550 307 ++str2;
ghb@10550 308 }
ghb@10550 309 }
ghb@10550 310
ghb@10550 311 return 0;
ghb@10550 312 }
ghb@10550 313
ghb@10550 314 U_CAPI char* U_EXPORT2
ghb@10550 315 uprv_strdup(const char *src) {
ghb@10550 316 size_t len = uprv_strlen(src) + 1;
ghb@10550 317 char *dup = (char *) uprv_malloc(len);
ghb@10550 318
ghb@10550 319 if (dup) {
ghb@10550 320 uprv_memcpy(dup, src, len);
ghb@10550 321 }
ghb@10550 322
ghb@10550 323 return dup;
ghb@10550 324 }
ghb@10550 325
ghb@10550 326 U_CAPI char* U_EXPORT2
ghb@10550 327 uprv_strndup(const char *src, int32_t n) {
ghb@10550 328 char *dup;
ghb@10550 329
ghb@10550 330 if(n < 0) {
ghb@10550 331 dup = uprv_strdup(src);
ghb@10550 332 } else {
ghb@10550 333 dup = (char*)uprv_malloc(n+1);
ghb@10550 334 if (dup) {
ghb@10550 335 uprv_memcpy(dup, src, n);
ghb@10550 336 dup[n] = 0;
ghb@10550 337 }
ghb@10550 338 }
ghb@10550 339
ghb@10550 340 return dup;
ghb@10550 341 }