OpenJDK / openjfx / jfx-dev / rt
view modules/javafx.web/src/main/native/Source/ThirdParty/icu/source/common/filterednormalizer2.cpp @ 11038:20a8447c71c6
8207159: Update ICU to version 62.1
Reviewed-by: mbilla, kcr, ghb
author | arajkumar |
---|---|
date | Fri, 24 Aug 2018 15:06:40 +0530 |
parents | fee4ef5c87df |
children |
line wrap: on
line source
// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* * * Copyright (C) 2009-2012, International Business Machines * Corporation and others. All Rights Reserved. * ******************************************************************************* * file name: filterednormalizer2.cpp * encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * * created on: 2009dec10 * created by: Markus W. Scherer */ #include "unicode/utypes.h" #if !UCONFIG_NO_NORMALIZATION #include "unicode/edits.h" #include "unicode/normalizer2.h" #include "unicode/stringoptions.h" #include "unicode/uniset.h" #include "unicode/unistr.h" #include "unicode/unorm.h" #include "cpputils.h" U_NAMESPACE_BEGIN FilteredNormalizer2::~FilteredNormalizer2() {} UnicodeString & FilteredNormalizer2::normalize(const UnicodeString &src, UnicodeString &dest, UErrorCode &errorCode) const { uprv_checkCanGetBuffer(src, errorCode); if(U_FAILURE(errorCode)) { dest.setToBogus(); return dest; } if(&dest==&src) { errorCode=U_ILLEGAL_ARGUMENT_ERROR; return dest; } dest.remove(); return normalize(src, dest, USET_SPAN_SIMPLE, errorCode); } // Internal: No argument checking, and appends to dest. // Pass as input spanCondition the one that is likely to yield a non-zero // span length at the start of src. // For set=[:age=3.2:], since almost all common characters were in Unicode 3.2, // USET_SPAN_SIMPLE should be passed in for the start of src // and USET_SPAN_NOT_CONTAINED should be passed in if we continue after // an in-filter prefix. UnicodeString & FilteredNormalizer2::normalize(const UnicodeString &src, UnicodeString &dest, USetSpanCondition spanCondition, UErrorCode &errorCode) const { UnicodeString tempDest; // Don't throw away destination buffer between iterations. for(int32_t prevSpanLimit=0; prevSpanLimit<src.length();) { int32_t spanLimit=set.span(src, prevSpanLimit, spanCondition); int32_t spanLength=spanLimit-prevSpanLimit; if(spanCondition==USET_SPAN_NOT_CONTAINED) { if(spanLength!=0) { dest.append(src, prevSpanLimit, spanLength); } spanCondition=USET_SPAN_SIMPLE; } else { if(spanLength!=0) { // Not norm2.normalizeSecondAndAppend() because we do not want // to modify the non-filter part of dest. dest.append(norm2.normalize(src.tempSubStringBetween(prevSpanLimit, spanLimit), tempDest, errorCode)); if(U_FAILURE(errorCode)) { break; } } spanCondition=USET_SPAN_NOT_CONTAINED; } prevSpanLimit=spanLimit; } return dest; } void FilteredNormalizer2::normalizeUTF8(uint32_t options, StringPiece src, ByteSink &sink, Edits *edits, UErrorCode &errorCode) const { if (U_FAILURE(errorCode)) { return; } if (edits != nullptr && (options & U_EDITS_NO_RESET) == 0) { edits->reset(); } options |= U_EDITS_NO_RESET; // Do not reset for each span. normalizeUTF8(options, src.data(), src.length(), sink, edits, USET_SPAN_SIMPLE, errorCode); } void FilteredNormalizer2::normalizeUTF8(uint32_t options, const char *src, int32_t length, ByteSink &sink, Edits *edits, USetSpanCondition spanCondition, UErrorCode &errorCode) const { while (length > 0) { int32_t spanLength = set.spanUTF8(src, length, spanCondition); if (spanCondition == USET_SPAN_NOT_CONTAINED) { if (spanLength != 0) { if (edits != nullptr) { edits->addUnchanged(spanLength); } if ((options & U_OMIT_UNCHANGED_TEXT) == 0) { sink.Append(src, spanLength); } } spanCondition = USET_SPAN_SIMPLE; } else { if (spanLength != 0) { // Not norm2.normalizeSecondAndAppend() because we do not want // to modify the non-filter part of dest. norm2.normalizeUTF8(options, StringPiece(src, spanLength), sink, edits, errorCode); if (U_FAILURE(errorCode)) { break; } } spanCondition = USET_SPAN_NOT_CONTAINED; } src += spanLength; length -= spanLength; } } UnicodeString & FilteredNormalizer2::normalizeSecondAndAppend(UnicodeString &first, const UnicodeString &second, UErrorCode &errorCode) const { return normalizeSecondAndAppend(first, second, TRUE, errorCode); } UnicodeString & FilteredNormalizer2::append(UnicodeString &first, const UnicodeString &second, UErrorCode &errorCode) const { return normalizeSecondAndAppend(first, second, FALSE, errorCode); } UnicodeString & FilteredNormalizer2::normalizeSecondAndAppend(UnicodeString &first, const UnicodeString &second, UBool doNormalize, UErrorCode &errorCode) const { uprv_checkCanGetBuffer(first, errorCode); uprv_checkCanGetBuffer(second, errorCode); if(U_FAILURE(errorCode)) { return first; } if(&first==&second) { errorCode=U_ILLEGAL_ARGUMENT_ERROR; return first; } if(first.isEmpty()) { if(doNormalize) { return normalize(second, first, errorCode); } else { return first=second; } } // merge the in-filter suffix of the first string with the in-filter prefix of the second int32_t prefixLimit=set.span(second, 0, USET_SPAN_SIMPLE); if(prefixLimit!=0) { UnicodeString prefix(second.tempSubString(0, prefixLimit)); int32_t suffixStart=set.spanBack(first, INT32_MAX, USET_SPAN_SIMPLE); if(suffixStart==0) { if(doNormalize) { norm2.normalizeSecondAndAppend(first, prefix, errorCode); } else { norm2.append(first, prefix, errorCode); } } else { UnicodeString middle(first, suffixStart, INT32_MAX); if(doNormalize) { norm2.normalizeSecondAndAppend(middle, prefix, errorCode); } else { norm2.append(middle, prefix, errorCode); } first.replace(suffixStart, INT32_MAX, middle); } } if(prefixLimit<second.length()) { UnicodeString rest(second.tempSubString(prefixLimit, INT32_MAX)); if(doNormalize) { normalize(rest, first, USET_SPAN_NOT_CONTAINED, errorCode); } else { first.append(rest); } } return first; } UBool FilteredNormalizer2::getDecomposition(UChar32 c, UnicodeString &decomposition) const { return set.contains(c) && norm2.getDecomposition(c, decomposition); } UBool FilteredNormalizer2::getRawDecomposition(UChar32 c, UnicodeString &decomposition) const { return set.contains(c) && norm2.getRawDecomposition(c, decomposition); } UChar32 FilteredNormalizer2::composePair(UChar32 a, UChar32 b) const { return (set.contains(a) && set.contains(b)) ? norm2.composePair(a, b) : U_SENTINEL; } uint8_t FilteredNormalizer2::getCombiningClass(UChar32 c) const { return set.contains(c) ? norm2.getCombiningClass(c) : 0; } UBool FilteredNormalizer2::isNormalized(const UnicodeString &s, UErrorCode &errorCode) const { uprv_checkCanGetBuffer(s, errorCode); if(U_FAILURE(errorCode)) { return FALSE; } USetSpanCondition spanCondition=USET_SPAN_SIMPLE; for(int32_t prevSpanLimit=0; prevSpanLimit<s.length();) { int32_t spanLimit=set.span(s, prevSpanLimit, spanCondition); if(spanCondition==USET_SPAN_NOT_CONTAINED) { spanCondition=USET_SPAN_SIMPLE; } else { if( !norm2.isNormalized(s.tempSubStringBetween(prevSpanLimit, spanLimit), errorCode) || U_FAILURE(errorCode) ) { return FALSE; } spanCondition=USET_SPAN_NOT_CONTAINED; } prevSpanLimit=spanLimit; } return TRUE; } UBool FilteredNormalizer2::isNormalizedUTF8(StringPiece sp, UErrorCode &errorCode) const { if(U_FAILURE(errorCode)) { return FALSE; } const char *s = sp.data(); int32_t length = sp.length(); USetSpanCondition spanCondition = USET_SPAN_SIMPLE; while (length > 0) { int32_t spanLength = set.spanUTF8(s, length, spanCondition); if (spanCondition == USET_SPAN_NOT_CONTAINED) { spanCondition = USET_SPAN_SIMPLE; } else { if (!norm2.isNormalizedUTF8(StringPiece(s, spanLength), errorCode) || U_FAILURE(errorCode)) { return FALSE; } spanCondition = USET_SPAN_NOT_CONTAINED; } s += spanLength; length -= spanLength; } return TRUE; } UNormalizationCheckResult FilteredNormalizer2::quickCheck(const UnicodeString &s, UErrorCode &errorCode) const { uprv_checkCanGetBuffer(s, errorCode); if(U_FAILURE(errorCode)) { return UNORM_MAYBE; } UNormalizationCheckResult result=UNORM_YES; USetSpanCondition spanCondition=USET_SPAN_SIMPLE; for(int32_t prevSpanLimit=0; prevSpanLimit<s.length();) { int32_t spanLimit=set.span(s, prevSpanLimit, spanCondition); if(spanCondition==USET_SPAN_NOT_CONTAINED) { spanCondition=USET_SPAN_SIMPLE; } else { UNormalizationCheckResult qcResult= norm2.quickCheck(s.tempSubStringBetween(prevSpanLimit, spanLimit), errorCode); if(U_FAILURE(errorCode) || qcResult==UNORM_NO) { return qcResult; } else if(qcResult==UNORM_MAYBE) { result=qcResult; } spanCondition=USET_SPAN_NOT_CONTAINED; } prevSpanLimit=spanLimit; } return result; } int32_t FilteredNormalizer2::spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const { uprv_checkCanGetBuffer(s, errorCode); if(U_FAILURE(errorCode)) { return 0; } USetSpanCondition spanCondition=USET_SPAN_SIMPLE; for(int32_t prevSpanLimit=0; prevSpanLimit<s.length();) { int32_t spanLimit=set.span(s, prevSpanLimit, spanCondition); if(spanCondition==USET_SPAN_NOT_CONTAINED) { spanCondition=USET_SPAN_SIMPLE; } else { int32_t yesLimit= prevSpanLimit+ norm2.spanQuickCheckYes( s.tempSubStringBetween(prevSpanLimit, spanLimit), errorCode); if(U_FAILURE(errorCode) || yesLimit<spanLimit) { return yesLimit; } spanCondition=USET_SPAN_NOT_CONTAINED; } prevSpanLimit=spanLimit; } return s.length(); } UBool FilteredNormalizer2::hasBoundaryBefore(UChar32 c) const { return !set.contains(c) || norm2.hasBoundaryBefore(c); } UBool FilteredNormalizer2::hasBoundaryAfter(UChar32 c) const { return !set.contains(c) || norm2.hasBoundaryAfter(c); } UBool FilteredNormalizer2::isInert(UChar32 c) const { return !set.contains(c) || norm2.isInert(c); } U_NAMESPACE_END // C API ------------------------------------------------------------------- *** U_NAMESPACE_USE U_CAPI UNormalizer2 * U_EXPORT2 unorm2_openFiltered(const UNormalizer2 *norm2, const USet *filterSet, UErrorCode *pErrorCode) { if(U_FAILURE(*pErrorCode)) { return NULL; } if(filterSet==NULL) { *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; return NULL; } Normalizer2 *fn2=new FilteredNormalizer2(*(Normalizer2 *)norm2, *UnicodeSet::fromUSet(filterSet)); if(fn2==NULL) { *pErrorCode=U_MEMORY_ALLOCATION_ERROR; } return (UNormalizer2 *)fn2; } #endif // !UCONFIG_NO_NORMALIZATION