view modules/javafx.web/src/main/native/Source/ThirdParty/icu/source/tools/toolutil/ppucd.h @ 11038:20a8447c71c6

8207159: Update ICU to version 62.1 Reviewed-by: mbilla, kcr, ghb
author arajkumar
date Fri, 24 Aug 2018 15:06:40 +0530
parents fee4ef5c87df
line wrap: on
line source
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use:
/*
*   Copyright (C) 2011-2013, International Business Machines
*   Corporation and others.  All Rights Reserved.
*   file name:  ppucd.h
*   encoding:   UTF-8
*   tab size:   8 (not used)
*   indentation:4
*   created on: 2011dec11
*   created by: Markus W. Scherer
*/

#ifndef __PPUCD_H__
#define __PPUCD_H__

#include "unicode/utypes.h"
#include "unicode/uniset.h"
#include "unicode/unistr.h"

#include <stdio.h>

/** Additions to the uchar.h enum UProperty. */
enum {
    /** Name_Alias */


/**
 * Interface that maps property names and property value names to integer codes.
 * PreparsedUCD::setPropertyNames() accepts a pointer to an implementation.
 * NOTE(review): the returned integers are presumably UProperty / property value
 * constants, and the result for an unknown name is not visible in this header —
 * confirm against the implementation.
 */
class U_TOOLUTIL_API PropertyNames {
    virtual ~PropertyNames();
    /** Returns the integer code for the property with the given name. */
    virtual int32_t getPropertyEnum(const char *name) const;
    /** Returns the integer code for the named value of the given property. */
    virtual int32_t getPropertyValueEnum(int32_t property, const char *name) const;

struct U_TOOLUTIL_API UniProps {

    /**
     * Returns the stored value for an integer-valued property.
     * The property code is rebased by UCHAR_INT_START to index intProps[];
     * no range check is performed on prop.
     */
    int32_t getIntProp(int32_t prop) const {
        const int32_t slot = prop - UCHAR_INT_START;
        return intProps[slot];
    }

    // NOTE(review): presumably the code point range these properties apply to;
    // whether `end` is inclusive is not visible here — confirm.
    UChar32 start, end;
    // NOTE(review): presumably indexed directly by binary property code — confirm.
    UBool binProps[UCHAR_BINARY_LIMIT];
    // Indexed by (property code - UCHAR_INT_START); see getIntProp().
    int32_t intProps[UCHAR_INT_LIMIT-UCHAR_INT_START];
    UVersionInfo age;
    // NOTE(review): the short member names below look like Unicode property
    // aliases (e.g. bmg/bpb, scf/slc/stc/suc, cf/lc/tc/uc, scx) — confirm the
    // exact properties against the implementation.
    UChar32 bmg, bpb;
    UChar32 scf, slc, stc, suc;
    int32_t digitValue;
    // NOTE(review): the three strings below are raw pointers; what owns the
    // pointed-to text is not visible in this header — confirm.
    const char *numericValue;
    const char *name;
    const char *nameAlias;
    UnicodeString cf, lc, tc, uc;
    UnicodeSet scx;

class U_TOOLUTIL_API PreparsedUCD {
    enum LineType {
        /** No line, end of file. */
        /** Empty line. (Might contain a comment.) */

        /** ucd;6.1.0 */

        /** property;Binary;Alpha;Alphabetic */
        /** binary;N;No;F;False */
        /** value;gc;Zs;Space_Separator */

        /** defaults;0000..10FFFF;age=NA;bc=L;... */
        /** block;0000..007F;age=1.1;blk=ASCII;ea=Na;... */
        /** cp;0030;AHex;bc=EN;gc=Nd;na=DIGIT ZERO;... */
        /** unassigned;E01F0..E0FFF;bc=BN;CWKCF;DI;GCB=CN;NFKC_CF= */

        /** algnamesrange;4E00..9FCC;han;CJK UNIFIED IDEOGRAPH- */


    /**
     * Constructor.
     * Prepares this object for reading the preparsed UCD file with the given name.
     */
    PreparsedUCD(const char *filename, UErrorCode &errorCode);

    /** Destructor. */

    /**
     * Installs a caller-supplied, non-standard PropertyNames implementation.
     * Only the pointer is stored (aliased); the caller retains ownership.
     */
    void setPropertyNames(const PropertyNames *pn) {
        pnames = pn;
    }

    /**
     * Reads a line from the preparsed UCD file.
     * Splits the line by replacing each ';' with a NUL.
     */
    LineType readLine(UErrorCode &errorCode);

    /** Number of the line that readLine() has read. */
    int32_t getLineNumber() const {
        return lineNumber;
    }

    /** Returns the line's next field, or NULL. */
    const char *nextField();

    /**
     * Unicode version of the data.
     * Meaningful once the UNICODE_VERSION_LINE has been read (or any time after).
     */
    const UVersionInfo &getUnicodeVersion() const {
        return ucdVersion;
    }

    /**
     * Returns TRUE if the current line has property values, that is, if the
     * current line type lies in the range DEFAULTS_LINE..UNASSIGNED_LINE
     * (both ends included).
     */
    UBool lineHasPropertyValues() const {
        return DEFAULTS_LINE<=lineType && lineType<=UNASSIGNED_LINE;
    }

    /**
     * Parses properties from the current line.
     * Clears newValues and sets UProperty codes for property values mentioned
     * on the current line (as opposed to being inherited).
     * Returns a pointer to the filled-in UniProps, or NULL if something went wrong.
     * The returned UniProps are usable until the next line of the same type is read.
     */
    const UniProps *getProps(UnicodeSet &newValues, UErrorCode &errorCode);

    /**
     * Returns the code point range for the current algnamesrange line.
     * Calls & parses nextField().
     * Further nextField() calls will yield the range's type & prefix string.
     * Returns U_SUCCESS(errorCode).
     */
    UBool getRangeForAlgNames(UChar32 &start, UChar32 &end, UErrorCode &errorCode);

    /**
     * Returns TRUE if line buffer i is neither the buffer at defaultLineIndex
     * nor the buffer at blockLineIndex. Those two buffers must be kept because
     * default and block properties maintain pointers into them.
     */
    UBool isLineBufferAvailable(int32_t i) {
        return defaultLineIndex!=i && blockLineIndex!=i;
    }

    /** Resets the field iterator and returns the line's first field (the line type field). */
    const char *firstField();

    // Parsing helpers for individual fields of the current line.
    // NOTE(review): the accepted text formats and the exact error codes are
    // defined in the implementation file, which is not visible here; each
    // helper presumably reports failure through errorCode — confirm.

    // Parses one property field into props; newValues is passed through from getProps().
    UBool parseProperty(UniProps &props, const char *field, UnicodeSet &newValues,
                        UErrorCode &errorCode);
    // Parses a single code point from s.
    UChar32 parseCodePoint(const char *s, UErrorCode &errorCode);
    // Parses a code point range from s into start and end.
    UBool parseCodePointRange(const char *s, UChar32 &start, UChar32 &end, UErrorCode &errorCode);
    // Parses s into the UnicodeString uni.
    void parseString(const char *s, UnicodeString &uni, UErrorCode &errorCode);
    // Parses s into the UnicodeSet scx.
    void parseScriptExtensions(const char *s, UnicodeSet &scx, UErrorCode &errorCode);

    // Number of line buffers in lines[].
    static const int32_t kNumLineBuffers=3;

    PropertyNames *icuPnames;  // owned
    const PropertyNames *pnames;  // aliased; replaced by setPropertyNames()
    // NOTE(review): presumably the stream for the file named in the constructor — confirm.
    FILE *file;
    // Line buffer indexes. isLineBufferAvailable() treats the buffers at
    // defaultLineIndex and blockLineIndex as in use.
    int32_t defaultLineIndex, blockLineIndex, lineIndex;
    // Returned by getLineNumber().
    int32_t lineNumber;
    // Type of the current line; tested by lineHasPropertyValues().
    LineType lineType;
    // NOTE(review): presumably the end of the current field and the end of the
    // current line within the active line buffer — confirm.
    char *fieldLimit;
    char *lineLimit;

    // Returned by getUnicodeVersion().
    UVersionInfo ucdVersion;
    // NOTE(review): presumably the properties from the most recent defaults,
    // block, and code point lines respectively — confirm.
    UniProps defaultProps, blockProps, cpProps;
    UnicodeSet blockValues;
    // Multiple lines so that default and block properties can maintain pointers
    // into their line buffers.
    char lines[kNumLineBuffers][4096];


#endif  // __PPUCD_H__