annotate modules/javafx.web/src/main/native/Source/ThirdParty/icu/source/tools/toolutil/uparse.h @ 11038:20a8447c71c6

8207159: Update ICU to version 62.1 Reviewed-by: mbilla, kcr, ghb
author arajkumar
date Fri, 24 Aug 2018 15:06:40 +0530
parents fee4ef5c87df
children
rev   line source
arajkumar@11038 1 // © 2016 and later: Unicode, Inc. and others.
arajkumar@11038 2 // License & terms of use: http://www.unicode.org/copyright.html
ghb@10550 3 /*
ghb@10550 4 *******************************************************************************
ghb@10550 5 *
ghb@10550 6 * Copyright (C) 2000-2010, International Business Machines
ghb@10550 7 * Corporation and others. All Rights Reserved.
ghb@10550 8 *
ghb@10550 9 *******************************************************************************
ghb@10550 10 * file name: uparse.h
arajkumar@11038 11 * encoding: UTF-8
ghb@10550 12 * tab size: 8 (not used)
ghb@10550 13 * indentation:4
ghb@10550 14 *
ghb@10550 15 * created on: 2000apr18
ghb@10550 16 * created by: Markus W. Scherer
ghb@10550 17 *
ghb@10550 18 * This file provides a parser for files that are delimited by one single
ghb@10550 19 * character like ';' or TAB. Example: the Unicode Character Properties files
ghb@10550 20 * like UnicodeData.txt are semicolon-delimited.
ghb@10550 21 */
ghb@10550 22
ghb@10550 23 #ifndef __UPARSE_H__
ghb@10550 24 #define __UPARSE_H__
ghb@10550 25
ghb@10550 26 #include "unicode/utypes.h"
ghb@10550 27
ghb@10550 28 /**
ghb@10550 29 * Tests whether c is one of the four whitespace characters space, TAB, CR or LF.
ghb@10550 30 * @param c invariant character; the macro expands c four times, so pass an expression without side effects
ghb@10550 31 */
ghb@10550 32 #define U_IS_INV_WHITESPACE(c) ((c)==' ' || (c)=='\t' || (c)=='\r' || (c)=='\n')
ghb@10550 33
ghb@10550 34 U_CDECL_BEGIN
ghb@10550 35
ghb@10550 36 /**
ghb@10550 37 * Skip space ' ' and TAB '\t' characters.
ghb@10550 38 * NOTE(review): confirm whether CR and LF are skipped as well, as U_IS_INV_WHITESPACE would treat them.
ghb@10550 39 * @param s Pointer to the characters to scan (presumably a NUL-terminated string; TODO confirm).
ghb@10550 40 * @return Pointer to the first character at or after s that is not a space or TAB; equals s when nothing is skipped.
ghb@10550 41 */
ghb@10550 42 U_CAPI const char * U_EXPORT2
ghb@10550 43 u_skipWhitespace(const char *s);
ghb@10550 44
ghb@10550 45 /**
ghb@10550 46 * Trim whitespace (including line endings) from the end of the string.
ghb@10550 47 * The string is taken as non-const char *, so it is presumably shortened in place (TODO confirm).
ghb@10550 48 * @param s Pointer to the string to trim.
ghb@10550 49 * @return Pointer to the new end of the string.
ghb@10550 50 */
ghb@10550 51 U_CAPI char * U_EXPORT2
ghb@10550 52 u_rtrim(char *s);
ghb@10550 53
ghb@10550 54 /** Function type of the line function that u_parseDelimitedFile() calls for each segmented line. */
ghb@10550 55 typedef void U_CALLCONV
ghb@10550 56 UParseLineFn(void *context, /* the same context pointer that was given to u_parseDelimitedFile() */
ghb@10550 57 char *fields[][2], /* fields[i][0] is the start and fields[i][1] the limit of field i */
ghb@10550 58 int32_t fieldCount, /* presumably the fieldCount given to the parser (TODO confirm) */
ghb@10550 59 UErrorCode *pErrorCode); /* setting an error code here makes the parser return with that error */
ghb@10550 60
ghb@10550 61 /**
ghb@10550 62 * Parser for files that are similar to UnicodeData.txt:
ghb@10550 63 * This function opens the file and reads it line by line. It skips empty lines
ghb@10550 64 * and comment lines that start with a '#'.
ghb@10550 65 * Every other line is split into fields, with one delimiter character
ghb@10550 66 * (semicolon for Unicode Properties files) between two fields. The last field in
ghb@10550 67 * a line does not need to be terminated with a delimiter.
ghb@10550 68 *
ghb@10550 69 * For each line, after segmenting it, the line function is called.
ghb@10550 70 * It is passed the array of field start and limit pointers that the caller
ghb@10550 71 * passed into this parser and that the parser fills in for each line.
ghb@10550 72 * For each field i of the line, the start pointer in fields[i][0]
ghb@10550 73 * points to the beginning of the field, while the limit pointer in fields[i][1]
ghb@10550 74 * points behind the field, i.e., to the delimiter or the line end.
ghb@10550 75 *
ghb@10550 76 * The context parameter of the line function is
ghb@10550 77 * the same as the one for the parse function.
ghb@10550 78 *
ghb@10550 79 * The line function may modify the contents of the fields including the
ghb@10550 80 * limit characters.
ghb@10550 81 *
ghb@10550 82 * If the file cannot be opened, there is a parsing error, or the line function
ghb@10550 83 * sets *pErrorCode, then the parser returns with *pErrorCode set to an error code.
ghb@10550 84 */
ghb@10550 85 U_CAPI void U_EXPORT2
ghb@10550 86 u_parseDelimitedFile(const char *filename, char delimiter, /* file to open; character that separates two fields */
ghb@10550 87 char *fields[][2], int32_t fieldCount, /* start/limit pointer array filled per line; fieldCount is presumably its number of rows (TODO confirm) */
ghb@10550 88 UParseLineFn *lineFn, void *context, /* line function called for each line, and the context handed to it */
ghb@10550 89 UErrorCode *pErrorCode); /* receives the error code on failure */
ghb@10550 90
ghb@10550 91 /**
ghb@10550 92 * Parse a string of code points like 0061 0308 0300.
ghb@10550 93 * s must end with either ';' or NUL.
ghb@10550 94 *
ghb@10550 95 * @return Number of code points.
ghb@10550 96 */
ghb@10550 97 U_CAPI int32_t U_EXPORT2
ghb@10550 98 u_parseCodePoints(const char *s, /* input text holding the code point list */
ghb@10550 99 uint32_t *dest, int32_t destCapacity, /* output array; capacity presumably counted in code points (TODO confirm) */
ghb@10550 100 UErrorCode *pErrorCode); /* ICU error code */
ghb@10550 101
ghb@10550 102 /**
ghb@10550 103 * Parse a list of code points like 0061 0308 0300
ghb@10550 104 * into a UChar * string.
ghb@10550 105 * s must end with either ';' or NUL.
ghb@10550 106 *
ghb@10550 107 * Stores the first code point in *pFirst (only if pFirst is not NULL).
ghb@10550 108 *
ghb@10550 109 * @param s Input char * string.
ghb@10550 110 * @param dest Output string buffer.
ghb@10550 111 * @param destCapacity Capacity of dest, counted in UChars.
ghb@10550 112 * @param pFirst If pFirst!=NULL then *pFirst will be set to the first
ghb@10550 113 * code point in the string.
ghb@10550 114 * @param pErrorCode ICU error code.
ghb@10550 115 * @return The length of the output string, counted in UChars.
ghb@10550 116 */
ghb@10550 117 U_CAPI int32_t U_EXPORT2
ghb@10550 118 u_parseString(const char *s,
ghb@10550 119 UChar *dest, int32_t destCapacity,
ghb@10550 120 uint32_t *pFirst,
ghb@10550 121 UErrorCode *pErrorCode);
ghb@10550 122
ghb@10550 123 /**
ghb@10550 124 * Parse a code point range, written either as a single code point like
ghb@10550 125 * 0085 or as a start..end pair like
ghb@10550 126 * 4E00..9FA5.
ghb@10550 127 *
ghb@10550 128 * s must contain such a range and end with either ';' or NUL.
ghb@10550 129 *
ghb@10550 130 * @return Length of code point range, end-start+1
ghb@10550 131 */
ghb@10550 132 U_CAPI int32_t U_EXPORT2
ghb@10550 133 u_parseCodePointRange(const char *s, /* input text holding the range */
ghb@10550 134 uint32_t *pStart, uint32_t *pEnd, /* presumably receive the start and end code points of the range (TODO confirm) */
ghb@10550 135 UErrorCode *pErrorCode); /* ICU error code */
ghb@10550 136
ghb@10550 137 /**
ghb@10550 138 * Same as u_parseCodePointRange(), except that the range may be terminated by
ghb@10550 139 * any character. The position of the terminating character is returned via
ghb@10550 140 * the *terminator output parameter.
ghb@10550 141 */
ghb@10550 142 U_CAPI int32_t U_EXPORT2
ghb@10550 143 u_parseCodePointRangeAnyTerminator(const char *s, /* input text holding the range */
ghb@10550 144 uint32_t *pStart, uint32_t *pEnd, /* presumably receive the start and end code points of the range (TODO confirm) */
ghb@10550 145 const char **terminator, /* output: position of the character that terminated the range */
ghb@10550 146 UErrorCode *pErrorCode); /* ICU error code */
ghb@10550 147 /* NOTE(review): undocumented in this header. Takes an input buffer (source, sLen) and an output buffer (dest, destCapacity); presumably parses source into UTF-8 bytes in dest, returns the output length and reports failures through *status (TODO confirm against the implementation). */
ghb@10550 148 U_CAPI int32_t U_EXPORT2
ghb@10550 149 u_parseUTF8(const char *source, int32_t sLen, char *dest, int32_t destCapacity, UErrorCode *status);
ghb@10550 150
ghb@10550 151 U_CDECL_END
ghb@10550 152
ghb@10550 153 #endif