FAQ
http://git-wip-us.apache.org/repos/asf/couchdb/blob/81332b78/apps/couch_collate/platform/osx/icu/unicode/msgfmt.h
----------------------------------------------------------------------
diff --git a/apps/couch_collate/platform/osx/icu/unicode/msgfmt.h b/apps/couch_collate/platform/osx/icu/unicode/msgfmt.h
deleted file mode 100644
index 67e17cf..0000000
--- a/apps/couch_collate/platform/osx/icu/unicode/msgfmt.h
+++ /dev/null
@@ -1,940 +0,0 @@
-/*
-* Copyright (C) 2007-2008, International Business Machines Corporation and others. All Rights Reserved.
-********************************************************************************
-*
-* File MSGFMT.H
-*
-* Modification History:
-*
-* Date Name Description
-* 02/19/97 aliu Converted from java.
-* 03/20/97 helena Finished first cut of implementation.
-* 07/22/98 stephen Removed operator!= (defined in Format)
-* 08/19/2002 srl Removing Javaisms
-********************************************************************************
-*/
-
-#ifndef MSGFMT_H
-#define MSGFMT_H
-
-#include "unicode/utypes.h"
-
-/**
- * \file
- * \brief C++ API: Formats messages in a language-neutral way.
- */
-
-#if !UCONFIG_NO_FORMATTING
-
-#include "unicode/format.h"
-#include "unicode/locid.h"
-#include "unicode/parseerr.h"
-#include "unicode/uchar.h"
-
-U_NAMESPACE_BEGIN
-
-class NumberFormat;
-class DateFormat;
-
-/**
- *
- * A MessageFormat produces concatenated messages in a
- * language-neutral way. It should be used for all string
- * concatenations that are visible to end users.
- * <P>
- * A MessageFormat contains an array of <EM>subformats</EM> arranged
- * within a <EM>template string</EM>. Together, the subformats and
- * template string determine how the MessageFormat will operate during
- * formatting and parsing.
- * <P>
- * Typically, both the subformats and the template string are
- * specified at once in a <EM>pattern</EM>. By using different
- * patterns for different locales, messages may be localized.
- * <P>
- * During formatting, the MessageFormat takes an array of arguments
- * and produces a user-readable string. Each argument is a
- * Formattable object; they may be passed in in an array, or as a
- * single Formattable object which itself contains an array. Each
- * argument is matched up with its corresponding subformat, which then
- * formats it into a string. The resultant strings are then assembled
- * within the string template of the MessageFormat to produce the
- * final output string.
- * <p>
- * <strong>Note:</strong>
- * In ICU 4.0 MessageFormat supports named arguments. If a named argument
- * is used, all arguments must be named. Names start with a character in
- * <code>UCHAR_ID_START</code> and continue with characters in
- * <code>UCHAR_ID_CONTINUE</code>, in particular they do not start with a digit.
- * If named arguments are used, {@link #usesNamedArguments()} will return true.
- * <p>
- * The other new methods supporting named arguments are
- * {@link #getFormatNames(UErrorCode& status)},
- * {@link #getFormat(const UnicodeString& formatName, UErrorCode& status)}
- * {@link #setFormat(const UnicodeString& formatName, const Format& format, UErrorCode& status)},
- * {@link #adoptFormat(const UnicodeString& formatName, Format* formatToAdopt, UErrorCode& status)},
- * {@link #format(const Formattable* arguments, const UnicodeString *argumentNames, int32_t cnt, UnicodeString& appendTo, FieldPosition& status, int32_t recursionProtection, UErrorCode& success)},
- * {@link #format(const UnicodeString* argumentNames, const Formattable* arguments, int32_t count, UnicodeString& appendTo,UErrorCode& status)}.
- * These methods are all compatible with patterns that do not use named arguments--
- * in these cases the keys in the input or output use <code>UnicodeString</code>s
- * that name the argument indices, e.g. "0", "1", "2"... etc.
- * <p>
- * When named arguments are used, certain methods on MessageFormat that take or
- * return arrays do not perform any action, since it is not possible to
- * identify positions in an array using a name. UErrorCode is set to
- * U_ARGUMENT_TYPE_MISMATCH if there is a status/success field in the method.
- * These methods are
- * {@link #adoptFormats(Format** newFormats, int32_t count)},
- * {@link #setFormats(const Format** newFormats,int32_t count)},
- * {@link #adoptFormat(int32_t n, Format *newFormat)},
- * {@link #getFormats(int32_t& cnt)},
- * {@link #format(const Formattable* source,int32_t cnt,UnicodeString& appendTo, FieldPosition& ignore, UErrorCode& success)},
- * {@link #format(const UnicodeString& pattern,const Formattable* arguments,int32_t cnt,UnicodeString& appendTo,UErrorCode& success)},
- * {@link #format(const Formattable& source, UnicodeString& appendTo,FieldPosition& ignore, UErrorCode& success)},
- * {@link #format(const Formattable* arguments, int32_t cnt, UnicodeString& appendTo, FieldPosition& status, int32_t recursionProtection,UErrorCode& success)},
- * {@link #parse(const UnicodeString& source, ParsePosition& pos,int32_t& count)},
- * {@link #parse(const UnicodeString& source, int32_t& cnt, UErrorCode& status)}
- * <p>
- *
- * <P>
- * During parsing, an input string is matched against the string
- * template of the MessageFormat to produce an array of Formattable
- * objects. Plain text of the template string is matched directly
- * against input text. At each position in the template string where
- * a subformat is located, the subformat is called to parse the
- * corresponding segment of input text to produce an output argument.
- * In this way, an array of arguments is created which together
- * constitute the parse result.
- * <P>
- * Parsing may fail or produce unexpected results in a number of
- * circumstances.
- * <UL>
- * <LI>If one of the arguments does not occur in the pattern, it
- * will be returned as a default Formattable.
- * <LI>If the format of an argument loses information, such as with
- * a choice format where a large number formats to "many", then the
- * parse may not correspond to the originally formatted argument.
- * <LI>MessageFormat does not handle ChoiceFormat recursion during
- * parsing; such parses will fail.
- * <LI>Parsing will not always find a match (or the correct match) if
- * some part of the parse is ambiguous. For example, if the pattern
- * "{1},{2}" is used with the string arguments {"a,b", "c"}, it will
- * format as "a,b,c". When the result is parsed, it will return {"a",
- * "b,c"}.
- * <LI>If a single argument is formatted more than once in the string,
- * then the rightmost subformat in the pattern string will produce the
- * parse result; prior subformats with the same argument index will
- * have no effect.
- * </UL>
- * Here are some examples of usage:
- * <P>
- * Example 1:
- * <pre>
- * \code
- * UErrorCode success = U_ZERO_ERROR;
- * GregorianCalendar cal(success);
- * Formattable arguments[] = {
- * 7L,
- * Formattable( (Date) cal.getTime(success), Formattable::kIsDate),
- * "a disturbance in the Force"
- * };
- *
- * UnicodeString result;
- * MessageFormat::format(
- * "At {1,time} on {1,date}, there was {2} on planet {0,number}.",
- * arguments, 3, result, success );
- *
- * cout << "result: " << result << endl;
- * //<output>: At 4:34:20 PM on 23-Mar-98, there was a disturbance
- * // in the Force on planet 7.
- * \endcode
- * </pre>
- * Typically, the message format will come from resources, and the
- * arguments will be dynamically set at runtime.
- * <P>
- * Example 2:
- * <pre>
- * \code
- * success = U_ZERO_ERROR;
- * Formattable testArgs[] = {3L, "MyDisk"};
- *
- * MessageFormat form(
- * "The disk \"{1}\" contains {0} file(s).", success );
- *
- * UnicodeString string;
- * FieldPosition fpos = 0;
- * cout << "format: " << form.format(testArgs, 2, string, fpos, success ) << endl;
- *
- * // output, with different testArgs:
- * // output: The disk "MyDisk" contains 0 file(s).
- * // output: The disk "MyDisk" contains 1 file(s).
- * // output: The disk "MyDisk" contains 1,273 file(s).
- * \endcode
- * </pre>
- *
- * The pattern is of the following form. Legend:
- * <pre>
- * \code
- * {optional item}
- * (group that may be repeated)*
- * \endcode
- * </pre>
- * Do not confuse optional items with items inside quoted braces, such
- * as this: "{". Quoted braces are literals.
- * <pre>
- * \code
- * messageFormatPattern := string ( "{" messageFormatElement "}" string )*
- *
- * messageFormatElement := argumentIndex | argumentName { "," elementFormat }
- *
- * elementFormat := "time" { "," datetimeStyle }
- * | "date" { "," datetimeStyle }
- * | "number" { "," numberStyle }
- * | "choice" "," choiceStyle
- *
- * datetimeStyle := "short"
- * | "medium"
- * | "long"
- * | "full"
- * | dateFormatPattern
- *
- * numberStyle := "currency"
- * | "percent"
- * | "integer"
- * | numberFormatPattern
- *
- * choiceStyle := choiceFormatPattern
- *
- * pluralStyle := pluralFormatPattern
- * \endcode
- * </pre>
- * If there is no elementFormat, then the argument must be a string,
- * which is substituted. If there is no dateTimeStyle or numberStyle,
- * then the default format is used (e.g. NumberFormat::createInstance(),
- * DateFormat::createTimeInstance(DateFormat::kDefault, ...) or DateFormat::createDateInstance(DateFormat::kDefault, ...)). For
- * a ChoiceFormat, the pattern must always be specified, since there
- * is no default.
- * <P>
- * In strings, single quotes can be used to quote syntax characters.
- * A literal single quote is represented by '', both within and outside
- * of single-quoted segments. Inside a
- * messageFormatElement, quotes are <EM>not</EM> removed. For example,
- * {1,number,$'#',##} will produce a number format with the pound-sign
- * quoted, with a result such as: "$#31,45".
- * <P>
- * If a pattern is used, then unquoted braces in the pattern, if any,
- * must match: that is, "ab {0} de" and "ab '}' de" are ok, but "ab
- * {0'}' de" and "ab } de" are not.
- * <p>
- * <dl><dt><b>Warning:</b><dd>The rules for using quotes within message
- * format patterns unfortunately have shown to be somewhat confusing.
- * In particular, it isn't always obvious to localizers whether single
- * quotes need to be doubled or not. Make sure to inform localizers about
- * the rules, and tell them (for example, by using comments in resource
- * bundle source files) which strings will be processed by MessageFormat.
- * Note that localizers may need to use single quotes in translated
- * strings where the original version doesn't have them.
- * <br>Note also that the simplest way to avoid the problem is to
- * use the real apostrophe (single quote) character U+2019 (') for
- * human-readable text, and to use the ASCII apostrophe (U+0027 ' )
- * only in program syntax, like quoting in MessageFormat.
- * See the annotations for U+0027 Apostrophe in The Unicode Standard.</p>
- * </dl>
- * <P>
- * The argumentIndex is a non-negative integer, which corresponds to the
- * index of the arguments presented in an array to be formatted. The
- * first argument has argumentIndex 0.
- * <P>
- * It is acceptable to have unused arguments in the array. With missing
- * arguments or arguments that are not of the right class for the
- * specified format, a failing UErrorCode result is set.
- * <P>
- * For more sophisticated patterns, you can use a ChoiceFormat to get
- * output:
- * <pre>
- * \code
- * UErrorCode success = U_ZERO_ERROR;
- * MessageFormat form("The disk \"{1}\" contains {0}.", success);
- * double filelimits[] = {0,1,2};
- * UnicodeString filepart[] = {"no files","one file","{0,number} files"};
- * ChoiceFormat* fileform = new ChoiceFormat(filelimits, filepart, 3);
- * form.setFormat(1, *fileform); // NOT zero, see below
- *
- * Formattable testArgs[] = {1273L, "MyDisk"};
- *
- * UnicodeString string;
- * FieldPosition fpos = 0;
- * cout << form.format(testArgs, 2, string, fpos, success) << endl;
- *
- * // output, with different testArgs
- * // output: The disk "MyDisk" contains no files.
- * // output: The disk "MyDisk" contains one file.
- * // output: The disk "MyDisk" contains 1,273 files.
- * \endcode
- * </pre>
- * You can either do this programmatically, as in the above example,
- * or by using a pattern (see ChoiceFormat for more information) as in:
- * <pre>
- * \code
- * form.applyPattern(
- * "There {0,choice,0#are no files|1#is one file|1<are {0,number,integer} files}.");
- * \endcode
- * </pre>
- * <P>
- * <EM>Note:</EM> As we see above, the string produced by a ChoiceFormat in
- * MessageFormat is treated specially; occurrences of '{' are used to
- * indicate subformats, and cause recursion. If you create both a
- * MessageFormat and ChoiceFormat programmatically (instead of using
- * the string patterns), then be careful not to produce a format that
- * recurses on itself, which will cause an infinite loop.
- * <P>
- * <EM>Note:</EM> Subformats are numbered by their order in the pattern.
- * This is <EM>not</EM> the same as the argumentIndex.
- * <pre>
- * \code
- * For example: with "abc{2}def{3}ghi{0}...",
- *
- * format0 affects the first variable {2}
- * format1 affects the second variable {3}
- * format2 affects the third variable {0}
- * \endcode
- * </pre>
- *
- * <p><em>User subclasses are not supported.</em> While clients may write
- * subclasses, such code will not necessarily work and will not be
- * guaranteed to work stably from release to release.
- */
-class U_I18N_API MessageFormat : public Format {
-public:
- /**
- * Enum type for kMaxFormat.
- * @obsolete ICU 3.0. The 10-argument limit was removed as of ICU 2.6,
- * rendering this enum type obsolete.
- */
- enum EFormatNumber {
- /**
- * The maximum number of arguments.
- * @obsolete ICU 3.0. The 10-argument limit was removed as of ICU 2.6,
- * rendering this constant obsolete.
- */
- kMaxFormat = 10
- };
-
- /**
- * Constructs a new MessageFormat using the given pattern and the
- * default locale.
- *
- * @param pattern Pattern used to construct object.
- * @param status Input/output error code. If the
- * pattern cannot be parsed, set to failure code.
- * @stable ICU 2.0
- */
- MessageFormat(const UnicodeString& pattern,
- UErrorCode &status);
-
- /**
- * Constructs a new MessageFormat using the given pattern and locale.
- * @param pattern Pattern used to construct object.
- * @param newLocale The locale to use for formatting dates and numbers.
- * @param status Input/output error code. If the
- * pattern cannot be parsed, set to failure code.
- * @stable ICU 2.0
- */
- MessageFormat(const UnicodeString& pattern,
- const Locale& newLocale,
- UErrorCode& status);
- /**
- * Constructs a new MessageFormat using the given pattern and locale.
- * @param pattern Pattern used to construct object.
- * @param newLocale The locale to use for formatting dates and numbers.
- * @param parseError Struct to receive information on position
- * of error within the pattern.
- * @param status Input/output error code. If the
- * pattern cannot be parsed, set to failure code.
- * @stable ICU 2.0
- */
- MessageFormat(const UnicodeString& pattern,
- const Locale& newLocale,
- UParseError& parseError,
- UErrorCode& status);
- /**
- * Constructs a new MessageFormat from an existing one.
- * @stable ICU 2.0
- */
- MessageFormat(const MessageFormat&);
-
- /**
- * Assignment operator.
- * @stable ICU 2.0
- */
- const MessageFormat& operator=(const MessageFormat&);
-
- /**
- * Destructor.
- * @stable ICU 2.0
- */
- virtual ~MessageFormat();
-
- /**
- * Clones this Format object polymorphically. The caller owns the
- * result and should delete it when done.
- * @stable ICU 2.0
- */
- virtual Format* clone(void) const;
-
- /**
- * Returns true if the given Format objects are semantically equal.
- * Objects of different subclasses are considered unequal.
- * @param other the object to be compared with.
- * @return true if the given Format objects are semantically equal.
- * @stable ICU 2.0
- */
- virtual UBool operator==(const Format& other) const;
-
- /**
- * Sets the locale. This locale is used for fetching default number or date
- * format information.
- * @param theLocale the new locale value to be set.
- * @stable ICU 2.0
- */
- virtual void setLocale(const Locale& theLocale);
-
- /**
- * Gets the locale. This locale is used for fetching default number or date
- * format information.
- * @return the locale of the object.
- * @stable ICU 2.0
- */
- virtual const Locale& getLocale(void) const;
-
- /**
- * Applies the given pattern string to this message format.
- *
- * @param pattern The pattern to be applied.
- * @param status Input/output error code. If the
- * pattern cannot be parsed, set to failure code.
- * @stable ICU 2.0
- */
- virtual void applyPattern(const UnicodeString& pattern,
- UErrorCode& status);
- /**
- * Applies the given pattern string to this message format.
- *
- * @param pattern The pattern to be applied.
- * @param parseError Struct to receive information on position
- * of error within pattern.
- * @param status Input/output error code. If the
- * pattern cannot be parsed, set to failure code.
- * @stable ICU 2.0
- */
- virtual void applyPattern(const UnicodeString& pattern,
- UParseError& parseError,
- UErrorCode& status);
-
- /**
- * Returns a pattern that can be used to recreate this object.
- *
- * @param appendTo Output parameter to receive the pattern.
- * Result is appended to existing contents.
- * @return Reference to 'appendTo' parameter.
- * @stable ICU 2.0
- */
- virtual UnicodeString& toPattern(UnicodeString& appendTo) const;
-
- /**
- * Sets subformats.
- * See the class description about format numbering.
- * The caller should not delete the Format objects after this call.
- * <EM>The array formatsToAdopt is not itself adopted.</EM> Its
- * ownership is retained by the caller. If the call fails because
- * memory cannot be allocated, then the formats will be deleted
- * by this method, and this object will remain unchanged.
- *
- * @stable ICU 2.0
- * @param formatsToAdopt the format to be adopted.
- * @param count the size of the array.
- */
- virtual void adoptFormats(Format** formatsToAdopt, int32_t count);
-
- /**
- * Sets subformats.
- * See the class description about format numbering.
- * Each item in the array is cloned into the internal array.
- * If the call fails because memory cannot be allocated, then this
- * object will remain unchanged.
- *
- * @stable ICU 2.0
- * @param newFormats the new format to be set.
- * @param cnt the size of the array.
- */
- virtual void setFormats(const Format** newFormats, int32_t cnt);
-
-
- /**
- * Sets one subformat.
- * See the class description about format numbering.
- * The caller should not delete the Format object after this call.
- * If the number is over the number of formats already set,
- * the item will be deleted and ignored.
- * @stable ICU 2.0
- * @param formatNumber index of the subformat.
- * @param formatToAdopt the format to be adopted.
- */
- virtual void adoptFormat(int32_t formatNumber, Format* formatToAdopt);
-
- /**
- * Sets one subformat.
- * See the class description about format numbering.
- * If the number is over the number of formats already set,
- * the item will be ignored.
- * @param formatNumber index of the subformat.
- * @param format the format to be set.
- * @stable ICU 2.0
- */
- virtual void setFormat(int32_t formatNumber, const Format& format);
-
- /**
- * Gets format names. This function returns formatNames in StringEnumerations
- * which can be used with getFormat() and setFormat() to export formattable
- * array from current MessageFormat to another. It is the caller's responsibility
- * to delete the returned formatNames.
- * @param status output param set to success/failure code.
- * @draft ICU 4.0
- */
- virtual StringEnumeration* getFormatNames(UErrorCode& status);
-
- /**
- * Gets subformat pointer for given format name.
- * This function supports both named and numbered
- * arguments-- if numbered, the formatName is the
- * corresponding UnicodeStrings (e.g. "0", "1", "2"...).
- * The returned Format object should not be deleted by the caller,
- * nor should the pointer of other object. The pointer and its
- * contents remain valid only until the next call to any method
- * of this class is made with this object.
- * @param formatName the name or number specifying a format
- * @param status output param set to success/failure code.
- * @draft ICU 4.0
- */
- virtual Format* getFormat(const UnicodeString& formatName, UErrorCode& status);
-
- /**
- * Sets one subformat for given format name.
- * See the class description about format name.
- * This function supports both named and numbered
- * arguments-- if numbered, the formatName is the
- * corresponding UnicodeStrings (e.g. "0", "1", "2"...).
- * If there is no matched formatName or wrong type,
- * the item will be ignored.
- * @param formatName Name of the subformat.
- * @param format the format to be set.
- * @param status output param set to success/failure code.
- * @draft ICU 4.0
- */
- virtual void setFormat(const UnicodeString& formatName, const Format& format, UErrorCode& status);
-
- /**
- * Sets one subformat for given format name.
- * See the class description about format name.
- * This function supports both named and numbered
- * arguments-- if numbered, the formatName is the
- * corresponding UnicodeStrings (e.g. "0", "1", "2"...).
- * If there is no matched formatName or wrong type,
- * the item will be ignored.
- * The caller should not delete the Format object after this call.
- * @param formatName Name of the subformat.
- * @param formatToAdopt Format to be adopted.
- * @param status output param set to success/failure code.
- * @draft ICU 4.0
- */
- virtual void adoptFormat(const UnicodeString& formatName, Format* formatToAdopt, UErrorCode& status);
-
-
- /**
- * Gets an array of subformats of this object. The returned array
- * should not be deleted by the caller, nor should the pointers
- * within the array. The array and its contents remain valid only
- * until the next call to any method of this class is made with
- * this object. See the class description about format numbering.
- * @param count output parameter to receive the size of the array
- * @return an array of count Format* objects, or NULL if out of
- * memory. Any or all of the array elements may be NULL.
- * @stable ICU 2.0
- */
- virtual const Format** getFormats(int32_t& count) const;
-
- /**
- * Formats the given array of arguments into a user-readable string.
- * Does not take ownership of the Formattable* array or its contents.
- *
- * @param source An array of objects to be formatted.
- * @param count The number of elements of 'source'.
- * @param appendTo Output parameter to receive result.
- * Result is appended to existing contents.
- * @param ignore Not used; inherited from base class API.
- * @param status Input/output error code. If the
- * pattern cannot be parsed, set to failure code.
- * @return Reference to 'appendTo' parameter.
- * @stable ICU 2.0
- */
- UnicodeString& format( const Formattable* source,
- int32_t count,
- UnicodeString& appendTo,
- FieldPosition& ignore,
- UErrorCode& status) const;
-
- /**
- * Formats the given array of arguments into a user-readable string
- * using the given pattern.
- *
- * @param pattern The pattern.
- * @param arguments An array of objects to be formatted.
- * @param count The number of elements of 'arguments'.
- * @param appendTo Output parameter to receive result.
- * Result is appended to existing contents.
- * @param status Input/output error code. If the
- * pattern cannot be parsed, set to failure code.
- * @return Reference to 'appendTo' parameter.
- * @stable ICU 2.0
- */
- static UnicodeString& format(const UnicodeString& pattern,
- const Formattable* arguments,
- int32_t count,
- UnicodeString& appendTo,
- UErrorCode& status);
-
- /**
- * Formats the given array of arguments into a user-readable
- * string. The array must be stored within a single Formattable
- * object of type kArray. If the Formattable object type is not of
- * type kArray, then returns a failing UErrorCode.
- *
- * @param obj A Formattable of type kArray containing
- * arguments to be formatted.
- * @param appendTo Output parameter to receive result.
- * Result is appended to existing contents.
- * @param pos On input: an alignment field, if desired.
- * On output: the offsets of the alignment field.
- * @param status Input/output error code. If the
- * pattern cannot be parsed, set to failure code.
- * @return Reference to 'appendTo' parameter.
- * @stable ICU 2.0
- */
- virtual UnicodeString& format(const Formattable& obj,
- UnicodeString& appendTo,
- FieldPosition& pos,
- UErrorCode& status) const;
-
- /**
- * Formats the given array of arguments into a user-readable
- * string. The array must be stored within a single Formattable
- * object of type kArray. If the Formattable object type is not of
- * type kArray, then returns a failing UErrorCode.
- *
- * @param obj The object to format
- * @param appendTo Output parameter to receive result.
- * Result is appended to existing contents.
- * @param status Input/output error code. If the
- * pattern cannot be parsed, set to failure code.
- * @return Reference to 'appendTo' parameter.
- * @stable ICU 2.0
- */
- UnicodeString& format(const Formattable& obj,
- UnicodeString& appendTo,
- UErrorCode& status) const;
-
-
- /**
- * Formats the given array of arguments into a user-defined argument name
- * array. This function supports both named and numbered
- * arguments-- if numbered, the formatName is the
- * corresponding UnicodeStrings (e.g. "0", "1", "2"...).
- *
- * @param argumentNames argument name array
- * @param arguments An array of objects to be formatted.
- * @param count The number of elements of 'argumentNames' and
- * arguments. The number of argumentNames and arguments
- * must be the same.
- * @param appendTo Output parameter to receive result.
- * Result is appended to existing contents.
- * @param status Input/output error code. If the
- * pattern cannot be parsed, set to failure code.
- * @return Reference to 'appendTo' parameter.
- * @stable ICU 4.0
- */
- UnicodeString& format(const UnicodeString* argumentNames,
- const Formattable* arguments,
- int32_t count,
- UnicodeString& appendTo,
- UErrorCode& status) const;
- /**
- * Parses the given string into an array of output arguments.
- *
- * @param source String to be parsed.
- * @param pos On input, starting position for parse. On output,
- * final position after parse. Unchanged if parse
- * fails.
- * @param count Output parameter to receive the number of arguments
- * parsed.
- * @return an array of parsed arguments. The caller owns both
- * the array and its contents.
- * @stable ICU 2.0
- */
- virtual Formattable* parse( const UnicodeString& source,
- ParsePosition& pos,
- int32_t& count) const;
-
- /**
- * Parses the given string into an array of output arguments.
- *
- * @param source String to be parsed.
- * @param count Output param to receive size of returned array.
- * @param status Input/output error code. If the
- * pattern cannot be parsed, set to failure code.
- * If the MessageFormat is named argument, the status is
- * set to U_ARGUMENT_TYPE_MISMATCH.
- * @return an array of parsed arguments. The caller owns both
- * the array and its contents. Return NULL if status is not U_ZERO_ERROR.
- *
- * @stable ICU 2.0
- */
- virtual Formattable* parse( const UnicodeString& source,
- int32_t& count,
- UErrorCode& status) const;
-
- /**
- * Parses the given string into an array of output arguments
- * stored within a single Formattable of type kArray.
- *
- * @param source The string to be parsed into an object.
- * @param result Formattable to be set to the parse result.
- * If parse fails, return contents are undefined.
- * @param pos On input, starting position for parse. On output,
- * final position after parse. Unchanged if parse
- * fails.
- * @stable ICU 2.0
- */
- virtual void parseObject(const UnicodeString& source,
- Formattable& result,
- ParsePosition& pos) const;
-
- /**
- * Convert an 'apostrophe-friendly' pattern into a standard
- * pattern. Standard patterns treat all apostrophes as
- * quotes, which is problematic in some languages, e.g.
- * French, where apostrophe is commonly used. This utility
- * assumes that only an unpaired apostrophe immediately before
- * a brace is a true quote. Other unpaired apostrophes are paired,
- * and the resulting standard pattern string is returned.
- *
- * <p><b>Note</b> it is not guaranteed that the returned pattern
- * is indeed a valid pattern. The only effect is to convert
- * between patterns having different quoting semantics.
- *
- * @param pattern the 'apostrophe-friendly' pattern to convert
- * @param status Input/output error code. If the pattern
- * cannot be parsed, the failure code is set.
- * @return the standard equivalent of the original pattern
- * @stable ICU 3.4
- */
- static UnicodeString autoQuoteApostrophe(const UnicodeString& pattern,
- UErrorCode& status);
-
- /**
- * Returns true if this MessageFormat uses named arguments,
- * and false otherwise. See class description.
- *
- * @return true if named arguments are used.
- * @draft ICU 4.0
- */
- UBool usesNamedArguments() const;
-
- /**
- * Returns a unique class ID POLYMORPHICALLY. Pure virtual override.
- * This method is to implement a simple version of RTTI, since not all
- * C++ compilers support genuine RTTI. Polymorphic operator==() and
- * clone() methods call this method.
- *
- * @return The class ID for this object. All objects of a
- * given class have the same class ID. Objects of
- * other classes have different class IDs.
- * @stable ICU 2.0
- */
- virtual UClassID getDynamicClassID(void) const;
-
- /**
- * Return the class ID for this class. This is useful only for
- * comparing to a return value from getDynamicClassID(). For example:
- * <pre>
- * . Base* polymorphic_pointer = createPolymorphicObject();
- * . if (polymorphic_pointer->getDynamicClassID() ==
- * . Derived::getStaticClassID()) ...
- * </pre>
- * @return The class ID for all objects of this class.
- * @stable ICU 2.0
- */
- static UClassID U_EXPORT2 getStaticClassID(void);
-
-private:
-
- Locale fLocale;
- UnicodeString fPattern;
- Format** formatAliases; // see getFormats
- int32_t formatAliasesCapacity;
- UProperty idStart;
- UProperty idContinue;
-
- MessageFormat(); // default constructor not implemented
-
- /*
- * A structure representing one subformat of this MessageFormat.
- * Each subformat has a Format object, an offset into the plain
- * pattern text fPattern, and an argument number. The argument
- * number corresponds to the array of arguments to be formatted.
- * @internal
- */
- class Subformat;
-
- /**
- * A MessageFormat contains an array of subformats. This array
- * needs to grow dynamically if the MessageFormat is modified.
- */
- Subformat* subformats;
- int32_t subformatCount;
- int32_t subformatCapacity;
-
- /**
- * A MessageFormat formats an array of arguments. Each argument
- * has an expected type, based on the pattern. For example, if
- * the pattern contains the subformat "{3,number,integer}", then
- * we expect argument 3 to have type Formattable::kLong. This
- * array needs to grow dynamically if the MessageFormat is
- * modified.
- */
- Formattable::Type* argTypes;
- int32_t argTypeCount;
- int32_t argTypeCapacity;
-
- /**
- * Is true iff all argument names are non-negative numbers.
- *
- */
- UBool isArgNumeric;
-
- // Variable-size array management
- UBool allocateSubformats(int32_t capacity);
- UBool allocateArgTypes(int32_t capacity);
-
- /**
- * Default Format objects used when no format is specified and a
- * numeric or date argument is formatted. These are volatile
- * cache objects maintained only for performance. They do not
- * participate in operator=(), copy constructor(), nor
- * operator==().
- */
- NumberFormat* defaultNumberFormat;
- DateFormat* defaultDateFormat;
-
- /**
- * Method to retrieve default formats (or NULL on failure).
- * These are semantically const, but may modify *this.
- */
- const NumberFormat* getDefaultNumberFormat(UErrorCode&) const;
- const DateFormat* getDefaultDateFormat(UErrorCode&) const;
-
- /**
- * Finds the word s, in the keyword list and returns the located index.
- * @param s the keyword to be searched for.
- * @param list the list of keywords to be searched with.
- * @return the index of the list which matches the keyword s.
- */
- static int32_t findKeyword( const UnicodeString& s,
- const UChar * const *list);
-
- /**
- * Formats the array of arguments and copies the result into the
- * result buffer, updates the field position.
- *
- * @param arguments The formattable objects array.
- * @param cnt The array count.
- * @param appendTo Output parameter to receive result.
- * Result is appended to existing contents.
- * @param status Field position status.
- * @param recursionProtection
- * Initially zero. Bits 0..9 are used to indicate
- * that a parameter has already been seen, to
- * avoid recursion. Currently unused.
- * @param success The error code status.
- * @return Reference to 'appendTo' parameter.
- */
- UnicodeString& format( const Formattable* arguments,
- int32_t cnt,
- UnicodeString& appendTo,
- FieldPosition& status,
- int32_t recursionProtection,
- UErrorCode& success) const;
-
- UnicodeString& format( const Formattable* arguments,
- const UnicodeString *argumentNames,
- int32_t cnt,
- UnicodeString& appendTo,
- FieldPosition& status,
- int32_t recursionProtection,
- UErrorCode& success) const;
-
- void makeFormat(int32_t offsetNumber,
- UnicodeString* segments,
- UParseError& parseError,
- UErrorCode& success);
-
- /**
- * Convenience method that ought to be in NumberFormat
- */
- NumberFormat* createIntegerFormat(const Locale& locale, UErrorCode& status) const;
-
- /**
- * Checks the range of the source text to quote the special
- * characters, { and ' and copy to target buffer.
- * @param source
- * @param start the text offset to start the process of in the source string
- * @param end the text offset to end the process of in the source string
- * @param appendTo Output parameter to receive result.
- * Result is appended to existing contents.
- */
- static void copyAndFixQuotes(const UnicodeString& appendTo, int32_t start, int32_t end, UnicodeString& target);
-
- /**
- * Returns array of argument types in the parsed pattern
- * for use in C API. Only for the use of umsg_vformat(). Not
- * for public consumption.
- * @param listCount Output parameter to receive the size of array
- * @return The array of formattable types in the pattern
- * @internal
- */
- const Formattable::Type* getArgTypeList(int32_t& listCount) const {
- listCount = argTypeCount;
- return argTypes;
- }
-
- /**
- * Returns FALSE if the argument name is not legal.
- * @param argName argument name.
- * @return TRUE if the argument name is legal, otherwise return FALSE.
- */
- UBool isLegalArgName(const UnicodeString& argName) const;
-
- friend class MessageFormatAdapter; // getFormatTypeList() access
-};
-
-inline UnicodeString&
-MessageFormat::format(const Formattable& obj,
- UnicodeString& appendTo,
- UErrorCode& status) const {
- return Format::format(obj, appendTo, status);
-}
-U_NAMESPACE_END
-
-#endif /* #if !UCONFIG_NO_FORMATTING */
-
-#endif // _MSGFMT
-//eof
-

http://git-wip-us.apache.org/repos/asf/couchdb/blob/81332b78/apps/couch_collate/platform/osx/icu/unicode/normlzr.h
----------------------------------------------------------------------
diff --git a/apps/couch_collate/platform/osx/icu/unicode/normlzr.h b/apps/couch_collate/platform/osx/icu/unicode/normlzr.h
deleted file mode 100644
index 7974f1a..0000000
--- a/apps/couch_collate/platform/osx/icu/unicode/normlzr.h
+++ /dev/null
@@ -1,823 +0,0 @@
-/*
- ********************************************************************
- * COPYRIGHT:
- * Copyright (c) 1996-2006, International Business Machines Corporation and
- * others. All Rights Reserved.
- ********************************************************************
- */
-
-#ifndef NORMLZR_H
-#define NORMLZR_H
-
-#include "unicode/utypes.h"
-
-/**
- * \file
- * \brief C++ API: Unicode Normalization
- */
-
-#if !UCONFIG_NO_NORMALIZATION
-
-#include "unicode/uobject.h"
-#include "unicode/unistr.h"
-#include "unicode/chariter.h"
-#include "unicode/unorm.h"
-
-
-struct UCharIterator;
-typedef struct UCharIterator UCharIterator; /**< C typedef for struct UCharIterator. @stable ICU 2.1 */
-
-U_NAMESPACE_BEGIN
-/**
- * The Normalizer class supports the standard normalization forms described in
- * <a href="http://www.unicode.org/unicode/reports/tr15/" target="unicode">
- * Unicode Standard Annex #15: Unicode Normalization Forms</a>.
- *
- * The Normalizer class consists of two parts:
- * - static functions that normalize strings or test if strings are normalized
- * - a Normalizer object is an iterator that takes any kind of text and
- * provides iteration over its normalized form
- *
- * The Normalizer class is not suitable for subclassing.
- *
- * The static functions are basically wrappers around the C implementation,
- * using UnicodeString instead of UChar*.
- * For basic information about normalization forms and details about the C API
- * please see the documentation in unorm.h.
- *
- * The iterator API with the Normalizer constructors and the non-static functions
- * uses a CharacterIterator as input. It is possible to pass a string which
- * is then internally wrapped in a CharacterIterator.
- * The input text is not normalized all at once, but incrementally where needed
- * (providing efficient random access).
- * This allows passing in a large text but spending only a small amount of time
- * normalizing a small part of that text.
- * However, if the entire text is normalized, then the iterator will be
- * slower than normalizing the entire text at once and iterating over the result.
- * A possible use of the Normalizer iterator is also to report an index into the
- * original text that is close to where the normalized characters come from.
- *
- * <em>Important:</em> The iterator API was cleaned up significantly for ICU 2.0.
- * The earlier implementation reported the getIndex() inconsistently,
- * and previous() could not be used after setIndex(), next(), first(), and current().
- *
- * Normalizer allows normalization to start from anywhere in the input text by
- * calling setIndexOnly(), first(), or last().
- * Without calling any of these, the iterator will start at the beginning of the text.
- *
- * At any time, next() returns the next normalized code point (UChar32),
- * with post-increment semantics (like CharacterIterator::next32PostInc()).
- * previous() returns the previous normalized code point (UChar32),
- * with pre-decrement semantics (like CharacterIterator::previous32()).
- *
- * current() returns the current code point
- * (respectively the one at the newly set index) without moving
- * the getIndex(). Note that if the text at the current position
- * needs to be normalized, then these functions will do that.
- * (This is why current() is not const.)
- * It is more efficient to call setIndexOnly() instead, which does not
- * normalize.
- *
- * getIndex() always refers to the position in the input text where the normalized
- * code points are returned from. It does not always change with each returned
- * code point.
- * The code point that is returned from any of the functions
- * corresponds to text at or after getIndex(), according to the
- * function's iteration semantics (post-increment or pre-decrement).
- *
- * next() returns a code point from at or after the getIndex()
- * from before the next() call. After the next() call, the getIndex()
- * might have moved to where the next code point will be returned from
- * (from a next() or current() call).
- * This is semantically equivalent to array access with array[index++]
- * (post-increment semantics).
- *
- * previous() returns a code point from at or after the getIndex()
- * from after the previous() call.
- * This is semantically equivalent to array access with array[--index]
- * (pre-decrement semantics).
- *
- * Internally, the Normalizer iterator normalizes a small piece of text
- * starting at the getIndex() and ending at a following "safe" index.
- * The normalized result is stored in an internal string buffer, and
- * the code points are iterated from there.
- * With multiple iteration calls, this is repeated until the next piece
- * of text needs to be normalized, and the getIndex() needs to be moved.
- *
- * The following "safe" index, the internal buffer, and the secondary
- * iteration index into that buffer are not exposed on the API.
- * This also means that it is currently not practical to return to
- * a particular, arbitrary position in the text because one would need to
- * know, and be able to set, in addition to the getIndex(), at least also the
- * current index into the internal buffer.
- * It is currently only possible to observe when getIndex() changes
- * (with careful consideration of the iteration semantics),
- * at which time the internal index will be 0.
- * For example, if getIndex() is different after next() than before it,
- * then the internal index is 0 and one can return to this getIndex()
- * later with setIndexOnly().
- *
- * @author Laura Werner, Mark Davis, Markus Scherer
- * @stable ICU 2.0
- */
-class U_COMMON_API Normalizer : public UObject {
-public:
- /**
- * If DONE is returned from an iteration function that returns a code point,
- * then there are no more normalization results available.
- * @stable ICU 2.0
- */
- enum {
- DONE=0xffff
- };
-
- // Constructors
-
- /**
- * Creates a new <code>Normalizer</code> object for iterating over the
- * normalized form of a given string.
- * <p>
- * @param str The string to be normalized. The normalization
- * will start at the beginning of the string.
- *
- * @param mode The normalization mode.
- * @stable ICU 2.0
- */
- Normalizer(const UnicodeString& str, UNormalizationMode mode);
-
- /**
- * Creates a new <code>Normalizer</code> object for iterating over the
- * normalized form of a given string.
- * <p>
- * @param str The string to be normalized. The normalization
- * will start at the beginning of the string.
- *
- * @param length Length of the string, or -1 if NUL-terminated.
- * @param mode The normalization mode.
- * @stable ICU 2.0
- */
- Normalizer(const UChar* str, int32_t length, UNormalizationMode mode);
-
- /**
- * Creates a new <code>Normalizer</code> object for iterating over the
- * normalized form of the given text.
- * <p>
- * @param iter The input text to be normalized. The normalization
- * will start at the beginning of the string.
- *
- * @param mode The normalization mode.
- * @stable ICU 2.0
- */
- Normalizer(const CharacterIterator& iter, UNormalizationMode mode);
-
- /**
- * Copy constructor.
- * @param copy The object to be copied.
- * @stable ICU 2.0
- */
- Normalizer(const Normalizer& copy);
-
- /**
- * Destructor
- * @stable ICU 2.0
- */
- virtual ~Normalizer();
-
-
- //-------------------------------------------------------------------------
- // Static utility methods
- //-------------------------------------------------------------------------
-
- /**
- * Normalizes a <code>UnicodeString</code> according to the specified normalization mode.
- * This is a wrapper for unorm_normalize(), using UnicodeString's.
- *
- * The <code>options</code> parameter specifies which optional
- * <code>Normalizer</code> features are to be enabled for this operation.
- *
- * @param source the input string to be normalized.
- * @param mode the normalization mode
- * @param options the optional features to be enabled (0 for no options)
- * @param result The normalized string (on output).
- * @param status The error code.
- * @stable ICU 2.0
- */
- static void U_EXPORT2 normalize(const UnicodeString& source,
- UNormalizationMode mode, int32_t options,
- UnicodeString& result,
- UErrorCode &status);
-
- /**
- * Compose a <code>UnicodeString</code>.
- * This is equivalent to normalize() with mode UNORM_NFC or UNORM_NFKC.
- * This is a wrapper for unorm_normalize(), using UnicodeString's.
- *
- * The <code>options</code> parameter specifies which optional
- * <code>Normalizer</code> features are to be enabled for this operation.
- *
- * @param source the string to be composed.
- * @param compat Perform compatibility decomposition before composition.
- * If this argument is <code>FALSE</code>, only canonical
- * decomposition will be performed.
- * @param options the optional features to be enabled (0 for no options)
- * @param result The composed string (on output).
- * @param status The error code.
- * @stable ICU 2.0
- */
- static void U_EXPORT2 compose(const UnicodeString& source,
- UBool compat, int32_t options,
- UnicodeString& result,
- UErrorCode &status);
-
- /**
- * Static method to decompose a <code>UnicodeString</code>.
- * This is equivalent to normalize() with mode UNORM_NFD or UNORM_NFKD.
- * This is a wrapper for unorm_normalize(), using UnicodeString's.
- *
- * The <code>options</code> parameter specifies which optional
- * <code>Normalizer</code> features are to be enabled for this operation.
- *
- * @param source the string to be decomposed.
- * @param compat Perform compatibility decomposition.
- * If this argument is <code>FALSE</code>, only canonical
- * decomposition will be performed.
- * @param options the optional features to be enabled (0 for no options)
- * @param result The decomposed string (on output).
- * @param status The error code.
- * @stable ICU 2.0
- */
- static void U_EXPORT2 decompose(const UnicodeString& source,
- UBool compat, int32_t options,
- UnicodeString& result,
- UErrorCode &status);
-
- /**
- * Performs a quick check on a string to determine whether the string is
- * in a particular normalization format.
- * This is a wrapper for unorm_quickCheck(), using a UnicodeString.
- *
- * Three types of result can be returned: UNORM_YES, UNORM_NO or
- * UNORM_MAYBE. UNORM_YES indicates that the argument string is in the
- * desired normalized format; UNORM_NO indicates that the argument string
- * is not in the desired normalized format. A UNORM_MAYBE result indicates
- * that a more thorough check is required: the user may have to put the
- * string in its normalized form and compare the results.
- * @param source string for determining if it is in a normalized format
- * @param mode normalization format
- * @param status A reference to a UErrorCode to receive any errors
- * @return UNORM_YES, UNORM_NO or UNORM_MAYBE
- *
- * @see isNormalized
- * @stable ICU 2.0
- */
- static inline UNormalizationCheckResult
- quickCheck(const UnicodeString &source, UNormalizationMode mode, UErrorCode &status);
-
- /**
- * Performs a quick check on a string; same as the other version of quickCheck
- * but takes an extra options parameter like most normalization functions.
- *
- * @param source string for determining if it is in a normalized format
- * @param mode normalization format
- * @param options the optional features to be enabled (0 for no options)
- * @param status A reference to a UErrorCode to receive any errors
- * @return UNORM_YES, UNORM_NO or UNORM_MAYBE
- *
- * @see isNormalized
- * @stable ICU 2.6
- */
- static inline UNormalizationCheckResult
- quickCheck(const UnicodeString &source, UNormalizationMode mode, int32_t options, UErrorCode &status);
-
- /**
- * Test if a string is in a given normalization form.
- * This is semantically equivalent to source.equals(normalize(source, mode)) .
- *
- * Unlike unorm_quickCheck(), this function returns a definitive result,
- * never a "maybe".
- * For NFD, NFKD, and FCD, both functions work exactly the same.
- * For NFC and NFKC where quickCheck may return "maybe", this function will
- * perform further tests to arrive at a TRUE/FALSE result.
- *
- * @param src String that is to be tested if it is in a normalization format.
- * @param mode Which normalization form to test for.
- * @param errorCode ICU error code in/out parameter.
- * Must fulfill U_SUCCESS before the function call.
- * @return Boolean value indicating whether the source string is in the
- * "mode" normalization form.
- *
- * @see quickCheck
- * @stable ICU 2.2
- */
- static inline UBool
- isNormalized(const UnicodeString &src, UNormalizationMode mode, UErrorCode &errorCode);
-
- /**
- * Test if a string is in a given normalization form; same as the other version of isNormalized
- * but takes an extra options parameter like most normalization functions.
- *
- * @param src String that is to be tested if it is in a normalization format.
- * @param mode Which normalization form to test for.
- * @param options the optional features to be enabled (0 for no options)
- * @param errorCode ICU error code in/out parameter.
- * Must fulfill U_SUCCESS before the function call.
- * @return Boolean value indicating whether the source string is in the
- * "mode" normalization form.
- *
- * @see quickCheck
- * @stable ICU 2.6
- */
- static inline UBool
- isNormalized(const UnicodeString &src, UNormalizationMode mode, int32_t options, UErrorCode &errorCode);
-
- /**
- * Concatenate normalized strings, making sure that the result is normalized as well.
- *
- * If both the left and the right strings are in
- * the normalization form according to "mode/options",
- * then the result will be
- *
- * \code
- * dest=normalize(left+right, mode, options)
- * \endcode
- *
- * For details see unorm_concatenate in unorm.h.
- *
- * @param left Left source string.
- * @param right Right source string.
- * @param result The output string.
- * @param mode The normalization mode.
- * @param options A bit set of normalization options.
- * @param errorCode ICU error code in/out parameter.
- * Must fulfill U_SUCCESS before the function call.
- * @return result
- *
- * @see unorm_concatenate
- * @see normalize
- * @see unorm_next
- * @see unorm_previous
- *
- * @stable ICU 2.1
- */
- static UnicodeString &
- U_EXPORT2 concatenate(UnicodeString &left, UnicodeString &right,
- UnicodeString &result,
- UNormalizationMode mode, int32_t options,
- UErrorCode &errorCode);
-
- /**
- * Compare two strings for canonical equivalence.
- * Further options include case-insensitive comparison and
- * code point order (as opposed to code unit order).
- *
- * Canonical equivalence between two strings is defined as their normalized
- * forms (NFD or NFC) being identical.
- * This function compares strings incrementally instead of normalizing
- * (and optionally case-folding) both strings entirely,
- * improving performance significantly.
- *
- * Bulk normalization is only necessary if the strings do not fulfill the FCD
- * conditions. Only in this case, and only if the strings are relatively long,
- * is memory allocated temporarily.
- * For FCD strings and short non-FCD strings there is no memory allocation.
- *
- * Semantically, this is equivalent to
- * strcmp[CodePointOrder](NFD(foldCase(s1)), NFD(foldCase(s2)))
- * where code point order and foldCase are all optional.
- *
- * UAX 21 2.5 Caseless Matching specifies that for a canonical caseless match
- * the case folding must be performed first, then the normalization.
- *
- * @param s1 First source string.
- * @param s2 Second source string.
- *
- * @param options A bit set of options:
- * - U_FOLD_CASE_DEFAULT or 0 is used for default options:
- * Case-sensitive comparison in code unit order, and the input strings
- * are quick-checked for FCD.
- *
- * - UNORM_INPUT_IS_FCD
- * Set if the caller knows that both s1 and s2 fulfill the FCD conditions.
- * If not set, the function will quickCheck for FCD
- * and normalize if necessary.
- *
- * - U_COMPARE_CODE_POINT_ORDER
- * Set to choose code point order instead of code unit order
- * (see u_strCompare for details).
- *
- * - U_COMPARE_IGNORE_CASE
- * Set to compare strings case-insensitively using case folding,
- * instead of case-sensitively.
- * If set, then the following case folding options are used.
- *
- * - Options as used with case-insensitive comparisons, currently:
- *
- * - U_FOLD_CASE_EXCLUDE_SPECIAL_I
- * (see u_strCaseCompare for details)
- *
- * - regular normalization options shifted left by UNORM_COMPARE_NORM_OPTIONS_SHIFT
- *
- * @param errorCode ICU error code in/out parameter.
- * Must fulfill U_SUCCESS before the function call.
- * @return <0 or 0 or >0 as usual for string comparisons
- *
- * @see unorm_compare
- * @see normalize
- * @see UNORM_FCD
- * @see u_strCompare
- * @see u_strCaseCompare
- *
- * @stable ICU 2.2
- */
- static inline int32_t
- compare(const UnicodeString &s1, const UnicodeString &s2,
- uint32_t options,
- UErrorCode &errorCode);
-
- //-------------------------------------------------------------------------
- // Iteration API
- //-------------------------------------------------------------------------
-
- /**
- * Return the current character in the normalized text.
- * current() may need to normalize some text at getIndex().
- * The getIndex() is not changed.
- *
- * @return the current normalized code point
- * @stable ICU 2.0
- */
- UChar32 current(void);
-
- /**
- * Return the first character in the normalized text.
- * This is equivalent to setIndexOnly(startIndex()) followed by next().
- * (Post-increment semantics.)
- *
- * @return the first normalized code point
- * @stable ICU 2.0
- */
- UChar32 first(void);
-
- /**
- * Return the last character in the normalized text.
- * This is equivalent to setIndexOnly(endIndex()) followed by previous().
- * (Pre-decrement semantics.)
- *
- * @return the last normalized code point
- * @stable ICU 2.0
- */
- UChar32 last(void);
-
- /**
- * Return the next character in the normalized text.
- * (Post-increment semantics.)
- * If the end of the text has already been reached, DONE is returned.
- * The DONE value could be confused with a U+FFFF non-character code point
- * in the text. If this is possible, you can test getIndex()<endIndex()
- * before calling next(), or (getIndex()<endIndex() || last()!=DONE)
- * after calling next(). (Calling last() will change the iterator state!)
- *
- * The C API unorm_next() is more efficient and does not have this ambiguity.
- *
- * @return the next normalized code point
- * @stable ICU 2.0
- */
- UChar32 next(void);
-
- /**
- * Return the previous character in the normalized text and decrement.
- * (Pre-decrement semantics.)
- * If the beginning of the text has already been reached, DONE is returned.
- * The DONE value could be confused with a U+FFFF non-character code point
- * in the text. If this is possible, you can test
- * (getIndex()>startIndex() || first()!=DONE). (Calling first() will change
- * the iterator state!)
- *
- * The C API unorm_previous() is more efficient and does not have this ambiguity.
- *
- * @return the previous normalized code point
- * @stable ICU 2.0
- */
- UChar32 previous(void);
-
- /**
- * Set the iteration position in the input text that is being normalized,
- * without any immediate normalization.
- * After setIndexOnly(), getIndex() will return the same index that is
- * specified here.
- *
- * @param index the desired index in the input text.
- * @stable ICU 2.0
- */
- void setIndexOnly(int32_t index);
-
- /**
- * Reset the index to the beginning of the text.
- * This is equivalent to setIndexOnly(startIndex()).
- * @stable ICU 2.0
- */
- void reset(void);
-
- /**
- * Retrieve the current iteration position in the input text that is
- * being normalized.
- *
- * A following call to next() will return a normalized code point from
- * the input text at or after this index.
- *
- * After a call to previous(), getIndex() will point at or before the
- * position in the input text where the normalized code point
- * was returned from with previous().
- *
- * @return the current index in the input text
- * @stable ICU 2.0
- */
- int32_t getIndex(void) const;
-
- /**
- * Retrieve the index of the start of the input text. This is the begin index
- * of the <code>CharacterIterator</code> or the start (i.e. index 0) of the string
- * over which this <code>Normalizer</code> is iterating.
- *
- * @return the smallest index in the input text where the Normalizer operates
- * @stable ICU 2.0
- */
- int32_t startIndex(void) const;
-
- /**
- * Retrieve the index of the end of the input text. This is the end index
- * of the <code>CharacterIterator</code> or the length of the string
- * over which this <code>Normalizer</code> is iterating.
- * This end index is exclusive, i.e., the Normalizer operates only on characters
- * before this index.
- *
- * @return the first index in the input text where the Normalizer does not operate
- * @stable ICU 2.0
- */
- int32_t endIndex(void) const;
-
- /**
- * Returns TRUE when both iterators refer to the same character in the same
- * input text.
- *
- * @param that a Normalizer object to compare this one to
- * @return comparison result
- * @stable ICU 2.0
- */
- UBool operator==(const Normalizer& that) const;
-
- /**
- * Returns FALSE when both iterators refer to the same character in the same
- * input text.
- *
- * @param that a Normalizer object to compare this one to
- * @return comparison result
- * @stable ICU 2.0
- */
- inline UBool operator!=(const Normalizer& that) const;
-
- /**
- * Returns a pointer to a new Normalizer that is a clone of this one.
- * The caller is responsible for deleting the new clone.
- * @return a pointer to a new Normalizer
- * @stable ICU 2.0
- */
- Normalizer* clone(void) const;
-
- /**
- * Generates a hash code for this iterator.
- *
- * @return the hash code
- * @stable ICU 2.0
- */
- int32_t hashCode(void) const;
-
- //-------------------------------------------------------------------------
- // Property access methods
- //-------------------------------------------------------------------------
-
- /**
- * Set the normalization mode for this object.
- * <p>
- * <b>Note:</b>If the normalization mode is changed while iterating
- * over a string, calls to {@link #next() } and {@link #previous() } may
- * return previously buffered characters in the old normalization mode
- * until the iteration is able to re-sync at the next base character.
- * It is safest to call {@link #setIndexOnly }, {@link #reset() },
- * {@link #setText }, {@link #first() },
- * {@link #last() }, etc. after calling <code>setMode</code>.
- * <p>
- * @param newMode the new mode for this <code>Normalizer</code>.
- * @see #getUMode
- * @stable ICU 2.0
- */
- void setMode(UNormalizationMode newMode);
-
- /**
- * Return the normalization mode for this object.
- *
- * This is an unusual name because there used to be a getMode() that
- * returned a different type.
- *
- * @return the mode for this <code>Normalizer</code>
- * @see #setMode
- * @stable ICU 2.0
- */
- UNormalizationMode getUMode(void) const;
-
- /**
- * Set options that affect this <code>Normalizer</code>'s operation.
- * Options do not change the basic composition or decomposition operation
- * that is being performed, but they control whether
- * certain optional portions of the operation are done.
- * Currently the only available option is obsolete.
- *
- * It is possible to specify multiple options that are all turned on or off.
- *
- * @param option the option(s) whose value is/are to be set.
- * @param value the new setting for the option. Use <code>TRUE</code> to
- * turn the option(s) on and <code>FALSE</code> to turn it/them off.
- *
- * @see #getOption
- * @stable ICU 2.0
- */
- void setOption(int32_t option,
- UBool value);
-
- /**
- * Determine whether an option is turned on or off.
- * If multiple options are specified, then the result is TRUE if any
- * of them are set.
- * <p>
- * @param option the option(s) that are to be checked
- * @return TRUE if any of the option(s) are set
- * @see #setOption
- * @stable ICU 2.0
- */
- UBool getOption(int32_t option) const;
-
- /**
- * Set the input text over which this <code>Normalizer</code> will iterate.
- * The iteration position is set to the beginning.
- *
- * @param newText a string that replaces the current input text
- * @param status a UErrorCode
- * @stable ICU 2.0
- */
- void setText(const UnicodeString& newText,
- UErrorCode &status);
-
- /**
- * Set the input text over which this <code>Normalizer</code> will iterate.
- * The iteration position is set to the beginning.
- *
- * @param newText a CharacterIterator object that replaces the current input text
- * @param status a UErrorCode
- * @stable ICU 2.0
- */
- void setText(const CharacterIterator& newText,
- UErrorCode &status);
-
- /**
- * Set the input text over which this <code>Normalizer</code> will iterate.
- * The iteration position is set to the beginning.
- *
- * @param newText a string that replaces the current input text
- * @param length the length of the string, or -1 if NUL-terminated
- * @param status a UErrorCode
- * @stable ICU 2.0
- */
- void setText(const UChar* newText,
- int32_t length,
- UErrorCode &status);
- /**
- * Copies the input text into the UnicodeString argument.
- *
- * @param result Receives a copy of the text under iteration.
- * @stable ICU 2.0
- */
- void getText(UnicodeString& result);
-
- /**
- * ICU "poor man's RTTI", returns a UClassID for this class.
- * @return a UClassID for this class.
- * @stable ICU 2.2
- */
- static UClassID U_EXPORT2 getStaticClassID();
-
- /**
- * ICU "poor man's RTTI", returns a UClassID for the actual class.
- * @return a UClassID for the actual class.
- * @stable ICU 2.2
- */
- virtual UClassID getDynamicClassID() const;
-
-private:
- //-------------------------------------------------------------------------
- // Private functions
- //-------------------------------------------------------------------------
-
- Normalizer(); // default constructor not implemented
- Normalizer &operator=(const Normalizer &that); // assignment operator not implemented
-
- // Private utility methods for iteration
- // For documentation, see the source code
- UBool nextNormalize();
- UBool previousNormalize();
-
- void init(CharacterIterator *iter);
- void clearBuffer(void);
-
- //-------------------------------------------------------------------------
- // Private data
- //-------------------------------------------------------------------------
-
- UNormalizationMode fUMode;
- int32_t fOptions;
-
- // The input text and our position in it
- UCharIterator *text;
-
- // The normalization buffer is the result of normalization
- // of the source in [currentIndex..nextIndex[ .
- int32_t currentIndex, nextIndex;
-
- // A buffer for holding intermediate results
- UnicodeString buffer;
- int32_t bufferPos;
-
-};
-
-//-------------------------------------------------------------------------
-// Inline implementations
-//-------------------------------------------------------------------------
-
-inline UBool
-Normalizer::operator!= (const Normalizer& other) const
-{ return ! operator==(other); }
-
-inline UNormalizationCheckResult
-Normalizer::quickCheck(const UnicodeString& source,
- UNormalizationMode mode,
- UErrorCode &status) {
- if(U_FAILURE(status)) {
- return UNORM_MAYBE;
- }
-
- return unorm_quickCheck(source.getBuffer(), source.length(),
- mode, &status);
-}
-
-inline UNormalizationCheckResult
-Normalizer::quickCheck(const UnicodeString& source,
- UNormalizationMode mode, int32_t options,
- UErrorCode &status) {
- if(U_FAILURE(status)) {
- return UNORM_MAYBE;
- }
-
- return unorm_quickCheckWithOptions(source.getBuffer(), source.length(),
- mode, options, &status);
-}
-
-inline UBool
-Normalizer::isNormalized(const UnicodeString& source,
- UNormalizationMode mode,
- UErrorCode &status) {
- if(U_FAILURE(status)) {
- return FALSE;
- }
-
- return unorm_isNormalized(source.getBuffer(), source.length(),
- mode, &status);
-}
-
-inline UBool
-Normalizer::isNormalized(const UnicodeString& source,
- UNormalizationMode mode, int32_t options,
- UErrorCode &status) {
- if(U_FAILURE(status)) {
- return FALSE;
- }
-
- return unorm_isNormalizedWithOptions(source.getBuffer(), source.length(),
- mode, options, &status);
-}
-
-inline int32_t
-Normalizer::compare(const UnicodeString &s1, const UnicodeString &s2,
- uint32_t options,
- UErrorCode &errorCode) {
- // all argument checking is done in unorm_compare
- return unorm_compare(s1.getBuffer(), s1.length(),
- s2.getBuffer(), s2.length(),
- options,
- &errorCode);
-}
-
-U_NAMESPACE_END
-
-#endif /* #if !UCONFIG_NO_NORMALIZATION */
-
-#endif // NORMLZR_H

Search Discussions

Discussion Posts

Previous

Follow ups

Related Discussions

Discussion Navigation
viewthread | post
posts ‹ prev | 57 of 59 | next ›
Discussion Overview
groupcommits @
categoriescouchdb
postedFeb 15, '14 at 9:50a
activeFeb 15, '14 at 9:50a
posts59
users1
websitecouchdb.apache.org
irc#couchdb

1 user in discussion

Benoitc: 59 posts

People

Translate

site design / logo © 2021 Grokbase