Author: pulkomandy Date: 2010-07-13 21:04:40 +0200 (Tue, 13 Jul 2010) New Revision: 37498 Changeset: http://dev.haiku-os.org/changeset/37498 Added: haiku/trunk/headers/libs/icu/unicode/ haiku/trunk/headers/libs/icu/unicode/basictz.h haiku/trunk/headers/libs/icu/unicode/bms.h haiku/trunk/headers/libs/icu/unicode/bmsearch.h haiku/trunk/headers/libs/icu/unicode/brkiter.h haiku/trunk/headers/libs/icu/unicode/bytestream.h haiku/trunk/headers/libs/icu/unicode/calendar.h haiku/trunk/headers/libs/icu/unicode/caniter.h haiku/trunk/headers/libs/icu/unicode/chariter.h haiku/trunk/headers/libs/icu/unicode/choicfmt.h haiku/trunk/headers/libs/icu/unicode/coleitr.h haiku/trunk/headers/libs/icu/unicode/coll.h haiku/trunk/headers/libs/icu/unicode/colldata.h haiku/trunk/headers/libs/icu/unicode/curramt.h haiku/trunk/headers/libs/icu/unicode/currpinf.h haiku/trunk/headers/libs/icu/unicode/currunit.h haiku/trunk/headers/libs/icu/unicode/datefmt.h haiku/trunk/headers/libs/icu/unicode/dbbi.h haiku/trunk/headers/libs/icu/unicode/dcfmtsym.h haiku/trunk/headers/libs/icu/unicode/decimfmt.h haiku/trunk/headers/libs/icu/unicode/docmain.h haiku/trunk/headers/libs/icu/unicode/dtfmtsym.h haiku/trunk/headers/libs/icu/unicode/dtintrv.h haiku/trunk/headers/libs/icu/unicode/dtitvfmt.h haiku/trunk/headers/libs/icu/unicode/dtitvinf.h haiku/trunk/headers/libs/icu/unicode/dtptngen.h haiku/trunk/headers/libs/icu/unicode/dtrule.h haiku/trunk/headers/libs/icu/unicode/errorcode.h haiku/trunk/headers/libs/icu/unicode/fieldpos.h haiku/trunk/headers/libs/icu/unicode/fmtable.h haiku/trunk/headers/libs/icu/unicode/format.h haiku/trunk/headers/libs/icu/unicode/fpositer.h haiku/trunk/headers/libs/icu/unicode/gregocal.h haiku/trunk/headers/libs/icu/unicode/icudataver.h haiku/trunk/headers/libs/icu/unicode/icuplug.h haiku/trunk/headers/libs/icu/unicode/localpointer.h haiku/trunk/headers/libs/icu/unicode/locdspnm.h haiku/trunk/headers/libs/icu/unicode/locid.h haiku/trunk/headers/libs/icu/unicode/measfmt.h 
haiku/trunk/headers/libs/icu/unicode/measunit.h haiku/trunk/headers/libs/icu/unicode/measure.h haiku/trunk/headers/libs/icu/unicode/msgfmt.h haiku/trunk/headers/libs/icu/unicode/normalizer2.h haiku/trunk/headers/libs/icu/unicode/normlzr.h haiku/trunk/headers/libs/icu/unicode/numfmt.h haiku/trunk/headers/libs/icu/unicode/numsys.h haiku/trunk/headers/libs/icu/unicode/parseerr.h haiku/trunk/headers/libs/icu/unicode/parsepos.h haiku/trunk/headers/libs/icu/unicode/platform.h haiku/trunk/headers/libs/icu/unicode/platform.h.in haiku/trunk/headers/libs/icu/unicode/plurfmt.h haiku/trunk/headers/libs/icu/unicode/plurrule.h haiku/trunk/headers/libs/icu/unicode/ppalmos.h haiku/trunk/headers/libs/icu/unicode/ptypes.h haiku/trunk/headers/libs/icu/unicode/putil.h haiku/trunk/headers/libs/icu/unicode/pwin32.h haiku/trunk/headers/libs/icu/unicode/rbbi.h haiku/trunk/headers/libs/icu/unicode/rbnf.h haiku/trunk/headers/libs/icu/unicode/rbtz.h haiku/trunk/headers/libs/icu/unicode/regex.h haiku/trunk/headers/libs/icu/unicode/rep.h haiku/trunk/headers/libs/icu/unicode/resbund.h haiku/trunk/headers/libs/icu/unicode/schriter.h haiku/trunk/headers/libs/icu/unicode/search.h haiku/trunk/headers/libs/icu/unicode/selfmt.h haiku/trunk/headers/libs/icu/unicode/simpletz.h haiku/trunk/headers/libs/icu/unicode/smpdtfmt.h haiku/trunk/headers/libs/icu/unicode/sortkey.h haiku/trunk/headers/libs/icu/unicode/std_string.h haiku/trunk/headers/libs/icu/unicode/strenum.h haiku/trunk/headers/libs/icu/unicode/stringpiece.h haiku/trunk/headers/libs/icu/unicode/stsearch.h haiku/trunk/headers/libs/icu/unicode/symtable.h haiku/trunk/headers/libs/icu/unicode/tblcoll.h haiku/trunk/headers/libs/icu/unicode/timezone.h haiku/trunk/headers/libs/icu/unicode/tmunit.h haiku/trunk/headers/libs/icu/unicode/tmutamt.h haiku/trunk/headers/libs/icu/unicode/tmutfmt.h haiku/trunk/headers/libs/icu/unicode/translit.h haiku/trunk/headers/libs/icu/unicode/tzrule.h haiku/trunk/headers/libs/icu/unicode/tztrans.h 
haiku/trunk/headers/libs/icu/unicode/ubidi.h haiku/trunk/headers/libs/icu/unicode/ubrk.h haiku/trunk/headers/libs/icu/unicode/ucal.h haiku/trunk/headers/libs/icu/unicode/ucasemap.h haiku/trunk/headers/libs/icu/unicode/ucat.h haiku/trunk/headers/libs/icu/unicode/uchar.h haiku/trunk/headers/libs/icu/unicode/uchriter.h haiku/trunk/headers/libs/icu/unicode/uclean.h haiku/trunk/headers/libs/icu/unicode/ucnv.h haiku/trunk/headers/libs/icu/unicode/ucnv_cb.h haiku/trunk/headers/libs/icu/unicode/ucnv_err.h haiku/trunk/headers/libs/icu/unicode/ucnvsel.h haiku/trunk/headers/libs/icu/unicode/ucol.h haiku/trunk/headers/libs/icu/unicode/ucoleitr.h haiku/trunk/headers/libs/icu/unicode/uconfig.h haiku/trunk/headers/libs/icu/unicode/ucsdet.h haiku/trunk/headers/libs/icu/unicode/ucurr.h haiku/trunk/headers/libs/icu/unicode/udat.h haiku/trunk/headers/libs/icu/unicode/udata.h [... truncated: 50 added files follow ...] Removed: haiku/trunk/headers/libs/icu/unicode/ haiku/trunk/src/build/icu/ haiku/trunk/src/libs/icu/ haiku/trunk/src/tools/icu/ Modified: haiku/trunk/build/jam/HaikuImage haiku/trunk/build/jam/OptionalBuildFeatures haiku/trunk/headers/os/locale/Country.h haiku/trunk/headers/os/locale/Language.h haiku/trunk/src/build/Jamfile haiku/trunk/src/kits/locale/Country.cpp haiku/trunk/src/kits/locale/Jamfile haiku/trunk/src/kits/locale/Language.cpp haiku/trunk/src/kits/locale/TimeFormat.cpp haiku/trunk/src/libs/Jamfile haiku/trunk/src/preferences/locale/Jamfile haiku/trunk/src/preferences/time/DateTimeEdit.cpp haiku/trunk/src/preferences/time/DateTimeEdit.h haiku/trunk/src/preferences/time/DateTimeView.cpp haiku/trunk/src/preferences/time/SectionEdit.h haiku/trunk/src/tests/kits/locale/Jamfile haiku/trunk/src/tools/Jamfile Log: Update ICU to 4.4 * Remove 4.2 sourcecode * ICU is now an optional package (mandatory) * Adjust the namespaces and libraries names where needed Modified: haiku/trunk/build/jam/HaikuImage =================================================================== --- 
haiku/trunk/build/jam/HaikuImage 2010-07-13 17:57:47 UTC (rev 37497) +++ haiku/trunk/build/jam/HaikuImage 2010-07-13 19:04:40 UTC (rev 37498) @@ -77,8 +77,6 @@ SYSTEM_DEMOS = BSnow Chart Clock Cortex FontDemo GLTeapot Haiku3d Mandelbrot Pairs Playground Pulse Sudoku ; -ICU_LIBS = libicu-common.so libicu-data.so libicu-i18n.so -; SYSTEM_LIBS = libbe.so libbsd.so libbnetapi.so libdebug.so libdevice.so @@ -100,7 +98,6 @@ libfluidsynth.so libilmimf.so liblinprog.so liblpsolve55.so - $(ICU_LIBS) ; SYSTEM_SERVERS = app_server cddb_daemon debug_server input_server mail_daemon media_addon_server media_server midi_server mount_server net_server Modified: haiku/trunk/build/jam/OptionalBuildFeatures =================================================================== --- haiku/trunk/build/jam/OptionalBuildFeatures 2010-07-13 17:57:47 UTC (rev 37497) +++ haiku/trunk/build/jam/OptionalBuildFeatures 2010-07-13 19:04:40 UTC (rev 37498) @@ -45,3 +45,50 @@ = [ FDirName $(HAIKU_OPENSSL_DIR) common include ] ; } } + + +# ICU + +# Note ICU isn't actually optional, but is still an external package +local isHybridBuild ; +if $(HAIKU_ADD_ALTERNATIVE_GCC_LIBS) = 1 + && $(HAIKU_ALTERNATIVE_GCC_OUTPUT_DIR) { + isHybridBuild = 1 ; +} +if $(HAIKU_GCC_VERSION[1]) < 4 && !isHybridBuild) { + HAIKU_ICU_PACKAGE = icu-4.4.1-r1a2-x86-gcc2-2010-07-13-a.zip ; +} else { + HAIKU_ICU_PACKAGE = icu-4.4.1-r1a2-x86-gcc4-2010-07-13-a.zip ; +} + +HAIKU_ICU_URL = $(baseURL)/$(HAIKU_ICU_PACKAGE) ; + +if $(TARGET_ARCH) != x86 { + Echo "ICU not available for $(TARGET_ARCH)" ; +} else { + local zipFile = [ DownloadFile $(HAIKU_ICU_PACKAGE) : $(HAIKU_ICU_URL) ] ; + + # zip file and output directory + HAIKU_ICU_ZIP_FILE = $(zipFile) ; + HAIKU_ICU_DIR = [ FDirName $(HAIKU_OPTIONAL_BUILD_PACKAGES_DIR) + $(HAIKU_ICU_PACKAGE:B) ] ; + + # extract headers and libraries + # HAIKU_ICU_HEADERS_DEPENDENCY = [ ExtractArchive $(HAIKU_ICU_DIR) + # : common/include/ : $(zipFile) ] ; + + HAIKU_ICU_LIBS = [ ExtractArchive 
$(HAIKU_ICU_DIR) + : + system/lib/libicudata.so + system/lib/libicui18n.so + system/lib/libicuio.so + system/lib/libicule.so + system/lib/libiculx.so + system/lib/libicutu.so + system/lib/libicuuc.so + : $(zipFile) + ] ; + + HAIKU_ICU_HEADERS + = [ FDirName $(HAIKU_ICU_DIR) common include ] ; +} Added: haiku/trunk/headers/libs/icu/unicode/basictz.h =================================================================== --- haiku/trunk/headers/libs/icu/unicode/basictz.h (rev 0) +++ haiku/trunk/headers/libs/icu/unicode/basictz.h 2010-07-13 19:04:40 UTC (rev 37498) @@ -0,0 +1,210 @@ +/* +******************************************************************************* +* Copyright (C) 2007-2008, International Business Machines Corporation and * +* others. All Rights Reserved. * +******************************************************************************* +*/ +#ifndef BASICTZ_H +#define BASICTZ_H + +/** + * \file + * \brief C++ API: ICU TimeZone base class + */ + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_FORMATTING + +#include "unicode/timezone.h" +#include "unicode/tzrule.h" +#include "unicode/tztrans.h" + +U_NAMESPACE_BEGIN + +// forward declarations +class UVector; + +/** + * <code>BasicTimeZone</code> is an abstract class extending <code>TimeZone</code>. + * This class provides some additional methods to access time zone transitions and rules. + * All ICU <code>TimeZone</code> concrete subclasses extend this class. + * @stable ICU 3.8 + */ +class U_I18N_API BasicTimeZone: public TimeZone { +public: + /** + * Destructor. + * @stable ICU 3.8 + */ + virtual ~BasicTimeZone(); + + /** + * Gets the first time zone transition after the base time. + * @param base The base time. + * @param inclusive Whether the base time is inclusive or not. + * @param result Receives the first transition after the base time. + * @return TRUE if the transition is found. 
+ * @stable ICU 3.8 + */ + virtual UBool getNextTransition(UDate base, UBool inclusive, TimeZoneTransition& result) /*const*/ = 0; + + /** + * Gets the most recent time zone transition before the base time. + * @param base The base time. + * @param inclusive Whether the base time is inclusive or not. + * @param result Receives the most recent transition before the base time. + * @return TRUE if the transition is found. + * @stable ICU 3.8 + */ + virtual UBool getPreviousTransition(UDate base, UBool inclusive, TimeZoneTransition& result) /*const*/ = 0; + + /** + * Checks if the time zone has equivalent transitions in the time range. + * This method returns true when all of transition times, from/to standard + * offsets and DST savings used by this time zone match the other in the + * time range. + * @param tz The <code>BasicTimeZone</code> object to be compared with. + * @param start The start time of the evaluated time range (inclusive) + * @param end The end time of the evaluated time range (inclusive) + * @param ignoreDstAmount + * When true, any transitions with only daylight saving amount + * changes will be ignored, except either of them is zero. + * For example, a transition from rawoffset 3:00/dstsavings 1:00 + * to rawoffset 2:00/dstsavings 2:00 is excluded from the comparison, + * but a transtion from rawoffset 2:00/dstsavings 1:00 to + * rawoffset 3:00/dstsavings 0:00 is included. + * @param ec Output param to filled in with a success or an error. + * @return true if the other time zone has the equivalent transitions in the + * time range. + * @stable ICU 3.8 + */ + virtual UBool hasEquivalentTransitions(/*const*/ BasicTimeZone& tz, UDate start, UDate end, + UBool ignoreDstAmount, UErrorCode& ec) /*const*/; + + /** + * Returns the number of <code>TimeZoneRule</code>s which represents time transitions, + * for this time zone, that is, all <code>TimeZoneRule</code>s for this time zone except + * <code>InitialTimeZoneRule</code>. 
The return value range is 0 or any positive value. + * @param status Receives error status code. + * @return The number of <code>TimeZoneRule</code>s representing time transitions. + * @stable ICU 3.8 + */ + virtual int32_t countTransitionRules(UErrorCode& status) /*const*/ = 0; + + /** + * Gets the <code>InitialTimeZoneRule</code> and the set of <code>TimeZoneRule</code> + * which represent time transitions for this time zone. On successful return, + * the argument initial points to non-NULL <code>InitialTimeZoneRule</code> and + * the array trsrules is filled with 0 or multiple <code>TimeZoneRule</code> + * instances up to the size specified by trscount. The results are referencing the + * rule instance held by this time zone instance. Therefore, after this time zone + * is destructed, they are no longer available. + * @param initial Receives the initial timezone rule + * @param trsrules Receives the timezone transition rules + * @param trscount On input, specify the size of the array 'transitions' receiving + * the timezone transition rules. On output, actual number of + * rules filled in the array will be set. + * @param status Receives error status code. + * @stable ICU 3.8 + */ + virtual void getTimeZoneRules(const InitialTimeZoneRule*& initial, + const TimeZoneRule* trsrules[], int32_t& trscount, UErrorCode& status) /*const*/ = 0; + + /** + * Gets the set of time zone rules valid at the specified time. Some known external time zone + * implementations are not capable to handle historic time zone rule changes. Also some + * implementations can only handle certain type of rule definitions. + * If this time zone does not use any daylight saving time within about 1 year from the specified + * time, only the <code>InitialTimeZone</code> is returned. Otherwise, the rule for standard + * time and daylight saving time transitions are returned in addition to the + * <code>InitialTimeZoneRule</code>. 
The standard and daylight saving time transition rules are + * represented by <code>AnnualTimeZoneRule</code> with <code>DateTimeRule::DOW</code> for its date + * rule and <code>DateTimeRule::WALL_TIME</code> for its time rule. Because daylight saving time + * rule is changing time to time in many time zones and also mapping a transition time rule to + * different type is lossy transformation, the set of rules returned by this method may be valid + * for short period of time. + * The time zone rule objects returned by this method is owned by the caller, so the caller is + * responsible for deleting them after use. + * @param date The date used for extracting time zone rules. + * @param initial Receives the <code>InitialTimeZone</code>, always not NULL. + * @param std Receives the <code>AnnualTimeZoneRule</code> for standard time transitions. + * When this time time zone does not observe daylight saving times around the + * specified date, NULL is set. + * @param dst Receives the <code>AnnualTimeZoneRule</code> for daylight saving time + * transitions. When this time zone does not observer daylight saving times + * around the specified date, NULL is set. + * @param status Receives error status code. + * @stable ICU 3.8 + */ + virtual void getSimpleRulesNear(UDate date, InitialTimeZoneRule*& initial, + AnnualTimeZoneRule*& std, AnnualTimeZoneRule*& dst, UErrorCode& status) /*const*/; + + + /** + * The time type option bit flags used by getOffsetFromLocal + * @internal + */ + enum { + kStandard = 0x01, + kDaylight = 0x03, + kFormer = 0x04, + kLatter = 0x0C + }; + + /** + * Get time zone offsets from local wall time. 
+ * @internal + */ + virtual void getOffsetFromLocal(UDate date, int32_t nonExistingTimeOpt, int32_t duplicatedTimeOpt, + int32_t& rawOffset, int32_t& dstOffset, UErrorCode& status) /*const*/; + +protected: + + /** + * The time type option bit masks used by getOffsetFromLocal + * @internal + */ + enum { + kStdDstMask = kDaylight, + kFormerLatterMask = kLatter + }; + + /** + * Default constructor. + * @stable ICU 3.8 + */ + BasicTimeZone(); + + /** + * Construct a timezone with a given ID. + * @param id a system time zone ID + * @stable ICU 3.8 + */ + BasicTimeZone(const UnicodeString &id); + + /** + * Copy constructor. + * @param source the object to be copied. + * @stable ICU 3.8 + */ + BasicTimeZone(const BasicTimeZone& source); + + /** + * Gets the set of TimeZoneRule instances applicable to the specified time and after. + * @param start The start date used for extracting time zone rules + * @param initial Receives the InitialTimeZone, always not NULL + * @param transitionRules Receives the transition rules, could be NULL + * @param status Receives error status code + */ + void getTimeZoneRulesAfter(UDate start, InitialTimeZoneRule*& initial, UVector*& transitionRules, + UErrorCode& status) /*const*/; +}; + +U_NAMESPACE_END + +#endif /* #if !UCONFIG_NO_FORMATTING */ + +#endif // BASICTZ_H + +//eof Added: haiku/trunk/headers/libs/icu/unicode/bms.h =================================================================== --- haiku/trunk/headers/libs/icu/unicode/bms.h (rev 0) +++ haiku/trunk/headers/libs/icu/unicode/bms.h 2010-07-13 19:04:40 UTC (rev 37498) @@ -0,0 +1,274 @@ +/* + * Copyright (C) 1996-2010, International Business Machines Corporation and Others. + * All rights reserved. + */ + +/** + * \file + * \brief C API: Boyer-Moore StringSearch prototype. 
+ * \internal + */ + +#ifndef _BMS_H +#define _BMS_H + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION + +#include "unicode/ucol.h" + +/** + * A <code>UCD</code> object holds the Collator-specific data needed to + * compute the length of the shortest string that can + * generate a particular list of CEs. + * + * <code>UCD</code> objects are quite expensive to compute. Because + * of this, they are cached. When you call <code>ucd_open</code> it + * returns a reference counted cached object. When you call <code>ucd_close</code> + * the reference count on the object is decremented but the object is not deleted. + * + * If you do not need to reuse any unreferenced objects in the cache, you can call + * <code>ucd_flushCache</code>. If you no longer need any <code>UCD</code> + * objects, you can call <code>ucd_freeCache</code>. + */ +typedef void UCD; + +/** + * Open a <code>UCD</code> object. + * + * @param coll - the collator + * @param status - will be set if any errors occur. + * + * @return the <code>UCD</code> object. You must call + * <code>ucd_close</code> when you are done using the object. + * + * Note: if on return status is set to an error, the only safe + * thing to do with the returned object is to call <code>ucd_close</code>. + * + * @internal ICU 4.0.1 technology preview + */ +U_CAPI UCD * U_EXPORT2 +ucd_open(UCollator *coll, UErrorCode *status); + +/** + * Release a <code>UCD</code> object. + * + * @param ucd - the object + * + * @internal ICU 4.0.1 technology preview + */ +U_CAPI void U_EXPORT2 +ucd_close(UCD *ucd); + +/** + * Get the <code>UCollator</code> object used to create a <code>UCD</code> object. + * The <code>UCollator</code> object returned may not be the exact + * object that was used to create this object, but it will have the + * same behavior. + * + * @param ucd - the <code>UCD</code> object + * + * @return the <code>UCollator</code> used to create the given + * <code>UCD</code> object. 
+ * + * @internal ICU 4.0.1 technology preview + */ +U_CAPI UCollator * U_EXPORT2 +ucd_getCollator(UCD *ucd); + +/** + * <code>UCD</code> objects are expensive to compute, and so + * may be cached. This routine will free the cached objects and delete + * the cache. + * + * WARNING: Don't call this until you have called <code>ucd_close</code> + * for each <code>UCD</code> object that you have used. Also, + * DO NOT call this if another thread may be calling <code>ucd_flushCache</code> + * at the same time. + * + * @internal ICU 4.0.1 technology preview + */ +U_CAPI void U_EXPORT2 +ucd_freeCache(); + +/** + * <code>UCD</code> objects are expensive to compute, and so + * may be cached. This routine will remove any unused <code>UCD</code> + * objects from the cache. + * + * @internal ICU 4.0.1 technology preview + */ +U_CAPI void U_EXPORT2 +ucd_flushCache(); + +/** + * BMS + * + * This object holds the information needed to do a Collation sensitive Boyer-Moore search. It encapsulates + * the pattern, the "bad character" and "good suffix" tables, the Collator-based data needed to compute them, + * and a reference to the text being searched. + * + * To do a search, you first need to get a <code>UCD</code> object by calling <code>ucd_open</code>. + * Then you construct a <code>BMS</code> object from the <code>UCD</code> object, the pattern + * string and the target string. Then you call the <code>bms_search</code> function. 
Here's a code sample: + * + * <pre> + * void boyerMooreExample(UCollator *collator, UChar *pattern, int32_t patternLength, UChar *target, int32_t targetLength) + * { + * UErrorCode status = U_ZERO_ERROR; + * int32_t offset = 0, start = -1, end = -1; + * UCD *ucd = NULL; + * BMS *bms = NULL; + * + * ucd = ucd_open(collator, &status); + * if (U_FAILURE(status)) { + * // could not create a UCD object + * return; + * } + * + * bms = bms_open(ucd, pattern, patternLength, target, targetLength, &status); + * if (U_FAILURE(status)) { + * // could not create a BMS object + * ucd_close(ucd); + * return; + * } + * + * + * // Find all matches + * while (bms_search(bms, offset, &start, &end)) { + * // process the match between start and end + * ... + * + * // advance past the match + * offset = end; + * } + * + * // at this point, if offset == 0, there were no matches + * if (offset == 0) { + * // handle the case of no matches + * } + * + * bms_close(bms); + * ucd_close(ucd); + * + * // UCD objects are cached, so the call to + * // ucd_close doesn't delete the object. + * // Call this if you don't need the object any more. + * ucd_flushCache(); + * } + * </pre> + * + * NOTE: This is a technology preview. The final version of this API may not bear any resemblance to this API. + * + * Known limitations: + * 1) Backwards searching has not been implemented. + * + * 2) For Han and Hangul characters, this code ignores any Collation tailorings. In general, + * this isn't a problem, but in Korean locales, at strength 1, Hangul characters are tailored + * to be equal to Han characters with the same pronunciation. Because this code ignores + * tailorings, searching for a Hangul character will not find a Han character and vice versa. + * + * 3) In some cases, searching for a pattern that needs to be normalized and ends + * in a discontiguous contraction may fail. The only known cases of this are with + * the Tibetan script. 
For example, searching for the pattern + * "\u0F7F\u0F80\u0F81\u0F82\u0F83\u0F84\u0F85" will fail. (This case is artificial. We've + * been unable to find a practical, real-world example of this failure.) + * + * NOTE: This is a technology preview. The final version of this API may not bear any resemblance to this API. + * + * @internal ICU 4.0.1 technology preview + */ +struct BMS; +typedef struct BMS BMS; /**< @see BMS */ + +/** + * Construct a <code>BMS</code> object. + * + * @param ucd - A <code>UCD</code> object holding the Collator-sensitive data + * @param pattern - the string for which to search + * @param patternLength - the length of the string for which to search + * @param target - the string in which to search + * @param targetLength - the length of the string in which to search + * @param status - will be set if any errors occur. + * + * @return the <code>BMS</code> object. + * + * Note: if on return status is set to an error, the only safe + * thing to do with the returned object is to call + * <code>bms_close</code>. + * + * @internal ICU 4.0.1 technology preview + */ +U_CAPI BMS * U_EXPORT2 +bms_open(UCD *ucd, + const UChar *pattern, int32_t patternLength, + const UChar *target, int32_t targetLength, + UErrorCode *status); + +/** + * Close a <code>BMS</code> object and release all the + * storage associated with it. + * + * @param bms - the <code>BMS</code> object to close. + * @internal ICU 4.0.1 technology preview + */ +U_CAPI void U_EXPORT2 +bms_close(BMS *bms); + +/** + * Test the pattern to see if it generates any CEs. + * + * @param bms - the <code>BMS</code> object + * @return <code>TRUE</code> if the pattern string did not generate any CEs + * + * @internal ICU 4.0.1 technology preview + */ +U_CAPI UBool U_EXPORT2 +bms_empty(BMS *bms); + +/** + * Get the <code>UCD</code> object used to create + * a given <code>BMS</code> object. 
+ * + * @param bms - the <code>BMS</code> object + * + * @return - the <code>UCD</code> object used to create + * the given <code>BMS</code> object. + * + * @internal ICU 4.0.1 technology preview + */ +U_CAPI UCD * U_EXPORT2 +bms_getData(BMS *bms); + +/** + * Search for the pattern string in the target string. + * + * @param bms - the <code>BMS</code> object + * @param offset - the offset in the target string at which to begin the search + * @param start - will be set to the starting offset of the match, or -1 if there's no match + * @param end - will be set to the ending offset of the match, or -1 if there's no match + * + * @return <code>TRUE</code> if the match succeeds, <code>FALSE</code> otherwise. + * + * @internal ICU 4.0.1 technology preview + */ +U_CAPI UBool U_EXPORT2 +bms_search(BMS *bms, int32_t offset, int32_t *start, int32_t *end); + +/** + * Set the target string for the match. + * + * @param bms - the <code>BMS</code> object + * @param target - the new target string + * @param targetLength - the length of the new target string + * @param status - will be set if any errors occur. + * + * @internal ICU 4.0.1 technology preview + */ +U_CAPI void U_EXPORT2 +bms_setTargetString(BMS *bms, const UChar *target, int32_t targetLength, UErrorCode *status); + +#endif + +#endif /* _BMS_H */ Added: haiku/trunk/headers/libs/icu/unicode/bmsearch.h =================================================================== --- haiku/trunk/headers/libs/icu/unicode/bmsearch.h (rev 0) +++ haiku/trunk/headers/libs/icu/unicode/bmsearch.h 2010-07-13 19:04:40 UTC (rev 37498) @@ -0,0 +1,226 @@ +/* + ****************************************************************************** + * Copyright (C) 1996-2010, International Business Machines * + * Corporation and others. All Rights Reserved. 
* + ****************************************************************************** + */ + +/** + * \file + * \brief C++ API: Boyer-Moore StringSearch technology preview + * \internal ICU 4.0.1 technology preview + */ + +#ifndef B_M_SEARCH_H +#define B_M_SEARCH_H + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION + +#include "unicode/uobject.h" +#include "unicode/ucol.h" + +#include "unicode/colldata.h" + +U_NAMESPACE_BEGIN + +class BadCharacterTable; +class GoodSuffixTable; +class Target; + +/** + * BoyerMooreSearch + * + * This object holds the information needed to do a Collation sensitive Boyer-Moore search. It encapsulates + * the pattern, the "bad character" and "good suffix" tables, the Collator-based data needed to compute them, + * and a reference to the text being searched. + * + * To do a search, you first need to get a <code>CollData</code> object by calling <code>CollData::open</code>. + * Then you construct a <code>BoyerMooreSearch</code> object from the <code>CollData</code> object, the pattern + * string and the target string. Then you call the <code>search</code> method. Here's a code sample: + * + * <pre> + * void boyerMooreExample(UCollator *collator, UnicodeString *pattern, UnicodeString *target) + * { + * UErrorCode status = U_ZERO_ERROR; + * CollData *collData = CollData::open(collator, status); + * + * if (U_FAILURE(status)) { + * // could not create a CollData object + * return; + * } + * + * BoyerMooreSearch *search = new BoyerMooreSearch(collData, *pattern, target, status); + * + * if (U_FAILURE(status)) { + * // could not create a BoyerMooreSearch object + * CollData::close(collData); + * return; + * } + * + * int32_t offset = 0, start = -1, end = -1; + * + * // Find all matches + * while (search->search(offset, start, end)) { + * // process the match between start and end + * ... 
+ * // advance past the match + * offset = end; + * } + * + * // at this point, if offset == 0, there were no matches + * if (offset == 0) { + * // handle the case of no matches + * } + * + * delete search; + * CollData::close(collData); + * + * // CollData objects are cached, so the call to + * // CollData::close doesn't delete the object. + * // Call this if you don't need the object any more. + * CollData::flushCollDataCache(); + * } + * </pre> + * + * NOTE: This is a technology preview. The final version of this API may not bear any resemblance to this API. + * + * Known limitations: + * 1) Backwards searching has not been implemented. + * + * 2) For Han and Hangul characters, this code ignores any Collation tailorings. In general, + * this isn't a problem, but in Korean locales, at strength 1, Hangul characters are tailored + * to be equal to Han characters with the same pronunciation. Because this code ignores + * tailorings, searching for a Hangul character will not find a Han character and vice versa. + * + * 3) In some cases, searching for a pattern that needs to be normalized and ends + * in a discontiguous contraction may fail. The only known cases of this are with + * the Tibetan script. For example, searching for the pattern + * "\u0F7F\u0F80\u0F81\u0F82\u0F83\u0F84\u0F85" will fail. (This case is artificial. We've + * been unable to find a practical, real-world example of this failure.) + * + * @internal ICU 4.0.1 technology preview + * + * @see CollData + */ +class U_I18N_API BoyerMooreSearch : public UObject +{ +public: + /** + * Construct a <code>BoyerMooreSearch</code> object. + * + * @param theData - A <code>CollData</code> object holding the Collator-sensitive data + * @param patternString - the string for which to search + * @param targetString - the string in which to search or <code>NULL</code> if you will + * set it later by calling <code>setTargetString</code>. + * @param status - will be set if any errors occur. 
+ * + * Note: if on return, status is set to an error code, + * the only safe thing to do with this object is to call + * the destructor. + * + * @internal ICU 4.0.1 technology preview + */ + BoyerMooreSearch(CollData *theData, const UnicodeString &patternString, const UnicodeString *targetString, UErrorCode &status); + + /** + * The destructor + * + * @internal ICU 4.0.1 technology preview + */ + ~BoyerMooreSearch(); + + /** + * Test the pattern to see if it generates any CEs. + * + * @return <code>TRUE</code> if the pattern string did not generate any CEs + * + * @internal ICU 4.0.1 technology preview + */ + UBool empty(); + + /** + * Search for the pattern string in the target string. + * + * @param offset - the offset in the target string at which to begin the search + * @param start - will be set to the starting offset of the match, or -1 if there's no match + * @param end - will be set to the ending offset of the match, or -1 if there's no match + * + * @return <code>TRUE</code> if the match succeeds, <code>FALSE</code> otherwise. + * + * @internal ICU 4.0.1 technology preview + */ + UBool search(int32_t offset, int32_t &start, int32_t &end); + + /** + * Set the target string for the match. + * + * @param targetString - the new target string + * @param status - will be set if any errors occur. + * + * @internal ICU 4.0.1 technology preview + */ + void setTargetString(const UnicodeString *targetString, UErrorCode &status); + + // **** no longer need these? **** + /** + * Return the <code>CollData</code> object used for searching + * + * @return the <code>CollData</code> object used for searching + * + * @internal ICU 4.0.1 technology preview + */ + CollData *getData(); + + /** + * Return the CEs generated by the pattern string. + * + * @return a <code>CEList</code> object holding the CEs generated by the pattern string. 
+ * + * @internal ICU 4.0.1 technology preview + */ + CEList *getPatternCEs(); + + /** + * Return the <code>BadCharacterTable</code> object computed for the pattern string. + * + * @return the <code>BadCharacterTable</code> object. + * + * @internal ICU 4.0.1 technology preview + */ + BadCharacterTable *getBadCharacterTable(); + + /** + * Return the <code>GoodSuffixTable</code> object computed for the pattern string. + * + * @return the <code>GoodSuffixTable</code> object computed for the pattern string. + * + * @internal ICU 4.0.1 technology preview + */ + GoodSuffixTable *getGoodSuffixTable(); + + /** + * UObject glue... + * @internal ICU 4.0.1 technology preview + */ + virtual UClassID getDynamicClassID() const; + /** + * UObject glue... + * @internal ICU 4.0.1 technology preview + */ + static UClassID getStaticClassID(); + +private: + CollData *data; + CEList *patCEs; + BadCharacterTable *badCharacterTable; + GoodSuffixTable *goodSuffixTable; + UnicodeString pattern; + Target *target; +}; + +U_NAMESPACE_END + +#endif // #if !UCONFIG_NO_COLLATION +#endif // #ifndef B_M_SEARCH_H Added: haiku/trunk/headers/libs/icu/unicode/brkiter.h =================================================================== --- haiku/trunk/headers/libs/icu/unicode/brkiter.h (rev 0) +++ haiku/trunk/headers/libs/icu/unicode/brkiter.h 2010-07-13 19:04:40 UTC (rev 37498) @@ -0,0 +1,557 @@ +/* +******************************************************************************** +* Copyright (C) 1997-2010, International Business Machines +* Corporation and others. All Rights Reserved. +******************************************************************************** +* +* File brkiter.h +* +* Modification History: +* +* Date Name Description +* 02/18/97 aliu Added typedef for TextCount. Made DONE const. +* 05/07/97 aliu Fixed DLL declaration. +* 07/09/97 jfitz Renamed BreakIterator and interface synced with JDK +* 08/11/98 helena Sync-up JDK1.2. 
+* 01/13/2000 helena Added UErrorCode parameter to createXXXInstance methods. +******************************************************************************** +*/ + +#ifndef BRKITER_H +#define BRKITER_H + +#include "unicode/utypes.h" + +/** + * \file + * \brief C++ API: Break Iterator. + */ + +#if UCONFIG_NO_BREAK_ITERATION + +U_NAMESPACE_BEGIN + +/* + * Allow the declaration of APIs with pointers to BreakIterator + * even when break iteration is removed from the build. + */ +class BreakIterator; + +U_NAMESPACE_END + +#else + +#include "unicode/uobject.h" +#include "unicode/unistr.h" +#include "unicode/chariter.h" +#include "unicode/locid.h" +#include "unicode/ubrk.h" +#include "unicode/strenum.h" +#include "unicode/utext.h" +#include "unicode/umisc.h" + +U_NAMESPACE_BEGIN + +/** + * The BreakIterator class implements methods for finding the location + * of boundaries in text. BreakIterator is an abstract base class. + * Instances of BreakIterator maintain a current position and scan over + * text returning the index of characters where boundaries occur. + * <p> + * Line boundary analysis determines where a text string can be broken + * when line-wrapping. The mechanism correctly handles punctuation and + * hyphenated words. + * <p> + * Sentence boundary analysis allows selection with correct + * interpretation of periods within numbers and abbreviations, and + * trailing punctuation marks such as quotation marks and parentheses. + * <p> + * Word boundary analysis is used by search and replace functions, as + * well as within text editing applications that allow the user to + * select words with a double click. Word selection provides correct + * interpretation of punctuation marks within and following + * words. Characters that are not part of a word, such as symbols or + * punctuation marks, have word-breaks on both sides. 
+ * <p> + * Character boundary analysis allows users to interact with + * characters as they expect to, for example, when moving the cursor + * through a text string. Character boundary analysis provides correct + * navigation through character strings, regardless of how the + * character is stored. For example, an accented character might be + * stored as a base character and a diacritical mark. What users + * consider to be a character can differ between languages. + * <p> + * The text boundary positions are found according to the rules + * described in Unicode Standard Annex #29, Text Boundaries, and + * Unicode Standard Annex #14, Line Breaking Properties. These + * are available at http://www.unicode.org/reports/tr14/ and + * http://www.unicode.org/reports/tr29/. + * <p> + * In addition to the C++ API defined in this header file, a + * plain C API with equivalent functionality is defined in the + * file ubrk.h + * <p> + * Code snippets illustrating the use of the Break Iterator APIs + * are available in the ICU User Guide, + * http://icu-project.org/userguide/boundaryAnalysis.html + * and in the sample program icu/source/samples/break/break.cpp + * + */ +class U_COMMON_API BreakIterator : public UObject { +public: + /** + * destructor + * @stable ICU 2.0 + */ + virtual ~BreakIterator(); + + /** + * Return true if another object is semantically equal to this + * one. The other object should be an instance of the same subclass of + * BreakIterator. Objects of different subclasses are considered + * unequal. + * <P> + * Return true if this BreakIterator is at the same position in the + * same text, and is the same class and type (word, line, etc.) of + * BreakIterator, as the argument. Text is considered the same if + * it contains the same characters, it need not be the same + * object, and styles are not considered. 
+ * @stable ICU 2.0 + */ + virtual UBool operator==(const BreakIterator&) const = 0; + + /** + * Returns the complement of the result of operator== + * @param rhs The BreakIterator to be compared for inequality + * @return the complement of the result of operator== + * @stable ICU 2.0 + */ + UBool operator!=(const BreakIterator& rhs) const { return !operator==(rhs); } + + /** + * Return a polymorphic copy of this object. This is an abstract + * method which subclasses implement. + * @stable ICU 2.0 + */ + virtual BreakIterator* clone(void) const = 0; + + /** + * Return a polymorphic class ID for this object. Different subclasses + * will return distinct unequal values. + * @stable ICU 2.0 + */ + virtual UClassID getDynamicClassID(void) const = 0; + + /** + * Return a CharacterIterator over the text being analyzed. + * @stable ICU 2.0 + */ + virtual CharacterIterator& getText(void) const = 0; + + + /** + * Get a UText for the text being analyzed. + * The returned UText is a shallow clone of the UText used internally + * by the break iterator implementation. It can safely be used to + * access the text without impacting any break iterator operations, + * but the underlying text itself must not be altered. + * + * @param fillIn A UText to be filled in. If NULL, a new UText will be + * allocated to hold the result. + * @param status receives any error codes. + * @return The current UText for this break iterator. If an input + * UText was provided, it will always be returned. + * @stable ICU 3.4 + */ + virtual UText *getUText(UText *fillIn, UErrorCode &status) const = 0; + + /** + * Change the text over which this operates. The text boundary is + * reset to the start. + * @param text The UnicodeString used to change the text. + * @stable ICU 2.0 + */ + virtual void setText(const UnicodeString &text) = 0; + + /** + * Reset the break iterator to operate over the text represented by + * the UText. The iterator position is reset to the start. 
+ * + * This function makes a shallow clone of the supplied UText. This means + * that the caller is free to immediately close or otherwise reuse the + * Utext that was passed as a parameter, but that the underlying text itself + * must not be altered while being referenced by the break iterator. + * + * @param text The UText used to change the text. + * @param status receives any error codes. + * @stable ICU 3.4 + */ + virtual void setText(UText *text, UErrorCode &status) = 0; [... truncated: 86570 lines follow ...]