Author: mmlr Date: 2010-02-01 05:20:01 +0100 (Mon, 01 Feb 2010) New Revision: 35371 Changeset: http://dev.haiku-os.org/changeset/35371/haiku Modified: haiku/trunk/headers/os/support/String.h haiku/trunk/src/kits/support/Jamfile haiku/trunk/src/kits/support/String.cpp Log: Adding UTF-8 aware versions of most functions to BString. They are named after the corresponding normal functions but have a "Chars" in the name like "MoveCharsInto" or "AppendChars". Also added CountBytes() and CharAt(). This should make everyday string handling with multibyte strings a bit easier. Modified: haiku/trunk/headers/os/support/String.h =================================================================== --- haiku/trunk/headers/os/support/String.h 2010-01-31 23:41:44 UTC (rev 35370) +++ haiku/trunk/headers/os/support/String.h 2010-02-01 04:20:01 UTC (rev 35371) @@ -1,5 +1,5 @@ /* - * Copyright 2001-2009, Haiku Inc. All Rights Reserved. + * Copyright 2001-2010, Haiku Inc. All Rights Reserved. * Distributed under the terms of the MIT License. 
*/ #ifndef __BSTRING__ @@ -26,6 +26,8 @@ const char* String() const; int32 Length() const; int32 CountChars() const; + int32 CountBytes(int32 fromCharOffset, + int32 charCount) const; // Assignment BString& operator=(const BString& string); @@ -43,12 +45,21 @@ BString& SetTo(char c, int32 count); + BString& SetToChars(const char* string, int32 charCount); + BString& SetToChars(const BString& string, int32 charCount); + BString& AdoptChars(BString& from, int32 charCount); + // Substring copying BString& CopyInto(BString& into, int32 fromOffset, int32 length) const; void CopyInto(char* into, int32 fromOffset, int32 length) const; + BString& CopyCharsInto(BString& into, int32 fromCharOffset, + int32 charCount) const; + bool CopyCharsInto(char* into, int32* intoLength, + int32 fromCharOffset, int32 charCount) const; + // Appending BString& operator+=(const BString& string); BString& operator+=(const char* string); @@ -61,6 +72,9 @@ BString& Append(const char* string, int32 length); BString& Append(char c, int32 count); + BString& AppendChars(const BString& string, int32 charCount); + BString& AppendChars(const char* string, int32 charCount); + // Prepending BString& Prepend(const char* string); BString& Prepend(const BString& string); @@ -68,6 +82,10 @@ BString& Prepend(const BString& string, int32 length); BString& Prepend(char c, int32 count); + BString& PrependChars(const char* string, int32 charCount); + BString& PrependChars(const BString& string, + int32 charCount); + // Inserting BString& Insert(const char* string, int32 position); BString& Insert(const char* string, int32 length, @@ -81,9 +99,26 @@ int32 length, int32 position); BString& Insert(char c, int32 count, int32 position); + BString& InsertChars(const char* string, int32 charPosition); + BString& InsertChars(const char* string, int32 charCount, + int32 charPosition); + BString& InsertChars(const char* string, + int32 fromCharOffset, int32 charCount, + int32 charPosition); + BString& InsertChars(const 
BString& string, + int32 charPosition); + BString& InsertChars(const BString& string, int32 charCount, + int32 charPosition); + BString& InsertChars(const BString& string, + int32 fromCharOffset, int32 charCount, + int32 charPosition); + // Removing BString& Truncate(int32 newLength, bool lazy = true); + BString& TruncateChars(int32 newCharCount, bool lazy = true); + BString& Remove(int32 from, int32 length); + BString& RemoveChars(int32 fromCharOffset, int32 charCount); BString& RemoveFirst(const BString& string); BString& RemoveLast(const BString& string); @@ -93,11 +128,17 @@ BString& RemoveLast(const char* string); BString& RemoveAll(const char* string); - BString& RemoveSet(const char* setOfCharsToRemove); + BString& RemoveSet(const char* setOfBytesToRemove); + BString& RemoveCharsSet(const char* setOfCharsToRemove); BString& MoveInto(BString& into, int32 from, int32 length); void MoveInto(char* into, int32 from, int32 length); + BString& MoveCharsInto(BString& into, int32 fromCharOffset, + int32 charCount); + bool MoveCharsInto(char* into, int32* intoLength, + int32 fromCharOffset, int32 charCount); + // Compare functions bool operator<(const BString& string) const; bool operator<=(const BString& string) const; @@ -120,6 +161,12 @@ int Compare(const char* string) const; int Compare(const BString& string, int32 length) const; int Compare(const char* string, int32 length) const; + + int CompareChars(const BString& string, + int32 charCount) const; + int CompareChars(const char* string, + int32 charCount) const; + int ICompare(const BString& string) const; int ICompare(const char* string) const; int ICompare(const BString& string, int32 length) const; @@ -135,6 +182,11 @@ int32 FindFirst(char c) const; int32 FindFirst(char c, int32 fromOffset) const; + int32 FindFirstChars(const BString& string, + int32 fromCharOffset) const; + int32 FindFirstChars(const char* string, + int32 fromCharOffset) const; + int32 FindLast(const BString& string) const; int32 
FindLast(const char* string) const; int32 FindLast(const BString& string, @@ -144,6 +196,11 @@ int32 FindLast(char c) const; int32 FindLast(char c, int32 beforeOffset) const; + int32 FindLastChars(const BString& string, + int32 beforeCharOffset) const; + int32 FindLastChars(const char* string, + int32 beforeCharOffset) const; + int32 IFindFirst(const BString& string) const; int32 IFindFirst(const char* string) const; int32 IFindFirst(const BString& string, @@ -175,6 +232,12 @@ const char* withThis, int32 maxReplaceCount, int32 fromOffset = 0); + BString& ReplaceAllChars(const char* replaceThis, + const char* withThis, int32 fromCharOffset); + BString& ReplaceChars(const char* replaceThis, + const char* withThis, int32 maxReplaceCount, + int32 fromCharOffset); + BString& IReplaceFirst(char replaceThis, char withThis); BString& IReplaceLast(char replaceThis, char withThis); BString& IReplaceAll(char replaceThis, char withThis, @@ -191,9 +254,13 @@ const char* withThis, int32 maxReplaceCount, int32 fromOffset = 0); - BString& ReplaceSet(const char* setOfChars, char with); - BString& ReplaceSet(const char* setOfChars, const char* with); + BString& ReplaceSet(const char* setOfBytes, char with); + BString& ReplaceSet(const char* setOfBytes, + const char* with); + BString& ReplaceCharsSet(const char* setOfChars, + const char* with); + // Unchecked char access char operator[](int32 index) const; @@ -205,6 +272,9 @@ // Checked char access char ByteAt(int32 index) const; + const char* CharAt(int32 charIndex, int32* bytes = NULL) const; + bool CharAt(int32 charIndex, char* buffer, + int32* bytes) const; // Fast low-level manipulation char* LockBuffer(int32 maxLength); Modified: haiku/trunk/src/kits/support/Jamfile =================================================================== --- haiku/trunk/src/kits/support/Jamfile 2010-01-31 23:41:44 UTC (rev 35370) +++ haiku/trunk/src/kits/support/Jamfile 2010-02-01 04:20:01 UTC (rev 35371) @@ -2,7 +2,7 @@ SetSubDirSupportedPlatforms 
haiku libbe_test ; -UsePrivateHeaders shared app media support ; +UsePrivateHeaders app interface media shared support ; MergeObject <libbe>support_kit.o : Archivable.cpp Modified: haiku/trunk/src/kits/support/String.cpp =================================================================== --- haiku/trunk/src/kits/support/String.cpp 2010-01-31 23:41:44 UTC (rev 35370) +++ haiku/trunk/src/kits/support/String.cpp 2010-02-01 04:20:01 UTC (rev 35371) @@ -1,5 +1,5 @@ /* - * Copyright 2001-2009, Haiku, Inc. All Rights Reserved. + * Copyright 2001-2010, Haiku, Inc. All Rights Reserved. * Distributed under the terms of the MIT License. * * Authors: @@ -8,6 +8,7 @@ * Oliver Tappe (openbeos@xxxxxxxxxxxxxxx) * Axel Dörfler, axeld@xxxxxxxxxxxxxxxx * Julun <host.haiku@xxxxxx> + * Michael Lotz <mmlr@xxxxxxxx> */ @@ -23,7 +24,9 @@ #include <Debug.h> +#include <utf8_functions.h> + // define proper names for case-option of _DoReplace() #define KEEP_CASE false #define IGNORE_CASE true @@ -268,19 +271,15 @@ int32 BString::CountChars() const { - int32 count = 0; + return UTF8CountChars(fPrivateData, Length()); +} - const char* start = fPrivateData; - const char* end = fPrivateData + Length(); - while (start++ != end) { - count++; - - // Jump to next UTF8 character - for (; (*start & 0xc0) == 0x80; start++); - } - - return count; +int32 +BString::CountBytes(int32 fromCharOffset, int32 charCount) const +{ + return UTF8CountBytes( + fPrivateData + UTF8CountBytes(fPrivateData, fromCharOffset), charCount); } @@ -404,6 +403,27 @@ } +BString& +BString::SetToChars(const char* string, int32 charCount) +{ + return SetTo(string, UTF8CountBytes(string, charCount)); +} + + +BString& +BString::SetToChars(const BString& string, int32 charCount) +{ + return SetTo(string, UTF8CountBytes(string.String(), charCount)); +} + + +BString& +BString::AdoptChars(BString& string, int32 charCount) +{ + return Adopt(string, UTF8CountBytes(string.String(), charCount)); +} + + // #pragma mark - Substring copying @@ 
-426,6 +446,38 @@ } +BString& +BString::CopyCharsInto(BString& into, int32 fromCharOffset, + int32 charCount) const +{ + int32 fromOffset = UTF8CountBytes(fPrivateData, fromCharOffset); + int32 length = UTF8CountBytes(fPrivateData + fromOffset, charCount); + return CopyInto(into, fromOffset, length); +} + + +bool +BString::CopyCharsInto(char* into, int32* intoLength, int32 fromCharOffset, + int32 charCount) const +{ + if (into == NULL) + return false; + + int32 fromOffset = UTF8CountBytes(fPrivateData, fromCharOffset); + int32 length = UTF8CountBytes(fPrivateData + fromOffset, charCount); + length = min_clamp0(length, Length() - fromOffset); + + if (intoLength != NULL) { + if (*intoLength < length) + return false; + *intoLength = length; + } + + memcpy(into, fPrivateData + fromOffset, length); + return true; +} + + // #pragma mark - Appending @@ -483,6 +535,20 @@ } +BString& +BString::AppendChars(const BString& string, int32 charCount) +{ + return Append(string, UTF8CountBytes(string.String(), charCount)); +} + + +BString& +BString::AppendChars(const char* string, int32 charCount) +{ + return Append(string, UTF8CountBytes(string, charCount)); +} + + // #pragma mark - Prepending @@ -531,6 +597,20 @@ } +BString& +BString::PrependChars(const char* string, int32 charCount) +{ + return Prepend(string, UTF8CountBytes(string, charCount)); +} + + +BString& +BString::PrependChars(const BString& string, int32 charCount) +{ + return Prepend(string, UTF8CountBytes(string.String(), charCount)); +} + + // #pragma mark - Inserting @@ -626,6 +706,58 @@ } +BString& +BString::InsertChars(const char* string, int32 charPosition) +{ + return Insert(string, UTF8CountBytes(fPrivateData, charPosition)); +} + + +BString& +BString::InsertChars(const char* string, int32 charCount, int32 charPosition) +{ + return Insert(string, UTF8CountBytes(string, charCount), + UTF8CountBytes(fPrivateData, charPosition)); +} + + +BString& +BString::InsertChars(const char* string, int32 fromCharOffset, + 
int32 charCount, int32 charPosition) +{ + int32 fromOffset = UTF8CountBytes(string, fromCharOffset); + return Insert(string, fromOffset, + UTF8CountBytes(string + fromOffset, charCount), + UTF8CountBytes(fPrivateData, charPosition)); +} + + +BString& +BString::InsertChars(const BString& string, int32 charPosition) +{ + return Insert(string, UTF8CountBytes(fPrivateData, charPosition)); +} + + +BString& +BString::InsertChars(const BString& string, int32 charCount, int32 charPosition) +{ + return Insert(string, UTF8CountBytes(string.String(), charCount), + UTF8CountBytes(fPrivateData, charPosition)); +} + + +BString& +BString::InsertChars(const BString& string, int32 fromCharOffset, + int32 charCount, int32 charPosition) +{ + int32 fromOffset = UTF8CountBytes(string.String(), fromCharOffset); + return Insert(string, fromOffset, + UTF8CountBytes(string.String() + fromOffset, charCount), + UTF8CountBytes(fPrivateData, charPosition)); +} + + // #pragma mark - Removing @@ -645,6 +777,13 @@ BString& +BString::TruncateChars(int32 newCharCount, bool lazy) +{ + return Truncate(UTF8CountBytes(fPrivateData, newCharCount)); +} + + +BString& BString::Remove(int32 from, int32 length) { if (length > 0 && from < Length()) @@ -654,6 +793,15 @@ BString& +BString::RemoveChars(int32 fromCharOffset, int32 charCount) +{ + int32 fromOffset = UTF8CountBytes(fPrivateData, fromCharOffset); + return Remove(fromOffset, + UTF8CountBytes(fPrivateData + fromOffset, charCount)); +} + + +BString& BString::RemoveFirst(const BString& string) { if (string.Length() > 0) { @@ -729,13 +877,20 @@ BString& -BString::RemoveSet(const char* setOfCharsToRemove) +BString::RemoveSet(const char* setOfBytesToRemove) { - return ReplaceSet(setOfCharsToRemove, ""); + return ReplaceSet(setOfBytesToRemove, ""); } BString& +BString::RemoveCharsSet(const char* setOfCharsToRemove) +{ + return ReplaceCharsSet(setOfCharsToRemove, ""); +} + + +BString& BString::MoveInto(BString& into, int32 from, int32 length) { if (length) { 
@@ -756,6 +911,30 @@ } +BString& +BString::MoveCharsInto(BString& into, int32 fromCharOffset, int32 charCount) +{ + if (charCount > 0) { + CopyCharsInto(into, fromCharOffset, charCount); + RemoveChars(fromCharOffset, charCount); + } + + return into; +} + + +bool +BString::MoveCharsInto(char* into, int32* intoLength, int32 fromCharOffset, + int32 charCount) +{ + if (!CopyCharsInto(into, intoLength, fromCharOffset, charCount)) + return false; + + RemoveChars(fromCharOffset, charCount); + return true; +} + + // #pragma mark - Compare functions @@ -826,6 +1005,20 @@ int +BString::CompareChars(const BString& string, int32 charCount) const +{ + return Compare(string, UTF8CountBytes(fPrivateData, charCount)); +} + + +int +BString::CompareChars(const char* string, int32 charCount) const +{ + return Compare(string, UTF8CountBytes(fPrivateData, charCount)); +} + + +int BString::ICompare(const BString& string) const { return strcasecmp(String(), string.String()); @@ -940,6 +1133,20 @@ int32 +BString::FindFirstChars(const BString& string, int32 fromCharOffset) const +{ + return FindFirst(string, UTF8CountBytes(fPrivateData, fromCharOffset)); +} + + +int32 +BString::FindFirstChars(const char* string, int32 fromCharOffset) const +{ + return FindFirst(string, UTF8CountBytes(fPrivateData, fromCharOffset)); +} + + +int32 BString::FindLast(const BString& string) const { return _FindBefore(string.String(), Length(), string.Length()); @@ -1023,6 +1230,20 @@ int32 +BString::FindLastChars(const BString& string, int32 beforeCharOffset) const +{ + return FindLast(string, UTF8CountBytes(fPrivateData, beforeCharOffset)); +} + + +int32 +BString::FindLastChars(const char* string, int32 beforeCharOffset) const +{ + return FindLast(string, UTF8CountBytes(fPrivateData, beforeCharOffset)); +} + + +int32 BString::IFindFirst(const BString& string) const { return _IFindAfter(string.String(), 0, string.Length()); @@ -1245,6 +1466,24 @@ BString& +BString::ReplaceAllChars(const char* replaceThis, const 
char* withThis, + int32 fromCharOffset) +{ + return ReplaceAll(replaceThis, withThis, + UTF8CountBytes(fPrivateData, fromCharOffset)); +} + + +BString& +BString::ReplaceChars(const char* replaceThis, const char* withThis, + int32 maxReplaceCount, int32 fromCharOffset) +{ + return Replace(replaceThis, withThis, maxReplaceCount, + UTF8CountBytes(fPrivateData, fromCharOffset)); +} + + +BString& BString::IReplaceFirst(char replaceThis, char withThis) { char tmp[2] = { replaceThis, '\0' }; @@ -1385,9 +1624,9 @@ BString& -BString::ReplaceSet(const char* setOfChars, char with) +BString::ReplaceSet(const char* setOfBytes, char with) { - if (!setOfChars || strcspn(fPrivateData, setOfChars) >= uint32(Length())) + if (!setOfBytes || strcspn(fPrivateData, setOfBytes) >= uint32(Length())) return *this; if (_MakeWritable() != B_OK) @@ -1396,7 +1635,7 @@ int32 offset = 0; int32 length = Length(); for (int32 pos;;) { - pos = strcspn(fPrivateData + offset, setOfChars); + pos = strcspn(fPrivateData + offset, setOfBytes); offset += pos; if (offset >= length) @@ -1411,16 +1650,16 @@ BString& -BString::ReplaceSet(const char* setOfChars, const char* with) +BString::ReplaceSet(const char* setOfBytes, const char* with) { - if (!setOfChars || !with - || strcspn(fPrivateData, setOfChars) >= uint32(Length())) + if (!setOfBytes || !with + || strcspn(fPrivateData, setOfBytes) >= uint32(Length())) return *this; // delegate simple case int32 withLen = strlen(with); if (withLen == 1) - return ReplaceSet(setOfChars, *with); + return ReplaceSet(setOfBytes, *with); if (_MakeWritable() != B_OK) return *this; @@ -1431,7 +1670,7 @@ PosVect positions; for (int32 offset = 0; offset < len; offset += (pos + searchLen)) { - pos = strcspn(fPrivateData + offset, setOfChars); + pos = strcspn(fPrivateData + offset, setOfBytes); if (pos + offset >= len) break; if (!positions.Add(offset + pos)) @@ -1443,6 +1682,49 @@ } +BString& +BString::ReplaceCharsSet(const char* setOfChars, const char* with) +{ + if 
(!setOfChars || !with) + return *this; + + int32 setCharCount = UTF8CountChars(setOfChars, -1); + if ((uint32)setCharCount == strlen(setOfChars)) { + // no multi-byte chars at all + return ReplaceSet(setOfChars, with); + } + + BString setString(setOfChars); + BString result; + + int32 withLength = withLength; + int32 charCount = CountChars(); + for (int32 i = 0; i < charCount; i++) { + int32 charLength; + const char* sourceChar = CharAt(i, &charLength); + bool match = false; + + for (int32 j = 0; j < setCharCount; j++) { + int32 setCharLength; + const char* setChar = setString.CharAt(j, &setCharLength); + if (charLength == setCharLength + && memcmp(sourceChar, setChar, charLength) == 0) { + match = true; + break; + } + } + + if (match) + result.Append(with, withLength); + else + result.Append(sourceChar, charLength); + } + + *this = result; + return *this; +} + + // #pragma mark - Unchecked char access @@ -1469,6 +1751,32 @@ #endif +const char* +BString::CharAt(int32 charIndex, int32* bytes) const +{ + int32 offset = UTF8CountBytes(fPrivateData, charIndex); + if (bytes != NULL) + *bytes = UTF8NextCharLen(fPrivateData + offset); + return fPrivateData + offset; +} + + +bool +BString::CharAt(int32 charIndex, char* buffer, int32* bytes) const +{ + int32 length; + const char* charAt = CharAt(charIndex, &length); + if (bytes != NULL) { + if (*bytes < length) + return false; + *bytes = length; + } + + memcpy(buffer, charAt, length); + return true; +} + + // #pragma mark - Fast low-level manipulation