Author: zooey Date: 2010-07-17 21:04:52 +0200 (Sat, 17 Jul 2010) New Revision: 37557 Changeset: http://dev.haiku-os.org/changeset/37557 Added: haiku/branches/developer/zooey/posix-locale/src/system/libroot/posix/string/strcoll.cpp haiku/branches/developer/zooey/posix-locale/src/system/libroot/posix/string/strxfrm.cpp Removed: haiku/branches/developer/zooey/posix-locale/src/system/libroot/posix/string/strcoll.c haiku/branches/developer/zooey/posix-locale/src/system/libroot/posix/string/strxfrm.c Modified: haiku/branches/developer/zooey/posix-locale/headers/private/libroot/locale/ICUCollateData.h haiku/branches/developer/zooey/posix-locale/headers/private/libroot/locale/ICULocaleBackend.h haiku/branches/developer/zooey/posix-locale/headers/private/libroot/locale/LocaleBackend.h haiku/branches/developer/zooey/posix-locale/src/system/libroot/posix/locale/ICUCollateData.cpp haiku/branches/developer/zooey/posix-locale/src/system/libroot/posix/locale/ICULocaleBackend.cpp haiku/branches/developer/zooey/posix-locale/src/system/libroot/posix/string/Jamfile haiku/branches/developer/zooey/posix-locale/src/tests/system/libroot/posix/locale_test.cpp Log: * implemented support for the last missing category, LC_COLLATE (so strcoll() and strxfrm() should work properly now) * cleaned up locale_test Modified: haiku/branches/developer/zooey/posix-locale/headers/private/libroot/locale/ICUCollateData.h =================================================================== --- haiku/branches/developer/zooey/posix-locale/headers/private/libroot/locale/ICUCollateData.h 2010-07-17 16:17:14 UTC (rev 37556) +++ haiku/branches/developer/zooey/posix-locale/headers/private/libroot/locale/ICUCollateData.h 2010-07-17 19:04:52 UTC (rev 37557) @@ -8,19 +8,32 @@ #include "ICUCategoryData.h" +#include <unicode/coll.h> + namespace BPrivate { class ICUCollateData : public ICUCategoryData { typedef ICUCategoryData inherited; + public: + ICUCollateData(); + virtual ~ICUCollateData(); + virtual status_t SetTo(const Locale& locale, const char* posixLocaleName); virtual status_t SetToPosix(); -protected: - static const uint16 kLCBufSize = 16; + status_t Strcoll(const char* a, const char* b, int& out); + status_t Strxfrm(char* out, const char* in, size_t size, + size_t& outSize); + +private: + status_t _ToUnicodeString(const char* in, + UnicodeString& out); + + Collator* fCollator; }; Modified: haiku/branches/developer/zooey/posix-locale/headers/private/libroot/locale/ICULocaleBackend.h =================================================================== --- haiku/branches/developer/zooey/posix-locale/headers/private/libroot/locale/ICULocaleBackend.h 2010-07-17 16:17:14 UTC (rev 37556) +++ haiku/branches/developer/zooey/posix-locale/headers/private/libroot/locale/ICULocaleBackend.h 2010-07-17 19:04:52 UTC (rev 37557) @@ -27,6 +27,8 @@ ICULocaleBackend(); virtual ~ICULocaleBackend(); + virtual void Initialize(LocaleDataBridge* dataBridge); + virtual const char* SetLocale(int category, const char* posixLocaleName); virtual const struct lconv* LocaleConv(); @@ -37,7 +39,9 @@ virtual const char* GetLanginfo(int index); - virtual void Initialize(LocaleDataBridge* dataBridge); + virtual status_t Strcoll(const char* a, const char* b, int& out); + virtual status_t Strxfrm(char* out, const char* in, size_t size, + size_t& outSize); private: const char* _QueryLocale(int category); Modified: haiku/branches/developer/zooey/posix-locale/headers/private/libroot/locale/LocaleBackend.h =================================================================== --- haiku/branches/developer/zooey/posix-locale/headers/private/libroot/locale/LocaleBackend.h 2010-07-17 16:17:14 UTC (rev 37556) +++ haiku/branches/developer/zooey/posix-locale/headers/private/libroot/locale/LocaleBackend.h 2010-07-17 19:04:52 UTC (rev 37557) @@ -90,6 +90,11 @@ virtual const char* GetLanginfo(int index) = 0; + virtual status_t Strcoll(const char* a, const char* b, + int& out) = 0; + virtual status_t Strxfrm(char* out, const char* in, size_t size, + size_t& outSize) = 0; + virtual void Initialize(LocaleDataBridge* dataBridge) = 0; static status_t LoadBackend(LocaleDataBridge* dataBridge); Modified: haiku/branches/developer/zooey/posix-locale/src/system/libroot/posix/locale/ICUCollateData.cpp =================================================================== --- haiku/branches/developer/zooey/posix-locale/src/system/libroot/posix/locale/ICUCollateData.cpp 2010-07-17 16:17:14 UTC (rev 37556) +++ haiku/branches/developer/zooey/posix-locale/src/system/libroot/posix/locale/ICUCollateData.cpp 2010-07-17 19:04:52 UTC (rev 37557) @@ -6,16 +6,37 @@ #include "ICUCollateData.h" +#include <unicode/unistr.h> + namespace BPrivate { +ICUCollateData::ICUCollateData() + : + fCollator(NULL) +{ +} + + +ICUCollateData::~ICUCollateData() +{ + delete fCollator; +} + + status_t ICUCollateData::SetTo(const Locale& locale, const char* posixLocaleName) { status_t result = inherited::SetTo(locale, posixLocaleName); - // TODO: implement! + if (result == B_OK) { + UErrorCode icuStatus = U_ZERO_ERROR; + delete fCollator; + fCollator = Collator::createInstance(fLocale, icuStatus); + if (!U_SUCCESS(icuStatus)) + return B_NO_MEMORY; + } return result; } @@ -26,10 +47,128 @@ { status_t result = inherited::SetToPosix(); - // TODO: implement! + if (result == B_OK) { + delete fCollator; + fCollator = NULL; + } return result; } +status_t +ICUCollateData::Strcoll(const char* a, const char* b, int& result) +{ + if (strcmp(fPosixLocaleName, "POSIX") == 0) { + // handle POSIX here as the collator ICU uses for that (english) is + // incompatible in too many ways + result = strcmp(a, b); + for (const char* aIter = a; *aIter != 0; ++aIter) { + if (*aIter < 0) + return B_BAD_VALUE; + } + for (const char* bIter = b; *bIter != 0; ++bIter) { + if (*bIter < 0) + return B_BAD_VALUE; + } + return B_OK; + } + + status_t status = B_OK; + UErrorCode icuStatus = U_ZERO_ERROR; + + if (strcasecmp(fGivenCharset, "utf-8") == 0) { + UCharIterator aIter, bIter; + uiter_setUTF8(&aIter, a, -1); + uiter_setUTF8(&bIter, b, -1); + + result = fCollator->compare(aIter, bIter, icuStatus); + } else { + UnicodeString unicodeA; + UnicodeString unicodeB; + + if (_ToUnicodeString(a, unicodeA) != B_OK + || _ToUnicodeString(b, unicodeB) != B_OK) { + status = B_BAD_VALUE; + } + + result = fCollator->compare(unicodeA, unicodeB, icuStatus); + } + + if (!U_SUCCESS(icuStatus)) + status = B_BAD_VALUE; + + return status; +} + + +status_t +ICUCollateData::Strxfrm(char* out, const char* in, size_t size, size_t& outSize) +{ + if (strcmp(fPosixLocaleName, "POSIX") == 0) { + // handle POSIX here as the collator ICU uses for that (english) is + // incompatible in too many ways + outSize = strlcpy(out, in, size); + for (const char* inIter = in; *inIter != 0; ++inIter) { + if (*inIter < 0) + return B_BAD_VALUE; + } + return B_OK; + } + + if (in == NULL) { + outSize = 0; + return B_OK; + } + + UErrorCode icuStatus = U_ZERO_ERROR; + + UnicodeString unicodeIn; + if (_ToUnicodeString(in, unicodeIn) != B_OK) + return B_BAD_VALUE; + + outSize = fCollator->getSortKey(unicodeIn, (uint8_t*)out, size); + if (!U_SUCCESS(icuStatus)) + return B_BAD_VALUE; + + return B_OK; +} + + +status_t +ICUCollateData::_ToUnicodeString(const char* in, UnicodeString& out) +{ + out.remove(); + + if (in == NULL) + return B_OK; + + size_t inLen = strlen(in); + if (inLen == 0) + return B_OK; + + UErrorCode icuStatus = U_ZERO_ERROR; + int32_t outLen = ucnv_toUChars(fConverter, NULL, 0, in, inLen, &icuStatus); + if (icuStatus != U_BUFFER_OVERFLOW_ERROR) + return B_BAD_VALUE; + if (outLen < 0) + return B_ERROR; + if (outLen == 0) + return B_OK; + + UChar* outBuf = out.getBuffer(outLen + 1); + icuStatus = U_ZERO_ERROR; + outLen + = ucnv_toUChars(fConverter, outBuf, outLen + 1, in, inLen, &icuStatus); + if (!U_SUCCESS(icuStatus)) { + out.releaseBuffer(0); + return B_BAD_VALUE; + } + + out.releaseBuffer(outLen); + + return B_OK; +} + + } // namespace BPrivate Modified: haiku/branches/developer/zooey/posix-locale/src/system/libroot/posix/locale/ICULocaleBackend.cpp =================================================================== --- haiku/branches/developer/zooey/posix-locale/src/system/libroot/posix/locale/ICULocaleBackend.cpp 2010-07-17 16:17:14 UTC (rev 37556) +++ haiku/branches/developer/zooey/posix-locale/src/system/libroot/posix/locale/ICULocaleBackend.cpp 2010-07-17 19:04:52 UTC (rev 37557) @@ -211,6 +211,21 @@ } +status_t +ICULocaleBackend::Strcoll(const char* a, const char* b, int& result) +{ + return fCollateData.Strcoll(a, b, result); +} + + +status_t +ICULocaleBackend::Strxfrm(char* out, const char* in, size_t size, + size_t& outSize) +{ + return fCollateData.Strxfrm(out, in, size, outSize); +} + + const char* ICULocaleBackend::_QueryLocale(int category) { Modified: haiku/branches/developer/zooey/posix-locale/src/system/libroot/posix/string/Jamfile =================================================================== --- haiku/branches/developer/zooey/posix-locale/src/system/libroot/posix/string/Jamfile 2010-07-17 16:17:14 UTC (rev 37556) +++ haiku/branches/developer/zooey/posix-locale/src/system/libroot/posix/string/Jamfile 2010-07-17 19:04:52 UTC (rev 37557) @@ -1,5 +1,7 @@ SubDir HAIKU_TOP src system libroot posix string ; +UsePrivateHeaders [ FDirName libroot locale ] ; + MergeObject posix_string.o : bcmp.c bcopy.c @@ -16,7 +18,7 @@ strchr.c strchrnul.c strcmp.c - strcoll.c + strcoll.cpp strcpy.c strcspn.c strdup.c @@ -35,7 +37,7 @@ strstr.c strtok.c strupr.c - strxfrm.c + strxfrm.cpp ; HaikuSubInclude arch $(TARGET_ARCH) ; Copied: haiku/branches/developer/zooey/posix-locale/src/system/libroot/posix/string/strcoll.cpp (from rev 37484, haiku/branches/developer/zooey/posix-locale/src/system/libroot/posix/string/strcoll.c) =================================================================== --- haiku/branches/developer/zooey/posix-locale/src/system/libroot/posix/string/strcoll.cpp (rev 0) +++ haiku/branches/developer/zooey/posix-locale/src/system/libroot/posix/string/strcoll.cpp 2010-07-17 19:04:52 UTC (rev 37557) @@ -0,0 +1,31 @@ +/* + * Copyright 2004, Axel Dörfler, axeld@xxxxxxxxxxxxxxxxx All rights reserved. + * Copyright 2010, Oliver Tappe, zooey@xxxxxxxxxxxxxxxx + * All rights reserved. Distributed under the terms of the MIT License. + */ + +#define B_USE_POSITIVE_POSIX_ERRORS +#include <errno.h> +#include <string.h> + +#include "LocaleBackend.h" + + +using BPrivate::gLocaleBackend; + + +extern "C" int +strcoll(const char *a, const char *b) +{ + if (gLocaleBackend != NULL) { + int result = 0; + status_t status = gLocaleBackend->Strcoll(a, b, result); + + if (status != B_OK) + errno = status; + + return result; + } + + return strcmp(a, b); +} Copied: haiku/branches/developer/zooey/posix-locale/src/system/libroot/posix/string/strxfrm.cpp (from rev 37484, haiku/branches/developer/zooey/posix-locale/src/system/libroot/posix/string/strxfrm.c) =================================================================== --- haiku/branches/developer/zooey/posix-locale/src/system/libroot/posix/string/strxfrm.cpp (rev 0) +++ haiku/branches/developer/zooey/posix-locale/src/system/libroot/posix/string/strxfrm.cpp 2010-07-17 19:04:52 UTC (rev 37557) @@ -0,0 +1,31 @@ +/* + * Copyright 2005, Ingo Weinhold <bonefish@xxxxxxxxxxxxxxx>. + * Copyright 2010, Oliver Tappe, zooey@xxxxxxxxxxxxxxxx + * All rights reserved. Distributed under the terms of the MIT license. + */ + +#define B_USE_POSITIVE_POSIX_ERRORS +#include <errno.h> +#include <string.h> + +#include "LocaleBackend.h" + + +using BPrivate::gLocaleBackend; + + +extern "C" size_t +strxfrm(char *out, const char *in, size_t size) +{ + if (gLocaleBackend != NULL) { + size_t outSize = 0; + status_t status = gLocaleBackend->Strxfrm(out, in, size, outSize); + + if (status != B_OK) + errno = status; + + return outSize; + } + + return strlcpy(out, in, size); +} Modified: haiku/branches/developer/zooey/posix-locale/src/tests/system/libroot/posix/locale_test.cpp =================================================================== --- haiku/branches/developer/zooey/posix-locale/src/tests/system/libroot/posix/locale_test.cpp 2010-07-17 16:17:14 UTC (rev 37556) +++ haiku/branches/developer/zooey/posix-locale/src/tests/system/libroot/posix/locale_test.cpp 2010-07-17 19:04:52 UTC (rev 37557) @@ -3,16 +3,22 @@ * Distributed under the terms of the MIT License. */ +#define B_USE_POSITIVE_POSIX_ERRORS #include <ctype.h> +#include <errno.h> #include <langinfo.h> #include <locale.h> #include <stdio.h> +#include <stdlib.h> #include <string.h> #include <time.h> #include <wctype.h> +// #pragma mark - setlocale ---------------------------------------------------- + + void test_setlocale() { @@ -34,18 +40,26 @@ "C", NULL }; + printf("setlocale()\n"); + + int problemCount = 0; for(int i = 0; locales[i] != NULL; ++i) { - printf("locale '%s': ", locales[i]); char* result = setlocale(LC_ALL, locales[i]); - if (!result) { - printf("not ok\n"); - continue; + if (!result || *result == 0) { + printf("\tPROBLEM: locale '%s' failed", locales[i]); + problemCount++; } - printf("ok (%s)\n", result); } + if (problemCount) + printf("\t%d problem(s) found!\n", problemCount); + else + printf("\tall fine\n"); } +// #pragma mark - localeconv --------------------------------------------------- + + void dumpGrouping(const char* grouping) { @@ -69,7 +83,6 @@ "nl_NL", "nb_NO", "nb_NO.utf-8", - "POSIX", NULL }; for(int i = 0; locales[i] != NULL; ++i) { @@ -112,41 +125,134 @@ } +// #pragma mark - strftime ----------------------------------------------------- + + +struct strftime_data { + const char* format; + const char* result; +}; + + void -test_strftime() +test_strftime(const char* locale, const strftime_data data[]) { - const char* locales[] = { - "POSIX", - "de_DE", - "de_DE.iso8859-1", - "hr_HR.ISO-8859-2", - "de_CH", - "gu_IN", - "it_IT", - "nl_NL", - "nb_NO", - "nb_NO.utf-8", - "POSIX", - NULL - }; - time_t nowSecs = time(NULL); + setlocale(LC_TIME, locale); + printf("strftime for '%s'\n", locale); + + time_t nowSecs = 1279391169; // pure magic tm* now = localtime(&nowSecs); - char buf[100]; - for(int i = 0; locales[i] != NULL; ++i) { - setlocale(LC_ALL, locales[i]); - printf("strftime for '%s'\n", locales[i]); - strftime(buf, 100, "%c", now); - printf("\tdatetime: '%s'\n", buf); - strftime(buf, 100, "%x", now); - printf("\tdate: '%s'\n", buf); - strftime(buf, 100, "%X", now); - printf("\ttime: '%s'\n", buf); - strftime(buf, 100, "%a %A %b %B", now); - printf("\tmonth/day names: '%s'\n", buf); + int problemCount = 0; + for(int i = 0; data[i].format != NULL; ++i) { + char buf[100]; + strftime(buf, 100, data[i].format, now); + if (strcmp(buf, data[i].result) != 0) { + printf("\tPROBLEM: strftime(\"%s\") = \"%s\" (expected \"%s\")\n", + data[i].format, buf, data[i].result); + problemCount++; + } } + if (problemCount) + printf("\t%d problem(s) found!\n", problemCount); + else + printf("\tall fine\n"); } +void +test_strftime() +{ + setenv("TZ", "GMT", 1); + + const strftime_data strftime_posix[] = { + { "%c", "Sat Jul 17 18:26:09 2010" }, + { "%x", "07/17/10" }, + { "%X", "18:26:09" }, + { "%a", "Sat" }, + { "%A", "Saturday" }, + { "%b", "Jul" }, + { "%B", "July" }, + { NULL, NULL } + }; + test_strftime("POSIX", strftime_posix); + + const strftime_data strftime_de[] = { + { "%c", "Samstag, 17. Juli 2010 18:26:09 GMT" }, + { "%x", "17.07.2010" }, + { "%X", "18:26:09" }, + { "%a", "Sa." }, + { "%A", "Samstag" }, + { "%b", "Jul" }, + { "%B", "Juli" }, + { NULL, NULL } + }; + test_strftime("de_DE.UTF-8", strftime_de); + + const strftime_data strftime_hr[] = { + { "%c", "subota, 17. srpnja 2010. 18:26:09 GMT" }, + { "%x", "17.07.2010." }, + { "%X", "18:26:09" }, + { "%a", "sub" }, + { "%A", "subota" }, + { "%b", "07." }, + { "%B", "srpnja" }, + { NULL, NULL } + }; + test_strftime("hr_HR.ISO8859-2", strftime_hr); + + const strftime_data strftime_gu[] = { + { "%c", "શનિવાર, 17 જુલાઈ, 2010 06:26:09 pm GMT" }, + { "%x", "17 જુલાઈ, 2010" }, + { "%X", "06:26:09 pm" }, + { "%a", "શનિ" }, + { "%A", "શનિવાર" }, + { "%b", "જુલાઈ" }, + { "%B", "જુલાઈ" }, + { NULL, NULL } + }; + test_strftime("gu_IN", strftime_gu); + + const strftime_data strftime_it[] = { + { "%c", "sabato 17 luglio 2010 18.26.09 GMT" }, + { "%x", "17/lug/2010" }, + { "%X", "18.26.09" }, + { "%a", "sab" }, + { "%A", "sabato" }, + { "%b", "lug" }, + { "%B", "luglio" }, + { NULL, NULL } + }; + test_strftime("it_IT", strftime_it); + + const strftime_data strftime_nl[] = { + { "%c", "zaterdag 17 juli 2010 18:26:09 GMT" }, + { "%x", "17 jul. 2010" }, + { "%X", "18:26:09" }, + { "%a", "za" }, + { "%A", "zaterdag" }, + { "%b", "jul." }, + { "%B", "juli" }, + { NULL, NULL } + }; + test_strftime("nl_NL", strftime_nl); + + const strftime_data strftime_nb[] = { + { "%c", "lørdag 17. juli 2010 kl. 18.26.09 GMT" }, + { "%x", "17. juli 2010" }, + { "%X", "18.26.09" }, + { "%a", "lør." }, + { "%A", "lørdag" }, + { "%b", "juli" }, + { "%B", "juli" }, + { NULL, NULL } + }; + test_strftime("nb_NO", strftime_nb); +} + + +// #pragma mark - ctype -------------------------------------------------------- + + unsigned short determineFullClassInfo(int i) { @@ -181,44 +287,46 @@ } -unsigned short -determineWideFullClassInfo(int i) +void +test_ctype(const char* locale, const unsigned short int classInfos[], + const int toLowerMap[], const int toUpperMap[]) { - unsigned short classInfo = 0; + setlocale(LC_CTYPE, locale); + printf("ctype of %s locale\n", locale); - if (iswblank(i)) - classInfo |= _ISblank; - if (iswcntrl(i)) - classInfo |= _IScntrl; - if (iswpunct(i)) - classInfo |= _ISpunct; - if (iswalnum(i)) - classInfo |= _ISalnum; - if (iswupper(i)) - classInfo |= _ISupper; - if (iswlower(i)) - classInfo |= _ISlower; - if (iswalpha(i)) - classInfo |= _ISalpha; - if (iswdigit(i)) - classInfo |= _ISdigit; - if (iswxdigit(i)) - classInfo |= _ISxdigit; - if (iswspace(i)) - classInfo |= _ISspace; - if (iswprint(i)) - classInfo |= _ISprint; - if (iswgraph(i)) - classInfo |= _ISgraph; + int problemCount = 0; + for (int i = 0; i < 256; ++i) { + unsigned short classInfo = determineFullClassInfo(i); - return classInfo; + if (classInfo != classInfos[i]) { + printf("\tPROBLEM: %d = %x (expected %x)\n", i, classInfo, + classInfos[i]); + problemCount++; + } + int lower = tolower(i); + if (lower != toLowerMap[i]) { + printf("\tPROBLEM: tolower(%d) = %x (expected %x)\n", i, lower, + toLowerMap[i]); + problemCount++; + } + int upper = toupper(i); + if (upper != toUpperMap[i]) { + printf("\tPROBLEM: toupper(%d) = %x (expected %x)\n", i, upper, + toUpperMap[i]); + problemCount++; + } + } + if (problemCount) + printf("\t%d problem(s) found!\n", problemCount); + else + printf("\tall fine\n"); } void -test_posix_ctype() +test_ctype() { - const unsigned short int classInfo_POSIX[256] = { + const unsigned short int classInfos_posix[256] = { /* 0 */ _IScntrl, _IScntrl, _IScntrl, _IScntrl, _IScntrl, _IScntrl, _IScntrl, _IScntrl, /* 8 */ _IScntrl, _ISblank|_IScntrl|_ISspace, _IScntrl|_ISspace, _IScntrl|_ISspace, _IScntrl|_ISspace, _IScntrl|_ISspace, _IScntrl, _IScntrl, /* 16 */ _IScntrl, _IScntrl, _IScntrl, _IScntrl, _IScntrl, _IScntrl, _IScntrl, _IScntrl, @@ -252,8 +360,7 @@ /* 240 */ 0, 0, 0, 0, 0, 0, 0, 0, /* 248 */ 0, 0, 0, 0, 0, 0, 0, 0, }; - - const int toLowerMap_POSIX[256] = { + const int toLowerMap_posix[256] = { /* 0 */ 0, 1, 2, 3, 4, 5, 6, 7, /* 8 */ 8, 9, 10, 11, 12, 13, 14, 15, /* 16 */ 16, 17, 18, 19, 20, 21, 22, 23, @@ -287,8 +394,7 @@ /* 240 */ 240, 241, 242, 243, 244, 245, 246, 247, /* 248 */ 248, 249, 250, 251, 252, 253, 254, 255, }; - - const int toUpperMap_POSIX[256] = { + const int toUpperMap_posix[256] = { /* 0 */ 0, 1, 2, 3, 4, 5, 6, 7, /* 8 */ 8, 9, 10, 11, 12, 13, 14, 15, /* 16 */ 16, 17, 18, 19, 20, 21, 22, 23, @@ -322,42 +428,9 @@ /* 240 */ 240, 241, 242, 243, 244, 245, 246, 247, /* 248 */ 248, 249, 250, 251, 252, 253, 254, 255, }; + test_ctype("POSIX", classInfos_posix, toLowerMap_posix, toUpperMap_posix); - setlocale(LC_CTYPE, "POSIX"); - printf("ctype of POSIX locale\n"); - int problemCount = 0; - for (int i = 0; i < 256; ++i) { - unsigned short classInfo = determineFullClassInfo(i); - - if (classInfo != classInfo_POSIX[i]) { - printf("\tPROBLEM: %d = %x (expected %x)\n", i, classInfo, - classInfo_POSIX[i]); - problemCount++; - } - int lower = tolower(i); - if (lower != toLowerMap_POSIX[i]) { - printf("\tPROBLEM: tolower(%d) = %x (expected %x)\n", i, lower, - toLowerMap_POSIX[i]); - problemCount++; - } - int upper = toupper(i); - if (upper != toUpperMap_POSIX[i]) { - printf("\tPROBLEM: toupper(%d) = %x (expected %x)\n", i, upper, - toUpperMap_POSIX[i]); - problemCount++; - } - } - if (problemCount) - printf("\t%d problem(s) found!\n", problemCount); - else - printf("\tall fine\n"); -} - - -void -test_iso8859_1_ctype() -{ - const unsigned short int classInfo_ISO8859_1[256] = { + const unsigned short int classInfos_de[256] = { /* 0 */ _IScntrl, _IScntrl, _IScntrl, _IScntrl, _IScntrl, _IScntrl, _IScntrl, _IScntrl, /* 8 */ _IScntrl, _ISblank|_IScntrl|_ISspace, _IScntrl|_ISspace, _IScntrl|_ISspace, _IScntrl|_ISspace, _IScntrl|_ISspace, _IScntrl, _IScntrl, /* 16 */ _IScntrl, _IScntrl, _IScntrl, _IScntrl, _IScntrl, _IScntrl, _IScntrl, _IScntrl, @@ -391,8 +464,7 @@ /* 240 */ _ISprint|_ISgraph|_ISalnum|_ISalpha|_ISlower, _ISprint|_ISgraph|_ISalnum|_ISalpha|_ISlower, _ISprint|_ISgraph|_ISalnum|_ISalpha|_ISlower, _ISprint|_ISgraph|_ISalnum|_ISalpha|_ISlower, _ISprint|_ISgraph|_ISalnum|_ISalpha|_ISlower, _ISprint|_ISgraph|_ISalnum|_ISalpha|_ISlower, _ISprint|_ISgraph|_ISalnum|_ISalpha|_ISlower, _ISprint|_ISgraph, /* 248 */ _ISprint|_ISgraph|_ISalnum|_ISalpha|_ISlower, _ISprint|_ISgraph|_ISalnum|_ISalpha|_ISlower, _ISprint|_ISgraph|_ISalnum|_ISalpha|_ISlower, _ISprint|_ISgraph|_ISalnum|_ISalpha|_ISlower, _ISprint|_ISgraph|_ISalnum|_ISalpha|_ISlower, _ISprint|_ISgraph|_ISalnum|_ISalpha|_ISlower, _ISprint|_ISgraph|_ISalnum|_ISalpha|_ISlower, _ISprint|_ISgraph|_ISalnum|_ISalpha|_ISlower, }; - - const int toLowerMap_ISO8859_1[256] = { + const int toLowerMap_de[256] = { /* 0 */ 0, 1, 2, 3, 4, 5, 6, 7, /* 8 */ 8, 9, 10, 11, 12, 13, 14, 15, /* 16 */ 16, 17, 18, 19, 20, 21, 22, 23, @@ -426,8 +498,7 @@ /* 240 */ 240, 241, 242, 243, 244, 245, 246, 247, /* 248 */ 248, 249, 250, 251, 252, 253, 254, 255, }; - - const int toUpperMap_ISO8859_1[256] = { + const int toUpperMap_de[256] = { /* 0 */ 0, 1, 2, 3, 4, 5, 6, 7, /* 8 */ 8, 9, 10, 11, 12, 13, 14, 15, /* 16 */ 16, 17, 18, 19, 20, 21, 22, 23, @@ -461,42 +532,9 @@ /* 240 */ 208, 209, 210, 211, 212, 213, 214, 247, /* 248 */ 216, 217, 218, 219, 220, 221, 222, 255, }; + test_ctype("de_DE.ISO8859-1", classInfos_de, toLowerMap_de, toUpperMap_de); - setlocale(LC_CTYPE, "de_DE.ISO8859-1"); - printf("ctype of de_DE.ISO8859-1 locale\n"); - int problemCount = 0; - for (int i = 0; i < 256; ++i) { - unsigned short classInfo = determineFullClassInfo(i); - - if (classInfo != classInfo_ISO8859_1[i]) { - printf("\tPROBLEM: %d = %x (expected %x)\n", i, classInfo, - classInfo_ISO8859_1[i]); - problemCount++; - } - int lower = tolower(i); - if (lower != toLowerMap_ISO8859_1[i]) { - printf("\tPROBLEM: tolower(%d) = %x (expected %x)\n", i, lower, - toLowerMap_ISO8859_1[i]); - problemCount++; - } - int upper = toupper(i); - if (upper != toUpperMap_ISO8859_1[i]) { - printf("\tPROBLEM: toupper(%d) = %x (expected %x)\n", i, upper, - toUpperMap_ISO8859_1[i]); - problemCount++; - } - } - if (problemCount) - printf("\t%d problem(s) found!\n", problemCount); - else - printf("\tall fine\n"); -} - - -void -test_utf8_ctype() -{ - const unsigned short int classInfo_UTF8[256] = { + const unsigned short int classInfos_utf8[256] = { /* 0 */ _IScntrl, _IScntrl, _IScntrl, _IScntrl, _IScntrl, _IScntrl, _IScntrl, _IScntrl, /* 8 */ _IScntrl, _ISblank|_IScntrl|_ISspace, _IScntrl|_ISspace, _IScntrl|_ISspace, _IScntrl|_ISspace, _IScntrl|_ISspace, _IScntrl, _IScntrl, /* 16 */ _IScntrl, _IScntrl, _IScntrl, _IScntrl, _IScntrl, _IScntrl, _IScntrl, _IScntrl, @@ -530,215 +568,68 @@ /* 240 */ 0, 0, 0, 0, 0, 0, 0, 0, /* 248 */ 0, 0, 0, 0, 0, 0, 0, 0, }; - - const int toLowerMap_UTF8[256] = { - /* 0 */ 0, 1, 2, 3, 4, 5, 6, 7, - /* 8 */ 8, 9, 10, 11, 12, 13, 14, 15, - /* 16 */ 16, 17, 18, 19, 20, 21, 22, 23, - /* 24 */ 24, 25, 26, 27, 28, 29, 30, 31, - /* 32 */ 32, 33, 34, 35, 36, 37, 38, 39, - /* 40 */ 40, 41, 42, 43, 44, 45, 46, 47, - /* 48 */ '0', '1', '2', '3', '4', '5', '6', '7', - /* 56 */ '8', '9', 58, 59, 60, 61, 62, 63, - /* 64 */ 64, 'a', 'b', 'c', 'd', 'e', 'f', 'g', - /* 72 */ 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', - /* 80 */ 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', - /* 88 */ 'x', 'y', 'z', 91, 92, 93, 94, 95, - /* 96 */ 96, 'a', 'b', 'c', 'd', 'e', 'f', 'g', - /* 104 */ 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', - /* 112 */ 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', - /* 120 */ 'x', 'y', 'z', 123, 124, 125, 126, 127, - /* 128 */ 128, 129, 130, 131, 132, 133, 134, 135, - /* 136 */ 136, 137, 138, 139, 140, 141, 142, 143, - /* 144 */ 144, 145, 146, 147, 148, 149, 150, 151, - /* 152 */ 152, 153, 154, 155, 156, 157, 158, 159, - /* 160 */ 160, 161, 162, 163, 164, 165, 166, 167, - /* 168 */ 168, 169, 170, 171, 172, 173, 174, 175, - /* 176 */ 176, 177, 178, 179, 180, 181, 182, 183, - /* 184 */ 184, 185, 186, 187, 188, 189, 190, 191, - /* 192 */ 192, 193, 194, 195, 196, 197, 198, 199, - /* 200 */ 200, 201, 202, 203, 204, 205, 206, 207, - /* 208 */ 208, 209, 210, 211, 212, 213, 214, 215, - /* 216 */ 216, 217, 218, 219, 220, 221, 222, 223, - /* 224 */ 224, 225, 226, 227, 228, 229, 230, 231, - /* 232 */ 232, 233, 234, 235, 236, 237, 238, 239, - /* 240 */ 240, 241, 242, 243, 244, 245, 246, 247, - /* 248 */ 248, 249, 250, 251, 252, 253, 254, 255, - }; - - const int toUpperMap_UTF8[256] = { - /* 0 */ 0, 1, 2, 3, 4, 5, 6, 7, - /* 8 */ 8, 9, 10, 11, 12, 13, 14, 15, - /* 16 */ 16, 17, 18, 19, 20, 21, 22, 23, - /* 24 */ 24, 25, 26, 27, 28, 29, 30, 31, - /* 32 */ 32, 33, 34, 35, 36, 37, 38, 39, - /* 40 */ 40, 41, 42, 43, 44, 45, 46, 47, - /* 48 */ '0', '1', '2', '3', '4', '5', '6', '7', - /* 56 */ '8', '9', 58, 59, 60, 61, 62, 63, - /* 64 */ 64, 'A', 'B', 'C', 'D', 'E', 'F', 'G', - /* 72 */ 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', - /* 80 */ 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', - /* 88 */ 'X', 'Y', 'Z', 91, 92, 93, 94, 95, - /* 96 */ 96, 'A', 'B', 'C', 'D', 'E', 'F', 'G', - /* 104 */ 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', - /* 112 */ 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', - /* 120 */ 'X', 'Y', 'Z', 123, 124, 125, 126, 127, - /* 128 */ 128, 129, 130, 131, 132, 133, 134, 135, - /* 136 */ 136, 137, 138, 139, 140, 141, 142, 143, - /* 144 */ 144, 145, 146, 147, 148, 149, 150, 151, - /* 152 */ 152, 153, 154, 155, 156, 157, 158, 159, - /* 160 */ 160, 161, 162, 163, 164, 165, 166, 167, - /* 168 */ 168, 169, 170, 171, 172, 173, 174, 175, - /* 176 */ 176, 177, 178, 179, 180, 181, 182, 183, - /* 184 */ 184, 185, 186, 187, 188, 189, 190, 191, - /* 192 */ 192, 193, 194, 195, 196, 197, 198, 199, - /* 200 */ 200, 201, 202, 203, 204, 205, 206, 207, - /* 208 */ 208, 209, 210, 211, 212, 213, 214, 215, - /* 216 */ 216, 217, 218, 219, 220, 221, 222, 223, - /* 224 */ 224, 225, 226, 227, 228, 229, 230, 231, - /* 232 */ 232, 233, 234, 235, 236, 237, 238, 239, - /* 240 */ 240, 241, 242, 243, 244, 245, 246, 247, - /* 248 */ 248, 249, 250, 251, 252, 253, 254, 255, - }; - - setlocale(LC_CTYPE, "de_DE.UTF-8"); - printf("ctype of de_DE.UTF-8 locale\n"); - int problemCount = 0; - for (int i = 0; i < 256; ++i) { - unsigned short classInfo = determineFullClassInfo(i); - - if (classInfo != classInfo_UTF8[i]) { - printf("\tPROBLEM: %d = %x (expected %x)\n", i, classInfo, - classInfo_UTF8[i]); - problemCount++; - } - int lower = tolower(i); - if (lower != toLowerMap_UTF8[i]) { - printf("\tPROBLEM: tolower(%d) = %x (expected %x)\n", i, lower, - toLowerMap_UTF8[i]); - problemCount++; - } - int upper = toupper(i); - if (upper != toUpperMap_UTF8[i]) { - printf("\tPROBLEM: toupper(%d) = %x (expected %x)\n", i, upper, - toUpperMap_UTF8[i]); - problemCount++; - } - } - if (problemCount) - printf("\t%d problem(s) found!\n", problemCount); - else - printf("\tall fine\n"); + test_ctype("de_DE.UTF-8", classInfos_utf8, toLowerMap_posix, + toUpperMap_posix); } -void -test_ctype() -{ - test_posix_ctype(); - test_iso8859_1_ctype(); - test_utf8_ctype(); -} +// #pragma mark - wctype ------------------------------------------------------- -void -test_posix_wctype() +unsigned short +determineWideFullClassInfo(int i) { - const wchar_t* text = L"Hi there, how do you do? (äÜößáéúíó€'¤¹²$%#@) 12"; + unsigned short classInfo = 0; - const unsigned short int wc_POSIX[48] = { - 0x48, 0x69, 0x20, 0x74, 0x68, 0x65, 0x72, 0x65, - 0x2c, 0x20, 0x68, 0x6f, 0x77, 0x20, 0x64, 0x6f, - 0x20, 0x79, 0x6f, 0x75, 0x20, 0x64, 0x6f, 0x3f, - 0x20, 0x28, 0xe4, 0xdc, 0xf6, 0xdf, 0xe1, 0xe9, - 0xfa, 0xed, 0xf3, 0x20ac, 0x27, 0xa4, 0xb9, 0xb2, - 0x24, 0x25, 0x23, 0x40, 0x29, 0x20, 0x31, 0x32 - }; - const unsigned short int classInfo_POSIX[48] = { - _ISprint|_ISgraph|_ISalpha|_ISalnum|_ISupper, _ISprint|_ISgraph|_ISalpha|_ISalnum|_ISlower, _ISspace|_ISprint|_ISblank, _ISprint|_ISgraph|_ISalpha|_ISalnum|_ISlower, - _ISprint|_ISgraph|_ISalpha|_ISalnum|_ISlower, _ISprint|_ISgraph|_ISalpha|_ISalnum|_ISlower|_ISxdigit, _ISprint|_ISgraph|_ISalpha|_ISalnum|_ISlower, _ISprint|_ISgraph|_ISalpha|_ISalnum|_ISlower|_ISxdigit, - _ISprint|_ISgraph|_ISpunct, _ISspace|_ISprint|_ISblank, _ISprint|_ISgraph|_ISalpha|_ISalnum|_ISlower, _ISprint|_ISgraph|_ISalpha|_ISalnum|_ISlower, - _ISprint|_ISgraph|_ISalpha|_ISalnum|_ISlower, _ISspace|_ISprint|_ISblank, _ISprint|_ISgraph|_ISalpha|_ISalnum|_ISlower|_ISxdigit, _ISprint|_ISgraph|_ISalpha|_ISalnum|_ISlower, - _ISspace|_ISprint|_ISblank, _ISprint|_ISgraph|_ISalpha|_ISalnum|_ISlower, _ISprint|_ISgraph|_ISalpha|_ISalnum|_ISlower, _ISprint|_ISgraph|_ISalpha|_ISalnum|_ISlower, - _ISspace|_ISprint|_ISblank, _ISprint|_ISgraph|_ISalpha|_ISalnum|_ISlower|_ISxdigit, _ISprint|_ISgraph|_ISalpha|_ISalnum|_ISlower, _ISprint|_ISgraph|_ISpunct, - _ISspace|_ISprint|_ISblank, _ISprint|_ISgraph|_ISpunct, _ISprint|_ISgraph|_ISalpha|_ISalnum|_ISlower, _ISprint|_ISgraph|_ISalpha|_ISalnum|_ISupper, - _ISprint|_ISgraph|_ISalpha|_ISalnum|_ISlower, _ISprint|_ISgraph|_ISalpha|_ISalnum|_ISlower, _ISprint|_ISgraph|_ISalpha|_ISalnum|_ISlower, _ISprint|_ISgraph|_ISalpha|_ISalnum|_ISlower, - _ISprint|_ISgraph|_ISalpha|_ISalnum|_ISlower, _ISprint|_ISgraph|_ISalpha|_ISalnum|_ISlower, _ISprint|_ISgraph|_ISalpha|_ISalnum|_ISlower, _ISprint|_ISgraph, - _ISprint|_ISgraph|_ISpunct, _ISprint|_ISgraph, _ISprint|_ISgraph, _ISprint|_ISgraph, - _ISprint|_ISgraph, _ISpunct|_ISprint|_ISgraph, _ISpunct|_ISprint|_ISgraph, _ISpunct|_ISprint|_ISgraph, - _ISpunct|_ISprint|_ISgraph, _ISspace|_ISprint|_ISblank, _ISprint|_ISgraph|_ISalnum|_ISdigit|_ISxdigit, _ISprint|_ISgraph|_ISalnum|_ISdigit|_ISxdigit - }; + if (iswblank(i)) + classInfo |= _ISblank; + if (iswcntrl(i)) + classInfo |= _IScntrl; + if (iswpunct(i)) + classInfo |= _ISpunct; + if (iswalnum(i)) + classInfo |= _ISalnum; + if (iswupper(i)) + classInfo |= _ISupper; + if (iswlower(i)) + classInfo |= _ISlower; + if (iswalpha(i)) + classInfo |= _ISalpha; + if (iswdigit(i)) + classInfo |= _ISdigit; + if (iswxdigit(i)) + classInfo |= _ISxdigit; + if (iswspace(i)) + classInfo |= _ISspace; + if (iswprint(i)) + classInfo |= _ISprint; + if (iswgraph(i)) + classInfo |= _ISgraph; - setlocale(LC_CTYPE, "POSIX"); - printf("wctype of POSIX locale\n"); [... truncated: 560 lines follow ...]