From 543d6fff956361a3fdd1de89392eb2cc308670b1 Mon Sep 17 00:00:00 2001 From: Marc Cromme Date: Mon, 30 Apr 2007 13:56:52 +0000 Subject: [PATCH] checked in test for ICU uppercase lowercase, title and foldcase char mapping --- src/Makefile.am | 3 +- src/icu_I18N.c | 193 ++++++++++++++++++++++++++++++++++++++++++++ src/icu_I18N.h | 73 +++++++++++++++++ src/test_icu_I18N.c | 222 +++++++++++++++++++++++++++++++++++++++++++++++++-- 4 files changed, 485 insertions(+), 6 deletions(-) create mode 100644 src/icu_I18N.c create mode 100644 src/icu_I18N.h diff --git a/src/Makefile.am b/src/Makefile.am index 5a5c646..1bcc3cf 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -1,4 +1,4 @@ -# $Id: Makefile.am,v 1.18 2007-04-27 14:31:15 marc Exp $ +# $Id: Makefile.am,v 1.19 2007-04-30 13:56:52 marc Exp $ bin_PROGRAMS = pazpar2 check_PROGRAMS = test_config \ @@ -21,6 +21,7 @@ AM_CFLAGS = $(YAZINC) $(ICU_CPPFLAGS) libpazpar2_a_SOURCES = config.c config.h eventl.c eventl.h \ http.c http_command.c http_command.h http.h \ + icu_I18N.h icu_I18N.c \ logic.c pazpar2.h \ record.h record.c reclists.c reclists.h \ relevance.c relevance.h termlists.c termlists.h \ diff --git a/src/icu_I18N.c b/src/icu_I18N.c new file mode 100644 index 0000000..060030b --- /dev/null +++ b/src/icu_I18N.c @@ -0,0 +1,193 @@ +/* $Id: icu_I18N.c,v 1.1 2007-04-30 13:56:52 marc Exp $ + Copyright (c) 2006-2007, Index Data. + +This file is part of Pazpar2. + +Pazpar2 is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 2, or (at your option) any later +version. + +Pazpar2 is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with Pazpar2; see the file LICENSE. 
+If not, write to the
+Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
+02111-1307, USA.
+ */
+
+#if HAVE_CONFIG_H
+#include "cconfig.h"
+#endif
+
+#define USE_TIMING 0
+#if USE_TIMING
+#include <yaz/timing.h>
+#endif
+
+
+#ifdef HAVE_ICU
+#include "icu_I18N.h"
+
+#include <yaz/log.h>
+
+#include <stdlib.h>
+#include <string.h>
+#include <unicode/ustring.h> /* some more string fcns*/
+
+
+/**
+ * Logs a warning through yaz_log() when status is an ICU failure code,
+ * i.e. when U_FAILURE(status) is true.
+ *
+ * Returns the status that was passed in, unchanged.
+ */
+int icu_check_status (UErrorCode status)
+{
+    if (U_FAILURE(status))
+        yaz_log(YLOG_WARN,
+                "ICU Error: %d %s\n", status, u_errorName(status));
+    return status;
+}
+
+
+/**
+ * Converts the NUL-terminated UTF-8 string utf8 to UTF-16.
+ *
+ * utf16      destination buffer
+ * utf16_cap  capacity of utf16, counted in UChars
+ * utf16_len  receives the number of UChars produced
+ * utf8       source string; a null pointer is rejected
+ *
+ * Returns utf16 on success and 0 on failure.
+ */
+UChar* icu_utf16_from_utf8(UChar *utf16,
+                           int32_t utf16_cap,
+                           int32_t *utf16_len,
+                           const char *utf8)
+{
+    if (!utf8)
+        return 0;
+
+    return icu_utf16_from_utf8n(utf16, utf16_cap, utf16_len,
+                                utf8, strlen(utf8));
+}
+
+
+/**
+ * Converts the first utf8_len bytes of utf8 to UTF-16.
+ *
+ * utf16      destination buffer
+ * utf16_cap  capacity of utf16, counted in UChars
+ * utf16_len  receives the number of UChars produced
+ *
+ * Only ICU failure codes are treated as errors; a warning status still
+ * counts as success, so callers must rely on *utf16_len and not on a
+ * terminating NUL.
+ *
+ * Returns utf16 on success and 0 on failure.
+ */
+UChar* icu_utf16_from_utf8n(UChar *utf16,
+                            int32_t utf16_cap,
+                            int32_t *utf16_len,
+                            const char *utf8,
+                            size_t utf8_len)
+{
+    UErrorCode status = U_ZERO_ERROR;
+
+    u_strFromUTF8(utf16, utf16_cap, utf16_len, utf8, (int32_t) utf8_len,
+                  &status);
+
+    icu_check_status(status);
+    if (U_FAILURE(status))
+        return 0;
+
+    return utf16;
+}
+
+
+/**
+ * Converts utf16_len UChars of utf16 to UTF-8.
+ *
+ * utf8      destination buffer
+ * utf8_cap  capacity of utf8 in bytes
+ * utf8_len  if non-null, receives the length reported by u_strToUTF8()
+ *
+ * Only ICU failure codes are treated as errors, so the result is not
+ * guaranteed to be NUL-terminated when it fills utf8 completely.
+ *
+ * Returns utf8 on success and 0 on failure.
+ */
+char* icu_utf16_to_utf8(char *utf8,
+                        size_t utf8_cap,
+                        size_t *utf8_len,
+                        const UChar *utf16,
+                        int32_t utf16_len)
+{
+    UErrorCode status = U_ZERO_ERROR;
+    /* ICU reports an int32_t; never write it through a size_t pointer */
+    int32_t len8 = 0;
+
+    u_strToUTF8(utf8, (int32_t) utf8_cap, &len8,
+                utf16, utf16_len, &status);
+
+    if (utf8_len)
+        *utf8_len = (size_t) len8;
+
+    icu_check_status(status);
+    if (U_FAILURE(status))
+        return 0;
+
+    return utf8;
+}
+
+
+/**
+ * Case-maps src16_len UChars of src16 into dest16.
+ *
+ * dest16_cap  capacity of dest16, counted in UChars
+ * locale      ICU locale name handed to the mapping functions
+ *             (not used for case folding)
+ * action      'l' lower case, 'u' upper case, 't' title case,
+ *             'f' case folding with U_FOLD_CASE_DEFAULT
+ *
+ * Returns the number of UChars written, or 0 for an unknown action or
+ * when ICU reports a failure code.
+ */
+int32_t icu_utf16_casemap(UChar *dest16, int32_t dest16_cap,
+                          const UChar *src16, int32_t src16_len,
+                          const char *locale, char action)
+{
+    UErrorCode status = U_ZERO_ERROR;
+    int32_t dest16_len = 0;
+
+    switch(action) {
+    case 'l':
+        dest16_len = u_strToLower(dest16, dest16_cap, src16, src16_len,
+                                  locale, &status);
+        break;
+    case 'u':
+        dest16_len = u_strToUpper(dest16, dest16_cap, src16, src16_len,
+                                  locale, &status);
+        break;
+    case 't':
+        dest16_len = u_strToTitle(dest16, dest16_cap, src16, src16_len,
+                                  0, locale, &status);
+        break;
+    case 'f':
+        dest16_len = u_strFoldCase(dest16, dest16_cap, src16, src16_len,
+                                   U_FOLD_CASE_DEFAULT, &status);
+        break;
+    default:
+        return 0;
+    }
+
+    icu_check_status(status);
+    if (U_FAILURE(status))
+        return 0;
+
+    return dest16_len;
+}
+
+
+/**
+ * Case-maps the NUL-terminated UTF-8 string src8 and returns a copy of
+ * the result allocated from nmem.
+ *
+ * nmem       memory handle the returned string is allocated from
+ * buf        caller supplied work buffer that receives the UTF-8 result
+ * buf_cap    size of buf in bytes, including room for the final NUL
+ * dest8_len  if non-null, receives the byte length of the result
+ *            (0 whenever the call fails)
+ * src8       source string
+ * locale     ICU locale name, see icu_utf16_casemap()
+ * action     'l', 'u', 't' or 'f', see icu_utf16_casemap()
+ *
+ * Returns 0 when buf or src8 is null, buf_cap is 0, src8 is empty or
+ * too long, memory runs out, src8 cannot be converted, or the result
+ * does not fit into buf. When only the case mapping step fails (for
+ * example an unknown action) an empty string is returned and *dest8_len
+ * stays 0.
+ */
+char * icu_casemap(NMEM nmem, char *buf, size_t buf_cap,
+                   size_t *dest8_len, const char *src8,
+                   const char *locale, char action)
+{
+    /* keeps every capacity computed below inside the int32_t range */
+    enum { max_src8_len = 0x0fffffff };
+
+    size_t src8_len = 0;
+    size_t len8 = 0;
+    int32_t src16_cap = 0;
+    int32_t src16_len = 0;
+    int32_t dest16_cap = 0;
+    int32_t dest16_len = 0;
+    UChar *src16 = 0;
+    UChar *dest16 = 0;
+    char *dest8 = 0;
+
+    if (dest8_len)
+        *dest8_len = 0;
+
+    if (!buf || !(buf_cap > 0) || !src8)
+        return 0;
+
+    src8_len = strlen(src8);
+    if (!src8_len || src8_len > max_src8_len)
+        return 0;
+
+    /* UTF-16 never needs more code units than UTF-8 needs bytes */
+    src16_cap = (int32_t) src8_len;
+    /* room for every code unit growing threefold; should that ever be
+       too small ICU reports an overflow instead of writing past the end */
+    dest16_cap = 3 * src16_cap;
+
+    /* The UTF-16 work area is properly aligned and separate from buf, so
+       no conversion step reads from the memory it is writing to. */
+    src16 = malloc(((size_t) src16_cap + (size_t) dest16_cap)
+                   * sizeof(*src16));
+    if (!src16)
+        return 0;
+    dest16 = src16 + src16_cap;
+
+    if (icu_utf16_from_utf8n(src16, src16_cap, &src16_len,
+                             src8, src8_len)) {
+        dest16_len = icu_utf16_casemap(dest16, dest16_cap,
+                                       src16, src16_len,
+                                       locale, action);
+
+        /* one byte of buf is held back for the terminating NUL */
+        if (icu_utf16_to_utf8(buf, buf_cap - 1, &len8,
+                              dest16, dest16_len)) {
+            buf[len8] = '\0';
+
+            if (dest8_len)
+                *dest8_len = len8;
+
+            dest8 = nmem_strdup(nmem, buf);
+        }
+    }
+
+    free(src16);
+    return dest8;
+}
+
+
+
+
+#endif // HAVE_ICU
+
+
+
+
+/*
+ * Local variables:
+ * c-basic-offset: 4
+ * indent-tabs-mode: nil
+ * End:
+ * vim: shiftwidth=4 tabstop=8 expandtab
+ */
diff --git a/src/icu_I18N.h b/src/icu_I18N.h
new file mode 100644
index 0000000..9020719
--- /dev/null
+++ b/src/icu_I18N.h
@@ -0,0 +1,73 @@
+/* $Id: icu_I18N.h,v 1.1 2007-04-30 13:56:52 marc Exp $
+   Copyright (c) 2006-2007, Index Data.
+
+This file is part of Pazpar2.
+
+Pazpar2 is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 2, or (at your option) any later
+version.
+
+Pazpar2 is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with Pazpar2; see the file LICENSE. If not, write to the
+Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
+02111-1307, USA.
+ */
+
+#ifndef ICU_I18NL_H
+#define ICU_I18NL_H
+
+#ifdef HAVE_ICU
+
+#include <yaz/nmem.h>
+
+#include <unicode/uchar.h> /* char names */
+
+
+/* Logs ICU failure codes through yaz_log() and returns status as is. */
+int icu_check_status (UErrorCode status);
+
+/* Converts the NUL-terminated UTF-8 string utf8 into utf16, which holds
+   up to utf16_cap UChars; *utf16_len receives the number of UChars
+   produced. Returns utf16, or 0 on failure. */
+UChar* icu_utf16_from_utf8(UChar *utf16,
+                           int32_t utf16_cap,
+                           int32_t *utf16_len,
+                           const char *utf8);
+
+/* Same as icu_utf16_from_utf8(), but converts exactly utf8_len bytes. */
+UChar* icu_utf16_from_utf8n(UChar *utf16,
+                            int32_t utf16_cap,
+                            int32_t *utf16_len,
+                            const char *utf8,
+                            size_t utf8_len);
+
+
+/* Converts utf16_len UChars of utf16 into utf8, which holds up to
+   utf8_cap bytes. Returns utf8, or 0 on failure. */
+char* icu_utf16_to_utf8(char *utf8,
+                        size_t utf8_cap,
+                        size_t *utf8_len,
+                        const UChar *utf16,
+                        int32_t utf16_len);
+
+
+/* Case-maps src16 into dest16. action selects the mapping: 'l' lower
+   case, 'u' upper case, 't' title case, 'f' case folding. Returns the
+   length of the result in UChars, or 0 for an unknown action or an ICU
+   failure. */
+int32_t icu_utf16_casemap(UChar *dest16, int32_t dest16_cap,
+                          const UChar *src16, int32_t src16_len,
+                          const char *locale, char action);
+
+/* Case-maps the UTF-8 string src8 using buf (buf_cap bytes) as work
+   space and returns a copy of the result made with nmem_strdup(). If
+   dest8_len is non-null it receives the byte length of the result.
+   Returns 0 when buf is null, buf_cap is 0 or src8 is empty. */
+char * icu_casemap(NMEM nmem, char *buf, size_t buf_cap,
+                   size_t *dest8_len, const char *src8,
+                   const char *locale, char action);
+
+
+#endif // HAVE_ICU
+#endif // ICU_I18NL_H
diff --git a/src/test_icu_I18N.c b/src/test_icu_I18N.c
index 31c9e03..3b56883 100644
--- a/src/test_icu_I18N.c
+++ b/src/test_icu_I18N.c
@@ -1,4 +1,4 @@
-/* $Id: test_icu_I18N.c,v 1.1 2007-04-27 14:31:15 marc Exp $
+/* $Id: test_icu_I18N.c,v 1.2 2007-04-30 13:56:52 marc Exp $
    Copyright (c) 2006-2007, Index Data.
 
 This file is part of Pazpar2.
@@ -33,16 +33,223 @@ Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
 #ifdef HAVE_ICU
+#include "icu_I18N.h"
+#include <string.h>
+
+// Runs icu_casemap() on a mixed-case English sentence: lower, upper,
+// fold and title mapping, an undefined locale, and three calls with
+// invalid arguments that must report a zero result length.
+void test_icu_I18N_casemap_en(int argc, char **argv)
+{
+
+    size_t buf_cap = 128;
+    char buf[buf_cap];
+    size_t dest8_len = 0;
+    NMEM nmem = nmem_create();
+    char * dest8 = 0;
+
+    const char * src8 = "A ReD fOx hunTS sQUirriLs";
+    size_t src8_len = strlen(src8);
+
+    printf("original string: '%s' (%d)\n", src8, (int) src8_len);
+
+    //these shall succeed
+    dest8 = icu_casemap(nmem, buf, buf_cap, &dest8_len,
+                        src8, "en", 'l');
+    YAZ_CHECK(dest8_len);
+    printf("icu_casemap 'en:l' '%s' (%d)\n", dest8, (int) dest8_len);
+
+
+    dest8 = icu_casemap(nmem, buf, buf_cap, &dest8_len,
+                        src8, "en", 'u');
+    YAZ_CHECK(dest8_len);
+    printf("icu_casemap 'en:u' '%s' (%d)\n", dest8, (int) dest8_len);
+
+
+    dest8 = icu_casemap(nmem, buf, buf_cap, &dest8_len,
+                        src8, "en", 'f');
+    YAZ_CHECK(dest8_len);
+    printf("icu_casemap 'en:f' '%s' (%d)\n", dest8, (int) dest8_len);
+
+
+    // some calling error needs investigation
+    dest8 = icu_casemap(nmem, buf, buf_cap, &dest8_len,
+                        src8, "en", 't');
+    YAZ_CHECK(0 == dest8_len);
+    printf("icu_casemap 'en:t' '%s' (%d)\n", dest8, (int) dest8_len);
+
+
+    // attention: does not fail even if no locale 'xy_zz' defined
+    // it seems to default to english locale
+    dest8 = icu_casemap(nmem, buf, buf_cap, &dest8_len,
+                        src8, "zz_abc", 'l');
+    YAZ_CHECK(dest8_len);
+    printf("icu_casemap 'zz:l' '%s' (%d)\n", dest8, (int) dest8_len);
+
+
+    // shall fail - no buf buffer defined
+    dest8 = icu_casemap(nmem, 0, buf_cap, &dest8_len,
+                        src8, "en", 'l');
+    YAZ_CHECK(0 == dest8_len);
+    //printf("icu_casemap 'en:l' '%s' (%d)\n", dest8, (int) dest8_len);
+
+    // shall fail - no buf_cap defined
+    dest8 = icu_casemap(nmem, buf, 0, &dest8_len,
+                        src8, "en", 'l');
+    YAZ_CHECK(0 == dest8_len);
+    //printf("icu_casemap 'en:l' '%s' (%d)\n", dest8, (int) dest8_len);
+
+    // shall fail - no action 'x' defined
+    dest8 = icu_casemap(nmem, buf, buf_cap, &dest8_len,
+                        src8, "en", 'x');
+    YAZ_CHECK(0 == dest8_len);
+    //printf("icu_casemap 'en:x' '%s' (%d)\n", dest8, (int) dest8_len);
+
+
+
+
+
+    nmem_destroy(nmem);
+
+    YAZ_CHECK(0 == 0);
+    //YAZ_CHECK_EQ(0, 1);
+}
+
+// Runs icu_casemap() with the Danish locale on a sentence containing
+// the letters æ, ø and å in both cases.
+void test_icu_I18N_casemap_da(int argc, char **argv)
+{
+
+    size_t buf_cap = 128;
+    char buf[buf_cap];
+    size_t dest8_len = 0;
+    NMEM nmem = nmem_create();
+    char * dest8 = 0;
+
+    const char * src8 = "åh ÆbLE, øs fLØde i Åen efter bLåBærGRødeN";
+    size_t src8_len = strlen(src8);
+
+    printf("original string: '%s' (%d)\n", src8, (int) src8_len);
+
+    //these shall succeed
+    dest8 = icu_casemap(nmem, buf, buf_cap, &dest8_len,
+                        src8, "da", 'l');
+    YAZ_CHECK(dest8_len);
+    printf("icu_casemap 'da:l' '%s' (%d)\n", dest8, (int) dest8_len);
+
+
+    dest8 = icu_casemap(nmem, buf, buf_cap, &dest8_len,
+                        src8, "da", 'u');
+    YAZ_CHECK(dest8_len);
+    printf("icu_casemap 'da:u' '%s' (%d)\n", dest8, (int) dest8_len);
+
+
+    dest8 = icu_casemap(nmem, buf, buf_cap, &dest8_len,
+                        src8, "da", 'f');
+    YAZ_CHECK(dest8_len);
+    printf("icu_casemap 'da:f' '%s' (%d)\n", dest8, (int) dest8_len);
+    dest8 = icu_casemap(nmem, buf, buf_cap, &dest8_len,
+                        src8, "da", 't');
+    YAZ_CHECK(dest8_len);
+    printf("icu_casemap 'da:t' '%s' (%d)\n", dest8, (int) dest8_len);
-void test_icu_I18N(int argc, char **argv)
+    nmem_destroy(nmem);
+
+    YAZ_CHECK(0 == 0);
+    //YAZ_CHECK_EQ(0, 1);
+}
+
+// Runs icu_casemap() with the German locale on a sentence containing
+// umlauts and a sharp s.
+void test_icu_I18N_casemap_de(int argc, char **argv)
+{
+
+    size_t buf_cap = 128;
+    char buf[buf_cap];
+    size_t dest8_len = 0;
+    NMEM nmem = nmem_create();
+    char * dest8 = 0;
+
+    const char * src8 = "zWÖlf ärgerliche Würste rollen ÜBer die StRAße";
+    size_t src8_len = strlen(src8);
+
+    printf("original string: '%s' (%d)\n", src8, (int) src8_len);
+
+    //these shall succeed
+    dest8 = icu_casemap(nmem, buf, buf_cap, &dest8_len,
+                        src8, "de", 'l');
+    YAZ_CHECK(dest8_len);
+    printf("icu_casemap 'de:l' '%s' (%d)\n", dest8, (int) dest8_len);
+
+
+    dest8 = icu_casemap(nmem, buf, buf_cap, &dest8_len,
+                        src8, "de", 'u');
+    YAZ_CHECK(dest8_len);
+    printf("icu_casemap 'de:u' '%s' (%d)\n", dest8, (int) dest8_len);
+
+
+    dest8 = icu_casemap(nmem, buf, buf_cap, &dest8_len,
+                        src8, "de", 'f');
+    YAZ_CHECK(dest8_len);
+    printf("icu_casemap 'de:f' '%s' (%d)\n", dest8, (int) dest8_len);
+
+
+    dest8 = icu_casemap(nmem, buf, buf_cap, &dest8_len,
+                        src8, "de", 't');
+    YAZ_CHECK(dest8_len);
+    printf("icu_casemap 'de:t' '%s' (%d)\n", dest8, (int) dest8_len);
+
+    nmem_destroy(nmem);
+
+    YAZ_CHECK(0 == 0);
+    //YAZ_CHECK_EQ(0, 1);
+}
+
+// Placeholder for a Greek locale test; the body is compiled out because
+// no sample text has been filled in yet.
+void test_icu_I18N_casemap_el(int argc, char **argv)
 {
-    YAZ_CHECK(0 == 0);
-    //YAZ_CHECK_EQ(0, 1);
+
+#if 0
+
+    size_t buf_cap = 128;
+    char buf[buf_cap];
+    size_t dest8_len = 0;
+    NMEM nmem = nmem_create();
+    char * dest8 = 0;
+
+    const char * src8 = "";
+    size_t src8_len = strlen(src8);
+
+    printf("original string: '%s' (%d)\n", src8, (int) src8_len);
+
+    //these shall succeed
+    dest8 = icu_casemap(nmem, buf, buf_cap, &dest8_len,
+                        src8, "el", 'l');
+    YAZ_CHECK(dest8_len);
+    printf("icu_casemap 'el:l' '%s' (%d)\n", dest8, (int) dest8_len);
+
+
+    dest8 = icu_casemap(nmem, buf, buf_cap, &dest8_len,
+                        src8, "el", 'u');
+    YAZ_CHECK(dest8_len);
+    printf("icu_casemap 'el:u' '%s' (%d)\n", dest8, (int) dest8_len);
+
+
+    dest8 = icu_casemap(nmem, buf, buf_cap, &dest8_len,
+                        src8, "el", 'f');
+    YAZ_CHECK(dest8_len);
+    printf("icu_casemap 'el:f' '%s' (%d)\n", dest8, (int) dest8_len);
+
+
+    dest8 = icu_casemap(nmem, buf, buf_cap, &dest8_len,
+                        src8, "el", 't');
+    YAZ_CHECK(dest8_len);
+    printf("icu_casemap 'el:t' '%s' (%d)\n", dest8, (int) dest8_len);
+
+    nmem_destroy(nmem);
+
+    YAZ_CHECK(0 == 0);
+    //YAZ_CHECK_EQ(0, 1);
+#endif
+
 }
+
 #endif
 int main(int argc, char **argv)
@@ -53,10 +260,15 @@ int main(int argc, char **argv)
 #ifdef HAVE_ICU
-    test_icu_I18N(argc, argv);
+    test_icu_I18N_casemap_en(argc, argv);
+    test_icu_I18N_casemap_da(argc, argv);
+    test_icu_I18N_casemap_de(argc, argv);
+    test_icu_I18N_casemap_el(argc, argv);
 #else
+    printf("ICU unit tests omitted.\n"
+           "Please install libicu36-dev and icu-doc or similar\n");
     YAZ_CHECK(0 == 0);
 #endif
-- 
1.7.10.4