+ r = yaz_iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft);
+ yaz_iconv_close(cd);
+
+ if (r == (size_t)(-1))
+ return 0;
+
+ cd = yaz_iconv_open("UCS4LE", "UTF-8");
+ if (!cd)
+ return 0;
+ inbytesleft = sizeof(utf8buf) - outbytesleft;
+ inbuf = utf8buf;
+
+ outbuf = dst;
+ outbytesleft = 4;
+
+ r = yaz_iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft);
+ if (r == (size_t)(-1))
+ return 0;
+
+ yaz_iconv_close(cd);
+
+ if (memcmp(src, dst, 4))
+ return 0;
+ }
+ return 1;
+}
+
+ /* Checks MARC-8 to UTF-8 conversion through tst_convert() (defined
+  * elsewhere in this file; called here as descriptor, input, expected).
+  * MARC-8 bytes >= 0x80 are written as hex escapes so that the byte
+  * values under test do not depend on the encoding of this source file. */
+ static void tst_marc8_to_utf8(void)
+ {
+ yaz_iconv_t cd = yaz_iconv_open("UTF-8", "MARC8");
+
+ YAZ_CHECK(cd);
+ if (!cd)
+ return;
+
+ YAZ_CHECK(tst_convert(cd, "Cours de math",
+ "Cours de math"));
+ /* COMBINING ACUTE ACCENT: MARC-8 0xE2 precedes its base letter 'e';
+    expected UTF-8 is 'e' followed by U+0301 (CC 81).
+    The literal is split so that 'e' is not read as a hex digit. */
+ YAZ_CHECK(tst_convert(cd, "Cours de math\xe2" "e",
+ "Cours de mathe\xcc\x81"));
+
+
+ /* 0xEA before 0x1E: expected output has U+030A (CC 8A) after the 0x1E */
+ YAZ_CHECK(tst_convert(cd, "a\xea\x1e", "a\x1e\xcc\x8a"));
+
+ /* trailing 0xEA with no following character: expected output is just "a" */
+ YAZ_CHECK(tst_convert(cd, "a\xea", "a"));
+ yaz_iconv_close(cd);
+ }
+
+ /* Checks conversion from the "MARC8s" source encoding to UTF-8 through
+  * tst_convert() (defined elsewhere in this file).
+  * For the accented case the expected UTF-8 is the single precomposed
+  * character U+00E9 (C3 A9), not a base letter plus combining mark.
+  * MARC-8 bytes >= 0x80 are written as hex escapes so that the byte
+  * values under test do not depend on the encoding of this source file. */
+ static void tst_marc8s_to_utf8(void)
+ {
+ yaz_iconv_t cd = yaz_iconv_open("UTF-8", "MARC8s");
+
+ YAZ_CHECK(cd);
+ if (!cd)
+ return;
+
+ YAZ_CHECK(tst_convert(cd, "Cours de math",
+ "Cours de math"));
+ /* E9: LATIN SMALL LETTER E WITH ACUTE.
+    Input is MARC-8 0xE2 followed by 'e'; the literal is split so that
+    'e' is not read as a hex digit. */
+ YAZ_CHECK(tst_convert(cd, "Cours de math\xe2" "e",
+ "Cours de math\xc3\xa9"));
+
+ yaz_iconv_close(cd);
+ }
+
+
+ /* Checks MARC-8 to ISO-8859-1 conversion through tst_convert() (defined
+  * elsewhere in this file; called here as descriptor, input, expected).
+  * All bytes >= 0x80 are written as escapes so that the values under test
+  * do not depend on the encoding of this source file:
+  * 0xE2 is the MARC-8 combining acute (it precedes its base letter) and
+  * 0xE9 is Latin-1 e with acute. Literals are split after an escape
+  * wherever the next character could be read as a hex digit. */
+ static void tst_marc8_to_latin1(void)
+ {
+ yaz_iconv_t cd = yaz_iconv_open("ISO-8859-1", "MARC8");
+
+ YAZ_CHECK(cd);
+ if (!cd)
+ return;
+
+ YAZ_CHECK(tst_convert(cd, "ax", "ax"));
+
+ /* latin capital letter o with stroke */
+ YAZ_CHECK(tst_convert(cd, "\xa2", "\xd8"));
+
+ /* with latin small letter ae */
+ YAZ_CHECK(tst_convert(cd, "eneb\xb5r", "eneb\346r"));
+
+ /* 0xEA + 'a' is expected as 0xE5, alone, followed by other text, and repeated */
+ YAZ_CHECK(tst_convert(cd, "\xea" "a\xa2", "\xe5" "\xd8"));
+
+ YAZ_CHECK(tst_convert(cd, "\xea" "a\xa2" "b", "\xe5" "\xd8" "b"));
+
+ YAZ_CHECK(tst_convert(cd, "\xea" "a" "\xea" "a", "\xe5" "\xe5"));
+
+ /* acute + 'e' at varying offsets within the input, then with trailing text */
+ YAZ_CHECK(tst_convert(cd, "Cours de math",
+ "Cours de math"));
+ YAZ_CHECK(tst_convert(cd, "Cours de math\xe2" "e",
+ "Cours de math\xe9"));
+ YAZ_CHECK(tst_convert(cd, "12345678\xe2" "e",
+ "12345678\xe9"));
+ YAZ_CHECK(tst_convert(cd, "123456789\xe2" "e",
+ "123456789\xe9"));
+ YAZ_CHECK(tst_convert(cd, "1234567890\xe2" "e",
+ "1234567890\xe9"));
+ YAZ_CHECK(tst_convert(cd, "12345678901\xe2" "e",
+ "12345678901\xe9"));
+ YAZ_CHECK(tst_convert(cd, "Cours de math\xe2" "em",
+ "Cours de math\xe9" "m"));
+ YAZ_CHECK(tst_convert(cd, "Cours de math\xe2" "ematiques",
+ "Cours de math\xe9" "matiques"));
+
+ yaz_iconv_close(cd);
+ }
+
+ /* Checks UTF-8 to MARC-8 conversion: plain ASCII of several lengths,
+  * single and stacked combining marks, marks spanning two letters, and
+  * characters whose expected MARC-8 form contains ESC sequences.
+  * The final block drives yaz_iconv() directly instead of tst_convert()
+  * (tst_convert is defined elsewhere in this file). */
+ static void tst_utf8_to_marc8(void)
+ {
+ yaz_iconv_t cd = yaz_iconv_open("MARC8", "UTF-8");
+
+ YAZ_CHECK(cd);
+ if (!cd)
+ return;
+
+ YAZ_CHECK(tst_convert(cd, "Cours ", "Cours "));
+
+ /** Pure ASCII. 12 characters. NOTE(review): the 12/13/14 lengths presumably straddle the size of an output buffer inside tst_convert - confirm */
+ YAZ_CHECK(tst_convert(cd, "Cours de mat", "Cours de mat"));
+
+ /** Pure ASCII. 13 characters */
+ YAZ_CHECK(tst_convert(cd, "Cours de math", "Cours de math"));
+
+ /** Pure ASCII. 14 characters */
+ YAZ_CHECK(tst_convert(cd, "Cours de math.", "Cours de math."));
+
+ /** UPPERCASE SCANDINAVIAN O (U+00D8) */
+ YAZ_CHECK(tst_convert(cd, "S\xc3\x98", "S\xa2"));
+
+ /** ARING: 'A' + U+030A; expected MARC-8 has the mark before the letter */
+ YAZ_CHECK(tst_convert(cd, "A" "\xCC\x8A", "\xEA" "A"));
+
+ /** A + MACRON (U+0304) + DIAERESIS (U+0308); both marks expected before the letter */
+ YAZ_CHECK(tst_convert(cd, "A" "\xCC\x84" "\xCC\x88",
+ "\xE5\xE8\x41"));
+
+ /* Ligature (U+0361) spanning two characters */
+ YAZ_CHECK(tst_convert(cd,
+ "\x74" "\xCD\xA1" "\x73", /* UTF-8 */
+ "\xEB\x74\xEC\x73")); /* MARC-8 */
+
+ /* Double tilde (U+0360) spanning two characters */
+ YAZ_CHECK(tst_convert(cd,
+ "\x74" "\xCD\xA0" "\x73", /* UTF-8 */
+ "\xFA\x74\xFB\x73")); /* MARC-8 */
+
+ /** Ideographic question mark (Unicode FF1F); expected output is bracketed by ESC sequences (presumably a character-set switch and a switch back - confirm) */
+ YAZ_CHECK(tst_convert(cd,
+ "\xEF\xBC\x9F" "o", /* UTF-8 */
+ "\033$1" "\x21\x2B\x3B" "\033(B" "o" ));
+
+
+ /** Superscript 0 (U+2070). bug #642 */
+ YAZ_CHECK(tst_convert(cd,
+ "(\xe2\x81\xb0)", /* UTF-8 */
+ "(\033p0\x1bs)"));
+
+
+ /* Same superscript 0, but calling yaz_iconv() directly: one call with
+    the input, then a second call with no input, and only then is the
+    accumulated output compared. */
+ {
+ char *inbuf0 = "\xe2\x81\xb0";
+ char *inbuf = inbuf0;
+ size_t inbytesleft = strlen(inbuf);
+ char outbuf0[64];
+ char *outbuf = outbuf0;
+ size_t outbytesleft = sizeof(outbuf0)-1; /* keep one byte for the '\0' stored below */
+ size_t r;
+#if 0
+ int i;
+#endif
+ r = yaz_iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft);
+ YAZ_CHECK(r != (size_t) (-1));
+
+#if 0
+ *outbuf = '\0'; /* so we know when to stop printing */
+ for (i = 0; outbuf0[i]; i++)
+ {
+ int ch = outbuf0[i] & 0xff;
+ yaz_log(YLOG_LOG, "ch%d %02X %c", i, ch, ch >= ' ' ? ch : '?');
+ }
+#endif
+
+ /* no input buffer: presumably flushes pending output as with POSIX iconv - confirm */
+ r = yaz_iconv(cd, 0, 0, &outbuf, &outbytesleft);
+ YAZ_CHECK(r != (size_t) (-1));
+ *outbuf = '\0'; /* for strcmp test below and printing */
+#if 0
+ for (i = 0; outbuf0[i]; i++)
+ {
+ int ch = outbuf0[i] & 0xff;
+ yaz_log(YLOG_LOG, "ch%d %02X %c", i, ch, ch >= ' ' ? ch : '?');
+ }
+#endif
+ YAZ_CHECK(strcmp("\033p0\x1bs", outbuf0) == 0);
+ }
+ yaz_iconv_close(cd);
+ }
+
+ /* Smoke test for the "advancegreek" to "utf-8" converter: checks that the
+  * descriptor opens and that a plain ASCII string is expected unchanged
+  * (via tst_convert, defined elsewhere in this file). */
+ static void tst_advance_to_utf8(void)
+ {
+ yaz_iconv_t cd = yaz_iconv_open("utf-8", "advancegreek");
+
+ YAZ_CHECK(cd);
+ if (!cd)
+ return;
+
+ YAZ_CHECK(tst_convert(cd, "Cours ", "Cours "));
+ yaz_iconv_close(cd);
+ }
+
+ /* Smoke test for the "utf-8" to "advancegreek" converter: checks that the
+  * descriptor opens and that a plain ASCII string is expected unchanged
+  * (via tst_convert, defined elsewhere in this file). */
+ static void tst_utf8_to_advance(void)
+ {
+ yaz_iconv_t cd = yaz_iconv_open("advancegreek", "utf-8");
+
+ YAZ_CHECK(cd);
+ if (!cd)
+ return;
+
+ YAZ_CHECK(tst_convert(cd, "Cours ", "Cours "));
+ yaz_iconv_close(cd);
+ }
+
+ /* Checks ISO-8859-1 to MARC-8 conversion through tst_convert() (defined
+  * elsewhere in this file; called here as descriptor, input, expected).
+  * All bytes >= 0x80 are written as hex escapes so that the values under
+  * test do not depend on the encoding of this source file:
+  * 0xE9 is Latin-1 e with acute and 0xE2 is the MARC-8 combining acute
+  * (it precedes its base letter). Literals are split after an escape
+  * wherever the next character could be read as a hex digit. */
+ static void tst_latin1_to_marc8(void)
+ {
+ yaz_iconv_t cd = yaz_iconv_open("MARC8", "ISO-8859-1");
+
+ YAZ_CHECK(cd);
+ if (!cd)
+ return;
+
+ YAZ_CHECK(tst_convert(cd, "Cours ", "Cours "));
+
+ /** Pure ASCII. 12 characters. NOTE(review): the 12/13/14 lengths presumably straddle the size of an output buffer inside tst_convert - confirm */
+ YAZ_CHECK(tst_convert(cd, "Cours de mat", "Cours de mat"));
+
+ /** Pure ASCII. 13 characters */
+ YAZ_CHECK(tst_convert(cd, "Cours de math", "Cours de math"));
+
+ /** Pure ASCII. 14 characters */
+ YAZ_CHECK(tst_convert(cd, "Cours de math.", "Cours de math."));
+
+ /** D8: UPPERCASE SCANDINAVIAN O */
+ YAZ_CHECK(tst_convert(cd, "S\xd8", "S\xa2"));
+
+ /** E9: LATIN SMALL LETTER E WITH ACUTE */
+ YAZ_CHECK(tst_convert(cd, "Cours de math\xe9", "Cours de math\xe2" "e"));
+ YAZ_CHECK(tst_convert(cd, "Cours de math", "Cours de math"
+ ));
+ /* e-acute at varying offsets within the input, then with trailing text */
+ YAZ_CHECK(tst_convert(cd, "Cours de math\xe9", "Cours de math\xe2" "e" ));
+ YAZ_CHECK(tst_convert(cd, "12345678\xe9","12345678\xe2" "e"));
+ YAZ_CHECK(tst_convert(cd, "123456789\xe9", "123456789\xe2" "e"));
+ YAZ_CHECK(tst_convert(cd, "1234567890\xe9","1234567890\xe2" "e"));
+ YAZ_CHECK(tst_convert(cd, "12345678901\xe9", "12345678901\xe2" "e"));
+ YAZ_CHECK(tst_convert(cd, "Cours de math\xe9" "m", "Cours de math\xe2" "em"));
+ YAZ_CHECK(tst_convert(cd, "Cours de math\xe9" "matiques",
+ "Cours de math\xe2" "ematiques"));
+ yaz_iconv_close(cd);
+ }
+
+ /* Runs utf8_check() (defined elsewhere in this file) on a spread of
+  * integer values from 3 up to 100000000 and requires each call to
+  * return non-zero.
+  * NOTE(review): utf8_check presumably round-trips the value through
+  * UTF-8, so the values would be chosen to cover different encoded
+  * lengths - confirm against its definition. */
+ static void tst_utf8_codes(void)
+ {
+ YAZ_CHECK(utf8_check(3));
+ YAZ_CHECK(utf8_check(127));
+ YAZ_CHECK(utf8_check(128));
+ YAZ_CHECK(utf8_check(255));
+ YAZ_CHECK(utf8_check(256));
+ YAZ_CHECK(utf8_check(900));
+ YAZ_CHECK(utf8_check(1000));
+ YAZ_CHECK(utf8_check(10000));
+ YAZ_CHECK(utf8_check(100000));
+ YAZ_CHECK(utf8_check(1000000));
+ YAZ_CHECK(utf8_check(10000000));
+ YAZ_CHECK(utf8_check(100000000));
+ }
+