GNU Libidn

Less

Simon Josefsson Datakonsult
Hagagatan 24
113 47 Stockholm
Sweden

E-mail: simon@josefsson.org

     $ wget -q http://josefsson.org/libidn/releases/libidn-0.6.14.tar.gz
     $ tar xfz libidn-0.6.14.tar.gz
     $ cd libidn-0.6.14/
     $ ./configure
     ...
     $ make
     ...
     $ make install
     ...

     #include <stringprep.h>

       if (!stringprep_check_version (STRINGPREP_VERSION))
         {
           printf ("stringprep_check_version() failed:\n"
                   "Header file incompatible with shared library.\n");
           exit(1);
         }

     gcc -c foo.c `pkg-config libidn --cflags`

     gcc -o foo foo.o `pkg-config libidn --libs`

     gcc -o foo foo.c `pkg-config libidn --cflags --libs`

     # Optional IDN support through GNU Libidn.  The shell variable libidn
     # ends up as yes or no; it starts as the option value, or yes when the
     # option is not given.
     AC_ARG_WITH(libidn, AC_HELP_STRING([--with-libidn=[DIR]],
                                     [Support IDN (needs GNU Libidn)]),
       libidn=$withval, libidn=yes)
     if test "$libidn" != "no"; then
       # Any value other than yes or no is used as an installation prefix
       # whose lib and include subdirectories are added to the search paths.
       if test "$libidn" != "yes"; then
         LDFLAGS="${LDFLAGS} -L$libidn/lib"
         CPPFLAGS="${CPPFLAGS} -I$libidn/include"
       fi
       # Both the header and the library symbol must be found; on success
       # libidn is reset to yes and -lidn is appended to LIBS.
       AC_CHECK_HEADER(idna.h,
         AC_CHECK_LIB(idn, stringprep_check_version,
           [libidn=yes LIBS="${LIBS} -lidn"], libidn=no),
         libidn=no)
     fi
     # Note: the warning branch is also taken when the user explicitly
     # disabled Libidn, because libidn is no in that case as well.
     if test "$libidn" != "no" ; then
       AC_DEFINE(LIBIDN, 1, [Define to 1 if you want IDN support.])
     else
       AC_MSG_WARN([Libidn not found])
     fi
     # Report the final decision.
     AC_MSG_CHECKING([if Libidn should be used])
     AC_MSG_RESULT($libidn)

     # Variant of the Libidn check that asks pkg-config instead of probing
     # for the header and library directly.  libidn starts as the option
     # value, or yes when the option is not given.
     AC_ARG_WITH(libidn, AC_HELP_STRING([--with-libidn=[DIR]],
                                     [Support IDN (needs GNU Libidn)]),
       libidn=$withval, libidn=yes)
     if test "$libidn" != "no" ; then
       # The two action arguments collapse libidn to exactly yes or no.
       PKG_CHECK_MODULES(LIBIDN, libidn >= 0.0.0, [libidn=yes], [libidn=no])
       if test "$libidn" != "yes" ; then
         libidn=no
         AC_MSG_WARN([Libidn not found])
       else
         libidn=yes
         AC_DEFINE(LIBIDN, 1, [Define to 1 if you want Libidn.])
       fi
     fi
     # Report the final decision.
     AC_MSG_CHECKING([if Libidn should be used])
     AC_MSG_RESULT($libidn)

     #include <stringprep.h>

     #include <stringprep.h>

     #include <punycode.h>

     #include <idna.h>

     #include <tld.h>

     #include <pr29.h>

/* example.c --- Example code showing how to use stringprep().
 * Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007  Simon Josefsson
 *
 * This file is part of GNU Libidn.
 *
 * GNU Libidn is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * GNU Libidn is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with GNU Libidn; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
 *
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <locale.h>		/* setlocale() */
#include <stringprep.h>

/*
 * Compiling using libtool and pkg-config is recommended:
 *
 * $ libtool cc -o example example.c `pkg-config --cflags --libs libidn`
 * $ ./example
 * Input string encoded as `ISO-8859-1': ª
 * Before locale2utf8 (length 2): aa 0a
 * Before stringprep (length 3): c2 aa 0a
 * After stringprep (length 2): 61 0a
 * $
 *
 */

/* Read one line from standard input in the locale's character set,
   convert it to UTF-8 and apply the Nameprep stringprep profile,
   hex-dumping the bytes before and after each step.  The trailing
   newline is kept and dumped together with the rest of the line.

   Returns 0 when the line was processed (a stringprep failure is only
   reported on stdout) and EXIT_FAILURE when no input could be read or
   the UTF-8 form does not fit in the work buffer.  */
int
main (int argc, char *argv[])
{
  char buf[BUFSIZ];
  char *p;
  int rc;
  size_t i, len;

  setlocale (LC_ALL, "");

  printf ("Input string encoded as `%s': ", stringprep_locale_charset ());
  fflush (stdout);
  /* A failed read leaves buf uninitialized, so stop here.  */
  if (fgets (buf, sizeof buf, stdin) == NULL)
    {
      fprintf (stderr, "Could not read input.\n");
      return EXIT_FAILURE;
    }

  /* size_t is printed through unsigned long; "%d" does not match it.  */
  len = strlen (buf);
  printf ("Before locale2utf8 (length %lu): ", (unsigned long) len);
  for (i = 0; i < len; i++)
    printf ("%02x ", buf[i] & 0xFF);
  printf ("\n");

  p = stringprep_locale_to_utf8 (buf);
  if (p)
    {
      /* The UTF-8 form can be longer than the locale-encoded input,
         so make sure it fits before copying it back.  */
      if (strlen (p) >= sizeof buf)
        {
          fprintf (stderr, "Converted string is too long.\n");
          free (p);
          return EXIT_FAILURE;
        }
      strcpy (buf, p);
      free (p);
    }
  else
    printf ("Could not convert string to UTF-8, continuing anyway...\n");

  len = strlen (buf);
  printf ("Before stringprep (length %lu): ", (unsigned long) len);
  for (i = 0; i < len; i++)
    printf ("%02x ", buf[i] & 0xFF);
  printf ("\n");

  /* stringprep works in place; BUFSIZ is the capacity of buf.  */
  rc = stringprep (buf, BUFSIZ, 0, stringprep_nameprep);
  if (rc != STRINGPREP_OK)
    printf ("Stringprep failed (%d): %s\n", rc, stringprep_strerror (rc));
  else
    {
      len = strlen (buf);
      printf ("After stringprep (length %lu): ", (unsigned long) len);
      for (i = 0; i < len; i++)
        printf ("%02x ", buf[i] & 0xFF);
      printf ("\n");
    }

  return 0;
}

/* example2.c --- Example code showing how to use punycode.
 * Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007  Simon Josefsson
 * Copyright (C) 2002  Adam M. Costello
 *
 * This file is part of GNU Libidn.
 *
 * GNU Libidn is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * GNU Libidn is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with GNU Libidn; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
 *
 */

#include <locale.h>		/* setlocale() */

/*
 * This file is derived from RFC 3492 written by Adam M. Costello.
 *
 * Disclaimer and license: Regarding this entire document or any
 * portion of it (including the pseudocode and C code), the author
 * makes no guarantees and is not responsible for any damage resulting
 * from its use.  The author grants irrevocable permission to anyone
 * to use, modify, and distribute it in any way that does not diminish
 * the rights of anyone else to use, modify, and distribute it,
 * provided that redistributed derivative works do not contain
 * misleading author or version information.  Derivative works need
 * not be licensed under similar terms.
 *
 */

#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <punycode.h>

/* Fixed buffer capacities for this test program.  They stand in for
   dynamic allocation: the code-point buffer and the Punycode (ACE)
   buffer are both sized from these constants at compile time.  */

enum
{
  ace_max_length = 256,		/* characters in a Punycode string */
  unicode_max_length = 256	/* code points in a Unicode string */
};

/* Write the command-line help to stderr, using argv[0] as the program
   name, and terminate the process with EXIT_FAILURE.  */
static void
usage (char **argv)
{
  const char *prog = argv[0];

  /* Only the two mode lines mention the program name.  */
  fprintf (stderr,
           "\n%s -e reads code points and writes a Punycode string.\n",
           prog);
  fprintf (stderr,
           "%s -d reads a Punycode string and writes code points.\n",
           prog);

  /* The remainder is fixed text.  */
  fputs ("\n"
         "Input and output are plain text in the native character set.\n"
         "Code points are in the form u+hex separated by whitespace.\n"
         "Although the specification allows Punycode strings to contain\n"
         "any characters from the ASCII repertoire, this test code\n"
         "supports only the printable characters, and needs the Punycode\n"
         "string to be followed by a newline.\n"
         "The case of the u in u+hex is the force-to-uppercase flag.\n",
         stderr);

  exit (EXIT_FAILURE);
}

/* Emit MSG verbatim on stderr and terminate the process with
   EXIT_FAILURE.  MSG is expected to carry its own trailing newline.  */
static void
fail (const char *msg)
{
  fprintf (stderr, "%s", msg);
  exit (EXIT_FAILURE);
}

/* Diagnostics handed to fail(); each one ends with a newline.  */
static const char too_big[] =
  "input or output is too large, " "recompile with larger limits\n";
static const char invalid_input[] = "invalid" " input\n";
static const char overflow[] = "arithmetic" " overflow\n";
static const char io_error[] = "I/O" " error\n";

/* Table used to convert printable characters between ASCII and the
   native charset: entry N is the native character for ASCII code N.
   Control codes (0-31 and 127) have no printable form and are all
   represented by a newline.

   The at sign is spelled as the hex escape "\x40" in a string literal
   of its own, so the escape cannot absorb the letters that follow.
   It must not be written "\0x40": that is a NUL followed by the three
   characters 'x', '4', '0', which shifts every later entry and ends
   the string (as seen by strchr) at index 64.  */

static const char print_ascii[] =
  "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n"
  "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n"
  " !\"#$%&'()*+,-./"
  "0123456789:;<=>?"
  "\x40"			/* at sign */
  "ABCDEFGHIJKLMNO"
  "PQRSTUVWXYZ[\\]^_"
  "`abcdefghijklmno"
  "pqrstuvwxyz{|}~\n";

/* Command-line driver for the Punycode routines.

   argv[1] must be exactly "-e" or "-d":
     -e  read "u+hex" / "U+hex" code points from stdin, encode them
         and print the resulting Punycode string;
     -d  read one newline-terminated Punycode line from stdin, decode
         it and print one "u+hex" / "U+hex" line per code point.
   Anything else prints the usage text.

   Returns EXIT_SUCCESS after a successful encode or decode.  Errors
   are reported through fail() or usage(), which exit the process with
   EXIT_FAILURE.  */
int
main (int argc, char **argv)
{
  enum punycode_status status;
  int r;
  size_t input_length, output_length, j;
  unsigned char case_flags[unicode_max_length];

  setlocale (LC_ALL, "");

  /* The tests short-circuit, so argv[1][2] is only inspected when
     argv[1][1] is a real character.  Checking it unconditionally read
     one byte past the terminator for a bare "-".  */
  if (argc != 2
      || argv[1][0] != '-' || argv[1][1] == 0 || argv[1][2] != 0)
    usage (argv);

  if (argv[1][1] == 'e')
    {
      uint32_t input[unicode_max_length];
      unsigned long codept;
      char output[ace_max_length + 1], uplus[3];
      int c;

      /* Read the input code points: */

      input_length = 0;

      for (;;)
        {
          r = scanf ("%2s%lx", uplus, &codept);
          if (ferror (stdin))
            fail (io_error);
          if (r == EOF || r == 0)
            break;

          if (r != 2 || uplus[1] != '+' || codept > (uint32_t) - 1)
            {
              fail (invalid_input);
            }

          if (input_length == unicode_max_length)
            fail (too_big);

          /* The case of the 'u' is the force-to-uppercase flag.  */
          if (uplus[0] == 'u')
            case_flags[input_length] = 0;
          else if (uplus[0] == 'U')
            case_flags[input_length] = 1;
          else
            fail (invalid_input);

          input[input_length++] = codept;
        }

      /* Encode: */

      output_length = ace_max_length;
      status = punycode_encode (input_length, input, case_flags,
                                &output_length, output);
      if (status == punycode_bad_input)
        fail (invalid_input);
      if (status == punycode_big_output)
        fail (too_big);
      if (status == punycode_overflow)
        fail (overflow);
      assert (status == punycode_success);

      /* Convert to native charset and output: */

      for (j = 0; j < output_length; ++j)
        {
          c = output[j];
          assert (c >= 0 && c <= 127);
          if (print_ascii[c] == 0)
            fail (invalid_input);
          output[j] = print_ascii[c];
        }

      /* output has ace_max_length + 1 bytes, so the terminator fits.  */
      output[j] = 0;
      r = puts (output);
      if (r == EOF)
        fail (io_error);
      return EXIT_SUCCESS;
    }

  if (argv[1][1] == 'd')
    {
      char input[ace_max_length + 2], *p;
      const char *pp;
      uint32_t output[unicode_max_length];

      /* Read the Punycode input string and convert to ASCII: */

      /* Room for ace_max_length characters, the newline and the NUL.  */
      if (fgets (input, sizeof input, stdin) == NULL)
        fail (ferror (stdin) ? io_error : invalid_input);
      if (ferror (stdin))
        fail (io_error);
      if (feof (stdin))
        fail (invalid_input);
      input_length = strlen (input);
      /* A line that starts with a NUL byte yields an empty string;
         reject it instead of letting the length wrap around below.  */
      if (input_length == 0)
        fail (invalid_input);
      input_length--;
      /* No newline at the end means the line did not fit.  */
      if (input[input_length] != '\n')
        fail (too_big);
      input[input_length] = 0;

      for (p = input; *p != 0; ++p)
        {
          pp = strchr (print_ascii, *p);
          if (pp == 0)
            fail (invalid_input);
          /* The table index is the ASCII code of the character.  */
          *p = pp - print_ascii;
        }

      /* Decode: */

      output_length = unicode_max_length;
      status = punycode_decode (input_length, input, &output_length,
                                output, case_flags);
      if (status == punycode_bad_input)
        fail (invalid_input);
      if (status == punycode_big_output)
        fail (too_big);
      if (status == punycode_overflow)
        fail (overflow);
      assert (status == punycode_success);

      /* Output the result: */

      for (j = 0; j < output_length; ++j)
        {
          r = printf ("%s+%04lX\n",
                      case_flags[j] ? "U" : "u", (unsigned long) output[j]);
          if (r < 0)
            fail (io_error);
        }

      return EXIT_SUCCESS;
    }

  usage (argv);
  return EXIT_SUCCESS;		/* not reached, but quiets compiler warning */
}

/* example3.c --- Example ToASCII() code showing how to use Libidn.
 * Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007  Simon Josefsson
 *
 * This file is part of GNU Libidn.
 *
 * GNU Libidn is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * GNU Libidn is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with GNU Libidn; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
 *
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <locale.h>		/* setlocale() */
#include <stringprep.h>		/* stringprep_locale_charset() */
#include <idna.h>		/* idna_to_ascii_lz() */

/*
 * Compiling using libtool and pkg-config is recommended:
 *
 * $ libtool cc -o example3 example3.c `pkg-config --cflags --libs libidn`
 * $ ./example3
 * Input domain encoded as `ISO-8859-1': www.räksmörgåsª.example
 * Read string (length 23): 77 77 77 2e 72 e4 6b 73 6d f6 72 67 e5 73 aa 2e 65 78 61 6d 70 6c 65
 * ACE label (length 31): 'www.xn--rksmrgsa-0zap8p.example'
 * 77 77 77 2e 78 6e 2d 2d 72 6b 73 6d 72 67 73 61 2d 30 7a 61 70 38 70 2e 65 78 61 6d 70 6c 65
 * $
 *
 */

/* Read a domain name from standard input in the locale's character
   set, hex-dump it, convert it with idna_to_ascii_lz() and print the
   resulting string together with its hex dump.

   Returns 0 on success.  Exits with status 1 when no input could be
   read or when the conversion fails.  */
int
main (int argc, char *argv[])
{
  char buf[BUFSIZ];
  char *p;
  int rc;
  size_t i, len;

  setlocale (LC_ALL, "");

  printf ("Input domain encoded as `%s': ", stringprep_locale_charset ());
  fflush (stdout);
  /* A failed read leaves buf uninitialized, so stop here.  */
  if (fgets (buf, sizeof buf, stdin) == NULL)
    {
      fprintf (stderr, "Could not read input.\n");
      exit (1);
    }
  /* Drop the newline if there is one.  Overwriting the last byte
     unconditionally would underflow on an empty string and would eat
     a real character when the input ended without a newline.  */
  buf[strcspn (buf, "\n")] = '\0';

  /* size_t is printed through unsigned long; "%d" does not match it.  */
  len = strlen (buf);
  printf ("Read string (length %lu): ", (unsigned long) len);
  for (i = 0; i < len; i++)
    printf ("%02x ", buf[i] & 0xFF);
  printf ("\n");

  rc = idna_to_ascii_lz (buf, &p, 0);
  if (rc != IDNA_SUCCESS)
    {
      printf ("ToASCII() failed (%d): %s\n", rc, idna_strerror (rc));
      exit (1);
    }

  len = strlen (p);
  printf ("ACE label (length %lu): '%s'\n", (unsigned long) len, p);
  for (i = 0; i < len; i++)
    printf ("%02x ", p[i] & 0xFF);
  printf ("\n");

  /* The converted string is released here by the caller.  */
  free (p);

  return 0;
}

/* example4.c --- Example ToUnicode() code showing how to use Libidn.
 * Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007  Simon Josefsson
 *
 * This file is part of GNU Libidn.
 *
 * GNU Libidn is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * GNU Libidn is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with GNU Libidn; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
 *
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <locale.h>		/* setlocale() */
#include <stringprep.h>		/* stringprep_locale_charset() */
#include <idna.h>		/* idna_to_unicode_lzlz() */

/*
 * Compiling using libtool and pkg-config is recommended:
 *
 * $ libtool cc -o example4 example4.c `pkg-config --cflags --libs libidn`
 * $ ./example4
 * Input domain encoded as `ISO-8859-1': www.xn--rksmrgsa-0zap8p.example
 * Read string (length 31): 77 77 77 2e 78 6e 2d 2d 72 6b 73 6d 72 67 73 61 2d 30 7a 61 70 38 70 2e 65 78 61 6d 70 6c 65
 * ACE label (length 23): 'www.räksmörgåsa.example'
 * 77 77 77 2e 72 e4 6b 73 6d f6 72 67 e5 73 61 2e 65 78 61 6d 70 6c 65
 * $
 *
 */

/* Read a domain name from standard input in the locale's character
   set, hex-dump it, convert it with idna_to_unicode_lzlz() and print
   the resulting string together with its hex dump.

   Returns 0 on success.  Exits with status 1 when no input could be
   read or when the conversion fails.  */
int
main (int argc, char *argv[])
{
  char buf[BUFSIZ];
  char *p;
  int rc;
  size_t i, len;

  setlocale (LC_ALL, "");

  printf ("Input domain encoded as `%s': ", stringprep_locale_charset ());
  fflush (stdout);
  /* A failed read leaves buf uninitialized, so stop here.  */
  if (fgets (buf, sizeof buf, stdin) == NULL)
    {
      fprintf (stderr, "Could not read input.\n");
      exit (1);
    }
  /* Drop the newline if there is one.  Overwriting the last byte
     unconditionally would underflow on an empty string and would eat
     a real character when the input ended without a newline.  */
  buf[strcspn (buf, "\n")] = '\0';

  /* size_t is printed through unsigned long; "%d" does not match it.  */
  len = strlen (buf);
  printf ("Read string (length %lu): ", (unsigned long) len);
  for (i = 0; i < len; i++)
    printf ("%02x ", buf[i] & 0xFF);
  printf ("\n");

  rc = idna_to_unicode_lzlz (buf, &p, 0);
  if (rc != IDNA_SUCCESS)
    {
      printf ("ToUnicode() failed (%d): %s\n", rc, idna_strerror (rc));
      exit (1);
    }

  /* The "ACE label" wording is kept so the program's output is
     unchanged, although p holds the ToUnicode result here.  */
  len = strlen (p);
  printf ("ACE label (length %lu): '%s'\n", (unsigned long) len, p);
  for (i = 0; i < len; i++)
    printf ("%02x ", p[i] & 0xFF);
  printf ("\n");

  /* The converted string is released here by the caller.  */
  free (p);

  return 0;
}

/* example5.c --- Example TLD checking.
 * Copyright (C) 2004, 2005, 2006, 2007  Simon Josefsson
 *
 * This file is part of GNU Libidn.
 *
 * GNU Libidn is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * GNU Libidn is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with GNU Libidn; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
 *
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Get stringprep_locale_charset, etc. */
#include <stringprep.h>

/* Get idna_to_ascii_8z, etc. */
#include <idna.h>

/* Get tld_check_4z. */
#include <tld.h>

/*
 * Compiling using libtool and pkg-config is recommended:
 *
 * $ libtool cc -o example5 example5.c `pkg-config --cflags --libs libidn`
 * $ ./example5
 * Input domain encoded as `UTF-8': fooß.no
 * Read string (length 8): 66 6f 6f c3 9f 2e 6e 6f
 * ToASCII string (length 8): fooss.no
 * ToUnicode string: U+0066 U+006f U+006f U+0073 U+0073 U+002e U+006e U+006f
 * Domain accepted by TLD check
 *
 * $ ./example5
 * Input domain encoded as `UTF-8': gr€€n.no
 * Read string (length 12): 67 72 e2 82 ac e2 82 ac 6e 2e 6e 6f
 * ToASCII string (length 16): xn--grn-l50aa.no
 * ToUnicode string: U+0067 U+0072 U+20ac U+20ac U+006e U+002e U+006e U+006f
 * Domain rejected by TLD check, Unicode position 2
 *
 */

/* Read a domain name from standard input, convert it to UTF-8, run it
   through idna_to_ascii_8z() and idna_to_unicode_8z4z(), print the
   intermediate results and finally check the Unicode form with
   tld_check_4z().

   Returns 0 when the domain passes the TLD check, 1 when the check
   rejects it (the offending position is printed), and 2 on any other
   failure: unreadable input, a UTF-8 form that does not fit in the
   work buffer, or an error from the library calls.  */
int
main (int argc, char *argv[])
{
  char buf[BUFSIZ];
  char *p;
  uint32_t *r;
  int rc;
  size_t errpos, i, len;

  printf ("Input domain encoded as `%s': ", stringprep_locale_charset ());
  fflush (stdout);
  /* A failed read leaves buf uninitialized, so stop here.  */
  if (fgets (buf, sizeof buf, stdin) == NULL)
    {
      fprintf (stderr, "Could not read input.\n");
      return 2;
    }
  /* Drop the newline if there is one.  Overwriting the last byte
     unconditionally would underflow on an empty string and would eat
     a real character when the input ended without a newline.  */
  buf[strcspn (buf, "\n")] = '\0';

  /* size_t is printed through unsigned long; "%d" does not match it.  */
  len = strlen (buf);
  printf ("Read string (length %lu): ", (unsigned long) len);
  for (i = 0; i < len; i++)
    printf ("%02x ", buf[i] & 0xFF);
  printf ("\n");

  p = stringprep_locale_to_utf8 (buf);
  if (p)
    {
      /* The UTF-8 form can be longer than the locale-encoded input,
         so make sure it fits before copying it back.  */
      if (strlen (p) >= sizeof buf)
        {
          fprintf (stderr, "Converted string is too long.\n");
          free (p);
          return 2;
        }
      strcpy (buf, p);
      free (p);
    }
  else
    printf ("Could not convert string to UTF-8, continuing anyway...\n");

  rc = idna_to_ascii_8z (buf, &p, 0);
  if (rc != IDNA_SUCCESS)
    {
      printf ("idna_to_ascii_8z failed (%d): %s\n", rc, idna_strerror (rc));
      return 2;
    }

  printf ("ToASCII string (length %lu): %s\n",
          (unsigned long) strlen (p), p);

  /* p is no longer needed once it has been handed to the next step.  */
  rc = idna_to_unicode_8z4z (p, &r, 0);
  free (p);
  if (rc != IDNA_SUCCESS)
    {
      printf ("idna_to_unicode_8z4z failed (%d): %s\n",
              rc, idna_strerror (rc));
      return 2;
    }

  /* r is zero-terminated; uint32_t is widened to match "%lx".  */
  printf ("ToUnicode string: ");
  for (i = 0; r[i]; i++)
    printf ("U+%04lx ", (unsigned long) r[i]);
  printf ("\n");

  rc = tld_check_4z (r, &errpos, NULL);
  free (r);
  if (rc == TLD_INVALID)
    {
      printf ("Domain rejected by TLD check, Unicode position %lu\n",
              (unsigned long) errpos);
      return 1;
    }
  else if (rc != TLD_SUCCESS)
    {
      printf ("tld_check_4z() failed (%d): %s\n", rc, tld_strerror (rc));
      return 2;
    }

  printf ("Domain accepted by TLD check\n");

  return 0;
}

  -h, --help               Print help and exit

  -V, --version            Print version and exit

  -s, --stringprep         Prepare string according to nameprep profile

  -d, --punycode-decode    Decode Punycode

  -e, --punycode-encode    Encode Punycode

  -a, --idna-to-ascii      Convert to ACE according to IDNA (default)

  -u, --idna-to-unicode    Convert from ACE according to IDNA

      --allow-unassigned   Toggle IDNA AllowUnassigned flag  (default=off)

      --usestd3asciirules  Toggle IDNA UseSTD3ASCIIRules flag  (default=off)

  -t, --tld                Check string for TLD specific rules
                             Only for --idna-to-ascii and --idna-to-unicode
                             (default=on)

  -p, --profile=STRING     Use specified stringprep profile instead
                             Valid stringprep profiles are `Nameprep',
                             `iSCSI', `Nodeprep', `Resourceprep', `trace', and
                             `SASLprep'.

      --debug              Print debugging information  (default=off)

      --quiet              Silent operation  (default=off)

     $ CHARSET=ISO-8859-1 idn --punycode-encode
     ...

     jas@latte:~$ idn
     libidn 0.3.5
     Copyright 2002, 2003 Simon Josefsson.
     GNU Libidn comes with NO WARRANTY, to the extent permitted by law.
     You may redistribute copies of GNU Libidn under the terms of
     the GNU Lesser General Public License.  For more information
     about these matters, see the file named COPYING.LIB.
     Type each input string on a line by itself, terminated by a newline character.
     räksmörgås.se
     xn--rksmrgs-5wao1o.se
     jas@latte:~$

     jas@latte:~$ idn --quiet räksmörgås.se blåbærgrød.no
     xn--rksmrgs-5wao1o.se
     xn--blbrgrd-fxak7p.no
     jas@latte:~$

     jas@latte:~$ idn --quiet --profile=SASLprep --stringprep teßtª
     teßta
     jas@latte:~$

     jas@latte:~$ idn --debug --quiet ""
     system locale uses charset `UTF-8'.

     jas@latte:~$

     jas@latte:~$ idn --quiet foo
     idn: could not convert from ISO-8859-1 to UTF-8.
     jas@latte:~$

     jas@latte:~$ idn --quiet --debug ""
     system locale uses charset `ISO-8859-1'.

     jas@latte:~$ CHARSET=UTF-8 idn --quiet --debug räksmörgås
     system locale uses charset `UTF-8'.
     input[0] = U+0072
     input[1] = U+4af3
     input[2] = U+006d
     input[3] = U+1b29e5
     input[4] = U+0073
     output[0] = U+0078
     output[1] = U+006e
     output[2] = U+002d
     output[3] = U+002d
     output[4] = U+0072
     output[5] = U+006d
     output[6] = U+0073
     output[7] = U+002d
     output[8] = U+0068
     output[9] = U+0069
     output[10] = U+0036
     output[11] = U+0064
     output[12] = U+0035
     output[13] = U+0039
     output[14] = U+0037
     output[15] = U+0035
     output[16] = U+0035
     output[17] = U+0032
     output[18] = U+0061
     xn--rms-hi6d597552a
     jas@latte:~$

     gnu.inet.encoding.IDNA.toASCII("blöds.züg");
     gnu.inet.encoding.IDNA.toUnicode("xn--blds-6qa.xn--zg-xka");

     $ java GenerateRFC3454
     Creating RFC3454.java... Ok.

     $ java GenerateNFKC
     Creating CombiningClass.java... Ok.
     Creating DecompositionKeys.java... Ok.
     Creating DecompositionMappings.java... Ok.
     Creating Composition.java... Ok.

     $ java -cp .:../libidn.jar TestIDNA -a <string to test>
     Input: <string to test>
     Output: <toASCII(string to test)>
     $ java -cp .:../libidn.jar TestIDNA -u <string to test>
     Input: <string to test>
     Output: <toUnicode(string to test)>

     $ java -cp .:../libidn.jar TestIDNA -t
     No errors detected!

     $ java -cp .:../libidn.jar TestNFKC <string to test>
     Input: <string to test>
     Output: <nfkc version of the string to test>

     $ java -cp .:../libidn.jar TestNFKC
     No errors detected!

From: Rick McGowan <rick@unicode.org>
Subject: Possible bug and status of PR 29 change(s)
To: bug-libidn@gnu.org
Date: Wed, 27 Oct 2004 14:49:17 -0700

Hello. On behalf of the Unicode Consortium editorial committee, I would  
like to find out more information about the PR 29 fixes, if any, and  
functions in Libidn. Your implementation was listed in the text of PR29 as  
needing investigation, so I am following up on several implementations.

The UTC has accepted the proposed fix to D2 as outlined in PR29, and a new  
draft of UAX #15 has been issued.

I have looked at Libidn 0.5.8 (today), and there may still be a possible  
bug in NFKC.java and nfkc.c.

------------------------------------------------------

1. In NFKC.java, this line in canonicalOrdering():

      if (i > 0 && (last_cc == 0 || last_cc != cc)) {

should perhaps be changed to:

      if (i > 0 && (last_cc == 0 || last_cc < cc)) {

but I'm not sure of the sense of this comparison.

------------------------------------------------------

2. In nfkc.c, function _g_utf8_normalize_wc() has this code:

	  if (i > 0 &&
	      (last_cc == 0 || last_cc != cc) &&
	      combine (wc_buffer[last_start], wc_buffer[i],
		       &wc_buffer[last_start]))
	    {

This appears to have the same bug as the current Python implementation (in  
Python 2.3.4). The code should be checking, as per new rule D2 UAX #15  
update, that the next combining character is the same or HIGHER than the  
current one. It now checks to see if it's non-zero and not equal.

The above line(s) should perhaps be changed to:

	  if (i > 0 &&
	      (last_cc == 0 || last_cc < cc) &&
	      combine (wc_buffer[last_start], wc_buffer[i],
		       &wc_buffer[last_start]))
	    {

but I'm not sure of the sense of the comparison (< or > or <=?) here.

In the text of PR29, I will be marking Libidn as "needs change" and adding  
the version number that I checked. If any further change is made, please  
let me know the release version, and I'll update again.

Regards,
	Rick McGowan

From: Simon Josefsson <jas@extundo.com>
Subject: Re: Possible bug and status of PR 29 change(s)
To: Rick McGowan <rick@unicode.org>
Cc: bug-libidn@gnu.org
Date: Thu, 28 Oct 2004 09:47:47 +0200

Rick McGowan <rick@unicode.org> writes:

> Hello. On behalf of the Unicode Consortium editorial committee, I would  
> like to find out more information about the PR 29 fixes, if any, and  
> functions in Libidn. Your implementation was listed in the text of PR29 as  
> needing investigation, so I am following up on several implementations.
>
> The UTC has accepted the proposed fix to D2 as outlined in PR29, and a new  
> draft of UAX #15 has been issued.
>
> I have looked at Libidn 0.5.8 (today), and there may still be a possible  
> bug in NFKC.java and nfkc.c.

Hello Rick.

I believe the current behavior is intentional.  Libidn do not aim to
implement latest-and-greatest NFKC, it aim to implement the NFKC
functionality required for StringPrep and IDN.  As you may know,
StringPrep/IDN reference Unicode 3.2.0, and explicitly says any later
changes (which I consider PR29 as) do not apply.

In fact, I believe that would I incorporate the changes suggested in
PR29, I would in fact be violating the IDN specifications.

Thanks for looking into the code and finding the place where the
change could be made.  I'll see if I can mention this in the manual
somewhere, for technically interested readers.

Regards,
Simon

     Copyright © 2000,2001,2002 Free Software Foundation, Inc.
     51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA

     Everyone is permitted to copy and distribute verbatim copies
     of this license document, but changing it is not allowed.

       Copyright (C)  year  your name.
       Permission is granted to copy, distribute and/or modify this document
       under the terms of the GNU Free Documentation License, Version 1.2
       or any later version published by the Free Software Foundation;
       with no Invariant Sections, no Front-Cover Texts, and no Back-Cover
       Texts.  A copy of the license is included in the section entitled ``GNU
       Free Documentation License''.

         with the Invariant Sections being list their titles, with
         the Front-Cover Texts being list, and with the Back-Cover Texts
         being list.

     Copyright © 1991, 1999 Free Software Foundation, Inc.
     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA

     Everyone is permitted to copy and distribute verbatim copies
     of this license document, but changing it is not allowed.

     [This is the first released version of the Lesser GPL.  It also counts
     as the successor of the GNU Library Public License, version 2, hence the
     version number 2.1.]

     one line to give the library's name and an idea of what it does.
     Copyright (C) year  name of author

     This library is free software; you can redistribute it and/or modify it
     under the terms of the GNU Lesser General Public License as published by
     the Free Software Foundation; either version 2.1 of the License, or (at
     your option) any later version.

     This library is distributed in the hope that it will be useful, but
     WITHOUT ANY WARRANTY; without even the implied warranty of
     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     Lesser General Public License for more details.

     You should have received a copy of the GNU Lesser General Public
     License along with this library; if not, write to the Free Software
     Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
     USA.

     Yoyodyne, Inc., hereby disclaims all copyright interest in the library
     `Frob' (a library for tweaking knobs) written by James Random Hacker.

     signature of Ty Coon, 1 April 1990
     Ty Coon, President of Vice

     Copyright © 1989, 1991 Free Software Foundation, Inc.
     51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA

     Everyone is permitted to copy and distribute verbatim copies
     of this license document, but changing it is not allowed.

     one line to give the program's name and a brief idea of what it does.
     Copyright (C) yyyy  name of author

     This program is free software; you can redistribute it and/or modify
     it under the terms of the GNU General Public License as published by
     the Free Software Foundation; either version 2 of the License, or
     (at your option) any later version.

     This program is distributed in the hope that it will be useful,
     but WITHOUT ANY WARRANTY; without even the implied warranty of
     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     GNU General Public License for more details.

     You should have received a copy of the GNU General Public License
     along with this program; if not, write to the Free Software
     Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.

     Gnomovision version 69, Copyright (C) year name of author
     Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
     This is free software, and you are welcome to redistribute it
     under certain conditions; type `show c' for details.

     Yoyodyne, Inc., hereby disclaims all copyright interest in the program
     `Gnomovision' (which makes passes at compilers) written by James Hacker.

     signature of Ty Coon, 1 April 1989
     Ty Coon, President of Vice

GNU Libidn

Table of Contents

GNU Libidn

1 Introduction

1.1 Getting Started

1.2 Features

1.3 Library Overview

1.4 Supported Platforms

1.5 Getting help

1.6 Commercial Support

1.7 Downloading and Installing

1.8 Bug Reports

1.9 Contributing

2 Preparation

2.1 Header

2.2 Initialization

2.3 Version Check

stringprep_check_version

2.4 Building the source

2.5 Autoconf tests

3 Utility Functions

3.1 Header file stringprep.h

3.2 Unicode Encoding Transformation

stringprep_unichar_to_utf8

stringprep_utf8_to_unichar

stringprep_ucs4_to_utf8

stringprep_utf8_to_ucs4

3.3 Unicode Normalization

stringprep_ucs4_nfkc_normalize

stringprep_utf8_nfkc_normalize

3.4 Character Set Conversion

stringprep_locale_charset

stringprep_convert

stringprep_locale_to_utf8

stringprep_utf8_to_locale

4 Stringprep Functions

4.1 Header file stringprep.h

4.2 Defining A Stringprep Profile

4.3 Control Flags

4.4 Core Functions

stringprep_4i

stringprep_4zi

stringprep

stringprep_profile

4.5 Error Handling

stringprep_strerror

4.6 Stringprep Profile Macros

5 Punycode Functions

5.1 Header file punycode.h

5.2 Unicode Code Point Data Type

5.3 Core Functions

punycode_encode

punycode_decode

5.4 Error Handling

punycode_strerror

6 IDNA Functions

6.1 Header file idna.h

6.2 Control Flags

6.3 Prefix String

6.4 Core Functions

idna_to_ascii_4i

idna_to_unicode_44i

6.5 Simplified ToASCII Interface

idna_to_ascii_4z

idna_to_ascii_8z

idna_to_ascii_lz

6.6 Simplified ToUnicode Interface

idna_to_unicode_4z4z

idna_to_unicode_8z4z

idna_to_unicode_8z8z

idna_to_unicode_8zlz

idna_to_unicode_lzlz

6.7 Error Handling

idna_strerror

7 TLD Functions

7.1 Header file tld.h

7.2 Core Functions

tld_check_4t

tld_check_4tz

7.3 Utility Functions

3.1 Header file `stringprep.h`

4.1 Header file `stringprep.h`

5.1 Header file `punycode.h`

6.1 Header file `idna.h`

7.1 Header file `tld.h`

8.1 Header file `pr29.h`