help-libidn
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: AW: treatment of U+002E that is produced by NFKC


From: Simon Josefsson
Subject: Re: AW: treatment of U+002E that is produced by NFKC
Date: Mon, 14 Jan 2008 11:10:44 +0100
User-agent: Gnus/5.110007 (No Gnus v0.7) Emacs/23.0.50 (gnu/linux)

"Alexander Gnauck" <address@hidden> writes:

>> Sure, that is one way to deal with this. Libidn users may not be
>> clamoring for a resolution. Other implementations may be in more of a
>> rush to resolve the conflict. (I work for Google.)
>
> What about adding a define to deal with this, and make a note in the
> documentation about this "issue".

Yes, we should definitely document the problem in the manual.  Erik, do
you know of any good links that discuss this issue?

Fortunately, all the idna_* APIs in libidn take a 'flags' parameter.
It would be possible to add a new flag IDNA_TREAT_U2024_AS_DOT and have
the code treat U+2024 as a dot character as per RFC 3490 section 3.1 if
the flag is given.  I've confirmed that this makes libidn produce the
same output as MSIE/Firefox output.  See initial skeleton patch below.

/Simon

diff --git a/lib/idna.c b/lib/idna.c
index b815a3f..09ef929 100644
--- a/lib/idna.c
+++ b/lib/idna.c
@@ -1,5 +1,5 @@
 /* idna.c --- Convert to or from IDN strings.
- * Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007  Simon Josefsson
+ * Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008  Simon Josefsson
  *
  * This file is part of GNU Libidn.
  *
@@ -30,8 +30,9 @@
 
 #include "idna.h"
 
-#define DOTP(c) ((c) == 0x002E || (c) == 0x3002 ||     \
-                (c) == 0xFF0E || (c) == 0xFF61)
+#define DOTP(c, flags) ((c) == 0x002E || (c) == 0x3002 ||      \
+                       (c) == 0xFF0E || (c) == 0xFF61 ||               \
+                       ((flags & IDNA_TREAT_U2024_AS_DOT) && (c) == 0x2024))
 
 /* Core functions */
 
@@ -475,7 +476,7 @@ idna_to_ascii_4z (const uint32_t * input, char **output, 
int flags)
       return IDNA_SUCCESS;
     }
 
-  if (DOTP (input[0]) && input[1] == 0)
+  if (DOTP (input[0], flags) && input[1] == 0)
     {
       /* Handle explicit zero-length root label. */
       *output = malloc (2);
@@ -490,7 +491,7 @@ idna_to_ascii_4z (const uint32_t * input, char **output, 
int flags)
     {
       end = start;
 
-      for (; *end && !DOTP (*end); end++)
+      for (; *end && !DOTP (*end, flags); end++)
        ;
 
       if (*end == '\0' && start == end)
@@ -628,7 +629,7 @@ idna_to_unicode_4z4z (const uint32_t * input, uint32_t ** 
output, int flags)
     {
       end = start;
 
-      for (; *end && !DOTP (*end); end++)
+      for (; *end && !DOTP (*end, flags); end++)
        ;
 
       buflen = end - start;
diff --git a/lib/idna.h b/lib/idna.h
index f6b24ac..e968d33 100644
--- a/lib/idna.h
+++ b/lib/idna.h
@@ -1,5 +1,5 @@
 /* idna.h --- Declarations for Internationalized Domain Name in Applications.
- * Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007  Simon Josefsson
+ * Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008  Simon Josefsson
  *
  * This file is part of GNU Libidn.
  *
@@ -54,7 +54,8 @@ extern "C"
   typedef enum
   {
     IDNA_ALLOW_UNASSIGNED = 0x0001,
-    IDNA_USE_STD3_ASCII_RULES = 0x0002
+    IDNA_USE_STD3_ASCII_RULES = 0x0002,
+    IDNA_TREAT_U2024_AS_DOT = 0x0004
   } Idna_flags;
 
 # ifndef IDNA_ACE_PREFIX
diff --git a/src/idn.c b/src/idn.c
index abb545e..cf4009b 100644
--- a/src/idn.c
+++ b/src/idn.c
@@ -370,8 +370,10 @@ main (int argc, char *argv[])
                                 (args_info.allow_unassigned_given ?
                                  IDNA_ALLOW_UNASSIGNED : 0) |
                                 (args_info.usestd3asciirules_given ?
-                                 IDNA_USE_STD3_ASCII_RULES : 0));
-         free (q);
+                                 IDNA_USE_STD3_ASCII_RULES : 0) |
+                                (args_info.treatu2024asdot_given ?
+                                 IDNA_TREAT_U2024_AS_DOT : 0));
+      free (q);
          if (rc != IDNA_SUCCESS)
            error (EXIT_FAILURE, 0, _("idna_to_ascii_4z: %s"),
                   idna_strerror (rc));
@@ -385,7 +387,9 @@ main (int argc, char *argv[])
                                         (args_info.allow_unassigned_given ?
                                          IDNA_ALLOW_UNASSIGNED : 0) |
                                         (args_info.usestd3asciirules_given ?
-                                         IDNA_USE_STD3_ASCII_RULES : 0));
+                                         IDNA_USE_STD3_ASCII_RULES : 0) |
+                                        (args_info.treatu2024asdot_given ?
+                                         IDNA_TREAT_U2024_AS_DOT : 0));
              if (rc != IDNA_SUCCESS)
                error (EXIT_FAILURE, 0, _("idna_to_unicode_8z4z (TLD): %s"),
                       idna_strerror (rc));
@@ -450,7 +454,9 @@ main (int argc, char *argv[])
                                     (args_info.allow_unassigned_given ?
                                      IDNA_ALLOW_UNASSIGNED : 0) |
                                     (args_info.usestd3asciirules_given ?
-                                     IDNA_USE_STD3_ASCII_RULES : 0));
+                                     IDNA_USE_STD3_ASCII_RULES : 0) |
+                                    (args_info.treatu2024asdot_given ?
+                                     IDNA_TREAT_U2024_AS_DOT : 0));
          free (p);
          if (rc != IDNA_SUCCESS)
            error (EXIT_FAILURE, 0, _("idna_to_unicode_8z4z: %s"),
diff --git a/src/idn.ggo b/src/idn.ggo
index 620f9f6..680686f 100644
--- a/src/idn.ggo
+++ b/src/idn.ggo
@@ -1,4 +1,4 @@
-# Copyright (C) 2003, 2004, 2005, 2006, 2007 Simon Josefsson.
+# Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008 Simon Josefsson.
 #
 # This file is part of GNU Libidn.
 #
@@ -31,6 +31,7 @@ option "idna-to-ascii"   a "Convert to ACE according to IDNA 
(default)" no
 option "idna-to-unicode" u "Convert from ACE according to IDNA" no
 option "allow-unassigned" - "Toggle IDNA AllowUnassigned flag" flag off
 option "usestd3asciirules" - "Toggle IDNA UseSTD3ASCIIRules flag" flag off
+option "treatu2024asdot" - "Toggle IDNA TreatU2024AsDot flag" flag off
 option "tld" t "Check string for TLD specific rules\nOnly for --idna-to-ascii 
and --idna-to-unicode" flag on
 option "profile" p "Use specified stringprep profile instead\nValid stringprep 
profiles are `Nameprep', `iSCSI', `Nodeprep', `Resourceprep', `trace', and 
`SASLprep'." string no
 option "debug" - "Print debugging information" flag off




reply via email to

[Prev in Thread] Current Thread [Next in Thread]