lynx-dev
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Lynx-dev] HTML5 meta charset attribute


From: KIHARA Hideto
Subject: [Lynx-dev] HTML5 meta charset attribute
Date: Thu, 29 Sep 2011 19:58:10 +0900
User-agent: Mutt/1.5.21 (2010-09-15)

Please add support for the HTML5 meta charset attribute.
  http://www.w3.org/TR/html5/semantics.html#the-meta-element

Because Lynx does not recognize the charset attribute on the meta element,
the following site, which contains <meta charset="UTF-8">, is not displayed
correctly:
  http://vim-jp.org
(The information page shown by the '=' key reports "Charset: euc-jp", and the
 "Display character set" option is "Japanese (EUC-JP)".)

Screen captures:
  Lynx 2.8.8dev.9:
    http://www1.interq.or.jp/~deton/lynx/meta-charset-NG.png
  Lynx 2.8.8dev.9 with patch:
    http://www1.interq.or.jp/~deton/lynx/meta-charset-OK.png

Lynx.trace log:
SGML: Unknown attribute charset for tag META
SGML: Attribute value UTF-8 ***ignored
SGML: Start <META>
LYHandleMETA: HTTP-EQUIV="(null)" NAME="(null)" CONTENT="(null)"

Here is a patch that adds support for the HTML5 meta charset attribute.

diff -urp ../lynx2-8-8.orig/WWW/Library/Implementation/hdr_HTMLDTD.h ./WWW/Library/Implementation/hdr_HTMLDTD.h
--- ../lynx2-8-8.orig/WWW/Library/Implementation/hdr_HTMLDTD.h  2011-06-13 09:18:54.000000000 +0900
+++ ./WWW/Library/Implementation/hdr_HTMLDTD.h  2011-09-27 20:53:44.000000000 +0900
@@ -670,11 +670,12 @@ extern "C" {
 #define HTML_MATH_TITLE           7
 #define HTML_MATH_ATTRIBUTES      8
 
-#define HTML_META_CONTENT         0
-#define HTML_META_HTTP_EQUIV      1
-#define HTML_META_NAME            2
-#define HTML_META_SCHEME          3
-#define HTML_META_ATTRIBUTES      4
+#define HTML_META_CHARSET         0
+#define HTML_META_CONTENT         1
+#define HTML_META_HTTP_EQUIV      2
+#define HTML_META_NAME            3
+#define HTML_META_SCHEME          4
+#define HTML_META_ATTRIBUTES      5
 
 #define HTML_NEXTID_N             0
 #define HTML_NEXTID_ATTRIBUTES    1
diff -urp ../lynx2-8-8.orig/WWW/Library/Implementation/src0_HTMLDTD.h ./WWW/Library/Implementation/src0_HTMLDTD.h
--- ../lynx2-8-8.orig/WWW/Library/Implementation/src0_HTMLDTD.h 2011-06-13 09:18:54.000000000 +0900
+++ ./WWW/Library/Implementation/src0_HTMLDTD.h 2011-09-27 20:53:44.000000000 +0900
@@ -847,6 +847,7 @@ static const AttrType MATH_attr_type[] =
 };
 
 static const attr META_attr_list[] = {
+       { "CHARSET"       T(N) },
        { "CONTENT"       T(N) },
        { "HTTP-EQUIV"    T(N) },
        { "NAME"          T(N) },
@@ -1794,6 +1795,7 @@ static const attr MATH_attr[] = {
 };
 
 static const attr META_attr[] = {       /* META attributes */
+       { "CHARSET"       T(N) },
        { "CONTENT"       T(N) },
        { "HTTP-EQUIV"    T(N) },
        { "NAME"          T(N) },
diff -urp ../lynx2-8-8.orig/WWW/Library/Implementation/src0_HTMLDTD.txt ./WWW/Library/Implementation/src0_HTMLDTD.txt
--- ../lynx2-8-8.orig/WWW/Library/Implementation/src0_HTMLDTD.txt       2011-06-13 09:18:54.000000000 +0900
+++ ./WWW/Library/Implementation/src0_HTMLDTD.txt       2011-09-27 20:10:29.000000000 +0900
@@ -336,11 +336,12 @@
                        0:0:BOX
                        1:0:CLEAR
        40:META
-               4 attributes:
-                       0:0:CONTENT
-                       1:0:HTTP-EQUIV
-                       2:0:NAME
-                       3:0:SCHEME
+               5 attributes:
+                       0:0:CHARSET
+                       1:0:CONTENT
+                       2:0:HTTP-EQUIV
+                       3:0:NAME
+                       4:0:SCHEME
        41:NEXTID
                1 attributes:
                        0:0:N
@@ -2509,11 +2510,12 @@
                flags:
        75:META
                justify
-               4 attributes:
-                       0:0:CONTENT
-                       1:0:HTTP-EQUIV
-                       2:0:NAME
-                       3:0:SCHEME
+               5 attributes:
+                       0:0:CHARSET
+                       1:0:CONTENT
+                       2:0:HTTP-EQUIV
+                       3:0:NAME
+                       4:0:SCHEME
                1 attr_types
                        META
                contents: SGML_EMPTY
diff -urp ../lynx2-8-8.orig/WWW/Library/Implementation/src1_HTMLDTD.h ./WWW/Library/Implementation/src1_HTMLDTD.h
--- ../lynx2-8-8.orig/WWW/Library/Implementation/src1_HTMLDTD.h 2011-06-13 09:18:54.000000000 +0900
+++ ./WWW/Library/Implementation/src1_HTMLDTD.h 2011-09-27 20:53:44.000000000 +0900
@@ -847,6 +847,7 @@ static const AttrType MATH_attr_type[] =
 };
 
 static const attr META_attr_list[] = {
+       { "CHARSET"       T(N) },
        { "CONTENT"       T(N) },
        { "HTTP-EQUIV"    T(N) },
        { "NAME"          T(N) },
@@ -1794,6 +1795,7 @@ static const attr MATH_attr[] = {
 };
 
 static const attr META_attr[] = {       /* META attributes */
+       { "CHARSET"       T(N) },
        { "CONTENT"       T(N) },
        { "HTTP-EQUIV"    T(N) },
        { "NAME"          T(N) },
diff -urp ../lynx2-8-8.orig/WWW/Library/Implementation/src1_HTMLDTD.txt ./WWW/Library/Implementation/src1_HTMLDTD.txt
--- ../lynx2-8-8.orig/WWW/Library/Implementation/src1_HTMLDTD.txt       2011-06-13 09:18:54.000000000 +0900
+++ ./WWW/Library/Implementation/src1_HTMLDTD.txt       2011-09-27 20:11:33.000000000 +0900
@@ -336,11 +336,12 @@
                        0:0:BOX
                        1:0:CLEAR
        40:META
-               4 attributes:
-                       0:0:CONTENT
-                       1:0:HTTP-EQUIV
-                       2:0:NAME
-                       3:0:SCHEME
+               5 attributes:
+                       0:0:CHARSET
+                       1:0:CONTENT
+                       2:0:HTTP-EQUIV
+                       3:0:NAME
+                       4:0:SCHEME
        41:NEXTID
                1 attributes:
                        0:0:N
@@ -2509,11 +2510,12 @@
                flags:
        75:META
                justify
-               4 attributes:
-                       0:0:CONTENT
-                       1:0:HTTP-EQUIV
-                       2:0:NAME
-                       3:0:SCHEME
+               5 attributes:
+                       0:0:CHARSET
+                       1:0:CONTENT
+                       2:0:HTTP-EQUIV
+                       3:0:NAME
+                       4:0:SCHEME
                1 attr_types
                        META
                contents: SGML_EMPTY
diff -urp ../lynx2-8-8.orig/src/LYCharUtils.c ./src/LYCharUtils.c
--- ../lynx2-8-8.orig/src/LYCharUtils.c 2011-06-13 09:18:54.000000000 +0900
+++ ./src/LYCharUtils.c 2011-09-29 07:21:32.000000000 +0900
@@ -2029,7 +2029,7 @@ void LYHandleMETA(HTStructured * me, con
                  const char **value,
                  char **include GCC_UNUSED)
 {
-    char *http_equiv = NULL, *name = NULL, *content = NULL;
+    char *http_equiv = NULL, *name = NULL, *content = NULL, *charset = NULL;
     char *href = NULL, *id_string = NULL, *temp = NULL;
     char *cp, *cp0, *cp1 = NULL;
     int url_type = 0;
@@ -2079,141 +2079,49 @@ void LYHandleMETA(HTStructured * me, con
            FREE(content);
        }
     }
+    if (present[HTML_META_CHARSET] &&
+       non_empty(value[HTML_META_CHARSET])) {
+       StrAllocCopy(charset, value[HTML_META_CHARSET]);
+       convert_to_spaces(charset, TRUE);
+       LYUCTranslateHTMLString(&charset, me->tag_charset, me->tag_charset,
+                               NO, NO, YES, st_other);
+       if (*charset == '\0') {
+           FREE(charset);
+       }
+    }
     CTRACE((tfp,
-           "LYHandleMETA: HTTP-EQUIV=\"%s\" NAME=\"%s\" CONTENT=\"%s\"\n",
+           "LYHandleMETA: HTTP-EQUIV=\"%s\" NAME=\"%s\" CONTENT=\"%s\" CHARSET=\"%s\"\n",
            NONNULL(http_equiv),
            NONNULL(name),
-           NONNULL(content)));
+           NONNULL(content),
+           NONNULL(charset)));
 
     /*
-     * Make sure we have META name/value pairs to handle.  - FM
+     * Check for a text/html Content-Type with a charset directive, if we
+     * didn't already set the charset via a server's header.  - AAC & FM
      */
-    if (!(http_equiv || name) || !content)
-       goto free_META_copies;
-
-    /*
-     * Check for a no-cache Pragma
-     * or Cache-Control directive. - FM
-     */
-    if (!strcasecomp(NonNull(http_equiv), "Pragma") ||
-       !strcasecomp(NonNull(http_equiv), "Cache-Control")) {
-       LYUCTranslateHTMLString(&content, me->tag_charset, me->tag_charset,
-                               NO, NO, YES, st_other);
-       if (!strcasecomp(content, "no-cache")) {
-           me->node_anchor->no_cache = TRUE;
-           HText_setNoCache(me->text);
-       }
-
-       /*
-        * If we didn't get a Cache-Control MIME header, and the META has one,
-        * convert to lowercase, store it in the anchor element, and if we
-        * haven't yet set no_cache, check whether we should.  - FM
-        */
-       if ((!me->node_anchor->cache_control) &&
-           !strcasecomp(NonNull(http_equiv), "Cache-Control")) {
-           LYLowerCase(content);
-           StrAllocCopy(me->node_anchor->cache_control, content);
-           if (me->node_anchor->no_cache == FALSE) {
-               cp0 = content;
-               while ((cp = strstr(cp0, "no-cache")) != NULL) {
-                   cp += 8;
-                   while (*cp != '\0' && WHITE(*cp))
-                       cp++;
-                   if (*cp == '\0' || *cp == ';') {
-                       me->node_anchor->no_cache = TRUE;
-                       HText_setNoCache(me->text);
-                       break;
-                   }
-                   cp0 = cp;
-               }
-               if (me->node_anchor->no_cache == TRUE)
-                   goto free_META_copies;
-               cp0 = content;
-               while ((cp = strstr(cp0, "max-age")) != NULL) {
-                   cp += 7;
-                   while (*cp != '\0' && WHITE(*cp))
-                       cp++;
-                   if (*cp == '=') {
-                       cp++;
-                       while (*cp != '\0' && WHITE(*cp))
-                           cp++;
-                       if (isdigit(UCH(*cp))) {
-                           cp0 = cp;
-                           while (isdigit(UCH(*cp)))
-                               cp++;
-                           if (*cp0 == '0' && cp == (cp0 + 1)) {
-                               me->node_anchor->no_cache = TRUE;
-                               HText_setNoCache(me->text);
-                               break;
-                           }
-                       }
-                   }
-                   cp0 = cp;
-               }
-           }
-       }
-
-       /*
-        * Check for an Expires directive. - FM
-        */
-    } else if (!strcasecomp(NonNull(http_equiv), "Expires")) {
-       /*
-        * If we didn't get an Expires MIME header, store it in the anchor
-        * element, and if we haven't yet set no_cache, check whether we
-        * should.  Note that we don't accept a Date header via META tags,
-        * because it's likely to be untrustworthy, but do check for a Date
-        * header from a server when making the comparison.  - FM
-        */
-       LYUCTranslateHTMLString(&content, me->tag_charset, me->tag_charset,
-                               NO, NO, YES, st_other);
-       StrAllocCopy(me->node_anchor->expires, content);
-       if (me->node_anchor->no_cache == FALSE) {
-           if (!strcmp(content, "0")) {
-               /*
-                * The value is zero, which we treat as an absolute no-cache
-                * directive.  - FM
-                */
-               me->node_anchor->no_cache = TRUE;
-               HText_setNoCache(me->text);
-           } else if (me->node_anchor->date != NULL) {
-               /*
-                * We have a Date header, so check if the value is less than or
-                * equal to that.  - FM
-                */
-               if (LYmktime(content, TRUE) <=
-                   LYmktime(me->node_anchor->date, TRUE)) {
-                   me->node_anchor->no_cache = TRUE;
-                   HText_setNoCache(me->text);
-               }
-           } else if (LYmktime(content, FALSE) == 0) {
-               /*
-                * We don't have a Date header, and the value is in past for
-                * us.  - FM
-                */
-               me->node_anchor->no_cache = TRUE;
-               HText_setNoCache(me->text);
-           }
-       }
-
-       /*
-        * Check for a text/html Content-Type with a charset directive, if we
-        * didn't already set the charset via a server's header.  - AAC & FM
-        */
-    } else if (isEmpty(me->node_anchor->charset) &&
-              !strcasecomp(NonNull(http_equiv), "Content-Type")) {
+    if (isEmpty(me->node_anchor->charset) &&
+       (charset ||
+        !strcasecomp(NonNull(http_equiv), "Content-Type") && content)) {
        LYUCcharset *p_in = NULL;
        LYUCcharset *p_out = NULL;
 
-       LYUCTranslateHTMLString(&content, me->tag_charset, me->tag_charset,
-                               NO, NO, YES, st_other);
-       LYLowerCase(content);
+       if (charset) {
+           LYLowerCase(charset);
+       } else {
+           LYUCTranslateHTMLString(&content, me->tag_charset, me->tag_charset,
+                                   NO, NO, YES, st_other);
+           LYLowerCase(content);
+       }
 
-       if ((cp1 = strstr(content, "charset")) != NULL) {
+       if ((cp1 = charset) != NULL ||
+           (cp1 = strstr(content, "charset")) != NULL) {
            BOOL chartrans_ok = NO;
            char *cp3 = NULL, *cp4;
            int chndl;
 
-           cp1 += 7;
+           if (!charset)
+               cp1 += 7;
            while (*cp1 == ' ' || *cp1 == '=' || *cp1 == '"')
                cp1++;
 
@@ -2378,6 +2286,117 @@ void LYHandleMETA(HTStructured * me, con
         * Set the kcode element based on the charset.  - FM
         */
        HText_setKcode(me->text, me->node_anchor->charset, p_in);
+    }
+
+    /*
+     * Make sure we have META name/value pairs to handle.  - FM
+     */
+    if (!(http_equiv || name) || !content)
+       goto free_META_copies;
+
+    /*
+     * Check for a no-cache Pragma
+     * or Cache-Control directive. - FM
+     */
+    if (!strcasecomp(NonNull(http_equiv), "Pragma") ||
+       !strcasecomp(NonNull(http_equiv), "Cache-Control")) {
+       LYUCTranslateHTMLString(&content, me->tag_charset, me->tag_charset,
+                               NO, NO, YES, st_other);
+       if (!strcasecomp(content, "no-cache")) {
+           me->node_anchor->no_cache = TRUE;
+           HText_setNoCache(me->text);
+       }
+
+       /*
+        * If we didn't get a Cache-Control MIME header, and the META has one,
+        * convert to lowercase, store it in the anchor element, and if we
+        * haven't yet set no_cache, check whether we should.  - FM
+        */
+       if ((!me->node_anchor->cache_control) &&
+           !strcasecomp(NonNull(http_equiv), "Cache-Control")) {
+           LYLowerCase(content);
+           StrAllocCopy(me->node_anchor->cache_control, content);
+           if (me->node_anchor->no_cache == FALSE) {
+               cp0 = content;
+               while ((cp = strstr(cp0, "no-cache")) != NULL) {
+                   cp += 8;
+                   while (*cp != '\0' && WHITE(*cp))
+                       cp++;
+                   if (*cp == '\0' || *cp == ';') {
+                       me->node_anchor->no_cache = TRUE;
+                       HText_setNoCache(me->text);
+                       break;
+                   }
+                   cp0 = cp;
+               }
+               if (me->node_anchor->no_cache == TRUE)
+                   goto free_META_copies;
+               cp0 = content;
+               while ((cp = strstr(cp0, "max-age")) != NULL) {
+                   cp += 7;
+                   while (*cp != '\0' && WHITE(*cp))
+                       cp++;
+                   if (*cp == '=') {
+                       cp++;
+                       while (*cp != '\0' && WHITE(*cp))
+                           cp++;
+                       if (isdigit(UCH(*cp))) {
+                           cp0 = cp;
+                           while (isdigit(UCH(*cp)))
+                               cp++;
+                           if (*cp0 == '0' && cp == (cp0 + 1)) {
+                               me->node_anchor->no_cache = TRUE;
+                               HText_setNoCache(me->text);
+                               break;
+                           }
+                       }
+                   }
+                   cp0 = cp;
+               }
+           }
+       }
+
+       /*
+        * Check for an Expires directive. - FM
+        */
+    } else if (!strcasecomp(NonNull(http_equiv), "Expires")) {
+       /*
+        * If we didn't get an Expires MIME header, store it in the anchor
+        * element, and if we haven't yet set no_cache, check whether we
+        * should.  Note that we don't accept a Date header via META tags,
+        * because it's likely to be untrustworthy, but do check for a Date
+        * header from a server when making the comparison.  - FM
+        */
+       LYUCTranslateHTMLString(&content, me->tag_charset, me->tag_charset,
+                               NO, NO, YES, st_other);
+       StrAllocCopy(me->node_anchor->expires, content);
+       if (me->node_anchor->no_cache == FALSE) {
+           if (!strcmp(content, "0")) {
+               /*
+                * The value is zero, which we treat as an absolute no-cache
+                * directive.  - FM
+                */
+               me->node_anchor->no_cache = TRUE;
+               HText_setNoCache(me->text);
+           } else if (me->node_anchor->date != NULL) {
+               /*
+                * We have a Date header, so check if the value is less than or
+                * equal to that.  - FM
+                */
+               if (LYmktime(content, TRUE) <=
+                   LYmktime(me->node_anchor->date, TRUE)) {
+                   me->node_anchor->no_cache = TRUE;
+                   HText_setNoCache(me->text);
+               }
+           } else if (LYmktime(content, FALSE) == 0) {
+               /*
+                * We don't have a Date header, and the value is in past for
+                * us.  - FM
+                */
+               me->node_anchor->no_cache = TRUE;
+               HText_setNoCache(me->text);
+           }
+       }
 
        /*
         * Check for a Refresh directive.  - FM
@@ -2566,6 +2585,7 @@ void LYHandleMETA(HTStructured * me, con
     FREE(http_equiv);
     FREE(name);
     FREE(content);
+    FREE(charset);
 }
 
 /*




reply via email to

[Prev in Thread] Current Thread [Next in Thread]