[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [PATCH] gnun-validate-html may break on ASCII art
From: Ineiev
Subject: Re: [PATCH] gnun-validate-html may break on ASCII art
Date: Fri, 19 Aug 2011 14:53:20 +0000
User-agent: Thunderbird 2.0.0.14 (X11/20080501)
On 08/18/2011 03:32 PM, Ineiev wrote:
It is suggested that the SSIs be expanded via a recursive
awk script; in addition, configure-time variables for the
awk and sed programs are used in the same way as the variable
for m4 is used in the current version of gnun-validate-html.
Next revision: also expand includes of the form '<!--#include file="...',
make it work with relative paths of the included files, and support
' and ` as well as " for quoting the file name.
Index: gnun-validate-html.in
===================================================================
RCS file:
/sources/trans-coord/trans-coord/gnun/server/gnun/gnun-validate-html.in,v
retrieving revision 1.9
diff -U 2 -r1.9 gnun-validate-html.in
--- gnun-validate-html.in 7 Jan 2010 14:43:04 -0000 1.9
+++ gnun-validate-html.in 19 Aug 2011 14:45:18 -0000
@@ -1,5 +1,5 @@
#!/bin/bash
-# Copyright (C) 2008, 2009, 2010 Free Software Foundation, Inc.
+# Copyright (C) 2008, 2009, 2010, 2011 Free Software Foundation, Inc.
# This file is part of GNUnited Nations.
@@ -21,5 +21,5 @@
cat <<EOF
gnun-validate-html (@PACKAGE_NAME@) @PACKAGE_VERSION@
-Copyright (C) 2010 Free Software Foundation, Inc.
+Copyright (C) 2011 Free Software Foundation, Inc.
You may redistribute copies of @PACKAGE_NAME@
under the terms of the GNU General Public License.
@@ -69,18 +69,59 @@
TMP1=`mktemp -t gnun.1.XXXXXX`
TMP2=`mktemp -t gnun.2.XXXXXX`
-TMP3=`mktemp -t gnun.3.XXXXXX`
-trap "rm -f $TMP1 $TMP2 $TMP3" EXIT
+trap "rm -f $TMP1 $TMP2" EXIT
# Expand input file's #include directives and save the result in
# $TMP1.
-cat $1 > $TMP1
+cat > $TMP2 <<"EOF"
+# Limitations: CGI includes would be expanded in a wrong way
+BEGIN {
+ relative_dir_name = ARGV[1];
+ sub ("[^/]*$", "", relative_dir_name);
+ sub ("/*$", "/", relative_dir_name);
+}
+
+/<!--#include +(virtual)|(file)=[\"`']/ {
+ n = split ($0, names, "<!--#include ");
+ printf ("%s", names[1]);
+ for (i = 2; i <= n; i++)
+ {
+ # Extract the next included file name
+ if (names[i] !~ /^ *(virtual)|(file)=[\"'`]/)
+ {
+ # Pass it unchanged: this is not an Apache include directive
+ printf ("<!--#include %s", names[i]);
+ continue;
+ }
+ m = index (names[i], "-->");
+ if (m == 0) # This shouldn't happen: the "-->" must be on the same line
+ m = length (names[i]) + 1;
+ name = substr (names[i], 1, m-1);
+ # Absolute paths are not allowed with "file=" type of includes,
+ # but we process them in the same way for simplicity
+ sub ("^ *((virtual)|(file))=", "", name);
+ quote_symbol = substr (name, 1, 1);
+ sub ("^.", "", name);
+ sub (quote_symbol "[^" quote_symbol "]*$", "", name);
+
+ # Construct the real path to the file
+ if (name ~ /^\//)
+ name = root name;
+ else
+ name = relative_dir_name name;
+
+ # Invoke the script recursively
+ system ("@AWK@ -v script_name='" script_name "' -v root='" \
+ root "' -f '" script_name "' " name);
+
+ # Output the part remaining after the include directive
+ print (substr (names[i], m + 3));
+ }
+ next;
+}
+
+{ print; }
+EOF
-while true; do
- grep --quiet '<!--#include virtual' $TMP1 || break
- sed --in-place \
- "s/<\!--#include virtual=\"\/\?\(.*\)\" -->/m4_include(\`\1')/g" $TMP1
- @M4@ -P -EE -I $ROOT $TMP1 > $TMP3
- cp $TMP3 $TMP1
-done
+@AWK@ -v script_name=$TMP2 -v root="$ROOT" -f $TMP2 $1 > $TMP1
# Execute xmllint on $TMP1 and save its output to $TMP2.
@@ -98,5 +139,5 @@
# expanded #include directives) and the translator can not easily look
# up for references in it.
-cat $TMP2 | sed '
+cat $TMP2 | @SED@ '
/line [[:digit:]]\+/ {
p