www-commits
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

www/proprietary/workshop malgen item-create upd...


From: Therese Godefroy
Subject: www/proprietary/workshop malgen item-create upd...
Date: Fri, 30 Nov 2018 04:18:10 -0500 (EST)

CVSROOT:        /webcvs/www
Module name:    www
Changes by:     Therese Godefroy <th_g> 18/11/30 04:18:10

Modified files:
        proprietary/workshop: malgen item-create 
Added files:
        proprietary/workshop: update-item-count 

Log message:
        Changes by Ineiev:
        - all HTML generated by malgen (called from item-create);
        - separate script for updating item count: update-item-count;
        - more robust and portable code, more checks, etc. 

CVSWeb URLs:
http://web.cvs.savannah.gnu.org/viewcvs/www/proprietary/workshop/malgen?cvsroot=www&r1=1.2&r2=1.3
http://web.cvs.savannah.gnu.org/viewcvs/www/proprietary/workshop/item-create?cvsroot=www&r1=1.5&r2=1.6
http://web.cvs.savannah.gnu.org/viewcvs/www/proprietary/workshop/update-item-count?cvsroot=www&rev=1.1

Patches:
Index: malgen
===================================================================
RCS file: /webcvs/www/www/proprietary/workshop/malgen,v
retrieving revision 1.2
retrieving revision 1.3
diff -u -b -r1.2 -r1.3
--- malgen      13 Nov 2018 10:47:36 -0000      1.2
+++ malgen      30 Nov 2018 09:18:10 -0000      1.3
@@ -16,22 +16,32 @@
 
 # Written by Therese <address@hidden> and Ineiev.
 
-
+export LC_ALL=C
 set -e
 
-# Sort mal.rec by descending Id.
+# Output mal.rec items sorted by descending Id.
 function sort_recfile () {
-  awk '!/^Added/ { print }' mal.rec |
-  awk 'BEGIN {
+  {
+    # Output sorted Id lines separated by empty lines.
+    grep '^Id: ' mal.rec | sort -r | sed "s/$/\\n/";
+    # Mark end of ids, append mal.rec.
+    echo end of ids; echo; cat mal.rec;
+  } | awk 'BEGIN {
          RS=""; ORS="\n\n"; FS="\n"
-         PROCINFO["sorted_in"]="@ind_str_desc"
+         reading_ids = 1; id_no = 1
+       }
+       reading_ids && /end of ids/ { reading_ids = 0; next }
+       reading_ids { ids[id_no++] = $1; next }
+       /(.*\n|^)Id: / {
+         id=$0; sub(/(.*\n|^)Id:/, "Id:", id); sub(/\n.*/, "", id)
+         a[id]=$0
        }
-         /Id: / { a[$1]=$0 }
        END {
-         for(i in a) { print a[i] }
+         for (i = 1; i < id_no; i++) { print a[ids[i]] }
        }' > $sorted
 }
 
+# Regenerate targets.rec.
 function update_targets () {
   # List English pages in proprietary/.
   pages=$(ls ../*.html |
@@ -41,7 +51,11 @@
   for p in $pages; do
     targets=$(awk -f list-targets.awk ../$p)
     if test -n "$targets"; then
-      echo -e "\nPage: $p\nTargets: $targets" >> $temp1
+      cat >> $temp1 <<EOF
+
+Page: $p
+Targets: $targets
+EOF
     fi
   done
   # Replace the old one.
@@ -50,14 +64,14 @@
 }
 
 function rec2html () {
-# Converts the blurbs to HTML.
+# Convert the blurbs to HTML.
   sed    '$ { /^$/d }' $temp1 > $temp2
-  sed -r '/^[-0-9]+$/d' $temp2 |
-  sed    '/^Id:/,/^Blurb:/ { /^Id:/p; /^Blurb:/p; d }' |
-  sed -r 's,^Id:[ \t]*([0-9]{9}).*$,  <li id="M\1">,
+  sed    '/^[-0-9][-0-9]*$/d' $temp2 |
+  sed    '/^Id:/p;/^Blurb:/p;/^+/p;/^$/p;d' |
+  sed    's,^Id:[ \t]*\([0-9]\{9\}\).*$,  <li id="M\1">,
           s,^$,  </li>\n,
           s,^Blurb:[ \t]*,    ,
-          s,^\+, ,
+          s,^+, ,
           s,^[ \t]*$,,
           $s,$,\n  </li>\n</ul>,' > $temp1
 }
@@ -70,6 +84,12 @@
       }" $out > $f.temp && mv $f.temp $out
 }
 
+# Check mal.rec for consistency.
+if [ `grep '^Id: ' mal.rec | sort | uniq -d | wc -l` -gt 0 ]; then
+  echo 1>&2 "Id lines in mal.rec aren't unique:"
+  grep '^Id: ' mal.rec |  sort | uniq -dc 
+  exit 1
+fi
 
 # List English pages in proprietary/.
 pages=$(ls ../*.html |
@@ -79,39 +99,40 @@
 if [ $# = 0 ]; then
   input="$pages"
 else
-  input="$(while [ $# -gt 0 ]; do echo $1; shift; done)"
-  wrong_args=$(echo "$input" | while read arg; do
-    if ! echo "$pages" | grep -q "^$arg$"; then
+  input="$(while [ $# -gt 0 ]; do echo "x$1" | sed 's,.,,;q'; shift; done)"
+  wrong_args=$(echo "x$input" | sed '1s,.,,' | while read arg; do
+    if ! echo "x$pages" | sed '1s,.,,' | grep -q "^${arg//./[.]}$"; then
       echo "'$arg'"
     fi
   done)
   if [ -n "$wrong_args" ]; then
-    echo 1>&2 "These pages don't exist:" $wrong_args
+    echo 1>&2 "These pages don't exist: $wrong_args"
     exit 1
   fi
 fi
 
-# Create a temporary file.
+# Create temporary files.
 temp1=$(mktemp -t mal.XXXXXX) || (echo 1>&2 "Can't make temp1";  exit 1)
 temp2=$(mktemp -t mal.XXXXXX) || (echo 1>&2 "Can't make temp2";  exit 1)
-sorted=$(mktemp -t mal.XXXXXX) || (echo 1>&2 "Can't make temp2";  exit 1)
-trap 'rm -f "$temp1" "$temp2""$sorted"' EXIT
+sorted=$(mktemp -t mal.XXXXXX) || (echo 1>&2 "Can't make sorted";  exit 1)
+trap 'rm -f "$temp1" "$temp2" "$sorted"' EXIT
 
 sort_recfile
 update_targets
 
-for f in $input; do
-  out=../$f
-  echo $f
+echo "x$input" | sed '1s,.,,' | while read f; do
+  out="../$f"
+  echo "Regenerating '$f'"
   # List all possible targets.
   targets=$(awk 'BEGIN { RS = ""; FS="\nTargets: " }'"
-                   /$f/"'{$1=""; print $0}' targets.rec)
+                 /^Page: ${f//./[.]}\n/"'{$1=""; print $0}' targets.rec)
 
   for t in $targets; do
     if [ "$f" != 'proprietary.html' ]; then
 
       awk 'BEGIN { RS=""; ORS="\n\n"; FS="\n" }
-             /Target: '"$f[ \t]+$t"'/ { print }' $sorted  > $temp1
+           /\nTarget: '"${f//./[.]}[ \t]+${t//./[.]}"'\n/ { print }' \
+          $sorted  > $temp1
 
       if [ -s $temp1 ]; then
         rec2html
@@ -122,22 +143,23 @@
 
     else
       # List 5 recently added items. Sort by descending date.
-      awk 'BEGIN {
-             RS="\n\nAdded: "; FS="\n\n"; ORS="\n\n"
-             PROCINFO["sorted_in"]="@ind_str_desc"
-           }
-           !/^#/ && !/^%/ { a[$1]=$1 }
-           END {
-             n=1
-             for(i in a) {
-               if ( n <= 5 ) { print a[i]; n++ }
-             }
-           }' mal.rec > $temp1
+      recent=`grep "^Added: " $sorted | sort -r | head -n 5 \
+              | sed "s/Added: //"`
+      echo "x$recent" | sed '1s,.,,' | grep -v "^[-0-9]*$" \
+        | while read i; do
+            echo 1>&2 "Malformed \"Added\" field found: '$i'"
+          done
+      for i in $recent; do
+        awk 'BEGIN { RS=""; FS="\n"; ORS="\n\n" }
+             /(\n|^)Added: '$i'(\n|$)/ { print }
+            ' $sorted;
+      done | awk 'BEGIN { RS=""; FS="\n"; ORS="\n\n"; i = 1 }
+                  { if (i++ > 5) { exit 0; } print }
+                 ' > $temp1
       rec2html
       insert_list
     fi
-  done
-done
+  done # for t in $targets
+done # for f in $input
 
-sleep 5
-exit 0
+./update-item-count

Index: item-create
===================================================================
RCS file: /webcvs/www/www/proprietary/workshop/item-create,v
retrieving revision 1.5
retrieving revision 1.6
diff -u -b -r1.5 -r1.6
--- item-create 13 Nov 2018 10:06:30 -0000      1.5
+++ item-create 30 Nov 2018 09:18:10 -0000      1.6
@@ -24,7 +24,7 @@
 # Written by Therese <address@hidden> and Ineiev.
 
 
-export LC_ALL=en_US.UTF-8
+export LC_ALL=C
 set -e
 
 temp1=$(mktemp -t mal.XXXXXX) || (echo 1>&2 "Can't make temp1";  exit 1)
@@ -41,48 +41,64 @@
         sed 's,^\.\./,,')
 
 if [ ! -f item ]; then
-  echo 1>&2 "!!! 'item' doesn't exist."
-  sleep 5; exit 1
-elif ! id=$(grep '^Id:' item); then
+  echo 1>&2 "!!! 'item' file doesn't exist."
+  exit 1
+fi
+if [ ! -r item ]; then
+  echo 1>&2 "!!! 'item' file isn't readable."
+  exit 1
+fi
+
+files=$(awk '/Target:/ {print $2}' item)
+if [ -z "$files" ]; then
+  echo 1>&2 "!!! This item has no Target field.  It can't be added
+    anywhere."
+  exit 1
+fi
+if ! id=$(grep '^Id:' item); then
   echo 1>&2 "!!! This item doesn't have an Id."
-  sleep 5; exit 1
-elif [ "$id" = 'Id: 200000000' ]; then
+  exit 1
+fi
+if [ `echo "$id" | wc -l` -gt 1 ]; then
+  echo 1>&2 "!!! This item has multiple Id lines."
+  exit 1
+fi
+if [ "$id" = 'Id: 200000000' ]; then
   echo 1>&2 "!!! This item still has the default Id."
-  sleep 5; exit 1
-elif [[ "$files" =~ 'proprietary.html' ]]; then
+  exit 1
+fi
+if echo "$id" | egrep -qv '^Id: 20[0-9]{7}$'; then
+  echo 1>&2 "!!! This item has a malformed Id, '$id'."
+  exit 1
+fi
+if echo "x$files" | sed '1s,.,,' | grep -q '^proprietary\.html$'; then
   echo 1>&2 "!!! proprietary.html is not a primary target for malware items."
-  sleep 5; exit 1
-elif grep -q "$id" mal.rec; then
+  exit 1
+fi
+if grep -q "$id" mal.rec; then
   echo 1>&2 "!!! An item with same Id is already in mal.rec.
     - If the two items are different, please change the last digit.
     - If you are editing an item that has been added already, you should
     do it in mal.rec, and regenerate the pages with malgen. See README.md."
-  sleep 10; exit 1
+  exit 1
 fi
 
-files=$(awk '/Target:/ {print $2}' item)
-if [ -z "$(echo $files)" ]; then
-  echo 1>&2 "!!! This item has empty Target field(s). It can't be added
-    anywhere."
-  sleep 5; exit 1
-fi
-
-wrong_files=$(echo "$files" | while read f; do
-  if ! echo "$pages" | grep -q "^$f$"; then
+wrong_files=$(echo "x$files" | sed '1s,.,,' | while read f; do
+  if ! echo "x$pages" | sed '1s,.,,' | grep -q "^${f//./[.]}$"; then
     echo "    $f"
   fi
 done)
 if [ -n "$wrong_files" ]; then
   echo 1>&2 "!!! These pages don't exist:
 $wrong_files"
-  sleep 5; exit 1
+  exit 1
 fi
 
 ## Standardize the format.
 
 # Remove comments, blank lines, linefeeds & leading spaces; unwrap.
 sed '/^$/d; /^#/d' item |
-sed -r ':a /Blurb:.*$/ { N; s,(.+)\n(.+)$,\1 \2,; ba }' |
+sed ':a /Blurb:.*$/ { N; s,\(.\+\)\n\(.\+\)$,\1 \2,; ba }' |
 sed 's,Blurb: *,Blurb:\n,' > $temp1
 
 # Reformat.
@@ -113,9 +129,6 @@
 # Wrap the text.
 fmt -s -w 76 $temp1 > $temp2
 
-
-## REC version
-
 # Convert to rec format.
 sed -e '/^Blurb:/ {N; s,\n[ ]\+<, <,}' \
     -e 's,^ ,+,' \
@@ -125,127 +138,63 @@
 today=$(date +'%Y-%m-%d')
 sed -i "/^Id:/ s,^,\\nAdded: $today\\n", item.rec 
 
-# Add the item to the recfile, but don't make this permanent before
-# checking HTML addition.
-sed '/^# ADD NEW BLURB HERE/r item.rec' mal.rec > mal.rec.temp
-if ! grep -q "$id" mal.rec.temp; then
-  echo 1>&2 "!!! $id couldn't be added to mal.rec."
-  sleep 5; exit 2
-fi
-
-
-## HTML version
-
-# Tweak the format.
-
-sed -r '
-1 s,Id:[ \t]([0-9]{9}).*$,  <li id="M\1">,
-2,/^Blurb/d
-$ a\
-  </li>\n
-/^Added:/d
-' $temp2 > item.html
-
-# Add the item to the relevant pages.
+it='<li id="M'${id#* }'"'
 
 error_html=""
 for f in $files proprietary.html; do
   # Check that the page doesn't already have an item with same Id.
-  it=$(grep '<li id="M20' item.html)
   if grep -q "$it" ../$f; then
     echo "$it is already in $f."
-
   else
     # Check the target.
-    target=$(awk "/$f/"'{ print $3 }' item)
+    target=$(sed "/^Target: ${f//./[.]} /{s/.* //;q};d" item)
     # If no target id is specified, make sure the page only has one list.
     # Get the id for the second list.
-    t2=$(awk 'BEGIN { RS = "" }'"/$f/"'{ print $5 }' targets.rec)
+    t2=$(awk 'BEGIN { RS = "" }'"/^Page: ${f//./[.]}\n/"'{ print $5 }' \
+         targets.rec)
 
     # If there is one, the item can't be added. Switch error warning.
     if [ -z "$target" ] &&  [ -n "$t2" ]; then
       error_html=1
       echo 1>&2 "!!! $f: A target id should be specified."
-
     else
       # Otherwise, use the first target id.
       if [ -z "$target" ]; then
-        target=$(awk 'BEGIN { RS = "" }'"/$f/"'{ print $4 }' targets.rec)
+        target=$(awk 'BEGIN { RS = "" }'"/${f//./[.]}/"'{ print $4 }' \
+                 targets.rec)
         # Add it to the Target field in mal.rec. This keeps the
         # regeneration script (malgen) simpler.
-        sed -i "s,Target:[ \t]*$f[ \t]*$,Target: $f $target," mal.rec.temp
-      fi
-
-      # Add the item.
-      sed  "/id=.$target./,/class=\"blurbs\"/ {
-            /class=\"blurbs\"/r item.html
-           }" ../$f > $f.temp
-      # If addition fails, switch error warning.
-      number=$(grep -c "$it" $f.temp) || true
-      if [ "$number" -ne "1" ]; then
-        error_html=1
-        echo 1>&2 "!!! $f:$it couldn't be added. Wrong target id?"
+        sed -i "s,Target:[ \t]*${f//./[.]}[ \t]*$,Target: $f $target," item.rec
       fi
     fi
   fi
 done
-
-# If any additions failed, quit.
+# Quit on errors.
 if [ -n "$error_html" ]; then
-  sleep 5; exit 2
+  exit 2
+fi
 
-# Otherwise, make additions permanent and report what was done.
-else
-  for f in $files proprietary.html; do
-    [ -f $f.temp ] && (mv $f.temp ../$f
-    echo "*** $it was added to $f")
-  done
-  [ -f mal.rec.temp ] && (mv mal.rec.temp mal.rec
-  echo "*** $id was added to mal.rec.")
-  # Make sure the added item isn't going to be reused for creating
-  # another one.
-  cp item item-old
-  cp item-start item
+# Add the item to the recfile.
+sed '/^# ADD NEW BLURB HERE/r item.rec' mal.rec > mal.rec.temp
+if ! grep -q "$id" mal.rec.temp; then
+  echo 1>&2 "!!! $id couldn't be added to mal.rec."
+  exit 2
 fi
 
+mv mal.rec.temp mal.rec
 
-# RT #1328973.
-
-# Get the number of items, and approximate it to the nearest 50.
-item_count=$(grep -Ec 'Id: 20[0-9]{6}[048]' mal.rec ) 
-i=$((item_count/50))
-i=$((i*50))
-d=$((item_count%50))
-if [ $d -ge 25 ]; then
-  i=$(($i+50))
-fi
-
-# Get the number of links, and approximate it to the lower 50.
-link_count=$(sed -rn '
-/href="http/ {
-  s,^.*href="https?://([^">]+)/?">.*$,\1,;
-  p
-} ' mal.rec | sort | uniq | wc -l)
-l=$((link_count/50))
-l=$((l*50))
-
-# Update item and link count in proprietary.html and
-# free-software-even-more-important.html, as needed.
-month=$(date +'%B, %Y')
-if ! grep "list around $i" ../proprietary.html > /dev/null ||
- ! grep "more than $l references" ../proprietary.html > /dev/null; then
-  sed -i "
-  s|As of .*, the pages in this directory list .*$|As of $month, the pages in this directory list around $i|
-  s|functionalities (with .* references|functionalities (with more than $l references|
-  " ../proprietary.html
-  echo "Malware count was updated in proprietary.html."
-fi
-if ! grep "lists around $i different" \
- ../../philosophy/free-software-even-more-important.html > /dev/null; then
-  sed -i "
-  s|That directory lists around .* different|That directory lists around $i different|
-  s|malicious functionalities (as of .*),|malicious functionalities (as of $month),|
-  " ../../philosophy/free-software-even-more-important.html
-  echo "Malware count was updated in free-software-even-more-important.html."
-fi
-exit 0
+./malgen $files proprietary.html
+
+# Emit a warning if addition fails.
+error_html=""
+for f in $files proprietary.html; do
+  if ! grep -q "$it" ../$f; then
+    echo 1>&2 "!!! $f:$it couldn't be added."
+    error_html=1
+  fi
+done
+
+# Make sure the added item isn't going to be reused for creating
+# another one.
+cp item item-old
+cp item-start item

Index: update-item-count
===================================================================
RCS file: update-item-count
diff -N update-item-count
--- /dev/null   1 Jan 1970 00:00:00 -0000
+++ update-item-count   30 Nov 2018 09:18:10 -0000      1.1
@@ -0,0 +1,74 @@
+#! /bin/bash
+
+# update-item-count updates counts of items listed in mal.rec
+# on relevant pages.
+
+# RT #1328973.
+
+# Copyright (C) 2018 Free Software Foundation, Inc.
+
+# Copying and distribution of this file, with or without modification,
+# are permitted in any medium without royalty provided the copyright
+# notice and this notice are preserved.  This file is offered as-is,
+# without any warranty.
+
+# Written by Therese <address@hidden> and Ineiev.
+
+export LC_ALL=C
+set -e
+
+# Get the number of items, and approximate it to the nearest 50.
+item_count=$(grep -Ec '^Id: 20[0-9]{6}[048]' mal.rec)
+i=$((item_count/50))
+i=$((i*50))
+d=$((item_count%50))
+if [ $d -ge 25 ]; then
+  i=$((i+50))
+fi
+
+# Get the number of links, and approximate it to the lower 50.
+link_count=$(sed "s/href[ \t]*=[ \t]*['\"]http/\n&/g;" mal.rec \
+  | grep "^href[ \t]*=[ \t]*['\"]https\?" | awk '{
+    s = $0; sub(/href[ \t]*=[ \t]*/, "", s)
+    quote = substr(s, 1, 1)
+    sub(/.https?:\/\//, "", s); idx = index(s, quote)
+    if (idx)
+      s = substr(s, 1, idx - 1)
+    sub(/\/*$/, "", s)
+    print s
+  }' | sort -u | wc -l)
+# The page says, "more than $l references", so
+# when e.g. link_count is exactly 350, l should be 300.
+l=$(((link_count - 1)/50))
+l=$((l*50))
+
+# Check if the numbers to put in proprietary.html and
+# free-software-even-more-important.html changed, update the files
+# as needed.
+month=$(date +'%B, %Y')
+if ! grep -q "list around $i\>" ../proprietary.html \
+   || ! grep -q "more than $l references" ../proprietary.html; then
+  sed -i "
+  s|As of .*, the pages in this directory list .*$|As of $month, the pages in this directory list around $i|
+  s|functionalities (with .* references|functionalities (with more than $l references|
+  " ../proprietary.html
+  if ! grep -q "list around $i\>" ../proprietary.html \
+     || ! grep -q "more than $l references" ../proprietary.html; then
+     echo 1>&2 "Malware count update failed in proprietary.html."
+     exit 1
+  fi
+  echo "Malware count was updated in proprietary.html."
+fi
+f=../../philosophy/free-software-even-more-important.html
+if ! grep -q "lists around $i different" $f; then
+  sed -i "
+  s|That directory lists around .* different|That directory lists around $i different|
+  s|malicious functionalities (as of .*),|malicious functionalities (as of $month),|
+  " $f
+  if ! grep -q "lists around $i different" $f \
+     || ! grep -q "malicious functionalities (as of $month)," $f; then
+     echo 1>&2 "Malware count update failed in ${f#../../}."
+     exit 1
+  fi
+  echo "Malware count was updated in ${f#../../}."
+fi



reply via email to

[Prev in Thread] Current Thread [Next in Thread]