www-commits
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

www/server/source/planetrss planetrss.pl


From: Pavel Kharitonov
Subject: www/server/source/planetrss planetrss.pl
Date: Mon, 09 Mar 2015 08:13:36 +0000

CVSROOT:        /web/www
Module name:    www
Changes by:     Pavel Kharitonov <ineiev>       15/03/09 08:13:36

Modified files:
        server/source/planetrss: planetrss.pl 

Log message:
        Strip col and colgroup when stripping tables; skip GNU Remotecontrol
        Newsletter by default.

CVSWeb URLs:
http://web.cvs.savannah.gnu.org/viewcvs/www/server/source/planetrss/planetrss.pl?cvsroot=www&r1=1.22&r2=1.23

Patches:
Index: planetrss.pl
===================================================================
RCS file: /web/www/www/server/source/planetrss/planetrss.pl,v
retrieving revision 1.22
retrieving revision 1.23
diff -u -b -r1.22 -r1.23
--- planetrss.pl        11 Aug 2014 05:33:55 -0000      1.22
+++ planetrss.pl        9 Mar 2015 08:13:35 -0000       1.23
@@ -1,7 +1,7 @@
 # PlanetRSS: fetch feeds from planetgnu.org and output them as HTML.
 #
 # Copyright © 2011 Shailesh Ghadge
-# Copyright © 2013, 2014 Free Software Foundation, Inc.
+# Copyright © 2013, 2014, 2015 Free Software Foundation, Inc.
 #
 # This program is free software: you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
@@ -31,9 +31,12 @@
 # Provides argument handling.
 use Getopt::Long;
 
-my $Version = "1.9";
+my $Version = "1.10";
 my $default_lines = 3;
 my $default_length = 200;
+# GNU Remotecontrol Newsletter doesn't produce meaningful feeds;
+# skip it by default.
+my $default_exclude_pattern = '^GNU Remotecontrol: Newsletter';
 my $head = "<!-- Autogenerated by planetrss.pl ".$Version." -->\n";
 
 # Number of feeds to output.
@@ -42,6 +45,9 @@
 # Number of characters per feed.
 my $FeedLength = $default_length;
 
+# Regex for excluded titles
+my $exclude_pattern = $default_exclude_pattern;
+
 # Other options.
 my $a = 0;
 my $b = 0;
@@ -82,6 +88,7 @@
             "strong=i" => \$strong, "table=i" => \$table,
             "textarea=i" => \$textarea, "tt=i" => \$tt, "ul=i" => \$ul,
             "FeedLines=i" => \$FeedLines, "FeedLength=i" => \$FeedLength,
+            "exclude=s" => \$exclude_pattern,
             "help" => \$help, "version" => \$version);
 
 if ($help)
@@ -98,6 +105,7 @@
     -code=1         preserve code tags
     -div=1          preserve div tags
     -em=1           preserve em tags
+    -exclude=regex  exclude entries whose title match this pattern
     -h=1            preserve h tags
     -hr=1           preserve hr tags
     -i=1            preserve i tags
@@ -105,7 +113,7 @@
     -p=1            preserve p tags
     -pre=1          preserve pre tags
     -strong=1       preserve strong tags
-    -table=1        preserve table, tr, th tags
+    -table=1        preserve tables
     -textarea=1     preserve textarea tags
     -tt=1           preserve tt tags
     -ul=1           preserve ul, ol, dl, li tags
@@ -115,6 +123,7 @@
 
 Defaults:
     -FeedLines=".$default_lines." -FeedLength=".$default_length."
+    -exclude='".$default_exclude_pattern."'
 
 Examples:
     perl planetrss.pl -FeedLines=7 -FeedLength=500
@@ -130,7 +139,7 @@
   {
     print "PlanetRSS ".$Version."
 Copyright (C) 2011 Shailesh Ghadge
-Copyright (C) 2013, 2014 Free Software Foundation, Inc.
+Copyright (C) 2013, 2014, 2015 Free Software Foundation, Inc.
 
 This program is free software: you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
@@ -156,7 +165,8 @@
 
 print $head;
 
-for (my $i = 0; $i < $FeedLines; $i++)
+my $skipped = 0;
+for (my $i = 0; $i - $skipped < $FeedLines; $i++)
   {
     my $feed  = $PGparser->get ($i);
     my $url   = $feed->get ('url');
@@ -165,6 +175,12 @@
     my $tail  = "...\n".$gnun_split."<a href='".$url."'>"
                 .$gnun_split."more".$gnun_split."</a>";
     my $d0;
+
+    if ($title =~ /$exclude_pattern/)
+      {
+        $skipped++;
+        next;
+      }
     $head  = ":\n";
 
     # Remove Blog name.
@@ -193,6 +209,8 @@
         $desc = strip_tag ($desc, "table");
         $desc = strip_tag ($desc, "tbody");
         $desc = strip_tag ($desc, "t[hrd]");
+        $desc = strip_tag ($desc, "col");
+        $desc = strip_tag ($desc, "colgroup");
       }
     if ($textarea != 1)
       {



reply via email to

[Prev in Thread] Current Thread [Next in Thread]