www/server/source/planetrss changelog planetrss.pl

www-commits
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
www/server/source/planetrss changelog planetrss.pl

From:	Shailesh Ghadge
Subject:	www/server/source/planetrss changelog planetrss.pl
Date:	Thu, 14 Apr 2011 06:41:16 +0000
CVSROOT:        /web/www
Module name:    www
Changes by:     Shailesh Ghadge <shailesh_ghadge>       11/04/14 06:41:16

Modified files:
        server/source/planetrss: changelog planetrss.pl 

Log message:
        Added feature for user control over tag removal from feed description

CVSWeb URLs:
http://web.cvs.savannah.gnu.org/viewcvs/www/server/source/planetrss/changelog?cvsroot=www&r1=1.1&r2=1.2
http://web.cvs.savannah.gnu.org/viewcvs/www/server/source/planetrss/planetrss.pl?cvsroot=www&r1=1.8&r2=1.9

Patches:
Index: changelog
===================================================================
RCS file: /web/www/www/server/source/planetrss/changelog,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -b -r1.1 -r1.2
--- changelog   26 Mar 2011 16:04:25 -0000      1.1
+++ changelog   14 Apr 2011 06:39:49 -0000      1.2
@@ -1,3 +1,13 @@
+Version 1.2 - 14 April 2011
+- User control over tag removals in Feed Description
+- <a href> will not be removed by default.
+- To remove 'a' html tags:
+       command: perl planetrss.pl -a=0
+- Other tags are removed from description by default
+- To stop removal of a tag:
+       command: perl planetrss.pl -tag=1
+       replace tag in above by one of these:  a ,  b , code ,  div ,  em ,  h ,   hr ,  i ,  img ,  p ,  pre ,  strong , table ,  textarea ,  tt ,  ul 
+
 Version 1.1 - 26 Mar 2011
 Added Checker code, to first check previously retrieved feeds(if any) and only 
then proceed to write to planetfeeds.html if required.
 

Index: planetrss.pl
===================================================================
RCS file: /web/www/www/server/source/planetrss/planetrss.pl,v
retrieving revision 1.8
retrieving revision 1.9
diff -u -b -r1.8 -r1.9
--- planetrss.pl        6 Apr 2011 17:45:13 -0000       1.8
+++ planetrss.pl        14 Apr 2011 06:39:49 -0000      1.9
@@ -1,4 +1,4 @@
-       #                  PlanetRSS, Version 1.1       
+       #                  PlanetRSS, Version 1.2       
        #               Copyright © 2011 Shailesh Ghadge
 
        #This program is free software: you can redistribute it and/or modify
@@ -14,11 +14,12 @@
        #You should have received a copy of the GNU General Public License
        #along with this program.  If not, see <http://www.gnu.org/licenses/>.
                
-       #Email: address@hidden  #Date: 26 Mar 2011
+       #Email: address@hidden  #Version Date: 14 Apr 2011
        #
        #Functionality: Compare with previously retrieved feeds(if any) and 
then if required,
        #               Fetch & save 'n' feeds from planet.gnu.org using RSS 
feed link http://planet.gnu.org/rss20.xml in html format
        #               Each feed is truncated to 'm' characters.
+       #               User control over removal of html tags
        
        
        #--------------------------------------------
@@ -28,6 +29,8 @@
        use LWP::Simple;
        #Provides get(url) function
          
+       use Getopt::Long;
+       #Provides argument handling 
        #---------------------------------------------  
        my $FeedLines = 3;      # 'n' feeds
        my $FeedLength = 200;   # 'm' characters 
@@ -35,6 +38,12 @@
        my $PGfeeds = get("http://planet.gnu.org/rss20.xml");
        #Fetch RSS feeds as xml
        
+       #Options for Sanitization (value 1 implies tag will not be stripped, any other value implies tag will be stripped)
+       my $a = 1; my $b = 0;  my $code = 0; my $div = 0; my $em = 0; my $h = 
0;  my $hr = 0; my $i = 0; my $img = 0; my $p = 0; my $pre = 0; my $strong = 0; 
 
+       my $table = 0; my $textarea = 0; my $tt = 0; my $ul = 0;
+       GetOptions("a=i" => \$a, "b=i" => \$b,  "code=i" => \$code, "div=i" => 
\$div, "em=i" => \$em,  "h=i" => \$h, "hr=i" => \$hr, "i=i" => \$i,"img=i" => 
\$img, 
+                  "p=i" => \$p, "pre=i" => \$pre, "strong=i" => \$strong,  
"table=i" => \$table, "textarea=i" => \$textarea, "tt=i" => \$tt, "ul=i" => 
\$ul);  
+
         my $PGparser = new XML::RSS::Parser::Lite;
        #Create new RSS parser
        
@@ -75,8 +84,9 @@
        #---------------------To Create/Overwrite PlanetFeeds.html-----------
        if($Write2File==1)
        {
-               
+               my $PGhead= "<!-- Autogenerated File by planetrss.pl http://web.cvs.savannah.gnu.org/viewvc/www/server/source/planetrss/?root=www -->";
                open (PGhtml, '>planetfeeds.html');
+               print PGhtml $PGhead;
                #Print Feeds data in the format of- "Title - Description... <a 
href='URL'>more</a>"
                for (my $i = 0; $i < $FeedLines; $i++) 
                {               
@@ -94,16 +104,89 @@
                        }
                        else
                        {               
-                               #Sanitize Description (using Regex method after 
failure with scrubber,strip.. )                 
-                               $PGdesc=~ s/<(.*?)>//gi;        $PGdesc=~ 
s/&lt;a(.*?)&gt;//gi;         $PGdesc=~ s/&lt;\/a&gt;//gi;
-                               $PGdesc=~ s/&lt;p&gt;//gi;      $PGdesc=~ 
s/&lt;\/p&gt;//gi;            $PGdesc=~ s/&lt;li&gt;//gi;
-                               $PGdesc=~ s/&lt;ul&gt;//gi;     $PGdesc=~ 
s/&lt;br \/&gt;//gi;          $PGdesc =~ s/\s\s+/ /g;#remove whitespace
-                               #Have kept separate expressions now for easy 
debugging in case of error
+                               #Sanitize Description                   
+                               $PGdesc=~ s/<(.*?)>//gi;        
+                               $PGdesc=~ s/&lt;br \/&gt;//gi;          $PGdesc 
=~ s/\s\s+/ /g;#remove whitespace
+                               if($a!=1)
+                               {                               
+                                       $PGdesc=~ s/&lt;a(.*?)&gt;//gi;         
$PGdesc=~ s/&lt;\/a&gt;//gi;
+                               }
+                               if($b!=1)
+                               {
+                                       $PGdesc=~ s/&lt;b&gt;//gi;      
$PGdesc=~ s/&lt;\/b&gt;//gi;
+                               }
+                               if($code!=1)
+                               {
+                                       $PGdesc=~ s/&lt;code&gt;//gi;   
$PGdesc=~ s/&lt;\/code&gt;//gi;
+                               }
+                               if($div!=1)
+                               {
+                                       $PGdesc=~ s/&lt;div(.*?)&gt;//gi;       
$PGdesc=~ s/&lt;\/div&gt;//gi; 
+                               }                               
+                               if($em!=1)
+                               {
+                                       $PGdesc=~ s/&lt;em&gt;//gi;     
$PGdesc=~ s/&lt;\/em&gt;//gi;
+                               }       
+                               if($i!=1)
+                               {
+                                       $PGdesc=~ s/&lt;i&gt;//gi;      
$PGdesc=~ s/&lt;\/i&gt;//gi;
+                               }
+                                                       
+                               if($img!=1)
+                               {
+                                       $PGdesc=~ s/&lt;img(.*?)&gt;//gi;       
$PGdesc=~ s/&lt;\/img&gt;//gi;
+                               }
+                               if($h!=1)
+                               {
+                                       $PGdesc=~ s/&lt;h(.*?)&gt;//gi; 
$PGdesc=~ s/&lt;\/h(.*?)&gt;//gi;
+                               }
+                               
+                               if($hr!=1)
+                               {
+                                       $PGdesc=~ s/&lt;hr&gt;//gi;     
$PGdesc=~ s/&lt;\/hr&gt;//gi;
+                               }                               
+                               if($p!=1)
+                               {                               
+                                       $PGdesc=~ s/&lt;p&gt;//gi;      
$PGdesc=~ s/&lt;\/p&gt;//gi;
+                               }
+                               if($pre!=1)
+                               {
+                                       $PGdesc=~ s/&lt;pre&gt;//gi;    
$PGdesc=~ s/&lt;\/pre&gt;//gi;
+                               }
+                               if($strong!=1)
+                               {
+                                       $PGdesc=~ s/&lt;strong&gt;//gi; 
$PGdesc=~ s/&lt;\/strong&gt;//gi;
+                               }
+                               if($table!=1)
+                               {                               
+                                       $PGdesc=~ s/&lt;table(.*?)&gt;//gi;     
$PGdesc=~ s/&lt;\/table&gt;//gi;
+                                       $PGdesc=~ s/&lt;tr(.*?)&gt;//gi;        
$PGdesc=~ s/&lt;\/tr&gt;//gi;
+                                       $PGdesc=~ s/&lt;th(.*?)&gt;//gi;        
$PGdesc=~ s/&lt;\/th&gt;//gi;
+                               }
+                               if($textarea!=1)
+                               {                               
+                                       $PGdesc=~ s/&lt;textarea(.*?)&gt;//gi;  
$PGdesc=~ s/&lt;\/textarea&gt;//gi;
+                                       $PGdesc=~ s/<textarea(.*?)>//gi;        
$PGdesc=~ s/<\/textarea>//gi;
+                               }
+                               if($tt!=1)
+                               {
+                                       $PGdesc=~ s/&lt;tt&gt;//gi;     
$PGdesc=~ s/&lt;\/tt&gt;//gi;
+                               }                               
+                               if($ul!=1)
+                               {                               
+                                       $PGdesc=~ s/&lt;ul(.*?)&gt;//gi;        
$PGdesc=~ s/&lt;\/ul&gt;//gi;
+                                       $PGdesc=~ s/&lt;li(.*?)&gt;//gi;        
$PGdesc=~ s/&lt;\/li&gt;//gi;
+                               }
+                               $PGdesc=~ s/&lt;/</gi;  $PGdesc=~ s/&gt;/>/gi;
+                               $PGdesc=~ s/&quot;/"/gi;
+                               #------End of Sanitization
+                       
                        
                                #Truncate Description
                                $PGdesc = substr($PGdesc, 
0,($FeedLength-(10+length($PGtitle)))); #10 characters removed for ': ' and 
'... more'
                        
                                #Output                                 
+                               
                                print PGhtml "<p><a 
href='".$PGurl."'>".$PGtitle ."</a>: ".$PGdesc. "... <a 
href='".$PGurl."'>more</a></p>\n";
                        }
                }
[Prev in Thread]
Current Thread
[Next in Thread]
www/server/source/planetrss changelog planetrss.pl, Shailesh Ghadge <=
- www/server/source/planetrss changelog planetrss.pl, Shailesh Ghadge, 2011/04/26
Prev by Date: www/people past-webmasters.html
Next by Date: www/server 98whatsnew.html
Previous by thread: www/philosophy amazon.it.html free-software-for...
Next by thread: www/server/source/planetrss changelog planetrss.pl
Index(es):
- Date
- Thread