www-commits
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

www/server/source/planetrss changelog planetrss.pl


From: Shailesh Ghadge
Subject: www/server/source/planetrss changelog planetrss.pl
Date: Tue, 26 Apr 2011 17:59:11 +0000

CVSROOT:        /web/www
Module name:    www
Changes by:     Shailesh Ghadge <shailesh_ghadge>       11/04/26 17:59:11

Modified files:
        server/source/planetrss: changelog planetrss.pl 

Log message:
        PlanetRSS version 1.3
        - User control over output path
             Option:  -path=path
        - User control over FeedLength & FeedLines
             Option: -FeedLines=n         'n' is number of Feed lines
             Option: -FeedLength=m        'm' is the length of each feed
        - Predict & Resolve <a href> tag bursting
        - Added -help & -version
             Command: perl planetrss.pl -help
                      perl planetrss.pl -version
        - User control over Forced writing
             Option: -f

CVSWeb URLs:
http://web.cvs.savannah.gnu.org/viewcvs/www/server/source/planetrss/changelog?cvsroot=www&r1=1.2&r2=1.3
http://web.cvs.savannah.gnu.org/viewcvs/www/server/source/planetrss/planetrss.pl?cvsroot=www&r1=1.12&r2=1.13

Patches:
Index: changelog
===================================================================
RCS file: /web/www/www/server/source/planetrss/changelog,v
retrieving revision 1.2
retrieving revision 1.3
diff -u -b -r1.2 -r1.3
--- changelog   14 Apr 2011 06:39:49 -0000      1.2
+++ changelog   26 Apr 2011 17:59:04 -0000      1.3
@@ -1,3 +1,16 @@
+Version 1.3 - 27 April 2011
+- User control over output path
+       Option:  -path="\path"
+- User control over FeedLength & FeedLines
+       Option: -FeedLines=n            'n' is number of Feed lines
+       Option: -FeedLength=m           'm' is the length of each feed
+- Predict & Resolve <a href> tag bursting
+- Added -help & -version
+       Command: perl planetrss.pl -help
+                perl planetrss.pl -version
+- User control over Forced writing
+       Option: -f
+-----------------------------------------------------------------------------------------------
 Version 1.2 - 14 April 2011
 - User control over tag removals in Feed Description
 - <a href> will not be removed by default.
@@ -7,10 +20,10 @@
 - To stop removal of a tag:
        command: perl planetrss.pl -tag=1
        replace tag in above by one of these:  a ,  b , code ,  div ,  em ,  h 
,   hr ,  i ,  img ,  p ,  pre ,  strong , table ,  textarea ,  tt ,  ul 
-
+-----------------------------------------------------------------------------------------------
 Version 1.1 - 26 Mar 2011
 Added Checker code, to first check previously retrieved feeds(if any) and only 
then proceed to write to planetfeeds.html if required.
-
+-----------------------------------------------------------------------------------------------
 Version 1.0 - 20 Mar 2011
 The Perl script fetches & saves 'n' feeds from planet.gnu.org using RSS feed 
link http://planet.gnu.org/rss20.xml in html format.
 Each feed is truncated to 'm' characters.

Index: planetrss.pl
===================================================================
RCS file: /web/www/www/server/source/planetrss/planetrss.pl,v
retrieving revision 1.12
retrieving revision 1.13
diff -u -b -r1.12 -r1.13
--- planetrss.pl        25 Apr 2011 21:47:57 -0000      1.12
+++ planetrss.pl        26 Apr 2011 17:59:04 -0000      1.13
@@ -1,4 +1,4 @@
-       #                  PlanetRSS, Version 1.2       
+       #                  PlanetRSS, Version 1.3       
        #               Copyright © 2011 Shailesh Ghadge
 
        #This program is free software: you can redistribute it and/or modify
@@ -14,12 +14,16 @@
        #You should have received a copy of the GNU General Public License
        #along with this program.  If not, see <http://www.gnu.org/licenses/>.
                
-       #Email: address@hidden  #Version Date: 14 Apr 2011
+       #Email: address@hidden  #Version Date: 27 Apr 2011
        #
        #Functionality: Compare with previously retrieved feeds(if any) and 
then if required,
        #               Fetch & save 'n' feeds from planet.gnu.org using RSS 
feed link http://planet.gnu.org/rss20.xml in html format
        #               Each feed is truncated to 'm' characters.
-       #               User control over removal of html tags
+       #               User control over:- 
+       #                       1.retaining/removal of html tags
+       #                       2.number of Feeds & Feed length
+       #                       3.output path
+       #                       4.forced write
        
        
        #--------------------------------------------
@@ -32,7 +36,7 @@
        use Getopt::Long;
        #Provides arguement handling 
        #---------------------------------------------  
-       my $FeedLines = 4;      # 'n' feeds
+       my $FeedLines = 3;      # 'n' feeds
        my $FeedLength = 200;   # 'm' characters 
                
        my $PGfeeds = get("http://planet.gnu.org/rss20.xml";);
@@ -41,8 +45,94 @@
        #Options for Sanitization (value 1 implies tag will not be stripped, 
any other value implies tag will be stripped)
        my $a = 1; my $b = 0;  my $code = 0; my $div = 0; my $em = 0; my $h = 
0;  my $hr = 0; my $i = 0; my $img = 0; my $p = 0; my $pre = 0; my $strong = 0; 
 
        my $table = 0; my $textarea = 0; my $tt = 0; my $ul = 0;
+       
+       #Options
+       my $help; my $version; my $PGpath = "planetfeeds.html"; my $f;
+
+       #Set values as per arguments
        GetOptions("a=i" => \$a, "b=i" => \$b,  "code=i" => \$code, "div=i" => 
\$div, "em=i" => \$em,  "h=i" => \$h, "hr=i" => \$hr, "i=i" => \$i,"img=i" => 
\$img, 
-                  "p=i" => \$p, "pre=i" => \$pre, "strong=i" => \$strong,  
"table=i" => \$table, "textarea=i" => \$textarea, "tt=i" => \$tt, "ul=i" => 
\$ul);  
+                  "p=i" => \$p, "pre=i" => \$pre, "strong=i" => \$strong,  
"table=i" => \$table, "textarea=i" => \$textarea, "tt=i" => \$tt, "ul=i" => 
\$ul, 
+                  "FeedLines=i" => \$FeedLines, "FeedLength=i" => 
\$FeedLength, "help" => \$help, "version" => \$version, "path=s"   => \$PGpath,
+                  "f" => \$f); 
+               
+       #------------------------------Help-------------------------------
+       if($help)
+       {
+               print "
+
+Usage: perl planetrss.pl [-options]
+------------------------------------------------------------------------------
+Defaults: 
+       Number of Feeds= 3, 
+       Length of Feed = 200,
+       except 'a' tag, all above tags are removed.
+
+Feed control options:
+       -FeedLines=n            'n' is the number of Feeds
+       -FeedLength=m           'm' is the length of Feed
+
+Force Write:
+       -f                      Overwrites existing outputfile (even if the 
latest feed from RSS & First feed of Previous outputfile is same)
+
+Help:
+       -help
+
+Output Path:
+       -path=\"/path\"         Set the output path, 
+                               eg: -path=\"/www/planetfeeds.html\"
+                                   -path=\"../www/\"
+                                   -path=\"../www\"
+
+Tag preserve options:
+       -a=1            a href tag will not be removed
+       -b=1            b tag will not be removed
+       -code=1         code tag will not be removed
+       -div=1          div tag will not be removed
+       -em=1           em tag will not be removed
+       -h=1            h tag will not be removed
+       -hr=1           hr tag will not be removed
+       -i=1            tag will not be removed
+       -img=1          img tag will not be removed
+       -p=1            p tag will not be removed
+       -pre=1          pre tag will not be removed
+       -strong=1       strong tag will not be removed
+       -table=1        table,tr,th tags will not be removed
+       -textarea=1     textarea tag will not be removed
+       -tt=1           tt tag will not be removed
+       -ul=1           ul li tags will not be removed
+
+Tag removal options:
+       Syntax same as in 'tag perserve'.
+       Set value to 0 or any number other than 1.
+
+Version Info:
+       -version
+------------------------------------------------------------------------------
+Some Examples:
+       perl planetrss.pl -f -FeedLines=7 -FeedLength=500 
-path=\"../www/planetfeeds.html\"
+       perl planetrss.pl -version
+       perl planetrss.pl -help
+       perl planetrss.pl -i=1 -hr=1 -a=0
+       
+\n";
+               exit;
+       }
+       #-------------------------------End Help-----------------------------
+
+       #-------------------------------Version------------------------------
+       if($version)
+       {
+               print "
+------------------------------------------------------------------
+                  PlanetRSS, Version 1.3       
+               Copyright © 2011 Shailesh Ghadge
+       License: GPLv3          Contact: address@hidden
+       Version: 1.3            Version released on: 27 April 2011
+------------------------------------------------------------------\n";
+               exit;
+       }
+       #------------------------------End Version--------------------------
+
 
         my $PGparser = new XML::RSS::Parser::Lite;
        #Create new RSS parser
@@ -50,10 +140,26 @@
        $PGparser->parse($PGfeeds);
        #To Parse the supplied xml
 
-       #-------------------Check----------------------
+       #-----------------------------Path Check----------------------------
+       if(-d $PGpath)
+       {
+               if(substr($PGpath,length($PGpath)-1) eq "/")
+               {
+                       $PGpath=$PGpath."planetfeeds.html";
+               }
+               else
+               {
+                       $PGpath=$PGpath."\/planetfeeds.html";
+               }
+       }
+       #-------------------------------------------------------------------
        my $Write2File = 1; #Default: We write to PlanetFeeds.html;
+       #--------------------------------Check------------------------------
+       if(!$f)         # If force write flag is set, then no need to check
+       {               
        my $CompareFeeds = 1;
-       open (CurPGhtml, 'planetfeeds.html') || $CompareFeeds--;
+               #open (CurPGhtml, 'planetfeeds.html') || $CompareFeeds--;
+               open (CurPGhtml, $PGpath) || $CompareFeeds--;
        if($CompareFeeds == 1)
        {
                my @Cur_Content = <CurPGhtml>;
@@ -76,6 +182,7 @@
                #Decide whether to continue & write PGhtml
        }
        close(CurPGhtml);
+       }
        #-------------------------End of Check-----------------------------
 
         #print "content-type: text/html \n";
@@ -85,7 +192,8 @@
        if($Write2File==1)
        {
                my $PGhead= "<!-- Autogenerated File by planetrss.pl 
http://web.cvs.savannah.gnu.org/viewvc/www/server/source/planetrss/?root=www 
-->";
-               open (PGhtml, '>planetfeeds.html');
+               #open (PGhtml, '>planetfeeds.html');
+               open (PGhtml, '>'.$PGpath);
                print PGhtml $PGhead;
                #Print Feeds data in the format of- "Title - Description... <a 
href='URL'>more</a>"
                for (my $i = 0; $i < $FeedLines; $i++) 
@@ -105,7 +213,7 @@
                        else
                        {               
                                #Sanitize Description                   
-                               $PGdesc=~ s/<(.*?)>//gi;        
+                               #$PGdesc=~ s/<(.*?)>//gi;       
                                $PGdesc=~ s/&lt;br \/&gt;//gi;          $PGdesc 
=~ s/\s\s+/ /g;#remove whitespace
                                if($a!=1)
                                {                               
@@ -177,16 +285,38 @@
                                        $PGdesc=~ s/&lt;ul(.*?)&gt;//gi;        
$PGdesc=~ s/&lt;\/ul&gt;//gi;
                                        $PGdesc=~ s/&lt;li(.*?)&gt;//gi;        
$PGdesc=~ s/&lt;\/li&gt;//gi;
                                }
-                               $PGdesc=~ s/&lt;/</gi;  $PGdesc=~ s/&gt;/>/gi;
+                               $PGdesc=~ s/&lt;/</gi;  $PGdesc=~ s/&gt;/>/gi;  
$PGdesc=~ s/&amp;lt;/</gi;      $PGdesc=~ s/&amp;gt;/>/gi;
                                $PGdesc=~ s/&quot;/"/gi;
                                #------End of Sanitization
                        
+                               #Predict & resolve 'a' tag breaking
+                               if($a==1) # If a tags are included
+                               {
+                                       $PGdesc_front = substr($PGdesc, 
0,($FeedLength-(10+length($PGtitle))));
+                                       $PGdesc_rear = substr($PGdesc, 
($FeedLength-(10+length($PGtitle))));
+                                       
if(substr($PGdesc_front,($FeedLength-(10+length($PGtitle))-1)) eq "<") #Fix for 
line cut at '<'
+                                       {
+                                               $PGdesc_front = substr($PGdesc, 
0,($FeedLength-(10+length($PGtitle)))+1);
+                                               $PGdesc_rear = substr($PGdesc, 
($FeedLength-(10+length($PGtitle)))+1);
+                                       }                                       
+                                        
+                                       while ($PGdesc_front =~ /<a/gi) { 
$start_a++ }
+                                       while ($PGdesc_front =~ /<\/a>/gi) { 
$end_a++ }
+                                       
+                                       if($start_a != $end_a)
+                                       {
+                                               $PGdesc_front = 
$PGdesc_front.substr($PGdesc_rear,0,index($PGdesc_rear,'</a>')+4);
+                                       }
+                                       $PGdesc=$PGdesc_front;                  
                
                        
+                               }
+                               else # If a tags are removed
+                               {
                                #Truncate Description
                                $PGdesc = substr($PGdesc, 
0,($FeedLength-(10+length($PGtitle)))); #10 characters removed for ': ' and 
'... more'
+                               }
                                
                                #Output 
-                               
                                print PGhtml "<p><a 
href='".$PGurl."'>".$PGtitle ."</a>: ".$PGdesc. "... <a 
href='".$PGurl."'>more</a></p>\n";
                        }
                }



reply via email to

[Prev in Thread] Current Thread [Next in Thread]