[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
www/server/source/planetrss changelog planetrss.pl
From: |
Shailesh Ghadge |
Subject: |
www/server/source/planetrss changelog planetrss.pl |
Date: |
Tue, 26 Apr 2011 17:59:11 +0000 |
CVSROOT: /web/www
Module name: www
Changes by: Shailesh Ghadge <shailesh_ghadge> 11/04/26 17:59:11
Modified files:
server/source/planetrss: changelog planetrss.pl
Log message:
PlanetRSS version 1.3
- User control over output path
Option: -path=path
- User control over FeedLength & FeedLines
Option: -FeedLines=n 'n' is number of Feed lines
Option: -FeedLength=m 'm' is the length of each feed
- Predict & Resolve <a href> tag bursting
- Added -help & -version
Command: perl planetrss.pl -help
perl planetrss.pl -version
- User control over Forced writing
Option: -f
CVSWeb URLs:
http://web.cvs.savannah.gnu.org/viewcvs/www/server/source/planetrss/changelog?cvsroot=www&r1=1.2&r2=1.3
http://web.cvs.savannah.gnu.org/viewcvs/www/server/source/planetrss/planetrss.pl?cvsroot=www&r1=1.12&r2=1.13
Patches:
Index: changelog
===================================================================
RCS file: /web/www/www/server/source/planetrss/changelog,v
retrieving revision 1.2
retrieving revision 1.3
diff -u -b -r1.2 -r1.3
--- changelog 14 Apr 2011 06:39:49 -0000 1.2
+++ changelog 26 Apr 2011 17:59:04 -0000 1.3
@@ -1,3 +1,16 @@
+Version 1.3 - 27 April 2011
+- User control over output path
+ Option: -path="\path"
+- User control over FeedLength & FeedLines
+ Option: -FeedLines=n 'n' is number of Feed lines
+ Option: -FeedLength= m 'm' is the length of each feed
+- Predict & Resolve <a href> tag bursting
+- Added -help & -version
+ Command: perl planetrss.pl -help
+ perl planetrss.pl -version
+- User control over Forced writing
+ Option: -f
+-----------------------------------------------------------------------------------------------
Version 1.2 - 14 April 2011
- User control over tag removals in Feed Description
- <a href> will not be removed by default.
@@ -7,10 +20,10 @@
- To stop removal of a tag:
command: perl planetrss.pl -tag=1
replace tag in above by one of these: a , b , code , div , em , h
, hr , i , img , p , pre , strong , table , textarea , tt , ul
-
+-----------------------------------------------------------------------------------------------
Version 1.1 - 26 Mar 2011
Added Checker code, to first check previously retrieved feeds(if any) and only
then proceed to write to planetfeeds.html if required.
-
+-----------------------------------------------------------------------------------------------
Version 1.0 - 20 Mar 2011
The Perl script fetches & saves 'n' feeds from planet.gnu.org using RSS feed
link http://planet.gnu.org/rss20.xml in html format.
Each feed is truncated to 'm' characters.
Index: planetrss.pl
===================================================================
RCS file: /web/www/www/server/source/planetrss/planetrss.pl,v
retrieving revision 1.12
retrieving revision 1.13
diff -u -b -r1.12 -r1.13
--- planetrss.pl 25 Apr 2011 21:47:57 -0000 1.12
+++ planetrss.pl 26 Apr 2011 17:59:04 -0000 1.13
@@ -1,4 +1,4 @@
- # PlanetRSS, Version 1.2
+ # PlanetRSS, Version 1.3
# Copyright © 2011 Shailesh Ghadge
#This program is free software: you can redistribute it and/or modify
@@ -14,12 +14,16 @@
#You should have received a copy of the GNU General Public License
#along with this program. If not, see <http://www.gnu.org/licenses/>.
- #Email: address@hidden #Version Date: 14 Apr 2011
+ #Email: address@hidden #Version Date: 27 Apr 2011
#
#Functionality: Compare with previously retrieved feeds(if any) and
then if required,
# Fetch & save 'n' feeds from planet.gnu.org using RSS
feed link http://planet.gnu.org/rss20.xml in html format
# Each feed is truncated to 'm' characters.
- # User control over removal of html tags
+ # User control over:-
+ # 1.retaining/removal of html tags
+ # 2.number of Feeds & Feed length
+ # 3.output path
+ # 4.forced write
#--------------------------------------------
@@ -32,7 +36,7 @@
use Getopt::Long;
#Provides arguement handling
#---------------------------------------------
- my $FeedLines = 4; # 'n' feeds
+ my $FeedLines = 3; # 'n' feeds
my $FeedLength = 200; # 'm' characters
my $PGfeeds = get("http://planet.gnu.org/rss20.xml");
@@ -41,8 +45,94 @@
#Options for Sanitization (value 1 implies tag will not be stripped,
any other value implies tag will be stripped)
my $a = 1; my $b = 0; my $code = 0; my $div = 0; my $em = 0; my $h =
0; my $hr = 0; my $i = 0; my $img = 0; my $p = 0; my $pre = 0; my $strong = 0;
my $table = 0; my $textarea = 0; my $tt = 0; my $ul = 0;
+
+ #Options
+ my $help; my $version; my $PGpath = "planetfeeds.html"; my $f;
+
+ #Set values as per agruements
GetOptions("a=i" => \$a, "b=i" => \$b, "code=i" => \$code, "div=i" =>
\$div, "em=i" => \$em, "h=i" => \$h, "hr=i" => \$hr, "i=i" => \$i,"img=i" =>
\$img,
- "p=i" => \$p, "pre=i" => \$pre, "strong=i" => \$strong,
"table=i" => \$table, "textarea=i" => \$textarea, "tt=i" => \$tt, "ul=i" =>
\$ul);
+ "p=i" => \$p, "pre=i" => \$pre, "strong=i" => \$strong,
"table=i" => \$table, "textarea=i" => \$textarea, "tt=i" => \$tt, "ul=i" =>
\$ul,
+ "FeedLines=i" => \$FeedLines, "FeedLength=i" =>
\$FeedLength, "help" => \$help, "version" => \$version, "path=s" => \$PGpath,
+ "f" => \$f);
+
+ #------------------------------Help-------------------------------
+ if($help)
+ {
+ print "
+
+Usage: perl planetrss.pl [-options]
+------------------------------------------------------------------------------
+Defaults:
+ Number of Feeds= 3,
+ Length of Feed = 200,
+ except 'a' tag, all above tags are removed.
+
+Feed control options:
+ -FeedLines=n 'n' is the number of Feeds
+ -FeedLength=m 'm' is the length of Feed
+
+Force Write:
+ -f Overwrites existing outputfile (even if the
latest feed from RSS & First feed of Previous outputfile is same)
+
+Help:
+ -help
+
+Output Path:
+ -path=\"/path\" Set the output path,
+ eg: -path=\"/www/planetfeeds.html\"
+ -path=\"../www/\"
+ -path=\"../www\"
+
+Tag preserve options:
+ -a=1 a href tag will not be removed
+ -b=1 b tag will not be removed
+ -code=1 code tag will not be removed
+ -div=1 div tag will not be removed
+ -em=1 em tag will not be removed
+ -h=1 h tag will not be removed
+ -hr=1 hr tag will not be removed
+ -i=1 tag will not be removed
+ -img=1 img tag will not be removed
+ -p=1 p tag will not be removed
+ -pre=1 pre tag will not be removed
+ -strong=1 strong tag will not be removed
+ -table=1 table,tr,th tags will not be removed
+ -textarea=1 textarea tag will not be removed
+ -tt=1 tt tag will not be removed
+ -ul=1 ul li tags will not be removed
+
+Tag removal options:
+ Syntax same as in 'tag perserve'.
+ Set value to 0 or any number other than 1.
+
+Version Info:
+ -version
+------------------------------------------------------------------------------
+Some Examples:
+ perl planetrss.pl -f -FeedLines=7 -FeedLength=500
-path=\"../www/planetfeeds.html\"
+ perl planetrss.pl -version
+ perl planetrss.pl -help
+ perl planetrss.pl -i=1 -hr=1 -a=0
+
+\n";
+ exit;
+ }
+ #-------------------------------End Help-----------------------------
+
+ #-------------------------------Version------------------------------
+ if($version)
+ {
+ print "
+------------------------------------------------------------------
+ PlanetRSS, Version 1.3
+ Copyright © 2011 Shailesh Ghadge
+ License: GPLv3 Contact: address@hidden
+ Version: 1.3 Version released on: 27 April 2011
+------------------------------------------------------------------\n";
+ exit;
+ }
+ #------------------------------End Version--------------------------
+
my $PGparser = new XML::RSS::Parser::Lite;
#Create new RSS parser
@@ -50,10 +140,26 @@
$PGparser->parse($PGfeeds);
#To Parse the supplied xml
- #-------------------Check----------------------
+ #-----------------------------Path Check----------------------------
+ if(-d $PGpath)
+ {
+ if(substr($PGpath,length($PGpath)-1) eq "/")
+ {
+ $PGpath=$PGpath."planetfeeds.html";
+ }
+ else
+ {
+ $PGpath=$PGpath."\/planetfeeds.html";
+ }
+ }
+ #-------------------------------------------------------------------
my $Write2File = 1; #Default: We write to PlanetFeeds.html;
+ #--------------------------------Check------------------------------
+ if(!$f) # If force write flag is set, then no need to check
+ {
my $CompareFeeds = 1;
- open (CurPGhtml, 'planetfeeds.html') || $CompareFeeds--;
+ #open (CurPGhtml, 'planetfeeds.html') || $CompareFeeds--;
+ open (CurPGhtml, $PGpath) || $CompareFeeds--;
if($CompareFeeds == 1)
{
my @Cur_Content = <CurPGhtml>;
@@ -76,6 +182,7 @@
#Decide whether to continue & write PGhtml
}
close(CurPGhtml);
+ }
#-------------------------End of Check-----------------------------
#print "content-type: text/html \n";
@@ -85,7 +192,8 @@
if($Write2File==1)
{
my $PGhead= "<!-- Autogenerated File by planetrss.pl
http://web.cvs.savannah.gnu.org/viewvc/www/server/source/planetrss/?root=www
-->";
- open (PGhtml, '>planetfeeds.html');
+ #open (PGhtml, '>planetfeeds.html');
+ open (PGhtml, '>'.$PGpath);
print PGhtml $PGhead;
#Print Feeds data in the format of- "Title - Description... <a
href='URL'>more</a>"
for (my $i = 0; $i < $FeedLines; $i++)
@@ -105,7 +213,7 @@
else
{
#Sanitize Description
- $PGdesc=~ s/<(.*?)>//gi;
+ #$PGdesc=~ s/<(.*?)>//gi;
$PGdesc=~ s/<br \/>//gi; $PGdesc
=~ s/\s\s+/ /g;#remove whitespace
if($a!=1)
{
@@ -177,16 +285,38 @@
$PGdesc=~ s/<ul(.*?)>//gi;
$PGdesc=~ s/<\/ul>//gi;
$PGdesc=~ s/<li(.*?)>//gi;
$PGdesc=~ s/<\/li>//gi;
}
- $PGdesc=~ s/&lt;/</gi; $PGdesc=~ s/&gt;/>/gi;
+ $PGdesc=~ s/&lt;/</gi; $PGdesc=~ s/&gt;/>/gi;
$PGdesc=~ s/&amp;lt;/</gi; $PGdesc=~ s/&amp;gt;/>/gi;
$PGdesc=~ s/&quot;/"/gi;
#------End of Sanitization
+ #Predict & resolve 'a' tag breaking
+ if($a==1) # If a tags are included
+ {
+ $PGdesc_front = substr($PGdesc,
0,($FeedLength-(10+length($PGtitle))));
+ $PGdesc_rear = substr($PGdesc,
($FeedLength-(10+length($PGtitle))));
+
if(substr($PGdesc_front,($FeedLength-(10+length($PGtitle))-1)) eq "<") #Fix for
line cut at '<'
+ {
+ $PGdesc_front = substr($PGdesc,
0,($FeedLength-(10+length($PGtitle)))+1);
+ $PGdesc_rear = substr($PGdesc,
($FeedLength-(10+length($PGtitle)))+1);
+ }
+
+ while ($PGdesc_front =~ /<a/gi) {
$start_a++ }
+ while ($PGdesc_front =~ /<\/a>/gi) {
$end_a++ }
+
+ if($start_a != $end_a)
+ {
+ $PGdesc_front =
$PGdesc_front.substr($PGdesc_rear,0,index($PGdesc_rear,'</a>')+4);
+ }
+ $PGdesc=$PGdesc_front;
+ }
+ else # If a tags are removed
+ {
#Truncate Description
$PGdesc = substr($PGdesc,
0,($FeedLength-(10+length($PGtitle)))); #10 characters removed for ': ' and
'... more'
+ }
#Output
-
print PGhtml "<p><a
href='".$PGurl."'>".$PGtitle ."</a>: ".$PGdesc. "... <a
href='".$PGurl."'>more</a></p>\n";
}
}