[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
www/server/source/planetrss changelog planetrss.pl
From: Shailesh Ghadge
Subject: www/server/source/planetrss changelog planetrss.pl
Date: Thu, 14 Apr 2011 06:41:16 +0000
CVSROOT: /web/www
Module name: www
Changes by: Shailesh Ghadge <shailesh_ghadge> 11/04/14 06:41:16
Modified files:
server/source/planetrss: changelog planetrss.pl
Log message:
Added feature for user control over tag removal from feed description
CVSWeb URLs:
http://web.cvs.savannah.gnu.org/viewcvs/www/server/source/planetrss/changelog?cvsroot=www&r1=1.1&r2=1.2
http://web.cvs.savannah.gnu.org/viewcvs/www/server/source/planetrss/planetrss.pl?cvsroot=www&r1=1.8&r2=1.9
Patches:
Index: changelog
===================================================================
RCS file: /web/www/www/server/source/planetrss/changelog,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -b -r1.1 -r1.2
--- changelog 26 Mar 2011 16:04:25 -0000 1.1
+++ changelog 14 Apr 2011 06:39:49 -0000 1.2
@@ -1,3 +1,13 @@
+Version 1.2 - 14 April 2011
+- User control over tag removals in Feed Description
+- <a href> will not be removed by default.
+- To remove 'a' html tags:
+ command: perl planetrss.pl -a=0
+- Other tags are removed from description by default
+- To stop removal of a tag:
+ command: perl planetrss.pl -tag=1
+ replace tag in above by one of these: a , b , code , div , em , h
, hr , i , img , p , pre , strong , table , textarea , tt , ul
+
Version 1.1 - 26 Mar 2011
Added Checker code, to first check previously retrieved feeds(if any) and only
then proceed to write to planetfeeds.html if required.
Index: planetrss.pl
===================================================================
RCS file: /web/www/www/server/source/planetrss/planetrss.pl,v
retrieving revision 1.8
retrieving revision 1.9
diff -u -b -r1.8 -r1.9
--- planetrss.pl 6 Apr 2011 17:45:13 -0000 1.8
+++ planetrss.pl 14 Apr 2011 06:39:49 -0000 1.9
@@ -1,4 +1,4 @@
- # PlanetRSS, Version 1.1
+ # PlanetRSS, Version 1.2
# Copyright © 2011 Shailesh Ghadge
#This program is free software: you can redistribute it and/or modify
@@ -14,11 +14,12 @@
#You should have received a copy of the GNU General Public License
#along with this program. If not, see <http://www.gnu.org/licenses/>.
- #Email: address@hidden #Date: 26 Mar 2011
+ #Email: address@hidden #Version Date: 14 Apr 2011
#
#Functionality: Compare with previously retrieved feeds(if any) and
then if required,
# Fetch & save 'n' feeds from planet.gnu.org using RSS
feed link http://planet.gnu.org/rss20.xml in html format
# Each feed is truncated to 'm' characters.
+ # User control over removal of html tags
#--------------------------------------------
@@ -28,6 +29,8 @@
use LWP::Simple;
#Provides get(url) function
+ use Getopt::Long;
+ #Provides argument handling
#---------------------------------------------
my $FeedLines = 3; # 'n' feeds
my $FeedLength = 200; # 'm' characters
@@ -35,6 +38,12 @@
my $PGfeeds = get("http://planet.gnu.org/rss20.xml");
#Fetch RSS feeds as xml
+ #Options for Sanitization (value 1 implies tag will not be stripped,
any other value implies tag will be stripped)
+ my $a = 1; my $b = 0; my $code = 0; my $div = 0; my $em = 0; my $h =
0; my $hr = 0; my $i = 0; my $img = 0; my $p = 0; my $pre = 0; my $strong = 0;
+ my $table = 0; my $textarea = 0; my $tt = 0; my $ul = 0;
+ GetOptions("a=i" => \$a, "b=i" => \$b, "code=i" => \$code, "div=i" =>
\$div, "em=i" => \$em, "h=i" => \$h, "hr=i" => \$hr, "i=i" => \$i,"img=i" =>
\$img,
+ "p=i" => \$p, "pre=i" => \$pre, "strong=i" => \$strong,
"table=i" => \$table, "textarea=i" => \$textarea, "tt=i" => \$tt, "ul=i" =>
\$ul);
+
my $PGparser = new XML::RSS::Parser::Lite;
#Create new RSS parser
@@ -75,8 +84,9 @@
#---------------------To Create/Overwrite PlanetFeeds.html-----------
if($Write2File==1)
{
-
+ my $PGhead= "<!-- Autogenerated File by planetrss.pl
http://web.cvs.savannah.gnu.org/viewvc/www/server/source/planetrss/?root=www
-->";
open (PGhtml, '>planetfeeds.html');
+ print PGhtml $PGhead;
#Print Feeds data in the format of- "Title - Description... <a
href='URL'>more</a>"
for (my $i = 0; $i < $FeedLines; $i++)
{
@@ -94,16 +104,89 @@
}
else
{
- #Sanitize Description (using Regex method after
failure with scrubber,strip.. )
- $PGdesc=~ s/<(.*?)>//gi; $PGdesc=~
s/<a(.*?)>//gi; $PGdesc=~ s/<\/a>//gi;
- $PGdesc=~ s/<p>//gi; $PGdesc=~
s/<\/p>//gi; $PGdesc=~ s/<li>//gi;
- $PGdesc=~ s/<ul>//gi; $PGdesc=~
s/<br \/>//gi; $PGdesc =~ s/\s\s+/ /g;#remove whitespace
- #Have kept separate expressions now for easy
debugging in case of error
+ #Sanitize Description
+ $PGdesc=~ s/<(.*?)>//gi;
+ $PGdesc=~ s/<br \/>//gi; $PGdesc
=~ s/\s\s+/ /g;#remove whitespace
+ if($a!=1)
+ {
+ $PGdesc=~ s/<a(.*?)>//gi;
$PGdesc=~ s/<\/a>//gi;
+ }
+ if($b!=1)
+ {
+ $PGdesc=~ s/<b>//gi;
$PGdesc=~ s/<\/b>//gi;
+ }
+ if($code!=1)
+ {
+ $PGdesc=~ s/<code>//gi;
$PGdesc=~ s/<\/code>//gi;
+ }
+ if($div!=1)
+ {
+ $PGdesc=~ s/<div(.*?)>//gi;
$PGdesc=~ s/<\/div>//gi;
+ }
+ if($em!=1)
+ {
+ $PGdesc=~ s/<em>//gi;
$PGdesc=~ s/<\/em>//gi;
+ }
+ if($i!=1)
+ {
+ $PGdesc=~ s/<i>//gi;
$PGdesc=~ s/<\/i>//gi;
+ }
+
+ if($img!=1)
+ {
+ $PGdesc=~ s/<img(.*?)>//gi;
$PGdesc=~ s/<\/img>//gi;
+ }
+ if($h!=1)
+ {
+ $PGdesc=~ s/<h(.*?)>//gi;
$PGdesc=~ s/<\/h(.*?)>//gi;
+ }
+
+ if($hr!=1)
+ {
+ $PGdesc=~ s/<hr>//gi;
$PGdesc=~ s/<\/hr>//gi;
+ }
+ if($p!=1)
+ {
+ $PGdesc=~ s/<p>//gi;
$PGdesc=~ s/<\/p>//gi;
+ }
+ if($pre!=1)
+ {
+ $PGdesc=~ s/<pre>//gi;
$PGdesc=~ s/<\/pre>//gi;
+ }
+ if($strong!=1)
+ {
+ $PGdesc=~ s/<strong>//gi;
$PGdesc=~ s/<\/strong>//gi;
+ }
+ if($table!=1)
+ {
+ $PGdesc=~ s/<table(.*?)>//gi;
$PGdesc=~ s/<\/table>//gi;
+ $PGdesc=~ s/<tr(.*?)>//gi;
$PGdesc=~ s/<\/tr>//gi;
+ $PGdesc=~ s/<th(.*?)>//gi;
$PGdesc=~ s/<\/th>//gi;
+ }
+ if($textarea!=1)
+ {
+ $PGdesc=~ s/<textarea(.*?)>//gi;
$PGdesc=~ s/<\/textarea>//gi;
+ $PGdesc=~ s/<textarea(.*?)>//gi;
$PGdesc=~ s/<\/textarea>//gi;
+ }
+ if($tt!=1)
+ {
+ $PGdesc=~ s/<tt>//gi;
$PGdesc=~ s/<\/tt>//gi;
+ }
+ if($ul!=1)
+ {
+ $PGdesc=~ s/<ul(.*?)>//gi;
$PGdesc=~ s/<\/ul>//gi;
+ $PGdesc=~ s/<li(.*?)>//gi;
$PGdesc=~ s/<\/li>//gi;
+ }
+ $PGdesc=~ s/&lt;/</gi; $PGdesc=~ s/&gt;/>/gi;
+ $PGdesc=~ s/&quot;/"/gi;
+ #------End of Sanitization
+
#Truncate Description
$PGdesc = substr($PGdesc,
0,($FeedLength-(10+length($PGtitle)))); #10 characters removed for ': ' and
'... more'
#Output
+
print PGhtml "<p><a
href='".$PGurl."'>".$PGtitle ."</a>: ".$PGdesc. "... <a
href='".$PGurl."'>more</a></p>\n";
}
}
- www/server/source/planetrss changelog planetrss.pl,
Shailesh Ghadge <=