[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
gnatsweb & text/plain
From: |
Robert Lupton the Good |
Subject: |
gnatsweb & text/plain |
Date: |
Tue, 12 Feb 2002 14:17:44 -0500 |
Didn't I post this before?
R
Frank Gilmurray writes:
>
> We're running gnatsweb 2.9.2. Several people need the query results in
> plain text format for various reasons (like exporting them into Excel,
> running awk/sed scripts on them, ...).
>
> Before I start modifying gnatsweb to do this, I thought I'd ask if anyone
> else has done this or knows a way to convert the html to plain text.
===File ~/bin/format-query==================================
#!/usr/bin/perl
# -*- perl -*-
#
# Reformat a gnatsweb query saved as HTML from netscape
#
# Robert Lupton (rhl@astro.princeton.edu)
#
use Getopt::Std;                # core module; replaces the obsolete getopts.pl

#
# Parse the command line.  getopts() stores every switch it sees in the
# package variable $opt_<letter>; a ':' after a letter in the spec means the
# switch takes a value.  `o' has to be listed here: it is documented in the
# usage message and tested below, and without it -o is rejected as an
# unknown option.
#
if(!getopts('chnostT:vw:')) {
    syntax();
    exit 1;
}

#
# Defaults; the switches below override them
#
$linelen = 80;                  # length of output line
$new = -1;                      # unknown style; guessed from the input
                                # new == 1 => new style output (gnatsweb 2.9+)
                                # new == 0 => old style output
$ntab = 4;                      # number of tabs to indent Synopsis

if($opt_h) {                    # -h: print the usage message and stop
    syntax();
    exit 1;
}

if($opt_c) {                    # -c: abbreviate field values and headings
    $compress = 1;
}

if($opt_n) {                    # -n: force new-style input
    $new = 1;
}

if($opt_o) {                    # -o: force old-style input
    $new = 0;
}

if($opt_s) {                    # -s: always start Synopsis on a new line
    $synopsis_on_new_line = 1;
}

if($opt_t) {                    # -t: strip hh:mm:ss times from the input
    $delete_time = 1;
}

# Test for definedness, not truth, so that an explicit `-T 0' (no indent)
# is honoured instead of silently falling back to the default.
if(defined($opt_T)) {
    $ntab = $opt_T;             # number of tabs to indent Synopsis
}

if($opt_v) {                    # -v: be chatty
    $verbose = 1;
}

if($opt_w) {                    # -w: output line length (0 is ignored)
    $linelen = $opt_w;
}
#
# Main loop: read the saved HTML a line at a time.  While $header is set we
# only collect the page title and query information; once the results table
# starts, each table row is split into fields and printed as one line of
# text, with the Synopsis (assumed to be the last column) word-wrapped.
#
# State shared with the rest of the script (package variables):
#   $new, $compress, $delete_time, $synopsis_on_new_line, $ntab, $linelen
#       are read here; $new is also set when the input style is guessed
#   @fields, @fieldlist  are read and modified by compress_fields()
#
$header = 1;                    # reading header
$colheadings = 1;               # haven't read column headings

while (<>) {
    #
    # Guess the input style if neither -n nor -o was given
    #
    if($new < 0 && m|-//IETF//DTD HTML//EN|) {
        $new = 1;
    }

    #
    # The header ends at the results table (new style) or at the first
    # <INPUT> element (old style)
    #
    if($new == 1) {
        if(/^<table border/i) {
            $header = 0;
        }
    } else {
        if(/^<INPUT/i) {
            $header = 0;
        }
    }

    #
    # Strip markup that we don't care about
    #
    s%>edit<%><%gi;             # special case for `edit' links
    s%</?(FONT|FORM\s|HEAD|HTML|INPUT\s|TT)[^>]*>%%gi;
    s%<A\s+HREF=[^>]+>([^<]*)</A>%$1%gi; # keep only the link text
    s%<BR>%+%gi;                # `+' marks a line break; see $descrip below
    #
    # Drop non-breaking spaces and decode the remaining character entities.
    # The entity names must be spelled out: with the entities already
    # decoded the first pattern would match (and delete) every space, and
    # the other three would be no-ops.
    #
    s%&nbsp;?%%gi;
    s%&lt;%<%gi; s%&gt;%>%gi; s%&quot;%"%gi;

    if($delete_time) {
        s/[0-9][0-9]:[0-9][0-9]:[0-9][0-9]\s*//g;
    }

    if($header) {
        #
        # Collect the title and any descriptive text.  No /g on these
        # matches: each one must search the whole line rather than resume
        # from where the previous match stopped.
        #
        if(m%<TITLE>([^<]*)</TITLE>%i) {
            $title = $1;
        } elsif(m%<SMALL>([^<]*)</SMALL>%i) {
            my $descrip = $1;
            $descrip =~ s/\+/\n/g;
            $title .= "\n$descrip";
        }
        if(m%<H2>([^<]+)</H2>%i) {
            my $descrip = $1;
            $descrip =~ s/\+/\n/g;
            $title .= "\n$descrip";
        }
        if($new == 1) {
            if(m%>(User|Access|Date):\s+([^&]+)</SPAN>%) {
                $query_info{$1} = $2;
            }
        }
        next;
    } elsif($title) {
        #
        # First line after the header: print the title block, once
        #
        my($title1, $title2) = split("\n", $title, 2);
        if($new == 1) {
            #
            # Add any query information that the title doesn't already
            # mention.  Only prepend when the key is really missing;
            # prepending unconditionally repeats the previous entry.
            #
            foreach my $k ("User", "Access", "Date") {
                if($title2 !~ /$k/) {
                    $title2 = "$k: $query_info{$k}\n" . $title2;
                }
            }
        }
        ($title2 = "\n" . $title2) =~ s/\n/\n\t\t\t\t\t\t/g;
        print " $title1\n";
        print " $title2\n\n";
        $title = 0;             # don't print it again
    }

    if($new == 1 && $colheadings && m%^</TR>%i) {
        # end of the heading row; process the headings gathered so far
        $colheadings = 0;
        $_ = $colheading_line . "\n";
    } else {
        s/\s*\n$//;             # == chomp in perl5
    }
    s%<(TABLE|TR)\s*[^>]*>%%gi;
    s%</TR>%\n%gi;              # a newline now marks the end of each row
    s%</T[DH]>%%gi;
    #
    # The new HTML format puts the column headings and PR number on lines of
    # their own; fix this
    #
    if($new == 1) {
        if($colheadings) {
            $colheading_line .= $_;
            next;
        }
        if(/<td nowrap>\d+\s*$/i) {
            $pr_num_line = $_;
            next;
        }
        $_ = $pr_num_line . $_;
    }

    #
    # Process each complete row that has accumulated in $_
    #
    while(s/^(.+)\n//m) {
        my $row = $1;

        @fields = split(/<TD\s*[^>]*>|<TH>/i, $row);
        shift(@fields);         # initial empty match

        if(!$fmt) {
            #
            # First row seen, i.e. the headings: build the output format
            #
            @fieldlist = @fields; # list of field headings

            foreach my $heading (@fields) {
                $heading =~ s/Arrival-Date/Arrived/;
                $heading =~ s/Last-Modified/Modified/;
                $heading =~ s/Closed-Date/Closed/;
            }

            # $el aliases the element of @fields, so assigning to it
            # changes the heading that gets printed
            foreach $el (@fields) {
                if($el eq "PR") {
                    $fmt .= "%-5s";
                } elsif($el eq "Category") {
                    $fmt .= "%-9s ";
                } elsif($el eq "Synopsis") {
                    $have_synopsis = 1; # printed separately, wrapped
                } elsif($el eq "Confidential") {
                    $fmt .= "%-4s ";
                } elsif($el eq "Severity") {
                    if($compress) {
                        $el = "S";
                        $fmt .= "%-2s ";
                    } else {
                        $fmt .= "%-8s ";
                    }
                } elsif($el eq "Priority") {
                    if($compress) {
                        $el = "P";
                        $fmt .= "%-2s ";
                    } else {
                        $fmt .= "%-8s ";
                    }
                } elsif($el eq "Responsible") {
                    $el = "Respon";
                    $fmt .= "%-8s ";
                } elsif($el eq "State") {
                    if($compress) {
                        $el = "St";
                        $fmt .= "%-2s ";
                    } else {
                        $fmt .= "%-9s ";
                    }
                } elsif($el eq "Class") {
                    if($compress) {
                        $el = "Cl";
                        $fmt .= "%-3s ";
                    } else {
                        $fmt .= "%-6s ";
                    }
                } elsif($el eq "Submitter-Id") {
                    if($compress) {
                        $el = "S-ID";
                        $fmt .= "%-4s ";
                    } else {
                        $el = "Submit-ID";
                        $fmt .= "%-6s ";
                    }
                } elsif($el =~ /-Date/) {
                    $fmt .= "%-12s ";
                } elsif($el eq "Originator") {
                    $fmt .= "%-8s ";
                } elsif($el eq "Release") {
                    $fmt .= "%-7s ";
                } elsif($el eq "Arrived") {
                    $fmt .= "%-7s ";
                } elsif($el eq "Modified") {
                    $fmt .= "%-8s ";
                } elsif($el eq "Closed") {
                    $fmt .= "%-8s ";
                } else {
                    $fmt .= "%-10s ";
                }
            }
            $fmt =~ s/\s+$//;
        }

        if($have_synopsis) {
            $synopsis = pop(@fields);
        }

        if($compress) {
            compress_fields();
        } else {
            foreach my $field (@fields) {
                $field =~ s/^change-request$/change/;
            }
        }

        $start = sprintf($fmt, @fields);
        print $start;           # not printf: the data may contain `%'

        if($have_synopsis) {
            if($fields[0] =~ /PR/) {
                # heading row
                print " Synopsis\n";
                print <<"EOT";
--------------------------------------------------------------------------
EOT
            } else {
                #
                # Wrap the synopsis a word at a time.  It stays on the same
                # line as the other fields if it fits (and -s wasn't given);
                # otherwise it goes on lines of its own indented $ntab tabs.
                #
                my $wrapped = "";
                $need_new_line = $synopsis_on_new_line ? 1 : 0;

                # Loop on the substitution itself, so that a word "0" is
                # kept and a whitespace-only remainder ends the loop
                # instead of spinning forever.
                while($synopsis =~ s/^\s*(\S+(?:\s+|$))//) {
                    my $word = $1;

                    if(length($wrapped . $word) >
                       $linelen - length($start) - 1) {
                        $need_new_line = 1;
                    }

                    if($need_new_line &&
                       length($wrapped . $word) > $linelen - 8*$ntab - 1) {
                        print "\n" . "\t" x $ntab . $wrapped;
                        $wrapped = "";
                    }
                    $wrapped .= $word;
                }
                if($wrapped !~ /^\s*$/) {
                    if($need_new_line ||
                       (length($wrapped) > $linelen - 8*$ntab - 1)) {
                        print "\n" . "\t" x $ntab;
                    }
                    print "$wrapped";
                }
            }
        }
        print "\n";
    }
    $pr_num_line = "";          # this PR number has been used
}
###############################################################################
#
# Abbreviate the values in the global array @fields, in place.
#
# The global array @fieldlist gives the heading of each column;
# $fields[$n] is rewritten according to the name in $fieldlist[$n].
# Columns whose heading isn't listed below are left alone, except that
# any heading containing "-Date" has the century removed from its dates
# (along with any whitespace following the date).
#
# Takes no arguments and returns nothing useful.
#
sub compress_fields
{
    #
    # For each heading, the substitutions to try, in order.  The order
    # matters: "non-critical" must be handled before "critical".
    # The first element of each pair is a regular expression.
    #
    my %abbrevs_for = (
        "Severity" => [
            [ 'non-critical', 'NC' ],
            [ 'critical',     'C'  ],
            [ 'serious',      'S'  ],
        ],
        "Priority" => [
            [ 'high',   'H' ],
            [ 'medium', 'M' ],
            [ 'low',    'L' ],
        ],
        "State" => [
            [ 'open',      'O'  ],
            [ 'assigned',  'As' ],
            [ 'analyzed',  'Az' ],
            [ 'feedback',  'F'  ],
            [ 'needstest', 'NT' ],
            [ 'closed',    'C'  ],
            [ 'suspended', 'S'  ],
            [ 'mistaken',  'M'  ],
            [ 'duplicate', 'D'  ],
        ],
        "Class" => [
            [ 'change-request', 'C-R' ],
            [ '(sw|hw)-bug',    'bug' ],
            [ 'documentation',  'doc' ],
            [ 'support',        'sup' ],
        ],
        "Submitter-Id" => [
            [ 'unknown',  'U' ],
            [ 'approved', 'A' ],
            [ 'longTerm', 'L' ],
            [ 'manana',   'M' ],
        ],
    );

    # A lexical index, so that the caller's global $i isn't clobbered
    for my $idx (0 .. $#fieldlist) {
        my $fieldname = $fieldlist[$idx];

        if(exists $abbrevs_for{$fieldname}) {
            foreach my $pair (@{ $abbrevs_for{$fieldname} }) {
                my($long, $short) = @$pair;
                $fields[$idx] =~ s/$long/$short/;
            }
        } elsif($fieldname =~ /-Date/) {
            # 2002-02-12 => 02-02-12
            $fields[$idx] =~
                s/(?:19|20)([0-9][0-9]-[0-9][0-9]-[0-9][0-9])\s*/$1/g;
        }
    }
}
###############################################################################
#
# Print the usage message on the currently selected output handle.
# Takes no arguments.
#
sub syntax
{
    # One element per line of output
    my @usage = (
        'Reformat a gnatsweb query saved as HTML from netscape',
        'Usage:',
        'format-query [options] file',
        'Options:',
        '-h Print this message',
        '-c Compress the output wherever possible',
        '-n Read new-style gnatsweb output (gnatsweb 2.9.0 and later)',
        '-o Read old-style gnatsweb output (pre-gnatsweb 2.9.0)',
        '-s Always start Synopsis on a new line',
        '-T n Indent continued Synopsis line by n tabs (default: 4)',
        '-t Delete any times of the form hh:mm:ss, leaving the year-mon-day',
        '-v Be chatty',
        '-w nnn Set length of output line (default: 80)',
        'If you specify neither -n nor -o, we will guess for you.',
    );

    print join("", map { "$_\n" } @usage);
}
============================================================