bug-gnats
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

gnatsweb & text/plain


From: Robert Lupton the Good
Subject: gnatsweb & text/plain
Date: Tue, 12 Feb 2002 14:17:44 -0500

Didn't I post this before?


                                R

Frank Gilmurray writes:
 > 
 > We're running gnatsweb 2.9.2.  Several people need the query results in
 > plain text format for various reasons (like exporting them into Excel,
 > running awk/sed scripts on them, ...).
 > 
 > Before I start modifying gnatsweb to do this, I thought I'd ask if anyone
 > else has done this or knows a way to convert the html to plain text.


===File ~/bin/format-query==================================
#!/usr/bin/perl
# -*- perl -*-
#
# Reformat a gnatsweb query saved as HTML from netscape
#
# Robert Lupton (rhl@astro.princeton.edu)
#
use Getopt::Std;                # core module; replaces the obsolete getopts.pl

#
# Parse the command line.  getopts() sets $opt_X for every switch X that was
# given; the option string has to name every switch tested below, including
# -o (a trailing colon means that the switch takes a value)
#
if(!getopts('chnostT:vw:')) {
   syntax();
   exit 1;
}

$linelen = 80;                  # length of output line
$new = -1;                      # unknown style;
                                # new == 1 => new style output (gnatsweb 2.9+)
$ntab = 4;                      # number of tabs to indent Synopsis

if($opt_h) {
   syntax();
   exit 1;
}
if($opt_c) {
   $compress = 1;
}
if($opt_n) {
   $new = 1;
}
if($opt_o) {
   $new = 0;
}
if($opt_s) {
   $synopsis_on_new_line = 1;
}
if($opt_t) {
   $delete_time = 1;
}
if(defined($opt_T)) {           # test definedness so that "-T 0" is honoured
   $ntab = $opt_T;              # number of tabs to indent Synopsis
}
if($opt_v) {
   $verbose = 1;
}
if($opt_w) {
   $linelen = $opt_w;
}

#
# Skip header
#
$header = 1;                    # reading header
$colheadings = 1;               # haven't read column headings

#
# Main loop.  Read the saved HTML a line at a time, strip the markup, print
# the page title once the header has been read, and then print each table row
# as a line of fixed-width columns followed by the word-wrapped Synopsis.
#
# Uses the globals set from the command line ($new, $compress, $delete_time,
# $linelen, $ntab, $synopsis_on_new_line) and leaves the current row in
# @fields/@fieldlist for compress_fields
#
while (<>) {
   # Seeing this DTD string is taken to mean new-style output, unless the
   # style has already been decided
   if($new < 0 && m|-//IETF//DTD HTML//EN|) {
      $new = 1;
   }
   # The header ends at the first <table border...> (new style) or
   # <INPUT...> (otherwise) at the start of a line
   if($new == 1) {
      if(/^<table border/i) {
         $header = 0;
      }
   } else {
      if(/^<INPUT/i) {
         $header = 0;
      }
   }

   s%>edit<%><%gi;              # special case for `edit' links
   s%</?(FONT|FORM\s|HEAD|HTML|INPUT\s|TT)[^>]*>%%gi;
   s%<A\s+HREF=[^>]+>([^<]*)</A>%$1%gi; # keep only the link text
   s%<BR>%+%gi;                 # line breaks become `+'; see $descrip below
   s%&nbsp;?%%gi;
   s%&lt;%<%gi; s%&gt;%>%gi; s%&quot;%"%gi; # match quotes " 

   if($delete_time) {
      s/[0-9][0-9]:[0-9][0-9]:[0-9][0-9]\s*//g;
   }
   if($header) {
      #
      # Collect the title, and any description lines, from the header.
      # These matches deliberately don't use /g: in scalar context /g
      # resumes at pos(), which would hide an <H2> that precedes the
      # <TITLE> or <SMALL> on the same line
      #
      if(m%<TITLE>([^<]*)</TITLE>%i) {
         $title = $1;
      } elsif(m%<SMALL>([^<]*)</SMALL>%i) {
         my $descrip = $1;
         $descrip =~ s/\+/\n/g; # undo the <BR> => `+' mapping
         $title .= "\n$descrip";
      }
      if(m%<H2>([^<]+)</H2>%i) {
         my $descrip = $1;
         $descrip =~ s/\+/\n/g;
         $title .= "\n$descrip";
      }
      if($new == 1) {
         if(m%>(User|Access|Date):\s+([^&]+)</SPAN>%) {
            $query_info{$1} = $2;
         }
      }

      next;
   } elsif($title) {
      # First line past the header: print the title (once; see $title = 0)
      my($title1, $title2) = split("\n", $title, 2);

      if($new == 1) {
         #
         # Prepend each of User/Access/Date that isn't already mentioned.
         # The prepend is done only for the missing keys, so a key that is
         # already present cannot cause an earlier line to be added twice
         #
         foreach my $k ("User", "Access", "Date") {
            if($title2 !~ /$k/) {
               $title2 = "$k: $query_info{$k}\n" . $title2;
            }
         }
      }

      ($title2 = "\n" . $title2) =~ s/\n/\n\t\t\t\t\t\t/g;
      print "      $title1\n";
      print "      $title2\n\n";
      $title = 0;
   }

   if($new == 1 && $colheadings && m%^</TR>%i) {
      # End of the heading row: process the accumulated headings as one line
      $colheadings = 0;
      $_ = $colheading_line . "\n";
   } else {
      s/\s*\n$//;                       # == chomp in perl5
   }
   
   s%<(TABLE|TR)\s*[^>]*>%%gi;
   s%</TR>%\n%gi;               # each table row becomes one text line
   s%</T[DH]>%%gi;
   #
   # The new HTML format puts the column headings and PR number on lines of
   # their own; fix this
   #
   if($new == 1) {
      if($colheadings) {
         $colheading_line .= $_;

         next;
      }
      if(/<td nowrap>\d+\s*$/i) {
         $pr_num_line = $_;
         next;
      }

      $_ = $pr_num_line . $_;
   }

   # Process each complete (newline-terminated) row that is now in $_
   while(s/^(.+)\n//m) {
      $line = $1;
      
      @fields = split(/<TD\s*[^>]*>|<TH>/i, $line);
      shift(@fields);           # initial empty match

      if(!$fmt) {
         #
         # The first row gives the column headings; use them to build the
         # printf format for every row, shortening some headings on the way
         #
         @fieldlist = @fields;  # list of field headings

         foreach my $heading (@fields) {
            $heading =~ s/Arrival-Date/Arrived/;
            $heading =~ s/Last-Modified/Modified/;
            $heading =~ s/Closed-Date/Closed/;
         }
         
         foreach $el (@fields) {        # N.b. assigning to $el edits @fields
            if($el eq "PR") {
               $fmt .= "%-5s";
            } elsif($el eq "Category") {
               $fmt .= "%-9s ";
            } elsif($el eq "Synopsis") {
               $have_synopsis = 1;      # printed separately; not in $fmt
            } elsif($el eq "Confidential") {
               $fmt .= "%-4s ";
            } elsif($el eq "Severity") {
               if($compress) {
                  $el = "S";
                  $fmt .= "%-2s ";
               } else {
                  $fmt .= "%-8s ";
               }
            } elsif($el eq "Priority") {
               if($compress) {
                  $el = "P";
                  $fmt .= "%-2s ";
               } else {
                  $fmt .= "%-8s ";
               }
            } elsif($el eq "Responsible") {
               $el = "Respon";
               $fmt .= "%-8s ";
            } elsif($el eq "State") {
               if($compress) {
                  $el = "St";
                  $fmt .= "%-2s ";
               } else {
                  $fmt .= "%-9s ";
               }
            } elsif($el eq "Class") {
               if($compress) {
                  $el = "Cl";
                  $fmt .= "%-3s ";
               } else {
                  $fmt .= "%-6s ";
               }
            } elsif($el eq "Submitter-Id") {
               if($compress) {
                  $el = "S-ID";
                  $fmt .= "%-4s ";
               } else {
                  $el = "Submit-ID";
                  $fmt .= "%-6s ";
               }
            } elsif($el =~ /-Date/) {
               $fmt .= "%-12s ";
            } elsif($el eq "Originator") {
               $fmt .= "%-8s ";
            } elsif($el eq "Release") {
               $fmt .= "%-7s ";
            } elsif($el eq "Arrived") {
               $fmt .= "%-7s ";
            } elsif($el eq "Modified") {
               $fmt .= "%-8s ";
            } elsif($el eq "Closed") {
               $fmt .= "%-8s ";
            } else {
               $fmt .= "%-10s ";
            }
         }
         $fmt =~ s/\s+$//;
      }

      if($have_synopsis) {
         $synopsis = pop(@fields);      # taken to be the last column
      }

      if($compress) {
         compress_fields();
      } else {
         foreach my $value (@fields) {
            $value =~ s/^change-request$/change/;
         }
      }

      #
      # $start is data, not a format: print it verbatim so that a `%' in
      # one of the fields isn't interpreted as a conversion
      #
      $start = sprintf($fmt, @fields);
      print $start;
      
      if($have_synopsis) {
         if($fields[0] =~ /PR/) {       # the heading row
            print "  Synopsis\n";
            print <<EOT;
--------------------------------------------------------------------------
EOT
         } else {
            #
            # Word-wrap the synopsis.  It follows the fixed columns if it
            # fits in what's left of the line (and -s wasn't given);
            # otherwise it goes on continuation lines indented by $ntab tabs
            #
            my $wrapped = "";           # text waiting to be printed
            $need_new_line = $synopsis_on_new_line ? 1 : 0;
            #
            # Take one word, and its trailing whitespace, at a time.  Looping
            # on the success of the substitution means that a final word of
            # "0" isn't dropped, and that the loop ends when only whitespace
            # is left
            #
            while($synopsis =~ s/^\s*(\S+(?:\s+|$))//) {
               my $word = $1;

               if(length($wrapped . $word) > $linelen - length($start) - 1) {
                  $need_new_line = 1;
               }
               if($need_new_line &&
                  length($wrapped . $word) > $linelen - 8*$ntab - 1) {
                  print "\n" . "\t" x $ntab . $wrapped; $wrapped = ""; 
               }
               $wrapped .= $word;
            }
            if($wrapped !~ /^\s*$/) {
               if($need_new_line ||
                  (length($wrapped) > $linelen - 8*$ntab - 1)) {
                  print "\n" . "\t" x $ntab;
               }
               print "$wrapped";
            }
         }
      }

      print "\n";
   }
   $pr_num_line = "";
}

###############################################################################

#
# Abbreviate, in place, the values of the current row (the global @fields)
# according to the heading of each column (the global @fieldlist, which
# holds the headings as they were read).  Takes no arguments.
#
# Within a column the substitutions are applied in the order written, and
# each replaces only the first match, so "non-critical" has to be handled
# before "critical"
#
sub compress_fields
{
   for my $i (0 .. $#fieldlist) {
      # @fieldlist can be longer than @fields (the caller pops the
      # Synopsis, and a row may be short); don't create missing elements
      next unless defined $fields[$i];

      my $fieldname = $fieldlist[$i];

      if($fieldname eq "Severity") {
         $fields[$i] =~ s/non-critical/NC/;
         $fields[$i] =~ s/critical/C/;
         $fields[$i] =~ s/serious/S/;
      } elsif($fieldname eq "Priority") {
         $fields[$i] =~ s/high/H/;
         $fields[$i] =~ s/medium/M/;
         $fields[$i] =~ s/low/L/;
      } elsif($fieldname eq "State") {
         $fields[$i] =~ s/open/O/;
         $fields[$i] =~ s/assigned/As/;
         $fields[$i] =~ s/analyzed/Az/;
         $fields[$i] =~ s/feedback/F/;
         $fields[$i] =~ s/needstest/NT/;
         $fields[$i] =~ s/closed/C/;
         $fields[$i] =~ s/suspended/S/;
         $fields[$i] =~ s/mistaken/M/;
         $fields[$i] =~ s/duplicate/D/;
      } elsif($fieldname eq "Class") {
         $fields[$i] =~ s/change-request/C-R/;
         $fields[$i] =~ s/(sw|hw)-bug/bug/;
         $fields[$i] =~ s/documentation/doc/;
         $fields[$i] =~ s/support/sup/;
      } elsif($fieldname eq "Submitter-Id") {
         $fields[$i] =~ s/unknown/U/;
         $fields[$i] =~ s/approved/A/;
         $fields[$i] =~ s/longTerm/L/;
         $fields[$i] =~ s/manana/M/;
      } elsif($fieldname =~ /-Date/) {
         # Drop the century, and any whitespace following the date
         $fields[$i] =~ s/(19|20)([0-9][0-9]-[0-9][0-9]-[0-9][0-9])\s*/$2/g;
      }
   }
}

###############################################################################

#
# Print the usage message on standard output
#
sub syntax
{
   my @usage = (                # one element per line of output
      'Reformat a gnatsweb query saved as HTML from netscape',
      'Usage:',
      '    format-query [options] file',
      'Options:',
      '    -h          Print this message',
      '    -c          Compress the output wherever possible',
      '    -n          Read new-style gnatsweb output (gnatsweb 2.9.0 and later)',
      '    -o          Read old-style gnatsweb output (pre-gnatsweb 2.9.0)',
      '    -s          Always start Synopsis on a new line',
      '    -T n        Indent continued Synopsis line by n tabs (default: 4)',
      '    -t          Delete any times of the form hh:mm:ss, leaving the year-mon-day',
      '    -v          Be chatty',
      '    -w nnn      Set length of output line (default: 80)',
      '',
      'If you specify neither -n nor -o, we will guess for you.',
   );

   # The empty final element supplies the newline that ends the last line
   print join("\n", @usage, "");
}
============================================================



reply via email to

[Prev in Thread] Current Thread [Next in Thread]