[commit-womb] gnumaint Makefile gm gnupackages.txt gm-read.pl...

commit-womb
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[commit-womb] gnumaint Makefile gm gnupackages.txt gm-read.pl...

From:	Karl Berry
Subject:	[commit-womb] gnumaint Makefile gm gnupackages.txt gm-read.pl...
Date:	Wed, 21 Dec 2011 21:01:01 +0000
CVSROOT:        /sources/womb
Module name:    gnumaint
Changes by:     Karl Berry <karl>       11/12/21 21:01:01

Modified files:
        .              : Makefile gm gnupackages.txt 
Added files:
        .              : gm-read.pl gm-util.pl 

Log message:
        try splitting gm script

CVSWeb URLs:
http://cvs.savannah.gnu.org/viewcvs/gnumaint/Makefile?cvsroot=womb&r1=1.34&r2=1.35
http://cvs.savannah.gnu.org/viewcvs/gnumaint/gm?cvsroot=womb&r1=1.47&r2=1.48
http://cvs.savannah.gnu.org/viewcvs/gnumaint/gnupackages.txt?cvsroot=womb&r1=1.96&r2=1.97
http://cvs.savannah.gnu.org/viewcvs/gnumaint/gm-read.pl?cvsroot=womb&rev=1.1
http://cvs.savannah.gnu.org/viewcvs/gnumaint/gm-util.pl?cvsroot=womb&rev=1.1

Patches:
Index: Makefile
===================================================================
RCS file: /sources/womb/gnumaint/Makefile,v
retrieving revision 1.34
retrieving revision 1.35
diff -u -b -r1.34 -r1.35
--- Makefile    28 Aug 2011 18:51:33 -0000      1.34
+++ Makefile    21 Dec 2011 21:01:00 -0000      1.35
@@ -1,11 +1,11 @@
-# $Id: Makefile,v 1.34 2011/08/28 18:51:33 karl Exp $
+# $Id: Makefile,v 1.35 2011/12/21 21:01:00 karl Exp $
 # Copyright 2007, 2008, 2009, 2010, 2011 Free Software Foundation, Inc.
 #
 # Copying and distribution of this file, with or without modification,
 # are permitted in any medium without royalty provided the copyright
 # notice and this notice are preserved.
 
-default: test-genmhtml
+default: pap
 
 test-genlhtml ghtml:  # result included from www.gnu.org/graphics/manual.html
        gm generate logos html | tee ~/tmp/x.html
@@ -26,6 +26,9 @@
 test-listfsfnot notfsf:
        gm list copyrightfsfnot
 
+test-listpapers pap:
+       gm list copyrightpapers
+       
 test-nophysical:
        gm list maintainers nophysical
 

Index: gm
===================================================================
RCS file: /sources/womb/gnumaint/gm,v
retrieving revision 1.47
retrieving revision 1.48
diff -u -b -r1.47 -r1.48
--- gm  5 Dec 2011 01:16:44 -0000       1.47
+++ gm  21 Dec 2011 21:01:00 -0000      1.48
@@ -1,5 +1,5 @@
 #!/usr/bin/env perl
-# $Id: gm,v 1.47 2011/12/05 01:16:44 karl Exp $
+# $Id: gm,v 1.48 2011/12/21 21:01:00 karl Exp $
 # GNU maintainer-related operations.
 # 
 # Copyright 2007, 2008, 2009, 2010, 2011 Free Software Foundation Inc.
@@ -19,9 +19,13 @@
 #
 # Originally written by Karl Berry.
 
+require "gm-util.pl";
+require "gm-read.pl";
+
 $DEBUG = 0;
 $ACTIVITY_FILE = "activity-report.txt";
 $COPYRIGHT_LIST_FILE = "copyright.list";
+$COPYRIGHT_PAPERS_FILE = "copyright-papers.txt";
 $DOC_CATEGORIES_FILE = "doc-categories.txt";
 $FTPLISTING_FILE = "ftplisting.txt";
 $GNUPACKAGES_FILE = "gnupackages.txt";
@@ -59,7 +63,10 @@
 
 list copyrightfsf               FSF-copyrighted packages, with maintainers
 list copyrightfsfnot            non-FSF-copyrighted packages, with maintainers
+list copyrightpapers            copyright.list vs. paperwork
+
 list maintainers nophysical     maintainers without phone/address info
+
 list packages activity          packages sorted by activity-status
 list packages gplv3             packages with GPLv3 update pending
 list packages unanswered        packages we've had no contact about
@@ -68,7 +75,7 @@
 "generate" actions write output that's intended to be used directly, and
 "list" actions are for information and further consumption.
 
-Correspondence to address@hidden
+Correspondence to address@hidden
 END_USAGE
     return 0;
   }
@@ -78,7 +85,7 @@
   my @lines = eval "&$fn ()";
 
   # This gives an ugly message if the eval failed, but this program is
-  # not for public consumption, so it's ok.
+  # not for public consumption, so that's ok.
   die "$0 failed: address@hidden" if $@;
 
   # Print the resulting lines.  The idea is that none of the subroutines
@@ -108,14 +115,12 @@
     
     push (@ret, "$ACTIVITY_FILE:$line: active package $ap does not exist"
                 . " ($email|$name|$time)")
-      unless exists $pkgs{$ap} || exists $missing_pkg_ok{$ap};
-    
+      unless exists $pkgs{$ap}; #|| exists $missing_pkg_ok{$ap};
   }
-  
   return @ret;
   
-  sub by_lineno
-  {
+  
+  sub by_lineno {
     my (undef,undef,undef,$aline) = split (/\|/, $activity{$a});
     my (undef,undef,undef,$bline) = split (/\|/, $activity{$b});
     $aline <=> $bline;
@@ -141,7 +146,7 @@
   for my $f (sort @ftp) {
     next if exists $pkgs{$f};
     next if grep { $f =~ /^$_/ } @special;
-    next if grep { $f =~ /^$_/ } @old;
+    # read oldpackages?  next if grep { $f =~ /^$_/ } @old;
     push (@ret, $f);
   }
   
@@ -557,7 +562,7 @@
   my ($notfsf) = @_;
   my @ret = ();
 
-  my %fsf_pkgs = &read_copyright_list ();
+  my %fsf_pkgs = &read_copyright_list ("by-line");
   my %old_pkgs = &read_oldpackages ();
   my %maint_pkgs = &read_maintainers ("by-package");
 
@@ -649,6 +654,29 @@
 
 
 
+# Return copyright.list entries that don't have matching paperwork,
+# and vice versa.
+# 
+sub list_copyrightpapers_ {
+  my @ret = ();
+  my %cl_pkgs = &read_copyright_list ("by-year");
+  my %cp_pkgs = &read_copyright_papers ();
+  
+  $DEBUG = 1;
+  
+  for my $year (sort keys %cp_pkgs) {
+    my $cp_year = $cp_pkgs{$year};
+    my $cl_year = $cl_pkgs{$year};
+  &debug_hash ("cp_year $year", $cp_year);
+  &debug_hash ("cl_year $year", $cl_year);
+    last;
+  }
+  
+  return @ret;
+}
+
+
+
 # Return list of maintainers for whom we have no phone or address.
 # 
 sub list_maintainers_nophysical {
@@ -688,8 +716,7 @@
   
   return @ret;
   
-  sub by_activity
-  {
+  sub by_activity {
     (my $a_status = $pkgs{$a}->{"activity-status"}) =~ s/ .*//;;
     (my $b_status = $pkgs{$b}->{"activity-status"}) =~ s/ .*//;;
     $activity_order{$a_status} <=> $activity_order{$b_status}
@@ -716,8 +743,7 @@
   
   return @ret;
   
-  sub by_gplv3
-  {
+  sub by_gplv3 {
     (my $a_status = $pkgs{$a}->{"gplv3-status"});# =~ s/ .*//;;
     (my $b_status = $pkgs{$b}->{"gplv3-status"});# =~ s/ .*//;;
     $pkgs{$a}->{"gplv3-status"} cmp $pkgs{$b}->{"gplv3-status"}
@@ -763,593 +789,3 @@
   
   return @ret;
 }
-
-
-
-# Read $COPYRIGHT_LIST_FILE.  Return hash with the keys being 
-# package names and the values their line numbers in the file.
-# 
-sub read_copyright_list {
-  my %ret;
-  
-  open (COPYRIGHT_LIST_FILE) || die "open($COPYRIGHT_LIST_FILE) failed: $!";
-  while (<COPYRIGHT_LIST_FILE>) {
-    # Look at lines following a blank line.
-    next unless /^\s*$/;
-    $_ = <COPYRIGHT_LIST_FILE>; # go past blank line.
-    chomp;
-    
-    # Drop everything after the first tab, we don't want to see all-caps
-    # company names.
-    s/\t.*//;
-    
-    # Sometimes commas are used to separate package names.  Just replace
-    # them with spaces as a small simplification.
-    s/,/ /g;
-    
-    # Split remainder into words at whitespace.
-    my @words = split (" ");
-    
-    my $pkg_constituent = "A-Z0-9._-";
-    for my $w (@words) {
-      # the word has to start with an uppercase letter or number,
-      # and be followed only by possible constituent characters,
-      # or we're done with this line.
-      last unless $w =~ /^[A-Z0-9][$pkg_constituent]+$/;
-      
-      # and it must not be only digits and -, since that's a date.
-      last if $w =~ /^[-\d]+/;
-      
-      # keyword ANY in copyright.list is not a package name for us, etc.
-      next if $w =~ /^(ANY|UNUSED|SPECIAL|TRANSLATIONS)$/;
-      next if $w =~ /^(CCLRC|CNOC|ET|INSIGHT|L3|LINBIT|MEDIAGOBLIN|WCT|FOO)$/;
-      
-      $w = lc ($w);
-      $canonical_pkg_name = &canonicalize_pkg_name ($w);
-      $ret{$canonical_pkg_name} = $.;
-    }
-  }
-  close (COPYRIGHT_LIST_FILE) || warn "close($COPYRIGHT_LIST_FILE) failed: $!";
-  
-  $ret{"gnustandards"} = 1; # no papers, but is copyright FSF
-  $ret{"goodbye"} = $ret{"network"} = 1; # mattl
-  $ret{"libtasn1"} = 1;     # split off from gnutls, so no separate papers
-  $ret{"lispintro"} = 1;    # no papers, but is copyright FSF
-  $ret{"mig"} = 1;          # part of hurd
-  $ret{"trans-coord"} = 1;  # container
-  
-  return %ret;
-
-  
-  # lots of names in copyright.list don't match current package
-  # identifiers, for whatever reason.
-  #
-  sub canonicalize_pkg_name {
-    my ($w) = @_;
-    
-    $w =~ s/_manual//;  # manuals are not separate packages for us.
-
-    %map = (
-      "dotgnu"         => "dotgnu-pnet", # make container?
-      "enterprise"     => "gnue",
-      "getopt"         => "libc",
-      "glibc"          => "libc",
-      "glob"           => "libc",
-      "gnm"            => "binutils",
-      "gnm960"         => "binutils",
-      "gnu.regexp"     => "libc",
-      "gnu-c"          => "gcc",
-      "gnuchess"       => "chess",
-      "gnugsl"         => "gsl",
-      "gnupascal"      => "pascal",
-      "gnus"           => "emacs",
-      "gnuucp"         => "uucp",
-      "gpc"            => "pascal",
-      "gprof"          => "binutils",
-      "graphics"       => "plotutils",
-      "grub2"          => "grub",
-      "gsize"          => "binutils",
-      "gsize960"       => "binutils",
-      "gstrip"         => "binutils",
-      "gstrip960"      => "binutils",
-      "hashcash.el"    => "emacs",
-      "info"           => "texinfo",
-      "interactive"    => "gnuit",
-      "ld"             => "binutils",
-      "libavl"         => "avl",
-      "libbfd.a"       => "bfd",
-      "libgcj"         => "gcc",
-      "libgen.a"       => "libc",
-      "libgsasl"       => "gsasl",
-      "libutf8"                => "libunistring",
-      "mach"           => "gnumach",
-      "makeinfo"       => "texinfo",
-      "malloc"         => "libc",
-      "mcount.c"       => "libc",
-      "memcmp"         => "libc",
-      "memcpy"         => "libc",
-      "memset"         => "libc",
-      "midnight"       => "mc",
-      "midnight_commander" => "mc",
-      "mp"             => "gmp",
-      "muse"           => "emacs-muse",
-      "objc"           => "gcc",
-      "obstack"                => "libc",
-      "pfe"            => "gforth",
-      "pnet"           => "dotgnu-pnet",
-      "portable.net"   => "dotgnu-pnet",
-      "ps"             => "sysutils",
-      "ptx"            => "coreutils",
-      "queue"          => "gnu-queue",
-      "radio"          => "gnuradio",
-      "readlink"       => "coreutils",
-      "regcmp"         => "libc",
-      "regex"          => "libc",
-      "regexp"         => "regex",
-      "robotussin"     => "binutils",
-      "sasl"           => "gsasl",
-      "shogi"          => "gnushogi",
-      "shred"          => "coreutils",
-      "snprintfv"      => "libc",
-      "sql.el"         => "emacs",
-      "strchr"         => "libc",
-      "strftime"       => "libc",
-      "strtod"         => "libc",
-      "superoptimizer" => "superopt",
-      "texi2html"      => "texinfo",
-      "verilog-mode.el"        => "emacs",
-      "winboard"       => "xboard",
-    );
-    return $map{$w} || $w;
-  }
-}
-
-
-
-# Look at an rsync listing of ftp.gnu.org, with entries like this:
-# 
-# drwxr-xr-x        4096 2004/01/16 12:20:08 gnu/3dldf
-# lrwxrwxrwx           5 2010/12/29 13:30:03 gnu/libc -> glibc
-# 
-# Return list of directories and symlinks under gnu/.
-#
-sub read_ftplisting {
-  my @ret;
-
-  open (FTPLISTING_FILE) || die "open($FTPLISTING_FILE) failed: $!";
-  my %keys;
-  while (<FTPLISTING_FILE>) {
-    chomp;
-    next unless /^[dl].*[0-9] gnu[^+]/;  # the 0-9 is the seconds
-    my $orig = $_;
-    s,^.* gnu/,,; # rm through the gnu/
-    s,[ /].*$,,;  # rm all following components or symlink target
-    push (@ret, $_) unless exists $keys{$_};
-    $keys{$_} = 1;
-    warn "keeping $_ from $orig\n" if /^drw/;
-  }
-  close (FTPLISTING_FILE) || warn "close($FTPLISTING_FILE) failed: $!";
-  
-  return @ret;
-}
-
-
-# Read doc-categories.txt file for info about SHORT_CAT, and return a
-# reference to a two-element array.  The first element in the array is
-# the long category name; the second element is a url to the category in
-# the Free Software Directory.
-# 
-# If we ever need a third piece of information, should probably switch
-# to a hash for the values.
-# 
-sub read_doc_categories {
-  my ($short_cat) = @_;
-  
-  if (keys %doc_category == 0) {
-    open (DOC_CATEGORIES_FILE) || die "open($DOC_CATEGORIES_FILE) failed: $!";
-  
-    while (<DOC_CATEGORIES_FILE>) {
-      next if /^\s*#/;  # ignore comments
-      next if /^\s*$/;  # ignore blank lines.
-      chomp;
-    
-      my ($short,$fsd,$full) = split (" ", $_, 3);
-      my $ret_full = $full || $short;
-      my $ret_url = $fsd eq "-"
-                    ? "" : "http://directory.fsf.org/category/$fsd/";;
-      $doc_category{$short} = [ $ret_full, $ret_url ];
-           
-    }
-
-    close (DOC_CATEGORIES_FILE)
-    || warn "close($DOC_CATEGORIES_FILE) failed: $!";
-  }
-  
-  # now we have the hash, so look up SHORT_CAT.
-  my $ret;
-  if (exists ($doc_category{$short_cat})) {
-    $ret = $doc_category{$short_cat};
-  } else {
-    warn "$DOC_CATEGORIES_FILE: no short category name $short_cat\n";
-    $ret = ["no long name for $short_cat", "no url for $short_cat"];
-  }
-  
-  return $ret;
-}
-
-
-
-# read the gnupackages.txt file, return a hash of information, where
-# the keys are package names and the values are hash references with the
-# information.  If a key is given more than once (e.g., note), the
-# values are separated by |.  A key "lineno" is synthesized with the
-# line number of the blank line following the package.
-# 
-sub read_gnupackages {
-  my %ret;
- 
-  open (GNUPACKAGES_FILE) || die "open($GNUPACKAGES_FILE) failed: $!";
-  
-  my %pkg;
-  while (<GNUPACKAGES_FILE>) {
-    next if /^#/;  # ignore comments
-    s/ +$//; # remove trailing spaces
-    chomp;
-    
-    # at a blank line, save the info we've accumulated, if any.
-    if (/^$/) {
-      next unless keys %pkg;
-      
-      if (exists $pkg{"package"}) {
-        my %copy = %pkg;  # do not save a pointer to what will be overwritten
-        $copy{"lineno"} = $. - 1;  # save line number
-        $ret{$pkg{"package"}} = \%copy;
-      } else {
-        warn "$GNUPACKAGES_FILE:$.: no package name for block ending here\n";
-      }
-      undef %pkg;  # clear out next
-      next;
-    }
-
-    # key is everything before the first colon.
-    # value is everything after the first colon and whitespace.
-    my ($key,$val) = split (/:\s*/, $_, 2);
-    if ($key eq $_) {
-      warn "$GNUPACKAGES_FILE:$.: no colon in line\n";
-    }
-    
-    if ($key eq "package" && $val =~ /[A-Z]/) {
-      warn "$GNUPACKAGES_FILE:$.: forcing package name to lowercase\n";
-      $val = lc ($val);
-    }
-    
-    # if key already exists, use | to separate values.
-    $val = "$pkg{$key}|$val" if exists $pkg{$key};
-    
-    $pkg{$key} = $val;
-  }
-  
-  close (GNUPACKAGES_FILE) || warn "close($GNUPACKAGES_FILE) failed: $!";
-  
-  return %ret;
-}
-
-
-
-# Read htmlxref.cnf file for entries relating to PKGNAME.
-# Return a hash where the keys are the manual identifiers and the values
-# are the urls.
-# 
-# See the HTML Xref Configuration node in the Texinfo manual.
-# The file is maintained in the util subdirectory of the Texinfo sources.
-# 
-sub read_htmlxref {
-  my ($pkgname) = @_;
-  my %ret;
-    
-  open (HTMLXREF_FILE) || die "open($HTMLXREF_FILE) failed: $!";
-
-  my %variables;
-  my %ret_type;  # record preferred xref type so far, for each manual
-  while (<HTMLXREF_FILE>) {
-    next if /^\s*#/;  # ignore comments
-    next if /^\s*$/;  # ignore blank lines.
-    chomp;
-    
-    # handle variable definitions:
-    if (/^\s*(\w+)\s*=\s*(.*)\s*$/) {
-      my ($var,$val) = ($1,$2);
-      $variables{$var} = &expand_variables ($val);
-
-    # look for manual entries relating to PKGNAME:
-    } elsif (/^\s*(\S+)\s+(\w+)\s+(.*)\s*$/) {
-      my ($manual,$type,$url) = ($1,$2,$3);
-
-      # The manual name may be exactly the package name, but it may not
-      # be, yet still part of the package.  To detect this, we see if
-      # the entry uses the package name as a variable in the url.  (This
-      # seemed the only other way we could extract the relevant manuals,
-      # barring adding another field, which seemed redundant.)  Such a
-      # variable name is always in all-uppercase, with - changed to _.
-      # 
-      (my $pkgname_as_var = $pkgname) =~ tr/a-z-/A-Z_/;
-      if ($manual eq $pkgname || $url =~ /\$\{$pkgname_as_var\}/) {
-        if (! exists $ret_type{$manual}
-            || &prefer_xref_type ($type, $ret_type{$manual})) {
-          $ret{$manual} = &expand_variables ($url);
-          $ret_type{$manual} = $type;
-        }
-      }
-      
-    } else {
-      warn "$HTMLXREF_FILE:$.: unexpected line: $_\n";
-    }
-  }
-  
-  close (HTMLXREF_FILE) || warn "close($HTMLXREF_FILE) failed: $!";
-
-  return %ret;
-
-  # using the %variables hash, expand ${varname} constructs until none
-  # remain.  If a variable isn't defined, just replace it with the empty
-  # string.  Would be better to give a warning, but let's not bother.
-  sub expand_variables {
-    my ($val) = @_;
-    $val =~ s/\$\{(\w+)\}/$variables{$1}/eg  # expand variables
-      until $val !~ /\$\{(\w+)\}/;           # until no more
-    return $val;
-  }
-
-  # If TYPE1 is preferred to TYPE2, return 1, else 0.
-  # When a given manual is available in multiple formats, we prefer the
-  # "smallest" one (mono < chapter < section < node), since packages with
-  # multiple manuals tend to be large, and in any event, we usually have a
-  # generic url (/software/pkgname/manual/) linking to all available forms.
-  sub prefer_xref_type {
-    my ($type1,$type2) = @_;
-    my %xref_types = (
-      "node"    => 10,
-      "section" => 20,
-      "chapter" => 30,
-      "mono"    => 40,
-    );
-    
-    if (! exists $xref_types{$type1}) {
-      warn "$HTMLXREF_FILE:$.: unexpected xref type: $type1\n";
-    }
-    if (! exists $xref_types{$type2}) {
-      warn "$HTMLXREF_FILE:$.: unexpected xref type: $type2\n";
-    }
-    
-    return $xref_types{$type1} < $xref_types{$type2};
-  }
-}
-
-
-
-# Read $MAINTAINERS_FILE according to $HOW, either "by-package" or
-# "by-maintainer" We return a hash.  With "by-package", the keys are
-# package names and the values are a list of maintainer hash references.
-# With "by-maintainer", the keys are maintainer (real) names and the
-# values are hash references with their information.
-# 
-# Special maintainer keys we synthesize, that are not in the actual
-# maintainers file:
-# is_generic - whether it is an actual person or a generic address;
-# best_email - uses privateemail where present, in preference to email;
-# lineno - location in the file.
-# 
-sub read_maintainers {
-  my ($how) = @_;
-  my %ret;
-  
-  open (MAINTAINERS_FILE) || die "open($MAINTAINERS_FILE) failed: $!";
-
-  # ignore first part of maintainers, through the first form feed.
-  while (<MAINTAINERS_FILE>) {
-    chomp;
-    last if /^\f$/;
-  }
-  
-  # read the real information.
-  my %maint;  # info we are accumulating for one maintainer
-  while (<MAINTAINERS_FILE>) {
-    next if /^#/;  # ignore comments
-    chomp;
-    
-    # at a blank line, save the maintainer info we've accumulated, if any.
-    if (/^\s*$/) {
-      &debug_hash ($., %maint);
-      next unless keys %maint;
-      
-      # record whether this is a generic maintainer (starts with lowercase):
-      $maint{"is_generic"} = $maint{"name"} =~ m/^[a-z]/;
-      
-      # record best email to use for the maintainer.
-      $maint{"best_email"} = $maint{"privateemail"} || $maint{"email"};
-      
-      # record where we found it, more or less.  We are past the blank
-      # line, and we can assume every entry has at least two lines.
-      $maint{"lineno"} = $. - 4;
-      
-      if ($how eq "by-package") {
-        # split apart the package value we've accumulated..
-        if (exists $maint{"package"}) {
-          my @pkgs = split (/\|/, $maint{"package"});
-
-          # append this maintainer to the list for each of his/her packages.
-          my %copy = %maint;
-          for my $p (@pkgs) {
-            my @x = exists $ret{$p} ? @{$ret{$p}} : ();
-            push (@x, \%copy);
-            $ret{$p} = address@hidden;
-          }
-        } else {
-          warn "no packages for $maint{name}";
-        }
-
-      } elsif ($how eq "by-maintainer") {
-        if (! exists $maint{"name"}) {
-          warn "no name for maintainer";
-          next;
-        }
-        my $name = $maint{"name"};
-        if (exists $ret{$name}) {
-          warn "ignoring second entry for maintainer $name";
-          next;
-        }
-        my %copy = %maint;
-        $ret{$name} = \%copy;
-
-      } else {
-        die "can't read_maintainers($how)";
-      }
-
-      undef %maint;  # clear out for next maintainer.
-
-      last if /^\f$/;  # form feed marks end of info.
-      next;
-    }
-
-    # key is everything before the first colon.
-    # value is everything after the first colon and whitespace.
-    my ($key,$val) = split (/:\s*/, $_, 2);
-    
-    # if key already exists, use | to separate values.
-    $val = "$maint{$key}|$val" if exists $maint{$key};
-    
-    # todo: parse key of address+ and append.
-    $maint{$key} = $val;
-  }
-  
-  # skip the rest.
-  close (MAINTAINERS_FILE) || warn "close($MAINTAINERS_FILE) failed: $!";
-  
-  return %ret;
-}
-
-
-
-# Return list of entries in $RECENTREL_FILE -- one per line, ignoring
-# comments starting with # and blank lines.
-# 
-sub read_oldpackages {
-  local $GNUPACKAGES_FILE = $OLDPACKAGES_FILE;
-  my %ret = &read_gnupackages ();    # reuse routine via dynamic scoping
-  return %ret;
-}  
-
-
-
-# Return list of entries in $RECENTREL_FILE -- one per line, ignoring
-# comments starting with # and blank lines.
-# 
-sub read_recentrel {
-  my @ret = ();
-  
-  open (RECENTREL_FILE) || die "open($RECENTREL_FILE) failed: $!";
-  while (<RECENTREL_FILE>) {
-    next if /^\s*#/;  # ignore comments
-    next if /^\s*$/;  # ignore blank lines.
-    chomp;
-    push (@ret, $_);
-  }
-  close (RECENTREL_FILE) || warn "close($RECENTREL_FILE) failed: $!";
- 
-  return @ret;
-}
-
-
-
-# read the savannah groups.tsv file, return a hash of information, where
-# the keys are project identifiers and the values are references to
-# hashes with the information.  A key "lineno" is included in each value.
-# 
-sub read_savannah {
-  my %ret;
-  
-  open (SAVANNAH_FILE) || die "open($SAVANNAH_FILE) failed: $!";
-  <SAVANNAH_FILE>;  # ignore first line (with field names).
-  
-  # We want only the offical GNU packages, which, happily, come first (type=1).
-  while (<SAVANNAH_FILE>) {
-    last if /^2/;  # quit at first non-gnu
-    chomp;
-    my ($type_id,$name,$unix_group_name,$group_name) = split (/\t/);
-
-    my %pkg;
-    $pkg{"name"} = $group_name;
-    $pkg{"lineno"} = $.;
-    
-    if (exists $ret{$unix_group_name}) {
-      warn "$SAVANNAH_FILE:$.: already saw $unix_group_name?\n";
-    } else {
-      $ret{$unix_group_name} = \%pkg;
-    }
-  }
-  
-  close (SAVANNAH_FILE) || warn "close($SAVANNAH_FILE) failed: $!";
-  
-  return %ret;
-}
-
-
-
-# Utilities.
-
-
-# Since we need this in more than one place.
-#
-sub skip_pkg_p {
-  my ($pkgname) = @_;
-  return $pkgname =~ / /            # gimp www pages, flex manual
-         || $pkgname =~ /^gg-/      # groups
-         || $pkgname =~ /^(sovix|cfengine)$/  # not really ours
-         || $pkgname =~ /\.nongnu/; # specialness, see maintainers
-}
-
-
-# avoid repeating the field widths.
-# 
-sub gnupkgs_msg {
-  my ($msg, %p) = @_;
-  warn "gnupkgs_msg: no lineno/package elements in %p hash"
-    unless $p{"lineno"} && $p{"package"};
-  return sprintf ("$GNUPACKAGES_FILE:%4d:%-16s $msg",
-                  $p{"lineno"}, $p{"package"});
-}
-
-
-# return auto-generation notice to include in output files.
-# 
-sub generated_by_us {
-  chomp (my $date = `date`);
-  (my $us = $0) =~ s,^\./,,;
-  return "generated by womb/gnumaint/$us $date";
-}
-
-
-# print arg on stderr.
-sub debug { warn "$_[0]\n" if $DEBUG; }
-
-
-# Log LABEL followed by hash elements, all on one line.
-# 
-sub debug_hash {
-  return unless $DEBUG;
-  my ($label) = shift;
-  my (%hash) = (ref $_[0] && $_[0] =~ /.*HASH.*/) ? %{$_[0]} : @_;
-
-  my $str = "$label: {";
-  my @items = ();
-  for my $key (sort keys %hash) {
-    my $val = $hash{$key};
-    $key =~ s/\n/\\n/g;
-    $val =~ s/\n/\\n/g;
-    push (@items, "$key:$val");
-  }
-  $str .= join (",", @items);
-  $str .= "}";
-
-  warn "$str\n";
-}

Index: gnupackages.txt
===================================================================
RCS file: /sources/womb/gnumaint/gnupackages.txt,v
retrieving revision 1.96
retrieving revision 1.97
diff -u -b -r1.96 -r1.97
--- gnupackages.txt     3 Dec 2011 01:31:38 -0000       1.96
+++ gnupackages.txt     21 Dec 2011 21:01:00 -0000      1.97
@@ -1,4 +1,4 @@
-# $Id: gnupackages.txt,v 1.96 2011/12/03 01:31:38 karl Exp $
+# $Id: gnupackages.txt,v 1.97 2011/12/21 21:01:00 karl Exp $
 # Public domain.
 #
 # This file records information on a per-package basis, *not* including
@@ -155,7 +155,7 @@
 doc-summary: Automated program generator
 doc-url: /software/autogen/manual/
 gplv3-status: done-in-5.9.5
-activity-status: ok 20110626 (5.12)
+activity-status: ok 20111204 (5.13)
 
 package: automake
 language: perl
@@ -201,7 +201,7 @@
 doc-summary: Telephony server
 doc-url: http://www.gnutelephony.org/doxy/bayonne2/
 gplv3-status: next-release-early-2009 (licensing #373278)
-activity-status: ok 20070118 (bayonne2-2.3.2, maintainer is very active)
+activity-status: ok 20111218 (bayonne-2.99+3.0apionly2)
 
 package: bazaar
 savannah: nonesuch
@@ -399,7 +399,7 @@
 doc-summary: (u)Common C++ framework
 doc-url: none
 gplv3-status: not-done-commoncpp2-1.6.3.tar.gz, done-ucommon-1.9.1
-activity-status: ok commoncpp2-1.8.0/20100224 (ucommon-5.0.7/20111112)
+activity-status: ok commoncpp2-1.8.0/20100224 (ucommon-5.1.0/20111218)
 last-contact: 8jun10 wrote
 
 package: complexity
@@ -696,7 +696,7 @@
 doc-summary: Low-level disk partitioning and formatting
 doc-url: none
 gplv3-status: done-in-1.0
-activity-status: ok 20111010 (2.0.0a)
+activity-status: ok 20111204 (2.0.0a1)
 
 package: ferret
 logo: /software/ferret/ferret.png
@@ -750,7 +750,7 @@
 logo: /software/freeipmi/images/freeipmi-logo-small.jpg
 gplv3-status: in-dev-sources
 last-contact: 14jan10 replied, next major release
-activity-status: ok 20111121 (1.0.9)
+activity-status: ok 20111213 (1.0.10)
 
 package: freetalk
 doc-category: Internet
@@ -1398,7 +1398,7 @@
 doc-summary: Transport layer security library
 doc-url: /software/gnutls/manual/
 gplv3-status: stays-v2-indefinitely (gnumaint-reply 21 Aug 2007 11:13:04)
-activity-status: ok 20111112 (3.0.8, 2.12.14/20111107)
+activity-status: ok 20111213 (3.0.9, 2.12.14/20111107)
 
 package: gnutrition
 doc-category: Health
@@ -1653,7 +1653,7 @@
 doc-summary: PostScript and PDF viewer using Ghostscript as a back-end
 doc-url: /software/gv/manual/
 gplv3-status: done-in-3.6.5
-activity-status: ok 20110429 (3.7.1)
+activity-status: ok 20111202 (3.7.3)
 
 package: gvpe
 doc-category: Security
@@ -2015,8 +2015,7 @@
 doc-summary: Multiplayer wargame where your army is a blob of liquid
 doc-url: /software/liquidwar6/manual/
 gplv3-status: done-in-0.0.2beta
-download-url: http://download.savannah.gnu.org/releases/liquidwar6/
-activity-status: ok 20080130 (0.0.3)
+activity-status: ok 20111218 (0.0.12beta)
 
 package: lispintro
 doc-category: Software
@@ -2144,9 +2143,9 @@
 doc-category: Internet
 doc-summary: Photo and media sharing
 doc-url: http://docs.mediagoblin.org/
-download-url: http://mediagoblin.org/download/mediagoblin-0.0.5.tar.gz
+download-url: http://mediagoblin.org/download/
 gplv3-status: ok
-activity-status: ok 20111102 (0.1.0)
+activity-status: ok 20111212 (0.2.0)
 
 package: melting
 logo: http://www.ebi.ac.uk/~lenov/GRAPHICS/meltingtop.jpg
@@ -2169,10 +2168,11 @@
 doc-category: Web
 doc-summary: Cross between HTML and Lisp, specifically designed for Web servers
 doc-url: http://metahtml.sourceforge.net/documentation/
-gplv3-status: not-done-since-stale
-activity-status: stale 20010306 (6.11.01)
+gplv3-status: not-done-since-nomaint
+activity-status: nomaint 20010306 (6.11.01)
 note: metahtml.org domain expired
 note: used by gcc, Gerald Pfeifer has patches
+note: bfox has some updates to sources on sf, but has not sent
 last-contact: 27nov11 gerald wrote, 19jan11 replied, 23feb10 replied
 
 package: mifluz
@@ -2540,7 +2540,7 @@
 doc-url: /software/pth/pth-manual.html
 gplv3-status: next-release rms
 activity-status: stale 20060608 (2.0.7)
-last-contact: 10mar11 wrote (rms gave up), 4jan11 to rms, 12mar10 wrote, 
1jun09 replied
+last-contact: 3dec11 replied, 10mar11 wrote (rms gave up), 4jan11 to rms, 
12mar10 wrote, 1jun09 replied
 
 package: pythonwebkit
 doc-category: Software
@@ -2718,7 +2718,7 @@
 doc-summary: Secure peer-to-peer VoIP server for the SIP protocol
 doc-url: none
 gplv3-status: done-in-0.1
-activity-status: ok 20111112 (1.1.3)
+activity-status: ok 20111211 (1.1.4)
 
 package: slib
 mundane-name: SLIB
@@ -2847,8 +2847,7 @@
 doc-summary: Managing installed software packages
 doc-url: /software/stow/manual.html
 gplv3-status: not-done-since-stale
-activity-status: stale 20020107 (1.3.3)
-last-contact: 6feb11 wrote, 24mar10 replied
+activity-status: ok 20111211 (2.1.2)
 
 package: stump
 logo: http://www.algebra.com/~ichudov/images/active/stump.jpg
@@ -3181,7 +3180,7 @@
 doc-summary: Record, replay and distribute user actions under X11
 doc-url: http://www.sandklef.com/xnee/?q=node&q=documents
 gplv3-status: done-in-directory
-activity-status: ok 20110809 (3.10)
+activity-status: ok 20111216 (3.11)
 
 package: xorriso
 doc-category: Audio

Index: gm-read.pl
===================================================================
RCS file: gm-read.pl
diff -N gm-read.pl
--- /dev/null   1 Jan 1970 00:00:00 -0000
+++ gm-read.pl  21 Dec 2011 21:01:00 -0000      1.1
@@ -0,0 +1,625 @@
+# $Id: gm-read.pl,v 1.1 2011/12/21 21:01:00 karl Exp $
+# Subroutines for gm script that read various external data file.
+# (In this particular case, using require seemed better than setting up
+# modules.  Certainly simpler.)
+# 
+# Copyright 2007, 2008, 2009, 2010, 2011 Free Software Foundation Inc.
+# 
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or (at
+# your option) any later version.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+# Originally written by Karl Berry.
+
+
+
+# Read $COPYRIGHT_LIST_FILE.  If HOW is "by-line", return hash with the
+# keys being package names and the values their line numbers in the
+# file.  If "by-year", return hash with keys as the assignment years and
+# the values another hash mapping packages to names.
+# 
+sub read_copyright_list {
+  my ($how) = @_;
+  my %ret;
+  
+  if ($how !~ /^by-(year|line)$/) {
+    die "unknown argument $how to read_copyright_list";
+  }
+
+  open (COPYRIGHT_LIST_FILE) || die "open($COPYRIGHT_LIST_FILE) failed: $!";
+  while (<COPYRIGHT_LIST_FILE>) {
+    # Look at lines following a blank line.
+    next unless /^\s*$/;
+    $_ = <COPYRIGHT_LIST_FILE>; # go past blank line.
+    chomp;
+    
+    # Drop everything after the first tab, we don't want to see all-caps
+    # company names.
+    s/\t.*//;
+    
+    # Sometimes commas are used to separate package names.  Just replace
+    # them with spaces as a small simplification.
+    s/,/ /g;
+    
+    # Split remainder into words at whitespace.
+    my @words = split (" ");
+    
+    my $year = -1;
+    my @line_pkgs = (); # if by-year
+    for my $w (@words) {
+      # the word has to start with an uppercase letter or number,
+      # and be followed only by possible constituent characters,
+      # or we're done with this line.
+      last unless $w =~ /^[A-Z0-9][A-Z0-9._-]+$/;
+      
+      # and if it is only digits and -, that's a date.
+      # remember the year if we need it, and we're done with this line.
+      if ($w =~ /^\d+-[-\d]+$/) {
+        if ($how eq "by-year") {
+          $year = substr ($w, 0, 4);
+          if ($year < 1980 || $year > 2222) {
+            warn "$COPYRIGHT_LIST_FILE:$.: strange year: $year\n";
+            next;
+          }
+        }
+        last;
+      }
+      
+      # keyword ANY in copyright.list is not a package name for us, etc.
+      next if $w =~ /^(ANY|UNUSED|SPECIAL|TRANSLATIONS)$/;
+      next if $w =~ /^(CCLRC|CNOC|ET|INSIGHT|L3|LINBIT|WCT|FOO)$/;
+      
+      $w = lc ($w);
+      $canonical_pkg_name = &canonicalize_pkg_name ($w);
+      
+      # if by line, we can just save this package in the return hash.
+      # if by year, have to accumulate all the info on this line.
+      if ($how eq "by-year") {
+        push (@line_pkgs, $canonical_pkg_name);
+      } elsif ($how eq "by-line") {
+        $ret{$canonical_pkg_name} = $.;
+      }
+    }
+  }
+  close (COPYRIGHT_LIST_FILE) || warn "close($COPYRIGHT_LIST_FILE) failed: $!";
+  
+  $ret{"gnustandards"} = 1; # no papers, but is copyright FSF
+  $ret{"goodbye"} = $ret{"network"} = 1; # mattl
+  $ret{"libtasn1"} = 1;     # split off from gnutls, so no separate papers
+  $ret{"lispintro"} = 1;    # no papers, but is copyright FSF
+  $ret{"mig"} = 1;          # part of hurd
+  $ret{"trans-coord"} = 1;  # container
+  
+  return %ret;
+
+  
+  # lots of names in copyright.list don't match current package
+  # identifiers, for whatever reason.
+  #
+  sub canonicalize_pkg_name {
+    my ($w) = @_;
+    
+    $w =~ s/_manual//;  # manuals are not separate packages for us.
+
+    %map = (
+      "dotgnu"         => "dotgnu-pnet", # make container?
+      "enterprise"     => "gnue",
+      "getopt"         => "libc",
+      "glibc"          => "libc",
+      "glob"           => "libc",
+      "gnm"            => "binutils",
+      "gnm960"         => "binutils",
+      "gnu.regexp"     => "libc",
+      "gnu-c"          => "gcc",
+      "gnuchess"       => "chess",
+      "gnugsl"         => "gsl",
+      "gnupascal"      => "pascal",
+      "gnus"           => "emacs",
+      "gnuucp"         => "uucp",
+      "gpc"            => "pascal",
+      "gprof"          => "binutils",
+      "graphics"       => "plotutils",
+      "grub2"          => "grub",
+      "gsize"          => "binutils",
+      "gsize960"       => "binutils",
+      "gstrip"         => "binutils",
+      "gstrip960"      => "binutils",
+      "hashcash.el"    => "emacs",
+      "info"           => "texinfo",
+      "interactive"    => "gnuit",
+      "ld"             => "binutils",
+      "libavl"         => "avl",
+      "libbfd.a"       => "bfd",
+      "libgcj"         => "gcc",
+      "libgen.a"       => "libc",
+      "libgsasl"       => "gsasl",
+      "libutf8"                => "libunistring",
+      "mach"           => "gnumach",
+      "makeinfo"       => "texinfo",
+      "malloc"         => "libc",
+      "mcount.c"       => "libc",
+      "memcmp"         => "libc",
+      "memcpy"         => "libc",
+      "memset"         => "libc",
+      "midnight"       => "mc",
+      "midnight_commander" => "mc",
+      "mp"             => "gmp",
+      "muse"           => "emacs-muse",
+      "objc"           => "gcc",
+      "obstack"                => "libc",
+      "pfe"            => "gforth",
+      "pnet"           => "dotgnu-pnet",
+      "portable.net"   => "dotgnu-pnet",
+      "ps"             => "sysutils",
+      "ptx"            => "coreutils",
+      "queue"          => "gnu-queue",
+      "radio"          => "gnuradio",
+      "readlink"       => "coreutils",
+      "regcmp"         => "libc",
+      "regex"          => "libc",
+      "regexp"         => "regex",
+      "robotussin"     => "binutils",
+      "sasl"           => "gsasl",
+      "shogi"          => "gnushogi",
+      "shred"          => "coreutils",
+      "snprintfv"      => "libc",
+      "sql.el"         => "emacs",
+      "strchr"         => "libc",
+      "strftime"       => "libc",
+      "strtod"         => "libc",
+      "superoptimizer" => "superopt",
+      "texi2html"      => "texinfo",
+      "verilog-mode.el"        => "emacs",
+      "winboard"       => "xboard",
+    );
+    return $map{$w} || $w;
+  }
+}
+
+
+
+# Read $COPYRIGHT_PAPERS_FILE, generated with
+#   cd /srv/data/copyright-mirror && find -type f | sort
+#
+# Return hash with the keys being years and the values another hash
+# (reference): this one with keys being the package names and the values
+# yet a third hash (reference): this one just last names for keys, to
+# easily avoid muliple entries.  This is the information given in the
+# filenames.
+# 
+sub read_copyright_papers {
+  my %ret;
+  
+  open (COPYRIGHT_PAPERS_FILE) ||die "open($COPYRIGHT_PAPERS_FILE) failed: $!";
+  while (<COPYRIGHT_PAPERS_FILE>) {
+    # Examples: ./1985/Curry.grep.pdf  ./1985/Robinson.emacs.1.pdf
+    chomp;
+    my (undef,$year,$file) = split ("/");
+    next if $file eq "";   # ignore top-level files
+    next if $file =~ /#/;  # ignore autosave files.
+    next if $file =~ /~$/; # ignore backup files.
+    
+    if ($year < 1980 || $year > 2222) {
+      warn "$COPYRIGHT_PAPERS_FILE:$.: strange year: $year\n";
+      next;
+    }
+    
+    $file =~ s/\.pdf$//;  # remove trailing .pdf
+    
+    my ($name,$package) = split (/\./, $file);
+    if ($name eq "") {
+      warn ("$COPYRIGHT_PAPERS_FILE:$.: empty name in $_\n");
+      next;
+    }
+
+    # many filenames lack package names; try to match up last names.
+    $package = ".nameonly" if $package eq "";
+    $ret{$year}->{$package}->{$name}++;
+  }
+    
+  close (COPYRIGHT_PAPERS_FILE)
+  || warn "close($COPYRIGHT_PAPERS_FILE) failed: $!";
+  return %ret;
+}
+
+
+
+# Read doc-categories.txt file for info about SHORT_CAT, and return a
+# reference to a two-element array.  The first element in the array is
+# the long category name; the second element is a url to the category in
+# the Free Software Directory.
+# 
+# If we ever need a third piece of information, should probably switch
+# to a hash for the values.
+# 
+sub read_doc_categories {
+  my ($short_cat) = @_;
+  
+  if (keys %doc_category == 0) {
+    open (DOC_CATEGORIES_FILE) || die "open($DOC_CATEGORIES_FILE) failed: $!";
+  
+    while (<DOC_CATEGORIES_FILE>) {
+      next if /^\s*#/;  # ignore comments
+      next if /^\s*$/;  # ignore blank lines.
+      chomp;
+    
+      my ($short,$fsd,$full) = split (" ", $_, 3);
+      my $ret_full = $full || $short;
+      my $ret_url = $fsd eq "-"
+                    ? "" : "http://directory.fsf.org/category/$fsd/";;
+      $doc_category{$short} = [ $ret_full, $ret_url ];
+           
+    }
+
+    close (DOC_CATEGORIES_FILE)
+    || warn "close($DOC_CATEGORIES_FILE) failed: $!";
+  }
+  
+  # now we have the hash, so look up SHORT_CAT.
+  my $ret;
+  if (exists ($doc_category{$short_cat})) {
+    $ret = $doc_category{$short_cat};
+  } else {
+    warn "$DOC_CATEGORIES_FILE: no short category name $short_cat\n";
+    $ret = ["no long name for $short_cat", "no url for $short_cat"];
+  }
+  
+  return $ret;
+}
+
+
+
+# Read an rsync listing of ftp.gnu.org, with entries like this:
+# 
+# drwxr-xr-x        4096 2004/01/16 12:20:08 gnu/3dldf
+# lrwxrwxrwx           5 2010/12/29 13:30:03 gnu/libc -> glibc
+# 
+# Return list of directories and symlinks under gnu/.
+#
+sub read_ftplisting {
+  my @ret;
+
+  open (FTPLISTING_FILE) || die "open($FTPLISTING_FILE) failed: $!";
+  my %keys;
+  while (<FTPLISTING_FILE>) {
+    chomp;
+    next unless /^[dl].*[0-9] gnu[^+]/;  # the 0-9 is the seconds
+    my $orig = $_;
+    s,^.* gnu/,,; # rm through the gnu/
+    s,[ /].*$,,;  # rm all following components or symlink target
+    push (@ret, $_) unless exists $keys{$_};
+    $keys{$_} = 1;
+    warn "keeping $_ from $orig\n" if /^drw/;
+  }
+  close (FTPLISTING_FILE) || warn "close($FTPLISTING_FILE) failed: $!";
+  
+  return @ret;
+}
+
+
+
+# Read the gnupackages.txt file, return a hash of information, where
+# the keys are package names and the values are hash references with the
+# information.  If a key is given more than once (e.g., note), the
+# values are separated by |.  A key "lineno" is synthesized with the
+# line number of the blank line following the package.
+# 
+sub read_gnupackages {
+  my %ret;
+ 
+  open (GNUPACKAGES_FILE) || die "open($GNUPACKAGES_FILE) failed: $!";
+  
+  my %pkg;
+  while (<GNUPACKAGES_FILE>) {
+    next if /^#/;  # ignore comments
+    s/ +$//; # remove trailing spaces
+    chomp;
+    
+    # at a blank line, save the info we've accumulated, if any.
+    if (/^$/) {
+      next unless keys %pkg;
+      
+      if (exists $pkg{"package"}) {
+        my %copy = %pkg;  # do not save a pointer to what will be overwritten
+        $copy{"lineno"} = $. - 1;  # save line number
+        $ret{$pkg{"package"}} = \%copy;
+      } else {
+        warn "$GNUPACKAGES_FILE:$.: no package name for block ending here\n";
+      }
+      undef %pkg;  # clear out next
+      next;
+    }
+
+    # key is everything before the first colon.
+    # value is everything after the first colon and whitespace.
+    my ($key,$val) = split (/:\s*/, $_, 2);
+    if ($key eq $_) {
+      warn "$GNUPACKAGES_FILE:$.: no colon in line\n";
+    }
+    
+    if ($key eq "package" && $val =~ /[A-Z]/) {
+      warn "$GNUPACKAGES_FILE:$.: forcing package name to lowercase\n";
+      $val = lc ($val);
+    }
+    
+    # if key already exists, use | to separate values.
+    $val = "$pkg{$key}|$val" if exists $pkg{$key};
+    
+    $pkg{$key} = $val;
+  }
+  
+  close (GNUPACKAGES_FILE) || warn "close($GNUPACKAGES_FILE) failed: $!";
+  
+  return %ret;
+}
+
+
+
+# Read htmlxref.cnf file for entries relating to PKGNAME.
+# Return a hash where the keys are the manual identifiers and the values
+# are the urls.
+# 
+# See the HTML Xref Configuration node in the Texinfo manual.
+# The file is maintained in the util subdirectory of the Texinfo sources.
+# 
+sub read_htmlxref {
+  my ($pkgname) = @_;
+  my %ret;
+    
+  open (HTMLXREF_FILE) || die "open($HTMLXREF_FILE) failed: $!";
+
+  my %variables;
+  my %ret_type;  # record preferred xref type so far, for each manual
+  while (<HTMLXREF_FILE>) {
+    next if /^\s*#/;  # ignore comments
+    next if /^\s*$/;  # ignore blank lines.
+    chomp;
+    
+    # handle variable definitions:
+    if (/^\s*(\w+)\s*=\s*(.*)\s*$/) {
+      my ($var,$val) = ($1,$2);
+      $variables{$var} = &expand_variables ($val);
+
+    # look for manual entries relating to PKGNAME:
+    } elsif (/^\s*(\S+)\s+(\w+)\s+(.*)\s*$/) {
+      my ($manual,$type,$url) = ($1,$2,$3);
+
+      # The manual name may be exactly the package name, but it may not
+      # be, yet still part of the package.  To detect this, we see if
+      # the entry uses the package name as a variable in the url.  (This
+      # seemed the only other way we could extract the relevant manuals,
+      # barring adding another field, which seemed redundant.)  Such a
+      # variable name is always in all-uppercase, with - changed to _.
+      # 
+      (my $pkgname_as_var = $pkgname) =~ tr/a-z-/A-Z_/;
+      if ($manual eq $pkgname || $url =~ /\$\{$pkgname_as_var\}/) {
+        if (! exists $ret_type{$manual}
+            || &prefer_xref_type ($type, $ret_type{$manual})) {
+          $ret{$manual} = &expand_variables ($url);
+          $ret_type{$manual} = $type;
+        }
+      }
+      
+    } else {
+      warn "$HTMLXREF_FILE:$.: unexpected line: $_\n";
+    }
+  }
+  
+  close (HTMLXREF_FILE) || warn "close($HTMLXREF_FILE) failed: $!";
+
+  return %ret;
+
+  # using the %variables hash, expand ${varname} constructs until none
+  # remain.  If a variable isn't defined, just replace it with the empty
+  # string.  Would be better to give a warning, but let's not bother.
+  sub expand_variables {
+    my ($val) = @_;
+    $val =~ s/\$\{(\w+)\}/$variables{$1}/eg  # expand variables
+      until $val !~ /\$\{(\w+)\}/;           # until no more
+    return $val;
+  }
+
+  # If TYPE1 is preferred to TYPE2, return 1, else 0.
+  # When a given manual is available in multiple formats, we prefer the
+  # "smallest" one (mono < chapter < section < node), since packages with
+  # multiple manuals tend to be large, and in any event, we usually have a
+  # generic url (/software/pkgname/manual/) linking to all available forms.
+  sub prefer_xref_type {
+    my ($type1,$type2) = @_;
+    my %xref_types = (
+      "node"    => 10,
+      "section" => 20,
+      "chapter" => 30,
+      "mono"    => 40,
+    );
+    
+    if (! exists $xref_types{$type1}) {
+      warn "$HTMLXREF_FILE:$.: unexpected xref type: $type1\n";
+    }
+    if (! exists $xref_types{$type2}) {
+      warn "$HTMLXREF_FILE:$.: unexpected xref type: $type2\n";
+    }
+    
+    return $xref_types{$type1} < $xref_types{$type2};
+  }
+}
+
+
+
+# Read $MAINTAINERS_FILE according to $HOW, either "by-package" or
+# "by-maintainer" We return a hash.  With "by-package", the keys are
+# package names and the values are a list of maintainer hash references.
+# With "by-maintainer", the keys are maintainer (real) names and the
+# values are hash references with their information.
+# 
+# Special maintainer keys we synthesize, that are not in the actual
+# maintainers file:
+# is_generic - whether it is an actual person or a generic address;
+# best_email - uses privateemail where present, in preference to email;
+# lineno - location in the file.
+# 
+sub read_maintainers {
+  my ($how) = @_;
+  my %ret;
+  
+  open (MAINTAINERS_FILE) || die "open($MAINTAINERS_FILE) failed: $!";
+
+  # ignore first part of maintainers, through the first form feed.
+  while (<MAINTAINERS_FILE>) {
+    chomp;
+    last if /^\f$/;
+  }
+  
+  # read the real information.
+  my %maint;  # info we are accumulating for one maintainer
+  while (<MAINTAINERS_FILE>) {
+    next if /^#/;  # ignore comments
+    chomp;
+    
+    # at a blank line, save the maintainer info we've accumulated, if any.
+    if (/^\s*$/) {
+      &debug_hash ($., %maint);
+      next unless keys %maint;
+      
+      # record whether this is a generic maintainer (starts with lowercase):
+      $maint{"is_generic"} = $maint{"name"} =~ m/^[a-z]/;
+      
+      # record best email to use for the maintainer.
+      $maint{"best_email"} = $maint{"privateemail"} || $maint{"email"};
+      
+      # record where we found it, more or less.  We are past the blank
+      # line, and we can assume every entry has at least two lines.
+      $maint{"lineno"} = $. - 4;
+      
+      if ($how eq "by-package") {
+        # split apart the package value we've accumulated..
+        if (exists $maint{"package"}) {
+          my @pkgs = split (/\|/, $maint{"package"});
+
+          # append this maintainer to the list for each of his/her packages.
+          my %copy = %maint;
+          for my $p (@pkgs) {
+            my @x = exists $ret{$p} ? @{$ret{$p}} : ();
+            push (@x, \%copy);
+            $ret{$p} = address@hidden;
+          }
+        } else {
+          warn "no packages for $maint{name}";
+        }
+
+      } elsif ($how eq "by-maintainer") {
+        if (! exists $maint{"name"}) {
+          warn "no name for maintainer";
+          next;
+        }
+        my $name = $maint{"name"};
+        if (exists $ret{$name}) {
+          warn "ignoring second entry for maintainer $name";
+          next;
+        }
+        my %copy = %maint;
+        $ret{$name} = \%copy;
+
+      } else {
+        die "can't read_maintainers($how)";
+      }
+
+      undef %maint;  # clear out for next maintainer.
+
+      last if /^\f$/;  # form feed marks end of info.
+      next;
+    }
+
+    # key is everything before the first colon.
+    # value is everything after the first colon and whitespace.
+    my ($key,$val) = split (/:\s*/, $_, 2);
+    
+    # if key already exists, use | to separate values.
+    $val = "$maint{$key}|$val" if exists $maint{$key};
+    
+    # todo: parse key of address+ and append.
+    $maint{$key} = $val;
+  }
+  
+  # skip the rest.
+  close (MAINTAINERS_FILE) || warn "close($MAINTAINERS_FILE) failed: $!";
+  
+  return %ret;
+}
+
+
+
+# Return list of entries in $OLDPACKAGES_FILE -- one per line, ignoring
+# comments starting with # and blank lines.
+# 
+sub read_oldpackages {
+  local $GNUPACKAGES_FILE = $OLDPACKAGES_FILE;
+  my %ret = &read_gnupackages ();    # reuse routine via dynamic scoping
+  return %ret;
+}  
+
+
+
+# Return list of entries in $RECENTREL_FILE -- one per line, ignoring
+# comments starting with # and blank lines.
+# 
+sub read_recentrel {
+  my @ret = ();
+  
+  open (RECENTREL_FILE) || die "open($RECENTREL_FILE) failed: $!";
+  while (<RECENTREL_FILE>) {
+    next if /^\s*#/;  # ignore comments
+    next if /^\s*$/;  # ignore blank lines.
+    chomp;
+    push (@ret, $_);
+  }
+  close (RECENTREL_FILE) || warn "close($RECENTREL_FILE) failed: $!";
+ 
+  return @ret;
+}
+
+
+
+# read the savannah groups.tsv file, return a hash of information, where
+# the keys are project identifiers and the values are references to
+# hashes with the information.  A key "lineno" is included in each value.
+# 
+sub read_savannah {
+  my %ret;
+  
+  open (SAVANNAH_FILE) || die "open($SAVANNAH_FILE) failed: $!";
+  <SAVANNAH_FILE>;  # ignore first line (with field names).
+  
+  # We want only the offical GNU packages, which, happily, come first (type=1).
+  while (<SAVANNAH_FILE>) {
+    last if /^2/;  # quit at first non-gnu
+    chomp;
+    my ($type_id,$name,$unix_group_name,$group_name) = split (/\t/);
+
+    my %pkg;
+    $pkg{"name"} = $group_name;
+    $pkg{"lineno"} = $.;
+    
+    if (exists $ret{$unix_group_name}) {
+      warn "$SAVANNAH_FILE:$.: already saw $unix_group_name?\n";
+    } else {
+      $ret{$unix_group_name} = \%pkg;
+    }
+  }
+  
+  close (SAVANNAH_FILE) || warn "close($SAVANNAH_FILE) failed: $!";
+  
+  return %ret;
+}
+
+
+1;

Index: gm-util.pl
===================================================================
RCS file: gm-util.pl
diff -N gm-util.pl
--- /dev/null   1 Jan 1970 00:00:00 -0000
+++ gm-util.pl  21 Dec 2011 21:01:00 -0000      1.1
@@ -0,0 +1,81 @@
+# $Id: gm-util.pl,v 1.1 2011/12/21 21:01:00 karl Exp $
+# Utilities for the gm script.
+# (In this particular case, using require seemed better than setting up
+# modules.  Certainly simpler.)
+# 
+# Copyright 2007, 2008, 2009, 2010, 2011 Free Software Foundation Inc.
+# 
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or (at
+# your option) any later version.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+# Originally written by Karl Berry.
+
+
+
+# Since we need this in more than one place.
+#
+sub skip_pkg_p {
+  my ($pkgname) = @_;
+  return $pkgname =~ / /            # gimp www pages, flex manual
+         || $pkgname =~ /^gg-/      # groups
+         || $pkgname =~ /^(sovix|cfengine)$/  # not really ours
+         || $pkgname =~ /\.nongnu/; # specialness, see maintainers
+}
+
+
+# avoid repeating the field widths.
+# 
+sub gnupkgs_msg {
+  my ($msg, %p) = @_;
+  warn "gnupkgs_msg: no lineno/package elements in %p hash"
+    unless $p{"lineno"} && $p{"package"};
+  return sprintf ("$GNUPACKAGES_FILE:%4d:%-16s $msg",
+                  $p{"lineno"}, $p{"package"});
+}
+
+
+# return auto-generation notice to include in output files.
+# 
+sub generated_by_us {
+  chomp (my $date = `date`);
+  (my $us = $0) =~ s,^\./,,;
+  return "generated by womb/gnumaint/$us $date";
+}
+
+
+# print arg on stderr.
+sub debug { warn "$_[0]\n" if $DEBUG; }
+
+
+# Log LABEL followed by hash elements, all on one line.
+# 
+sub debug_hash {
+  return unless $DEBUG;
+  my ($label) = shift;
+  my (%hash) = (ref $_[0] && $_[0] =~ /.*HASH.*/) ? %{$_[0]} : @_;
+
+  my $str = "$label: {";
+  my @items = ();
+  for my $key (sort keys %hash) {
+    my $val = $hash{$key};
+    $key =~ s/\n/\\n/g;
+    $val =~ s/\n/\\n/g;
+    push (@items, "$key:$val");
+  }
+  $str .= join (",", @items);
+  $str .= "}";
+
+  warn "$str\n";
+}
+
+1;
[Prev in Thread]
Current Thread
[Next in Thread]
[commit-womb] gnumaint Makefile gm gnupackages.txt gm-read.pl..., Karl Berry <=
Prev by Date: [commit-womb] gnumaint gm
Next by Date: [commit-womb] gnumaint gnupackages.txt
Previous by thread: [commit-womb] gnumaint gm
Next by thread: [commit-womb] gnumaint gnupackages.txt
Index(es):
- Date
- Thread