[Koha-cvs] CVS: koha/C4 SimpleMarc.pm,1.4,1.5

koha-cvs
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Koha-cvs] CVS: koha/C4 SimpleMarc.pm,1.4,1.5

From:	Andrew Arensburger
Subject:	[Koha-cvs] CVS: koha/C4 SimpleMarc.pm,1.4,1.5
Date:	Sun, 06 Oct 2002 17:51:24 -0700
Update of /cvsroot/koha/koha/C4
In directory usw-pr-cvs1:/tmp/cvs-serv14456

Modified Files:
        SimpleMarc.pm 
Log Message:
Added POD and some comments.


Index: SimpleMarc.pm
===================================================================
RCS file: /cvsroot/koha/koha/C4/SimpleMarc.pm,v
retrieving revision 1.4
retrieving revision 1.5
diff -C2 -r1.4 -r1.5
*** SimpleMarc.pm       5 Oct 2002 09:53:11 -0000       1.4
--- SimpleMarc.pm       7 Oct 2002 00:51:22 -0000       1.5
***************
*** 43,46 ****
--- 43,64 ----
  $VERSION = 0.01;
  
+ =head1 NAME
+ 
+ C4::SimpleMarc - Functions for parsing MARC records and files
+ 
+ =head1 SYNOPSIS
+ 
+   use C4::SimpleMarc;
+ 
+ =head1 DESCRIPTION
+ 
+ This module provides functions for parsing MARC records and files.
+ 
+ =head1 FUNCTIONS
+ 
+ =over 2
+ 
+ =cut
+ 
  @ISA = qw(Exporter);
  @EXPORT = qw(
***************
*** 56,59 ****
--- 74,80 ----
  # as well as any optionally exported functions
  
+ # FIXME - %tagtext and %tagmap are in both @EXPORT and @EXPORT_OK.
+ # They should be in one or the other, but not both (though preferably,
+ # things shouldn't get exported in the first place).
  @EXPORT_OK   = qw(
        %tagtext
***************
*** 92,95 ****
--- 113,117 ----
  # Constants
  
+ # %tagtext maps MARC tags to descriptive names.
  my %tagtext = (
      'LDR' => 'Leader',
***************
*** 156,159 ****
--- 178,196 ----
  
  # tag, subfield, field name, repeats, striptrailingchars
+ # FIXME - What is this? Can it be explained without a semester-long
+ # course in MARC?
+ 
+ # XXX - Maps MARC (field, subfield) tuples to Koha database field
+ # names (presumably in 'biblioitems'). $tagmap{$field}->{$subfield} is
+ # an anonymous hash of the form
+ #     {
+ #             name    => "title",     # Name of Koha field
+ #             rpt     => 0,           # I don't know what this is, but
+ #                                     # it's not used.
+ #             striptrail => ',:;/-',  # Lists the set of characters that
+ #                                     # should be stripped from the end
+ #                                     # of the MARC field.
+ #     }
+ 
  my %tagmap=(
      '010'=>{'a'=>{name=> 'lccn',      rpt=>0, striptrail=>' '         }},
***************
*** 182,185 ****
--- 219,241 ----
  
  #------------------
+ 
+ =item extractmarcfields
+ 
+   $biblioitem = &extractmarcfields($marc_record);
+ 
+ C<$marc_record> is a reference-to-array representing a MARC record;
+ each element is a reference-to-hash specifying a MARC field (possibly
+ with subfields).
+ 
+ C<&extractmarcfields> translates C<$marc_record> into a Koha
+ biblioitem. C<$biblioitem> is a reference-to-hash whose keys are named
+ after fields in the biblioitems table of the Koha database.
+ 
+ =cut
+ #'
+ # FIXME - Throughout:
+ #     $foo->{bar}->[baz]->{quux}
+ # can be rewritten as
+ #     $foo->{bar}[baz]{quux}
  sub extractmarcfields {
      use strict;
***************
*** 218,223 ****
--- 274,287 ----
  
            # Check each subfield in field
+           # FIXME - Would this code be more readable with
+           #   while (($subfieldname, $subfield) = each %{$field->{subfields}})
+           # ?
            foreach $subfield ( keys %{$field->{subfields}} ) {
                # see if it is defined in our Marc to koha mapping table
+               # FIXME - This if-clause takes up the entire loop.
+               # This would be better rewritten as
+               #       next unless defined($tagmap{...});
+               # Then the body of the loop doesn't have to be
+               # indented as much.
                if ( $fieldname=$tagmap{ $field->{'tag'} }->{$subfield}->{name} 
) {
                    # Yes, so keep the value
***************
*** 231,234 ****
--- 295,300 ----
                    # see if this field should have trailing chars dropped
                    if ($strip=$tagmap{ $field->{'tag'} 
}->{$subfield}->{striptrail} ) {
+                       # FIXME - The next three lines can be rewritten as:
+                       #       $bib =~ s/[\Q$strip\E]+$//;
                        $strip=~s//\\/; # backquote each char
                        $stripregex='[ ' . $strip . ']+$';  # remove trailing 
spaces also
***************
*** 243,251 ****
            } # foreach subfield
  
! 
            if ($field->{'tag'} eq '001') {
                $bib->{controlnumber}=$field->{'indicator'};
            }
            if ($field->{'tag'} eq '015') {
                $bib->{lccn}=$field->{'subfields'}->{'a'};
                $bib->{lccn}=~s/^\s*//;
--- 309,321 ----
            } # foreach subfield
  
!           # Handle special fields and tags
            if ($field->{'tag'} eq '001') {
                $bib->{controlnumber}=$field->{'indicator'};
            }
            if ($field->{'tag'} eq '015') {
+               # FIXME - I think this can be rewritten as
+               #       $field->{"subfields"}{"a"} =~ /^\s*C?(\S+)/ and
+               #               $bib->{"lccn"} = $1;
+               # This might break with invalid input, though.
                $bib->{lccn}=$field->{'subfields'}->{'a'};
                $bib->{lccn}=~s/^\s*//;
***************
*** 255,261 ****
--- 325,333 ----
  
  
+               # FIXME - Fix indentation
                if ($field->{'tag'} eq '260') {
  
                    $publicationyear=$field->{'subfields'}->{'c'};
+                   # FIXME - "\d\d\d\d" can be rewritten as "\d{4}"
                    if ($publicationyear=~/c(\d\d\d\d)/) {
                        $copyrightdate=$1;
***************
*** 283,291 ****
                }
                if ($field->{'tag'} =~/65\d/) {
!                   my $sub;
                    my $subject=$field->{'subfields'}->{'a'};
                    $subject=~s/\.$//;
                    print "Subject=$subject\n" if $debug;
                    foreach $subjectsubfield ( 'x','y','z' ) {
                      if 
($subdivision=$field->{'subfields'}->{$subjectsubfield}) {
                        if ( ref($subdivision) eq 'ARRAY' ) {
--- 355,368 ----
                }
                if ($field->{'tag'} =~/65\d/) {
!                   my $sub;    # FIXME - Never used
                    my $subject=$field->{'subfields'}->{'a'};
                    $subject=~s/\.$//;
                    print "Subject=$subject\n" if $debug;
                    foreach $subjectsubfield ( 'x','y','z' ) {
+                     # FIXME - $subdivision is only used in this
+                     # loop. Make it 'my' here, rather than in the
+                     # entire function.
+                     # Ditto $subjectsubfield. Make it 'my' in the
+                     # 'foreach' statement.
                      if 
($subdivision=$field->{'subfields'}->{$subjectsubfield}) {
                        if ( ref($subdivision) eq 'ARRAY' ) {
***************
*** 306,309 ****
--- 383,388 ----
  
          } # foreach field
+         # FIXME - Why not do this up in the "Handle special fields and
+         # tags" section?
        ($publicationyear       ) && ($bib->{publicationyear}=$publicationyear  
);
        ($copyrightdate         ) && ($bib->{copyrightdate}=$copyrightdate  );
***************
*** 312,319 ****
--- 391,406 ----
        ($notes                 ) && ($bib->{notes}=$notes  );
        ($#subjects             ) && ($bib->address@hidden  );
+               # FIXME - This doesn't look right: for an array with
+               # one element, $#subjects == 0, which is false. For an
+               # array with 0 elements, $#subjects == -1, which is
+               # true.
  
        # Misc cleanup
        if ($bib->{dewey}) {
            $bib->{dewey}=~s/\///g;     # drop any slashes
+                                       # FIXME - Why? Don't the
+                                       # slashes mean something?
+                                       # The Dewey code is NOT a number,
+                                       # it's a string.
        }
  
***************
*** 324,327 ****
--- 411,417 ----
        if ( $bib->{isbn} ) {
            $bib->{isbn}=~s/[^\d]*//g;  # drop non-digits
+                       # FIXME - "[^\d]" can be rewritten as "\D"
+                       # FIXME - Does this include the check digit? If so,
+                       # it might be "X".                      
        };
  
***************
*** 342,345 ****
--- 432,443 ----
  
      } else {
+       # FIXME - Style: this sort of error-checking should really go
+       # closer to the actual test, e.g.:
+       #       if (ref($record) ne "ARRAY")
+       #       {
+       #               die "Not an array!"
+       #       }
+       # then the rest of the code which follows can assume that the
+       # input is good, and you don't have to indent as much.
        print "Error: extractmarcfields: input ref $record is " .
                ref($record) . " not ARRAY. Contact sysadmin.\n";
***************
*** 353,358 ****
--- 451,475 ----
  
  #--------------------------
+ 
+ =item parsemarcfileformat
+ 
+   @records = &parsemarcfileformat($marc_data);
+ 
+ Parses the contents of a MARC file.
+ 
+ C<$marc_data> is a string, the contents of a MARC file.
+ C<&parsemarcfileformat> parses this string into individual MARC
+ records and returns them.
+ 
+ C<@records> is an array of references-to-hash. Each element is a MARC
+ record; its keys are the MARC tags.
+ 
+ =cut
+ #'
  # Parse MARC data in file format with control-character separators
  #   May be multiple records.
+ # FIXME - Is the input ever likely to be more than a few Kb? If so, it
+ # might be worth changing this function to take a (read-only)
+ # reference-to-string, to avoid unnecessary copying.
  sub parsemarcfileformat {
      use strict;
***************
*** 362,368 ****
      my @records;
  
!     my $splitchar=chr(29);
!     my $splitchar2=chr(30);
!     my $splitchar3=chr(31);
      my $debug=0;
      my $record;
--- 479,485 ----
      my @records;
  
!     my $splitchar=chr(29);    # \c]
!     my $splitchar2=chr(30);   # \c^
!     my $splitchar3=chr(31);   # \c_
      my $debug=0;
      my $record;
***************
*** 456,459 ****
--- 573,597 ----
  
  #----------------------------------------------
+ 
+ =item taglabel
+ 
+   $label = &taglabel($tag);
+ 
+ Converts a MARC tag (a three-digit number, or "LDR") and returns a
+ descriptive label.
+ 
+ Note that although the tag looks like a number, it is treated here as
+ a string. Be sure to use
+ 
+     $label = &taglabel("082");
+ 
+ and not
+ 
+     $label = &taglabel(082);  # <-- Invalid octal number!
+ 
+ =cut
+ #'
+ # FIXME - Does this function mean that %tagtext doesn't need to be
+ # exported?
  sub taglabel {
      my ($tag)address@hidden;
***************
*** 463,468 ****
--- 601,611 ----
  } # sub taglabel
  
+ 1;
+ 
  #---------------------------------------------
  # $Log$
+ # Revision 1.5  2002/10/07 00:51:22  arensb
+ # Added POD and some comments.
+ #
  # Revision 1.4  2002/10/05 09:53:11  arensb
  # Merged with arensb-context branch: use C4::Context->dbh instead of
***************
*** 490,491 ****
--- 633,642 ----
  # Moved acqui.simple MARC handling to new module SimpleMarc.pm
  #
+ __END__
+ =back
+ 
+ =head1 AUTHOR
+ 
+ Koha Developement team <address@hidden>
+ 
+ =cut
[Prev in Thread]
Current Thread
[Next in Thread]
[Koha-cvs] CVS: koha/C4 SimpleMarc.pm,1.4,1.5, Andrew Arensburger <=
Prev by Date: [Koha-cvs] CVS: koha/C4 Security.pm,1.4,1.5
Next by Date: [Koha-cvs] CVS: koha/C4/Circulation Borrissues.pm,1.3,1.4
Previous by thread: [Koha-cvs] CVS: koha/C4 Security.pm,1.4,1.5
Next by thread: [Koha-cvs] CVS: koha/C4/Circulation Borrissues.pm,1.3,1.4
Index(es):
- Date
- Thread