koha-cvs
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Koha-cvs] CVS: koha/misc bulkmarcimport.pl,NONE,1.1


From: Paul POULAIN
Subject: [Koha-cvs] CVS: koha/misc bulkmarcimport.pl,NONE,1.1
Date: Fri, 22 Nov 2002 02:20:42 -0800

Update of /cvsroot/koha/koha/misc
In directory sc8-pr-cvs1:/tmp/cvs-serv11372

Added Files:
        bulkmarcimport.pl 
Log Message:
moving non koha-running files to misc dir

--- NEW FILE ---
#!/usr/bin/perl
#
# Tool for importing bulk marc records
#
# WARNING!!
#
# Do not use this script on a production system, it is still in development
#
#





# Copyright 2000-2002 Katipo Communications
#
# This file is part of Koha.
#
# Koha is free software; you can redistribute it and/or modify it under the
# terms of the GNU General Public License as published by the Free Software
# Foundation; either version 2 of the License, or (at your option) any later
# version.
#
# Koha is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
# A PARTICULAR PURPOSE.  See the GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along with
# Koha; if not, write to the Free Software Foundation, Inc., 59 Temple Place,
# Suite 330, Boston, MA  02111-1307 USA

# Path of the MARC file to import: the first command-line argument.
my $file = $ARGV[0];

# Branch code written as both homebranch and holdingbranch of imported items.
my $branchname = 'MAIN';

# Nothing can be imported without a filename.  This is a usage error, so the
# message goes to STDERR and the exit status is non-zero, letting calling
# scripts detect the failure.
unless ($file) {
    print STDERR "USAGE: ./bulkmarcimport.pl filename\n";
    exit 1;
}




# Two colour codes.  NOTE(review): neither variable is referenced anywhere
# else in the visible source; they look like leftovers and are probably
# safe to remove -- confirm before deleting.
my $lc1='#dddddd';
my $lc2='#ddaaaa';


use C4::Context;
use CGI;
use DBI;
# NOTE(review): strictures are disabled; the script below relies on
# undeclared package variables and would not compile with them enabled.
#use strict;
use C4::Catalogue;
use C4::Biblio;
use C4::Output;
# Database handle used by every query in the import loop below.
my $dbh = C4::Context->dbh;
# Taken from the REMOTE_USER environment variable.  NOTE(review): not
# referenced anywhere else in the visible source.
my $userid=$ENV{'REMOTE_USER'};

# FIXME - Wouldn't it be better to use &C4::SimpleMarc::taglabel
# instead of duplicating this information?
# Human-readable labels for MARC tags, keyed by the three-character tag.
# Only used to annotate the field listing printed while importing; tags that
# are missing here are simply printed without a label.
%tagtext = (
    '001' => 'Control number',
    '003' => 'Control number identifier',
    '005' => 'Date and time of latest transaction',
    # Kept on one line: a line break inside this literal would end up in the
    # printed listing and break its one-line-per-field layout.
    '006' => 'Fixed-length data elements -- additional material characteristics',
    '007' => 'Physical description fixed field',
    '008' => 'Fixed length data elements',
    '010' => 'LCCN',
    '015' => 'LCCN Cdn',
    '020' => 'ISBN',
    '022' => 'ISSN',
    '037' => 'Source of acquisition',
    '040' => 'Cataloging source',
    '041' => 'Language code',
    '043' => 'Geographic area code',
    '050' => 'Library of Congress call number',
    '060' => 'National Library of Medicine call number',
    '082' => 'Dewey decimal call number',
    '100' => 'Main entry -- Personal name',
    '110' => 'Main entry -- Corporate name',
    '130' => 'Main entry -- Uniform title',
    '240' => 'Uniform title',
    '245' => 'Title statement',
    '246' => 'Varying form of title',
    '250' => 'Edition statement',
    '256' => 'Computer file characteristics',
    '260' => 'Publication, distribution, etc.',
    '263' => 'Projected publication date',
    '300' => 'Physical description',
    '306' => 'Playing time',
    '440' => 'Series statement / Added entry -- Title',
    '490' => 'Series statement',
    '500' => 'General note',
    '504' => 'Bibliography, etc. note',
    '505' => 'Formatted contents note',
    '508' => 'Creation/production credits note',
    '510' => 'Citation/references note',
    '511' => 'Participant or performer note',
    '520' => 'Summary, etc. note',
    '521' => 'Target audience note (ie age)',
    '530' => 'Additional physical form available note',
    '538' => 'System details note',
    '586' => 'Awards note',
    '600' => 'Subject added entry -- Personal name',
    '610' => 'Subject added entry -- Corporate name',
    '650' => 'Subject added entry -- Topical term',
    '651' => 'Subject added entry -- Geographic name',
    '656' => 'Index term -- Occupation',
    '700' => 'Added entry -- Personal name',
    '710' => 'Added entry -- Corporate name',
    '730' => 'Added entry -- Uniform title',
    '740' => 'Added entry -- Uncontrolled related/analytical title',
    '800' => 'Series added entry -- Personal name',
    '830' => 'Series added entry -- Uniform title',
    '852' => 'Location',
    '856' => 'Electronic location and access',
);


# Import every MARC record found in $file.
#
# For each record the script
#   1. prints the leader and a listing of all fields/subfields,
#   2. extracts the bibliographic values Koha stores (title, author, ISBN...),
#   3. reuses an existing biblio/biblioitem when one matches, otherwise
#      inserts new rows, and
#   4. inserts one item per barcode (852 $p) that is not yet in the items table.
#
# Reads the file-level $file, $branchname, $dbh and %tagtext.
if ($file) {
    # Three-argument open on a lexical handle, checked: a missing or
    # unreadable file is reported instead of being treated as empty.
    open(my $marc_fh, '<', $file)
        or die "bulkmarcimport.pl: cannot open $file: $!\n";
    binmode($marc_fh);
    # Slurp the whole file.  A plain <FH> stops at the first newline, which
    # would truncate any file containing one.
    my $data = do { local $/; <$marc_fh> };
    close($marc_fh);
    $data = '' unless defined $data;

    # Accession date written to every item created during this run.
    my @now  = localtime(time);
    my $date = (1900 + $now[5]) . "-" . ($now[4] + 1) . "-" . $now[3];

    # Cycle through all of the records in the file; chr(29) ends a record.
  RECORD:
    foreach my $raw_record (split /\x1d/, $data) {
        # Tolerate line breaks between records, and skip fragments too short
        # to hold the 24-character leader plus any data (e.g. a trailing
        # newline after the last record terminator).
        $raw_record =~ s/\A[\r\n]+//;
        next RECORD unless length($raw_record) > 24;

        my $leader = substr($raw_record, 0, 24);
        my $body   = substr($raw_record, 24);
        print "\n\n---------------------------------------------------------------------------\n";
        print "Leader: $leader\n";

        my $fields = _parse_marc_fields($body);
        my $bib    = _extract_biblio_data($fields);
        # The record (without its leader) is stored in biblioitems.marc.
        $bib->{'marc'} = $body;

        my ($biblionumber, $biblioitemnumber) = _find_or_add_biblio($dbh, $bib);
        _add_items($dbh, $bib, $biblionumber, $biblioitemnumber, $branchname, $date);
    }
}

# Return the first value of a subfield that may be a plain scalar (subfield
# occurred once) or an array reference (subfield was repeated).
sub _first_value {
    my ($value) = @_;
    return $value->[0] if ref($value) eq 'ARRAY';
    return $value;
}

# Return all values of a possibly repeated subfield as a list.
sub _all_values {
    my ($value) = @_;
    return @$value if ref($value) eq 'ARRAY';
    return ($value);
}

# Strip leading whitespace and return the first whitespace-delimited token
# of $text (undef when there is none).
sub _first_token {
    my ($text) = @_;
    $text = '' unless defined $text;
    $text =~ s/^\s*//;
    my @tokens = split /\s+/, $text;
    return $tokens[0];
}

# Split the body of a MARC record (everything after the leader) into fields.
#
# The first chr(30)-delimited chunk is the directory, a sequence of 12
# character entries whose first three characters are the tag; the n-th
# following chunk is the data of the n-th directory entry.  Each field is
# printed as it is parsed.
#
# Returns a reference to an array of hashes with the keys 'tag', 'indicator'
# and, for fields with subfields, 'subfields' (code => value, or code =>
# array reference when the code is repeated within the field).
sub _parse_marc_fields {
    my ($body) = @_;

    my ($directory, @raw_fields) = split /\x1e/, $body;
    $directory = '' unless defined $directory;
    my @tags = map { substr($_, 0, 3) } ($directory =~ /(.{12})/gs);

    my @fields;
    my $index = 0;
    foreach my $raw_field (@raw_fields) {
        my $tag   = $tags[$index++];
        my $label = defined $tag ? $tagtext{$tag} : undef;
        printf "%4s %-40s ", (defined $tag ? $tag : ''),
                             (defined $label ? $label : '');

        # chr(31) introduces each subfield; whatever precedes the first one
        # is the indicator pair (or the whole value of a control field).
        my @parts     = split /\x1f/, $raw_field;
        my $indicator = $parts[0];
        my %field     = ('tag' => $tag, 'indicator' => $indicator);

        if (@parts == 1) {
            print "$indicator\n";
        } else {
            print "\n";
            my %subfields;
            foreach my $text (@parts[1 .. $#parts]) {
                my $code  = substr($text, 0, 1);
                my $value = substr($text, 1);
                print "   $code $value\n";
                if (!exists $subfields{$code}) {
                    $subfields{$code} = $value;
                } elsif (ref($subfields{$code}) eq 'ARRAY') {
                    push @{ $subfields{$code} }, $value;
                } else {
                    # Second occurrence: promote the scalar to a list.
                    $subfields{$code} = [ $subfields{$code}, $value ];
                }
            }
            $field{'subfields'} = \%subfields;
        }
        push @fields, \%field;
    }
    return \@fields;
}

# Collect the values Koha stores from a parsed record.
#
# Fields are visited in tag order, so when two tags feed the same value
# (010/015 for the LCCN, 082/852 for Dewey) the higher tag wins.  Returns a
# hash reference; every value is local to this record, so nothing leaks from
# one record into the next.
sub _extract_biblio_data {
    my ($fields) = @_;
    my %bib = ('subjects' => [], 'barcodes' => [], 'additionalauthors' => []);

    my @sorted = sort { $a->{'tag'} cmp $b->{'tag'} }
                 grep { defined $_->{'tag'} } @$fields;

    foreach my $field (@sorted) {
        my $tag = $field->{'tag'};
        my $sub = $field->{'subfields'} || {};

        if ($tag eq '010') {
            # LCCN is stored in field 010 a
            my $lccn = _first_value($sub->{'a'});
            $lccn = '' unless defined $lccn;
            $lccn =~ s/^\s*//;
            $lccn =~ s/cn //;
            $bib{'lccn'} = _first_token($lccn);
        }
        elsif ($tag eq '015') {
            # Canadian LCCN is stored in field 015 a
            my $lccn = _first_value($sub->{'a'});
            $lccn = '' unless defined $lccn;
            $lccn =~ s/^\s*//;
            $lccn =~ s/^C//;
            $bib{'lccn'} = _first_token($lccn);
        }
        elsif ($tag eq '020') {
            # ISBN is stored in field 020 a
            $bib{'isbn'} = _first_token(_first_value($sub->{'a'}));
        }
        elsif ($tag eq '022') {
            # ISSN is stored in field 022 a
            $bib{'issn'} = _first_token(_first_value($sub->{'a'}));
        }
        elsif ($tag eq '082') {
            # Dewey number is stored in field 082 a; with several 'a'
            # subfields only the first is kept.  The value is picked before
            # the slashes are stripped: substituting on an array reference
            # would turn it into a useless string.
            my $dewey = _first_value($sub->{'a'});
            $dewey = '' unless defined $dewey;
            $dewey =~ s/\///g;
            $bib{'dewey'} = $dewey;
        }
        elsif ($tag eq '100') {
            # Author is stored in field 100 a
            $bib{'author'} = _first_value($sub->{'a'});
        }
        elsif ($tag eq '245') {
            # Title in 245 a, subtitle in 245 b, illustrator in 245 c
            my $title = _first_value($sub->{'a'});
            $title = '' unless defined $title;
            $title =~ s/ \/$//;
            $bib{'title'} = $title;

            my $subtitle = _first_value($sub->{'b'});
            $subtitle = '' unless defined $subtitle;
            $subtitle =~ s/ \/$//;
            $bib{'subtitle'} = $subtitle;

            my $name = _first_value($sub->{'c'});
            if (defined $name && $name =~ /illustrated by]*\s+(.*)/) {
                $bib{'illustrator'} = $1;
            }
        }
        elsif ($tag eq '260') {
            # Publisher info: a = place, b = publisher, c = publication date
            # (also stored as copyright date when written as in "c1995").
            my $place = _first_value($sub->{'a'});
            $place = '' unless defined $place;
            $place =~ s/\s*:$//g;
            $bib{'place'} = $place;

            my $publisher = _first_value($sub->{'b'});
            $publisher = '' unless defined $publisher;
            $publisher =~ s/\s*:$//g;
            $bib{'publisher'} = $publisher;

            my $pubdate = _first_value($sub->{'c'});
            $pubdate = '' unless defined $pubdate;
            if ($pubdate =~ /c(\d\d\d\d)/) {
                $bib{'copyrightdate'} = $1;
            }
            if ($pubdate =~ /[^c](\d\d\d\d)/) {
                $bib{'publicationyear'} = $1;
            } elsif ($bib{'copyrightdate'}) {
                $bib{'publicationyear'} = $bib{'copyrightdate'};
            } elsif ($pubdate =~ /(\d\d\d\d)/) {
                $bib{'publicationyear'} = $1;
            } else {
                # No year at all: leave it unset instead of picking up the
                # capture of some earlier, unrelated match.
                $bib{'publicationyear'} = undef;
            }
        }
        elsif ($tag eq '300') {
            # Physical dimensions: a = pages, c = size
            my $pages = _first_value($sub->{'a'});
            $pages = '' unless defined $pages;
            $pages =~ s/ \;$//;
            $pages =~ s/\s*:$//g;
            $bib{'pages'} = $pages;

            my $size = _first_value($sub->{'c'});
            $size = '' unless defined $size;
            $size =~ s/\s*:$//g;
            $bib{'size'} = $size;
        }
        elsif ($tag eq '362') {
            # Vol/No in field 362 a
            my $designation = _first_value($sub->{'a'});
            if (defined $designation && $designation =~ /(\d+).*(\d+)/) {
                $bib{'volume'} = $1;
                $bib{'number'} = $2;
            }
        }
        elsif ($tag eq '440') {
            # Series title in field 440 a, Vol/No in 440 v
            $bib{'seriestitle'} = _first_value($sub->{'a'});
            my $designation = _first_value($sub->{'v'});
            if (defined $designation && $designation =~ /(\d+).*(\d+)/) {
                $bib{'volume'} = $1;
                $bib{'number'} = $2;
            }
        }
        elsif ($tag eq '852') {
            # 852 p = barcode, 852 h = dewey / shelf mark, 852 9 = replacement
            # price.  The shelf mark also selects the item type: "pbk" or a
            # trailing "pb" means PBK, otherwise JNF when there is a dewey
            # number and JF when there is not (junior collections assumed).
            push @{ $bib{'barcodes'} }, _first_value($sub->{'p'});

            my $shelfmark = _first_value($sub->{'h'});
            $shelfmark = '' unless defined $shelfmark;
            my ($dewey) = $shelfmark =~ /^([\d\.]*)/;
            $bib{'dewey'} = $dewey;
            if ($shelfmark =~ /pbk/ || $shelfmark =~ /pb$/) {
                $bib{'itemtype'} = 'PBK';
            } elsif ($dewey) {
                $bib{'itemtype'} = 'JNF';
            } else {
                $bib{'itemtype'} = 'JF';
            }

            $bib{'replacementprice'} = _first_value($sub->{'9'});
        }
        elsif ($tag eq '700') {
            # 700 a = additional author; 700 c contains 'ill' for illustrators
            my $name = _first_value($sub->{'a'});
            my $role = _first_value($sub->{'c'});
            if (defined $role && $role =~ /ill/) {
                $bib{'illustrator'} = $name;
            } elsif (defined $name && length $name) {
                push @{ $bib{'additionalauthors'} }, $name;
            }
        }
        elsif ($tag =~ /^5/) {
            # All 5XX a entries are concatenated as notes
            my $text = _first_value($sub->{'a'});
            $bib{'note'} = '' unless defined $bib{'note'};
            $bib{'note'} .= (defined $text ? $text : '') . "\n";
        }
        elsif ($tag =~ /6\d\d/) {
            # 6XX entries are subjects (691 is skipped): subfield a, followed
            # by the x (general), y (chronological) and z (geographic)
            # subdivisions.
            next if $tag eq '691';
            my $subject = _first_value($sub->{'a'});
            $subject = '' unless defined $subject;
            print "SUBJECT: $subject\n";
            $subject =~ s/\.$//;
            foreach my $code ('x', 'y', 'z') {
                my $division = $sub->{$code};
                next unless $division;
                foreach my $value (_all_values($division)) {
                    my $part = defined $value ? $value : '';
                    $part =~ s/\.$//;
                    $subject .= " -- $part";
                }
            }
            push @{ $bib{'subjects'} }, $subject;
        }
    }
    return \%bib;
}

# Find the biblio/biblioitem matching the extracted data, or create them.
#
# A biblioitem with the same ISSN, ISBN or LCCN is reused as is.  Otherwise
# a biblio with the same title, author, copyright date and series title is
# reused (or a new one inserted) and a new biblioitem, with its subjects and
# additional authors, is inserted.  Values are bound through placeholders.
#
# Returns ($biblionumber, $biblioitemnumber).
sub _find_or_add_biblio {
    my ($dbh, $bib) = @_;

    # Fetch the row rather than trusting $sth->rows, which DBI does not
    # guarantee for SELECT statements.
    my ($biblionumber, $biblioitemnumber) = $dbh->selectrow_array(
        "select biblionumber,biblioitemnumber from biblioitems "
          . "where issn=? or isbn=? or lccn=?",
        undef, $bib->{'issn'}, $bib->{'isbn'}, $bib->{'lccn'});
    return ($biblionumber, $biblioitemnumber) if defined $biblionumber;

    # Title and subtitle are stored as empty strings, never NULL.
    my $title    = defined $bib->{'title'}    ? $bib->{'title'}    : '';
    my $subtitle = defined $bib->{'subtitle'} ? $bib->{'subtitle'} : '';

    ($biblionumber) = $dbh->selectrow_array(
        "select biblionumber from biblio where title=? and author=? "
          . "and copyrightdate=? and seriestitle=?",
        undef, $title, $bib->{'author'}, $bib->{'copyrightdate'},
        $bib->{'seriestitle'});

    unless (defined $biblionumber) {
        ($biblionumber) = $dbh->selectrow_array("select max(biblionumber) from biblio");
        $biblionumber++;
        $dbh->do(
            "insert into biblio (biblionumber, title, author, copyrightdate, "
              . "seriestitle, notes) values (?, ?, ?, ?, ?, ?)",
            undef, $biblionumber, $title, $bib->{'author'},
            $bib->{'copyrightdate'}, $bib->{'seriestitle'}, $bib->{'note'});
        $dbh->do("insert into bibliosubtitle values (?, ?)",
            undef, $subtitle, $biblionumber);
    }

    ($biblioitemnumber) = $dbh->selectrow_array("select max(biblioitemnumber) from biblioitems");
    $biblioitemnumber++;

    # Subclass: first three letters of the author, upper-cased.
    my $cleanauthor = defined $bib->{'author'} ? $bib->{'author'} : '';
    $cleanauthor =~ s/[^A-Za-z]//g;
    my $subclass = uc(substr($cleanauthor, 0, 3));

    # publishercode, volumedate and volumeddesc are never extracted from the
    # record, so they are stored as NULL.
    $dbh->do(
        "insert into biblioitems (biblioitemnumber, biblionumber, volume, "
          . "number, itemtype, isbn, issn, dewey, subclass, publicationyear, "
          . "publishercode, volumedate, volumeddesc, illus, pages, size, "
          . "place, lccn, marc) "
          . "values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)",
        undef,
        $biblioitemnumber, $biblionumber, $bib->{'volume'}, $bib->{'number'},
        $bib->{'itemtype'}, $bib->{'isbn'}, $bib->{'issn'}, $bib->{'dewey'},
        $subclass, $bib->{'publicationyear'}, undef, undef, undef,
        $bib->{'illustrator'}, $bib->{'pages'}, $bib->{'size'},
        $bib->{'place'}, $bib->{'lccn'}, $bib->{'marc'});

    # Subject headings are stored in upper case.
    my $subject_sth = $dbh->prepare(
        "insert into bibliosubject (biblionumber,subject) values (?, ?)");
    foreach my $subjectheading (@{ $bib->{'subjects'} }) {
        $subject_sth->execute($biblionumber, uc($subjectheading));
    }

    # Additional authors are stored in upper case, without stray line-ending
    # or control characters.
    my $author_sth = $dbh->prepare(
        "insert into additionalauthors (biblionumber,author) values (?, ?)");
    foreach my $name (@{ $bib->{'additionalauthors'} }) {
        my $additionalauthor = $name;
        $additionalauthor =~ tr/\r\n\013\010//d;
        $author_sth->execute($biblionumber, uc($additionalauthor));
    }

    return ($biblionumber, $biblioitemnumber);
}

# Insert one item per barcode of the record, skipping barcodes that already
# exist in the items table.  $branchname becomes both homebranch and
# holdingbranch, $date the accession date.
sub _add_items {
    my ($dbh, $bib, $biblionumber, $biblioitemnumber, $branchname, $date) = @_;

    my @barcodes = @{ $bib->{'barcodes'} };
    return unless @barcodes;

    # Replacement price: drop a leading "p" and a currency sign, then keep
    # only the numeric part so that nothing but a number is interpolated
    # into the statement below.
    my $price = $bib->{'replacementprice'};
    $price = '' unless defined $price;
    $price =~ s/^p//;
    $price =~ s/\$//;
    $price = ($price =~ /(\d+(?:\.\d+)?|\.\d+)/) ? $1 : 0;

    my ($itemnumber) = $dbh->selectrow_array("select max(itemnumber) from items");
    $itemnumber++;

    my $q_branch = $dbh->quote($branchname);
    my $q_date   = $dbh->quote($date);

  BARCODE:
    foreach my $barcode (@barcodes) {
        my ($existing) = $dbh->selectrow_array(
            "select barcode from items where barcode=?", undef, $barcode);
        if (defined $existing) {
            print "Skipping $barcode\n";
            next BARCODE;
        }

        # Built as text (every value quoted or numeric) because the
        # statement is echoed for the operator before it is run.
        my $q_barcode = $dbh->quote($barcode);
        my $task = "insert into items (itemnumber, biblionumber, "
          . "biblioitemnumber, barcode, itemnotes, homebranch, holdingbranch, "
          . "dateaccessioned, replacementprice) values ($itemnumber, $biblionumber, "
          . "$biblioitemnumber, $q_barcode, '', $q_branch, $q_branch, $q_date, "
          . "$price)";
        print "$task\n";
        $dbh->do($task);

        # Each item needs its own number; without this every barcode of the
        # record would be inserted under the same itemnumber.
        $itemnumber++;
    }
    return;
}




reply via email to

[Prev in Thread] Current Thread [Next in Thread]