koha-cvs
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Koha-cvs] CVS: koha/misc/translator TmplTokenizer.pm,1.5,1.6 text-extra


From: Ambrose Li
Subject: [Koha-cvs] CVS: koha/misc/translator TmplTokenizer.pm,1.5,1.6 text-extract2.pl,1.38,1.39
Date: Mon, 16 Feb 2004 21:07:06 -0800

Update of /cvsroot/koha/koha/misc/translator
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv30220

Modified Files:
        TmplTokenizer.pm text-extract2.pl 
Log Message:
Converted TmplTokenizer into a class. Everything still seems ok, but it is
not tested thoroughly.


Index: TmplTokenizer.pm
===================================================================
RCS file: /cvsroot/koha/koha/misc/translator/TmplTokenizer.pm,v
retrieving revision 1.5
retrieving revision 1.6
diff -C2 -r1.5 -r1.6
*** TmplTokenizer.pm    17 Feb 2004 03:17:48 -0000      1.5
--- TmplTokenizer.pm    17 Feb 2004 05:07:04 -0000      1.6
***************
*** 13,17 ****
  =head1 NAME
  
! TmplTokenizer.pm - Simple-minded tokenizer for HTML::Template .tmpl files
  
  =head1 DESCRIPTION
--- 13,17 ----
  =head1 NAME
  
! TmplTokenizer.pm - Simple-minded tokenizer class for HTML::Template .tmpl 
files
  
  =head1 DESCRIPTION
***************
*** 32,45 ****
  
###############################################################################
  
! $VERSION = 0.01;
  
  @ISA = qw(Exporter);
  @EXPORT_OK = qw();
  
- use vars qw( $input );
- use vars qw( $debug_dump_only_p );
  use vars qw( $pedantic_attribute_error_in_nonpedantic_mode_p );
  use vars qw( $pedantic_tmpl_var_use_in_nonpedantic_mode_p );
- use vars qw( $fatal_p );
  
  
###############################################################################
--- 32,42 ----
  
###############################################################################
  
! $VERSION = 0.02;
  
  @ISA = qw(Exporter);
  @EXPORT_OK = qw();
  
  use vars qw( $pedantic_attribute_error_in_nonpedantic_mode_p );
  use vars qw( $pedantic_tmpl_var_use_in_nonpedantic_mode_p );
  
  
###############################################################################
***************
*** 85,106 ****
  # End of the hideous stuff
  
! use vars qw( @readahead $lc_0 $lc $syntaxerror_p );
! use vars qw( $cdata_mode_p $cdata_close );
  
  
###############################################################################
  
! # Easy accessors
  
! sub fatal_p () {
!     return $fatal_p;
  }
  
! sub syntaxerror_p () {
!     return $syntaxerror_p;
  }
  
  
###############################################################################
  
! sub extract_attributes ($;$) {
      my($s, $lc) = @_;
      my %attr;
--- 82,234 ----
  # End of the hideous stuff
  
! use vars qw( $serial );
  
  
###############################################################################
  
! sub FATAL_P           () {'fatal-p'}
! sub SYNTAXERROR_P     () {'syntaxerror-p'}
  
! sub FILENAME          () {'input'}
! sub HANDLE            () {'handle'}
! 
! sub READAHEAD         () {'readahead'}
! sub LINENUM_START     () {'lc_0'}
! sub LINENUM           () {'lc'}
! sub CDATA_MODE_P      () {'cdata-mode-p'}
! sub CDATA_CLOSE               () {'cdata-close'}
! 
! sub new {
!     my $this = shift;
!     my($input) = @_;
!     my $class = ref($this) || $this;
!     my $self = {};
!     bless $self, $class;
! 
!     my $handle = sprintf('TMPLTOKENIZER%d', $serial);
!     $serial += 1;
! 
!     no strict;
!     open($handle, "<$input") || die "$input: $!\n";
!     use strict;
!     $self->{+FILENAME} = $input;
!     $self->{+HANDLE} = $handle;
!     $self->{+READAHEAD} = [];
!     return $self;
! }
! 
! 
###############################################################################
! 
! # Simple getters
! 
! sub _handle {
!     my $this = shift;
!     return $this->{+HANDLE};
! }
! 
! sub fatal_p {
!     my $this = shift;
!     return $this->{+FATAL_P};
! }
! 
! sub syntaxerror_p {
!     my $this = shift;
!     return $this->{+SYNTAXERROR_P};
! }
! 
! sub has_readahead_p {
!     my $this = shift;
!     return @{$this->{+READAHEAD}};
! }
! 
! sub _peek_readahead {
!     my $this = shift;
!     return $this->{+READAHEAD}->[$#{$this->{+READAHEAD}}];
  }
  
! sub line_number_start {
!     my $this = shift;
!     return $this->{+LINENUM_START};
! }
! 
! sub line_number {
!     my $this = shift;
!     return $this->{+LINENUM};
! }
! 
! sub cdata_mode_p {
!     my $this = shift;
!     return $this->{+CDATA_MODE_P};
! }
! 
! sub cdata_close {
!     my $this = shift;
!     return $this->{+CDATA_CLOSE};
! }
! 
! # Simple setters
! 
! sub _set_fatal {
!     my $this = shift;
!     $this->{+FATAL_P} = $_[0];
!     return $this;
! }
! 
! sub _set_syntaxerror {
!     my $this = shift;
!     $this->{+SYNTAXERROR_P} = $_[0];
!     return $this;
! }
! 
! sub _push_readahead {
!     my $this = shift;
!     push @{$this->{+READAHEAD}}, $_[0];
!     return $this;
! }
! 
! sub _pop_readahead {
!     my $this = shift;
!     return pop @{$this->{+READAHEAD}};
! }
! 
! sub _append_readahead {
!     my $this = shift;
!     $this->{+READAHEAD}->[$#{$this->{+READAHEAD}}] .= $_[0];
!     return $this;
! }
! 
! sub _set_readahead {
!     my $this = shift;
!     $this->{+READAHEAD}->[$#{$this->{+READAHEAD}}] = $_[0];
!     return $this;
! }
! 
! sub _increment_line_number {
!     my $this = shift;
!     $this->{+LINENUM} += 1;
!     return $this;
! }
! 
! sub _set_line_number_start {
!     my $this = shift;
!     $this->{+LINENUM_START} = $_[0];
!     return $this;
! }
! 
! sub _set_cdata_mode {
!     my $this = shift;
!     $this->{+CDATA_MODE_P} = $_[0];
!     return $this;
! }
! 
! sub _set_cdata_close {
!     my $this = shift;
!     $this->{+CDATA_CLOSE} = $_[0];
!     return $this;
  }
  
  
###############################################################################
  
! sub _extract_attributes ($;$) {
!     my $this = shift;
      my($s, $lc) = @_;
      my %attr;
***************
*** 140,144 ****
            error_normal("Completely confused while extracting attributes: $1", 
$lc);
            error_normal((scalar(split(/\n/, $s)) - 1) . " more line(s) not 
shown.", undef);
!           $fatal_p = 1;
        } else {
            warn_normal "Strange attribute syntax: $s\n", $lc;
--- 268,272 ----
            error_normal("Completely confused while extracting attributes: $1", 
$lc);
            error_normal((scalar(split(/\n/, $s)) - 1) . " more line(s) not 
shown.", undef);
!           $this->_set_fatal( 1 );
        } else {
            warn_normal "Strange attribute syntax: $s\n", $lc;
***************
*** 148,198 ****
  }
  
! sub next_token_internal (*) {
      my($h) = @_;
      my($it, $kind);
      my $eof_p = 0;
!     pop @readahead if @readahead && !ref $readahead[$#readahead]
!           && !length $readahead[$#readahead];
!     if (address@hidden) {
        my $next = scalar <$h>;
        $eof_p = !defined $next;
        if (!$eof_p) {
!           $lc += 1;
!           push @readahead, $next;
        }
      }
!     $lc_0 = $lc;                      # remember line number of first line
!     if (@readahead && ref $readahead[$#readahead]) {  # TmplToken object
!       my $t = pop @readahead;
!       ($it, $kind, local $lc) = ($t->string, $t->type, $t->line_number);
!     } elsif ($eof_p && address@hidden) {      # nothing left to do
        ;
!     } elsif ($readahead[$#readahead] =~ /^\s+/s) {    # whitespace
!       ($kind, $it, $readahead[$#readahead]) = (TmplTokenType::TEXT, $&, $');
      # FIXME the following (the [<\s] part) is an unreliable HACK :-(
!     } elsif ($readahead[$#readahead] =~ /^(?:[^<]|<[<\s])+/s) {       # 
non-space normal text
!       ($kind, $it, $readahead[$#readahead]) = (TmplTokenType::TEXT, $&, $');
!       warn_normal "Unescaped < in $it\n", $lc_0
!               if !$cdata_mode_p && $it =~ /</s;
      } else {                          # tag/declaration/processing instruction
        my $ok_p = 0;
!       for (;;) {
!           if ($cdata_mode_p) {
!               if ($readahead[$#readahead] =~ /^$cdata_close/) {
!                   ($kind, $it, $readahead[$#readahead]) = 
(TmplTokenType::TAG, $&, $');
                    $ok_p = 1;
                } else {
!                   ($kind, $it) = (TmplTokenType::TEXT, pop @readahead);
                    $ok_p = 1;
                }
!           } elsif ($readahead[$#readahead] =~ /^$re_tag_compat/os) {
!               ($kind, $it, $readahead[$#readahead]) = (TmplTokenType::TAG, 
"$1>", $3);
                $ok_p = 1;
!               warn_normal "SGML \"closed start tag\" notation: $1<\n", $lc_0 
if $2 eq '';
!           } elsif ($readahead[$#readahead] =~ /^<!--(?:(?!-->).)*-->/s) {
!               ($kind, $it, $readahead[$#readahead]) = 
(TmplTokenType::COMMENT, $&, $');
                $ok_p = 1;
!               warn_normal "Syntax error in comment: $&\n", $lc_0;
!               $syntaxerror_p = 1;
            }
        last if $ok_p;
--- 276,332 ----
  }
  
! sub _next_token_internal {
!     my $this = shift;
      my($h) = @_;
      my($it, $kind);
      my $eof_p = 0;
!     $this->_pop_readahead if $this->has_readahead_p
!           && !ref $this->_peek_readahead
!           && !length $this->_peek_readahead;
!     if (!$this->has_readahead_p) {
        my $next = scalar <$h>;
        $eof_p = !defined $next;
        if (!$eof_p) {
!           $this->_increment_line_number;
!           $this->_push_readahead( $next );
        }
      }
!     $this->_set_line_number_start( $this->line_number ); # remember 1st line 
num
!     if ($this->has_readahead_p && ref $this->_peek_readahead) {       # 
TmplToken obj.
!       ($it, $kind) = ($this->_pop_readahead, undef);
!     } elsif ($eof_p && !$this->has_readahead_p) {     # nothing left to do
        ;
!     } elsif ($this->_peek_readahead =~ /^\s+/s) {     # whitespace
!       ($kind, $it) = (TmplTokenType::TEXT, $&);
!       $this->_set_readahead( $' );
      # FIXME the following (the [<\s] part) is an unreliable HACK :-(
!     } elsif ($this->_peek_readahead =~ /^(?:[^<]|<[<\s])+/s) {        # 
non-space normal text
!       ($kind, $it) = (TmplTokenType::TEXT, $&);
!       $this->_set_readahead( $' );
!       warn_normal "Unescaped < in $it\n", $this->line_number_start
!               if !$this->cdata_mode_p && $it =~ /</s;
      } else {                          # tag/declaration/processing instruction
        my $ok_p = 0;
!       for (my $cdata_close = $this->cdata_close;;) {
!           if ($this->cdata_mode_p) {
!               if ($this->_peek_readahead =~ /^$cdata_close/) {
!                   ($kind, $it) = (TmplTokenType::TAG, $&);
!                   $this->_set_readahead( $' );
                    $ok_p = 1;
                } else {
!                   ($kind, $it) = (TmplTokenType::TEXT, $this->_pop_readahead);
                    $ok_p = 1;
                }
!           } elsif ($this->_peek_readahead =~ /^$re_tag_compat/os) {
!               ($kind, $it) = (TmplTokenType::TAG, "$1>");
!               $this->_set_readahead( $3 );
                $ok_p = 1;
!               warn_normal "SGML \"closed start tag\" notation: $1<\n", 
$this->line_number_start if $2 eq '';
!           } elsif ($this->_peek_readahead =~ /^<!--(?:(?!-->).)*-->/s) {
!               ($kind, $it) = (TmplTokenType::COMMENT, $&);
!               $this->_set_readahead( $' );
                $ok_p = 1;
!               warn_normal "Syntax error in comment: $&\n", 
$this->line_number_start;
!               $this->_set_syntaxerror( 1 );
            }
        last if $ok_p;
***************
*** 200,205 ****
            $eof_p = !defined $next;
        last if $eof_p;
!           $lc += 1;
!           $readahead[$#readahead] .= $next;
        }
        if ($kind ne TmplTokenType::TAG) {
--- 334,339 ----
            $eof_p = !defined $next;
        last if $eof_p;
!           $this->_increment_line_number;
!           $this->_append_readahead( $next );
        }
        if ($kind ne TmplTokenType::TAG) {
***************
*** 211,252 ****
            $kind = TmplTokenType::PI;
        }
!       if ($it =~ /^$re_directive/ios && !$cdata_mode_p) {
            $kind = TmplTokenType::DIRECTIVE;
        }
        if (!$ok_p && $eof_p) {
!           ($kind, $it, $readahead[$#readahead]) = (TmplTokenType::UNKNOWN, 
$readahead[$#readahead], undef);
!           $syntaxerror_p = 1;
        }
      }
!     warn_normal "Unrecognizable token found: $it\n", $lc_0
            if $kind eq TmplTokenType::UNKNOWN;
!     return defined $it? TmplToken->new($it, $kind, $lc): undef;
  }
  
! sub next_token (*) {
!     my($h) = @_;
      my $it;
!     if (!$cdata_mode_p) {
!       $it = next_token_internal($h);
        if (defined $it && $it->type eq TmplTokenType::TAG) {
!           ($cdata_mode_p, $cdata_close) = (1, "</$1\\s*>")
!                   if $it->string =~ /^<(script|style|textarea)\b/i;
!           $it->set_attributes( extract_attributes($it->string, $lc_0) );
        }
      } else {
!       for ($it = '';;) {
!           my $lc_prev = $lc;
!           my $next = next_token_internal($h);
        last if !defined $next;
            if (defined $next && $next->string =~ /$cdata_close/i) {
!               push @readahead, $next; # push the entire TmplToken object
!               #$lc = $lc_prev; XXX
!               $cdata_mode_p = 0;
            }
!       last unless $cdata_mode_p;
            $it .= $next->string;
        }
!       $it = TmplToken->new( $it, TmplTokenType::CDATA, $lc );
!       $cdata_close = undef;
      }
      return $it;
--- 345,388 ----
            $kind = TmplTokenType::PI;
        }
!       if ($it =~ /^$re_directive/ios && !$this->cdata_mode_p) {
            $kind = TmplTokenType::DIRECTIVE;
        }
        if (!$ok_p && $eof_p) {
!           ($kind, $it) = (TmplTokenType::UNKNOWN, $this->_peek_readahead);
!           $this->_set_readahead, undef;
!           $this->_set_syntaxerror( 1 );
        }
      }
!     warn_normal "Unrecognizable token found: $it\n", $this->line_number_start
            if $kind eq TmplTokenType::UNKNOWN;
!     return defined $it? (ref $it? $it: TmplToken->new($it, $kind, 
$this->line_number)): undef;
  }
  
! sub next_token {
!     my $this = shift;
!     my $h = $this->_handle;
      my $it;
!     if (!$this->cdata_mode_p) {
!       $it = $this->_next_token_internal($h);
        if (defined $it && $it->type eq TmplTokenType::TAG) {
!           if ($it->string =~ /^<(script|style|textarea)\b/i) {
!               $this->_set_cdata_mode( 1 );
!               $this->_set_cdata_close( "</$1\\s*>" );
!           }
!           $it->set_attributes( $this->_extract_attributes($it->string, 
$it->line_number) );
        }
      } else {
!       for ($it = '', my $cdata_close = $this->cdata_close;;) {
!           my $next = $this->_next_token_internal($h);
        last if !defined $next;
            if (defined $next && $next->string =~ /$cdata_close/i) {
!               $this->_push_readahead( $next ); # push entire TmplToken object
!               $this->_set_cdata_mode( 0 );
            }
!       last unless $this->cdata_mode_p;
            $it .= $next->string;
        }
!       $it = TmplToken->new( $it, TmplTokenType::CDATA, $this->line_number );
!       $this->_set_cdata_close, undef;
      }
      return $it;

Index: text-extract2.pl
===================================================================
RCS file: /cvsroot/koha/koha/misc/translator/text-extract2.pl,v
retrieving revision 1.38
retrieving revision 1.39
diff -C2 -r1.38 -r1.39
*** text-extract2.pl    17 Feb 2004 03:02:39 -0000      1.38
--- text-extract2.pl    17 Feb 2004 05:07:04 -0000      1.39
***************
*** 26,30 ****
  
###############################################################################
  
! sub debug_dump (*) { # for testing only
      my($h) = @_;
      print "re_tag_compat is /", TmplTokenizer::re_tag(1), "/\n";
--- 26,30 ----
  
###############################################################################
  
! sub debug_dump ($) { # for testing only
      my($h) = @_;
      print "re_tag_compat is /", TmplTokenizer::re_tag(1), "/\n";
***************
*** 51,55 ****
  
###############################################################################
  
! sub text_extract (*) {
      my($h) = @_;
      my %text = ();
--- 51,55 ----
  
###############################################################################
  
! sub text_extract ($) {
      my($h) = @_;
      my %text = ();
***************
*** 125,133 ****
  usage_error('Missing mandatory option -f') unless defined $input;
  
! open(INPUT, "<$input") || die "$0: $input: $!\n";
  if ($debug_dump_only_p) {
!     debug_dump(*INPUT);
  } else {
!     text_extract(*INPUT);
  }
  
--- 125,133 ----
  usage_error('Missing mandatory option -f') unless defined $input;
  
! my $h = TmplTokenizer->new( $input );
  if ($debug_dump_only_p) {
!     debug_dump( $h );
  } else {
!     text_extract( $h );
  }
  




reply via email to

[Prev in Thread] Current Thread [Next in Thread]