[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Koha-cvs] CVS: koha/misc/translator TmplToken.pm,1.5,1.6 TmplTokenizer.
From: |
Ambrose C. LI |
Subject: |
[Koha-cvs] CVS: koha/misc/translator TmplToken.pm,1.5,1.6 TmplTokenizer.pm,1.33,1.34 text-extract2.pl,1.41,1.42 tmpl_process3.pl,1.19,1.20 xgettext.pl,1.11,1.12 |
Date: |
Tue, 09 Mar 2004 23:00:30 -0800 |
Update of /cvsroot/koha/koha/misc/translator
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv18221
Modified Files:
TmplToken.pm TmplTokenizer.pm text-extract2.pl
tmpl_process3.pl xgettext.pl
Log Message:
Added hack to extract and translate strings inside JavaScript CDATA blocks,
using C-like _("some translatable string") notation. English templates will
need to be modified.
Index: TmplToken.pm
===================================================================
RCS file: /cvsroot/koha/koha/misc/translator/TmplToken.pm,v
retrieving revision 1.5
retrieving revision 1.6
diff -C2 -r1.5 -r1.6
*** TmplToken.pm 27 Feb 2004 13:26:07 -0000 1.5
--- TmplToken.pm 10 Mar 2004 07:00:27 -0000 1.6
***************
*** 110,113 ****
--- 110,156 ----
}
+ sub has_js_data {
+ my $this = shift;
+ return defined $this->{'_js_data'} && ref($this->{'_js_data'}) eq 'ARRAY';
+ }
+
+ sub js_data {
+ my $this = shift;
+ return $this->{'_js_data'};
+ }
+
+ sub set_js_data {
+ my $this = shift;
+ $this->{'_js_data'} = $_[0];
+ return $this;
+ }
+
+ # predefined tests
+
+ sub tag_p {
+ my $this = shift;
+ return $this->type == TmplTokenType::TAG;
+ }
+
+ sub cdata_p {
+ my $this = shift;
+ return $this->type == TmplTokenType::CDATA;
+ }
+
+ sub text_p {
+ my $this = shift;
+ return $this->type == TmplTokenType::TEXT;
+ }
+
+ sub text_parametrized_p {
+ my $this = shift;
+ return $this->type == TmplTokenType::TEXT_PARAMETRIZED;
+ }
+
+ sub directive_p {
+ my $this = shift;
+ return $this->type == TmplTokenType::DIRECTIVE;
+ }
+
###############################################################################
Index: TmplTokenizer.pm
===================================================================
RCS file: /cvsroot/koha/koha/misc/translator/TmplTokenizer.pm,v
retrieving revision 1.33
retrieving revision 1.34
diff -C2 -r1.33 -r1.34
*** TmplTokenizer.pm 8 Mar 2004 05:00:42 -0000 1.33
--- TmplTokenizer.pm 10 Mar 2004 07:00:27 -0000 1.34
***************
*** 94,97 ****
--- 94,98 ----
sub CDATA_CLOSE () {'cdata-close'}
sub PCDATA_MODE_P () {'pcdata-mode-p'} # additional submode for CDATA
+ sub JS_MODE_P () {'js-mode-p'} # cdata-mode-p must also be true
sub ALLOW_CFORMAT_P () {'allow-cformat-p'}
***************
*** 170,173 ****
--- 171,179 ----
}
+ sub js_mode_p {
+ my $this = shift;
+ return $this->{+JS_MODE_P};
+ }
+
sub cdata_close {
my $this = shift;
***************
*** 241,244 ****
--- 247,256 ----
}
+ sub _set_js_mode {
+ my $this = shift;
+ $this->{+JS_MODE_P} = $_[0];
+ return $this;
+ }
+
sub _set_cdata_close {
my $this = shift;
***************
*** 255,258 ****
--- 267,364 ----
###############################################################################
+ use vars qw( $js_EscapeSequence );
+ BEGIN {
+ # Perl quoting is really screwed up, but this common subexp is way too long
+ $js_EscapeSequence = q{\\\\(?:['"\\\\bfnrt]|[^0-7xu]|[0-3]?[0-7]{1,2}|x[\da-fA-F]{2}|u[\da-fA-F]{4})};
+ }
+ sub parenleft () { '(' }
+ sub parenright () { ')' }
+
+ sub split_js ($) {
+ my ($s0) = @_;
+ my @it = ();
+ while (length $s0) {
+ if ($s0 =~ /^\s+/s) { # whitespace
+ push @it, $&;
+ $s0 = $';
+ } elsif ($s0 =~ /^\/\/[^\r\n]*(?:[\r\n]|$)/s) { # C++-style comment
+ push @it, $&;
+ $s0 = $';
+ } elsif ($s0 =~ /^\/\*(?:(?!\*\/).)*\*\//s) { # C-style comment
+ push @it, $&;
+ $s0 = $';
+ # Keyword or identifier, ECMA-262 p.13 (section 7.5)
+ } elsif ($s0 =~ /^[A-Z_\$][A-Z\d_\$]*/is) { # IdentifierName
+ push @it, $&;
+ $s0 = $';
+ # Punctuator, ECMA-262 p.13 (section 7.6)
+ } elsif ($s0 =~ /^(?:[\(\){}\[\];]|>>>=|<<=|>>=|[-\+\*\/\&\|\^\%]=|>>>|<<|>>|--|\+\+|\|\||\&\&|==|<=|>=|!=|[=><,!~\?:\.\-\+\*\/\&\|\^\%])/s) {
+ push @it, $&;
+ $s0 = $';
+ # DecimalLiteral, ECMA-262 p.14 (section 7.7.3); note: bug in the spec
+ } elsif ($s0 =~ /^(?:0|[1-9]\d+(?:\.\d*(?:[eE][-\+]?\d+)?)?)/s) {
+ push @it, $&;
+ $s0 = $';
+ # HexIntegerLiteral, ECMA-262 p.15 (section 7.7.3)
+ } elsif ($s0 =~ /^0[xX][\da-fA-F]+/s) {
+ push @it, $&;
+ $s0 = $';
+ # OctalIntegerLiteral, ECMA-262 p.15 (section 7.7.3)
+ } elsif ($s0 =~ /^0[\da-fA-F]+/s) {
+ push @it, $&;
+ $s0 = $';
+ # StringLiteral, ECMA-262 p.17 (section 7.7.4)
+ # XXX SourceCharacter doesn't seem to be defined (?)
+ } elsif ($s0 =~ /^(?:"(?:(?!["\\\r\n]).|$js_EscapeSequence)*"|'(?:(?!['\\\r\n]).|$js_EscapeSequence)*')/os) {
+ push @it, $&;
+ $s0 = $';
+ } elsif ($s0 =~ /^./) { # UNKNOWN TOKEN !!!
+ push @it, $&;
+ $s0 = $';
+ }
+ }
+ return @it;
+ }
+
+ sub STATE_UNDERSCORE () { 1 }
+ sub STATE_PARENLEFT () { 2 }
+ sub STATE_STRING_LITERAL () { 3 }
+
+ # XXX This is a crazy hack. I don't want to write an ECMAScript parser.
+ # XXX A scanner is one thing; a parser another thing.
+ sub identify_js_translatables (@) {
+ my @input = @_;
+ my @output = ();
+ # We mark a JavaScript translatable string as in C, i.e., _("literal")
+ # For simplicity, we ONLY look for "_" "(" StringLiteral ")"
+ for (my $i = 0, my $state = 0, my($j, $q, $s); $i <= $#input; $i += 1) {
+ my $reset_state_p = 0;
+ push @output, [0, $input[$i]];
+ if ($input[$i] !~ /\S/s) {
+ ;
+ } elsif ($state == 0) {
+ $state = STATE_UNDERSCORE if $input[$i] eq '_';
+ } elsif ($state == STATE_UNDERSCORE) {
+ $state = $input[$i] eq parenleft ? STATE_PARENLEFT : 0;
+ } elsif ($state == STATE_PARENLEFT) {
+ if ($input[$i] =~ /^(['"])(.*)\1$/s) {
+ ($state, $j, $q, $s) = (STATE_STRING_LITERAL, $#output, $1, $2);
+ } else {
+ $state = 0;
+ }
+ } elsif ($state == STATE_STRING_LITERAL) {
+ if ($input[$i] eq parenright) {
+ $output[$j] = [1, $output[$j]->[1], $q, $s];
+ }
+ $state = 0;
+ } else {
+ die "identify_js_translatables internal error: Unknown state $state"
+ }
+ }
+ return \@output;
+ }
+
+
###############################################################################
+
sub _extract_attributes ($;$) {
my $this = shift;
***************
*** 431,434 ****
--- 537,541 ----
$this->_set_cdata_close( "</$1\\s*>" );
$this->_set_pcdata_mode( 0 );
+ $this->_set_js_mode( lc($1) eq 'script' );
# } elsif ($it->string =~ /^<(title)\b/is) {
# $this->_set_cdata_mode( 1 );
***************
*** 471,476 ****
($this->pcdata_mode_p?
TmplTokenType::TEXT: TmplTokenType::CDATA),
! $this->line_number )
if defined $it;
$this->_set_pcdata_mode, 0;
$this->_set_cdata_close, undef unless !defined $it;
--- 578,595 ----
($this->pcdata_mode_p?
TmplTokenType::TEXT: TmplTokenType::CDATA),
! $this->line_number, $this->filename )
if defined $it;
+ if ($this->js_mode_p) {
+ my $s0 = $it->string;
+ my @head = ();
+ my @tail = ();
+ if ($s0 =~ /^(\s*<!--\s*)(.*)(\s*--\s*>\s*)$/s) {
+ push @head, $1;
+ push @tail, $3;
+ $s0 = $2;
+ }
+ push @head, split_js $s0;
+ $it->set_js_data( identify_js_translatables(@head, @tail) );
+ }
$this->_set_pcdata_mode, 0;
$this->_set_cdata_close, undef unless !defined $it;
Index: text-extract2.pl
===================================================================
RCS file: /cvsroot/koha/koha/misc/translator/text-extract2.pl,v
retrieving revision 1.41
retrieving revision 1.42
diff -C2 -r1.41 -r1.42
*** text-extract2.pl 19 Feb 2004 21:24:30 -0000 1.41
--- text-extract2.pl 10 Mar 2004 07:00:28 -0000 1.42
***************
*** 61,64 ****
--- 61,70 ----
}
}
+ if ($s->has_js_data) {
+ printf "JavaScript translatable strings:\n";
+ for my $t (@{$s->js_data}) {
+ printf "%dH%s\n", length $t->[3], underline $t->[3] if $t->[0]; # FIXME
+ }
+ }
}
}
***************
*** 89,92 ****
--- 95,102 ----
}
}
+ } elsif ($s->has_js_data) {
+ for my $t (@{$s->js_data}) {
+ remember( $s, $t->[3] ) if $t->[0]; # FIXME
+ }
}
}
Index: tmpl_process3.pl
===================================================================
RCS file: /cvsroot/koha/koha/misc/translator/tmpl_process3.pl,v
retrieving revision 1.19
retrieving revision 1.20
diff -C2 -r1.19 -r1.20
*** tmpl_process3.pl 8 Mar 2004 04:59:38 -0000 1.19
--- tmpl_process3.pl 10 Mar 2004 07:00:28 -0000 1.20
***************
*** 98,101 ****
--- 98,111 ----
} elsif ($kind eq TmplTokenType::TAG && %$attr) {
print $output text_replace_tag($t, $attr);
+ } elsif ($s->has_js_data) {
+ for my $t (@{$s->js_data}) {
+ # FIXME for this whole block
+ if ($t->[0]) {
+ printf $output "%s%s%s", $t->[2], find_translation $t->[3],
+ $t->[2];
+ } else {
+ print $output $t->[1];
+ }
+ }
} elsif (defined $t) {
print $output $t;
Index: xgettext.pl
===================================================================
RCS file: /cvsroot/koha/koha/misc/translator/xgettext.pl,v
retrieving revision 1.11
retrieving revision 1.12
diff -C2 -r1.11 -r1.12
*** xgettext.pl 27 Feb 2004 13:26:08 -0000 1.11
--- xgettext.pl 10 Mar 2004 07:00:28 -0000 1.12
***************
*** 108,111 ****
--- 108,115 ----
}
}
+ } elsif ($s->has_js_data) {
+ for my $t (@{$s->js_data}) {
+ remember( $s, $t->[3] ) if $t->[0]; # FIXME
+ }
}
}
***************
*** 199,202 ****
--- 203,209 ----
. (defined $name? " name=$name->[1]": '');
}
+ } elsif ($text{$t}->[0]->has_js_data) {
+ printf OUTPUT "#. For the first occurrence,\n" if @{$text{$t}} > 1;
+ printf OUTPUT "#. SCRIPT\n";
}
my $cformat_p;
***************
*** 377,381 ****
=item -
- (Future goal)
Translation to non-English-like languages with different word
order: gettext's c-format strings can theoretically be
--- 384,387 ----
***************
*** 418,421 ****
--- 424,441 ----
"update" actions have already been implemented in tmpl_process3.pl.
+ =head2 Strings inside JavaScript
+
+ In the SCRIPT elements, the script will attempt to scan for
+ _("I<string literal>") patterns, and extract the I<string literal>
+ as a translatable string.
+
+ Note that the C-like _(...) notation is required.
+
+ The JavaScript must actually define a _ function
+ so that the code remains correct JavaScript.
+ A suitable definition of such a function can be
+
+ function _(s) { return s } // dummy function for gettext
+
=head1 SEE ALSO
[Prev in Thread] |
Current Thread |
[Next in Thread] |
- [Koha-cvs] CVS: koha/misc/translator TmplToken.pm,1.5,1.6 TmplTokenizer.pm,1.33,1.34 text-extract2.pl,1.41,1.42 tmpl_process3.pl,1.19,1.20 xgettext.pl,1.11,1.12,
Ambrose C. LI <=
- Prev by Date:
[Koha-cvs] CVS: koha/misc/translator intranet.zh_TW,1.44,1.45
- Next by Date:
[Koha-cvs] CVS: koha/koha-tmpl/intranet-tmpl/default/en/parameters aqbookfund.tmpl,1.20.2.1,1.20.2.2 aqbudget.tmpl,1.20.2.2,1.20.2.3 categorie.tmpl,1.15.2.1,1.15.2.2 categoryitem.tmpl,1.7.2.2,1.7.2.3 currency.tmpl,1.7.2.2,1.7.2.3 itemtypes.tmpl,1.14.2.2,1.14.2.3 marctagstructure.tmpl,1.20.2.1,1.20.2.2 printers.tmpl,1.8.2.2,1.8.2.3 stopwords.tmpl,1.11.2.1,1.11.2.2 systempreferences.tmpl,1.18.2.4,1.18.2.5 z3950servers.tmpl,1.11.2.2,1.11.2.3
- Previous by thread:
[Koha-cvs] CVS: koha/misc/translator intranet.zh_TW,1.44,1.45
- Next by thread:
[Koha-cvs] CVS: koha/koha-tmpl/intranet-tmpl/default/en/parameters aqbookfund.tmpl,1.20.2.1,1.20.2.2 aqbudget.tmpl,1.20.2.2,1.20.2.3 categorie.tmpl,1.15.2.1,1.15.2.2 categoryitem.tmpl,1.7.2.2,1.7.2.3 currency.tmpl,1.7.2.2,1.7.2.3 itemtypes.tmpl,1.14.2.2,1.14.2.3 marctagstructure.tmpl,1.20.2.1,1.20.2.2 printers.tmpl,1.8.2.2,1.8.2.3 stopwords.tmpl,1.11.2.1,1.11.2.2 systempreferences.tmpl,1.18.2.4,1.18.2.5 z3950servers.tmpl,1.11.2.2,1.11.2.3
- Index(es):