guix-commits
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

01/09: website: Add custom xgettext to extract from nested sexps for i18


From: Florian Pelz
Subject: 01/09: website: Add custom xgettext to extract from nested sexps for i18n.
Date: Fri, 8 Nov 2019 02:02:14 -0500 (EST)

pelzflorian pushed a commit to branch wip-i18n
in repository guix-artwork.

commit 7fbe99dac1e0eaeec06b6ab2d8dffd6a98b00f86
Author: Florian Pelz <address@hidden>
Date:   Wed Oct 30 08:22:47 2019 +0000

    website: Add custom xgettext to extract from nested sexps for i18n.
    
    * website/po/POTFILES: New file.  List apps files here.
    * website/po/LINGUAS: New file.  List en_US lingua.
    * website/po/ietf-tags.scm: New file.  Add association for en_US lingua.
    * website/scripts/sexp-xgettext.scm: New file for generating a POT file.
    (<keyword-spec>, <po-entry>, <construct-fold-state>): New record types.
    (combine-duplicate-po-entries, complex-keyword-spec?, parse-scheme-file,
    po-equal?, write-po-entry, update-ecomments-string!, update-file-name!,
    update-old-line-number!, update-line-number!, incr-line-number!,
    incr-line-number-for-each-nl!, current-ref, make-simple-po-entry,
    matching-keyword, nth-exp, more-than-one-exp?, token->string-symbol-or-keyw,
    complex-marked-list->po-entries, construct-po-entries, tag,
    construct-msgid-and-po-entries, scheme-file->po-entries): New procedures.
    (%keyword-specs, %options, %comments-line, %ecomments-string, %file-name,
    %old-line-number, %line-number, %files-from-port, %source-files,
    %output-po-entries, %output-port): New variables.
    * website/sexp-xgettext.scm: New file with module for looking up
    translations.
    (%complex-keywords, %simple-keywords, %plural-numbers, %linguas):
    New variables.
    (<construct-fold-state>, <deconstruct-fold-state>): New record types.
    (set-complex-keywords!, set-simple-keywords!, gettext-keyword?, tag,
    sexp->msgid, deconstruct): New procedures.
    (sgettext, spgettext, sngettext, snpgettext): New macro helpers.
    * website/apps/i18n.scm: New file.
    (G_, N_, C_, NC_, ietf-tags-file-contents): New syntax to use for i18n.
    (%current-ietf-tag, %current-lang, %current-lingua): New variables.
    (builder->localized-builder, builders->localized-builders,
    localized-root-path, first-value): New utility procedures.
    (<asset>, <page>): New imports from Haunt.
    * website/haunt.scm: Wrap each builder to build the locale set in LC_ALL.
    * website/.guix.scm: Make Haunt build directory writable so Haunt can
    overwrite duplicate assets.  Convert PO files to MO files and build for
    each lingua.
    * website/README: Adapt build instructions for i18n.
    * website/i18n-howto: New file with usage instructions.
---
 website/.guix.scm                 |  80 +++-
 website/README                    |   8 +-
 website/apps/i18n.scm             | 132 ++++++
 website/haunt.scm                 |  19 +-
 website/i18n-howto.txt            |  86 ++++
 website/po/LINGUAS                |   3 +
 website/po/POTFILES               |  36 ++
 website/po/ietf-tags.scm          |   9 +
 website/scripts/sexp-xgettext.scm | 830 ++++++++++++++++++++++++++++++++++++++
 website/sexp-xgettext.scm         | 530 ++++++++++++++++++++++++
 10 files changed, 1707 insertions(+), 26 deletions(-)

diff --git a/website/.guix.scm b/website/.guix.scm
index 8f44c90..9510779 100644
--- a/website/.guix.scm
+++ b/website/.guix.scm
@@ -1,5 +1,6 @@
 ;;; GNU Guix web site
 ;;; Copyright © 2017, 2019 Ludovic Courtès <address@hidden>
+;;; Copyright © 2019 Florian Pelz <address@hidden>
 ;;;
 ;;; This file is part of the GNU Guix web site.
 ;;;
@@ -18,16 +19,27 @@
 
 ;; Run 'guix build -f .guix.scm' to build the web site.
 
+(define this-directory
+  (dirname (current-filename)))
+
+;; Make sure po/LINGUAS will be found in the current working
+;; directory.
+(chdir this-directory)
+
+;; We need %linguas from the (sexp-xgettext) module.
+;; Therefore, we add its path to the load path.
+(set! %load-path (cons this-directory %load-path))
+
 (use-modules (guix) (gnu)
              (guix modules)
              (guix git-download)
              (guix gexp)
              (guix channels)
              (srfi srfi-9)
-             (ice-9 match))
-
-(define this-directory
-  (dirname (current-filename)))
+             (ice-9 match)
+             (ice-9 rdelim)
+             (ice-9 regex)
+             (sexp-xgettext))
 
 (define source
   (local-file this-directory "guix-web-site"
@@ -73,9 +85,7 @@
 
           (setvbuf (current-output-port) 'line)
           (setvbuf (current-error-port) 'line)
-
           (copy-recursively #$source ".")
-
           ;; Set 'GUILE_LOAD_PATH' so that Haunt find the Guix modules and
           ;; its dependencies.  To find out the load path of Guix and its
           ;; dependencies, fetch its value over 'guix repl'.
@@ -96,24 +106,62 @@
                                     ":"))))
             (close-pipe pipe))
 
+          ;; Make the copy writable so Haunt can overwrite duplicate assets.
+          (invoke #+(file-append (specification->package "coreutils")
+                                 "/bin/chmod")
+                  "--recursive" "u+w" ".")
+
+          ;; For translations, create MO files from PO files.
+          (for-each
+           (lambda (lingua)
+             (let* ((msgfmt #+(file-append
+                               (specification->package "gettext-minimal")
+                               "/bin/msgfmt"))
+                    (lingua-file (string-append "po/" lingua ".po"))
+                    (lang (car (string-split lingua #\_)))
+                    (lang-file (string-append "po/" lang ".po")))
+               (define (create-mo filename)
+                 (begin
+                   (invoke msgfmt filename)
+                   (mkdir-p (string-append lingua "/LC_MESSAGES"))
+                   (rename-file "messages.mo"
+                                (string-append lingua "/LC_MESSAGES/"
+                                               "guix-website.mo"))))
+               (cond
+                ((file-exists? lingua-file)
+                 (create-mo lingua-file))
+                ((file-exists? lang-file)
+                 (create-mo lang-file))
+                (else #t))))
+           (list #$@%linguas))
+
           ;; So we can read/write UTF-8 files.
           (setenv "GUIX_LOCPATH"
                   #+(file-append (specification->package "glibc-utf8-locales")
                                  "/lib/locale"))
-          (setenv "LC_ALL" "en_US.utf8")
 
           ;; Use a sane default.
           (setenv "XDG_CACHE_HOME" "/tmp/.cache")
 
-          (format #t "Running 'haunt build'...~%")
-          (invoke #+(file-append (specification->package "haunt")
-                                 "/bin/haunt")
-                  "build")
-
-          (mkdir-p #$output)
-          (copy-recursively "/tmp/gnu.org/software/guix" #$output
-                            #:log (%make-void-port "w"))
-          (symlink "guix.html" (string-append #$output "/index.html"))))))
+          ;; Build the website for each translation.
+          (for-each
+           (lambda (lingua)
+             (begin
+               (setenv "LC_ALL" (string-append lingua ".utf8"))
+               (format #t "Running 'haunt build' for lingua ~a...~%" lingua)
+               (invoke #+(file-append (specification->package "haunt")
+                                      "/bin/haunt")
+                       "build")
+               (mkdir-p #$output)
+               (copy-recursively "/tmp/gnu.org/software/guix" #$output
+                                 #:log (%make-void-port "w"))
+               (let ((tag (assoc-ref
+                           (call-with-input-file "po/ietf-tags.scm"
+                             (lambda (port) (read port)))
+                           lingua)))
+                 (symlink "guix.html"
+                          (string-append #$output "/" tag "/index.html")))))
+           (list #$@%linguas))))))
 
 (computed-file "guix-web-site" build)
 
diff --git a/website/README b/website/README
index d3a3a78..ff54053 100644
--- a/website/README
+++ b/website/README
@@ -24,14 +24,18 @@ commands:
 
 #+BEGIN_EXAMPLE
 $ cd path/to/guix-artwork/website
-$ GUIX_WEB_SITE_LOCAL=yes haunt build
+$ export GUILE_LOAD_PATH=$(guix build 
guile-syntax-highlight)/share/guile/site/2.2:$GUILE_LOAD_PATH
+$ LC_ALL=en_US.utf8 GUIX_WEB_SITE_LOCAL=yes haunt build
 $ haunt serve
 #+END_EXAMPLE
 
-Then, visit http://localhost:8080/guix.html in a web browser.
+Then, visit http://localhost:8080/en/guix.html in a web browser.
 
 You can stop the server pressing ~Ctrl + C~ twice.
 
+See also the file i18n-howto.txt for information on working with
+translations.
+
 * Deploying
 
 Like the pages of many GNU websites, this website is managed through
diff --git a/website/apps/i18n.scm b/website/apps/i18n.scm
new file mode 100644
index 0000000..d88333a
--- /dev/null
+++ b/website/apps/i18n.scm
@@ -0,0 +1,132 @@
+;;; GNU Guix web site
+;;; Copyright © 2019 Florian Pelz <address@hidden>
+;;;
+;;; This file is part of the GNU Guix web site.
+;;;
+;;; The GNU Guix web site is free software; you can redistribute it and/or 
modify it
+;;; under the terms of the GNU Affero General Public License as published by
+;;; the Free Software Foundation; either version 3 of the License, or (at
+;;; your option) any later version.
+;;;
+;;; The GNU Guix web site is distributed in the hope that it will be useful, 
but
+;;; WITHOUT ANY WARRANTY; without even the implied warranty of
+;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+;;; GNU Affero General Public License for more details.
+;;;
+;;; You should have received a copy of the GNU Affero General Public License
+;;; along with the GNU Guix web site.  If not, see 
<http://www.gnu.org/licenses/>.
+
+(define-module (apps i18n)
+  #:use-module (haunt asset)
+  #:use-module (haunt page)
+  #:use-module (haunt utils)
+  #:use-module (ice-9 match)
+  #:use-module (sexp-xgettext)
+  #:use-module (srfi srfi-1)
+  #:export (G_
+            N_
+            C_
+            NC_
+            %current-ietf-tag
+            %current-lang
+            %current-lingua
+            builder->localized-builder
+            builders->localized-builders
+            ietf-tags-file-contents
+            localized-root-path))
+
+(define %gettext-domain
+  "guix-website")
+
+(bindtextdomain %gettext-domain (getcwd))
+(bind-textdomain-codeset %gettext-domain "UTF-8")
+(textdomain %gettext-domain)
+
+;; NOTE: The sgettext macros have no hygiene because they use
+;; datum->syntax and do not preserve the semantics of anything looking
+;; like an sgettext macro.  This is an exceptional use case; do not
+;; try this at home.
+
+(define-syntax G_
+  sgettext)
+
+(set-simple-keywords! '(G_))
+
+(define-syntax N_ ;like ngettext
+  sngettext)
+
+(define-syntax C_ ;like pgettext
+  spgettext)
+
+(define-syntax NC_ ;like npgettext
+  snpgettext)
+
+(set-complex-keywords! '(N_ C_ NC_))
+
+(define %current-lingua
+  ;; strip the character encoding:
+  (car (string-split (setlocale LC_ALL) #\.)))
+
+(define-syntax ietf-tags-file-contents
+  (identifier-syntax
+   (force (delay (call-with-input-file
+                     "po/ietf-tags.scm"
+                   (lambda (port) (read port)))))))
+
+
+(define %current-ietf-tag
+  (or (assoc-ref ietf-tags-file-contents %current-lingua)
+      "en"))
+
+(define %current-lang
+  (car (string-split %current-ietf-tag #\-)))
+
+(define* (localized-root-path url #:key (lingua %current-ietf-tag))
+  "Given a URL as used in a href attribute, return the URL prefix
+'builder->localized-builder' would use for the URL when called with
+LINGUA."
+  (if (or (string-suffix? ".html" url)
+          (string-suffix? "/" url))
+      (string-append lingua "/")
+      ""))
+
+(define (first-value arg)
+  "For some reason the builder returned by static-directory returns
+multiple values.  This procedure is used to retain only the first
+return value.  TODO: This should not be necessary."
+  arg)
+
+(define <asset>
+  (@@ (haunt asset) <asset>))
+
+(define <page>
+  (@@ (haunt page) <page>))
+
+(define (builder->localized-builder builder)
+  "Return a Haunt builder procedure generated from an existing BUILDER
+with translations for the current system locale coming from
+sexp-xgettext."
+  (compose
+   (lambda (pages-and-assets)
+     (map
+      (lambda (page-or-asset)
+        (match page-or-asset
+          (($ <page> file-name contents writer)
+           (let ((new-name (string-append (localized-root-path file-name)
+                                          file-name)))
+             (make-page new-name contents writer)))
+          (($ <asset> source target)
+           (let ((new-name (string-append (localized-root-path target) 
target)))
+             (make-asset source new-name)))))
+      pages-and-assets))
+   (lambda (site posts)
+     (first-value (builder site posts)))))
+
+(define (builders->localized-builders builders)
+  "Return a list of new Haunt builder procedures generated from
+BUILDERS and localized via sexp-xgettext for the current system
+locale."
+  (flatten
+   (map-in-order
+    builder->localized-builder
+    builders)))
diff --git a/website/haunt.scm b/website/haunt.scm
index 0cb7177..01e2af7 100644
--- a/website/haunt.scm
+++ b/website/haunt.scm
@@ -5,22 +5,25 @@
 (use-modules ((apps base builder) #:prefix base:)
             ((apps blog builder) #:prefix blog:)
             ((apps download builder) #:prefix download:)
+             (apps i18n)
              ((apps media builder) #:prefix media:)
             ((apps packages builder) #:prefix packages:)
             (haunt asset)
              (haunt builder assets)
              (haunt reader)
             (haunt reader commonmark)
-             (haunt site))
-
+             (haunt site)
+             (ice-9 rdelim)
+             (srfi srfi-1))
 
 (site #:title "GNU Guix"
       #:domain "https://guix.gnu.org";
       #:build-directory "/tmp/gnu.org/software/guix"
       #:readers (list sxml-reader html-reader commonmark-reader)
-      #:builders (list base:builder
-                      blog:builder
-                      download:builder
-                       media:builder
-                      packages:builder
-                      (static-directory "static")))
+      #:builders (builders->localized-builders
+                  (list base:builder
+                        blog:builder
+                        download:builder
+                        media:builder
+                        packages:builder
+                        (static-directory "static"))))
diff --git a/website/i18n-howto.txt b/website/i18n-howto.txt
new file mode 100644
index 0000000..54f85f0
--- /dev/null
+++ b/website/i18n-howto.txt
@@ -0,0 +1,86 @@
+With sexp-xgettext, arbitrary s-expressions can be marked for
+translation (not only strings like with normal xgettext).
+
+S-expressions can be marked with G_ (simple marking for translation),
+N_ (“complex” marking with different forms depending on number like
+ngettext), C_ (“complex” marking distinguished from other markings by
+a msgctxt like pgettext) or NC_ (mix of both).
+
+Marking a string for translation behaves like normal gettext.  Marking
+a parenthesized expression (i.e. a list or procedure call) extracts
+each string from the parenthesized expression.  If a symbol, keyword
+or other parenthesized expression occurs between the strings, it is
+extracted as an XML element.  Expressions before or after all strings
+are not extracted.  If strings from a parenthesized sub-expression
+shall be extracted too, the sub-expression must again be marked with
+G_ unless it is the only sub-expression or it follows a quote,
+unquote, quasiquote or unquote-splicing.  The order of XML elements
+can be changed in the translation to produce a different ordering
+inside a parenthesized expression.  If a string shall not be extracted
+from a marked expression, it must be wrapped, for example by a call to
+the identity procedure.  Be careful when marking non-SHTML content
+such as procedure calls for translation: Additional strings will be
+inserted between non-string elements.
+
+Known issues:
+
+* Line numbers are sometimes off.
+
+* Some less important other TODOs in the comments.
+
+=====
+
+The following commands are an example of the translation for locale
+de_DE.  Adapt as necessary.  We assume the software requirements
+mentioned in the README are installed.
+
+To create a pot file:
+
+guile scripts/sexp-xgettext.scm -f po/POTFILES \
+                                -o po/guix-website.pot \
+                                --from-code=UTF-8 \
+                                --copyright-holder="Ludovic Courtès" \
+                                --package-name="guix-website" \
+                                --msgid-bugs-address="address@hidden" \
+                                --keyword=G_ \
+                                --keyword=N_:1,2 \
+                                --keyword=C_:1c,2 \
+                                --keyword=NC_:1c,2,3
+
+To create a po file from a pot file, do the usual:
+
+cd po
+msginit -l de --no-translator
+
+To merge an existing po file with a new pot file:
+
+cd po
+msgmerge --previous -U de.po guix-website.pot
+
+To update mo files:
+
+mkdir -p de/LC_MESSAGES
+cd po
+msgfmt de.po
+cd ..
+mv po/messages.mo de/LC_MESSAGES/guix-website.mo
+
+To build all languages:
+
+guix build -f .guix.scm
+
+To test the de_DE translation, update its mo file as above, then:
+
+guix environment --ad-hoc haunt
+LC_ALL=de_DE.utf8 \
+ GUILE_LOAD_PATH=$(guix build 
guile-syntax-highlight)/share/guile/site/2.2:$GUILE_LOAD_PATH \
+ GUIX_WEB_SITE_LOCAL=yes \
+ haunt build
+GUILE_LOAD_PATH=$(guix build 
guile-syntax-highlight)/share/guile/site/2.2:$GUILE_LOAD_PATH \
+ haunt serve
+
+For checking for errors / debugging newly marked files you can try:
+
+GUILE_LOAD_PATH=.:$(guix build haunt)/share/guile/site/2.2:\
+$(guix build guile-syntax-highlight)/share/guile/site/2.2:$GUILE_LOAD_PATH \
+ guile apps/base/templates/about.scm   # an example for debugging about.scm
diff --git a/website/po/LINGUAS b/website/po/LINGUAS
new file mode 100644
index 0000000..d4dd759
--- /dev/null
+++ b/website/po/LINGUAS
@@ -0,0 +1,3 @@
+# Translation with sexp-xgettext requires the full LL_CC locale name
+# to be specified.
+en_US
diff --git a/website/po/POTFILES b/website/po/POTFILES
new file mode 100644
index 0000000..e538f84
--- /dev/null
+++ b/website/po/POTFILES
@@ -0,0 +1,36 @@
+# high-priority files that should come first in the PO file
+apps/base/utils.scm
+apps/base/templates/home.scm
+apps/base/templates/theme.scm
+apps/base/templates/components.scm
+apps/base/templates/about.scm
+apps/base/data.scm
+apps/base/templates/help.scm
+# other files
+apps/base/templates/contact.scm
+apps/base/templates/contribute.scm
+apps/base/templates/donate.scm
+apps/base/templates/graphics.scm
+apps/base/templates/irc.scm
+apps/base/templates/menu.scm
+apps/base/templates/security.scm
+apps/blog/templates/components.scm
+apps/blog/templates/feed.scm
+apps/blog/templates/post-list.scm
+apps/blog/templates/post.scm
+apps/blog/templates/tag.scm
+apps/download/data.scm
+apps/download/templates/components.scm
+apps/download/templates/download.scm
+apps/media/data.scm
+apps/media/templates/components.scm
+apps/media/templates/screenshot.scm
+apps/media/templates/screenshots-overview.scm
+apps/media/templates/video.scm
+apps/media/templates/video-list.scm
+apps/packages/templates/components.scm
+apps/packages/templates/detailed-index.scm
+apps/packages/templates/detailed-package-list.scm
+apps/packages/templates/index.scm
+apps/packages/templates/package-list.scm
+apps/packages/templates/package.scm
diff --git a/website/po/ietf-tags.scm b/website/po/ietf-tags.scm
new file mode 100644
index 0000000..8102a49
--- /dev/null
+++ b/website/po/ietf-tags.scm
@@ -0,0 +1,9 @@
+;;; This file contains an association list for each translation from
+;;; the locale to an IETF language tag to be used in the URL path of
+;;; translated pages.  The language tag results from the translation
+;;; team’s language code from
+;;; <https://translationproject.org/team/index.html>.  The underscore
+;;; in the team’s code is replaced by a hyphen.  For example, az would
+;;; be used for the Azerbaijani language (not az-Latn) and zh-CN would
+;;; be used for mainland Chinese (not zh-Hans-CN).
+(("en_US" . "en"))
diff --git a/website/scripts/sexp-xgettext.scm 
b/website/scripts/sexp-xgettext.scm
new file mode 100644
index 0000000..aba527f
--- /dev/null
+++ b/website/scripts/sexp-xgettext.scm
@@ -0,0 +1,830 @@
+;;; GNU Guix web site
+;;; Copyright © 2019 Florian Pelz <address@hidden>
+;;;
+;;; This file is part of the GNU Guix web site.
+;;;
+;;; The GNU Guix web site is free software; you can redistribute it and/or 
modify it
+;;; under the terms of the GNU Affero General Public License as published by
+;;; the Free Software Foundation; either version 3 of the License, or (at
+;;; your option) any later version.
+;;;
+;;; The GNU Guix web site is distributed in the hope that it will be useful, 
but
+;;; WITHOUT ANY WARRANTY; without even the implied warranty of
+;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+;;; GNU Affero General Public License for more details.
+;;;
+;;; You should have received a copy of the GNU Affero General Public License
+;;; along with the GNU Guix web site.  If not, see 
<http://www.gnu.org/licenses/>.
+
+(use-modules (ice-9 getopt-long)
+             (ice-9 match)
+             (ice-9 peg)
+             (ice-9 receive)
+             (ice-9 regex)
+             (ice-9 textual-ports)
+             (srfi srfi-1) ;lists
+             (srfi srfi-9) ;records
+             (srfi srfi-19) ;date
+             (srfi srfi-26)) ;cut
+
+;;; This script imitates xgettext, but combines nested s-expressions
+;;; in the input Scheme files to a single msgstr in the PO file.  It
+;;; works by first reading the keywords specified on the command-line,
+;;; then dealing with the remaining options using (ice-9 getopt-long).
+;;; Then, it parses each Scheme file in the POTFILES file specified
+;;; with --files-from and constructs po entries from it.  For parsing,
+;;; a PEG is used instead of Scheme’s read, because we can extract
+;;; comments with it.  The po entries are written to the PO file
+;;; specified with the --output option.  Scheme code can then use the
+;;; (sexp-xgettext) module to deconstruct the msgids looked up in the
+;;; PO file via gettext.
+
+(define-record-type <keyword-spec>
+  (make-keyword-spec id sg pl c total xcomment)
+  keyword-spec?
+  (id keyword-spec-id) ;identifier
+  (sg keyword-spec-sg) ;arg with singular
+  (pl keyword-spec-pl) ;arg with plural
+  (c keyword-spec-c) ;arg with msgctxt or 'mixed if sg is mixed 
msgctxt|singular
+  (total keyword-spec-total) ;total number of args
+  (xcomment keyword-spec-xcomment))
+
+(define (complex-keyword-spec? keyword-spec)
+  "Return for a keyword passed on the command-line whether it is
+complex, i.e. whether occurrences inside another marked expression may
+be part of that other expression.  See i18n-howto.txt."
+  (match keyword-spec
+    (($ <keyword-spec> _ _ #f #f _ #f) #f)
+    (else #t)))
+
+(define %keyword-specs
+  ;; List of valid xgettext keyword options.
+  ;; Read keywords from command-line options.
+  (let loop ((opts (cdr (command-line)));command-line options from
+                                        ;which to extract --keyword
+                                        ;options
+             (remaining-opts '()) ;unhandled opts
+             (specs '()))
+    (define (string->integer str)
+      (if (string-match "[0-9]+" str)
+          (string->number str)
+          (error "Not a decimal integer.")))
+    (define* (argnums->spec id #:optional (argnums '()))
+      (let loop ((sg #f)
+                 (pl #f)
+                 (c #f)
+                 (total #f)
+                 (xcomment #f)
+                 (argnums argnums))
+        (match argnums
+          (() (make-keyword-spec id
+                                 (if sg sg 1)
+                                 pl
+                                 c
+                                 total
+                                 xcomment))
+          ((arg . argnums)
+           (cond
+            ((string-suffix? "c" arg)
+             (cond (c (error "c suffix clashes"))
+                   (else
+                    (let* ((number-str (string-drop-right arg 1))
+                           (number (string->integer number-str)))
+                      (loop sg pl number total xcomment argnums)))))
+            ((string-suffix? "g" arg)
+             (cond
+              (sg (error "Only first argnum can have g suffix."))
+              (c (error "g suffix clashes."))
+              (else
+               (let* ((number-str (string-drop-right arg 1))
+                      (number (string->integer number-str)))
+                 (loop number #f 'mixed total xcomment argnums)))))
+            ((string-suffix? "t" arg)
+             (cond (total (error "t suffix clashes"))
+                   (else
+                    (let* ((number-str (string-drop-right arg 1))
+                           (number (string->integer number-str)))
+                      (loop sg pl c number xcomment argnums)))))
+            ((string-suffix? "\"" arg)
+             (cond (xcomment (error "xcomment clashes"))
+                   (else
+                    (let* ((comment (substring arg
+                                               1
+                                               (- (string-length arg) 1))))
+                      (loop sg pl c total comment argnums)))))
+            (else
+             (let* ((number (string->integer arg)))
+               (if sg
+                   (if pl
+                       (error "Too many argnums.")
+                       (loop sg number c total xcomment argnums))
+                   (loop number #f c total xcomment argnums)))))))))
+
+    (define (string->spec str) ;see `info xgettext`
+      (match (string-split str #\:)
+        ((id) (argnums->spec id))
+        ((id argnums)
+         (argnums->spec id (string-split argnums #\,)))))
+    (match opts
+      (() (begin
+            ;; remove recognized --keyword command-line options:
+            (set-program-arguments (cons (car (command-line))
+                                         (reverse remaining-opts)))
+            specs))
+      ((current-opt . rest)
+       (cond
+        ((string=? "--" current-opt) specs)
+        ((string-prefix? "--keyword=" current-opt)
+         (let ((keyword (string-drop current-opt (string-length 
"--keyword="))))
+           (loop rest remaining-opts (cons (string->spec keyword) specs))))
+        ((or (string=? "--keyword" current-opt)
+             (string=? "-k" current-opt))
+         (let ((next-opt (car rest)))
+           (loop (cdr rest)
+                 remaining-opts
+                 (cons (string->spec next-opt) specs))))
+        (else (loop rest (cons current-opt remaining-opts) specs)))))))
+
+;;; Other options are not repeated, so we can use getopt-long:
+
+(define %options ;; Corresponds to what is documented at `info xgettext`.
+  (let ((option-spec
+         `((files (single-char #\f) (value #t))
+           (directory (single-char #\D) (value #t))
+           (default-domain (single-char #\d) (value #t))
+           (output (single-char #\o) (value #t))
+           (output-dir (single-char #\p) (value #t))
+           (from-code (value #t))
+           (join-existing (single-char #\j) (value #f))
+           (exclude-file (single-char #\x) (value #t))
+           (add-comments (single-char #\c) (value #t))
+
+           ;; Because getopt-long does not support repeated options,
+           ;; we took care of --keyword options further up.
+           ;; (keyword (single-char #\k) (value #t))
+
+           (flag (value #t))
+           (force-po (value #f))
+           (indent (single-char #\i) (value #f))
+           (no-location (value #f))
+           (add-location (single-char #\n) (value #t))
+           (width (single-char #\w) (value #t))
+           (no-wrap (value #f))
+           (sort-output (single-char #\s) (value #f))
+           (sort-by-file (single-char #\F) (value #f))
+           (omit-header (value #f))
+           (copyright-holder (value #t))
+           (foreign-user (value #f))
+           (package-name (value #t))
+           (package-version (value #t))
+           (msgid-bugs-address (value #t))
+           (msgstr-prefix (single-char #\m) (value #t))
+           (msgstr-suffix (single-char #\m) (value #t))
+           (help (value #f))
+           (pack (value #f)))))
+    (getopt-long (command-line) option-spec)))
+
+
+(define parse-scheme-file
+  ;; This procedure parses FILE and returns a parse tree.
+  (let ()
+    ;;TODO: Optionally ignore case.
+    (define-peg-pattern NL all "\n")
+    (define-peg-pattern comment all (and ";"
+                                         (* (and peg-any
+                                                 (not-followed-by NL)))
+                                         (and peg-any (followed-by NL))))
+    (define-peg-pattern empty none (or " " "\t"))
+    (define-peg-pattern whitespace body (or empty NL))
+    (define-peg-pattern quotation body (or "'" "`" "," ",@"))
+                                        ;TODO: Allow user to specify
+                                        ;other quote reader macros to
+                                        ;be ignored and also ignore
+                                        ;quote spelled out without
+                                        ;reader macro.
+    (define-peg-pattern open body (and (? quotation)
+                                       (or "(" "[" "{")))
+    (define-peg-pattern close body (or ")" "]" "}"))
+    (define-peg-pattern string body (and (followed-by "\"")
+                                         (* (or "\\\""
+                                                (and (or NL peg-any)
+                                                     (not-followed-by "\""))))
+                                         (and (or NL peg-any)
+                                              (followed-by "\""))
+                                         "\""))
+    (define-peg-pattern token all (or string
+                                      (and
+                                       (not-followed-by open)
+                                       (not-followed-by close)
+                                       (not-followed-by comment)
+                                       (* (and peg-any
+                                               (not-followed-by open)
+                                               (not-followed-by close)
+                                               (not-followed-by comment)
+                                               (not-followed-by string)
+                                               (not-followed-by whitespace)))
+                                       (or
+                                        (and peg-any (followed-by open))
+                                        (and peg-any (followed-by close))
+                                        (and peg-any (followed-by comment))
+                                        (and peg-any (followed-by string))
+                                        (and peg-any (followed-by whitespace))
+                                        (not-followed-by peg-any)))))
+    (define-peg-pattern list all (or (and (? quotation) "(" program ")")
+                                     (and (? quotation) "[" program "]")
+                                     (and (? quotation) "{" program "}")))
+    (define-peg-pattern t-or-s body (or token list))
+    (define-peg-pattern program all (* (or whitespace
+                                           comment
+                                           t-or-s)))
+    (lambda (file)
+      (call-with-input-file file
+        (lambda (port)
+          ;; It would be nice to match port directly without
+          ;; converting to a string first, but apparently guile cannot
+          ;; do that yet.
+          (let ((string (get-string-all port)))
+            (peg:tree (match-pattern program string))))))))
+
+
+(define-record-type <po-entry>
+  (make-po-entry ecomments ref flags ctxt id idpl)
+  po-entry?
+;;; irrelevant: (tcomments po-entry-tcomments) ;translator-comments
+  (ecomments po-entry-ecomments) ;extracted-comments
+  (ref po-entry-ref) ;reference
+  (flags po-entry-flags)
+;;; irrelevant: (prevctxt po-entry-prevctxt) ;previous-ctxt
+;;; irrelevant: (prev po-entry-prev) ;previous-translation
+  (ctxt po-entry-ctxt) ;msgctxt
+  (id po-entry-id) ;msgid
+  (idpl po-entry-idpl) ;msgid-plural
+;;; irrelevant: (str po-entry-str) ;msgstr string or association list
+;;;                                ;integer to string
+  )
+
+(define (po-equal? po1 po2)
+  "Return whether PO1 and PO2 have equal ctxt, id and idpl."
+  (and (equal? (po-entry-ctxt po1) (po-entry-ctxt po2))
+       (equal? (po-entry-id po1) (po-entry-id po2))
+       (equal? (po-entry-idpl po1) (po-entry-idpl po2))))
+
+(define (combine-duplicate-po-entries list)
+  "Return LIST with duplicate po entries replaced by a single PO entry
+with both refs."
+  (let loop ((remaining list))
+    (match remaining
+      (() '())
+      ((head . tail)
+       (receive (before from)
+           (break (cut po-equal? head <>) tail)
+         (cond
+          ((null? from) (cons head (loop tail)))
+          (else
+           (loop
+            (cons
+             (match head
+               (($ <po-entry> ecomments1 ref1 flags ctxt id idpl)
+                (match (car from)
+                  (($ <po-entry> ecomments2 ref2 _ _ _ _)
+                   (let ((ecomments (if (or ecomments1 ecomments2)
+                                        (append (or ecomments1 '())
+                                                (or ecomments2 '()))
+                                        #f))
+                         (ref (if (or ref1 ref2)
+                                  (string-join
+                                   (cons
+                                    (or ref1 "")
+                                    (cons
+                                     (or ref2 "")
+                                     '())))
+                                  #f)))
+                     (make-po-entry ecomments ref flags ctxt id idpl))))))
+             (append before (cdr from)))))))))))
+
+(define (write-po-entry po-entry)
+  (define (prepare-text text)
+    "If TEXT is false, return #f.  Otherwise correct the formatting of
+TEXT by escaping backslashes and newlines and enclosing TEXT in
+quotes. Note that Scheme’s write is insufficient because it would
+escape far more.  TODO: Strings should be wrappable to a maximum line
+width."
+    (and text
+         (string-append "\""
+                        (with-output-to-string
+                          (lambda ()
+                            (call-with-input-string text
+                              (lambda (port)
+                                (let loop ((c (get-char port)))
+                                  (unless (eof-object? c)
+                                    (case c
+                                      ((#\\) (display "\\"))
+                                      ((#\newline) (display "\\n"))
+                                      (else (write-char c)))
+                                    (loop (get-char port))))))))
+                        "\"")))
+  (define (write-component c prefix)
+    (when c
+      (begin (display prefix)
+             (display " ")
+             (display c)
+             (newline))))
+  (match po-entry
+    (($ <po-entry> ecomments ref flags ctxt id idpl)
+     (let ((prepared-ctxt (prepare-text ctxt))
+           (prepared-id (prepare-text id))
+           (prepared-idpl (prepare-text idpl)))
+       (when ecomments
+         (for-each
+          (lambda (line)
+            (write-component line "#."))
+          (reverse ecomments)))
+       (write-component ref "#:")
+       (write-component (and flags (string-join flags ", ")) "#,")
+       (write-component prepared-ctxt "msgctxt")
+       (write-component prepared-id "msgid")
+       (write-component prepared-idpl "msgid_plural")
+       (if idpl
+           (begin
+             (display "msgstr[0] \"\"")
+             (newline)
+             (display "msgstr[1] \"\""))
+           (display "msgstr \"\""))
+       (newline)))))
+
+;; Extraction of TRANSLATORS comments:
+
+(define %comments-line
+  (make-parameter #f))
+
+(define %ecomments-string
+  (make-parameter #f))
+
+(define (update-ecomments-string! str)
+  "Sets the value of the parameter object %ecomments-string if str is
+an ecomments string.  An ecomments string is extracted from a comment
+because it starts with TRANSLATORS or a key specified with
+--add-comments." ;TODO: Support for other keys is missing.
+  (cond
+   ((not str) (%ecomments-string #f))
+   ((= (1+ (or (%comments-line)
+               -42)) ;arbitrary unequal initial value
+       (or (%line-number) 0))
+    (let ((m (string-match ";+[ \t]*(.*)" str)))
+      (when m
+        (%comments-line (%line-number))
+        (%ecomments-string
+         (if (%ecomments-string)
+             (cons (match:substring m 1) (%ecomments-string))
+             (list (match:substring m 1)))))))
+   (else
+    (let ((m (string-match ";+[ \t]*(TRANSLATORS:.*)" str)))
+      (if m
+          (begin
+            (%comments-line (%line-number))
+            (%ecomments-string
+             (if (%ecomments-string)
+                 (cons (match:substring m 1) (%ecomments-string))
+                 (list (match:substring m 1)))))
+          (%ecomments-string '#f))))))
+
+(define %file-name
+  (make-parameter #f))
+
+(define (update-file-name! name)
+  "Sets the value of the parameter object %file-name to NAME."
+  (%file-name name))
+
+(define %old-line-number
+  (make-parameter #f))
+
+(define (update-old-line-number! number)
+  "Sets the value of the parameter object %old-line-number to NUMBER."
+  (%old-line-number number))
+
+(define %line-number
+  (make-parameter #f))
+
+(define (update-line-number! number)
+  "Sets the value of the parameter object %line-number to NUMBER."
+  (%line-number number))
+
+(define (incr-line-number!)
+  "Increments the value of the parameter object %line-number by 1."
+  (%line-number (1+ (%line-number))))
+
+(define (incr-line-number-for-each-nl! list)
+  "Increments %line-number once for each NL recursively in LIST.  Does
+nothing if LIST is no list but e.g. an empty 'program."
+  (when (list? list)
+    (for-each
+     (lambda (part)
+       (match part
+         ('NL (incr-line-number!))
+         ((? list?) (incr-line-number-for-each-nl! part))
+         (else #f)))
+     list)))
+
+(define (current-ref)
+  "Return the location field for a PO entry."
+  (let ((add (option-ref %options 'add-location 'full)))
+    (cond
+     ((option-ref %options 'no-location #f) #f)
+     ((eq? add 'full)
+      (string-append (%file-name) ":" (number->string (%line-number))))
+     ((eq? add 'file)
+      (%file-name))
+     ((eq? add 'never)
+      #f))))
+
+(define (make-simple-po-entry msgid)
+  (let ((po (make-po-entry
+             (%ecomments-string)
+             (current-ref)
+             #f ;TODO: Use scheme-format for format strings?
+             #f ;no ctxt
+             msgid
+             #f)))
+    (update-ecomments-string! #f)
+    po))
+
+
+(define (matching-keyword id)
+  "Return the keyword-spec whose identifier is the same as ID, or #f
+if ID is no string or no such keyword-spec exists."
+  (and (symbol? id)
+       (let ((found (member (symbol->string id)
+                            %keyword-specs
+                            (lambda (id spec)
+                              (string=? id (keyword-spec-id spec))))))
+         (and found (car found)))))
+
+(define (nth-exp program n)
+  "Return the Nth 'token or 'list inside the PROGRAM parse tree or #f
+if no tokens or lists exist."
+  (let loop ((i 0)
+             (rest program))
+    (define (on-hit exp)
+      (if (= i n) exp
+          ;; else:
+          (loop (1+ i) (cdr rest))))
+    (match rest
+      (() #f)
+      ((('token . _) . _) (on-hit (car rest)))
+      ((('list open-paren exp close-paren) . _) (on-hit (car rest)))
+      ((_ . _) (loop i (cdr rest)))
+      (else #f))))
+
+(define (more-than-one-exp? program)
+  "Return true if PROGRAM consiste of more than one expression."
+  (if (matching-keyword (token->string-symbol-or-keyw (nth-exp program 0)))
+      (nth-exp program 2) ;if there is third element, keyword does not count
+      (nth-exp program 1)))
+
+(define (token->string-symbol-or-keyw tok)
+  "For a parse tree TOK, if it is a 'token parse tree, return its
+value as a string, symbol or #:-keyword, otherwise return #f."
+  (match tok
+    (('token (parts ...) . remaining)
+     ;; This is a string with line breaks in it.
+     (with-input-from-string
+         (string-append
+          (apply string-append
+                 (map-in-order
+                  (lambda (part)
+                    (match part
+                      (('NL _)
+                       (begin (incr-line-number!)
+                              "\n"))
+                      (else part)))
+                  parts))
+          (car remaining))
+       (lambda ()
+         (read))))
+    (('token exp)
+     (with-input-from-string exp
+       (lambda ()
+         (read))))
+    (else #f)))
+
+(define (complex-marked-list->po-entries parse-tree)
+  "Check if PARSE-TREE is marked by a keyword.  If yes, for a complex
+keyword spec, return a list of po-entries for it.  For a simple
+keyword spec, return the argument number of its singular form.
+Otherwise return #f."
+  (let* ((first (nth-exp parse-tree 0))
+         (spec (matching-keyword (token->string-symbol-or-keyw first))))
+    (if spec
+        (if ;if the identifier of a complex keyword occurs first
+         (complex-keyword-spec? spec)
+         ;; then make po entries for it
+         (match spec
+           (($ <keyword-spec> id sg pl c total xcomment)
+            (if (eq? c 'mixed) ; if msgctxt and singular msgid are in one 
string
+                (let* ((exp (nth-exp parse-tree sg))
+                       (val (token->string-symbol-or-keyw exp))
+                       (idx (if (string? val) (string-rindex val #\|))))
+                  (list
+                   (let ((po (make-po-entry
+                              (%ecomments-string)
+                              (current-ref)
+                              #f ;TODO: Use scheme-format for format strings?
+                              (string-take val idx)
+                              (string-drop val (1+ idx))
+                              #f))) ;plural forms are unsupported here
+                     (update-ecomments-string! #f)
+                     po)))
+                ;; else construct msgids
+                (receive (pl-id pl-entries)
+                    (match pl
+                      (#f (values #f '()))
+                      (else (construct-msgid-and-po-entries
+                             (nth-exp parse-tree pl))))
+                  (receive (sg-id sg-entries)
+                      (construct-msgid-and-po-entries
+                       (nth-exp parse-tree sg))
+                    (cons
+                     (let ((po (make-po-entry
+                                (%ecomments-string)
+                                (current-ref)
+                                #f ;TODO: Use scheme-format for format strings?
+                                (and c (token->string-symbol-or-keyw
+                                        (nth-exp parse-tree c)))
+                                sg-id
+                                pl-id)))
+                       (update-ecomments-string! #f)
+                       po)
+                     (append sg-entries pl-entries)))))))
+         ;; else if it is a simple keyword, return the argnum:
+         (keyword-spec-sg spec))
+        ;; if no keyword occurs, then false
+        #f)))
+
+(define (construct-po-entries parse-tree)
+  "Converts a PARSE-TREE resulting from a call to parse-scheme-file to
+a list of po-entry records.  Unlike construct-msgid-and-po-entries,
+strings are not collected to a msgid.  The list of po-entry records is
+the return value."
+  (let ((entries (complex-marked-list->po-entries parse-tree)))
+    (cond
+     ((list? entries) entries)
+     ((number? entries) ;parse-tree yields a single, simple po entry
+      (update-old-line-number! (%line-number))
+      (receive (id entries)
+          (construct-msgid-and-po-entries
+           (nth-exp parse-tree entries))
+        (update-line-number! (%old-line-number))
+        (let ((po (make-simple-po-entry id)))
+          (incr-line-number-for-each-nl! parse-tree)
+          (cons po entries))))
+     (else ;search for marked translations in parse-tree
+      (match parse-tree
+        (() '())
+        (('comment str) (begin
+                          (update-ecomments-string! str)
+                          '()))
+        (('NL _) (begin (incr-line-number!) '()))
+        (('token . _) (begin (incr-line-number-for-each-nl! parse-tree) '()))
+        (('list open-paren program close-paren)
+         (construct-po-entries program))
+        (('program . components)
+         (append-map construct-po-entries components))
+        ;; Note: PEG compresses empty programs to non-lists:
+        ('program
+         '()))))))
+
+(define* (tag counter prefix #:key (flavor 'start))
+  "Formats the number COUNTER as a tag according to FLAVOR, which is
+either 'start, 'end or 'empty for a start, end or empty tag,
+respectively."
+  (string-append "<"
+                 (if (eq? flavor 'end) "/" "")
+                 prefix
+                 (number->string counter)
+                 (if (eq? flavor 'empty) "/" "")
+                 ">"))
+
+(define-record-type <construct-fold-state>
+  (make-construct-fold-state msgid-string maybe-part counter po-entries)
+  construct-fold-state?
+  ;; msgid constructed so far; #f if none, "" if only empty string:
+  (msgid-string construct-fold-state-msgid-string)
+  ;; only append this if string follows:
+  (maybe-part construct-fold-state-maybe-part)
+  ;; counter for next tag:
+  (counter construct-fold-state-counter)
+  ;; complete po entries from marked sub-expressions:
+  (po-entries construct-fold-state-po-entries))
+
+(define* (construct-msgid-and-po-entries parse-tree
+                                         #:optional
+                                         (prefix ""))
+  "Like construct-po-entries, but with two return values.  The first
+is an accumulated msgid constructed from all components in PARSE-TREE
+for use in make-po-entry.  Non-strings are replaced by tags containing
+PREFIX.  The second return value is a list of po entries for
+sub-expressions marked with a complex keyword spec."
+  (match parse-tree
+    (() (values "" '()))
+    ;; Note: PEG compresses empty programs to non-lists:
+    ('program (values "" '()))
+    (('comment str) (begin
+                      (update-ecomments-string! str)
+                      (values "" '())))
+    (('NL _) (begin (incr-line-number!)
+                    (error "Program consists only of line break."
+                           `(,(%file-name) ,(%line-number)))))
+    (('token . _)
+     (let ((maybe-string (token->string-symbol-or-keyw parse-tree)))
+       (if (string? maybe-string)
+           (values maybe-string '())
+           (error "Single symbol marked for translation."
+                  `(,maybe-string ,(%file-name) ,(%line-number))))))
+    (('list open-paren program close-paren)
+     ;; parse program instead
+     (construct-msgid-and-po-entries program prefix))
+    (('program (? matching-keyword))
+     (error "Double-marked for translation."
+            `(,parse-tree ,(%file-name) ,(%line-number))))
+    (('program . components)
+     ;; Concatenate strings in parse-tree to a new msgid and add an
+     ;; <x> tag for each list in between.
+     (match
+         (fold
+          (lambda (component prev-state)
+            (match prev-state
+              (($ <construct-fold-state> msgid-string maybe-part
+                  counter po-entries)
+               (match component
+                 (('comment str) (begin (update-ecomments-string! str)
+                                        prev-state))
+                 (('NL _) (begin (incr-line-number!)
+                                 prev-state))
+                 (('token . _)
+                  (let ((maybe-string (token->string-symbol-or-keyw 
component)))
+                    (cond
+                     ((string? maybe-string)
+                      ;; if string, append maybe-string to previous msgid
+                      (make-construct-fold-state
+                       (string-append (or msgid-string "")
+                                      maybe-part maybe-string)
+                       ""
+                       counter
+                       po-entries))
+                     ((and (more-than-one-exp? components) ;not the only symbol
+                           (or (not msgid-string) ;no string so far
+                               (string-suffix? ">" msgid-string))) ;tag before
+                      prev-state) ;then ignore
+                     (else ;append tag representing the token
+                      (make-construct-fold-state
+                       msgid-string
+                       (string-append
+                        maybe-part
+                        (tag counter prefix #:flavor 'empty))
+                       (1+ counter)
+                       po-entries)))))
+                 (('list open-paren program close-paren)
+                  (let ((first (nth-exp program 0)))
+                    (incr-line-number-for-each-nl! list)
+                    (match (complex-marked-list->po-entries program)
+                      ((? list? result)
+                       (make-construct-fold-state
+                        msgid-string
+                        (string-append
+                         maybe-part
+                         (tag counter prefix #:flavor 'empty))
+                        (1+ counter)
+                        (append result po-entries)))
+                      (result
+                       (cond
+                        ((number? result)
+                         (receive (id entries)
+                             (construct-msgid-and-po-entries
+                              (nth-exp program result)
+                              (string-append prefix
+                                             (number->string counter)
+                                             "."))
+                           (make-construct-fold-state
+                            (string-append (or msgid-string "")
+                                           maybe-part
+                                           (tag counter prefix
+                                                #:flavor 'start)
+                                           id
+                                           (tag counter prefix
+                                                #:flavor 'end))
+                            ""
+                            (1+ counter)
+                            (append entries po-entries))))
+                        ((not (more-than-one-exp? components))
+                         ;; Singletons do not need to be marked.
+                         (receive (id entries)
+                             (construct-msgid-and-po-entries
+                              program
+                              prefix)
+                           (make-construct-fold-state
+                            id
+                            ""
+                            counter
+                            (append entries po-entries))))
+                        (else ;unmarked list
+                         (if (not msgid-string)
+                             ;; then ignore
+                             prev-state
+                             ;; else:
+                             (make-construct-fold-state
+                              msgid-string
+                              (string-append
+                               maybe-part
+                               (tag counter prefix #:flavor 'empty))
+                              (1+ counter)
+                              po-entries))))))))))))
+          (make-construct-fold-state #f "" 1 '())
+          components)
+       (($ <construct-fold-state> msgid-string maybe-part counter po-entries)
+        (values (or msgid-string
+                    (error "Marking for translation yields empty msgid."
+                           %file-name %line-number))
+                po-entries))))))
+
+(define scheme-file->po-entries
+  (compose construct-po-entries
+           parse-scheme-file))
+
+(define %files-from-port
+  (let ((files-from (option-ref %options 'files #f)))
+    (if files-from
+        (open-input-file files-from)
+        (current-input-port))))
+
+(define %source-files
+  (let loop ((line (get-line %files-from-port))
+             (source-files '()))
+    (if (eof-object? line)
+        (begin
+          (close-port %files-from-port)
+          source-files)
+        ;; else read file names before comment
+        (let ((before-comment (car (string-split line #\#))))
+          (loop (get-line %files-from-port)
+                (append
+                 (map match:substring (list-matches "[^ \t]+" before-comment))
+                 source-files))))))
+
+(define %output-po-entries
+  (fold (lambda (scheme-file po-entries)
+          (begin
+            (update-file-name! scheme-file)
+            (update-line-number! 1)
+            (update-old-line-number! #f)
+            (%comments-line #f)
+            (append (scheme-file->po-entries scheme-file)
+                    po-entries)))
+        '()
+        %source-files))
+
+(define %output-port
+  (let ((output (option-ref %options 'output #f))
+        (domain (option-ref %options 'default-domain #f)))
+    (cond
+     (output (open-output-file output))
+     (domain (open-output-file (string-append domain ".po")))
+     (else (open-output-file "messages.po")))))
+
+(with-output-to-port %output-port
+  (lambda ()
+    (let ((copyright (option-ref %options 'copyright-holder
+                                 "THE PACKAGE'S COPYRIGHT HOLDER"))
+          (package (option-ref %options 'package-name "PACKAGE"))
+          (version (option-ref %options 'package-version #f))
+          (bugs-email (option-ref %options 'msgid-bugs-address "")))
+      (display "# SOME DESCRIPTIVE TITLE.\n")
+      (display (string-append "# Copyright (C) YEAR " copyright "\n"))
+      (display (string-append "# This file is distributed under the same \
+license as the " package " package.\n"))
+      (display "# FIRST AUTHOR <EMAIL@ADDRESS>, YEAR.\n")
+      (display "#\n")
+      (write-po-entry (make-po-entry #f #f '("fuzzy") #f "" #f))
+      (display (string-append "\"Project-Id-Version: "
+                              package
+                              (if version
+                                  (string-append " " version)
+                                  "")
+                              "\\n\"\n"))
+      (display (string-append "\"Report-Msgid-Bugs-To: "
+                              bugs-email
+                              "\\n\"\n"))
+      (display (string-append "\"POT-Creation-Date: "
+                              (date->string (current-date) "~1 ~H:~M~z")
+                              "\\n\"\n"))
+      (display "\"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\\n\"\n")
+      (display "\"Last-Translator: FULL NAME <EMAIL@ADDRESS>\\n\"\n")
+      (display "\"Language-Team: LANGUAGE <address@hidden>\\n\"\n")
+      (display "\"Language: \\n\"\n")
+      (display "\"MIME-Version: 1.0\\n\"\n")
+      (display "\"Content-Type: text/plain; charset=UTF-8\\n\"\n")
+      (display "\"Content-Transfer-Encoding: 8bit\\n\"\n")
+      (for-each (lambda (po-entry)
+                  (begin
+                    (newline)
+                    (write-po-entry po-entry)))
+                (combine-duplicate-po-entries %output-po-entries)))))
diff --git a/website/sexp-xgettext.scm b/website/sexp-xgettext.scm
new file mode 100644
index 0000000..71ef4a9
--- /dev/null
+++ b/website/sexp-xgettext.scm
@@ -0,0 +1,530 @@
+;;; GNU Guix web site
+;;; Copyright © 2019 Florian Pelz <address@hidden>
+;;;
+;;; This file is part of the GNU Guix web site.
+;;;
+;;; The GNU Guix web site is free software; you can redistribute it and/or 
modify it
+;;; under the terms of the GNU Affero General Public License as published by
+;;; the Free Software Foundation; either version 3 of the License, or (at
+;;; your option) any later version.
+;;;
+;;; The GNU Guix web site is distributed in the hope that it will be useful, 
but
+;;; WITHOUT ANY WARRANTY; without even the implied warranty of
+;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+;;; GNU Affero General Public License for more details.
+;;;
+;;; You should have received a copy of the GNU Affero General Public License
+;;; along with the GNU Guix web site.  If not, see 
<http://www.gnu.org/licenses/>.
+
+(define-module (sexp-xgettext)
+  #:use-module (ice-9 match)
+  #:use-module (ice-9 peg)
+  #:use-module (ice-9 rdelim)
+  #:use-module (ice-9 receive)
+  #:use-module (ice-9 regex)
+  #:use-module (srfi srfi-1) ;lists
+  #:use-module (srfi srfi-9) ;records
+  #:export (set-complex-keywords!
+            set-simple-keywords!
+            sgettext
+            sngettext
+            spgettext
+            snpgettext
+            %linguas))
+
+(define %complex-keywords
+  ;; Use set-complex-keywords! to change this to a list of keywords
+  ;; for sexp-xgettext functions other than sgettext.
+  (make-parameter '()))
+
+(define (set-complex-keywords! kw)
+  (%complex-keywords kw))
+
+(define %simple-keywords
+  ;; Use set-simple-keywords! to change this to a list of keywords
+  ;; for sgettext.
+  (make-parameter '()))
+
+(define (set-simple-keywords! kw)
+  (%simple-keywords kw))
+
+(define (gettext-keyword? id)
+  (or (member id (%complex-keywords))
+      (member id (%simple-keywords))))
+
+;;COPIED FROM scripts/sexp-xgettext.scm:
+(define* (tag counter prefix #:key (flavor 'start))
+  "Formats the number COUNTER as a tag according to FLAVOR, which is
+either 'start, 'end or 'empty for a start, end or empty tag,
+respectively."
+  (string-append "<"
+                 (if (eq? flavor 'end) "/" "")
+                 prefix
+                 (number->string counter)
+                 (if (eq? flavor 'empty) "/" "")
+                 ">"))
+;;END COPIED FROM scripts/sexp-xgettext.scm
+
+;;ADAPTED FROM scripts/sexp-xgettext.scm
+(define-record-type <construct-fold-state>
+  (make-construct-fold-state msgid-string maybe-part counter)
+  construct-fold-state?
+  ;; msgid constructed so far; #f if none, "" if only empty string
+  (msgid-string construct-fold-state-msgid-string)
+  ;; only append this if string follows:
+  (maybe-part construct-fold-state-maybe-part)
+  ;; counter for next tag:
+  (counter construct-fold-state-counter))
+;;END ADAPTED FROM scripts/sexp-xgettext.scm
+
+(define (sexp->msgid exp)
+  "Return the msgid as constructed by construct-msgid-and-po-entries
+in scripts/sexp-xgettext.scm from the expression EXP."
+  (let loop ((exp exp)
+             (prefix ""))
+    (match exp
+      (() "")
+      ((or ('quote inner-exp)
+           ('quasiquote inner-exp)
+           ('unquote inner-exp)
+           ('unquote-splicing inner-exp))
+       (loop inner-exp prefix))
+      ((first-component . components)
+       (cond
+        ((gettext-keyword? first-component)
+         (error "Double-marked for translation." exp))
+        (else
+         (or
+          (construct-fold-state-msgid-string
+           (fold
+            (lambda (component prev-state)
+              (match prev-state
+                (($ <construct-fold-state> msgid-string maybe-part counter)
+                 (let inner-loop ((exp component))
+                   (match exp
+                     ((or (? symbol?) (? keyword?))
+                      (if (not msgid-string)
+                          ;; ignore symbols at the beginning
+                          prev-state
+                          ;; else make a tag for the symbol
+                          (make-construct-fold-state
+                           msgid-string
+                           (string-append maybe-part
+                                          (tag counter prefix #:flavor 'empty))
+                           (1+ counter))))
+                     ((? string?)
+                      (make-construct-fold-state
+                       (string-append (or msgid-string "")
+                                      maybe-part exp)
+                       "" counter))
+                     ((? list?)
+                      (match exp
+                        (() ;ignore empty list
+                         prev-state)
+                        ((or (singleton)
+                             ('quote singleton)
+                             ('quasiquote singleton)
+                             ('unquote singleton)
+                             ('unquote-splicing singleton))
+                         (inner-loop singleton))
+                        ((components ...)
+                         (cond
+                          ((and (not (null? components))
+                                (member (car components) (%simple-keywords)))
+                           ;; if marked for translation, insert inside tag
+                           (make-construct-fold-state
+                            (string-append (or msgid-string "")
+                                           maybe-part
+                                           (tag counter prefix #:flavor 'start)
+                                           (loop (cadr components)
+                                                 (string-append
+                                                  prefix
+                                                  (number->string counter)
+                                                  "."))
+                                           (tag counter prefix #:flavor 'end))
+                            ""
+                            (1+ counter)))
+                          ;; else ignore if first
+                          ((not msgid-string)
+                           prev-state)
+                          ;; else make empty tag
+                          (else (make-construct-fold-state
+                                 msgid-string
+                                 (string-append
+                                  maybe-part
+                                  (tag counter prefix #:flavor 'empty))
+                                 (1+ counter))))))))))))
+            (make-construct-fold-state #f "" 1)
+            exp))
+          (error "Marking for translation yields empty msgid." exp)))))
+      ((? string?) exp)
+      (else (error "Single symbol marked for translation." exp)))))
+
+(define-record-type <deconstruct-fold-state>
+  (make-deconstruct-fold-state tagged maybe-tagged counter)
+  deconstruct-fold-state?
+  ;; XML-tagged expressions as an association list name->expression:
+  (tagged deconstruct-fold-state-tagged)
+  ;; associate this not-yet-tagged expression with pre if string
+  ;; follows, with post if not:
+  (maybe-tagged deconstruct-fold-state-maybe-tagged)
+  ;; counter for next tag:
+  (counter deconstruct-fold-state-counter))
+
+(define (deconstruct exp msgstr)
+  "Return an s-expression like EXP, but filled with the content from
+MSGSTR."
+  (define (find-empty-element msgstr name)
+    "Return the regex match structure for the empty tag for XML
+element of type NAME inside MSGSTR.  If the element does not exist or
+is more than the empty tag, #f is returned."
+    (string-match (string-append "<" (regexp-quote name) "/>") msgstr))
+  (define (find-element-with-content msgstr name)
+    "Return the regex match structure for the non-empty XML element of
+type NAME inside MSGSTR.  Submatch 1 is its content.  If the element
+does not exist or is just the empty tag, #f is returned."
+    (string-match (string-append "<" (regexp-quote name) ">"
+                                 "(.*)"
+                                 "</" (regexp-quote name) ">")
+                  msgstr))
+  (define (get-first-element-name prefix msgstr)
+    "Return the name of the first XML element in MSGSTR whose name
+begins with PREFIX, or #f if there is none."
+    (let ((m (string-match
+              (string-append "<(" (regexp-quote prefix) "[^>/.]+)/?>") 
msgstr)))
+      (and m (match:substring m 1))))
+  (define (prefix+counter prefix counter)
+    "Return PREFIX with the number COUNTER appended."
+    (string-append prefix (number->string counter)))
+  (let loop ((exp exp)
+             (msgstr msgstr)
+             (prefix ""))
+    (define (unwrap-marked-expression exp)
+      "Return two values for an expression EXP containing a (possibly
+quoted/unquoted) marking for translation with a simple keyword at its
+root.  The first return value is a list with the inner expression, the
+second is a procedure to wrap the processed inner expression in the
+same quotes or unquotes again."
+      (match exp
+        (('quote inner-exp)
+         (receive (unwrapped quotation)
+             (unwrap-marked-expression inner-exp)
+           (values unwrapped
+                   (lambda (res)
+                     (list 'quote (quotation res))))))
+        (('quasiquote inner-exp)
+         (receive (unwrapped quotation)
+             (unwrap-marked-expression inner-exp)
+           (values unwrapped
+                   (lambda (res)
+                     (list 'quasiquote (quotation res))))))
+        (('unquote inner-exp)
+         (receive (unwrapped quotation)
+             (unwrap-marked-expression inner-exp)
+           (values unwrapped
+                   (lambda (res)
+                     (list 'unquote (quotation res))))))
+        (('unquote-splicing inner-exp)
+         (receive (unwrapped quotation)
+             (unwrap-marked-expression inner-exp)
+           (values unwrapped
+                   (lambda (res)
+                     (list 'unquote-splicing (quotation res))))))
+        ((marking . rest) ;list with marking as car
+         ;; assume arg to translate is first argument to marking:
+         (values (list-ref rest 0) identity))))
+    (define (assemble-parenthesized-expression prefix tagged)
+      "Return a parenthesized expression deconstructed from MSGSTR
+with the meaning of XML elements taken from the name->expression
+association list TAGGED.  The special tags [prefix]pre and
+[prefix]post are associated with a list of expressions before or after
+all others in the parenthesized expression with the prefix,
+respectively, in reverse order."
+      (append ;prepend pre elements to what is in msgstr
+       (reverse (or (assoc-ref tagged (string-append prefix "pre")) '()))
+       (let assemble ((rest msgstr))
+         (let ((name (get-first-element-name prefix rest)))
+           (cond
+            ((and name (find-empty-element rest name)) =>
+             ;; first XML element in rest is empty element
+             (lambda (m)
+               (cons*
+                (match:prefix m) ;prepend string before name
+                (assoc-ref tagged name) ;and expression for name
+                (assemble (match:suffix m)))))
+            ((and name (find-element-with-content rest name)) =>
+             ;; first XML element in rest has content
+             (lambda (m)
+               (receive (unwrapped quotation)
+                   (unwrap-marked-expression (assoc-ref tagged name))
+                 (cons*
+                  (match:prefix m) ;prepend string before name
+                  ;; and the deconstructed element with the content as msgstr:
+                  (quotation
+                   (loop
+                    unwrapped
+                    (match:substring m 1)
+                    (string-append name ".")))
+                  (assemble (match:suffix m))))))
+            (else
+             ;; there is no first element
+             (cons
+              rest ;return remaining string
+              (reverse ;and post expressions
+               (or (assoc-ref tagged (string-append prefix "post")) 
'())))))))))
+    (match exp
+      (() '())
+      (('quote singleton)
+       (cons 'quote (list (loop singleton msgstr prefix))))
+      (('quasiquote singleton)
+       (cons 'quasiquote (list (loop singleton msgstr prefix))))
+      (('unquote singleton)
+       (cons 'unquote (list (loop singleton msgstr prefix))))
+      (('unquote-splicing singleton)
+       (cons 'unquote-splicing (list (loop singleton msgstr prefix))))
+      ((singleton)
+       (list (loop singleton msgstr prefix)))
+      ((first-component . components)
+       (cond
+        ((gettext-keyword? first-component)
+         ;; another marking for translation
+         ;; -> should be an error anyway; just retain exp
+         exp)
+        (else
+         ;; This handles a single level of a parenthesized expression.
+         ;; assemble-parenthesized-expression will call loop to
+         ;; recurse to deeper levels.
+         (let ((tagged-state
+                (fold
+                 (lambda (component prev-state)
+                   (match prev-state
+                     (($ <deconstruct-fold-state> tagged maybe-tagged counter)
+                      (let inner-loop ((exp component) ;sexp to handle
+                                       (quoting identity)) ;for wrapping state
+                        (define (tagged-with-maybes)
+                          "Return the value of tagged after adding all
+maybe-tagged expressions.  This should be used as the base value for
+tagged when a string or marked expression is seen."
+                          (match counter
+                            (#f
+                             (alist-cons (string-append prefix "pre")
+                                         maybe-tagged
+                                         tagged))
+                            ((? number?)
+                             (let accumulate ((prev-counter counter)
+                                              (maybes (reverse maybe-tagged)))
+                               (match maybes
+                                 (() tagged)
+                                 ((head . tail)
+                                  (alist-cons
+                                   (prefix+counter prefix prev-counter)
+                                   head
+                                   (accumulate (1+ prev-counter) tail))))))))
+                        (define (add-maybe exp)
+                          "Return a deconstruct-fold-state with EXP
+added to maybe-tagged.  This should be used for expressions that are
+neither strings nor marked for translation with a simple keyword."
+                          (make-deconstruct-fold-state
+                           tagged
+                           (cons (quoting exp) maybe-tagged)
+                           counter))
+                        (define (counter-with-maybes)
+                          "Return the old counter value incremented by
+one for each expression in maybe-tagged.  This should be used together
+with tagged-with-maybes."
+                          (match counter
+                            ((? number?)
+                             (+ counter (length maybe-tagged)))
+                            (#f
+                             1)))
+                        (define (add-tagged exp)
+                          "Return a deconstruct-fold-state with an
+added association in tagged from the current counter to EXP.  If
+MAYBE-TAGGED is not empty, associations for its expressions are added
+to pre or their respective counter.  This should be used for
+expressions marked for translation with a simple keyword."
+                          (let ((c (counter-with-maybes)))
+                            (make-deconstruct-fold-state
+                             (alist-cons
+                              (prefix+counter prefix c)
+                              (quoting exp)
+                              (tagged-with-maybes))
+                             '()
+                             (1+ c))))
+                        (match exp
+                          (('quote inner-exp)
+                           (inner-loop inner-exp
+                                       (lambda (res)
+                                         (list 'quote res))))
+                          (('quasiquote inner-exp)
+                           (inner-loop inner-exp
+                                       (lambda (res)
+                                         (list 'quasiquote res))))
+                          (('unquote inner-exp)
+                           (inner-loop inner-exp
+                                       (lambda (res)
+                                         (list 'unquote res))))
+                          (('unquote-splicing inner-exp)
+                           (inner-loop inner-exp
+                                       (lambda (res)
+                                         (list 'unquote-splicing res))))
+                          (((? gettext-keyword?) . rest)
+                           (add-tagged exp))
+                          ((or (? symbol?) (? keyword?) (? list?))
+                           (add-maybe exp))
+                          ((? string?)
+                           ;; elements in maybe-tagged appear between strings
+                           (let ((c (counter-with-maybes)))
+                             (make-deconstruct-fold-state
+                              (tagged-with-maybes)
+                              '()
+                              c))))))))
+                 (make-deconstruct-fold-state '() '() #f)
+                 exp)))
+           (match tagged-state
+             (($ <deconstruct-fold-state> tagged maybe-tagged counter)
+              (assemble-parenthesized-expression
+               prefix
+               (match maybe-tagged
+                 (() tagged)
+                 (else ;associate maybe-tagged with pre or post
+                  (alist-cons
+                   (cond ;if there already is a pre, use post
+                    ((assoc-ref tagged (string-append prefix "pre"))
+                     (string-append prefix "post"))
+                    (else (string-append prefix "pre")))
+                   maybe-tagged
+                   tagged))))))))))
+      ((? string?) msgstr)
+      (else (error "Single symbol marked for translation." exp)))))
+
+;; NOTE: The sgettext macros have no hygiene because they use
+;; datum->syntax and do not preserve the semantics of anything looking
+;; like an sgettext macro.  This is an exceptional use case; do not
+;; try this at home.
+
+(define (sgettext x)
+  "After choosing an identifier for marking s-expressions for
+translation, make it usable by defining a macro with it calling
+sgettext.  If for example the chosen identifier is G_,
+use (define-syntax G_ sgettext)."
+  (syntax-case x ()
+    ((id exp)
+     (let* ((msgid (sexp->msgid (syntax->datum #'exp)))
+            (new-exp (deconstruct (syntax->datum #'exp)
+                                  (gettext msgid))))
+       (datum->syntax #'id new-exp)))))
+
+;; gettext’s share/gettext/gettext.h tells us we can prepend a msgctxt
+;; and #\eot before a msgid in a gettext call.
+
+(define (spgettext x)
+  "After choosing an identifier for behavior similar to pgettext:1c,2,
+make it usable like (define-syntax C_ spgettext)."
+  (syntax-case x ()
+    ((id msgctxt exp)
+     (let* ((gettext-context-glue #\eot) ;as defined in gettext.h
+            (lookup (string-append (syntax->datum #'msgctxt)
+                                   (string gettext-context-glue)
+                                   (sexp->msgid (syntax->datum #'exp))))
+            (msgstr (car (reverse (string-split (gettext lookup)
+                                                gettext-context-glue))))
+            (new-exp (deconstruct (syntax->datum #'exp)
+                                  msgstr)))
+       (datum->syntax #'id new-exp)))))
+
+(define %plural-numbers
+  ;; Hard-coded list of input numbers such that for each language’s
+  ;; plural formula, for each possible output grammatical number,
+  ;; there is an n among %plural-numbers that yields this output (for
+  ;; any language documented when running “info "(gettext) Plural
+  ;; forms"”), except 1 is omitted from this list because it is a
+  ;; special case for sngettext.  That is, calling ngettext with each
+  ;; number from %plural-numbers and with 1 in any locale is
+  ;; guaranteed to return each plural form at least once.  It would be
+  ;; more resilient towards new languages if instead of hard-coding we
+  ;; computed this from the Plural-Forms in the MO file header entry,
+  ;; but that is not worth the incurred code complexity.
+  '(0 2 3 11 100))
+
+(define (sngettext x)
+  "After choosing an identifier for behavior similar to ngettext:1,2,
+make it usable like (define-syntax N_ sngettext).  sngettext takes
+into account that not all languages have only singular and plural
+forms."
+  (syntax-case x ()
+    ((id exp1 exp2 n)
+     (let* ((msgid1 (sexp->msgid (syntax->datum #'exp1)))
+            (msgid2 (sexp->msgid (syntax->datum #'exp2)))
+            (msgstr1 (ngettext msgid1 msgid2 1))
+            (result (acons ;return an association list msgstr->deconstructed
+                     ;; msgstr for n=1:
+                     msgstr1
+                     `(,'unquote ,(deconstruct (syntax->datum #'exp1)
+                                               msgstr1))
+                     ;; other msgstr for n of each plural form:
+                     (map
+                      (lambda (n)
+                        (let ((msgstr (ngettext msgid1 msgid2 n)))
+                          (cons msgstr `(,'unquote
+                                         ,(deconstruct (syntax->datum #'exp2)
+                                                       msgstr)))))
+                      %plural-numbers))))
+       (datum->syntax
+        #'id
+        `(,assoc-ref (,'quasiquote ,result)
+                     (,ngettext ,msgid1 ,msgid2 ,(syntax->datum #'n))))))))
+
+(define (snpgettext x)
+  "After choosing an identifier for behavior similar to npgettext:1c,2,3,
+make it usable like (define-syntax NC_ snpgettext)."
+  (syntax-case x ()
+    ((id msgctxt exp1 exp2 n)
+     (let* ((gettext-context-glue #\eot) ;as defined in gettext.h
+            (msgid1 (string-append (syntax->datum #'msgctxt)
+                                   (string gettext-context-glue)
+                                   (sexp->msgid (syntax->datum #'exp1))))
+            ;; gettext.h implementation shows: msgctxt is only part of msgid1.
+            (msgid2 (sexp->msgid (syntax->datum #'exp2)))
+            (msgstr1 (car
+                      (reverse
+                       (string-split
+                        (ngettext msgid1 msgid2 1)
+                        gettext-context-glue))))
+            (result (acons ;return an association list msgstr->deconstructed
+                     ;; msgstr for n=1:
+                     msgstr1
+                     `(,'unquote ,(deconstruct (syntax->datum #'exp1)
+                                               msgstr1))
+                     ;; other msgstr for n of each plural form:
+                     (map
+                      (lambda (n)
+                        (let ((msgstr (car
+                                       (reverse
+                                        (string-split
+                                         (ngettext msgid1 msgid2 n)
+                                         gettext-context-glue)))))
+                          (cons msgstr `(,'unquote
+                                         ,(deconstruct (syntax->datum #'exp2)
+                                                       msgstr)))))
+                      %plural-numbers))))
+       (datum->syntax
+        #'id
+        `(,assoc-ref (,'quasiquote ,result)
+                     (,car
+                      (,reverse
+                       (,string-split
+                        (,ngettext ,msgid1 ,msgid2 ,(syntax->datum #'n))
+                        ,gettext-context-glue)))))))))
+
+(define %linguas
+  (with-input-from-file "po/LINGUAS"
+    (lambda _
+      (let loop ((line (read-line)))
+        (if (eof-object? line)
+            '()
+            ;; else read linguas before comment
+            (let ((before-comment (car (string-split line #\#))))
+              (append
+               (map match:substring (list-matches "[^ \t]+" before-comment))
+               (loop (read-line)))))))))



reply via email to

[Prev in Thread] Current Thread [Next in Thread]