;; $Id: stream-wiki.scm 5587 2006-05-19 06:24:51Z azul $ ;; ;; This file is in the public domain and may be reproduced or copied without ;; permission from its author. Citation of the source is appreciated. ;; ;; Alejandro Forero Cuervo
;; ;; This file implements an egg for Chicken Scheme for parsing files in wiki ;; format and rendering them to HTML. ;; ;; Documentation is available in HTML format. ;; ;; Newer versions might be available at: ;; ;; http://anonymous:@afc.no-ip.info:8000/svn/home/src/chicken-eggs/stream-wiki (declare (export make-html-header wiki->html wiki->text wiki->toc wiki->sections load-linktypes wiki->latex latex-page wiki->texi texi-page wiki-links wiki-tags load-extensions-from-file extension-update extension-files-actions-links extension-toc-header extension-data ; Exports for extensions: driver-horizontal-line driver-math driver-literal driver-literal-line driver-center driver-big driver-em driver-strong driver-small driver-header driver-blockquote driver-line-break driver-anchor *extensions*) (usual-integrations) (run-time-macros)) (include "chicken-more-macros") (use srfi-1 uri ) (use srfi-40 stream-ext html-stream stream-parser sandbox posix format-modular stream-sections) ; An output driver is simply a table of functions. ; For each field FIELD in our driver record, we also want to define a function ; wikidata-FIELD such that ; ; (wikidata-FIELD x) ; ; evaluates to whatever ; ; (driver-FIELD (wikidata-driver x)) ; ; evaluates to. This is just to save some typing, since actually the driver is ; usuall stored inside a wikidata record everywhere. ; ; That's all there is to the following macro. (define-macro (define-driver-record . funcs) `(begin (define-record driver ,@funcs) ,@(map (lambda (sym) `(define (,(string->symbol (format #f "wikidata-~A" sym)) info . args) (apply (,(string->symbol (format #f "driver-~A" sym)) (wikidata-driver info)) args))) funcs))) ; Now lets define all the functions that make up an output driver. ; ; linktypes is a hashing table: the first element is a symbol for the name of ; the type of link and the second is what we should use, with escape character ; %t (replaced with the link's target). (define-driver-record output-format horizontal-line header blockquote center small big literal literal-line paragraph strong em link image math ordered-list unordered-list list-item definition-list definition-item toc special-char tags comment line-break anchor) (let ((get driver-anchor)) (set! driver-anchor (lambda (driver) (lambda (anchor . rest) (let-optionals rest ((text stream-null)) ((get driver) (text->html-id anchor) text)))))) ; An extension tag for the wiki syntax. ; name is a symbol with the name for the tag (all in lower). (define-record extension name table) ; Output is an output driver. See html-driver for an example. ; linktypes-current is a list with the currently applied linktypes. When ; expanding a linktype (parsing its output recursively), we append its type ; to linktypes-current. That way we can detect infinite loops and react ; gracefully. ; tags is a hash table of record objects of type tags. (define-record wikidata driver open include name linktypes extensions extension-args linktypes-current previous-sections) ; Macro used to generate the functions in the HTML driver. (define-macro (tags-func . tags) (let ((arg (gensym "arg"))) `(lambda (,arg) (html-stream ,(fold-right list arg tags))))) ;;; Helper functions ; ; Functions useful for all the drivers. ; Is URL external? Currently we simply check to see if it is an absolute URL. ; We could be smarter than this if we had more information about the canonical ; URL for the repository and of the current file but this is not that easy (for ; instance, "/foo", "http://bar/foo" and "../foo" might be external or not). ; ; This function is replicated in svnwiki's links.scm. (define (url-external? url) (let ((rest (stream-drop-while (disjoin char-alphabetic? char-numeric?) url))) (and (not (stream-null? rest)) (not (stream-null? (stream-cdr rest))) (char=? (stream-car rest) #\:)))) (define (name-to-base str) (stream-downcase (stream-map (lambda (c) (if (char-whitespace? c) #\- c)) (stream-filter (disjoin char-alphabetic? char-numeric? char-whitespace?) str)))) (define (fix-suffix suffix) (receive (querystring anchor) (stream-break (cut char=? <> #\#) suffix) (if (stream-null? anchor) suffix (stream-append querystring (stream-cons #\# (text->html-id (stream-cdr anchor))))))) ; Function to handle links intelligently. ; ; Analyzes the target and returns two things: ; ; - The type of link ("external", "internal" or "unexistant") ; ; - The URL that should be used as the target. This is based on the input ; target but it might be modified. For instance, its case might be changed ; to match a file (so users won't have to worry about the case of files when ; creating links) or a relative URL might be turned into an absolute URL. (define (parse-link dst check-exists? url-adjust) (assert (and 'parse-link (stream? dst))) (if (url-external? dst) (values 'external dst) (receive (dst-file dst-suffix) (stream-break (disjoin (cut char=? <> #\?) (cut char=? <> #\#)) dst) (let ((dst-real (stream-find check-exists? (parse-link-file dst-file)))) (if dst-real (values "internal" (url-adjust (stream-append dst-real (fix-suffix dst-suffix)))) (values "unexistant" (name-to-base dst-file))))))) (define (parse-link-file file) (stream-map (lambda (f) (f file)) (stream identity stream-downcase name-to-base))) ;;; OpenOffice.org Driver ;; ; Wildly incomplete. (define (ooo-driver header-start) (make-driver 'open-document-format (constantly (html-stream (hr))) (lambda (name depth id) (stream-append (html-stream ((a name (text->html-id name)))) (string->stream (format #f ",
,,, or tags. (define (text-transform info strong em literal start newline-rep) (assert (wikidata? info)) (lambda (str fail parsed) (parse-token str fail parsed ; Cases for EOL. If it is followed by a character, we want ; to replace it with newline-rep (which is normally a space ; but is a #\newline when we're inside a tag): ((#\newline (assert newline-rep) (end)) stream-null) ((#\newline (assert (and newline-rep start)) start) (stream newline-rep)) ((#\newline (assert (and newline-rep (not start)))) (stream newline-rep)) ; and : ((#\' #\' #\' (assert (not strong)) (bind text (*? (rule-apply (text-transform info #t em literal start newline-rep)))) #\' #\' #\') (wikidata-strong info text)) ((#\' #\' (assert (not em)) (bind text (*? (rule-apply (text-transform info strong #t literal start newline-rep)))) #\' #\') (wikidata-em info text)) ; Links: [ TYPE : ] DST [ | NAME ] (type and name are ; optional). ((#\[ #\[ (all char-whitespace?) (? (bind type (*? (not (or #\[ #\: #\| #\] #\newline)))) (all char-whitespace?) #\: (all char-whitespace?)) (bind dst (*? (not (or #\[ #\| #\] #\newline)))) (all char-whitespace?) (? #\| (all char-whitespace?) (bind name (*? (rule-apply (text-transform info strong em literal start newline-rep)))) (all char-whitespace?)) #\] #\]) (make-link info (if (stream-null? type) #f type) dst (if (stream-null? name) #f name))) ; Typewritten text: ((#\{ #\{ (bind text (*? char?)) #\} #\}) (wikidata-literal-line info (parse-all text (lambda () (error "bar")) (text-transform info strong em #t start newline-rep) stream-null))) ; Comments ((#\< #\! (all char-whitespace?) #\- (all char-whitespace?) #\- (all char-whitespace?) (bind text (*? char?)) (all char-whitespace?) #\- (all char-whitespace?) #\- (all char-whitespace?) #\>) (wikidata-comment info text)) ; Span tags: ((#\< (all char-whitespace?) (bind tag (all char-alphabetic?)) (assert (and-let* ((ext (hash-table-ref/default (wikidata-extensions info) (stream->symbol (stream-downcase tag)) #f))) (extension-code-span ext))) ; Parameters (* (+ char-whitespace?) (bind name (all char-alphabetic?)) (all char-whitespace?) (? #\= (all char-whitespace?) (or (#\" (bind value (all (not #\"))) #\") (#\' (bind value (all (not #\'))) #\') ((bind value (all (or char-alphabetic? char-numeric?)))))) (bind-accum (params '()) cons (cons (stream->symbol (stream-downcase name)) value))) (all char-whitespace?) #\> (bind text (*? char?)) #\< (all char-whitespace?) #\/ tag (all char-whitespace?) #\>) (run-span-extension (hash-table-ref (wikidata-extensions info) (stream->symbol (stream-downcase tag))) text params info)) ; Break tags: ((#\< (all char-whitespace?) (bind tag (all char-alphabetic?)) (assert (and-let* ((ext (hash-table-ref/default (wikidata-extensions info) (stream->symbol (stream-downcase tag)) #f))) (extension-code-break ext))) ; Parameters (* (+ char-whitespace?) (bind name (all (or char-alphabetic? char-numeric?))) (all char-whitespace?) (? #\= (all char-whitespace?) (or (#\" (bind value (all (not #\"))) #\") (#\' (bind value (all (not #\'))) #\') ((bind value (all (or char-alphabetic? char-numeric?)))))) (bind-accum (params '()) cons (cons (stream->symbol (stream-downcase name)) value))) (all char-whitespace?) (? #\/) (all char-whitespace?) #\>) (run-break-extension (hash-table-ref (wikidata-extensions info) (stream->symbol (stream-downcase tag))) params info)) ; Replace certain sequences with HTML entities, unless we are inside a ;, ortag. ((#\< #\< (assert (not literal))) (wikidata-special-char info 'laquo)) ((#\> #\> (assert (not literal))) (wikidata-special-char info 'raquo)) ((#\( (or #\R #\r) #\) (assert (not literal))) (wikidata-special-char info 'reg)) ((#\( (or #\C #\c) #\) (assert (not literal))) (wikidata-special-char info 'copyright)) ((#\1 #\/ #\2 (assert (not literal))) (string->stream "½")) ((#\1 #\/ #\4 (assert (not literal))) (string->stream "¼")) ((#\3 #\/ #\4 (assert (not literal))) (string->stream "¾")) ((#\< #\- #\> (assert (not literal))) (wikidata-special-char info 'double-arrow)) ((#\< #\- (assert (not literal))) (wikidata-special-char info 'left-arrow)) ((#\- #\> (assert (not literal))) (wikidata-special-char info 'right-arrow)) ((#\< #\= #\> (assert (not literal))) (wikidata-special-char info 'double-arrow-wide)) ((#\< #\= (assert (not literal))) (wikidata-special-char info 'left-arrow-wide)) ((#\= #\> (assert (not literal))) (wikidata-special-char info 'right-arrow-wide)) ((#\- #\- #\- (assert (not literal))) (wikidata-special-char info 'mdash)) ((#\- #\- (assert (not literal))) (wikidata-special-char info 'ndash)) ; Some entities that we don't want to include literally. If you add ; more, make sure you also include them in the list of non-standard ; characters (in function standard-char?). (((bind char (or #\{ #\} #\@ #\# #\$ #\< #\> #\& #\_ #\\ #\"))) (wikidata-special-char info (stream-car char))) ; An email address: (((bind initial (?? char?)) (bind email char-alphabetic? (all (or char-alphabetic? char-numeric? #\. #\- #\_ #\+)) #\@ (+ (or char-alphabetic? char-numeric?) (* (or char-alphabetic? char-numeric? #\-)) (or char-alphabetic? char-numeric?) #\.) (or char-alphabetic? char-numeric?) (* (or char-alphabetic? char-numeric? #\-)) (or char-alphabetic? char-numeric?))) (stream-append initial (make-link info (string->stream "mailto") email email))) ; A URL: (((bind initial (?? char?)) (bind type (all char-alphabetic?)) #\: (assert (assoc (stream->symbol (stream-downcase type)) *allowed-url-schemes*)) (bind dst #\/ #\/ (+ (or char-alphabetic? char-numeric?) (* (or char-alphabetic? char-numeric? #\-)) (or char-alphabetic? char-numeric?) #\.) (or char-alphabetic? char-numeric?) (* (or char-alphabetic? char-numeric? #\-)) (or char-alphabetic? char-numeric?) ; Port (? #\: (+ char-numeric?)) ; File or QUERY STRING or Anchor (? (or #\/ #\? #\#) (? (* (or char-alphabetic? char-numeric? #\~ #\/ #\. #\? #\& #\# #\% #\= #\- #\_)) ; The last character must not be a dot: (or char-alphabetic? char-numeric? #\~ #\/ #\? #\& #\# #\% #\= #\- #\_))))) (stream-append initial (make-link info type dst #f))) ; Normal text: (((bind str not-newline? (all standard-char?))) str)))) (define (not-newline? x) (not (char=? x #\newline))) (define *allowed-url-schemes* '((http) (https) (ftp))) (define (standard-char? x) (case x ((#\newline #\@ #\' #\[ #\] #\{ #\} #\< #\> #\& #\: #\| #\( #\space #\, #\. #\# #\$ #\- #\= #\_ #\\ #\") #f) (else #t))) (define (make-link info type dst name) (assert (wikidata? info)) ((let ((type-sym (and type (stream->symbol (stream-downcase type))))) (cond ((not type-sym) make-default-link) ((assoc type-sym *link-types*) => cadr) ((hash-table-ref/default (wikidata-linktypes info) type-sym #f) => run-linktype) (else make-default-link))) info type dst name)) (define (make-default-link info type dst name) (assert (wikidata? info)) (let ((real-dst (if type (stream-append type (stream-cons #\: dst)) dst))) (wikidata-link info real-dst (or name real-dst)))) (define (make-link-image info type dst name) (assert (wikidata? info)) (wikidata-image info dst (or name dst))) (define (register-link-tag info type dst name) (assert (wikidata? info)) (wikidata-tags info (or name dst))) (define (make-link-include info type dst name) (assert (wikidata? info)) ((wikidata-include info) (stream->string dst) stream-null)) (define (make-link-toc info type dst name) (assert (wikidata? info)) (wikidata-toc info info dst)) (define (make-link-mailto info type dst name) (wikidata-link info (stream-append type (stream-cons #\: dst)) (or name dst))) (define *link-types* `((include ,make-link-include) (toc ,make-link-toc) (image ,make-link-image) (tags ,register-link-tag) (mailto ,make-link-mailto))) (define (list-transform info current) (assert (wikidata? info)) (lambda (str fail parsed) (parse-token str fail parsed (((bind data current (all #\space) (bind list-item (or #\* #\#)) (all (all #\space) (or #\* #\#)) (? #\newline) (all (not (or #\* #\# #\newline)) (all (not #\newline)) (? #\newline)) (all #\newline) (all current (all #\space) list-item (all (all #\space) (or #\* #\#)) (? #\newline) (all (not (or #\* #\# #\newline)) (all (not #\newline)) (? #\newline)) (all #\newline)))) ((if (char=? (stream-car list-item) #\*) wikidata-unordered-list wikidata-ordered-list) info (parse-all data fail (list-transform info (stream-append current list-item))))) (((bind head (? current (all #\space) (? #\newline) (bind text (all (not (or #\* #\# #\newline)) (all (not #\newline)) (? #\newline))))) (all #\newline) (bind tail (all current (+all (all #\space) (or #\* #\#)) (all #\space) (? #\newline) (all (not (or #\* #\#)) (all (not #\newline)) (? #\newline)))) (assert (not (and (stream-null? head) (stream-null? tail))))) (wikidata-list-item info (stream-append (if (stream-null? head) stream-null (parse-all (stream-reverse (stream-drop-while char-whitespace? (stream-reverse text))) (lambda () (error "foo")) (text-transform info #f #f #f #f #\space))) (if (stream-null? tail) stream-null (parse-all tail fail (list-transform info current))))))))) (define (definition-list info) (assert (wikidata? info)) (lambda (str fail parsed) (parse-token str fail parsed ((#\; (all char-whitespace?) (bind term (*? (rule-apply (text-transform info #f #f #f #f #f)))) (? (all char-blank?) #\: (all char-blank?) (bind definition (*? (not #\: #\newline) (all standard-char?))) (all char-blank?)) (+ (all char-blank?) (or #\newline ((end))))) (wikidata-definition-item info term (parse-all definition (lambda () (error "foo")) (text-transform info #f #f #f #f #\space))))))) (define (global-token output open include name linktypes extensions extensions-args) (assert (driver? output)) (global-token-info (make-wikidata output open include name linktypes extensions extensions-args '() (make-hash-table)))) ; This is the global parser which gets called by wiki->html. It ; splits the input in chunks of lines (corresponding to ; paragraphs, lists, quotes, etc.) that must be processed ; together. (define (global-token-info info) (assert (wikidata? info)) (lambda (str fail parsed) (parse-token str fail parsed ; Simple line break: ((#\- #\- #\- #\- (all char-blank?)) (wikidata-horizontal-line info)) ; Get all headers. The type of header depends on the number ; of #\= signs: ((#\= (bind depth (+all #\=)) (all char-blank?) (bind name (*? (not #\newline))) (all char-blank?) (? #\= depth (all char-blank?)) (or #\newline ((end)))) (wikidata-header info (parse-all name (lambda () (error "noparse" (stream->string name))) (text-transform info #f #f #f #f #\space)) (min 5 (stream-length (stream-cdr depth))) (sections-accept-new-name! (wikidata-previous-sections info) name))) ; Skip empty lines: ((#\newline) stream-null) ; Process
or
lists calling the list-transform ; parser: (((bind lines (+ (+all (or #\# #\*) (all #\space)) (? #\newline) (all (not (or #\* #\# #\newline (#\= #\=))) (all (not #\newline)) (or #\newline ((end)))) (all #\newline)))) (parse-all lines (lambda () (error "list-transform failed to parse")) (list-transform info stream-null))) ; Process definition lists with the definition-list parser: (((bind lines (+ (rule-apply (definition-list info))))) (wikidata-definition-list info lines)) ; Get quoted text (lines starting with #\>) inside a pair of ;
tags: (((+all #\> (all (or #\space #\tab)) (bind text (all (not #\newline)) (or #\newline ((end)))) (bind-accum (result '()) cons text))) (wikidata-blockquote info (parse-all (stream-concatenate (list->stream (reverse result))) (lambda () (error "noparse" (stream->string (stream-reverse (stream-cdr text))))) (global-token-info info)))) ; Get literal text (lines starting with #\space) inside a ; pair oftags: (((bind text (+ #\space (*? char?) (or #\newline ((end)))))) (wikidata-literal info (parse-all (stream-cdr text) (lambda () (error "foo")) (text-transform info #f #f #t #\space #\newline)))) ; Comments ((#\< #\! (all char-whitespace?) #\- (all char-whitespace?) #\- (all char-whitespace?) (bind text (*? char?)) (all char-whitespace?) #\- (all char-whitespace?) #\- (all char-whitespace?) #\>) (wikidata-comment info text)) ; Detect span extensions: ((#\< (all char-whitespace?) (bind tag (all (or char-alphabetic? char-numeric?))) (assert (and-let* ((ext (hash-table-ref/default (wikidata-extensions info) (stream->symbol (stream-downcase tag)) #f))) (extension-code-span ext))) ; Parameters (* (+ char-whitespace?) (bind name (all (or char-alphabetic? char-numeric?))) (all char-whitespace?) (? #\= (all char-whitespace?) (or (#\" (bind value (all (not #\"))) #\") (#\' (bind value (all (not #\'))) #\') ((bind value (all (or char-alphabetic? char-numeric?)))))) (bind-accum (params '()) cons (cons (stream->symbol (stream-downcase name)) value))) (all char-whitespace?) #\> (bind text (*? char?)) #\< (all char-whitespace?) #\/ tag (all char-whitespace?) #\>) ; *tags-span-external* (run-span-extension (hash-table-ref (wikidata-extensions info) (stream->symbol (stream-downcase tag))) text params info)) ; Detect break tags: ((#\< (all char-whitespace?) (bind tag (all char-alphabetic?)) (assert (and-let* ((ext (hash-table-ref/default (wikidata-extensions info) (stream->symbol (stream-downcase tag)) #f))) (extension-code-break ext))) ; Parameters (* (+ char-whitespace?) (bind name (all (or char-alphabetic? char-numeric?))) (all char-whitespace?) (? #\= (all char-whitespace?) (or (#\" (bind value (all (not #\"))) #\") (#\' (bind value (all (not #\'))) #\') ((bind value (all (or char-alphabetic? char-numeric?)))))) (bind-accum (params '()) cons (cons (stream->symbol (stream-downcase name)) value))) (all char-whitespace?) (? #\/) #\>) (run-break-extension (hash-table-ref (wikidata-extensions info) (stream->symbol (stream-downcase tag))) params info)) ; Rule for normal paragraphs: (((bind text (+all (not (or #\space #\newline #\> #\* #\# (#\= #\=))) (all (not #\newline)) (or #\newline ((end)))))) (let ((text (parse-all text (lambda () (error "noparse" (stream->string (stream-reverse (stream-cdr text))))) (text-transform info #f #f #f #f #\space)))) (if (stream-null? text) stream-null (wikidata-paragraph info text))))))) (define (accum-with-driver driver) (lambda (str . rest) (let-optionals rest ((tail stream-null) (name "") (open (constantly stream-null)) (include (lambda (name tail) tail)) (linktypes (make-hash-table)) (extensions (make-hash-table))) (iterator->stream (lambda (collect stop) (stream-traverse (wiki-parse (driver collect) str tail name open include linktypes extensions))))))) (define wiki-links (accum-with-driver links-driver)) (define wiki-tags (accum-with-driver tags-driver)) ; TODO: This is already broken: we need to split PROCEDURE into two procs, one ; to generate the file and another one to include it (which can get called even ; if the file does not need to be generated). ; ; data-output-func receives three arguments: ; ; - a STREAM with some representation of the contents that will get rendered to ; the file. Or #f. ; ; - the MIME-TYPE (eg. "image/jpeg") ; ; - a PROCEDURE. ; ; If STREAM evaluates to true, it gets hashed and a filename (path) at some ; directory is created with the hash on it (only the filename, no actual file ; is created). If STREAM is #f, a random path (in the same directory) is used. ; If data-output-func wants to allow the contents to be rendered, it calls the ; procedure and passes two arguments: the actual filename that should be ; generated and the URL where that filename can be loaded. data-output-func ; returns whatever the procedure returned (or, if it doesn't want any file to ; be created, whatever stream it wants to display instead of the file). ; ; The caller should not assume that the file it specified was created, just ; assume that it may have been created. There might have been problems that ; prevented the procedure from actually creating it. ; ; Alternatively, data-output-func may be just #f, in which case wiki->html will ; now that it is never allowed to create any files. ; ; This interface hasa the following advantages: ; ; 1. The caller to wiki->html can register all the files created as a result of ; the parsing. ; ; 2. The caller to wiki->html gets full control as to where the files are ; actually created. It can even specify that no file should actually be ; created. ; ; 3. Extensions can render new files, they just need to be given access to ; data-output-func. ; ; 4. We reuse paths, since their names depend on the hash of the content being ; rendered. That way, if some content doesn't change, wiki->html will always ; use the same filename for it. While this isn't a *requirement*, it makes ; wiki->html generated directories/files play nice with mirror software such as ; the file-mirror egg (which would otherwise constantly have to reupload ; everything, since the paths would change). ; ; 5. The data-output-func is responsible for mapping the mime-type to the ; actual extension that should be used. Which is good: wiki->html shouldn't be ; the one to do that (because it would require to make assumptions that may not ; always hold). ; ; The mime type passed is one of the following: ; ; - image/png ; ; For functions/extensions calling data-output-func, here are some guidelines: ; ; - Make sure the mime type you use is documented above. ; ; - Note that the actual pathname created will depend on the actual value of ; the stream (very likely be a md5 or sha1 hash of it). You should make sure ; that the inputs from which your file depends are fully included in the ; stream. Otherwise you could end up overwriting the file with a different ; one. For that reason, your stream should always start with the name of the ; caller (such as "html-math") followed by a colon followed by the actual input ; used by that caller. If you can't fully specify your inputs here, just use ; #f (but this practice is discouraged!). ; ; This interface should hopefully accomodate well the needs of extensions' ; creators. I know that at some point it will break and we will need something ; else, but I hope quite a long time passes before that horrid day. (define (wiki->html str . rest) (stream-delay (let-optionals rest ((tail stream-null) (name "") (open (constantly stream-null)) (include (lambda (name tail) tail)) (linktypes (make-hash-table)) (make-header (make-html-header)) (data-output-func (constantly stream-null)) (check-exists? (constantly #t)) (extensions (make-hash-table)) (url-adjust identity) (extension-args #f) (no-follow? (constantly #f))) (wiki-parse (html-driver make-header data-output-func check-exists? url-adjust no-follow?) str tail name open include linktypes extensions extension-args)))) (define (wiki->text str . rest) (stream-delay (let-optionals rest ((human-readable #t) (tail stream-null) (name "") (open (constantly stream-null)) (include (lambda (name tail) tail)) (linktypes (make-hash-table)) (extensions (make-hash-table)) (extension-args #f)) (wiki-parse (text-driver human-readable) str tail name open include linktypes extensions extension-args)))) (define (wiki->latex str . rest) (stream-delay (let-optionals rest ((tail stream-null) (name "") (open (constantly stream-null)) (include (lambda (name tail) tail)) (linktypes (make-hash-table)) (extensions (make-hash-table)) (class *latex-default-document-class*) (links-base identity) (extension-args #f)) (wiki-parse (latex-driver class links-base) str tail name open include linktypes extensions extension-args)))) (define (wiki-parse output str . rest) (assert (driver? output)) (let-optionals rest ((tail stream-null) (name "") (open (constantly stream-null)) (include (lambda (name tail) tail)) (linktypes (make-hash-table)) (extensions (make-hash-table)) (extension-args #f)) (parse-all str (lambda () (error "Syntax error")) (global-token output open include name linktypes extensions extension-args) tail))) ;;; Linktypes ; ; Used for interwiki links and stuff. For example, have [[google:something]] ; translate into a Google search for that keyword. ; Load the linktypes specified in file into hash. The file has the format ; NAME:CONTENT, where CONTENT is the wiki code we will replace the link with. ; CONTENT must have %t for the target and %n for the link's name. ; ; E.g.: google:[[http://www.google.com/search?hl=es&q=%t|%n]] ; ; All the linktypes are added to hash hash, indexed by their type. The value ; is a stream where entries are either streams of characters (to be included ; literally) or characters for the escape sequences. (define (load-linktypes hash file) (with-input-from-file file (lambda () (stream-for-each (lambda (line) (receive (name type) (stream-break (cut char=? #\: <>) line) (unless (stream-null? type) (hash-table-set! hash (stream->symbol (stream-downcase name)) (linktype-parse (stream-cdr type)))))) (stream-remove (lambda (x) (or (stream-null? x) (char=? (stream-car x) #\#))) (stream-map (cut stream-drop-while char-whitespace? <>) (stream-lines (port->stream (current-input-port))))))))) (define (linktype-parse type) (receive (literal escape) (stream-break (cut char=? #\% <>) type) (if (stream-null? literal) (linktype-parse-escape escape) (stream-cons literal (linktype-parse-escape escape))))) (define (linktype-parse-escape escape) (if (or (stream-null? escape) (stream-null? (stream-cdr escape))) stream-null (stream-cons (stream-cadr escape) (linktype-parse (stream-cddr escape))))) (define (run-linktype lt) (lambda (info type dst name) (let* ((type-sym (stream->symbol (stream-downcase type))) (text (run-linktype-first lt dst name))) ; Avoid infinite recursion (if (member type-sym (wikidata-linktypes-current info)) text (parse-all text (lambda () (error "Internal error: Unable to parse" (stream->string text))) (text-transform (apply make-wikidata (map (lambda (f) (f info)) (list wikidata-driver wikidata-open wikidata-include wikidata-name wikidata-linktypes wikidata-extensions wikidata-extension-args wikidata-previous-sections (lambda (o) (cons type-sym (wikidata-linktypes-current o)))))) #f #f #f #f #\newline) stream-null))))) (define (run-linktype-first lt dst name) (let ((votes (delay (vote-linktype (or name dst))))) (let loop ((lt lt)) (stream-delay (if (stream-null? lt) stream-null (stream-append (if (stream? (stream-car lt)) (stream-car lt) (case (stream-car lt) ((#\c) (number->stream (stream-length (force votes)))) ((#\n) (or name dst)) ((#\o) (stream-map char-downcase (or name dst))) ((#\t) dst) ((#\u) (stream-map char-downcase dst)) ((#\v) (string->stream (format #f "~,2F" (votes-average (force votes) 0 0)))) (else (error "Invalid escape sequence" (stream-car lt))))) (loop (stream-cdr lt)))))))) (define (votes-average str total count) (cond ((not (stream-null? str)) (votes-average (stream-cdr str) (+ total (stream-car str)) (+ count 1))) ((zero? count) 0) (else (/ total count)))) (define (vote-linktype str) (stream-filter identity (stream-map stream->number (stream-split str char-whitespace?)))) ;; (define (simple-tag-span-get name get-parser) (list name (let ((open (stream-append (stream #\<) (symbol->stream name) (stream #\>))) (close (stream-append (stream #\< #\/) (symbol->stream name) (stream #\>)))) (lambda (text params . args) (stream-append open (parse-all text (lambda () (error "bar")) (apply get-parser args) stream-null) close))))) (define (simple-tag-span name) (simple-tag-span-get name text-transform)) (define (shell-escape path) (list->string (cons #\space (fold-right (lambda (c rest) (if (or (char-alphabetic? c) (char-numeric? c) (member c '(#\/ #\-))) (cons c rest) (cons* #\\ c rest))) '() (stream->list path))))) (define *tags-span* `( ,@(map simple-tag-span '(u i span small big b sup sub)))) ;;; Sections for wiki pages (define (recognize-start-wiki document) (receive (result rest new-fail parsed) (parse-token document (constantly #f #f #f #f) stream-null ((#\= #\= (bind depth (all #\=)) (all char-blank?) (bind name (*? (not #\newline))) (all char-blank?) (? #\= #\= depth (all char-blank?)) (or #\newline ((end)))) (make-section (stream-length depth) name))) (if result (values result (stream-reverse parsed) rest) (take-one-line document)))) (define (wiki->sections document) (document->sections recognize-start-wiki document)) (define wiki->toc (compose sections->toc wiki->sections)) ;;; Extensions (define (load-extensions-from-file extensions file) (set! *extensions* '()) ; just in case. (load file) (when (null? *extensions*) (warning "Extension does not define anything!" file)) (for-each (lambda (alist) (and (pair? alist) (not (null? alist)) (hash-table-set! extensions (car alist) (alist->extension alist)))) *extensions*)) ; TODO: This is bad, we shouldn't do this. Ugh. But we need some way ; to communicate with the code in the extensions. (define *extensions* '()) (define (alist->extension alist) (make-extension (car alist) (cdr alist))) (define (extension-data ext data) (let ((value (assoc data (extension-table ext)))) (and value (pair? (cdr value)) (cadr value)))) (define extension-code-span (cut extension-data <> 'code-span)) (define extension-code-break (cut extension-data <> 'code-break)) (define extension-update (cut extension-data <> 'update)) (define extension-files-actions-links (cut extension-data <> 'files-actions-links)) (define extension-toc-header (cut extension-data <> 'toc-header)) (define (run-span-extension extension text params info) ((extension-code-span extension) ; TODO: This really ought to be an environment. (lambda (op) (case op ((text) text) ((params) params) ((parse) (lambda (str) (parse-all str (lambda () (error "Syntax error")) (global-token-info info) stream-null))) ; Function to parse some text as a paragraph. Receives the text and an ; optional alist with properties corresponding to parameters for ; text-transform. ((parse-paragraph) (lambda (text . rest) (let-optionals rest ((properties '())) (parse-all text (lambda () (error "bar")) (apply text-transform info (map (lambda (data) (cadr (or (assoc (car data) properties) data))) '((strong #f) (em #f) (literal #f) (start #f) (newline #\newline)))) stream-null)))) ; This used to be deprecated but it really is needed by some ; format-dependant extensions. ((output-format) (driver-output-format (wikidata-driver info))) ((driver) (wikidata-driver info)) (else ((wikidata-extension-args info) op)))))) (define (run-break-extension extension params info) ((extension-code-break extension) (lambda (op) (case op ((params) params) ((parse) (lambda (str) (parse-all str (lambda () (error "Syntax error")) (global-token-info info) stream-null))) ; Function to parse some text as a paragraph. Receives the text and an ; optional alist with properties corresponding to parameters for ; text-transform. ((parse-paragraph) (lambda (text . rest) (let-optionals rest ((properties '())) (parse-all text (lambda () (error "bar")) (apply text-transform info (map (lambda (data) (cadr (or (assoc (car data) properties) data))) '((strong #f) (em #f) (literal #f) (start #f) (newline #\newline)))) stream-null)))) ; Deprecated. The reason to deprecate it is that extensions shouldn't ; have to deal with having to recognize multiple formats ever. They ; should really on the drivers for format-specific things. ; ; Why am I committing it if it is deprecated? Because drivers still ; don't know about,
, and . They ought to. Once ; they do, the core-extension tags.scm won't need output-format so this ; whole thing will go away. ; ; Including this terribly nice comment, damn. ((output-format) (driver-output-format (wikidata-driver info))) ((driver) (wikidata-driver info)) (else (if (wikidata-extension-args info) ((wikidata-extension-args info) op) (warning "Requested environment object but environment is not defined~%" op))))))) ;;; TEXI driver (define (wiki->texi str . rest) (stream-delay (let-optionals rest ((tail stream-null) (name "") (open (constantly stream-null)) (include (lambda (name tail) tail)) (linktypes (make-hash-table)) (extensions (make-hash-table)) (extension-args #f)) (let-values (((texi-nodes lookup-table) (texi-menu str))) (stream-append (->stream-char (format #f "@menu\n")) texi-nodes (->stream-char (format #f "@end menu\n")) (wiki-parse (texi-driver lookup-table) str tail name open include linktypes extensions extension-args)))))) (define (texi-wrap start end) (let ((real-start (string->stream start)) (real-end (string->stream end))) (lambda (arg) (stream-append real-start arg real-end)))) (define (texi-page author title copyright first content . rest) (let-optionals rest ((subtitle #f) (month-year #f) (version #f)) (stream-append (->stream-char (string-concatenate (list (format #f "\\input texinfo @c -*- texinfo -*-\n") (format #f "@settitle ~A\n" title) (format #f "@setchapternewpage on\n") (if month-year (format #f "@set month-year ~A\n" month-year) "") (if version (format #f "@set version ~A\n" version) "") (format #f "@copying\n") (format #f "~A\n" copyright) (format #f "@end copying\n") (format #f "@titlepage\n") (format #f "@sp 10\n") (format #f "@title{~A}\n" title) (if subtitle (format #f "@subtitle{~A}\n" subtitle) "") (if version (format #f "@subtitle{Edition @value{version}}\n") "") (format #f "@author{~A}\n" author) (format #f "@comment The following two commands start the copyright page.\n") (format #f "@page\n") (format #f "@vskip 0pt plus 1fill\n") (format #f "~A\n" copyright) (format #f "@end titlepage\n")))) (->stream-char (string-concatenate (list (format #f "@node Top, ~A, (dir), (dir)\n" first) (format #f "@top ~A\n" title) (format #f "This is the top node.\n")))) content (->stream-char (string-concatenate (list (format #f "\n") (format #f "@shortcontents\n") (format #f "@contents\n") (format #f "@bye\n"))))))) (define (texi-find-node lookup-table n) (hash-table-ref lookup-table (string->symbol (escape-texi-node-name n)))) (define (texi-driver lookup-table) (make-driver 'texi ; horizontal line (constantly stream-null) ; make header (lambda (name depth id) (stream-append ;; print the @node and @menu. (with-output-to-stream (lambda () (print (texi-find-node lookup-table (stream->string name))))) (stream #\newline #\@) (string->stream (case depth ((0) "chapter") ((1) "section") ((2) "subsection") (else "subsubsection"))) (stream #\space) name (stream #\newline #\newline))) ;; blockquote (texi-wrap "@quotation\n" "address@hidden quotation\n") ; center (texi-wrap "@center " "") ; small (lambda (x) (error "Small not implemented yet in Texinfo mode.")) ; big (lambda (x) (error "Big not implemented yet in Texinfo mode.")) ; verbatim (texi-wrap "@verbatim\n" "address@hidden verbatim\n") ; code (texi-wrap "@code{" "}") ; paragraph (lambda (x) (stream-append (string->stream "") x (string->stream "\n\n"))) ; strong (texi-wrap "@b{" "}") ; emphasis (texi-wrap "@emph{" "}") ; link (lambda (dst name) (let* ((link (stream->string (texi-link-target dst))) (url (uri link #f))) (stream-append (cond ((and url (uri-scheme url)) (string->stream (conc "@uref{" (uri->string url)))) (else (string->stream (conc "@ref{" link)))) (string->stream ", ") name (stream #\})))) ; image (lambda (dst name) (warning "Image not implemented in Texinfo mode.~%") (receive (type alt) (stream-break (cut char=? <> #\|) name) (stream->string (format #f "[[IMAGE:~A]]" (stream->string (or alt dst)))))) ; math (lambda (text) (string->stream (format #f "$$~A$$" (stream->string text)))) ; ordered list (texi-wrap "@enumerate\n" "address@hidden enumerate\n") ; bullets list (texi-wrap "@itemize\n" "address@hidden itemize\n") ; list item (texi-wrap "@item " "\n\n") ; definition list (texi-wrap "@table @b\n" "address@hidden table\n") ; definition (lambda (term definition) (string->stream (format #f "@item ~A~%~%~A~%" (stream->string term) (stream->string definition)))) ;; toc (constantly stream-null) ; special-character ;; fixme: just borrowed this from latex-driver. (lambda (x) (->stream-char (case x ((#\@ #\{ #\}) (stream #\@ x)) ((copyright) "(C)") ((reg) "(R)") ((left-arrow) "<-") ((right-arrow) "->") ((double-arrow) "<->") ((double-arrow-wide) "<=>") ((left-arrow-wide) "<=") ((right-arrow-wide) "=>") ((mdash) "---") ((ndash) "--") ((laquo) #\xab) ((raquo) #\xbb) (else (stream x))))) ; tags (constantly stream-null) ; comments (constantly stream-null) ; line-break (constantly (stream #\\ #\\ )) ; anchor (lambda (anchor text) (string->stream (format #f "@anchor{~A}" (stream->string text)))) )) (define (texi-link-target dst) (cond ((stream-null? dst) stream-null) ((char=? (stream-car dst) #\#) (stream-cons* #\\ #\# (texi-link-target (stream-cdr dst)))) (else (stream-cons (stream-car dst) (texi-link-target (stream-cdr dst)))))) ;;; Texi: menus, navigation, node relationships ;; The current strategy is to do a pre-pass on the wiki-stream, and ;; build a lookup table of all the sections (which will become nodes ;; in the texi output). This table is passed as an argument when the ;; texi-driver is being created, and used in the TOC and Section ;; handlers to generate the appropriate navigational cues. ;; This isn't very streamy code. :-) I'm sure we can make it more ;; consistent with the stream-wiki style once it's working correctly. ;; Values in the lookup table are texi-node records. (Keys are ;; node-names.) (define-record texi-node name next prev up submenu depth) (define (print-node node out #!optional (prologue #f)) ;; print the @node line for the node, an optional prologue, and its ;; menu if any. (fprintf out "@node ~A, ~A, ~A, ~A~%" (texi-node-name node) (or (texi-node-next node) "(dir)") (or (texi-node-prev node) "(dir)") (or (texi-node-up node) "(dir)")) (when prologue (print prologue)) (let ((sub (texi-node-submenu node))) (unless (null? sub) (fprintf out "address@hidden") (for-each (lambda (sub) (fprintf out "* ~A::~%" sub)) (reverse sub)) (fprintf out "address@hidden menu~%")))) (define-record-printer (texi-node node out) (print-node node out #f)) (define (escape-texi-node-name s) ;; commas are not allowed in node names. What else? (string-translate* s '(("," . ";")))) (define texi-menu (let ((lookup (make-hash-table))) (compose (lambda (nodes) (values nodes lookup)) (accum-with-driver (texi-node-driver lookup))))) (define (texi-node-driver lookup) (lambda (register) (let ((last-at-depth (make-vector 10 #f)) (last-link #f)) (make-driver 'texi-node (constantly stream-null) ;; make header (lambda (name depth id) (let* ((sym (string->symbol (escape-texi-node-name (stream->string name)))) (up-node (and (positive? depth) (vector-ref last-at-depth (- depth 1)))) (this-node (make-texi-node sym #f #f (and up-node (texi-node-name up-node)) (list) depth))) ;; if a node by that name already exists, there are two possibilities: ;; there are two nodes of the same name in the wiki document (which is an error) ;; or a dummy node of that name was created by a "Next" link (see below) (let ((l (hash-table-ref/default lookup sym #f))) (if (and l (texi-node-name l)) ;; ;; Texinfo does not support two nodes with the same name ;; ;; This would break svnwiki's handling of sections anyway; ;; editing one may result in overwriting the other one). ;; (error 'texi-node-driver "two nodes cannot have the same name: " (stream->string name))) ;; use the next/prev link in the dummy node, if it was created (and l (cond ((texi-node-prev l) (texi-node-prev-set! this-node (texi-node-prev l))) ((texi-node-next l) (texi-node-next-set! this-node (texi-node-next l))))) ;; discard the dummy node and insert the real node in the table (hash-table-set! lookup sym this-node) (if (= 0 depth) (stream-for-each register (string->stream (format #f "* ~A::\n" (texi-node-name this-node))))) (if up-node (texi-node-submenu-set! up-node (cons sym (texi-node-submenu up-node)))) (let ((last-node (vector-ref last-at-depth depth))) (if last-node (begin (texi-node-next-set! last-node (texi-node-name this-node)) (texi-node-prev-set! this-node (texi-node-name last-node))))) (vector-set! last-at-depth depth this-node) stream-null))) (constantly stream-null) (constantly stream-null) (constantly stream-null) (constantly stream-null) (constantly stream-null) (constantly stream-null) ;; The text "Next: " or "Previous: ", followed by a link serves to ;; override the default prev/next nodes, respectively (lambda (x) (cond ((and (<= 6 (stream-length x)) (string=? "Next: " (stream->string (stream-take x 6)))) (begin (let ((up-node (vector-ref last-at-depth 0))) (if (and up-node last-link) (let ((texi-next last-link)) (texi-node-next-set! up-node texi-next) (let ((texi-next-node (hash-table-ref/default lookup texi-next #f))) (if texi-next-node (texi-node-prev-set! texi-next-node (texi-node-name up-node)) (let ((dummy-node (make-texi-node #f #f (texi-node-name up-node) #f #f #f))) (hash-table-set! lookup texi-next dummy-node)))))) stream-null))) ((and (<= 10 (stream-length x)) (string=? "Previous: " (stream->string (stream-take x 10)))) (begin (let ((up-node (vector-ref last-at-depth 0))) (if (and up-node last-link) (let ((texi-prev last-link)) (texi-node-prev-set! up-node texi-prev) (let ((texi-prev-node (hash-table-ref/default lookup texi-prev #f))) (if texi-prev-node (texi-node-next-set! texi-prev-node (texi-node-name up-node)) (let ((dummy-node (make-texi-node #f (texi-node-name up-node) #f #f #f #f))) (hash-table-set! lookup texi-prev dummy-node)))))) stream-null))) (else (begin (set! last-link #f) stream-null)))) (constantly stream-null) (constantly stream-null) ;; Local links named "Next" or "Previous" override the default ;; prev/next nodes in the Texinfo navigation menu (lambda (dst name) (let* ((link (stream->string (texi-link-target dst))) (url (uri link #f))) (if (not (and url (uri-scheme url))) (set! last-link (string->symbol (escape-texi-node-name (stream->string dst))))) (cond ((and url (uri-scheme url)) (values)) ((string=? (stream->string name) "Next") (let ((up-node (vector-ref last-at-depth 0))) (if up-node (let ((texi-next (string->symbol (escape-texi-node-name (stream->string name))))) (texi-node-next-set! up-node texi-next) (let ((texi-next-node (hash-table-ref/default lookup texi-next #f))) (if texi-next-node (texi-node-prev-set! texi-next-node (texi-node-name up-node)) (let ((dummy-node (make-texi-node #f #f (texi-node-name up-node) #f #f #f))) (hash-table-set! lookup texi-next dummy-node)))))))) ((string=? (stream->string name) "Previous") (let ((up-node (vector-ref last-at-depth 0))) (if up-node (let ((texi-prev (string->symbol (escape-texi-node-name (stream->string name))))) (texi-node-prev-set! up-node texi-prev) (let ((texi-prev-node (hash-table-ref/default lookup texi-prev #f))) (if texi-prev-node (texi-node-next-set! texi-prev-node (texi-node-name up-node)) (let ((dummy-node (make-texi-node #f (texi-node-name up-node) #f #f #f #f))) (hash-table-set! lookup texi-prev dummy-node))))))))) stream-null)) (constantly stream-null) (constantly stream-null) (constantly stream-null) (constantly stream-null) (constantly stream-null) (constantly stream-null) (constantly stream-null) ;; toc (constantly stream-null) ;; special-char (lambda (x) (->stream-char (case x ((#\@ #\{ #\}) (stream #\@ x)) ((copyright) "(C)") ((reg) "(R)") ((left-arrow) "<-") ((right-arrow) "->") ((double-arrow) "<->") ((double-arrow-wide) "<=>") ((left-arrow-wide) "<=") ((right-arrow-wide) "=>") ((mdash) "---") ((ndash) "--") ((laquo) #\xab) ((raquo) #\xbb) (else (stream x))))) ;; tags (constantly stream-null) (constantly stream-null) (constantly stream-null) ;; anchor (constantly stream-null)))))