>From bd59d72dca9da87ac11d4a0e77eefd1adb5c9147 Mon Sep 17 00:00:00 2001
From: Peter Bex <address@hidden>
Date: Sun, 16 Sep 2012 20:04:14 +0200
Subject: [PATCH 2/4] Irregex: Implement Laurikari's algorithm for
 tNFA->(t)DFA compilation.  (upstream changesets
 6ec98fa4f5a9 and 71c42f9974ce)

---
 irregex-core.scm       |  938 ++++++++++++++++++++++++------------------------
 irregex.scm            |   11 +-
 tests/test-irregex.scm |    4 +-
 types.db               |   34 +-
 4 files changed, 500 insertions(+), 487 deletions(-)

diff --git a/irregex-core.scm b/irregex-core.scm
index 017e090..e0fc210 100644
--- a/irregex-core.scm
+++ b/irregex-core.scm
@@ -101,24 +101,21 @@
      (define (irregex-dfa/search x)
        (internal "##sys#check-structure" x 'regexp 'irregex-dfa/search)
        (internal "##sys#slot" x 2))
-     (define (irregex-dfa/extract x)
-       (internal "##sys#check-structure" x 'regexp 'irregex-dfa/extract)
-       (internal "##sys#slot" x 3))
      (define (irregex-nfa x)
        (internal "##sys#check-structure" x 'regexp 'irregex-nfa)
-       (internal "##sys#slot" x 4))
+       (internal "##sys#slot" x 3))
      (define (irregex-flags x)
        (internal "##sys#check-structure" x 'regexp 'irregex-flags)
-       (internal "##sys#slot" x 5))
+       (internal "##sys#slot" x 4))
      (define (irregex-num-submatches x)
        (internal "##sys#check-structure" x 'regexp 'irregex-num-submatches)
-       (internal "##sys#slot" x 6))
+       (internal "##sys#slot" x 5))
      (define (irregex-lengths x)
        (internal "##sys#check-structure" x 'regexp 'irregex-lengths)
-       (internal "##sys#slot" x 7))
+       (internal "##sys#slot" x 6))
      (define (irregex-names x)
        (internal "##sys#check-structure" x 'regexp 'irregex-names)
-       (internal "##sys#slot" x 8))
+       (internal "##sys#slot" x 7))
      ;; make-irregex-match defined elsewhere
      (define (irregex-new-matches irx)
        (make-irregex-match (irregex-num-submatches irx) (irregex-names irx)))
@@ -174,22 +171,19 @@
   (else
    (begin
      (define irregex-tag '*irregex-tag*)
-     (define (make-irregex dfa dfa/search dfa/extract nfa flags
-			   submatches lengths names)
-       (vector irregex-tag dfa dfa/search dfa/extract nfa flags
-	       submatches lengths names))
+     (define (make-irregex dfa dfa/search nfa flags submatches lengths names)
+       (vector irregex-tag dfa dfa/search nfa flags submatches lengths names))
      (define (irregex? obj)
        (and (vector? obj)
-	    (= 9 (vector-length obj))
+	    (= 8 (vector-length obj))
 	    (eq? irregex-tag (vector-ref obj 0))))
      (define (irregex-dfa x) (vector-ref x 1))
      (define (irregex-dfa/search x) (vector-ref x 2))
-     (define (irregex-dfa/extract x) (vector-ref x 3))
-     (define (irregex-nfa x) (vector-ref x 4))
-     (define (irregex-flags x) (vector-ref x 5))
-     (define (irregex-num-submatches x) (vector-ref x 6))
-     (define (irregex-lengths x) (vector-ref x 7))
-     (define (irregex-names x) (vector-ref x 8))
+     (define (irregex-nfa x) (vector-ref x 3))
+     (define (irregex-flags x) (vector-ref x 4))
+     (define (irregex-num-submatches x) (vector-ref x 5))
+     (define (irregex-lengths x) (vector-ref x 6))
+     (define (irregex-names x) (vector-ref x 7))
      (define (irregex-new-matches irx)
        (make-irregex-match (irregex-num-submatches irx) (irregex-names irx)))
      (define (irregex-reset-matches! m)
@@ -255,6 +249,14 @@
 (define (irregex-match-end-index-set! m n end)
   (vector-set! m (+ 6 (* n 4)) end))
 
+;; Tags use indices that are aligned to start/end positions just like the
+;; match vectors.  ie, a tag 0 is a start tag, 1 is its corresponding end tag.
+;; They start at 0, which requires us to map them to submatch index 1.
+;; Sorry for the horrible name ;)
+(define (irregex-match-chunk&index-from-tag-set! m t chunk index)
+  (vector-set! m (+ 7 (* t 2)) chunk)
+  (vector-set! m (+ 8 (* t 2)) index))
+
 ;; Helper procedure to convert any type of index from a rest args list
 ;; to a numeric index.  Named submatches are converted to their corresponding
 ;; numeric index, and numeric submatches are checked for validity.
@@ -1610,8 +1612,6 @@
                           (nfa->dfa nfa (* dfa-limit (nfa-num-states nfa)))))
                     (else #f)))
          (submatches (sre-count-submatches sre-dfa))
-         (extractor
-          (and dfa dfa/search (sre-match-extractor sre-dfa submatches)))
          (names (sre-names sre-dfa 1 '()))
          (lens (sre-length-ranges sre-dfa names))
          (flags (flag-join
@@ -1619,10 +1619,10 @@
                  (and (sre-consumer? sre) ~consumer?))))
     (cond
      (dfa
-      (make-irregex dfa dfa/search extractor #f flags submatches lens names))
+      (make-irregex dfa dfa/search #f flags submatches lens names))
      (else
       (let ((f (sre->procedure sre pat-flags names)))
-        (make-irregex #f #f #f f flags submatches lens names))))))
+        (make-irregex #f #f f flags submatches lens names))))))
 
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;;;; SRE Analysis
@@ -1968,11 +1968,6 @@
        ((dfa-match/longest (irregex-dfa irx) cnk src i #f #f matches 0)
         (irregex-match-start-chunk-set! matches 0 src)
         (irregex-match-start-index-set! matches 0 i)
-        ((irregex-dfa/extract irx)
-         cnk src i
-         (%irregex-match-end-chunk matches 0)
-         (%irregex-match-end-index matches 0)
-         matches)
         matches)
        (else
         #f)))
@@ -1989,11 +1984,6 @@
                ((dfa-match/longest dfa cnk src i #f #f matches 0)
                 (irregex-match-start-chunk-set! matches 0 src)
                 (irregex-match-start-index-set! matches 0 i)
-                ((irregex-dfa/extract irx)
-                 cnk src i
-                 (%irregex-match-end-chunk matches 0)
-                 (%irregex-match-end-index matches 0)
-                 matches)
                 matches)
                ((>= i end)
                 (let ((next (get-next src)))
@@ -2063,11 +2053,6 @@
          (irregex-match-start-index-set! matches
                                          0
                                          ((chunker-get-start cnk) src))
-         ((irregex-dfa/extract irx)
-          cnk src ((chunker-get-start cnk) src)
-          (%irregex-match-end-chunk matches 0)
-          (%irregex-match-end-index matches 0)
-          matches)
          matches)))
      (else
       (let* ((matcher (irregex-nfa irx))
@@ -2100,8 +2085,10 @@
 (define (dfa-init-state dfa)
   (vector-ref dfa 0))
 (define (dfa-next-state dfa node)
-  (vector-ref dfa (cdr node)))
-(define (dfa-final-state? dfa state)
+  (vector-ref dfa (cadr node)))
+(define (dfa-cell-commands dfa node)
+  (cddr node))
+(define (dfa-finalizer dfa state)
   (car state))
 
 ;; this searches for the first end index for which a match is possible
@@ -2109,15 +2096,17 @@
   (let ((get-str (chunker-get-str cnk))
         (get-start (chunker-get-start cnk))
         (get-end (chunker-get-end cnk))
-        (get-next (chunker-get-next cnk)))
-    (let lp1 ((src src) (start start) (state (dfa-init-state dfa)))
+        (get-next (chunker-get-next cnk))
+        ;; Skip the "set-up" state, we don't need to set tags.
+        (start-state (dfa-next-state dfa (cadr (dfa-init-state dfa)))))
+    (let lp1 ((src src) (start start) (state start-state))
       (and
        src
        (let ((str (get-str src))
              (end (get-end src)))
          (let lp2 ((i start) (state state))
            (cond
-            ((dfa-final-state? dfa state)
+            ((dfa-finalizer dfa state)
              (cond
               (index
                (irregex-match-end-chunk-set! matches index src)
@@ -2135,28 +2124,60 @@
              (let ((next (get-next src)))
                (and next (lp1 next (get-start next) state)))))))))))
 
+(define (finalize! finalizer memory matches)
+  (for-each
+   (lambda (tag&slot)
+     (let* ((tag (car tag&slot))
+            (slot (vector-ref memory (cdr tag&slot)))
+            (chunk&pos (vector-ref slot tag)))
+       (irregex-match-chunk&index-from-tag-set!
+        matches tag
+        (and chunk&pos (car chunk&pos))
+        (and chunk&pos (cdr chunk&pos)))))
+   finalizer))
+(define (make-initial-memory slots matches)
+  (let ((size (* (irregex-match-num-submatches matches) 2))
+        (memory (make-vector slots)))
+    (do ((i 0 (+ i 1)))
+        ((= i slots) memory)
+      (vector-set! memory i (make-vector size #f)))))
+
 ;; this finds the longest match starting at a given index
 (define (dfa-match/longest dfa cnk src start end-src end matches index)
-  (let ((get-str (chunker-get-str cnk))
-        (get-start (chunker-get-start cnk))
-        (get-end (chunker-get-end cnk))
-        (get-next (chunker-get-next cnk))
-        (start-is-final? (dfa-final-state? dfa (dfa-init-state dfa))))
+  (let* ((get-str (chunker-get-str cnk))
+         (get-start (chunker-get-start cnk))
+         (get-end (chunker-get-end cnk))
+         (get-next (chunker-get-next cnk))
+         (initial-state (dfa-init-state dfa))
+         (memory-size (car initial-state))
+         (submatches? (not (zero? memory-size)))
+         ;; A vector of vectors, each of size <number of start/end submatches>
+         (memory (make-initial-memory memory-size matches))
+         (init-cell (cadr initial-state))
+         (start-state (dfa-next-state dfa init-cell))
+         (start-finalizer (dfa-finalizer dfa start-state)))
     (cond
      (index
       (irregex-match-end-chunk-set! matches index #f)
       (irregex-match-end-index-set! matches index #f)))
+    (cond (submatches?
+           (for-each (lambda (s)
+                       (let ((slot (vector-ref memory (cdr s))))
+                         (vector-set! slot (car s) (cons src start))))
+                     (cdr (dfa-cell-commands dfa init-cell)))))
     (let lp1 ((src src)
               (start start)
-              (state (dfa-init-state dfa))
-              (res-src (and start-is-final? src))
-              (res-index (and start-is-final? start)))
+              (state start-state)
+              (res-src (and start-finalizer src))
+              (res-index (and start-finalizer start))
+              (finalizer start-finalizer))
       (let ((str (get-str src))
             (end (if (eq? src end-src) end (get-end src))))
         (let lp2 ((i start)
                   (state state)
                   (res-src res-src)
-                  (res-index res-index))
+                  (res-index res-index)
+                  (finalizer finalizer))
           (cond
            ((>= i end)
             (cond
@@ -2165,9 +2186,11 @@
               (irregex-match-end-index-set! matches index res-index)))
             (let ((next (and (not (eq? src end-src)) (get-next src))))
               (if next
-                  (lp1 next (get-start next) state res-src res-index)
+                  (lp1 next (get-start next) state res-src res-index finalizer)
                   (and index
                        (%irregex-match-end-chunk matches index)
+                       (or (not submatches?)
+                           (finalize! finalizer memory matches))
                        #t))))
            (else
             (let* ((ch (string-ref str i))
@@ -2178,17 +2201,37 @@
                                (cdr state))))
               (cond
                (cell
+                (cond
+                 (submatches?
+                  (let ((cmds (dfa-cell-commands dfa cell)))
+                    (for-each (lambda (s)
+                                (let ((slot (vector-ref memory (cdr s)))
+                                      (chunk&position (cons src (+ i 1))))
+                                  (vector-set! slot (car s) chunk&position)))
+                              (cdr cmds))
+                    (for-each (lambda (c)
+                                (let* ((tag (vector-ref c 0))
+                                       (ss (vector-ref memory (vector-ref c 1)))
+                                       (ds (vector-ref memory (vector-ref c 2))))
+                                  (vector-set! ds tag (vector-ref ss tag))))
+                              (car cmds)))))
                 (let ((next (dfa-next-state dfa cell)))
-                  (if (dfa-final-state? dfa next)
-                      (lp2 (+ i 1) next src (+ i 1))
-                      (lp2 (+ i 1) next res-src res-index))))
+                 (cond
+                  ((dfa-finalizer dfa next) =>
+                   (lambda (new-finalizer)
+                     (lp2 (+ i 1) next src (+ i 1) new-finalizer)))
+                  (else (lp2 (+ i 1) next res-src res-index finalizer)))))
                (res-src
                 (cond
                  (index
                   (irregex-match-end-chunk-set! matches index res-src)
                   (irregex-match-end-index-set! matches index res-index)))
+                (cond (submatches?
+                       (finalize! finalizer memory matches)))
                 #t)
                ((and index (%irregex-match-end-chunk matches index))
+                (cond (submatches?
+                       (finalize! finalizer memory matches)))
                 #t)
                (else
                 #f))))))))))
@@ -2329,6 +2372,10 @@
 
 (define (nfa-num-tags nfa)
   (vector-ref nfa 0))
+(define (nfa-highest-map-index nfa)
+  (vector-ref nfa 1))
+(define (nfa-set-highest-map-index! nfa idx)
+  (vector-set! nfa 1 idx))
 
 (define (nfa-get-state-trans nfa i)
   (if (= i 0) '() (vector-ref nfa (* i *nfa-num-fields*))))
@@ -2336,7 +2383,7 @@
   (vector-set! nfa (* i *nfa-num-fields*) x))
 
 (define (nfa-get-epsilons nfa i)
-  (vector-ref nfa (+ (* i *nfa-num-fields*) 1)))
+  (if (= i 0) '() (vector-ref nfa (+ (* i *nfa-num-fields*) 1))))
 (define (nfa-set-epsilons! nfa i x)
   (vector-set! nfa (+ (* i *nfa-num-fields*) 1) x))
 (define (nfa-add-epsilon! nfa i x t)
@@ -2344,21 +2391,24 @@
     (if (not (assv x eps))
         (nfa-set-epsilons! nfa i (cons (cons x t) eps)))))
 
-(define (nfa-get-state-closure nfa i)
-  (vector-ref nfa (+ (* i *nfa-num-fields*) 2)))
-(define (nfa-set-state-closure! nfa i x)
-  (vector-set! nfa (+ (* i *nfa-num-fields*) 2) x))
+(define (nfa-get-reorder-commands nfa mst)
+  (cond ((assoc mst
+                (vector-ref nfa (+ (* (nfa-multi-state-hash nfa mst)
+                                      *nfa-num-fields*) 2)))
+         => cdr)
+        (else #f)))
+(define (nfa-set-reorder-commands! nfa mst x)
+  (let ((i (+ (* (nfa-multi-state-hash nfa mst) *nfa-num-fields*) 2)))
+    (vector-set! nfa i (cons (cons mst x) (vector-ref nfa i)))))
 
 (define (nfa-get-closure nfa mst)
   (cond ((assoc mst
                 (vector-ref nfa (+ (* (nfa-multi-state-hash nfa mst)
-                                      *nfa-num-fields*)
-                                   (- *nfa-num-fields* 1))))
+                                      *nfa-num-fields*) 3)))
          => cdr)
         (else #f)))
 (define (nfa-add-closure! nfa mst x)
-  (let ((i (+ (* (nfa-multi-state-hash nfa mst) *nfa-num-fields*)
-              (- *nfa-num-fields* 1))))
+  (let ((i (+ (* (nfa-multi-state-hash nfa mst) *nfa-num-fields*) 3)))
     (vector-set! nfa i (cons (cons mst x) (vector-ref nfa i)))))
 
 ;; Compile and return the vector of NFA states (in groups of
@@ -2377,6 +2427,9 @@
               ;; We abuse the transitions slot for state 0 (the final state,
               ;; which can have no transitions) to store the number of tags.
               (vector-set! buf 0 (* max 2))
+              ;; We abuse the epsilons slot for state 0 to store the highest
+              ;; encountered memory slot mapping index.  Initialize to -1.
+              (vector-set! buf 1 -1)
               res)
              ((pair? (car sre))
               ;; The appends here should be safe (are they?)
@@ -2615,445 +2668,400 @@
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;;;; NFA multi-state representation
 
-;; Cache closures in a simple hash-table keyed on the smallest state
-;; (define (nfa-multi-state-hash nfa mst)
-;;   (car mst))
-
-;; Original sorted list-based representation
-
-;; (define (make-nfa-multi-state nfa)
-;;   '())
-
-;; (define (nfa-state->multi-state nfa state)
-;;   (list state))
-
-;; (define (nfa-multi-state-copy mst)
-;;   (map (lambda (x) x) mst))
-
-;; (define (list->nfa-multi-state nfa ls)
-;;   (nfa-multi-state-copy ls))
-
-;; (define (nfa-multi-state-contains? mst i)
-;;   (memq i mst))
-
-;; (define (nfa-multi-state-fold mst kons knil)
-;;   (fold kons knil mst))
-
-;; (define (nfa-multi-state-add! mst i)
-;;   (insert-sorted i mst))
-
-;; (define (nfa-multi-state-add mst i)
-;;   (insert-sorted i mst))
-
-;; (define (nfa-multi-state-union a b)
-;;   (merge-sorted a b))
-
-;; Sorted List Utilities
-
-;; (define (insert-sorted n ls)
-;;   (cond
-;;    ((null? ls)
-;;     (cons n '()))
-;;    ((<= n (car ls))
-;;     (if (= n (car ls))
-;;         ls
-;;         (cons n ls)))
-;;    (else
-;;     (cons (car ls) (insert-sorted n (cdr ls))))))
-
-;; (define (insert-sorted! n ls)
-;;   (cond
-;;    ((null? ls)
-;;     (cons n '()))
-;;    ((<= n (car ls))
-;;     (if (= n (car ls))
-;;         ls
-;;         (cons n ls)))
-;;    (else
-;;     (let lp ((head ls) (tail (cdr ls)))
-;;       (cond ((or (null? tail) (< n (car tail)))
-;;              (set-cdr! head (cons n tail)))
-;;             ((> n (car tail))
-;;              (lp tail (cdr tail)))))
-;;     ls)))
-
-;; (define (merge-sorted a b)
-;;   (cond ((null? a) b)
-;;         ((null? b) a)
-;;         ((< (car a) (car b))
-;;          (cons (car a) (merge-sorted (cdr a) b)))
-;;         ((> (car a) (car b))
-;;          (cons (car b) (merge-sorted a (cdr b))))
-;;         (else (merge-sorted (cdr a) b))))
-
-;; ========================================================= ;;
-
-;; Presized bit-vector based
-
 (define (nfa-multi-state-hash nfa mst)
-  (modulo (vector-ref mst 0) (nfa-num-states nfa)))
+  ;; We could do (modulo X (nfa-num-states nfa)) here which would be faster,
+  ;; but we can't assume a full numerical tower (and updating *could*
+  ;; produce a bignum), so we do it each time when updating the hash.
+  (vector-ref mst 2))
+
+;; Returns #f if NFA state does not occur in multi-state
+(define (nfa-state-mappings mst state)
+  (vector-ref mst (+ state 3)))
+
+(define (nfa-multi-state-mappings-summary mst)
+  (vector-ref mst 0))
+
+;; A multi-state holds a set of states with their tag-to-slot mappings.
+;; Slot 0 contains a summary of all mappings for all states in the multi-state.
+;; Slot 1 contains the total number of states in the multi-state.
+;; Slot 2 contains a hash value, which is used for quick lookup of cached
+;; reorder-commands or epsilon-closure in the NFA.  This is the sum of all
+;; state numbers plus each tag value (once per occurrence).  This is a silly
+;; hashing calculation, but it seems to produce a well-spread out hash table and
+;; it has the added advantage that we can use the value as a quick check if the
+;; state is definitely NOT equivalent to another in nfa-multi-state-same-states?
+;; The other slots contain mappings for each corresponding state.
 
 (define (make-nfa-multi-state nfa)
-  (make-vector (quotient (+ (nfa-num-states nfa) 24 -1) 24) 0))
-
-(define (nfa-state->multi-state nfa state)
-  (nfa-multi-state-add! (make-nfa-multi-state nfa) state))
-
-(define (nfa-multi-state-copy mst)
-  (let ((res (make-vector (vector-length mst))))
-    (do ((i (- (vector-length mst) 1) (- i 1)))
-        ((< i 0) res)
-      (vector-set! res i (vector-ref mst i)))))
-
-(define (nfa-multi-state-contains? mst i)
-  (let ((cell (quotient i 24))
-        (bit (remainder i 24)))
-    (not (zero? (bit-and (vector-ref mst cell) (bit-shl 1 bit))))))
-
-(define (nfa-multi-state-contains-only? mst i)
-  (let ((cell (quotient i 24))
-        (bit (remainder i 24)))
-    (= (vector-ref mst cell) (bit-shl 1 bit))))
-
-(define (nfa-multi-state-add! mst i)
-  (let ((cell (quotient i 24))
-        (bit (remainder i 24)))
-    (vector-set! mst cell (bit-ior (vector-ref mst cell) (bit-shl 1 bit)))
+  (let ((mst (make-vector (+ (nfa-num-states nfa) 3) #f)))
+    (vector-set! mst 0 (make-vector (nfa-num-tags nfa) '())) ; tag summary
+    (vector-set! mst 1 0)               ; total number of states
+    (vector-set! mst 2 0)               ; states and tags hash
     mst))
 
-(define (nfa-multi-state-add mst i)
-  (nfa-multi-state-add! (nfa-multi-state-copy mst) i))
+;; NOTE: This doesn't do a deep copy of the mappings.  Don't mutate them!
+(define (nfa-multi-state-copy mst)
+  (let ((v (vector-copy mst)))
+    (vector-set! v 0 (vector-copy (vector-ref mst 0)))
+    v))
 
-(define (nfa-multi-state-union! a b)
-  (do ((i (- (vector-length a) 1) (- i 1)))
-      ((< i 0) a)
-    (vector-set! a i (bit-ior (vector-ref a i) (vector-ref b i)))))
+(define (nfa-state->multi-state nfa state mappings)
+  (let ((mst (make-nfa-multi-state nfa)))
+    (nfa-multi-state-add! nfa mst state mappings)
+    mst))
 
-(define (nfa-multi-state-union a b)
-  (nfa-multi-state-union! (nfa-multi-state-copy a) b))
+;; Extend multi-state with a state and add its tag->slot mappings.
+(define (nfa-multi-state-add! nfa mst state mappings)
+  (let ((hash-value (vector-ref mst 2)))
+    (cond ((not (vector-ref mst (+ state 3))) ;  Update state hash & count?
+           (set! hash-value (+ (vector-ref mst 2) state))
+           (vector-set! mst 1 (+ (vector-ref mst 1) 1))))
+    (vector-set! mst (+ state 3) mappings)
+    (let ((all-mappings (vector-ref mst 0)))
+      (for-each
+       (lambda (tag&slot)
+         (let* ((t (car tag&slot))
+                (s (cdr tag&slot))
+                (m (vector-ref all-mappings t)))
+           (cond ((not (memv s m))
+                  (set! hash-value (+ hash-value t))
+                  (vector-set! all-mappings t (cons s m))))))
+       mappings))
+    (vector-set! mst 2 (modulo hash-value (nfa-num-states nfa)))))
+
+;; Same as above, but skip updating mappings summary.
+;; Called when we know all the tag->slot mappings are already in the summary.
+(define (nfa-multi-state-add/fast! nfa mst state mappings)
+  (cond ((not (vector-ref mst (+ state 3))) ;  Update state hash & count?
+         (vector-set! mst 2 (modulo (+ (vector-ref mst 2) state)
+                                    (nfa-num-states nfa)))
+         (vector-set! mst 1 (+ (vector-ref mst 1) 1))))
+  (vector-set! mst (+ state 3) mappings))
+
+;; Same as above, assigning a new slot for a tag.  This slot is then
+;; added to the summary, if it isn't in there yet.  This is more efficient
+;; than looping through all the mappings.
+(define (nfa-multi-state-add-tagged! nfa mst state mappings tag slot)
+  (let* ((mappings-summary (vector-ref mst 0))
+         (summary-tag-slots (vector-ref mappings-summary tag))
+         (new-mappings (let lp ((m mappings)
+                                (res '()))
+                         (cond ((null? m) (cons (cons tag slot) res))
+                               ((= (caar m) tag)
+                                (append res (cons (cons tag slot) (cdr m))))
+                               (else (lp (cdr m) (cons (car m) res))))))
+         (hash-value (vector-ref mst 2)))
+    (cond ((not (vector-ref mst (+ state 3))) ;  Update state hash & count?
+           (set! hash-value (+ hash-value state))
+           (vector-set! mst 1 (+ (vector-ref mst 1) 1))))
+    (vector-set! mst (+ state 3) new-mappings)
+    (cond ((not (memv slot summary-tag-slots)) ; Update tag/slot summary
+           (set! hash-value (+ hash-value tag))
+           (vector-set! mappings-summary tag (cons slot summary-tag-slots))))
+    (vector-set! mst 2 (modulo hash-value (nfa-num-states nfa)))
+    new-mappings))
+
+(define (nfa-multi-state-same-states? a b)
+  ;; First check if hash and state counts match, then check each state
+  (and (= (vector-ref a 2) (vector-ref b 2))
+       (= (vector-ref a 1) (vector-ref b 1))
+       (let ((len (vector-length a)))
+         (let lp ((i 3))
+           (or (= i len)
+               (and (equal? (not (vector-ref a i))
+                            (not (vector-ref b i)))
+                    (lp (+ i 1))))))))
 
 (define (nfa-multi-state-fold mst kons knil)
   (let ((limit (vector-length mst)))
-    (let lp1 ((i 0)
-              (acc knil))
-      (if (>= i limit)
+    (let lp ((i 3)
+             (acc knil))
+      (if (= i limit)
           acc
-          (let lp2 ((n (vector-ref mst i))
-                    (acc acc))
-            (if (zero? n)
-                (lp1 (+ i 1) acc)
-                (let* ((n2 (bit-and n (- n 1)))
-                       (n-tail (- n n2))
-                       (bit (+ (* i 24) (integer-log n-tail))))
-                  (lp2 n2 (kons bit acc)))))))))
+          (let ((m (vector-ref mst i)))
+            (lp (+ i 1) (if m (kons (- i 3) m acc) acc)))))))
+
+;; Find the lowest fresh index for this tag that's unused
+;; in the multi-state.  This also updates the nfa's highest
+;; tag counter if a completely new slot number was assigned.
+(define (next-index-for-tag! nfa tag mst)
+  (let* ((highest (nfa-highest-map-index nfa))
+         (tag-slots (vector-ref (vector-ref mst 0) tag))
+         (new-index (do ((slot 0 (+ slot 1)))
+                        ((not (memv slot tag-slots)) slot))))
+    (cond ((> new-index highest)
+           (nfa-set-highest-map-index! nfa new-index)))
+    new-index))
 
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;;;; NFA->DFA compilation
-;;
+;;;; tNFA->DFA compilation
 ;; During processing, the DFA is a list of the form:
 ;;
-;;   ((NFA-states ...) accepting-state? transitions ...)
+;;   ((annotated-tNFA-states ...) finalizer transitions ...)
 ;;
 ;; where the transitions are as in the NFA, except there are no
 ;; epsilons, duplicate characters or overlapping char-set ranges, and
 ;; the states moved to are closures (sets of NFA states).  Multiple
-;; DFA states may be accepting states.
+;; DFA states may be accepting states.  If the state is an accepting state,
+;; the finalizer is a list of (tag . memory-slot) retrieval commands.
+;; tNFA-states are annotated with mappings which store the tag values of
+;; memory slots, if any.  There is always at most one slot for a tag.
+;;
+;; The DFA itself simulates a NFA by representing all the simultaneous
+;; states the NFA can be in at any given point in time as one DFA state.
+;; The tag values are ambiguous since each NFA transition can set a tag.
+;; To solve this we keep a bank of memory slots around which tracks tag
+;; values for each distinct path through the NFA.
+;;
+;; Once we get to a final state we can pluck the tag values from the
+;; memory slots corresponding to the path through which the NFA could have
+;; reached the final state.  To resolve ambiguities, states are assigned
+;; priorities, and the path to the final state is chosen correspondingly.
+;;
+;; For a more detailed explanation about this process, see
+;; Ville Laurikari; ``NFAs with Tagged Transitions, their Conversion to
+;; Deterministic Automata and Application to Regular Expressions'' (2000).
+;; Laurikari also wrote a master's thesis about this approach which is
+;; less terse but the algorithms are not exactly the same.
+;; ``Efficient submatch addressing for regular expressions'' (2001).
+;; This implementation follows the 2000 paper where they differ.
 
 (define (nfa->dfa nfa . o)
-  (let ((max-states (and (pair? o) (car o))))
-    (let lp ((ls (list (nfa-cache-state-closure! nfa (nfa-start-state nfa))))
-             (i 0)
-             (res '()))
+  (let* ((max-states (and (pair? o) (car o)))
+         (start (nfa-state->multi-state nfa (nfa-start-state nfa) '()))
+         (start-closure (nfa-epsilon-closure nfa start))
+         ;; Set up a special "initializer" state from which we reach the
+         ;; start-closure to ensure that leading tags are set properly.
+         (init-set (tag-set-commands-for-closure nfa start start-closure '()))
+         (dummy (make-nfa-multi-state nfa))
+         (init-state (list dummy #f `((,start-closure #f () . ,init-set)))))
+    ;; Unmarked states are just sets of NFA states with tag-maps, marked states
+    ;; are sets of NFA states with transitions to sets of NFA states
+    (let lp ((unmarked-states (list start-closure))
+             (marked-states (list init-state))
+             (dfa-size 0))
       (cond
-       ((null? ls)
-        (dfa-renumber nfa (reverse res)))
-       ((assoc (car ls) res) ;; already seen this combination of states
-        (lp (cdr ls) i res))
-       ((and max-states (> i max-states)) ;; too many DFA states
+       ((null? unmarked-states)
+        ;; Abuse finalizer slot for storing the number of memory slots we need
+        (set-car! (cdr init-state) (+ (nfa-highest-map-index nfa) 1))
+        (dfa-renumber (reverse marked-states)))
+       ((and max-states (> dfa-size max-states)) ; Too many DFA states
         #f)
+       ((assoc (car unmarked-states) marked-states) ; Seen set of NFA-states?
+        (lp (cdr unmarked-states) marked-states dfa-size))
        (else
-        (let* ((states (car ls))
-               (trans (nfa-state-transitions nfa states))
-               (accept? (and (nfa-multi-state-contains? states 0) #t)))
-          (lp (append (map cdr trans) (cdr ls))
-              (+ i 1)
-              `((,states ,accept? ,@trans) ,@res))))))))
-
-;; When the conversion is complete we renumber the DFA sets-of-states
-;; in order and convert the result to a vector for fast lookup.
-;; Charsets containing single characters are converted to those characters
-;; for quick matching of the literal parts in a regex.
-(define (dfa-renumber nfa dfa)
-  (let* ((len (length dfa))
-         (states (make-vector (nfa-num-states nfa) '()))
-         (res (make-vector len)))
-    (define (renumber mst)
-      (cdr (assoc mst (vector-ref states (nfa-multi-state-hash nfa mst)))))
-    (let lp ((ls dfa) (i 0))
-      (cond ((pair? ls)
-             (let ((j (nfa-multi-state-hash nfa (caar ls))))
-               (vector-set! states j (cons (cons (caar ls) i)
-                                           (vector-ref states j))))
-             (lp (cdr ls) (+ i 1)))))
-    (let lp ((ls dfa) (i 0))
-      (cond ((pair? ls)
-             (for-each
-              (lambda (x)
-                (set-car! x (maybe-cset->char (car x)))
-                (set-cdr! x (renumber (cdr x))))
-              (cddar ls))
-             (vector-set! res i (cdar ls))
-             (lp (cdr ls) (+ i 1)))))
-    res))
+        (let ((dfa-state (car unmarked-states)))
+          (let lp2 ((trans (get-distinct-transitions nfa dfa-state))
+                    (unmarked-states (cdr unmarked-states))
+                    (dfa-trans '()))
+            (if (null? trans)
+                (let ((finalizer (nfa-state-mappings dfa-state 0)))
+                  (lp unmarked-states
+                      (cons (list dfa-state finalizer dfa-trans) marked-states)
+                      (+ dfa-size 1)))
+                (let* ((closure (nfa-epsilon-closure nfa (cdar trans)))
+                       (reordered (find-reorder-commands nfa closure marked-states))
+                       (copy-cmds (if reordered (cdr reordered) '()))
+                       ;; Laurikari doesn't mention what "k" is, but it seems it
+                       ;; must be the mappings of the state's reach
+                       (set-cmds (tag-set-commands-for-closure nfa (cdar trans) closure copy-cmds))
+                       (trans-closure (if reordered (car reordered) closure)))
+                  (lp2 (cdr trans)
+                       (if reordered
+                           unmarked-states
+                           (cons trans-closure unmarked-states))
+                       (cons `(,trans-closure ,(caar trans) ,copy-cmds . ,set-cmds)
+                             dfa-trans)))))))))))
+
+(define (dfa-renumber states)
+  (let ((indexes (let lp ((i 0) (states states) (indexes '()))
+                   (if (null? states)
+                       indexes
+                       (lp (+ i 1) (cdr states)
+                           (cons (cons (caar states) i) indexes)))))
+        (dfa (make-vector (length states))))
+    (do ((i 0 (+ i 1))
+         (states states (cdr states)))
+        ((null? states) dfa)
+      (let ((maybe-finalizer (cadar states))
+            (transitions (caddar states)))
+       (vector-set!
+        dfa i
+        (cons maybe-finalizer
+              (map (lambda (tr)
+                     `(,(and (cadr tr) (maybe-cset->char (cadr tr)))
+                       ,(cdr (assoc (car tr) indexes)) . ,(cddr tr)))
+                   transitions)))))))
 
 ;; Extract all distinct ranges and the potential states they can transition
 ;; to from a given set of states.  Any ranges that would overlap with
 ;; distinct characters are split accordingly.
-(define (nfa-state-transitions nfa states)
-  (let ((res (nfa-multi-state-fold
-              states
-              (lambda (st res)
-                (let ((trans (nfa-get-state-trans nfa st)))
-                  (if (null? trans)
-                      res
-                      (nfa-join-transitions! nfa res (car trans) (cdr trans)))))
-              '())))
-    (for-each (lambda (x) (set-cdr! x (nfa-closure nfa (cdr x)))) res)
-    res))
 
-(define (nfa-join-transitions! nfa existing elt state)
+;; This function is like "reach" in Laurikari's papers, but for each
+;; possible distinct range of characters rather than per character.
+(define (get-distinct-transitions nfa annotated-states)
   (define (csets-intersect? a b)
     (let ((i (cset-intersection a b)))
       (and (not (cset-empty? i)) i)))
-  (let lp ((ls existing) (res '()))
-    (cond
-     ((null? ls)
-      (cond
-       ;; First try to find a group that includes *only* this state.
-       ;; TRICKY!: If it contains other states too, we will end up in trouble
-       ;; later on if the group needs to be broken up because of overlapping
-       ;; csets, since then you don't know what parts of the overlap "belong"
-       ;; to the state we are about to add or the one that was already there.
-       ((find (lambda (x) (nfa-multi-state-contains-only? (cdr x) state)) existing) =>
-        (lambda (existing-state)    ; If found, merge charsets with it
-          (set-car! existing-state (cset-union (car existing-state) elt))
-          existing))
-       ;; State not seen yet?  Add a new state transition
-       (else (cons (cons elt (nfa-state->multi-state nfa state)) existing))))
-     ((cset=? elt (caar ls)) ; Add state to existing set for this charset
-      (set-cdr! (car ls) (nfa-multi-state-add! (cdar ls) state))
-      existing)
-     ((csets-intersect? elt (caar ls)) => ; overlapping charset, but diff state
-      (lambda (intersection)
-        (let* ((only-in-old (cset-difference (caar ls) elt))
-               (states-for-old (and (not (cset-empty? only-in-old))
-                                    (nfa-multi-state-copy (cdar ls))))
-               (result (if states-for-old
-                           (cons (cons only-in-old states-for-old)
-                                 (append res (cdr ls)))
-                           (append res (cdr ls))))
-               (only-in-new (cset-difference elt (caar ls))))
-          ;; Add this state to the states already here and restrict to
-          ;; the overlapping charset
-          (set-car! (car ls) intersection)
-          (set-cdr! (car ls) (nfa-multi-state-add! (cdar ls) state))
-          ;; Continue with the remaining subset of the new cset (if nonempty)
-          (cons (car ls)
-                (if (cset-empty? only-in-new)
-                    result
-                    (nfa-join-transitions! nfa result only-in-new state))))))
-     (else
-      (lp (cdr ls) (cons (car ls) res))))))
-
-(define (nfa-cache-state-closure! nfa state)
-  (let ((cached (nfa-get-state-closure nfa state)))
-    (cond
-     ((not (null? cached))
-      cached)
-     (else
-      (let ((res (nfa-state-closure-internal nfa state)))
-        (nfa-set-state-closure! nfa state res)
-        res)))))
-
-;; The `closure' of a list of NFA states - all states that can be
-;; reached from any of them using any number of epsilon transitions.
-(define (nfa-state-closure-internal nfa state)
-  (let lp ((ls (list state))
-           (res (make-nfa-multi-state nfa)))
-    (cond
-     ((null? ls)
-      res)
-     ((nfa-multi-state-contains? res (car ls))
-      (lp (cdr ls) res))
-     (else
-      ;; Ignore any epsilon tags for now
-      (lp (append (map car (nfa-get-epsilons nfa (car ls))) (cdr ls))
-          (nfa-multi-state-add! res (car ls)))))))
-
-(define (nfa-closure-internal nfa states)
   (nfa-multi-state-fold
-   states
-   (lambda (st res)
-     (nfa-multi-state-union! res (nfa-cache-state-closure! nfa st)))
-   (make-nfa-multi-state nfa)))
+   annotated-states
+   (lambda (st mappings res)
+     (let ((trans (nfa-get-state-trans nfa st))) ; Always one state per trans
+       (if (null? trans)
+           res
+           (let lp ((ls res) (cs (car trans)) (state (cdr trans)) (res '()))
+             (cond
+              ;; State not seen yet?  Add a new state transition
+              ((null? ls)
+               ;; TODO: We should try to find an existing DFA state with only
+               ;; this NFA state in it, and extend the cset with the current one.
+               ;; This produces smaller DFAs, but takes longer to compile.
+               (cons (cons cs (nfa-state->multi-state nfa state mappings))
+                     res))
+              ((cset=? cs (caar ls)) ; Add state to existing set for this charset
+               (nfa-multi-state-add! nfa (cdar ls) state mappings)
+               (append ls res))
+              ((csets-intersect? cs (caar ls)) =>
+               (lambda (intersection)
+                 (let* ((only-in-new (cset-difference cs (caar ls)))
+                        (only-in-old (cset-difference (caar ls) cs))
+                        (states-in-both (cdar ls))
+                        (states-for-old (and (not (cset-empty? only-in-old))
+                                             (nfa-multi-state-copy states-in-both)))
+                        (res (if states-for-old
+                                 (cons (cons only-in-old states-for-old) res)
+                                 res)))
+                   (nfa-multi-state-add! nfa states-in-both state mappings)
+                   ;; Add this state to the states already here and restrict to
+                   ;; the overlapping charset and continue with the remaining subset
+                   ;; of the new cset (if nonempty)
+                   (if (cset-empty? only-in-new)
+                       (cons (cons intersection states-in-both)
+                             (append (cdr ls) res))
+                       (lp (cdr ls) only-in-new state
+                           (cons (cons intersection states-in-both) res))))))
+              (else
+               (lp (cdr ls) cs state (cons (car ls) res))))))))
+   '()))
+
+;; The epsilon-closure of a set of states is all the states reachable
+;; through epsilon transitions, with the tags encountered on the way.
+(define (nfa-epsilon-closure-internal nfa annotated-states)
+  ;; The stack _MUST_ be in this order for some reason I don't fully understand
+  (let lp ((stack (nfa-multi-state-fold annotated-states
+                                        (lambda (st m res)
+                                          (cons (cons st m) res))
+                                        '()))
+           (priorities (make-vector (nfa-num-states nfa) 0))
+           (closure (nfa-multi-state-copy annotated-states)))
+    (if (null? stack)
+        closure
+        (let ((prio/orig-state (caar stack)) ; priority is just the state nr.
+              (mappings (cdar stack)))
+          (let lp2 ((trans (nfa-get-epsilons nfa prio/orig-state))
+                    (stack (cdr stack)))
+            (if (null? trans)
+                (lp stack priorities closure)
+                (let ((state (caar trans)))
+                  (cond
+                   ;; Our priorities are inverted because we start at
+                   ;; the highest state number and go downwards to 0.
+                   ((> prio/orig-state (vector-ref priorities state))
+                    (vector-set! priorities state prio/orig-state)
+                    (cond
+                     ((cdar trans) =>   ; tagged transition?
+                      (lambda (tag)
+                       (let* ((index (next-index-for-tag! nfa tag closure))
+                              (new-mappings (nfa-multi-state-add-tagged!
+                                             nfa closure state mappings tag index)))
+                         (lp2 (cdr trans) (cons (cons state new-mappings) stack)))))
+                     (else
+                      (nfa-multi-state-add/fast! nfa closure state mappings)
+                      (lp2 (cdr trans) (cons (cons state mappings) stack)))))
+                   (else (lp2 (cdr trans) stack))))))))))
+
 
-(define (nfa-closure nfa states)
+(define (nfa-epsilon-closure nfa states)
   (or (nfa-get-closure nfa states)
-      (let ((res (nfa-closure-internal nfa states)))
+      (let ((res (nfa-epsilon-closure-internal nfa states)))
         (nfa-add-closure! nfa states res)
         res)))
 
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;;;; Match Extraction
-;;
-;; DFAs don't give us match information, so once we match and
-;; determine the start and end, we need to recursively break the
-;; problem into smaller DFAs to get each submatch.
-;;
-;; See http://compilers.iecc.com/comparch/article/07-10-026
-
-(define (match-vector-ref v i) (vector-ref v (+ 3 i)))
-
-(define (match-vector-set! v i x) (vector-set! v (+ 3 i) x))
+;; Generate "set" commands for all tags in the closure that are
+;; not present in the original state.
+(define (tag-set-commands-for-closure nfa orig-state closure copy-cmds)
+  (let ((num-tags (nfa-num-tags nfa))
+        (closure-summary (nfa-multi-state-mappings-summary closure))
+        (state-summary (nfa-multi-state-mappings-summary orig-state)))
+    (let lp ((t 0) (cmds '()))
+      (if (= t num-tags)
+          cmds
+          (let lp2 ((s1 (vector-ref closure-summary t))
+                    (s2 (vector-ref state-summary t))
+                    (cmds cmds))
+            (cond ((null? s1) (lp (+ t 1) cmds))
+                  ((or (memv (car s1) s2) ; Tag in original state?
+                       ;; Try to avoid generating set-commands for any slots
+                       ;; that will be overwritten by copy commands, but only
+                       ;; if that slot isn't copied to another slot.
+                       (and (not (null? copy-cmds)) ; null check for performance
+                            ;; Look for copy command overwriting this tag-slot
+                            (any (lambda (c)
+                                   (and (= (vector-ref c 0) t)
+                                        (= (vector-ref c 2) (car s1))))
+                                 copy-cmds)
+                            ;; Ensure it's not copied to another slot before
+                            ;; discarding the set-command.
+                            (not (any (lambda (c)
+                                        (and (= (vector-ref c 0) t)
+                                             (= (vector-ref c 1) (car s1))))
+                                      copy-cmds))))
+                   (lp2 (cdr s1) s2 cmds))
+                  (else (lp2 (cdr s1) s2
+                             (cons (cons t (car s1)) cmds)))))))))
+
+;; Look in dfa-states for an already existing state which matches
+;; closure, but has different tag value mappings.
+;; If found, calculate reordering commands so we can map the closure
+;; to that state instead of adding a new DFA state.
+;; This is completely handwaved away in Laurikari's paper (it basically
+;; says "insert reordering algorithm here"), so this code was constructed
+;; after some experimentation.  In other words, bugs be here.
+(define (find-reorder-commands-internal nfa closure dfa-states)
+  (let ((num-states (nfa-num-states nfa))
+        (num-tags (nfa-num-tags nfa))
+        (closure-summary (nfa-multi-state-mappings-summary closure)))
+    (let lp ((dfa-states dfa-states))
+      (if (null? dfa-states)
+          #f
+          (if (not (nfa-multi-state-same-states? (caar dfa-states) closure))
+              (lp (cdr dfa-states))
+              (let lp2 ((state-summary (nfa-multi-state-mappings-summary
+                                        (caar dfa-states)))
+                        (t 0) (cmds '()))
+                (if (= t num-tags)
+                    (cons (caar dfa-states) cmds)
+                    (let lp3 ((closure-slots (vector-ref closure-summary t))
+                              (state-slots (vector-ref state-summary t))
+                              (cmds cmds))
+                      (cond ((null? closure-slots)
+                             (if (null? state-slots)
+                                 (lp2 state-summary (+ t 1) cmds)
+                                 (lp (cdr dfa-states))))
+                            ((null? state-slots) (lp (cdr dfa-states)))
+                            (else (lp3 (cdr closure-slots)
+                                       (cdr state-slots)
+                                       (if (= (car closure-slots) (car state-slots))
+                                           cmds
+                                           (cons (vector t (car closure-slots) (car state-slots))
+                                                 cmds)))))))))))))
+
+(define (find-reorder-commands nfa closure dfa-states)
+  (or (nfa-get-reorder-commands nfa closure)
+      (let ((res (find-reorder-commands-internal nfa closure dfa-states)))
+        (nfa-set-reorder-commands! nfa closure res)
+        res)))
 
-(define (sre-match-extractor sre num-submatches)
-  (let* ((tmp (+ num-submatches 1))
-         (tmp-end-src-offset (+ 2 (* tmp 4)))
-         (tmp-end-index-offset (+ 3 (* tmp 4))))
-    (let lp ((sre sre) (n 1) (submatch-deps? #f))
-      (cond
-       ((not (sre-has-submatches? sre))
-        (if (not submatch-deps?)
-            (lambda (cnk start i end j matches) #t)
-            (let ((dfa (nfa->dfa (sre->nfa sre ~none))))
-              (lambda (cnk start i end j matches)
-                (dfa-match/longest dfa cnk start i end j matches tmp)))))
-       ((pair? sre)
-        (case (car sre)
-          ((: seq)
-           (let* ((right (sre-sequence (cddr sre)))
-                  (match-left (lp (cadr sre) n #t))
-                  (match-right
-                   (lp right (+ n (sre-count-submatches (cadr sre))) #t)))
-             (lambda (cnk start i end j matches)
-               (let lp1 ((end2 end) (j2 j) (best-src #f) (best-index #f))
-                 (let ((limit (if (eq? start end2)
-                                  i
-                                  ((chunker-get-start cnk) end2))))
-                   (let lp2 ((k j2) (best-src best-src) (best-index best-index))
-                     (if (< k limit)
-                         (cond
-                          ((not (eq? start end2))
-                           (let ((prev (chunker-prev-chunk cnk start end2)))
-                             (lp1 prev
-                                  ((chunker-get-end cnk) prev)
-                                  best-src
-                                  best-index)))
-                          (best-src
-                           (match-vector-set! matches tmp-end-src-offset best-src)
-                           (match-vector-set! matches tmp-end-index-offset best-index)
-                           #t)
-                          (else
-                           #f))
-                         (if (and (match-left cnk start i end2 k matches)
-                                  (eq? end2 (match-vector-ref matches
-                                                        tmp-end-src-offset))
-                                  (eqv? k (match-vector-ref matches
-                                                      tmp-end-index-offset))
-                                  (match-right cnk end2 k end j matches))
-                             (let ((right-src
-                                    (match-vector-ref matches tmp-end-src-offset))
-                                   (right
-                                    (match-vector-ref matches tmp-end-index-offset)))
-                               (cond
-                                ((and (eq? end right-src) (eqv? j right))
-                                 (match-vector-set! matches tmp-end-src-offset end)
-                                 (match-vector-set! matches tmp-end-index-offset j)
-                                 #t)
-                                ((or (not best-src)
-                                     (if (eq? best-src right-src)
-                                         (> right best-index)
-                                         (chunk-before? cnk
-                                                        best-src
-                                                        right-src)))
-                                 (lp2 (- k 1) right-src right))
-                                (else
-                                 (lp2 (- k 1) best-src best-index))))
-                             (lp2 (- k 1) best-src best-index)))))))))
-          ((or)
-           (if (null? (cdr sre))
-               (lambda (cnk start i end j matches) #f)
-               (let* ((rest (sre-alternate (cddr sre)))
-                      (match-first
-                       (lp (cadr sre) n #t))
-                      (match-rest
-                       (lp rest
-                           (+ n (sre-count-submatches (cadr sre)))
-                           submatch-deps?)))
-                 (lambda (cnk start i end j matches)
-                   (or (and (match-first cnk start i end j matches)
-                            (eq? end (match-vector-ref matches tmp-end-src-offset))
-                            (eqv? j (match-vector-ref matches tmp-end-index-offset)))
-                       (match-rest cnk start i end j matches))))))
-          ((* +)
-           (letrec ((match-once
-                     (lp (sre-sequence (cdr sre)) n #t))
-                    (match-all
-                     (lambda (cnk start i end j matches)
-                       (if (match-once cnk start i end j matches)
-                           (let ((src (match-vector-ref matches tmp-end-src-offset))
-                                 (k (match-vector-ref matches tmp-end-index-offset)))
-                             (if (and src (or (not (eq? start src)) (< i k)))
-                                 (match-all cnk src k end j matches)
-                                 #t))
-                           (begin
-                             (match-vector-set! matches tmp-end-src-offset start)
-                             (match-vector-set! matches tmp-end-index-offset i)
-                             #t)))))
-             (if (eq? '* (car sre))
-                 match-all
-                 (lambda (cnk start i end j matches)
-                   (and (match-once cnk start i end j matches)
-                        (let ((src (match-vector-ref matches tmp-end-src-offset))
-                              (k (match-vector-ref matches tmp-end-index-offset)))
-                          (match-all cnk src k end j matches)))))))
-          ((?)
-           (let ((match-once (lp (sre-sequence (cdr sre)) n #t)))
-             (lambda (cnk start i end j matches)
-               (cond
-                ((match-once cnk start i end j matches)
-                 #t)
-                (else
-                 (match-vector-set! matches tmp-end-src-offset start)
-                 (match-vector-set! matches tmp-end-index-offset i)
-                 #t)))))
-          (($ submatch => submatch-named)
-           (let ((match-one
-                  (lp (sre-sequence (if (memq (car sre) '($ submatch))
-                                        (cdr sre)
-                                        (cddr sre)))
-                      (+ n 1)
-                      #t))
-                 (start-src-offset (* n 4))
-                 (start-index-offset (+ 1 (* n 4)))
-                 (end-src-offset (+ 2 (* n 4)))
-                 (end-index-offset (+ 3 (* n 4))))
-             (lambda (cnk start i end j matches)
-               (cond
-                ((match-one cnk start i end j matches)
-                 (match-vector-set! matches start-src-offset start)
-                 (match-vector-set! matches start-index-offset i)
-                 (match-vector-set! matches end-src-offset
-                              (match-vector-ref matches tmp-end-src-offset))
-                 (match-vector-set! matches end-index-offset
-                              (match-vector-ref matches tmp-end-index-offset))
-                 #t)
-                (else
-                 #f)))))
-          (else
-           (%irregex-error "unknown regexp operator" (car sre)))))
-       (else
-        (%irregex-error "unknown regexp" sre))))))
 
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;;;; Closure Compilation
diff --git a/irregex.scm b/irregex.scm
index 9349bcf..659d874 100644
--- a/irregex.scm
+++ b/irregex.scm
@@ -142,9 +142,9 @@
 
 (define-compiler-syntax make-irregex 
   (syntax-rules ()
-    ((_ dfa dfa/search dfa/extract nfa flags submatches lengths names)
+    ((_ dfa dfa/search nfa flags submatches lengths names)
      (##sys#make-structure
-      'regexp dfa dfa/search dfa/extract nfa flags submatches lengths names))))
+      'regexp dfa dfa/search nfa flags submatches lengths names))))
 
 (define-compiler-syntax make-irregex-match
   (syntax-rules ()
@@ -208,6 +208,13 @@
     ((_ m n end)
      (vector-set! (##sys#slot m 1) (+ 3 (* n 4)) end))))
 
+(define-compiler-syntax irregex-match-chunk&index-from-tag-set!
+  (syntax-rules ()
+    ((_ m t chunk index)
+     (begin
+       (vector-set! (##sys#slot m 1) (+ 4 (* t 2)) chunk)
+       (vector-set! (##sys#slot m 1) (+ 5 (* t 2)) index)))))
+
 (define-compiler-syntax %irregex-error
   (syntax-rules ()
     ((_ args ...)
diff --git a/tests/test-irregex.scm b/tests/test-irregex.scm
index fd2cb97..cef431c 100644
--- a/tests/test-irregex.scm
+++ b/tests/test-irregex.scm
@@ -276,8 +276,8 @@
   (test-group "predicates"
     (test-assert (irregex? (irregex "a.*b")))
     (test-assert (irregex? (irregex '(: "a" (* any) "b"))))
-    (test-assert (not (irregex? (vector '*irregex-tag* #f #f #f #f #f #f #f))))
-    (test-assert (not (irregex? (vector #f #f #f #f #f #f #f #f #f))))
+    (test-assert (not (irregex? (vector '*irregex-tag* #f #f #f #f #f #f))))
+    (test-assert (not (irregex? (vector #f #f #f #f #f #f #f #f))))
     (test-assert (irregex-match-data? (irregex-search "a.*b" "axxxb")))
     (test-assert (irregex-match-data? (irregex-match "a.*b" "axxxb")))
     (test-assert (not (irregex-match-data? (vector '*irregex-match-tag* #f #f #f #f #f #f #f #f #f))))
diff --git a/types.db b/types.db
index 84dbab0..a9a8791 100644
--- a/types.db
+++ b/types.db
@@ -1300,23 +1300,31 @@
 (irregex-dfa (#(procedure #:clean #:enforce) irregex-dfa ((struct regexp)) *)
 	     (((struct regexp)) (##sys#slot #(1) '1)))
 
-(irregex-dfa/extract (#(procedure #:clean #:enforce) irregex-dfa/extract ((struct regexp)) *)
-		     (((struct regexp)) (##sys#slot #(1) '3)))
-
 (irregex-dfa/search (#(procedure #:clean #:enforce) irregex-dfa/search ((struct regexp)) *)
 		    (((struct regexp)) (##sys#slot #(1) '2)))
 
-(irregex-extract (#(procedure #:enforce) irregex-extract (* string #!optional fixnum fixnum) list)) ;XXX specialize?
+(irregex-nfa (#(procedure #:clean #:enforce) irregex-nfa ((struct regexp)) *)
+	     (((struct regexp)) (##sys#slot #(1) '3)))
+
 (irregex-flags (#(procedure #:clean #:enforce) irregex-flags ((struct regexp)) *)
-	       (((struct regexp)) (##sys#slot #(1) '5)))
+	       (((struct regexp)) (##sys#slot #(1) '4)))
+
+(irregex-num-submatches (#(procedure #:clean #:enforce) irregex-num-submatches ((struct regexp))
+				   fixnum)
+			(((struct regexp)) (##sys#slot #(1) '5)))
+
+(irregex-lengths (#(procedure #:clean #:enforce) irregex-lengths ((struct regexp)) *)
+		 (((struct regexp)) (##sys#slot #(1) '6)))
+
+(irregex-names (#(procedure #:clean #:enforce) irregex-names ((struct regexp)) *)
+	       (((struct regexp)) (##sys#slot #(1) '7)))
+
+(irregex-extract (#(procedure #:enforce) irregex-extract (* string #!optional fixnum fixnum) list)) ;XXX specialize?
 
 (irregex-fold (#(procedure #:enforce) irregex-fold (* (procedure (fixnum (struct regexp-match) *) *) * string #!optional (procedure (fixnum *) *) fixnum fixnum) *))
 
 (irregex-fold/chunked (#(procedure #:enforce) irregex-fold/chunked (* (procedure (* fixnum (struct regexp-match) *) *) * procedure * #!optional (procedure (* fixnum *) *) fixnum fixnum) *))
 
-(irregex-lengths (#(procedure #:clean #:enforce) irregex-lengths ((struct regexp)) *)
-		 (((struct regexp)) (##sys#slot #(1) '7)))
-
 (irregex-match (#(procedure #:enforce) irregex-match (* string #!optional fixnum fixnum) *))
 ;irregex-match?
 
@@ -1338,18 +1346,8 @@
 (irregex-match-substring (#(procedure) irregex-match-substring (* #!optional *) *))
 (irregex-match/chunked (#(procedure #:enforce) irregex-match/chunked (* * * #!optional fixnum) *))
 
-(irregex-names (#(procedure #:clean #:enforce) irregex-names ((struct regexp)) *)
-	       (((struct regexp)) (##sys#slot #(1) '8)))
-
 (irregex-new-matches (procedure irregex-new-matches (*) *))
 
-(irregex-nfa (#(procedure #:clean #:enforce) irregex-nfa ((struct regexp)) *)
-	     (((struct regexp)) (##sys#slot #(1) '4)))
-
-(irregex-num-submatches (#(procedure #:clean #:enforce) irregex-num-submatches ((struct regexp))
-				   fixnum)
-			(((struct regexp)) (##sys#slot #(1) '6)))
-
 (irregex-opt (#(procedure #:enforce) irregex-opt (list) *))
 (irregex-quote (#(procedure #:enforce) irregex-quote (string) string))
 (irregex-replace (#(procedure #:enforce) irregex-replace (* string #!rest) string))
-- 
1.7.9.1