guix-commits
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

01/02: Try to further speed up inserting missing derivation source files


From: Christopher Baines
Subject: 01/02: Try to further speed up inserting missing derivation source files
Date: Wed, 2 Mar 2022 13:24:25 -0500 (EST)

cbaines pushed a commit to branch master
in repository data-service.

commit f86657915e68201dbe7903009a8b37713979bf5d
Author: Christopher Baines <mail@cbaines.net>
AuthorDate: Wed Mar 2 18:00:36 2022 +0000

    Try to further speed up inserting missing derivation source files
    
    Switch from using a recursive query to doing a breath first search through 
the
    graph of derivations, as I think PostgreSQL wasn't doing a great job of
    planning the recursive queries (it would overestimate the rows involved, and
    prefer sequential scans for the derivation_outputs table).
---
 guix-data-service/model/derivation.scm | 122 +++++++++++++++++++--------------
 1 file changed, 69 insertions(+), 53 deletions(-)

diff --git a/guix-data-service/model/derivation.scm 
b/guix-data-service/model/derivation.scm
index c1d1c69..3c5d0fb 100644
--- a/guix-data-service/model/derivation.scm
+++ b/guix-data-service/model/derivation.scm
@@ -1738,37 +1738,38 @@ WHERE " criteria ";"))
        (chunk! missing-file-names 2000)))))
 
 (define (derivation-file-names->derivation-ids conn derivation-file-names)
-  (define (select-source-files-missing-nars! derivation-ids)
-    (define (derivation-ids->all-related-derivation-ids ids)
-      (define query
-        (string-append
-         "
-WITH RECURSIVE all_derivations AS (
-    SELECT column1 AS derivation_id
-    FROM (VALUES "
-       (string-join (map
-                     (lambda (id)
-                       (string-append "(" id ")"))
-                     (map number->string ids))
-                    ", ")
-       ") AS data
-  UNION
-    SELECT derivation_outputs.derivation_id
-    FROM all_derivations
-    INNER JOIN derivation_inputs
-      ON derivation_inputs.derivation_id = all_derivations.derivation_id
-    INNER JOIN derivation_outputs
-      ON derivation_outputs.id = derivation_inputs.derivation_output_id
-)
-SELECT all_derivations.derivation_id
-FROM all_derivations"))
-
-      (map (lambda (row)
-             (string->number
-              (car row)))
-           (with-time-logging
-               "querying for batch of all related derivation ids"
-             (exec-query conn query))))
+  (define (insert-source-files-missing-nars derivation-ids)
+    (define (derivation-ids->next-related-derivation-ids! ids seen-ids)
+      (delete-duplicates/sort!
+       (append-map!
+        (lambda (ids-chunk)
+          (let ((query
+                 (string-append
+                  "
+SELECT derivation_outputs.derivation_id
+FROM derivation_inputs
+INNER JOIN derivation_outputs
+  ON derivation_outputs.id = derivation_inputs.derivation_output_id
+WHERE derivation_inputs.derivation_id IN ("
+                  (string-join (map number->string ids) ",")
+                  ")")))
+
+            (filter-map
+             (lambda (row)
+               (let ((number
+                      (string->number
+                       (car row))))
+                 (if (hash-ref seen-ids number)
+                     #f
+                     (begin
+                       (hash-set! seen-ids number #t)
+
+                       number))))
+             (with-time-logging
+                 "querying for batch of all related derivation ids"
+               (exec-query conn query)))))
+        (chunk! ids 2000))
+       <))
 
     (define (derivation-ids->missing-sources ids)
       (define query
@@ -1787,21 +1788,42 @@ INNER JOIN derivation_source_files
          ")
        AND derivation_source_file_nars.derivation_source_file_id IS NULL"))
 
-      (with-time-logging "finding batch of missing sources"
-        (exec-query conn query)))
-
-    (let ((all-derivation-ids
-           (with-time-logging "querying for all related dervation ids"
-             (delete-duplicates/sort!
-              (append-map!
-               derivation-ids->all-related-derivation-ids
-               (chunk! derivation-ids 5000))
-              <))))
+      (map (lambda (row)
+             (list (string->number (first row))
+                   (second row)))
+           (with-time-logging "finding batch of missing sources"
+             (exec-query conn query))))
 
-      (with-time-logging "querying for missing sources"
-        (append-map! derivation-ids->missing-sources
-                     (chunk! all-derivation-ids
-                             10000)))))
+    (let ((seen-ids (make-hash-table)))
+      (let loop ((next-related-derivation-ids
+                  (with-time-logging "querying for next related dervation ids"
+                    (derivation-ids->next-related-derivation-ids!
+                     (list-copy derivation-ids)
+                     seen-ids))))
+        (unless (null? next-related-derivation-ids)
+          (let ((missing-sources
+                 (with-time-logging "querying for missing sources"
+                   (append-map! derivation-ids->missing-sources
+                                (chunk next-related-derivation-ids
+                                       10000)))))
+
+            (unless (null? missing-sources)
+              (with-time-logging
+                  (simple-format #f "inserting ~A missing source files"
+                                 (length missing-sources))
+                (for-each (match-lambda
+                            ((derivation-source-file-id store-path)
+                             (insert-derivation-source-file-nar
+                              conn
+                              derivation-source-file-id
+                              store-path)))
+                          missing-sources))))
+
+          (loop
+           (with-time-logging "querying for next related dervation ids"
+             (derivation-ids->next-related-derivation-ids!
+              next-related-derivation-ids
+              seen-ids)))))))
 
   (if (null? derivation-file-names)
       '()
@@ -1854,13 +1876,7 @@ INNER JOIN derivation_source_files
                             (error "missing derivation id")))
                       derivation-file-names)))
 
-            (with-time-logging "inserting missing source files"
-              (for-each (match-lambda
-                          ((derivation-source-file-id store-path)
-                           (insert-derivation-source-file-nar
-                            conn
-                            (string->number derivation-source-file-id)
-                            store-path)))
-                        (select-source-files-missing-nars! all-ids)))
+            (with-time-logging "insert-source-files-missing-nars"
+              (insert-source-files-missing-nars all-ids))
 
             all-ids)))))



reply via email to

[Prev in Thread] Current Thread [Next in Thread]