[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
01/02: Try to further speed up inserting missing derivation source files
From: Christopher Baines
Subject: 01/02: Try to further speed up inserting missing derivation source files
Date: Wed, 2 Mar 2022 13:24:25 -0500 (EST)
cbaines pushed a commit to branch master
in repository data-service.
commit f86657915e68201dbe7903009a8b37713979bf5d
Author: Christopher Baines <mail@cbaines.net>
AuthorDate: Wed Mar 2 18:00:36 2022 +0000
Try to further speed up inserting missing derivation source files
Switch from using a recursive query to doing a breadth-first search through
the graph of derivations, as I think PostgreSQL wasn't doing a great job of
planning the recursive queries (it would overestimate the rows involved, and
prefer sequential scans for the derivation_outputs table).
---
guix-data-service/model/derivation.scm | 122 +++++++++++++++++++--------------
1 file changed, 69 insertions(+), 53 deletions(-)
diff --git a/guix-data-service/model/derivation.scm b/guix-data-service/model/derivation.scm
index c1d1c69..3c5d0fb 100644
--- a/guix-data-service/model/derivation.scm
+++ b/guix-data-service/model/derivation.scm
@@ -1738,37 +1738,38 @@ WHERE " criteria ";"))
(chunk! missing-file-names 2000)))))
(define (derivation-file-names->derivation-ids conn derivation-file-names)
- (define (select-source-files-missing-nars! derivation-ids)
- (define (derivation-ids->all-related-derivation-ids ids)
- (define query
- (string-append
- "
-WITH RECURSIVE all_derivations AS (
- SELECT column1 AS derivation_id
- FROM (VALUES "
- (string-join (map
- (lambda (id)
- (string-append "(" id ")"))
- (map number->string ids))
- ", ")
- ") AS data
- UNION
- SELECT derivation_outputs.derivation_id
- FROM all_derivations
- INNER JOIN derivation_inputs
- ON derivation_inputs.derivation_id = all_derivations.derivation_id
- INNER JOIN derivation_outputs
- ON derivation_outputs.id = derivation_inputs.derivation_output_id
-)
-SELECT all_derivations.derivation_id
-FROM all_derivations"))
-
- (map (lambda (row)
- (string->number
- (car row)))
- (with-time-logging
- "querying for batch of all related derivation ids"
- (exec-query conn query))))
+ (define (insert-source-files-missing-nars derivation-ids)
+ (define (derivation-ids->next-related-derivation-ids! ids seen-ids)
+ (delete-duplicates/sort!
+ (append-map!
+ (lambda (ids-chunk)
+ (let ((query
+ (string-append
+ "
+SELECT derivation_outputs.derivation_id
+FROM derivation_inputs
+INNER JOIN derivation_outputs
+ ON derivation_outputs.id = derivation_inputs.derivation_output_id
+WHERE derivation_inputs.derivation_id IN ("
+ (string-join (map number->string ids) ",")
+ ")")))
+
+ (filter-map
+ (lambda (row)
+ (let ((number
+ (string->number
+ (car row))))
+ (if (hash-ref seen-ids number)
+ #f
+ (begin
+ (hash-set! seen-ids number #t)
+
+ number))))
+ (with-time-logging
+ "querying for batch of all related derivation ids"
+ (exec-query conn query)))))
+ (chunk! ids 2000))
+ <))
(define (derivation-ids->missing-sources ids)
(define query
@@ -1787,21 +1788,42 @@ INNER JOIN derivation_source_files
")
AND derivation_source_file_nars.derivation_source_file_id IS NULL"))
- (with-time-logging "finding batch of missing sources"
- (exec-query conn query)))
-
- (let ((all-derivation-ids
- (with-time-logging "querying for all related dervation ids"
- (delete-duplicates/sort!
- (append-map!
- derivation-ids->all-related-derivation-ids
- (chunk! derivation-ids 5000))
- <))))
+ (map (lambda (row)
+ (list (string->number (first row))
+ (second row)))
+ (with-time-logging "finding batch of missing sources"
+ (exec-query conn query))))
- (with-time-logging "querying for missing sources"
- (append-map! derivation-ids->missing-sources
- (chunk! all-derivation-ids
- 10000)))))
+ (let ((seen-ids (make-hash-table)))
+ (let loop ((next-related-derivation-ids
+ (with-time-logging "querying for next related dervation ids"
+ (derivation-ids->next-related-derivation-ids!
+ (list-copy derivation-ids)
+ seen-ids))))
+ (unless (null? next-related-derivation-ids)
+ (let ((missing-sources
+ (with-time-logging "querying for missing sources"
+ (append-map! derivation-ids->missing-sources
+ (chunk next-related-derivation-ids
+ 10000)))))
+
+ (unless (null? missing-sources)
+ (with-time-logging
+ (simple-format #f "inserting ~A missing source files"
+ (length missing-sources))
+ (for-each (match-lambda
+ ((derivation-source-file-id store-path)
+ (insert-derivation-source-file-nar
+ conn
+ derivation-source-file-id
+ store-path)))
+ missing-sources))))
+
+ (loop
+ (with-time-logging "querying for next related dervation ids"
+ (derivation-ids->next-related-derivation-ids!
+ next-related-derivation-ids
+ seen-ids)))))))
(if (null? derivation-file-names)
'()
@@ -1854,13 +1876,7 @@ INNER JOIN derivation_source_files
(error "missing derivation id")))
derivation-file-names)))
- (with-time-logging "inserting missing source files"
- (for-each (match-lambda
- ((derivation-source-file-id store-path)
- (insert-derivation-source-file-nar
- conn
- (string->number derivation-source-file-id)
- store-path)))
- (select-source-files-missing-nars! all-ids)))
+ (with-time-logging "insert-source-files-missing-nars"
+ (insert-source-files-missing-nars all-ids))
all-ids)))))