guix-commits
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[shepherd] 15/15: WIP: shepherd: Port core service actor.


From: Juliana Sims
Subject: [shepherd] 15/15: WIP: shepherd: Port core service actor.
Date: Tue, 26 Nov 2024 16:46:17 -0500 (EST)

juli pushed a commit to branch wip-goblinsify
in repository shepherd.

commit 5f9771e7227df12e015541e23371650b467536a6
Author: Juliana Sims <juli@incana.org>
AuthorDate: Tue Nov 26 13:10:56 2024 -0500

    WIP: shepherd: Port core service actor.
    
    This is a living commit and will change over time.  All "final" commits will
    be atomic.
    
    Summary of major architectural changes thus far:
     - wrap the <service> record in a Goblins actor
     - move service-controller logic inside service actor
     - move stop logic inside the service actor
     - move start logic inside service actor
    
    * modules/shepherd/service.scm: Port the core service actor.
---
 modules/shepherd/service.scm | 1089 ++++++++++++++++++++++++++----------------
 1 file changed, 690 insertions(+), 399 deletions(-)

diff --git a/modules/shepherd/service.scm b/modules/shepherd/service.scm
index 13ca230..1a8fd8a 100644
--- a/modules/shepherd/service.scm
+++ b/modules/shepherd/service.scm
@@ -32,6 +32,9 @@
   #:use-module (fibers conditions)
   #:use-module (fibers scheduler)
   #:use-module (fibers timers)
+  #:use-module (goblins)
+  #:use-module (goblins actor-lib cell)
+  #:use-module (goblins actor-lib methods)
   #:use-module (srfi srfi-1)
   #:use-module (srfi srfi-9)
   #:use-module (srfi srfi-9 gnu)
@@ -175,6 +178,11 @@
             get-message*                      ;XXX: for lack of a better place
             essential-task-thunk))
 
+(define-syntax-rule (if-on pred b1 b2)
+  (on pred (lambda (true?) (if true? b1 b2)) #:promise? #t))
+
+(define-syntax-rule (match-on expr body body* ...)
+  (on expr (match-lambda body body* ...) #:promise? #t))
 
 (define sleep (@ (fibers) sleep))
 
@@ -390,15 +398,390 @@ Log abnormal termination reported by @var{status}."
 denoting what the service provides."
   (match provision
     (((? symbol?) ..1)
-     (make-service provision requirement one-shot? transient?
-                   respawn? respawn-limit respawn-delay
-                   start stop actions termination-handler
-                   documentation #f))
+     (selfish-spawn ^service
+                    (make-service provision requirement one-shot? transient?
+                                  respawn? respawn-limit respawn-delay
+                                  start stop actions termination-handler
+                                  documentation #f)))
     (_
      (raise (condition
              (&message (message "invalid service provision list")))))))
 
-(define (service-control service)                 ;internal
+(define-actor (^service _bcom self service-record)
+  "Constructor for Goblins actor representing a service"
+  #:frozen
+  (define (pid? obj)
+    "Return #t if @var{obj} looks like a PID"
+    (and (integer? obj) (exact? obj) (> obj 1)))
+
+  (define (update-status-changes status)
+    "Add @var{status} to @var{changes}, the ring buffer of status changes"
+    (ring-buffer-insert (cons status (current-time))
+                        ($ changes)))
+  (define (start)
+    "Attempt to start, blocking if we are already starting.  Return a promise
+resolving to @code{#f} if we are already running or being started; otherwise
+return a promise resolving to a promise resolver to be fulfilled once we 
start."
+    (if ($ enabled?)
+        (match ($ status)
+          ;; Running; return #f
+          ('running #f)
+          ;; Starting; wait until started then return #f
+          ('starting (on ($ started-promise) (lambda _ #f) #:promise? #t))
+          ;; Stopping; wait until stopped then try starting again
+          ('stopping
+           (local-output (l10n "Waiting for ~a to stop...") name)
+           (on ($ stopped-promise) (lambda _ (start))))
+          ;; Start
+          ('stopped
+           (let-values (((notification-promise notification-promise-resolver)
+                         (spawn-promise-values)))
+             (on notification-promise
+                 (lambda (running?)
+                   (if running?
+                       (local-output (l10n "Service ~a started.") name)
+                       (local-output (l10n "Service ~a failed to start.") 
name))
+                   ($ self 'service-started! running?))
+                 #:catch
+                 (lambda (key args)
+                   (report-exception 'start self key args)))
+             (local-output (l10n "Starting service ~a...") name)
+             ($ status 'starting)
+             ($ changes (update-status-changes 'starting))
+             ;; We reset this promise pair here because this
+             ;; is the point at which we are no longer stopped
+             (let-values (((promise resolver)
+                           (spawn-promise-values)))
+               ($ stopped-promise promise)
+               ($ stopped-promise-resolver resolver))
+             notification-promise-resolver)))
+        'disabled))
+
+  (define (stop)
+    "Attempt to stop this service, blocking if it is already being stopped.
+Return a promise resolving to @code{#f} if this service was already running or
+being stopped; otherwise return a promise resolving to a promise resolver to be
+fulfilled when this service stops."
+    (match ($ status)
+      ;; Stopped; return #f
+      ('stopped #f)
+      ;; Stopping; wait until stopped then return #f
+      ('stopping (on ($ stopped-promise) (lambda _ #f) #:promise? #t))
+      ;; Starting; wait until started then try stopping again
+      ('starting
+       (local-output (l10n "Waiting for ~a to start...") name)
+       (on ($ started-promise) (lambda _ (stop))))
+      ;; Stop
+      ('running
+       (let-values (((notification-promise notification-promise-resolver)
+                     (spawn-promise-values)))
+         (on notification-promise
+             (lambda (stopped?)
+               ;; The STOPPED? boolean is supposed to indicate success
+               ;; or failure, but sometimes 'stop' method might return a
+               ;; truth value even though the service was successfully
+               ;; stopped, hence "might have failed" below.
+               (if stopped?
+                   (local-output (l10n "Service ~a stopped.") name)
+                   (local-output
+                    (l10n "Service ~a might have failed to stop.") name))
+               ($ self 'service-stopped! stopped?))
+             #:catch
+             (lambda (key args)
+               (report-exception 'stop self key args)))
+         (local-output (l10n "Stopping service ~a...") name)
+         ($ status 'stopping)
+         ($ changes (update-status-changes 'stopping))
+         ;; We reset this promise pair here because this
+         ;; is the point at which we are no longer running
+         (let-values (((promise resolver)
+                       (spawn-promise-values)))
+           ($ started-promise promise)
+           ($ started-promise-resolver resolver))
+         notification-promise-resolver))))
+
+  (define name (car (service-provision service-record)))
+
+  (define-values (started-promise started-promise-resolver)
+    (let-values ((promise resolver)
+                 (spawn-promise-values))
+      (values
+       (spawn ^cell promise)
+       (spawn ^cell resolver))))
+  (define-values (stopped-promise stopped-promise-resolver)
+    (let-values ((promise resolver)
+                 (spawn-promise-values))
+      (values
+       (spawn ^cell promise)
+       (spawn ^cell resolver))))
+  (define-cell status 'stopped)
+  (define-cell value #f)
+  (define-cell enabled? #t)
+  (define-cell changes                  ;list of status/timestamp pairs
+    (ring-buffer %max-recorded-startup-changes))
+  (define-cell failures                 ; list of timestamps
+    (ring-buffer %max-recorded-startup-failures))
+  (define-cell respawns '())            ; list of timestamps
+  (define-cell exit-statuses
+    (ring-buffer %max-recorded-exist-statuses))
+  (define-cell replacement #f)
+  ;; TODO: port to Goblins
+  (define-cell logger #f)               ;logger actor
+  (define-cell log-file)
+  (methods
+   ;; Immutable state
+   ((canonical-name) name)
+   ((provision) (service-provision service-record))
+   ((requirement) (service-requirement service-record))
+   ((one-shot?) (one-shot-service? service-record))
+   ((transient?) (transient-service service-record))
+   ((respawn?) (respawn-service? service-record))
+   ((respawn-limit) (service-respawn-limit service-record))
+   ((respawn-delay) (service-respawn-delay service-record))
+   ((start-action) (service-start service-record))
+   ((stop-action) (service-stop service-record))
+   ((actions) (service-actions service-record))
+   ((termination-handler) (service-termination-handler service-record))
+   ((documentation) (service-documentation service-record))
+   ((%control) (%service-control service-record))
+   ((%control new-control) (set-service-control! service-record new-control))
+
+   ((running) ($ value))
+   ((status) ($ status))
+   ((enabled?) ($ enabled?))
+   ((respawn-times) ($ respawns))
+   ((startup-failures) ($ failures))
+   ((status-changes) ($ changes))
+   ((exit-statuses) ($ exit-statuses))
+
+   ((enable) ($ enabled? #t))           ;no reply
+   ((disable) ($ enabled? #f))          ;no reply
+
+   ((stopped?) (eq? 'stopped ($ status)))
+   ((running?) (not ($ self 'stopped?)))
+
+   ((start . args)
+    "Start @var{service} and its dependencies, passing @var{args} to its
+@code{start} method.  Return its running value, @code{#f} on failure."
+    ;; Not running; launch
+    (let-on ((problems
+              (all-of* (start-in-parallel (map lookup-service* ($ self 
'requirement))))))
+      (if (pair? problems)
+          (begin
+            (for-each (lambda (problem)
+                        (on (<- problem 'canonical-name)
+                            (lambda (problem-name)
+                              (local-output (l10n "Service ~a depends on ~a.")
+                                            name problem-name))))
+                      problems)
+            #f)
+          ;; Start the service itself.
+          (on (start)
+              (match-lambda
+                ;; Already running
+                (#f ($ value))
+                ;; Disabled
+                ('disabled
+                 (local-output (l10n "Service ~a is currently disabled.") name)
+                 ($ value))
+                ;; Start the service and return its running value
+                ((? procedure? promise-resolver)
+                 (let ((running
+                        (catch #t
+                          (lambda ()
+                            ;; Make sure the 'start' method writes
+                            ;; messages to the right port.
+                            (parameterize ((current-output-port
+                                            (%current-service-output-port))
+                                           (current-error-port
+                                            (%current-service-output-port))
+                                           (current-service self))
+                              (apply ($ self 'start-action) args)))
+                          (lambda (key . args)
+                            (<-np promise-resolver 'break key args)))))
+                   (<-np promise-resolver 'fulfill running)
+                   (local-output (if running
+                                     (l10n "Service ~a has been started.")
+                                     (l10n "Service ~a could not be started."))
+                                 name)
+                   running)))))))
+   ((service-started! new-value)        ;no reply
+    ;; When NEW-VALUE is a procedure, call it to get the actual value and
+    ;; pass it a callback so it can eventually change it.
+    (let ((new-value (cond
+                      ;; This may return true on a promise.  Is that okay?
+                      ((procedure? new-value)
+                       (new-value
+                        (lambda (value)
+                          ($ self 'change-value! value))))
+                      ((pid? new-value) ;backward compatibility
+                       (pid->process new-value))
+                      (else new-value))))
+      (on new-value
+          (lambda (new-value)
+            (when new-value
+              (local-output (l10n "Service ~a running with value ~s.")
+                            name new-value))
+            (when (process? new-value)
+              ;; TODO: service monitoring
+              (monitor-service-process self (process-id new-value)))
+
+            (<-np ($ started-promise-resolver) 'fulfill)
+            (let ((new-status (if (and new-value
+                                       (not ($ self 'one-shot?)))
+                                  'running
+                                  'stopped)))
+              ($ status new-status)
+              ($ value (and (eq? new-value 'running) new-value))
+              ($ changes (update-status-changes new-status))
+              ($ failures (if new-value
+                              failures
+                              (ring-buffer-insert (current-time)
+                                                  ($ failures)))))))))
+   ((change-value! new-value)
+    (let ((new-value (if (pid? new-value) ;backward compatibility
+                         (pid->process new-value)
+                         new-value)))
+      (local-output (l10n "Running value of service ~a changed to ~s.")
+                    name new-value)
+      (when (process? new-value)
+        ;; TODO: service monitoring
+        (monitor-service-process self (process-id new-value)))
+      ($ value new-value)))
+   ;; Stop the service, including services that depend on it.  If the
+   ;; latter fails, continue anyway.  Return `#f' if it could be stopped.
+   ((stop . args)
+    "Stop this service and any service that depends on it.  Return the list of
+names of services that have been stopped (including transitive dependent
+services).
+
+If this service is not running, print a warning and return its canonical name
+in a list."
+    (if ($ self 'stopped?)
+        (begin
+          (local-output (l10n "Service ~a is not running.") name)
+          (list name))
+        (on (all-of*
+             (fold-services
+              (lambda (other acc-promise)
+                (let-on* ((acc acc-promise)
+                          (other-running? (<- other 'running?))
+                          ;; We have to define dependencies here rather than
+                          ;; inline to its use because we're actually getting
+                          ;; and resolving a promise.  If we wanted to get this
+                          ;; value inline, we would need a stacked `on'.
+                          (dependencies (<- other 'requirement))
+                          (required?
+                           (and (find (lambda (dependency)
+                                        (memq dependency ($ self 'provision)))
+                                      dependencies)
+                                #t)))
+                  (if (and other-running? required?)
+                      (on (<- other 'stop)
+                          (lambda (ret-val)
+                            (append ret-val acc))
+                          #:promise? #t)
+                      acc)))
+              '()))
+            (lambda (stopped-dependents)
+              ;; Stop the service itself.
+              (on (stop)
+                  (match-lambda
+                    (#f #f)
+                    ((? procedure? promise-resolver)
+                     (catch #t
+                       (lambda ()
+                         (define stopped?
+                           (parameterize ((current-output-port
+                                           (%current-service-output-port))
+                                          (current-error-port
+                                           (%current-service-output-port))
+                                          (current-service self))
+                             (not (apply ($ self 'stop-action)
+                                         ($ value)
+                                         args))))
+                         (<-np promise-resolver 'fulfill stopped?))
+                       (lambda (key . args)
+                         ;; Special case: 'root' may quit.
+                         (and (eq? root-service self)
+                              (eq? key 'quit)
+                              (apply quit args))
+                         (<-np promise-resolver 'break key args)))))
+                  #:finally
+                  (lambda ()
+                    (when ($ self 'transient?)
+                      (put-message (current-registry-channel)
+                                   `(unregister ,(list service)))
+                      (local-output (l10n "Transient service ~a unregistered.")
+                                    name))
+                    ;; Replace the service with its replacement, if it has one.
+                    (let ((replacement ($ self 'replacement)))
+                      (when replacement
+                        (register-services (list replacement)))
+                      (cons (or replacement self) stopped-dependents))))))))
+   ((service-stopped!)                  ;no reply
+    (local-output (l10n "Service ~a is now stopped.") name)
+    (<-np ($ stopped-promise-resolver) 'fulfill)
+    (when ($ logger) (put-message ($ logger) 'terminate))
+
+    ($ status 'stopped)
+    ($ changes (update-status-changes 'stopped))
+    ($ value #f) ($ logger #f)
+    ($ respawns '())
+    ($ failures (ring-buffer %max-recorded-startup-failures)))
+   ((handle-termination pid exit-status) ;no reply
+    ;; Handle premature termination of this service's process, possibly by
+    ;; respawning it, unless STATUS is 'stopping' or 'stopped' or PID
+    ;; doesn't match VALUE (which happens with notifications of processes
+    ;; terminated while stopping the service or shortly after).
+    (unless (or (memq ($ status) '(stopping stopped))
+                (not (process? ($ value)))
+                (not (= (process-id ($ value)) pid)))
+      (spawn-fiber
+       (lambda ()
+         (false-if-exception
+          (($ self 'termination-handler)
+           service-record ($ value) exit-status))
+         (when ($ logger)
+           (put-message ($ logger) 'terminate))))
+      ($ status 'stopped)
+      ($ changes (update-status-changes 'stopped))
+      ($ exit-statuses
+         (ring-buffer-insert (cons exit-status (current-time))
+                             exit-statuses))
+      ($ value #f) ($ logger #f)))
+   ((record-process-exit-status pid status)
+    ($ exit-statuses
+       (ring-buffer-insert (cons status (current-time))
+                           ($ exit-statuses))))
+   ((record-respawn-time)               ;no reply
+    ($ respawns (cons (current-time) ($ respawns))))
+   ((replace-if-running new-serivce)
+    (if (eq? ($ status) 'running)
+        (begin
+          (local-output (l10n "Recording replacement for ~a.") name)
+          ($ replacement new-service)
+          #t)
+        (begin
+          ($ replacement #f)
+          #f)))
+   ((replacement)
+    ($ replacement))
+   ((register-logger new-logger)        ;no reply
+    (when logger
+      ;; This happens when, for example, the 'start' procedure calls
+      ;; 'fork+exec-command' several times: each call creates a new logger.
+      (local-output (l10n "Registering new logger for ~a.") name)
+      (put-message ($ logger) 'terminate))
+    ($ logger new-logger))
+   ((terminate)                         ;no reply
+    (if (eq? ($ status) 'stopped)
+        (bcom (lambda _ '*service-stopped*))
+        (local-output
+         (l10n "Attempt to terminate controller of ~a in ~a state!")
+         name ($ status))))))
+
+(define (service-control service)       ;internal
   "Return the controlling channel of @var{service}."
   ;; Spawn the controlling fiber lazily, hopefully once Fibers has actually
   ;; been initialized.
@@ -564,24 +947,24 @@ denoting what the service provides."
                               ((pid? new-value)   ;backward compatibility
                                (pid->process new-value))
                               (else new-value))))
-        (when new-value
-          (local-output (l10n "Service ~a running with value ~s.")
-                        (service-canonical-name service) new-value))
-        (when (process? new-value)
-          (monitor-service-process service (process-id new-value)))
-
-        (signal-condition! condition)
-        (let ((new-status (if (and new-value (not (one-shot-service? service)))
-                              'running
-                              'stopped)))
-          (loop (status new-status)
-                (value (and (not (one-shot-service? service)) new-value))
-                (changes (update-status-changes new-status))
-                (condition #f)
-                (failures (if new-value
-                              failures
-                              (ring-buffer-insert (current-time)
-                                                  failures)))))))
+         (when new-value
+           (local-output (l10n "Service ~a running with value ~s.")
+                         (service-canonical-name service) new-value))
+         (when (process? new-value)
+           (monitor-service-process service (process-id new-value)))
+
+         (signal-condition! condition)
+         (let ((new-status (if (and new-value (not (one-shot-service? 
service)))
+                               'running
+                               'stopped)))
+           (loop (status new-status)
+                 (value (and (not (one-shot-service? service)) new-value))
+                 (changes (update-status-changes new-status))
+                 (condition #f)
+                 (failures (if new-value
+                               failures
+                               (ring-buffer-insert (current-time)
+                                                   failures)))))))
 
       (((? change-value-message?) new-value)
        (let ((new-value (if (pid? new-value)      ;backward compatibility
@@ -911,6 +1294,7 @@ symbols)."
   (wait (future-condition future))
   (future-value future))
 
+;; TODO: port
 (define (start-in-parallel services)
   "Start @var{services} in parallel--i.e., without waiting for each one to be
 started before starting the next one.  Return the subset of @var{services}
@@ -921,49 +1305,57 @@ that could not be started."
   ;; its 'start' method is invoked only once.  The hash table maps services to
   ;; futures that eventually contain the service's running value when it was
   ;; started, #f if it failed to start.
+  ;; XXX: In a distributed context, we can't be sure all of our dependent
+  ;; services share this parameter.
+  ;; It may be worthwhile to move the start prevention logic inside the service
+  ;; eg by one-shots becoming something with a no-op start message.
   (parameterize ((%one-shot-services-started
                   (or (%one-shot-services-started)
                       (make-hash-table))))
-    (let ((channel (make-channel)))
-      (for-each (lambda (service)
-                  (spawn-fiber
-                   (lambda ()
-                     (let ((value
-                            (guard (c ((action-runtime-error? c)
-                                       (local-output
-                                        (l10n "Exception caught \
-while starting ~a: ~s")
-                                        (service-canonical-name service)
-                                        (cons (action-runtime-error-key c)
-                                              (action-runtime-error-arguments 
c)))
-                                       #f))
-                              (if (one-shot-service? service)
-                                  (match (hashq-ref 
(%one-shot-services-started)
-                                                    service)
-                                    (#f
-                                     ;; Be the first one to start SERVICE.
-                                     (let ((future (make-future)))
-                                       (hashq-set! (%one-shot-services-started)
-                                                   service future)
-                                       (complete-future! future
-                                                         (start-service 
service))
-                                       (touch future)))
-                                    (future
-                                     ;; SERVICE was already started: wait for
-                                     ;; its running value and return it.
-                                     (touch future)))
-                                  (start-service service)))))
-                       (put-message channel (cons service value))))))
-                services)
-      (let loop ((i (length services))
-                 (failures '()))
-        (if (> i 0)
-            (match (get-message channel)
-              ((service . #f)
-               (loop (- i 1) (cons service failures)))
-              ((_ . _)
-               (loop (- i 1) failures)))
-            failures)))))
+    (on
+     (all-of*
+      (map
+       (lambda (service)
+         (on (<- service 'one-shot?)
+             (lambda (one-shot?)
+               (if one-shot?
+                   (match (hashq-ref (%one-shot-services-started) service)
+                     ;; Start SERVICE
+                     (#f
+                      (let ((promise (on (<- service 'start)
+                                         (lambda (value)
+                                           (cons service value))
+                                         #:promise? #t)))
+                        (hashq-set! (%one-shot-services-started)
+                                    service promise)
+                        promise))
+                     ;; SERVICE was started; return its promise value
+                     (promise promise))
+                   (on (<- service 'start)
+                       (lambda (value)
+                         (cons service value))
+                       #:promise? #t)))
+             #:promise? #t
+             ;; XXX: This has slightly different semantics than the original
+             ;; code, which uses guard with one clause equivalent to the first
+             ;; one here.  Does that matter?
+             #:catch
+             (match-lambda
+               ((? action-runtime-error? c)
+                (let-on ((name (<- service 'canonical-name)))
+                  (local-output
+                   (l10n "Exception caught while starting ~a: ~s") name
+                   (cons (action-runtime-error-key c)
+                         (action-runtime-error-arguments c)))
+                  #f))
+               (err (raise err)))))
+       services))
+     (lambda (started-pairs)
+       (fold (match-lambda
+               (((service . #f) failures) (cons service failures))
+               (((_ . _) acc) failures))
+             '() started-pairs))
+     #:promise? #t)))
 
 (define (lookup-service* name)
   "Like @code{lookup-service}, but raise an exception if @var{name} is not
@@ -973,119 +1365,19 @@ found in the service registry."
     (service service)))
 
 (define (start-service service . args)
-  "Start @var{service} and its dependencies, passing @var{args} to its
-@code{start} method.  Return its running value, @code{#f} on failure."
-  ;; It is not running; go ahead and launch it.
-  (let ((problems
-        ;; Resolve all dependencies.
-        (start-in-parallel (map lookup-service*
-                                 (service-requirement service)))))
-    (if (pair? problems)
-        (begin
-          (for-each (lambda (problem)
-                     (local-output (l10n "Service ~a depends on ~a.")
-                                   (service-canonical-name service)
-                                   (service-canonical-name problem)))
-                    problems)
-          #f)
-        ;; Start the service itself.
-        (let ((reply (make-channel)))
-          (put-message (service-control service) `(start ,reply))
-          (match (get-message reply)
-            (#f
-             ;; We lost the race: SERVICE is already running.
-             (service-running-value service))
-            ('disabled
-             (local-output (l10n "Service ~a is currently disabled.")
-                           (service-canonical-name service))
-             (service-running-value service))
-            ((? channel? notification)
-             ;; We won the race: we're responsible for starting SERVICE
-             ;; and sending its running value on NOTIFICATION.
-             (let ((running
-                    (catch #t
-                      (lambda ()
-                        ;; Make sure the 'start' method writes
-                        ;; messages to the right port.
-                        (parameterize ((current-output-port
-                                        (%current-service-output-port))
-                                       (current-error-port
-                                        (%current-service-output-port))
-                                       (current-service service))
-                          (apply (service-start service) args)))
-                      (lambda (key . args)
-                        (put-message notification #f)
-                        (report-exception 'start service key args)))))
-               (put-message notification running)
-               (local-output (if running
-                                (l10n "Service ~a has been started.")
-                                 (l10n "Service ~a could not be started."))
-                            (service-canonical-name service))
-               running)))))))
+  (<- service 'start args))
 
 (define (required-by? service dependent)
-  "Returns #t if DEPENDENT directly requires SERVICE in order to run.  Returns
-#f otherwise."
-  (and (find (lambda (dependency)
-               (memq dependency (service-provision service)))
-             (service-requirement dependent))
-       #t))
-
-;; Stop the service, including services that depend on it.  If the
-;; latter fails, continue anyway.  Return `#f' if it could be stopped.
+  "Wrapper for @var{service} required-by? message
+
+Type: Service Service -> (Promise Boolean)"
+  (<- service 'required-by? dependent))
+
 (define (stop-service service . args)
-  "Stop @var{service} and any service that depends on it.  Return the list of
-services that have been stopped (including transitive dependent services).
+  "Wrapper for @var{service} stop message
 
-If @var{service} is not running, print a warning and return its canonical name
-in a list."
-  (if (service-stopped? service)
-      (begin
-        (local-output (l10n "Service ~a is not running.")
-                      (service-canonical-name service))
-        (list service))
-      (let ((stopped-dependents
-             (fold-services (lambda (other acc)
-                              (if (and (service-running? other)
-                                       (required-by? service other))
-                                  (append (stop-service other) acc)
-                                  acc))
-                            '())))
-        ;; Stop the service itself.
-        (let ((reply (make-channel)))
-          (put-message (service-control service) `(stop ,reply))
-          (match (get-message reply)
-            (#f
-             #f)
-            ((? channel? notification)
-             (catch #t
-               (lambda ()
-                 (define stopped?
-                   (parameterize ((current-service service))
-                     (not (apply (service-stop service)
-                                 (service-running-value service)
-                                 args))))
-                 (put-message notification stopped?))
-               (lambda (key . args)
-                 ;; Special case: 'root' may quit.
-                 (and (eq? root-service service)
-                      (eq? key 'quit)
-                      (apply quit args))
-                 (put-message notification #f)
-                 (report-exception 'stop service key args))))))
-
-        (when (transient-service? service)
-          (put-message (current-registry-channel)
-                       `(unregister ,(list service)))
-          (local-output (l10n "Transient service ~a unregistered.")
-                        (service-canonical-name service)))
-
-        ;; Replace the service with its replacement, if it has one.
-        (let ((replacement (service-replacement service)))
-          (when replacement
-            (register-services (list replacement)))
-
-          (cons (or replacement service) stopped-dependents)))))
+Type: Service . Any -> (Promise (Listof String))"
+  (<- service 'stop args))
 
 (define (perform-service-action service the-action . args)
   "Perform @var{the-action} (a symbol such as @code{'restart} or 
@code{'status})
@@ -1128,7 +1420,7 @@ the action."
 
   (let ((proc (or (and=> (lookup-service-action service the-action)
                          action-procedure)
-                 default-action)))
+                  default-action)))
     ;; Invoking THE-ACTION is allowed even when the service is not running, as
     ;; it provides generally useful functionality and information.
     (catch #t
@@ -1150,7 +1442,7 @@ the action."
              (apply quit args))
 
         ;; Re-throw service errors that the caller will handle.
-        (cond ((and (eq? key '%exception)         ;Guile 3.x
+        (cond ((and (eq? key '%exception) ;Guile 3.x
                     (service-error? (car args)))
                (raise-exception (car args)))
               (else
@@ -1167,34 +1459,34 @@ the action."
   (if (null? args)
       ;; No further argument given -> Normal level of detail.
       (local-output (service-documentation service))
-    (case (string->symbol (car args)) ;; Does not work with strings.
-      ((full)
-       ;; FIXME
-       (local-output (service-documentation service)))
-      ((short)
-       ;; FIXME
-       (local-output (service-documentation service)))
-      ((action)
-       ;; Display documentation of given actions.
-       (for-each
-       (lambda (the-action)
-          (let ((action-object
-                 (lookup-service-action service (string->symbol the-action))))
-            (unless action-object
-              (raise (condition (&unknown-action-error
-                                 (action the-action)
-                                 (service service)))))
-            (local-output "~a: ~a" the-action
-                          (action-documentation action-object))))
-        (cdr args)))
-      ((list-actions)
-       (for-each (lambda (name)
-                   (let ((action (lookup-service-action service name)))
-                     (display-action-documentation action)))
-                 (service-action-list service)))
-      (else
-       ;; FIXME: Implement doc-help.
-       (local-output (l10n "Unknown keyword.  Try 'herd help'."))))))
+      (case (string->symbol (car args)) ;; Does not work with strings.
+        ((full)
+         ;; FIXME
+         (local-output (service-documentation service)))
+        ((short)
+         ;; FIXME
+         (local-output (service-documentation service)))
+        ((action)
+         ;; Display documentation of given actions.
+         (for-each
+          (lambda (the-action)
+            (let ((action-object
+                   (lookup-service-action service (string->symbol 
the-action))))
+              (unless action-object
+                (raise (condition (&unknown-action-error
+                                   (action the-action)
+                                   (service service)))))
+              (local-output "~a: ~a" the-action
+                            (action-documentation action-object))))
+          (cdr args)))
+        ((list-actions)
+         (for-each (lambda (name)
+                     (let ((action (lookup-service-action service name)))
+                       (display-action-documentation action)))
+                   (service-action-list service)))
+        (else
+         ;; FIXME: Implement doc-help.
+         (local-output (l10n "Unknown keyword.  Try 'herd help'."))))))
 
 (define-record-type-serializer (service->sexp (service <service>))
   "Return a representation of SERVICE as an sexp meant to be consumed by
@@ -1588,43 +1880,43 @@ false."
              (print-exception (current-error-port) #f key args)
              (primitive-exit 1))))
 
-     ;; setgid must be done *before* setuid, otherwise the user will
-     ;; likely no longer have permissions to setgid.
-     (when group
-       (catch #t
-         (lambda ()
-           ;; Clear supplementary groups.
-           (setgroups (format-supplementary-groups supplementary-groups))
-           (setgid (group:gid (getgr group))))
-         (lambda (key . args)
-           (format (current-error-port)
-                   "failed to change to group ~s:~%" group)
-           (print-exception (current-error-port) #f key args)
-           (primitive-exit 1))))
+       ;; setgid must be done *before* setuid, otherwise the user will
+       ;; likely no longer have permissions to setgid.
+       (when group
+         (catch #t
+           (lambda ()
+             ;; Clear supplementary groups.
+             (setgroups (format-supplementary-groups supplementary-groups))
+             (setgid (group:gid (getgr group))))
+           (lambda (key . args)
+             (format (current-error-port)
+                     "failed to change to group ~s:~%" group)
+             (print-exception (current-error-port) #f key args)
+             (primitive-exit 1))))
 
-     (when user
-       (catch #t
-         (lambda ()
-           (setuid (passwd:uid (getpw user))))
-         (lambda (key . args)
-           (format (current-error-port)
-                   "failed to change to user ~s:~%" user)
-           (print-exception (current-error-port) #f key args)
-           (primitive-exit 1))))
+       (when user
+         (catch #t
+           (lambda ()
+             (setuid (passwd:uid (getpw user))))
+           (lambda (key . args)
+             (format (current-error-port)
+                     "failed to change to user ~s:~%" user)
+             (print-exception (current-error-port) #f key args)
+             (primitive-exit 1))))
 
-     (when file-creation-mask
-       (umask file-creation-mask))
+       (when file-creation-mask
+         (umask file-creation-mask))
 
-     (catch 'system-error
-       (lambda ()
-         ;; File descriptors used internally are all marked as close-on-exec,
-         ;; so we can fearlessly go ahead.
-         (apply execlp program program args))
-       (lambda args
-         (format (current-error-port)
-                 "exec of ~s failed: ~a~%"
-                 program (strerror (system-error-errno args)))
-         (primitive-exit 1)))))))
+       (catch 'system-error
+         (lambda ()
+           ;; File descriptors used internally are all marked as close-on-exec,
+           ;; so we can fearlessly go ahead.
+           (apply execlp program program args))
+         (lambda args
+           (format (current-error-port)
+                   "exec of ~s failed: ~a~%"
+                   program (strerror (system-error-errno args)))
+           (primitive-exit 1)))))))
 
 (define %precious-signals
   ;; Signals that the shepherd process handles.
@@ -1966,26 +2258,26 @@ The remaining arguments are as for 
@code{make-forkexec-constructor}."
   (define (spawn-child-service connection server-address client-address)
     (let* ((name    (child-service-name))
            (service (service
-                      (list name)
-                      #:requirement requirements
-                      #:respawn? #f
-                      #:transient? #t
-                      #:start (make-inetd-forkexec-constructor
-                               command connection
-                               #:user user
-                               #:group group
-                               #:supplementary-groups
-                               supplementary-groups
-                               #:directory directory
-                               #:file-creation-mask file-creation-mask
-                               #:create-session? create-session?
-                               #:environment-variables
-                               (append (inetd-variables server-address
-                                                        client-address)
-                                   environment-variables)
-                               #:resource-limits resource-limits)
-                      #:termination-handler handle-child-termination
-                      #:stop (make-kill-destructor))))
+                     (list name)
+                     #:requirement requirements
+                     #:respawn? #f
+                     #:transient? #t
+                     #:start (make-inetd-forkexec-constructor
+                              command connection
+                              #:user user
+                              #:group group
+                              #:supplementary-groups
+                              supplementary-groups
+                              #:directory directory
+                              #:file-creation-mask file-creation-mask
+                              #:create-session? create-session?
+                              #:environment-variables
+                              (append (inetd-variables server-address
+                                                       client-address)
+                                      environment-variables)
+                              #:resource-limits resource-limits)
+                     #:termination-handler handle-child-termination
+                     #:stop (make-kill-destructor))))
       (register-services (list service))
       (start-service service)))
 
@@ -2541,31 +2833,31 @@ then disable it."
            (issue-deprecation-warning
             "Passing 'register-services' services as a rest list is \
 now deprecated."))))
-   (case-lambda
-     ((services)
-      "Register @var{services} so that they can be looked up by name, for 
instance
+    (case-lambda
+      ((services)
+       "Register @var{services} so that they can be looked up by name, for 
instance
 when resolving dependencies.
 
 Each name uniquely identifies one service.  If a service with a given name has
 already been registered, arrange to have it replaced when it is next stopped.
 If it is currently stopped, replace it immediately."
-      (define (register-single-service new)
-        ;; Sanity-checks first.
-        (assert (list-of-symbols? (service-provision new)))
-        (assert (list-of-symbols? (service-requirement new)))
-        (assert (boolean? (respawn-service? new)))
-
-        (put-message (current-registry-channel) `(register ,new)))
-
-      (let ((services (if (service? services)
-                          (begin
-                            (warn-deprecated-form)
-                            (list services))
-                          services)))
-        (for-each register-single-service services)))
-     (services
-      (warn-deprecated-form)
-      (register-services services)))))
+       (define (register-single-service new)
+         ;; Sanity-checks first.
+         (assert (list-of-symbols? (service-provision new)))
+         (assert (list-of-symbols? (service-requirement new)))
+         (assert (boolean? (respawn-service? new)))
+
+         (put-message (current-registry-channel) `(register ,new)))
+
+       (let ((services (if (service? services)
+                           (begin
+                             (warn-deprecated-form)
+                             (list services))
+                           services)))
+         (for-each register-single-service services)))
+      (services
+       (warn-deprecated-form)
+       (register-services services)))))
 
 (define (unregister-services services)
   "Remove all of @var{services} from the registry, stopping them if they are 
not
@@ -2619,10 +2911,10 @@ requested to be removed."
 ;; Test if OBJ is a list that only contains symbols.
 (define (list-of-symbols? obj)
   (cond ((null? obj) #t)
-       ((and (pair? obj)
-             (symbol? (car obj)))
-        (list-of-symbols? (cdr obj)))
-       (else #f)))
+        ((and (pair? obj)
+              (symbol? (car obj)))
+         (list-of-symbols? (cdr obj)))
+        (else #f)))
 
 
 
@@ -2646,7 +2938,7 @@ requested to be removed."
                   (local-output
                    (l10n "Ignoring unknown error while stopping ~a: ~s")
                    (service-canonical-name service) c)))
-        (stop-service service))))
+         (stop-service service))))
    (service-list)))
 
 (define (check-for-dead-services)
@@ -2678,33 +2970,33 @@ where prctl/PR_SET_CHILD_SUBREAPER is unsupported."
 (define root-service
   (service
    '(root shepherd)
-    #:documentation
-    (l10n "The root service is used to operate on shepherd itself.")
-    #:requirement '()
-    #:respawn? #f
-    #:start (lambda args
-             (when (isatty? (current-output-port))
-                (display-version))
-              (lambda (change-value)
-                (add-hook! %post-daemonize-hook
-                           (lambda ()
-                             (change-value (process (getpid) #f))))
-                (process (getpid) #f)))
-    #:stop (lambda (unused . args)
-            (local-output (l10n "Exiting shepherd..."))
-             (shutdown-services)
-            (quit))
-    ;; All actions here need to take care that they do not invoke any
-    ;; user-defined code without catching `quit', since they are
-    ;; allowed to quit, while user-supplied code shouldn't be.
-    #:actions
-    (actions
-     (help
-      "Show the help message for the 'root' service."
-      (lambda _
-        ;; A rudimentary attempt to have 'herd help' return something
-        ;; sensible.
-        (l10n "\
+   #:documentation
+   (l10n "The root service is used to operate on shepherd itself.")
+   #:requirement '()
+   #:respawn? #f
+   #:start (lambda args
+             (when (isatty? (current-output-port))
+               (display-version))
+             (lambda (change-value)
+               (add-hook! %post-daemonize-hook
+                          (lambda ()
+                            (change-value (process (getpid) #f))))
+               (process (getpid) #f)))
+   #:stop (lambda (unused . args)
+            (local-output (l10n "Exiting shepherd..."))
+            (shutdown-services)
+            (quit))
+   ;; All actions here need to take care that they do not invoke any
+   ;; user-defined code without catching `quit', since they are
+   ;; allowed to quit, while user-supplied code shouldn't be.
+   #:actions
+   (actions
+    (help
+     "Show the help message for the 'root' service."
+     (lambda _
+       ;; A rudimentary attempt to have 'herd help' return something
+       ;; sensible.
+       (l10n "\
 This is the help message for the 'root' service of the Shepherd.  The 'root'
 service is used to control the Shepherd itself and it supports several
 actions.  For instance, running 'herd status root' shows information about the
@@ -2713,104 +3005,103 @@ Shepherd, and running 'herd status' returns a summary 
of each service.
 Try 'herd doc root list-actions' to see the list of available actions.
 Run 'info shepherd' to access the user manual.")))
 
-     (status
-      "Return an s-expression showing information about all the services.
+    (status
+     "Return an s-expression showing information about all the services.
 Clients such as 'herd' can read it and format it in a human-readable way."
-      (lambda (running)
-        ;; Return the list of services.
-        (service-list)))
-
-     ;; Halt.
-     (halt
-      "Halt the system."
-      (lambda (running)
-        (catch 'quit
-          (cut stop-service root-service)
-          (lambda (key)
-            (local-output (l10n "Halting..."))
-            (halt)))))
-     ;; Power off.
-     (power-off
-      "Halt the system and turn it off."
-      (lambda (running)
-        (catch 'quit
-          (cut stop-service root-service)
-          (lambda (key)
-            (local-output (l10n "Shutting down..."))
-            (power-off)))))
-
-     (kexec
-      "Reboot the system and run kexec."
-      (lambda (running)
-        (catch 'quit
-          (cut stop-service root-service)
-          (lambda (key)
-            (local-output (l10n "Rebooting with kexec..."))
-            (reboot-kexec)))))
-
-     ;; Evaluate arbitrary code.
-     (load
-      "Load the Scheme code from FILE into shepherd.  This is potentially
+     (lambda (running)
+       ;; Return the list of services.
+       (service-list)))
+
+    ;; Halt.
+    (halt
+     "Halt the system."
+     (lambda (running)
+       (catch 'quit
+         (cut stop-service root-service)
+         (lambda (key)
+           (local-output (l10n "Halting..."))
+           (halt)))))
+    ;; Power off.
+    (power-off
+     "Halt the system and turn it off."
+     (lambda (running)
+       (catch 'quit
+         (cut stop-service root-service)
+         (lambda (key)
+           (local-output (l10n "Shutting down..."))
+           (power-off)))))
+
+    (kexec
+     "Reboot the system and run kexec."
+     (lambda (running)
+       (catch 'quit
+         (cut stop-service root-service)
+         (lambda (key)
+           (local-output (l10n "Rebooting with kexec..."))
+           (reboot-kexec)))))
+
+    ;; Evaluate arbitrary code.
+    (load
+     "Load the Scheme code from FILE into shepherd.  This is potentially
 dangerous.  You have been warned."
-      (lambda (running file-name)
-        (load-config file-name)))
-     (eval
-      "Evaluate the given Scheme expression into the shepherd.  This is
+     (lambda (running file-name)
+       (load-config file-name)))
+    (eval
+     "Evaluate the given Scheme expression into the shepherd.  This is
 potentially dangerous, be careful."
-      (lambda (running str)
-        (let ((exp (call-with-input-string str read)))
-          (local-output (l10n "Evaluating user expression ~a.")
-                        (call-with-output-string
-                          (lambda (port)
-                            (truncated-print exp port #:width 50))))
-          (eval-in-user-module exp))))
-
-     ;; Unload a service
-     (unload
-      "Unload the service identified by SERVICE-NAME or all services
+     (lambda (running str)
+       (let ((exp (call-with-input-string str read)))
+         (local-output (l10n "Evaluating user expression ~a.")
+                       (call-with-output-string
+                         (lambda (port)
+                           (truncated-print exp port #:width 50))))
+         (eval-in-user-module exp))))
+
+    ;; Unload a service
+    (unload
+     "Unload the service identified by SERVICE-NAME or all services
 except for 'root' if SERVICE-NAME is 'all'.  Stop services before
 removing them if needed."
-      (lambda (running service-name)
-        (deregister-service service-name)))
-     (reload
-      "Unload all services, then load from FILE-NAME into shepherd.  This
+     (lambda (running service-name)
+       (deregister-service service-name)))
+    (reload
+     "Unload all services, then load from FILE-NAME into shepherd.  This
 is potentially dangerous.  You have been warned."
-      (lambda (running file-name)
-        (and (deregister-service "all") ; unload all services
-             (load-config file-name)))) ; reload from FILE-NAME
-     ;; Go into the background.
-     (daemonize
-      "Go into the background.  Be careful, this means that a new
+     (lambda (running file-name)
+       (and (deregister-service "all") ; unload all services
+            (load-config file-name)))) ; reload from FILE-NAME
+    ;; Go into the background.
+    (daemonize
+     "Go into the background.  Be careful, this means that a new
 process will be created, so shepherd will not get SIGCHLD signals anymore
 if previously spawned children terminate.  Therefore, this action should
 usually only be used (if at all) *before* children get spawned for which
 we want to receive these signals."
-      (lambda (running)
-        (cond ((= 1 (getpid))
-               (local-output (l10n "Running as PID 1, so not daemonizing.")))
-              ((fold-services (lambda (service found?)
-                                (or found?
-                                    (and (service-running? service)
-                                         (not (eq? service root-service)))))
-                              #f)
-               (local-output
-                (l10n "Services already running, so not daemonizing."))
-               #f)
-              (else
-               (local-output (l10n "Daemonizing..."))
-               (if (zero? (primitive-fork))
-                   (begin
-                     (catch-system-error (prctl PR_SET_CHILD_SUBREAPER 1))
-                     (run-hook %post-daemonize-hook)
-                     (local-output (l10n "Now running as process ~a.")
-                                   (getpid))
-                     #t)
-                   (primitive-exit 0))))))
-     ;; Restart it - that does not make sense, but
-     ;; we're better off by implementing it due to the
-     ;; default action.
-     (restart
-      "This does not work for the 'root' service."
-      (lambda (running)
-       (local-output (l10n "You must be kidding.")))))))
-
+     (lambda (running)
+       (cond ((= 1 (getpid))
+              (local-output (l10n "Running as PID 1, so not daemonizing.")))
+             ((fold-services (lambda (service found?)
+                               (or found?
+                                   (and (service-running? service)
+                                        (not (eq? service root-service)))))
+                             #f)
+              (local-output
+               (l10n "Services already running, so not daemonizing."))
+              #f)
+             (else
+              (local-output (l10n "Daemonizing..."))
+              (if (zero? (primitive-fork))
+                  (begin
+                    (catch-system-error (prctl PR_SET_CHILD_SUBREAPER 1))
+                    (run-hook %post-daemonize-hook)
+                    (local-output (l10n "Now running as process ~a.")
+                                  (getpid))
+                    #t)
+                  (primitive-exit 0))))))
+    ;; Restart it - that does not make sense, but
+    ;; we're better off by implementing it due to the
+    ;; default action.
+    (restart
+     "This does not work for the 'root' service."
+     (lambda (running)
+       (local-output (l10n "You must be kidding.")))))))



reply via email to

[Prev in Thread] Current Thread [Next in Thread]