From dc8bdf692d3802f87aa5b13a244771e1707c1a1a Mon Sep 17 00:00:00 2001
From: "B. Wilson"
Date: Tue, 20 Apr 2021 11:49:26 +0900
Subject: [PATCH] gnu: Add rasdaemon.
To: guix-patches@gnu.org

* gnu/packages/linux.scm (rasdaemon): New variable.
* gnu/services/linux.scm (rasdaemon-configuration)
(rasdaemon-configuration?, rasdaemon-configuration-record?)
(rasdaemon-service-type): New variables.
* doc/guix.texi (Linux Services): Document it.
---
 doc/guix.texi          | 81 ++++++++++++++++++++++++++++++++++++++++++
 gnu/packages/linux.scm | 45 +++++++++++++++++++++++
 gnu/services/linux.scm | 49 +++++++++++++++++++++++++
 3 files changed, 175 insertions(+)

diff --git a/doc/guix.texi b/doc/guix.texi
index 58bcfbdbb5..a80ad02223 100644
--- a/doc/guix.texi
+++ b/doc/guix.texi
@@ -88,6 +88,7 @@ Copyright @copyright{} 2020 John Soo@*
 Copyright @copyright{} 2020 Jonathan Brielmaier@*
 Copyright @copyright{} 2020 Edgar Vincent@*
 Copyright @copyright{} 2021 Maxime Devos@*
+Copyright @copyright{} 2021 B. Wilson@*
 
 Permission is granted to copy, distribute and/or modify this document
 under the terms of the GNU Free Documentation License, Version 1.3 or
@@ -31457,6 +31458,86 @@ parameters, can be done as follow:
 @end lisp
 @end deffn
 
+@cindex rasdaemon
+@cindex Platform Reliability, Availability and Serviceability daemon
+@subsubheading Rasdaemon Service
+
+The Rasdaemon service provides a daemon which monitors the platform Reliability,
+Availability and Serviceability (RAS) reports from the Linux kernel trace
+events, logging them in @file{/var/log/rasdaemon.log}.
+
+Reliability, Availability and Serviceability is a concept used on servers meant
+to measure their robustness.
+
+@strong{Reliability} is the probability that a system will produce correct
+outputs:
+
+@itemize @bullet
+@item Generally measured as Mean Time Between Failures (MTBF), and
+@item Enhanced by features that help to avoid, detect and repair hardware
+faults.
+@end itemize
+
+@strong{Availability} is the probability that a system is operational at a
+given time:
+
+@itemize @bullet
+@item Generally measured as a percentage of downtime per a period of time, and
+@item Often uses mechanisms to detect and correct hardware faults at runtime.
+@end itemize
+
+@strong{Serviceability} is the simplicity and speed with which a system can be
+repaired or maintained:
+
+@itemize @bullet
+@item Generally measured as Mean Time Between Repair (MTBR).
+@end itemize
+
+
+Among the monitoring measures, the most usual ones include:
+
+@itemize @bullet
+@item CPU – detect errors at instruction execution and at L1/L2/L3 caches;
+@item Memory – add error correction logic (ECC) to detect and correct errors;
+@item I/O – add CRC checksums for transferred data;
+@item Storage – RAID, journal file systems, checksums, Self-Monitoring,
+Analysis and Reporting Technology (SMART).
+@end itemize
+
+By monitoring the number of occurrences of error detections, it is possible to
+identify if the probability of hardware errors is increasing and, in such a
+case, perform preventive maintenance to replace a degraded component while
+those errors are still correctable.
+
+For detailed information about the types of error events gathered and how to
+make sense of them, see the kernel administrator's guide at
+@url{https://www.kernel.org/doc/html/latest/admin-guide/ras.html}.
+
+@defvr {Scheme Variable} rasdaemon-service-type
+Service type for the @command{rasdaemon} service. It accepts a
+@code{rasdaemon-configuration} object. Instantiating like
+
+@lisp
+(service rasdaemon-service-type)
+@end lisp
+
+will load with a default configuration, which monitors all events and logs to
+@file{/var/log/rasdaemon.log}.
+@end defvr
+
+@deftp {Data Type} rasdaemon-configuration
+The data type representing the configuration of @command{rasdaemon}.
+
+@table @asis
+@item @code{record?} (default: @code{#f})
+
+A boolean indicating whether to record the events in an SQLite database. This
+provides more structured access to the information contained in the log file.
+The database location is hard-coded to @file{/var/lib/rasdaemon/ras-mc_event.db}.
+
+@end table
+@end deftp
+
 @cindex zram
 @cindex compressed swap
 @cindex Compressed RAM-based block devices
diff --git a/gnu/packages/linux.scm b/gnu/packages/linux.scm
index 1ea9d80834..0384ae03df 100644
--- a/gnu/packages/linux.scm
+++ b/gnu/packages/linux.scm
@@ -53,6 +53,7 @@
 ;;; Copyright © 2020 Zhu Zihao
 ;;; Copyright © 2020 David Dashyan
 ;;; Copyright © 2020 pukkamustard
+;;; Copyright © 2021 B. Wilson
 ;;;
 ;;; This file is part of GNU Guix.
 ;;;
@@ -130,6 +131,7 @@
   #:use-module (gnu packages sdl)
   #:use-module (gnu packages serialization)
   #:use-module (gnu packages slang)
+  #:use-module (gnu packages sqlite)
   #:use-module (gnu packages texinfo)
   #:use-module (gnu packages tls)
   #:use-module (gnu packages valgrind)
@@ -8037,3 +8039,46 @@ kernel side implementation.")
 read-only file system optimized for resource-scarce devices.  This package
 provides user-space tools for creating EROFS file systems.")
     (license license:gpl2+)))
+
+(define-public rasdaemon
+  (package
+    (name "rasdaemon")
+    (version "0.6.6")
+    (source
+     (origin
+       (method git-fetch)
+       (uri (git-reference
+             (url "https://github.com/mchehab/rasdaemon")
+             (commit (string-append "v" version))))
+       (file-name (git-file-name name version))
+       (sha256
+        (base32 "13g39x19lfjf9izdcb0nlyfjrgpliivhv4nw3ndgyzi59l3yqc0v"))))
+    (native-inputs `(("autoconf" ,autoconf)
+                     ("automake" ,automake)
+                     ("libtool" ,libtool)))
+    (inputs `(("sqlite" ,sqlite)))
+    (arguments
+     `(#:configure-flags '("--enable-all"
+                           "--localstatedir=/var")
+       #:phases
+       (modify-phases %standard-phases
+         (add-before 'configure 'munge-autotools
+           (lambda _
+             ;; For some reason upstream forces sysconfdir=/etc.  This results
+             ;; in EPERM during the install phase.  Removing the offending
+             ;; line lets sysconfdir correctly pick up DESTDIR.
+             (substitute* "configure.ac"
+               (("^test .* sysconfdir=/etc\n$") ""))
+             ;; Upstream tries to create /var/lib/rasdaemon at install time.
+             ;; This results in EPERM on guix.  Instead, the service should
+             ;; create this at activation time.
+             (substitute* "Makefile.am"
+               (("^\\s*\\$\\(install_sh\\) -d .*@RASSTATEDIR@.*$") "")))))))
+    (build-system gnu-build-system)
+    (home-page "https://github.com/mchehab/rasdaemon")
+    (synopsis "Platform Reliability, Availability and Serviceability tools")
+    (description "The @code{rasdaemon} program is a daemon which monitors the
+platform Reliability, Availability and Serviceability (RAS) reports from the
+Linux kernel trace events.  These trace events are logged in
+/sys/kernel/debug/tracing, reporting them via syslog/journald.")
+    (license (list license:gpl2 license:gpl2+ license:lgpl2.1))))
diff --git a/gnu/services/linux.scm b/gnu/services/linux.scm
index 340b330030..5ecc9bdf25 100644
--- a/gnu/services/linux.scm
+++ b/gnu/services/linux.scm
@@ -3,6 +3,7 @@
 ;;; Copyright © 2020 Brice Waegeneire
 ;;; Copyright © 2020 Efraim Flashner
 ;;; Copyright © 2021 raid5atemyhomework
+;;; Copyright © 2021 B. Wilson
 ;;;
 ;;; This file is part of GNU Guix.
 ;;;
@@ -47,6 +48,11 @@
 
             kernel-module-loader-service-type
 
+            rasdaemon-configuration
+            rasdaemon-configuration?
+            rasdaemon-configuration-record?
+            rasdaemon-service-type
+
             zram-device-configuration
             zram-device-configuration?
             zram-device-configuration-size
@@ -188,6 +194,49 @@ representation."
    (extend append)
    (default-value '())))
 
+
+;;;
+;;; Reliability, Availability, and Serviceability (RAS) daemon
+;;;
+
+(define-record-type* <rasdaemon-configuration>
+  rasdaemon-configuration make-rasdaemon-configuration
+  rasdaemon-configuration?
+  (record? rasdaemon-configuration-record? (default #f))) ; #t adds --record
+
+(define (rasdaemon-configuration->command-line-args config)
+  "Translate CONFIG, a <rasdaemon-configuration> object, to its command line
+arguments representation."
+  (let ((record? (rasdaemon-configuration-record? config)))
+    `(,(file-append rasdaemon "/sbin/rasdaemon")
+      "--foreground" ,@(if record? '("--record") '()))))
+
+(define (rasdaemon-activation config)
+  "Return a gexp creating the SQLite state directory if CONFIG records events."
+  (let ((record? (rasdaemon-configuration-record? config)))
+    (with-imported-modules '((guix build utils))
+      #~(when #$record? (mkdir-p "/var/lib/rasdaemon")))))
+
+(define (rasdaemon-shepherd-service config)
+  (shepherd-service
+   (documentation "Run rasdaemon")
+   (provision '(rasdaemon))
+   (start #~(make-forkexec-constructor
+             '#$(rasdaemon-configuration->command-line-args config)
+             #:log-file "/var/log/rasdaemon.log")) ; documented in the manual
+   (stop #~(make-kill-destructor))))
+
+(define rasdaemon-service-type
+  (service-type
+   (name 'rasdaemon)
+   (default-value (rasdaemon-configuration))
+   (extensions
+    (list (service-extension shepherd-root-service-type
+                             (compose list rasdaemon-shepherd-service))
+          (service-extension activation-service-type rasdaemon-activation)))
+   (compose concatenate)
+   (description "Run @command{rasdaemon}, the RAS monitor")))
+
 
 ;;;
 ;;; Kernel module loader.
-- 
2.31.1