[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [Qemu-devel] [PATCH 2/2] guest-agent: preserve online/offline state
From: |
Laszlo Ersek |
Subject: |
Re: [Qemu-devel] [PATCH 2/2] guest-agent: preserve online/offline state of VCPUs on guest reboot |
Date: |
Mon, 06 Oct 2014 11:46:12 +0200 |
User-agent: |
Mozilla/5.0 (X11; Linux x86_64; rv:31.0) Gecko/20100101 Thunderbird/31.1.2 |
On 10/06/14 09:44, Igor Mammedov wrote:
> Fixes issue when CPU was offlined via libvirt using command:
> virsh setvcpus --guest myguest NR_CPUS
> but it became onlined again after guest was rebooted.
>
> Fix issue by storing current state of CPUs online state
> on persistent storage when GA is being stopped and restore
> it when it's started at system boot time.
>
> Signed-off-by: Igor Mammedov <address@hidden>
> ---
> qga/main.c | 102
> +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
> 1 file changed, 102 insertions(+)
>
> diff --git a/qga/main.c b/qga/main.c
> index 5afba01..1173ca9 100644
> --- a/qga/main.c
> +++ b/qga/main.c
> @@ -32,6 +32,7 @@
> #include "qapi/qmp/dispatch.h"
> #include "qga/channel.h"
> #include "qemu/bswap.h"
> +#include "qga-qmp-commands.h"
> #ifdef _WIN32
> #include "qga/service-win32.h"
> #include "qga/vss-win32.h"
> @@ -57,6 +58,7 @@
> #define QGA_FSFREEZE_HOOK_DEFAULT CONFIG_QEMU_CONFDIR "/fsfreeze-hook"
> #endif
> #define QGA_SENTINEL_BYTE 0xFF
> +#define QGA_CPU_STATE_GROUP "CPU online states"
>
> static struct {
> const char *state_dir;
> @@ -66,6 +68,7 @@ static struct {
> typedef struct GAPersistentState {
> #define QGA_PSTATE_DEFAULT_FD_COUNTER 1000
> int64_t fd_counter;
> + GuestLogicalProcessorList *vcpus;
> } GAPersistentState;
>
> struct GAState {
> @@ -770,6 +773,40 @@ static void
> persistent_state_from_keyfile(GAPersistentState *pstate,
> pstate->fd_counter =
> g_key_file_get_integer(keyfile, "global", "fd_counter", NULL);
> }
> +
> + if (g_key_file_has_group(keyfile, QGA_CPU_STATE_GROUP)) {
> + bool state;
> + char **cpu_id_str;
> + GuestLogicalProcessor *vcpu;
> + GuestLogicalProcessorList *entry;
> + GuestLogicalProcessorList *head, **link;
> + char **cpus_list = g_key_file_get_keys(keyfile, QGA_CPU_STATE_GROUP,
> + NULL, NULL);
> +
> + head = NULL;
> + link = &head;
> + for (cpu_id_str = cpus_list; *cpu_id_str; ++cpu_id_str) {
> + state = g_key_file_get_boolean(keyfile, QGA_CPU_STATE_GROUP,
> + *cpu_id_str, NULL);
> +
> + vcpu = g_malloc0(sizeof *vcpu);
> + vcpu->logical_id = g_ascii_strtoll(*cpu_id_str, NULL, 0);
> + vcpu->online = state;
> + vcpu->has_can_offline = true;
> +
> + entry = g_malloc0(sizeof *entry);
> + entry->value = vcpu;
> +
> + *link = entry;
> + link = &entry->next;
> + }
> + pstate->vcpus = head;
> +
> + g_strfreev(cpus_list);
> + }
> + if (pstate->vcpus == NULL) {
> + pstate->vcpus = qmp_guest_get_vcpus(NULL);
> + }
> }
>
> static void persistent_state_to_keyfile(const GAPersistentState *pstate,
> @@ -779,6 +816,27 @@ static void persistent_state_to_keyfile(const
> GAPersistentState *pstate,
> g_assert(keyfile);
>
> g_key_file_set_integer(keyfile, "global", "fd_counter",
> pstate->fd_counter);
> +
> + if (pstate->vcpus) {
> + GuestLogicalProcessorList *vcpus = pstate->vcpus;
> +
> + while (vcpus != NULL) {
> + char *logical_id;
> + GuestLogicalProcessor *vcpu = vcpus->value;
> +
> + if (vcpu->can_offline == false) {
> + vcpus = vcpus->next;
> + continue;
> + }
> +
> + logical_id = g_strdup_printf("%" PRId64, vcpu->logical_id);
> + g_key_file_set_boolean(keyfile, QGA_CPU_STATE_GROUP,
> + logical_id, vcpu->online);
> + g_free(logical_id);
> +
> + vcpus = vcpus->next;
> + }
> + }
> }
>
> static gboolean write_persistent_state(const GAPersistentState *pstate,
> @@ -820,6 +878,47 @@ out:
> return ret;
> }
>
> +static void ga_clean_vcpu_pstate(GAPersistentState *pstate)
> +{
> + if (pstate->vcpus) {
> + qapi_free_GuestLogicalProcessorList(pstate->vcpus);
> + pstate->vcpus = NULL;
> + }
> +}
> +
> +static void ga_clean_pstate(GAPersistentState *pstate)
> +{
> + ga_clean_vcpu_pstate(pstate);
> +}
> +
> +#if defined(__linux__)
> +
> +static void ga_update_vcpu_pstate(GAPersistentState *pstate, const char
> *path)
> +{
> + Error *local_err = NULL;
> +
> + ga_clean_vcpu_pstate(pstate);
> + pstate->vcpus = qmp_guest_get_vcpus(&local_err);
> +
> + if (local_err != NULL) {
> + g_critical("%s", error_get_pretty(local_err));
> + error_free(local_err);
> + return;
> + }
> +
> + if (!write_persistent_state(pstate, path)) {
> + g_critical("failed to commit CPU persistent state to disk");
> + }
> +}
> +
> +#else
> +
> +static void ga_update_vcpu_pstate(GAPersistentState *pstate, const char
> *path)
> +{
> +}
> +
> +#endif
> +
> static gboolean read_persistent_state(GAPersistentState *pstate,
> const gchar *path, gboolean frozen)
> {
> @@ -878,6 +977,7 @@ static gboolean read_persistent_state(GAPersistentState
> *pstate,
> }
>
> persistent_state_from_keyfile(pstate, keyfile);
> + qmp_guest_set_vcpus(pstate->vcpus, &error_abort);
>
> out:
> if (keyfile) {
> @@ -1185,6 +1285,8 @@ int main(int argc, char **argv)
>
> ga_command_state_cleanup_all(ga_state->command_state);
> ga_channel_free(ga_state->channel);
> + ga_update_vcpu_pstate(&ga_state->pstate, ga_state->pstate_filepath);
> + ga_clean_pstate(&ga_state->pstate);
>
> if (daemonize) {
> unlink(pid_filepath);
>
I think this information should be held in *libvirtd*.
The current complaint is:
- admin starts VM with 4 VCPUs
- disables 2 of them with the in-guest online/offline interface
(because hot-unplug doesn't work yet fully)
- guest is rebooted and comes up with 4 VCPUs again
- admin is annoyed
And the patch tries to address this.
Suppose it's committed. The next complaint will be:
- admin starts VM with 4 VCPUs
- disables 2 of them with the in-guest online/offline interface
- guest is shut down fully
- admin modifies guest config --> 6 VCPUs
- admin boots VM
- guest agent does "something" to the VCPU state from the inside, right
at startup (ie. offlining a few VCPUs)
- admin is annoyed
I think this info should be kept in *libvirtd* (the domain config,
actually). Libvirtd already has all the VCPU related information. A
statement of the form
please always use "virsh setvcpus --guest" *for this domain*
is just another bit that pertains to the same set of information.
IOW, "--guest" is just an "access method", and what we want to make
permanent here is *that*. The number of online VCPUs, as a *preference*,
shouldn't be duplicated inside the guest; libvirt already has that
information. What we should do instead is inform libvirt persistently
that it always should use "--guest", when massaging VCPU status, in
*this* particular guest.
Importantly, I'm not arguing that '--guest' should be the indiscriminate
default -- I'm aware that it's a security risk; see
<https://bugzilla.redhat.com/show_bug.cgi?id=1127403#c1>.
(I apologize to non-RedHatters for referencing a non-public BZ here, but
it's closed due to customer details in it. The gist is that "anything
that requires guest interaction is a security risk on an untrusted
guest, and therefore must be opt-in instead of default" -- Eric's words.)
I agree that '--guest' should be opt-in, in general. However, the admin
should be able to opt-in *persistently* for a *specific* domain.
When the --guest preference is captured persistently in the domain XML
(alongside the preferred online VCPU count), then libvirtd can (could)
effect the exact desired VCPU count as soon as the agent comes up inside
the guest and connects.
Multi-mastering the desired VCPU count just calls for trouble.
Thanks,
Laszlo