qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Qemu-devel] [PATCH 3/3] coroutine: add x86 specific coroutine backe


From: Kevin Wolf
Subject: Re: [Qemu-devel] [PATCH 3/3] coroutine: add x86 specific coroutine backend
Date: Wed, 13 Mar 2019 15:14:26 +0100
User-agent: Mutt/1.11.3 (2019-02-01)

Am 11.03.2019 um 13:35 hat Paolo Bonzini geschrieben:
> This backend is faster (100ns vs 150ns per switch on my laptop), but
> especially it will be possible to add CET support to it in 4.1.  In
> the meanwhile, it is nice to have it as an experimental alternative.
> 
> Signed-off-by: Paolo Bonzini <address@hidden>

Creating a qcow2 image fails for me with a General Protection Fault.

Looks like this is because of a 'movaps %xmm0,(%rsp)' with an unaligned
stack pointer (0x7fffec5f8b78). We need to start with rsp 8 bytes lower
to comply with the calling convention.

> --- /dev/null
> +++ b/util/coroutine-x86.c
> @@ -0,0 +1,213 @@
> +/*
> + * x86-specific coroutine initialization code
> + *
> + * Copyright (C) 2006  Anthony Liguori <address@hidden>
> + * Copyright (C) 2011  Kevin Wolf <address@hidden>
> + * Copyright (C) 2019  Paolo Bonzini <address@hidden>
> + *
> + * This library is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.0 of the License, or (at your option) any later version.
> + *
> + * This library is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with this library; if not, see 
> <http://www.gnu.org/licenses/>.
> + */
> +
> +/* XXX Is there a nicer way to disable glibc's stack check for longjmp? */
> +#ifdef _FORTIFY_SOURCE
> +#undef _FORTIFY_SOURCE
> +#endif

You don't use setjmp/longjmp, so is this really necessary?

> +#include "qemu/osdep.h"
> +#include "qemu-common.h"
> +#include "qemu/coroutine_int.h"
> +
> +#ifdef CONFIG_VALGRIND_H
> +#include <valgrind/valgrind.h>
> +#endif
> +
> +#if defined(__SANITIZE_ADDRESS__) || __has_feature(address_sanitizer)
> +#ifdef CONFIG_ASAN_IFACE_FIBER
> +#define CONFIG_ASAN 1
> +#include <sanitizer/asan_interface.h>
> +#endif
> +#endif
> +
> +typedef struct {
> +    Coroutine base;
> +    void *stack;
> +    size_t stack_size;
> +    void *sp;
> +
> +#ifdef CONFIG_VALGRIND_H
> +    unsigned int valgrind_stack_id;
> +#endif
> +} CoroutineX86;
> +
> +/**
> + * Per-thread coroutine bookkeeping
> + */
> +static __thread CoroutineX86 leader;
> +static __thread Coroutine *current;
> +
> +static void finish_switch_fiber(void *fake_stack_save)
> +{
> +#ifdef CONFIG_ASAN
> +    const void *bottom_old;
> +    size_t size_old;
> +
> +    __sanitizer_finish_switch_fiber(fake_stack_save, &bottom_old, &size_old);
> +
> +    if (!leader.stack) {
> +        leader.stack = (void *)bottom_old;
> +        leader.stack_size = size_old;
> +    }
> +#endif
> +}
> +
> +static void start_switch_fiber(void **fake_stack_save,
> +                               const void *bottom, size_t size)
> +{
> +#ifdef CONFIG_ASAN
> +    __sanitizer_start_switch_fiber(fake_stack_save, bottom, size);
> +#endif
> +}

These two functions are duplicated without changes between ucontext and
x86, and they aren't really backend-specific. Should they be moved to a
place where both backends can share them, like util/qemu-coroutine.c?

> +/* On entry to a coroutine, rax is "value" and rsi is the coroutine itself.  
> */

rax is "action" (not "value"), and the coroutine is rdi (not rsi).

> +#define CO_SWITCH(from, to, action, jump) ({                                 
>            \
> +    int ret = action;                                                        
>            \
> +    void *from_ = from;                                                      
>            \
> +    void *to_ = to;                                                          
>            \
> +    asm volatile(                                                            
>            \
> +        ".cfi_remember_state\n"                                              
>            \
> +        "pushq %%rbp\n"                     /* save scratch register on 
> source stack */ \
> +        ".cfi_adjust_cfa_offset 8\n"                                         
>            \
> +        ".cfi_rel_offset %%rbp, 0\n"                                         
>            \
> +        "call 1f\n"                         /* switch continues at label 1 
> */           \
> +        ".cfi_adjust_cfa_offset 8\n"                                         
>            \
> +        "jmp 2f\n"                          /* switch back continues at 
> label 2 */      \
> +        "1: movq (%%rsp), %%rbp\n"          /* save source IP for debugging 
> */          \
> +        "movq %%rsp, %c[sp](%[FROM])\n"     /* save source SP */             
>            \
> +        "movq %c[sp](%[TO]), %%rsp\n"       /* load destination SP */        
>            \
> +        jump "\n"                           /* coroutine switch */           
>            \
> +        "2:"                                                                 
>            \
> +        ".cfi_adjust_cfa_offset -8\n"                                        
>            \
> +        "popq %%rbp\n"                                                       
>            \
> +        ".cfi_adjust_cfa_offset -8\n"                                        
>            \
> +        ".cfi_restore_state\n"                                               
>            \
> +        : "+a" (ret), [FROM] "+b" (from_), [TO] "+D" (to_)                   
>            \
> +        : [sp] "i" (offsetof(CoroutineX86, sp))                              
>            \
> +        : "rcx", "rdx", "rsi", "r8", "r9", "r10", "r11", "r12", "r13", 
> "r14", "r15",    \
> +          "memory");                                                         
>            \
> +    ret; \
> +})
> +
> +static void __attribute__((__used__)) coroutine_trampoline(void *arg)
> +{
> +    CoroutineX86 *self = arg;
> +    Coroutine *co = &self->base;
> +
> +    finish_switch_fiber(NULL);
> +
> +    while (true) {
> +        qemu_coroutine_switch(co, co->caller, COROUTINE_TERMINATE);
> +        co->entry(co->entry_arg);

Okay, inverse order because you have a fake entry on creation below...

> +    }
> +}
> +
> +Coroutine *qemu_coroutine_new(void)
> +{
> +    CoroutineX86 *co;
> +    void *fake_stack_save = NULL;
> +
> +    co = g_malloc0(sizeof(*co));
> +    co->stack_size = COROUTINE_STACK_SIZE;
> +    co->stack = qemu_alloc_stack(&co->stack_size);
> +    co->sp = co->stack + co->stack_size;
> +
> +#ifdef CONFIG_VALGRIND_H
> +    co->valgrind_stack_id =
> +        VALGRIND_STACK_REGISTER(co->stack, co->stack + co->stack_size);
> +#endif
> +
> +    /* Immediately enter the coroutine once to pass it its address as the 
> argument */
> +    co->base.caller = qemu_coroutine_self();
> +    start_switch_fiber(&fake_stack_save, co->stack, co->stack_size);
> +    CO_SWITCH(current, co, 0, "jmp coroutine_trampoline");
> +    finish_switch_fiber(fake_stack_save);
> +    co->base.caller = NULL;

...but why is this necessary? CO_SWITCH() always passes the coroutine in
rdi, not just here, so wouldn't the first real call do this, too?

Ah, I see, because of the 'jmp coroutine_trampoline'. But the comment is
really misleading. Actually, I think the code would become simpler if
you just put the address of coroutine_trampoline on the initial stack
and then have 'ret' unconditionally (see below for a quick attempt at
something to squash in).

> +    return &co->base;
> +}
> +
> +#ifdef CONFIG_VALGRIND_H
> +#if defined(CONFIG_PRAGMA_DIAGNOSTIC_AVAILABLE) && !defined(__clang__)
> +/* Work around an unused variable in the valgrind.h macro... */
> +#pragma GCC diagnostic push
> +#pragma GCC diagnostic ignored "-Wunused-but-set-variable"
> +#endif
> +static inline void valgrind_stack_deregister(CoroutineX86 *co)
> +{
> +    VALGRIND_STACK_DEREGISTER(co->valgrind_stack_id);
> +}
> +#if defined(CONFIG_PRAGMA_DIAGNOSTIC_AVAILABLE) && !defined(__clang__)
> +#pragma GCC diagnostic pop
> +#endif
> +#endif

Another candidate for sharing instead of duplicating? (You could
trivially pass the valgrind_stack_id instead of the CoroutineX86
object.)

Kevin


diff --git a/util/coroutine-x86.c b/util/coroutine-x86.c
index b7649e7ae1..ff78c298c9 100644
--- a/util/coroutine-x86.c
+++ b/util/coroutine-x86.c
@@ -79,7 +79,7 @@ static void start_switch_fiber(void **fake_stack_save,
 }

 /* On entry to a coroutine, rax is "value" and rsi is the coroutine itself.  */
-#define CO_SWITCH(from, to, action, jump) ({                                   
         \
+#define CO_SWITCH(from, to, action) ({                                         
         \
     int ret = action;                                                          
         \
     void *from_ = from;                                                        
         \
     void *to_ = to;                                                            
         \
@@ -94,7 +94,7 @@ static void start_switch_fiber(void **fake_stack_save,
         "1: movq (%%rsp), %%rbp\n"          /* save source IP for debugging */ 
         \
         "movq %%rsp, %c[sp](%[FROM])\n"     /* save source SP */               
         \
         "movq %c[sp](%[TO]), %%rsp\n"       /* load destination SP */          
         \
-        jump "\n"                           /* coroutine switch */             
         \
+        "ret\n"                             /* coroutine switch */             
         \
         "2:"                                                                   
         \
         ".cfi_adjust_cfa_offset -8\n"                                          
         \
         "popq %%rbp\n"                                                         
         \
@@ -115,15 +115,14 @@ static void __attribute__((__used__)) 
coroutine_trampoline(void *arg)
     finish_switch_fiber(NULL);

     while (true) {
-        qemu_coroutine_switch(co, co->caller, COROUTINE_TERMINATE);
         co->entry(co->entry_arg);
+        qemu_coroutine_switch(co, co->caller, COROUTINE_TERMINATE);
     }
 }

 Coroutine *qemu_coroutine_new(void)
 {
     CoroutineX86 *co;
-    void *fake_stack_save = NULL;

     co = g_malloc0(sizeof(*co));
     co->stack_size = COROUTINE_STACK_SIZE;
@@ -135,12 +134,10 @@ Coroutine *qemu_coroutine_new(void)
         VALGRIND_STACK_REGISTER(co->stack, co->stack + co->stack_size);
 #endif

-    /* Immediately enter the coroutine once to pass it its address as the 
argument */
-    co->base.caller = qemu_coroutine_self();
-    start_switch_fiber(&fake_stack_save, co->stack, co->stack_size);
-    CO_SWITCH(current, co, 0, "jmp coroutine_trampoline");
-    finish_switch_fiber(fake_stack_save);
-    co->base.caller = NULL;
+    /* Put entry point on the stack; 8 more bytes for the stack alignment
+     * required by the calling convention. */
+    co->sp -= 16;
+    *(uint64_t*) co->sp = (uint64_t) coroutine_trampoline;

     return &co->base;
 }
@@ -193,7 +190,7 @@ qemu_coroutine_switch(Coroutine *from_, Coroutine *to_,

     start_switch_fiber(action == COROUTINE_TERMINATE ?
                        NULL : &fake_stack_save, to->stack, to->stack_size);
-    action = CO_SWITCH(from, to, action, "ret");
+    action = CO_SWITCH(from, to, action);
     finish_switch_fiber(fake_stack_save);

     return action;



reply via email to

[Prev in Thread] Current Thread [Next in Thread]