Re: [Qemu-devel] [RFC] translate-all: protect code_gen_buffer with RCU


From: Alex Bennée
Subject: Re: [Qemu-devel] [RFC] translate-all: protect code_gen_buffer with RCU
Date: Fri, 22 Apr 2016 15:47:39 +0100
User-agent: mu4e 0.9.17; emacs 25.0.92.6

Alex Bennée <address@hidden> writes:

> Emilio G. Cota <address@hidden> writes:
>
>> This is a first attempt at making tb_flush not have to stop all CPUs.
>> There are issues as pointed out below, but this could be a good start.
>>
>> Context:
>>   https://lists.gnu.org/archive/html/qemu-devel/2016-03/msg04658.html
>>   https://lists.gnu.org/archive/html/qemu-devel/2016-03/msg06942.html
>>
>> Known issues:
>> - Basically compile-tested only, since I've only run this with
>>   single-threaded TCG; I also tried running it with linux-user,
>>   but in order to trigger tb_flush I had to make code_gen_buffer
>>   so small that the CPU calling tb_flush would immediately fill
>>   the 2nd buffer, triggering the assert. If you have a working
>>   multi-threaded workload that would be good for testing this,
>>   please let me know.
>
> With my latest mttcg unit tests:
>
> ./arm-softmmu/qemu-system-arm -machine virt,accel=tcg -cpu cortex-a15 \
>   -device virtio-serial-device -device virtconsole,chardev=ctd \
>   -chardev testdev,id=ctd -display none -serial stdio \
>   -kernel arm/tcg-test.flat -smp 4 -tcg mttcg=on \
>   -append "tight smc irq mod=1 rounds=100000"  -name arm,debug-threads=on

Ahh, I just realised you wanted a linux-user workload.
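
Something along these lines might do as a quick-and-dirty stress test
under linux-user: N pthreads spinning so several guest threads keep
executing translated code while flushes happen (with code_gen_buffer
shrunk in the source, as you did, so tb_flush actually fires). Untested
sketch only, thread count and loop bound picked arbitrarily:

/* mttcg-stress.c: build with gcc -static -pthread and run under
 * qemu-arm (or any linux-user target).  Purely a sketch to keep
 * several guest threads busy while tb_flush is being forced by a
 * tiny code_gen_buffer. */
#include <pthread.h>
#include <stdio.h>

#define NTHREADS 8

static void *worker(void *arg)
{
    unsigned long n = (unsigned long)arg;
    unsigned long acc = 0;
    unsigned long i;

    for (i = 0; i < 100000000UL; i++) {
        acc += i * n;               /* keep the thread executing TBs */
    }
    return (void *)acc;
}

int main(void)
{
    pthread_t th[NTHREADS];
    unsigned long i;

    for (i = 0; i < NTHREADS; i++) {
        pthread_create(&th[i], NULL, worker, (void *)(i + 1));
    }
    for (i = 0; i < NTHREADS; i++) {
        pthread_join(th[i], NULL);
    }
    printf("done\n");
    return 0;
}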

>
>
>> - Windows; not even compile-tested!
>>
>> Signed-off-by: Emilio G. Cota <address@hidden>
>> ---
>>  translate-all.c | 122 +++++++++++++++++++++++++++++++++++++++++++++++++++++---
>>  1 file changed, 117 insertions(+), 5 deletions(-)
>>
>> diff --git a/translate-all.c b/translate-all.c
>> index bba9b62..4c14b4d 100644
>> --- a/translate-all.c
>> +++ b/translate-all.c
>> @@ -536,8 +536,13 @@ static inline void *split_cross_256mb(void *buf1, size_t size1)
>>  #endif
>>
>>  #ifdef USE_STATIC_CODE_GEN_BUFFER
>> -static uint8_t static_code_gen_buffer[DEFAULT_CODE_GEN_BUFFER_SIZE]
>> +static uint8_t static_code_gen_buffer1[DEFAULT_CODE_GEN_BUFFER_SIZE]
>>      __attribute__((aligned(CODE_GEN_ALIGN)));
>> +static uint8_t static_code_gen_buffer2[DEFAULT_CODE_GEN_BUFFER_SIZE]
>> +    __attribute__((aligned(CODE_GEN_ALIGN)));
>> +static int static_buf_mask = 1;
>> +static void *static_buf1;
>> +static void *static_buf2;
>>
>>  # ifdef _WIN32
>>  static inline void do_protect(void *addr, long size, int prot)
>> @@ -580,13 +585,12 @@ static inline void map_none(void *addr, long size)
>>  }
>>  # endif /* WIN32 */
>>
>> -static inline void *alloc_code_gen_buffer(void)
>> +static void *alloc_static_code_gen_buffer(void *buf)
>>  {
>> -    void *buf = static_code_gen_buffer;
>>      size_t full_size, size;
>>
>>      /* The size of the buffer, rounded down to end on a page boundary.  */
>> -    full_size = (((uintptr_t)buf + sizeof(static_code_gen_buffer))
>> +    full_size = (((uintptr_t)buf + sizeof(static_code_gen_buffer1))
>>                   & qemu_real_host_page_mask) - (uintptr_t)buf;
>>
>>      /* Reserve a guard page.  */
>> @@ -612,6 +616,15 @@ static inline void *alloc_code_gen_buffer(void)
>>
>>      return buf;
>>  }
>> +
>> +static inline void *alloc_code_gen_buffer(void)
>> +{
>> +    static_buf1 = alloc_static_code_gen_buffer(static_code_gen_buffer1);
>> +    static_buf2 = alloc_static_code_gen_buffer(static_code_gen_buffer2);
>> +
>> +    assert(static_buf_mask == 1);
>> +    return static_buf1;
>> +}
>>  #elif defined(_WIN32)
>>  static inline void *alloc_code_gen_buffer(void)
>>  {
>> @@ -829,8 +842,100 @@ static void page_flush_tb(void)
>>      }
>>  }
>>
>> +#ifdef USE_STATIC_CODE_GEN_BUFFER
>> +
>> +struct code_gen_desc {
>> +    struct rcu_head rcu;
>> +    int clear_bit;
>> +};
>> +
>> +static void code_gen_buffer_clear(struct rcu_head *rcu)
>> +{
>> +    struct code_gen_desc *desc = container_of(rcu, struct code_gen_desc, rcu);
>> +
>> +    tb_lock();
>> +    static_buf_mask &= ~desc->clear_bit;
>> +    tb_unlock();
>> +    g_free(desc);
>> +}
>> +
>> +static void *code_gen_buffer_replace(void)
>> +{
>> +    struct code_gen_desc *desc = g_malloc0(sizeof(*desc));
>> +
>> +    /*
>> +     * If both bits are set, we're having two concurrent flushes. This
>> +     * can easily happen if the buffers are heavily undersized.
>> +     */
>> +    assert(static_buf_mask == 1 || static_buf_mask == 2);
>> +
>> +    desc->clear_bit = static_buf_mask;
>> +    call_rcu1(&desc->rcu, code_gen_buffer_clear);
>> +
>> +    if (static_buf_mask == 1) {
>> +        static_buf_mask |= 2;
>> +        return static_buf2;
>> +    }
>> +    static_buf_mask |= 1;
>> +    return static_buf1;
>> +}
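
Just checking I follow the rotation here: each flush hands out whichever
static buffer isn't current and leaves the old buffer's bit set until the
RCU callback clears it, so a second flush arriving before the grace
period ends would see both bits set and trip the assert above. Re-stated
stand-alone, purely for illustration (not the patch code itself):

#include <assert.h>

/* bit 1 = static buffer 1 live, bit 2 = static buffer 2 live.  A bit is
 * set when that buffer becomes current and only cleared by the RCU
 * callback once no reader can still be executing out of the old one. */
static int mask = 1;

static int pick_next_buffer(void)
{
    assert(mask == 1 || mask == 2); /* both set => flush inside a grace period */
    if (mask == 1) {
        mask |= 2;                  /* switch to buffer 2 */
        return 2;
    }
    mask |= 1;                      /* switch back to buffer 1 */
    return 1;
}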
>> +
>> +#elif defined(_WIN32)
>> +
>> +struct code_gen_desc {
>> +    struct rcu_head rcu;
>> +    void *buf;
>> +};
>> +
>> +static void code_gen_buffer_vfree(struct rcu_head *rcu)
>> +{
>> +    struct code_gen_desc *desc = container_of(rcu, struct code_gen_desc, rcu);
>> +
>> +    VirtualFree(desc->buf, 0, MEM_RELEASE);
>> +    g_free(desc);
>> +}
>> +
>> +static void *code_gen_buffer_replace(void)
>> +{
>> +    struct code_gen_desc *desc;
>> +
>> +    desc = g_malloc0(sizeof(*desc));
>> +    desc->buf = tcg_ctx.code_gen_buffer;
>> +    call_rcu1(&desc->rcu, code_gen_buffer_vfree);
>> +
>> +    return alloc_code_gen_buffer();
>> +}
>> +
>> +#else /* UNIX, dynamically-allocated code buffer */
>> +
>> +struct code_gen_desc {
>> +    struct rcu_head rcu;
>> +    void *buf;
>> +    size_t size;
>> +};
>> +
>> +static void code_gen_buffer_unmap(struct rcu_head *rcu)
>> +{
>> +    struct code_gen_desc *desc = container_of(rcu, struct code_gen_desc, rcu);
>> +
>> +    munmap(desc->buf, desc->size + qemu_real_host_page_size);
>> +    g_free(desc);
>> +}
>> +
>> +static void *code_gen_buffer_replace(void)
>> +{
>> +    struct code_gen_desc *desc;
>> +
>> +    desc = g_malloc0(sizeof(*desc));
>> +    desc->buf = tcg_ctx.code_gen_buffer;
>> +    desc->size = tcg_ctx.code_gen_buffer_size;
>> +    call_rcu1(&desc->rcu, code_gen_buffer_unmap);
>> +
>> +    return alloc_code_gen_buffer();
>> +}
>> +#endif /* USE_STATIC_CODE_GEN_BUFFER */
>> +
>>  /* flush all the translation blocks */
>> -/* XXX: tb_flush is currently not thread safe */
>>  void tb_flush(CPUState *cpu)
>>  {
>>  #if defined(DEBUG_FLUSH)
>> @@ -853,10 +958,17 @@ void tb_flush(CPUState *cpu)
>>      qht_reset_size(&tcg_ctx.tb_ctx.htable, CODE_GEN_HTABLE_SIZE);
>>      page_flush_tb();
>>
>> +    tcg_ctx.code_gen_buffer = code_gen_buffer_replace();
>>      tcg_ctx.code_gen_ptr = tcg_ctx.code_gen_buffer;
>> +    tcg_prologue_init(&tcg_ctx);
>>      /* XXX: flush processor icache at this point if cache flush is
>>         expensive */
>>      tcg_ctx.tb_ctx.tb_flush_count++;
>> +
>> +    /* exit all CPUs so that the old buffer is quickly cleared. */
>> +    CPU_FOREACH(cpu) {
>> +        cpu_exit(cpu);
>> +    }
>>  }
>>
>>  #ifdef DEBUG_TB_CHECK
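
One thing worth spelling out, mostly for my own benefit: the deferred
reclaim above is only safe if every consumer of tcg_ctx.code_gen_buffer
(TB execution, TB lookup, anything walking the buffer) sits inside an
RCU read-side critical section, so the call_rcu1() callbacks can't run
until those readers have drained. Roughly the pairing I'd expect — an
illustrative sketch, not the actual call sites:

/* reader side: anything dereferencing the current buffer */
rcu_read_lock();
buf = atomic_rcu_read(&tcg_ctx.code_gen_buffer);
/* ... execute / look up TBs that live in buf ... */
rcu_read_unlock();

/* updater side (tb_flush): code_gen_buffer_replace() queues the old
 * buffer for reclaim via call_rcu1() and returns the new one; the
 * callback only runs after all pre-existing readers have finished. */
atomic_rcu_set(&tcg_ctx.code_gen_buffer, code_gen_buffer_replace());

The cpu_exit() loop at the end of tb_flush then just makes sure the
vCPUs drop out of the old translated code (and their read sections)
promptly, so the old buffer can actually be reclaimed soon after the
flush.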


--
Alex Bennée


