[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [Qemu-devel] [RFC] translate-all: protect code_gen_buffer with RCU
From: Alex Bennée
Subject: Re: [Qemu-devel] [RFC] translate-all: protect code_gen_buffer with RCU
Date: Fri, 22 Apr 2016 15:47:39 +0100
User-agent: mu4e 0.9.17; emacs 25.0.92.6
Alex Bennée <address@hidden> writes:
> Emilio G. Cota <address@hidden> writes:
>
>> This is a first attempt at making tb_flush not have to stop all CPUs.
>> There are issues as pointed out below, but this could be a good start.
>>
>> Context:
>> https://lists.gnu.org/archive/html/qemu-devel/2016-03/msg04658.html
>> https://lists.gnu.org/archive/html/qemu-devel/2016-03/msg06942.html
>>
>> Known issues:
>> - Basically compile-tested only, since I've only run this with
>> single-threaded TCG; I also tried running it with linux-user,
>> but in order to trigger tb_flush I had to make code_gen_buffer
>> so small that the CPU calling tb_flush would immediately fill
>> the 2nd buffer, triggering the assert. If you have a working
>> multi-threaded workload that would be good to test this, please
>> let me know.
>
> With my latest mttcg unit tests:
>
> ./arm-softmmu/qemu-system-arm -machine virt,accel=tcg -cpu cortex-a15 \
> -device virtio-serial-device -device virtconsole,chardev=ctd \
> -chardev testdev,id=ctd -display none -serial stdio \
> -kernel arm/tcg-test.flat -smp 4 -tcg mttcg=on \
> -append "tight smc irq mod=1 rounds=100000" -name arm,debug-threads=on
Ahh, I just realised you wanted a linux-user workload.
>
>
>> - Windows; not even compile-tested!
>>
>> Signed-off-by: Emilio G. Cota <address@hidden>
>> ---
>> translate-all.c | 122 +++++++++++++++++++++++++++++++++++++++++++++++++++++---
>> 1 file changed, 117 insertions(+), 5 deletions(-)
>>
>> diff --git a/translate-all.c b/translate-all.c
>> index bba9b62..4c14b4d 100644
>> --- a/translate-all.c
>> +++ b/translate-all.c
>> @@ -536,8 +536,13 @@ static inline void *split_cross_256mb(void *buf1, size_t size1)
>> #endif
>>
>> #ifdef USE_STATIC_CODE_GEN_BUFFER
>> -static uint8_t static_code_gen_buffer[DEFAULT_CODE_GEN_BUFFER_SIZE]
>> +static uint8_t static_code_gen_buffer1[DEFAULT_CODE_GEN_BUFFER_SIZE]
>> __attribute__((aligned(CODE_GEN_ALIGN)));
>> +static uint8_t static_code_gen_buffer2[DEFAULT_CODE_GEN_BUFFER_SIZE]
>> + __attribute__((aligned(CODE_GEN_ALIGN)));
>> +static int static_buf_mask = 1;
>> +static void *static_buf1;
>> +static void *static_buf2;
>>
>> # ifdef _WIN32
>> static inline void do_protect(void *addr, long size, int prot)
>> @@ -580,13 +585,12 @@ static inline void map_none(void *addr, long size)
>> }
>> # endif /* WIN32 */
>>
>> -static inline void *alloc_code_gen_buffer(void)
>> +static void *alloc_static_code_gen_buffer(void *buf)
>> {
>> - void *buf = static_code_gen_buffer;
>> size_t full_size, size;
>>
>> /* The size of the buffer, rounded down to end on a page boundary. */
>> - full_size = (((uintptr_t)buf + sizeof(static_code_gen_buffer))
>> + full_size = (((uintptr_t)buf + sizeof(static_code_gen_buffer1))
>> & qemu_real_host_page_mask) - (uintptr_t)buf;
>>
>> /* Reserve a guard page. */
>> @@ -612,6 +616,15 @@ static inline void *alloc_code_gen_buffer(void)
>>
>> return buf;
>> }
>> +
>> +static inline void *alloc_code_gen_buffer(void)
>> +{
>> + static_buf1 = alloc_static_code_gen_buffer(static_code_gen_buffer1);
>> + static_buf2 = alloc_static_code_gen_buffer(static_code_gen_buffer2);
>> +
>> + assert(static_buf_mask == 1);
>> + return static_buf1;
>> +}
>> #elif defined(_WIN32)
>> static inline void *alloc_code_gen_buffer(void)
>> {
>> @@ -829,8 +842,100 @@ static void page_flush_tb(void)
>> }
>> }
>>
>> +#ifdef USE_STATIC_CODE_GEN_BUFFER
>> +
>> +struct code_gen_desc {
>> + struct rcu_head rcu;
>> + int clear_bit;
>> +};
>> +
>> +static void code_gen_buffer_clear(struct rcu_head *rcu)
>> +{
>> + struct code_gen_desc *desc = container_of(rcu, struct code_gen_desc, rcu);
>> +
>> + tb_lock();
>> + static_buf_mask &= ~desc->clear_bit;
>> + tb_unlock();
>> + g_free(desc);
>> +}
>> +
>> +static void *code_gen_buffer_replace(void)
>> +{
>> + struct code_gen_desc *desc = g_malloc0(sizeof(*desc));
>> +
>> + /*
>> + * If both bits are set, we're having two concurrent flushes. This
>> + * can easily happen if the buffers are heavily undersized.
>> + */
>> + assert(static_buf_mask == 1 || static_buf_mask == 2);
>> +
>> + desc->clear_bit = static_buf_mask;
>> + call_rcu1(&desc->rcu, code_gen_buffer_clear);
>> +
>> + if (static_buf_mask == 1) {
>> + static_buf_mask |= 2;
>> + return static_buf2;
>> + }
>> + static_buf_mask |= 1;
>> + return static_buf1;
>> +}
>> +
>> +#elif defined(_WIN32)
>> +
>> +struct code_gen_desc {
>> + struct rcu_head rcu;
>> + void *buf;
>> +};
>> +
>> +static void code_gen_buffer_vfree(struct rcu_head *rcu)
>> +{
>> + struct code_gen_desc *desc = container_of(rcu, struct code_gen_desc, rcu);
>> +
>> + VirtualFree(desc->buf, 0, MEM_RELEASE);
>> + g_free(desc);
>> +}
>> +
>> +static void *code_gen_buffer_replace(void)
>> +{
>> + struct code_gen_desc *desc;
>> +
>> + desc = g_malloc0(sizeof(*desc));
>> + desc->buf = tcg_ctx.code_gen_buffer;
>> + call_rcu1(&desc->rcu, code_gen_buffer_vfree);
>> +
>> + return alloc_code_gen_buffer();
>> +}
>> +
>> +#else /* UNIX, dynamically-allocated code buffer */
>> +
>> +struct code_gen_desc {
>> + struct rcu_head rcu;
>> + void *buf;
>> + size_t size;
>> +};
>> +
>> +static void code_gen_buffer_unmap(struct rcu_head *rcu)
>> +{
>> + struct code_gen_desc *desc = container_of(rcu, struct code_gen_desc, rcu);
>> +
>> + munmap(desc->buf, desc->size + qemu_real_host_page_size);
>> + g_free(desc);
>> +}
>> +
>> +static void *code_gen_buffer_replace(void)
>> +{
>> + struct code_gen_desc *desc;
>> +
>> + desc = g_malloc0(sizeof(*desc));
>> + desc->buf = tcg_ctx.code_gen_buffer;
>> + desc->size = tcg_ctx.code_gen_buffer_size;
>> + call_rcu1(&desc->rcu, code_gen_buffer_unmap);
>> +
>> + return alloc_code_gen_buffer();
>> +}
>> +#endif /* USE_STATIC_CODE_GEN_BUFFER */
>> +
>> /* flush all the translation blocks */
>> -/* XXX: tb_flush is currently not thread safe */
>> void tb_flush(CPUState *cpu)
>> {
>> #if defined(DEBUG_FLUSH)
>> @@ -853,10 +958,17 @@ void tb_flush(CPUState *cpu)
>> qht_reset_size(&tcg_ctx.tb_ctx.htable, CODE_GEN_HTABLE_SIZE);
>> page_flush_tb();
>>
>> + tcg_ctx.code_gen_buffer = code_gen_buffer_replace();
>> tcg_ctx.code_gen_ptr = tcg_ctx.code_gen_buffer;
>> + tcg_prologue_init(&tcg_ctx);
>> /* XXX: flush processor icache at this point if cache flush is expensive */
>> tcg_ctx.tb_ctx.tb_flush_count++;
>> +
>> + /* exit all CPUs so that the old buffer is quickly cleared. */
>> + CPU_FOREACH(cpu) {
>> + cpu_exit(cpu);
>> + }
>> }
>>
>> #ifdef DEBUG_TB_CHECK
--
Alex Bennée
- [Qemu-devel] [RFC] translate-all: protect code_gen_buffer with RCU, Emilio G. Cota, 2016/04/21
- Re: [Qemu-devel] [RFC] translate-all: protect code_gen_buffer with RCU, Alex Bennée, 2016/04/22
- Re: [Qemu-devel] [RFC] translate-all: protect code_gen_buffer with RCU, Richard Henderson, 2016/04/22
- [Qemu-devel] [RFC v2] translate-all: protect code_gen_buffer with RCU, Emilio G. Cota, 2016/04/23
- Re: [Qemu-devel] [RFC v2] translate-all: protect code_gen_buffer with RCU, Richard Henderson, 2016/04/24
- Re: [Qemu-devel] [RFC v2] translate-all: protect code_gen_buffer with RCU, Alex Bennée, 2016/04/25
- Re: [Qemu-devel] [RFC v2] translate-all: protect code_gen_buffer with RCU, Emilio G. Cota, 2016/04/25
- [Qemu-devel] [RFC v3] translate-all: protect code_gen_buffer with RCU, Emilio G. Cota, 2016/04/25
- Re: [Qemu-devel] [RFC v3] translate-all: protect code_gen_buffer with RCU, Richard Henderson, 2016/04/26
- Re: [Qemu-devel] [RFC v3] translate-all: protect code_gen_buffer with RCU, Alex Bennée, 2016/04/26