

From: alvise rigo
Subject: Re: [Qemu-devel] [RFC v6 01/14] exec.c: Add new exclusive bitmap to ram_list
Date: Fri, 18 Dec 2015 14:47:50 +0100

On Fri, Dec 18, 2015 at 2:18 PM, Alex Bennée <address@hidden> wrote:
>
> Alvise Rigo <address@hidden> writes:
>
>> The purpose of this new bitmap is to flag the memory pages that are in
>> the middle of LL/SC operations (after a LL, before a SC) on a per-vCPU
>> basis.
>> For all these pages, the corresponding TLB entries will be generated
>> in such a way as to force the slow-path if at least one vCPU has the
>> bit not set.
>> When the system starts, the whole memory is dirty (the entire bitmap
>> is set). A page, after being marked as exclusively-clean, will be
>> set dirty again after the SC.
>>
>> For each page we keep 8 bits to be shared among all the vCPUs available
>> in the system. In general, bit n % 8 corresponds to vCPU n.
>>
>> Suggested-by: Jani Kokkonen <address@hidden>
>> Suggested-by: Claudio Fontana <address@hidden>
>> Signed-off-by: Alvise Rigo <address@hidden>
>> ---
>>  exec.c                  |  8 ++++--
>>  include/exec/memory.h   |  3 +-
>>  include/exec/ram_addr.h | 76 +++++++++++++++++++++++++++++++++++++++++++++++++
>>  3 files changed, 84 insertions(+), 3 deletions(-)
>>
>> diff --git a/exec.c b/exec.c
>> index 0bf0a6e..e66d232 100644
>> --- a/exec.c
>> +++ b/exec.c
>> @@ -1548,11 +1548,15 @@ static ram_addr_t ram_block_add(RAMBlock *new_block, Error **errp)
>>          int i;
>>
>>          /* ram_list.dirty_memory[] is protected by the iothread lock.  */
>> -        for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
>> +        for (i = 0; i < DIRTY_MEMORY_EXCLUSIVE; i++) {
>>              ram_list.dirty_memory[i] =
>>                  bitmap_zero_extend(ram_list.dirty_memory[i],
>>                                     old_ram_size, new_ram_size);
>> -       }
>> +        }
>> +        ram_list.dirty_memory[DIRTY_MEMORY_EXCLUSIVE] = bitmap_zero_extend(
>> +                ram_list.dirty_memory[DIRTY_MEMORY_EXCLUSIVE],
>> +                old_ram_size * EXCL_BITMAP_CELL_SZ,
>> +                new_ram_size * EXCL_BITMAP_CELL_SZ);
>>      }
>
> I'm wondering if old/new_ram_size should be renamed to
> old/new_ram_pages?

Yes, I think it makes more sense.

>
> So as I understand it we have created a bitmap which has
> EXCL_BITMAP_CELL_SZ bits per page.
>
>>      cpu_physical_memory_set_dirty_range(new_block->offset,
>>                                          new_block->used_length,
>> diff --git a/include/exec/memory.h b/include/exec/memory.h
>> index 0f07159..2782c77 100644
>> --- a/include/exec/memory.h
>> +++ b/include/exec/memory.h
>> @@ -19,7 +19,8 @@
>>  #define DIRTY_MEMORY_VGA       0
>>  #define DIRTY_MEMORY_CODE      1
>>  #define DIRTY_MEMORY_MIGRATION 2
>> -#define DIRTY_MEMORY_NUM       3        /* num of dirty bits */
>> +#define DIRTY_MEMORY_EXCLUSIVE 3
>> +#define DIRTY_MEMORY_NUM       4        /* num of dirty bits */
>>
>>  #include <stdint.h>
>>  #include <stdbool.h>
>> diff --git a/include/exec/ram_addr.h b/include/exec/ram_addr.h
>> index 7115154..b48af27 100644
>> --- a/include/exec/ram_addr.h
>> +++ b/include/exec/ram_addr.h
>> @@ -21,6 +21,7 @@
>>
>>  #ifndef CONFIG_USER_ONLY
>>  #include "hw/xen/xen.h"
>> +#include "sysemu/sysemu.h"
>>
>>  struct RAMBlock {
>>      struct rcu_head rcu;
>> @@ -82,6 +83,13 @@ int qemu_ram_resize(ram_addr_t base, ram_addr_t newsize, Error **errp);
>>  #define DIRTY_CLIENTS_ALL     ((1 << DIRTY_MEMORY_NUM) - 1)
>>  #define DIRTY_CLIENTS_NOCODE  (DIRTY_CLIENTS_ALL & ~(1 << DIRTY_MEMORY_CODE))
>>
>> +/* Exclusive bitmap support. */
>> +#define EXCL_BITMAP_CELL_SZ 8
>> +#define EXCL_BITMAP_GET_BIT_OFFSET(addr) \
>> +        (EXCL_BITMAP_CELL_SZ * (addr >> TARGET_PAGE_BITS))
>> +#define EXCL_BITMAP_GET_BYTE_OFFSET(addr) (addr >> TARGET_PAGE_BITS)
>> +#define EXCL_IDX(cpu) (cpu % EXCL_BITMAP_CELL_SZ)
>> +
>
> I think some of the explanation of what CELL_SZ means from your commit
> message needs to go here.

OK.
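Something along these lines, reusing the wording from the commit
message (the exact phrasing can be settled in the next respin):

/* Exclusive bitmap support: for each page we keep a cell of
 * EXCL_BITMAP_CELL_SZ bits, shared among all the vCPUs in the system;
 * vCPU n is described by bit (n % EXCL_BITMAP_CELL_SZ) of the page's
 * cell. A bit set to one means the page is "dirty" (not exclusive)
 * for the vCPUs it describes. */
#define EXCL_BITMAP_CELL_SZ 8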

>
>>  static inline bool cpu_physical_memory_get_dirty(ram_addr_t start,
>>                                                   ram_addr_t length,
>>                                                   unsigned client)
>> @@ -173,6 +181,11 @@ static inline void cpu_physical_memory_set_dirty_range(ram_addr_t start,
>>      if (unlikely(mask & (1 << DIRTY_MEMORY_CODE))) {
>>          bitmap_set_atomic(d[DIRTY_MEMORY_CODE], page, end - page);
>>      }
>> +    if (unlikely(mask & (1 << DIRTY_MEMORY_EXCLUSIVE))) {
>> +        bitmap_set_atomic(d[DIRTY_MEMORY_EXCLUSIVE],
>> +                        page * EXCL_BITMAP_CELL_SZ,
>> +                        (end - page) * EXCL_BITMAP_CELL_SZ);
>> +    }
>>      xen_modified_memory(start, length);
>>  }
>>
>> @@ -288,5 +301,68 @@ uint64_t cpu_physical_memory_sync_dirty_bitmap(unsigned long *dest,
>>  }
>>
>>  void migration_bitmap_extend(ram_addr_t old, ram_addr_t new);
>> +
>> +/* One cell for each page. The n-th bit of a cell describes all the i-th vCPUs
>> + * such that (i % EXCL_BITMAP_CELL_SZ) == n.
>> + * A bit set to zero ensures that all the vCPUs described by the bit have the
>> + * EXCL_BIT set for the page. */
>> +static inline void cpu_physical_memory_unset_excl(ram_addr_t addr, uint32_t cpu)
>> +{
>> +    set_bit_atomic(EXCL_BITMAP_GET_BIT_OFFSET(addr) + EXCL_IDX(cpu),
>> +            ram_list.dirty_memory[DIRTY_MEMORY_EXCLUSIVE]);
>> +}
>> +
>> +/* Return true if there is at least one cpu with the EXCL bit set for the page
>> + * of @addr. */
>> +static inline int cpu_physical_memory_atleast_one_excl(ram_addr_t addr)
>> +{
>> +    uint8_t *bitmap;
>> +
>> +    bitmap = (uint8_t *)(ram_list.dirty_memory[DIRTY_MEMORY_EXCLUSIVE]);
>> +
>> +    /* This is safe even if smp_cpus < 8 since the unused bits are always 1. */
>> +    return bitmap[EXCL_BITMAP_GET_BYTE_OFFSET(addr)] != UCHAR_MAX;
>> +}
>
> So I'm getting a little confused as to why we need EXCL_BITMAP_CELL_SZ
> bits rather than just one bit shared amongst the CPUs. What's so special
> about 8 if we could, say, have a 16 CPU system?

The ideal solution here would be to have one bit per vCPU, but since
that is too expensive, we group them in clusters of 8.
Of course the value 8 could be changed, but I would not go much higher,
since DIRTY_MEMORY_EXCLUSIVE is already 8 times the size of the other
bitmaps.
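To make the trade-off concrete, here is a minimal standalone sketch
of the mapping (plain C; the 16-vCPU numbers are only an example, not
a configuration taken from the series):

#include <stdio.h>

#define EXCL_BITMAP_CELL_SZ 8
#define EXCL_IDX(cpu) ((cpu) % EXCL_BITMAP_CELL_SZ)

int main(void)
{
    /* With 16 vCPUs, vCPU 3 and vCPU 11 share bit 3 of a page's cell,
     * so a flush targeting that bit covers both of them. */
    printf("vCPU 3  -> bit %d\n", EXCL_IDX(3));
    printf("vCPU 11 -> bit %d\n", EXCL_IDX(11));

    /* The cost is fixed: EXCL_BITMAP_CELL_SZ bits per page, i.e. 8
     * times the single-bit dirty bitmaps, however many vCPUs exist. */
    return 0;
}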

>
> Ultimately if any page has an exclusive operation going on all vCPUs are
> forced down the slow-path for that page's accesses? Where is the benefit in
> splitting that bitfield across these vCPUs if the only test is "is at
> least one vCPU doing an excl right now?".

Clustering the CPUs in this way makes the flushing procedure a bit
more efficient, since we perform TLB flushes at cluster granularity:
when handling a LL operation, we request the flush only from those
clusters whose bit is still "dirty".
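As a rough sketch of the idea (this is not code from the series, and
tlb_flush_cluster() is a made-up name for whatever ends up flushing
the TLBs of one cluster of vCPUs):

/* On a LL, flush only the clusters whose bit is still dirty for this
 * page, instead of unconditionally flushing every vCPU. */
static void flush_dirty_clusters(ram_addr_t addr)
{
    uint8_t *bitmap =
        (uint8_t *)ram_list.dirty_memory[DIRTY_MEMORY_EXCLUSIVE];
    uint8_t cell = bitmap[EXCL_BITMAP_GET_BYTE_OFFSET(addr)];
    int bit;

    for (bit = 0; bit < EXCL_BITMAP_CELL_SZ; bit++) {
        if (cell & (1 << bit)) {
            /* Flush every vCPU i with (i % EXCL_BITMAP_CELL_SZ) == bit;
             * clusters whose bit is already clean are skipped. */
            tlb_flush_cluster(bit);
        }
    }
}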

Regards,
alvise

>
>> +
>> +/* Return true if the @cpu has the bit set (not exclusive) for the page of
>> + * @addr.  If @cpu == smp_cpus return true if at least one vCPU has the dirty
>> + * bit set for that page. */
>> +static inline int cpu_physical_memory_not_excl(ram_addr_t addr,
>> +                                               unsigned long cpu)
>> +{
>> +    uint8_t *bitmap;
>> +
>> +    bitmap = (uint8_t *)ram_list.dirty_memory[DIRTY_MEMORY_EXCLUSIVE];
>> +
>> +    if (cpu == smp_cpus) {
>> +        if (smp_cpus >= EXCL_BITMAP_CELL_SZ) {
>> +            return bitmap[EXCL_BITMAP_GET_BYTE_OFFSET(addr)];
>> +        } else {
>> +            return bitmap[EXCL_BITMAP_GET_BYTE_OFFSET(addr)] &
>> +                                            ((1 << smp_cpus) - 1);
>> +        }
>> +    } else {
>> +        return bitmap[EXCL_BITMAP_GET_BYTE_OFFSET(addr)] & (1 << EXCL_IDX(cpu));
>> +    }
>> +}
>> +
>> +/* Clean the dirty bit of @cpu (i.e. set the page as exclusive). If @cpu ==
>> + * smp_cpus clean the dirty bit for all the vCPUs. */
>> +static inline int cpu_physical_memory_set_excl(ram_addr_t addr, uint32_t cpu)
>> +{
>> +    if (cpu == smp_cpus) {
>> +        int nr = (smp_cpus >= EXCL_BITMAP_CELL_SZ) ?
>> +                            EXCL_BITMAP_CELL_SZ : smp_cpus;
>> +
>> +        return bitmap_test_and_clear_atomic(
>> +                        ram_list.dirty_memory[DIRTY_MEMORY_EXCLUSIVE],
>> +                        EXCL_BITMAP_GET_BIT_OFFSET(addr), nr);
>> +    } else {
>> +        return bitmap_test_and_clear_atomic(
>> +                        ram_list.dirty_memory[DIRTY_MEMORY_EXCLUSIVE],
>> +                        EXCL_BITMAP_GET_BIT_OFFSET(addr) + EXCL_IDX(cpu), 1);
>> +    }
>> +}
>> +
>>  #endif
>>  #endif
>
> Maybe this will get clearer as I read on but at the moment the bitfield
> split confuses me.
>
> --
> Alex Bennée


