[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [Qemu-devel] [RFC PATCH 2/2] mem: prepare address_space listener rcu
From: |
liu ping fan |
Subject: |
Re: [Qemu-devel] [RFC PATCH 2/2] mem: prepare address_space listener rcu style |
Date: |
Tue, 14 May 2013 13:47:42 +0800 |
On Mon, May 13, 2013 at 5:31 PM, Paolo Bonzini <address@hidden> wrote:
> Il 13/05/2013 05:21, Liu Ping Fan ha scritto:
>> From: Liu Ping Fan <address@hidden>
>>
>> Each address space listener has PhysPageMap *cur_map, *next_map,
>> the switch from cur_map to next_map complete the RCU style. The
>> mem_commit() do the switch, and it is against reader but AddressSpace's
>> lock or later RCU mechanism (around address_space_translate() ).
>>
>> Signed-off-by: Liu Ping Fan <address@hidden>
>> ---
>> exec.c | 36 +++++++++++++++++++++++++++---------
>> include/exec/memory-internal.h | 11 ++++++++++-
>> 2 files changed, 37 insertions(+), 10 deletions(-)
>>
>> diff --git a/exec.c b/exec.c
>> index bb4e540..e5871d6 100644
>> --- a/exec.c
>> +++ b/exec.c
>> @@ -186,24 +186,26 @@ static void phys_page_set(AddressSpaceDispatch *d,
>> hwaddr index, hwaddr nb,
>> uint16_t leaf)
>> {
>> + PhysPageMap *map = d->next_map;
>> /* Wildly overreserve - it doesn't matter much. */
>> phys_map_node_reserve(3 * P_L2_LEVELS);
>>
>> - phys_page_set_level(&d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
>> + phys_page_set_level(&map->root, &index, &nb, leaf, P_L2_LEVELS - 1);
>> }
>>
>> static PhysSection *phys_section_find(AddressSpaceDispatch *d,
>> hwaddr index)
>> {
>> - PhysPageEntry lp = d->phys_map;
>> PhysPageEntry *p;
>> - PhysSection *phys_sections = cur_pgtbl->phys_sections;
>> - Node *phys_map_nodes = cur_pgtbl->phys_map_nodes;
>> + PhysPageEntry lp = d->cur_map->root;
>> + PhysPageTable *pgtbl = d->cur_map->pgtbl;
>> + PhysSection *phys_sections = pgtbl->phys_sections;
>> + Node *phys_map_nodes = pgtbl->phys_map_nodes;
>> int i;
>>
>> for (i = P_L2_LEVELS - 1; i >= 0 && !lp.is_leaf; i--) {
>> if (lp.ptr == PHYS_MAP_NODE_NIL) {
>> - return &phys_sections[cur_pgtbl->phys_section_unassigned];
>> + return &phys_sections[pgtbl->phys_section_unassigned];
>> }
>> p = phys_map_nodes[lp.ptr];
>> lp = p[(index >> (i * L2_BITS)) & (L2_SIZE - 1)];
>> @@ -234,7 +236,7 @@ MemoryRegionSection
>> *address_space_translate(AddressSpace *as, hwaddr addr,
>> IOMMUTLBEntry iotlb;
>> MemoryRegionSection *section;
>> hwaddr len = *plen;
>> -
>> + PhysPageTable *pgtbl = cur_pgtbl;
>
> d->cur_map->pgtbl.
>
>> for (;;) {
>> section = address_space_lookup_region(as, addr);
>>
>> @@ -254,7 +256,7 @@ MemoryRegionSection
>> *address_space_translate(AddressSpace *as, hwaddr addr,
>> | (addr & iotlb.addr_mask));
>> len = MIN(len, (addr | iotlb.addr_mask) - addr + 1);
>> if (!iotlb.perm[is_write]) {
>> - section =
>> &cur_pgtbl->phys_sections[cur_pgtbl->phys_section_unassigned].section;
>> + section =
>> &pgtbl->phys_sections[pgtbl->phys_section_unassigned].section;
>> break;
>> }
>>
>> @@ -1703,7 +1705,21 @@ static void mem_begin(MemoryListener *listener)
>> {
>> AddressSpaceDispatch *d = container_of(listener, AddressSpaceDispatch,
>> listener);
>>
>> - d->phys_map.ptr = PHYS_MAP_NODE_NIL;
>> + d->next_map = g_new0(PhysPageMap, 1);
>> + d->next_map->pgtbl = next_pgtbl;
>> +}
>> +
>> +static void mem_commit(MemoryListener *listener)
>> +{
>> + AddressSpaceDispatch *d = container_of(listener, AddressSpaceDispatch,
>> listener);
>> + PhysPageMap *m = d->cur_map;
>> +
>> + d->cur_map = d->next_map;
>> + /* Fixme, Currently, we rely on biglock or address-space lock against
>> + * reader. So here, we can safely drop it.
>> + * After RCU, should change to call_rcu()
>> + */
>> + g_free(m);
>> }
>>
>> static void core_begin(MemoryListener *listener)
>> @@ -1771,11 +1787,12 @@ void address_space_init_dispatch(AddressSpace *as)
>> {
>> AddressSpaceDispatch *d = g_new(AddressSpaceDispatch, 1);
>>
>> - d->phys_map = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .is_leaf = 0
>> };
>> + d->cur_map = g_new0(PhysPageMap, 1);
>> d->listener = (MemoryListener) {
>> .begin = mem_begin,
>> .region_add = mem_add,
>> .region_nop = mem_add,
>> + .commit = mem_commit,
>> .priority = 0,
>> };
>> as->dispatch = d;
>> @@ -1787,6 +1804,7 @@ void address_space_destroy_dispatch(AddressSpace *as)
>> AddressSpaceDispatch *d = as->dispatch;
>>
>> memory_listener_unregister(&d->listener);
>> + g_free(d->cur_map);
>> g_free(d);
>> as->dispatch = NULL;
>> }
>> diff --git a/include/exec/memory-internal.h b/include/exec/memory-internal.h
>> index 1b156fd..0dfe260 100644
>> --- a/include/exec/memory-internal.h
>> +++ b/include/exec/memory-internal.h
>> @@ -30,13 +30,22 @@ struct PhysPageEntry {
>> uint16_t ptr : 15;
>> };
>>
>> +struct PhysPageTable;
>> +typedef struct PhysPageMap PhysPageMap;
>> +
>> +struct PhysPageMap {
>> + PhysPageEntry root;
>> + struct PhysPageTable *pgtbl;
>
> cur_pgtbl should be introduced in patch 1 already.
>
PhysPageMap is a member of each AddressSpaceDispatch. And we achieve
RCU based on PhysPageMap *cur_map=next_map. While
cur_pgtbl/next_pgtbl are shared by all AddressSpaceDispatch. And the
out-date one will be reclaimed after all AddressSpaceDispatch have
finish its RCU critical section.
Maybe I did not lay out the patches correctly, but I originally want
to split it out to distinguish the shared object by all
AddressSpaceDispatch and their own object.
>> +};
>> +
>> typedef struct AddressSpaceDispatch AddressSpaceDispatch;
>>
>> struct AddressSpaceDispatch {
>> /* This is a multi-level map on the physical address space.
>> * The bottom level has pointers to MemoryRegionSections.
>> */
>> - PhysPageEntry phys_map;
>> + PhysPageMap *cur_map;
>> + PhysPageMap *next_map;
>
> Pointers are quite expensive here. With RCU we can fetch a consistent
> root/table pair like this:
>
> rcu_read_lock();
> do {
> pgtbl = d->cur_pgtbl;
> smp_rmb();
> root = d->cur_root;
>
> /* RCU ensures that d->cur_pgtbl remains alive, thus it cannot
> * be recycled while this loop is running. If
> * d->cur_pgtbl == pgtbl, the root is the right one for this
> * pgtable.
> */
> smp_rmb();
> } while (d->cur_pgtbl == pgtbl);
> ...
> rcu_read_unlock();
>
It seems to break the semantic of rcu_dereference() and rcu_assign().
If pointers are expensive, how about this:
if (unlikely(d->prev_map!=d->cur_map)) {
d->root = d->cur_map->root;
d->pgtbl = d->cur_map->root;
d->prev_map = d->cur_map;
}
So usually, we use cache value.
Thanks and regards,
Pingfan
> Remember to have a matching smp_wmb() in mem_commit, and to write ->root
> first:
>
> old_pgtbl = d->cur_pgtbl;
> smp_wmb();
> d->cur_root = d->next_root;
>
> /* Write the root before updating the page table. */
> smp_wmb();
> d->cur_pgtbl = d->next_pgtbl;
>
> /* Write cur_pgtbl before possibly destroying the old one. */
> smp_mb();
> page_table_unref(old_pgtbl); /* uses call_rcu if --refcount == 0 */
>
> If you are renaming fields, please do it as the first step.
>
> Paolo
>
>> MemoryListener listener;
>> };
>>
>>
>
[Qemu-devel] [RFC PATCH 2/2] mem: prepare address_space listener rcu style, Liu Ping Fan, 2013/05/12
- Re: [Qemu-devel] [RFC PATCH 2/2] mem: prepare address_space listener rcu style, Paolo Bonzini, 2013/05/13
- Re: [Qemu-devel] [RFC PATCH 2/2] mem: prepare address_space listener rcu style,
liu ping fan <=
- Re: [Qemu-devel] [RFC PATCH 2/2] mem: prepare address_space listener rcu style, Paolo Bonzini, 2013/05/14
- Re: [Qemu-devel] [RFC PATCH 2/2] mem: prepare address_space listener rcu style, liu ping fan, 2013/05/14
- Re: [Qemu-devel] [RFC PATCH 2/2] mem: prepare address_space listener rcu style, Paolo Bonzini, 2013/05/15
- Re: [Qemu-devel] [RFC PATCH 2/2] mem: prepare address_space listener rcu style, liu ping fan, 2013/05/15
- Re: [Qemu-devel] [RFC PATCH 2/2] mem: prepare address_space listener rcu style, Paolo Bonzini, 2013/05/15
Re: [Qemu-devel] [RFC PATCH 2/2] mem: prepare address_space listener rcu style, Paolo Bonzini, 2013/05/16