[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [Qemu-devel] [RFC PATCH 1/2] mem: make phys_section and phys_map_nod
From: |
Paolo Bonzini |
Subject: |
Re: [Qemu-devel] [RFC PATCH 1/2] mem: make phys_section and phys_map_nodes prepared for RCU |
Date: |
Mon, 13 May 2013 11:20:59 +0200 |
User-agent: |
Mozilla/5.0 (X11; Linux x86_64; rv:17.0) Gecko/20130311 Thunderbird/17.0.4 |
Il 13/05/2013 05:21, Liu Ping Fan ha scritto:
> From: Liu Ping Fan <address@hidden>
>
> Now, each AddressSpaceDispatch has its own radix-tree, and all of them
> lie on phys_section[] and phys_map_nodes[]. When we want lockless
> mmio dispatch, we need something like RCU.
>
> Achieve this with PhysPageTable which contains all of the info for all
> radix trees. After all address space listeners update (ie. excluding the
> readers) we switch from PhysPageTable *cur_pgtbl to *next_pgtbl.
> (The real RCU style is adopted by the listener, see next patch)
>
> Signed-off-by: Liu Ping Fan <address@hidden>
> ---
> exec.c | 197
> +++++++++++++++++++++++++------------------------
> include/exec/memory.h | 2 +
> memory.c | 2 +
> 3 files changed, 106 insertions(+), 95 deletions(-)
>
> diff --git a/exec.c b/exec.c
> index c5f8082..bb4e540 100644
> --- a/exec.c
> +++ b/exec.c
> @@ -80,23 +80,34 @@ int use_icount;
> #if !defined(CONFIG_USER_ONLY)
>
> #define SUBSECTION_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
> -#define PHYS_SECTION_ID(psection) ((psection) - phys_sections)
> +#define PHYS_SECTION_ID(psection, base) ((psection) - base->phys_sections)
>
> typedef struct PhysSection {
> MemoryRegionSection section;
> uint16_t *sub_section;
> } PhysSection;
>
> -static PhysSection *phys_sections;
> -static unsigned phys_sections_nb, phys_sections_nb_alloc;
> -static uint16_t phys_section_unassigned;
> -static uint16_t phys_section_notdirty;
> -static uint16_t phys_section_rom;
> -static uint16_t phys_section_watch;
> +typedef PhysPageEntry Node[L2_SIZE];
>
> -/* Simple allocator for PhysPageEntry nodes */
> -static PhysPageEntry (*phys_map_nodes)[L2_SIZE];
> -static unsigned phys_map_nodes_nb, phys_map_nodes_nb_alloc;
> +typedef struct PhysPageTable PhysPageTable;
> +
> +struct PhysPageTable {
> + int ref;
> + PhysSection *phys_sections;
> + unsigned phys_sections_nb;
> + unsigned phys_sections_nb_alloc;
> + uint16_t phys_section_unassigned;
> + uint16_t phys_section_notdirty;
> + uint16_t phys_section_rom;
> + uint16_t phys_section_watch;
These four should be constants. Please make them #defines and add
assertions that they have the expected values (in a separate patch).
> +
> + Node *phys_map_nodes;
> + unsigned phys_map_nodes_nb;
> + unsigned phys_map_nodes_nb_alloc;
> +};
> +static PhysPageTable *cur_pgtbl;
> +static PhysPageTable *next_pgtbl;
You shouldn't need cur_pgtbl. Instead, each AddressSpaceDispatch should
have a pointer to its own cur_pgtbl. In the commit hook you can then
take a lock, unref the old table, assign cur_pgtbl = next_pgtbl and ref
the new one.
The lock must also be taken in address_space_translate, together with a
relatively expensive ref/unref pair (two atomic operations). We
probably need real RCU so that we can skip the ref/unref.
>
> #define PHYS_MAP_NODE_NIL (((uint16_t)~0) >> 1)
>
> @@ -111,13 +122,13 @@ static MemoryRegion io_mem_watch;
>
> static void phys_map_node_reserve(unsigned nodes)
> {
> - if (phys_map_nodes_nb + nodes > phys_map_nodes_nb_alloc) {
> + if (next_pgtbl->phys_map_nodes_nb + nodes >
> next_pgtbl->phys_map_nodes_nb_alloc) {
> typedef PhysPageEntry Node[L2_SIZE];
> - phys_map_nodes_nb_alloc = MAX(phys_map_nodes_nb_alloc * 2, 16);
> - phys_map_nodes_nb_alloc = MAX(phys_map_nodes_nb_alloc,
> - phys_map_nodes_nb + nodes);
> - phys_map_nodes = g_renew(Node, phys_map_nodes,
> - phys_map_nodes_nb_alloc);
> + next_pgtbl->phys_map_nodes_nb_alloc =
> MAX(next_pgtbl->phys_map_nodes_nb_alloc * 2, 16);
> + next_pgtbl->phys_map_nodes_nb_alloc =
> MAX(next_pgtbl->phys_map_nodes_nb_alloc,
> + next_pgtbl->phys_map_nodes_nb + nodes);
> + next_pgtbl->phys_map_nodes = g_renew(Node,
> next_pgtbl->phys_map_nodes,
> + next_pgtbl->phys_map_nodes_nb_alloc);
> }
> }
>
> @@ -126,22 +137,16 @@ static uint16_t phys_map_node_alloc(void)
> unsigned i;
> uint16_t ret;
>
> - ret = phys_map_nodes_nb++;
> + ret = next_pgtbl->phys_map_nodes_nb++;
> assert(ret != PHYS_MAP_NODE_NIL);
> - assert(ret != phys_map_nodes_nb_alloc);
> + assert(ret != next_pgtbl->phys_map_nodes_nb_alloc);
> for (i = 0; i < L2_SIZE; ++i) {
> - phys_map_nodes[ret][i].is_leaf = 0;
> - phys_map_nodes[ret][i].ptr = PHYS_MAP_NODE_NIL;
> + next_pgtbl->phys_map_nodes[ret][i].is_leaf = 0;
> + next_pgtbl->phys_map_nodes[ret][i].ptr = PHYS_MAP_NODE_NIL;
> }
> return ret;
> }
>
> -static void phys_map_nodes_reset(void)
> -{
> - phys_map_nodes_nb = 0;
> -}
> -
> -
> static void phys_page_set_level(PhysPageEntry *lp, hwaddr *index,
> hwaddr *nb, uint16_t leaf,
> int level)
> @@ -152,15 +157,15 @@ static void phys_page_set_level(PhysPageEntry *lp,
> hwaddr *index,
>
> if (!lp->is_leaf && lp->ptr == PHYS_MAP_NODE_NIL) {
> lp->ptr = phys_map_node_alloc();
> - p = phys_map_nodes[lp->ptr];
> + p = next_pgtbl->phys_map_nodes[lp->ptr];
> if (level == 0) {
> for (i = 0; i < L2_SIZE; i++) {
> p[i].is_leaf = 1;
> - p[i].ptr = phys_section_unassigned;
> + p[i].ptr = next_pgtbl->phys_section_unassigned;
> }
> }
> } else {
> - p = phys_map_nodes[lp->ptr];
> + p = next_pgtbl->phys_map_nodes[lp->ptr];
> }
> lp = &p[(*index >> (level * L2_BITS)) & (L2_SIZE - 1)];
>
> @@ -192,11 +197,13 @@ static PhysSection
> *phys_section_find(AddressSpaceDispatch *d,
> {
> PhysPageEntry lp = d->phys_map;
> PhysPageEntry *p;
> + PhysSection *phys_sections = cur_pgtbl->phys_sections;
> + Node *phys_map_nodes = cur_pgtbl->phys_map_nodes;
> int i;
>
> for (i = P_L2_LEVELS - 1; i >= 0 && !lp.is_leaf; i--) {
> if (lp.ptr == PHYS_MAP_NODE_NIL) {
> - return &phys_sections[phys_section_unassigned];
> + return &phys_sections[cur_pgtbl->phys_section_unassigned];
> }
> p = phys_map_nodes[lp.ptr];
> lp = p[(index >> (i * L2_BITS)) & (L2_SIZE - 1)];
> @@ -209,6 +216,7 @@ static MemoryRegionSection
> *address_space_lookup_region(AddressSpace *as,
> {
> PhysSection *psection;
> uint16_t idx;
> + PhysSection *phys_sections = cur_pgtbl->phys_sections;
>
> psection = phys_section_find(as->dispatch, addr >> TARGET_PAGE_BITS);
> if (psection->sub_section) {
> @@ -246,7 +254,7 @@ MemoryRegionSection *address_space_translate(AddressSpace
> *as, hwaddr addr,
> | (addr & iotlb.addr_mask));
> len = MIN(len, (addr | iotlb.addr_mask) - addr + 1);
> if (!iotlb.perm[is_write]) {
> - section = &phys_sections[phys_section_unassigned].section;
> + section =
> &cur_pgtbl->phys_sections[cur_pgtbl->phys_section_unassigned].section;
> break;
> }
>
> @@ -690,12 +698,12 @@ hwaddr memory_region_section_get_iotlb(CPUArchState
> *env,
> iotlb = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
> + xlat;
> if (!section->readonly) {
> - iotlb |= phys_section_notdirty;
> + iotlb |= cur_pgtbl->phys_section_notdirty;
> } else {
> - iotlb |= phys_section_rom;
> + iotlb |= cur_pgtbl->phys_section_rom;
> }
> } else {
> - iotlb = PHYS_SECTION_ID(container_of(section, PhysSection, section));
> + iotlb = PHYS_SECTION_ID(container_of(section, PhysSection, section),
> cur_pgtbl);
> iotlb += xlat;
> }
>
> @@ -705,7 +713,7 @@ hwaddr memory_region_section_get_iotlb(CPUArchState *env,
> if (vaddr == (wp->vaddr & TARGET_PAGE_MASK)) {
> /* Avoid trapping reads of pages with a write breakpoint. */
> if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
> - iotlb = phys_section_watch + paddr;
> + iotlb = cur_pgtbl->phys_section_watch + paddr;
> *address |= TLB_MMIO;
> break;
> }
> @@ -721,59 +729,40 @@ static int subsection_register(PhysSection *psection,
> uint32_t start,
> uint32_t end, uint16_t section);
> static void subsections_init(PhysSection *psection);
>
> -static void destroy_page_desc(uint16_t section_index)
> +/* Call after all listeners have been committed.
> + * We do not walk over the tree; we just drop it wholesale.
> + */
> +static void destroy_pagetable(PhysPageTable *pgtbl)
> {
> - g_free(phys_sections[section_index].sub_section);
> -}
> -
> -static void destroy_l2_mapping(PhysPageEntry *lp, unsigned level)
> -{
> - unsigned i;
> - PhysPageEntry *p;
> + int i;
>
> - if (lp->ptr == PHYS_MAP_NODE_NIL) {
> - return;
> - }
> + g_free(pgtbl->phys_map_nodes);
>
> - p = phys_map_nodes[lp->ptr];
> - for (i = 0; i < L2_SIZE; ++i) {
> - if (!p[i].is_leaf) {
> - destroy_l2_mapping(&p[i], level - 1);
> + for (i = 0; i < pgtbl->phys_sections_nb_alloc; i++) {
> + if (pgtbl->phys_sections[i].sub_section) {
> + g_free(pgtbl->phys_sections[i].sub_section);
> } else {
> - destroy_page_desc(p[i].ptr);
> + memory_region_unref(pgtbl->phys_sections[i].section.mr);
> }
> }
> - lp->is_leaf = 0;
> - lp->ptr = PHYS_MAP_NODE_NIL;
> -}
> + g_free(pgtbl->phys_sections);
>
> -static void destroy_all_mappings(AddressSpaceDispatch *d)
> -{
> - destroy_l2_mapping(&d->phys_map, P_L2_LEVELS - 1);
> - phys_map_nodes_reset();
> + g_free(pgtbl);
> }
This should be a separate patch.
> -static uint16_t phys_section_add(MemoryRegionSection *section)
> +static uint16_t phys_section_add(MemoryRegionSection *section, PhysPageTable
> *pgtbl)
phys_section_add will always add to next_pgtbl.
> {
> - assert(phys_sections_nb < TARGET_PAGE_SIZE);
> + assert(pgtbl->phys_sections_nb < TARGET_PAGE_SIZE);
>
> - if (phys_sections_nb == phys_sections_nb_alloc) {
> - phys_sections_nb_alloc = MAX(phys_sections_nb_alloc * 2, 16);
> - phys_sections = g_renew(PhysSection, phys_sections,
> - phys_sections_nb_alloc);
> + if (pgtbl->phys_sections_nb == pgtbl->phys_sections_nb_alloc) {
> + pgtbl->phys_sections_nb_alloc = MAX(pgtbl->phys_sections_nb_alloc *
> 2, 16);
> + pgtbl->phys_sections = g_renew(PhysSection, pgtbl->phys_sections,
> + pgtbl->phys_sections_nb_alloc);
> }
> - phys_sections[phys_sections_nb].section = *section;
> - phys_sections[phys_sections_nb].sub_section = NULL;
> + pgtbl->phys_sections[pgtbl->phys_sections_nb].section = *section;
> + pgtbl->phys_sections[pgtbl->phys_sections_nb].sub_section = NULL;
> memory_region_ref(section->mr);
> - return phys_sections_nb++;
> -}
> -
> -static void phys_sections_clear(void)
> -{
> - while (phys_sections_nb > 0) {
> - PhysSection *phys_section = &phys_sections[--phys_sections_nb];
> - memory_region_unref(phys_section->section.mr);
> - }
> + return pgtbl->phys_sections_nb++;
> }
>
> static void register_subsection(AddressSpaceDispatch *d,
> @@ -793,18 +782,18 @@ static void register_subsection(AddressSpaceDispatch *d,
> psection->section.mr == &io_mem_unassigned);
>
> if (!psection->sub_section) {
> - new_section = phys_section_add(&subsection);
> - psection = &phys_sections[new_section];
> + new_section = phys_section_add(&subsection, next_pgtbl);
> + psection = &next_pgtbl->phys_sections[new_section];
> subsections_init(psection);
> phys_page_set(d, base >> TARGET_PAGE_BITS, 1, new_section);
> } else {
> - new_section = PHYS_SECTION_ID(psection);
> + new_section = PHYS_SECTION_ID(psection, next_pgtbl);
> }
>
> - new_subsection = phys_section_add(section);
> + new_subsection = phys_section_add(section, next_pgtbl);
>
> /* phys_section_add invalidates psection, reload it */
> - psection = &phys_sections[new_section];
> + psection = &next_pgtbl->phys_sections[new_section];
> start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
> end = start + section->size - 1;
> subsection_register(psection, start, end, new_subsection);
> @@ -816,7 +805,7 @@ static void register_multipage(AddressSpaceDispatch *d,
> MemoryRegionSection *sec
> hwaddr start_addr = section->offset_within_address_space;
> ram_addr_t size = section->size;
> hwaddr addr;
> - uint16_t section_index = phys_section_add(section);
> + uint16_t section_index = phys_section_add(section, next_pgtbl);
>
> assert(size);
>
> @@ -1653,7 +1642,7 @@ static void subsections_init(PhysSection *psection)
> {
> psection->sub_section = g_malloc0(sizeof(uint16_t) * TARGET_PAGE_SIZE);
> subsection_register(psection, 0, TARGET_PAGE_SIZE-1,
> - phys_section_unassigned);
> + next_pgtbl->phys_section_unassigned);
> }
>
> static uint16_t dummy_section(MemoryRegion *mr)
> @@ -1665,12 +1654,12 @@ static uint16_t dummy_section(MemoryRegion *mr)
> .size = UINT64_MAX,
> };
>
> - return phys_section_add(§ion);
> + return phys_section_add(§ion, next_pgtbl);
> }
>
> MemoryRegion *iotlb_to_region(hwaddr index)
> {
> - return phys_sections[index & ~TARGET_PAGE_MASK].section.mr;
> + return cur_pgtbl->phys_sections[index & ~TARGET_PAGE_MASK].section.mr;
> }
>
> static void io_mem_init(void)
> @@ -1685,21 +1674,40 @@ static void io_mem_init(void)
> "watch", UINT64_MAX);
> }
>
> +void global_begin(void)
> +{
> + next_pgtbl = g_new0(PhysPageTable, 1);
> + next_pgtbl->ref = 1;
> + next_pgtbl->phys_section_unassigned = dummy_section(&io_mem_unassigned);
> + next_pgtbl->phys_section_notdirty = dummy_section(&io_mem_notdirty);
> + next_pgtbl->phys_section_rom = dummy_section(&io_mem_rom);
> + next_pgtbl->phys_section_watch = dummy_section(&io_mem_watch);
> +}
> +
> +/* other listeners finished */
> +void global_commit(void)
> +{
> + PhysPageTable *t = cur_pgtbl;
> +
> + cur_pgtbl = next_pgtbl;
> +    /* Fix me: currently we rely on each address space listener against its
> +     * reader. So when we come here, no readers will touch the old
> phys_map_nodes.
> +     * After RCU is introduced, this should be changed to call_rcu().
> +     */
> + if (__sync_sub_and_fetch(&t->ref, 1) == 0) {
> + destroy_pagetable(t);
> + }
global_commit should not be needed, and global_begin should be simply
the new core_begin. It should unref next_pgtbl and reallocate a new one.
> +}
> +
> static void mem_begin(MemoryListener *listener)
> {
> AddressSpaceDispatch *d = container_of(listener, AddressSpaceDispatch,
> listener);
>
> - destroy_all_mappings(d);
> d->phys_map.ptr = PHYS_MAP_NODE_NIL;
> }
>
> static void core_begin(MemoryListener *listener)
> {
> - phys_sections_clear();
> - phys_section_unassigned = dummy_section(&io_mem_unassigned);
> - phys_section_notdirty = dummy_section(&io_mem_notdirty);
> - phys_section_rom = dummy_section(&io_mem_rom);
> - phys_section_watch = dummy_section(&io_mem_watch);
> }
>
> static void tcg_commit(MemoryListener *listener)
> @@ -1779,7 +1787,6 @@ void address_space_destroy_dispatch(AddressSpace *as)
> AddressSpaceDispatch *d = as->dispatch;
>
> memory_listener_unregister(&d->listener);
> - destroy_l2_mapping(&d->phys_map, P_L2_LEVELS - 1);
> g_free(d);
> as->dispatch = NULL;
> }
> @@ -2386,7 +2393,7 @@ void stl_phys_notdirty(hwaddr addr, uint32_t val)
>
> if (!memory_region_is_ram(section->mr) || section->readonly) {
> if (memory_region_is_ram(section->mr)) {
> - section = &phys_sections[phys_section_rom].section;
> + section =
> &cur_pgtbl->phys_sections[cur_pgtbl->phys_section_rom].section;
> }
> io_mem_write(section->mr, addr, val, 4);
> } else {
> @@ -2422,7 +2429,7 @@ void stq_phys_notdirty(hwaddr addr, uint64_t val)
>
> if (!memory_region_is_ram(section->mr) || section->readonly) {
> if (memory_region_is_ram(section->mr)) {
> - section = &phys_sections[phys_section_rom].section;
> + section =
> &cur_pgtbl->phys_sections[cur_pgtbl->phys_section_rom].section;
> }
> #ifdef TARGET_WORDS_BIGENDIAN
> io_mem_write(section->mr, addr, val >> 32, 4);
> @@ -2455,7 +2462,7 @@ static inline void stl_phys_internal(hwaddr addr,
> uint32_t val,
>
> if (!memory_region_is_ram(section->mr) || section->readonly) {
> if (memory_region_is_ram(section->mr)) {
> - section = &phys_sections[phys_section_rom].section;
> + section =
> &cur_pgtbl->phys_sections[cur_pgtbl->phys_section_rom].section;
> }
> #if defined(TARGET_WORDS_BIGENDIAN)
> if (endian == DEVICE_LITTLE_ENDIAN) {
> @@ -2526,7 +2533,7 @@ static inline void stw_phys_internal(hwaddr addr,
> uint32_t val,
>
> if (!memory_region_is_ram(section->mr) || section->readonly) {
> if (memory_region_is_ram(section->mr)) {
> - section = &phys_sections[phys_section_rom].section;
> + section =
> &cur_pgtbl->phys_sections[cur_pgtbl->phys_section_rom].section;
> }
> #if defined(TARGET_WORDS_BIGENDIAN)
> if (endian == DEVICE_LITTLE_ENDIAN) {
> diff --git a/include/exec/memory.h b/include/exec/memory.h
> index b97ace7..cc654fa 100644
> --- a/include/exec/memory.h
> +++ b/include/exec/memory.h
> @@ -992,6 +992,8 @@ void *address_space_map(AddressSpace *as, hwaddr addr,
> void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
> int is_write, hwaddr access_len);
>
> +void global_begin(void);
> +void global_commit(void);
>
> #endif
>
> diff --git a/memory.c b/memory.c
> index 1a86607..da06dfd 100644
> --- a/memory.c
> +++ b/memory.c
> @@ -805,6 +805,7 @@ void memory_region_transaction_commit(void)
> --memory_region_transaction_depth;
> if (!memory_region_transaction_depth && memory_region_update_pending) {
> memory_region_update_pending = false;
> + global_begin();
> MEMORY_LISTENER_CALL_GLOBAL(begin, Forward);
>
> QTAILQ_FOREACH(as, &address_spaces, address_spaces_link) {
> @@ -812,6 +813,7 @@ void memory_region_transaction_commit(void)
> }
>
> MEMORY_LISTENER_CALL_GLOBAL(commit, Forward);
> + global_commit();
> }
> }
>
>
- [Qemu-devel] [RFC PATCH 0/2] make memory listener prepared for rcu style, Liu Ping Fan, 2013/05/12
- [Qemu-devel] [RFC PATCH 1/2] mem: make phys_section and phys_map_nodes prepared for RCU, Liu Ping Fan, 2013/05/12
- Re: [Qemu-devel] [RFC PATCH 1/2] mem: make phys_section and phys_map_nodes prepared for RCU,
Paolo Bonzini <=
- Re: [Qemu-devel] [RFC PATCH 1/2] mem: make phys_section and phys_map_nodes prepared for RCU, liu ping fan, 2013/05/13
- Re: [Qemu-devel] [RFC PATCH 1/2] mem: make phys_section and phys_map_nodes prepared for RCU, Paolo Bonzini, 2013/05/14
- Re: [Qemu-devel] [RFC PATCH 1/2] mem: make phys_section and phys_map_nodes prepared for RCU, liu ping fan, 2013/05/15
- Re: [Qemu-devel] [RFC PATCH 1/2] mem: make phys_section and phys_map_nodes prepared for RCU, liu ping fan, 2013/05/26
- Re: [Qemu-devel] [RFC PATCH 1/2] mem: make phys_section and phys_map_nodes prepared for RCU, Paolo Bonzini, 2013/05/27
- Re: [Qemu-devel] [RFC PATCH 1/2] mem: make phys_section and phys_map_nodes prepared for RCU, liu ping fan, 2013/05/28
[Qemu-devel] [RFC PATCH 2/2] mem: prepare address_space listener rcu style, Liu Ping Fan, 2013/05/12