[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [Qemu-devel] [PATCH v1 5/6] mem: make dispatch path satify rcu style
From: |
Paolo Bonzini |
Subject: |
Re: [Qemu-devel] [PATCH v1 5/6] mem: make dispatch path satify rcu style |
Date: |
Wed, 29 May 2013 09:15:20 +0200 |
User-agent: |
Mozilla/5.0 (X11; Linux x86_64; rv:17.0) Gecko/20130514 Thunderbird/17.0.6 |
Il 29/05/2013 04:11, Liu Ping Fan ha scritto:
> From: Liu Ping Fan <address@hidden>
>
> Using seqlock to load dispatch context atomic. The dispatch
> context consist of: cur_map_nodes, cur_sections, cur_roots.
>
> Also during the dispatch, we should get the terminal, and dup
> MemoryRegionSection. So after rcu unlock, the cur dispatch
> context can be dropped safely.
>
> Signed-off-by: Liu Ping Fan <address@hidden>
>
> --------
> The tcg code related to tlb_set_page() should be fixed too.
> But it is a little complicated, separating it for easing review.
> ---
> exec.c | 239 ++++++++++++++++++++++++++++++++++++++++-----------------------
> 1 files changed, 152 insertions(+), 87 deletions(-)
>
> diff --git a/exec.c b/exec.c
> index 315960d..9a5c67f 100644
> --- a/exec.c
> +++ b/exec.c
> @@ -112,6 +112,8 @@ static Node *next_map_nodes;
> static PhysPageEntry *next_roots;
> static AllocInfo *next_alloc_info;
>
> +QemuSeqLock dispatch_table_lock;
You didn't include the seqlock implementation.
> +
> #define PHYS_MAP_NODE_NIL (((uint16_t)~0) >> 1)
>
> static void io_mem_init(void);
> @@ -199,31 +201,24 @@ static void phys_page_set(AddressSpaceDispatch *d,
> P_L2_LEVELS - 1);
> }
>
> -static MemoryRegionSection *phys_page_find(AddressSpaceDispatch *d, hwaddr
> index, bool cur)
> +static MemoryRegionSection *phys_page_find(AddressSpaceDispatch *d,
> + hwaddr index,
> + Node *map_base,
> + MemoryRegionSection *sections_base,
> + PhysPageEntry *root_base)
> {
> - PhysPageEntry lp;
> + PhysPageEntry lp = root_base[d->idx];
> PhysPageEntry *p;
> int i;
> - Node *map_nodes;
> - MemoryRegionSection *sections;
>
> - if (likely(cur)) {
> - map_nodes = cur_map_nodes;
> - sections = cur_phys_sections;
> - lp = cur_roots[d->idx];
> - } else {
> - map_nodes = next_map_nodes;
> - sections = next_phys_sections;
> - lp = next_roots[d->idx];
> - }
> for (i = P_L2_LEVELS - 1; i >= 0 && !lp.is_leaf; i--) {
> if (lp.ptr == PHYS_MAP_NODE_NIL) {
> - return &sections[phys_section_unassigned];
> + return &sections_base[phys_section_unassigned];
> }
> - p = map_nodes[lp.ptr];
> + p = map_base[lp.ptr];
> lp = p[(index >> (i * L2_BITS)) & (L2_SIZE - 1)];
> }
> - return &sections[lp.ptr];
> + return &sections_base[lp.ptr];
> }
>
> bool memory_region_is_unassigned(MemoryRegion *mr)
> @@ -233,21 +228,28 @@ bool memory_region_is_unassigned(MemoryRegion *mr)
> }
>
> static MemoryRegionSection *address_space_lookup_region(AddressSpace *as,
> - hwaddr addr)
> + hwaddr addr, Node *map_base, MemoryRegionSection *sections_base,
> + PhysPageEntry *root_base)
> +
> {
> - return phys_page_find(as->dispatch, addr >> TARGET_PAGE_BITS, true);
> + return phys_page_find(as->dispatch, addr >> TARGET_PAGE_BITS,
> + map_base, sections_base, root_base);
> }
>
> MemoryRegionSection *address_space_translate(AddressSpace *as, hwaddr addr,
> hwaddr *xlat, hwaddr *plen,
> - bool is_write)
> + bool is_write,
> + Node *map_base,
> + MemoryRegionSection
> *sections_base,
> + PhysPageEntry *root_base)
> {
> IOMMUTLBEntry iotlb;
> - MemoryRegionSection *section, *base = cur_phys_sections;
> + MemoryRegionSection *section;
> hwaddr len = *plen;
>
> for (;;) {
> - section = address_space_lookup_region(as, addr);
> + section = address_space_lookup_region(as, addr, map_base,
> + sections_base, root_base);
>
> /* Compute offset within MemoryRegionSection */
> addr -= section->offset_within_address_space;
> @@ -265,7 +267,7 @@ MemoryRegionSection *address_space_translate(AddressSpace
> *as, hwaddr addr,
> | (addr & iotlb.addr_mask));
> len = MIN(len, (addr | iotlb.addr_mask) - addr + 1);
> if (!(iotlb.perm & (1 << is_write))) {
> - section = &base[phys_section_unassigned];
> + section = &sections_base[phys_section_unassigned];
> break;
> }
>
> @@ -276,6 +278,42 @@ MemoryRegionSection
> *address_space_translate(AddressSpace *as, hwaddr addr,
> *xlat = addr;
> return section;
> }
> +
> +/* caller should hold rcu read lock */
> +MemoryRegionSection address_space_get_terminal(AddressSpace *as, hwaddr addr,
> + hwaddr *xlat, hwaddr *plen,
> + bool is_write)
> +{
> + MemoryRegionSection *mrs, *ret, *sections_base;
> + Node *map_base;
> + subpage_t *mmio;
> + unsigned int idx, start;
> + PhysPageEntry *root_base;
> +
> + /* Walk through all of the AddressSpace in consistent */
> + do {
> + start = seqlock_read_begin(&dispatch_table_lock);
> + map_base = cur_map_nodes;
> + sections_base = cur_phys_sections;
> + /* get all AddressSpaceDispatch root */
> + root_base = cur_roots;
> + } while (seqlock_read_check(&dispatch_table_lock, start));
> +
> + mrs = address_space_translate(as, addr, xlat, plen, is_write,
> + map_base, sections_base, root_base);
> + if (!mrs->mr->terminates) {
Do you want to test mrs->mr->subpage instead? Very similar code is
already in address_space_lookup_region, and should be there.
address_space_lookup_region should ref the region, and the callers
should unref it. This way, only address_space_lookup_region needs to
host the RCU critical section.
Adding the ref/unref should be a separate patch. Even before, we should
change address_space_translate and address_space_translate_for_iotlb to
return a MemoryRegion instead of a MemoryRegionSection.
> + mmio = container_of(mrs->mr, subpage_t, mmio);
> + idx = SUBPAGE_IDX(addr);
> + ret = &sections_base[mmio->sub_section[idx]];
> + *xlat += mmio->base;
> + *xlat -= mrs->offset_within_address_space;
> + *xlat += mrs->offset_within_region;
> + } else {
> + ret = mrs;
> + }
> +
> + return *ret;
> +}
> #endif
>
> void cpu_exec_init_all(void)
> @@ -1777,13 +1815,12 @@ static void core_commit(MemoryListener *listener)
> info->sections = cur_phys_sections;
> info->roots = cur_roots;
>
> - /* Fix me, in future, will be protected by wr seqlock when in parellel
> - * with reader
> - */
> + seqlock_write_lock(&dispatch_table_lock);
> cur_map_nodes = next_map_nodes;
> cur_phys_sections = next_phys_sections;
> cur_alloc_info = next_alloc_info;
> cur_roots = next_roots;
> + seqlock_write_unlock(&dispatch_table_lock);
>
> /* since each CPU stores ram addresses in its TLB cache, we must
> reset the modified entries */
> @@ -1890,8 +1927,13 @@ void address_space_destroy_dispatch(AddressSpace *as)
>
> static void memory_map_init(void)
> {
> + QemuMutex *mutex;
> +
> qemu_mutex_init(&id_lock);
> address_space_id_map = g_malloc0(MAX_IDX/BITS_PER_BYTE);
> + mutex = g_malloc0(sizeof(QemuMutex));
> + qemu_mutex_init(mutex);
No need for a separate mutex, we can use the BQL.
> + seqlock_init(&dispatch_table_lock, mutex);
> system_memory = g_malloc(sizeof(*system_memory));
> memory_region_init(system_memory, "system", INT64_MAX);
> address_space_init(&address_space_memory, system_memory, "memory");
> @@ -2001,59 +2043,68 @@ bool address_space_rw(AddressSpace *as, hwaddr addr,
> uint8_t *buf,
> uint8_t *ptr;
> uint64_t val;
> hwaddr addr1;
> - MemoryRegionSection *section;
> + MemoryRegionSection section;
> bool error = false;
>
> while (len > 0) {
> l = len;
> - section = address_space_translate(as, addr, &addr1, &l, is_write);
> + rcu_read_lock();
> + section = address_space_get_terminal(as, addr, &addr1, &l, is_write);
>
> if (is_write) {
> - if (!memory_access_is_direct(section->mr, is_write)) {
> + if (!memory_access_is_direct(section.mr, is_write)) {
> l = memory_access_size(l, addr1);
> /* XXX: could force cpu_single_env to NULL to avoid
> potential bugs */
> + memory_region_ref(section.mr);
> + rcu_read_unlock();
> if (l == 4) {
> /* 32 bit write access */
> val = ldl_p(buf);
> - error |= io_mem_write(section->mr, addr1, val, 4);
> + error |= io_mem_write(section.mr, addr1, val, 4);
> } else if (l == 2) {
> /* 16 bit write access */
> val = lduw_p(buf);
> - error |= io_mem_write(section->mr, addr1, val, 2);
> + error |= io_mem_write(section.mr, addr1, val, 2);
> } else {
> /* 8 bit write access */
> val = ldub_p(buf);
> - error |= io_mem_write(section->mr, addr1, val, 1);
> + error |= io_mem_write(section.mr, addr1, val, 1);
> }
> + memory_region_unref(section.mr);
> } else {
> - addr1 += memory_region_get_ram_addr(section->mr);
> + addr1 += memory_region_get_ram_addr(section.mr);
> /* RAM case */
> ptr = qemu_get_ram_ptr(addr1);
> memcpy(ptr, buf, l);
> invalidate_and_set_dirty(addr1, l);
> + rcu_read_unlock();
> }
> } else {
> - if (!memory_access_is_direct(section->mr, is_write)) {
> + if (!memory_access_is_direct(section.mr, is_write)) {
> /* I/O case */
> + memory_region_ref(section.mr);
> + rcu_read_unlock();
> l = memory_access_size(l, addr1);
> if (l == 4) {
> /* 32 bit read access */
> - error |= io_mem_read(section->mr, addr1, &val, 4);
> + error |= io_mem_read(section.mr, addr1, &val, 4);
> stl_p(buf, val);
> } else if (l == 2) {
> /* 16 bit read access */
> - error |= io_mem_read(section->mr, addr1, &val, 2);
> + error |= io_mem_read(section.mr, addr1, &val, 2);
> stw_p(buf, val);
> } else {
> /* 8 bit read access */
> - error |= io_mem_read(section->mr, addr1, &val, 1);
> + error |= io_mem_read(section.mr, addr1, &val, 1);
> stb_p(buf, val);
> }
> + memory_region_unref(section.mr);
> } else {
> /* RAM case */
> - ptr = qemu_get_ram_ptr(section->mr->ram_addr + addr1);
> + ptr = qemu_get_ram_ptr(section.mr->ram_addr + addr1);
> memcpy(buf, ptr, l);
> + rcu_read_unlock();
> }
> }
> len -= l;
> @@ -2089,18 +2140,19 @@ void cpu_physical_memory_write_rom(hwaddr addr,
> hwaddr l;
> uint8_t *ptr;
> hwaddr addr1;
> - MemoryRegionSection *section;
> + MemoryRegionSection section;
>
> + rcu_read_lock();
> while (len > 0) {
> l = len;
> - section = address_space_translate(&address_space_memory,
> + section = address_space_get_terminal(&address_space_memory,
> addr, &addr1, &l, true);
>
> - if (!(memory_region_is_ram(section->mr) ||
> - memory_region_is_romd(section->mr))) {
> + if (!(memory_region_is_ram(section.mr) ||
> + memory_region_is_romd(section.mr))) {
> /* do nothing */
> } else {
> - addr1 += memory_region_get_ram_addr(section->mr);
> + addr1 += memory_region_get_ram_addr(section.mr);
> /* ROM/RAM case */
> ptr = qemu_get_ram_ptr(addr1);
> memcpy(ptr, buf, l);
> @@ -2110,6 +2162,7 @@ void cpu_physical_memory_write_rom(hwaddr addr,
> buf += l;
> addr += l;
> }
> + rcu_read_unlock();
> }
>
> typedef struct {
> @@ -2161,15 +2214,15 @@ static void cpu_notify_map_clients(void)
>
> bool address_space_access_valid(AddressSpace *as, hwaddr addr, int len, bool
> is_write)
> {
> - MemoryRegionSection *section;
> + MemoryRegionSection section;
> hwaddr l, xlat;
>
> while (len > 0) {
> l = len;
> - section = address_space_translate(as, addr, &xlat, &l, is_write);
> - if (!memory_access_is_direct(section->mr, is_write)) {
> + section = address_space_get_terminal(as, addr, &xlat, &l, is_write);
> + if (!memory_access_is_direct(section.mr, is_write)) {
> l = memory_access_size(l, addr);
> - if (!memory_region_access_valid(section->mr, xlat, l, is_write))
> {
> + if (!memory_region_access_valid(section.mr, xlat, l, is_write)) {
> return false;
> }
> }
> @@ -2195,14 +2248,14 @@ void *address_space_map(AddressSpace *as,
> hwaddr len = *plen;
> hwaddr todo = 0;
> hwaddr l, xlat;
> - MemoryRegionSection *section;
> + MemoryRegionSection section;
> ram_addr_t raddr = RAM_ADDR_MAX;
> ram_addr_t rlen;
> void *ret;
>
> while (len > 0) {
> l = len;
> - section = address_space_translate(as, addr, &xlat, &l, is_write);
> + section = address_space_get_terminal(as, addr, &xlat, &l, is_write);
>
> if (!memory_access_is_direct(section->mr, is_write)) {
> if (todo || bounce.buffer) {
> @@ -2211,20 +2264,20 @@ void *address_space_map(AddressSpace *as,
> bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE,
> TARGET_PAGE_SIZE);
> bounce.addr = addr;
> bounce.len = l;
> - bounce.mr = section->mr;
> + bounce.mr = section.mr;
> if (!is_write) {
> address_space_read(as, addr, bounce.buffer, l);
> }
>
> *plen = l;
> - memory_region_ref(section->mr);
> + memory_region_ref(section.mr);
> return bounce.buffer;
> }
> if (!todo) {
> - raddr = memory_region_get_ram_addr(section->mr) + xlat;
> - memory_region_ref(section->mr);
> + raddr = memory_region_get_ram_addr(section.mr) + xlat;
> + memory_region_ref(section.mr);
> } else {
> - if (memory_region_get_ram_addr(section->mr) + xlat != raddr +
> todo) {
> + if (memory_region_get_ram_addr(section.mr) + xlat != raddr +
> todo) {
> break;
> }
> }
> @@ -2297,15 +2350,17 @@ static inline uint32_t ldl_phys_internal(hwaddr addr,
> {
> uint8_t *ptr;
> uint64_t val;
> - MemoryRegionSection *section;
> + MemoryRegionSection section;
> hwaddr l = 4;
> hwaddr addr1;
>
> - section = address_space_translate(&address_space_memory, addr, &addr1,
> &l,
> - false);
> + rcu_read_lock();
> + section = address_space_get_terminal(&address_space_memory, addr, &addr1,
> + &l, false);
> + rcu_read_unlock();
> if (l < 4 || !memory_access_is_direct(section->mr, false)) {
> /* I/O case */
> - io_mem_read(section->mr, addr1, &val, 4);
> + io_mem_read(section.mr, addr1, &val, 4);
> #if defined(TARGET_WORDS_BIGENDIAN)
> if (endian == DEVICE_LITTLE_ENDIAN) {
> val = bswap32(val);
> @@ -2360,8 +2415,10 @@ static inline uint64_t ldq_phys_internal(hwaddr addr,
> hwaddr l = 8;
> hwaddr addr1;
>
> - section = address_space_translate(&address_space_memory, addr, &addr1,
> &l,
> - false);
> + rcu_read_lock();
> + section = address_space_get_terminal(&address_space_memory, addr, &addr1,
> + &l, false);
> + rcu_read_unlock();
> if (l < 8 || !memory_access_is_direct(section->mr, false)) {
> /* I/O case */
> io_mem_read(section->mr, addr1, &val, 8);
> @@ -2423,15 +2480,17 @@ static inline uint32_t lduw_phys_internal(hwaddr addr,
> {
> uint8_t *ptr;
> uint64_t val;
> - MemoryRegionSection *section;
> + MemoryRegionSection section;
> hwaddr l = 2;
> hwaddr addr1;
>
> - section = address_space_translate(&address_space_memory, addr, &addr1,
> &l,
> - false);
> - if (l < 2 || !memory_access_is_direct(section->mr, false)) {
> + rcu_read_lock();
> + section = address_space_get_terminal(&address_space_memory, addr, &addr1,
> + &l, false);
> + rcu_read_unlock();
> + if (l < 2 || !memory_access_is_direct(section.mr, false)) {
> /* I/O case */
> - io_mem_read(section->mr, addr1, &val, 2);
> + io_mem_read(section.mr, addr1, &val, 2);
> #if defined(TARGET_WORDS_BIGENDIAN)
> if (endian == DEVICE_LITTLE_ENDIAN) {
> val = bswap16(val);
> @@ -2443,7 +2502,7 @@ static inline uint32_t lduw_phys_internal(hwaddr addr,
> #endif
> } else {
> /* RAM case */
> - ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
> + ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section.mr)
> & TARGET_PAGE_MASK)
> + addr1);
> switch (endian) {
> @@ -2482,16 +2541,18 @@ uint32_t lduw_be_phys(hwaddr addr)
> void stl_phys_notdirty(hwaddr addr, uint32_t val)
> {
> uint8_t *ptr;
> - MemoryRegionSection *section;
> + MemoryRegionSection section;
> hwaddr l = 4;
> hwaddr addr1;
>
> - section = address_space_translate(&address_space_memory, addr, &addr1,
> &l,
> - true);
> - if (l < 4 || !memory_access_is_direct(section->mr, true)) {
> - io_mem_write(section->mr, addr1, val, 4);
> + rcu_read_lock();
> + section = address_space_get_terminal(&address_space_memory, addr, &addr1,
> + &l, true);
> + rcu_read_unlock();
> + if (l < 4 || !memory_access_is_direct(section.mr, true)) {
> + io_mem_write(section.mr, addr1, val, 4);
> } else {
> - addr1 += memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK;
> + addr1 += memory_region_get_ram_addr(section.mr) & TARGET_PAGE_MASK;
> ptr = qemu_get_ram_ptr(addr1);
> stl_p(ptr, val);
>
> @@ -2512,13 +2573,15 @@ static inline void stl_phys_internal(hwaddr addr,
> uint32_t val,
> enum device_endian endian)
> {
> uint8_t *ptr;
> - MemoryRegionSection *section;
> + MemoryRegionSection section;
> hwaddr l = 4;
> hwaddr addr1;
>
> - section = address_space_translate(&address_space_memory, addr, &addr1,
> &l,
> - true);
> - if (l < 4 || !memory_access_is_direct(section->mr, true)) {
> + rcu_read_lock();
> + section = address_space_get_terminal(&address_space_memory, addr, &addr1,
> + &l, true);
> + rcu_read_unlock();
> + if (l < 4 || !memory_access_is_direct(section.mr, true)) {
> #if defined(TARGET_WORDS_BIGENDIAN)
> if (endian == DEVICE_LITTLE_ENDIAN) {
> val = bswap32(val);
> @@ -2528,10 +2591,10 @@ static inline void stl_phys_internal(hwaddr addr,
> uint32_t val,
> val = bswap32(val);
> }
> #endif
> - io_mem_write(section->mr, addr1, val, 4);
> + io_mem_write(section.mr, addr1, val, 4);
> } else {
> /* RAM case */
> - addr1 += memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK;
> + addr1 += memory_region_get_ram_addr(section.mr) & TARGET_PAGE_MASK;
> ptr = qemu_get_ram_ptr(addr1);
> switch (endian) {
> case DEVICE_LITTLE_ENDIAN:
> @@ -2575,13 +2638,13 @@ static inline void stw_phys_internal(hwaddr addr,
> uint32_t val,
> enum device_endian endian)
> {
> uint8_t *ptr;
> - MemoryRegionSection *section;
> + MemoryRegionSection section;
> hwaddr l = 2;
> hwaddr addr1;
>
> - section = address_space_translate(&address_space_memory, addr, &addr1,
> &l,
> - true);
> - if (l < 2 || !memory_access_is_direct(section->mr, true)) {
> + section = address_space_get_terminal(&address_space_memory, addr, &addr1,
> + &l, true);
> + if (l < 2 || !memory_access_is_direct(section.mr, true)) {
> #if defined(TARGET_WORDS_BIGENDIAN)
> if (endian == DEVICE_LITTLE_ENDIAN) {
> val = bswap16(val);
> @@ -2591,10 +2654,10 @@ static inline void stw_phys_internal(hwaddr addr,
> uint32_t val,
> val = bswap16(val);
> }
> #endif
> - io_mem_write(section->mr, addr1, val, 2);
> + io_mem_write(section.mr, addr1, val, 2);
> } else {
> /* RAM case */
> - addr1 += memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK;
> + addr1 += memory_region_get_ram_addr(section.mr) & TARGET_PAGE_MASK;
> ptr = qemu_get_ram_ptr(addr1);
> switch (endian) {
> case DEVICE_LITTLE_ENDIAN:
> @@ -2696,13 +2759,15 @@ bool virtio_is_big_endian(void)
> #ifndef CONFIG_USER_ONLY
> bool cpu_physical_memory_is_io(hwaddr phys_addr)
> {
> - MemoryRegionSection *section;
> + MemoryRegionSection section;
> hwaddr l = 1;
>
> - section = address_space_translate(&address_space_memory,
> + rcu_read_lock();
> + section = address_space_get_terminal(&address_space_memory,
> phys_addr, &phys_addr, &l, false);
> + rcu_read_unlock();
>
> - return !(memory_region_is_ram(section->mr) ||
> - memory_region_is_romd(section->mr));
> + return !(memory_region_is_ram(section.mr) ||
> + memory_region_is_romd(section.mr));
> }
> #endif
>