qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Qemu-devel] [PATCH][RFC] Optimize ld[bwlq]_phys and st[bwlq]_phys


From: Anthony Liguori
Subject: [Qemu-devel] [PATCH][RFC] Optimize ld[bwlq]_phys and st[bwlq]_phys
Date: Tue, 14 Oct 2008 14:59:08 -0500

This patch optimizes the ld and st functions that operate on physical addresses.
Right now, a number of them default to cpu_physical_memory_{read,write} which is
very slow.  As long as the operations are aligned, it is safe to just lookup
the page and directly read/write the data via ld_p or st_p.

This patch introduces a common function since all of these functions are
roughly the same.  I've tested x86 and sparc with Linux and Windows guests.

I'm pretty confident that this new code is functionally equivalent but I wanted
to have someone else confirm this.

Signed-off-by: Anthony Liguori <address@hidden>

diff --git a/exec.c b/exec.c
index f1fcec8..71336b5 100644
--- a/exec.c
+++ b/exec.c
@@ -2911,43 +2911,12 @@ void cpu_physical_memory_write_rom(target_phys_addr_t 
addr,
     }
 }
 
-
-/* warning: addr must be aligned */
-uint32_t ldl_phys(target_phys_addr_t addr)
-{
-    int io_index;
-    uint8_t *ptr;
-    uint32_t val;
-    unsigned long pd;
-    PhysPageDesc *p;
-
-    p = phys_page_find(addr >> TARGET_PAGE_BITS);
-    if (!p) {
-        pd = IO_MEM_UNASSIGNED;
-    } else {
-        pd = p->phys_offset;
-    }
-
-    if ((pd & ~TARGET_PAGE_MASK) > IO_MEM_ROM &&
-        !(pd & IO_MEM_ROMD)) {
-        /* I/O case */
-        io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
-        val = io_mem_read[io_index][2](io_mem_opaque[io_index], addr);
-    } else {
-        /* RAM case */
-        ptr = phys_ram_base + (pd & TARGET_PAGE_MASK) +
-            (addr & ~TARGET_PAGE_MASK);
-        val = ldl_p(ptr);
-    }
-    return val;
-}
-
 /* warning: addr must be aligned */
-uint64_t ldq_phys(target_phys_addr_t addr)
+static uint64_t ld_phys(target_phys_addr_t addr, int size)
 {
     int io_index;
     uint8_t *ptr;
-    uint64_t val;
+    uint64_t val = 0;
     unsigned long pd;
     PhysPageDesc *p;
 
@@ -2962,76 +2931,69 @@ uint64_t ldq_phys(target_phys_addr_t addr)
         !(pd & IO_MEM_ROMD)) {
         /* I/O case */
         io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
+        switch (size) {
+        case 1:
+            val = io_mem_read[io_index][0](io_mem_opaque[io_index], addr);
+            break;
+        case 2:
+            val = io_mem_read[io_index][1](io_mem_opaque[io_index], addr);
+            break;
+        case 4:
+            val = io_mem_read[io_index][2](io_mem_opaque[io_index], addr);
+            break;
+        case 8:
 #ifdef TARGET_WORDS_BIGENDIAN
-        val = (uint64_t)io_mem_read[io_index][2](io_mem_opaque[io_index], 
addr) << 32;
-        val |= io_mem_read[io_index][2](io_mem_opaque[io_index], addr + 4);
+            val = (uint64_t)io_mem_read[io_index][2](io_mem_opaque[io_index], 
addr) << 32;
+            val |= io_mem_read[io_index][2](io_mem_opaque[io_index], addr + 4);
 #else
-        val = io_mem_read[io_index][2](io_mem_opaque[io_index], addr);
-        val |= (uint64_t)io_mem_read[io_index][2](io_mem_opaque[io_index], 
addr + 4) << 32;
+            val = io_mem_read[io_index][2](io_mem_opaque[io_index], addr);
+            val |= (uint64_t)io_mem_read[io_index][2](io_mem_opaque[io_index], 
addr + 4) << 32;
 #endif
+            break;
+        }
     } else {
         /* RAM case */
         ptr = phys_ram_base + (pd & TARGET_PAGE_MASK) +
             (addr & ~TARGET_PAGE_MASK);
-        val = ldq_p(ptr);
+        switch (size) {
+        case 1: val = ldub_p(ptr); break;
+        case 2: val = lduw_p(ptr); break;
+        case 4: val = ldl_p(ptr); break;
+        case 8: val = ldq_p(ptr); break;
+        }
     }
     return val;
 }
 
-/* XXX: optimize */
-uint32_t ldub_phys(target_phys_addr_t addr)
+/* warning: addr must be aligned */
+uint32_t ldl_phys(target_phys_addr_t addr)
 {
-    uint8_t val;
-    cpu_physical_memory_read(addr, &val, 1);
-    return val;
+    return ld_phys(addr, 4);
 }
 
-/* XXX: optimize */
-uint32_t lduw_phys(target_phys_addr_t addr)
+/* warning: addr must be aligned */
+uint64_t ldq_phys(target_phys_addr_t addr)
 {
-    uint16_t val;
-    cpu_physical_memory_read(addr, (uint8_t *)&val, 2);
-    return tswap16(val);
+    return ld_phys(addr, 8);
 }
 
-/* warning: addr must be aligned. The ram page is not masked as dirty
-   and the code inside is not invalidated. It is useful if the dirty
-   bits are used to track modified PTEs */
-void stl_phys_notdirty(target_phys_addr_t addr, uint32_t val)
+uint32_t ldub_phys(target_phys_addr_t addr)
 {
-    int io_index;
-    uint8_t *ptr;
-    unsigned long pd;
-    PhysPageDesc *p;
-
-    p = phys_page_find(addr >> TARGET_PAGE_BITS);
-    if (!p) {
-        pd = IO_MEM_UNASSIGNED;
-    } else {
-        pd = p->phys_offset;
-    }
+    return ld_phys(addr, 1);
+}
 
-    if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
-        io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
-        io_mem_write[io_index][2](io_mem_opaque[io_index], addr, val);
-    } else {
-        unsigned long addr1 = (pd & TARGET_PAGE_MASK) + (addr & 
~TARGET_PAGE_MASK);
-        ptr = phys_ram_base + addr1;
-        stl_p(ptr, val);
-
-        if (unlikely(in_migration)) {
-            if (!cpu_physical_memory_is_dirty(addr1)) {
-                /* invalidate code */
-                tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
-                /* set dirty bit */
-                phys_ram_dirty[addr1 >> TARGET_PAGE_BITS] |=
-                    (0xff & ~CODE_DIRTY_FLAG);
-            }
-        }
-    }
+uint32_t lduw_phys(target_phys_addr_t addr)
+{
+    if (unlikely(addr & 1)) {
+        uint16_t val;
+        cpu_physical_memory_read(addr, (uint8_t *)&val, 2);
+        return tswap16(val);
+    } else
+        return ld_phys(addr, 2);
 }
 
-void stq_phys_notdirty(target_phys_addr_t addr, uint64_t val)
+/* warning: addr must be aligned */
+static void st_phys(target_phys_addr_t addr, int size, uint64_t val, int dirty)
 {
     int io_index;
     uint8_t *ptr;
@@ -3047,47 +3009,41 @@ void stq_phys_notdirty(target_phys_addr_t addr, 
uint64_t val)
 
     if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
         io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
+        switch (size) {
+        case 1:
+            io_mem_write[io_index][0](io_mem_opaque[io_index], addr, val);
+            break;
+        case 2:
+            io_mem_write[io_index][1](io_mem_opaque[io_index], addr, val);
+            break;
+        case 4:
+            io_mem_write[io_index][2](io_mem_opaque[io_index], addr, val);
+            break;
+        case 8:
 #ifdef TARGET_WORDS_BIGENDIAN
-        io_mem_write[io_index][2](io_mem_opaque[io_index], addr, val >> 32);
-        io_mem_write[io_index][2](io_mem_opaque[io_index], addr + 4, val);
+            io_mem_write[io_index][2](io_mem_opaque[io_index], addr, val >> 
32);
+            io_mem_write[io_index][2](io_mem_opaque[io_index], addr + 4, val);
 #else
-        io_mem_write[io_index][2](io_mem_opaque[io_index], addr, val);
-        io_mem_write[io_index][2](io_mem_opaque[io_index], addr + 4, val >> 
32);
+            io_mem_write[io_index][2](io_mem_opaque[io_index], addr, val);
+            io_mem_write[io_index][2](io_mem_opaque[io_index], addr + 4, val 
>> 32);
 #endif
-    } else {
-        ptr = phys_ram_base + (pd & TARGET_PAGE_MASK) +
-            (addr & ~TARGET_PAGE_MASK);
-        stq_p(ptr, val);
-    }
-}
-
-/* warning: addr must be aligned */
-void stl_phys(target_phys_addr_t addr, uint32_t val)
-{
-    int io_index;
-    uint8_t *ptr;
-    unsigned long pd;
-    PhysPageDesc *p;
-
-    p = phys_page_find(addr >> TARGET_PAGE_BITS);
-    if (!p) {
-        pd = IO_MEM_UNASSIGNED;
-    } else {
-        pd = p->phys_offset;
-    }
-
-    if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
-        io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
-        io_mem_write[io_index][2](io_mem_opaque[io_index], addr, val);
+            break;
+        }
     } else {
         unsigned long addr1;
+
         addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
         /* RAM case */
-        ptr = phys_ram_base + addr1;
-        stl_p(ptr, val);
-        if (!cpu_physical_memory_is_dirty(addr1)) {
+        ptr = phys_ram_base + addr1;
+        switch (size) {
+        case 1: stb_p(ptr, val); break;
+        case 2: stw_p(ptr, val); break;
+        case 4: stl_p(ptr, val); break;
+        case 8: stq_p(ptr, val); break;
+        }
+        if (dirty && !cpu_physical_memory_is_dirty(addr1)) {
             /* invalidate code */
-            tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
+            tb_invalidate_phys_page_range(addr1, addr1 + size, 0);
             /* set dirty bit */
             phys_ram_dirty[addr1 >> TARGET_PAGE_BITS] |=
                 (0xff & ~CODE_DIRTY_FLAG);
@@ -3095,25 +3051,44 @@ void stl_phys(target_phys_addr_t addr, uint32_t val)
     }
 }
 
-/* XXX: optimize */
+/* warning: addr must be aligned. The ram page is not marked as dirty
+   and the code inside is not invalidated. It is useful if the dirty
+   bits are used to track modified PTEs */
+void stl_phys_notdirty(target_phys_addr_t addr, uint32_t val)
+{
+    st_phys(addr, 4, val, 0);
+}
+
+/* warning: addr must be aligned */
+void stq_phys_notdirty(target_phys_addr_t addr, uint64_t val)
+{
+    st_phys(addr, 8, val, 0);
+}
+
+/* warning: addr must be aligned */
+void stl_phys(target_phys_addr_t addr, uint32_t val)
+{
+    st_phys(addr, 4, val, 1);
+}
+
 void stb_phys(target_phys_addr_t addr, uint32_t val)
 {
-    uint8_t v = val;
-    cpu_physical_memory_write(addr, &v, 1);
+    st_phys(addr, 1, val, 1);
 }
 
-/* XXX: optimize */
 void stw_phys(target_phys_addr_t addr, uint32_t val)
 {
-    uint16_t v = tswap16(val);
-    cpu_physical_memory_write(addr, (const uint8_t *)&v, 2);
+    if (unlikely(addr & 1)) {
+        uint16_t v = tswap16(val);
+        cpu_physical_memory_write(addr, (const uint8_t *)&v, 2);
+    } else
+        st_phys(addr, 2, val, 1);
 }
 
-/* XXX: optimize */
+/* warning: addr must be aligned */
 void stq_phys(target_phys_addr_t addr, uint64_t val)
 {
-    val = tswap64(val);
-    cpu_physical_memory_write(addr, (const uint8_t *)&val, 8);
+    st_phys(addr, 8, val, 1);
 }
 
 #endif




reply via email to

[Prev in Thread] Current Thread [Next in Thread]