[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Qemu-devel] [PATCH 0/4] speedup memory dispatch
From: |
Paolo Bonzini |
Subject: |
[Qemu-devel] [PATCH 0/4] speedup memory dispatch |
Date: |
Mon, 2 Dec 2013 15:23:59 +0100 |
This avoids useless masking and shifting when a single call to the
MemoryRegion ops will do. It cuts 30 cycles off the common case
of memory dispatch (out of ~150).
Paolo Bonzini (4):
memory: cache min/max_access_size
memory: streamline common case for memory dispatch
memory: hoist coalesced MMIO flush
memory: small tweaks
include/exec/memory.h | 2 +
memory.c | 114 +++++++++++++++++++++++++++++++++++---------------
2 files changed, 82 insertions(+), 34 deletions(-)
--
1.8.4.2
>From 8324dd46284285cec1de394ce2fc3fb449e776d6 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <address@hidden>
Date: Fri, 8 Nov 2013 11:29:41 +0100
Subject: [PATCH 1/4] memory: cache min/max_access_size
This will simplify the code in the next patch.
Signed-off-by: Paolo Bonzini <address@hidden>
---
include/exec/memory.h | 2 ++
memory.c | 27 +++++++++++----------------
2 files changed, 13 insertions(+), 16 deletions(-)
diff --git a/include/exec/memory.h b/include/exec/memory.h
index 480dfbf..cf63adb 100644
--- a/include/exec/memory.h
+++ b/include/exec/memory.h
@@ -163,6 +163,8 @@ struct MemoryRegion {
unsigned ioeventfd_nb;
MemoryRegionIoeventfd *ioeventfds;
NotifierList iommu_notify;
+
+ uint8_t min_access_size, max_access_size;
};
typedef struct MemoryListener MemoryListener;
diff --git a/memory.c b/memory.c
index 28f6449..56e54aa 100644
--- a/memory.c
+++ b/memory.c
@@ -443,8 +443,6 @@ static void memory_region_write_accessor(MemoryRegion *mr,
static void access_with_adjusted_size(hwaddr addr,
uint64_t *value,
unsigned size,
- unsigned access_size_min,
- unsigned access_size_max,
void (*access)(MemoryRegion *mr,
hwaddr addr,
uint64_t *value,
@@ -457,15 +455,8 @@ static void access_with_adjusted_size(hwaddr addr,
unsigned access_size;
unsigned i;
- if (!access_size_min) {
- access_size_min = 1;
- }
- if (!access_size_max) {
- access_size_max = 4;
- }
-
/* FIXME: support unaligned access? */
- access_size = MAX(MIN(size, access_size_max), access_size_min);
+ access_size = MAX(MIN(size, mr->min_access_size), mr->min_access_size);
access_mask = -1ULL >> (64 - access_size * 8);
if (memory_region_big_endian(mr)) {
for (i = 0; i < size; i += access_size) {
@@ -942,11 +933,9 @@ static uint64_t memory_region_dispatch_read1(MemoryRegion
*mr,
if (mr->ops->read) {
access_with_adjusted_size(addr, &data, size,
- mr->ops->impl.min_access_size,
- mr->ops->impl.max_access_size,
memory_region_read_accessor, mr);
} else {
- access_with_adjusted_size(addr, &data, size, 1, 4,
+ access_with_adjusted_size(addr, &data, size,
memory_region_oldmmio_read_accessor, mr);
}
@@ -982,11 +971,9 @@ static bool memory_region_dispatch_write(MemoryRegion *mr,
if (mr->ops->write) {
access_with_adjusted_size(addr, &data, size,
- mr->ops->impl.min_access_size,
- mr->ops->impl.max_access_size,
memory_region_write_accessor, mr);
} else {
- access_with_adjusted_size(addr, &data, size, 1, 4,
+ access_with_adjusted_size(addr, &data, size,
memory_region_oldmmio_write_accessor, mr);
}
return false;
@@ -1004,6 +991,14 @@ void memory_region_init_io(MemoryRegion *mr,
mr->opaque = opaque;
mr->terminates = true;
mr->ram_addr = ~(ram_addr_t)0;
+
+ if (mr->ops->read) {
+ mr->min_access_size = mr->ops->impl.min_access_size ? : 1;
+ mr->max_access_size = mr->ops->impl.max_access_size ? : 4;
+ } else {
+ mr->min_access_size = 1;
+ mr->max_access_size = 4;
+ }
}
void memory_region_init_ram(MemoryRegion *mr,
--
1.8.4.2
>From 2afa3d3451dbfc76b3c6a28af1e7cdd3ab50c7ec Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <address@hidden>
Date: Fri, 8 Nov 2013 11:39:18 +0100
Subject: [PATCH 2/4] memory: streamline common case for memory dispatch
In the common case where there is no combining or splitting,
access_with_adjusted_size is adding a lot of overhead. Call
the MMIO ops directly in that case.
Signed-off-by: Paolo Bonzini <address@hidden>
---
memory.c | 68 ++++++++++++++++++++++++++++++++++++++++++++++++++++------------
1 file changed, 56 insertions(+), 12 deletions(-)
diff --git a/memory.c b/memory.c
index 56e54aa..1ade19c 100644
--- a/memory.c
+++ b/memory.c
@@ -443,6 +443,7 @@ static void memory_region_write_accessor(MemoryRegion *mr,
static void access_with_adjusted_size(hwaddr addr,
uint64_t *value,
unsigned size,
+ unsigned access_size,
void (*access)(MemoryRegion *mr,
hwaddr addr,
uint64_t *value,
@@ -452,11 +453,9 @@ static void access_with_adjusted_size(hwaddr addr,
MemoryRegion *mr)
{
uint64_t access_mask;
- unsigned access_size;
unsigned i;
/* FIXME: support unaligned access? */
- access_size = MAX(MIN(size, mr->min_access_size), mr->min_access_size);
access_mask = -1ULL >> (64 - access_size * 8);
if (memory_region_big_endian(mr)) {
for (i = 0; i < size; i += access_size) {
@@ -929,13 +928,32 @@ static uint64_t memory_region_dispatch_read1(MemoryRegion
*mr,
hwaddr addr,
unsigned size)
{
+ unsigned access_size;
uint64_t data = 0;
+ if (size < mr->min_access_size) {
+ access_size = mr->min_access_size;
+ goto adjusted;
+ }
+ if (size > mr->max_access_size) {
+ access_size = mr->max_access_size;
+ goto adjusted;
+ }
+
+ if (mr->ops->read) {
+ data = mr->ops->read(mr->opaque, addr, size);
+ } else {
+ data = mr->ops->old_mmio.read[ctz32(size)](mr->opaque, addr);
+ }
+ trace_memory_region_ops_read(mr, addr, data, size);
+ return data;
+
+adjusted:
if (mr->ops->read) {
- access_with_adjusted_size(addr, &data, size,
+ access_with_adjusted_size(addr, &data, size, access_size,
memory_region_read_accessor, mr);
} else {
- access_with_adjusted_size(addr, &data, size,
+ access_with_adjusted_size(addr, &data, size, access_size,
memory_region_oldmmio_read_accessor, mr);
}
@@ -957,6 +975,39 @@ static bool memory_region_dispatch_read(MemoryRegion *mr,
return false;
}
+static void memory_region_dispatch_write1(MemoryRegion *mr,
+ hwaddr addr,
+ uint64_t data,
+ unsigned size)
+{
+ unsigned access_size;
+ if (size < mr->min_access_size) {
+ access_size = mr->min_access_size;
+ goto adjusted;
+ }
+ if (size > mr->max_access_size) {
+ access_size = mr->max_access_size;
+ goto adjusted;
+ }
+
+ trace_memory_region_ops_write(mr, addr, data, size);
+ if (mr->ops->write) {
+ mr->ops->write(mr->opaque, addr, data, size);
+ } else {
+ mr->ops->old_mmio.write[ctz32(size)](mr->opaque, addr, data);
+ }
+ return;
+
+adjusted:
+ if (mr->ops->write) {
+ access_with_adjusted_size(addr, &data, size, access_size,
+ memory_region_write_accessor, mr);
+ } else {
+ access_with_adjusted_size(addr, &data, size, access_size,
+ memory_region_oldmmio_write_accessor, mr);
+ }
+}
+
static bool memory_region_dispatch_write(MemoryRegion *mr,
hwaddr addr,
uint64_t data,
@@ -968,14 +1019,7 @@ static bool memory_region_dispatch_write(MemoryRegion *mr,
}
adjust_endianness(mr, &data, size);
-
- if (mr->ops->write) {
- access_with_adjusted_size(addr, &data, size,
- memory_region_write_accessor, mr);
- } else {
- access_with_adjusted_size(addr, &data, size,
- memory_region_oldmmio_write_accessor, mr);
- }
+ memory_region_dispatch_write1(mr, addr, data, size);
return false;
}
--
1.8.4.2
>From c65bf3cd93264eac93bbfe3a5974b8d15a1599b0 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <address@hidden>
Date: Fri, 8 Nov 2013 11:49:52 +0100
Subject: [PATCH 3/4] memory: hoist coalesced MMIO flush
No need to flush the coalesced MMIO buffer multiple times when combining
multiple accesses into one.
Signed-off-by: Paolo Bonzini <address@hidden>
---
memory.c | 14 ++++++++------
1 file changed, 8 insertions(+), 6 deletions(-)
diff --git a/memory.c b/memory.c
index 1ade19c..495e693 100644
--- a/memory.c
+++ b/memory.c
@@ -401,9 +401,6 @@ static void memory_region_read_accessor(MemoryRegion *mr,
{
uint64_t tmp;
- if (mr->flush_coalesced_mmio) {
- qemu_flush_coalesced_mmio_buffer();
- }
tmp = mr->ops->read(mr->opaque, addr, size);
trace_memory_region_ops_read(mr, addr, tmp, size);
*value |= (tmp & mask) << shift;
@@ -432,9 +429,6 @@ static void memory_region_write_accessor(MemoryRegion *mr,
{
uint64_t tmp;
- if (mr->flush_coalesced_mmio) {
- qemu_flush_coalesced_mmio_buffer();
- }
tmp = (*value >> shift) & mask;
trace_memory_region_ops_write(mr, addr, tmp, size);
mr->ops->write(mr->opaque, addr, tmp, size);
@@ -965,6 +959,10 @@ static bool memory_region_dispatch_read(MemoryRegion *mr,
uint64_t *pval,
unsigned size)
{
+ if (mr->flush_coalesced_mmio) {
+ qemu_flush_coalesced_mmio_buffer();
+ }
+
if (!memory_region_access_valid(mr, addr, size, false)) {
*pval = unassigned_mem_read(mr, addr, size);
return true;
@@ -1013,6 +1011,10 @@ static bool memory_region_dispatch_write(MemoryRegion
*mr,
uint64_t data,
unsigned size)
{
+ if (mr->flush_coalesced_mmio) {
+ qemu_flush_coalesced_mmio_buffer();
+ }
+
if (!memory_region_access_valid(mr, addr, size, true)) {
unassigned_mem_write(mr, addr, data, size);
return true;
--
1.8.4.2
>From d13d7f894cb4670568a0bd45d592a9321d9d5dab Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <address@hidden>
Date: Fri, 8 Nov 2013 11:48:11 +0100
Subject: [PATCH 4/4] memory: small tweaks
Make adjust_endianness inline, and do not use a ctz instruction
when a shift will do.
Signed-off-by: Paolo Bonzini <address@hidden>
---
memory.c | 15 ++++++++++-----
1 file changed, 10 insertions(+), 5 deletions(-)
diff --git a/memory.c b/memory.c
index 495e693..d3b0dce 100644
--- a/memory.c
+++ b/memory.c
@@ -357,7 +357,7 @@ static bool memory_region_wrong_endianness(MemoryRegion *mr)
#endif
}
-static void adjust_endianness(MemoryRegion *mr, uint64_t *data, unsigned size)
+static inline void adjust_endianness(MemoryRegion *mr, uint64_t *data,
unsigned size)
{
if (memory_region_wrong_endianness(mr)) {
switch (size) {
@@ -378,6 +378,11 @@ static void adjust_endianness(MemoryRegion *mr, uint64_t
*data, unsigned size)
}
}
+static inline int ctz3(unsigned size)
+{
+ return size >> 1;
+}
+
static void memory_region_oldmmio_read_accessor(MemoryRegion *mr,
hwaddr addr,
uint64_t *value,
@@ -387,7 +392,7 @@ static void
memory_region_oldmmio_read_accessor(MemoryRegion *mr,
{
uint64_t tmp;
- tmp = mr->ops->old_mmio.read[ctz32(size)](mr->opaque, addr);
+ tmp = mr->ops->old_mmio.read[ctz3(size)](mr->opaque, addr);
trace_memory_region_ops_read(mr, addr, tmp, size);
*value |= (tmp & mask) << shift;
}
@@ -417,7 +422,7 @@ static void
memory_region_oldmmio_write_accessor(MemoryRegion *mr,
tmp = (*value >> shift) & mask;
trace_memory_region_ops_write(mr, addr, tmp, size);
- mr->ops->old_mmio.write[ctz32(size)](mr->opaque, addr, tmp);
+ mr->ops->old_mmio.write[ctz3(size)](mr->opaque, addr, tmp);
}
static void memory_region_write_accessor(MemoryRegion *mr,
@@ -937,7 +942,7 @@ static uint64_t memory_region_dispatch_read1(MemoryRegion
*mr,
if (mr->ops->read) {
data = mr->ops->read(mr->opaque, addr, size);
} else {
- data = mr->ops->old_mmio.read[ctz32(size)](mr->opaque, addr);
+ data = mr->ops->old_mmio.read[ctz3(size)](mr->opaque, addr);
}
trace_memory_region_ops_read(mr, addr, data, size);
return data;
@@ -992,7 +997,7 @@ static void memory_region_dispatch_write1(MemoryRegion *mr,
if (mr->ops->write) {
mr->ops->write(mr->opaque, addr, data, size);
} else {
- mr->ops->old_mmio.write[ctz32(size)](mr->opaque, addr, data);
+ mr->ops->old_mmio.write[ctz3(size)](mr->opaque, addr, data);
}
return;
--
1.8.4.2
- [Qemu-devel] [PATCH 0/4] speedup memory dispatch,
Paolo Bonzini <=
- [Qemu-devel] [PATCH 1/4] memory: cache min/max_access_size, Paolo Bonzini, 2013/12/02
- [Qemu-devel] [PATCH 2/4] memory: streamline common case for memory dispatch, Paolo Bonzini, 2013/12/02
- [Qemu-devel] [PATCH 3/4] memory: hoist coalesced MMIO flush, Paolo Bonzini, 2013/12/02
- [Qemu-devel] [PATCH 4/4] memory: small tweaks, Paolo Bonzini, 2013/12/02
- Re: [Qemu-devel] [PATCH 0/4] speedup memory dispatch, Marcel Apfelbaum, 2013/12/10