qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Qemu-devel] [RFC 02/10] use a different translation block list for


From: Frederic Konrad
Subject: Re: [Qemu-devel] [RFC 02/10] use a different translation block list for each cpu.
Date: Tue, 27 Jan 2015 16:16:24 +0100
User-agent: Mozilla/5.0 (X11; Linux x86_64; rv:24.0) Gecko/20100101 Thunderbird/24.5.0

On 27/01/2015 15:45, Alex Bennée wrote:
address@hidden writes:

From: KONRAD Frederic <address@hidden>

We need a different TranslationBlock list for each core in case of multithread
TCG.

Signed-off-by: KONRAD Frederic <address@hidden>
---
  translate-all.c | 40 ++++++++++++++++++++++------------------
  1 file changed, 22 insertions(+), 18 deletions(-)

diff --git a/translate-all.c b/translate-all.c
index 8fa4378..0e11c70 100644
--- a/translate-all.c
+++ b/translate-all.c
@@ -72,10 +72,11 @@
  #endif
#define SMC_BITMAP_USE_THRESHOLD 10
+#define MAX_CPUS 256
Where does this number come from?

  typedef struct PageDesc {
      /* list of TBs intersecting this ram page */
-    TranslationBlock *first_tb;
+    TranslationBlock *first_tb[MAX_CPUS];
Especially given the size of the PageDesc structure this adds a lot of
of bulk, mostly unused. Is the access to the TB list via PageDesc that
frequent to avoid an additional indirection?

      /* in order to optimize self modifying code, we count the number
         of lookups we do to a given page to use a bitmap */
      unsigned int code_write_count;
@@ -750,7 +751,7 @@ static inline void invalidate_page_bitmap(PageDesc *p)
  /* Set to NULL all the 'first_tb' fields in all PageDescs. */
  static void page_flush_tb_1(int level, void **lp)
  {
-    int i;
+    int i, j;
if (*lp == NULL) {
          return;
@@ -759,7 +760,9 @@ static void page_flush_tb_1(int level, void **lp)
          PageDesc *pd = *lp;
for (i = 0; i < V_L2_SIZE; ++i) {
-            pd[i].first_tb = NULL;
+            for (j = 0; j < MAX_CPUS; j++) {
+                pd[i].first_tb[j] = NULL;
+            }
              invalidate_page_bitmap(pd + i);
          }
      } else {
@@ -937,12 +940,12 @@ void tb_phys_invalidate(TranslationBlock *tb, 
tb_page_addr_t page_addr)
      /* remove the TB from the page list */
      if (tb->page_addr[0] != page_addr) {
          p = page_find(tb->page_addr[0] >> TARGET_PAGE_BITS);
-        tb_page_remove(&p->first_tb, tb);
+        tb_page_remove(&p->first_tb[current_cpu->cpu_index], tb);
          invalidate_page_bitmap(p);
      }
      if (tb->page_addr[1] != -1 && tb->page_addr[1] != page_addr) {
          p = page_find(tb->page_addr[1] >> TARGET_PAGE_BITS);
-        tb_page_remove(&p->first_tb, tb);
+        tb_page_remove(&p->first_tb[current_cpu->cpu_index], tb);
          invalidate_page_bitmap(p);
      }
@@ -1012,7 +1015,7 @@ static void build_page_bitmap(PageDesc *p) p->code_bitmap = g_malloc0(TARGET_PAGE_SIZE / 8); - tb = p->first_tb;
+    tb = p->first_tb[current_cpu->cpu_index];
      while (tb != NULL) {
          n = (uintptr_t)tb & 3;
          tb = (TranslationBlock *)((uintptr_t)tb & ~3);
@@ -1138,7 +1141,7 @@ void tb_invalidate_phys_page_range(tb_page_addr_t start, 
tb_page_addr_t end,
      /* we remove all the TBs in the range [start, end[ */
      /* XXX: see if in some cases it could be faster to invalidate all
         the code */
-    tb = p->first_tb;
+    tb = p->first_tb[cpu->cpu_index];
      while (tb != NULL) {
          n = (uintptr_t)tb & 3;
          tb = (TranslationBlock *)((uintptr_t)tb & ~3);
@@ -1196,7 +1199,7 @@ void tb_invalidate_phys_page_range(tb_page_addr_t start, 
tb_page_addr_t end,
      }
  #if !defined(CONFIG_USER_ONLY)
      /* if no code remaining, no need to continue to use slow writes */
-    if (!p->first_tb) {
+    if (!p->first_tb[cpu->cpu_index]) {
          invalidate_page_bitmap(p);
          if (is_cpu_write_access) {
              tlb_unprotect_code_phys(cpu, start, cpu->mem_io_vaddr);
@@ -1224,10 +1227,10 @@ void tb_invalidate_phys_page_fast(tb_page_addr_t start, 
int len)
  #if 0
      if (1) {
          qemu_log("modifying code at 0x%x size=%d EIP=%x PC=%08x\n",
-                  cpu_single_env->mem_io_vaddr, len,
-                  cpu_single_env->eip,
-                  cpu_single_env->eip +
-                  (intptr_t)cpu_single_env->segs[R_CS].base);
+                  current_cpu->mem_io_vaddr, len,
+                  current_cpu->eip,
+                  current_cpu->eip +
+                  (intptr_t)current_cpu->segs[R_CS].base);
      }
  #endif
      p = page_find(start >> TARGET_PAGE_BITS);
@@ -1269,7 +1272,7 @@ static void tb_invalidate_phys_page(tb_page_addr_t addr,
      if (!p) {
          return;
      }
-    tb = p->first_tb;
+    tb = p->first_tb[current_cpu->cpu_index];
  #ifdef TARGET_HAS_PRECISE_SMC
      if (tb && pc != 0) {
          current_tb = tb_find_pc(pc);
@@ -1299,7 +1302,7 @@ static void tb_invalidate_phys_page(tb_page_addr_t addr,
          tb_phys_invalidate(tb, addr);
          tb = tb->page_next[n];
      }
-    p->first_tb = NULL;
+    p->first_tb[current_cpu->cpu_index] = NULL;
  #ifdef TARGET_HAS_PRECISE_SMC
      if (current_tb_modified) {
          /* we generate a block containing just the instruction
@@ -1327,11 +1330,12 @@ static inline void tb_alloc_page(TranslationBlock *tb,
tb->page_addr[n] = page_addr;
      p = page_find_alloc(page_addr >> TARGET_PAGE_BITS, 1);
-    tb->page_next[n] = p->first_tb;
+    tb->page_next[n] = p->first_tb[current_cpu->cpu_index];
  #ifndef CONFIG_USER_ONLY
-    page_already_protected = p->first_tb != NULL;
+    page_already_protected = p->first_tb[current_cpu->cpu_index] != NULL;
  #endif
-    p->first_tb = (TranslationBlock *)((uintptr_t)tb | n);
+    p->first_tb[current_cpu->cpu_index]
+      = (TranslationBlock *)((uintptr_t)tb | n);
      invalidate_page_bitmap(p);
#if defined(TARGET_HAS_SMC) || 1
@@ -1821,7 +1825,7 @@ void page_set_flags(target_ulong start, target_ulong end, 
int flags)
             the code inside.  */
          if (!(p->flags & PAGE_WRITE) &&
              (flags & PAGE_WRITE) &&
-            p->first_tb) {
+            p->first_tb[current_cpu->cpu_index]) {
              tb_invalidate_phys_page(addr, 0, NULL, false);
          }
          p->flags = flags;
As the TranslationBlock itself has a linked list for page related
blocks:

   struct TranslationBlock *page_next[2];

could we not just come up with a structure that chains them together
here?

Hi Alex,

Thanks for looking at this.

We don't know how many time this first_tb is accessed right now..
You suggest to chains tb instead of using an array for this?
This make sense but I think it means we will have to protect this by a mutex as
well?

Thanks,
Fred



reply via email to

[Prev in Thread] Current Thread [Next in Thread]