[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [Qemu-devel] [PATCH 0/6] pc: bring ACPI table size below to 2.0 levels, try fixing -initrd for good
From: |
Paolo Bonzini |
Subject: |
Re: [Qemu-devel] [PATCH 0/6] pc: bring ACPI table size below to 2.0 levels, try fixing -initrd for good |
Date: |
Fri, 19 Sep 2014 15:09:12 +0200 |
User-agent: |
Mozilla/5.0 (X11; Linux x86_64; rv:31.0) Gecko/20100101 Thunderbird/31.0 |
Il 19/09/2014 09:36, Gerd Hoffmann ha scritto:
> Hi,
>
>> However, there is another problem. As the ACPI tables grow, we need
>> to move the address at which linuxboot.bin loads the initrd. This
>> address is placed close to the end of memory, but it is QEMU that
>> tells linuxboot.bin where exactly the initrd is to be loaded. And
>> QEMU cannot really know how much high memory SeaBIOS will use, because
>> QEMU does not know the final e820 memory map.
>>
>> The solution would be to let linuxboot.bin parse the memory map and
>> ignore the suggested initrd base address, but that's tedious. In the
>> meantime, we can just assume that most of the need comes from the ACPI
>> tables (which is in fact true: patch 3 adds a fixed 32k extra just in
>> case) and dynamically resize the padding.
>
> Hmm. That assumes we are running seabios, where we know how much memory
> we actually need.
>
> IMHO we should either really parse the memory map, or reserve more
> space.
>
> IIRC it doesn't matter that much where we load the initrd. It should
> not be just after the kernel, because the kernel needs some space to
> unpack itself and for early allocations such as initial page tables.
> This is where the common practice to load the initrd high comes from.
> But whether we leave 128k or 16m between initrd and top-of-memory
> doesn't make much of a difference.
Ok, I wrote the e820 scanning code, and it works with KVM but it hits
a TCG bug. The rep/movsb in SeaBIOS's e820 routine just doesn't write to
es:di. The TCG ops seem sane:
set_label $0x1
ext16u_i64 tmp2,rsi
ld_i64 tmp3,env,$0x108 // load ds base
add_i64 tmp2,tmp2,tmp3
ext32u_i64 tmp2,tmp2
qemu_ld_i64 tmp0,tmp2,ub,$0x2 // load into tmp0
ext16u_i64 tmp2,rdi
ld_i64 tmp3,env,$0xc0 // load es base
add_i64 tmp2,tmp2,tmp3
ext32u_i64 tmp2,tmp2
qemu_st_i64 tmp0,tmp2,ub,$0x2 // store from tmp0
ld32s_i64 tmp0,env,$0xac // increase rsi/rdi
add_i64 tmp3,rsi,tmp0
deposit_i64 rsi,rsi,tmp3,$0x0,$0x10
add_i64 tmp3,rdi,tmp0
deposit_i64 rdi,rdi,tmp3,$0x0,$0x10
movi_i64 tmp13,$0xffffffffffffffff // decrement rcx
add_i64 tmp3,rcx,tmp13
deposit_i64 rcx,rcx,tmp3,$0x0,$0x10
goto_tb $0x0
movi_i64 tmp3,$0xf7b4
st_i64 tmp3,env,$0x80
exit_tb $0x7fe8a2c167a0
set_label $0x0
exit_tb $0x7fe8a2c167a3
For now I'm giving up; here is the patch just in case. It also fails with
2.1.1.
There is some debugging output that goes to the serial port. With KVM
it prints 1/2/2/1/2/2, while with TCG it prints 0/0/0/0/0 (it should
print 1/2/2/1/2 instead).
diff --git a/pc-bios/optionrom/linuxboot.S b/pc-bios/optionrom/linuxboot.S
index 748c831..e6f1be1 100644
--- a/pc-bios/optionrom/linuxboot.S
+++ b/pc-bios/optionrom/linuxboot.S
@@ -76,6 +76,96 @@ boot_kernel:
copy_kernel:
+ push %ds
+ pop %es
+
+ /* Compute initrd address */
+ mov $0xe801, %ax
+ xor %cx, %cx
+ xor %dx, %dx
+ int $0x15
+
+ /* Output could be in AX/BX or CX/DX */
+ or %cx, %cx
+ jnz 1f
+ or %dx, %dx
+ jnz 1f
+ mov %ax, %cx
+ mov %bx, %dx
+1:
+
+ or %dx, %dx
+ jnz 2f
+ addw $1024, %cx /* add 1 MB */
+ movzwl %cx, %ebp
+ shll $10, %ebp /* convert to bytes */
+ jmp mmap_loop_start
+
+2:
+ addw $16777216 >> 16, %dx /* add 16 MB */
+ movzwl %dx, %ebp
+ shll $16, %ebp /* convert to bytes */
+
+ /* EBP (end of memory) is a hint to the loop below, that computes the
+ final location using the e820 memory map. O(n^2) loop, but e820
+ is small anyway. */
+
+mmap_loop_start:
+ movl %ebp, %esi /* ESI = end of memory */
+
+ read_fw FW_CFG_INITRD_SIZE
+ subl %eax, %ebp /* EBP = start of initrd */
+ andl $-4096, %ebp
+
+ xor %ebx, %ebx
+
+ /* now move it further down according to the indications of the e820
+ memory map... */
+mmap_loop:
+ mov $0xe820, %ax
+ mov $0x534D4150, %edx
+ mov $24, %ecx
+ mov $e820, %edi
+ int $0x15
+ jc mmap_done /* if at end of list, we're done */
+ cmp $0x534D4150, %eax /* if BIOS broken, exit */
+ jnz mmap_done
+ or %ebx, %ebx /* another check for end of list */
+ jz mmap_done
+
+mov 16(%di), %al
+mov $0x3f8, %dx
+add $0x30, %al
+out %al, %dx
+mov $0xd, %al
+out %al, %dx
+mov $0xa, %al
+out %al, %dx
+
+ jcxz mmap_loop /* ignore empty entries */
+ cmpb $1, 16(%di) /* only process reserved regions */
+ je mmap_loop
+ cmpl $0, 4(%di) /* only process low memory */
+ jne mmap_loop
+ cmpl %esi, 0(%di)
+ jae mmap_loop
+
+ movl 8(%di), %ecx /* ECX = region size */
+ jecxz mmap_loop /* ignore empty regions */
+
+ /* Valid low memory region. Check if it overlaps EBP..ESI */
+
+ addl 0(%di), %ecx /* ECX = end of region */
+ cmp %ebp, %ecx /* not if end <= initrd_start */
+ jbe mmap_loop
+
+ /* Cannot put initrd here, try lowering the top of memory */
+
+ movl 0(%di), %ebp
+ jmp mmap_loop_start
+
+mmap_done:
+ mov %ebp, %edi /* EDI = start of initrd */
/* We need to load the kernel into memory we can't access in 16 bit
mode, so let's get into 32 bit mode, write the kernel and jump
@@ -108,10 +198,18 @@ copy_kernel:
/* We're now running in 16-bit CS, but 32-bit ES! */
/* Load kernel and initrd */
+ pushl %edi
+ read_fw_blob_addr32_edi(FW_CFG_INITRD)
read_fw_blob_addr32(FW_CFG_KERNEL)
- read_fw_blob_addr32(FW_CFG_INITRD)
read_fw_blob_addr32(FW_CFG_CMDLINE)
- read_fw_blob_addr32(FW_CFG_SETUP)
+
+ read_fw FW_CFG_SETUP_ADDR
+ mov %eax, %edi
+ mov %eax, %ebx
+ read_fw_blob_addr32_edi(FW_CFG_SETUP)
+
+ /* Update the header with the initrd address we chose above */
+ popl %es:0x218(%ebx)
/* And now jump into Linux! */
mov $0, %eax
@@ -136,4 +234,9 @@ gdt:
/* 0x10: data segment (base=0, limit=0xfffff, type=32bit data
read/write, DPL=0, 4k) */
.byte 0xff, 0xff, 0x00, 0x00, 0x00, 0x92, 0xcf, 0x00
+e820:
+.byte 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
+.byte 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
+.byte 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
+
BOOT_ROM_END
diff --git a/pc-bios/optionrom/optionrom.h b/pc-bios/optionrom/optionrom.h
index ce43608..f1a9021 100644
--- a/pc-bios/optionrom/optionrom.h
+++ b/pc-bios/optionrom/optionrom.h
@@ -51,8 +51,6 @@
.endm
#define read_fw_blob_pre(var) \
- read_fw var ## _ADDR; \
- mov %eax, %edi; \
read_fw var ## _SIZE; \
mov %eax, %ecx; \
mov $var ## _DATA, %ax; \
@@ -68,6 +66,8 @@
* Clobbers: %eax, %edx, %es, %ecx, %edi
*/
#define read_fw_blob(var) \
+ read_fw var ## _ADDR; \
+ mov %eax, %edi; \
read_fw_blob_pre(var); \
/* old as(1) doesn't like this insn so emit the bytes instead: \
rep insb (%dx), %es:(%edi); \
@@ -80,7 +80,22 @@
*
* Clobbers: %eax, %edx, %es, %ecx, %edi
*/
-#define read_fw_blob_addr32(var) \
+#define read_fw_blob_addr32(var) \
+ read_fw var ## _ADDR; \
+ mov %eax, %edi; \
+ read_fw_blob_pre(var); \
+ /* old as(1) doesn't like this insn so emit the bytes instead: \
+ addr32 rep insb (%dx), %es:(%edi); \
+ */ \
+ .dc.b 0x67,0xf3,0x6c
+
+/*
+ * Read a blob from the fw_cfg device in forced addr32 mode, address is in %edi.
+ * Requires _SIZE and _DATA values for the parameter.
+ *
+ * Clobbers: %eax, %edx, %edi, %es, %ecx
+ */
+#define read_fw_blob_addr32_edi(var) \
read_fw_blob_pre(var); \
/* old as(1) doesn't like this insn so emit the bytes instead: \
addr32 rep insb (%dx), %es:(%edi); \
- [Qemu-devel] [PATCH 0/6] pc: bring ACPI table size below to 2.0 levels, try fixing -initrd for good, Paolo Bonzini, 2014/09/18
- [Qemu-devel] [PATCH 1/6] pc: initialize fw_cfg earlier, Paolo Bonzini, 2014/09/18
- [Qemu-devel] [PATCH 2/6] pc: load the kernel after ACPI tables are built, Paolo Bonzini, 2014/09/18
- [Qemu-devel] [PATCH 3/6] pc: redo sizing of reserved high memory area for -kernel/-initrd, Paolo Bonzini, 2014/09/18
- [Qemu-devel] [PATCH 4/6] pc: introduce new ACPI table sizing algorithm, Paolo Bonzini, 2014/09/18
- [Qemu-devel] [PATCH 5/6] pc: go back to smaller ACPI tables, Paolo Bonzini, 2014/09/18
- [Qemu-devel] [PATCH 6/6] pc: clean up pre-2.1 compatibility code, Paolo Bonzini, 2014/09/18
- Re: [Qemu-devel] [PATCH 0/6] pc: bring ACPI table size below to 2.0 levels, try fixing -initrd for good, Gerd Hoffmann, 2014/09/19