2017-09-12 10:16:48

by Du, Changbin

[permalink] [raw]
Subject: Does perf-annotate work correctly?


When a annotate a symbol, I find the annotated C source code doesn't match assembly code.
So I cannot determine which line of C code has much overhead withou gdb's help.

Here is a example result of function apic_has_interrupt_for_ppr() in kvm module.

│580 __clear_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention); ▒
│581 } ▒
│ ▒
│583 static int apic_has_interrupt_for_ppr(struct kvm_lapic *apic, u32 ppr) ▒
│584 { ▒
0.88 │30: cmpb $0x0,0x91(%rdi) ▒
2.54 │ ↓ je 63 ▒
0.20 │ mov 0xa0(%rdi),%rcx ▒
│581 int highest_irr; ▒
│582 if (kvm_x86_ops->sync_pir_to_irr && apic->vcpu->arch.apicv_active) ▒
4.91 │ mov $0xe0,%eax x ▒
1.46 │45: mov %eax,%edx x ▒
0.02 │ sar $0x5,%edx x ▒
3.57 │ shl $0x4,%edx x ▒
3.34 │ movslq %edx,%rdx x ▒
1.25 │ mov 0x200(%rcx,%rdx,1),%edx x ▒
42.44 │ test %edx,%edx x ▒
0.01 │ ┌──jne 88 x ▒
3.48 │ │ sub $0x20,%eax x ▒
2.24 │ │ cmp $0xffffffe0,%eax x ▒
│586│apic_find_highest_irr(): ▒
│ │ ▒
│407│ /* ▒
│408│ * Note that irr_pending is just a hint. It will be always ▒
│409│ * true with virtual interrupt delivery enabled. ▒
│410│ */ ▒
│411│ if (!apic->irr_pending) ▒
│ │↑ jne 45 ▒
0.62 │63:│ mov $0xffffffff,%eax ◆
0.83 │ │ leaveq ▒
13.52 │ │← retq ▒
│6a:│ mov %esi,-0x4(%rbp) ▒
│ │ mov %rdx,%rdi ▒
│418│find_highest_vector(): ▒
│340│static int find_highest_vector(void *bitmap) ▒
│341│{ ▒
│342│ int vec; ▒
│343│ u32 *reg; ▒
│ │ ▒
│345│ for (vec = MAX_APIC_VECTOR - APIC_VECTORS_PER_REG; ▒
│ │→ callq *%rax ▒
│ │ mov -0x4(%rbp),%esi ▒
│343│ vec >= 0; vec -= APIC_VECTORS_PER_REG) { ▒
│344│ reg = bitmap + REG_POS(vec); ▒
│345│ if (*reg) ▒
0.05 │75:│ cmp $0xffffffff,%eax ▒
│ │↑ je 63 ▒
1.95 │ │ mov %eax,%edx ▒
1.45 │ │ and $0xf0,%edx


Look at the assembly code block where I have put a 'x' on the right. Apparently the
assembly code doesn't match the C source code arrounded. Let's look the correct disassemble
result from gdb:

340 for (vec = MAX_APIC_VECTOR - APIC_VECTORS_PER_REG;
0x000000000003b4e0 <+64>: mov $0xe0,%eax

342 reg = bitmap + REG_POS(vec);
343 if (*reg)
0x000000000003b4e5 <+69>: mov %eax,%edx
0x000000000003b4e7 <+71>: sar $0x5,%edx
0x000000000003b4ea <+74>: shl $0x4,%edx
0x000000000003b4ed <+77>: movslq %edx,%rdx
0x000000000003b4f0 <+80>: mov 0x200(%rcx,%rdx,1),%edx
0x000000000003b4f7 <+87>: test %edx,%edx
0x000000000003b4f9 <+89>: jne 0x3b528 <apic_has_interrupt_for_ppr+136>

341 vec >= 0; vec -= APIC_VECTORS_PER_REG) {
0x000000000003b4fb <+91>: sub $0x20,%eax

340 for (vec = MAX_APIC_VECTOR - APIC_VECTORS_PER_REG;
0x000000000003b4fe <+94>: cmp $0xffffffe0,%eax
0x000000000003b501 <+97>: jne 0x3b4e5 <apic_has_interrupt_for_ppr+69>


Compared to gdb, perf-annoate has messed up. is it a bug or just perf is not as perfect as gdb?

--
Thanks,
Changbin Du


Attachments:
(No filename) (5.98 kB)
signature.asc (473.00 B)
Download all attachments

2017-09-12 14:33:54

by Arnaldo Carvalho de Melo

[permalink] [raw]
Subject: Re: Does perf-annotate work correctly?

Em Tue, Sep 12, 2017 at 06:10:35PM +0800, Du, Changbin escreveu:
> When a annotate a symbol, I find the annotated C source code doesn't match assembly code.
> So I cannot determine which line of C code has much overhead withou gdb's help.
>
> Here is a example result of function apic_has_interrupt_for_ppr() in kvm module.

Ok, was this using the module .ko file or /proc/kcore? You forgot to
cut'n'paste the first line on the screen.

Also, how did you use gdb?

perf uses objdump to do the disassembly, and depending on how it is used
(live system, post processing on a different machine, permissions) it
may use different files to do the disassembly.

Please provide more detailed information on the exact command line
arguments and usage scenario.

- Arnaldo

> │580 __clear_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention); ▒
> │581 } ▒
> │ ▒
> │583 static int apic_has_interrupt_for_ppr(struct kvm_lapic *apic, u32 ppr) ▒
> │584 { ▒
> 0.88 │30: cmpb $0x0,0x91(%rdi) ▒
> 2.54 │ ↓ je 63 ▒
> 0.20 │ mov 0xa0(%rdi),%rcx ▒
> │581 int highest_irr; ▒
> │582 if (kvm_x86_ops->sync_pir_to_irr && apic->vcpu->arch.apicv_active) ▒
> 4.91 │ mov $0xe0,%eax x ▒
> 1.46 │45: mov %eax,%edx x ▒
> 0.02 │ sar $0x5,%edx x ▒
> 3.57 │ shl $0x4,%edx x ▒
> 3.34 │ movslq %edx,%rdx x ▒
> 1.25 │ mov 0x200(%rcx,%rdx,1),%edx x ▒
> 42.44 │ test %edx,%edx x ▒
> 0.01 │ ┌──jne 88 x ▒
> 3.48 │ │ sub $0x20,%eax x ▒
> 2.24 │ │ cmp $0xffffffe0,%eax x ▒
> │586│apic_find_highest_irr(): ▒
> │ │ ▒
> │407│ /* ▒
> │408│ * Note that irr_pending is just a hint. It will be always ▒
> │409│ * true with virtual interrupt delivery enabled. ▒
> │410│ */ ▒
> │411│ if (!apic->irr_pending) ▒
> │ │↑ jne 45 ▒
> 0.62 │63:│ mov $0xffffffff,%eax ◆
> 0.83 │ │ leaveq ▒
> 13.52 │ │← retq ▒
> │6a:│ mov %esi,-0x4(%rbp) ▒
> │ │ mov %rdx,%rdi ▒
> │418│find_highest_vector(): ▒
> │340│static int find_highest_vector(void *bitmap) ▒
> │341│{ ▒
> │342│ int vec; ▒
> │343│ u32 *reg; ▒
> │ │ ▒
> │345│ for (vec = MAX_APIC_VECTOR - APIC_VECTORS_PER_REG; ▒
> │ │→ callq *%rax ▒
> │ │ mov -0x4(%rbp),%esi ▒
> │343│ vec >= 0; vec -= APIC_VECTORS_PER_REG) { ▒
> │344│ reg = bitmap + REG_POS(vec); ▒
> │345│ if (*reg) ▒
> 0.05 │75:│ cmp $0xffffffff,%eax ▒
> │ │↑ je 63 ▒
> 1.95 │ │ mov %eax,%edx ▒
> 1.45 │ │ and $0xf0,%edx
>
>
> Look at the assembly code block where I have put a 'x' on the right. Apparently the
> assembly code doesn't match the C source code arrounded. Let's look the correct disassemble
> result from gdb:
>
> 340 for (vec = MAX_APIC_VECTOR - APIC_VECTORS_PER_REG;
> 0x000000000003b4e0 <+64>: mov $0xe0,%eax
>
> 342 reg = bitmap + REG_POS(vec);
> 343 if (*reg)
> 0x000000000003b4e5 <+69>: mov %eax,%edx
> 0x000000000003b4e7 <+71>: sar $0x5,%edx
> 0x000000000003b4ea <+74>: shl $0x4,%edx
> 0x000000000003b4ed <+77>: movslq %edx,%rdx
> 0x000000000003b4f0 <+80>: mov 0x200(%rcx,%rdx,1),%edx
> 0x000000000003b4f7 <+87>: test %edx,%edx
> 0x000000000003b4f9 <+89>: jne 0x3b528 <apic_has_interrupt_for_ppr+136>
>
> 341 vec >= 0; vec -= APIC_VECTORS_PER_REG) {
> 0x000000000003b4fb <+91>: sub $0x20,%eax
>
> 340 for (vec = MAX_APIC_VECTOR - APIC_VECTORS_PER_REG;
> 0x000000000003b4fe <+94>: cmp $0xffffffe0,%eax
> 0x000000000003b501 <+97>: jne 0x3b4e5 <apic_has_interrupt_for_ppr+69>
>
>
> Compared to gdb, perf-annoate has messed up. is it a bug or just perf is not as perfect as gdb?
>
> --
> Thanks,
> Changbin Du


2017-09-13 02:00:39

by Du, Changbin

[permalink] [raw]
Subject: Re: Does perf-annotate work correctly?

On Tue, Sep 12, 2017 at 11:33:50AM -0300, Arnaldo Carvalho de Melo wrote:
> Em Tue, Sep 12, 2017 at 06:10:35PM +0800, Du, Changbin escreveu:
> > When a annotate a symbol, I find the annotated C source code doesn't match assembly code.
> > So I cannot determine which line of C code has much overhead withou gdb's help.
> >
> > Here is a example result of function apic_has_interrupt_for_ppr() in kvm module.
>
> Ok, was this using the module .ko file or /proc/kcore? You forgot to
> cut'n'paste the first line on the screen.
>
It is arch/x86/kvm/kvm.ko.

> Also, how did you use gdb?
>
$ gdb arch/x86/kvm/kvm.ko
$ (gdb) disassemble /s apic_has_interrupt_for_ppr

> perf uses objdump to do the disassembly, and depending on how it is used
> (live system, post processing on a different machine, permissions) it
> may use different files to do the disassembly.
>
But objdump has same out as gdb. (Always on same machine, and no binary changed.)

$ objdump -d -S arch/x86/kvm/kvm.o
...
static int apic_has_interrupt_for_ppr(struct kvm_lapic *apic, u32 ppr)
{
3b4a0: e8 00 00 00 00 callq 3b4a5 <apic_has_interrupt_for_ppr+0x5>
3b4a5: 55 push %rbp
3b4a6: 48 89 e5 mov %rsp,%rbp
3b4a9: 48 83 ec 08 sub $0x8,%rsp
int highest_irr;
if (kvm_x86_ops->sync_pir_to_irr && apic->vcpu->arch.apicv_active)
3b4ad: 48 8b 05 00 00 00 00 mov 0x0(%rip),%rax # 3b4b4 <apic_has_interrupt_for_ppr+0x14>
3b4b4: 48 8b 80 38 02 00 00 mov 0x238(%rax),%rax
3b4bb: 48 85 c0 test %rax,%rax
3b4be: 74 10 je 3b4d0 <apic_has_interrupt_for_ppr+0x30>
3b4c0: 48 8b 97 88 00 00 00 mov 0x88(%rdi),%rdx
3b4c7: 80 ba 28 03 00 00 00 cmpb $0x0,0x328(%rdx)
3b4ce: 75 3a jne 3b50a <apic_has_interrupt_for_ppr+0x6a>

/*
* Note that irr_pending is just a hint. It will be always
* true with virtual interrupt delivery enabled.
*/
if (!apic->irr_pending)
3b4d0: 80 bf 91 00 00 00 00 cmpb $0x0,0x91(%rdi)
3b4d7: 74 2a je 3b503 <apic_has_interrupt_for_ppr+0x63>
3b4d9: 48 8b 8f a0 00 00 00 mov 0xa0(%rdi),%rcx
static int find_highest_vector(void *bitmap)
{
int vec;
u32 *reg;

for (vec = MAX_APIC_VECTOR - APIC_VECTORS_PER_REG;
3b4e0: b8 e0 00 00 00 mov $0xe0,%eax
vec >= 0; vec -= APIC_VECTORS_PER_REG) {
reg = bitmap + REG_POS(vec);
if (*reg)
3b4e5: 89 c2 mov %eax,%edx
3b4e7: c1 fa 05 sar $0x5,%edx
3b4ea: c1 e2 04 shl $0x4,%edx
3b4ed: 48 63 d2 movslq %edx,%rdx
3b4f0: 8b 94 11 00 02 00 00 mov 0x200(%rcx,%rdx,1),%edx
3b4f7: 85 d2 test %edx,%edx
3b4f9: 75 2d jne 3b528 <apic_has_interrupt_for_ppr+0x88>



> Please provide more detailed information on the exact command line
> arguments and usage scenario.
>
> - Arnaldo

>
> > │580 __clear_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention); ▒
> > │581 } ▒
> > │ ▒
> > │583 static int apic_has_interrupt_for_ppr(struct kvm_lapic *apic, u32 ppr) ▒
> > │584 { ▒
> > 0.88 │30: cmpb $0x0,0x91(%rdi) ▒
> > 2.54 │ ↓ je 63 ▒
> > 0.20 │ mov 0xa0(%rdi),%rcx ▒
> > │581 int highest_irr; ▒
> > │582 if (kvm_x86_ops->sync_pir_to_irr && apic->vcpu->arch.apicv_active) ▒
> > 4.91 │ mov $0xe0,%eax x ▒
> > 1.46 │45: mov %eax,%edx x ▒
> > 0.02 │ sar $0x5,%edx x ▒
> > 3.57 │ shl $0x4,%edx x ▒
> > 3.34 │ movslq %edx,%rdx x ▒
> > 1.25 │ mov 0x200(%rcx,%rdx,1),%edx x ▒
> > 42.44 │ test %edx,%edx x ▒
> > 0.01 │ ┌──jne 88 x ▒
> > 3.48 │ │ sub $0x20,%eax x ▒
> > 2.24 │ │ cmp $0xffffffe0,%eax x ▒
> > │586│apic_find_highest_irr(): ▒
> > │ │ ▒
> > │407│ /* ▒
> > │408│ * Note that irr_pending is just a hint. It will be always ▒
> > │409│ * true with virtual interrupt delivery enabled. ▒
> > │410│ */ ▒
> > │411│ if (!apic->irr_pending) ▒
> > │ │↑ jne 45 ▒
> > 0.62 │63:│ mov $0xffffffff,%eax ◆
> > 0.83 │ │ leaveq ▒
> > 13.52 │ │← retq ▒
> > │6a:│ mov %esi,-0x4(%rbp) ▒
> > │ │ mov %rdx,%rdi ▒
> > │418│find_highest_vector(): ▒
> > │340│static int find_highest_vector(void *bitmap) ▒
> > │341│{ ▒
> > │342│ int vec; ▒
> > │343│ u32 *reg; ▒
> > │ │ ▒
> > │345│ for (vec = MAX_APIC_VECTOR - APIC_VECTORS_PER_REG; ▒
> > │ │→ callq *%rax ▒
> > │ │ mov -0x4(%rbp),%esi ▒
> > │343│ vec >= 0; vec -= APIC_VECTORS_PER_REG) { ▒
> > │344│ reg = bitmap + REG_POS(vec); ▒
> > │345│ if (*reg) ▒
> > 0.05 │75:│ cmp $0xffffffff,%eax ▒
> > │ │↑ je 63 ▒
> > 1.95 │ │ mov %eax,%edx ▒
> > 1.45 │ │ and $0xf0,%edx
> >
> >
> > Look at the assembly code block where I have put a 'x' on the right. Apparently the
> > assembly code doesn't match the C source code arrounded. Let's look the correct disassemble
> > result from gdb:
> >
> > 340 for (vec = MAX_APIC_VECTOR - APIC_VECTORS_PER_REG;
> > 0x000000000003b4e0 <+64>: mov $0xe0,%eax
> >
> > 342 reg = bitmap + REG_POS(vec);
> > 343 if (*reg)
> > 0x000000000003b4e5 <+69>: mov %eax,%edx
> > 0x000000000003b4e7 <+71>: sar $0x5,%edx
> > 0x000000000003b4ea <+74>: shl $0x4,%edx
> > 0x000000000003b4ed <+77>: movslq %edx,%rdx
> > 0x000000000003b4f0 <+80>: mov 0x200(%rcx,%rdx,1),%edx
> > 0x000000000003b4f7 <+87>: test %edx,%edx
> > 0x000000000003b4f9 <+89>: jne 0x3b528 <apic_has_interrupt_for_ppr+136>
> >
> > 341 vec >= 0; vec -= APIC_VECTORS_PER_REG) {
> > 0x000000000003b4fb <+91>: sub $0x20,%eax
> >
> > 340 for (vec = MAX_APIC_VECTOR - APIC_VECTORS_PER_REG;
> > 0x000000000003b4fe <+94>: cmp $0xffffffe0,%eax
> > 0x000000000003b501 <+97>: jne 0x3b4e5 <apic_has_interrupt_for_ppr+69>
> >
> >
> > Compared to gdb, perf-annoate has messed up. is it a bug or just perf is not as perfect as gdb?
> >
> > --
> > Thanks,
> > Changbin Du
>
>

--
Thanks,
Changbin Du


Attachments:
(No filename) (8.96 kB)
signature.asc (473.00 B)
Download all attachments

2017-09-13 09:20:23

by Du, Changbin

[permalink] [raw]
Subject: Re: Does perf-annotate work correctly?


Here is another example which may be easy to analyse. I beleve that something wrong
in parsing objdump output.

The 'test %esi,%esi' is not of 'if (unlikely(error))', but 'if (!idtv_info_valid)'.


perf annotate of function vmx_complete_interrupts():
vmx_complete_interrupts /lib/modules/4.13.0+/kernel/arch/x86/kvm/kvm-intel.ko
Percent│ mov 0x4590(%rdi),%esi ▒
│ vmx_cancel_injection(): ▒
│ vmcs_read32(VM_ENTRY_INTR_INFO_FIELD), ▒
│ VM_ENTRY_INSTRUCTION_LEN, ▒
│ VM_ENTRY_EXCEPTION_ERROR_CODE); ▒
│ ▒
│ vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, 0); ▒
│ } ▒
│ movb $0x0,0x32c8(%rdi) ▒
│ __vmx_complete_interrupts(): ▒
│ movb $0x0,0x1a7c(%rdi) ▒
│ movb $0x0,0x1a88(%rdi) ▒
│ __vmcs_writel(): ▒
│ u8 error; ▒
│ ▒
│ asm volatile (__ex(ASM_VMX_VMWRITE_RAX_RDX) "; setna %0" ▒
│ : "=q"(error) : "a"(value), "d"(field) : "cc"); ▒
│ if (unlikely(error)) ▒
│ vmwrite_error(field, value); ▒
0.02 │ test %esi,%esi ▒
│ ↓ js 25 ▒
99.98 │ ← retq ▒
│25: push %rbp ▒
│ mov $0x440a,%ecx ▒
│ mov $0x440c,%edx ▒
│ vmx_complete_interrupts(): ◆
│ break; ▒
│ } ▒
│ } ▒
│ ▒
│ static void vmx_complete_interrupts(struct vcpu_vmx *vmx) ▒
│ { ▒
│ mov %rsp,%rbp ▒
│ → callq __vmx_complete_interrupts.part.64 ▒
│ __vmx_complete_interrupts(&vmx->vcpu, vmx->idt_vectoring_info, ▒
│ pop %rbp ▒
│ ← retq ▒




GDB disassable:
(gdb) disassemble /s vmx_complete_interrupts
Dump of assembler code for function vmx_complete_interrupts:
arch/x86/kvm/vmx.c:
8917 {
0x0000000000007110 <+0>: callq 0x7115 <vmx_complete_interrupts+5>

8918 __vmx_complete_interrupts(&vmx->vcpu, vmx->idt_vectoring_info,
0x0000000000007115 <+5>: mov 0x4590(%rdi),%esi

8873 vcpu->arch.nmi_injected = false;
0x000000000000711b <+11>: movb $0x0,0x32c8(%rdi)

arch/x86/kvm/x86.h:
14 vcpu->arch.exception.pending = false;
0x0000000000007122 <+18>: movb $0x0,0x1a7c(%rdi)

15 }
16
17 static inline void kvm_queue_interrupt(struct kvm_vcpu *vcpu, u8 vector,
18 bool soft)
19 {
20 vcpu->arch.interrupt.pending = true;
21 vcpu->arch.interrupt.soft = soft;
22 vcpu->arch.interrupt.nr = vector;
23 }
24
25 static inline void kvm_clear_interrupt_queue(struct kvm_vcpu *vcpu)
26 {
27 vcpu->arch.interrupt.pending = false;
0x0000000000007129 <+25>: movb $0x0,0x1a88(%rdi)

arch/x86/kvm/vmx.c:
8877 if (!idtv_info_valid)
0x0000000000007130 <+32>: test %esi,%esi
0x0000000000007132 <+34>: js 0x7135 <vmx_complete_interrupts+37>

8919 VM_EXIT_INSTRUCTION_LEN,
8920 IDT_VECTORING_ERROR_CODE);
8921 }
0x0000000000007134 <+36>: retq

8917 {
0x0000000000007135 <+37>: push %rbp
0x0000000000007136 <+38>: mov $0x440a,%ecx
0x000000000000713b <+43>: mov $0x440c,%edx
---Type <return> to continue, or q <return> to quit---
0x0000000000007140 <+48>: mov %rsp,%rbp
0x0000000000007143 <+51>: callq 0x7000 <__vmx_complete_interrupts>

8919 VM_EXIT_INSTRUCTION_LEN,
8920 IDT_VECTORING_ERROR_CODE);
8921 }


Attachments:
(No filename) (5.96 kB)
signature.asc (473.00 B)
Download all attachments

2017-09-26 06:13:00

by Du, Changbin

[permalink] [raw]
Subject: Re: Does perf-annotate work correctly?


Hi Arnaldo and all,
Is there someone looking at this issue? I can provide more info if you need.
This issue makes it so inconvenient to use perf. Thanks.

On Wed, Sep 13, 2017 at 09:54:25AM +0800, Du, Changbin wrote:
> On Tue, Sep 12, 2017 at 11:33:50AM -0300, Arnaldo Carvalho de Melo wrote:
> > Em Tue, Sep 12, 2017 at 06:10:35PM +0800, Du, Changbin escreveu:
> > > When a annotate a symbol, I find the annotated C source code doesn't match assembly code.
> > > So I cannot determine which line of C code has much overhead withou gdb's help.
> > >
> > > Here is a example result of function apic_has_interrupt_for_ppr() in kvm module.
> >
> > Ok, was this using the module .ko file or /proc/kcore? You forgot to
> > cut'n'paste the first line on the screen.
> >
> It is arch/x86/kvm/kvm.ko.
>
> > Also, how did you use gdb?
> >
> $ gdb arch/x86/kvm/kvm.ko
> $ (gdb) disassemble /s apic_has_interrupt_for_ppr
>
> > perf uses objdump to do the disassembly, and depending on how it is used
> > (live system, post processing on a different machine, permissions) it
> > may use different files to do the disassembly.
> >
> But objdump has same out as gdb. (Always on same machine, and no binary changed.)
>
> $ objdump -d -S arch/x86/kvm/kvm.o
> ...
> static int apic_has_interrupt_for_ppr(struct kvm_lapic *apic, u32 ppr)
> {
> 3b4a0: e8 00 00 00 00 callq 3b4a5 <apic_has_interrupt_for_ppr+0x5>
> 3b4a5: 55 push %rbp
> 3b4a6: 48 89 e5 mov %rsp,%rbp
> 3b4a9: 48 83 ec 08 sub $0x8,%rsp
> int highest_irr;
> if (kvm_x86_ops->sync_pir_to_irr && apic->vcpu->arch.apicv_active)
> 3b4ad: 48 8b 05 00 00 00 00 mov 0x0(%rip),%rax # 3b4b4 <apic_has_interrupt_for_ppr+0x14>
> 3b4b4: 48 8b 80 38 02 00 00 mov 0x238(%rax),%rax
> 3b4bb: 48 85 c0 test %rax,%rax
> 3b4be: 74 10 je 3b4d0 <apic_has_interrupt_for_ppr+0x30>
> 3b4c0: 48 8b 97 88 00 00 00 mov 0x88(%rdi),%rdx
> 3b4c7: 80 ba 28 03 00 00 00 cmpb $0x0,0x328(%rdx)
> 3b4ce: 75 3a jne 3b50a <apic_has_interrupt_for_ppr+0x6a>
>
> /*
> * Note that irr_pending is just a hint. It will be always
> * true with virtual interrupt delivery enabled.
> */
> if (!apic->irr_pending)
> 3b4d0: 80 bf 91 00 00 00 00 cmpb $0x0,0x91(%rdi)
> 3b4d7: 74 2a je 3b503 <apic_has_interrupt_for_ppr+0x63>
> 3b4d9: 48 8b 8f a0 00 00 00 mov 0xa0(%rdi),%rcx
> static int find_highest_vector(void *bitmap)
> {
> int vec;
> u32 *reg;
>
> for (vec = MAX_APIC_VECTOR - APIC_VECTORS_PER_REG;
> 3b4e0: b8 e0 00 00 00 mov $0xe0,%eax
> vec >= 0; vec -= APIC_VECTORS_PER_REG) {
> reg = bitmap + REG_POS(vec);
> if (*reg)
> 3b4e5: 89 c2 mov %eax,%edx
> 3b4e7: c1 fa 05 sar $0x5,%edx
> 3b4ea: c1 e2 04 shl $0x4,%edx
> 3b4ed: 48 63 d2 movslq %edx,%rdx
> 3b4f0: 8b 94 11 00 02 00 00 mov 0x200(%rcx,%rdx,1),%edx
> 3b4f7: 85 d2 test %edx,%edx
> 3b4f9: 75 2d jne 3b528 <apic_has_interrupt_for_ppr+0x88>
>
>
>
> > Please provide more detailed information on the exact command line
> > arguments and usage scenario.
> >
> > - Arnaldo
>
> >
> > > │580 __clear_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention); ▒
> > > │581 } ▒
> > > │ ▒
> > > │583 static int apic_has_interrupt_for_ppr(struct kvm_lapic *apic, u32 ppr) ▒
> > > │584 { ▒
> > > 0.88 │30: cmpb $0x0,0x91(%rdi) ▒
> > > 2.54 │ ↓ je 63 ▒
> > > 0.20 │ mov 0xa0(%rdi),%rcx ▒
> > > │581 int highest_irr; ▒
> > > │582 if (kvm_x86_ops->sync_pir_to_irr && apic->vcpu->arch.apicv_active) ▒
> > > 4.91 │ mov $0xe0,%eax x ▒
> > > 1.46 │45: mov %eax,%edx x ▒
> > > 0.02 │ sar $0x5,%edx x ▒
> > > 3.57 │ shl $0x4,%edx x ▒
> > > 3.34 │ movslq %edx,%rdx x ▒
> > > 1.25 │ mov 0x200(%rcx,%rdx,1),%edx x ▒
> > > 42.44 │ test %edx,%edx x ▒
> > > 0.01 │ ┌──jne 88 x ▒
> > > 3.48 │ │ sub $0x20,%eax x ▒
> > > 2.24 │ │ cmp $0xffffffe0,%eax x ▒
> > > │586│apic_find_highest_irr(): ▒
> > > │ │ ▒
> > > │407│ /* ▒
> > > │408│ * Note that irr_pending is just a hint. It will be always ▒
> > > │409│ * true with virtual interrupt delivery enabled. ▒
> > > │410│ */ ▒
> > > │411│ if (!apic->irr_pending) ▒
> > > │ │↑ jne 45 ▒
> > > 0.62 │63:│ mov $0xffffffff,%eax ◆
> > > 0.83 │ │ leaveq ▒
> > > 13.52 │ │← retq ▒
> > > │6a:│ mov %esi,-0x4(%rbp) ▒
> > > │ │ mov %rdx,%rdi ▒
> > > │418│find_highest_vector(): ▒
> > > │340│static int find_highest_vector(void *bitmap) ▒
> > > │341│{ ▒
> > > │342│ int vec; ▒
> > > │343│ u32 *reg; ▒
> > > │ │ ▒
> > > │345│ for (vec = MAX_APIC_VECTOR - APIC_VECTORS_PER_REG; ▒
> > > │ │→ callq *%rax ▒
> > > │ │ mov -0x4(%rbp),%esi ▒
> > > │343│ vec >= 0; vec -= APIC_VECTORS_PER_REG) { ▒
> > > │344│ reg = bitmap + REG_POS(vec); ▒
> > > │345│ if (*reg) ▒
> > > 0.05 │75:│ cmp $0xffffffff,%eax ▒
> > > │ │↑ je 63 ▒
> > > 1.95 │ │ mov %eax,%edx ▒
> > > 1.45 │ │ and $0xf0,%edx
> > >
> > >
> > > Look at the assembly code block where I have put a 'x' on the right. Apparently the
> > > assembly code doesn't match the C source code arrounded. Let's look the correct disassemble
> > > result from gdb:
> > >
> > > 340 for (vec = MAX_APIC_VECTOR - APIC_VECTORS_PER_REG;
> > > 0x000000000003b4e0 <+64>: mov $0xe0,%eax
> > >
> > > 342 reg = bitmap + REG_POS(vec);
> > > 343 if (*reg)
> > > 0x000000000003b4e5 <+69>: mov %eax,%edx
> > > 0x000000000003b4e7 <+71>: sar $0x5,%edx
> > > 0x000000000003b4ea <+74>: shl $0x4,%edx
> > > 0x000000000003b4ed <+77>: movslq %edx,%rdx
> > > 0x000000000003b4f0 <+80>: mov 0x200(%rcx,%rdx,1),%edx
> > > 0x000000000003b4f7 <+87>: test %edx,%edx
> > > 0x000000000003b4f9 <+89>: jne 0x3b528 <apic_has_interrupt_for_ppr+136>
> > >
> > > 341 vec >= 0; vec -= APIC_VECTORS_PER_REG) {
> > > 0x000000000003b4fb <+91>: sub $0x20,%eax
> > >
> > > 340 for (vec = MAX_APIC_VECTOR - APIC_VECTORS_PER_REG;
> > > 0x000000000003b4fe <+94>: cmp $0xffffffe0,%eax
> > > 0x000000000003b501 <+97>: jne 0x3b4e5 <apic_has_interrupt_for_ppr+69>
> > >
> > >
> > > Compared to gdb, perf-annoate has messed up. is it a bug or just perf is not as perfect as gdb?
> > >
> > > --
> > > Thanks,
> > > Changbin Du
> >
> >
>
> --
> Thanks,
> Changbin Du



--
Thanks,
Changbin Du


Attachments:
(No filename) (9.51 kB)
signature.asc (473.00 B)
Download all attachments