Failure of an element of a Xen multicall is signalled via a WARN()
only unless the kernel is compiled with MC_DEBUG. It is impossible to
know which element failed and why it did so.
Change that by printing the related information even without MC_DEBUG,
even if maybe in some limited form (e.g. without information which
caller produced the failing element).
Move the printing out of the switch statement in order to have the
same information for a single call.
Signed-off-by: Juergen Gross <[email protected]>
---
arch/x86/xen/multicalls.c | 35 ++++++++++++++++++++---------------
1 file changed, 20 insertions(+), 15 deletions(-)
diff --git a/arch/x86/xen/multicalls.c b/arch/x86/xen/multicalls.c
index 2bce7958ce8b..0766a08bdf45 100644
--- a/arch/x86/xen/multicalls.c
+++ b/arch/x86/xen/multicalls.c
@@ -69,6 +69,11 @@ void xen_mc_flush(void)
trace_xen_mc_flush(b->mcidx, b->argidx, b->cbidx);
+#if MC_DEBUG
+ memcpy(b->debug, b->entries,
+ b->mcidx * sizeof(struct multicall_entry));
+#endif
+
switch (b->mcidx) {
case 0:
/* no-op */
@@ -87,32 +92,34 @@ void xen_mc_flush(void)
break;
default:
-#if MC_DEBUG
- memcpy(b->debug, b->entries,
- b->mcidx * sizeof(struct multicall_entry));
-#endif
-
if (HYPERVISOR_multicall(b->entries, b->mcidx) != 0)
BUG();
for (i = 0; i < b->mcidx; i++)
if (b->entries[i].result < 0)
ret++;
+ }
+ if (WARN_ON(ret)) {
+ pr_err("%d of %d multicall(s) failed: cpu %d\n",
+ ret, b->mcidx, smp_processor_id());
+ for (i = 0; i < b->mcidx; i++) {
+ if (b->entries[i].result < 0) {
#if MC_DEBUG
- if (ret) {
- printk(KERN_ERR "%d multicall(s) failed: cpu %d\n",
- ret, smp_processor_id());
- dump_stack();
- for (i = 0; i < b->mcidx; i++) {
- printk(KERN_DEBUG " call %2d/%d: op=%lu arg=[%lx] result=%ld\t%pF\n",
- i+1, b->mcidx,
+ pr_err(" call %2d: op=%lu arg=[%lx] result=%ld\t%pF\n",
+ i + 1,
b->debug[i].op,
b->debug[i].args[0],
b->entries[i].result,
b->caller[i]);
+#else
+ pr_err(" call %2d: op=%lu arg=[%lx] result=%ld\n",
+ i + 1,
+ b->entries[i].op,
+ b->entries[i].args[0],
+ b->entries[i].result);
+#endif
}
}
-#endif
}
b->mcidx = 0;
@@ -126,8 +133,6 @@ void xen_mc_flush(void)
b->cbidx = 0;
local_irq_restore(flags);
-
- WARN_ON(ret);
}
struct multicall_space __xen_mc_entry(size_t args)
--
2.16.4
On 11/23/18 11:24 AM, Juergen Gross wrote:
> Failure of an element of a Xen multicall is signalled via a WARN()
> only unless the kernel is compiled with MC_DEBUG. It is impossible to
s/unless/if
> know which element failed and why it did so.
>
> Change that by printing the related information even without MC_DEBUG,
> even if maybe in some limited form (e.g. without information which
> caller produced the failing element).
>
> Move the printing out of the switch statement in order to have the
> same information for a single call.
>
> Signed-off-by: Juergen Gross <[email protected]>
> ---
> arch/x86/xen/multicalls.c | 35 ++++++++++++++++++++---------------
> 1 file changed, 20 insertions(+), 15 deletions(-)
>
> diff --git a/arch/x86/xen/multicalls.c b/arch/x86/xen/multicalls.c
> index 2bce7958ce8b..0766a08bdf45 100644
> --- a/arch/x86/xen/multicalls.c
> +++ b/arch/x86/xen/multicalls.c
> @@ -69,6 +69,11 @@ void xen_mc_flush(void)
>
> trace_xen_mc_flush(b->mcidx, b->argidx, b->cbidx);
>
> +#if MC_DEBUG
> + memcpy(b->debug, b->entries,
> + b->mcidx * sizeof(struct multicall_entry));
> +#endif
> +
> switch (b->mcidx) {
> case 0:
> /* no-op */
> @@ -87,32 +92,34 @@ void xen_mc_flush(void)
> break;
>
> default:
> -#if MC_DEBUG
> - memcpy(b->debug, b->entries,
> - b->mcidx * sizeof(struct multicall_entry));
> -#endif
> -
> if (HYPERVISOR_multicall(b->entries, b->mcidx) != 0)
> BUG();
> for (i = 0; i < b->mcidx; i++)
> if (b->entries[i].result < 0)
> ret++;
> + }
>
> + if (WARN_ON(ret)) {
> + pr_err("%d of %d multicall(s) failed: cpu %d\n",
> + ret, b->mcidx, smp_processor_id());
> + for (i = 0; i < b->mcidx; i++) {
> + if (b->entries[i].result < 0) {
> #if MC_DEBUG
> - if (ret) {
> - printk(KERN_ERR "%d multicall(s) failed: cpu %d\n",
> - ret, smp_processor_id());
> - dump_stack();
> - for (i = 0; i < b->mcidx; i++) {
> - printk(KERN_DEBUG " call %2d/%d: op=%lu arg=[%lx] result=%ld\t%pF\n",
> - i+1, b->mcidx,
> + pr_err(" call %2d: op=%lu arg=[%lx] result=%ld\t%pF\n",
> + i + 1,
> b->debug[i].op,
> b->debug[i].args[0],
> b->entries[i].result,
> b->caller[i]);
> +#else
> + pr_err(" call %2d: op=%lu arg=[%lx] result=%ld\n",
> + i + 1,
> + b->entries[i].op,
> + b->entries[i].args[0],
> + b->entries[i].result);
> +#endif
Doesn't (non-debug) hypervisor corrupt op and args?
(Also, we don't really need to print anything when b->entries[i].result
== 0)
-boris
> }
> }
> -#endif
> }
>
> b->mcidx = 0;
> @@ -126,8 +133,6 @@ void xen_mc_flush(void)
> b->cbidx = 0;
>
> local_irq_restore(flags);
> -
> - WARN_ON(ret);
> }
>
> struct multicall_space __xen_mc_entry(size_t args)
On 26/11/2018 21:11, Boris Ostrovsky wrote:
> On 11/23/18 11:24 AM, Juergen Gross wrote:
>> Failure of an element of a Xen multicall is signalled via a WARN()
>> only unless the kernel is compiled with MC_DEBUG. It is impossible to
>
> s/unless/if
>
>
>> know which element failed and why it did so.
>>
>> Change that by printing the related information even without MC_DEBUG,
>> even if maybe in some limited form (e.g. without information which
>> caller produced the failing element).
>>
>> Move the printing out of the switch statement in order to have the
>> same information for a single call.
>>
>> Signed-off-by: Juergen Gross <[email protected]>
>> ---
>> arch/x86/xen/multicalls.c | 35 ++++++++++++++++++++---------------
>> 1 file changed, 20 insertions(+), 15 deletions(-)
>>
>> diff --git a/arch/x86/xen/multicalls.c b/arch/x86/xen/multicalls.c
>> index 2bce7958ce8b..0766a08bdf45 100644
>> --- a/arch/x86/xen/multicalls.c
>> +++ b/arch/x86/xen/multicalls.c
>> @@ -69,6 +69,11 @@ void xen_mc_flush(void)
>>
>> trace_xen_mc_flush(b->mcidx, b->argidx, b->cbidx);
>>
>> +#if MC_DEBUG
>> + memcpy(b->debug, b->entries,
>> + b->mcidx * sizeof(struct multicall_entry));
>> +#endif
>> +
>> switch (b->mcidx) {
>> case 0:
>> /* no-op */
>> @@ -87,32 +92,34 @@ void xen_mc_flush(void)
>> break;
>>
>> default:
>> -#if MC_DEBUG
>> - memcpy(b->debug, b->entries,
>> - b->mcidx * sizeof(struct multicall_entry));
>> -#endif
>> -
>> if (HYPERVISOR_multicall(b->entries, b->mcidx) != 0)
>> BUG();
>> for (i = 0; i < b->mcidx; i++)
>> if (b->entries[i].result < 0)
>> ret++;
>> + }
>>
>> + if (WARN_ON(ret)) {
>> + pr_err("%d of %d multicall(s) failed: cpu %d\n",
>> + ret, b->mcidx, smp_processor_id());
>> + for (i = 0; i < b->mcidx; i++) {
>> + if (b->entries[i].result < 0) {
>> #if MC_DEBUG
>> - if (ret) {
>> - printk(KERN_ERR "%d multicall(s) failed: cpu %d\n",
>> - ret, smp_processor_id());
>> - dump_stack();
>> - for (i = 0; i < b->mcidx; i++) {
>> - printk(KERN_DEBUG " call %2d/%d: op=%lu arg=[%lx] result=%ld\t%pF\n",
>> - i+1, b->mcidx,
>> + pr_err(" call %2d: op=%lu arg=[%lx] result=%ld\t%pF\n",
>> + i + 1,
>> b->debug[i].op,
>> b->debug[i].args[0],
>> b->entries[i].result,
>> b->caller[i]);
>> +#else
>> + pr_err(" call %2d: op=%lu arg=[%lx] result=%ld\n",
>> + i + 1,
>> + b->entries[i].op,
>> + b->entries[i].args[0],
>> + b->entries[i].result);
>> +#endif
>
> Doesn't (non-debug) hypervisor corrupt op and args?
No. Only debug hypervisor does so.
See my patch (and rather lengthy discussion) on xen-devel:
https://lists.xen.org/archives/html/xen-devel/2018-11/msg02714.html
>
> (Also, we don't really need to print anything when b->entries[i].result
> == 0)
Right. Did you miss the:
+ if (b->entries[i].result < 0) {
above?
Juergen
On 11/27/18 2:46 AM, Juergen Gross wrote:
> On 26/11/2018 21:11, Boris Ostrovsky wrote:
>> On 11/23/18 11:24 AM, Juergen Gross wrote:
>>> Failure of an element of a Xen multicall is signalled via a WARN()
>>> only unless the kernel is compiled with MC_DEBUG. It is impossible to
>> s/unless/if
>>
>>
>>> know which element failed and why it did so.
>>>
>>> Change that by printing the related information even without MC_DEBUG,
>>> even if maybe in some limited form (e.g. without information which
>>> caller produced the failing element).
>>>
>>> Move the printing out of the switch statement in order to have the
>>> same information for a single call.
>>>
>>> Signed-off-by: Juergen Gross <[email protected]>
>>> ---
>>> arch/x86/xen/multicalls.c | 35 ++++++++++++++++++++---------------
>>> 1 file changed, 20 insertions(+), 15 deletions(-)
>>>
>>> diff --git a/arch/x86/xen/multicalls.c b/arch/x86/xen/multicalls.c
>>> index 2bce7958ce8b..0766a08bdf45 100644
>>> --- a/arch/x86/xen/multicalls.c
>>> +++ b/arch/x86/xen/multicalls.c
>>> @@ -69,6 +69,11 @@ void xen_mc_flush(void)
>>>
>>> trace_xen_mc_flush(b->mcidx, b->argidx, b->cbidx);
>>>
>>> +#if MC_DEBUG
>>> + memcpy(b->debug, b->entries,
>>> + b->mcidx * sizeof(struct multicall_entry));
>>> +#endif
>>> +
>>> switch (b->mcidx) {
>>> case 0:
>>> /* no-op */
>>> @@ -87,32 +92,34 @@ void xen_mc_flush(void)
>>> break;
>>>
>>> default:
>>> -#if MC_DEBUG
>>> - memcpy(b->debug, b->entries,
>>> - b->mcidx * sizeof(struct multicall_entry));
>>> -#endif
>>> -
>>> if (HYPERVISOR_multicall(b->entries, b->mcidx) != 0)
>>> BUG();
>>> for (i = 0; i < b->mcidx; i++)
>>> if (b->entries[i].result < 0)
>>> ret++;
>>> + }
>>>
>>> + if (WARN_ON(ret)) {
>>> + pr_err("%d of %d multicall(s) failed: cpu %d\n",
>>> + ret, b->mcidx, smp_processor_id());
>>> + for (i = 0; i < b->mcidx; i++) {
>>> + if (b->entries[i].result < 0) {
>>> #if MC_DEBUG
>>> - if (ret) {
>>> - printk(KERN_ERR "%d multicall(s) failed: cpu %d\n",
>>> - ret, smp_processor_id());
>>> - dump_stack();
>>> - for (i = 0; i < b->mcidx; i++) {
>>> - printk(KERN_DEBUG " call %2d/%d: op=%lu arg=[%lx] result=%ld\t%pF\n",
>>> - i+1, b->mcidx,
>>> + pr_err(" call %2d: op=%lu arg=[%lx] result=%ld\t%pF\n",
>>> + i + 1,
>>> b->debug[i].op,
>>> b->debug[i].args[0],
>>> b->entries[i].result,
>>> b->caller[i]);
>>> +#else
>>> + pr_err(" call %2d: op=%lu arg=[%lx] result=%ld\n",
>>> + i + 1,
>>> + b->entries[i].op,
>>> + b->entries[i].args[0],
>>> + b->entries[i].result);
>>> +#endif
>> Doesn't (non-debug) hypervisor corrupt op and args?
> No. Only debug hypervisor does so.
>
> See my patch (and rather lengthy discussion) on xen-devel:
>
> https://lists.xen.org/archives/html/xen-devel/2018-11/msg02714.html
Yes, I saw that later, after I responded to the patch.
>
>> (Also, we don't really need to print anything when b->entries[i].result
>> == 0)
> Right. Did you miss the:
I did.
Reviewed-by: Boris Ostrovsky <[email protected]>
(with commit message fixed)
>
> + if (b->entries[i].result < 0) {
>
> above?
>
>
> Juergen