The first four patches are fixes for XSA-332. The avoid WARN splats
and a performance issue with interdomain events.
Patches 5 and 6 are some additions to event handling in order to add
some per pv-device statistics to sysfs and the ability to have a per
backend device spurious event delay control.
Patches 7 and 8 are minor fixes I had lying around.
Juergen Gross (8):
xen/events: reset affinity of 2-level event when tearing it down
xen/events: don't unmask an event channel when an eoi is pending
xen/events: avoid handling the same event on two cpus at the same time
xen/netback: fix spurious event detection for common event case
xen/events: link interdomain events to associated xenbus device
xen/events: add per-xenbus device event statistics and settings
xen/evtchn: use smp barriers for user event ring
xen/evtchn: use READ/WRITE_ONCE() for accessing ring indices
.../ABI/testing/sysfs-devices-xenbus | 41 ++++
drivers/block/xen-blkback/xenbus.c | 2 +-
drivers/net/xen-netback/interface.c | 24 ++-
drivers/xen/events/events_2l.c | 22 +-
drivers/xen/events/events_base.c | 190 ++++++++++++++----
drivers/xen/events/events_fifo.c | 7 -
drivers/xen/events/events_internal.h | 14 +-
drivers/xen/evtchn.c | 29 ++-
drivers/xen/pvcalls-back.c | 4 +-
drivers/xen/xen-pciback/xenbus.c | 2 +-
drivers/xen/xen-scsiback.c | 2 +-
drivers/xen/xenbus/xenbus_probe.c | 66 ++++++
include/xen/events.h | 7 +-
include/xen/xenbus.h | 7 +
14 files changed, 323 insertions(+), 94 deletions(-)
create mode 100644 Documentation/ABI/testing/sysfs-devices-xenbus
--
2.26.2
When changing the cpu affinity of an event it can happen today that
(with some unlucky timing) the same event will be handled on the old
and the new cpu at the same time.
Avoid that by adding an "event active" flag to the per-event data and
call the handler only if this flag isn't set.
Reported-by: Julien Grall <[email protected]>
Signed-off-by: Juergen Gross <[email protected]>
---
V2:
- new patch
---
drivers/xen/events/events_base.c | 19 +++++++++++++++----
1 file changed, 15 insertions(+), 4 deletions(-)
diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c
index e157e7506830..f7e22330dcef 100644
--- a/drivers/xen/events/events_base.c
+++ b/drivers/xen/events/events_base.c
@@ -102,6 +102,7 @@ struct irq_info {
#define EVT_MASK_REASON_EXPLICIT 0x01
#define EVT_MASK_REASON_TEMPORARY 0x02
#define EVT_MASK_REASON_EOI_PENDING 0x04
+ u8 is_active; /* Is event just being handled? */
unsigned irq;
evtchn_port_t evtchn; /* event channel */
unsigned short cpu; /* cpu bound */
@@ -622,6 +623,7 @@ static void xen_irq_lateeoi_locked(struct irq_info *info, bool spurious)
}
info->eoi_time = 0;
+ smp_store_release(&info->is_active, 0);
do_unmask(info, EVT_MASK_REASON_EOI_PENDING);
}
@@ -809,13 +811,15 @@ static void pirq_query_unmask(int irq)
static void eoi_pirq(struct irq_data *data)
{
- evtchn_port_t evtchn = evtchn_from_irq(data->irq);
+ struct irq_info *info = info_for_irq(data->irq);
+ evtchn_port_t evtchn = info ? info->evtchn : 0;
struct physdev_eoi eoi = { .irq = pirq_from_irq(data->irq) };
int rc = 0;
if (!VALID_EVTCHN(evtchn))
return;
+ smp_store_release(&info->is_active, 0);
clear_evtchn(evtchn);
if (pirq_needs_eoi(data->irq)) {
@@ -1640,6 +1644,8 @@ void handle_irq_for_port(evtchn_port_t port, struct evtchn_loop_ctrl *ctrl)
}
info = info_for_irq(irq);
+ if (xchg_acquire(&info->is_active, 1))
+ return;
if (ctrl->defer_eoi) {
info->eoi_cpu = smp_processor_id();
@@ -1823,11 +1829,13 @@ static void disable_dynirq(struct irq_data *data)
static void ack_dynirq(struct irq_data *data)
{
- evtchn_port_t evtchn = evtchn_from_irq(data->irq);
+ struct irq_info *info = info_for_irq(data->irq);
+ evtchn_port_t evtchn = info ? info->evtchn : 0;
if (!VALID_EVTCHN(evtchn))
return;
+ smp_store_release(&info->is_active, 0);
clear_evtchn(evtchn);
}
@@ -1969,10 +1977,13 @@ static void restore_cpu_ipis(unsigned int cpu)
/* Clear an irq's pending state, in preparation for polling on it */
void xen_clear_irq_pending(int irq)
{
- evtchn_port_t evtchn = evtchn_from_irq(irq);
+ struct irq_info *info = info_for_irq(irq);
+ evtchn_port_t evtchn = info ? info->evtchn : 0;
- if (VALID_EVTCHN(evtchn))
+ if (VALID_EVTCHN(evtchn)) {
+ smp_store_release(&info->is_active, 0);
clear_evtchn(evtchn);
+ }
}
EXPORT_SYMBOL(xen_clear_irq_pending);
void xen_set_irq_pending(int irq)
--
2.26.2
In case of a common event for rx and tx queue the event should be
regarded to be spurious if no rx and no tx requests are pending.
Unfortunately the condition for testing that is wrong causing to
decide a event being spurious if no rx OR no tx requests are
pending.
Fix that plus using local variables for rx/tx pending indicators in
order to split function calls and if condition.
Fixes: 23025393dbeb3b ("xen/netback: use lateeoi irq binding")
Signed-off-by: Juergen Gross <[email protected]>
---
V2:
- new patch, fixing FreeBSD performance issue
---
drivers/net/xen-netback/interface.c | 8 +++++---
1 file changed, 5 insertions(+), 3 deletions(-)
diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c
index acb786d8b1d8..e02a4fbb74de 100644
--- a/drivers/net/xen-netback/interface.c
+++ b/drivers/net/xen-netback/interface.c
@@ -162,13 +162,15 @@ irqreturn_t xenvif_interrupt(int irq, void *dev_id)
{
struct xenvif_queue *queue = dev_id;
int old;
+ bool has_rx, has_tx;
old = atomic_fetch_or(NETBK_COMMON_EOI, &queue->eoi_pending);
WARN(old, "Interrupt while EOI pending\n");
- /* Use bitwise or as we need to call both functions. */
- if ((!xenvif_handle_tx_interrupt(queue) |
- !xenvif_handle_rx_interrupt(queue))) {
+ has_tx = xenvif_handle_tx_interrupt(queue);
+ has_rx = xenvif_handle_rx_interrupt(queue);
+
+ if (!has_rx && !has_tx) {
atomic_andnot(NETBK_COMMON_EOI, &queue->eoi_pending);
xen_irq_lateeoi(irq, XEN_EOI_FLAG_SPURIOUS);
}
--
2.26.2
When creating a new event channel with 2-level events the affinity
needs to be reset initially in order to avoid using an old affinity
from earlier usage of the event channel port. So when tearing an event
channel down reset all affinity bits.
The same applies to the affinity when onlining a vcpu: all old
affinity settings for this vcpu must be reset. As percpu events get
initialized before the percpu event channel hook is called,
resetting of the affinities happens after offlining a vcpu (this is
working, as initial percpu memory is zeroed out).
Cc: [email protected]
Reported-by: Julien Grall <[email protected]>
Signed-off-by: Juergen Gross <[email protected]>
---
V2:
- reset affinity when tearing down the event (Julien Grall)
---
drivers/xen/events/events_2l.c | 15 +++++++++++++++
drivers/xen/events/events_base.c | 1 +
drivers/xen/events/events_internal.h | 8 ++++++++
3 files changed, 24 insertions(+)
diff --git a/drivers/xen/events/events_2l.c b/drivers/xen/events/events_2l.c
index da87f3a1e351..a7f413c5c190 100644
--- a/drivers/xen/events/events_2l.c
+++ b/drivers/xen/events/events_2l.c
@@ -47,6 +47,11 @@ static unsigned evtchn_2l_max_channels(void)
return EVTCHN_2L_NR_CHANNELS;
}
+static void evtchn_2l_remove(evtchn_port_t evtchn, unsigned int cpu)
+{
+ clear_bit(evtchn, BM(per_cpu(cpu_evtchn_mask, cpu)));
+}
+
static void evtchn_2l_bind_to_cpu(evtchn_port_t evtchn, unsigned int cpu,
unsigned int old_cpu)
{
@@ -355,9 +360,18 @@ static void evtchn_2l_resume(void)
EVTCHN_2L_NR_CHANNELS/BITS_PER_EVTCHN_WORD);
}
+static int evtchn_2l_percpu_deinit(unsigned int cpu)
+{
+ memset(per_cpu(cpu_evtchn_mask, cpu), 0, sizeof(xen_ulong_t) *
+ EVTCHN_2L_NR_CHANNELS/BITS_PER_EVTCHN_WORD);
+
+ return 0;
+}
+
static const struct evtchn_ops evtchn_ops_2l = {
.max_channels = evtchn_2l_max_channels,
.nr_channels = evtchn_2l_max_channels,
+ .remove = evtchn_2l_remove,
.bind_to_cpu = evtchn_2l_bind_to_cpu,
.clear_pending = evtchn_2l_clear_pending,
.set_pending = evtchn_2l_set_pending,
@@ -367,6 +381,7 @@ static const struct evtchn_ops evtchn_ops_2l = {
.unmask = evtchn_2l_unmask,
.handle_events = evtchn_2l_handle_events,
.resume = evtchn_2l_resume,
+ .percpu_deinit = evtchn_2l_percpu_deinit,
};
void __init xen_evtchn_2l_init(void)
diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c
index e850f79351cb..6c539db81f8f 100644
--- a/drivers/xen/events/events_base.c
+++ b/drivers/xen/events/events_base.c
@@ -368,6 +368,7 @@ static int xen_irq_info_pirq_setup(unsigned irq,
static void xen_irq_info_cleanup(struct irq_info *info)
{
set_evtchn_to_irq(info->evtchn, -1);
+ xen_evtchn_port_remove(info->evtchn, info->cpu);
info->evtchn = 0;
channels_on_cpu_dec(info);
}
diff --git a/drivers/xen/events/events_internal.h b/drivers/xen/events/events_internal.h
index 0a97c0549db7..18a4090d0709 100644
--- a/drivers/xen/events/events_internal.h
+++ b/drivers/xen/events/events_internal.h
@@ -14,6 +14,7 @@ struct evtchn_ops {
unsigned (*nr_channels)(void);
int (*setup)(evtchn_port_t port);
+ void (*remove)(evtchn_port_t port, unsigned int cpu);
void (*bind_to_cpu)(evtchn_port_t evtchn, unsigned int cpu,
unsigned int old_cpu);
@@ -54,6 +55,13 @@ static inline int xen_evtchn_port_setup(evtchn_port_t evtchn)
return 0;
}
+static inline void xen_evtchn_port_remove(evtchn_port_t evtchn,
+ unsigned int cpu)
+{
+ if (evtchn_ops->remove)
+ evtchn_ops->remove(evtchn, cpu);
+}
+
static inline void xen_evtchn_port_bind_to_cpu(evtchn_port_t evtchn,
unsigned int cpu,
unsigned int old_cpu)
--
2.26.2
An event channel should be kept masked when an eoi is pending for it.
When being migrated to another cpu it might be unmasked, though.
In order to avoid this keep three different flags for each event channel
to be able to distinguish "normal" masking/unmasking from eoi related
masking/unmasking and temporary masking. The event channel should only
be able to generate an interrupt if all flags are cleared.
Cc: [email protected]
Fixes: 54c9de89895e0a36047 ("xen/events: add a new late EOI evtchn framework")
Reported-by: Julien Grall <[email protected]>
Signed-off-by: Juergen Gross <[email protected]>
---
V2:
- introduce a lock around masking/unmasking
- merge patch 3 into this one (Jan Beulich)
---
drivers/xen/events/events_2l.c | 7 --
drivers/xen/events/events_base.c | 102 +++++++++++++++++++++------
drivers/xen/events/events_fifo.c | 7 --
drivers/xen/events/events_internal.h | 6 --
4 files changed, 81 insertions(+), 41 deletions(-)
diff --git a/drivers/xen/events/events_2l.c b/drivers/xen/events/events_2l.c
index a7f413c5c190..b8f2f971c2f0 100644
--- a/drivers/xen/events/events_2l.c
+++ b/drivers/xen/events/events_2l.c
@@ -77,12 +77,6 @@ static bool evtchn_2l_is_pending(evtchn_port_t port)
return sync_test_bit(port, BM(&s->evtchn_pending[0]));
}
-static bool evtchn_2l_test_and_set_mask(evtchn_port_t port)
-{
- struct shared_info *s = HYPERVISOR_shared_info;
- return sync_test_and_set_bit(port, BM(&s->evtchn_mask[0]));
-}
-
static void evtchn_2l_mask(evtchn_port_t port)
{
struct shared_info *s = HYPERVISOR_shared_info;
@@ -376,7 +370,6 @@ static const struct evtchn_ops evtchn_ops_2l = {
.clear_pending = evtchn_2l_clear_pending,
.set_pending = evtchn_2l_set_pending,
.is_pending = evtchn_2l_is_pending,
- .test_and_set_mask = evtchn_2l_test_and_set_mask,
.mask = evtchn_2l_mask,
.unmask = evtchn_2l_unmask,
.handle_events = evtchn_2l_handle_events,
diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c
index 6c539db81f8f..e157e7506830 100644
--- a/drivers/xen/events/events_base.c
+++ b/drivers/xen/events/events_base.c
@@ -97,13 +97,18 @@ struct irq_info {
short refcnt;
u8 spurious_cnt;
u8 is_accounted;
- enum xen_irq_type type; /* type */
+ short type; /* type: IRQT_* */
+ u8 mask_reason; /* Why is event channel masked */
+#define EVT_MASK_REASON_EXPLICIT 0x01
+#define EVT_MASK_REASON_TEMPORARY 0x02
+#define EVT_MASK_REASON_EOI_PENDING 0x04
unsigned irq;
evtchn_port_t evtchn; /* event channel */
unsigned short cpu; /* cpu bound */
unsigned short eoi_cpu; /* EOI must happen on this cpu-1 */
unsigned int irq_epoch; /* If eoi_cpu valid: irq_epoch of event */
u64 eoi_time; /* Time in jiffies when to EOI. */
+ spinlock_t lock;
union {
unsigned short virq;
@@ -152,6 +157,7 @@ static DEFINE_RWLOCK(evtchn_rwlock);
* evtchn_rwlock
* IRQ-desc lock
* percpu eoi_list_lock
+ * irq_info->lock
*/
static LIST_HEAD(xen_irq_list_head);
@@ -302,6 +308,8 @@ static int xen_irq_info_common_setup(struct irq_info *info,
info->irq = irq;
info->evtchn = evtchn;
info->cpu = cpu;
+ info->mask_reason = EVT_MASK_REASON_EXPLICIT;
+ spin_lock_init(&info->lock);
ret = set_evtchn_to_irq(evtchn, irq);
if (ret < 0)
@@ -450,6 +458,34 @@ unsigned int cpu_from_evtchn(evtchn_port_t evtchn)
return ret;
}
+static void do_mask(struct irq_info *info, u8 reason)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&info->lock, flags);
+
+ if (!info->mask_reason)
+ mask_evtchn(info->evtchn);
+
+ info->mask_reason |= reason;
+
+ spin_unlock_irqrestore(&info->lock, flags);
+}
+
+static void do_unmask(struct irq_info *info, u8 reason)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&info->lock, flags);
+
+ info->mask_reason &= ~reason;
+
+ if (!info->mask_reason)
+ unmask_evtchn(info->evtchn);
+
+ spin_unlock_irqrestore(&info->lock, flags);
+}
+
#ifdef CONFIG_X86
static bool pirq_check_eoi_map(unsigned irq)
{
@@ -586,7 +622,7 @@ static void xen_irq_lateeoi_locked(struct irq_info *info, bool spurious)
}
info->eoi_time = 0;
- unmask_evtchn(evtchn);
+ do_unmask(info, EVT_MASK_REASON_EOI_PENDING);
}
static void xen_irq_lateeoi_worker(struct work_struct *work)
@@ -831,7 +867,8 @@ static unsigned int __startup_pirq(unsigned int irq)
goto err;
out:
- unmask_evtchn(evtchn);
+ do_unmask(info, EVT_MASK_REASON_EXPLICIT);
+
eoi_pirq(irq_get_irq_data(irq));
return 0;
@@ -858,7 +895,7 @@ static void shutdown_pirq(struct irq_data *data)
if (!VALID_EVTCHN(evtchn))
return;
- mask_evtchn(evtchn);
+ do_mask(info, EVT_MASK_REASON_EXPLICIT);
xen_evtchn_close(evtchn);
xen_irq_info_cleanup(info);
}
@@ -1691,10 +1728,10 @@ void rebind_evtchn_irq(evtchn_port_t evtchn, int irq)
}
/* Rebind an evtchn so that it gets delivered to a specific cpu */
-static int xen_rebind_evtchn_to_cpu(evtchn_port_t evtchn, unsigned int tcpu)
+static int xen_rebind_evtchn_to_cpu(struct irq_info *info, unsigned int tcpu)
{
struct evtchn_bind_vcpu bind_vcpu;
- int masked;
+ evtchn_port_t evtchn = info ? info->evtchn : 0;
if (!VALID_EVTCHN(evtchn))
return -1;
@@ -1710,7 +1747,7 @@ static int xen_rebind_evtchn_to_cpu(evtchn_port_t evtchn, unsigned int tcpu)
* Mask the event while changing the VCPU binding to prevent
* it being delivered on an unexpected VCPU.
*/
- masked = test_and_set_mask(evtchn);
+ do_mask(info, EVT_MASK_REASON_TEMPORARY);
/*
* If this fails, it usually just indicates that we're dealing with a
@@ -1720,8 +1757,7 @@ static int xen_rebind_evtchn_to_cpu(evtchn_port_t evtchn, unsigned int tcpu)
if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_vcpu, &bind_vcpu) >= 0)
bind_evtchn_to_cpu(evtchn, tcpu, false);
- if (!masked)
- unmask_evtchn(evtchn);
+ do_unmask(info, EVT_MASK_REASON_TEMPORARY);
return 0;
}
@@ -1760,7 +1796,7 @@ static int set_affinity_irq(struct irq_data *data, const struct cpumask *dest,
unsigned int tcpu = select_target_cpu(dest);
int ret;
- ret = xen_rebind_evtchn_to_cpu(evtchn_from_irq(data->irq), tcpu);
+ ret = xen_rebind_evtchn_to_cpu(info_for_irq(data->irq), tcpu);
if (!ret)
irq_data_update_effective_affinity(data, cpumask_of(tcpu));
@@ -1769,18 +1805,20 @@ static int set_affinity_irq(struct irq_data *data, const struct cpumask *dest,
static void enable_dynirq(struct irq_data *data)
{
- evtchn_port_t evtchn = evtchn_from_irq(data->irq);
+ struct irq_info *info = info_for_irq(data->irq);
+ evtchn_port_t evtchn = info ? info->evtchn : 0;
if (VALID_EVTCHN(evtchn))
- unmask_evtchn(evtchn);
+ do_unmask(info, EVT_MASK_REASON_EXPLICIT);
}
static void disable_dynirq(struct irq_data *data)
{
- evtchn_port_t evtchn = evtchn_from_irq(data->irq);
+ struct irq_info *info = info_for_irq(data->irq);
+ evtchn_port_t evtchn = info ? info->evtchn : 0;
if (VALID_EVTCHN(evtchn))
- mask_evtchn(evtchn);
+ do_mask(info, EVT_MASK_REASON_EXPLICIT);
}
static void ack_dynirq(struct irq_data *data)
@@ -1799,18 +1837,40 @@ static void mask_ack_dynirq(struct irq_data *data)
ack_dynirq(data);
}
+static void lateeoi_ack_dynirq(struct irq_data *data)
+{
+ struct irq_info *info = info_for_irq(data->irq);
+ evtchn_port_t evtchn = info ? info->evtchn : 0;
+
+ if (VALID_EVTCHN(evtchn)) {
+ do_mask(info, EVT_MASK_REASON_EOI_PENDING);
+ clear_evtchn(evtchn);
+ }
+}
+
+static void lateeoi_mask_ack_dynirq(struct irq_data *data)
+{
+ struct irq_info *info = info_for_irq(data->irq);
+ evtchn_port_t evtchn = info ? info->evtchn : 0;
+
+ if (VALID_EVTCHN(evtchn)) {
+ do_mask(info, EVT_MASK_REASON_EXPLICIT |
+ EVT_MASK_REASON_EOI_PENDING);
+ clear_evtchn(evtchn);
+ }
+}
+
static int retrigger_dynirq(struct irq_data *data)
{
- evtchn_port_t evtchn = evtchn_from_irq(data->irq);
- int masked;
+ struct irq_info *info = info_for_irq(data->irq);
+ evtchn_port_t evtchn = info ? info->evtchn : 0;
if (!VALID_EVTCHN(evtchn))
return 0;
- masked = test_and_set_mask(evtchn);
+ do_mask(info, EVT_MASK_REASON_TEMPORARY);
set_evtchn(evtchn);
- if (!masked)
- unmask_evtchn(evtchn);
+ do_unmask(info, EVT_MASK_REASON_TEMPORARY);
return 1;
}
@@ -2024,8 +2084,8 @@ static struct irq_chip xen_lateeoi_chip __read_mostly = {
.irq_mask = disable_dynirq,
.irq_unmask = enable_dynirq,
- .irq_ack = mask_ack_dynirq,
- .irq_mask_ack = mask_ack_dynirq,
+ .irq_ack = lateeoi_ack_dynirq,
+ .irq_mask_ack = lateeoi_mask_ack_dynirq,
.irq_set_affinity = set_affinity_irq,
.irq_retrigger = retrigger_dynirq,
diff --git a/drivers/xen/events/events_fifo.c b/drivers/xen/events/events_fifo.c
index b234f1766810..ad9fe51d3fb3 100644
--- a/drivers/xen/events/events_fifo.c
+++ b/drivers/xen/events/events_fifo.c
@@ -209,12 +209,6 @@ static bool evtchn_fifo_is_pending(evtchn_port_t port)
return sync_test_bit(EVTCHN_FIFO_BIT(PENDING, word), BM(word));
}
-static bool evtchn_fifo_test_and_set_mask(evtchn_port_t port)
-{
- event_word_t *word = event_word_from_port(port);
- return sync_test_and_set_bit(EVTCHN_FIFO_BIT(MASKED, word), BM(word));
-}
-
static void evtchn_fifo_mask(evtchn_port_t port)
{
event_word_t *word = event_word_from_port(port);
@@ -423,7 +417,6 @@ static const struct evtchn_ops evtchn_ops_fifo = {
.clear_pending = evtchn_fifo_clear_pending,
.set_pending = evtchn_fifo_set_pending,
.is_pending = evtchn_fifo_is_pending,
- .test_and_set_mask = evtchn_fifo_test_and_set_mask,
.mask = evtchn_fifo_mask,
.unmask = evtchn_fifo_unmask,
.handle_events = evtchn_fifo_handle_events,
diff --git a/drivers/xen/events/events_internal.h b/drivers/xen/events/events_internal.h
index 18a4090d0709..4d3398eff9cd 100644
--- a/drivers/xen/events/events_internal.h
+++ b/drivers/xen/events/events_internal.h
@@ -21,7 +21,6 @@ struct evtchn_ops {
void (*clear_pending)(evtchn_port_t port);
void (*set_pending)(evtchn_port_t port);
bool (*is_pending)(evtchn_port_t port);
- bool (*test_and_set_mask)(evtchn_port_t port);
void (*mask)(evtchn_port_t port);
void (*unmask)(evtchn_port_t port);
@@ -84,11 +83,6 @@ static inline bool test_evtchn(evtchn_port_t port)
return evtchn_ops->is_pending(port);
}
-static inline bool test_and_set_mask(evtchn_port_t port)
-{
- return evtchn_ops->test_and_set_mask(port);
-}
-
static inline void mask_evtchn(evtchn_port_t port)
{
return evtchn_ops->mask(port);
--
2.26.2
In order to support the possibility of per-device event channel
settings (e.g. lateeoi spurious event thresholds) add a xenbus device
pointer to struct irq_info() and modify the related event channel
binding interfaces to take the pointer to the xenbus device as a
parameter instead of the domain id of the other side.
While at it remove the stale prototype of bind_evtchn_to_irq_lateeoi().
Signed-off-by: Juergen Gross <[email protected]>
Reviewed-by: Boris Ostrovsky <[email protected]>
Reviewed-by: Wei Liu <[email protected]>
---
drivers/block/xen-blkback/xenbus.c | 2 +-
drivers/net/xen-netback/interface.c | 16 +++++------
drivers/xen/events/events_base.c | 41 +++++++++++++++++------------
drivers/xen/pvcalls-back.c | 4 +--
drivers/xen/xen-pciback/xenbus.c | 2 +-
drivers/xen/xen-scsiback.c | 2 +-
include/xen/events.h | 7 ++---
7 files changed, 41 insertions(+), 33 deletions(-)
diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c
index 9860d4842f36..c2aaf690352c 100644
--- a/drivers/block/xen-blkback/xenbus.c
+++ b/drivers/block/xen-blkback/xenbus.c
@@ -245,7 +245,7 @@ static int xen_blkif_map(struct xen_blkif_ring *ring, grant_ref_t *gref,
if (req_prod - rsp_prod > size)
goto fail;
- err = bind_interdomain_evtchn_to_irqhandler_lateeoi(blkif->domid,
+ err = bind_interdomain_evtchn_to_irqhandler_lateeoi(blkif->be->dev,
evtchn, xen_blkif_be_int, 0, "blkif-backend", ring);
if (err < 0)
goto fail;
diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c
index e02a4fbb74de..50a94e58c150 100644
--- a/drivers/net/xen-netback/interface.c
+++ b/drivers/net/xen-netback/interface.c
@@ -630,13 +630,13 @@ int xenvif_connect_ctrl(struct xenvif *vif, grant_ref_t ring_ref,
unsigned int evtchn)
{
struct net_device *dev = vif->dev;
+ struct xenbus_device *xendev = xenvif_to_xenbus_device(vif);
void *addr;
struct xen_netif_ctrl_sring *shared;
RING_IDX rsp_prod, req_prod;
int err;
- err = xenbus_map_ring_valloc(xenvif_to_xenbus_device(vif),
- &ring_ref, 1, &addr);
+ err = xenbus_map_ring_valloc(xendev, &ring_ref, 1, &addr);
if (err)
goto err;
@@ -650,7 +650,7 @@ int xenvif_connect_ctrl(struct xenvif *vif, grant_ref_t ring_ref,
if (req_prod - rsp_prod > RING_SIZE(&vif->ctrl))
goto err_unmap;
- err = bind_interdomain_evtchn_to_irq_lateeoi(vif->domid, evtchn);
+ err = bind_interdomain_evtchn_to_irq_lateeoi(xendev, evtchn);
if (err < 0)
goto err_unmap;
@@ -673,8 +673,7 @@ int xenvif_connect_ctrl(struct xenvif *vif, grant_ref_t ring_ref,
vif->ctrl_irq = 0;
err_unmap:
- xenbus_unmap_ring_vfree(xenvif_to_xenbus_device(vif),
- vif->ctrl.sring);
+ xenbus_unmap_ring_vfree(xendev, vif->ctrl.sring);
vif->ctrl.sring = NULL;
err:
@@ -719,6 +718,7 @@ int xenvif_connect_data(struct xenvif_queue *queue,
unsigned int tx_evtchn,
unsigned int rx_evtchn)
{
+ struct xenbus_device *dev = xenvif_to_xenbus_device(queue->vif);
struct task_struct *task;
int err;
@@ -755,7 +755,7 @@ int xenvif_connect_data(struct xenvif_queue *queue,
if (tx_evtchn == rx_evtchn) {
/* feature-split-event-channels == 0 */
err = bind_interdomain_evtchn_to_irqhandler_lateeoi(
- queue->vif->domid, tx_evtchn, xenvif_interrupt, 0,
+ dev, tx_evtchn, xenvif_interrupt, 0,
queue->name, queue);
if (err < 0)
goto err;
@@ -766,7 +766,7 @@ int xenvif_connect_data(struct xenvif_queue *queue,
snprintf(queue->tx_irq_name, sizeof(queue->tx_irq_name),
"%s-tx", queue->name);
err = bind_interdomain_evtchn_to_irqhandler_lateeoi(
- queue->vif->domid, tx_evtchn, xenvif_tx_interrupt, 0,
+ dev, tx_evtchn, xenvif_tx_interrupt, 0,
queue->tx_irq_name, queue);
if (err < 0)
goto err;
@@ -776,7 +776,7 @@ int xenvif_connect_data(struct xenvif_queue *queue,
snprintf(queue->rx_irq_name, sizeof(queue->rx_irq_name),
"%s-rx", queue->name);
err = bind_interdomain_evtchn_to_irqhandler_lateeoi(
- queue->vif->domid, rx_evtchn, xenvif_rx_interrupt, 0,
+ dev, rx_evtchn, xenvif_rx_interrupt, 0,
queue->rx_irq_name, queue);
if (err < 0)
goto err;
diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c
index f7e22330dcef..a5cce4c626c2 100644
--- a/drivers/xen/events/events_base.c
+++ b/drivers/xen/events/events_base.c
@@ -63,6 +63,7 @@
#include <xen/interface/physdev.h>
#include <xen/interface/sched.h>
#include <xen/interface/vcpu.h>
+#include <xen/xenbus.h>
#include <asm/hw_irq.h>
#include "events_internal.h"
@@ -121,6 +122,7 @@ struct irq_info {
unsigned char flags;
uint16_t domid;
} pirq;
+ struct xenbus_device *interdomain;
} u;
};
@@ -322,11 +324,16 @@ static int xen_irq_info_common_setup(struct irq_info *info,
}
static int xen_irq_info_evtchn_setup(unsigned irq,
- evtchn_port_t evtchn)
+ evtchn_port_t evtchn,
+ struct xenbus_device *dev)
{
struct irq_info *info = info_for_irq(irq);
+ int ret;
- return xen_irq_info_common_setup(info, irq, IRQT_EVTCHN, evtchn, 0);
+ ret = xen_irq_info_common_setup(info, irq, IRQT_EVTCHN, evtchn, 0);
+ info->u.interdomain = dev;
+
+ return ret;
}
static int xen_irq_info_ipi_setup(unsigned cpu,
@@ -1158,7 +1165,8 @@ int xen_pirq_from_irq(unsigned irq)
}
EXPORT_SYMBOL_GPL(xen_pirq_from_irq);
-static int bind_evtchn_to_irq_chip(evtchn_port_t evtchn, struct irq_chip *chip)
+static int bind_evtchn_to_irq_chip(evtchn_port_t evtchn, struct irq_chip *chip,
+ struct xenbus_device *dev)
{
int irq;
int ret;
@@ -1178,7 +1186,7 @@ static int bind_evtchn_to_irq_chip(evtchn_port_t evtchn, struct irq_chip *chip)
irq_set_chip_and_handler_name(irq, chip,
handle_edge_irq, "event");
- ret = xen_irq_info_evtchn_setup(irq, evtchn);
+ ret = xen_irq_info_evtchn_setup(irq, evtchn, dev);
if (ret < 0) {
__unbind_from_irq(irq);
irq = ret;
@@ -1205,7 +1213,7 @@ static int bind_evtchn_to_irq_chip(evtchn_port_t evtchn, struct irq_chip *chip)
int bind_evtchn_to_irq(evtchn_port_t evtchn)
{
- return bind_evtchn_to_irq_chip(evtchn, &xen_dynamic_chip);
+ return bind_evtchn_to_irq_chip(evtchn, &xen_dynamic_chip, NULL);
}
EXPORT_SYMBOL_GPL(bind_evtchn_to_irq);
@@ -1254,27 +1262,27 @@ static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu)
return irq;
}
-static int bind_interdomain_evtchn_to_irq_chip(unsigned int remote_domain,
+static int bind_interdomain_evtchn_to_irq_chip(struct xenbus_device *dev,
evtchn_port_t remote_port,
struct irq_chip *chip)
{
struct evtchn_bind_interdomain bind_interdomain;
int err;
- bind_interdomain.remote_dom = remote_domain;
+ bind_interdomain.remote_dom = dev->otherend_id;
bind_interdomain.remote_port = remote_port;
err = HYPERVISOR_event_channel_op(EVTCHNOP_bind_interdomain,
&bind_interdomain);
return err ? : bind_evtchn_to_irq_chip(bind_interdomain.local_port,
- chip);
+ chip, dev);
}
-int bind_interdomain_evtchn_to_irq_lateeoi(unsigned int remote_domain,
+int bind_interdomain_evtchn_to_irq_lateeoi(struct xenbus_device *dev,
evtchn_port_t remote_port)
{
- return bind_interdomain_evtchn_to_irq_chip(remote_domain, remote_port,
+ return bind_interdomain_evtchn_to_irq_chip(dev, remote_port,
&xen_lateeoi_chip);
}
EXPORT_SYMBOL_GPL(bind_interdomain_evtchn_to_irq_lateeoi);
@@ -1387,7 +1395,7 @@ static int bind_evtchn_to_irqhandler_chip(evtchn_port_t evtchn,
{
int irq, retval;
- irq = bind_evtchn_to_irq_chip(evtchn, chip);
+ irq = bind_evtchn_to_irq_chip(evtchn, chip, NULL);
if (irq < 0)
return irq;
retval = request_irq(irq, handler, irqflags, devname, dev_id);
@@ -1422,14 +1430,13 @@ int bind_evtchn_to_irqhandler_lateeoi(evtchn_port_t evtchn,
EXPORT_SYMBOL_GPL(bind_evtchn_to_irqhandler_lateeoi);
static int bind_interdomain_evtchn_to_irqhandler_chip(
- unsigned int remote_domain, evtchn_port_t remote_port,
+ struct xenbus_device *dev, evtchn_port_t remote_port,
irq_handler_t handler, unsigned long irqflags,
const char *devname, void *dev_id, struct irq_chip *chip)
{
int irq, retval;
- irq = bind_interdomain_evtchn_to_irq_chip(remote_domain, remote_port,
- chip);
+ irq = bind_interdomain_evtchn_to_irq_chip(dev, remote_port, chip);
if (irq < 0)
return irq;
@@ -1442,14 +1449,14 @@ static int bind_interdomain_evtchn_to_irqhandler_chip(
return irq;
}
-int bind_interdomain_evtchn_to_irqhandler_lateeoi(unsigned int remote_domain,
+int bind_interdomain_evtchn_to_irqhandler_lateeoi(struct xenbus_device *dev,
evtchn_port_t remote_port,
irq_handler_t handler,
unsigned long irqflags,
const char *devname,
void *dev_id)
{
- return bind_interdomain_evtchn_to_irqhandler_chip(remote_domain,
+ return bind_interdomain_evtchn_to_irqhandler_chip(dev,
remote_port, handler, irqflags, devname,
dev_id, &xen_lateeoi_chip);
}
@@ -1723,7 +1730,7 @@ void rebind_evtchn_irq(evtchn_port_t evtchn, int irq)
so there should be a proper type */
BUG_ON(info->type == IRQT_UNBOUND);
- (void)xen_irq_info_evtchn_setup(irq, evtchn);
+ (void)xen_irq_info_evtchn_setup(irq, evtchn, NULL);
mutex_unlock(&irq_mapping_update_lock);
diff --git a/drivers/xen/pvcalls-back.c b/drivers/xen/pvcalls-back.c
index a7d293fa8d14..b47fd8435061 100644
--- a/drivers/xen/pvcalls-back.c
+++ b/drivers/xen/pvcalls-back.c
@@ -348,7 +348,7 @@ static struct sock_mapping *pvcalls_new_active_socket(
map->bytes = page;
ret = bind_interdomain_evtchn_to_irqhandler_lateeoi(
- fedata->dev->otherend_id, evtchn,
+ fedata->dev, evtchn,
pvcalls_back_conn_event, 0, "pvcalls-backend", map);
if (ret < 0)
goto out;
@@ -948,7 +948,7 @@ static int backend_connect(struct xenbus_device *dev)
goto error;
}
- err = bind_interdomain_evtchn_to_irq_lateeoi(dev->otherend_id, evtchn);
+ err = bind_interdomain_evtchn_to_irq_lateeoi(dev, evtchn);
if (err < 0)
goto error;
fedata->irq = err;
diff --git a/drivers/xen/xen-pciback/xenbus.c b/drivers/xen/xen-pciback/xenbus.c
index e7c692cfb2cf..5188f02e75fb 100644
--- a/drivers/xen/xen-pciback/xenbus.c
+++ b/drivers/xen/xen-pciback/xenbus.c
@@ -124,7 +124,7 @@ static int xen_pcibk_do_attach(struct xen_pcibk_device *pdev, int gnt_ref,
pdev->sh_info = vaddr;
err = bind_interdomain_evtchn_to_irqhandler_lateeoi(
- pdev->xdev->otherend_id, remote_evtchn, xen_pcibk_handle_event,
+ pdev->xdev, remote_evtchn, xen_pcibk_handle_event,
0, DRV_NAME, pdev);
if (err < 0) {
xenbus_dev_fatal(pdev->xdev, err,
diff --git a/drivers/xen/xen-scsiback.c b/drivers/xen/xen-scsiback.c
index 862162dca33c..8b59897b2df9 100644
--- a/drivers/xen/xen-scsiback.c
+++ b/drivers/xen/xen-scsiback.c
@@ -799,7 +799,7 @@ static int scsiback_init_sring(struct vscsibk_info *info, grant_ref_t ring_ref,
sring = (struct vscsiif_sring *)area;
BACK_RING_INIT(&info->ring, sring, PAGE_SIZE);
- err = bind_interdomain_evtchn_to_irq_lateeoi(info->domid, evtchn);
+ err = bind_interdomain_evtchn_to_irq_lateeoi(info->dev, evtchn);
if (err < 0)
goto unmap_page;
diff --git a/include/xen/events.h b/include/xen/events.h
index 8ec418e30c7f..c204262d9fc2 100644
--- a/include/xen/events.h
+++ b/include/xen/events.h
@@ -12,10 +12,11 @@
#include <asm/xen/hypercall.h>
#include <asm/xen/events.h>
+struct xenbus_device;
+
unsigned xen_evtchn_nr_channels(void);
int bind_evtchn_to_irq(evtchn_port_t evtchn);
-int bind_evtchn_to_irq_lateeoi(evtchn_port_t evtchn);
int bind_evtchn_to_irqhandler(evtchn_port_t evtchn,
irq_handler_t handler,
unsigned long irqflags, const char *devname,
@@ -35,9 +36,9 @@ int bind_ipi_to_irqhandler(enum ipi_vector ipi,
unsigned long irqflags,
const char *devname,
void *dev_id);
-int bind_interdomain_evtchn_to_irq_lateeoi(unsigned int remote_domain,
+int bind_interdomain_evtchn_to_irq_lateeoi(struct xenbus_device *dev,
evtchn_port_t remote_port);
-int bind_interdomain_evtchn_to_irqhandler_lateeoi(unsigned int remote_domain,
+int bind_interdomain_evtchn_to_irqhandler_lateeoi(struct xenbus_device *dev,
evtchn_port_t remote_port,
irq_handler_t handler,
unsigned long irqflags,
--
2.26.2
Add syfs nodes for each xenbus device showing event statistics (number
of events and spurious events, number of associated event channels)
and for setting a spurious event threshold in case a frontend is
sending too many events without being rogue on purpose.
Signed-off-by: Juergen Gross <[email protected]>
---
V2:
- add documentation (Boris Ostrovsky)
---
.../ABI/testing/sysfs-devices-xenbus | 41 ++++++++++++
drivers/xen/events/events_base.c | 27 +++++++-
drivers/xen/xenbus/xenbus_probe.c | 66 +++++++++++++++++++
include/xen/xenbus.h | 7 ++
4 files changed, 139 insertions(+), 2 deletions(-)
create mode 100644 Documentation/ABI/testing/sysfs-devices-xenbus
diff --git a/Documentation/ABI/testing/sysfs-devices-xenbus b/Documentation/ABI/testing/sysfs-devices-xenbus
new file mode 100644
index 000000000000..fd796cb4f315
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-devices-xenbus
@@ -0,0 +1,41 @@
+What: /sys/devices/*/xenbus/event_channels
+Date: February 2021
+Contact: Xen Developers mailing list <[email protected]>
+Description:
+ Number of Xen event channels associated with a kernel based
+ paravirtualized device frontend or backend.
+
+What: /sys/devices/*/xenbus/events
+Date: February 2021
+Contact: Xen Developers mailing list <[email protected]>
+Description:
+ Total number of Xen events received for a Xen pv device
+ frontend or backend.
+
+What: /sys/devices/*/xenbus/jiffies_eoi_delayed
+Date: February 2021
+Contact: Xen Developers mailing list <[email protected]>
+Description:
+ Summed up time in jiffies the EOI of an interrupt for a Xen
+ pv device has been delayed in order to avoid stalls due to
+ event storms. This value rising is a first sign for a rogue
+ other end of the pv device.
+
+What: /sys/devices/*/xenbus/spurious_events
+Date: February 2021
+Contact: Xen Developers mailing list <[email protected]>
+Description:
+ Number of events received for a Xen pv device which did not
+ require any action. Too many spurious events in a row will
+ trigger delayed EOI processing.
+
+What: /sys/devices/*/xenbus/spurious_threshold
+Date: February 2021
+Contact: Xen Developers mailing list <[email protected]>
+Description:
+ Controls the tolerated number of subsequent spurious events
+ before delayed EOI processing is triggered for a Xen pv
+ device. Default is 1. This can be modified in case the other
+ end of the pv device is issuing spurious events on a regular
+ basis and is known not to be malicious on purpose. Raising
+ the value for such cases can improve pv device performance.
diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c
index a5cce4c626c2..48210c1e62e3 100644
--- a/drivers/xen/events/events_base.c
+++ b/drivers/xen/events/events_base.c
@@ -332,6 +332,8 @@ static int xen_irq_info_evtchn_setup(unsigned irq,
ret = xen_irq_info_common_setup(info, irq, IRQT_EVTCHN, evtchn, 0);
info->u.interdomain = dev;
+ if (dev)
+ atomic_inc(&dev->event_channels);
return ret;
}
@@ -606,18 +608,28 @@ static void xen_irq_lateeoi_locked(struct irq_info *info, bool spurious)
return;
if (spurious) {
+ struct xenbus_device *dev = info->u.interdomain;
+ unsigned int threshold = 1;
+
+ if (dev && dev->spurious_threshold)
+ threshold = dev->spurious_threshold;
+
if ((1 << info->spurious_cnt) < (HZ << 2)) {
if (info->spurious_cnt != 0xFF)
info->spurious_cnt++;
}
- if (info->spurious_cnt > 1) {
- delay = 1 << (info->spurious_cnt - 2);
+ if (info->spurious_cnt > threshold) {
+ delay = 1 << (info->spurious_cnt - 1 - threshold);
if (delay > HZ)
delay = HZ;
if (!info->eoi_time)
info->eoi_cpu = smp_processor_id();
info->eoi_time = get_jiffies_64() + delay;
+ if (dev)
+ atomic_add(delay, &dev->jiffies_eoi_delayed);
}
+ if (dev)
+ atomic_inc(&dev->spurious_events);
} else {
info->spurious_cnt = 0;
}
@@ -950,6 +962,7 @@ static void __unbind_from_irq(unsigned int irq)
if (VALID_EVTCHN(evtchn)) {
unsigned int cpu = cpu_from_irq(irq);
+ struct xenbus_device *dev;
xen_evtchn_close(evtchn);
@@ -960,6 +973,11 @@ static void __unbind_from_irq(unsigned int irq)
case IRQT_IPI:
per_cpu(ipi_to_irq, cpu)[ipi_from_irq(irq)] = -1;
break;
+ case IRQT_EVTCHN:
+ dev = info->u.interdomain;
+ if (dev)
+ atomic_dec(&dev->event_channels);
+ break;
default:
break;
}
@@ -1623,6 +1641,7 @@ void handle_irq_for_port(evtchn_port_t port, struct evtchn_loop_ctrl *ctrl)
{
int irq;
struct irq_info *info;
+ struct xenbus_device *dev;
irq = get_evtchn_to_irq(port);
if (irq == -1)
@@ -1654,6 +1673,10 @@ void handle_irq_for_port(evtchn_port_t port, struct evtchn_loop_ctrl *ctrl)
if (xchg_acquire(&info->is_active, 1))
return;
+ dev = (info->type == IRQT_EVTCHN) ? info->u.interdomain : NULL;
+ if (dev)
+ atomic_inc(&dev->events);
+
if (ctrl->defer_eoi) {
info->eoi_cpu = smp_processor_id();
info->irq_epoch = __this_cpu_read(irq_epoch);
diff --git a/drivers/xen/xenbus/xenbus_probe.c b/drivers/xen/xenbus/xenbus_probe.c
index 18ffd0551b54..9494ecad3c92 100644
--- a/drivers/xen/xenbus/xenbus_probe.c
+++ b/drivers/xen/xenbus/xenbus_probe.c
@@ -206,6 +206,65 @@ void xenbus_otherend_changed(struct xenbus_watch *watch,
}
EXPORT_SYMBOL_GPL(xenbus_otherend_changed);
+#define XENBUS_SHOW_STAT(name) \
+static ssize_t show_##name(struct device *_dev, \
+ struct device_attribute *attr, \
+ char *buf) \
+{ \
+ struct xenbus_device *dev = to_xenbus_device(_dev); \
+ \
+ return sprintf(buf, "%d\n", atomic_read(&dev->name)); \
+} \
+static DEVICE_ATTR(name, 0444, show_##name, NULL)
+
+XENBUS_SHOW_STAT(event_channels);
+XENBUS_SHOW_STAT(events);
+XENBUS_SHOW_STAT(spurious_events);
+XENBUS_SHOW_STAT(jiffies_eoi_delayed);
+
+static ssize_t show_spurious_threshold(struct device *_dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct xenbus_device *dev = to_xenbus_device(_dev);
+
+ return sprintf(buf, "%d\n", dev->spurious_threshold);
+}
+
+static ssize_t set_spurious_threshold(struct device *_dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct xenbus_device *dev = to_xenbus_device(_dev);
+ unsigned int val;
+ ssize_t ret;
+
+ ret = kstrtouint(buf, 0, &val);
+ if (ret)
+ return ret;
+
+ dev->spurious_threshold = val;
+
+ return count;
+}
+
+static DEVICE_ATTR(spurious_threshold, 0644, show_spurious_threshold,
+ set_spurious_threshold);
+
+static struct attribute *xenbus_attrs[] = {
+ &dev_attr_event_channels.attr,
+ &dev_attr_events.attr,
+ &dev_attr_spurious_events.attr,
+ &dev_attr_jiffies_eoi_delayed.attr,
+ &dev_attr_spurious_threshold.attr,
+ NULL
+};
+
+static const struct attribute_group xenbus_group = {
+ .name = "xenbus",
+ .attrs = xenbus_attrs,
+};
+
int xenbus_dev_probe(struct device *_dev)
{
struct xenbus_device *dev = to_xenbus_device(_dev);
@@ -253,6 +312,11 @@ int xenbus_dev_probe(struct device *_dev)
return err;
}
+ dev->spurious_threshold = 1;
+ if (sysfs_create_group(&dev->dev.kobj, &xenbus_group))
+ dev_warn(&dev->dev, "sysfs_create_group on %s failed.\n",
+ dev->nodename);
+
return 0;
fail_put:
module_put(drv->driver.owner);
@@ -269,6 +333,8 @@ int xenbus_dev_remove(struct device *_dev)
DPRINTK("%s", dev->nodename);
+ sysfs_remove_group(&dev->dev.kobj, &xenbus_group);
+
free_otherend_watch(dev);
if (drv->remove) {
diff --git a/include/xen/xenbus.h b/include/xen/xenbus.h
index 2c43b0ef1e4d..13ee375a1f05 100644
--- a/include/xen/xenbus.h
+++ b/include/xen/xenbus.h
@@ -88,6 +88,13 @@ struct xenbus_device {
struct completion down;
struct work_struct work;
struct semaphore reclaim_sem;
+
+ /* Event channel based statistics and settings. */
+ atomic_t event_channels;
+ atomic_t events;
+ atomic_t spurious_events;
+ atomic_t jiffies_eoi_delayed;
+ unsigned int spurious_threshold;
};
static inline struct xenbus_device *to_xenbus_device(struct device *dev)
--
2.26.2
For avoiding read- and write-tearing by the compiler use READ_ONCE()
and WRITE_ONCE() for accessing the ring indices in evtchn.c.
Signed-off-by: Juergen Gross <[email protected]>
---
V2:
- modify all accesses (Julien Grall)
---
drivers/xen/evtchn.c | 25 ++++++++++++++++---------
1 file changed, 16 insertions(+), 9 deletions(-)
diff --git a/drivers/xen/evtchn.c b/drivers/xen/evtchn.c
index 421382c73d88..620008f89dbe 100644
--- a/drivers/xen/evtchn.c
+++ b/drivers/xen/evtchn.c
@@ -162,6 +162,7 @@ static irqreturn_t evtchn_interrupt(int irq, void *data)
{
struct user_evtchn *evtchn = data;
struct per_user_data *u = evtchn->user;
+ unsigned int prod, cons;
WARN(!evtchn->enabled,
"Interrupt for port %u, but apparently not enabled; per-user %p\n",
@@ -171,10 +172,14 @@ static irqreturn_t evtchn_interrupt(int irq, void *data)
spin_lock(&u->ring_prod_lock);
- if ((u->ring_prod - u->ring_cons) < u->ring_size) {
- *evtchn_ring_entry(u, u->ring_prod) = evtchn->port;
+ prod = READ_ONCE(u->ring_prod);
+ cons = READ_ONCE(u->ring_cons);
+
+ if ((prod - cons) < u->ring_size) {
+ *evtchn_ring_entry(u, prod) = evtchn->port;
smp_wmb(); /* Ensure ring contents visible */
- if (u->ring_cons == u->ring_prod++) {
+ if (cons == prod++) {
+ WRITE_ONCE(u->ring_prod, prod);
wake_up_interruptible(&u->evtchn_wait);
kill_fasync(&u->evtchn_async_queue,
SIGIO, POLL_IN);
@@ -210,8 +215,8 @@ static ssize_t evtchn_read(struct file *file, char __user *buf,
if (u->ring_overflow)
goto unlock_out;
- c = u->ring_cons;
- p = u->ring_prod;
+ c = READ_ONCE(u->ring_cons);
+ p = READ_ONCE(u->ring_prod);
if (c != p)
break;
@@ -221,7 +226,7 @@ static ssize_t evtchn_read(struct file *file, char __user *buf,
return -EAGAIN;
rc = wait_event_interruptible(u->evtchn_wait,
- u->ring_cons != u->ring_prod);
+ READ_ONCE(u->ring_cons) != READ_ONCE(u->ring_prod));
if (rc)
return rc;
}
@@ -251,7 +256,7 @@ static ssize_t evtchn_read(struct file *file, char __user *buf,
copy_to_user(&buf[bytes1], &u->ring[0], bytes2)))
goto unlock_out;
- u->ring_cons += (bytes1 + bytes2) / sizeof(evtchn_port_t);
+ WRITE_ONCE(u->ring_cons, c + (bytes1 + bytes2) / sizeof(evtchn_port_t));
rc = bytes1 + bytes2;
unlock_out:
@@ -552,7 +557,9 @@ static long evtchn_ioctl(struct file *file,
/* Initialise the ring to empty. Clear errors. */
mutex_lock(&u->ring_cons_mutex);
spin_lock_irq(&u->ring_prod_lock);
- u->ring_cons = u->ring_prod = u->ring_overflow = 0;
+ WRITE_ONCE(u->ring_cons, 0);
+ WRITE_ONCE(u->ring_prod, 0);
+ u->ring_overflow = 0;
spin_unlock_irq(&u->ring_prod_lock);
mutex_unlock(&u->ring_cons_mutex);
rc = 0;
@@ -595,7 +602,7 @@ static __poll_t evtchn_poll(struct file *file, poll_table *wait)
struct per_user_data *u = file->private_data;
poll_wait(file, &u->evtchn_wait, wait);
- if (u->ring_cons != u->ring_prod)
+ if (READ_ONCE(u->ring_cons) != READ_ONCE(u->ring_prod))
mask |= EPOLLIN | EPOLLRDNORM;
if (u->ring_overflow)
mask = EPOLLERR;
--
2.26.2
The ring buffer for user events is local to the given kernel instance,
so smp barriers are fine for ensuring consistency.
Reported-by: Andrew Cooper <[email protected]>
Signed-off-by: Juergen Gross <[email protected]>
Reviewed-by: Jan Beulich <[email protected]>
---
drivers/xen/evtchn.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/drivers/xen/evtchn.c b/drivers/xen/evtchn.c
index a7a85719a8c8..421382c73d88 100644
--- a/drivers/xen/evtchn.c
+++ b/drivers/xen/evtchn.c
@@ -173,7 +173,7 @@ static irqreturn_t evtchn_interrupt(int irq, void *data)
if ((u->ring_prod - u->ring_cons) < u->ring_size) {
*evtchn_ring_entry(u, u->ring_prod) = evtchn->port;
- wmb(); /* Ensure ring contents visible */
+ smp_wmb(); /* Ensure ring contents visible */
if (u->ring_cons == u->ring_prod++) {
wake_up_interruptible(&u->evtchn_wait);
kill_fasync(&u->evtchn_async_queue,
@@ -245,7 +245,7 @@ static ssize_t evtchn_read(struct file *file, char __user *buf,
}
rc = -EFAULT;
- rmb(); /* Ensure that we see the port before we copy it. */
+ smp_rmb(); /* Ensure that we see the port before we copy it. */
if (copy_to_user(buf, evtchn_ring_entry(u, c), bytes1) ||
((bytes2 != 0) &&
copy_to_user(&buf[bytes1], &u->ring[0], bytes2)))
--
2.26.2
On 11.02.2021 11:16, Juergen Gross wrote:
> --- a/drivers/net/xen-netback/interface.c
> +++ b/drivers/net/xen-netback/interface.c
> @@ -162,13 +162,15 @@ irqreturn_t xenvif_interrupt(int irq, void *dev_id)
> {
> struct xenvif_queue *queue = dev_id;
> int old;
> + bool has_rx, has_tx;
>
> old = atomic_fetch_or(NETBK_COMMON_EOI, &queue->eoi_pending);
> WARN(old, "Interrupt while EOI pending\n");
>
> - /* Use bitwise or as we need to call both functions. */
> - if ((!xenvif_handle_tx_interrupt(queue) |
> - !xenvif_handle_rx_interrupt(queue))) {
> + has_tx = xenvif_handle_tx_interrupt(queue);
> + has_rx = xenvif_handle_rx_interrupt(queue);
> +
> + if (!has_rx && !has_tx) {
> atomic_andnot(NETBK_COMMON_EOI, &queue->eoi_pending);
> xen_irq_lateeoi(irq, XEN_EOI_FLAG_SPURIOUS);
> }
>
Ah yes, what was originally meant really was
if (!(xenvif_handle_tx_interrupt(queue) |
xenvif_handle_rx_interrupt(queue))) {
(also hinted at by the otherwise pointless inner parentheses),
which you simply write in an alternative way.
Reviewed-by: Jan Beulich <[email protected]>
Jan
> -----Original Message-----
> From: Juergen Gross <[email protected]>
> Sent: 11 February 2021 10:16
> To: [email protected]; [email protected]; [email protected]
> Cc: Juergen Gross <[email protected]>; Wei Liu <[email protected]>; Paul Durrant <[email protected]>; David
> S. Miller <[email protected]>; Jakub Kicinski <[email protected]>
> Subject: [PATCH v2 4/8] xen/netback: fix spurious event detection for common event case
>
> In case of a common event for rx and tx queue the event should be
> regarded to be spurious if no rx and no tx requests are pending.
>
> Unfortunately the condition for testing that is wrong causing to
> decide a event being spurious if no rx OR no tx requests are
> pending.
>
> Fix that plus using local variables for rx/tx pending indicators in
> order to split function calls and if condition.
>
Definitely neater.
> Fixes: 23025393dbeb3b ("xen/netback: use lateeoi irq binding")
> Signed-off-by: Juergen Gross <[email protected]>
Reviewed-by: Paul Durrant <[email protected]>
> -----Original Message-----
> From: Juergen Gross <[email protected]>
> Sent: 11 February 2021 10:16
> To: [email protected]; [email protected]; [email protected];
> [email protected]; [email protected]
> Cc: Juergen Gross <[email protected]>; Konrad Rzeszutek Wilk <[email protected]>; Roger Pau Monné
> <[email protected]>; Jens Axboe <[email protected]>; Wei Liu <[email protected]>; Paul Durrant
> <[email protected]>; David S. Miller <[email protected]>; Jakub Kicinski <[email protected]>; Boris
> Ostrovsky <[email protected]>; Stefano Stabellini <[email protected]>
> Subject: [PATCH v2 5/8] xen/events: link interdomain events to associated xenbus device
>
> In order to support the possibility of per-device event channel
> settings (e.g. lateeoi spurious event thresholds) add a xenbus device
> pointer to struct irq_info() and modify the related event channel
> binding interfaces to take the pointer to the xenbus device as a
> parameter instead of the domain id of the other side.
>
> While at it remove the stale prototype of bind_evtchn_to_irq_lateeoi().
>
> Signed-off-by: Juergen Gross <[email protected]>
> Reviewed-by: Boris Ostrovsky <[email protected]>
> Reviewed-by: Wei Liu <[email protected]>
Reviewed-by: Paul Durrant <[email protected]>
On Thu, Feb 11, 2021 at 11:16:12AM +0100, Juergen Gross wrote:
> In case of a common event for rx and tx queue the event should be
> regarded to be spurious if no rx and no tx requests are pending.
>
> Unfortunately the condition for testing that is wrong causing to
> decide a event being spurious if no rx OR no tx requests are
> pending.
>
> Fix that plus using local variables for rx/tx pending indicators in
> order to split function calls and if condition.
>
> Fixes: 23025393dbeb3b ("xen/netback: use lateeoi irq binding")
> Signed-off-by: Juergen Gross <[email protected]>
Reviewed-by: Wei Liu <[email protected]>
Hi Juergen,
On 11/02/2021 10:16, Juergen Gross wrote:
> When creating a new event channel with 2-level events the affinity
> needs to be reset initially in order to avoid using an old affinity
> from earlier usage of the event channel port. So when tearing an event
> channel down reset all affinity bits.
>
> The same applies to the affinity when onlining a vcpu: all old
> affinity settings for this vcpu must be reset. As percpu events get
> initialized before the percpu event channel hook is called,
> resetting of the affinities happens after offlining a vcpu (this is
> working, as initial percpu memory is zeroed out).
>
> Cc: [email protected]
> Reported-by: Julien Grall <[email protected]>
> Signed-off-by: Juergen Gross <[email protected]>
Reviewed-by: Julien Grall <[email protected]>
Cheers,
> ---
> V2:
> - reset affinity when tearing down the event (Julien Grall)
> ---
> drivers/xen/events/events_2l.c | 15 +++++++++++++++
> drivers/xen/events/events_base.c | 1 +
> drivers/xen/events/events_internal.h | 8 ++++++++
> 3 files changed, 24 insertions(+)
>
> diff --git a/drivers/xen/events/events_2l.c b/drivers/xen/events/events_2l.c
> index da87f3a1e351..a7f413c5c190 100644
> --- a/drivers/xen/events/events_2l.c
> +++ b/drivers/xen/events/events_2l.c
> @@ -47,6 +47,11 @@ static unsigned evtchn_2l_max_channels(void)
> return EVTCHN_2L_NR_CHANNELS;
> }
>
> +static void evtchn_2l_remove(evtchn_port_t evtchn, unsigned int cpu)
> +{
> + clear_bit(evtchn, BM(per_cpu(cpu_evtchn_mask, cpu)));
> +}
> +
> static void evtchn_2l_bind_to_cpu(evtchn_port_t evtchn, unsigned int cpu,
> unsigned int old_cpu)
> {
> @@ -355,9 +360,18 @@ static void evtchn_2l_resume(void)
> EVTCHN_2L_NR_CHANNELS/BITS_PER_EVTCHN_WORD);
> }
>
> +static int evtchn_2l_percpu_deinit(unsigned int cpu)
> +{
> + memset(per_cpu(cpu_evtchn_mask, cpu), 0, sizeof(xen_ulong_t) *
> + EVTCHN_2L_NR_CHANNELS/BITS_PER_EVTCHN_WORD);
> +
> + return 0;
> +}
> +
> static const struct evtchn_ops evtchn_ops_2l = {
> .max_channels = evtchn_2l_max_channels,
> .nr_channels = evtchn_2l_max_channels,
> + .remove = evtchn_2l_remove,
> .bind_to_cpu = evtchn_2l_bind_to_cpu,
> .clear_pending = evtchn_2l_clear_pending,
> .set_pending = evtchn_2l_set_pending,
> @@ -367,6 +381,7 @@ static const struct evtchn_ops evtchn_ops_2l = {
> .unmask = evtchn_2l_unmask,
> .handle_events = evtchn_2l_handle_events,
> .resume = evtchn_2l_resume,
> + .percpu_deinit = evtchn_2l_percpu_deinit,
> };
>
> void __init xen_evtchn_2l_init(void)
> diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c
> index e850f79351cb..6c539db81f8f 100644
> --- a/drivers/xen/events/events_base.c
> +++ b/drivers/xen/events/events_base.c
> @@ -368,6 +368,7 @@ static int xen_irq_info_pirq_setup(unsigned irq,
> static void xen_irq_info_cleanup(struct irq_info *info)
> {
> set_evtchn_to_irq(info->evtchn, -1);
> + xen_evtchn_port_remove(info->evtchn, info->cpu);
> info->evtchn = 0;
> channels_on_cpu_dec(info);
> }
> diff --git a/drivers/xen/events/events_internal.h b/drivers/xen/events/events_internal.h
> index 0a97c0549db7..18a4090d0709 100644
> --- a/drivers/xen/events/events_internal.h
> +++ b/drivers/xen/events/events_internal.h
> @@ -14,6 +14,7 @@ struct evtchn_ops {
> unsigned (*nr_channels)(void);
>
> int (*setup)(evtchn_port_t port);
> + void (*remove)(evtchn_port_t port, unsigned int cpu);
> void (*bind_to_cpu)(evtchn_port_t evtchn, unsigned int cpu,
> unsigned int old_cpu);
>
> @@ -54,6 +55,13 @@ static inline int xen_evtchn_port_setup(evtchn_port_t evtchn)
> return 0;
> }
>
> +static inline void xen_evtchn_port_remove(evtchn_port_t evtchn,
> + unsigned int cpu)
> +{
> + if (evtchn_ops->remove)
> + evtchn_ops->remove(evtchn, cpu);
> +}
> +
> static inline void xen_evtchn_port_bind_to_cpu(evtchn_port_t evtchn,
> unsigned int cpu,
> unsigned int old_cpu)
>
--
Julien Grall
Hi Juergen,
On 11/02/2021 10:16, Juergen Gross wrote:
> When changing the cpu affinity of an event it can happen today that
> (with some unlucky timing) the same event will be handled on the old
> and the new cpu at the same time.
>
> Avoid that by adding an "event active" flag to the per-event data and
> call the handler only if this flag isn't set.
>
> Reported-by: Julien Grall <[email protected]>
> Signed-off-by: Juergen Gross <[email protected]>
> ---
> V2:
> - new patch
> ---
> drivers/xen/events/events_base.c | 19 +++++++++++++++----
> 1 file changed, 15 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c
> index e157e7506830..f7e22330dcef 100644
> --- a/drivers/xen/events/events_base.c
> +++ b/drivers/xen/events/events_base.c
> @@ -102,6 +102,7 @@ struct irq_info {
> #define EVT_MASK_REASON_EXPLICIT 0x01
> #define EVT_MASK_REASON_TEMPORARY 0x02
> #define EVT_MASK_REASON_EOI_PENDING 0x04
> + u8 is_active; /* Is event just being handled? */
> unsigned irq;
> evtchn_port_t evtchn; /* event channel */
> unsigned short cpu; /* cpu bound */
> @@ -622,6 +623,7 @@ static void xen_irq_lateeoi_locked(struct irq_info *info, bool spurious)
> }
>
> info->eoi_time = 0;
> + smp_store_release(&info->is_active, 0);
> do_unmask(info, EVT_MASK_REASON_EOI_PENDING);
> }
>
> @@ -809,13 +811,15 @@ static void pirq_query_unmask(int irq)
>
> static void eoi_pirq(struct irq_data *data)
> {
> - evtchn_port_t evtchn = evtchn_from_irq(data->irq);
> + struct irq_info *info = info_for_irq(data->irq);
> + evtchn_port_t evtchn = info ? info->evtchn : 0;
> struct physdev_eoi eoi = { .irq = pirq_from_irq(data->irq) };
> int rc = 0;
>
> if (!VALID_EVTCHN(evtchn))
> return;
>
> + smp_store_release(&info->is_active, 0);
Would you mind to explain why you are using the release semantics?
It is also not clear to me if there are any expected ordering between
clearing is_active and clearing the pending bit.
> clear_evtchn(evtchn);
The 2 lines here seems to be a common pattern in this patch. So I would
suggest to create a new helper.
>
> if (pirq_needs_eoi(data->irq)) {
> @@ -1640,6 +1644,8 @@ void handle_irq_for_port(evtchn_port_t port, struct evtchn_loop_ctrl *ctrl)
> }
>
> info = info_for_irq(irq);
> + if (xchg_acquire(&info->is_active, 1))
> + return;
>
> if (ctrl->defer_eoi) {
> info->eoi_cpu = smp_processor_id();
> @@ -1823,11 +1829,13 @@ static void disable_dynirq(struct irq_data *data)
>
> static void ack_dynirq(struct irq_data *data)
> {
> - evtchn_port_t evtchn = evtchn_from_irq(data->irq);
> + struct irq_info *info = info_for_irq(data->irq);
> + evtchn_port_t evtchn = info ? info->evtchn : 0;
>
> if (!VALID_EVTCHN(evtchn))
> return;
>
> + smp_store_release(&info->is_active, 0);
> clear_evtchn(evtchn);
> }
>
> @@ -1969,10 +1977,13 @@ static void restore_cpu_ipis(unsigned int cpu)
> /* Clear an irq's pending state, in preparation for polling on it */
> void xen_clear_irq_pending(int irq)
> {
> - evtchn_port_t evtchn = evtchn_from_irq(irq);
> + struct irq_info *info = info_for_irq(irq);
> + evtchn_port_t evtchn = info ? info->evtchn : 0;
>
> - if (VALID_EVTCHN(evtchn))
> + if (VALID_EVTCHN(evtchn)) {
> + smp_store_release(&info->is_active, 0);
> clear_evtchn(evtchn);
> + }
> }
> EXPORT_SYMBOL(xen_clear_irq_pending);
> void xen_set_irq_pending(int irq)
>
--
Julien Grall
On 14.02.21 22:34, Julien Grall wrote:
> Hi Juergen,
>
> On 11/02/2021 10:16, Juergen Gross wrote:
>> When changing the cpu affinity of an event it can happen today that
>> (with some unlucky timing) the same event will be handled on the old
>> and the new cpu at the same time.
>>
>> Avoid that by adding an "event active" flag to the per-event data and
>> call the handler only if this flag isn't set.
>>
>> Reported-by: Julien Grall <[email protected]>
>> Signed-off-by: Juergen Gross <[email protected]>
>> ---
>> V2:
>> - new patch
>> ---
>> drivers/xen/events/events_base.c | 19 +++++++++++++++----
>> 1 file changed, 15 insertions(+), 4 deletions(-)
>>
>> diff --git a/drivers/xen/events/events_base.c
>> b/drivers/xen/events/events_base.c
>> index e157e7506830..f7e22330dcef 100644
>> --- a/drivers/xen/events/events_base.c
>> +++ b/drivers/xen/events/events_base.c
>> @@ -102,6 +102,7 @@ struct irq_info {
>> #define EVT_MASK_REASON_EXPLICIT 0x01
>> #define EVT_MASK_REASON_TEMPORARY 0x02
>> #define EVT_MASK_REASON_EOI_PENDING 0x04
>> + u8 is_active; /* Is event just being handled? */
>> unsigned irq;
>> evtchn_port_t evtchn; /* event channel */
>> unsigned short cpu; /* cpu bound */
>> @@ -622,6 +623,7 @@ static void xen_irq_lateeoi_locked(struct irq_info
>> *info, bool spurious)
>> }
>> info->eoi_time = 0;
>> + smp_store_release(&info->is_active, 0);
>> do_unmask(info, EVT_MASK_REASON_EOI_PENDING);
>> }
>> @@ -809,13 +811,15 @@ static void pirq_query_unmask(int irq)
>> static void eoi_pirq(struct irq_data *data)
>> {
>> - evtchn_port_t evtchn = evtchn_from_irq(data->irq);
>> + struct irq_info *info = info_for_irq(data->irq);
>> + evtchn_port_t evtchn = info ? info->evtchn : 0;
>> struct physdev_eoi eoi = { .irq = pirq_from_irq(data->irq) };
>> int rc = 0;
>> if (!VALID_EVTCHN(evtchn))
>> return;
>> + smp_store_release(&info->is_active, 0);
>
> Would you mind to explain why you are using the release semantics?
It is basically releasing a lock. So release semantics seem to be
appropriate.
> It is also not clear to me if there are any expected ordering between
> clearing is_active and clearing the pending bit.
No, I don't think there is a specific ordering required. is_active is
just guarding against two simultaneous IRQ handler calls for the same
event being active. Clearing the pending bit is not part of the guarded
section.
>
>> clear_evtchn(evtchn);
>
>
> The 2 lines here seems to be a common pattern in this patch. So I would
> suggest to create a new helper.
Okay.
Juergen
On 2/11/21 5:16 AM, Juergen Gross wrote:
> @@ -622,6 +623,7 @@ static void xen_irq_lateeoi_locked(struct irq_info *info, bool spurious)
> }
>
> info->eoi_time = 0;
> + smp_store_release(&info->is_active, 0);
Can this be done in lateeoi_ack_dynirq()/lateeoi_mask_ack_dynirq(), after we've masked the channel? Then it will be consistent with how how other chips do it, especially with the new helper.
-boris
> do_unmask(info, EVT_MASK_REASON_EOI_PENDING);
> }
>
On 2/11/21 5:16 AM, Juergen Gross wrote:
> Add syfs nodes for each xenbus device showing event statistics (number
> of events and spurious events, number of associated event channels)
> and for setting a spurious event threshold in case a frontend is
> sending too many events without being rogue on purpose.
>
> Signed-off-by: Juergen Gross <[email protected]>
Reviewed-by: Boris Ostrovsky <[email protected]>
On 15.02.21 22:35, Boris Ostrovsky wrote:
>
> On 2/11/21 5:16 AM, Juergen Gross wrote:
>
>> @@ -622,6 +623,7 @@ static void xen_irq_lateeoi_locked(struct irq_info *info, bool spurious)
>> }
>>
>> info->eoi_time = 0;
>> + smp_store_release(&info->is_active, 0);
>
>
> Can this be done in lateeoi_ack_dynirq()/lateeoi_mask_ack_dynirq(), after we've masked the channel? Then it will be consistent with how how other chips do it, especially with the new helper.
Yes, I think this should work.
Juergen
On 17.02.21 14:29, Ross Lagerwall wrote:
> On 2021-02-11 10:16, Juergen Gross wrote:
>> For avoiding read- and write-tearing by the compiler use READ_ONCE()
>> and WRITE_ONCE() for accessing the ring indices in evtchn.c.
>>
>> Signed-off-by: Juergen Gross <[email protected]>
>> ---
>> V2:
>> - modify all accesses (Julien Grall)
>> ---
>> drivers/xen/evtchn.c | 25 ++++++++++++++++---------
>> 1 file changed, 16 insertions(+), 9 deletions(-)
>>
>> diff --git a/drivers/xen/evtchn.c b/drivers/xen/evtchn.c
>> index 421382c73d88..620008f89dbe 100644
>> --- a/drivers/xen/evtchn.c
>> +++ b/drivers/xen/evtchn.c
>> @@ -162,6 +162,7 @@ static irqreturn_t evtchn_interrupt(int irq, void *data)
>> {
>> struct user_evtchn *evtchn = data;
>> struct per_user_data *u = evtchn->user;
>> + unsigned int prod, cons;
>>
>> WARN(!evtchn->enabled,
>> "Interrupt for port %u, but apparently not enabled; per-user %p\n",
>> @@ -171,10 +172,14 @@ static irqreturn_t evtchn_interrupt(int irq, void *data)
>>
>> spin_lock(&u->ring_prod_lock);
>>
>> - if ((u->ring_prod - u->ring_cons) < u->ring_size) {
>> - *evtchn_ring_entry(u, u->ring_prod) = evtchn->port;
>> + prod = READ_ONCE(u->ring_prod);
>> + cons = READ_ONCE(u->ring_cons);
>> +
>> + if ((prod - cons) < u->ring_size) {
>> + *evtchn_ring_entry(u, prod) = evtchn->port;
>> smp_wmb(); /* Ensure ring contents visible */
>> - if (u->ring_cons == u->ring_prod++) {
>> + if (cons == prod++) {
>> + WRITE_ONCE(u->ring_prod, prod);
>> wake_up_interruptible(&u->evtchn_wait);
>> kill_fasync(&u->evtchn_async_queue,
>> SIGIO, POLL_IN);
>
> This doesn't work correctly since now u->ring_prod is only updated if cons == prod++.
Right. Thanks for noticing.
Juergen