The goal of this patch series is to enhance the state machine by
centralizing all state changes inside the state machine wrapper
and to provide a clear view of state changes.
Doing this will:
- facilitate the introduction of new events received from QEMU
(cancel/clear/stsh) or from the hardware (chr events).
- produce small, easy to maintain FSM functions with clear
incoming events and outgoing states
This series introduces new states and events and suppresses
others.
- VFIO_CCW_STATE_NOT_OPER : when the Sub-Channel is KO
- VFIO_CCW_STATE_STANDBY : when it is offline
- VFIO_CCW_STATE_IDLE : when it is ready for I/O
- VFIO_CCW_STATE_BUSY : when it is busy doing I/O
- VFIO_CCW_STATE_QUIESCING: when it is busy going offline
- VFIO_CCW_EVENT_INIT : the channel setup (admin)
- VFIO_CCW_EVENT_NOT_OPER : something really wrong happened
- VFIO_CCW_EVENT_SSCH_REQ : Starting an I/O request (UAPI)
- VFIO_CCW_EVENT_INTERRUPT: Receiving an interrupt (callback)
- VFIO_CCW_EVENT_SCH_EVENT: Receiving a channel event (callback)
- VFIO_CCW_EVENT_ONLINE : Channel is set online (admin)
- VFIO_CCW_EVENT_OFFLINE : Channel is set offline (admin)
The ABI did not change, so a standard QEMU devel branch can be used.
Pierre Morel (10):
vfio: ccw: Moving state change out of IRQ context
vfio: ccw: Transform FSM functions to return state
vfio: ccw: new SCH_EVENT event
vfio: ccw: replace IO_REQ event with SSCH_REQ event
vfio: ccw: Suppress unused event parameter
vfio: ccw: Make FSM functions atomic
vfio: ccw: Introduce the INIT event
vfio: ccw: Handling reset and shutdown with states
vfio: ccw: Suppressing the BOXED state
vfio: ccw: Let user wait when busy on IO
drivers/s390/cio/vfio_ccw_drv.c | 120 +++++++-------------
drivers/s390/cio/vfio_ccw_fsm.c | 215 +++++++++++++++++++++++++-----------
drivers/s390/cio/vfio_ccw_ops.c | 58 +++++-----
drivers/s390/cio/vfio_ccw_private.h | 21 +++-
4 files changed, 232 insertions(+), 182 deletions(-)
--
2.7.4
The sub-channel event callback is threaded using workqueues.
The work uses the FSM, introducing the VFIO_CCW_EVENT_SCH_EVENT
event.
The update of the SCHIB is now done inside the FSM function.
Signed-off-by: Pierre Morel <[email protected]>
---
drivers/s390/cio/vfio_ccw_drv.c | 33 +++++++++++++--------------------
drivers/s390/cio/vfio_ccw_fsm.c | 23 +++++++++++++++++++++++
drivers/s390/cio/vfio_ccw_private.h | 3 +++
3 files changed, 39 insertions(+), 20 deletions(-)
diff --git a/drivers/s390/cio/vfio_ccw_drv.c b/drivers/s390/cio/vfio_ccw_drv.c
index f1b158c..8a91eee 100644
--- a/drivers/s390/cio/vfio_ccw_drv.c
+++ b/drivers/s390/cio/vfio_ccw_drv.c
@@ -77,6 +77,15 @@ static void vfio_ccw_sch_io_todo(struct work_struct *work)
private->state = VFIO_CCW_STATE_IDLE;
}
+static void vfio_ccw_sch_event_todo(struct work_struct *work)
+{
+ struct vfio_ccw_private *private;
+
+ private = container_of(work, struct vfio_ccw_private, event_work);
+ vfio_ccw_fsm_event(private, VFIO_CCW_EVENT_SCH_EVENT);
+}
+
+
/*
* Css driver callbacks
*/
@@ -125,6 +134,7 @@ static int vfio_ccw_sch_probe(struct subchannel *sch)
goto out_disable;
INIT_WORK(&private->io_work, vfio_ccw_sch_io_todo);
+ INIT_WORK(&private->event_work, vfio_ccw_sch_event_todo);
atomic_set(&private->avail, 1);
private->state = VFIO_CCW_STATE_STANDBY;
@@ -171,28 +181,11 @@ static void vfio_ccw_sch_shutdown(struct subchannel *sch)
static int vfio_ccw_sch_event(struct subchannel *sch, int process)
{
struct vfio_ccw_private *private = dev_get_drvdata(&sch->dev);
- unsigned long flags;
- spin_lock_irqsave(sch->lock, flags);
if (!device_is_registered(&sch->dev))
- goto out_unlock;
-
- if (work_pending(&sch->todo_work))
- goto out_unlock;
-
- if (cio_update_schib(sch)) {
- vfio_ccw_fsm_event(private, VFIO_CCW_EVENT_NOT_OPER);
- goto out_unlock;
- }
-
- private = dev_get_drvdata(&sch->dev);
- if (private->state == VFIO_CCW_STATE_NOT_OPER) {
- private->state = private->mdev ? VFIO_CCW_STATE_IDLE :
- VFIO_CCW_STATE_STANDBY;
- }
-
-out_unlock:
- spin_unlock_irqrestore(sch->lock, flags);
+ return -1;
+ WARN_ON(work_pending(&private->event_work));
+ queue_work(vfio_ccw_work_q, &private->event_work);
return 0;
}
diff --git a/drivers/s390/cio/vfio_ccw_fsm.c b/drivers/s390/cio/vfio_ccw_fsm.c
index 2f3108d..13751b4 100644
--- a/drivers/s390/cio/vfio_ccw_fsm.c
+++ b/drivers/s390/cio/vfio_ccw_fsm.c
@@ -183,6 +183,24 @@ static int fsm_irq(struct vfio_ccw_private *private,
}
/*
+ * Got a sub-channel event .
+ */
+static int fsm_sch_event(struct vfio_ccw_private *private,
+ enum vfio_ccw_event event)
+{
+ unsigned long flags;
+ int ret = private->state;
+ struct subchannel *sch = private->sch;
+
+ spin_lock_irqsave(sch->lock, flags);
+ if (cio_update_schib(sch))
+ ret = VFIO_CCW_STATE_NOT_OPER;
+ spin_unlock_irqrestore(sch->lock, flags);
+
+ return ret;
+}
+
+/*
* Device statemachine
*/
fsm_func_t *vfio_ccw_jumptable[NR_VFIO_CCW_STATES][NR_VFIO_CCW_EVENTS] = {
@@ -190,25 +208,30 @@ fsm_func_t *vfio_ccw_jumptable[NR_VFIO_CCW_STATES][NR_VFIO_CCW_EVENTS] = {
[VFIO_CCW_EVENT_NOT_OPER] = fsm_nop,
[VFIO_CCW_EVENT_IO_REQ] = fsm_io_error,
[VFIO_CCW_EVENT_INTERRUPT] = fsm_disabled_irq,
+ [VFIO_CCW_EVENT_SCH_EVENT] = fsm_nop,
},
[VFIO_CCW_STATE_STANDBY] = {
[VFIO_CCW_EVENT_NOT_OPER] = fsm_notoper,
[VFIO_CCW_EVENT_IO_REQ] = fsm_io_error,
[VFIO_CCW_EVENT_INTERRUPT] = fsm_irq,
+ [VFIO_CCW_EVENT_SCH_EVENT] = fsm_sch_event,
},
[VFIO_CCW_STATE_IDLE] = {
[VFIO_CCW_EVENT_NOT_OPER] = fsm_notoper,
[VFIO_CCW_EVENT_IO_REQ] = fsm_io_request,
[VFIO_CCW_EVENT_INTERRUPT] = fsm_irq,
+ [VFIO_CCW_EVENT_SCH_EVENT] = fsm_sch_event,
},
[VFIO_CCW_STATE_BOXED] = {
[VFIO_CCW_EVENT_NOT_OPER] = fsm_notoper,
[VFIO_CCW_EVENT_IO_REQ] = fsm_io_busy,
[VFIO_CCW_EVENT_INTERRUPT] = fsm_irq,
+ [VFIO_CCW_EVENT_SCH_EVENT] = fsm_sch_event,
},
[VFIO_CCW_STATE_BUSY] = {
[VFIO_CCW_EVENT_NOT_OPER] = fsm_notoper,
[VFIO_CCW_EVENT_IO_REQ] = fsm_io_busy,
[VFIO_CCW_EVENT_INTERRUPT] = fsm_irq,
+ [VFIO_CCW_EVENT_SCH_EVENT] = fsm_sch_event,
},
};
diff --git a/drivers/s390/cio/vfio_ccw_private.h b/drivers/s390/cio/vfio_ccw_private.h
index f526b18..3284e64 100644
--- a/drivers/s390/cio/vfio_ccw_private.h
+++ b/drivers/s390/cio/vfio_ccw_private.h
@@ -33,6 +33,7 @@
* @scsw: scsw info
* @io_trigger: eventfd ctx for signaling userspace I/O results
* @io_work: work for deferral process of I/O handling
+ * @event_work: work for deferral process of sub-channel event
*/
struct vfio_ccw_private {
struct subchannel *sch;
@@ -49,6 +50,7 @@ struct vfio_ccw_private {
struct eventfd_ctx *io_trigger;
struct work_struct io_work;
+ struct work_struct event_work;
} __aligned(8);
extern int vfio_ccw_mdev_reg(struct subchannel *sch);
@@ -76,6 +78,7 @@ enum vfio_ccw_event {
VFIO_CCW_EVENT_NOT_OPER,
VFIO_CCW_EVENT_IO_REQ,
VFIO_CCW_EVENT_INTERRUPT,
+ VFIO_CCW_EVENT_SCH_EVENT,
/* last element! */
NR_VFIO_CCW_EVENTS
};
--
2.7.4
We use a mutex around the FSM function call to make the FSM
event handling and state change atomic.
Signed-off-by: Pierre Morel <[email protected]>
---
drivers/s390/cio/vfio_ccw_drv.c | 3 +--
drivers/s390/cio/vfio_ccw_fsm.c | 2 --
drivers/s390/cio/vfio_ccw_ops.c | 4 +---
drivers/s390/cio/vfio_ccw_private.h | 3 +++
4 files changed, 5 insertions(+), 7 deletions(-)
diff --git a/drivers/s390/cio/vfio_ccw_drv.c b/drivers/s390/cio/vfio_ccw_drv.c
index 8a91eee..1c9422a 100644
--- a/drivers/s390/cio/vfio_ccw_drv.c
+++ b/drivers/s390/cio/vfio_ccw_drv.c
@@ -73,8 +73,6 @@ static void vfio_ccw_sch_io_todo(struct work_struct *work)
private = container_of(work, struct vfio_ccw_private, io_work);
vfio_ccw_fsm_event(private, VFIO_CCW_EVENT_INTERRUPT);
- if (private->mdev)
- private->state = VFIO_CCW_STATE_IDLE;
}
static void vfio_ccw_sch_event_todo(struct work_struct *work)
@@ -120,6 +118,7 @@ static int vfio_ccw_sch_probe(struct subchannel *sch)
return -ENOMEM;
private->sch = sch;
dev_set_drvdata(&sch->dev, private);
+ mutex_init(&private->state_mutex);
spin_lock_irq(sch->lock);
private->state = VFIO_CCW_STATE_NOT_OPER;
diff --git a/drivers/s390/cio/vfio_ccw_fsm.c b/drivers/s390/cio/vfio_ccw_fsm.c
index f8ded70..d85bcfc 100644
--- a/drivers/s390/cio/vfio_ccw_fsm.c
+++ b/drivers/s390/cio/vfio_ccw_fsm.c
@@ -109,8 +109,6 @@ static int fsm_io_request(struct vfio_ccw_private *private)
union orb *orb = (union orb *)io_region->orb_area;
struct mdev_device *mdev = private->mdev;
- private->state = VFIO_CCW_STATE_BOXED;
-
io_region->ret_code = cp_init(&private->cp, mdev_dev(mdev), orb);
if (io_region->ret_code)
goto err_out;
diff --git a/drivers/s390/cio/vfio_ccw_ops.c b/drivers/s390/cio/vfio_ccw_ops.c
index 4da7b61..dac8ce4 100644
--- a/drivers/s390/cio/vfio_ccw_ops.c
+++ b/drivers/s390/cio/vfio_ccw_ops.c
@@ -204,10 +204,8 @@ static ssize_t vfio_ccw_mdev_write(struct mdev_device *mdev,
return -EINVAL;
vfio_ccw_fsm_event(private, VFIO_CCW_EVENT_SSCH_REQ);
- if (region->ret_code != 0) {
- private->state = VFIO_CCW_STATE_IDLE;
+ if (region->ret_code != 0)
return region->ret_code;
- }
return count;
}
diff --git a/drivers/s390/cio/vfio_ccw_private.h b/drivers/s390/cio/vfio_ccw_private.h
index 823e46c..cf197cf 100644
--- a/drivers/s390/cio/vfio_ccw_private.h
+++ b/drivers/s390/cio/vfio_ccw_private.h
@@ -51,6 +51,7 @@ struct vfio_ccw_private {
struct eventfd_ctx *io_trigger;
struct work_struct io_work;
struct work_struct event_work;
+ struct mutex state_mutex;
} __aligned(8);
extern int vfio_ccw_mdev_reg(struct subchannel *sch);
@@ -92,7 +93,9 @@ extern fsm_func_t *vfio_ccw_jumptable[NR_VFIO_CCW_STATES][NR_VFIO_CCW_EVENTS];
static inline void vfio_ccw_fsm_event(struct vfio_ccw_private *private,
int event)
{
+ mutex_lock(&private->state_mutex);
private->state = vfio_ccw_jumptable[private->state][event](private);
+ mutex_unlock(&private->state_mutex);
}
extern struct workqueue_struct *vfio_ccw_work_q;
--
2.7.4
The fsm_func_t function type definition does not need the event
parameter since all functions are in a state/event table.
Signed-off-by: Pierre Morel <[email protected]>
---
drivers/s390/cio/vfio_ccw_fsm.c | 34 +++++++++++-----------------------
drivers/s390/cio/vfio_ccw_private.h | 5 ++---
2 files changed, 13 insertions(+), 26 deletions(-)
diff --git a/drivers/s390/cio/vfio_ccw_fsm.c b/drivers/s390/cio/vfio_ccw_fsm.c
index f9855cd..f8ded70 100644
--- a/drivers/s390/cio/vfio_ccw_fsm.c
+++ b/drivers/s390/cio/vfio_ccw_fsm.c
@@ -55,8 +55,7 @@ static int fsm_io_helper(struct vfio_ccw_private *private)
}
}
-static int fsm_notoper(struct vfio_ccw_private *private,
- enum vfio_ccw_event event)
+static int fsm_notoper(struct vfio_ccw_private *private)
{
struct subchannel *sch = private->sch;
@@ -65,36 +64,31 @@ static int fsm_notoper(struct vfio_ccw_private *private,
* Probably we should send the machine check to the guest.
*/
css_sched_sch_todo(sch, SCH_TODO_UNREG);
- private->state = VFIO_CCW_STATE_NOT_OPER;
- return private->state;
+ return VFIO_CCW_STATE_NOT_OPER;
}
/*
* No operation action.
*/
-static int fsm_nop(struct vfio_ccw_private *private,
- enum vfio_ccw_event event)
+static int fsm_nop(struct vfio_ccw_private *private)
{
return private->state;
}
-static int fsm_io_error(struct vfio_ccw_private *private,
- enum vfio_ccw_event event)
+static int fsm_io_error(struct vfio_ccw_private *private)
{
pr_err("vfio-ccw: FSM: I/O request from state:%d\n", private->state);
private->io_region.ret_code = -EIO;
return private->state;
}
-static int fsm_io_busy(struct vfio_ccw_private *private,
- enum vfio_ccw_event event)
+static int fsm_io_busy(struct vfio_ccw_private *private)
{
private->io_region.ret_code = -EBUSY;
return private->state;
}
-static int fsm_disabled_irq(struct vfio_ccw_private *private,
- enum vfio_ccw_event event)
+static int fsm_disabled_irq(struct vfio_ccw_private *private)
{
struct subchannel *sch = private->sch;
@@ -109,17 +103,14 @@ static int fsm_disabled_irq(struct vfio_ccw_private *private,
/*
* Deal with the ccw command request from the userspace.
*/
-static int fsm_io_request(struct vfio_ccw_private *private,
- enum vfio_ccw_event event)
+static int fsm_io_request(struct vfio_ccw_private *private)
{
- union orb *orb;
struct ccw_io_region *io_region = &private->io_region;
+ union orb *orb = (union orb *)io_region->orb_area;
struct mdev_device *mdev = private->mdev;
private->state = VFIO_CCW_STATE_BOXED;
- orb = (union orb *)io_region->orb_area;
-
io_region->ret_code = cp_init(&private->cp, mdev_dev(mdev), orb);
if (io_region->ret_code)
goto err_out;
@@ -139,15 +130,13 @@ static int fsm_io_request(struct vfio_ccw_private *private,
return VFIO_CCW_STATE_BUSY;
err_out:
- private->state = VFIO_CCW_STATE_IDLE;
- return private->state;
+ return VFIO_CCW_STATE_IDLE;
}
/*
* Got an interrupt for a normal io (state busy).
*/
-static int fsm_irq(struct vfio_ccw_private *private,
- enum vfio_ccw_event event)
+static int fsm_irq(struct vfio_ccw_private *private)
{
struct irb *irb = &private->irb;
@@ -166,8 +155,7 @@ static int fsm_irq(struct vfio_ccw_private *private,
/*
* Got a sub-channel event .
*/
-static int fsm_sch_event(struct vfio_ccw_private *private,
- enum vfio_ccw_event event)
+static int fsm_sch_event(struct vfio_ccw_private *private)
{
unsigned long flags;
int ret = private->state;
diff --git a/drivers/s390/cio/vfio_ccw_private.h b/drivers/s390/cio/vfio_ccw_private.h
index 93aab87..823e46c 100644
--- a/drivers/s390/cio/vfio_ccw_private.h
+++ b/drivers/s390/cio/vfio_ccw_private.h
@@ -86,14 +86,13 @@ enum vfio_ccw_event {
/*
* Action called through jumptable.
*/
-typedef int (fsm_func_t)(struct vfio_ccw_private *, enum vfio_ccw_event);
+typedef int (fsm_func_t)(struct vfio_ccw_private *);
extern fsm_func_t *vfio_ccw_jumptable[NR_VFIO_CCW_STATES][NR_VFIO_CCW_EVENTS];
static inline void vfio_ccw_fsm_event(struct vfio_ccw_private *private,
int event)
{
- private->state = vfio_ccw_jumptable[private->state][event](private,
- event);
+ private->state = vfio_ccw_jumptable[private->state][event](private);
}
extern struct workqueue_struct *vfio_ccw_work_q;
--
2.7.4
The VFIO_CCW_EVENT_INIT event allows exporting the initial state
setup into the FSM.
Signed-off-by: Pierre Morel <[email protected]>
---
drivers/s390/cio/vfio_ccw_drv.c | 26 +++++++++++---------------
drivers/s390/cio/vfio_ccw_fsm.c | 21 +++++++++++++++++++++
drivers/s390/cio/vfio_ccw_ops.c | 11 -----------
drivers/s390/cio/vfio_ccw_private.h | 1 +
4 files changed, 33 insertions(+), 26 deletions(-)
diff --git a/drivers/s390/cio/vfio_ccw_drv.c b/drivers/s390/cio/vfio_ccw_drv.c
index 1c9422a..f6e7def1 100644
--- a/drivers/s390/cio/vfio_ccw_drv.c
+++ b/drivers/s390/cio/vfio_ccw_drv.c
@@ -116,31 +116,27 @@ static int vfio_ccw_sch_probe(struct subchannel *sch)
private = kzalloc(sizeof(*private), GFP_KERNEL | GFP_DMA);
if (!private)
return -ENOMEM;
+
private->sch = sch;
dev_set_drvdata(&sch->dev, private);
mutex_init(&private->state_mutex);
-
- spin_lock_irq(sch->lock);
- private->state = VFIO_CCW_STATE_NOT_OPER;
- sch->isc = VFIO_CCW_ISC;
- ret = cio_enable_subchannel(sch, (u32)(unsigned long)sch);
- spin_unlock_irq(sch->lock);
- if (ret)
- goto out_free;
+ INIT_WORK(&private->io_work, vfio_ccw_sch_io_todo);
+ INIT_WORK(&private->event_work, vfio_ccw_sch_event_todo);
+ atomic_set(&private->avail, 1);
ret = vfio_ccw_mdev_reg(sch);
if (ret)
- goto out_disable;
+ goto out_free;
- INIT_WORK(&private->io_work, vfio_ccw_sch_io_todo);
- INIT_WORK(&private->event_work, vfio_ccw_sch_event_todo);
- atomic_set(&private->avail, 1);
- private->state = VFIO_CCW_STATE_STANDBY;
+ vfio_ccw_fsm_event(private, VFIO_CCW_EVENT_INIT);
+ ret = -EFAULT;
+ if (private->state != VFIO_CCW_STATE_STANDBY)
+ goto out_unreg;
return 0;
-out_disable:
- cio_disable_subchannel(sch);
+out_unreg:
+ vfio_ccw_mdev_unreg(sch);
out_free:
dev_set_drvdata(&sch->dev, NULL);
kfree(private);
diff --git a/drivers/s390/cio/vfio_ccw_fsm.c b/drivers/s390/cio/vfio_ccw_fsm.c
index d85bcfc..e8fe1e6 100644
--- a/drivers/s390/cio/vfio_ccw_fsm.c
+++ b/drivers/s390/cio/vfio_ccw_fsm.c
@@ -9,6 +9,7 @@
#include <linux/vfio.h>
#include <linux/mdev.h>
+#include <asm/isc.h>
#include "ioasm.h"
#include "vfio_ccw_private.h"
@@ -167,35 +168,55 @@ static int fsm_sch_event(struct vfio_ccw_private *private)
return ret;
}
+static int fsm_init(struct vfio_ccw_private *private)
+{
+ struct subchannel *sch = private->sch;
+ int ret = VFIO_CCW_STATE_STANDBY;
+
+ spin_lock_irq(sch->lock);
+ sch->isc = VFIO_CCW_ISC;
+ if (cio_enable_subchannel(sch, (u32)(unsigned long)sch))
+ ret = VFIO_CCW_STATE_NOT_OPER;
+ spin_unlock_irq(sch->lock);
+
+ return ret;
+}
+
+
/*
* Device statemachine
*/
fsm_func_t *vfio_ccw_jumptable[NR_VFIO_CCW_STATES][NR_VFIO_CCW_EVENTS] = {
[VFIO_CCW_STATE_NOT_OPER] = {
+ [VFIO_CCW_EVENT_INIT] = fsm_init,
[VFIO_CCW_EVENT_NOT_OPER] = fsm_nop,
[VFIO_CCW_EVENT_SSCH_REQ] = fsm_io_error,
[VFIO_CCW_EVENT_INTERRUPT] = fsm_disabled_irq,
[VFIO_CCW_EVENT_SCH_EVENT] = fsm_nop,
},
[VFIO_CCW_STATE_STANDBY] = {
+ [VFIO_CCW_EVENT_INIT] = fsm_nop,
[VFIO_CCW_EVENT_NOT_OPER] = fsm_notoper,
[VFIO_CCW_EVENT_SSCH_REQ] = fsm_io_error,
[VFIO_CCW_EVENT_INTERRUPT] = fsm_irq,
[VFIO_CCW_EVENT_SCH_EVENT] = fsm_sch_event,
},
[VFIO_CCW_STATE_IDLE] = {
+ [VFIO_CCW_EVENT_INIT] = fsm_nop,
[VFIO_CCW_EVENT_NOT_OPER] = fsm_notoper,
[VFIO_CCW_EVENT_SSCH_REQ] = fsm_io_request,
[VFIO_CCW_EVENT_INTERRUPT] = fsm_irq,
[VFIO_CCW_EVENT_SCH_EVENT] = fsm_sch_event,
},
[VFIO_CCW_STATE_BOXED] = {
+ [VFIO_CCW_EVENT_INIT] = fsm_nop,
[VFIO_CCW_EVENT_NOT_OPER] = fsm_notoper,
[VFIO_CCW_EVENT_SSCH_REQ] = fsm_io_busy,
[VFIO_CCW_EVENT_INTERRUPT] = fsm_irq,
[VFIO_CCW_EVENT_SCH_EVENT] = fsm_sch_event,
},
[VFIO_CCW_STATE_BUSY] = {
+ [VFIO_CCW_EVENT_INIT] = fsm_nop,
[VFIO_CCW_EVENT_NOT_OPER] = fsm_notoper,
[VFIO_CCW_EVENT_SSCH_REQ] = fsm_io_busy,
[VFIO_CCW_EVENT_INTERRUPT] = fsm_irq,
diff --git a/drivers/s390/cio/vfio_ccw_ops.c b/drivers/s390/cio/vfio_ccw_ops.c
index dac8ce4..78d1925 100644
--- a/drivers/s390/cio/vfio_ccw_ops.c
+++ b/drivers/s390/cio/vfio_ccw_ops.c
@@ -111,14 +111,10 @@ static int vfio_ccw_mdev_create(struct kobject *kobj, struct mdev_device *mdev)
struct vfio_ccw_private *private =
dev_get_drvdata(mdev_parent_dev(mdev));
- if (private->state == VFIO_CCW_STATE_NOT_OPER)
- return -ENODEV;
-
if (atomic_dec_if_positive(&private->avail) < 0)
return -EPERM;
private->mdev = mdev;
- private->state = VFIO_CCW_STATE_IDLE;
return 0;
}
@@ -128,13 +124,6 @@ static int vfio_ccw_mdev_remove(struct mdev_device *mdev)
struct vfio_ccw_private *private =
dev_get_drvdata(mdev_parent_dev(mdev));
- if ((private->state != VFIO_CCW_STATE_NOT_OPER) &&
- (private->state != VFIO_CCW_STATE_STANDBY)) {
- if (!vfio_ccw_mdev_reset(mdev))
- private->state = VFIO_CCW_STATE_STANDBY;
- /* The state will be NOT_OPER on error. */
- }
-
private->mdev = NULL;
atomic_inc(&private->avail);
diff --git a/drivers/s390/cio/vfio_ccw_private.h b/drivers/s390/cio/vfio_ccw_private.h
index cf197cf..cacf677 100644
--- a/drivers/s390/cio/vfio_ccw_private.h
+++ b/drivers/s390/cio/vfio_ccw_private.h
@@ -76,6 +76,7 @@ enum vfio_ccw_state {
* Asynchronous events of the device statemachine.
*/
enum vfio_ccw_event {
+ VFIO_CCW_EVENT_INIT,
VFIO_CCW_EVENT_NOT_OPER,
VFIO_CCW_EVENT_SSCH_REQ,
VFIO_CCW_EVENT_INTERRUPT,
--
2.7.4
This patch simplifies the I/O request handling to support the only
implemented request: SSCH.
Other requests are invalid and get a return value of -EINVAL.
This patch changes the event name to VFIO_CCW_EVENT_SSCH_REQ to reflect
what is done and prepares for future implementation of other requests.
Signed-off-by: Pierre Morel <[email protected]>
---
drivers/s390/cio/vfio_ccw_fsm.c | 63 +++++++++++++------------------------
drivers/s390/cio/vfio_ccw_ops.c | 9 ++++--
drivers/s390/cio/vfio_ccw_private.h | 2 +-
3 files changed, 29 insertions(+), 45 deletions(-)
diff --git a/drivers/s390/cio/vfio_ccw_fsm.c b/drivers/s390/cio/vfio_ccw_fsm.c
index 13751b4..f9855cd 100644
--- a/drivers/s390/cio/vfio_ccw_fsm.c
+++ b/drivers/s390/cio/vfio_ccw_fsm.c
@@ -19,14 +19,9 @@ static int fsm_io_helper(struct vfio_ccw_private *private)
union orb *orb;
int ccode;
__u8 lpm;
- unsigned long flags;
sch = private->sch;
- spin_lock_irqsave(sch->lock, flags);
- private->state = VFIO_CCW_STATE_BUSY;
- spin_unlock_irqrestore(sch->lock, flags);
-
orb = cp_get_orb(&private->cp, (u32)(addr_t)sch, sch->lpm);
/* Issue "Start Subchannel" */
@@ -118,44 +113,30 @@ static int fsm_io_request(struct vfio_ccw_private *private,
enum vfio_ccw_event event)
{
union orb *orb;
- union scsw *scsw = &private->scsw;
struct ccw_io_region *io_region = &private->io_region;
struct mdev_device *mdev = private->mdev;
private->state = VFIO_CCW_STATE_BOXED;
- memcpy(scsw, io_region->scsw_area, sizeof(*scsw));
-
- if (scsw->cmd.fctl & SCSW_FCTL_START_FUNC) {
- orb = (union orb *)io_region->orb_area;
-
- io_region->ret_code = cp_init(&private->cp, mdev_dev(mdev),
- orb);
- if (io_region->ret_code)
- goto err_out;
-
- io_region->ret_code = cp_prefetch(&private->cp);
- if (io_region->ret_code) {
- cp_free(&private->cp);
- goto err_out;
- }
-
- /* Start channel program and wait for I/O interrupt. */
- io_region->ret_code = fsm_io_helper(private);
- if (io_region->ret_code) {
- cp_free(&private->cp);
- goto err_out;
- }
- return private->state;
- } else if (scsw->cmd.fctl & SCSW_FCTL_HALT_FUNC) {
- /* XXX: Handle halt. */
- io_region->ret_code = -EOPNOTSUPP;
+ orb = (union orb *)io_region->orb_area;
+
+ io_region->ret_code = cp_init(&private->cp, mdev_dev(mdev), orb);
+ if (io_region->ret_code)
+ goto err_out;
+
+ io_region->ret_code = cp_prefetch(&private->cp);
+ if (io_region->ret_code) {
+ cp_free(&private->cp);
goto err_out;
- } else if (scsw->cmd.fctl & SCSW_FCTL_CLEAR_FUNC) {
- /* XXX: Handle clear. */
- io_region->ret_code = -EOPNOTSUPP;
+ }
+
+ /* Start channel program and wait for I/O interrupt. */
+ io_region->ret_code = fsm_io_helper(private);
+ if (io_region->ret_code) {
+ cp_free(&private->cp);
goto err_out;
}
+ return VFIO_CCW_STATE_BUSY;
err_out:
private->state = VFIO_CCW_STATE_IDLE;
@@ -179,7 +160,7 @@ static int fsm_irq(struct vfio_ccw_private *private,
if (private->io_trigger)
eventfd_signal(private->io_trigger, 1);
- return private->state;
+ return VFIO_CCW_STATE_IDLE;
}
/*
@@ -206,31 +187,31 @@ static int fsm_sch_event(struct vfio_ccw_private *private,
fsm_func_t *vfio_ccw_jumptable[NR_VFIO_CCW_STATES][NR_VFIO_CCW_EVENTS] = {
[VFIO_CCW_STATE_NOT_OPER] = {
[VFIO_CCW_EVENT_NOT_OPER] = fsm_nop,
- [VFIO_CCW_EVENT_IO_REQ] = fsm_io_error,
+ [VFIO_CCW_EVENT_SSCH_REQ] = fsm_io_error,
[VFIO_CCW_EVENT_INTERRUPT] = fsm_disabled_irq,
[VFIO_CCW_EVENT_SCH_EVENT] = fsm_nop,
},
[VFIO_CCW_STATE_STANDBY] = {
[VFIO_CCW_EVENT_NOT_OPER] = fsm_notoper,
- [VFIO_CCW_EVENT_IO_REQ] = fsm_io_error,
+ [VFIO_CCW_EVENT_SSCH_REQ] = fsm_io_error,
[VFIO_CCW_EVENT_INTERRUPT] = fsm_irq,
[VFIO_CCW_EVENT_SCH_EVENT] = fsm_sch_event,
},
[VFIO_CCW_STATE_IDLE] = {
[VFIO_CCW_EVENT_NOT_OPER] = fsm_notoper,
- [VFIO_CCW_EVENT_IO_REQ] = fsm_io_request,
+ [VFIO_CCW_EVENT_SSCH_REQ] = fsm_io_request,
[VFIO_CCW_EVENT_INTERRUPT] = fsm_irq,
[VFIO_CCW_EVENT_SCH_EVENT] = fsm_sch_event,
},
[VFIO_CCW_STATE_BOXED] = {
[VFIO_CCW_EVENT_NOT_OPER] = fsm_notoper,
- [VFIO_CCW_EVENT_IO_REQ] = fsm_io_busy,
+ [VFIO_CCW_EVENT_SSCH_REQ] = fsm_io_busy,
[VFIO_CCW_EVENT_INTERRUPT] = fsm_irq,
[VFIO_CCW_EVENT_SCH_EVENT] = fsm_sch_event,
},
[VFIO_CCW_STATE_BUSY] = {
[VFIO_CCW_EVENT_NOT_OPER] = fsm_notoper,
- [VFIO_CCW_EVENT_IO_REQ] = fsm_io_busy,
+ [VFIO_CCW_EVENT_SSCH_REQ] = fsm_io_busy,
[VFIO_CCW_EVENT_INTERRUPT] = fsm_irq,
[VFIO_CCW_EVENT_SCH_EVENT] = fsm_sch_event,
},
diff --git a/drivers/s390/cio/vfio_ccw_ops.c b/drivers/s390/cio/vfio_ccw_ops.c
index 41eeb57..4da7b61 100644
--- a/drivers/s390/cio/vfio_ccw_ops.c
+++ b/drivers/s390/cio/vfio_ccw_ops.c
@@ -188,19 +188,22 @@ static ssize_t vfio_ccw_mdev_write(struct mdev_device *mdev,
{
struct vfio_ccw_private *private;
struct ccw_io_region *region;
+ union scsw *scsw;
if (*ppos + count > sizeof(*region))
return -EINVAL;
private = dev_get_drvdata(mdev_parent_dev(mdev));
- if (private->state != VFIO_CCW_STATE_IDLE)
- return -EACCES;
region = &private->io_region;
if (copy_from_user((void *)region + *ppos, buf, count))
return -EFAULT;
- vfio_ccw_fsm_event(private, VFIO_CCW_EVENT_IO_REQ);
+ scsw = (union scsw *) ®ion->scsw_area;
+ if ((scsw->cmd.fctl & SCSW_FCTL_START_FUNC) != SCSW_FCTL_START_FUNC)
+ return -EINVAL;
+
+ vfio_ccw_fsm_event(private, VFIO_CCW_EVENT_SSCH_REQ);
if (region->ret_code != 0) {
private->state = VFIO_CCW_STATE_IDLE;
return region->ret_code;
diff --git a/drivers/s390/cio/vfio_ccw_private.h b/drivers/s390/cio/vfio_ccw_private.h
index 3284e64..93aab87 100644
--- a/drivers/s390/cio/vfio_ccw_private.h
+++ b/drivers/s390/cio/vfio_ccw_private.h
@@ -76,7 +76,7 @@ enum vfio_ccw_state {
*/
enum vfio_ccw_event {
VFIO_CCW_EVENT_NOT_OPER,
- VFIO_CCW_EVENT_IO_REQ,
+ VFIO_CCW_EVENT_SSCH_REQ,
VFIO_CCW_EVENT_INTERRUPT,
VFIO_CCW_EVENT_SCH_EVENT,
/* last element! */
--
2.7.4
Two new events, VFIO_CCW_EVENT_ONLINE and VFIO_CCW_EVENT_OFFLINE,
allow handling the enabling and disabling of a sub-channel;
the shutdown, quiesce and reset operations are changed accordingly.
Signed-off-by: Pierre Morel <[email protected]>
---
drivers/s390/cio/vfio_ccw_drv.c | 47 ++++++--------------------
drivers/s390/cio/vfio_ccw_fsm.c | 66 +++++++++++++++++++++++++++++++++++++
drivers/s390/cio/vfio_ccw_ops.c | 15 +++------
drivers/s390/cio/vfio_ccw_private.h | 3 ++
4 files changed, 83 insertions(+), 48 deletions(-)
diff --git a/drivers/s390/cio/vfio_ccw_drv.c b/drivers/s390/cio/vfio_ccw_drv.c
index f6e7def1..7175c64 100644
--- a/drivers/s390/cio/vfio_ccw_drv.c
+++ b/drivers/s390/cio/vfio_ccw_drv.c
@@ -30,41 +30,13 @@ int vfio_ccw_sch_quiesce(struct subchannel *sch)
{
struct vfio_ccw_private *private = dev_get_drvdata(&sch->dev);
DECLARE_COMPLETION_ONSTACK(completion);
- int iretry, ret = 0;
-
- spin_lock_irq(sch->lock);
- if (!sch->schib.pmcw.ena)
- goto out_unlock;
- ret = cio_disable_subchannel(sch);
- if (ret != -EBUSY)
- goto out_unlock;
-
- do {
- iretry = 255;
-
- ret = cio_cancel_halt_clear(sch, &iretry);
- while (ret == -EBUSY) {
- /*
- * Flush all I/O and wait for
- * cancel/halt/clear completion.
- */
- private->completion = &completion;
- spin_unlock_irq(sch->lock);
-
- wait_for_completion_timeout(&completion, 3*HZ);
-
- spin_lock_irq(sch->lock);
- private->completion = NULL;
- flush_workqueue(vfio_ccw_work_q);
- ret = cio_cancel_halt_clear(sch, &iretry);
- };
-
- ret = cio_disable_subchannel(sch);
- } while (ret == -EBUSY);
-out_unlock:
- private->state = VFIO_CCW_STATE_NOT_OPER;
- spin_unlock_irq(sch->lock);
- return ret;
+
+ private->completion = &completion;
+ vfio_ccw_fsm_event(private, VFIO_CCW_EVENT_OFFLINE);
+ wait_for_completion(&completion);
+ if (private->state != VFIO_CCW_STATE_STANDBY)
+ return -EFAULT;
+ return 0;
}
static void vfio_ccw_sch_io_todo(struct work_struct *work)
@@ -97,8 +69,6 @@ static void vfio_ccw_sch_irq(struct subchannel *sch)
WARN_ON(work_pending(&private->io_work));
queue_work(vfio_ccw_work_q, &private->io_work);
- if (private->completion)
- complete(private->completion);
}
static int vfio_ccw_sch_probe(struct subchannel *sch)
@@ -132,6 +102,9 @@ static int vfio_ccw_sch_probe(struct subchannel *sch)
ret = -EFAULT;
if (private->state != VFIO_CCW_STATE_STANDBY)
goto out_unreg;
+ vfio_ccw_fsm_event(private, VFIO_CCW_EVENT_ONLINE);
+ if (private->state != VFIO_CCW_STATE_IDLE)
+ goto out_unreg;
return 0;
diff --git a/drivers/s390/cio/vfio_ccw_fsm.c b/drivers/s390/cio/vfio_ccw_fsm.c
index e8fe1e6..444424e 100644
--- a/drivers/s390/cio/vfio_ccw_fsm.c
+++ b/drivers/s390/cio/vfio_ccw_fsm.c
@@ -68,6 +68,53 @@ static int fsm_notoper(struct vfio_ccw_private *private)
return VFIO_CCW_STATE_NOT_OPER;
}
+static int fsm_online(struct vfio_ccw_private *private)
+{
+ struct subchannel *sch = private->sch;
+ int ret = VFIO_CCW_STATE_IDLE;
+
+ spin_lock_irq(sch->lock);
+ if (cio_enable_subchannel(sch, (u32)(unsigned long)sch))
+ ret = VFIO_CCW_STATE_NOT_OPER;
+ spin_unlock_irq(sch->lock);
+
+ return ret;
+}
+static int fsm_offline(struct vfio_ccw_private *private)
+{
+ struct subchannel *sch = private->sch;
+ int ret = VFIO_CCW_STATE_STANDBY;
+
+ spin_lock_irq(sch->lock);
+ if (cio_disable_subchannel(sch))
+ ret = VFIO_CCW_STATE_NOT_OPER;
+ spin_unlock_irq(sch->lock);
+ if (private->completion)
+ complete(private->completion);
+
+ return ret;
+}
+static int fsm_quiescing(struct vfio_ccw_private *private)
+{
+ struct subchannel *sch = private->sch;
+ int ret = VFIO_CCW_STATE_STANDBY;
+ int iretry = 255;
+
+ spin_lock_irq(sch->lock);
+ ret = cio_cancel_halt_clear(sch, &iretry);
+ if (ret == -EBUSY)
+ ret = VFIO_CCW_STATE_QUIESCING;
+ else if (private->completion)
+ complete(private->completion);
+ spin_unlock_irq(sch->lock);
+ return ret;
+}
+static int fsm_quiescing_done(struct vfio_ccw_private *private)
+{
+ if (private->completion)
+ complete(private->completion);
+ return VFIO_CCW_STATE_STANDBY;
+}
/*
* No operation action.
*/
@@ -189,6 +236,8 @@ static int fsm_init(struct vfio_ccw_private *private)
fsm_func_t *vfio_ccw_jumptable[NR_VFIO_CCW_STATES][NR_VFIO_CCW_EVENTS] = {
[VFIO_CCW_STATE_NOT_OPER] = {
[VFIO_CCW_EVENT_INIT] = fsm_init,
+ [VFIO_CCW_EVENT_ONLINE] = fsm_nop,
+ [VFIO_CCW_EVENT_OFFLINE] = fsm_nop,
[VFIO_CCW_EVENT_NOT_OPER] = fsm_nop,
[VFIO_CCW_EVENT_SSCH_REQ] = fsm_io_error,
[VFIO_CCW_EVENT_INTERRUPT] = fsm_disabled_irq,
@@ -196,6 +245,8 @@ fsm_func_t *vfio_ccw_jumptable[NR_VFIO_CCW_STATES][NR_VFIO_CCW_EVENTS] = {
},
[VFIO_CCW_STATE_STANDBY] = {
[VFIO_CCW_EVENT_INIT] = fsm_nop,
+ [VFIO_CCW_EVENT_ONLINE] = fsm_online,
+ [VFIO_CCW_EVENT_OFFLINE] = fsm_nop,
[VFIO_CCW_EVENT_NOT_OPER] = fsm_notoper,
[VFIO_CCW_EVENT_SSCH_REQ] = fsm_io_error,
[VFIO_CCW_EVENT_INTERRUPT] = fsm_irq,
@@ -203,6 +254,8 @@ fsm_func_t *vfio_ccw_jumptable[NR_VFIO_CCW_STATES][NR_VFIO_CCW_EVENTS] = {
},
[VFIO_CCW_STATE_IDLE] = {
[VFIO_CCW_EVENT_INIT] = fsm_nop,
+ [VFIO_CCW_EVENT_ONLINE] = fsm_nop,
+ [VFIO_CCW_EVENT_OFFLINE] = fsm_offline,
[VFIO_CCW_EVENT_NOT_OPER] = fsm_notoper,
[VFIO_CCW_EVENT_SSCH_REQ] = fsm_io_request,
[VFIO_CCW_EVENT_INTERRUPT] = fsm_irq,
@@ -210,6 +263,8 @@ fsm_func_t *vfio_ccw_jumptable[NR_VFIO_CCW_STATES][NR_VFIO_CCW_EVENTS] = {
},
[VFIO_CCW_STATE_BOXED] = {
[VFIO_CCW_EVENT_INIT] = fsm_nop,
+ [VFIO_CCW_EVENT_ONLINE] = fsm_nop,
+ [VFIO_CCW_EVENT_OFFLINE] = fsm_quiescing,
[VFIO_CCW_EVENT_NOT_OPER] = fsm_notoper,
[VFIO_CCW_EVENT_SSCH_REQ] = fsm_io_busy,
[VFIO_CCW_EVENT_INTERRUPT] = fsm_irq,
@@ -217,9 +272,20 @@ fsm_func_t *vfio_ccw_jumptable[NR_VFIO_CCW_STATES][NR_VFIO_CCW_EVENTS] = {
},
[VFIO_CCW_STATE_BUSY] = {
[VFIO_CCW_EVENT_INIT] = fsm_nop,
+ [VFIO_CCW_EVENT_ONLINE] = fsm_nop,
+ [VFIO_CCW_EVENT_OFFLINE] = fsm_quiescing,
[VFIO_CCW_EVENT_NOT_OPER] = fsm_notoper,
[VFIO_CCW_EVENT_SSCH_REQ] = fsm_io_busy,
[VFIO_CCW_EVENT_INTERRUPT] = fsm_irq,
[VFIO_CCW_EVENT_SCH_EVENT] = fsm_sch_event,
},
+ [VFIO_CCW_STATE_QUIESCING] = {
+ [VFIO_CCW_EVENT_INIT] = fsm_nop,
+ [VFIO_CCW_EVENT_ONLINE] = fsm_nop,
+ [VFIO_CCW_EVENT_OFFLINE] = fsm_nop,
+ [VFIO_CCW_EVENT_NOT_OPER] = fsm_notoper,
+ [VFIO_CCW_EVENT_SSCH_REQ] = fsm_io_busy,
+ [VFIO_CCW_EVENT_INTERRUPT] = fsm_quiescing_done,
+ [VFIO_CCW_EVENT_SCH_EVENT] = fsm_sch_event,
+ },
};
diff --git a/drivers/s390/cio/vfio_ccw_ops.c b/drivers/s390/cio/vfio_ccw_ops.c
index 78d1925..f0f4071 100644
--- a/drivers/s390/cio/vfio_ccw_ops.c
+++ b/drivers/s390/cio/vfio_ccw_ops.c
@@ -21,21 +21,14 @@ static int vfio_ccw_mdev_reset(struct mdev_device *mdev)
private = dev_get_drvdata(mdev_parent_dev(mdev));
sch = private->sch;
- /*
- * TODO:
- * In the cureent stage, some things like "no I/O running" and "no
- * interrupt pending" are clear, but we are not sure what other state
- * we need to care about.
- * There are still a lot more instructions need to be handled. We
- * should come back here later.
- */
+
ret = vfio_ccw_sch_quiesce(sch);
if (ret)
return ret;
+ vfio_ccw_fsm_event(private, VFIO_CCW_EVENT_ONLINE);
- ret = cio_enable_subchannel(sch, (u32)(unsigned long)sch);
- if (!ret)
- private->state = VFIO_CCW_STATE_IDLE;
+ if (!(private->state == VFIO_CCW_STATE_IDLE))
+ ret = -EFAULT;
return ret;
}
diff --git a/drivers/s390/cio/vfio_ccw_private.h b/drivers/s390/cio/vfio_ccw_private.h
index cacf677..e7ea076 100644
--- a/drivers/s390/cio/vfio_ccw_private.h
+++ b/drivers/s390/cio/vfio_ccw_private.h
@@ -68,6 +68,7 @@ enum vfio_ccw_state {
VFIO_CCW_STATE_IDLE,
VFIO_CCW_STATE_BOXED,
VFIO_CCW_STATE_BUSY,
+ VFIO_CCW_STATE_QUIESCING,
/* last element! */
NR_VFIO_CCW_STATES
};
@@ -81,6 +82,8 @@ enum vfio_ccw_event {
VFIO_CCW_EVENT_SSCH_REQ,
VFIO_CCW_EVENT_INTERRUPT,
VFIO_CCW_EVENT_SCH_EVENT,
+ VFIO_CCW_EVENT_ONLINE,
+ VFIO_CCW_EVENT_OFFLINE,
/* last element! */
NR_VFIO_CCW_EVENTS
};
--
2.7.4
We change the FSM functions to return the next state,
and adapt the fsm_func_t function type.
Signed-off-by: Pierre Morel <[email protected]>
---
drivers/s390/cio/vfio_ccw_fsm.c | 24 ++++++++++++++++--------
drivers/s390/cio/vfio_ccw_private.h | 5 +++--
2 files changed, 19 insertions(+), 10 deletions(-)
diff --git a/drivers/s390/cio/vfio_ccw_fsm.c b/drivers/s390/cio/vfio_ccw_fsm.c
index af88551..2f3108d 100644
--- a/drivers/s390/cio/vfio_ccw_fsm.c
+++ b/drivers/s390/cio/vfio_ccw_fsm.c
@@ -60,7 +60,7 @@ static int fsm_io_helper(struct vfio_ccw_private *private)
}
}
-static void fsm_notoper(struct vfio_ccw_private *private,
+static int fsm_notoper(struct vfio_ccw_private *private,
enum vfio_ccw_event event)
{
struct subchannel *sch = private->sch;
@@ -71,30 +71,34 @@ static void fsm_notoper(struct vfio_ccw_private *private,
*/
css_sched_sch_todo(sch, SCH_TODO_UNREG);
private->state = VFIO_CCW_STATE_NOT_OPER;
+ return private->state;
}
/*
* No operation action.
*/
-static void fsm_nop(struct vfio_ccw_private *private,
+static int fsm_nop(struct vfio_ccw_private *private,
enum vfio_ccw_event event)
{
+ return private->state;
}
-static void fsm_io_error(struct vfio_ccw_private *private,
+static int fsm_io_error(struct vfio_ccw_private *private,
enum vfio_ccw_event event)
{
pr_err("vfio-ccw: FSM: I/O request from state:%d\n", private->state);
private->io_region.ret_code = -EIO;
+ return private->state;
}
-static void fsm_io_busy(struct vfio_ccw_private *private,
+static int fsm_io_busy(struct vfio_ccw_private *private,
enum vfio_ccw_event event)
{
private->io_region.ret_code = -EBUSY;
+ return private->state;
}
-static void fsm_disabled_irq(struct vfio_ccw_private *private,
+static int fsm_disabled_irq(struct vfio_ccw_private *private,
enum vfio_ccw_event event)
{
struct subchannel *sch = private->sch;
@@ -104,12 +108,13 @@ static void fsm_disabled_irq(struct vfio_ccw_private *private,
* successful - should not happen, but we try to disable again.
*/
cio_disable_subchannel(sch);
+ return private->state;
}
/*
* Deal with the ccw command request from the userspace.
*/
-static void fsm_io_request(struct vfio_ccw_private *private,
+static int fsm_io_request(struct vfio_ccw_private *private,
enum vfio_ccw_event event)
{
union orb *orb;
@@ -141,7 +146,7 @@ static void fsm_io_request(struct vfio_ccw_private *private,
cp_free(&private->cp);
goto err_out;
}
- return;
+ return private->state;
} else if (scsw->cmd.fctl & SCSW_FCTL_HALT_FUNC) {
/* XXX: Handle halt. */
io_region->ret_code = -EOPNOTSUPP;
@@ -154,12 +159,13 @@ static void fsm_io_request(struct vfio_ccw_private *private,
err_out:
private->state = VFIO_CCW_STATE_IDLE;
+ return private->state;
}
/*
* Got an interrupt for a normal io (state busy).
*/
-static void fsm_irq(struct vfio_ccw_private *private,
+static int fsm_irq(struct vfio_ccw_private *private,
enum vfio_ccw_event event)
{
struct irb *irb = &private->irb;
@@ -172,6 +178,8 @@ static void fsm_irq(struct vfio_ccw_private *private,
if (private->io_trigger)
eventfd_signal(private->io_trigger, 1);
+
+ return private->state;
}
/*
diff --git a/drivers/s390/cio/vfio_ccw_private.h b/drivers/s390/cio/vfio_ccw_private.h
index 78a66d9..f526b18 100644
--- a/drivers/s390/cio/vfio_ccw_private.h
+++ b/drivers/s390/cio/vfio_ccw_private.h
@@ -83,13 +83,14 @@ enum vfio_ccw_event {
/*
* Action called through jumptable.
*/
-typedef void (fsm_func_t)(struct vfio_ccw_private *, enum vfio_ccw_event);
+typedef int (fsm_func_t)(struct vfio_ccw_private *, enum vfio_ccw_event);
extern fsm_func_t *vfio_ccw_jumptable[NR_VFIO_CCW_STATES][NR_VFIO_CCW_EVENTS];
static inline void vfio_ccw_fsm_event(struct vfio_ccw_private *private,
int event)
{
- vfio_ccw_jumptable[private->state][event](private, event);
+ private->state = vfio_ccw_jumptable[private->state][event](private,
+ event);
}
extern struct workqueue_struct *vfio_ccw_work_q;
--
2.7.4
VFIO_CCW_STATE_BOXED and VFIO_CCW_STATE_BUSY are the same
states.
Let's only keep one: VFIO_CCW_STATE_BUSY
Signed-off-by: Pierre Morel <[email protected]>
---
drivers/s390/cio/vfio_ccw_fsm.c | 9 ---------
drivers/s390/cio/vfio_ccw_private.h | 1 -
2 files changed, 10 deletions(-)
diff --git a/drivers/s390/cio/vfio_ccw_fsm.c b/drivers/s390/cio/vfio_ccw_fsm.c
index 444424e..b77b8ad 100644
--- a/drivers/s390/cio/vfio_ccw_fsm.c
+++ b/drivers/s390/cio/vfio_ccw_fsm.c
@@ -261,15 +261,6 @@ fsm_func_t *vfio_ccw_jumptable[NR_VFIO_CCW_STATES][NR_VFIO_CCW_EVENTS] = {
[VFIO_CCW_EVENT_INTERRUPT] = fsm_irq,
[VFIO_CCW_EVENT_SCH_EVENT] = fsm_sch_event,
},
- [VFIO_CCW_STATE_BOXED] = {
- [VFIO_CCW_EVENT_INIT] = fsm_nop,
- [VFIO_CCW_EVENT_ONLINE] = fsm_nop,
- [VFIO_CCW_EVENT_OFFLINE] = fsm_quiescing,
- [VFIO_CCW_EVENT_NOT_OPER] = fsm_notoper,
- [VFIO_CCW_EVENT_SSCH_REQ] = fsm_io_busy,
- [VFIO_CCW_EVENT_INTERRUPT] = fsm_irq,
- [VFIO_CCW_EVENT_SCH_EVENT] = fsm_sch_event,
- },
[VFIO_CCW_STATE_BUSY] = {
[VFIO_CCW_EVENT_INIT] = fsm_nop,
[VFIO_CCW_EVENT_ONLINE] = fsm_nop,
diff --git a/drivers/s390/cio/vfio_ccw_private.h b/drivers/s390/cio/vfio_ccw_private.h
index e7ea076..dbef727 100644
--- a/drivers/s390/cio/vfio_ccw_private.h
+++ b/drivers/s390/cio/vfio_ccw_private.h
@@ -66,7 +66,6 @@ enum vfio_ccw_state {
VFIO_CCW_STATE_NOT_OPER,
VFIO_CCW_STATE_STANDBY,
VFIO_CCW_STATE_IDLE,
- VFIO_CCW_STATE_BOXED,
VFIO_CCW_STATE_BUSY,
VFIO_CCW_STATE_QUIESCING,
/* last element! */
--
2.7.4
Having state changes out of IRQ context allows us to protect
critical sections with mutexes.
Later patches in this series will make use of this possibility.
We use work queues to thread the interrupts.
Signed-off-by: Pierre Morel <[email protected]>
---
drivers/s390/cio/vfio_ccw_drv.c | 21 ++++++++-------------
drivers/s390/cio/vfio_ccw_fsm.c | 14 ++++++++------
2 files changed, 16 insertions(+), 19 deletions(-)
diff --git a/drivers/s390/cio/vfio_ccw_drv.c b/drivers/s390/cio/vfio_ccw_drv.c
index ea6a2d0..f1b158c 100644
--- a/drivers/s390/cio/vfio_ccw_drv.c
+++ b/drivers/s390/cio/vfio_ccw_drv.c
@@ -70,20 +70,9 @@ int vfio_ccw_sch_quiesce(struct subchannel *sch)
static void vfio_ccw_sch_io_todo(struct work_struct *work)
{
struct vfio_ccw_private *private;
- struct irb *irb;
private = container_of(work, struct vfio_ccw_private, io_work);
- irb = &private->irb;
-
- if (scsw_is_solicited(&irb->scsw)) {
- cp_update_scsw(&private->cp, &irb->scsw);
- cp_free(&private->cp);
- }
- memcpy(private->io_region.irb_area, irb, sizeof(*irb));
-
- if (private->io_trigger)
- eventfd_signal(private->io_trigger, 1);
-
+ vfio_ccw_fsm_event(private, VFIO_CCW_EVENT_INTERRUPT);
if (private->mdev)
private->state = VFIO_CCW_STATE_IDLE;
}
@@ -94,9 +83,15 @@ static void vfio_ccw_sch_io_todo(struct work_struct *work)
static void vfio_ccw_sch_irq(struct subchannel *sch)
{
struct vfio_ccw_private *private = dev_get_drvdata(&sch->dev);
+ struct irb *irb = this_cpu_ptr(&cio_irb);
inc_irq_stat(IRQIO_CIO);
- vfio_ccw_fsm_event(private, VFIO_CCW_EVENT_INTERRUPT);
+ memcpy(&private->irb, irb, sizeof(*irb));
+
+ WARN_ON(work_pending(&private->io_work));
+ queue_work(vfio_ccw_work_q, &private->io_work);
+ if (private->completion)
+ complete(private->completion);
}
static int vfio_ccw_sch_probe(struct subchannel *sch)
diff --git a/drivers/s390/cio/vfio_ccw_fsm.c b/drivers/s390/cio/vfio_ccw_fsm.c
index c30420c..af88551 100644
--- a/drivers/s390/cio/vfio_ccw_fsm.c
+++ b/drivers/s390/cio/vfio_ccw_fsm.c
@@ -162,14 +162,16 @@ static void fsm_io_request(struct vfio_ccw_private *private,
static void fsm_irq(struct vfio_ccw_private *private,
enum vfio_ccw_event event)
{
- struct irb *irb = this_cpu_ptr(&cio_irb);
+ struct irb *irb = &private->irb;
- memcpy(&private->irb, irb, sizeof(*irb));
-
- queue_work(vfio_ccw_work_q, &private->io_work);
+ if (scsw_is_solicited(&irb->scsw)) {
+ cp_update_scsw(&private->cp, &irb->scsw);
+ cp_free(&private->cp);
+ }
+ memcpy(private->io_region.irb_area, irb, sizeof(*irb));
- if (private->completion)
- complete(private->completion);
+ if (private->io_trigger)
+ eventfd_signal(private->io_trigger, 1);
}
/*
--
2.7.4
In the current implementation, we do not want to start a new SSCH
command before the last one ends.
Signed-off-by: Pierre Morel <[email protected]>
---
drivers/s390/cio/vfio_ccw_fsm.c | 3 +++
drivers/s390/cio/vfio_ccw_ops.c | 21 ++++++++++++++++++++-
drivers/s390/cio/vfio_ccw_private.h | 4 +++-
3 files changed, 26 insertions(+), 2 deletions(-)
diff --git a/drivers/s390/cio/vfio_ccw_fsm.c b/drivers/s390/cio/vfio_ccw_fsm.c
index b77b8ad..4140292 100644
--- a/drivers/s390/cio/vfio_ccw_fsm.c
+++ b/drivers/s390/cio/vfio_ccw_fsm.c
@@ -195,6 +195,9 @@ static int fsm_irq(struct vfio_ccw_private *private)
if (private->io_trigger)
eventfd_signal(private->io_trigger, 1);
+ if (private->io_completion)
+ complete(private->io_completion);
+
return VFIO_CCW_STATE_IDLE;
}
diff --git a/drivers/s390/cio/vfio_ccw_ops.c b/drivers/s390/cio/vfio_ccw_ops.c
index f0f4071..346532d 100644
--- a/drivers/s390/cio/vfio_ccw_ops.c
+++ b/drivers/s390/cio/vfio_ccw_ops.c
@@ -171,6 +171,8 @@ static ssize_t vfio_ccw_mdev_write(struct mdev_device *mdev,
struct vfio_ccw_private *private;
struct ccw_io_region *region;
union scsw *scsw;
+ int max_retries = 5;
+ DECLARE_COMPLETION_ONSTACK(completion);
if (*ppos + count > sizeof(*region))
return -EINVAL;
@@ -185,7 +187,24 @@ static ssize_t vfio_ccw_mdev_write(struct mdev_device *mdev,
if ((scsw->cmd.fctl & SCSW_FCTL_START_FUNC) != SCSW_FCTL_START_FUNC)
return -EINVAL;
- vfio_ccw_fsm_event(private, VFIO_CCW_EVENT_SSCH_REQ);
+ do {
+ switch (private->state) {
+ case VFIO_CCW_STATE_BUSY:
+ private->io_completion = &completion;
+ wait_for_completion(&completion);
+ break;
+ case VFIO_CCW_STATE_IDLE:
+ if (!vfio_ccw_fsm_event(private,
+ VFIO_CCW_EVENT_SSCH_REQ))
+ return count;
+ break;
+ default:
+ return -EBUSY;
+ }
+ } while (max_retries--);
+
+ if (max_retries <= 0)
+ return -EBUSY;
if (region->ret_code != 0)
return region->ret_code;
diff --git a/drivers/s390/cio/vfio_ccw_private.h b/drivers/s390/cio/vfio_ccw_private.h
index dbef727..7cca078 100644
--- a/drivers/s390/cio/vfio_ccw_private.h
+++ b/drivers/s390/cio/vfio_ccw_private.h
@@ -39,6 +39,7 @@ struct vfio_ccw_private {
struct subchannel *sch;
int state;
struct completion *completion;
+ struct completion *io_completion;
atomic_t avail;
struct mdev_device *mdev;
struct notifier_block nb;
@@ -93,12 +94,13 @@ enum vfio_ccw_event {
typedef int (fsm_func_t)(struct vfio_ccw_private *);
extern fsm_func_t *vfio_ccw_jumptable[NR_VFIO_CCW_STATES][NR_VFIO_CCW_EVENTS];
-static inline void vfio_ccw_fsm_event(struct vfio_ccw_private *private,
+static inline int vfio_ccw_fsm_event(struct vfio_ccw_private *private,
int event)
{
mutex_lock(&private->state_mutex);
private->state = vfio_ccw_jumptable[private->state][event](private);
mutex_unlock(&private->state_mutex);
+ return private->io_region.ret_code;
}
extern struct workqueue_struct *vfio_ccw_work_q;
--
2.7.4
On 24/04/2018 09:25, Dong Jia Shi wrote:
> * Pierre Morel <[email protected]> [2018-04-19 16:48:05 +0200]:
>
>> We change the FSM functions to return the next state,
>> and adapt the fsm_func_t function type.
> I think I'd need to read the rest patches to understand why we need this
> one, but no hurt to write some ideas that I noticed at my first glance.
> See below please.
>
>> Signed-off-by: Pierre Morel <[email protected]>
>> ---
>> drivers/s390/cio/vfio_ccw_fsm.c | 24 ++++++++++++++++--------
>> drivers/s390/cio/vfio_ccw_private.h | 5 +++--
>> 2 files changed, 19 insertions(+), 10 deletions(-)
>>
>> diff --git a/drivers/s390/cio/vfio_ccw_fsm.c b/drivers/s390/cio/vfio_ccw_fsm.c
>> index af88551..2f3108d 100644
>> --- a/drivers/s390/cio/vfio_ccw_fsm.c
>> +++ b/drivers/s390/cio/vfio_ccw_fsm.c
>> @@ -60,7 +60,7 @@ static int fsm_io_helper(struct vfio_ccw_private *private)
>> }
>> }
>>
>> -static void fsm_notoper(struct vfio_ccw_private *private,
>> +static int fsm_notoper(struct vfio_ccw_private *private,
>> enum vfio_ccw_event event)
>> {
>> struct subchannel *sch = private->sch;
>> @@ -71,30 +71,34 @@ static void fsm_notoper(struct vfio_ccw_private *private,
>> */
>> css_sched_sch_todo(sch, SCH_TODO_UNREG);
>> private->state = VFIO_CCW_STATE_NOT_OPER;
> Here..
>
>> + return private->state;
>> }
>>
>> /*
>> * No operation action.
>> */
>> -static void fsm_nop(struct vfio_ccw_private *private,
>> +static int fsm_nop(struct vfio_ccw_private *private,
>> enum vfio_ccw_event event)
>> {
>> + return private->state;
>> }
>>
>> -static void fsm_io_error(struct vfio_ccw_private *private,
>> +static int fsm_io_error(struct vfio_ccw_private *private,
>> enum vfio_ccw_event event)
>> {
>> pr_err("vfio-ccw: FSM: I/O request from state:%d\n", private->state);
>> private->io_region.ret_code = -EIO;
>> + return private->state;
>> }
>>
>> -static void fsm_io_busy(struct vfio_ccw_private *private,
>> +static int fsm_io_busy(struct vfio_ccw_private *private,
>> enum vfio_ccw_event event)
>> {
>> private->io_region.ret_code = -EBUSY;
>> + return private->state;
>> }
>>
>> -static void fsm_disabled_irq(struct vfio_ccw_private *private,
>> +static int fsm_disabled_irq(struct vfio_ccw_private *private,
>> enum vfio_ccw_event event)
>> {
>> struct subchannel *sch = private->sch;
>> @@ -104,12 +108,13 @@ static void fsm_disabled_irq(struct vfio_ccw_private *private,
>> * successful - should not happen, but we try to disable again.
>> */
>> cio_disable_subchannel(sch);
>> + return private->state;
>> }
>>
>> /*
>> * Deal with the ccw command request from the userspace.
>> */
>> -static void fsm_io_request(struct vfio_ccw_private *private,
>> +static int fsm_io_request(struct vfio_ccw_private *private,
>> enum vfio_ccw_event event)
>> {
>> union orb *orb;
>> @@ -141,7 +146,7 @@ static void fsm_io_request(struct vfio_ccw_private *private,
>> cp_free(&private->cp);
>> goto err_out;
>> }
>> - return;
>> + return private->state;
>> } else if (scsw->cmd.fctl & SCSW_FCTL_HALT_FUNC) {
>> /* XXX: Handle halt. */
>> io_region->ret_code = -EOPNOTSUPP;
>> @@ -154,12 +159,13 @@ static void fsm_io_request(struct vfio_ccw_private *private,
>>
>> err_out:
>> private->state = VFIO_CCW_STATE_IDLE;
> Here..
>
>> + return private->state;
>> }
>>
>> /*
>> * Got an interrupt for a normal io (state busy).
>> */
>> -static void fsm_irq(struct vfio_ccw_private *private,
>> +static int fsm_irq(struct vfio_ccw_private *private,
>> enum vfio_ccw_event event)
>> {
>> struct irb *irb = &private->irb;
>> @@ -172,6 +178,8 @@ static void fsm_irq(struct vfio_ccw_private *private,
>>
>> if (private->io_trigger)
>> eventfd_signal(private->io_trigger, 1);
>> +
>> + return private->state;
>> }
>>
>> /*
>> diff --git a/drivers/s390/cio/vfio_ccw_private.h b/drivers/s390/cio/vfio_ccw_private.h
>> index 78a66d9..f526b18 100644
>> --- a/drivers/s390/cio/vfio_ccw_private.h
>> +++ b/drivers/s390/cio/vfio_ccw_private.h
>> @@ -83,13 +83,14 @@ enum vfio_ccw_event {
>> /*
>> * Action called through jumptable.
>> */
>> -typedef void (fsm_func_t)(struct vfio_ccw_private *, enum vfio_ccw_event);
>> +typedef int (fsm_func_t)(struct vfio_ccw_private *, enum vfio_ccw_event);
>> extern fsm_func_t *vfio_ccw_jumptable[NR_VFIO_CCW_STATES][NR_VFIO_CCW_EVENTS];
>>
>> static inline void vfio_ccw_fsm_event(struct vfio_ccw_private *private,
>> int event)
>> {
>> - vfio_ccw_jumptable[private->state][event](private, event);
>> + private->state = vfio_ccw_jumptable[private->state][event](private,
>> + event);
> Since here it assigns new value to private->state, there is no need to
> do that inside each fsm_func?
Absolutely.
In this patch I just kept the previous code, only adding the
return of private->state in the functions.
Merging the state and the return value is done in a later patch.
If you prefer, I can do it in this patch.
>
>> }
>>
>> extern struct workqueue_struct *vfio_ccw_work_q;
>> --
>> 2.7.4
>>
--
Pierre Morel
Linux/KVM/QEMU in Böblingen - Germany
On Tue, 24 Apr 2018 10:40:56 +0200
Pierre Morel <[email protected]> wrote:
> On 24/04/2018 08:54, Dong Jia Shi wrote:
> > * Pierre Morel <[email protected]> [2018-04-19 16:48:04 +0200]:
> >
> > [...]
> >
> >> @@ -94,9 +83,15 @@ static void vfio_ccw_sch_io_todo(struct work_struct *work)
> >> static void vfio_ccw_sch_irq(struct subchannel *sch)
> >> {
> >> struct vfio_ccw_private *private = dev_get_drvdata(&sch->dev);
> >> + struct irb *irb = this_cpu_ptr(&cio_irb);
> >>
> >> inc_irq_stat(IRQIO_CIO);
> >> - vfio_ccw_fsm_event(private, VFIO_CCW_EVENT_INTERRUPT);
> >> + memcpy(&private->irb, irb, sizeof(*irb));
> >> +
> >> + WARN_ON(work_pending(&private->io_work));
> > Hmm, why do we need this?
>
The current design ensures that we do not have two concurrent SSCH requests.
However, I want to track spurious interrupts here.
If we implement cancel, halt or clear requests, we may also (AFAIU) trigger
a second interrupt depending on races between instructions, controller
> and device.
You won't get an interrupt for a successful cancel. If you do a
halt/clear, you will make the subchannel halt/clear pending in addition
to start pending and you'll only get one interrupt (if the I/O has
progressed far enough, you won't be able to issue a hsch). The
interesting case is:
- guest does a ssch, we do a ssch on the device
- the guest does a csch before it got the interrupt for the ssch
- before we do the csch on the device, the subchannel is already status
pending with completion of the ssch
- after we issue the csch, we get a second interrupt (for the csch)
I think we should present two interrupts to the guest in that case.
Races between issuing ssch/hsch/csch and the subchannel becoming status
pending happen on real hardware as well, we're just more likely to see
them with the vfio layer in between.
(I'm currently trying to recall what we're doing with unsolicited
interrupts. These are fun wrt deferred cc 1; I'm not sure if there are
cases where we want to present a deferred cc to the guest.)
Also, doing a second ssch before we got final state for the first one
is perfectly valid. Linux just does not do it, so I'm not sure if we
should invest too much time there.
>
> We do not need it strongly.
>
> >
> >> + queue_work(vfio_ccw_work_q, &private->io_work);
> >> + if (private->completion)
> >> + complete(private->completion);
> >> }
> >>
> >> static int vfio_ccw_sch_probe(struct subchannel *sch)
> > [...]
> >
>
On 24/04/2018 11:59, Cornelia Huck wrote:
> On Tue, 24 Apr 2018 10:40:56 +0200
> Pierre Morel <[email protected]> wrote:
>
>> On 24/04/2018 08:54, Dong Jia Shi wrote:
>>> * Pierre Morel <[email protected]> [2018-04-19 16:48:04 +0200]:
>>>
>>> [...]
>>>
>>>> @@ -94,9 +83,15 @@ static void vfio_ccw_sch_io_todo(struct work_struct *work)
>>>> static void vfio_ccw_sch_irq(struct subchannel *sch)
>>>> {
>>>> struct vfio_ccw_private *private = dev_get_drvdata(&sch->dev);
>>>> + struct irb *irb = this_cpu_ptr(&cio_irb);
>>>>
>>>> inc_irq_stat(IRQIO_CIO);
>>>> - vfio_ccw_fsm_event(private, VFIO_CCW_EVENT_INTERRUPT);
>>>> + memcpy(&private->irb, irb, sizeof(*irb));
>>>> +
>>>> + WARN_ON(work_pending(&private->io_work));
>>> Hmm, why do we need this?
>> The current design insure that we have not two concurrent SSCH requests.
>> How ever I want here to track spurious interrupt.
>> If we implement cancel, halt or clear requests, we also may trigger (AFAIU)
>> a second interrupts depending on races between instructions, controller
>> and device.
> You won't get an interrupt for a successful cancel. If you do a
> halt/clear, you will make the subchannel halt/clear pending in addition
> to start pending and you'll only get one interrupt (if the I/O has
> progressed far enough, you won't be able to issue a hsch). The
> interesting case is:
> - guest does a ssch, we do a ssch on the device
> - the guest does a csch before it got the interrupt for the ssch
> - before we do the csch on the device, the subchannel is already status
> pending with completion of the ssch
> - after we issue the csch, we get a second interrupt (for the csch)
We agree.
>
> I think we should present two interrupts to the guest in that case.
> Races between issuing ssch/hsch/csch and the subchannel becoming status
> pending happen on real hardware as well, we're just more likely to see
> them with the vfio layer in between.
Yes, agreed too.
>
> (I'm currently trying to recall what we're doing with unsolicited
> interrupts. These are fun wrt deferred cc 1; I'm not sure if there are
> cases where we want to present a deferred cc to the guest.)
This patch does not change the current functionality, it only
consolidates the FSM.
AFAIU, the current way to handle unsolicited interrupts is to report
them to the guest along with the deferred code.
>
> Also, doing a second ssch before we got final state for the first one
> is perfectly valid. Linux just does not do it, so I'm not sure if we
> should invest too much time there.
I agree too, it would just make things unnecessary complicated.
--
Pierre Morel
Linux/KVM/QEMU in Böblingen - Germany
On 24/04/2018 13:55, Cornelia Huck wrote:
> On Tue, 24 Apr 2018 13:49:14 +0200
> Pierre Morel <[email protected]> wrote:
>
>> On 24/04/2018 11:59, Cornelia Huck wrote:
>>> On Tue, 24 Apr 2018 10:40:56 +0200
>>> Pierre Morel <[email protected]> wrote:
>>>
>>>> On 24/04/2018 08:54, Dong Jia Shi wrote:
>>>>> * Pierre Morel <[email protected]> [2018-04-19 16:48:04 +0200]:
>>>>>
>>>>> [...]
>>>>>
>>>>>> @@ -94,9 +83,15 @@ static void vfio_ccw_sch_io_todo(struct work_struct *work)
>>>>>> static void vfio_ccw_sch_irq(struct subchannel *sch)
>>>>>> {
>>>>>> struct vfio_ccw_private *private = dev_get_drvdata(&sch->dev);
>>>>>> + struct irb *irb = this_cpu_ptr(&cio_irb);
>>>>>>
>>>>>> inc_irq_stat(IRQIO_CIO);
>>>>>> - vfio_ccw_fsm_event(private, VFIO_CCW_EVENT_INTERRUPT);
>>>>>> + memcpy(&private->irb, irb, sizeof(*irb));
>>>>>> +
>>>>>> + WARN_ON(work_pending(&private->io_work));
>>>>> Hmm, why do we need this?
>>>> The current design insure that we have not two concurrent SSCH requests.
>>>> How ever I want here to track spurious interrupt.
>>>> If we implement cancel, halt or clear requests, we also may trigger (AFAIU)
>>>> a second interrupts depending on races between instructions, controller
>>>> and device.
>>> You won't get an interrupt for a successful cancel. If you do a
>>> halt/clear, you will make the subchannel halt/clear pending in addition
>>> to start pending and you'll only get one interrupt (if the I/O has
>>> progressed far enough, you won't be able to issue a hsch). The
>>> interesting case is:
>>> - guest does a ssch, we do a ssch on the device
>>> - the guest does a csch before it got the interrupt for the ssch
>>> - before we do the csch on the device, the subchannel is already status
>>> pending with completion of the ssch
>>> - after we issue the csch, we get a second interrupt (for the csch)
>> We agree.
>>
>>> I think we should present two interrupts to the guest in that case.
>>> Races between issuing ssch/hsch/csch and the subchannel becoming status
>>> pending happen on real hardware as well, we're just more likely to see
>>> them with the vfio layer in between.
>> Yes, agreed too.
>>
>>> (I'm currently trying to recall what we're doing with unsolicited
>>> interrupts. These are fun wrt deferred cc 1; I'm not sure if there are
>>> cases where we want to present a deferred cc to the guest.)
>> This patch does not change the current functionalities, only
>> consolidates the FSM.
>> The current way to handle unsolicited interrupts is to report them to
>> the guest
>> along with the deferred code AFAIU.
> My question was more along the line of "do we actually want to
> _generate_ a deferred cc1 or unsolicited interrupt, based upon what we
> do in our state machine". My guess is no, regardless of the changes you
> do in this series.
>
>>> Also, doing a second ssch before we got final state for the first one
>>> is perfectly valid. Linux just does not do it, so I'm not sure if we
>>> should invest too much time there.
>> I agree too, it would just make things unnecessary complicated.
> I'm a big fan of just throwing everything at the hardware and let it
> sort out any races etc. We just need to be sure we don't mix up
> interrupts :)
>
OK, I understand. I can do something in the interrupt handler to make
sure we do not lose interrupts.
I will make a proposal in V2.
Thanks,
Pierre
--
Pierre Morel
Linux/KVM/QEMU in Böblingen - Germany
On 24/04/2018 08:54, Dong Jia Shi wrote:
> * Pierre Morel <[email protected]> [2018-04-19 16:48:04 +0200]:
>
> [...]
>
>> @@ -94,9 +83,15 @@ static void vfio_ccw_sch_io_todo(struct work_struct *work)
>> static void vfio_ccw_sch_irq(struct subchannel *sch)
>> {
>> struct vfio_ccw_private *private = dev_get_drvdata(&sch->dev);
>> + struct irb *irb = this_cpu_ptr(&cio_irb);
>>
>> inc_irq_stat(IRQIO_CIO);
>> - vfio_ccw_fsm_event(private, VFIO_CCW_EVENT_INTERRUPT);
>> + memcpy(&private->irb, irb, sizeof(*irb));
>> +
>> + WARN_ON(work_pending(&private->io_work));
> Hmm, why do we need this?
The current design ensures that we do not have two concurrent SSCH requests.
However, I want to track spurious interrupts here.
If we implement cancel, halt or clear requests, we may also (AFAIU) trigger
a second interrupt depending on races between instructions, controller
and device.
We do not need it strongly.
>
>> + queue_work(vfio_ccw_work_q, &private->io_work);
>> + if (private->completion)
>> + complete(private->completion);
>> }
>>
>> static int vfio_ccw_sch_probe(struct subchannel *sch)
> [...]
>
--
Pierre Morel
Linux/KVM/QEMU in Böblingen - Germany
On Tue, 24 Apr 2018 13:49:14 +0200
Pierre Morel <[email protected]> wrote:
> On 24/04/2018 11:59, Cornelia Huck wrote:
> > On Tue, 24 Apr 2018 10:40:56 +0200
> > Pierre Morel <[email protected]> wrote:
> >
> >> On 24/04/2018 08:54, Dong Jia Shi wrote:
> >>> * Pierre Morel <[email protected]> [2018-04-19 16:48:04 +0200]:
> >>>
> >>> [...]
> >>>
> >>>> @@ -94,9 +83,15 @@ static void vfio_ccw_sch_io_todo(struct work_struct *work)
> >>>> static void vfio_ccw_sch_irq(struct subchannel *sch)
> >>>> {
> >>>> struct vfio_ccw_private *private = dev_get_drvdata(&sch->dev);
> >>>> + struct irb *irb = this_cpu_ptr(&cio_irb);
> >>>>
> >>>> inc_irq_stat(IRQIO_CIO);
> >>>> - vfio_ccw_fsm_event(private, VFIO_CCW_EVENT_INTERRUPT);
> >>>> + memcpy(&private->irb, irb, sizeof(*irb));
> >>>> +
> >>>> + WARN_ON(work_pending(&private->io_work));
> >>> Hmm, why do we need this?
> >> The current design insure that we have not two concurrent SSCH requests.
> >> How ever I want here to track spurious interrupt.
> >> If we implement cancel, halt or clear requests, we also may trigger (AFAIU)
> >> a second interrupts depending on races between instructions, controller
> >> and device.
> > You won't get an interrupt for a successful cancel. If you do a
> > halt/clear, you will make the subchannel halt/clear pending in addition
> > to start pending and you'll only get one interrupt (if the I/O has
> > progressed far enough, you won't be able to issue a hsch). The
> > interesting case is:
> > - guest does a ssch, we do a ssch on the device
> > - the guest does a csch before it got the interrupt for the ssch
> > - before we do the csch on the device, the subchannel is already status
> > pending with completion of the ssch
> > - after we issue the csch, we get a second interrupt (for the csch)
>
> We agree.
>
> >
> > I think we should present two interrupts to the guest in that case.
> > Races between issuing ssch/hsch/csch and the subchannel becoming status
> > pending happen on real hardware as well, we're just more likely to see
> > them with the vfio layer in between.
>
> Yes, agreed too.
>
> >
> > (I'm currently trying to recall what we're doing with unsolicited
> > interrupts. These are fun wrt deferred cc 1; I'm not sure if there are
> > cases where we want to present a deferred cc to the guest.)
>
> This patch does not change the current functionalities, only
> consolidates the FSM.
> The current way to handle unsolicited interrupts is to report them to
> the guest
> along with the deferred code AFAIU.
My question was more along the line of "do we actually want to
_generate_ a deferred cc1 or unsolicited interrupt, based upon what we
do in our state machine". My guess is no, regardless of the changes you
do in this series.
>
> >
> > Also, doing a second ssch before we got final state for the first one
> > is perfectly valid. Linux just does not do it, so I'm not sure if we
> > should invest too much time there.
>
> I agree too, it would just make things unnecessary complicated.
I'm a big fan of just throwing everything at the hardware and let it
sort out any races etc. We just need to be sure we don't mix up
interrupts :)
On 04/24/2018 11:59 AM, Cornelia Huck wrote:
> On Tue, 24 Apr 2018 10:40:56 +0200
> Pierre Morel <[email protected]> wrote:
>
>> On 24/04/2018 08:54, Dong Jia Shi wrote:
>>> * Pierre Morel <[email protected]> [2018-04-19 16:48:04 +0200]:
>>>
>>> [...]
>>>
>>>> @@ -94,9 +83,15 @@ static void vfio_ccw_sch_io_todo(struct work_struct *work)
>>>> static void vfio_ccw_sch_irq(struct subchannel *sch)
>>>> {
>>>> struct vfio_ccw_private *private = dev_get_drvdata(&sch->dev);
>>>> + struct irb *irb = this_cpu_ptr(&cio_irb);
>>>>
>>>> inc_irq_stat(IRQIO_CIO);
>>>> - vfio_ccw_fsm_event(private, VFIO_CCW_EVENT_INTERRUPT);
>>>> + memcpy(&private->irb, irb, sizeof(*irb));
>>>> +
>>>> + WARN_ON(work_pending(&private->io_work));
>>> Hmm, why do we need this?
>>
>> The current design insure that we have not two concurrent SSCH requests.
>> How ever I want here to track spurious interrupt.
>> If we implement cancel, halt or clear requests, we also may trigger (AFAIU)
>> a second interrupts depending on races between instructions, controller
>> and device.
>
> You won't get an interrupt for a successful cancel. If you do a
> halt/clear, you will make the subchannel halt/clear pending in addition
> to start pending and you'll only get one interrupt (if the I/O has
> progressed far enough, you won't be able to issue a hsch). The
> interesting case is:
> - guest does a ssch, we do a ssch on the device
> - the guest does a csch before it got the interrupt for the ssch
> - before we do the csch on the device, the subchannel is already status
> pending with completion of the ssch
> - after we issue the csch, we get a second interrupt (for the csch)
>
> I think we should present two interrupts to the guest in that case.
> Races between issuing ssch/hsch/csch and the subchannel becoming status
> pending happen on real hardware as well, we're just more likely to see
> them with the vfio layer in between.
>
AFAIU this will be the problem of the person implementing the clear
and the halt for vfio-ccw. I.e. it's a non-problem right now.
> (I'm currently trying to recall what we're doing with unsolicited
> interrupts. These are fun wrt deferred cc 1; I'm not sure if there are
> cases where we want to present a deferred cc to the guest.)
What are 'fun wrt deferred cc 1' again? The deferred cc I understand
but wrt does not click at all.
>
> Also, doing a second ssch before we got final state for the first one
> is perfectly valid. Linux just does not do it, so I'm not sure if we
> should invest too much time there.
>
>>
>> We do not need it strongly.
>>
>>>
>>>> + queue_work(vfio_ccw_work_q, &private->io_work);
>>>> + if (private->completion)
>>>> + complete(private->completion);
>>>> }
>>>>
>>>> static int vfio_ccw_sch_probe(struct subchannel *sch)
>>> [...]
>>>
>>
>
On Tue, 24 Apr 2018 18:42:38 +0200
Halil Pasic <[email protected]> wrote:
> On 04/24/2018 11:59 AM, Cornelia Huck wrote:
> > On Tue, 24 Apr 2018 10:40:56 +0200
> > Pierre Morel <[email protected]> wrote:
> >
> >> On 24/04/2018 08:54, Dong Jia Shi wrote:
> >>> * Pierre Morel <[email protected]> [2018-04-19 16:48:04 +0200]:
> >>>
> >>> [...]
> >>>
> >>>> @@ -94,9 +83,15 @@ static void vfio_ccw_sch_io_todo(struct work_struct *work)
> >>>> static void vfio_ccw_sch_irq(struct subchannel *sch)
> >>>> {
> >>>> struct vfio_ccw_private *private = dev_get_drvdata(&sch->dev);
> >>>> + struct irb *irb = this_cpu_ptr(&cio_irb);
> >>>>
> >>>> inc_irq_stat(IRQIO_CIO);
> >>>> - vfio_ccw_fsm_event(private, VFIO_CCW_EVENT_INTERRUPT);
> >>>> + memcpy(&private->irb, irb, sizeof(*irb));
> >>>> +
> >>>> + WARN_ON(work_pending(&private->io_work));
> >>> Hmm, why do we need this?
> >>
>> The current design ensures that we do not have two concurrent SSCH requests.
>> However, I want here to track spurious interrupts.
>> If we implement cancel, halt or clear requests, we also may trigger (AFAIU)
>> a second interrupt depending on races between instructions, controller
> >> and device.
> >
> > You won't get an interrupt for a successful cancel. If you do a
> > halt/clear, you will make the subchannel halt/clear pending in addition
> > to start pending and you'll only get one interrupt (if the I/O has
> > progressed far enough, you won't be able to issue a hsch). The
> > interesting case is:
> > - guest does a ssch, we do a ssch on the device
> > - the guest does a csch before it got the interrupt for the ssch
> > - before we do the csch on the device, the subchannel is already status
> > pending with completion of the ssch
> > - after we issue the csch, we get a second interrupt (for the csch)
> >
> > I think we should present two interrupts to the guest in that case.
> > Races between issuing ssch/hsch/csch and the subchannel becoming status
> > pending happen on real hardware as well, we're just more likely to see
> > them with the vfio layer in between.
> >
>
> AFAIU this will be the problem of the person implementing the clear
> and the halt for vfio-ccw. I.e. it's a non-problem right now.
Well, that person is me :) I will post some RFC Real Soon Now if I stop
getting sidetracked...
>
> > (I'm currently trying to recall what we're doing with unsolicited
> > interrupts. These are fun wrt deferred cc 1; I'm not sure if there are
> > cases where we want to present a deferred cc to the guest.)
>
> What are 'fun wrt deferred cc 1' again? The deferred cc I understand
> but wrt does not click at all.
wrt == with regard to
(Or were you asking something else?)
>
> >
> > Also, doing a second ssch before we got final state for the first one
> > is perfectly valid. Linux just does not do it, so I'm not sure if we
> > should invest too much time there.
> >
> >>
> >> We do not need it strongly.
> >>
> >>>
> >>>> + queue_work(vfio_ccw_work_q, &private->io_work);
> >>>> + if (private->completion)
> >>>> + complete(private->completion);
> >>>> }
> >>>>
> >>>> static int vfio_ccw_sch_probe(struct subchannel *sch)
> >>> [...]
> >>>
> >>
> >
>
On Thu, 19 Apr 2018 16:48:06 +0200
Pierre Morel <[email protected]> wrote:
> The Sub channel event callback is threaded using workqueues.
> The work uses the FSM introducing the VFIO_CCW_EVENT_SCH_EVENT
> event.
I don't think this is a good name; after all, all of the events are
events for the subchannel :)
This seems to be more of a "we need to update the schib" event...
VFIO_CCW_EVENT_SCHIB_CHANGED? _SCH_CHANGED? _UPDATE_NEEDED?
Tbh, I'm not quite sure this makes sense for me yet... will continue
reading, but this probably needs a 'why'.
> The update of the SCHIB is now done inside the FSM function.
>
> Signed-off-by: Pierre Morel <[email protected]>
> ---
> drivers/s390/cio/vfio_ccw_drv.c | 33 +++++++++++++--------------------
> drivers/s390/cio/vfio_ccw_fsm.c | 23 +++++++++++++++++++++++
> drivers/s390/cio/vfio_ccw_private.h | 3 +++
> 3 files changed, 39 insertions(+), 20 deletions(-)
On Thu, 19 Apr 2018 16:48:07 +0200
Pierre Morel <[email protected]> wrote:
> This patch simplifies the IO request handling to handle the only
> implemented request: SSCH.
I *really* need to post my halt/clear patches soon, I think.
> Other requests are invalid and get a return value of -EINVAL.
This is a user API change: we got -EOPNOTSUPP in the region's return
code before.
>
> This patch changes the event name to VFIO_CCW_EVENT_SSCH_REQ to reflect
> what is done and prepare for future implementation of other requests.
>
> Signed-off-by: Pierre Morel <[email protected]>
> ---
> drivers/s390/cio/vfio_ccw_fsm.c | 63 +++++++++++++------------------------
> drivers/s390/cio/vfio_ccw_ops.c | 9 ++++--
> drivers/s390/cio/vfio_ccw_private.h | 2 +-
> 3 files changed, 29 insertions(+), 45 deletions(-)
> diff --git a/drivers/s390/cio/vfio_ccw_ops.c b/drivers/s390/cio/vfio_ccw_ops.c
> index 41eeb57..4da7b61 100644
> --- a/drivers/s390/cio/vfio_ccw_ops.c
> +++ b/drivers/s390/cio/vfio_ccw_ops.c
> @@ -188,19 +188,22 @@ static ssize_t vfio_ccw_mdev_write(struct mdev_device *mdev,
> {
> struct vfio_ccw_private *private;
> struct ccw_io_region *region;
> + union scsw *scsw;
>
> if (*ppos + count > sizeof(*region))
> return -EINVAL;
>
> private = dev_get_drvdata(mdev_parent_dev(mdev));
> - if (private->state != VFIO_CCW_STATE_IDLE)
> - return -EACCES;
>
> region = &private->io_region;
> if (copy_from_user((void *)region + *ppos, buf, count))
> return -EFAULT;
>
> - vfio_ccw_fsm_event(private, VFIO_CCW_EVENT_IO_REQ);
> + scsw = (union scsw *) &region->scsw_area;
> + if ((scsw->cmd.fctl & SCSW_FCTL_START_FUNC) != SCSW_FCTL_START_FUNC)
You should not allow the halt/clear functions to be specified, if you
go that route. The precedence order needs to be clear -> halt -> start.
> + return -EINVAL;
As said, that's a user api change. Previously, user space could detect
whether halt/clear are supported or not by simply specifying the
halt/clear function and checking for -EOPNOTSUPP in the region's return
code. Now they get -EINVAL (which I think is not a good return code,
even if we did not have the api breakage).
> +
> + vfio_ccw_fsm_event(private, VFIO_CCW_EVENT_SSCH_REQ);
> if (region->ret_code != 0) {
> private->state = VFIO_CCW_STATE_IDLE;
> return region->ret_code;
> diff --git a/drivers/s390/cio/vfio_ccw_private.h b/drivers/s390/cio/vfio_ccw_private.h
> index 3284e64..93aab87 100644
> --- a/drivers/s390/cio/vfio_ccw_private.h
> +++ b/drivers/s390/cio/vfio_ccw_private.h
> @@ -76,7 +76,7 @@ enum vfio_ccw_state {
> */
> enum vfio_ccw_event {
> VFIO_CCW_EVENT_NOT_OPER,
> - VFIO_CCW_EVENT_IO_REQ,
> + VFIO_CCW_EVENT_SSCH_REQ,
> VFIO_CCW_EVENT_INTERRUPT,
> VFIO_CCW_EVENT_SCH_EVENT,
> /* last element! */
I don't think we should separate the ssch handling. The major
difference to halt/clear is that it needs channel program translation.
Everything else (issuing the instruction and processing the interrupt)
are basically the same. If we just throw everything at the hardware and
let the host's channel subsystem figure it out, we already should be
fine with regard to most of the races.
On Thu, 19 Apr 2018 16:48:12 +0200
Pierre Morel <[email protected]> wrote:
> VFIO_CCW_STATE_BOXED and VFIO_CCW_STATE_BUSY are the same
> states.
> Let's only keep one: VFIO_CCW_STATE_BUSY
>
> Signed-off-by: Pierre Morel <[email protected]>
> ---
> drivers/s390/cio/vfio_ccw_fsm.c | 9 ---------
> drivers/s390/cio/vfio_ccw_private.h | 1 -
> 2 files changed, 10 deletions(-)
I think they were initially supposed to cover two different things:
- BUSY: we're currently dealing with an I/O request
- BOXED: the device currently won't talk to us or we won't talk to it
It seems we never really did anything useful with BOXED; but should we?
On Thu, 19 Apr 2018 16:48:13 +0200
Pierre Morel <[email protected]> wrote:
> In the current implementation, we do not want to start a new SSCH
> command before the last one ends.
>
> Signed-off-by: Pierre Morel <[email protected]>
> ---
> drivers/s390/cio/vfio_ccw_fsm.c | 3 +++
> drivers/s390/cio/vfio_ccw_ops.c | 21 ++++++++++++++++++++-
> drivers/s390/cio/vfio_ccw_private.h | 4 +++-
> 3 files changed, 26 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/s390/cio/vfio_ccw_fsm.c b/drivers/s390/cio/vfio_ccw_fsm.c
> index b77b8ad..4140292 100644
> --- a/drivers/s390/cio/vfio_ccw_fsm.c
> +++ b/drivers/s390/cio/vfio_ccw_fsm.c
> @@ -195,6 +195,9 @@ static int fsm_irq(struct vfio_ccw_private *private)
> if (private->io_trigger)
> eventfd_signal(private->io_trigger, 1);
>
> + if (private->io_completion)
> + complete(private->io_completion);
> +
> return VFIO_CCW_STATE_IDLE;
> }
>
> diff --git a/drivers/s390/cio/vfio_ccw_ops.c b/drivers/s390/cio/vfio_ccw_ops.c
> index f0f4071..346532d 100644
> --- a/drivers/s390/cio/vfio_ccw_ops.c
> +++ b/drivers/s390/cio/vfio_ccw_ops.c
> @@ -171,6 +171,8 @@ static ssize_t vfio_ccw_mdev_write(struct mdev_device *mdev,
> struct vfio_ccw_private *private;
> struct ccw_io_region *region;
> union scsw *scsw;
> + int max_retries = 5;
> + DECLARE_COMPLETION_ONSTACK(completion);
>
> if (*ppos + count > sizeof(*region))
> return -EINVAL;
> @@ -185,7 +187,24 @@ static ssize_t vfio_ccw_mdev_write(struct mdev_device *mdev,
> if ((scsw->cmd.fctl & SCSW_FCTL_START_FUNC) != SCSW_FCTL_START_FUNC)
> return -EINVAL;
>
> - vfio_ccw_fsm_event(private, VFIO_CCW_EVENT_SSCH_REQ);
> + do {
> + switch (private->state) {
> + case VFIO_CCW_STATE_BUSY:
> + private->io_completion = &completion;
> + wait_for_completion(&completion);
> + break;
> + case VFIO_CCW_STATE_IDLE:
> + if (!vfio_ccw_fsm_event(private,
> + VFIO_CCW_EVENT_SSCH_REQ))
> + return count;
> + break;
> + default:
> + return -EBUSY;
> + }
> + } while (max_retries--);
I really don't think we want to go there. If we are busy, generate an
indication to that respect, but don't retry. My preferred approach
would be to keep the "we're busy" times as small as possible and let
the host channel subsystem handle any further races. We can't make that
bulletproof anyway, so no reason to make life more difficult for us.
> +
> + if (max_retries <= 0)
> + return -EBUSY;
> if (region->ret_code != 0)
> return region->ret_code;
>
> diff --git a/drivers/s390/cio/vfio_ccw_private.h b/drivers/s390/cio/vfio_ccw_private.h
> index dbef727..7cca078 100644
> --- a/drivers/s390/cio/vfio_ccw_private.h
> +++ b/drivers/s390/cio/vfio_ccw_private.h
> @@ -39,6 +39,7 @@ struct vfio_ccw_private {
> struct subchannel *sch;
> int state;
> struct completion *completion;
> + struct completion *io_completion;
> atomic_t avail;
> struct mdev_device *mdev;
> struct notifier_block nb;
> @@ -93,12 +94,13 @@ enum vfio_ccw_event {
> typedef int (fsm_func_t)(struct vfio_ccw_private *);
> extern fsm_func_t *vfio_ccw_jumptable[NR_VFIO_CCW_STATES][NR_VFIO_CCW_EVENTS];
>
> -static inline void vfio_ccw_fsm_event(struct vfio_ccw_private *private,
> +static inline int vfio_ccw_fsm_event(struct vfio_ccw_private *private,
> int event)
> {
> mutex_lock(&private->state_mutex);
> private->state = vfio_ccw_jumptable[private->state][event](private);
> mutex_unlock(&private->state_mutex);
> + return private->io_region.ret_code;
> }
>
> extern struct workqueue_struct *vfio_ccw_work_q;
On 04/25/2018 08:57 AM, Cornelia Huck wrote:
>> AFAIU this will be the problem of the person implementing the clear
>> and the halt for vfio-ccw. I.e. it's a non-problem right now.
> Well, that person is me:) I will post some RFC Real Soon Now if I stop
> getting sidetracked...
>
Makes sense. It should be fine either way AFAIU.
CSCH, more precisely the clear function, is supposed to clear the
interruption request(s) too. But I guess there is no way for the CP to
identify an I/O interrupt that should have been cleared -- that is, to catch
us disrespecting the architecture. I can't think of a way to establish
a must-happen-before relationship...
But discarding the first interrupt and delivering just one for the CSCH
is fine too for the same reason.
Regards,
Halil
On 25/04/2018 10:25, Cornelia Huck wrote:
> On Thu, 19 Apr 2018 16:48:06 +0200
> Pierre Morel <[email protected]> wrote:
>
>> The Sub channel event callback is threaded using workqueues.
>> The work uses the FSM introducing the VFIO_CCW_EVENT_SCH_EVENT
>> event.
> I don't think this is a good name; after all, all of the events are
> events for the subchannel :)
>
> This seems to be more of a "we need to update the schib" event...
> VFIO_CCW_EVENT_SCHIB_CHANGED? _SCH_CHANGED? _UPDATE_NEEDED?
>
> Tbh, I'm not quite sure this makes sense for me yet... will continue
> reading, but this probably needs a 'why'.
SCHIB_CHANGED or something like this sounds better indeed. :)
>
>> The update of the SCHIB is now done inside the FSM function.
>>
>> Signed-off-by: Pierre Morel <[email protected]>
>> ---
>> drivers/s390/cio/vfio_ccw_drv.c | 33 +++++++++++++--------------------
>> drivers/s390/cio/vfio_ccw_fsm.c | 23 +++++++++++++++++++++++
>> drivers/s390/cio/vfio_ccw_private.h | 3 +++
>> 3 files changed, 39 insertions(+), 20 deletions(-)
--
Pierre Morel
Linux/KVM/QEMU in Böblingen - Germany
On 25/04/2018 10:44, Cornelia Huck wrote:
> On Thu, 19 Apr 2018 16:48:12 +0200
> Pierre Morel <[email protected]> wrote:
>
>> VFIO_CCW_STATE_BOXED and VFIO_CCW_STATE_BUSY are the same
>> states.
>> Let's only keep one: VFIO_CCW_STATE_BUSY
>>
>> Signed-off-by: Pierre Morel <[email protected]>
>> ---
>> drivers/s390/cio/vfio_ccw_fsm.c | 9 ---------
>> drivers/s390/cio/vfio_ccw_private.h | 1 -
>> 2 files changed, 10 deletions(-)
> I think they were initially supposed to cover two different things:
> - BUSY: we're currently dealing with an I/O request
> - BOXED: the device currently won't talk to us or we won't talk to it
>
> It seems we never really did anything useful with BOXED; but should we?
>
I do not know what.
--
Pierre Morel
Linux/KVM/QEMU in Böblingen - Germany
On 25/04/2018 10:48, Cornelia Huck wrote:
> On Thu, 19 Apr 2018 16:48:13 +0200
> Pierre Morel <[email protected]> wrote:
>
>> In the current implementation, we do not want to start a new SSCH
>> command before the last one ends.
>>
>> Signed-off-by: Pierre Morel <[email protected]>
>> ---
>> drivers/s390/cio/vfio_ccw_fsm.c | 3 +++
>> drivers/s390/cio/vfio_ccw_ops.c | 21 ++++++++++++++++++++-
>> drivers/s390/cio/vfio_ccw_private.h | 4 +++-
>> 3 files changed, 26 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/s390/cio/vfio_ccw_fsm.c b/drivers/s390/cio/vfio_ccw_fsm.c
>> index b77b8ad..4140292 100644
>> --- a/drivers/s390/cio/vfio_ccw_fsm.c
>> +++ b/drivers/s390/cio/vfio_ccw_fsm.c
>> @@ -195,6 +195,9 @@ static int fsm_irq(struct vfio_ccw_private *private)
>> if (private->io_trigger)
>> eventfd_signal(private->io_trigger, 1);
>>
>> + if (private->io_completion)
>> + complete(private->io_completion);
>> +
>> return VFIO_CCW_STATE_IDLE;
>> }
>>
>> diff --git a/drivers/s390/cio/vfio_ccw_ops.c b/drivers/s390/cio/vfio_ccw_ops.c
>> index f0f4071..346532d 100644
>> --- a/drivers/s390/cio/vfio_ccw_ops.c
>> +++ b/drivers/s390/cio/vfio_ccw_ops.c
>> @@ -171,6 +171,8 @@ static ssize_t vfio_ccw_mdev_write(struct mdev_device *mdev,
>> struct vfio_ccw_private *private;
>> struct ccw_io_region *region;
>> union scsw *scsw;
>> + int max_retries = 5;
>> + DECLARE_COMPLETION_ONSTACK(completion);
>>
>> if (*ppos + count > sizeof(*region))
>> return -EINVAL;
>> @@ -185,7 +187,24 @@ static ssize_t vfio_ccw_mdev_write(struct mdev_device *mdev,
>> if ((scsw->cmd.fctl & SCSW_FCTL_START_FUNC) != SCSW_FCTL_START_FUNC)
>> return -EINVAL;
>>
>> - vfio_ccw_fsm_event(private, VFIO_CCW_EVENT_SSCH_REQ);
>> + do {
>> + switch (private->state) {
>> + case VFIO_CCW_STATE_BUSY:
>> + private->io_completion = &completion;
>> + wait_for_completion(&completion);
>> + break;
>> + case VFIO_CCW_STATE_IDLE:
>> + if (!vfio_ccw_fsm_event(private,
>> + VFIO_CCW_EVENT_SSCH_REQ))
>> + return count;
>> + break;
>> + default:
>> + return -EBUSY;
>> + }
>> + } while (max_retries--);
> I really don't think we want to go there. If we are busy, generate an
> indication to that respect, but don't retry. My preferred approach
> would be to keep the "we're busy" times as small as possible and let
> the host channel subsystem handle any further races. We can't make that
> bulletproof anyway, so no reason to make life more difficult for us.
OK, clear.
Thanks
Pierre
>
>> +
>> + if (max_retries <= 0)
>> + return -EBUSY;
>> if (region->ret_code != 0)
>> return region->ret_code;
>>
>> diff --git a/drivers/s390/cio/vfio_ccw_private.h b/drivers/s390/cio/vfio_ccw_private.h
>> index dbef727..7cca078 100644
>> --- a/drivers/s390/cio/vfio_ccw_private.h
>> +++ b/drivers/s390/cio/vfio_ccw_private.h
>> @@ -39,6 +39,7 @@ struct vfio_ccw_private {
>> struct subchannel *sch;
>> int state;
>> struct completion *completion;
>> + struct completion *io_completion;
>> atomic_t avail;
>> struct mdev_device *mdev;
>> struct notifier_block nb;
>> @@ -93,12 +94,13 @@ enum vfio_ccw_event {
>> typedef int (fsm_func_t)(struct vfio_ccw_private *);
>> extern fsm_func_t *vfio_ccw_jumptable[NR_VFIO_CCW_STATES][NR_VFIO_CCW_EVENTS];
>>
>> -static inline void vfio_ccw_fsm_event(struct vfio_ccw_private *private,
>> +static inline int vfio_ccw_fsm_event(struct vfio_ccw_private *private,
>> int event)
>> {
>> mutex_lock(&private->state_mutex);
>> private->state = vfio_ccw_jumptable[private->state][event](private);
>> mutex_unlock(&private->state_mutex);
>> + return private->io_region.ret_code;
>> }
>>
>> extern struct workqueue_struct *vfio_ccw_work_q;
--
Pierre Morel
Linux/KVM/QEMU in Böblingen - Germany
On Wed, 25 Apr 2018 13:06:31 +0200
Halil Pasic <[email protected]> wrote:
> On 04/25/2018 08:57 AM, Cornelia Huck wrote:
> >> AFAIU this will be the problem of the person implementing the clear
> >> and the halt for vfio-ccw. I.e. it's a non-problem right now.
> > Well, that person is me:) I will post some RFC Real Soon Now if I stop
> > getting sidetracked...
> >
>
> Makes sense. It should be fine either way AFAIU.
>
> CSCH, more precisely the clear function is supposed to clear the
> interruption request(s) too. But I guess there is no way for the CP to
> identify an I/O interrupt that should have been cleared -- that is, to catch
> us disrespecting the architecture. I can't think of a way to establish
> must happen before relationship...
>
> But discarding the first interrupt and delivering just one for the CSCH
> is fine too for the same reason.
Yes, both work. The calling code in the guest has to be able to handle
both anyway, since both can happen on real hardware as well (with a
smaller race window).
On Tue, 24 Apr 2018 10:22:15 +0200
Pierre Morel <[email protected]> wrote:
> On 24/04/2018 09:25, Dong Jia Shi wrote:
> > * Pierre Morel <[email protected]> [2018-04-19 16:48:05 +0200]:
> >
> >> We change the FSM functions to return the next state,
> >> and adapt the fsm_func_t function type.
> > I think I'd need to read the rest patches to understand why we need this
> > one, but no hurt to write some ideas that I noticed at my first glance.
> > See below please.
> >
> >> Signed-off-by: Pierre Morel <[email protected]>
> >> ---
> >> drivers/s390/cio/vfio_ccw_fsm.c | 24 ++++++++++++++++--------
> >> drivers/s390/cio/vfio_ccw_private.h | 5 +++--
> >> 2 files changed, 19 insertions(+), 10 deletions(-)
> >> diff --git a/drivers/s390/cio/vfio_ccw_private.h b/drivers/s390/cio/vfio_ccw_private.h
> >> index 78a66d9..f526b18 100644
> >> --- a/drivers/s390/cio/vfio_ccw_private.h
> >> +++ b/drivers/s390/cio/vfio_ccw_private.h
> >> @@ -83,13 +83,14 @@ enum vfio_ccw_event {
> >> /*
> >> * Action called through jumptable.
> >> */
> >> -typedef void (fsm_func_t)(struct vfio_ccw_private *, enum vfio_ccw_event);
> >> +typedef int (fsm_func_t)(struct vfio_ccw_private *, enum vfio_ccw_event);
> >> extern fsm_func_t *vfio_ccw_jumptable[NR_VFIO_CCW_STATES][NR_VFIO_CCW_EVENTS];
> >>
> >> static inline void vfio_ccw_fsm_event(struct vfio_ccw_private *private,
> >> int event)
> >> {
> >> - vfio_ccw_jumptable[private->state][event](private, event);
> >> + private->state = vfio_ccw_jumptable[private->state][event](private,
> >> + event);
> > Since here it assigns new value to private->state, there is no need to
> > do that inside each fsm_func?
> Absolutely.
> I just kept the previous code, just adding the return private->state in
> the functions
> in this patch.
> merging the state and the return value is done in a later patch.
> If you prefer I can do it in this patch.
I think we should revisit this later. It's hard to judge this patch on
its own.
On Thu, 26 Apr 2018 14:59:54 +0800
Dong Jia Shi <[email protected]> wrote:
> * Pierre Morel <[email protected]> [2018-04-19 16:48:06 +0200]:
>
> > The Sub channel event callback is threaded using workqueues.
> > The work uses the FSM introducing the VFIO_CCW_EVENT_SCH_EVENT
> > event.
> > The update of the SCHIB is now done inside the FSM function.
> >
> > Signed-off-by: Pierre Morel <[email protected]>
> > ---
> > drivers/s390/cio/vfio_ccw_drv.c | 33 +++++++++++++--------------------
> > drivers/s390/cio/vfio_ccw_fsm.c | 23 +++++++++++++++++++++++
> > drivers/s390/cio/vfio_ccw_private.h | 3 +++
> > 3 files changed, 39 insertions(+), 20 deletions(-)
> >
> > @@ -171,28 +181,11 @@ static void vfio_ccw_sch_shutdown(struct subchannel *sch)
> > static int vfio_ccw_sch_event(struct subchannel *sch, int process)
> > {
> > struct vfio_ccw_private *private = dev_get_drvdata(&sch->dev);
> > - unsigned long flags;
> >
> > - spin_lock_irqsave(sch->lock, flags);
> > if (!device_is_registered(&sch->dev))
> > - goto out_unlock;
> > -
> > - if (work_pending(&sch->todo_work))
> > - goto out_unlock;
> Just realized that this has a bug in the original implementation. For
> the error-out case this should return -EAGAIN. We'd need a separate fix for
> this.
Indeed. Will you send a patch, or should I hack something up?
>
> > -
> > - if (cio_update_schib(sch)) {
> > - vfio_ccw_fsm_event(private, VFIO_CCW_EVENT_NOT_OPER);
> > - goto out_unlock;
> > - }
> > -
> > - private = dev_get_drvdata(&sch->dev);
> > - if (private->state == VFIO_CCW_STATE_NOT_OPER) {
> > - private->state = private->mdev ? VFIO_CCW_STATE_IDLE :
> > - VFIO_CCW_STATE_STANDBY;
> > - }
> This hunk was totally removed, and this is fine because?
>
> > -
> > -out_unlock:
> > - spin_unlock_irqrestore(sch->lock, flags);
> > + return -1;
> -1 is not a valid code.
-ENODEV looks more fitting, if we decide to go with this rework.
>
> > + WARN_ON(work_pending(&private->event_work));
> > + queue_work(vfio_ccw_work_q, &private->event_work);
> >
> > return 0;
> > }
I'm wondering why this should always be done via a workqueue. It seems
the other subchannel types try to do as much as possible immediately?
(And returning -EAGAIN already triggers the css code to schedule
another call later.)
On Wed, 25 Apr 2018 15:52:19 +0200
Pierre Morel <[email protected]> wrote:
> On 25/04/2018 10:41, Cornelia Huck wrote:
> > On Thu, 19 Apr 2018 16:48:07 +0200
> > Pierre Morel<[email protected]> wrote:
> >> diff --git a/drivers/s390/cio/vfio_ccw_private.h b/drivers/s390/cio/vfio_ccw_private.h
> >> index 3284e64..93aab87 100644
> >> --- a/drivers/s390/cio/vfio_ccw_private.h
> >> +++ b/drivers/s390/cio/vfio_ccw_private.h
> >> @@ -76,7 +76,7 @@ enum vfio_ccw_state {
> >> */
> >> enum vfio_ccw_event {
> >> VFIO_CCW_EVENT_NOT_OPER,
> >> - VFIO_CCW_EVENT_IO_REQ,
> >> + VFIO_CCW_EVENT_SSCH_REQ,
> >> VFIO_CCW_EVENT_INTERRUPT,
> >> VFIO_CCW_EVENT_SCH_EVENT,
> >> /* last element! */
> > I don't think we should separate the ssch handling. The major
> > difference to halt/clear is that it needs channel program translation.
> > Everything else (issuing the instruction and processing the interrupt)
> > are basically the same. If we just throw everything at the hardware
> > and let the host's channel subsystem figure it out, we already should
> > be fine with regard to most of the races.
>
> We must test at one moment or another the kind of request we do,
> cancel, halt and clear only need the subchannel id in register 1 and as
> you said are much more direct to implement.
>
> If we do not separate them here, we need a switch in the "do_io_request"
> function.
> Is it what you mean?
Yes. Most of the handling should be the same for any function.
On Thu, 26 Apr 2018 15:48:06 +0800
Dong Jia Shi <[email protected]> wrote:
> * Dong Jia Shi <[email protected]> [2018-04-26 15:30:54 +0800]:
>
> [...]
>
> > > @@ -179,7 +160,7 @@ static int fsm_irq(struct vfio_ccw_private *private,
> > > if (private->io_trigger)
> > > eventfd_signal(private->io_trigger, 1);
> > >
> > > - return private->state;
> > > + return VFIO_CCW_STATE_IDLE;
> > This is not right. For example, if we are in STANDBY state (subch driver
> > is probed, but mdev device is not created), we can not jump to IDLE
> > state.
> >
> I see my problem, for STANDBY state, we should introduce another event
> callback for VFIO_CCW_EVENT_INTERRUPT. It doesn't make sense to call
> fsm_irq() which tries to signal userspace with interrupt notification
> when mdev is not created yet... So we'd need a separated fix for this
> issue too.
But how do we even get into that situation when we don't have an mdev
yet?
On Thu, 19 Apr 2018 16:48:10 +0200
Pierre Morel <[email protected]> wrote:
> The VFIO_CCW_EVENT_INIT event allows to export the initial state
> into the FSM.
I don't know, that feels a bit unintuitive to me. I would naively
expect initialization to be done _before_ we activate processing via
the state machine.
>
> Signed-off-by: Pierre Morel <[email protected]>
> ---
> drivers/s390/cio/vfio_ccw_drv.c | 26 +++++++++++---------------
> drivers/s390/cio/vfio_ccw_fsm.c | 21 +++++++++++++++++++++
> drivers/s390/cio/vfio_ccw_ops.c | 11 -----------
> drivers/s390/cio/vfio_ccw_private.h | 1 +
> 4 files changed, 33 insertions(+), 26 deletions(-)
> /*
> * Device statemachine
> */
> fsm_func_t *vfio_ccw_jumptable[NR_VFIO_CCW_STATES][NR_VFIO_CCW_EVENTS] = {
> [VFIO_CCW_STATE_NOT_OPER] = {
> + [VFIO_CCW_EVENT_INIT] = fsm_init,
> [VFIO_CCW_EVENT_NOT_OPER] = fsm_nop,
> [VFIO_CCW_EVENT_SSCH_REQ] = fsm_io_error,
> [VFIO_CCW_EVENT_INTERRUPT] = fsm_disabled_irq,
> [VFIO_CCW_EVENT_SCH_EVENT] = fsm_nop,
> },
> [VFIO_CCW_STATE_STANDBY] = {
> + [VFIO_CCW_EVENT_INIT] = fsm_nop,
> [VFIO_CCW_EVENT_NOT_OPER] = fsm_notoper,
> [VFIO_CCW_EVENT_SSCH_REQ] = fsm_io_error,
> [VFIO_CCW_EVENT_INTERRUPT] = fsm_irq,
> [VFIO_CCW_EVENT_SCH_EVENT] = fsm_sch_event,
> },
> [VFIO_CCW_STATE_IDLE] = {
> + [VFIO_CCW_EVENT_INIT] = fsm_nop,
> [VFIO_CCW_EVENT_NOT_OPER] = fsm_notoper,
> [VFIO_CCW_EVENT_SSCH_REQ] = fsm_io_request,
> [VFIO_CCW_EVENT_INTERRUPT] = fsm_irq,
> [VFIO_CCW_EVENT_SCH_EVENT] = fsm_sch_event,
> },
> [VFIO_CCW_STATE_BOXED] = {
> + [VFIO_CCW_EVENT_INIT] = fsm_nop,
> [VFIO_CCW_EVENT_NOT_OPER] = fsm_notoper,
> [VFIO_CCW_EVENT_SSCH_REQ] = fsm_io_busy,
> [VFIO_CCW_EVENT_INTERRUPT] = fsm_irq,
> [VFIO_CCW_EVENT_SCH_EVENT] = fsm_sch_event,
> },
> [VFIO_CCW_STATE_BUSY] = {
> + [VFIO_CCW_EVENT_INIT] = fsm_nop,
> [VFIO_CCW_EVENT_NOT_OPER] = fsm_notoper,
> [VFIO_CCW_EVENT_SSCH_REQ] = fsm_io_busy,
> [VFIO_CCW_EVENT_INTERRUPT] = fsm_irq,
...especially as you only call it if in the NOT_OPER state. Why should
this event be generated in any case but in the very beginning?
On Thu, 19 Apr 2018 16:48:11 +0200
Pierre Morel <[email protected]> wrote:
> Two new events, VFIO_CCW_EVENT_ONLINE and VFIO_CCW_EVENT_OFFLINE
> allow to handle the enabling and disabling of a Sub Channel and
> the shutdown, quiesce and reset operations are changed accordingly.
OK, onlining/offlining via the fsm makes more sense conceptually than
the init event.
How is that supposed to play with enabling the subchannel in the init
event? I would rather expect it to be done in the onlining transition
only?
>
> Signed-off-by: Pierre Morel <[email protected]>
> ---
> drivers/s390/cio/vfio_ccw_drv.c | 47 ++++++--------------------
> drivers/s390/cio/vfio_ccw_fsm.c | 66 +++++++++++++++++++++++++++++++++++++
> drivers/s390/cio/vfio_ccw_ops.c | 15 +++------
> drivers/s390/cio/vfio_ccw_private.h | 3 ++
> 4 files changed, 83 insertions(+), 48 deletions(-)
On Wed, 25 Apr 2018 15:55:51 +0200
Pierre Morel <[email protected]> wrote:
> On 25/04/2018 10:44, Cornelia Huck wrote:
> > On Thu, 19 Apr 2018 16:48:12 +0200
> > Pierre Morel <[email protected]> wrote:
> >
> >> VFIO_CCW_STATE_BOXED and VFIO_CCW_STATE_BUSY are the same
> >> states.
> >> Let's only keep one: VFIO_CCW_STATE_BUSY
> >>
> >> Signed-off-by: Pierre Morel <[email protected]>
> >> ---
> >> drivers/s390/cio/vfio_ccw_fsm.c | 9 ---------
> >> drivers/s390/cio/vfio_ccw_private.h | 1 -
> >> 2 files changed, 10 deletions(-)
> > I think they were initially supposed to cover two different things:
> > - BUSY: we're currently dealing with an I/O request
> > - BOXED: the device currently won't talk to us or we won't talk to it
> >
> > It seems we never really did anything useful with BOXED; but should we?
> >
> I do not know what.
The BUSY state is something we know that we'll get out of soon-ish
(when the I/O request has finished). We could conceivably use a timeout
and drop to the BOXED state if we don't get an answer.
I think this plays also into the reserve/release and path handling
questions. One of the more common reasons for devices to become boxed
I've seen is another system doing a reserve on a dasd.
On Wed, 2 May 2018 15:46:22 +0800
Dong Jia Shi <[email protected]> wrote:
> * Cornelia Huck <[email protected]> [2018-04-30 17:33:05 +0200]:
>
> > On Thu, 26 Apr 2018 15:48:06 +0800
> > Dong Jia Shi <[email protected]> wrote:
> >
> > > * Dong Jia Shi <[email protected]> [2018-04-26 15:30:54 +0800]:
> > >
> > > [...]
> > >
> > > > > @@ -179,7 +160,7 @@ static int fsm_irq(struct vfio_ccw_private *private,
> > > > > if (private->io_trigger)
> > > > > eventfd_signal(private->io_trigger, 1);
> > > > >
> > > > > - return private->state;
> > > > > + return VFIO_CCW_STATE_IDLE;
> > > > This is not right. For example, if we are in STANDBY state (subch driver
> > > > is probed, but mdev device is not created), we can not jump to IDLE
> > > > state.
> > > >
> > > I see my problem, for STANDBY state, we should introduce another event
> > > callback for VFIO_CCW_EVENT_INTERRUPT. It doesn't make sense to call
> > > fsm_irq() which tries to signal userspace with interrupt notification
> > > when mdev is not created yet... So we'd need a separated fix for this
> > > issue too.
> >
> > But how do we even get into that situation when we don't have an mdev
> > yet?
> >
> We can't... So let's assign fsm_nop() as the interrupt callback for
> STANDBY state?
Either that, or have a special fsm_should_not_happen() function that
can pop out a trace message and then continue to do nothing.
On 30/04/2018 17:47, Cornelia Huck wrote:
> On Wed, 25 Apr 2018 15:55:51 +0200
> Pierre Morel <[email protected]> wrote:
>
>> On 25/04/2018 10:44, Cornelia Huck wrote:
>>> On Thu, 19 Apr 2018 16:48:12 +0200
>>> Pierre Morel <[email protected]> wrote:
>>>
>>>> VFIO_CCW_STATE_BOXED and VFIO_CCW_STATE_BUSY are the same
>>>> states.
>>>> Let's only keep one: VFIO_CCW_STATE_BUSY
>>>>
>>>> Signed-off-by: Pierre Morel <[email protected]>
>>>> ---
>>>> drivers/s390/cio/vfio_ccw_fsm.c | 9 ---------
>>>> drivers/s390/cio/vfio_ccw_private.h | 1 -
>>>> 2 files changed, 10 deletions(-)
>>> I think they were initially supposed to cover two different things:
>>> - BUSY: we're currently dealing with an I/O request
>>> - BOXED: the device currently won't talk to us or we won't talk to it
>>>
>>> It seems we never really did anything useful with BOXED; but should we?
>>>
>> I do not know what.
> The BUSY state is something we know that we'll get out of soon-ish
> (when the I/O request has finished). We could conceivably use a timeout
> and drop to the BOXED state if we don't get an answer.
Absolutely, timeout on requests is something I wanted to do in a second
series.
>
> I think this plays also into the reserve/release and path handling
> questions. One of the more common reasons for devices to become boxed
> I've seen is another system doing a reserve on a dasd.
>
--
Pierre Morel
Linux/KVM/QEMU in Böblingen - Germany
On 30/04/2018 17:39, Cornelia Huck wrote:
> On Thu, 19 Apr 2018 16:48:10 +0200
> Pierre Morel <[email protected]> wrote:
>
>> The VFIO_CCW_EVENT_INIT event allows to export the initial state
>> into the FSM.
> I don't know, that feels a bit unintuitive to me. I would naively
> expect initialization to be done _before_ we activate processing via
> the state machine.
>
>> Signed-off-by: Pierre Morel <[email protected]>
>> ---
>> drivers/s390/cio/vfio_ccw_drv.c | 26 +++++++++++---------------
>> drivers/s390/cio/vfio_ccw_fsm.c | 21 +++++++++++++++++++++
>> drivers/s390/cio/vfio_ccw_ops.c | 11 -----------
>> drivers/s390/cio/vfio_ccw_private.h | 1 +
>> 4 files changed, 33 insertions(+), 26 deletions(-)
>> /*
>> * Device statemachine
>> */
>> fsm_func_t *vfio_ccw_jumptable[NR_VFIO_CCW_STATES][NR_VFIO_CCW_EVENTS] = {
>> [VFIO_CCW_STATE_NOT_OPER] = {
>> + [VFIO_CCW_EVENT_INIT] = fsm_init,
>> [VFIO_CCW_EVENT_NOT_OPER] = fsm_nop,
>> [VFIO_CCW_EVENT_SSCH_REQ] = fsm_io_error,
>> [VFIO_CCW_EVENT_INTERRUPT] = fsm_disabled_irq,
>> [VFIO_CCW_EVENT_SCH_EVENT] = fsm_nop,
>> },
>> [VFIO_CCW_STATE_STANDBY] = {
>> + [VFIO_CCW_EVENT_INIT] = fsm_nop,
>> [VFIO_CCW_EVENT_NOT_OPER] = fsm_notoper,
>> [VFIO_CCW_EVENT_SSCH_REQ] = fsm_io_error,
>> [VFIO_CCW_EVENT_INTERRUPT] = fsm_irq,
>> [VFIO_CCW_EVENT_SCH_EVENT] = fsm_sch_event,
>> },
>> [VFIO_CCW_STATE_IDLE] = {
>> + [VFIO_CCW_EVENT_INIT] = fsm_nop,
>> [VFIO_CCW_EVENT_NOT_OPER] = fsm_notoper,
>> [VFIO_CCW_EVENT_SSCH_REQ] = fsm_io_request,
>> [VFIO_CCW_EVENT_INTERRUPT] = fsm_irq,
>> [VFIO_CCW_EVENT_SCH_EVENT] = fsm_sch_event,
>> },
>> [VFIO_CCW_STATE_BOXED] = {
>> + [VFIO_CCW_EVENT_INIT] = fsm_nop,
>> [VFIO_CCW_EVENT_NOT_OPER] = fsm_notoper,
>> [VFIO_CCW_EVENT_SSCH_REQ] = fsm_io_busy,
>> [VFIO_CCW_EVENT_INTERRUPT] = fsm_irq,
>> [VFIO_CCW_EVENT_SCH_EVENT] = fsm_sch_event,
>> },
>> [VFIO_CCW_STATE_BUSY] = {
>> + [VFIO_CCW_EVENT_INIT] = fsm_nop,
>> [VFIO_CCW_EVENT_NOT_OPER] = fsm_notoper,
>> [VFIO_CCW_EVENT_SSCH_REQ] = fsm_io_busy,
>> [VFIO_CCW_EVENT_INTERRUPT] = fsm_irq,
> ...especially as you only call it if in the NOT_OPER state. Why should
> this event be generated in any case but in the very beginning?
>
It is the only way to go out of the NOT_OPER state.
The idea is to introduce recovery code in a later version.
However, it is no problem to suppress this state until we get there, so I will
remove it in V2.
--
Pierre Morel
Linux/KVM/QEMU in Böblingen - Germany
On 26/04/2018 09:36, Dong Jia Shi wrote:
> * Pierre Morel <[email protected]> [2018-04-19 16:48:08 +0200]:
>
>> The fsm_func_t function type definition does not need the event
>> parameter since all functions are in a state/event table.
> Not sure if this will still be right after you addressed all of the
> comments on this version. But no hurt to point out some nits that I
> noticed below.
>
>> Signed-off-by: Pierre Morel <[email protected]>
>> ---
>> drivers/s390/cio/vfio_ccw_fsm.c | 34 +++++++++++-----------------------
>> drivers/s390/cio/vfio_ccw_private.h | 5 ++---
>> 2 files changed, 13 insertions(+), 26 deletions(-)
>>
>> diff --git a/drivers/s390/cio/vfio_ccw_fsm.c b/drivers/s390/cio/vfio_ccw_fsm.c
>> index f9855cd..f8ded70 100644
>> --- a/drivers/s390/cio/vfio_ccw_fsm.c
>> +++ b/drivers/s390/cio/vfio_ccw_fsm.c
>> @@ -55,8 +55,7 @@ static int fsm_io_helper(struct vfio_ccw_private *private)
>> }
>> }
>>
>> -static int fsm_notoper(struct vfio_ccw_private *private,
>> - enum vfio_ccw_event event)
>> +static int fsm_notoper(struct vfio_ccw_private *private)
>> {
>> struct subchannel *sch = private->sch;
>>
>> @@ -65,36 +64,31 @@ static int fsm_notoper(struct vfio_ccw_private *private,
>> * Probably we should send the machine check to the guest.
>> */
>> css_sched_sch_todo(sch, SCH_TODO_UNREG);
>> - private->state = VFIO_CCW_STATE_NOT_OPER;
>> - return private->state;
>> + return VFIO_CCW_STATE_NOT_OPER;
> This change should belong to a former patch.
>
> [...]
>
>> @@ -109,17 +103,14 @@ static int fsm_disabled_irq(struct vfio_ccw_private *private,
>> /*
>> * Deal with the ccw command request from the userspace.
>> */
>> -static int fsm_io_request(struct vfio_ccw_private *private,
>> - enum vfio_ccw_event event)
>> +static int fsm_io_request(struct vfio_ccw_private *private)
>> {
>> - union orb *orb;
>> struct ccw_io_region *io_region = &private->io_region;
>> + union orb *orb = (union orb *)io_region->orb_area;
>> struct mdev_device *mdev = private->mdev;
>>
>> private->state = VFIO_CCW_STATE_BOXED;
>>
>> - orb = (union orb *)io_region->orb_area;
>> -
> Irrelevant change for this patch.
>
>> io_region->ret_code = cp_init(&private->cp, mdev_dev(mdev), orb);
>> if (io_region->ret_code)
>> goto err_out;
>> @@ -139,15 +130,13 @@ static int fsm_io_request(struct vfio_ccw_private *private,
>> return VFIO_CCW_STATE_BUSY;
>>
>> err_out:
>> - private->state = VFIO_CCW_STATE_IDLE;
>> - return private->state;
>> + return VFIO_CCW_STATE_IDLE;
> Should move to other patch.
>
> [...]
>
OK, I will move these changes in patch 4/10.
--
Pierre Morel
Linux/KVM/QEMU in Böblingen - Germany
On 30/04/2018 17:30, Cornelia Huck wrote:
> On Wed, 25 Apr 2018 15:52:19 +0200
> Pierre Morel <[email protected]> wrote:
>
>> On 25/04/2018 10:41, Cornelia Huck wrote:
>>> On Thu, 19 Apr 2018 16:48:07 +0200
>>> Pierre Morel<[email protected]> wrote:
>>>> diff --git a/drivers/s390/cio/vfio_ccw_private.h b/drivers/s390/cio/vfio_ccw_private.h
>>>> index 3284e64..93aab87 100644
>>>> --- a/drivers/s390/cio/vfio_ccw_private.h
>>>> +++ b/drivers/s390/cio/vfio_ccw_private.h
>>>> @@ -76,7 +76,7 @@ enum vfio_ccw_state {
>>>> */
>>>> enum vfio_ccw_event {
>>>> VFIO_CCW_EVENT_NOT_OPER,
>>>> - VFIO_CCW_EVENT_IO_REQ,
>>>> + VFIO_CCW_EVENT_SSCH_REQ,
>>>> VFIO_CCW_EVENT_INTERRUPT,
>>>> VFIO_CCW_EVENT_SCH_EVENT,
>>>> /* last element! */
>>> I don't think we should separate the ssch handling. The major
>>> difference to halt/clear is that it needs channel program translation.
>>> Everything else (issuing the instruction and processing the interrupt)
>>> are basically the same. If we just throw everything at the hardware
>>> and let the host's channel subsystem figure it out, we already should
>>> be fine with regard to most of the races.
>> We must test at a moment or another the kind of request we do,
>> cancel, halt and clear only need the subchannel id in register 1 and as
>> you said are much more direct to implement.
>>
>> If we do not separate them here, we need a switch in the "do_io_request"
>> function.
>> Is it what you mean?
> Yes. Most of the handling should be the same for any function.
I really don't know, the 4 functions are quite different.
- SSCH uses an ORB, and has a quite long kernel execution time for VFIO
- there is a race between SSCH and the others instructions
- XSCH makes subchannel no longer start pending, also reset the busy
indications
- CSCH cancels both SSCH and HSCH instruction, and perform path management
- HSCH has different busy (entry) conditions
But since they are not implemented today, I can keep the old name.
--
Pierre Morel
Linux/KVM/QEMU in Böblingen - Germany
On 02/05/2018 09:46, Dong Jia Shi wrote:
> * Cornelia Huck <[email protected]> [2018-04-30 17:33:05 +0200]:
>
>> On Thu, 26 Apr 2018 15:48:06 +0800
>> Dong Jia Shi <[email protected]> wrote:
>>
>>> * Dong Jia Shi <[email protected]> [2018-04-26 15:30:54 +0800]:
>>>
>>> [...]
>>>
>>>>> @@ -179,7 +160,7 @@ static int fsm_irq(struct vfio_ccw_private *private,
>>>>> if (private->io_trigger)
>>>>> eventfd_signal(private->io_trigger, 1);
>>>>>
>>>>> - return private->state;
>>>>> + return VFIO_CCW_STATE_IDLE;
>>>> This is not right. For example, if we are in STANDBY state (subch driver
>>>> is probed, but mdev device is not created), we can not jump to IDLE
>>>> state.
>>>>
>>> I see my problem, for STANDBY state, we should introduce another event
>>> callback for VFIO_CCW_EVENT_INTERRUPT. It doesn't make sense to call
>>> fsm_irq() which tries to signal userspace with interrupt notification
>>> when mdev is not created yet... So we'd need a separated fix for this
>>> issue too.
>> But how do we even get into that situation when we don't have an mdev
>> yet?
>>
> We cann't... So let's assign fsm_nop() as the interrupt callback for
> STANDBY state?
>
:) Isn't it exactly what my patch series handles?
--
Pierre Morel
Linux/KVM/QEMU in Böblingen - Germany
On 30/04/2018 17:28, Cornelia Huck wrote:
> On Thu, 26 Apr 2018 14:59:54 +0800
> Dong Jia Shi <[email protected]> wrote:
>
>> * Pierre Morel <[email protected]> [2018-04-19 16:48:06 +0200]:
>>
>>> The Sub channel event callback is threaded using workqueues.
>>> The work uses the FSM introducing the VFIO_CCW_EVENT_SCH_EVENT
>>> event.
>>> The update of the SCHIB is now done inside the FSM function.
>>>
>>> Signed-off-by: Pierre Morel <[email protected]>
>>> ---
>>> drivers/s390/cio/vfio_ccw_drv.c | 33 +++++++++++++--------------------
>>> drivers/s390/cio/vfio_ccw_fsm.c | 23 +++++++++++++++++++++++
>>> drivers/s390/cio/vfio_ccw_private.h | 3 +++
>>> 3 files changed, 39 insertions(+), 20 deletions(-)
>>>
>>> @@ -171,28 +181,11 @@ static void vfio_ccw_sch_shutdown(struct subchannel *sch)
>>> static int vfio_ccw_sch_event(struct subchannel *sch, int process)
>>> {
>>> struct vfio_ccw_private *private = dev_get_drvdata(&sch->dev);
>>> - unsigned long flags;
>>>
>>> - spin_lock_irqsave(sch->lock, flags);
>>> if (!device_is_registered(&sch->dev))
>>> - goto out_unlock;
>>> -
>>> - if (work_pending(&sch->todo_work))
>>> - goto out_unlock;
>> Just realized that this has a bug in the orignal implementation. For
>> error out this should return -EAGAIN. We'd need a separated fix on
>> this.
> Indeed. Will you send a patch, or should I hack something up?
>
>>> -
>>> - if (cio_update_schib(sch)) {
>>> - vfio_ccw_fsm_event(private, VFIO_CCW_EVENT_NOT_OPER);
>>> - goto out_unlock;
>>> - }
>>> -
>>> - private = dev_get_drvdata(&sch->dev);
>>> - if (private->state == VFIO_CCW_STATE_NOT_OPER) {
>>> - private->state = private->mdev ? VFIO_CCW_STATE_IDLE :
>>> - VFIO_CCW_STATE_STANDBY;
>>> - }
>> This hunk was toatally removed, and this is fine because?
The first part is moved to fsm_sch_event()
The second part disappears by design, as state changes are done inside the
FSM.
>>
>>> -
>>> -out_unlock:
>>> - spin_unlock_irqrestore(sch->lock, flags);
>>> + return -1;
>> -1 is not a valid code.
> -ENODEV looks more fitting, if we decide to go with this rework.
:) yes, forgot the -1 from the first tests.
>
>>> + WARN_ON(work_pending(&private->event_work));
>>> + queue_work(vfio_ccw_work_q, &private->event_work);
>>>
>>> return 0;
>>> }
> I'm wondering why this should always be done via a workqueue. It seems
> the other subchannel types try to do as much as possible immediately?
Doing things inside the top half is not very friendly with the system.
The goal of the patch is to build a clean atomic state machine.
Allowing the use of mutexes ensures atomicity.
I notice that I forgot to point this out in the cover letter although it is
one of the key design points.
I will update the cover letter.
> (And returning -EAGAIN already triggers the css code to schedule
> another call later.)
Yes, if(work_pending()) return -EAGAIN
Thanks for the review
Pierre
>
--
Pierre Morel
Linux/KVM/QEMU in Böblingen - Germany
On 04/05/2018 03:19, Dong Jia Shi wrote:
> * Pierre Morel <[email protected]> [2018-05-03 16:26:29 +0200]:
>
>> On 02/05/2018 09:46, Dong Jia Shi wrote:
>>> * Cornelia Huck <[email protected]> [2018-04-30 17:33:05 +0200]:
>>>
>>>> On Thu, 26 Apr 2018 15:48:06 +0800
>>>> Dong Jia Shi <[email protected]> wrote:
>>>>
>>>>> * Dong Jia Shi <[email protected]> [2018-04-26 15:30:54 +0800]:
>>>>>
>>>>> [...]
>>>>>
>>>>>>> @@ -179,7 +160,7 @@ static int fsm_irq(struct vfio_ccw_private *private,
>>>>>>> if (private->io_trigger)
>>>>>>> eventfd_signal(private->io_trigger, 1);
>>>>>>>
>>>>>>> - return private->state;
>>>>>>> + return VFIO_CCW_STATE_IDLE;
>>>>>> This is not right. For example, if we are in STANDBY state (subch driver
>>>>>> is probed, but mdev device is not created), we can not jump to IDLE
>>>>>> state.
>>>>> I see my problem, for STANDBY state, we should introduce another event
>>>>> callback for VFIO_CCW_EVENT_INTERRUPT. It doesn't make sense to call
>>>>> fsm_irq() which tries to signal userspace with interrupt notification
>>>>> when mdev is not created yet... So we'd need a separated fix for this
>>>>> issue too.
>>>> But how do we even get into that situation when we don't have an mdev
>>>> yet?
>>>>
>>> We cann't... So let's assign fsm_nop() as the interrupt callback for
>>> STANDBY state?
>>>
>> :) Isn't it exactly what my patch series handle?
> As far as I see, that's not true. ;)
>
> After this series applied,
> vfio_ccw_jumptable[VFIO_CCW_STATE_STANDBY][VFIO_CCW_EVENT_INTERRUPT] is
> still fsm_irq().
>
What I mean is, this code tries to handle design problems
without changing too much of the original code at first.
The problem here is not that the fsm_irq function is called on interrupt:
if we have an interrupt, it must be signaled to user land.
The problem is that this state is entered at the wrong moment.
STANDBY should be entered, during the mdev_open when we realize the QEMU
device,
and not during the probe, in which we should stay in NOT_OPER until we
get the QEMU device.
The probe() and mdev_open() function should be modified, not the state
table.
Regards,
Pierre
--
Pierre Morel
Linux/KVM/QEMU in Böblingen - Germany
[still backlog processing...]
On Thu, 3 May 2018 14:06:51 +0200
Pierre Morel <[email protected]> wrote:
> On 30/04/2018 17:30, Cornelia Huck wrote:
> > On Wed, 25 Apr 2018 15:52:19 +0200
> > Pierre Morel <[email protected]> wrote:
> >
> >> On 25/04/2018 10:41, Cornelia Huck wrote:
> >>> On Thu, 19 Apr 2018 16:48:07 +0200
> >>> Pierre Morel<[email protected]> wrote:
> >>>> diff --git a/drivers/s390/cio/vfio_ccw_private.h b/drivers/s390/cio/vfio_ccw_private.h
> >>>> index 3284e64..93aab87 100644
> >>>> --- a/drivers/s390/cio/vfio_ccw_private.h
> >>>> +++ b/drivers/s390/cio/vfio_ccw_private.h
> >>>> @@ -76,7 +76,7 @@ enum vfio_ccw_state {
> >>>> */
> >>>> enum vfio_ccw_event {
> >>>> VFIO_CCW_EVENT_NOT_OPER,
> >>>> - VFIO_CCW_EVENT_IO_REQ,
> >>>> + VFIO_CCW_EVENT_SSCH_REQ,
> >>>> VFIO_CCW_EVENT_INTERRUPT,
> >>>> VFIO_CCW_EVENT_SCH_EVENT,
> >>>> /* last element! */
> >>> I don't think we should separate the ssch handling. The major
> >>> difference to halt/clear is that it needs channel program translation.
> >>> Everything else (issuing the instruction and processing the interrupt)
> >>> are basically the same. If we just throw everything at the hardware
> >>> and let the host's channel subsystem figure it out, we already should
> >>> be fine with regard to most of the races.
> >> We must test at a moment or another the kind of request we do,
> >> cancel, halt and clear only need the subchannel id in register 1 and as
> >> you said are much more direct to implement.
> >>
> >> If we do not separate them here, we need a switch in the "do_io_request"
> >> function.
> >> Is it what you mean?
> > Yes. Most of the handling should be the same for any function.
>
> I really don't know, the 4 functions are quite different.
>
> - SSCH uses an ORB, and has a quite long kernel execution time for VFIO
> - there is a race between SSCH and the others instructions
> - XSCH makes subchannel no longer start pending, also reset the busy
> indications
> - CSCH cancels both SSCH and HSCH instruction, and perform path management
> - HSCH has different busy (entry) conditions
Roughly speaking, we have two categories: An asynchronous function is
performed (SSCH, HSCH, CSCH) or not (XSCH). So I would split out XSCH
in any case.
SSCH, HSCH, CSCH all perform path management. I see them as kind of
escalating (i.e. CSCH 'beats' HSCH which 'beats' SSCH). I think they
are all similar enough, though, as we can call through to the real
hardware and have it sorted out there.
Looking through the channel I/O instructions:
- RSCH should be handled with SSCH (as a special case).
- MSCH should also be handled in the long run, STSCH as well.
- SCHM is interesting, as it's not per-subchannel. We have some basic
handling of the instruction in QEMU, but it only emulates some ssch
counters and completely lacks support for the other fields.
- IIRC, there's also a CHSC command dealing with channel monitoring. We
currently fence off any CHSC that is not needed for Linux to run, but
there are some that might be useful for the guest (path handling
etc.) Hard to come to a conclusion here without access to the
documentation.
- I don't think we need to care about TSCH (other than keeping the
schib up to date, which we also need to do for STSCH).
- Likewise, TPI should be handled via emulation.
Coming back to the original issue, I think we can easily handle SSCH
(and RSCH), HSCH and CSCH together (with the actual hardware doing the
heavy lifting anyway). For other instructions, we need separate
states/processing.
On Fri, 4 May 2018 13:02:36 +0200
Pierre Morel <[email protected]> wrote:
> On 04/05/2018 03:19, Dong Jia Shi wrote:
> > * Pierre Morel <[email protected]> [2018-05-03 16:26:29 +0200]:
> >
> >> On 02/05/2018 09:46, Dong Jia Shi wrote:
> >>> * Cornelia Huck <[email protected]> [2018-04-30 17:33:05 +0200]:
> >>>
> >>>> On Thu, 26 Apr 2018 15:48:06 +0800
> >>>> Dong Jia Shi <[email protected]> wrote:
> >>>>
> >>>>> * Dong Jia Shi <[email protected]> [2018-04-26 15:30:54 +0800]:
> >>>>>
> >>>>> [...]
> >>>>>
> >>>>>>> @@ -179,7 +160,7 @@ static int fsm_irq(struct vfio_ccw_private *private,
> >>>>>>> if (private->io_trigger)
> >>>>>>> eventfd_signal(private->io_trigger, 1);
> >>>>>>>
> >>>>>>> - return private->state;
> >>>>>>> + return VFIO_CCW_STATE_IDLE;
> >>>>>> This is not right. For example, if we are in STANDBY state (subch driver
> >>>>>> is probed, but mdev device is not created), we can not jump to IDLE
> >>>>>> state.
> >>>>> I see my problem, for STANDBY state, we should introduce another event
> >>>>> callback for VFIO_CCW_EVENT_INTERRUPT. It doesn't make sense to call
> >>>>> fsm_irq() which tries to signal userspace with interrupt notification
> >>>>> when mdev is not created yet... So we'd need a separated fix for this
> >>>>> issue too.
> >>>> But how do we even get into that situation when we don't have an mdev
> >>>> yet?
> >>>>
> >>> We cann't... So let's assign fsm_nop() as the interrupt callback for
> >>> STANDBY state?
> >>>
> >> :) Isn't it exactly what my patch series handle?
> > As far as I see, that's not true. ;)
> >
> > After this series applied,
> > vfio_ccw_jumptable[VFIO_CCW_STATE_STANDBY][VFIO_CCW_EVENT_INTERRUPT] is
> > still fsm_irq().
> >
>
>
> What I mean is, this code tries to handle design problems
> without changing too much of the original code at first.
>
> The problem here is not that the fsm_irq function is called on interrupt,
> if we have an interrupt it must be signaled to user land.
> The problem is that this state is entered at the wrong moment.
>
> STANDBY should be entered, during the mdev_open when we realize the QEMU
> device,
> and not during the probe, in which we should stay in NOT_OPER until we
> get the QEMU device.
>
> The probe() and mdev_open() function should be modified, not the state
> table.
So, the takeaway is that we should handle starting via the init
callbacks and not via the state machine?
On 22/05/2018 17:41, Cornelia Huck wrote:
> On Fri, 4 May 2018 13:02:36 +0200
> Pierre Morel <[email protected]> wrote:
>
>> On 04/05/2018 03:19, Dong Jia Shi wrote:
>>> * Pierre Morel <[email protected]> [2018-05-03 16:26:29 +0200]:
>>>
>>>> On 02/05/2018 09:46, Dong Jia Shi wrote:
>>>>> * Cornelia Huck <[email protected]> [2018-04-30 17:33:05 +0200]:
>>>>>
>>>>>> On Thu, 26 Apr 2018 15:48:06 +0800
>>>>>> Dong Jia Shi <[email protected]> wrote:
>>>>>>
>>>>>>> * Dong Jia Shi <[email protected]> [2018-04-26 15:30:54 +0800]:
>>>>>>>
>>>>>>> [...]
>>>>>>>
>>>>>>>>> @@ -179,7 +160,7 @@ static int fsm_irq(struct vfio_ccw_private *private,
>>>>>>>>> if (private->io_trigger)
>>>>>>>>> eventfd_signal(private->io_trigger, 1);
>>>>>>>>>
>>>>>>>>> - return private->state;
>>>>>>>>> + return VFIO_CCW_STATE_IDLE;
>>>>>>>> This is not right. For example, if we are in STANDBY state (subch driver
>>>>>>>> is probed, but mdev device is not created), we can not jump to IDLE
>>>>>>>> state.
>>>>>>> I see my problem, for STANDBY state, we should introduce another event
>>>>>>> callback for VFIO_CCW_EVENT_INTERRUPT. It doesn't make sense to call
>>>>>>> fsm_irq() which tries to signal userspace with interrupt notification
>>>>>>> when mdev is not created yet... So we'd need a separated fix for this
>>>>>>> issue too.
>>>>>> But how do we even get into that situation when we don't have an mdev
>>>>>> yet?
>>>>>>
>>>>> We cann't... So let's assign fsm_nop() as the interrupt callback for
>>>>> STANDBY state?
>>>>>
>>>> :) Isn't it exactly what my patch series handle?
>>> As far as I see, that's not true. ;)
>>>
>>> After this series applied,
>>> vfio_ccw_jumptable[VFIO_CCW_STATE_STANDBY][VFIO_CCW_EVENT_INTERRUPT] is
>>> still fsm_irq().
>>>
>>
>> What I mean is, this code tries to handle design problems
>> without changing too much of the original code at first.
>>
>> The problem here is not that the fsm_irq function is called on interrupt,
>> if we have an interrupt it must be signaled to user land.
>> The problem is that this state is entered at the wrong moment.
>>
>> STANDBY should be entered, during the mdev_open when we realize the QEMU
>> device,
>> and not during the probe, in which we should stay in NOT_OPER until we
>> get the QEMU device.
>>
>> The probe() and mdev_open() function should be modified, not the state
>> table.
> So, the takeaway is that we should handle starting via the init
> callbacks and not via the state machine?
>
Hum, sorry, I think that my previous answer was not completely right
and did not really answer Dong Jia's comment: yes, fsm_irq was not
in its place. Thinking again about the comments from both of you,
I think that we can suppress the INIT event.
I would like to rebase the patch to include the comments you both made.
--
Pierre Morel
Linux/KVM/QEMU in Böblingen - Germany
On Wed, 23 May 2018 09:50:00 +0200
Pierre Morel <[email protected]> wrote:
> On 22/05/2018 17:41, Cornelia Huck wrote:
> > On Fri, 4 May 2018 13:02:36 +0200
> > Pierre Morel <[email protected]> wrote:
> >
> >> On 04/05/2018 03:19, Dong Jia Shi wrote:
> >>> * Pierre Morel <[email protected]> [2018-05-03 16:26:29 +0200]:
> >>>
> >>>> On 02/05/2018 09:46, Dong Jia Shi wrote:
> >>>>> * Cornelia Huck <[email protected]> [2018-04-30 17:33:05 +0200]:
> >>>>>
> >>>>>> On Thu, 26 Apr 2018 15:48:06 +0800
> >>>>>> Dong Jia Shi <[email protected]> wrote:
> >>>>>>
> >>>>>>> * Dong Jia Shi <[email protected]> [2018-04-26 15:30:54 +0800]:
> >>>>>>>
> >>>>>>> [...]
> >>>>>>>
> >>>>>>>>> @@ -179,7 +160,7 @@ static int fsm_irq(struct vfio_ccw_private *private,
> >>>>>>>>> if (private->io_trigger)
> >>>>>>>>> eventfd_signal(private->io_trigger, 1);
> >>>>>>>>>
> >>>>>>>>> - return private->state;
> >>>>>>>>> + return VFIO_CCW_STATE_IDLE;
> >>>>>>>> This is not right. For example, if we are in STANDBY state (subch driver
> >>>>>>>> is probed, but mdev device is not created), we can not jump to IDLE
> >>>>>>>> state.
> >>>>>>> I see my problem, for STANDBY state, we should introduce another event
> >>>>>>> callback for VFIO_CCW_EVENT_INTERRUPT. It doesn't make sense to call
> >>>>>>> fsm_irq() which tries to signal userspace with interrupt notification
> >>>>>>> when mdev is not created yet... So we'd need a separated fix for this
> >>>>>>> issue too.
> >>>>>> But how do we even get into that situation when we don't have an mdev
> >>>>>> yet?
> >>>>>>
> >>>>> We cann't... So let's assign fsm_nop() as the interrupt callback for
> >>>>> STANDBY state?
> >>>>>
> >>>> :) Isn't it exactly what my patch series handle?
> >>> As far as I see, that's not true. ;)
> >>>
> >>> After this series applied,
> >>> vfio_ccw_jumptable[VFIO_CCW_STATE_STANDBY][VFIO_CCW_EVENT_INTERRUPT] is
> >>> still fsm_irq().
> >>>
> >>
> >> What I mean is, this code tries to handle design problems
> >> without changing too much of the original code at first.
> >>
> >> The problem here is not that the fsm_irq function is called on interrupt,
> >> if we have an interrupt it must be signaled to user land.
> >> The problem is that this state is entered at the wrong moment.
> >>
> >> STANDBY should be entered, during the mdev_open when we realize the QEMU
> >> device,
> >> and not during the probe, in which we should stay in NOT_OPER until we
> >> get the QEMU device.
> >>
> >> The probe() and mdev_open() function should be modified, not the state
> >> table.
> > So, the takeaway is that we should handle starting via the init
> > callbacks and not via the state machine?
> >
> hum, sorry, I think that my previous answer was not completely right,
> and did not really answer to Dong Jia comment, yes fsm_irq was not
> at its place, thinking again about the comments of both of you
> I think that we can suppress the INIT event.
>
> I would like to rebase the patch to include the comments you both did.
>
>
Yes, a respin is probably best before we get confused even more :)
On 22/05/2018 17:38, Cornelia Huck wrote:
> [still backlog processing...]
>
> On Thu, 3 May 2018 14:06:51 +0200
> Pierre Morel <[email protected]> wrote:
>
>> On 30/04/2018 17:30, Cornelia Huck wrote:
>>> On Wed, 25 Apr 2018 15:52:19 +0200
>>> Pierre Morel <[email protected]> wrote:
>>>
>>>> On 25/04/2018 10:41, Cornelia Huck wrote:
>>>>> On Thu, 19 Apr 2018 16:48:07 +0200
>>>>> Pierre Morel<[email protected]> wrote:
>>>>>> diff --git a/drivers/s390/cio/vfio_ccw_private.h b/drivers/s390/cio/vfio_ccw_private.h
>>>>>> index 3284e64..93aab87 100644
>>>>>> --- a/drivers/s390/cio/vfio_ccw_private.h
>>>>>> +++ b/drivers/s390/cio/vfio_ccw_private.h
>>>>>> @@ -76,7 +76,7 @@ enum vfio_ccw_state {
>>>>>> */
>>>>>> enum vfio_ccw_event {
>>>>>> VFIO_CCW_EVENT_NOT_OPER,
>>>>>> - VFIO_CCW_EVENT_IO_REQ,
>>>>>> + VFIO_CCW_EVENT_SSCH_REQ,
>>>>>> VFIO_CCW_EVENT_INTERRUPT,
>>>>>> VFIO_CCW_EVENT_SCH_EVENT,
>>>>>> /* last element! */
>>>>> I don't think we should separate the ssch handling. The major
>>>>> difference to halt/clear is that it needs channel program translation.
>>>>> Everything else (issuing the instruction and processing the interrupt)
>>>>> are basically the same. If we just throw everything at the hardware
>>>>> and let the host's channel subsystem figure it out, we already should
>>>>> be fine with regard to most of the races.
>>>> We must test at a moment or another the kind of request we do,
>>>> cancel, halt and clear only need the subchannel id in register 1 and as
>>>> you said are much more direct to implement.
>>>>
>>>> If we do not separate them here, we need a switch in the "do_io_request"
>>>> function.
>>>> Is it what you mean?
>>> Yes. Most of the handling should be the same for any function.
>> I really don't know, the 4 functions are quite different.
>>
>> - SSCH uses an ORB, and has a quite long kernel execution time for VFIO
>> - there is a race between SSCH and the others instructions
>> - XSCH makes subchannel no longer start pending, also reset the busy
>> indications
>> - CSCH cancels both SSCH and HSCH instruction, and perform path management
>> - HSCH has different busy (entry) conditions
> Roughly speaking, we have two categories: An asynchronous function is
> performed (SSCH, HSCH, CSCH) or not (XSCH). So I would split out XSCH
> in any case.
>
> SSCH, HSCH, CSCH all perform path management. I see them as kind of
> escalating (i.e. CSCH 'beats' HSCH which 'beats' SSCH). I think they
> are all similar enough, though, as we can call through to the real
> hardware and have it sorted out there.
>
> Looking through the channel I/O instructions:
> - RSCH should be handled with SSCH (as a special case).
> - MSCH should also be handled in the long run, STSCH as well.
> - SCHM is interesting, as it's not per-subchannel. We have some basic
> handling of the instruction in QEMU, but it only emulates some ssch
> counters and completely lacks support for the other fields.
> - IIRC, there's also a CHSC command dealing with channel monitoring. We
> currently fence off any CHSC that is not needed for Linux to run, but
> there are some that might be useful for the guest (path handling
> etc.) Hard to come to a conclusion here without access to the
> documentation.
> - I don't think we need to care about TSCH (other than keeping the
> schib up to date, which we also need to do for STSCH).
> - Likewise, TPI should be handled via emulation.
>
> Coming back to the original issue, I think we can easily handle SSCH
> (and RSCH), HSCH and CSCH together (with the actual hardware doing the
> heavy lifting anyway). For other instructions, we need separate
> states/processing.
>
OK, I will make the next version with this in mind.
Thanks
Pierre
--
Pierre Morel
Linux/KVM/QEMU in Böblingen - Germany