From: Ian Munsie <[email protected]>
This adds an afu_driver_ops structure with event_pending and
deliver_event callbacks. An AFU driver such as cxlflash can fill these
out and associate it with a context to enable passing custom AFU
specific events to userspace.
The cxl driver will call event_pending() during poll, select, read, etc.
calls to check if an AFU driver specific event is pending, and will call
deliver_event() to deliver that event. This way, the cxl driver takes
care of all the usual locking semantics around these calls and handles
all the generic cxl events, so that the AFU driver only needs to worry
about it's own events.
The deliver_event() call is passed a struct cxl_event buffer to fill in.
The header will already be filled in for an AFU driver event, and the
AFU driver is expected to expand the header.size as necessary (up to
max_size, defined by struct cxl_event_afu_driver_reserved) and fill out
it's own information.
Since AFU drivers provide their own means for userspace to obtain the
AFU file descriptor (i.e. cxlflash uses an ioctl on their scsi file
descriptor to obtain the AFU file descriptor) and the generic cxl driver
will never use this event, the ABI of the event is up to each individual
AFU driver.
Signed-off-by: Ian Munsie <[email protected]>
---
Changes since v2:
- Fixed some typos spotted by Matt Ochs
Changes since v1:
- Rebased on upstream
- Bumped cxl api version to 3
- Addressed comments from mpe:
- Clarified commit message & some comments
- Mentioned 'cxlflash' as a possible user of this event
- Check driver ops on registration and warn if missing calls
- Remove redundant checks where driver ops is used
- Simplified ctx_event_pending and removed underscore version
- Changed deliver_event to take the context as the first argument
drivers/misc/cxl/Kconfig | 5 +++++
drivers/misc/cxl/api.c | 8 ++++++++
drivers/misc/cxl/cxl.h | 6 +++++-
drivers/misc/cxl/file.c | 36 +++++++++++++++++++++++++-----------
include/misc/cxl.h | 29 +++++++++++++++++++++++++++++
include/uapi/misc/cxl.h | 22 ++++++++++++++++++++++
6 files changed, 94 insertions(+), 12 deletions(-)
diff --git a/drivers/misc/cxl/Kconfig b/drivers/misc/cxl/Kconfig
index 8756d06..560412c 100644
--- a/drivers/misc/cxl/Kconfig
+++ b/drivers/misc/cxl/Kconfig
@@ -15,12 +15,17 @@ config CXL_EEH
bool
default n
+config CXL_AFU_DRIVER_OPS
+ bool
+ default n
+
config CXL
tristate "Support for IBM Coherent Accelerators (CXL)"
depends on PPC_POWERNV && PCI_MSI && EEH
select CXL_BASE
select CXL_KERNEL_API
select CXL_EEH
+ select CXL_AFU_DRIVER_OPS
default m
help
Select this option to enable driver support for IBM Coherent
diff --git a/drivers/misc/cxl/api.c b/drivers/misc/cxl/api.c
index ea3eeb7..eebc9c3 100644
--- a/drivers/misc/cxl/api.c
+++ b/drivers/misc/cxl/api.c
@@ -296,6 +296,14 @@ struct cxl_context *cxl_fops_get_context(struct file *file)
}
EXPORT_SYMBOL_GPL(cxl_fops_get_context);
+void cxl_set_driver_ops(struct cxl_context *ctx,
+ struct cxl_afu_driver_ops *ops)
+{
+ WARN_ON(!ops->event_pending || !ops->deliver_event);
+ ctx->afu_driver_ops = ops;
+}
+EXPORT_SYMBOL_GPL(cxl_set_driver_ops);
+
int cxl_start_work(struct cxl_context *ctx,
struct cxl_ioctl_start_work *work)
{
diff --git a/drivers/misc/cxl/cxl.h b/drivers/misc/cxl/cxl.h
index a521bc7..64e8e0a 100644
--- a/drivers/misc/cxl/cxl.h
+++ b/drivers/misc/cxl/cxl.h
@@ -24,6 +24,7 @@
#include <asm/reg.h>
#include <misc/cxl-base.h>
+#include <misc/cxl.h>
#include <uapi/misc/cxl.h>
extern uint cxl_verbose;
@@ -34,7 +35,7 @@ extern uint cxl_verbose;
* Bump version each time a user API change is made, whether it is
* backwards compatible ot not.
*/
-#define CXL_API_VERSION 2
+#define CXL_API_VERSION 3
#define CXL_API_VERSION_COMPATIBLE 1
/*
@@ -485,6 +486,9 @@ struct cxl_context {
bool pending_fault;
bool pending_afu_err;
+ /* Used by AFU drivers for driver specific event delivery */
+ struct cxl_afu_driver_ops *afu_driver_ops;
+
struct rcu_head rcu;
};
diff --git a/drivers/misc/cxl/file.c b/drivers/misc/cxl/file.c
index 783337d..d1cc297 100644
--- a/drivers/misc/cxl/file.c
+++ b/drivers/misc/cxl/file.c
@@ -295,6 +295,17 @@ int afu_mmap(struct file *file, struct vm_area_struct *vm)
return cxl_context_iomap(ctx, vm);
}
+static inline bool ctx_event_pending(struct cxl_context *ctx)
+{
+ if (ctx->pending_irq || ctx->pending_fault || ctx->pending_afu_err)
+ return true;
+
+ if (ctx->afu_driver_ops)
+ return ctx->afu_driver_ops->event_pending(ctx);
+
+ return false;
+}
+
unsigned int afu_poll(struct file *file, struct poll_table_struct *poll)
{
struct cxl_context *ctx = file->private_data;
@@ -307,8 +318,7 @@ unsigned int afu_poll(struct file *file, struct poll_table_struct *poll)
pr_devel("afu_poll wait done pe: %i\n", ctx->pe);
spin_lock_irqsave(&ctx->lock, flags);
- if (ctx->pending_irq || ctx->pending_fault ||
- ctx->pending_afu_err)
+ if (ctx_event_pending(ctx))
mask |= POLLIN | POLLRDNORM;
else if (ctx->status == CLOSED)
/* Only error on closed when there are no futher events pending
@@ -321,12 +331,6 @@ unsigned int afu_poll(struct file *file, struct poll_table_struct *poll)
return mask;
}
-static inline int ctx_event_pending(struct cxl_context *ctx)
-{
- return (ctx->pending_irq || ctx->pending_fault ||
- ctx->pending_afu_err || (ctx->status == CLOSED));
-}
-
ssize_t afu_read(struct file *file, char __user *buf, size_t count,
loff_t *off)
{
@@ -346,7 +350,7 @@ ssize_t afu_read(struct file *file, char __user *buf, size_t count,
for (;;) {
prepare_to_wait(&ctx->wq, &wait, TASK_INTERRUPTIBLE);
- if (ctx_event_pending(ctx))
+ if (ctx_event_pending(ctx) || (ctx->status == CLOSED))
break;
if (!cxl_adapter_link_ok(ctx->afu->adapter)) {
@@ -376,7 +380,14 @@ ssize_t afu_read(struct file *file, char __user *buf, size_t count,
memset(&event, 0, sizeof(event));
event.header.process_element = ctx->pe;
event.header.size = sizeof(struct cxl_event_header);
- if (ctx->pending_irq) {
+
+ if (ctx->afu_driver_ops && ctx->afu_driver_ops->event_pending(ctx)) {
+ pr_devel("afu_read delivering AFU driver specific event\n");
+ event.header.type = CXL_EVENT_AFU_DRIVER;
+ ctx->afu_driver_ops->deliver_event(ctx, &event, sizeof(event));
+ WARN_ON(event.header.size > sizeof(event));
+
+ } else if (ctx->pending_irq) {
pr_devel("afu_read delivering AFU interrupt\n");
event.header.size += sizeof(struct cxl_event_afu_interrupt);
event.header.type = CXL_EVENT_AFU_INTERRUPT;
@@ -384,6 +395,7 @@ ssize_t afu_read(struct file *file, char __user *buf, size_t count,
clear_bit(event.irq.irq - 1, ctx->irq_bitmap);
if (bitmap_empty(ctx->irq_bitmap, ctx->irq_count))
ctx->pending_irq = false;
+
} else if (ctx->pending_fault) {
pr_devel("afu_read delivering data storage fault\n");
event.header.size += sizeof(struct cxl_event_data_storage);
@@ -391,12 +403,14 @@ ssize_t afu_read(struct file *file, char __user *buf, size_t count,
event.fault.addr = ctx->fault_addr;
event.fault.dsisr = ctx->fault_dsisr;
ctx->pending_fault = false;
+
} else if (ctx->pending_afu_err) {
pr_devel("afu_read delivering afu error\n");
event.header.size += sizeof(struct cxl_event_afu_error);
event.header.type = CXL_EVENT_AFU_ERROR;
event.afu_error.error = ctx->afu_err;
ctx->pending_afu_err = false;
+
} else if (ctx->status == CLOSED) {
pr_devel("afu_read fatal error\n");
spin_unlock_irqrestore(&ctx->lock, flags);
@@ -554,7 +568,7 @@ int __init cxl_file_init(void)
* If these change we really need to update API. Either change some
* flags or update API version number CXL_API_VERSION.
*/
- BUILD_BUG_ON(CXL_API_VERSION != 2);
+ BUILD_BUG_ON(CXL_API_VERSION != 3);
BUILD_BUG_ON(sizeof(struct cxl_ioctl_start_work) != 64);
BUILD_BUG_ON(sizeof(struct cxl_event_header) != 8);
BUILD_BUG_ON(sizeof(struct cxl_event_afu_interrupt) != 8);
diff --git a/include/misc/cxl.h b/include/misc/cxl.h
index f2ffe5b..01d66a3 100644
--- a/include/misc/cxl.h
+++ b/include/misc/cxl.h
@@ -210,4 +210,33 @@ ssize_t cxl_fd_read(struct file *file, char __user *buf, size_t count,
void cxl_perst_reloads_same_image(struct cxl_afu *afu,
bool perst_reloads_same_image);
+/*
+ * AFU driver ops allows an AFU driver to create their own events to pass to
+ * userspace through the file descriptor as a simpler alternative to overriding
+ * the read() and poll() calls that works with the generic cxl events. These
+ * events are given priority over the generic cxl events, so they will be
+ * delivered first if multiple types of events are pending.
+ *
+ * event_pending() will be called by the cxl driver to check if an event is
+ * pending (e.g. in select/poll/read calls).
+ *
+ * deliver_event() will be called to fill out a cxl_event structure with the
+ * driver specific event. The header will already have the type and
+ * process_element fields filled in, and header.size will be set to
+ * sizeof(struct cxl_event_header). The AFU driver can extend that size up to
+ * max_size (if an afu driver requires more space, they should submit a patch
+ * increasing the size in the struct cxl_event_afu_driver_reserved definition).
+ *
+ * Both of these calls are made with a spin lock held, so they must not sleep.
+ */
+struct cxl_afu_driver_ops {
+ bool (*event_pending) (struct cxl_context *ctx);
+ void (*deliver_event) (struct cxl_context *ctx,
+ struct cxl_event *event, size_t max_size);
+};
+
+/* Associate the above driver ops with a specific context */
+void cxl_set_driver_ops(struct cxl_context *ctx,
+ struct cxl_afu_driver_ops *ops);
+
#endif /* _MISC_CXL_H */
diff --git a/include/uapi/misc/cxl.h b/include/uapi/misc/cxl.h
index 1e889aa..8b097db 100644
--- a/include/uapi/misc/cxl.h
+++ b/include/uapi/misc/cxl.h
@@ -69,6 +69,7 @@ enum cxl_event_type {
CXL_EVENT_AFU_INTERRUPT = 1,
CXL_EVENT_DATA_STORAGE = 2,
CXL_EVENT_AFU_ERROR = 3,
+ CXL_EVENT_AFU_DRIVER = 4,
};
struct cxl_event_header {
@@ -100,12 +101,33 @@ struct cxl_event_afu_error {
__u64 error;
};
+struct cxl_event_afu_driver_reserved {
+ /*
+ * Reserves space for AFU driver specific events. Not actually
+ * reserving any more space compared to other events as we can't know
+ * how much an AFU driver will need (but it is likely to be small). If
+ * your AFU driver needs more than this, please submit a patch
+ * increasing it as part of your driver submission.
+ *
+ * This is not ABI since the event header.size passed to the user for
+ * existing events is set in the read call to sizeof(cxl_event_header)
+ * + sizeof(whatever event is being dispatched) and will not increase
+ * just because this is, and the user is already required to use a 4K
+ * buffer on the read call. This is merely the size of the buffer
+ * passed between the cxl and AFU drivers.
+ *
+ * Of course the contents will be ABI, but that's up the AFU driver.
+ */
+ __u64 reserved[4];
+};
+
struct cxl_event {
struct cxl_event_header header;
union {
struct cxl_event_afu_interrupt irq;
struct cxl_event_data_storage fault;
struct cxl_event_afu_error afu_error;
+ struct cxl_event_afu_driver_reserved afu_driver_event;
};
};
--
2.1.4
From: Michael Neuling <[email protected]>
This provides AFU drivers a means to associate private data with a cxl
context. This is particularly intended for make the new callbacks for
driver specific events easier for AFU drivers to use, as they can easily
get back to any private data structures they may use.
Signed-off-by: Michael Neuling <[email protected]>
Signed-off-by: Ian Munsie <[email protected]>
Reviewed-by: Matthew R. Ochs <[email protected]>
---
No changes since v1, added Matt Ochs reviewed-by tag.
drivers/misc/cxl/api.c | 21 +++++++++++++++++++++
drivers/misc/cxl/cxl.h | 3 +++
include/misc/cxl.h | 7 +++++++
3 files changed, 31 insertions(+)
diff --git a/drivers/misc/cxl/api.c b/drivers/misc/cxl/api.c
index eebc9c3..93b270c 100644
--- a/drivers/misc/cxl/api.c
+++ b/drivers/misc/cxl/api.c
@@ -91,6 +91,27 @@ int cxl_release_context(struct cxl_context *ctx)
}
EXPORT_SYMBOL_GPL(cxl_release_context);
+
+int cxl_set_priv(struct cxl_context *ctx, void *priv)
+{
+ if (!ctx)
+ return -EINVAL;
+
+ ctx->priv = priv;
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(cxl_set_priv);
+
+void *cxl_get_priv(struct cxl_context *ctx)
+{
+ if (!ctx)
+ return ERR_PTR(-EINVAL);
+
+ return ctx->priv;
+}
+EXPORT_SYMBOL_GPL(cxl_get_priv);
+
int cxl_allocate_afu_irqs(struct cxl_context *ctx, int num)
{
if (num == 0)
diff --git a/drivers/misc/cxl/cxl.h b/drivers/misc/cxl/cxl.h
index 64e8e0a..71f66e7 100644
--- a/drivers/misc/cxl/cxl.h
+++ b/drivers/misc/cxl/cxl.h
@@ -454,6 +454,9 @@ struct cxl_context {
/* Only used in PR mode */
u64 process_token;
+ /* driver private data */
+ void *priv;
+
unsigned long *irq_bitmap; /* Accessed from IRQ context */
struct cxl_irq_ranges irqs;
struct list_head irq_names;
diff --git a/include/misc/cxl.h b/include/misc/cxl.h
index 01d66a3..76c08cb 100644
--- a/include/misc/cxl.h
+++ b/include/misc/cxl.h
@@ -89,6 +89,13 @@ struct cxl_context *cxl_dev_context_init(struct pci_dev *dev);
int cxl_release_context(struct cxl_context *ctx);
/*
+ * Set and get private data associated with a context. Allows drivers to have a
+ * back pointer to some useful structure.
+ */
+int cxl_set_priv(struct cxl_context *ctx, void *priv);
+void *cxl_get_priv(struct cxl_context *ctx);
+
+/*
* Allocate AFU interrupts for this context. num=0 will allocate the default
* for this AFU as given in the AFU descriptor. This number doesn't include the
* interrupt 0 (CAIA defines AFU IRQ 0 for page faults). Each interrupt to be
--
2.1.4
> On Mar 7, 2016, at 7:48 PM, Ian Munsie <[email protected]> wrote:
>
> From: Ian Munsie <[email protected]>
>
> This adds an afu_driver_ops structure with event_pending and
> deliver_event callbacks. An AFU driver such as cxlflash can fill these
> out and associate it with a context to enable passing custom AFU
> specific events to userspace.
>
> The cxl driver will call event_pending() during poll, select, read, etc.
> calls to check if an AFU driver specific event is pending, and will call
> deliver_event() to deliver that event. This way, the cxl driver takes
> care of all the usual locking semantics around these calls and handles
> all the generic cxl events, so that the AFU driver only needs to worry
> about it's own events.
>
> The deliver_event() call is passed a struct cxl_event buffer to fill in.
> The header will already be filled in for an AFU driver event, and the
> AFU driver is expected to expand the header.size as necessary (up to
> max_size, defined by struct cxl_event_afu_driver_reserved) and fill out
> it's own information.
>
> Since AFU drivers provide their own means for userspace to obtain the
> AFU file descriptor (i.e. cxlflash uses an ioctl on their scsi file
> descriptor to obtain the AFU file descriptor) and the generic cxl driver
> will never use this event, the ABI of the event is up to each individual
> AFU driver.
>
> Signed-off-by: Ian Munsie <[email protected]>
Reviewed-by: Matthew R. Ochs <[email protected]>
On 08/03/16 12:48, Ian Munsie wrote:
> From: Ian Munsie <[email protected]>
>
> This adds an afu_driver_ops structure with event_pending and
> deliver_event callbacks. An AFU driver such as cxlflash can fill these
> out and associate it with a context to enable passing custom AFU
> specific events to userspace.
>
> The cxl driver will call event_pending() during poll, select, read, etc.
> calls to check if an AFU driver specific event is pending, and will call
> deliver_event() to deliver that event. This way, the cxl driver takes
> care of all the usual locking semantics around these calls and handles
> all the generic cxl events, so that the AFU driver only needs to worry
> about it's own events.
>
> The deliver_event() call is passed a struct cxl_event buffer to fill in.
> The header will already be filled in for an AFU driver event, and the
> AFU driver is expected to expand the header.size as necessary (up to
> max_size, defined by struct cxl_event_afu_driver_reserved) and fill out
> it's own information.
>
> Since AFU drivers provide their own means for userspace to obtain the
> AFU file descriptor (i.e. cxlflash uses an ioctl on their scsi file
> descriptor to obtain the AFU file descriptor) and the generic cxl driver
> will never use this event, the ABI of the event is up to each individual
> AFU driver.
>
> Signed-off-by: Ian Munsie <[email protected]>
Reviewed-by: Andrew Donnellan <[email protected]>
--
Andrew Donnellan Software Engineer, OzLabs
[email protected] Australia Development Lab, Canberra
+61 2 6201 8874 (work) IBM Australia Limited
On 08/03/16 12:48, Ian Munsie wrote:
> From: Michael Neuling <[email protected]>
>
> This provides AFU drivers a means to associate private data with a cxl
> context. This is particularly intended for make the new callbacks for
> driver specific events easier for AFU drivers to use, as they can easily
> get back to any private data structures they may use.
>
> Signed-off-by: Michael Neuling <[email protected]>
> Signed-off-by: Ian Munsie <[email protected]>
> Reviewed-by: Matthew R. Ochs <[email protected]>
Looks good to me.
Reviewed-by: Andrew Donnellan <[email protected]>
--
Andrew Donnellan Software Engineer, OzLabs
[email protected] Australia Development Lab, Canberra
+61 2 6201 8874 (work) IBM Australia Limited
Hi Ian,
Le 08/03/2016 02:48, Ian Munsie a écrit :
> diff --git a/drivers/misc/cxl/file.c b/drivers/misc/cxl/file.c
...
> +static inline bool ctx_event_pending(struct cxl_context *ctx)
> +{
> + if (ctx->pending_irq || ctx->pending_fault || ctx->pending_afu_err)
> + return true;
> +
> + if (ctx->afu_driver_ops)
> + return ctx->afu_driver_ops->event_pending(ctx);
> +
> + return false;
> +}
> +
...
> +
> + if (ctx->afu_driver_ops && ctx->afu_driver_ops->event_pending(ctx)) {
> + pr_devel("afu_read delivering AFU driver specific event\n");
> + event.header.type = CXL_EVENT_AFU_DRIVER;
> + ctx->afu_driver_ops->deliver_event(ctx, &event, sizeof(event));
> + WARN_ON(event.header.size > sizeof(event));
> +
> + } else if (ctx->pending_irq) {
So on afu_read(), we may call afu_driver_ops->event_pending() twice
before calling afu_driver_ops->deliver_event(). Actually, in the
(likely) scenario where there's only an afu_driver event pending, we
*will* call afu_driver_ops->event_pending() twice. Wouldn't it make
sense to cache it then?
It would also avoid entering
WARN(1, "afu_read must be buggy\n");
if the driver changes its mind between the 2 calls :-)
Fred
Hi Ian,
Sorry for getting into this discussion late. I have few suggestions.
Ian Munsie <[email protected]> writes:
>
> diff --git a/drivers/misc/cxl/Kconfig b/drivers/misc/cxl/Kconfig
> index 8756d06..560412c 100644
> --- a/drivers/misc/cxl/Kconfig
> +++ b/drivers/misc/cxl/Kconfig
> @@ -15,12 +15,17 @@ config CXL_EEH
> bool
> default n
>
> +config CXL_AFU_DRIVER_OPS
> + bool
> + default n
> +
> config CXL
> tristate "Support for IBM Coherent Accelerators (CXL)"
> depends on PPC_POWERNV && PCI_MSI && EEH
> select CXL_BASE
> select CXL_KERNEL_API
> select CXL_EEH
> + select CXL_AFU_DRIVER_OPS
I suggest wrapping the driver_ops struct definition and other related
functions inside a #ifdef CONFIG_CXL_AFU_DRIVER_OPS.
> default m
> help
> Select this option to enable driver support for IBM Coherent
> diff --git a/drivers/misc/cxl/api.c b/drivers/misc/cxl/api.c
> index ea3eeb7..eebc9c3 100644
> --- a/drivers/misc/cxl/api.c
> +++ b/drivers/misc/cxl/api.c
> @@ -296,6 +296,14 @@ struct cxl_context *cxl_fops_get_context(struct file *file)
> }
> EXPORT_SYMBOL_GPL(cxl_fops_get_context);
>
> +void cxl_set_driver_ops(struct cxl_context *ctx,
> + struct cxl_afu_driver_ops *ops)
> +{
> + WARN_ON(!ops->event_pending || !ops->deliver_event);
> + ctx->afu_driver_ops = ops;
> +}
I would recommend adding a "struct module *" member to afu_driver_ops
and doing a __module_get on to it here and module_put when we destroy
the context. Since these callbacks will be residing within an external
module .text region hence it should stay in the memory until the context
is alive.
> diff --git a/drivers/misc/cxl/file.c b/drivers/misc/cxl/file.c
> index 783337d..d1cc297 100644
> --- a/drivers/misc/cxl/file.c
> +++ b/drivers/misc/cxl/file.c
> @@ -295,6 +295,17 @@ int afu_mmap(struct file *file, struct vm_area_struct *vm)
> return cxl_context_iomap(ctx, vm);
> }
>
> +static inline bool ctx_event_pending(struct cxl_context *ctx)
> +{
> + if (ctx->pending_irq || ctx->pending_fault || ctx->pending_afu_err)
> + return true;
> +
> + if (ctx->afu_driver_ops)
> + return ctx->afu_driver_ops->event_pending(ctx);
Should also check if ctx->afu_driver_ops->event_pending is NULL before
calling it.
> +/*
> + * AFU driver ops allows an AFU driver to create their own events to pass to
> + * userspace through the file descriptor as a simpler alternative to overriding
> + * the read() and poll() calls that works with the generic cxl events. These
> + * events are given priority over the generic cxl events, so they will be
> + * delivered first if multiple types of events are pending.
> + *
> + * event_pending() will be called by the cxl driver to check if an event is
> + * pending (e.g. in select/poll/read calls).
> + *
> + * deliver_event() will be called to fill out a cxl_event structure with the
> + * driver specific event. The header will already have the type and
> + * process_element fields filled in, and header.size will be set to
> + * sizeof(struct cxl_event_header). The AFU driver can extend that size up to
> + * max_size (if an afu driver requires more space, they should submit a patch
> + * increasing the size in the struct cxl_event_afu_driver_reserved definition).
> + *
> + * Both of these calls are made with a spin lock held, so they must not sleep.
> + */
> +struct cxl_afu_driver_ops {
> + bool (*event_pending) (struct cxl_context *ctx);
> + void (*deliver_event) (struct cxl_context *ctx,
> + struct cxl_event *event, size_t max_size);
> +};
> +
I would propose these two apis.
/*
* fetches an event from the driver event queue. NULL means that queue
* is empty. Can sleep if needed. The memory for cxl_event is allocated
* by module being called. Hence it can be potentially be larger then
* sizeof(struct cxl_event). Multiple calls to this should return same
* pointer untill ack_event is called.
*/
struct cxl_event * fetch_event(struct cxl_context * ctx);
/*
* Returns and acknowledge the struct cxl_event * back to the driver
* which can then free it or maybe put it back in a kmem_cache. This
* should be called once we have completely returned the current
* struct cxl_event from the readcall
*/
void ack_event(struct cxl_context * ctx, struct cxl_event *);
I think above apis would give us more flexbility in the future when
drivers would want to send larger events without breaking the abi.
Cheers,
~ Vaibhav
> On Mar 9, 2016, at 8:37 AM, Vaibhav Jain <[email protected]> wrote:
>> +/*
>> + * AFU driver ops allows an AFU driver to create their own events to pass to
>> + * userspace through the file descriptor as a simpler alternative to overriding
>> + * the read() and poll() calls that works with the generic cxl events. These
>> + * events are given priority over the generic cxl events, so they will be
>> + * delivered first if multiple types of events are pending.
>> + *
>> + * event_pending() will be called by the cxl driver to check if an event is
>> + * pending (e.g. in select/poll/read calls).
>> + *
>> + * deliver_event() will be called to fill out a cxl_event structure with the
>> + * driver specific event. The header will already have the type and
>> + * process_element fields filled in, and header.size will be set to
>> + * sizeof(struct cxl_event_header). The AFU driver can extend that size up to
>> + * max_size (if an afu driver requires more space, they should submit a patch
>> + * increasing the size in the struct cxl_event_afu_driver_reserved definition).
>> + *
>> + * Both of these calls are made with a spin lock held, so they must not sleep.
>> + */
>> +struct cxl_afu_driver_ops {
>> + bool (*event_pending) (struct cxl_context *ctx);
>> + void (*deliver_event) (struct cxl_context *ctx,
>> + struct cxl_event *event, size_t max_size);
>> +};
>> +
>
> I would propose these two apis.
>
> /*
> * fetches an event from the driver event queue. NULL means that queue
> * is empty. Can sleep if needed. The memory for cxl_event is allocated
> * by module being called. Hence it can be potentially be larger then
> * sizeof(struct cxl_event). Multiple calls to this should return same
> * pointer untill ack_event is called.
> */
> struct cxl_event * fetch_event(struct cxl_context * ctx);
>
> /*
> * Returns and acknowledge the struct cxl_event * back to the driver
> * which can then free it or maybe put it back in a kmem_cache. This
> * should be called once we have completely returned the current
> * struct cxl_event from the readcall
> */
> void ack_event(struct cxl_context * ctx, struct cxl_event *);
>
> I think above apis would give us more flexbility in the future when
> drivers would want to send larger events without breaking the abi.
>From a cxlflash perspective, I think we'd be fine with this model as
long as the driver events are still prioritized. I do like the removal of
the no-sleep requirement and this would allow us to simply hand off
an already populated event reference.
Hi Vaibhav,
Le 09/03/2016 15:37, Vaibhav Jain a écrit :
> I would propose these two apis.
>
> /*
> * fetches an event from the driver event queue. NULL means that queue
> * is empty. Can sleep if needed. The memory for cxl_event is allocated
> * by module being called. Hence it can be potentially be larger then
> * sizeof(struct cxl_event). Multiple calls to this should return same
> * pointer untill ack_event is called.
> */
> struct cxl_event * fetch_event(struct cxl_context * ctx);
>
> /*
> * Returns and acknowledge the struct cxl_event * back to the driver
> * which can then free it or maybe put it back in a kmem_cache. This
> * should be called once we have completely returned the current
> * struct cxl_event from the readcall
> */
> void ack_event(struct cxl_context * ctx, struct cxl_event *);
How would you implement polling on those APIs?
How would you implement afu_read? There are several sources of events.
Fred
Excerpts from Frederic Barrat's message of 2016-03-09 20:27:20 +1100:
> So on afu_read(), we may call afu_driver_ops->event_pending() twice
> before calling afu_driver_ops->deliver_event(). Actually, in the
> (likely) scenario where there's only an afu_driver event pending, we
> *will* call afu_driver_ops->event_pending() twice. Wouldn't it make
> sense to cache it then?
Yep, will change it.
Excerpts from Vaibhav Jain's message of 2016-03-10 01:37:56 +1100:
> > + select CXL_AFU_DRIVER_OPS
> I suggest wrapping the driver_ops struct definition and other related
> functions inside a #ifdef CONFIG_CXL_AFU_DRIVER_OPS.
No, the kconfig option is there so that cxlflash can add support for
this and not have to worry about breaking any builds if their code is
merged into the scsi tree that doesn't have our code yet.
There is nothing optional about this within our driver, which is why
this is a select and has no configuration choice of it's own.
On a related matter, we should send a patch to remove some of the
leftover config options that were added to smooth the merging of
cxlflash in the first place (CXL_KERNEL_API, CXL_EEH).
> > +void cxl_set_driver_ops(struct cxl_context *ctx,
> > + struct cxl_afu_driver_ops *ops)
> > +{
> > + WARN_ON(!ops->event_pending || !ops->deliver_event);
> > + ctx->afu_driver_ops = ops;
> > +}
> I would recommend adding a "struct module *" member to afu_driver_ops
> and doing a __module_get on to it here and module_put when we destroy
> the context. Since these callbacks will be residing within an external
> module .text region hence it should stay in the memory until the context
> is alive.
ok
> > diff --git a/drivers/misc/cxl/file.c b/drivers/misc/cxl/file.c
> > index 783337d..d1cc297 100644
> > --- a/drivers/misc/cxl/file.c
> > +++ b/drivers/misc/cxl/file.c
> > @@ -295,6 +295,17 @@ int afu_mmap(struct file *file, struct vm_area_struct *vm)
> > return cxl_context_iomap(ctx, vm);
> > }
> >
> > +static inline bool ctx_event_pending(struct cxl_context *ctx)
> > +{
> > + if (ctx->pending_irq || ctx->pending_fault || ctx->pending_afu_err)
> > + return true;
> > +
> > + if (ctx->afu_driver_ops)
> > + return ctx->afu_driver_ops->event_pending(ctx);
> Should also check if ctx->afu_driver_ops->event_pending is NULL before
> calling it.
The v1 patch did exactly that and mpe rejected it as it made this code
too ugly - we now check that event_pending field is valid when it is
registered and WARN if it is not.
> I would propose these two apis.
>
> /*
> * fetches an event from the driver event queue. NULL means that queue
> * is empty. Can sleep if needed. The memory for cxl_event is allocated
> * by module being called. Hence it can be potentially be larger then
> * sizeof(struct cxl_event). Multiple calls to this should return same
> * pointer untill ack_event is called.
> */
> struct cxl_event * fetch_event(struct cxl_context * ctx);
>
> /*
> * Returns and acknowledge the struct cxl_event * back to the driver
> * which can then free it or maybe put it back in a kmem_cache. This
> * should be called once we have completely returned the current
> * struct cxl_event from the readcall
> */
> void ack_event(struct cxl_context * ctx, struct cxl_event *);
>
> I think above apis would give us more flexbility in the future when
> drivers would want to send larger events without breaking the abi.
I'm very reluctant to make this kind of change - while nice on paper,
poll() and read() are already very easy calls to screw up, and we have
seen that happen countless times in the past from different drivers that
e.g. and end up in a situation where poll says there is an event but
then read blocks, or poll blocks even though there is an event already
pending.
The API at the moment fits into the poll() / read() model and has
appropriate locking and the correct waiting semantics to avoid those
kind of issues (provided that the afu driver doesn't do something that
violates these semantics like sleep in one of these calls, but the
kernel has debug features to detect that), but any deviation from this
is too risky in my view.
Cheers,
-Ian
Excerpts from Frederic Barrat's message of 2016-03-09 20:27:20 +1100:
> It would also avoid entering
> WARN(1, "afu_read must be buggy\n");
> if the driver changes its mind between the 2 calls :-)
Honestly, it had better not - that would be a gross violation of the
poll & read semantics and the kind of thing that leads to application
hangs.
Cheers,
-Ian
On Wed, 2016-03-09 at 20:07 +0530, Vaibhav Jain wrote:
> Hi Ian,
>
> Sorry for getting into this discussion late. I have few suggestions.
>
> Ian Munsie <[email protected]> writes:
> >
> > diff --git a/drivers/misc/cxl/Kconfig b/drivers/misc/cxl/Kconfig
> > index 8756d06..560412c 100644
> > --- a/drivers/misc/cxl/Kconfig
> > +++ b/drivers/misc/cxl/Kconfig
> > @@ -15,12 +15,17 @@ config CXL_EEH
> > bool
> > default n
> >
> > +config CXL_AFU_DRIVER_OPS
> > + bool
> > + default n
> > +
> > config CXL
> > tristate "Support for IBM Coherent Accelerators (CXL)"
> > depends on PPC_POWERNV && PCI_MSI && EEH
> > select CXL_BASE
> > select CXL_KERNEL_API
> > select CXL_EEH
> > + select CXL_AFU_DRIVER_OPS
> I suggest wrapping the driver_ops struct definition and other related
> functions inside a #ifdef CONFIG_CXL_AFU_DRIVER_OPS.
These are here to enable the feature in other drivers. So the cxlflash
(or whoever) can put their code in via the linux-scsi tree but that new
piece is only enabled when CXL_AFU_DRIVER_OPS is present (ie. when
merged upstream). But if it's not, their code can still compile.
Hence their code compiles in linux-scsi and our code compiles in linux
-ppc, but only once they're together do they actually enable the full
feature. We don't have a nasty dependency of linux-scsi having to pull
in linux-ppc or visa versa before the merge window. Everyone works
independently and it all gets fixed in linus tree.
Eventually, when everyone has the all the code in merged upstream, we
can remove these config options. We should be able to remove
CXL_KERNEL_API and CXL_EEH now actually!
So no, we shouldn't wrap the actual code.
Mikey
Frederic Barrat <[email protected]> writes:
> Hi Vaibhav,
>
> Le 09/03/2016 15:37, Vaibhav Jain a écrit :
>
>> I would propose these two apis.
>>
>> /*
>> * fetches an event from the driver event queue. NULL means that queue
>> * is empty. Can sleep if needed. The memory for cxl_event is allocated
>> * by module being called. Hence it can be potentially be larger then
>> * sizeof(struct cxl_event). Multiple calls to this should return same
>> * pointer untill ack_event is called.
>> */
>> struct cxl_event * fetch_event(struct cxl_context * ctx);
>>
>> /*
>> * Returns and acknowledge the struct cxl_event * back to the driver
>> * which can then free it or maybe put it back in a kmem_cache. This
>> * should be called once we have completely returned the current
>> * struct cxl_event from the readcall
>> */
>> void ack_event(struct cxl_context * ctx, struct cxl_event *);
>
>
> How would you implement polling on those APIs?
Hi Fred. I am looking at an implementation similar to this:
static inline bool ctx_event_pending(struct cxl_context *ctx)
{
typeof (ctx->afu_driver_ops->fetch_event) fn_events =
(ctx->afu_driver_ops != NULL) ? ctx->afu_driver_ops->fetch_event : NULL;
if (ctx->pending_irq || ctx->pending_fault || ctx->pending_afu_err)
return true;
/*
* if fn_event returns a not null then its gauranteed to return
* the same pointer on next call
*/
if (fn_events)
return fn_events(ctx) != NULL;
return false;
}
unsigned int afu_poll(struct file *file, struct poll_table_struct *poll)
{
struct cxl_context *ctx = file->private_data;
int mask = 0;
unsigned long flags;
poll_wait(file, &ctx->wq, poll);
pr_devel("afu_poll wait done pe: %i\n", ctx->pe);
spin_lock_irqsave(&ctx->lock, flags);
if (ctx_event_pending(ctx))
mask |= POLLIN | POLLRDNORM;
else if (ctx->status == CLOSED)
/* Only error on closed when there are no futher events pending
*/
mask |= POLLERR;
spin_unlock_irqrestore(&ctx->lock, flags);
pr_devel("afu_poll pe: %i returning %#x\n", ctx->pe, mask);
return mask;
}
> How would you implement afu_read? There are several sources of events.
Looking at an implementation similar to this:
ssize_t afu_read(struct file *file, char __user *buf, size_t count,
loff_t *off)
{
unsigned long flags;
ssize_t rc = 0;
struct cxl_context *ctx = file->private_data;
struct cxl_event *ptr_event, event = {
.header.process_element = ctx->pe,
.header.size = sizeof(struct cxl_event_header)
};
typeof (ctx->afu_driver_ops->fetch_event) fn_fetch_event =
(ctx->afu_driver_ops != NULL) ? ctx->afu_driver_ops->fetch_event : NULL;
typeof (ctx->afu_driver_ops->ack_event) fn_ack_event =
(ctx->afu_driver_ops != NULL) ? ctx->afu_driver_ops->ack_event : NULL;
if (count < CXL_READ_MIN_SIZE)
return -EINVAL;
if (!cxl_adapter_link_ok(ctx->afu->adapter) ||
ctx->status == CLOSED)
return -EIO;
if (signal_pending(current))
return -ERESTARTSYS;
/* if no events then wait */
if (!ctx_event_pending(ctx)) {
if ((file->f_flags & O_NONBLOCK))
return -EAGAIN;
pr_devel("afu_read going to sleep...\n");
rc = wait_event_interruptible(ctx->wq,
(ctx->status == CLOSED) ||
cxl_adapter_link_ok(ctx->afu->adapter) ||
ctx_event_pending(ctx));
pr_devel("afu_read woken up\n");
}
/* did we get interrupted during wait sleep */
if (rc)
return rc;
/* get driver events if any */
ptr_event = fn_fetch_event ? fn_fetch_event(ctx) : NULL;
/* In case of error feching driver specific event */
if (IS_ERR(ptr_event)) {
pr_warn("Error fetching driver specific event %ld", PTR_ERR(ptr_event));
ptr_event = NULL;
}
/* code below manipulates ctx so take a spin lock */
spin_lock_irqsave(&ctx->lock, flags);
/* give driver events first priority */
if (ptr_event) {
pr_devel("afu_read delivering AFU driver specific event\n");
/* populate the header type and pe in the event struct */
ptr_event->header.type = CXL_EVENT_AFU_DRIVER;
ptr_event->header.process_element = ctx->pe;
WARN_ON(event.header.size > count);
} else if (ctx->pending_irq) {
pr_devel("afu_read delivering AFU interrupt\n");
event.header.size += sizeof(struct cxl_event_afu_interrupt);
event.header.type = CXL_EVENT_AFU_INTERRUPT;
event.irq.irq = find_first_bit(ctx->irq_bitmap, ctx->irq_count) + 1;
clear_bit(event.irq.irq - 1, ctx->irq_bitmap);
if (bitmap_empty(ctx->irq_bitmap, ctx->irq_count))
ctx->pending_irq = false;
} else if (ctx->pending_fault) {
pr_devel("afu_read delivering data storage fault\n");
event.header.size += sizeof(struct cxl_event_data_storage);
event.header.type = CXL_EVENT_DATA_STORAGE;
event.fault.addr = ctx->fault_addr;
event.fault.dsisr = ctx->fault_dsisr;
ctx->pending_fault = false;
} else if (ctx->pending_afu_err) {
pr_devel("afu_read delivering afu error\n");
event.header.size += sizeof(struct cxl_event_afu_error);
event.header.type = CXL_EVENT_AFU_ERROR;
event.afu_error.error = ctx->afu_err;
ctx->pending_afu_err = false;
} else if (ctx->status == CLOSED) {
pr_devel("afu_read fatal error\n");
rc = -EIO;
} else
WARN(1, "afu_read must be buggy\n");
spin_unlock_irqrestore(&ctx->lock, flags);
if (!rc) {
/* if we dont have a driver event then use 'event' var */
ptr_event = ptr_event ? ptr_event : &event;
rc = min(((size_t)ptr_event->header.size), count);
if (copy_to_user(buf, ptr_event, rc))
rc = -EFAULT;
}
/* if its a driver event ack it back to the driver */
if (fn_ack_event && (ptr_event != &event))
fn_ack_event(ctx, ptr_event);
return rc;
}
Cheers,
~ Vaibhav
Michael Neuling <[email protected]> writes:
> These are here to enable the feature in other drivers. So the cxlflash
> (or whoever) can put their code in via the linux-scsi tree but that new
> piece is only enabled when CXL_AFU_DRIVER_OPS is present (ie. when
> merged upstream). But if it's not, their code can still compile.
>
> Hence their code compiles in linux-scsi and our code compiles in linux
> -ppc, but only once they're together do they actually enable the full
> feature. We don't have a nasty dependency of linux-scsi having to pull
> in linux-ppc or visa versa before the merge window. Everyone works
> independently and it all gets fixed in linus tree.
>
> Eventually, when everyone has the all the code in merged upstream, we
> can remove these config options. We should be able to remove
> CXL_KERNEL_API and CXL_EEH now actually!
>
> So no, we shouldn't wrap the actual code.
Mikey & Ian,
Agree on the point made. Thanks for detailed explaination.
~ Vaibhav
Ian Munsie <[email protected]> writes:
> No, the kconfig option is there so that cxlflash can add support for
> this and not have to worry about breaking any builds if their code is
> merged into the scsi tree that doesn't have our code yet.
>
> There is nothing optional about this within our driver, which is why
> this is a select and has no configuration choice of it's own.
>
> On a related matter, we should send a patch to remove some of the
> leftover config options that were added to smooth the merging of
> cxlflash in the first place (CXL_KERNEL_API, CXL_EEH).
Agreed.
>> > diff --git a/drivers/misc/cxl/file.c b/drivers/misc/cxl/file.c
>> > index 783337d..d1cc297 100644
>> > --- a/drivers/misc/cxl/file.c
>> > +++ b/drivers/misc/cxl/file.c
>> > @@ -295,6 +295,17 @@ int afu_mmap(struct file *file, struct vm_area_struct *vm)
>> > return cxl_context_iomap(ctx, vm);
>> > }
>> >
>> > +static inline bool ctx_event_pending(struct cxl_context *ctx)
>> > +{
>> > + if (ctx->pending_irq || ctx->pending_fault || ctx->pending_afu_err)
>> > + return true;
>> > +
>> > + if (ctx->afu_driver_ops)
>> > + return ctx->afu_driver_ops->event_pending(ctx);
>> Should also check if ctx->afu_driver_ops->event_pending is NULL before
>> calling it.
>
> The v1 patch did exactly that and mpe rejected it as it made this code
> too ugly - we now check that event_pending field is valid when it is
> registered and WARN if it is not.
The driver_ops struct pointer being passed is still owned by the
external module and its free change it even after calling this
function. We can mitigate this to some extent by accepting a const
pointer to the struct or by copying this struct to the context object.
> I'm very reluctant to make this kind of change - while nice on paper,
> poll() and read() are already very easy calls to screw up, and we have
> seen that happen countless times in the past from different drivers that
> e.g. and end up in a situation where poll says there is an event but
> then read blocks, or poll blocks even though there is an event already
> pending.
>
> The API at the moment fits into the poll() / read() model and has
> appropriate locking and the correct waiting semantics to avoid those
> kind of issues (provided that the afu driver doesn't do something that
> violates these semantics like sleep in one of these calls, but the
> kernel has debug features to detect that), but any deviation from this
> is too risky in my view.
Ian I have responded to Fred with an example implementation of these
calls. Requesting you to please take a look.
Cheers,
~ Vaibhav
On 10/03/16 12:18, Ian Munsie wrote:
> On a related matter, we should send a patch to remove some of the
> leftover config options that were added to smooth the merging of
> cxlflash in the first place (CXL_KERNEL_API, CXL_EEH).
I'm happy to do that after this series is merged.
--
Andrew Donnellan Software Engineer, OzLabs
[email protected] Australia Development Lab, Canberra
+61 2 6201 8874 (work) IBM Australia Limited