2019-09-19 00:10:21

by Jacob Pan

Subject: [PATCH 3/4] iommu/ioasid: Add custom allocators

IOASID allocation may rely on platform-specific methods. One use case is
that, when running in a guest, an emulated allocation interface is needed to
communicate with the host in order to obtain system-wide global IOASIDs.
Here we call these platform-specific allocators "custom allocators".

Custom IOASID allocators can be registered at runtime and take precedence
over the default XArray allocator. They have these attributes:

- They provide platform-specific alloc()/free() functions with private data.
- Lookup of allocation results is not provided by the allocator; lookups are
  handled by the IOASID framework through its own XArray.
- Allocators can be unregistered at runtime, falling back either to the next
  custom allocator or to the default allocator.
- Custom allocators can share the same set of alloc()/free() helpers, in
  which case they also share the same IOASID space and thus the same XArray.
- Switching between allocators requires all outstanding IOASIDs to be freed,
  unless the two allocators share the same alloc()/free() helpers.
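
For illustration only (not part of this patch), a hypothetical guest driver
could register a custom allocator roughly like this; struct guest_iommu and
the guest_iommu_send_*() paravirt helpers are made-up names:

	static ioasid_t guest_ioasid_alloc(ioasid_t min, ioasid_t max, void *data)
	{
		struct guest_iommu *giommu = data;

		/* Forward the request to the host, e.g. via a virtual command */
		return guest_iommu_send_alloc(giommu, min, max);
	}

	static void guest_ioasid_free(ioasid_t ioasid, void *data)
	{
		struct guest_iommu *giommu = data;

		guest_iommu_send_free(giommu, ioasid);
	}

	static struct ioasid_allocator_ops guest_ioasid_ops = {
		.alloc = guest_ioasid_alloc,
		.free  = guest_ioasid_free,
		.pdata = &giommu,	/* private data passed back to the helpers */
	};

	/* Takes precedence over the default XArray allocator once registered */
	ret = ioasid_register_allocator(&guest_ioasid_ops);
	...
	ioasid_unregister_allocator(&guest_ioasid_ops);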

Signed-off-by: Jean-Philippe Brucker <[email protected]>
Signed-off-by: Jacob Pan <[email protected]>
Link: https://lkml.org/lkml/2019/4/26/462
---
drivers/iommu/ioasid.c | 301 +++++++++++++++++++++++++++++++++++++++++++++++--
include/linux/ioasid.h | 28 +++++
2 files changed, 319 insertions(+), 10 deletions(-)

diff --git a/drivers/iommu/ioasid.c b/drivers/iommu/ioasid.c
index 6fbea76a47cf..5b6ead4b07d6 100644
--- a/drivers/iommu/ioasid.c
+++ b/drivers/iommu/ioasid.c
@@ -17,7 +17,254 @@ struct ioasid_data {
struct rcu_head rcu;
};

-static DEFINE_XARRAY_ALLOC(ioasid_xa);
+/*
+ * struct ioasid_allocator_data - Internal data structure to hold information
+ * about an allocator. There are two types of allocators:
+ *
+ * - Default allocator always has its own XArray to track the IOASIDs allocated.
+ * - Custom allocators may share allocation helpers with different private data.
+ * Custom allocators share the same helper functions also share the same
+ * XArray.
+ * Rules:
+ * 1. Default allocator is always available, not dynamically registered. This is
+ * to prevent race conditions with early boot code that want to register
+ * custom allocators or allocate IOASIDs.
+ * 2. Custom allocators take precedence over the default allocator.
+ * 3. When all custom allocators sharing the same helper functions are
+ * unregistered (e.g. due to hotplug), all outstanding IOASIDs must be
+ * freed.
+ * 4. When switching between custom allocators sharing the same helper
+ * functions, outstanding IOASIDs are preserved.
+ * 5. When switching between custom allocator and default allocator, all IOASIDs
+ * must be freed to ensure unadulterated space for the new allocator.
+ *
+ * @ops: allocator helper functions and its data
+ * @list: registered custom allocators
+ * @slist: allocators share the same ops but different data
+ * @flags: attributes of the allocator
+ * @xa xarray holds the IOASID space
+ * @users number of allocators sharing the same ops and XArray
+ */
+struct ioasid_allocator_data {
+ struct ioasid_allocator_ops *ops;
+ struct list_head list;
+ struct list_head slist;
+#define IOASID_ALLOCATOR_CUSTOM BIT(0) /* Needs framework to track results */
+ unsigned long flags;
+ struct xarray xa;
+ refcount_t users;
+};
+
+static DEFINE_SPINLOCK(ioasid_allocator_lock);
+static LIST_HEAD(allocators_list);
+
+static ioasid_t default_alloc(ioasid_t min, ioasid_t max, void *opaque);
+static void default_free(ioasid_t ioasid, void *opaque);
+
+static struct ioasid_allocator_ops default_ops = {
+ .alloc = default_alloc,
+ .free = default_free,
+};
+
+static struct ioasid_allocator_data default_allocator = {
+ .ops = &default_ops,
+ .flags = 0,
+ .xa = XARRAY_INIT(ioasid_xa, XA_FLAGS_ALLOC),
+};
+
+static struct ioasid_allocator_data *active_allocator = &default_allocator;
+
+static ioasid_t default_alloc(ioasid_t min, ioasid_t max, void *opaque)
+{
+ ioasid_t id;
+
+ if (xa_alloc(&default_allocator.xa, &id, opaque, XA_LIMIT(min, max), GFP_ATOMIC)) {
+ pr_err("Failed to alloc ioasid from %d to %d\n", min, max);
+ return INVALID_IOASID;
+ }
+
+ return id;
+}
+
+static void default_free(ioasid_t ioasid, void *opaque)
+{
+ struct ioasid_data *ioasid_data;
+
+ ioasid_data = xa_erase(&default_allocator.xa, ioasid);
+ kfree_rcu(ioasid_data, rcu);
+}
+
+/* Allocate and initialize a new custom allocator with its helper functions */
+static struct ioasid_allocator_data *ioasid_alloc_allocator(struct ioasid_allocator_ops *ops)
+{
+ struct ioasid_allocator_data *ia_data;
+
+ ia_data = kzalloc(sizeof(*ia_data), GFP_ATOMIC);
+ if (!ia_data)
+ return NULL;
+
+ xa_init_flags(&ia_data->xa, XA_FLAGS_ALLOC);
+ INIT_LIST_HEAD(&ia_data->slist);
+ ia_data->flags |= IOASID_ALLOCATOR_CUSTOM;
+ ia_data->ops = ops;
+
+ /* For tracking custom allocators that share the same ops */
+ list_add_tail(&ops->list, &ia_data->slist);
+ refcount_set(&ia_data->users, 1);
+
+ return ia_data;
+}
+
+static bool use_same_ops(struct ioasid_allocator_ops *a, struct ioasid_allocator_ops *b)
+{
+ return (a->free == b->free) && (a->alloc == b->alloc);
+}
+
+/**
+ * ioasid_register_allocator - register a custom allocator
+ * @ops: the custom allocator ops to be registered
+ *
+ * Custom allocators take precedence over the default xarray based allocator.
+ * Private data associated with the IOASID allocated by the custom allocators
+ * are managed by IOASID framework similar to data stored in xa by default
+ * allocator.
+ *
+ * There can be multiple allocators registered but only one is active. In case
+ * of runtime removal of a custom allocator, the next one is activated based
+ * on the registration ordering.
+ *
+ * Multiple allocators can share the same alloc() function, in this case the
+ * IOASID space is shared.
+ */
+int ioasid_register_allocator(struct ioasid_allocator_ops *ops)
+{
+ struct ioasid_allocator_data *ia_data;
+ struct ioasid_allocator_data *pallocator;
+ int ret = 0;
+
+ spin_lock(&ioasid_allocator_lock);
+
+ ia_data = ioasid_alloc_allocator(ops);
+ if (!ia_data) {
+ ret = -ENOMEM;
+ goto out_unlock;
+ }
+
+ /*
+ * No particular preference, we activate the first one and keep
+ * the later registered allocators in a list in case the first one gets
+ * removed due to hotplug.
+ */
+ if (list_empty(&allocators_list)) {
+ WARN_ON(active_allocator != &default_allocator);
+ /* Use this new allocator if default is not active */
+ if (xa_empty(&active_allocator->xa)) {
+ active_allocator = ia_data;
+ list_add_tail(&ia_data->list, &allocators_list);
+ goto out_unlock;
+ }
+ pr_warn("Default allocator active with outstanding IOASID\n");
+ ret = -EAGAIN;
+ goto out_free;
+ }
+
+ /* Check if the allocator is already registered */
+ list_for_each_entry(pallocator, &allocators_list, list) {
+ if (pallocator->ops == ops) {
+ pr_err("IOASID allocator already registered\n");
+ ret = -EEXIST;
+ goto out_free;
+ } else if (use_same_ops(pallocator->ops, ops)) {
+ /*
+ * If the new allocator shares the same ops,
+ * then they will share the same IOASID space.
+ * We should put them under the same xarray.
+ */
+ list_add_tail(&ops->list, &pallocator->slist);
+ refcount_inc(&pallocator->users);
+ pr_info("New IOASID allocator ops shared %u times\n",
+ refcount_read(&pallocator->users));
+ goto out_free;
+ }
+ }
+ list_add_tail(&ia_data->list, &allocators_list);
+
+ spin_unlock(&ioasid_allocator_lock);
+ return 0;
+out_free:
+ kfree(ia_data);
+out_unlock:
+ spin_unlock(&ioasid_allocator_lock);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(ioasid_register_allocator);
+
+static inline bool has_shared_ops(struct ioasid_allocator_data *allocator)
+{
+ return refcount_read(&allocator->users) > 1;
+}
+
+/**
+ * ioasid_unregister_allocator - Remove a custom IOASID allocator ops
+ * @ops: the custom allocator to be removed
+ *
+ * Remove an allocator from the list, activate the next allocator in
+ * the order it was registered. Or revert to default allocator if all
+ * custom allocators are unregistered without outstanding IOASIDs.
+ */
+void ioasid_unregister_allocator(struct ioasid_allocator_ops *ops)
+{
+ struct ioasid_allocator_data *pallocator;
+ struct ioasid_allocator_ops *sops;
+
+ spin_lock(&ioasid_allocator_lock);
+ if (list_empty(&allocators_list)) {
+ pr_warn("No custom IOASID allocators active!\n");
+ goto exit_unlock;
+ }
+
+ list_for_each_entry(pallocator, &allocators_list, list) {
+ if (!use_same_ops(pallocator->ops, ops))
+ continue;
+
+ if (refcount_read(&pallocator->users) == 1) {
+ /* No shared helper functions */
+ list_del(&pallocator->list);
+ /*
+ * All IOASIDs should have been freed before
+ * the last allocator that shares the same ops
+ * is unregistered.
+ */
+ WARN_ON(!xa_empty(&pallocator->xa));
+ kfree(pallocator);
+ if (list_empty(&allocators_list)) {
+ pr_info("No custom IOASID allocators, switch to default.\n");
+ active_allocator = &default_allocator;
+ } else if (pallocator == active_allocator) {
+ active_allocator = list_first_entry(&allocators_list, struct ioasid_allocator_data, list);
+ pr_info("IOASID allocator changed");
+ }
+ break;
+ }
+ /*
+ * Find the matching shared ops to delete,
+ * but keep outstanding IOASIDs
+ */
+ list_for_each_entry(sops, &pallocator->slist, list) {
+ if (sops == ops) {
+ list_del(&ops->list);
+ if (refcount_dec_and_test(&pallocator->users))
+ pr_err("no shared ops\n");
+ break;
+ }
+ }
+ break;
+ }
+
+exit_unlock:
+ spin_unlock(&ioasid_allocator_lock);
+}
+EXPORT_SYMBOL_GPL(ioasid_unregister_allocator);

/**
* ioasid_set_data - Set private data for an allocated ioasid
@@ -32,13 +279,13 @@ int ioasid_set_data(ioasid_t ioasid, void *data)
struct ioasid_data *ioasid_data;
int ret = 0;

- xa_lock(&ioasid_xa);
- ioasid_data = xa_load(&ioasid_xa, ioasid);
+ spin_lock(&ioasid_allocator_lock);
+ ioasid_data = xa_load(&active_allocator->xa, ioasid);
if (ioasid_data)
rcu_assign_pointer(ioasid_data->private, data);
else
ret = -ENOENT;
- xa_unlock(&ioasid_xa);
+ spin_unlock(&ioasid_allocator_lock);

/*
* Wait for readers to stop accessing the old private data, so the
@@ -66,8 +313,9 @@ EXPORT_SYMBOL_GPL(ioasid_set_data);
ioasid_t ioasid_alloc(struct ioasid_set *set, ioasid_t min, ioasid_t max,
void *private)
{
- ioasid_t id;
struct ioasid_data *data;
+ void *adata;
+ ioasid_t id;

data = kzalloc(sizeof(*data), GFP_KERNEL);
if (!data)
@@ -76,14 +324,34 @@ ioasid_t ioasid_alloc(struct ioasid_set *set, ioasid_t min, ioasid_t max,
data->set = set;
data->private = private;

- if (xa_alloc(&ioasid_xa, &id, data, XA_LIMIT(min, max), GFP_KERNEL)) {
- pr_err("Failed to alloc ioasid from %d to %d\n", min, max);
+ /*
+ * Custom allocator needs allocator data to perform platform specific
+ * operations.
+ */
+ spin_lock(&ioasid_allocator_lock);
+ adata = active_allocator->flags & IOASID_ALLOCATOR_CUSTOM ? active_allocator->ops->pdata : data;
+ id = active_allocator->ops->alloc(min, max, adata);
+ if (id == INVALID_IOASID) {
+ pr_err("Failed ASID allocation %lu\n", active_allocator->flags);
goto exit_free;
}
+
+ if (active_allocator->flags & IOASID_ALLOCATOR_CUSTOM) {
+ /* Custom allocator needs framework to store and track allocation results */
+ min = max = id;
+
+ if (xa_alloc(&active_allocator->xa, &id, data, XA_LIMIT(min, max), GFP_KERNEL)) {
+ pr_err("Failed to alloc ioasid from %d to %d\n", min, max);
+ active_allocator->ops->free(id, NULL);
+ goto exit_free;
+ }
+ }
data->id = id;

+ spin_unlock(&ioasid_allocator_lock);
return id;
exit_free:
+ spin_unlock(&ioasid_allocator_lock);
kfree(data);
return INVALID_IOASID;
}
@@ -97,9 +365,22 @@ void ioasid_free(ioasid_t ioasid)
{
struct ioasid_data *ioasid_data;

- ioasid_data = xa_erase(&ioasid_xa, ioasid);
+ spin_lock(&ioasid_allocator_lock);
+ ioasid_data = xa_load(&active_allocator->xa, ioasid);
+ if (!ioasid_data) {
+ pr_err("Trying to free unknown IOASID %u\n", ioasid);
+ goto exit_unlock;
+ }

- kfree_rcu(ioasid_data, rcu);
+ active_allocator->ops->free(ioasid, active_allocator->ops->pdata);
+ /* Custom allocator needs additional steps to free the xa element */
+ if (active_allocator->flags & IOASID_ALLOCATOR_CUSTOM) {
+ ioasid_data = xa_erase(&active_allocator->xa, ioasid);
+ kfree_rcu(ioasid_data, rcu);
+ }
+
+exit_unlock:
+ spin_unlock(&ioasid_allocator_lock);
}
EXPORT_SYMBOL_GPL(ioasid_free);

@@ -124,7 +405,7 @@ void *ioasid_find(struct ioasid_set *set, ioasid_t ioasid,
struct ioasid_data *ioasid_data;

rcu_read_lock();
- ioasid_data = xa_load(&ioasid_xa, ioasid);
+ ioasid_data = xa_load(&active_allocator->xa, ioasid);
if (!ioasid_data) {
priv = ERR_PTR(-ENOENT);
goto unlock;
diff --git a/include/linux/ioasid.h b/include/linux/ioasid.h
index 0c272d924671..33ef3eab346a 100644
--- a/include/linux/ioasid.h
+++ b/include/linux/ioasid.h
@@ -6,11 +6,28 @@

#define INVALID_IOASID ((ioasid_t)-1)
typedef unsigned int ioasid_t;
+typedef ioasid_t (*ioasid_alloc_fn_t)(ioasid_t min, ioasid_t max, void *data);
+typedef void (*ioasid_free_fn_t)(ioasid_t ioasid, void *data);

struct ioasid_set {
int dummy;
};

+/**
+ * struct ioasid_allocator_ops - IOASID allocator helper functions and data
+ *
+ * @alloc: helper function to allocate IOASID
+ * @free: helper function to free IOASID
+ * @list: for tracking ops that share helper functions but not data
+ * @pdata: data belong to the allocator, provided when calling alloc()
+ */
+struct ioasid_allocator_ops {
+ ioasid_alloc_fn_t alloc;
+ ioasid_free_fn_t free;
+ struct list_head list;
+ void *pdata;
+};
+
#define DECLARE_IOASID_SET(name) struct ioasid_set name = { 0 }

#if IS_ENABLED(CONFIG_IOASID)
@@ -19,6 +36,8 @@ ioasid_t ioasid_alloc(struct ioasid_set *set, ioasid_t min, ioasid_t max,
void ioasid_free(ioasid_t ioasid);
void *ioasid_find(struct ioasid_set *set, ioasid_t ioasid,
bool (*getter)(void *));
+int ioasid_register_allocator(struct ioasid_allocator_ops *allocator);
+void ioasid_unregister_allocator(struct ioasid_allocator_ops *allocator);
int ioasid_set_data(ioasid_t ioasid, void *data);

#else /* !CONFIG_IOASID */
@@ -38,6 +57,15 @@ static inline void *ioasid_find(struct ioasid_set *set, ioasid_t ioasid,
return NULL;
}

+static inline int ioasid_register_allocator(struct ioasid_allocator_ops *allocator)
+{
+ return -ENODEV;
+}
+
+static inline void ioasid_unregister_allocator(struct ioasid_allocator_ops *allocator)
+{
+}
+
static inline int ioasid_set_data(ioasid_t ioasid, void *data)
{
return -ENODEV;
--
2.7.4


2019-09-22 18:58:14

by Jean-Philippe Brucker

Subject: Re: [PATCH 3/4] iommu/ioasid: Add custom allocators

On Wed, Sep 18, 2019 at 04:26:33PM -0700, Jacob Pan wrote:
> +/*
> + * struct ioasid_allocator_data - Internal data structure to hold information
> + * about an allocator. There are two types of allocators:
> + *
> + * - Default allocator always has its own XArray to track the IOASIDs allocated.
> + * - Custom allocators may share allocation helpers with different private data.
> + * Custom allocators share the same helper functions also share the same
> + * XArray.

"that share the same helper"

> + * Rules:
> + * 1. Default allocator is always available, not dynamically registered. This is
> + * to prevent race conditions with early boot code that want to register
> + * custom allocators or allocate IOASIDs.
> + * 2. Custom allocators take precedence over the default allocator.
> + * 3. When all custom allocators sharing the same helper functions are
> + * unregistered (e.g. due to hotplug), all outstanding IOASIDs must be
> + * freed.
> + * 4. When switching between custom allocators sharing the same helper
> + * functions, outstanding IOASIDs are preserved.
> + * 5. When switching between custom allocator and default allocator, all IOASIDs
> + * must be freed to ensure unadulterated space for the new allocator.
> + *
> + * @ops: allocator helper functions and its data
> + * @list: registered custom allocators
> + * @slist: allocators share the same ops but different data
> + * @flags: attributes of the allocator
> + * @xa xarray holds the IOASID space
> + * @users number of allocators sharing the same ops and XArray
> + */
> +struct ioasid_allocator_data {
> + struct ioasid_allocator_ops *ops;
> + struct list_head list;
> + struct list_head slist;
> +#define IOASID_ALLOCATOR_CUSTOM BIT(0) /* Needs framework to track results */
> + unsigned long flags;
> + struct xarray xa;
> + refcount_t users;
> +};
> +
> +static DEFINE_SPINLOCK(ioasid_allocator_lock);

Thanks for making this a spinlock! I hit that sleep-in-atomic problem
while updating iommu-sva to the new MMU notifier API, which doesn't
allow sleeping in the free() callback.

I don't like having to use GFP_ATOMIC everywhere as a result, but can't
see a better way. Maybe we can improve that later.

[...]
> +/**
> + * ioasid_unregister_allocator - Remove a custom IOASID allocator ops
> + * @ops: the custom allocator to be removed
> + *
> + * Remove an allocator from the list, activate the next allocator in
> + * the order it was registered. Or revert to default allocator if all
> + * custom allocators are unregistered without outstanding IOASIDs.
> + */
> +void ioasid_unregister_allocator(struct ioasid_allocator_ops *ops)
> +{
> + struct ioasid_allocator_data *pallocator;
> + struct ioasid_allocator_ops *sops;
> +
> + spin_lock(&ioasid_allocator_lock);
> + if (list_empty(&allocators_list)) {
> + pr_warn("No custom IOASID allocators active!\n");
> + goto exit_unlock;
> + }
> +
> + list_for_each_entry(pallocator, &allocators_list, list) {
> + if (!use_same_ops(pallocator->ops, ops))
> + continue;
> +
> + if (refcount_read(&pallocator->users) == 1) {
> + /* No shared helper functions */
> + list_del(&pallocator->list);
> + /*
> + * All IOASIDs should have been freed before
> + * the last allocator that shares the same ops
> + * is unregistered.
> + */
> + WARN_ON(!xa_empty(&pallocator->xa));

The function doc seems to say that we revert to the default allocator
only if there wasn't any outstanding IOASID, which isn't what this does.
To follow the doc, we'd need to return here instead of continuing. The
best solution would be to return with an error, but since we don't
propagate errors I think leaking stuff is preferable to leaving the
allocator registered, since the caller might free the ops when this
function returns. So I would keep the code like that but change the
function's comment. What do you think is best?

> + kfree(pallocator);
> + if (list_empty(&allocators_list)) {
> + pr_info("No custom IOASID allocators, switch to default.\n");
> + active_allocator = &default_allocator;

I'm concerned about the active_allocator variable, because ioasid_find()
accesses it without holding ioasid_allocator_lock. It is holding the RCU
read lock, so I think we need to free pallocator after a RCU grace
period (using kfree_rcu)? I think we also need to update
active_allocator with rcu_assign_pointer() and dereference it with
rcu_dereference().
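
Something along these lines (rough sketch, assuming struct
ioasid_allocator_data grows a struct rcu_head member and active_allocator
gets an __rcu annotation):

	/* ioasid_unregister_allocator(): publish the new allocator, defer the free */
	rcu_assign_pointer(active_allocator, &default_allocator);
	kfree_rcu(pallocator, rcu);

	/* ioasid_find(), under rcu_read_lock() */
	struct ioasid_allocator_data *allocator = rcu_dereference(active_allocator);

	ioasid_data = xa_load(&allocator->xa, ioasid);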

> + } else if (pallocator == active_allocator) {
> + active_allocator = list_first_entry(&allocators_list, struct ioasid_allocator_data, list);
> + pr_info("IOASID allocator changed");
> + }
> + break;
> + }
> + /*
> + * Find the matching shared ops to delete,
> + * but keep outstanding IOASIDs
> + */
> + list_for_each_entry(sops, &pallocator->slist, list) {
> + if (sops == ops) {
> + list_del(&ops->list);
> + if (refcount_dec_and_test(&pallocator->users))
> + pr_err("no shared ops\n");

That's not possible, right, since dec_and_test only returns true if
pallocator->users was 1, which we already checked against? I find
pallocator->users a bit redundant since you can use list_empty() or
list_is_singular() on pallocator->slist.
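
For example (just a sketch), has_shared_ops() could become:

	static inline bool has_shared_ops(struct ioasid_allocator_data *allocator)
	{
		return !list_is_singular(&allocator->slist);
	}

and the refcount_read(&pallocator->users) == 1 test above would turn into
list_is_singular(&pallocator->slist).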

[...]
> ioasid_t ioasid_alloc(struct ioasid_set *set, ioasid_t min, ioasid_t max,
> void *private)
> {
> - ioasid_t id;
> struct ioasid_data *data;
> + void *adata;
> + ioasid_t id;
>
> data = kzalloc(sizeof(*data), GFP_KERNEL);
> if (!data)
> @@ -76,14 +324,34 @@ ioasid_t ioasid_alloc(struct ioasid_set *set, ioasid_t min, ioasid_t max,
> data->set = set;
> data->private = private;
>
> - if (xa_alloc(&ioasid_xa, &id, data, XA_LIMIT(min, max), GFP_KERNEL)) {
> - pr_err("Failed to alloc ioasid from %d to %d\n", min, max);
> + /*
> + * Custom allocator needs allocator data to perform platform specific
> + * operations.
> + */
> + spin_lock(&ioasid_allocator_lock);
> + adata = active_allocator->flags & IOASID_ALLOCATOR_CUSTOM ? active_allocator->ops->pdata : data;
> + id = active_allocator->ops->alloc(min, max, adata);
> + if (id == INVALID_IOASID) {
> + pr_err("Failed ASID allocation %lu\n", active_allocator->flags);
> goto exit_free;
> }
> +
> + if (active_allocator->flags & IOASID_ALLOCATOR_CUSTOM) {
> + /* Custom allocator needs framework to store and track allocation results */
> + min = max = id;
> +
> + if (xa_alloc(&active_allocator->xa, &id, data, XA_LIMIT(min, max), GFP_KERNEL)) {

Or just XA_LIMIT(id, id), and merge the two ifs?

You do need GFP_ATOMIC here.

> + pr_err("Failed to alloc ioasid from %d to %d\n", min, max);

Maybe just "Failed to alloc ioasid %d\n" then
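
Putting those two suggestions together, the custom-allocator path might look
like this (untested sketch):

	if ((active_allocator->flags & IOASID_ALLOCATOR_CUSTOM) &&
	    xa_alloc(&active_allocator->xa, &id, data, XA_LIMIT(id, id),
		     GFP_ATOMIC)) {
		pr_err("Failed to alloc ioasid %d\n", id);
		active_allocator->ops->free(id, NULL);
		goto exit_free;
	}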

> + active_allocator->ops->free(id, NULL);

Why doesn't this call need to pass active_allocator->ops->pdata like the
one in ioasid_free()?

Thanks,
Jean

2019-09-22 19:22:05

by Jacob Pan

Subject: Re: [PATCH 3/4] iommu/ioasid: Add custom allocators

On Fri, 20 Sep 2019 18:35:58 +0200
Jean-Philippe Brucker <[email protected]> wrote:

> On Wed, Sep 18, 2019 at 04:26:33PM -0700, Jacob Pan wrote:
> > +/*
> > + * struct ioasid_allocator_data - Internal data structure to hold information
> > + * about an allocator. There are two types of allocators:
> > + *
> > + * - Default allocator always has its own XArray to track the IOASIDs allocated.
> > + * - Custom allocators may share allocation helpers with different private data.
> > + * Custom allocators share the same helper functions also share the same
> > + * XArray.
>
> "that share the same helper"
>
> > + * Rules:
> > + * 1. Default allocator is always available, not dynamically registered. This is
> > + * to prevent race conditions with early boot code that want to register
> > + * custom allocators or allocate IOASIDs.
> > + * 2. Custom allocators take precedence over the default allocator.
> > + * 3. When all custom allocators sharing the same helper functions are
> > + * unregistered (e.g. due to hotplug), all outstanding IOASIDs must be
> > + * freed.
> > + * 4. When switching between custom allocators sharing the same helper
> > + * functions, outstanding IOASIDs are preserved.
> > + * 5. When switching between custom allocator and default allocator, all IOASIDs
> > + * must be freed to ensure unadulterated space for the new allocator.
> > + *
> > + * @ops: allocator helper functions and its data
> > + * @list: registered custom allocators
> > + * @slist: allocators share the same ops but different data
> > + * @flags: attributes of the allocator
> > + * @xa xarray holds the IOASID space
> > + * @users number of allocators sharing the same ops and XArray
> > + */
> > +struct ioasid_allocator_data {
> > + struct ioasid_allocator_ops *ops;
> > + struct list_head list;
> > + struct list_head slist;
> > +#define IOASID_ALLOCATOR_CUSTOM BIT(0) /* Needs framework to track results */
> > + unsigned long flags;
> > + struct xarray xa;
> > + refcount_t users;
> > +};
> > +
> > +static DEFINE_SPINLOCK(ioasid_allocator_lock);
>
> Thanks for making this a spinlock! I hit that sleep-in-atomic problem
> while updating iommu-sva to the new MMU notifier API, which doesn't
> allow sleeping in the free() callback.
>
> I don't like having to use GFP_ATOMIC everywhere as a result, but
> can't see a better way. Maybe we can improve that later.
>
> [...]
> > +/**
> > + * ioasid_unregister_allocator - Remove a custom IOASID allocator ops
> > + * @ops: the custom allocator to be removed
> > + *
> > + * Remove an allocator from the list, activate the next allocator in
> > + * the order it was registered. Or revert to default allocator if all
> > + * custom allocators are unregistered without outstanding IOASIDs.
> > + */
> > +void ioasid_unregister_allocator(struct ioasid_allocator_ops *ops)
> > +{
> > + struct ioasid_allocator_data *pallocator;
> > + struct ioasid_allocator_ops *sops;
> > +
> > + spin_lock(&ioasid_allocator_lock);
> > + if (list_empty(&allocators_list)) {
> > + pr_warn("No custom IOASID allocators active!\n");
> > + goto exit_unlock;
> > + }
> > +
> > + list_for_each_entry(pallocator, &allocators_list, list) {
> > + if (!use_same_ops(pallocator->ops, ops))
> > + continue;
> > +
> > + if (refcount_read(&pallocator->users) == 1) {
> > + /* No shared helper functions */
> > + list_del(&pallocator->list);
> > + /*
> > + * All IOASIDs should have been freed before
> > + * the last allocator that shares the same ops
> > + * is unregistered.
> > + */
> > + WARN_ON(!xa_empty(&pallocator->xa));
>
> The function doc seems to say that we revert to the default allocator
> only if there wasn't any outstanding IOASID, which isn't what this
> does. To follow the doc, we'd need to return here instead of
> continuing. The best solution would be to return with an error, but
> since we don't propagate errors I think leaking stuff is preferable
> to leaving the allocator registered, since the caller might free the
> ops when this function returns. So I would keep the code like that but
> change the function's comment. What do you think is best?
>
I agree, unregistering an allocator should not fail. I will change the
comment to state that if an allocator is unregistered before all outstanding
IOASIDs are freed, those IOASIDs will be orphaned and lost.

> > + kfree(pallocator);
> > + if (list_empty(&allocators_list)) {
> > + pr_info("No custom IOASID allocators, switch to default.\n");
> > + active_allocator = &default_allocator;
>
> I'm concerned about the active_allocator variable, because
> ioasid_find() accesses it without holding ioasid_allocator_lock. It
> is holding the RCU read lock, so I think we need to free pallocator
> after a RCU grace period (using kfree_rcu)? I think we also need to
> update active_allocator with rcu_assign_pointer() and dereference it
> with rcu_dereference()
>
right, will do. ioasid_find is on the fast path, so we try not to use
spinlock.
> > + } else if (pallocator == active_allocator) {
> > + active_allocator = list_first_entry(&allocators_list, struct ioasid_allocator_data, list);
> > + pr_info("IOASID allocator changed");
> > + }
> > + break;
> > + }
> > + /*
> > + * Find the matching shared ops to delete,
> > + * but keep outstanding IOASIDs
> > + */
> > + list_for_each_entry(sops, &pallocator->slist, list) {
> > + if (sops == ops) {
> > + list_del(&ops->list);
> > + if (refcount_dec_and_test(&pallocator->users))
> > + pr_err("no shared ops\n");
>
> That's not possible, right, since dec_and_test only returns true if
> pallocator->users was 1, which we already checked against? I find
> pallocator->users a bit redundant since you can use list_empty() or
> list_is_singular() on pallocator->slist.
>
you are right, no need for the refcount, just check the status of the
shared op list.
> [...]
> > ioasid_t ioasid_alloc(struct ioasid_set *set, ioasid_t min, ioasid_t max,
> > void *private)
> > {
> > - ioasid_t id;
> > struct ioasid_data *data;
> > + void *adata;
> > + ioasid_t id;
> >
> > data = kzalloc(sizeof(*data), GFP_KERNEL);
> > if (!data)
> > @@ -76,14 +324,34 @@ ioasid_t ioasid_alloc(struct ioasid_set *set, ioasid_t min, ioasid_t max,
> > data->set = set;
> > data->private = private;
> >
> > - if (xa_alloc(&ioasid_xa, &id, data, XA_LIMIT(min, max), GFP_KERNEL)) {
> > - pr_err("Failed to alloc ioasid from %d to %d\n", min, max);
> > + /*
> > + * Custom allocator needs allocator data to perform platform specific
> > + * operations.
> > + */
> > + spin_lock(&ioasid_allocator_lock);
> > + adata = active_allocator->flags & IOASID_ALLOCATOR_CUSTOM ? active_allocator->ops->pdata : data;
> > + id = active_allocator->ops->alloc(min, max, adata);
> > + if (id == INVALID_IOASID) {
> > + pr_err("Failed ASID allocation %lu\n", active_allocator->flags);
> > goto exit_free;
> > }
> > +
> > + if (active_allocator->flags & IOASID_ALLOCATOR_CUSTOM) {
> > + /* Custom allocator needs framework to store and track allocation results */
> > + min = max = id;
> > +
> > + if (xa_alloc(&active_allocator->xa, &id, data, XA_LIMIT(min, max), GFP_KERNEL)) {
>
> Or just XA_LIMIT(id, id), and merge the two ifs?
>
much better, thanks for the suggestions.
> You do need GFP_ATOMIC here.
>
right, will change.

> > + pr_err("Failed to alloc ioasid from %d to
> > %d\n", min, max);
>
> Maybe just "Failed to alloc ioasid %d\n" then
>
agreed. just failed on a specific id, not range.
> > + active_allocator->ops->free(id, NULL);
>
> Why doesn't this call need to pass active_allocator->ops->pdata like
> the one in ioasid_free()?
>
Good catch, this call also needs pdata.
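
i.e. it would probably become something like:

	active_allocator->ops->free(id, active_allocator->ops->pdata);
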
> Thanks,
> Jean