From: Hannes Reinecke <[email protected]>
When triggering a rescan due to a namespace resize we will be
receiving AENs on every controller, triggering a rescan of all
attached namespaces. If multipath is active, only the current path and
the ns_head disk will be updated; the other paths will still refer to
the old size until AENs for the remaining controllers are received.
If I/O comes in before that, it might be routed to one of the old
paths, triggering an I/O failure with 'access beyond end of device'.
With this patch the old paths are skipped from multipath path
selection until the controller serving these paths has been rescanned.
Signed-off-by: Hannes Reinecke <[email protected]>
[dwagner: - introduce NVME_NS_READY flag instead of NVME_NS_INVALIDATE
- use 'revalidate' instead of 'invalidate' which
follows the zoned device code path.]
Tested-by: Daniel Wagner <[email protected]>
Signed-off-by: Daniel Wagner <[email protected]>
---
v3:
- Renamed nvme_mpath_invalidated_paths to nvme_mpath_revalidate_paths()
- Replaced NVME_NS_INVALIDATE with NVME_NS_READY
v2:
- https://lore.kernel.org/linux-nvme/[email protected]/
- removed churn from failed rebase.
v1:
- https://lore.kernel.org/linux-nvme/[email protected]/
drivers/nvme/host/core.c | 3 +++
drivers/nvme/host/multipath.c | 17 ++++++++++++++++-
drivers/nvme/host/nvme.h | 5 +++++
3 files changed, 24 insertions(+), 1 deletion(-)
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 2f0cbaba12ac..54aafde4f556 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -1878,6 +1878,7 @@ static int nvme_update_ns_info(struct nvme_ns *ns, struct nvme_id_ns *id)
goto out_unfreeze;
}
+ set_bit(NVME_NS_READY, &ns->flags);
blk_mq_unfreeze_queue(ns->disk->queue);
if (blk_queue_is_zoned(ns->queue)) {
@@ -1889,6 +1890,7 @@ static int nvme_update_ns_info(struct nvme_ns *ns, struct nvme_id_ns *id)
if (nvme_ns_head_multipath(ns->head)) {
blk_mq_freeze_queue(ns->head->disk->queue);
nvme_update_disk_info(ns->head->disk, ns, id);
+ nvme_mpath_revalidate_paths(ns);
blk_stack_limits(&ns->head->disk->queue->limits,
&ns->queue->limits, 0);
blk_queue_update_readahead(ns->head->disk->queue);
@@ -3816,6 +3818,7 @@ static void nvme_ns_remove(struct nvme_ns *ns)
if (test_and_set_bit(NVME_NS_REMOVING, &ns->flags))
return;
+ clear_bit(NVME_NS_READY, &ns->flags);
set_capacity(ns->disk, 0);
nvme_fault_inject_fini(&ns->fault_inject);
diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
index 3f32c5e86bfc..d390f14b8bb6 100644
--- a/drivers/nvme/host/multipath.c
+++ b/drivers/nvme/host/multipath.c
@@ -147,6 +147,21 @@ void nvme_mpath_clear_ctrl_paths(struct nvme_ctrl *ctrl)
mutex_unlock(&ctrl->scan_lock);
}
+void nvme_mpath_revalidate_paths(struct nvme_ns *ns)
+{
+ struct nvme_ns_head *head = ns->head;
+ sector_t capacity = get_capacity(head->disk);
+ int node;
+
+ for_each_node(node)
+ rcu_assign_pointer(head->current_path[node], NULL);
+
+ list_for_each_entry_rcu(ns, &head->list, siblings) {
+ if (capacity != get_capacity(ns->disk))
+ clear_bit(NVME_NS_READY, &ns->flags);
+ }
+}
+
static bool nvme_path_is_disabled(struct nvme_ns *ns)
{
/*
@@ -158,7 +173,7 @@ static bool nvme_path_is_disabled(struct nvme_ns *ns)
ns->ctrl->state != NVME_CTRL_DELETING)
return true;
if (test_bit(NVME_NS_ANA_PENDING, &ns->flags) ||
- test_bit(NVME_NS_REMOVING, &ns->flags))
+ !test_bit(NVME_NS_READY, &ns->flags))
return true;
return false;
}
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 26511794629b..6c67dac79168 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -468,6 +468,7 @@ struct nvme_ns {
#define NVME_NS_DEAD 1
#define NVME_NS_ANA_PENDING 2
#define NVME_NS_FORCE_RO 3
+#define NVME_NS_READY 4
struct cdev cdev;
struct device cdev_device;
@@ -760,6 +761,7 @@ void nvme_mpath_init_ctrl(struct nvme_ctrl *ctrl);
void nvme_mpath_uninit(struct nvme_ctrl *ctrl);
void nvme_mpath_stop(struct nvme_ctrl *ctrl);
bool nvme_mpath_clear_current_path(struct nvme_ns *ns);
+void nvme_mpath_revalidate_paths(struct nvme_ns *ns);
void nvme_mpath_clear_ctrl_paths(struct nvme_ctrl *ctrl);
void nvme_mpath_shutdown_disk(struct nvme_ns_head *head);
@@ -807,6 +809,9 @@ static inline bool nvme_mpath_clear_current_path(struct nvme_ns *ns)
{
return false;
}
+static inline void nvme_mpath_revalidate_paths(struct nvme_ns *ns)
+{
+}
static inline void nvme_mpath_clear_ctrl_paths(struct nvme_ctrl *ctrl)
{
}
--
2.29.2
On Wed, Aug 11, 2021 at 05:28:03PM +0200, Daniel Wagner wrote:
> From: Hannes Reinecke <[email protected]>
>
> When triggering a rescan due to a namespace resize we will be
> receiving AENs on every controller, triggering a rescan of all
> attached namespaces. If multipath is active only the current path and
> the ns_head disk will be updated, the other paths will still refer to
> the old size until AENs for the remaining controllers are received.
>
> If I/O comes in before that it might be routed to one of the old
> paths, triggering an I/O failure with 'access beyond end of device'.
> With this patch the old paths are skipped from multipath path
> selection until the controller serving these paths has been rescanned.
ping
On 8/11/21 8:28 AM, Daniel Wagner wrote:
> From: Hannes Reinecke <[email protected]>
>
> When triggering a rescan due to a namespace resize we will be
> receiving AENs on every controller, triggering a rescan of all
> attached namespaces. If multipath is active only the current path and
> the ns_head disk will be updated, the other paths will still refer to
> the old size until AENs for the remaining controllers are received.
>
> If I/O comes in before that it might be routed to one of the old
> paths, triggering an I/O failure with 'access beyond end of device'.
> With this patch the old paths are skipped from multipath path
> selection until the controller serving these paths has been rescanned.
>
> Signed-off-by: Hannes Reinecke <[email protected]>
> [dwagner: - introduce NVME_NS_READY flag instead of NVME_NS_INVALIDATE
> - use 'revalidate' instead of 'invalidate' which
> follows the zoned device code path.]
> Tested-by: Daniel Wagner <[email protected]>
> Signed-off-by: Daniel Wagner <[email protected]>
> ---
> v3:
> - Renamed nvme_mpath_invalidated_paths to nvme_mpath_revalidate_paths()
> - Replaced NVME_NS_INVALIDATE with NVME_NS_READY
> v2:
> - https://lore.kernel.org/linux-nvme/[email protected]/
> - removed churn from failed rebase.
> v1:
> - https://lore.kernel.org/linux-nvme/[email protected]/
>
> drivers/nvme/host/core.c | 3 +++
> drivers/nvme/host/multipath.c | 17 ++++++++++++++++-
> drivers/nvme/host/nvme.h | 5 +++++
> 3 files changed, 24 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
> index 2f0cbaba12ac..54aafde4f556 100644
> --- a/drivers/nvme/host/core.c
> +++ b/drivers/nvme/host/core.c
> @@ -1878,6 +1878,7 @@ static int nvme_update_ns_info(struct nvme_ns *ns, struct nvme_id_ns *id)
> goto out_unfreeze;
> }
>
> + set_bit(NVME_NS_READY, &ns->flags);
> blk_mq_unfreeze_queue(ns->disk->queue);
>
> if (blk_queue_is_zoned(ns->queue)) {
> @@ -1889,6 +1890,7 @@ static int nvme_update_ns_info(struct nvme_ns *ns, struct nvme_id_ns *id)
> if (nvme_ns_head_multipath(ns->head)) {
> blk_mq_freeze_queue(ns->head->disk->queue);
> nvme_update_disk_info(ns->head->disk, ns, id);
> + nvme_mpath_revalidate_paths(ns);
> blk_stack_limits(&ns->head->disk->queue->limits,
> &ns->queue->limits, 0);
> blk_queue_update_readahead(ns->head->disk->queue);
> @@ -3816,6 +3818,7 @@ static void nvme_ns_remove(struct nvme_ns *ns)
> if (test_and_set_bit(NVME_NS_REMOVING, &ns->flags))
> return;
>
> + clear_bit(NVME_NS_READY, &ns->flags);
> set_capacity(ns->disk, 0);
> nvme_fault_inject_fini(&ns->fault_inject);
>
> diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
> index 3f32c5e86bfc..d390f14b8bb6 100644
> --- a/drivers/nvme/host/multipath.c
> +++ b/drivers/nvme/host/multipath.c
> @@ -147,6 +147,21 @@ void nvme_mpath_clear_ctrl_paths(struct nvme_ctrl *ctrl)
> mutex_unlock(&ctrl->scan_lock);
> }
>
> +void nvme_mpath_revalidate_paths(struct nvme_ns *ns)
> +{
> + struct nvme_ns_head *head = ns->head;
> + sector_t capacity = get_capacity(head->disk);
> + int node;
> +
> + for_each_node(node)
> + rcu_assign_pointer(head->current_path[node], NULL);
> +
> + list_for_each_entry_rcu(ns, &head->list, siblings) {
> + if (capacity != get_capacity(ns->disk))
> + clear_bit(NVME_NS_READY, &ns->flags);
> + }
Shouldn't setting current_path to NULL come after
we clear NVME_NS_READY on the ns? Otherwise we may still
submit I/O and current_path will be populated with the ns
again...
On Mon, Aug 23, 2021 at 10:16:23AM -0700, Sagi Grimberg wrote:
> > +void nvme_mpath_revalidate_paths(struct nvme_ns *ns)
> > +{
> > + struct nvme_ns_head *head = ns->head;
> > + sector_t capacity = get_capacity(head->disk);
> > + int node;
> > +
> > + for_each_node(node)
> > + rcu_assign_pointer(head->current_path[node], NULL);
> > +
> > + list_for_each_entry_rcu(ns, &head->list, siblings) {
> > + if (capacity != get_capacity(ns->disk))
> > + clear_bit(NVME_NS_READY, &ns->flags);
> > + }
>
> Shouldn't the null setting to current_path come after
> we clear NVME_NS_READY on the ns? Otherwise we may still
> submit and current_path will be populated with the ns
> again...
Ahh, I got it this time! Yes, you are right. I think Christoph has
dropped this patch from the nvme-5.15 queue anyway. I'll resend a new
version with the order changed.
Daniel
On Tue, Aug 24, 2021 at 03:44:58PM +0200, Daniel Wagner wrote:
> On Mon, Aug 23, 2021 at 10:16:23AM -0700, Sagi Grimberg wrote:
> > > +void nvme_mpath_revalidate_paths(struct nvme_ns *ns)
> > > +{
> > > + struct nvme_ns_head *head = ns->head;
> > > + sector_t capacity = get_capacity(head->disk);
> > > + int node;
> > > +
> > > + for_each_node(node)
> > > + rcu_assign_pointer(head->current_path[node], NULL);
> > > +
> > > + list_for_each_entry_rcu(ns, &head->list, siblings) {
> > > + if (capacity != get_capacity(ns->disk))
> > > + clear_bit(NVME_NS_READY, &ns->flags);
> > > + }
> >
> > Shouldn't the null setting to current_path come after
> > we clear NVME_NS_READY on the ns? Otherwise we may still
> > submit and current_path will be populated with the ns
> > again...
>
> Ahh, I got it this time! Yes, you are right. I think Christoph has
> dropped this patch from the nvme-5.15 queue anyway. I'll resend a new
> version with the order changed.
I am blind. The patch is queued up in nvme-5.15.
@Christoph, do you want me to send a fix on top of this patch, or do you
want an updated version of this patch?
On Tue, Aug 24, 2021 at 03:48:30PM +0200, Daniel Wagner wrote:
> I am blind. The patch is queued up in nvme-5.15.
>
> @Christoph, do you want me to send fix on top of this patch or do you
> want an updated version of this patch?
Either way is fine.