rqd.error is masked by the return value of pblk_submit_io_sync.
The rqd structure is then passed on to the end_io function, which
assumes that any error should lead to a chunk being marked
offline/bad. Since pblk_submit_io_sync can fail before the
command is issued to the device, the error value may not correspond
to a media failure, leading to chunks being prematurely retired.
Also, the pblk_blk_erase_sync function prints an error message in case
the erase fails. Since the caller prints an error message by itself,
remove the error message in this function.
Signed-off-by: Matias Bjørling <[email protected]>
---
drivers/lightnvm/pblk-core.c | 19 ++-----------------
1 file changed, 2 insertions(+), 17 deletions(-)
diff --git a/drivers/lightnvm/pblk-core.c b/drivers/lightnvm/pblk-core.c
index 72acf2f6dbd6..814204d22a2e 100644
--- a/drivers/lightnvm/pblk-core.c
+++ b/drivers/lightnvm/pblk-core.c
@@ -886,10 +886,8 @@ static void pblk_setup_e_rq(struct pblk *pblk, struct nvm_rq *rqd,
static int pblk_blk_erase_sync(struct pblk *pblk, struct ppa_addr ppa)
{
- struct nvm_rq rqd;
- int ret = 0;
-
- memset(&rqd, 0, sizeof(struct nvm_rq));
+ struct nvm_rq rqd = {0};
+ int ret;
pblk_setup_e_rq(pblk, &rqd, ppa);
@@ -897,19 +895,6 @@ static int pblk_blk_erase_sync(struct pblk *pblk, struct ppa_addr ppa)
* with writes. Thus, there is no need to take the LUN semaphore.
*/
ret = pblk_submit_io_sync(pblk, &rqd);
- if (ret) {
- struct nvm_tgt_dev *dev = pblk->dev;
- struct nvm_geo *geo = &dev->geo;
-
- pblk_err(pblk, "could not sync erase line:%d,blk:%d\n",
- pblk_ppa_to_line(ppa),
- pblk_ppa_to_pos(geo, ppa));
-
- rqd.error = ret;
- goto out;
- }
-
-out:
rqd.private = pblk;
__pblk_end_io_erase(pblk, &rqd);
--
2.11.0
> On 2 Aug 2018, at 22.50, Matias Bjørling <[email protected]> wrote:
>
> rqd.error is masked by the return value of pblk_submit_io_sync.
> The rqd structure is then passed on to the end_io function, which
> assumes that any error should lead to a chunk being marked
> offline/bad. Since pblk_submit_io_sync can fail before the
> command is issued to the device, the error value may not correspond
> to a media failure, leading to chunks being prematurely retired.
>
> Also, the pblk_blk_erase_sync function prints an error message in case
> the erase fails. Since the caller prints an error message by itself,
> remove the error message in this function.
>
> Signed-off-by: Matias Bjørling <[email protected]>
> ---
> drivers/lightnvm/pblk-core.c | 19 ++-----------------
> 1 file changed, 2 insertions(+), 17 deletions(-)
>
> diff --git a/drivers/lightnvm/pblk-core.c b/drivers/lightnvm/pblk-core.c
> index 72acf2f6dbd6..814204d22a2e 100644
> --- a/drivers/lightnvm/pblk-core.c
> +++ b/drivers/lightnvm/pblk-core.c
> @@ -886,10 +886,8 @@ static void pblk_setup_e_rq(struct pblk *pblk, struct nvm_rq *rqd,
>
> static int pblk_blk_erase_sync(struct pblk *pblk, struct ppa_addr ppa)
> {
> - struct nvm_rq rqd;
> - int ret = 0;
> -
> - memset(&rqd, 0, sizeof(struct nvm_rq));
> + struct nvm_rq rqd = {0};
This is a matter of taste, but if you want to squeeze it in here, it is
fine by me. There are other places with the same pattern; if you feel
strongly about this then please send a patch changing it in all the
places.
> + int ret;
>
> pblk_setup_e_rq(pblk, &rqd, ppa);
>
> @@ -897,19 +895,6 @@ static int pblk_blk_erase_sync(struct pblk *pblk, struct ppa_addr ppa)
> * with writes. Thus, there is no need to take the LUN semaphore.
> */
> ret = pblk_submit_io_sync(pblk, &rqd);
> - if (ret) {
> - struct nvm_tgt_dev *dev = pblk->dev;
> - struct nvm_geo *geo = &dev->geo;
> -
> - pblk_err(pblk, "could not sync erase line:%d,blk:%d\n",
> - pblk_ppa_to_line(ppa),
> - pblk_ppa_to_pos(geo, ppa));
> -
> - rqd.error = ret;
> - goto out;
> - }
> -
> -out:
> rqd.private = pblk;
> __pblk_end_io_erase(pblk, &rqd);
>
> --
> 2.11.0
Otherwise, it looks like a good cleanup. Thanks.
Reviewed-by: Javier González <[email protected]>
On Thu, Aug 2, 2018 at 10:50 PM, Matias Bjørling <[email protected]> wrote:
> rqd.error is masked by the return value of pblk_submit_io_sync.
> The rqd structure is then passed on to the end_io function, which
> assumes that any error should lead to a chunk being marked
> offline/bad. Since pblk_submit_io_sync can fail before the
> command is issued to the device, the error value may not correspond
> to a media failure, leading to chunks being prematurely retired.
>
> Also, the pblk_blk_erase_sync function prints an error message in case
> the erase fails. Since the caller prints an error message by itself,
> remove the error message in this function.
>
> Signed-off-by: Matias Bjørling <[email protected]>
> ---
> drivers/lightnvm/pblk-core.c | 19 ++-----------------
> 1 file changed, 2 insertions(+), 17 deletions(-)
>
> diff --git a/drivers/lightnvm/pblk-core.c b/drivers/lightnvm/pblk-core.c
> index 72acf2f6dbd6..814204d22a2e 100644
> --- a/drivers/lightnvm/pblk-core.c
> +++ b/drivers/lightnvm/pblk-core.c
> @@ -886,10 +886,8 @@ static void pblk_setup_e_rq(struct pblk *pblk, struct nvm_rq *rqd,
>
> static int pblk_blk_erase_sync(struct pblk *pblk, struct ppa_addr ppa)
> {
> - struct nvm_rq rqd;
> - int ret = 0;
> -
> - memset(&rqd, 0, sizeof(struct nvm_rq));
> + struct nvm_rq rqd = {0};
> + int ret;
>
> pblk_setup_e_rq(pblk, &rqd, ppa);
>
> @@ -897,19 +895,6 @@ static int pblk_blk_erase_sync(struct pblk *pblk, struct ppa_addr ppa)
> * with writes. Thus, there is no need to take the LUN semaphore.
> */
> ret = pblk_submit_io_sync(pblk, &rqd);
> - if (ret) {
> - struct nvm_tgt_dev *dev = pblk->dev;
> - struct nvm_geo *geo = &dev->geo;
> -
> - pblk_err(pblk, "could not sync erase line:%d,blk:%d\n",
> - pblk_ppa_to_line(ppa),
> - pblk_ppa_to_pos(geo, ppa));
> -
> - rqd.error = ret;
> - goto out;
> - }
> -
> -out:
> rqd.private = pblk;
> __pblk_end_io_erase(pblk, &rqd);
>
> --
> 2.11.0
>
Nice catch! I had just added a fix for this exact issue to my own backlog;
it's great that you've already fixed it.
Reviewed-by: Hans Holmberg <[email protected]>