pblk guarantees write ordering at a chunk level through a per open chunk
semaphore. At this point, since we only have a single open I/O stream
shared by user and GC data, the semaphore is per parallel unit.

Since metadata I/O is synchronous, the semaphore is not needed as
ordering is guaranteed. However, if the metadata scheme changes or
multiple streams are open, this guarantee might not be preserved.

This patch makes sure that all writes go through the semaphore, even for
synchronous I/O. This is consistent with pblk's write I/O model. It also
simplifies maintenance, since changes in the metadata scheme could
otherwise cause ordering issues.

Signed-off-by: Javier González <[email protected]>
---
drivers/lightnvm/pblk-core.c | 14 ++++++++++++--
drivers/lightnvm/pblk.h | 1 +
2 files changed, 13 insertions(+), 2 deletions(-)
diff --git a/drivers/lightnvm/pblk-core.c b/drivers/lightnvm/pblk-core.c
index 00984b486fea..160b54d26bfa 100644
--- a/drivers/lightnvm/pblk-core.c
+++ b/drivers/lightnvm/pblk-core.c
@@ -493,6 +493,16 @@ int pblk_submit_io_sync(struct pblk *pblk, struct nvm_rq *rqd)
return nvm_submit_io_sync(dev, rqd);
}
+int pblk_submit_io_sync_sem(struct pblk *pblk, struct nvm_rq *rqd)
+{
+ if (rqd->opcode != NVM_OP_PWRITE)
+ pblk_submit_io_sync(pblk, rqd);
+
+ pblk_down_page(pblk, rqd->ppa_list, rqd->nr_ppas);
+ pblk_submit_io_sync(pblk, rqd);
+ pblk_up_page(pblk, rqd->ppa_list, rqd->nr_ppas);
+}
+
static void pblk_bio_map_addr_endio(struct bio *bio)
{
bio_put(bio);
@@ -737,7 +747,7 @@ static int pblk_line_submit_emeta_io(struct pblk *pblk, struct pblk_line *line,
}
}
- ret = pblk_submit_io_sync(pblk, &rqd);
+ ret = pblk_submit_io_sync_sem(pblk, &rqd);
if (ret) {
pblk_err(pblk, "emeta I/O submission failed: %d\n", ret);
bio_put(bio);
@@ -842,7 +852,7 @@ static int pblk_line_submit_smeta_io(struct pblk *pblk, struct pblk_line *line,
* the write thread is the only one sending write and erase commands,
* there is no need to take the LUN semaphore.
*/
- ret = pblk_submit_io_sync(pblk, &rqd);
+ ret = pblk_submit_io_sync_sem(pblk, &rqd);
if (ret) {
pblk_err(pblk, "smeta I/O submission failed: %d\n", ret);
bio_put(bio);
diff --git a/drivers/lightnvm/pblk.h b/drivers/lightnvm/pblk.h
index 4760af7b6499..6ccc6ad8e1ce 100644
--- a/drivers/lightnvm/pblk.h
+++ b/drivers/lightnvm/pblk.h
@@ -782,6 +782,7 @@ void pblk_log_write_err(struct pblk *pblk, struct nvm_rq *rqd);
void pblk_log_read_err(struct pblk *pblk, struct nvm_rq *rqd);
int pblk_submit_io(struct pblk *pblk, struct nvm_rq *rqd);
int pblk_submit_io_sync(struct pblk *pblk, struct nvm_rq *rqd);
+int pblk_submit_io_sync_sem(struct pblk *pblk, struct nvm_rq *rqd);
int pblk_submit_meta_io(struct pblk *pblk, struct pblk_line *meta_line);
struct bio *pblk_bio_map_addr(struct pblk *pblk, void *data,
unsigned int nr_secs, unsigned int len,
--
2.7.4
On 08/03/2018 02:05 PM, Javier González wrote:
> [...]
> +int pblk_submit_io_sync_sem(struct pblk *pblk, struct nvm_rq *rqd)
> +{
> + if (rqd->opcode != NVM_OP_PWRITE)
> + pblk_submit_io_sync(pblk, rqd);
> +
Why should the write be issued twice?
> + pblk_down_page(pblk, rqd->ppa_list, rqd->nr_ppas);
> + pblk_submit_io_sync(pblk, rqd);
> + pblk_up_page(pblk, rqd->ppa_list, rqd->nr_ppas);
> +}
> [...]
> On 3 Aug 2018, at 14.45, Matias Bjørling <[email protected]> wrote:
>
> On 08/03/2018 02:05 PM, Javier González wrote:
>> [...]
>> +int pblk_submit_io_sync_sem(struct pblk *pblk, struct nvm_rq *rqd)
>> +{
>> + if (rqd->opcode != NVM_OP_PWRITE)
>> + pblk_submit_io_sync(pblk, rqd);
>> +
>
> Why should the write be issued twice?
>
It is the read that is sent twice; that's why it does not fail. I
rebased the patch manually and messed up: that first submit should be a
return. I'll send a V2.
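
For reference, V2 should look something like this (untested sketch,
same helpers as in the patch):

int pblk_submit_io_sync_sem(struct pblk *pblk, struct nvm_rq *rqd)
{
	int ret;

	/* Non-write I/O does not need the chunk semaphore */
	if (rqd->opcode != NVM_OP_PWRITE)
		return pblk_submit_io_sync(pblk, rqd);

	/* Serialize the write against other writes to the same chunk */
	pblk_down_page(pblk, rqd->ppa_list, rqd->nr_ppas);
	ret = pblk_submit_io_sync(pblk, rqd);
	pblk_up_page(pblk, rqd->ppa_list, rqd->nr_ppas);

	return ret;
}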
Javier
On Fri, Aug 3, 2018 at 2:05 PM, Javier González <[email protected]> wrote:
> [...]
> +int pblk_submit_io_sync_sem(struct pblk *pblk, struct nvm_rq *rqd)
> +{
> + if (rqd->opcode != NVM_OP_PWRITE)
> + pblk_submit_io_sync(pblk, rqd);
> +
> + pblk_down_page(pblk, rqd->ppa_list, rqd->nr_ppas);
This will only work if rqd->nr_ppas > 1; better to check whether
rqd->nr_ppas is 1 and pass &rqd->ppa_addr on to pblk_down_page when
needed, e.g. as in the snippet at the end of this mail.
> + pblk_submit_io_sync(pblk, rqd);
> + pblk_up_page(pblk, rqd->ppa_list, rqd->nr_ppas);
> +}
> [...]
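
Something along these lines (untested; assumes the single ppa is
carried inline in rqd->ppa_addr, as in other pblk call sites):

	struct ppa_addr *ppa_list;

	/* A one-sector request carries its ppa inline, not in a list */
	ppa_list = (rqd->nr_ppas > 1) ? rqd->ppa_list : &rqd->ppa_addr;

	pblk_down_page(pblk, ppa_list, rqd->nr_ppas);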
> On 10 Aug 2018, at 10.04, Hans Holmberg <[email protected]> wrote:
>
> On Fri, Aug 3, 2018 at 2:05 PM, Javier González <[email protected]> wrote:
>> [...]
>> +int pblk_submit_io_sync_sem(struct pblk *pblk, struct nvm_rq *rqd)
>> +{
>> + if (rqd->opcode != NVM_OP_PWRITE)
>> + pblk_submit_io_sync(pblk, rqd);
>> +
>> + pblk_down_page(pblk, rqd->ppa_list, rqd->nr_ppas);
>
> This will only work if rqd->nr_ppas > 1; better to check whether
> rqd->nr_ppas is 1 and pass &rqd->ppa_addr on to pblk_down_page when
> needed.
For this particular case we will always get more than one ppa, but
you're right, it is more robust to do the check for future callers.
I'll add that to V3. Thanks!
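
Folding both fixes in, V3 would look something like this (untested
sketch on top of this patch):

int pblk_submit_io_sync_sem(struct pblk *pblk, struct nvm_rq *rqd)
{
	struct ppa_addr *ppa_list;
	int ret;

	/* Non-write I/O does not need the chunk semaphore */
	if (rqd->opcode != NVM_OP_PWRITE)
		return pblk_submit_io_sync(pblk, rqd);

	/* A one-sector request carries its ppa inline, not in a list */
	ppa_list = (rqd->nr_ppas > 1) ? rqd->ppa_list : &rqd->ppa_addr;

	pblk_down_page(pblk, ppa_list, rqd->nr_ppas);
	ret = pblk_submit_io_sync(pblk, rqd);
	pblk_up_page(pblk, ppa_list, rqd->nr_ppas);

	return ret;
}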
Javier