From: Ming Lin <[email protected]>
Hi list,
This moves the mempool based chained scatterlist alloc/free code from
scsi_lib.c to lib/scatterlist.c.
So other drivers (for example, the under-development NVMe over Fabrics
drivers) can also use it.
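For example, a consumer without a preallocated first chunk could do
something like this (untested sketch; map_io_data() is just a made-up
example caller):

static int map_io_data(struct sg_table *table, int nents)
{
	int ret;

	/* Allocates table->sgl, chaining mempool chunks for large nents. */
	ret = sg_alloc_chained(table, nents, NULL, GFP_ATOMIC);
	if (ret)
		return ret;

	/* ... fill table->sgl and issue the I/O ... */

	/* false because no first_chunk was passed to sg_alloc_chained(). */
	sg_free_chained(table, false);
	return 0;
}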
Ming Lin (2):
scatterlist: add mempool based chained SG alloc/free api
scsi: use the new chained SG api
drivers/scsi/scsi_lib.c | 129 ++----------------------------------
include/linux/scatterlist.h | 12 ++++
lib/scatterlist.c | 156 ++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 175 insertions(+), 122 deletions(-)
--
1.9.1
From: Ming Lin <[email protected]>
This removes the old code and uses the new chained SG alloc/free api.
Signed-off-by: Ming Lin <[email protected]>
---
drivers/scsi/scsi_lib.c | 129 +++---------------------------------------------
1 file changed, 7 insertions(+), 122 deletions(-)
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 8c6e318..97e283c 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -39,40 +39,6 @@
#include "scsi_priv.h"
#include "scsi_logging.h"
-
-#define SG_MEMPOOL_NR ARRAY_SIZE(scsi_sg_pools)
-#define SG_MEMPOOL_SIZE 2
-
-struct scsi_host_sg_pool {
- size_t size;
- char *name;
- struct kmem_cache *slab;
- mempool_t *pool;
-};
-
-#define SP(x) { .size = x, "sgpool-" __stringify(x) }
-#if (SCSI_MAX_SG_SEGMENTS < 32)
-#error SCSI_MAX_SG_SEGMENTS is too small (must be 32 or greater)
-#endif
-static struct scsi_host_sg_pool scsi_sg_pools[] = {
- SP(8),
- SP(16),
-#if (SCSI_MAX_SG_SEGMENTS > 32)
- SP(32),
-#if (SCSI_MAX_SG_SEGMENTS > 64)
- SP(64),
-#if (SCSI_MAX_SG_SEGMENTS > 128)
- SP(128),
-#if (SCSI_MAX_SG_SEGMENTS > 256)
-#error SCSI_MAX_SG_SEGMENTS is too large (256 MAX)
-#endif
-#endif
-#endif
-#endif
- SP(SCSI_MAX_SG_SEGMENTS)
-};
-#undef SP
-
struct kmem_cache *scsi_sdb_cache;
/*
@@ -553,61 +519,23 @@ void scsi_run_host_queues(struct Scsi_Host *shost)
scsi_run_queue(sdev->request_queue);
}
-static inline unsigned int scsi_sgtable_index(unsigned short nents)
-{
- unsigned int index;
-
- BUG_ON(nents > SCSI_MAX_SG_SEGMENTS);
-
- if (nents <= 8)
- index = 0;
- else
- index = get_count_order(nents) - 3;
-
- return index;
-}
-
-static void scsi_sg_free(struct scatterlist *sgl, unsigned int nents)
-{
- struct scsi_host_sg_pool *sgp;
-
- sgp = scsi_sg_pools + scsi_sgtable_index(nents);
- mempool_free(sgl, sgp->pool);
-}
-
-static struct scatterlist *scsi_sg_alloc(unsigned int nents, gfp_t gfp_mask)
-{
- struct scsi_host_sg_pool *sgp;
-
- sgp = scsi_sg_pools + scsi_sgtable_index(nents);
- return mempool_alloc(sgp->pool, gfp_mask);
-}
-
static void scsi_free_sgtable(struct scsi_data_buffer *sdb, bool mq)
{
- if (mq && sdb->table.orig_nents <= SCSI_MAX_SG_SEGMENTS)
- return;
- __sg_free_table(&sdb->table, SCSI_MAX_SG_SEGMENTS, mq, scsi_sg_free);
+ sg_free_chained(&sdb->table, mq);
}
static int scsi_alloc_sgtable(struct scsi_data_buffer *sdb, int nents, bool mq)
{
- struct scatterlist *first_chunk = NULL;
+ struct scatterlist *first_chunk;
int ret;
- BUG_ON(!nents);
-
- if (mq) {
- if (nents <= SCSI_MAX_SG_SEGMENTS) {
- sdb->table.nents = sdb->table.orig_nents = nents;
- sg_init_table(sdb->table.sgl, nents);
- return 0;
- }
+ if (mq)
first_chunk = sdb->table.sgl;
- }
+ else
+ first_chunk = NULL;
+
+ ret = sg_alloc_chained(&sdb->table, nents, first_chunk, GFP_ATOMIC);
- ret = __sg_alloc_table(&sdb->table, nents, SCSI_MAX_SG_SEGMENTS,
- first_chunk, GFP_ATOMIC, scsi_sg_alloc);
if (unlikely(ret))
scsi_free_sgtable(sdb, mq);
return ret;
@@ -2264,8 +2192,6 @@ EXPORT_SYMBOL(scsi_unblock_requests);
int __init scsi_init_queue(void)
{
- int i;
-
scsi_sdb_cache = kmem_cache_create("scsi_data_buffer",
sizeof(struct scsi_data_buffer),
0, 0, NULL);
@@ -2274,53 +2200,12 @@ int __init scsi_init_queue(void)
return -ENOMEM;
}
- for (i = 0; i < SG_MEMPOOL_NR; i++) {
- struct scsi_host_sg_pool *sgp = scsi_sg_pools + i;
- int size = sgp->size * sizeof(struct scatterlist);
-
- sgp->slab = kmem_cache_create(sgp->name, size, 0,
- SLAB_HWCACHE_ALIGN, NULL);
- if (!sgp->slab) {
- printk(KERN_ERR "SCSI: can't init sg slab %s\n",
- sgp->name);
- goto cleanup_sdb;
- }
-
- sgp->pool = mempool_create_slab_pool(SG_MEMPOOL_SIZE,
- sgp->slab);
- if (!sgp->pool) {
- printk(KERN_ERR "SCSI: can't init sg mempool %s\n",
- sgp->name);
- goto cleanup_sdb;
- }
- }
-
return 0;
-
-cleanup_sdb:
- for (i = 0; i < SG_MEMPOOL_NR; i++) {
- struct scsi_host_sg_pool *sgp = scsi_sg_pools + i;
- if (sgp->pool)
- mempool_destroy(sgp->pool);
- if (sgp->slab)
- kmem_cache_destroy(sgp->slab);
- }
- kmem_cache_destroy(scsi_sdb_cache);
-
- return -ENOMEM;
}
void scsi_exit_queue(void)
{
- int i;
-
kmem_cache_destroy(scsi_sdb_cache);
-
- for (i = 0; i < SG_MEMPOOL_NR; i++) {
- struct scsi_host_sg_pool *sgp = scsi_sg_pools + i;
- mempool_destroy(sgp->pool);
- kmem_cache_destroy(sgp->slab);
- }
}
/**
--
1.9.1
From: Ming Lin <[email protected]>
This copied code from scsi_lib.c to scatterlist.c and
modified it a bit.
Signed-off-by: Ming Lin <[email protected]>
---
include/linux/scatterlist.h | 12 ++++
lib/scatterlist.c | 156 ++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 168 insertions(+)
diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h
index 556ec1e..888f2c3 100644
--- a/include/linux/scatterlist.h
+++ b/include/linux/scatterlist.h
@@ -266,6 +266,10 @@ int sg_alloc_table_from_pages(struct sg_table *sgt,
unsigned long offset, unsigned long size,
gfp_t gfp_mask);
+void sg_free_chained(struct sg_table *table, bool first_chunk);
+int sg_alloc_chained(struct sg_table *table, int nents,
+ struct scatterlist *first_chunk, gfp_t gfp);
+
size_t sg_copy_buffer(struct scatterlist *sgl, unsigned int nents, void *buf,
size_t buflen, off_t skip, bool to_buffer);
@@ -286,6 +290,14 @@ size_t sg_pcopy_to_buffer(struct scatterlist *sgl, unsigned int nents,
#define SG_MAX_SINGLE_ALLOC (PAGE_SIZE / sizeof(struct scatterlist))
/*
+ * The maximum number of SG segments that we will put inside a
+ * scatterlist.
+ *
+ * XXX: what's the best number?
+ */
+#define SG_MAX_SEGMENTS 128
+
+/*
* sg page iterator
*
* Iterates over sg entries page-by-page. On each successful iteration,
diff --git a/lib/scatterlist.c b/lib/scatterlist.c
index 004fc70..f97831e 100644
--- a/lib/scatterlist.c
+++ b/lib/scatterlist.c
@@ -11,6 +11,7 @@
#include <linux/scatterlist.h>
#include <linux/highmem.h>
#include <linux/kmemleak.h>
+#include <linux/mempool.h>
/**
* sg_next - return the next scatterlist entry in a list
@@ -755,3 +756,158 @@ size_t sg_pcopy_to_buffer(struct scatterlist *sgl, unsigned int nents,
return sg_copy_buffer(sgl, nents, buf, buflen, skip, true);
}
EXPORT_SYMBOL(sg_pcopy_to_buffer);
+
+#define SG_MEMPOOL_NR ARRAY_SIZE(sg_pools)
+#define SG_MEMPOOL_SIZE 2
+
+struct sg_mempool {
+ size_t size;
+ char *name;
+ struct kmem_cache *slab;
+ mempool_t *pool;
+};
+
+#define SP(x) { .size = x, "sgpool-" __stringify(x) }
+#if (SG_MAX_SEGMENTS < 32)
+#error SG_MAX_SEGMENTS is too small (must be 32 or greater)
+#endif
+static struct sg_mempool sg_pools[] = {
+ SP(8),
+ SP(16),
+#if (SG_MAX_SEGMENTS > 32)
+ SP(32),
+#if (SG_MAX_SEGMENTS > 64)
+ SP(64),
+#if (SG_MAX_SEGMENTS > 128)
+ SP(128),
+#if (SG_MAX_SEGMENTS > 256)
+#error SG_MAX_SEGMENTS is too large (256 MAX)
+#endif
+#endif
+#endif
+#endif
+ SP(SG_MAX_SEGMENTS)
+};
+#undef SP
+
+static inline unsigned int sg_pool_index(unsigned short nents)
+{
+ unsigned int index;
+
+ BUG_ON(nents > SG_MAX_SEGMENTS);
+
+ if (nents <= 8)
+ index = 0;
+ else
+ index = get_count_order(nents) - 3;
+
+ return index;
+}
+
+static void sg_mempoll_free(struct scatterlist *sgl, unsigned int nents)
+{
+ struct sg_mempool *sgp;
+
+ sgp = sg_pools + sg_pool_index(nents);
+ mempool_free(sgl, sgp->pool);
+}
+
+static struct scatterlist *sg_mempool_alloc(unsigned int nents, gfp_t gfp)
+{
+ struct sg_mempool *sgp;
+
+ sgp = sg_pools + sg_pool_index(nents);
+ return mempool_alloc(sgp->pool, gfp);
+}
+
+/**
+ * sg_free_chained - Free a previously mapped sg table
+ * @table: The sg table header to use
+ * @first_chunk: was first_chunk not NULL in sg_alloc_chained?
+ *
+ * Description:
+ * Free an sg table previously allocated and setup with
+ * sg_alloc_chained().
+ *
+ **/
+void sg_free_chained(struct sg_table *table, bool first_chunk)
+{
+ if (first_chunk && table->orig_nents <= SG_MAX_SEGMENTS)
+ return;
+ __sg_free_table(table, SG_MAX_SEGMENTS, 1, sg_mempoll_free);
+}
+EXPORT_SYMBOL_GPL(sg_free_chained);
+
+/**
+ * sg_alloc_chained - Allocate and chain SGLs in an sg table
+ * @table: The sg table header to use
+ * @nents: Number of entries in sg list
+ * @first_chunk: first SGL
+ * @gfp: GFP allocation mask
+ *
+ * Description:
+ * Allocate and chain SGLs in an sg table. If @nents@ is larger than
+ * SG_MAX_SEGMENTS a chained sg table will be setup.
+ *
+ **/
+int sg_alloc_chained(struct sg_table *table, int nents,
+ struct scatterlist *first_chunk, gfp_t gfp)
+{
+ int ret;
+
+ BUG_ON(!nents);
+
+ if (first_chunk && nents <= SG_MAX_SEGMENTS) {
+ table->nents = table->orig_nents = nents;
+ sg_init_table(first_chunk, nents);
+ return 0;
+ }
+
+ ret = __sg_alloc_table(table, nents, SG_MAX_SEGMENTS,
+ first_chunk, gfp, sg_mempool_alloc);
+ if (unlikely(ret))
+ sg_free_chained(table, (bool)first_chunk);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(sg_alloc_chained);
+
+static __init int sg_mempool_init(void)
+{
+ int i;
+
+ for (i = 0; i < SG_MEMPOOL_NR; i++) {
+ struct sg_mempool *sgp = sg_pools + i;
+ int size = sgp->size * sizeof(struct scatterlist);
+
+ sgp->slab = kmem_cache_create(sgp->name, size, 0,
+ SLAB_HWCACHE_ALIGN, NULL);
+ if (!sgp->slab) {
+ printk(KERN_ERR "NVME: can't init sg slab %s\n",
+ sgp->name);
+ goto cleanup_sgp;
+ }
+
+ sgp->pool = mempool_create_slab_pool(SG_MEMPOOL_SIZE,
+ sgp->slab);
+ if (!sgp->pool) {
+ printk(KERN_ERR "NVME can't init sg mempool %s\n",
+ sgp->name);
+ goto cleanup_sgp;
+ }
+ }
+
+ return 0;
+
+cleanup_sgp:
+ for (i = 0; i < SG_MEMPOOL_NR; i++) {
+ struct sg_mempool *sgp = sg_pools + i;
+ if (sgp->pool)
+ mempool_destroy(sgp->pool);
+ if (sgp->slab)
+ kmem_cache_destroy(sgp->slab);
+ }
+
+ return -ENOMEM;
+}
+subsys_initcall(sg_mempool_init);
--
1.9.1
On Tue, 2016-03-15 at 15:39 -0700, Ming Lin wrote:
> From: Ming Lin <[email protected]>
>
> Hi list,
>
> This moves the mempool based chained scatterlist alloc/free code from
> scsi_lib.c to lib/scatterlist.c.
>
> So other drivers (for example, the under-development NVMe over Fabrics
> drivers) can also use it.
>
> Ming Lin (2):
> scatterlist: add mempool based chained SG alloc/free api
> scsi: use the new chained SG api
>
> drivers/scsi/scsi_lib.c | 129 ++----------------------------------
> include/linux/scatterlist.h | 12 ++++
> lib/scatterlist.c | 156 ++++++++++++++++++++++++++++++++++++++++++++
> 3 files changed, 175 insertions(+), 122 deletions(-)
I'd really rather this were a single patch so git can tell us the code
motion. If you add in one patch and remove in another, the code motion
trackers don't see it.
Secondly, you said "This copied code from scsi_lib.c to scatterlist.c
and modified it a bit" could you move in one patch and modify in
another, so we can see exactly what you're changing.
Thirdly, are you sure the pool structure for NVMe should be the same as
for SCSI? We don't do buddy pools for 1, 2 or 4 entry transactions in
SCSI just basically because of heuristics, but the packetised I/O
characteristics of NVMe make single-entry lists more likely for it,
don't they?
James
On Tue, 2016-03-15 at 16:12 -0700, James Bottomley wrote:
> On Tue, 2016-03-15 at 15:39 -0700, Ming Lin wrote:
> > From: Ming Lin <[email protected]>
> >
> > Hi list,
> >
> > This moves the mempool based chained scatterlist alloc/free code
> > from
> > scsi_lib.c to lib/scatterlist.c.
> >
> > So other drivers (for example, the under-development NVMe over
> > Fabrics drivers) can also use it.
> >
> > Ming Lin (2):
> > scatterlist: add mempool based chained SG alloc/free api
> > scsi: use the new chained SG api
> >
> > drivers/scsi/scsi_lib.c | 129 ++----------------------------------
> > include/linux/scatterlist.h | 12 ++++
> > lib/scatterlist.c | 156 ++++++++++++++++++++++++++++++++++++++++++++
> > 3 files changed, 175 insertions(+), 122 deletions(-)
>
> I'd really rather this were a single patch so git can tell us the
> code
> motion. If you add in one patch and remove in another the code
> motion
> trackers don't see it.
>
> Secondly, you said "This copied code from scsi_lib.c to scatterlist.c
> and modified it a bit" could you move in one patch and modify in
> another, so we can see exactly what you're changing.
The modifications are mostly structure name and function name changes.
I can do it in a single patch.
>
> Thirdly, are you sure the pool structure for NVMe should be the same
> as
> for SCSI? We don't do buddy pools for 1, 2 or 4 entry transactions in
> SCSI just basically because of heuristics, but the packetised I/O
> characteristics of NVMe make single-entry lists more likely for it,
> don't they?
Not sure about this, but the nvme-pci driver may not use this API,
because it also has PRP lists in addition to the SG lists.
But the nvme-over-rdma/nvme-over-fibre-channel drivers are good
candidates to use this API.
>
> James
>
>
> /*
> + * The maximum number of SG segments that we will put inside a
> + * scatterlist.
> + *
> + * XXX: what's the best number?
> + */
> +#define SG_MAX_SEGMENTS 128
The important part here is that it's the amount we fit into a single
scatterlist chunk. So I think naming it SG_CHUNK_SIZE or similar
would be a better idea (the name in SCSI is from the days before
we supported chained S/G lists).
It would also be good to bring over the comments from
SCSI_MAX_SG_SEGMENTS.
We'll also need a symbol like SCSI_MAX_SG_CHAIN_SEGMENTS that contains
the absolute limit, and we need the CONFIG_ARCH_HAS_SG_CHAIN magic
around it for now as we still have architectures that do not support
S/G chaining in their dma_map_sg implementation. I plan to fix that
up in a merge window or two, though. My name suggestion for that
would be SG_MAX_SEGMENTS.
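Roughly something like this (sketch only, mirroring the current SCSI
definitions and keeping SCSI's 2048 chain limit):

/* Number of entries that fit into a single scatterlist chunk. */
#define SG_CHUNK_SIZE	128

/*
 * Absolute limit for a chained scatterlist, like
 * SCSI_MAX_SG_CHAIN_SEGMENTS.  Only bigger than a single chunk if the
 * architecture supports S/G chaining in dma_map_sg().
 */
#ifdef CONFIG_ARCH_HAS_SG_CHAIN
#define SG_MAX_SEGMENTS	2048
#else
#define SG_MAX_SEGMENTS	SG_CHUNK_SIZE
#endif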
> +#define SG_MEMPOOL_NR ARRAY_SIZE(sg_pools)
We can definitely kill this one.
> +#define SG_MEMPOOL_SIZE 2
> +
> +struct sg_mempool {
I'd keep this as struct sg_pool, similar to SCSI.
> +/**
> + * sg_free_chained - Free a previously mapped sg table
> + * @table: The sg table header to use
> + * @first_chunk: was first_chunk not NULL in sg_alloc_chained?
> + *
> + * Description:
> + * Free an sg table previously allocated and setup with
> + * sg_alloc_chained().
> + *
> + **/
> +void sg_free_chained(struct sg_table *table, bool first_chunk)
Can we call this sg_free_table_chained to be similar to sg_free_table?
Same for the alloc side.
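I.e. something like this, keeping the signatures from this patch
(sketch):

void sg_free_table_chained(struct sg_table *table, bool first_chunk);
int sg_alloc_table_chained(struct sg_table *table, int nents,
			   struct scatterlist *first_chunk, gfp_t gfp);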
> +static __init int sg_mempool_init(void)
> +{
> + int i;
> +
> + for (i = 0; i < SG_MEMPOOL_NR; i++) {
> + struct sg_mempool *sgp = sg_pools + i;
> + int size = sgp->size * sizeof(struct scatterlist);
> +
> + sgp->slab = kmem_cache_create(sgp->name, size, 0,
> + SLAB_HWCACHE_ALIGN, NULL);
Having these mempools around in every kernel will make some embedded
developers rather unhappy. We could either not create them
unconditionally, which would require either a check in the fast path or
an init call in every driver, or just move the functions you
added into a separate file, which will be compiled only based on a Kconfig
symbol and could even be potentially modular. I think that
second option might be easier.
On Tue, Mar 15, 2016 at 04:12:03PM -0700, James Bottomley wrote:
> motion. If you add in one patch and remove in another the code motion
> trackers don't see it.
Agreed, having the move in a single patch would be nice.
> Thirdly, are you sure the pool structure for NVMe should be the same as
> for SCSI? We don't do buddy pools for 1, 2 or 4 entry transactions in
> SCSI just basically because of heuristics, but the packetised I/O
> characteristics of NVMe make single-entry lists more likely for it,
> don't they?
Not really. NVMe doesn't really do packetized I/O. And while people
were setting all kinds of nomerge flags early on, we're getting rid
of them and are seeing I/O patterns similar to those of fast SCSI devices
now.
NVMe over PCIe still uses the crazy PRPs by default, which aren't
very suitable for this allocator (someone will have to come up
with a good mempool for it eventually, though), but we're developing
a set of new drivers transporting NVMe commands, which use SGLs very
similar to those of most SCSI controllers, so using the same SGL allocator
is a very natural choice.
On Wed, 2016-03-16 at 09:23 +0100, Christoph Hellwig wrote:
> >
> We can definitely kill this one.
We want to support pools of different sizes.
How can we kill this one?
Or did you mean we just create a single pool with size SG_CHUNK_SIZE?
>
> > +static __init int sg_mempool_init(void)
> > +{
> > + int i;
> > +
> > + for (i = 0; i < SG_MEMPOOL_NR; i++) {
> > + struct sg_mempool *sgp = sg_pools + i;
> > + int size = sgp->size * sizeof(struct scatterlist);
> > +
> > + sgp->slab = kmem_cache_create(sgp->name, size, 0,
> > + SLAB_HWCACHE_ALIGN, NULL);
>
> Having these mempools around in every kernel will make some embedded
> developers rather unhappy. We could either not create them
> unconditionally, which would require either a check in the fast path or
> an init call in every driver, or just move the functions you
> added into a separate file, which will be compiled only based on a
> Kconfig symbol and could even be potentially modular. I think that
> second option might be easier.
I created lib/sg_pool.c with CONFIG_SG_POOL.
On Sun, Mar 20, 2016 at 11:55:17PM -0700, Ming Lin wrote:
> On Wed, 2016-03-16 at 09:23 +0100, Christoph Hellwig wrote:
> > >
> > We can definitely kill this one.
>
> We want to support pools of different sizes.
> How can we kill this one?
>
> Or did you mean we just create a single pool with size SG_CHUNK_SIZE?
I mean just killing the SG_MEMPOOL_NR define and using the ARRAY_SIZE
directly.
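E.g. just (sketch):

	for (i = 0; i < ARRAY_SIZE(sg_pools); i++) {
		struct sg_mempool *sgp = sg_pools + i;
		/* ... as before ... */
	}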
> I created lib/sg_pool.c with CONFIG_SG_POOL.
Great!
On 03/15/16 15:39, Ming Lin wrote:
> +static void sg_mempoll_free(struct scatterlist *sgl, unsigned int nents)
Please change mempoll into mempool.
Thanks,
Bart.
On Thu, Apr 7, 2016 at 7:56 AM, Bart Van Assche
<[email protected]> wrote:
> On 03/15/16 15:39, Ming Lin wrote:
>>
>> +static void sg_mempoll_free(struct scatterlist *sgl, unsigned int nents)
>
>
> Please change mempoll into mempool.
Good catch. Thanks Bart!
On Thu, Apr 7, 2016 at 9:43 AM, Ming Lin <[email protected]> wrote:
> On Thu, Apr 7, 2016 at 7:56 AM, Bart Van Assche
> <[email protected]> wrote:
>> On 03/15/16 15:39, Ming Lin wrote:
>>>
>>> +static void sg_mempoll_free(struct scatterlist *sgl, unsigned int nents)
>>
>>
>> Please change mempoll into mempool.
>
> Good catch. Thanks Bart!
Hi Bart,
This is actually the old RFC patch.
The v2 and v3 patch series don't have this typo.
Thanks.