2020-09-27 06:48:22

by Leon Romanovsky

[permalink] [raw]
Subject: [PATCH rdma-next v4 1/4] lib/scatterlist: Add support in dynamic allocation of SG table from pages

From: Maor Gottlieb <[email protected]>

Extend __sg_alloc_table_from_pages to support dynamic allocation of
SG table from pages. It should be used by drivers that can't supply
all the pages at one time.

This function returns the last populated SGE in the table. Users should
pass it as an argument to the function from the second call and forward.
As before, nents will be equal to the number of populated SGEs (chunks).

With this new extension, drivers can benefit the optimization of merging
contiguous pages without a need to allocate all pages in advance and
hold them in a large buffer.

E.g. with the Infiniband driver that allocates a single page for hold the
pages. For 1TB memory registration, the temporary buffer would consume only
4KB, instead of 2GB.

Signed-off-by: Maor Gottlieb <[email protected]>
Reviewed-by: Christoph Hellwig <[email protected]>
Signed-off-by: Leon Romanovsky <[email protected]>
---
drivers/gpu/drm/i915/gem/i915_gem_userptr.c | 12 +-
drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c | 15 +-
include/linux/scatterlist.h | 43 +++---
lib/scatterlist.c | 160 +++++++++++++++-----
lib/sg_pool.c | 3 +-
tools/testing/scatterlist/main.c | 9 +-
6 files changed, 165 insertions(+), 77 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c
index 12b30075134a..f2eaed6aca3d 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c
@@ -403,6 +403,7 @@ __i915_gem_userptr_alloc_pages(struct drm_i915_gem_object *obj,
unsigned int max_segment = i915_sg_segment_size();
struct sg_table *st;
unsigned int sg_page_sizes;
+ struct scatterlist *sg;
int ret;

st = kmalloc(sizeof(*st), GFP_KERNEL);
@@ -410,13 +411,12 @@ __i915_gem_userptr_alloc_pages(struct drm_i915_gem_object *obj,
return ERR_PTR(-ENOMEM);

alloc_table:
- ret = __sg_alloc_table_from_pages(st, pvec, num_pages,
- 0, num_pages << PAGE_SHIFT,
- max_segment,
- GFP_KERNEL);
- if (ret) {
+ sg = __sg_alloc_table_from_pages(st, pvec, num_pages, 0,
+ num_pages << PAGE_SHIFT, max_segment,
+ NULL, 0, GFP_KERNEL);
+ if (IS_ERR(sg)) {
kfree(st);
- return ERR_PTR(ret);
+ return ERR_CAST(sg);
}

ret = i915_gem_gtt_prepare_pages(obj, st);
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c b/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c
index ab524ab3b0b4..f22acd398b1f 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c
@@ -419,6 +419,7 @@ static int vmw_ttm_map_dma(struct vmw_ttm_tt *vmw_tt)
int ret = 0;
static size_t sgl_size;
static size_t sgt_size;
+ struct scatterlist *sg;

if (vmw_tt->mapped)
return 0;
@@ -441,13 +442,15 @@ static int vmw_ttm_map_dma(struct vmw_ttm_tt *vmw_tt)
if (unlikely(ret != 0))
return ret;

- ret = __sg_alloc_table_from_pages
- (&vmw_tt->sgt, vsgt->pages, vsgt->num_pages, 0,
- (unsigned long) vsgt->num_pages << PAGE_SHIFT,
- dma_get_max_seg_size(dev_priv->dev->dev),
- GFP_KERNEL);
- if (unlikely(ret != 0))
+ sg = __sg_alloc_table_from_pages(&vmw_tt->sgt, vsgt->pages,
+ vsgt->num_pages, 0,
+ (unsigned long) vsgt->num_pages << PAGE_SHIFT,
+ dma_get_max_seg_size(dev_priv->dev->dev),
+ NULL, 0, GFP_KERNEL);
+ if (IS_ERR(sg)) {
+ ret = PTR_ERR(sg);
goto out_sg_alloc_fail;
+ }

if (vsgt->num_pages > vmw_tt->sgt.nents) {
uint64_t over_alloc =
diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h
index 45cf7b69d852..c24cc667b56b 100644
--- a/include/linux/scatterlist.h
+++ b/include/linux/scatterlist.h
@@ -165,6 +165,22 @@ static inline void sg_set_buf(struct scatterlist *sg, const void *buf,
#define for_each_sgtable_dma_sg(sgt, sg, i) \
for_each_sg((sgt)->sgl, sg, (sgt)->nents, i)

+static inline void __sg_chain(struct scatterlist *chain_sg,
+ struct scatterlist *sgl)
+{
+ /*
+ * offset and length are unused for chain entry. Clear them.
+ */
+ chain_sg->offset = 0;
+ chain_sg->length = 0;
+
+ /*
+ * Set lowest bit to indicate a link pointer, and make sure to clear
+ * the termination bit if it happens to be set.
+ */
+ chain_sg->page_link = ((unsigned long) sgl | SG_CHAIN) & ~SG_END;
+}
+
/**
* sg_chain - Chain two sglists together
* @prv: First scatterlist
@@ -178,18 +194,7 @@ static inline void sg_set_buf(struct scatterlist *sg, const void *buf,
static inline void sg_chain(struct scatterlist *prv, unsigned int prv_nents,
struct scatterlist *sgl)
{
- /*
- * offset and length are unused for chain entry. Clear them.
- */
- prv[prv_nents - 1].offset = 0;
- prv[prv_nents - 1].length = 0;
-
- /*
- * Set lowest bit to indicate a link pointer, and make sure to clear
- * the termination bit if it happens to be set.
- */
- prv[prv_nents - 1].page_link = ((unsigned long) sgl | SG_CHAIN)
- & ~SG_END;
+ __sg_chain(&prv[prv_nents - 1], sgl);
}

/**
@@ -283,13 +288,15 @@ typedef void (sg_free_fn)(struct scatterlist *, unsigned int);
void __sg_free_table(struct sg_table *, unsigned int, unsigned int,
sg_free_fn *);
void sg_free_table(struct sg_table *);
-int __sg_alloc_table(struct sg_table *, unsigned int, unsigned int,
- struct scatterlist *, unsigned int, gfp_t, sg_alloc_fn *);
+int __sg_alloc_table(struct sg_table *, struct scatterlist *, unsigned int,
+ unsigned int, struct scatterlist *, unsigned int,
+ gfp_t, sg_alloc_fn *);
int sg_alloc_table(struct sg_table *, unsigned int, gfp_t);
-int __sg_alloc_table_from_pages(struct sg_table *sgt, struct page **pages,
- unsigned int n_pages, unsigned int offset,
- unsigned long size, unsigned int max_segment,
- gfp_t gfp_mask);
+struct scatterlist *__sg_alloc_table_from_pages(struct sg_table *sgt,
+ struct page **pages, unsigned int n_pages, unsigned int offset,
+ unsigned long size, unsigned int max_segment,
+ struct scatterlist *prv, unsigned int left_pages,
+ gfp_t gfp_mask);
int sg_alloc_table_from_pages(struct sg_table *sgt, struct page **pages,
unsigned int n_pages, unsigned int offset,
unsigned long size, gfp_t gfp_mask);
diff --git a/lib/scatterlist.c b/lib/scatterlist.c
index 5d63a8857f36..c200d3442f47 100644
--- a/lib/scatterlist.c
+++ b/lib/scatterlist.c
@@ -245,6 +245,7 @@ EXPORT_SYMBOL(sg_free_table);
/**
* __sg_alloc_table - Allocate and initialize an sg table with given allocator
* @table: The sg table header to use
+ * @prv: Last populated sge in sgt
* @nents: Number of entries in sg list
* @max_ents: The maximum number of entries the allocator returns per call
* @nents_first_chunk: Number of entries int the (preallocated) first
@@ -263,17 +264,15 @@ EXPORT_SYMBOL(sg_free_table);
* __sg_free_table() to cleanup any leftover allocations.
*
**/
-int __sg_alloc_table(struct sg_table *table, unsigned int nents,
- unsigned int max_ents, struct scatterlist *first_chunk,
- unsigned int nents_first_chunk, gfp_t gfp_mask,
- sg_alloc_fn *alloc_fn)
+int __sg_alloc_table(struct sg_table *table, struct scatterlist *prv,
+ unsigned int nents, unsigned int max_ents,
+ struct scatterlist *first_chunk,
+ unsigned int nents_first_chunk, gfp_t gfp_mask,
+ sg_alloc_fn *alloc_fn)
{
- struct scatterlist *sg, *prv;
- unsigned int left;
- unsigned curr_max_ents = nents_first_chunk ?: max_ents;
- unsigned prv_max_ents;
-
- memset(table, 0, sizeof(*table));
+ unsigned int curr_max_ents = nents_first_chunk ?: max_ents;
+ unsigned int left, prv_max_ents = 0;
+ struct scatterlist *sg;

if (nents == 0)
return -EINVAL;
@@ -283,7 +282,6 @@ int __sg_alloc_table(struct sg_table *table, unsigned int nents,
#endif

left = nents;
- prv = NULL;
do {
unsigned int sg_size, alloc_size = left;

@@ -308,7 +306,7 @@ int __sg_alloc_table(struct sg_table *table, unsigned int nents,
* linkage. Without this, sg_kfree() may get
* confused.
*/
- if (prv)
+ if (prv_max_ents)
table->nents = ++table->orig_nents;

return -ENOMEM;
@@ -321,10 +319,18 @@ int __sg_alloc_table(struct sg_table *table, unsigned int nents,
* If this is the first mapping, assign the sg table header.
* If this is not the first mapping, chain previous part.
*/
- if (prv)
- sg_chain(prv, prv_max_ents, sg);
- else
+ if (!prv)
table->sgl = sg;
+ else if (prv_max_ents)
+ sg_chain(prv, prv_max_ents, sg);
+ else {
+ __sg_chain(prv, sg);
+ /*
+ * We decrease one since the prvious last sge in used to
+ * chain the chunks together.
+ */
+ table->nents = table->orig_nents -= 1;
+ }

/*
* If no more entries after this one, mark the end
@@ -356,7 +362,8 @@ int sg_alloc_table(struct sg_table *table, unsigned int nents, gfp_t gfp_mask)
{
int ret;

- ret = __sg_alloc_table(table, nents, SG_MAX_SINGLE_ALLOC,
+ memset(table, 0, sizeof(*table));
+ ret = __sg_alloc_table(table, NULL, nents, SG_MAX_SINGLE_ALLOC,
NULL, 0, gfp_mask, sg_kmalloc);
if (unlikely(ret))
__sg_free_table(table, SG_MAX_SINGLE_ALLOC, 0, sg_kfree);
@@ -365,6 +372,30 @@ int sg_alloc_table(struct sg_table *table, unsigned int nents, gfp_t gfp_mask)
}
EXPORT_SYMBOL(sg_alloc_table);

+static struct scatterlist *get_next_sg(struct sg_table *table,
+ struct scatterlist *prv, unsigned long left_npages,
+ gfp_t gfp_mask)
+{
+ struct scatterlist *next_sg;
+ int ret;
+
+ /* If table was just allocated */
+ if (!prv)
+ return table->sgl;
+
+ /* Check if last entry should be keeped for chainning */
+ next_sg = sg_next(prv);
+ if (!sg_is_last(next_sg) || left_npages == 1)
+ return next_sg;
+
+ ret = __sg_alloc_table(table, next_sg,
+ min_t(unsigned long, left_npages, SG_MAX_SINGLE_ALLOC),
+ SG_MAX_SINGLE_ALLOC, NULL, 0, gfp_mask, sg_kmalloc);
+ if (ret)
+ return ERR_PTR(ret);
+ return sg_next(prv);
+}
+
/**
* __sg_alloc_table_from_pages - Allocate and initialize an sg table from
* an array of pages
@@ -374,29 +405,49 @@ EXPORT_SYMBOL(sg_alloc_table);
* @offset: Offset from start of the first page to the start of a buffer
* @size: Number of valid bytes in the buffer (after offset)
* @max_segment: Maximum size of a scatterlist node in bytes (page aligned)
+ * @prv: Last populated sge in sgt
+ * @left_pages: Left pages caller have to set after this call
* @gfp_mask: GFP allocation mask
*
- * Description:
- * Allocate and initialize an sg table from a list of pages. Contiguous
- * ranges of the pages are squashed into a single scatterlist node up to the
- * maximum size specified in @max_segment. An user may provide an offset at a
- * start and a size of valid data in a buffer specified by the page array.
- * The returned sg table is released by sg_free_table.
+ * Description:
+ * If @prv is NULL, allocate and initialize an sg table from a list of pages,
+ * else reuse the scatterlist passed in at @prv.
+ * Contiguous ranges of the pages are squashed into a single scatterlist
+ * entry up to the maximum size specified in @max_segment. A user may
+ * provide an offset at a start and a size of valid data in a buffer
+ * specified by the page array.
*
* Returns:
- * 0 on success, negative error on failure
+ * Last SGE in sgt on success, PTR_ERR on otherwise.
+ * The allocation in @sgt must be released by sg_free_table.
+ *
+ * Notes:
+ * If this function returns non-0 (eg failure), the caller must call
+ * sg_free_table() to cleanup any leftover allocations.
*/
-int __sg_alloc_table_from_pages(struct sg_table *sgt, struct page **pages,
- unsigned int n_pages, unsigned int offset,
- unsigned long size, unsigned int max_segment,
- gfp_t gfp_mask)
+struct scatterlist *__sg_alloc_table_from_pages(struct sg_table *sgt,
+ struct page **pages, unsigned int n_pages, unsigned int offset,
+ unsigned long size, unsigned int max_segment,
+ struct scatterlist *prv, unsigned int left_pages,
+ gfp_t gfp_mask)
{
- unsigned int chunks, cur_page, seg_len, i;
+ unsigned int chunks, cur_page, seg_len, i, prv_len = 0;
+ struct scatterlist *s = prv;
+ unsigned int table_size;
+ unsigned int tmp_nents;
int ret;
- struct scatterlist *s;

if (WARN_ON(!max_segment || offset_in_page(max_segment)))
- return -EINVAL;
+ return ERR_PTR(-EINVAL);
+ if (IS_ENABLED(CONFIG_ARCH_NO_SG_CHAIN) && prv)
+ return ERR_PTR(-EOPNOTSUPP);
+
+ tmp_nents = prv ? sgt->nents : 0;
+
+ if (prv &&
+ page_to_pfn(sg_page(prv)) + (prv->length >> PAGE_SHIFT) ==
+ page_to_pfn(pages[0]))
+ prv_len = prv->length;

/* compute number of contiguous chunks */
chunks = 1;
@@ -410,13 +461,17 @@ int __sg_alloc_table_from_pages(struct sg_table *sgt, struct page **pages,
}
}

- ret = sg_alloc_table(sgt, chunks, gfp_mask);
- if (unlikely(ret))
- return ret;
+ if (!prv) {
+ /* Only the last allocation could be less than the maximum */
+ table_size = left_pages ? SG_MAX_SINGLE_ALLOC : chunks;
+ ret = sg_alloc_table(sgt, table_size, gfp_mask);
+ if (unlikely(ret))
+ return ERR_PTR(ret);
+ }

/* merging chunks and putting them into the scatterlist */
cur_page = 0;
- for_each_sg(sgt->sgl, s, sgt->orig_nents, i) {
+ for (i = 0; i < chunks; i++) {
unsigned int j, chunk_size;

/* look for the end of the current chunk */
@@ -425,19 +480,41 @@ int __sg_alloc_table_from_pages(struct sg_table *sgt, struct page **pages,
seg_len += PAGE_SIZE;
if (seg_len >= max_segment ||
page_to_pfn(pages[j]) !=
- page_to_pfn(pages[j - 1]) + 1)
+ page_to_pfn(pages[j - 1]) + 1)
break;
}

chunk_size = ((j - cur_page) << PAGE_SHIFT) - offset;
- sg_set_page(s, pages[cur_page],
- min_t(unsigned long, size, chunk_size), offset);
+ chunk_size = min_t(unsigned long, size, chunk_size);
+ if (!i && prv_len) {
+ if (max_segment - prv->length >= chunk_size) {
+ sg_set_page(s, sg_page(s),
+ s->length + chunk_size, s->offset);
+ goto next;
+ }
+ }
+
+ /* Pass how many chunks might left */
+ s = get_next_sg(sgt, s, chunks - i + left_pages, gfp_mask);
+ if (IS_ERR(s)) {
+ /*
+ * Adjust entry length to be as before function was
+ * called.
+ */
+ if (prv_len)
+ prv->length = prv_len;
+ goto out;
+ }
+ sg_set_page(s, pages[cur_page], chunk_size, offset);
+ tmp_nents++;
+next:
size -= chunk_size;
offset = 0;
cur_page = j;
}
-
- return 0;
+ sgt->nents = tmp_nents;
+out:
+ return s;
}
EXPORT_SYMBOL(__sg_alloc_table_from_pages);

@@ -465,8 +542,9 @@ int sg_alloc_table_from_pages(struct sg_table *sgt, struct page **pages,
unsigned int n_pages, unsigned int offset,
unsigned long size, gfp_t gfp_mask)
{
- return __sg_alloc_table_from_pages(sgt, pages, n_pages, offset, size,
- SCATTERLIST_MAX_SEGMENT, gfp_mask);
+ return PTR_ERR_OR_ZERO(__sg_alloc_table_from_pages(sgt, pages, n_pages,
+ offset, size, SCATTERLIST_MAX_SEGMENT, NULL, 0,
+ gfp_mask));
}
EXPORT_SYMBOL(sg_alloc_table_from_pages);

diff --git a/lib/sg_pool.c b/lib/sg_pool.c
index db29e5c1f790..c449248bf5d5 100644
--- a/lib/sg_pool.c
+++ b/lib/sg_pool.c
@@ -129,7 +129,8 @@ int sg_alloc_table_chained(struct sg_table *table, int nents,
nents_first_chunk = 0;
}

- ret = __sg_alloc_table(table, nents, SG_CHUNK_SIZE,
+ memset(table, 0, sizeof(*table));
+ ret = __sg_alloc_table(table, NULL, nents, SG_CHUNK_SIZE,
first_chunk, nents_first_chunk,
GFP_ATOMIC, sg_pool_alloc);
if (unlikely(ret))
diff --git a/tools/testing/scatterlist/main.c b/tools/testing/scatterlist/main.c
index 0a1464181226..4899359a31ac 100644
--- a/tools/testing/scatterlist/main.c
+++ b/tools/testing/scatterlist/main.c
@@ -55,14 +55,13 @@ int main(void)
for (i = 0, test = tests; test->expected_segments; test++, i++) {
struct page *pages[MAX_PAGES];
struct sg_table st;
- int ret;
+ struct scatterlist *sg;

set_pages(pages, test->pfn, test->num_pages);

- ret = __sg_alloc_table_from_pages(&st, pages, test->num_pages,
- 0, test->size, test->max_seg,
- GFP_KERNEL);
- assert(ret == test->alloc_ret);
+ sg = __sg_alloc_table_from_pages(&st, pages, test->num_pages, 0,
+ test->size, test->max_seg, NULL, 0, GFP_KERNEL);
+ assert(PTR_ERR_OR_ZERO(sg) == test->alloc_ret);

if (test->alloc_ret)
continue;
--
2.26.2


2020-10-02 15:07:27

by Jason Gunthorpe

[permalink] [raw]
Subject: Re: [PATCH rdma-next v4 1/4] lib/scatterlist: Add support in dynamic allocation of SG table from pages

On Sun, Sep 27, 2020 at 09:46:44AM +0300, Leon Romanovsky wrote:
> +struct scatterlist *__sg_alloc_table_from_pages(struct sg_table *sgt,
> + struct page **pages, unsigned int n_pages, unsigned int offset,
> + unsigned long size, unsigned int max_segment,
> + struct scatterlist *prv, unsigned int left_pages,
> + gfp_t gfp_mask)
> {
> - unsigned int chunks, cur_page, seg_len, i;
> + unsigned int chunks, cur_page, seg_len, i, prv_len = 0;
> + struct scatterlist *s = prv;
> + unsigned int table_size;
> + unsigned int tmp_nents;
> int ret;
> - struct scatterlist *s;
>
> if (WARN_ON(!max_segment || offset_in_page(max_segment)))
> - return -EINVAL;
> + return ERR_PTR(-EINVAL);
> + if (IS_ENABLED(CONFIG_ARCH_NO_SG_CHAIN) && prv)
> + return ERR_PTR(-EOPNOTSUPP);
> +
> + tmp_nents = prv ? sgt->nents : 0;
> +
> + if (prv &&
> + page_to_pfn(sg_page(prv)) + (prv->length >> PAGE_SHIFT) ==

This calculation of the end doesn't consider sg->offset

> + page_to_pfn(pages[0]))
> + prv_len = prv->length;
>
> /* compute number of contiguous chunks */
> chunks = 1;
> @@ -410,13 +461,17 @@ int __sg_alloc_table_from_pages(struct sg_table *sgt, struct page **pages,
> }
> }
>
> - ret = sg_alloc_table(sgt, chunks, gfp_mask);
> - if (unlikely(ret))
> - return ret;
> + if (!prv) {
> + /* Only the last allocation could be less than the maximum */
> + table_size = left_pages ? SG_MAX_SINGLE_ALLOC : chunks;
> + ret = sg_alloc_table(sgt, table_size, gfp_mask);
> + if (unlikely(ret))
> + return ERR_PTR(ret);
> + }

This is basically redundant right? Now that get_next_sg() can allocate
SGs it can just build them one by one, no need to preallocate.

Actually all the changes the the allocation seem like overkill, just
allocate a single new array directly in get_next_sg() whenever it
needs.

Something like this:

@@ -365,6 +372,37 @@ int sg_alloc_table(struct sg_table *table, unsigned int nents, gfp_t gfp_mask)
}
EXPORT_SYMBOL(sg_alloc_table);

+static struct scatterlist *get_next_sg(struct sg_table *table,
+ struct scatterlist *cur, unsigned long needed_sges,
+ gfp_t gfp_mask)
+{
+ struct scatterlist *new_sg;
+ unsigned int alloc_size;
+
+ if (cur) {
+ struct scatterlist *next_sg = sg_next(cur);
+
+ /* Check if last entry should be keeped for chainning */
+ if (!sg_is_last(next_sg) || needed_sges == 1)
+ return next_sg;
+ }
+
+ alloc_size = min_t(unsigned long, needed_sges, SG_MAX_SINGLE_ALLOC);
+ new_sg = sg_kmalloc(alloc_size, gfp_mask);
+ if (!new_sg)
+ return ERR_PTR(-ENOMEM);
+ sg_init_table(new_sg, alloc_size);
+ if (cur) {
+ __sg_chain(cur, new_sg);
+ table->orig_nents += alloc_size - 1;
+ } else {
+ table->sgl = new_sg;
+ table->orig_nents = alloc_size;
+ table->nents = 0;
+ }
+ return new_sg;
+}
+
/**
* __sg_alloc_table_from_pages - Allocate and initialize an sg table from
* an array of pages
@@ -374,29 +412,64 @@ EXPORT_SYMBOL(sg_alloc_table);
* @offset: Offset from start of the first page to the start of a buffer
* @size: Number of valid bytes in the buffer (after offset)
* @max_segment: Maximum size of a scatterlist node in bytes (page aligned)
+ * @prv: Last populated sge in sgt
+ * @left_pages: Left pages caller have to set after this call
* @gfp_mask: GFP allocation mask
*
- * Description:
- * Allocate and initialize an sg table from a list of pages. Contiguous
- * ranges of the pages are squashed into a single scatterlist node up to the
- * maximum size specified in @max_segment. An user may provide an offset at a
- * start and a size of valid data in a buffer specified by the page array.
- * The returned sg table is released by sg_free_table.
+ * Description:
+ * If @prv is NULL, allocate and initialize an sg table from a list of pages,
+ * else reuse the scatterlist passed in at @prv.
+ * Contiguous ranges of the pages are squashed into a single scatterlist
+ * entry up to the maximum size specified in @max_segment. A user may
+ * provide an offset at a start and a size of valid data in a buffer
+ * specified by the page array.
*
* Returns:
- * 0 on success, negative error on failure
+ * Last SGE in sgt on success, PTR_ERR on otherwise.
+ * The allocation in @sgt must be released by sg_free_table.
+ *
+ * Notes:
+ * If this function returns non-0 (eg failure), the caller must call
+ * sg_free_table() to cleanup any leftover allocations.
*/
-int __sg_alloc_table_from_pages(struct sg_table *sgt, struct page **pages,
- unsigned int n_pages, unsigned int offset,
- unsigned long size, unsigned int max_segment,
- gfp_t gfp_mask)
+struct scatterlist *__sg_alloc_table_from_pages(struct sg_table *sgt,
+ struct page **pages, unsigned int n_pages, unsigned int offset,
+ unsigned long size, unsigned int max_segment,
+ struct scatterlist *prv, unsigned int left_pages,
+ gfp_t gfp_mask)
{
- unsigned int chunks, cur_page, seg_len, i;
- int ret;
- struct scatterlist *s;
+ unsigned int chunks, cur_page, seg_len, i, prv_len = 0;
+ unsigned int added_nents = 0;
+ struct scatterlist *s = prv;

if (WARN_ON(!max_segment || offset_in_page(max_segment)))
- return -EINVAL;
+ return ERR_PTR(-EINVAL);
+ if (IS_ENABLED(CONFIG_ARCH_NO_SG_CHAIN) && prv)
+ return ERR_PTR(-EOPNOTSUPP);
+
+ if (prv) {
+ unsigned long paddr = (page_to_pfn(sg_page(prv)) * PAGE_SIZE +
+ prv->offset + prv->length) /
+ PAGE_SIZE;
+
+ if (WARN_ON(offset))
+ return ERR_PTR(-EINVAL);
+
+ /* Merge contiguous pages into the last SG */
+ prv_len = prv->length;
+ while (n_pages && page_to_pfn(pages[0]) == paddr) {
+ if (prv->length + PAGE_SIZE > max_segment)
+ break;
+ prv->length += PAGE_SIZE;
+ paddr++;
+ pages++;
+ n_pages--;
+ }
+ if (!n_pages) {
+ sg_mark_end(sg_next(prv));
+ return prv;
+ }
+ }

/* compute number of contiguous chunks */
chunks = 1;
@@ -410,13 +483,9 @@ int __sg_alloc_table_from_pages(struct sg_table *sgt, struct page **pages,
}
}

- ret = sg_alloc_table(sgt, chunks, gfp_mask);
- if (unlikely(ret))
- return ret;
-
/* merging chunks and putting them into the scatterlist */
cur_page = 0;
- for_each_sg(sgt->sgl, s, sgt->orig_nents, i) {
+ for (i = 0; i < chunks; i++) {
unsigned int j, chunk_size;

/* look for the end of the current chunk */
@@ -429,15 +498,28 @@ int __sg_alloc_table_from_pages(struct sg_table *sgt, struct page **pages,
break;
}

+ /* Pass how many chunks might be left */
+ s = get_next_sg(sgt, s, chunks - i + left_pages, gfp_mask);
+ if (IS_ERR(s)) {
+ /*
+ * Adjust entry length to be as before function was
+ * called.
+ */
+ if (prv)
+ prv->length = prv_len;
+ return s;
+ }
chunk_size = ((j - cur_page) << PAGE_SHIFT) - offset;
sg_set_page(s, pages[cur_page],
min_t(unsigned long, size, chunk_size), offset);
+ added_nents++;
size -= chunk_size;
offset = 0;
cur_page = j;
}
-
- return 0;
+ sgt->nents += added_nents;
+ sg_mark_end(s);
+ return s;
}
EXPORT_SYMBOL(__sg_alloc_table_from_pages);

2020-10-02 16:13:54

by Maor Gottlieb

[permalink] [raw]
Subject: Re: [PATCH rdma-next v4 1/4] lib/scatterlist: Add support in dynamic allocation of SG table from pages


On 10/2/2020 6:02 PM, Jason Gunthorpe wrote:
> On Sun, Sep 27, 2020 at 09:46:44AM +0300, Leon Romanovsky wrote:
>> +struct scatterlist *__sg_alloc_table_from_pages(struct sg_table *sgt,
>> + struct page **pages, unsigned int n_pages, unsigned int offset,
>> + unsigned long size, unsigned int max_segment,
>> + struct scatterlist *prv, unsigned int left_pages,
>> + gfp_t gfp_mask)
>> {
>> - unsigned int chunks, cur_page, seg_len, i;
>> + unsigned int chunks, cur_page, seg_len, i, prv_len = 0;
>> + struct scatterlist *s = prv;
>> + unsigned int table_size;
>> + unsigned int tmp_nents;
>> int ret;
>> - struct scatterlist *s;
>>
>> if (WARN_ON(!max_segment || offset_in_page(max_segment)))
>> - return -EINVAL;
>> + return ERR_PTR(-EINVAL);
>> + if (IS_ENABLED(CONFIG_ARCH_NO_SG_CHAIN) && prv)
>> + return ERR_PTR(-EOPNOTSUPP);
>> +
>> + tmp_nents = prv ? sgt->nents : 0;
>> +
>> + if (prv &&
>> + page_to_pfn(sg_page(prv)) + (prv->length >> PAGE_SHIFT) ==
> This calculation of the end doesn't consider sg->offset

Right, should be fixed.
>
>> + page_to_pfn(pages[0]))
>> + prv_len = prv->length;
>>
>> /* compute number of contiguous chunks */
>> chunks = 1;
>> @@ -410,13 +461,17 @@ int __sg_alloc_table_from_pages(struct sg_table *sgt, struct page **pages,
>> }
>> }
>>
>> - ret = sg_alloc_table(sgt, chunks, gfp_mask);
>> - if (unlikely(ret))
>> - return ret;
>> + if (!prv) {
>> + /* Only the last allocation could be less than the maximum */
>> + table_size = left_pages ? SG_MAX_SINGLE_ALLOC : chunks;
>> + ret = sg_alloc_table(sgt, table_size, gfp_mask);
>> + if (unlikely(ret))
>> + return ERR_PTR(ret);
>> + }
> This is basically redundant right? Now that get_next_sg() can allocate
> SGs it can just build them one by one, no need to preallocate.
>
> Actually all the changes the the allocation seem like overkill, just
> allocate a single new array directly in get_next_sg() whenever it
> needs.

No, only the last allocation could be less than maximum. (as written in
the comment).
I am preferring to stick with the current implementation and fix the offset.
>
> Something like this:
>
> @@ -365,6 +372,37 @@ int sg_alloc_table(struct sg_table *table, unsigned int nents, gfp_t gfp_mask)
> }
> EXPORT_SYMBOL(sg_alloc_table);
>
> +static struct scatterlist *get_next_sg(struct sg_table *table,
> + struct scatterlist *cur, unsigned long needed_sges,
> + gfp_t gfp_mask)
> +{
> + struct scatterlist *new_sg;
> + unsigned int alloc_size;
> +
> + if (cur) {
> + struct scatterlist *next_sg = sg_next(cur);
> +
> + /* Check if last entry should be keeped for chainning */
> + if (!sg_is_last(next_sg) || needed_sges == 1)
> + return next_sg;
> + }
> +
> + alloc_size = min_t(unsigned long, needed_sges, SG_MAX_SINGLE_ALLOC);
> + new_sg = sg_kmalloc(alloc_size, gfp_mask);
> + if (!new_sg)
> + return ERR_PTR(-ENOMEM);
> + sg_init_table(new_sg, alloc_size);
> + if (cur) {
> + __sg_chain(cur, new_sg);
> + table->orig_nents += alloc_size - 1;
> + } else {
> + table->sgl = new_sg;
> + table->orig_nents = alloc_size;
> + table->nents = 0;
> + }
> + return new_sg;
> +}
> +
> /**
> * __sg_alloc_table_from_pages - Allocate and initialize an sg table from
> * an array of pages
> @@ -374,29 +412,64 @@ EXPORT_SYMBOL(sg_alloc_table);
> * @offset: Offset from start of the first page to the start of a buffer
> * @size: Number of valid bytes in the buffer (after offset)
> * @max_segment: Maximum size of a scatterlist node in bytes (page aligned)
> + * @prv: Last populated sge in sgt
> + * @left_pages: Left pages caller have to set after this call
> * @gfp_mask: GFP allocation mask
> *
> - * Description:
> - * Allocate and initialize an sg table from a list of pages. Contiguous
> - * ranges of the pages are squashed into a single scatterlist node up to the
> - * maximum size specified in @max_segment. An user may provide an offset at a
> - * start and a size of valid data in a buffer specified by the page array.
> - * The returned sg table is released by sg_free_table.
> + * Description:
> + * If @prv is NULL, allocate and initialize an sg table from a list of pages,
> + * else reuse the scatterlist passed in at @prv.
> + * Contiguous ranges of the pages are squashed into a single scatterlist
> + * entry up to the maximum size specified in @max_segment. A user may
> + * provide an offset at a start and a size of valid data in a buffer
> + * specified by the page array.
> *
> * Returns:
> - * 0 on success, negative error on failure
> + * Last SGE in sgt on success, PTR_ERR on otherwise.
> + * The allocation in @sgt must be released by sg_free_table.
> + *
> + * Notes:
> + * If this function returns non-0 (eg failure), the caller must call
> + * sg_free_table() to cleanup any leftover allocations.
> */
> -int __sg_alloc_table_from_pages(struct sg_table *sgt, struct page **pages,
> - unsigned int n_pages, unsigned int offset,
> - unsigned long size, unsigned int max_segment,
> - gfp_t gfp_mask)
> +struct scatterlist *__sg_alloc_table_from_pages(struct sg_table *sgt,
> + struct page **pages, unsigned int n_pages, unsigned int offset,
> + unsigned long size, unsigned int max_segment,
> + struct scatterlist *prv, unsigned int left_pages,
> + gfp_t gfp_mask)
> {
> - unsigned int chunks, cur_page, seg_len, i;
> - int ret;
> - struct scatterlist *s;
> + unsigned int chunks, cur_page, seg_len, i, prv_len = 0;
> + unsigned int added_nents = 0;
> + struct scatterlist *s = prv;
>
> if (WARN_ON(!max_segment || offset_in_page(max_segment)))
> - return -EINVAL;
> + return ERR_PTR(-EINVAL);
> + if (IS_ENABLED(CONFIG_ARCH_NO_SG_CHAIN) && prv)
> + return ERR_PTR(-EOPNOTSUPP);
> +
> + if (prv) {
> + unsigned long paddr = (page_to_pfn(sg_page(prv)) * PAGE_SIZE +
> + prv->offset + prv->length) /
> + PAGE_SIZE;
> +
> + if (WARN_ON(offset))
> + return ERR_PTR(-EINVAL);
> +
> + /* Merge contiguous pages into the last SG */
> + prv_len = prv->length;
> + while (n_pages && page_to_pfn(pages[0]) == paddr) {
> + if (prv->length + PAGE_SIZE > max_segment)
> + break;
> + prv->length += PAGE_SIZE;
> + paddr++;
> + pages++;
> + n_pages--;
> + }
> + if (!n_pages) {
> + sg_mark_end(sg_next(prv));
> + return prv;
> + }
> + }
>
> /* compute number of contiguous chunks */
> chunks = 1;
> @@ -410,13 +483,9 @@ int __sg_alloc_table_from_pages(struct sg_table *sgt, struct page **pages,
> }
> }
>
> - ret = sg_alloc_table(sgt, chunks, gfp_mask);
> - if (unlikely(ret))
> - return ret;
> -
> /* merging chunks and putting them into the scatterlist */
> cur_page = 0;
> - for_each_sg(sgt->sgl, s, sgt->orig_nents, i) {
> + for (i = 0; i < chunks; i++) {
> unsigned int j, chunk_size;
>
> /* look for the end of the current chunk */
> @@ -429,15 +498,28 @@ int __sg_alloc_table_from_pages(struct sg_table *sgt, struct page **pages,
> break;
> }
>
> + /* Pass how many chunks might be left */
> + s = get_next_sg(sgt, s, chunks - i + left_pages, gfp_mask);
> + if (IS_ERR(s)) {
> + /*
> + * Adjust entry length to be as before function was
> + * called.
> + */
> + if (prv)
> + prv->length = prv_len;
> + return s;
> + }
> chunk_size = ((j - cur_page) << PAGE_SHIFT) - offset;
> sg_set_page(s, pages[cur_page],
> min_t(unsigned long, size, chunk_size), offset);
> + added_nents++;
> size -= chunk_size;
> offset = 0;
> cur_page = j;
> }
> -
> - return 0;
> + sgt->nents += added_nents;
> + sg_mark_end(s);
> + return s;
> }
> EXPORT_SYMBOL(__sg_alloc_table_from_pages);
>

2020-10-02 16:21:42

by Jason Gunthorpe

[permalink] [raw]
Subject: Re: [PATCH rdma-next v4 1/4] lib/scatterlist: Add support in dynamic allocation of SG table from pages

On Fri, Oct 02, 2020 at 07:11:33PM +0300, Maor Gottlieb wrote:
>
> On 10/2/2020 6:02 PM, Jason Gunthorpe wrote:
> > On Sun, Sep 27, 2020 at 09:46:44AM +0300, Leon Romanovsky wrote:
> > > +struct scatterlist *__sg_alloc_table_from_pages(struct sg_table *sgt,
> > > + struct page **pages, unsigned int n_pages, unsigned int offset,
> > > + unsigned long size, unsigned int max_segment,
> > > + struct scatterlist *prv, unsigned int left_pages,
> > > + gfp_t gfp_mask)
> > > {
> > > - unsigned int chunks, cur_page, seg_len, i;
> > > + unsigned int chunks, cur_page, seg_len, i, prv_len = 0;
> > > + struct scatterlist *s = prv;
> > > + unsigned int table_size;
> > > + unsigned int tmp_nents;
> > > int ret;
> > > - struct scatterlist *s;
> > >
> > > if (WARN_ON(!max_segment || offset_in_page(max_segment)))
> > > - return -EINVAL;
> > > + return ERR_PTR(-EINVAL);
> > > + if (IS_ENABLED(CONFIG_ARCH_NO_SG_CHAIN) && prv)
> > > + return ERR_PTR(-EOPNOTSUPP);
> > > +
> > > + tmp_nents = prv ? sgt->nents : 0;
> > > +
> > > + if (prv &&
> > > + page_to_pfn(sg_page(prv)) + (prv->length >> PAGE_SHIFT) ==
> > This calculation of the end doesn't consider sg->offset
>
> Right, should be fixed.
> >
> > > + page_to_pfn(pages[0]))
> > > + prv_len = prv->length;
> > >
> > > /* compute number of contiguous chunks */
> > > chunks = 1;
> > > @@ -410,13 +461,17 @@ int __sg_alloc_table_from_pages(struct sg_table *sgt, struct page **pages,
> > > }
> > > }
> > >
> > > - ret = sg_alloc_table(sgt, chunks, gfp_mask);
> > > - if (unlikely(ret))
> > > - return ret;
> > > + if (!prv) {
> > > + /* Only the last allocation could be less than the maximum */
> > > + table_size = left_pages ? SG_MAX_SINGLE_ALLOC : chunks;
> > > + ret = sg_alloc_table(sgt, table_size, gfp_mask);
> > > + if (unlikely(ret))
> > > + return ERR_PTR(ret);
> > > + }
> > This is basically redundant right? Now that get_next_sg() can allocate
> > SGs it can just build them one by one, no need to preallocate.
> >
> > Actually all the changes the the allocation seem like overkill, just
> > allocate a single new array directly in get_next_sg() whenever it
> > needs.
>
> No, only the last allocation could be less than maximum. (as written in the
> comment).

The point is that get_next_sg is fully redundent with
sg_alloc_table() because it is always used in cases when prv is
set. There is zero reason to call sg_alloc_table here in the one case
where prv is not set.

Further this cleans up the spagehtti goto in the middle of the for
loop and avoids allocating an extra chunk if the page list fully fits
in prv.

Given how much smaller it is I think you should look more carefully.

Jason