ppc64 uses the CMA area for the allocation of the guest page table (hash page table). We won't
be able to start a guest if we fail to allocate the hash page table. We have observed
hash table allocation failures because we failed to migrate pages out of the CMA region
because they were pinned. This happens when we are using VFIO. VFIO on ppc64 pins
the entire guest RAM. If the guest RAM pages get allocated out of the CMA region, we
won't be able to migrate those pages. The pages are also pinned for the lifetime of the
guest.
Currently we support migration of non-compound pages. With THP and with the addition of
hugetlb migration we can end up allocating compound pages from the CMA region. This
patch series adds support for migrating compound pages. The first patch adds the helper
get_user_pages_cma_migrate(), which pins the pages, making sure we migrate them out of
the CMA region before incrementing the reference count.
Changes from V4:
* use __GFP_NOWARN when allocating pages to avoid page allocation failure warnings.
Changes from V3:
* Move the hugetlb check before transhuge check
* Use compound head page when isolating hugetlb page
Aneesh Kumar K.V (3):
mm: Add get_user_pages_cma_migrate
powerpc/mm/iommu: Allow migration of cma allocated pages during
mm_iommu_get
powerpc/mm/iommu: Allow large IOMMU page size only for hugetlb backing
arch/powerpc/mm/mmu_context_iommu.c | 140 ++++++++--------------------
include/linux/hugetlb.h | 2 +
include/linux/migrate.h | 3 +
mm/hugetlb.c | 4 +-
mm/migrate.c | 139 +++++++++++++++++++++++++++
5 files changed, 186 insertions(+), 102 deletions(-)
--
2.19.2
THP pages can get split during different code paths. An incremented reference
count does imply we will not split the compound page. But the pmd entry can be
converted to level 4 pte entries. Keep the code simpler by allowing large
IOMMU page size only if the guest RAM is backed by hugetlb pages.
Signed-off-by: Aneesh Kumar K.V <[email protected]>
---
arch/powerpc/mm/mmu_context_iommu.c | 24 +++++++-----------------
1 file changed, 7 insertions(+), 17 deletions(-)
diff --git a/arch/powerpc/mm/mmu_context_iommu.c b/arch/powerpc/mm/mmu_context_iommu.c
index 1d5161f93ce6..0741d905ed04 100644
--- a/arch/powerpc/mm/mmu_context_iommu.c
+++ b/arch/powerpc/mm/mmu_context_iommu.c
@@ -95,8 +95,6 @@ long mm_iommu_get(struct mm_struct *mm, unsigned long ua, unsigned long entries,
struct mm_iommu_table_group_mem_t *mem;
long i, ret = 0, locked_entries = 0;
unsigned int pageshift;
- unsigned long flags;
- unsigned long cur_ua;
mutex_lock(&mem_list_mutex);
@@ -159,23 +157,15 @@ long mm_iommu_get(struct mm_struct *mm, unsigned long ua, unsigned long entries,
pageshift = PAGE_SHIFT;
for (i = 0; i < entries; ++i) {
struct page *page = mem->hpages[i];
- cur_ua = ua + (i << PAGE_SHIFT);
- if (mem->pageshift > PAGE_SHIFT && PageCompound(page)) {
- pte_t *pte;
+ /*
+ * Allow to use larger than 64k IOMMU pages. Only do that
+ * if we are backed by hugetlb.
+ */
+ if ((mem->pageshift > PAGE_SHIFT) && PageHuge(page)) {
struct page *head = compound_head(page);
- unsigned int compshift = compound_order(head);
- unsigned int pteshift;
-
- local_irq_save(flags); /* disables as well */
- pte = find_linux_pte(mm->pgd, cur_ua, NULL, &pteshift);
-
- /* Double check it is still the same pinned page */
- if (pte && pte_page(*pte) == head &&
- pteshift == compshift + PAGE_SHIFT)
- pageshift = max_t(unsigned int, pteshift,
- PAGE_SHIFT);
- local_irq_restore(flags);
+
+ pageshift = compound_order(head) + PAGE_SHIFT;
}
mem->pageshift = min(mem->pageshift, pageshift);
/*
--
2.19.2
This helper does a get_user_pages_fast and if it finds pages in the CMA area
it will try to migrate them before taking a page reference. This makes sure that
we don't keep non-movable pages (due to page reference count) in the CMA area.
Being unable to move pages out of the CMA area results in CMA allocation failures.
Signed-off-by: Aneesh Kumar K.V <[email protected]>
---
include/linux/hugetlb.h | 2 +
include/linux/migrate.h | 3 +
mm/hugetlb.c | 4 +-
mm/migrate.c | 139 ++++++++++++++++++++++++++++++++++++++++
4 files changed, 146 insertions(+), 2 deletions(-)
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 087fd5f48c91..1eed0cdaec0e 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -371,6 +371,8 @@ struct page *alloc_huge_page_nodemask(struct hstate *h, int preferred_nid,
nodemask_t *nmask);
struct page *alloc_huge_page_vma(struct hstate *h, struct vm_area_struct *vma,
unsigned long address);
+struct page *alloc_migrate_huge_page(struct hstate *h, gfp_t gfp_mask,
+ int nid, nodemask_t *nmask);
int huge_add_to_page_cache(struct page *page, struct address_space *mapping,
pgoff_t idx);
diff --git a/include/linux/migrate.h b/include/linux/migrate.h
index f2b4abbca55e..d82b35afd2eb 100644
--- a/include/linux/migrate.h
+++ b/include/linux/migrate.h
@@ -286,6 +286,9 @@ static inline int migrate_vma(const struct migrate_vma_ops *ops,
}
#endif /* IS_ENABLED(CONFIG_MIGRATE_VMA_HELPER) */
+extern int get_user_pages_cma_migrate(unsigned long start, int nr_pages, int write,
+ struct page **pages);
+
#endif /* CONFIG_MIGRATION */
#endif /* _LINUX_MIGRATE_H */
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 7f2a28ab46d5..faf3102ae45e 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1585,8 +1585,8 @@ static struct page *alloc_surplus_huge_page(struct hstate *h, gfp_t gfp_mask,
return page;
}
-static struct page *alloc_migrate_huge_page(struct hstate *h, gfp_t gfp_mask,
- int nid, nodemask_t *nmask)
+struct page *alloc_migrate_huge_page(struct hstate *h, gfp_t gfp_mask,
+ int nid, nodemask_t *nmask)
{
struct page *page;
diff --git a/mm/migrate.c b/mm/migrate.c
index f7e4bfdc13b7..d564558fba03 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -2946,3 +2946,142 @@ int migrate_vma(const struct migrate_vma_ops *ops,
}
EXPORT_SYMBOL(migrate_vma);
#endif /* defined(MIGRATE_VMA_HELPER) */
+
+static struct page *new_non_cma_page(struct page *page, unsigned long private)
+{
+ /*
+ * We want to make sure we allocate the new page from the same node
+ * as the source page.
+ */
+ int nid = page_to_nid(page);
+ /*
+ * Trying to allocate a page for migration. Ignore allocation
+ * failure warnings
+ */
+ gfp_t gfp_mask = GFP_USER | __GFP_THISNODE | __GFP_NOWARN;
+
+ if (PageHighMem(page))
+ gfp_mask |= __GFP_HIGHMEM;
+
+#ifdef CONFIG_HUGETLB_PAGE
+ if (PageHuge(page)) {
+ struct hstate *h = page_hstate(page);
+ /*
+ * We don't want to dequeue from the pool because pool pages will
+ * mostly be from the CMA region.
+ */
+ return alloc_migrate_huge_page(h, gfp_mask, nid, NULL);
+ }
+#endif
+ if (PageTransHuge(page)) {
+ struct page *thp;
+ /*
+ * ignore allocation failure warnings
+ */
+ gfp_t thp_gfpmask = GFP_TRANSHUGE | __GFP_THISNODE | __GFP_NOWARN;
+
+ /*
+ * Remove the movable mask so that we don't allocate from
+ * CMA area again.
+ */
+ thp_gfpmask &= ~__GFP_MOVABLE;
+ thp = __alloc_pages_node(nid, thp_gfpmask, HPAGE_PMD_ORDER);
+ if (!thp)
+ return NULL;
+ prep_transhuge_page(thp);
+ return thp;
+ }
+
+ return __alloc_pages_node(nid, gfp_mask, 0);
+}
+
+/**
+ * get_user_pages_cma_migrate() - pin user pages in memory by migrating pages in CMA region
+ * @start: starting user address
+ * @nr_pages: number of pages from start to pin
+ * @write: whether pages will be written to
+ * @pages: array that receives pointers to the pages pinned.
+ * Should be at least nr_pages long.
+ *
+ * Attempt to pin user pages in memory without taking mm->mmap_sem.
+ * If not successful, it will fall back to taking the lock and
+ * calling get_user_pages().
+ *
+ * If the pinned pages are backed by CMA region, we migrate those pages out,
+ * allocating new pages from non-CMA region. This helps in avoiding keeping
+ * pages pinned in the CMA region for a long time thereby resulting in
+ * CMA allocation failures.
+ *
+ * Returns number of pages pinned. This may be fewer than the number
+ * requested. If nr_pages is 0 or negative, returns 0. If no pages
+ * were pinned, returns -errno.
+ */
+
+int get_user_pages_cma_migrate(unsigned long start, int nr_pages, int write,
+ struct page **pages)
+{
+ int i, ret;
+ bool drain_allow = true;
+ bool migrate_allow = true;
+ LIST_HEAD(cma_page_list);
+
+get_user_again:
+ ret = get_user_pages_fast(start, nr_pages, write, pages);
+ if (ret <= 0)
+ return ret;
+
+ for (i = 0; i < ret; ++i) {
+ /*
+ * If we get a page from the CMA zone, since we are going to
+ * be pinning these entries, we might as well move them out
+ * of the CMA zone if possible.
+ */
+ if (is_migrate_cma_page(pages[i]) && migrate_allow) {
+
+ struct page *head = compound_head(pages[i]);
+
+ if (PageHuge(head))
+ isolate_huge_page(head, &cma_page_list);
+ else {
+ if (!PageLRU(head) && drain_allow) {
+ lru_add_drain_all();
+ drain_allow = false;
+ }
+
+ if (!isolate_lru_page(head)) {
+ list_add_tail(&head->lru, &cma_page_list);
+ mod_node_page_state(page_pgdat(head),
+ NR_ISOLATED_ANON +
+ page_is_file_cache(head),
+ hpage_nr_pages(head));
+ }
+ }
+ }
+ }
+ if (!list_empty(&cma_page_list)) {
+ /*
+ * drop the above get_user_pages reference.
+ */
+ for (i = 0; i < ret; ++i)
+ put_page(pages[i]);
+
+ if (migrate_pages(&cma_page_list, new_non_cma_page,
+ NULL, 0, MIGRATE_SYNC, MR_CONTIG_RANGE)) {
+ /*
+ * some of the pages failed migration. Do get_user_pages
+ * without migration.
+ */
+ migrate_allow = false;
+
+ if (!list_empty(&cma_page_list))
+ putback_movable_pages(&cma_page_list);
+ }
+ /*
+ * We did migrate all the pages, Try to get the page references again
+ * migrating any new CMA pages which we failed to isolate earlier.
+ */
+ drain_allow = true;
+ goto get_user_again;
+ }
+ return ret;
+}
--
2.19.2
Current code doesn't do page migration if the page allocated is a compound page.
With HugeTLB migration support, we can end up allocating hugetlb pages from
the CMA region. Also THP pages can be allocated from the CMA region. This patch updates
the code to handle compound pages correctly.
This uses the new helper get_user_pages_cma_migrate. It does one get_user_pages
with the right count, instead of doing one get_user_pages per page. That avoids
reading the page table multiple times.
The patch also converts the hpas member of mm_iommu_table_group_mem_t to a union.
We use the same storage location to store pointers to struct page. We cannot
update all the code paths to use struct page *, because we access hpas in real mode
and we can't do that struct page * to pfn conversion in real mode.
Signed-off-by: Aneesh Kumar K.V <[email protected]>
---
arch/powerpc/mm/mmu_context_iommu.c | 120 ++++++++--------------------
1 file changed, 35 insertions(+), 85 deletions(-)
diff --git a/arch/powerpc/mm/mmu_context_iommu.c b/arch/powerpc/mm/mmu_context_iommu.c
index 56c2234cc6ae..1d5161f93ce6 100644
--- a/arch/powerpc/mm/mmu_context_iommu.c
+++ b/arch/powerpc/mm/mmu_context_iommu.c
@@ -21,6 +21,7 @@
#include <linux/sizes.h>
#include <asm/mmu_context.h>
#include <asm/pte-walk.h>
+#include <linux/mm_inline.h>
static DEFINE_MUTEX(mem_list_mutex);
@@ -34,8 +35,18 @@ struct mm_iommu_table_group_mem_t {
atomic64_t mapped;
unsigned int pageshift;
u64 ua; /* userspace address */
- u64 entries; /* number of entries in hpas[] */
- u64 *hpas; /* vmalloc'ed */
+ u64 entries; /* number of entries in hpages[] */
+ /*
+ * in mm_iommu_get we temporarily use this to store
+ * struct page address.
+ *
+ * We need to convert ua to hpa in real mode. Make it
+ * simpler by storing physicall address.
+ */
+ union {
+ struct page **hpages; /* vmalloc'ed */
+ phys_addr_t *hpas;
+ };
};
static long mm_iommu_adjust_locked_vm(struct mm_struct *mm,
@@ -78,63 +89,14 @@ bool mm_iommu_preregistered(struct mm_struct *mm)
}
EXPORT_SYMBOL_GPL(mm_iommu_preregistered);
-/*
- * Taken from alloc_migrate_target with changes to remove CMA allocations
- */
-struct page *new_iommu_non_cma_page(struct page *page, unsigned long private)
-{
- gfp_t gfp_mask = GFP_USER;
- struct page *new_page;
-
- if (PageCompound(page))
- return NULL;
-
- if (PageHighMem(page))
- gfp_mask |= __GFP_HIGHMEM;
-
- /*
- * We don't want the allocation to force an OOM if possibe
- */
- new_page = alloc_page(gfp_mask | __GFP_NORETRY | __GFP_NOWARN);
- return new_page;
-}
-
-static int mm_iommu_move_page_from_cma(struct page *page)
-{
- int ret = 0;
- LIST_HEAD(cma_migrate_pages);
-
- /* Ignore huge pages for now */
- if (PageCompound(page))
- return -EBUSY;
-
- lru_add_drain();
- ret = isolate_lru_page(page);
- if (ret)
- return ret;
-
- list_add(&page->lru, &cma_migrate_pages);
- put_page(page); /* Drop the gup reference */
-
- ret = migrate_pages(&cma_migrate_pages, new_iommu_non_cma_page,
- NULL, 0, MIGRATE_SYNC, MR_CONTIG_RANGE);
- if (ret) {
- if (!list_empty(&cma_migrate_pages))
- putback_movable_pages(&cma_migrate_pages);
- }
-
- return 0;
-}
-
long mm_iommu_get(struct mm_struct *mm, unsigned long ua, unsigned long entries,
struct mm_iommu_table_group_mem_t **pmem)
{
struct mm_iommu_table_group_mem_t *mem;
- long i, j, ret = 0, locked_entries = 0;
+ long i, ret = 0, locked_entries = 0;
unsigned int pageshift;
unsigned long flags;
unsigned long cur_ua;
- struct page *page = NULL;
mutex_lock(&mem_list_mutex);
@@ -181,41 +143,24 @@ long mm_iommu_get(struct mm_struct *mm, unsigned long ua, unsigned long entries,
goto unlock_exit;
}
+ ret = get_user_pages_cma_migrate(ua, entries, 1, mem->hpages);
+ if (ret != entries) {
+ /* free the reference taken */
+ for (i = 0; i < ret; i++)
+ put_page(mem->hpages[i]);
+
+ vfree(mem->hpas);
+ kfree(mem);
+ ret = -EFAULT;
+ goto unlock_exit;
+ } else
+ ret = 0;
+
+ pageshift = PAGE_SHIFT;
for (i = 0; i < entries; ++i) {
+ struct page *page = mem->hpages[i];
cur_ua = ua + (i << PAGE_SHIFT);
- if (1 != get_user_pages_fast(cur_ua,
- 1/* pages */, 1/* iswrite */, &page)) {
- ret = -EFAULT;
- for (j = 0; j < i; ++j)
- put_page(pfn_to_page(mem->hpas[j] >>
- PAGE_SHIFT));
- vfree(mem->hpas);
- kfree(mem);
- goto unlock_exit;
- }
- /*
- * If we get a page from the CMA zone, since we are going to
- * be pinning these entries, we might as well move them out
- * of the CMA zone if possible. NOTE: faulting in + migration
- * can be expensive. Batching can be considered later
- */
- if (is_migrate_cma_page(page)) {
- if (mm_iommu_move_page_from_cma(page))
- goto populate;
- if (1 != get_user_pages_fast(cur_ua,
- 1/* pages */, 1/* iswrite */,
- &page)) {
- ret = -EFAULT;
- for (j = 0; j < i; ++j)
- put_page(pfn_to_page(mem->hpas[j] >>
- PAGE_SHIFT));
- vfree(mem->hpas);
- kfree(mem);
- goto unlock_exit;
- }
- }
-populate:
- pageshift = PAGE_SHIFT;
+
if (mem->pageshift > PAGE_SHIFT && PageCompound(page)) {
pte_t *pte;
struct page *head = compound_head(page);
@@ -233,7 +178,12 @@ long mm_iommu_get(struct mm_struct *mm, unsigned long ua, unsigned long entries,
local_irq_restore(flags);
}
mem->pageshift = min(mem->pageshift, pageshift);
+ /*
+ * We don't need struct page reference any more, switch
+ * physicall address.
+ */
mem->hpas[i] = page_to_pfn(page) << PAGE_SHIFT;
+
}
atomic64_set(&mem->mapped, 1);
--
2.19.2
On 19/12/2018 14:40, Aneesh Kumar K.V wrote:
> This helper does a get_user_pages_fast and if it find pages in the CMA area
> it will try to migrate them before taking page reference. This makes sure that
> we don't keep non-movable pages (due to page reference count) in the CMA area.
> Not able to move pages out of CMA area result in CMA allocation failures.
>
> Signed-off-by: Aneesh Kumar K.V <[email protected]>
> ---
> include/linux/hugetlb.h | 2 +
> include/linux/migrate.h | 3 +
> mm/hugetlb.c | 4 +-
> mm/migrate.c | 139 ++++++++++++++++++++++++++++++++++++++++
> 4 files changed, 146 insertions(+), 2 deletions(-)
>
> diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
> index 087fd5f48c91..1eed0cdaec0e 100644
> --- a/include/linux/hugetlb.h
> +++ b/include/linux/hugetlb.h
> @@ -371,6 +371,8 @@ struct page *alloc_huge_page_nodemask(struct hstate *h, int preferred_nid,
> nodemask_t *nmask);
> struct page *alloc_huge_page_vma(struct hstate *h, struct vm_area_struct *vma,
> unsigned long address);
> +struct page *alloc_migrate_huge_page(struct hstate *h, gfp_t gfp_mask,
> + int nid, nodemask_t *nmask);
> int huge_add_to_page_cache(struct page *page, struct address_space *mapping,
> pgoff_t idx);
>
> diff --git a/include/linux/migrate.h b/include/linux/migrate.h
> index f2b4abbca55e..d82b35afd2eb 100644
> --- a/include/linux/migrate.h
> +++ b/include/linux/migrate.h
> @@ -286,6 +286,9 @@ static inline int migrate_vma(const struct migrate_vma_ops *ops,
> }
> #endif /* IS_ENABLED(CONFIG_MIGRATE_VMA_HELPER) */
>
> +extern int get_user_pages_cma_migrate(unsigned long start, int nr_pages, int write,
> + struct page **pages);
> +
> #endif /* CONFIG_MIGRATION */
>
> #endif /* _LINUX_MIGRATE_H */
> diff --git a/mm/hugetlb.c b/mm/hugetlb.c
> index 7f2a28ab46d5..faf3102ae45e 100644
> --- a/mm/hugetlb.c
> +++ b/mm/hugetlb.c
> @@ -1585,8 +1585,8 @@ static struct page *alloc_surplus_huge_page(struct hstate *h, gfp_t gfp_mask,
> return page;
> }
>
> -static struct page *alloc_migrate_huge_page(struct hstate *h, gfp_t gfp_mask,
> - int nid, nodemask_t *nmask)
> +struct page *alloc_migrate_huge_page(struct hstate *h, gfp_t gfp_mask,
> + int nid, nodemask_t *nmask)
> {
> struct page *page;
>
> diff --git a/mm/migrate.c b/mm/migrate.c
> index f7e4bfdc13b7..d564558fba03 100644
> --- a/mm/migrate.c
> +++ b/mm/migrate.c
> @@ -2946,3 +2946,142 @@ int migrate_vma(const struct migrate_vma_ops *ops,
> }
> EXPORT_SYMBOL(migrate_vma);
> #endif /* defined(MIGRATE_VMA_HELPER) */
> +
> +static struct page *new_non_cma_page(struct page *page, unsigned long private)
> +{
> + /*
> + * We want to make sure we allocate the new page from the same node
> + * as the source page.
> + */
> + int nid = page_to_nid(page);
> + /*
> + * Trying to allocate a page for migration. Ignore allocation
> + * failure warnings
> + */
> + gfp_t gfp_mask = GFP_USER | __GFP_THISNODE | __GFP_NOWARN;
> +
> + if (PageHighMem(page))
> + gfp_mask |= __GFP_HIGHMEM;
> +
> +#ifdef CONFIG_HUGETLB_PAGE
> + if (PageHuge(page)) {
> + struct hstate *h = page_hstate(page);
> + /*
> + * We don't want to dequeue from the pool because pool pages will
> + * mostly be from the CMA region.
> + */
> + return alloc_migrate_huge_page(h, gfp_mask, nid, NULL);
> + }
> +#endif
> + if (PageTransHuge(page)) {
> + struct page *thp;
> + /*
> + * ignore allocation failure warnings
> + */
> + gfp_t thp_gfpmask = GFP_TRANSHUGE | __GFP_THISNODE | __GFP_NOWARN;
> +
> + /*
> + * Remove the movable mask so that we don't allocate from
> + * CMA area again.
> + */
> + thp_gfpmask &= ~__GFP_MOVABLE;
> + thp = __alloc_pages_node(nid, thp_gfpmask, HPAGE_PMD_ORDER);
> + if (!thp)
> + return NULL;
> + prep_transhuge_page(thp);
> + return thp;
> + }
> +
> + return __alloc_pages_node(nid, gfp_mask, 0);
> +}
> +
> +/**
> + * get_user_pages_cma_migrate() - pin user pages in memory by migrating pages in CMA region
> + * @start: starting user address
> + * @nr_pages: number of pages from start to pin
> + * @write: whether pages will be written to
> + * @pages: array that receives pointers to the pages pinned.
> + * Should be at least nr_pages long.
> + *
> + * Attempt to pin user pages in memory without taking mm->mmap_sem.
> + * If not successful, it will fall back to taking the lock and
> + * calling get_user_pages().
> + *
> + * If the pinned pages are backed by CMA region, we migrate those pages out,
> + * allocating new pages from non-CMA region. This helps in avoiding keeping
> + * pages pinned in the CMA region for a long time thereby resulting in
> + * CMA allocation failures.
> + *
> + * Returns number of pages pinned. This may be fewer than the number
> + * requested. If nr_pages is 0 or negative, returns 0. If no pages
> + * were pinned, returns -errno.
> + */
> +
> +int get_user_pages_cma_migrate(unsigned long start, int nr_pages, int write,
> + struct page **pages)
> +{
> + int i, ret;
> + bool drain_allow = true;
> + bool migrate_allow = true;
> + LIST_HEAD(cma_page_list);
> +
> +get_user_again:
> + ret = get_user_pages_fast(start, nr_pages, write, pages);
> + if (ret <= 0)
> + return ret;
> +
> + for (i = 0; i < ret; ++i) {
> + /*
> + * If we get a page from the CMA zone, since we are going to
> + * be pinning these entries, we might as well move them out
> + * of the CMA zone if possible.
> + */
> + if (is_migrate_cma_page(pages[i]) && migrate_allow) {
> +
> + struct page *head = compound_head(pages[i]);
> +
> + if (PageHuge(head))
> + isolate_huge_page(head, &cma_page_list);
You need curly braces in both branches as per
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/Documentation/process/coding-style.rst#n191
> + else {
> + if (!PageLRU(head) && drain_allow) {
> + lru_add_drain_all();
> + drain_allow = false;
> + }
> +
> + if (!isolate_lru_page(head)) {
> + list_add_tail(&head->lru, &cma_page_list);
> + mod_node_page_state(page_pgdat(head),
> + NR_ISOLATED_ANON +
> + page_is_file_cache(head),
> + hpage_nr_pages(head));
> + }
> + }
> + }
> + }
> + if (!list_empty(&cma_page_list)) {
> + /*
> + * drop the above get_user_pages reference.
> + */
Can be a single line comment.
> + for (i = 0; i < ret; ++i)
> + put_page(pages[i]);
> +
> + if (migrate_pages(&cma_page_list, new_non_cma_page,
> + NULL, 0, MIGRATE_SYNC, MR_CONTIG_RANGE)) {
> + /*
> + * some of the pages failed migration. Do get_user_pages
> + * without migration.
> + */
> + migrate_allow = false;
> +
> + if (!list_empty(&cma_page_list))
> + putback_movable_pages(&cma_page_list);
> + }
> + /*
> + * We did migrate all the pages, Try to get the page references again
> + * migrating any new CMA pages which we failed to isolate earlier.
> + */
> + drain_allow = true;
> + goto get_user_again;
So it is possible to have pages pinned, then successfully migrated
(migrate_pages() returned 0), then pinned again, then some pages may end
up in CMA again and migrate again and nothing seems to prevent this loop
from being endless. What do I miss?
(ps I hate such "goto"s, confuse a lot)
> + }
> + return ret;
> +}
>
--
Alexey
On 19/12/2018 14:40, Aneesh Kumar K.V wrote:
> This helper does a get_user_pages_fast and if it find pages in the CMA area
> it will try to migrate them before taking page reference. This makes sure that
> we don't keep non-movable pages (due to page reference count) in the CMA area.
> Not able to move pages out of CMA area result in CMA allocation failures.
>
> Signed-off-by: Aneesh Kumar K.V <[email protected]>
> ---
> include/linux/hugetlb.h | 2 +
> include/linux/migrate.h | 3 +
> mm/hugetlb.c | 4 +-
> mm/migrate.c | 139 ++++++++++++++++++++++++++++++++++++++++
> 4 files changed, 146 insertions(+), 2 deletions(-)
>
> diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
> index 087fd5f48c91..1eed0cdaec0e 100644
> --- a/include/linux/hugetlb.h
> +++ b/include/linux/hugetlb.h
> @@ -371,6 +371,8 @@ struct page *alloc_huge_page_nodemask(struct hstate *h, int preferred_nid,
> nodemask_t *nmask);
> struct page *alloc_huge_page_vma(struct hstate *h, struct vm_area_struct *vma,
> unsigned long address);
> +struct page *alloc_migrate_huge_page(struct hstate *h, gfp_t gfp_mask,
> + int nid, nodemask_t *nmask);
> int huge_add_to_page_cache(struct page *page, struct address_space *mapping,
> pgoff_t idx);
>
> diff --git a/include/linux/migrate.h b/include/linux/migrate.h
> index f2b4abbca55e..d82b35afd2eb 100644
> --- a/include/linux/migrate.h
> +++ b/include/linux/migrate.h
> @@ -286,6 +286,9 @@ static inline int migrate_vma(const struct migrate_vma_ops *ops,
> }
> #endif /* IS_ENABLED(CONFIG_MIGRATE_VMA_HELPER) */
>
> +extern int get_user_pages_cma_migrate(unsigned long start, int nr_pages, int write,
> + struct page **pages);
Ah, sorry for commenting on the same patch again, but
./scripts/checkpatch.pl complains a lot about this patch.
--
Alexey
On 19/12/2018 14:40, Aneesh Kumar K.V wrote:
> Current code doesn't do page migration if the page allocated is a compound page.
> With HugeTLB migration support, we can end up allocating hugetlb pages from
> CMA region. Also THP pages can be allocated from CMA region. This patch updates
> the code to handle compound pages correctly.
>
> This use the new helper get_user_pages_cma_migrate. It does one get_user_pages
> with right count, instead of doing one get_user_pages per page. That avoids
> reading page table multiple times.
>
> The patch also convert the hpas member of mm_iommu_table_group_mem_t to a union.
> We use the same storage location to store pointers to struct page. We cannot
> update alll the code path use struct page *, because we access hpas in real mode
s/alll/all/
> and we can't do that struct page * to pfn conversion in real mode.
>
> Signed-off-by: Aneesh Kumar K.V <[email protected]>
> ---
> arch/powerpc/mm/mmu_context_iommu.c | 120 ++++++++--------------------
> 1 file changed, 35 insertions(+), 85 deletions(-)
>
> diff --git a/arch/powerpc/mm/mmu_context_iommu.c b/arch/powerpc/mm/mmu_context_iommu.c
> index 56c2234cc6ae..1d5161f93ce6 100644
> --- a/arch/powerpc/mm/mmu_context_iommu.c
> +++ b/arch/powerpc/mm/mmu_context_iommu.c
> @@ -21,6 +21,7 @@
> #include <linux/sizes.h>
> #include <asm/mmu_context.h>
> #include <asm/pte-walk.h>
> +#include <linux/mm_inline.h>
>
> static DEFINE_MUTEX(mem_list_mutex);
>
> @@ -34,8 +35,18 @@ struct mm_iommu_table_group_mem_t {
> atomic64_t mapped;
> unsigned int pageshift;
> u64 ua; /* userspace address */
> - u64 entries; /* number of entries in hpas[] */
Still a valid comment imho, or you could s'hpas'hpas/hpages' but
replacing hpas with hpages seems strange.
> - u64 *hpas; /* vmalloc'ed */
> + u64 entries; /* number of entries in hpages[] */
> + /*
> + * in mm_iommu_get we temporarily use this to store
> + * struct page address.
> + *
> + * We need to convert ua to hpa in real mode. Make it
> + * simpler by storing physicall address.
s/physicall/physical/
> + */
> + union {
> + struct page **hpages; /* vmalloc'ed */
> + phys_addr_t *hpas;
> + };
> };
>
> static long mm_iommu_adjust_locked_vm(struct mm_struct *mm,
> @@ -78,63 +89,14 @@ bool mm_iommu_preregistered(struct mm_struct *mm)
> }
> EXPORT_SYMBOL_GPL(mm_iommu_preregistered);
>
> -/*
> - * Taken from alloc_migrate_target with changes to remove CMA allocations
> - */
> -struct page *new_iommu_non_cma_page(struct page *page, unsigned long private)
> -{
> - gfp_t gfp_mask = GFP_USER;
> - struct page *new_page;
> -
> - if (PageCompound(page))
> - return NULL;
> -
> - if (PageHighMem(page))
> - gfp_mask |= __GFP_HIGHMEM;
> -
> - /*
> - * We don't want the allocation to force an OOM if possibe
> - */
> - new_page = alloc_page(gfp_mask | __GFP_NORETRY | __GFP_NOWARN);
> - return new_page;
> -}
> -
> -static int mm_iommu_move_page_from_cma(struct page *page)
> -{
> - int ret = 0;
> - LIST_HEAD(cma_migrate_pages);
> -
> - /* Ignore huge pages for now */
> - if (PageCompound(page))
> - return -EBUSY;
> -
> - lru_add_drain();
> - ret = isolate_lru_page(page);
> - if (ret)
> - return ret;
> -
> - list_add(&page->lru, &cma_migrate_pages);
> - put_page(page); /* Drop the gup reference */
> -
> - ret = migrate_pages(&cma_migrate_pages, new_iommu_non_cma_page,
> - NULL, 0, MIGRATE_SYNC, MR_CONTIG_RANGE);
> - if (ret) {
> - if (!list_empty(&cma_migrate_pages))
> - putback_movable_pages(&cma_migrate_pages);
> - }
> -
> - return 0;
> -}
> -
> long mm_iommu_get(struct mm_struct *mm, unsigned long ua, unsigned long entries,
> struct mm_iommu_table_group_mem_t **pmem)
> {
> struct mm_iommu_table_group_mem_t *mem;
> - long i, j, ret = 0, locked_entries = 0;
> + long i, ret = 0, locked_entries = 0;
> unsigned int pageshift;
> unsigned long flags;
> unsigned long cur_ua;
> - struct page *page = NULL;
>
> mutex_lock(&mem_list_mutex);
>
> @@ -181,41 +143,24 @@ long mm_iommu_get(struct mm_struct *mm, unsigned long ua, unsigned long entries,
> goto unlock_exit;
> }
>
> + ret = get_user_pages_cma_migrate(ua, entries, 1, mem->hpages);
btw get_user_pages_cma_migrate() name suggests me (yeah, not a native
speaker and an ignorant person in general :) ) that it migrates and pins
pages while it can actually pin pages without migrating them (if it
could not).
> + if (ret != entries) {
> + /* free the reference taken */
> + for (i = 0; i < ret; i++)
> + put_page(mem->hpages[i]);
> +
> + vfree(mem->hpas);
> + kfree(mem);
> + ret = -EFAULT;
> + goto unlock_exit;
> + } else
Missing curly braces.
> + ret = 0;
> +
> + pageshift = PAGE_SHIFT;
> for (i = 0; i < entries; ++i) {
> + struct page *page = mem->hpages[i];
An empty line here.
> cur_ua = ua + (i << PAGE_SHIFT);
> - if (1 != get_user_pages_fast(cur_ua,
> - 1/* pages */, 1/* iswrite */, &page)) {
> - ret = -EFAULT;
> - for (j = 0; j < i; ++j)
> - put_page(pfn_to_page(mem->hpas[j] >>
> - PAGE_SHIFT));
> - vfree(mem->hpas);
> - kfree(mem);
> - goto unlock_exit;
> - }
> - /*
> - * If we get a page from the CMA zone, since we are going to
> - * be pinning these entries, we might as well move them out
> - * of the CMA zone if possible. NOTE: faulting in + migration
> - * can be expensive. Batching can be considered later
> - */
> - if (is_migrate_cma_page(page)) {
> - if (mm_iommu_move_page_from_cma(page))
> - goto populate;
> - if (1 != get_user_pages_fast(cur_ua,
> - 1/* pages */, 1/* iswrite */,
> - &page)) {
> - ret = -EFAULT;
> - for (j = 0; j < i; ++j)
> - put_page(pfn_to_page(mem->hpas[j] >>
> - PAGE_SHIFT));
> - vfree(mem->hpas);
> - kfree(mem);
> - goto unlock_exit;
> - }
> - }
> -populate:
> - pageshift = PAGE_SHIFT;
> +
> if (mem->pageshift > PAGE_SHIFT && PageCompound(page)) {
> pte_t *pte;
> struct page *head = compound_head(page);
> @@ -233,7 +178,12 @@ long mm_iommu_get(struct mm_struct *mm, unsigned long ua, unsigned long entries,
> local_irq_restore(flags);
> }
> mem->pageshift = min(mem->pageshift, pageshift);
> + /*
> + * We don't need struct page reference any more, switch
> + * physicall address.
s/physicall/physical/
> + */
> mem->hpas[i] = page_to_pfn(page) << PAGE_SHIFT;
> +
Unnecessary empty line.
> }
>
> atomic64_set(&mem->mapped, 1);
>
--
Alexey
On 12/20/18 9:49 AM, Alexey Kardashevskiy wrote:
>
>
> On 19/12/2018 14:40, Aneesh Kumar K.V wrote:
>> This helper does a get_user_pages_fast and if it find pages in the CMA area
>> it will try to migrate them before taking page reference. This makes sure that
>> we don't keep non-movable pages (due to page reference count) in the CMA area.
>> Not able to move pages out of CMA area result in CMA allocation failures.
>>
>> Signed-off-by: Aneesh Kumar K.V <[email protected]>
>
.....
>> + * We did migrate all the pages, Try to get the page references again
>> + * migrating any new CMA pages which we failed to isolate earlier.
>> + */
>> + drain_allow = true;
>> + goto get_user_again;
>
>
> So it is possible to have pages pinned, then successfully migrated
> (migrate_pages() returned 0), then pinned again, then some pages may end
> up in CMA again and migrate again and nothing seems to prevent this loop
> from being endless. What do I miss?
>
pages used as target page for migration won't be allocated from CMA region.
-aneesh
On 20/12/2018 16:22, Aneesh Kumar K.V wrote:
> On 12/20/18 9:49 AM, Alexey Kardashevskiy wrote:
>>
>>
>> On 19/12/2018 14:40, Aneesh Kumar K.V wrote:
>>> This helper does a get_user_pages_fast and if it find pages in the
>>> CMA area
>>> it will try to migrate them before taking page reference. This makes
>>> sure that
>>> we don't keep non-movable pages (due to page reference count) in the
>>> CMA area.
>>> Not able to move pages out of CMA area result in CMA allocation
>>> failures.
>>>
>>> Signed-off-by: Aneesh Kumar K.V <[email protected]>
>>
>
> .....
>>> + * We did migrate all the pages, Try to get the page
>>> references again
>>> + * migrating any new CMA pages which we failed to isolate
>>> earlier.
>>> + */
>>> + drain_allow = true;
>>> + goto get_user_again;
>>
>>
>> So it is possible to have pages pinned, then successfully migrated
>> (migrate_pages() returned 0), then pinned again, then some pages may end
>> up in CMA again and migrate again and nothing seems to prevent this loop
>> from being endless. What do I miss?
>>
>
> pages used as target page for migration won't be allocated from CMA region.
Then migrate_allow should be set to "false" regardless of what
migrate_pages() returned. In that case I am totally missing the point of
this goto: we go through the loop again even though we know for sure it
won't do anything except check is_migrate_cma_page(), and we already
know the pages won't be allocated from CMA.
It should be a simple gup_fast() call instead of the goto, and then we
won't need the goto or migrate_allow.
--
Alexey
On 20/12/2018 16:52, Aneesh Kumar K.V wrote:
> On 12/20/18 11:18 AM, Alexey Kardashevskiy wrote:
>>
>>
>> On 20/12/2018 16:22, Aneesh Kumar K.V wrote:
>>> On 12/20/18 9:49 AM, Alexey Kardashevskiy wrote:
>>>>
>>>>
>>>> On 19/12/2018 14:40, Aneesh Kumar K.V wrote:
>>>>> This helper does a get_user_pages_fast and if it find pages in the
>>>>> CMA area
>>>>> it will try to migrate them before taking page reference. This makes
>>>>> sure that
>>>>> we don't keep non-movable pages (due to page reference count) in the
>>>>> CMA area.
>>>>> Not able to move pages out of CMA area result in CMA allocation
>>>>> failures.
>>>>>
>>>>> Signed-off-by: Aneesh Kumar K.V <[email protected]>
>>>>
>>>
>>> .....
>>>>> + * We did migrate all the pages, Try to get the page
>>>>> references again
>>>>> + * migrating any new CMA pages which we failed to isolate
>>>>> earlier.
>>>>> + */
>>>>> + drain_allow = true;
>>>>> + goto get_user_again;
>>>>
>>>>
>>>> So it is possible to have pages pinned, then successfully migrated
>>>> (migrate_pages() returned 0), then pinned again, then some pages may
>>>> end
>>>> up in CMA again and migrate again and nothing seems to prevent this
>>>> loop
>>>> from being endless. What do I miss?
>>>>
>>>
>>> pages used as target page for migration won't be allocated from CMA
>>> region.
>>
>>
>> Then migrate_allow should be set to "false" regardless what
>> migrate_pages() returned and then I am totally missing the point of this
>> goto and going through the loop again even when we know for sure it
>> won't do literally anything but checking is_migrate_cma_page() even
>> though we know pages won't be allocated from CMA.
>>
>
> Because we might have failed to isolate all the pages in the first attempt.
By "isolate", do you mean "migrate"?
If we failed to migrate, then migrate_pages() returns non-zero (positive
or negative), we set migrate_allow to false, empty the cma_page_list
and repeat, but we won't add anything to cma_page_list because
migrate_allow==false.
If we succeeded in migrating, then we repeat the loop with
migrate_allow==true, but that does not matter because
is_migrate_cma_page() is expected to return false: we just successfully
migrated _everything_, so we won't be adding anything to cma_page_list
either.
What have I missed?
--
Alexey
On 12/20/18 11:50 AM, Alexey Kardashevskiy wrote:
>
>
> On 20/12/2018 16:52, Aneesh Kumar K.V wrote:
>> On 12/20/18 11:18 AM, Alexey Kardashevskiy wrote:
>>>
>>>
>>> On 20/12/2018 16:22, Aneesh Kumar K.V wrote:
>>>> On 12/20/18 9:49 AM, Alexey Kardashevskiy wrote:
>>>>>
>>>>>
>>>>> On 19/12/2018 14:40, Aneesh Kumar K.V wrote:
>>>>>> This helper does a get_user_pages_fast and if it find pages in the
>>>>>> CMA area
>>>>>> it will try to migrate them before taking page reference. This makes
>>>>>> sure that
>>>>>> we don't keep non-movable pages (due to page reference count) in the
>>>>>> CMA area.
>>>>>> Not able to move pages out of CMA area result in CMA allocation
>>>>>> failures.
>>>>>>
>>>>>> Signed-off-by: Aneesh Kumar K.V <[email protected]>
>>>>>
>>>>
>>>> .....
>>>>>> + * We did migrate all the pages, Try to get the page
>>>>>> references again
>>>>>> + * migrating any new CMA pages which we failed to isolate
>>>>>> earlier.
>>>>>> + */
>>>>>> + drain_allow = true;
>>>>>> + goto get_user_again;
>>>>>
>>>>>
>>>>> So it is possible to have pages pinned, then successfully migrated
>>>>> (migrate_pages() returned 0), then pinned again, then some pages may
>>>>> end
>>>>> up in CMA again and migrate again and nothing seems to prevent this
>>>>> loop
>>>>> from being endless. What do I miss?
>>>>>
>>>>
>>>> pages used as target page for migration won't be allocated from CMA
>>>> region.
>>>
>>>
>>> Then migrate_allow should be set to "false" regardless what
>>> migrate_pages() returned and then I am totally missing the point of this
>>> goto and going through the loop again even when we know for sure it
>>> won't do literally anything but checking is_migrate_cma_page() even
>>> though we know pages won't be allocated from CMA.
>>>
>>
>> Because we might have failed to isolate all the pages in the first attempt.
>
> isolate==migrate?
No. I mean the calls to isolate_lru_page() and isolate_huge_page().
Isolation can fail because the percpu pagevec is not fully drained.
-aneesh
On 12/20/18 11:18 AM, Alexey Kardashevskiy wrote:
>
>
> On 20/12/2018 16:22, Aneesh Kumar K.V wrote:
>> On 12/20/18 9:49 AM, Alexey Kardashevskiy wrote:
>>>
>>>
>>> On 19/12/2018 14:40, Aneesh Kumar K.V wrote:
>>>> This helper does a get_user_pages_fast and if it find pages in the
>>>> CMA area
>>>> it will try to migrate them before taking page reference. This makes
>>>> sure that
>>>> we don't keep non-movable pages (due to page reference count) in the
>>>> CMA area.
>>>> Not able to move pages out of CMA area result in CMA allocation
>>>> failures.
>>>>
>>>> Signed-off-by: Aneesh Kumar K.V <[email protected]>
>>>
>>
>> .....
>>>> + * We did migrate all the pages, Try to get the page
>>>> references again
>>>> + * migrating any new CMA pages which we failed to isolate
>>>> earlier.
>>>> + */
>>>> + drain_allow = true;
>>>> + goto get_user_again;
>>>
>>>
>>> So it is possible to have pages pinned, then successfully migrated
>>> (migrate_pages() returned 0), then pinned again, then some pages may end
>>> up in CMA again and migrate again and nothing seems to prevent this loop
>>> from being endless. What do I miss?
>>>
>>
>> pages used as target page for migration won't be allocated from CMA region.
>
>
> Then migrate_allow should be set to "false" regardless what
> migrate_pages() returned and then I am totally missing the point of this
> goto and going through the loop again even when we know for sure it
> won't do literally anything but checking is_migrate_cma_page() even
> though we know pages won't be allocated from CMA.
>
Because we might have failed to isolate all the pages in the first attempt.
-aneesh