2022-09-27 16:39:56

by Chih-En Lin

[permalink] [raw]
Subject: [RFC PATCH v2 6/9] mm, pgtable: Add COW_PTE_OWNER_EXCLUSIVE flag

With the present COW logic (for physical pages), some pages cannot be
shared in certain situations (e.g., pinned pages). To keep COW PTE
consistent with the current logic, introduce the COW_PTE_OWNER_EXCLUSIVE
flag to avoid doing COW on the PTE table during fork(). The exclusive
flag is used in the following cases:

- GUP pinning of a COW physical page gets into trouble. Currently, COW
is not done when GUP works on the page. Follow the same rule here.

Signed-off-by: Chih-En Lin <[email protected]>
---
include/linux/pgtable.h | 18 ++++++++++++++++++
mm/gup.c | 13 +++++++++++--
2 files changed, 29 insertions(+), 2 deletions(-)

diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h
index 8b497d7d800ed..9b08a3361d490 100644
--- a/include/linux/pgtable.h
+++ b/include/linux/pgtable.h
@@ -656,6 +656,24 @@ static inline int cow_pte_count(pmd_t *pmd)
return page_count(pmd_page(*pmd));
}

+/* Keep the first bit clear. See more detail in the comments of struct page. */
+#define COW_PTE_OWNER_EXCLUSIVE ((pmd_t *) 0x02UL)
+
+static inline void pmd_cow_pte_mkexclusive(pmd_t *pmd)
+{
+ set_cow_pte_owner(pmd, COW_PTE_OWNER_EXCLUSIVE);
+}
+
+static inline bool pmd_cow_pte_exclusive(pmd_t *pmd)
+{
+ return cow_pte_owner_is_same(pmd, COW_PTE_OWNER_EXCLUSIVE);
+}
+
+static inline void pmd_cow_pte_clear_mkexclusive(pmd_t *pmd)
+{
+ set_cow_pte_owner(pmd, NULL);
+}
+
#ifndef pte_access_permitted
#define pte_access_permitted(pte, write) \
(pte_present(pte) && (!(write) || pte_write(pte)))
diff --git a/mm/gup.c b/mm/gup.c
index 5abdaf4874605..4949c8d42a400 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -634,6 +634,11 @@ static struct page *follow_page_pte(struct vm_area_struct *vma,
mark_page_accessed(page);
}
out:
+ /*
+ * We don't share the PTE when any other pinned page exists. And
+ * let the exclusive flag stick around until the table is freed.
+ */
+ pmd_cow_pte_mkexclusive(pmd);
pte_unmap_unlock(ptep, ptl);
return page;
no_page:
@@ -932,6 +937,7 @@ static int get_gate_page(struct mm_struct *mm, unsigned long address,
pte = pte_offset_map(pmd, address);
if (pte_none(*pte))
goto unmap;
+ pmd_cow_pte_clear_mkexclusive(pmd);
*vma = get_gate_vma(mm);
if (!page)
goto out;
@@ -2764,8 +2770,11 @@ static int gup_pmd_range(pud_t *pudp, pud_t pud, unsigned long addr, unsigned lo
if (!gup_huge_pd(__hugepd(pmd_val(pmd)), addr,
PMD_SHIFT, next, flags, pages, nr))
return 0;
- } else if (!gup_pte_range(pmd, addr, next, flags, pages, nr))
- return 0;
+ } else {
+ if (!gup_pte_range(pmd, addr, next, flags, pages, nr))
+ return 0;
+ pmd_cow_pte_mkexclusive(&pmd);
+ }
} while (pmdp++, addr = next, addr != end);

return 1;
--
2.37.3