2022-12-20 07:45:25

by Chih-En Lin

Subject: [PATCH v3 11/14] mm/migrate_device: Support COW PTE

Break the COW PTE table before collecting the pages in the COW-ed PTE range, since the collection path installs migration entries directly into that table.

Signed-off-by: Chih-En Lin <[email protected]>
---
mm/migrate_device.c | 2 ++
1 file changed, 2 insertions(+)

diff --git a/mm/migrate_device.c b/mm/migrate_device.c
index 721b2365dbca9..f6d67bd9629f5 100644
--- a/mm/migrate_device.c
+++ b/mm/migrate_device.c
@@ -106,6 +106,8 @@ static int migrate_vma_collect_pmd(pmd_t *pmdp,
 		}
 	}

+	if (!break_cow_pte_range(vma, pmdp, start, end))
+		return migrate_vma_collect_skip(start, end, walk);
 	if (unlikely(pmd_bad(*pmdp)))
 		return migrate_vma_collect_skip(start, end, walk);

--
2.37.3


2022-12-20 12:11:12

by kernel test robot

Subject: Re: [PATCH v3 11/14] mm/migrate_device: Support COW PTE

Hi Chih-En,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on v6.1-rc7]
[cannot apply to akpm-mm/mm-everything tip/perf/core acme/perf/core linus/master v6.1 v6.1-rc8 next-20221220]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting a patch, we suggest using '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url: https://github.com/intel-lab-lkp/linux/commits/Chih-En-Lin/Introduce-Copy-On-Write-to-Page-Table/20221220-153207
patch link: https://lore.kernel.org/r/20221220072743.3039060-12-shiyn.lin%40gmail.com
patch subject: [PATCH v3 11/14] mm/migrate_device: Support COW PTE
config: x86_64-rhel-8.3-kselftests
compiler: gcc-11 (Debian 11.3.0-8) 11.3.0
reproduce (this is a W=1 build):
# https://github.com/intel-lab-lkp/linux/commit/77cd28466a15d4d3fd3d6f23044a9196d543dba2
git remote add linux-review https://github.com/intel-lab-lkp/linux
git fetch --no-tags linux-review Chih-En-Lin/Introduce-Copy-On-Write-to-Page-Table/20221220-153207
git checkout 77cd28466a15d4d3fd3d6f23044a9196d543dba2
# save the config file
mkdir build_dir && cp config build_dir/.config
make W=1 O=build_dir ARCH=x86_64 olddefconfig
make W=1 O=build_dir ARCH=x86_64 SHELL=/bin/bash

If you fix the issue, kindly add the following tag where applicable
| Reported-by: kernel test robot <[email protected]>

All error/warnings (new ones prefixed by >>):

mm/migrate_device.c: In function 'migrate_vma_collect_pmd':
>> mm/migrate_device.c:109:39: warning: passing argument 2 of 'break_cow_pte_range' makes integer from pointer without a cast [-Wint-conversion]
109 | if (!break_cow_pte_range(vma, pmdp, start, end))
| ^~~~
| |
| pmd_t *
In file included from include/linux/migrate.h:5,
from mm/migrate_device.c:9:
include/linux/mm.h:1898:67: note: expected 'long unsigned int' but argument is of type 'pmd_t *'
1898 | int break_cow_pte_range(struct vm_area_struct *vma, unsigned long start,
| ~~~~~~~~~~~~~~^~~~~
>> mm/migrate_device.c:109:14: error: too many arguments to function 'break_cow_pte_range'
109 | if (!break_cow_pte_range(vma, pmdp, start, end))
| ^~~~~~~~~~~~~~~~~~~
In file included from include/linux/migrate.h:5,
from mm/migrate_device.c:9:
include/linux/mm.h:1898:5: note: declared here
1898 | int break_cow_pte_range(struct vm_area_struct *vma, unsigned long start,
| ^~~~~~~~~~~~~~~~~~~
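
Both diagnostics point at the same root cause: the new hunk passes pmdp as a second argument, while the break_cow_pte_range() declaration the robot quotes from include/linux/mm.h takes only (vma, start, end). Below is a minimal sketch of a call that matches that three-argument prototype; whether the helper signals failure with a zero return (as the '!' test assumes) or with an error code is an assumption of this sketch, not something the report establishes.

	/*
	 * Hypothetical fix matching the declared prototype: drop the pmdp
	 * argument. The failure check is kept as written in the patch;
	 * adjust it if break_cow_pte_range() instead returns 0 on success
	 * and a negative errno on failure.
	 */
	if (!break_cow_pte_range(vma, start, end))
		return migrate_vma_collect_skip(start, end, walk);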


vim +/break_cow_pte_range +109 mm/migrate_device.c

> 9 #include <linux/migrate.h>
10 #include <linux/mm.h>
11 #include <linux/mm_inline.h>
12 #include <linux/mmu_notifier.h>
13 #include <linux/oom.h>
14 #include <linux/pagewalk.h>
15 #include <linux/rmap.h>
16 #include <linux/swapops.h>
17 #include <asm/tlbflush.h>
18 #include "internal.h"
19
20 static int migrate_vma_collect_skip(unsigned long start,
21 unsigned long end,
22 struct mm_walk *walk)
23 {
24 struct migrate_vma *migrate = walk->private;
25 unsigned long addr;
26
27 for (addr = start; addr < end; addr += PAGE_SIZE) {
28 migrate->dst[migrate->npages] = 0;
29 migrate->src[migrate->npages++] = 0;
30 }
31
32 return 0;
33 }
34
35 static int migrate_vma_collect_hole(unsigned long start,
36 unsigned long end,
37 __always_unused int depth,
38 struct mm_walk *walk)
39 {
40 struct migrate_vma *migrate = walk->private;
41 unsigned long addr;
42
43 /* Only allow populating anonymous memory. */
44 if (!vma_is_anonymous(walk->vma))
45 return migrate_vma_collect_skip(start, end, walk);
46
47 for (addr = start; addr < end; addr += PAGE_SIZE) {
48 migrate->src[migrate->npages] = MIGRATE_PFN_MIGRATE;
49 migrate->dst[migrate->npages] = 0;
50 migrate->npages++;
51 migrate->cpages++;
52 }
53
54 return 0;
55 }
56
57 static int migrate_vma_collect_pmd(pmd_t *pmdp,
58 unsigned long start,
59 unsigned long end,
60 struct mm_walk *walk)
61 {
62 struct migrate_vma *migrate = walk->private;
63 struct vm_area_struct *vma = walk->vma;
64 struct mm_struct *mm = vma->vm_mm;
65 unsigned long addr = start, unmapped = 0;
66 spinlock_t *ptl;
67 pte_t *ptep;
68
69 again:
70 if (pmd_none(*pmdp))
71 return migrate_vma_collect_hole(start, end, -1, walk);
72
73 if (pmd_trans_huge(*pmdp)) {
74 struct page *page;
75
76 ptl = pmd_lock(mm, pmdp);
77 if (unlikely(!pmd_trans_huge(*pmdp))) {
78 spin_unlock(ptl);
79 goto again;
80 }
81
82 page = pmd_page(*pmdp);
83 if (is_huge_zero_page(page)) {
84 spin_unlock(ptl);
85 split_huge_pmd(vma, pmdp, addr);
86 if (pmd_trans_unstable(pmdp))
87 return migrate_vma_collect_skip(start, end,
88 walk);
89 } else {
90 int ret;
91
92 get_page(page);
93 spin_unlock(ptl);
94 if (unlikely(!trylock_page(page)))
95 return migrate_vma_collect_skip(start, end,
96 walk);
97 ret = split_huge_page(page);
98 unlock_page(page);
99 put_page(page);
100 if (ret)
101 return migrate_vma_collect_skip(start, end,
102 walk);
103 if (pmd_none(*pmdp))
104 return migrate_vma_collect_hole(start, end, -1,
105 walk);
106 }
107 }
108
> 109 if (!break_cow_pte_range(vma, pmdp, start, end))
110 return migrate_vma_collect_skip(start, end, walk);
111 if (unlikely(pmd_bad(*pmdp)))
112 return migrate_vma_collect_skip(start, end, walk);
113
114 ptep = pte_offset_map_lock(mm, pmdp, addr, &ptl);
115 arch_enter_lazy_mmu_mode();
116
117 for (; addr < end; addr += PAGE_SIZE, ptep++) {
118 unsigned long mpfn = 0, pfn;
119 struct page *page;
120 swp_entry_t entry;
121 pte_t pte;
122
123 pte = *ptep;
124
125 if (pte_none(pte)) {
126 if (vma_is_anonymous(vma)) {
127 mpfn = MIGRATE_PFN_MIGRATE;
128 migrate->cpages++;
129 }
130 goto next;
131 }
132
133 if (!pte_present(pte)) {
134 /*
135 * Only care about unaddressable device page special
136 * page table entry. Other special swap entries are not
137 * migratable, and we ignore regular swapped page.
138 */
139 entry = pte_to_swp_entry(pte);
140 if (!is_device_private_entry(entry))
141 goto next;
142
143 page = pfn_swap_entry_to_page(entry);
144 if (!(migrate->flags &
145 MIGRATE_VMA_SELECT_DEVICE_PRIVATE) ||
146 page->pgmap->owner != migrate->pgmap_owner)
147 goto next;
148
149 mpfn = migrate_pfn(page_to_pfn(page)) |
150 MIGRATE_PFN_MIGRATE;
151 if (is_writable_device_private_entry(entry))
152 mpfn |= MIGRATE_PFN_WRITE;
153 } else {
154 pfn = pte_pfn(pte);
155 if (is_zero_pfn(pfn) &&
156 (migrate->flags & MIGRATE_VMA_SELECT_SYSTEM)) {
157 mpfn = MIGRATE_PFN_MIGRATE;
158 migrate->cpages++;
159 goto next;
160 }
161 page = vm_normal_page(migrate->vma, addr, pte);
162 if (page && !is_zone_device_page(page) &&
163 !(migrate->flags & MIGRATE_VMA_SELECT_SYSTEM))
164 goto next;
165 else if (page && is_device_coherent_page(page) &&
166 (!(migrate->flags & MIGRATE_VMA_SELECT_DEVICE_COHERENT) ||
167 page->pgmap->owner != migrate->pgmap_owner))
168 goto next;
169 mpfn = migrate_pfn(pfn) | MIGRATE_PFN_MIGRATE;
170 mpfn |= pte_write(pte) ? MIGRATE_PFN_WRITE : 0;
171 }
172
173 /* FIXME support THP */
174 if (!page || !page->mapping || PageTransCompound(page)) {
175 mpfn = 0;
176 goto next;
177 }
178
179 /*
180 * By getting a reference on the page we pin it and that blocks
181 * any kind of migration. Side effect is that it "freezes" the
182 * pte.
183 *
184 * We drop this reference after isolating the page from the lru
185 * for non device page (device page are not on the lru and thus
186 * can't be dropped from it).
187 */
188 get_page(page);
189
190 /*
191 * We rely on trylock_page() to avoid deadlock between
192 * concurrent migrations where each is waiting on the others
193 * page lock. If we can't immediately lock the page we fail this
194 * migration as it is only best effort anyway.
195 *
196 * If we can lock the page it's safe to set up a migration entry
197 * now. In the common case where the page is mapped once in a
198 * single process setting up the migration entry now is an
199 * optimisation to avoid walking the rmap later with
200 * try_to_migrate().
201 */
202 if (trylock_page(page)) {
203 bool anon_exclusive;
204 pte_t swp_pte;
205
206 flush_cache_page(vma, addr, pte_pfn(*ptep));
207 anon_exclusive = PageAnon(page) && PageAnonExclusive(page);
208 if (anon_exclusive) {
209 pte = ptep_clear_flush(vma, addr, ptep);
210
211 if (page_try_share_anon_rmap(page)) {
212 set_pte_at(mm, addr, ptep, pte);
213 unlock_page(page);
214 put_page(page);
215 mpfn = 0;
216 goto next;
217 }
218 } else {
219 pte = ptep_get_and_clear(mm, addr, ptep);
220 }
221
222 migrate->cpages++;
223
224 /* Set the dirty flag on the folio now the pte is gone. */
225 if (pte_dirty(pte))
226 folio_mark_dirty(page_folio(page));
227
228 /* Setup special migration page table entry */
229 if (mpfn & MIGRATE_PFN_WRITE)
230 entry = make_writable_migration_entry(
231 page_to_pfn(page));
232 else if (anon_exclusive)
233 entry = make_readable_exclusive_migration_entry(
234 page_to_pfn(page));
235 else
236 entry = make_readable_migration_entry(
237 page_to_pfn(page));
238 if (pte_present(pte)) {
239 if (pte_young(pte))
240 entry = make_migration_entry_young(entry);
241 if (pte_dirty(pte))
242 entry = make_migration_entry_dirty(entry);
243 }
244 swp_pte = swp_entry_to_pte(entry);
245 if (pte_present(pte)) {
246 if (pte_soft_dirty(pte))
247 swp_pte = pte_swp_mksoft_dirty(swp_pte);
248 if (pte_uffd_wp(pte))
249 swp_pte = pte_swp_mkuffd_wp(swp_pte);
250 } else {
251 if (pte_swp_soft_dirty(pte))
252 swp_pte = pte_swp_mksoft_dirty(swp_pte);
253 if (pte_swp_uffd_wp(pte))
254 swp_pte = pte_swp_mkuffd_wp(swp_pte);
255 }
256 set_pte_at(mm, addr, ptep, swp_pte);
257
258 /*
259 * This is like regular unmap: we remove the rmap and
260 * drop page refcount. Page won't be freed, as we took
261 * a reference just above.
262 */
263 page_remove_rmap(page, vma, false);
264 put_page(page);
265
266 if (pte_present(pte))
267 unmapped++;
268 } else {
269 put_page(page);
270 mpfn = 0;
271 }
272
273 next:
274 migrate->dst[migrate->npages] = 0;
275 migrate->src[migrate->npages++] = mpfn;
276 }
277
278 /* Only flush the TLB if we actually modified any entries */
279 if (unmapped)
280 flush_tlb_range(walk->vma, start, end);
281
282 arch_leave_lazy_mmu_mode();
283 pte_unmap_unlock(ptep - 1, ptl);
284
285 return 0;
286 }
287

--
0-DAY CI Kernel Test Service
https://01.org/lkp



2022-12-20 14:49:15

by kernel test robot

Subject: Re: [PATCH v3 11/14] mm/migrate_device: Support COW PTE

Hi Chih-En,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on v6.1-rc7]
[cannot apply to akpm-mm/mm-everything tip/perf/core acme/perf/core linus/master v6.1 v6.1-rc8 next-20221220]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting a patch, we suggest using '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url: https://github.com/intel-lab-lkp/linux/commits/Chih-En-Lin/Introduce-Copy-On-Write-to-Page-Table/20221220-153207
patch link: https://lore.kernel.org/r/20221220072743.3039060-12-shiyn.lin%40gmail.com
patch subject: [PATCH v3 11/14] mm/migrate_device: Support COW PTE
config: x86_64-rhel-8.3-rust
compiler: clang version 14.0.6 (https://github.com/llvm/llvm-project f28c006a5895fc0e329fe15fead81e37457cb1d1)
reproduce (this is a W=1 build):
wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
chmod +x ~/bin/make.cross
# https://github.com/intel-lab-lkp/linux/commit/77cd28466a15d4d3fd3d6f23044a9196d543dba2
git remote add linux-review https://github.com/intel-lab-lkp/linux
git fetch --no-tags linux-review Chih-En-Lin/Introduce-Copy-On-Write-to-Page-Table/20221220-153207
git checkout 77cd28466a15d4d3fd3d6f23044a9196d543dba2
# save the config file
mkdir build_dir && cp config build_dir/.config
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross W=1 O=build_dir ARCH=x86_64 olddefconfig
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross W=1 O=build_dir ARCH=x86_64 SHELL=/bin/bash

If you fix the issue, kindly add the following tag where applicable
| Reported-by: kernel test robot <[email protected]>

All errors (new ones prefixed by >>):

>> mm/migrate_device.c:109:45: error: too many arguments to function call, expected 3, have 4
if (!break_cow_pte_range(vma, pmdp, start, end))
~~~~~~~~~~~~~~~~~~~ ^~~
include/linux/mm.h:1898:5: note: 'break_cow_pte_range' declared here
int break_cow_pte_range(struct vm_area_struct *vma, unsigned long start,
^
1 error generated.


vim +109 mm/migrate_device.c

56
57 static int migrate_vma_collect_pmd(pmd_t *pmdp,
58 unsigned long start,
59 unsigned long end,
60 struct mm_walk *walk)
61 {
62 struct migrate_vma *migrate = walk->private;
63 struct vm_area_struct *vma = walk->vma;
64 struct mm_struct *mm = vma->vm_mm;
65 unsigned long addr = start, unmapped = 0;
66 spinlock_t *ptl;
67 pte_t *ptep;
68
69 again:
70 if (pmd_none(*pmdp))
71 return migrate_vma_collect_hole(start, end, -1, walk);
72
73 if (pmd_trans_huge(*pmdp)) {
74 struct page *page;
75
76 ptl = pmd_lock(mm, pmdp);
77 if (unlikely(!pmd_trans_huge(*pmdp))) {
78 spin_unlock(ptl);
79 goto again;
80 }
81
82 page = pmd_page(*pmdp);
83 if (is_huge_zero_page(page)) {
84 spin_unlock(ptl);
85 split_huge_pmd(vma, pmdp, addr);
86 if (pmd_trans_unstable(pmdp))
87 return migrate_vma_collect_skip(start, end,
88 walk);
89 } else {
90 int ret;
91
92 get_page(page);
93 spin_unlock(ptl);
94 if (unlikely(!trylock_page(page)))
95 return migrate_vma_collect_skip(start, end,
96 walk);
97 ret = split_huge_page(page);
98 unlock_page(page);
99 put_page(page);
100 if (ret)
101 return migrate_vma_collect_skip(start, end,
102 walk);
103 if (pmd_none(*pmdp))
104 return migrate_vma_collect_hole(start, end, -1,
105 walk);
106 }
107 }
108
> 109 if (!break_cow_pte_range(vma, pmdp, start, end))
110 return migrate_vma_collect_skip(start, end, walk);
111 if (unlikely(pmd_bad(*pmdp)))
112 return migrate_vma_collect_skip(start, end, walk);
113
114 ptep = pte_offset_map_lock(mm, pmdp, addr, &ptl);
115 arch_enter_lazy_mmu_mode();
116
117 for (; addr < end; addr += PAGE_SIZE, ptep++) {
118 unsigned long mpfn = 0, pfn;
119 struct page *page;
120 swp_entry_t entry;
121 pte_t pte;
122
123 pte = *ptep;
124
125 if (pte_none(pte)) {
126 if (vma_is_anonymous(vma)) {
127 mpfn = MIGRATE_PFN_MIGRATE;
128 migrate->cpages++;
129 }
130 goto next;
131 }
132
133 if (!pte_present(pte)) {
134 /*
135 * Only care about unaddressable device page special
136 * page table entry. Other special swap entries are not
137 * migratable, and we ignore regular swapped page.
138 */
139 entry = pte_to_swp_entry(pte);
140 if (!is_device_private_entry(entry))
141 goto next;
142
143 page = pfn_swap_entry_to_page(entry);
144 if (!(migrate->flags &
145 MIGRATE_VMA_SELECT_DEVICE_PRIVATE) ||
146 page->pgmap->owner != migrate->pgmap_owner)
147 goto next;
148
149 mpfn = migrate_pfn(page_to_pfn(page)) |
150 MIGRATE_PFN_MIGRATE;
151 if (is_writable_device_private_entry(entry))
152 mpfn |= MIGRATE_PFN_WRITE;
153 } else {
154 pfn = pte_pfn(pte);
155 if (is_zero_pfn(pfn) &&
156 (migrate->flags & MIGRATE_VMA_SELECT_SYSTEM)) {
157 mpfn = MIGRATE_PFN_MIGRATE;
158 migrate->cpages++;
159 goto next;
160 }
161 page = vm_normal_page(migrate->vma, addr, pte);
162 if (page && !is_zone_device_page(page) &&
163 !(migrate->flags & MIGRATE_VMA_SELECT_SYSTEM))
164 goto next;
165 else if (page && is_device_coherent_page(page) &&
166 (!(migrate->flags & MIGRATE_VMA_SELECT_DEVICE_COHERENT) ||
167 page->pgmap->owner != migrate->pgmap_owner))
168 goto next;
169 mpfn = migrate_pfn(pfn) | MIGRATE_PFN_MIGRATE;
170 mpfn |= pte_write(pte) ? MIGRATE_PFN_WRITE : 0;
171 }
172
173 /* FIXME support THP */
174 if (!page || !page->mapping || PageTransCompound(page)) {
175 mpfn = 0;
176 goto next;
177 }
178
179 /*
180 * By getting a reference on the page we pin it and that blocks
181 * any kind of migration. Side effect is that it "freezes" the
182 * pte.
183 *
184 * We drop this reference after isolating the page from the lru
185 * for non device page (device page are not on the lru and thus
186 * can't be dropped from it).
187 */
188 get_page(page);
189
190 /*
191 * We rely on trylock_page() to avoid deadlock between
192 * concurrent migrations where each is waiting on the others
193 * page lock. If we can't immediately lock the page we fail this
194 * migration as it is only best effort anyway.
195 *
196 * If we can lock the page it's safe to set up a migration entry
197 * now. In the common case where the page is mapped once in a
198 * single process setting up the migration entry now is an
199 * optimisation to avoid walking the rmap later with
200 * try_to_migrate().
201 */
202 if (trylock_page(page)) {
203 bool anon_exclusive;
204 pte_t swp_pte;
205
206 flush_cache_page(vma, addr, pte_pfn(*ptep));
207 anon_exclusive = PageAnon(page) && PageAnonExclusive(page);
208 if (anon_exclusive) {
209 pte = ptep_clear_flush(vma, addr, ptep);
210
211 if (page_try_share_anon_rmap(page)) {
212 set_pte_at(mm, addr, ptep, pte);
213 unlock_page(page);
214 put_page(page);
215 mpfn = 0;
216 goto next;
217 }
218 } else {
219 pte = ptep_get_and_clear(mm, addr, ptep);
220 }
221
222 migrate->cpages++;
223
224 /* Set the dirty flag on the folio now the pte is gone. */
225 if (pte_dirty(pte))
226 folio_mark_dirty(page_folio(page));
227
228 /* Setup special migration page table entry */
229 if (mpfn & MIGRATE_PFN_WRITE)
230 entry = make_writable_migration_entry(
231 page_to_pfn(page));
232 else if (anon_exclusive)
233 entry = make_readable_exclusive_migration_entry(
234 page_to_pfn(page));
235 else
236 entry = make_readable_migration_entry(
237 page_to_pfn(page));
238 if (pte_present(pte)) {
239 if (pte_young(pte))
240 entry = make_migration_entry_young(entry);
241 if (pte_dirty(pte))
242 entry = make_migration_entry_dirty(entry);
243 }
244 swp_pte = swp_entry_to_pte(entry);
245 if (pte_present(pte)) {
246 if (pte_soft_dirty(pte))
247 swp_pte = pte_swp_mksoft_dirty(swp_pte);
248 if (pte_uffd_wp(pte))
249 swp_pte = pte_swp_mkuffd_wp(swp_pte);
250 } else {
251 if (pte_swp_soft_dirty(pte))
252 swp_pte = pte_swp_mksoft_dirty(swp_pte);
253 if (pte_swp_uffd_wp(pte))
254 swp_pte = pte_swp_mkuffd_wp(swp_pte);
255 }
256 set_pte_at(mm, addr, ptep, swp_pte);
257
258 /*
259 * This is like regular unmap: we remove the rmap and
260 * drop page refcount. Page won't be freed, as we took
261 * a reference just above.
262 */
263 page_remove_rmap(page, vma, false);
264 put_page(page);
265
266 if (pte_present(pte))
267 unmapped++;
268 } else {
269 put_page(page);
270 mpfn = 0;
271 }
272
273 next:
274 migrate->dst[migrate->npages] = 0;
275 migrate->src[migrate->npages++] = mpfn;
276 }
277
278 /* Only flush the TLB if we actually modified any entries */
279 if (unmapped)
280 flush_tlb_range(walk->vma, start, end);
281
282 arch_leave_lazy_mmu_mode();
283 pte_unmap_unlock(ptep - 1, ptl);
284
285 return 0;
286 }
287

--
0-DAY CI Kernel Test Service
https://01.org/lkp

