Commit-ID: f9848081ba2cf98628df165cc2784c32fe9fc9e2
Gitweb: http://git.kernel.org/tip/f9848081ba2cf98628df165cc2784c32fe9fc9e2
Author: Peter Zijlstra <[email protected]>
AuthorDate: Wed, 18 Jul 2012 22:06:47 +0200
Committer: Ingo Molnar <[email protected]>
CommitDate: Sun, 28 Oct 2012 17:31:14 +0100
sched, numa, mm/mpol: Add_MPOL_F_HOME
Add MPOL_F_HOME, to implement multi-stage home node binding.
Suggested-by: Andrea Arcangeli <[email protected]>
Suggested-by: Rik van Riel <[email protected]>
Signed-off-by: Peter Zijlstra <[email protected]>
Cc: Paul Turner <[email protected]>
Cc: Linus Torvalds <[email protected]>
Cc: Andrew Morton <[email protected]>
Link: http://lkml.kernel.org/n/[email protected]
Signed-off-by: Ingo Molnar <[email protected]>
---
include/uapi/linux/mempolicy.h | 1 +
mm/mempolicy.c | 34 +++++++++++++++++++++++++++++++++-
2 files changed, 34 insertions(+), 1 deletions(-)
diff --git a/include/uapi/linux/mempolicy.h b/include/uapi/linux/mempolicy.h
index 6a1baae..a2bcd04 100644
--- a/include/uapi/linux/mempolicy.h
+++ b/include/uapi/linux/mempolicy.h
@@ -69,6 +69,7 @@ enum mpol_rebind_step {
#define MPOL_F_LOCAL (1 << 1) /* preferred local allocation */
#define MPOL_F_REBINDING (1 << 2) /* identify policies in rebinding */
#define MPOL_F_MOF (1 << 3) /* this policy wants migrate on fault */
+#define MPOL_F_HOME (1 << 4) /* this is the home-node policy */
#endif /* _UAPI_LINUX_MEMPOLICY_H */
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 97e6d99..1ac0479 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -2216,6 +2216,7 @@ static void sp_free(struct sp_node *n)
* @page - page to be checked
* @vma - vm area where page mapped
* @addr - virtual address where page mapped
+ * @multi - use multi-stage node binding
*
* Lookup current policy node id for vma,addr and "compare to" page's
* node id.
@@ -2278,6 +2279,37 @@ int mpol_misplaced(struct page *page, struct vm_area_struct *vma, unsigned long
default:
BUG();
}
+
+ /*
+ * Multi-stage node selection is used in conjunction with a periodic
+ * migration fault to build a temporal task<->page relation. By
+ * using a two-stage filter we remove short/unlikely relations.
+ *
+ * Using P(p) ~ n_p / n_t as per frequentist probability, we can
+ * equate a task's usage of a particular page (n_p) per total usage
+ * of this page (n_t) (in a given time-span) to a probability.
+ *
+ * Our periodic faults will then sample this probability and getting
+ * the same result twice in a row, given these samples are fully
+ * independent, is then given by P(n)^2, provided our sample period
+ * is sufficiently short compared to the usage pattern.
+ *
+ * This quadric squishes small probabilities, making it less likely
+ * we act on an unlikely task<->page relation.
+ */
+ if (pol->flags & MPOL_F_HOME) {
+ int last_nid;
+
+ /*
+ * Migrate towards the current node, depends on
+ * task_numa_placement() details.
+ */
+ polnid = numa_node_id();
+ last_nid = page_xchg_last_nid(page, polnid);
+ if (last_nid != polnid)
+ goto out;
+ }
+
if (curnid != polnid)
ret = polnid;
out:
@@ -2470,7 +2502,7 @@ void __init numa_policy_init(void)
preferred_node_policy[nid] = (struct mempolicy) {
.refcnt = ATOMIC_INIT(1),
.mode = MPOL_PREFERRED,
- .flags = MPOL_F_MOF,
+ .flags = MPOL_F_MOF | MPOL_F_HOME,
.v = { .preferred_node = nid, },
};
}