The maple tree marks nodes dead as soon as they are going to be
replaced. This could be problematic when used in the RCU context since
the writer may be starved of CPU time by the readers. This patch set
addresses the issue by switching the data replacement strategy to one
that will only mark data as dead once the new data is available.
This series changes the ordering of the node replacement so that the new
data is live before the old data is marked 'dead'. When readers hit
'dead' nodes, they will restart from the top of the tree and end up in
the new data.
In more complex scenarios, the replacement strategy means a subtree is
built and graphed into the tree leaving some nodes to point to the old
parent. The view of tasks into the old data will either remain with the
old data, or see the new data once the old data is marked 'dead'.
Iterators will see the 'dead' node and restart on their own and switch
to the new data. There is no risk of the reader seeing old data in
these cases.
The 'dead' subtree of data is then fully marked dead, but reused nodes
will still point to the dead nodes until the parent pointer is updated.
Walking up to a 'dead' node will cause a re-walk from the top of the
tree and enter the new data area where old data is not reachable.
Once the parent pointers are fully up to date in the active data, the
'dead' subtree is iterated to collect entirely 'dead' subtrees, and dead
nodes (nodes that partially contained reused data).
Liam R. Howlett (6):
maple_tree: Add hex output to maple_arange64 dump
maple_tree: Reorder replacement of nodes to avoid live lock
maple_tree: introduce mas_put_in_tree()
maple_tree: Introduce mas_tree_parent() definition
maple_tree: Change mas_adopt_children() parent usage
maple_tree: Replace data before marking dead in split and spanning
store
lib/maple_tree.c | 607 +++++++++++++++++++----------------------------
1 file changed, 240 insertions(+), 367 deletions(-)
--
2.39.2
mas_replace() has a single user that takes a flag which is now always
true. Replace this function with mas_put_in_tree() to better align with
mas_replace_node(). Inline the remaining logic into the only caller;
mas_wmb_replace().
Signed-off-by: Liam R. Howlett <[email protected]>
---
lib/maple_tree.c | 73 ++++++++++++++++++------------------------------
1 file changed, 27 insertions(+), 46 deletions(-)
diff --git a/lib/maple_tree.c b/lib/maple_tree.c
index 0d4573a8d134..c01b1be1480c 100644
--- a/lib/maple_tree.c
+++ b/lib/maple_tree.c
@@ -1715,45 +1715,32 @@ static inline void mas_adopt_children(struct ma_state *mas,
}
/*
- * mas_replace() - Replace a maple node in the tree with mas->node. Uses the
- * parent encoding to locate the maple node in the tree.
- * @mas - the ma_state to use for operations.
- * @advanced - boolean to adopt the child nodes and free the old node (false) or
- * leave the node (true) and handle the adoption and free elsewhere.
+ * mas_put_in_tree() - Put a new node in the tree, smp_wmb(), and mark the old
+ * node as dead.
+ * @mas - the maple state with the new node
+ * @old_enode - The old maple encoded node to replace.
*/
-static inline void mas_replace(struct ma_state *mas, bool advanced)
+static inline void mas_put_in_tree(struct ma_state *mas,
+ struct maple_enode *old_enode)
__must_hold(mas->tree->ma_lock)
{
- struct maple_node *mn = mas_mn(mas);
- struct maple_enode *old_enode;
- unsigned char offset = 0;
- void __rcu **slots = NULL;
-
- if (ma_is_root(mn)) {
- old_enode = mas_root_locked(mas);
- } else {
- offset = mte_parent_slot(mas->node);
- slots = ma_slots(mte_parent(mas->node),
- mas_parent_type(mas, mas->node));
- old_enode = mas_slot_locked(mas, slots, offset);
- }
-
- if (!advanced && !mte_is_leaf(mas->node))
- mas_adopt_children(mas, mas->node);
+ unsigned char offset;
+ void __rcu **slots;
if (mte_is_root(mas->node)) {
- mn->parent = ma_parent_ptr(
+ mas_mn(mas)->parent = ma_parent_ptr(
((unsigned long)mas->tree | MA_ROOT_PARENT));
rcu_assign_pointer(mas->tree->ma_root, mte_mk_root(mas->node));
mas_set_height(mas);
} else {
+
+ offset = mte_parent_slot(mas->node);
+ slots = ma_slots(mte_parent(mas->node),
+ mas_parent_type(mas, mas->node));
rcu_assign_pointer(slots[offset], mas->node);
}
- if (!advanced) {
- mte_set_node_dead(old_enode);
- mas_free(mas, old_enode);
- }
+ mte_set_node_dead(old_enode);
}
/*
@@ -1767,22 +1754,7 @@ static inline void mas_replace_node(struct ma_state *mas,
struct maple_enode *old_enode)
__must_hold(mas->tree->ma_lock)
{
- if (mte_is_root(mas->node)) {
- mas_mn(mas)->parent = ma_parent_ptr(
- ((unsigned long)mas->tree | MA_ROOT_PARENT));
- rcu_assign_pointer(mas->tree->ma_root, mte_mk_root(mas->node));
- mas_set_height(mas);
- } else {
- unsigned char offset = 0;
- void __rcu **slots = NULL;
-
- offset = mte_parent_slot(mas->node);
- slots = ma_slots(mte_parent(mas->node),
- mas_parent_type(mas, mas->node));
- rcu_assign_pointer(slots[offset], mas->node);
- }
-
- mte_set_node_dead(old_enode);
+ mas_put_in_tree(mas, old_enode);
mas_free(mas, old_enode);
}
@@ -2789,11 +2761,20 @@ static inline void mas_wmb_replace(struct ma_state *mas,
struct ma_topiary *free,
struct ma_topiary *destroy)
{
- /* All nodes must see old data as dead prior to replacing that data */
- smp_wmb(); /* Needed for RCU */
+ struct maple_enode *old_enode;
+
+ if (mte_is_root(mas->node)) {
+ old_enode = mas_root_locked(mas);
+ } else {
+ unsigned char offset = mte_parent_slot(mas->node);
+ void __rcu **slots = ma_slots(mte_parent(mas->node),
+ mas_parent_type(mas, mas->node));
+
+ old_enode = mas_slot_locked(mas, slots, offset);
+ }
/* Insert the new data in the tree */
- mas_replace(mas, true);
+ mas_put_in_tree(mas, old_enode);
if (!mte_is_leaf(mas->node))
mas_descend_adopt(mas);
--
2.39.2