2020-07-23 11:52:19

by KP Singh

Subject: [PATCH bpf-next v6 3/7] bpf: Generalize bpf_sk_storage

From: KP Singh <[email protected]>

Refactor the functionality in bpf_sk_storage.c so that the concept of
storage linked to kernel objects can be extended to other objects like
inode, task_struct etc.

Each new local storage will still be a separate map and provide its own
set of helpers. This allows for future object-specific extensions while
still sharing a lot of the underlying implementation.
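
As an illustration only (not part of this change), a future object-local
storage, e.g. for inodes, would reuse the generic helpers and add only its
object-specific callbacks, mirroring sk_storage_map_ops in this patch. All
inode_* names below are hypothetical:

const struct bpf_map_ops inode_storage_map_ops = {
        /* shared, generalized helpers */
        .map_alloc_check = bpf_local_storage_map_alloc_check,
        .map_check_btf = bpf_local_storage_map_check_btf,
        /* thin object-specific wrappers (hypothetical) */
        .map_alloc = inode_storage_map_alloc,
        .map_free = inode_storage_map_free,
        /* callbacks used by the generic local storage code */
        .map_selem_alloc = inode_selem_alloc,
        .map_local_storage_update = inode_storage_update,
        .map_local_storage_unlink = unlink_inode_storage,
};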

Signed-off-by: KP Singh <[email protected]>
---
include/linux/bpf.h | 13 ++
include/net/bpf_sk_storage.h | 55 +++++
include/uapi/linux/bpf.h | 8 +-
net/core/bpf_sk_storage.c | 387 ++++++++++++++++++++-------------
tools/include/uapi/linux/bpf.h | 8 +-
5 files changed, 320 insertions(+), 151 deletions(-)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 72221aea1c60..e20fd4ac8dc7 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -33,6 +33,9 @@ struct btf;
struct btf_type;
struct exception_table_entry;
struct seq_operations;
+struct bpf_local_storage;
+struct bpf_local_storage_map;
+struct bpf_local_storage_elem;

extern struct idr btf_idr;
extern spinlock_t btf_idr_lock;
@@ -93,6 +96,16 @@ struct bpf_map_ops {
__poll_t (*map_poll)(struct bpf_map *map, struct file *filp,
struct poll_table_struct *pts);

+ /* Functions called by bpf_local_storage maps */
+ bool (*map_local_storage_unlink)(struct bpf_local_storage *local_storage,
+ struct bpf_local_storage_elem *selem,
+ bool uncharge_omem);
+ struct bpf_local_storage_elem *(*map_selem_alloc)(
+ struct bpf_local_storage_map *smap, void *owner, void *value,
+ bool charge_omem);
+ struct bpf_local_storage_data *(*map_local_storage_update)(
+ void *owner, struct bpf_map *map, void *value, u64 flags);
+
/* BTF name and id of struct allocated by map_alloc */
const char * const map_btf_name;
int *map_btf_id;
diff --git a/include/net/bpf_sk_storage.h b/include/net/bpf_sk_storage.h
index 950c5aaba15e..e3185cfb91da 100644
--- a/include/net/bpf_sk_storage.h
+++ b/include/net/bpf_sk_storage.h
@@ -3,8 +3,15 @@
#ifndef _BPF_SK_STORAGE_H
#define _BPF_SK_STORAGE_H

+#include <linux/rculist.h>
+#include <linux/list.h>
+#include <linux/hash.h>
#include <linux/types.h>
#include <linux/spinlock.h>
+#include <linux/bpf.h>
+#include <net/sock.h>
+#include <uapi/linux/sock_diag.h>
+#include <uapi/linux/btf.h>

struct sock;

@@ -34,6 +41,54 @@ u16 bpf_local_storage_cache_idx_get(struct bpf_local_storage_cache *cache);
void bpf_local_storage_cache_idx_free(struct bpf_local_storage_cache *cache,
u16 idx);

+/* Helper functions for bpf_local_storage */
+int bpf_local_storage_map_alloc_check(union bpf_attr *attr);
+
+struct bpf_local_storage_map *bpf_local_storage_map_alloc(union bpf_attr *attr);
+
+struct bpf_local_storage_data *
+bpf_local_storage_lookup(struct bpf_local_storage *local_storage,
+ struct bpf_local_storage_map *smap,
+ bool cacheit_lockit);
+
+void bpf_local_storage_map_free(struct bpf_local_storage_map *smap);
+
+int bpf_local_storage_map_check_btf(const struct bpf_map *map,
+ const struct btf *btf,
+ const struct btf_type *key_type,
+ const struct btf_type *value_type);
+
+void bpf_selem_link_storage(struct bpf_local_storage *local_storage,
+ struct bpf_local_storage_elem *selem);
+
+bool bpf_selem_unlink_storage(struct bpf_local_storage *local_storage,
+ struct bpf_local_storage_elem *selem,
+ bool uncharge_omem);
+
+void bpf_selem_unlink(struct bpf_local_storage_elem *selem);
+
+void bpf_selem_link_map(struct bpf_local_storage_map *smap,
+ struct bpf_local_storage_elem *selem);
+
+void bpf_selem_unlink_map(struct bpf_local_storage_elem *selem);
+
+struct bpf_local_storage_elem *
+bpf_selem_alloc(struct bpf_local_storage_map *smap, void *value);
+
+struct bpf_local_storage *
+bpf_local_storage_alloc(struct bpf_local_storage_map *smap);
+
+int bpf_local_storage_publish(struct bpf_local_storage_elem *first_selem,
+ struct bpf_local_storage **addr,
+ struct bpf_local_storage *curr);
+
+int bpf_local_storage_check_update_flags(struct bpf_map *map, u64 map_flags);
+
+struct bpf_local_storage_data *
+bpf_local_storage_update(void *owner, struct bpf_map *map,
+ struct bpf_local_storage *local_storage, void *value,
+ u64 map_flags);
+
#ifdef CONFIG_BPF_SYSCALL
int bpf_sk_storage_clone(const struct sock *sk, struct sock *newsk);
struct bpf_sk_storage_diag *
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 54d0c886e3ba..b9d2e4792d08 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -3630,9 +3630,13 @@ enum {
BPF_F_SYSCTL_BASE_NAME = (1ULL << 0),
};

-/* BPF_FUNC_sk_storage_get flags */
+/* BPF_FUNC_<local>_storage_get flags */
enum {
- BPF_SK_STORAGE_GET_F_CREATE = (1ULL << 0),
+ BPF_LOCAL_STORAGE_GET_F_CREATE = (1ULL << 0),
+ /* BPF_SK_STORAGE_GET_F_CREATE is only kept for backward compatibility
+ * and BPF_LOCAL_STORAGE_GET_F_CREATE must be used instead.
+ */
+ BPF_SK_STORAGE_GET_F_CREATE = BPF_LOCAL_STORAGE_GET_F_CREATE,
};

/* BPF_FUNC_read_branch_records flags. */
diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c
index aa3e3a47acb5..f6bb02f076ad 100644
--- a/net/core/bpf_sk_storage.c
+++ b/net/core/bpf_sk_storage.c
@@ -83,7 +83,7 @@ struct bpf_local_storage_elem {
struct bpf_local_storage {
struct bpf_local_storage_data __rcu *cache[BPF_LOCAL_STORAGE_CACHE_SIZE];
struct hlist_head list; /* List of bpf_local_storage_elem */
- struct sock *owner; /* The object that owns the the above "list" of
+ void *owner; /* The object that owns the the above "list" of
* bpf_local_storage_elem.
*/
struct rcu_head rcu;
@@ -119,15 +119,11 @@ static bool selem_linked_to_map(const struct bpf_local_storage_elem *selem)
return !hlist_unhashed(&selem->map_node);
}

-static struct bpf_local_storage_elem *
-bpf_selem_alloc(struct bpf_local_storage_map *smap, struct sock *sk,
- void *value, bool charge_omem)
+struct bpf_local_storage_elem *
+bpf_selem_alloc(struct bpf_local_storage_map *smap, void *value)
{
struct bpf_local_storage_elem *selem;

- if (charge_omem && omem_charge(sk, smap->elem_size))
- return NULL;
-
selem = kzalloc(smap->elem_size, GFP_ATOMIC | __GFP_NOWARN);
if (selem) {
if (value)
@@ -135,6 +131,23 @@ bpf_selem_alloc(struct bpf_local_storage_map *smap, struct sock *sk,
return selem;
}

+ return NULL;
+}
+
+static struct bpf_local_storage_elem *
+sk_selem_alloc(struct bpf_local_storage_map *smap, void *owner, void *value,
+ bool charge_omem)
+{
+ struct bpf_local_storage_elem *selem;
+ struct sock *sk = owner;
+
+ if (charge_omem && omem_charge(sk, smap->elem_size))
+ return NULL;
+
+ selem = bpf_selem_alloc(smap, value);
+ if (selem)
+ return selem;
+
if (charge_omem)
atomic_sub(smap->elem_size, &sk->sk_omem_alloc);

@@ -145,51 +158,65 @@ bpf_selem_alloc(struct bpf_local_storage_map *smap, struct sock *sk,
* The caller must ensure selem->smap is still valid to be
* dereferenced for its smap->elem_size and smap->cache_idx.
*/
-static bool bpf_selem_unlink_storage(struct bpf_local_storage *local_storage,
- struct bpf_local_storage_elem *selem,
- bool uncharge_omem)
+bool bpf_selem_unlink_storage(struct bpf_local_storage *local_storage,
+ struct bpf_local_storage_elem *selem,
+ bool uncharge_omem)
{
struct bpf_local_storage_map *smap;
bool free_local_storage;
- struct sock *sk;

smap = rcu_dereference(SDATA(selem)->smap);
- sk = local_storage->owner;
+ /* local_storage is not freed now. local_storage->lock is
+ * still held and raw_spin_unlock_bh(&local_storage->lock)
+ * will be done by the caller.
+ * Although the unlock will be done under
+ * rcu_read_lock(), it is more intutivie to
+ * read if kfree_rcu(local_storage, rcu) is done
+ * after the raw_spin_unlock_bh(&local_storage->lock).
+ *
+ * Hence, a "bool free_local_storage" is returned
+ * to the caller which then calls the kfree_rcu()
+ * after unlock.
+ */
+ free_local_storage = smap->map.ops->map_local_storage_unlink(
+ local_storage, selem, uncharge_omem);
+ hlist_del_init_rcu(&selem->snode);
+ if (rcu_access_pointer(local_storage->cache[smap->cache_idx]) ==
+ SDATA(selem))
+ RCU_INIT_POINTER(local_storage->cache[smap->cache_idx], NULL);
+
+ kfree_rcu(selem, rcu);
+
+ return free_local_storage;
+}

+static bool unlink_sk_storage(struct bpf_local_storage *local_storage,
+ struct bpf_local_storage_elem *selem,
+ bool uncharge_omem)
+{
+ struct bpf_local_storage_map *smap;
+ struct sock *sk = local_storage->owner;
+ bool free_local_storage;
+
+ smap = rcu_dereference(SDATA(selem)->smap);
/* All uncharging on sk->sk_omem_alloc must be done first.
* sk may be freed once the last selem is unlinked from local_storage.
*/
- if (uncharge_omem)
- atomic_sub(smap->elem_size, &sk->sk_omem_alloc);

free_local_storage = hlist_is_singular_node(&selem->snode,
&local_storage->list);
+
+ if (uncharge_omem)
+ atomic_sub(smap->elem_size, &sk->sk_omem_alloc);
+
if (free_local_storage) {
- atomic_sub(sizeof(struct bpf_local_storage), &sk->sk_omem_alloc);
- local_storage->owner = NULL;
+ atomic_sub(sizeof(struct bpf_local_storage),
+ &sk->sk_omem_alloc);
+
/* After this RCU_INIT, sk may be freed and cannot be used */
RCU_INIT_POINTER(sk->sk_bpf_storage, NULL);
-
- /* local_storage is not freed now. local_storage->lock is
- * still held and raw_spin_unlock_bh(&local_storage->lock)
- * will be done by the caller.
- *
- * Although the unlock will be done under
- * rcu_read_lock(), it is more intutivie to
- * read if kfree_rcu(local_storage, rcu) is done
- * after the raw_spin_unlock_bh(&local_storage->lock).
- *
- * Hence, a "bool free_local_storage" is returned
- * to the caller which then calls the kfree_rcu()
- * after unlock.
- */
+ local_storage->owner = NULL;
}
- hlist_del_init_rcu(&selem->snode);
- if (rcu_access_pointer(local_storage->cache[smap->cache_idx]) ==
- SDATA(selem))
- RCU_INIT_POINTER(local_storage->cache[smap->cache_idx], NULL);
-
- kfree_rcu(selem, rcu);

return free_local_storage;
}
@@ -214,14 +241,14 @@ static void __bpf_selem_unlink_storage(struct bpf_local_storage_elem *selem)
kfree_rcu(local_storage, rcu);
}

-static void bpf_selem_link_storage(struct bpf_local_storage *local_storage,
- struct bpf_local_storage_elem *selem)
+void bpf_selem_link_storage(struct bpf_local_storage *local_storage,
+ struct bpf_local_storage_elem *selem)
{
RCU_INIT_POINTER(selem->local_storage, local_storage);
hlist_add_head(&selem->snode, &local_storage->list);
}

-static void bpf_selem_unlink_map(struct bpf_local_storage_elem *selem)
+void bpf_selem_unlink_map(struct bpf_local_storage_elem *selem)
{
struct bpf_local_storage_map *smap;
struct bpf_local_storage_map_bucket *b;
@@ -238,8 +265,8 @@ static void bpf_selem_unlink_map(struct bpf_local_storage_elem *selem)
raw_spin_unlock_bh(&b->lock);
}

-static void bpf_selem_link_map(struct bpf_local_storage_map *smap,
- struct bpf_local_storage_elem *selem)
+void bpf_selem_link_map(struct bpf_local_storage_map *smap,
+ struct bpf_local_storage_elem *selem)
{
struct bpf_local_storage_map_bucket *b = select_bucket(smap, selem);

@@ -249,7 +276,7 @@ static void bpf_selem_link_map(struct bpf_local_storage_map *smap,
raw_spin_unlock_bh(&b->lock);
}

-static void bpf_selem_unlink(struct bpf_local_storage_elem *selem)
+void bpf_selem_unlink(struct bpf_local_storage_elem *selem)
{
/* Always unlink from map before unlinking from local_storage
* because selem will be freed after successfully unlinked from
@@ -259,7 +286,7 @@ static void bpf_selem_unlink(struct bpf_local_storage_elem *selem)
__bpf_selem_unlink_storage(selem);
}

-static struct bpf_local_storage_data *
+struct bpf_local_storage_data *
bpf_local_storage_lookup(struct bpf_local_storage *local_storage,
struct bpf_local_storage_map *smap,
bool cacheit_lockit)
@@ -325,59 +352,53 @@ static int check_flags(const struct bpf_local_storage_data *old_sdata,
return 0;
}

-static int sk_storage_alloc(struct sock *sk,
+struct bpf_local_storage *
+bpf_local_storage_alloc(struct bpf_local_storage_map *smap)
+{
+ struct bpf_local_storage *storage;
+
+ storage = kzalloc(sizeof(*storage), GFP_ATOMIC | __GFP_NOWARN);
+ if (!storage)
+ return NULL;
+
+ INIT_HLIST_HEAD(&storage->list);
+ raw_spin_lock_init(&storage->lock);
+ return storage;
+}
+
+static int sk_storage_alloc(void *owner,
struct bpf_local_storage_map *smap,
struct bpf_local_storage_elem *first_selem)
{
- struct bpf_local_storage *prev_sk_storage, *sk_storage;
+ struct bpf_local_storage *curr;
+ struct sock *sk = owner;
int err;

- err = omem_charge(sk, sizeof(*sk_storage));
+ err = omem_charge(sk, sizeof(*curr));
if (err)
return err;

- sk_storage = kzalloc(sizeof(*sk_storage), GFP_ATOMIC | __GFP_NOWARN);
- if (!sk_storage) {
+ curr = bpf_local_storage_alloc(smap);
+ if (!curr) {
err = -ENOMEM;
goto uncharge;
}
- INIT_HLIST_HEAD(&sk_storage->list);
- raw_spin_lock_init(&sk_storage->lock);
- sk_storage->owner = sk;

- bpf_selem_link_storage(sk_storage, first_selem);
+ curr->owner = sk;
+
+ bpf_selem_link_storage(curr, first_selem);
bpf_selem_link_map(smap, first_selem);
- /* Publish sk_storage to sk. sk->sk_lock cannot be acquired.
- * Hence, atomic ops is used to set sk->sk_bpf_storage
- * from NULL to the newly allocated sk_storage ptr.
- *
- * From now on, the sk->sk_bpf_storage pointer is protected
- * by the sk_storage->lock. Hence, when freeing
- * the sk->sk_bpf_storage, the sk_storage->lock must
- * be held before setting sk->sk_bpf_storage to NULL.
- */
- prev_sk_storage = cmpxchg((struct bpf_local_storage **)&sk->sk_bpf_storage,
- NULL, sk_storage);
- if (unlikely(prev_sk_storage)) {
- bpf_selem_unlink_map(first_selem);
- err = -EAGAIN;
- goto uncharge;

- /* Note that even first_selem was linked to smap's
- * bucket->list, first_selem can be freed immediately
- * (instead of kfree_rcu) because
- * bpf_sk_storage_map_free() does a
- * synchronize_rcu() before walking the bucket->list.
- * Hence, no one is accessing selem from the
- * bucket->list under rcu_read_lock().
- */
- }
+ err = bpf_local_storage_publish(first_selem,
+ (struct bpf_local_storage **)&sk->sk_bpf_storage, curr);
+ if (err)
+ goto uncharge;

return 0;

uncharge:
- kfree(sk_storage);
- atomic_sub(sizeof(*sk_storage), &sk->sk_omem_alloc);
+ kfree(curr);
+ atomic_sub(sizeof(*curr), &sk->sk_omem_alloc);
return err;
}

@@ -386,54 +407,28 @@ static int sk_storage_alloc(struct sock *sk,
* Otherwise, it will become a leak (and other memory issues
* during map destruction).
*/
-static struct bpf_local_storage_data *
-bpf_local_storage_update(struct sock *sk, struct bpf_map *map, void *value,
+struct bpf_local_storage_data *
+bpf_local_storage_update(void *owner, struct bpf_map *map,
+ struct bpf_local_storage *local_storage, void *value,
u64 map_flags)
{
struct bpf_local_storage_data *old_sdata = NULL;
struct bpf_local_storage_elem *selem;
- struct bpf_local_storage *local_storage;
struct bpf_local_storage_map *smap;
int err;

- /* BPF_EXIST and BPF_NOEXIST cannot be both set */
- if (unlikely((map_flags & ~BPF_F_LOCK) > BPF_EXIST) ||
- /* BPF_F_LOCK can only be used in a value with spin_lock */
- unlikely((map_flags & BPF_F_LOCK) && !map_value_has_spin_lock(map)))
- return ERR_PTR(-EINVAL);
-
smap = (struct bpf_local_storage_map *)map;
- local_storage = rcu_dereference(sk->sk_bpf_storage);
- if (!local_storage || hlist_empty(&local_storage->list)) {
- /* Very first elem for this object */
- err = check_flags(NULL, map_flags);
- if (err)
- return ERR_PTR(err);
-
- selem = bpf_selem_alloc(smap, sk, value, true);
- if (!selem)
- return ERR_PTR(-ENOMEM);
-
- err = sk_storage_alloc(sk, smap, selem);
- if (err) {
- kfree(selem);
- atomic_sub(smap->elem_size, &sk->sk_omem_alloc);
- return ERR_PTR(err);
- }
-
- return SDATA(selem);
- }

if ((map_flags & BPF_F_LOCK) && !(map_flags & BPF_NOEXIST)) {
/* Hoping to find an old_sdata to do inline update
* such that it can avoid taking the local_storage->lock
* and changing the lists.
*/
- old_sdata =
- bpf_local_storage_lookup(local_storage, smap, false);
+ old_sdata = bpf_local_storage_lookup(local_storage, smap, false);
err = check_flags(old_sdata, map_flags);
if (err)
return ERR_PTR(err);
+
if (old_sdata && selem_linked_to_storage(SELEM(old_sdata))) {
copy_map_value_locked(map, old_sdata->data,
value, false);
@@ -471,10 +466,9 @@ bpf_local_storage_update(struct sock *sk, struct bpf_map *map, void *value,
* and then uncharge the old selem later (which may cause
* a potential but unnecessary charge failure), avoid taking
* a charge at all here (the "!old_sdata" check) and the
- * old_sdata will not be uncharged later during
- * bpf_selem_unlink_storage().
+ * old_sdata will not be uncharged later during bpf_selem_unlink().
*/
- selem = bpf_selem_alloc(smap, sk, value, !old_sdata);
+ selem = map->ops->map_selem_alloc(smap, owner, value, !old_sdata);
if (!selem) {
err = -ENOMEM;
goto unlock_err;
@@ -501,6 +495,87 @@ bpf_local_storage_update(struct sock *sk, struct bpf_map *map, void *value,
return ERR_PTR(err);
}

+int bpf_local_storage_check_update_flags(struct bpf_map *map, u64 map_flags)
+{
+ /* BPF_EXIST and BPF_NOEXIST cannot be both set */
+ if (unlikely((map_flags & ~BPF_F_LOCK) > BPF_EXIST) ||
+ /* BPF_F_LOCK can only be used in a value with spin_lock */
+ unlikely((map_flags & BPF_F_LOCK) && !map_value_has_spin_lock(map)))
+ return -EINVAL;
+
+ return 0;
+}
+
+/* Publish local_storage to the address. This is used because we are already
+ * in a region where we cannot grab a lock on the object owning the storage (
+ * (e.g sk->sk_lock). Hence, atomic ops is used.
+ *
+ * From now on, the addr pointer is protected
+ * by the local_storage->lock. Hence, upon freeing,
+ * the local_storage->lock must be held before unlinking the storage from the
+ * owner.
+ */
+int bpf_local_storage_publish(struct bpf_local_storage_elem *first_selem,
+ struct bpf_local_storage **addr,
+ struct bpf_local_storage *curr)
+{
+ struct bpf_local_storage *prev;
+
+ prev = cmpxchg(addr, NULL, curr);
+ if (unlikely(prev)) {
+ /* Note that even first_selem was linked to smap's
+ * bucket->list, first_selem can be freed immediately
+ * (instead of kfree_rcu) because
+ * bpf_local_storage_map_free() does a
+ * synchronize_rcu() before walking the bucket->list.
+ * Hence, no one is accessing selem from the
+ * bucket->list under rcu_read_lock().
+ */
+ bpf_selem_unlink_map(first_selem);
+ return -EAGAIN;
+ }
+
+ return 0;
+}
+
+static struct bpf_local_storage_data *
+sk_storage_update(void *owner, struct bpf_map *map, void *value, u64 map_flags)
+{
+ struct bpf_local_storage_data *old_sdata = NULL;
+ struct bpf_local_storage_elem *selem;
+ struct bpf_local_storage *local_storage;
+ struct bpf_local_storage_map *smap;
+ struct sock *sk;
+ int err;
+
+ err = bpf_local_storage_check_update_flags(map, map_flags);
+ if (err)
+ return ERR_PTR(err);
+
+ sk = owner;
+ local_storage = rcu_dereference(sk->sk_bpf_storage);
+ smap = (struct bpf_local_storage_map *)map;
+
+ if (!local_storage || hlist_empty(&local_storage->list)) {
+ /* Very first elem */
+ selem = map->ops->map_selem_alloc(smap, owner, value, !old_sdata);
+ if (!selem)
+ return ERR_PTR(-ENOMEM);
+
+ err = sk_storage_alloc(owner, smap, selem);
+ if (err) {
+ kfree(selem);
+ atomic_sub(smap->elem_size, &sk->sk_omem_alloc);
+ return ERR_PTR(err);
+ }
+
+ return SDATA(selem);
+ }
+
+ return bpf_local_storage_update(owner, map, local_storage, value,
+ map_flags);
+}
+
static int sk_storage_delete(struct sock *sk, struct bpf_map *map)
{
struct bpf_local_storage_data *sdata;
@@ -566,7 +641,7 @@ void bpf_sk_storage_free(struct sock *sk)
* Thus, no elem can be added-to or deleted-from the
* sk_storage->list by the bpf_prog or by the bpf-map's syscall.
*
- * It is racing with bpf_sk_storage_map_free() alone
+ * It is racing with bpf_local_storage_map_free() alone
* when unlinking elem from the sk_storage->list and
* the map's bucket->list.
*/
@@ -586,17 +661,12 @@ void bpf_sk_storage_free(struct sock *sk)
kfree_rcu(sk_storage, rcu);
}

-static void bpf_local_storage_map_free(struct bpf_map *map)
+void bpf_local_storage_map_free(struct bpf_local_storage_map *smap)
{
struct bpf_local_storage_elem *selem;
- struct bpf_local_storage_map *smap;
struct bpf_local_storage_map_bucket *b;
unsigned int i;

- smap = (struct bpf_local_storage_map *)map;
-
- bpf_local_storage_cache_idx_free(&sk_cache, smap->cache_idx);
-
/* Note that this map might be concurrently cloned from
* bpf_sk_storage_clone. Wait for any existing bpf_sk_storage_clone
* RCU read section to finish before proceeding. New RCU
@@ -606,42 +676,51 @@ static void bpf_local_storage_map_free(struct bpf_map *map)

/* bpf prog and the userspace can no longer access this map
* now. No new selem (of this map) can be added
- * to the sk->sk_bpf_storage or to the map bucket's list.
+ * to the bpf_local_storage or to the map bucket's list.
*
* The elem of this map can be cleaned up here
- * or
- * by bpf_sk_storage_free() during __sk_destruct().
+ * or by bpf_local_storage_free() during the destruction of the
+ * owner object. eg. __sk_destruct.
*/
for (i = 0; i < (1U << smap->bucket_log); i++) {
b = &smap->buckets[i];

rcu_read_lock();
/* No one is adding to b->list now */
- while ((selem = hlist_entry_safe(
- rcu_dereference_raw(hlist_first_rcu(&b->list)),
- struct bpf_local_storage_elem, map_node))) {
+ while ((selem = hlist_entry_safe(rcu_dereference_raw(hlist_first_rcu(&b->list)),
+ struct bpf_local_storage_elem,
+ map_node))) {
bpf_selem_unlink(selem);
cond_resched_rcu();
}
rcu_read_unlock();
}

- /* bpf_sk_storage_free() may still need to access the map.
- * e.g. bpf_sk_storage_free() has unlinked selem from the map
+ /* bpf_local_storage_free() may still need to access the map.
+ * e.g. bpf_local_storage_free() has unlinked selem from the map
* which then made the above while((selem = ...)) loop
* exited immediately.
*
- * However, the bpf_sk_storage_free() still needs to access
+ * However, the bpf_local_storage_free() still needs to access
* the smap->elem_size to do the uncharging in
- * bpf_selem_unlink_storage().
+ * bpf_selem_unlink().
*
* Hence, wait another rcu grace period for the
- * bpf_sk_storage_free() to finish.
+ * bpf_local_storage_free() to finish.
*/
synchronize_rcu();

kvfree(smap->buckets);
- kfree(map);
+ kfree(smap);
+}
+
+static void sk_storage_map_free(struct bpf_map *map)
+{
+ struct bpf_local_storage_map *smap;
+
+ smap = (struct bpf_local_storage_map *)map;
+ bpf_local_storage_cache_idx_free(&sk_cache, smap->cache_idx);
+ bpf_local_storage_map_free(smap);
}

/* U16_MAX is much more than enough for sk local storage
@@ -653,7 +732,7 @@ static void bpf_local_storage_map_free(struct bpf_map *map)
sizeof(struct bpf_local_storage_elem)), \
(U16_MAX - sizeof(struct bpf_local_storage_elem)))

-static int bpf_sk_storage_map_alloc_check(union bpf_attr *attr)
+int bpf_local_storage_map_alloc_check(union bpf_attr *attr)
{
if (attr->map_flags & ~BPF_LOCAL_STORAGE_CREATE_FLAG_MASK ||
!(attr->map_flags & BPF_F_NO_PREALLOC) ||
@@ -672,7 +751,7 @@ static int bpf_sk_storage_map_alloc_check(union bpf_attr *attr)
return 0;
}

-static struct bpf_map *bpf_local_storage_map_alloc(union bpf_attr *attr)
+struct bpf_local_storage_map *bpf_local_storage_map_alloc(union bpf_attr *attr)
{
struct bpf_local_storage_map *smap;
unsigned int i;
@@ -710,9 +789,21 @@ static struct bpf_map *bpf_local_storage_map_alloc(union bpf_attr *attr)
raw_spin_lock_init(&smap->buckets[i].lock);
}

- smap->elem_size = sizeof(struct bpf_local_storage_elem) + attr->value_size;
- smap->cache_idx = bpf_local_storage_cache_idx_get(&sk_cache);
+ smap->elem_size =
+ sizeof(struct bpf_local_storage_elem) + attr->value_size;
+
+ return smap;
+}

+static struct bpf_map *sk_storage_map_alloc(union bpf_attr *attr)
+{
+ struct bpf_local_storage_map *smap;
+
+ smap = bpf_local_storage_map_alloc(attr);
+ if (IS_ERR(smap))
+ return ERR_CAST(smap);
+
+ smap->cache_idx = bpf_local_storage_cache_idx_get(&sk_cache);
return &smap->map;
}

@@ -722,10 +813,10 @@ static int notsupp_get_next_key(struct bpf_map *map, void *key,
return -ENOTSUPP;
}

-static int bpf_sk_storage_map_check_btf(const struct bpf_map *map,
- const struct btf *btf,
- const struct btf_type *key_type,
- const struct btf_type *value_type)
+int bpf_local_storage_map_check_btf(const struct bpf_map *map,
+ const struct btf *btf,
+ const struct btf_type *key_type,
+ const struct btf_type *value_type)
{
u32 int_data;

@@ -766,8 +857,7 @@ static int bpf_fd_sk_storage_update_elem(struct bpf_map *map, void *key,
fd = *(int *)key;
sock = sockfd_lookup(fd, &err);
if (sock) {
- sdata = bpf_local_storage_update(sock->sk, map, value,
- map_flags);
+ sdata = sk_storage_update(sock->sk, map, value, map_flags);
sockfd_put(sock);
return PTR_ERR_OR_ZERO(sdata);
}
@@ -798,7 +888,7 @@ bpf_sk_storage_clone_elem(struct sock *newsk,
{
struct bpf_local_storage_elem *copy_selem;

- copy_selem = bpf_selem_alloc(smap, newsk, NULL, true);
+ copy_selem = sk_selem_alloc(smap, newsk, NULL, true);
if (!copy_selem)
return NULL;

@@ -900,7 +990,7 @@ BPF_CALL_4(bpf_sk_storage_get, struct bpf_map *, map, struct sock *, sk,
* destruction).
*/
refcount_inc_not_zero(&sk->sk_refcnt)) {
- sdata = bpf_local_storage_update(sk, map, value, BPF_NOEXIST);
+ sdata = sk_storage_update(sk, map, value, BPF_NOEXIST);
/* sk must be a fullsock (guaranteed by verifier),
* so sock_gen_put() is unnecessary.
*/
@@ -927,16 +1017,19 @@ BPF_CALL_2(bpf_sk_storage_delete, struct bpf_map *, map, struct sock *, sk)

static int sk_storage_map_btf_id;
const struct bpf_map_ops sk_storage_map_ops = {
- .map_alloc_check = bpf_sk_storage_map_alloc_check,
- .map_alloc = bpf_local_storage_map_alloc,
- .map_free = bpf_local_storage_map_free,
+ .map_alloc_check = bpf_local_storage_map_alloc_check,
+ .map_alloc = sk_storage_map_alloc,
+ .map_free = sk_storage_map_free,
.map_get_next_key = notsupp_get_next_key,
.map_lookup_elem = bpf_fd_sk_storage_lookup_elem,
.map_update_elem = bpf_fd_sk_storage_update_elem,
.map_delete_elem = bpf_fd_sk_storage_delete_elem,
- .map_check_btf = bpf_sk_storage_map_check_btf,
+ .map_check_btf = bpf_local_storage_map_check_btf,
.map_btf_name = "bpf_local_storage_map",
.map_btf_id = &sk_storage_map_btf_id,
+ .map_selem_alloc = sk_selem_alloc,
+ .map_local_storage_update = sk_storage_update,
+ .map_local_storage_unlink = unlink_sk_storage,
};

const struct bpf_func_proto bpf_sk_storage_get_proto = {
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 54d0c886e3ba..b9d2e4792d08 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -3630,9 +3630,13 @@ enum {
BPF_F_SYSCTL_BASE_NAME = (1ULL << 0),
};

-/* BPF_FUNC_sk_storage_get flags */
+/* BPF_FUNC_<local>_storage_get flags */
enum {
- BPF_SK_STORAGE_GET_F_CREATE = (1ULL << 0),
+ BPF_LOCAL_STORAGE_GET_F_CREATE = (1ULL << 0),
+ /* BPF_SK_STORAGE_GET_F_CREATE is only kept for backward compatibility
+ * and BPF_LOCAL_STORAGE_GET_F_CREATE must be used instead.
+ */
+ BPF_SK_STORAGE_GET_F_CREATE = BPF_LOCAL_STORAGE_GET_F_CREATE,
};

/* BPF_FUNC_read_branch_records flags. */
--
2.28.0.rc0.105.gf9edc3c819-goog


2020-07-25 01:14:54

by Martin KaFai Lau

Subject: Re: [PATCH bpf-next v6 3/7] bpf: Generalize bpf_sk_storage

On Thu, Jul 23, 2020 at 01:50:28PM +0200, KP Singh wrote:
> From: KP Singh <[email protected]>
>
> Refactor the functionality in bpf_sk_storage.c so that concept of
> storage linked to kernel objects can be extended to other objects like
> inode, task_struct etc.
>
> Each new local storage will still be a separate map and provide its own
> set of helpers. This allows for future object specific extensions and
> still share a lot of the underlying implementation.
>

[ ... ]

> @@ -386,54 +407,28 @@ static int sk_storage_alloc(struct sock *sk,
> * Otherwise, it will become a leak (and other memory issues
> * during map destruction).
> */
> -static struct bpf_local_storage_data *
> -bpf_local_storage_update(struct sock *sk, struct bpf_map *map, void *value,
> +struct bpf_local_storage_data *
> +bpf_local_storage_update(void *owner, struct bpf_map *map,
> + struct bpf_local_storage *local_storage, void *value,
> u64 map_flags)
> {
> struct bpf_local_storage_data *old_sdata = NULL;
> struct bpf_local_storage_elem *selem;
> - struct bpf_local_storage *local_storage;
> struct bpf_local_storage_map *smap;
> int err;
>
> - /* BPF_EXIST and BPF_NOEXIST cannot be both set */
> - if (unlikely((map_flags & ~BPF_F_LOCK) > BPF_EXIST) ||
> - /* BPF_F_LOCK can only be used in a value with spin_lock */
> - unlikely((map_flags & BPF_F_LOCK) && !map_value_has_spin_lock(map)))
> - return ERR_PTR(-EINVAL);
> -
> smap = (struct bpf_local_storage_map *)map;
> - local_storage = rcu_dereference(sk->sk_bpf_storage);
> - if (!local_storage || hlist_empty(&local_storage->list)) {
> - /* Very first elem for this object */
> - err = check_flags(NULL, map_flags);
This check_flags() call is missing in the new sk_storage_update() further below.

> - if (err)
> - return ERR_PTR(err);
> -
> - selem = bpf_selem_alloc(smap, sk, value, true);
> - if (!selem)
> - return ERR_PTR(-ENOMEM);
> -
> - err = sk_storage_alloc(sk, smap, selem);
> - if (err) {
> - kfree(selem);
> - atomic_sub(smap->elem_size, &sk->sk_omem_alloc);
> - return ERR_PTR(err);
> - }
> -
> - return SDATA(selem);
> - }
>
> if ((map_flags & BPF_F_LOCK) && !(map_flags & BPF_NOEXIST)) {
> /* Hoping to find an old_sdata to do inline update
> * such that it can avoid taking the local_storage->lock
> * and changing the lists.
> */
> - old_sdata =
> - bpf_local_storage_lookup(local_storage, smap, false);
> + old_sdata = bpf_local_storage_lookup(local_storage, smap, false);
> err = check_flags(old_sdata, map_flags);
> if (err)
> return ERR_PTR(err);
> +
> if (old_sdata && selem_linked_to_storage(SELEM(old_sdata))) {
> copy_map_value_locked(map, old_sdata->data,
> value, false);

[ ... ]

> +static struct bpf_local_storage_data *
> +sk_storage_update(void *owner, struct bpf_map *map, void *value, u64 map_flags)
> +{
> + struct bpf_local_storage_data *old_sdata = NULL;
> + struct bpf_local_storage_elem *selem;
> + struct bpf_local_storage *local_storage;
> + struct bpf_local_storage_map *smap;
> + struct sock *sk;
> + int err;
> +
> + err = bpf_local_storage_check_update_flags(map, map_flags);
> + if (err)
> + return ERR_PTR(err);
> +
> + sk = owner;
> + local_storage = rcu_dereference(sk->sk_bpf_storage);
> + smap = (struct bpf_local_storage_map *)map;
> +
> + if (!local_storage || hlist_empty(&local_storage->list)) {

"check_flags(NULL, map_flags);" is gone in this refactoring.

This part of the code is copied into inode_storage_update() in a later
patch, which then has the same issue.
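A minimal sketch of the missing piece, mirroring the deleted code quoted
above:

        if (!local_storage || hlist_empty(&local_storage->list)) {
                /* Very first elem for this owner: there is no old_sdata,
                 * so flags such as BPF_EXIST must still be rejected here.
                 */
                err = check_flags(NULL, map_flags);
                if (err)
                        return ERR_PTR(err);

                selem = map->ops->map_selem_alloc(smap, owner, value,
                                                  !old_sdata);
                if (!selem)
                        return ERR_PTR(-ENOMEM);

                /* ... continue as in sk_storage_update() quoted below ... */
        }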

> + /* Very first elem */
> + selem = map->ops->map_selem_alloc(smap, owner, value, !old_sdata);
> + if (!selem)
> + return ERR_PTR(-ENOMEM);

> static int sk_storage_map_btf_id;
> const struct bpf_map_ops sk_storage_map_ops = {
> - .map_alloc_check = bpf_sk_storage_map_alloc_check,
> - .map_alloc = bpf_local_storage_map_alloc,
> - .map_free = bpf_local_storage_map_free,
> + .map_alloc_check = bpf_local_storage_map_alloc_check,
> + .map_alloc = sk_storage_map_alloc,
> + .map_free = sk_storage_map_free,
> .map_get_next_key = notsupp_get_next_key,
> .map_lookup_elem = bpf_fd_sk_storage_lookup_elem,
> .map_update_elem = bpf_fd_sk_storage_update_elem,
> .map_delete_elem = bpf_fd_sk_storage_delete_elem,
> - .map_check_btf = bpf_sk_storage_map_check_btf,
> + .map_check_btf = bpf_local_storage_map_check_btf,
> .map_btf_name = "bpf_local_storage_map",
> .map_btf_id = &sk_storage_map_btf_id,
> + .map_selem_alloc = sk_selem_alloc,
> + .map_local_storage_update = sk_storage_update,
> + .map_local_storage_unlink = unlink_sk_storage,
I think refactoring the code into map_selem_alloc, map_local_storage_update,
and map_local_storage_unlink ops is not the best option. The sk and inode
versions of the above map_ops are mostly the same, so fixing an issue
like the one mentioned above has to be done in the sk code, the inode
code, and any future kernel-object code.

The only difference is that sk charges omem and inode does not.
I have played around a little. I think adding the following three ops (pasted at
the end) is better and should be enough for both sk and inode. The inode
side does not even have to implement the (un)charge ops at all.

That should remove the duplication for the following:
- (sk|inode)_selem_alloc
- (sk|inode)_storage_update
- unlink_(sk|inode)_storage
- (sk|inode)_storage_alloc

Another bonus is that the new bpf_local_storage_check_update_flags() and
bpf_local_storage_publish() will no longer be needed either.

I have hacked up this replacement for patch 3 to compile-test the idea.
I will post it in another email. Let me know wdyt.
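
To make the inode point concrete, here is a rough sketch of what the inode
side could then look like. All names are hypothetical (the real inode
storage pointer may live somewhere else entirely); the point is that only
map_owner_storage_ptr is strictly needed, and leaving the (un)charge ops out
makes mem_charge()/mem_uncharge() no-ops for that map:

static struct bpf_local_storage __rcu **
inode_storage_ptr(struct bpf_local_storage_map *smap, void *owner)
{
        struct inode *inode = owner;

        /* hypothetical helper returning wherever the inode keeps its
         * local storage pointer
         */
        return &inode_bpf(inode)->storage;
}

const struct bpf_map_ops inode_storage_map_ops = {
        .map_alloc_check = bpf_local_storage_map_alloc_check,
        .map_alloc = inode_storage_map_alloc,
        .map_free = inode_storage_map_free,
        .map_check_btf = bpf_local_storage_map_check_btf,
        .map_owner_storage_ptr = inode_storage_ptr,
        /* no .map_local_storage_charge / .map_local_storage_uncharge */
};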

--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -33,6 +33,9 @@ struct btf;
struct btf_type;
struct exception_table_entry;
struct seq_operations;
+struct bpf_local_storage;
+struct bpf_local_storage_map;
+struct bpf_local_storage_elem;

extern struct idr btf_idr;
extern spinlock_t btf_idr_lock;
@@ -93,6 +96,13 @@ struct bpf_map_ops {
__poll_t (*map_poll)(struct bpf_map *map, struct file *filp,
struct poll_table_struct *pts);

+ /* Functions called by bpf_local_storage maps */
+ int (*map_local_storage_charge)(struct bpf_local_storage_map *smap,
+ void *owner, u32 size);
+ void (*map_local_storage_uncharge)(struct bpf_local_storage_map *smap,
+ void *owner, u32 size);
+ struct bpf_local_storage __rcu ** (*map_owner_storage_ptr)(struct bpf_local_storage_map *smap,
+ void *owner);
/* BTF name and id of struct allocated by map_alloc */
const char * const map_btf_name;
int *map_btf_id;

2020-07-25 01:35:36

by Martin KaFai Lau

Subject: [RFC PATCH bpf-next] bpf: POC on local_storage charge and uncharge map_ops

It is a direct replacement of patch 3 in the discussion [1]
to test out the idea of adding
map_local_storage_charge, map_local_storage_uncharge,
and map_owner_storage_ptr.

It is only compile-tested to demo the idea.

[1]: https://patchwork.ozlabs.org/project/netdev/patch/[email protected]/

Signed-off-by: Martin KaFai Lau <[email protected]>
---
include/linux/bpf.h | 10 ++
include/net/bpf_sk_storage.h | 51 +++++++
include/uapi/linux/bpf.h | 8 +-
net/core/bpf_sk_storage.c | 250 +++++++++++++++++++++------------
tools/include/uapi/linux/bpf.h | 8 +-
5 files changed, 236 insertions(+), 91 deletions(-)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 72221aea1c60..d4eab7ccbb51 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -33,6 +33,9 @@ struct btf;
struct btf_type;
struct exception_table_entry;
struct seq_operations;
+struct bpf_local_storage;
+struct bpf_local_storage_map;
+struct bpf_local_storage_elem;

extern struct idr btf_idr;
extern spinlock_t btf_idr_lock;
@@ -93,6 +96,13 @@ struct bpf_map_ops {
__poll_t (*map_poll)(struct bpf_map *map, struct file *filp,
struct poll_table_struct *pts);

+ /* Functions called by bpf_local_storage maps */
+ int (*map_local_storage_charge)(struct bpf_local_storage_map *smap,
+ void *owner, u32 size);
+ void (*map_local_storage_uncharge)(struct bpf_local_storage_map *smap,
+ void *owner, u32 size);
+ struct bpf_local_storage __rcu ** (*map_owner_storage_ptr)(struct bpf_local_storage_map *smap,
+ void *owner);
/* BTF name and id of struct allocated by map_alloc */
const char * const map_btf_name;
int *map_btf_id;
diff --git a/include/net/bpf_sk_storage.h b/include/net/bpf_sk_storage.h
index 950c5aaba15e..05b777950eb3 100644
--- a/include/net/bpf_sk_storage.h
+++ b/include/net/bpf_sk_storage.h
@@ -3,8 +3,15 @@
#ifndef _BPF_SK_STORAGE_H
#define _BPF_SK_STORAGE_H

+#include <linux/rculist.h>
+#include <linux/list.h>
+#include <linux/hash.h>
#include <linux/types.h>
#include <linux/spinlock.h>
+#include <linux/bpf.h>
+#include <net/sock.h>
+#include <uapi/linux/sock_diag.h>
+#include <uapi/linux/btf.h>

struct sock;

@@ -34,6 +41,50 @@ u16 bpf_local_storage_cache_idx_get(struct bpf_local_storage_cache *cache);
void bpf_local_storage_cache_idx_free(struct bpf_local_storage_cache *cache,
u16 idx);

+/* Helper functions for bpf_local_storage */
+int bpf_local_storage_map_alloc_check(union bpf_attr *attr);
+
+struct bpf_local_storage_map *bpf_local_storage_map_alloc(union bpf_attr *attr);
+
+struct bpf_local_storage_data *
+bpf_local_storage_lookup(struct bpf_local_storage *local_storage,
+ struct bpf_local_storage_map *smap,
+ bool cacheit_lockit);
+
+void bpf_local_storage_map_free(struct bpf_local_storage_map *smap);
+
+int bpf_local_storage_map_check_btf(const struct bpf_map *map,
+ const struct btf *btf,
+ const struct btf_type *key_type,
+ const struct btf_type *value_type);
+
+void bpf_selem_link_storage(struct bpf_local_storage *local_storage,
+ struct bpf_local_storage_elem *selem);
+
+bool bpf_selem_unlink_storage(struct bpf_local_storage *local_storage,
+ struct bpf_local_storage_elem *selem,
+ bool uncharge_omem);
+
+void bpf_selem_unlink(struct bpf_local_storage_elem *selem);
+
+void bpf_selem_link_map(struct bpf_local_storage_map *smap,
+ struct bpf_local_storage_elem *selem);
+
+void bpf_selem_unlink_map(struct bpf_local_storage_elem *selem);
+
+struct bpf_local_storage_elem *
+bpf_selem_alloc(struct bpf_local_storage_map *smap, void *owner, void *value,
+ bool charge_mem);
+
+int
+bpf_local_storage_alloc(void *owner,
+ struct bpf_local_storage_map *smap,
+ struct bpf_local_storage_elem *first_selem);
+
+struct bpf_local_storage_data *
+bpf_local_storage_update(void *owner, struct bpf_map *map, void *value,
+ u64 map_flags);
+
#ifdef CONFIG_BPF_SYSCALL
int bpf_sk_storage_clone(const struct sock *sk, struct sock *newsk);
struct bpf_sk_storage_diag *
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 54d0c886e3ba..b9d2e4792d08 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -3630,9 +3630,13 @@ enum {
BPF_F_SYSCTL_BASE_NAME = (1ULL << 0),
};

-/* BPF_FUNC_sk_storage_get flags */
+/* BPF_FUNC_<local>_storage_get flags */
enum {
- BPF_SK_STORAGE_GET_F_CREATE = (1ULL << 0),
+ BPF_LOCAL_STORAGE_GET_F_CREATE = (1ULL << 0),
+ /* BPF_SK_STORAGE_GET_F_CREATE is only kept for backward compatibility
+ * and BPF_LOCAL_STORAGE_GET_F_CREATE must be used instead.
+ */
+ BPF_SK_STORAGE_GET_F_CREATE = BPF_LOCAL_STORAGE_GET_F_CREATE,
};

/* BPF_FUNC_read_branch_records flags. */
diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c
index aa3e3a47acb5..ec54b9d424c8 100644
--- a/net/core/bpf_sk_storage.c
+++ b/net/core/bpf_sk_storage.c
@@ -83,7 +83,7 @@ struct bpf_local_storage_elem {
struct bpf_local_storage {
struct bpf_local_storage_data __rcu *cache[BPF_LOCAL_STORAGE_CACHE_SIZE];
struct hlist_head list; /* List of bpf_local_storage_elem */
- struct sock *owner; /* The object that owns the the above "list" of
+ void *owner; /* The object that owns the the above "list" of
* bpf_local_storage_elem.
*/
struct rcu_head rcu;
@@ -109,6 +109,33 @@ static int omem_charge(struct sock *sk, unsigned int size)
return -ENOMEM;
}

+static int mem_charge(struct bpf_local_storage_map *smap, void *owner, u32 size)
+{
+ struct bpf_map *map = &smap->map;
+
+ if (!map->ops->map_local_storage_charge)
+ return 0;
+
+ return map->ops->map_local_storage_charge(smap, owner, size);
+}
+
+static void mem_uncharge(struct bpf_local_storage_map *smap, void *owner,
+ u32 size)
+{
+ struct bpf_map *map = &smap->map;
+
+ if (map->ops->map_local_storage_uncharge)
+ map->ops->map_local_storage_uncharge(smap, owner, size);
+}
+
+static struct bpf_local_storage __rcu **
+owner_storage(struct bpf_local_storage_map *smap, void *owner)
+{
+ struct bpf_map *map = &smap->map;
+
+ return map->ops->map_owner_storage_ptr(smap, owner);
+}
+
static bool selem_linked_to_storage(const struct bpf_local_storage_elem *selem)
{
return !hlist_unhashed(&selem->snode);
@@ -119,13 +146,13 @@ static bool selem_linked_to_map(const struct bpf_local_storage_elem *selem)
return !hlist_unhashed(&selem->map_node);
}

-static struct bpf_local_storage_elem *
-bpf_selem_alloc(struct bpf_local_storage_map *smap, struct sock *sk,
- void *value, bool charge_omem)
+struct bpf_local_storage_elem *
+bpf_selem_alloc(struct bpf_local_storage_map *smap, void *owner,
+ void *value, bool charge_mem)
{
struct bpf_local_storage_elem *selem;

- if (charge_omem && omem_charge(sk, smap->elem_size))
+ if (charge_mem && mem_charge(smap, owner, smap->elem_size))
return NULL;

selem = kzalloc(smap->elem_size, GFP_ATOMIC | __GFP_NOWARN);
@@ -135,40 +162,42 @@ bpf_selem_alloc(struct bpf_local_storage_map *smap, struct sock *sk,
return selem;
}

- if (charge_omem)
- atomic_sub(smap->elem_size, &sk->sk_omem_alloc);
+ if (charge_mem)
+ mem_uncharge(smap, owner, smap->elem_size);

return NULL;
}

-/* sk_storage->lock must be held and selem->sk_storage == sk_storage.
+/* local_storage->lock must be held and selem->sk_storage == sk_storage.
* The caller must ensure selem->smap is still valid to be
* dereferenced for its smap->elem_size and smap->cache_idx.
*/
-static bool bpf_selem_unlink_storage(struct bpf_local_storage *local_storage,
- struct bpf_local_storage_elem *selem,
- bool uncharge_omem)
+bool bpf_selem_unlink_storage(struct bpf_local_storage *local_storage,
+ struct bpf_local_storage_elem *selem,
+ bool uncharge_mem)
{
struct bpf_local_storage_map *smap;
bool free_local_storage;
- struct sock *sk;
+ void *owner;

smap = rcu_dereference(SDATA(selem)->smap);
- sk = local_storage->owner;
+ owner = local_storage->owner;

- /* All uncharging on sk->sk_omem_alloc must be done first.
- * sk may be freed once the last selem is unlinked from local_storage.
+ /* All uncharging on owner must be done first.
+ * The owner may be freed once the last selem is unlinked from
+ * local_storage.
*/
- if (uncharge_omem)
- atomic_sub(smap->elem_size, &sk->sk_omem_alloc);
+ if (uncharge_mem)
+ mem_uncharge(smap, owner, smap->elem_size);

free_local_storage = hlist_is_singular_node(&selem->snode,
&local_storage->list);
if (free_local_storage) {
- atomic_sub(sizeof(struct bpf_local_storage), &sk->sk_omem_alloc);
+ mem_uncharge(smap, owner, sizeof(struct bpf_local_storage));
local_storage->owner = NULL;
- /* After this RCU_INIT, sk may be freed and cannot be used */
- RCU_INIT_POINTER(sk->sk_bpf_storage, NULL);
+
+ /* After this RCU_INIT, owner may be freed and cannot be used */
+ RCU_INIT_POINTER(*owner_storage(smap, owner), NULL);

/* local_storage is not freed now. local_storage->lock is
* still held and raw_spin_unlock_bh(&local_storage->lock)
@@ -214,14 +243,14 @@ static void __bpf_selem_unlink_storage(struct bpf_local_storage_elem *selem)
kfree_rcu(local_storage, rcu);
}

-static void bpf_selem_link_storage(struct bpf_local_storage *local_storage,
- struct bpf_local_storage_elem *selem)
+void bpf_selem_link_storage(struct bpf_local_storage *local_storage,
+ struct bpf_local_storage_elem *selem)
{
RCU_INIT_POINTER(selem->local_storage, local_storage);
hlist_add_head(&selem->snode, &local_storage->list);
}

-static void bpf_selem_unlink_map(struct bpf_local_storage_elem *selem)
+void bpf_selem_unlink_map(struct bpf_local_storage_elem *selem)
{
struct bpf_local_storage_map *smap;
struct bpf_local_storage_map_bucket *b;
@@ -238,8 +267,8 @@ static void bpf_selem_unlink_map(struct bpf_local_storage_elem *selem)
raw_spin_unlock_bh(&b->lock);
}

-static void bpf_selem_link_map(struct bpf_local_storage_map *smap,
- struct bpf_local_storage_elem *selem)
+void bpf_selem_link_map(struct bpf_local_storage_map *smap,
+ struct bpf_local_storage_elem *selem)
{
struct bpf_local_storage_map_bucket *b = select_bucket(smap, selem);

@@ -249,7 +278,7 @@ static void bpf_selem_link_map(struct bpf_local_storage_map *smap,
raw_spin_unlock_bh(&b->lock);
}

-static void bpf_selem_unlink(struct bpf_local_storage_elem *selem)
+void bpf_selem_unlink(struct bpf_local_storage_elem *selem)
{
/* Always unlink from map before unlinking from local_storage
* because selem will be freed after successfully unlinked from
@@ -259,7 +288,7 @@ static void bpf_selem_unlink(struct bpf_local_storage_elem *selem)
__bpf_selem_unlink_storage(selem);
}

-static struct bpf_local_storage_data *
+struct bpf_local_storage_data *
bpf_local_storage_lookup(struct bpf_local_storage *local_storage,
struct bpf_local_storage_map *smap,
bool cacheit_lockit)
@@ -325,40 +354,45 @@ static int check_flags(const struct bpf_local_storage_data *old_sdata,
return 0;
}

-static int sk_storage_alloc(struct sock *sk,
+int bpf_local_storage_alloc(void *owner,
struct bpf_local_storage_map *smap,
struct bpf_local_storage_elem *first_selem)
{
- struct bpf_local_storage *prev_sk_storage, *sk_storage;
+ struct bpf_local_storage *prev_storage, *storage;
+ struct bpf_local_storage **owner_storage_ptr;
int err;

- err = omem_charge(sk, sizeof(*sk_storage));
+ err = mem_charge(smap, owner, sizeof(*storage));
if (err)
return err;

- sk_storage = kzalloc(sizeof(*sk_storage), GFP_ATOMIC | __GFP_NOWARN);
- if (!sk_storage) {
+ storage = kzalloc(sizeof(*storage), GFP_ATOMIC | __GFP_NOWARN);
+ if (!storage) {
err = -ENOMEM;
goto uncharge;
}
- INIT_HLIST_HEAD(&sk_storage->list);
- raw_spin_lock_init(&sk_storage->lock);
- sk_storage->owner = sk;

- bpf_selem_link_storage(sk_storage, first_selem);
+ INIT_HLIST_HEAD(&storage->list);
+ raw_spin_lock_init(&storage->lock);
+ storage->owner = owner;
+
+ bpf_selem_link_storage(storage, first_selem);
bpf_selem_link_map(smap, first_selem);
- /* Publish sk_storage to sk. sk->sk_lock cannot be acquired.
- * Hence, atomic ops is used to set sk->sk_bpf_storage
- * from NULL to the newly allocated sk_storage ptr.
+
+ owner_storage_ptr =
+ (struct bpf_local_storage **)owner_storage(smap, owner);
+ /* Publish storage to the owner.
+ * Instead of using any lock of the kernel object (i.e. owner),
+ * cmpxchg will work with any kernel object regardless what
+ * the running context is, bh, irq...etc.
*
- * From now on, the sk->sk_bpf_storage pointer is protected
- * by the sk_storage->lock. Hence, when freeing
- * the sk->sk_bpf_storage, the sk_storage->lock must
- * be held before setting sk->sk_bpf_storage to NULL.
+ * From now on, the owner->storage pointer (e.g. sk->sk_bpf_storage)
+ * is protected by the storage->lock. Hence, when freeing
+ * the owner->storage, the storage->lock must be held before
+ * setting owner->storage ptr to NULL.
*/
- prev_sk_storage = cmpxchg((struct bpf_local_storage **)&sk->sk_bpf_storage,
- NULL, sk_storage);
- if (unlikely(prev_sk_storage)) {
+ prev_storage = cmpxchg(owner_storage_ptr, NULL, storage);
+ if (unlikely(prev_storage)) {
bpf_selem_unlink_map(first_selem);
err = -EAGAIN;
goto uncharge;
@@ -366,7 +400,7 @@ static int sk_storage_alloc(struct sock *sk,
/* Note that even first_selem was linked to smap's
* bucket->list, first_selem can be freed immediately
* (instead of kfree_rcu) because
- * bpf_sk_storage_map_free() does a
+ * bpf_local_storage_map_free() does a
* synchronize_rcu() before walking the bucket->list.
* Hence, no one is accessing selem from the
* bucket->list under rcu_read_lock().
@@ -376,8 +410,8 @@ static int sk_storage_alloc(struct sock *sk,
return 0;

uncharge:
- kfree(sk_storage);
- atomic_sub(sizeof(*sk_storage), &sk->sk_omem_alloc);
+ kfree(storage);
+ mem_uncharge(smap, owner, sizeof(*storage));
return err;
}

@@ -386,9 +420,9 @@ static int sk_storage_alloc(struct sock *sk,
* Otherwise, it will become a leak (and other memory issues
* during map destruction).
*/
-static struct bpf_local_storage_data *
-bpf_local_storage_update(struct sock *sk, struct bpf_map *map, void *value,
- u64 map_flags)
+struct bpf_local_storage_data *
+bpf_local_storage_update(void *owner, struct bpf_map *map,
+ void *value, u64 map_flags)
{
struct bpf_local_storage_data *old_sdata = NULL;
struct bpf_local_storage_elem *selem;
@@ -403,21 +437,21 @@ bpf_local_storage_update(struct sock *sk, struct bpf_map *map, void *value,
return ERR_PTR(-EINVAL);

smap = (struct bpf_local_storage_map *)map;
- local_storage = rcu_dereference(sk->sk_bpf_storage);
+ local_storage = rcu_dereference(*owner_storage(smap, owner));
if (!local_storage || hlist_empty(&local_storage->list)) {
- /* Very first elem for this object */
+ /* Very first elem for the owner */
err = check_flags(NULL, map_flags);
if (err)
return ERR_PTR(err);

- selem = bpf_selem_alloc(smap, sk, value, true);
+ selem = bpf_selem_alloc(smap, owner, value, true);
if (!selem)
return ERR_PTR(-ENOMEM);

- err = sk_storage_alloc(sk, smap, selem);
+ err = bpf_local_storage_alloc(owner, smap, selem);
if (err) {
kfree(selem);
- atomic_sub(smap->elem_size, &sk->sk_omem_alloc);
+ mem_uncharge(smap, owner, smap->elem_size);
return ERR_PTR(err);
}

@@ -429,8 +463,8 @@ bpf_local_storage_update(struct sock *sk, struct bpf_map *map, void *value,
* such that it can avoid taking the local_storage->lock
* and changing the lists.
*/
- old_sdata =
- bpf_local_storage_lookup(local_storage, smap, false);
+ old_sdata = bpf_local_storage_lookup(local_storage, smap,
+ false);
err = check_flags(old_sdata, map_flags);
if (err)
return ERR_PTR(err);
@@ -474,7 +508,7 @@ bpf_local_storage_update(struct sock *sk, struct bpf_map *map, void *value,
* old_sdata will not be uncharged later during
* bpf_selem_unlink_storage().
*/
- selem = bpf_selem_alloc(smap, sk, value, !old_sdata);
+ selem = bpf_selem_alloc(smap, owner, value, !old_sdata);
if (!selem) {
err = -ENOMEM;
goto unlock_err;
@@ -566,7 +600,7 @@ void bpf_sk_storage_free(struct sock *sk)
* Thus, no elem can be added-to or deleted-from the
* sk_storage->list by the bpf_prog or by the bpf-map's syscall.
*
- * It is racing with bpf_sk_storage_map_free() alone
+ * It is racing with bpf_local_storage_map_free() alone
* when unlinking elem from the sk_storage->list and
* the map's bucket->list.
*/
@@ -586,17 +620,12 @@ void bpf_sk_storage_free(struct sock *sk)
kfree_rcu(sk_storage, rcu);
}

-static void bpf_local_storage_map_free(struct bpf_map *map)
+void bpf_local_storage_map_free(struct bpf_local_storage_map *smap)
{
struct bpf_local_storage_elem *selem;
- struct bpf_local_storage_map *smap;
struct bpf_local_storage_map_bucket *b;
unsigned int i;

- smap = (struct bpf_local_storage_map *)map;
-
- bpf_local_storage_cache_idx_free(&sk_cache, smap->cache_idx);
-
/* Note that this map might be concurrently cloned from
* bpf_sk_storage_clone. Wait for any existing bpf_sk_storage_clone
* RCU read section to finish before proceeding. New RCU
@@ -606,11 +635,12 @@ static void bpf_local_storage_map_free(struct bpf_map *map)

/* bpf prog and the userspace can no longer access this map
* now. No new selem (of this map) can be added
- * to the sk->sk_bpf_storage or to the map bucket's list.
+ * to the bpf_local_storage or to the map bucket's list.
*
* The elem of this map can be cleaned up here
* or
- * by bpf_sk_storage_free() during __sk_destruct().
+ * by bpf_local_storage_free() during the destruction of the
+ * owner object. eg. __sk_destruct.
*/
for (i = 0; i < (1U << smap->bucket_log); i++) {
b = &smap->buckets[i];
@@ -626,22 +656,31 @@ static void bpf_local_storage_map_free(struct bpf_map *map)
rcu_read_unlock();
}

- /* bpf_sk_storage_free() may still need to access the map.
- * e.g. bpf_sk_storage_free() has unlinked selem from the map
+ /* bpf_local_storage_free() may still need to access the map.
+ * e.g. bpf_local_storage_free() has unlinked selem from the map
* which then made the above while((selem = ...)) loop
* exited immediately.
*
- * However, the bpf_sk_storage_free() still needs to access
+ * However, the bpf_local_storage_free() still needs to access
* the smap->elem_size to do the uncharging in
* bpf_selem_unlink_storage().
*
* Hence, wait another rcu grace period for the
- * bpf_sk_storage_free() to finish.
+ * bpf_local_storage_free() to finish.
*/
synchronize_rcu();

kvfree(smap->buckets);
- kfree(map);
+ kfree(smap);
+}
+
+static void sk_storage_map_free(struct bpf_map *map)
+{
+ struct bpf_local_storage_map *smap;
+
+ smap = (struct bpf_local_storage_map *)map;
+ bpf_local_storage_cache_idx_free(&sk_cache, smap->cache_idx);
+ bpf_local_storage_map_free(smap);
}

/* U16_MAX is much more than enough for sk local storage
@@ -653,7 +692,7 @@ static void bpf_local_storage_map_free(struct bpf_map *map)
sizeof(struct bpf_local_storage_elem)), \
(U16_MAX - sizeof(struct bpf_local_storage_elem)))

-static int bpf_sk_storage_map_alloc_check(union bpf_attr *attr)
+int bpf_local_storage_map_alloc_check(union bpf_attr *attr)
{
if (attr->map_flags & ~BPF_LOCAL_STORAGE_CREATE_FLAG_MASK ||
!(attr->map_flags & BPF_F_NO_PREALLOC) ||
@@ -672,7 +711,7 @@ static int bpf_sk_storage_map_alloc_check(union bpf_attr *attr)
return 0;
}

-static struct bpf_map *bpf_local_storage_map_alloc(union bpf_attr *attr)
+struct bpf_local_storage_map *bpf_local_storage_map_alloc(union bpf_attr *attr)
{
struct bpf_local_storage_map *smap;
unsigned int i;
@@ -710,9 +749,21 @@ static struct bpf_map *bpf_local_storage_map_alloc(union bpf_attr *attr)
raw_spin_lock_init(&smap->buckets[i].lock);
}

- smap->elem_size = sizeof(struct bpf_local_storage_elem) + attr->value_size;
- smap->cache_idx = bpf_local_storage_cache_idx_get(&sk_cache);
+ smap->elem_size =
+ sizeof(struct bpf_local_storage_elem) + attr->value_size;
+
+ return smap;
+}
+
+static struct bpf_map *sk_storage_map_alloc(union bpf_attr *attr)
+{
+ struct bpf_local_storage_map *smap;

+ smap = bpf_local_storage_map_alloc(attr);
+ if (IS_ERR(smap))
+ return ERR_CAST(smap);
+
+ smap->cache_idx = bpf_local_storage_cache_idx_get(&sk_cache);
return &smap->map;
}

@@ -722,10 +773,10 @@ static int notsupp_get_next_key(struct bpf_map *map, void *key,
return -ENOTSUPP;
}

-static int bpf_sk_storage_map_check_btf(const struct bpf_map *map,
- const struct btf *btf,
- const struct btf_type *key_type,
- const struct btf_type *value_type)
+int bpf_local_storage_map_check_btf(const struct bpf_map *map,
+ const struct btf *btf,
+ const struct btf_type *key_type,
+ const struct btf_type *value_type)
{
u32 int_data;

@@ -856,7 +907,7 @@ int bpf_sk_storage_clone(const struct sock *sk, struct sock *newsk)
bpf_selem_link_map(smap, copy_selem);
bpf_selem_link_storage(new_sk_storage, copy_selem);
} else {
- ret = sk_storage_alloc(newsk, smap, copy_selem);
+ ret = bpf_local_storage_alloc(newsk, smap, copy_selem);
if (ret) {
kfree(copy_selem);
atomic_sub(smap->elem_size,
@@ -925,18 +976,43 @@ BPF_CALL_2(bpf_sk_storage_delete, struct bpf_map *, map, struct sock *, sk)
return -ENOENT;
}

+static int sk_storage_charge(struct bpf_local_storage_map *smap,
+ void *owner, u32 size)
+{
+ return omem_charge(owner, size);
+}
+
+static void sk_storage_uncharge(struct bpf_local_storage_map *smap,
+ void *owner, u32 size)
+{
+ struct sock *sk = owner;
+
+ atomic_sub(size, &sk->sk_omem_alloc);
+}
+
+static struct bpf_local_storage __rcu **
+sk_storage_ptr(struct bpf_local_storage_map *smap, void *owner)
+{
+ struct sock *sk = owner;
+
+ return &sk->sk_bpf_storage;
+}
+
static int sk_storage_map_btf_id;
const struct bpf_map_ops sk_storage_map_ops = {
- .map_alloc_check = bpf_sk_storage_map_alloc_check,
- .map_alloc = bpf_local_storage_map_alloc,
- .map_free = bpf_local_storage_map_free,
+ .map_alloc_check = bpf_local_storage_map_alloc_check,
+ .map_alloc = sk_storage_map_alloc,
+ .map_free = sk_storage_map_free,
.map_get_next_key = notsupp_get_next_key,
.map_lookup_elem = bpf_fd_sk_storage_lookup_elem,
.map_update_elem = bpf_fd_sk_storage_update_elem,
.map_delete_elem = bpf_fd_sk_storage_delete_elem,
- .map_check_btf = bpf_sk_storage_map_check_btf,
+ .map_check_btf = bpf_local_storage_map_check_btf,
.map_btf_name = "bpf_local_storage_map",
.map_btf_id = &sk_storage_map_btf_id,
+ .map_local_storage_charge = sk_storage_charge,
+ .map_local_storage_uncharge = sk_storage_uncharge,
+ .map_owner_storage_ptr = sk_storage_ptr,
};

const struct bpf_func_proto bpf_sk_storage_get_proto = {
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 54d0c886e3ba..b9d2e4792d08 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -3630,9 +3630,13 @@ enum {
BPF_F_SYSCTL_BASE_NAME = (1ULL << 0),
};

-/* BPF_FUNC_sk_storage_get flags */
+/* BPF_FUNC_<local>_storage_get flags */
enum {
- BPF_SK_STORAGE_GET_F_CREATE = (1ULL << 0),
+ BPF_LOCAL_STORAGE_GET_F_CREATE = (1ULL << 0),
+ /* BPF_SK_STORAGE_GET_F_CREATE is only kept for backward compatibility
+ * and BPF_LOCAL_STORAGE_GET_F_CREATE must be used instead.
+ */
+ BPF_SK_STORAGE_GET_F_CREATE = BPF_LOCAL_STORAGE_GET_F_CREATE,
};

/* BPF_FUNC_read_branch_records flags. */
--
2.24.1

2020-07-27 20:27:34

by KP Singh

Subject: Re: [RFC PATCH bpf-next] bpf: POC on local_storage charge and uncharge map_ops

Thanks for this, I was able to update the series with this patch and it works.
One minor comment though.

I was wondering how I should send it as part of the series. I will keep the
original commit description, mention this thread, and add your Co-developed-by:
tag, and then you can add your Signed-off-by: as well. I am not sure of the
canonical way here and am open to suggestions :)

- KP

On 25.07.20 03:30, Martin KaFai Lau wrote:
> It is a direct replacement of the patch 3 in discussion [1]
> and to test out the idea on adding
> map_local_storage_charge, map_local_storage_uncharge,
> and map_owner_storage_ptr.
>
> It is only compiler tested to demo the idea.
>
> [1]: https://patchwork.ozlabs.org/project/netdev/patch/[email protected]/
>
> Signed-off-by: Martin KaFai Lau <[email protected]>
> ---
> include/linux/bpf.h | 10 ++
> include/net/bpf_sk_storage.h | 51 +++++++
> include/uapi/linux/bpf.h | 8 +-

[...]

> +
> +static void sk_storage_uncharge(struct bpf_local_storage_map *smap,
> + void *owner, u32 size)
> +{
> + struct sock *sk = owner;
> +
> + atomic_sub(size, &sk->sk_omem_alloc);
> +}
> +
> +static struct bpf_local_storage __rcu **
> +sk_storage_ptr(struct bpf_local_storage_map *smap, void *owner)

Do we need the smap pointer here? It's not being used, and it isn't
used for inode either.

- KP

> +{
> + struct sock *sk = owner;
> +
> + return &sk->sk_bpf_storage;
> +}
> +

[...]

> -/* BPF_FUNC_sk_storage_get flags */
> +/* BPF_FUNC_<local>_storage_get flags */
> enum {
> - BPF_SK_STORAGE_GET_F_CREATE = (1ULL << 0),
> + BPF_LOCAL_STORAGE_GET_F_CREATE = (1ULL << 0),
> + /* BPF_SK_STORAGE_GET_F_CREATE is only kept for backward compatibility
> + * and BPF_LOCAL_STORAGE_GET_F_CREATE must be used instead.
> + */
> + BPF_SK_STORAGE_GET_F_CREATE = BPF_LOCAL_STORAGE_GET_F_CREATE,
> };
>
> /* BPF_FUNC_read_branch_records flags. */
>

2020-07-27 20:30:03

by KP Singh

[permalink] [raw]
Subject: Re: [PATCH bpf-next v6 3/7] bpf: Generalize bpf_sk_storage



On 25.07.20 03:13, Martin KaFai Lau wrote:
> On Thu, Jul 23, 2020 at 01:50:28PM +0200, KP Singh wrote:
>> From: KP Singh <[email protected]>
>>
>> Refactor the functionality in bpf_sk_storage.c so that concept of
>> storage linked to kernel objects can be extended to other objects like
>> inode, task_struct etc.
>>
>> Each new local storage will still be a separate map and provide its own
>> set of helpers. This allows for future object specific extensions and
>> still share a lot of the underlying implementation.
>>
>
> [ ... ]
>
>> @@ -386,54 +407,28 @@ static int sk_storage_alloc(struct sock *sk,
>> * Otherwise, it will become a leak (and other memory issues
>> * during map destruction).
>> */
>> -static struct bpf_local_storage_data *
>> -bpf_local_storage_update(struct sock *sk, struct bpf_map *map, void *value,
>> +struct bpf_local_storage_data *
>> +bpf_local_storage_update(void *owner, struct bpf_map *map,
>> + struct bpf_local_storage *local_storage, void *value,
>> u64 map_flags)
>> {
>> struct bpf_local_storage_data *old_sdata = NULL;
>> struct bpf_local_storage_elem *selem;
>> - struct bpf_local_storage *local_storage;
>> struct bpf_local_storage_map *smap;
>> int err;
>>
>> - /* BPF_EXIST and BPF_NOEXIST cannot be both set */
>> - if (unlikely((map_flags & ~BPF_F_LOCK) > BPF_EXIST) ||
>> - /* BPF_F_LOCK can only be used in a value with spin_lock */
>> - unlikely((map_flags & BPF_F_LOCK) && !map_value_has_spin_lock(map)))
>> - return ERR_PTR(-EINVAL);
>> -
>> smap = (struct bpf_local_storage_map *)map;
>> - local_storage = rcu_dereference(sk->sk_bpf_storage);
>> - if (!local_storage || hlist_empty(&local_storage->list)) {
>> - /* Very first elem for this object */
>> - err = check_flags(NULL, map_flags);
> This check_flags() call is missing in the later sk_storage_update().
>
>> - if (err)
>> - return ERR_PTR(err);
>> -
>> - selem = bpf_selem_alloc(smap, sk, value, true);
>> - if (!selem)
>> - return ERR_PTR(-ENOMEM);
>> -
>> - err = sk_storage_alloc(sk, smap, selem);
>> - if (err) {
>> - kfree(selem);
>> - atomic_sub(smap->elem_size, &sk->sk_omem_alloc);
>> - return ERR_PTR(err);
>> - }
>> -
>> - return SDATA(selem);
>> - }
>>
>> if ((map_flags & BPF_F_LOCK) && !(map_flags & BPF_NOEXIST)) {
>> /* Hoping to find an old_sdata to do inline update
>> * such that it can avoid taking the local_storage->lock
>> * and changing the lists.
>> */
>> - old_sdata =
>> - bpf_local_storage_lookup(local_storage, smap, false);
>> + old_sdata = bpf_local_storage_lookup(local_storage, smap, false);
>> err = check_flags(old_sdata, map_flags);
>> if (err)
>> return ERR_PTR(err);
>> +
>> if (old_sdata && selem_linked_to_storage(SELEM(old_sdata))) {
>> copy_map_value_locked(map, old_sdata->data,
>> value, false);
>
> [ ... ]
>
>> +static struct bpf_local_storage_data *
>> +sk_storage_update(void *owner, struct bpf_map *map, void *value, u64 map_flags)
>> +{
>> + struct bpf_local_storage_data *old_sdata = NULL;
>> + struct bpf_local_storage_elem *selem;
>> + struct bpf_local_storage *local_storage;
>> + struct bpf_local_storage_map *smap;
>> + struct sock *sk;
>> + int err;
>> +
>> + err = bpf_local_storage_check_update_flags(map, map_flags);
>> + if (err)
>> + return ERR_PTR(err);
>> +
>> + sk = owner;
>> + local_storage = rcu_dereference(sk->sk_bpf_storage);
>> + smap = (struct bpf_local_storage_map *)map;
>> +
>> + if (!local_storage || hlist_empty(&local_storage->list)) {
>
> "check_flags(NULL, map_flags);" is gone in this refactoring.
>
> This part of the code is copied into inode_storage_update()
> in a later patch, which then has the same issue.
>
>> + /* Very first elem */
>> + selem = map->ops->map_selem_alloc(smap, owner, value, !old_sdata);
>> + if (!selem)
>> + return ERR_PTR(-ENOMEM);
>
>> static int sk_storage_map_btf_id;
>> const struct bpf_map_ops sk_storage_map_ops = {
>> - .map_alloc_check = bpf_sk_storage_map_alloc_check,
>> - .map_alloc = bpf_local_storage_map_alloc,
>> - .map_free = bpf_local_storage_map_free,
>> + .map_alloc_check = bpf_local_storage_map_alloc_check,
>> + .map_alloc = sk_storage_map_alloc,
>> + .map_free = sk_storage_map_free,
>> .map_get_next_key = notsupp_get_next_key,
>> .map_lookup_elem = bpf_fd_sk_storage_lookup_elem,
>> .map_update_elem = bpf_fd_sk_storage_update_elem,
>> .map_delete_elem = bpf_fd_sk_storage_delete_elem,
>> - .map_check_btf = bpf_sk_storage_map_check_btf,
>> + .map_check_btf = bpf_local_storage_map_check_btf,
>> .map_btf_name = "bpf_local_storage_map",
>> .map_btf_id = &sk_storage_map_btf_id,
>> + .map_selem_alloc = sk_selem_alloc,
>> + .map_local_storage_update = sk_storage_update,
>> + .map_local_storage_unlink = unlink_sk_storage,
> I think refactoring the code into map_selem_alloc, map_local_storage_update,
> and map_local_storage_unlink is not the best option. The sk and inode
> versions of the above map_ops are mostly the same, so fixing an
> issue like the one mentioned above needs a fix in sk, inode, and
> the future kernel-object code.
>
> The only difference is that sk charges omem and inode does not.
> I have played around a little. I think adding the following three ops (pasted at
> the end) is better and should be enough for both sk and inode. The inode
> does not even have to implement the (un)charge ops at all.
>
> That should remove the duplication for the following:
> - (sk|inode)_selem_alloc
> - (sk|inode)_storage_update
> - unlink_(sk|inode)_storage
> - (sk|inode)_storage_alloc
>
> Another bonus is that the new bpf_local_storage_check_update_flags() and
> bpf_local_storage_publish() will no longer be needed either.

I really like this approach. Thank you so much!

>
> I have hacked up this patch 3 change to compile-test this idea.
> I will post it in another email. Let me know what you think.
> --- a/include/linux/bpf.h
> +++ b/include/linux/bpf.h
> @@ -33,6 +33,9 @@ struct btf;
> struct btf_type;
> struct exception_table_entry;
> struct seq_operations;
> +struct bpf_local_storage;
> +struct bpf_local_storage_map;
> +struct bpf_local_storage_elem;
>
> extern struct idr btf_idr;
> extern spinlock_t btf_idr_lock;
> @@ -93,6 +96,13 @@ struct bpf_map_ops {
> __poll_t (*map_poll)(struct bpf_map *map, struct file *filp,
> struct poll_table_struct *pts);
>
> + /* Functions called by bpf_local_storage maps */
> + int (*map_local_storage_charge)(struct bpf_local_storage_map *smap,
> + void *owner, u32 size);
> + void (*map_local_storage_uncharge)(struct bpf_local_storage_map *smap,
> + void *owner, u32 size);
> + struct bpf_local_storage __rcu ** (*map_owner_storage_ptr)(struct bpf_local_storage_map *smap,
> + void *owner);
> /* BTF name and id of struct allocated by map_alloc */
> const char * const map_btf_name;
> int *map_btf_id;
>
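
To make the shape of this concrete, the shared local-storage code could wrap
the three proposed ops roughly as in the sketch below, so an owner like inode
that needs no memory accounting simply leaves the (un)charge ops NULL. The
wrapper names here are assumptions for illustration, not code from the posted
patches:

/* Sketch: shared helpers in the generic bpf_local_storage code. */
static int mem_charge(struct bpf_local_storage_map *smap, void *owner, u32 size)
{
	struct bpf_map *map = &smap->map;

	/* Owners without memory accounting (e.g. inode) leave this op NULL. */
	if (!map->ops->map_local_storage_charge)
		return 0;

	return map->ops->map_local_storage_charge(smap, owner, size);
}

static void mem_uncharge(struct bpf_local_storage_map *smap, void *owner, u32 size)
{
	struct bpf_map *map = &smap->map;

	if (map->ops->map_local_storage_uncharge)
		map->ops->map_local_storage_uncharge(smap, owner, size);
}

static struct bpf_local_storage __rcu **
owner_storage(struct bpf_local_storage_map *smap, void *owner)
{
	return smap->map.ops->map_owner_storage_ptr(smap, owner);
}

With these hooks, selem allocation, storage allocation, and linking/unlinking
stay in the common code, and the first-element path of the shared
bpf_local_storage_update() can call check_flags(NULL, map_flags) once instead
of each owner duplicating (and potentially missing) that check.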

2020-07-27 21:44:55

by Martin KaFai Lau

[permalink] [raw]
Subject: Re: [RFC PATCH bpf-next] bpf: POC on local_storage charge and uncharge map_ops

On Mon, Jul 27, 2020 at 10:26:53PM +0200, KP Singh wrote:
> Thanks for this. I was able to update the series with this patch and it works.
> One minor comment, though.
>
> I was wondering how I should send it as part of the series. I will keep the
> original commit description, mention this thread, and add your Co-developed-by:
> tag, and then you can add your Signed-off-by: as well.
Sounds good to me.

Thanks for verifying the idea. Feel free to make changes to this RFC or
clean it up.

> I am not sure of the
> canonical way here and am open to suggestions :)
>
> - KP
>
> On 25.07.20 03:30, Martin KaFai Lau wrote:
> > It is a direct replacement of the patch 3 in discussion [1]
> > and to test out the idea on adding
> > map_local_storage_charge, map_local_storage_uncharge,
> > and map_owner_storage_ptr.
> >
> > It is only compiler tested to demo the idea.
> >
> > [1]: https://patchwork.ozlabs.org/project/netdev/patch/[email protected]/
> >
> > Signed-off-by: Martin KaFai Lau <[email protected]>
> > ---
> > include/linux/bpf.h | 10 ++
> > include/net/bpf_sk_storage.h | 51 +++++++
> > include/uapi/linux/bpf.h | 8 +-
>
> [...]
>
> > +
> > +static void sk_storage_uncharge(struct bpf_local_storage_map *smap,
> > + void *owner, u32 size)
> > +{
> > + struct sock *sk = owner;
> > +
> > + atomic_sub(size, &sk->sk_omem_alloc);
> > +}
> > +
> > +static struct bpf_local_storage __rcu **
> > +sk_storage_ptr(struct bpf_local_storage_map *smap, void *owner)
>
> Do we need an smap pointer here? It's not being used and is also not
> used for inode as well.
You are correct. No, it is not needed.
I threw it in there merely because it is a map_op. It is unused
and can be removed.

> > +{
> > + struct sock *sk = owner;
> > +
> > + return &sk->sk_bpf_storage;
> > +}
> > +
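
Given that, the op and its socket implementation would reduce to something
like the following sketch once the unused smap argument is dropped (this is
the simplified form under discussion, not code from the posted RFC):

/* Sketch: map_owner_storage_ptr without the unused smap argument. */
struct bpf_local_storage __rcu **(*map_owner_storage_ptr)(void *owner);

static struct bpf_local_storage __rcu **sk_storage_ptr(void *owner)
{
	struct sock *sk = owner;

	return &sk->sk_bpf_storage;
}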