Currently, BPF supports writes to packet data in very specific cases.
The mechanism is more generally useful, however, and can be extended to any
number of writable buffers in a context. This patch adds two new register
types, PTR_TO_BUFFER and PTR_TO_BUFFER_END, analogous to PTR_TO_PACKET and
PTR_TO_PACKET_END. In addition, a 'buf_id' field is added to the reg_state
structure to distinguish between different buffers in a single context.
Buffers are specified in the context by a pair of members:
- a pointer to the start of the buffer (type PTR_TO_BUFFER)
- a pointer to the first byte beyond the buffer (type PTR_TO_BUFFER_END)
A context can contain multiple buffers. Each buffer/buffer_end pair is
identified by a unique id (buf_id). The start-of-buffer member offset is
usually a good unique identifier.
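For illustration, a context exposing two writable buffers could look like the
sketch below. This is a hypothetical layout (the member names are made up and
not part of this patch); the comments show the register type a prog-specific
is_valid_access callback would report for each member, with the offset of each
start-of-buffer member serving as its buf_id:

struct my_ctx {
	__u8 *scratch;		/* PTR_TO_BUFFER, buf_id 0 (member offset) */
	__u8 *scratch_end;	/* PTR_TO_BUFFER_END */
	__u8 *out;		/* PTR_TO_BUFFER, buf_id 16 (member offset) */
	__u8 *out_end;		/* PTR_TO_BUFFER_END */
};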
The semantics for using a writable buffer are the same as for packet data.
The BPF program must contain a range test (buf + num > buf_end) so that the
verifier can prove that all offsets accessed are within the allowed range.
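As a minimal sketch (reusing the hypothetical context above), a program that
wants to write four bytes at the start of a buffer would first prove the
access is in range:

	__u8 *buf = ctx->scratch;
	__u8 *buf_end = ctx->scratch_end;

	if (buf + 4 > buf_end)		/* the range test the verifier needs */
		return 0;		/* not enough room */

	buf[0] = 0xde;			/* offsets 0..3 are now provably */
	buf[1] = 0xad;			/* within the buffer */
	buf[2] = 0xbe;
	buf[3] = 0xef;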
Whenever a helper is called that might update the content of the context,
all range information for registers that hold pointers to a buffer is
cleared, just as is done for packet pointers.
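Concretely (again with hypothetical names), an established range does not
survive such a helper call; the pointers must be reloaded from the context
and re-checked, just as packet pointers must be re-derived after a helper
like bpf_skb_pull_data():

	if (buf + 4 > buf_end)
		return 0;
	buf[0] = 1;			/* OK: within the checked range */

	change_data_helper(ctx);	/* hypothetical helper that may move
					 * the buffer; the verifier clears
					 * all buffer range information here
					 */

	buf = ctx->scratch;		/* reload and re-check */
	buf_end = ctx->scratch_end;
	if (buf + 4 > buf_end)
		return 0;
	buf[1] = 2;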
Signed-off-by: Kris Van Hees <[email protected]>
Reviewed-by: Nick Alcock <[email protected]>
---
include/linux/bpf.h | 3 +
include/linux/bpf_verifier.h | 4 +-
kernel/bpf/verifier.c | 198 ++++++++++++++++++++++++-----------
3 files changed, 145 insertions(+), 60 deletions(-)
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index e4bcb79656c4..fc3eda0192fb 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -275,6 +275,8 @@ enum bpf_reg_type {
PTR_TO_TCP_SOCK, /* reg points to struct tcp_sock */
PTR_TO_TCP_SOCK_OR_NULL, /* reg points to struct tcp_sock or NULL */
PTR_TO_TP_BUFFER, /* reg points to a writable raw tp's buffer */
+ PTR_TO_BUFFER, /* reg points to ctx buffer */
+ PTR_TO_BUFFER_END, /* reg points to ctx buffer end */
};
/* The information passed from prog-specific *_is_valid_access
@@ -283,6 +285,7 @@ enum bpf_reg_type {
struct bpf_insn_access_aux {
enum bpf_reg_type reg_type;
int ctx_field_size;
+ u32 buf_id;
};
static inline void
diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index 1305ccbd8fe6..3538382184f3 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -45,7 +45,7 @@ struct bpf_reg_state {
/* Ordering of fields matters. See states_equal() */
enum bpf_reg_type type;
union {
- /* valid when type == PTR_TO_PACKET */
+ /* valid when type == PTR_TO_PACKET | PTR_TO_BUFFER */
u16 range;
/* valid when type == CONST_PTR_TO_MAP | PTR_TO_MAP_VALUE |
@@ -132,6 +132,8 @@ struct bpf_reg_state {
*/
u32 frameno;
enum bpf_reg_liveness live;
+ /* For PTR_TO_BUFFER, to identify distinct buffers in a context. */
+ u32 buf_id;
};
enum bpf_stack_slot_type {
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index f9e5536fd1af..5fba4e6f5424 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -406,6 +406,8 @@ static const char * const reg_type_str[] = {
[PTR_TO_TCP_SOCK] = "tcp_sock",
[PTR_TO_TCP_SOCK_OR_NULL] = "tcp_sock_or_null",
[PTR_TO_TP_BUFFER] = "tp_buffer",
+ [PTR_TO_BUFFER] = "buf",
+ [PTR_TO_BUFFER_END] = "buf_end",
};
static char slot_type_char[] = {
@@ -467,6 +469,9 @@ static void print_verifier_state(struct bpf_verifier_env *env,
verbose(env, ",off=%d", reg->off);
if (type_is_pkt_pointer(t))
verbose(env, ",r=%d", reg->range);
+ else if (t == PTR_TO_BUFFER)
+ verbose(env, ",r=%d,bid=%d", reg->range,
+ reg->buf_id);
else if (t == CONST_PTR_TO_MAP ||
t == PTR_TO_MAP_VALUE ||
t == PTR_TO_MAP_VALUE_OR_NULL)
@@ -855,6 +860,12 @@ static bool reg_is_pkt_pointer_any(const struct bpf_reg_state *reg)
reg->type == PTR_TO_PACKET_END;
}
+static bool reg_is_buf_pointer_any(const struct bpf_reg_state *reg)
+{
+ return reg_is_pkt_pointer_any(reg) ||
+ reg->type == PTR_TO_BUFFER || reg->type == PTR_TO_BUFFER_END;
+}
+
/* Unmodified PTR_TO_PACKET[_META,_END] register from ctx access. */
static bool reg_is_init_pkt_pointer(const struct bpf_reg_state *reg,
enum bpf_reg_type which)
@@ -1550,7 +1561,7 @@ static int check_map_access(struct bpf_verifier_env *env, u32 regno,
return err;
}
-#define MAX_PACKET_OFF 0xffff
+#define MAX_BUFFER_OFF 0xffff
static bool may_access_direct_pkt_data(struct bpf_verifier_env *env,
const struct bpf_call_arg_meta *meta,
@@ -1585,7 +1596,7 @@ static bool may_access_direct_pkt_data(struct bpf_verifier_env *env,
}
}
-static int __check_packet_access(struct bpf_verifier_env *env, u32 regno,
+static int __check_buffer_access(struct bpf_verifier_env *env, u32 regno,
int off, int size, bool zero_size_allowed)
{
struct bpf_reg_state *regs = cur_regs(env);
@@ -1593,14 +1604,15 @@ static int __check_packet_access(struct bpf_verifier_env *env, u32 regno,
if (off < 0 || size < 0 || (size == 0 && !zero_size_allowed) ||
(u64)off + size > reg->range) {
- verbose(env, "invalid access to packet, off=%d size=%d, R%d(id=%d,off=%d,r=%d)\n",
- off, size, regno, reg->id, reg->off, reg->range);
+ verbose(env, "invalid access to %s, off=%d size=%d, R%d(id=%d,off=%d,r=%d)\n",
+ reg_is_pkt_pointer(reg) ? "packet" : "buffer", off,
+ size, regno, reg->id, reg->off, reg->range);
return -EACCES;
}
return 0;
}
-static int check_packet_access(struct bpf_verifier_env *env, u32 regno, int off,
+static int check_buffer_access(struct bpf_verifier_env *env, u32 regno, int off,
int size, bool zero_size_allowed)
{
struct bpf_reg_state *regs = cur_regs(env);
@@ -1620,35 +1632,37 @@ static int check_packet_access(struct bpf_verifier_env *env, u32 regno, int off,
regno);
return -EACCES;
}
- err = __check_packet_access(env, regno, off, size, zero_size_allowed);
+ err = __check_buffer_access(env, regno, off, size, zero_size_allowed);
if (err) {
- verbose(env, "R%d offset is outside of the packet\n", regno);
+ verbose(env, "R%d offset is outside of the %s\n",
+ regno, reg_is_pkt_pointer(reg) ? "packet" : "buffer");
return err;
}
- /* __check_packet_access has made sure "off + size - 1" is within u16.
- * reg->umax_value can't be bigger than MAX_PACKET_OFF which is 0xffff,
- * otherwise find_good_pkt_pointers would have refused to set range info
- * that __check_packet_access would have rejected this pkt access.
- * Therefore, "off + reg->umax_value + size - 1" won't overflow u32.
- */
- env->prog->aux->max_pkt_offset =
- max_t(u32, env->prog->aux->max_pkt_offset,
- off + reg->umax_value + size - 1);
+ if (reg_is_pkt_pointer(reg)) {
+ /* __check_buffer_access ensures "off + size - 1" is within u16.
+ * reg->umax_value can't be bigger than MAX_BUFFER_OFF which
+ * is 0xffff, otherwise find_good_buf_pointers would have
+ * refused to set range info and __check_buffer_access would
+ * have rejected this pkt access.
+ * Therefore, "off + reg->umax_value + size - 1" won't overflow
+ * u32.
+ */
+ env->prog->aux->max_pkt_offset =
+ max_t(u32, env->prog->aux->max_pkt_offset,
+ off + reg->umax_value + size - 1);
+ }
return err;
}
/* check access to 'struct bpf_context' fields. Supports fixed offsets only */
-static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx, int off, int size,
- enum bpf_access_type t, enum bpf_reg_type *reg_type)
+static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx,
+ int off, int size, enum bpf_access_type t,
+ struct bpf_insn_access_aux *info)
{
- struct bpf_insn_access_aux info = {
- .reg_type = *reg_type,
- };
-
if (env->ops->is_valid_access &&
- env->ops->is_valid_access(off, size, t, env->prog, &info)) {
+ env->ops->is_valid_access(off, size, t, env->prog, info)) {
/* A non zero info.ctx_field_size indicates that this field is a
* candidate for later verifier transformation to load the whole
* field and then apply a mask when accessed with a narrower
@@ -1656,9 +1670,7 @@ static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx, int off,
* will only allow for whole field access and rejects any other
* type of narrower access.
*/
- *reg_type = info.reg_type;
-
- env->insn_aux_data[insn_idx].ctx_field_size = info.ctx_field_size;
+ env->insn_aux_data[insn_idx].ctx_field_size = info->ctx_field_size;
/* remember the offset of last byte accessed in ctx */
if (env->prog->aux->max_ctx_offset < off + size)
env->prog->aux->max_ctx_offset = off + size;
@@ -1870,6 +1882,10 @@ static int check_ptr_alignment(struct bpf_verifier_env *env,
case PTR_TO_TCP_SOCK:
pointer_desc = "tcp_sock ";
break;
+ case PTR_TO_BUFFER:
+ pointer_desc = "buffer ";
+ strict = true;
+ break;
default:
break;
}
@@ -2084,7 +2100,11 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
mark_reg_unknown(env, regs, value_regno);
} else if (reg->type == PTR_TO_CTX) {
- enum bpf_reg_type reg_type = SCALAR_VALUE;
+ struct bpf_insn_access_aux info = {
+ .reg_type = SCALAR_VALUE,
+ .buf_id = 0,
+ };
+
if (t == BPF_WRITE && value_regno >= 0 &&
is_pointer_value(env, value_regno)) {
@@ -2096,21 +2116,22 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
if (err < 0)
return err;
- err = check_ctx_access(env, insn_idx, off, size, t, &reg_type);
+ err = check_ctx_access(env, insn_idx, off, size, t, &info);
if (!err && t == BPF_READ && value_regno >= 0) {
/* ctx access returns either a scalar, or a
* PTR_TO_PACKET[_META,_END]. In the latter
* case, we know the offset is zero.
*/
- if (reg_type == SCALAR_VALUE) {
+ if (info.reg_type == SCALAR_VALUE) {
mark_reg_unknown(env, regs, value_regno);
} else {
mark_reg_known_zero(env, regs,
value_regno);
- if (reg_type_may_be_null(reg_type))
+ if (reg_type_may_be_null(info.reg_type))
regs[value_regno].id = ++env->id_gen;
}
- regs[value_regno].type = reg_type;
+ regs[value_regno].type = info.reg_type;
+ regs[value_regno].buf_id = info.buf_id;
}
} else if (reg->type == PTR_TO_STACK) {
@@ -2141,7 +2162,17 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
value_regno);
return -EACCES;
}
- err = check_packet_access(env, regno, off, size, false);
+ err = check_buffer_access(env, regno, off, size, false);
+ if (!err && t == BPF_READ && value_regno >= 0)
+ mark_reg_unknown(env, regs, value_regno);
+ } else if (reg->type == PTR_TO_BUFFER) {
+ if (t == BPF_WRITE && value_regno >= 0 &&
+ is_pointer_value(env, value_regno)) {
+ verbose(env, "R%d leaks addr into buffer\n",
+ value_regno);
+ return -EACCES;
+ }
+ err = check_buffer_access(env, regno, off, size, false);
if (!err && t == BPF_READ && value_regno >= 0)
mark_reg_unknown(env, regs, value_regno);
} else if (reg->type == PTR_TO_FLOW_KEYS) {
@@ -2382,7 +2413,7 @@ static int check_helper_mem_access(struct bpf_verifier_env *env, int regno,
switch (reg->type) {
case PTR_TO_PACKET:
case PTR_TO_PACKET_META:
- return check_packet_access(env, regno, reg->off, access_size,
+ return check_buffer_access(env, regno, reg->off, access_size,
zero_size_allowed);
case PTR_TO_MAP_VALUE:
if (check_map_access_type(env, regno, reg->off, access_size,
@@ -2962,34 +2993,35 @@ static int check_func_proto(const struct bpf_func_proto *fn, int func_id)
check_refcount_ok(fn, func_id) ? 0 : -EINVAL;
}
-/* Packet data might have moved, any old PTR_TO_PACKET[_META,_END]
- * are now invalid, so turn them into unknown SCALAR_VALUE.
+/* Packet or buffer data might have moved, any old PTR_TO_PACKET[_META,_END]
+ * and/or PTR_TO_BUFFER[_END] are now invalid, so turn them into unknown
+ * SCALAR_VALUE.
*/
-static void __clear_all_pkt_pointers(struct bpf_verifier_env *env,
+static void __clear_all_buf_pointers(struct bpf_verifier_env *env,
struct bpf_func_state *state)
{
struct bpf_reg_state *regs = state->regs, *reg;
int i;
for (i = 0; i < MAX_BPF_REG; i++)
- if (reg_is_pkt_pointer_any(&regs[i]))
+ if (reg_is_buf_pointer_any(&regs[i]))
mark_reg_unknown(env, regs, i);
bpf_for_each_spilled_reg(i, state, reg) {
if (!reg)
continue;
- if (reg_is_pkt_pointer_any(reg))
+ if (reg_is_buf_pointer_any(reg))
__mark_reg_unknown(reg);
}
}
-static void clear_all_pkt_pointers(struct bpf_verifier_env *env)
+static void clear_all_buf_pointers(struct bpf_verifier_env *env)
{
struct bpf_verifier_state *vstate = env->cur_state;
int i;
for (i = 0; i <= vstate->curframe; i++)
- __clear_all_pkt_pointers(env, vstate->frame[i]);
+ __clear_all_buf_pointers(env, vstate->frame[i]);
}
static void release_reg_references(struct bpf_verifier_env *env,
@@ -3417,7 +3449,7 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
}
if (changes_data)
- clear_all_pkt_pointers(env);
+ clear_all_buf_pointers(env);
return 0;
}
@@ -4349,7 +4381,7 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
return 0;
}
-static void __find_good_pkt_pointers(struct bpf_func_state *state,
+static void __find_good_buf_pointers(struct bpf_func_state *state,
struct bpf_reg_state *dst_reg,
enum bpf_reg_type type, u16 new_range)
{
@@ -4358,7 +4390,11 @@ static void __find_good_pkt_pointers(struct bpf_func_state *state,
for (i = 0; i < MAX_BPF_REG; i++) {
reg = &state->regs[i];
- if (reg->type == type && reg->id == dst_reg->id)
+ if (reg->type != type)
+ continue;
+ if (type == PTR_TO_BUFFER && reg->buf_id != dst_reg->buf_id)
+ continue;
+ if (reg->id == dst_reg->id)
/* keep the maximum range already checked */
reg->range = max(reg->range, new_range);
}
@@ -4366,12 +4402,16 @@ static void __find_good_pkt_pointers(struct bpf_func_state *state,
bpf_for_each_spilled_reg(i, state, reg) {
if (!reg)
continue;
- if (reg->type == type && reg->id == dst_reg->id)
+ if (reg->type != type)
+ continue;
+ if (type == PTR_TO_BUFFER && reg->buf_id != dst_reg->buf_id)
+ continue;
+ if (reg->id == dst_reg->id)
reg->range = max(reg->range, new_range);
}
}
-static void find_good_pkt_pointers(struct bpf_verifier_state *vstate,
+static void find_good_buf_pointers(struct bpf_verifier_state *vstate,
struct bpf_reg_state *dst_reg,
enum bpf_reg_type type,
bool range_right_open)
@@ -4384,8 +4424,8 @@ static void find_good_pkt_pointers(struct bpf_verifier_state *vstate,
/* This doesn't give us any range */
return;
- if (dst_reg->umax_value > MAX_PACKET_OFF ||
- dst_reg->umax_value + dst_reg->off > MAX_PACKET_OFF)
+ if (dst_reg->umax_value > MAX_BUFFER_OFF ||
+ dst_reg->umax_value + dst_reg->off > MAX_BUFFER_OFF)
/* Risk of overflow. For instance, ptr + (1<<63) may be less
* than pkt_end, but that's because it's also less than pkt.
*/
@@ -4440,10 +4480,10 @@ static void find_good_pkt_pointers(struct bpf_verifier_state *vstate,
/* If our ids match, then we must have the same max_value. And we
* don't care about the other reg's fixed offset, since if it's too big
* the range won't allow anything.
- * dst_reg->off is known < MAX_PACKET_OFF, therefore it fits in a u16.
+ * dst_reg->off is known < MAX_BUFFER_OFF, therefore it fits in a u16.
*/
for (i = 0; i <= vstate->curframe; i++)
- __find_good_pkt_pointers(vstate->frame[i], dst_reg, type,
+ __find_good_buf_pointers(vstate->frame[i], dst_reg, type,
new_range);
}
@@ -4934,7 +4974,7 @@ static void __mark_ptr_or_null_regs(struct bpf_func_state *state, u32 id,
}
}
-/* The logic is similar to find_good_pkt_pointers(), both could eventually
+/* The logic is similar to find_good_buf_pointers(), both could eventually
* be folded together at some point.
*/
static void mark_ptr_or_null_regs(struct bpf_verifier_state *vstate, u32 regno,
@@ -4977,14 +5017,24 @@ static bool try_match_pkt_pointers(const struct bpf_insn *insn,
(dst_reg->type == PTR_TO_PACKET_META &&
reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
/* pkt_data' > pkt_end, pkt_meta' > pkt_data */
- find_good_pkt_pointers(this_branch, dst_reg,
+ find_good_buf_pointers(this_branch, dst_reg,
dst_reg->type, false);
} else if ((dst_reg->type == PTR_TO_PACKET_END &&
src_reg->type == PTR_TO_PACKET) ||
(reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
src_reg->type == PTR_TO_PACKET_META)) {
/* pkt_end > pkt_data', pkt_data > pkt_meta' */
- find_good_pkt_pointers(other_branch, src_reg,
+ find_good_buf_pointers(other_branch, src_reg,
+ src_reg->type, true);
+ } else if (dst_reg->type == PTR_TO_BUFFER &&
+ src_reg->type == PTR_TO_BUFFER_END) {
+ /* buf' > buf_end */
+ find_good_buf_pointers(this_branch, dst_reg,
+ dst_reg->type, false);
+ } else if (dst_reg->type == PTR_TO_BUFFER_END &&
+ src_reg->type == PTR_TO_BUFFER) {
+ /* buf_end > buf' */
+ find_good_buf_pointers(other_branch, src_reg,
src_reg->type, true);
} else {
return false;
@@ -4996,14 +5046,24 @@ static bool try_match_pkt_pointers(const struct bpf_insn *insn,
(dst_reg->type == PTR_TO_PACKET_META &&
reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
/* pkt_data' < pkt_end, pkt_meta' < pkt_data */
- find_good_pkt_pointers(other_branch, dst_reg,
+ find_good_buf_pointers(other_branch, dst_reg,
dst_reg->type, true);
} else if ((dst_reg->type == PTR_TO_PACKET_END &&
src_reg->type == PTR_TO_PACKET) ||
(reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
src_reg->type == PTR_TO_PACKET_META)) {
/* pkt_end < pkt_data', pkt_data > pkt_meta' */
- find_good_pkt_pointers(this_branch, src_reg,
+ find_good_buf_pointers(this_branch, src_reg,
+ src_reg->type, false);
+ } else if (dst_reg->type == PTR_TO_BUFFER &&
+ src_reg->type == PTR_TO_BUFFER_END) {
+ /* buf' < buf_end */
+ find_good_buf_pointers(other_branch, dst_reg,
+ dst_reg->type, true);
+ } else if (dst_reg->type == PTR_TO_BUFFER_END &&
+ src_reg->type == PTR_TO_BUFFER) {
+ /* buf_end < buf' */
+ find_good_buf_pointers(this_branch, src_reg,
src_reg->type, false);
} else {
return false;
@@ -5015,14 +5075,24 @@ static bool try_match_pkt_pointers(const struct bpf_insn *insn,
(dst_reg->type == PTR_TO_PACKET_META &&
reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
/* pkt_data' >= pkt_end, pkt_meta' >= pkt_data */
- find_good_pkt_pointers(this_branch, dst_reg,
+ find_good_buf_pointers(this_branch, dst_reg,
dst_reg->type, true);
} else if ((dst_reg->type == PTR_TO_PACKET_END &&
src_reg->type == PTR_TO_PACKET) ||
(reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
src_reg->type == PTR_TO_PACKET_META)) {
/* pkt_end >= pkt_data', pkt_data >= pkt_meta' */
- find_good_pkt_pointers(other_branch, src_reg,
+ find_good_buf_pointers(other_branch, src_reg,
+ src_reg->type, false);
+ } else if (dst_reg->type == PTR_TO_BUFFER &&
+ src_reg->type == PTR_TO_BUFFER_END) {
+ /* buf' >= buf_end */
+ find_good_buf_pointers(this_branch, dst_reg,
+ dst_reg->type, true);
+ } else if (dst_reg->type == PTR_TO_BUFFER_END &&
+ src_reg->type == PTR_TO_BUFFER) {
+ /* buf_end >= buf' */
+ find_good_buf_pointers(other_branch, src_reg,
src_reg->type, false);
} else {
return false;
@@ -5034,15 +5104,25 @@ static bool try_match_pkt_pointers(const struct bpf_insn *insn,
(dst_reg->type == PTR_TO_PACKET_META &&
reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
/* pkt_data' <= pkt_end, pkt_meta' <= pkt_data */
- find_good_pkt_pointers(other_branch, dst_reg,
+ find_good_buf_pointers(other_branch, dst_reg,
dst_reg->type, false);
} else if ((dst_reg->type == PTR_TO_PACKET_END &&
src_reg->type == PTR_TO_PACKET) ||
(reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
src_reg->type == PTR_TO_PACKET_META)) {
/* pkt_end <= pkt_data', pkt_data <= pkt_meta' */
- find_good_pkt_pointers(this_branch, src_reg,
+ find_good_buf_pointers(this_branch, src_reg,
src_reg->type, true);
+ } else if (dst_reg->type == PTR_TO_BUFFER &&
+ src_reg->type == PTR_TO_BUFFER_END) {
+ /* buf' <= buf_end */
+ find_good_buf_pointers(other_branch, dst_reg,
+ dst_reg->type, true);
+ } else if (dst_reg->type == PTR_TO_BUFFER_END &&
+ src_reg->type == PTR_TO_BUFFER) {
+ /* buf_end <= buf' */
+ find_good_buf_pointers(this_branch, src_reg,
+ src_reg->type, false);
} else {
return false;
}
@@ -7972,7 +8052,7 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env)
*/
prog->cb_access = 1;
env->prog->aux->stack_depth = MAX_BPF_STACK;
- env->prog->aux->max_pkt_offset = MAX_PACKET_OFF;
+ env->prog->aux->max_pkt_offset = MAX_BUFFER_OFF;
/* mark bpf_tail_call as different opcode to avoid
* conditional branch in the interpeter for every normal
--
2.20.1
Hi Kris,
Note, it's best to thread patches. Otherwise they get spread out in
mailboxes and are hard to manage. That is, every patch should be a reply
to the 00/11 header patch.
Also, Peter Zijlstra (Cc'd) is the maintainer of perf on the kernel
side. Please Cc him on perf changes that are done inside the
kernel.
-- Steve
On Mon, May 20, 2019 at 09:21:34PM -0400, Steven Rostedt wrote:
> Hi Kris,
>
> Note, it's best to thread patches. Otherwise they get spread out in
> mailboxes and are hard to manage. That is, every patch should be a reply
> to the 00/11 header patch.
Thanks for that advice - I will make sure to do that for future postings.
> Also, Peter Zijlstra (Cc'd) is the maintainer of perf on the kernel
> side. Please Cc him on perf changes that are done inside the
> kernel.
Ah, my apologies for missing Peter in the list of Cc's. Thank you for adding
him. I will update my list.
Kris
> On Mon, 20 May 2019 23:52:24 +0000 (UTC)
> Kris Van Hees <[email protected]> wrote:
>
> > Currently, BPF supports writes to packet data in very specific cases.
> > The implementation can be of more general use and can be extended to any
> > number of writable buffers in a context. The implementation adds two new
> > register types: PTR_TO_BUFFER and PTR_TO_BUFFER_END, similar to the types
> > PTR_TO_PACKET and PTR_TO_PACKET_END. In addition, a field 'buf_id' is
> > added to the reg_state structure as a way to distinguish between different
> > buffers in a single context.
> >
> > Buffers are specified in the context by a pair of members:
> > - a pointer to the start of the buffer (type PTR_TO_BUFFER)
> > - a pointer to the first byte beyond the buffer (type PTR_TO_BUFFER_END)
> >
> > A context can contain multiple buffers. Each buffer/buffer_end pair is
> > identified by a unique id (buf_id). The start-of-buffer member offset is
> > usually a good unique identifier.
> >
> > The semantics for using a writable buffer are the same as for packet data.
> > The BPF program must contain a range test (buf + num > buf_end) to ensure
> > that the verifier can verify that offsets are within the allowed range.
> >
> > Whenever a helper is called that might update the content of the context
> > all range information for registers that hold pointers to a buffer is
> > cleared, just as it is done for packet pointers.
> >
> > Signed-off-by: Kris Van Hees <[email protected]>
> > Reviewed-by: Nick Alcock <[email protected]>
> > ---
> > include/linux/bpf.h | 3 +
> > include/linux/bpf_verifier.h | 4 +-
> > kernel/bpf/verifier.c | 198 ++++++++++++++++++++++++-----------
> > 3 files changed, 145 insertions(+), 60 deletions(-)
> >
> > diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> > index e4bcb79656c4..fc3eda0192fb 100644
> > --- a/include/linux/bpf.h
> > +++ b/include/linux/bpf.h
> > @@ -275,6 +275,8 @@ enum bpf_reg_type {
> > PTR_TO_TCP_SOCK, /* reg points to struct tcp_sock */
> > PTR_TO_TCP_SOCK_OR_NULL, /* reg points to struct tcp_sock or NULL */
> > PTR_TO_TP_BUFFER, /* reg points to a writable raw tp's buffer */
> > + PTR_TO_BUFFER, /* reg points to ctx buffer */
> > + PTR_TO_BUFFER_END, /* reg points to ctx buffer end */
> > };
> >
> > /* The information passed from prog-specific *_is_valid_access
> > @@ -283,6 +285,7 @@ enum bpf_reg_type {
> > struct bpf_insn_access_aux {
> > enum bpf_reg_type reg_type;
> > int ctx_field_size;
> > + u32 buf_id;
> > };
> >
> > static inline void
> > diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
> > index 1305ccbd8fe6..3538382184f3 100644
> > --- a/include/linux/bpf_verifier.h
> > +++ b/include/linux/bpf_verifier.h
> > @@ -45,7 +45,7 @@ struct bpf_reg_state {
> > /* Ordering of fields matters. See states_equal() */
> > enum bpf_reg_type type;
> > union {
> > - /* valid when type == PTR_TO_PACKET */
> > + /* valid when type == PTR_TO_PACKET | PTR_TO_BUFFER */
> > u16 range;
> >
> > /* valid when type == CONST_PTR_TO_MAP | PTR_TO_MAP_VALUE |
> > @@ -132,6 +132,8 @@ struct bpf_reg_state {
> > */
> > u32 frameno;
> > enum bpf_reg_liveness live;
> > + /* For PTR_TO_BUFFER, to identify distinct buffers in a context. */
> > + u32 buf_id;
> > };
> >
> > enum bpf_stack_slot_type {
> > diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
> > index f9e5536fd1af..5fba4e6f5424 100644
> > --- a/kernel/bpf/verifier.c
> > +++ b/kernel/bpf/verifier.c
> > @@ -406,6 +406,8 @@ static const char * const reg_type_str[] = {
> > [PTR_TO_TCP_SOCK] = "tcp_sock",
> > [PTR_TO_TCP_SOCK_OR_NULL] = "tcp_sock_or_null",
> > [PTR_TO_TP_BUFFER] = "tp_buffer",
> > + [PTR_TO_BUFFER] = "buf",
> > + [PTR_TO_BUFFER_END] = "buf_end",
> > };
> >
> > static char slot_type_char[] = {
> > @@ -467,6 +469,9 @@ static void print_verifier_state(struct bpf_verifier_env *env,
> > verbose(env, ",off=%d", reg->off);
> > if (type_is_pkt_pointer(t))
> > verbose(env, ",r=%d", reg->range);
> > + else if (t == PTR_TO_BUFFER)
> > + verbose(env, ",r=%d,bid=%d", reg->range,
> > + reg->buf_id);
> > else if (t == CONST_PTR_TO_MAP ||
> > t == PTR_TO_MAP_VALUE ||
> > t == PTR_TO_MAP_VALUE_OR_NULL)
> > @@ -855,6 +860,12 @@ static bool reg_is_pkt_pointer_any(const struct bpf_reg_state *reg)
> > reg->type == PTR_TO_PACKET_END;
> > }
> >
> > +static bool reg_is_buf_pointer_any(const struct bpf_reg_state *reg)
> > +{
> > + return reg_is_pkt_pointer_any(reg) ||
> > + reg->type == PTR_TO_BUFFER || reg->type == PTR_TO_BUFFER_END;
> > +}
> > +
> > /* Unmodified PTR_TO_PACKET[_META,_END] register from ctx access. */
> > static bool reg_is_init_pkt_pointer(const struct bpf_reg_state *reg,
> > enum bpf_reg_type which)
> > @@ -1550,7 +1561,7 @@ static int check_map_access(struct bpf_verifier_env *env, u32 regno,
> > return err;
> > }
> >
> > -#define MAX_PACKET_OFF 0xffff
> > +#define MAX_BUFFER_OFF 0xffff
> >
> > static bool may_access_direct_pkt_data(struct bpf_verifier_env *env,
> > const struct bpf_call_arg_meta *meta,
> > @@ -1585,7 +1596,7 @@ static bool may_access_direct_pkt_data(struct bpf_verifier_env *env,
> > }
> > }
> >
> > -static int __check_packet_access(struct bpf_verifier_env *env, u32 regno,
> > +static int __check_buffer_access(struct bpf_verifier_env *env, u32 regno,
> > int off, int size, bool zero_size_allowed)
> > {
> > struct bpf_reg_state *regs = cur_regs(env);
> > @@ -1593,14 +1604,15 @@ static int __check_packet_access(struct bpf_verifier_env *env, u32 regno,
> >
> > if (off < 0 || size < 0 || (size == 0 && !zero_size_allowed) ||
> > (u64)off + size > reg->range) {
> > - verbose(env, "invalid access to packet, off=%d size=%d, R%d(id=%d,off=%d,r=%d)\n",
> > - off, size, regno, reg->id, reg->off, reg->range);
> > + verbose(env, "invalid access to %s, off=%d size=%d, R%d(id=%d,off=%d,r=%d)\n",
> > + reg_is_pkt_pointer(reg) ? "packet" : "buffer", off,
> > + size, regno, reg->id, reg->off, reg->range);
> > return -EACCES;
> > }
> > return 0;
> > }
> >
> > -static int check_packet_access(struct bpf_verifier_env *env, u32 regno, int off,
> > +static int check_buffer_access(struct bpf_verifier_env *env, u32 regno, int off,
> > int size, bool zero_size_allowed)
> > {
> > struct bpf_reg_state *regs = cur_regs(env);
> > @@ -1620,35 +1632,37 @@ static int check_packet_access(struct bpf_verifier_env *env, u32 regno, int off,
> > regno);
> > return -EACCES;
> > }
> > - err = __check_packet_access(env, regno, off, size, zero_size_allowed);
> > + err = __check_buffer_access(env, regno, off, size, zero_size_allowed);
> > if (err) {
> > - verbose(env, "R%d offset is outside of the packet\n", regno);
> > + verbose(env, "R%d offset is outside of the %s\n",
> > + regno, reg_is_pkt_pointer(reg) ? "packet" : "buffer");
> > return err;
> > }
> >
> > - /* __check_packet_access has made sure "off + size - 1" is within u16.
> > - * reg->umax_value can't be bigger than MAX_PACKET_OFF which is 0xffff,
> > - * otherwise find_good_pkt_pointers would have refused to set range info
> > - * that __check_packet_access would have rejected this pkt access.
> > - * Therefore, "off + reg->umax_value + size - 1" won't overflow u32.
> > - */
> > - env->prog->aux->max_pkt_offset =
> > - max_t(u32, env->prog->aux->max_pkt_offset,
> > - off + reg->umax_value + size - 1);
> > + if (reg_is_pkt_pointer(reg)) {
> > + /* __check_buffer_access ensures "off + size - 1" is within u16
> > + * reg->umax_value can't be bigger than * MAX_BUFFER_OFF which
> > + * is 0xffff, otherwise find_good_buf_pointers would have
> > + * refused to set range info and __check_buffer_access would
> > + * have rejected this pkt access.
> > + * Therefore, "off + reg->umax_value + size - 1" won't overflow
> > + * u32.
> > + */
> > + env->prog->aux->max_pkt_offset =
> > + max_t(u32, env->prog->aux->max_pkt_offset,
> > + off + reg->umax_value + size - 1);
> > + }
> >
> > return err;
> > }
> >
> > /* check access to 'struct bpf_context' fields. Supports fixed offsets only */
> > -static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx, int off, int size,
> > - enum bpf_access_type t, enum bpf_reg_type *reg_type)
> > +static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx,
> > + int off, int size, enum bpf_access_type t,
> > + struct bpf_insn_access_aux *info)
> > {
> > - struct bpf_insn_access_aux info = {
> > - .reg_type = *reg_type,
> > - };
> > -
> > if (env->ops->is_valid_access &&
> > - env->ops->is_valid_access(off, size, t, env->prog, &info)) {
> > + env->ops->is_valid_access(off, size, t, env->prog, info)) {
> > /* A non zero info.ctx_field_size indicates that this field is a
> > * candidate for later verifier transformation to load the whole
> > * field and then apply a mask when accessed with a narrower
> > @@ -1656,9 +1670,7 @@ static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx, int off,
> > * will only allow for whole field access and rejects any other
> > * type of narrower access.
> > */
> > - *reg_type = info.reg_type;
> > -
> > - env->insn_aux_data[insn_idx].ctx_field_size = info.ctx_field_size;
> > + env->insn_aux_data[insn_idx].ctx_field_size = info->ctx_field_size;
> > /* remember the offset of last byte accessed in ctx */
> > if (env->prog->aux->max_ctx_offset < off + size)
> > env->prog->aux->max_ctx_offset = off + size;
> > @@ -1870,6 +1882,10 @@ static int check_ptr_alignment(struct bpf_verifier_env *env,
> > case PTR_TO_TCP_SOCK:
> > pointer_desc = "tcp_sock ";
> > break;
> > + case PTR_TO_BUFFER:
> > + pointer_desc = "buffer ";
> > + strict = true;
> > + break;
> > default:
> > break;
> > }
> > @@ -2084,7 +2100,11 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
> > mark_reg_unknown(env, regs, value_regno);
> >
> > } else if (reg->type == PTR_TO_CTX) {
> > - enum bpf_reg_type reg_type = SCALAR_VALUE;
> > + struct bpf_insn_access_aux info = {
> > + .reg_type = SCALAR_VALUE,
> > + .buf_id = 0,
> > + };
> > +
> >
> > if (t == BPF_WRITE && value_regno >= 0 &&
> > is_pointer_value(env, value_regno)) {
> > @@ -2096,21 +2116,22 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
> > if (err < 0)
> > return err;
> >
> > - err = check_ctx_access(env, insn_idx, off, size, t, ®_type);
> > + err = check_ctx_access(env, insn_idx, off, size, t, &info);
> > if (!err && t == BPF_READ && value_regno >= 0) {
> > /* ctx access returns either a scalar, or a
> > * PTR_TO_PACKET[_META,_END]. In the latter
> > * case, we know the offset is zero.
> > */
> > - if (reg_type == SCALAR_VALUE) {
> > + if (info.reg_type == SCALAR_VALUE) {
> > mark_reg_unknown(env, regs, value_regno);
> > } else {
> > mark_reg_known_zero(env, regs,
> > value_regno);
> > - if (reg_type_may_be_null(reg_type))
> > + if (reg_type_may_be_null(info.reg_type))
> > regs[value_regno].id = ++env->id_gen;
> > }
> > - regs[value_regno].type = reg_type;
> > + regs[value_regno].type = info.reg_type;
> > + regs[value_regno].buf_id = info.buf_id;
> > }
> >
> > } else if (reg->type == PTR_TO_STACK) {
> > @@ -2141,7 +2162,17 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
> > value_regno);
> > return -EACCES;
> > }
> > - err = check_packet_access(env, regno, off, size, false);
> > + err = check_buffer_access(env, regno, off, size, false);
> > + if (!err && t == BPF_READ && value_regno >= 0)
> > + mark_reg_unknown(env, regs, value_regno);
> > + } else if (reg->type == PTR_TO_BUFFER) {
> > + if (t == BPF_WRITE && value_regno >= 0 &&
> > + is_pointer_value(env, value_regno)) {
> > + verbose(env, "R%d leaks addr into buffer\n",
> > + value_regno);
> > + return -EACCES;
> > + }
> > + err = check_buffer_access(env, regno, off, size, false);
> > if (!err && t == BPF_READ && value_regno >= 0)
> > mark_reg_unknown(env, regs, value_regno);
> > } else if (reg->type == PTR_TO_FLOW_KEYS) {
> > @@ -2382,7 +2413,7 @@ static int check_helper_mem_access(struct bpf_verifier_env *env, int regno,
> > switch (reg->type) {
> > case PTR_TO_PACKET:
> > case PTR_TO_PACKET_META:
> > - return check_packet_access(env, regno, reg->off, access_size,
> > + return check_buffer_access(env, regno, reg->off, access_size,
> > zero_size_allowed);
> > case PTR_TO_MAP_VALUE:
> > if (check_map_access_type(env, regno, reg->off, access_size,
> > @@ -2962,34 +2993,35 @@ static int check_func_proto(const struct bpf_func_proto *fn, int func_id)
> > check_refcount_ok(fn, func_id) ? 0 : -EINVAL;
> > }
> >
> > -/* Packet data might have moved, any old PTR_TO_PACKET[_META,_END]
> > - * are now invalid, so turn them into unknown SCALAR_VALUE.
> > +/* Packet or buffer data might have moved, any old PTR_TO_PACKET[_META,_END]
> > + * and/or PTR_TO_BUFFER[_END] are now invalid, so turn them into unknown
> > + * SCALAR_VALUE.
> > */
> > -static void __clear_all_pkt_pointers(struct bpf_verifier_env *env,
> > +static void __clear_all_buf_pointers(struct bpf_verifier_env *env,
> > struct bpf_func_state *state)
> > {
> > struct bpf_reg_state *regs = state->regs, *reg;
> > int i;
> >
> > for (i = 0; i < MAX_BPF_REG; i++)
> > - if (reg_is_pkt_pointer_any(®s[i]))
> > + if (reg_is_buf_pointer_any(®s[i]))
> > mark_reg_unknown(env, regs, i);
> >
> > bpf_for_each_spilled_reg(i, state, reg) {
> > if (!reg)
> > continue;
> > - if (reg_is_pkt_pointer_any(reg))
> > + if (reg_is_buf_pointer_any(reg))
> > __mark_reg_unknown(reg);
> > }
> > }
> >
> > -static void clear_all_pkt_pointers(struct bpf_verifier_env *env)
> > +static void clear_all_buf_pointers(struct bpf_verifier_env *env)
> > {
> > struct bpf_verifier_state *vstate = env->cur_state;
> > int i;
> >
> > for (i = 0; i <= vstate->curframe; i++)
> > - __clear_all_pkt_pointers(env, vstate->frame[i]);
> > + __clear_all_buf_pointers(env, vstate->frame[i]);
> > }
> >
> > static void release_reg_references(struct bpf_verifier_env *env,
> > @@ -3417,7 +3449,7 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
> > }
> >
> > if (changes_data)
> > - clear_all_pkt_pointers(env);
> > + clear_all_buf_pointers(env);
> > return 0;
> > }
> >
> > @@ -4349,7 +4381,7 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
> > return 0;
> > }
> >
> > -static void __find_good_pkt_pointers(struct bpf_func_state *state,
> > +static void __find_good_buf_pointers(struct bpf_func_state *state,
> > struct bpf_reg_state *dst_reg,
> > enum bpf_reg_type type, u16 new_range)
> > {
> > @@ -4358,7 +4390,11 @@ static void __find_good_pkt_pointers(struct bpf_func_state *state,
> >
> > for (i = 0; i < MAX_BPF_REG; i++) {
> > reg = &state->regs[i];
> > - if (reg->type == type && reg->id == dst_reg->id)
> > + if (reg->type != type)
> > + continue;
> > + if (type == PTR_TO_BUFFER && reg->buf_id != dst_reg->buf_id)
> > + continue;
> > + if (reg->id == dst_reg->id)
> > /* keep the maximum range already checked */
> > reg->range = max(reg->range, new_range);
> > }
> > @@ -4366,12 +4402,16 @@ static void __find_good_pkt_pointers(struct bpf_func_state *state,
> > bpf_for_each_spilled_reg(i, state, reg) {
> > if (!reg)
> > continue;
> > - if (reg->type == type && reg->id == dst_reg->id)
> > + if (reg->type != type)
> > + continue;
> > + if (type == PTR_TO_BUFFER && reg->buf_id != dst_reg->buf_id)
> > + continue;
> > + if (reg->id == dst_reg->id)
> > reg->range = max(reg->range, new_range);
> > }
> > }
> >
> > -static void find_good_pkt_pointers(struct bpf_verifier_state *vstate,
> > +static void find_good_buf_pointers(struct bpf_verifier_state *vstate,
> > struct bpf_reg_state *dst_reg,
> > enum bpf_reg_type type,
> > bool range_right_open)
> > @@ -4384,8 +4424,8 @@ static void find_good_pkt_pointers(struct bpf_verifier_state *vstate,
> > /* This doesn't give us any range */
> > return;
> >
> > - if (dst_reg->umax_value > MAX_PACKET_OFF ||
> > - dst_reg->umax_value + dst_reg->off > MAX_PACKET_OFF)
> > + if (dst_reg->umax_value > MAX_BUFFER_OFF ||
> > + dst_reg->umax_value + dst_reg->off > MAX_BUFFER_OFF)
> > /* Risk of overflow. For instance, ptr + (1<<63) may be less
> > * than pkt_end, but that's because it's also less than pkt.
> > */
> > @@ -4440,10 +4480,10 @@ static void find_good_pkt_pointers(struct bpf_verifier_state *vstate,
> > /* If our ids match, then we must have the same max_value. And we
> > * don't care about the other reg's fixed offset, since if it's too big
> > * the range won't allow anything.
> > - * dst_reg->off is known < MAX_PACKET_OFF, therefore it fits in a u16.
> > + * dst_reg->off is known < MAX_BUFFER_OFF, therefore it fits in a u16.
> > */
> > for (i = 0; i <= vstate->curframe; i++)
> > - __find_good_pkt_pointers(vstate->frame[i], dst_reg, type,
> > + __find_good_buf_pointers(vstate->frame[i], dst_reg, type,
> > new_range);
> > }
> >
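
The added buf_id test is what keeps ranges per buffer: a bounds check
against one buffer's end pointer must not widen the range of pointers
into a different buffer in the same context. A sketch, this time with a
hypothetical context exposing two buffer pairs:

	struct my_ctx2 {
		void *a, *a_end;	/* first buffer, some buf_id X  */
		void *b, *b_end;	/* second buffer, buf_id Y != X */
	};

	int prog(struct my_ctx2 *ctx)
	{
		char *pa = ctx->a;
		char *pb = ctx->b;

		if (pa + 8 > (char *)ctx->a_end)
			return 0;

		pa[0] = 1;	/* ok: range 8 established for pa's buf_id */
		/* pb[0] = 1;	rejected: pb's buf_id differs, range 0   */
		return 0;
	}
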
> > @@ -4934,7 +4974,7 @@ static void __mark_ptr_or_null_regs(struct bpf_func_state *state, u32 id,
> > }
> > }
> >
> > -/* The logic is similar to find_good_pkt_pointers(), both could eventually
> > +/* The logic is similar to find_good_buf_pointers(), both could eventually
> > * be folded together at some point.
> > */
> > static void mark_ptr_or_null_regs(struct bpf_verifier_state *vstate, u32 regno,
> > @@ -4977,14 +5017,24 @@ static bool try_match_pkt_pointers(const struct bpf_insn *insn,
> > (dst_reg->type == PTR_TO_PACKET_META &&
> > reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
> > /* pkt_data' > pkt_end, pkt_meta' > pkt_data */
> > - find_good_pkt_pointers(this_branch, dst_reg,
> > + find_good_buf_pointers(this_branch, dst_reg,
> > dst_reg->type, false);
> > } else if ((dst_reg->type == PTR_TO_PACKET_END &&
> > src_reg->type == PTR_TO_PACKET) ||
> > (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
> > src_reg->type == PTR_TO_PACKET_META)) {
> > /* pkt_end > pkt_data', pkt_data > pkt_meta' */
> > - find_good_pkt_pointers(other_branch, src_reg,
> > + find_good_buf_pointers(other_branch, src_reg,
> > + src_reg->type, true);
> > + } else if (dst_reg->type == PTR_TO_BUFFER &&
> > + src_reg->type == PTR_TO_BUFFER_END) {
> > + /* buf' > buf_end */
> > + find_good_buf_pointers(this_branch, dst_reg,
> > + dst_reg->type, false);
> > + } else if (dst_reg->type == PTR_TO_BUFFER_END &&
> > + src_reg->type == PTR_TO_BUFFER) {
> > + /* buf_end > buf' */
> > + find_good_buf_pointers(other_branch, src_reg,
> > src_reg->type, true);
> > } else {
> > return false;
> > @@ -4996,14 +5046,24 @@ static bool try_match_pkt_pointers(const struct bpf_insn *insn,
> > (dst_reg->type == PTR_TO_PACKET_META &&
> > reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
> > /* pkt_data' < pkt_end, pkt_meta' < pkt_data */
> > - find_good_pkt_pointers(other_branch, dst_reg,
> > + find_good_buf_pointers(other_branch, dst_reg,
> > dst_reg->type, true);
> > } else if ((dst_reg->type == PTR_TO_PACKET_END &&
> > src_reg->type == PTR_TO_PACKET) ||
> > (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
> > src_reg->type == PTR_TO_PACKET_META)) {
> > /* pkt_end < pkt_data', pkt_data > pkt_meta' */
> > - find_good_pkt_pointers(this_branch, src_reg,
> > + find_good_buf_pointers(this_branch, src_reg,
> > + src_reg->type, false);
> > + } else if (dst_reg->type == PTR_TO_BUFFER &&
> > + src_reg->type == PTR_TO_BUFFER_END) {
> > + /* buf' < buf_end */
> > + find_good_buf_pointers(other_branch, dst_reg,
> > + dst_reg->type, true);
> > + } else if (dst_reg->type == PTR_TO_BUFFER_END &&
> > + src_reg->type == PTR_TO_BUFFER) {
> > + /* buf_end < buf' */
> > + find_good_buf_pointers(this_branch, src_reg,
> > src_reg->type, false);
> > } else {
> > return false;
> > @@ -5015,14 +5075,24 @@ static bool try_match_pkt_pointers(const struct bpf_insn *insn,
> > (dst_reg->type == PTR_TO_PACKET_META &&
> > reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
> > /* pkt_data' >= pkt_end, pkt_meta' >= pkt_data */
> > - find_good_pkt_pointers(this_branch, dst_reg,
> > + find_good_buf_pointers(this_branch, dst_reg,
> > dst_reg->type, true);
> > } else if ((dst_reg->type == PTR_TO_PACKET_END &&
> > src_reg->type == PTR_TO_PACKET) ||
> > (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
> > src_reg->type == PTR_TO_PACKET_META)) {
> > /* pkt_end >= pkt_data', pkt_data >= pkt_meta' */
> > - find_good_pkt_pointers(other_branch, src_reg,
> > + find_good_buf_pointers(other_branch, src_reg,
> > + src_reg->type, false);
> > + } else if (dst_reg->type == PTR_TO_BUFFER &&
> > + src_reg->type == PTR_TO_BUFFER_END) {
> > + /* buf' >= buf_end */
> > + find_good_buf_pointers(this_branch, dst_reg,
> > + dst_reg->type, true);
> > + } else if (dst_reg->type == PTR_TO_BUFFER_END &&
> > + src_reg->type == PTR_TO_BUFFER) {
> > + /* buf_end >= buf' */
> > + find_good_buf_pointers(other_branch, src_reg,
> > src_reg->type, false);
> > } else {
> > return false;
> > @@ -5034,15 +5104,25 @@ static bool try_match_pkt_pointers(const struct bpf_insn *insn,
> > (dst_reg->type == PTR_TO_PACKET_META &&
> > reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
> > /* pkt_data' <= pkt_end, pkt_meta' <= pkt_data */
> > - find_good_pkt_pointers(other_branch, dst_reg,
> > + find_good_buf_pointers(other_branch, dst_reg,
> > dst_reg->type, false);
> > } else if ((dst_reg->type == PTR_TO_PACKET_END &&
> > src_reg->type == PTR_TO_PACKET) ||
> > (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
> > src_reg->type == PTR_TO_PACKET_META)) {
> > /* pkt_end <= pkt_data', pkt_data <= pkt_meta' */
> > - find_good_pkt_pointers(this_branch, src_reg,
> > + find_good_buf_pointers(this_branch, src_reg,
> > src_reg->type, true);
> > + } else if (dst_reg->type == PTR_TO_BUFFER &&
> > + src_reg->type == PTR_TO_BUFFER_END) {
> > + /* buf' <= buf_end */
> > + find_good_buf_pointers(other_branch, dst_reg,
> > + dst_reg->type, false);
> > + } else if (dst_reg->type == PTR_TO_BUFFER_END &&
> > + src_reg->type == PTR_TO_BUFFER) {
> > + /* buf_end <= buf' */
> > + find_good_buf_pointers(this_branch, src_reg,
> > + src_reg->type, true);
> > } else {
> > return false;
> > }
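
With all four comparison directions covered, both of the usual test
idioms verify for buffers just as they do for packet data. A fragment,
once more against the hypothetical my_ctx (compilers typically emit
JGT/JLE for these pointer comparisons):

	char *p = ctx->buf;

	/* Bail-out form: bytes 0..7 are accessible once the test fails. */
	if (p + 8 > (char *)ctx->buf_end)
		return 0;
	p[7] = 0;

	/* Guarded form: bytes 0..15 are accessible where the test holds. */
	if (p + 16 <= (char *)ctx->buf_end)
		p[15] = 0;
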
> > @@ -7972,7 +8052,7 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env)
> > */
> > prog->cb_access = 1;
> > env->prog->aux->stack_depth = MAX_BPF_STACK;
> > - env->prog->aux->max_pkt_offset = MAX_PACKET_OFF;
> > + env->prog->aux->max_pkt_offset = MAX_BUFFER_OFF;
> >
> > /* mark bpf_tail_call as different opcode to avoid
> > * conditional branch in the interpreter for every normal