2021-03-24 21:51:54

by Florent Revest

[permalink] [raw]
Subject: [PATCH bpf-next v2 1/6] bpf: Factorize bpf_trace_printk and bpf_seq_printf

Two helpers (trace_printk and seq_printf) have very similar
implementations of format string parsing and a third one is coming
(snprintf). To avoid code duplication and make the code easier to
maintain, this moves the operations associated with format string
parsing (validation and argument sanitization) into one generic
function.

Unfortunately, the implementation of the two existing helpers already
drifted quite a bit and unifying them entailed a lot of changes:

- bpf_trace_printk always expected fmt[fmt_size] to be the terminating
NULL character, this is no longer true, the first 0 is terminating.
- bpf_trace_printk now supports %% (which produces the percentage char).
- bpf_trace_printk now skips width formating fields.
- bpf_trace_printk now supports the X modifier (capital hexadecimal).
- bpf_trace_printk now supports %pK, %px, %pB, %pi4, %pI4, %pi6 and %pI6
- argument casting on 32 bit has been simplified into one macro and
using an enum instead of obscure int increments.

- bpf_seq_printf now uses bpf_trace_copy_string instead of
strncpy_from_kernel_nofault and handles the %pks %pus specifiers.
- bpf_seq_printf now prints longs correctly on 32 bit architectures.

- both were changed to use a global per-cpu tmp buffer instead of one
stack buffer for trace_printk and 6 small buffers for seq_printf.
- to avoid per-cpu buffer usage conflict, these helpers disable
preemption while the per-cpu buffer is in use.
- both helpers now support the %ps and %pS specifiers to print symbols.

Signed-off-by: Florent Revest <[email protected]>
---
kernel/trace/bpf_trace.c | 529 ++++++++++++++++++---------------------
1 file changed, 244 insertions(+), 285 deletions(-)

diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 0d23755c2747..0fdca94a3c9c 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -372,7 +372,7 @@ static const struct bpf_func_proto *bpf_get_probe_write_proto(void)
return &bpf_probe_write_user_proto;
}

-static void bpf_trace_copy_string(char *buf, void *unsafe_ptr, char fmt_ptype,
+static int bpf_trace_copy_string(char *buf, void *unsafe_ptr, char fmt_ptype,
size_t bufsz)
{
void __user *user_ptr = (__force void __user *)unsafe_ptr;
@@ -382,178 +382,284 @@ static void bpf_trace_copy_string(char *buf, void *unsafe_ptr, char fmt_ptype,
switch (fmt_ptype) {
case 's':
#ifdef CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
- if ((unsigned long)unsafe_ptr < TASK_SIZE) {
- strncpy_from_user_nofault(buf, user_ptr, bufsz);
- break;
- }
+ if ((unsigned long)unsafe_ptr < TASK_SIZE)
+ return strncpy_from_user_nofault(buf, user_ptr, bufsz);
fallthrough;
#endif
case 'k':
- strncpy_from_kernel_nofault(buf, unsafe_ptr, bufsz);
- break;
+ return strncpy_from_kernel_nofault(buf, unsafe_ptr, bufsz);
case 'u':
- strncpy_from_user_nofault(buf, user_ptr, bufsz);
- break;
+ return strncpy_from_user_nofault(buf, user_ptr, bufsz);
}
+
+ return -EINVAL;
}

static DEFINE_RAW_SPINLOCK(trace_printk_lock);

-#define BPF_TRACE_PRINTK_SIZE 1024
+enum bpf_printf_mod_type {
+ BPF_PRINTF_INT,
+ BPF_PRINTF_LONG,
+ BPF_PRINTF_LONG_LONG,
+};

-static __printf(1, 0) int bpf_do_trace_printk(const char *fmt, ...)
-{
- static char buf[BPF_TRACE_PRINTK_SIZE];
- unsigned long flags;
- va_list ap;
- int ret;
+/* Horrid workaround for getting va_list handling working with different
+ * argument type combinations generically for 32 and 64 bit archs.
+ */
+#define BPF_CAST_FMT_ARG(arg_nb, args, mod) \
+ ((mod[arg_nb] == BPF_PRINTF_LONG_LONG || \
+ (mod[arg_nb] == BPF_PRINTF_LONG && __BITS_PER_LONG == 64)) \
+ ? args[arg_nb] \
+ : ((mod[arg_nb] == BPF_PRINTF_LONG || \
+ (mod[arg_nb] == BPF_PRINTF_INT && __BITS_PER_LONG == 32)) \
+ ? (long)args[arg_nb] \
+ : (u32)args[arg_nb]))
+
+/* Per-cpu temp buffers which can be used by printf-like helpers for %s or %p
+ */
+#define MAX_PRINTF_BUF_LEN 512

- raw_spin_lock_irqsave(&trace_printk_lock, flags);
- va_start(ap, fmt);
- ret = vsnprintf(buf, sizeof(buf), fmt, ap);
- va_end(ap);
- /* vsnprintf() will not append null for zero-length strings */
- if (ret == 0)
- buf[0] = '\0';
- trace_bpf_trace_printk(buf);
- raw_spin_unlock_irqrestore(&trace_printk_lock, flags);
+struct bpf_printf_buf {
+ char tmp_buf[MAX_PRINTF_BUF_LEN];
+};
+static DEFINE_PER_CPU(struct bpf_printf_buf, bpf_printf_buf);
+static DEFINE_PER_CPU(int, bpf_printf_buf_used);

- return ret;
+static void bpf_printf_postamble(void)
+{
+ if (this_cpu_read(bpf_printf_buf_used)) {
+ this_cpu_dec(bpf_printf_buf_used);
+ preempt_enable();
+ }
}

/*
- * Only limited trace_printk() conversion specifiers allowed:
- * %d %i %u %x %ld %li %lu %lx %lld %lli %llu %llx %p %pB %pks %pus %s
+ * bpf_parse_fmt_str - Generic pass on format strings for printf-like helpers
+ *
+ * Returns a negative value if fmt is an invalid format string or 0 otherwise.
+ *
+ * This can be used in two ways:
+ * - Format string verification only: when final_args and mod are NULL
+ * - Arguments preparation: in addition to the above verification, it writes in
+ * final_args a copy of raw_args where pointers from BPF have been sanitized
+ * into pointers safe to use by snprintf. This also writes in the mod array
+ * the size requirement of each argument, usable by BPF_CAST_FMT_ARG for ex.
+ *
+ * In argument preparation mode, if 0 is returned, safe temporary buffers are
+ * allocated and bpf_printf_postamble should be called to free them after use.
*/
-BPF_CALL_5(bpf_trace_printk, char *, fmt, u32, fmt_size, u64, arg1,
- u64, arg2, u64, arg3)
-{
- int i, mod[3] = {}, fmt_cnt = 0;
- char buf[64], fmt_ptype;
- void *unsafe_ptr = NULL;
- bool str_seen = false;
-
- /*
- * bpf_check()->check_func_arg()->check_stack_boundary()
- * guarantees that fmt points to bpf program stack,
- * fmt_size bytes of it were initialized and fmt_size > 0
- */
- if (fmt[--fmt_size] != 0)
- return -EINVAL;
-
- /* check format string for allowed specifiers */
- for (i = 0; i < fmt_size; i++) {
- if ((!isprint(fmt[i]) && !isspace(fmt[i])) || !isascii(fmt[i]))
- return -EINVAL;
+int bpf_printf_preamble(char *fmt, u32 fmt_size, const u64 *raw_args,
+ u64 *final_args, enum bpf_printf_mod_type *mod,
+ u32 num_args)
+{
+ struct bpf_printf_buf *bufs = this_cpu_ptr(&bpf_printf_buf);
+ int err, i, fmt_cnt = 0, copy_size, used;
+ char *unsafe_ptr = NULL, *tmp_buf = NULL;
+ bool prepare_args = final_args && mod;
+ enum bpf_printf_mod_type current_mod;
+ size_t tmp_buf_len;
+ u64 current_arg;
+ char fmt_ptype;
+
+ for (i = 0; i < fmt_size && fmt[i] != '\0'; i++) {
+ if ((!isprint(fmt[i]) && !isspace(fmt[i])) ||
+ !isascii(fmt[i])) {
+ err = -EINVAL;
+ goto out;
+ }

if (fmt[i] != '%')
continue;

- if (fmt_cnt >= 3)
- return -EINVAL;
+ if (fmt[i + 1] == '%') {
+ i++;
+ continue;
+ }
+
+ if (fmt_cnt >= num_args) {
+ err = -EINVAL;
+ goto out;
+ }

/* fmt[i] != 0 && fmt[last] == 0, so we can access fmt[i + 1] */
i++;
- if (fmt[i] == 'l') {
- mod[fmt_cnt]++;
+
+ /* skip optional "[0 +-][num]" width formating field */
+ while (fmt[i] == '0' || fmt[i] == '+' || fmt[i] == '-' ||
+ fmt[i] == ' ')
+ i++;
+ if (fmt[i] >= '1' && fmt[i] <= '9') {
i++;
- } else if (fmt[i] == 'p') {
- mod[fmt_cnt]++;
- if ((fmt[i + 1] == 'k' ||
- fmt[i + 1] == 'u') &&
+ while (fmt[i] >= '0' && fmt[i] <= '9')
+ i++;
+ }
+
+ if (fmt[i] == 'p') {
+ current_mod = BPF_PRINTF_LONG;
+
+ if ((fmt[i + 1] == 'k' || fmt[i + 1] == 'u') &&
fmt[i + 2] == 's') {
fmt_ptype = fmt[i + 1];
i += 2;
goto fmt_str;
}

- if (fmt[i + 1] == 'B') {
- i++;
+ if (fmt[i + 1] == 0 || isspace(fmt[i + 1]) ||
+ ispunct(fmt[i + 1]) || fmt[i + 1] == 'K' ||
+ fmt[i + 1] == 'x' || fmt[i + 1] == 'B' ||
+ fmt[i + 1] == 's' || fmt[i + 1] == 'S') {
+ /* just kernel pointers */
+ if (prepare_args)
+ current_arg = raw_args[fmt_cnt];
goto fmt_next;
}

- /* disallow any further format extensions */
- if (fmt[i + 1] != 0 &&
- !isspace(fmt[i + 1]) &&
- !ispunct(fmt[i + 1]))
- return -EINVAL;
+ /* only support "%pI4", "%pi4", "%pI6" and "%pi6". */
+ if ((fmt[i + 1] != 'i' && fmt[i + 1] != 'I') ||
+ (fmt[i + 2] != '4' && fmt[i + 2] != '6')) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ i += 2;
+ if (!prepare_args)
+ goto fmt_next;
+
+ if (!tmp_buf) {
+ used = this_cpu_inc_return(bpf_printf_buf_used);
+ if (WARN_ON_ONCE(used > 1)) {
+ this_cpu_dec(bpf_printf_buf_used);
+ return -EBUSY;
+ }
+ preempt_disable();
+ tmp_buf = bufs->tmp_buf;
+ tmp_buf_len = MAX_PRINTF_BUF_LEN;
+ }
+
+ copy_size = (fmt[i + 2] == '4') ? 4 : 16;
+ if (tmp_buf_len < copy_size) {
+ err = -ENOSPC;
+ goto out;
+ }
+
+ unsafe_ptr = (char *)(long)raw_args[fmt_cnt];
+ err = copy_from_kernel_nofault(tmp_buf, unsafe_ptr,
+ copy_size);
+ if (err < 0)
+ memset(tmp_buf, 0, copy_size);
+ current_arg = (u64)(long)tmp_buf;
+ tmp_buf += copy_size;
+ tmp_buf_len -= copy_size;

goto fmt_next;
} else if (fmt[i] == 's') {
- mod[fmt_cnt]++;
+ current_mod = BPF_PRINTF_LONG;
fmt_ptype = fmt[i];
fmt_str:
- if (str_seen)
- /* allow only one '%s' per fmt string */
- return -EINVAL;
- str_seen = true;
-
if (fmt[i + 1] != 0 &&
!isspace(fmt[i + 1]) &&
- !ispunct(fmt[i + 1]))
- return -EINVAL;
+ !ispunct(fmt[i + 1])) {
+ err = -EINVAL;
+ goto out;
+ }

- switch (fmt_cnt) {
- case 0:
- unsafe_ptr = (void *)(long)arg1;
- arg1 = (long)buf;
- break;
- case 1:
- unsafe_ptr = (void *)(long)arg2;
- arg2 = (long)buf;
- break;
- case 2:
- unsafe_ptr = (void *)(long)arg3;
- arg3 = (long)buf;
- break;
+ if (!prepare_args)
+ goto fmt_next;
+
+ if (!tmp_buf) {
+ used = this_cpu_inc_return(bpf_printf_buf_used);
+ if (WARN_ON_ONCE(used > 1)) {
+ this_cpu_dec(bpf_printf_buf_used);
+ return -EBUSY;
+ }
+ preempt_disable();
+ tmp_buf = bufs->tmp_buf;
+ tmp_buf_len = MAX_PRINTF_BUF_LEN;
+ }
+
+ if (!tmp_buf_len) {
+ err = -ENOSPC;
+ goto out;
}

- bpf_trace_copy_string(buf, unsafe_ptr, fmt_ptype,
- sizeof(buf));
+ unsafe_ptr = (char *)(long)raw_args[fmt_cnt];
+ err = bpf_trace_copy_string(tmp_buf, unsafe_ptr,
+ fmt_ptype, tmp_buf_len);
+ if (err < 0) {
+ tmp_buf[0] = '\0';
+ err = 1;
+ }
+
+ current_arg = (u64)(long)tmp_buf;
+ tmp_buf += err;
+ tmp_buf_len -= err;
+
goto fmt_next;
}

+ current_mod = BPF_PRINTF_INT;
+
if (fmt[i] == 'l') {
- mod[fmt_cnt]++;
+ current_mod = BPF_PRINTF_LONG;
+ i++;
+ }
+ if (fmt[i] == 'l') {
+ current_mod = BPF_PRINTF_LONG_LONG;
i++;
}

- if (fmt[i] != 'i' && fmt[i] != 'd' &&
- fmt[i] != 'u' && fmt[i] != 'x')
- return -EINVAL;
+ if (fmt[i] != 'i' && fmt[i] != 'd' && fmt[i] != 'u' &&
+ fmt[i] != 'x' && fmt[i] != 'X') {
+ err = -EINVAL;
+ goto out;
+ }
+
+ if (prepare_args)
+ current_arg = raw_args[fmt_cnt];
fmt_next:
+ if (prepare_args) {
+ mod[fmt_cnt] = current_mod;
+ final_args[fmt_cnt] = current_arg;
+ }
fmt_cnt++;
}

-/* Horrid workaround for getting va_list handling working with different
- * argument type combinations generically for 32 and 64 bit archs.
- */
-#define __BPF_TP_EMIT() __BPF_ARG3_TP()
-#define __BPF_TP(...) \
- bpf_do_trace_printk(fmt, ##__VA_ARGS__)
-
-#define __BPF_ARG1_TP(...) \
- ((mod[0] == 2 || (mod[0] == 1 && __BITS_PER_LONG == 64)) \
- ? __BPF_TP(arg1, ##__VA_ARGS__) \
- : ((mod[0] == 1 || (mod[0] == 0 && __BITS_PER_LONG == 32)) \
- ? __BPF_TP((long)arg1, ##__VA_ARGS__) \
- : __BPF_TP((u32)arg1, ##__VA_ARGS__)))
-
-#define __BPF_ARG2_TP(...) \
- ((mod[1] == 2 || (mod[1] == 1 && __BITS_PER_LONG == 64)) \
- ? __BPF_ARG1_TP(arg2, ##__VA_ARGS__) \
- : ((mod[1] == 1 || (mod[1] == 0 && __BITS_PER_LONG == 32)) \
- ? __BPF_ARG1_TP((long)arg2, ##__VA_ARGS__) \
- : __BPF_ARG1_TP((u32)arg2, ##__VA_ARGS__)))
-
-#define __BPF_ARG3_TP(...) \
- ((mod[2] == 2 || (mod[2] == 1 && __BITS_PER_LONG == 64)) \
- ? __BPF_ARG2_TP(arg3, ##__VA_ARGS__) \
- : ((mod[2] == 1 || (mod[2] == 0 && __BITS_PER_LONG == 32)) \
- ? __BPF_ARG2_TP((long)arg3, ##__VA_ARGS__) \
- : __BPF_ARG2_TP((u32)arg3, ##__VA_ARGS__)))
-
- return __BPF_TP_EMIT();
+ err = 0;
+out:
+ bpf_printf_postamble();
+ return err;
+}
+
+#define MAX_TRACE_PRINTK_VARARGS 3
+#define BPF_TRACE_PRINTK_SIZE 1024
+
+BPF_CALL_5(bpf_trace_printk, char *, fmt, u32, fmt_size, u64, arg1,
+ u64, arg2, u64, arg3)
+{
+ u64 args[MAX_TRACE_PRINTK_VARARGS] = { arg1, arg2, arg3 };
+ enum bpf_printf_mod_type mod[MAX_TRACE_PRINTK_VARARGS];
+ static char buf[BPF_TRACE_PRINTK_SIZE];
+ unsigned long flags;
+ int ret;
+
+ ret = bpf_printf_preamble(fmt, fmt_size, args, args, mod,
+ MAX_TRACE_PRINTK_VARARGS);
+ if (ret < 0)
+ return ret;
+
+ ret = snprintf(buf, sizeof(buf), fmt, BPF_CAST_FMT_ARG(0, args, mod),
+ BPF_CAST_FMT_ARG(1, args, mod), BPF_CAST_FMT_ARG(2, args, mod));
+ /* snprintf() will not append null for zero-length strings */
+ if (ret == 0)
+ buf[0] = '\0';
+
+ raw_spin_lock_irqsave(&trace_printk_lock, flags);
+ trace_bpf_trace_printk(buf);
+ raw_spin_unlock_irqrestore(&trace_printk_lock, flags);
+
+ bpf_printf_postamble();
+
+ return ret;
}

static const struct bpf_func_proto bpf_trace_printk_proto = {
@@ -581,184 +687,37 @@ const struct bpf_func_proto *bpf_get_trace_printk_proto(void)
}

#define MAX_SEQ_PRINTF_VARARGS 12
-#define MAX_SEQ_PRINTF_MAX_MEMCPY 6
-#define MAX_SEQ_PRINTF_STR_LEN 128
-
-struct bpf_seq_printf_buf {
- char buf[MAX_SEQ_PRINTF_MAX_MEMCPY][MAX_SEQ_PRINTF_STR_LEN];
-};
-static DEFINE_PER_CPU(struct bpf_seq_printf_buf, bpf_seq_printf_buf);
-static DEFINE_PER_CPU(int, bpf_seq_printf_buf_used);

BPF_CALL_5(bpf_seq_printf, struct seq_file *, m, char *, fmt, u32, fmt_size,
const void *, data, u32, data_len)
{
- int err = -EINVAL, fmt_cnt = 0, memcpy_cnt = 0;
- int i, buf_used, copy_size, num_args;
- u64 params[MAX_SEQ_PRINTF_VARARGS];
- struct bpf_seq_printf_buf *bufs;
- const u64 *args = data;
-
- buf_used = this_cpu_inc_return(bpf_seq_printf_buf_used);
- if (WARN_ON_ONCE(buf_used > 1)) {
- err = -EBUSY;
- goto out;
- }
-
- bufs = this_cpu_ptr(&bpf_seq_printf_buf);
-
- /*
- * bpf_check()->check_func_arg()->check_stack_boundary()
- * guarantees that fmt points to bpf program stack,
- * fmt_size bytes of it were initialized and fmt_size > 0
- */
- if (fmt[--fmt_size] != 0)
- goto out;
-
- if (data_len & 7)
- goto out;
-
- for (i = 0; i < fmt_size; i++) {
- if (fmt[i] == '%') {
- if (fmt[i + 1] == '%')
- i++;
- else if (!data || !data_len)
- goto out;
- }
- }
+ enum bpf_printf_mod_type mod[MAX_SEQ_PRINTF_VARARGS];
+ u64 args[MAX_SEQ_PRINTF_VARARGS];
+ int err, num_args;

+ if (data_len & 7 || data_len > MAX_SEQ_PRINTF_VARARGS * 8 ||
+ (data_len && !data))
+ return -EINVAL;
num_args = data_len / 8;

- /* check format string for allowed specifiers */
- for (i = 0; i < fmt_size; i++) {
- /* only printable ascii for now. */
- if ((!isprint(fmt[i]) && !isspace(fmt[i])) || !isascii(fmt[i])) {
- err = -EINVAL;
- goto out;
- }
-
- if (fmt[i] != '%')
- continue;
-
- if (fmt[i + 1] == '%') {
- i++;
- continue;
- }
-
- if (fmt_cnt >= MAX_SEQ_PRINTF_VARARGS) {
- err = -E2BIG;
- goto out;
- }
-
- if (fmt_cnt >= num_args) {
- err = -EINVAL;
- goto out;
- }
-
- /* fmt[i] != 0 && fmt[last] == 0, so we can access fmt[i + 1] */
- i++;
-
- /* skip optional "[0 +-][num]" width formating field */
- while (fmt[i] == '0' || fmt[i] == '+' || fmt[i] == '-' ||
- fmt[i] == ' ')
- i++;
- if (fmt[i] >= '1' && fmt[i] <= '9') {
- i++;
- while (fmt[i] >= '0' && fmt[i] <= '9')
- i++;
- }
-
- if (fmt[i] == 's') {
- void *unsafe_ptr;
-
- /* try our best to copy */
- if (memcpy_cnt >= MAX_SEQ_PRINTF_MAX_MEMCPY) {
- err = -E2BIG;
- goto out;
- }
-
- unsafe_ptr = (void *)(long)args[fmt_cnt];
- err = strncpy_from_kernel_nofault(bufs->buf[memcpy_cnt],
- unsafe_ptr, MAX_SEQ_PRINTF_STR_LEN);
- if (err < 0)
- bufs->buf[memcpy_cnt][0] = '\0';
- params[fmt_cnt] = (u64)(long)bufs->buf[memcpy_cnt];
-
- fmt_cnt++;
- memcpy_cnt++;
- continue;
- }
-
- if (fmt[i] == 'p') {
- if (fmt[i + 1] == 0 ||
- fmt[i + 1] == 'K' ||
- fmt[i + 1] == 'x' ||
- fmt[i + 1] == 'B') {
- /* just kernel pointers */
- params[fmt_cnt] = args[fmt_cnt];
- fmt_cnt++;
- continue;
- }
-
- /* only support "%pI4", "%pi4", "%pI6" and "%pi6". */
- if (fmt[i + 1] != 'i' && fmt[i + 1] != 'I') {
- err = -EINVAL;
- goto out;
- }
- if (fmt[i + 2] != '4' && fmt[i + 2] != '6') {
- err = -EINVAL;
- goto out;
- }
-
- if (memcpy_cnt >= MAX_SEQ_PRINTF_MAX_MEMCPY) {
- err = -E2BIG;
- goto out;
- }
-
-
- copy_size = (fmt[i + 2] == '4') ? 4 : 16;
-
- err = copy_from_kernel_nofault(bufs->buf[memcpy_cnt],
- (void *) (long) args[fmt_cnt],
- copy_size);
- if (err < 0)
- memset(bufs->buf[memcpy_cnt], 0, copy_size);
- params[fmt_cnt] = (u64)(long)bufs->buf[memcpy_cnt];
-
- i += 2;
- fmt_cnt++;
- memcpy_cnt++;
- continue;
- }
-
- if (fmt[i] == 'l') {
- i++;
- if (fmt[i] == 'l')
- i++;
- }
-
- if (fmt[i] != 'i' && fmt[i] != 'd' &&
- fmt[i] != 'u' && fmt[i] != 'x' &&
- fmt[i] != 'X') {
- err = -EINVAL;
- goto out;
- }
-
- params[fmt_cnt] = args[fmt_cnt];
- fmt_cnt++;
- }
+ err = bpf_printf_preamble(fmt, fmt_size, data, args, mod, num_args);
+ if (err < 0)
+ return err;

/* Maximumly we can have MAX_SEQ_PRINTF_VARARGS parameter, just give
* all of them to seq_printf().
*/
- seq_printf(m, fmt, params[0], params[1], params[2], params[3],
- params[4], params[5], params[6], params[7], params[8],
- params[9], params[10], params[11]);
+ seq_printf(m, fmt, BPF_CAST_FMT_ARG(0, args, mod),
+ BPF_CAST_FMT_ARG(1, args, mod), BPF_CAST_FMT_ARG(2, args, mod),
+ BPF_CAST_FMT_ARG(3, args, mod), BPF_CAST_FMT_ARG(4, args, mod),
+ BPF_CAST_FMT_ARG(5, args, mod), BPF_CAST_FMT_ARG(6, args, mod),
+ BPF_CAST_FMT_ARG(7, args, mod), BPF_CAST_FMT_ARG(8, args, mod),
+ BPF_CAST_FMT_ARG(9, args, mod), BPF_CAST_FMT_ARG(10, args, mod),
+ BPF_CAST_FMT_ARG(11, args, mod));

- err = seq_has_overflowed(m) ? -EOVERFLOW : 0;
-out:
- this_cpu_dec(bpf_seq_printf_buf_used);
- return err;
+ bpf_printf_postamble();
+
+ return seq_has_overflowed(m) ? -EOVERFLOW : 0;
}

BTF_ID_LIST_SINGLE(btf_seq_file_ids, struct, seq_file)
--
2.31.0.291.g576ba9dcdaf-goog


2021-03-26 21:55:51

by Andrii Nakryiko

[permalink] [raw]
Subject: Re: [PATCH bpf-next v2 1/6] bpf: Factorize bpf_trace_printk and bpf_seq_printf

On Tue, Mar 23, 2021 at 7:23 PM Florent Revest <[email protected]> wrote:
>
> Two helpers (trace_printk and seq_printf) have very similar
> implementations of format string parsing and a third one is coming
> (snprintf). To avoid code duplication and make the code easier to
> maintain, this moves the operations associated with format string
> parsing (validation and argument sanitization) into one generic
> function.
>
> Unfortunately, the implementation of the two existing helpers already
> drifted quite a bit and unifying them entailed a lot of changes:

"Unfortunately" as in a lot of extra work for you? I think overall
though it was very fortunate that you ended up doing it, all
implementations are more feature-complete and saner now, no? Thanks a
lot for your hard work!

>
> - bpf_trace_printk always expected fmt[fmt_size] to be the terminating
> NULL character, this is no longer true, the first 0 is terminating.

You mean if you had bpf_trace_printk("bla bla\0some more bla\0", 24)
it would emit that zero character? If yes, I don't think it was a sane
behavior anyways.

> - bpf_trace_printk now supports %% (which produces the percentage char).
> - bpf_trace_printk now skips width formating fields.
> - bpf_trace_printk now supports the X modifier (capital hexadecimal).
> - bpf_trace_printk now supports %pK, %px, %pB, %pi4, %pI4, %pi6 and %pI6
> - argument casting on 32 bit has been simplified into one macro and
> using an enum instead of obscure int increments.
>
> - bpf_seq_printf now uses bpf_trace_copy_string instead of
> strncpy_from_kernel_nofault and handles the %pks %pus specifiers.
> - bpf_seq_printf now prints longs correctly on 32 bit architectures.
>
> - both were changed to use a global per-cpu tmp buffer instead of one
> stack buffer for trace_printk and 6 small buffers for seq_printf.
> - to avoid per-cpu buffer usage conflict, these helpers disable
> preemption while the per-cpu buffer is in use.
> - both helpers now support the %ps and %pS specifiers to print symbols.
>
> Signed-off-by: Florent Revest <[email protected]>
> ---

This is great, you already saved some lines of code! I suspect I'll
have some complaints about mods (it feels like this preample should
provide extra information about which arguments have to be read from
kernel/user memory, but I'll see next patches first.

See my comments below (I deliberately didn't trim most of the code for
easier jumping around), but it's great overall, thanks!

> kernel/trace/bpf_trace.c | 529 ++++++++++++++++++---------------------
> 1 file changed, 244 insertions(+), 285 deletions(-)
>
> diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
> index 0d23755c2747..0fdca94a3c9c 100644
> --- a/kernel/trace/bpf_trace.c
> +++ b/kernel/trace/bpf_trace.c
> @@ -372,7 +372,7 @@ static const struct bpf_func_proto *bpf_get_probe_write_proto(void)
> return &bpf_probe_write_user_proto;
> }
>
> -static void bpf_trace_copy_string(char *buf, void *unsafe_ptr, char fmt_ptype,
> +static int bpf_trace_copy_string(char *buf, void *unsafe_ptr, char fmt_ptype,
> size_t bufsz)
> {
> void __user *user_ptr = (__force void __user *)unsafe_ptr;
> @@ -382,178 +382,284 @@ static void bpf_trace_copy_string(char *buf, void *unsafe_ptr, char fmt_ptype,
> switch (fmt_ptype) {
> case 's':
> #ifdef CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
> - if ((unsigned long)unsafe_ptr < TASK_SIZE) {
> - strncpy_from_user_nofault(buf, user_ptr, bufsz);
> - break;
> - }
> + if ((unsigned long)unsafe_ptr < TASK_SIZE)
> + return strncpy_from_user_nofault(buf, user_ptr, bufsz);
> fallthrough;
> #endif
> case 'k':
> - strncpy_from_kernel_nofault(buf, unsafe_ptr, bufsz);
> - break;
> + return strncpy_from_kernel_nofault(buf, unsafe_ptr, bufsz);
> case 'u':
> - strncpy_from_user_nofault(buf, user_ptr, bufsz);
> - break;
> + return strncpy_from_user_nofault(buf, user_ptr, bufsz);
> }
> +
> + return -EINVAL;
> }
>
> static DEFINE_RAW_SPINLOCK(trace_printk_lock);
>
> -#define BPF_TRACE_PRINTK_SIZE 1024
> +enum bpf_printf_mod_type {
> + BPF_PRINTF_INT,
> + BPF_PRINTF_LONG,
> + BPF_PRINTF_LONG_LONG,
> +};
>
> -static __printf(1, 0) int bpf_do_trace_printk(const char *fmt, ...)
> -{
> - static char buf[BPF_TRACE_PRINTK_SIZE];
> - unsigned long flags;
> - va_list ap;
> - int ret;
> +/* Horrid workaround for getting va_list handling working with different
> + * argument type combinations generically for 32 and 64 bit archs.
> + */
> +#define BPF_CAST_FMT_ARG(arg_nb, args, mod) \
> + ((mod[arg_nb] == BPF_PRINTF_LONG_LONG || \
> + (mod[arg_nb] == BPF_PRINTF_LONG && __BITS_PER_LONG == 64)) \
> + ? args[arg_nb] \
> + : ((mod[arg_nb] == BPF_PRINTF_LONG || \
> + (mod[arg_nb] == BPF_PRINTF_INT && __BITS_PER_LONG == 32)) \

is this right? INT is always 32-bit, it's only LONG that differs.
Shouldn't the rule be

(LONG_LONG || LONG && __BITS_PER_LONG) -> (__u64)args[args_nb]
(INT || LONG && __BITS_PER_LONG == 32) -> (__u32)args[args_nb]

Does (long) cast do anything fancy when casting from u64? Sorry, maybe
I'm confused.


> + ? (long)args[arg_nb] \
> + : (u32)args[arg_nb]))
> +
> +/* Per-cpu temp buffers which can be used by printf-like helpers for %s or %p
> + */
> +#define MAX_PRINTF_BUF_LEN 512
>
> - raw_spin_lock_irqsave(&trace_printk_lock, flags);
> - va_start(ap, fmt);
> - ret = vsnprintf(buf, sizeof(buf), fmt, ap);
> - va_end(ap);
> - /* vsnprintf() will not append null for zero-length strings */
> - if (ret == 0)
> - buf[0] = '\0';
> - trace_bpf_trace_printk(buf);
> - raw_spin_unlock_irqrestore(&trace_printk_lock, flags);
> +struct bpf_printf_buf {
> + char tmp_buf[MAX_PRINTF_BUF_LEN];
> +};
> +static DEFINE_PER_CPU(struct bpf_printf_buf, bpf_printf_buf);
> +static DEFINE_PER_CPU(int, bpf_printf_buf_used);
>
> - return ret;
> +static void bpf_printf_postamble(void)
> +{
> + if (this_cpu_read(bpf_printf_buf_used)) {
> + this_cpu_dec(bpf_printf_buf_used);
> + preempt_enable();
> + }
> }
>
> /*
> - * Only limited trace_printk() conversion specifiers allowed:
> - * %d %i %u %x %ld %li %lu %lx %lld %lli %llu %llx %p %pB %pks %pus %s
> + * bpf_parse_fmt_str - Generic pass on format strings for printf-like helpers
> + *
> + * Returns a negative value if fmt is an invalid format string or 0 otherwise.
> + *
> + * This can be used in two ways:
> + * - Format string verification only: when final_args and mod are NULL
> + * - Arguments preparation: in addition to the above verification, it writes in
> + * final_args a copy of raw_args where pointers from BPF have been sanitized
> + * into pointers safe to use by snprintf. This also writes in the mod array
> + * the size requirement of each argument, usable by BPF_CAST_FMT_ARG for ex.
> + *
> + * In argument preparation mode, if 0 is returned, safe temporary buffers are
> + * allocated and bpf_printf_postamble should be called to free them after use.
> */
> -BPF_CALL_5(bpf_trace_printk, char *, fmt, u32, fmt_size, u64, arg1,
> - u64, arg2, u64, arg3)
> -{
> - int i, mod[3] = {}, fmt_cnt = 0;
> - char buf[64], fmt_ptype;
> - void *unsafe_ptr = NULL;
> - bool str_seen = false;
> -
> - /*
> - * bpf_check()->check_func_arg()->check_stack_boundary()
> - * guarantees that fmt points to bpf program stack,
> - * fmt_size bytes of it were initialized and fmt_size > 0
> - */
> - if (fmt[--fmt_size] != 0)
> - return -EINVAL;
> -
> - /* check format string for allowed specifiers */
> - for (i = 0; i < fmt_size; i++) {
> - if ((!isprint(fmt[i]) && !isspace(fmt[i])) || !isascii(fmt[i]))
> - return -EINVAL;
> +int bpf_printf_preamble(char *fmt, u32 fmt_size, const u64 *raw_args,
> + u64 *final_args, enum bpf_printf_mod_type *mod,
> + u32 num_args)
> +{
> + struct bpf_printf_buf *bufs = this_cpu_ptr(&bpf_printf_buf);
> + int err, i, fmt_cnt = 0, copy_size, used;
> + char *unsafe_ptr = NULL, *tmp_buf = NULL;
> + bool prepare_args = final_args && mod;

probably better to enforce that both or none are specified, otherwise
return error

> + enum bpf_printf_mod_type current_mod;
> + size_t tmp_buf_len;
> + u64 current_arg;
> + char fmt_ptype;
> +
> + for (i = 0; i < fmt_size && fmt[i] != '\0'; i++) {

Can we say that if the last character is not '\0' then it's a bad
format string and return -EINVAL? And if \0 is inside the format
string, then it's also a bad format string? I wonder what others think
about this?... I think sanity should prevail.

> + if ((!isprint(fmt[i]) && !isspace(fmt[i])) ||
> + !isascii(fmt[i])) {

&& always binds tighter than ||, so you can omit extra (). I'd put
this on a single line as well, but that's a total nit.

> + err = -EINVAL;
> + goto out;
> + }
>
> if (fmt[i] != '%')
> continue;
>
> - if (fmt_cnt >= 3)
> - return -EINVAL;
> + if (fmt[i + 1] == '%') {
> + i++;
> + continue;
> + }
> +
> + if (fmt_cnt >= num_args) {
> + err = -EINVAL;
> + goto out;
> + }
>
> /* fmt[i] != 0 && fmt[last] == 0, so we can access fmt[i + 1] */
> i++;
> - if (fmt[i] == 'l') {
> - mod[fmt_cnt]++;
> +
> + /* skip optional "[0 +-][num]" width formating field */

typo: formatting

> + while (fmt[i] == '0' || fmt[i] == '+' || fmt[i] == '-' ||
> + fmt[i] == ' ')
> + i++;
> + if (fmt[i] >= '1' && fmt[i] <= '9') {
> i++;

Are we worried about integer overflow here? %123123123123123d
hopefully won't crash anything, right?

> - } else if (fmt[i] == 'p') {
> - mod[fmt_cnt]++;
> - if ((fmt[i + 1] == 'k' ||
> - fmt[i + 1] == 'u') &&
> + while (fmt[i] >= '0' && fmt[i] <= '9')
> + i++;

whoa, fmt_size shouldn't be ignored

> + }
> +

and here if we exhausted all format string but haven't gotten to
format specified, we should -EINVAL

if (i >= fmt_size) return -EINVAL?

> + if (fmt[i] == 'p') {
> + current_mod = BPF_PRINTF_LONG;
> +
> + if ((fmt[i + 1] == 'k' || fmt[i + 1] == 'u') &&
> fmt[i + 2] == 's') {

right, if i + 2 is ok to access? always be remembering about fmt_size

> fmt_ptype = fmt[i + 1];
> i += 2;
> goto fmt_str;
> }
>
> - if (fmt[i + 1] == 'B') {
> - i++;
> + if (fmt[i + 1] == 0 || isspace(fmt[i + 1]) ||
> + ispunct(fmt[i + 1]) || fmt[i + 1] == 'K' ||
> + fmt[i + 1] == 'x' || fmt[i + 1] == 'B' ||
> + fmt[i + 1] == 's' || fmt[i + 1] == 'S') {
> + /* just kernel pointers */
> + if (prepare_args)
> + current_arg = raw_args[fmt_cnt];

fmt_cnt is not the best name, imo. arg_cnt makes more sense

> goto fmt_next;
> }
>
> - /* disallow any further format extensions */
> - if (fmt[i + 1] != 0 &&
> - !isspace(fmt[i + 1]) &&
> - !ispunct(fmt[i + 1]))
> - return -EINVAL;
> + /* only support "%pI4", "%pi4", "%pI6" and "%pi6". */
> + if ((fmt[i + 1] != 'i' && fmt[i + 1] != 'I') ||
> + (fmt[i + 2] != '4' && fmt[i + 2] != '6')) {
> + err = -EINVAL;
> + goto out;
> + }
> +
> + i += 2;
> + if (!prepare_args)
> + goto fmt_next;
> +
> + if (!tmp_buf) {
> + used = this_cpu_inc_return(bpf_printf_buf_used);
> + if (WARN_ON_ONCE(used > 1)) {
> + this_cpu_dec(bpf_printf_buf_used);
> + return -EBUSY;
> + }
> + preempt_disable();

shouldn't we preempt_disable before we got bpf_printf_buf_used? if we
get preempted after incrementing counter, buffer will be unusable for
a while, potentially, right?

> + tmp_buf = bufs->tmp_buf;
> + tmp_buf_len = MAX_PRINTF_BUF_LEN;
> + }
> +
> + copy_size = (fmt[i + 2] == '4') ? 4 : 16;
> + if (tmp_buf_len < copy_size) {
> + err = -ENOSPC;
> + goto out;
> + }
> +
> + unsafe_ptr = (char *)(long)raw_args[fmt_cnt];
> + err = copy_from_kernel_nofault(tmp_buf, unsafe_ptr,
> + copy_size);
> + if (err < 0)
> + memset(tmp_buf, 0, copy_size);
> + current_arg = (u64)(long)tmp_buf;
> + tmp_buf += copy_size;
> + tmp_buf_len -= copy_size;
>
> goto fmt_next;
> } else if (fmt[i] == 's') {
> - mod[fmt_cnt]++;
> + current_mod = BPF_PRINTF_LONG;
> fmt_ptype = fmt[i];
> fmt_str:
> - if (str_seen)
> - /* allow only one '%s' per fmt string */
> - return -EINVAL;
> - str_seen = true;
> -
> if (fmt[i + 1] != 0 &&
> !isspace(fmt[i + 1]) &&
> - !ispunct(fmt[i + 1]))
> - return -EINVAL;
> + !ispunct(fmt[i + 1])) {
> + err = -EINVAL;
> + goto out;
> + }
>
> - switch (fmt_cnt) {
> - case 0:
> - unsafe_ptr = (void *)(long)arg1;
> - arg1 = (long)buf;
> - break;
> - case 1:
> - unsafe_ptr = (void *)(long)arg2;
> - arg2 = (long)buf;
> - break;
> - case 2:
> - unsafe_ptr = (void *)(long)arg3;
> - arg3 = (long)buf;
> - break;
> + if (!prepare_args)
> + goto fmt_next;
> +
> + if (!tmp_buf) {
> + used = this_cpu_inc_return(bpf_printf_buf_used);
> + if (WARN_ON_ONCE(used > 1)) {
> + this_cpu_dec(bpf_printf_buf_used);
> + return -EBUSY;
> + }
> + preempt_disable();
> + tmp_buf = bufs->tmp_buf;
> + tmp_buf_len = MAX_PRINTF_BUF_LEN;
> + }

how about helper used like this:

if (try_get_fmt_tmp_buf(&tmp_buf, &tmp_buf_len))
return -EBUSY;

which will do nothing if tmp_buf != NULL?

> +
> + if (!tmp_buf_len) {
> + err = -ENOSPC;
> + goto out;
> }
>
> - bpf_trace_copy_string(buf, unsafe_ptr, fmt_ptype,
> - sizeof(buf));
> + unsafe_ptr = (char *)(long)raw_args[fmt_cnt];
> + err = bpf_trace_copy_string(tmp_buf, unsafe_ptr,
> + fmt_ptype, tmp_buf_len);
> + if (err < 0) {
> + tmp_buf[0] = '\0';
> + err = 1;
> + }
> +
> + current_arg = (u64)(long)tmp_buf;
> + tmp_buf += err;
> + tmp_buf_len -= err;
> +
> goto fmt_next;
> }
>
> + current_mod = BPF_PRINTF_INT;
> +
> if (fmt[i] == 'l') {
> - mod[fmt_cnt]++;
> + current_mod = BPF_PRINTF_LONG;
> + i++;
> + }
> + if (fmt[i] == 'l') {
> + current_mod = BPF_PRINTF_LONG_LONG;
> i++;
> }
>
> - if (fmt[i] != 'i' && fmt[i] != 'd' &&
> - fmt[i] != 'u' && fmt[i] != 'x')
> - return -EINVAL;
> + if (fmt[i] != 'i' && fmt[i] != 'd' && fmt[i] != 'u' &&
> + fmt[i] != 'x' && fmt[i] != 'X') {
> + err = -EINVAL;
> + goto out;
> + }
> +
> + if (prepare_args)
> + current_arg = raw_args[fmt_cnt];
> fmt_next:
> + if (prepare_args) {

I'd ditch prepare_args variable and just check final_args (and that
check to ensure both mods and final_args are specified I suggested
above)

> + mod[fmt_cnt] = current_mod;
> + final_args[fmt_cnt] = current_arg;
> + }
> fmt_cnt++;
> }

[...]

> -
> - return __BPF_TP_EMIT();
> + err = 0;
> +out:
> + bpf_printf_postamble();

naming is hard, but preamble and postamble reads way too fancy :)
bpf_printf_prepare() and bpf_printf_cleanup() or something like that
is a bit more to the point, no?


> + return err;
> +}
> +

[...]

>
> static const struct bpf_func_proto bpf_trace_printk_proto = {
> @@ -581,184 +687,37 @@ const struct bpf_func_proto *bpf_get_trace_printk_proto(void)
> }
>
> #define MAX_SEQ_PRINTF_VARARGS 12
> -#define MAX_SEQ_PRINTF_MAX_MEMCPY 6
> -#define MAX_SEQ_PRINTF_STR_LEN 128
> -
> -struct bpf_seq_printf_buf {
> - char buf[MAX_SEQ_PRINTF_MAX_MEMCPY][MAX_SEQ_PRINTF_STR_LEN];
> -};
> -static DEFINE_PER_CPU(struct bpf_seq_printf_buf, bpf_seq_printf_buf);
> -static DEFINE_PER_CPU(int, bpf_seq_printf_buf_used);
>
> BPF_CALL_5(bpf_seq_printf, struct seq_file *, m, char *, fmt, u32, fmt_size,
> const void *, data, u32, data_len)

[...]

> + enum bpf_printf_mod_type mod[MAX_SEQ_PRINTF_VARARGS];
> + u64 args[MAX_SEQ_PRINTF_VARARGS];
> + int err, num_args;
>
> + if (data_len & 7 || data_len > MAX_SEQ_PRINTF_VARARGS * 8 ||
> + (data_len && !data))

data && !data_len is also an error, no?

> + return -EINVAL;
> num_args = data_len / 8;
>

[...]

2021-03-26 22:52:52

by Andrii Nakryiko

[permalink] [raw]
Subject: Re: [PATCH bpf-next v2 1/6] bpf: Factorize bpf_trace_printk and bpf_seq_printf

On Fri, Mar 26, 2021 at 2:53 PM Andrii Nakryiko
<[email protected]> wrote:
>
> On Tue, Mar 23, 2021 at 7:23 PM Florent Revest <[email protected]> wrote:
> >
> > Two helpers (trace_printk and seq_printf) have very similar
> > implementations of format string parsing and a third one is coming
> > (snprintf). To avoid code duplication and make the code easier to
> > maintain, this moves the operations associated with format string
> > parsing (validation and argument sanitization) into one generic
> > function.
> >
> > Unfortunately, the implementation of the two existing helpers already
> > drifted quite a bit and unifying them entailed a lot of changes:
>
> "Unfortunately" as in a lot of extra work for you? I think overall
> though it was very fortunate that you ended up doing it, all
> implementations are more feature-complete and saner now, no? Thanks a
> lot for your hard work!
>
> >
> > - bpf_trace_printk always expected fmt[fmt_size] to be the terminating
> > NULL character, this is no longer true, the first 0 is terminating.
>
> You mean if you had bpf_trace_printk("bla bla\0some more bla\0", 24)
> it would emit that zero character? If yes, I don't think it was a sane
> behavior anyways.
>
> > - bpf_trace_printk now supports %% (which produces the percentage char).
> > - bpf_trace_printk now skips width formating fields.
> > - bpf_trace_printk now supports the X modifier (capital hexadecimal).
> > - bpf_trace_printk now supports %pK, %px, %pB, %pi4, %pI4, %pi6 and %pI6
> > - argument casting on 32 bit has been simplified into one macro and
> > using an enum instead of obscure int increments.
> >
> > - bpf_seq_printf now uses bpf_trace_copy_string instead of
> > strncpy_from_kernel_nofault and handles the %pks %pus specifiers.
> > - bpf_seq_printf now prints longs correctly on 32 bit architectures.
> >
> > - both were changed to use a global per-cpu tmp buffer instead of one
> > stack buffer for trace_printk and 6 small buffers for seq_printf.
> > - to avoid per-cpu buffer usage conflict, these helpers disable
> > preemption while the per-cpu buffer is in use.
> > - both helpers now support the %ps and %pS specifiers to print symbols.
> >
> > Signed-off-by: Florent Revest <[email protected]>
> > ---
>
> This is great, you already saved some lines of code! I suspect I'll
> have some complaints about mods (it feels like this preample should
> provide extra information about which arguments have to be read from
> kernel/user memory, but I'll see next patches first.

Disregard the last part (at least for now). I had a mental model that
it should be possible to parse a format string once and then remember
"instructions" (i.e., arg1 is long, arg2 is string, and so on). But
that's too complicated, so I think re-parsing the format string is
much simpler.

>
> See my comments below (I deliberately didn't trim most of the code for
> easier jumping around), but it's great overall, thanks!
>
> > kernel/trace/bpf_trace.c | 529 ++++++++++++++++++---------------------
> > 1 file changed, 244 insertions(+), 285 deletions(-)
> >

[...]

> > +int bpf_printf_preamble(char *fmt, u32 fmt_size, const u64 *raw_args,
> > + u64 *final_args, enum bpf_printf_mod_type *mod,
> > + u32 num_args)
> > +{
> > + struct bpf_printf_buf *bufs = this_cpu_ptr(&bpf_printf_buf);
> > + int err, i, fmt_cnt = 0, copy_size, used;
> > + char *unsafe_ptr = NULL, *tmp_buf = NULL;
> > + bool prepare_args = final_args && mod;
>
> probably better to enforce that both or none are specified, otherwise
> return error

it's actually three of them: raw_args, mod, and num_args, right? All
three are either NULL or non-NULL.

>
> > + enum bpf_printf_mod_type current_mod;
> > + size_t tmp_buf_len;
> > + u64 current_arg;
> > + char fmt_ptype;
> > +
> > + for (i = 0; i < fmt_size && fmt[i] != '\0'; i++) {
>

[...]

2021-04-07 09:15:41

by Florent Revest

[permalink] [raw]
Subject: Re: [PATCH bpf-next v2 1/6] bpf: Factorize bpf_trace_printk and bpf_seq_printf

[Sorry for the late replies, I'm just back from a long easter break :)]

On Fri, Mar 26, 2021 at 11:51 PM Andrii Nakryiko
<[email protected]> wrote:
> On Fri, Mar 26, 2021 at 2:53 PM Andrii Nakryiko
> <[email protected]> wrote:
> > On Tue, Mar 23, 2021 at 7:23 PM Florent Revest <[email protected]> wrote:
> > > Unfortunately, the implementation of the two existing helpers already
> > > drifted quite a bit and unifying them entailed a lot of changes:
> >
> > "Unfortunately" as in a lot of extra work for you? I think overall
> > though it was very fortunate that you ended up doing it, all
> > implementations are more feature-complete and saner now, no? Thanks a
> > lot for your hard work!

Ahah, "unfortunately" a bit of extra work for me, indeed. But I find
this kind of refactoring patches even harder to review than to write
so thank you too!

> > > - bpf_trace_printk always expected fmt[fmt_size] to be the terminating
> > > NULL character, this is no longer true, the first 0 is terminating.
> >
> > You mean if you had bpf_trace_printk("bla bla\0some more bla\0", 24)
> > it would emit that zero character? If yes, I don't think it was a sane
> > behavior anyways.

The call to snprintf in bpf_do_trace_printk would eventually ignore
"some more bla" but the parsing done in bpf_trace_printk would indeed
read the whole string.

> > This is great, you already saved some lines of code! I suspect I'll
> > have some complaints about mods (it feels like this preample should
> > provide extra information about which arguments have to be read from
> > kernel/user memory, but I'll see next patches first.
>
> Disregard the last part (at least for now). I had a mental model that
> it should be possible to parse a format string once and then remember
> "instructions" (i.e., arg1 is long, arg2 is string, and so on). But
> that's too complicated, so I think re-parsing the format string is
> much simpler.

I also wanted to do that originally but realized it would keep a lot
of the complexity in the helpers themselves and not really move the
needle.

> > > +/* Horrid workaround for getting va_list handling working with different
> > > + * argument type combinations generically for 32 and 64 bit archs.
> > > + */
> > > +#define BPF_CAST_FMT_ARG(arg_nb, args, mod) \
> > > + ((mod[arg_nb] == BPF_PRINTF_LONG_LONG || \
> > > + (mod[arg_nb] == BPF_PRINTF_LONG && __BITS_PER_LONG == 64)) \
> > > + ? args[arg_nb] \
> > > + : ((mod[arg_nb] == BPF_PRINTF_LONG || \
> > > + (mod[arg_nb] == BPF_PRINTF_INT && __BITS_PER_LONG == 32)) \
> >
> > is this right? INT is always 32-bit, it's only LONG that differs.
> > Shouldn't the rule be
> >
> > (LONG_LONG || LONG && __BITS_PER_LONG) -> (__u64)args[args_nb]
> > (INT || LONG && __BITS_PER_LONG == 32) -> (__u32)args[args_nb]
> >
> > Does (long) cast do anything fancy when casting from u64? Sorry, maybe
> > I'm confused.

To be honest, I am also confused by that logic... :p My patch tries to
conserve exactly the same logic as "88a5c690b6 bpf: fix
bpf_trace_printk on 32 bit archs" because I was also afraid of missing
something and could not test it on 32 bit arches. From that commit
description, it is unclear to me what "u32 and long are passed
differently to u64, since the result of C conditional operators
follows the "usual arithmetic conversions" rules" means. Maybe Daniel
can comment on this ?

> > > +int bpf_printf_preamble(char *fmt, u32 fmt_size, const u64 *raw_args,
> > > + u64 *final_args, enum bpf_printf_mod_type *mod,
> > > + u32 num_args)
> > > +{
> > > + struct bpf_printf_buf *bufs = this_cpu_ptr(&bpf_printf_buf);
> > > + int err, i, fmt_cnt = 0, copy_size, used;
> > > + char *unsafe_ptr = NULL, *tmp_buf = NULL;
> > > + bool prepare_args = final_args && mod;
> >
> > probably better to enforce that both or none are specified, otherwise
> > return error

Fair :)

> it's actually three of them: raw_args, mod, and num_args, right? All
> three are either NULL or non-NULL.

It is a bit tricky to see from that patch but in "3/6 bpf: Add a
bpf_snprintf helper" the verifier code calls this function with
num_args != 0 to check whether the number of arguments is correct
without actually converting anything.

Also when the helper gets called, raw_args can come from the BPF
program and be NULL but in that case we will also have num_args = 0
guaranteed by the helper so the loop will bail out if it encounters a
format specifier.

> > > + enum bpf_printf_mod_type current_mod;
> > > + size_t tmp_buf_len;
> > > + u64 current_arg;
> > > + char fmt_ptype;
> > > +
> > > + for (i = 0; i < fmt_size && fmt[i] != '\0'; i++) {
> >
> > Can we say that if the last character is not '\0' then it's a bad
> > format string and return -EINVAL? And if \0 is inside the format
> > string, then it's also a bad format string? I wonder what others think
> > about this?... I think sanity should prevail.

Overall, there are two situations:
- bpf_seq_printf, bpf_trace_printk: we have a pointer and size but we
are not guaranteed zero-termination
- bpf_snprintf: we have a pointer, no size but it's guaranteed to be
zero-terminated (by ARG_PTR_TO_CONST_STR)

Currently, in the bpf_snprintf helper, I set fmt_size to UINT_MAX and
the terminating condition will be fmt[i] == '\0'.
As you pointed out a bit further, I got a bit carried away with the
refactoring and dropped the zero-termination checks for the existing
helpers !

So I see two possibilities:
- either we check fmt[last] == '\0', add a bail out condition in the
loop if we encounter another `\0` and set fmt_size to sprintf(fmt) in
the bpf_snprintf verifier and helper code.
- or we unconditionally call strnlen(fmt, fmt_size) in
bpf_printf_preamble. If no 0 is found, we return an error, if there is
one we treat it as the NULL terminating character.

> > > + if ((!isprint(fmt[i]) && !isspace(fmt[i])) ||
> > > + !isascii(fmt[i])) {
> >
> > && always binds tighter than ||, so you can omit extra (). I'd put
> > this on a single line as well, but that's a total nit.

Neat! :)

> > > + err = -EINVAL;
> > > + goto out;
> > > + }
> > >
> > > if (fmt[i] != '%')
> > > continue;
> > >
> > > - if (fmt_cnt >= 3)
> > > - return -EINVAL;
> > > + if (fmt[i + 1] == '%') {
> > > + i++;
> > > + continue;
> > > + }
> > > +
> > > + if (fmt_cnt >= num_args) {
> > > + err = -EINVAL;
> > > + goto out;
> > > + }
> > >
> > > /* fmt[i] != 0 && fmt[last] == 0, so we can access fmt[i + 1] */
> > > i++;
> > > - if (fmt[i] == 'l') {
> > > - mod[fmt_cnt]++;
> > > +
> > > + /* skip optional "[0 +-][num]" width formating field */
> >
> > typo: formatting

Fixed

> > > + while (fmt[i] == '0' || fmt[i] == '+' || fmt[i] == '-' ||
> > > + fmt[i] == ' ')
> > > + i++;
> > > + if (fmt[i] >= '1' && fmt[i] <= '9') {
> > > i++;
> >
> > Are we worried about integer overflow here? %123123123123123d
> > hopefully won't crash anything, right?

I expect that this should be handled gracefully by the subsequent call
to snprintf(). Our parsing logic does not guarantee that the format
string is 100% legit but it guarantees that it's safe to call
vsnprintf with arguments coming from BPF. If the output buffer is too
small to hold the output, the output will be truncated.

Note that this is already how bpf_seq_printf already works.

> > > - } else if (fmt[i] == 'p') {
> > > - mod[fmt_cnt]++;
> > > - if ((fmt[i + 1] == 'k' ||
> > > - fmt[i + 1] == 'u') &&
> > > + while (fmt[i] >= '0' && fmt[i] <= '9')
> > > + i++;
> >
> > whoa, fmt_size shouldn't be ignored

Oh no, I'll attach the stone of shame! It all made sense with
bpf_snprintf() in mind because, there, we are guaranteed to have a
NULL terminated string already but in an excess of refactoring
enthusiasm I dropped the zero-termination check for the other helpers.

But if we implement either of the options discussed above, then we do
not need to constantly check fmt_size.

> > > + }
> > > +
> >
> > and here if we exhausted all format string but haven't gotten to
> > format specified, we should -EINVAL
> >
> > if (i >= fmt_size) return -EINVAL?

Same comment as above, if we are already guaranteed zero-termination
by a prior check, we don't need that.

> > > + if (fmt[i] == 'p') {
> > > + current_mod = BPF_PRINTF_LONG;
> > > +
> > > + if ((fmt[i + 1] == 'k' || fmt[i + 1] == 'u') &&
> > > fmt[i + 2] == 's') {
> >
> > right, if i + 2 is ok to access? always be remembering about fmt_size

Same.

> > > fmt_ptype = fmt[i + 1];
> > > i += 2;
> > > goto fmt_str;
> > > }
> > >
> > > - if (fmt[i + 1] == 'B') {
> > > - i++;
> > > + if (fmt[i + 1] == 0 || isspace(fmt[i + 1]) ||
> > > + ispunct(fmt[i + 1]) || fmt[i + 1] == 'K' ||
> > > + fmt[i + 1] == 'x' || fmt[i + 1] == 'B' ||
> > > + fmt[i + 1] == 's' || fmt[i + 1] == 'S') {
> > > + /* just kernel pointers */
> > > + if (prepare_args)
> > > + current_arg = raw_args[fmt_cnt];
> >
> > fmt_cnt is not the best name, imo. arg_cnt makes more sense

Mh, we already have "num_args" that can make it confusing. The way I see it:
- the number of format specifiers is the number of %d %s... in the format string
- the number of arguments is the number of values given in the raw_args array.

Potentially, the number of arguments can be higher than the number of
format specifiers, for example printf("%d\n", i, j); so calling them
differently sorta makes sense.
But to be honest I don't have a strong opinion about this and this is
mainly just a remaining from the current bpf_seq_printf
implementation.

> > > + if (!tmp_buf) {
> > > + used = this_cpu_inc_return(bpf_printf_buf_used);
> > > + if (WARN_ON_ONCE(used > 1)) {
> > > + this_cpu_dec(bpf_printf_buf_used);
> > > + return -EBUSY;
> > > + }
> > > + preempt_disable();
> >
> > shouldn't we preempt_disable before we got bpf_printf_buf_used? if we
> > get preempted after incrementing counter, buffer will be unusable for
> > a while, potentially, right?

Good catch :)

> > > + if (!tmp_buf) {
> > > + used = this_cpu_inc_return(bpf_printf_buf_used);
> > > + if (WARN_ON_ONCE(used > 1)) {
> > > + this_cpu_dec(bpf_printf_buf_used);
> > > + return -EBUSY;
> > > + }
> > > + preempt_disable();
> > > + tmp_buf = bufs->tmp_buf;
> > > + tmp_buf_len = MAX_PRINTF_BUF_LEN;
> > > + }
> >
> > how about helper used like this:
> >
> > if (try_get_fmt_tmp_buf(&tmp_buf, &tmp_buf_len))
> > return -EBUSY;
> >
> > which will do nothing if tmp_buf != NULL?

Yep, I quite like that. :)

> > > fmt_next:
> > > + if (prepare_args) {
> >
> > I'd ditch prepare_args variable and just check final_args (and that
> > check to ensure both mods and final_args are specified I suggested
> > above)

Agreed.

> > > + mod[fmt_cnt] = current_mod;
> > > + final_args[fmt_cnt] = current_arg;
> > > + }
> > > fmt_cnt++;
> > > }
> >
> > [...]
> >
> > > -
> > > - return __BPF_TP_EMIT();
> > > + err = 0;
> > > +out:
> > > + bpf_printf_postamble();
> >
> > naming is hard, but preamble and postamble reads way too fancy :)
> > bpf_printf_prepare() and bpf_printf_cleanup() or something like that
> > is a bit more to the point, no?

Haha, you're totally right.

> > > + if (data_len & 7 || data_len > MAX_SEQ_PRINTF_VARARGS * 8 ||
> > > + (data_len && !data))
> >
> > data && !data_len is also an error, no?

Isn't that checked by the verifier ?

I don't mind adding an explicit check for it (data_len ^ data or two
clearer conditions ?) but I think that even if this were to happen,
this would not be a problem: if we encounter a format specifier,
num_args will be zero so bpf_printf_preamble will bail out before it
tries to access data.

2021-04-07 22:07:47

by Andrii Nakryiko

[permalink] [raw]
Subject: Re: [PATCH bpf-next v2 1/6] bpf: Factorize bpf_trace_printk and bpf_seq_printf

On Tue, Apr 6, 2021 at 8:35 AM Florent Revest <[email protected]> wrote:
>
> [Sorry for the late replies, I'm just back from a long easter break :)]
>
> On Fri, Mar 26, 2021 at 11:51 PM Andrii Nakryiko
> <[email protected]> wrote:
> > On Fri, Mar 26, 2021 at 2:53 PM Andrii Nakryiko
> > <[email protected]> wrote:
> > > On Tue, Mar 23, 2021 at 7:23 PM Florent Revest <[email protected]> wrote:
> > > > Unfortunately, the implementation of the two existing helpers already
> > > > drifted quite a bit and unifying them entailed a lot of changes:
> > >
> > > "Unfortunately" as in a lot of extra work for you? I think overall
> > > though it was very fortunate that you ended up doing it, all
> > > implementations are more feature-complete and saner now, no? Thanks a
> > > lot for your hard work!
>
> Ahah, "unfortunately" a bit of extra work for me, indeed. But I find
> this kind of refactoring patches even harder to review than to write
> so thank you too!
>
> > > > - bpf_trace_printk always expected fmt[fmt_size] to be the terminating
> > > > NULL character, this is no longer true, the first 0 is terminating.
> > >
> > > You mean if you had bpf_trace_printk("bla bla\0some more bla\0", 24)
> > > it would emit that zero character? If yes, I don't think it was a sane
> > > behavior anyways.
>
> The call to snprintf in bpf_do_trace_printk would eventually ignore
> "some more bla" but the parsing done in bpf_trace_printk would indeed
> read the whole string.
>
> > > This is great, you already saved some lines of code! I suspect I'll
> > > have some complaints about mods (it feels like this preample should
> > > provide extra information about which arguments have to be read from
> > > kernel/user memory, but I'll see next patches first.
> >
> > Disregard the last part (at least for now). I had a mental model that
> > it should be possible to parse a format string once and then remember
> > "instructions" (i.e., arg1 is long, arg2 is string, and so on). But
> > that's too complicated, so I think re-parsing the format string is
> > much simpler.
>
> I also wanted to do that originally but realized it would keep a lot
> of the complexity in the helpers themselves and not really move the
> needle.
>
> > > > +/* Horrid workaround for getting va_list handling working with different
> > > > + * argument type combinations generically for 32 and 64 bit archs.
> > > > + */
> > > > +#define BPF_CAST_FMT_ARG(arg_nb, args, mod) \
> > > > + ((mod[arg_nb] == BPF_PRINTF_LONG_LONG || \
> > > > + (mod[arg_nb] == BPF_PRINTF_LONG && __BITS_PER_LONG == 64)) \
> > > > + ? args[arg_nb] \
> > > > + : ((mod[arg_nb] == BPF_PRINTF_LONG || \
> > > > + (mod[arg_nb] == BPF_PRINTF_INT && __BITS_PER_LONG == 32)) \
> > >
> > > is this right? INT is always 32-bit, it's only LONG that differs.
> > > Shouldn't the rule be
> > >
> > > (LONG_LONG || LONG && __BITS_PER_LONG) -> (__u64)args[args_nb]
> > > (INT || LONG && __BITS_PER_LONG == 32) -> (__u32)args[args_nb]
> > >
> > > Does (long) cast do anything fancy when casting from u64? Sorry, maybe
> > > I'm confused.
>
> To be honest, I am also confused by that logic... :p My patch tries to
> conserve exactly the same logic as "88a5c690b6 bpf: fix
> bpf_trace_printk on 32 bit archs" because I was also afraid of missing
> something and could not test it on 32 bit arches. From that commit
> description, it is unclear to me what "u32 and long are passed
> differently to u64, since the result of C conditional operators
> follows the "usual arithmetic conversions" rules" means. Maybe Daniel
> can comment on this ?

Yeah, no idea. Seems like the code above should work fine for 32 and
64 bitness and both little- and big-endianness.

>
> > > > +int bpf_printf_preamble(char *fmt, u32 fmt_size, const u64 *raw_args,
> > > > + u64 *final_args, enum bpf_printf_mod_type *mod,
> > > > + u32 num_args)
> > > > +{
> > > > + struct bpf_printf_buf *bufs = this_cpu_ptr(&bpf_printf_buf);
> > > > + int err, i, fmt_cnt = 0, copy_size, used;
> > > > + char *unsafe_ptr = NULL, *tmp_buf = NULL;
> > > > + bool prepare_args = final_args && mod;
> > >
> > > probably better to enforce that both or none are specified, otherwise
> > > return error
>
> Fair :)
>
> > it's actually three of them: raw_args, mod, and num_args, right? All
> > three are either NULL or non-NULL.
>
> It is a bit tricky to see from that patch but in "3/6 bpf: Add a
> bpf_snprintf helper" the verifier code calls this function with
> num_args != 0 to check whether the number of arguments is correct
> without actually converting anything.
>
> Also when the helper gets called, raw_args can come from the BPF
> program and be NULL but in that case we will also have num_args = 0
> guaranteed by the helper so the loop will bail out if it encounters a
> format specifier.

ok, but at least final_args and mod are locked together, so should be
enforced to be either null or not, right?

>
> > > > + enum bpf_printf_mod_type current_mod;
> > > > + size_t tmp_buf_len;
> > > > + u64 current_arg;
> > > > + char fmt_ptype;
> > > > +
> > > > + for (i = 0; i < fmt_size && fmt[i] != '\0'; i++) {
> > >
> > > Can we say that if the last character is not '\0' then it's a bad
> > > format string and return -EINVAL? And if \0 is inside the format
> > > string, then it's also a bad format string? I wonder what others think
> > > about this?... I think sanity should prevail.
>
> Overall, there are two situations:
> - bpf_seq_printf, bpf_trace_printk: we have a pointer and size but we
> are not guaranteed zero-termination
> - bpf_snprintf: we have a pointer, no size but it's guaranteed to be
> zero-terminated (by ARG_PTR_TO_CONST_STR)
>
> Currently, in the bpf_snprintf helper, I set fmt_size to UINT_MAX and
> the terminating condition will be fmt[i] == '\0'.
> As you pointed out a bit further, I got a bit carried away with the
> refactoring and dropped the zero-termination checks for the existing
> helpers !
>
> So I see two possibilities:
> - either we check fmt[last] == '\0', add a bail out condition in the
> loop if we encounter another `\0` and set fmt_size to sprintf(fmt) in
> the bpf_snprintf verifier and helper code.
> - or we unconditionally call strnlen(fmt, fmt_size) in
> bpf_printf_preamble. If no 0 is found, we return an error, if there is
> one we treat it as the NULL terminating character.

I was thinking about the second one. It is clearly acceptable on BPF
verifier side, though one might argue that we are doing extra work on
the BPF helper side. I don't think it matters in practice, so I'll be
fine with that, if that makes code cleaner and simpler.

>
> > > > + if ((!isprint(fmt[i]) && !isspace(fmt[i])) ||
> > > > + !isascii(fmt[i])) {
> > >
> > > && always binds tighter than ||, so you can omit extra (). I'd put
> > > this on a single line as well, but that's a total nit.
>
> Neat! :)

I just got a compilation warning in a similar situation yesterday when
I dropped unnecessary parentheses, so some versions of compilers might
think it is not a good practice. Just keep that in mind. I don't think
I care enough.

>
> > > > + err = -EINVAL;
> > > > + goto out;
> > > > + }
> > > >
> > > > if (fmt[i] != '%')
> > > > continue;
> > > >
> > > > - if (fmt_cnt >= 3)
> > > > - return -EINVAL;
> > > > + if (fmt[i + 1] == '%') {
> > > > + i++;
> > > > + continue;
> > > > + }
> > > > +
> > > > + if (fmt_cnt >= num_args) {
> > > > + err = -EINVAL;
> > > > + goto out;
> > > > + }
> > > >
> > > > /* fmt[i] != 0 && fmt[last] == 0, so we can access fmt[i + 1] */
> > > > i++;
> > > > - if (fmt[i] == 'l') {
> > > > - mod[fmt_cnt]++;
> > > > +
> > > > + /* skip optional "[0 +-][num]" width formating field */
> > >
> > > typo: formatting
>
> Fixed
>
> > > > + while (fmt[i] == '0' || fmt[i] == '+' || fmt[i] == '-' ||
> > > > + fmt[i] == ' ')
> > > > + i++;
> > > > + if (fmt[i] >= '1' && fmt[i] <= '9') {
> > > > i++;
> > >
> > > Are we worried about integer overflow here? %123123123123123d
> > > hopefully won't crash anything, right?
>
> I expect that this should be handled gracefully by the subsequent call
> to snprintf(). Our parsing logic does not guarantee that the format
> string is 100% legit but it guarantees that it's safe to call
> vsnprintf with arguments coming from BPF. If the output buffer is too
> small to hold the output, the output will be truncated.
>
> Note that this is already how bpf_seq_printf already works.

Ok, but let's not hope and add the test for this.

>
> > > > - } else if (fmt[i] == 'p') {
> > > > - mod[fmt_cnt]++;
> > > > - if ((fmt[i + 1] == 'k' ||
> > > > - fmt[i + 1] == 'u') &&
> > > > + while (fmt[i] >= '0' && fmt[i] <= '9')
> > > > + i++;
> > >
> > > whoa, fmt_size shouldn't be ignored
>
> Oh no, I'll attach the stone of shame! It all made sense with
> bpf_snprintf() in mind because, there, we are guaranteed to have a
> NULL terminated string already but in an excess of refactoring
> enthusiasm I dropped the zero-termination check for the other helpers.
>
> But if we implement either of the options discussed above, then we do
> not need to constantly check fmt_size.

let's see when we get to the next version ;) I don't remember code
enough by now, but I'll keep that in mind for the next revision
anyways

>
> > > > + }
> > > > +
> > >
> > > and here if we exhausted all format string but haven't gotten to
> > > format specified, we should -EINVAL
> > >
> > > if (i >= fmt_size) return -EINVAL?
>
> Same comment as above, if we are already guaranteed zero-termination
> by a prior check, we don't need that.
>
> > > > + if (fmt[i] == 'p') {
> > > > + current_mod = BPF_PRINTF_LONG;
> > > > +
> > > > + if ((fmt[i + 1] == 'k' || fmt[i + 1] == 'u') &&
> > > > fmt[i + 2] == 's') {
> > >
> > > right, if i + 2 is ok to access? always be remembering about fmt_size
>
> Same.
>
> > > > fmt_ptype = fmt[i + 1];
> > > > i += 2;
> > > > goto fmt_str;
> > > > }
> > > >
> > > > - if (fmt[i + 1] == 'B') {
> > > > - i++;
> > > > + if (fmt[i + 1] == 0 || isspace(fmt[i + 1]) ||
> > > > + ispunct(fmt[i + 1]) || fmt[i + 1] == 'K' ||
> > > > + fmt[i + 1] == 'x' || fmt[i + 1] == 'B' ||
> > > > + fmt[i + 1] == 's' || fmt[i + 1] == 'S') {
> > > > + /* just kernel pointers */
> > > > + if (prepare_args)
> > > > + current_arg = raw_args[fmt_cnt];
> > >
> > > fmt_cnt is not the best name, imo. arg_cnt makes more sense
>
> Mh, we already have "num_args" that can make it confusing. The way I see it:
> - the number of format specifiers is the number of %d %s... in the format string
> - the number of arguments is the number of values given in the raw_args array.
>

Well, if you read "fmt_cnt" as "number of formatters" then yeah, I
suppose it's fine. Never mind. Just fmt_cnt and fmt_size refers to
slightly different "fmt"s, which confused me for a bit, but that's ok.
You use different naming conventions, which is inconsistent, so maybe
adjust that for purists (i.e., if you have num_args, then you should
have num_fmts; or, alternatively, arg_cnt and fmt_cnt). But I'm just
nitpicking, obviously.

> Potentially, the number of arguments can be higher than the number of
> format specifiers, for example printf("%d\n", i, j); so calling them
> differently sorta makes sense.
> But to be honest I don't have a strong opinion about this and this is
> mainly just a remaining from the current bpf_seq_printf
> implementation.

Yep. I think it's ok to allow num_args > fmt_cnt.

>
> > > > + if (!tmp_buf) {
> > > > + used = this_cpu_inc_return(bpf_printf_buf_used);
> > > > + if (WARN_ON_ONCE(used > 1)) {
> > > > + this_cpu_dec(bpf_printf_buf_used);
> > > > + return -EBUSY;
> > > > + }
> > > > + preempt_disable();
> > >
> > > shouldn't we preempt_disable before we got bpf_printf_buf_used? if we
> > > get preempted after incrementing counter, buffer will be unusable for
> > > a while, potentially, right?
>
> Good catch :)
>
> > > > + if (!tmp_buf) {
> > > > + used = this_cpu_inc_return(bpf_printf_buf_used);
> > > > + if (WARN_ON_ONCE(used > 1)) {
> > > > + this_cpu_dec(bpf_printf_buf_used);
> > > > + return -EBUSY;
> > > > + }
> > > > + preempt_disable();
> > > > + tmp_buf = bufs->tmp_buf;
> > > > + tmp_buf_len = MAX_PRINTF_BUF_LEN;
> > > > + }
> > >
> > > how about helper used like this:
> > >
> > > if (try_get_fmt_tmp_buf(&tmp_buf, &tmp_buf_len))
> > > return -EBUSY;
> > >
> > > which will do nothing if tmp_buf != NULL?
>
> Yep, I quite like that. :)
>
> > > > fmt_next:
> > > > + if (prepare_args) {
> > >
> > > I'd ditch prepare_args variable and just check final_args (and that
> > > check to ensure both mods and final_args are specified I suggested
> > > above)
>
> Agreed.
>
> > > > + mod[fmt_cnt] = current_mod;
> > > > + final_args[fmt_cnt] = current_arg;
> > > > + }
> > > > fmt_cnt++;
> > > > }
> > >
> > > [...]
> > >
> > > > -
> > > > - return __BPF_TP_EMIT();
> > > > + err = 0;
> > > > +out:
> > > > + bpf_printf_postamble();
> > >
> > > naming is hard, but preamble and postamble reads way too fancy :)
> > > bpf_printf_prepare() and bpf_printf_cleanup() or something like that
> > > is a bit more to the point, no?
>
> Haha, you're totally right.
>
> > > > + if (data_len & 7 || data_len > MAX_SEQ_PRINTF_VARARGS * 8 ||
> > > > + (data_len && !data))
> > >
> > > data && !data_len is also an error, no?
>
> Isn't that checked by the verifier ?

data_len is ARG_CONST_SIZE_OR_ZERO, so data_len == 0 is allowed by
verifier. But it's probably no harm either to allow data != NULL and
data_len = 0. Might simplify some more dynamic use of snprintf(),
actually.

>
> I don't mind adding an explicit check for it (data_len ^ data or two
> clearer conditions ?) but I think that even if this were to happen,
> this would not be a problem: if we encounter a format specifier,
> num_args will be zero so bpf_printf_preamble will bail out before it
> tries to access data.

agree

2021-04-08 22:54:08

by Florent Revest

[permalink] [raw]
Subject: Re: [PATCH bpf-next v2 1/6] bpf: Factorize bpf_trace_printk and bpf_seq_printf

On Wed, Apr 7, 2021 at 11:54 PM Andrii Nakryiko
<[email protected]> wrote:
> On Tue, Apr 6, 2021 at 8:35 AM Florent Revest <[email protected]> wrote:
> > On Fri, Mar 26, 2021 at 11:51 PM Andrii Nakryiko
> > <[email protected]> wrote:
> > > On Fri, Mar 26, 2021 at 2:53 PM Andrii Nakryiko
> > > <[email protected]> wrote:
> > > > On Tue, Mar 23, 2021 at 7:23 PM Florent Revest <[email protected]> wrote:
> > > > > +/* Horrid workaround for getting va_list handling working with different
> > > > > + * argument type combinations generically for 32 and 64 bit archs.
> > > > > + */
> > > > > +#define BPF_CAST_FMT_ARG(arg_nb, args, mod) \
> > > > > + ((mod[arg_nb] == BPF_PRINTF_LONG_LONG || \
> > > > > + (mod[arg_nb] == BPF_PRINTF_LONG && __BITS_PER_LONG == 64)) \
> > > > > + ? args[arg_nb] \
> > > > > + : ((mod[arg_nb] == BPF_PRINTF_LONG || \
> > > > > + (mod[arg_nb] == BPF_PRINTF_INT && __BITS_PER_LONG == 32)) \
> > > >
> > > > is this right? INT is always 32-bit, it's only LONG that differs.
> > > > Shouldn't the rule be
> > > >
> > > > (LONG_LONG || LONG && __BITS_PER_LONG) -> (__u64)args[args_nb]
> > > > (INT || LONG && __BITS_PER_LONG == 32) -> (__u32)args[args_nb]
> > > >
> > > > Does (long) cast do anything fancy when casting from u64? Sorry, maybe
> > > > I'm confused.
> >
> > To be honest, I am also confused by that logic... :p My patch tries to
> > conserve exactly the same logic as "88a5c690b6 bpf: fix
> > bpf_trace_printk on 32 bit archs" because I was also afraid of missing
> > something and could not test it on 32 bit arches. From that commit
> > description, it is unclear to me what "u32 and long are passed
> > differently to u64, since the result of C conditional operators
> > follows the "usual arithmetic conversions" rules" means. Maybe Daniel
> > can comment on this ?
>
> Yeah, no idea. Seems like the code above should work fine for 32 and
> 64 bitness and both little- and big-endianness.

Yeah, looks good to me as well. I'll use it in v3.

> > > > > +int bpf_printf_preamble(char *fmt, u32 fmt_size, const u64 *raw_args,
> > > > > + u64 *final_args, enum bpf_printf_mod_type *mod,
> > > > > + u32 num_args)
> > > > > +{
> > > > > + struct bpf_printf_buf *bufs = this_cpu_ptr(&bpf_printf_buf);
> > > > > + int err, i, fmt_cnt = 0, copy_size, used;
> > > > > + char *unsafe_ptr = NULL, *tmp_buf = NULL;
> > > > > + bool prepare_args = final_args && mod;
> > > >
> > > > probably better to enforce that both or none are specified, otherwise
> > > > return error
> >
> > Fair :)
> >
> > > it's actually three of them: raw_args, mod, and num_args, right? All
> > > three are either NULL or non-NULL.
> >
> > It is a bit tricky to see from that patch but in "3/6 bpf: Add a
> > bpf_snprintf helper" the verifier code calls this function with
> > num_args != 0 to check whether the number of arguments is correct
> > without actually converting anything.
> >
> > Also when the helper gets called, raw_args can come from the BPF
> > program and be NULL but in that case we will also have num_args = 0
> > guaranteed by the helper so the loop will bail out if it encounters a
> > format specifier.
>
> ok, but at least final_args and mod are locked together, so should be
> enforced to be either null or not, right?

Yes :) will do.

> > > > > + enum bpf_printf_mod_type current_mod;
> > > > > + size_t tmp_buf_len;
> > > > > + u64 current_arg;
> > > > > + char fmt_ptype;
> > > > > +
> > > > > + for (i = 0; i < fmt_size && fmt[i] != '\0'; i++) {
> > > >
> > > > Can we say that if the last character is not '\0' then it's a bad
> > > > format string and return -EINVAL? And if \0 is inside the format
> > > > string, then it's also a bad format string? I wonder what others think
> > > > about this?... I think sanity should prevail.
> >
> > Overall, there are two situations:
> > - bpf_seq_printf, bpf_trace_printk: we have a pointer and size but we
> > are not guaranteed zero-termination
> > - bpf_snprintf: we have a pointer, no size but it's guaranteed to be
> > zero-terminated (by ARG_PTR_TO_CONST_STR)
> >
> > Currently, in the bpf_snprintf helper, I set fmt_size to UINT_MAX and
> > the terminating condition will be fmt[i] == '\0'.
> > As you pointed out a bit further, I got a bit carried away with the
> > refactoring and dropped the zero-termination checks for the existing
> > helpers !
> >
> > So I see two possibilities:
> > - either we check fmt[last] == '\0', add a bail out condition in the
> > loop if we encounter another `\0` and set fmt_size to sprintf(fmt) in
> > the bpf_snprintf verifier and helper code.
> > - or we unconditionally call strnlen(fmt, fmt_size) in
> > bpf_printf_preamble. If no 0 is found, we return an error, if there is
> > one we treat it as the NULL terminating character.
>
> I was thinking about the second one. It is clearly acceptable on BPF
> verifier side, though one might argue that we are doing extra work on
> the BPF helper side. I don't think it matters in practice, so I'll be
> fine with that, if that makes code cleaner and simpler.

I also prefer that option, yes.

> > > > > + if ((!isprint(fmt[i]) && !isspace(fmt[i])) ||
> > > > > + !isascii(fmt[i])) {
> > > >
> > > > && always binds tighter than ||, so you can omit extra (). I'd put
> > > > this on a single line as well, but that's a total nit.
> >
> > Neat! :)
>
> I just got a compilation warning in a similar situation yesterday when
> I dropped unnecessary parentheses, so some versions of compilers might
> think it is not a good practice. Just keep that in mind. I don't think
> I care enough.

Yes, I noticed the same compilation warning and it bothers me. I'll
keep the parentheses but make it one line.

> > > > > + while (fmt[i] == '0' || fmt[i] == '+' || fmt[i] == '-' ||
> > > > > + fmt[i] == ' ')
> > > > > + i++;
> > > > > + if (fmt[i] >= '1' && fmt[i] <= '9') {
> > > > > i++;
> > > >
> > > > Are we worried about integer overflow here? %123123123123123d
> > > > hopefully won't crash anything, right?
> >
> > I expect that this should be handled gracefully by the subsequent call
> > to snprintf(). Our parsing logic does not guarantee that the format
> > string is 100% legit but it guarantees that it's safe to call
> > vsnprintf with arguments coming from BPF. If the output buffer is too
> > small to hold the output, the output will be truncated.
> >
> > Note that this is already how bpf_seq_printf already works.
>
> Ok, but let's not hope and add the test for this.

Ok

> > > > > - } else if (fmt[i] == 'p') {
> > > > > - mod[fmt_cnt]++;
> > > > > - if ((fmt[i + 1] == 'k' ||
> > > > > - fmt[i + 1] == 'u') &&
> > > > > + while (fmt[i] >= '0' && fmt[i] <= '9')
> > > > > + i++;
> > > >
> > > > whoa, fmt_size shouldn't be ignored
> >
> > Oh no, I'll attach the stone of shame! It all made sense with
> > bpf_snprintf() in mind because, there, we are guaranteed to have a
> > NULL terminated string already but in an excess of refactoring
> > enthusiasm I dropped the zero-termination check for the other helpers.
> >
> > But if we implement either of the options discussed above, then we do
> > not need to constantly check fmt_size.
>
> let's see when we get to the next version ;) I don't remember code
> enough by now, but I'll keep that in mind for the next revision
> anyways

Sure, I'll send v3 asap.

> > > > > fmt_ptype = fmt[i + 1];
> > > > > i += 2;
> > > > > goto fmt_str;
> > > > > }
> > > > >
> > > > > - if (fmt[i + 1] == 'B') {
> > > > > - i++;
> > > > > + if (fmt[i + 1] == 0 || isspace(fmt[i + 1]) ||
> > > > > + ispunct(fmt[i + 1]) || fmt[i + 1] == 'K' ||
> > > > > + fmt[i + 1] == 'x' || fmt[i + 1] == 'B' ||
> > > > > + fmt[i + 1] == 's' || fmt[i + 1] == 'S') {
> > > > > + /* just kernel pointers */
> > > > > + if (prepare_args)
> > > > > + current_arg = raw_args[fmt_cnt];
> > > >
> > > > fmt_cnt is not the best name, imo. arg_cnt makes more sense
> >
> > Mh, we already have "num_args" that can make it confusing. The way I see it:
> > - the number of format specifiers is the number of %d %s... in the format string
> > - the number of arguments is the number of values given in the raw_args array.
> >
>
> Well, if you read "fmt_cnt" as "number of formatters" then yeah, I
> suppose it's fine. Never mind. Just fmt_cnt and fmt_size refers to
> slightly different "fmt"s, which confused me for a bit, but that's ok.
> You use different naming conventions, which is inconsistent, so maybe
> adjust that for purists (i.e., if you have num_args, then you should
> have num_fmts; or, alternatively, arg_cnt and fmt_cnt). But I'm just
> nitpicking, obviously.

I agree, I'll see if I can clean this up a bit.

> > > > > + if (data_len & 7 || data_len > MAX_SEQ_PRINTF_VARARGS * 8 ||
> > > > > + (data_len && !data))
> > > >
> > > > data && !data_len is also an error, no?
> >
> > Isn't that checked by the verifier ?
>
> data_len is ARG_CONST_SIZE_OR_ZERO, so data_len == 0 is allowed by
> verifier. But it's probably no harm either to allow data != NULL and
> data_len = 0. Might simplify some more dynamic use of snprintf(),
> actually.

Agree