2018-07-02 19:17:23

by Okash Khawaja

[permalink] [raw]
Subject: [PATCH bpf-next v2 2/3] bpf: btf: add btf print functionality

This consumes functionality exported in the previous patch. It does the
main job of printing with BTF data. This is used in the following patch
to provide a more readable output of a map's dump. It relies on
json_writer to do json printing. Below is sample output where map keys
are ints and values are of type struct A:

typedef int int_type;
enum E {
E0,
E1,
};

struct B {
int x;
int y;
};

struct A {
int m;
unsigned long long n;
char o;
int p[8];
int q[4][8];
enum E r;
void *s;
struct B t;
const int u;
int_type v;
unsigned int w1: 3;
unsigned int w2: 3;
};

$ sudo bpftool map dump id 14
[{
"key": 0,
"value": {
"m": 1,
"n": 2,
"o": "c",
"p": [15,16,17,18,15,16,17,18
],
"q": [[25,26,27,28,25,26,27,28
],[35,36,37,38,35,36,37,38
],[45,46,47,48,45,46,47,48
],[55,56,57,58,55,56,57,58
]
],
"r": 1,
"s": 0x7ffd80531cf8,
"t": {
"x": 5,
"y": 10
},
"u": 100,
"v": 20,
"w1": 0x7,
"w2": 0x3
}
}
]

This patch uses json's {} and [] to imply struct/union and array. More
explicit information can be added later. For example, a command line
option can be introduced to print whether a key or value is struct
or union, name of a struct etc. This will however come at the expense
of duplicating info when, for example, printing an array of structs.
enums are printed as ints without their names.

Signed-off-by: Okash Khawaja <[email protected]>

---
tools/bpf/bpftool/btf_dumper.c | 263 +++++++++++++++++++++++++++++++++++++++++
tools/bpf/bpftool/btf_dumper.h | 23 +++
2 files changed, 286 insertions(+)

--- /dev/null
+++ b/tools/bpf/bpftool/btf_dumper.c
@@ -0,0 +1,263 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2018 Facebook */
+
+#include <linux/btf.h>
+#include <linux/err.h>
+#include <stdio.h> /* for (FILE *) used by json_writer */
+#include <linux/bitops.h>
+#include <string.h>
+#include <ctype.h>
+
+#include "btf.h"
+#include "json_writer.h"
+#include "btf_dumper.h"
+
+#define BITS_PER_BYTE_MASK (BITS_PER_BYTE - 1)
+#define BITS_PER_BYTE_MASKED(bits) ((bits) & BITS_PER_BYTE_MASK)
+#define BITS_ROUNDDOWN_BYTES(bits) ((bits) >> 3)
+#define BITS_ROUNDUP_BYTES(bits) \
+ (BITS_ROUNDDOWN_BYTES(bits) + !!BITS_PER_BYTE_MASKED(bits))
+
+static int btf_dumper_do_type(const struct btf_dumper *d, uint32_t type_id,
+ uint8_t bit_offset, const void *data);
+
+static void btf_dumper_ptr(const void *data, json_writer_t *jw,
+ bool is_plain_text)
+{
+ if (is_plain_text)
+ jsonw_printf(jw, "%p", *((uintptr_t *)data));
+ else
+ jsonw_printf(jw, "%u", *((uintptr_t *)data));
+}
+
+static int btf_dumper_modifier(const struct btf_dumper *d, uint32_t type_id,
+ const void *data)
+{
+ int32_t actual_type_id = btf__resolve_type(d->btf, type_id);
+ int ret;
+
+ if (actual_type_id < 0)
+ return actual_type_id;
+
+ ret = btf_dumper_do_type(d, actual_type_id, 0, data);
+
+ return ret;
+}
+
+static void btf_dumper_enum(const void *data, json_writer_t *jw)
+{
+ jsonw_printf(jw, "%d", *((int32_t *)data));
+}
+
+static int btf_dumper_array(const struct btf_dumper *d, uint32_t type_id,
+ const void *data)
+{
+ const struct btf_type *t = btf__type_by_id(d->btf, type_id);
+ struct btf_array *arr = (struct btf_array *)(t + 1);
+ int64_t elem_size;
+ int ret = 0;
+ uint32_t i;
+
+ elem_size = btf__resolve_size(d->btf, arr->type);
+ if (elem_size < 0)
+ return elem_size;
+
+ jsonw_start_array(d->jw);
+ for (i = 0; i < arr->nelems; i++) {
+ ret = btf_dumper_do_type(d, arr->type, 0,
+ data + (i * elem_size));
+ if (ret)
+ break;
+ }
+
+ jsonw_end_array(d->jw);
+ return ret;
+}
+
+static void btf_dumper_int_bits(uint32_t int_type, uint8_t bit_offset,
+ const void *data, json_writer_t *jw,
+ bool is_plain_text)
+{
+ uint32_t bits = BTF_INT_BITS(int_type);
+ uint16_t total_bits_offset;
+ uint16_t bytes_to_copy;
+ uint16_t bits_to_copy;
+ uint8_t upper_bits;
+ union {
+ uint64_t u64_num;
+ uint8_t u8_nums[8];
+ } print_num;
+
+ total_bits_offset = bit_offset + BTF_INT_OFFSET(int_type);
+ data += BITS_ROUNDDOWN_BYTES(total_bits_offset);
+ bit_offset = BITS_PER_BYTE_MASKED(total_bits_offset);
+ bits_to_copy = bits + bit_offset;
+ bytes_to_copy = BITS_ROUNDUP_BYTES(bits_to_copy);
+
+ print_num.u64_num = 0;
+ memcpy(&print_num.u64_num, data, bytes_to_copy);
+
+ upper_bits = BITS_PER_BYTE_MASKED(bits_to_copy);
+ if (upper_bits) {
+ uint8_t mask = (1 << upper_bits) - 1;
+
+ print_num.u8_nums[bytes_to_copy - 1] &= mask;
+ }
+
+ print_num.u64_num >>= bit_offset;
+
+ if (is_plain_text)
+ jsonw_printf(jw, "0x%llx", print_num.u64_num);
+ else
+ jsonw_printf(jw, "%llu", print_num.u64_num);
+}
+
+static int btf_dumper_int(const struct btf_type *t, uint8_t bit_offset,
+ const void *data, json_writer_t *jw,
+ bool is_plain_text)
+{
+ uint32_t *int_type = (uint32_t *)(t + 1);
+ uint32_t bits = BTF_INT_BITS(*int_type);
+ int ret = 0;
+
+ /* if this is bit field */
+ if (bit_offset || BTF_INT_OFFSET(*int_type) ||
+ BITS_PER_BYTE_MASKED(bits)) {
+ btf_dumper_int_bits(*int_type, bit_offset, data, jw,
+ is_plain_text);
+ return ret;
+ }
+
+ switch (BTF_INT_ENCODING(*int_type)) {
+ case 0:
+ if (BTF_INT_BITS(*int_type) == 64)
+ jsonw_printf(jw, "%lu", *((uint64_t *)data));
+ else if (BTF_INT_BITS(*int_type) == 32)
+ jsonw_printf(jw, "%u", *((uint32_t *)data));
+ else if (BTF_INT_BITS(*int_type) == 16)
+ jsonw_printf(jw, "%hu", *((uint16_t *)data));
+ else if (BTF_INT_BITS(*int_type) == 8)
+ jsonw_printf(jw, "%hhu", *((uint8_t *)data));
+ else
+ btf_dumper_int_bits(*int_type, bit_offset, data, jw,
+ is_plain_text);
+ break;
+ case BTF_INT_SIGNED:
+ if (BTF_INT_BITS(*int_type) == 64)
+ jsonw_printf(jw, "%ld", *((int64_t *)data));
+ else if (BTF_INT_BITS(*int_type) == 32)
+ jsonw_printf(jw, "%d", *((int32_t *)data));
+ else if (BTF_INT_BITS(*int_type) == 16)
+ jsonw_printf(jw, "%hd", *((int16_t *)data));
+ else if (BTF_INT_BITS(*int_type) == 8)
+ jsonw_printf(jw, "%hhd", *((int8_t *)data));
+ else
+ btf_dumper_int_bits(*int_type, bit_offset, data, jw,
+ is_plain_text);
+ break;
+ case BTF_INT_CHAR:
+ if (*((char *)data) == '\0')
+ jsonw_null(jw);
+ else if (isprint(*((char *)data)))
+ jsonw_printf(jw, "\"%c\"", *((char *)data));
+ else
+ if (is_plain_text)
+ jsonw_printf(jw, "%hhx", *((char *)data));
+ else
+ jsonw_printf(jw, "%hhd", *((char *)data));
+ break;
+ case BTF_INT_BOOL:
+ jsonw_bool(jw, *((int *)data));
+ break;
+ default:
+ /* shouldn't happen */
+ ret = -EINVAL;
+ break;
+ }
+
+ return ret;
+}
+
+static int btf_dumper_struct(const struct btf_dumper *d, uint32_t type_id,
+ const void *data)
+{
+ const struct btf_type *t = btf__type_by_id(d->btf, type_id);
+ struct btf_member *m;
+ int ret = 0;
+ int i, vlen;
+
+ if (!t)
+ return -EINVAL;
+
+ vlen = BTF_INFO_VLEN(t->info);
+ jsonw_start_object(d->jw);
+ m = (struct btf_member *)(t + 1);
+
+ for (i = 0; i < vlen; i++) {
+ jsonw_name(d->jw, btf__name_by_offset(d->btf, m[i].name_off));
+ ret = btf_dumper_do_type(d, m[i].type,
+ BITS_PER_BYTE_MASKED(m[i].offset), data
+ + BITS_ROUNDDOWN_BYTES(m[i].offset));
+ if (ret)
+ return ret;
+ }
+
+ jsonw_end_object(d->jw);
+
+ return 0;
+}
+
+static int btf_dumper_do_type(const struct btf_dumper *d, uint32_t type_id,
+ uint8_t bit_offset, const void *data)
+{
+ const struct btf_type *t = btf__type_by_id(d->btf, type_id);
+ int ret = 0;
+
+ switch (BTF_INFO_KIND(t->info)) {
+ case BTF_KIND_INT:
+ ret = btf_dumper_int(t, bit_offset, data, d->jw,
+ d->is_plain_text);
+ break;
+ case BTF_KIND_STRUCT:
+ case BTF_KIND_UNION:
+ ret = btf_dumper_struct(d, type_id, data);
+ break;
+ case BTF_KIND_ARRAY:
+ ret = btf_dumper_array(d, type_id, data);
+ break;
+ case BTF_KIND_ENUM:
+ btf_dumper_enum(data, d->jw);
+ break;
+ case BTF_KIND_PTR:
+ btf_dumper_ptr(data, d->jw, d->is_plain_text);
+ break;
+ case BTF_KIND_UNKN:
+ jsonw_printf(d->jw, "(unknown)");
+ break;
+ case BTF_KIND_FWD:
+ /* map key or value can't be forward */
+ ret = -EINVAL;
+ break;
+ case BTF_KIND_TYPEDEF:
+ case BTF_KIND_VOLATILE:
+ case BTF_KIND_CONST:
+ case BTF_KIND_RESTRICT:
+ ret = btf_dumper_modifier(d, type_id, data);
+ break;
+ default:
+ jsonw_printf(d->jw, "(unsupported-kind");
+ ret = -EINVAL;
+ break;
+ }
+
+ return ret;
+}
+
+int32_t btf_dumper_type(const struct btf_dumper *d, uint32_t type_id,
+ const void *data)
+{
+ if (!d)
+ return -EINVAL;
+
+ return btf_dumper_do_type(d, type_id, 0, data);
+}
--- /dev/null
+++ b/tools/bpf/bpftool/btf_dumper.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2018 Facebook */
+
+#ifndef BTF_DUMPER_H
+#define BTF_DUMPER_H
+
+struct btf_dumper {
+ const struct btf *btf;
+ json_writer_t *jw;
+ bool is_plain_text;
+};
+
+/* btf_dumper_type - print data along with type information
+ * @d: an instance containing context for dumping types
+ * @type_id: index in btf->types array. this points to the type to be dumped
+ * @data: pointer the actual data, i.e. the values to be printed
+ *
+ * Returns zero on success and negative error code otherwise
+ */
+int32_t btf_dumper_type(const struct btf_dumper *d, uint32_t type_id,
+ const void *data);
+
+#endif



2018-07-03 05:08:03

by Jakub Kicinski

[permalink] [raw]
Subject: Re: [PATCH bpf-next v2 2/3] bpf: btf: add btf print functionality

On Mon, 2 Jul 2018 11:39:15 -0700, Okash Khawaja wrote:
> This consumes functionality exported in the previous patch. It does the
> main job of printing with BTF data. This is used in the following patch
> to provide a more readable output of a map's dump. It relies on
> json_writer to do json printing. Below is sample output where map keys
> are ints and values are of type struct A:
>
> typedef int int_type;
> enum E {
> E0,
> E1,
> };
>
> struct B {
> int x;
> int y;
> };
>
> struct A {
> int m;
> unsigned long long n;
> char o;
> int p[8];
> int q[4][8];
> enum E r;
> void *s;
> struct B t;
> const int u;
> int_type v;
> unsigned int w1: 3;
> unsigned int w2: 3;
> };
>
> $ sudo bpftool map dump id 14
> [{
> "key": 0,
> "value": {
> "m": 1,
> "n": 2,
> "o": "c",
> "p": [15,16,17,18,15,16,17,18
> ],
> "q": [[25,26,27,28,25,26,27,28
> ],[35,36,37,38,35,36,37,38
> ],[45,46,47,48,45,46,47,48
> ],[55,56,57,58,55,56,57,58
> ]
> ],
> "r": 1,
> "s": 0x7ffd80531cf8,
> "t": {
> "x": 5,
> "y": 10
> },
> "u": 100,
> "v": 20,
> "w1": 0x7,
> "w2": 0x3
> }
> }
> ]
>
> This patch uses json's {} and [] to imply struct/union and array. More
> explicit information can be added later. For example, a command line
> option can be introduced to print whether a key or value is struct
> or union, name of a struct etc. This will however come at the expense
> of duplicating info when, for example, printing an array of structs.
> enums are printed as ints without their names.
>
> Signed-off-by: Okash Khawaja <[email protected]>
>
> ---
> tools/bpf/bpftool/btf_dumper.c | 263 +++++++++++++++++++++++++++++++++++++++++
> tools/bpf/bpftool/btf_dumper.h | 23 +++
> 2 files changed, 286 insertions(+)
>
> --- /dev/null
> +++ b/tools/bpf/bpftool/btf_dumper.c
> @@ -0,0 +1,263 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/* Copyright (c) 2018 Facebook */
> +
> +#include <linux/btf.h>
> +#include <linux/err.h>
> +#include <stdio.h> /* for (FILE *) used by json_writer */
> +#include <linux/bitops.h>
> +#include <string.h>
> +#include <ctype.h>
> +
> +#include "btf.h"
> +#include "json_writer.h"
> +#include "btf_dumper.h"



> +#define BITS_PER_BYTE_MASK (BITS_PER_BYTE - 1)
> +#define BITS_PER_BYTE_MASKED(bits) ((bits) & BITS_PER_BYTE_MASK)

Perhaps it's just me but BIT_OFFSET or BIT_COUNT as a name of this macro
would make it more obvious to parse in the code below.

> +#define BITS_ROUNDDOWN_BYTES(bits) ((bits) >> 3)
> +#define BITS_ROUNDUP_BYTES(bits) \
> + (BITS_ROUNDDOWN_BYTES(bits) + !!BITS_PER_BYTE_MASKED(bits))
> +
> +static int btf_dumper_do_type(const struct btf_dumper *d, uint32_t type_id,
> + uint8_t bit_offset, const void *data);
> +
> +static void btf_dumper_ptr(const void *data, json_writer_t *jw,
> + bool is_plain_text)
> +{
> + if (is_plain_text)
> + jsonw_printf(jw, "%p", *((uintptr_t *)data));
> + else
> + jsonw_printf(jw, "%u", *((uintptr_t *)data));
> +}
> +
> +static int btf_dumper_modifier(const struct btf_dumper *d, uint32_t type_id,
> + const void *data)
> +{
> + int32_t actual_type_id = btf__resolve_type(d->btf, type_id);

Please prefer kernel types like __u32 wherever possible.

> + int ret;
> +
> + if (actual_type_id < 0)
> + return actual_type_id;
> +
> + ret = btf_dumper_do_type(d, actual_type_id, 0, data);
> +
> + return ret;

ret is unnecessary.

> +}
> +
> +static void btf_dumper_enum(const void *data, json_writer_t *jw)
> +{
> + jsonw_printf(jw, "%d", *((int32_t *)data));

Unnecessary parenthesis. There is a lot of those, please remove them
all.

> +}
> +
> +static int btf_dumper_array(const struct btf_dumper *d, uint32_t type_id,
> + const void *data)
> +{
> + const struct btf_type *t = btf__type_by_id(d->btf, type_id);
> + struct btf_array *arr = (struct btf_array *)(t + 1);
> + int64_t elem_size;
> + int ret = 0;
> + uint32_t i;
> +
> + elem_size = btf__resolve_size(d->btf, arr->type);
> + if (elem_size < 0)
> + return elem_size;
> +
> + jsonw_start_array(d->jw);
> + for (i = 0; i < arr->nelems; i++) {
> + ret = btf_dumper_do_type(d, arr->type, 0,
> + data + (i * elem_size));

Unnecessary parenthesis.

> + if (ret)
> + break;
> + }
> +
> + jsonw_end_array(d->jw);
> + return ret;
> +}
> +
> +static void btf_dumper_int_bits(uint32_t int_type, uint8_t bit_offset,
> + const void *data, json_writer_t *jw,
> + bool is_plain_text)
> +{
> + uint32_t bits = BTF_INT_BITS(int_type);
> + uint16_t total_bits_offset;
> + uint16_t bytes_to_copy;
> + uint16_t bits_to_copy;

Please use normal int types for things which don't have to be
explicitly sized. Using explicitly sized variables is bad style,
and ALU operations other than on word or byte quantities are usually
slower on modern CPUs.

> + uint8_t upper_bits;
> + union {
> + uint64_t u64_num;
> + uint8_t u8_nums[8];

Are the int types in BTF constrained to 64bit at most?

> + } print_num;
> +
> + total_bits_offset = bit_offset + BTF_INT_OFFSET(int_type);
> + data += BITS_ROUNDDOWN_BYTES(total_bits_offset);
> + bit_offset = BITS_PER_BYTE_MASKED(total_bits_offset);
> + bits_to_copy = bits + bit_offset;
> + bytes_to_copy = BITS_ROUNDUP_BYTES(bits_to_copy);
> +
> + print_num.u64_num = 0;
> + memcpy(&print_num.u64_num, data, bytes_to_copy);

This scheme is unlikely to work on big endian machines...

> + upper_bits = BITS_PER_BYTE_MASKED(bits_to_copy);
> + if (upper_bits) {
> + uint8_t mask = (1 << upper_bits) - 1;
> +
> + print_num.u8_nums[bytes_to_copy - 1] &= mask;
> + }
> +
> + print_num.u64_num >>= bit_offset;
> +
> + if (is_plain_text)
> + jsonw_printf(jw, "0x%llx", print_num.u64_num);
> + else
> + jsonw_printf(jw, "%llu", print_num.u64_num);
> +}
> +
> +static int btf_dumper_int(const struct btf_type *t, uint8_t bit_offset,
> + const void *data, json_writer_t *jw,
> + bool is_plain_text)
> +{
> + uint32_t *int_type = (uint32_t *)(t + 1);
> + uint32_t bits = BTF_INT_BITS(*int_type);
> + int ret = 0;
> +
> + /* if this is bit field */
> + if (bit_offset || BTF_INT_OFFSET(*int_type) ||
> + BITS_PER_BYTE_MASKED(bits)) {
> + btf_dumper_int_bits(*int_type, bit_offset, data, jw,
> + is_plain_text);
> + return ret;
> + }
> +
> + switch (BTF_INT_ENCODING(*int_type)) {
> + case 0:
> + if (BTF_INT_BITS(*int_type) == 64)
> + jsonw_printf(jw, "%lu", *((uint64_t *)data));
> + else if (BTF_INT_BITS(*int_type) == 32)
> + jsonw_printf(jw, "%u", *((uint32_t *)data));
> + else if (BTF_INT_BITS(*int_type) == 16)
> + jsonw_printf(jw, "%hu", *((uint16_t *)data));
> + else if (BTF_INT_BITS(*int_type) == 8)
> + jsonw_printf(jw, "%hhu", *((uint8_t *)data));
> + else
> + btf_dumper_int_bits(*int_type, bit_offset, data, jw,
> + is_plain_text);
> + break;
> + case BTF_INT_SIGNED:
> + if (BTF_INT_BITS(*int_type) == 64)
> + jsonw_printf(jw, "%ld", *((int64_t *)data));
> + else if (BTF_INT_BITS(*int_type) == 32)
> + jsonw_printf(jw, "%d", *((int32_t *)data));
> + else if (BTF_INT_BITS(*int_type) == 16)

Please drop the double space. Both for 16 where it makes no sense and
for 8 where it's marginally useful but not really.

> + jsonw_printf(jw, "%hd", *((int16_t *)data));
> + else if (BTF_INT_BITS(*int_type) == 8)
> + jsonw_printf(jw, "%hhd", *((int8_t *)data));
> + else
> + btf_dumper_int_bits(*int_type, bit_offset, data, jw,
> + is_plain_text);
> + break;
> + case BTF_INT_CHAR:
> + if (*((char *)data) == '\0')
> + jsonw_null(jw);

Mm.. I don't think 0 char is equivalent to null.

> + else if (isprint(*((char *)data)))
> + jsonw_printf(jw, "\"%c\"", *((char *)data));

This looks very suspicious. So if I see a "6" for a char field it's
either a 6 ('\u0006') or a 54 ('6')...

> + else
> + if (is_plain_text)
> + jsonw_printf(jw, "%hhx", *((char *)data));
> + else
> + jsonw_printf(jw, "%hhd", *((char *)data));

... I think you need to always print a string, and express it as
\u00%02hhx for non-printable.

> + break;
> + case BTF_INT_BOOL:
> + jsonw_bool(jw, *((int *)data));
> + break;
> + default:
> + /* shouldn't happen */
> + ret = -EINVAL;

You only set ret to something else than 0 here just to break and
immediately return. Please remove the ret variable.

> + break;
> + }
> +
> + return ret;
> +}
> +
> +static int btf_dumper_struct(const struct btf_dumper *d, uint32_t type_id,
> + const void *data)
> +{
> + const struct btf_type *t = btf__type_by_id(d->btf, type_id);

Please don't call functions which need error checking as initialized
the if below looks very awkward..

> + struct btf_member *m;
> + int ret = 0;
> +
> + int i, vlen;
> +
> + if (!t)
> + return -EINVAL;
> +
> + vlen = BTF_INFO_VLEN(t->info);
> + jsonw_start_object(d->jw);
> + m = (struct btf_member *)(t + 1);
> +
> + for (i = 0; i < vlen; i++) {
> + jsonw_name(d->jw, btf__name_by_offset(d->btf, m[i].name_off));
> + ret = btf_dumper_do_type(d, m[i].type,
> + BITS_PER_BYTE_MASKED(m[i].offset), data
> + + BITS_ROUNDDOWN_BYTES(m[i].offset));

Please use a temp variable to avoid this awkward multi-line sum.

> + if (ret)
> + return ret;

You can't return without jsonw_end_object().

> + }
> +
> + jsonw_end_object(d->jw);
> +
> + return 0;
> +}
> +
> +static int btf_dumper_do_type(const struct btf_dumper *d, uint32_t type_id,
> + uint8_t bit_offset, const void *data)
> +{
> + const struct btf_type *t = btf__type_by_id(d->btf, type_id);
> + int ret = 0;
> +
> + switch (BTF_INFO_KIND(t->info)) {
> + case BTF_KIND_INT:
> + ret = btf_dumper_int(t, bit_offset, data, d->jw,
> + d->is_plain_text);
> + break;
> + case BTF_KIND_STRUCT:
> + case BTF_KIND_UNION:
> + ret = btf_dumper_struct(d, type_id, data);
> + break;
> + case BTF_KIND_ARRAY:
> + ret = btf_dumper_array(d, type_id, data);
> + break;
> + case BTF_KIND_ENUM:
> + btf_dumper_enum(data, d->jw);
> + break;
> + case BTF_KIND_PTR:
> + btf_dumper_ptr(data, d->jw, d->is_plain_text);
> + break;
> + case BTF_KIND_UNKN:
> + jsonw_printf(d->jw, "(unknown)");
> + break;
> + case BTF_KIND_FWD:
> + /* map key or value can't be forward */

Right, but you have to print _something_, otherwise we would have a
name without a value, which would break JSON, no?

> + ret = -EINVAL;
> + break;
> + case BTF_KIND_TYPEDEF:
> + case BTF_KIND_VOLATILE:
> + case BTF_KIND_CONST:
> + case BTF_KIND_RESTRICT:
> + ret = btf_dumper_modifier(d, type_id, data);
> + break;
> + default:
> + jsonw_printf(d->jw, "(unsupported-kind");
> + ret = -EINVAL;
> + break;
> + }
> +
> + return ret;

Why return ret; at all, there is no code after the switch just return
directly from cases and save 9 LOC.

> +}
> +
> +int32_t btf_dumper_type(const struct btf_dumper *d, uint32_t type_id,
> + const void *data)
> +{
> + if (!d)
> + return -EINVAL;

No need for defensive programming.

> + return btf_dumper_do_type(d, type_id, 0, data);
> +}
> --- /dev/null
> +++ b/tools/bpf/bpftool/btf_dumper.h
> @@ -0,0 +1,23 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +/* Copyright (c) 2018 Facebook */
> +
> +#ifndef BTF_DUMPER_H
> +#define BTF_DUMPER_H
> +
> +struct btf_dumper {
> + const struct btf *btf;
> + json_writer_t *jw;
> + bool is_plain_text;
> +};
> +
> +/* btf_dumper_type - print data along with type information
> + * @d: an instance containing context for dumping types
> + * @type_id: index in btf->types array. this points to the type to be dumped
> + * @data: pointer the actual data, i.e. the values to be printed
> + *
> + * Returns zero on success and negative error code otherwise
> + */
> +int32_t btf_dumper_type(const struct btf_dumper *d, uint32_t type_id,
> + const void *data);
> +
> +#endif

Please don't add header files for a single struct and single function.
Just put this in main.h.

2018-07-03 21:48:28

by Okash Khawaja

[permalink] [raw]
Subject: Re: [PATCH bpf-next v2 2/3] bpf: btf: add btf print functionality

On Mon, Jul 02, 2018 at 10:06:59PM -0700, Jakub Kicinski wrote:
> On Mon, 2 Jul 2018 11:39:15 -0700, Okash Khawaja wrote:
[...]

>
> > +#define BITS_PER_BYTE_MASK (BITS_PER_BYTE - 1)
> > +#define BITS_PER_BYTE_MASKED(bits) ((bits) & BITS_PER_BYTE_MASK)
>
> Perhaps it's just me but BIT_OFFSET or BIT_COUNT as a name of this macro
> would make it more obvious to parse in the code below.
I don't mind either. However these macro names are also used inside
kernel for same purpose. For sake of consistency, I'd recommend we keep
them :)

>
> > +#define BITS_ROUNDDOWN_BYTES(bits) ((bits) >> 3)
> > +#define BITS_ROUNDUP_BYTES(bits) \
> > + (BITS_ROUNDDOWN_BYTES(bits) + !!BITS_PER_BYTE_MASKED(bits))
> > +
> > +static int btf_dumper_do_type(const struct btf_dumper *d, uint32_t type_id,
> > + uint8_t bit_offset, const void *data);
> > +
> > +static void btf_dumper_ptr(const void *data, json_writer_t *jw,
> > + bool is_plain_text)
> > +{
> > + if (is_plain_text)
> > + jsonw_printf(jw, "%p", *((uintptr_t *)data));
> > + else
> > + jsonw_printf(jw, "%u", *((uintptr_t *)data));
> > +}
> > +
> > +static int btf_dumper_modifier(const struct btf_dumper *d, uint32_t type_id,
> > + const void *data)
> > +{
> > + int32_t actual_type_id = btf__resolve_type(d->btf, type_id);
>
> Please prefer kernel types like __u32 wherever possible.
>
> > + int ret;
> > +
> > + if (actual_type_id < 0)
> > + return actual_type_id;
> > +
> > + ret = btf_dumper_do_type(d, actual_type_id, 0, data);
> > +
> > + return ret;
>
> ret is unnecessary.
>
> > +}
> > +
> > +static void btf_dumper_enum(const void *data, json_writer_t *jw)
> > +{
> > + jsonw_printf(jw, "%d", *((int32_t *)data));
>
> Unnecessary parenthesis. There is a lot of those, please remove them
> all.
>
> > +}
> > +
> > +static int btf_dumper_array(const struct btf_dumper *d, uint32_t type_id,
> > + const void *data)
> > +{
> > + const struct btf_type *t = btf__type_by_id(d->btf, type_id);
> > + struct btf_array *arr = (struct btf_array *)(t + 1);
> > + int64_t elem_size;
> > + int ret = 0;
> > + uint32_t i;
> > +
> > + elem_size = btf__resolve_size(d->btf, arr->type);
> > + if (elem_size < 0)
> > + return elem_size;
> > +
> > + jsonw_start_array(d->jw);
> > + for (i = 0; i < arr->nelems; i++) {
> > + ret = btf_dumper_do_type(d, arr->type, 0,
> > + data + (i * elem_size));
>
> Unnecessary parenthesis.
>
> > + if (ret)
> > + break;
> > + }
> > +
> > + jsonw_end_array(d->jw);
> > + return ret;
> > +}
> > +
> > +static void btf_dumper_int_bits(uint32_t int_type, uint8_t bit_offset,
> > + const void *data, json_writer_t *jw,
> > + bool is_plain_text)
> > +{
> > + uint32_t bits = BTF_INT_BITS(int_type);
> > + uint16_t total_bits_offset;
> > + uint16_t bytes_to_copy;
> > + uint16_t bits_to_copy;
>
> Please use normal int types for things which don't have to be
> explicitly sized. Using explicitly sized variables is bad style,
> and ALU operations other than on word or byte quantities are usually
> slower on modern CPUs.
>
> > + uint8_t upper_bits;
> > + union {
> > + uint64_t u64_num;
> > + uint8_t u8_nums[8];
>
> Are the int types in BTF constrained to 64bit at most?
>
> > + } print_num;
> > +
> > + total_bits_offset = bit_offset + BTF_INT_OFFSET(int_type);
> > + data += BITS_ROUNDDOWN_BYTES(total_bits_offset);
> > + bit_offset = BITS_PER_BYTE_MASKED(total_bits_offset);
> > + bits_to_copy = bits + bit_offset;
> > + bytes_to_copy = BITS_ROUNDUP_BYTES(bits_to_copy);
> > +
> > + print_num.u64_num = 0;
> > + memcpy(&print_num.u64_num, data, bytes_to_copy);
>
> This scheme is unlikely to work on big endian machines...
Can you give an example how?

>
> > + upper_bits = BITS_PER_BYTE_MASKED(bits_to_copy);
> > + if (upper_bits) {
> > + uint8_t mask = (1 << upper_bits) - 1;
> > +
> > + print_num.u8_nums[bytes_to_copy - 1] &= mask;
> > + }
> > +
> > + print_num.u64_num >>= bit_offset;
> > +
> > + if (is_plain_text)
> > + jsonw_printf(jw, "0x%llx", print_num.u64_num);
> > + else
> > + jsonw_printf(jw, "%llu", print_num.u64_num);
> > +}
> > +
> > +static int btf_dumper_int(const struct btf_type *t, uint8_t bit_offset,
> > + const void *data, json_writer_t *jw,
> > + bool is_plain_text)
> > +{
> > + uint32_t *int_type = (uint32_t *)(t + 1);
> > + uint32_t bits = BTF_INT_BITS(*int_type);
> > + int ret = 0;
> > +
> > + /* if this is bit field */
> > + if (bit_offset || BTF_INT_OFFSET(*int_type) ||
> > + BITS_PER_BYTE_MASKED(bits)) {
> > + btf_dumper_int_bits(*int_type, bit_offset, data, jw,
> > + is_plain_text);
> > + return ret;
> > + }
> > +
> > + switch (BTF_INT_ENCODING(*int_type)) {
> > + case 0:
> > + if (BTF_INT_BITS(*int_type) == 64)
> > + jsonw_printf(jw, "%lu", *((uint64_t *)data));
> > + else if (BTF_INT_BITS(*int_type) == 32)
> > + jsonw_printf(jw, "%u", *((uint32_t *)data));
> > + else if (BTF_INT_BITS(*int_type) == 16)
> > + jsonw_printf(jw, "%hu", *((uint16_t *)data));
> > + else if (BTF_INT_BITS(*int_type) == 8)
> > + jsonw_printf(jw, "%hhu", *((uint8_t *)data));
> > + else
> > + btf_dumper_int_bits(*int_type, bit_offset, data, jw,
> > + is_plain_text);
> > + break;
> > + case BTF_INT_SIGNED:
> > + if (BTF_INT_BITS(*int_type) == 64)
> > + jsonw_printf(jw, "%ld", *((int64_t *)data));
> > + else if (BTF_INT_BITS(*int_type) == 32)
> > + jsonw_printf(jw, "%d", *((int32_t *)data));
> > + else if (BTF_INT_BITS(*int_type) == 16)
>
> Please drop the double space. Both for 16 where it makes no sense and
> for 8 where it's marginally useful but not really.
>
> > + jsonw_printf(jw, "%hd", *((int16_t *)data));
> > + else if (BTF_INT_BITS(*int_type) == 8)
> > + jsonw_printf(jw, "%hhd", *((int8_t *)data));
> > + else
> > + btf_dumper_int_bits(*int_type, bit_offset, data, jw,
> > + is_plain_text);
> > + break;
> > + case BTF_INT_CHAR:
> > + if (*((char *)data) == '\0')
> > + jsonw_null(jw);
>
> Mm.. I don't think 0 char is equivalent to null.
Yes, thanks. Will fix.

>
> > + else if (isprint(*((char *)data)))
> > + jsonw_printf(jw, "\"%c\"", *((char *)data));
>
> This looks very suspicious. So if I see a "6" for a char field it's
> either a 6 ('\u0006') or a 54 ('6')...
It will always be 54. May be I missed your point. Could you explain why
it would be other than 54?

>
> > + else
> > + if (is_plain_text)
> > + jsonw_printf(jw, "%hhx", *((char *)data));
> > + else
> > + jsonw_printf(jw, "%hhd", *((char *)data));
>
> ... I think you need to always print a string, and express it as
> \u00%02hhx for non-printable.
Okay that makes sense
>
> > + break;
> > + case BTF_INT_BOOL:
> > + jsonw_bool(jw, *((int *)data));
> > + break;
> > + default:
> > + /* shouldn't happen */
> > + ret = -EINVAL;
>
> You only set ret to something else than 0 here just to break and
> immediately return. Please remove the ret variable.
>
> > + break;
> > + }
> > +
> > + return ret;
> > +}
> > +
> > +static int btf_dumper_struct(const struct btf_dumper *d, uint32_t type_id,
> > + const void *data)
> > +{
> > + const struct btf_type *t = btf__type_by_id(d->btf, type_id);
>
> Please don't call functions which need error checking as initialized
> the if below looks very awkward..
>
> > + struct btf_member *m;
> > + int ret = 0;
> > +
> > + int i, vlen;
> > +
> > + if (!t)
> > + return -EINVAL;
> > +
> > + vlen = BTF_INFO_VLEN(t->info);
> > + jsonw_start_object(d->jw);
> > + m = (struct btf_member *)(t + 1);
> > +
> > + for (i = 0; i < vlen; i++) {
> > + jsonw_name(d->jw, btf__name_by_offset(d->btf, m[i].name_off));
> > + ret = btf_dumper_do_type(d, m[i].type,
> > + BITS_PER_BYTE_MASKED(m[i].offset), data
> > + + BITS_ROUNDDOWN_BYTES(m[i].offset));
>
> Please use a temp variable to avoid this awkward multi-line sum.
>
> > + if (ret)
> > + return ret;
>
> You can't return without jsonw_end_object().
>
> > + }
> > +
> > + jsonw_end_object(d->jw);
> > +
> > + return 0;
> > +}
> > +
> > +static int btf_dumper_do_type(const struct btf_dumper *d, uint32_t type_id,
> > + uint8_t bit_offset, const void *data)
> > +{
> > + const struct btf_type *t = btf__type_by_id(d->btf, type_id);
> > + int ret = 0;
> > +
> > + switch (BTF_INFO_KIND(t->info)) {
> > + case BTF_KIND_INT:
> > + ret = btf_dumper_int(t, bit_offset, data, d->jw,
> > + d->is_plain_text);
> > + break;
> > + case BTF_KIND_STRUCT:
> > + case BTF_KIND_UNION:
> > + ret = btf_dumper_struct(d, type_id, data);
> > + break;
> > + case BTF_KIND_ARRAY:
> > + ret = btf_dumper_array(d, type_id, data);
> > + break;
> > + case BTF_KIND_ENUM:
> > + btf_dumper_enum(data, d->jw);
> > + break;
> > + case BTF_KIND_PTR:
> > + btf_dumper_ptr(data, d->jw, d->is_plain_text);
> > + break;
> > + case BTF_KIND_UNKN:
> > + jsonw_printf(d->jw, "(unknown)");
> > + break;
> > + case BTF_KIND_FWD:
> > + /* map key or value can't be forward */
>
> Right, but you have to print _something_, otherwise we would have a
> name without a value, which would break JSON, no?
>
> > + ret = -EINVAL;
> > + break;
> > + case BTF_KIND_TYPEDEF:
> > + case BTF_KIND_VOLATILE:
> > + case BTF_KIND_CONST:
> > + case BTF_KIND_RESTRICT:
> > + ret = btf_dumper_modifier(d, type_id, data);
> > + break;
> > + default:
> > + jsonw_printf(d->jw, "(unsupported-kind");
> > + ret = -EINVAL;
> > + break;
> > + }
> > +
> > + return ret;
>
> Why return ret; at all, there is no code after the switch just return
> directly from cases and save 9 LOC.
>
> > +}
> > +
> > +int32_t btf_dumper_type(const struct btf_dumper *d, uint32_t type_id,
> > + const void *data)
> > +{
> > + if (!d)
> > + return -EINVAL;
>
> No need for defensive programming.
>
> > + return btf_dumper_do_type(d, type_id, 0, data);
> > +}
> > --- /dev/null
> > +++ b/tools/bpf/bpftool/btf_dumper.h
> > @@ -0,0 +1,23 @@
> > +/* SPDX-License-Identifier: GPL-2.0 */
> > +/* Copyright (c) 2018 Facebook */
> > +
> > +#ifndef BTF_DUMPER_H
> > +#define BTF_DUMPER_H
> > +
> > +struct btf_dumper {
> > + const struct btf *btf;
> > + json_writer_t *jw;
> > + bool is_plain_text;
> > +};
> > +
> > +/* btf_dumper_type - print data along with type information
> > + * @d: an instance containing context for dumping types
> > + * @type_id: index in btf->types array. this points to the type to be dumped
> > + * @data: pointer the actual data, i.e. the values to be printed
> > + *
> > + * Returns zero on success and negative error code otherwise
> > + */
> > +int32_t btf_dumper_type(const struct btf_dumper *d, uint32_t type_id,
> > + const void *data);
> > +
> > +#endif
>
> Please don't add header files for a single struct and single function.
> Just put this in main.h.

Thanks for your feedback. I'll reply with v3.

Okash

2018-07-03 22:24:32

by Jakub Kicinski

[permalink] [raw]
Subject: Re: [PATCH bpf-next v2 2/3] bpf: btf: add btf print functionality

On Tue, 3 Jul 2018 22:46:00 +0100, Okash Khawaja wrote:
> On Mon, Jul 02, 2018 at 10:06:59PM -0700, Jakub Kicinski wrote:
> > On Mon, 2 Jul 2018 11:39:15 -0700, Okash Khawaja wrote:
> > > +#define BITS_PER_BYTE_MASK (BITS_PER_BYTE - 1)
> > > +#define BITS_PER_BYTE_MASKED(bits) ((bits) & BITS_PER_BYTE_MASK)
> >
> > Perhaps it's just me but BIT_OFFSET or BIT_COUNT as a name of this macro
> > would make it more obvious to parse in the code below.
> I don't mind either. However these macro names are also used inside
> kernel for same purpose. For sake of consistency, I'd recommend we keep
> them :)

Ugh, okay :)

> > > + } print_num;
> > > +
> > > + total_bits_offset = bit_offset + BTF_INT_OFFSET(int_type);
> > > + data += BITS_ROUNDDOWN_BYTES(total_bits_offset);
> > > + bit_offset = BITS_PER_BYTE_MASKED(total_bits_offset);
> > > + bits_to_copy = bits + bit_offset;
> > > + bytes_to_copy = BITS_ROUNDUP_BYTES(bits_to_copy);
> > > +
> > > + print_num.u64_num = 0;
> > > + memcpy(&print_num.u64_num, data, bytes_to_copy);
> >
> > This scheme is unlikely to work on big endian machines...
> Can you give an example how?

On BE:

Input: [0x01, 0x82]
Bit length: 15
Bytes to copy: 2
bit_offset: 0
upper_bits: 7

print_num.u64_num = 0;
# [0, 0, 0, 0, 0, 0, 0, 0]

memcpy(&print_num.u64_num, data, bytes_to_copy);
# [0x01, 0x82, 0, 0, 0, 0, 0, 0]

mask = (1 << upper_bits) - 1;
# mask = 0x7f

print_num.u8_nums[bytes_to_copy - 1] &= mask;
# [0x01, 0x02, 0, 0, 0, 0, 0, 0]

printf("0x%llx", print_num.u64_num);
# 0x0102000000000000 AKA 72620543991349248
# expected:
# 0x0102 AKA 258

Am I missing something?

> > > + upper_bits = BITS_PER_BYTE_MASKED(bits_to_copy);
> > > + if (upper_bits) {
> > > + uint8_t mask = (1 << upper_bits) - 1;
> > > +
> > > + print_num.u8_nums[bytes_to_copy - 1] &= mask;
> > > + }
> > > +
> > > + print_num.u64_num >>= bit_offset;
> > > +
> > > + if (is_plain_text)
> > > + jsonw_printf(jw, "0x%llx", print_num.u64_num);
> > > + else
> > > + jsonw_printf(jw, "%llu", print_num.u64_num);
> > > +}
> > > +
> > > +static int btf_dumper_int(const struct btf_type *t, uint8_t bit_offset,
> > > + const void *data, json_writer_t *jw,
> > > + bool is_plain_text)
> > > +{
> > > + uint32_t *int_type = (uint32_t *)(t + 1);
> > > + uint32_t bits = BTF_INT_BITS(*int_type);
> > > + int ret = 0;
> > > +
> > > + /* if this is bit field */
> > > + if (bit_offset || BTF_INT_OFFSET(*int_type) ||
> > > + BITS_PER_BYTE_MASKED(bits)) {
> > > + btf_dumper_int_bits(*int_type, bit_offset, data, jw,
> > > + is_plain_text);
> > > + return ret;
> > > + }
> > > +
> > > + switch (BTF_INT_ENCODING(*int_type)) {
> > > + case 0:
> > > + if (BTF_INT_BITS(*int_type) == 64)
> > > + jsonw_printf(jw, "%lu", *((uint64_t *)data));
> > > + else if (BTF_INT_BITS(*int_type) == 32)
> > > + jsonw_printf(jw, "%u", *((uint32_t *)data));
> > > + else if (BTF_INT_BITS(*int_type) == 16)
> > > + jsonw_printf(jw, "%hu", *((uint16_t *)data));
> > > + else if (BTF_INT_BITS(*int_type) == 8)
> > > + jsonw_printf(jw, "%hhu", *((uint8_t *)data));
> > > + else
> > > + btf_dumper_int_bits(*int_type, bit_offset, data, jw,
> > > + is_plain_text);
> > > + break;
> > > + case BTF_INT_SIGNED:
> > > + if (BTF_INT_BITS(*int_type) == 64)
> > > + jsonw_printf(jw, "%ld", *((int64_t *)data));
> > > + else if (BTF_INT_BITS(*int_type) == 32)
> > > + jsonw_printf(jw, "%d", *((int32_t *)data));
> > > + else if (BTF_INT_BITS(*int_type) == 16)
> >
> > Please drop the double space. Both for 16 where it makes no sense and
> > for 8 where it's marginally useful but not really.
> >
> > > + jsonw_printf(jw, "%hd", *((int16_t *)data));
> > > + else if (BTF_INT_BITS(*int_type) == 8)
> > > + jsonw_printf(jw, "%hhd", *((int8_t *)data));
> > > + else
> > > + btf_dumper_int_bits(*int_type, bit_offset, data, jw,
> > > + is_plain_text);
> > > + break;
> > > + case BTF_INT_CHAR:
> > > + if (*((char *)data) == '\0')
> > > + jsonw_null(jw);
> >
> > Mm.. I don't think 0 char is equivalent to null.
> Yes, thanks. Will fix.
>
> >
> > > + else if (isprint(*((char *)data)))
> > > + jsonw_printf(jw, "\"%c\"", *((char *)data));
> >
> > This looks very suspicious. So if I see a "6" for a char field it's
> > either a 6 ('\u0006') or a 54 ('6')...
> It will always be 54. May be I missed your point. Could you explain why
> it would be other than 54?

Ah, I think I missed that %c is in quotes...

> > > + else
> > > + if (is_plain_text)
> > > + jsonw_printf(jw, "%hhx", *((char *)data));

This seems to be missing a "0x" prefix?

> > > + else
> > > + jsonw_printf(jw, "%hhd", *((char *)data));
> >
> > ... I think you need to always print a string, and express it as
> > \u00%02hhx for non-printable.
> Okay that makes sense

Yeah, IDK, char can be used as a byte as well as a string. In eBPF
it may actually be more likely to just be used as a raw byte buffer...
Either way I think it may be nice to keep it consistent, at least for
the JSON output could we do either always ints or always characters?

2018-07-03 22:39:50

by Jakub Kicinski

[permalink] [raw]
Subject: Re: [PATCH bpf-next v2 2/3] bpf: btf: add btf print functionality

On Tue, 3 Jul 2018 15:23:31 -0700, Jakub Kicinski wrote:
> > > > + else
> > > > + jsonw_printf(jw, "%hhd", *((char *)data));
> > >
> > > ... I think you need to always print a string, and express it as
> > > \u00%02hhx for non-printable.
> > Okay that makes sense
>
> Yeah, IDK, char can be used as a byte as well as a string. In eBPF
> it may actually be more likely to just be used as a raw byte buffer...

Actually, what is the definition/purpose of BTF_INT_CHAR? There seems
to be no BTF_INT_SHORT and BTF_INT_SIGNED can simply be of size 8...
Is normal int only used for bitfields of size 8 and BTF_INT_CHAR for
char variables?

The kernel seems to be rejecting combinations of those flags, is
unsigned char going to not be marked as char then?

> Either way I think it may be nice to keep it consistent, at least for
> the JSON output could we do either always ints or always characters?


2018-07-03 23:35:20

by Martin KaFai Lau

[permalink] [raw]
Subject: Re: [PATCH bpf-next v2 2/3] bpf: btf: add btf print functionality

On Tue, Jul 03, 2018 at 03:38:43PM -0700, Jakub Kicinski wrote:
> On Tue, 3 Jul 2018 15:23:31 -0700, Jakub Kicinski wrote:
> > > > > + else
> > > > > + jsonw_printf(jw, "%hhd", *((char *)data));
> > > >
> > > > ... I think you need to always print a string, and express it as
> > > > \u00%02hhx for non-printable.
> > > Okay that makes sense
> >
> > Yeah, IDK, char can be used as a byte as well as a string. In eBPF
> > it may actually be more likely to just be used as a raw byte buffer...
>
> Actually, what is the definition/purpose of BTF_INT_CHAR? There seems
> to be no BTF_INT_SHORT and BTF_INT_SIGNED can simply be of size 8...
> Is normal int only used for bitfields of size 8 and BTF_INT_CHAR for
> char variables?
>
> The kernel seems to be rejecting combinations of those flags, is
> unsigned char going to not be marked as char then?
BTF_INT_ENOCODING (CHAR/SIGNED/BOOL) is for formatting (e.g. pretty
print). It is mainly how CTF is using it also. Hence, BTF_INT_ENCODINGs
is not a 1:1 mapping to C integer types.
The size of an interger is described by BTF_INT_BITS instead.

>
> > Either way I think it may be nice to keep it consistent, at least for
> > the JSON output could we do either always ints or always characters?
>

2018-07-04 16:32:49

by Okash Khawaja

[permalink] [raw]
Subject: Re: [PATCH bpf-next v2 2/3] bpf: btf: add btf print functionality

hi,

On Tue, Jul 03, 2018 at 03:23:31PM -0700, Jakub Kicinski wrote:
> On Tue, 3 Jul 2018 22:46:00 +0100, Okash Khawaja wrote:
> > On Mon, Jul 02, 2018 at 10:06:59PM -0700, Jakub Kicinski wrote:
> > > On Mon, 2 Jul 2018 11:39:15 -0700, Okash Khawaja wrote:
> > > > +#define BITS_PER_BYTE_MASK (BITS_PER_BYTE - 1)
> > > > +#define BITS_PER_BYTE_MASKED(bits) ((bits) & BITS_PER_BYTE_MASK)
> > >
> > > Perhaps it's just me but BIT_OFFSET or BIT_COUNT as a name of this macro
> > > would make it more obvious to parse in the code below.
> > I don't mind either. However these macro names are also used inside
> > kernel for same purpose. For sake of consistency, I'd recommend we keep
> > them :)
>
> Ugh, okay :)
>
> > > > + } print_num;
> > > > +
> > > > + total_bits_offset = bit_offset + BTF_INT_OFFSET(int_type);
> > > > + data += BITS_ROUNDDOWN_BYTES(total_bits_offset);
> > > > + bit_offset = BITS_PER_BYTE_MASKED(total_bits_offset);
> > > > + bits_to_copy = bits + bit_offset;
> > > > + bytes_to_copy = BITS_ROUNDUP_BYTES(bits_to_copy);
> > > > +
> > > > + print_num.u64_num = 0;
> > > > + memcpy(&print_num.u64_num, data, bytes_to_copy);
> > >
> > > This scheme is unlikely to work on big endian machines...
> > Can you give an example how?
>
> On BE:
>
> Input: [0x01, 0x82]
> Bit length: 15
> Bytes to copy: 2
> bit_offset: 0
> upper_bits: 7
>
> print_num.u64_num = 0;
> # [0, 0, 0, 0, 0, 0, 0, 0]
>
> memcpy(&print_num.u64_num, data, bytes_to_copy);
> # [0x01, 0x82, 0, 0, 0, 0, 0, 0]
>
> mask = (1 << upper_bits) - 1;
> # mask = 0x7f
>
> print_num.u8_nums[bytes_to_copy - 1] &= mask;
> # [0x01, 0x02, 0, 0, 0, 0, 0, 0]
>
> printf("0x%llx", print_num.u64_num);
> # 0x0102000000000000 AKA 72620543991349248
> # expected:
> # 0x0102 AKA 258
>
> Am I missing something?
yes you're right, good catch! i'll fix this. thanks vrey much :)

>
> > > > + upper_bits = BITS_PER_BYTE_MASKED(bits_to_copy);
> > > > + if (upper_bits) {
> > > > + uint8_t mask = (1 << upper_bits) - 1;
> > > > +
> > > > + print_num.u8_nums[bytes_to_copy - 1] &= mask;
> > > > + }
> > > > +
> > > > + print_num.u64_num >>= bit_offset;
> > > > +
> > > > + if (is_plain_text)
> > > > + jsonw_printf(jw, "0x%llx", print_num.u64_num);
> > > > + else
> > > > + jsonw_printf(jw, "%llu", print_num.u64_num);
> > > > +}
> > > > +
> > > > +static int btf_dumper_int(const struct btf_type *t, uint8_t bit_offset,
> > > > + const void *data, json_writer_t *jw,
> > > > + bool is_plain_text)
> > > > +{
> > > > + uint32_t *int_type = (uint32_t *)(t + 1);
> > > > + uint32_t bits = BTF_INT_BITS(*int_type);
> > > > + int ret = 0;
> > > > +
> > > > + /* if this is bit field */
> > > > + if (bit_offset || BTF_INT_OFFSET(*int_type) ||
> > > > + BITS_PER_BYTE_MASKED(bits)) {
> > > > + btf_dumper_int_bits(*int_type, bit_offset, data, jw,
> > > > + is_plain_text);
> > > > + return ret;
> > > > + }
> > > > +
> > > > + switch (BTF_INT_ENCODING(*int_type)) {
> > > > + case 0:
> > > > + if (BTF_INT_BITS(*int_type) == 64)
> > > > + jsonw_printf(jw, "%lu", *((uint64_t *)data));
> > > > + else if (BTF_INT_BITS(*int_type) == 32)
> > > > + jsonw_printf(jw, "%u", *((uint32_t *)data));
> > > > + else if (BTF_INT_BITS(*int_type) == 16)
> > > > + jsonw_printf(jw, "%hu", *((uint16_t *)data));
> > > > + else if (BTF_INT_BITS(*int_type) == 8)
> > > > + jsonw_printf(jw, "%hhu", *((uint8_t *)data));
> > > > + else
> > > > + btf_dumper_int_bits(*int_type, bit_offset, data, jw,
> > > > + is_plain_text);
> > > > + break;
> > > > + case BTF_INT_SIGNED:
> > > > + if (BTF_INT_BITS(*int_type) == 64)
> > > > + jsonw_printf(jw, "%ld", *((int64_t *)data));
> > > > + else if (BTF_INT_BITS(*int_type) == 32)
> > > > + jsonw_printf(jw, "%d", *((int32_t *)data));
> > > > + else if (BTF_INT_BITS(*int_type) == 16)
> > >
> > > Please drop the double space. Both for 16 where it makes no sense and
> > > for 8 where it's marginally useful but not really.
> > >
> > > > + jsonw_printf(jw, "%hd", *((int16_t *)data));
> > > > + else if (BTF_INT_BITS(*int_type) == 8)
> > > > + jsonw_printf(jw, "%hhd", *((int8_t *)data));
> > > > + else
> > > > + btf_dumper_int_bits(*int_type, bit_offset, data, jw,
> > > > + is_plain_text);
> > > > + break;
> > > > + case BTF_INT_CHAR:
> > > > + if (*((char *)data) == '\0')
> > > > + jsonw_null(jw);
> > >
> > > Mm.. I don't think 0 char is equivalent to null.
> > Yes, thanks. Will fix.
> >
> > >
> > > > + else if (isprint(*((char *)data)))
> > > > + jsonw_printf(jw, "\"%c\"", *((char *)data));
> > >
> > > This looks very suspicious. So if I see a "6" for a char field it's
> > > either a 6 ('\u0006') or a 54 ('6')...
> > It will always be 54. May be I missed your point. Could you explain why
> > it would be other than 54?
>
> Ah, I think I missed that %c is in quotes...
>
> > > > + else
> > > > + if (is_plain_text)
> > > > + jsonw_printf(jw, "%hhx", *((char *)data));
>
> This seems to be missing a "0x" prefix?
yes it does. will add 0x.

>
> > > > + else
> > > > + jsonw_printf(jw, "%hhd", *((char *)data));
> > >
> > > ... I think you need to always print a string, and express it as
> > > \u00%02hhx for non-printable.
> > Okay that makes sense
>
> Yeah, IDK, char can be used as a byte as well as a string. In eBPF
> it may actually be more likely to just be used as a raw byte buffer...
> Either way I think it may be nice to keep it consistent, at least for
> the JSON output could we do either always ints or always characters?
yes, makes sense. i'll keep them always characters.

2018-07-07 13:34:14

by Okash Khawaja

[permalink] [raw]
Subject: Re: [PATCH bpf-next v2 2/3] bpf: btf: add btf print functionality

On Tue, Jul 03, 2018 at 04:33:50PM -0700, Martin KaFai Lau wrote:
> On Tue, Jul 03, 2018 at 03:38:43PM -0700, Jakub Kicinski wrote:
> > On Tue, 3 Jul 2018 15:23:31 -0700, Jakub Kicinski wrote:
> > > > > > + else
> > > > > > + jsonw_printf(jw, "%hhd", *((char *)data));
> > > > >
> > > > > ... I think you need to always print a string, and express it as
> > > > > \u00%02hhx for non-printable.
> > > > Okay that makes sense
> > >
> > > Yeah, IDK, char can be used as a byte as well as a string. In eBPF
> > > it may actually be more likely to just be used as a raw byte buffer...
> >
> > Actually, what is the definition/purpose of BTF_INT_CHAR? There seems
> > to be no BTF_INT_SHORT and BTF_INT_SIGNED can simply be of size 8...
> > Is normal int only used for bitfields of size 8 and BTF_INT_CHAR for
> > char variables?
> >
> > The kernel seems to be rejecting combinations of those flags, is
> > unsigned char going to not be marked as char then?
> BTF_INT_ENOCODING (CHAR/SIGNED/BOOL) is for formatting (e.g. pretty
> print). It is mainly how CTF is using it also. Hence, BTF_INT_ENCODINGs
> is not a 1:1 mapping to C integer types.
> The size of an interger is described by BTF_INT_BITS instead.
>
> >
> > > Either way I think it may be nice to keep it consistent, at least for
> > > the JSON output could we do either always ints or always characters?
> >

for !isprint() case, will "\x%02hhx" make more sense?

2018-07-07 18:50:45

by Jakub Kicinski

[permalink] [raw]
Subject: Re: [PATCH bpf-next v2 2/3] bpf: btf: add btf print functionality

On Sat, 7 Jul 2018 14:30:58 +0100, Okash Khawaja wrote:
> On Tue, Jul 03, 2018 at 04:33:50PM -0700, Martin KaFai Lau wrote:
> > On Tue, Jul 03, 2018 at 03:38:43PM -0700, Jakub Kicinski wrote:
> > > On Tue, 3 Jul 2018 15:23:31 -0700, Jakub Kicinski wrote:
> > > > > > > + else
> > > > > > > + jsonw_printf(jw, "%hhd", *((char *)data));
> > > > > >
> > > > > > ... I think you need to always print a string, and express it as
> > > > > > \u00%02hhx for non-printable.
> > > > > Okay that makes sense
> > > >
> > > > Yeah, IDK, char can be used as a byte as well as a string. In eBPF
> > > > it may actually be more likely to just be used as a raw byte buffer...
> > >
> > > Actually, what is the definition/purpose of BTF_INT_CHAR? There seems
> > > to be no BTF_INT_SHORT and BTF_INT_SIGNED can simply be of size 8...
> > > Is normal int only used for bitfields of size 8 and BTF_INT_CHAR for
> > > char variables?
> > >
> > > The kernel seems to be rejecting combinations of those flags, is
> > > unsigned char going to not be marked as char then?
> > BTF_INT_ENOCODING (CHAR/SIGNED/BOOL) is for formatting (e.g. pretty
> > print). It is mainly how CTF is using it also. Hence, BTF_INT_ENCODINGs
> > is not a 1:1 mapping to C integer types.
> > The size of an interger is described by BTF_INT_BITS instead.
> >
> > >
> > > > Either way I think it may be nice to keep it consistent, at least for
> > > > the JSON output could we do either always ints or always characters?
> > >
>
> for !isprint() case, will "\x%02hhx" make more sense?

According to (quick look over) the JSON standard \x%02hhx is not a
valid escape sequence, everything has to be Unicode, so \u00%02hhx.
JSON validators online agree seem to reject \x as well.