This series adds optimiztion for division by constants and updates the
histogram trigger expression kselftests and documentation.
It is dependent on the series at [1] and the fix at [2]; and can be applied
on top of those after dropping the patch 7 in [1].
[1] https://lore.kernel.org/r/[email protected]/
[2] https://lore.kernel.org/r/[email protected]/
Kalesh Singh (4):
tracing/histogram: Optimize division by constants (v2)
tracing/histogram: Update division by 0 documentation (v1)
tracing/histogram: Document hist trigger variables (v3)
tracing/selftests: Add tests for hist trigger expression parsing (v7)
Documentation/trace/histogram.rst | 3 +-
kernel/trace/trace.c | 11 ++
kernel/trace/trace_events_hist.c | 117 +++++++++++++++++-
.../trigger/trigger-hist-expressions.tc | 63 ++++++++++
4 files changed, 192 insertions(+), 2 deletions(-)
create mode 100644 tools/testing/selftests/ftrace/test.d/trigger/trigger-hist-expressions.tc
--
2.33.1.1089.g2158813163f-goog
If the divisor is a constant use specific division functions to
avoid extra branches when the trigger is hit.
If the divisor constant but not a power of 2, the division can be
replaced with a multiplication and shift in the following case:
Let X = dividend and Y = divisor.
Choose Z = some power of 2. If Y <= Z, then:
X / Y = (X * (Z / Y)) / Z
(Z / Y) is a constant (mult) which is calculated at parse time, so:
X / Y = (X * mult) / Z
The division by Z can be replaced by a shift since Z is a power of 2:
X / Y = (X * mult) >> shift
As long, as X < Z the results will not be off by more than 1.
Signed-off-by: Kalesh Singh <[email protected]>
Suggested-by: Steven Rostedt <[email protected]>
---
Changes in v2:
- Return -EDOM if divisor is a constant and zero, per Steve
kernel/trace/trace_events_hist.c | 117 ++++++++++++++++++++++++++++++-
1 file changed, 116 insertions(+), 1 deletion(-)
diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c
index 364cb3091789..1084aa41f047 100644
--- a/kernel/trace/trace_events_hist.c
+++ b/kernel/trace/trace_events_hist.c
@@ -68,7 +68,8 @@
C(INVALID_SORT_FIELD, "Sort field must be a key or a val"), \
C(INVALID_STR_OPERAND, "String type can not be an operand in expression"), \
C(EXPECT_NUMBER, "Expecting numeric literal"), \
- C(UNARY_MINUS_SUBEXPR, "Unary minus not supported in sub-expressions"),
+ C(UNARY_MINUS_SUBEXPR, "Unary minus not supported in sub-expressions"), \
+ C(DIVISION_BY_ZERO, "Division by zero"),
#undef C
#define C(a, b) HIST_ERR_##a
@@ -92,6 +93,7 @@ typedef u64 (*hist_field_fn_t) (struct hist_field *field,
#define HIST_FIELDS_MAX (TRACING_MAP_FIELDS_MAX + TRACING_MAP_VARS_MAX)
#define HIST_ACTIONS_MAX 8
#define HIST_CONST_DIGITS_MAX 21
+#define HIST_DIV_SHIFT 20 /* For optimizing division by constants */
enum field_op_id {
FIELD_OP_NONE,
@@ -160,6 +162,8 @@ struct hist_field {
/* Numeric literals are represented as u64 */
u64 constant;
+ /* Used to optimize division by constants */
+ u64 div_multiplier;
};
static u64 hist_field_none(struct hist_field *field,
@@ -311,6 +315,72 @@ static u64 hist_field_div(struct hist_field *hist_field,
return div64_u64(val1, val2);
}
+static u64 div_by_power_of_two(struct hist_field *hist_field,
+ struct tracing_map_elt *elt,
+ struct trace_buffer *buffer,
+ struct ring_buffer_event *rbe,
+ void *event)
+{
+ struct hist_field *operand1 = hist_field->operands[0];
+ struct hist_field *operand2 = hist_field->operands[1];
+
+ u64 val1 = operand1->fn(operand1, elt, buffer, rbe, event);
+ u64 val2 = operand2->fn(operand2, elt, buffer, rbe, event);
+
+ return val1 >> __ffs64(val2);
+}
+
+static u64 div_by_not_power_of_two(struct hist_field *hist_field,
+ struct tracing_map_elt *elt,
+ struct trace_buffer *buffer,
+ struct ring_buffer_event *rbe,
+ void *event)
+{
+ struct hist_field *operand1 = hist_field->operands[0];
+ struct hist_field *operand2 = hist_field->operands[1];
+
+ u64 val1 = operand1->fn(operand1, elt, buffer, rbe, event);
+ u64 val2 = operand2->fn(operand2, elt, buffer, rbe, event);
+
+ return div64_u64(val1, val2);
+}
+
+static u64 div_by_mult_and_shift(struct hist_field *hist_field,
+ struct tracing_map_elt *elt,
+ struct trace_buffer *buffer,
+ struct ring_buffer_event *rbe,
+ void *event)
+{
+ struct hist_field *operand1 = hist_field->operands[0];
+ struct hist_field *operand2 = hist_field->operands[1];
+
+ u64 val1 = operand1->fn(operand1, elt, buffer, rbe, event);
+
+ /*
+ * If the divisor is a constant, do a multiplication and shift instead.
+ *
+ * Choose Z = some power of 2. If Y <= Z, then:
+ * X / Y = (X * (Z / Y)) / Z
+ *
+ * (Z / Y) is a constant (mult) which is calculated at parse time, so:
+ * X / Y = (X * mult) / Z
+ *
+ * The division by Z can be replaced by a shift since Z is a power of 2:
+ * X / Y = (X * mult) >> HIST_DIV_SHIFT
+ *
+ * As long, as X < Z the results will not be off by more than 1.
+ */
+ if (val1 < (1 << HIST_DIV_SHIFT)) {
+ u64 mult = operand2->div_multiplier;
+
+ return (val1 * mult + ((1 << HIST_DIV_SHIFT) - 1)) >> HIST_DIV_SHIFT;
+ } else {
+ u64 val2 = operand2->fn(operand2, elt, buffer, rbe, event);
+
+ return div64_u64(val1, val2);
+ }
+}
+
static u64 hist_field_mult(struct hist_field *hist_field,
struct tracing_map_elt *elt,
struct trace_buffer *buffer,
@@ -573,6 +643,37 @@ struct snapshot_context {
void *key;
};
+
+static struct hist_field *find_var_field(struct hist_trigger_data *hist_data,
+ const char *var_name);
+
+/*
+ * Returns the specific division function to use if the divisor
+ * is constant. This avoids extra branches when the trigger is hit.
+ */
+static hist_field_fn_t hist_field_get_div_fn(struct hist_field *divisor)
+{
+ u64 div;
+
+ if (divisor->flags & HIST_FIELD_FL_VAR_REF) {
+ struct hist_field *var;
+
+ var = find_var_field(divisor->var.hist_data, divisor->name);
+ div = var->constant;
+ } else
+ div = divisor->constant;
+
+ if (!(div & (div - 1)))
+ return div_by_power_of_two;
+
+ /* If the divisor is too large, do a regular division */
+ if (div > (1 << HIST_DIV_SHIFT))
+ return div_by_not_power_of_two;
+
+ divisor->div_multiplier = div64_u64((u64)(1 << HIST_DIV_SHIFT), div);
+ return div_by_mult_and_shift;
+}
+
static void track_data_free(struct track_data *track_data)
{
struct hist_elt_data *elt_data;
@@ -2575,6 +2676,20 @@ static struct hist_field *parse_expr(struct hist_trigger_data *hist_data,
expr->operands[0] = operand1;
expr->operands[1] = operand2;
+
+ if (field_op == FIELD_OP_DIV &&
+ operand2_flags & HIST_FIELD_FL_CONST) {
+ u64 divisor = (var2) ? var2->constant : operand2->constant;
+
+ if (!divisor) {
+ hist_err(file->tr, HIST_ERR_DIVISION_BY_ZERO, errpos(str));
+ ret = -EDOM;
+ goto free;
+ }
+
+ op_fn = hist_field_get_div_fn(operand2);
+ }
+
if (combine_consts) {
if (var1)
expr->operands[0] = var1;
--
2.33.1.1089.g2158813163f-goog
Update the tracefs README to describe how hist trigger variables
can be created.
Signed-off-by: Kalesh Singh <[email protected]>
Acked-by: Masami Hiramatsu <[email protected]>
---
Changes in v2:
- Add Masami's Acked-by.
kernel/trace/trace.c | 11 +++++++++++
1 file changed, 11 insertions(+)
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index bc677cd64224..c41b3786401d 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -5628,6 +5628,7 @@ static const char readme_msg[] =
#ifdef CONFIG_HIST_TRIGGERS
" hist trigger\t- If set, event hits are aggregated into a hash table\n"
"\t Format: hist:keys=<field1[,field2,...]>\n"
+ "\t [:<var1>=<field|var_ref|numeric_literal>[,<var2>=...]]\n"
"\t [:values=<field1[,field2,...]>]\n"
"\t [:sort=<field1[,field2,...]>]\n"
"\t [:size=#entries]\n"
@@ -5639,6 +5640,16 @@ static const char readme_msg[] =
"\t common_timestamp - to record current timestamp\n"
"\t common_cpu - to record the CPU the event happened on\n"
"\n"
+ "\t A hist trigger variable can be:\n"
+ "\t - a reference to a field e.g. x=current_timestamp,\n"
+ "\t - a reference to another variable e.g. y=$x,\n"
+ "\t - a numeric literal: e.g. ms_per_sec=1000,\n"
+ "\t - an arithmetic expression: e.g. time_secs=current_timestamp/1000\n"
+ "\n"
+ "\t hist trigger aritmethic expressions support addition(+), subtraction(-),\n"
+ "\t multiplication(*) and division(/) operators. An operand can be either a\n"
+ "\t variable reference, field or numeric literal.\n"
+ "\n"
"\t When a matching event is hit, an entry is added to a hash\n"
"\t table using the key(s) and value(s) named, and the value of a\n"
"\t sum called 'hitcount' is incremented. Keys and values\n"
--
2.33.1.1089.g2158813163f-goog
If the divisor is a constant and zero, the undeifned case can be
detected and an error returned instead of -1.
Signed-off-by: Kalesh Singh <[email protected]>
---
Documentation/trace/histogram.rst | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/Documentation/trace/histogram.rst b/Documentation/trace/histogram.rst
index 66ec972dfb78..859fd1b76c63 100644
--- a/Documentation/trace/histogram.rst
+++ b/Documentation/trace/histogram.rst
@@ -1766,7 +1766,8 @@ using the same key and variable from yet another event::
Expressions support the use of addition, subtraction, multiplication and
division operators (+-\*/).
-Note that division by zero always returns -1.
+Note if division by zero cannot be detected at parse time (i.e. the
+divisor is not a constant), the result will be -1.
Numeric constants can also be used directly in an expression::
--
2.33.1.1089.g2158813163f-goog
Add tests for the parsing of hist trigger expressions; and to
validate expression evaluation.
Signed-off-by: Kalesh Singh <[email protected]>
---
Changes in v7:
- Add error check test for divison by constant 0.
Changes in v6:
- Read the expression result from the trigger file,
instead of creating a histogram to print the value.
Changes in v5:
- Add README pattern to requires tag, per Masami
Changes in v3:
- Remove .sym-offset error check tests
Changes in v2:
- Add Namhyung's Reviewed-by
- Update comment to clarify err_pos in "Too many subexpressions" test
.../trigger/trigger-hist-expressions.tc | 63 +++++++++++++++++++
1 file changed, 63 insertions(+)
create mode 100644 tools/testing/selftests/ftrace/test.d/trigger/trigger-hist-expressions.tc
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist-expressions.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist-expressions.tc
new file mode 100644
index 000000000000..05ffba299dbf
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist-expressions.tc
@@ -0,0 +1,63 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: event trigger - test histogram expression parsing
+# requires: set_event events/sched/sched_process_fork/trigger events/sched/sched_process_fork/hist error_log "<var1>=<field|var_ref|numeric_literal>":README
+
+
+fail() { #msg
+ echo $1
+ exit_fail
+}
+
+test_hist_expr() { # test_name expression expected_val
+ trigger="events/sched/sched_process_fork/trigger"
+
+ reset_trigger_file $trigger
+
+ echo "Test hist trigger expressions - $1"
+
+ echo "hist:keys=common_pid:x=$2" > $trigger
+
+ for i in `seq 1 10` ; do ( echo "forked" > /dev/null); done
+
+ actual=`grep -o 'x=[[:digit:]]*' $trigger | awk -F= '{ print $2 }'`
+
+ if [ $actual != $3 ]; then
+ fail "Failed hist trigger expression evaluation: Expression: $2 Expected: $3, Actual: $actual"
+ fi
+
+ reset_trigger_file $trigger
+}
+
+check_error() { # test_name command-with-error-pos-by-^
+ trigger="events/sched/sched_process_fork/trigger"
+
+ echo "Test hist trigger expressions - $1"
+ ftrace_errlog_check 'hist:sched:sched_process_fork' "$2" $trigger
+}
+
+test_hist_expr "Variable assignment" "123" "123"
+
+test_hist_expr "Subtraction not associative" "16-8-4-2" "2"
+
+test_hist_expr "Division not associative" "64/8/4/2" "1"
+
+test_hist_expr "Same precedence operators (+,-) evaluated left to right" "16-8+4+2" "14"
+
+test_hist_expr "Same precedence operators (*,/) evaluated left to right" "4*3/2*2" "12"
+
+test_hist_expr "Multiplication evaluated before addition/subtraction" "4+3*2-2" "8"
+
+test_hist_expr "Division evaluated before addition/subtraction" "4+6/2-2" "5"
+
+# err pos for "too many subexpressions" is dependent on where
+# the last subexpression was detected. This can vary depending
+# on how the expression tree was generated.
+check_error "Too many subexpressions" 'hist:keys=common_pid:x=32+^10*3/20-4'
+check_error "Too many subexpressions" 'hist:keys=common_pid:x=^1+2+3+4+5'
+
+check_error "Unary minus not supported in subexpression" 'hist:keys=common_pid:x=-(^1)+2'
+
+check_error "Division by zero" 'hist:keys=common_pid:x=3/^0'
+
+exit 0
--
2.33.1.1089.g2158813163f-goog
On Fri, 29 Oct 2021 11:33:27 -0700
Kalesh Singh <[email protected]> wrote:
> If the divisor is a constant use specific division functions to
> avoid extra branches when the trigger is hit.
>
> If the divisor constant but not a power of 2, the division can be
> replaced with a multiplication and shift in the following case:
>
> Let X = dividend and Y = divisor.
>
> Choose Z = some power of 2. If Y <= Z, then:
> X / Y = (X * (Z / Y)) / Z
>
> (Z / Y) is a constant (mult) which is calculated at parse time, so:
> X / Y = (X * mult) / Z
>
> The division by Z can be replaced by a shift since Z is a power of 2:
> X / Y = (X * mult) >> shift
>
> As long, as X < Z the results will not be off by more than 1.
>
> Signed-off-by: Kalesh Singh <[email protected]>
> Suggested-by: Steven Rostedt <[email protected]>
> ---
>
> Changes in v2:
> - Return -EDOM if divisor is a constant and zero, per Steve
>
> kernel/trace/trace_events_hist.c | 117 ++++++++++++++++++++++++++++++-
> 1 file changed, 116 insertions(+), 1 deletion(-)
>
> diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c
> index 364cb3091789..1084aa41f047 100644
> --- a/kernel/trace/trace_events_hist.c
> +++ b/kernel/trace/trace_events_hist.c
> @@ -68,7 +68,8 @@
> C(INVALID_SORT_FIELD, "Sort field must be a key or a val"), \
> C(INVALID_STR_OPERAND, "String type can not be an operand in expression"), \
> C(EXPECT_NUMBER, "Expecting numeric literal"), \
> - C(UNARY_MINUS_SUBEXPR, "Unary minus not supported in sub-expressions"),
> + C(UNARY_MINUS_SUBEXPR, "Unary minus not supported in sub-expressions"), \
> + C(DIVISION_BY_ZERO, "Division by zero"),
>
> #undef C
> #define C(a, b) HIST_ERR_##a
> @@ -92,6 +93,7 @@ typedef u64 (*hist_field_fn_t) (struct hist_field *field,
> #define HIST_FIELDS_MAX (TRACING_MAP_FIELDS_MAX + TRACING_MAP_VARS_MAX)
> #define HIST_ACTIONS_MAX 8
> #define HIST_CONST_DIGITS_MAX 21
> +#define HIST_DIV_SHIFT 20 /* For optimizing division by constants */
>
> enum field_op_id {
> FIELD_OP_NONE,
> @@ -160,6 +162,8 @@ struct hist_field {
>
> /* Numeric literals are represented as u64 */
> u64 constant;
> + /* Used to optimize division by constants */
> + u64 div_multiplier;
> };
>
> static u64 hist_field_none(struct hist_field *field,
> @@ -311,6 +315,72 @@ static u64 hist_field_div(struct hist_field *hist_field,
> return div64_u64(val1, val2);
> }
>
> +static u64 div_by_power_of_two(struct hist_field *hist_field,
> + struct tracing_map_elt *elt,
> + struct trace_buffer *buffer,
> + struct ring_buffer_event *rbe,
> + void *event)
> +{
> + struct hist_field *operand1 = hist_field->operands[0];
> + struct hist_field *operand2 = hist_field->operands[1];
> +
> + u64 val1 = operand1->fn(operand1, elt, buffer, rbe, event);
> + u64 val2 = operand2->fn(operand2, elt, buffer, rbe, event);
If these functions are only called when val2 is constant, can't we make it
such that we get val2 from the hist_field directly? That is:
u64 val2 = operand2->constant;
That would save us a function call, and an indirect on at that (that gets
slowed down by spectre).
Same for the ones below.
-- Steve
> +
> + return val1 >> __ffs64(val2);
> +}
> +
> +static u64 div_by_not_power_of_two(struct hist_field *hist_field,
> + struct tracing_map_elt *elt,
> + struct trace_buffer *buffer,
> + struct ring_buffer_event *rbe,
> + void *event)
> +{
> + struct hist_field *operand1 = hist_field->operands[0];
> + struct hist_field *operand2 = hist_field->operands[1];
> +
> + u64 val1 = operand1->fn(operand1, elt, buffer, rbe, event);
> + u64 val2 = operand2->fn(operand2, elt, buffer, rbe, event);
> +
> + return div64_u64(val1, val2);
> +}
> +
> +static u64 div_by_mult_and_shift(struct hist_field *hist_field,
> + struct tracing_map_elt *elt,
> + struct trace_buffer *buffer,
> + struct ring_buffer_event *rbe,
> + void *event)
> +{
> + struct hist_field *operand1 = hist_field->operands[0];
> + struct hist_field *operand2 = hist_field->operands[1];
> +
> + u64 val1 = operand1->fn(operand1, elt, buffer, rbe, event);
> +
> + /*
> + * If the divisor is a constant, do a multiplication and shift instead.
> + *
> + * Choose Z = some power of 2. If Y <= Z, then:
> + * X / Y = (X * (Z / Y)) / Z
> + *
> + * (Z / Y) is a constant (mult) which is calculated at parse time, so:
> + * X / Y = (X * mult) / Z
> + *
> + * The division by Z can be replaced by a shift since Z is a power of 2:
> + * X / Y = (X * mult) >> HIST_DIV_SHIFT
> + *
> + * As long, as X < Z the results will not be off by more than 1.
> + */
> + if (val1 < (1 << HIST_DIV_SHIFT)) {
> + u64 mult = operand2->div_multiplier;
> +
> + return (val1 * mult + ((1 << HIST_DIV_SHIFT) - 1)) >> HIST_DIV_SHIFT;
> + } else {
> + u64 val2 = operand2->fn(operand2, elt, buffer, rbe, event);
> +
> + return div64_u64(val1, val2);
> + }
> +}
> +
> static u64 hist_field_mult(struct hist_field *hist_field,
> struct tracing_map_elt *elt,
> struct trace_buffer *buffer,
> @@ -573,6 +643,37 @@ struct snapshot_context {
> void *key;
> };
>
> +
> +static struct hist_field *find_var_field(struct hist_trigger_data *hist_data,
> + const char *var_name);
> +
> +/*
> + * Returns the specific division function to use if the divisor
> + * is constant. This avoids extra branches when the trigger is hit.
> + */
> +static hist_field_fn_t hist_field_get_div_fn(struct hist_field *divisor)
> +{
> + u64 div;
> +
> + if (divisor->flags & HIST_FIELD_FL_VAR_REF) {
> + struct hist_field *var;
> +
> + var = find_var_field(divisor->var.hist_data, divisor->name);
> + div = var->constant;
> + } else
> + div = divisor->constant;
> +
> + if (!(div & (div - 1)))
> + return div_by_power_of_two;
> +
> + /* If the divisor is too large, do a regular division */
> + if (div > (1 << HIST_DIV_SHIFT))
> + return div_by_not_power_of_two;
> +
> + divisor->div_multiplier = div64_u64((u64)(1 << HIST_DIV_SHIFT), div);
> + return div_by_mult_and_shift;
> +}
> +
> static void track_data_free(struct track_data *track_data)
> {
> struct hist_elt_data *elt_data;
> @@ -2575,6 +2676,20 @@ static struct hist_field *parse_expr(struct hist_trigger_data *hist_data,
> expr->operands[0] = operand1;
> expr->operands[1] = operand2;
>
> +
> + if (field_op == FIELD_OP_DIV &&
> + operand2_flags & HIST_FIELD_FL_CONST) {
> + u64 divisor = (var2) ? var2->constant : operand2->constant;
> +
> + if (!divisor) {
> + hist_err(file->tr, HIST_ERR_DIVISION_BY_ZERO, errpos(str));
> + ret = -EDOM;
> + goto free;
> + }
> +
> + op_fn = hist_field_get_div_fn(operand2);
> + }
> +
> if (combine_consts) {
> if (var1)
> expr->operands[0] = var1;
On Fri, Oct 29, 2021 at 11:45 AM Steven Rostedt <[email protected]> wrote:
>
> On Fri, 29 Oct 2021 11:33:27 -0700
> Kalesh Singh <[email protected]> wrote:
>
> > If the divisor is a constant use specific division functions to
> > avoid extra branches when the trigger is hit.
> >
> > If the divisor constant but not a power of 2, the division can be
> > replaced with a multiplication and shift in the following case:
> >
> > Let X = dividend and Y = divisor.
> >
> > Choose Z = some power of 2. If Y <= Z, then:
> > X / Y = (X * (Z / Y)) / Z
> >
> > (Z / Y) is a constant (mult) which is calculated at parse time, so:
> > X / Y = (X * mult) / Z
> >
> > The division by Z can be replaced by a shift since Z is a power of 2:
> > X / Y = (X * mult) >> shift
> >
> > As long, as X < Z the results will not be off by more than 1.
> >
> > Signed-off-by: Kalesh Singh <[email protected]>
> > Suggested-by: Steven Rostedt <[email protected]>
> > ---
> >
> > Changes in v2:
> > - Return -EDOM if divisor is a constant and zero, per Steve
> >
> > kernel/trace/trace_events_hist.c | 117 ++++++++++++++++++++++++++++++-
> > 1 file changed, 116 insertions(+), 1 deletion(-)
> >
> > diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c
> > index 364cb3091789..1084aa41f047 100644
> > --- a/kernel/trace/trace_events_hist.c
> > +++ b/kernel/trace/trace_events_hist.c
> > @@ -68,7 +68,8 @@
> > C(INVALID_SORT_FIELD, "Sort field must be a key or a val"), \
> > C(INVALID_STR_OPERAND, "String type can not be an operand in expression"), \
> > C(EXPECT_NUMBER, "Expecting numeric literal"), \
> > - C(UNARY_MINUS_SUBEXPR, "Unary minus not supported in sub-expressions"),
> > + C(UNARY_MINUS_SUBEXPR, "Unary minus not supported in sub-expressions"), \
> > + C(DIVISION_BY_ZERO, "Division by zero"),
> >
> > #undef C
> > #define C(a, b) HIST_ERR_##a
> > @@ -92,6 +93,7 @@ typedef u64 (*hist_field_fn_t) (struct hist_field *field,
> > #define HIST_FIELDS_MAX (TRACING_MAP_FIELDS_MAX + TRACING_MAP_VARS_MAX)
> > #define HIST_ACTIONS_MAX 8
> > #define HIST_CONST_DIGITS_MAX 21
> > +#define HIST_DIV_SHIFT 20 /* For optimizing division by constants */
> >
> > enum field_op_id {
> > FIELD_OP_NONE,
> > @@ -160,6 +162,8 @@ struct hist_field {
> >
> > /* Numeric literals are represented as u64 */
> > u64 constant;
> > + /* Used to optimize division by constants */
> > + u64 div_multiplier;
> > };
> >
> > static u64 hist_field_none(struct hist_field *field,
> > @@ -311,6 +315,72 @@ static u64 hist_field_div(struct hist_field *hist_field,
> > return div64_u64(val1, val2);
> > }
> >
> > +static u64 div_by_power_of_two(struct hist_field *hist_field,
> > + struct tracing_map_elt *elt,
> > + struct trace_buffer *buffer,
> > + struct ring_buffer_event *rbe,
> > + void *event)
> > +{
> > + struct hist_field *operand1 = hist_field->operands[0];
> > + struct hist_field *operand2 = hist_field->operands[1];
> > +
> > + u64 val1 = operand1->fn(operand1, elt, buffer, rbe, event);
> > + u64 val2 = operand2->fn(operand2, elt, buffer, rbe, event);
>
> If these functions are only called when val2 is constant, can't we make it
> such that we get val2 from the hist_field directly? That is:
>
> u64 val2 = operand2->constant;
operand2 might be a var ref to a constant, so we would need to resolve
that with hist_field_var_ref().
-Kalesh
>
> That would save us a function call, and an indirect on at that (that gets
> slowed down by spectre).
>
> Same for the ones below.
>
> -- Steve
>
>
> > +
> > + return val1 >> __ffs64(val2);
> > +}
> > +
> > +static u64 div_by_not_power_of_two(struct hist_field *hist_field,
> > + struct tracing_map_elt *elt,
> > + struct trace_buffer *buffer,
> > + struct ring_buffer_event *rbe,
> > + void *event)
> > +{
> > + struct hist_field *operand1 = hist_field->operands[0];
> > + struct hist_field *operand2 = hist_field->operands[1];
> > +
> > + u64 val1 = operand1->fn(operand1, elt, buffer, rbe, event);
> > + u64 val2 = operand2->fn(operand2, elt, buffer, rbe, event);
> > +
> > + return div64_u64(val1, val2);
> > +}
> > +
> > +static u64 div_by_mult_and_shift(struct hist_field *hist_field,
> > + struct tracing_map_elt *elt,
> > + struct trace_buffer *buffer,
> > + struct ring_buffer_event *rbe,
> > + void *event)
> > +{
> > + struct hist_field *operand1 = hist_field->operands[0];
> > + struct hist_field *operand2 = hist_field->operands[1];
> > +
> > + u64 val1 = operand1->fn(operand1, elt, buffer, rbe, event);
> > +
> > + /*
> > + * If the divisor is a constant, do a multiplication and shift instead.
> > + *
> > + * Choose Z = some power of 2. If Y <= Z, then:
> > + * X / Y = (X * (Z / Y)) / Z
> > + *
> > + * (Z / Y) is a constant (mult) which is calculated at parse time, so:
> > + * X / Y = (X * mult) / Z
> > + *
> > + * The division by Z can be replaced by a shift since Z is a power of 2:
> > + * X / Y = (X * mult) >> HIST_DIV_SHIFT
> > + *
> > + * As long, as X < Z the results will not be off by more than 1.
> > + */
> > + if (val1 < (1 << HIST_DIV_SHIFT)) {
> > + u64 mult = operand2->div_multiplier;
> > +
> > + return (val1 * mult + ((1 << HIST_DIV_SHIFT) - 1)) >> HIST_DIV_SHIFT;
> > + } else {
> > + u64 val2 = operand2->fn(operand2, elt, buffer, rbe, event);
> > +
> > + return div64_u64(val1, val2);
> > + }
> > +}
> > +
> > static u64 hist_field_mult(struct hist_field *hist_field,
> > struct tracing_map_elt *elt,
> > struct trace_buffer *buffer,
> > @@ -573,6 +643,37 @@ struct snapshot_context {
> > void *key;
> > };
> >
> > +
> > +static struct hist_field *find_var_field(struct hist_trigger_data *hist_data,
> > + const char *var_name);
> > +
> > +/*
> > + * Returns the specific division function to use if the divisor
> > + * is constant. This avoids extra branches when the trigger is hit.
> > + */
> > +static hist_field_fn_t hist_field_get_div_fn(struct hist_field *divisor)
> > +{
> > + u64 div;
> > +
> > + if (divisor->flags & HIST_FIELD_FL_VAR_REF) {
> > + struct hist_field *var;
> > +
> > + var = find_var_field(divisor->var.hist_data, divisor->name);
> > + div = var->constant;
> > + } else
> > + div = divisor->constant;
> > +
> > + if (!(div & (div - 1)))
> > + return div_by_power_of_two;
> > +
> > + /* If the divisor is too large, do a regular division */
> > + if (div > (1 << HIST_DIV_SHIFT))
> > + return div_by_not_power_of_two;
> > +
> > + divisor->div_multiplier = div64_u64((u64)(1 << HIST_DIV_SHIFT), div);
> > + return div_by_mult_and_shift;
> > +}
> > +
> > static void track_data_free(struct track_data *track_data)
> > {
> > struct hist_elt_data *elt_data;
> > @@ -2575,6 +2676,20 @@ static struct hist_field *parse_expr(struct hist_trigger_data *hist_data,
> > expr->operands[0] = operand1;
> > expr->operands[1] = operand2;
> >
> > +
> > + if (field_op == FIELD_OP_DIV &&
> > + operand2_flags & HIST_FIELD_FL_CONST) {
> > + u64 divisor = (var2) ? var2->constant : operand2->constant;
> > +
> > + if (!divisor) {
> > + hist_err(file->tr, HIST_ERR_DIVISION_BY_ZERO, errpos(str));
> > + ret = -EDOM;
> > + goto free;
> > + }
> > +
> > + op_fn = hist_field_get_div_fn(operand2);
> > + }
> > +
> > if (combine_consts) {
> > if (var1)
> > expr->operands[0] = var1;
>
On Fri, Oct 29, 2021 at 11:53 AM Kalesh Singh <[email protected]> wrote:
>
> On Fri, Oct 29, 2021 at 11:45 AM Steven Rostedt <[email protected]> wrote:
> >
> > On Fri, 29 Oct 2021 11:33:27 -0700
> > Kalesh Singh <[email protected]> wrote:
> >
> > > If the divisor is a constant use specific division functions to
> > > avoid extra branches when the trigger is hit.
> > >
> > > If the divisor constant but not a power of 2, the division can be
> > > replaced with a multiplication and shift in the following case:
> > >
> > > Let X = dividend and Y = divisor.
> > >
> > > Choose Z = some power of 2. If Y <= Z, then:
> > > X / Y = (X * (Z / Y)) / Z
> > >
> > > (Z / Y) is a constant (mult) which is calculated at parse time, so:
> > > X / Y = (X * mult) / Z
> > >
> > > The division by Z can be replaced by a shift since Z is a power of 2:
> > > X / Y = (X * mult) >> shift
> > >
> > > As long, as X < Z the results will not be off by more than 1.
> > >
> > > Signed-off-by: Kalesh Singh <[email protected]>
> > > Suggested-by: Steven Rostedt <[email protected]>
> > > ---
> > >
> > > Changes in v2:
> > > - Return -EDOM if divisor is a constant and zero, per Steve
> > >
> > > kernel/trace/trace_events_hist.c | 117 ++++++++++++++++++++++++++++++-
> > > 1 file changed, 116 insertions(+), 1 deletion(-)
> > >
> > > diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c
> > > index 364cb3091789..1084aa41f047 100644
> > > --- a/kernel/trace/trace_events_hist.c
> > > +++ b/kernel/trace/trace_events_hist.c
> > > @@ -68,7 +68,8 @@
> > > C(INVALID_SORT_FIELD, "Sort field must be a key or a val"), \
> > > C(INVALID_STR_OPERAND, "String type can not be an operand in expression"), \
> > > C(EXPECT_NUMBER, "Expecting numeric literal"), \
> > > - C(UNARY_MINUS_SUBEXPR, "Unary minus not supported in sub-expressions"),
> > > + C(UNARY_MINUS_SUBEXPR, "Unary minus not supported in sub-expressions"), \
> > > + C(DIVISION_BY_ZERO, "Division by zero"),
> > >
> > > #undef C
> > > #define C(a, b) HIST_ERR_##a
> > > @@ -92,6 +93,7 @@ typedef u64 (*hist_field_fn_t) (struct hist_field *field,
> > > #define HIST_FIELDS_MAX (TRACING_MAP_FIELDS_MAX + TRACING_MAP_VARS_MAX)
> > > #define HIST_ACTIONS_MAX 8
> > > #define HIST_CONST_DIGITS_MAX 21
> > > +#define HIST_DIV_SHIFT 20 /* For optimizing division by constants */
> > >
> > > enum field_op_id {
> > > FIELD_OP_NONE,
> > > @@ -160,6 +162,8 @@ struct hist_field {
> > >
> > > /* Numeric literals are represented as u64 */
> > > u64 constant;
> > > + /* Used to optimize division by constants */
> > > + u64 div_multiplier;
> > > };
> > >
> > > static u64 hist_field_none(struct hist_field *field,
> > > @@ -311,6 +315,72 @@ static u64 hist_field_div(struct hist_field *hist_field,
> > > return div64_u64(val1, val2);
> > > }
> > >
> > > +static u64 div_by_power_of_two(struct hist_field *hist_field,
> > > + struct tracing_map_elt *elt,
> > > + struct trace_buffer *buffer,
> > > + struct ring_buffer_event *rbe,
> > > + void *event)
> > > +{
> > > + struct hist_field *operand1 = hist_field->operands[0];
> > > + struct hist_field *operand2 = hist_field->operands[1];
> > > +
> > > + u64 val1 = operand1->fn(operand1, elt, buffer, rbe, event);
> > > + u64 val2 = operand2->fn(operand2, elt, buffer, rbe, event);
> >
> > If these functions are only called when val2 is constant, can't we make it
> > such that we get val2 from the hist_field directly? That is:
> >
> > u64 val2 = operand2->constant;
>
> operand2 might be a var ref to a constant, so we would need to resolve
> that with hist_field_var_ref().
>
> -Kalesh
>
> >
> > That would save us a function call, and an indirect on at that (that gets
> > slowed down by spectre).
So would it be adding something like below?
if (operand2->flags & HIST_FIELD_FL_CONST)
val2 = operand2->constant;
else
val2 = operand2->fn(operand2, elt, buffer, rbe, event);
Thanks,
Kalesh
> >
> > Same for the ones below.
> >
> > -- Steve
> >
> >
> > > +
> > > + return val1 >> __ffs64(val2);
> > > +}
> > > +
> > > +static u64 div_by_not_power_of_two(struct hist_field *hist_field,
> > > + struct tracing_map_elt *elt,
> > > + struct trace_buffer *buffer,
> > > + struct ring_buffer_event *rbe,
> > > + void *event)
> > > +{
> > > + struct hist_field *operand1 = hist_field->operands[0];
> > > + struct hist_field *operand2 = hist_field->operands[1];
> > > +
> > > + u64 val1 = operand1->fn(operand1, elt, buffer, rbe, event);
> > > + u64 val2 = operand2->fn(operand2, elt, buffer, rbe, event);
> > > +
> > > + return div64_u64(val1, val2);
> > > +}
> > > +
> > > +static u64 div_by_mult_and_shift(struct hist_field *hist_field,
> > > + struct tracing_map_elt *elt,
> > > + struct trace_buffer *buffer,
> > > + struct ring_buffer_event *rbe,
> > > + void *event)
> > > +{
> > > + struct hist_field *operand1 = hist_field->operands[0];
> > > + struct hist_field *operand2 = hist_field->operands[1];
> > > +
> > > + u64 val1 = operand1->fn(operand1, elt, buffer, rbe, event);
> > > +
> > > + /*
> > > + * If the divisor is a constant, do a multiplication and shift instead.
> > > + *
> > > + * Choose Z = some power of 2. If Y <= Z, then:
> > > + * X / Y = (X * (Z / Y)) / Z
> > > + *
> > > + * (Z / Y) is a constant (mult) which is calculated at parse time, so:
> > > + * X / Y = (X * mult) / Z
> > > + *
> > > + * The division by Z can be replaced by a shift since Z is a power of 2:
> > > + * X / Y = (X * mult) >> HIST_DIV_SHIFT
> > > + *
> > > + * As long, as X < Z the results will not be off by more than 1.
> > > + */
> > > + if (val1 < (1 << HIST_DIV_SHIFT)) {
> > > + u64 mult = operand2->div_multiplier;
> > > +
> > > + return (val1 * mult + ((1 << HIST_DIV_SHIFT) - 1)) >> HIST_DIV_SHIFT;
> > > + } else {
> > > + u64 val2 = operand2->fn(operand2, elt, buffer, rbe, event);
> > > +
> > > + return div64_u64(val1, val2);
> > > + }
> > > +}
> > > +
> > > static u64 hist_field_mult(struct hist_field *hist_field,
> > > struct tracing_map_elt *elt,
> > > struct trace_buffer *buffer,
> > > @@ -573,6 +643,37 @@ struct snapshot_context {
> > > void *key;
> > > };
> > >
> > > +
> > > +static struct hist_field *find_var_field(struct hist_trigger_data *hist_data,
> > > + const char *var_name);
> > > +
> > > +/*
> > > + * Returns the specific division function to use if the divisor
> > > + * is constant. This avoids extra branches when the trigger is hit.
> > > + */
> > > +static hist_field_fn_t hist_field_get_div_fn(struct hist_field *divisor)
> > > +{
> > > + u64 div;
> > > +
> > > + if (divisor->flags & HIST_FIELD_FL_VAR_REF) {
> > > + struct hist_field *var;
> > > +
> > > + var = find_var_field(divisor->var.hist_data, divisor->name);
> > > + div = var->constant;
> > > + } else
> > > + div = divisor->constant;
> > > +
> > > + if (!(div & (div - 1)))
> > > + return div_by_power_of_two;
> > > +
> > > + /* If the divisor is too large, do a regular division */
> > > + if (div > (1 << HIST_DIV_SHIFT))
> > > + return div_by_not_power_of_two;
> > > +
> > > + divisor->div_multiplier = div64_u64((u64)(1 << HIST_DIV_SHIFT), div);
> > > + return div_by_mult_and_shift;
> > > +}
> > > +
> > > static void track_data_free(struct track_data *track_data)
> > > {
> > > struct hist_elt_data *elt_data;
> > > @@ -2575,6 +2676,20 @@ static struct hist_field *parse_expr(struct hist_trigger_data *hist_data,
> > > expr->operands[0] = operand1;
> > > expr->operands[1] = operand2;
> > >
> > > +
> > > + if (field_op == FIELD_OP_DIV &&
> > > + operand2_flags & HIST_FIELD_FL_CONST) {
> > > + u64 divisor = (var2) ? var2->constant : operand2->constant;
> > > +
> > > + if (!divisor) {
> > > + hist_err(file->tr, HIST_ERR_DIVISION_BY_ZERO, errpos(str));
> > > + ret = -EDOM;
> > > + goto free;
> > > + }
> > > +
> > > + op_fn = hist_field_get_div_fn(operand2);
> > > + }
> > > +
> > > if (combine_consts) {
> > > if (var1)
> > > expr->operands[0] = var1;
> >
On Fri, 29 Oct 2021 11:53:16 -0700
Kalesh Singh <[email protected]> wrote:
> > If these functions are only called when val2 is constant, can't we make it
> > such that we get val2 from the hist_field directly? That is:
> >
> > u64 val2 = operand2->constant;
>
> operand2 might be a var ref to a constant, so we would need to resolve
> that with hist_field_var_ref().
So can a var_ref change? If not, then we should convert that to a constant
for this operation.
-- Steve
On Fri, Oct 29, 2021 at 1:25 PM Steven Rostedt <[email protected]> wrote:
>
> On Fri, 29 Oct 2021 11:53:16 -0700
> Kalesh Singh <[email protected]> wrote:
>
> > > If these functions are only called when val2 is constant, can't we make it
> > > such that we get val2 from the hist_field directly? That is:
> > >
> > > u64 val2 = operand2->constant;
> >
> > operand2 might be a var ref to a constant, so we would need to resolve
> > that with hist_field_var_ref().
>
> So can a var_ref change? If not, then we should convert that to a constant
> for this operation.
A var ref to a constant won't change. I think we can just copy the
constant value to the var ref's hist field and then we'll be able to
get it from there instead of calling the fn() function. I'll post
another version with this.
Thanks,
Kalesh
>
> -- Steve
If the divisor is a constant use specific division functions to
avoid extra branches when the trigger is hit.
If the divisor constant but not a power of 2, the division can be
replaced with a multiplication and shift in the following case:
Let X = dividend and Y = divisor.
Choose Z = some power of 2. If Y <= Z, then:
X / Y = (X * (Z / Y)) / Z
(Z / Y) is a constant (mult) which is calculated at parse time, so:
X / Y = (X * mult) / Z
The division by Z can be replaced by a shift since Z is a power of 2:
X / Y = (X * mult) >> shift
As long, as X < Z the results will not be off by more than 1.
Signed-off-by: Kalesh Singh <[email protected]>
Suggested-by: Steven Rostedt <[email protected]>
---
Changes in v3:
- Avoid indirect function calls for retrieving const divisor value.
Changes in v2:
- Return -EDOM if divisor is a constant and zero.
kernel/trace/trace_events_hist.c | 105 ++++++++++++++++++++++++++++++-
1 file changed, 104 insertions(+), 1 deletion(-)
diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c
index 364cb3091789..7ccb6b3a1fb4 100644
--- a/kernel/trace/trace_events_hist.c
+++ b/kernel/trace/trace_events_hist.c
@@ -68,7 +68,8 @@
C(INVALID_SORT_FIELD, "Sort field must be a key or a val"), \
C(INVALID_STR_OPERAND, "String type can not be an operand in expression"), \
C(EXPECT_NUMBER, "Expecting numeric literal"), \
- C(UNARY_MINUS_SUBEXPR, "Unary minus not supported in sub-expressions"),
+ C(UNARY_MINUS_SUBEXPR, "Unary minus not supported in sub-expressions"), \
+ C(DIVISION_BY_ZERO, "Division by zero"),
#undef C
#define C(a, b) HIST_ERR_##a
@@ -92,6 +93,7 @@ typedef u64 (*hist_field_fn_t) (struct hist_field *field,
#define HIST_FIELDS_MAX (TRACING_MAP_FIELDS_MAX + TRACING_MAP_VARS_MAX)
#define HIST_ACTIONS_MAX 8
#define HIST_CONST_DIGITS_MAX 21
+#define HIST_DIV_SHIFT 20 /* For optimizing division by constants */
enum field_op_id {
FIELD_OP_NONE,
@@ -160,6 +162,8 @@ struct hist_field {
/* Numeric literals are represented as u64 */
u64 constant;
+ /* Used to optimize division by constants */
+ u64 div_multiplier;
};
static u64 hist_field_none(struct hist_field *field,
@@ -311,6 +315,68 @@ static u64 hist_field_div(struct hist_field *hist_field,
return div64_u64(val1, val2);
}
+static u64 div_by_power_of_two(struct hist_field *hist_field,
+ struct tracing_map_elt *elt,
+ struct trace_buffer *buffer,
+ struct ring_buffer_event *rbe,
+ void *event)
+{
+ struct hist_field *operand1 = hist_field->operands[0];
+ struct hist_field *operand2 = hist_field->operands[1];
+
+ u64 val1 = operand1->fn(operand1, elt, buffer, rbe, event);
+
+ return val1 >> __ffs64(operand2->constant);
+}
+
+static u64 div_by_not_power_of_two(struct hist_field *hist_field,
+ struct tracing_map_elt *elt,
+ struct trace_buffer *buffer,
+ struct ring_buffer_event *rbe,
+ void *event)
+{
+ struct hist_field *operand1 = hist_field->operands[0];
+ struct hist_field *operand2 = hist_field->operands[1];
+
+ u64 val1 = operand1->fn(operand1, elt, buffer, rbe, event);
+
+ return div64_u64(val1, operand2->constant);
+}
+
+static u64 div_by_mult_and_shift(struct hist_field *hist_field,
+ struct tracing_map_elt *elt,
+ struct trace_buffer *buffer,
+ struct ring_buffer_event *rbe,
+ void *event)
+{
+ struct hist_field *operand1 = hist_field->operands[0];
+ struct hist_field *operand2 = hist_field->operands[1];
+
+ u64 val1 = operand1->fn(operand1, elt, buffer, rbe, event);
+
+ /*
+ * If the divisor is a constant, do a multiplication and shift instead.
+ *
+ * Choose Z = some power of 2. If Y <= Z, then:
+ * X / Y = (X * (Z / Y)) / Z
+ *
+ * (Z / Y) is a constant (mult) which is calculated at parse time, so:
+ * X / Y = (X * mult) / Z
+ *
+ * The division by Z can be replaced by a shift since Z is a power of 2:
+ * X / Y = (X * mult) >> HIST_DIV_SHIFT
+ *
+ * As long, as X < Z the results will not be off by more than 1.
+ */
+ if (val1 < (1 << HIST_DIV_SHIFT)) {
+ u64 mult = operand2->div_multiplier;
+
+ return (val1 * mult + ((1 << HIST_DIV_SHIFT) - 1)) >> HIST_DIV_SHIFT;
+ }
+
+ return div64_u64(val1, operand2->constant);
+}
+
static u64 hist_field_mult(struct hist_field *hist_field,
struct tracing_map_elt *elt,
struct trace_buffer *buffer,
@@ -573,6 +639,25 @@ struct snapshot_context {
void *key;
};
+/*
+ * Returns the specific division function to use if the divisor
+ * is constant. This avoids extra branches when the trigger is hit.
+ */
+static hist_field_fn_t hist_field_get_div_fn(struct hist_field *divisor)
+{
+ u64 div = divisor->constant;
+
+ if (!(div & (div - 1)))
+ return div_by_power_of_two;
+
+ /* If the divisor is too large, do a regular division */
+ if (div > (1 << HIST_DIV_SHIFT))
+ return div_by_not_power_of_two;
+
+ divisor->div_multiplier = div64_u64((u64)(1 << HIST_DIV_SHIFT), div);
+ return div_by_mult_and_shift;
+}
+
static void track_data_free(struct track_data *track_data)
{
struct hist_elt_data *elt_data;
@@ -2575,6 +2660,24 @@ static struct hist_field *parse_expr(struct hist_trigger_data *hist_data,
expr->operands[0] = operand1;
expr->operands[1] = operand2;
+ if (field_op == FIELD_OP_DIV &&
+ operand2_flags & HIST_FIELD_FL_CONST) {
+ u64 divisor = var2 ? var2->constant : operand2->constant;
+
+ if (!divisor) {
+ hist_err(file->tr, HIST_ERR_DIVISION_BY_ZERO, errpos(str));
+ ret = -EDOM;
+ goto free;
+ }
+
+ /*
+ * Copy the divisor here so we don't have to look it up
+ * later if this is a var ref
+ */
+ operand2->constant = divisor;
+ op_fn = hist_field_get_div_fn(operand2);
+ }
+
if (combine_consts) {
if (var1)
expr->operands[0] = var1;
--
2.33.1.1089.g2158813163f-goog