2024-04-12 21:02:39

by Abhishek Chauhan

[permalink] [raw]
Subject: [RFC PATCH bpf-next v3 2/2] net: Add additional bit to support userspace timestamp type

tstamp_type can be real, mono or userspace timestamp.

This commit adds userspace timestamp and sets it if there is
valid transmit_time available in socket coming from userspace.

To make the design scalable for future needs this commit bring in
the change to extend the tstamp_type:1 to tstamp_type:2 to support
userspace timestamp.

Link: https://lore.kernel.org/netdev/[email protected]/
Signed-off-by: Abhishek Chauhan <[email protected]>
---
Changes since v2
- Minor changes to commit subject

Changes since v1
- identified additional changes in BPF framework.
- Bit shift in SKB_MONO_DELIVERY_TIME_MASK and TC_AT_INGRESS_MASK.
- Made changes in skb_set_delivery_time to keep changes similar to
previous code for mono_delivery_time and just setting tstamp_type
bit 1 for userspace timestamp.


include/linux/skbuff.h | 19 +++++++++++++++----
net/ipv4/ip_output.c | 2 +-
net/ipv4/raw.c | 2 +-
net/ipv6/ip6_output.c | 2 +-
net/ipv6/raw.c | 2 +-
net/packet/af_packet.c | 7 +++----
.../selftests/bpf/prog_tests/ctx_rewrite.c | 8 ++++----
7 files changed, 26 insertions(+), 16 deletions(-)

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index a83a2120b57f..b6346c21c3d4 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -827,7 +827,8 @@ enum skb_tstamp_type {
* @tstamp_type: When set, skb->tstamp has the
* delivery_time in mono clock base (i.e. EDT). Otherwise, the
* skb->tstamp has the (rcv) timestamp at ingress and
- * delivery_time at egress.
+ * delivery_time at egress or skb->tstamp defined by skb->sk->sk_clockid
+ * coming from userspace
* @napi_id: id of the NAPI struct this skb came from
* @sender_cpu: (aka @napi_id) source CPU in XPS
* @alloc_cpu: CPU which did the skb allocation.
@@ -955,7 +956,7 @@ struct sk_buff {
/* private: */
__u8 __mono_tc_offset[0];
/* public: */
- __u8 tstamp_type:1; /* See SKB_MONO_DELIVERY_TIME_MASK */
+ __u8 tstamp_type:2; /* See SKB_MONO_DELIVERY_TIME_MASK */
#ifdef CONFIG_NET_XGRESS
__u8 tc_at_ingress:1; /* See TC_AT_INGRESS_MASK */
__u8 tc_skip_classify:1;
@@ -1090,10 +1091,10 @@ struct sk_buff {
*/
#ifdef __BIG_ENDIAN_BITFIELD
#define SKB_MONO_DELIVERY_TIME_MASK (1 << 7)
-#define TC_AT_INGRESS_MASK (1 << 6)
+#define TC_AT_INGRESS_MASK (1 << 5)
#else
#define SKB_MONO_DELIVERY_TIME_MASK (1 << 0)
-#define TC_AT_INGRESS_MASK (1 << 1)
+#define TC_AT_INGRESS_MASK (1 << 2)
#endif
#define SKB_BF_MONO_TC_OFFSET offsetof(struct sk_buff, __mono_tc_offset)

@@ -4262,6 +4263,16 @@ static inline void skb_set_delivery_time(struct sk_buff *skb, ktime_t kt,
case CLOCK_MONO:
skb->tstamp_type = kt && tstamp_type;
break;
+ /* if any other time base, must be from userspace
+ * so set userspace tstamp_type bit
+ * See skbuff tstamp_type:2
+ * 0x0 => real timestamp_type
+ * 0x1 => mono timestamp_type
+ * 0x2 => timestamp_type set from userspace
+ */
+ default:
+ if (kt && tstamp_type)
+ skb->tstamp_type = 0x2;
}
}

diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 62e457f7c02c..c9317d4addce 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -1457,7 +1457,7 @@ struct sk_buff *__ip_make_skb(struct sock *sk,

skb->priority = (cork->tos != -1) ? cork->priority: READ_ONCE(sk->sk_priority);
skb->mark = cork->mark;
- skb->tstamp = cork->transmit_time;
+ skb_set_delivery_time(skb, cork->transmit_time, sk->sk_clockid);
/*
* Steal rt from cork.dst to avoid a pair of atomic_inc/atomic_dec
* on dst refcount
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index dcb11f22cbf2..bbc46a40c8b6 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -360,7 +360,7 @@ static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4,
skb->protocol = htons(ETH_P_IP);
skb->priority = READ_ONCE(sk->sk_priority);
skb->mark = sockc->mark;
- skb->tstamp = sockc->transmit_time;
+ skb_set_delivery_time(skb, sockc->transmit_time, sk->sk_clockid);
skb_dst_set(skb, &rt->dst);
*rtp = NULL;

diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index a9e819115622..0b8193bdd98f 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -1924,7 +1924,7 @@ struct sk_buff *__ip6_make_skb(struct sock *sk,

skb->priority = READ_ONCE(sk->sk_priority);
skb->mark = cork->base.mark;
- skb->tstamp = cork->base.transmit_time;
+ skb_set_delivery_time(skb, cork->base.transmit_time, sk->sk_clockid);

ip6_cork_steal_dst(skb, cork);
IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTREQUESTS);
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 0d896ca7b589..625f3a917e50 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -621,7 +621,7 @@ static int rawv6_send_hdrinc(struct sock *sk, struct msghdr *msg, int length,
skb->protocol = htons(ETH_P_IPV6);
skb->priority = READ_ONCE(sk->sk_priority);
skb->mark = sockc->mark;
- skb->tstamp = sockc->transmit_time;
+ skb_set_delivery_time(skb, sockc->transmit_time, sk->sk_clockid);

skb_put(skb, length);
skb_reset_network_header(skb);
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 8c6d3fbb4ed8..356c96f23370 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -2056,8 +2056,7 @@ static int packet_sendmsg_spkt(struct socket *sock, struct msghdr *msg,
skb->dev = dev;
skb->priority = READ_ONCE(sk->sk_priority);
skb->mark = READ_ONCE(sk->sk_mark);
- skb->tstamp = sockc.transmit_time;
-
+ skb_set_delivery_time(skb, sockc.transmit_time, sk->sk_clockid);
skb_setup_tx_timestamp(skb, sockc.tsflags);

if (unlikely(extra_len == 4))
@@ -2585,7 +2584,7 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
skb->dev = dev;
skb->priority = READ_ONCE(po->sk.sk_priority);
skb->mark = READ_ONCE(po->sk.sk_mark);
- skb->tstamp = sockc->transmit_time;
+ skb_set_delivery_time(skb, sockc->transmit_time, po->sk.sk_clockid);
skb_setup_tx_timestamp(skb, sockc->tsflags);
skb_zcopy_set_nouarg(skb, ph.raw);

@@ -3063,7 +3062,7 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
skb->dev = dev;
skb->priority = READ_ONCE(sk->sk_priority);
skb->mark = sockc.mark;
- skb->tstamp = sockc.transmit_time;
+ skb_set_delivery_time(skb, sockc.transmit_time, sk->sk_clockid);

if (unlikely(extra_len == 4))
skb->no_fcs = 1;
diff --git a/tools/testing/selftests/bpf/prog_tests/ctx_rewrite.c b/tools/testing/selftests/bpf/prog_tests/ctx_rewrite.c
index 3b7c57fe55a5..d7f58d9671f7 100644
--- a/tools/testing/selftests/bpf/prog_tests/ctx_rewrite.c
+++ b/tools/testing/selftests/bpf/prog_tests/ctx_rewrite.c
@@ -69,15 +69,15 @@ static struct test_case test_cases[] = {
{
N(SCHED_CLS, struct __sk_buff, tstamp),
.read = "r11 = *(u8 *)($ctx + sk_buff::__mono_tc_offset);"
- "w11 &= 3;"
- "if w11 != 0x3 goto pc+2;"
+ "w11 &= 5;"
+ "if w11 != 0x5 goto pc+2;"
"$dst = 0;"
"goto pc+1;"
"$dst = *(u64 *)($ctx + sk_buff::tstamp);",
.write = "r11 = *(u8 *)($ctx + sk_buff::__mono_tc_offset);"
- "if w11 & 0x2 goto pc+1;"
+ "if w11 & 0x4 goto pc+1;"
"goto pc+2;"
- "w11 &= -2;"
+ "w11 &= -4;"
"*(u8 *)($ctx + sk_buff::__mono_tc_offset) = r11;"
"*(u64 *)($ctx + sk_buff::tstamp) = $src;",
},
--
2.25.1



2024-04-13 19:07:21

by Willem de Bruijn

[permalink] [raw]
Subject: Re: [RFC PATCH bpf-next v3 2/2] net: Add additional bit to support userspace timestamp type

Abhishek Chauhan wrote:
> tstamp_type can be real, mono or userspace timestamp.
>
> This commit adds userspace timestamp and sets it if there is
> valid transmit_time available in socket coming from userspace.

Comment is outdated: we now set the actual clockid_t (compressed
into fewer bits), rather than an abstract "go see sk_clockid".

> To make the design scalable for future needs this commit bring in
> the change to extend the tstamp_type:1 to tstamp_type:2 to support
> userspace timestamp.
>
> Link: https://lore.kernel.org/netdev/[email protected]/
> Signed-off-by: Abhishek Chauhan <[email protected]>
> ---
> Changes since v2
> - Minor changes to commit subject
>
> Changes since v1
> - identified additional changes in BPF framework.
> - Bit shift in SKB_MONO_DELIVERY_TIME_MASK and TC_AT_INGRESS_MASK.
> - Made changes in skb_set_delivery_time to keep changes similar to
> previous code for mono_delivery_time and just setting tstamp_type
> bit 1 for userspace timestamp.
>
>
> include/linux/skbuff.h | 19 +++++++++++++++----
> net/ipv4/ip_output.c | 2 +-
> net/ipv4/raw.c | 2 +-
> net/ipv6/ip6_output.c | 2 +-
> net/ipv6/raw.c | 2 +-
> net/packet/af_packet.c | 7 +++----
> .../selftests/bpf/prog_tests/ctx_rewrite.c | 8 ++++----
> 7 files changed, 26 insertions(+), 16 deletions(-)
>
> diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
> index a83a2120b57f..b6346c21c3d4 100644
> --- a/include/linux/skbuff.h
> +++ b/include/linux/skbuff.h
> @@ -827,7 +827,8 @@ enum skb_tstamp_type {
> * @tstamp_type: When set, skb->tstamp has the
> * delivery_time in mono clock base (i.e. EDT). Otherwise, the
> * skb->tstamp has the (rcv) timestamp at ingress and
> - * delivery_time at egress.
> + * delivery_time at egress or skb->tstamp defined by skb->sk->sk_clockid
> + * coming from userspace
> * @napi_id: id of the NAPI struct this skb came from
> * @sender_cpu: (aka @napi_id) source CPU in XPS
> * @alloc_cpu: CPU which did the skb allocation.
> @@ -955,7 +956,7 @@ struct sk_buff {
> /* private: */
> __u8 __mono_tc_offset[0];
> /* public: */
> - __u8 tstamp_type:1; /* See SKB_MONO_DELIVERY_TIME_MASK */
> + __u8 tstamp_type:2; /* See SKB_MONO_DELIVERY_TIME_MASK */
> #ifdef CONFIG_NET_XGRESS
> __u8 tc_at_ingress:1; /* See TC_AT_INGRESS_MASK */
> __u8 tc_skip_classify:1;

A quick pahole for a fairly standard .config that I had laying around
shows a hole after this list of bits, so no huge concerns there from
adding a bit:

__u8 slow_gro:1; /* 3: 4 1 */
__u8 csum_not_inet:1; /* 3: 5 1 */

/* XXX 2 bits hole, try to pack */

__u16 tc_index; /* 4 2 */

> @@ -1090,10 +1091,10 @@ struct sk_buff {
> */
> #ifdef __BIG_ENDIAN_BITFIELD
> #define SKB_MONO_DELIVERY_TIME_MASK (1 << 7)
> -#define TC_AT_INGRESS_MASK (1 << 6)
> +#define TC_AT_INGRESS_MASK (1 << 5)

Have to be careful when adding a new 2 bit tstamp_type with both bits
set, that this does not incorrectly get interpreted as MONO.

I haven't looked closely at the BPF API, but hopefully it can be
extensible to return the specific type. If it is hardcoded to return
either MONO or not, then only 0x1 should match, not 0x3.

> #else
> #define SKB_MONO_DELIVERY_TIME_MASK (1 << 0)
> -#define TC_AT_INGRESS_MASK (1 << 1)
> +#define TC_AT_INGRESS_MASK (1 << 2)
> #endif
> #define SKB_BF_MONO_TC_OFFSET offsetof(struct sk_buff, __mono_tc_offset)
>
> @@ -4262,6 +4263,16 @@ static inline void skb_set_delivery_time(struct sk_buff *skb, ktime_t kt,
> case CLOCK_MONO:

Come to think of it, these CLOCK_* names are too generic and shadow
existing ones like CLOCK_MONOTONIC.

Instead, define SKB_CLOCK_.

> skb->tstamp_type = kt && tstamp_type;
> break;
> + /* if any other time base, must be from userspace
> + * so set userspace tstamp_type bit
> + * See skbuff tstamp_type:2
> + * 0x0 => real timestamp_type
> + * 0x1 => mono timestamp_type
> + * 0x2 => timestamp_type set from userspace
> + */
> + default:
> + if (kt && tstamp_type)
> + skb->tstamp_type = 0x2;

Needs a constant.

Plan is to add SKB_CLOCK_TAI, rather than SKB_CLOCK_USER that
requires a further lookup to sk_clockid.

> }
> }
>
> diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
> index 62e457f7c02c..c9317d4addce 100644
> --- a/net/ipv4/ip_output.c
> +++ b/net/ipv4/ip_output.c
> @@ -1457,7 +1457,7 @@ struct sk_buff *__ip_make_skb(struct sock *sk,
>
> skb->priority = (cork->tos != -1) ? cork->priority: READ_ONCE(sk->sk_priority);
> skb->mark = cork->mark;
> - skb->tstamp = cork->transmit_time;
> + skb_set_delivery_time(skb, cork->transmit_time, sk->sk_clockid);

If adding 1 or 2 specific clock types, like SKB_CLOCK_TAI, then
skb_set_delivery_time will have to detect unsupported sk_clockid
values and fail for those.

The function does not return an error, so just fail to set the
delivery time and WARN_ONCE.



2024-04-15 20:19:53

by Martin KaFai Lau

[permalink] [raw]
Subject: Re: [RFC PATCH bpf-next v3 2/2] net: Add additional bit to support userspace timestamp type

On 4/13/24 12:07 PM, Willem de Bruijn wrote:
>> diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
>> index a83a2120b57f..b6346c21c3d4 100644
>> --- a/include/linux/skbuff.h
>> +++ b/include/linux/skbuff.h
>> @@ -827,7 +827,8 @@ enum skb_tstamp_type {
>> * @tstamp_type: When set, skb->tstamp has the
>> * delivery_time in mono clock base (i.e. EDT). Otherwise, the
>> * skb->tstamp has the (rcv) timestamp at ingress and
>> - * delivery_time at egress.
>> + * delivery_time at egress or skb->tstamp defined by skb->sk->sk_clockid
>> + * coming from userspace
>> * @napi_id: id of the NAPI struct this skb came from
>> * @sender_cpu: (aka @napi_id) source CPU in XPS
>> * @alloc_cpu: CPU which did the skb allocation.
>> @@ -955,7 +956,7 @@ struct sk_buff {
>> /* private: */
>> __u8 __mono_tc_offset[0];
>> /* public: */
>> - __u8 tstamp_type:1; /* See SKB_MONO_DELIVERY_TIME_MASK */
>> + __u8 tstamp_type:2; /* See SKB_MONO_DELIVERY_TIME_MASK */
>> #ifdef CONFIG_NET_XGRESS
>> __u8 tc_at_ingress:1; /* See TC_AT_INGRESS_MASK */
>> __u8 tc_skip_classify:1;
>
> A quick pahole for a fairly standard .config that I had laying around
> shows a hole after this list of bits, so no huge concerns there from
> adding a bit:
>
> __u8 slow_gro:1; /* 3: 4 1 */
> __u8 csum_not_inet:1; /* 3: 5 1 */
>
> /* XXX 2 bits hole, try to pack */
>
> __u16 tc_index; /* 4 2 */
>
>> @@ -1090,10 +1091,10 @@ struct sk_buff {
>> */
>> #ifdef __BIG_ENDIAN_BITFIELD
>> #define SKB_MONO_DELIVERY_TIME_MASK (1 << 7)
>> -#define TC_AT_INGRESS_MASK (1 << 6)
>> +#define TC_AT_INGRESS_MASK (1 << 5)
>
> Have to be careful when adding a new 2 bit tstamp_type with both bits
> set, that this does not incorrectly get interpreted as MONO.
>
> I haven't looked closely at the BPF API, but hopefully it can be
> extensible to return the specific type. If it is hardcoded to return
> either MONO or not, then only 0x1 should match, not 0x3.

Good point. I believe it is the best to have bpf to consider both bits in
tstamp_type:2 in filter.c to avoid the 0x3 surprise in the future. The BPF API
can be extended to support SKB_CLOCK_TAI.

Regardless, in bpf_convert_tstamp_write(), it still needs to clear both bits in
tstamp_type when it is at ingress. Right now it only clears the mono bit.

Then it may as well consider both tstamp_type:2 bits in
bpf_convert_tstamp_read() and bpf_convert_tstamp_type_read(). e.g.
bpf_convert_tstamp_type_read(), it should be a pretty straight forward change
because the SKB_CLOCK_* enum value should be a 1:1 mapping to the BPF_SKB_TSTAMP_*.

>
>> #else
>> #define SKB_MONO_DELIVERY_TIME_MASK (1 << 0)
>> -#define TC_AT_INGRESS_MASK (1 << 1)
>> +#define TC_AT_INGRESS_MASK (1 << 2)
>> #endif
>> #define SKB_BF_MONO_TC_OFFSET offsetof(struct sk_buff, __mono_tc_offset)
>>


2024-04-16 23:41:28

by Abhishek Chauhan

[permalink] [raw]
Subject: Re: [RFC PATCH bpf-next v3 2/2] net: Add additional bit to support userspace timestamp type



On 4/15/2024 1:00 PM, Martin KaFai Lau wrote:
> On 4/13/24 12:07 PM, Willem de Bruijn wrote:
>>> diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
>>> index a83a2120b57f..b6346c21c3d4 100644
>>> --- a/include/linux/skbuff.h
>>> +++ b/include/linux/skbuff.h
>>> @@ -827,7 +827,8 @@ enum skb_tstamp_type {
>>>    *    @tstamp_type: When set, skb->tstamp has the
>>>    *        delivery_time in mono clock base (i.e. EDT).  Otherwise, the
>>>    *        skb->tstamp has the (rcv) timestamp at ingress and
>>> - *        delivery_time at egress.
>>> + *        delivery_time at egress or skb->tstamp defined by skb->sk->sk_clockid
>>> + *        coming from userspace
>>>    *    @napi_id: id of the NAPI struct this skb came from
>>>    *    @sender_cpu: (aka @napi_id) source CPU in XPS
>>>    *    @alloc_cpu: CPU which did the skb allocation.
>>> @@ -955,7 +956,7 @@ struct sk_buff {
>>>       /* private: */
>>>       __u8            __mono_tc_offset[0];
>>>       /* public: */
>>> -    __u8            tstamp_type:1;    /* See SKB_MONO_DELIVERY_TIME_MASK */
>>> +    __u8            tstamp_type:2;    /* See SKB_MONO_DELIVERY_TIME_MASK */
>>>   #ifdef CONFIG_NET_XGRESS
>>>       __u8            tc_at_ingress:1;    /* See TC_AT_INGRESS_MASK */
>>>       __u8            tc_skip_classify:1;
>>
>> A quick pahole for a fairly standard .config that I had laying around
>> shows a hole after this list of bits, so no huge concerns there from
>> adding a bit:
>>
>>             __u8               slow_gro:1;           /*     3: 4  1 */
>>             __u8               csum_not_inet:1;      /*     3: 5  1 */
>>
>>             /* XXX 2 bits hole, try to pack */
>>
>>             __u16              tc_index;             /*     4     2 */
>>
>>> @@ -1090,10 +1091,10 @@ struct sk_buff {
>>>    */
>>>   #ifdef __BIG_ENDIAN_BITFIELD
>>>   #define SKB_MONO_DELIVERY_TIME_MASK    (1 << 7)
>>> -#define TC_AT_INGRESS_MASK        (1 << 6)
>>> +#define TC_AT_INGRESS_MASK        (1 << 5)
>>
>> Have to be careful when adding a new 2 bit tstamp_type with both bits
>> set, that this does not incorrectly get interpreted as MONO.
>>
>> I haven't looked closely at the BPF API, but hopefully it can be
>> extensible to return the specific type. If it is hardcoded to return
>> either MONO or not, then only 0x1 should match, not 0x3.
>
> Good point. I believe it is the best to have bpf to consider both bits in tstamp_type:2 in filter.c to avoid the 0x3 surprise in the future. The BPF API can be extended to support SKB_CLOCK_TAI.
>
> Regardless, in bpf_convert_tstamp_write(), it still needs to clear both bits in tstamp_type when it is at ingress. Right now it only clears the mono bit.
>
> Then it may as well consider both tstamp_type:2 bits in bpf_convert_tstamp_read() and bpf_convert_tstamp_type_read(). e.g. bpf_convert_tstamp_type_read(), it should be a pretty straight forward change because the SKB_CLOCK_* enum value should be a 1:1 mapping to the BPF_SKB_TSTAMP_*.
>
>>
>>>   #else
>>>   #define SKB_MONO_DELIVERY_TIME_MASK    (1 << 0)
>>> -#define TC_AT_INGRESS_MASK        (1 << 1)
>>> +#define TC_AT_INGRESS_MASK        (1 << 2)
>>>   #endif
>>>   #define SKB_BF_MONO_TC_OFFSET        offsetof(struct sk_buff, __mono_tc_offset)
>>>  
>
Hi Martin and Willem,

I have made the changes as per your guidelines . I will be raising RFC patch bpf-next v4 soon. Giving you heads up of the changes i am bringing in the BPF code. If you feel i have done something incorrectly, please do correct me here.
I apologies for adding the code here and making the content of the email huge for other upstream reviewers.

//Introduce a new BPF tstamp type mask in bpf.h

#ifdef __BIG_ENDIAN_BITFIELD
#define SKB_MONO_DELIVERY_TIME_MASK (1 << 7)
+ #define SKB_TAI_DELIVERY_TIME_MASK (1 << 6) (new)
#define TC_AT_INGRESS_MASK (1 << 5)
#else
#define SKB_MONO_DELIVERY_TIME_MASK (1 << 0)
+ #define SKB_TAI_DELIVERY_TIME_MASK (1 << 1) (new)
#define TC_AT_INGRESS_MASK (1 << 2)
#endif

//changes in the filter.c (bpf_convert_tstamp_{read,write}, bpf_convert_tstamp_type_read())code are accordingly taken care since now we have 3 bits instead of 2

Value 3 => unspec
Value 2 => tai
Value 1 => mono

static struct bpf_insn *bpf_convert_tstamp_type_read(const struct bpf_insn *si,
struct bpf_insn *insn)
{
__u8 value_reg = si->dst_reg;
__u8 skb_reg = si->src_reg;
/* AX is needed because src_reg and dst_reg could be the same */
__u8 tmp_reg = BPF_REG_AX;

*insn++ = BPF_LDX_MEM(BPF_B, tmp_reg, skb_reg,
SKB_BF_MONO_TC_OFFSET);
*insn++ = BPF_JMP32_IMM(BPF_JSET, tmp_reg,
SKB_MONO_DELIVERY_TIME_MASK | SKB_TAI_DELIVERY_TIME_MASK, 2); <== check for both bits are set (if so make it tstamp_unspec)
*insn++ = BPF_JMP32_IMM(BPF_JSET, tmp_reg,
SKB_MONO_DELIVERY_TIME_MASK, 3); <== if mono is set then its mono base and jump to 3rd instruction from here.
*insn++ = BPF_JMP32_IMM(BPF_JSET, tmp_reg,
SKB_TAI_DELIVERY_TIME_MASK, 4); <== if tai is set then its tai base and jump to 4th instruction from here.
*insn++ = BPF_MOV32_IMM(value_reg, BPF_SKB_TSTAMP_UNSPEC);
*insn++ = BPF_JMP_A(1);
*insn++ = BPF_MOV32_IMM(value_reg, BPF_SKB_TSTAMP_DELIVERY_MONO);
*insn++ = BPF_JMP_A(1);
*insn++ = BPF_MOV32_IMM(value_reg, BPF_SKB_TSTAMP_DELIVERY_TAI);

return insn;
}

//Values 7, 6 and 5 as input will set the value reg to 0
//otherwise the skb_reg has correct configuration and will store the content in value reg.

static struct bpf_insn *bpf_convert_tstamp_read(const struct bpf_prog *prog,
const struct bpf_insn *si,
struct bpf_insn *insn)
{
__u8 value_reg = si->dst_reg;
__u8 skb_reg = si->src_reg;

#ifdef CONFIG_NET_XGRESS
/* If the tstamp_type is read,
* the bpf prog is aware the tstamp could have delivery time.
* Thus, read skb->tstamp as is if tstamp_type_access is true.
*/
if (!prog->tstamp_type_access) {
/* AX is needed because src_reg and dst_reg could be the same */
__u8 tmp_reg = BPF_REG_AX;

*insn++ = BPF_LDX_MEM(BPF_B, tmp_reg, skb_reg, SKB_BF_MONO_TC_OFFSET);
/*check if all three bits are set*/
*insn++ = BPF_ALU32_IMM(BPF_AND, tmp_reg,
TC_AT_INGRESS_MASK | SKB_MONO_DELIVERY_TIME_MASK |
SKB_TAI_DELIVERY_TIME_MASK); <== check if all 3 bits are set which is value 7
/*if all 3 bits are set jump 3 instructions and clear the register */
*insn++ = BPF_JMP32_IMM(BPF_JEQ, tmp_reg,
TC_AT_INGRESS_MASK | SKB_MONO_DELIVERY_TIME_MASK |
SKB_TAI_DELIVERY_TIME_MASK, 4); <== if all 3 bits are set then value reg is set to 0
/*Now check Mono is set with ingress mask if so clear*/
*insn++ = BPF_JMP32_IMM(BPF_JEQ, tmp_reg,
TC_AT_INGRESS_MASK | SKB_MONO_DELIVERY_TIME_MASK, 3); <== check if value is 5 (mono + ingress) if so then value reg is set to 0
/*Now Check tai is set with ingress mask if so clear*/
*insn++ = BPF_JMP32_IMM(BPF_JEQ, tmp_reg,
TC_AT_INGRESS_MASK | SKB_TAI_DELIVERY_TIME_MASK, 2); <== check if value is 6 (tai + ingress) if so then value reg is set to 0
/*Now Check tai and mono are set if so clear */
*insn++ = BPF_JMP32_IMM(BPF_JEQ, tmp_reg,
SKB_MONO_DELIVERY_TIME_MASK | SKB_TAI_DELIVERY_TIME_MASK, 1); <== check if value 3 (tai + mono) if so then value reg is set to 0
/* goto <store> */
*insn++ = BPF_JMP_A(2);
/* skb->tc_at_ingress && skb->tstamp_type:1,
* read 0 as the (rcv) timestamp.
*/
*insn++ = BPF_MOV64_IMM(value_reg, 0);
*insn++ = BPF_JMP_A(1);
}
#endif

*insn++ = BPF_LDX_MEM(BPF_DW, value_reg, skb_reg,
offsetof(struct sk_buff, tstamp));
return insn;
}

//this was pretty straight forward
//if ingress mask is set just go ahead and unset both tai and mono delivery time.

static struct bpf_insn *bpf_convert_tstamp_write(const struct bpf_prog *prog,
const struct bpf_insn *si,
struct bpf_insn *insn)
{
__u8 value_reg = si->src_reg;
__u8 skb_reg = si->dst_reg;

#ifdef CONFIG_NET_XGRESS
/* If the tstamp_type is read,
* the bpf prog is aware the tstamp could have delivery time.
* Thus, write skb->tstamp as is if tstamp_type_access is true.
* Otherwise, writing at ingress will have to clear the
* mono_delivery_time (skb->tstamp_type:1)bit also.
*/
if (!prog->tstamp_type_access) {
__u8 tmp_reg = BPF_REG_AX;

*insn++ = BPF_LDX_MEM(BPF_B, tmp_reg, skb_reg, SKB_BF_MONO_TC_OFFSET);
/* Writing __sk_buff->tstamp as ingress, goto <clear> */
*insn++ = BPF_JMP32_IMM(BPF_JSET, tmp_reg, TC_AT_INGRESS_MASK, 1);
/* goto <store> */
*insn++ = BPF_JMP_A(3);
/* <clear>: mono_delivery_time or (skb->tstamp_type:1) */
*insn++ = BPF_ALU32_IMM(BPF_AND, tmp_reg, ~SKB_MONO_DELIVERY_TIME_MASK);
/* <clear>: tai delivery_time or (skb->tstamp_type:2) */ (new)
*insn++ = BPF_ALU32_IMM(BPF_AND, tmp_reg, ~SKB_TAI_DELIVERY_TIME_MASK); (new) <== reset tai delivery mask if ingress bit is set.
*insn++ = BPF_STX_MEM(BPF_B, skb_reg, tmp_reg, SKB_BF_MONO_TC_OFFSET);
}
#endif

/* <store>: skb->tstamp = tstamp */
*insn++ = BPF_RAW_INSN(BPF_CLASS(si->code) | BPF_DW | BPF_MEM,
skb_reg, value_reg, offsetof(struct sk_buff, tstamp), si->imm);
return insn;


And the ctx_rewrite will be as follows

.read = "r11 = *(u8 *)($ctx + sk_buff::__mono_tc_offset);"
"w11 &= 7;"
"if w11 == 0x7 goto pc+4;"
"if w11 == 0x5 goto pc+3;"
"if w11 == 0x6 goto pc+2;"
"if w11 == 0x3 goto pc+1;"
"goto pc+2"
"$dst = 0;"
"goto pc+1;"
"$dst = *(u64 *)($ctx + sk_buff::tstamp);",
.write = "r11 = *(u8 *)($ctx + sk_buff::__mono_tc_offset);"
"if w11 & 0x4 goto pc+1;"
"goto pc+3;"
"w11 &= -2;"
"w11 &= -3;"
"*(u8 *)($ctx + sk_buff::__mono_tc_offset) = r11;"
"*(u64 *)($ctx + sk_buff::tstamp) = $src;",