2024-04-18 00:44:10

by Abhishek Chauhan

[permalink] [raw]
Subject: [RFC PATCH bpf-next v4 1/2] net: Rename mono_delivery_time to tstamp_type for scalabilty

mono_delivery_time was added to check if skb->tstamp has delivery
time in mono clock base (i.e. EDT) otherwise skb->tstamp has
timestamp in ingress and delivery_time at egress.

Renaming the bitfield from mono_delivery_time to tstamp_type is for
extensibilty for other timestamps such as userspace timestamp
(i.e. SO_TXTIME) set via sock opts.

As we are renaming the mono_delivery_time to tstamp_type, it makes
sense to start assigning tstamp_type based on enum defined
in this commit.

Earlier we used bool arg flag to check if the tstamp is mono in
function skb_set_delivery_time, Now the signature of the functions
accepts tstamp_type to distinguish between mono and real time.

In future tstamp_type:1 can be extended to support userspace timestamp
by increasing the bitfield.

Link: https://lore.kernel.org/netdev/[email protected]/
Signed-off-by: Abhishek Chauhan <[email protected]>
---
Changes since v3
- Fixed inconsistent capitalization in skbuff.h
- remove reference to MONO_DELIVERY_TIME_MASK in skbuff.h
and point it to skb_tstamp_type now.
- Explicitely setting SKB_CLOCK_MONO if valid transmit_time
ip_send_unicast_reply
- Keeping skb_tstamp inline with skb_clear_tstamp.
- skb_set_delivery_time checks if timstamp is 0 and
sets the tstamp_type to SKB_CLOCK_REAL.
- Above comments are given by Willem
- Found out that skbuff.h has access to uapi/linux/time.h
So now instead of using CLOCK_REAL/CLOCK_MONO
i am checking actual clockid_t directly to set tstamp_type
example:- CLOCK_REALTIME/CLOCK_MONOTONIC
- Compilation error fixed in
net/ieee802154/6lowpan/reassembly.c

Changes since v2
- Minor changes to commit subject

Changes since v1
- Squashed the two commits into one as mentioned by Willem.
- Introduced switch in skb_set_delivery_time.
- Renamed and removed directionality aspects w.r.t tstamp_type
as mentioned by Willem.

include/linux/skbuff.h | 44 +++++++++++++++++-----
include/net/inet_frag.h | 4 +-
net/bridge/netfilter/nf_conntrack_bridge.c | 6 +--
net/core/dev.c | 2 +-
net/core/filter.c | 10 ++---
net/ieee802154/6lowpan/reassembly.c | 2 +-
net/ipv4/inet_fragment.c | 2 +-
net/ipv4/ip_fragment.c | 2 +-
net/ipv4/ip_output.c | 9 +++--
net/ipv4/tcp_output.c | 14 +++----
net/ipv6/ip6_output.c | 6 +--
net/ipv6/netfilter.c | 6 +--
net/ipv6/netfilter/nf_conntrack_reasm.c | 2 +-
net/ipv6/reassembly.c | 2 +-
net/ipv6/tcp_ipv6.c | 2 +-
net/sched/act_bpf.c | 4 +-
net/sched/cls_bpf.c | 4 +-
17 files changed, 73 insertions(+), 48 deletions(-)

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index aded4949ea79..2e7811c7e4db 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -702,6 +702,17 @@ typedef unsigned int sk_buff_data_t;
typedef unsigned char *sk_buff_data_t;
#endif

+/**
+ * tstamp_type:1 can take 2 values each
+ * represented by time base in skb
+ * 0x0 => real timestamp_type
+ * 0x1 => mono timestamp_type
+ */
+enum skb_tstamp_type {
+ SKB_CLOCK_REAL, /* Time base is skb is REALTIME */
+ SKB_CLOCK_MONO, /* Time base is skb is MONOTONIC */
+};
+
/**
* DOC: Basic sk_buff geometry
*
@@ -819,7 +830,7 @@ typedef unsigned char *sk_buff_data_t;
* @dst_pending_confirm: need to confirm neighbour
* @decrypted: Decrypted SKB
* @slow_gro: state present at GRO time, slower prepare step required
- * @mono_delivery_time: When set, skb->tstamp has the
+ * @tstamp_type: When set, skb->tstamp has the
* delivery_time in mono clock base (i.e. EDT). Otherwise, the
* skb->tstamp has the (rcv) timestamp at ingress and
* delivery_time at egress.
@@ -950,7 +961,7 @@ struct sk_buff {
/* private: */
__u8 __mono_tc_offset[0];
/* public: */
- __u8 mono_delivery_time:1; /* See SKB_MONO_DELIVERY_TIME_MASK */
+ __u8 tstamp_type:1; /* See SKB_CLOCK_*_MASK */
#ifdef CONFIG_NET_XGRESS
__u8 tc_at_ingress:1; /* See TC_AT_INGRESS_MASK */
__u8 tc_skip_classify:1;
@@ -4170,7 +4181,7 @@ static inline void skb_get_new_timestampns(const struct sk_buff *skb,
static inline void __net_timestamp(struct sk_buff *skb)
{
skb->tstamp = ktime_get_real();
- skb->mono_delivery_time = 0;
+ skb->tstamp_type = SKB_CLOCK_REAL;
}

static inline ktime_t net_timedelta(ktime_t t)
@@ -4179,10 +4190,23 @@ static inline ktime_t net_timedelta(ktime_t t)
}

static inline void skb_set_delivery_time(struct sk_buff *skb, ktime_t kt,
- bool mono)
+ u8 tstamp_type)
{
skb->tstamp = kt;
- skb->mono_delivery_time = kt && mono;
+
+ if (!kt) {
+ skb->tstamp_type = SKB_CLOCK_REAL;
+ return;
+ }
+
+ switch (tstamp_type) {
+ case CLOCK_REALTIME:
+ skb->tstamp_type = SKB_CLOCK_REAL;
+ break;
+ case CLOCK_MONOTONIC:
+ skb->tstamp_type = SKB_CLOCK_MONO;
+ break;
+ }
}

DECLARE_STATIC_KEY_FALSE(netstamp_needed_key);
@@ -4192,8 +4216,8 @@ DECLARE_STATIC_KEY_FALSE(netstamp_needed_key);
*/
static inline void skb_clear_delivery_time(struct sk_buff *skb)
{
- if (skb->mono_delivery_time) {
- skb->mono_delivery_time = 0;
+ if (skb->tstamp_type) {
+ skb->tstamp_type = SKB_CLOCK_REAL;
if (static_branch_unlikely(&netstamp_needed_key))
skb->tstamp = ktime_get_real();
else
@@ -4203,7 +4227,7 @@ static inline void skb_clear_delivery_time(struct sk_buff *skb)

static inline void skb_clear_tstamp(struct sk_buff *skb)
{
- if (skb->mono_delivery_time)
+ if (skb->tstamp_type)
return;

skb->tstamp = 0;
@@ -4211,7 +4235,7 @@ static inline void skb_clear_tstamp(struct sk_buff *skb)

static inline ktime_t skb_tstamp(const struct sk_buff *skb)
{
- if (skb->mono_delivery_time)
+ if (skb->tstamp_type)
return 0;

return skb->tstamp;
@@ -4219,7 +4243,7 @@ static inline ktime_t skb_tstamp(const struct sk_buff *skb)

static inline ktime_t skb_tstamp_cond(const struct sk_buff *skb, bool cond)
{
- if (!skb->mono_delivery_time && skb->tstamp)
+ if (skb->tstamp_type != SKB_CLOCK_MONO && skb->tstamp)
return skb->tstamp;

if (static_branch_unlikely(&netstamp_needed_key) || cond)
diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h
index 153960663ce4..5af6eb14c5db 100644
--- a/include/net/inet_frag.h
+++ b/include/net/inet_frag.h
@@ -76,7 +76,7 @@ struct frag_v6_compare_key {
* @stamp: timestamp of the last received fragment
* @len: total length of the original datagram
* @meat: length of received fragments so far
- * @mono_delivery_time: stamp has a mono delivery time (EDT)
+ * @tstamp_type: stamp has a mono delivery time (EDT)
* @flags: fragment queue flags
* @max_size: maximum received fragment size
* @fqdir: pointer to struct fqdir
@@ -97,7 +97,7 @@ struct inet_frag_queue {
ktime_t stamp;
int len;
int meat;
- u8 mono_delivery_time;
+ u8 tstamp_type;
__u8 flags;
u16 max_size;
struct fqdir *fqdir;
diff --git a/net/bridge/netfilter/nf_conntrack_bridge.c b/net/bridge/netfilter/nf_conntrack_bridge.c
index c3c51b9a6826..816bb0fde718 100644
--- a/net/bridge/netfilter/nf_conntrack_bridge.c
+++ b/net/bridge/netfilter/nf_conntrack_bridge.c
@@ -32,7 +32,7 @@ static int nf_br_ip_fragment(struct net *net, struct sock *sk,
struct sk_buff *))
{
int frag_max_size = BR_INPUT_SKB_CB(skb)->frag_max_size;
- bool mono_delivery_time = skb->mono_delivery_time;
+ u8 tstamp_type = skb->tstamp_type;
unsigned int hlen, ll_rs, mtu;
ktime_t tstamp = skb->tstamp;
struct ip_frag_state state;
@@ -82,7 +82,7 @@ static int nf_br_ip_fragment(struct net *net, struct sock *sk,
if (iter.frag)
ip_fraglist_prepare(skb, &iter);

- skb_set_delivery_time(skb, tstamp, mono_delivery_time);
+ skb_set_delivery_time(skb, tstamp, tstamp_type);
err = output(net, sk, data, skb);
if (err || !iter.frag)
break;
@@ -113,7 +113,7 @@ static int nf_br_ip_fragment(struct net *net, struct sock *sk,
goto blackhole;
}

- skb_set_delivery_time(skb2, tstamp, mono_delivery_time);
+ skb_set_delivery_time(skb2, tstamp, tstamp_type);
err = output(net, sk, data, skb2);
if (err)
goto blackhole;
diff --git a/net/core/dev.c b/net/core/dev.c
index 854a3a28a8d8..4e7dfefbb824 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2146,7 +2146,7 @@ EXPORT_SYMBOL(net_disable_timestamp);
static inline void net_timestamp_set(struct sk_buff *skb)
{
skb->tstamp = 0;
- skb->mono_delivery_time = 0;
+ skb->tstamp_type = SKB_CLOCK_REAL;
if (static_branch_unlikely(&netstamp_needed_key))
skb->tstamp = ktime_get_real();
}
diff --git a/net/core/filter.c b/net/core/filter.c
index 8d185d99a643..b1192e672cbb 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -7709,13 +7709,13 @@ BPF_CALL_3(bpf_skb_set_tstamp, struct sk_buff *, skb,
if (!tstamp)
return -EINVAL;
skb->tstamp = tstamp;
- skb->mono_delivery_time = 1;
+ skb->tstamp_type = SKB_CLOCK_MONO;
break;
case BPF_SKB_TSTAMP_UNSPEC:
if (tstamp)
return -EINVAL;
skb->tstamp = 0;
- skb->mono_delivery_time = 0;
+ skb->tstamp_type = SKB_CLOCK_REAL;
break;
default:
return -EINVAL;
@@ -9422,7 +9422,7 @@ static struct bpf_insn *bpf_convert_tstamp_read(const struct bpf_prog *prog,
TC_AT_INGRESS_MASK | SKB_MONO_DELIVERY_TIME_MASK);
*insn++ = BPF_JMP32_IMM(BPF_JNE, tmp_reg,
TC_AT_INGRESS_MASK | SKB_MONO_DELIVERY_TIME_MASK, 2);
- /* skb->tc_at_ingress && skb->mono_delivery_time,
+ /* skb->tc_at_ingress && skb->tstamp_type:1,
* read 0 as the (rcv) timestamp.
*/
*insn++ = BPF_MOV64_IMM(value_reg, 0);
@@ -9447,7 +9447,7 @@ static struct bpf_insn *bpf_convert_tstamp_write(const struct bpf_prog *prog,
* the bpf prog is aware the tstamp could have delivery time.
* Thus, write skb->tstamp as is if tstamp_type_access is true.
* Otherwise, writing at ingress will have to clear the
- * mono_delivery_time bit also.
+ * mono_delivery_time (skb->tstamp_type:1)bit also.
*/
if (!prog->tstamp_type_access) {
__u8 tmp_reg = BPF_REG_AX;
@@ -9457,7 +9457,7 @@ static struct bpf_insn *bpf_convert_tstamp_write(const struct bpf_prog *prog,
*insn++ = BPF_JMP32_IMM(BPF_JSET, tmp_reg, TC_AT_INGRESS_MASK, 1);
/* goto <store> */
*insn++ = BPF_JMP_A(2);
- /* <clear>: mono_delivery_time */
+ /* <clear>: mono_delivery_time or (skb->tstamp_type:1) */
*insn++ = BPF_ALU32_IMM(BPF_AND, tmp_reg, ~SKB_MONO_DELIVERY_TIME_MASK);
*insn++ = BPF_STX_MEM(BPF_B, skb_reg, tmp_reg, SKB_BF_MONO_TC_OFFSET);
}
diff --git a/net/ieee802154/6lowpan/reassembly.c b/net/ieee802154/6lowpan/reassembly.c
index 6dd960ec558c..ba0455ad7701 100644
--- a/net/ieee802154/6lowpan/reassembly.c
+++ b/net/ieee802154/6lowpan/reassembly.c
@@ -130,7 +130,7 @@ static int lowpan_frag_queue(struct lowpan_frag_queue *fq,
goto err;

fq->q.stamp = skb->tstamp;
- fq->q.mono_delivery_time = skb->mono_delivery_time;
+ fq->q.tstamp_type = skb->tstamp_type;
if (frag_type == LOWPAN_DISPATCH_FRAG1)
fq->q.flags |= INET_FRAG_FIRST_IN;

diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index faaec92a46ac..d179a2c84222 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -619,7 +619,7 @@ void inet_frag_reasm_finish(struct inet_frag_queue *q, struct sk_buff *head,
skb_mark_not_on_list(head);
head->prev = NULL;
head->tstamp = q->stamp;
- head->mono_delivery_time = q->mono_delivery_time;
+ head->tstamp_type = q->tstamp_type;

if (sk)
refcount_add(sum_truesize - head_truesize, &sk->sk_wmem_alloc);
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index fb947d1613fe..787aa86800f5 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -355,7 +355,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
qp->iif = dev->ifindex;

qp->q.stamp = skb->tstamp;
- qp->q.mono_delivery_time = skb->mono_delivery_time;
+ qp->q.tstamp_type = skb->tstamp_type;
qp->q.meat += skb->len;
qp->ecn |= ecn;
add_frag_mem_limit(qp->q.fqdir, skb->truesize);
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 1fe794967211..f29349151203 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -764,7 +764,7 @@ int ip_do_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
{
struct iphdr *iph;
struct sk_buff *skb2;
- bool mono_delivery_time = skb->mono_delivery_time;
+ u8 tstamp_type = skb->tstamp_type;
struct rtable *rt = skb_rtable(skb);
unsigned int mtu, hlen, ll_rs;
struct ip_fraglist_iter iter;
@@ -856,7 +856,7 @@ int ip_do_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
}
}

- skb_set_delivery_time(skb, tstamp, mono_delivery_time);
+ skb_set_delivery_time(skb, tstamp, tstamp_type);
err = output(net, sk, skb);

if (!err)
@@ -912,7 +912,7 @@ int ip_do_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
/*
* Put this fragment into the sending queue.
*/
- skb_set_delivery_time(skb2, tstamp, mono_delivery_time);
+ skb_set_delivery_time(skb2, tstamp, tstamp_type);
err = output(net, sk, skb2);
if (err)
goto fail;
@@ -1649,7 +1649,8 @@ void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb,
arg->csumoffset) = csum_fold(csum_add(nskb->csum,
arg->csum));
nskb->ip_summed = CHECKSUM_NONE;
- nskb->mono_delivery_time = !!transmit_time;
+ if (transmit_time)
+ nskb->tstamp_type = SKB_CLOCK_MONO;
if (txhash)
skb_set_hash(nskb, txhash, PKT_HASH_TYPE_L4);
ip_push_pending_frames(sk, &fl4);
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 61119d42b0fd..a062f88c47c3 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1300,7 +1300,7 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb,
tp = tcp_sk(sk);
prior_wstamp = tp->tcp_wstamp_ns;
tp->tcp_wstamp_ns = max(tp->tcp_wstamp_ns, tp->tcp_clock_cache);
- skb_set_delivery_time(skb, tp->tcp_wstamp_ns, true);
+ skb_set_delivery_time(skb, tp->tcp_wstamp_ns, CLOCK_MONOTONIC);
if (clone_it) {
oskb = skb;

@@ -1650,7 +1650,7 @@ int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue,

skb_split(skb, buff, len);

- skb_set_delivery_time(buff, skb->tstamp, true);
+ skb_set_delivery_time(buff, skb->tstamp, CLOCK_MONOTONIC);
tcp_fragment_tstamp(skb, buff);

old_factor = tcp_skb_pcount(skb);
@@ -2731,7 +2731,7 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
if (unlikely(tp->repair) && tp->repair_queue == TCP_SEND_QUEUE) {
/* "skb_mstamp_ns" is used as a start point for the retransmit timer */
tp->tcp_wstamp_ns = tp->tcp_clock_cache;
- skb_set_delivery_time(skb, tp->tcp_wstamp_ns, true);
+ skb_set_delivery_time(skb, tp->tcp_wstamp_ns, CLOCK_MONOTONIC);
list_move_tail(&skb->tcp_tsorted_anchor, &tp->tsorted_sent_queue);
tcp_init_tso_segs(skb, mss_now);
goto repair; /* Skip network transmission */
@@ -3714,11 +3714,11 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst,
#ifdef CONFIG_SYN_COOKIES
if (unlikely(synack_type == TCP_SYNACK_COOKIE && ireq->tstamp_ok))
skb_set_delivery_time(skb, cookie_init_timestamp(req, now),
- true);
+ CLOCK_MONOTONIC);
else
#endif
{
- skb_set_delivery_time(skb, now, true);
+ skb_set_delivery_time(skb, now, CLOCK_MONOTONIC);
if (!tcp_rsk(req)->snt_synack) /* Timestamp first SYNACK */
tcp_rsk(req)->snt_synack = tcp_skb_timestamp_us(skb);
}
@@ -3805,7 +3805,7 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst,
bpf_skops_write_hdr_opt((struct sock *)sk, skb, req, syn_skb,
synack_type, &opts);

- skb_set_delivery_time(skb, now, true);
+ skb_set_delivery_time(skb, now, CLOCK_MONOTONIC);
tcp_add_tx_delay(skb, tp);

return skb;
@@ -3989,7 +3989,7 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn)

err = tcp_transmit_skb(sk, syn_data, 1, sk->sk_allocation);

- skb_set_delivery_time(syn, syn_data->skb_mstamp_ns, true);
+ skb_set_delivery_time(syn, syn_data->skb_mstamp_ns, CLOCK_MONOTONIC);

/* Now full SYN+DATA was cloned and sent (or not),
* remove the SYN from the original skb (syn_data)
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index b9dd3a66e423..a9e819115622 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -859,7 +859,7 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ?
inet6_sk(skb->sk) : NULL;
- bool mono_delivery_time = skb->mono_delivery_time;
+ u8 tstamp_type = skb->tstamp_type;
struct ip6_frag_state state;
unsigned int mtu, hlen, nexthdr_offset;
ktime_t tstamp = skb->tstamp;
@@ -955,7 +955,7 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
if (iter.frag)
ip6_fraglist_prepare(skb, &iter);

- skb_set_delivery_time(skb, tstamp, mono_delivery_time);
+ skb_set_delivery_time(skb, tstamp, tstamp_type);
err = output(net, sk, skb);
if (!err)
IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
@@ -1016,7 +1016,7 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
/*
* Put this fragment into the sending queue.
*/
- skb_set_delivery_time(frag, tstamp, mono_delivery_time);
+ skb_set_delivery_time(frag, tstamp, tstamp_type);
err = output(net, sk, frag);
if (err)
goto fail;
diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c
index 53d255838e6a..e0c2347b4dc6 100644
--- a/net/ipv6/netfilter.c
+++ b/net/ipv6/netfilter.c
@@ -126,7 +126,7 @@ int br_ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
struct sk_buff *))
{
int frag_max_size = BR_INPUT_SKB_CB(skb)->frag_max_size;
- bool mono_delivery_time = skb->mono_delivery_time;
+ u8 tstamp_type = skb->tstamp_type;
ktime_t tstamp = skb->tstamp;
struct ip6_frag_state state;
u8 *prevhdr, nexthdr = 0;
@@ -192,7 +192,7 @@ int br_ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
if (iter.frag)
ip6_fraglist_prepare(skb, &iter);

- skb_set_delivery_time(skb, tstamp, mono_delivery_time);
+ skb_set_delivery_time(skb, tstamp, tstamp_type);
err = output(net, sk, data, skb);
if (err || !iter.frag)
break;
@@ -225,7 +225,7 @@ int br_ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
goto blackhole;
}

- skb_set_delivery_time(skb2, tstamp, mono_delivery_time);
+ skb_set_delivery_time(skb2, tstamp, tstamp_type);
err = output(net, sk, data, skb2);
if (err)
goto blackhole;
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index d0dcbaca1994..5cc5d823d33f 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -264,7 +264,7 @@ static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb,
fq->iif = dev->ifindex;

fq->q.stamp = skb->tstamp;
- fq->q.mono_delivery_time = skb->mono_delivery_time;
+ fq->q.tstamp_type = skb->tstamp_type;
fq->q.meat += skb->len;
fq->ecn |= ecn;
if (payload_len > fq->q.max_size)
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index acb4f119e11f..ea724ff558b4 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -198,7 +198,7 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
fq->iif = dev->ifindex;

fq->q.stamp = skb->tstamp;
- fq->q.mono_delivery_time = skb->mono_delivery_time;
+ fq->q.tstamp_type = skb->tstamp_type;
fq->q.meat += skb->len;
fq->ecn |= ecn;
add_frag_mem_limit(fq->q.fqdir, skb->truesize);
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index bb7c3caf4f85..7f8a8bd77547 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -975,7 +975,7 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32
mark = inet_twsk(sk)->tw_mark;
else
mark = READ_ONCE(sk->sk_mark);
- skb_set_delivery_time(buff, tcp_transmit_time(sk), true);
+ skb_set_delivery_time(buff, tcp_transmit_time(sk), CLOCK_MONOTONIC);
}
if (txhash) {
/* autoflowlabel/skb_get_hash_flowi6 rely on buff->hash */
diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c
index 0e3cf11ae5fc..9cd12d61818a 100644
--- a/net/sched/act_bpf.c
+++ b/net/sched/act_bpf.c
@@ -54,8 +54,8 @@ TC_INDIRECT_SCOPE int tcf_bpf_act(struct sk_buff *skb,
bpf_compute_data_pointers(skb);
filter_res = bpf_prog_run(filter, skb);
}
- if (unlikely(!skb->tstamp && skb->mono_delivery_time))
- skb->mono_delivery_time = 0;
+ if (unlikely(!skb->tstamp && skb->tstamp_type))
+ skb->tstamp_type = SKB_CLOCK_REAL;
if (skb_sk_is_prefetched(skb) && filter_res != TC_ACT_OK)
skb_orphan(skb);

diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c
index 5e83e890f6a4..506516be5287 100644
--- a/net/sched/cls_bpf.c
+++ b/net/sched/cls_bpf.c
@@ -104,8 +104,8 @@ TC_INDIRECT_SCOPE int cls_bpf_classify(struct sk_buff *skb,
bpf_compute_data_pointers(skb);
filter_res = bpf_prog_run(prog->filter, skb);
}
- if (unlikely(!skb->tstamp && skb->mono_delivery_time))
- skb->mono_delivery_time = 0;
+ if (unlikely(!skb->tstamp && skb->tstamp_type))
+ skb->tstamp_type = SKB_CLOCK_REAL;

if (prog->exts_integrated) {
res->class = 0;
--
2.25.1



2024-04-18 18:48:12

by Willem de Bruijn

[permalink] [raw]
Subject: Re: [RFC PATCH bpf-next v4 1/2] net: Rename mono_delivery_time to tstamp_type for scalabilty

Abhishek Chauhan wrote:
> mono_delivery_time was added to check if skb->tstamp has delivery
> time in mono clock base (i.e. EDT) otherwise skb->tstamp has
> timestamp in ingress and delivery_time at egress.
>
> Renaming the bitfield from mono_delivery_time to tstamp_type is for
> extensibilty for other timestamps such as userspace timestamp
> (i.e. SO_TXTIME) set via sock opts.
>
> As we are renaming the mono_delivery_time to tstamp_type, it makes
> sense to start assigning tstamp_type based on enum defined
> in this commit.
>
> Earlier we used bool arg flag to check if the tstamp is mono in
> function skb_set_delivery_time, Now the signature of the functions
> accepts tstamp_type to distinguish between mono and real time.
>
> In future tstamp_type:1 can be extended to support userspace timestamp
> by increasing the bitfield.
>
> Link: https://lore.kernel.org/netdev/[email protected]/
> Signed-off-by: Abhishek Chauhan <[email protected]>

> +/**
> + * tstamp_type:1 can take 2 values each
> + * represented by time base in skb
> + * 0x0 => real timestamp_type
> + * 0x1 => mono timestamp_type
> + */
> +enum skb_tstamp_type {
> + SKB_CLOCK_REAL, /* Time base is skb is REALTIME */
> + SKB_CLOCK_MONO, /* Time base is skb is MONOTONIC */
> +};
> +

Can drop the comments. These names are self documenting.

> /**
> * DOC: Basic sk_buff geometry
> *
> @@ -819,7 +830,7 @@ typedef unsigned char *sk_buff_data_t;
> * @dst_pending_confirm: need to confirm neighbour
> * @decrypted: Decrypted SKB
> * @slow_gro: state present at GRO time, slower prepare step required
> - * @mono_delivery_time: When set, skb->tstamp has the
> + * @tstamp_type: When set, skb->tstamp has the
> * delivery_time in mono clock base (i.e. EDT). Otherwise, the
> * skb->tstamp has the (rcv) timestamp at ingress and
> * delivery_time at egress.

Is this still correct? I think all egress does now annotate correctly
as SKB_CLOCK_MONO. So when not set it always is SKB_CLOCK_REAL.

> diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
> index 61119d42b0fd..a062f88c47c3 100644
> --- a/net/ipv4/tcp_output.c
> +++ b/net/ipv4/tcp_output.c
> @@ -1300,7 +1300,7 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb,
> tp = tcp_sk(sk);
> prior_wstamp = tp->tcp_wstamp_ns;
> tp->tcp_wstamp_ns = max(tp->tcp_wstamp_ns, tp->tcp_clock_cache);
> - skb_set_delivery_time(skb, tp->tcp_wstamp_ns, true);
> + skb_set_delivery_time(skb, tp->tcp_wstamp_ns, CLOCK_MONOTONIC);

Multiple references to CLOCK_MONOTONIC left




2024-04-18 20:08:36

by Abhishek Chauhan

[permalink] [raw]
Subject: Re: [RFC PATCH bpf-next v4 1/2] net: Rename mono_delivery_time to tstamp_type for scalabilty



On 4/18/2024 11:47 AM, Willem de Bruijn wrote:
> Abhishek Chauhan wrote:
>> mono_delivery_time was added to check if skb->tstamp has delivery
>> time in mono clock base (i.e. EDT) otherwise skb->tstamp has
>> timestamp in ingress and delivery_time at egress.
>>
>> Renaming the bitfield from mono_delivery_time to tstamp_type is for
>> extensibilty for other timestamps such as userspace timestamp
>> (i.e. SO_TXTIME) set via sock opts.
>>
>> As we are renaming the mono_delivery_time to tstamp_type, it makes
>> sense to start assigning tstamp_type based on enum defined
>> in this commit.
>>
>> Earlier we used bool arg flag to check if the tstamp is mono in
>> function skb_set_delivery_time, Now the signature of the functions
>> accepts tstamp_type to distinguish between mono and real time.
>>
>> In future tstamp_type:1 can be extended to support userspace timestamp
>> by increasing the bitfield.
>>
>> Link: https://lore.kernel.org/netdev/[email protected]/
>> Signed-off-by: Abhishek Chauhan <[email protected]>
>
>> +/**
>> + * tstamp_type:1 can take 2 values each
>> + * represented by time base in skb
>> + * 0x0 => real timestamp_type
>> + * 0x1 => mono timestamp_type
>> + */
>> +enum skb_tstamp_type {
>> + SKB_CLOCK_REAL, /* Time base is skb is REALTIME */
>> + SKB_CLOCK_MONO, /* Time base is skb is MONOTONIC */
>> +};
>> +
>
> Can drop the comments. These names are self documenting.

Noted! . I will take care of this
>
>> /**
>> * DOC: Basic sk_buff geometry
>> *
>> @@ -819,7 +830,7 @@ typedef unsigned char *sk_buff_data_t;
>> * @dst_pending_confirm: need to confirm neighbour
>> * @decrypted: Decrypted SKB
>> * @slow_gro: state present at GRO time, slower prepare step required
>> - * @mono_delivery_time: When set, skb->tstamp has the
>> + * @tstamp_type: When set, skb->tstamp has the
>> * delivery_time in mono clock base (i.e. EDT). Otherwise, the
>> * skb->tstamp has the (rcv) timestamp at ingress and
>> * delivery_time at egress.
>
> Is this still correct? I think all egress does now annotate correctly
> as SKB_CLOCK_MONO. So when not set it always is SKB_CLOCK_REAL.
>
That is correct.

>> diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
>> index 61119d42b0fd..a062f88c47c3 100644
>> --- a/net/ipv4/tcp_output.c
>> +++ b/net/ipv4/tcp_output.c
>> @@ -1300,7 +1300,7 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb,
>> tp = tcp_sk(sk);
>> prior_wstamp = tp->tcp_wstamp_ns;
>> tp->tcp_wstamp_ns = max(tp->tcp_wstamp_ns, tp->tcp_clock_cache);
>> - skb_set_delivery_time(skb, tp->tcp_wstamp_ns, true);
>> + skb_set_delivery_time(skb, tp->tcp_wstamp_ns, CLOCK_MONOTONIC);
>
> Multiple references to CLOCK_MONOTONIC left
>
I think i took care of all the references. Apologies if i didn't understand your comment here.


>
>

2024-04-18 20:12:34

by Willem de Bruijn

[permalink] [raw]
Subject: Re: [RFC PATCH bpf-next v4 1/2] net: Rename mono_delivery_time to tstamp_type for scalabilty

Abhishek Chauhan (ABC) wrote:
>
>
> On 4/18/2024 11:47 AM, Willem de Bruijn wrote:
> > Abhishek Chauhan wrote:
> >> mono_delivery_time was added to check if skb->tstamp has delivery
> >> time in mono clock base (i.e. EDT) otherwise skb->tstamp has
> >> timestamp in ingress and delivery_time at egress.
> >>
> >> Renaming the bitfield from mono_delivery_time to tstamp_type is for
> >> extensibilty for other timestamps such as userspace timestamp
> >> (i.e. SO_TXTIME) set via sock opts.
> >>
> >> As we are renaming the mono_delivery_time to tstamp_type, it makes
> >> sense to start assigning tstamp_type based on enum defined
> >> in this commit.
> >>
> >> Earlier we used bool arg flag to check if the tstamp is mono in
> >> function skb_set_delivery_time, Now the signature of the functions
> >> accepts tstamp_type to distinguish between mono and real time.
> >>
> >> In future tstamp_type:1 can be extended to support userspace timestamp
> >> by increasing the bitfield.
> >>
> >> Link: https://lore.kernel.org/netdev/[email protected]/
> >> Signed-off-by: Abhishek Chauhan <[email protected]>
> >
> >> +/**
> >> + * tstamp_type:1 can take 2 values each
> >> + * represented by time base in skb
> >> + * 0x0 => real timestamp_type
> >> + * 0x1 => mono timestamp_type
> >> + */
> >> +enum skb_tstamp_type {
> >> + SKB_CLOCK_REAL, /* Time base is skb is REALTIME */
> >> + SKB_CLOCK_MONO, /* Time base is skb is MONOTONIC */
> >> +};
> >> +
> >
> > Can drop the comments. These names are self documenting.
>
> Noted! . I will take care of this
> >
> >> /**
> >> * DOC: Basic sk_buff geometry
> >> *
> >> @@ -819,7 +830,7 @@ typedef unsigned char *sk_buff_data_t;
> >> * @dst_pending_confirm: need to confirm neighbour
> >> * @decrypted: Decrypted SKB
> >> * @slow_gro: state present at GRO time, slower prepare step required
> >> - * @mono_delivery_time: When set, skb->tstamp has the
> >> + * @tstamp_type: When set, skb->tstamp has the
> >> * delivery_time in mono clock base (i.e. EDT). Otherwise, the
> >> * skb->tstamp has the (rcv) timestamp at ingress and
> >> * delivery_time at egress.
> >
> > Is this still correct? I think all egress does now annotate correctly
> > as SKB_CLOCK_MONO. So when not set it always is SKB_CLOCK_REAL.
> >
> That is correct.
>
> >> diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
> >> index 61119d42b0fd..a062f88c47c3 100644
> >> --- a/net/ipv4/tcp_output.c
> >> +++ b/net/ipv4/tcp_output.c
> >> @@ -1300,7 +1300,7 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb,
> >> tp = tcp_sk(sk);
> >> prior_wstamp = tp->tcp_wstamp_ns;
> >> tp->tcp_wstamp_ns = max(tp->tcp_wstamp_ns, tp->tcp_clock_cache);
> >> - skb_set_delivery_time(skb, tp->tcp_wstamp_ns, true);
> >> + skb_set_delivery_time(skb, tp->tcp_wstamp_ns, CLOCK_MONOTONIC);
> >
> > Multiple references to CLOCK_MONOTONIC left
> >
> I think i took care of all the references. Apologies if i didn't understand your comment here.

On closer read, there is a type issue here.

skb_set_delivery_time takes a u8 tstamp_type. But it is often passed
a clockid_t, and that is also what the switch expects.

But it does also get called with a tstamp_type in code like the
following:

+ u8 tstamp_type = skb->tstamp_type;
unsigned int hlen, ll_rs, mtu;
ktime_t tstamp = skb->tstamp;
struct ip_frag_state state;
@@ -82,7 +82,7 @@ static int nf_br_ip_fragment(struct net *net, struct sock *sk,
if (iter.frag)
ip_fraglist_prepare(skb, &iter);

- skb_set_delivery_time(skb, tstamp, mono_delivery_time);
+ skb_set_delivery_time(skb, tstamp, tstamp_type);

So maybe we need two variants, one that takes a tstamp_type and one
that tames a clockid_t?

The first can be simple, not switch needed. Just apply the two stores.

2024-04-18 20:39:36

by Abhishek Chauhan

[permalink] [raw]
Subject: Re: [RFC PATCH bpf-next v4 1/2] net: Rename mono_delivery_time to tstamp_type for scalabilty



On 4/18/2024 1:11 PM, Willem de Bruijn wrote:
> Abhishek Chauhan (ABC) wrote:
>>
>>
>> On 4/18/2024 11:47 AM, Willem de Bruijn wrote:
>>> Abhishek Chauhan wrote:
>>>> mono_delivery_time was added to check if skb->tstamp has delivery
>>>> time in mono clock base (i.e. EDT) otherwise skb->tstamp has
>>>> timestamp in ingress and delivery_time at egress.
>>>>
>>>> Renaming the bitfield from mono_delivery_time to tstamp_type is for
>>>> extensibilty for other timestamps such as userspace timestamp
>>>> (i.e. SO_TXTIME) set via sock opts.
>>>>
>>>> As we are renaming the mono_delivery_time to tstamp_type, it makes
>>>> sense to start assigning tstamp_type based on enum defined
>>>> in this commit.
>>>>
>>>> Earlier we used bool arg flag to check if the tstamp is mono in
>>>> function skb_set_delivery_time, Now the signature of the functions
>>>> accepts tstamp_type to distinguish between mono and real time.
>>>>
>>>> In future tstamp_type:1 can be extended to support userspace timestamp
>>>> by increasing the bitfield.
>>>>
>>>> Link: https://lore.kernel.org/netdev/[email protected]/
>>>> Signed-off-by: Abhishek Chauhan <[email protected]>
>>>
>>>> +/**
>>>> + * tstamp_type:1 can take 2 values each
>>>> + * represented by time base in skb
>>>> + * 0x0 => real timestamp_type
>>>> + * 0x1 => mono timestamp_type
>>>> + */
>>>> +enum skb_tstamp_type {
>>>> + SKB_CLOCK_REAL, /* Time base is skb is REALTIME */
>>>> + SKB_CLOCK_MONO, /* Time base is skb is MONOTONIC */
>>>> +};
>>>> +
>>>
>>> Can drop the comments. These names are self documenting.
>>
>> Noted! . I will take care of this
>>>
>>>> /**
>>>> * DOC: Basic sk_buff geometry
>>>> *
>>>> @@ -819,7 +830,7 @@ typedef unsigned char *sk_buff_data_t;
>>>> * @dst_pending_confirm: need to confirm neighbour
>>>> * @decrypted: Decrypted SKB
>>>> * @slow_gro: state present at GRO time, slower prepare step required
>>>> - * @mono_delivery_time: When set, skb->tstamp has the
>>>> + * @tstamp_type: When set, skb->tstamp has the
>>>> * delivery_time in mono clock base (i.e. EDT). Otherwise, the
>>>> * skb->tstamp has the (rcv) timestamp at ingress and
>>>> * delivery_time at egress.
>>>
>>> Is this still correct? I think all egress does now annotate correctly
>>> as SKB_CLOCK_MONO. So when not set it always is SKB_CLOCK_REAL.
>>>
>> That is correct.
>>
>>>> diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
>>>> index 61119d42b0fd..a062f88c47c3 100644
>>>> --- a/net/ipv4/tcp_output.c
>>>> +++ b/net/ipv4/tcp_output.c
>>>> @@ -1300,7 +1300,7 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb,
>>>> tp = tcp_sk(sk);
>>>> prior_wstamp = tp->tcp_wstamp_ns;
>>>> tp->tcp_wstamp_ns = max(tp->tcp_wstamp_ns, tp->tcp_clock_cache);
>>>> - skb_set_delivery_time(skb, tp->tcp_wstamp_ns, true);
>>>> + skb_set_delivery_time(skb, tp->tcp_wstamp_ns, CLOCK_MONOTONIC);
>>>
>>> Multiple references to CLOCK_MONOTONIC left
>>>
>> I think i took care of all the references. Apologies if i didn't understand your comment here.
>
> On closer read, there is a type issue here.
>
> skb_set_delivery_time takes a u8 tstamp_type. But it is often passed
> a clockid_t, and that is also what the switch expects.
>
> But it does also get called with a tstamp_type in code like the
> following:
>
> + u8 tstamp_type = skb->tstamp_type;
> unsigned int hlen, ll_rs, mtu;
> ktime_t tstamp = skb->tstamp;
> struct ip_frag_state state;
> @@ -82,7 +82,7 @@ static int nf_br_ip_fragment(struct net *net, struct sock *sk,
> if (iter.frag)
> ip_fraglist_prepare(skb, &iter);
>
> - skb_set_delivery_time(skb, tstamp, mono_delivery_time);
> + skb_set_delivery_time(skb, tstamp, tstamp_type);
>
> So maybe we need two variants, one that takes a tstamp_type and one
> that tames a clockid_t?
>
> The first can be simple, not switch needed. Just apply the two stores.
I agree to what you are saying but clockid_t => points to int itself.

For example :-
void qdisc_watchdog_init_clockid(struct qdisc_watchdog *wd, struct Qdisc *qdisc,
clockid_t clockid)

qdisc_watchdog_init_clockid(wd, qdisc, CLOCK_MONOTONIC); => sch_api.c
qdisc_watchdog_init_clockid(&q->watchdog, sch, q->clockid); =>sch_etf.c (q->clockid is int)

But i can change it to two new APIs one which accepts only clock_id (with switch) and other accepts u8 to directly store whatever is given.

2024-04-18 20:52:29

by Willem de Bruijn

[permalink] [raw]
Subject: Re: [RFC PATCH bpf-next v4 1/2] net: Rename mono_delivery_time to tstamp_type for scalabilty

Abhishek Chauhan (ABC) wrote:
>
>
> On 4/18/2024 1:11 PM, Willem de Bruijn wrote:
> > Abhishek Chauhan (ABC) wrote:
> >>
> >>
> >> On 4/18/2024 11:47 AM, Willem de Bruijn wrote:
> >>> Abhishek Chauhan wrote:
> >>>> mono_delivery_time was added to check if skb->tstamp has delivery
> >>>> time in mono clock base (i.e. EDT) otherwise skb->tstamp has
> >>>> timestamp in ingress and delivery_time at egress.
> >>>>
> >>>> Renaming the bitfield from mono_delivery_time to tstamp_type is for
> >>>> extensibilty for other timestamps such as userspace timestamp
> >>>> (i.e. SO_TXTIME) set via sock opts.
> >>>>
> >>>> As we are renaming the mono_delivery_time to tstamp_type, it makes
> >>>> sense to start assigning tstamp_type based on enum defined
> >>>> in this commit.
> >>>>
> >>>> Earlier we used bool arg flag to check if the tstamp is mono in
> >>>> function skb_set_delivery_time, Now the signature of the functions
> >>>> accepts tstamp_type to distinguish between mono and real time.
> >>>>
> >>>> In future tstamp_type:1 can be extended to support userspace timestamp
> >>>> by increasing the bitfield.
> >>>>
> >>>> Link: https://lore.kernel.org/netdev/[email protected]/
> >>>> Signed-off-by: Abhishek Chauhan <[email protected]>
> >>>
> >>>> +/**
> >>>> + * tstamp_type:1 can take 2 values each
> >>>> + * represented by time base in skb
> >>>> + * 0x0 => real timestamp_type
> >>>> + * 0x1 => mono timestamp_type
> >>>> + */
> >>>> +enum skb_tstamp_type {
> >>>> + SKB_CLOCK_REAL, /* Time base is skb is REALTIME */
> >>>> + SKB_CLOCK_MONO, /* Time base is skb is MONOTONIC */
> >>>> +};
> >>>> +
> >>>
> >>> Can drop the comments. These names are self documenting.
> >>
> >> Noted! . I will take care of this
> >>>
> >>>> /**
> >>>> * DOC: Basic sk_buff geometry
> >>>> *
> >>>> @@ -819,7 +830,7 @@ typedef unsigned char *sk_buff_data_t;
> >>>> * @dst_pending_confirm: need to confirm neighbour
> >>>> * @decrypted: Decrypted SKB
> >>>> * @slow_gro: state present at GRO time, slower prepare step required
> >>>> - * @mono_delivery_time: When set, skb->tstamp has the
> >>>> + * @tstamp_type: When set, skb->tstamp has the
> >>>> * delivery_time in mono clock base (i.e. EDT). Otherwise, the
> >>>> * skb->tstamp has the (rcv) timestamp at ingress and
> >>>> * delivery_time at egress.
> >>>
> >>> Is this still correct? I think all egress does now annotate correctly
> >>> as SKB_CLOCK_MONO. So when not set it always is SKB_CLOCK_REAL.
> >>>
> >> That is correct.
> >>
> >>>> diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
> >>>> index 61119d42b0fd..a062f88c47c3 100644
> >>>> --- a/net/ipv4/tcp_output.c
> >>>> +++ b/net/ipv4/tcp_output.c
> >>>> @@ -1300,7 +1300,7 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb,
> >>>> tp = tcp_sk(sk);
> >>>> prior_wstamp = tp->tcp_wstamp_ns;
> >>>> tp->tcp_wstamp_ns = max(tp->tcp_wstamp_ns, tp->tcp_clock_cache);
> >>>> - skb_set_delivery_time(skb, tp->tcp_wstamp_ns, true);
> >>>> + skb_set_delivery_time(skb, tp->tcp_wstamp_ns, CLOCK_MONOTONIC);
> >>>
> >>> Multiple references to CLOCK_MONOTONIC left
> >>>
> >> I think i took care of all the references. Apologies if i didn't understand your comment here.
> >
> > On closer read, there is a type issue here.
> >
> > skb_set_delivery_time takes a u8 tstamp_type. But it is often passed
> > a clockid_t, and that is also what the switch expects.
> >
> > But it does also get called with a tstamp_type in code like the
> > following:
> >
> > + u8 tstamp_type = skb->tstamp_type;
> > unsigned int hlen, ll_rs, mtu;
> > ktime_t tstamp = skb->tstamp;
> > struct ip_frag_state state;
> > @@ -82,7 +82,7 @@ static int nf_br_ip_fragment(struct net *net, struct sock *sk,
> > if (iter.frag)
> > ip_fraglist_prepare(skb, &iter);
> >
> > - skb_set_delivery_time(skb, tstamp, mono_delivery_time);
> > + skb_set_delivery_time(skb, tstamp, tstamp_type);
> >
> > So maybe we need two variants, one that takes a tstamp_type and one
> > that tames a clockid_t?
> >
> > The first can be simple, not switch needed. Just apply the two stores.
> I agree to what you are saying but clockid_t => points to int itself.
>
> For example :-
> void qdisc_watchdog_init_clockid(struct qdisc_watchdog *wd, struct Qdisc *qdisc,
> clockid_t clockid)
>
> qdisc_watchdog_init_clockid(wd, qdisc, CLOCK_MONOTONIC); => sch_api.c
> qdisc_watchdog_init_clockid(&q->watchdog, sch, q->clockid); =>sch_etf.c (q->clockid is int)

My concern is more that we use CLOCK_MONOTONIC and SKB_CLOCK_MONO
(and other clocks) interchangeably, without invariant checks to make
sure that they map onto the same integer value.

2024-04-18 20:53:32

by Abhishek Chauhan

[permalink] [raw]
Subject: Re: [RFC PATCH bpf-next v4 1/2] net: Rename mono_delivery_time to tstamp_type for scalabilty



On 4/18/2024 1:49 PM, Willem de Bruijn wrote:
> Abhishek Chauhan (ABC) wrote:
>>
>>
>> On 4/18/2024 1:11 PM, Willem de Bruijn wrote:
>>> Abhishek Chauhan (ABC) wrote:
>>>>
>>>>
>>>> On 4/18/2024 11:47 AM, Willem de Bruijn wrote:
>>>>> Abhishek Chauhan wrote:
>>>>>> mono_delivery_time was added to check if skb->tstamp has delivery
>>>>>> time in mono clock base (i.e. EDT) otherwise skb->tstamp has
>>>>>> timestamp in ingress and delivery_time at egress.
>>>>>>
>>>>>> Renaming the bitfield from mono_delivery_time to tstamp_type is for
>>>>>> extensibilty for other timestamps such as userspace timestamp
>>>>>> (i.e. SO_TXTIME) set via sock opts.
>>>>>>
>>>>>> As we are renaming the mono_delivery_time to tstamp_type, it makes
>>>>>> sense to start assigning tstamp_type based on enum defined
>>>>>> in this commit.
>>>>>>
>>>>>> Earlier we used bool arg flag to check if the tstamp is mono in
>>>>>> function skb_set_delivery_time, Now the signature of the functions
>>>>>> accepts tstamp_type to distinguish between mono and real time.
>>>>>>
>>>>>> In future tstamp_type:1 can be extended to support userspace timestamp
>>>>>> by increasing the bitfield.
>>>>>>
>>>>>> Link: https://lore.kernel.org/netdev/[email protected]/
>>>>>> Signed-off-by: Abhishek Chauhan <[email protected]>
>>>>>
>>>>>> +/**
>>>>>> + * tstamp_type:1 can take 2 values each
>>>>>> + * represented by time base in skb
>>>>>> + * 0x0 => real timestamp_type
>>>>>> + * 0x1 => mono timestamp_type
>>>>>> + */
>>>>>> +enum skb_tstamp_type {
>>>>>> + SKB_CLOCK_REAL, /* Time base is skb is REALTIME */
>>>>>> + SKB_CLOCK_MONO, /* Time base is skb is MONOTONIC */
>>>>>> +};
>>>>>> +
>>>>>
>>>>> Can drop the comments. These names are self documenting.
>>>>
>>>> Noted! . I will take care of this
>>>>>
>>>>>> /**
>>>>>> * DOC: Basic sk_buff geometry
>>>>>> *
>>>>>> @@ -819,7 +830,7 @@ typedef unsigned char *sk_buff_data_t;
>>>>>> * @dst_pending_confirm: need to confirm neighbour
>>>>>> * @decrypted: Decrypted SKB
>>>>>> * @slow_gro: state present at GRO time, slower prepare step required
>>>>>> - * @mono_delivery_time: When set, skb->tstamp has the
>>>>>> + * @tstamp_type: When set, skb->tstamp has the
>>>>>> * delivery_time in mono clock base (i.e. EDT). Otherwise, the
>>>>>> * skb->tstamp has the (rcv) timestamp at ingress and
>>>>>> * delivery_time at egress.
>>>>>
>>>>> Is this still correct? I think all egress does now annotate correctly
>>>>> as SKB_CLOCK_MONO. So when not set it always is SKB_CLOCK_REAL.
>>>>>
>>>> That is correct.
>>>>
>>>>>> diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
>>>>>> index 61119d42b0fd..a062f88c47c3 100644
>>>>>> --- a/net/ipv4/tcp_output.c
>>>>>> +++ b/net/ipv4/tcp_output.c
>>>>>> @@ -1300,7 +1300,7 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb,
>>>>>> tp = tcp_sk(sk);
>>>>>> prior_wstamp = tp->tcp_wstamp_ns;
>>>>>> tp->tcp_wstamp_ns = max(tp->tcp_wstamp_ns, tp->tcp_clock_cache);
>>>>>> - skb_set_delivery_time(skb, tp->tcp_wstamp_ns, true);
>>>>>> + skb_set_delivery_time(skb, tp->tcp_wstamp_ns, CLOCK_MONOTONIC);
>>>>>
>>>>> Multiple references to CLOCK_MONOTONIC left
>>>>>
>>>> I think i took care of all the references. Apologies if i didn't understand your comment here.
>>>
>>> On closer read, there is a type issue here.
>>>
>>> skb_set_delivery_time takes a u8 tstamp_type. But it is often passed
>>> a clockid_t, and that is also what the switch expects.
>>>
>>> But it does also get called with a tstamp_type in code like the
>>> following:
>>>
>>> + u8 tstamp_type = skb->tstamp_type;
>>> unsigned int hlen, ll_rs, mtu;
>>> ktime_t tstamp = skb->tstamp;
>>> struct ip_frag_state state;
>>> @@ -82,7 +82,7 @@ static int nf_br_ip_fragment(struct net *net, struct sock *sk,
>>> if (iter.frag)
>>> ip_fraglist_prepare(skb, &iter);
>>>
>>> - skb_set_delivery_time(skb, tstamp, mono_delivery_time);
>>> + skb_set_delivery_time(skb, tstamp, tstamp_type);
>>>
>>> So maybe we need two variants, one that takes a tstamp_type and one
>>> that tames a clockid_t?
>>>
>>> The first can be simple, not switch needed. Just apply the two stores.
>> I agree to what you are saying but clockid_t => points to int itself.
>>
>> For example :-
>> void qdisc_watchdog_init_clockid(struct qdisc_watchdog *wd, struct Qdisc *qdisc,
>> clockid_t clockid)
>>
>> qdisc_watchdog_init_clockid(wd, qdisc, CLOCK_MONOTONIC); => sch_api.c
>> qdisc_watchdog_init_clockid(&q->watchdog, sch, q->clockid); =>sch_etf.c (q->clockid is int)
>
> My concern is more that we use CLOCK_MONOTONIC and SKB_CLOCK_MONO
> (and other clocks) interchangeably, without invariant checks to make
> sure that they map onto the same integer value.
Ah i see. I got it . I will make two APIs . Makes sense.
1. One can check for clockid => switch => set
2. One can set it directly.