Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1753695AbYKSMKq (ORCPT ); Wed, 19 Nov 2008 07:10:46 -0500 Received: (majordomo@vger.kernel.org) by vger.kernel.org id S1752800AbYKSMJI (ORCPT ); Wed, 19 Nov 2008 07:09:08 -0500 Received: from mga09.intel.com ([134.134.136.24]:3550 "EHLO mga09.intel.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1752625AbYKSMI6 (ORCPT ); Wed, 19 Nov 2008 07:08:58 -0500 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="4.33,631,1220252400"; d="scan'208";a="361907244" From: Patrick Ohly To: linux-kernel@vger.kernel.org Cc: netdev@vger.kernel.org, David Miller , Patrick Ohly Subject: [RFC PATCH 03/11] net: infrastructure for hardware time stamping Date: Wed, 19 Nov 2008 13:08:40 +0100 Message-Id: <1227096528-24150-4-git-send-email-patrick.ohly@intel.com> X-Mailer: git-send-email 1.6.0.4 In-Reply-To: <1227096528-24150-3-git-send-email-patrick.ohly@intel.com> References: <1227096528-24150-1-git-send-email-patrick.ohly@intel.com> <1227096528-24150-2-git-send-email-patrick.ohly@intel.com> <1227096528-24150-3-git-send-email-patrick.ohly@intel.com> Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Length: 12366 Lines: 402 The new sk_buff->hwtstamp is used to transport time stamping instructions to the device driver (outgoing packets) and to return raw hardware time stamps back to user space (incoming or sent packets). Implements TX time stamping in software if the device driver doesn't support hardware time stamping. 
The new semantic for hardware/software time stamping around net_device->hard_start_xmit() is based on two assumptions about existing network device drivers which don't support hardware time stamping and know nothing about it: - they leave the skb->hwtstamp field unmodified - they keep the connection to the originating socket in skb->sk alive, i.e., don't call skb_orphan() Given that hwtstamp is a new field, the first assumption is safe. The second is only true for some drivers. As a result, software TX time stamping currently works with the bnx2 driver, but not with the unmodified igb driver (the two drivers this patch series was tested with). --- include/linux/netdevice.h | 11 ++++ include/linux/skbuff.h | 136 ++++++++++++++++++++++++++++++++++++++++++++- net/core/dev.c | 23 +++++++- net/core/skbuff.c | 72 ++++++++++++++++++++++++ 4 files changed, 239 insertions(+), 3 deletions(-) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 12d7f44..24bea0c 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -763,6 +763,17 @@ struct net_device /* for setting kernel sock attribute on TCP connection setup */ #define GSO_MAX_SIZE 65536 unsigned int gso_max_size; + +#define HAVE_HW_TIME_STAMP + /* Transforms original raw hardware time stamp to + * system time base. Always required when supporting + * hardware time stamping. + * + * Returns empty stamp (= all zero) if conversion wasn't + * possible. 
+ */ + ktime_t (*hwtstamp_raw2sys)(struct net_device *dev, + ktime_t hwstamp); }; #define to_net_dev(d) container_of(d, struct net_device, dev) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index a01b6f8..c8004eb 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -200,12 +200,42 @@ typedef unsigned int sk_buff_data_t; typedef unsigned char *sk_buff_data_t; #endif + +/** + * sk_buff_hwtstamp - hardware/software time stamping instructions + * (for outgoing packets) or result (for packets processed by the + * network device) + * + * @hwtstamp: hardware time stamp; software time stamps are stored + * in skb->tstamp + * @tstamp_tx_hardware: generate hardware time stamp + * @tstamp_tx_software: generate software time stamp + * @tstamp_tx_in_progress: device driver is going to provide hardware + * time stamp + */ +union sk_buff_hwtstamp +{ + ktime_t hwtstamp; + struct { + __u64 hwtstamp_padding:60, + tstamp_tx_hardware:1, + tstamp_tx_software:1, + tstamp_tx_in_progress:1; + }; +}; + /** * struct sk_buff - socket buffer * @next: Next buffer in list * @prev: Previous buffer in list * @sk: Socket we are owned by - * @tstamp: Time we arrived + * @tstamp: Time we arrived: generated by ktime_get_real() and + * thus is recorded in system time. + * @hwtstamp: Time we arrived or were sent: generated by the + * network device and therefore not directly related to + * system time. For outgoing packets the time stamp + * is not valid yet. Instead the union is used to + * transport time stamping requests to the device. 
* @dev: Device we arrived on/are leaving by * @transport_header: Transport layer header * @network_header: Network layer header @@ -266,6 +296,7 @@ struct sk_buff { struct sock *sk; ktime_t tstamp; + union sk_buff_hwtstamp hwtstamp; struct net_device *dev; union { @@ -1700,6 +1731,11 @@ static inline void skb_copy_to_linear_data_offset(struct sk_buff *skb, extern void skb_init(void); +static inline ktime_t skb_get_ktime(const struct sk_buff *skb) +{ + return skb->tstamp; +} + /** * skb_get_timestamp - get timestamp from a skb * @skb: skb to get stamp from @@ -1714,6 +1750,11 @@ static inline void skb_get_timestamp(const struct sk_buff *skb, struct timeval * *stamp = ktime_to_timeval(skb->tstamp); } +static inline void skb_get_timestampns(const struct sk_buff *skb, struct timespec *stamp) +{ + *stamp = ktime_to_timespec(skb->tstamp); +} + static inline void __net_timestamp(struct sk_buff *skb) { skb->tstamp = ktime_get_real(); @@ -1729,6 +1770,99 @@ static inline ktime_t net_invalid_timestamp(void) return ktime_set(0, 0); } +/** + * skb_hwtstamp_available - checks whether the time stamp value has + * been set (= non-zero) and really came from hardware + * + * Only works for packets which have been processed by the device + * driver. + */ +static inline int skb_hwtstamp_available(const struct sk_buff *skb) +{ + return skb->hwtstamp.hwtstamp.tv64 != 0; +} + +/** + * skb_hwtstamp_set - stores a time stamp generated by hardware in the skb + * @skb: time stamp is stored here + * @hwtstamp: original, untransformed hardware time stamp + */ +static inline void skb_hwtstamp_set(struct sk_buff *skb, + ktime_t hwtstamp) +{ + skb->hwtstamp.hwtstamp = hwtstamp; +} + +/** + * skb_hwtstamp_raw - fills the timespec with the original, "raw" time + * stamp as generated by the hardware when it processed the packet + * + * Returns 1 if such a hardware time stamp is available, 0 + * otherwise. In that case the timespec is left unchanged. 
+ */ +int skb_hwtstamp_raw(const struct sk_buff *skb, struct timespec *ts); + +/** + * skb_hwtstamp_transformed - fills the timespec with the hardware + * time stamp generated when the hardware processed the packet, + * transformed to system time + * + * Beware that this transformation is not perfect: packet A received on + * interface 1 before packet B on interface 2 might have a higher + * transformed time stamp. + * + * Returns 1 if a transformed hardware time stamp is available, 0 + * otherwise. In that case the timespec is left unchanged. + */ +int skb_hwtstamp_transformed(const struct sk_buff *skb, struct timespec *ts); + +static inline int skb_hwtstamp_check_tx_hardware(struct sk_buff *skb) +{ + return skb->hwtstamp.tstamp_tx_hardware; +} + +static inline int skb_hwtstamp_check_tx_software(struct sk_buff *skb) +{ + return skb->hwtstamp.tstamp_tx_software; +} + +static inline int skb_hwtstamp_check_tx_in_progress(struct sk_buff *skb) +{ + return skb->hwtstamp.tstamp_tx_in_progress; +} + +static inline void skb_hwtstamp_set_tx_in_progress(struct sk_buff *skb) +{ + skb->hwtstamp.tstamp_tx_in_progress = 1; +} + +/** + * skb_hwtstamp_tx - queue clone of skb with send time stamp + * @orig_skb: the original outgoing packet + * @stamp: either raw hardware time stamp or result of ktime_get_real() + * @dev: NULL if time stamp from ktime_get_real(), otherwise device + * which generated the hardware time stamp; the device may or + * may not implement the system time<->hardware time mapping + * functions + * + * This function will not actually timestamp the skb, but, if the skb has a + * socket associated, clone the skb, timestamp it, and queue it to the error + * queue of the socket. Errors are silently ignored. 
+ */ +void skb_hwtstamp_tx(struct sk_buff *orig_skb, + ktime_t stamp, + struct net_device *dev); + +/** + * skb_tx_software_timestamp - software fallback for send time stamping + */ +static inline void skb_tx_software_timestamp(struct sk_buff *skb) +{ + if (skb_hwtstamp_check_tx_software(skb) && + !skb_hwtstamp_check_tx_in_progress(skb)) + skb_hwtstamp_tx(skb, ktime_get_real(), NULL); +} + extern __sum16 __skb_checksum_complete_head(struct sk_buff *skb, int len); extern __sum16 __skb_checksum_complete(struct sk_buff *skb); diff --git a/net/core/dev.c b/net/core/dev.c index e08c0fc..b4b8eb8 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1658,6 +1658,8 @@ static int dev_gso_segment(struct sk_buff *skb) int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, struct netdev_queue *txq) { + int rc; + if (likely(!skb->next)) { if (!list_empty(&ptype_all)) dev_queue_xmit_nit(skb, dev); @@ -1669,13 +1671,29 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, goto gso; } - return dev->hard_start_xmit(skb, dev); + rc = dev->hard_start_xmit(skb, dev); + /* + * TODO: if skb_orphan() was called by + * dev->hard_start_xmit() (for example, the unmodified + * igb driver does that; bnx2 doesn't), then + * skb_tx_software_timestamp() will be unable to send + * back the time stamp. + * + * How can this be prevented? Always create another + * reference to the socket before calling + * dev->hard_start_xmit()? Prevent that skb_orphan() + * does anything in dev->hard_start_xmit() by clearing + * the skb destructor before the call and restoring it + * afterwards, then doing the skb_orphan() ourselves? 
+ */ + if (likely(!rc)) + skb_tx_software_timestamp(skb); + return rc; } gso: do { struct sk_buff *nskb = skb->next; - int rc; skb->next = nskb->next; nskb->next = NULL; @@ -1685,6 +1703,7 @@ gso: skb->next = nskb; return rc; } + skb_tx_software_timestamp(skb); if (unlikely(netif_tx_queue_stopped(txq) && skb->next)) return NETDEV_TX_BUSY; } while (skb->next); diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 267185a..38360d8 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -44,6 +44,7 @@ #include #include #include +#include #include #ifdef CONFIG_NET_CLS_ACT #include @@ -55,6 +56,7 @@ #include #include #include +#include #include #include @@ -496,6 +498,7 @@ EXPORT_SYMBOL(skb_recycle_check); static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old) { new->tstamp = old->tstamp; + new->hwtstamp = old->hwtstamp; new->dev = old->dev; new->transport_header = old->transport_header; new->network_header = old->network_header; @@ -2395,6 +2398,41 @@ err: EXPORT_SYMBOL_GPL(skb_segment); +int skb_hwtstamp_raw(const struct sk_buff *skb, struct timespec *ts) +{ + if (skb_hwtstamp_available(skb)) { + *ts = ktime_to_timespec(skb->hwtstamp.hwtstamp); + return 1; + } + return 0; +} + +EXPORT_SYMBOL_GPL(skb_hwtstamp_raw); + +int skb_hwtstamp_transformed(const struct sk_buff *skb, struct timespec *ts) +{ + struct rtable *rt; + struct in_device *idev; + struct net_device *netdev; + + if (skb_hwtstamp_available(skb) && + (rt = skb->rtable) != NULL && + (idev = rt->idev) != NULL && + (netdev = idev->dev) != NULL && + netdev->hwtstamp_raw2sys) { + ktime_t hwtstamp_sys = + netdev->hwtstamp_raw2sys(netdev, + skb->hwtstamp.hwtstamp); + if (hwtstamp_sys.tv64) { + *ts = ktime_to_timespec(hwtstamp_sys); + return 1; + } + } + return 0; +} + +EXPORT_SYMBOL_GPL(skb_hwtstamp_transformed); + void __init skb_init(void) { skbuff_head_cache = kmem_cache_create("skbuff_head_cache", @@ -2601,6 +2639,40 @@ int skb_cow_data(struct sk_buff *skb, int tailbits, struct 
sk_buff **trailer) return elt; } +void skb_hwtstamp_tx(struct sk_buff *orig_skb, + ktime_t stamp, + struct net_device *dev) +{ + struct sock *sk = orig_skb->sk; + struct sock_exterr_skb *serr; + struct sk_buff *skb; + int err = -ENOMEM; + + if (!sk) + return; + + skb = skb_clone(orig_skb, GFP_ATOMIC); + if (!skb) + return; + + if (dev) { + skb->hwtstamp.hwtstamp = stamp; + } else { + skb->tstamp = stamp; + skb->hwtstamp.hwtstamp.tv64 = 0; + } + + serr = SKB_EXT_ERR(skb); + memset(serr, 0, sizeof(*serr)); + serr->ee.ee_errno = ENOMSG; + serr->ee.ee_origin = SO_EE_ORIGIN_TIMESTAMPING; + err = sock_queue_err_skb(sk, skb); + if (err) + kfree_skb(skb); +} +EXPORT_SYMBOL_GPL(skb_hwtstamp_tx); + + /** * skb_partial_csum_set - set up and verify partial csum values for packet * @skb: the skb to set -- 1.6.0.4 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/