Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1757891Ab1FVCLj (ORCPT ); Tue, 21 Jun 2011 22:11:39 -0400 Received: from mail-qy0-f174.google.com ([209.85.216.174]:37927 "EHLO mail-qy0-f174.google.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1757714Ab1FVCLh (ORCPT ); Tue, 21 Jun 2011 22:11:37 -0400 DomainKey-Signature: a=rsa-sha1; c=nofws; d=gmail.com; s=gamma; h=from:to:cc:subject:date:message-id:x-mailer:in-reply-to:references; b=Phd9AsB0azWlmUGGoKdJeQF5sC7d+RUoe9fXf487yHBCBxjoyWsvftsWQrSeILfXX/ UXtTlZdcbYlHYBjEhfbdEyOFNXspN8lztvCs6AwnOxFvOQyQpdGghNMftIRov3DnnATi 7w6wXkXopUCVwW4Tlif5EQUKmx4VANDbb2wdE= From: Chetan Loke To: netdev@vger.kernel.org Cc: davem@davemloft.net, eric.dumazet@gmail.com, joe@perches.com, bhutchings@solarflare.com, shemminger@vyatta.com, linux-kernel@vger.kernel.org, Chetan Loke Subject: [PATCH v2 net-next af-packet 1/2] Enhance af-packet to provide (near zero)lossless packet capture functionality. Date: Tue, 21 Jun 2011 22:10:49 -0400 Message-Id: <1308708650-25509-2-git-send-email-loke.chetan@gmail.com> X-Mailer: git-send-email 1.7.5.2 In-Reply-To: <1308708650-25509-1-git-send-email-loke.chetan@gmail.com> References: <1308708650-25509-1-git-send-email-loke.chetan@gmail.com> Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Length: 4894 Lines: 181 Added TPACKET_V3 definitions Signed-off-by: Chetan Loke --- include/linux/if_packet.h | 128 +++++++++++++++++++++++++++++++++++++++++++++ 1 files changed, 128 insertions(+), 0 deletions(-) diff --git a/include/linux/if_packet.h b/include/linux/if_packet.h index 6d66ce1..e5fad08 100644 --- a/include/linux/if_packet.h +++ b/include/linux/if_packet.h @@ -55,6 +55,17 @@ struct tpacket_stats { unsigned int tp_drops; }; +struct tpacket_stats_v3 { + unsigned int tp_packets; + unsigned int tp_drops; + unsigned int tp_freeze_q_cnt; +}; + +union tpacket_stats_u { + struct tpacket_stats stats1; + 
struct tpacket_stats_v3 stats3; +}; + struct tpacket_auxdata { __u32 tp_status; __u32 tp_len; @@ -71,6 +82,7 @@ struct tpacket_auxdata { #define TP_STATUS_LOSING 0x4 #define TP_STATUS_CSUMNOTREADY 0x8 #define TP_STATUS_VLAN_VALID 0x10 /* auxdata has valid tp_vlan_tci */ +#define TP_STATUS_BLK_TMO 0x20 /* Tx ring - header status */ #define TP_STATUS_AVAILABLE 0x0 @@ -102,12 +114,114 @@ struct tpacket2_hdr { __u32 tp_nsec; __u16 tp_vlan_tci; }; +struct tpacket3_hdr { + __u32 tp_status; + __u32 tp_len; + __u32 tp_snaplen; + __u16 tp_mac; + __u16 tp_net; + __u32 tp_sec; + __u32 tp_nsec; + __u16 tp_vlan_tci; + __u16 tp_padding; + __u32 tp_next_offset; +}; + +struct bd_ts { + unsigned int ts_sec; + union { + struct { + unsigned int ts_usec; + }; + struct { + unsigned int ts_nsec; + }; + }; +} __attribute__ ((__packed__)); + +struct bd_v1 { + /* + * If you re-order the first 5 fields then + * the BLOCK_XXX macros will NOT work. + */ + __u32 block_status; + __u32 num_pkts; + __u32 offset_to_first_pkt; + + /* Number of valid bytes (including padding) + * blk_len <= tp_block_size + */ + __u32 blk_len; + + /* + * Quite a few uses of sequence number: + * 1. Make sure cache flush etc worked. + * Well, one can argue - why not use the increasing ts below? + * But look at 2. below first. + * 2. When you pass around blocks to other user space decoders, + * you can see which blk[s] is[are] outstanding etc. + * 3. Validate kernel code. + */ + __u64 seq_num; + + /* + * ts_last_pkt: + * + * Case 1. Block has 'N'(N >=1) packets and TMO'd(timed out) + * ts_last_pkt == 'time-stamp of last packet' and NOT the + * time when the timer fired and the block was closed. + * By providing the ts of the last packet we can absolutely + * guarantee that time-stamp wise, the first packet in the next + * block will never precede the last packet of the previous + * block. + * Case 2. Block has zero packets and TMO'd + * ts_last_pkt = time when the timer fired and the block + * was closed. + * Case 3. 
Block has 'N' packets and NO TMO. + * ts_last_pkt = time-stamp of the last pkt in the block. + * + * ts_first_pkt: + * Is always the time-stamp when the block was opened. + * Case a) ZERO packets + * No packets to deal with but at least you know the + * time-interval of this block. + * Case b) Non-zero packets + * Use the ts of the first packet in the block. + * + */ + struct bd_ts ts_first_pkt; + struct bd_ts ts_last_pkt; +} __attribute__ ((__packed__)); + +struct block_desc { + __u16 version; + __u16 offset_to_priv; + union { + struct { + __u32 words[4]; + __u64 dword; + } __attribute__ ((__packed__)); + struct bd_v1 bd1; + }; +} __attribute__ ((__packed__)); + + #define TPACKET2_HDRLEN (TPACKET_ALIGN(sizeof(struct tpacket2_hdr)) + sizeof(struct sockaddr_ll)) +#define TPACKET3_HDRLEN (TPACKET_ALIGN(sizeof(struct tpacket3_hdr)) + sizeof(struct sockaddr_ll)) + +#define BLOCK_STATUS(x) ((x)->words[0]) +#define BLOCK_NUM_PKTS(x) ((x)->words[1]) +#define BLOCK_O2FP(x) ((x)->words[2]) +#define BLOCK_LEN(x) ((x)->words[3]) +#define BLOCK_SNUM(x) ((x)->dword) +#define BLOCK_O2PRIV(x) ((x)->offset_to_priv) +#define BLOCK_PRIV(x) ((void *)((char *)(x) + BLOCK_O2PRIV(x))) enum tpacket_versions { TPACKET_V1, TPACKET_V2, + TPACKET_V3, }; /* @@ -130,6 +244,20 @@ struct tpacket_req { unsigned int tp_frame_nr; /* Total number of frames */ }; +struct tpacket_req3 { + unsigned int tp_block_size; /* Minimal size of contiguous block */ + unsigned int tp_block_nr; /* Number of blocks */ + unsigned int tp_frame_size; /* Size of frame */ + unsigned int tp_frame_nr; /* Total number of frames */ + unsigned int tp_retire_blk_tov; /* timeout in msecs */ + unsigned int tp_sizeof_priv; /* size of private data area */ +}; + +union tpacket_req_u { + struct tpacket_req req; + struct tpacket_req3 req3; +}; + struct packet_mreq { int mr_ifindex; unsigned short mr_type; -- 1.7.5.2 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to 
majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/