Skip to content

Commit 4289e60

Browse files
committed
Merge branch 'tc-skb-diet'
Willem de Bruijn says:

====================
convert tc_verd to integer bitfields

The skb tc_verd field takes up two bytes but uses far fewer bits.
Convert the remaining use cases to bitfields that fit in existing
holes (depending on config options) and potentially save the two
bytes in struct sk_buff.

This patchset is based on an earlier set by Florian Westphal and its
discussion (http://www.spinics.net/lists/netdev/msg329181.html).

Patches 1 and 2 are low hanging fruit: removing the last traces of
data that are no longer stored in tc_verd.

Patches 3 and 4 convert tc_verd to individual bitfields (5 bits).

Patch 5 reduces TC_AT to a single bitfield, as AT_STACK is not valid
here (unlike in the case of TC_FROM).

Patch 6 changes TC_FROM to two bitfields with clearly defined purpose.

It may be possible to reduce storage further after this initial
round. If tc_skip_classify is set only by IFB, testing skb_iif may
suffice. The L2 header pushing/popping logic can perhaps be shared
with AF_PACKET, which currently uses pkt_type for the same purpose.
Changes:
  RFC -> v1
    - (patch 3): remove no longer needed label in tcf_action_exec
    - (patch 5): set tc_at_ingress at the same points as existing
                 SET_TC_AT calls

Tested ingress mirred + netem + ifb:

  ip link set dev ifb0 up
  tc qdisc add dev eth0 ingress
  tc filter add dev eth0 parent ffff: \
    u32 match ip dport 8000 0xffff \
    action mirred egress redirect dev ifb0
  tc qdisc add dev ifb0 root netem delay 1000ms
  nc -u -l 8000 &
  ssh $otherhost nc -u $host 8000

Tested egress mirred:

  ip link add veth1 type veth peer name veth2
  ip link set dev veth1 up
  ip link set dev veth2 up
  tcpdump -n -i veth2 udp and dst port 8000 &

  tc qdisc add dev eth0 root handle 1: prio
  tc filter add dev eth0 parent 1:0 \
    u32 match ip dport 8000 0xffff \
    action mirred egress redirect dev veth1
  tc qdisc add dev veth1 root netem delay 1000ms
  nc -u $otherhost 8000

Tested ingress mirred:

  ip link add veth1 type veth peer name veth2
  ip link add veth3 type veth peer name veth4

  ip netns add ns0
  ip netns add ns1

  for i in 1 2 3 4; do \
    NS=ns$((${i}%2)); \
    ip link set dev veth${i} netns ${NS}; \
    ip netns exec ${NS} \
      ip addr add dev veth${i} 192.168.1.${i}/24; \
    ip netns exec ${NS} \
      ip link set dev veth${i} up; \
  done

  ip netns exec ns0 tc qdisc add dev veth2 ingress
  ip netns exec ns0 \
    tc filter add dev veth2 parent ffff: \
      u32 match ip dport 8000 0xffff \
      action mirred ingress redirect dev veth4

  ip netns exec ns0 \
    tcpdump -n -i veth4 udp and dst port 8000 &
  ip netns exec ns1 \
    nc -u 192.168.1.2 8000
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
2 parents 29b84f2 + bc31c90 commit 4289e60

File tree

13 files changed

+66
-118
lines changed

13 files changed

+66
-118
lines changed

drivers/net/ifb.c

+6-10
Original file line numberDiff line numberDiff line change
@@ -78,10 +78,8 @@ static void ifb_ri_tasklet(unsigned long _txp)
7878
}
7979

8080
while ((skb = __skb_dequeue(&txp->tq)) != NULL) {
81-
u32 from = G_TC_FROM(skb->tc_verd);
82-
83-
skb->tc_verd = 0;
84-
skb->tc_verd = SET_TC_NCLS(skb->tc_verd);
81+
skb->tc_redirected = 0;
82+
skb->tc_skip_classify = 1;
8583

8684
u64_stats_update_begin(&txp->tsync);
8785
txp->tx_packets++;
@@ -101,13 +99,12 @@ static void ifb_ri_tasklet(unsigned long _txp)
10199
rcu_read_unlock();
102100
skb->skb_iif = txp->dev->ifindex;
103101

104-
if (from & AT_EGRESS) {
102+
if (!skb->tc_from_ingress) {
105103
dev_queue_xmit(skb);
106-
} else if (from & AT_INGRESS) {
104+
} else {
107105
skb_pull(skb, skb->mac_len);
108106
netif_receive_skb(skb);
109-
} else
110-
BUG();
107+
}
111108
}
112109

113110
if (__netif_tx_trylock(txq)) {
@@ -239,15 +236,14 @@ static void ifb_setup(struct net_device *dev)
239236
static netdev_tx_t ifb_xmit(struct sk_buff *skb, struct net_device *dev)
240237
{
241238
struct ifb_dev_private *dp = netdev_priv(dev);
242-
u32 from = G_TC_FROM(skb->tc_verd);
243239
struct ifb_q_private *txp = dp->tx_private + skb_get_queue_mapping(skb);
244240

245241
u64_stats_update_begin(&txp->rsync);
246242
txp->rx_packets++;
247243
txp->rx_bytes += skb->len;
248244
u64_stats_update_end(&txp->rsync);
249245

250-
if (!(from & (AT_INGRESS|AT_EGRESS)) || !skb->skb_iif) {
246+
if (!skb->tc_redirected || !skb->skb_iif) {
251247
dev_kfree_skb(skb);
252248
dev->stats.rx_dropped++;
253249
return NETDEV_TX_OK;

drivers/staging/octeon/ethernet-tx.c

+2-3
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
#endif /* CONFIG_XFRM */
2424

2525
#include <linux/atomic.h>
26+
#include <net/sch_generic.h>
2627

2728
#include <asm/octeon/octeon.h>
2829

@@ -369,9 +370,7 @@ int cvm_oct_xmit(struct sk_buff *skb, struct net_device *dev)
369370

370371
#ifdef CONFIG_NET_SCHED
371372
skb->tc_index = 0;
372-
#ifdef CONFIG_NET_CLS_ACT
373-
skb->tc_verd = 0;
374-
#endif /* CONFIG_NET_CLS_ACT */
373+
skb_reset_tc(skb);
375374
#endif /* CONFIG_NET_SCHED */
376375
#endif /* REUSE_SKBUFFS_WITHOUT_FREE */
377376

include/linux/skbuff.h

+10-5
Original file line numberDiff line numberDiff line change
@@ -589,6 +589,10 @@ static inline bool skb_mstamp_after(const struct skb_mstamp *t1,
589589
* @pkt_type: Packet class
590590
* @fclone: skbuff clone status
591591
* @ipvs_property: skbuff is owned by ipvs
592+
* @tc_skip_classify: do not classify packet. set by IFB device
593+
* @tc_at_ingress: used within tc_classify to distinguish in/egress
594+
* @tc_redirected: packet was redirected by a tc action
595+
* @tc_from_ingress: if tc_redirected, tc_at_ingress at time of redirect
592596
* @peeked: this packet has been seen already, so stats have been
593597
* done for it, don't do them again
594598
* @nf_trace: netfilter packet trace flag
@@ -598,7 +602,6 @@ static inline bool skb_mstamp_after(const struct skb_mstamp *t1,
598602
* @nf_bridge: Saved data about a bridged frame - see br_netfilter.c
599603
* @skb_iif: ifindex of device we arrived on
600604
* @tc_index: Traffic control index
601-
* @tc_verd: traffic control verdict
602605
* @hash: the packet hash
603606
* @queue_mapping: Queue mapping for multiqueue devices
604607
* @xmit_more: More SKBs are pending for this queue
@@ -749,13 +752,15 @@ struct sk_buff {
749752
#ifdef CONFIG_NET_SWITCHDEV
750753
__u8 offload_fwd_mark:1;
751754
#endif
752-
/* 2, 4 or 5 bit hole */
755+
#ifdef CONFIG_NET_CLS_ACT
756+
__u8 tc_skip_classify:1;
757+
__u8 tc_at_ingress:1;
758+
__u8 tc_redirected:1;
759+
__u8 tc_from_ingress:1;
760+
#endif
753761

754762
#ifdef CONFIG_NET_SCHED
755763
__u16 tc_index; /* traffic control index */
756-
#ifdef CONFIG_NET_CLS_ACT
757-
__u16 tc_verd; /* traffic control verdict */
758-
#endif
759764
#endif
760765

761766
union {

include/net/sch_generic.h

+19-1
Original file line numberDiff line numberDiff line change
@@ -409,15 +409,33 @@ bool tcf_destroy(struct tcf_proto *tp, bool force);
409409
void tcf_destroy_chain(struct tcf_proto __rcu **fl);
410410
int skb_do_redirect(struct sk_buff *);
411411

412+
static inline void skb_reset_tc(struct sk_buff *skb)
413+
{
414+
#ifdef CONFIG_NET_CLS_ACT
415+
skb->tc_redirected = 0;
416+
#endif
417+
}
418+
412419
static inline bool skb_at_tc_ingress(const struct sk_buff *skb)
413420
{
414421
#ifdef CONFIG_NET_CLS_ACT
415-
return G_TC_AT(skb->tc_verd) & AT_INGRESS;
422+
return skb->tc_at_ingress;
416423
#else
417424
return false;
418425
#endif
419426
}
420427

428+
static inline bool skb_skip_tc_classify(struct sk_buff *skb)
429+
{
430+
#ifdef CONFIG_NET_CLS_ACT
431+
if (skb->tc_skip_classify) {
432+
skb->tc_skip_classify = 0;
433+
return true;
434+
}
435+
#endif
436+
return false;
437+
}
438+
421439
/* Reset all TX qdiscs greater than index of a device. */
422440
static inline void qdisc_reset_all_tx_gt(struct net_device *dev, unsigned int i)
423441
{

include/uapi/linux/pkt_cls.h

-55
Original file line numberDiff line numberDiff line change
@@ -4,61 +4,6 @@
44
#include <linux/types.h>
55
#include <linux/pkt_sched.h>
66

7-
#ifdef __KERNEL__
8-
/* I think i could have done better macros ; for now this is stolen from
9-
* some arch/mips code - jhs
10-
*/
11-
#define _TC_MAKE32(x) ((x))
12-
13-
#define _TC_MAKEMASK1(n) (_TC_MAKE32(1) << _TC_MAKE32(n))
14-
#define _TC_MAKEMASK(v,n) (_TC_MAKE32((_TC_MAKE32(1)<<(v))-1) << _TC_MAKE32(n))
15-
#define _TC_MAKEVALUE(v,n) (_TC_MAKE32(v) << _TC_MAKE32(n))
16-
#define _TC_GETVALUE(v,n,m) ((_TC_MAKE32(v) & _TC_MAKE32(m)) >> _TC_MAKE32(n))
17-
18-
/* verdict bit breakdown
19-
*
20-
bit 0: when set -> this packet has been munged already
21-
22-
bit 1: when set -> It is ok to munge this packet
23-
24-
bit 2,3,4,5: Reclassify counter - sort of reverse TTL - if exceeded
25-
assume loop
26-
27-
bit 6,7: Where this packet was last seen
28-
0: Above the transmit example at the socket level
29-
1: on the Ingress
30-
2: on the Egress
31-
32-
bit 8: when set --> Request not to classify on ingress.
33-
34-
bits 9,10,11: redirect counter - redirect TTL. Loop avoidance
35-
36-
*
37-
* */
38-
39-
#define S_TC_FROM _TC_MAKE32(6)
40-
#define M_TC_FROM _TC_MAKEMASK(2,S_TC_FROM)
41-
#define G_TC_FROM(x) _TC_GETVALUE(x,S_TC_FROM,M_TC_FROM)
42-
#define V_TC_FROM(x) _TC_MAKEVALUE(x,S_TC_FROM)
43-
#define SET_TC_FROM(v,n) ((V_TC_FROM(n)) | (v & ~M_TC_FROM))
44-
#define AT_STACK 0x0
45-
#define AT_INGRESS 0x1
46-
#define AT_EGRESS 0x2
47-
48-
#define TC_NCLS _TC_MAKEMASK1(8)
49-
#define SET_TC_NCLS(v) ( TC_NCLS | (v & ~TC_NCLS))
50-
#define CLR_TC_NCLS(v) ( v & ~TC_NCLS)
51-
52-
#define S_TC_AT _TC_MAKE32(12)
53-
#define M_TC_AT _TC_MAKEMASK(2,S_TC_AT)
54-
#define G_TC_AT(x) _TC_GETVALUE(x,S_TC_AT,M_TC_AT)
55-
#define V_TC_AT(x) _TC_MAKEVALUE(x,S_TC_AT)
56-
#define SET_TC_AT(v,n) ((V_TC_AT(n)) | (v & ~M_TC_AT))
57-
58-
#define MAX_REC_LOOP 4
59-
#define MAX_RED_LOOP 4
60-
#endif
61-
627
/* Action attributes */
638
enum {
649
TCA_ACT_UNSPEC,

net/core/dev.c

+7-15
Original file line numberDiff line numberDiff line change
@@ -3153,9 +3153,7 @@ sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev)
31533153
if (!cl)
31543154
return skb;
31553155

3156-
/* skb->tc_verd and qdisc_skb_cb(skb)->pkt_len were already set
3157-
* earlier by the caller.
3158-
*/
3156+
/* qdisc_skb_cb(skb)->pkt_len was already set by the caller. */
31593157
qdisc_bstats_cpu_update(cl->q, skb);
31603158

31613159
switch (tc_classify(skb, cl, &cl_res, false)) {
@@ -3320,7 +3318,7 @@ static int __dev_queue_xmit(struct sk_buff *skb, void *accel_priv)
33203318

33213319
qdisc_pkt_len_init(skb);
33223320
#ifdef CONFIG_NET_CLS_ACT
3323-
skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_EGRESS);
3321+
skb->tc_at_ingress = 0;
33243322
# ifdef CONFIG_NET_EGRESS
33253323
if (static_key_false(&egress_needed)) {
33263324
skb = sch_handle_egress(skb, &rc, dev);
@@ -3920,7 +3918,7 @@ sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret,
39203918
}
39213919

39223920
qdisc_skb_cb(skb)->pkt_len = skb->len;
3923-
skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_INGRESS);
3921+
skb->tc_at_ingress = 1;
39243922
qdisc_bstats_cpu_update(cl->q, skb);
39253923

39263924
switch (tc_classify(skb, cl, &cl_res, false)) {
@@ -4093,12 +4091,8 @@ static int __netif_receive_skb_core(struct sk_buff *skb, bool pfmemalloc)
40934091
goto out;
40944092
}
40954093

4096-
#ifdef CONFIG_NET_CLS_ACT
4097-
if (skb->tc_verd & TC_NCLS) {
4098-
skb->tc_verd = CLR_TC_NCLS(skb->tc_verd);
4099-
goto ncls;
4100-
}
4101-
#endif
4094+
if (skb_skip_tc_classify(skb))
4095+
goto skip_classify;
41024096

41034097
if (pfmemalloc)
41044098
goto skip_taps;
@@ -4126,10 +4120,8 @@ static int __netif_receive_skb_core(struct sk_buff *skb, bool pfmemalloc)
41264120
goto out;
41274121
}
41284122
#endif
4129-
#ifdef CONFIG_NET_CLS_ACT
4130-
skb->tc_verd = 0;
4131-
ncls:
4132-
#endif
4123+
skb_reset_tc(skb);
4124+
skip_classify:
41334125
if (pfmemalloc && !skb_pfmemalloc_protocol(skb))
41344126
goto drop;
41354127

net/core/pktgen.c

+1-3
Original file line numberDiff line numberDiff line change
@@ -3439,9 +3439,7 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev)
34393439
/* skb was 'freed' by stack, so clean few
34403440
* bits and reuse it
34413441
*/
3442-
#ifdef CONFIG_NET_CLS_ACT
3443-
skb->tc_verd = 0; /* reset reclass/redir ttl */
3444-
#endif
3442+
skb_reset_tc(skb);
34453443
} while (--burst > 0);
34463444
goto out; /* Skips xmit_mode M_START_XMIT */
34473445
} else if (pkt_dev->xmit_mode == M_QUEUE_XMIT) {

net/core/skbuff.c

-3
Original file line numberDiff line numberDiff line change
@@ -878,9 +878,6 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
878878
#endif
879879
#ifdef CONFIG_NET_SCHED
880880
CHECK_SKB_FIELD(tc_index);
881-
#ifdef CONFIG_NET_CLS_ACT
882-
CHECK_SKB_FIELD(tc_verd);
883-
#endif
884881
#endif
885882

886883
}

net/sched/act_api.c

+4-7
Original file line numberDiff line numberDiff line change
@@ -426,11 +426,9 @@ int tcf_action_exec(struct sk_buff *skb, struct tc_action **actions,
426426
{
427427
int ret = -1, i;
428428

429-
if (skb->tc_verd & TC_NCLS) {
430-
skb->tc_verd = CLR_TC_NCLS(skb->tc_verd);
431-
ret = TC_ACT_OK;
432-
goto exec_done;
433-
}
429+
if (skb_skip_tc_classify(skb))
430+
return TC_ACT_OK;
431+
434432
for (i = 0; i < nr_actions; i++) {
435433
const struct tc_action *a = actions[i];
436434

@@ -439,9 +437,8 @@ int tcf_action_exec(struct sk_buff *skb, struct tc_action **actions,
439437
if (ret == TC_ACT_REPEAT)
440438
goto repeat; /* we need a ttl - JHS */
441439
if (ret != TC_ACT_PIPE)
442-
goto exec_done;
440+
break;
443441
}
444-
exec_done:
445442
return ret;
446443
}
447444
EXPORT_SYMBOL(tcf_action_exec);

net/sched/act_ife.c

+3-4
Original file line numberDiff line numberDiff line change
@@ -736,12 +736,11 @@ static int tcf_ife_encode(struct sk_buff *skb, const struct tc_action *a,
736736
u16 metalen = ife_get_sz(skb, ife);
737737
int hdrm = metalen + skb->dev->hard_header_len + IFE_METAHDRLEN;
738738
unsigned int skboff = skb->dev->hard_header_len;
739-
u32 at = G_TC_AT(skb->tc_verd);
740739
int new_len = skb->len + hdrm;
741740
bool exceed_mtu = false;
742741
int err;
743742

744-
if (at & AT_EGRESS) {
743+
if (!skb_at_tc_ingress(skb)) {
745744
if (new_len > skb->dev->mtu)
746745
exceed_mtu = true;
747746
}
@@ -773,7 +772,7 @@ static int tcf_ife_encode(struct sk_buff *skb, const struct tc_action *a,
773772
return TC_ACT_SHOT;
774773
}
775774

776-
if (!(at & AT_EGRESS))
775+
if (skb_at_tc_ingress(skb))
777776
skb_push(skb, skb->dev->hard_header_len);
778777

779778
iethh = (struct ethhdr *)skb->data;
@@ -816,7 +815,7 @@ static int tcf_ife_encode(struct sk_buff *skb, const struct tc_action *a,
816815
ether_addr_copy(oethh->h_dest, iethh->h_dest);
817816
oethh->h_proto = htons(ife->eth_type);
818817

819-
if (!(at & AT_EGRESS))
818+
if (skb_at_tc_ingress(skb))
820819
skb_pull(skb, skb->dev->hard_header_len);
821820

822821
spin_unlock(&ife->tcf_lock);

0 commit comments

Comments
 (0)