Skip to content

Commit 66f5d6c

Browse files
tracywwnj and davem330
authored and committed
ipv6: replace rwlock with rcu and spinlock in fib6_table
With all the preparation work done before, we are now ready to replace the rwlock with rcu and a spinlock in fib6_table. That means all fib6_node entries in fib6_table are now protected by rcu. When freeing a fib6_node, call_rcu() is used to wait for the rcu grace period before releasing the memory. When accessing a fib6_node, the corresponding rcu APIs need to be used. All critical sections previously protected by the write lock will now be protected by the per-table spin lock. All critical sections previously protected by the read lock will now be protected by rcu_read_lock(). A couple of things to note here: 1. As part of the work of replacing the rwlock with rcu, the linked list of fn->leaf now has to be rcu protected as well. So both fn->leaf and rt->dst.rt6_next are now __rcu tagged and the corresponding rcu APIs are used when manipulating them. 2. For fn->rr_ptr, first of all, it also needs to be rcu protected now, so it is tagged with __rcu and rcu APIs are used in the corresponding places. Secondly, fn->rr_ptr is changed in rt6_select(), which runs in a reader thread. This makes the issue a bit complicated. We think a valid solution is to let rt6_select() grab the tb6_lock if it decides to change fn->rr_ptr. As this is not part of normal operation and only happens when there is no valid neighbor cache for the route, we think the performance impact should be low. 3. fib6_walk_continue() has to be called with tb6_lock held even in the route dumping related functions, e.g. inet6_dump_fib(), fib6_tables_dump() and ipv6_route_seq_ops. This is because fib6_walk_continue() makes modifications to the walker structure, and so do fib6_repair_tree() and fib6_del_route(). In order to synchronize properly between them, we need to let fib6_walk_continue() hold the lock. We may be able to further improve the way we do the tree walk to get rid of the need for holding the spin lock, but not for now. 4. 
When fib6_del_route() removes a route from the tree, we no longer set rt->dst.rt6_next to NULL, so that simultaneous readers are able to continue traversing the list under rcu. However, rt->dst.rt6_next is only valid within this same rcu period. No one should access it later. 5. All the atomic_inc(rt->rt6i_ref) operations are now performed before we publish the route (either by linking it to fn->leaf or by inserting it into the list pointed to by fn->leaf), just to be safe, because as soon as we publish the route, some reader thread will be able to access it. Signed-off-by: Wei Wang <weiwan@google.com> Signed-off-by: Martin KaFai Lau <kafai@fb.com> Signed-off-by: Eric Dumazet <edumazet@google.com> Signed-off-by: David S. Miller <davem@davemloft.net>
1 parent 17ecf59 commit 66f5d6c

File tree

5 files changed

+333
-230
lines changed

5 files changed

+333
-230
lines changed

include/net/dst.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -101,7 +101,7 @@ struct dst_entry {
101101
union {
102102
struct dst_entry *next;
103103
struct rtable __rcu *rt_next;
104-
struct rt6_info *rt6_next;
104+
struct rt6_info __rcu *rt6_next;
105105
struct dn_route __rcu *dn_next;
106106
};
107107
};

include/net/ip6_fib.h

+16-8
Original file line numberDiff line numberDiff line change
@@ -68,18 +68,18 @@ struct fib6_config {
6868
};
6969

7070
struct fib6_node {
71-
struct fib6_node *parent;
72-
struct fib6_node *left;
73-
struct fib6_node *right;
71+
struct fib6_node __rcu *parent;
72+
struct fib6_node __rcu *left;
73+
struct fib6_node __rcu *right;
7474
#ifdef CONFIG_IPV6_SUBTREES
75-
struct fib6_node *subtree;
75+
struct fib6_node __rcu *subtree;
7676
#endif
77-
struct rt6_info *leaf;
77+
struct rt6_info __rcu *leaf;
7878

7979
__u16 fn_bit; /* bit key */
8080
__u16 fn_flags;
8181
int fn_sernum;
82-
struct rt6_info *rr_ptr;
82+
struct rt6_info __rcu *rr_ptr;
8383
struct rcu_head rcu;
8484
};
8585

@@ -91,7 +91,7 @@ struct fib6_gc_args {
9191
#ifndef CONFIG_IPV6_SUBTREES
9292
#define FIB6_SUBTREE(fn) NULL
9393
#else
94-
#define FIB6_SUBTREE(fn) ((fn)->subtree)
94+
#define FIB6_SUBTREE(fn) (rcu_dereference_protected((fn)->subtree, 1))
9595
#endif
9696

9797
struct mx6_config {
@@ -174,6 +174,14 @@ struct rt6_info {
174174
unused:7;
175175
};
176176

177+
#define for_each_fib6_node_rt_rcu(fn) \
178+
for (rt = rcu_dereference((fn)->leaf); rt; \
179+
rt = rcu_dereference(rt->dst.rt6_next))
180+
181+
#define for_each_fib6_walker_rt(w) \
182+
for (rt = (w)->leaf; rt; \
183+
rt = rcu_dereference_protected(rt->dst.rt6_next, 1))
184+
177185
static inline struct inet6_dev *ip6_dst_idev(struct dst_entry *dst)
178186
{
179187
return ((struct rt6_info *)dst)->rt6i_idev;
@@ -310,7 +318,7 @@ struct rt6_statistics {
310318
struct fib6_table {
311319
struct hlist_node tb6_hlist;
312320
u32 tb6_id;
313-
rwlock_t tb6_lock;
321+
spinlock_t tb6_lock;
314322
struct fib6_node tb6_root;
315323
struct inet_peer_base tb6_peers;
316324
unsigned int flags;

net/ipv6/addrconf.c

+5-6
Original file line numberDiff line numberDiff line change
@@ -2321,12 +2321,12 @@ static struct rt6_info *addrconf_get_prefix_route(const struct in6_addr *pfx,
23212321
if (!table)
23222322
return NULL;
23232323

2324-
read_lock_bh(&table->tb6_lock);
2324+
rcu_read_lock();
23252325
fn = fib6_locate(&table->tb6_root, pfx, plen, NULL, 0, true);
23262326
if (!fn)
23272327
goto out;
23282328

2329-
for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
2329+
for_each_fib6_node_rt_rcu(fn) {
23302330
if (rt->dst.dev->ifindex != dev->ifindex)
23312331
continue;
23322332
if ((rt->rt6i_flags & flags) != flags)
@@ -2338,7 +2338,7 @@ static struct rt6_info *addrconf_get_prefix_route(const struct in6_addr *pfx,
23382338
break;
23392339
}
23402340
out:
2341-
read_unlock_bh(&table->tb6_lock);
2341+
rcu_read_unlock();
23422342
return rt;
23432343
}
23442344

@@ -5898,10 +5898,9 @@ void addrconf_disable_policy_idev(struct inet6_dev *idev, int val)
58985898
spin_lock(&ifa->lock);
58995899
if (ifa->rt) {
59005900
struct rt6_info *rt = ifa->rt;
5901-
struct fib6_table *table = rt->rt6i_table;
59025901
int cpu;
59035902

5904-
read_lock(&table->tb6_lock);
5903+
rcu_read_lock();
59055904
addrconf_set_nopolicy(ifa->rt, val);
59065905
if (rt->rt6i_pcpu) {
59075906
for_each_possible_cpu(cpu) {
@@ -5911,7 +5910,7 @@ void addrconf_disable_policy_idev(struct inet6_dev *idev, int val)
59115910
addrconf_set_nopolicy(*rtp, val);
59125911
}
59135912
}
5914-
read_unlock(&table->tb6_lock);
5913+
rcu_read_unlock();
59155914
}
59165915
spin_unlock(&ifa->lock);
59175916
}

0 commit comments

Comments
 (0)