Skip to content

Commit 622cdb9

Browse files
Paolo Abenigregkh
Paolo Abeni
authored andcommitted
netfilter: on sockopt() acquire sock lock only in the required scope
commit 3f34cfa upstream. Syzbot reported several deadlocks in the netfilter area caused by rtnl lock and socket lock being acquired with a different order on different code paths, leading to backtraces like the following one: ====================================================== WARNING: possible circular locking dependency detected 4.15.0-rc9+ torvalds#212 Not tainted ------------------------------------------------------ syzkaller041579/3682 is trying to acquire lock: (sk_lock-AF_INET6){+.+.}, at: [<000000008775e4dd>] lock_sock include/net/sock.h:1463 [inline] (sk_lock-AF_INET6){+.+.}, at: [<000000008775e4dd>] do_ipv6_setsockopt.isra.8+0x3c5/0x39d0 net/ipv6/ipv6_sockglue.c:167 but task is already holding lock: (rtnl_mutex){+.+.}, at: [<000000004342eaa9>] rtnl_lock+0x17/0x20 net/core/rtnetlink.c:74 which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #1 (rtnl_mutex){+.+.}: __mutex_lock_common kernel/locking/mutex.c:756 [inline] __mutex_lock+0x16f/0x1a80 kernel/locking/mutex.c:893 mutex_lock_nested+0x16/0x20 kernel/locking/mutex.c:908 rtnl_lock+0x17/0x20 net/core/rtnetlink.c:74 register_netdevice_notifier+0xad/0x860 net/core/dev.c:1607 tee_tg_check+0x1a0/0x280 net/netfilter/xt_TEE.c:106 xt_check_target+0x22c/0x7d0 net/netfilter/x_tables.c:845 check_target net/ipv6/netfilter/ip6_tables.c:538 [inline] find_check_entry.isra.7+0x935/0xcf0 net/ipv6/netfilter/ip6_tables.c:580 translate_table+0xf52/0x1690 net/ipv6/netfilter/ip6_tables.c:749 do_replace net/ipv6/netfilter/ip6_tables.c:1165 [inline] do_ip6t_set_ctl+0x370/0x5f0 net/ipv6/netfilter/ip6_tables.c:1691 nf_sockopt net/netfilter/nf_sockopt.c:106 [inline] nf_setsockopt+0x67/0xc0 net/netfilter/nf_sockopt.c:115 ipv6_setsockopt+0x115/0x150 net/ipv6/ipv6_sockglue.c:928 udpv6_setsockopt+0x45/0x80 net/ipv6/udp.c:1422 sock_common_setsockopt+0x95/0xd0 net/core/sock.c:2978 SYSC_setsockopt net/socket.c:1849 [inline] SyS_setsockopt+0x189/0x360 net/socket.c:1828 entry_SYSCALL_64_fastpath+0x29/0xa0 -> #0 (sk_lock-AF_INET6){+.+.}: lock_acquire+0x1d5/0x580 kernel/locking/lockdep.c:3914 lock_sock_nested+0xc2/0x110 net/core/sock.c:2780 lock_sock include/net/sock.h:1463 [inline] do_ipv6_setsockopt.isra.8+0x3c5/0x39d0 net/ipv6/ipv6_sockglue.c:167 ipv6_setsockopt+0xd7/0x150 net/ipv6/ipv6_sockglue.c:922 udpv6_setsockopt+0x45/0x80 net/ipv6/udp.c:1422 sock_common_setsockopt+0x95/0xd0 net/core/sock.c:2978 SYSC_setsockopt net/socket.c:1849 [inline] SyS_setsockopt+0x189/0x360 net/socket.c:1828 entry_SYSCALL_64_fastpath+0x29/0xa0 other info that might help us debug this: Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(rtnl_mutex); lock(sk_lock-AF_INET6); lock(rtnl_mutex); lock(sk_lock-AF_INET6); *** DEADLOCK *** 1 lock held by syzkaller041579/3682: #0: (rtnl_mutex){+.+.}, at: [<000000004342eaa9>] rtnl_lock+0x17/0x20 net/core/rtnetlink.c:74 The problem, as Florian noted, is that nf_setsockopt() is always called with the socket held, even if the lock itself is required only for very tight scopes and only for some operation. This patch addresses the issues moving the lock_sock() call only where really needed, namely in ipv*_getorigdst(), so that nf_setsockopt() does not need anymore to acquire both locks. Fixes: 22265a5 ("netfilter: xt_TEE: resolve oif using netdevice notifiers") Reported-by: syzbot+a4c2dc980ac1af699b36@syzkaller.appspotmail.com Suggested-by: Florian Westphal <fw@strlen.de> Signed-off-by: Paolo Abeni <pabeni@redhat.com> Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
1 parent 1eda32c commit 622cdb9

File tree

4 files changed

+26
-29
lines changed

4 files changed

+26
-29
lines changed

net/ipv4/ip_sockglue.c

+4-10
Original file line numberDiff line numberDiff line change
@@ -1221,11 +1221,8 @@ int ip_setsockopt(struct sock *sk, int level,
12211221
if (err == -ENOPROTOOPT && optname != IP_HDRINCL &&
12221222
optname != IP_IPSEC_POLICY &&
12231223
optname != IP_XFRM_POLICY &&
1224-
!ip_mroute_opt(optname)) {
1225-
lock_sock(sk);
1224+
!ip_mroute_opt(optname))
12261225
err = nf_setsockopt(sk, PF_INET, optname, optval, optlen);
1227-
release_sock(sk);
1228-
}
12291226
#endif
12301227
return err;
12311228
}
@@ -1250,12 +1247,9 @@ int compat_ip_setsockopt(struct sock *sk, int level, int optname,
12501247
if (err == -ENOPROTOOPT && optname != IP_HDRINCL &&
12511248
optname != IP_IPSEC_POLICY &&
12521249
optname != IP_XFRM_POLICY &&
1253-
!ip_mroute_opt(optname)) {
1254-
lock_sock(sk);
1255-
err = compat_nf_setsockopt(sk, PF_INET, optname,
1256-
optval, optlen);
1257-
release_sock(sk);
1258-
}
1250+
!ip_mroute_opt(optname))
1251+
err = compat_nf_setsockopt(sk, PF_INET, optname, optval,
1252+
optlen);
12591253
#endif
12601254
return err;
12611255
}

net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c

+5-1
Original file line numberDiff line numberDiff line change
@@ -259,15 +259,19 @@ getorigdst(struct sock *sk, int optval, void __user *user, int *len)
259259
struct nf_conntrack_tuple tuple;
260260

261261
memset(&tuple, 0, sizeof(tuple));
262+
263+
lock_sock(sk);
262264
tuple.src.u3.ip = inet->inet_rcv_saddr;
263265
tuple.src.u.tcp.port = inet->inet_sport;
264266
tuple.dst.u3.ip = inet->inet_daddr;
265267
tuple.dst.u.tcp.port = inet->inet_dport;
266268
tuple.src.l3num = PF_INET;
267269
tuple.dst.protonum = sk->sk_protocol;
270+
release_sock(sk);
268271

269272
/* We only do TCP and SCTP at the moment: is there a better way? */
270-
if (sk->sk_protocol != IPPROTO_TCP && sk->sk_protocol != IPPROTO_SCTP) {
273+
if (tuple.dst.protonum != IPPROTO_TCP &&
274+
tuple.dst.protonum != IPPROTO_SCTP) {
271275
pr_debug("SO_ORIGINAL_DST: Not a TCP/SCTP socket\n");
272276
return -ENOPROTOOPT;
273277
}

net/ipv6/ipv6_sockglue.c

+5-12
Original file line numberDiff line numberDiff line change
@@ -905,12 +905,8 @@ int ipv6_setsockopt(struct sock *sk, int level, int optname,
905905
#ifdef CONFIG_NETFILTER
906906
/* we need to exclude all possible ENOPROTOOPTs except default case */
907907
if (err == -ENOPROTOOPT && optname != IPV6_IPSEC_POLICY &&
908-
optname != IPV6_XFRM_POLICY) {
909-
lock_sock(sk);
910-
err = nf_setsockopt(sk, PF_INET6, optname, optval,
911-
optlen);
912-
release_sock(sk);
913-
}
908+
optname != IPV6_XFRM_POLICY)
909+
err = nf_setsockopt(sk, PF_INET6, optname, optval, optlen);
914910
#endif
915911
return err;
916912
}
@@ -940,12 +936,9 @@ int compat_ipv6_setsockopt(struct sock *sk, int level, int optname,
940936
#ifdef CONFIG_NETFILTER
941937
/* we need to exclude all possible ENOPROTOOPTs except default case */
942938
if (err == -ENOPROTOOPT && optname != IPV6_IPSEC_POLICY &&
943-
optname != IPV6_XFRM_POLICY) {
944-
lock_sock(sk);
945-
err = compat_nf_setsockopt(sk, PF_INET6, optname,
946-
optval, optlen);
947-
release_sock(sk);
948-
}
939+
optname != IPV6_XFRM_POLICY)
940+
err = compat_nf_setsockopt(sk, PF_INET6, optname, optval,
941+
optlen);
949942
#endif
950943
return err;
951944
}

net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c

+12-6
Original file line numberDiff line numberDiff line change
@@ -226,20 +226,27 @@ static struct nf_hook_ops ipv6_conntrack_ops[] __read_mostly = {
226226
static int
227227
ipv6_getorigdst(struct sock *sk, int optval, void __user *user, int *len)
228228
{
229-
const struct inet_sock *inet = inet_sk(sk);
229+
struct nf_conntrack_tuple tuple = { .src.l3num = NFPROTO_IPV6 };
230230
const struct ipv6_pinfo *inet6 = inet6_sk(sk);
231+
const struct inet_sock *inet = inet_sk(sk);
231232
const struct nf_conntrack_tuple_hash *h;
232233
struct sockaddr_in6 sin6;
233-
struct nf_conntrack_tuple tuple = { .src.l3num = NFPROTO_IPV6 };
234234
struct nf_conn *ct;
235+
__be32 flow_label;
236+
int bound_dev_if;
235237

238+
lock_sock(sk);
236239
tuple.src.u3.in6 = sk->sk_v6_rcv_saddr;
237240
tuple.src.u.tcp.port = inet->inet_sport;
238241
tuple.dst.u3.in6 = sk->sk_v6_daddr;
239242
tuple.dst.u.tcp.port = inet->inet_dport;
240243
tuple.dst.protonum = sk->sk_protocol;
244+
bound_dev_if = sk->sk_bound_dev_if;
245+
flow_label = inet6->flow_label;
246+
release_sock(sk);
241247

242-
if (sk->sk_protocol != IPPROTO_TCP && sk->sk_protocol != IPPROTO_SCTP)
248+
if (tuple.dst.protonum != IPPROTO_TCP &&
249+
tuple.dst.protonum != IPPROTO_SCTP)
243250
return -ENOPROTOOPT;
244251

245252
if (*len < 0 || (unsigned int) *len < sizeof(sin6))
@@ -257,14 +264,13 @@ ipv6_getorigdst(struct sock *sk, int optval, void __user *user, int *len)
257264

258265
sin6.sin6_family = AF_INET6;
259266
sin6.sin6_port = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u.tcp.port;
260-
sin6.sin6_flowinfo = inet6->flow_label & IPV6_FLOWINFO_MASK;
267+
sin6.sin6_flowinfo = flow_label & IPV6_FLOWINFO_MASK;
261268
memcpy(&sin6.sin6_addr,
262269
&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.in6,
263270
sizeof(sin6.sin6_addr));
264271

265272
nf_ct_put(ct);
266-
sin6.sin6_scope_id = ipv6_iface_scope_id(&sin6.sin6_addr,
267-
sk->sk_bound_dev_if);
273+
sin6.sin6_scope_id = ipv6_iface_scope_id(&sin6.sin6_addr, bound_dev_if);
268274
return copy_to_user(user, &sin6, sizeof(sin6)) ? -EFAULT : 0;
269275
}
270276

0 commit comments

Comments
 (0)