Skip to content

Commit 49d11be

Browse files
josefbacik authored and kdave committed
btrfs: add a helper to read the tree_root commit root for backref lookup
I got the following lockdep splat with tree locks converted to rwsem
patches on btrfs/104:

  ======================================================
  WARNING: possible circular locking dependency detected
  5.9.0+ #102 Not tainted
  ------------------------------------------------------
  btrfs-cleaner/903 is trying to acquire lock:
  ffff8e7fab6ffe30 (btrfs-root-00){++++}-{3:3}, at: __btrfs_tree_read_lock+0x32/0x170

  but task is already holding lock:
  ffff8e7fab628a88 (&fs_info->commit_root_sem){++++}-{3:3}, at: btrfs_find_all_roots+0x41/0x80

  which lock already depends on the new lock.

  the existing dependency chain (in reverse order) is:

  -> #3 (&fs_info->commit_root_sem){++++}-{3:3}:
         down_read+0x40/0x130
         caching_thread+0x53/0x5a0
         btrfs_work_helper+0xfa/0x520
         process_one_work+0x238/0x540
         worker_thread+0x55/0x3c0
         kthread+0x13a/0x150
         ret_from_fork+0x1f/0x30

  -> #2 (&caching_ctl->mutex){+.+.}-{3:3}:
         __mutex_lock+0x7e/0x7b0
         btrfs_cache_block_group+0x1e0/0x510
         find_free_extent+0xb6e/0x12f0
         btrfs_reserve_extent+0xb3/0x1b0
         btrfs_alloc_tree_block+0xb1/0x330
         alloc_tree_block_no_bg_flush+0x4f/0x60
         __btrfs_cow_block+0x11d/0x580
         btrfs_cow_block+0x10c/0x220
         commit_cowonly_roots+0x47/0x2e0
         btrfs_commit_transaction+0x595/0xbd0
         sync_filesystem+0x74/0x90
         generic_shutdown_super+0x22/0x100
         kill_anon_super+0x14/0x30
         btrfs_kill_super+0x12/0x20
         deactivate_locked_super+0x36/0xa0
         cleanup_mnt+0x12d/0x190
         task_work_run+0x5c/0xa0
         exit_to_user_mode_prepare+0x1df/0x200
         syscall_exit_to_user_mode+0x54/0x280
         entry_SYSCALL_64_after_hwframe+0x44/0xa9

  -> #1 (&space_info->groups_sem){++++}-{3:3}:
         down_read+0x40/0x130
         find_free_extent+0x2ed/0x12f0
         btrfs_reserve_extent+0xb3/0x1b0
         btrfs_alloc_tree_block+0xb1/0x330
         alloc_tree_block_no_bg_flush+0x4f/0x60
         __btrfs_cow_block+0x11d/0x580
         btrfs_cow_block+0x10c/0x220
         commit_cowonly_roots+0x47/0x2e0
         btrfs_commit_transaction+0x595/0xbd0
         sync_filesystem+0x74/0x90
         generic_shutdown_super+0x22/0x100
         kill_anon_super+0x14/0x30
         btrfs_kill_super+0x12/0x20
         deactivate_locked_super+0x36/0xa0
         cleanup_mnt+0x12d/0x190
         task_work_run+0x5c/0xa0
         exit_to_user_mode_prepare+0x1df/0x200
         syscall_exit_to_user_mode+0x54/0x280
         entry_SYSCALL_64_after_hwframe+0x44/0xa9

  -> #0 (btrfs-root-00){++++}-{3:3}:
         __lock_acquire+0x1167/0x2150
         lock_acquire+0xb9/0x3d0
         down_read_nested+0x43/0x130
         __btrfs_tree_read_lock+0x32/0x170
         __btrfs_read_lock_root_node+0x3a/0x50
         btrfs_search_slot+0x614/0x9d0
         btrfs_find_root+0x35/0x1b0
         btrfs_read_tree_root+0x61/0x120
         btrfs_get_root_ref+0x14b/0x600
         find_parent_nodes+0x3e6/0x1b30
         btrfs_find_all_roots_safe+0xb4/0x130
         btrfs_find_all_roots+0x60/0x80
         btrfs_qgroup_trace_extent_post+0x27/0x40
         btrfs_add_delayed_data_ref+0x3fd/0x460
         btrfs_free_extent+0x42/0x100
         __btrfs_mod_ref+0x1d7/0x2f0
         walk_up_proc+0x11c/0x400
         walk_up_tree+0xf0/0x180
         btrfs_drop_snapshot+0x1c7/0x780
         btrfs_clean_one_deleted_snapshot+0xfb/0x110
         cleaner_kthread+0xd4/0x140
         kthread+0x13a/0x150
         ret_from_fork+0x1f/0x30

  other info that might help us debug this:

  Chain exists of:
    btrfs-root-00 --> &caching_ctl->mutex --> &fs_info->commit_root_sem

   Possible unsafe locking scenario:

         CPU0                    CPU1
         ----                    ----
    lock(&fs_info->commit_root_sem);
                                 lock(&caching_ctl->mutex);
                                 lock(&fs_info->commit_root_sem);
    lock(btrfs-root-00);

   *** DEADLOCK ***

  3 locks held by btrfs-cleaner/903:
   #0: ffff8e7fab628838 (&fs_info->cleaner_mutex){+.+.}-{3:3}, at: cleaner_kthread+0x6e/0x140
   #1: ffff8e7faadac640 (sb_internal){.+.+}-{0:0}, at: start_transaction+0x40b/0x5c0
   #2: ffff8e7fab628a88 (&fs_info->commit_root_sem){++++}-{3:3}, at: btrfs_find_all_roots+0x41/0x80

  stack backtrace:
  CPU: 0 PID: 903 Comm: btrfs-cleaner Not tainted 5.9.0+ #102
  Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.13.0-2.fc32 04/01/2014
  Call Trace:
   dump_stack+0x8b/0xb0
   check_noncircular+0xcf/0xf0
   __lock_acquire+0x1167/0x2150
   ? __bfs+0x42/0x210
   lock_acquire+0xb9/0x3d0
   ? __btrfs_tree_read_lock+0x32/0x170
   down_read_nested+0x43/0x130
   ? __btrfs_tree_read_lock+0x32/0x170
   __btrfs_tree_read_lock+0x32/0x170
   __btrfs_read_lock_root_node+0x3a/0x50
   btrfs_search_slot+0x614/0x9d0
   ? find_held_lock+0x2b/0x80
   btrfs_find_root+0x35/0x1b0
   ? do_raw_spin_unlock+0x4b/0xa0
   btrfs_read_tree_root+0x61/0x120
   btrfs_get_root_ref+0x14b/0x600
   find_parent_nodes+0x3e6/0x1b30
   btrfs_find_all_roots_safe+0xb4/0x130
   btrfs_find_all_roots+0x60/0x80
   btrfs_qgroup_trace_extent_post+0x27/0x40
   btrfs_add_delayed_data_ref+0x3fd/0x460
   btrfs_free_extent+0x42/0x100
   __btrfs_mod_ref+0x1d7/0x2f0
   walk_up_proc+0x11c/0x400
   walk_up_tree+0xf0/0x180
   btrfs_drop_snapshot+0x1c7/0x780
   ? btrfs_clean_one_deleted_snapshot+0x73/0x110
   btrfs_clean_one_deleted_snapshot+0xfb/0x110
   cleaner_kthread+0xd4/0x140
   ? btrfs_alloc_root+0x50/0x50
   kthread+0x13a/0x150
   ? kthread_create_worker_on_cpu+0x40/0x40
   ret_from_fork+0x1f/0x30
  BTRFS info (device sdb): disk space caching is enabled
  BTRFS info (device sdb): has skinny extents

This happens because qgroups does a backref lookup when we create a
delayed ref. From here it may have to look up a root from an indirect
ref, which does a normal lookup on the tree_root, which takes the read
lock on the tree_root nodes.

To fix this we need to add a variant for looking up roots that searches
the commit root of the tree_root. Then when we do the backref search
using the commit root we are sure to not take any locks on the tree_root
nodes. This gets rid of the lockdep splat when running btrfs/104.

Reviewed-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: Josef Bacik <josef@toxicpanda.com>
Signed-off-by: David Sterba <dsterba@suse.com>
1 parent 5223cc6 commit 49d11be

File tree

3 files changed

+114
-41
lines changed

3 files changed

+114
-41
lines changed

fs/btrfs/backref.c

+12 -1
Original file line number | Diff line number | Diff line change
@@ -544,7 +544,18 @@ static int resolve_indirect_ref(struct btrfs_fs_info *fs_info,
544544
int level = ref->level;
545545
struct btrfs_key search_key = ref->key_for_search;
546546

547-
root = btrfs_get_fs_root(fs_info, ref->root_id, false);
547+
/*
548+
* If we're search_commit_root we could possibly be holding locks on
549+
* other tree nodes. This happens when qgroups does backref walks when
550+
* adding new delayed refs. To deal with this we need to look in cache
551+
* for the root, and if we don't find it then we need to search the
552+
* tree_root's commit root, thus the btrfs_get_fs_root_commit_root usage
553+
* here.
554+
*/
555+
if (path->search_commit_root)
556+
root = btrfs_get_fs_root_commit_root(fs_info, path, ref->root_id);
557+
else
558+
root = btrfs_get_fs_root(fs_info, ref->root_id, false);
548559
if (IS_ERR(root)) {
549560
ret = PTR_ERR(root);
550561
goto out_free;

fs/btrfs/disk-io.c

+99 -40
Original file line number | Diff line number | Diff line change
@@ -1281,32 +1281,26 @@ int btrfs_add_log_tree(struct btrfs_trans_handle *trans,
12811281
return 0;
12821282
}
12831283

1284-
struct btrfs_root *btrfs_read_tree_root(struct btrfs_root *tree_root,
1285-
struct btrfs_key *key)
1284+
static struct btrfs_root *read_tree_root_path(struct btrfs_root *tree_root,
1285+
struct btrfs_path *path,
1286+
struct btrfs_key *key)
12861287
{
12871288
struct btrfs_root *root;
12881289
struct btrfs_fs_info *fs_info = tree_root->fs_info;
1289-
struct btrfs_path *path;
12901290
u64 generation;
12911291
int ret;
12921292
int level;
12931293

1294-
path = btrfs_alloc_path();
1295-
if (!path)
1296-
return ERR_PTR(-ENOMEM);
1297-
12981294
root = btrfs_alloc_root(fs_info, key->objectid, GFP_NOFS);
1299-
if (!root) {
1300-
ret = -ENOMEM;
1301-
goto alloc_fail;
1302-
}
1295+
if (!root)
1296+
return ERR_PTR(-ENOMEM);
13031297

13041298
ret = btrfs_find_root(tree_root, key, path,
13051299
&root->root_item, &root->root_key);
13061300
if (ret) {
13071301
if (ret > 0)
13081302
ret = -ENOENT;
1309-
goto find_fail;
1303+
goto fail;
13101304
}
13111305

13121306
generation = btrfs_root_generation(&root->root_item);
@@ -1317,21 +1311,31 @@ struct btrfs_root *btrfs_read_tree_root(struct btrfs_root *tree_root,
13171311
if (IS_ERR(root->node)) {
13181312
ret = PTR_ERR(root->node);
13191313
root->node = NULL;
1320-
goto find_fail;
1314+
goto fail;
13211315
} else if (!btrfs_buffer_uptodate(root->node, generation, 0)) {
13221316
ret = -EIO;
1323-
goto find_fail;
1317+
goto fail;
13241318
}
13251319
root->commit_root = btrfs_root_node(root);
1326-
out:
1327-
btrfs_free_path(path);
13281320
return root;
1329-
1330-
find_fail:
1321+
fail:
13311322
btrfs_put_root(root);
1332-
alloc_fail:
1333-
root = ERR_PTR(ret);
1334-
goto out;
1323+
return ERR_PTR(ret);
1324+
}
1325+
1326+
struct btrfs_root *btrfs_read_tree_root(struct btrfs_root *tree_root,
1327+
struct btrfs_key *key)
1328+
{
1329+
struct btrfs_root *root;
1330+
struct btrfs_path *path;
1331+
1332+
path = btrfs_alloc_path();
1333+
if (!path)
1334+
return ERR_PTR(-ENOMEM);
1335+
root = read_tree_root_path(tree_root, path, key);
1336+
btrfs_free_path(path);
1337+
1338+
return root;
13351339
}
13361340

13371341
/*
@@ -1419,6 +1423,31 @@ static struct btrfs_root *btrfs_lookup_fs_root(struct btrfs_fs_info *fs_info,
14191423
return root;
14201424
}
14211425

1426+
static struct btrfs_root *btrfs_get_global_root(struct btrfs_fs_info *fs_info,
1427+
u64 objectid)
1428+
{
1429+
if (objectid == BTRFS_ROOT_TREE_OBJECTID)
1430+
return btrfs_grab_root(fs_info->tree_root);
1431+
if (objectid == BTRFS_EXTENT_TREE_OBJECTID)
1432+
return btrfs_grab_root(fs_info->extent_root);
1433+
if (objectid == BTRFS_CHUNK_TREE_OBJECTID)
1434+
return btrfs_grab_root(fs_info->chunk_root);
1435+
if (objectid == BTRFS_DEV_TREE_OBJECTID)
1436+
return btrfs_grab_root(fs_info->dev_root);
1437+
if (objectid == BTRFS_CSUM_TREE_OBJECTID)
1438+
return btrfs_grab_root(fs_info->csum_root);
1439+
if (objectid == BTRFS_QUOTA_TREE_OBJECTID)
1440+
return btrfs_grab_root(fs_info->quota_root) ?
1441+
fs_info->quota_root : ERR_PTR(-ENOENT);
1442+
if (objectid == BTRFS_UUID_TREE_OBJECTID)
1443+
return btrfs_grab_root(fs_info->uuid_root) ?
1444+
fs_info->uuid_root : ERR_PTR(-ENOENT);
1445+
if (objectid == BTRFS_FREE_SPACE_TREE_OBJECTID)
1446+
return btrfs_grab_root(fs_info->free_space_root) ?
1447+
fs_info->free_space_root : ERR_PTR(-ENOENT);
1448+
return NULL;
1449+
}
1450+
14221451
int btrfs_insert_fs_root(struct btrfs_fs_info *fs_info,
14231452
struct btrfs_root *root)
14241453
{
@@ -1518,25 +1547,9 @@ static struct btrfs_root *btrfs_get_root_ref(struct btrfs_fs_info *fs_info,
15181547
struct btrfs_key key;
15191548
int ret;
15201549

1521-
if (objectid == BTRFS_ROOT_TREE_OBJECTID)
1522-
return btrfs_grab_root(fs_info->tree_root);
1523-
if (objectid == BTRFS_EXTENT_TREE_OBJECTID)
1524-
return btrfs_grab_root(fs_info->extent_root);
1525-
if (objectid == BTRFS_CHUNK_TREE_OBJECTID)
1526-
return btrfs_grab_root(fs_info->chunk_root);
1527-
if (objectid == BTRFS_DEV_TREE_OBJECTID)
1528-
return btrfs_grab_root(fs_info->dev_root);
1529-
if (objectid == BTRFS_CSUM_TREE_OBJECTID)
1530-
return btrfs_grab_root(fs_info->csum_root);
1531-
if (objectid == BTRFS_QUOTA_TREE_OBJECTID)
1532-
return btrfs_grab_root(fs_info->quota_root) ?
1533-
fs_info->quota_root : ERR_PTR(-ENOENT);
1534-
if (objectid == BTRFS_UUID_TREE_OBJECTID)
1535-
return btrfs_grab_root(fs_info->uuid_root) ?
1536-
fs_info->uuid_root : ERR_PTR(-ENOENT);
1537-
if (objectid == BTRFS_FREE_SPACE_TREE_OBJECTID)
1538-
return btrfs_grab_root(fs_info->free_space_root) ?
1539-
fs_info->free_space_root : ERR_PTR(-ENOENT);
1550+
root = btrfs_get_global_root(fs_info, objectid);
1551+
if (root)
1552+
return root;
15401553
again:
15411554
root = btrfs_lookup_fs_root(fs_info, objectid);
15421555
if (root) {
@@ -1621,6 +1634,52 @@ struct btrfs_root *btrfs_get_new_fs_root(struct btrfs_fs_info *fs_info,
16211634
return btrfs_get_root_ref(fs_info, objectid, anon_dev, true);
16221635
}
16231636

1637+
/*
1638+
* btrfs_get_fs_root_commit_root - return a root for the given objectid
1639+
* @fs_info: the fs_info
1640+
* @objectid: the objectid we need to lookup
1641+
*
1642+
* This is exclusively used for backref walking, and exists specifically because
1643+
* of how qgroups does lookups. Qgroups will do a backref lookup at delayed ref
1644+
* creation time, which means we may have to read the tree_root in order to look
1645+
* up a fs root that is not in memory. If the root is not in memory we will
1646+
* read the tree root commit root and look up the fs root from there. This is a
1647+
* temporary root, it will not be inserted into the radix tree as it doesn't
1648+
* have the most uptodate information, it'll simply be discarded once the
1649+
* backref code is finished using the root.
1650+
*/
1651+
struct btrfs_root *btrfs_get_fs_root_commit_root(struct btrfs_fs_info *fs_info,
1652+
struct btrfs_path *path,
1653+
u64 objectid)
1654+
{
1655+
struct btrfs_root *root;
1656+
struct btrfs_key key;
1657+
1658+
ASSERT(path->search_commit_root && path->skip_locking);
1659+
1660+
/*
1661+
* This can return -ENOENT if we ask for a root that doesn't exist, but
1662+
* since this is called via the backref walking code we won't be looking
1663+
* up a root that doesn't exist, unless there's corruption. So if root
1664+
* != NULL just return it.
1665+
*/
1666+
root = btrfs_get_global_root(fs_info, objectid);
1667+
if (root)
1668+
return root;
1669+
1670+
root = btrfs_lookup_fs_root(fs_info, objectid);
1671+
if (root)
1672+
return root;
1673+
1674+
key.objectid = objectid;
1675+
key.type = BTRFS_ROOT_ITEM_KEY;
1676+
key.offset = (u64)-1;
1677+
root = read_tree_root_path(fs_info->tree_root, path, &key);
1678+
btrfs_release_path(path);
1679+
1680+
return root;
1681+
}
1682+
16241683
/*
16251684
* called by the kthread helper functions to finally call the bio end_io
16261685
* functions. This is where read checksum verification actually happens

fs/btrfs/disk-io.h

+3
Original file line number | Diff line number | Diff line change
@@ -69,6 +69,9 @@ struct btrfs_root *btrfs_get_fs_root(struct btrfs_fs_info *fs_info,
6969
u64 objectid, bool check_ref);
7070
struct btrfs_root *btrfs_get_new_fs_root(struct btrfs_fs_info *fs_info,
7171
u64 objectid, dev_t anon_dev);
72+
struct btrfs_root *btrfs_get_fs_root_commit_root(struct btrfs_fs_info *fs_info,
73+
struct btrfs_path *path,
74+
u64 objectid);
7275

7376
void btrfs_free_fs_info(struct btrfs_fs_info *fs_info);
7477
int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info);

0 commit comments

Comments
 (0)