Skip to content

Commit 8810488

Browse files
josefbacikgregkh
authored and committed
btrfs: add a helper to read the tree_root commit root for backref lookup
[ Upstream commit 49d11be ] I got the following lockdep splat with tree locks converted to rwsem patches on btrfs/104: ====================================================== WARNING: possible circular locking dependency detected 5.9.0+ torvalds#102 Not tainted ------------------------------------------------------ btrfs-cleaner/903 is trying to acquire lock: ffff8e7fab6ffe30 (btrfs-root-00){++++}-{3:3}, at: __btrfs_tree_read_lock+0x32/0x170 but task is already holding lock: ffff8e7fab628a88 (&fs_info->commit_root_sem){++++}-{3:3}, at: btrfs_find_all_roots+0x41/0x80 which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #3 (&fs_info->commit_root_sem){++++}-{3:3}: down_read+0x40/0x130 caching_thread+0x53/0x5a0 btrfs_work_helper+0xfa/0x520 process_one_work+0x238/0x540 worker_thread+0x55/0x3c0 kthread+0x13a/0x150 ret_from_fork+0x1f/0x30 -> #2 (&caching_ctl->mutex){+.+.}-{3:3}: __mutex_lock+0x7e/0x7b0 btrfs_cache_block_group+0x1e0/0x510 find_free_extent+0xb6e/0x12f0 btrfs_reserve_extent+0xb3/0x1b0 btrfs_alloc_tree_block+0xb1/0x330 alloc_tree_block_no_bg_flush+0x4f/0x60 __btrfs_cow_block+0x11d/0x580 btrfs_cow_block+0x10c/0x220 commit_cowonly_roots+0x47/0x2e0 btrfs_commit_transaction+0x595/0xbd0 sync_filesystem+0x74/0x90 generic_shutdown_super+0x22/0x100 kill_anon_super+0x14/0x30 btrfs_kill_super+0x12/0x20 deactivate_locked_super+0x36/0xa0 cleanup_mnt+0x12d/0x190 task_work_run+0x5c/0xa0 exit_to_user_mode_prepare+0x1df/0x200 syscall_exit_to_user_mode+0x54/0x280 entry_SYSCALL_64_after_hwframe+0x44/0xa9 -> #1 (&space_info->groups_sem){++++}-{3:3}: down_read+0x40/0x130 find_free_extent+0x2ed/0x12f0 btrfs_reserve_extent+0xb3/0x1b0 btrfs_alloc_tree_block+0xb1/0x330 alloc_tree_block_no_bg_flush+0x4f/0x60 __btrfs_cow_block+0x11d/0x580 btrfs_cow_block+0x10c/0x220 commit_cowonly_roots+0x47/0x2e0 btrfs_commit_transaction+0x595/0xbd0 sync_filesystem+0x74/0x90 generic_shutdown_super+0x22/0x100 kill_anon_super+0x14/0x30 
btrfs_kill_super+0x12/0x20 deactivate_locked_super+0x36/0xa0 cleanup_mnt+0x12d/0x190 task_work_run+0x5c/0xa0 exit_to_user_mode_prepare+0x1df/0x200 syscall_exit_to_user_mode+0x54/0x280 entry_SYSCALL_64_after_hwframe+0x44/0xa9 -> #0 (btrfs-root-00){++++}-{3:3}: __lock_acquire+0x1167/0x2150 lock_acquire+0xb9/0x3d0 down_read_nested+0x43/0x130 __btrfs_tree_read_lock+0x32/0x170 __btrfs_read_lock_root_node+0x3a/0x50 btrfs_search_slot+0x614/0x9d0 btrfs_find_root+0x35/0x1b0 btrfs_read_tree_root+0x61/0x120 btrfs_get_root_ref+0x14b/0x600 find_parent_nodes+0x3e6/0x1b30 btrfs_find_all_roots_safe+0xb4/0x130 btrfs_find_all_roots+0x60/0x80 btrfs_qgroup_trace_extent_post+0x27/0x40 btrfs_add_delayed_data_ref+0x3fd/0x460 btrfs_free_extent+0x42/0x100 __btrfs_mod_ref+0x1d7/0x2f0 walk_up_proc+0x11c/0x400 walk_up_tree+0xf0/0x180 btrfs_drop_snapshot+0x1c7/0x780 btrfs_clean_one_deleted_snapshot+0xfb/0x110 cleaner_kthread+0xd4/0x140 kthread+0x13a/0x150 ret_from_fork+0x1f/0x30 other info that might help us debug this: Chain exists of: btrfs-root-00 --> &caching_ctl->mutex --> &fs_info->commit_root_sem Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(&fs_info->commit_root_sem); lock(&caching_ctl->mutex); lock(&fs_info->commit_root_sem); lock(btrfs-root-00); *** DEADLOCK *** 3 locks held by btrfs-cleaner/903: #0: ffff8e7fab628838 (&fs_info->cleaner_mutex){+.+.}-{3:3}, at: cleaner_kthread+0x6e/0x140 #1: ffff8e7faadac640 (sb_internal){.+.+}-{0:0}, at: start_transaction+0x40b/0x5c0 #2: ffff8e7fab628a88 (&fs_info->commit_root_sem){++++}-{3:3}, at: btrfs_find_all_roots+0x41/0x80 stack backtrace: CPU: 0 PID: 903 Comm: btrfs-cleaner Not tainted 5.9.0+ torvalds#102 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.13.0-2.fc32 04/01/2014 Call Trace: dump_stack+0x8b/0xb0 check_noncircular+0xcf/0xf0 __lock_acquire+0x1167/0x2150 ? __bfs+0x42/0x210 lock_acquire+0xb9/0x3d0 ? __btrfs_tree_read_lock+0x32/0x170 down_read_nested+0x43/0x130 ? 
__btrfs_tree_read_lock+0x32/0x170 __btrfs_tree_read_lock+0x32/0x170 __btrfs_read_lock_root_node+0x3a/0x50 btrfs_search_slot+0x614/0x9d0 ? find_held_lock+0x2b/0x80 btrfs_find_root+0x35/0x1b0 ? do_raw_spin_unlock+0x4b/0xa0 btrfs_read_tree_root+0x61/0x120 btrfs_get_root_ref+0x14b/0x600 find_parent_nodes+0x3e6/0x1b30 btrfs_find_all_roots_safe+0xb4/0x130 btrfs_find_all_roots+0x60/0x80 btrfs_qgroup_trace_extent_post+0x27/0x40 btrfs_add_delayed_data_ref+0x3fd/0x460 btrfs_free_extent+0x42/0x100 __btrfs_mod_ref+0x1d7/0x2f0 walk_up_proc+0x11c/0x400 walk_up_tree+0xf0/0x180 btrfs_drop_snapshot+0x1c7/0x780 ? btrfs_clean_one_deleted_snapshot+0x73/0x110 btrfs_clean_one_deleted_snapshot+0xfb/0x110 cleaner_kthread+0xd4/0x140 ? btrfs_alloc_root+0x50/0x50 kthread+0x13a/0x150 ? kthread_create_worker_on_cpu+0x40/0x40 ret_from_fork+0x1f/0x30 BTRFS info (device sdb): disk space caching is enabled BTRFS info (device sdb): has skinny extents This happens because qgroups does a backref lookup when we create a delayed ref. From here it may have to look up a root from an indirect ref, which does a normal lookup on the tree_root, which takes the read lock on the tree_root nodes. To fix this we need to add a variant for looking up roots that searches the commit root of the tree_root. Then when we do the backref search using the commit root we are sure to not take any locks on the tree_root nodes. This gets rid of the lockdep splat when running btrfs/104. Reviewed-by: Filipe Manana <fdmanana@suse.com> Signed-off-by: Josef Bacik <josef@toxicpanda.com> Signed-off-by: David Sterba <dsterba@suse.com> Signed-off-by: Sasha Levin <sashal@kernel.org>
1 parent ecce1be commit 8810488

File tree

3 files changed

+114
-41
lines changed

3 files changed

+114
-41
lines changed

fs/btrfs/backref.c

+12-1
Original file line numberDiff line numberDiff line change
@@ -544,7 +544,18 @@ static int resolve_indirect_ref(struct btrfs_fs_info *fs_info,
544544
int level = ref->level;
545545
struct btrfs_key search_key = ref->key_for_search;
546546

547-
root = btrfs_get_fs_root(fs_info, ref->root_id, false);
547+
/*
548+
* If search_commit_root is set we could possibly be holding locks on
549+
* other tree nodes. This happens when qgroups does backref walks when
550+
* adding new delayed refs. To deal with this we need to look in cache
551+
* for the root, and if we don't find it then we need to search the
552+
* tree_root's commit root, thus the btrfs_get_fs_root_commit_root usage
553+
* here.
554+
*/
555+
if (path->search_commit_root)
556+
root = btrfs_get_fs_root_commit_root(fs_info, path, ref->root_id);
557+
else
558+
root = btrfs_get_fs_root(fs_info, ref->root_id, false);
548559
if (IS_ERR(root)) {
549560
ret = PTR_ERR(root);
550561
goto out_free;

fs/btrfs/disk-io.c

+99-40
Original file line numberDiff line numberDiff line change
@@ -1338,32 +1338,26 @@ int btrfs_add_log_tree(struct btrfs_trans_handle *trans,
13381338
return 0;
13391339
}
13401340

1341-
struct btrfs_root *btrfs_read_tree_root(struct btrfs_root *tree_root,
1342-
struct btrfs_key *key)
1341+
static struct btrfs_root *read_tree_root_path(struct btrfs_root *tree_root,
1342+
struct btrfs_path *path,
1343+
struct btrfs_key *key)
13431344
{
13441345
struct btrfs_root *root;
13451346
struct btrfs_fs_info *fs_info = tree_root->fs_info;
1346-
struct btrfs_path *path;
13471347
u64 generation;
13481348
int ret;
13491349
int level;
13501350

1351-
path = btrfs_alloc_path();
1352-
if (!path)
1353-
return ERR_PTR(-ENOMEM);
1354-
13551351
root = btrfs_alloc_root(fs_info, key->objectid, GFP_NOFS);
1356-
if (!root) {
1357-
ret = -ENOMEM;
1358-
goto alloc_fail;
1359-
}
1352+
if (!root)
1353+
return ERR_PTR(-ENOMEM);
13601354

13611355
ret = btrfs_find_root(tree_root, key, path,
13621356
&root->root_item, &root->root_key);
13631357
if (ret) {
13641358
if (ret > 0)
13651359
ret = -ENOENT;
1366-
goto find_fail;
1360+
goto fail;
13671361
}
13681362

13691363
generation = btrfs_root_generation(&root->root_item);
@@ -1374,21 +1368,31 @@ struct btrfs_root *btrfs_read_tree_root(struct btrfs_root *tree_root,
13741368
if (IS_ERR(root->node)) {
13751369
ret = PTR_ERR(root->node);
13761370
root->node = NULL;
1377-
goto find_fail;
1371+
goto fail;
13781372
} else if (!btrfs_buffer_uptodate(root->node, generation, 0)) {
13791373
ret = -EIO;
1380-
goto find_fail;
1374+
goto fail;
13811375
}
13821376
root->commit_root = btrfs_root_node(root);
1383-
out:
1384-
btrfs_free_path(path);
13851377
return root;
1386-
1387-
find_fail:
1378+
fail:
13881379
btrfs_put_root(root);
1389-
alloc_fail:
1390-
root = ERR_PTR(ret);
1391-
goto out;
1380+
return ERR_PTR(ret);
1381+
}
1382+
1383+
struct btrfs_root *btrfs_read_tree_root(struct btrfs_root *tree_root,
1384+
struct btrfs_key *key)
1385+
{
1386+
struct btrfs_root *root;
1387+
struct btrfs_path *path;
1388+
1389+
path = btrfs_alloc_path();
1390+
if (!path)
1391+
return ERR_PTR(-ENOMEM);
1392+
root = read_tree_root_path(tree_root, path, key);
1393+
btrfs_free_path(path);
1394+
1395+
return root;
13921396
}
13931397

13941398
/*
@@ -1476,6 +1480,31 @@ static struct btrfs_root *btrfs_lookup_fs_root(struct btrfs_fs_info *fs_info,
14761480
return root;
14771481
}
14781482

1483+
static struct btrfs_root *btrfs_get_global_root(struct btrfs_fs_info *fs_info,
1484+
u64 objectid)
1485+
{
1486+
if (objectid == BTRFS_ROOT_TREE_OBJECTID)
1487+
return btrfs_grab_root(fs_info->tree_root);
1488+
if (objectid == BTRFS_EXTENT_TREE_OBJECTID)
1489+
return btrfs_grab_root(fs_info->extent_root);
1490+
if (objectid == BTRFS_CHUNK_TREE_OBJECTID)
1491+
return btrfs_grab_root(fs_info->chunk_root);
1492+
if (objectid == BTRFS_DEV_TREE_OBJECTID)
1493+
return btrfs_grab_root(fs_info->dev_root);
1494+
if (objectid == BTRFS_CSUM_TREE_OBJECTID)
1495+
return btrfs_grab_root(fs_info->csum_root);
1496+
if (objectid == BTRFS_QUOTA_TREE_OBJECTID)
1497+
return btrfs_grab_root(fs_info->quota_root) ?
1498+
fs_info->quota_root : ERR_PTR(-ENOENT);
1499+
if (objectid == BTRFS_UUID_TREE_OBJECTID)
1500+
return btrfs_grab_root(fs_info->uuid_root) ?
1501+
fs_info->uuid_root : ERR_PTR(-ENOENT);
1502+
if (objectid == BTRFS_FREE_SPACE_TREE_OBJECTID)
1503+
return btrfs_grab_root(fs_info->free_space_root) ?
1504+
fs_info->free_space_root : ERR_PTR(-ENOENT);
1505+
return NULL;
1506+
}
1507+
14791508
int btrfs_insert_fs_root(struct btrfs_fs_info *fs_info,
14801509
struct btrfs_root *root)
14811510
{
@@ -1573,25 +1602,9 @@ static struct btrfs_root *btrfs_get_root_ref(struct btrfs_fs_info *fs_info,
15731602
struct btrfs_key key;
15741603
int ret;
15751604

1576-
if (objectid == BTRFS_ROOT_TREE_OBJECTID)
1577-
return btrfs_grab_root(fs_info->tree_root);
1578-
if (objectid == BTRFS_EXTENT_TREE_OBJECTID)
1579-
return btrfs_grab_root(fs_info->extent_root);
1580-
if (objectid == BTRFS_CHUNK_TREE_OBJECTID)
1581-
return btrfs_grab_root(fs_info->chunk_root);
1582-
if (objectid == BTRFS_DEV_TREE_OBJECTID)
1583-
return btrfs_grab_root(fs_info->dev_root);
1584-
if (objectid == BTRFS_CSUM_TREE_OBJECTID)
1585-
return btrfs_grab_root(fs_info->csum_root);
1586-
if (objectid == BTRFS_QUOTA_TREE_OBJECTID)
1587-
return btrfs_grab_root(fs_info->quota_root) ?
1588-
fs_info->quota_root : ERR_PTR(-ENOENT);
1589-
if (objectid == BTRFS_UUID_TREE_OBJECTID)
1590-
return btrfs_grab_root(fs_info->uuid_root) ?
1591-
fs_info->uuid_root : ERR_PTR(-ENOENT);
1592-
if (objectid == BTRFS_FREE_SPACE_TREE_OBJECTID)
1593-
return btrfs_grab_root(fs_info->free_space_root) ?
1594-
fs_info->free_space_root : ERR_PTR(-ENOENT);
1605+
root = btrfs_get_global_root(fs_info, objectid);
1606+
if (root)
1607+
return root;
15951608
again:
15961609
root = btrfs_lookup_fs_root(fs_info, objectid);
15971610
if (root) {
@@ -1676,6 +1689,52 @@ struct btrfs_root *btrfs_get_new_fs_root(struct btrfs_fs_info *fs_info,
16761689
return btrfs_get_root_ref(fs_info, objectid, anon_dev, true);
16771690
}
16781691

1692+
/*
1693+
* btrfs_get_fs_root_commit_root - return a root for the given objectid
1694+
* @fs_info: the fs_info
1695+
* @objectid: the objectid we need to look up
1696+
*
1697+
* This is exclusively used for backref walking, and exists specifically because
1698+
* of how qgroups does lookups. Qgroups will do a backref lookup at delayed ref
1699+
* creation time, which means we may have to read the tree_root in order to look
1700+
* up a fs root that is not in memory. If the root is not in memory we will
1701+
* read the tree root commit root and look up the fs root from there. This is a
1702+
* temporary root, it will not be inserted into the radix tree as it doesn't
1703+
* have the most up-to-date information, it'll simply be discarded once the
1704+
* backref code is finished using the root.
1705+
*/
1706+
struct btrfs_root *btrfs_get_fs_root_commit_root(struct btrfs_fs_info *fs_info,
1707+
struct btrfs_path *path,
1708+
u64 objectid)
1709+
{
1710+
struct btrfs_root *root;
1711+
struct btrfs_key key;
1712+
1713+
ASSERT(path->search_commit_root && path->skip_locking);
1714+
1715+
/*
1716+
* This can return -ENOENT if we ask for a root that doesn't exist, but
1717+
* since this is called via the backref walking code we won't be looking
1718+
* up a root that doesn't exist, unless there's corruption. So if root
1719+
* != NULL just return it.
1720+
*/
1721+
root = btrfs_get_global_root(fs_info, objectid);
1722+
if (root)
1723+
return root;
1724+
1725+
root = btrfs_lookup_fs_root(fs_info, objectid);
1726+
if (root)
1727+
return root;
1728+
1729+
key.objectid = objectid;
1730+
key.type = BTRFS_ROOT_ITEM_KEY;
1731+
key.offset = (u64)-1;
1732+
root = read_tree_root_path(fs_info->tree_root, path, &key);
1733+
btrfs_release_path(path);
1734+
1735+
return root;
1736+
}
1737+
16791738
/*
16801739
* called by the kthread helper functions to finally call the bio end_io
16811740
* functions. This is where read checksum verification actually happens

fs/btrfs/disk-io.h

+3
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,9 @@ struct btrfs_root *btrfs_get_fs_root(struct btrfs_fs_info *fs_info,
6969
u64 objectid, bool check_ref);
7070
struct btrfs_root *btrfs_get_new_fs_root(struct btrfs_fs_info *fs_info,
7171
u64 objectid, dev_t anon_dev);
72+
struct btrfs_root *btrfs_get_fs_root_commit_root(struct btrfs_fs_info *fs_info,
73+
struct btrfs_path *path,
74+
u64 objectid);
7275

7376
void btrfs_free_fs_info(struct btrfs_fs_info *fs_info);
7477
int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info);

0 commit comments

Comments
 (0)