Skip to content

Commit cd36119

Browse files
josefbacik authored and
kdave committed
btrfs: wait on uncached block groups on every allocation loop
My initial fix for the generic/475 hangs was related to metadata, but our CI testing uncovered another case where we hang for similar reasons. We again have a task with a plug that is holding an outstanding request that is keeping the dm device from finishing its suspend, and that task is stuck in the allocator. This time it is stuck trying to allocate data, but we do not have a block group that matches the size class. The larger loop in the allocator looks like this (simplified of course) find_free_extent for_each_block_group { ffe_ctl->cached == btrfs_block_group_cache_done(bg) if (!ffe_ctl->cached) ffe_ctl->have_caching_bg = true; do_allocation() btrfs_wait_block_group_cache_progress(); } if (loop == LOOP_CACHING_WAIT && ffe_ctl->have_caching_bg) go search again; In my earlier fix we were trying to allocate from the block group, but we weren't waiting for the progress because we were only waiting for the free space to be >= the amount of free space we wanted. My fix made it so we waited for forward progress to be made as well, so we would be sure to wait. This time however we did not have a block group that matched our size class, so what was happening was this find_free_extent for_each_block_group { ffe_ctl->cached == btrfs_block_group_cache_done(bg) if (!ffe_ctl->cached) ffe_ctl->have_caching_bg = true; if (size_class_doesn't_match()) goto loop; do_allocation() btrfs_wait_block_group_cache_progress(); loop: release_block_group(block_group); } if (loop == LOOP_CACHING_WAIT && ffe_ctl->have_caching_bg) go search again; The size_class_doesn't_match() part was true, so we'd just skip this block group and never wait for caching, and then because we found a caching block group we'd just go back and do the loop again. We never sleep and thus never flush the plug and we have the same deadlock. Fix the logic for waiting on the block group caching to instead do it unconditionally when we goto loop. 
This takes the logic out of the allocation step, so now the loop looks more like this find_free_extent for_each_block_group { ffe_ctl->cached == btrfs_block_group_cache_done(bg) if (!ffe_ctl->cached) ffe_ctl->have_caching_bg = true; if (size_class_doesn't_match()) goto loop; do_allocation() btrfs_wait_block_group_cache_progress(); loop: if (loop > LOOP_CACHING_NOWAIT && !ffe_ctl->retry_uncached && !ffe_ctl->cached) { ffe_ctl->retry_uncached = true; btrfs_wait_block_group_cache_progress(); } release_block_group(block_group); } if (loop == LOOP_CACHING_WAIT && ffe_ctl->have_caching_bg) go search again; This simplifies the logic a lot, and makes sure that if we're hitting uncached block groups we're always waiting on them at some point. I ran this through 100 iterations of generic/475, as this particular case was harder to hit than the previous one. Signed-off-by: Josef Bacik <josef@toxicpanda.com> Signed-off-by: David Sterba <dsterba@suse.com>
1 parent 84af994 commit cd36119

File tree

2 files changed

+22
-52
lines changed

2 files changed

+22
-52
lines changed

fs/btrfs/extent-tree.c

+18-43
Original file line numberDiff line numberDiff line change
@@ -3481,7 +3481,6 @@ btrfs_release_block_group(struct btrfs_block_group *cache,
34813481
* Helper function for find_free_extent().
34823482
*
34833483
* Return -ENOENT to inform caller that we need fallback to unclustered mode.
3484-
* Return -EAGAIN to inform caller that we need to re-search this block group
34853484
* Return >0 to inform caller that we find nothing
34863485
* Return 0 means we have found a location and set ffe_ctl->found_offset.
34873486
*/
@@ -3562,14 +3561,6 @@ static int find_free_extent_clustered(struct btrfs_block_group *bg,
35623561
trace_btrfs_reserve_extent_cluster(bg, ffe_ctl);
35633562
return 0;
35643563
}
3565-
} else if (!ffe_ctl->cached && ffe_ctl->loop > LOOP_CACHING_NOWAIT &&
3566-
!ffe_ctl->retry_clustered) {
3567-
spin_unlock(&last_ptr->refill_lock);
3568-
3569-
ffe_ctl->retry_clustered = true;
3570-
btrfs_wait_block_group_cache_progress(bg, ffe_ctl->num_bytes +
3571-
ffe_ctl->empty_cluster + ffe_ctl->empty_size);
3572-
return -EAGAIN;
35733564
}
35743565
/*
35753566
* At this point we either didn't find a cluster or we weren't able to
@@ -3584,7 +3575,6 @@ static int find_free_extent_clustered(struct btrfs_block_group *bg,
35843575
/*
35853576
* Return >0 to inform caller that we find nothing
35863577
* Return 0 when we found an free extent and set ffe_ctrl->found_offset
3587-
* Return -EAGAIN to inform caller that we need to re-search this block group
35883578
*/
35893579
static int find_free_extent_unclustered(struct btrfs_block_group *bg,
35903580
struct find_free_extent_ctl *ffe_ctl)
@@ -3622,25 +3612,8 @@ static int find_free_extent_unclustered(struct btrfs_block_group *bg,
36223612
offset = btrfs_find_space_for_alloc(bg, ffe_ctl->search_start,
36233613
ffe_ctl->num_bytes, ffe_ctl->empty_size,
36243614
&ffe_ctl->max_extent_size);
3625-
3626-
/*
3627-
* If we didn't find a chunk, and we haven't failed on this block group
3628-
* before, and this block group is in the middle of caching and we are
3629-
* ok with waiting, then go ahead and wait for progress to be made, and
3630-
* set @retry_unclustered to true.
3631-
*
3632-
* If @retry_unclustered is true then we've already waited on this
3633-
* block group once and should move on to the next block group.
3634-
*/
3635-
if (!offset && !ffe_ctl->retry_unclustered && !ffe_ctl->cached &&
3636-
ffe_ctl->loop > LOOP_CACHING_NOWAIT) {
3637-
btrfs_wait_block_group_cache_progress(bg, ffe_ctl->num_bytes +
3638-
ffe_ctl->empty_size);
3639-
ffe_ctl->retry_unclustered = true;
3640-
return -EAGAIN;
3641-
} else if (!offset) {
3615+
if (!offset)
36423616
return 1;
3643-
}
36443617
ffe_ctl->found_offset = offset;
36453618
return 0;
36463619
}
@@ -3654,7 +3627,7 @@ static int do_allocation_clustered(struct btrfs_block_group *block_group,
36543627
/* We want to try and use the cluster allocator, so lets look there */
36553628
if (ffe_ctl->last_ptr && ffe_ctl->use_cluster) {
36563629
ret = find_free_extent_clustered(block_group, ffe_ctl, bg_ret);
3657-
if (ret >= 0 || ret == -EAGAIN)
3630+
if (ret >= 0)
36583631
return ret;
36593632
/* ret == -ENOENT case falls through */
36603633
}
@@ -3872,8 +3845,7 @@ static void release_block_group(struct btrfs_block_group *block_group,
38723845
{
38733846
switch (ffe_ctl->policy) {
38743847
case BTRFS_EXTENT_ALLOC_CLUSTERED:
3875-
ffe_ctl->retry_clustered = false;
3876-
ffe_ctl->retry_unclustered = false;
3848+
ffe_ctl->retry_uncached = false;
38773849
break;
38783850
case BTRFS_EXTENT_ALLOC_ZONED:
38793851
/* Nothing to do */
@@ -4220,9 +4192,7 @@ static noinline int find_free_extent(struct btrfs_root *root,
42204192
ffe_ctl->orig_have_caching_bg = false;
42214193
ffe_ctl->index = btrfs_bg_flags_to_raid_index(ffe_ctl->flags);
42224194
ffe_ctl->loop = 0;
4223-
/* For clustered allocation */
4224-
ffe_ctl->retry_clustered = false;
4225-
ffe_ctl->retry_unclustered = false;
4195+
ffe_ctl->retry_uncached = false;
42264196
ffe_ctl->cached = 0;
42274197
ffe_ctl->max_extent_size = 0;
42284198
ffe_ctl->total_free_space = 0;
@@ -4373,16 +4343,12 @@ static noinline int find_free_extent(struct btrfs_root *root,
43734343

43744344
bg_ret = NULL;
43754345
ret = do_allocation(block_group, ffe_ctl, &bg_ret);
4376-
if (ret == 0) {
4377-
if (bg_ret && bg_ret != block_group) {
4378-
btrfs_release_block_group(block_group,
4379-
ffe_ctl->delalloc);
4380-
block_group = bg_ret;
4381-
}
4382-
} else if (ret == -EAGAIN) {
4383-
goto have_block_group;
4384-
} else if (ret > 0) {
4346+
if (ret > 0)
43854347
goto loop;
4348+
4349+
if (bg_ret && bg_ret != block_group) {
4350+
btrfs_release_block_group(block_group, ffe_ctl->delalloc);
4351+
block_group = bg_ret;
43864352
}
43874353

43884354
/* Checks */
@@ -4423,6 +4389,15 @@ static noinline int find_free_extent(struct btrfs_root *root,
44234389
btrfs_release_block_group(block_group, ffe_ctl->delalloc);
44244390
break;
44254391
loop:
4392+
if (!ffe_ctl->cached && ffe_ctl->loop > LOOP_CACHING_NOWAIT &&
4393+
!ffe_ctl->retry_uncached) {
4394+
ffe_ctl->retry_uncached = true;
4395+
btrfs_wait_block_group_cache_progress(block_group,
4396+
ffe_ctl->num_bytes +
4397+
ffe_ctl->empty_cluster +
4398+
ffe_ctl->empty_size);
4399+
goto have_block_group;
4400+
}
44264401
release_block_group(block_group, ffe_ctl, ffe_ctl->delalloc);
44274402
cond_resched();
44284403
}

fs/btrfs/extent-tree.h

+4-9
Original file line numberDiff line numberDiff line change
@@ -48,16 +48,11 @@ struct find_free_extent_ctl {
4848
int loop;
4949

5050
/*
51-
* Whether we're refilling a cluster, if true we need to re-search
52-
* current block group but don't try to refill the cluster again.
51+
* Set to true if we're retrying the allocation on this block group
52+
* after waiting for caching progress, this is so that we retry only
53+
* once before moving on to another block group.
5354
*/
54-
bool retry_clustered;
55-
56-
/*
57-
* Whether we're updating free space cache, if true we need to re-search
58-
* current block group but don't try updating free space cache again.
59-
*/
60-
bool retry_unclustered;
55+
bool retry_uncached;
6156

6257
/* If current block group is cached */
6358
int cached;

0 commit comments

Comments
 (0)