Skip to content

Commit 0096580

Browse files
adam900710kdave
authored andcommitted
btrfs: scrub: introduce error reporting functionality for scrub_stripe
The new helper, scrub_stripe_report_errors(), will report the result of the scrub to the system log. The main reporting is done by introducing a new helper, scrub_print_common_warning(), which has mostly the same content as scrub_print_warning(), but without the need for a scrub_block. Since we're reporting the errors, it's the perfect time to update the scrub stats too. Signed-off-by: Qu Wenruo <wqu@suse.com> Reviewed-by: David Sterba <dsterba@suse.com> Signed-off-by: David Sterba <dsterba@suse.com>
1 parent 058e09e commit 0096580

File tree

1 file changed

+157
-11
lines changed

1 file changed

+157
-11
lines changed

fs/btrfs/scrub.c

+157-11
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,7 @@ enum scrub_stripe_flags {
105105
* Represent one contiguous range with a length of BTRFS_STRIPE_LEN.
106106
*/
107107
struct scrub_stripe {
108+
struct scrub_ctx *sctx;
108109
struct btrfs_block_group *bg;
109110

110111
struct page *pages[SCRUB_STRIPE_PAGES];
@@ -119,6 +120,13 @@ struct scrub_stripe {
119120
/* Should be BTRFS_STRIPE_LEN / sectorsize. */
120121
u16 nr_sectors;
121122

123+
/*
124+
* How many data/meta extents are in this stripe. Only for scrub status
125+
* reporting purposes.
126+
*/
127+
u16 nr_data_extents;
128+
u16 nr_meta_extents;
129+
122130
atomic_t pending_io;
123131
wait_queue_head_t io_wait;
124132
wait_queue_head_t repair_wait;
@@ -377,6 +385,7 @@ static void release_scrub_stripe(struct scrub_stripe *stripe)
377385
kfree(stripe->csums);
378386
stripe->sectors = NULL;
379387
stripe->csums = NULL;
388+
stripe->sctx = NULL;
380389
stripe->state = 0;
381390
}
382391

@@ -1046,10 +1055,10 @@ static int scrub_print_warning_inode(u64 inum, u64 offset, u64 num_bytes,
10461055
return 0;
10471056
}
10481057

1049-
static void scrub_print_warning(const char *errstr, struct scrub_block *sblock)
1058+
static void scrub_print_common_warning(const char *errstr, struct btrfs_device *dev,
1059+
bool is_super, u64 logical, u64 physical)
10501060
{
1051-
struct btrfs_device *dev;
1052-
struct btrfs_fs_info *fs_info;
1061+
struct btrfs_fs_info *fs_info = dev->fs_info;
10531062
struct btrfs_path *path;
10541063
struct btrfs_key found_key;
10551064
struct extent_buffer *eb;
@@ -1062,22 +1071,18 @@ static void scrub_print_warning(const char *errstr, struct scrub_block *sblock)
10621071
u8 ref_level = 0;
10631072
int ret;
10641073

1065-
WARN_ON(sblock->sector_count < 1);
1066-
dev = sblock->dev;
1067-
fs_info = sblock->sctx->fs_info;
1068-
10691074
/* Super block error, no need to search extent tree. */
1070-
if (sblock->sectors[0]->flags & BTRFS_EXTENT_FLAG_SUPER) {
1075+
if (is_super) {
10711076
btrfs_warn_in_rcu(fs_info, "%s on device %s, physical %llu",
1072-
errstr, btrfs_dev_name(dev), sblock->physical);
1077+
errstr, btrfs_dev_name(dev), physical);
10731078
return;
10741079
}
10751080
path = btrfs_alloc_path();
10761081
if (!path)
10771082
return;
10781083

1079-
swarn.physical = sblock->physical;
1080-
swarn.logical = sblock->logical;
1084+
swarn.physical = physical;
1085+
swarn.logical = logical;
10811086
swarn.errstr = errstr;
10821087
swarn.dev = NULL;
10831088

@@ -1126,6 +1131,13 @@ static void scrub_print_warning(const char *errstr, struct scrub_block *sblock)
11261131
btrfs_free_path(path);
11271132
}
11281133

1134+
static void scrub_print_warning(const char *errstr, struct scrub_block *sblock)
1135+
{
1136+
scrub_print_common_warning(errstr, sblock->dev,
1137+
sblock->sectors[0]->flags & BTRFS_EXTENT_FLAG_SUPER,
1138+
sblock->logical, sblock->physical);
1139+
}
1140+
11291141
static inline void scrub_get_recover(struct scrub_recover *recover)
11301142
{
11311143
refcount_inc(&recover->refs);
@@ -2453,6 +2465,131 @@ static void scrub_stripe_submit_repair_read(struct scrub_stripe *stripe,
24532465
}
24542466
}
24552467

2468+
static void scrub_stripe_report_errors(struct scrub_ctx *sctx,
2469+
struct scrub_stripe *stripe)
2470+
{
2471+
static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL,
2472+
DEFAULT_RATELIMIT_BURST);
2473+
struct btrfs_fs_info *fs_info = sctx->fs_info;
2474+
struct btrfs_device *dev = NULL;
2475+
u64 physical = 0;
2476+
int nr_data_sectors = 0;
2477+
int nr_meta_sectors = 0;
2478+
int nr_nodatacsum_sectors = 0;
2479+
int nr_repaired_sectors = 0;
2480+
int sector_nr;
2481+
2482+
/*
2483+
* Init needed infos for error reporting.
2484+
*
2485+
* Although our scrub_stripe infrastucture is mostly based on btrfs_submit_bio()
2486+
* thus no need for dev/physical, error reporting still needs dev and physical.
2487+
*/
2488+
if (!bitmap_empty(&stripe->init_error_bitmap, stripe->nr_sectors)) {
2489+
u64 mapped_len = fs_info->sectorsize;
2490+
struct btrfs_io_context *bioc = NULL;
2491+
int stripe_index = stripe->mirror_num - 1;
2492+
int ret;
2493+
2494+
/* For scrub, our mirror_num should always start at 1. */
2495+
ASSERT(stripe->mirror_num >= 1);
2496+
ret = btrfs_map_sblock(fs_info, BTRFS_MAP_GET_READ_MIRRORS,
2497+
stripe->logical, &mapped_len, &bioc);
2498+
/*
2499+
* If we failed, dev will be NULL, and later detailed reports
2500+
* will just be skipped.
2501+
*/
2502+
if (ret < 0)
2503+
goto skip;
2504+
physical = bioc->stripes[stripe_index].physical;
2505+
dev = bioc->stripes[stripe_index].dev;
2506+
btrfs_put_bioc(bioc);
2507+
}
2508+
2509+
skip:
2510+
for_each_set_bit(sector_nr, &stripe->extent_sector_bitmap, stripe->nr_sectors) {
2511+
bool repaired = false;
2512+
2513+
if (stripe->sectors[sector_nr].is_metadata) {
2514+
nr_meta_sectors++;
2515+
} else {
2516+
nr_data_sectors++;
2517+
if (!stripe->sectors[sector_nr].csum)
2518+
nr_nodatacsum_sectors++;
2519+
}
2520+
2521+
if (test_bit(sector_nr, &stripe->init_error_bitmap) &&
2522+
!test_bit(sector_nr, &stripe->error_bitmap)) {
2523+
nr_repaired_sectors++;
2524+
repaired = true;
2525+
}
2526+
2527+
/* Good sector from the beginning, nothing need to be done. */
2528+
if (!test_bit(sector_nr, &stripe->init_error_bitmap))
2529+
continue;
2530+
2531+
/*
2532+
* Report error for the corrupted sectors. If repaired, just
2533+
* output the message of repaired message.
2534+
*/
2535+
if (repaired) {
2536+
if (dev) {
2537+
btrfs_err_rl_in_rcu(fs_info,
2538+
"fixed up error at logical %llu on dev %s physical %llu",
2539+
stripe->logical, btrfs_dev_name(dev),
2540+
physical);
2541+
} else {
2542+
btrfs_err_rl_in_rcu(fs_info,
2543+
"fixed up error at logical %llu on mirror %u",
2544+
stripe->logical, stripe->mirror_num);
2545+
}
2546+
continue;
2547+
}
2548+
2549+
/* The remaining are all for unrepaired. */
2550+
if (dev) {
2551+
btrfs_err_rl_in_rcu(fs_info,
2552+
"unable to fixup (regular) error at logical %llu on dev %s physical %llu",
2553+
stripe->logical, btrfs_dev_name(dev),
2554+
physical);
2555+
} else {
2556+
btrfs_err_rl_in_rcu(fs_info,
2557+
"unable to fixup (regular) error at logical %llu on mirror %u",
2558+
stripe->logical, stripe->mirror_num);
2559+
}
2560+
2561+
if (test_bit(sector_nr, &stripe->io_error_bitmap))
2562+
if (__ratelimit(&rs) && dev)
2563+
scrub_print_common_warning("i/o error", dev, false,
2564+
stripe->logical, physical);
2565+
if (test_bit(sector_nr, &stripe->csum_error_bitmap))
2566+
if (__ratelimit(&rs) && dev)
2567+
scrub_print_common_warning("checksum error", dev, false,
2568+
stripe->logical, physical);
2569+
if (test_bit(sector_nr, &stripe->meta_error_bitmap))
2570+
if (__ratelimit(&rs) && dev)
2571+
scrub_print_common_warning("header error", dev, false,
2572+
stripe->logical, physical);
2573+
}
2574+
2575+
spin_lock(&sctx->stat_lock);
2576+
sctx->stat.data_extents_scrubbed += stripe->nr_data_extents;
2577+
sctx->stat.tree_extents_scrubbed += stripe->nr_meta_extents;
2578+
sctx->stat.data_bytes_scrubbed += nr_data_sectors << fs_info->sectorsize_bits;
2579+
sctx->stat.tree_bytes_scrubbed += nr_meta_sectors << fs_info->sectorsize_bits;
2580+
sctx->stat.no_csum += nr_nodatacsum_sectors;
2581+
sctx->stat.read_errors +=
2582+
bitmap_weight(&stripe->io_error_bitmap, stripe->nr_sectors);
2583+
sctx->stat.csum_errors +=
2584+
bitmap_weight(&stripe->csum_error_bitmap, stripe->nr_sectors);
2585+
sctx->stat.verify_errors +=
2586+
bitmap_weight(&stripe->meta_error_bitmap, stripe->nr_sectors);
2587+
sctx->stat.uncorrectable_errors +=
2588+
bitmap_weight(&stripe->error_bitmap, stripe->nr_sectors);
2589+
sctx->stat.corrected_errors += nr_repaired_sectors;
2590+
spin_unlock(&sctx->stat_lock);
2591+
}
2592+
24562593
/*
24572594
* The main entrance for all read related scrub work, including:
24582595
*
@@ -2526,6 +2663,7 @@ static void scrub_stripe_read_repair_worker(struct work_struct *work)
25262663
goto out;
25272664
}
25282665
out:
2666+
scrub_stripe_report_errors(stripe->sctx, stripe);
25292667
set_bit(SCRUB_STRIPE_FLAG_REPAIR_DONE, &stripe->state);
25302668
wake_up(&stripe->repair_wait);
25312669
}
@@ -4189,6 +4327,10 @@ int scrub_find_fill_first_stripe(struct btrfs_block_group *bg,
41894327
if (ret)
41904328
goto out;
41914329
get_extent_info(&path, &extent_start, &extent_len, &extent_flags, &extent_gen);
4330+
if (extent_flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
4331+
stripe->nr_meta_extents++;
4332+
if (extent_flags & BTRFS_EXTENT_FLAG_DATA)
4333+
stripe->nr_data_extents++;
41924334
cur_logical = max(extent_start, cur_logical);
41934335

41944336
/*
@@ -4222,6 +4364,10 @@ int scrub_find_fill_first_stripe(struct btrfs_block_group *bg,
42224364
}
42234365
get_extent_info(&path, &extent_start, &extent_len,
42244366
&extent_flags, &extent_gen);
4367+
if (extent_flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
4368+
stripe->nr_meta_extents++;
4369+
if (extent_flags & BTRFS_EXTENT_FLAG_DATA)
4370+
stripe->nr_data_extents++;
42254371
fill_one_extent_info(fs_info, stripe, extent_start, extent_len,
42264372
extent_flags, extent_gen);
42274373
cur_logical = extent_start + extent_len;

0 commit comments

Comments
 (0)