Skip to content

Commit c278531

Browse files
Dmitry Monakhovtytso
Dmitry Monakhov
authored andcommitted
ext4: fix ext4_flush_completed_IO wait semantics
BUG #1) All places where we call ext4_flush_completed_IO are broken because buffered io and DIO/AIO goes through three stages 1) submitted io, 2) completed io (in i_completed_io_list) conversion pended 3) finished io (conversion done) And by calling ext4_flush_completed_IO we will flush only requests which were in (2) stage, which is wrong because: 1) punch_hole and truncate _must_ wait for all outstanding unwritten io regardless to it's state. 2) fsync and nolock_dio_read should also wait because there is a time window between end_page_writeback() and ext4_add_complete_io() As result integrity fsync is broken in case of buffered write to fallocated region: fsync blkdev_completion ->filemap_write_and_wait_range ->ext4_end_bio ->end_page_writeback <-- filemap_write_and_wait_range return ->ext4_flush_completed_IO sees empty i_completed_io_list but pended conversion still exist ->ext4_add_complete_io BUG #2) Race window becomes wider due to the 'ext4: completed_io locking cleanup V4' patch series This patch make following changes: 1) ext4_flush_completed_io() now first try to flush completed io and when wait for any outstanding unwritten io via ext4_unwritten_wait() 2) Rename function to more appropriate name. 3) Assert that all callers of ext4_flush_unwritten_io should hold i_mutex to prevent endless wait Signed-off-by: Dmitry Monakhov <dmonakhov@openvz.org> Signed-off-by: "Theodore Ts'o" <tytso@mit.edu> Reviewed-by: Jan Kara <jack@suse.cz>
1 parent 041bbb6 commit c278531

File tree

6 files changed

+19
-13
lines changed

6 files changed

+19
-13
lines changed

fs/ext4/ext4.h

+2-1
Original file line numberDiff line numberDiff line change
@@ -1947,7 +1947,7 @@ extern void ext4_htree_free_dir_info(struct dir_private_info *p);
19471947

19481948
/* fsync.c */
19491949
extern int ext4_sync_file(struct file *, loff_t, loff_t, int);
1950-
extern int ext4_flush_completed_IO(struct inode *);
1950+
extern int ext4_flush_unwritten_io(struct inode *);
19511951

19521952
/* hash.c */
19531953
extern int ext4fs_dirhash(const char *name, int len, struct
@@ -2371,6 +2371,7 @@ extern const struct file_operations ext4_dir_operations;
23712371
extern const struct inode_operations ext4_file_inode_operations;
23722372
extern const struct file_operations ext4_file_operations;
23732373
extern loff_t ext4_llseek(struct file *file, loff_t offset, int origin);
2374+
extern void ext4_unwritten_wait(struct inode *inode);
23742375

23752376
/* namei.c */
23762377
extern const struct inode_operations ext4_dir_inode_operations;

fs/ext4/extents.c

+3-3
Original file line numberDiff line numberDiff line change
@@ -4268,7 +4268,7 @@ void ext4_ext_truncate(struct inode *inode)
42684268
* finish any pending end_io work so we won't run the risk of
42694269
* converting any truncated blocks to initialized later
42704270
*/
4271-
ext4_flush_completed_IO(inode);
4271+
ext4_flush_unwritten_io(inode);
42724272

42734273
/*
42744274
* probably first extent we're gonna free will be last in block
@@ -4847,10 +4847,10 @@ int ext4_ext_punch_hole(struct file *file, loff_t offset, loff_t length)
48474847

48484848
/* Wait all existing dio workers, newcomers will block on i_mutex */
48494849
ext4_inode_block_unlocked_dio(inode);
4850-
inode_dio_wait(inode);
4851-
err = ext4_flush_completed_IO(inode);
4850+
err = ext4_flush_unwritten_io(inode);
48524851
if (err)
48534852
goto out_dio;
4853+
inode_dio_wait(inode);
48544854

48554855
credits = ext4_writepage_trans_blocks(inode);
48564856
handle = ext4_journal_start(inode, credits);

fs/ext4/file.c

+1-1
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ static int ext4_release_file(struct inode *inode, struct file *filp)
5555
return 0;
5656
}
5757

58-
static void ext4_unwritten_wait(struct inode *inode)
58+
void ext4_unwritten_wait(struct inode *inode)
5959
{
6060
wait_queue_head_t *wq = ext4_ioend_wq(inode);
6161

fs/ext4/fsync.c

+1-1
Original file line numberDiff line numberDiff line change
@@ -138,7 +138,7 @@ int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
138138
if (inode->i_sb->s_flags & MS_RDONLY)
139139
goto out;
140140

141-
ret = ext4_flush_completed_IO(inode);
141+
ret = ext4_flush_unwritten_io(inode);
142142
if (ret < 0)
143143
goto out;
144144

fs/ext4/indirect.c

+5-3
Original file line numberDiff line numberDiff line change
@@ -807,9 +807,11 @@ ssize_t ext4_ind_direct_IO(int rw, struct kiocb *iocb,
807807

808808
retry:
809809
if (rw == READ && ext4_should_dioread_nolock(inode)) {
810-
if (unlikely(!list_empty(&ei->i_completed_io_list)))
811-
ext4_flush_completed_IO(inode);
812-
810+
if (unlikely(atomic_read(&EXT4_I(inode)->i_unwritten))) {
811+
mutex_lock(&inode->i_mutex);
812+
ext4_flush_unwritten_io(inode);
813+
mutex_unlock(&inode->i_mutex);
814+
}
813815
/*
814816
* Nolock dioread optimization may be dynamically disabled
815817
* via ext4_inode_block_unlocked_dio(). Check inode's state

fs/ext4/page-io.c

+7-4
Original file line numberDiff line numberDiff line change
@@ -189,8 +189,6 @@ static int ext4_do_flush_completed_IO(struct inode *inode,
189189

190190
list_add_tail(&io->list, &complete);
191191
}
192-
/* It is important to update all flags for all end_io in one shot w/o
193-
* dropping the lock.*/
194192
spin_lock_irqsave(&ei->i_completed_io_lock, flags);
195193
while (!list_empty(&complete)) {
196194
io = list_entry(complete.next, ext4_io_end_t, list);
@@ -228,9 +226,14 @@ static void ext4_end_io_work(struct work_struct *work)
228226
ext4_do_flush_completed_IO(io->inode, io);
229227
}
230228

231-
int ext4_flush_completed_IO(struct inode *inode)
229+
int ext4_flush_unwritten_io(struct inode *inode)
232230
{
233-
return ext4_do_flush_completed_IO(inode, NULL);
231+
int ret;
232+
WARN_ON_ONCE(!mutex_is_locked(&inode->i_mutex) &&
233+
!(inode->i_state & I_FREEING));
234+
ret = ext4_do_flush_completed_IO(inode, NULL);
235+
ext4_unwritten_wait(inode);
236+
return ret;
234237
}
235238

236239
ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags)

0 commit comments

Comments
 (0)