Skip to content

Commit 14da920

Browse files
jankara authored and Al Viro committed
fs: Protect write paths by sb_start_write - sb_end_write
There are several entry points which dirty pages in a filesystem: mmap (handled by block_page_mkwrite()), buffered write (handled by __generic_file_aio_write()), splice write (generic_file_splice_write), truncate, and fallocate (these can dirty the last partial page - handled inside each filesystem separately). Protect these places with sb_start_write() and sb_end_write().

->page_mkwrite() calls are particularly complex since they are called with mmap_sem held and thus we cannot use the standard sb_start_write() due to lock ordering constraints. We solve the problem by using a special freeze protection, sb_start_pagefault(), which ranks below mmap_sem.

BugLink: https://bugs.launchpad.net/bugs/897421
Tested-by: Kamal Mostafa <kamal@canonical.com>
Tested-by: Peter M. Petrakis <peter.petrakis@canonical.com>
Tested-by: Dann Frazier <dann.frazier@canonical.com>
Tested-by: Massimo Morana <massimo.morana@canonical.com>
Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
1 parent 5d37e9e commit 14da920

File tree

5 files changed

+26
-23
lines changed

5 files changed

+26
-23
lines changed

fs/buffer.c

+4-18
Original file line number | Diff line number | Diff line change
@@ -2306,8 +2306,8 @@ EXPORT_SYMBOL(block_commit_write);
23062306
* beyond EOF, then the page is guaranteed safe against truncation until we
23072307
* unlock the page.
23082308
*
2309-
* Direct callers of this function should call vfs_check_frozen() so that page
2310-
* fault does not busyloop until the fs is thawed.
2309+
* Direct callers of this function should protect against filesystem freezing
2310+
* using sb_start_write() - sb_end_write() functions.
23112311
*/
23122312
int __block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
23132313
get_block_t get_block)
@@ -2345,18 +2345,7 @@ int __block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
23452345

23462346
if (unlikely(ret < 0))
23472347
goto out_unlock;
2348-
/*
2349-
* Freezing in progress? We check after the page is marked dirty and
2350-
* with page lock held so if the test here fails, we are sure freezing
2351-
* code will wait during syncing until the page fault is done - at that
2352-
* point page will be dirty and unlocked so freezing code will write it
2353-
* and writeprotect it again.
2354-
*/
23552348
set_page_dirty(page);
2356-
if (inode->i_sb->s_frozen != SB_UNFROZEN) {
2357-
ret = -EAGAIN;
2358-
goto out_unlock;
2359-
}
23602349
wait_on_page_writeback(page);
23612350
return 0;
23622351
out_unlock:
@@ -2371,12 +2360,9 @@ int block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
23712360
int ret;
23722361
struct super_block *sb = vma->vm_file->f_path.dentry->d_inode->i_sb;
23732362

2374-
/*
2375-
* This check is racy but catches the common case. The check in
2376-
* __block_page_mkwrite() is reliable.
2377-
*/
2378-
vfs_check_frozen(sb, SB_FREEZE_WRITE);
2363+
sb_start_pagefault(sb);
23792364
ret = __block_page_mkwrite(vma, vmf, get_block);
2365+
sb_end_pagefault(sb);
23802366
return block_page_mkwrite_return(ret);
23812367
}
23822368
EXPORT_SYMBOL(block_page_mkwrite);

fs/open.c

+6-1
Original file line number | Diff line number | Diff line change
@@ -164,11 +164,13 @@ static long do_sys_ftruncate(unsigned int fd, loff_t length, int small)
164164
if (IS_APPEND(inode))
165165
goto out_putf;
166166

167+
sb_start_write(inode->i_sb);
167168
error = locks_verify_truncate(inode, file, length);
168169
if (!error)
169170
error = security_path_truncate(&file->f_path);
170171
if (!error)
171172
error = do_truncate(dentry, length, ATTR_MTIME|ATTR_CTIME, file);
173+
sb_end_write(inode->i_sb);
172174
out_putf:
173175
fput(file);
174176
out:
@@ -266,7 +268,10 @@ int do_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
266268
if (!file->f_op->fallocate)
267269
return -EOPNOTSUPP;
268270

269-
return file->f_op->fallocate(file, mode, offset, len);
271+
sb_start_write(inode->i_sb);
272+
ret = file->f_op->fallocate(file, mode, offset, len);
273+
sb_end_write(inode->i_sb);
274+
return ret;
270275
}
271276

272277
SYSCALL_DEFINE(fallocate)(int fd, int mode, loff_t offset, loff_t len)

fs/splice.c

+3
Original file line number | Diff line number | Diff line change
@@ -996,6 +996,8 @@ generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
996996
};
997997
ssize_t ret;
998998

999+
sb_start_write(inode->i_sb);
1000+
9991001
pipe_lock(pipe);
10001002

10011003
splice_from_pipe_begin(&sd);
@@ -1034,6 +1036,7 @@ generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
10341036
*ppos += ret;
10351037
balance_dirty_pages_ratelimited_nr(mapping, nr_pages);
10361038
}
1039+
sb_end_write(inode->i_sb);
10371040

10381041
return ret;
10391042
}

mm/filemap.c

+10-2
Original file line number | Diff line number | Diff line change
@@ -1718,14 +1718,22 @@ int filemap_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
17181718
struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
17191719
int ret = VM_FAULT_LOCKED;
17201720

1721+
sb_start_pagefault(inode->i_sb);
17211722
file_update_time(vma->vm_file);
17221723
lock_page(page);
17231724
if (page->mapping != inode->i_mapping) {
17241725
unlock_page(page);
17251726
ret = VM_FAULT_NOPAGE;
17261727
goto out;
17271728
}
1729+
/*
1730+
* We mark the page dirty already here so that when freeze is in
1731+
* progress, we are guaranteed that writeback during freezing will
1732+
* see the dirty page and writeprotect it again.
1733+
*/
1734+
set_page_dirty(page);
17281735
out:
1736+
sb_end_pagefault(inode->i_sb);
17291737
return ret;
17301738
}
17311739
EXPORT_SYMBOL(filemap_page_mkwrite);
@@ -2426,8 +2434,6 @@ ssize_t __generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
24262434
count = ocount;
24272435
pos = *ppos;
24282436

2429-
vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
2430-
24312437
/* We can write back this queue in page reclaim */
24322438
current->backing_dev_info = mapping->backing_dev_info;
24332439
written = 0;
@@ -2526,6 +2532,7 @@ ssize_t generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
25262532

25272533
BUG_ON(iocb->ki_pos != pos);
25282534

2535+
sb_start_write(inode->i_sb);
25292536
mutex_lock(&inode->i_mutex);
25302537
blk_start_plug(&plug);
25312538
ret = __generic_file_aio_write(iocb, iov, nr_segs, &iocb->ki_pos);
@@ -2539,6 +2546,7 @@ ssize_t generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
25392546
ret = err;
25402547
}
25412548
blk_finish_plug(&plug);
2549+
sb_end_write(inode->i_sb);
25422550
return ret;
25432551
}
25442552
EXPORT_SYMBOL(generic_file_aio_write);

mm/filemap_xip.c

+3-2
Original file line number | Diff line number | Diff line change
@@ -402,6 +402,8 @@ xip_file_write(struct file *filp, const char __user *buf, size_t len,
402402
loff_t pos;
403403
ssize_t ret;
404404

405+
sb_start_write(inode->i_sb);
406+
405407
mutex_lock(&inode->i_mutex);
406408

407409
if (!access_ok(VERIFY_READ, buf, len)) {
@@ -412,8 +414,6 @@ xip_file_write(struct file *filp, const char __user *buf, size_t len,
412414
pos = *ppos;
413415
count = len;
414416

415-
vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
416-
417417
/* We can write back this queue in page reclaim */
418418
current->backing_dev_info = mapping->backing_dev_info;
419419

@@ -437,6 +437,7 @@ xip_file_write(struct file *filp, const char __user *buf, size_t len,
437437
current->backing_dev_info = NULL;
438438
out_up:
439439
mutex_unlock(&inode->i_mutex);
440+
sb_end_write(inode->i_sb);
440441
return ret;
441442
}
442443
EXPORT_SYMBOL_GPL(xip_file_write);

0 commit comments

Comments
 (0)