Skip to content

Commit 62c230b

Browse files
Mel Gorman authored and torvalds committed
mm: add support for a filesystem to activate swap files and use direct_IO for writing swap pages
Currently swapfiles are managed entirely by the core VM by using ->bmap to allocate space and write to the blocks directly. This effectively ensures that the underlying blocks are allocated and avoids the need for the swap subsystem to locate what physical blocks store offsets within a file. If the swap subsystem is to use the filesystem information to locate the blocks, it is critical that information such as block groups, block bitmaps and the block descriptor table that map the swap file be resident in memory. This patch adds address_space_operations that the VM can call when activating or deactivating swap backed by a file. int swap_activate(struct file *); int swap_deactivate(struct file *); The ->swap_activate() method is used to communicate to the file that the VM relies on it, and the address_space should take adequate measures such as reserving space in the underlying device, reserving memory for mempools and pinning information such as the block descriptor table in memory. The ->swap_deactivate() method is called on sys_swapoff() if ->swap_activate() returned success. After a successful swapfile ->swap_activate, the swapfile is marked SWP_FILE and swapper_space.a_ops will proxy to sis->swap_file->f_mapping->a_ops using ->direct_IO to write swapcache pages and ->readpage to read. It is perfectly possible that direct_IO be used to read the swap pages but it is an unnecessary complication. Similarly, it is possible that ->writepage be used instead of direct_IO to write the pages but filesystem developers have stated that calling writepage from the VM is undesirable for a variety of reasons and using direct_IO opens up the possibility of writing back batches of swap pages in the future. [a.p.zijlstra@chello.nl: Original patch] Signed-off-by: Mel Gorman <mgorman@suse.de> Acked-by: Rik van Riel <riel@redhat.com> Cc: Christoph Hellwig <hch@infradead.org> Cc: David S. 
Miller <davem@davemloft.net> Cc: Eric B Munson <emunson@mgebm.net> Cc: Eric Paris <eparis@redhat.com> Cc: James Morris <jmorris@namei.org> Cc: Mel Gorman <mgorman@suse.de> Cc: Mike Christie <michaelc@cs.wisc.edu> Cc: Neil Brown <neilb@suse.de> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Sebastian Andrzej Siewior <sebastian@breakpoint.cc> Cc: Trond Myklebust <Trond.Myklebust@netapp.com> Cc: Xiaotian Feng <dfeng@redhat.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
1 parent 18022c5 commit 62c230b

File tree

7 files changed

+105
-3
lines changed

7 files changed

+105
-3
lines changed

Documentation/filesystems/Locking

+13
Original file line numberDiff line numberDiff line change
@@ -206,6 +206,8 @@ prototypes:
206206
int (*launder_page)(struct page *);
207207
int (*is_partially_uptodate)(struct page *, read_descriptor_t *, unsigned long);
208208
int (*error_remove_page)(struct address_space *, struct page *);
209+
int (*swap_activate)(struct file *);
210+
int (*swap_deactivate)(struct file *);
209211

210212
locking rules:
211213
All except set_page_dirty and freepage may block
@@ -229,6 +231,8 @@ migratepage: yes (both)
229231
launder_page: yes
230232
is_partially_uptodate: yes
231233
error_remove_page: yes
234+
swap_activate: no
235+
swap_deactivate: no
232236

233237
->write_begin(), ->write_end(), ->sync_page() and ->readpage()
234238
may be called from the request handler (/dev/loop).
@@ -330,6 +334,15 @@ cleaned, or an error value if not. Note that in order to prevent the page
330334
getting mapped back in and redirtied, it needs to be kept locked
331335
across the entire operation.
332336

337+
->swap_activate() will be called with a non-NULL file argument on
338+
files backing (non block device backed) swapfiles. A return value
339+
of zero indicates success, in which case this file can be used for
340+
backing swapspace. The swapspace operations will be proxied to the
341+
address space operations.
342+
343+
->swap_deactivate() will be called in the sys_swapoff()
344+
path after ->swap_activate() returned success.
345+
333346
----------------------- file_lock_operations ------------------------------
334347
prototypes:
335348
void (*fl_copy_lock)(struct file_lock *, struct file_lock *);

Documentation/filesystems/vfs.txt

+12
Original file line numberDiff line numberDiff line change
@@ -592,6 +592,8 @@ struct address_space_operations {
592592
int (*migratepage) (struct page *, struct page *);
593593
int (*launder_page) (struct page *);
594594
int (*error_remove_page) (struct mapping *mapping, struct page *page);
595+
int (*swap_activate)(struct file *);
596+
int (*swap_deactivate)(struct file *);
595597
};
596598

597599
writepage: called by the VM to write a dirty page to backing store.
@@ -760,6 +762,16 @@ struct address_space_operations {
760762
Setting this implies you deal with pages going away under you,
761763
unless you have them locked or reference counts increased.
762764

765+
swap_activate: Called when swapon is used on a file to allocate
766+
space if necessary and pin the block lookup information in
767+
memory. A return value of zero indicates success,
768+
in which case this file can be used to back swapspace. The
769+
swapspace operations will be proxied to this address space's
770+
->direct_IO (for writes) and ->readpage (for reads) methods.
771+
772+
swap_deactivate: Called during swapoff on files where swap_activate
773+
was successful.
774+
763775

764776
The File Object
765777
===============

include/linux/fs.h

+4
Original file line numberDiff line numberDiff line change
@@ -638,6 +638,10 @@ struct address_space_operations {
638638
int (*is_partially_uptodate) (struct page *, read_descriptor_t *,
639639
unsigned long);
640640
int (*error_remove_page)(struct address_space *, struct page *);
641+
642+
/* swapfile support */
643+
int (*swap_activate)(struct file *file);
644+
int (*swap_deactivate)(struct file *file);
641645
};
642646

643647
extern const struct address_space_operations empty_aops;

include/linux/swap.h

+2
Original file line numberDiff line numberDiff line change
@@ -151,6 +151,7 @@ enum {
151151
SWP_SOLIDSTATE = (1 << 4), /* blkdev seeks are cheap */
152152
SWP_CONTINUED = (1 << 5), /* swap_map has count continuation */
153153
SWP_BLKDEV = (1 << 6), /* its a block device */
154+
SWP_FILE = (1 << 7), /* set after swap_activate success */
154155
/* add others here before... */
155156
SWP_SCANNING = (1 << 8), /* refcount in scan_swap_map */
156157
};
@@ -320,6 +321,7 @@ static inline void mem_cgroup_uncharge_swap(swp_entry_t ent)
320321
/* linux/mm/page_io.c */
321322
extern int swap_readpage(struct page *);
322323
extern int swap_writepage(struct page *page, struct writeback_control *wbc);
324+
extern int swap_set_page_dirty(struct page *page);
323325
extern void end_swap_bio_read(struct bio *bio, int err);
324326

325327
/* linux/mm/swap_state.c */

mm/page_io.c

+52
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
#include <linux/swap.h>
1818
#include <linux/bio.h>
1919
#include <linux/swapops.h>
20+
#include <linux/buffer_head.h>
2021
#include <linux/writeback.h>
2122
#include <linux/frontswap.h>
2223
#include <asm/pgtable.h>
@@ -94,6 +95,7 @@ int swap_writepage(struct page *page, struct writeback_control *wbc)
9495
{
9596
struct bio *bio;
9697
int ret = 0, rw = WRITE;
98+
struct swap_info_struct *sis = page_swap_info(page);
9799

98100
if (try_to_free_swap(page)) {
99101
unlock_page(page);
@@ -105,6 +107,32 @@ int swap_writepage(struct page *page, struct writeback_control *wbc)
105107
end_page_writeback(page);
106108
goto out;
107109
}
110+
111+
if (sis->flags & SWP_FILE) {
112+
struct kiocb kiocb;
113+
struct file *swap_file = sis->swap_file;
114+
struct address_space *mapping = swap_file->f_mapping;
115+
struct iovec iov = {
116+
.iov_base = page_address(page),
117+
.iov_len = PAGE_SIZE,
118+
};
119+
120+
init_sync_kiocb(&kiocb, swap_file);
121+
kiocb.ki_pos = page_file_offset(page);
122+
kiocb.ki_left = PAGE_SIZE;
123+
kiocb.ki_nbytes = PAGE_SIZE;
124+
125+
unlock_page(page);
126+
ret = mapping->a_ops->direct_IO(KERNEL_WRITE,
127+
&kiocb, &iov,
128+
kiocb.ki_pos, 1);
129+
if (ret == PAGE_SIZE) {
130+
count_vm_event(PSWPOUT);
131+
ret = 0;
132+
}
133+
return ret;
134+
}
135+
108136
bio = get_swap_bio(GFP_NOIO, page, end_swap_bio_write);
109137
if (bio == NULL) {
110138
set_page_dirty(page);
@@ -126,6 +154,7 @@ int swap_readpage(struct page *page)
126154
{
127155
struct bio *bio;
128156
int ret = 0;
157+
struct swap_info_struct *sis = page_swap_info(page);
129158

130159
VM_BUG_ON(!PageLocked(page));
131160
VM_BUG_ON(PageUptodate(page));
@@ -134,6 +163,17 @@ int swap_readpage(struct page *page)
134163
unlock_page(page);
135164
goto out;
136165
}
166+
167+
if (sis->flags & SWP_FILE) {
168+
struct file *swap_file = sis->swap_file;
169+
struct address_space *mapping = swap_file->f_mapping;
170+
171+
ret = mapping->a_ops->readpage(swap_file, page);
172+
if (!ret)
173+
count_vm_event(PSWPIN);
174+
return ret;
175+
}
176+
137177
bio = get_swap_bio(GFP_KERNEL, page, end_swap_bio_read);
138178
if (bio == NULL) {
139179
unlock_page(page);
@@ -145,3 +185,15 @@ int swap_readpage(struct page *page)
145185
out:
146186
return ret;
147187
}
188+
189+
int swap_set_page_dirty(struct page *page)
190+
{
191+
struct swap_info_struct *sis = page_swap_info(page);
192+
193+
if (sis->flags & SWP_FILE) {
194+
struct address_space *mapping = sis->swap_file->f_mapping;
195+
return mapping->a_ops->set_page_dirty(page);
196+
} else {
197+
return __set_page_dirty_no_writeback(page);
198+
}
199+
}

mm/swap_state.c

+1-1
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@
2727
*/
2828
static const struct address_space_operations swap_aops = {
2929
.writepage = swap_writepage,
30-
.set_page_dirty = __set_page_dirty_no_writeback,
30+
.set_page_dirty = swap_set_page_dirty,
3131
.migratepage = migrate_page,
3232
};
3333

mm/swapfile.c

+21-2
Original file line numberDiff line numberDiff line change
@@ -1329,6 +1329,14 @@ static void destroy_swap_extents(struct swap_info_struct *sis)
13291329
list_del(&se->list);
13301330
kfree(se);
13311331
}
1332+
1333+
if (sis->flags & SWP_FILE) {
1334+
struct file *swap_file = sis->swap_file;
1335+
struct address_space *mapping = swap_file->f_mapping;
1336+
1337+
sis->flags &= ~SWP_FILE;
1338+
mapping->a_ops->swap_deactivate(swap_file);
1339+
}
13321340
}
13331341

13341342
/*
@@ -1410,7 +1418,9 @@ add_swap_extent(struct swap_info_struct *sis, unsigned long start_page,
14101418
*/
14111419
static int setup_swap_extents(struct swap_info_struct *sis, sector_t *span)
14121420
{
1413-
struct inode *inode;
1421+
struct file *swap_file = sis->swap_file;
1422+
struct address_space *mapping = swap_file->f_mapping;
1423+
struct inode *inode = mapping->host;
14141424
unsigned blocks_per_page;
14151425
unsigned long page_no;
14161426
unsigned blkbits;
@@ -1421,13 +1431,22 @@ static int setup_swap_extents(struct swap_info_struct *sis, sector_t *span)
14211431
int nr_extents = 0;
14221432
int ret;
14231433

1424-
inode = sis->swap_file->f_mapping->host;
14251434
if (S_ISBLK(inode->i_mode)) {
14261435
ret = add_swap_extent(sis, 0, sis->max, 0);
14271436
*span = sis->pages;
14281437
goto out;
14291438
}
14301439

1440+
if (mapping->a_ops->swap_activate) {
1441+
ret = mapping->a_ops->swap_activate(swap_file);
1442+
if (!ret) {
1443+
sis->flags |= SWP_FILE;
1444+
ret = add_swap_extent(sis, 0, sis->max, 0);
1445+
*span = sis->pages;
1446+
}
1447+
goto out;
1448+
}
1449+
14311450
blkbits = inode->i_blkbits;
14321451
blocks_per_page = PAGE_SIZE >> blkbits;
14331452

0 commit comments

Comments
 (0)