Skip to content

Commit 5a1c84b

Browse files
gormanm authored and torvalds committed
mm: remove reclaim and compaction retry approximations
If per-zone LRU accounting is available then there is no point approximating whether reclaim and compaction should retry based on pgdat statistics. This is effectively a revert of "mm, vmstat: remove zone and node double accounting by approximating retries" with the difference that inactive/active stats are still available. This preserves the history of why the approximation was retried and why it had to be reverted to handle OOM kills on 32-bit systems. Link: http://lkml.kernel.org/r/1469110261-7365-4-git-send-email-mgorman@techsingularity.net Signed-off-by: Mel Gorman <mgorman@techsingularity.net> Acked-by: Johannes Weiner <hannes@cmpxchg.org> Acked-by: Minchan Kim <minchan@kernel.org> Cc: Michal Hocko <mhocko@suse.cz> Cc: Vlastimil Babka <vbabka@suse.cz> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
1 parent bb4cc2b commit 5a1c84b

File tree

8 files changed

+39
-58
lines changed

8 files changed

+39
-58
lines changed

include/linux/mmzone.h

+1
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,7 @@ enum zone_stat_item {
116116
NR_ZONE_INACTIVE_FILE,
117117
NR_ZONE_ACTIVE_FILE,
118118
NR_ZONE_UNEVICTABLE,
119+
NR_ZONE_WRITE_PENDING, /* Count of dirty, writeback and unstable pages */
119120
NR_MLOCK, /* mlock()ed pages found and moved off LRU */
120121
NR_SLAB_RECLAIMABLE,
121122
NR_SLAB_UNRECLAIMABLE,

include/linux/swap.h

+1
Original file line numberDiff line numberDiff line change
@@ -307,6 +307,7 @@ extern void lru_cache_add_active_or_unevictable(struct page *page,
307307
struct vm_area_struct *vma);
308308

309309
/* linux/mm/vmscan.c */
310+
extern unsigned long zone_reclaimable_pages(struct zone *zone);
310311
extern unsigned long pgdat_reclaimable_pages(struct pglist_data *pgdat);
311312
extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
312313
gfp_t gfp_mask, nodemask_t *mask);

mm/compaction.c

+1-19
Original file line numberDiff line numberDiff line change
@@ -1438,11 +1438,6 @@ bool compaction_zonelist_suitable(struct alloc_context *ac, int order,
14381438
{
14391439
struct zone *zone;
14401440
struct zoneref *z;
1441-
pg_data_t *last_pgdat = NULL;
1442-
1443-
/* Do not retry compaction for zone-constrained allocations */
1444-
if (ac->high_zoneidx < ZONE_NORMAL)
1445-
return false;
14461441

14471442
/*
14481443
* Make sure at least one zone would pass __compaction_suitable if we continue
@@ -1453,27 +1448,14 @@ bool compaction_zonelist_suitable(struct alloc_context *ac, int order,
14531448
unsigned long available;
14541449
enum compact_result compact_result;
14551450

1456-
if (last_pgdat == zone->zone_pgdat)
1457-
continue;
1458-
1459-
/*
1460-
* This over-estimates the number of pages available for
1461-
* reclaim/compaction but walking the LRU would take too
1462-
* long. The consequences are that compaction may retry
1463-
* longer than it should for a zone-constrained allocation
1464-
* request.
1465-
*/
1466-
last_pgdat = zone->zone_pgdat;
1467-
available = pgdat_reclaimable_pages(zone->zone_pgdat) / order;
1468-
14691451
/*
14701452
* Do not consider all the reclaimable memory because we do not
14711453
* want to trash just for a single high order allocation which
14721454
* is even not guaranteed to appear even if __compaction_suitable
14731455
* is happy about the watermark check.
14741456
*/
1457+
available = zone_reclaimable_pages(zone) / order;
14751458
available += zone_page_state_snapshot(zone, NR_FREE_PAGES);
1476-
available = min(zone->managed_pages, available);
14771459
compact_result = __compaction_suitable(zone, order, alloc_flags,
14781460
ac_classzone_idx(ac), available);
14791461
if (compact_result != COMPACT_SKIPPED &&

mm/migrate.c

+2
Original file line numberDiff line numberDiff line change
@@ -513,7 +513,9 @@ int migrate_page_move_mapping(struct address_space *mapping,
513513
}
514514
if (dirty && mapping_cap_account_dirty(mapping)) {
515515
__dec_node_state(oldzone->zone_pgdat, NR_FILE_DIRTY);
516+
__dec_zone_state(oldzone, NR_ZONE_WRITE_PENDING);
516517
__inc_node_state(newzone->zone_pgdat, NR_FILE_DIRTY);
518+
__inc_zone_state(newzone, NR_ZONE_WRITE_PENDING);
517519
}
518520
}
519521
local_irq_enable();

mm/page-writeback.c

+5
Original file line numberDiff line numberDiff line change
@@ -2462,6 +2462,7 @@ void account_page_dirtied(struct page *page, struct address_space *mapping)
24622462

24632463
mem_cgroup_inc_page_stat(page, MEM_CGROUP_STAT_DIRTY);
24642464
__inc_node_page_state(page, NR_FILE_DIRTY);
2465+
__inc_zone_page_state(page, NR_ZONE_WRITE_PENDING);
24652466
__inc_node_page_state(page, NR_DIRTIED);
24662467
__inc_wb_stat(wb, WB_RECLAIMABLE);
24672468
__inc_wb_stat(wb, WB_DIRTIED);
@@ -2483,6 +2484,7 @@ void account_page_cleaned(struct page *page, struct address_space *mapping,
24832484
if (mapping_cap_account_dirty(mapping)) {
24842485
mem_cgroup_dec_page_stat(page, MEM_CGROUP_STAT_DIRTY);
24852486
dec_node_page_state(page, NR_FILE_DIRTY);
2487+
dec_zone_page_state(page, NR_ZONE_WRITE_PENDING);
24862488
dec_wb_stat(wb, WB_RECLAIMABLE);
24872489
task_io_account_cancelled_write(PAGE_SIZE);
24882490
}
@@ -2739,6 +2741,7 @@ int clear_page_dirty_for_io(struct page *page)
27392741
if (TestClearPageDirty(page)) {
27402742
mem_cgroup_dec_page_stat(page, MEM_CGROUP_STAT_DIRTY);
27412743
dec_node_page_state(page, NR_FILE_DIRTY);
2744+
dec_zone_page_state(page, NR_ZONE_WRITE_PENDING);
27422745
dec_wb_stat(wb, WB_RECLAIMABLE);
27432746
ret = 1;
27442747
}
@@ -2785,6 +2788,7 @@ int test_clear_page_writeback(struct page *page)
27852788
if (ret) {
27862789
mem_cgroup_dec_page_stat(page, MEM_CGROUP_STAT_WRITEBACK);
27872790
dec_node_page_state(page, NR_WRITEBACK);
2791+
dec_zone_page_state(page, NR_ZONE_WRITE_PENDING);
27882792
inc_node_page_state(page, NR_WRITTEN);
27892793
}
27902794
unlock_page_memcg(page);
@@ -2839,6 +2843,7 @@ int __test_set_page_writeback(struct page *page, bool keep_write)
28392843
if (!ret) {
28402844
mem_cgroup_inc_page_stat(page, MEM_CGROUP_STAT_WRITEBACK);
28412845
inc_node_page_state(page, NR_WRITEBACK);
2846+
inc_zone_page_state(page, NR_ZONE_WRITE_PENDING);
28422847
}
28432848
unlock_page_memcg(page);
28442849
return ret;

mm/page_alloc.c

+10-39
Original file line numberDiff line numberDiff line change
@@ -3402,7 +3402,6 @@ should_reclaim_retry(gfp_t gfp_mask, unsigned order,
34023402
{
34033403
struct zone *zone;
34043404
struct zoneref *z;
3405-
pg_data_t *current_pgdat = NULL;
34063405

34073406
/*
34083407
* Make sure we converge to OOM if we cannot make any progress
@@ -3411,15 +3410,6 @@ should_reclaim_retry(gfp_t gfp_mask, unsigned order,
34113410
if (no_progress_loops > MAX_RECLAIM_RETRIES)
34123411
return false;
34133412

3414-
/*
3415-
* Blindly retry lowmem allocation requests that are often ignored by
3416-
* the OOM killer up to MAX_RECLAIM_RETRIES as we not have a reliable
3417-
* and fast means of calculating reclaimable, dirty and writeback pages
3418-
* in eligible zones.
3419-
*/
3420-
if (ac->high_zoneidx < ZONE_NORMAL)
3421-
goto out;
3422-
34233413
/*
34243414
* Keep reclaiming pages while there is a chance this will lead
34253415
* somewhere. If none of the target zones can satisfy our allocation
@@ -3430,38 +3420,18 @@ should_reclaim_retry(gfp_t gfp_mask, unsigned order,
34303420
ac->nodemask) {
34313421
unsigned long available;
34323422
unsigned long reclaimable;
3433-
int zid;
34343423

3435-
if (current_pgdat == zone->zone_pgdat)
3436-
continue;
3437-
3438-
current_pgdat = zone->zone_pgdat;
3439-
available = reclaimable = pgdat_reclaimable_pages(current_pgdat);
3424+
available = reclaimable = zone_reclaimable_pages(zone);
34403425
available -= DIV_ROUND_UP(no_progress_loops * available,
34413426
MAX_RECLAIM_RETRIES);
3442-
3443-
/* Account for all free pages on eligible zones */
3444-
for (zid = 0; zid <= zone_idx(zone); zid++) {
3445-
struct zone *acct_zone = &current_pgdat->node_zones[zid];
3446-
3447-
available += zone_page_state_snapshot(acct_zone, NR_FREE_PAGES);
3448-
}
3427+
available += zone_page_state_snapshot(zone, NR_FREE_PAGES);
34493428

34503429
/*
34513430
* Would the allocation succeed if we reclaimed the whole
3452-
* available? This is approximate because there is no
3453-
* accurate count of reclaimable pages per zone.
3431+
* available?
34543432
*/
3455-
for (zid = 0; zid <= zone_idx(zone); zid++) {
3456-
struct zone *check_zone = &current_pgdat->node_zones[zid];
3457-
unsigned long estimate;
3458-
3459-
estimate = min(check_zone->managed_pages, available);
3460-
if (!__zone_watermark_ok(check_zone, order,
3461-
min_wmark_pages(check_zone), ac_classzone_idx(ac),
3462-
alloc_flags, estimate))
3463-
continue;
3464-
3433+
if (__zone_watermark_ok(zone, order, min_wmark_pages(zone),
3434+
ac_classzone_idx(ac), alloc_flags, available)) {
34653435
/*
34663436
* If we didn't make any progress and have a lot of
34673437
* dirty + writeback pages then we should wait for
@@ -3471,16 +3441,15 @@ should_reclaim_retry(gfp_t gfp_mask, unsigned order,
34713441
if (!did_some_progress) {
34723442
unsigned long write_pending;
34733443

3474-
write_pending =
3475-
node_page_state(current_pgdat, NR_WRITEBACK) +
3476-
node_page_state(current_pgdat, NR_FILE_DIRTY);
3444+
write_pending = zone_page_state_snapshot(zone,
3445+
NR_ZONE_WRITE_PENDING);
34773446

34783447
if (2 * write_pending > reclaimable) {
34793448
congestion_wait(BLK_RW_ASYNC, HZ/10);
34803449
return true;
34813450
}
34823451
}
3483-
out:
3452+
34843453
/*
34853454
* Memory allocation/reclaim might be called from a WQ
34863455
* context and the current implementation of the WQ
@@ -4361,6 +4330,7 @@ void show_free_areas(unsigned int filter)
43614330
" active_file:%lukB"
43624331
" inactive_file:%lukB"
43634332
" unevictable:%lukB"
4333+
" writepending:%lukB"
43644334
" present:%lukB"
43654335
" managed:%lukB"
43664336
" mlocked:%lukB"
@@ -4383,6 +4353,7 @@ void show_free_areas(unsigned int filter)
43834353
K(zone_page_state(zone, NR_ZONE_ACTIVE_FILE)),
43844354
K(zone_page_state(zone, NR_ZONE_INACTIVE_FILE)),
43854355
K(zone_page_state(zone, NR_ZONE_UNEVICTABLE)),
4356+
K(zone_page_state(zone, NR_ZONE_WRITE_PENDING)),
43864357
K(zone->present_pages),
43874358
K(zone->managed_pages),
43884359
K(zone_page_state(zone, NR_MLOCK)),

mm/vmscan.c

+18
Original file line numberDiff line numberDiff line change
@@ -194,6 +194,24 @@ static bool sane_reclaim(struct scan_control *sc)
194194
}
195195
#endif
196196

197+
/*
198+
* This misses isolated pages which are not accounted for to save counters.
199+
* As the data only determines if reclaim or compaction continues, it is
200+
* not expected that isolated pages will be a dominating factor.
201+
*/
202+
unsigned long zone_reclaimable_pages(struct zone *zone)
203+
{
204+
unsigned long nr;
205+
206+
nr = zone_page_state_snapshot(zone, NR_ZONE_INACTIVE_FILE) +
207+
zone_page_state_snapshot(zone, NR_ZONE_ACTIVE_FILE);
208+
if (get_nr_swap_pages() > 0)
209+
nr += zone_page_state_snapshot(zone, NR_ZONE_INACTIVE_ANON) +
210+
zone_page_state_snapshot(zone, NR_ZONE_ACTIVE_ANON);
211+
212+
return nr;
213+
}
214+
197215
unsigned long pgdat_reclaimable_pages(struct pglist_data *pgdat)
198216
{
199217
unsigned long nr;

mm/vmstat.c

+1
Original file line numberDiff line numberDiff line change
@@ -926,6 +926,7 @@ const char * const vmstat_text[] = {
926926
"nr_zone_inactive_file",
927927
"nr_zone_active_file",
928928
"nr_zone_unevictable",
929+
"nr_zone_write_pending",
929930
"nr_mlock",
930931
"nr_slab_reclaimable",
931932
"nr_slab_unreclaimable",

0 commit comments

Comments
 (0)