
Commit b6845b2

Mel Gorman authored and sfrothwell committed
mm: vmscan: do not throttle based on pfmemalloc reserves if node has no ZONE_NORMAL
throttle_direct_reclaim() is meant to trigger during swap-over-network, during which the min watermark is treated as a pfmemalloc reserve. It throttles on the first node in the zonelist, but this is flawed.

The user-visible impact is that a process running on a CPU whose local memory node has no ZONE_NORMAL will stall for prolonged periods of time, possibly indefinitely. This is due to throttle_direct_reclaim() thinking the pfmemalloc reserves are depleted when in fact they don't exist on that node.

On a NUMA machine running a 32-bit kernel (I know), allocation requests from CPUs on node 1 would detect no pfmemalloc reserves and the process gets throttled. This patch adjusts throttling of direct reclaim to throttle based on the first node in the zonelist that has a usable ZONE_NORMAL or lower zone.

Signed-off-by: Mel Gorman <mgorman@suse.de>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
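To make the failure mode concrete, below is a standalone user-space sketch (illustration only, not code from this commit) modelling the reserve arithmetic in pfmemalloc_watermark_ok(). On a node whose ZONE_NORMAL and lower zones are all unpopulated, the summed reserve and free counts are both zero, so free_pages > pfmemalloc_reserve / 2 can never hold and the task throttles forever; skipping unpopulated zones and treating a zero reserve as "do not throttle" avoids that. All structures and numbers here are invented for the example.

/*
 * Standalone model (not kernel code) of the pfmemalloc watermark check
 * described above. Zone layout and values are invented for illustration;
 * only the arithmetic mirrors the kernel logic.
 */
#include <stdbool.h>
#include <stdio.h>

#define NR_MODEL_ZONES    3	/* DMA, NORMAL, HIGHMEM in this model */
#define MODEL_ZONE_NORMAL 1

struct model_zone {
	bool populated;
	unsigned long min_wmark;	/* pfmemalloc reserve contribution */
	unsigned long free_pages;
};

struct model_node {
	struct model_zone zones[NR_MODEL_ZONES];
};

/* Old behaviour: sum reserves over ZONE_NORMAL and below, even if empty */
static bool watermark_ok_old(const struct model_node *node)
{
	unsigned long reserve = 0, free = 0;
	int i;

	for (i = 0; i <= MODEL_ZONE_NORMAL; i++) {
		reserve += node->zones[i].min_wmark;
		free += node->zones[i].free_pages;
	}
	return free > reserve / 2;	/* 0 > 0 is false: throttled forever */
}

/* New behaviour: skip unpopulated zones; no reserves means do not throttle */
static bool watermark_ok_new(const struct model_node *node)
{
	unsigned long reserve = 0, free = 0;
	int i;

	for (i = 0; i <= MODEL_ZONE_NORMAL; i++) {
		if (!node->zones[i].populated)
			continue;
		reserve += node->zones[i].min_wmark;
		free += node->zones[i].free_pages;
	}
	if (!reserve)
		return true;
	return free > reserve / 2;
}

int main(void)
{
	/* Node 1 on a 32-bit NUMA machine: only HIGHMEM is populated */
	struct model_node node1 = {
		.zones = {
			[2] = { .populated = true, .min_wmark = 1024,
				.free_pages = 50000 },
		},
	};

	printf("old: wmark_ok=%d (0 means throttle)\n", watermark_ok_old(&node1));
	printf("new: wmark_ok=%d\n", watermark_ok_new(&node1));
	return 0;
}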
1 parent 1a3a32e commit b6845b2

File tree: 1 file changed, +27 −6 lines


mm/vmscan.c: +27 −6
@@ -2537,10 +2537,17 @@ static bool pfmemalloc_watermark_ok(pg_data_t *pgdat)
 
 	for (i = 0; i <= ZONE_NORMAL; i++) {
 		zone = &pgdat->node_zones[i];
+		if (!populated_zone(zone))
+			continue;
+
 		pfmemalloc_reserve += min_wmark_pages(zone);
 		free_pages += zone_page_state(zone, NR_FREE_PAGES);
 	}
 
+	/* If there are no reserves (unexpected config) then do not throttle */
+	if (!pfmemalloc_reserve)
+		return true;
+
 	wmark_ok = free_pages > pfmemalloc_reserve / 2;
 
 	/* kswapd must be awake if processes are being throttled */
@@ -2565,9 +2572,9 @@ static bool pfmemalloc_watermark_ok(pg_data_t *pgdat)
 static bool throttle_direct_reclaim(gfp_t gfp_mask, struct zonelist *zonelist,
 					nodemask_t *nodemask)
 {
+	struct zoneref *z;
 	struct zone *zone;
-	int high_zoneidx = gfp_zone(gfp_mask);
-	pg_data_t *pgdat;
+	pg_data_t *pgdat = NULL;
 
 	/*
 	 * Kernel threads should not be throttled as they may be indirectly
@@ -2586,10 +2593,24 @@ static bool throttle_direct_reclaim(gfp_t gfp_mask, struct zonelist *zonelist,
 	if (fatal_signal_pending(current))
 		goto out;
 
-	/* Check if the pfmemalloc reserves are ok */
-	first_zones_zonelist(zonelist, high_zoneidx, NULL, &zone);
-	pgdat = zone->zone_pgdat;
-	if (pfmemalloc_watermark_ok(pgdat))
+	/*
+	 * Check if the pfmemalloc reserves are ok by finding the first node
+	 * with a usable ZONE_NORMAL or lower zone
+	 */
+	for_each_zone_zonelist_nodemask(zone, z, zonelist,
+					gfp_mask, nodemask) {
+		if (zone_idx(zone) > ZONE_NORMAL)
+			continue;
+
+		/* Throttle based on the first usable node */
+		pgdat = zone->zone_pgdat;
+		if (pfmemalloc_watermark_ok(pgdat))
+			goto out;
+		break;
+	}
+
+	/* If no zone was usable by the allocation flags then do not throttle */
+	if (!pgdat)
 		goto out;
 
 	/* Account for the throttling */
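The second hunk's zonelist walk can be modelled the same way. The sketch below is again a user-space illustration with invented structures, not the kernel API: it picks the throttling node the way the new loop does, skipping zones above ZONE_NORMAL, deciding on the first node that offers a usable ZONE_NORMAL or lower zone, and declining to throttle if no such zone exists in the zonelist.

/*
 * Illustration only (not from the commit): a user-space model of the new
 * throttle_direct_reclaim() node selection. The zonelist is modelled as an
 * array of (node id, zone index) pairs in allocation preference order.
 */
#include <stdbool.h>
#include <stdio.h>

#define MODEL_ZONE_NORMAL  1
#define MODEL_ZONE_HIGHMEM 2

struct model_zoneref {
	int node;	/* node (pgdat) this zone belongs to */
	int zone_idx;	/* DMA = 0, NORMAL = 1, HIGHMEM = 2 in this model */
};

/* Stand-in for pfmemalloc_watermark_ok(); assume node 0 is under pressure */
static bool model_watermark_ok(int node)
{
	return node != 0;
}

/* Returns true when the caller should NOT be throttled */
static bool model_throttle_check(const struct model_zoneref *zonelist, int n)
{
	int picked_node = -1;
	int i;

	for (i = 0; i < n; i++) {
		/* Only ZONE_NORMAL or lower carries pfmemalloc reserves */
		if (zonelist[i].zone_idx > MODEL_ZONE_NORMAL)
			continue;

		/* The throttle decision is based on the first usable node */
		picked_node = zonelist[i].node;
		if (model_watermark_ok(picked_node))
			return true;
		break;
	}

	/* No usable ZONE_NORMAL or lower zone: do not throttle */
	if (picked_node == -1)
		return true;

	return false;	/* reserves depleted on a real node: throttle */
}

int main(void)
{
	/* CPU on node 1 of a 32-bit NUMA machine: local node has only HIGHMEM */
	const struct model_zoneref zonelist[] = {
		{ .node = 1, .zone_idx = MODEL_ZONE_HIGHMEM },
		{ .node = 0, .zone_idx = MODEL_ZONE_HIGHMEM },
		{ .node = 0, .zone_idx = MODEL_ZONE_NORMAL  },
		{ .node = 0, .zone_idx = 0                  },
	};

	printf("throttle? %s\n",
	       model_throttle_check(zonelist, 4) ? "no" : "yes");
	return 0;
}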

0 commit comments
