
Commit 70b4459

gormanm authored and torvalds committed
mm, compaction: use free lists to quickly locate a migration source
The migration scanner is a linear scan of a zone with a potentially large
search space. Furthermore, many pageblocks are unusable such as those
filled with reserved pages or partially filled with pages that cannot
migrate. These still get scanned in the common case of allocating a THP
and the cost accumulates.

The patch uses a partial search of the free lists to locate a migration
source candidate that is marked as MOVABLE when allocating a THP. It
prefers picking a block with a larger number of free pages already, on
the basis that there are fewer pages to migrate to free the entire block.
The lowest PFN found during searches is tracked as the basis of the start
for the linear search after the first search of the free list fails.
After the search, the free list is shuffled so that the next search will
not encounter the same page. If the search fails then the subsequent
searches will be shorter and the linear scanner is used.

If this search fails, or if the request is for a small or
unmovable/reclaimable allocation then the linear scanner is still used.
It is somewhat pointless to use the list search in those cases. Small
free pages must be used for the search and there is no guarantee that
movable pages are located within that block that are contiguous.

                                     5.0.0-rc1              5.0.0-rc1
                                 noboost-v3r10          findmig-v3r15
Amean     fault-both-3      3771.41 (   0.00%)     3390.40 (  10.10%)
Amean     fault-both-5      5409.05 (   0.00%)     5082.28 (   6.04%)
Amean     fault-both-7      7040.74 (   0.00%)     7012.51 (   0.40%)
Amean     fault-both-12    11887.35 (   0.00%)    11346.63 (   4.55%)
Amean     fault-both-18    16718.19 (   0.00%)    15324.19 (   8.34%)
Amean     fault-both-24    21157.19 (   0.00%)    16088.50 *  23.96%*
Amean     fault-both-30    21175.92 (   0.00%)    18723.42 *  11.58%*
Amean     fault-both-32    21339.03 (   0.00%)    18612.01 *  12.78%*

                                5.0.0-rc1              5.0.0-rc1
                            noboost-v3r10          findmig-v3r15
Percentage huge-3        86.50 (   0.00%)       89.83 (   3.85%)
Percentage huge-5        92.52 (   0.00%)       91.96 (  -0.61%)
Percentage huge-7        92.44 (   0.00%)       92.85 (   0.44%)
Percentage huge-12       92.98 (   0.00%)       92.74 (  -0.25%)
Percentage huge-18       91.70 (   0.00%)       91.71 (   0.02%)
Percentage huge-24       91.59 (   0.00%)       92.13 (   0.60%)
Percentage huge-30       90.14 (   0.00%)       93.79 (   4.04%)
Percentage huge-32       90.03 (   0.00%)       91.27 (   1.37%)

This shows an improvement in allocation latencies with similar allocation
success rates. While not presented, there was a 31% reduction in migration
scanning and an 8% reduction in system CPU usage. A 2-socket machine
showed similar benefits.

[mgorman@techsingularity.net: several fixes]
Link: http://lkml.kernel.org/r/20190204120111.GL9565@techsingularity.net
[vbabka@suse.cz: migrate block that was found-fast, some optimisations]
Link: http://lkml.kernel.org/r/20190118175136.31341-10-mgorman@techsingularity.net
Signed-off-by: Mel Gorman <mgorman@techsingularity.net>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Dan Carpenter <dan.carpenter@oracle.com>
Cc: David Rientjes <rientjes@google.com>
Cc: YueHaibing <yuehaibing@huawei.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
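Editor's note: the "subsequent searches will be shorter" behaviour comes from an exponentially decaying scan budget, where every failed free-list search halves the number of entries the next search may examine. A minimal standalone sketch of that arithmetic (userspace C, not kernel code; COMPACT_CLUSTER_MAX is SWAP_CLUSTER_MAX, i.e. 32, in the kernel):

#include <stdio.h>

#define COMPACT_CLUSTER_MAX 32	/* SWAP_CLUSTER_MAX in the kernel */

/* Each failed fast search halves the budget for the next attempt */
static unsigned int freelist_scan_limit(unsigned int fast_search_fail)
{
	return (COMPACT_CLUSTER_MAX >> fast_search_fail) + 1;
}

int main(void)
{
	/* Prints 33, 17, 9, 5, 3, 2, 1: the budget decays but never hits zero */
	for (unsigned int fail = 0; fail < 7; fail++)
		printf("failures=%u limit=%u\n", fail, freelist_scan_limit(fail));
	return 0;
}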
1 parent fd1444b commit 70b4459

File tree

4 files changed

+188 -9 lines changed

drivers/gpu/drm/i915/i915_utils.h

-6
@@ -123,12 +123,6 @@ static inline u64 ptr_to_u64(const void *ptr)
 
 #include <linux/list.h>
 
-static inline int list_is_first(const struct list_head *list,
-				const struct list_head *head)
-{
-	return head->next == list;
-}
-
 static inline void __list_del_many(struct list_head *head,
 				   struct list_head *first)
 {

include/linux/list.h

+11
@@ -206,6 +206,17 @@ static inline void list_bulk_move_tail(struct list_head *head,
 	head->prev = last;
 }
 
+/**
+ * list_is_first -- tests whether @list is the first entry in list @head
+ * @list: the entry to test
+ * @head: the head of the list
+ */
+static inline int list_is_first(const struct list_head *list,
+				const struct list_head *head)
+{
+	return list->prev == head;
+}
+
 /**
  * list_is_last - tests whether @list is the last entry in list @head
  * @list: the entry to test
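As a quick illustration of the helper's semantics: an entry is the first in the list exactly when its ->prev pointer is the list head. A self-contained userspace sketch (mock list types, not the real <linux/list.h>):

#include <assert.h>

struct list_head {
	struct list_head *next, *prev;
};

static void init_list(struct list_head *head)
{
	head->next = head->prev = head;
}

static void add_tail(struct list_head *new, struct list_head *head)
{
	new->prev = head->prev;
	new->next = head;
	head->prev->next = new;
	head->prev = new;
}

static int list_is_first(const struct list_head *list,
			 const struct list_head *head)
{
	return list->prev == head;
}

int main(void)
{
	struct list_head head, a, b;

	init_list(&head);
	add_tail(&a, &head);	/* a becomes the first entry */
	add_tail(&b, &head);	/* b becomes the second entry */

	assert(list_is_first(&a, &head));
	assert(!list_is_first(&b, &head));
	return 0;
}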

mm/compaction.c

+175 -3
@@ -1040,6 +1040,12 @@ static bool suitable_migration_target(struct compact_control *cc,
 	return false;
 }
 
+static inline unsigned int
+freelist_scan_limit(struct compact_control *cc)
+{
+	return (COMPACT_CLUSTER_MAX >> cc->fast_search_fail) + 1;
+}
+
 /*
  * Test whether the free scanner has reached the same or lower pageblock than
  * the migration scanner, and compaction should thus terminate.
@@ -1050,6 +1056,19 @@ static inline bool compact_scanners_met(struct compact_control *cc)
 		<= (cc->migrate_pfn >> pageblock_order);
 }
 
+/* Reorder the free list to reduce repeated future searches */
+static void
+move_freelist_tail(struct list_head *freelist, struct page *freepage)
+{
+	LIST_HEAD(sublist);
+
+	if (!list_is_first(freelist, &freepage->lru)) {
+		list_cut_position(&sublist, freelist, &freepage->lru);
+		if (!list_empty(&sublist))
+			list_splice_tail(&sublist, freelist);
+	}
+}
+
 /*
  * Based on information in the current compact_control, find blocks
  * suitable for isolating free pages from and then isolate them.
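To make the reordering concrete: move_freelist_tail() splices everything from the head of the free list up to and including the found page to the tail, so the next search starts with pages it has not recently seen. A hedged userspace sketch, with hand-rolled pointer surgery standing in for list_cut_position()/list_splice_tail() (rotate_to_tail and the mock helpers are hypothetical names, not kernel API):

#include <stdio.h>

struct list_head { struct list_head *next, *prev; };

static void init_list(struct list_head *h) { h->next = h->prev = h; }

static void add_tail(struct list_head *n, struct list_head *h)
{
	n->prev = h->prev; n->next = h;
	h->prev->next = n; h->prev = n;
}

/*
 * Move the entries from the first entry up to and including @entry to
 * the tail of the list at @head, mirroring the cut-and-splice effect of
 * move_freelist_tail() in the patch.
 */
static void rotate_to_tail(struct list_head *head, struct list_head *entry)
{
	struct list_head *first = head->next;

	if (entry == head->prev)	/* already last: nothing to do */
		return;

	/* Detach [first .. entry] from the front of the list */
	head->next = entry->next;
	entry->next->prev = head;

	/* Splice the detached run back in at the tail */
	first->prev = head->prev;
	head->prev->next = first;
	entry->next = head;
	head->prev = entry;
}

int main(void)
{
	struct list_head head, n[4];
	const char *name[4] = { "a", "b", "c", "d" };

	init_list(&head);
	for (int i = 0; i < 4; i++)
		add_tail(&n[i], &head);

	rotate_to_tail(&head, &n[1]);	/* move [a, b] behind [c, d] */

	/* Prints: c d a b */
	for (struct list_head *p = head.next; p != &head; p = p->next)
		printf("%s ", name[(int)(p - n)]);
	printf("\n");
	return 0;
}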
@@ -1207,6 +1226,148 @@ typedef enum {
  */
 int sysctl_compact_unevictable_allowed __read_mostly = 1;
 
+static inline void
+update_fast_start_pfn(struct compact_control *cc, unsigned long pfn)
+{
+	if (cc->fast_start_pfn == ULONG_MAX)
+		return;
+
+	if (!cc->fast_start_pfn)
+		cc->fast_start_pfn = pfn;
+
+	cc->fast_start_pfn = min(cc->fast_start_pfn, pfn);
+}
+
+static inline unsigned long
+reinit_migrate_pfn(struct compact_control *cc)
+{
+	if (!cc->fast_start_pfn || cc->fast_start_pfn == ULONG_MAX)
+		return cc->migrate_pfn;
+
+	cc->migrate_pfn = cc->fast_start_pfn;
+	cc->fast_start_pfn = ULONG_MAX;
+
+	return cc->migrate_pfn;
+}
+
+/*
+ * Briefly search the free lists for a migration source that already has
+ * some free pages to reduce the number of pages that need migration
+ * before a pageblock is free.
+ */
+static unsigned long fast_find_migrateblock(struct compact_control *cc)
+{
+	unsigned int limit = freelist_scan_limit(cc);
+	unsigned int nr_scanned = 0;
+	unsigned long distance;
+	unsigned long pfn = cc->migrate_pfn;
+	unsigned long high_pfn;
+	int order;
+
+	/* Skip hints are relied on to avoid repeats on the fast search */
+	if (cc->ignore_skip_hint)
+		return pfn;
+
+	/*
+	 * If the migrate_pfn is not at the start of a zone or the start
+	 * of a pageblock then assume this is a continuation of a previous
+	 * scan restarted due to COMPACT_CLUSTER_MAX.
+	 */
+	if (pfn != cc->zone->zone_start_pfn && pfn != pageblock_start_pfn(pfn))
+		return pfn;
+
+	/*
+	 * For smaller orders, just linearly scan as the number of pages
+	 * to migrate should be relatively small and does not necessarily
+	 * justify freeing up a large block for a small allocation.
+	 */
+	if (cc->order <= PAGE_ALLOC_COSTLY_ORDER)
+		return pfn;
+
+	/*
+	 * Only allow kcompactd and direct requests for movable pages to
+	 * quickly clear out a MOVABLE pageblock for allocation. This
+	 * reduces the risk that a large movable pageblock is freed for
+	 * an unmovable/reclaimable small allocation.
+	 */
+	if (cc->direct_compaction && cc->migratetype != MIGRATE_MOVABLE)
+		return pfn;
+
+	/*
+	 * When starting the migration scanner, pick any pageblock within the
+	 * first half of the search space. Otherwise try and pick a pageblock
+	 * within the first eighth to reduce the chances that a migration
+	 * target later becomes a source.
+	 */
+	distance = (cc->free_pfn - cc->migrate_pfn) >> 1;
+	if (cc->migrate_pfn != cc->zone->zone_start_pfn)
+		distance >>= 2;
+	high_pfn = pageblock_start_pfn(cc->migrate_pfn + distance);
+
+	for (order = cc->order - 1;
+	     order >= PAGE_ALLOC_COSTLY_ORDER && pfn == cc->migrate_pfn && nr_scanned < limit;
+	     order--) {
+		struct free_area *area = &cc->zone->free_area[order];
+		struct list_head *freelist;
+		unsigned long flags;
+		struct page *freepage;
+
+		if (!area->nr_free)
+			continue;
+
+		spin_lock_irqsave(&cc->zone->lock, flags);
+		freelist = &area->free_list[MIGRATE_MOVABLE];
+		list_for_each_entry(freepage, freelist, lru) {
+			unsigned long free_pfn;
+
+			nr_scanned++;
+			free_pfn = page_to_pfn(freepage);
+			if (free_pfn < high_pfn) {
+				update_fast_start_pfn(cc, free_pfn);
+
+				/*
+				 * Avoid if skipped recently. Ideally it would
+				 * move to the tail but even safe iteration of
+				 * the list assumes an entry is deleted, not
+				 * reordered.
+				 */
+				if (get_pageblock_skip(freepage)) {
+					if (list_is_last(freelist, &freepage->lru))
+						break;
+
+					continue;
+				}
+
+				/* Reorder so a future search skips recent pages */
+				move_freelist_tail(freelist, freepage);
+
+				pfn = pageblock_start_pfn(free_pfn);
+				cc->fast_search_fail = 0;
+				set_pageblock_skip(freepage);
+				break;
+			}
+
+			if (nr_scanned >= limit) {
+				cc->fast_search_fail++;
+				move_freelist_tail(freelist, freepage);
+				break;
+			}
+		}
+		spin_unlock_irqrestore(&cc->zone->lock, flags);
+	}
+
+	cc->total_migrate_scanned += nr_scanned;
+
+	/*
+	 * If fast scanning failed then use a cached entry for a page block
+	 * that had free pages as the basis for starting a linear scan.
+	 */
+	if (pfn == cc->migrate_pfn)
+		pfn = reinit_migrate_pfn(cc);
+
+	return pfn;
+}
+
 /*
  * Isolate all pages that can be migrated from the first suitable block,
  * starting at the block pointed to by the migrate scanner pfn within
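The search window in fast_find_migrateblock() is plain shift arithmetic: half the remaining space on a fresh scan, an eighth on a resumed one. A small standalone sketch under assumed values (hypothetical PFNs; 2MB pageblocks, i.e. 512 4K pages, assumed for the rounding):

#include <stdio.h>

int main(void)
{
	unsigned long zone_start_pfn = 0x1000;
	unsigned long migrate_pfn    = 0x9000;	/* scanner already restarted */
	unsigned long free_pfn       = 0x49000;	/* free scanner position */
	unsigned long distance, high_pfn;

	distance = (free_pfn - migrate_pfn) >> 1;	/* half the space */
	if (migrate_pfn != zone_start_pfn)
		distance >>= 2;				/* now an eighth */

	/* pageblock_start_pfn() rounds down; 2MB blocks = 512 pfns */
	high_pfn = (migrate_pfn + distance) & ~511UL;
	printf("search up to pfn %#lx\n", high_pfn);	/* 0x11000 */
	return 0;
}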
@@ -1222,16 +1383,25 @@ static isolate_migrate_t isolate_migratepages(struct zone *zone,
 	const isolate_mode_t isolate_mode =
 		(sysctl_compact_unevictable_allowed ? ISOLATE_UNEVICTABLE : 0) |
 		(cc->mode != MIGRATE_SYNC ? ISOLATE_ASYNC_MIGRATE : 0);
+	bool fast_find_block;
 
 	/*
 	 * Start at where we last stopped, or beginning of the zone as
-	 * initialized by compact_zone()
+	 * initialized by compact_zone(). The first failure will use
+	 * the lowest PFN as the starting point for linear scanning.
 	 */
-	low_pfn = cc->migrate_pfn;
+	low_pfn = fast_find_migrateblock(cc);
 	block_start_pfn = pageblock_start_pfn(low_pfn);
 	if (block_start_pfn < zone->zone_start_pfn)
 		block_start_pfn = zone->zone_start_pfn;
 
+	/*
+	 * fast_find_migrateblock marks a pageblock skipped so to avoid
+	 * the isolation_suitable check below, check whether the fast
+	 * search was successful.
+	 */
+	fast_find_block = low_pfn != cc->migrate_pfn && !cc->fast_search_fail;
+
 	/* Only scan within a pageblock boundary */
 	block_end_pfn = pageblock_end_pfn(low_pfn);
 
@@ -1240,6 +1410,7 @@ static isolate_migrate_t isolate_migratepages(struct zone *zone,
 	 * Do not cross the free scanner.
 	 */
 	for (; block_end_pfn <= cc->free_pfn;
+			fast_find_block = false,
 			low_pfn = block_end_pfn,
 			block_start_pfn = block_end_pfn,
 			block_end_pfn += pageblock_nr_pages) {
@@ -1259,7 +1430,7 @@ static isolate_migrate_t isolate_migratepages(struct zone *zone,
 			continue;
 
 		/* If isolation recently failed, do not retry */
-		if (!isolation_suitable(cc, page))
+		if (!isolation_suitable(cc, page) && !fast_find_block)
 			continue;
 
 		/*
@@ -1550,6 +1721,7 @@ static enum compact_result compact_zone(struct compact_control *cc)
 	 * want to compact the whole zone), but check that it is initialised
 	 * by ensuring the values are within zone boundaries.
 	 */
+	cc->fast_start_pfn = 0;
 	if (cc->whole_zone) {
 		cc->migrate_pfn = start_pfn;
 		cc->free_pfn = pageblock_start_pfn(end_pfn - 1);
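The fast_find_block flag above can be read as a success probe: fast_find_migrateblock() returns a PFN different from cc->migrate_pfn only on a hit, and a hit also resets cc->fast_search_fail. A standalone restatement of that test (mock struct and hypothetical names, not kernel code):

#include <stdbool.h>
#include <stdio.h>

struct cc_mock {			/* stand-in for compact_control */
	unsigned long migrate_pfn;
	unsigned int fast_search_fail;
};

static bool fast_find_block(unsigned long low_pfn, const struct cc_mock *cc)
{
	/* A hit returned a different pfn and cleared the failure counter */
	return low_pfn != cc->migrate_pfn && !cc->fast_search_fail;
}

int main(void)
{
	struct cc_mock hit  = { .migrate_pfn = 0x9000, .fast_search_fail = 0 };
	struct cc_mock miss = { .migrate_pfn = 0x9000, .fast_search_fail = 1 };

	printf("hit:  %d\n", fast_find_block(0x11000, &hit));	/* prints 1 */
	printf("miss: %d\n", fast_find_block(0x9000, &miss));	/* prints 0 */
	return 0;
}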

mm/internal.h

+2
@@ -188,9 +188,11 @@ struct compact_control {
 	unsigned int nr_migratepages;	/* Number of pages to migrate */
 	unsigned long free_pfn;		/* isolate_freepages search base */
 	unsigned long migrate_pfn;	/* isolate_migratepages search base */
+	unsigned long fast_start_pfn;	/* a pfn to start linear scan from */
 	struct zone *zone;
 	unsigned long total_migrate_scanned;
 	unsigned long total_free_scanned;
+	unsigned int fast_search_fail;	/* failures to use free list searches */
 	const gfp_t gfp_mask;		/* gfp mask of a direct compactor */
 	int order;			/* order a direct compactor needs */
 	int migratetype;		/* migratetype of direct compactor */
