@@ -1270,7 +1270,7 @@ static int hstate_next_node_to_free(struct hstate *h, nodemask_t *nodes_allowed)
 		((node = hstate_next_node_to_free(hs, mask)) || 1);	\
 		nr_nodes--)
 
-#ifdef CONFIG_ARCH_HAS_GIGANTIC_PAGE
+/* used to demote non-gigantic_huge pages as well */
 static void __destroy_compound_gigantic_page(struct page *page,
 					unsigned int order, bool demote)
 {
@@ -1293,6 +1293,13 @@ static void __destroy_compound_gigantic_page(struct page *page,
 	__ClearPageHead(page);
 }
 
+static void destroy_compound_hugetlb_page_for_demote(struct page *page,
+					unsigned int order)
+{
+	__destroy_compound_gigantic_page(page, order, true);
+}
+
+#ifdef CONFIG_ARCH_HAS_GIGANTIC_PAGE
 static void destroy_compound_gigantic_page(struct page *page,
 					unsigned int order)
 {
@@ -1438,6 +1445,12 @@ static void remove_hugetlb_page(struct hstate *h, struct page *page,
 	__remove_hugetlb_page(h, page, adjust_surplus, false);
 }
 
+static void remove_hugetlb_page_for_demote(struct hstate *h, struct page *page,
+							bool adjust_surplus)
+{
+	__remove_hugetlb_page(h, page, adjust_surplus, true);
+}
+
 static void add_hugetlb_page(struct hstate *h, struct page *page,
 			     bool adjust_surplus)
 {
@@ -1779,6 +1792,12 @@ static bool prep_compound_gigantic_page(struct page *page, unsigned int order)
 	return __prep_compound_gigantic_page(page, order, false);
 }
 
+static bool prep_compound_gigantic_page_for_demote(struct page *page,
+							unsigned int order)
+{
+	return __prep_compound_gigantic_page(page, order, true);
+}
+
 /*
  * PageHuge() only returns true for hugetlbfs pages, but not for normal or
  * transparent huge pages.  See the PageTransHuge() documentation for more
@@ -3304,9 +3323,72 @@ static int set_max_huge_pages(struct hstate *h, unsigned long count, int nid,
 	return 0;
 }
 
+static int demote_free_huge_page(struct hstate *h, struct page *page)
+{
+	int i, nid = page_to_nid(page);
+	struct hstate *target_hstate;
+	int rc = 0;
+
+	target_hstate = size_to_hstate(PAGE_SIZE << h->demote_order);
+
+	remove_hugetlb_page_for_demote(h, page, false);
+	spin_unlock_irq(&hugetlb_lock);
+
+	rc = alloc_huge_page_vmemmap(h, page);
+	if (rc) {
+		/* Allocation of vmemmmap failed, we can not demote page */
+		spin_lock_irq(&hugetlb_lock);
+		set_page_refcounted(page);
+		add_hugetlb_page(h, page, false);
+		return rc;
+	}
+
+	/*
+	 * Use destroy_compound_hugetlb_page_for_demote for all huge page
+	 * sizes as it will not ref count pages.
+	 */
+	destroy_compound_hugetlb_page_for_demote(page, huge_page_order(h));
+
+	/*
+	 * Taking target hstate mutex synchronizes with set_max_huge_pages.
+	 * Without the mutex, pages added to target hstate could be marked
+	 * as surplus.
+	 *
+	 * Note that we already hold h->resize_lock.  To prevent deadlock,
+	 * use the convention of always taking larger size hstate mutex first.
+	 */
+	mutex_lock(&target_hstate->resize_lock);
+	for (i = 0; i < pages_per_huge_page(h);
+				i += pages_per_huge_page(target_hstate)) {
+		if (hstate_is_gigantic(target_hstate))
+			prep_compound_gigantic_page_for_demote(page + i,
+							target_hstate->order);
+		else
+			prep_compound_page(page + i, target_hstate->order);
+		set_page_private(page + i, 0);
+		set_page_refcounted(page + i);
+		prep_new_huge_page(target_hstate, page + i, nid);
+		put_page(page + i);
+	}
+	mutex_unlock(&target_hstate->resize_lock);
+
+	spin_lock_irq(&hugetlb_lock);
+
+	/*
+	 * Not absolutely necessary, but for consistency update max_huge_pages
+	 * based on pool changes for the demoted page.
+	 */
+	h->max_huge_pages--;
+	target_hstate->max_huge_pages += pages_per_huge_page(h);
+
+	return rc;
+}
+
 static int demote_pool_huge_page(struct hstate *h, nodemask_t *nodes_allowed)
 	__must_hold(&hugetlb_lock)
 {
+	int nr_nodes, node;
+	struct page *page;
 	int rc = 0;
 
 	lockdep_assert_held(&hugetlb_lock);
@@ -3317,9 +3399,15 @@ static int demote_pool_huge_page(struct hstate *h, nodemask_t *nodes_allowed)
 		return -EINVAL;		/* internal error */
 	}
 
-	/*
-	 * TODO - demote fucntionality will be added in subsequent patch
-	 */
+	for_each_node_mask_to_free(h, nr_nodes, node, nodes_allowed) {
+		if (!list_empty(&h->hugepage_freelists[node])) {
+			page = list_entry(h->hugepage_freelists[node].next,
+					struct page, lru);
+			rc = demote_free_huge_page(h, page);
+			break;
+		}
+	}
+
 	return rc;
 }
 
@@ -3554,10 +3642,6 @@ static ssize_t demote_store(struct kobject *kobj,
 	/*
 	 * Check for available pages to demote each time thorough the
 	 * loop as demote_pool_huge_page will drop hugetlb_lock.
-	 *
-	 * NOTE: demote_pool_huge_page does not yet drop hugetlb_lock
-	 * but will when full demote functionality is added in a later
-	 * patch.
 	 */
 	if (nid != NUMA_NO_NODE)
 		nr_available = h->free_huge_pages_node[nid];