Skip to content

Commit 8531fc6

Browse files
mjkravetz authored and torvalds committed
hugetlb: add hugetlb demote page support
Demote page functionality will split a huge page into a number of huge pages of a smaller size.  For example, on x86 a 1GB huge page can be demoted into 512 2M huge pages.  Demotion is done 'in place' by simply splitting the huge page.

Added '*_for_demote' wrappers for remove_hugetlb_page, destroy_compound_hugetlb_page and prep_compound_gigantic_page for use by demote code.

[mike.kravetz@oracle.com: v4]
Link: https://lkml.kernel.org/r/6ca29b8e-527c-d6ec-900e-e6a43e4f8b73@oracle.com
Link: https://lkml.kernel.org/r/20211007181918.136982-6-mike.kravetz@oracle.com
Signed-off-by: Mike Kravetz <mike.kravetz@oracle.com>
Reviewed-by: Oscar Salvador <osalvador@suse.de>
Cc: "Aneesh Kumar K . V" <aneesh.kumar@linux.ibm.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Muchun Song <songmuchun@bytedance.com>
Cc: Naoya Horiguchi <naoya.horiguchi@linux.dev>
Cc: Nghia Le <nghialm78@gmail.com>
Cc: Zi Yan <ziy@nvidia.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
1 parent 34d9e35 commit 8531fc6

File tree

1 file changed

+92
-8
lines changed

1 file changed

+92
-8
lines changed

mm/hugetlb.c

+92-8
Original file line numberDiff line numberDiff line change
@@ -1270,7 +1270,7 @@ static int hstate_next_node_to_free(struct hstate *h, nodemask_t *nodes_allowed)
12701270
((node = hstate_next_node_to_free(hs, mask)) || 1); \
12711271
nr_nodes--)
12721272

1273-
#ifdef CONFIG_ARCH_HAS_GIGANTIC_PAGE
1273+
/* used to demote non-gigantic_huge pages as well */
12741274
static void __destroy_compound_gigantic_page(struct page *page,
12751275
unsigned int order, bool demote)
12761276
{
@@ -1293,6 +1293,13 @@ static void __destroy_compound_gigantic_page(struct page *page,
12931293
__ClearPageHead(page);
12941294
}
12951295

1296+
static void destroy_compound_hugetlb_page_for_demote(struct page *page,
1297+
unsigned int order)
1298+
{
1299+
__destroy_compound_gigantic_page(page, order, true);
1300+
}
1301+
1302+
#ifdef CONFIG_ARCH_HAS_GIGANTIC_PAGE
12961303
static void destroy_compound_gigantic_page(struct page *page,
12971304
unsigned int order)
12981305
{
@@ -1438,6 +1445,12 @@ static void remove_hugetlb_page(struct hstate *h, struct page *page,
14381445
__remove_hugetlb_page(h, page, adjust_surplus, false);
14391446
}
14401447

1448+
static void remove_hugetlb_page_for_demote(struct hstate *h, struct page *page,
1449+
bool adjust_surplus)
1450+
{
1451+
__remove_hugetlb_page(h, page, adjust_surplus, true);
1452+
}
1453+
14411454
static void add_hugetlb_page(struct hstate *h, struct page *page,
14421455
bool adjust_surplus)
14431456
{
@@ -1779,6 +1792,12 @@ static bool prep_compound_gigantic_page(struct page *page, unsigned int order)
17791792
return __prep_compound_gigantic_page(page, order, false);
17801793
}
17811794

1795+
static bool prep_compound_gigantic_page_for_demote(struct page *page,
1796+
unsigned int order)
1797+
{
1798+
return __prep_compound_gigantic_page(page, order, true);
1799+
}
1800+
17821801
/*
17831802
* PageHuge() only returns true for hugetlbfs pages, but not for normal or
17841803
* transparent huge pages. See the PageTransHuge() documentation for more
@@ -3304,9 +3323,72 @@ static int set_max_huge_pages(struct hstate *h, unsigned long count, int nid,
33043323
return 0;
33053324
}
33063325

3326+
static int demote_free_huge_page(struct hstate *h, struct page *page)
3327+
{
3328+
int i, nid = page_to_nid(page);
3329+
struct hstate *target_hstate;
3330+
int rc = 0;
3331+
3332+
target_hstate = size_to_hstate(PAGE_SIZE << h->demote_order);
3333+
3334+
remove_hugetlb_page_for_demote(h, page, false);
3335+
spin_unlock_irq(&hugetlb_lock);
3336+
3337+
rc = alloc_huge_page_vmemmap(h, page);
3338+
if (rc) {
3339+
/* Allocation of vmemmmap failed, we can not demote page */
3340+
spin_lock_irq(&hugetlb_lock);
3341+
set_page_refcounted(page);
3342+
add_hugetlb_page(h, page, false);
3343+
return rc;
3344+
}
3345+
3346+
/*
3347+
* Use destroy_compound_hugetlb_page_for_demote for all huge page
3348+
* sizes as it will not ref count pages.
3349+
*/
3350+
destroy_compound_hugetlb_page_for_demote(page, huge_page_order(h));
3351+
3352+
/*
3353+
* Taking target hstate mutex synchronizes with set_max_huge_pages.
3354+
* Without the mutex, pages added to target hstate could be marked
3355+
* as surplus.
3356+
*
3357+
* Note that we already hold h->resize_lock. To prevent deadlock,
3358+
* use the convention of always taking larger size hstate mutex first.
3359+
*/
3360+
mutex_lock(&target_hstate->resize_lock);
3361+
for (i = 0; i < pages_per_huge_page(h);
3362+
i += pages_per_huge_page(target_hstate)) {
3363+
if (hstate_is_gigantic(target_hstate))
3364+
prep_compound_gigantic_page_for_demote(page + i,
3365+
target_hstate->order);
3366+
else
3367+
prep_compound_page(page + i, target_hstate->order);
3368+
set_page_private(page + i, 0);
3369+
set_page_refcounted(page + i);
3370+
prep_new_huge_page(target_hstate, page + i, nid);
3371+
put_page(page + i);
3372+
}
3373+
mutex_unlock(&target_hstate->resize_lock);
3374+
3375+
spin_lock_irq(&hugetlb_lock);
3376+
3377+
/*
3378+
* Not absolutely necessary, but for consistency update max_huge_pages
3379+
* based on pool changes for the demoted page.
3380+
*/
3381+
h->max_huge_pages--;
3382+
target_hstate->max_huge_pages += pages_per_huge_page(h);
3383+
3384+
return rc;
3385+
}
3386+
33073387
static int demote_pool_huge_page(struct hstate *h, nodemask_t *nodes_allowed)
33083388
__must_hold(&hugetlb_lock)
33093389
{
3390+
int nr_nodes, node;
3391+
struct page *page;
33103392
int rc = 0;
33113393

33123394
lockdep_assert_held(&hugetlb_lock);
@@ -3317,9 +3399,15 @@ static int demote_pool_huge_page(struct hstate *h, nodemask_t *nodes_allowed)
33173399
return -EINVAL; /* internal error */
33183400
}
33193401

3320-
/*
3321-
* TODO - demote fucntionality will be added in subsequent patch
3322-
*/
3402+
for_each_node_mask_to_free(h, nr_nodes, node, nodes_allowed) {
3403+
if (!list_empty(&h->hugepage_freelists[node])) {
3404+
page = list_entry(h->hugepage_freelists[node].next,
3405+
struct page, lru);
3406+
rc = demote_free_huge_page(h, page);
3407+
break;
3408+
}
3409+
}
3410+
33233411
return rc;
33243412
}
33253413

@@ -3554,10 +3642,6 @@ static ssize_t demote_store(struct kobject *kobj,
35543642
/*
35553643
* Check for available pages to demote each time thorough the
35563644
* loop as demote_pool_huge_page will drop hugetlb_lock.
3557-
*
3558-
* NOTE: demote_pool_huge_page does not yet drop hugetlb_lock
3559-
* but will when full demote functionality is added in a later
3560-
* patch.
35613645
*/
35623646
if (nid != NUMA_NO_NODE)
35633647
nr_available = h->free_huge_pages_node[nid];

0 commit comments

Comments
 (0)