Commit 0030f53

hnaz authored and torvalds committed
mm: memcg: fix compaction/migration failing due to memcg limits
Compaction (and page migration in general) can currently be hindered by pages that are owned by memory cgroups which are at their limits and unreclaimable.

The reason is that the replacement page is being charged against the limit while the page being replaced is also still charged. But this seems unnecessary, given that only one of the two pages will still be in use after migration finishes.

This patch changes the memcg migration sequence so that the replacement page is not charged. Whatever page is still in use after successful or failed migration gets to keep the charge of the page that was going to be replaced.

The replacement page will still show up temporarily in the rss/cache statistics; this can be fixed in a later patch, as it's less urgent.

Reported-by: David Rientjes <rientjes@google.com>
Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Acked-by: Michal Hocko <mhocko@suse.cz>
Cc: Hugh Dickins <hughd@google.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Wanpeng Li <liwp.linux@gmail.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
1 parent 7374492 commit 0030f53
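To see the accounting change the message describes, here is a minimal, self-contained C model of it (not kernel code: struct memcg, old_prepare_migration() and new_prepare_migration() are made-up stand-ins for the res_counter logic). A cgroup already at its limit fails the old prepare step, because both the old and the replacement page were briefly charged at once; the new scheme charges nothing extra and so cannot fail.

/* Toy model of the accounting change; compile with any C compiler. */
#include <stdio.h>
#include <stdbool.h>

struct memcg { long usage, limit; };	/* stand-in for the res_counter */

/* Old scheme: the replacement page is charged up front. */
static bool old_prepare_migration(struct memcg *cg)
{
	if (cg->usage + 1 > cg->limit)
		return false;	/* -ENOMEM: migration is hindered */
	cg->usage++;		/* both pages charged for a while */
	return true;
}

/* New scheme: no extra charge; the surviving page keeps the old one. */
static void new_prepare_migration(struct memcg *cg)
{
	(void)cg;		/* nothing to charge, cannot fail */
}

int main(void)
{
	struct memcg cg = { .usage = 100, .limit = 100 };	/* at its limit */

	printf("old scheme migrates: %s\n",
	       old_prepare_migration(&cg) ? "yes" : "no");	/* prints "no" */
	new_prepare_migration(&cg);
	printf("new scheme migrates: yes (usage stays %ld/%ld)\n",
	       cg.usage, cg.limit);
	return 0;
}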

3 files changed: +43 -46 lines


include/linux/memcontrol.h

+5 -6
@@ -98,9 +98,9 @@ int mm_match_cgroup(const struct mm_struct *mm, const struct mem_cgroup *cgroup)
 
 extern struct cgroup_subsys_state *mem_cgroup_css(struct mem_cgroup *memcg);
 
-extern int
-mem_cgroup_prepare_migration(struct page *page,
-	struct page *newpage, struct mem_cgroup **memcgp, gfp_t gfp_mask);
+extern void
+mem_cgroup_prepare_migration(struct page *page, struct page *newpage,
+			     struct mem_cgroup **memcgp);
 extern void mem_cgroup_end_migration(struct mem_cgroup *memcg,
 	struct page *oldpage, struct page *newpage, bool migration_ok);
 
@@ -276,11 +276,10 @@ static inline struct cgroup_subsys_state
 	return NULL;
 }
 
-static inline int
+static inline void
 mem_cgroup_prepare_migration(struct page *page, struct page *newpage,
-	struct mem_cgroup **memcgp, gfp_t gfp_mask)
+	struct mem_cgroup **memcgp)
 {
-	return 0;
 }
 
 static inline void mem_cgroup_end_migration(struct mem_cgroup *memcg,

mm/memcontrol.c

+36 -31
@@ -2976,7 +2976,8 @@ static void mem_cgroup_do_uncharge(struct mem_cgroup *memcg,
  * uncharge if !page_mapped(page)
  */
 static struct mem_cgroup *
-__mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype)
+__mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype,
+			     bool end_migration)
 {
 	struct mem_cgroup *memcg = NULL;
 	unsigned int nr_pages = 1;
@@ -3020,7 +3021,16 @@ __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype)
 		/* fallthrough */
 	case MEM_CGROUP_CHARGE_TYPE_DROP:
 		/* See mem_cgroup_prepare_migration() */
-		if (page_mapped(page) || PageCgroupMigration(pc))
+		if (page_mapped(page))
+			goto unlock_out;
+		/*
+		 * Pages under migration may not be uncharged. But
+		 * end_migration() /must/ be the one uncharging the
+		 * unused post-migration page and so it has to call
+		 * here with the migration bit still set. See the
+		 * res_counter handling below.
+		 */
+		if (!end_migration && PageCgroupMigration(pc))
 			goto unlock_out;
 		break;
 	case MEM_CGROUP_CHARGE_TYPE_SWAPOUT:
@@ -3054,7 +3064,12 @@ __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype)
 		mem_cgroup_swap_statistics(memcg, true);
 		mem_cgroup_get(memcg);
 	}
-	if (!mem_cgroup_is_root(memcg))
+	/*
+	 * Migration does not charge the res_counter for the
+	 * replacement page, so leave it alone when phasing out the
+	 * page that is unused after the migration.
+	 */
+	if (!end_migration && !mem_cgroup_is_root(memcg))
 		mem_cgroup_do_uncharge(memcg, nr_pages, ctype);
 
 	return memcg;
@@ -3070,14 +3085,14 @@ void mem_cgroup_uncharge_page(struct page *page)
 	if (page_mapped(page))
 		return;
 	VM_BUG_ON(page->mapping && !PageAnon(page));
-	__mem_cgroup_uncharge_common(page, MEM_CGROUP_CHARGE_TYPE_ANON);
+	__mem_cgroup_uncharge_common(page, MEM_CGROUP_CHARGE_TYPE_ANON, false);
 }
 
 void mem_cgroup_uncharge_cache_page(struct page *page)
 {
 	VM_BUG_ON(page_mapped(page));
 	VM_BUG_ON(page->mapping);
-	__mem_cgroup_uncharge_common(page, MEM_CGROUP_CHARGE_TYPE_CACHE);
+	__mem_cgroup_uncharge_common(page, MEM_CGROUP_CHARGE_TYPE_CACHE, false);
 }
 
 /*
@@ -3141,7 +3156,7 @@ mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent, bool swapout)
 	if (!swapout) /* this was a swap cache but the swap is unused ! */
 		ctype = MEM_CGROUP_CHARGE_TYPE_DROP;
 
-	memcg = __mem_cgroup_uncharge_common(page, ctype);
+	memcg = __mem_cgroup_uncharge_common(page, ctype, false);
 
 	/*
 	 * record memcg information, if swapout && memcg != NULL,
@@ -3231,19 +3246,18 @@ static inline int mem_cgroup_move_swap_account(swp_entry_t entry,
  * Before starting migration, account PAGE_SIZE to mem_cgroup that the old
  * page belongs to.
  */
-int mem_cgroup_prepare_migration(struct page *page,
-	struct page *newpage, struct mem_cgroup **memcgp, gfp_t gfp_mask)
+void mem_cgroup_prepare_migration(struct page *page, struct page *newpage,
+				  struct mem_cgroup **memcgp)
 {
 	struct mem_cgroup *memcg = NULL;
 	struct page_cgroup *pc;
 	enum charge_type ctype;
-	int ret = 0;
 
 	*memcgp = NULL;
 
 	VM_BUG_ON(PageTransHuge(page));
 	if (mem_cgroup_disabled())
-		return 0;
+		return;
 
 	pc = lookup_page_cgroup(page);
 	lock_page_cgroup(pc);
@@ -3288,24 +3302,9 @@ int mem_cgroup_prepare_migration(struct page *page,
 	 * we return here.
 	 */
 	if (!memcg)
-		return 0;
+		return;
 
 	*memcgp = memcg;
-	ret = __mem_cgroup_try_charge(NULL, gfp_mask, 1, memcgp, false);
-	css_put(&memcg->css);/* drop extra refcnt */
-	if (ret) {
-		if (PageAnon(page)) {
-			lock_page_cgroup(pc);
-			ClearPageCgroupMigration(pc);
-			unlock_page_cgroup(pc);
-			/*
-			 * The old page may be fully unmapped while we kept it.
-			 */
-			mem_cgroup_uncharge_page(page);
-		}
-		/* we'll need to revisit this error code (we have -EINTR) */
-		return -ENOMEM;
-	}
 	/*
 	 * We charge new page before it's used/mapped. So, even if unlock_page()
 	 * is called before end_migration, we can catch all events on this new
@@ -3318,8 +3317,12 @@ int mem_cgroup_prepare_migration(struct page *page,
 		ctype = MEM_CGROUP_CHARGE_TYPE_CACHE;
 	else
 		ctype = MEM_CGROUP_CHARGE_TYPE_SHMEM;
+	/*
+	 * The page is committed to the memcg, but it's not actually
+	 * charged to the res_counter since we plan on replacing the
+	 * old one and only one page is going to be left afterwards.
+	 */
 	__mem_cgroup_commit_charge(memcg, newpage, 1, ctype, false);
-	return ret;
 }
 
 /* remove redundant charge if migration failed*/
@@ -3341,6 +3344,12 @@ void mem_cgroup_end_migration(struct mem_cgroup *memcg,
 		used = newpage;
 		unused = oldpage;
 	}
+	anon = PageAnon(used);
+	__mem_cgroup_uncharge_common(unused,
+				     anon ? MEM_CGROUP_CHARGE_TYPE_ANON
+					  : MEM_CGROUP_CHARGE_TYPE_CACHE,
+				     true);
+	css_put(&memcg->css);
 	/*
 	 * We disallowed uncharge of pages under migration because mapcount
 	 * of the page goes down to zero, temporarly.
@@ -3350,10 +3359,6 @@ void mem_cgroup_end_migration(struct mem_cgroup *memcg,
 	lock_page_cgroup(pc);
 	ClearPageCgroupMigration(pc);
 	unlock_page_cgroup(pc);
-	anon = PageAnon(used);
-	__mem_cgroup_uncharge_common(unused,
-		anon ? MEM_CGROUP_CHARGE_TYPE_ANON
-		     : MEM_CGROUP_CHARGE_TYPE_CACHE);
 
 	/*
 	 * If a page is a file cache, radix-tree replacement is very atomic

mm/migrate.c

+2 -9
@@ -683,7 +683,6 @@ static int __unmap_and_move(struct page *page, struct page *newpage,
 {
 	int rc = -EAGAIN;
 	int remap_swapcache = 1;
-	int charge = 0;
 	struct mem_cgroup *mem;
 	struct anon_vma *anon_vma = NULL;
 
@@ -725,12 +724,7 @@ static int __unmap_and_move(struct page *page, struct page *newpage,
 	}
 
 	/* charge against new page */
-	charge = mem_cgroup_prepare_migration(page, newpage, &mem, GFP_KERNEL);
-	if (charge == -ENOMEM) {
-		rc = -ENOMEM;
-		goto unlock;
-	}
-	BUG_ON(charge);
+	mem_cgroup_prepare_migration(page, newpage, &mem);
 
 	if (PageWriteback(page)) {
 		/*
@@ -820,8 +814,7 @@ static int __unmap_and_move(struct page *page, struct page *newpage,
 		put_anon_vma(anon_vma);
 
 uncharge:
-	if (!charge)
-		mem_cgroup_end_migration(mem, page, newpage, rc == 0);
+	mem_cgroup_end_migration(mem, page, newpage, rc == 0);
 unlock:
 	unlock_page(page);
 out:
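With the -ENOMEM path gone from __unmap_and_move(), the only remaining memcg decision happens at the end of migration: which of the two pages keeps the charge. The snippet below is a self-contained sketch of that used/unused selection as mem_cgroup_end_migration() performs it (plain C with a made-up struct page stand-in, not kernel code); the losing page is uncharged with end_migration=true, which leaves the res_counter, and thus the surviving page's charge, untouched.

#include <stdio.h>
#include <stdbool.h>

struct page { const char *name; };	/* stand-in for the kernel type */

/* Mirrors the selection at the top of mem_cgroup_end_migration(). */
static void pick_pages(struct page *oldpage, struct page *newpage,
		       bool migration_ok,
		       struct page **used, struct page **unused)
{
	if (migration_ok) {	/* migration succeeded: old page is dead */
		*used = newpage;
		*unused = oldpage;
	} else {		/* migration failed: new page is dropped */
		*used = oldpage;
		*unused = newpage;
	}
}

int main(void)
{
	struct page oldp = { "oldpage" }, newp = { "newpage" };
	struct page *used, *unused;

	pick_pages(&oldp, &newp, true, &used, &unused);
	printf("success: %s keeps the charge, %s is uncharged\n",
	       used->name, unused->name);

	pick_pages(&oldp, &newp, false, &used, &unused);
	printf("failure: %s keeps the charge, %s is uncharged\n",
	       used->name, unused->name);
	return 0;
}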
