Skip to content

Commit 3ad3d90

Browse files
Xiao Guangrong authored and torvalds committed
mm: mmu_notifier: fix freed page still mapped in secondary MMU
mmu_notifier_release() is called when the process is exiting. It will delete all the mmu notifiers. But at this time the page belonging to the process is still present in page tables and is present on the LRU list, so this race will happen:

      CPU 0                                CPU 1
mmu_notifier_release:                  try_to_unmap:
   hlist_del_init_rcu(&mn->hlist);
                                          ptep_clear_flush_notify:
                                             mmu notifier not found
                                          free page !!!!!!
                                          /*
                                           * At this point, the page has been
                                           * freed, but it is still mapped in
                                           * the secondary MMU.
                                           */
   mn->ops->release(mn, mm);

Then the box is not stable and sometimes we can get this bug:

[ 738.075923] BUG: Bad page state in process migrate-perf pfn:03bec
[ 738.075931] page:ffffea00000efb00 count:0 mapcount:0 mapping: (null) index:0x8076
[ 738.075936] page flags: 0x20000000000014(referenced|dirty)

The same issue is present in mmu_notifier_unregister().

We can call ->release before deleting the notifier to ensure the page has been unmapped from the secondary MMU before it is freed.

Signed-off-by: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Cc: Avi Kivity <avi@redhat.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Paul Gortmaker <paul.gortmaker@windriver.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
1 parent bdf4f4d commit 3ad3d90

File tree

1 file changed

+23
-22
lines changed

1 file changed

+23
-22
lines changed

mm/mmu_notifier.c

+23-22
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,24 @@
3333
void __mmu_notifier_release(struct mm_struct *mm)
3434
{
3535
struct mmu_notifier *mn;
36+
struct hlist_node *n;
37+
38+
/*
39+
* RCU here will block mmu_notifier_unregister until
40+
* ->release returns.
41+
*/
42+
rcu_read_lock();
43+
hlist_for_each_entry_rcu(mn, n, &mm->mmu_notifier_mm->list, hlist)
44+
/*
45+
* if ->release runs before mmu_notifier_unregister it
46+
* must be handled as it's the only way for the driver
47+
* to flush all existing sptes and stop the driver
48+
* from establishing any more sptes before all the
49+
* pages in the mm are freed.
50+
*/
51+
if (mn->ops->release)
52+
mn->ops->release(mn, mm);
53+
rcu_read_unlock();
3654

3755
spin_lock(&mm->mmu_notifier_mm->lock);
3856
while (unlikely(!hlist_empty(&mm->mmu_notifier_mm->list))) {
@@ -46,23 +64,6 @@ void __mmu_notifier_release(struct mm_struct *mm)
4664
* mmu_notifier_unregister to return.
4765
*/
4866
hlist_del_init_rcu(&mn->hlist);
49-
/*
50-
* RCU here will block mmu_notifier_unregister until
51-
* ->release returns.
52-
*/
53-
rcu_read_lock();
54-
spin_unlock(&mm->mmu_notifier_mm->lock);
55-
/*
56-
* if ->release runs before mmu_notifier_unregister it
57-
* must be handled as it's the only way for the driver
58-
* to flush all existing sptes and stop the driver
59-
* from establishing any more sptes before all the
60-
* pages in the mm are freed.
61-
*/
62-
if (mn->ops->release)
63-
mn->ops->release(mn, mm);
64-
rcu_read_unlock();
65-
spin_lock(&mm->mmu_notifier_mm->lock);
6667
}
6768
spin_unlock(&mm->mmu_notifier_mm->lock);
6869

@@ -284,16 +285,13 @@ void mmu_notifier_unregister(struct mmu_notifier *mn, struct mm_struct *mm)
284285
{
285286
BUG_ON(atomic_read(&mm->mm_count) <= 0);
286287

287-
spin_lock(&mm->mmu_notifier_mm->lock);
288288
if (!hlist_unhashed(&mn->hlist)) {
289-
hlist_del_rcu(&mn->hlist);
290-
291289
/*
292290
* RCU here will force exit_mmap to wait ->release to finish
293291
* before freeing the pages.
294292
*/
295293
rcu_read_lock();
296-
spin_unlock(&mm->mmu_notifier_mm->lock);
294+
297295
/*
298296
* exit_mmap will block in mmu_notifier_release to
299297
* guarantee ->release is called before freeing the
@@ -302,8 +300,11 @@ void mmu_notifier_unregister(struct mmu_notifier *mn, struct mm_struct *mm)
302300
if (mn->ops->release)
303301
mn->ops->release(mn, mm);
304302
rcu_read_unlock();
305-
} else
303+
304+
spin_lock(&mm->mmu_notifier_mm->lock);
305+
hlist_del_rcu(&mn->hlist);
306306
spin_unlock(&mm->mmu_notifier_mm->lock);
307+
}
307308

308309
/*
309310
* Wait any running method to finish, of course including

0 commit comments

Comments
 (0)