mirror of
https://github.com/lkl/linux.git
synced 2025-12-19 16:13:19 +09:00
mm: change do_vmi_align_munmap() tracking of VMAs to remove
The majority of calls to munmap a VM range fall within a single VMA. The maple tree is able to store a single entry at 0, with a size of 1, as a pointer and avoid any allocations. Change do_vmi_align_munmap() to store the VMAs being munmap()'ed into a tree indexed by the count. This will leverage the ability to store the first entry without a node allocation.

Storing the entries into a tree by the count, and not by the VMA start and end, means changing the functions which iterate over the entries. Update unmap_vmas() and free_pgtables() to take a maple state and a tree end address to support this functionality.

Passing the same maple state through to unmap_vmas() and free_pgtables() means the state needs to be reset between calls. This happens in the static unmap_region() and exit_mmap().

Link: https://lkml.kernel.org/r/20230724183157.3939892-4-Liam.Howlett@oracle.com
Signed-off-by: Liam R. Howlett <Liam.Howlett@oracle.com>
Cc: Peng Zhang <zhangpeng.00@bytedance.com>
Cc: Suren Baghdasaryan <surenb@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
This commit is contained in:
committed by
Andrew Morton
parent
8c314f3b55
commit
fd892593d4
@@ -2287,9 +2287,9 @@ static inline void zap_vma_pages(struct vm_area_struct *vma)
|
|||||||
zap_page_range_single(vma, vma->vm_start,
|
zap_page_range_single(vma, vma->vm_start,
|
||||||
vma->vm_end - vma->vm_start, NULL);
|
vma->vm_end - vma->vm_start, NULL);
|
||||||
}
|
}
|
||||||
void unmap_vmas(struct mmu_gather *tlb, struct maple_tree *mt,
|
void unmap_vmas(struct mmu_gather *tlb, struct ma_state *mas,
|
||||||
struct vm_area_struct *start_vma, unsigned long start,
|
struct vm_area_struct *start_vma, unsigned long start,
|
||||||
unsigned long end, bool mm_wr_locked);
|
unsigned long end, unsigned long tree_end, bool mm_wr_locked);
|
||||||
|
|
||||||
struct mmu_notifier_range;
|
struct mmu_notifier_range;
|
||||||
|
|
||||||
|
|||||||
@@ -109,7 +109,7 @@ bool __folio_end_writeback(struct folio *folio);
|
|||||||
void deactivate_file_folio(struct folio *folio);
|
void deactivate_file_folio(struct folio *folio);
|
||||||
void folio_activate(struct folio *folio);
|
void folio_activate(struct folio *folio);
|
||||||
|
|
||||||
void free_pgtables(struct mmu_gather *tlb, struct maple_tree *mt,
|
void free_pgtables(struct mmu_gather *tlb, struct ma_state *mas,
|
||||||
struct vm_area_struct *start_vma, unsigned long floor,
|
struct vm_area_struct *start_vma, unsigned long floor,
|
||||||
unsigned long ceiling, bool mm_wr_locked);
|
unsigned long ceiling, bool mm_wr_locked);
|
||||||
void pmd_install(struct mm_struct *mm, pmd_t *pmd, pgtable_t *pte);
|
void pmd_install(struct mm_struct *mm, pmd_t *pmd, pgtable_t *pte);
|
||||||
|
|||||||
16
mm/memory.c
16
mm/memory.c
@@ -361,12 +361,10 @@ void free_pgd_range(struct mmu_gather *tlb,
|
|||||||
} while (pgd++, addr = next, addr != end);
|
} while (pgd++, addr = next, addr != end);
|
||||||
}
|
}
|
||||||
|
|
||||||
void free_pgtables(struct mmu_gather *tlb, struct maple_tree *mt,
|
void free_pgtables(struct mmu_gather *tlb, struct ma_state *mas,
|
||||||
struct vm_area_struct *vma, unsigned long floor,
|
struct vm_area_struct *vma, unsigned long floor,
|
||||||
unsigned long ceiling, bool mm_wr_locked)
|
unsigned long ceiling, bool mm_wr_locked)
|
||||||
{
|
{
|
||||||
MA_STATE(mas, mt, vma->vm_end, vma->vm_end);
|
|
||||||
|
|
||||||
do {
|
do {
|
||||||
unsigned long addr = vma->vm_start;
|
unsigned long addr = vma->vm_start;
|
||||||
struct vm_area_struct *next;
|
struct vm_area_struct *next;
|
||||||
@@ -375,7 +373,7 @@ void free_pgtables(struct mmu_gather *tlb, struct maple_tree *mt,
|
|||||||
* Note: USER_PGTABLES_CEILING may be passed as ceiling and may
|
* Note: USER_PGTABLES_CEILING may be passed as ceiling and may
|
||||||
* be 0. This will underflow and is okay.
|
* be 0. This will underflow and is okay.
|
||||||
*/
|
*/
|
||||||
next = mas_find(&mas, ceiling - 1);
|
next = mas_find(mas, ceiling - 1);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Hide vma from rmap and truncate_pagecache before freeing
|
* Hide vma from rmap and truncate_pagecache before freeing
|
||||||
@@ -396,7 +394,7 @@ void free_pgtables(struct mmu_gather *tlb, struct maple_tree *mt,
|
|||||||
while (next && next->vm_start <= vma->vm_end + PMD_SIZE
|
while (next && next->vm_start <= vma->vm_end + PMD_SIZE
|
||||||
&& !is_vm_hugetlb_page(next)) {
|
&& !is_vm_hugetlb_page(next)) {
|
||||||
vma = next;
|
vma = next;
|
||||||
next = mas_find(&mas, ceiling - 1);
|
next = mas_find(mas, ceiling - 1);
|
||||||
if (mm_wr_locked)
|
if (mm_wr_locked)
|
||||||
vma_start_write(vma);
|
vma_start_write(vma);
|
||||||
unlink_anon_vmas(vma);
|
unlink_anon_vmas(vma);
|
||||||
@@ -1713,9 +1711,10 @@ static void unmap_single_vma(struct mmu_gather *tlb,
|
|||||||
* ensure that any thus-far unmapped pages are flushed before unmap_vmas()
|
* ensure that any thus-far unmapped pages are flushed before unmap_vmas()
|
||||||
* drops the lock and schedules.
|
* drops the lock and schedules.
|
||||||
*/
|
*/
|
||||||
void unmap_vmas(struct mmu_gather *tlb, struct maple_tree *mt,
|
void unmap_vmas(struct mmu_gather *tlb, struct ma_state *mas,
|
||||||
struct vm_area_struct *vma, unsigned long start_addr,
|
struct vm_area_struct *vma, unsigned long start_addr,
|
||||||
unsigned long end_addr, bool mm_wr_locked)
|
unsigned long end_addr, unsigned long tree_end,
|
||||||
|
bool mm_wr_locked)
|
||||||
{
|
{
|
||||||
struct mmu_notifier_range range;
|
struct mmu_notifier_range range;
|
||||||
struct zap_details details = {
|
struct zap_details details = {
|
||||||
@@ -1723,7 +1722,6 @@ void unmap_vmas(struct mmu_gather *tlb, struct maple_tree *mt,
|
|||||||
/* Careful - we need to zap private pages too! */
|
/* Careful - we need to zap private pages too! */
|
||||||
.even_cows = true,
|
.even_cows = true,
|
||||||
};
|
};
|
||||||
MA_STATE(mas, mt, vma->vm_end, vma->vm_end);
|
|
||||||
|
|
||||||
mmu_notifier_range_init(&range, MMU_NOTIFY_UNMAP, 0, vma->vm_mm,
|
mmu_notifier_range_init(&range, MMU_NOTIFY_UNMAP, 0, vma->vm_mm,
|
||||||
start_addr, end_addr);
|
start_addr, end_addr);
|
||||||
@@ -1731,7 +1729,7 @@ void unmap_vmas(struct mmu_gather *tlb, struct maple_tree *mt,
|
|||||||
do {
|
do {
|
||||||
unmap_single_vma(tlb, vma, start_addr, end_addr, &details,
|
unmap_single_vma(tlb, vma, start_addr, end_addr, &details,
|
||||||
mm_wr_locked);
|
mm_wr_locked);
|
||||||
} while ((vma = mas_find(&mas, end_addr - 1)) != NULL);
|
} while ((vma = mas_find(mas, tree_end - 1)) != NULL);
|
||||||
mmu_notifier_invalidate_range_end(&range);
|
mmu_notifier_invalidate_range_end(&range);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
41
mm/mmap.c
41
mm/mmap.c
@@ -76,10 +76,10 @@ int mmap_rnd_compat_bits __read_mostly = CONFIG_ARCH_MMAP_RND_COMPAT_BITS;
|
|||||||
static bool ignore_rlimit_data;
|
static bool ignore_rlimit_data;
|
||||||
core_param(ignore_rlimit_data, ignore_rlimit_data, bool, 0644);
|
core_param(ignore_rlimit_data, ignore_rlimit_data, bool, 0644);
|
||||||
|
|
||||||
static void unmap_region(struct mm_struct *mm, struct maple_tree *mt,
|
static void unmap_region(struct mm_struct *mm, struct ma_state *mas,
|
||||||
struct vm_area_struct *vma, struct vm_area_struct *prev,
|
struct vm_area_struct *vma, struct vm_area_struct *prev,
|
||||||
struct vm_area_struct *next, unsigned long start,
|
struct vm_area_struct *next, unsigned long start,
|
||||||
unsigned long end, bool mm_wr_locked);
|
unsigned long end, unsigned long tree_end, bool mm_wr_locked);
|
||||||
|
|
||||||
static pgprot_t vm_pgprot_modify(pgprot_t oldprot, unsigned long vm_flags)
|
static pgprot_t vm_pgprot_modify(pgprot_t oldprot, unsigned long vm_flags)
|
||||||
{
|
{
|
||||||
@@ -2293,18 +2293,20 @@ static inline void remove_mt(struct mm_struct *mm, struct ma_state *mas)
|
|||||||
*
|
*
|
||||||
* Called with the mm semaphore held.
|
* Called with the mm semaphore held.
|
||||||
*/
|
*/
|
||||||
static void unmap_region(struct mm_struct *mm, struct maple_tree *mt,
|
static void unmap_region(struct mm_struct *mm, struct ma_state *mas,
|
||||||
struct vm_area_struct *vma, struct vm_area_struct *prev,
|
struct vm_area_struct *vma, struct vm_area_struct *prev,
|
||||||
struct vm_area_struct *next,
|
struct vm_area_struct *next, unsigned long start,
|
||||||
unsigned long start, unsigned long end, bool mm_wr_locked)
|
unsigned long end, unsigned long tree_end, bool mm_wr_locked)
|
||||||
{
|
{
|
||||||
struct mmu_gather tlb;
|
struct mmu_gather tlb;
|
||||||
|
unsigned long mt_start = mas->index;
|
||||||
|
|
||||||
lru_add_drain();
|
lru_add_drain();
|
||||||
tlb_gather_mmu(&tlb, mm);
|
tlb_gather_mmu(&tlb, mm);
|
||||||
update_hiwater_rss(mm);
|
update_hiwater_rss(mm);
|
||||||
unmap_vmas(&tlb, mt, vma, start, end, mm_wr_locked);
|
unmap_vmas(&tlb, mas, vma, start, end, tree_end, mm_wr_locked);
|
||||||
free_pgtables(&tlb, mt, vma, prev ? prev->vm_end : FIRST_USER_ADDRESS,
|
mas_set(mas, mt_start);
|
||||||
|
free_pgtables(&tlb, mas, vma, prev ? prev->vm_end : FIRST_USER_ADDRESS,
|
||||||
next ? next->vm_start : USER_PGTABLES_CEILING,
|
next ? next->vm_start : USER_PGTABLES_CEILING,
|
||||||
mm_wr_locked);
|
mm_wr_locked);
|
||||||
tlb_finish_mmu(&tlb);
|
tlb_finish_mmu(&tlb);
|
||||||
@@ -2472,7 +2474,7 @@ do_vmi_align_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma,
|
|||||||
goto end_split_failed;
|
goto end_split_failed;
|
||||||
}
|
}
|
||||||
vma_start_write(next);
|
vma_start_write(next);
|
||||||
mas_set_range(&mas_detach, next->vm_start, next->vm_end - 1);
|
mas_set(&mas_detach, count);
|
||||||
error = mas_store_gfp(&mas_detach, next, GFP_KERNEL);
|
error = mas_store_gfp(&mas_detach, next, GFP_KERNEL);
|
||||||
if (error)
|
if (error)
|
||||||
goto munmap_gather_failed;
|
goto munmap_gather_failed;
|
||||||
@@ -2511,17 +2513,17 @@ do_vmi_align_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma,
|
|||||||
#if defined(CONFIG_DEBUG_VM_MAPLE_TREE)
|
#if defined(CONFIG_DEBUG_VM_MAPLE_TREE)
|
||||||
/* Make sure no VMAs are about to be lost. */
|
/* Make sure no VMAs are about to be lost. */
|
||||||
{
|
{
|
||||||
MA_STATE(test, &mt_detach, start, end - 1);
|
MA_STATE(test, &mt_detach, 0, 0);
|
||||||
struct vm_area_struct *vma_mas, *vma_test;
|
struct vm_area_struct *vma_mas, *vma_test;
|
||||||
int test_count = 0;
|
int test_count = 0;
|
||||||
|
|
||||||
vma_iter_set(vmi, start);
|
vma_iter_set(vmi, start);
|
||||||
rcu_read_lock();
|
rcu_read_lock();
|
||||||
vma_test = mas_find(&test, end - 1);
|
vma_test = mas_find(&test, count - 1);
|
||||||
for_each_vma_range(*vmi, vma_mas, end) {
|
for_each_vma_range(*vmi, vma_mas, end) {
|
||||||
BUG_ON(vma_mas != vma_test);
|
BUG_ON(vma_mas != vma_test);
|
||||||
test_count++;
|
test_count++;
|
||||||
vma_test = mas_next(&test, end - 1);
|
vma_test = mas_next(&test, count - 1);
|
||||||
}
|
}
|
||||||
rcu_read_unlock();
|
rcu_read_unlock();
|
||||||
BUG_ON(count != test_count);
|
BUG_ON(count != test_count);
|
||||||
@@ -2542,9 +2544,11 @@ do_vmi_align_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma,
|
|||||||
* We can free page tables without write-locking mmap_lock because VMAs
|
* We can free page tables without write-locking mmap_lock because VMAs
|
||||||
* were isolated before we downgraded mmap_lock.
|
* were isolated before we downgraded mmap_lock.
|
||||||
*/
|
*/
|
||||||
unmap_region(mm, &mt_detach, vma, prev, next, start, end, !unlock);
|
mas_set(&mas_detach, 1);
|
||||||
|
unmap_region(mm, &mas_detach, vma, prev, next, start, end, count,
|
||||||
|
!unlock);
|
||||||
/* Statistics and freeing VMAs */
|
/* Statistics and freeing VMAs */
|
||||||
mas_set(&mas_detach, start);
|
mas_set(&mas_detach, 0);
|
||||||
remove_mt(mm, &mas_detach);
|
remove_mt(mm, &mas_detach);
|
||||||
validate_mm(mm);
|
validate_mm(mm);
|
||||||
if (unlock)
|
if (unlock)
|
||||||
@@ -2864,9 +2868,10 @@ unmap_and_free_vma:
|
|||||||
fput(vma->vm_file);
|
fput(vma->vm_file);
|
||||||
vma->vm_file = NULL;
|
vma->vm_file = NULL;
|
||||||
|
|
||||||
|
vma_iter_set(&vmi, vma->vm_end);
|
||||||
/* Undo any partial mapping done by a device driver. */
|
/* Undo any partial mapping done by a device driver. */
|
||||||
unmap_region(mm, &mm->mm_mt, vma, prev, next, vma->vm_start,
|
unmap_region(mm, &vmi.mas, vma, prev, next, vma->vm_start,
|
||||||
vma->vm_end, true);
|
vma->vm_end, vma->vm_end, true);
|
||||||
}
|
}
|
||||||
if (file && (vm_flags & VM_SHARED))
|
if (file && (vm_flags & VM_SHARED))
|
||||||
mapping_unmap_writable(file->f_mapping);
|
mapping_unmap_writable(file->f_mapping);
|
||||||
@@ -3185,7 +3190,7 @@ void exit_mmap(struct mm_struct *mm)
|
|||||||
tlb_gather_mmu_fullmm(&tlb, mm);
|
tlb_gather_mmu_fullmm(&tlb, mm);
|
||||||
/* update_hiwater_rss(mm) here? but nobody should be looking */
|
/* update_hiwater_rss(mm) here? but nobody should be looking */
|
||||||
/* Use ULONG_MAX here to ensure all VMAs in the mm are unmapped */
|
/* Use ULONG_MAX here to ensure all VMAs in the mm are unmapped */
|
||||||
unmap_vmas(&tlb, &mm->mm_mt, vma, 0, ULONG_MAX, false);
|
unmap_vmas(&tlb, &mas, vma, 0, ULONG_MAX, ULONG_MAX, false);
|
||||||
mmap_read_unlock(mm);
|
mmap_read_unlock(mm);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@@ -3195,7 +3200,8 @@ void exit_mmap(struct mm_struct *mm)
|
|||||||
set_bit(MMF_OOM_SKIP, &mm->flags);
|
set_bit(MMF_OOM_SKIP, &mm->flags);
|
||||||
mmap_write_lock(mm);
|
mmap_write_lock(mm);
|
||||||
mt_clear_in_rcu(&mm->mm_mt);
|
mt_clear_in_rcu(&mm->mm_mt);
|
||||||
free_pgtables(&tlb, &mm->mm_mt, vma, FIRST_USER_ADDRESS,
|
mas_set(&mas, vma->vm_end);
|
||||||
|
free_pgtables(&tlb, &mas, vma, FIRST_USER_ADDRESS,
|
||||||
USER_PGTABLES_CEILING, true);
|
USER_PGTABLES_CEILING, true);
|
||||||
tlb_finish_mmu(&tlb);
|
tlb_finish_mmu(&tlb);
|
||||||
|
|
||||||
@@ -3204,6 +3210,7 @@ void exit_mmap(struct mm_struct *mm)
|
|||||||
* enabled, without holding any MM locks besides the unreachable
|
* enabled, without holding any MM locks besides the unreachable
|
||||||
* mmap_write_lock.
|
* mmap_write_lock.
|
||||||
*/
|
*/
|
||||||
|
mas_set(&mas, vma->vm_end);
|
||||||
do {
|
do {
|
||||||
if (vma->vm_flags & VM_ACCOUNT)
|
if (vma->vm_flags & VM_ACCOUNT)
|
||||||
nr_accounted += vma_pages(vma);
|
nr_accounted += vma_pages(vma);
|
||||||
|
|||||||
Reference in New Issue
Block a user