Files
lkl_linux/arch/s390/mm/pageattr.c
Rick Edgecombe 6ecc21bb43 mm: Move pte/pmd_mkwrite() callers with no VMA to _novma()
The x86 Shadow stack feature includes a new type of memory called shadow
stack. This shadow stack memory has some unusual properties, which requires
some core mm changes to function properly.

One of these unusual properties is that shadow stack memory is writable,
but only in limited ways. These limits are applied via a specific PTE
bit combination. Nevertheless, the memory is writable, and core mm code
will need to apply the writable permissions in the typical paths that
call pte_mkwrite(). Future patches will make pte_mkwrite() take a VMA, so
that the x86 implementation of it can know whether to create regular
writable or shadow stack mappings.

But there are a couple of challenges to this. Modifying the signatures of
each arch pte_mkwrite() implementation would be error prone because some
are generated with macros and would need to be re-implemented. Also, some
pte_mkwrite() callers operate on kernel memory without a VMA.

So this can be done in a three step process. First pte_mkwrite() can be
renamed to pte_mkwrite_novma() in each arch, with a generic pte_mkwrite()
added that just calls pte_mkwrite_novma(). Next callers without a VMA can
be moved to pte_mkwrite_novma(). And lastly, pte_mkwrite() and all callers
can be changed to take/pass a VMA.

Earlier work did the first step, so next move the callers that don't have
a VMA to pte_mkwrite_novma(). Also do the same for pmd_mkwrite().  This
will be ok for the shadow stack feature, as these callers are on kernel
memory which will not need to be made shadow stack, and the other
architectures only currently support one type of memory in pte_mkwrite()

Signed-off-by: Rick Edgecombe <rick.p.edgecombe@intel.com>
Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
Reviewed-by: Mike Rapoport (IBM) <rppt@kernel.org>
Acked-by: David Hildenbrand <david@redhat.com>
Link: https://lore.kernel.org/all/20230613001108.3040476-3-rick.p.edgecombe%40intel.com
2023-07-11 14:10:57 -07:00

453 lines
11 KiB
C

// SPDX-License-Identifier: GPL-2.0
/*
* Copyright IBM Corp. 2011
* Author(s): Jan Glauber <jang@linux.vnet.ibm.com>
*/
#include <linux/hugetlb.h>
#include <linux/proc_fs.h>
#include <linux/vmalloc.h>
#include <linux/mm.h>
#include <asm/cacheflush.h>
#include <asm/facility.h>
#include <asm/pgalloc.h>
#include <asm/kfence.h>
#include <asm/page.h>
#include <asm/set_memory.h>
static inline unsigned long sske_frame(unsigned long addr, unsigned char skey)
{
asm volatile(".insn rrf,0xb22b0000,%[skey],%[addr],1,0"
: [addr] "+a" (addr) : [skey] "d" (skey));
return addr;
}
void __storage_key_init_range(unsigned long start, unsigned long end)
{
unsigned long boundary, size;
while (start < end) {
if (MACHINE_HAS_EDAT1) {
/* set storage keys for a 1MB frame */
size = 1UL << 20;
boundary = (start + size) & ~(size - 1);
if (boundary <= end) {
do {
start = sske_frame(start, PAGE_DEFAULT_KEY);
} while (start < boundary);
continue;
}
}
page_set_storage_key(start, PAGE_DEFAULT_KEY, 1);
start += PAGE_SIZE;
}
}
#ifdef CONFIG_PROC_FS
atomic_long_t __bootdata_preserved(direct_pages_count[PG_DIRECT_MAP_MAX]);
void arch_report_meminfo(struct seq_file *m)
{
seq_printf(m, "DirectMap4k: %8lu kB\n",
atomic_long_read(&direct_pages_count[PG_DIRECT_MAP_4K]) << 2);
seq_printf(m, "DirectMap1M: %8lu kB\n",
atomic_long_read(&direct_pages_count[PG_DIRECT_MAP_1M]) << 10);
seq_printf(m, "DirectMap2G: %8lu kB\n",
atomic_long_read(&direct_pages_count[PG_DIRECT_MAP_2G]) << 21);
}
#endif /* CONFIG_PROC_FS */
static void pgt_set(unsigned long *old, unsigned long new, unsigned long addr,
unsigned long dtt)
{
unsigned long *table, mask;
mask = 0;
if (MACHINE_HAS_EDAT2) {
switch (dtt) {
case CRDTE_DTT_REGION3:
mask = ~(PTRS_PER_PUD * sizeof(pud_t) - 1);
break;
case CRDTE_DTT_SEGMENT:
mask = ~(PTRS_PER_PMD * sizeof(pmd_t) - 1);
break;
case CRDTE_DTT_PAGE:
mask = ~(PTRS_PER_PTE * sizeof(pte_t) - 1);
break;
}
table = (unsigned long *)((unsigned long)old & mask);
crdte(*old, new, table, dtt, addr, S390_lowcore.kernel_asce);
} else if (MACHINE_HAS_IDTE) {
cspg(old, *old, new);
} else {
csp((unsigned int *)old + 1, *old, new);
}
}
static int walk_pte_level(pmd_t *pmdp, unsigned long addr, unsigned long end,
unsigned long flags)
{
pte_t *ptep, new;
if (flags == SET_MEMORY_4K)
return 0;
ptep = pte_offset_kernel(pmdp, addr);
do {
new = *ptep;
if (pte_none(new))
return -EINVAL;
if (flags & SET_MEMORY_RO)
new = pte_wrprotect(new);
else if (flags & SET_MEMORY_RW)
new = pte_mkwrite_novma(pte_mkdirty(new));
if (flags & SET_MEMORY_NX)
new = set_pte_bit(new, __pgprot(_PAGE_NOEXEC));
else if (flags & SET_MEMORY_X)
new = clear_pte_bit(new, __pgprot(_PAGE_NOEXEC));
if (flags & SET_MEMORY_INV) {
new = set_pte_bit(new, __pgprot(_PAGE_INVALID));
} else if (flags & SET_MEMORY_DEF) {
new = __pte(pte_val(new) & PAGE_MASK);
new = set_pte_bit(new, PAGE_KERNEL);
if (!MACHINE_HAS_NX)
new = clear_pte_bit(new, __pgprot(_PAGE_NOEXEC));
}
pgt_set((unsigned long *)ptep, pte_val(new), addr, CRDTE_DTT_PAGE);
ptep++;
addr += PAGE_SIZE;
cond_resched();
} while (addr < end);
return 0;
}
static int split_pmd_page(pmd_t *pmdp, unsigned long addr)
{
unsigned long pte_addr, prot;
pte_t *pt_dir, *ptep;
pmd_t new;
int i, ro, nx;
pt_dir = vmem_pte_alloc();
if (!pt_dir)
return -ENOMEM;
pte_addr = pmd_pfn(*pmdp) << PAGE_SHIFT;
ro = !!(pmd_val(*pmdp) & _SEGMENT_ENTRY_PROTECT);
nx = !!(pmd_val(*pmdp) & _SEGMENT_ENTRY_NOEXEC);
prot = pgprot_val(ro ? PAGE_KERNEL_RO : PAGE_KERNEL);
if (!nx)
prot &= ~_PAGE_NOEXEC;
ptep = pt_dir;
for (i = 0; i < PTRS_PER_PTE; i++) {
set_pte(ptep, __pte(pte_addr | prot));
pte_addr += PAGE_SIZE;
ptep++;
}
new = __pmd(__pa(pt_dir) | _SEGMENT_ENTRY);
pgt_set((unsigned long *)pmdp, pmd_val(new), addr, CRDTE_DTT_SEGMENT);
update_page_count(PG_DIRECT_MAP_4K, PTRS_PER_PTE);
update_page_count(PG_DIRECT_MAP_1M, -1);
return 0;
}
static void modify_pmd_page(pmd_t *pmdp, unsigned long addr,
unsigned long flags)
{
pmd_t new = *pmdp;
if (flags & SET_MEMORY_RO)
new = pmd_wrprotect(new);
else if (flags & SET_MEMORY_RW)
new = pmd_mkwrite_novma(pmd_mkdirty(new));
if (flags & SET_MEMORY_NX)
new = set_pmd_bit(new, __pgprot(_SEGMENT_ENTRY_NOEXEC));
else if (flags & SET_MEMORY_X)
new = clear_pmd_bit(new, __pgprot(_SEGMENT_ENTRY_NOEXEC));
if (flags & SET_MEMORY_INV) {
new = set_pmd_bit(new, __pgprot(_SEGMENT_ENTRY_INVALID));
} else if (flags & SET_MEMORY_DEF) {
new = __pmd(pmd_val(new) & PMD_MASK);
new = set_pmd_bit(new, SEGMENT_KERNEL);
if (!MACHINE_HAS_NX)
new = clear_pmd_bit(new, __pgprot(_SEGMENT_ENTRY_NOEXEC));
}
pgt_set((unsigned long *)pmdp, pmd_val(new), addr, CRDTE_DTT_SEGMENT);
}
static int walk_pmd_level(pud_t *pudp, unsigned long addr, unsigned long end,
unsigned long flags)
{
unsigned long next;
int need_split;
pmd_t *pmdp;
int rc = 0;
pmdp = pmd_offset(pudp, addr);
do {
if (pmd_none(*pmdp))
return -EINVAL;
next = pmd_addr_end(addr, end);
if (pmd_large(*pmdp)) {
need_split = !!(flags & SET_MEMORY_4K);
need_split |= !!(addr & ~PMD_MASK);
need_split |= !!(addr + PMD_SIZE > next);
if (need_split) {
rc = split_pmd_page(pmdp, addr);
if (rc)
return rc;
continue;
}
modify_pmd_page(pmdp, addr, flags);
} else {
rc = walk_pte_level(pmdp, addr, next, flags);
if (rc)
return rc;
}
pmdp++;
addr = next;
cond_resched();
} while (addr < end);
return rc;
}
static int split_pud_page(pud_t *pudp, unsigned long addr)
{
unsigned long pmd_addr, prot;
pmd_t *pm_dir, *pmdp;
pud_t new;
int i, ro, nx;
pm_dir = vmem_crst_alloc(_SEGMENT_ENTRY_EMPTY);
if (!pm_dir)
return -ENOMEM;
pmd_addr = pud_pfn(*pudp) << PAGE_SHIFT;
ro = !!(pud_val(*pudp) & _REGION_ENTRY_PROTECT);
nx = !!(pud_val(*pudp) & _REGION_ENTRY_NOEXEC);
prot = pgprot_val(ro ? SEGMENT_KERNEL_RO : SEGMENT_KERNEL);
if (!nx)
prot &= ~_SEGMENT_ENTRY_NOEXEC;
pmdp = pm_dir;
for (i = 0; i < PTRS_PER_PMD; i++) {
set_pmd(pmdp, __pmd(pmd_addr | prot));
pmd_addr += PMD_SIZE;
pmdp++;
}
new = __pud(__pa(pm_dir) | _REGION3_ENTRY);
pgt_set((unsigned long *)pudp, pud_val(new), addr, CRDTE_DTT_REGION3);
update_page_count(PG_DIRECT_MAP_1M, PTRS_PER_PMD);
update_page_count(PG_DIRECT_MAP_2G, -1);
return 0;
}
static void modify_pud_page(pud_t *pudp, unsigned long addr,
unsigned long flags)
{
pud_t new = *pudp;
if (flags & SET_MEMORY_RO)
new = pud_wrprotect(new);
else if (flags & SET_MEMORY_RW)
new = pud_mkwrite(pud_mkdirty(new));
if (flags & SET_MEMORY_NX)
new = set_pud_bit(new, __pgprot(_REGION_ENTRY_NOEXEC));
else if (flags & SET_MEMORY_X)
new = clear_pud_bit(new, __pgprot(_REGION_ENTRY_NOEXEC));
if (flags & SET_MEMORY_INV) {
new = set_pud_bit(new, __pgprot(_REGION_ENTRY_INVALID));
} else if (flags & SET_MEMORY_DEF) {
new = __pud(pud_val(new) & PUD_MASK);
new = set_pud_bit(new, REGION3_KERNEL);
if (!MACHINE_HAS_NX)
new = clear_pud_bit(new, __pgprot(_REGION_ENTRY_NOEXEC));
}
pgt_set((unsigned long *)pudp, pud_val(new), addr, CRDTE_DTT_REGION3);
}
static int walk_pud_level(p4d_t *p4d, unsigned long addr, unsigned long end,
unsigned long flags)
{
unsigned long next;
int need_split;
pud_t *pudp;
int rc = 0;
pudp = pud_offset(p4d, addr);
do {
if (pud_none(*pudp))
return -EINVAL;
next = pud_addr_end(addr, end);
if (pud_large(*pudp)) {
need_split = !!(flags & SET_MEMORY_4K);
need_split |= !!(addr & ~PUD_MASK);
need_split |= !!(addr + PUD_SIZE > next);
if (need_split) {
rc = split_pud_page(pudp, addr);
if (rc)
break;
continue;
}
modify_pud_page(pudp, addr, flags);
} else {
rc = walk_pmd_level(pudp, addr, next, flags);
}
pudp++;
addr = next;
cond_resched();
} while (addr < end && !rc);
return rc;
}
static int walk_p4d_level(pgd_t *pgd, unsigned long addr, unsigned long end,
unsigned long flags)
{
unsigned long next;
p4d_t *p4dp;
int rc = 0;
p4dp = p4d_offset(pgd, addr);
do {
if (p4d_none(*p4dp))
return -EINVAL;
next = p4d_addr_end(addr, end);
rc = walk_pud_level(p4dp, addr, next, flags);
p4dp++;
addr = next;
cond_resched();
} while (addr < end && !rc);
return rc;
}
DEFINE_MUTEX(cpa_mutex);
static int change_page_attr(unsigned long addr, unsigned long end,
unsigned long flags)
{
unsigned long next;
int rc = -EINVAL;
pgd_t *pgdp;
pgdp = pgd_offset_k(addr);
do {
if (pgd_none(*pgdp))
break;
next = pgd_addr_end(addr, end);
rc = walk_p4d_level(pgdp, addr, next, flags);
if (rc)
break;
cond_resched();
} while (pgdp++, addr = next, addr < end && !rc);
return rc;
}
static int change_page_attr_alias(unsigned long addr, unsigned long end,
unsigned long flags)
{
unsigned long alias, offset, va_start, va_end;
struct vm_struct *area;
int rc = 0;
/*
* Changes to read-only permissions on kernel VA mappings are also
* applied to the kernel direct mapping. Execute permissions are
* intentionally not transferred to keep all allocated pages within
* the direct mapping non-executable.
*/
flags &= SET_MEMORY_RO | SET_MEMORY_RW;
if (!flags)
return 0;
area = NULL;
while (addr < end) {
if (!area)
area = find_vm_area((void *)addr);
if (!area || !(area->flags & VM_ALLOC))
return 0;
va_start = (unsigned long)area->addr;
va_end = va_start + area->nr_pages * PAGE_SIZE;
offset = (addr - va_start) >> PAGE_SHIFT;
alias = (unsigned long)page_address(area->pages[offset]);
rc = change_page_attr(alias, alias + PAGE_SIZE, flags);
if (rc)
break;
addr += PAGE_SIZE;
if (addr >= va_end)
area = NULL;
}
return rc;
}
int __set_memory(unsigned long addr, int numpages, unsigned long flags)
{
unsigned long end;
int rc;
if (!MACHINE_HAS_NX)
flags &= ~(SET_MEMORY_NX | SET_MEMORY_X);
if (!flags)
return 0;
if (!numpages)
return 0;
addr &= PAGE_MASK;
end = addr + numpages * PAGE_SIZE;
mutex_lock(&cpa_mutex);
rc = change_page_attr(addr, end, flags);
if (rc)
goto out;
rc = change_page_attr_alias(addr, end, flags);
out:
mutex_unlock(&cpa_mutex);
return rc;
}
int set_direct_map_invalid_noflush(struct page *page)
{
return __set_memory((unsigned long)page_to_virt(page), 1, SET_MEMORY_INV);
}
int set_direct_map_default_noflush(struct page *page)
{
return __set_memory((unsigned long)page_to_virt(page), 1, SET_MEMORY_DEF);
}
#if defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_KFENCE)
static void ipte_range(pte_t *pte, unsigned long address, int nr)
{
int i;
if (test_facility(13)) {
__ptep_ipte_range(address, nr - 1, pte, IPTE_GLOBAL);
return;
}
for (i = 0; i < nr; i++) {
__ptep_ipte(address, pte, 0, 0, IPTE_GLOBAL);
address += PAGE_SIZE;
pte++;
}
}
void __kernel_map_pages(struct page *page, int numpages, int enable)
{
unsigned long address;
pte_t *ptep, pte;
int nr, i, j;
for (i = 0; i < numpages;) {
address = (unsigned long)page_to_virt(page + i);
ptep = virt_to_kpte(address);
nr = (unsigned long)ptep >> ilog2(sizeof(long));
nr = PTRS_PER_PTE - (nr & (PTRS_PER_PTE - 1));
nr = min(numpages - i, nr);
if (enable) {
for (j = 0; j < nr; j++) {
pte = clear_pte_bit(*ptep, __pgprot(_PAGE_INVALID));
set_pte(ptep, pte);
address += PAGE_SIZE;
ptep++;
}
} else {
ipte_range(ptep, address, nr);
}
i += nr;
}
}
#endif /* CONFIG_DEBUG_PAGEALLOC */