mirror of
https://github.com/lkl/linux.git
synced 2025-12-19 08:03:01 +09:00
The x86 Shadow stack feature includes a new type of memory called shadow stack. This shadow stack memory has some unusual properties, which requires some core mm changes to function properly. One of these unusual properties is that shadow stack memory is writable, but only in limited ways. These limits are applied via a specific PTE bit combination. Nevertheless, the memory is writable, and core mm code will need to apply the writable permissions in the typical paths that call pte_mkwrite(). Future patches will make pte_mkwrite() take a VMA, so that the x86 implementation of it can know whether to create regular writable or shadow stack mappings. But there are a couple of challenges to this. Modifying the signatures of each arch pte_mkwrite() implementation would be error prone because some are generated with macros and would need to be re-implemented. Also, some pte_mkwrite() callers operate on kernel memory without a VMA. So this can be done in a three step process. First pte_mkwrite() can be renamed to pte_mkwrite_novma() in each arch, with a generic pte_mkwrite() added that just calls pte_mkwrite_novma(). Next callers without a VMA can be moved to pte_mkwrite_novma(). And lastly, pte_mkwrite() and all callers can be changed to take/pass a VMA. Earlier work did the first step, so next move the callers that don't have a VMA to pte_mkwrite_novma(). Also do the same for pmd_mkwrite(). This will be ok for the shadow stack feature, as these callers are on kernel memory which will not need to be made shadow stack, and the other architectures only currently support one type of memory in pte_mkwrite() Signed-off-by: Rick Edgecombe <rick.p.edgecombe@intel.com> Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com> Reviewed-by: Mike Rapoport (IBM) <rppt@kernel.org> Acked-by: David Hildenbrand <david@redhat.com> Link: https://lore.kernel.org/all/20230613001108.3040476-3-rick.p.edgecombe%40intel.com
453 lines
11 KiB
C
453 lines
11 KiB
C
// SPDX-License-Identifier: GPL-2.0
|
|
/*
|
|
* Copyright IBM Corp. 2011
|
|
* Author(s): Jan Glauber <jang@linux.vnet.ibm.com>
|
|
*/
|
|
#include <linux/hugetlb.h>
|
|
#include <linux/proc_fs.h>
|
|
#include <linux/vmalloc.h>
|
|
#include <linux/mm.h>
|
|
#include <asm/cacheflush.h>
|
|
#include <asm/facility.h>
|
|
#include <asm/pgalloc.h>
|
|
#include <asm/kfence.h>
|
|
#include <asm/page.h>
|
|
#include <asm/set_memory.h>
|
|
|
|
static inline unsigned long sske_frame(unsigned long addr, unsigned char skey)
|
|
{
|
|
asm volatile(".insn rrf,0xb22b0000,%[skey],%[addr],1,0"
|
|
: [addr] "+a" (addr) : [skey] "d" (skey));
|
|
return addr;
|
|
}
|
|
|
|
void __storage_key_init_range(unsigned long start, unsigned long end)
|
|
{
|
|
unsigned long boundary, size;
|
|
|
|
while (start < end) {
|
|
if (MACHINE_HAS_EDAT1) {
|
|
/* set storage keys for a 1MB frame */
|
|
size = 1UL << 20;
|
|
boundary = (start + size) & ~(size - 1);
|
|
if (boundary <= end) {
|
|
do {
|
|
start = sske_frame(start, PAGE_DEFAULT_KEY);
|
|
} while (start < boundary);
|
|
continue;
|
|
}
|
|
}
|
|
page_set_storage_key(start, PAGE_DEFAULT_KEY, 1);
|
|
start += PAGE_SIZE;
|
|
}
|
|
}
|
|
|
|
#ifdef CONFIG_PROC_FS
|
|
atomic_long_t __bootdata_preserved(direct_pages_count[PG_DIRECT_MAP_MAX]);
|
|
|
|
void arch_report_meminfo(struct seq_file *m)
|
|
{
|
|
seq_printf(m, "DirectMap4k: %8lu kB\n",
|
|
atomic_long_read(&direct_pages_count[PG_DIRECT_MAP_4K]) << 2);
|
|
seq_printf(m, "DirectMap1M: %8lu kB\n",
|
|
atomic_long_read(&direct_pages_count[PG_DIRECT_MAP_1M]) << 10);
|
|
seq_printf(m, "DirectMap2G: %8lu kB\n",
|
|
atomic_long_read(&direct_pages_count[PG_DIRECT_MAP_2G]) << 21);
|
|
}
|
|
#endif /* CONFIG_PROC_FS */
|
|
|
|
static void pgt_set(unsigned long *old, unsigned long new, unsigned long addr,
|
|
unsigned long dtt)
|
|
{
|
|
unsigned long *table, mask;
|
|
|
|
mask = 0;
|
|
if (MACHINE_HAS_EDAT2) {
|
|
switch (dtt) {
|
|
case CRDTE_DTT_REGION3:
|
|
mask = ~(PTRS_PER_PUD * sizeof(pud_t) - 1);
|
|
break;
|
|
case CRDTE_DTT_SEGMENT:
|
|
mask = ~(PTRS_PER_PMD * sizeof(pmd_t) - 1);
|
|
break;
|
|
case CRDTE_DTT_PAGE:
|
|
mask = ~(PTRS_PER_PTE * sizeof(pte_t) - 1);
|
|
break;
|
|
}
|
|
table = (unsigned long *)((unsigned long)old & mask);
|
|
crdte(*old, new, table, dtt, addr, S390_lowcore.kernel_asce);
|
|
} else if (MACHINE_HAS_IDTE) {
|
|
cspg(old, *old, new);
|
|
} else {
|
|
csp((unsigned int *)old + 1, *old, new);
|
|
}
|
|
}
|
|
|
|
static int walk_pte_level(pmd_t *pmdp, unsigned long addr, unsigned long end,
|
|
unsigned long flags)
|
|
{
|
|
pte_t *ptep, new;
|
|
|
|
if (flags == SET_MEMORY_4K)
|
|
return 0;
|
|
ptep = pte_offset_kernel(pmdp, addr);
|
|
do {
|
|
new = *ptep;
|
|
if (pte_none(new))
|
|
return -EINVAL;
|
|
if (flags & SET_MEMORY_RO)
|
|
new = pte_wrprotect(new);
|
|
else if (flags & SET_MEMORY_RW)
|
|
new = pte_mkwrite_novma(pte_mkdirty(new));
|
|
if (flags & SET_MEMORY_NX)
|
|
new = set_pte_bit(new, __pgprot(_PAGE_NOEXEC));
|
|
else if (flags & SET_MEMORY_X)
|
|
new = clear_pte_bit(new, __pgprot(_PAGE_NOEXEC));
|
|
if (flags & SET_MEMORY_INV) {
|
|
new = set_pte_bit(new, __pgprot(_PAGE_INVALID));
|
|
} else if (flags & SET_MEMORY_DEF) {
|
|
new = __pte(pte_val(new) & PAGE_MASK);
|
|
new = set_pte_bit(new, PAGE_KERNEL);
|
|
if (!MACHINE_HAS_NX)
|
|
new = clear_pte_bit(new, __pgprot(_PAGE_NOEXEC));
|
|
}
|
|
pgt_set((unsigned long *)ptep, pte_val(new), addr, CRDTE_DTT_PAGE);
|
|
ptep++;
|
|
addr += PAGE_SIZE;
|
|
cond_resched();
|
|
} while (addr < end);
|
|
return 0;
|
|
}
|
|
|
|
static int split_pmd_page(pmd_t *pmdp, unsigned long addr)
|
|
{
|
|
unsigned long pte_addr, prot;
|
|
pte_t *pt_dir, *ptep;
|
|
pmd_t new;
|
|
int i, ro, nx;
|
|
|
|
pt_dir = vmem_pte_alloc();
|
|
if (!pt_dir)
|
|
return -ENOMEM;
|
|
pte_addr = pmd_pfn(*pmdp) << PAGE_SHIFT;
|
|
ro = !!(pmd_val(*pmdp) & _SEGMENT_ENTRY_PROTECT);
|
|
nx = !!(pmd_val(*pmdp) & _SEGMENT_ENTRY_NOEXEC);
|
|
prot = pgprot_val(ro ? PAGE_KERNEL_RO : PAGE_KERNEL);
|
|
if (!nx)
|
|
prot &= ~_PAGE_NOEXEC;
|
|
ptep = pt_dir;
|
|
for (i = 0; i < PTRS_PER_PTE; i++) {
|
|
set_pte(ptep, __pte(pte_addr | prot));
|
|
pte_addr += PAGE_SIZE;
|
|
ptep++;
|
|
}
|
|
new = __pmd(__pa(pt_dir) | _SEGMENT_ENTRY);
|
|
pgt_set((unsigned long *)pmdp, pmd_val(new), addr, CRDTE_DTT_SEGMENT);
|
|
update_page_count(PG_DIRECT_MAP_4K, PTRS_PER_PTE);
|
|
update_page_count(PG_DIRECT_MAP_1M, -1);
|
|
return 0;
|
|
}
|
|
|
|
static void modify_pmd_page(pmd_t *pmdp, unsigned long addr,
|
|
unsigned long flags)
|
|
{
|
|
pmd_t new = *pmdp;
|
|
|
|
if (flags & SET_MEMORY_RO)
|
|
new = pmd_wrprotect(new);
|
|
else if (flags & SET_MEMORY_RW)
|
|
new = pmd_mkwrite_novma(pmd_mkdirty(new));
|
|
if (flags & SET_MEMORY_NX)
|
|
new = set_pmd_bit(new, __pgprot(_SEGMENT_ENTRY_NOEXEC));
|
|
else if (flags & SET_MEMORY_X)
|
|
new = clear_pmd_bit(new, __pgprot(_SEGMENT_ENTRY_NOEXEC));
|
|
if (flags & SET_MEMORY_INV) {
|
|
new = set_pmd_bit(new, __pgprot(_SEGMENT_ENTRY_INVALID));
|
|
} else if (flags & SET_MEMORY_DEF) {
|
|
new = __pmd(pmd_val(new) & PMD_MASK);
|
|
new = set_pmd_bit(new, SEGMENT_KERNEL);
|
|
if (!MACHINE_HAS_NX)
|
|
new = clear_pmd_bit(new, __pgprot(_SEGMENT_ENTRY_NOEXEC));
|
|
}
|
|
pgt_set((unsigned long *)pmdp, pmd_val(new), addr, CRDTE_DTT_SEGMENT);
|
|
}
|
|
|
|
static int walk_pmd_level(pud_t *pudp, unsigned long addr, unsigned long end,
|
|
unsigned long flags)
|
|
{
|
|
unsigned long next;
|
|
int need_split;
|
|
pmd_t *pmdp;
|
|
int rc = 0;
|
|
|
|
pmdp = pmd_offset(pudp, addr);
|
|
do {
|
|
if (pmd_none(*pmdp))
|
|
return -EINVAL;
|
|
next = pmd_addr_end(addr, end);
|
|
if (pmd_large(*pmdp)) {
|
|
need_split = !!(flags & SET_MEMORY_4K);
|
|
need_split |= !!(addr & ~PMD_MASK);
|
|
need_split |= !!(addr + PMD_SIZE > next);
|
|
if (need_split) {
|
|
rc = split_pmd_page(pmdp, addr);
|
|
if (rc)
|
|
return rc;
|
|
continue;
|
|
}
|
|
modify_pmd_page(pmdp, addr, flags);
|
|
} else {
|
|
rc = walk_pte_level(pmdp, addr, next, flags);
|
|
if (rc)
|
|
return rc;
|
|
}
|
|
pmdp++;
|
|
addr = next;
|
|
cond_resched();
|
|
} while (addr < end);
|
|
return rc;
|
|
}
|
|
|
|
static int split_pud_page(pud_t *pudp, unsigned long addr)
|
|
{
|
|
unsigned long pmd_addr, prot;
|
|
pmd_t *pm_dir, *pmdp;
|
|
pud_t new;
|
|
int i, ro, nx;
|
|
|
|
pm_dir = vmem_crst_alloc(_SEGMENT_ENTRY_EMPTY);
|
|
if (!pm_dir)
|
|
return -ENOMEM;
|
|
pmd_addr = pud_pfn(*pudp) << PAGE_SHIFT;
|
|
ro = !!(pud_val(*pudp) & _REGION_ENTRY_PROTECT);
|
|
nx = !!(pud_val(*pudp) & _REGION_ENTRY_NOEXEC);
|
|
prot = pgprot_val(ro ? SEGMENT_KERNEL_RO : SEGMENT_KERNEL);
|
|
if (!nx)
|
|
prot &= ~_SEGMENT_ENTRY_NOEXEC;
|
|
pmdp = pm_dir;
|
|
for (i = 0; i < PTRS_PER_PMD; i++) {
|
|
set_pmd(pmdp, __pmd(pmd_addr | prot));
|
|
pmd_addr += PMD_SIZE;
|
|
pmdp++;
|
|
}
|
|
new = __pud(__pa(pm_dir) | _REGION3_ENTRY);
|
|
pgt_set((unsigned long *)pudp, pud_val(new), addr, CRDTE_DTT_REGION3);
|
|
update_page_count(PG_DIRECT_MAP_1M, PTRS_PER_PMD);
|
|
update_page_count(PG_DIRECT_MAP_2G, -1);
|
|
return 0;
|
|
}
|
|
|
|
static void modify_pud_page(pud_t *pudp, unsigned long addr,
|
|
unsigned long flags)
|
|
{
|
|
pud_t new = *pudp;
|
|
|
|
if (flags & SET_MEMORY_RO)
|
|
new = pud_wrprotect(new);
|
|
else if (flags & SET_MEMORY_RW)
|
|
new = pud_mkwrite(pud_mkdirty(new));
|
|
if (flags & SET_MEMORY_NX)
|
|
new = set_pud_bit(new, __pgprot(_REGION_ENTRY_NOEXEC));
|
|
else if (flags & SET_MEMORY_X)
|
|
new = clear_pud_bit(new, __pgprot(_REGION_ENTRY_NOEXEC));
|
|
if (flags & SET_MEMORY_INV) {
|
|
new = set_pud_bit(new, __pgprot(_REGION_ENTRY_INVALID));
|
|
} else if (flags & SET_MEMORY_DEF) {
|
|
new = __pud(pud_val(new) & PUD_MASK);
|
|
new = set_pud_bit(new, REGION3_KERNEL);
|
|
if (!MACHINE_HAS_NX)
|
|
new = clear_pud_bit(new, __pgprot(_REGION_ENTRY_NOEXEC));
|
|
}
|
|
pgt_set((unsigned long *)pudp, pud_val(new), addr, CRDTE_DTT_REGION3);
|
|
}
|
|
|
|
static int walk_pud_level(p4d_t *p4d, unsigned long addr, unsigned long end,
|
|
unsigned long flags)
|
|
{
|
|
unsigned long next;
|
|
int need_split;
|
|
pud_t *pudp;
|
|
int rc = 0;
|
|
|
|
pudp = pud_offset(p4d, addr);
|
|
do {
|
|
if (pud_none(*pudp))
|
|
return -EINVAL;
|
|
next = pud_addr_end(addr, end);
|
|
if (pud_large(*pudp)) {
|
|
need_split = !!(flags & SET_MEMORY_4K);
|
|
need_split |= !!(addr & ~PUD_MASK);
|
|
need_split |= !!(addr + PUD_SIZE > next);
|
|
if (need_split) {
|
|
rc = split_pud_page(pudp, addr);
|
|
if (rc)
|
|
break;
|
|
continue;
|
|
}
|
|
modify_pud_page(pudp, addr, flags);
|
|
} else {
|
|
rc = walk_pmd_level(pudp, addr, next, flags);
|
|
}
|
|
pudp++;
|
|
addr = next;
|
|
cond_resched();
|
|
} while (addr < end && !rc);
|
|
return rc;
|
|
}
|
|
|
|
static int walk_p4d_level(pgd_t *pgd, unsigned long addr, unsigned long end,
|
|
unsigned long flags)
|
|
{
|
|
unsigned long next;
|
|
p4d_t *p4dp;
|
|
int rc = 0;
|
|
|
|
p4dp = p4d_offset(pgd, addr);
|
|
do {
|
|
if (p4d_none(*p4dp))
|
|
return -EINVAL;
|
|
next = p4d_addr_end(addr, end);
|
|
rc = walk_pud_level(p4dp, addr, next, flags);
|
|
p4dp++;
|
|
addr = next;
|
|
cond_resched();
|
|
} while (addr < end && !rc);
|
|
return rc;
|
|
}
|
|
|
|
DEFINE_MUTEX(cpa_mutex);
|
|
|
|
static int change_page_attr(unsigned long addr, unsigned long end,
|
|
unsigned long flags)
|
|
{
|
|
unsigned long next;
|
|
int rc = -EINVAL;
|
|
pgd_t *pgdp;
|
|
|
|
pgdp = pgd_offset_k(addr);
|
|
do {
|
|
if (pgd_none(*pgdp))
|
|
break;
|
|
next = pgd_addr_end(addr, end);
|
|
rc = walk_p4d_level(pgdp, addr, next, flags);
|
|
if (rc)
|
|
break;
|
|
cond_resched();
|
|
} while (pgdp++, addr = next, addr < end && !rc);
|
|
return rc;
|
|
}
|
|
|
|
static int change_page_attr_alias(unsigned long addr, unsigned long end,
|
|
unsigned long flags)
|
|
{
|
|
unsigned long alias, offset, va_start, va_end;
|
|
struct vm_struct *area;
|
|
int rc = 0;
|
|
|
|
/*
|
|
* Changes to read-only permissions on kernel VA mappings are also
|
|
* applied to the kernel direct mapping. Execute permissions are
|
|
* intentionally not transferred to keep all allocated pages within
|
|
* the direct mapping non-executable.
|
|
*/
|
|
flags &= SET_MEMORY_RO | SET_MEMORY_RW;
|
|
if (!flags)
|
|
return 0;
|
|
area = NULL;
|
|
while (addr < end) {
|
|
if (!area)
|
|
area = find_vm_area((void *)addr);
|
|
if (!area || !(area->flags & VM_ALLOC))
|
|
return 0;
|
|
va_start = (unsigned long)area->addr;
|
|
va_end = va_start + area->nr_pages * PAGE_SIZE;
|
|
offset = (addr - va_start) >> PAGE_SHIFT;
|
|
alias = (unsigned long)page_address(area->pages[offset]);
|
|
rc = change_page_attr(alias, alias + PAGE_SIZE, flags);
|
|
if (rc)
|
|
break;
|
|
addr += PAGE_SIZE;
|
|
if (addr >= va_end)
|
|
area = NULL;
|
|
}
|
|
return rc;
|
|
}
|
|
|
|
int __set_memory(unsigned long addr, int numpages, unsigned long flags)
|
|
{
|
|
unsigned long end;
|
|
int rc;
|
|
|
|
if (!MACHINE_HAS_NX)
|
|
flags &= ~(SET_MEMORY_NX | SET_MEMORY_X);
|
|
if (!flags)
|
|
return 0;
|
|
if (!numpages)
|
|
return 0;
|
|
addr &= PAGE_MASK;
|
|
end = addr + numpages * PAGE_SIZE;
|
|
mutex_lock(&cpa_mutex);
|
|
rc = change_page_attr(addr, end, flags);
|
|
if (rc)
|
|
goto out;
|
|
rc = change_page_attr_alias(addr, end, flags);
|
|
out:
|
|
mutex_unlock(&cpa_mutex);
|
|
return rc;
|
|
}
|
|
|
|
int set_direct_map_invalid_noflush(struct page *page)
|
|
{
|
|
return __set_memory((unsigned long)page_to_virt(page), 1, SET_MEMORY_INV);
|
|
}
|
|
|
|
int set_direct_map_default_noflush(struct page *page)
|
|
{
|
|
return __set_memory((unsigned long)page_to_virt(page), 1, SET_MEMORY_DEF);
|
|
}
|
|
|
|
#if defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_KFENCE)
|
|
|
|
static void ipte_range(pte_t *pte, unsigned long address, int nr)
|
|
{
|
|
int i;
|
|
|
|
if (test_facility(13)) {
|
|
__ptep_ipte_range(address, nr - 1, pte, IPTE_GLOBAL);
|
|
return;
|
|
}
|
|
for (i = 0; i < nr; i++) {
|
|
__ptep_ipte(address, pte, 0, 0, IPTE_GLOBAL);
|
|
address += PAGE_SIZE;
|
|
pte++;
|
|
}
|
|
}
|
|
|
|
void __kernel_map_pages(struct page *page, int numpages, int enable)
|
|
{
|
|
unsigned long address;
|
|
pte_t *ptep, pte;
|
|
int nr, i, j;
|
|
|
|
for (i = 0; i < numpages;) {
|
|
address = (unsigned long)page_to_virt(page + i);
|
|
ptep = virt_to_kpte(address);
|
|
nr = (unsigned long)ptep >> ilog2(sizeof(long));
|
|
nr = PTRS_PER_PTE - (nr & (PTRS_PER_PTE - 1));
|
|
nr = min(numpages - i, nr);
|
|
if (enable) {
|
|
for (j = 0; j < nr; j++) {
|
|
pte = clear_pte_bit(*ptep, __pgprot(_PAGE_INVALID));
|
|
set_pte(ptep, pte);
|
|
address += PAGE_SIZE;
|
|
ptep++;
|
|
}
|
|
} else {
|
|
ipte_range(ptep, address, nr);
|
|
}
|
|
i += nr;
|
|
}
|
|
}
|
|
|
|
#endif /* CONFIG_DEBUG_PAGEALLOC */
|