Skip to content

Commit 37bc3e5

Browse files
bsingharora authored and mpe committed
powerpc/lib/code-patching: Use alternate map for patch_instruction()
This patch creates the window using text_poke_area, allocated via get_vm_area(). text_poke_area is per CPU to avoid locking. The text_poke_area for each CPU is set up using a late_initcall; prior to the setup of these alternate mapping areas, we continue to use direct writes to modify kernel text. The ability to use alternate mappings to write to kernel text gives us the freedom to then turn text read-only and implement CONFIG_STRICT_KERNEL_RWX. This code is CPU hotplug aware, to ensure that we have mappings for any new CPUs as they come online and tear down mappings for any CPUs that go offline. Signed-off-by: Balbir Singh <bsingharora@gmail.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
1 parent efe4fbb commit 37bc3e5

File tree

1 file changed

+167
-4
lines changed

1 file changed

+167
-4
lines changed

arch/powerpc/lib/code-patching.c

+167-4
Original file line numberDiff line numberDiff line change
@@ -12,23 +12,186 @@
1212
#include <linux/vmalloc.h>
1313
#include <linux/init.h>
1414
#include <linux/mm.h>
15-
#include <asm/page.h>
16-
#include <asm/code-patching.h>
15+
#include <linux/cpuhotplug.h>
16+
#include <linux/slab.h>
1717
#include <linux/uaccess.h>
1818
#include <linux/kprobes.h>
1919

20+
#include <asm/pgtable.h>
21+
#include <asm/tlbflush.h>
22+
#include <asm/page.h>
23+
#include <asm/code-patching.h>
2024

21-
int patch_instruction(unsigned int *addr, unsigned int instr)
25+
static int __patch_instruction(unsigned int *addr, unsigned int instr)
2226
{
2327
int err;
2428

2529
__put_user_size(instr, addr, 4, err);
2630
if (err)
2731
return err;
28-
asm ("dcbst 0, %0; sync; icbi 0,%0; sync; isync" : : "r" (addr));
32+
33+
asm ("dcbst 0, %0; sync; icbi 0,%0; sync; isync" :: "r" (addr));
34+
35+
return 0;
36+
}
37+
38+
#ifdef CONFIG_STRICT_KERNEL_RWX
39+
static DEFINE_PER_CPU(struct vm_struct *, text_poke_area);
40+
41+
static int text_area_cpu_up(unsigned int cpu)
42+
{
43+
struct vm_struct *area;
44+
45+
area = get_vm_area(PAGE_SIZE, VM_ALLOC);
46+
if (!area) {
47+
WARN_ONCE(1, "Failed to create text area for cpu %d\n",
48+
cpu);
49+
return -1;
50+
}
51+
this_cpu_write(text_poke_area, area);
52+
53+
return 0;
54+
}
55+
56+
static int text_area_cpu_down(unsigned int cpu)
57+
{
58+
free_vm_area(this_cpu_read(text_poke_area));
59+
return 0;
60+
}
61+
62+
/*
63+
* Run as a late init call. This allows all the boot time patching to be done
64+
* simply by patching the code, and then we're called here prior to
65+
* mark_rodata_ro(), which happens after all init calls are run. Although
66+
* BUG_ON() is rude, in this case it should only happen if ENOMEM, and we judge
67+
* it as being preferable to a kernel that will crash later when someone tries
68+
* to use patch_instruction().
69+
*/
70+
static int __init setup_text_poke_area(void)
71+
{
72+
BUG_ON(!cpuhp_setup_state(CPUHP_AP_ONLINE_DYN,
73+
"powerpc/text_poke:online", text_area_cpu_up,
74+
text_area_cpu_down));
75+
76+
return 0;
77+
}
78+
late_initcall(setup_text_poke_area);
79+
80+
/*
81+
* This can be called for kernel text or a module.
82+
*/
83+
static int map_patch_area(void *addr, unsigned long text_poke_addr)
84+
{
85+
unsigned long pfn;
86+
int err;
87+
88+
if (is_vmalloc_addr(addr))
89+
pfn = vmalloc_to_pfn(addr);
90+
else
91+
pfn = __pa_symbol(addr) >> PAGE_SHIFT;
92+
93+
err = map_kernel_page(text_poke_addr, (pfn << PAGE_SHIFT),
94+
pgprot_val(PAGE_KERNEL));
95+
96+
pr_devel("Mapped addr %lx with pfn %lx:%d\n", text_poke_addr, pfn, err);
97+
if (err)
98+
return -1;
99+
29100
return 0;
30101
}
31102

103+
static inline int unmap_patch_area(unsigned long addr)
104+
{
105+
pte_t *ptep;
106+
pmd_t *pmdp;
107+
pud_t *pudp;
108+
pgd_t *pgdp;
109+
110+
pgdp = pgd_offset_k(addr);
111+
if (unlikely(!pgdp))
112+
return -EINVAL;
113+
114+
pudp = pud_offset(pgdp, addr);
115+
if (unlikely(!pudp))
116+
return -EINVAL;
117+
118+
pmdp = pmd_offset(pudp, addr);
119+
if (unlikely(!pmdp))
120+
return -EINVAL;
121+
122+
ptep = pte_offset_kernel(pmdp, addr);
123+
if (unlikely(!ptep))
124+
return -EINVAL;
125+
126+
pr_devel("clearing mm %p, pte %p, addr %lx\n", &init_mm, ptep, addr);
127+
128+
/*
129+
* In hash, pte_clear flushes the tlb, in radix, we have to
130+
*/
131+
pte_clear(&init_mm, addr, ptep);
132+
flush_tlb_kernel_range(addr, addr + PAGE_SIZE);
133+
134+
return 0;
135+
}
136+
137+
int patch_instruction(unsigned int *addr, unsigned int instr)
138+
{
139+
int err;
140+
unsigned int *dest = NULL;
141+
unsigned long flags;
142+
unsigned long text_poke_addr;
143+
unsigned long kaddr = (unsigned long)addr;
144+
145+
/*
146+
* During early early boot patch_instruction is called
147+
* when text_poke_area is not ready, but we still need
148+
* to allow patching. We just do the plain old patching
149+
* We use slab_is_available and per cpu read * via this_cpu_read
150+
* of text_poke_area. Per-CPU areas might not be up early
151+
* this can create problems with just using this_cpu_read()
152+
*/
153+
if (!slab_is_available() || !this_cpu_read(text_poke_area))
154+
return __patch_instruction(addr, instr);
155+
156+
local_irq_save(flags);
157+
158+
text_poke_addr = (unsigned long)__this_cpu_read(text_poke_area)->addr;
159+
if (map_patch_area(addr, text_poke_addr)) {
160+
err = -1;
161+
goto out;
162+
}
163+
164+
dest = (unsigned int *)(text_poke_addr) +
165+
((kaddr & ~PAGE_MASK) / sizeof(unsigned int));
166+
167+
/*
168+
* We use __put_user_size so that we can handle faults while
169+
* writing to dest and return err to handle faults gracefully
170+
*/
171+
__put_user_size(instr, dest, 4, err);
172+
if (!err)
173+
asm ("dcbst 0, %0; sync; icbi 0,%0; icbi 0,%1; sync; isync"
174+
::"r" (dest), "r"(addr));
175+
176+
err = unmap_patch_area(text_poke_addr);
177+
if (err)
178+
pr_warn("failed to unmap %lx\n", text_poke_addr);
179+
180+
out:
181+
local_irq_restore(flags);
182+
183+
return err;
184+
}
185+
#else /* !CONFIG_STRICT_KERNEL_RWX */
186+
187+
/*
 * Without CONFIG_STRICT_KERNEL_RWX no alternate mapping is used: the
 * instruction is written straight through @addr by __patch_instruction(),
 * whose result is returned unchanged.
 */
int patch_instruction(unsigned int *addr, unsigned int instr)
{
	return __patch_instruction(addr, instr);
}
191+
192+
#endif /* CONFIG_STRICT_KERNEL_RWX */
193+
NOKPROBE_SYMBOL(patch_instruction);
194+
32195
int patch_branch(unsigned int *addr, unsigned long target, int flags)
33196
{
34197
return patch_instruction(addr, create_branch(addr, target, flags));

0 commit comments

Comments
 (0)