Commit c75a74f

bjorn-rivos authored and palmer-dabbelt committed
riscv: mm: Add memory hotplugging support
For an architecture to support memory hotplugging, a couple of
callbacks need to be implemented:

arch_add_memory()
This callback is responsible for adding the physical memory into the
direct map, and calling into the memory hotplugging generic code via
__add_pages(), which adds the corresponding struct page entries and
updates the vmemmap mapping.

arch_remove_memory()
This is the inverse of the callback above.

vmemmap_free()
This function tears down the vmemmap mappings (if
CONFIG_SPARSEMEM_VMEMMAP is enabled), and also deallocates the backing
vmemmap pages. Note that for persistent memory, an alternative
allocator for the backing pages can be used: the vmem_altmap. This
means that when the backing pages are cleared, extra care is needed so
that the correct deallocation method is used.

arch_get_mappable_range()
This function returns the PA range that the direct map can map. Used
by the MHP internals for sanity checks.

The page table unmap/teardown functions are heavily based on code from
the x86 tree. The same remove_pgd_mapping() function is used in both
vmemmap_free() and arch_remove_memory(), but in the latter function the
backing pages are not removed.

Signed-off-by: Björn Töpel <bjorn@rivosinc.com>
Reviewed-by: Alexandre Ghiti <alexghiti@rivosinc.com>
Link: https://lore.kernel.org/r/20240605114100.315918-7-bjorn@kernel.org
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
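Taken together, the arch-side contract amounts to the four entry points
below. A minimal signature sketch, assuming the usual generic types
(struct mhp_params from <linux/memory_hotplug.h>, struct vmem_altmap
from <linux/memremap.h>, struct range from <linux/range.h>):

/* The four callbacks an architecture provides for memory hotplug;
 * signatures as implemented in the diff below. */
struct range arch_get_mappable_range(void);
int arch_add_memory(int nid, u64 start, u64 size, struct mhp_params *params);
void arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap);
void vmemmap_free(unsigned long start, unsigned long end, struct vmem_altmap *altmap);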
1 parent 6e6c5e2 commit c75a74f

File tree

1 file changed, +267 -0 lines changed

arch/riscv/mm/init.c

@@ -1533,3 +1533,270 @@ struct execmem_info __init *execmem_arch_setup(void)
}
#endif /* CONFIG_MMU */
#endif /* CONFIG_EXECMEM */

#ifdef CONFIG_MEMORY_HOTPLUG
static void __meminit free_pte_table(pte_t *pte_start, pmd_t *pmd)
{
	struct page *page = pmd_page(*pmd);
	struct ptdesc *ptdesc = page_ptdesc(page);
	pte_t *pte;
	int i;

	for (i = 0; i < PTRS_PER_PTE; i++) {
		pte = pte_start + i;
		if (!pte_none(*pte))
			return;
	}

	pagetable_pte_dtor(ptdesc);
	if (PageReserved(page))
		free_reserved_page(page);
	else
		pagetable_free(ptdesc);
	pmd_clear(pmd);
}

static void __meminit free_pmd_table(pmd_t *pmd_start, pud_t *pud)
{
	struct page *page = pud_page(*pud);
	struct ptdesc *ptdesc = page_ptdesc(page);
	pmd_t *pmd;
	int i;

	for (i = 0; i < PTRS_PER_PMD; i++) {
		pmd = pmd_start + i;
		if (!pmd_none(*pmd))
			return;
	}

	pagetable_pmd_dtor(ptdesc);
	if (PageReserved(page))
		free_reserved_page(page);
	else
		pagetable_free(ptdesc);
	pud_clear(pud);
}

static void __meminit free_pud_table(pud_t *pud_start, p4d_t *p4d)
{
	struct page *page = p4d_page(*p4d);
	pud_t *pud;
	int i;

	for (i = 0; i < PTRS_PER_PUD; i++) {
		pud = pud_start + i;
		if (!pud_none(*pud))
			return;
	}

	if (PageReserved(page))
		free_reserved_page(page);
	else
		free_pages((unsigned long)page_address(page), 0);
	p4d_clear(p4d);
}

static void __meminit free_vmemmap_storage(struct page *page, size_t size,
					   struct vmem_altmap *altmap)
{
	int order = get_order(size);

	if (altmap) {
		vmem_altmap_free(altmap, size >> PAGE_SHIFT);
		return;
	}

	if (PageReserved(page)) {
		unsigned int nr_pages = 1 << order;

		while (nr_pages--)
			free_reserved_page(page++);
		return;
	}

	free_pages((unsigned long)page_address(page), order);
}

static void __meminit remove_pte_mapping(pte_t *pte_base, unsigned long addr, unsigned long end,
					 bool is_vmemmap, struct vmem_altmap *altmap)
{
	unsigned long next;
	pte_t *ptep, pte;

	for (; addr < end; addr = next) {
		next = (addr + PAGE_SIZE) & PAGE_MASK;
		if (next > end)
			next = end;

		ptep = pte_base + pte_index(addr);
		pte = ptep_get(ptep);
		if (!pte_present(pte))
			continue;

		pte_clear(&init_mm, addr, ptep);
		if (is_vmemmap)
			free_vmemmap_storage(pte_page(pte), PAGE_SIZE, altmap);
	}
}

static void __meminit remove_pmd_mapping(pmd_t *pmd_base, unsigned long addr, unsigned long end,
					 bool is_vmemmap, struct vmem_altmap *altmap)
{
	unsigned long next;
	pte_t *pte_base;
	pmd_t *pmdp, pmd;

	for (; addr < end; addr = next) {
		next = pmd_addr_end(addr, end);
		pmdp = pmd_base + pmd_index(addr);
		pmd = pmdp_get(pmdp);
		if (!pmd_present(pmd))
			continue;

		if (pmd_leaf(pmd)) {
			pmd_clear(pmdp);
			if (is_vmemmap)
				free_vmemmap_storage(pmd_page(pmd), PMD_SIZE, altmap);
			continue;
		}

		pte_base = (pte_t *)pmd_page_vaddr(*pmdp);
		remove_pte_mapping(pte_base, addr, next, is_vmemmap, altmap);
		free_pte_table(pte_base, pmdp);
	}
}

static void __meminit remove_pud_mapping(pud_t *pud_base, unsigned long addr, unsigned long end,
					 bool is_vmemmap, struct vmem_altmap *altmap)
{
	unsigned long next;
	pud_t *pudp, pud;
	pmd_t *pmd_base;

	for (; addr < end; addr = next) {
		next = pud_addr_end(addr, end);
		pudp = pud_base + pud_index(addr);
		pud = pudp_get(pudp);
		if (!pud_present(pud))
			continue;

		if (pud_leaf(pud)) {
			if (pgtable_l4_enabled) {
				pud_clear(pudp);
				if (is_vmemmap)
					free_vmemmap_storage(pud_page(pud), PUD_SIZE, altmap);
			}
			continue;
		}

		pmd_base = pmd_offset(pudp, 0);
		remove_pmd_mapping(pmd_base, addr, next, is_vmemmap, altmap);

		if (pgtable_l4_enabled)
			free_pmd_table(pmd_base, pudp);
	}
}

static void __meminit remove_p4d_mapping(p4d_t *p4d_base, unsigned long addr, unsigned long end,
					 bool is_vmemmap, struct vmem_altmap *altmap)
{
	unsigned long next;
	p4d_t *p4dp, p4d;
	pud_t *pud_base;

	for (; addr < end; addr = next) {
		next = p4d_addr_end(addr, end);
		p4dp = p4d_base + p4d_index(addr);
		p4d = p4dp_get(p4dp);
		if (!p4d_present(p4d))
			continue;

		if (p4d_leaf(p4d)) {
			if (pgtable_l5_enabled) {
				p4d_clear(p4dp);
				if (is_vmemmap)
					free_vmemmap_storage(p4d_page(p4d), P4D_SIZE, altmap);
			}
			continue;
		}

		pud_base = pud_offset(p4dp, 0);
		remove_pud_mapping(pud_base, addr, next, is_vmemmap, altmap);

		if (pgtable_l5_enabled)
			free_pud_table(pud_base, p4dp);
	}
}

static void __meminit remove_pgd_mapping(unsigned long va, unsigned long end, bool is_vmemmap,
					 struct vmem_altmap *altmap)
{
	unsigned long addr, next;
	p4d_t *p4d_base;
	pgd_t *pgd;

	for (addr = va; addr < end; addr = next) {
		next = pgd_addr_end(addr, end);
		pgd = pgd_offset_k(addr);

		if (!pgd_present(*pgd))
			continue;

		if (pgd_leaf(*pgd))
			continue;

		p4d_base = p4d_offset(pgd, 0);
		remove_p4d_mapping(p4d_base, addr, next, is_vmemmap, altmap);
	}

	flush_tlb_all();
}

static void __meminit remove_linear_mapping(phys_addr_t start, u64 size)
{
	unsigned long va = (unsigned long)__va(start);
	unsigned long end = (unsigned long)__va(start + size);

	remove_pgd_mapping(va, end, false, NULL);
}

struct range arch_get_mappable_range(void)
{
	struct range mhp_range;

	mhp_range.start = __pa(PAGE_OFFSET);
	mhp_range.end = __pa(PAGE_END - 1);
	return mhp_range;
}

int __ref arch_add_memory(int nid, u64 start, u64 size, struct mhp_params *params)
{
	int ret = 0;

	create_linear_mapping_range(start, start + size, 0, &params->pgprot);
	ret = __add_pages(nid, start >> PAGE_SHIFT, size >> PAGE_SHIFT, params);
	if (ret) {
		remove_linear_mapping(start, size);
		goto out;
	}

	max_pfn = PFN_UP(start + size);
	max_low_pfn = max_pfn;

out:
	flush_tlb_all();
	return ret;
}

void __ref arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap)
{
	__remove_pages(start >> PAGE_SHIFT, size >> PAGE_SHIFT, altmap);
	remove_linear_mapping(start, size);
	flush_tlb_all();
}

void __ref vmemmap_free(unsigned long start, unsigned long end, struct vmem_altmap *altmap)
{
	remove_pgd_mapping(start, end, true, altmap);
}
#endif /* CONFIG_MEMORY_HOTPLUG */
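For orientation, here is a minimal sketch of the order in which the
generic hotplug code ends up exercising these callbacks;
mhp_add_sketch() and mhp_remove_sketch() are hypothetical stand-ins for
the real paths through add_memory() and __remove_pages() in
mm/memory_hotplug.c, not kernel APIs:

/* Hypothetical illustration only: the actual callers live in
 * mm/memory_hotplug.c. */
static int mhp_add_sketch(int nid, u64 start, u64 size, struct mhp_params *params)
{
	struct range r = arch_get_mappable_range();

	/* Generic code sanity-checks the PA range first. */
	if (start < r.start || start + size - 1 > r.end)
		return -ERANGE;

	/* Creates the direct mapping, then struct pages + vmemmap. */
	return arch_add_memory(nid, start, size, params);
}

static void mhp_remove_sketch(u64 start, u64 size, struct vmem_altmap *altmap)
{
	/* Tears down struct pages (vmemmap_free() is reached from
	 * __remove_pages()), then removes the direct mapping. */
	arch_remove_memory(start, size, altmap);
}

Note how arch_remove_memory() itself only unmaps the direct map; the
vmemmap teardown happens via vmemmap_free(), which is why
remove_pgd_mapping() takes the is_vmemmap flag.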
