1. memblock

1.1. 结构体

memblock相关的全局变量是编译时静态初始化的。

 1/// mm/memblock.c
 2
 3/// INIT_MEMBLOCK_MEMORY_REGIONS和INIT_MEMBLOCK_RESERVED_REGIONS见arch/arm64/include/asm/memory.h
 4static struct memblock_region memblock_memory_init_regions[INIT_MEMBLOCK_MEMORY_REGIONS] __initdata_memblock;
 5static struct memblock_region memblock_reserved_init_regions[INIT_MEMBLOCK_RESERVED_REGIONS] __initdata_memblock;
 6#ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP
 7static struct memblock_region memblock_physmem_init_regions[INIT_PHYSMEM_REGIONS];
 8#endif
 9
10struct memblock memblock __initdata_memblock = {
11    .memory.regions		= memblock_memory_init_regions,
12    .memory.cnt		= 1,	/* empty dummy entry */
13    .memory.max		= INIT_MEMBLOCK_MEMORY_REGIONS,
14    .memory.name		= "memory",
15
16    .reserved.regions	= memblock_reserved_init_regions,
17    .reserved.cnt		= 1,	/* empty dummy entry */
18    .reserved.max		= INIT_MEMBLOCK_RESERVED_REGIONS,
19    .reserved.name		= "reserved",
20
21    .bottom_up		= false,
22    .current_limit		= MEMBLOCK_ALLOC_ANYWHERE,
23};
24
25#ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP
26struct memblock_type physmem = {
27    .regions		= memblock_physmem_init_regions,
28    .cnt			= 1,	/* empty dummy entry */
29    .max			= INIT_PHYSMEM_REGIONS,
30    .name			= "physmem",
31};
32#endif

1.2. memblock操作函数

主要关注struct memblock memblock的memory和reserved两个成员,以及下面三个函数。

  1. memblock_add向memblock.memory添加内存。
  2. memblock_alloc从memblock.memory中分配内存,并将申请的内存加入到memblock.reserved中。
  3. memblock_free从memblock.reserved中移除内存(从memblock.memory中移除内存的是memblock_remove)。

1.3. memory和reserved

 1# cat /sys/kernel/debug/memblock/memory
 2   0: 0x0000000040000000..0x000000007fffffff    0 NONE
 3# cat /sys/kernel/debug/memblock/reserved
 4   0: 0x0000000040210000..0x0000000042408fff    0 NONE
 5   1: 0x000000004240e000..0x000000004240ffff    0 NONE
 6   2: 0x0000000048000000..0x00000000480fffff    0 NONE
 7   3: 0x00000000717c0000..0x000000007f9fffff    0 NONE
 8   4: 0x000000007fa25000..0x000000007fa6efff    0 NONE
 9   5: 0x000000007fa6f7c0..0x000000007fb8fbc7    0 NONE
10   6: 0x000000007fb8fc00..0x000000007fbcbfff    0 NONE
11   7: 0x000000007fbce000..0x000000007fbcffff    0 NONE
12   8: 0x000000007fbd0200..0x000000007fbd024f    0 NONE
13   9: 0x000000007fbd0280..0x000000007fbd0447    0 NONE
14  10: 0x000000007fbd0480..0x000000007fbd04df    0 NONE
15  11: 0x000000007fbd0500..0x000000007fbd0687    0 NONE
16  12: 0x000000007fbd06c0..0x000000007fbd0907    0 NONE
17  13: 0x000000007fbd0940..0x000000007fbd0a5f    0 NONE
18  14: 0x000000007fbd0a80..0x000000007fbd0a8f    0 NONE
19  15: 0x000000007fbd0ac0..0x000000007fbd0ac7    0 NONE
20  16: 0x000000007fbd0b00..0x000000007fbd0b07    0 NONE
21  17: 0x000000007fbd0b40..0x000000007fbd0bcf    0 NONE
22  18: 0x000000007fbd0c00..0x000000007fbd0c8f    0 NONE
23  19: 0x000000007fbd0cc0..0x000000007fbf1e47    0 NONE
24  20: 0x000000007fbf1e60..0x000000007fbfbffb    0 NONE
25  21: 0x000000007fbfc000..0x000000007fffffff    0 NONE
26
27# cat /proc/iomem    /// 部分内容
2840000000-7fffffff : System RAM
29  40210000-4165ffff : Kernel code
30  41660000-41deffff : reserved
31  41df0000-4240ffff : Kernel data
32  48000000-480fffff : reserved
33  717c0000-759fffff : reserved
34  75a00000-7d9fffff : Crash kernel
35  7da00000-7f9fffff : reserved
36  7fa25000-7fa6efff : reserved
37  7fa6f000-7fb8ffff : reserved
38  7fb90000-7fbcbfff : reserved
39  7fbce000-7fbcffff : reserved
40  7fbd0000-7fbd0fff : reserved
41  7fbd1000-7fbf1fff : reserved
42  7fbf2000-7fbfbfff : reserved
43  7fbfc000-7fffffff : reserved

memblock是从内存区域的角度描述物理内存,而/proc/iomem是从资源(resource)的角度描述。

1.3.1. memory

  • 设备树中"memory"节点指定的内存。
  • arm64_memblock_init处理命令行中的"mem",无论memory_limit是多少,都需要把内核镜像区域加到memory中。
  • 使用initrd时,arm64_memblock_init,需要将initrd占用的内存加入memory中。

1.3.2. reserved

  • setup_machine_fdt中,将设备树的区域加入到reserved中。

  • arm64_memblock_init -> early_init_fdt_scan_reserved_mem -> fdt_scan_reserved_mem扫描设备树中reserved-memory下指定no-map的节点。

  • arm64_memblock_init,将内核镜像区域加入到reserved中。 arm64_memblock_init,将initrd区域加入到reserved中。

  • 使用initramfs时,reserve_initrd_mem将initrd区域加入到reserved中。

  • setup_arch -> bootmem_init -> cma_declare_contiguous_nid,cma内存区域。

  • setup_arch -> bootmem_init -> reserve_crashkernel,crashkernel内存区域。

  • 启动过程中,使用memblock_alloc申请后,未释放的内存,举例如下:

    1. 启动参数指定log_buf_len时,setup_log_buf申请新的log_buf。
    2. setup_command_line中申请存放saved_command_line和static_command_line的内存。
    3. 建立映射时,使用early_pgtable_alloc为pgd申请的内存,如线性映射。

2. 获取物理内存大小

主要函数是early_init_dt_scan_memory(drivers/of/fdt.c)。该函数扫描设备树中的"memory"节点,并使用memblock_add将内存加入到memblock.memory中。

3. arm64_memblock_init

注意在调用arm64_memblock_init前,已经使用memblock_add添加过内存。

arm64_memblock_init的主要作用就是移除一些不能映射的区域,并处理预留内存:

  1. 内核镜像区域。
  2. initrd区域。
  3. 设备树区域。
  4. 设备树中指定的reserved-memory和memreserve。

4. paging_init

paging_init主要完成两个映射。

 1/// arch/arm64/mm/mmu.c
 2void __init paging_init(void)
 3{
 4    pgd_t *pgdp = pgd_set_fixmap(__pa_symbol(swapper_pg_dir));
 5    extern pgd_t init_idmap_pg_dir[];
 6
 7    idmap_t0sz = 63UL - __fls(__pa_symbol(_end) | GENMASK(VA_BITS_MIN - 1, 0));
 8
 9    map_kernel(pgdp);    /// 内核镜像到vmalloc区域中对应的虚拟地址
10    map_mem(pgdp);       /// 整个物理内存区域的线性映射
11
12    pgd_clear_fixmap();
13
14    /// 替换pgd为swapper_pg_dir
15    cpu_replace_ttbr1(lm_alias(swapper_pg_dir), init_idmap_pg_dir);
16    init_mm.pgd = swapper_pg_dir;
17
18    memblock_phys_free(__pa_symbol(init_pg_dir),
19            __pa_symbol(init_pg_end) - __pa_symbol(init_pg_dir));
20
21    memblock_allow_resize();
22
23    create_idmap();   /// 建立.idmap.text段的映射
24}

5. 内核空间内存布局

ARM64 Linux-6.6的内存布局中,线性映射区域在内核空间的低地址,而5.3.0-rc3之前的版本内核,线性映射区域在高地址,见commit 14c127c957c1c6070647c171e72f06e0db275ebf

| 宏/变量 | 含义 |
| --- | --- |
| PAGE_OFFSET | 内核空间虚拟地址起始 |
| PAGE_END | 内核空间虚拟地址空间的中间位置 |
| PHYS_OFFSET | 物理内存起始地址 |
| kimage_voffset | 内核镜像映射(虚拟地址)起始地址和内核镜像在物理内存地址的差值 |

控制内存布局的宏主要在arch/arm64/include/asm/memory.h中定义。

 1/// arch/arm64/include/asm/memory.h
 2/*
 3 * PAGE_OFFSET - the virtual address of the start of the linear map, at the
 4 *               start of the TTBR1 address space.
 5 * PAGE_END - the end of the linear map, where all other kernel mappings begin.
 6 * KIMAGE_VADDR - the virtual address of the start of the kernel image.
 7 * VA_BITS - the maximum number of bits for virtual addresses.
 8 */
 9#define VA_BITS	            (CONFIG_ARM64_VA_BITS)
10#define _PAGE_OFFSET(va)    (-(UL(1) << (va)))
11#define PAGE_OFFSET         (_PAGE_OFFSET(VA_BITS))                     /// (1 << 64) - (1 << VA_BITS)
12#define KIMAGE_VADDR        (MODULES_END)
13#define MODULES_END         (MODULES_VADDR + MODULES_VSIZE)
14#define MODULES_VADDR       (_PAGE_END(VA_BITS_MIN))                    /// (1 << 64) - (1 << (VA_BITS_MIN - 1))
15#define MODULES_VSIZE       (SZ_2G)
16#define VMEMMAP_START       (-(UL(1) << (VA_BITS - VMEMMAP_SHIFT)))     /// (1 << 64) - (1 << (VA_BITS - VMEMMAP_SHIFT))
17#define VMEMMAP_END         (VMEMMAP_START + VMEMMAP_SIZE)
18#define PCI_IO_END          (VMEMMAP_START - SZ_8M)
19#define PCI_IO_START        (PCI_IO_END - PCI_IO_SIZE)
20#define FIXADDR_TOP         (VMEMMAP_START - SZ_32M)
21
22#if VA_BITS > 48
23#define VA_BITS_MIN         (48)
24#else
25#define VA_BITS_MIN         (VA_BITS)
26#endif
27
28#define _PAGE_END(va)       (-(UL(1) << ((va) - 1)))                    /// (1 << 64) - (1 << (va - 1))
29
30#define KERNEL_START        _text
31#define KERNEL_END          _end

VMALLOC_START和VMALLOC_END定义如下。

 1/// arch/arm64/include/asm/pgtable.h
 2/*
 3 * VMALLOC range.
 4 *
 5 * VMALLOC_START: beginning of the kernel vmalloc space
 6 * VMALLOC_END: extends to the available space below vmemmap, PCI I/O space
 7 *	and fixed mappings
 8 */
 9#define VMALLOC_START		(MODULES_END)
10#define VMALLOC_END		(VMEMMAP_START - SZ_256M)

5.1. 内核空间内存布局示例

不考虑KASAN_SHADOW时,布局如下:

 1/// VA_BITS = 48
 2/// VA_BITS_MIN = 48
 3/// VMEMMAP_SHIFT = 6
 4/// PAGE_SHIFT = 12
 5
 6high | VMEMMAP_END                     /// 0xffff_fe00_0000_0000
 7     |     VMEMMAP_SIZE                /// VMEMMAP_SIZE=2TB(0x200_0000_0000)
 8  ^  | VMEMMAP_START                   /// 0xffff_fc00_0000_0000(VMEMMAP_SHIFT=6)
 9  |  |     hole                        /// 8MB(0x80_0000)
10  |  | PCI_IO_END                      /// 0xffff_fbff_ff80_0000
11  |  |     PCI_IO_SIZE                 /// PCI_IO_SIZE=16MB(0x100_0000)
12  |  | PCI_IO_START                    /// 0xffff_fbff_fe80_0000
13  |  |     hole
14  |  | FIXADDR_TOP                     /// 0xffff_fbff_fe00_0000(VMEMMAP_START - SZ_32M)
15  |  |     FIXADDR_SIZE                /// (__end_of_permanent_fixed_addresses << PAGE_SHIFT)
16  |  | FIXADDR_START                   /// FIXADDR_TOP - FIXADDR_SIZE
17  |  |     FIXADDR_TOT_SIZE            /// (__end_of_fixed_addresses << PAGE_SHIFT)
18  |  | FIXADDR_TOT_START               /// FIXADDR_TOP - FIXADDR_TOT_SIZE
19  |  |     hole
20  |  | VMALLOC_END                     /// 0xffff_fbff_f000_0000(VMEMMAP_START - SZ_256M)
21  |  |     126973.75GB                 /// (126974GB - 256M)(含kimage)
22  |  | VMALLOC_START(MODULES_END)      /// 0xffff_8000_8000_0000(VA_BITS=48)
23  |  | MODULES_END/KIMAGE_VADDR        /// 0xffff_8000_8000_0000(VA_BITS=48)
24  |  |     MODULES_VSIZE               /// 2GB(0x8000_0000)
25  |  | MODULES_VADDR/PAGE_END          /// 0xffff_8000_0000_0000(VA_BITS=48)
26  |  |     128TB                       /// Linear Mapping(0x8000_0000_0000)
27 low | PAGE_OFFSET                     /// 0xffff_0000_0000_0000(VA_BITS=48)

注意现在这个布局中,只有部分区域建立了页表,如线性映射区,内核镜像区等。

默认情况下,modules使用vmalloc区域,见module_alloc函数。ARM64为modules单独划分了区域,采用与vmalloc相同的算法进行管理。

5.2. 内核镜像布局

内核镜像在vmalloc区域,如果启动参数中添加nokaslr,内核镜像的起始地址与VMALLOC_START相同。

name flags addr start addr end size
.head.text AX FFFF_8000_8000_0000 FFFF_8000_8000_FFFF 0x10000(65536)
.text AX FFFF_8000_8001_0000 FFFF_8000_80DD_9FFF 0xDCA000(14458880)
hole FFFF_8000_80DD_A000 FFFF_8000_80DD_FFFF 0x6000(24576)
.rodata WA FFFF_8000_80DE_0000 FFFF_8000_813D_0B5D 0x5F0B5E(6228830)
hole FFFF_8000_813D_0B5E FFFF_8000_813D_0B5F 0x2(2)
.pci_fixup A FFFF_8000_813D_0B60 FFFF_8000_813D_358F 0x2A30(10800)
.printk_index WA FFFF_8000_813D_3590 FFFF_8000_813E_92FF 0x15D70(89456)
__ksymtab A FFFF_8000_813E_9300 FFFF_8000_813F_7267 0xDF68(57192)
__ksymtab_gpl A FFFF_8000_813F_7268 FFFF_8000_8140_9E73 0x12C0C(76812)
__kcrctab A FFFF_8000_8140_9E74 FFFF_8000_8140_E8EB 0x4A78(19064)
__kcrctab_gpl A FFFF_8000_8140_E8EC FFFF_8000_8141_4CEF 0x6404(25604)
__ksymtab_strings AMS FFFF_8000_8141_4CF0 FFFF_8000_8144_A5F4 0x35905(219397)
hole FFFF_8000_8144_A5F5 FFFF_8000_8144_A5F7 0x3(3)
__param A FFFF_8000_8144_A5F8 FFFF_8000_8144_DBDF 0x35E8(13800)
__modver WA FFFF_8000_8144_DBE0 FFFF_8000_8144_E1C7 0x5E8(1512)
__ex_table A FFFF_8000_8144_E1C8 FFFF_8000_8145_070B 0x2544(9540)
.notes A FFFF_8000_8145_070C FFFF_8000_8145_075F 0x54(84)
hole FFFF_8000_8145_0760 FFFF_8000_8145_0FFF 0x8A0(2208)
.got WA FFFF_8000_8145_1000 FFFF_8000_8145_1007 0x8(8)
.got.plt WA FFFF_8000_8145_1008 FFFF_8000_8145_101F 0x18(24)
hole FFFF_8000_8145_1020 FFFF_8000_8145_17FF 0x7E0(2016)
.rodata.text AX FFFF_8000_8145_1800 FFFF_8000_8145_6FFF 0x5800(22528)
hole FFFF_8000_8145_7000 FFFF_8000_8145_FFFF 0x9000(36864)
.init.text AX FFFF_8000_8146_0000 FFFF_8000_814D_0E83 0x70E84(462468)
hole FFFF_8000_814D_0E84 FFFF_8000_814D_0E87 0x4(4)
.exit.text AX FFFF_8000_814D_0E88 FFFF_8000_814D_531B 0x4494(17556)
.altinstructions A FFFF_8000_814D_531C FFFF_8000_8151_C64F 0x47334(291636)
hole FFFF_8000_8151_C650 FFFF_8000_8152_4FFF 0x89B0(35248)
.init.data WA FFFF_8000_8152_5000 FFFF_8000_8161_BA99 0xF6A9A(1010330)
hole FFFF_8000_8161_BA9A FFFF_8000_8161_BFFF 0x566(1382)
.data..percpu WA FFFF_8000_8161_C000 FFFF_8000_8163_0CF7 0x14CF8(85240)
.rela.dyn A FFFF_8000_8163_0CF8 FFFF_8000_81BE_099F 0x5AFCA8(5962920)
hole FFFF_8000_81BE_09A0 FFFF_8000_81BE_FFFF 0xF660(63072)
.data WA FFFF_8000_81BF_0000 FFFF_8000_81E5_E89F 0x26E8A0(2549920)
__bug_table WA FFFF_8000_81E5_E8A0 FFFF_8000_81E7_4C1F 0x16380(91008)
hole FFFF_8000_81E7_4C20 FFFF_8000_81E7_4FFF 0x3E0(992)
.mmuoff.data.write WA FFFF_8000_81E7_5000 FFFF_8000_81E7_5007 0x8(8)
hole FFFF_8000_81E7_5008 FFFF_8000_81E7_57FF 0x7F8(2040)
.mmuoff.data.read WA FFFF_8000_81E7_5800 FFFF_8000_81E7_5807 0x8(8)
.pecoff_edata_padding A FFFF_8000_81E7_5808 FFFF_8000_81E7_59FF 0x1F8(504)
hole FFFF_8000_81E7_5A00 FFFF_8000_81E7_5FFF 0x600(1536)
.bss WA FFFF_8000_81E7_6000 FFFF_8000_8220_89A7 0x3929A8(3746216)