1. memblock
1.1. 结构体
memblock
相关的全局变量是编译时静态初始化的。
1/// mm/memblock.c
2
3/// INIT_MEMBLOCK_MEMORY_REGIONS和INIT_MEMBLOCK_RESERVED_REGIONS见arch/arm64/include/asm/memory.h
4static struct memblock_region memblock_memory_init_regions[INIT_MEMBLOCK_MEMORY_REGIONS] __initdata_memblock;
5static struct memblock_region memblock_reserved_init_regions[INIT_MEMBLOCK_RESERVED_REGIONS] __initdata_memblock;
6#ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP
7static struct memblock_region memblock_physmem_init_regions[INIT_PHYSMEM_REGIONS];
8#endif
9
10struct memblock memblock __initdata_memblock = {
11 .memory.regions = memblock_memory_init_regions,
12 .memory.cnt = 1, /* empty dummy entry */
13 .memory.max = INIT_MEMBLOCK_MEMORY_REGIONS,
14 .memory.name = "memory",
15
16 .reserved.regions = memblock_reserved_init_regions,
17 .reserved.cnt = 1, /* empty dummy entry */
18 .reserved.max = INIT_MEMBLOCK_RESERVED_REGIONS,
19 .reserved.name = "reserved",
20
21 .bottom_up = false,
22 .current_limit = MEMBLOCK_ALLOC_ANYWHERE,
23};
24
25#ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP
26struct memblock_type physmem = {
27 .regions = memblock_physmem_init_regions,
28 .cnt = 1, /* empty dummy entry */
29 .max = INIT_PHYSMEM_REGIONS,
30 .name = "physmem",
31};
32#endif
1.2. memblock操作函数
主要关注struct memblock memblock
的memory
和reserved
两个成员和三个函数。
memblock_add
向memblock.memory
添加内存。memblock_alloc
从memblock.memory
中分配内存,并将申请的内存加入到memblock.reserved
中。memblock_free
从memblock.reserved
中移除内存。
1.3. memory和reserved
1# cat /sys/kernel/debug/memblock/memory
2 0: 0x0000000040000000..0x000000007fffffff 0 NONE
3# cat /sys/kernel/debug/memblock/reserved
4 0: 0x0000000040210000..0x0000000042408fff 0 NONE
5 1: 0x000000004240e000..0x000000004240ffff 0 NONE
6 2: 0x0000000048000000..0x00000000480fffff 0 NONE
7 3: 0x00000000717c0000..0x000000007f9fffff 0 NONE
8 4: 0x000000007fa25000..0x000000007fa6efff 0 NONE
9 5: 0x000000007fa6f7c0..0x000000007fb8fbc7 0 NONE
10 6: 0x000000007fb8fc00..0x000000007fbcbfff 0 NONE
11 7: 0x000000007fbce000..0x000000007fbcffff 0 NONE
12 8: 0x000000007fbd0200..0x000000007fbd024f 0 NONE
13 9: 0x000000007fbd0280..0x000000007fbd0447 0 NONE
14 10: 0x000000007fbd0480..0x000000007fbd04df 0 NONE
15 11: 0x000000007fbd0500..0x000000007fbd0687 0 NONE
16 12: 0x000000007fbd06c0..0x000000007fbd0907 0 NONE
17 13: 0x000000007fbd0940..0x000000007fbd0a5f 0 NONE
18 14: 0x000000007fbd0a80..0x000000007fbd0a8f 0 NONE
19 15: 0x000000007fbd0ac0..0x000000007fbd0ac7 0 NONE
20 16: 0x000000007fbd0b00..0x000000007fbd0b07 0 NONE
21 17: 0x000000007fbd0b40..0x000000007fbd0bcf 0 NONE
22 18: 0x000000007fbd0c00..0x000000007fbd0c8f 0 NONE
23 19: 0x000000007fbd0cc0..0x000000007fbf1e47 0 NONE
24 20: 0x000000007fbf1e60..0x000000007fbfbffb 0 NONE
25 21: 0x000000007fbfc000..0x000000007fffffff 0 NONE
26
27# cat /proc/iomem /// 部分内容
28 40000000-7fffffff : System RAM
29 40210000-4165ffff : Kernel code
30 41660000-41deffff : reserved
31 41df0000-4240ffff : Kernel data
32 48000000-480fffff : reserved
33 717c0000-759fffff : reserved
34 75a00000-7d9fffff : Crash kernel
35 7da00000-7f9fffff : reserved
36 7fa25000-7fa6efff : reserved
37 7fa6f000-7fb8ffff : reserved
38 7fb90000-7fbcbfff : reserved
39 7fbce000-7fbcffff : reserved
40 7fbd0000-7fbd0fff : reserved
41 7fbd1000-7fbf1fff : reserved
42 7fbf2000-7fbfbfff : reserved
43 7fbfc000-7fffffff : reserved
memblock
从内存区域的角度。
iomem
从资源(resource
)的角度。
1.3.1. memory
- 设备树中
"memory"
节点指定的内存。 arm64_memblock_init
处理命令行中的"mem",无论memory_limit
是多少,都需要把内核镜像区域加到memory
中。- 使用
initrd
时,arm64_memblock_init
,需要将initrd
占用的内存加入memory
中。
1.3.2. reserved
-
setup_machine_fdt
中,将设备树的区域加入到reserved
中。 -
arm64_memblock_init -> early_init_fdt_scan_reserved_mem -> fdt_scan_reserved_mem
扫描设备树中reserved-memory
下的节点:未指定no-map
的静态区域加入到reserved中(指定no-map的静态区域只在memory中标记为MEMBLOCK_NOMAP)。 -
arm64_memblock_init
,将内核镜像区域加入到reserved
中。arm64_memblock_init
,将initrd
区域加入到reserved
中。 -
使用
initramfs
时,reserve_initrd_mem
将initrd
区域加入到reserved
中。 -
setup_arch -> bootmem_init -> cma_declare_contiguous_nid
,cma内存区域。 -
setup_arch -> bootmem_init -> reserve_crashkernel
,crashkernel内存区域。 -
启动过程中,使用
memblock_alloc
申请后,未释放的内存,举例如下:- 启动参数指定
log_buf_len
时,setup_log_buf
申请新的log_buf。 setup_command_line
中申请存放saved_command_line
和static_command_line
的内存。- 建立映射时,使用
early_pgtable_alloc
为各级页表申请的内存,如线性映射。
- 启动参数指定
2. 获取物理内存大小
主要函数early_init_dt_scan_memory
(drivers/of/fdt.c
)。扫描设备树中"memory"
节点,使用memblock_add
将内存加入到memblock.memory
中。
3. arm64_memblock_init
注意在调用arm64_memblock_init
前,已经使用过memblock_add
添加过内存。
arm64_memblock_init
的主要作用就是移除一些不能映射的区域,并处理预留内存:
- 内核镜像区域。
initrd
区域。- 设备树区域
- 设备树中指定的
reserved-memory
和memreserve
。
4. paging_init
paging_init
主要完成两个映射。
1/// arch/arm64/mm/mmu.c
2void __init paging_init(void)
3{
4 pgd_t *pgdp = pgd_set_fixmap(__pa_symbol(swapper_pg_dir));
5 extern pgd_t init_idmap_pg_dir[];
6
7 idmap_t0sz = 63UL - __fls(__pa_symbol(_end) | GENMASK(VA_BITS_MIN - 1, 0));
8
9 map_kernel(pgdp); /// 内核镜像到vmalloc区域中对应的虚拟地址
10 map_mem(pgdp); /// 整个物理内存区域的线性映射
11
12 pgd_clear_fixmap();
13
14 /// 替换pgd为swapper_pg_dir
15 cpu_replace_ttbr1(lm_alias(swapper_pg_dir), init_idmap_pg_dir);
16 init_mm.pgd = swapper_pg_dir;
17
18 memblock_phys_free(__pa_symbol(init_pg_dir),
19 __pa_symbol(init_pg_end) - __pa_symbol(init_pg_dir));
20
21 memblock_allow_resize();
22
23 create_idmap(); /// 建立.idmap.text段的映射
24}
5. 内核空间内存布局
ARM64 Linux-6.6的内存布局中,线性映射区域在内核空间的低地址,而5.3.0-rc3之前的版本内核,线性映射区域在高地址,见commit 14c127c957c1c6070647c171e72f06e0db275ebf。
宏/变量 | 含义 |
---|---|
PAGE_OFFSET | 内核空间虚拟地址起始 |
PAGE_END | 内核空间虚拟地址空间的中间位置 |
PHYS_OFFSET | 物理内存起始地址 |
kimage_voffset | 内核镜像映射(虚拟地址)起始地址和内核镜像在物理内存地址的差值 |
控制内存布局的宏主要在arch/arm64/include/asm/memory.h
中定义。
1/// arch/arm64/include/asm/memory.h
2/*
3 * PAGE_OFFSET - the virtual address of the start of the linear map, at the
4 * start of the TTBR1 address space.
5 * PAGE_END - the end of the linear map, where all other kernel mappings begin.
6 * KIMAGE_VADDR - the virtual address of the start of the kernel image.
7 * VA_BITS - the maximum number of bits for virtual addresses.
8 */
9#define VA_BITS (CONFIG_ARM64_VA_BITS)
10#define _PAGE_OFFSET(va) (-(UL(1) << (va)))
11#define PAGE_OFFSET (_PAGE_OFFSET(VA_BITS)) /// (1 << 64) - (1 << VA_BITS)
12#define KIMAGE_VADDR (MODULES_END)
13#define MODULES_END (MODULES_VADDR + MODULES_VSIZE)
14#define MODULES_VADDR (_PAGE_END(VA_BITS_MIN)) /// (1 << 64) - (1 << (VA_BITS_MIN - 1))
15#define MODULES_VSIZE (SZ_2G)
16#define VMEMMAP_START (-(UL(1) << (VA_BITS - VMEMMAP_SHIFT))) /// (1 << 64) - (1 << (VA_BITS - VMEMMAP_SHIFT))
17#define VMEMMAP_END (VMEMMAP_START + VMEMMAP_SIZE)
18#define PCI_IO_END (VMEMMAP_START - SZ_8M)
19#define PCI_IO_START (PCI_IO_END - PCI_IO_SIZE)
20#define FIXADDR_TOP (VMEMMAP_START - SZ_32M)
21
22#if VA_BITS > 48
23#define VA_BITS_MIN (48)
24#else
25#define VA_BITS_MIN (VA_BITS)
26#endif
27
28#define _PAGE_END(va) (-(UL(1) << ((va) - 1))) /// (1 << 64) - (1 << (va - 1))
29
30#define KERNEL_START _text
31#define KERNEL_END _end
VMALLOC_START
和VMALLOC_END
定义如下。
1/// arch/arm64/include/asm/pgtable.h
2/*
3 * VMALLOC range.
4 *
5 * VMALLOC_START: beginning of the kernel vmalloc space
6 * VMALLOC_END: extends to the available space below vmemmap, PCI I/O space
7 * and fixed mappings
8 */
9#define VMALLOC_START (MODULES_END)
10#define VMALLOC_END (VMEMMAP_START - SZ_256M)
5.1. 内核空间内存布局示例
不考虑KASAN_SHADOW
时,布局如下:
1/// VA_BITS = 48
2/// VA_BITS_MIN = 48
3/// VMEMMAP_SHIFT = 6
4/// PAGE_SHIFT = 12
5
6high | VMEMMAP_END /// 0xffff_fe00_0000_0000
7 | VMEMMAP_SIZE /// VMEMMAP_SIZE=2TB(0x200_0000_0000)
8 ^ | VMEMMAP_START /// 0xffff_fc00_0000_0000(VMEMMAP_SHIFT=6)
9 | | hole /// 8MB(0x80_0000)
10 | | PCI_IO_END /// 0xffff_fbff_ff80_0000
11 | | PCI_IO_SIZE /// PCI_IO_SIZE=16MB(0x100_0000)
12 | | PCI_IO_START /// 0xffff_fbff_fe80_0000
13 | | hole
14 | | FIXADDR_TOP /// 0xffff_fbff_fe00_0000(VMEMMAP_START - SZ_32M)
15 | | FIXADDR_SIZE /// (__end_of_permanent_fixed_addresses << PAGE_SHIFT)
16 | | FIXADDR_START /// FIXADDR_TOP - FIXADDR_SIZE
17 | | FIXADDR_TOT_SIZE /// (__end_of_fixed_addresses << PAGE_SHIFT)
18 | | FIXADDR_TOT_START /// FIXADDR_TOP - FIXADDR_TOT_SIZE
19 | | hole
20 | | VMALLOC_END /// 0xffff_fbff_f000_0000(VMEMMAP_START - SZ_256M)
21 | | 126973.75GB /// (126974GB - 256M)(含kimage)
22 | | VMALLOC_START(MODULES_END) /// 0xffff_8000_8000_0000(VA_BITS=48)
23 | | MODULES_END/KIMAGE_VADDR /// 0xffff_8000_8000_0000(VA_BITS=48)
24 | | MODULES_VSIZE /// 2GB(0x8000_0000)
25 | | MODULES_VADDR/PAGE_END /// 0xffff_8000_0000_0000(VA_BITS=48)
26 | | 128TB /// Linear Mapping(0x8000_0000_0000)
27 low | PAGE_OFFSET /// 0xffff_0000_0000_0000(VA_BITS=48)
注意现在这个布局中,只有部分区域建立了页表,如线性映射区,内核镜像区等。
默认情况下,modules
在vmalloc
区域,见module_alloc
函数。ARM64为modules
单独划分了区域,采用vmalloc
同一算法进行管理。
5.2. 内核镜像布局
内核镜像在vmalloc
区域,如果启动参数中添加nokaslr
,内核镜像的起始地址与VMALLOC_START
相同。
name | flags | addr start | addr end | size |
---|---|---|---|---|
.head.text | AX | FFFF_8000_8000_0000 | FFFF_8000_8000_FFFF | 0x10000(65536) |
.text | AX | FFFF_8000_8001_0000 | FFFF_8000_80DD_9FFF | 0xDCA000(14458880) |
hole | | FFFF_8000_80DD_A000 | FFFF_8000_80DD_FFFF | 0x6000(24576) |
.rodata | WA | FFFF_8000_80DE_0000 | FFFF_8000_813D_0B5D | 0x5F0B5E(6228830) |
hole | | FFFF_8000_813D_0B5E | FFFF_8000_813D_0B5F | 0x2(2) |
.pci_fixup | A | FFFF_8000_813D_0B60 | FFFF_8000_813D_358F | 0x2A30(10800) |
.printk_index | WA | FFFF_8000_813D_3590 | FFFF_8000_813E_92FF | 0x15D70(89456) |
__ksymtab | A | FFFF_8000_813E_9300 | FFFF_8000_813F_7267 | 0xDF68(57192) |
__ksymtab_gpl | A | FFFF_8000_813F_7268 | FFFF_8000_8140_9E73 | 0x12C0C(76812) |
__kcrctab | A | FFFF_8000_8140_9E74 | FFFF_8000_8140_E8EB | 0x4A78(19064) |
__kcrctab_gpl | A | FFFF_8000_8140_E8EC | FFFF_8000_8141_4CEF | 0x6404(25604) |
__ksymtab_strings | AMS | FFFF_8000_8141_4CF0 | FFFF_8000_8144_A5F4 | 0x35905(219397) |
hole | | FFFF_8000_8144_A5F5 | FFFF_8000_8144_A5F7 | 0x3(3) |
__param | A | FFFF_8000_8144_A5F8 | FFFF_8000_8144_DBDF | 0x35E8(13800) |
__modver | WA | FFFF_8000_8144_DBE0 | FFFF_8000_8144_E1C7 | 0x5E8(1512) |
__ex_table | A | FFFF_8000_8144_E1C8 | FFFF_8000_8145_070B | 0x2544(9540) |
.notes | A | FFFF_8000_8145_070C | FFFF_8000_8145_075F | 0x54(84) |
hole | | FFFF_8000_8145_0760 | FFFF_8000_8145_0FFF | 0x8A0(2208) |
.got | WA | FFFF_8000_8145_1000 | FFFF_8000_8145_1007 | 0x8(8) |
.got.plt | WA | FFFF_8000_8145_1008 | FFFF_8000_8145_101F | 0x18(24) |
hole | | FFFF_8000_8145_1020 | FFFF_8000_8145_17FF | 0x7E0(2016) |
.rodata.text | AX | FFFF_8000_8145_1800 | FFFF_8000_8145_6FFF | 0x5800(22528) |
hole | | FFFF_8000_8145_7000 | FFFF_8000_8145_FFFF | 0x9000(36864) |
.init.text | AX | FFFF_8000_8146_0000 | FFFF_8000_814D_0E83 | 0x70E84(462468) |
hole | | FFFF_8000_814D_0E84 | FFFF_8000_814D_0E87 | 0x4(4) |
.exit.text | AX | FFFF_8000_814D_0E88 | FFFF_8000_814D_531B | 0x4494(17556) |
.altinstructions | A | FFFF_8000_814D_531C | FFFF_8000_8151_C64F | 0x47334(291636) |
hole | | FFFF_8000_8151_C650 | FFFF_8000_8152_4FFF | 0x89B0(35248) |
.init.data | WA | FFFF_8000_8152_5000 | FFFF_8000_8161_BA99 | 0xF6A9A(1010330) |
hole | | FFFF_8000_8161_BA9A | FFFF_8000_8161_BFFF | 0x566(1382) |
.data..percpu | WA | FFFF_8000_8161_C000 | FFFF_8000_8163_0CF7 | 0x14CF8(85240) |
.rela.dyn | A | FFFF_8000_8163_0CF8 | FFFF_8000_81BE_099F | 0x5AFCA8(5962920) |
hole | | FFFF_8000_81BE_09A0 | FFFF_8000_81BE_FFFF | 0xF660(63072) |
.data | WA | FFFF_8000_81BF_0000 | FFFF_8000_81E5_E89F | 0x26E8A0(2549920) |
__bug_table | WA | FFFF_8000_81E5_E8A0 | FFFF_8000_81E7_4C1F | 0x16380(91008) |
hole | | FFFF_8000_81E7_4C20 | FFFF_8000_81E7_4FFF | 0x3E0(992) |
.mmuoff.data.write | WA | FFFF_8000_81E7_5000 | FFFF_8000_81E7_5007 | 0x8(8) |
hole | | FFFF_8000_81E7_5008 | FFFF_8000_81E7_57FF | 0x7F8(2040) |
.mmuoff.data.read | WA | FFFF_8000_81E7_5800 | FFFF_8000_81E7_5807 | 0x8(8) |
.pecoff_edata_padding | A | FFFF_8000_81E7_5808 | FFFF_8000_81E7_59FF | 0x1F8(504) |
hole | | FFFF_8000_81E7_5A00 | FFFF_8000_81E7_5FFF | 0x600(1536) |
.bss | WA | FFFF_8000_81E7_6000 | FFFF_8000_8220_89A7 | 0x3929A8(3746216) |