MIT6.828 | Lab 2: Memory Management - Part 3: Kernel Address Space

JOS将处理器的32位线性地址空间分为两部分。 我们将在实验3中开始加载和运行的用户环境(进程)将控制下半部分的布局和内容,而内核始终保持对上半部分的完全控制。 分界线由inc/memlayout.h中的符号ULIM定义(这个取值带有一定的任意性),为内核保留大约256MB的虚拟地址空间。 这就解释了为什么我们需要在lab1中为内核提供如此高的链接地址:否则内核的虚拟地址空间中就没有足够的空间,同时把用户环境映射到它的下方。

参考inc/memlayout.h中的JOS存储器布局图对本次和接下来的lab会很有帮助。

权限和故障隔离

Permissions and Fault Isolation

由于内核和用户内存都存在于每个环境的地址空间中,因此我们必须在x86页表中使用权限位,以允许用户仅访问地址空间的用户部分。 否则用户代码中的bug可能会覆盖内核数据,从而导致崩溃或更微妙的故障; 或者也可能窃取其他环境的私有数据。 请注意,可写权限位PTE_W会对用户和内核代码均有效!

ULIM以上内存用户没有任何权限,而内核能够拥有读写权限。 对于地址范围[UTOP,ULIM),内核和用户环境都具有相同的权限:它们可以读取但不能写入此地址范围。 此范围的地址用于将某些内核数据结构以只读方式暴露给用户环境。 最后,UTOP下面的地址空间供用户环境使用; 用户环境将设置访问此内存的权限。

初始化内核地址空间

Initializing the Kernel Address Space

现在你将设置UTOP上方的地址空间——地址空间的内核部分。 inc/memlayout.h显示了你应该使用的布局。 您将使用刚刚编写的函数来设置适当的线性到物理的映射。

Exercise 5. Fill in the missing code in mem_init() after the call to check_page().

Your code should now pass the check_kern_pgdir() and check_page_installed_pgdir() checks.

In inc/memlayout.h:

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
/*
 * Virtual memory map:                                Permissions
 *                                                    kernel/user
 *
 *    4 Gig -------->  +------------------------------+
 *                     |                              | RW/--
 *                     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 *                     :              .               :
 *                     :              .               :
 *                     :              .               :
 *                     |~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~| RW/--
 *                     |                              | RW/--
 *                     |   Remapped Physical Memory   | RW/--
 *                     |                              | RW/--
 *    KERNBASE, ---->  +------------------------------+ 0xf0000000      --+
 *    KSTACKTOP        |     CPU0's Kernel Stack      | RW/--  KSTKSIZE   |
 *                     | - - - - - - - - - - - - - - -|                   |
 *                     |      Invalid Memory (*)      | --/--  KSTKGAP    |
 *                     +------------------------------+                   |
 *                     |     CPU1's Kernel Stack      | RW/--  KSTKSIZE   |
 *                     | - - - - - - - - - - - - - - -|                 PTSIZE
 *                     |      Invalid Memory (*)      | --/--  KSTKGAP    |
 *                     +------------------------------+                   |
 *                     :              .               :                   |
 *                     :              .               :                   |
 *    MMIOLIM ------>  +------------------------------+ 0xefc00000      --+
 *                     |       Memory-mapped I/O      | RW/--  PTSIZE
 * ULIM, MMIOBASE -->  +------------------------------+ 0xef800000
 *                     |  Cur. Page Table (User R-)   | R-/R-  PTSIZE
 *    UVPT      ---->  +------------------------------+ 0xef400000
 *                     |          RO PAGES            | R-/R-  PTSIZE
 *    UPAGES    ---->  +------------------------------+ 0xef000000
 *                     |           RO ENVS            | R-/R-  PTSIZE
 * UTOP,UENVS ------>  +------------------------------+ 0xeec00000
 * UXSTACKTOP -/       |     User Exception Stack     | RW/RW  PGSIZE
 *                     +------------------------------+ 0xeebff000
 *                     |       Empty Memory (*)       | --/--  PGSIZE
 *    USTACKTOP  --->  +------------------------------+ 0xeebfe000
 *                     |      Normal User Stack       | RW/RW  PGSIZE
 *                     +------------------------------+ 0xeebfd000
 *                     |                              |
 *                     |                              |
 *                     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 *                     .                              .
 *                     .                              .
 *                     .                              .
 *                     |~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~|
 *                     |     Program Data & Heap      |
 *    UTEXT -------->  +------------------------------+ 0x00800000
 *    PFTEMP ------->  |       Empty Memory (*)       |        PTSIZE
 *                     |                              |
 *    UTEMP -------->  +------------------------------+ 0x00400000      --+
 *                     |       Empty Memory (*)       |                   |
 *                     | - - - - - - - - - - - - - - -|                   |
 *                     |  User STAB Data (optional)   |                 PTSIZE
 *    USTABDATA ---->  +------------------------------+ 0x00200000        |
 *                     |       Empty Memory (*)       |                   |
 *    0 ------------>  +------------------------------+                 --+
 *
 * (*) Note: The kernel ensures that "Invalid Memory" is *never* mapped.
 *     "Empty Memory" is normally unmapped, but user programs may map pages
 *     there if desired.  JOS user programs map pages temporarily at UTEMP.
 */

根据这里的布局图,可以完成 pmap.c::mem_init() 中的内容:

  1. UPAGES 开始,往上 PTSIZE 范围的地址空间,可以看到其对用户和内核均为只读
1
2
3
4
5
6
7
8
	//////////////////////////////////////////////////////////////////////
	// Map 'pages' read-only by the user at linear address UPAGES
	// Permissions:
	//    - the new image at UPAGES -- kernel R, user R
	//      (ie. perm = PTE_U | PTE_P)
	//    - pages itself -- kernel RW, user NONE
	// Your code goes here:
	boot_map_region(kern_pgdir, UPAGES, PTSIZE, PADDR(pages), PTE_U);
  2. bootstack: 由注释可知,该部分的空间范围为[KSTACKTOP-KSTKSIZE, KSTACKTOP),即从 KSTACKTOP-KSTKSIZE 开始往上的 KSTKSIZE 大小的空间,可以看到内核具有读写权限,用户没有访问权限
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
	//////////////////////////////////////////////////////////////////////
	// Use the physical memory that 'bootstack' refers to as the kernel
	// stack.  The kernel stack grows down from virtual address KSTACKTOP.
	// We consider the entire range from [KSTACKTOP-PTSIZE, KSTACKTOP)
	// to be the kernel stack, but break this into two pieces:
	//     * [KSTACKTOP-KSTKSIZE, KSTACKTOP) -- backed by physical memory
	//     * [KSTACKTOP-PTSIZE, KSTACKTOP-KSTKSIZE) -- not backed; so if
	//       the kernel overflows its stack, it will fault rather than
	//       overwrite memory.  Known as a "guard page".
	//     Permissions: kernel RW, user NONE
	// Your code goes here:
	boot_map_region(kern_pgdir, KSTACKTOP-KSTKSIZE, KSTKSIZE, PADDR(bootstack), PTE_W);
  3. 所有的物理内存:虚拟地址范围从 KERNBASE 开始,到 32 位所能表示的最大地址 $2^{32} - 1$ 为止,因此其大小为 $2^{32} - \text{KERNBASE}$,在 32 位无符号(二进制补码)运算下恰好等于 -KERNBASE;可以看到对于这部分空间,内核具有读写权限,用户没有权限。
1
2
3
4
5
6
7
8
9
	//////////////////////////////////////////////////////////////////////
	// Map all of physical memory at KERNBASE.
	// Ie.  the VA range [KERNBASE, 2^32) should map to
	//      the PA range [0, 2^32 - KERNBASE)
	// We might not have 2^32 - KERNBASE bytes of physical memory, but
	// we just set up the mapping anyway.
	// Permissions: kernel RW, user NONE
	// Your code goes here:
	boot_map_region(kern_pgdir, KERNBASE, -KERNBASE, 0, PTE_W);

Question

What entries (rows) in the page directory have been filled in at this point? What addresses do they map and where do they point? In other words, fill out this table as much as possible:

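| Entry | Base Virtual Address | Points to (logically): |
|-------|----------------------|------------------------|
| 1023 | 0xffc00000 | Page table for top 4MB of phys memory |
| 1022 | 0xff800000 | ? |
| . | ? | ? |
| . | ? | ? |
| . | ? | ? |
| 2 | 0x00800000 | ? |
| 1 | 0x00400000 | ? |
| 0 | 0x00000000 | [see next question] |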

和最上面给出的inc/memlayout.h中的内存布局一致,简单的线性映射即可。每个页目录项覆盖 4MB(PTSIZE)的虚拟地址空间,即 $$\text{Virtual address} = \text{Entry} \times \mathtt{0x00400000}$$

We have placed the kernel and user environment in the same address space. Why will user programs not be able to read or write the kernel's memory? What specific mechanisms protect the kernel memory?

内核内存对应的页表项没有设置 PTE_U 位,因此用户没有读写权限。

使用寻址区域限制,和权限类型检查来保护内核内存。

What is the maximum amount of physical memory that this operating system can support? Why?

UPAGES 大小最大为4MB,而每个PageInfo大小为8B,所以最多可以存储512K个PageInfo结构体,而每个PageInfo对应4KB内存,所以最多支持 512K*4KB = 2GB 内存。

但这里使用两级页表寻址内存页面。 一级页表是页目录,最多可以索引1K个二级页表;每个二级页表最多可以索引1K个页面。 因此一个页目录可寻址1M($2^{20}$)个页面,而每个页面包含4K($2^{12}$)字节,所以一个页目录可以覆盖80386的整个物理地址空间($2^{20} \times 2^{12} = 2^{32}\ \text{Byte} = 4\ \text{GB}$)。

How much space overhead is there for managing memory, if we actually had the maximum amount of physical memory? How is this overhead broken down?

2GB内存对应 512K 个物理页,每个 PageInfo 结构占用 8 Byte,共 4MB。页目录有 1024 个表项,每项 4 Byte,共 4KB;而页表共包括 512K 个页表项,共 512K*4B = 2MB 存储,所以额外消耗的内存为 6MB + 4KB。

Revisit the page table setup in kern/entry.S and kern/entrypgdir.c. Immediately after we turn on paging, EIP is still a low number (a little over 1MB). At what point do we transition to running at an EIP above KERNBASE? What makes it possible for us to continue executing at a low EIP between when we enable paging and when we begin running at an EIP above KERNBASE? Why is this transition necessary?

从 kern/entry.S 中的 jmp *%eax 语句开始,系统便跳转到高地址运行。因为在 entry.S 中 CR3 加载的是 entry_pgdir,它将物理地址 [0, 4M) 同时映射到了虚拟地址的 [0, 4M) 和 [KERNBASE, KERNBASE+4M),所以在开启分页之后、跳转之前,仍然能在低地址正常运行。

而新的kern_pgdir加载后,并没有映射低位的虚拟地址 [0, 4M),所以这一步跳转是必要的。

挑战

挑战1

Challenge! We consumed many physical pages to hold the page tables for the KERNBASE mapping. Do a more space-efficient job using the PTE_PS ("Page Size") bit in the page directory entries. This bit was not supported in the original 80386, but is supported on more recent x86 processors. You will therefore have to refer to Volume 3 of the current Intel manuals. Make sure you design the kernel to use this optimization only on processors that support it!

挑战2

参考链接:https://github.com/Clann24/jos/tree/master/lab2

Challenge! Extend the JOS kernel monitor with commands to:

  • Display in a useful and easy-to-read format all of the physical page mappings (or lack thereof) that apply to a particular range of virtual/linear addresses in the currently active address space. For example, you might enter showmappings 0x3000 0x5000 to display the physical page mappings and corresponding permission bits that apply to the pages at virtual addresses 0x3000, 0x4000, and 0x5000.
  • Explicitly set, clear, or change the permissions of any mapping in the current address space.
  • Dump the contents of a range of memory given either a virtual or physical address range. Be sure the dump code behaves correctly when the range extends across page boundaries!
  • Do anything else that you think might be useful later for debugging the kernel. (There's a good chance it will be!)

1.1 读取字符串转化为地址

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
/*
 * Parse a hexadecimal string into a 32-bit unsigned value.
 *
 * buf: NUL-terminated text, optionally prefixed with "0x" or "0X".
 *      Lowercase and uppercase digits are both accepted.  The buffer is
 *      only read, never modified.  A NULL pointer yields 0.
 *
 * Returns the parsed value.  Parsing stops at the first character that is
 * not a hex digit; input wider than 32 bits wraps (unsigned arithmetic).
 */
uint32_t xtoi(char* buf) {
	uint32_t res = 0;

	if (!buf)
		return 0;

	/* Skip the prefix only when it is really there, so short or
	 * unprefixed strings are never read past their terminator. */
	if (buf[0] == '0' && (buf[1] == 'x' || buf[1] == 'X'))
		buf += 2;

	for (; *buf; ++buf) {
		char c = *buf;
		uint32_t digit;

		if (c >= '0' && c <= '9')
			digit = (uint32_t)(c - '0');
		else if (c >= 'a' && c <= 'f')
			digit = (uint32_t)(c - 'a') + 10;
		else if (c >= 'A' && c <= 'F')
			digit = (uint32_t)(c - 'A') + 10;
		else
			break;	/* not a hex digit: stop instead of folding garbage in */

		res = res * 16 + digit;
	}
	return res;
}

1.2 格式化打印 pte_t:

1
2
3
4
/*
 * Print the P, W and U permission bits of the page table entry that
 * pte points to.  Each field is shown as the raw masked value (zero when
 * the bit is clear), in hex.
 */
void pprint(pte_t *pte) {
	pte_t entry = *pte;	/* read the entry once */

	cprintf("PTE_P: %x, PTE_W: %x, PTE_U: %x\n",
		entry & PTE_P,
		entry & PTE_W,
		entry & PTE_U);
}

1.3 showmapping

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
/*
 * Monitor command: showmappings 0xbegin_addr 0xend_addr
 *
 * For every page in the inclusive range [begin, end], print whether
 * kern_pgdir has a present mapping for it and, if so, its permission bits.
 *
 * argc/argv: command words; argv[1] and argv[2] are hex addresses.
 * tf:        unused.
 * Always returns 0.
 *
 * The walk is done with create == 0 so that merely inspecting the address
 * space does not allocate page tables; a NULL result is reported as "not
 * exist".  (This relies on the lab's pgdir_walk contract for create == 0 --
 * confirm against kern/pmap.c.)
 */
int
showmappings(int argc, char **argv, struct Trapframe *tf)
{
	/* Both addresses are required; argv[2] must not be read otherwise. */
	if (argc < 3) {
		cprintf("Usage: showmappings 0xbegin_addr 0xend_addr\n");
		return 0;
	}
	uint32_t begin = xtoi(argv[1]), end = xtoi(argv[2]);
	cprintf("begin: %x, end: %x\n", begin, end);
	if (begin > end)
		return 0;

	/* Work on page boundaries so the page containing 'end' is always
	 * visited, even when the inputs are not page-aligned. */
	begin &= ~(uint32_t)(PGSIZE - 1);
	end &= ~(uint32_t)(PGSIZE - 1);

	for (;;) {
		pte_t *pte = pgdir_walk(kern_pgdir, (void *) begin, 0);	//no create
		if (pte && (*pte & PTE_P)) {
			cprintf("page %x with ", begin);
			pprint(pte);
		} else
			cprintf("page not exist: %x\n", begin);

		/* Test before incrementing: begin += PGSIZE would wrap to 0
		 * at the top of the address space and never terminate. */
		if (begin == end)
			break;
		begin += PGSIZE;
	}
	return 0;
}

2.1 setm

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
/*
 * Monitor command: setm 0xaddr [0|1] [P|W|U]
 *
 * Clear (argv[2] starts with '0') or set (anything else) one permission
 * bit in the kern_pgdir page table entry for the given virtual address,
 * printing the entry's permission bits before and after the change.
 *
 * argc/argv: command words as described above.
 * tf:        unused.
 * Always returns 0.
 *
 * NOTE(review): the entry is modified in place and no TLB invalidation is
 * done here, so a stale translation may stay in effect for this address --
 * consider invalidating the page after the update.
 */
int setm(int argc, char **argv, struct Trapframe *tf) {
	/* All three operands are required; argv[2]/argv[3] must not be
	 * read otherwise. */
	if (argc < 4) {
		cprintf("Usage: setm 0xaddr [0|1 :clear or set] [P|W|U]\n");
		return 0;
	}

	uint32_t perm;
	switch (argv[3][0]) {
	case 'P':
		perm = PTE_P;
		break;
	case 'W':
		perm = PTE_W;
		break;
	case 'U':
		perm = PTE_U;
		break;
	default:
		/* Unknown bit name: report it rather than silently doing nothing. */
		cprintf("Usage: setm 0xaddr [0|1 :clear or set] [P|W|U]\n");
		return 0;
	}

	uint32_t addr = xtoi(argv[1]);
	/* create == 0: do not allocate a page table just to flip a bit
	 * (relies on the lab's pgdir_walk contract -- confirm in kern/pmap.c). */
	pte_t *pte = pgdir_walk(kern_pgdir, (void *)addr, 0);
	if (!pte) {
		cprintf("page not exist: %x\n", addr);
		return 0;
	}

	cprintf("%x before setm: ", addr);
	pprint(pte);
	if (argv[2][0] == '0') 	//clear
		*pte = *pte & ~perm;
	else 	//set
		*pte = *pte | perm;
	cprintf("%x after  setm: ", addr);
	pprint(pte);
	return 0;
}

4.1 showvm: 查看内存

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
/*
 * Monitor command: showvm 0xaddr 0xn
 *
 * Print n consecutive pointer-sized words starting at virtual address
 * addr, one per line, as "VM at <address> is <value>".
 *
 * argc/argv: command words; argv[1] is the start address and argv[2] the
 *            word count, both parsed as hex by xtoi.
 * tf:        unused.
 * Always returns 0.
 *
 * NOTE(review): the addresses are dereferenced directly with no check
 * that they are mapped; presumably an unmapped address faults in the
 * kernel -- verify before using on arbitrary ranges.
 */
int showvm(int argc, char **argv, struct Trapframe *tf) {
	/* Both operands are required; argv[2] must not be read otherwise. */
	if (argc < 3) {
		cprintf("Usage: showvm 0xaddr 0xn\n");
		return 0;
	}
	void** addr = (void**) xtoi(argv[1]);
	uint32_t n = xtoi(argv[2]);
	uint32_t i;	/* unsigned, to match n in the loop comparison */
	for (i = 0; i < n; ++i)
		cprintf("VM at %x is %x\n", addr+i, addr[i]);
	return 0;
}
updated 2023-01-30
点击刷新