給 Pi 使用的 3rd Bootloader,這個 Bootloader 的功能是能夠執行 loadimg
這個指令,在電腦端透過 UART 直接將 image 傳到實體機上,免去插拔記憶卡的困擾。
- Wait for the power supply to settle to its nominal state
- Fetch BIOS from ROM
BIOS:
- Power-On Self-Test
- Load boot configuration (e.g. boot order)
- Load the Stage 1 bootloader
Stage 1 Bootloader:
- Read MBR on disk
- Setup a stack
- Load the Stage 2 bootloader
Stage 2 bootloader:
- Read the configuration file to start up a boot selection menu (e.g. GRUB)
- Load kernel
Ref: x86 Initial Boot Sequence
Linux 的啟動及初始化包含以下步驟:
- BIOS
- GRUB2 Stage 1 boot loader
- GRUB2 Stage 1.5 boot loader
- GRUB2 Stage 2 boot loader
- Kernel
- Init (systemd), the parent of all processes
Ref: 桌機 Linux 的開機流程 (BIOS +GRUB2)
因為 Bootloader 有可能在載入 image 的過程就把自己給蓋掉了,所以在載入 image 前要先將自己搬到不會被蓋過去的地方,再進行載入。
Spec 有一條是指定說可以載入到記憶體的任意位置,所以理論上應該是在準備載入前,要拿到欲載入 image 的大小,算 offset 看是要把自己搬到前面或是後面,但我這邊偷懶,在寫完 3rd Bootloader 後確定 size 不會超過 16KB 後,一開場就把自己搬家到記憶體最後面 0x3B3FC000
(VC Core Base 位址 0x3B400000 - 16KB)。
P.S 幫我 DEMO 的助教人很好算我對 XD
linker script 把搬家的程式放在 .text.relocate
section 裡。
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
/*
 * Memory layout of the relocating UART bootloader.
 *
 * The image is linked at 0x80000. The .relocate output section comes first
 * and holds everything placed in .text.relocate. The range [_begin, _end)
 * that follows (.text, .data, .bss) is the payload that gets copied to
 * __boot_loader.
 */
SECTIONS
{
    . = 0x80000;

    /* Relocation code; it lies before _begin, so it is not part of the
       copied range. */
    .relocate :
    {
        KEEP(*(.text.relocate))
    }

    . = ALIGN(4096);
    _begin = .;
    .text :
    {
        /* .text.boot is listed first so that it is located at _begin. */
        KEEP(*(.text.boot))
        *(.text)
    }

    . = ALIGN(4096);
    .data :
    {
        *(.data)
    }

    . = ALIGN(4096);
    .bss (NOLOAD) :
    {
        __bss_start = .;
        *(.bss)
        /* __bss_size below counts whole 8-byte words (>> 3). Round the end
           up to 8 bytes so that a word-wise clear covers every byte of the
           section instead of leaving up to 7 trailing bytes uncleared. */
        . = ALIGN(8);
        __bss_end = .;
    }
    _end = .;
}

/* Number of 8-byte words in .bss. */
__bss_size = (__bss_end - __bss_start) >> 3;

/* Destination address of the relocated bootloader. */
__boot_loader = 0x3B3FC000;
|
一開機進行初始化,然後跳到 relocate.c 裡面的 relocate
function。
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
| .section ".text.relocate"
// _relocate: startup code placed in .text.relocate.
// Parks every core whose id is non-zero, then on core 0 sets the stack
// pointer, zeroes .bss and calls the C function relocate().
.global _relocate
_relocate:
// read MPIDR_EL1 and keep its low two bits, used here as the cpu id
mrs x1, MPIDR_EL1
and x1, x1, #3
cbz x1, 2f
// cpu_id > 0: wait for events forever
1:
wfe
b 1b
// cpu_id == 0: continue booting
2:
// set the stack pointer to the address of __boot_loader
// NOTE(review): presumably so the stack sits directly below the relocated
// image - confirm
ldr x1, =__boot_loader
mov sp, x1
// clear bss: x1 = start address, x2 = __bss_size, used as the number of
// 8-byte stores to perform
ldr x1, =__bss_start
ldr x2, =__bss_size
// skip the loop entirely when the count is zero
3: cbz x2, 4f
// store 8 zero bytes at [x1], then advance x1 by 8 (post-index)
str xzr, [x1], #8
sub x2, x2, #1
cbnz x2, 3b
// call relocate(); no instruction follows this call in this section
4: bl relocate
// _start: code placed in .text.boot. It calls the C function main and
// parks the core if main ever returns.
.section ".text.boot"
.global _start
_start:
// call the main function in C
bl main
// main returned: wait for events forever
1:
wfe
b 1b
|
把從 linker script 裡面定義的 _begin
到 _end
的內容複製到 __boot_loader
後,然後 branch 到 __boot_loader
這個記憶體位置上。
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
| extern unsigned char _begin, _end, __boot_loader;
/*
 * Copy the bytes in [_begin, _end) to __boot_loader, writing zero over each
 * source byte right after it has been copied, then call the code at
 * __boot_loader. The function itself is placed in the .text.relocate
 * section.
 */
__attribute__((section(".text.relocate"))) void relocate() {
    unsigned char *src = (unsigned char *)&_begin;
    unsigned char *dst = (unsigned char *)&__boot_loader;
    unsigned long remaining = (&_end - &_begin);

    for (; remaining != 0; remaining--) {
        *dst = *src;   /* copy one byte to the new location */
        *src = 0;      /* wipe the byte at the old location */
        dst++;
        src++;
    }

    /* Continue execution at the start of the copied image. */
    void (*entry)(void) = (void *)&__boot_loader;
    entry();
}
|
main.c
一樣跑一個 shell 當讀到 loadimg
指令時來載入 image。
一開始讀要寫到哪個記憶體位址,接著就看要怎麼和外面的 script 搭配,我自己是設計成一開始傳一個大小還有一個 image 的 checksum,傳輸如果成功就會跳到指定的位置。
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
/*
 * Receive an image over UART, store it at a caller-chosen address and jump
 * to it.
 *
 * The load address comes from address_input(); a result of -1 aborts the
 * command. The sender then transmits, in order:
 *   1. the image size, 4 bytes, big endian
 *   2. the image checksum, 4 bytes, big endian
 *   3. the image itself, <size> raw bytes
 * Every received image byte is subtracted from the checksum; the transfer is
 * accepted only if the result is exactly zero, i.e. the checksum equals the
 * sum of all image bytes modulo 2^32.
 *
 * All arithmetic is done on unsigned types: bytes >= 0x80 must count as
 * 128..255 (not be sign-extended), and unsigned wrap-around is well defined.
 */
void loadimg() {
    long long address = address_input();
    if (address == -1) {
        return;
    }
    uart_printf("Send image via UART now!\n");

    unsigned int img_size = 0;
    unsigned int img_checksum = 0;
    unsigned int i;

    /* image size, big endian */
    for (i = 0; i < 4; i++) {
        img_size = (img_size << 8) | (unsigned char)uart_read_raw();
    }

    /* image checksum, big endian */
    for (i = 0; i < 4; i++) {
        img_checksum = (img_checksum << 8) | (unsigned char)uart_read_raw();
    }

    /* image payload, written straight to the requested address */
    unsigned char *kernel = (unsigned char *)address;
    for (i = 0; i < img_size; i++) {
        unsigned char b = (unsigned char)uart_read_raw();
        kernel[i] = b;
        img_checksum -= b;
    }

    if (img_checksum != 0) {
        uart_printf("Failed!");
    }
    else {
        /* checksum matched: transfer control to the loaded image */
        void (*start_os)(void) = (void *)kernel;
        start_os();
    }
}
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
| import argparse
import serial
import os
import sys
import numpy as np
# Command-line interface: two required positional arguments, in this order:
# the image file to send and the serial device to send it through.
parser = argparse.ArgumentParser()
for positional in ("image", "tty"):
    parser.add_argument(positional)
args = parser.parse_args()
def checksum(bytecodes):
    """Return the sum of all byte values, reduced modulo 2**32.

    Each byte counts as an unsigned value (0-255). The result is masked to
    32 bits so that it always fits the 4-byte checksum field of the transfer
    header, however large the image is.

    Args:
        bytecodes: A bytes-like object (or any iterable of non-negative ints).

    Returns:
        The checksum as an int in the range 0..0xFFFFFFFF.
    """
    return sum(bytecodes) & 0xFFFFFFFF
def main():
    """Send the image named on the command line to the target over a serial port.

    Opens ``args.tty`` at 115200 baud and transmits, in order: the image size
    (4 bytes, big endian), the image checksum (4 bytes, big endian) and the
    raw image bytes in 128-byte chunks, printing a chunk counter as it goes.

    Exits with status 1 if the serial port cannot be opened.
    """
    try:
        ser = serial.Serial(args.tty, 115200)
    except (serial.SerialException, OSError, ValueError):
        print("Serial init failed!")
        sys.exit(1)

    try:
        with open(args.image, 'rb') as f:
            bytecodes = f.read()
        # Use the number of bytes actually read, so the header can never
        # disagree with the payload that follows it.
        file_size = len(bytecodes)
        file_checksum = checksum(bytecodes)

        ser.write(file_size.to_bytes(4, byteorder="big"))
        ser.write(file_checksum.to_bytes(4, byteorder="big"))
        print(f"Image Size: {file_size}, Checksum: {file_checksum}")

        per_chunk = 128
        chunk_count = -(-file_size // per_chunk)  # ceiling division
        for i in range(chunk_count):
            sys.stdout.write("\r%d/%d" % (i + 1, chunk_count))
            sys.stdout.flush()
            ser.write(bytecodes[i * per_chunk: (i + 1) * per_chunk])
            # Wait until the chunk has really been written out before
            # reporting progress on the next one.
            ser.flush()
    finally:
        # Release the port even if reading the file or a write fails.
        ser.close()


if __name__ == "__main__":
    main()
|
Calculate how long it takes to load a 10 MB kernel image over UART at a baud rate of 115200, assuming each byte takes 10 bit times on the wire (e.g. 1 start bit + 8 data bits + 1 stop bit).
$$
(\frac{1}{115200} \times 10) \times 10 \times 1024 \times 1024 = 910.22 \text{(s)}
$$