3rd Bootloader

給 Pi 使用的 3rd Bootloader,這個 Bootloader 的功能是能夠執行 loadimg 這個指令,在電腦端透過 UART 直接將 image 傳到實體機上,免去插拔記憶卡的困擾。

先備知識

x86 開機流程

  1. Wait for the power supply to settle to its nominal state
  2. Fetch BIOS from ROM

BIOS:

  1. Power-On Self-Test
  2. Load boot configuration (e.g. boot order)
  3. Load the Stage 1 bootloader

Stage 1 Bootloader:

  1. Read MBR on disk
  2. Setup a stack
  3. Load the Stage 2 bootloader

Stage 2 bootloader:

  1. Read the configuration file to start up a boot selection menu (e.g. GRUB)
  2. Load kernel

Ref: x86 Initial Boot Sequence

Linux

Linux 的啟動及初始化包含以下步驟:

  • BIOS
  • GRUB2 Stage 1 boot loader
  • GRUB2 Stage 1.5 boot loader
  • GRUB2 Stage 2 boot loader
  • Kernel
  • Init (systemd), the parent of all processes

Ref: 桌機 Linux 的開機流程 (BIOS +GRUB2)

搬家

因為 Bootloader 有可能在載入 image 的過程就把自己給蓋掉了,所以在載入 image 前要先將自己搬到不會被蓋過去的地方,再進行載入。

Spec 有一條是指定說可以載入到記憶體的任意位置,所以理論上應該是在準備載入前,要拿到欲載入 image 的大小,算 offset 看是要把自己搬到前面或是後面,但我這邊偷懶,在寫完 3rd Bootloader 後確定 size 不會超過 16KB 後,一開場就把自己搬家到記憶體最後面 0x3B3FC000 (VC Core Base 位址: 0x3B400000 - 16KB)。

P.S 幫我 DEMO 的助教人很好算我對 XD

linker.ld

linker script 把搬家的程式放在 .text.relocate section 裡。

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
/*
 * Memory layout of the bootloader image.
 *
 * .relocate holds the one-shot startup/relocation code and stays where it was
 * loaded. Everything between _begin and _end is the bootloader proper, which
 * the relocation code copies to __boot_loader before jumping there.
 */
SECTIONS
{
  /* The image is linked to start at 0x80000. */
  . = 0x80000;
  .relocate :
  {
    /* Startup + relocation code; first in the image, never copied. */
    KEEP(*(.text.relocate))
  }

  . = ALIGN(4096);
  /* Start of the region that gets copied to __boot_loader. */
  _begin = .;
  .text :
  {
    /* .text.boot (_start) must be first: it is the entry of the copy. */
    KEEP(*(.text.boot))
    *(.text)
  }

  . = ALIGN(4096);
  .data :
  {
    *(.data)
  }

  . = ALIGN(4096);
  .bss (NOLOAD) :
  {
    __bss_start = .;

    *(.bss)

    /*
     * The startup code clears .bss in 8-byte stores and __bss_size below is
     * a truncating division by 8, so pad the section to a multiple of 8;
     * otherwise the last 1-7 bytes would be left uncleared.
     */
    . = ALIGN(8);
    __bss_end = .;
  }
  /* End of the region that gets copied to __boot_loader. */
  _end = .;
}

/* Number of 8-byte words to zero when clearing .bss. */
__bss_size = (__bss_end - __bss_start) >> 3;
/* Relocation target: 16KB below 0x3B400000. Also used as the initial stack top. */
__boot_loader = 0x3B3FC000;

start.s

一開機進行初始化,然後跳到 relocate.c 裡面的 relocate function。

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
// First code in the image (the linker script places .text.relocate at the
// very start). Parks every core except core 0, sets up a stack, clears .bss
// and then calls the C function relocate().
.section ".text.relocate"

.global _relocate

_relocate:
    // get cpu id (low two bits of MPIDR_EL1)
    mrs     x1, MPIDR_EL1
    and     x1, x1, #3
    cbz     x1, 2f
    // if cpu_id > 0, stop: sleep in a wfe loop forever
1:
    wfe
    b       1b
    // if cpu_id == 0
2:
    // set stack pointer to __boot_loader, the address the bootloader is
    // later copied to (presumably the stack grows downward, away from
    // the copy -- TODO confirm)
    ldr     x1, =__boot_loader
    mov     sp, x1

    // clear bss: x1 = write cursor, x2 = remaining 8-byte words
    // (__bss_size is computed in the linker script as byte length >> 3)
    ldr     x1, =__bss_start
    ldr     x2, =__bss_size
    // skip the loop entirely when .bss is empty
3:  cbz     x2, 4f
    // store 8 zero bytes, then post-increment the cursor by 8
    str     xzr, [x1], #8
    sub     x2, x2, #1
    cbnz    x2, 3b

    // relocate() ends by calling the copied code, so it is not expected
    // to return here
4:  bl      relocate


// Entry of the relocatable part of the bootloader. The linker script puts
// .text.boot first in .text, right at _begin, so this is the first
// instruction of the copy that relocate() jumps to.
.section ".text.boot"

.global _start

_start:
    // jump to main function in C
    bl      main
    // halt this core if return
1:
    wfe
    b       1b

relocate.c

把 linker script 裡面定義的 _begin 到 _end 之間的內容複製到 __boot_loader,然後 branch 到 __boot_loader 這個記憶體位置上。

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
/* Linker-script symbols; only their addresses are meaningful. */
extern unsigned char _begin, _end, __boot_loader;

/*
 * Move the bootloader body ([_begin, _end)) to __boot_loader and continue
 * execution from the copy.
 *
 * Lives in .text.relocate, which is outside the copied range, so this code
 * does not overwrite itself while it runs.
 */
__attribute__((section(".text.relocate"))) void relocate() {
    unsigned char *src = (unsigned char *)&_begin;
    unsigned char *dst = (unsigned char *)&__boot_loader;
    unsigned long remaining = (&_end - &_begin);

    for (; remaining != 0; remaining--) {
        /* Copy one byte to the new location ... */
        *dst = *src;
        /* ... and wipe it from the old one. */
        *src = 0;
        dst++;
        src++;
    }

    /* Jump to the first byte of the relocated copy. */
    void (*entry)(void) = (void *)&__boot_loader;
    entry();
}

載入 Image

main.c

一樣跑一個 shell,當讀到 loadimg 指令時就載入 image。

loadimg.c

一開始讀要寫到哪個記憶體位址,接著就看要怎麼和外面的 script 搭配,我自己是設計成一開始傳一個大小還有一個 image 的 checksum,傳輸如果成功就會跳到指定的位置。

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
/* Read a 32-bit big-endian value from the UART, one raw byte at a time. */
static unsigned int loadimg_read_be32(void) {
    unsigned int value = 0;
    int n;

    for (n = 0; n < 4; n++) {
        /* Cast through unsigned char so a byte >= 0x80 is never
         * sign-extended, whatever the signedness of plain char. */
        value = (value << 8) | (unsigned char)uart_read_raw();
    }
    return value;
}

/*
 * Receive a kernel image over UART and jump to it.
 *
 * Wire protocol after the load address has been entered:
 *   1. image size, 4 bytes, big endian
 *   2. checksum,   4 bytes, big endian (sum of all image bytes)
 *   3. the image itself, `size` raw bytes
 *
 * Each received byte is subtracted from the checksum; the transfer is
 * accepted only when the result is exactly zero. All arithmetic is done in
 * unsigned 32-bit so wraparound is well defined.
 */
void loadimg() {
    long long address = address_input();

    /* address_input() reports failure with -1. */
    if (address == -1) {
        return;
    }

    uart_printf("Send image via UART now!\n");

    // big endian
    unsigned int img_size = loadimg_read_be32();

    // big endian
    unsigned int img_checksum = loadimg_read_be32();

    unsigned char *kernel = (unsigned char *)address;
    unsigned int i;

    /* Sizes with the top bit set were negative in the old signed code and
     * therefore received nothing; keep ignoring the payload for them. */
    if (img_size <= 0x7FFFFFFFu) {
        for (i = 0; i < img_size; i++) {
            unsigned char b = (unsigned char)uart_read_raw();
            kernel[i] = b;
            img_checksum -= b;
        }
    }

    if (img_checksum != 0) {
        uart_printf("Failed!");
    }
    else {
        /* Transfer verified: hand control to the freshly loaded image. */
        void (*start_os)(void) = (void *)kernel;
        start_os();
    }
}

sendimg.py

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
import argparse
import serial
import os
import sys
import numpy as np

# Command-line interface: both arguments are required positionals.
# Parsed at module level because main() reads the module-global `args`.
parser = argparse.ArgumentParser(
    description="Send a kernel image over UART to the bootloader's loadimg command."
)
parser.add_argument("image", help="path to the kernel image file to send")
parser.add_argument("tty", help="serial device connected to the board, e.g. /dev/ttyUSB0")
args = parser.parse_args()


def checksum(bytecodes):
    """Return the 32-bit additive checksum of *bytecodes*.

    The result is the sum of all byte values reduced modulo 2**32, so it
    always fits the 4-byte field sent to the bootloader and agrees with a
    receiver that subtracts each byte in 32-bit arithmetic.

    Args:
        bytecodes: A ``bytes``-like object (or any iterable of ints).

    Returns:
        An ``int`` in the range ``0 .. 2**32 - 1``.
    """
    # Iterating bytes yields ints directly; no array conversion is needed.
    return sum(bytecodes) & 0xFFFFFFFF


def main():
    """Send the image named on the command line to the bootloader over UART.

    Protocol: 4-byte big-endian size, 4-byte big-endian checksum, then the
    image payload in 128-byte chunks. Progress is printed as ``sent/total``
    chunks on a single, continuously rewritten line.

    Exits with status 1 if the serial port cannot be opened.
    """
    try:
        ser = serial.Serial(args.tty, 115200)
    except (serial.SerialException, ValueError):
        # Only port-open failures are handled here; a bare except would
        # also swallow KeyboardInterrupt and SystemExit.
        print("Serial init failed!")
        sys.exit(1)

    # The context manager closes the port even if the transfer fails.
    with ser:
        with open(args.image, 'rb') as f:
            bytecodes = f.read()

        # Use the length of what was actually read so the announced size
        # and the payload can never disagree.
        file_size = len(bytecodes)
        file_checksum = checksum(bytecodes)

        ser.write(file_size.to_bytes(4, byteorder="big"))
        ser.write(file_checksum.to_bytes(4, byteorder="big"))

        print(f"Image Size: {file_size}, Checksum: {file_checksum}")

        per_chunk = 128
        # Ceiling division: a trailing partial chunk still counts.
        chunk_count = (file_size + per_chunk - 1) // per_chunk

        for i in range(chunk_count):
            sys.stdout.write('\r')
            sys.stdout.write("%d/%d" % (i + 1, chunk_count))
            sys.stdout.flush()
            ser.write(bytecodes[i * per_chunk: (i + 1) * per_chunk])
            # Block until the chunk has left the output buffer. The old
            # `while not ser.writable()` loop never waited: writable()
            # only reports whether the stream supports writing.
            ser.flush()


if __name__ == "__main__":
    main()

載入速度估算

Calculate how long it takes to load a 10 MB kernel image over UART at a baud rate of 115200. Each byte costs 10 bits on the wire (1 start bit + 8 data bits + 1 stop bit), so:

$$ (\frac{1}{115200} \times 10) \times 10 \times 1024 \times 1024 = 910.22 \text{(s)} $$

comments powered by Disqus