System Call

前面在 exception 的實驗有實作簡單的 system call,現在要擴充 system call 的功能。

Trapframe

還記得前面的 kernel_entry 會將目前的 context push 進 kernel stack:

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
    // kernel_entry: save the interrupted context on the current stack.
    // Reserves 272 bytes (17 slots of 16 bytes) below sp and fills them,
    // from the lowest address upward, with x0..x30, sp_el0, elr_el1 and
    // spsr_el1. The resulting memory image is what the C code reads as
    // struct trapframe.
    .macro kernel_entry
    // Make room for 31 general registers + 3 system registers (34 * 8 = 272).
    sub sp, sp, #272
    // General purpose registers, two per 16-byte slot.
    stp x0, x1, [sp, #16 * 0]
    stp x2, x3, [sp, #16 * 1]
    stp x4, x5, [sp, #16 * 2]
    stp x6, x7, [sp, #16 * 3]
    stp x8, x9, [sp, #16 * 4]
    stp x10, x11, [sp, #16 * 5]
    stp x12, x13, [sp, #16 * 6]
    stp x14, x15, [sp, #16 * 7]
    stp x16, x17, [sp, #16 * 8]
    stp x18, x19, [sp, #16 * 9]
    stp x20, x21, [sp, #16 * 10]
    stp x22, x23, [sp, #16 * 11]
    stp x24, x25, [sp, #16 * 12]
    stp x26, x27, [sp, #16 * 13]
    stp x28, x29, [sp, #16 * 14]

    // x21 and x22 were already stored above, so they can be reused as
    // scratch registers for reading the system registers.
    mrs x21, sp_el0
    // Slot 15 holds x30 followed by sp_el0.
    stp x30, x21, [sp, #16 * 15]

    mrs x21, elr_el1
    mrs x22, spsr_el1
    // Slot 16 holds elr_el1 followed by spsr_el1.
    stp x21, x22, [sp, #16 * 16]
    .endm

我們把這段記憶體叫做 Trapframe

|           .            |
|------------------------| --+
| task 1 saved registers |   |-- trapframe
|------------------------| --+
|           .            |
|           .            |
+------------------------+ <- task 1 kernel stack

Trapframe 是 user space 與 kernel space 溝通的橋樑:

  • user 可以設定參數與 system call number,當進入 kernel 後就可以透過 trapframe 存取
  • kernel 可以修改 trapframe,當作 system call 的回傳值

exception.h

首先先定義一下 trapframe 的 structure,這部分跟你 push 進 stack 的順序有關,我是這樣定的:

1
2
3
4
5
6
/*
 * In-memory image of the registers stored by the kernel_entry macro.
 * Field order and sizes must match the order of the stp instructions in
 * kernel_entry: the struct is overlaid on the stack at the address sp has
 * after kernel_entry runs.
 */
struct trapframe {
    uint64_t x[31];    // general purpose registers x0 .. x30, in index order
    uint64_t sp_el0;   // sp_el0 as read on entry
    uint64_t elr_el1;  // elr_el1 as read on entry
    uint64_t spsr_el1; // spsr_el1 as read on entry
};

User library API

將 system call 為不同 architecture / OS 包裝一層 API。參考 ARM 的 calling convention,我將 system call number 放在 x8。

sys.h

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
#ifndef __SYS_H__
#define __SYS_H__

/*
 * System call numbers. The user-side wrappers in sys.S load one of these
 * into x8 before executing `svc #0`; the kernel dispatches on the same value.
 */
#define SYS_GET_TASK_ID     0
#define SYS_UART_READ       1
#define SYS_UART_WRITE      2
#define SYS_EXEC            3
#define SYS_FORK            4
#define SYS_EXIT            5

/*
 * The C declarations below are hidden from assembly sources, which define
 * __ASSEMBLY__ before including this header. They sit inside the include
 * guard so that a second inclusion does not pull in typedef.h and the
 * prototypes again.
 */
#ifndef __ASSEMBLY__

#include "typedef.h"

/* User-side system call wrappers, implemented in sys.S. */

/* Return the id of the calling task. */
extern uint64_t get_taskid(void);
/* Read `size` characters from the UART into `buf`. */
extern uint32_t uart_read(char buf[], uint32_t size);
/* Write `size` characters from `buf` to the UART. */
extern uint32_t uart_write(const char buf[], uint32_t size);
/* Ask the kernel to run `func` in the calling task. */
extern int exec(void(*func)());
/* Duplicate the calling task: the parent gets the child id, the child gets 0. */
extern int fork(void);
/* Terminate the calling task with the given status. */
extern void exit(int status);

#endif /* !__ASSEMBLY__ */

#endif /* __SYS_H__ */

sys.S

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
/*
 * User-side system call stubs.
 *
 * Each stub loads its system call number into x8 and traps with `svc #0`.
 * Arguments are left untouched in x0, x1, ... as placed by the C caller,
 * and the value found in x0 after the trap is returned to the caller.
 */
#define __ASSEMBLY__
#include "sys.h"

/* uint64_t get_taskid(void) */
.global get_taskid
get_taskid:
    mov x8, SYS_GET_TASK_ID
    svc #0
    ret

/* uint32_t uart_read(char buf[], uint32_t size) */
.global uart_read
uart_read:
    mov x8, SYS_UART_READ
    svc #0
    ret

/* uint32_t uart_write(const char buf[], uint32_t size) */
.global uart_write
uart_write:
    mov x8, SYS_UART_WRITE
    svc #0
    ret

/* int exec(void (*func)()) */
.global exec
exec:
    mov x8, SYS_EXEC
    svc #0
    ret

/* int fork(void) */
.global fork
fork:
    mov x8, SYS_FORK
    svc #0
    ret

/* void exit(int status) */
.global exit
exit:
    mov x8, SYS_EXIT
    svc #0
    /*
     * exit is not expected to come back. If the kernel does resume this
     * task, retry the call instead of running off the end of the function
     * into whatever code happens to follow it.
     */
    b exit

Synchronous Exception Handler

如果是 svc 觸發的 synchronous exception,就當作是 system call。將 system call number 從 trapframe 取出,並呼叫 sys_call_router。

exception.c

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
/*
 * Route a synchronous exception.
 *
 * esr       - exception syndrome value; the class is taken from bits [31:26]
 *             and the instruction specific syndrome from bits [24:0]
 * elr       - exception return address, only used for the diagnostic print
 * trapframe - saved registers of the interrupted context
 *
 * Exception class 0x15 is treated as a system call and forwarded to
 * sys_call_router with the number found in the saved x8. Any other class
 * is reported on the UART and otherwise ignored.
 */
void sync_exc_router(unsigned long esr, unsigned long elr, struct trapframe* trapframe) {
    int ec = (esr >> 26) & 0x3F;
    int iss = esr & 0x1FFFFFF;

    if (ec != 0x15) {
        // not a system call: dump what we know about the exception
        uart_printf("Exception return address 0x%x\n", elr);
        uart_printf("Exception class (EC) 0x%x\n", ec);
        uart_printf("Instruction specific syndrome (ISS) 0x%x\n", iss);
        return;
    }

    // system call: the number was passed in x8
    sys_call_router(trapframe->x[8], trapframe);
}

Interface for System Call

根據 system call number 分配不同的 handler。

exception.c

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
void sys_call_router(uint64_t sys_call_num, struct trapframe* trapframe) {
    switch (sys_call_num) {
        case SYS_GET_TASK_ID:
            sys_get_task_id(trapframe);
            break;

        case SYS_UART_READ:
            sys_uart_read(trapframe);
            break;

        case SYS_UART_WRITE:
            sys_uart_write(trapframe);
            break;

        case SYS_EXEC:
            sys_exec(trapframe);
            break;

        case SYS_FORK:
            sys_fork(trapframe);
            break;

        case SYS_EXIT:
            sys_exit(trapframe);
            break;
    }
}

System call handler

將 return value 放在 x0

get_taskid

拿到目前的 task id。

exception.c

1
2
3
4
/*
 * System call handler: report the id of the current task.
 * The id is written to x[0] of the trapframe as the return value.
 */
void sys_get_task_id(struct trapframe* trapframe) {
    trapframe->x[0] = (uint64_t) get_current_task()->id;
}

uart_read

從 user space 傳進一個 char 指標與希望讀的大小。

exception.c

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
/*
 * System call handler: read characters from the UART into a user buffer.
 *
 * Arguments (from the trapframe):
 *   x[0] - destination buffer
 *   x[1] - number of characters to read
 * Return value (stored in x[0]): the number of characters read, which is
 * always the requested size.
 *
 * Exactly `size` bytes are written. The buffer is NOT NUL-terminated: the
 * caller only promised `size` bytes of space, so writing a terminator at
 * buf[size] would overflow the buffer by one byte.
 */
void sys_uart_read(struct trapframe* trapframe) {
    char* buf = (char*) trapframe->x[0];
    uint32_t size = (uint32_t) trapframe->x[1];

    // Interrupts are enabled for the duration of the read loop and disabled
    // again before returning.
    // NOTE(review): presumably so that waiting for input does not block
    // interrupt handling - confirm against uart0_read.
    irq_enable();
    for (uint32_t i = 0; i < size; i++) {
        buf[i] = uart0_read();
    }
    irq_disable();

    trapframe->x[0] = size;
}

uart_write

將 user space 傳進來的 char 陣列寫入 size 個 char。

exception.c

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
/*
 * System call handler: send characters from a user buffer to the UART.
 *
 * Arguments (from the trapframe):
 *   x[0] - source buffer
 *   x[1] - number of characters to send
 * Return value (stored in x[0]): the requested character count.
 *
 * Interrupts are enabled while the characters are being sent and disabled
 * again afterwards.
 */
void sys_uart_write(struct trapframe* trapframe) {
    const char* src = (char*) trapframe->x[0];
    uint32_t count = trapframe->x[1];
    uint32_t sent = 0;

    irq_enable();
    while (sent < count) {
        uart0_write(src[sent]);
        sent++;
    }
    irq_disable();

    trapframe->x[0] = count;
}

exec

將傳進來的 function pointer 當作 do_exec 的參數。

exception.c

1
2
3
4
5
/*
 * System call handler: exec.
 * Takes the function pointer passed in x[0], hands it to do_exec, and then
 * stores 0 in x[0] as the return value.
 */
void sys_exec(struct trapframe* trapframe) {
    do_exec((void(*)()) trapframe->x[0]);
    trapframe->x[0] = 0;
}

fork

將目前 task 的狀態複製一份給 child,parent 回傳 child task id,child 回傳 0。

exception.c

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
/*
 * System call handler: fork.
 *
 * Creates a new task that starts in return_from_fork with the priority of
 * the caller, then duplicates the used part of the parent kernel stack and
 * user stack into the stacks of the child.
 *
 * Return value (stored in x[0]): the child task id in the parent trapframe,
 * 0 in the child trapframe. If no child could be created, the parent gets
 * (uint64_t)-1.
 *
 * Stack layout used for the copy (addresses grow upward):
 *
 *   low   trapframe / sp_el0   <- lowest byte in use
 *         ...
 *   high  &pool[id][TOP_IDX]   <- first byte above the used area
 *
 * The used area is therefore [top - offset, top), which is copied byte by
 * byte to the same distance below the top of the child stack.
 */
void sys_fork(struct trapframe* trapframe) {
    struct task_t* parent_task = get_current_task();

    int child_id = privilege_task_create(return_from_fork, parent_task->priority);
    if (child_id < 0) {
        // A negative id can never be a valid task_pool index.
        // NOTE(review): assumes privilege_task_create reports failure with a
        // negative value - confirm against its implementation.
        trapframe->x[0] = (uint64_t)-1;
        return;
    }
    struct task_t* child_task = &task_pool[child_id];

    char* child_kstack = &kstack_pool[child_task->id][KSTACK_TOP_IDX];
    char* parent_kstack = &kstack_pool[parent_task->id][KSTACK_TOP_IDX];
    char* child_ustack = &ustack_pool[child_task->id][USTACK_TOP_IDX];
    char* parent_ustack = &ustack_pool[parent_task->id][USTACK_TOP_IDX];

    // number of bytes in use on each parent stack
    uint64_t kstack_offset = parent_kstack - (char*)trapframe;
    uint64_t ustack_offset = parent_ustack - (char*)trapframe->sp_el0;

    // Copy [top - offset, top). The index runs from 1 to offset inclusive so
    // that the lowest byte (the one the stack pointer refers to) is included
    // and the byte at the top index, which is outside the used area, is not.
    for (uint64_t i = 1; i <= kstack_offset; i++) {
        *(child_kstack - i) = *(parent_kstack - i);
    }
    for (uint64_t i = 1; i <= ustack_offset; i++) {
        *(child_ustack - i) = *(parent_ustack - i);
    }

    // place child's kernel stack to right place
    child_task->cpu_context.sp = (uint64_t)child_kstack - kstack_offset;

    // place child's user stack to right place
    struct trapframe* child_trapframe = (struct trapframe*) child_task->cpu_context.sp;
    child_trapframe->sp_el0 = (uint64_t)child_ustack - ustack_offset;

    // fork returns 0 in the child and the child id in the parent
    child_trapframe->x[0] = 0;
    trapframe->x[0] = child_task->id;
}

offset 示意圖:

|          .           |
|----------------------| <- char* trapframe (low)
| task saved registers |
|----------------------|
|          .           |
|          .           |
+----------------------+ <- char* parent_kstack (high)

exit

exit 時將 task 狀態改成 zombie,並 reschedule。

exception.c

1
2
3
/*
 * System call handler: exit.
 * Passes the status found in x[0] on to do_exit.
 */
void sys_exit(struct trapframe* trapframe) {
    int status = trapframe->x[0];
    do_exit(status);
}

schedule.c

1
2
3
4
5
6
7
8
/*
 * Terminate the current task.
 * Marks the task as ZOMBIE, records its exit status, and calls the
 * scheduler.
 */
void do_exit(int status) {
    struct task_t* self = get_current_task();

    self->state = ZOMBIE;
    self->exit_status = status;

    // WARNING: if the user stack is ever allocated dynamically, it has to
    // be released here
    schedule();
}

Zombie reaper

用來檢查如果有 task 是 zombie state,就將他的記憶體釋放,並將狀態改成 EXIT

最好不要讓 EXIT process 自己釋放記憶體

實作方式:

  • A privilege_task that always check if there are zombie tasks. (Easier)
  • Each task has its parent task, the parent should use wait function to check and reap zombie child task.

我是使用簡單版的方式

schedule.c

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
/*
 * Task body that reaps zombie tasks.
 * Loops forever: scans the whole task pool, logs every task found in the
 * ZOMBIE state and moves it to EXIT, then calls the scheduler before the
 * next scan.
 */
void zombie_reaper() {
    for (;;) {
        for (int slot = 0; slot < TASK_POOL_SIZE; slot++) {
            if (task_pool[slot].state != ZOMBIE) {
                continue;
            }
            uart_printf("reaper %d!\n", slot);
            task_pool[slot].state = EXIT;
            // WARNING: if the kernel stack is ever allocated dynamically,
            // it has to be released here
        }
        schedule();
    }
}
comments powered by Disqus