Magenta源代码笔记(2) —— 线程调度器以及内核定时器

来源：互联网发布：淘宝服务助手怎么关闭编辑：程序博客网时间：2024/06/08 13:07

线程（Thread）是被内核调度的执行单位。线程被创建后，需要为其指定内存地址空间，它在运行的过程中，可以改变这个指定的内存地址空间。一段内存地址空间，可以被0个、一个或多个线程指定，这些线程没有主线程、原始线程之类的概念。

在Magenta中，像Linux这样的传统操作系统中的“进程”概念被分解为：地址空间、虚实映射机制、程序及原始数据（装到内存里的那些用户自己写的逻辑）、执行线索等等。

Magenta的每个线程对应内存中的一个线程控制块（TCB，thread control block）对象，没有内核线程这样的说法，也就是说，没有一个线程是只工作在内核态中的，除非它是一个哑（idle）线程，所有系统调用都是工作在调用它的那个线程的用户态空间中的。

转自裴喜龙老师写的Magenta内核的介绍：http://weibo.com/xilongpei

Magenta内核中有关线程的代码主要在kernel/kernel/thread.c文件中，下面是阅读该文件的笔记。

Magenta内核中线程的生命周期

Magenta中线程状态共有六个，分别是

kernel/include/kernel/thread.h

/* Life-cycle states of a thread. */
enum thread_state {
    THREAD_SUSPENDED = 0, // suspended: the initial state of a thread
    THREAD_READY,         // ready: placed on a ready list, may be picked by the scheduler
    THREAD_RUNNING,       // running: the current thread
    THREAD_BLOCKED,       // blocked: waiting on the wait queue of some kernel object
    THREAD_SLEEPING,      // sleeping: after calling the sleep function
    THREAD_DEATH,         // dead: after the thread has exited
};

线程状态切换的方法如下图：

下面分析一下Magenta内核中的任务调度器

Magenta内核中任务调度器有如下的特点：

可变优先级调度（默认为32个优先级）
时间片轮转（非real time任务、时间片默认为50ms）
抢占式（非real time任务）、非抢占（real time任务）

优先级设置相关

kernel/include/kernel/thread.h

/* Thread priority levels; every other level is derived from NUM_PRIORITIES. */
#define NUM_PRIORITIES 32
#define LOWEST_PRIORITY 0
#define HIGHEST_PRIORITY (NUM_PRIORITIES - 1)
#define DPC_PRIORITY (NUM_PRIORITIES - 2)
#define IDLE_PRIORITY LOWEST_PRIORITY
#define LOW_PRIORITY (NUM_PRIORITIES / 4)
#define DEFAULT_PRIORITY (NUM_PRIORITIES / 2)
#define HIGH_PRIORITY ((NUM_PRIORITIES / 4) * 3)

/* ... */

void thread_set_priority(int priority); // priority adjustment

调度算法实现（非SMP架构情况下）

Magenta中使用的是类似位图的调度算法，结构图如下：

kernel/kernel/thread.c

/* the run queue */static struct list_node run_queue[NUM_PRIORITIES];// 全局队列数组static uint32_t run_queue_bitmap;// 32位整形保存每个队列的状态...// 该函数寻找出就绪队列中优先级最高的线程// 非SMP架构情况下static thread_t *get_top_thread(int cpu) {    thread_t *newthread;    uint32_t local_run_queue_bitmap = run_queue_bitmap;    while (local_run_queue_bitmap) {        /* find the first (remaining) queue with a thread in it */        // 找出存在就绪线程的最高优先级，不像其他某些调度器使用数组来保存对应关系        // 这里使用了编译器内置的__builtin_clz函数，以下摘录自gcc官网        // — Built-in Function: int __builtin_clz (unsigned int x)        //    Returns the number of leading 0-bits in x, starting at the most significant bit position. If x is 0, the result is undefined.        // 该函数是计算整型开头的0的数量        // 通过计算可以获取到值为1的最高位，也就是最高的优先级        uint next_queue = HIGHEST_PRIORITY - __builtin_clz(local_run_queue_bitmap)                          - (sizeof(run_queue_bitmap) * 8 - NUM_PRIORITIES);        list_for_every_entry(&run_queue[next_queue], newthread, thread_t, queue_node) {            // 从就绪列表中移除发现的新线程            list_delete(&newthread->queue_node);            // 若改就绪列表为空，将优先级位图对应位置0            if (list_is_empty(&run_queue[next_queue]))                run_queue_bitmap &= ~(1<<next_queue);            return newthread;        }        local_run_queue_bitmap &= ~(1<<next_queue);    }    /* no threads to run, select the idle thread for this cpu */    // 若没有就绪线程，切换至idle线程    return idle_thread(cpu);}//该函数为任务调度方法void thread_resched(void){    thread_t *oldthread;    thread_t *newthread;    thread_t *current_thread = get_current_thread();    uint cpu = arch_curr_cpu_num();    DEBUG_ASSERT(arch_ints_disabled());    DEBUG_ASSERT(spin_lock_held(&thread_lock));    DEBUG_ASSERT(current_thread->state != THREAD_RUNNING);    THREAD_STATS_INC(reschedules);    // 获取最高优先级的就绪线程    newthread = get_top_thread(cpu);    DEBUG_ASSERT(newthread);    newthread->state = THREAD_RUNNING;    oldthread = current_thread;    if (newthread == oldthread)        return;    lk_bigtime_t now = 
current_time_hires();    oldthread->runtime_us += now - oldthread->last_started_running_us;    newthread->last_started_running_us = now;    /* set up quantum for the new thread if it was consumed */    // 若新线程时间片已经消耗完，设置新的时间片    if (newthread->remaining_quantum <= 0) {        newthread->remaining_quantum = 5; // XXX make this smarter    }    /* mark the cpu ownership of the threads */    thread_set_curr_cpu(oldthread, -1);    thread_set_curr_cpu(newthread, cpu);    KEVLOG_THREAD_SWITCH(oldthread, newthread);#if PLATFORM_HAS_DYNAMIC_TIMER    if (thread_is_real_time_or_idle(newthread)) {        if (!thread_is_real_time_or_idle(oldthread)) {            /* if we're switching from a non real time to a real time, cancel             * the preemption timer. */            // 若从非realtime线程切换至realtime线程，关闭时间片轮转定时器            // 切换至realtime线程后，无法通过时间片轮转进行调度，只能等待当前realtime线程退出时重新进行调度            timer_cancel(&preempt_timer[cpu]);        }    } else if (thread_is_real_time_or_idle(oldthread)) {        /* if we're switching from a real time (or idle thread) to a regular one,         * set up a periodic timer to run our preemption tick. */        // 若从realtime线程切换至非realtime线程，打开时间片轮转定时器        // 定时器10ms溢出，时间片为 10ms * 5 = 50ms        timer_set_periodic(&preempt_timer[cpu], 10, (timer_callback)thread_timer_tick, NULL);    }#endif    /* set some optional target debug leds */    target_set_debug_led(0, !thread_is_idle(newthread));    /* do the switch */    // 将当前线程切换至新线程    set_current_thread(newthread);#if WITH_KERNEL_VM    /* see if we need to swap mmu context */    // mmu上下文切换，内存管理相关    if (newthread->aspace != oldthread->aspace) {        vmm_context_switch(oldthread->aspace, newthread->aspace);    }#endif    /* do the low level context switch */    // 底层上下文切换，寄存器暂存    // 由arch文件夹中不同架构不同的代码实现    arch_context_switch(oldthread, newthread);}

线程调度器中时间片轮转涉及到了内核中定时器的使用。

Magenta为每个CPU核心分别分配了一个内核定时器（SMP架构下）。

定时器在线程系统初始化中完成初始化，此函数是在kernel/top/main.c中的lk_main函数中新建的初始化线程中进行调用。

kernel/kernel/thread.c

/* Initializes the per-cpu preemption timers (only when the platform has a dynamic timer). */
void thread_init(void)
{
#if PLATFORM_HAS_DYNAMIC_TIMER
    for (uint i = 0; i < SMP_MAX_CPUS; i++) {
        timer_initialize(&preempt_timer[i]);
    }
#endif
}

该定时器在调度函数中进行开启和关闭

kernel/kernel/thread.c

/* Excerpt of thread_resched: where the preemption timer is started and stopped. */
void thread_resched(void)
{
    /* ... */
#if PLATFORM_HAS_DYNAMIC_TIMER
    if (thread_is_real_time_or_idle(newthread)) {
        if (!thread_is_real_time_or_idle(oldthread)) {
            /* if we're switching from a non real time to a real time, cancel
             * the preemption timer. */
#if DEBUG_THREAD_CONTEXT_SWITCH
            dprintf(ALWAYS, "arch_context_switch: stop preempt, cpu %d, old %p (%s), new %p (%s)\n",
                    cpu, oldthread, oldthread->name, newthread, newthread->name);
#endif
            timer_cancel(&preempt_timer[cpu]);
        }
    } else if (thread_is_real_time_or_idle(oldthread)) {
        /* if we're switching from a real time (or idle thread) to a regular one,
         * set up a periodic timer to run our preemption tick. */
#if DEBUG_THREAD_CONTEXT_SWITCH
        dprintf(ALWAYS, "arch_context_switch: start preempt, cpu %d, old %p (%s), new %p (%s)\n",
                cpu, oldthread, oldthread->name, newthread, newthread->name);
#endif
        timer_set_periodic(&preempt_timer[cpu], 10, (timer_callback)thread_timer_tick, NULL);
    }
#endif
    /* ... */
}

timer_cancel为关闭定时器。

timer_set_periodic，设置定时器为周期性溢出，同时传入溢出时的回调函数。

kernel/kernel/thread.c

// 调度器定时器溢出回调函数enum handler_return thread_timer_tick(void){    // 获取当前线程    thread_t *current_thread = get_current_thread();    // 若为realtime或idle线程，通知上层不进行调度    if (thread_is_real_time_or_idle(current_thread))        return INT_NO_RESCHEDULE;    // 标记消耗的时间片    current_thread->remaining_quantum--;    // 若时间片已经消耗完了，通知上层进行调度    if (current_thread->remaining_quantum <= 0) {        return INT_RESCHEDULE;    } else {        return INT_NO_RESCHEDULE;    }}

定时器的实现

定时器的实现位于kernel/kernel/timer.c，实现的方法也是比较常用的方法，比较好理解。

以下代码的分析均在宏PLATFORM_HAS_DYNAMIC_TIMER已经定义的情况下，该宏指的是当前平台的硬件定时器的溢出的时间可以在运行过程中进行修改。

定时器结构体：

kernel/include/kernel/timer.h

typedef struct timer {    int magic;    struct list_node node;// 节点    lk_time_t scheduled_time;// 下次调度时间    lk_time_t periodic_time;// 周期性事件    timer_callback callback;// 回调    void *arg;// 回调参数} timer_t;

代码中针对每一个CPU核心（SMP架构下为多个）维护着一个内核定时器的优先队列，其顺序由调度时间的升序进行排列

kernel/kernel/timer.c

// 每个cpu具有的定时器结构struct timer_state {    struct list_node timer_queue;} __CPU_ALIGN;static struct timer_state timers[SMP_MAX_CPUS];...// 该函数将定时器插入至对应cpu的定时器优先队列static void insert_timer_in_queue(uint cpu, timer_t *timer){    timer_t *entry;    DEBUG_ASSERT(arch_ints_disabled());    LTRACEF("timer %p, cpu %u, scheduled %u, periodic %u\n", timer, cpu, timer->scheduled_time, timer->periodic_time);    list_for_every_entry(&timers[cpu].timer_queue, entry, timer_t, node) {        // 若找到原队列中某个定时器的时间大于插入的定时器的时间，则将需要插入的定时器插入至这个定时器的前面        if (TIME_GT(entry->scheduled_time, timer->scheduled_time)) {            list_add_before(&entry->node, &timer->node);            return;        }    }    /* walked off the end of the list */    // 遍历完成没有满足条件，直接插入到队列尾端    list_add_tail(&timers[cpu].timer_queue, &timer->node);}...

定时器实质上是由cpu内的硬件定时器实现的，在定时器的几个控制函数中，可以清楚地看到上述优先队列的操作以及底层硬件定时器的操作。

timer_set类函数

kernel/kernel/timer.c

/* Arms `timer` to fire once after `delay`; a zero delay is bumped to 1. */
void timer_set_oneshot(timer_t *timer, lk_time_t delay, timer_callback callback, void *arg)
{
    if (delay == 0)
        delay = 1;
    // period 0 marks the timer as non-periodic
    timer_set(timer, delay, 0, callback, arg);
}

/* Arms `timer` to fire every `period`; a zero period is bumped to 1. */
void timer_set_periodic(timer_t *timer, lk_time_t period, timer_callback callback, void *arg)
{
    if (period == 0)
        period = 1;
    // the first expiry is also one period away
    timer_set(timer, period, period, callback, arg);
}

timer_set_oneshot：设定单发的定时器，该定时器仅会触发一次

timer_set_periodic：设定周期性定时器，该定时器会按照传入时间参数周期性的溢出

可以看到实质上定时器的实现函数为timer_set

kernel/kernel/timer.c

/* Common implementation behind timer_set_oneshot / timer_set_periodic:
 * fills in the timer, queues it on the current cpu and, if it became the
 * head of the queue, reprograms the hardware timer.
 * Panics if the timer is already queued. */
static void timer_set(timer_t *timer, lk_time_t delay, lk_time_t period, timer_callback callback, void *arg)
{
    lk_time_t now;

    LTRACEF("timer %p, delay %u, period %u, callback %p, arg %p\n", timer, delay, period, callback, arg);

    DEBUG_ASSERT(timer->magic == TIMER_MAGIC);

    if (list_in_list(&timer->node)) {
        panic("timer %p already in list\n", timer);
    }

    /* Bump the delay, since we're probably straddling a millisecond */
    delay += 1;

    // read the current time
    now = current_time();
    // compute the scheduled time from the arguments
    timer->scheduled_time = now + delay;
    // store the period
    timer->periodic_time = period;
    // store the callback
    timer->callback = callback;
    // store the callback argument
    timer->arg = arg;

    LTRACEF("scheduled time %u\n", timer->scheduled_time);

    // take the lock and save the interrupt state
    spin_lock_saved_state_t state;
    spin_lock_irqsave(&timer_lock, state);

    // insert the timer into the queue
    uint cpu = arch_curr_cpu_num();
    insert_timer_in_queue(cpu, timer);

#if PLATFORM_HAS_DYNAMIC_TIMER
    // if the newly inserted timer is now the head of the queue,
    // adjust the expiry time of the hardware timer
    if (list_peek_head_type(&timers[cpu].timer_queue, timer_t, node) == timer) {
        /* we just modified the head of the timer queue */
        LTRACEF("setting new timer for %u msecs\n", delay);
        // This is the setter for the underlying hardware timer, implemented
        // by each platform. timer_tick is the callback handed down to it; it
        // actually runs in the timer interrupt routine.
        platform_set_oneshot_timer(timer_tick, NULL, delay);
    }
#endif

    // release the lock and restore the interrupt state
    spin_unlock_irqrestore(&timer_lock, state);
}

上述代码中的timer_tick即为传入底层的回调函数，该函数实际上是在定时器中断例程中执行的。

kernel/kernel/timer.c

/* Callback handed to the platform timer: fires every timer on this cpu's
 * queue whose scheduled time has been reached, requeues periodic ones, and
 * reprograms the hardware timer for the next pending timer.
 * Returns INT_RESCHEDULE if any callback asked for it, else INT_NO_RESCHEDULE. */
static enum handler_return timer_tick(void *arg, lk_time_t now)
{
    timer_t *timer;
    enum handler_return ret = INT_NO_RESCHEDULE;

    DEBUG_ASSERT(arch_ints_disabled());

    THREAD_STATS_INC(timer_ints);
//  KEVLOG_TIMER_TICK(); // enable only if necessary

    uint cpu = arch_curr_cpu_num();

    LTRACEF("cpu %u now %u, sp %p\n", cpu, now, __GET_FRAME());

    // lock the queue for manipulation
    spin_lock(&timer_lock);

    for (;;) {
        /* see if there's an event to process */
        // fetch the timer at the head of the queue
        timer = list_peek_head_type(&timers[cpu].timer_queue, timer_t, node);
        if (likely(timer == 0))
            break;
        LTRACEF("next item on timer queue %p at %u now %u (%p, arg %p)\n", timer, timer->scheduled_time, now, timer->callback, timer->arg);
        if (likely(TIME_LT(now, timer->scheduled_time)))
            break;

        /* process it */
        LTRACEF("timer %p\n", timer);
        DEBUG_ASSERT(timer && timer->magic == TIMER_MAGIC);
        // remove this timer from the queue
        list_delete(&timer->node);

        /* we pulled it off the list, release the list lock to handle it */
        // once removed, the lock can be dropped
        spin_unlock(&timer_lock);

        LTRACEF("dequeued timer %p, scheduled %u periodic %u\n", timer, timer->scheduled_time, timer->periodic_time);

        THREAD_STATS_INC(timers);

        // remember whether this timer is periodic
        bool periodic = timer->periodic_time > 0;

        LTRACEF("timer %p firing callback %p, arg %p\n", timer, timer->callback, timer->arg);
        KEVLOG_TIMER_CALL(timer->callback, timer->arg);
        // invoke the callback
        if (timer->callback(timer, now, timer->arg) == INT_RESCHEDULE)
            ret = INT_RESCHEDULE;

        DEBUG_ASSERT(arch_ints_disabled());
        /* it may have been requeued or periodic, grab the lock so we can safely inspect it */
        // lock the timer queue again
        spin_lock(&timer_lock);

        /* if it was a periodic timer and it hasn't been requeued
         * by the callback put it back in the list
         */
        // periodic timer: compute its next scheduled time and
        // insert it back into the timer queue
        if (periodic && !list_in_list(&timer->node) && timer->periodic_time > 0) {
            LTRACEF("periodic timer, period %u\n", timer->periodic_time);
            timer->scheduled_time = now + timer->periodic_time;
            insert_timer_in_queue(cpu, timer);
        }
    }

#if PLATFORM_HAS_DYNAMIC_TIMER
    /* reset the timer to the next event */
    // prepare for the next timer expiry:
    // fetch the next timer
    timer = list_peek_head_type(&timers[cpu].timer_queue, timer_t, node);
    if (timer) {
        /* has to be the case or it would have fired already */
        DEBUG_ASSERT(TIME_GT(timer->scheduled_time, now));

        // compute the delay until it is due
        lk_time_t delay = timer->scheduled_time - now;

        LTRACEF("setting new timer for %u msecs for event %p\n", (uint)delay, timer);
        // program the underlying hardware timer
        platform_set_oneshot_timer(timer_tick, NULL, delay);
    }

    /* we're done manipulating the timer queue */
    // release the queue lock
    spin_unlock(&timer_lock);
#else
    /* ... */
#endif

    return ret;
}

以上分析了内核代码中从各平台中抽象出来的定时器实现代码，接下来将针对某个平台来分析定时器底层的实现。

timer.c中涉及底层定时器的共有两个函数：platform_set_oneshot_timer和回调timer_tick。下面以底层实现arm_generic为例。

kernel/dev/timer/arm_generic/arm_generic_timer.c

// 保存全局的硬件定时器回调static platform_timer_callback t_callback;...static enum handler_return platform_tick(void *arg){    // 写定时器控制寄存器，关闭定时器    write_cntp_ctl(0);    // 若回调函数存在则调用并返回    if (t_callback) {        return t_callback(arg, current_time());    } else {        return INT_NO_RESCHEDULE;    }}status_t platform_set_oneshot_timer(platform_timer_callback callback, void *arg, lk_time_t interval){    // 通过定时时间计算硬件定时器所需定时值    uint64_t cntpct_interval = lk_time_to_cntpct(interval);    ASSERT(arg == NULL);    // 设置回调函数    t_callback = callback;    // 设置硬件定时器寄存器时间    if (cntpct_interval <= INT_MAX)        write_cntp_tval(cntpct_interval);    else        write_cntp_cval(read_cntpct() + cntpct_interval);    // 写定时器控制寄存器，打开定时器    write_cntp_ctl(1);    return 0;}

platform_tick即是在中断中触发的函数，它是在底层硬件定时器初始化的时候被设置进去的。

kernel/dev/timer/arm_generic/arm_generic_timer.c

void arm_generic_timer_init(int irq, uint32_t freq_override){...    // 向终端系统中注册了定时器中断的服务例程，即是platform_tick    register_int_handler(irq, &platform_tick, NULL);...}

中断的部分将会在阅读中断系统代码的时候统一地进行相应的分析。

0 1