Linux时间子系统(三) -- clockevent, tick device
1 clockeventclockevent是具有编程和事件产生能力的定时器,在我使用的ARM SOC平台上底层依赖的硬件是ARM Generic Timer中的Timers。当然,在SOC系统中应该还会有其它的timer可以使用,但是本文的介绍只限于arm的timer。1.1 数据结构struct clock_event_device {void
转载请标明出处floater的csdn blog,http://blog.csdn.net/flaoter
1 clockevent
clockevent是具有编程和事件产生能力的定时器,在我使用的ARM SOC平台上底层依赖的硬件是ARM Generic Timer中的Timers。当然,在SOC系统中应该还会有其它的timer可以使用,但是本文的介绍只限于arm的timer。
1.1 数据结构
/*
 * Describes a timer device that can be programmed and can raise events
 * (the "clockevent" discussed in this article).
 */
struct clock_event_device {
void (*event_handler)(struct clock_event_device *); // event handler callback
int (*set_next_event)(unsigned long evt,
struct clock_event_device *); // program the next event, expressed in cycles
int (*set_next_ktime)(ktime_t expires,
struct clock_event_device *); // program the next event, expressed as ktime
ktime_t next_event;
u64 max_delta_ns; // maximum delta, in ns
u64 min_delta_ns; // minimum delta, in ns
u32 mult;
u32 shift; // mult/shift: conversion between ns and cycles
enum clock_event_mode mode; // current operating mode, see enum clock_event_mode
unsigned int features; // CLOCK_EVT_FEAT_* capability flags
unsigned long retries;
void (*broadcast)(const struct cpumask *mask);
void (*set_mode)(enum clock_event_mode mode,
struct clock_event_device *); // callback that applies an operating mode
void (*suspend)(struct clock_event_device *);
void (*resume)(struct clock_event_device *);
unsigned long min_delta_ticks; // filled in by clockevents_config_and_register()
unsigned long max_delta_ticks; // filled in by clockevents_config_and_register()
const char *name;
int rating;
int irq;
int bound_on;
const struct cpumask *cpumask; // CPUs this device serves
struct list_head list; // list node, added to clockevent_devices on registration
struct module *owner;
} ____cacheline_aligned;
• mode
这个成员是说明clockevent的工作模式,具体的mode设定是由set_mode这个callback函数来完成的。
/*
 * Operating modes of a clock_event_device. The mode is stored in
 * clock_event_device.mode and applied through the set_mode callback.
 */
enum clock_event_mode {
CLOCK_EVT_MODE_UNUSED = 0, // required state before clockevents_register_device()
CLOCK_EVT_MODE_SHUTDOWN,
CLOCK_EVT_MODE_PERIODIC,
CLOCK_EVT_MODE_ONESHOT,
CLOCK_EVT_MODE_RESUME,
};
• feature
说明clockevent设备的特征。CLOCK_EVT_FEAT_PERIODIC说明该硬件timer可以产生周期性的clock event,CLOCK_EVT_FEAT_ONESHOT说明该硬件timer可以产生单触发的clock event。不要将feature和mode的使用场景混淆。
/* Capability flags stored in clock_event_device.features. */
#define CLOCK_EVT_FEAT_PERIODIC 0x000001 // device can generate periodic events
#define CLOCK_EVT_FEAT_ONESHOT 0x000002 // device can generate one-shot events
#define CLOCK_EVT_FEAT_KTIME 0x000004 // events are programmed in ktime rather than cycles
• list
内核使用如下两个链表来管理系统中的clock_event_device。clockevent_devices list中是当前active的device。
/* Devices that are currently active; clockevents_register_device() adds to this list. */
static LIST_HEAD(clockevent_devices);
/* Devices that were replaced by clockevents_exchange_device(). */
static LIST_HEAD(clockevents_released);
1.2 clockevent的建立过程
这里我先对clockevent的注册函数进行介绍,再对在ARM SOC平台调用它的流程进行说明。
1.2.1 clock_event_device的注册
注册函数如下,
/*
 * Register a clock event device.
 *
 * @dev: device to register; its mode must still be CLOCK_EVT_MODE_UNUSED.
 *
 * The list manipulation and the notification of the tick layer are done
 * with clockevents_lock held and interrupts disabled.
 */
void clockevents_register_device(struct clock_event_device *dev)
{
unsigned long flags;
BUG_ON(dev->mode != CLOCK_EVT_MODE_UNUSED);
// No cpumask supplied: warn if more than one CPU is possible, then
// default to the CPU we are running on.
if (!dev->cpumask) {
WARN_ON(num_possible_cpus() > 1);
dev->cpumask = cpumask_of(smp_processor_id());
}
raw_spin_lock_irqsave(&clockevents_lock, flags);
list_add(&dev->list, &clockevent_devices); // add this clock_event_device to the clockevent_devices list
tick_check_new_device(dev); // notify the tick device layer; replacing the current clockevent also happens in there
clockevents_notify_released(); // walk the clockevents_released list and add its entries to the clockevent_devices list
raw_spin_unlock_irqrestore(&clockevents_lock, flags);
}
tick_check_new_device是tick device层的函数,在此只列出它调用的clockevents_exchange_device,clockevents_exchange_device函数不再展开了,它实现将curdev从clockevent_devices list中删除,添加到clockevents_released list中。
/* Abridged excerpt: only the device-exchange step is shown; "..." marks omitted code. */
void tick_check_new_device(struct clock_event_device *newdev)
{
...
// many condition checks precede this point; they are covered in a later section
clockevents_exchange_device(curdev, newdev); // remove curdev from the clockevent_devices list and add it to the clockevents_released list
tick_setup_device(td, newdev, cpu, cpumask_of(cpu));
...
}
1.2.2 ARM SOC平台上clockevent的注册
在上一节介绍clocksource的注册时提到过在内核启动阶段的time_init函数的clocksource_of_init中,会对段__clksrc_of_table进行解析,armv8_arch_timer的注册函数arch_timer_init会被调用,在本小节会对此函数进行详解。
在解析此函数之前,先看看dts中关于此timer的定义,
timer {
compatible = "arm,armv8-timer";
/*
 * Four PPIs. arch_timer_init() maps them by index, so the order follows
 * enum ppi_nr: secure physical, non-secure physical, virtual, hyp.
 */
interrupts = <GIC_PPI 13 (GIC_CPU_MASK_SIMPLE(8)
| IRQ_TYPE_LEVEL_LOW)>,
<GIC_PPI 14 (GIC_CPU_MASK_SIMPLE(8)
| IRQ_TYPE_LEVEL_LOW)>,
<GIC_PPI 11 (GIC_CPU_MASK_SIMPLE(8)
| IRQ_TYPE_LEVEL_LOW)>,
<GIC_PPI 10 (GIC_CPU_MASK_SIMPLE(8)
| IRQ_TYPE_LEVEL_LOW)>;
clock-frequency = <26000000>; /* 26 MHz */
};
在时间子系统(一)中曾经对ARM Generic Timer进行过说明,每个processor都有如下四个timer, 并且由于它们都是cpu私有的,所以产生的中断都是PPI类型的。
• A Non-secure EL1 physical timer.
• A Secure EL1 physical timer.
• A Non-secure EL2 physical timer.
• A virtual timer.
关于这几个中断,在内核中有如下枚举描述。
/*
 * Indices into arch_timer_ppi[], one per per-CPU timer interrupt.
 * arch_timer_init() passes the same index to irq_of_parse_and_map(), so
 * the order here must match the order of the DT "interrupts" entries.
 */
enum ppi_nr {
PHYS_SECURE_PPI,
PHYS_NONSECURE_PPI,
VIRT_PPI,
HYP_PPI,
MAX_TIMER_PPI // number of entries; used as the loop bound
};
回顾了这些知识后,再对arch_timer_init进行解析。
/*
 * Init entry for the CP15-accessed arch timer described by a DT node.
 *
 * @np: device tree node of the timer.
 *
 * Maps the timer PPIs, determines the timer rate, decides between the
 * virtual and the physical timer, and then registers the timer.
 * Returns early if a CP15 timer was already seen or if the required
 * physical timer interrupts are missing.
 */
static void __init arch_timer_init(struct device_node *np)
{
int i;
if (arch_timers_present & ARCH_CP15_TIMER) {
pr_warn("arch_timer: multiple nodes in dt, skipping\n");
return;
}
arch_timers_present |= ARCH_CP15_TIMER; // accessed via CP15
for (i = PHYS_SECURE_PPI; i < MAX_TIMER_PPI; i++)
arch_timer_ppi[i] = irq_of_parse_and_map(np, i); // parse the DT node and map an irq number
arch_timer_detect_rate(NULL, np); // determine the timer clock frequency
/*
* If HYP mode is available, we know that the physical timer
* has been configured to be accessible from PL1. Use it, so
* that a guest can use the virtual timer instead.
*
* If no interrupt provided for virtual timer, we'll have to
* stick to the physical timer. It'd better be accessible...
*/
// If hyp mode is available, or no interrupt was assigned to the virtual
// timer, the physical timer has to be used.
// In hyp mode a guest OS needs the virtual timer, so we keep using the
// physical timer ourselves.
if (is_hyp_mode_available() || !arch_timer_ppi[VIRT_PPI]) {
arch_timer_use_virtual = false;
if (!arch_timer_ppi[PHYS_SECURE_PPI] ||
!arch_timer_ppi[PHYS_NONSECURE_PPI]) {
pr_warn("arch_timer: No interrupt available, giving up\n");
return;
}
}
// Without the "always-on" DT property the timer is flagged as C3STOP
// later in __arch_timer_setup().
arch_timer_c3stop = !of_property_read_bool(np, "always-on");
arch_timer_register(); // register the arch timer
arch_timer_common_init(); // covered in the earlier clocksource article
}
请注意下面arch_timer_register中使用的是percpu类型的变量,clock_event_device是percpu的资源。
/*
 * Allocate the per-CPU clock_event_device, request the timer PPI(s),
 * hook up CPU hotplug and CPU PM notification, and set up the timer on
 * the boot CPU.
 *
 * Returns 0 on success. On failure returns -ENOMEM (per-CPU allocation
 * failed) or the error code of the failing call, after undoing every
 * step that had already succeeded.
 */
static int __init arch_timer_register(void)
{
	int err;
	int ppi;

	/* Allocate memory for the per-CPU clock_event_device variable. */
	arch_timer_evt = alloc_percpu(struct clock_event_device);
	if (!arch_timer_evt) {
		err = -ENOMEM;
		goto out;
	}

	if (arch_timer_use_virtual) {
		ppi = arch_timer_ppi[VIRT_PPI];
		err = request_percpu_irq(ppi, arch_timer_handler_virt,
					 "arch_timer", arch_timer_evt);
	} else {
		/*
		 * Physical timer: the secure and the non-secure physical
		 * timer PPI each need their own request.
		 */
		ppi = arch_timer_ppi[PHYS_SECURE_PPI];
		err = request_percpu_irq(ppi, arch_timer_handler_phys,
					 "arch_timer", arch_timer_evt);
		if (!err && arch_timer_ppi[PHYS_NONSECURE_PPI]) {
			ppi = arch_timer_ppi[PHYS_NONSECURE_PPI];
			err = request_percpu_irq(ppi, arch_timer_handler_phys,
						 "arch_timer", arch_timer_evt);
			/* Second request failed: drop the first one again. */
			if (err)
				free_percpu_irq(arch_timer_ppi[PHYS_SECURE_PPI],
						arch_timer_evt);
		}
	}

	if (err) {
		pr_err("arch_timer: can't register interrupt %d (%d)\n",
		       ppi, err);
		goto out_free;
	}

	err = register_cpu_notifier(&arch_timer_cpu_nb);
	if (err)
		goto out_free_irq;

	err = arch_timer_cpu_pm_init();
	if (err)
		goto out_unreg_notify;

	/* Immediately configure the timer on the boot CPU */
	arch_timer_setup(this_cpu_ptr(arch_timer_evt)); /* registers the clock event device */

	return 0;

	/* Error unwind: release resources in reverse order of acquisition. */
out_unreg_notify:
	unregister_cpu_notifier(&arch_timer_cpu_nb);
out_free_irq:
	if (arch_timer_use_virtual) {
		free_percpu_irq(arch_timer_ppi[VIRT_PPI], arch_timer_evt);
	} else {
		free_percpu_irq(arch_timer_ppi[PHYS_SECURE_PPI],
				arch_timer_evt);
		if (arch_timer_ppi[PHYS_NONSECURE_PPI])
			free_percpu_irq(arch_timer_ppi[PHYS_NONSECURE_PPI],
					arch_timer_evt);
	}
out_free:
	free_percpu(arch_timer_evt);
out:
	return err;
}
平台的percpu timer硬件寄存器都是通过CP15方式来访问。
/*
 * Set up the CP15 arch timer on the calling CPU.
 *
 * @clk: this CPU's clock_event_device.
 *
 * Fills in and registers @clk through __arch_timer_setup(), then enables
 * the per-CPU interrupt(s) of the timer in use. Always returns 0.
 */
static int arch_timer_setup(struct clock_event_device *clk)
{
__arch_timer_setup(ARCH_CP15_TIMER, clk); // timer registers are accessed through coprocessor CP15
if (arch_timer_use_virtual)
enable_percpu_irq(arch_timer_ppi[VIRT_PPI], 0);
else {
enable_percpu_irq(arch_timer_ppi[PHYS_SECURE_PPI], 0); // enable the interrupt
if (arch_timer_ppi[PHYS_NONSECURE_PPI])
enable_percpu_irq(arch_timer_ppi[PHYS_NONSECURE_PPI], 0);
}
arch_counter_set_user_access();
if (IS_ENABLED(CONFIG_ARM_ARCH_TIMER_EVTSTREAM))
arch_timer_configure_evtstream();
return 0;
}
/*
 * Fill in a clock_event_device for the arch timer and register it.
 *
 * @type: ARCH_CP15_TIMER for the CP15-accessed per-CPU timer; any other
 *        value selects the "arch_mem_timer" branch.
 * @clk:  device to initialise and register.
 *
 * Only CLOCK_EVT_FEAT_ONESHOT is set as an event capability;
 * CLOCK_EVT_FEAT_PERIODIC is never added here.
 */
static void __arch_timer_setup(unsigned type,
struct clock_event_device *clk)
{
// populate the clock_event_device fields
clk->features = CLOCK_EVT_FEAT_ONESHOT;
if (type == ARCH_CP15_TIMER) {
if (arch_timer_c3stop)
clk->features |= CLOCK_EVT_FEAT_C3STOP;
clk->name = "arch_sys_timer";
clk->rating = 450;
clk->cpumask = cpumask_of(smp_processor_id()); // serves only the calling CPU
if (arch_timer_use_virtual) {
clk->irq = arch_timer_ppi[VIRT_PPI];
clk->set_mode = arch_timer_set_mode_virt;
clk->set_next_event = arch_timer_set_next_event_virt;
} else {
clk->irq = arch_timer_ppi[PHYS_SECURE_PPI];
clk->set_mode = arch_timer_set_mode_phys;
clk->set_next_event = arch_timer_set_next_event_phys;
}
} else {
clk->features |= CLOCK_EVT_FEAT_DYNIRQ;
clk->name = "arch_mem_timer";
clk->rating = 400;
clk->cpumask = cpu_all_mask;
if (arch_timer_mem_use_virtual) {
clk->set_mode = arch_timer_set_mode_virt_mem;
clk->set_next_event =
arch_timer_set_next_event_virt_mem;
} else {
clk->set_mode = arch_timer_set_mode_phys_mem;
clk->set_next_event =
arch_timer_set_next_event_phys_mem;
}
}
// put the timer into SHUTDOWN mode before it is registered
clk->set_mode(CLOCK_EVT_MODE_SHUTDOWN, clk);
// register the clock_event_device; 0xf and 0x7fffffff are the min/max delta in ticks
clockevents_config_and_register(clk, arch_timer_rate, 0xf, 0x7fffffff);
}
通过__arch_timer_setup函数可见arch-timer的feature并不支持CLOCK_EVT_FEAT_PERIODIC。
/*
 * Configure a clock event device and register it.
 *
 * @dev:       device to configure and register.
 * @freq:      frequency handed to clockevents_config().
 * @min_delta: stored in dev->min_delta_ticks.
 * @max_delta: stored in dev->max_delta_ticks.
 */
void clockevents_config_and_register(struct clock_event_device *dev,
u32 freq, unsigned long min_delta,
unsigned long max_delta)
{
dev->min_delta_ticks = min_delta;
dev->max_delta_ticks = max_delta;
clockevents_config(dev, freq);
clockevents_register_device(dev);
}
clockevents_register_device已经在1.2.1节中进行了解析。至此,clockevent的注册过程就完成了。
内核还为应用层提供了sysfs接口,实现过程不再描述了,可以通过如下接口查看。
cat /sys/devices/system/clockevents/clockevent0/current_device
arch_sys_timer
cat /sys/devices/system/clockevents/clockevent1/current_device
arch_sys_timer
此外,除了cpu core上的clockevent设备外,kernel中还有broadcast的clockevent注册,在此处也不说明了。
2 tick device
2.1 数据结构
struct tick_device只是对struct clock_event_device的一个封装,加入了运行模式变量,支持PERIODIC和ONESHOT两种模式。
/* Thin wrapper around a clock_event_device that adds the tick operating mode. */
struct tick_device {
struct clock_event_device *evtdev; // underlying clock event device; NULL until the first setup
enum tick_device_mode mode; // periodic or one-shot tick
};
/*
 * Operating mode of a tick_device. This is separate from the
 * CLOCK_EVT_FEAT_* capability flags of the underlying clock_event_device.
 */
enum tick_device_mode {
TICKDEV_MODE_PERIODIC, // mode chosen by tick_setup_device() on first setup
TICKDEV_MODE_ONESHOT,
};
请注意此处的TICKDEV_MODE_PERIODIC与clock_event_device的成员feature CLOCK_EVT_FEAT_PERIODIC不要一起理解,即使是CLOCK_EVT_FEAT_ONESHOT的clockevent也可以支持TICKDEV_MODE_PERIODIC模式的tick device。
2.2 tickdevice的建立过程
在clock_event_device的注册过程中会调用tick_check_new_device通知tick device层进行处理,上文中只介绍了更新clockevent,此处对检查处理和tick device设备的创建进行说明。
此函数中的条件判断很多,一些场景在我使用的平台并没有出现,所以我只能按照代码进行理解了。
/*
 * Decide whether a newly registered clock event device becomes the tick
 * device of the current CPU.
 *
 * @newdev: the clock event device being registered.
 *
 * If @newdev does not serve this CPU, is not acceptable as a per-CPU
 * device, or is not preferred over the current device, it is offered to
 * the broadcast layer instead (out_bc). Otherwise it replaces the current
 * device and the tick device is set up on top of it.
 */
void tick_check_new_device(struct clock_event_device *newdev)
{
	struct clock_event_device *curdev;
	struct tick_device *td;
	int cpu;

	cpu = smp_processor_id();	/* id of the local CPU */
	if (!cpumask_test_cpu(cpu, newdev->cpumask))	/* does it serve this CPU? */
		goto out_bc;

	td = &per_cpu(tick_cpu_device, cpu);	/* tick device of this CPU */
	curdev = td->evtdev;

	/* cpu local device ? */
	/*
	 * If newdev serves only this CPU, registration continues. A device
	 * serving several CPUs is handled inside tick_check_percpu(); that
	 * case is not expanded in this article.
	 */
	if (!tick_check_percpu(curdev, newdev, cpu))
		goto out_bc;

	/* Preference decision */
	/*
	 * Based on one-shot support and the rating value, decide whether
	 * the old clock_event_device gets replaced.
	 */
	if (!tick_check_preferred(curdev, newdev))
		goto out_bc;

	if (!try_module_get(newdev->owner))
		return;

	/*
	 * Replace the eventually existing device by the new
	 * device. If the current device is the broadcast device, do
	 * not give it back to the clockevents layer !
	 */
	if (tick_is_broadcast_device(curdev)) {
		clockevents_shutdown(curdev);
		curdev = NULL;
	}
	clockevents_exchange_device(curdev, newdev);	/* swap in the new clockevent */
	tick_setup_device(td, newdev, cpu, cpumask_of(cpu));
	if (newdev->features & CLOCK_EVT_FEAT_ONESHOT)
		tick_oneshot_notify();
	return;

out_bc:
	/*
	 * Can the new device be used as a broadcast device ?
	 */
	tick_install_broadcast_device(newdev);
}
根据tick_device_mode会建立周期性的或单触发的tick_device。如果是第一次setup,只能建立周期模式的tick device。
/*
 * Attach a clock event device to a CPU's tick device and start it.
 *
 * @td:      tick device of the CPU.
 * @newdev:  clock event device to install.
 * @cpu:     CPU the tick device belongs to.
 * @cpumask: mask the device interrupt gets pinned to when @newdev is not
 *           already restricted to exactly this mask.
 *
 * The first setup on a CPU always starts in periodic mode. On later
 * setups the handler and next event of the previous device are carried
 * over and used if the tick device is in one-shot mode.
 */
static void tick_setup_device(struct tick_device *td,
struct clock_event_device *newdev, int cpu,
const struct cpumask *cpumask)
{
ktime_t next_event;
void (*handler)(struct clock_event_device *) = NULL;
/*
* First device setup ?
*/
if (!td->evtdev) { // first tick_device registration on this CPU
/*
* If no cpu took the do_timer update, assign it to
* this cpu:
*/
if (tick_do_timer_cpu == TICK_DO_TIMER_BOOT) { // one global tick device is needed to maintain global time state such as jiffies
if (!tick_nohz_full_cpu(cpu))
tick_do_timer_cpu = cpu;
else
tick_do_timer_cpu = TICK_DO_TIMER_NONE;
tick_next_period = ktime_get();
tick_period = ktime_set(0, NSEC_PER_SEC / HZ);
}
/*
* Startup in periodic mode first.
*/
td->mode = TICKDEV_MODE_PERIODIC; // the first setup on a CPU defaults to a periodic tick
} else {
// remember handler and next event of the old device, then neutralise its handler
handler = td->evtdev->event_handler;
next_event = td->evtdev->next_event;
td->evtdev->event_handler = clockevents_handle_noop;
}
td->evtdev = newdev; // install the new clock_event_device in the tick_device
/*
* When the device is not per cpu, pin the interrupt to the
* current cpu:
*/
if (!cpumask_equal(newdev->cpumask, cpumask))
irq_set_affinity(newdev->irq, cpumask);
/*
* When global broadcasting is active, check if the current
* device is registered as a placeholder for broadcast mode.
* This allows us to handle this x86 misfeature in a generic
* way. This function also returns !=0 when we keep the
* current active broadcast state for this CPU.
*/
if (tick_device_uses_broadcast(newdev, cpu))
return;
if (td->mode == TICKDEV_MODE_PERIODIC)
tick_setup_periodic(newdev, 0);
else
tick_setup_oneshot(newdev, handler, next_event);
}
配置周期性的tick_device,需要调用tick_setup_periodic。前面说过cpu第一次设置tick_device时默认配置成周期触发,所以启动阶段每个cpu都会调用tick_setup_periodic。
/*
 * Put a clock event device into service as a periodic tick source.
 *
 * @dev:       clock event device to set up.
 * @broadcast: passed through to tick_set_periodic_handler().
 *
 * Devices with CLOCK_EVT_FEAT_PERIODIC are switched to periodic mode
 * (unless one-shot broadcast is active); all others are switched to
 * one-shot mode and programmed for the next period by hand.
 */
void tick_setup_periodic(struct clock_event_device *dev, int broadcast)
{
tick_set_periodic_handler(dev, broadcast); //(1) install the periodic event handler
/* Broadcast setup ? */
if (!tick_device_is_functional(dev))
return;
if ((dev->features & CLOCK_EVT_FEAT_PERIODIC) &&
!tick_broadcast_oneshot_active()) { //(2) device supports periodic events: just switch its mode
clockevents_set_mode(dev, CLOCK_EVT_MODE_PERIODIC);
} else { //(3) no periodic support: use one-shot mode and program the next event
unsigned long seq;
ktime_t next;
// read tick_next_period consistently under the jiffies seqlock
do {
seq = read_seqbegin(&jiffies_lock);
next = tick_next_period;
} while (read_seqretry(&jiffies_lock, seq));
clockevents_set_mode(dev, CLOCK_EVT_MODE_ONESHOT);
// on programming failure, advance by one tick_period and retry
for (;;) {
if (!clockevents_program_event(dev, next, false))
return;
next = ktime_add(next, tick_period);
}
}
}
(1)设置event_handler=tick_handle_periodic
(2)clock_event_device支持周期触发,只需要设置clock_event_device的模式为周期触发
(3)clock_event_device不支持周期触发,将clock_event_device设置为单触发模式,并使用clockevents_program_event编程设置下一事件。
此后每次clockevent事件发生时都会调用tick_handle_periodic。
周期性tick的clock event handler的处理函数tick_handle_periodic分析如下,
/*
 * Event handler used while the tick device runs in periodic mode.
 *
 * @dev: clock event device that raised the event.
 *
 * Runs the periodic tick work. If the device is in one-shot mode, it
 * is also re-programmed for the next period.
 */
void tick_handle_periodic(struct clock_event_device *dev)
{
int cpu = smp_processor_id();
ktime_t next = dev->next_event;
tick_periodic(cpu); //(1)
if (dev->mode != CLOCK_EVT_MODE_ONESHOT) // a clockevent that is not in one-shot mode (i.e. periodic) is done here
return;
for (;;) { // a one-shot clockevent must be programmed for the next event via clockevents_program_event()
/*
* Setup the next period for devices, which do not have
* periodic mode:
*/
next = ktime_add(next, tick_period);
if (!clockevents_program_event(dev, next, false))
return;
/*
* Have to be careful here. If we're in oneshot mode,
* before we call tick_periodic() in a loop, we need
* to be sure we're using a real hardware clocksource.
* Otherwise we could get trapped in an infinite
* loop, as the tick_periodic() increments jiffies,
* which then will increment time, possibly causing
* the loop to trigger again and again.
*/
if (timekeeping_valid_for_hres())
tick_periodic(cpu);
}
}
tick_periodic会处理全局的时间信息更新任务和本地cpu上的进程时间信息。处理全局时间jiffies时,它需要选用一个全局的tick device来执行。
/*
 * Periodic tick work for one CPU.
 *
 * @cpu: CPU the tick is running on.
 *
 * Only the CPU recorded in tick_do_timer_cpu updates the global time
 * state; every CPU updates its process times and profiling tick.
 */
static void tick_periodic(int cpu)
{
if (tick_do_timer_cpu == cpu) { // this CPU owns the global tick duty
write_seqlock(&jiffies_lock);
/* Keep track of the next tick event */
tick_next_period = ktime_add(tick_next_period, tick_period);
do_timer(1); // update jiffies
write_sequnlock(&jiffies_lock);
update_wall_time(); // update wall-clock time
}
update_process_times(user_mode(get_irq_regs())); // update process time accounting and run the periodic scheduler hook (scheduler_tick)
profile_tick(CPU_PROFILING);
}
到此介绍了每个cpu的tick_device的注册,启动阶段tick_device工作在周期触发模式,并且它对应的event_handler为tick_handle_periodic。在高分辨率时钟(CONFIG_HIGH_RES_TIMERS)和动态时钟(CONFIG_NO_HZ)特性开启后还会有变化。
更多推荐
所有评论(0)