Linux SMP启动流程学习(二)
Linux SMP启动流程学习(二)3 SMP系统启动流程3.1 SMP启动判断源码:/arch/arm/kernel/setup.c调用:start_kernel() —> smp_setup_processor_id()void __init smp_setup_processor_id(void){int i;//判断是否是smp系统,是则从arm协处理器读取...
Linux SMP启动流程学习(二)
3 SMP系统启动流程
3.1 SMP启动判断
源码:/arch/arm/kernel/setup.c
调用:start_kernel() —> smp_setup_processor_id()
void __init smp_setup_processor_id(void)
{
int i;
//判断是否是smp系统,是则从arm协处理器读取当前cpuid,否则为0
u32 mpidr = is_smp() ? read_cpuid_mpidr() & MPIDR_HWID_BITMASK : 0;
//根据level确定CPU号,即cpu = (mpidr >> 0) & 0xff;
u32 cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0);
//设置cpu的map数组
cpu_logical_map(0) = cpu;
//nr_cpu_ids表示系统中CPU总数
for (i = 1; i < nr_cpu_ids; ++i)
cpu_logical_map(i) = i == cpu ? 0 : i;
/*
* clear __my_cpu_offset on boot CPU to avoid hang caused by
* using percpu variable early, for example, lockdep will
* access percpu variable inside lock_release
*/
set_my_cpu_offset(0);
//内核启动的第一条Log信息
pr_info("Booting Linux on physical CPU 0x%x\n", mpidr);
}
源码:/arch/arm/include/asm/smp_plat.h
/*
* Return true if we are running on a SMP platform
*/
static inline bool is_smp(void)
{
#ifndef CONFIG_SMP
return false;
#elif defined(CONFIG_SMP_ON_UP)
extern unsigned int smp_on_up;
return !!smp_on_up;
#else
return true;
#endif
}
源码:/arch/arm/include/asm/cputype.h
static inline unsigned int __attribute_const__ read_cpuid_mpidr(void)
{
return read_cpuid(CPUID_MPIDR);
}
#define read_cpuid(reg) \
({ \
unsigned int __val; \
asm("mrc p15, 0, %0, c0, c0, " __stringify(reg) \
: "=r" (__val) \
: \
: "cc"); \
__val; \
})
/*
* The memory clobber prevents gcc 4.5 from reordering the mrc before
* any is_smp() tests, which can cause undefined instruction aborts on
* ARM1136 r0 due to the missing extended CP15 registers.
*/
#define read_cpuid_ext(ext_reg) \
({ \
unsigned int __val; \
asm("mrc p15, 0, %0, c0, " ext_reg \
: "=r" (__val) \
: \
: "memory"); \
__val; \
})
3.2 DeviceTree cpu_node解析
源码:/arch/arm/kernel/devtree.c:70
调用流程:start_kernel() -> setup_arch() -> arm_dt_init_cpu_maps()
/*
* arm_dt_init_cpu_maps - Function retrieves cpu nodes from the device tree
* and builds the cpu logical map array containing MPIDR values related to
* logical cpus
*
* Updates the cpu possible mask with the number of parsed cpu nodes
*/
void __init arm_dt_init_cpu_maps(void)
{
/*
* Temp logical map is initialized with UINT_MAX values that are
* considered invalid logical map entries since the logical map must
* contain a list of MPIDR[23:0] values where MPIDR[31:24] must
* read as 0.
*/
struct device_node *cpu, *cpus;
int found_method = 0;
u32 i, j, cpuidx = 1;
u32 mpidr = is_smp() ? read_cpuid_mpidr() & MPIDR_HWID_BITMASK : 0;
u32 tmp_map[NR_CPUS] = { [0 ... NR_CPUS-1] = MPIDR_INVALID };
bool bootcpu_valid = false;
//DeviceTree中的查找路径
cpus = of_find_node_by_path("/cpus");
if (!cpus)
return;
//遍历 cpus 所有的child node
for_each_child_of_node(cpus, cpu) {
const __be32 *cell;
int prop_bytes;
u32 hwid;
//只查找device_type为cpu的node
if (of_node_cmp(cpu->type, "cpu"))
continue;
pr_debug(" * %pOF...\n", cpu);
/*
* A device tree containing CPU nodes with missing "reg"
* properties is considered invalid to build the
* cpu_logical_map.
*/
//读取reg属性的值,并赋值给prop_bytes
cell = of_get_property(cpu, "reg", &prop_bytes);
if (!cell || prop_bytes < sizeof(*cell)) {
pr_debug(" * %pOF missing reg property\n", cpu);
of_node_put(cpu);
return;
}
/*
* Bits n:24 must be set to 0 in the DT since the reg property
* defines the MPIDR[23:0].
*/
do {
hwid = be32_to_cpu(*cell++);//获取hwid
prop_bytes -= sizeof(*cell);
} while (!hwid && prop_bytes > 0);
//reg的属性值bit:24必须设置为0,这是Arm CPU binding定义的
if (prop_bytes || (hwid & ~MPIDR_HWID_BITMASK)) {
of_node_put(cpu);
return;
}
/*
* Duplicate MPIDRs are a recipe for disaster.
* Scan all initialized entries and check for
* duplicates. If any is found just bail out.
* temp values were initialized to UINT_MAX
* to avoid matching valid MPIDR[23:0] values.
*/
//根据DTB中的信息设定cpu logical map数组
for (j = 0; j < cpuidx; j++)
if (WARN(tmp_map[j] == hwid,
"Duplicate /cpu reg properties in the DT\n")) {
of_node_put(cpu);
return;
}
/*
* Build a stashed array of MPIDR values. Numbering scheme
* requires that if detected the boot CPU must be assigned
* logical id 0. Other CPUs get sequential indexes starting
* from 1. If a CPU node with a reg property matching the
* boot CPU MPIDR is detected, this is recorded so that the
* logical map built from DT is validated and can be used
* to override the map created in smp_setup_processor_id().
*/
// 数组tmp_map保存了系统中所有CPU的MPIDR值(CPU ID值),
// 具体的index的编码规则是: tmp_map[0]保存了booting CPU的id值,
// 其余的CPU的ID值保存在1~NR_CPUS的位置
if (hwid == mpidr) {
i = 0;
bootcpu_valid = true;
} else {
i = cpuidx++;
}
if (WARN(cpuidx > nr_cpu_ids, "DT /cpu %u nodes greater than "
"max cores %u, capping them\n",
cpuidx, nr_cpu_ids)) {
cpuidx = nr_cpu_ids;
of_node_put(cpu);
break;
}
tmp_map[i] = hwid;
if (!found_method)
found_method = set_smp_ops_by_method(cpu);
}
/*
* Fallback to an enable-method in the cpus node if nothing found in
* a cpu node.
*/
//匹配smp的处理方法
if (!found_method)
set_smp_ops_by_method(cpus);
if (!bootcpu_valid) {
pr_warn("DT missing boot CPU MPIDR[23:0], fall back to default cpu_logical_map\n");
return;
}
/*
* Since the boot CPU node contains proper data, and all nodes have
* a reg property, the DT CPU list can be considered valid and the
* logical map created in smp_setup_processor_id() can be overridden
*/
for (i = 0; i < cpuidx; i++) {
set_cpu_possible(i, true); //配置系统可运行的CPU核心的数量
cpu_logical_map(i) = tmp_map[i];
pr_debug("cpu logical map 0x%x\n", cpu_logical_map(i));
}
}
3.3 smp处理方法建立
3.3.1 DeviceTree方式建立
3.3.1.1 主要结构体介绍
of_cpu_method:
struct of_cpu_method {
const char *method; //method名字
const struct smp_operations *ops; //smp具体的ops实现方式
};
smp_operations:
struct smp_operations {
#ifdef CONFIG_SMP
/*
* Setup the set of possible CPUs (via set_cpu_possible)
*/
//初始化系统可运行的cpu
void (*smp_init_cpus)(void);
/*
* Initialize cpu_possible map, and enable coherency
*/
void (*smp_prepare_cpus)(unsigned int max_cpus);
/*
* Perform platform specific initialisation of the specified CPU.
*/
//引导slave cpu的初始化
void (*smp_secondary_init)(unsigned int cpu);
/*
* Boot a secondary CPU, and assign it the specified idle task.
* This also gives us the initial stack to use for this CPU.
*/
//启动slave CPU
int (*smp_boot_secondary)(unsigned int cpu, struct task_struct *idle);
#ifdef CONFIG_HOTPLUG_CPU
int (*cpu_kill)(unsigned int cpu);
void (*cpu_die)(unsigned int cpu);
bool (*cpu_can_disable)(unsigned int cpu);
int (*cpu_disable)(unsigned int cpu);
#endif
#endif
};
3.3.1.2 DeviceTree建立方法
源码:/arch/arm/kernel/devtree.c
调用:start_kernel() -> setup_arch() -> arm_dt_init_cpu_maps() -> set_smp_ops_by_method()
extern struct of_cpu_method __cpu_method_of_table[];
static const struct of_cpu_method __cpu_method_of_table_sentinel
__used __section(__cpu_method_of_table_end);
static int __init set_smp_ops_by_method(struct device_node *node)
{
const char *method;
struct of_cpu_method *m = __cpu_method_of_table;//内核中所有注册的method
//获取当前节点是否有method
if (of_property_read_string(node, "enable-method", &method))
return 0;
for (; m->method; m++)
if (!strcmp(m->method, method)) {
smp_set_ops(m->ops);//配置当前系统的smp的ops方法
return 1;
}
return 0;
}
3.3.1.3 DeviceTree 中cpus节点
源码:/arch/arm/boot/dts/sun8i-a23-a33.dtsi
cpus {
enable-method = "allwinner,sun8i-a23";
#address-cells = <1>;
#size-cells = <0>;
cpu0: cpu@0 {
compatible = "arm,cortex-a7";
device_type = "cpu";
reg = <0>;
};
cpu@1 {
compatible = "arm,cortex-a7";
device_type = "cpu";
reg = <1>;
};
};
3.3.1.4 smp_operations 的实现
源码:/arch/arm/mach-sunxi/platsmp.c
static const struct smp_operations sun8i_smp_ops __initconst = {
.smp_prepare_cpus = sun8i_smp_prepare_cpus,
.smp_boot_secondary = sun8i_smp_boot_secondary,
};
CPU_METHOD_OF_DECLARE(sun8i_a23_smp, "allwinner,sun8i-a23", &sun8i_smp_ops);
3.3.2 机器描述符mdesc当时建立
start_kernel()->setup_arch()
在该函数中,通过以下代码初始化smp_operations结构:
示例代码,arch/arm/mach-vexpress/v2m.c
DT_MACHINE_START(VEXPRESS_DT, "ARM-Versatile Express")
.dt_compat = v2m_dt_match,
.l2c_aux_val = 0x00400000,
.l2c_aux_mask = 0xfe0fffff,
.smp = smp_ops(vexpress_smp_dt_ops), //非空值
.smp_init = smp_init_ops(vexpress_smp_init_ops), //非空值
MACHINE_END
#ifdef CONFIG_SMP
#define smp_ops(ops) (&(ops))
#define smp_init_ops(ops) (&(ops))
#else
#define smp_ops(ops) (struct smp_operations *)NULL
#define smp_init_ops(ops) (bool (*)(void))NULL
#endi
3.3.2.1 smp的实现
由上,可轻松知道smp最终是指向vexpress_smp_dt_ops,该结构体定义如下:
const struct smp_operations vexpress_smp_dt_ops __initconst = {
.smp_prepare_cpus = vexpress_smp_dt_prepare_cpus,
.smp_secondary_init = versatile_secondary_init,
.smp_boot_secondary = versatile_boot_secondary,
#ifdef CONFIG_HOTPLUG_CPU
.cpu_die = vexpress_cpu_die,
#endif
};
3.3.2.2 smp_init实现
bool __init vexpress_smp_init_ops(void)
{
#ifdef CONFIG_MCPM
int cpu;
struct device_node *cpu_node, *cci_node;
/*
* The best way to detect a multi-cluster configuration
* is to detect if the kernel can take over CCI ports
* control. Loop over possible CPUs and check if CCI
* port control is available.
* Override the default vexpress_smp_ops if so.
*/
for_each_possible_cpu(cpu) {
bool available;
cpu_node = of_get_cpu_node(cpu, NULL);
if (WARN(!cpu_node, "Missing cpu device node!"))
return false;
cci_node = of_parse_phandle(cpu_node, "cci-control-port", 0);
available = cci_node && of_device_is_available(cci_node);
of_node_put(cci_node);
of_node_put(cpu_node);
if (!available)
return false;
}
mcpm_smp_set_ops();
return true;
#else
return false;
#endif
}
3.4 SMP smp_ops初始化
void __init smp_set_ops(const struct smp_operations *ops)
{
if (ops)
smp_ops = *ops;
};
3.5 SMP启动
SMP多核启动函数调用流程如下:
start_kernel() -> rest_init() -> kernel_init() -> kernel_init_freeable()
3.5.1 SMP前期启动准备
void __init smp_prepare_cpus(unsigned int max_cpus)
{
unsigned int ncores = num_possible_cpus();//获取系统可使用的CPU数量
init_cpu_topology();
smp_store_cpu_info(smp_processor_id());//存储当前CPU 的ID号
/*
* are we trying to boot more cores than exist?
*/
if (max_cpus > ncores)
max_cpus = ncores;
if (ncores > 1 && max_cpus) {
/*
* Initialise the present map, which describes the set of CPUs
* actually populated at the present time. A platform should
* re-initialize the map in the platforms smp_prepare_cpus()
* if present != possible (e.g. physical hotplug).
*/
init_cpu_present(cpu_possible_mask);
/*
* Initialise the SCU if there are more than one CPU
* and let them know where to start.
*/
// 调用smp_operations 中的smp_prepare_cpus成员函数,
// 为wake_up CPU做准备
if (smp_ops.smp_prepare_cpus)
smp_ops.smp_prepare_cpus(max_cpus);
}
}
3.5.2 SMP后期启动过程
/* Called by boot processor to activate the rest. */
void __init smp_init(void)
{
int num_nodes, num_cpus;
unsigned int cpu;
idle_threads_init(); //空闲线程初始化
cpuhp_threads_init(); //cpu热插拔初始化
pr_info("Bringing up secondary CPUs ...\n");
/* FIXME: This should be done in userspace --RR */
//wake_up系统中具备online条件的CPU
for_each_present_cpu(cpu) {
if (num_online_cpus() >= setup_max_cpus)
break;
if (!cpu_online(cpu))
cpu_up(cpu); //wake_up所有的非online的CPU
}
num_nodes = num_online_nodes();
num_cpus = num_online_cpus();
pr_info("Brought up %d node%s, %d CPU%s\n",
num_nodes, (num_nodes > 1 ? "s" : ""),
num_cpus, (num_cpus > 1 ? "s" : ""));
/* Any cleanup work */
smp_cpus_done(setup_max_cpus);
}
3.5.3 cpu_up的实现过程
cpu_up()的调用过程如下:
cpu_up() -> do_cpu_up() -> _cpu_up() -> cpuhp_up_callbacks() -> cpuhp_invoke_callback()
cpu_up()函数源码:kernel/cpu.c:1072
int cpu_up(unsigned int cpu)
{
return do_cpu_up(cpu, CPUHP_ONLINE);
}
EXPORT_SYMBOL_GPL(cpu_up);
do_cpu_up()函数源码:kernel/cpu.c:1042
static int do_cpu_up(unsigned int cpu, enum cpuhp_state target)
{
int err = 0;
if (!cpu_possible(cpu)) {
pr_err("can't online cpu %d because it is not configured as may-hotadd at boot time\n",
cpu);
#if defined(CONFIG_IA64)
pr_err("please check additional_cpus= boot parameter\n");
#endif
return -EINVAL;
}
err = try_online_node(cpu_to_node(cpu));
if (err)
return err;
cpu_maps_update_begin();
if (cpu_hotplug_disabled) {
err = -EBUSY;
goto out;
}
err = _cpu_up(cpu, 0, target);
out:
cpu_maps_update_done();
return err;
}
_cpu_up()函数源码:kernel/cpu.c:984
/* Requires cpu_add_remove_lock to be held */
static int _cpu_up(unsigned int cpu, int tasks_frozen, enum cpuhp_state target)
{
struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
struct task_struct *idle;
int ret = 0;
cpus_write_lock();
if (!cpu_present(cpu)) {
ret = -EINVAL;
goto out;
}
/*
* The caller of do_cpu_up might have raced with another
* caller. Ignore it for now.
*/
if (st->state >= target)
goto out;
if (st->state == CPUHP_OFFLINE) {
/* Let it fail before we try to bring the cpu up */
idle = idle_thread_get(cpu);
if (IS_ERR(idle)) {
ret = PTR_ERR(idle);
goto out;
}
}
cpuhp_tasks_frozen = tasks_frozen;
cpuhp_set_state(st, target);
/*
* If the current CPU state is in the range of the AP hotplug thread,
* then we need to kick the thread once more.
*/
if (st->state > CPUHP_BRINGUP_CPU) {
ret = cpuhp_kick_ap_work(cpu);
/*
* The AP side has done the error rollback already. Just
* return the error code..
*/
if (ret)
goto out;
}
/*
* Try to reach the target state. We max out on the BP at
* CPUHP_BRINGUP_CPU. After that the AP hotplug thread is
* responsible for bringing it up to the target state.
*/
target = min((int)target, CPUHP_BRINGUP_CPU);
ret = cpuhp_up_callbacks(cpu, st, target);
out:
cpus_write_unlock();
return ret;
}
cpuhp_up_callbacks ()函数源码:/kernel/cpu.c:469
static int cpuhp_up_callbacks(unsigned int cpu, struct cpuhp_cpu_state *st,
enum cpuhp_state target)
{
enum cpuhp_state prev_state = st->state;
int ret = 0;
while (st->state < target) {
st->state++;
ret = cpuhp_invoke_callback(cpu, st->state, true, NULL, NULL);
if (ret) {
st->target = prev_state;
undo_cpu_up(cpu, st);
break;
}
}
return ret;
}
cpuhp_invoke_callback()函数源码:/kernel/cpu.c:157
/**
* cpuhp_invoke_callback _ Invoke the callbacks for a given state
* @cpu: The cpu for which the callback should be invoked
* @state: The state to do callbacks for
* @bringup: True if the bringup callback should be invoked
* @node: For multi-instance, do a single entry callback for install/remove
* @lastp: For multi-instance rollback, remember how far we got
*
* Called from cpu hotplug and from the state register machinery.
*/
static int cpuhp_invoke_callback(unsigned int cpu, enum cpuhp_state state,
bool bringup, struct hlist_node *node,
struct hlist_node **lastp)
{
struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
//该状态,从全局数组cpuhp_bp_states[]获取相应的struct cpuhp_step结构变量,
struct cpuhp_step *step = cpuhp_get_step(state);
int (*cbm)(unsigned int cpu, struct hlist_node *node);
int (*cb)(unsigned int cpu);
int ret, cnt;
if (st->fail == state) {
st->fail = CPUHP_INVALID;
if (!(bringup ? step->startup.single : step->teardown.single))
return 0;
return -EAGAIN;
}
if (!step->multi_instance) {
WARN_ON_ONCE(lastp && *lastp);
cb = bringup ? step->startup.single : step->teardown.single;
if (!cb)
return 0;
trace_cpuhp_enter(cpu, st->target, state, cb);
ret = cb(cpu); //回调cpuhp_bp_states[]数组元素中的回调函数
trace_cpuhp_exit(cpu, st->state, state, ret);
return ret;
}
cbm = bringup ? step->startup.multi : step->teardown.multi;
if (!cbm)
return 0;
/* Single invocation for instance add/remove */
if (node) {
WARN_ON_ONCE(lastp && *lastp);
trace_cpuhp_multi_enter(cpu, st->target, state, cbm, node);
ret = cbm(cpu, node); //回调cpuhp_bp_states[]数组元素中的回调函数
trace_cpuhp_exit(cpu, st->state, state, ret);
return ret;
}
/* State transition. Invoke on all instances */
cnt = 0;
hlist_for_each(node, &step->list) {
if (lastp && node == *lastp)
break;
trace_cpuhp_multi_enter(cpu, st->target, state, cbm, node);
ret = cbm(cpu, node);
trace_cpuhp_exit(cpu, st->state, state, ret);
if (ret) {
if (!lastp)
goto err;
*lastp = node;
return ret;
}
cnt++;
}
if (lastp)
*lastp = NULL;
return 0;
err:
/* Rollback the instances if one failed */
cbm = !bringup ? step->startup.multi : step->teardown.multi;
if (!cbm)
return ret;
hlist_for_each(node, &step->list) {
if (!cnt--)
break;
trace_cpuhp_multi_enter(cpu, st->target, state, cbm, node);
ret = cbm(cpu, node);
trace_cpuhp_exit(cpu, st->state, state, ret);
/*
* Rollback must not fail,
*/
WARN_ON_ONCE(ret);
}
return ret;
}
3.5.4 cpuhp_state的定义
上面的代码中出现过几个状态值,具体如下:
- CPUHP_BRINGUP_CPU
- CPUHP_ONLINE(此为最大值)
在上面的cpu_up()函数中传入的target为CPUHP_ONLINE,由下知改值为最大值,而在_cpu_up()函数中,target变为CPUHP_BRINGUP_CPU,进入到cpuhp_up_callbacks()后,由于st->state是0,是小于传下来的target的,因此会通过一个while循环,每个cpu都遍历所有满足st->state < CPUHP_BRINGUP_CPU,来进行启动其他cpu的一些准备工作。
/*
* CPU-up CPU-down
*
* BP AP BP AP
*
* OFFLINE OFFLINE
* | ^
* v |
* BRINGUP_CPU->AP_OFFLINE BRINGUP_CPU <- AP_IDLE_DEAD (idle thread/play_dead)
* | AP_OFFLINE
* v (IRQ-off) ,---------------^
* AP_ONLNE | (stop_machine)
* | TEARDOWN_CPU <- AP_ONLINE_IDLE
* | ^
* v |
* AP_ACTIVE AP_ACTIVE
*/
enum cpuhp_state {
CPUHP_INVALID = -1,
CPUHP_OFFLINE = 0,
CPUHP_CREATE_THREADS,
CPUHP_PERF_PREPARE,
CPUHP_PERF_X86_PREPARE,
CPUHP_PERF_X86_AMD_UNCORE_PREP,
CPUHP_PERF_BFIN,
CPUHP_PERF_POWER,
CPUHP_PERF_SUPERH,
CPUHP_X86_HPET_DEAD,
CPUHP_X86_APB_DEAD,
CPUHP_X86_MCE_DEAD,
CPUHP_VIRT_NET_DEAD,
CPUHP_SLUB_DEAD,
CPUHP_MM_WRITEBACK_DEAD,
CPUHP_MM_VMSTAT_DEAD,
CPUHP_SOFTIRQ_DEAD,
CPUHP_NET_MVNETA_DEAD,
CPUHP_CPUIDLE_DEAD,
CPUHP_ARM64_FPSIMD_DEAD,
CPUHP_ARM_OMAP_WAKE_DEAD,
CPUHP_IRQ_POLL_DEAD,
CPUHP_BLOCK_SOFTIRQ_DEAD,
CPUHP_ACPI_CPUDRV_DEAD,
CPUHP_S390_PFAULT_DEAD,
CPUHP_BLK_MQ_DEAD,
CPUHP_FS_BUFF_DEAD,
CPUHP_PRINTK_DEAD,
CPUHP_MM_MEMCQ_DEAD,
CPUHP_PERCPU_CNT_DEAD,
CPUHP_RADIX_DEAD,
CPUHP_PAGE_ALLOC_DEAD,
CPUHP_NET_DEV_DEAD,
CPUHP_PCI_XGENE_DEAD,
CPUHP_IOMMU_INTEL_DEAD,
CPUHP_LUSTRE_CFS_DEAD,
CPUHP_WORKQUEUE_PREP,
CPUHP_POWER_NUMA_PREPARE,
CPUHP_HRTIMERS_PREPARE,
CPUHP_PROFILE_PREPARE,
CPUHP_X2APIC_PREPARE,
CPUHP_SMPCFD_PREPARE,
CPUHP_RELAY_PREPARE,
CPUHP_SLAB_PREPARE,
CPUHP_MD_RAID5_PREPARE,
CPUHP_RCUTREE_PREP,
CPUHP_CPUIDLE_COUPLED_PREPARE,
CPUHP_POWERPC_PMAC_PREPARE,
CPUHP_POWERPC_MMU_CTX_PREPARE,
CPUHP_XEN_PREPARE,
CPUHP_XEN_EVTCHN_PREPARE,
CPUHP_ARM_SHMOBILE_SCU_PREPARE,
CPUHP_SH_SH3X_PREPARE,
CPUHP_NET_FLOW_PREPARE,
CPUHP_TOPOLOGY_PREPARE,
CPUHP_NET_IUCV_PREPARE,
CPUHP_ARM_BL_PREPARE,
CPUHP_TRACE_RB_PREPARE,
CPUHP_MM_ZS_PREPARE,
CPUHP_MM_ZSWP_MEM_PREPARE,
CPUHP_MM_ZSWP_POOL_PREPARE,
CPUHP_KVM_PPC_BOOK3S_PREPARE,
CPUHP_ZCOMP_PREPARE,
CPUHP_TIMERS_PREPARE,
CPUHP_MIPS_SOC_PREPARE,
CPUHP_BP_PREPARE_DYN,
CPUHP_BP_PREPARE_DYN_END = CPUHP_BP_PREPARE_DYN + 20,
CPUHP_BRINGUP_CPU,
CPUHP_AP_IDLE_DEAD,
CPUHP_AP_OFFLINE,
CPUHP_AP_SCHED_STARTING,
CPUHP_AP_RCUTREE_DYING,
CPUHP_AP_IRQ_GIC_STARTING,
CPUHP_AP_IRQ_HIP04_STARTING,
CPUHP_AP_IRQ_ARMADA_XP_STARTING,
CPUHP_AP_IRQ_BCM2836_STARTING,
CPUHP_AP_IRQ_MIPS_GIC_STARTING,
CPUHP_AP_ARM_MVEBU_COHERENCY,
CPUHP_AP_PERF_X86_AMD_UNCORE_STARTING,
CPUHP_AP_PERF_X86_STARTING,
CPUHP_AP_PERF_X86_AMD_IBS_STARTING,
CPUHP_AP_PERF_X86_CQM_STARTING,
CPUHP_AP_PERF_X86_CSTATE_STARTING,
CPUHP_AP_PERF_XTENSA_STARTING,
CPUHP_AP_PERF_METAG_STARTING,
CPUHP_AP_MIPS_OP_LOONGSON3_STARTING,
CPUHP_AP_ARM_VFP_STARTING,
CPUHP_AP_ARM64_DEBUG_MONITORS_STARTING,
CPUHP_AP_PERF_ARM_HW_BREAKPOINT_STARTING,
CPUHP_AP_PERF_ARM_ACPI_STARTING,
CPUHP_AP_PERF_ARM_STARTING,
CPUHP_AP_ARM_L2X0_STARTING,
CPUHP_AP_ARM_ARCH_TIMER_STARTING,
CPUHP_AP_ARM_GLOBAL_TIMER_STARTING,
CPUHP_AP_JCORE_TIMER_STARTING,
CPUHP_AP_EXYNOS4_MCT_TIMER_STARTING,
CPUHP_AP_ARM_TWD_STARTING,
CPUHP_AP_METAG_TIMER_STARTING,
CPUHP_AP_QCOM_TIMER_STARTING,
CPUHP_AP_ARMADA_TIMER_STARTING,
CPUHP_AP_MARCO_TIMER_STARTING,
CPUHP_AP_MIPS_GIC_TIMER_STARTING,
CPUHP_AP_ARC_TIMER_STARTING,
CPUHP_AP_KVM_STARTING,
CPUHP_AP_KVM_ARM_VGIC_INIT_STARTING,
CPUHP_AP_KVM_ARM_VGIC_STARTING,
CPUHP_AP_KVM_ARM_TIMER_STARTING,
/* Must be the last timer callback */
CPUHP_AP_DUMMY_TIMER_STARTING,
CPUHP_AP_ARM_XEN_STARTING,
CPUHP_AP_ARM_CORESIGHT_STARTING,
CPUHP_AP_ARM64_ISNDEP_STARTING,
CPUHP_AP_SMPCFD_DYING,
CPUHP_AP_X86_TBOOT_DYING,
CPUHP_AP_ONLINE,
CPUHP_TEARDOWN_CPU,
CPUHP_AP_ONLINE_IDLE,
CPUHP_AP_SMPBOOT_THREADS,
CPUHP_AP_X86_VDSO_VMA_ONLINE,
CPUHP_AP_IRQ_AFFINITY_ONLINE,
CPUHP_AP_PERF_ONLINE,
CPUHP_AP_PERF_X86_ONLINE,
CPUHP_AP_PERF_X86_UNCORE_ONLINE,
CPUHP_AP_PERF_X86_AMD_UNCORE_ONLINE,
CPUHP_AP_PERF_X86_AMD_POWER_ONLINE,
CPUHP_AP_PERF_X86_RAPL_ONLINE,
CPUHP_AP_PERF_X86_CQM_ONLINE,
CPUHP_AP_PERF_X86_CSTATE_ONLINE,
CPUHP_AP_PERF_S390_CF_ONLINE,
CPUHP_AP_PERF_S390_SF_ONLINE,
CPUHP_AP_PERF_ARM_CCI_ONLINE,
CPUHP_AP_PERF_ARM_CCN_ONLINE,
CPUHP_AP_PERF_ARM_HISI_DDRC_ONLINE,
CPUHP_AP_PERF_ARM_HISI_HHA_ONLINE,
CPUHP_AP_PERF_ARM_HISI_L3_ONLINE,
CPUHP_AP_PERF_ARM_L2X0_ONLINE,
CPUHP_AP_PERF_ARM_QCOM_L2_ONLINE,
CPUHP_AP_PERF_ARM_QCOM_L3_ONLINE,
CPUHP_AP_PERF_POWERPC_NEST_IMC_ONLINE,
CPUHP_AP_PERF_POWERPC_CORE_IMC_ONLINE,
CPUHP_AP_PERF_POWERPC_THREAD_IMC_ONLINE,
CPUHP_AP_WORKQUEUE_ONLINE,
CPUHP_AP_RCUTREE_ONLINE,
CPUHP_AP_ONLINE_DYN,
CPUHP_AP_ONLINE_DYN_END = CPUHP_AP_ONLINE_DYN + 30,
CPUHP_AP_X86_HPET_ONLINE,
CPUHP_AP_X86_KVM_CLK_ONLINE,
CPUHP_AP_ACTIVE,
CPUHP_ONLINE,
};
3.5.5 cpuhp_bp_states 的定义
/* Boot processor state steps */
static struct cpuhp_step cpuhp_bp_states[] = {
[CPUHP_OFFLINE] = {
.name = "offline",
.startup.single = NULL,
.teardown.single = NULL,
},
#ifdef CONFIG_SMP
[CPUHP_CREATE_THREADS]= {
.name = "threads:prepare",
.startup.single = smpboot_create_threads,
.teardown.single = NULL,
.cant_stop = true,
},
[CPUHP_PERF_PREPARE] = {
.name = "perf:prepare",
.startup.single = perf_event_init_cpu,
.teardown.single = perf_event_exit_cpu,
},
[CPUHP_WORKQUEUE_PREP] = {
.name = "workqueue:prepare",
.startup.single = workqueue_prepare_cpu,
.teardown.single = NULL,
},
[CPUHP_HRTIMERS_PREPARE] = {
.name = "hrtimers:prepare",
.startup.single = hrtimers_prepare_cpu,
.teardown.single = hrtimers_dead_cpu,
},
[CPUHP_SMPCFD_PREPARE] = {
.name = "smpcfd:prepare",
.startup.single = smpcfd_prepare_cpu,
.teardown.single = smpcfd_dead_cpu,
},
[CPUHP_RELAY_PREPARE] = {
.name = "relay:prepare",
.startup.single = relay_prepare_cpu,
.teardown.single = NULL,
},
[CPUHP_SLAB_PREPARE] = {
.name = "slab:prepare",
.startup.single = slab_prepare_cpu,
.teardown.single = slab_dead_cpu,
},
[CPUHP_RCUTREE_PREP] = {
.name = "RCU/tree:prepare",
.startup.single = rcutree_prepare_cpu,
.teardown.single = rcutree_dead_cpu,
},
/*
* On the tear-down path, timers_dead_cpu() must be invoked
* before blk_mq_queue_reinit_notify() from notify_dead(),
* otherwise a RCU stall occurs.
*/
[CPUHP_TIMERS_PREPARE] = {
.name = "timers:dead",
.startup.single = timers_prepare_cpu,
.teardown.single = timers_dead_cpu,
},
/* Kicks the plugged cpu into life */
[CPUHP_BRINGUP_CPU] = {
.name = "cpu:bringup",
.startup.single = bringup_cpu,
.teardown.single = NULL,
.cant_stop = true,
},
/*
* Handled on controll processor until the plugged processor manages
* this itself.
*/
[CPUHP_TEARDOWN_CPU] = {
.name = "cpu:teardown",
.startup.single = NULL,
.teardown.single = takedown_cpu,
.cant_stop = true,
},
#else
[CPUHP_BRINGUP_CPU] = { },
#endif
};
3.5.6 bringup_cpu的实现
由上分析,当遍历到CPUHP_BRINGUP_CPU元素时,便会调用到bringup_cpu()函数来启动对应的cpu,该函数的主要调用流程如下:
bringup_cpu() -> __cpu_up() -> smp_ops.smp_boot_secondary()
bringup_cpu()的源码:/kernel/cpu.c:435
static int bringup_cpu(unsigned int cpu)
{
struct task_struct *idle = idle_thread_get(cpu);
int ret;
/*
* Some architectures have to walk the irq descriptors to
* setup the vector space for the cpu which comes online.
* Prevent irq alloc/free across the bringup.
*/
irq_lock_sparse();
/* Arch-specific enabling code. */
ret = __cpu_up(cpu, idle);
irq_unlock_sparse();
if (ret)
return ret;
return bringup_wait_for_ap(cpu);
}
__cpu_up()函数源码:/kernel/cpu.c:104
int __cpu_up(unsigned int cpu, struct task_struct *idle)
{
int ret;
if (!smp_ops.smp_boot_secondary)
return -ENOSYS;
/*
* We need to tell the secondary core where to find
* its stack and the page tables.
*/
secondary_data.stack = task_stack_page(idle) + THREAD_START_SP;
#ifdef CONFIG_ARM_MPU
secondary_data.mpu_rgn_info = &mpu_rgn_info;
#endif
#ifdef CONFIG_MMU
secondary_data.pgdir = virt_to_phys(idmap_pgd);
secondary_data.swapper_pg_dir = get_arch_pgd(swapper_pg_dir);
#endif
sync_cache_w(&secondary_data);
/*
* Now bring the CPU into our world.
*/
ret = smp_ops.smp_boot_secondary(cpu, idle);
if (ret == 0) {
/*
* CPU was successfully started, wait for it
* to come online or time out.
*/
wait_for_completion_timeout(&cpu_running,
msecs_to_jiffies(1000));
if (!cpu_online(cpu)) {
pr_crit("CPU%u: failed to come online\n", cpu);
ret = -EIO;
}
} else {
pr_err("CPU%u: failed to boot: %d\n", cpu, ret);
}
memset(&secondary_data, 0, sizeof(secondary_data));
return ret;
}
smp_boot_secondary()的实现具体是各个芯片厂商需要关心的事了,与具体的硬件相关。
示例代码,/arch/arm/mach-sunxi/platsmp.c
static int sun8i_smp_boot_secondary(unsigned int cpu,
struct task_struct *idle)
{
u32 reg;
if (!(prcm_membase && cpucfg_membase))
return -EFAULT;
spin_lock(&cpu_lock);
/* Set CPU boot address */
//配置bringup_cpu的入口地址
writel(__pa_symbol(secondary_startup),
cpucfg_membase + CPUCFG_PRIVATE0_REG);
/* Assert the CPU core in reset */
writel(0, cpucfg_membase + CPUCFG_CPU_RST_CTRL_REG(cpu));
/* Assert the L1 cache in reset */
reg = readl(cpucfg_membase + CPUCFG_GEN_CTRL_REG);
writel(reg & ~BIT(cpu), cpucfg_membase + CPUCFG_GEN_CTRL_REG);
/* Clear CPU power-off gating */
reg = readl(prcm_membase + PRCM_CPU_PWROFF_REG);
writel(reg & ~BIT(cpu), prcm_membase + PRCM_CPU_PWROFF_REG);
mdelay(1);
/* Deassert the CPU core reset */
writel(3, cpucfg_membase + CPUCFG_CPU_RST_CTRL_REG(cpu));
spin_unlock(&cpu_lock);
return 0;
}
3.5.7 secondary_startup()函数的实现
该函数是一段汇编代码,其源码如下:/arch/arm/kernel/head.S:366
#if defined(CONFIG_SMP)
.text
.arm
ENTRY(secondary_startup_arm)
THUMB( badr r9, 1f ) @ Kernel is entered in ARM.
THUMB( bx r9 ) @ If this is a Thumb-2 kernel,
THUMB( .thumb ) @ switch to Thumb now.
THUMB(1: )
ENTRY(secondary_startup)
/*
* Common entry point for secondary CPUs.
*
* Ensure that we're in SVC mode, and IRQs are disabled. Lookup
* the processor type - there is no need to check the machine type
* as it has already been validated by the primary processor.
*/
ARM_BE8(setend be) @ ensure we are in BE8 mode
#ifdef CONFIG_ARM_VIRT_EXT
bl __hyp_stub_install_secondary
#endif
safe_svcmode_maskall r9
mrc p15, 0, r9, c0, c0 @ get processor id
bl __lookup_processor_type
movs r10, r5 @ invalid processor?
moveq r0, #'p' @ yes, error 'p'
THUMB( it eq ) @ force fixup-able long branch encoding
beq __error_p
/*
* Use the page tables supplied from __cpu_up.
*/
adr r4, __secondary_data
ldmia r4, {r5, r7, r12} @ address to jump to after
sub lr, r4, r5 @ mmu has been enabled
add r3, r7, lr
ldrd r4, [r3, #0] @ get secondary_data.pgdir
ARM_BE8(eor r4, r4, r5) @ Swap r5 and r4 in BE:
ARM_BE8(eor r5, r4, r5) @ it can be done in 3 steps
ARM_BE8(eor r4, r4, r5) @ without using a temp reg.
ldr r8, [r3, #8] @ get secondary_data.swapper_pg_dir
badr lr, __enable_mmu @ return address
mov r13, r12 @ __secondary_switched address
ldr r12, [r10, #PROCINFO_INITFUNC]
add r12, r12, r10 @ initialise processor
@ (return control reg)
ret r12
ENDPROC(secondary_startup)
ENDPROC(secondary_startup_arm)
更多推荐
所有评论(0)