【deepseek】Linux上下文切换时间精确测量方案
·
Linux上下文切换时间精确测量方案
1. 方案架构:内核模块 + 微基准测试
2. 内核模块实现(核心)
// ctx_switch.c - 上下文切换时间测量内核模块
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/proc_fs.h>
#include <linux/sched.h>
#include <linux/sched/clock.h>
#include <linux/uaccess.h>
#include <linux/spinlock.h>
#include <linux/kthread.h>
#include <linux/delay.h>
#include <linux/version.h>
#include <linux/percpu.h>
#include <linux/hrtimer.h>
#include <linux/irqflags.h>
#include <linux/slab.h>
#include <linux/seq_file.h>
#include <linux/vmalloc.h>
#include <linux/cpumask.h>
#include <linux/math64.h>
#include <linux/sort.h>
#define MODULE_NAME "ctx_switch"
#define MAX_SAMPLES 100000
#define MAX_CPUS NR_CPUS
// One recorded context-switch sample, stored in the per-CPU ring buffer.
struct ctx_switch_event {
u64 switch_in_ts; // timestamp passed in as the switch-in time
u64 switch_out_ts; // timestamp passed in as the switch-out time
u64 switch_time_ns; // switch duration = switch_in_ts - switch_out_ts
pid_t prev_pid; // PID of the task that was switched out
pid_t next_pid; // PID of the task that was switched in
int cpu; // CPU number the sample was recorded for
u32 seq; // value of the global test sequence counter when recorded
char prev_comm[TASK_COMM_LEN]; // command name of the switched-out task
char next_comm[TASK_COMM_LEN]; // command name of the switched-in task
};
// Per-CPU measurement state: a ring buffer of samples plus running statistics.
// NOTE(review): events[] is embedded inline, so one instance holds MAX_SAMPLES
// events (several MB); confirm alloc_percpu() can satisfy an allocation this large.
struct per_cpu_stats {
struct ctx_switch_event events[MAX_SAMPLES];
unsigned int count; // number of valid samples currently in events[]
unsigned int head; // index where the next sample is written
unsigned int tail; // index of the oldest sample
spinlock_t lock; // protects the ring buffer and the running statistics
// Running statistics
u64 min_ns;
u64 max_ns;
u64 total_ns;
u64 total_sq_ns; // sum of squares, used to compute the standard deviation
// State of the switch currently in progress on this CPU
u64 last_switch_out_ts;
pid_t last_switch_out_pid;
char last_switch_out_comm[TASK_COMM_LEN];
// True while the measurement thread for this CPU is running
bool measuring;
};
// Module-wide state
static struct per_cpu_stats __percpu *cpu_stats; // per-CPU statistics, allocated in ctx_switch_init()
static struct proc_dir_entry *proc_entry; // the /proc control/report entry
static atomic_t module_active = ATOMIC_INIT(0); // 1 between start_measurement() and stop_measurement()
static atomic_t test_seq = ATOMIC_INIT(0); // incremented on every start_measurement()
static struct task_struct *measure_threads[MAX_CPUS]; // one measurement kthread per CPU, indexed by CPU number
/*
 * Return the current timestamp used for all measurements in this module:
 * local_clock() on kernels >= 4.0, sched_clock() on older ones.
 */
static inline u64 get_ns_timestamp(void)
{
    u64 now;

#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 0, 0)
    now = sched_clock();
#else
    now = local_clock();
#endif
    return now;
}
/*
 * Read a raw hardware counter where one is directly readable, otherwise fall
 * back to get_ns_timestamp().
 *
 *  - x86 (32- and 64-bit): RDTSC.
 *  - arm64: the virtual counter CNTVCT_EL0.
 *  - everything else, including 32-bit ARM: get_ns_timestamp(). The
 *    "mrs ..., cntvct_el0" encoding is AArch64-only, so it must not be
 *    selected for CONFIG_ARM.
 *
 * NOTE(review): the kernel already provides its own get_cycles() through the
 * timex headers on common architectures; confirm this definition does not
 * clash with it in the target build, and rename it if it does.
 */
static inline u64 get_cycles(void)
{
#if defined(CONFIG_X86)
    unsigned int low, high;

    asm volatile("rdtsc" : "=a" (low), "=d" (high));
    return ((u64)high << 32) | low;
#elif defined(CONFIG_ARM64)
    u64 val;

    asm volatile("mrs %0, cntvct_el0" : "=r" (val));
    return val;
#else
    return get_ns_timestamp();
#endif
}
/*
 * Store one context-switch sample in the ring buffer of @cpu and update the
 * running statistics.
 *
 * @cpu:            CPU whose statistics receive the sample
 * @prev_pid/@prev_comm: task that was switched out
 * @next_pid/@next_comm: task that was switched in
 * @switch_out_ts:  timestamp of the switch-out
 * @switch_in_ts:   timestamp of the switch-in
 *
 * Samples longer than 1 ms are discarded as outliers. When the ring is full
 * the oldest sample is evicted (and removed from the running sums) BEFORE its
 * slot is reused; doing it afterwards would subtract the new sample instead,
 * because head and tail refer to the same slot when the ring is full.
 * min_ns/max_ns are not recomputed on eviction.
 */
static void record_ctx_switch(int cpu, pid_t prev_pid, const char *prev_comm,
                              pid_t next_pid, const char *next_comm,
                              u64 switch_out_ts, u64 switch_in_ts)
{
    struct per_cpu_stats *stats = per_cpu_ptr(cpu_stats, cpu);
    struct ctx_switch_event *event;
    unsigned long flags;
    u64 switch_time_ns;

    if (!stats || !stats->measuring)
        return;

    /* Unsigned subtraction is modular, so it is also correct across a wrap. */
    switch_time_ns = switch_in_ts - switch_out_ts;

    /* Drop outliers: more than 1 ms is not a plain context switch. */
    if (switch_time_ns > 1000000)
        return;

    spin_lock_irqsave(&stats->lock, flags);

    /* Make room first so the sums lose the oldest sample, not the new one. */
    if (stats->count == MAX_SAMPLES) {
        const struct ctx_switch_event *oldest = &stats->events[stats->tail];

        stats->total_ns -= oldest->switch_time_ns;
        stats->total_sq_ns -= oldest->switch_time_ns * oldest->switch_time_ns;
        stats->tail = (stats->tail + 1) % MAX_SAMPLES;
        stats->count--;
    }

    event = &stats->events[stats->head];
    event->switch_out_ts = switch_out_ts;
    event->switch_in_ts = switch_in_ts;
    event->switch_time_ns = switch_time_ns;
    event->prev_pid = prev_pid;
    event->next_pid = next_pid;
    event->cpu = cpu;
    event->seq = atomic_read(&test_seq);
    strncpy(event->prev_comm, prev_comm, TASK_COMM_LEN - 1);
    strncpy(event->next_comm, next_comm, TASK_COMM_LEN - 1);
    event->prev_comm[TASK_COMM_LEN - 1] = '\0';
    event->next_comm[TASK_COMM_LEN - 1] = '\0';

    /* Running statistics */
    if (stats->count == 0) {
        stats->min_ns = switch_time_ns;
        stats->max_ns = switch_time_ns;
    } else {
        if (switch_time_ns < stats->min_ns)
            stats->min_ns = switch_time_ns;
        if (switch_time_ns > stats->max_ns)
            stats->max_ns = switch_time_ns;
    }
    stats->total_ns += switch_time_ns;
    stats->total_sq_ns += switch_time_ns * switch_time_ns;

    stats->head = (stats->head + 1) % MAX_SAMPLES;
    stats->count++;

    spin_unlock_irqrestore(&stats->lock, flags);
}
// 调度器跟踪点处理(内核4.4+)
#if defined(CONFIG_TRACEPOINTS) && LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0)
#include <trace/events/sched.h>
/*
 * sched_switch probe: remember when, and from which task, the current CPU
 * switched away. The timestamp is taken before the measuring check.
 * NOTE(review): the visible source never registers this probe with the
 * tracepoint; confirm where registration is supposed to happen.
 */
static void trace_sched_switch_handler(void *ignore,
                                       bool preempt,
                                       struct task_struct *prev,
                                       struct task_struct *next)
{
    struct per_cpu_stats *stats;
    u64 switch_out_ts;

    stats = per_cpu_ptr(cpu_stats, smp_processor_id());
    switch_out_ts = get_ns_timestamp();

    if (stats && stats->measuring) {
        stats->last_switch_out_ts = switch_out_ts;
        stats->last_switch_out_pid = prev->pid;
        /* Terminate first; strncpy() below only touches the first LEN-1 bytes. */
        stats->last_switch_out_comm[TASK_COMM_LEN - 1] = '\0';
        strncpy(stats->last_switch_out_comm, prev->comm, TASK_COMM_LEN - 1);
    }
}
/*
 * sched_wakeup probe: if a switch-out is pending on this CPU, pair it with the
 * woken task @p, record the sample, and clear the pending state.
 */
static void trace_sched_wakeup_handler(void *ignore, struct task_struct *p)
{
    const int this_cpu = smp_processor_id();
    struct per_cpu_stats *stats = per_cpu_ptr(cpu_stats, this_cpu);
    u64 wake_ts;

    if (!stats)
        return;
    if (!stats->measuring)
        return;
    /* A zero timestamp means no switch-out is pending. */
    if (stats->last_switch_out_ts == 0)
        return;

    wake_ts = get_ns_timestamp();
    record_ctx_switch(this_cpu,
                      stats->last_switch_out_pid,
                      stats->last_switch_out_comm,
                      p->pid,
                      p->comm,
                      stats->last_switch_out_ts,
                      wake_ts);

    /* Consume the pending switch-out so it is only paired once. */
    stats->last_switch_out_ts = 0;
}
#endif
// 创建测量线程(用于主动触发上下文切换)
static int measure_thread_func(void *arg)
{
int cpu = (long)arg;
struct per_cpu_stats *stats = per_cpu_ptr(cpu_stats, cpu);
struct sched_param param = { .sched_priority = 99 };
cpumask_t mask;
// 设置最高实时优先级
sched_setscheduler(current, SCHED_FIFO, ¶m);
// 绑定到指定CPU
cpumask_clear(&mask);
cpumask_set_cpu(cpu, &mask);
set_cpus_allowed_ptr(current, &mask);
pr_info("Context switch measurement thread started on CPU %d, PID %d\n",
cpu, current->pid);
stats->measuring = true;
while (!kthread_should_stop()) {
// 主动放弃CPU,触发上下文切换
schedule();
// 短暂睡眠,避免过度占用CPU
msleep(1);
}
stats->measuring = false;
return 0;
}
// 启动测量线程
static int start_measurement(void)
{
int cpu;
atomic_inc(&test_seq);
for_each_online_cpu(cpu) {
struct per_cpu_stats *stats = per_cpu_ptr(cpu_stats, cpu);
char name[16];
// 重置统计
spin_lock(&stats->lock);
stats->head = 0;
stats->tail = 0;
stats->count = 0;
stats->min_ns = 0;
stats->max_ns = 0;
stats->total_ns = 0;
stats->total_sq_ns = 0;
stats->last_switch_out_ts = 0;
spin_unlock(&stats->lock);
// 创建测量线程
snprintf(name, sizeof(name), "ctx_meas_%d", cpu);
measure_threads[cpu] = kthread_create(measure_thread_func,
(void *)(long)cpu,
name);
if (!IS_ERR(measure_threads[cpu])) {
wake_up_process(measure_threads[cpu]);
}
}
atomic_set(&module_active, 1);
pr_info("Context switch measurement started\n");
return 0;
}
/*
 * Stop every measurement kthread and mark the module inactive.
 *
 * All possible CPUs are visited, not only the online ones: a CPU that went
 * offline after start_measurement() would otherwise keep its thread forever.
 * Safe to call when nothing is running.
 */
static void stop_measurement(void)
{
    int cpu;

    atomic_set(&module_active, 0);

    for_each_possible_cpu(cpu) {
        struct task_struct *task = measure_threads[cpu];

        if (IS_ERR_OR_NULL(task))
            continue;
        kthread_stop(task);
        measure_threads[cpu] = NULL;
    }

    pr_info("Context switch measurement stopped\n");
}
/* Defined further down in this file; declared here because sort() needs it. */
static int cmp_u64(const void *a, const void *b);

/*
 * Compute summary statistics for one CPU.
 *
 * @stats:  per-CPU state to summarise (may be NULL)
 * @min_ns, @max_ns, @avg_ns, @stddev_ns, @p95_ns, @p99_ns: outputs
 *
 * Every output is set to 0 up front, so callers never see uninitialised
 * values when there are no samples or the scratch buffer cannot be allocated.
 * May sleep (allocates memory); must not be called with a spinlock held.
 */
static void calculate_stats(struct per_cpu_stats *stats,
                            u64 *min_ns, u64 *max_ns, u64 *avg_ns,
                            u64 *stddev_ns, u64 *p95_ns, u64 *p99_ns)
{
    unsigned int capacity, total, copied, idx, i;
    unsigned long flags;
    u64 mean, mean_sq, variance;
    u64 *latencies;

    *min_ns = *max_ns = *avg_ns = *stddev_ns = *p95_ns = *p99_ns = 0;
    if (!stats)
        return;

    /* Size the scratch buffer from an unlocked snapshot of the count. */
    capacity = READ_ONCE(stats->count);
    if (capacity == 0)
        return;

    /* Up to MAX_SAMPLES * 8 bytes: too large to rely on contiguous kmalloc. */
    latencies = vmalloc((size_t)capacity * sizeof(*latencies));
    if (!latencies)
        return;

    spin_lock_irqsave(&stats->lock, flags);
    total = stats->count;
    if (total == 0) {
        /* A reset raced with us. */
        spin_unlock_irqrestore(&stats->lock, flags);
        vfree(latencies);
        return;
    }
    /* The ring may have grown since the snapshot: never copy past the buffer. */
    copied = min(total, capacity);
    idx = (stats->tail + (total - copied)) % MAX_SAMPLES; /* keep the newest */
    for (i = 0; i < copied; i++) {
        latencies[i] = stats->events[idx].switch_time_ns;
        idx = (idx + 1) % MAX_SAMPLES;
    }
    *min_ns = stats->min_ns;
    *max_ns = stats->max_ns;
    /* div_u64() keeps 64-bit division buildable on 32-bit kernels. */
    mean = div_u64(stats->total_ns, total);
    mean_sq = div_u64(stats->total_sq_ns, total);
    spin_unlock_irqrestore(&stats->lock, flags);

    *avg_ns = mean;
    /* Variance = E[x^2] - E[x]^2, clamped so truncation can never underflow. */
    variance = mean_sq > mean * mean ? mean_sq - mean * mean : 0;
    /* NOTE(review): int_sqrt() takes unsigned long; verify range on 32-bit. */
    *stddev_ns = int_sqrt(variance);

    /* Percentiles come from the sorted copy. */
    sort(latencies, copied, sizeof(u64), cmp_u64, NULL);
    *p95_ns = latencies[copied * 95 / 100];
    *p99_ns = latencies[copied * 99 / 100];

    vfree(latencies);
}
/*
 * Three-way comparison of two u64 values for sort():
 * returns -1, 0 or 1 when *a is below, equal to or above *b.
 */
static int cmp_u64(const void *a, const void *b)
{
    const u64 lhs = *(const u64 *)a;
    const u64 rhs = *(const u64 *)b;

    /* Each comparison yields 0 or 1, so the difference is -1, 0 or 1. */
    return (lhs > rhs) - (lhs < rhs);
}
// /proc接口
static int proc_show(struct seq_file *m, void *v)
{
int cpu;
seq_printf(m, "=== Linux Context Switch Time Measurement ===\n\n");
seq_printf(m, "Module status: %s\n",
atomic_read(&module_active) ? "Active" : "Inactive");
seq_printf(m, "Test sequence: %d\n\n", atomic_read(&test_seq));
for_each_online_cpu(cpu) {
struct per_cpu_stats *stats = per_cpu_ptr(cpu_stats, cpu);
if (stats && stats->count > 0) {
u64 min_ns, max_ns, avg_ns, stddev_ns, p95_ns, p99_ns;
calculate_stats(stats, &min_ns, &max_ns, &avg_ns,
&stddev_ns, &p95_ns, &p99_ns);
seq_printf(m, "CPU %d:\n", cpu);
seq_printf(m, " Samples: %u\n", stats->count);
seq_printf(m, " Min: %llu ns (%.3f us)\n",
min_ns, min_ns / 1000.0);
seq_printf(m, " Max: %llu ns (%.3f us)\n",
max_ns, max_ns / 1000.0);
seq_printf(m, " Average: %llu ns (%.3f us)\n",
avg_ns, avg_ns / 1000.0);
seq_printf(m, " StdDev: %llu ns (%.3f us)\n",
stddev_ns, stddev_ns / 1000.0);
seq_printf(m, " 95th %%ile: %llu ns (%.3f us)\n",
p95_ns, p95_ns / 1000.0);
seq_printf(m, " 99th %%ile: %llu ns (%.3f us)\n",
p99_ns, p99_ns / 1000.0);
seq_printf(m, " Jitter: %llu ns (%.3f us)\n\n",
max_ns - min_ns, (max_ns - min_ns) / 1000.0);
// 显示最近几次切换
seq_printf(m, " Recent switches (last 5):\n");
spin_lock(&stats->lock);
unsigned int idx = (stats->head - 5 + MAX_SAMPLES) % MAX_SAMPLES;
for (int i = 0; i < 5 && i < stats->count; i++) {
struct ctx_switch_event *event = &stats->events[idx];
seq_printf(m, " %s(%d) -> %s(%d): %llu ns\n",
event->prev_comm, event->prev_pid,
event->next_comm, event->next_pid,
event->switch_time_ns);
idx = (idx + 1) % MAX_SAMPLES;
}
spin_unlock(&stats->lock);
seq_printf(m, "\n");
}
}
return 0;
}
// Open handler for the /proc entry: sets the file up as a single-record
// seq_file whose content is produced by proc_show().
static int proc_open(struct inode *inode, struct file *file)
{
// No private data is needed by proc_show(), hence the NULL argument.
return single_open(file, proc_show, NULL);
}
/*
 * Write handler for the /proc entry. Accepts the commands "start", "stop"
 * and "reset" (matched by prefix, so a trailing newline is fine).
 *
 * Returns @len on success, -EFAULT if the user buffer cannot be read, the
 * error from start_measurement() if starting fails, or -EINVAL for an
 * unknown command (previously any input was silently accepted).
 */
static ssize_t proc_write(struct file *file, const char __user *buf,
                          size_t len, loff_t *ppos)
{
    char cmd[64];
    size_t n = min(len, sizeof(cmd) - 1);
    int cpu;
    int ret;

    if (copy_from_user(cmd, buf, n))
        return -EFAULT;
    cmd[n] = '\0';

    if (strncmp(cmd, "start", 5) == 0) {
        ret = start_measurement();
        if (ret)
            return ret;
    } else if (strncmp(cmd, "stop", 4) == 0) {
        stop_measurement();
    } else if (strncmp(cmd, "reset", 5) == 0) {
        for_each_online_cpu(cpu) {
            struct per_cpu_stats *stats = per_cpu_ptr(cpu_stats, cpu);
            unsigned long flags;

            if (!stats)
                continue;
            /* The recorder takes this lock with IRQs off; match it here. */
            spin_lock_irqsave(&stats->lock, flags);
            stats->head = 0;
            stats->tail = 0;
            stats->count = 0;
            stats->min_ns = 0;
            stats->max_ns = 0;
            stats->total_ns = 0;
            stats->total_sq_ns = 0;
            spin_unlock_irqrestore(&stats->lock, flags);
        }
        pr_info("Statistics reset\n");
    } else {
        return -EINVAL;
    }

    return len;
}
static const struct proc_ops proc_fops = {
.proc_open = proc_open,
.proc_read = seq_read,
.proc_write = proc_write,
.proc_lseek = seq_lseek,
.proc_release = single_release,
};
/*
 * Module entry point: allocate and initialise the per-CPU statistics and
 * create the /proc control entry. Returns 0 or -ENOMEM.
 *
 * NOTE(review): nothing in the visible source attaches the sched_switch /
 * sched_wakeup handlers to their tracepoints; without that no samples are
 * ever recorded. Confirm where registration is meant to happen.
 */
static int __init ctx_switch_init(void)
{
    int cpu;

    pr_info("Context Switch Time Measurement Module Loading\n");

    cpu_stats = alloc_percpu(struct per_cpu_stats);
    if (!cpu_stats)
        return -ENOMEM;

    /*
     * Initialise every possible CPU, not just the online ones, so a CPU
     * that comes online later already has a valid lock and clean state.
     */
    for_each_possible_cpu(cpu) {
        struct per_cpu_stats *stats = per_cpu_ptr(cpu_stats, cpu);

        spin_lock_init(&stats->lock);
        stats->head = 0;
        stats->tail = 0;
        stats->count = 0;
        stats->min_ns = 0;
        stats->max_ns = 0;
        stats->total_ns = 0;
        stats->total_sq_ns = 0;
        stats->last_switch_out_ts = 0;
        stats->measuring = false;
        measure_threads[cpu] = NULL;
    }

    /*
     * World-readable, root-writable: writing starts real-time kernel
     * threads, which unprivileged users must not be able to trigger.
     */
    proc_entry = proc_create(MODULE_NAME, 0644, NULL, &proc_fops);
    if (!proc_entry) {
        free_percpu(cpu_stats);
        cpu_stats = NULL;
        return -ENOMEM;
    }

    pr_info("Context Switch Time Measurement Module Ready\n");
    pr_info("Usage:\n");
    pr_info(" echo start > /proc/%s # Start measurement\n", MODULE_NAME);
    pr_info(" echo stop > /proc/%s # Stop measurement\n", MODULE_NAME);
    pr_info(" echo reset > /proc/%s # Reset statistics\n", MODULE_NAME);
    pr_info(" cat /proc/%s # View results\n", MODULE_NAME);
    return 0;
}
/*
 * Module exit: tear everything down in the reverse order of its use.
 *
 * The /proc entry goes first so no new "start" command can arrive while (or
 * after) the threads are being stopped; only then are the threads stopped
 * and the per-CPU data they use released.
 */
static void __exit ctx_switch_exit(void)
{
    if (proc_entry) {
        proc_remove(proc_entry);
        proc_entry = NULL;
    }

    stop_measurement();

    if (cpu_stats) {
        free_percpu(cpu_stats);
        cpu_stats = NULL;
    }

    pr_info("Context Switch Time Measurement Module Unloaded\n");
}
module_init(ctx_switch_init);
module_exit(ctx_switch_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Embedded Engineer");
MODULE_DESCRIPTION("Precise Linux Context Switch Time Measurement");
3. 用户空间微基准测试
// ctx_switch_test.c - 上下文切换基准测试(支持 -t/-n/-p 选项;第5节的 Makefile 将其编译为 ctx_switch_test,而 ctx_benchmark.c 是第4节脚本生成的简化版程序)
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <pthread.h>
#include <sched.h>
#include <time.h>
#include <sys/time.h>
#include <sys/syscall.h>
#include <sys/resource.h>
#include <sys/mman.h>
#include <errno.h>
#include <math.h>
#include <signal.h>
#include <fcntl.h>
#include <sys/stat.h>
#define DEFAULT_ITERATIONS 100000
#define DEFAULT_THREADS 2
#define DEFAULT_PRIORITY 99
#define PAGE_SIZE 4096
// Aggregated results of one benchmark run (latencies in microseconds).
struct test_results {
double min_us;
double max_us;
double avg_us;
double stddev_us;
double p95_us;
double p99_us;
double p999_us; // 99.9th percentile
unsigned long *latencies_ns; // raw samples in nanoseconds; sorted in place by calculate_statistics()
unsigned int samples; // number of entries in latencies_ns used for the statistics
unsigned int iterations; // number of iterations requested for the run
};
// Per-thread parameters. The struct is 64-byte aligned and padded; the
// original comment states this is to avoid false sharing between threads.
struct thread_data {
pthread_t thread_id;
int thread_num; // index of the thread within the run
int cpu_id; // CPU the thread binds itself to
volatile int ready;
volatile int go;
volatile int stop; // set by the main thread to ask the worker to finish
unsigned long *latencies; // this thread's slice of the shared sample array
unsigned int count; // capacity of that slice
pthread_barrier_t *barrier; // start barrier shared with the main thread
pthread_mutex_t *mutex; // mutex shared by all workers
pthread_cond_t *cond; // condition variable shared by all workers
char padding[64]; // cache-line padding
} __attribute__((aligned(64)));
// Global state
// Set to 1 by the signal handler and when a run is stopped.
// NOTE(review): no visible code reads this flag; workers poll thread_data.stop instead.
static volatile int global_stop = 0;
// Results of the current run, filled in by run_ctx_switch_test().
static struct test_results global_results;
/* Return the current CLOCK_MONOTONIC time as a nanosecond count. */
static inline long long get_ns_timestamp(void)
{
    struct timespec now;
    long long ns;

    clock_gettime(CLOCK_MONOTONIC, &now);
    ns = now.tv_sec * 1000000000LL;
    ns += now.tv_nsec;
    return ns;
}
// 绑定到特定CPU
static void bind_to_cpu(int cpu_id)
{
cpu_set_t cpuset;
CPU_ZERO(&cpuset);
CPU_SET(cpu_id, &cpuset);
if (pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), &cpuset) != 0) {
perror("pthread_setaffinity_np");
}
}
/*
 * Switch the calling thread to SCHED_FIFO at @priority. Failures are
 * reported on stderr and otherwise ignored, so the benchmark still runs
 * (at normal priority) without root.
 *
 * pthread_setschedparam() returns the error number and leaves errno alone,
 * so the return value is what gets inspected.
 */
static void set_realtime_priority(int priority)
{
    struct sched_param param = { .sched_priority = priority };
    int rc = pthread_setschedparam(pthread_self(), SCHED_FIFO, &param);

    if (rc == 0)
        return;

    if (rc == EPERM)
        fprintf(stderr, "Warning: Need root for SCHED_FIFO priority %d\n", priority);
    else
        fprintf(stderr, "pthread_setschedparam: %s\n", strerror(rc));
}
/* Pipe ends and CPU handed to the echo partner of the pipe test. */
struct pipe_echo_args {
    int read_fd;
    int write_fd;
    int cpu_id;
};

/* Echo partner: bounce every byte read from read_fd back through write_fd until EOF. */
static void *pipe_echo_partner(void *arg)
{
    const struct pipe_echo_args *echo = arg;
    char byte;

    set_realtime_priority(DEFAULT_PRIORITY);
    bind_to_cpu(echo->cpu_id);

    while (read(echo->read_fd, &byte, 1) == 1) {
        if (write(echo->write_fd, &byte, 1) != 1)
            break;
    }
    return NULL;
}

/*
 * Test method 1: pipe ping-pong.
 *
 * The worker sends one byte through a "ping" pipe to a partner thread pinned
 * to the same CPU and blocks until the partner echoes it through a "pong"
 * pipe. Each round trip therefore forces a switch to the partner and one
 * back, so half of the round-trip time is stored per sample. (Writing to and
 * reading from a single pipe in one thread, as before, never blocks and so
 * never switches.)
 *
 * The start barrier is always reached, even after a set-up failure, because
 * the caller waits on it too and would otherwise hang. Returns NULL.
 */
static void *pipe_ctx_switch_test(void *arg)
{
    struct thread_data *data = (struct thread_data *)arg;
    struct pipe_echo_args echo;
    pthread_t partner;
    int ping[2] = { -1, -1 };
    int pong[2] = { -1, -1 };
    int have_partner = 0;
    char buffer = 'X';
    unsigned int idx = 0;

    if (pipe(ping) == -1 || pipe(pong) == -1) {
        perror("pipe");
    } else {
        int rc;

        echo.read_fd = ping[0];
        echo.write_fd = pong[1];
        echo.cpu_id = data->cpu_id;
        rc = pthread_create(&partner, NULL, pipe_echo_partner, &echo);
        if (rc != 0)
            fprintf(stderr, "pthread_create: %s\n", strerror(rc));
        else
            have_partner = 1;
    }

    set_realtime_priority(DEFAULT_PRIORITY);
    bind_to_cpu(data->cpu_id);

    /* Wait until every worker is ready. */
    pthread_barrier_wait(data->barrier);

    while (have_partner && !data->stop && idx < data->count) {
        long long start_ns, end_ns;

        start_ns = get_ns_timestamp();
        if (write(ping[1], &buffer, 1) != 1 ||
            read(pong[0], &buffer, 1) != 1) {
            perror("pipe ping-pong");
            break;
        }
        end_ns = get_ns_timestamp();

        /* One round trip covers two switches. */
        data->latencies[idx] = (unsigned long)((end_ns - start_ns) / 2);
        idx++;
    }

    /* Closing the ping write end delivers EOF to the partner. */
    if (ping[1] != -1)
        close(ping[1]);
    if (have_partner)
        pthread_join(partner, NULL);
    if (ping[0] != -1)
        close(ping[0]);
    if (pong[0] != -1)
        close(pong[0]);
    if (pong[1] != -1)
        close(pong[1]);
    return NULL;
}
// Test method 2: intended to measure thread-to-thread switches through a mutex.
// NOTE(review): nothing in the visible source ever sets data->go, and each
// worker receives its own thread_data, so the inner wait loop below appears to
// spin (while holding the shared mutex) until data->stop is set. Confirm how a
// partner thread is supposed to hand over the turn before trusting the results.
static void *mutex_ctx_switch_test(void *arg)
{
struct thread_data *data = (struct thread_data *)arg;
long long start_ns, end_ns;
unsigned int idx = 0;
set_realtime_priority(DEFAULT_PRIORITY);
bind_to_cpu(data->cpu_id);
pthread_barrier_wait(data->barrier);
while (!data->stop && idx < data->count) {
pthread_mutex_lock(data->mutex);
// Timestamp taken right after the lock was acquired
start_ns = get_ns_timestamp();
// Flag that this thread is ready (no visible reader of this flag)
data->ready = 1;
// Yield until data->go is set or the run is stopped
while (!data->go && !data->stop) {
sched_yield();
}
data->go = 0;
pthread_mutex_unlock(data->mutex);
// Timestamp taken right after the lock was released
end_ns = get_ns_timestamp();
data->latencies[idx] = end_ns - start_ns;
idx++;
}
return NULL;
}
// 测试方法3:使用条件变量
static void *condvar_ctx_switch_test(void *arg)
{
struct thread_data *data = (struct thread_data *)arg;
long long start_ns, end_ns;
unsigned int idx = 0;
set_realtime_priority(DEFAULT_PRIORITY);
bind_to_cpu(data->cpu_id);
pthread_barrier_wait(data->barrier);
while (!data->stop && idx < data->count) {
pthread_mutex_lock(data->mutex);
// 等待条件变量
start_ns = get_ns_timestamp();
pthread_cond_wait(data->cond, data->mutex);
end_ns = get_ns_timestamp();
pthread_mutex_unlock(data->mutex);
data->latencies[idx] = end_ns - start_ns;
idx++;
// 通知另一个线程
usleep(1);
}
return NULL;
}
/*
 * Test method 4: time sched_yield().
 *
 * After raising its priority, pinning itself and passing the start barrier,
 * the worker times one sched_yield() call per iteration, stores the elapsed
 * nanoseconds and sleeps 10 us before the next one. Returns NULL.
 */
static void *yield_ctx_switch_test(void *arg)
{
    struct thread_data *td = arg;
    unsigned int slot;

    set_realtime_priority(DEFAULT_PRIORITY);
    bind_to_cpu(td->cpu_id);
    pthread_barrier_wait(td->barrier);

    for (slot = 0; !td->stop && slot < td->count; slot++) {
        long long before, after;

        before = get_ns_timestamp();
        sched_yield();
        after = get_ns_timestamp();
        td->latencies[slot] = after - before;

        /* Throttle so the loop does not yield back-to-back. */
        usleep(10);
    }
    return NULL;
}
/* Defined further down in this file; declared here because qsort() needs it. */
static int compare_ulong(const void *a, const void *b);

/*
 * Fill in min/max/average/standard deviation and the 95th, 99th and 99.9th
 * percentiles (all in microseconds) from results->latencies_ns[0..samples).
 *
 * The sample array is sorted in place. Nothing is written when there are no
 * samples. The variance is computed in a second pass from the deviations to
 * the mean, which cannot overflow and cannot go negative (the sum-of-squares
 * form could do both).
 */
static void calculate_statistics(struct test_results *results)
{
    const unsigned int n = results->samples;
    unsigned long *lat = results->latencies_ns;
    unsigned long long sum = 0;
    unsigned long min, max;
    double mean, sq_dev = 0.0;
    unsigned int i;

    if (n == 0 || lat == NULL)
        return;

    /* Pass 1: extremes and sum, seeded from the first sample. */
    min = max = lat[0];
    for (i = 0; i < n; i++) {
        sum += lat[i];
        if (lat[i] < min)
            min = lat[i];
        if (lat[i] > max)
            max = lat[i];
    }
    mean = (double)sum / n;

    /* Pass 2: sum of squared deviations. */
    for (i = 0; i < n; i++) {
        double dev = (double)lat[i] - mean;

        sq_dev += dev * dev;
    }

    results->min_us = min / 1000.0;
    results->max_us = max / 1000.0;
    results->avg_us = mean / 1000.0; /* no integer truncation of the mean */
    results->stddev_us = sqrt(sq_dev / n) / 1000.0;

    /* Percentiles from the sorted samples; size_t keeps n * 999 from wrapping. */
    qsort(lat, n, sizeof(unsigned long), compare_ulong);
    results->p95_us = lat[(size_t)n * 95 / 100] / 1000.0;
    results->p99_us = lat[(size_t)n * 99 / 100] / 1000.0;
    results->p999_us = lat[(size_t)n * 999 / 1000] / 1000.0;
}
/*
 * Three-way comparison of two unsigned long values for qsort():
 * returns -1, 0 or 1 when *a is below, equal to or above *b.
 */
static int compare_ulong(const void *a, const void *b)
{
    const unsigned long lhs = *(const unsigned long *)a;
    const unsigned long rhs = *(const unsigned long *)b;

    /* Each comparison yields 0 or 1, so the difference is -1, 0 or 1. */
    return (lhs > rhs) - (lhs < rhs);
}
/*
 * Run one benchmark.
 *
 * @test_type:   1 = pipe, 2 = mutex, 3 = condvar, 4 = yield
 * @iterations:  total number of samples requested, split evenly across threads
 * @num_threads: number of worker threads
 *
 * Workers run for at most one second (or until their share of iterations is
 * done). Results are printed and appended to ctx_switch_results.csv.
 * Returns 0 on success and -1 on invalid arguments, allocation failure or
 * when no sample was collected.
 *
 * Only slots a worker actually filled are counted: the sample array is
 * zero-initialised, workers may stop early, and a genuine measurement is
 * never 0 ns, so zero slots are dropped before the statistics are computed.
 */
static int run_ctx_switch_test(int test_type, int iterations, int num_threads)
{
    void *(*thread_func)(void *) = NULL;
    pthread_t *threads = NULL;
    struct thread_data *thread_data = NULL;
    pthread_barrier_t barrier;
    pthread_mutex_t mutex;
    pthread_cond_t cond;
    struct timespec start_ts, end_ts;
    double elapsed, throughput;
    size_t per_thread;
    unsigned int kept = 0;
    long ncpus;
    FILE *fp;
    int rc = -1;

    /* Validate everything before any thread or resource exists. */
    switch (test_type) {
    case 1: thread_func = pipe_ctx_switch_test; break;
    case 2: thread_func = mutex_ctx_switch_test; break;
    case 3: thread_func = condvar_ctx_switch_test; break;
    case 4: thread_func = yield_ctx_switch_test; break;
    default:
        fprintf(stderr, "Invalid test type\n");
        return -1;
    }
    if (num_threads < 1 || iterations < num_threads) {
        fprintf(stderr, "Invalid configuration: need threads >= 1 and iterations >= threads\n");
        return -1;
    }
    ncpus = sysconf(_SC_NPROCESSORS_ONLN);
    if (ncpus < 1)
        ncpus = 1;
    per_thread = (size_t)iterations / (size_t)num_threads;

    printf("\n=== Context Switch Time Benchmark ===\n");
    printf("Test type: %d\n", test_type);
    printf("Iterations: %d\n", iterations);
    printf("Threads: %d\n", num_threads);
    printf("Priority: SCHED_FIFO %d\n", DEFAULT_PRIORITY);

    /* The barrier counts the workers plus this thread. */
    pthread_barrier_init(&barrier, NULL, (unsigned int)num_threads + 1u);
    pthread_mutex_init(&mutex, NULL);
    pthread_cond_init(&cond, NULL);

    threads = calloc((size_t)num_threads, sizeof(*threads));
    thread_data = calloc((size_t)num_threads, sizeof(*thread_data));
    global_results.samples = 0;
    global_results.iterations = (unsigned int)iterations;
    global_results.latencies_ns = calloc((size_t)iterations,
                                         sizeof(*global_results.latencies_ns));
    if (!threads || !thread_data || !global_results.latencies_ns) {
        perror("calloc");
        goto cleanup;
    }

    for (int i = 0; i < num_threads; i++) {
        thread_data[i].thread_num = i;
        thread_data[i].cpu_id = (int)(i % ncpus);
        thread_data[i].ready = 0;
        thread_data[i].go = 0;
        thread_data[i].stop = 0;
        thread_data[i].latencies = global_results.latencies_ns + (size_t)i * per_thread;
        thread_data[i].count = (unsigned int)per_thread;
        thread_data[i].barrier = &barrier;
        thread_data[i].mutex = &mutex;
        thread_data[i].cond = &cond;
    }

    for (int i = 0; i < num_threads; i++) {
        int err = pthread_create(&threads[i], NULL, thread_func, &thread_data[i]);

        if (err != 0) {
            /*
             * Workers created so far are parked on a barrier that can no
             * longer fill up, and they reference the memory freed by the
             * cleanup path, so unwinding is not safe: give up.
             */
            fprintf(stderr, "pthread_create: %s\n", strerror(err));
            exit(EXIT_FAILURE);
        }
    }

    /* Release the workers together, then time the run. */
    pthread_barrier_wait(&barrier);
    clock_gettime(CLOCK_MONOTONIC, &start_ts);

    sleep(1); /* let the test run for a while */

    global_stop = 1;
    for (int i = 0; i < num_threads; i++)
        thread_data[i].stop = 1;
    for (int i = 0; i < num_threads; i++)
        pthread_join(threads[i], NULL);

    clock_gettime(CLOCK_MONOTONIC, &end_ts);
    elapsed = (end_ts.tv_sec - start_ts.tv_sec) +
              (end_ts.tv_nsec - start_ts.tv_nsec) / 1e9;

    /* Compact the array down to the slots that were actually filled. */
    for (int i = 0; i < iterations; i++) {
        unsigned long value = global_results.latencies_ns[i];

        if (value != 0)
            global_results.latencies_ns[kept++] = value;
    }
    global_results.samples = kept;
    if (kept == 0) {
        fprintf(stderr, "No samples were collected\n");
        goto cleanup;
    }

    calculate_statistics(&global_results);
    throughput = elapsed > 0.0 ? global_results.samples / elapsed : 0.0;

    printf("\n=== Test Results ===\n");
    printf("Duration: %.2f seconds\n", elapsed);
    printf("Samples: %u\n", global_results.samples);
    printf("Throughput: %.1f switches/sec\n", throughput);
    printf("\nContext Switch Time:\n");
    printf(" Minimum: %8.3f us\n", global_results.min_us);
    printf(" Maximum: %8.3f us\n", global_results.max_us);
    printf(" Average: %8.3f us\n", global_results.avg_us);
    printf(" Std Deviation:%8.3f us\n", global_results.stddev_us);
    printf(" 95th %%ile: %8.3f us\n", global_results.p95_us);
    printf(" 99th %%ile: %8.3f us\n", global_results.p99_us);
    printf(" 99.9th %%ile: %8.3f us\n", global_results.p999_us);
    printf(" Jitter: %8.3f us\n", global_results.max_us - global_results.min_us);

    /* Eleven fields, eleven conversions: the throughput used to be dropped. */
    fp = fopen("ctx_switch_results.csv", "a");
    if (fp) {
        fprintf(fp, "%d,%d,%d,%.3f,%.3f,%.3f,%.3f,%.3f,%.3f,%.3f,%.1f\n",
                test_type, iterations, num_threads,
                global_results.min_us, global_results.max_us, global_results.avg_us,
                global_results.stddev_us, global_results.p95_us, global_results.p99_us,
                global_results.p999_us, throughput);
        fclose(fp);
    }
    rc = 0;

cleanup:
    pthread_barrier_destroy(&barrier);
    pthread_mutex_destroy(&mutex);
    pthread_cond_destroy(&cond);
    free(threads);
    free(thread_data);
    free(global_results.latencies_ns);
    global_results.latencies_ns = NULL;
    return rc;
}
/*
 * SIGINT/SIGTERM handler: request a stop and report the signal number.
 *
 * printf() is not async-signal-safe, so the message is assembled by hand and
 * emitted with write(); errno is preserved for the interrupted code.
 */
static void signal_handler(int sig)
{
    static const char prefix[] = "\nTest interrupted by signal ";
    char digits[12];
    size_t pos = sizeof(digits);
    unsigned int value = sig < 0 ? 0u : (unsigned int)sig;
    int saved_errno = errno;

    global_stop = 1;

    /* Build "<number>\n" backwards from the end of the buffer. */
    digits[--pos] = '\n';
    do {
        digits[--pos] = (char)('0' + value % 10);
        value /= 10;
    } while (value != 0);

    if (write(STDOUT_FILENO, prefix, sizeof(prefix) - 1) < 0 ||
        write(STDOUT_FILENO, digits + pos, sizeof(digits) - pos) < 0) {
        /* Nothing useful can be done about a failed write in a handler. */
    }

    errno = saved_errno;
}
/* Print the command-line help. */
static void print_usage(const char *prog)
{
    printf("Usage: %s [options]\n", prog);
    printf("Options:\n");
    printf(" -t <type> Test type: 1=pipe, 2=mutex, 3=condvar, 4=yield (default: 1)\n");
    printf(" -n <iter> Number of iterations (default: %d)\n", DEFAULT_ITERATIONS);
    printf(" -p <threads> Number of threads (default: %d)\n", DEFAULT_THREADS);
    printf(" -h Show this help\n");
}

/*
 * Parse @text as a decimal integer in [1, 100000000] into *@out.
 * Returns 0 on success, -1 on malformed or out-of-range input (atoi() would
 * silently turn such input into 0 or an arbitrary value).
 */
static int parse_positive_int(const char *text, int *out)
{
    char *end = NULL;
    long value;

    errno = 0;
    value = strtol(text, &end, 10);
    if (errno != 0 || end == text || *end != '\0' ||
        value < 1 || value > 100000000L)
        return -1;

    *out = (int)value;
    return 0;
}

/*
 * Entry point: parse -t/-n/-p/-h, warn when not running as root, and run
 * the selected benchmark. Returns 1 on a bad command line, otherwise the
 * result of run_ctx_switch_test().
 */
int main(int argc, char *argv[])
{
    int test_type = 1; /* 1=pipe, 2=mutex, 3=condvar, 4=yield */
    int iterations = DEFAULT_ITERATIONS;
    int num_threads = DEFAULT_THREADS;

    signal(SIGINT, signal_handler);
    signal(SIGTERM, signal_handler);

    for (int i = 1; i < argc; i++) {
        int *target = NULL;

        if (strcmp(argv[i], "-h") == 0) {
            print_usage(argv[0]);
            return 0;
        } else if (strcmp(argv[i], "-t") == 0) {
            target = &test_type;
        } else if (strcmp(argv[i], "-n") == 0) {
            target = &iterations;
        } else if (strcmp(argv[i], "-p") == 0) {
            target = &num_threads;
        }

        if (target == NULL) {
            fprintf(stderr, "Unknown option: %s\n", argv[i]);
            print_usage(argv[0]);
            return 1;
        }
        if (i + 1 >= argc || parse_positive_int(argv[i + 1], target) != 0) {
            fprintf(stderr, "Option %s needs a positive integer value\n", argv[i]);
            return 1;
        }
        i++; /* consume the value */
    }

    if (geteuid() != 0) {
        printf("Warning: Need root for real-time scheduling\n");
        printf("Running with normal priority...\n");
    }

    return run_ctx_switch_test(test_type, iterations, num_threads);
}
4. Shell脚本自动化测试
#!/bin/bash
# ctx_switch_bench.sh - automated context switch time benchmark
#
# Builds a small yield-based benchmark, runs it on an idle system, under
# CPU/IO/memory load and under different scheduling policies, cross-checks
# with lmbench and perf when they are installed, writes a CSV summary and
# finally exercises the kernel module if it is loaded.

ITERATIONS=100000
THREADS=2
OUTPUT_FILE="ctx_switch_results.txt"
CSV_FILE="ctx_switch_summary.csv"

echo "Linux Context Switch Time Benchmark"
echo "=================================="

# 1. Build the test program
echo -e "\n1. Compiling test programs..."
cat > ctx_benchmark.c << 'EOF'
// Simplified yield-based benchmark
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <pthread.h>
#include <time.h>
#include <sched.h>
#include <unistd.h>
#define ITERATIONS 100000

/* Static storage: two 400 KB arrays do not belong on main()'s stack. */
static long long results1[ITERATIONS/2], results2[ITERATIONS/2];
/* Set by --keep-policy: keep the scheduling policy inherited from the parent (e.g. chrt). */
static int keep_policy;

long long get_ns() {
    struct timespec ts;
    clock_gettime(CLOCK_MONOTONIC, &ts);
    return ts.tv_sec * 1000000000LL + ts.tv_nsec;
}

void *test_thread(void *arg) {
    long long *results = (long long *)arg;
    int count = ITERATIONS / 2;

    /* Real-time priority, unless the caller chose the policy itself. */
    if (!keep_policy) {
        struct sched_param param = { .sched_priority = 99 };
        if (sched_setscheduler(0, SCHED_FIFO, &param) != 0)
            perror("sched_setscheduler");
    }

    for (int i = 0; i < count; i++) {
        long long start = get_ns();
        sched_yield();
        long long end = get_ns();
        results[i] = end - start;
        usleep(10);
    }
    return NULL;
}

int main(int argc, char **argv) {
    pthread_t thread1, thread2;
    long long sum = 0, min = LLONG_MAX, max = 0;

    keep_policy = (argc > 1 && strcmp(argv[1], "--keep-policy") == 0);

    printf("Starting context switch test...\n");

    /* Create the test threads */
    if (pthread_create(&thread1, NULL, test_thread, results1) != 0 ||
        pthread_create(&thread2, NULL, test_thread, results2) != 0) {
        fprintf(stderr, "pthread_create failed\n");
        return 1;
    }

    /* Wait for them to finish */
    pthread_join(thread1, NULL);
    pthread_join(thread2, NULL);

    /* Merge the results */
    for (int i = 0; i < ITERATIONS/2; i++) {
        long long val = results1[i];
        sum += val;
        if (val < min) min = val;
        if (val > max) max = val;
        val = results2[i];
        sum += val;
        if (val < min) min = val;
        if (val > max) max = val;
    }
    double avg = (double)sum / ITERATIONS;

    printf("\nResults (2 threads, %d iterations):\n", ITERATIONS);
    printf(" Min: %.3f us\n", min / 1000.0);
    printf(" Max: %.3f us\n", max / 1000.0);
    printf(" Average: %.3f us\n", avg / 1000.0);
    printf(" Jitter: %.3f us\n", (max - min) / 1000.0);
    return 0;
}
EOF
if ! gcc -O2 -o ctx_benchmark ctx_benchmark.c -lpthread -lrt; then
    echo "Error: failed to compile ctx_benchmark.c" >&2
    exit 1
fi

# Run the benchmark while a stress-ng load is active.
#   $1      - heading to print
#   $2...   - stress-ng arguments selecting the load
# The load is stopped by PID, so unrelated stress-ng instances are untouched.
run_under_load() {
    local label="$1"
    shift
    echo -e "\n$label"
    if ! command -v stress-ng &> /dev/null; then
        echo "stress-ng not installed, skipping"
        return
    fi
    stress-ng "$@" --timeout 30s --quiet &
    local load_pid=$!
    sleep 2
    sudo ./ctx_benchmark 2>&1 | tail -5
    kill "$load_pid" 2>/dev/null
    wait "$load_pid" 2>/dev/null
}

# 2. Basic test
echo -e "\n2. Running basic context switch test..."
sudo ./ctx_benchmark 2>&1 | tee "$OUTPUT_FILE"

# 3. lmbench lat_ctx
echo -e "\n3. Running lmbench lat_ctx (if available)..."
if command -v lat_ctx &> /dev/null; then
    echo "Testing with 2 processes..."
    lat_ctx -P 1 -s 0 2 2>&1 | grep -A5 "size"
    echo -e "\nTesting with varying process counts..."
    for procs in 2 4 8 16 32; do
        echo -n "Processes $procs: "
        lat_ctx -P 1 -s 0 "$procs" 2>&1 | tail -1
    done
fi

# 4. Context switch rate with perf stat
echo -e "\n4. Measuring context switch rate..."
if command -v perf &> /dev/null; then
    echo "Running perf stat for 10 seconds..."
    sudo perf stat -e context-switches,cpu-migrations -a sleep 10 2>&1 | grep -E "(context-switches|cpu-migrations|seconds)"
else
    echo "perf not installed, skipping"
fi

# 5. Different loads
echo -e "\n5. Testing under different loads..."
echo -e "\nA. Idle system:"
sudo ./ctx_benchmark 2>&1 | tail -5
run_under_load "B. Under CPU load:" --cpu 4
run_under_load "C. Under IO load:" --io 4
run_under_load "D. Under memory load:" --vm 2 --vm-bytes 512M

# 6. Different scheduling policies
# --keep-policy stops the benchmark from forcing SCHED_FIFO 99 itself, which
# would otherwise make all three runs identical.
echo -e "\n6. Testing different scheduling policies..."
for policy in "SCHED_OTHER" "SCHED_RR" "SCHED_FIFO"; do
    echo -e "\n$policy:"
    case $policy in
        "SCHED_OTHER")
            sudo chrt -o 0 ./ctx_benchmark --keep-policy 2>&1 | tail -5
            ;;
        "SCHED_RR")
            sudo chrt -r 50 ./ctx_benchmark --keep-policy 2>&1 | tail -5
            ;;
        "SCHED_FIFO")
            sudo chrt -f 99 ./ctx_benchmark --keep-policy 2>&1 | tail -5
            ;;
    esac
done

# 7. Summary report (header row first, so the file is valid CSV)
echo -e "\n7. Generating summary report..."
echo "Date,Test,Iterations,Threads,Min(us),Max(us),Avg(us),Jitter(us)" > "$CSV_FILE"
if [ -f "$OUTPUT_FILE" ]; then
    # Pull the key figures out of the basic test output
    MIN_US=$(awk '/Min:/ {print $2; exit}' "$OUTPUT_FILE")
    MAX_US=$(awk '/Max:/ {print $2; exit}' "$OUTPUT_FILE")
    AVG_US=$(awk '/Average:/ {print $2; exit}' "$OUTPUT_FILE")
    JITTER_US=$(awk '/Jitter:/ {print $2; exit}' "$OUTPUT_FILE")
    # ISO timestamp: no spaces or locale-dependent commas inside the field
    echo "$(date +%Y-%m-%dT%H:%M:%S),Basic,$ITERATIONS,$THREADS,$MIN_US,$MAX_US,$AVG_US,$JITTER_US" >> "$CSV_FILE"
    echo "Summary saved to $CSV_FILE"
fi

# 8. Kernel module (if loaded)
echo -e "\n8. Using kernel module (if loaded)..."
if [ -f /proc/ctx_switch ]; then
    echo "Starting measurement..."
    echo "start" | sudo tee /proc/ctx_switch > /dev/null
    sleep 5
    echo "Results:"
    sudo cat /proc/ctx_switch | head -20
    echo "stop" | sudo tee /proc/ctx_switch > /dev/null
else
    echo "Kernel module not loaded. To load: sudo insmod ctx_switch.ko"
fi

echo -e "\nTest complete! Results saved to $OUTPUT_FILE and $CSV_FILE"
5. 构建和使用
5.1 Makefile
# Makefile for context switch measurement
#
# Targets: all (kernel + user), kernel, user, clean, install, uninstall, test.
# Recipe lines must start with a TAB character.
obj-m += ctx_switch.o
KDIR := /lib/modules/$(shell uname -r)/build
PWD := $(shell pwd)
# Compiler flags for the userspace programs.
CFLAGS_user := -O2 -Wall -pthread
# Libraries go after the sources on the link line; -lm is needed for sqrt().
LDLIBS_user := -lrt -lm

all: kernel user

kernel:
	$(MAKE) -C $(KDIR) M=$(PWD) modules

user:
	gcc $(CFLAGS_user) -o ctx_benchmark ctx_benchmark.c $(LDLIBS_user)
	gcc $(CFLAGS_user) -o ctx_switch_test ctx_switch_test.c $(LDLIBS_user)

clean:
	$(MAKE) -C $(KDIR) M=$(PWD) clean
	rm -f ctx_benchmark ctx_switch_test *.o

install:
	sudo insmod ctx_switch.ko
	sudo chmod 666 /proc/ctx_switch

uninstall:
	sudo rmmod ctx_switch

test:
	sudo ./ctx_benchmark
	sudo ./ctx_switch_test -t 1 -n 10000

.PHONY: all kernel user clean install uninstall test
5.2 使用步骤
# 1. Build the kernel module and the userspace programs
make
# 2. Load the kernel module
sudo insmod ctx_switch.ko
# 3. Start the measurement
echo "start" | sudo tee /proc/ctx_switch
# 4. Generate load (runs in the background)
stress-ng --cpu 4 --io 2 &
# 5. View the results
cat /proc/ctx_switch
# 6. Stop the measurement
echo "stop" | sudo tee /proc/ctx_switch
# 7. Run the userspace tests
sudo ./ctx_benchmark
sudo ./ctx_switch_test -t 1 -n 100000 -p 4
# 8. Run the automated test script
chmod +x ctx_switch_bench.sh
sudo ./ctx_switch_bench.sh
6. 专业工具和方法
6.1 使用lmbench
# Install lmbench
sudo apt-get install lmbench
# Measure context switch time
lat_ctx -s 0 2 # 2 processes
lat_ctx -s 0 4 # 4 processes
lat_ctx -s 0 8 # 8 processes
# Measure with different process sizes
lat_ctx -s 64k 2 # 64KB data size
lat_ctx -s 1m 2 # 1MB data size
6.2 使用perf
# Measure the system-wide context switch rate over 10 seconds
sudo perf stat -e context-switches,cpu-migrations -a sleep 10
# Record context switch events, then inspect them
sudo perf record -e sched:sched_switch -a sleep 10
sudo perf report
# Measure one specific process for 5 seconds
sudo perf stat -e context-switches -p $(pidof my_app) sleep 5
6.3 使用systemtap
# SystemTap script that measures context switch time: it stores a timestamp
# per (execname, pid) when a switch starts and prints the delta on return.
# NOTE(review): confirm that the installed tapset actually provides a
# scheduler.ctxswitch.return probe point before relying on this script.
cat > ctx_switch.stp << 'EOF'
global switch_time, switch_count
probe scheduler.ctxswitch {
switch_time[execname(), pid()] = gettimeofday_ns()
}
probe scheduler.ctxswitch.return {
if (switch_time[execname(), pid()]) {
delta = gettimeofday_ns() - switch_time[execname(), pid()]
printf("%s (%d): %d ns\n", execname(), pid(), delta)
switch_count++
delete switch_time[execname(), pid()]
}
}
probe end {
printf("\nTotal context switches: %d\n", switch_count)
}
EOF
sudo stap ctx_switch.stp -c "sleep 10"
7. 优化建议
7.1 减少上下文切换时间
# 1. Use CPU affinity
taskset -c 0,1 ./my_app # bind to CPUs 0 and 1
# 2. Use a real-time priority
chrt -f 99 ./my_app
# 3. Reduce the number of processes
# Merge processes with similar functions
# 4. Use threads instead of processes
# Thread switches are faster than process switches
# 5. Tune scheduler parameters
# NOTE(review): on newer kernels these knobs may be exposed under debugfs
# rather than /proc/sys/kernel - verify the paths on the target system.
echo 1000000 > /proc/sys/kernel/sched_min_granularity_ns
echo 10000000 > /proc/sys/kernel/sched_wakeup_granularity_ns
# 6. Disable NUMA balancing
echo 0 > /proc/sys/kernel/numa_balancing
# 7. Use huge pages
echo 2048 > /sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages
7.2 内核配置优化
# Check the current configuration
grep -E "CONFIG_PREEMPT|CONFIG_HZ|CONFIG_NO_HZ" /boot/config-$(uname -r)
# Recommended configuration
CONFIG_PREEMPT=y # kernel preemption
CONFIG_PREEMPT_RT=y # real-time kernel (optional)
CONFIG_HZ=1000 # 1000 Hz timer
CONFIG_NO_HZ_IDLE=y # stop the timer tick when idle
CONFIG_NO_HZ_FULL=y # full tickless operation (fewer interrupts)
8. 结果解读指南
8.1 性能等级参考
< 1us : 优秀(缓存命中,CPU亲和性好)
1-5us : 良好(正常Linux系统)
5-20us : 一般(可能有调度器压力)
20-50us : 较差(系统负载高)
> 50us : 差(需要优化)
8.2 关键影响因素
- CPU缓存状态:L1/L2/L3缓存命中率
- TLB状态:页表缓存命中率
- 调度器开销:调度决策时间
- 内存访问模式:局部性原理
- 系统负载:其他进程的干扰
8.3 典型测试场景结果
# 测试结果示例
=== Context Switch Time Results ===
Scenario 1: Idle system, 2 threads, SCHED_FIFO
----------------------------------------------
Min: 0.8 us # 最佳情况
Average: 1.2 us # 典型值
P99: 2.8 us # 99%的切换时间
Max: 5.6 us # 最坏情况
Jitter: 4.8 us # 变化范围
Scenario 2: Under CPU load (4 cores 100%)
----------------------------------------------
Min: 1.2 us
Average: 3.8 us
P99: 12.4 us
Max: 25.6 us
Jitter: 24.4 us
Scenario 3: Different process counts (lmbench)
----------------------------------------------
2 processes: 1.5 us
4 processes: 2.1 us
8 processes: 3.8 us
16 processes: 7.2 us
32 processes: 14.5 us
9. 综合评估脚本
#!/bin/bash
# context_switch_eval.sh - comprehensive context switch evaluation
#
# Prints system information, the current context switch rate, cross-checks
# with lmbench and perf, runs ./ctx_benchmark under several loads and ends
# with configuration checks and recommendations.

# grep the running kernel's configuration, from /boot or /proc/config.gz.
# Returns 2 when no configuration file is readable.
kconfig_grep() {
    local boot_config="/boot/config-$(uname -r)"
    if [ -r "$boot_config" ]; then
        grep "$@" "$boot_config"
    elif [ -r /proc/config.gz ]; then
        zcat /proc/config.gz | grep "$@"
    else
        return 2
    fi
}

echo "Context Switch Time Comprehensive Evaluation"
echo "=========================================="

# 1. System information
echo -e "\n1. System Information:"
echo "Kernel: $(uname -r)"
echo "CPU: $(grep "model name" /proc/cpuinfo | head -1 | cut -d: -f2)"
echo "Cores: $(grep -c "^processor" /proc/cpuinfo)"
echo "Memory: $(free -h | grep Mem | awk '{print $2}')"

# 2. Current context switch rate (column 12 of vmstat is "cs")
echo -e "\n2. Current Context Switch Rate:"
sudo vmstat 1 5 | tail -4 | awk '{print $12 " cs/sec"}'

# 3. Cross-validation with several tools
echo -e "\n3. Cross-Validation with Multiple Tools:"
# lmbench
if command -v lat_ctx &> /dev/null; then
    echo -e "\nA. lmbench lat_ctx:"
    for procs in 2 4 8; do
        echo -n " $procs processes: "
        lat_ctx -s 0 "$procs" 2>&1 | tail -1 | awk '{print $2 " us"}'
    done
fi
# perf
echo -e "\nB. perf stat (10 seconds):"
sudo perf stat -e context-switches,cpu-migrations -a sleep 10 2>&1 | \
    grep -E "(context-switches|cpu-migrations|seconds)" | \
    awk '{printf " %s: %s\n", $2, $1}'

# 4. Impact of different loads
echo -e "\n4. Load Impact Analysis:"
load_tests=("idle" "cpu" "io" "memory")
for test in "${load_tests[@]}"; do
    echo -e "\n $test load:"
    # Start from a clean slate so a PID from the previous round is never reused.
    LOAD_PID=""
    LOAD_ARGS=()
    case $test in
        "cpu")
            LOAD_ARGS=(--cpu 4)
            ;;
        "io")
            LOAD_ARGS=(--io 4)
            ;;
        "memory")
            LOAD_ARGS=(--vm 2 --vm-bytes 512M)
            ;;
    esac
    if [ ${#LOAD_ARGS[@]} -gt 0 ]; then
        if command -v stress-ng &> /dev/null; then
            stress-ng "${LOAD_ARGS[@]}" --timeout 20s --quiet &
            LOAD_PID=$!
            sleep 2
        else
            echo " stress-ng not installed, running without load"
        fi
    fi
    # Run the benchmark
    if [ -f ./ctx_benchmark ]; then
        sudo ./ctx_benchmark 2>&1 | grep -E "(Min:|Max:|Average:|Jitter:)" | \
            while read -r line; do echo " $line"; done
    fi
    # Stop the load
    if [ -n "$LOAD_PID" ]; then
        kill "$LOAD_PID" 2>/dev/null
        wait "$LOAD_PID" 2>/dev/null
    fi
done

# 5. Optimization recommendations
echo -e "\n5. Optimization Recommendations:"
echo -e "\n Current configuration:"
echo -n " Kernel preemption: "
if kconfig_grep -q "CONFIG_PREEMPT=y" 2>/dev/null; then
    echo "Enabled"
else
    echo "Disabled (consider enabling)"
fi
echo -n " HZ: "
kconfig_grep "CONFIG_HZ=" 2>/dev/null | cut -d= -f2
echo -n " CPU isolation: "
if grep -q "isolcpus" /proc/cmdline; then
    echo "Enabled"
else
    echo "Disabled (consider isolating CPUs)"
fi

# 6. Summary report
echo -e "\n6. Summary Report:"
echo "=================="
echo "For real-time applications:"
echo "- Target context switch time: < 10us"
echo "- Monitor P99 latency (not just average)"
echo "- Consider using CPU isolation"
echo "- Use SCHED_FIFO for critical tasks"
echo "- Monitor system load regularly"
echo "- Consider PREEMPT_RT for hard real-time"
echo -e "\nEvaluation complete!"
总结
这个完整的上下文切换时间测量方案提供:
核心特性
- 多种测量方法:管道、互斥锁、条件变量、yield
- 内核模块:精确跟踪调度器事件
- 用户空间工具:灵活的基准测试
- 自动化脚本:一键测试和分析
- 专业工具集成:lmbench、perf、systemtap
关键指标
- 平均切换时间:系统典型性能
- P99/P99.9延迟:尾部延迟,反映接近最坏情况的表现(并非严格的最坏情况上界)
- 抖动:性能稳定性
- 不同负载下的表现:真实场景评估
适用场景
- 实时系统评估:验证是否满足实时性要求
- 性能调优:识别上下文切换瓶颈
- 系统比较:不同硬件/内核配置对比
- 容量规划:确定系统能支持的并发任务数
最佳实践
- 组合使用多种工具进行交叉验证
- 测试不同负载场景(空闲、CPU、IO、内存)
- 关注百分位数而不仅是平均值
- 监控长期趋势而不仅是单次测试
- 结合业务场景设定合理的目标值
通过这个方案,您可以全面、精确地测量Linux系统的上下文切换时间,为系统优化和实时性评估提供可靠数据支持。
更多推荐



所有评论(0)