Python自动化脚本A/B测试与实验管理实战
·
A/B测试是数据驱动决策的核心工具,用于比较两个或多个版本的效果差异。本文将介绍如何在Python中实现完整的A/B测试框架,包括实验配置、流量分配、数据收集和统计分析。
什么是A/B测试
A/B测试是一种对照实验方法:
- A组(对照组):保持原有方案
- B组(实验组):使用新方案
- 通过统计方法判断哪个方案效果更好
核心组件设计
实验配置管理
import hashlib
import json
import time
from dataclasses import dataclass, field
from typing import Dict, Any, Callable, Optional
from enum import Enum
import random
class ExperimentType(Enum):
    """Kinds of experiment that a configuration can declare."""

    # Two-way test: pick one of two variants.
    AB = "ab"

    # Multivariate test.
    MVT = "mvt"
@dataclass
class Experiment:
    """Configuration of one experiment plus its traffic-splitting logic."""

    name: str
    experiment_type: "ExperimentType"
    # Variant name -> share of traffic. Shares are accumulated in insertion
    # order and compared against a bucket value in [0, 1).
    variants: Dict[str, float]
    # Timestamps are compared against time.time().
    start_time: float
    end_time: Optional[float] = None
    metadata: Dict[str, Any] = field(default_factory=dict)

    def is_active(self) -> bool:
        """Return True when the current time lies inside the experiment window.

        The window opens at ``start_time`` and, when ``end_time`` is set,
        closes once the current time is past ``end_time``.
        """
        now = time.time()
        if now < self.start_time:
            return False
        # Compare with None explicitly: 0.0 is a valid timestamp and must not
        # be mistaken for "no end time".
        if self.end_time is not None and now > self.end_time:
            return False
        return True

    def get_variant(self, user_id: str) -> str:
        """Assign ``user_id`` to a variant deterministically.

        Returns ``"control"`` when the experiment is inactive or has no
        variants configured. Otherwise the user is hashed into one of 10000
        buckets and the first variant whose cumulative share exceeds the
        bucket value is returned. If the shares do not cover the bucket
        (they sum to less than 1), the first configured variant is returned.
        """
        if not self.is_active() or not self.variants:
            return "control"

        # Hashing name + user id keeps a user in the same group for this
        # experiment across calls.
        digest = hashlib.md5(f"{self.name}:{user_id}".encode()).hexdigest()
        bucket = int(digest, 16) % 10000 / 10000

        cumulative = 0.0
        for variant, weight in self.variants.items():
            cumulative += weight
            if bucket < cumulative:
                return variant

        # Shares left part of the range uncovered: fall back to the first
        # configured variant.
        return next(iter(self.variants))
class ExperimentManager:
    """Registry of experiments with per-user assignment caching and metrics."""

    def __init__(self, config_file: Optional[str] = None):
        """Create an empty manager, optionally loading experiments from a file.

        Args:
            config_file: Path to a JSON configuration file; skipped when
                empty or None.
        """
        self.experiments: Dict[str, Experiment] = {}
        # user_id -> {exp_name: variant}
        self.user_assignments: Dict[str, Dict[str, str]] = {}
        # "exp_name:metric_name" -> {'values': [...], 'variants': [...]}
        # The two lists are parallel: variants[i] is the variant that
        # produced values[i].
        self.metrics: Dict[str, Dict[str, list]] = {}
        if config_file:
            self.load_config(config_file)

    def load_config(self, config_file: str):
        """Load experiment definitions from a JSON file.

        The file must contain a list of objects with the keys ``name``,
        ``type``, ``variants`` and ``start_time``; ``end_time`` and
        ``metadata`` are optional. An experiment with an already registered
        name replaces the previous one.
        """
        # JSON text is UTF-8; do not depend on the platform default encoding.
        with open(config_file, 'r', encoding='utf-8') as f:
            configs = json.load(f)
        for config in configs:
            self.experiments[config['name']] = Experiment(
                name=config['name'],
                experiment_type=ExperimentType(config['type']),
                variants=config['variants'],
                start_time=config['start_time'],
                end_time=config.get('end_time'),
                metadata=config.get('metadata', {}),
            )

    def get_variant(self, user_id: str, exp_name: str) -> str:
        """Return the variant of ``exp_name`` assigned to ``user_id``.

        A previously cached assignment is returned as-is. Unknown or
        inactive experiments yield ``"control"`` without caching, so a user
        seen before the experiment starts is still assigned normally once it
        becomes active.
        """
        cached = self.user_assignments.get(user_id, {}).get(exp_name)
        if cached is not None:
            return cached

        experiment = self.experiments.get(exp_name)
        if experiment is None or not experiment.is_active():
            # Deliberately not cached: caching this fallback would pin the
            # user to "control" for the rest of the manager's lifetime.
            return "control"

        variant = experiment.get_variant(user_id)
        self.user_assignments.setdefault(user_id, {})[exp_name] = variant
        return variant

    def record_metric(self, user_id: str, exp_name: str, metric_name: str, value: float):
        """Append one metric observation, tagged with the user's variant."""
        key = f"{exp_name}:{metric_name}"
        series = self.metrics.setdefault(key, {'values': [], 'variants': []})
        series['values'].append(value)
        series['variants'].append(self.get_variant(user_id, exp_name))

    def get_results(self, exp_name: str) -> Dict[str, Any]:
        """Aggregate the recorded metrics of one experiment.

        Returns:
            ``{metric_name: {variant: {'count', 'mean', 'sum'}}}`` built from
            every metric recorded for ``exp_name``.
        """
        # Match on the "exp_name:" prefix rather than splitting at the first
        # colon, so experiment names that contain ':' are found as well.
        prefix = f"{exp_name}:"
        results: Dict[str, Any] = {}
        for key, data in self.metrics.items():
            if not key.startswith(prefix):
                continue
            metric = key[len(prefix):]

            by_variant: Dict[str, list] = {}
            for value, variant in zip(data['values'], data['variants']):
                by_variant.setdefault(variant, []).append(value)

            # Every list in by_variant holds at least one value, so the mean
            # is always defined.
            results[metric] = {
                variant: {
                    'count': len(values),
                    'mean': sum(values) / len(values),
                    'sum': sum(values),
                }
                for variant, values in by_variant.items()
            }
        return results
实战案例:推荐算法A/B测试
def recommend_algorithm_a(user_id: str, items: list) -> list:
    """Recommendation algorithm A: popularity-based.

    Returns a new list holding at most the first 10 entries of ``items``.
    ``user_id`` is accepted for interface parity with the other algorithm
    and is not used.
    """
    # Slicing already handles short inputs and always yields a fresh list,
    # so the caller's list is never handed back for the caller to alias.
    return items[:10]
def recommend_algorithm_b(user_id: str, items: list) -> list:
    """Recommendation algorithm B: simplified stand-in that picks at random.

    Returns a new list of at most 10 entries drawn from ``items`` in random
    order, without repeating a position. ``user_id`` is not used by this
    simplified version.
    """
    # random.sample leaves the input untouched; shuffling in place would
    # reorder the caller's list and leak into later calls that reuse it.
    return random.sample(items, min(10, len(items)))
class RecommendationSystem:
    """Recommendation front end that routes users through an A/B experiment."""

    def __init__(self, exp_manager: "ExperimentManager",
                 exp_name: str = 'recommendation_algo'):
        """Bind the system to an experiment manager.

        Args:
            exp_manager: Manager used for variant lookup and metric
                recording.
            exp_name: Name of the experiment that governs algorithm choice
                and under which clicks and views are recorded.
        """
        self.exp_manager = exp_manager
        self.exp_name = exp_name

    def recommend(self, user_id: str, items: list) -> tuple:
        """Return ``(recommendations, variant)`` for the user.

        Users assigned to ``'algorithm_b'`` are served by
        ``recommend_algorithm_b``; every other variant is served by
        ``recommend_algorithm_a``.
        """
        variant = self.exp_manager.get_variant(user_id, self.exp_name)
        if variant == 'algorithm_b':
            algorithm = recommend_algorithm_b
        else:
            algorithm = recommend_algorithm_a
        return algorithm(user_id, items), variant

    def record_click(self, user_id: str, item_id: str):
        """Record one click for the user; ``item_id`` is currently not stored."""
        self.exp_manager.record_metric(user_id, self.exp_name, 'click', 1)

    def record_view(self, user_id: str, item_id: str):
        """Record one impression for the user; ``item_id`` is currently not stored."""
        self.exp_manager.record_metric(user_id, self.exp_name, 'view', 1)
统计分析模块
import math
from typing import List, Tuple
def calculate_statistics(control: List[float], treatment: List[float]) -> Dict[str, float]:
    """Compare two samples with a two-sample z-test on their means.

    Args:
        control: Observations from the control group.
        treatment: Observations from the treatment group.

    Returns:
        A dict with ``control_mean``, ``treatment_mean``, ``lift_percent``
        (relative change of the treatment mean over the control mean, 0 when
        the control mean is not positive), ``z_score`` and ``significant``
        (True when ``|z_score| > 1.96``). An empty sample contributes a mean
        of 0 and yields a z-score of 0 instead of raising.
    """
    n1, n2 = len(control), len(treatment)
    mean1 = sum(control) / n1 if n1 > 0 else 0
    mean2 = sum(treatment) / n2 if n2 > 0 else 0

    # Sample variances (n - 1 denominator); undefined for fewer than two
    # observations, in which case 0 is used.
    var1 = sum((x - mean1) ** 2 for x in control) / (n1 - 1) if n1 > 1 else 0
    var2 = sum((x - mean2) ** 2 for x in treatment) / (n2 - 1) if n2 > 1 else 0

    # Standard error of the difference of means, using each group's own
    # variance (unpooled). With an empty group there is nothing to compare,
    # so the error is left at 0 rather than dividing by zero.
    if n1 > 0 and n2 > 0:
        std_error = math.sqrt(var1 / n1 + var2 / n2)
    else:
        std_error = 0.0

    z_score = (mean2 - mean1) / std_error if std_error > 0 else 0

    # Relative lift in percent.
    lift = (mean2 - mean1) / mean1 * 100 if mean1 > 0 else 0

    return {
        'control_mean': mean1,
        'treatment_mean': mean2,
        'lift_percent': lift,
        'z_score': z_score,
        # 1.96 is the two-sided critical value at the 95% confidence level.
        'significant': abs(z_score) > 1.96,
    }
总结
本文介绍了Python实现A/B测试的完整方案,包括实验配置、流量分配、数据收集和统计分析。通过这套框架,你可以方便地对各种算法和策略进行对照实验,用数据驱动产品优化决策。
更多推荐
所有评论(0)