A/B测试是数据驱动决策的核心工具,用于比较两个或多个版本的效果差异。本文将介绍如何在Python中实现完整的A/B测试框架,包括实验配置、流量分配、数据收集和统计分析。

什么是A/B测试

A/B测试是一种对照实验方法:

  • A组(对照组):保持原有方案
  • B组(实验组):使用新方案
  • 通过统计方法判断哪个方案效果更好

核心组件设计

实验配置管理

import hashlib
import json
import time
from dataclasses import dataclass, field
from typing import Dict, Any, Callable, Optional
from enum import Enum
import random

class ExperimentType(Enum):
    """Kinds of experiment a configuration can declare."""

    AB = "ab"  # choose one of two alternatives
    MVT = "mvt"  # multivariate test


@dataclass
class Experiment:
    """Configuration of one experiment plus deterministic user bucketing.

    ``start_time`` and ``end_time`` are compared against ``time.time()``,
    i.e. they are seconds since the epoch. ``end_time=None`` means the
    experiment has no scheduled end.
    """
    name: str
    experiment_type: ExperimentType
    variants: Dict[str, float]  # variant name -> share of traffic
    start_time: float
    end_time: Optional[float] = None
    metadata: Dict[str, Any] = field(default_factory=dict)

    def is_active(self) -> bool:
        """Return True when the current time lies inside the run window."""
        now = time.time()
        if now < self.start_time:
            return False
        # Compare with None explicitly: a falsy end time such as 0.0 is still
        # a real deadline and must not be read as "no end time".
        if self.end_time is not None and now > self.end_time:
            return False
        return True

    def get_variant(self, user_id: str) -> str:
        """Return the variant assigned to ``user_id``.

        Returns ``"control"`` when the experiment is not active or declares
        no variants. Otherwise the user is hashed together with the
        experiment name, so the same user always lands in the same variant
        of a given experiment.
        """
        if not self.is_active() or not self.variants:
            return "control"

        # MD5 is used only as a stable, evenly spread hash (not for security).
        hash_str = f"{self.name}:{user_id}"
        hash_value = int(hashlib.md5(hash_str.encode()).hexdigest(), 16)

        # Map the hash onto [0, 1) in steps of 1/10000 and walk the
        # cumulative weights in declaration order.
        boundary = hash_value % 10000 / 10000
        cumulative = 0

        for variant, weight in self.variants.items():
            cumulative += weight
            if boundary < cumulative:
                return variant

        # Reached when the weights add up to less than the user's position
        # (weights summing below 1, or float rounding): fall back to the
        # first declared variant.
        return next(iter(self.variants))

class ExperimentManager:
    """Registry of experiments with per-user assignment caching and metrics.

    State kept on the instance:
      * ``experiments``: experiment name -> experiment object.
      * ``user_assignments``: user_id -> {experiment name: variant}.
      * ``metrics``: ``"<exp_name>:<metric_name>"`` -> a dict holding two
        parallel lists, ``'values'`` and ``'variants'``.
    """

    def __init__(self, config_file: Optional[str] = None):
        """Create an empty manager, optionally loading experiments from a file.

        Args:
            config_file: Path to a JSON configuration file; skipped when
                empty or None.
        """
        self.experiments: Dict[str, Experiment] = {}
        # user_id -> {exp_name: variant}
        self.user_assignments: Dict[str, Dict[str, str]] = {}
        # "exp_name:metric_name" -> {'values': [...], 'variants': [...]}
        self.metrics: Dict[str, Dict[str, list]] = {}

        if config_file:
            self.load_config(config_file)

    def load_config(self, config_file: str):
        """Load experiment definitions from a JSON file.

        The file must contain a list of objects with the keys ``name``,
        ``type``, ``variants`` and ``start_time``; ``end_time`` and
        ``metadata`` are optional. An experiment whose name is already
        registered is replaced.
        """
        # JSON text is UTF-8; do not depend on the platform's locale encoding.
        with open(config_file, 'r', encoding='utf-8') as f:
            configs = json.load(f)

        for config in configs:
            exp = Experiment(
                name=config['name'],
                experiment_type=ExperimentType(config['type']),
                variants=config['variants'],
                start_time=config['start_time'],
                end_time=config.get('end_time'),
                metadata=config.get('metadata', {})
            )
            self.experiments[config['name']] = exp

    def get_variant(self, user_id: str, exp_name: str) -> str:
        """Return the variant of ``exp_name`` that ``user_id`` belongs to.

        An assignment made while the experiment is active is cached and
        returned on later calls. Unknown experiments yield ``"control"``.
        """
        assigned = self.user_assignments.get(user_id)
        if assigned is not None and exp_name in assigned:
            return assigned[exp_name]

        exp = self.experiments.get(exp_name)
        if exp is None:
            return "control"

        # Read the active flag before asking for the variant. The placeholder
        # returned while an experiment is inactive must not be cached, or a
        # user seen before the start time would be stuck with it for good.
        active = exp.is_active()
        variant = exp.get_variant(user_id)
        if active:
            self.user_assignments.setdefault(user_id, {})[exp_name] = variant
        return variant

    def record_metric(self, user_id: str, exp_name: str, metric_name: str, value: float):
        """Append ``value`` for the metric, tagged with the user's variant."""
        key = f"{exp_name}:{metric_name}"
        bucket = self.metrics.setdefault(key, {'values': [], 'variants': []})

        variant = self.get_variant(user_id, exp_name)
        bucket['values'].append(value)
        bucket['variants'].append(variant)

    def get_results(self, exp_name: str) -> Dict[str, Any]:
        """Summarise the recorded metrics of one experiment.

        Returns:
            ``{metric_name: {variant: {'count', 'mean', 'sum'}}}``.
        """
        results = {}
        # Match on the full "<exp_name>:" prefix so that experiment names
        # which themselves contain ':' are still found.
        prefix = f"{exp_name}:"

        for key, data in self.metrics.items():
            if not key.startswith(prefix):
                continue
            metric = key[len(prefix):]

            variant_values = {}
            for value, variant in zip(data['values'], data['variants']):
                variant_values.setdefault(variant, []).append(value)

            results[metric] = {}
            for variant, values in variant_values.items():
                total = sum(values)
                results[metric][variant] = {
                    'count': len(values),
                    'mean': total / len(values) if values else 0,
                    'sum': total
                }

        return results

实战案例:推荐算法A/B测试

def recommend_algorithm_a(user_id: str, items: list) -> list:
    """Recommendation algorithm A: popularity-based.

    Returns at most the first ten entries of ``items``; presumably the
    caller passes ``items`` already ordered by popularity (not verified
    here). A list of ten or fewer entries is returned as-is, without
    copying.
    """
    if len(items) <= 10:
        return items
    return items[:10]

def recommend_algorithm_b(user_id: str, items: list) -> list:
    """Recommendation algorithm B: stand-in for a history-based recommender.

    Simplified version: returns up to ten entries of ``items`` in random
    order. ``user_id`` is currently unused.
    """
    # Shuffle a copy: shuffling ``items`` itself would reorder the caller's
    # list as a hidden side effect.
    shuffled = list(items)
    random.shuffle(shuffled)
    return shuffled[:10]

class RecommendationSystem:
    """Recommendation service wired to the 'recommendation_algo' experiment."""

    def __init__(self, exp_manager: ExperimentManager):
        # The manager supplies variant assignments and stores the metrics.
        self.exp_manager = exp_manager

    def recommend(self, user_id: str, items: list) -> tuple:
        """Return ``(recommended items, variant)`` for the user.

        Users in the 'algorithm_b' variant get algorithm B; every other
        variant falls back to algorithm A.
        """
        variant = self.exp_manager.get_variant(user_id, 'recommendation_algo')
        algorithm = (
            recommend_algorithm_b if variant == 'algorithm_b'
            else recommend_algorithm_a
        )
        return algorithm(user_id, items), variant

    def _track(self, user_id: str, metric_name: str):
        """Record one occurrence of ``metric_name`` for the user."""
        self.exp_manager.record_metric(user_id, 'recommendation_algo', metric_name, 1)

    def record_click(self, user_id: str, item_id: str):
        """Record a click event; ``item_id`` is accepted but not stored."""
        self._track(user_id, 'click')

    def record_view(self, user_id: str, item_id: str):
        """Record an impression event; ``item_id`` is accepted but not stored."""
        self._track(user_id, 'view')

统计分析模块

import math
from typing import List, Tuple

def calculate_statistics(control: List[float], treatment: List[float]) -> Dict[str, float]:
    """Compare two samples with a two-sample z-test on their means.

    Args:
        control: Observations from the control group.
        treatment: Observations from the treatment group.

    Returns:
        A dict with ``control_mean``, ``treatment_mean``, ``lift_percent``
        (relative change of the treatment mean over the control mean, in
        percent; 0 when the control mean is not positive), ``z_score`` and
        ``significant`` (True when ``|z| > 1.96``). When either sample is
        empty, or the standard error is zero, ``z_score`` is 0 and
        ``significant`` is False.
    """
    n1, n2 = len(control), len(treatment)

    mean1 = sum(control) / n1 if n1 > 0 else 0
    mean2 = sum(treatment) / n2 if n2 > 0 else 0

    # Sample variances (n - 1 denominator); undefined below two observations.
    var1 = sum((x - mean1) ** 2 for x in control) / (n1 - 1) if n1 > 1 else 0
    var2 = sum((x - mean2) ** 2 for x in treatment) / (n2 - 1) if n2 > 1 else 0

    # Unpooled standard error of the difference between the two means.
    # With an empty sample there is nothing to compare, and dividing by its
    # size would raise ZeroDivisionError.
    if n1 == 0 or n2 == 0:
        std_err = 0.0
    else:
        std_err = math.sqrt(var1 / n1 + var2 / n2)

    z_score = (mean2 - mean1) / std_err if std_err > 0 else 0

    # Relative lift of treatment over control, in percent.
    lift = (mean2 - mean1) / mean1 * 100 if mean1 > 0 else 0

    return {
        'control_mean': mean1,
        'treatment_mean': mean2,
        'lift_percent': lift,
        'z_score': z_score,
        # 1.96 is the two-sided critical value at the 5% significance level.
        'significant': abs(z_score) > 1.96
    }

总结

本文介绍了Python实现A/B测试的完整方案,包括实验配置、流量分配、数据收集和统计分析。通过这套框架,你可以方便地对各种算法和策略进行对照实验,用数据驱动产品优化决策。

更多推荐