pip3 install --user seaborn:seaborn 这个库主要有什么作用(python3)
displot(): 分布图(histplot、kdeplot、ecdfplot 的图形级(figure-level)统一接口);jointplot(): 联合分布图(同时展示两个变量的关系及其各自分布);regplot(): 回归图(与 lmplot 类似,但接口略有不同);pairplot(): 配对图(绘制数据集中所有数值型变量的成对关系);stripplot() / swarmplot(): 分类散点图;pointplot(): 点图。
pip3 install --user seaborn:seaborn 这个库主要有什么作用
在数据分析上
如果你是进行数据分析和探索性数据分析(EDA),Seaborn 通常是比原生 Matplotlib 更高效、更美观的选择。 当然,如果需要高度定制化的图表,你仍然可以结合使用 Matplotlib 和 Seaborn,在 Seaborn 的基础上用 Matplotlib 的 API 进行微调。
专注于统计可视化:内置了大量用于展示数据分布、关系、比较和分类的图表类型,这些图表在数据分析中非常常用。
Seaborn 能绘制的主要图表类型
Seaborn 的图表大致可以分为以下几类:
1. 关系图 (Relational plots)
用于展示变量之间的关系,比如相关性。
scatterplot(): 散点图
lineplot(): 折线图(可自动展示置信区间)
2. 分布图 (Distribution plots)
用于展示单变量或多变量的分布情况。
histplot(): 直方图
kdeplot(): 核密度估计图
ecdfplot(): 经验累积分布函数图
displot(): 分布图(histplot、kdeplot、ecdfplot 的图形级(figure-level)统一接口)
3. 分类图 (Categorical plots)
用于展示分类变量与连续变量之间的关系。
catplot(): 分类图(下面许多图的图形级(figure-level)统一接口)
stripplot() / swarmplot(): 分类散点图
boxplot(): 箱线图
violinplot(): 小提琴图
barplot(): 条形图(默认显示平均值和置信区间)
pointplot(): 点图(默认显示平均值和置信区间)
4. 回归图 (Regression plots)
在关系图的基础上,自动添加线性回归拟合线及其置信区间。
lmplot(): 回归图
regplot(): 回归图(与 lmplot 类似;regplot 是坐标轴级接口,直接画在指定的 Axes 上,而 lmplot 是基于 FacetGrid 的图形级接口,支持按子集分面)
5. 矩阵图 (Matrix plots)
用于绘制矩阵数据。
heatmap(): 热力图(非常常用!用于显示相关性矩阵、混淆矩阵等)
clustermap(): 聚类图
6. 多面板图 (Multi-plot grids)
用于轻松创建基于不同数据子集的多个图表组合。
FacetGrid: 分面网格
PairGrid: 配对网格(用于绘制数据集中的成对关系)
jointplot(): 联合分布图(同时展示两个变量的关系及其各自分布)
pairplot(): 配对图(绘制数据集中所有数值型变量的成对关系)
hx2@Ubantu:~$ pip3 install --user seaborn
Collecting seaborn
Using cached seaborn-0.11.2-py3-none-any.whl (292 kB)
Requirement already satisfied: numpy>=1.15 in ./.local/lib/python3.6/site-packages (from seaborn) (1.19.5)
Requirement already satisfied: pandas>=0.23 in ./.local/lib/python3.6/site-packages (from seaborn) (1.1.5)
Requirement already satisfied: matplotlib>=2.2 in ./.local/lib/python3.6/site-packages (from seaborn) (3.3.4)
Collecting scipy>=1.0
Downloading scipy-1.5.4-cp36-cp36m-manylinux1_x86_64.whl (25.9 MB)
|████████████████████████████████| 25.9 MB 46 kB/s
Requirement already satisfied: cycler>=0.10 in ./.local/lib/python3.6/site-packages (from matplotlib>=2.2->seaborn) (0.11.0)
Requirement already satisfied: pyparsing!=2.0.4,!=2.1.2,!=2.1.6,>=2.0.3 in ./.local/lib/python3.6/site-packages (from matplotlib>=2.2->seaborn) (3.1.4)
Requirement already satisfied: kiwisolver>=1.0.1 in ./.local/lib/python3.6/site-packages (from matplotlib>=2.2->seaborn) (1.3.1)
Requirement already satisfied: python-dateutil>=2.1 in ./.local/lib/python3.6/site-packages (from matplotlib>=2.2->seaborn) (2.9.0.post0)
Requirement already satisfied: pillow>=6.2.0 in ./.local/lib/python3.6/site-packages (from matplotlib>=2.2->seaborn) (8.4.0)
Requirement already satisfied: pytz>=2017.2 in ./.local/lib/python3.6/site-packages (from pandas>=0.23->seaborn) (2025.2)
Requirement already satisfied: six>=1.5 in ./.local/lib/python3.6/site-packages (from python-dateutil>=2.1->matplotlib>=2.2->seaborn) (1.17.0)
Installing collected packages: scipy, seaborn
Successfully installed scipy-1.5.4 seaborn-0.11.2
hx2@Ubantu:~$
代码部分
hxtest 数据库
-- E-commerce sample schema (the walkthrough names the target database `hxtest`).
-- NOTE(review): no CREATE DATABASE / USE statement appears here; select the database first.
-- users: one row per customer with basic profile attributes.
CREATE TABLE users (
user_id INT AUTO_INCREMENT PRIMARY KEY COMMENT '用户ID,主键,自增长',
registration_date DATETIME NOT NULL COMMENT '用户注册日期时间',
user_level VARCHAR(50) NOT NULL COMMENT '用户等级(VIP、黄金、普通等)',
city VARCHAR(100) COMMENT '用户所在城市',
-- Audit columns: created_at is set on insert, updated_at is refreshed on every update.
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP COMMENT '记录创建时间',
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '记录最后更新时间',
-- Secondary indexes on the columns declared above (registration date, level, city).
INDEX idx_registration_date (registration_date) COMMENT '注册日期索引',
INDEX idx_user_level (user_level) COMMENT '用户等级索引',
INDEX idx_city (city) COMMENT '城市索引'
) COMMENT='用户信息表';
-- products: one row per catalogue item with its category, brand and cost price.
CREATE TABLE products (
product_id INT AUTO_INCREMENT PRIMARY KEY COMMENT '商品ID,主键,自增长',
category VARCHAR(100) NOT NULL COMMENT '商品类别(电子产品、服装、家居等)',
brand VARCHAR(100) NOT NULL COMMENT '商品品牌',
-- cost_price is the purchase cost; the selling price lives on each order row (orders.unit_price).
cost_price DECIMAL(10, 2) NOT NULL COMMENT '商品成本价格',
product_name VARCHAR(200) COMMENT '商品名称',
-- Audit columns: created_at is set on insert, updated_at is refreshed on every update.
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP COMMENT '记录创建时间',
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '记录最后更新时间',
-- Secondary indexes on category and brand.
INDEX idx_category (category) COMMENT '商品类别索引',
INDEX idx_brand (brand) COMMENT '品牌索引'
) COMMENT='商品信息表';
-- orders: one row per purchase, linking a user to a product.
CREATE TABLE orders (
order_id INT AUTO_INCREMENT PRIMARY KEY COMMENT '订单ID,主键,自增长',
user_id INT NOT NULL COMMENT '用户ID,外键关联users表',
product_id INT NOT NULL COMMENT '商品ID,外键关联products表',
order_date DATETIME NOT NULL COMMENT '订单创建日期时间',
-- order_amount is stored independently of quantity and unit_price;
-- no constraint in this schema enforces that they agree.
order_amount DECIMAL(10, 2) NOT NULL COMMENT '订单总金额',
-- Both status columns are ENUMs that default to 'pending'.
payment_status ENUM('pending', 'paid', 'failed', 'refunded') DEFAULT 'pending' COMMENT '支付状态:待支付、已支付、支付失败、已退款',
shipping_status ENUM('pending', 'shipped', 'delivered', 'cancelled') DEFAULT 'pending' COMMENT '配送状态:待发货、已发货、已送达、已取消',
quantity INT NOT NULL DEFAULT 1 COMMENT '购买数量',
unit_price DECIMAL(10, 2) NOT NULL COMMENT '商品单价',
-- Audit columns: created_at is set on insert, updated_at is refreshed on every update.
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP COMMENT '记录创建时间',
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '记录最后更新时间',
-- Secondary indexes on the foreign keys, the order date and both status columns.
INDEX idx_user_id (user_id) COMMENT '用户ID索引',
INDEX idx_product_id (product_id) COMMENT '商品ID索引',
INDEX idx_order_date (order_date) COMMENT '订单日期索引',
INDEX idx_payment_status (payment_status) COMMENT '支付状态索引',
INDEX idx_shipping_status (shipping_status) COMMENT '配送状态索引',
-- ON DELETE CASCADE: deleting a user or a product also deletes every order row that references it.
-- NOTE(review): this erases order history on product/user deletion; confirm that is intended.
FOREIGN KEY (user_id) REFERENCES users(user_id) ON DELETE CASCADE ,
FOREIGN KEY (product_id) REFERENCES products(product_id) ON DELETE CASCADE
) COMMENT='订单信息表';
-- Sample users. user_id values are given explicitly (rather than left to
-- AUTO_INCREMENT) so that rows in other tables can reference known ids.
INSERT INTO users (user_id, registration_date, user_level, city) VALUES
(1, '2023-01-15 10:00:00', 'VIP', '北京市'),
(2, '2023-02-20 14:30:00', '普通', '上海市'),
(3, '2023-03-10 09:15:00', '黄金', '广州市'),
(4, '2023-04-05 16:20:00', '普通', '深圳市'),
(5, '2023-05-12 11:30:00', 'VIP', '杭州市');
-- Sample products, again with explicit product_id values.
-- NOTE(review): 'L\'Oreal' escapes the quote with a backslash; this relies on MySQL's
-- default backslash-escape handling and fails under the NO_BACKSLASH_ESCAPES sql_mode
-- (the portable spelling is a doubled quote: 'L''Oreal').
INSERT INTO products (product_id, category, brand, cost_price, product_name) VALUES
(1, '电子产品', 'Apple', 5000.00, 'iPhone 14 Pro'),
(2, '电子产品', 'Samsung', 4500.00, 'Galaxy S23'),
(3, '服装', 'Nike', 300.00, '运动鞋'),
(4, '服装', 'Adidas', 280.00, '运动外套'),
(5, '家居', 'IKEA', 200.00, '书桌'),
(6, '家居', 'MUJI', 150.00, '收纳盒'),
(7, '美妆', 'L\'Oreal', 80.00, '护肤套装'),
(8, '食品', '三只松鼠', 50.00, '坚果礼盒');
-- Sample orders. order_id is omitted from the column list, so AUTO_INCREMENT assigns it.
-- Two rows (unit_price 99.67 and 49.67, quantity 3) have an order_amount that differs
-- from quantity * unit_price by 0.01 because the unit price was rounded.
INSERT INTO orders (user_id, product_id, order_date, order_amount, payment_status, shipping_status, quantity, unit_price) VALUES
(1, 1, '2023-06-01 10:00:00', 5999.00, 'paid', 'delivered', 1, 5999.00),
(1, 3, '2023-06-15 14:30:00', 899.00, 'paid', 'shipped', 2, 449.50),
(2, 5, '2023-07-01 09:15:00', 399.00, 'paid', 'delivered', 1, 399.00),
(2, 8, '2023-07-05 16:20:00', 199.00, 'paid', 'delivered', 4, 49.75),
(3, 1, '2023-07-15 16:45:00', 5999.00, 'paid', 'pending', 1, 5999.00),
(3, 7, '2023-07-20 11:30:00', 299.00, 'paid', 'shipped', 3, 99.67),
(4, 2, '2023-08-01 13:25:00', 4999.00, 'paid', 'delivered', 1, 4999.00),
(4, 4, '2023-08-10 15:40:00', 599.00, 'paid', 'shipped', 2, 299.50),
(5, 6, '2023-08-15 09:10:00', 450.00, 'pending', 'pending', 3, 150.00),
(5, 8, '2023-08-20 14:55:00', 149.00, 'paid', 'delivered', 3, 49.67);
-- Verify the result: print the full DDL MySQL stored for each of the three tables.
SHOW CREATE TABLE users;
SHOW CREATE TABLE products;
SHOW CREATE TABLE orders;
代码实现 python3
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime, timedelta
import mysql.connector
import traceback
# Global plotting setup: seaborn theme plus fonts able to render Chinese labels.
# NOTE(review): seaborn style presets appear to carry their own font settings,
# so the font override is kept after set_style() - confirm before reordering.
sns.set_style("whitegrid")
sns.set_palette("pastel")  # soft default colour cycle
plt.rcParams.update({
    # Candidate sans-serif fonts, tried in the order listed.
    'font.sans-serif': [
        'WenQuanYi Zen Hei',
        'SimHei',
        'DejaVu Sans',
        'Arial Unicode MS',
    ],
    # Presumably avoids a missing-glyph box for the minus sign with CJK fonts.
    'axes.unicode_minus': False,
})
class ECommerceAnalyzer:
    """Pull orders/users/products from MySQL, derive metrics, and chart them.

    Typical use is ``ECommerceAnalyzer(db_config).run_analysis()``.
    ``connect_db``, ``load_data``, the plotting methods, ``generate_report``
    and ``run_analysis`` print errors (with traceback) instead of raising;
    the pure metric methods let pandas errors propagate to their caller.
    """

    def __init__(self, db_config):
        """Store connection settings; no database access happens here.

        Args:
            db_config: Mapping with the keys ``host``, ``user``, ``password``
                and ``database`` (read by :meth:`connect_db`).
        """
        self.db_config = db_config
        self.connection = None
        self.df_orders = None
        self.df_users = None
        self.df_products = None
        # Orders joined with products and users; filled by _merge_data().
        self.df_merged = None

    def connect_db(self):
        """Open the MySQL connection described by ``self.db_config``.

        Returns:
            bool: True on success, False if connecting raised (the error and
            traceback are printed).
        """
        try:
            self.connection = mysql.connector.connect(
                host=self.db_config['host'],
                user=self.db_config['user'],
                password=self.db_config['password'],
                database=self.db_config['database'],
                auth_plugin='mysql_native_password'
            )
            print("数据库连接成功")
            return True
        except Exception as e:
            print(f"数据库连接失败: {e}")
            print(traceback.format_exc())
            return False

    def load_data(self):
        """Read the three tables into DataFrames and build ``df_merged``.

        Returns:
            bool: True when all queries and the merge succeeded, False
            otherwise (the error and traceback are printed).
        """
        try:
            # Orders
            query_orders = """
                SELECT order_id, user_id, product_id, order_date, order_amount,
                       payment_status, shipping_status, quantity, unit_price
                FROM orders
            """
            self.df_orders = pd.read_sql(query_orders, self.connection)
            self.df_orders['order_date'] = pd.to_datetime(self.df_orders['order_date'])

            # Users
            query_users = "SELECT user_id, registration_date, user_level, city FROM users"
            self.df_users = pd.read_sql(query_users, self.connection)

            # Products
            query_products = "SELECT product_id, category, brand, cost_price FROM products"
            self.df_products = pd.read_sql(query_products, self.connection)

            # Merge once up front so the analysis methods do not repeat the joins.
            self._merge_data()

            print("数据加载完成")
            print(f"订单数据: {len(self.df_orders)} 条记录")
            print(f"用户数据: {len(self.df_users)} 条记录")
            print(f"商品数据: {len(self.df_products)} 条记录")
            print(f"合并后数据: {len(self.df_merged)} 条记录")
            return True
        except Exception as e:
            print(f"数据加载失败: {e}")
            print(traceback.format_exc())
            return False

    def _merge_data(self):
        """Join orders with products and users and add margin columns.

        Adds ``gross_profit`` (unit_price - cost_price) and ``gross_margin``
        (gross_profit / unit_price * 100) and stores the result in
        ``self.df_merged``.

        Raises:
            Exception: Any merge/arithmetic error is printed and re-raised so
                the caller sees the real cause instead of a later failure on
                a missing ``df_merged``.
        """
        try:
            df_temp = pd.merge(self.df_orders, self.df_products, on='product_id')

            df_temp['gross_profit'] = df_temp['unit_price'] - df_temp['cost_price']
            # A zero unit price would produce +/-inf and poison every mean
            # computed later; NaN is skipped by pandas aggregations instead.
            safe_price = df_temp['unit_price'].replace(0, np.nan)
            df_temp['gross_margin'] = df_temp['gross_profit'] / safe_price * 100

            self.df_merged = pd.merge(df_temp, self.df_users, on='user_id')
        except Exception as e:
            self.df_merged = None
            print(f"数据合并失败: {e}")
            print(traceback.format_exc())
            raise

    def calculate_sales_metrics(self):
        """Compute headline sales figures from ``self.df_orders``.

        Returns:
            dict: ``total_gmv``, ``total_orders``, ``avg_order_value`` and,
            when at least two months exist and the previous month's total is
            non-zero, ``sales_growth_rate`` (percent change of the last month
            over the one before it).
        """
        metrics = {}

        metrics['total_gmv'] = self.df_orders['order_amount'].sum()
        metrics['total_orders'] = len(self.df_orders)
        metrics['avg_order_value'] = (
            metrics['total_gmv'] / metrics['total_orders']
            if metrics['total_orders'] > 0 else 0
        )

        # Revenue per calendar month.
        monthly_sales = self.df_orders.groupby(
            self.df_orders['order_date'].dt.to_period('M')
        )['order_amount'].sum()

        if len(monthly_sales) > 1:
            previous = monthly_sales.iloc[-2]
            # Growth over a zero base is undefined; omit the key rather than
            # report inf/NaN.
            if previous != 0:
                metrics['sales_growth_rate'] = (
                    (monthly_sales.iloc[-1] - previous) / previous * 100
                )

        return metrics

    def analyze_user_behavior(self):
        """Summarise spending per user level and the repeat-purchase rate.

        Returns:
            tuple: ``(analysis, user_level_stats)`` where ``analysis`` is a
            dict holding ``repeat_purchase_rate`` (percent of users with more
            than one distinct order) and ``user_level_stats`` is a DataFrame
            of count/sum/mean of ``order_amount`` per ``user_level``.
        """
        analysis = {}

        user_level_stats = self.df_merged.groupby('user_level').agg({
            'order_amount': ['count', 'sum', 'mean']
        }).round(2)

        user_order_count = self.df_merged.groupby('user_id')['order_id'].nunique()
        repeat_customers = (user_order_count > 1).sum()
        analysis['repeat_purchase_rate'] = (
            repeat_customers / len(user_order_count) * 100
            if len(user_order_count) > 0 else 0
        )

        return analysis, user_level_stats

    def product_performance_analysis(self):
        """Aggregate sales per category and rank products by revenue.

        Returns:
            tuple: ``(category_analysis, top_products)``; the first has order
            count, revenue and mean gross margin per category, the second the
            ten product ids with the largest summed ``order_amount``.
        """
        category_analysis = self.df_merged.groupby('category').agg({
            'order_id': 'count',
            'order_amount': 'sum',
            'gross_margin': 'mean'
        }).round(2)

        top_products = self.df_merged.groupby('product_id').agg({
            'order_id': 'count',
            'order_amount': 'sum'
        }).nlargest(10, 'order_amount')

        return category_analysis, top_products

    @staticmethod
    def _save_and_close(fig, filename):
        """Tighten the layout, write *fig* to *filename* at 300 dpi, close it."""
        try:
            fig.tight_layout()
            fig.savefig(filename, dpi=300)
        finally:
            # Saved figures are never shown, so release them immediately.
            plt.close(fig)

    def create_dashboard(self):
        """Draw the 2x2 dashboard, save it, show it, then draw the extra charts.

        Writes ``ecommerce_dashboard_seaborn.png``. Errors are printed, not
        raised.
        """
        try:
            fig, axes = plt.subplots(2, 2, figsize=(16, 12))
            try:
                fig.suptitle('电商零售数据分析仪表板', fontsize=16, fontweight='bold')

                # 1. Monthly revenue trend (lineplot).
                monthly_sales = self.df_orders.groupby(
                    self.df_orders['order_date'].dt.to_period('M')
                )['order_amount'].sum().reset_index()
                # Period values are turned into strings so they can be used as axis labels.
                monthly_sales['order_date'] = monthly_sales['order_date'].astype(str)

                sns.lineplot(data=monthly_sales, x='order_date', y='order_amount',
                             marker='o', ax=axes[0, 0], linewidth=2.5)
                axes[0, 0].set_title('月度销售趋势', fontsize=14, fontweight='bold')
                axes[0, 0].set_xlabel('月份')
                axes[0, 0].set_ylabel('销售额')
                axes[0, 0].tick_params(axis='x', rotation=45)

                # 2. Number of users per level (countplot).
                if 'user_level' in self.df_users.columns:
                    sns.countplot(data=self.df_users, x='user_level', ax=axes[0, 1])
                    axes[0, 1].set_title('用户等级分布', fontsize=14, fontweight='bold')
                    axes[0, 1].set_xlabel('用户等级')
                    axes[0, 1].set_ylabel('用户数量')

                # 3. Revenue per category, largest first (barplot).
                category_sales = (
                    self.df_merged.groupby('category')['order_amount']
                    .sum()
                    .reset_index()
                    .sort_values('order_amount', ascending=False)
                )
                sns.barplot(data=category_sales, x='category', y='order_amount', ax=axes[1, 0])
                axes[1, 0].set_title('各类别销售额', fontsize=14, fontweight='bold')
                axes[1, 0].set_xlabel('商品类别')
                axes[1, 0].set_ylabel('销售额')
                axes[1, 0].tick_params(axis='x', rotation=45)

                # 4. Distribution of order amounts (histplot with KDE overlay).
                sns.histplot(data=self.df_orders, x='order_amount', bins=30,
                             ax=axes[1, 1], kde=True)
                axes[1, 1].set_title('订单金额分布', fontsize=14, fontweight='bold')
                axes[1, 1].set_xlabel('订单金额')
                axes[1, 1].set_ylabel('频次')

                fig.tight_layout()
                fig.savefig('ecommerce_dashboard_seaborn.png', dpi=300, bbox_inches='tight')
                print("仪表板已保存为 ecommerce_dashboard_seaborn.png")
                plt.show()
            finally:
                # Release the dashboard figure whether or not drawing succeeded.
                plt.close(fig)

            self.create_additional_visualizations()

        except Exception as e:
            print(f"创建仪表板失败: {e}")
            print(traceback.format_exc())

    def create_additional_visualizations(self):
        """Save four standalone charts built from ``self.df_merged``.

        Writes ``user_level_avg_order.png``, ``category_margin.png``,
        ``payment_status_boxplot.png`` (only if the column exists) and
        ``price_vs_quantity.png``. Errors are printed, not raised.
        """
        try:
            # 1. Mean order amount per user level.
            fig, ax = plt.subplots(figsize=(10, 6))
            user_level_avg = self.df_merged.groupby('user_level')['order_amount'].mean().reset_index()
            sns.barplot(data=user_level_avg, x='user_level', y='order_amount', ax=ax)
            ax.set_title('不同用户等级的平均订单价值')
            ax.set_xlabel('用户等级')
            ax.set_ylabel('平均订单金额')
            self._save_and_close(fig, 'user_level_avg_order.png')

            # 2. Mean gross margin per category, highest first.
            fig, ax = plt.subplots(figsize=(12, 8))
            category_margin = self.df_merged.groupby('category')['gross_margin'].mean().reset_index()
            category_margin = category_margin.sort_values('gross_margin', ascending=False)
            # One colour per bar taken from the RdYlGn palette.
            colors = sns.color_palette("RdYlGn", len(category_margin))
            sns.barplot(data=category_margin, x='gross_margin', y='category',
                        palette=colors, ax=ax)
            ax.set_title('各商品类别的平均毛利率')
            ax.set_xlabel('毛利率 (%)')
            ax.set_ylabel('商品类别')
            self._save_and_close(fig, 'category_margin.png')

            # 3. Order amount distribution per payment status.
            if 'payment_status' in self.df_merged.columns:
                fig, ax = plt.subplots(figsize=(10, 6))
                sns.boxplot(data=self.df_merged, x='payment_status', y='order_amount', ax=ax)
                ax.set_title('不同支付状态的订单金额分布')
                ax.set_xlabel('支付状态')
                ax.set_ylabel('订单金额')
                self._save_and_close(fig, 'payment_status_boxplot.png')

            # 4. Mean unit price against total units sold, per product.
            fig, ax = plt.subplots(figsize=(10, 6))
            product_sales = self.df_merged.groupby('product_id').agg({
                'unit_price': 'mean',
                'quantity': 'sum'
            }).reset_index()
            sns.scatterplot(data=product_sales, x='unit_price', y='quantity',
                            size='quantity', sizes=(20, 200), ax=ax)
            ax.set_title('商品单价与销量关系')
            ax.set_xlabel('单价')
            ax.set_ylabel('销量')
            self._save_and_close(fig, 'price_vs_quantity.png')

            print("额外可视化图表已保存")

        except Exception as e:
            print(f"创建额外可视化图表失败: {e}")
            print(traceback.format_exc())

    def generate_report(self):
        """Print the text report assembled from the three analysis methods.

        Errors are printed, not raised.
        """
        try:
            sales_metrics = self.calculate_sales_metrics()
            user_analysis, user_stats = self.analyze_user_behavior()
            category_analysis, top_products = self.product_performance_analysis()

            print("=" * 50)
            print("电商零售数据分析报告")
            print("=" * 50)

            print("\n1. 销售业绩指标:")
            print(f" 总交易额: ¥{sales_metrics['total_gmv']:,.2f}")
            print(f" 总订单数: {sales_metrics['total_orders']}")
            print(f" 客单价: ¥{sales_metrics['avg_order_value']:,.2f}")
            # The growth rate is absent when it cannot be computed.
            if 'sales_growth_rate' in sales_metrics:
                print(f" 销售增长率: {sales_metrics['sales_growth_rate']:.2f}%")

            print("\n2. 用户行为指标:")
            print(f" 复购率: {user_analysis['repeat_purchase_rate']:.2f}%")

            print("\n3. 用户等级分析:")
            print(user_stats)

            print("\n4. 商品类别分析:")
            print(category_analysis)

            print("\n5. 热销商品TOP10:")
            print(top_products)

        except Exception as e:
            print(f"生成报告失败: {e}")
            print(traceback.format_exc())

    def _close_connection(self):
        """Close the database connection if one was opened; print any error."""
        if not self.connection:
            return
        try:
            self.connection.close()
            print("数据库连接已关闭")
        except Exception as e:
            print(f"分析过程中出现错误: {e}")
            print(traceback.format_exc())

    def run_analysis(self):
        """Connect, load, report and plot; always release the connection.

        Errors are printed, not raised.
        """
        try:
            if self.connect_db() and self.load_data():
                self.generate_report()
                self.create_dashboard()
        except Exception as e:
            print(f"分析过程中出现错误: {e}")
            print(traceback.format_exc())
        finally:
            # Runs on success, on a failed load, and on unexpected errors alike.
            self._close_connection()
# Example entry point: analyse the local `hxtest` database.
if __name__ == "__main__":
    # Connection settings passed to ECommerceAnalyzer.
    db_config = dict(
        host='localhost',
        user='root',
        password='root',
        database='hxtest',
    )
    ECommerceAnalyzer(db_config).run_analysis()
坑部分:
# 或者安装文泉驿正黑
sudo apt-get install fonts-wqy-zenhei
# 更新字体缓存
sudo fc-cache -f -v
# 验证安装
fc-list | grep -i wqy
安装字体后,建议重启Python环境或重新启动终端,这样字体设置才会生效。如果仍然显示乱码,可以尝试清除matplotlib缓存:
rm -rf ~/.cache/matplotlib
hx2@Ubantu:~$ pwd
/home/hx2
hx2@Ubantu:~$
hx2@Ubantu:~$ rm -rf ~/.cache/matplotlib
hx2@Ubantu:~$ fc-list | grep -i wqy
/usr/share/fonts/truetype/wqy/wqy-microhei.ttc: 文泉驿微米黑,文泉驛微米黑,WenQuanYi Micro Hei:style=Regular
/usr/share/fonts/truetype/wqy/wqy-zenhei.ttc: 文泉驿正黑,文泉驛正黑,WenQuanYi Zen Hei:style=Regular
/usr/share/fonts/truetype/wqy/wqy-zenhei.ttc: 文泉驿点阵正黑,文泉驛點陣正黑,WenQuanYi Zen Hei Sharp:style=Regular
/usr/share/fonts/truetype/wqy/wqy-zenhei.ttc: 文泉驿等宽正黑,文泉驛等寬正黑,WenQuanYi Zen Hei Mono:style=Regular
/usr/share/fonts/truetype/wqy/wqy-microhei.ttc: 文泉驿等宽微米黑,文泉驛等寬微米黑,WenQuanYi Micro Hei Mono:style=Regular
hx2@Ubantu:~$
更多推荐
所有评论(0)