大数据比赛第四部分可视化(python)
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
plt.rcParams['font.sans-serif'] = ['SimHei'] # 指定默认字体:解决plot不能显示中文问题
plt.rcParams['axes.unicode_minus'] = False # 解决保存图像是负号'-'显示为方块的问题
·
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
plt.rcParams['font.sans-serif'] = ['SimHei'] # 指定默认字体:解决plot不能显示中文问题
plt.rcParams['axes.unicode_minus'] = False # 解决保存图像是负号'-'显示为方块的问题
# Bar chart of the mean PM2.5 value for three cities, saved to demo.png.
# The input file has no header row, so the column names are supplied here.
airpm = pd.read_csv(
    "D:\\dataInputOutput\\input\\airpm25.txt",
    header=None,
    names=['date', 'pm25', 'city', 'district'],
)
# Cities shown on the x axis, one bar each.
city_list = ['合肥', '黄山', '芜湖']
# Mean of the pm25 column over every row of each city, in city_list order.
proportions = [
    airpm.loc[airpm['city'] == city_label, 'pm25'].mean()
    for city_label in city_list
]
pm_hefei, pm_huangshan, pm_wuhu = proportions
# One colour per bar, matching the order of city_list.
plt.bar(city_list, proportions, color=("red", "green", "blue"))
plt.xlabel(u'城市')  # x-axis label
plt.ylabel(u'pm2.5平均值')  # y-axis label
# Write the figure to demo.png.
plt.savefig('demo.png')
# coding: utf-8
# In[2]:
# 2、利用折线图,画出城市,各城市随时间的pm2.5值变化
# 要求将输出的折线图保存成图像文件Y轴表示pm2.5的值,X轴表示日期,城市
# 要求:(5分)
# # 1)折线图中含图例;
# # 不同的城市用不同的颜色表达
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from datetime import datetime
import matplotlib.dates as mdates
from matplotlib.dates import AutoDateLocator
plt.rcParams['font.sans-serif'] = ['SimHei'] # 指定默认字体:解决plot不能显示中文问题
plt.rcParams['axes.unicode_minus'] = False # 解决保存图像是负号'-'显示为方块的问题
# Line chart of the daily mean PM2.5 value for three cities, one coloured
# line per city, with a legend.
# Start from a fresh figure so the lines are not drawn onto whatever axes an
# earlier plot left as the current figure.
plt.figure()
# The input file has no header row, so the column names are supplied here.
airpm = pd.read_csv(
    'D:\\dataInputOutput\\input\\airpm25.txt',
    header=None,
    names=['date', 'pm25', 'city', 'district'],
)
# Average pm25 over all rows sharing the same (city, date) pair.
airpm_group = airpm.groupby(['city', 'date'], as_index=False)['pm25'].mean()
# Use the date column as the index.
data = airpm_group.set_index('date')
# Draw one line per city. The x values are parsed from each city's OWN index:
# taking every city's dates from the first city's rows would pair the y values
# with the wrong dates (or fail outright when the row counts differ).
for city_name, line_color in (('合肥', 'blue'), ('黄山', 'red'), ('芜湖', 'green')):
    city_rows = data[data.city == city_name]
    # Index values are parsed with the '%Y%m%d' pattern (e.g. 20190131).
    city_dates = [datetime.strptime(str(d), '%Y%m%d').date() for d in city_rows.index]
    # The colour is given only through color=; no format string is passed, so
    # there is no conflicting second colour specification.
    plt.plot(city_dates, city_rows['pm25'].values, lw=1.5, label=city_name, color=line_color)
# Axis cosmetics.
autodates = AutoDateLocator()  # picks the tick spacing automatically
plt.gca().xaxis.set_major_locator(autodates)
plt.gca().xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d'))  # tick label format
plt.gcf().autofmt_xdate()  # rotate the date labels
plt.xlabel('日期', size=20)  # x-axis label
plt.ylabel('当天平均PM2.5', size=20)  # y-axis label
# Legend entries come from the label= of each plotted line.
plt.legend(loc=0)
# NOTE(review): the task description for this chart asks for the figure to be
# saved to an image file, but only plt.show() is called here - confirm whether
# a plt.savefig(...) call (placed before plt.show()) is wanted.
plt.show()
# coding: utf-8
# In[20]:
# 3、利用柱线混合图形画出合肥市,高新区,庐阳区的pm2.5的值。要求将输出的折线图保存成图像文件,
# X轴表示时间,柱状表示 高新区 的pm2.5的值,线形表示 庐阳区的pm2.5的值。(10分)
# 要求:
# 图形中含有图例
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from datetime import datetime
import matplotlib.dates as mdates
from matplotlib.dates import AutoDateLocator
plt.rcParams['font.sans-serif'] = ['SimHei'] # 指定默认字体:解决plot不能显示中文问题
plt.rcParams['axes.unicode_minus'] = False # 解决保存图像是负号'-'显示为方块的问题
# Combined chart for two districts of Hefei: the Luyang district PM2.5 values
# as a line and the Gaoxin district values as bars, with a legend; the result
# is saved to demo1.png and then shown.
# Start from a fresh figure so nothing left on the current axes by an earlier
# plot ends up in this chart.
plt.figure()
# The input file has no header row, so the column names are supplied here.
airpm = pd.read_csv(
    'D:\\dataInputOutput\\input\\airpm25.txt',
    header=None,
    names=['date', 'pm25', 'city', 'district'],
)
# Use the date column as the index.
data = airpm.set_index('date')
# Line series: rows whose city is Hefei and whose district is Luyang.
luyang_mask = (data['city'] == '合肥') & (data['district'] == '庐阳区')
data_luyang = data.index[luyang_mask]
# Index values are parsed with the '%Y%m%d' pattern (e.g. 20190131).
data_luyang_translation = [datetime.strptime(str(d), '%Y%m%d').date() for d in data_luyang]
data_luyang_mean = data['pm25'][luyang_mask].values
plt.plot(data_luyang_translation, data_luyang_mean, lw=1.5, label="庐阳区", color='red')
# Bar series: rows whose city is Hefei and whose district is Gaoxin.
gaoxin_mask = (data['city'] == '合肥') & (data['district'] == '高新区')
data_gaoxin = data.index[gaoxin_mask]
data_gaoxin_translation = [datetime.strptime(str(d), '%Y%m%d').date() for d in data_gaoxin]
data_gaoxin_mean = data['pm25'][gaoxin_mask].values
plt.bar(data_gaoxin_translation, data_gaoxin_mean, label="高新区", color='blue')
# Axis cosmetics.
autodates = AutoDateLocator()  # picks the tick spacing automatically
plt.gca().xaxis.set_major_locator(autodates)
plt.gca().xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d'))  # tick label format
plt.gcf().autofmt_xdate()  # rotate the date labels
plt.xlabel('日期', size=20)  # x-axis label
plt.ylabel('PM2.5', size=20)  # y-axis label
# Legend entries come from the label= of the line and the bars.
plt.legend(loc=0)
# Save only after the line, the bars and the legend are all drawn; saving
# right after the line plot would write an image without the bars or legend.
plt.savefig('demo1.png')
plt.show()
python 导库:
点击阅读全文
更多推荐
目录
所有评论(0)