题目地址

https://nbviewer.jupyter.org/github/schmit/cme193-ipython-notebooks-lecture/blob/master/Exercises.ipynb

import random

import numpy as np
import scipy as sp
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# import statsmodels.api as sm
import statsmodels.formula.api as smf

# Use seaborn's "talk" plotting context for the figures drawn below.
sns.set_context("talk")

# Load the data from a CSV file resolved relative to the current working
# directory. The code below relies on the columns 'dataset', 'x' and 'y'.
anascombe = pd.read_csv('Anscombe.csv')

# Per-dataset summary statistics: group the frame once by the 'dataset'
# label, then report mean and variance of the x and y columns per group.
by_dataset = anascombe.groupby('dataset')
x = by_dataset['x']
y = by_dataset['y']
for axis_name, column in (("x", x), ("y", y)):
    print(f"{axis_name} mean:", column.mean())
    print(f"{axis_name} variance:", column.var())

# Correlation matrix of the numeric columns within each of the four datasets.
# The string-valued 'dataset' column is excluded before calling corr():
# on pandas >= 2.0 DataFrame.corr() no longer drops non-numeric columns
# silently and raises ValueError when one is present.
print()
for dataset_name in ('I', 'II', 'III', 'IV'):
    rows = anascombe[anascombe['dataset'] == dataset_name]
    print(rows.select_dtypes(include='number').corr())
print()

# Fit an ordinary least squares model y ~ x to each dataset separately and
# print its regression summary. After the loop, lin_model holds the fit for
# the last dataset ('IV').
for label in ('I', 'II', 'III', 'IV'):
    subset = anascombe[anascombe['dataset'] == label]
    lin_model = smf.ols('y ~ x', subset).fit()
    print(lin_model.summary())


# One scatter panel per value of the 'dataset' column, plotting y against x.
g = sns.FacetGrid(data=anascombe, col="dataset")
g.map(plt.scatter, "x", "y")

# Display the figure.
plt.show()

因为是练习题,所以是有直接的参考资料的。我觉得这种教pandas的方式很好,方法太多了。

Logo

瓜分20万奖金 获得内推名额 丰厚实物奖励 易参与易上手

更多推荐