python homework——the 14th week

Part 1
1.
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import statsmodels.formula.api as sfa

# Load the Anscombe data; every statistic below is computed per 'dataset' group.
anc = pd.read_csv('anscombe.csv')
by_dataset = anc.groupby('dataset')

# Report order: mean of x, mean of y, variance of x, variance of y.
for stat_label, stat_method in (('mean', 'mean'), ('variance', 'var')):
    for column in ('x', 'y'):
        per_group = getattr(by_dataset[column], stat_method)()
        # !s forces str(), matching the plain string concatenation of the result.
        print(f'The {stat_label} of {column} is\n{per_group!s}')
python homework——the 14th week
2.
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import statsmodels.formula.api as sfa

# Load the Anscombe data and print the x/y correlation matrix of each dataset.
anc = pd.read_csv('anscombe.csv')

# DataFrame.corr() must only see numeric columns: since pandas 2.0 it no
# longer drops non-numeric ones silently (numeric_only defaults to False), so
# leaving the string 'dataset' column in the frame raises ValueError.
# Selecting 'x' and 'y' explicitly works on every pandas version.
for ordinal, label in (
    ('first', 'I'),
    ('second', 'II'),
    ('third', 'III'),
    ('fourth', 'IV'),
):
    xy = anc.loc[anc['dataset'] == label, ['x', 'y']]
    print(
        'The correlation coefficient of the ' + ordinal + ' dataset is \n'
        + str(xy.corr())
    )
python homework——the 14th week
3.

import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import statsmodels.formula.api as sfa


# Load the Anscombe data and fit an ordinary least squares model per dataset.
anc = pd.read_csv('anscombe.csv')

# One fitted 'y ~ x' model per dataset, kept in the order I, II, III, IV.
md = [
    sfa.ols('y ~ x', anc[anc['dataset'] == label]).fit()
    for label in ('I', 'II', 'III', 'IV')
]

# All models are fitted before anything is printed; print('\n') puts blank
# lines ahead of each regression summary.
for fitted in md:
    print('\n')
    print(fitted.summary())
python homework——the 14th week

Part 2
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import statsmodels.formula.api as sfa

# Load the Anscombe data for plotting.
anc = pd.read_csv('anscombe.csv')

# One facet (column) per value of 'dataset', each showing a scatter of y
# against x. FacetGrid.map returns the grid itself, so vsl is still the grid.
vsl = sns.FacetGrid(data=anc, col='dataset').map(plt.scatter, 'x', 'y')

# Display the figure.
plt.show()
python homework——the 14th week