执行《阿里云天池大赛赛题解析——机器学习篇》第二章例程不成功?

执行《阿里云天池大赛赛题解析——机器学习篇》第二章2.2.4章节第3小节例程:



import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats


###import warnings
###warnings.filterwarnings("ignore")





import logging
logging.basicConfig(format='%(asctime)s - %(pathname)s[line:%(lineno)d] - %(levelname)s: %(message)s',
                    level=logging.DEBUG)
logging.debug('debug info')
logging.info('info info')
logging.warning('warning info')
logging.error('error info')
logging.critical('critial info')





##%matplotlib inline



train_data_file = "./zhengqi_train.txt"
test_data_file = "./zhengqi_test.txt"



train_data = pd.read_csv(train_data_file, sep='\t', encoding='utf-8')
test_data = pd.read_csv(test_data_file, sep='\t', encoding='utf-8')



train_data.info()
test_data.info()


test_data.describe()
test_data.describe()


train_data.head()
test_data.head()


fig = plt.figure(figsize=(4, 6))  # 指定绘图对象宽度和高度
sns.boxplot(train_data['V0'],orient="v", width=0.5)


# 画箱式图
column = train_data.columns.tolist()[:39]  # 列表头
fig = plt.figure(figsize=(20, 40))  # 指定绘图对象宽度和高度
for i in range(38):
    plt.subplot(13, 3, i + 1)  # 13行3列子图
    sns.boxplot(train_data[column[i]], orient="v", width=0.5)  # 箱式图
    plt.ylabel(column[i], fontsize=8)
plt.show()


def find_outliers(model, X, y, sigma=3):
    # predict y values using model
    try:
        y_pred = pd.Series(model.predict(X), index=y.index)
    # if predicting fails, try fitting the model first
    except:
        model.fit(X, y)
        y_pred = pd.Series(model.predict(X), index=y.index)

    # calculate residuals between the model prediction and true y values
    resid = y - y_pred
    mean_resid = resid.mean()
    std_resid = resid.std()

    # calculate z statistic, define outliers to be where |z|>sigma
    z = (resid - mean_resid) / std_resid
    outliers = z[abs(z) > sigma].index

    # print and plot the results
    print('R2=', model.score(X, y))
    print('Mse=', mean_squared_error(y, y_pred))
    print('-------------------------------------------------------')

    print(len(outliers), 'outliers;', ' ALL data shape:', X.shape)

    plt.figure(figsize=(15, 5))
    ax_131 = plt.subplot(1, 3, 1)
    plt.plot(y, y_pred, '.')
    plt.plot(y.loc[outliers], y_pred.loc[outliers], 'ro')
    plt.legend(['Accepted', 'Outlier'])
    plt.xlabel('y')
    plt.ylabel('y_pred');

    ax_132 = plt.subplot(1, 3, 2)
    plt.plot(y, y - y_pred, '.')
    plt.plot(y.loc[outliers], y.loc[outliers] - y_pred.loc[outliers], 'ro')
    plt.legend(['Accepted', 'Outlier'])
    plt.xlabel('y')
    plt.ylabel('y - y_pred');

    ax_133 = plt.subplot(1, 3, 3)
    z.plot.hist(bins=50, ax=ax_133)
    z.loc[outliers].plot.hist(color='r', bins=50, ax=ax_133)
    plt.legend(['Accepted', 'Outlier'])
    plt.xlabel('z')

    plt.savefig('outliers.png')

    return outliers


# 通过岭回归模型找出异常值,并绘制其分布
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_squared_error
X_train = train_data.iloc[:,0:-1]
y_train = train_data.iloc[:,-1]
outliers = find_outliers(Ridge(),X_train,y_train)











































plt.figure(figsize=(10,5))

ax=plt.subplot(1,2,1)
sns.distplot(train_data['V0'],fit=stats.norm)
###plt.show()
ax=plt.subplot(1,2,2)
res = stats.probplot(train_data['V0'], plot=plt)
plt.show()


'''
train_cols = len(train_data.columns)
plt.figure(figsize=( train_cols, 4 ))

i = 0
for col in train_data.columns:
    i += 1
    ax = plt.subplot( train_cols , 4,  i)
    sns.distplot(train_data[col], fit=stats.norm)

    i += 1
    ax = plt.subplot( train_cols , 4 , i)
    res = stats.probplot(train_data[col], plot=plt)
plt.tight_layout()
plt.show()
'''


train_cols = 6
train_rows = len(train_data.columns)
plt.figure(figsize=(4 * train_cols, 4 * train_rows))

i = 0
for col in train_data.columns:
    i += 1
    ax = plt.subplot(train_rows, train_cols, i)
    sns.distplot(train_data[col], fit=stats.norm)

    i += 1
    ax = plt.subplot(train_rows, train_cols, i)
    res = stats.probplot(train_data[col], plot=plt)
plt.show()

不成功,原书中23页上部图并未显示出来,请问该怎么解决?

阅读 919
撰写回答
你尚未登录,登录后可以
  • 和开发者交流问题的细节
  • 关注并接收问题和回答的更新提醒
  • 参与内容的编辑和改进,让解决方法与时俱进
推荐问题