特征归一化后的多项式回归拟合结果-SofaSofa

#%%
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
ITEMS = 4  # highest polynomial degree used as a feature
path = 'train.txt'
# A separator longer than one character is interpreted as a regular expression
# and requires the Python parsing engine; requesting it explicitly avoids the
# ParserWarning pandas emits when it has to fall back on its own.
data = pd.read_csv(path, sep='\t\t', header=None,
                   names=['Population', 'Profit'], engine='python')
base = data.iloc[:, 0:-1].values
# Prepend x^2 ... x^ITEMS one at a time, so the frame ends up ordered
# [x^ITEMS, ..., x^2, Population, Profit].
for i in range(ITEMS - 1):
    data.insert(0, '{}Square'.format(i + 2), base ** (i + 2))

X = data.iloc[:, 0:-1]
# Keep the per-column statistics: new inputs must be standardised with the
# same mean / std before they are fed to the fitted model.
sample_mean = X.mean().values
sample_std = X.std().values
X = (X - X.mean()) / X.std()
# Append the bias column LAST, giving [x^ITEMS, ..., x^2, x, 1].
# Inserting at X.shape[1] - 1 placed it *before* the Population column, while
# the prediction features further down this script put the constant row last;
# the intercept and the linear coefficient were therefore swapped when
# predicting, which bends every fitted curve into the same monotonic shape.
X.insert(X.shape[1], 'Ones', 1)

y = data.iloc[:, -1]

# np.matrix is used so that `*` below means matrix multiplication.
X = np.matrix(X.values)
y = np.matrix(y.values).T
theta = np.matrix(np.zeros(ITEMS + 1))  # one weight per feature plus the bias

#%%
def computeCost(X, y, theta):
    """Return half the mean squared error of the prediction ``X * theta.T``.

    As called in this script, ``X``, ``y`` and ``theta`` are ``np.matrix``
    objects, so ``*`` is a matrix product: ``X`` has one row per sample,
    ``y`` is a column of targets and ``theta`` is a single row of weights.
    The sum of squared residuals is divided by twice the number of rows
    of ``X``.
    """
    residual = X * theta.T - y
    total_squared_error = np.sum(np.power(residual, 2))
    return total_squared_error / (2 * len(X))

#%% 
def gradientDescent(X, y, theta, alpha, epoch):
    """Run ``epoch`` steps of batch gradient descent on the squared-error cost.

    As called in this script, ``X``, ``y`` and ``theta`` are ``np.matrix``
    objects (``theta`` is a single row), so ``*`` is a matrix product.

    Parameters:
        X: feature matrix, one row per sample.
        y: column of targets.
        theta: initial row of weights; the caller's object is not modified,
            a new one is rebound on every step.
        alpha: learning rate.
        epoch: number of update steps to perform.

    Returns:
        ``(theta, cost)`` where ``theta`` is the weight row after the last
        step and ``cost`` is an array of length ``epoch`` holding the value
        of ``computeCost`` measured after each update.
    """
    n_samples = X.shape[0]
    step = alpha / n_samples
    history = np.zeros(epoch)
    for it in range(epoch):
        residual = X * theta.T - y
        # residual.T * X is the summed gradient over all samples.
        theta = theta - step * residual.T * X
        history[it] = computeCost(X, y, theta)
    return theta, history
#%%
# With the iteration count raised to ten thousand the result agrees with the
# matrix (closed-form) method.
alpha = 0.01
epoch = 1000
solution, cost = gradientDescent(X, y, theta, alpha, epoch)

# Evenly spaced populations spanning the training range, used to draw the curve.
x = np.linspace(data.Population.min(), data.Population.max(), 100)

# One standardised row per power, highest power first, then the plain x row,
# then a constant row of ones. sample_mean / sample_std are indexed from the
# end, so index -p holds the statistics of the x^p column.
x_extend = np.vstack(
    [(x ** p - sample_mean[-p]) / sample_std[-p] for p in range(ITEMS, 1, -1)]
    + [(x - sample_mean[-1]) / sample_std[-1]]
    + [np.ones_like(x)]
)

# Rows of x_extend are features, so transpose to get one sample per row.
pred = np.matrix(x_extend.T) * solution.T

fig, ax = plt.subplots(figsize=(6, 4))
ax.scatter(data.Population, data.Profit, label='Training Data')
ax.plot(x, pred, 'r', label='Prediction')
ax.set_title('Predicted Profit vs Population Size')
ax.set_xlabel('Population')
ax.set_ylabel('Profit')
ax.legend(loc=2)
plt.show()


#%% 
# Progress of gradient descent: cost recorded after each iteration.
iterations = np.arange(epoch)
fig, ax = plt.subplots()
ax.plot(iterations, cost, 'r')
plt.show()

代码如上，梯度下降很快就收敛了，但是最后拟合出来的曲线和理想情况相去甚远，一直是单调递增的，不管是四次还是十次拟合，都是一个形状，请问大家这是为什么呢？

cbshnrh 2019-07-11 17:59

2个回答

图像看起来是一波三折的，但是未必就是多项式能够拟合的吧，毕竟多项式也不能拟合任意函数。

这种形状不如试试分段的线性回归。

SofaSofa数据科学社区 DS面试题库 DS面经

东布东 2019-07-12 14:43

你这只是一阶的线性回归。要用到 $x^2$、$x^3$、$x_i x_j$ 这类高阶输入的线性回归才是多项式回归。

SofaSofa数据科学社区 DS面试题库 DS面经

Zealing 2019-07-12 23:40

特征归一化后的多项式回归拟合结果

Warning

2个回答

Warning

Warning