I've recently been working through Andrew Ng's deep learning course and have run into a problem I'd like to ask for help with. I'm on Course 1, Week 4, and I rewrote the assignment code myself by following Professor Ng's version. For the two-layer network my results match his exactly, but for the L-layer network the cost function in my program converges at about 0.64. I've been checking for two days without finding the mistake, so I'm asking here and hoping someone can tell me where I went wrong. Thank you very much.
The original course materials can be found at the link below:
https://github.com/robbertliu/deeplearning.ai-andrewNG
Here is my Python program:
import numpy as np
import h5py
import matplotlib.pyplot as plt
from dnn_app_utils_v2 import *
def Sigmoid(z):
    return 1 / (1 + np.exp(-z))

def ReLU(z):
    return np.maximum(0, z)

def SigmoidBackward(z):
    # Derivative of the sigmoid with respect to z: a * (1 - a)
    a = 1 / (1 + np.exp(-z))
    dz = a * (1 - a)
    return dz

def ReLUBackward(z):
    # Derivative of ReLU with respect to z: 1 where z >= 0, 0 where z < 0
    dz = np.ones(z.shape)
    dz[z < 0] = 0
    return dz
def InitializeParameters(dims):
    # Small random weights, zero biases
    np.random.seed(1)
    parameters = {}
    L = len(dims) - 1
    for l in range(1, L + 1):
        parameters['W' + str(l)] = np.random.randn(dims[l], dims[l - 1]) * 0.01
        parameters['b' + str(l)] = np.zeros((dims[l], 1))
    return parameters
def LinearForward(A_pre, W, b):
    Z = W @ A_pre + b
    return Z

def ActivationForward(A_pre, W, b, activation):
    Z = LinearForward(A_pre, W, b)
    if activation == 'sigmoid':
        A = Sigmoid(Z)
    elif activation == 'relu':
        A = ReLU(Z)
    return Z, A
def ForwardPropagation(X, parameters):
    # Cache every Z and A so they can be reused in backpropagation
    cache = {}
    cache['Z0'], cache['A0'] = np.zeros(X.shape), X
    L = len(parameters) // 2
    for l in range(1, L + 1):
        if l == L:
            # Output layer uses sigmoid
            cache['Z' + str(l)], cache['A' + str(l)] = ActivationForward(cache['A' + str(l - 1)], parameters['W' + str(l)], parameters['b' + str(l)], activation='sigmoid')
        else:
            # Hidden layers use ReLU
            cache['Z' + str(l)], cache['A' + str(l)] = ActivationForward(cache['A' + str(l - 1)], parameters['W' + str(l)], parameters['b' + str(l)], activation='relu')
    # print(cache.keys())
    return cache
def ComputeCost(Y, cache):
    # Cross-entropy cost averaged over the m examples
    m = Y.shape[1]
    L = len(cache) // 2 - 1
    J = - Y @ np.log(cache['A' + str(L)].T) - (1 - Y) @ np.log(1 - cache['A' + str(L)].T)
    return J / m
def LinearBackward(A_pre, dZ, W, b):
    m = A_pre.shape[1]
    dW = dZ @ A_pre.T / m
    db = np.sum(dZ, axis=1, keepdims=True) / m
    dA_pre = W.T @ dZ
    return dA_pre, dW, db

def ActivationBackward(Z, A_pre, dA, W, b, activation):
    if activation == 'sigmoid':
        dZ = dA * SigmoidBackward(Z)
    elif activation == 'relu':
        dZ = dA * ReLUBackward(Z)
    dA_pre, dW, db = LinearBackward(A_pre, dZ, W, b)
    return dZ, dW, db, dA_pre
def BackwardPropagation(Y, cache, parameters):
    L = len(parameters) // 2
    m = Y.shape[1]
    grads = {}
    # Derivative of the cross-entropy cost with respect to the output activation A[L]
    grads['dA' + str(L)] = - Y / cache['A' + str(L)] + (1 - Y) / (1 - cache['A' + str(L)])
    for l in range(L, 0, -1):
        if l == L:
            grads['dZ' + str(l)], grads['dW' + str(l)], grads['db' + str(l)], grads['dA' + str(l - 1)] = ActivationBackward(
                Z=cache['Z' + str(l)],
                A_pre=cache['A' + str(l - 1)],
                dA=grads['dA' + str(l)],
                W=parameters['W' + str(l)],
                b=parameters['b' + str(l)],
                activation='sigmoid')
        else:
            grads['dZ' + str(l)], grads['dW' + str(l)], grads['db' + str(l)], grads['dA' + str(l - 1)] = ActivationBackward(
                Z=cache['Z' + str(l)],
                A_pre=cache['A' + str(l - 1)],
                dA=grads['dA' + str(l)],
                W=parameters['W' + str(l)],
                b=parameters['b' + str(l)],
                activation='relu')
    # Placeholder entries for layer 0; they are never used in the parameter update
    grads['dZ0'] = np.array([0]).reshape(1, 1)
    grads['dW0'] = np.array([0]).reshape(1, 1)
    grads['db0'] = np.array([0]).reshape(1, 1)
    # print(grads.keys())
    return grads
def UpdateParameters(parameters, grads, learning_rate):
    L = len(parameters) // 2
    for l in range(1, L + 1):
        parameters['W' + str(l)] = parameters['W' + str(l)] - learning_rate * grads['dW' + str(l)]
        parameters['b' + str(l)] = parameters['b' + str(l)] - learning_rate * grads['db' + str(l)]
    return parameters
def Model(X, Y, dims, learning_rate, iteration_num):
    parameters = InitializeParameters(dims)
    costs = []
    for i in range(iteration_num):
        cache = ForwardPropagation(X, parameters)
        if i % 100 == 0:
            # Record and print the cost every 100 iterations
            costs.append(np.squeeze(ComputeCost(Y, cache)))
            print('time:', i, 'cost:', costs[len(costs) - 1])
        grads = BackwardPropagation(Y, cache, parameters)
        parameters = UpdateParameters(parameters, grads, learning_rate)
    return parameters, costs
def Prediction(parameters, X, Y):
    L = len(parameters) // 2
    cache = ForwardPropagation(X, parameters)
    A = cache['A' + str(L)]
    p = np.zeros(Y.shape)
    m = A.shape[1]
    for i in range(m):
        # Threshold the output activation at 0.5
        if A[0, i] > 0.5:
            p[0, i] = 1
        else:
            p[0, i] = 0
    print("Accuracy: " + str(np.sum((p == Y) / m)))
    return p
# Load the dataset, flatten the images and scale pixel values to [0, 1]
train_x_orig, train_y, test_x_orig, test_y, classes = load_data()
train_x_flatten = train_x_orig.reshape(train_x_orig.shape[0], -1).T
test_x_flatten = test_x_orig.reshape(test_x_orig.shape[0], -1).T
train_x = train_x_flatten / 255
test_x = test_x_flatten / 255
n_x = train_x.shape[0]
n_y = train_y.shape[0]
dims = (n_x, 20, 7, 5, n_y)
parameters, costs = Model(train_x, train_y, dims, learning_rate=0.0075, iteration_num=2500)
print('train set:')
Prediction(parameters, train_x, train_y)
print('test set:')
Prediction(parameters, test_x, test_y)
plt.plot(list(range(25)), costs)
plt.show()
2 Answers
def ReLUBackward(z):
    dz = np.ones(z.shape)
    dz[z < 0] = 0
    return dz
In the line dz = np.ones(z.shape), the dz here should be dA.
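Below is a minimal sketch of what this suggestion looks like when written in the relu_backward(dA, cache) style used by the course's dnn_utils helpers: the backward function receives the upstream gradient dA and returns dZ directly, rather than returning a 0/1 mask that the caller multiplies by dA. The function and variable names here are illustrative, not taken from the original post.

import numpy as np

def relu_backward(dA, Z):
    # Start from a copy of the upstream gradient dA instead of an array of ones
    dZ = np.array(dA, copy=True)
    # ReLU passes no gradient where the pre-activation is non-positive
    dZ[Z <= 0] = 0
    return dZ

# In ActivationBackward, the ReLU branch would then read
#     dZ = relu_backward(dA, Z)
# instead of
#     dZ = dA * ReLUBackward(Z)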