# -*- coding: utf-8 -*-
"""Untitled2.ipynb
Automatically generated by Colab.
Original file is located at
https://colab.research.google.com/drive/1ERPT573YXenYO4d-XY_q5dm6ffWei6xQ
"""
import numpy as np
import matplotlib.pyplot as plt
def init_params(layer_dims):
    np.random.seed(3)
    params = {}
    L = len(layer_dims)
    for l in range(1, L):
        # Small random weights of shape (units in layer l, units in layer l-1)
        params['W'+str(l)] = np.random.randn(layer_dims[l], layer_dims[l-1]) * 0.01
        # Bias column vector for layer l
        params['b'+str(l)] = np.zeros((layer_dims[l], 1))
    return params
# Z (linear hypothesis) - Z = W*X + b,
# W - weight matrix, b - bias vector, X - input
def sigmoid(Z):
    A = 1 / (1 + np.exp(-Z))
    cache = Z
    return A, cache
def forward_prop(X, params):
    A = X  # input to the first layer, i.e. the training data
    caches = []
    L = len(params) // 2
    for l in range(1, L + 1):
        A_prev = A
        # Linear hypothesis for layer l
        Z = np.dot(params['W'+str(l)], A_prev) + params['b'+str(l)]
        # Storing the linear cache
        linear_cache = (A_prev, params['W'+str(l)], params['b'+str(l)])
        # Applying sigmoid on the linear hypothesis
        A, activation_cache = sigmoid(Z)
        # Storing both the linear and activation caches
        cache = (linear_cache, activation_cache)
        caches.append(cache)
    return A, caches
def cost_function(A, Y):
    m = Y.shape[1]
    # Binary cross-entropy averaged over the m examples
    cost = (-1/m) * (np.dot(np.log(A), Y.T) + np.dot(np.log(1-A), (1-Y).T))
    return np.squeeze(cost)  # reduce the 1x1 array to a scalar
def one_layer_backward(dA, cache):
    linear_cache, activation_cache = cache
    Z = activation_cache
    s = 1 / (1 + np.exp(-Z))
    dZ = dA * s * (1 - s)  # derivative of the sigmoid activation
    A_prev, W, b = linear_cache
    m = A_prev.shape[1]
    dW = (1/m) * np.dot(dZ, A_prev.T)
    db = (1/m) * np.sum(dZ, axis=1, keepdims=True)
    dA_prev = np.dot(W.T, dZ)
    return dA_prev, dW, db
def backprop(AL, Y, caches):
    grads = {}
    L = len(caches)
    m = AL.shape[1]
    Y = Y.reshape(AL.shape)
    # Derivative of the cross-entropy cost with respect to the final activation AL
    dAL = -(np.divide(Y, AL) - np.divide(1-Y, 1-AL))
    # Gradients for the last layer L
    current_cache = caches[L-1]
    grads['dA'+str(L-1)], grads['dW'+str(L)], grads['db'+str(L)] = one_layer_backward(dAL, current_cache)
    # Gradients for layers L-1 down to 1
    for l in reversed(range(L-1)):
        current_cache = caches[l]
        dA_prev_temp, dW_temp, db_temp = one_layer_backward(grads["dA" + str(l+1)], current_cache)
        grads["dA" + str(l)] = dA_prev_temp
        grads["dW" + str(l + 1)] = dW_temp
        grads["db" + str(l + 1)] = db_temp
    return grads
def update_parameters(parameters, grads, learning_rate):
    L = len(parameters) // 2
    for l in range(L):
        # Gradient-descent step for each layer's weights and biases
        parameters['W'+str(l+1)] = parameters['W'+str(l+1)] - learning_rate * grads['dW'+str(l+1)]
        parameters['b'+str(l+1)] = parameters['b'+str(l+1)] - learning_rate * grads['db'+str(l+1)]
    return parameters
def train(X, Y, layer_dims, epochs, lr):
    params = init_params(layer_dims)
    cost_history = []
    for i in range(epochs):
        Y_hat, caches = forward_prop(X, params)
        cost = cost_function(Y_hat, Y)
        cost_history.append(cost)
        grads = backprop(Y_hat, Y, caches)
        params = update_parameters(params, grads, lr)
    return params, cost_history
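
# A minimal sketch of how the functions above might be called. The toy dataset,
# layer sizes, epoch count, and learning rate below are illustrative assumptions,
# not part of the original notebook.
if __name__ == "__main__":
    np.random.seed(1)
    X = np.random.randn(2, 100)                    # 2 features, 100 examples
    Y = (X[0:1, :] + X[1:2, :] > 0).astype(float)  # simple separable labels, shape (1, 100)

    layer_dims = [2, 4, 1]  # input layer, one hidden layer, output layer
    params, cost_history = train(X, Y, layer_dims, epochs=1000, lr=0.1)

    # Plot the training cost to check that it decreases
    plt.plot(cost_history)
    plt.xlabel("epoch")
    plt.ylabel("cost")
    plt.show()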