top of page

MLP-Multiclass-Classifier-MNIST(non-minibatch) - Neural Network Assignment Help

Import Necessary Packages

from DL_utilities import*
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
from sklearn import metrics
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.datasets import fetch_openml
%matplotlib inline

Data Preparation

# Download the "mnist_784" dataset from OpenML.
# as_frame=False asks scikit-learn for plain NumPy arrays. Recent versions
# return pandas objects by default, which do not support the positional
# indexing (X[i, :]) and .reshape calls used in this notebook.
mnist = fetch_openml("mnist_784", as_frame=False)

X, y = mnist["data"], mnist["target"]

print("Shape of X", X.shape)
print("Shape of Y", y.shape)

# Preview the first `sample_size` images in a single row, each labelled
# with its target value.
sample_size = 20
plt.figure(figsize=(20, 20))
for i in range(sample_size):
    plt.subplot(1, sample_size, i + 1)
    # NOTE(review): passing '' to axhline/axvline looks like a trick to blank
    # the axis tick labels -- confirm it behaves as intended on the installed
    # matplotlib version.
    plt.axhline('')
    plt.axvline('')
    plt.text(x=10, y=-5, s=y[i], fontsize=20)
    # Each row of X is one flattened image; restore the 28x28 grid to draw it.
    plt.imshow(X[i, :].reshape([28, 28]), cmap='Greys')
plt.show()
# Rescale the features by 255 (assumes raw pixel values in 0..255 -- TODO confirm)
X = X / 255

# One-hot encode the labels: row k of the identity matrix encodes class k.
# Indexing with the (1, examples) label array gives (1, examples, digits);
# transposing and reshaping yields one one-hot column per example.
digits = 10
examples = y.shape[0]
y = y.reshape(1, examples)
Y_new = np.eye(digits)[y.astype('int32')].T.reshape(digits, examples)

# Seed NumPy's global RNG so the shuffle below is reproducible
np.random.seed(30)

# The first m examples form the training set, the remainder the test set.
# Inputs are transposed so that every example is a column.
m = 60000
m_test = X.shape[0] - m
X_train = X[:m].T
X_test = X[m:].T
Y_train = Y_new[:, :m]
Y_test = Y_new[:, m:]

# Shuffle the training columns, using the same order for inputs and labels
shuffle_index = np.random.permutation(m)
X_train = X_train[:, shuffle_index]
Y_train = Y_train[:, shuffle_index]

print("X_train shape", X_train.shape)
print("Y_train shape", Y_train.shape)
print("X_test shape", X_test.shape)
print("Y_test shape", Y_test.shape)
# Show the first `sample_size` shuffled training images on a 4x5 grid,
# each captioned with its one-hot label column.
count = 0
sample_size = 20
plt.figure(figsize=(20, 20))
for i in range(sample_size):
    count += 1  # 1-based subplot slot for the i-th sample
    plt.subplot(4, 5, count)
    # NOTE(review): the '' argument appears to be a trick to blank the axis
    # tick labels -- confirm on the installed matplotlib version.
    plt.axhline('')
    plt.axvline('')
    plt.text(x=0, y=30, s=Y_train[:, i], fontsize=15)
    # Column i of X_train is one flattened image; draw it as a 28x28 grid.
    plt.imshow(X_train[:, i].reshape(28, 28), cmap='Greys')
plt.show()


Neural Network Model

def softmax(Z):
    """Apply softmax along axis 0 of the pre-activation scores ``Z``.

    Every column of ``Z`` is turned into a set of non-negative values that
    sum to 1.

    Args:
        Z: array of scores; normalisation happens along axis 0.

    Returns:
        Tuple ``(A, Z)``: the softmax output (same shape as ``Z``) and the
        unmodified input, returned alongside so callers can cache it.
    """
    # Shift each column by its OWN maximum. Softmax is invariant to a
    # per-column shift, and this guarantees every column has at least one
    # exp(0) == 1 term. Shifting by the global maximum instead lets a whole
    # column underflow to zeros and produce 0/0 = NaN.
    shifted = Z - np.max(Z, axis=0, keepdims=True)
    exps = np.exp(shifted)
    A = exps / np.sum(exps, axis=0, keepdims=True)
    return A, Z

def softmax_gradient(dA, Z):
    """Back-propagate ``dA`` through the softmax activation.

    The softmax output is recomputed from ``Z`` and only the element-wise
    term ``A * (1 - A)`` is applied; cross terms between different outputs
    are not included.

    NOTE(review): this appears to rely on the caller passing
    ``dA = (A - y) / (A * (1 - A))`` so that the product collapses to
    ``A - y`` -- confirm before reusing with a different loss derivative.

    Args:
        dA: gradient of the cost with respect to the softmax output.
        Z: pre-activation scores that were fed to ``softmax``.

    Returns:
        Gradient of the cost with respect to ``Z``.
    """
    probs, _ = softmax(Z)
    return dA * probs * (1 - probs)

def linear_activation_forward(A_prev, W, b, activation_fn):
    """Run one layer forward: the linear step followed by an activation.

    Args:
        A_prev: activations from the previous layer (or the input data).
        W: weight matrix of this layer; ``W.shape[0]`` is the output size.
        b: bias of this layer, forwarded to ``linear_forward``.
        activation_fn: one of ``"sigmoid"``, ``"tanh"``, ``"relu"`` or
            ``"softmax"``.

    Returns:
        Tuple ``(A, cache)`` where ``A`` is the layer output and ``cache`` is
        ``(linear_cache, activation_cache)`` for use in the backward pass.

    Raises:
        ValueError: if ``activation_fn`` is not a supported name.
    """
    # Reject unknown names up front; otherwise no branch would run and the
    # function would fail later with an unrelated unbound-variable error.
    if activation_fn not in ("sigmoid", "tanh", "relu", "softmax"):
        raise ValueError(
            "Unsupported activation_fn: {!r}".format(activation_fn))

    # The linear step is the same for every activation, so do it once.
    Z, linear_cache = linear_forward(A_prev, W, b)

    if activation_fn == "sigmoid":
        A, activation_cache = sigmoid(Z)
    elif activation_fn == "tanh":
        A, activation_cache = tanh(Z)
    elif activation_fn == "relu":
        A, activation_cache = relu(Z)
    else:  # "softmax"
        A, activation_cache = softmax(Z)

    # Sanity check: one output row per unit, one column per input column.
    assert A.shape == (W.shape[0], A_prev.shape[1])

    cache = (linear_cache, activation_cache)

    return A, cache

def linear_activation_backward(dA, cache, activation_fn):
    """Run one layer backward: activation gradient, then the linear step.

    Args:
        dA: gradient of the cost with respect to this layer's output.
        cache: ``(linear_cache, activation_cache)`` pair stored by the
            forward pass for this layer.
        activation_fn: one of ``"sigmoid"``, ``"tanh"``, ``"relu"`` or
            ``"softmax"``.

    Returns:
        Tuple ``(dA_prev, dW, db)`` as produced by ``linear_backward``.

    Raises:
        ValueError: if ``activation_fn`` is not a supported name.
    """
    linear_cache, activation_cache = cache

    # Reject unknown names up front; otherwise no branch would run and the
    # function would fail at the return with an unbound-variable error.
    if activation_fn not in ("sigmoid", "tanh", "relu", "softmax"):
        raise ValueError(
            "Unsupported activation_fn: {!r}".format(activation_fn))

    if activation_fn == "sigmoid":
        dZ = sigmoid_gradient(dA, activation_cache)
    elif activation_fn == "tanh":
        dZ = tanh_gradient(dA, activation_cache)
    elif activation_fn == "relu":
        dZ = relu_gradient(dA, activation_cache)
    else:  # "softmax"
        dZ = softmax_gradient(dA, activation_cache)

    # The linear backward step is the same for every activation.
    dA_prev, dW, db = linear_backward(dZ, linear_cache)
    return dA_prev, dW, db

def L_model_forward_new(X, parameters, hidden_layers_activation_fn="relu", multiclass=True):
    """Forward pass through the whole network.

    Layers ``1 .. L-1`` use ``hidden_layers_activation_fn``; the last layer
    uses softmax when ``multiclass`` is true and sigmoid otherwise.

    Args:
        X: input data fed to the first layer.
        parameters: dict holding ``"W1", "b1", ..., "WL", "bL"``; the number
            of layers is taken as ``len(parameters) // 2``.
        hidden_layers_activation_fn: activation name for the hidden layers.
        multiclass: selects the output activation (softmax vs. sigmoid).
            Note that the default here is ``True``.

    Returns:
        Tuple ``(AL, caches)``: the output-layer activations and the list of
        per-layer caches in layer order.
    """
    num_layers = len(parameters) // 2
    caches = []
    activations = X

    # Hidden layers 1 .. L-1
    for idx in range(1, num_layers):
        activations, layer_cache = linear_activation_forward(
            activations,
            parameters["W{}".format(idx)],
            parameters["b{}".format(idx)],
            activation_fn=hidden_layers_activation_fn)
        caches.append(layer_cache)

    output_layer = "softmax" if multiclass else "sigmoid"

    # Output layer L
    AL, output_cache = linear_activation_forward(
        activations,
        parameters["W{}".format(num_layers)],
        parameters["b{}".format(num_layers)],
        activation_fn=output_layer)
    caches.append(output_cache)

    return AL, caches

def L_model_backward_new(AL, y, caches, hidden_layers_activation_fn="relu", multiclass=False):
    """Backward pass through the whole network.

    Args:
        AL: output-layer activations from the forward pass.
        y: target values; reshaped to ``AL.shape`` before use.
        caches: list of per-layer caches from the forward pass.
        hidden_layers_activation_fn: activation name used by hidden layers.
        multiclass: selects the output activation to differentiate
            (softmax when true, sigmoid otherwise). Note that the default
            here is ``False``.

    Returns:
        Dict with ``"dA<l-1>"``, ``"dW<l>"`` and ``"db<l>"`` for every layer
        ``l`` from ``L`` down to 1.
    """
    y = y.reshape(AL.shape)
    num_layers = len(caches)
    output_layer = "softmax" if multiclass else "sigmoid"

    # Derivative of the cost with respect to AL in the form
    # (AL - y) / (AL * (1 - AL)); it is undefined where AL is exactly 0 or 1.
    dAL = (AL - y) / (AL * (1 - AL))

    grads = {}

    # Output layer
    dA_prev, dW, db = linear_activation_backward(dAL, caches[-1], output_layer)
    grads["dA" + str(num_layers - 1)] = dA_prev
    grads["dW" + str(num_layers)] = dW
    grads["db" + str(num_layers)] = db

    # Hidden layers, from L-1 down to 1
    for layer in reversed(range(1, num_layers)):
        dA_prev, dW, db = linear_activation_backward(
            grads["dA" + str(layer)], caches[layer - 1],
            hidden_layers_activation_fn)
        grads["dA" + str(layer - 1)] = dA_prev
        grads["dW" + str(layer)] = dW
        grads["db" + str(layer)] = db

    return grads
# For multiclass classification
def CrossEntropy(AL, y):
    """Mean cross-entropy cost ``-(1/m) * sum(y * log(AL))``.

    Args:
        AL: predicted values, same shape as ``y``.
        y: 2-D array of targets; ``y.shape[1]`` is used as the number of
            examples ``m``.

    Returns:
        The scalar cost. It is infinite when a predicted value of 0 meets a
        non-zero target.
    """
    AL = np.asarray(AL, dtype=float)
    y = np.asarray(y)
    m = y.shape[1]

    # Evaluate log(AL) only where the target is non-zero. Entries with
    # y == 0 contribute nothing to the sum, but computing 0 * log(0) would
    # give NaN and poison the whole cost.
    log_probs = np.zeros(np.broadcast(AL, y).shape)
    np.log(AL, out=log_probs, where=(y != 0))

    cost_sum = np.sum(np.multiply(y, log_probs))
    cost = -(1. / m) * cost_sum
    return cost

# For multiclass classification
def evaluate_cost_acc(X, y, params, activation_fn):
    """Compute the cross-entropy cost and accuracy of the model on a dataset.

    Args:
        X: input data passed to ``L_model_forward_new``.
        y: targets; the true class of each column is its arg-max along
            axis 0.
        params: network parameters passed to ``L_model_forward_new``.
        activation_fn: activation name for the hidden layers.

    Returns:
        Tuple ``(cost, accuracy)`` with the accuracy scaled by 100.
    """
    # Caches are only needed for back-propagation; discard them here.
    probs, _ = L_model_forward_new(X, params, activation_fn)
    cost = CrossEntropy(probs, y)

    # Class index with the largest value in each column.
    predicted = np.argmax(probs, axis=0)
    actual = np.argmax(y, axis=0)

    accuracy_pct = metrics.accuracy_score(predicted, actual) * 100
    return cost, accuracy_pct


Neural Network Training

# Seed NumPy's global RNG before the parameters are initialised
np.random.seed(48)

# Neural Network Model and Initialize parameters
# 784 --> 20 --> 15 --> 10
layers_dims = [X_train.shape[0], 20, 15, 10]
params = initialize_parameters(layers_dims)
hn_activation = "sigmoid"

# Hyper-parameters
learning_rate = 0.5
number_of_epoch = 100

# Per-epoch history of cost and accuracy on the training and test sets
train_costs, train_accs, test_costs, test_accs = [], [], [], []

for epoch in range(number_of_epoch):
    # One full-batch step: forward pass, backward pass, parameter update
    AL, caches = L_model_forward_new(X_train, params, hn_activation, multiclass=True)
    grads = L_model_backward_new(AL, Y_train, caches, hn_activation, multiclass=True)
    params = update_parameters(params, grads, learning_rate)

    # Evaluate with the freshly updated parameters
    train_cost, train_acc = evaluate_cost_acc(X_train, Y_train, params, hn_activation)
    test_cost, test_acc = evaluate_cost_acc(X_test, Y_test, params, hn_activation)
    train_costs.append(train_cost)
    test_costs.append(test_cost)
    train_accs.append(train_acc)
    test_accs.append(test_acc)

    # Adjacent literals are concatenated; a backslash continuation inside the
    # string would embed the next line's indentation in the printed message.
    print(("Epoch {}: \ntraining cost = {:.3f}, training accuracy = {:.2f}, "
           "\ntest cost = {:.3f}, test accuracy = {:.2f}").format(
        epoch + 1, train_cost, train_acc, test_cost, test_acc))

print("Done.")


Contact us or send your requirement details at:



bottom of page