Practice: A Simple Neural Network in NumPy

Updated on 14 Apr 2019

We'll use NumPy to code a simple neural network and test it on the MNIST dataset. The architecture is the same as in TensorFlow's "Get started" tutorial, except that we use plain mini-batch gradient descent as the optimizer instead of Adam.
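
For reference, this is roughly the Keras model that tutorial builds; the 512 hidden units and 0.2 dropout rate are my reading of the tutorial and may not match it exactly:

import tensorflow as tf

# Rough Keras equivalent of the architecture implemented in this post
# (layer sizes are assumptions, not taken from the code below)
reference_model = tf.keras.models.Sequential([
    tf.keras.layers.Flatten(input_shape=(28, 28)),    # Flatten
    tf.keras.layers.Dense(512, activation='relu'),    # Dense -> ReLU
    tf.keras.layers.Dropout(0.2),                     # Dropout
    tf.keras.layers.Dense(10, activation='softmax')   # Dense -> SoftMax
])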

load data

import _pickle as cPickle
import gzip


def load_data():
    # mnist.pkl.gz stores the three splits as a pickled tuple;
    # each split is an (images, labels) pair
    with gzip.open('./data/mnist.pkl.gz', 'rb') as f:
        training_data, validation_data, test_data = cPickle.load(f, encoding='bytes')
    return training_data, validation_data, test_data

You may download the dataset from here.
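
Each split is an (images, labels) pair with the images already flattened into 784-dimensional vectors. A quick shape check, assuming the standard mnist.pkl.gz layout (50,000 / 10,000 / 10,000 examples):

training_data, validation_data, test_data = load_data()

X_train, y_train = training_data   # X_train: (50000, 784) floats in [0, 1]
X_val, y_val = validation_data     # X_val:   (10000, 784)
X_test, y_test = test_data         # X_test:  (10000, 784)

print(X_train.shape, y_train.shape)   # expected: (50000, 784) (50000,)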

weights initialization

import numpy as np


class Model:
    """
    Architecture:
        Flatten -> Dense -> ReLU -> Dropout -> Dense -> SoftMax
    """

    def __init__(self, input_size, hidden_size, output_size, dropout_p):
        # Scale the Gaussian weights by 1 / sqrt(fan_in) to keep the initial
        # activations roughly unit-variance; biases start at zero.
        self.params = {
            'W1': np.random.randn(input_size, hidden_size) / np.sqrt(input_size),
            'b1': np.zeros((1, hidden_size)),
            'W2': np.random.randn(hidden_size, output_size) / np.sqrt(hidden_size),
            'b2': np.zeros((1, output_size))
        }
        self.dropout_p = dropout_p

dropout_p defines the fraction of the input units to drop (here, the hidden layer's activations).
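
For instance, with dropout_p=0.2 roughly 20% of the hidden activations are zeroed on every training batch. The hyperparameters below are just for illustration:

model = Model(input_size=784, hidden_size=512, output_size=10, dropout_p=0.2)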

training function

def train(self, X, y, X_val, y_val, nb_epoch, batch_size, eta):
    n = len(X)
    for i in range(nb_epoch):
        epoch_loss = 0
        # reshuffle the training set each epoch (e.g. sklearn.utils.shuffle)
        X, y = shuffle(X, y)
        for j in range(0, n, batch_size):
            X_batch = X[j:j + batch_size]
            y_batch = y[j:j + batch_size]
            loss, grads = self.loss(X_batch, y_batch)
            epoch_loss += loss
            # vanilla mini-batch gradient descent update
            for param_name in ('W1', 'b1', 'W2', 'b2'):
                self.params[param_name] -= eta * grads[param_name]
        train_acc = self.evaluate(X, y)
        val_acc = self.evaluate(X_val, y_val)
        # note: the reported loss is the sum of per-batch mean losses divided by
        # the dataset size, not the mean per-example loss
        print("epoch %d / %d: loss %f, train_acc: %f, val_acc: %f" %
              (i + 1, nb_epoch, epoch_loss / n, train_acc, val_acc))

ReLU

def ReLU(x):
    return np.maximum(0, x)

dropout

def dropout(x, dropout_p):
    # inverted dropout: zero each unit with probability dropout_p, then rescale
    # by 1 / (1 - dropout_p) so the expected activation is unchanged
    mask = np.random.binomial(1, 1 - dropout_p, size=x.shape)
    return x * mask / (1 - dropout_p)
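
A quick illustrative check of why we rescale by 1 / (1 - dropout_p): the expected activation stays the same, so nothing needs to be rescaled at test time:

x = np.ones((1000, 512))
dropped = dropout(x, dropout_p=0.2)
print(dropped.mean())   # close to 1.0: surviving units are scaled up by 1 / 0.8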

softmax

def softmax(x):
    # subtract the row-wise max before exponentiating for numerical stability
    exps = np.exp(x - np.max(x, axis=1, keepdims=True))
    return exps / np.sum(exps, axis=1, keepdims=True)
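
Subtracting the row-wise maximum leaves the result unchanged (softmax is invariant to adding a constant to each row) but keeps np.exp from overflowing on large logits; a quick illustrative check:

logits = np.array([[1000.0, 1001.0, 1002.0]])
print(softmax(logits))        # [[0.09003057 0.24472847 0.66524096]] -- no overflow
print(softmax(logits).sum())  # ~1.0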

feed forward and back propagation

def loss(self, X, y):
    W1, b1 = self.params['W1'], self.params['b1']
    W2, b2 = self.params['W2'], self.params['b2']

    n = X.shape[0]

    # feed forward pass
    h1 = ReLU(np.dot(X, W1) + b1)
    h1 = dropout(h1, dropout_p=self.dropout_p)
    out = np.dot(h1, W2) + b2
    probs = softmax(out)

    # mean cross-entropy loss over the batch
    log_probs = -np.log(probs[range(n), y])
    loss = np.sum(log_probs) / n

    # backward pass
    # gradient of the cross-entropy w.r.t. the logits: probs - one_hot(y)
    dout = probs
    dout[range(n), y] -= 1
    dh1 = np.dot(dout, W2.T)
    # h1 is the post-dropout activation, so this mask zeroes the gradient of
    # both the ReLU-clipped units and the dropped units
    dh1[h1 <= 0] = 0
    dW2 = np.dot(h1.T, dout)
    db2 = np.sum(dout, axis=0, keepdims=True)
    dW1 = np.dot(X.T, dh1)
    db1 = np.sum(dh1, axis=0, keepdims=True)

    # note: these are gradients of the summed (not mean) batch loss; the
    # learning rate eta effectively absorbs the missing 1/n factor
    grads = {
        'W1': dW1,
        'b1': db1,
        'W2': dW2,
        'b2': db2
    }
    return loss, grads
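
To gain confidence in a hand-written backward pass, it's worth comparing the analytic gradients with centred finite differences. The sketch below is not part of the original code; it checks the gradients of the summed loss (matching how grads is computed above) and sets dropout_p=0 so the forward pass is deterministic:

def grad_check(model, X, y, param_name, eps=1e-5, n_checks=5):
    _, grads = model.loss(X, y)
    n = X.shape[0]
    for _ in range(n_checks):
        # pick a random entry of the parameter and nudge it up and down
        idx = tuple(np.random.randint(s) for s in model.params[param_name].shape)
        old = model.params[param_name][idx]
        model.params[param_name][idx] = old + eps
        loss_plus, _ = model.loss(X, y)
        model.params[param_name][idx] = old - eps
        loss_minus, _ = model.loss(X, y)
        model.params[param_name][idx] = old
        # model.loss returns the mean batch loss, while grads holds gradients of
        # the summed loss, hence the factor n
        numeric = n * (loss_plus - loss_minus) / (2 * eps)
        print(param_name, idx, numeric, grads[param_name][idx])


np.random.seed(0)
tiny = Model(input_size=784, hidden_size=16, output_size=10, dropout_p=0.0)
X_small = np.random.randn(8, 784)
y_small = np.random.randint(0, 10, size=8)
for name in ('W1', 'b1', 'W2', 'b2'):
    grad_check(tiny, X_small, y_small, name)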

evaluation

def evaluate(self, X, y):
    # dropout is not applied at evaluation time; because we use inverted
    # dropout during training, no extra rescaling is needed here
    h1 = ReLU(np.dot(X, self.params['W1']) + self.params['b1'])
    out = np.dot(h1, self.params['W2']) + self.params['b2']
    probs = softmax(out)
    pred = np.argmax(probs, axis=1)
    return np.mean(pred == y)
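
Putting everything together, a minimal driver looks something like this; the hidden size, dropout rate, batch size, learning rate, and epoch count are assumptions for illustration, not necessarily the values behind the output below:

training_data, validation_data, test_data = load_data()
X_train, y_train = training_data
X_val, y_val = validation_data
X_test, y_test = test_data

model = Model(input_size=784, hidden_size=512, output_size=10, dropout_p=0.2)
model.train(X_train, y_train, X_val, y_val, nb_epoch=5, batch_size=32, eta=0.001)

print("Test set accuracy %g" % model.evaluate(X_test, y_test))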

output

epoch 1 / 5: loss 0.024931, train_acc: 0.960260, val_acc: 0.957000
epoch 2 / 5: loss 0.011182, train_acc: 0.979260, val_acc: 0.973300
epoch 3 / 5: loss 0.007939, train_acc: 0.985060, val_acc: 0.976700
epoch 4 / 5: loss 0.006107, train_acc: 0.989840, val_acc: 0.978800
epoch 5 / 5: loss 0.004749, train_acc: 0.991940, val_acc: 0.978300
Test set accuracy 0.9794

See here for the complete code gist.