top of page
Search

# ADALINE with Stochastic Gradient Descent (Minibatch) | Implementing Stochastic Gradient Descent

In this notebook, we are implementing ADALINE "by hand" without using PyTorch's autograd capabilities.

## Import Necessary Packages

```import pandas as pd
import matplotlib.pyplot as plt
import torch
%matplotlib inline```

## Load & Prepare a Toy Dataset

```df = pd.read_csv('./datasets/iris.data', index_col=None, header=None)
df.columns = ['x1', 'x2', 'x3', 'x4', 'y']
df = df.iloc[50:150]
df['y'] = df['y'].apply(lambda x: 0 if x == 'Iris-versicolor' else 1)
df.tail()```

Output:

```# Assign features and target

X = torch.tensor(df[['x2', 'x3']].values, dtype=torch.float)
y = torch.tensor(df['y'].values, dtype=torch.int)

# Shuffling & train/test split

torch.manual_seed(123)
shuffle_idx = torch.randperm(y.size(0), dtype=torch.long)

X, y = X[shuffle_idx], y[shuffle_idx]

percent70 = int(shuffle_idx.size(0)*0.7)

X_train, X_test = X[shuffle_idx[:percent70]], X[shuffle_idx[percent70:]]
y_train, y_test = y[shuffle_idx[:percent70]], y[shuffle_idx[percent70:]]

# Normalize (mean zero, unit variance)

mu, sigma = X_train.mean(dim=0), X_train.std(dim=0)
X_train = (X_train - mu) / sigma
X_test = (X_test - mu) / sigma
```

```plt.scatter(X_train[y_train == 0, 0], X_train[y_train == 0, 1], label='class 0')
plt.scatter(X_train[y_train == 1, 0], X_train[y_train == 1, 1], label='class 1')
plt.legend()
plt.show()```

Output:

```plt.scatter(X_test[y_test == 0, 0], X_test[y_test == 0, 1], label='class 0')
plt.scatter(X_test[y_test == 1, 0], X_test[y_test == 1, 1], label='class 1')
plt.legend()
plt.show()```

Output:

## Implement ADALINE Model

```class Adaline1():
def __init__(self, num_features):
self.num_features = num_features
self.weights = torch.zeros(num_features, 1,
dtype=torch.float)
self.bias = torch.zeros(1, dtype=torch.float)

def forward(self, x):
netinputs = torch.add(torch.mm(x, self.weights), self.bias)
activations = netinputs
return activations.view(-1)

def backward(self, x, yhat, y):

grad_loss_yhat = 2*(yhat - y)

# Chain rule: inner times outer
grad_loss_yhat.view(-1, 1)) / y.size(0)

# return negative gradient
```

## Define Training and Evaluation Functions

```####################################################
##### Training and evaluation wrappers
###################################################

def loss(yhat, y):
    """Mean squared error between predictions ``yhat`` and targets ``y``.

    BUG FIX: the original function body was missing entirely; train()
    below calls loss() and logs its scalar result.
    """
    return torch.mean((yhat - y)**2)

def train(model, x, y, num_epochs,
          learning_rate=0.01, seed=123, minibatch_size=10):
    """Train ``model`` with minibatch stochastic gradient descent.

    Shuffles the data each epoch, splits it into minibatches, and applies
    the model's (negative) gradients scaled by ``learning_rate``.
    Returns a list holding the full-dataset MSE after every epoch.
    """
    cost = []

    torch.manual_seed(seed)
    for e in range(num_epochs):

        #### Shuffle epoch
        shuffle_idx = torch.randperm(y.size(0), dtype=torch.long)
        minibatches = torch.split(shuffle_idx, minibatch_size)

        for minibatch_idx in minibatches:

            #### Compute outputs ####
            yhat = model.forward(x[minibatch_idx])

            #### Compute gradients ####
            # BUG FIX: the return values of backward() were never captured,
            # so negative_grad_w / negative_grad_b below were undefined
            # names (NameError on the first update step).
            negative_grad_w, negative_grad_b = model.backward(
                x[minibatch_idx], yhat, y[minibatch_idx])

            #### Update weights ####
            model.weights += learning_rate * negative_grad_w
            model.bias += learning_rate * negative_grad_b

            #### Logging ####
            minibatch_loss = loss(yhat, y[minibatch_idx])
            print('    Minibatch MSE: %.3f' % minibatch_loss)

        #### Logging ####
        yhat = model.forward(x)
        curr_loss = loss(yhat, y)
        print('Epoch: %03d' % (e+1), end="")
        print(' | MSE: %.5f' % curr_loss)
        cost.append(curr_loss)

    return cost
```

## Train Model

```model = Adaline1(num_features=X_train.size(1))
cost = train(model,
X_train, y_train.float(),
num_epochs=20,
learning_rate=0.1,
seed=123,
minibatch_size=10)```

Output:

Minibatch MSE: 0.500 Minibatch MSE: 0.341 Minibatch MSE: 0.220 Minibatch MSE: 0.245 Minibatch MSE: 0.157 Minibatch MSE: 0.133 Minibatch MSE: 0.144 Epoch: 001 | MSE: 0.12142 Minibatch MSE: 0.107 Minibatch MSE: 0.147 Minibatch MSE: 0.064 Minibatch MSE: 0.079 Minibatch MSE: 0.185 Minibatch MSE: 0.063 Minibatch MSE: 0.135 Epoch: 002 | MSE: 0.09932 Minibatch MSE: 0.093 Minibatch MSE: 0.064 ... ...

## Plot Loss (MSE)

```plt.plot(range(len(cost)), cost)
plt.ylabel('Mean Squared Error')
plt.xlabel('Epoch')
plt.show()```

Output:

### Compare with analytical solution

```print('Weights', model.weights)
print('Bias', model.bias)```

Output:

Weights tensor([[-0.0763],

[ 0.4181]])

Bias tensor([0.4888])

```def analytical_solution(x, y):
Xb = torch.cat( (torch.ones((x.size(0), 1)), x), dim=1)
w = torch.zeros(x.size(1))
z = torch.inverse(torch.matmul(Xb.t(), Xb))
params = torch.matmul(z, torch.matmul(Xb.t(), y))
b, w = torch.tensor([params[0]]), params[1:].view(x.size(1), 1)
return w, b

w, b = analytical_solution(X_train, y_train.float())
print('Analytical weights', w)
print('Analytical bias', b)```

Output:

Analytical weights tensor([[-0.0703],

[ 0.4219]]) Analytical bias tensor([0.4857])

## Evaluate on Evaluation Metric (Prediction Accuracy)

```ones = torch.ones(y_train.size())
zeros = torch.zeros(y_train.size())
train_pred = model.forward(X_train)
train_acc = torch.mean(
(torch.where(train_pred > 0.5,
ones,
zeros).int() == y_train).float())

ones = torch.ones(y_test.size())
zeros = torch.zeros(y_test.size())
test_pred = model.forward(X_test)
test_acc = torch.mean(
(torch.where(test_pred > 0.5,
ones,
zeros).int() == y_test).float())

print('Training Accuracy: %.2f' % (train_acc*100))
print('Test Accuracy: %.2f' % (test_acc*100))```

Output:

Training Accuracy: 90.00

Test Accuracy: 96.67

## Decision Boundary

```##########################
### 2D Decision Boundary
##########################

w, b = model.weights, model.bias - 0.5

x_min = -3
y_min = ( (-(w[0] * x_min) - b[0])
/ w[1] )

x_max = 3
y_max = ( (-(w[0] * x_max) - b[0])
/ w[1] )

fig, ax = plt.subplots(1, 2, sharex=True, figsize=(7, 3))

ax[0].plot([x_min, x_max], [y_min, y_max])
ax[1].plot([x_min, x_max], [y_min, y_max])

ax[0].scatter(X_train[y_train==0, 0], X_train[y_train==0, 1], label='class 0', marker='o')
ax[0].scatter(X_train[y_train==1, 0], X_train[y_train==1, 1], label='class 1', marker='s')

ax[1].scatter(X_test[y_test==0, 0], X_test[y_test==0, 1], label='class 0', marker='o')
ax[1].scatter(X_test[y_test==1, 0], X_test[y_test==1, 1], label='class 1', marker='s')

ax[1].legend(loc='upper left')
plt.show()
```

Output: