In [11]:
import os
from itertools import product

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

In [12]:
# Load the table as a plain array: column 0 is used as the label, the rest as features.
data = pd.read_csv('data/bel_data_test.csv').to_numpy()

# Features are stored column-per-sample (features x samples); labels stay 1-D.
Y = data[:, 0].astype(int)
X = data[:, 1:].T

# Hold out the first N_TEST samples as the test set; everything after is train/val.
N_TEST = 1000
X_test, X_remain = X[:, :N_TEST], X[:, N_TEST:]
Y_test, Y_remain = Y[:N_TEST], Y[N_TEST:]

# train_test_split expects samples on axis 0, so split on the transposed view
# and flip the feature matrices back to (features x samples) afterwards.
train_rows, val_rows, Y_train, Y_val = train_test_split(
    X_remain.T, Y_remain, test_size=0.2, random_state=42
)
X_train = train_rows.T
X_val = val_rows.T

In [None]:
# Determine input and output layer sizes
# Input: one unit per feature (X_train is laid out features x samples).
input_size = X_train.shape[0]
# Output: one unit per class index 0..Y.max(). This matches one_hot(), which
# builds Y.max() + 1 rows; the previous `len(np.unique(Y)) - 1` is one row short
# for ordinary 0-based labels and makes `AL - Y` fail in backward_prop.
output_size = int(Y.max()) + 1

print(f"Input layer size: {input_size}")
print(f"Output layer size: {output_size}")

In [14]:
def init_params(layer_dims):
    """Create the weight and bias dictionary for a fully connected network.

    Args:
        layer_dims: sequence of layer widths, input layer first, output layer last.

    Returns:
        dict with keys 'W1', 'b1', ..., one pair per layer transition. Each
        'W{l}' has shape (layer_dims[l], layer_dims[l-1]) and is drawn from a
        standard normal scaled by sqrt(2 / layer_dims[l-1]); each 'b{l}' is a
        zero column vector of shape (layer_dims[l], 1).
    """
    params = {}

    # Walk consecutive (previous width, current width) pairs; layers are 1-indexed.
    for idx, (fan_in, fan_out) in enumerate(zip(layer_dims[:-1], layer_dims[1:]), start=1):
        scale = np.sqrt(2. / fan_in)
        params[f'W{idx}'] = np.random.randn(fan_out, fan_in) * scale
        params[f'b{idx}'] = np.zeros((fan_out, 1))

    return params

In [15]:
def ReLU(Z):
    """Rectified linear unit: element-wise maximum of Z and 0."""
    rectified = np.maximum(Z, 0)
    return rectified

def softmax(Z):
    """Column-wise softmax over axis 0.

    Args:
        Z: array of logits; each column (axis 0) is normalized independently.

    Returns:
        Array of the same shape as Z whose entries along axis 0 sum to 1.
    """
    # Subtracting the per-column max leaves the result mathematically unchanged
    # but keeps np.exp from overflowing to inf (and the ratio from becoming NaN).
    shifted = Z - np.max(Z, axis=0, keepdims=True)
    exp_Z = np.exp(shifted)
    return exp_Z / np.sum(exp_Z, axis=0, keepdims=True)

def forward_prop(X, params):
    """Run a forward pass: ReLU on every hidden layer, softmax on the last.

    Args:
        X: input activations fed to the first layer.
        params: dict holding 'W{l}' / 'b{l}' for l = 1..L, where
            L = len(params) // 2.

    Returns:
        (AL, caches): AL is the softmax output of the final layer; caches is a
        list with one (A_prev, W, b, Z) tuple per layer, in layer order.
    """
    n_layers = len(params) // 2
    caches = []
    activation = X

    # Hidden layers 1..L-1: affine transform followed by ReLU.
    for idx in range(1, n_layers):
        weights, bias = params[f'W{idx}'], params[f'b{idx}']
        pre_activation = np.dot(weights, activation) + bias
        caches.append((activation, weights, bias, pre_activation))
        activation = ReLU(pre_activation)

    # Output layer L: affine transform followed by softmax.
    weights, bias = params[f'W{n_layers}'], params[f'b{n_layers}']
    logits = np.dot(weights, activation) + bias
    caches.append((activation, weights, bias, logits))

    return softmax(logits), caches

def ReLU_deriv(Z):
    """Derivative mask of ReLU: True where Z is strictly positive, False elsewhere."""
    return np.greater(Z, 0)

def one_hot(Y, num_classes=None):
    """One-hot encode integer labels as a (classes x samples) matrix.

    Args:
        Y: array-like of integer class indices.
        num_classes: number of rows in the result. Defaults to Y.max() + 1
            (the original behavior). Pass it explicitly when Y may not contain
            the highest class, so the encoding still lines up with the network
            output.

    Returns:
        Float array of shape (num_classes, Y.size) with a single 1 per column.

    Raises:
        IndexError: if a label is >= num_classes.
    """
    Y = np.asarray(Y)
    if num_classes is None:
        # An empty batch has no max; fall back to zero classes instead of raising.
        num_classes = int(Y.max()) + 1 if Y.size else 0

    one_hot_Y = np.zeros((num_classes, Y.size))
    if Y.size:
        # Column j gets its 1 in row Y[j].
        one_hot_Y[Y, np.arange(Y.size)] = 1
    return one_hot_Y

def backward_prop(AL, Y, caches):
    """Backpropagate softmax/cross-entropy gradients through the network.

    Args:
        AL: softmax output of the final layer, shape (classes, samples).
        Y: integer class labels, one per sample (column of AL).
        caches: list of per-layer (A_prev, W, b, Z) tuples as produced by
            forward_prop, in layer order.

    Returns:
        dict with 'dW{l}' and 'db{l}' for every layer l = 1..len(caches),
        each averaged over the samples.

    Raises:
        IndexError: if a label is not a valid row index of AL.
    """
    grads = {}
    L = len(caches)
    n_classes, m = AL.shape

    # Build the one-hot targets with exactly as many rows as the network has
    # outputs. Sizing them from Y.max() instead breaks `AL - Y` whenever the
    # batch happens not to contain the highest class.
    Y = np.asarray(Y)
    Y_one_hot = np.zeros((n_classes, m))
    Y_one_hot[Y, np.arange(m)] = 1

    # Output layer: for softmax + cross-entropy the gradient w.r.t. the
    # pre-activation is simply (prediction - target).
    dZL = AL - Y_one_hot
    current_cache = caches[L-1]
    grads[f"dW{L}"] = 1 / m * np.dot(dZL, current_cache[0].T)
    grads[f"db{L}"] = 1 / m * np.sum(dZL, axis=1, keepdims=True)
    dA_prev = np.dot(current_cache[1].T, dZL)

    # Hidden layers, last to first (0-based cache index l is layer l + 1).
    for l in reversed(range(L-1)):
        current_cache = caches[l]
        dZ = dA_prev * ReLU_deriv(current_cache[3])
        grads[f"dW{l+1}"] = 1 / m * np.dot(dZ, current_cache[0].T)
        grads[f"db{l+1}"] = 1 / m * np.sum(dZ, axis=1, keepdims=True)
        if l > 0:
            # No gradient is needed w.r.t. the network input, so skip layer 1.
            dA_prev = np.dot(current_cache[1].T, dZ)

    return grads

def update_params(params, grads, alpha):
    """Apply one gradient-descent step to every weight and bias, in place.

    Args:
        params: dict of 'W{l}' / 'b{l}' arrays; the arrays are modified in place.
        grads: dict of matching 'dW{l}' / 'db{l}' gradients.
        alpha: learning rate.

    Returns:
        The same `params` dict that was passed in.
    """
    n_layers = len(params) // 2

    for idx in range(1, n_layers + 1):
        for kind in ("W", "b"):
            params[f"{kind}{idx}"] -= alpha * grads[f"d{kind}{idx}"]

    return params

def get_predictions(AL):
    """Return, for each column of AL, the row index holding the largest value."""
    scores = np.asarray(AL)
    return scores.argmax(axis=0)

def get_accuracy(predictions, Y):
    """Fraction of entries in `predictions` that equal the matching entry of Y."""
    hits = predictions == Y
    return np.sum(hits) / Y.size

In [16]:
def gradient_descent(X_train, Y_train, X_val, Y_val, layer_dims, alpha, iterations, accuracy_threshold=0.85):
    """Train a network with full-batch gradient descent and track the best weights.

    Every 100 iterations the train and validation accuracies are printed and
    recorded. The train accuracy is computed from the forward pass made before
    that iteration's update; the validation accuracy uses the updated weights.
    Training stops early once the validation accuracy reaches
    `accuracy_threshold`.

    Args:
        X_train, Y_train: training inputs and integer labels.
        X_val, Y_val: validation inputs and integer labels.
        layer_dims: layer widths passed to init_params.
        alpha: learning rate.
        iterations: maximum number of update steps.
        accuracy_threshold: validation accuracy that triggers early stopping.

    Returns:
        (best_params, best_val_accuracy, acc_store): a snapshot of the
        parameters at the best recorded validation accuracy (the initial
        parameters if no checkpoint ever scored above 0), that accuracy, and
        the list of (train_accuracy, val_accuracy) pairs.
    """
    params = init_params(layer_dims)
    best_val_accuracy = 0
    # Start from a snapshot of the initial weights so the return value is always
    # defined, even with iterations == 0 or a validation accuracy stuck at 0.
    best_params = {name: value.copy() for name, value in params.items()}
    acc_store = []

    for i in range(iterations):
        AL, caches = forward_prop(X_train, params)
        grads = backward_prop(AL, Y_train, caches)
        params = update_params(params, grads, alpha)

        if i % 100 == 0:
            train_predictions = get_predictions(AL)
            train_accuracy = get_accuracy(train_predictions, Y_train)

            val_AL, _ = forward_prop(X_val, params)
            val_predictions = get_predictions(val_AL)
            val_accuracy = get_accuracy(val_predictions, Y_val)

            print(f"Iteration {i}: Train Accuracy: {train_accuracy:.4f}, Validation Accuracy: {val_accuracy:.4f}")
            acc_store.append((train_accuracy, val_accuracy))

            if val_accuracy > best_val_accuracy:
                best_val_accuracy = val_accuracy
                # update_params modifies the arrays in place, so a shallow
                # dict.copy() would keep pointing at the live weights and the
                # "best" snapshot would drift with every later step. Copy the
                # arrays themselves.
                best_params = {name: value.copy() for name, value in params.items()}

            # Early stopping condition based on validation accuracy threshold
            if val_accuracy >= accuracy_threshold:
                print(f"Validation accuracy threshold of {accuracy_threshold:.2f} reached. Stopping training.")
                break

    return best_params, best_val_accuracy, acc_store

In [17]:
def grid_search(X_train, Y_train, X_val, Y_val, layer_configs, alpha, iterations, accuracy_threshold=0.85):
    """Train one network per hidden-layer configuration and rank them.

    Reads the module-level `input_size` and `output_size` to build the full
    layer list around each hidden configuration.

    Args:
        X_train, Y_train, X_val, Y_val: data forwarded to gradient_descent.
        layer_configs: iterable of hidden-layer width sequences.
        alpha: learning rate.
        iterations: maximum update steps per architecture.
        accuracy_threshold: early-stopping threshold forwarded to gradient_descent.

    Returns:
        List of (layer_config, accuracy, best_params, acc_store) tuples sorted
        by accuracy, highest first.
    """
    results = []

    for layer_config in layer_configs:
        layer_dims = [input_size, *layer_config, output_size]
        print(f"Training architecture: {layer_dims}")
        best_params, accuracy, acc_store = gradient_descent(
            X_train, Y_train, X_val, Y_val,
            layer_dims, alpha, iterations, accuracy_threshold,
        )
        results.append((layer_config, accuracy, best_params, acc_store))
        print(f"Architecture {layer_dims}: Best Validation Accuracy: {accuracy:.4f}\n")

    # Rank by validation accuracy, best first.
    results.sort(key=lambda entry: entry[1], reverse=True)
    return results

In [18]:
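# NOTE(review): unused leftover, kept commented out. It thresholds the output at 0.5 and calls
# `forward_propagation`, which is not defined in the visible cells (the forward pass here is
# `forward_prop`) -- update it before re-enabling, or delete this cell.
# def predict(X, parameters):
#     AL, _ = forward_propagation(X, parameters)
#     predictions = (AL > 0.5)  # Classify as 1 if greater than 0.5
#     return predictions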

In [19]:
# Depths (number of hidden layers) and per-layer widths to explore.
hidden_layers = [1, 2]
neurons_per_layer = [64, 128, 256]
# Enumerate every width combination for EACH requested depth. Using only
# max(hidden_layers) would silently drop the shallower architectures
# (here: all single-hidden-layer networks).
layer_configs = [
    config
    for depth in hidden_layers
    for config in product(neurons_per_layer, repeat=depth)
]

In [None]:
# Perform grid search
# Seed NumPy's global RNG so the random weight initialization in init_params
# (and therefore the architecture ranking) is reproducible across runs; the
# value matches the random_state used for the train/validation split.
np.random.seed(42)
print("Performing grid search...")
best_configs = grid_search(X_train, Y_train, X_val, Y_val, layer_configs, alpha=0.01, iterations=4000)

print("\nTop 5 Architectures:")
for config, accuracy, _, _ in best_configs[:5]:
    print(f"Hidden Layers: {config}, Validation Accuracy: {accuracy:.4f}")

# Select the best configuration (results are sorted best-first by grid_search)
best_config, best_accuracy, best_params, best_acc_store = best_configs[0]
best_layer_dims = [input_size] + list(best_config) + [output_size]

print(f"\nBest architecture: {best_layer_dims}")
print(f"Best validation accuracy: {best_accuracy:.4f}")

In [None]:
# Save the accuracy data for the best model
# Create the output directories first: to_csv / np.savez do not create missing
# parent folders, and failing here would throw away a finished grid search.
os.makedirs('results', exist_ok=True)
df = pd.DataFrame(best_acc_store, columns=['Train Accuracy', 'Validation Accuracy'])
df.to_csv('results/bel_acc.csv', index=False)

# Save the weights of the best model (np.savez appends the .npz extension)
os.makedirs('weights', exist_ok=True)
np.savez("weights/bel_weights", **best_params)

# Evaluate on test set
test_AL, _ = forward_prop(X_test, best_params)
test_predictions = get_predictions(test_AL)
test_accuracy = get_accuracy(test_predictions, Y_test)
print(f"Test Accuracy: {test_accuracy:.4f}")