Initial commit

commit 1995df58ce

21 changed files with 6708 additions and 0 deletions
net/activation.py (Normal file, 59 lines)
@@ -0,0 +1,59 @@
import numpy as np


class Activations:
    @staticmethod
    def LeakyReLU(x, alpha=0.01):
        return np.where(x > 0, x, alpha * x)

    @staticmethod
    def LeakyReLU_deriv(x, alpha=0.01):
        return np.where(x > 0, 1, alpha)

    @staticmethod
    def InverseLeakyReLU(x, alpha=0.01):
        return np.where(x > 0, x, x / alpha)

    @staticmethod
    def ReLU(x):
        return np.maximum(0, x)

    @staticmethod
    def ReLU_deriv(x):
        return np.where(x > 0, 1, 0)

    @staticmethod
    def InverseReLU(x):
        return np.maximum(0, x)  # Note: This is lossy for negative values

    @staticmethod
    def Sigmoid(x):
        return 1 / (1 + np.exp(-x))

    @staticmethod
    def Sigmoid_deriv(x):
        s = Activations.Sigmoid(x)
        return s * (1 - s)

    @staticmethod
    def InverseSigmoid(x):
        return np.log(x / (1 - x))

    @staticmethod
    def Softmax(x):
        exp_x = np.exp(x - np.max(x, axis=0, keepdims=True))
        return exp_x / np.sum(exp_x, axis=0, keepdims=True)

    @staticmethod
    def InverseSoftmax(x):
        return np.log(x) - np.max(np.log(x))

    @classmethod
    def get_function_name(cls, func):
        return func.__name__

    @classmethod
    def get_all_activation_names(cls):
        return [name for name, func in cls.__dict__.items()
                if callable(func) and not name.startswith("__") and
                not name.endswith("_deriv") and not name.startswith("Inverse") and
                name not in ['get_function_name', 'get_all_activation_names']]
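All of these operate element-wise on NumPy arrays and follow a columns-as-samples convention (Softmax normalizes over axis=0). A minimal sanity-check sketch, assuming the package is importable as net.activation; it is illustrative only and not part of the commit:

import numpy as np
from net.activation import Activations as af

Z = np.random.randn(10, 5)          # 10 classes x 5 samples
A = af.Softmax(Z)
print(A.shape, A.sum(axis=0))       # (10, 5); each column sums to 1
print(af.ReLU_deriv(np.array([-2.0, 0.0, 3.0])))   # [0 0 1]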
net/loss.py (Normal file, 65 lines)
@@ -0,0 +1,65 @@
import numpy as np


class Loss:
    @staticmethod
    def mean_squared_error(Y, A):
        """ Mean Squared Error (MSE) """
        return np.mean((Y - A) ** 2)

    @staticmethod
    def mean_absolute_error(Y, A):
        """ Mean Absolute Error (MAE) """
        return np.mean(np.abs(Y - A))

    @staticmethod
    def huber_loss(Y, A, delta=1.0):
        """ Huber Loss """
        error = Y - A
        is_small_error = np.abs(error) <= delta
        squared_loss = 0.5 * error ** 2
        linear_loss = delta * (np.abs(error) - 0.5 * delta)
        return np.where(is_small_error, squared_loss, linear_loss).mean()

    @staticmethod
    def binary_cross_entropy_loss(Y, A):
        """ Binary Cross-Entropy Loss """
        m = Y.shape[1]
        return -np.sum(Y * np.log(A + 1e-8) + (1 - Y) * np.log(1 - A + 1e-8)) / m

    @staticmethod
    def categorical_cross_entropy_loss(Y, A):
        """ Categorical Cross-Entropy Loss (for softmax) """
        m = Y.shape[1]
        return -np.sum(Y * np.log(A + 1e-8)) / m

    @staticmethod
    def hinge_loss(Y, A):
        """ Hinge Loss (used in SVM) """
        return np.mean(np.maximum(0, 1 - Y * A))

    @staticmethod
    def kl_divergence(P, Q):
        """ Kullback-Leibler Divergence """
        return np.sum(P * np.log(P / (Q + 1e-8)))

    @staticmethod
    def poisson_loss(Y, A):
        """ Poisson Loss """
        return np.mean(A - Y * np.log(A + 1e-8))

    @staticmethod
    def cosine_proximity_loss(Y, A):
        """ Cosine Proximity Loss """
        dot_product = np.sum(Y * A, axis=0)
        norms = np.linalg.norm(Y, axis=0) * np.linalg.norm(A, axis=0)
        return -np.mean(dot_product / (norms + 1e-8))

    @classmethod
    def get_function_name(cls, func):
        return func.__name__

    @classmethod
    def get_all_loss_names(cls):
        return [name for name, func in cls.__dict__.items()
                if callable(func) and not name.startswith("__") and
                name not in ['get_function_name', 'get_all_loss_names']]
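The losses that divide by m = Y.shape[1] assume the same layout: rows are classes, columns are samples. A small hand-worked check with made-up values (not from the repo):

import numpy as np
from net.loss import Loss

Y = np.eye(3)[:, [0, 1, 2, 1]]      # one-hot labels, shape (3, 4)
A = np.full((3, 4), 1.0 / 3.0)      # uniform predictions
print(Loss.categorical_cross_entropy_loss(Y, A))   # ~= log(3) ~= 1.0986
print(Loss.mean_squared_error(Y, A))               # ~= 0.2222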
net/mlp.py (Normal file, 169 lines)
@@ -0,0 +1,169 @@
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split

from net.activation import Activations as af
from net.optimizer import Optimizers as opt
from net.loss import Loss


class MLP:
    def __init__(self, architecture, activations, optimizer, loss_function):
        self.architecture = architecture
        self.activations = activations
        self.optimizer = self.select_optimizer(optimizer)
        self.loss_function = getattr(Loss, loss_function)

        self.params = self.init_params()
        self.activation_funcs = self.select_activations()

        self.acc_store = []
        self.loss_store = []
        self.test_results = []

    def init_params(self):
        params = {}
        for i in range(1, len(self.architecture)):
            params[f'W{i}'] = np.random.randn(self.architecture[i], self.architecture[i-1]) * 0.01
            params[f'b{i}'] = np.zeros((self.architecture[i], 1))
        return params

    def select_activations(self):
        activation_funcs = []
        for activation in self.activations:
            activation_funcs.append(getattr(af, activation))
        return activation_funcs

    def select_optimizer(self, optimizer_name):
        return getattr(opt, optimizer_name)

    def forward_prop(self, X):
        A = X
        caches = []
        for i in range(1, len(self.architecture)):
            A_prev = A
            W = self.params[f'W{i}']
            b = self.params[f'b{i}']
            Z = np.dot(W, A_prev) + b
            A = self.activation_funcs[i-1](Z)
            # Cache the layer input A_prev; linear_activation_backward needs it for dW.
            caches.append((A_prev, W, b, Z))
        return A, caches

    def backward_prop(self, AL, Y, caches):
        grads = {}
        L = len(caches)

        # Ensure Y is a 2D array
        Y = Y.reshape(-1, 1) if Y.ndim == 1 else Y
        m = Y.shape[1]

        Y = self.one_hot(Y)

        dAL = AL - Y
        current_cache = caches[L-1]
        grads[f"dA{L}"], grads[f"dW{L}"], grads[f"db{L}"] = self.linear_activation_backward(
            dAL, current_cache, self.activation_funcs[L-1].__name__)

        for l in reversed(range(L-1)):
            current_cache = caches[l]
            dA_prev_temp, dW_temp, db_temp = self.linear_activation_backward(
                grads[f"dA{l+2}"], current_cache, self.activation_funcs[l].__name__)
            grads[f"dA{l+1}"] = dA_prev_temp
            grads[f"dW{l+1}"] = dW_temp
            grads[f"db{l+1}"] = db_temp

        return grads

    def one_hot(self, Y):
        num_classes = self.architecture[-1]
        if Y.ndim == 1:
            # Transpose so columns are samples, matching the shape of AL.
            return np.eye(num_classes)[Y].T
        else:
            return np.eye(num_classes)[Y.reshape(-1)].T

    def linear_activation_backward(self, dA, cache, activation):
        A_prev, W, b, Z = cache
        m = A_prev.shape[1]

        if activation == "Softmax":
            # dA passed in is already AL - Y (softmax + cross-entropy shortcut).
            dZ = dA
        elif activation == "ReLU":
            dZ = dA * af.ReLU_deriv(Z)
        else:
            raise ValueError(f"Backward propagation not implemented for {activation}")

        dW = 1 / m * np.dot(dZ, A_prev.T)
        db = 1 / m * np.sum(dZ, axis=1, keepdims=True)
        dA_prev = np.dot(W.T, dZ)

        return dA_prev, dW, db

    def get_predictions(self, A):
        return np.argmax(A, axis=0)

    def get_accuracy(self, predictions, Y):
        return np.mean(predictions == Y)

    def train(self, X, Y, alpha, iterations, validation_split=0.2):
        X_train, X_val, Y_train, Y_val = train_test_split(X.T, Y, test_size=validation_split, shuffle=True, random_state=42)
        X_train, X_val = X_train.T, X_val.T

        # Ensure Y_train and Y_val are 1D arrays
        Y_train = Y_train.ravel()
        Y_val = Y_val.ravel()

        for i in range(iterations):
            AL, caches = self.forward_prop(X_train)
            grads = self.backward_prop(AL, Y_train, caches)

            self.params = self.optimizer(self.params, grads, alpha)

            if i % 10 == 0:
                train_preds = self.get_predictions(AL)
                train_acc = self.get_accuracy(train_preds, Y_train)
                train_loss = self.loss_function(self.one_hot(Y_train), AL)

                AL_val, _ = self.forward_prop(X_val)
                val_preds = self.get_predictions(AL_val)
                val_acc = self.get_accuracy(val_preds, Y_val)
                val_loss = self.loss_function(self.one_hot(Y_val), AL_val)

                print(f"Iteration {i}")
                print(f"Training Accuracy: {train_acc:.4f}, Validation Accuracy: {val_acc:.4f}")
                print(f"Training Loss: {train_loss:.4f}, Validation Loss: {val_loss:.4f}")
                print("-------------------------------------------------------")

                self.acc_store.append((train_acc, val_acc))
                self.loss_store.append((train_loss, val_loss))

        return self.params

    def test(self, X_test, Y_test):
        AL, _ = self.forward_prop(X_test)
        predictions = self.get_predictions(AL)
        test_accuracy = self.get_accuracy(predictions, Y_test)
        test_loss = self.loss_function(self.one_hot(Y_test), AL)

        self.test_results.append((test_accuracy, test_loss))

        print(f"Test Accuracy: {test_accuracy:.4f}")
        print(f"Test Loss: {test_loss:.4f}")

    def save_model(self, dataset):
        weights_file = f"weights/{dataset}_{self.activation_funcs[0].__name__}_weights.npz"
        results_file = f"results/{dataset}_{self.activation_funcs[0].__name__}_results.csv"

        np.savez(weights_file, **self.params)

        train_df = pd.DataFrame(self.acc_store, columns=["training_accuracy", "validation_accuracy"])
        loss_df = pd.DataFrame(self.loss_store, columns=["training_loss", "validation_loss"])
        test_df = pd.DataFrame(self.test_results, columns=['test_accuracy', 'test_loss'])

        combined_df = pd.concat([train_df, loss_df, test_df], axis=1)
        combined_df.to_csv(results_file, index=False)

        print(f"Weights saved to {weights_file}")
        print(f"Results saved to {results_file}")

    def load_weights(self, file_name):
        data = np.load(file_name)
        self.params = {key: data[key] for key in data.files}
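A minimal end-to-end sketch of the class above. The CSV path, architecture, learning rate and iteration count are placeholders (an MNIST-style CSV with the label in column 0, as load_data in net/modules.py expects); only ReLU and Softmax have backward passes implemented:

from net.mlp import MLP
from net.modules import load_data

# "data/train.csv" is an assumed path, not part of this commit.
X_train, Y_train, X_test, Y_test = load_data("data/train.csv")
X_train, X_test = X_train / 255.0, X_test / 255.0   # scale pixel values to [0, 1]

model = MLP(architecture=[784, 128, 10],
            activations=["ReLU", "Softmax"],
            optimizer="gradient_descent",
            loss_function="categorical_cross_entropy_loss")
model.train(X_train, Y_train, alpha=0.1, iterations=500)
model.test(X_test, Y_test)
model.save_model("mnist")   # assumes weights/ and results/ directories exist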
net/modules.py (Normal file, 77 lines)
@@ -0,0 +1,77 @@
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt


def load_data(file_path):
    data = pd.read_csv(file_path)
    data = np.array(data)
    m, n = data.shape

    # Rows 1000..m form the training split; column 0 holds the label.
    data_train = data[1000:m].T
    Y_train = data_train[0].astype(int)
    X_train = data_train[1:n]

    # The first 1000 rows are held out as the test split.
    data_test = data[0:1000].T
    Y_test = data_test[0].astype(int)
    X_test = data_test[1:n]

    return X_train, Y_train, X_test, Y_test


def plot_accuracy(acc_store, save_path=None):
    """
    Plot training and validation accuracy over iterations.

    Parameters:
        acc_store (list of tuples): Each tuple contains (training_accuracy, validation_accuracy).
        save_path (str, optional): If provided, saves the plot to the specified path.
    """
    # Unzip the accuracy data
    training_accuracy, validation_accuracy = zip(*acc_store)

    # Plot
    plt.figure(figsize=(10, 6))
    plt.plot(training_accuracy, label='Training Accuracy')
    plt.plot(validation_accuracy, label='Validation Accuracy')
    plt.title('Training and Validation Accuracy Over Iterations')
    plt.xlabel('Iterations (in steps of 10)')
    plt.ylabel('Accuracy')
    plt.legend()
    plt.grid(True)

    # Save the plot if a path is provided
    if save_path:
        plt.savefig(save_path)
        print(f"Accuracy plot saved to {save_path}")

    # Show the plot
    plt.show()


def plot_loss(loss_store, save_path=None):
    """
    Plot training and validation loss over iterations.

    Parameters:
        loss_store (list of tuples): Each tuple contains (training_loss, validation_loss).
        save_path (str, optional): If provided, saves the plot to the specified path.
    """
    # Unzip the loss data
    training_loss, validation_loss = zip(*loss_store)

    # Plot
    plt.figure(figsize=(10, 6))
    plt.plot(training_loss, label='Training Loss')
    plt.plot(validation_loss, label='Validation Loss')
    plt.title('Training and Validation Loss Over Iterations')
    plt.xlabel('Iterations (in steps of 10)')
    plt.ylabel('Loss')
    plt.legend()
    plt.grid(True)

    # Save the plot if a path is provided
    if save_path:
        plt.savefig(save_path)
        print(f"Loss plot saved to {save_path}")

    # Show the plot
    plt.show()
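The two plot helpers take the (train, validation) tuples that MLP.train collects every 10 iterations. An illustrative call with made-up values and an assumed output path:

from net.modules import plot_accuracy, plot_loss

acc_store = [(0.35, 0.33), (0.61, 0.58), (0.74, 0.70)]     # made-up values
loss_store = [(2.10, 2.15), (1.40, 1.52), (0.95, 1.10)]    # made-up values
plot_accuracy(acc_store, save_path="plots/accuracy.png")   # path is an assumption
plot_loss(loss_store, save_path="plots/loss.png")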
net/optimizer.py (Normal file, 14 lines)
@@ -0,0 +1,14 @@
class Optimizers:
    @staticmethod
    def gradient_descent(params, grads, alpha):
        """
        Performs gradient descent optimization for a multi-layer network.

        :param params: Dictionary containing the network parameters (W1, b1, W2, b2, etc.)
        :param grads: Dictionary containing the gradients (dW1, db1, dW2, db2, etc.)
        :param alpha: Learning rate
        :return: Updated parameters dictionary
        """
        for key in params:
            params[key] -= alpha * grads['d' + key]
        return params
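The update is plain batch gradient descent: params[key] -= alpha * grads['d' + key] for every entry. A tiny numeric check with hand-picked values:

import numpy as np
from net.optimizer import Optimizers

params = {'W1': np.array([[1.0, 2.0]]), 'b1': np.zeros((1, 1))}
grads = {'dW1': np.array([[0.5, 0.5]]), 'db1': np.ones((1, 1))}
updated = Optimizers.gradient_descent(params, grads, alpha=0.1)
print(updated['W1'])   # [[0.95 1.95]]
print(updated['b1'])   # [[-0.1]]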
net/transcoder.py (Normal file, 109 lines)
@@ -0,0 +1,109 @@
import numpy as np

from sklearn.model_selection import train_test_split

from net.mlp import MLP
from net.modules import calculate_loss, calculate_accuracy, plot_learning_curves, plot_encoded_space, plot_reconstructions


class Transcoder(MLP):
    # NOTE: this class targets a two-layer MLP interface (attributes W1/b1/W2/b2,
    # forward_prop returning Z1, A1, Z2, A2) and helpers (calculate_loss,
    # calculate_accuracy, plot_*) that net.modules does not define in this commit,
    # so it does not yet run against the MLP class above.
    def __init__(self, input_size, hidden_size, output_size, hidden_activation='leaky_relu', output_activation='softmax', alpha=0.01):
        super().__init__(input_size, hidden_size, output_size, hidden_activation, output_activation, alpha)
        self.train_losses = []
        self.val_losses = []
        self.train_accuracies = []
        self.val_accuracies = []
        self.image_shape = self.determine_image_shape(input_size)

    @staticmethod
    def determine_image_shape(input_size):
        sqrt = int(np.sqrt(input_size))
        if sqrt ** 2 == input_size:
            return (sqrt, sqrt)
        else:
            return (input_size, 1)  # Default to column vector if not square

    def encode_image(self, X):
        _, _, _, A2 = self.forward_prop(X)
        # print(f"Debug - Encoded image shape: {A2.shape}")  # Debugging
        return A2

    def decode_image(self, A2):
        # Start decoding from the encoded representation (A2)
        # print(f"Debug - A2 image shape: {A2.shape}")  # Debugging

        # Step 1: Reverse the output_activation function to get Z2
        Z2 = self.inverse_output_activation(A2)
        # print(f"Debug - Z2 image shape: {Z2.shape}")  # Debugging

        # Step 2: Reverse the second linear transformation to get A1
        A1 = np.linalg.pinv(self.W2).dot(Z2 - self.b2)
        # print(f"Debug - A1 image shape: {A1.shape}")  # Debugging

        # Step 3: Reverse the hidden_activation function to get Z1
        Z1 = self.inverse_hidden_activation(A1, self.alpha)
        # print(f"Debug - Z1 image shape: {Z1.shape}")  # Debugging

        # Step 4: Reverse the first linear transformation to get X (flattened 1D array)
        X_flat = np.linalg.pinv(self.W1).dot(Z1 - self.b1)
        # print(f"Debug - X_flat image shape: {X_flat.shape}")  # Debugging

        # Step 5: If X_flat has shape (1024, n_samples), reshape it for each sample
        if X_flat.ndim > 1:
            X_flat = X_flat[:, 0]  # Extract the first sample or reshape for batch processing

        # Reshape to original image dimensions (32x32)
        X_image = X_flat.reshape(self.image_shape)

        return X_image

    def transcode(self, X):
        print(f"Debug - Input X shape: {X.shape}")
        encoded = self.encode_image(X)
        decoded = self.decode_image(encoded)
        return encoded, decoded

    def train_with_validation(self, X, Y, alpha, iterations, val_split=0.2):
        # Ensure X is of shape (n_features, n_samples)
        if X.shape[0] != self.input_size:
            X = X.T

        # Ensure Y is a 1D array
        if Y.ndim > 1:
            Y = Y.ravel()

        X_train, X_val, Y_train, Y_val = train_test_split(X.T, Y, test_size=val_split, random_state=42)
        X_train, X_val = X_train.T, X_val.T  # Transpose back to (n_features, n_samples)

        for i in range(iterations):
            # Train step
            Z1, A1, Z2, A2 = self.forward_prop(X_train)
            dW1, db1, dW2, db2 = self.backward_prop(Z1, A1, Z2, A2, X_train, Y_train)
            self.update_params(dW1, db1, dW2, db2, alpha)

            # Calculate and store losses and accuracies
            train_loss = calculate_loss(self, X_train, Y_train)
            val_loss = calculate_loss(self, X_val, Y_val)
            train_accuracy = calculate_accuracy(self, X_train, Y_train)
            val_accuracy = calculate_accuracy(self, X_val, Y_val)

            self.train_losses.append(train_loss)
            self.val_losses.append(val_loss)
            self.train_accuracies.append(train_accuracy)
            self.val_accuracies.append(val_accuracy)

            if i % 100 == 0:
                print(f"Iteration {i}: Train Loss = {train_loss:.4f}, Val Loss = {val_loss:.4f}, "
                      f"Train Accuracy = {train_accuracy:.4f}, Val Accuracy = {val_accuracy:.4f}")

    def plot_learning_curves(self):
        plot_learning_curves(self.train_losses, self.val_losses, self.train_accuracies, self.val_accuracies)

    def plot_encoded_space(self, X, Y):
        if X.shape[0] != self.input_size:
            X = X.T
        plot_encoded_space(self, X, Y)

    def plot_reconstructions(self, X, num_images=5):
        if X.shape[0] != self.input_size:
            X = X.T
        plot_reconstructions(self, X, num_images)