with memleaks

This commit is contained in:
Jakob Stornig 2023-09-24 02:25:17 +02:00
parent e0d9353d4b
commit e7378cdb51
2 changed files with 111 additions and 70 deletions

13
main.c
View file

@ -4,6 +4,13 @@
#include "image.h" #include "image.h"
#include "neural_net.h" #include "neural_net.h"
#include <stdlib.h>
/*
 * Releases a set of imported images.
 *
 * Calls img_free on each of the first `count` entries of `images`, then
 * frees the pointer array itself.
 */
void testFree(Image ** images, int count){
    int idx = 0;
    while (idx < count) {
        img_free(images[idx]);
        idx++;
    }
    free(images);
}
int main() { int main() {
// Image** images = import_images("../data/train-images.idx3-ubyte", "../data/train-labels.idx1-ubyte", NULL, 60000); // Image** images = import_images("../data/train-images.idx3-ubyte", "../data/train-labels.idx1-ubyte", NULL, 60000);
@ -30,6 +37,8 @@ int main() {
// int pause; // int pause;
int imported_count = 0; int imported_count = 0;
Image** images = import_images("../data/train-images.idx3-ubyte", "../data/train-labels.idx1-ubyte", &imported_count, 10000); Image** images = import_images("../data/train-images.idx3-ubyte", "../data/train-labels.idx1-ubyte", &imported_count, 10000);
Neural_Network * net = create_network(3, 28*28, 30, 10); testFree(images, imported_count);
train_network_with_batches(net, images, imported_count, 1, 10, 3);
//Neural_Network * net = create_network(3, 28*28, 30, 10);
//train_network_with_batches(net, images, imported_count, 1, 10, 3);
} }

View file

@ -7,11 +7,76 @@
#include <math.h> #include <math.h>
#include "image.h" #include "image.h"
//this is a helper struct only used for training.
typedef struct{ typedef struct{
Neural_Network * network; int layer_count;
Matrix ** weights_delta; Matrix ** weights_delta;
Matrix ** biases_delta Matrix ** biases_delta;
}; Matrix ** sum_weights_delta;
Matrix ** sum_biases_delta;
Matrix ** layer_activations;
Matrix ** layer_activations_wo_sigmoid;
} DynamicTrainingContainer;
/*
 * Allocates a DynamicTrainingContainer with scratch matrices sized for
 * `network`.
 *
 * For every layer transition i (0 .. layer_count-2) the container receives:
 *   - weights_delta[i] / sum_weights_delta[i] with the rows/columns of
 *     network->weights[i],
 *   - biases_delta[i] / sum_biases_delta[i] with the rows/columns of
 *     network->biases[i],
 *   - layer_activations_wo_sigmoid[i] with the rows/columns of
 *     network->biases[i].
 * For every layer i (0 .. layer_count-1) it receives layer_activations[i]
 * as a (network->sizes[i] x 1) matrix.
 *
 * Returns the new container, or NULL if the container struct or one of its
 * pointer arrays could not be allocated. The caller releases the result with
 * dynamic_training_container_free_everything() followed by free().
 */
DynamicTrainingContainer * init_training_container(Neural_Network * network){
    size_t layer_total = network->layer_count > 0 ? (size_t)network->layer_count : 0;
    size_t transition_total = layer_total > 0 ? layer_total - 1 : 0;

    DynamicTrainingContainer * container = malloc(sizeof *container);
    if (container == NULL) {
        return NULL;
    }
    container->layer_count = network->layer_count;

    /*
     * The element count is multiplied as a whole: the earlier form
     * `sizeof(Matrix*)*layer_count - 1` subtracted one BYTE, not one element.
     */
    container->weights_delta = malloc(transition_total * sizeof *container->weights_delta);
    container->biases_delta = malloc(transition_total * sizeof *container->biases_delta);
    container->sum_weights_delta = malloc(transition_total * sizeof *container->sum_weights_delta);
    container->sum_biases_delta = malloc(transition_total * sizeof *container->sum_biases_delta);
    container->layer_activations_wo_sigmoid = malloc(transition_total * sizeof *container->layer_activations_wo_sigmoid);
    container->layer_activations = malloc(layer_total * sizeof *container->layer_activations);

    /* malloc(0) may legitimately return NULL, so only non-empty arrays count as failures. */
    int transition_array_missing = container->weights_delta == NULL
        || container->biases_delta == NULL
        || container->sum_weights_delta == NULL
        || container->sum_biases_delta == NULL
        || container->layer_activations_wo_sigmoid == NULL;
    int activation_array_missing = container->layer_activations == NULL;

    if ((transition_total > 0 && transition_array_missing)
        || (layer_total > 0 && activation_array_missing)) {
        free(container->weights_delta);
        free(container->biases_delta);
        free(container->sum_weights_delta);
        free(container->sum_biases_delta);
        free(container->layer_activations_wo_sigmoid);
        free(container->layer_activations);
        free(container);
        return NULL;
    }

    for (size_t i = 0; i < transition_total; i++) {
        container->weights_delta[i] = matrix_create(network->weights[i]->rows, network->weights[i]->columns);
        container->biases_delta[i] = matrix_create(network->biases[i]->rows, network->biases[i]->columns);
        container->sum_weights_delta[i] = matrix_create(network->weights[i]->rows, network->weights[i]->columns);
        container->sum_biases_delta[i] = matrix_create(network->biases[i]->rows, network->biases[i]->columns);
        /*
         * back_prop stores add(dot(weights[i], activation), biases[i]) in this
         * slot, so it is sized like biases[i] rather than like the input layer
         * (sizes[i]). NOTE(review): assumes add() yields the shape of its
         * operands - confirm against the matrix library.
         */
        container->layer_activations_wo_sigmoid[i] = matrix_create(network->biases[i]->rows, network->biases[i]->columns);
    }

    for (size_t i = 0; i < layer_total; i++) {
        container->layer_activations[i] = matrix_create(network->sizes[i], 1);
    }

    return container;
}
/*
 * Fills the per-sample delta matrices (weights_delta and biases_delta) with 0
 * for each of the container's layer_count-1 layer transitions.
 */
void dynamic_training_container_reset_delta(DynamicTrainingContainer * container){
    int layer = 0;
    while (layer < container->layer_count - 1) {
        Matrix * weight_step = container->weights_delta[layer];
        matrix_fill(weight_step, 0);
        Matrix * bias_step = container->biases_delta[layer];
        matrix_fill(bias_step, 0);
        layer++;
    }
}
/*
 * Fills the accumulated delta matrices (sum_weights_delta and
 * sum_biases_delta) with 0 for each of the container's layer_count-1 layer
 * transitions.
 */
void dynamic_training_container_reset_sum_delta(DynamicTrainingContainer * container){
    int layer = 0;
    while (layer < container->layer_count - 1) {
        Matrix * weight_sum = container->sum_weights_delta[layer];
        matrix_fill(weight_sum, 0);
        Matrix * bias_sum = container->sum_biases_delta[layer];
        matrix_fill(bias_sum, 0);
        layer++;
    }
}
void dynamic_training_container_free_everything(DynamicTrainingContainer * container){
for(int i = 0; i < container->layer_count-1; i++){
matrix_free(container->weights_delta[i]);
matrix_free(container->biases_delta[i]);
matrix_free(container->sum_weights_delta[i]);
matrix_free(container->sum_biases_delta[i]);
matrix_free(container->layer_activations_wo_sigmoid[i]);
}
for (int i = 0; i < container->layer_count; i++) {
matrix_free(container->layer_activations[i]);
}
free(container->weights_delta);
free(container->biases_delta);
free(container->sum_weights_delta);
free(container->sum_biases_delta);
free(container->layer_activations_wo_sigmoid);
free(container->layer_activations);
}
void evaluate(Neural_Network * network, Image** images, int imageCount){ void evaluate(Neural_Network * network, Image** images, int imageCount){
@ -37,12 +102,8 @@ double sigmoid_prime(double input){
return sigmoid(input)*(1- sigmoid(input)); return sigmoid(input)*(1- sigmoid(input));
} }
void back_prop(Neural_Network * network, Image* training_sample, Matrix ** weights_delta, Matrix ** biases_delta){ void back_prop(Neural_Network * network, Image* training_sample, DynamicTrainingContainer * trainingContainer){
//all Matrix** are external, to avoid repeated memory allocation and deallocation. dynamic_training_container_reset_delta(trainingContainer);
for(int i = 0; i < network->layer_count - 1; i++){
matrix_fill(weights_delta[i], 0);
matrix_fill(biases_delta[i], 0);
}
Matrix * desired_result = create_one_hot_result(training_sample); //freed in line 47 Matrix * desired_result = create_one_hot_result(training_sample); //freed in line 47
@ -50,90 +111,65 @@ void back_prop(Neural_Network * network, Image* training_sample, Matrix ** weigh
//feedforward###################################### //feedforward######################################
//input_activation //input_activation
Matrix * current_activation = matrix_flatten(training_sample->pixel_values, 0);//freed by freeing layer_activation Matrix * current_activation = matrix_flatten(training_sample->pixel_values, 0);//freed by freeing layer_activation
trainingContainer->layer_activations[0] = current_activation;
Matrix ** layer_activations = malloc(sizeof(Matrix*) * network->layer_count); //freed at end
Matrix ** layer_activations_wo_sigmoid = malloc(sizeof(Matrix*) * network->layer_count - 1);//freed at end
layer_activations[0] = current_activation;
for(int i = 0; i < network->layer_count-1; i++){ for(int i = 0; i < network->layer_count-1; i++){
Matrix * dot_result = dot(network->weights[i], current_activation);//freed 3 lines below Matrix * dot_result = dot(network->weights[i], current_activation);//freed 3 lines below
Matrix * addition_result = add(dot_result, network->biases[i]); //freed by freeing layer activations wo sigmoid Matrix * addition_result = add(dot_result, network->biases[i]); //freed by freeing layer activations wo sigmoid
matrix_free(dot_result); matrix_free(dot_result);
layer_activations_wo_sigmoid[i] = addition_result; trainingContainer->layer_activations_wo_sigmoid[i] = addition_result;
current_activation = apply(sigmoid, addition_result); current_activation = apply(sigmoid, addition_result);
layer_activations[i+1] = current_activation; //freed by freeing layer activations trainingContainer->layer_activations[i+1] = current_activation; //freed by freeing layer activations
dot_result = NULL; dot_result = NULL;
} }
//backward pass#################################### //backward pass####################################
//calculate delta for last layer; //calculate delta for last layer;
//bias //bias
Matrix * subtraction_result = subtract(layer_activations[network->layer_count-1], desired_result); Matrix * subtraction_result = subtract(trainingContainer->layer_activations[network->layer_count-1], desired_result);
Matrix * s_prime = apply(sigmoid_prime, layer_activations_wo_sigmoid[network->layer_count-2]); Matrix * s_prime = apply(sigmoid_prime, trainingContainer->layer_activations_wo_sigmoid[network->layer_count-2]);
Matrix * delta = multiply(subtraction_result, s_prime); Matrix * delta = multiply(subtraction_result, s_prime);
matrix_free(s_prime); matrix_free(s_prime);
matrix_free(subtraction_result); matrix_free(subtraction_result);
biases_delta[network->layer_count-2] = delta; trainingContainer->biases_delta[network->layer_count-2] = delta;
//weights //weights
Matrix * transposed = transpose(layer_activations[network->layer_count-2]); Matrix * transposed = transpose(trainingContainer->layer_activations[network->layer_count-2]);
weights_delta[network->layer_count-2] = dot(delta, transposed); trainingContainer->weights_delta[network->layer_count-2] = dot(delta, transposed);
matrix_free(transposed); matrix_free(transposed);
transposed = NULL; transposed = NULL;
for(int layer = network->layer_count-3; layer >= 0; layer--){ for(int layer = network->layer_count-3; layer >= 0; layer--){
Matrix * activation_wo_sigmoid = layer_activations_wo_sigmoid[layer]; Matrix * activation_wo_sigmoid = trainingContainer->layer_activations_wo_sigmoid[layer];
Matrix * derivative = apply(sigmoid_prime, activation_wo_sigmoid); Matrix * derivative = apply(sigmoid_prime, activation_wo_sigmoid);
Matrix * transposed_layer_weight = transpose(network->weights[layer + 1]); Matrix * transposed_layer_weight = transpose(network->weights[layer + 1]);
Matrix * dot_result = dot(transposed_layer_weight, delta); Matrix * dot_result = dot(transposed_layer_weight, delta);
matrix_free(transposed_layer_weight); matrix_free(transposed_layer_weight);
delta = multiply(dot_result, derivative); delta = multiply(dot_result, derivative);
biases_delta[layer] = delta; trainingContainer->biases_delta[layer] = delta;
Matrix * transposed_activation = transpose(layer_activations[layer]); Matrix * transposed_activation = transpose(trainingContainer->layer_activations[layer]);
weights_delta[layer] = dot(delta, transposed_activation); trainingContainer->weights_delta[layer] = dot(delta, transposed_activation);
matrix_free(transposed_activation); matrix_free(transposed_activation);
} }
matrix_free(desired_result); matrix_free(desired_result);
//free layer_activations
for(int i = 0; i < network->layer_count; i++){
matrix_free(layer_activations[i]);
}
free(layer_activations);
//free layer_activations wo sigmoid
for(int i = 0; i < network->layer_count - 1; i++){
matrix_free(layer_activations_wo_sigmoid[i]);
}
free(layer_activations_wo_sigmoid);
} }
void update_batch(Neural_Network * network, Image** training_data, int batch_start, int batch_end, double learning_rate){ void update_batch(Neural_Network * network, DynamicTrainingContainer * trainingContainer, Image** training_data, int batch_start, int batch_end, double learning_rate){
Matrix** weights_delta = malloc(sizeof(Matrix*)*network->layer_count - 1); dynamic_training_container_reset_delta(trainingContainer);
Matrix** biases_delta = malloc(sizeof(Matrix*)*network->layer_count - 1); dynamic_training_container_reset_sum_delta(trainingContainer);
Matrix** sum_weights_delta = malloc(sizeof(Matrix*)*network->layer_count - 1);
Matrix** sum_biases_delta = malloc(sizeof(Matrix*)*network->layer_count - 1);
for(int i = 0; i < network->layer_count - 1; i++){
weights_delta[i] = matrix_create(network->weights[i]->rows, network->weights[i]->columns);
biases_delta[i] = matrix_create(network->biases[i]->rows, network->biases[i]->columns);
sum_weights_delta[i] = matrix_create(network->weights[i]->rows, network->weights[i]->columns);
sum_biases_delta[i] = matrix_create(network->biases[i]->rows, network->biases[i]->columns);
}
for(int i = batch_start; i <= batch_end; i++){ for(int i = batch_start; i <= batch_end; i++){
back_prop(network, training_data[i], weights_delta, biases_delta); back_prop(network, training_data[i], trainingContainer);
for(int j = 0; j < network->layer_count-1; j++){ for(int j = 0; j < network->layer_count-1; j++){
Matrix * sum_weights_free = sum_weights_delta[j]; Matrix * sum_weights_free = trainingContainer->sum_weights_delta[j];
sum_weights_delta[j] = add(sum_weights_delta[j], weights_delta[j]); trainingContainer->sum_weights_delta[j] = add(trainingContainer->sum_weights_delta[j], trainingContainer->weights_delta[j]);
matrix_free(sum_weights_free); matrix_free(sum_weights_free);
Matrix * sum_biases_free = sum_biases_delta[j]; Matrix * sum_biases_free = trainingContainer->sum_biases_delta[j];
sum_biases_delta[j] = add(sum_biases_delta[j], biases_delta[j]); trainingContainer->sum_biases_delta[j] = add(trainingContainer->sum_biases_delta[j], trainingContainer->biases_delta[j]);
matrix_free(sum_biases_free); matrix_free(sum_biases_free);
} }
} }
@ -142,38 +178,34 @@ void update_batch(Neural_Network * network, Image** training_data, int batch_sta
double scaling_factor = learning_rate/(batch_end-batch_start); double scaling_factor = learning_rate/(batch_end-batch_start);
for(int i = 0; i < network->layer_count-1; i++){ for(int i = 0; i < network->layer_count-1; i++){
//update weights //update weights
Matrix * weight_change = scale(sum_weights_delta[i], scaling_factor); Matrix * weight_change = scale(trainingContainer->sum_weights_delta[i], scaling_factor);
matrix_free(sum_weights_delta[i]);
Matrix * new_weights = subtract(network->weights[i], weight_change); Matrix * new_weights = subtract(network->weights[i], weight_change);
matrix_free(network->weights[i]); matrix_free(network->weights[i]);
network->weights[i] = new_weights; network->weights[i] = new_weights;
//update biases //update biases
Matrix * bias_change = scale(sum_biases_delta[i], scaling_factor); Matrix * bias_change = scale(trainingContainer->sum_biases_delta[i], scaling_factor);
matrix_free(sum_biases_delta[i]);
Matrix * new_biases = subtract(network->biases[i], bias_change); Matrix * new_biases = subtract(network->biases[i], bias_change);
matrix_free(network->biases[i]); matrix_free(network->biases[i]);
network->biases[i] = new_biases; network->biases[i] = new_biases;
} }
free(sum_weights_delta);
free(sum_biases_delta);
for(int i = 0; i < network->layer_count - 1; i++){
matrix_free(weights_delta[i]);
matrix_free(biases_delta[i]);
}
} }
void train_network_with_batches(Neural_Network * network, Image** training_data, int image_count, int epochs, int batch_size, double learning_rate){ void train_network_with_batches(Neural_Network * network, Image** training_data, int image_count, int epochs, int batch_size, double learning_rate){
DynamicTrainingContainer * container = init_training_container(network);
for(int i = 0; i < epochs; i++){ for(int i = 0; i < epochs; i++){
for(int j = 0; j < image_count/batch_size; j++){ for(int j = 0; j < image_count/batch_size; j++){
int batch_start = j*batch_size; int batch_start = j*batch_size;
int batch_end = j*batch_size + batch_size - 1; int batch_end = j*batch_size + batch_size - 1;
update_batch(network, training_data, batch_start, batch_end, learning_rate); update_batch(network, container, training_data, batch_start, batch_end, learning_rate);
} }
evaluate(network, training_data, 1000); evaluate(network, training_data, 500);
} }
dynamic_training_container_free_everything(container);
free(container);
} }