From 34a23c6eab091ec97ee1ff604b905f45764d2a7c Mon Sep 17 00:00:00 2001 From: Jakob Stornig Date: Sat, 23 Sep 2023 22:05:55 +0200 Subject: [PATCH 1/3] (try): stochastic gradient decent --- CMakeLists.txt | 2 +- image.c | 15 +++- image.h | 3 +- main.c | 49 ++++++---- neural_net.c | 218 +++++++++++++++++++++++++++++++++++++++++++++ neural_net.h | 22 +++++ neuronal_network.c | 6 +- 7 files changed, 288 insertions(+), 27 deletions(-) create mode 100644 neural_net.c create mode 100644 neural_net.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 461b2a6..e759032 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -3,5 +3,5 @@ project(c_net C) set(CMAKE_C_STANDARD 11) -add_executable(c_net main.c matrix.c image.c neuronal_network.c util.c util.h) +add_executable(c_net main.c matrix.c image.c neuronal_network.c util.c util.h neural_net.c neural_net.h) target_link_libraries(c_net m) diff --git a/image.c b/image.c index 11d7340..c262fbf 100644 --- a/image.c +++ b/image.c @@ -68,8 +68,8 @@ Image * load_pgm_image(char * image_file_string){ Image** import_images(char* image_file_string, char* label_file_string, int* _number_imported, int count) { printf("Loading Images\n"); // create file pointer for the image and label data - FILE* image_file = fopen(image_file_string, "r"); - FILE* label_file = fopen(label_file_string, "r"); + FILE* image_file = fopen(image_file_string, "rb"); + FILE* label_file = fopen(label_file_string, "rb"); // check if the file could be opened if(image_file == NULL || label_file == NULL) { @@ -159,6 +159,17 @@ Image** import_images(char* image_file_string, char* label_file_string, int* _nu return images; } +Matrix* create_one_hot_result(Image* image){ + if(image->label < 0 || image->label > 9){ + printf("create_one_hot_result should only be used on correctly labeled images\n"); + exit(1); + } + + Matrix* result = matrix_create(10, 1); + result->numbers[image->label][0] = 1; + return result; +} + void img_print (Image* img) { //print the image diff --git a/image.h b/image.h index 6d3caeb..78dff98 100644 --- a/image.h +++ b/image.h @@ -1,8 +1,6 @@ #pragma once #include "matrix.h" -#include "matrix.h" - typedef struct { Matrix* pixel_values; char label; @@ -27,6 +25,7 @@ static const int MAGIC_NUMBER_IMAGES = 2051; */ Image ** import_images(char* image_file_string, char* label_file_string, int* number_imported, int count); Image * load_pgm_image(char * image_file_string); +Matrix* create_one_hot_result(Image* image); void img_print (Image* image); void img_visualize(Image*image); void img_free (Image* image); \ No newline at end of file diff --git a/main.c b/main.c index 309e363..da6ef80 100644 --- a/main.c +++ b/main.c @@ -2,25 +2,36 @@ #include "matrix.h" #include "image.h" -#include "neuronal_network.h" + +#include "neural_net.h" int main() { - Image** images = import_images("../data/train-images.idx3-ubyte", "../data/train-labels.idx1-ubyte", NULL, 60000); -// img_visualize(images[4]); - - Neural_Network* nn = new_network(28*28, 16, 10, 0.5); - randomize_network(nn, 20); -// save_network(nn); - -// Neural_Network* nn = load_network("../networks/test1.txt"); - - - for (int i = 0; i < 10000; ++i) { - train_network(nn, images[i], images[i]->label); - } - - measure_network_accuracy(nn, images, 100); - - - +// Image** images = import_images("../data/train-images.idx3-ubyte", "../data/train-labels.idx1-ubyte", NULL, 60000); +//// img_visualize(images[4]); +// +// Neural_Network* nn = new_network(28*28, 16, 10, 0.5); +// randomize_network(nn, 20); +//// save_network(nn); +// +//// Neural_Network* nn = load_network("../networks/test1.txt"); +// +// +// for (int i = 0; i < 10000; ++i) { +// train_network(nn, images[i], images[i]->label); +// } +// +// measure_network_accuracy(nn, images, 100); +// Matrix *m = matrix_create(2, 1); +// m->numbers[0][0] = 1; +// m->numbers[1][0] = 1; +// Neural_Network * net = create_network(3, 2, 3, 1); +// feedforward(net, m); +// +// int pause; + int imported_count = 0; + Image** images = import_images("../data/train-images.idx3-ubyte", "../data/train-labels.idx1-ubyte", &imported_count, 60000); + matrix_save(images[0]->pixel_values, "image1.txt"); + matrix_save(images[1]->pixel_values, "images2.txt"); + Neural_Network * net = create_network(3, 28*28, 30, 10); + train_network_with_batches(net, images, imported_count, 1, 10, 3); } \ No newline at end of file diff --git a/neural_net.c b/neural_net.c new file mode 100644 index 0000000..5368f96 --- /dev/null +++ b/neural_net.c @@ -0,0 +1,218 @@ +// +// Created by jakob on 22.09.2023. +// +#include +#include +#include "neural_net.h" +#include +#include "image.h" + +void evaluate(Neural_Network * network, Image** images, int imageCount){ + int numCorrect = 0; + for(int i = 0; i <= imageCount; i++){ + Matrix * input = matrix_flatten(images[i]->pixel_values, 0); + Matrix * res = feedforward(network, input); + char result = (char)matrix_argmax(res); + if(result == images[i]->label){ + numCorrect++; + } + matrix_free(input); + matrix_free(res); + } + printf("%d/%d", numCorrect, imageCount); +} + +double sigmoid(double input) { + return 1.0 / (1 + exp(-1 * input)); +} + +double sigmoid_prime(double input){ + return sigmoid(input)*(1- sigmoid(input)); +} + +void back_prop(Neural_Network * network, Image* training_sample, Matrix ** weights_delta, Matrix ** biases_delta){ + //all Matrix** are external, to avoid repeated memory allocation and deallocation. + for(int i = 0; i < network->layer_count - 1; i++){ + matrix_fill(weights_delta[i], 0); + matrix_fill(biases_delta[i], 0); + } + + Matrix * desired_result = create_one_hot_result(training_sample); //freed in line 47 + + + //feedforward###################################### + //input_activation + Matrix * current_activation = matrix_flatten(training_sample->pixel_values, 0);//freed by freeing layer_activation + + Matrix ** layer_activations = malloc(sizeof(Matrix*) * network->layer_count); //freed at end + Matrix ** layer_activations_wo_sigmoid = malloc(sizeof(Matrix*) * network->layer_count - 1);//freed at end + layer_activations[0] = current_activation; + + for(int i = 0; i < network->layer_count-1; i++){ + Matrix * dot_result = dot(network->weights[i], current_activation);//freed 3 lines below + Matrix * addition_result = add(dot_result, network->biases[i]); //freed by freeing layer activations wo sigmoid + matrix_free(dot_result); + layer_activations_wo_sigmoid[i] = addition_result; + current_activation = apply(sigmoid, addition_result); + layer_activations[i+1] = current_activation; //freed by freeing layer activations + dot_result = NULL; + } + + //backward pass#################################### + //calculate delta for last layer; + //bias + Matrix * subtraction_result = subtract(layer_activations[network->layer_count-1], desired_result); + Matrix * delta = apply(sigmoid_prime, subtraction_result); + matrix_free(subtraction_result); + biases_delta[network->layer_count-1] = delta; + + //weights + Matrix * transposed = transpose(layer_activations[network->layer_count-2]); + weights_delta[network->layer_count-1] = dot(delta, transposed); + matrix_free(transposed); + transposed = NULL; + + for(int layer = network->layer_count-3; layer >= 0; layer--){ + Matrix * activation_wo_sigmoid = layer_activations_wo_sigmoid[layer]; + Matrix * derivative = apply(sigmoid_prime, activation_wo_sigmoid); + Matrix * transposed_layer_weight = transpose(network->weights[layer + 1]); + Matrix * dot_result = dot(transposed_layer_weight, delta); + matrix_free(transposed_layer_weight); + delta = multiply(dot_result, derivative); + + biases_delta[layer] = delta; + Matrix * transposed_activation = transpose(layer_activations[layer]); + weights_delta[layer] = dot(delta, transposed_activation); + matrix_free(transposed_activation); + } + + matrix_free(desired_result); + + //free layer_activations + for(int i = 0; i < network->layer_count; i++){ + matrix_free(layer_activations[i]); + } + free(layer_activations); + + //free layer_activations wo sigmoid + for(int i = 0; i < network->layer_count - 1; i++){ + matrix_free(layer_activations_wo_sigmoid[i]); + } + free(layer_activations_wo_sigmoid); + + +} + +void update_batch(Neural_Network * network, Image** training_data, int batch_start, int batch_end, double learning_rate){ + Matrix** weights_delta = malloc(sizeof(Matrix*)*network->layer_count - 1); + Matrix** biases_delta = malloc(sizeof(Matrix*)*network->layer_count - 1); + Matrix** sum_weights_delta = malloc(sizeof(Matrix*)*network->layer_count - 1); + Matrix** sum_biases_delta = malloc(sizeof(Matrix*)*network->layer_count - 1); + + for(int i = 0; i < network->layer_count - 1; i++){ + weights_delta[i] = matrix_create(network->weights[i]->rows, network->weights[i]->columns); + biases_delta[i] = matrix_create(network->biases[i]->rows, network->biases[i]->columns); + sum_weights_delta[i] = matrix_create(network->weights[i]->rows, network->weights[i]->columns); + sum_biases_delta[i] = matrix_create(network->biases[i]->rows, network->biases[i]->columns); + } + + for(int i = batch_start; i <= batch_end; i++){ + back_prop(network, training_data[i], weights_delta, biases_delta); + for(int j = 0; j < network->layer_count-1; j++){ + Matrix * sum_weights_free = sum_weights_delta[j]; + sum_weights_delta[j] = add(sum_weights_delta[j], weights_delta[j]); + matrix_free(sum_weights_free); + + Matrix * sum_biases_free = sum_biases_delta[j]; + sum_biases_delta[j] = add(sum_biases_delta[j], biases_delta[j]); + matrix_free(sum_biases_free); + } + } + + //change network + double scaling_factor = learning_rate/(batch_end-batch_start); + for(int i = 0; i < network->layer_count-1; i++){ + //update weights + Matrix * weight_change = scale(sum_weights_delta[i], scaling_factor); + matrix_free(sum_weights_delta[i]); + Matrix * new_weights = subtract(network->weights[i], weight_change); + matrix_free(network->weights[i]); + network->weights[i] = new_weights; + + //update biases + Matrix * bias_change = scale(sum_biases_delta[i], scaling_factor); + matrix_free(sum_biases_delta[i]); + Matrix * new_biases = subtract(network->biases[i], bias_change); + matrix_free(network->biases[i]); + network->biases[i] = new_biases; + } + //TODO: update mini batch + + +} + +void train_network_with_batches(Neural_Network * network, Image** training_data, int image_count, int epochs, int batch_size, double learning_rate){ + for(int i = 0; i < epochs; i++){ + for(int j = 0; j < image_count/batch_size; j++){ + int batch_start = j*batch_size; + int batch_end = j*batch_size + batch_size - 1; + update_batch(network, training_data, batch_start, batch_end, learning_rate); + } + evaluate(network, training_data, 1000); + } +} + + +Neural_Network* create_network(int layer_count,...){ + Neural_Network * network = malloc(sizeof(Neural_Network)); + network->layer_count = layer_count; + network->sizes = malloc(sizeof(int) * layer_count); + network->weights = malloc(sizeof(Matrix*)*(layer_count-1)); + network->biases = malloc(sizeof(Matrix*)*(layer_count-1)); + + //read sizes + va_list layer_sizes; + va_start(layer_sizes, layer_count); + for(int i = 0; i < layer_count; i++){ + network->sizes[i] = va_arg(layer_sizes, int); + } + va_end(layer_sizes); + + //init weights + for(int i = 0; i < layer_count-1; i++){ + network->weights[i] = matrix_create(network->sizes[i+1], network->sizes[i]); + matrix_randomize(network->weights[i], network->sizes[i]); + } + + //init biases + for(int i = 0; i < layer_count-1; i++){ + network->biases[i] = matrix_create(network->sizes[i+1], 1); + matrix_randomize(network->biases[i], network->sizes[i]); + } + + return network; +} + + + +//given an input "activations" it returns the matrix that the network would output +Matrix * feedforward(Neural_Network * net, Matrix * activations){ + + Matrix * current_layer_activation = activations; + + //next_layer_activation = sigmoid(dot(layer_weights, layer_activations)+layer_biases); + for(int i = 0; i < net->layer_count - 1; i++){ + Matrix * dot_result = dot(net->weights[i], current_layer_activation); + Matrix * addition_result = add(dot_result, net->biases[i]); + Matrix * sigmoid_result = apply(sigmoid, addition_result); + + current_layer_activation = sigmoid_result; + matrix_free(dot_result); + matrix_free(addition_result); + } + return current_layer_activation; +} + + + + diff --git a/neural_net.h b/neural_net.h new file mode 100644 index 0000000..45a9779 --- /dev/null +++ b/neural_net.h @@ -0,0 +1,22 @@ +// +// Created by jakob on 22.09.2023. +// + +#include "matrix.h" +#include "image.h" + +#ifndef C_NET_NEURAL_NET_H +#define C_NET_NEURAL_NET_H + +#endif //C_NET_NEURAL_NET_H + +typedef struct { + int layer_count; + int* sizes; + Matrix ** weights; + Matrix ** biases; +} Neural_Network; + +Neural_Network* create_network(int layer_count,...); +Matrix * feedforward(Neural_Network * net, Matrix * activations); +void train_network_with_batches(Neural_Network * network, Image** training_data, int image_count, int epochs, int batch_size, double learning_rate); diff --git a/neuronal_network.c b/neuronal_network.c index a970cec..862ddb3 100644 --- a/neuronal_network.c +++ b/neuronal_network.c @@ -343,9 +343,9 @@ Matrix * backPropagation(double learning_rate, Matrix* weights, Matrix* biases, //void batch_train_network(Neural_Network* network, Image** images, int size); -double sigmoid(double input) { - return 1.0 / (1 + exp(-1 * input)); -} +//double sigmoid(double input) { +// return 1.0 / (1 + exp(-1 * input)); +//} double sigmoid_derivative(double x) { return x * (1.0 - x); From e0d9353d4b40f82cf9c34ea70560852751ede66b Mon Sep 17 00:00:00 2001 From: Jakob Stornig Date: Sun, 24 Sep 2023 00:31:29 +0200 Subject: [PATCH 2/3] with memleaks --- image.h | 4 ---- main.c | 4 +--- neural_net.c | 24 +++++++++++++++++++----- 3 files changed, 20 insertions(+), 12 deletions(-) diff --git a/image.h b/image.h index 78dff98..4054d04 100644 --- a/image.h +++ b/image.h @@ -6,10 +6,6 @@ typedef struct { char label; } Image; -typedef struct { - const Image* image; - const size_t size; -} Image_Container; static const int MAGIC_NUMBER_LABEL = 2049; diff --git a/main.c b/main.c index da6ef80..2d787c0 100644 --- a/main.c +++ b/main.c @@ -29,9 +29,7 @@ int main() { // // int pause; int imported_count = 0; - Image** images = import_images("../data/train-images.idx3-ubyte", "../data/train-labels.idx1-ubyte", &imported_count, 60000); - matrix_save(images[0]->pixel_values, "image1.txt"); - matrix_save(images[1]->pixel_values, "images2.txt"); + Image** images = import_images("../data/train-images.idx3-ubyte", "../data/train-labels.idx1-ubyte", &imported_count, 10000); Neural_Network * net = create_network(3, 28*28, 30, 10); train_network_with_batches(net, images, imported_count, 1, 10, 3); } \ No newline at end of file diff --git a/neural_net.c b/neural_net.c index 5368f96..15b4399 100644 --- a/neural_net.c +++ b/neural_net.c @@ -7,6 +7,13 @@ #include #include "image.h" +typedef struct{ + Neural_Network * network; + Matrix ** weights_delta; + Matrix ** biases_delta +}; + + void evaluate(Neural_Network * network, Image** images, int imageCount){ int numCorrect = 0; for(int i = 0; i <= imageCount; i++){ @@ -23,7 +30,7 @@ void evaluate(Neural_Network * network, Image** images, int imageCount){ } double sigmoid(double input) { - return 1.0 / (1 + exp(-1 * input)); + return 1.0 / (1 + exp(-input)); } double sigmoid_prime(double input){ @@ -62,13 +69,15 @@ void back_prop(Neural_Network * network, Image* training_sample, Matrix ** weigh //calculate delta for last layer; //bias Matrix * subtraction_result = subtract(layer_activations[network->layer_count-1], desired_result); - Matrix * delta = apply(sigmoid_prime, subtraction_result); + Matrix * s_prime = apply(sigmoid_prime, layer_activations_wo_sigmoid[network->layer_count-2]); + Matrix * delta = multiply(subtraction_result, s_prime); + matrix_free(s_prime); matrix_free(subtraction_result); - biases_delta[network->layer_count-1] = delta; + biases_delta[network->layer_count-2] = delta; //weights Matrix * transposed = transpose(layer_activations[network->layer_count-2]); - weights_delta[network->layer_count-1] = dot(delta, transposed); + weights_delta[network->layer_count-2] = dot(delta, transposed); matrix_free(transposed); transposed = NULL; @@ -146,7 +155,12 @@ void update_batch(Neural_Network * network, Image** training_data, int batch_sta matrix_free(network->biases[i]); network->biases[i] = new_biases; } - //TODO: update mini batch + free(sum_weights_delta); + free(sum_biases_delta); + for(int i = 0; i < network->layer_count - 1; i++){ + matrix_free(weights_delta[i]); + matrix_free(biases_delta[i]); + } } From e7378cdb514d25521098d7e21e7cda4ec180a451 Mon Sep 17 00:00:00 2001 From: Jakob Stornig Date: Sun, 24 Sep 2023 02:25:17 +0200 Subject: [PATCH 3/3] with memleaks --- main.c | 13 +++- neural_net.c | 168 ++++++++++++++++++++++++++++++--------------------- 2 files changed, 111 insertions(+), 70 deletions(-) diff --git a/main.c b/main.c index 2d787c0..7c53c7a 100644 --- a/main.c +++ b/main.c @@ -4,6 +4,13 @@ #include "image.h" #include "neural_net.h" +#include +void testFree(Image ** images, int count){ + for(int i = 0; i < count; i++){ + img_free(images[i]); + } + free(images); +} int main() { // Image** images = import_images("../data/train-images.idx3-ubyte", "../data/train-labels.idx1-ubyte", NULL, 60000); @@ -30,6 +37,8 @@ int main() { // int pause; int imported_count = 0; Image** images = import_images("../data/train-images.idx3-ubyte", "../data/train-labels.idx1-ubyte", &imported_count, 10000); - Neural_Network * net = create_network(3, 28*28, 30, 10); - train_network_with_batches(net, images, imported_count, 1, 10, 3); + testFree(images, imported_count); + + //Neural_Network * net = create_network(3, 28*28, 30, 10); + //train_network_with_batches(net, images, imported_count, 1, 10, 3); } \ No newline at end of file diff --git a/neural_net.c b/neural_net.c index 15b4399..0f90d95 100644 --- a/neural_net.c +++ b/neural_net.c @@ -7,11 +7,76 @@ #include #include "image.h" +//this is a helper struct only used for training. typedef struct{ - Neural_Network * network; + int layer_count; Matrix ** weights_delta; - Matrix ** biases_delta -}; + Matrix ** biases_delta; + Matrix ** sum_weights_delta; + Matrix ** sum_biases_delta; + Matrix ** layer_activations; + Matrix ** layer_activations_wo_sigmoid; +} DynamicTrainingContainer; + +DynamicTrainingContainer * init_training_container(Neural_Network * network){ + DynamicTrainingContainer * container = malloc(sizeof(DynamicTrainingContainer)); + container->layer_count = network->layer_count; + container->weights_delta = malloc(sizeof(Matrix*)*network->layer_count - 1); + container->biases_delta = malloc(sizeof(Matrix*)*network->layer_count - 1); + container->sum_weights_delta = malloc(sizeof(Matrix*)*network->layer_count - 1); + container->sum_biases_delta = malloc(sizeof(Matrix*)*network->layer_count - 1); + container->layer_activations_wo_sigmoid = malloc(sizeof(Matrix*) * network->layer_count - 1); + + container->layer_activations = malloc(sizeof(Matrix*) * network->layer_count); + + for(int i = 0; i < network->layer_count-1; i++){ + container->weights_delta[i] = matrix_create(network->weights[i]->rows, network->weights[i]->columns); + container->biases_delta[i] = matrix_create(network->biases[i]->rows, network->biases[i]->columns); + container->sum_weights_delta[i] = matrix_create(network->weights[i]->rows, network->weights[i]->columns); + container->sum_biases_delta[i] = matrix_create(network->biases[i]->rows, network->biases[i]->columns); + container->layer_activations_wo_sigmoid[i] = matrix_create(network->sizes[i], 1); + } + for (int i = 0; i < network->layer_count; i++) { + container->layer_activations[i] = matrix_create(network->sizes[i], 1); + } + return container; +} + +void dynamic_training_container_reset_delta(DynamicTrainingContainer * container){ + for(int i = 0; i < container->layer_count-1; i++){ + matrix_fill(container->weights_delta[i], 0); + matrix_fill(container->biases_delta[i], 0); + } +} + +void dynamic_training_container_reset_sum_delta(DynamicTrainingContainer * container){ + for(int i = 0; i < container->layer_count-1; i++){ + matrix_fill(container->sum_weights_delta[i], 0); + matrix_fill(container->sum_biases_delta[i], 0); + } +} + +void dynamic_training_container_free_everything(DynamicTrainingContainer * container){ + + for(int i = 0; i < container->layer_count-1; i++){ + matrix_free(container->weights_delta[i]); + matrix_free(container->biases_delta[i]); + matrix_free(container->sum_weights_delta[i]); + matrix_free(container->sum_biases_delta[i]); + matrix_free(container->layer_activations_wo_sigmoid[i]); + } + for (int i = 0; i < container->layer_count; i++) { + matrix_free(container->layer_activations[i]); + } + + free(container->weights_delta); + free(container->biases_delta); + free(container->sum_weights_delta); + free(container->sum_biases_delta); + free(container->layer_activations_wo_sigmoid); + + free(container->layer_activations); +} void evaluate(Neural_Network * network, Image** images, int imageCount){ @@ -37,12 +102,8 @@ double sigmoid_prime(double input){ return sigmoid(input)*(1- sigmoid(input)); } -void back_prop(Neural_Network * network, Image* training_sample, Matrix ** weights_delta, Matrix ** biases_delta){ - //all Matrix** are external, to avoid repeated memory allocation and deallocation. - for(int i = 0; i < network->layer_count - 1; i++){ - matrix_fill(weights_delta[i], 0); - matrix_fill(biases_delta[i], 0); - } +void back_prop(Neural_Network * network, Image* training_sample, DynamicTrainingContainer * trainingContainer){ + dynamic_training_container_reset_delta(trainingContainer); Matrix * desired_result = create_one_hot_result(training_sample); //freed in line 47 @@ -50,90 +111,65 @@ void back_prop(Neural_Network * network, Image* training_sample, Matrix ** weigh //feedforward###################################### //input_activation Matrix * current_activation = matrix_flatten(training_sample->pixel_values, 0);//freed by freeing layer_activation - - Matrix ** layer_activations = malloc(sizeof(Matrix*) * network->layer_count); //freed at end - Matrix ** layer_activations_wo_sigmoid = malloc(sizeof(Matrix*) * network->layer_count - 1);//freed at end - layer_activations[0] = current_activation; + trainingContainer->layer_activations[0] = current_activation; for(int i = 0; i < network->layer_count-1; i++){ Matrix * dot_result = dot(network->weights[i], current_activation);//freed 3 lines below Matrix * addition_result = add(dot_result, network->biases[i]); //freed by freeing layer activations wo sigmoid matrix_free(dot_result); - layer_activations_wo_sigmoid[i] = addition_result; + trainingContainer->layer_activations_wo_sigmoid[i] = addition_result; current_activation = apply(sigmoid, addition_result); - layer_activations[i+1] = current_activation; //freed by freeing layer activations + trainingContainer->layer_activations[i+1] = current_activation; //freed by freeing layer activations dot_result = NULL; } //backward pass#################################### //calculate delta for last layer; //bias - Matrix * subtraction_result = subtract(layer_activations[network->layer_count-1], desired_result); - Matrix * s_prime = apply(sigmoid_prime, layer_activations_wo_sigmoid[network->layer_count-2]); + Matrix * subtraction_result = subtract(trainingContainer->layer_activations[network->layer_count-1], desired_result); + Matrix * s_prime = apply(sigmoid_prime, trainingContainer->layer_activations_wo_sigmoid[network->layer_count-2]); Matrix * delta = multiply(subtraction_result, s_prime); matrix_free(s_prime); matrix_free(subtraction_result); - biases_delta[network->layer_count-2] = delta; + trainingContainer->biases_delta[network->layer_count-2] = delta; //weights - Matrix * transposed = transpose(layer_activations[network->layer_count-2]); - weights_delta[network->layer_count-2] = dot(delta, transposed); + Matrix * transposed = transpose(trainingContainer->layer_activations[network->layer_count-2]); + trainingContainer->weights_delta[network->layer_count-2] = dot(delta, transposed); matrix_free(transposed); transposed = NULL; for(int layer = network->layer_count-3; layer >= 0; layer--){ - Matrix * activation_wo_sigmoid = layer_activations_wo_sigmoid[layer]; + Matrix * activation_wo_sigmoid = trainingContainer->layer_activations_wo_sigmoid[layer]; Matrix * derivative = apply(sigmoid_prime, activation_wo_sigmoid); Matrix * transposed_layer_weight = transpose(network->weights[layer + 1]); Matrix * dot_result = dot(transposed_layer_weight, delta); matrix_free(transposed_layer_weight); delta = multiply(dot_result, derivative); - biases_delta[layer] = delta; - Matrix * transposed_activation = transpose(layer_activations[layer]); - weights_delta[layer] = dot(delta, transposed_activation); + trainingContainer->biases_delta[layer] = delta; + Matrix * transposed_activation = transpose(trainingContainer->layer_activations[layer]); + trainingContainer->weights_delta[layer] = dot(delta, transposed_activation); matrix_free(transposed_activation); } matrix_free(desired_result); - //free layer_activations - for(int i = 0; i < network->layer_count; i++){ - matrix_free(layer_activations[i]); - } - free(layer_activations); - - //free layer_activations wo sigmoid - for(int i = 0; i < network->layer_count - 1; i++){ - matrix_free(layer_activations_wo_sigmoid[i]); - } - free(layer_activations_wo_sigmoid); - - } -void update_batch(Neural_Network * network, Image** training_data, int batch_start, int batch_end, double learning_rate){ - Matrix** weights_delta = malloc(sizeof(Matrix*)*network->layer_count - 1); - Matrix** biases_delta = malloc(sizeof(Matrix*)*network->layer_count - 1); - Matrix** sum_weights_delta = malloc(sizeof(Matrix*)*network->layer_count - 1); - Matrix** sum_biases_delta = malloc(sizeof(Matrix*)*network->layer_count - 1); - - for(int i = 0; i < network->layer_count - 1; i++){ - weights_delta[i] = matrix_create(network->weights[i]->rows, network->weights[i]->columns); - biases_delta[i] = matrix_create(network->biases[i]->rows, network->biases[i]->columns); - sum_weights_delta[i] = matrix_create(network->weights[i]->rows, network->weights[i]->columns); - sum_biases_delta[i] = matrix_create(network->biases[i]->rows, network->biases[i]->columns); - } +void update_batch(Neural_Network * network, DynamicTrainingContainer * trainingContainer, Image** training_data, int batch_start, int batch_end, double learning_rate){ + dynamic_training_container_reset_delta(trainingContainer); + dynamic_training_container_reset_sum_delta(trainingContainer); for(int i = batch_start; i <= batch_end; i++){ - back_prop(network, training_data[i], weights_delta, biases_delta); + back_prop(network, training_data[i], trainingContainer); for(int j = 0; j < network->layer_count-1; j++){ - Matrix * sum_weights_free = sum_weights_delta[j]; - sum_weights_delta[j] = add(sum_weights_delta[j], weights_delta[j]); + Matrix * sum_weights_free = trainingContainer->sum_weights_delta[j]; + trainingContainer->sum_weights_delta[j] = add(trainingContainer->sum_weights_delta[j], trainingContainer->weights_delta[j]); matrix_free(sum_weights_free); - Matrix * sum_biases_free = sum_biases_delta[j]; - sum_biases_delta[j] = add(sum_biases_delta[j], biases_delta[j]); + Matrix * sum_biases_free = trainingContainer->sum_biases_delta[j]; + trainingContainer->sum_biases_delta[j] = add(trainingContainer->sum_biases_delta[j], trainingContainer->biases_delta[j]); matrix_free(sum_biases_free); } } @@ -142,38 +178,34 @@ void update_batch(Neural_Network * network, Image** training_data, int batch_sta double scaling_factor = learning_rate/(batch_end-batch_start); for(int i = 0; i < network->layer_count-1; i++){ //update weights - Matrix * weight_change = scale(sum_weights_delta[i], scaling_factor); - matrix_free(sum_weights_delta[i]); + Matrix * weight_change = scale(trainingContainer->sum_weights_delta[i], scaling_factor); Matrix * new_weights = subtract(network->weights[i], weight_change); matrix_free(network->weights[i]); network->weights[i] = new_weights; //update biases - Matrix * bias_change = scale(sum_biases_delta[i], scaling_factor); - matrix_free(sum_biases_delta[i]); + Matrix * bias_change = scale(trainingContainer->sum_biases_delta[i], scaling_factor); Matrix * new_biases = subtract(network->biases[i], bias_change); matrix_free(network->biases[i]); network->biases[i] = new_biases; } - free(sum_weights_delta); - free(sum_biases_delta); - for(int i = 0; i < network->layer_count - 1; i++){ - matrix_free(weights_delta[i]); - matrix_free(biases_delta[i]); - } - - } void train_network_with_batches(Neural_Network * network, Image** training_data, int image_count, int epochs, int batch_size, double learning_rate){ + DynamicTrainingContainer * container = init_training_container(network); + + for(int i = 0; i < epochs; i++){ for(int j = 0; j < image_count/batch_size; j++){ int batch_start = j*batch_size; int batch_end = j*batch_size + batch_size - 1; - update_batch(network, training_data, batch_start, batch_end, learning_rate); + update_batch(network, container, training_data, batch_start, batch_end, learning_rate); } - evaluate(network, training_data, 1000); + evaluate(network, training_data, 500); } + + dynamic_training_container_free_everything(container); + free(container); }