From 3349ce7a435a941e9e8c2c9df49a0bf59be25a42 Mon Sep 17 00:00:00 2001 From: Thomas Date: Fri, 22 Sep 2023 12:54:04 +0200 Subject: [PATCH] minor changes --- main.c | 12 ++-- matrix.c | 2 + neuronal_network.c | 147 +++++++++++++++++---------------------------- neuronal_network.h | 9 +-- 4 files changed, 68 insertions(+), 102 deletions(-) diff --git a/main.c b/main.c index faad683..d656166 100644 --- a/main.c +++ b/main.c @@ -1,6 +1,5 @@ #include -#include "matrix.h" #include "image.h" #include "neuronal_network.h" @@ -8,17 +7,20 @@ int main() { Image** images = import_images("../data/train-images.idx3-ubyte", "../data/train-labels.idx1-ubyte", NULL, 60000); // img_visualize(images[4]); - Neural_Network* nn = new_network(28*28, 100, 10, 0.5); - randomize_network(nn, 20); + Neural_Network* nn = new_network(28*28, 50, 10, 0.1); + randomize_network(nn, 1); // save_network(nn); // Neural_Network* nn = load_network("../networks/test1.txt"); - for (int i = 0; i < 10000; ++i) { + for (int i = 0; i < 20000; ++i) { train_network(nn, images[i], images[i]->label); } - printf("%lf\n", measure_network_accuracy(nn, images, 100)); +// train_network(nn, images[0], images[0]->label); +// train_network(nn, images[0], images[0]->label); + + printf("%lf\n", measure_network_accuracy(nn, images, 2000)); } \ No newline at end of file diff --git a/matrix.c b/matrix.c index c958c0f..fd42480 100644 --- a/matrix.c +++ b/matrix.c @@ -349,8 +349,10 @@ int matrix_argmax(Matrix* matrix) { printf("ERROR: Matrix is not Mx1 (matrix_argmax)"); exit(EXIT_FAILURE); } + double max_value = 0; int max_index = 0; + for (int i = 0; i < matrix->rows; i++) { if (matrix->numbers[i][0] > max_value) { max_value = matrix->numbers[i][0]; diff --git a/neuronal_network.c b/neuronal_network.c index 92e0563..fdb8a1d 100644 --- a/neuronal_network.c +++ b/neuronal_network.c @@ -1,17 +1,11 @@ #include #include "neuronal_network.h" #include -#include #include double sigmoid(double input); -double sigmoid_derivative(double x); - -Matrix* softmax(Matrix* matrix); +Matrix* predict(Neural_Network* network, Matrix* image_data); double square(double input); - -double loss_function(Matrix* output_matrix, int image_label); - Matrix * backPropagation(double learning_rate, Matrix* weights, Matrix* biases, Matrix* current_layer_activation, Matrix* previous_layer_activation, Matrix* sigma_old); Neural_Network* new_network(int input_size, int hidden_size, int output_size, double learning_rate){ @@ -135,6 +129,18 @@ Neural_Network* load_network(char* file) { return saved_network; } +void print_network(Neural_Network* network) { + matrix_print(network->bias_1); + matrix_print(network->bias_2); + matrix_print(network->bias_3); + matrix_print(network->bias_output); + + matrix_print(network->weights_1); + matrix_print(network->weights_2); + matrix_print(network->weights_3); + matrix_print(network->weights_output); +} + double measure_network_accuracy(Neural_Network* network, Image** images, int amount) { int num_correct = 0; for (int i = 0; i < amount; i++) { @@ -144,7 +150,7 @@ double measure_network_accuracy(Neural_Network* network, Image** images, int amo } matrix_free(prediction); } - return 1.0 * num_correct / amount; + return ((double) num_correct) / amount; } Matrix* predict_image(Neural_Network* network, Image* image){ @@ -171,8 +177,6 @@ Matrix* predict(Neural_Network* network, Matrix* image_data) { Matrix* final_add = add(final_dot, network->bias_output); Matrix* final_outputs = apply(sigmoid, final_add); - Matrix* result = softmax(final_outputs); - matrix_free(h1_dot); matrix_free(h1_add); matrix_free(h1_outputs); @@ -187,18 +191,8 @@ Matrix* predict(Neural_Network* network, Matrix* image_data) { matrix_free(final_dot); matrix_free(final_add); - matrix_free(final_outputs); - return result; -} - -double cost_function(Matrix* calculated, int expected){ - calculated->numbers[expected] -= 1; - apply(square, calculated); - -// double loss = 0.5 * (target - output) * (target - output); - - return 0; + return final_outputs; } void train_network(Neural_Network* network, Image *image, int label) { @@ -224,42 +218,45 @@ void train_network(Neural_Network* network, Image *image, int label) { Matrix* final_outputs = apply(sigmoid, final_add); // begin backpropagation - Matrix* temp9 = matrix_create(final_outputs->rows, 1); - matrix_fill(temp9, 1); - Matrix* temp1 = subtract(temp9, final_outputs); - Matrix* temp2 = multiply(temp1, final_outputs); // * soll-ist - Matrix* temp3 = matrix_create(final_outputs->rows, final_outputs->columns); - matrix_fill(temp3, 0); - temp3->numbers[label][0] = 1; - Matrix* temp4 = subtract(temp3, final_outputs); - Matrix* sigma1 = multiply(temp2, temp4); + // The output of this is equal to an array of the size (10, 1) where each element is the derivative of the sigmoid function + // with the input of the neuron prior to the application of the activation function + Matrix* matrix_filled_with_ones = matrix_create(final_outputs->rows, 1); + matrix_fill(matrix_filled_with_ones, 1); + Matrix* temp1 = subtract(matrix_filled_with_ones, final_outputs); + Matrix* derivative_input = multiply(final_outputs, temp1); // * soll-ist + + + // create label matrix, which indicates the correct output of the neural network + Matrix* correct_output = matrix_create(final_outputs->rows, final_outputs->columns); + matrix_fill(correct_output, 0); + correct_output->numbers[label][0] = 1; + + // calculate the difference between what the value should be and what it actually is (MAYBE USE MES) + Matrix* error_difference = subtract(final_outputs, correct_output); // * output ist minus output soll + + // multiply the derivative of the activation function with the input to the neuron + Matrix* sigma1 = multiply(derivative_input, error_difference); + + // Calculate the delta for the weights Matrix* temp5 = transpose(h3_outputs); Matrix* temp6 = dot(sigma1, temp5); Matrix* weights_delta = scale(temp6, network->learning_rate); Matrix* bias_delta = scale(sigma1, network->learning_rate); -// Matrix* temp7 = add(weights_delta, network->weights_output); -// matrix_free(network->weights_output); -// network->weights_output = temp7; -// -// Matrix* temp8 = add(bias_delta, network->bias_output); -// matrix_free(network->bias_output); -// network->bias_output = temp8; - - Matrix* temp7 = add(weights_delta, network->weights_output); + Matrix* temp7 = add(network->weights_output, weights_delta); for (int i = 0; i < network->weights_output->rows; ++i) { for (int j = 0; j < network->weights_output->columns; ++j) { network->weights_output->numbers[i][j] = temp7->numbers[i][j]; } } - Matrix* temp8 = add(bias_delta, network->bias_output); - for (int i = 0; i < network->bias_output->rows; ++i) { - for (int j = 0; j < network->bias_output->columns; ++j) { - network->bias_output->numbers[i][j] = temp8->numbers[i][j]; - } - } +// Matrix* temp8 = add(network->bias_output, bias_delta); +// for (int i = 0; i < network->bias_output->rows; ++i) { +// for (int j = 0; j < network->bias_output->columns; ++j) { +// network->bias_output->numbers[i][j] = temp8->numbers[i][j]; +// } +// } // other levels Matrix* sigma2 = backPropagation(network->learning_rate, network->weights_3, network->bias_3, h3_outputs, h2_outputs, sigma1); @@ -294,14 +291,14 @@ void train_network(Neural_Network* network, Image *image, int label) { matrix_free(temp1); - matrix_free(temp2); - matrix_free(temp3); - matrix_free(temp4); + matrix_free(derivative_input); + matrix_free(correct_output); + matrix_free(error_difference); matrix_free(temp5); matrix_free(temp6); matrix_free(temp7); - matrix_free(temp8); - matrix_free(temp9); +// matrix_free(temp8); + matrix_free(matrix_filled_with_ones); } Matrix * backPropagation(double learning_rate, Matrix* weights, Matrix* biases, Matrix* current_layer_activation, Matrix* previous_layer_activation, Matrix* sigma_old) { @@ -327,26 +324,26 @@ Matrix * backPropagation(double learning_rate, Matrix* weights, Matrix* biases, Matrix* weights_delta = scale(temp4, learning_rate); Matrix* bias_delta = scale(sigma_new, learning_rate); - Matrix* temp5 = add(weights_delta, weights); + Matrix* temp5 = add(weights, weights_delta); for (int i = 0; i < weights->rows; ++i) { for (int j = 0; j < weights->columns; ++j) { weights->numbers[i][j] = temp5->numbers[i][j]; } } - Matrix* temp6 = add(bias_delta, biases); - for (int i = 0; i < biases->rows; ++i) { - for (int j = 0; j < biases->columns; ++j) { - biases->numbers[i][j] = temp6->numbers[i][j]; - } - } +// Matrix* temp6 = add(biases, bias_delta); +// for (int i = 0; i < biases->rows; ++i) { +// for (int j = 0; j < biases->columns; ++j) { +// biases->numbers[i][j] = temp6->numbers[i][j]; +// } +// } matrix_free(temp1); matrix_free(temp2); matrix_free(temp3); matrix_free(temp4); matrix_free(temp5); - matrix_free(temp6); +// matrix_free(temp6); matrix_free(temp7); matrix_free(weights_delta); matrix_free(bias_delta); @@ -358,38 +355,6 @@ double sigmoid(double input) { return 1.0 / (1 + exp(-1 * input)); } -double sigmoid_derivative(double x) { - return x * (1.0 - x); -} - -Matrix* softmax(Matrix* matrix) { - double total = 0; - - for (int i = 0; i < matrix->rows; i++) { - for (int j = 0; j < matrix->columns; j++) { - total += exp(matrix->numbers[i][j]); - } - } - Matrix* result_matrix = matrix_create(matrix->rows, matrix->columns); - for (int i = 0; i < result_matrix->rows; i++) { - for (int j = 0; j < result_matrix->columns; j++) { - result_matrix->numbers[i][j] = exp(matrix->numbers[i][j]) / total; - } - } - return result_matrix; -} - double square(double input) { return input * input; -} - -//double loss_function(Matrix* output_matrix, int image_label) { -// Matrix* temp = matrix_copy(output_matrix); -// -// temp->numbers[1][image_label] -= 1; -// apply(square, temp); -// -// matrix_free(temp); -// -// return matrix_sum(temp);; -//} \ No newline at end of file +} \ No newline at end of file diff --git a/neuronal_network.h b/neuronal_network.h index c0fce54..080e944 100644 --- a/neuronal_network.h +++ b/neuronal_network.h @@ -1,11 +1,9 @@ -#pragma once #include "matrix.h" #include "image.h" typedef struct { int input_size; - //Matrix* input; as local variable given to function // hidden layers int hidden_size; @@ -19,7 +17,6 @@ typedef struct { int output_size; Matrix* weights_output; Matrix* bias_output; - //Matrix* output; as local variable given to function double learning_rate; @@ -28,16 +25,16 @@ typedef struct { static const int MAX_BYTES = 100; Neural_Network* new_network(int input_size, int hidden_size, int output_size, double learning_rate); -//void print_network(Neural_Network* network); + void randomize_network(Neural_Network* network, int scope); void free_network(Neural_Network* network); void save_network(Neural_Network* network); Neural_Network* load_network(char* file); +void print_network(Neural_Network* network); + double measure_network_accuracy(Neural_Network* network, Image** images, int amount); Matrix* predict_image(Neural_Network* network, Image* image); -Matrix* predict(Neural_Network* network, Matrix* image_data); void train_network(Neural_Network* network, Image *image, int label); -void batch_train_network(Neural_Network* network, Image** images, int size);