(try): stochastic gradient descent
This commit is contained in:
parent
86ac3e855c
commit
34a23c6eab
7 changed files with 288 additions and 27 deletions
|
|
@ -3,5 +3,5 @@ project(c_net C)
|
||||||
|
|
||||||
set(CMAKE_C_STANDARD 11)
|
set(CMAKE_C_STANDARD 11)
|
||||||
|
|
||||||
add_executable(c_net main.c matrix.c image.c neuronal_network.c util.c util.h)
|
add_executable(c_net main.c matrix.c image.c neuronal_network.c util.c util.h neural_net.c neural_net.h)
|
||||||
target_link_libraries(c_net m)
|
target_link_libraries(c_net m)
|
||||||
|
|
|
||||||
15
image.c
15
image.c
|
|
@ -68,8 +68,8 @@ Image * load_pgm_image(char * image_file_string){
|
||||||
Image** import_images(char* image_file_string, char* label_file_string, int* _number_imported, int count) {
|
Image** import_images(char* image_file_string, char* label_file_string, int* _number_imported, int count) {
|
||||||
printf("Loading Images\n");
|
printf("Loading Images\n");
|
||||||
// create file pointer for the image and label data
|
// create file pointer for the image and label data
|
||||||
FILE* image_file = fopen(image_file_string, "r");
|
FILE* image_file = fopen(image_file_string, "rb");
|
||||||
FILE* label_file = fopen(label_file_string, "r");
|
FILE* label_file = fopen(label_file_string, "rb");
|
||||||
|
|
||||||
// check if the file could be opened
|
// check if the file could be opened
|
||||||
if(image_file == NULL || label_file == NULL) {
|
if(image_file == NULL || label_file == NULL) {
|
||||||
|
|
@ -159,6 +159,17 @@ Image** import_images(char* image_file_string, char* label_file_string, int* _nu
|
||||||
return images;
|
return images;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Builds the 10x1 target column for an image: the row whose index equals the
// image's label is set to 1. Terminates the program when the label is not a
// digit in 0..9. The returned matrix comes from matrix_create and belongs to
// the caller.
Matrix* create_one_hot_result(Image* image){
    const int label_is_digit = image->label >= 0 && image->label <= 9;

    if(!label_is_digit){
        printf("create_one_hot_result should only be used on correctly labeled images\n");
        exit(1);
    }

    Matrix* one_hot = matrix_create(10, 1);
    one_hot->numbers[image->label][0] = 1;
    return one_hot;
}
|
||||||
|
|
||||||
void img_print (Image* img) {
|
void img_print (Image* img) {
|
||||||
|
|
||||||
//print the image
|
//print the image
|
||||||
|
|
|
||||||
3
image.h
3
image.h
|
|
@ -1,8 +1,6 @@
|
||||||
#pragma once
|
#pragma once
|
||||||
#include "matrix.h"
|
#include "matrix.h"
|
||||||
|
|
||||||
#include "matrix.h"
|
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
Matrix* pixel_values;
|
Matrix* pixel_values;
|
||||||
char label;
|
char label;
|
||||||
|
|
@ -27,6 +25,7 @@ static const int MAGIC_NUMBER_IMAGES = 2051;
|
||||||
*/
|
*/
|
||||||
Image ** import_images(char* image_file_string, char* label_file_string, int* number_imported, int count);
|
Image ** import_images(char* image_file_string, char* label_file_string, int* number_imported, int count);
|
||||||
Image * load_pgm_image(char * image_file_string);
|
Image * load_pgm_image(char * image_file_string);
|
||||||
|
Matrix* create_one_hot_result(Image* image);
|
||||||
void img_print (Image* image);
|
void img_print (Image* image);
|
||||||
void img_visualize(Image*image);
|
void img_visualize(Image*image);
|
||||||
void img_free (Image* image);
|
void img_free (Image* image);
|
||||||
49
main.c
49
main.c
|
|
@ -2,25 +2,36 @@
|
||||||
|
|
||||||
#include "matrix.h"
|
#include "matrix.h"
|
||||||
#include "image.h"
|
#include "image.h"
|
||||||
#include "neuronal_network.h"
|
|
||||||
|
#include "neural_net.h"
|
||||||
|
|
||||||
int main() {
|
int main() {
|
||||||
Image** images = import_images("../data/train-images.idx3-ubyte", "../data/train-labels.idx1-ubyte", NULL, 60000);
|
// Image** images = import_images("../data/train-images.idx3-ubyte", "../data/train-labels.idx1-ubyte", NULL, 60000);
|
||||||
// img_visualize(images[4]);
|
//// img_visualize(images[4]);
|
||||||
|
//
|
||||||
Neural_Network* nn = new_network(28*28, 16, 10, 0.5);
|
// Neural_Network* nn = new_network(28*28, 16, 10, 0.5);
|
||||||
randomize_network(nn, 20);
|
// randomize_network(nn, 20);
|
||||||
// save_network(nn);
|
//// save_network(nn);
|
||||||
|
//
|
||||||
// Neural_Network* nn = load_network("../networks/test1.txt");
|
//// Neural_Network* nn = load_network("../networks/test1.txt");
|
||||||
|
//
|
||||||
|
//
|
||||||
for (int i = 0; i < 10000; ++i) {
|
// for (int i = 0; i < 10000; ++i) {
|
||||||
train_network(nn, images[i], images[i]->label);
|
// train_network(nn, images[i], images[i]->label);
|
||||||
}
|
// }
|
||||||
|
//
|
||||||
measure_network_accuracy(nn, images, 100);
|
// measure_network_accuracy(nn, images, 100);
|
||||||
|
// Matrix *m = matrix_create(2, 1);
|
||||||
|
// m->numbers[0][0] = 1;
|
||||||
|
// m->numbers[1][0] = 1;
|
||||||
|
// Neural_Network * net = create_network(3, 2, 3, 1);
|
||||||
|
// feedforward(net, m);
|
||||||
|
//
|
||||||
|
// int pause;
|
||||||
|
int imported_count = 0;
|
||||||
|
Image** images = import_images("../data/train-images.idx3-ubyte", "../data/train-labels.idx1-ubyte", &imported_count, 60000);
|
||||||
|
matrix_save(images[0]->pixel_values, "image1.txt");
|
||||||
|
matrix_save(images[1]->pixel_values, "images2.txt");
|
||||||
|
Neural_Network * net = create_network(3, 28*28, 30, 10);
|
||||||
|
train_network_with_batches(net, images, imported_count, 1, 10, 3);
|
||||||
}
|
}
|
||||||
218
neural_net.c
Normal file
218
neural_net.c
Normal file
|
|
@ -0,0 +1,218 @@
|
||||||
|
//
|
||||||
|
// Created by jakob on 22.09.2023.
|
||||||
|
//
|
||||||
|
#include "neural_net.h"

#include <math.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>

#include "image.h"
|
||||||
|
|
||||||
|
// Classifies the first imageCount entries of images with the network and
// prints the score as "<correct>/<total>" on its own line.
//
// network:    network to evaluate (not modified)
// images:     array holding at least imageCount images
// imageCount: number of images to test
void evaluate(Neural_Network * network, Image** images, int imageCount){
    int numCorrect = 0;

    // strictly less than: images[imageCount] is already outside the requested range
    for(int i = 0; i < imageCount; i++){
        Matrix * input = matrix_flatten(images[i]->pixel_values, 0);
        Matrix * res = feedforward(network, input);

        char result = (char)matrix_argmax(res);
        if(result == images[i]->label){
            numCorrect++;
        }

        // feedforward hands back its argument unchanged when the network has
        // no weight layers; freeing both pointers would then be a double free
        if(res != input){
            matrix_free(res);
        }
        matrix_free(input);
    }

    // newline so the scores of consecutive epochs do not run together
    printf("%d/%d\n", numCorrect, imageCount);
}
|
||||||
|
|
||||||
|
double sigmoid(double input) {
|
||||||
|
return 1.0 / (1 + exp(-1 * input));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Derivative of the logistic function at input: sigmoid(input) * (1 - sigmoid(input)).
double sigmoid_prime(double input){
    const double activation = sigmoid(input);
    return activation * (1 - activation);
}
|
||||||
|
|
||||||
|
// Computes the gradient of the quadratic cost for a single training sample.
//
// network:         network whose weights/biases are read (not modified)
// training_sample: image to propagate; its label selects the one-hot target
// weights_delta:   array of layer_count-1 slots; slot i receives the gradient
//                  for network->weights[i]
// biases_delta:    array of layer_count-1 slots; slot i receives the gradient
//                  for network->biases[i]
//
// Ownership: every slot of weights_delta / biases_delta must hold a matrix on
// entry. That matrix is freed and replaced by a newly computed one, so the
// caller stays responsible for freeing whatever the slots hold afterwards.
//
// NOTE(review): assumes dot/add/subtract/multiply/transpose/apply each return
// a newly allocated matrix and that multiply is element-wise - this matches
// how they are already used in this file, confirm against matrix.c.
void back_prop(Neural_Network * network, Image* training_sample, Matrix ** weights_delta, Matrix ** biases_delta){
    const int layer_count = network->layer_count;
    const int last = layer_count - 2; // index of the output layer's weights and biases
    if(last < 0){
        return; // no weight layers, nothing to differentiate
    }

    // parenthesised element counts: "sizeof(x) * n - 1" subtracts a byte, not an element
    Matrix ** layer_activations = malloc(sizeof *layer_activations * layer_count);
    Matrix ** layer_activations_wo_sigmoid = malloc(sizeof *layer_activations_wo_sigmoid * (layer_count - 1));
    if(layer_activations == NULL || layer_activations_wo_sigmoid == NULL){
        fprintf(stderr, "back_prop: out of memory\n");
        exit(1);
    }

    //feedforward######################################
    // keep every activation a[l] and every weighted input z[l] for the backward pass
    layer_activations[0] = matrix_flatten(training_sample->pixel_values, 0);
    for(int i = 0; i < layer_count - 1; i++){
        Matrix * dot_result = dot(network->weights[i], layer_activations[i]);
        layer_activations_wo_sigmoid[i] = add(dot_result, network->biases[i]);
        matrix_free(dot_result);
        layer_activations[i + 1] = apply(sigmoid, layer_activations_wo_sigmoid[i]);
    }

    //backward pass####################################
    // output layer: delta = (a_out - y) * sigmoid'(z_out), element-wise
    Matrix * desired_result = create_one_hot_result(training_sample);
    Matrix * cost_derivative = subtract(layer_activations[layer_count - 1], desired_result);
    Matrix * derivative = apply(sigmoid_prime, layer_activations_wo_sigmoid[last]);
    Matrix * delta = multiply(cost_derivative, derivative);
    matrix_free(desired_result);
    matrix_free(cost_derivative);
    matrix_free(derivative);

    // bias gradient is delta itself; the slot takes ownership of it
    matrix_free(biases_delta[last]);
    biases_delta[last] = delta;

    // weight gradient is delta * (activation feeding this layer)^T
    Matrix * transposed = transpose(layer_activations[last]);
    matrix_free(weights_delta[last]);
    weights_delta[last] = dot(delta, transposed);
    matrix_free(transposed);

    // hidden layers, back to front: delta = (W[l+1]^T . delta_next) * sigmoid'(z[l])
    for(int layer = last - 1; layer >= 0; layer--){
        derivative = apply(sigmoid_prime, layer_activations_wo_sigmoid[layer]);
        Matrix * transposed_layer_weight = transpose(network->weights[layer + 1]);
        Matrix * propagated = dot(transposed_layer_weight, delta);
        matrix_free(transposed_layer_weight);

        // the previous delta stays alive: it is owned by biases_delta[layer + 1]
        delta = multiply(propagated, derivative);
        matrix_free(propagated);
        matrix_free(derivative);

        matrix_free(biases_delta[layer]);
        biases_delta[layer] = delta;

        Matrix * transposed_activation = transpose(layer_activations[layer]);
        matrix_free(weights_delta[layer]);
        weights_delta[layer] = dot(delta, transposed_activation);
        matrix_free(transposed_activation);
    }

    //free layer_activations
    for(int i = 0; i < layer_count; i++){
        matrix_free(layer_activations[i]);
    }
    free(layer_activations);

    //free layer_activations wo sigmoid
    for(int i = 0; i < layer_count - 1; i++){
        matrix_free(layer_activations_wo_sigmoid[i]);
    }
    free(layer_activations_wo_sigmoid);
}
|
||||||
|
|
||||||
|
// Performs one gradient-descent step using the samples
// training_data[batch_start] .. training_data[batch_end] (both inclusive).
//
// The per-sample gradients from back_prop are summed, scaled by
// learning_rate / <number of samples> and subtracted from the network's
// weights and biases. network->weights[i] and network->biases[i] are replaced
// by new matrices; the old ones are freed.
//
// Does nothing when the range is empty or the network has no weight layers.
void update_batch(Neural_Network * network, Image** training_data, int batch_start, int batch_end, double learning_rate){
    const int sample_count = batch_end - batch_start + 1; // range is inclusive on both ends
    const int weight_layers = network->layer_count - 1;
    if(sample_count <= 0 || weight_layers <= 0){
        return;
    }

    // parenthesised element counts: "sizeof(x) * n - 1" subtracts a byte, not an element
    Matrix** weights_delta = malloc(sizeof *weights_delta * weight_layers);
    Matrix** biases_delta = malloc(sizeof *biases_delta * weight_layers);
    Matrix** sum_weights_delta = malloc(sizeof *sum_weights_delta * weight_layers);
    Matrix** sum_biases_delta = malloc(sizeof *sum_biases_delta * weight_layers);
    if(weights_delta == NULL || biases_delta == NULL || sum_weights_delta == NULL || sum_biases_delta == NULL){
        fprintf(stderr, "update_batch: out of memory\n");
        exit(1);
    }

    for(int i = 0; i < weight_layers; i++){
        weights_delta[i] = matrix_create(network->weights[i]->rows, network->weights[i]->columns);
        biases_delta[i] = matrix_create(network->biases[i]->rows, network->biases[i]->columns);
        sum_weights_delta[i] = matrix_create(network->weights[i]->rows, network->weights[i]->columns);
        sum_biases_delta[i] = matrix_create(network->biases[i]->rows, network->biases[i]->columns);

        // NOTE(review): matrix_create is not visible from here; zero the
        // accumulators explicitly so the sums are guaranteed to start at 0
        matrix_fill(sum_weights_delta[i], 0);
        matrix_fill(sum_biases_delta[i], 0);
    }

    // accumulate the gradient of every sample in the batch
    for(int i = batch_start; i <= batch_end; i++){
        back_prop(network, training_data[i], weights_delta, biases_delta);

        for(int j = 0; j < weight_layers; j++){
            Matrix * sum_weights_free = sum_weights_delta[j];
            sum_weights_delta[j] = add(sum_weights_delta[j], weights_delta[j]);
            matrix_free(sum_weights_free);

            Matrix * sum_biases_free = sum_biases_delta[j];
            sum_biases_delta[j] = add(sum_biases_delta[j], biases_delta[j]);
            matrix_free(sum_biases_free);
        }
    }

    //change network
    // average over the real number of samples; dividing by batch_end - batch_start
    // is off by one and is a division by zero for a single-sample batch
    const double scaling_factor = learning_rate / sample_count;
    for(int i = 0; i < weight_layers; i++){
        //update weights
        Matrix * weight_change = scale(sum_weights_delta[i], scaling_factor);
        Matrix * new_weights = subtract(network->weights[i], weight_change);
        matrix_free(weight_change);
        matrix_free(network->weights[i]);
        network->weights[i] = new_weights;

        //update biases
        Matrix * bias_change = scale(sum_biases_delta[i], scaling_factor);
        Matrix * new_biases = subtract(network->biases[i], bias_change);
        matrix_free(bias_change);
        matrix_free(network->biases[i]);
        network->biases[i] = new_biases;

        // per-layer scratch matrices are no longer needed
        matrix_free(sum_weights_delta[i]);
        matrix_free(sum_biases_delta[i]);
        matrix_free(weights_delta[i]);
        matrix_free(biases_delta[i]);
    }

    free(weights_delta);
    free(biases_delta);
    free(sum_weights_delta);
    free(sum_biases_delta);
}
|
||||||
|
|
||||||
|
// Trains the network with mini-batch gradient descent.
//
// network:       network to train (weights and biases are updated in place)
// training_data: array of image_count training images
// image_count:   number of entries in training_data
// epochs:        number of passes over the training data
// batch_size:    samples per mini batch; images left over after the last
//                full batch (image_count % batch_size) are not used
// learning_rate: step size handed to update_batch
//
// After every epoch the network is evaluated on the first 1000 training
// images (or on all of them when fewer are available).
void train_network_with_batches(Neural_Network * network, Image** training_data, int image_count, int epochs, int batch_size, double learning_rate){
    // batch_size == 0 would divide by zero below; nothing to train on without images
    if(batch_size <= 0 || image_count <= 0){
        return;
    }

    const int batch_count = image_count / batch_size;
    // never evaluate more images than were actually imported
    const int evaluation_count = image_count < 1000 ? image_count : 1000;

    for(int epoch = 0; epoch < epochs; epoch++){
        for(int batch = 0; batch < batch_count; batch++){
            int batch_start = batch * batch_size;
            int batch_end = batch_start + batch_size - 1; // inclusive
            update_batch(network, training_data, batch_start, batch_end, learning_rate);
        }
        evaluate(network, training_data, evaluation_count);
    }
}
|
||||||
|
|
||||||
|
|
||||||
|
Neural_Network* create_network(int layer_count,...){
|
||||||
|
Neural_Network * network = malloc(sizeof(Neural_Network));
|
||||||
|
network->layer_count = layer_count;
|
||||||
|
network->sizes = malloc(sizeof(int) * layer_count);
|
||||||
|
network->weights = malloc(sizeof(Matrix*)*(layer_count-1));
|
||||||
|
network->biases = malloc(sizeof(Matrix*)*(layer_count-1));
|
||||||
|
|
||||||
|
//read sizes
|
||||||
|
va_list layer_sizes;
|
||||||
|
va_start(layer_sizes, layer_count);
|
||||||
|
for(int i = 0; i < layer_count; i++){
|
||||||
|
network->sizes[i] = va_arg(layer_sizes, int);
|
||||||
|
}
|
||||||
|
va_end(layer_sizes);
|
||||||
|
|
||||||
|
//init weights
|
||||||
|
for(int i = 0; i < layer_count-1; i++){
|
||||||
|
network->weights[i] = matrix_create(network->sizes[i+1], network->sizes[i]);
|
||||||
|
matrix_randomize(network->weights[i], network->sizes[i]);
|
||||||
|
}
|
||||||
|
|
||||||
|
//init biases
|
||||||
|
for(int i = 0; i < layer_count-1; i++){
|
||||||
|
network->biases[i] = matrix_create(network->sizes[i+1], 1);
|
||||||
|
matrix_randomize(network->biases[i], network->sizes[i]);
|
||||||
|
}
|
||||||
|
|
||||||
|
return network;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
//given an input "activations" it returns the matrix that the network would output
|
||||||
|
Matrix * feedforward(Neural_Network * net, Matrix * activations){
|
||||||
|
|
||||||
|
Matrix * current_layer_activation = activations;
|
||||||
|
|
||||||
|
//next_layer_activation = sigmoid(dot(layer_weights, layer_activations)+layer_biases);
|
||||||
|
for(int i = 0; i < net->layer_count - 1; i++){
|
||||||
|
Matrix * dot_result = dot(net->weights[i], current_layer_activation);
|
||||||
|
Matrix * addition_result = add(dot_result, net->biases[i]);
|
||||||
|
Matrix * sigmoid_result = apply(sigmoid, addition_result);
|
||||||
|
|
||||||
|
current_layer_activation = sigmoid_result;
|
||||||
|
matrix_free(dot_result);
|
||||||
|
matrix_free(addition_result);
|
||||||
|
}
|
||||||
|
return current_layer_activation;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
22
neural_net.h
Normal file
22
neural_net.h
Normal file
|
|
@ -0,0 +1,22 @@
|
||||||
|
//
|
||||||
|
// Created by jakob on 22.09.2023.
|
||||||
|
//
|
||||||
|
|
||||||
|
#ifndef C_NET_NEURAL_NET_H
#define C_NET_NEURAL_NET_H

#include "matrix.h"
#include "image.h"

// Fully connected feed-forward network.
typedef struct {
    int layer_count;    // number of layers, input and output included
    int* sizes;         // layer_count entries: neuron count of every layer
    Matrix ** weights;  // layer_count-1 entries: weights[i] is sizes[i+1] x sizes[i]
    Matrix ** biases;   // layer_count-1 entries: biases[i] is sizes[i+1] x 1
} Neural_Network;

// Creates a network; pass layer_count followed by layer_count int layer sizes.
Neural_Network* create_network(int layer_count,...);

// Returns the output of the network for the input column "activations".
Matrix * feedforward(Neural_Network * net, Matrix * activations);

// Trains the network with mini batches of batch_size images for the given number of epochs.
void train_network_with_batches(Neural_Network * network, Image** training_data, int image_count, int epochs, int batch_size, double learning_rate);

// The guard has to close after all declarations, otherwise a second
// inclusion redefines Neural_Network.
#endif //C_NET_NEURAL_NET_H
|
||||||
|
|
@ -343,9 +343,9 @@ Matrix * backPropagation(double learning_rate, Matrix* weights, Matrix* biases,
|
||||||
|
|
||||||
//void batch_train_network(Neural_Network* network, Image** images, int size);
|
//void batch_train_network(Neural_Network* network, Image** images, int size);
|
||||||
|
|
||||||
double sigmoid(double input) {
|
//double sigmoid(double input) {
|
||||||
return 1.0 / (1 + exp(-1 * input));
|
// return 1.0 / (1 + exp(-1 * input));
|
||||||
}
|
//}
|
||||||
|
|
||||||
double sigmoid_derivative(double x) {
|
double sigmoid_derivative(double x) {
|
||||||
return x * (1.0 - x);
|
return x * (1.0 - x);
|
||||||
|
|
|
||||||
Reference in a new issue