I've tried several times now to write my own neural network class and I think this is the closest I've gotten, the network seems to run properly but it doesn't seem to learn correctly.
I'm trying to make it approximate exclusive or but it seems to be only capable of learning to give one output regardless of the input. For example, if I run back propogation with 1 1 as input and 0 as the desired output it will output 0 when given any pattern as input.
My usual method of training though involves back propogation with the following inputs and target outputs
1 1, 0
1 0, 1
0 1, 1
0 0, 0
and I run back propogation with these 4 pairs 1000 times. But when I do this I end of with a network that outputs .5 for all inputs, I even made a graph of the absolute error versus iterations, hopefully the link works.
http://img.photobucket.com/albums/v493/OffbeatPatriot/plot1-1.png
It seems like instead of learning the proper response for patterns it just gets stuck between the two responses I want it to give in the different situations. If nothing else it seems consistent so I wonder if I could simply have misunderstood the theory.
This is my code, I'm afraid it's a lot, so I made an attempt to annotate it, although I don't frequently do that so I'm not sure if my comments are helpful.
Network.h
#ifndef NETWORK_H_INCLUDED
#define NETWORK_H_INCLUDED
#include <math.h>
#include <vector>
#include <map>
#include <string>
#include <cstdlib>
#include <time.h>
#include <iostream>
#include <fstream>
using namespace std;
class Neuron
{
public:
map<Neuron*, double> weights;
double bias;
double activation;
double delta;
bool updated;
string group;
vector<Neuron*> connections;
Neuron(string name);
void update();
};
class Neural_Network
{
private:
vector<Neuron*> inputs;
vector<Neuron*> outputs;
map<string, vector<Neuron*> > groups;
double learning_rate;
void initialize();
public:
Neural_Network();
void set_learning_rate(double rate);
void set_input(string name);
void set_output(string name);
vector<double> get_weights();
void add_group(string name, int size);
vector<double> run(vector<double> input);
vector<double> hebbian(vector<double> input);
void back_propogation(vector<vector<double> > input, vector<vector<double> > targets, int iterations = 1);
void connect(string from, string to, double density = 1);
void print();
};
#endif // NETWORK_H_INCLUDED
Network.cpp
#include "Network.h"
double sigmoid(double input, double slope = 1)
{
return 1.0 / (1.0 + exp(-input/slope));
}
double abs(double in)
{
if(in > 0)
return in;
else
return -in;
}
Neuron::Neuron(string name){group = name;}
void Neuron::update()
{
//Update neuron's activation
activation = bias;
for(map<Neuron*, double>::iterator iter = weights.begin();iter != weights.end();iter++)
{
activation += iter->first->activation * iter->second;
}
activation = sigmoid(activation);
updated = true;
}
Neural_Network::Neural_Network(){learning_rate = .5;}
void Neural_Network::set_learning_rate(double rate)
{
learning_rate = rate;
}
void Neural_Network::set_input(string name)
{
//set output group
inputs = groups[name];
}
void Neural_Network::set_output(string name)
{
//set input group
outputs = groups[name];
}
vector<double> Neural_Network::get_weights()
{
vector<double> weights;
//iterate over all neurons and collect the weights
for (map<string, vector<Neuron*> >::iterator it = groups.begin(); it != groups.end(); ++it)
{
for (vector<Neuron*>::iterator it2 = it->second.begin(); it2 != it->second.end(); ++it2)
{
for (map<Neuron*, double>::iterator it3 = (*it2)->weights.begin(); it3 != (*it2)->weights.end(); ++it3)
{
weights.push_back(it3->second);
}
}
}
return weights;
}
void Neural_Network::add_group(string name, int size)
{
vector<Neuron*> neurons;
//add a group of neurons of the requested size and save under the given name
for(int i = 0;i < size;i++)
{
neurons.push_back(new Neuron(name));
}
groups[name] = neurons;
}
void Neural_Network::initialize()
{
//set all neurons as not being updated, in the run and back propogation functions many of the neurons
//will have their update function called or deltas adjusted multiple times, the updated value is used
//to keep neurons from updating after they already have been
for (map<string, vector<Neuron*> >::iterator it = groups.begin(); it != groups.end(); ++it)
{
for (vector<Neuron*>::iterator it2 = it->second.begin(); it2 != it->second.end(); ++it2)
{
(*it2)->updated = false;
}
}
}
vector<double> Neural_Network::run(vector<double> input)
{
vector<double> output;
initialize();
//set the input neuron activations to the input values
for(unsigned int i = 0;i < input.size();i++)
{
inputs[i]->activation = input[i];
}
vector<Neuron*> to_update, to_update2, temp;
//collect all the neurons which take input fron the input neurons
for (vector<Neuron*>::iterator it = inputs.begin(); it != inputs.end(); ++it)
{
to_update.insert(to_update.begin(), (*it)->connections.begin(), (*it)->connections.end());
}
//update all neurons
for (int i = 0;i < to_update.size(); i++)
{
//check if neuron was already updated
if(!to_update[i]->updated)
{
to_update[i]->update();
//add all neurons that take input from this neuron to the update queue
to_update.insert(to_update.end(), to_update[i]->connections.begin(), to_update[i]->connections.end());
}
}
//collect and return the activations of the output neurons
for(vector<Neuron*>::iterator it = outputs.begin();it != outputs.end();it++)
{
output.push_back((*it)->activation);
}
return output;
}
vector<double> Neural_Network::hebbian(vector<double> input)
{
//same function as run but updates the neuron weights with a hebbian learning rule
vector<double> output;
for(unsigned int i = 0;i < input.size();i++)
{
inputs[i]->activation = input[i];
}
vector<Neuron*> to_update, to_update2, temp;
for (vector<Neuron*>::iterator it = inputs.begin(); it != inputs.end(); ++it)
{
to_update.insert(to_update.begin(), (*it)->connections.begin(), (*it)->connections.end());
}
for (vector<Neuron*>::iterator it = to_update.begin(); it != to_update.end(); ++it)
{
(*it)->update();
for(map<Neuron*, double>::iterator iter = (*it)->weights.begin();iter != (*it)->weights.end();iter++)
{
iter->second += learning_rate*(*it)->activation*iter->first->activation;
}
to_update.insert(to_update.end(), (*it)->connections.begin(), (*it)->connections.end());
}
for(vector<Neuron*>::iterator it = outputs.begin();it != outputs.end();it++)
{
output.push_back((*it)->activation);
}
return output;
}
void Neural_Network::back_propogation(vector<vector<double> > input, vector<vector<double> > targets, int iterations)
{
vector<Neuron*> to_do, done, next_to_do;
double error;
fstream file ("learning.bin", ios::out|ios::binary);
double size = input.size() * iterations;
//write the number of times the algorithm will be run to a file
file.write((char*)&size, sizeof(double));
for(int q = 0;q < iterations;q++)
{
//iterate over all inputs
for(unsigned int i = 0;i < input.size();i++)
{
//run the network with the input
run(input[i]);
error = 0;
//calculate the deltas of all the output neurons
for(unsigned int u = 0;u < outputs.size();u++)
{
outputs[u]->delta = outputs[u]->activation*(1 - outputs[u]->activation)*(targets[i][u] - outputs[u]->activation);
//sum the absolute value of the error for each neuron
error += abs((targets[i][u] - outputs[u]->activation));
//remember the neurons that have had their deltas calculated for back propogation
done.push_back(outputs[u]);
//iterate over the weights of each output neuron to find the neurons that give input to them,
//these will be the next neurons to have their deltas calculated
for(map<Neuron*, double>::iterator iter = outputs[u]->weights.begin();iter != outputs[u]->weights.end();iter++)
{
to_do.push_back(iter->first);
}
}
//write the error
file.write((char*)&error, sizeof(double));
initialize();
//continue calculating deltas until the algorithm comes to a neuron with no weights(ie, an input neuron)
while(to_do[0]->weights.size() > 0)
{
//calculate deltas for next batch of neurons
for(unsigned int i = 0;i < to_do.size();i++)
{
if(!to_do[i]->updated)
{
//initialize deltas to 0
to_do[i]->delta = 0;
//start calculating deltas
for(unsigned int u = 0;u < done.size();u++)
{
//check for a weight connecting the two neurons
if(done[u]->weights.count(to_do[i]) == 1);
{
to_do[i]->delta += done[u]->delta*done[u]->weights[to_do[i]];
}
}
to_do[i]->delta *= to_do[i]->activation*(1 - to_do[i]->activation);
//finish calculating deltas
//iterate over weights to find next batch of neurons to find deltas for
for(map<Neuron*, double>::iterator iter = to_do[i]->weights.begin();iter != to_do[i]->weights.end();iter++)
{
next_to_do.push_back(iter->first);
}
to_do[i]->updated = true;
}
}
//set the next batch of neurons to calculate deltas for as the current one
to_do = next_to_do;
}
//update the weights and biases of all neurons
for (map<string, vector<Neuron*> >::iterator it = groups.begin(); it != groups.end(); ++it)
{
for (vector<Neuron*>::iterator it2 = it->second.begin(); it2 != it->second.end(); ++it2)
{
(*it2)->bias += learning_rate*(*it2)->delta;
for(map<Neuron*, double>::iterator iter = (*it2)->weights.begin();iter != (*it2)->weights.end();iter++)
{
iter->second += learning_rate*iter->first->activation*(*it2)->delta;
}
}
}
}
}
file.close();
}
void Neural_Network::connect(string from, string to, double density)
{
//connect the neuron groups from and to with the specified connection density
time_t seconds;
time(&seconds);
srand((unsigned int) seconds);
//iterate over all neurons in the from group
for (vector<Neuron*>::iterator it = groups[from].begin(); it!=groups[from].end(); ++it)
{
//clear all the post synaptic connections
(*it)->connections.clear();
//iterate over all neurons in the to group
for (vector<Neuron*>::iterator it2 = groups[to].begin(); it2!=groups[to].end(); ++it2)
{
//connect the two neurons with the probability defined in density
if(*it != *it2 && rand()/((double)RAND_MAX) < density)
{
//add the connection to the pre synaptic neuron
(*it)->connections.push_back(*it2);
//add the connection to the post synaptic neuron with a random weight in [0, 1]
(*it2)->weights[*it] = rand()/((double)RAND_MAX);
//set the bias of the post synaptic neuron to a random value in [0, 1]
(*it2)->bias = rand()/((double)RAND_MAX);
}
}
}
}
void Neural_Network::print()
{
//print out all information about the neural network
for (map<string, vector<Neuron*> >::iterator it = groups.begin(); it != groups.end(); ++it)
{
cout << it->first << ":\n";
for (int i = 0;i < it->second.size(); i++)
{
cout << " Neuron " << i << ":\n";
cout << " activation: " << it->second[i]->activation << "\n";
cout << " delta: " << it->second[i]->delta << "\n";
cout << " bias: " << it->second[i]->bias << "\n";
cout << " inputs:\n";
for (map<Neuron*, double>::iterator it2 = it->second[i]->weights.begin(); it2 != it->second[i]->weights.end(); ++it2)
{
for(int u = 0;u < groups[it2->first->group].size();u++)
{
if(groups[it2->first->group][u] == it2->first)
{
cout << " " << it2->first->group << " Neuron " << u << " weight: " << it2->second << "\n";
}
}
}
}
}
}
main.cpp
#include "Network.h"
using namespace std;
int main()
{
Neural_Network network;
network.add_group("input", 2);
network.add_group("hidden", 2);
network.add_group("output", 1);
network.connect("input", "hidden");
network.connect("hidden", "output");
network.set_input("input");
network.set_output("output");
vector<vector<double> > inputs, outputs;
vector<double> test1, test2, test3, test4, ans1, ans2, ans3, ans4;
test1.push_back(1);
test1.push_back(1);
test2.push_back(1);
test2.push_back(0);
test3.push_back(0);
test3.push_back(1);
test4.push_back(0);
test4.push_back(0);
ans1.push_back(0);
ans2.push_back(1);
ans3.push_back(1);
ans4.push_back(0);
inputs.push_back(test1);
inputs.push_back(test2);
inputs.push_back(test3);
inputs.push_back(test4);
outputs.push_back(ans1);
outputs.push_back(ans2);
outputs.push_back(ans3);
outputs.push_back(ans4);
network.back_propogation(inputs, outputs, 100);
cout << "running\n";
cout << network.run(test1)[0] << "\n";
cout << network.run(test2)[0] << "\n";
cout << network.run(test3)[0] << "\n";
cout << network.run(test4)[0] << "\n";
system("python plot_learning.py");
}