Hello Everyone.
Could anyone please help me quickly find duplicates in a vector of over 60000 records. It takes a loooootttttt of time ;'(. Here's the code.
#include <string>
#include <vector>
#include <iostream>
#include <algorithm>
#include <sstream>
struct MyPred
{
std::string x;
std::string y;
// add default values
MyPred(const std::string& x = "", const std::string& y = ""): x(x), y(y) {}
bool operator==(const MyPred& p) const
{
return x == p.x && y == p.y;
}
bool operator<(const MyPred& p) const
{
if(x < p.x) return true;
if(x > p.x) return false;
if(y < p.y) return true;
if(y > p.y) return false;
return false;
}
};
int main()
{
std::vector<MyPred>* vPred = new std::vector<MyPred>;
MyPred pred;
for(int i = 0; i < 60000; i++)
{
std::stringstream ss1;
ss1 << i;
pred.x = ss1.str();
std::stringstream ss2;
ss2 << i+1;
pred.y = ss2.str();
vPred->push_back(pred);
}
for(int i = 10; i < 25; i++)
{
std::stringstream ss1;
ss1 << i;
pred.x = ss1.str();
std::stringstream ss2;
ss2 << i+1;
pred.y = ss2.str();
vPred->push_back(pred);
}
for(int i = 35000; i < 35005; i++)
{
std::stringstream ss1;
ss1 << i;
pred.x = ss1.str();
std::stringstream ss2;
ss2 << i+1;
pred.y = ss2.str();
vPred->push_back(pred);
}
/*vPred->push_back(MyPred("a", "a"));
vPred->push_back(MyPred("a", "b"));
vPred->push_back(MyPred("a", "a"));
vPred->push_back(MyPred("b", "a"));
vPred->push_back(MyPred("a", "b"));
vPred->push_back(MyPred("a", "c"));
vPred->push_back(MyPred("b", "b"));
vPred->push_back(MyPred("a", "a"));
vPred->push_back(MyPred("a", "a"));*/
// The values need to be in order for equal_range() to work
std::sort(vPred->begin(), vPred->end());
std::vector<MyPred> uPred; // values that were always unique
std::vector<MyPred> dPred; // values that were duplicated
std::pair<std::vector<MyPred>::iterator, std::vector<MyPred>::iterator> ret;
for(std::vector<MyPred>::iterator i = vPred->begin(); i != vPred->end(); i = ret.second)
{
ret = std::equal_range(i, vPred->end(), *i);
if(ret.second - ret.first != 1) // duplicates
{
for(std::vector<MyPred>::iterator j = ret.first; j != ret.second; ++j)
{
dPred.push_back(*i); //put each duplicate onto a new vector
}
}
else if(ret.second - ret.first == 1)
{
uPred.push_back(*i);
}
}
std::cout << "vPred: Sorted input\n";
for(std::vector<MyPred>::iterator i = vPred->begin(); i != vPred->end(); ++i)
{
std::cout << "[" << i->x << ", " << i->y << "]" << '\n';
}
std::cout << "dPred: Only the values that were duplicated\n";
for(std::vector<MyPred>::iterator i = dPred.begin(); i != dPred.end(); ++i)
{
std::cout << "[" << i->x << ", " << i->y << "]" << '\n';
}
std::cout << "uPred: Only the values that were unique\n";
for(std::vector<MyPred>::iterator i = uPred.begin(); i != uPred.end(); ++i)
{
std::cout << "[" << i->x << ", " << i->y << "]" << '\n';
}
delete vPred;
}