header.h file:
#include <malloc.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#define STRBUF 256 /* Default string buffer size */

/* Evaluates to 1 when character `a` is a field separator (space or comma)
   and to 0 otherwise.  Both the argument and the whole expansion are
   parenthesized so the macro can be used inside larger expressions.
   The argument is evaluated twice: do not pass an expression with side
   effects. */
#define SEPARATOR(a) (((a) == ' ' || (a) == ',') ? 1 : 0)

/* Terminates the program when the allocation result `a` is NULL.
   Wrapped in do { } while (0) so it acts as one statement and is safe in an
   unbraced if/else; exits with a failure status so callers of the program
   can detect the error. */
#define SUCCESS(a) do { if ((a) == NULL) { printf("\nUnsuccessful Allocation");\
exit(EXIT_FAILURE); } } while (0)

/* Not referenced by any code visible in this listing; presumably the bucket
   count for HASHTABLE -- TODO confirm. */
#define HASHTBLSIZE 101
/* Node of a singly linked list that pairs a class value with a count.
   No code visible in this listing reads or writes this type. */
typedef struct tupleclass{
long ClassValue; /* Class of the instance */
long ClassCount; /* Frequency within the class */
struct tupleclass *Next; /* Ptr to the next TUPLECLASS (list link) */
}TUPLECLASS;
/* Node describing one instance: it links downward to another INSTANCE and
   sideways to a TUPLECLASS list.  No code visible in this listing uses it. */
typedef struct instance{
long tuple; /* Contains index of the instance in DataSet */
long ClassCount; /* Number of distinct class of instance */
struct instance *Down; /* Pointer to the next instance */
TUPLECLASS *Right; /* Pointer to the tuple class (head of a TUPLECLASS list) */
}INSTANCE;
/* Head record for a chain of INSTANCE nodes together with a counter.
   Presumably one entry per hash bucket (see HASHTBLSIZE) -- TODO confirm;
   no code visible in this listing uses it. */
typedef struct hashtable{
INSTANCE *Down; /* Pointer to the first instance */
long Count; /* Number of distinct instance */
}HASHTABLE;
lib.c file:
/* THIS FILE MAINLY CONTAINS THE CODE FOR READING DATA FROM AN EXTERNAL DATA
FILE AND GENERATING RANDOM NUMBERS */
#include "header.h"
/* Bit masks used by GetChangedSeed(); the "bit n" numbering is 1-based,
   so IBn == 1 << (n - 1). */
#define IB1 1 /* Definitions for GetChangedSeed(): mask for bit 1 */
#define IB2 2 /* mask for bit 2 */
#define IB5 16 /* mask for bit 5 (1 << 4) */
#define IB18 131072 /* mask for bit 18 (1 << 17) */
/* Constants used by ran3(). */
#define MBIG 1000000000 /* Definitions for ran3(): added back when a value drops below MZ */
#define MSEED 161803398 /* constant the caller's seed is subtracted from during initialization */
#define MZ 0 /* lower bound; values below it get MBIG added */
#define FAC (1.0/MBIG) /* scale factor applied to the returned value */
/*----------------------------- Global variables ----------------------------*/
/* Array of row strings: allocated by AllocateDataSpace(), filled by
   GetDataIntoBuffer(), one text line of the input file per entry. */
char **DataSet; /* dataset buffer */
/* Both are assigned in ReadData(): ROW from GetNumberofTuples(), COL from
   GetNumberofAttributes(). */
long ROW, COL; /* Number of row and column in a dataset */
/* State passed to GetChangedSeed(); set from the clock by InitRandomize().
   As a file-scope object it starts at 0 when InitRandomize() is not called. */
unsigned long Seed; /* Seed for random value generation */
/*-------------------------Function references-------------------------------*/
/* Forward declarations for helpers that ReadData() calls before their
   definitions appear further down in this file. */
long GetNumberofAttributes(char *); /* field count taken from the first line of the file */
long GetNumberofTuples(char *); /* number of lines in the file */
void AllocateDataSpace( long, long ); /* (rows, columns): allocates DataSet */
void GetDataIntoBuffer(char *); /* reads the file's lines into DataSet */
void DisplayDataBuffer(); /* prints every DataSet row to stdout */
/*---------------------------------------------------------------------------*/
/* Seeds the global shift-register state `Seed` from the current calendar
   time.  time() is declared in <time.h>; calling it without that header
   relied on an implicit declaration, which C99 and later no longer allow. */
void InitRandomize(void){
    Seed = (unsigned long) time(NULL);
    /* An all-zero state can never leave zero: GetChangedSeed() XORs bits of
       the state and shifts the result back in, so it would yield 0 forever. */
    if (Seed == 0)
        Seed = 1;
}
/*---------------------------------------------------------------------------*/
/* Advances the shift-register state at *seed by one step and returns the
   bit that was produced (0 or 1).
   The new bit is the XOR of bits 18, 5, 2 and 1 of the current state
   (1-based numbering); the state is then shifted left by one position and
   the new bit is placed in bit 1. */
int GetChangedSeed(unsigned long *seed)
{
    const unsigned long state = *seed;
    const unsigned long tap18 = (state & IB18) >> 17;   /* bit 18 */
    const unsigned long tap5 = (state & IB5) >> 4;      /* bit 5 */
    const unsigned long tap2 = (state & IB2) >> 1;      /* bit 2 */
    const unsigned long tap1 = state & IB1;             /* bit 1 */
    const unsigned long feedback = tap18 ^ tap5 ^ tap2 ^ tap1;

    /* Left-shift the state and store the XOR result in its bit 1. */
    *seed = (state << 1) | feedback;
    return (int) feedback;
}
/*---------------------------------------------------------------------------*/
/*---------------------Generates Random feature set--------------------------*/
/*---------------------------------------------------------------------------*/
/* Fills mask[0 .. COL-2] with random 0/1 flags, one per call to
   GetChangedSeed() on the global Seed.  The caller must supply a buffer of
   at least COL - 1 bytes; nothing is written when COL <= 1. */
void GenerateRandomFeatures(char *mask)
{
    long col = 0;

    while (col < COL - 1) {
        mask[col] = GetChangedSeed(&Seed) ? 1 : 0;
        col++;
    }
}
/*---------------------------------------------------------------------------*/
/* Subtractive random number generator; returns the next value of the
   sequence scaled by FAC (nominally a fraction in [0, 1)).
   idum: in/out seed.  The 55-entry table is (re)built on the very first
          call, or whenever *idum is negative; after building, *idum is
          overwritten with 11.  On all other calls *idum is not consulted.
   The generator keeps its state in static variables, so it is not
   reentrant. */
float ran3(long *idum)
{
    static int inext, inextp;
    static long ma[56];          /* ma[0] is never used; indices run 1..55 */
    static int iff = 0;          /* set to 1 once the table has been built */
    long mj, mk;
    int i, ii, k;

    if (*idum < 0 || iff == 0) { /* initialization */
        iff = 1;
        mj = MSEED - (*idum < 0 ? -*idum : *idum);
        mj %= MBIG;
        ma[55] = mj;
        /* mk must start at 1: the original code omitted this assignment and
           filled the table from an indeterminate value. */
        mk = 1;
        for (i = 1; i <= 54; i++) {
            /* Fill the rest of the table in a scattered order. */
            ii = (21 * i) % 55;
            ma[ii] = mk;
            mk = mj - mk;
            if (mk < MZ) {
                mk += MBIG;
            }
            mj = ma[ii];
        }
        /* Run the table through four mixing passes. */
        for (k = 1; k <= 4; k++) {
            for (i = 1; i <= 55; i++) {
                ma[i] -= ma[1 + (i + 30) % 55];
                if (ma[i] < MZ) {
                    ma[i] += MBIG;
                }
            }
        }
        inext = 0;
        inextp = 31;
        *idum = 11;
    }
    /* Advance both cursors, wrapping from 56 back to 1. */
    if (++inext == 56) {
        inext = 1;
    }
    if (++inextp == 56) {
        inextp = 1;
    }
    mj = ma[inext] - ma[inextp];
    if (mj < MZ) {
        mj += MBIG;
    }
    ma[inext] = mj;
    return mj * FAC;
}
/*---------------------------------------------------------------------------*/
/*-------------------------Random Function Generator-------------------------*/
/*---------------------------------------------------------------------------*/
/* Returns ran3() scaled by MaxRange and truncated to a long.
   Each call first draws one bit from GetChangedSeed(&Seed) and hands ran3()
   a seed argument of 786 or 1572 (bit + 1, times 786).
   NOTE(review): that argument is never negative, and ran3() reads *idum
   only on its very first call or when it is negative, so the bit drawn here
   influences the stream only on the first call -- confirm this is intended. */
long GetRandomNumber(long MaxRange){
    long seedArg = 786L * (GetChangedSeed(&Seed) + 1);
    float fraction = ran3(&seedArg);

    return (long) (fraction * MaxRange);
}
/*---------------------------------------------------------------------------*/
/*--------------------Reads data from the file-------------------------------*/
/*---------------------------------------------------------------------------*/
/* Loads the text file `filename` into the global DataSet buffer.
   Sets COL (field count of the first line) and ROW (line count), allocates
   DataSet accordingly, then reads the lines into it.
   Prints a message and terminates the program with a failure status when
   the file cannot be opened.
   Declared void to match the `extern void ReadData(char *)` declaration
   used by rand.c; the previous definition had an implicit int return type
   and never returned a value. */
void ReadData(char *filename){
    FILE *fp;

    /* checking file existence */
    if ((fp = fopen(filename, "r")) == NULL){
        printf ("\n %s not found in this path", filename);
        exit(EXIT_FAILURE);
    }
    fclose( fp );

    /* get number of tuples and attributes from the data file */
    COL = GetNumberofAttributes( filename );
    ROW = GetNumberofTuples( filename );

    /* allocate memory for the data set in the DataSet buffer */
    AllocateDataSpace( ROW, COL );

    /* collect data into the DataSet buffer from the file */
    GetDataIntoBuffer( filename );
}
/*---------------------------------------------------------------------------*/
/* Returns the number of lines in `filename`, counted as the number of
   newline characters (the same result `wc -l` gives, so a final line without
   a terminating newline is not counted).  Returns 0 when the file cannot be
   opened.
   The file is read directly instead of running `cat ... | wc -l` through a
   shell: that avoided neither the fixed-size command buffer overflow for
   long names nor shell interpretation of characters in the file name. */
long GetNumberofTuples(char *filename){
    FILE *fp;
    char chunk[4096];
    size_t got, k;
    long lines = 0;

    fp = fopen(filename, "r");
    if (fp == NULL) {
        return 0;
    }
    while ((got = fread(chunk, 1, sizeof chunk, fp)) > 0) {
        for (k = 0; k < got; k++) {
            if (chunk[k] == '\n') {
                lines++;
            }
        }
    }
    fclose(fp);
    return lines;
}
/*---------------------------------------------------------------------------*/
/* Returns the number of fields on the first line of `filename`, computed as
   (number of separator characters on that line) + 1.  Every space or comma
   counts as a separator (see SEPARATOR), so consecutive separators each add
   a field.  An empty first line or an empty file yields 1.
   Prints a message and terminates the program when the file cannot be
   opened.
   The line is scanned character by character, so its length is not limited
   and a separator in the very first column is counted; the previous version
   read at most STRBUF - 1 characters and never examined the first one. */
long GetNumberofAttributes(char *filename){
    FILE *fp;
    long field = 0;
    int ch;

    fp = fopen(filename, "r");
    if (fp == NULL) {
        printf ("\n %s not found in this path", filename);
        exit(EXIT_FAILURE);
    }
    while ((ch = fgetc(fp)) != EOF && ch != '\n') {
        if ( SEPARATOR(ch) ) {
            field ++;
        }
    }
    fclose (fp);
    return field + 1; /* number of fields = separators + 1 */
}
/*---------------------------------------------------------------------------*/
/* Allocates the global DataSet: an array of `row` pointers, each pointing
   to a buffer of col * 6 bytes.  Rows are allocated from the last index down
   to the first.  Any failed allocation is handled by the SUCCESS macro. */
void AllocateDataSpace(long row, long col){
    long remaining;

    DataSet = malloc(sizeof(char*) * row);
    SUCCESS( DataSet );
    for (remaining = row; remaining != 0; remaining--) {
        DataSet[ remaining - 1 ] = (char *)malloc(sizeof(char) * col * 6);
        SUCCESS( DataSet[ remaining - 1 ] );
    }
}
/*---------------------------------------------------------------------------*/
/* Reads ROW lines from the file `database` into DataSet[0 .. ROW-1].
   Each line is read with a limit of COL * 6 bytes (including the
   terminator), which matches the row size allocated by AllocateDataSpace().
   Terminates the program when the file cannot be opened or the scratch
   buffer cannot be allocated.  When the file runs out of lines early, the
   remaining rows are set to empty strings instead of copying stale data.
   NOTE(review): a line longer than COL * 6 - 1 characters is split by
   fgets() across consecutive rows, so later lines of the file are then not
   read -- confirm the 6-bytes-per-field assumption holds for the data. */
void GetDataIntoBuffer(char *database){
    FILE *fp;
    long tuples;
    char *buffer;

    fp = fopen (database, "r");
    if (fp == NULL) {
        printf ("\n %s not found in this path", database);
        exit(EXIT_FAILURE);
    }
    buffer = malloc(COL * 6);
    if (buffer == NULL) {
        printf("\nUnsuccessful Allocation");
        fclose (fp);
        exit(EXIT_FAILURE);
    }
    for (tuples = 0; tuples < ROW; tuples ++){
        /* collect one tuple; on end of file or read error store "" */
        if (fgets( buffer, (int)(COL * 6), fp) == NULL) {
            buffer[0] = '\0';
        }
        strcpy(DataSet[tuples], buffer);
    }
    fclose (fp);
    free(buffer);
}
/*---------------------------------------------------------------------------*/
void FreeDataBuffer(){
free( DataSet );
}
/*---------------------------------------------------------------------------*/
/* Prints every row of DataSet to stdout, in index order.  Rows are written
   as stored (no extra newline is added).
   The index is a long to match ROW; the previous int index could not cover
   row counts above INT_MAX, and two locals were initialized but unused. */
void DisplayDataBuffer(void){
    long r;

    for (r = 0; r < ROW; r++) {
        printf("%s", DataSet[r]);
    }
}
/*---------------------------------------------------------------------------*/
rand.c file:
#include "header.h"
/***********************External Definitions*******************************/
/* Objects and functions defined in lib.c; these declarations must stay in
   step with the definitions there. */
extern long ROW, COL; /* row / column counts of the loaded data set */
extern char **DataSet; /* one string per row of the loaded file */
extern void ReadData(char *); /* loads a file into DataSet and sets ROW, COL */
extern long GetRandomNumber(long); /* random value scaled by the argument */
/* Functions defined later in this file. */
void CreateFile(char*); /* writes DataSet to the named file */
void RandomizeData(long); /* performs N * ROW random row swaps */
void Swap(long, long); /* exchanges two rows of DataSet */
/***************************************************************************/
/* Entry point: shuffle <input-file> <output-file> <N>.
   Reads the input file, performs N * ROW random row swaps and writes the
   result to the output file.
   Returns EXIT_FAILURE with a usage message when arguments are missing or N
   is not a number; previously missing arguments dereferenced a null pointer
   and long file names overflowed fixed 256-byte copies.  The file names are
   now passed through directly, so no length limit applies.
   NOTE(review): InitRandomize() is not called here, so the generator seed
   keeps its initial value -- confirm whether the shuffle is meant to be
   reproducible. */
int main(int argc, char *argv[]){
    long N;
    char *end;

    if (argc < 4) {
        fprintf(stderr, "usage: %s <input-file> <output-file> <N>\n",
                argc > 0 ? argv[0] : "shuffle");
        return EXIT_FAILURE;
    }
    N = strtol(argv[3], &end, 10);
    if (end == argv[3] || *end != '\0') {
        fprintf(stderr, "invalid repetition count: %s\n", argv[3]);
        return EXIT_FAILURE;
    }
    ReadData(argv[1]);
    RandomizeData(N);
    CreateFile(argv[2]);
    return 0;
}
/***************************************************************************/
/* Draws one row index in [0, ROW - 1].
   GetRandomNumber() truncates fraction * range, so the range passed must be
   ROW (not ROW - 1) for the last row to be reachable; the result is clamped
   because the float fraction can round up to exactly 1. */
static long PickRowIndex(void){
    long idx = GetRandomNumber( ROW );

    if( idx > ROW - 1 ) {
        idx = ROW - 1;
    }
    if( idx < 0 ) {
        idx = 0;
    }
    return idx;
}

/* Shuffles DataSet by performing N * ROW swaps of two randomly chosen rows.
   Does nothing when N * ROW is zero or negative.
   The previous version requested indices with a range of ROW - 1, which
   left the last row practically never selected. */
void RandomizeData(long N){
    long times = N * ROW, i;
    long indx1, indx2;

    for(i = 0; i < times; i++){
        indx1 = PickRowIndex();
        indx2 = PickRowIndex();
        Swap(indx1, indx2);
    }
}
/***************************************************************************/
/* Exchanges rows indx1 and indx2 of DataSet by swapping the two row
   pointers.  Both indices must be valid row indices.
   The previous version copied the strings through a freshly allocated
   buffer: the allocation was unchecked, and when both indices were equal it
   called strcpy() with identical source and destination, which is undefined
   behavior.  Swapping pointers needs no allocation and is safe for equal
   indices. */
void Swap(long indx1, long indx2){
    char *held = DataSet[indx1];

    DataSet[indx1] = DataSet[indx2];
    DataSet[indx2] = held;
}
/***************************************************************************/
/* Writes all ROW rows of DataSet, in order, to `filename` (created or
   truncated).  Rows are written as stored, without adding newlines.
   Terminates the program with a failure status when the file cannot be
   opened or a write fails; the previous version passed a NULL stream to
   fputs() when fopen() failed. */
void CreateFile(char *filename){
    FILE *fp;
    long i;
    int failed = 0;

    fp = fopen(filename, "w");
    if (fp == NULL) {
        fprintf(stderr, "cannot open %s for writing\n", filename);
        exit(EXIT_FAILURE);
    }
    for ( i = 0; i < ROW; i++) {
        if (fputs(DataSet[i], fp) == EOF) {
            failed = 1;
            break;
        }
    }
    /* fclose() flushes buffered output, so its result matters too. */
    if (fclose(fp) == EOF) {
        failed = 1;
    }
    if (failed) {
        fprintf(stderr, "error while writing %s\n", filename);
        exit(EXIT_FAILURE);
    }
}
makefile:
# `exe` (the default goal) and `clean` name actions, not files.
.PHONY: exe clean

# Default goal: build the shuffle program.
exe: shuffle

# Link step; the object files come from make's built-in .c -> .o rule.
shuffle: rand.o lib.o
	gcc -o shuffle rand.o lib.o

# Both sources include header.h, so rebuild them when it changes.
rand.o lib.o: header.h

# Remove build products.
clean:
	rm -f *.o shuffle