Can anyone help me with external direct merge sorting?
Here is my code
#include<iostream>
#include<conio.h>
#include<stdio.h>
using namespace std;
int GetKey(char infoLine[30]) //Getting sorting key
{
char temps[30];
strcpy(temps,infoLine);
return atoi(strtok(temps," ")); //returns first number in a line
}
void DirectMerge(char fileName[20])
{
FILE* FTS; //file wich will be sorted
FILE *A,*B; //временные файлы
long int numKeys=0; //number of numbers in file
char temp1[30],temp2[30]; //current numbers from each of temporary files
FTS=fopen(fileName,"rt");
while(!feof(FTS)) //getting number of numbers in file
{
fgets(temp1,30,FTS);
numKeys++;
}
fclose(FTS);
long int iterator=1; //number of numbers in one sequence
while(iterator<numKeys) //while length of sequence is smaller than number of numbers is file
{
FTS=fopen(fileName,"rt");
A=fopen("a.txt","wt");
B=fopen("b.txt","wt");
for(int i=0;i<numKeys/2;i++) //reading half of numbers to the first temp file
{
fgets(temp1,30,FTS);
fputs(temp1,A);
}
if((numKeys/2)%iterator!=0) //read more numbers if number of previously read numbers doesn't contain integer number of sequences
{
int tmp=numKeys/2;
while(tmp%iterator!=0)
{
fgets(temp1,30,FTS);
fputs(temp1,A);
tmp++;
}
}
while(!feof(FTS)) //all other numbers write to second temp file
{
fgets(temp2,30,FTS);
fputs(temp2,B);
}
fclose(FTS);
fclose(A);
fclose(B);
//phase 2 -merging temp files
FTS=fopen(fileName,"wt");
A=fopen("a.txt","rt");
B=fopen("b.txt","rt");
fgets(temp1,30,A);
fgets(temp2,30,B);
bool sh1=1,sh2=1;
bool nl1=0,nl2=0;
if(feof(A)) nl1=1;
if(feof(B)) nl2=1;
bool once=1;
while((!feof(A) && !feof(B)) || once)
{
int iterA=0,iterB=0; //number of read numbers on current iteration from the first and the second files
bool iterCon=1; //if we need to continue iterations
while((iterCon && !feof(A) && !feof(B)) || (once && iterCon))
{
if(GetKey(temp1)<=GetKey(temp2)) //compare keys
{
fputs(temp1,FTS); //write to the file
if(nl1) fputc('\n',FTS);
sh1=1;
if(fgets(temp1,30,A)!=NULL) sh1=0; else once=0;
iterA++; //increase number of numbers read from the first file
} else
{
fputs(temp2,FTS);
if(nl2) fputc('\n',FTS);
sh2=1;
if(fgets(temp2,30,B)!=NULL) sh2=0; else once=0;
iterB++;
}
if(iterA==iterator || iterB==iterator) iterCon=0; //if in one of the files number of read numbers is equal to the length of the sequence on this iteration then stop merging on this iteration
}
if(!iterCon) //if in one of the files has unfinished sequence
{
while(iterA<iterator)
{
fputs(temp1,FTS);
sh1=1;
if(fgets(temp1,30,A)!=NULL) sh1=0; else break;
iterA++;
}
while(iterB<iterator)
{
fputs(temp2,FTS);
sh2=1;
if(fgets(temp2,30,B)!=NULL) sh2=0; else break;
iterB++;
}
}
}
while(!feof(A)) //if one of files was larger than another than write it's ending
{
fputs(temp1,FTS);
sh1=1;
if(fgets(temp1,30,A)!=NULL) sh1=0;
}
while(!feof(B))
{
fputs(temp2,FTS);
sh2=1;
if(fgets(temp2,30,B)!=NULL) sh2=0;
}
if(!sh1) fputs(temp1,FTS); //if line was read but was not written into result file because of the end of file
if(!sh2) fputs(temp2,FTS);
fclose(FTS); //close files
fclose(A);
fclose(B);
iterator*=2; //increase length of sequence
}
remove("a.txt");
remove("b.txt");
ShowFile(fileName);
}
int main()
{
char namer[20];
cin>>namer;
DirectMerge(namer);
}
Actually it's a translation of pascal code
var
s:string;
t:text;
Procedure MergeSort(name: string; var f: text);
Var a1,a2,s,i,j,kol,tmp: integer;
f1,f2: text;
b: boolean;
Begin
kol:=0;
Assign(f,name);
Reset(f);
While not EOF(f) do
begin
read(f,a1);
inc(kol);
End;
Close(f);
Assign(f1,'{имя 1-го вспомогательного файла}.txt');
Assign(f2,'{имя 2-го вспомогательного файла}.txt');
s:=1;
While (s<kol) do
begin
Reset(f); Rewrite(f1); Rewrite(f2);
For i:=1 to kol div 2 do
begin
Read(f,a1);
Write(f1,a1,' ');
End;
If (kol div 2) mod s<>0 then
begin
tmp:=kol div 2;
While tmp mod s<>0 do
begin
Read(f,a1);
Write(f1,a1,' ');
inc(tmp);
End;
End;
While not EOF(f) do
begin
Read(f,a2);
Write(f2,a2,' ');
End;
Close(f); Close(f1); Close(f2);
Rewrite(f); Reset(f1); Reset(f2);
Read(f1,a1);
Read(f2,a2);
While (not EOF(f1)) and (not EOF(f2)) do
begin
i:=0; j:=0;
b:=true;
While (b) and (not EOF(f1)) and (not EOF(f2)) do
begin
If (a1<a2) then
begin
Write(f,a1,' ');
Read(f1,a1);
inc(i);
End
else
begin
Write(f,a2,' ');
Read(f2,a2);
inc(j);
End;
If (i=s) or (j=s) then b:=false;
End;
If not b then
begin
While (i<s) and (not EOF(f1)) do
begin
Write(f,a1,' ');
Read(f1,a1);
inc(i);
End;
While (j<s) and (not EOF(f2)) do
begin
Write(f,a2,' ');
Read(f2,a2);
inc(j);
End;
End;
End;
While not EOF(f1) do
begin
tmp:=a1;
Read(f1,a1);
If not EOF(f1) then
Write(f,tmp,' ')
else
Write(f,tmp);
End;
While not EOF(f2) do
begin
tmp:=a2;
Read(f2,a2);
If not EOF(f2) then
Write(f,tmp,' ')
else
Write(f,tmp);
End;
Close(f); Close(f1); Close(f2);
s:=s*2;
readln;
End;
Erase(f1);
Erase(f2);
End;
Begin
MergeSort('input.txt',t);
end.
but pascal program sorts file with numbers in one line and I need a program that sorts lines with sort keys in the beginning of the line.The main problem is that Pascal EOF and C++ feof work different so I added a lot checks but finally there is still something wrong about the code.
For example for input file
1 крутота
3 огог
2 зомби
5 ломби
7 йохохо
6 трулаа
1
1
3
2
11
27
200 токены
156
1 шмокены
24
56 лоло
the output is
1 крутота
1
1 шмокены
1
2 зомби
2
3
3 огог
5 ломби
6 трулаа
7 йохохо
11
24
27
27
56 лоло156
200 токены
were 156 gets into the end of line "56 лоло" and there are duplicate 27 numbers .It seems that end of line symbol was lost somewhere but I can't find were.