Hi all,

My first, but likely not my last post. I’m teaching myself C++ with basically no programming background. I’ve been using Borland, reading books (such as teach yourself in 21 days, etc.) and reading this website, but now I’ve run into a problem and after three days of trying to figure it out, I decided to seek your help. My code so far is below.

Here’s the deal, I’m trying to write a program that imports a file of DNA sequences (basically A T C and Gs) from one format called FASTA and export those sequences into a new format called PHASE. There is a lot of nonsense in the FASTA file that I need to skip over and the sequence length may change file to file; hence, I’m using a switch to get to the good parts of the FASTA file. That seemed to work out well. But now putting the data into the array is a problem. It does not seem to work like the more simple examples that I’ve seen and practiced.

To start: I want to bring in the first and second DNA sequences, after a bunch of junk characters, into a 2D array. My program nearly works in that it goes to the correct part of the FASTA file before entering characters into the array, but then for some reason it gets stuck in a loop. I’m only seeking help on getting these sequences into a 2D array. But if you have other tips for a beginner working a project like this, let me know.

Here the code and an abridged example of a FASTA file.

#include <iostream>
#include <string>
#include <fstream>  //Provides input and output classes
using namespace std;
#pragma hdrstop

#pragma argsused
int main(int argc, char* argv[])
{
ifstream FASTAin("FASTA2.txt");
if (!FASTAin)
        {
        cout << "File not found.\n";
        cout << "Press enter to exit.\n";
        }

ofstream PHASEout("PHASE2.txt");
if (!PHASEout)
        {
        cout << "Unable to export the new PHASE.txt file.";
        cout << "Press enter to exit.\n";
        }

cout << "File found.\n";
cout << "Press enter continue.\n";

getchar();

const int M=2;
const int N=5000;

char seq1[M][N];
char IDname[80]="";
char ch='a';
unsigned short int row=0;
unsigned short int col=0;

int flipper=-9;
unsigned short int t=0;

        while (FASTAin.get(ch))
        {
           if (ch==10)
            {
            flipper=0;
            }

           if (ch=='>') //10 = ascii code for <enter> character
            {
            flipper=1;
            }

           if (flipper==-9)
            {
            int ignore=0;
            cout << "."; ignore++;
            }

           if (flipper==0)
           {
                for (row=0; row<=M; row++)
                {
                    for (col=0; col<=N; col++)
                    {
                    seq1[row][col] = ch;
              //Here is the problem!
                    }
                }
           }

           if (flipper==1)
           {
           IDname[t]=ch; t++;
           }

}
        if (FASTAin.eof ()) //ignore this
             {
               for (int k=1;k<(M+1);k++)  //for all int up to i
               {
                  cout << seq1[k];     //print idname
                  PHASEout << seq1[k];
               }
             }

cout << seq1[row][col];
cout << "\n***End of FASTA file contents.***\n";
getchar();
FASTAin.close();   
PHASEout.close();

getchar();
return 0;
}

------------THE FILE: FASTA.txt--Important stuff starts at BAGATT...

[oi]
>'1_{GreeNlANd}'   [Jun 10, 2005]
BAGATTTGGGTACCACCCAAGTATTGACTCACCCATCAACGACCGCTATGTATTTCGTAC
ATTACTGCCAGTCACCATGAATATTGTACGGTACCATAAATACTTGACCACCTGTAGTAC
ATAAAAACCCAATCCACATCAAAACCCCCTCCCCATGCTTACAAGCAAGTACAGCAATCA
ACCTTCAACTATCACACATCAACTGCAACTCCAAAGCCACCCCTCGCCCACTAGGATACC
AACAAACCTATCCACCCTTAACAGTACATAGTACATAAAACCATTTACCGTACATAGCAC
ATTACAGTCAAATCCCTTCTCE
>'2_{GreeNlANd}'
bAGATTTGGGTACCACCCAAGTATTGACTCACCCATCAACAACCGCTATGTATCTCGTAC
ATTACTGCCAGTCACCATGAATATTGTACGGTACCATAAATACTTGACCACCTGTAGTAC
ATAAAAACCCAATCCACATCAAAACCCCCTCCCCATGCTTACAAGCAAGTACAGCAATCA
ACCTTCAACTATCACACATCAACTGCAACTCCAAAGCCACCCCTCGCCCACTAGGATACC
AACAAACCTATCCACCCTTAACAGTACATAGTACATAAAACCATTTACCGTACATAGCAC
ATTACAGTCAAATCCCTTCTCe

well, as a general tip for this forum, put your code in
BB tags so one can see whitespaces.

Sorry.

#include <iostream>
#include <string>
#include <fstream>  
using namespace std;
#pragma hdrstop

#pragma argsused
int main(int argc, char* argv[])
{
ifstream FASTAin("FASTA2.txt");
if (!FASTAin)
        {
        cout << "File not found.\n";
        cout << "Press enter to exit.\n";
        }
ofstream PHASEout("PHASE2.txt");

if (!PHASEout)
        {
        cout << "Unable to export the new PHASE.txt file.";
        cout << "Press enter to exit.\n";
        }
cout << "File found.\n";
cout << "This program will create or write over the file PHASE.txt.\n";
cout << "Press enter continue.\n";
getchar();

const int M=2;
const int N=5000;

char seq1[M][N];
char IDname[80]="";
char ch='a';
unsigned short int row=0;
unsigned short int col=0;

int flipper=-9;
unsigned short int t=0;

        while (FASTAin.get(ch))
        {
           if (ch==10)
           {
           flipper=0;
           }

           if (ch=='>') 
           {
           flipper=1;
           }

           if (flipper==0)
           {
                for (row=0; row<=M; row++)
                {
                    for (col=0; col<=N; col++)
                    {
                    seq1[row][col] = ch;
              //Problem area
                    }
                }
           }

           if (flipper==1)
           {
           IDname[t]=ch; t++;
           }

     }

cout << seq1[row][col];
cout << "\n***End of FASTA file contents.***\n";
getchar();

FASTAin.close();   
PHASEout.close();

getchar();

return 0;
}

ok, the reson your program crash is this:

for (row=0; row<=M; row++) 
                {
                    for (col=0; col<=N; col++)
                    {

the row, and col both goes 1 val to far.
if you change it to

for (row=0; row<=M-1; row++)
                {
                    for (col=0; col<=N-1; col++)
                    {

it will work the way you have writen it. but I think that it aint going to work the way
you want to, since all you do is enter the ch into the whole array. so you wil get a array filld with 1 char.
I dont think you want the for loops, at least not there.

I see what your saying. I tried using the break; command at the end of this loop assuming it would take me back to the top of the program, get another character, and continue filling the array, but now it still hits this for loop twice before moving on to another character. Very strange. Clearly I'm thinking about this wrong. If you have any suggestions let me know, otherwise I'll simply hit the books again and come up with a different logic. Thanks for the tip though.

I think this might help you a bit, it uses string instead of char to simplefie it a bit, but if I aint totaly wrong it does wath you want.
it only get the data from the file, and print it to screen, so you still need to make the saving work.

#include <iostream>
#include <string>
#include <fstream>  

using namespace std;

const int M=3;  // number of enteries
string store[M]; // global string array. store all enteries

void getfile() // used to get all enteries from file and put it in the store array
{
    char ch; // used for reading the data.
    ifstream FASTAin("fasta.txt"); 
    if (!FASTAin)
    {
        cout << "Error: no file";
    }
    FASTAin.get(ch); // get the first char in file
    int row = -1; 
    // in this while loop we get the data from the file, and divide it by enteries.
    while (!FASTAin.eof())
    {
          if (ch == '>') // '>' is the start of a new enterie. SO one need to change row.
          {
              row++;
          }
          
          if (row > -1)
          {
              store[row] += ch; // add the char to the string.
          }
          
          FASTAin.get(ch); // get next char
    }
    FASTAin.close(); // we have no more user for the file annymore.   
}

string getname (string data)
{
    int start = data.find('{')+1;
    int end = data.find('}') - start;
    if (start == 0)// dident find the {
    {
        return "";
    }
    return data.substr(start, end);
}
string getdata (string data)
{
    int start = data.find('\n')+1; 
    int end = data.length() - start;
    if (start == -0)// dident find newline
    {
        return "";
    }
    
    return data.substr(start, end);
}

string getdate (string data)
{
    int start = data.find('[')+1;
    int end = data.find(']') - start;
    if (start == 0)
    {
        return "";
    }
    
    return data.substr(start, end);
}
// main
int main()
{
    getfile();
    for (int i = 0; i<M; i++)
    {
        cout << getname (store[i]) << "  -  " << getdate(store[i]) << endl << getdata(store[i]) << endl << endl;
    }
    int i;
    cin>>i;
}
Be a part of the DaniWeb community

We're a friendly, industry-focused community of developers, IT pros, digital marketers, and technology enthusiasts meeting, networking, learning, and sharing knowledge.