Makailah 0 Newbie Poster

I must find the restriction maps in a FASTA file and then for every sequence print out a graphical display of the cut sites in the restriction map by printing the sequence and labeling the recognition sites with the enzyme name. I also must use a hash to store the data. I must print out all graphical displays of every sequence.

This is what I have:

#!/usr/bin/perl
# Declare and initialize variables
my %rebase_hash = ();

unless(dbmopen(%rebase_hash, 'DBNAME', 0644)) {

print "Cannot open DBM file DBNAME with mode 0644\n";
}

my @file_data = ();
my $query = '';
my $dna = '';
my $recognition_site = '';
my $regexp = '';
my @locations = ();

# If there is a command-line argument, assume it's DNA
if(@ARGV) {
$dna = $ARGV[0];

# Otherwise, prompt for a FASTA file to open
}else{

print "Input a FASTA filename: ";
my $filename = <STDIN>;
chomp $filename;

unless(-e $filename) {
print "$filename does not exist!\n";
exit;
}
# Read in the file 
@file_data = get_file_data($filename);

# Extract the DNA sequence data from the contents of the FASTA file
$dna = extract_sequence_from_fasta_data(@file_d…
}

# Get the REBASE data into a hash, from file "DNASIS Rebase"
%rebase_hash = parseREBASE('DNASIS.txt');

# Prompt user for restriction enzyme names, create restriction map
do {
print "Search for what restriction site (or quit)?: ";

$query = <STDIN>;

chomp $query;

# Exit if empty query
if ($query =~ /^\s*$/ ) {

exit;
}
# Perform the search in the DNA sequence
if ( exists $rebase_hash{$query} ) {

($recognition_site, $regexp) = split ( " ", $rebase_hash{$query});

# Create the restriction map
@locations = match_positions($regexp, $dna);

# Report the restriction map to the user
if (@locations) {
print "Searching for $query $recognition_site $regexp\n";
print "A restriction site for $query at locations:\n";
print join(" ", @locations), "\n";

} else {
print "A restriction enzyme $query is not in the DNA:\n";
}
}
print "\n";

} until ( $query =~ /quit/ );

exit;

######################################…
# Subroutines
######################################…

######################################…

# A subroutine to get data from a file given its filename
# get_file_data
sub get_file_data {

my($filename) = @_;

use strict;
use warnings;

# Initialize variables
my @filedata = ( );

unless( open(GET_FILE_DATA, $filename) ) {
print STDERR "Cannot open file \"$filename\"\n\n";
exit;
}

@filedata = <GET_FILE_DATA>;

close GET_FILE_DATA;

return @filedata;
}

# a subroutine to extract FASTA sequence data from an array
sub extract_sequence_from_fasta_data {
my(@fasta_file_data) = @_;
use strict;
use warnings;
# Declare and initialize variables
my $sequence = '';
foreach my $line (@fasta_file_data) {
# discard blank line
if ($line =~ /^\s*$/) {
next;
# discard comment line
} elsif($line =~ /^\s*#/) {
next;
# discard fasta header line
} elsif($line =~ /^>/) {
next;
# keep line, add to sequence string
} else {
$sequence .= $line;
}
}
# remove non-sequence data (in this case, whitespace) from $sequence string
$sequence =~ s/\s//g;
return $sequence;
}
# A subroutine to return a hash where
# key = restriction enzyme name
# value = whitespace-separated recognition site and regular expression

sub parseREBASE {

my($rebasefile) = @_;

use strict;
use warnings;

# Declare variables
my @rebasefile = ( );
my %rebase_hash = ( );
my $name;
my $site;
my $regexp;

# Read in the REBASE file
my $rebase_filehandle = open_file($rebasefile);

while(<$rebase_filehandle>) {

# Discard header lines
( 1 .. /Rich Roberts/ ) and next;

# Discard blank lines
/^\s*$/ and next;

# Split the two (or three if includes parenthesized name) fields
my @fields = split( " ", $_);

# Get and store the name and the recognition site

# Remove parenthesized names, for simplicity's sake,
# by not saving the middle field, if any,
# just the first and last
$name = shift @fields;

$site = pop @fields;

# Translate the recognition sites to regular expressions
$regexp = IUB_to_regexp($site);

# Store the data into the hash
$rebase_hash{$name} = "$site $regexp";
}