Hello, I am parsing a rebase file and using different subroutines from the BeginPerlBioinfo module. I have used the subroutines I think I need but I keep on getting the message"use of initialized value $site in concatenation or string <$fh>.
use strict;
use warnings;
use P4;
# Declare and initialize variables
my %rebase_hash = ( );
my @file_data = ( );
my $query = '';
my $dna = '';
my $recognition_site = '';
my $regexp = '';
my @locations = ( );
# Get the REBASE data into a hash, from file "rebase.txt"
%rebase_hash = parseREBASE('rebase.txt');
# Prompt user for restriction enzyme names, create restriction map
do {
print "Search for what restriction site for (or quit)?: ";
$query = <STDIN>;
chomp $query;
# Exit if empty query
if ($query =~ /^\s*$/ ) {
exit;
}
# Perform the search in the DNA sequence
if ( exists $rebase_hash{$query} ) {
($recognition_site, $regexp) = split ( " ", $rebase_hash{$query});
# Create the restriction map
@locations = match_positions($regexp, $dna);
# Report the restriction map to the user
if (@locations) {
print "Searching for $query $recognition_site $regexp\n";
print "A restriction site for $query at locations:\n";
print join(" ", @locations), "\n";
} else {
print "A restriction site for $query is not in the DNA:\n";
}
}
print "\n";
} until ( $query =~ /quit/ );
exit;
P4 is the name of the module I made up with the subroutines I think that I need.
Rebase.txt is the file that I need to parse.
The subroutines that I have used are:
# open_file
#
# - given filename, set filehandle
sub open_file {
my($filename) = @_;
my $fh;
unless(open($fh, $filename)) {
print "Cannot open file $filename\n";
}
return $fh;
}
# A subroutine to get data from a file given its filename
# get_file_data
sub get_file_data {
my($filename) = @_;
use strict;
use warnings;
# Initialize variables
my @filedata = ( );
unless( open(GET_FILE_DATA, $filename) ) {
print STDERR "Cannot open file \"$filename\"\n\n";
exit;
}
@filedata = <GET_FILE_DATA>;
close GET_FILE_DATA;
return @filedata;
}
sub IUB_to_regexp {
my($iub) = @_;
my $regular_expression = '';
my %iub2character_class = (
A => 'A',
C => 'C',
G => 'G',
T => 'T',
R => '[GA]',
Y => '[CT]',
M => '[AC]',
K => '[GT]',
S => '[GC]',
W => '[AT]',
B => '[CGT]',
D => '[AGT]',
H => '[ACT]',
V => '[ACG]',
N => '[ACGT]',
);
# Remove the ^ signs from the recognition sites
$iub =~ s/\^//g;
# Translate each character in the iub sequence
for ( my $i = 0 ; $i < length($iub) ; ++$i ) {
$regular_expression
.= $iub2character_class{substr($iub, $i, 1)};
}
return $regular_expression;
}
sub match_positions {
my($regexp, $sequence) = @_;
use strict;
#
# Declare variables
#
my @positions = ( );
#
# Determine positions of regular expression matches
#
while ( $sequence =~ /$regexp/ig ) {
push ( @positions, pos($sequence) - length($&) + 1);
}
return @positions;
}
#
# A subroutine to return a hash where
# key = restriction enzyme name
# value = whitespace-separated recognition site and regular expression
sub parseREBASE {
my($rebasefile) = @_;
use strict;
use warnings;
# Declare variables
my @rebasefile = ( );
my %rebase_hash = ( );
my $name;
my $site;
my $regexp;
# Read in the REBASE file
my $rebase_filehandle = open_file($rebasefile);
while(<$rebase_filehandle>) {
# Discard header lines
( 1 .. /Rich Roberts/ ) and next;
# Discard blank lines
/^\s*$/ and next;
# Split the two (or three if includes parenthesized name) fields
my @fields = split( " ", $_);
# Get and store the name and the recognition site
# Remove parenthesized names, for simplicity's sake,
# by not saving the middle field, if any,
# just the first and last
$name = shift @fields;
$site = pop @fields;
# Translate the recognition sites to regular expressions
$regexp = IUB_to_regexp($site);
# Store the data into the hash
$rebase_hash{$name} = "$site $regexp";
}
# Return the hash containing the reformatted REBASE data
return %rebase_hash;
}
1;