I posted a similar thread back in Nov and got some great help. I'm modifying the code to be useful to not only compare multiple columns from two separate files, but also to classify each line from @hairpin to determine if the parameters fall within any of the ESThits, outside of them, or overlap with them. I think the script is good, but I'm very much a newbie and am getting some errors I can't figure out how to debug. Below is the code, sample data, and the errors, but if a debugger could solve my problem, please tell me which to use. Thanks!
Sample data:
$est
JG075107 C18173227 86.54 52 6 1 1 51 5197 5146 5.7e-13 72.0
JG075107 C18173227 96.30 108 3 1 1 107 5140 5033 5.7e-48 188.0
JG214006 scaffold1000 100.00 102 0 0 124 225 420785 420886 5.0e-51 199.0
EB713403 scaffold1000 100.00 110 0 0 1 110 254610 254719 3.5e-56 216.0
JG084088 scaffold1000 100.00 11 0 0 353 363 333466 333476 1.1e+03 21.0
JG239536 scaffold1000 100.00 11 0 0 355 365 333466 333476 1.1e+03 21.0
JG270855 scaffold1000 100.00 11 0 0 357 367 333466 333476 1.1e+03 21.0
JG046275 scaffold1000 100.00 11 0 0 391 401 333466 333476 1.1e+03 21.0
JG281670 scaffold1000 100.00 11 0 0 405 415 333466 333476 1.1e+03 21.0
JG166445 scaffold1000 100.00 11 0 0 419 429 333466 333476 1.1e+03 21.0
JG214006 scaffold1000 100.00 112 0 0 12 123 420302 420413 1.6e-57 220.0
JG214006 scaffold1000 100.00 115 0 0 226 340 421017 421131 7.9e-59 224.0
JG031035 scaffold1000 100.00 115 0 0 323 437 421131 421017 7.9e-59 224.0
JG214006 scaffold1000 100.00 116 0 0 341 456 421260 421375 6.8e-59 225.0
JG047614 scaffold1000 100.00 119 0 0 199 317 389254 389372 3.1e-61 233.0
JG030940 scaffold1000 100.00 119 0 0 341 459 389254 389372 3.1e-61 233.0
JG170532 scaffold1000 100.00 120 0 0 324 443 128522 128403 3.9e-61 232.0
JG197835 scaffold1000 100.00 143 0 0 286 428 422414 422272 1.2e-74 277.0
JG198376 scaffold1000 100.00 143 0 0 335 477 422414 422272 1.2e-74 277.0
JG232710 scaffold1000 100.00 143 0 0 349 491 422414 422272 1.2e-74 277.0
JG115037 scaffold1000 100.00 143 0 0 383 525 422414 422272 1.2e-74 277.0
JG214006 scaffold1000 100.00 143 0 0 646 788 422272 422414 1.2e-74 277.0
JG214006 scaffold1000 100.00 144 0 0 789 932 422620 422763 6.0e-76 281.0
JG157206 scaffold1000 100.00 146 0 0 386 531 390047 390192 1.8e-76 283.0
JG107854 scaffold1000 100.00 146 0 0 434 579 390047 390192 1.8e-76 283.0
JG030940 scaffold1000 100.00 146 0 0 460 605 390047 390192 1.8e-76 283.0
JG270855 scaffold1000 100.00 15 0 0 335 349 333419 333433 5.3e+00 29.0
JG240743 scaffold1000 100.00 15 0 0 638 652 272581 272567 3.6e+00 30.0
JG019310 scaffold1000 100.00 161 0 0 488 648 422132 421972 3.2e-85 312.0
JG149849 scaffold1000 100.00 18 0 0 266 283 330747 330764 9.6e-02 35.0
GW867612 scaffold1000 100.00 18 0 0 863 880 183 166 1.8e-01 34.0
JG198376 scaffold1000 100.00 181 0 0 478 658 422132 421952 6.8e-97 351.0
JG232710 scaffold1000 100.00 187 0 0 492 678 422132 421946 2.0e-100 363.0
JG115037 scaffold1000 100.00 187 0 0 526 712 422132 421946 2.0e-100 363.0
JG197835 scaffold1000 100.00 189 0 0 429 617 422132 421944 1.3e-101 367.0
JG214006 scaffold1000 100.00 189 0 0 457 645 421944 422132 1.3e-101 367.0
EB713403 scaffold1000 100.00 204 0 0 210 413 255844 256047 3.8e-110 395.0
JG276644 scaffold1000 100.00 21 0 0 497 517 258317 258297 1.5e-03 41.0
JG047614 scaffold1000 100.00 262 0 0 464 725 390295 390556 8.0e-146 513.0
JG084088 scaffold1000 100.00 4 0 0 227 230 331499 331502 1.2e+07 8.0
JG239536 scaffold1000 100.00 4 0 0 229 232 331499 331502 1.2e+07 8.0
JG270855 scaffold1000 100.00 4 0 0 232 235 331499 331502 1.2e+07 8.0
JG156496 scaffold1000 100.00 4 0 0 251 254 331488 331491 1.7e+07 7.0
JG281670 scaffold1000 100.00 4 0 0 279 282 331499 331502 1.2e+07 8.0
JG166445 scaffold1000 100.00 4 0 0 293 296 331499 331502 1.2e+07 8.0
JG148938 scaffold1000 100.00 48 0 0 544 591 258317 258270 3.3e-19 93.0
JG170532 scaffold1000 100.00 56 0 0 83 138 135823 135768 6.4e-24 109.0
JG087763 scaffold1000 100.00 58 0 0 567 624 273114 273057 2.2e-25 113.0
JG157206 scaffold1000 100.00 60 0 0 532 591 390295 390354 1.0e-26 118.0
JG107854 scaffold1000 100.00 60 0 0 580 639 390295 390354 1.0e-26 118.0
JG138677 scaffold1000 100.00 6 0 0 43 48 94597 94592 8.7e+05 12.0
JG276644 scaffold1000 100.00 61 0 0 348 408 258737 258677 4.6e-27 119.0
EH792492 scaffold1000 100.00 61 0 0 507 567 258737 258677 4.6e-27 119.0
JG030940 scaffold1000 100.00 62 0 0 606 667 390295 390356 7.6e-28 122.0
JG122601 scaffold1000 100.00 70 0 0 1 70 38731 38800 1.0e-31 134.0
JG276644 scaffold1000 100.00 74 0 0 274 347 258909 258836 1.8e-34 144.0
JG148938 scaffold1000 100.00 74 0 0 321 394 258909 258836 1.8e-34 144.0
EH792492 scaffold1000 100.00 74 0 0 433 506 258909 258836 1.8e-34 144.0
JG170532 scaffold1000 100.00 82 0 0 1 82 136002 135921 4.5e-39 159.0
JG031035 scaffold1000 100.00 83 0 0 438 520 420886 420804 5.7e-40 162.0
JG148938 scaffold1000 100.00 88 0 0 456 543 258592 258505 6.9e-43 172.0
JG170532 scaffold1000 100.00 91 0 0 233 323 135383 135293 1.8e-44 177.0
JG170532 scaffold1000 100.00 94 0 0 139 232 135700 135607 3.8e-46 182.0
EH792492 scaffold1000 100.00 95 0 0 656 750 258317 258223 1.0e-46 184.0
EB713403 scaffold1000 100.00 99 0 0 111 209 255051 255149 2.1e-49 193.0
$hairpin
scaffold1000-22_Betsplitef_0 scaffold1000 100 459 0 0 1 459 145290 144832 0 848
scaffold1000-22_Betsplitef_100 scaffold1000 100 306 0 0 1 306 145137 144832 4e-163 566
scaffold1000-22_Betsplitef_101 scaffold1000 100 289 0 0 1 289 145120 144832 1e-153 534
scaffold1000-22_Betsplitef_102 scaffold1000 100 278 0 0 1 278 145109 144832 1e-147 514
scaffold1000-22_Betsplitef_103 scaffold1000 100 273 0 0 1 273 145104 144832 9e-145 505
scaffold1000-22_Betsplitef_104 scaffold1000 100 242 0 0 1 242 145073 144832 1e-127 448
scaffold1000-22_Betsplitef_105 scaffold1000 100 201 0 0 1 201 145036 144836 7e-105 372
scaffold1000-22_Betsplitef_106 scaffold1000 100 180 0 0 1 180 145015 144836 3e-93 333
scaffold1000-22_Betsplitef_107 scaffold1000 100 110 0 0 1 110 181069 180960 1e-54 204
scaffold1000-22_Betsplitef_108 scaffold1000 100 459 0 0 1 459 145290 144832 0 848
scaffold1000-22_Betsplitef_109 scaffold1000 100 451 0 0 1 451 145282 144832 0 833
scaffold1000-22_Betsplitef_10 scaffold1000 100 366 0 0 1 366 145200 144835 0 676
scaffold1000-22_Betsplitef_110 scaffold1000 100 447 0 0 1 447 145278 144832 0 826
Errors:
syntax error at hairpinvsESTlocation.pl line 18, near "<($h)> {"
syntax error at hairpinvsESTlocation.pl line 29, near "$hairpin_test1["
syntax error at hairpinvsESTlocation.pl line 31, near "$hairpin_test2["
syntax error at hairpinvsESTlocation.pl line 35, near "}"
syntax error at hairpinvsESTlocation.pl line 40, near ") {"
Global symbol "@h0numeric" requires explicit package name at hairpinvsESTlocation.pl line 56.
syntax error at hairpinvsESTlocation.pl line 56, near ") next"
syntax error at hairpinvsESTlocation.pl line 58, near ") and"
syntax error at hairpinvsESTlocation.pl line 61, near ") and"
syntax error at hairpinvsESTlocation.pl line 64, near "else"
hairpinvsESTlocation.pl has too many errors.
#!/usr/bin/perl;
use strict;
use FileHandle;
use Data::Dumper;
#hairpinvsESTlocation.pl
#5-30-11 - test to compare locations of the ESTs within Scaffolds(small chunks of chromosomes) with the Predicted hairpins. (columns A=name, B=scaffold, I=start, J=stop, L=bitscore)
my ($EST,$hairpin) = @ARGV;
my $fh = new FileHandle;
my $h;
my $e;
#build array from hairpin data;
open ($h,$hairpin) or die "Could not open $hairpin: $!";
my (@h,@hout)
while <($h)> {
chomp;
push @h, [split (/\t/)]; #build array of hairpin data;
my ($h0, $h1, $h2, $h3, $h4, $h5, $h6, $h7, $h8, $h9, $h10, $h11); #interested in the columns for scaffold name and start and stop positions and hairpin identifier/name);
list ($h0, $h1, $h2, $h3, $h4, $h5, $h6, $h7, $h8, $h9, $h10, $h11) = split (/\t/);
$h1 =~ m/\w+(\d+)/; #this indicates that the formating will match (m) a word (w) and a decimal integer (d);
#the parenthesis within the // indicate what is to be pulled and accorded the designation $1 , in this case it is pulling just the numeric portion - see the next line;
my $h1numeric = $1; #removed my
# now you have the numeric portion only of the scaffold and for your comparison step 1 you could
# use a plain array style hash - test the start location
my $hairpin_test1[$h1numeric] = $h8; #start
# for test 2 you could prepare a second plain hash - test the stop location
my $hairpin_test2[$h1numeric] = $h9;
my $hhit = $h0."\t".$h1."\t".$h4."\t".$h5."\t".$h11; #scaffold_location, start, stop, bit
}
#build array from EST data
open ($e,$EST) or die "Could not open $EST: $!";
my (@e,@eout)
while (<$e>) {
my ($e0, $e1, $e2, $e3, $e5, $e6, $e7, $e8, $e9, $e10, $e11);
list($e0, $e1, $e2, $e3, $e5, $e6, $e7, $e8, $e9, $e10, $e11) = split(/\t/);
my $ehit = $e0."\t".$e8."\t".$e9; # EST hit, start, end
# }#set up for testing scaffold location
my $testindex1 = $e1; #scaffold location;
my $testindex1 =~ m/\w+(\d+)/;
my $testnumeric = $1; #removed my;
#set up for testing start location;
# my $testindex2 = $e8; #start;
# first test case
if (! defined($h0numeric[$testnumeric])) next;
# second test case
if ($h8 < $e8) and ($h9 < $e8) and ($h8 < $e9) and ($h9 < $e9)
or ($h8 > $e8) and ($h9 > $e8) and ($h8 > $e9) and ($h9 > $e9) ; print $hhit."\t".$ehit."\toutside";
if ($h8 < $e8) and ($h9 < $e8) and ($h8 > $e9) and ($h9 > $e9)
or ($h8 > $e8) and ($h9 > $e8) and ($h8 < $e9) and ($h9 < $e9); print $hhit."\t".$ehit."\tinside";
else print $hhit."\t".$ehit."\tpartial";
}
close $hairpin;
close $EST;
exit 0;