Hello there!
I wrote this code that performs removal of certain sequences, but its execution time exceeds one hour on a supercomputer. Could you help me to simplify it, so it will consume less time?
Big thanks!
/Robert
#!/usr/local/bin/perl -w
open (INPUT, "phastCons_200_chr6.txt") and print "phastCons is open\n";
open (OUT, ">phastCons_noex_chr6.txt") and print "phastCons_noex is open\n";
for $line_ph (<INPUT>) {
$overlap = 0;
@fields_ph = split (/\s+/, $line_ph);
open (INPUT2, "refFlat.txt") and print "refFlat.txt is open";
for $line (<INPUT2>) {
if ($overlap == 1) {
next;
}
@fields = split (/\s+/, $line);
if ($fields_ph[1] eq $fields[2] && $fields_ph[2] < $fields[5] && $fields_ph[3] > $fields[4]) {
@ex_start = split (/,/, $fields[9]);
@ex_end = split (/,/, $fields[10]);
for $i (0 .. $fields[8]-1) {
if ($fields_ph[2] < $ex_end[$i] && $fields_ph[3] > $ex_start[$i]) {
$overlap = 1;
}
}
}
}
close (INPUT2);
if ($overlap == 0) {
print OUT "$line_ph";
}
}
close (OUT);