Hello,
I need help with the following problem. I have this data row:
0 23 43 67 123 345 565 342 100 42 12 1 0 0 23 43 67 123 345 565 342 100 42 12 1 0
I want to filter out those columns whose position lies outside the mean value +/- the standard deviation of the positions of the numbers. "Position" means the actual column number.
If my math is right, the mean position with its standard deviation is 6.7847 +/- 2.4499 for the first group and 19.7847 +/- 2.4499 for the second group.
So if the actual position is < 4.3348 or > 9.2346 in group 1, or < 17.3348 or > 22.2346 in group 2, then the number at that position should be set to zero; the other numbers in the group are kept.
So I should get something like this:
0 0 0 0 123 345 565 342 100 0 0 0 0 0 0 0 0 123 345 565 342 0 0 0 0 0
Here is my code so far:
#!/usr/bin/awk -f
# Main rule, run once per input record.
#
# Splits the record into blank-separated columns and treats every maximal run
# of columns whose text is not exactly "0" as one group.  Prints the field
# count, then for each group, on the same line:
#   group text, number of columns, sum of the values,
#   value-weighted mean position (sum2/sum),
#   geometric centre of the group, sqrt(error/m)
# and finally a newline.  Positions are 1-based column numbers.
#
# "error" accumulates the squared distance between each position and the
# weighted mean of the columns seen so far in the group (a running mean, not
# the final one); this is kept as-is because it is what produces the
# deviation figures the script has been reporting.
#
# Groups are located by walking the columns instead of splitting $0 on
# /(^| )0( |$)/.  That separator swallows the blank shared by two adjacent
# zero columns, and what "^" matches after the first separator is not
# specified and differs between awk implementations.  Depending on the awk,
# this left a stray "0" in front of a group (shifting all of its positions by
# one) or produced an all-zero group whose mean is a division by zero.
#
# Globals written: n2, b, act_pos (first column of the current group),
# pos, m, group, sum, sum2, mean, error.
{
    n2 = split($0, b, " ");
    printf("%d ", NF);

    pos = 1;
    while (pos <= n2)
    {
        # Zero columns only separate groups; they never belong to one.
        if (b[pos] == "0")
        {
            pos++;
            continue;
        }

        act_pos = pos;
        group = "";
        m = 0;
        sum = 0; sum2 = 0; error = 0; mean = 0;

        for (; pos <= n2 && b[pos] != "0"; pos++)
        {
            if (m > 0)
                group = group " " b[pos];
            else
                group = b[pos];
            m++;

            sum += b[pos];
            sum2 += b[pos] * pos;

            # A column such as "00" or a non-numeric token is not "0" as text
            # but counts as 0 numerically; avoid dividing by a zero weight.
            if (sum != 0)
                mean = sum2 / sum;
            else
                mean = pos;

            error += (pos - mean) * (pos - mean);
        }

        printf("%s %d %.1f %.4f %.1f %.4f ", group, m, sum, mean, (2 * act_pos + m - 1) / 2, sqrt(error / m));
    }
    printf("\n");
}
# Advance the global column cursor act_pos to the next column of b[] whose
# text is not "0".
#
# Reads:  b[1..n2] (columns of the current record), n2, act_pos.
# Writes: act_pos - index of the first non-"0" column after the starting
#                   value, or n2 when no such column remains;
#         is_zero - 0 when a non-"0" column was found, 1 otherwise.
# The column at the starting act_pos itself is never examined; the scan
# begins at act_pos+1.
function getActPos()
{
    is_zero = 1;
    for (; act_pos < n2; )
    {
        act_pos++;
        if (b[act_pos] != "0")
        {
            is_zero = 0;
            break;
        }
    }
}