#!/bin/csh
#
# $Id$
# $Log$
# Revision 1.2  2005/10/05 06:18:35  nadya
# use full path for "rm". Asssume everybody has /bin/rm.
#
# Revision 1.1.1.1  2005/07/29 00:12:16  nadya
# Importing from meme-3.0.14, and adding configure/make
#
#
# sd -- print the mean and standard deviation (or sum, or t-statistic)
#       of each column of numbers read from standard input.
#
# The work is done by a gawk program that is written to a temporary
# file in the current directory and removed again at the cleanup label.
# Exit status: 1 after printing the usage message, otherwise the exit
# status of gawk (0 on success).

# Set these before arming the interrupt handler so that the cleanup
# label can always expand them.
set pgm = $0
set pgm = $pgm:t
set rc = 0

if ("$1" == "?") then
  usage:
  cat << USAGE
  USAGE:
        sd [-n][-m][-t][-s <n>][-l][-multi][-sum]

        [-n]            don't print newline after output.
        [-m]            don't print sd, just mean
        [-t]            assume columns are differences and print t-statistic
        [-s <n>]        number of digits of significance to print
                        after decimal point; default = 2
        [-l]            print ampersands between values for latex
        [-multi]        latex double-column format (used with -t -l)
        [-sum]          just print the sum

        Compute mean and sd of columns of numbers.
        Reads one or more columns from standard input.

        Output:
                <mean> [<sd>] or <sum>
USAGE
  exit 1
endif

onintr cleanup

# get input arguments
set nonewline = 0
set pr_sd = 1
set pr_sum = 0
set t = 0
set sig = 2
set l = 0
set multi = 0
while ("$1" != "")
  switch ($1)
  case -h:
    goto usage
  case -n:
    set nonewline = 1
    breaksw
  case -m:
    set pr_sd = 0
    breaksw
  case -t:
    set t = 1
    breaksw
  case -l:
    set l = 1
    breaksw
  case -s:
    # the value follows the flag as a separate argument
    shift
    set sig = "$1"
    breaksw
  case -multi:
    set multi = 1
    breaksw
  case -sum:
    set pr_sum = 1
    set pr_sd = 0
    breaksw
  endsw
  shift
end

# create a gawk script
set gawk = $pgm.gawk.$$.tmp
cat << "END" > $gawk
# Variables supplied with -v on the gawk command line:
#   nonewline  1 = do not terminate output rows
#   pr_sd      1 = print mean and sd for each column
#   pr_sum     1 = print the sum of each column
#   sig        digits printed after the decimal point
#   t          1 = also print a t-statistic row
#   l          1 = latex output (" & " between values, " \\ " row ends)
#   multi      >0 = wrap single values in \multicolumn{2}{c|}{...}

BEGIN { n = 0; }

# Store every line whose first field does not start with "#".
# nf ends up as the field count of the last stored line.
{
  if (substr($1,1,1) != "#") {
    nf = NF;				# save number of columns
    for (i=1; i <= nf; i++) {
      val[n,i] = $i;			# save each value
    }
    n++;
  }
}

END {
  # Without any data rows the means below would divide by zero.
  if (n == 0) {
    print "sd: no data rows read from standard input" > "/dev/stderr";
    exit 1;
  }

  # format
  if (l == 1) amp = " & "; else amp = " ";

  # compute the sums and means
  for (i=1; i <= nf; i++) {
    sum[i] = 0;
    for (j=0; j < n; j++) {
      sum[i] += val[j,i];
    }
    mean[i] = sum[i]/n;
  }

  # compute the standard deviations (n-1 in the denominator)
  for (i=1; i <= nf; i++) {
    var[i] = 0;
    for (j=0; j < n; j++) {
      var[i] += (mean[i] - val[j,i])^2;
    }
    if (n > 1) {
      var[i] /= n-1;
      sd[i] = sqrt(var[i]);
    } else {
      # a single row would divide by zero; report an sd of 0
      sd[i] = 0;
    }
  }

  # print the means and standard deviations
  if (l==1 && t==1) printf("{\\em mean difference} & ");
  for (i=1; i <= nf; i++) {
    if (pr_sum == 1) {
      format = "%5." sig "f ";
      if (multi>0) format = "\\multicolumn{2}{c|}{" format "}";
      if (i>1) format = amp format;
      printf(format, sum[i]);
    } else if (pr_sd == 1) {
      format = "%5." sig "f" amp "%5." sig "f ";
      if (i>1) format = amp format;
      printf(format, mean[i], sd[i]);
    } else {
      format = "%5." sig "f ";
      if (multi>0) format = "\\multicolumn{2}{c|}{" format "}";
      if (i>1) format = amp format;
      printf(format, mean[i]);
    }
  }
  if (nonewline == 0) {
    if (l==1) printf(" \\\\ \n"); else printf("\n");
  }

  # print t-statistic
  if (t == 1) {
    df = n-1;
    if (l==1) printf("{\\em t-test, df=%d} & ", df);
    for (i=1; i <= nf; i++) {
      format = "%5." sig "f ";
      if (multi>0) format = "\\multicolumn{2}{c|}{" format "}";
      if (i>1) format = amp format;
      if (sd[i] > 0) {
        tt[i] = mean[i]*sqrt(n)/sd[i];
        printf(format, tt[i]);
      } else {
        # zero sd: the statistic is unbounded, tt[i] stays unset
        format = " Inf";
        if (multi>0) format = "\\multicolumn{2}{c|}{" format "}";
        if (i>1) format = amp format;
        printf(format);
      }
    }
    if (nonewline == 0) {
      if (l==1) printf(" \\\\ \n"); else printf("\n");
    }

    if (l==1) {
      # table of students t-distr, alpha = .01
      tmp =     "31.8 7.0 4.5 3.7 3.4 3.1 3.0 2.9 2.8 2.8";
      tmp = tmp " 2.7 2.7 2.7 2.6 2.6 2.6 2.6 2.6 2.5 2.5";
      tmp = tmp " 2.5 2.5 2.5 2.5 2.5 2.5 2.5 2.5 2.5 2.5";
      tmp = tmp " 2.5 2.5 2.5 2.5 2.5 2.5 2.4 2.4 2.4 2.4";
      tmp = tmp " 2.4 2.4 2.4 2.4 2.4 2.4 2.4 2.4 2.4 2.4";
      tmp = tmp " 2.4 2.4 2.4 2.4 2.4 2.4 2.4 2.4 2.4 2.4";
      tmp = tmp " 2.4 2.4 2.4 2.4 2.4 2.4 2.4 2.4 2.4 2.4";
      tmp = tmp " 2.4 2.4 2.4 2.4 2.4 2.4 2.4 2.4 2.4 2.4";
      tmp = tmp " 2.4 2.4 2.4 2.4 2.4 2.4 2.4 2.4 2.4 2.4";
      tmp = tmp " 2.4 2.4 2.4 2.4 2.4 2.4 2.4 2.4 2.4 2.4";
      tmp = tmp " 2.4 2.4 2.4 2.4 2.4 2.4 2.4 2.4 2.4 2.4";
      tmp = tmp " 2.4 2.4 2.4 2.4 2.4 2.4 2.4 2.4 2.4 2.4";
      tmp = tmp " 2.4 2.4 2.4 2.4 2.4 2.4 2.4 2.4 2.4 2.4";
      tmp = tmp " 2.4 2.4 2.4 2.4 2.4 2.4 2.4 2.4 2.4 2.4";
      tmp = tmp " 2.4 2.4 2.4 2.4 2.4 2.4 2.4 2.4 2.4 2.4";
      n01 = split(tmp, st01);

      # table of students t-distr, alpha = .05
      tmp =     " 6.3 2.9 2.4 2.1 2.0 1.9 1.9 1.9 1.8 1.8";
      tmp = tmp " 1.8 1.8 1.8 1.8 1.8 1.7 1.7 1.7 1.7 1.7";
      tmp = tmp " 1.7 1.7 1.7 1.7 1.7 1.7 1.7 1.7 1.7 1.7";
      tmp = tmp " 1.7 1.7 1.7 1.7 1.7 1.7 1.7 1.7 1.7 1.7";
      tmp = tmp " 1.7 1.7 1.7 1.7 1.7 1.7 1.7 1.7 1.7 1.7";
      tmp = tmp " 1.7 1.7 1.7 1.7 1.7 1.7 1.7 1.7 1.7 1.7";
      tmp = tmp " 1.7 1.7 1.7 1.7 1.7 1.7 1.7 1.7 1.7 1.7";
      tmp = tmp " 1.7 1.7 1.7 1.7 1.7 1.7 1.7 1.7 1.7 1.7";
      tmp = tmp " 1.7 1.7 1.7 1.7 1.7 1.7 1.7 1.7 1.7 1.7";
      tmp = tmp " 1.7 1.7 1.7 1.7 1.7 1.7 1.7 1.7 1.7 1.7";
      tmp = tmp " 1.7 1.7 1.7 1.7 1.7 1.7 1.7 1.7 1.7 1.7";
      tmp = tmp " 1.7 1.7 1.7 1.7 1.7 1.7 1.7 1.7 1.7 1.7";
      tmp = tmp " 1.7 1.7 1.7 1.7 1.7 1.7 1.7 1.7 1.7 1.7";
      tmp = tmp " 1.7 1.7 1.7 1.7 1.7 1.7 1.7 1.7 1.7 1.7";
      tmp = tmp " 1.7 1.7 1.7 1.7 1.7 1.7 1.7 1.7 1.7 1.7";
      n05 = split(tmp, st05);

      # The tables are indexed by df.  Beyond the last tabulated df,
      # use the last entry instead of an empty value, which would
      # compare as 0 and mark every non-zero t as significant.
      idx05 = df; if (idx05 > n05) idx05 = n05;
      idx01 = df; if (idx01 > n01) idx01 = n01;

      # print significant or not at .01, .05 levels;
      for (j=1; j<=2; j++) {
        if (j==1) {
          level = .05;
          ST = st05[idx05];
        } else {
          level = .01;
          ST = st01[idx01];
        }
        printf("{\\em significant? $t_{%s,%d}=%4.1f$} & ", level, df, ST);
        for (i=1; i <= nf; i++) {
          format = "%4s";
          if (multi>0) format= "\\multicolumn{2}{c|}{" format "}";
          if (i>1) format = amp format;
          # significant when |t| exceeds the critical value
          if ((tt[i] > 0 && tt[i] > ST) || (tt[i] < 0 && tt[i] < -ST)) {
            printf(format, "YES");
          } else {
            printf(format, "NO");
          }
        }
        if (nonewline == 0) {
          if (l==1) printf(" \\\\ \n"); else printf("\n");
        }
      }
    }
  }
}
"END"

# run the gawk script
gawk -v nonewline=$nonewline -v pr_sd=$pr_sd -v pr_sum=$pr_sum \
  -v sig="$sig" -v t=$t -v l=$l -v multi=$multi -f $gawk
# remember the gawk status before rm overwrites it
set rc = $status

cleanup:
/bin/rm $pgm.*.$$.tmp
exit $rc

# $Header$