#
# catped: merge Sib-pair pedigree files and their scripts.
#
# Read one or more Sib-pair scripts and the corresponding pedigree files,
# combining these into a single pedigree file and script.
#
# Usage: catped script1 ped1 [script2 ped2 ...]
#
# Arguments are given in pairs, the script first and then the pedigree
# file that it describes.
#
#   ped1   ped2  --->  ped3
#   phe1   phe1        phe1
#   phe2               phe2
#          phe3        phe3
#   mar1               mar1
#   mar2   mar2        mar2
#          mar3        mar3
#          mar4        mar4
#
# Model:
# No individuals are present in more than one pedigree file:
# the output pedigree IDs from each pedigree file are prefixed by the
# position of the script/pedigree pair in the argument list, eg 1_ped1.
#
# Loci may be present in more than one pedigree file,
# and loci of the same name in different scripts are assumed to be
# the same locus.  Phenotypes from different files are put together,
# but since the program cannot know whether markers
# in different files are from the same chromosome, these follow the
# phenotypes in "catped.in" ordered according to their appearance in the
# original scripts.  A new variable "dataset" identifies which pedigree
# file that family came from.
#
# Output:
#   catped.in   merged Sib-pair script
#   catped.ped  merged pedigree file
#
# Exit status: 0 on success, 1 on a usage error or any processing failure.
#

# The awk interpreter may be overridden through the environment.
AWK=${AWK:-/usr/bin/gawk}
SCRIPT="catped.in"
PEDFILE="catped.ped"

# Print the usage text.  It is only ever shown on an error path, so it
# goes to stderr.
usage() {
  {
    echo "Usage: catped script1 ped1 [script2 ped2 ...]"
    echo "Merge sib-pair type pedigree files and scripts"
    echo "assuming no individual is in more than one pedigree file."
    echo "Loci may present in more than one pedigree files,"
    echo "and loci of the same name in different scripts are assumed identical."
    echo "A new variable \"dataset\" identifies which pedigree"
    echo "file each family came from."
  } >&2
}

#
# Validate the argument list before any output file is touched.
#
HELP=0
if [ "$#" -lt 2 ]; then
  HELP=1
else
  for f in "$@"; do
    if [ ! -f "$f" ]; then
      HELP=1
      printf 'ERROR: %s does not exist\n\n' "$f" >&2
    fi
  done
  # Every script needs a matching pedigree file.
  if [ $(( $# % 2 )) -ne 0 ]; then
    HELP=1
    printf '%s\n\n' \
      "ERROR: Must be as many Sib-pair scripts as there are pedigree files" >&2
  fi
fi

if [ "$HELP" = 1 ]; then
  usage
  exit 1
fi

if ! command -v "$AWK" >/dev/null 2>&1; then
  printf 'ERROR: awk interpreter %s not found\n' "$AWK" >&2
  exit 1
fi

# Unpredictable temporary file, removed on every exit path.
tmpfile=$(mktemp "${TMPDIR:-/tmp}/catped.XXXXXX") || {
  echo "ERROR: cannot create temporary file" >&2
  exit 1
}
trap 'rm -f -- "$tmpfile"' EXIT
trap 'exit 1' HUP INT TERM

#
# The input files are passed as awk operands and picked up from ARGV, so
# names containing blanks or backslashes survive intact.  The program has
# only a BEGIN rule and leaves it through an explicit exit, so awk never
# tries to read the operands as ordinary input.
#
"$AWK" -v script="$SCRIPT" -v pedfile="$PEDFILE" -- '
# Report a fatal error on stderr and stop with a non-zero status.
function fail(msg) {
  print "ERROR: " msg | "cat 1>&2"
  close("cat 1>&2")
  exit 1
}

BEGIN {
  # ARGV[2k-1] is script number k, ARGV[2k] is pedigree file number k.
  n_files = (ARGC - 1) / 2

  # Affection status is normalised to the letter codes.
  aff["1"] = "n"; aff["n"] = "n"
  aff["2"] = "y"; aff["y"] = "y"
  aff["0"] = "x"; aff["x"] = "x"

  #
  # Merge the scripts: record, per data set, each declared locus with its
  # type, map position and column in the pedigree file.
  #
  for (i = 1; i <= n_files; i++) {
    sp_script = ARGV[2 * i - 1]
    nloc[i] = 0
    pos = 5                       # the five columns that precede the loci
    # getline returns -1 on a read error; testing "> 0" prevents an
    # endless loop on an unreadable file.
    while ((rc = (getline < sp_script)) > 0) {
      if ($1 == "set" && substr($2, 1, 3) == "loc") {
        name = $3
        type = substr($4, 1, 3)
        if (!(name in first_set)) first_set[name] = i
        nloc[i]++
        loc[i, nloc[i]] = name
        loctyp[i, name] = type
        mappos[i, name] = $5
        locpos[i, name] = pos + 1
        # A marker occupies two columns (one per allele), anything else one.
        if (type == "mar") {
          pos += 2
        } else {
          pos++
        }
      }
    }
    if (rc < 0) fail("cannot read " sp_script)
    close(sp_script)
  }

  #
  # Phenotypes: each non-marker locus is declared once, at its first
  # appearance across the scripts.
  #
  print "set locus dataset qua" > script
  nfinal = 0
  for (i = 1; i <= n_files; i++) {
    for (j = 1; j <= nloc[i]; j++) {
      name = loc[i, j]
      type = loctyp[i, name]
      if (type != "mar" && first_set[name] == i) {
        nfinal++
        final[nfinal] = name
        fintyp[nfinal] = type
        printf("set locus %-10s %3s\n", name, type) > script
      }
    }
  }

  #
  # Genotypes: markers follow the phenotypes, again in order of first
  # appearance, with the map position taken from the first script.
  #
  for (i = 1; i <= n_files; i++) {
    for (j = 1; j <= nloc[i]; j++) {
      name = loc[i, j]
      type = loctyp[i, name]
      if (type == "mar" && first_set[name] == i) {
        nfinal++
        final[nfinal] = name
        fintyp[nfinal] = type
        printf("set locus %-10s %3s %8.2f\n", name, type, mappos[i, name]) > script
      }
    }
  }
  print ("read pedigree " pedfile "\nrun\n") > script

  #
  # Merge the pedigrees: one output record per input record, with the
  # columns rearranged into the merged locus order.
  #
  for (i = 1; i <= n_files; i++) {
    sp_ped = ARGV[2 * i]
    while ((rc = (getline < sp_ped)) > 0) {
      # A blank line carries no individual; skip it.
      if (NF == 0) continue
      # Lines starting with "!" are copied through unchanged.
      if (substr($1, 1, 1) == "!") {
        print $0
        continue
      }
      fa = $3
      mo = $4
      sex = $5
      if (fa == "0") fa = "x"
      if (mo == "0") mo = "x"
      if (sex == "1") {
        sex = "m"
      } else if (sex == "2") {
        sex = "f"
      }
      # Data goes through an explicit format so that a "%" in an ID
      # cannot be taken as a conversion.  The trailing number is the
      # value of the "dataset" variable.
      printf "%d_%s %s %s %s %s %d", i, $1, $2, fa, mo, sex, i
      for (j = 1; j <= nfinal; j++) {
        type = fintyp[j]
        known = ((i, final[j]) in locpos)
        if (type == "mar") {
          if (known) {
            pos = locpos[i, final[j]]
            # NOTE(review): %d prints a non-numeric allele code as 0.
            # Confirm that the inputs only hold numeric alleles.
            printf " %3d %3d", $pos, $(pos + 1)
          } else {
            printf " x x"
          }
        } else {
          if (known) {
            pos = locpos[i, final[j]]
            val = $pos
            # An unrecognised affection code is passed through as is
            # rather than being replaced by an empty field.
            if (type == "aff" && (val in aff)) val = aff[val]
            printf " %s", val
          } else {
            printf " x"
          }
        }
      }
      printf "\n"
    }
    if (rc < 0) fail("cannot read " sp_ped)
    close(sp_ped)
  }
  exit 0
}' "$@" > "$tmpfile" || {
  echo "ERROR: merging failed; $PEDFILE was not written" >&2
  exit 1
}

# "best" is an external program that is not defined in this file;
# presumably it post-processes the merged pedigree -- TODO confirm.
best "$tmpfile" > "$PEDFILE" || {
  echo "ERROR: best failed while writing $PEDFILE" >&2
  exit 1
}