function [LDdata, LDtimedata, LDrepeat, frequencynew, LD_genome, SA, Data, s, lamda, R, ep_value, Fitness] = selection %%NOTE: As of 05/18/2013 06:30AM, all two modes work fine. (LD alone & LD + frequency + figure) %%NOTE: %%Last updated 05/18/2013, 06:30AM %%-------------------------------------------------------------------------------- %%-------------- SIMULATION PARAMETERS-------------------------------------------- NumCliq=[1,5,10,50,100]; %number of cliques for introducing geographic separation between population Ng=100; %number of generations L=20; %%number of locus (units of genes) Npopstart=10^3; %number of population V_a=0; % allele fitness variance V_i=0.005*1; % epistatic fitness variance r=[0.1:0.05:0.35]; %user adjustable; r denotes the probability for successful random mating numrepeat=10; %number of whole repeat for LD procedure; not recommendable for figure drawing and frequency finding; entropycutoff=0.7; %%--------------------------------------------------------------------------------- %repeat the whole procedure in order to get quantified LD values; %%--------------------------------------------------------------------------------- %%-------------- INITIATION ------------------------------------------------------- % s, initial genotype matrix, binary genotype variables; column vector; ex) [+1; -1; -1; -1;] for L=4 along the column, each individuals genotype is stored as binary variable (initial) % s=[]; % s=de2bi(randi(2^L,1,Npopstart)-1)'; %random number from 0 to (2^L-1), and change it to binary form % cliquestart=randi(NC,1,Npopstart); % s=[cliquestart;s]; LDdata=cell(length(NumCliq),1); LDtimedata=cell(length(NumCliq),1); LDrepeat=zeros(numrepeat,length(r)); LDcutofftime=zeros(numrepeat,length(r)); % % Data=cell(Ng,1); Data{1}=s; % Store evolving matrix of all genotypes in cell % D=[]; %-; % LD_genome=zeros(length(r),Ng); % v=[]; %allele frequencies at loci. % % LD=cell(L,L); %each element of this cell is Dij, a 2*2 matrix e=[]; %not sure where it is used AllBinIndx=[0:1:2^L-1]'; %a column vector containing indices assigned for each genotype; total 2^L genotype available AllGeno_Bin=de2bi(AllBinIndx); %matrix of all genotype (binary form); each row contains one full genotype AllGeno=2*(AllGeno_Bin-0.5); %matrix of all genotype (in +1, -1 form); each row contains one full genotype ep_value=[]; %a row vector containing random epistasis value lamda=[]; %a row vector containing (normalized) poissant distribution parameter V_t=V_a+V_i; sigma=sqrt(V_t); f=sqrt(V_a/L); A=f*ones(1,L); %Allele fitness vector (row vector) %%--------------------------------------------------------------------------------- %%--------------------------------------------------------------------------------- %%END OF PARAMETERS. cstring='rgbcmyk'; cc=hsv(length(NumCliq)); %%============================================================================================================== %% %%---------------------SIMULATION OPERATION MODE----------------------------------- %% %%============================================================================================================== while(1) modenum=input('Type in the model you want to explore: (1)RE model (2)PE model '); if modenum~=1 & modenum~=2 fprintf('Wrong. Please type again') else break; end end %%============================================================================================================== %% %%------------------------ BEGINNNING OF RE MODEL --------------------------------- %% %%============================================================================================================== if modenum==1 while(1) modenum_a=input('mode of operation: 1)LD alone; 2)LD + Frequency; '); if modenum_a~=1 & modenum_a~=2 fprintf('Wrong. Please type again') else break; end end while(1) matingmode=input('mode of mating: 1)Random Mating; 2)Non-Random Mating (geographic barrier); '); if matingmode~=1 & matingmode~=2 fprintf('Wrong. Please type again') else break; end end figure('color', 'white'); %%============================================================================================================== %% NUMBER OF REPEATS FOR LD ANALYSIS (temprepeat) %%============================================================================================================== Cliqueindx=1; while(1) %for different number of cliques NC=NumCliq(Cliqueindx); LDrepeat=[]; LDcutofftime=[]; temprepeat=1; while(1) %temprepeat %% Initiation genotype=[]; genotype=de2bi(randi(2^L,1,Npopstart)-1)'; %random number from 0 to (2^L-1), and change it to binary form cliquestart=randi(NC,1,Npopstart); genotype=[cliquestart;genotype]; D=zeros(L,L,Ng); %-; LD_genome=zeros(length(r),Ng); LDcutoff=[]; LDtime=[]; %NOT an average LD, rather it is either 1) LD measured when entropy reduced to v=[]; %allele frequencies at loci. ep_value=[]; %a row vector containing random epistasis value lamda=[]; %a row vector containing (normalized) poissant distribution parameter ep_value=normrnd(0, sqrt(V_i), [1,2^L]); %updated 05/18/2013; ep_value depends on genotype, but fixed in time q=1; SA=[]; frequencynew=cell(length(r),1); %%============================================================================================================== %% DIFFERENT OUTCROSSING RATE (q) %%============================================================================================================== while(1) %r, outcrossing rate (q) k=1; n=1; Npop=Npopstart; v=[]; D=[]; N=[]; Data=cell(Ng,1); Data{1}=genotype; % Store evolving matrix of all genotypes in cell %%================================================================================= %% (q.1)GENERATION (k) %%================================================================================= while(1) %generation (k) Fitness=[]; v=zeros(1,L); lamda=[]; R=[]; stemp=[]; s=Data{k}; sforfit=(s([2:end],:)-0.5)*2; %ep_value=sqrt(V_i)*randn(1,Npop); %%generate random values from normal distrubtion with standard deviation sqrt(V_i) & mean=0; Fitness=A*sforfit+ep_value(:,bi2de(s([2:end],:)')+1); %fitness column vector. (N by 1) norm=(sum(exp(Fitness-mean(Fitness)))/Npopstart); lamda=(1/norm)*exp(Fitness-mean(Fitness)); % LAMDA AVERAGED TO 1 R=poissrnd(lamda); %a row vector, with each element specifying the number of gametes produced by each genotype / lamda=average(R) N(k)=size(Data{k},2); %fprintf('%d of N calculated',k); %%-------------------------------------------------------------------------------------------------------------- %%------------------------- (q.1.A) Linkage Disequilibrium ------------------------------------------------------- %%-------------------------------------------------------------------------------------------------------------- for i=1:L %one locus for a LD pair idx_ip=find(Data{k}(i+1,:)==1); vi=length(idx_ip)/N(k); if vi<0.01||vi>0.99 v(i)=0; else v(i)=vi; end for j=i+1:L %the other locuse for the LD pair idx_jp=find(Data{k}(j+1,:)==1); idx_ipjp=find(Data{k}(i+1,:)==1 & Data{k}(j+1,:)==1); %a pair between +1, +1 state % idx_ipjn=find(Data{k}(i,:)==1 & Data{k}(j,:)==0);%a pair between +1, -1 state % idx_injp=find(Data{k}(i,:)==0 & Data{k}(j,:)==1); % idx_injn=find(Data{k}(i,:)==0 & Data{k}(j,:)==0); vj=length(idx_jp)/N(k); vipjp=length(idx_ipjp)/N(k); % vipjn=length(idx_ipjn)/N(k); % vinjp=length(idx_injp)/N(k); % vinjn=length(idx_injn)/N(k); if vi<0.01||vi>0.99||vj<0.01||vj>0.99 D(i,j,k)=0; LD_genome(q,k)=LD_genome(q,k); else D(i,j,k)=((vipjp-vi*vj)^2)/(vi*(1-vi)*vj*(1-vj)); %+((vipjn-vi*(1-vj))/(vi*(1-vi)*vj*(1-vj)))^2+((vinjp-(1-vi)*vj)/(vi*(1-vi)*vj*(1-vj)))^2+((vinjn-(1-vi)*(1-vj))/(vi*(1-vi)*vj*(1-vj)))^2; LD_genome(q,k)=LD_genome(q,k)+D(i,j,k); end % if vi<0.01||vi>0.99||vj<0.01||vj>0.99 % LD{i,j,k}=[]; % else % LD{i,j,k}=[vipjp-vi*vj, vipjn-vi*(1-vj); vinjp-(1-vi)*vj, vinjn-(1-vi)*(1-vj)]; % end % LD_allstate(i,j,k)=mean(mean(LD{i,j,k})); end end v(v==0)=[]; SA(q,k)=-((v*log(v)')+((1-v)*log(1-v)')); if k==1 Sinitial(q)=SA(q,k); elseif k==Ng LDcutoff(q)=2*LD_genome(q,k)/(L*(L-1)); % updated 05/18 disp(' '); disp('not enough generation for such allelic entropy reduction'); disp(' '); break; %% this is when we get out of the GENERATION (K) LOOP. (right before progeny collection) else if SA(q,k)<=entropycutoff*Sinitial(q) fprintf('stopped in %d generation; allelic entropy reduced to 70%', k); LDtime(q)=k; disp(' '); fprintf('initial allelic entropy is %d', Sinitial(q)); disp(' '); fprintf('stopped allelic entropy is %d', SA(q,k)); disp(' '); disp(' '); LDcutoff(q)=2*LD_genome(q,k)/(L*(L-1)); % updated 05/18 break; end end %% END OF LINKAGE DISEQUILIBRIUM. %%-------------------------------------------------------------------------------------------------------------- %%----------------------------- (q.1.B) Progeny Collection ------------------------------------------------------- %%-------------------------------------------------------------------------------------------------------------- while(1) %loop repeating through all number of population in given generation. if R(n)~0; %progeny collection step. If the number of gamete tempidx=1; while tempidx<=R(n) stemp=[stemp, s(:,n)]; tempidx=tempidx+1; end end if n==Npop; Data{k+1}=stemp; %evolution matrix by 100% asexual propagation; kth generation Gamete genotype matrix; Npop=size(Data{k+1},2); n=1; break; end n=n+1; end %% END OF PROGENY COLLECTION. %%-------------------------------------------------------------------------------------------------------------- %%------------------------- (q.1.C) Gene Reassortment: Random vs Non-random Mating ------------------------------- %%-------------------------------------------------------------------------------------------------------------- tempridx=1; while tempridx<=Npop matingprob=rand; if matingprob 0 %ex2)[-1 -1 -1 1 -1] --> 2 %Note: C, the unique matrix of each Data, is already sorted, and %thus changing each row [C,ia,ic] = unique(sort(bi2de(Data{k}([2:end],:)')),'rows','first');%here, Cis the unique matrix, C=A(ia), A=C(ic) frequency{k}=[diff(ia); N(k)-ia(end)+1;]; %row vector, %but how to normalize the frequency discretely? More explicitly, how should I conserve population size for d iscrete Poissant model tempbinindx{k}=C; % tempall=AllBinIndx; [B,i1,i2]=intersect(tempall,tempbinindx{k}); tempall(i1)=[]; tempmissing=tempall; if isempty(tempmissing)==1 frequencynew{q}(k,:)=frequency{k}; else pos=tempmissing+1;%binary values of missing genotype; its position is tempmissing+1; missfreq=zeros(1,size(tempmissing,1)); tf=false(1,numel(frequency{k})+numel(missfreq)); result=double(tf); tf(pos)=true; result(tf)=missfreq; result(~tf)=frequency{k}; frequencynew{q}(k,:)=result; %each row contains frequency(in numbers, not ratio) of mth genotype in kth generation. end %% END OF FREQUENCY FINDING SECTION end k=k+1; end %%======================================================================================================== %% (q.2) GENOTYPE DISTRIBUTION FIGURES %%======================================================================================================== if modenum_a==2; figure(); colormap(lines); bar(frequencynew{q},'stacked'); title(strcat('r/sigma: ',num2str(r(q)/sigma))); % Create xlabel xlabel('Generation','FontSize',30); set(gca, 'fontsize',15); ylabel('Population','FontSize',30); end %%--------------------------------------------------------------------------------------------------------- if q