function [class, normpvalue, predlr] = DrugPredictionScore(training, test, groups, numbern)
% This function takes as an argument a (training) set that has
% observations (drugs) as rows and variables as columns. (test) is a test vector to classify using
% training, and in this version it is limited to n=1. (groups) is a column vector
% of group memberships (sequential and numeric, starting at 1). Note that if you have 7 groups they must be ordered 1-7 for
% the observations in (training). (numbern) is the
% number of nearest neighbors to include; the default is majority rule. The class is the predicted group
% membership of test, and pvalue is the predicted significance of the
% inclusion of the test compound in the predicted class.
% Currently a KNN classifier is hardcoded into the algorithm, but the
% algorithm can be used with any MATLAB multi-class classifier if the code
% is changed. In our shRNA dataset we have noted that Naive Bayes
% classifiers perform similarly well. In either case a Euclidean metric of
% cluster size is used to assess significance.
%Step1 creating the reference distances and null distributions
%__________________________________________________________________________
%__________________________________________________________________________
DistanceMatrix=squareform(pdist(training,'euclidean'));
M=max(groups)
i=1
nulldist={}
ALtot=[]
while i< M+1
j=1%makes an index that terminates when the max group number is reached
AL=[]
gpos=[]
nongpos=[]
while j< length(groups)+1 %looks through the groups vector sequentially to determine which entries match the group label i
if groups(j,1)==i %sequentially checks the equality
gpos=horzcat(gpos,j)%horizontally concatenates the reference row for a given group.
elseif groups(j,1)~=i
nongpos=horzcat(nongpos,j)
end
j=j+1
end
S=length(gpos)
Sn=length(nongpos)
submati=[DistanceMatrix(gpos(1,1):gpos(1,S),gpos(1,1):gpos(1,S))]%builds submatrix of group specific distances and then calculates the averages on the basis of this
AL=sum(sum(submati))/(S*(S-1))
ALtot=horzcat(ALtot,AL)
k=1
newalvec=[]
while k < Sn+1 %this step build null distributions based on the empircal prediction using false negatives
alldistances=DistanceMatrix(nongpos(1,k),:)
l=1
nullpredictionsdist=[]
while l < S+1
nullpredictionsdist=vertcat(nullpredictionsdist,alldistances(1,gpos(1,l)))
l=l+1
end
newal=(sum(sum(submati))+sum(nullpredictionsdist))/((S*(S-1))+S)
newalvec=vertcat(newalvec,newal)
k=k+1
end
storednull{1,i}=newalvec
i=i+1
end
%__________________________________________________________________________
%step2 is the prediction of drug class and updating a new training set to
%take into account that prediction, and then calculating the average
%linkage following the inclusion of the test drug
%________________________________________________________________________
training2=training
test2=test
groups2=groups
class=knnclassify(test, training, groups, numbern, 'euclidean')
m=1
while m < length(class)+1
n=1
while n < length(groups)+1
if groups(n,1)==class(m,1)
if n