function [stat, cfg] = clusterstat(cfg, statrnd, statobs, varargin)
% SUBFUNCTION for computing cluster statistic for N-D volumetric source data
% or for channel-freq-time data
%
% This function uses
% cfg.dim
% cfg.inside (only for source data)
% cfg.tail = -1, 0, 1
% cfg.multivariate = no, yes
% cfg.orderedstats = no, yes
% cfg.clusterstatistic = max, maxsize, maxsum, wcm
% cfg.clusterthreshold = parametric, nonparametric_individual, nonparametric_common
% cfg.clusteralpha
% cfg.clustercritval
% cfg.wcm_weight
% cfg.feedback
% Copyright (C) 2005-2007, Robert Oostenveld
%
% This file is part of FieldTrip, see http://www.ru.nl/neuroimaging/fieldtrip
% for the documentation and details.
%
% FieldTrip is free software: you can redistribute it and/or modify
% it under the terms of the GNU General Public License as published by
% the Free Software Foundation, either version 3 of the License, or
% (at your option) any later version.
%
% FieldTrip is distributed in the hope that it will be useful,
% but WITHOUT ANY WARRANTY; without even the implied warranty of
% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
% GNU General Public License for more details.
%
% You should have received a copy of the GNU General Public License
% along with FieldTrip. If not, see .
%
% $Id: clusterstat.m 10193 2015-02-10 15:49:15Z roboos $
% set the defaults
cfg.orderedstats = ft_getopt(cfg, 'orderedstats', 'no');
cfg.multivariate = ft_getopt(cfg, 'multivariate', 'no');
cfg.minnbchan = ft_getopt(cfg, 'minnbchan', 0);
if cfg.tail~=cfg.clustertail
error('cfg.tail and cfg.clustertail should be identical')
end
% get conncevitiy matrix for the spatially neighbouring elements
channeighbstructmat = full(ft_getopt(cfg, 'connectivity', false));
needpos = cfg.tail==0 || cfg.tail== 1;
needneg = cfg.tail==0 || cfg.tail==-1;
Nsample = size(statrnd,1);
Nrand = size(statrnd,2);
prb_pos = ones(Nsample, 1);
prb_neg = ones(Nsample, 1);
postailrnd = false(Nsample,Nrand); % this holds the thresholded values
negtailrnd = false(Nsample,Nrand); % this holds the thresholded values
Nobspos = 0; % number of positive clusters in observed data
Nobsneg = 0; % number of negative clusters in observed data
if strcmp(cfg.clusterthreshold, 'parametric')
% threshold based on the critical value from parametric distribution
siz = size(cfg.clustercritval);
if all(siz==1) && cfg.clustertail==0
% it only specifies one critical value, assume that the left and right tail are symmetric around zero
negtailcritval = -cfg.clustercritval;
postailcritval = cfg.clustercritval;
elseif all(siz==1) && cfg.clustertail==-1
% it only specifies one critical value corresponding to the left tail
negtailcritval = cfg.clustercritval;
postailcritval = +inf * ones(size(negtailcritval));
elseif all(siz==1) && cfg.clustertail==1
% it only specifies one critical value corresponding to the right tail
postailcritval = cfg.clustercritval;
negtailcritval = -inf * ones(size(postailcritval));
elseif siz(1)==Nsample && siz(2)==1 && cfg.clustertail==0
% it specifies a single critical value for each sample, assume that the left and right tail are symmetric around zero
negtailcritval = -cfg.clustercritval;
postailcritval = cfg.clustercritval;
elseif siz(1)==Nsample && siz(2)==1 && cfg.clustertail==-1
% it specifies a critical value for the left tail
% which is different for each sample (samples have a different df)
negtailcritval = cfg.clustercritval;
postailcritval = +inf * ones(size(negtailcritval));
elseif siz(1)==Nsample && siz(2)==1 && cfg.clustertail==1
% it specifies a critical value for the right tail
% which is different for each sample (samples have a different df)
postailcritval = cfg.clustercritval;
negtailcritval = +inf * ones(size(postailcritval));
elseif siz(1)==Nsample && siz(2)==2 && cfg.clustertail==0
% it specifies a critical value for the left and for the right tail of the distribution
% which is different for each sample (samples have a different df)
negtailcritval = cfg.clustercritval(:,1);
postailcritval = cfg.clustercritval(:,2);
elseif prod(siz)==2 && cfg.clustertail==0
% it specifies a critical value for the left and for the right tail of the distribution
% which is the same for each sample (samples have the same df)
negtailcritval = cfg.clustercritval(1);
postailcritval = cfg.clustercritval(2);
else
error('cannot make sense out of the specified parametric critical values');
end
elseif strcmp(cfg.clusterthreshold, 'nonparametric_individual')
% threshold based on bootstrap using all other randomizations
% each voxel will get an individual threshold
[srt, ind] = sort(statrnd,2);
if cfg.clustertail==0
% both tails are needed
negtailcritval = srt(:,round(( cfg.clusteralpha/2)*size(statrnd,2)));
postailcritval = srt(:,round((1-cfg.clusteralpha/2)*size(statrnd,2)));
elseif cfg.clustertail==1
% only positive tail is needed
postailcritval = srt(:,round((1-cfg.clusteralpha)*size(statrnd,2)));
negtailcritval = -inf * ones(size(postailcritval));
elseif cfg.clustertail==-1
% only negative tail is needed
negtailcritval = srt(:,round(( cfg.clusteralpha)*size(statrnd,2)));
postailcritval = +inf * ones(size(negtailcritval));
end
elseif strcmp(cfg.clusterthreshold, 'nonparametric_common')
% threshold based on bootstrap using all other randomizations
% all voxels will get a common threshold
[srt, ind] = sort(statrnd(:));
if cfg.clustertail==0
% both tails are needed
negtailcritval = srt(round(( cfg.clusteralpha/2)*numel(statrnd)));
postailcritval = srt(round((1-cfg.clusteralpha/2)*numel(statrnd)));
elseif cfg.clustertail==1
% only positive tail is needed
postailcritval = srt(round((1-cfg.clusteralpha)*numel(statrnd)));
negtailcritval = -inf * ones(size(postailcritval));
elseif cfg.clustertail==-1
% only negative tail is needed
negtailcritval = srt(round(( cfg.clusteralpha)*numel(statrnd)));
postailcritval = +inf * ones(size(negtailcritval));
end
else
error('no valid threshold for clustering was given')
end % determine clusterthreshold
% these should be scalars or column vectors
negtailcritval = negtailcritval(:);
postailcritval = postailcritval(:);
% remember the critical values
cfg.clustercritval = [negtailcritval postailcritval];
% test whether the observed and the random statistics exceed the threshold
postailobs = (statobs >= postailcritval);
negtailobs = (statobs <= negtailcritval);
for i=1:Nrand
postailrnd(:,i) = (statrnd(:,i) >= postailcritval);
negtailrnd(:,i) = (statrnd(:,i) <= negtailcritval);
end
if ~isfield(cfg, 'inside')
cfg.inside = true(cfg.dim);
end % cfg.inside is set in ft_sourcestatistics, but is also needed for timelock and freq
if isfield(cfg, 'origdim'),
cfg.dim = cfg.origdim;
end % this snippet is to support correct clustering of N-dimensional data, not fully tested yet
% first do the clustering on the observed data
if needpos,
if ~isfinite(channeighbstructmat)
% this pertains to data for which the spatial dimension can be reshaped
% into 3D, i.e. when it is described on an ordered set of positions on a 3D-grid
tmp = zeros(cfg.dim);
tmp(cfg.inside) = postailobs;
numdims = length(cfg.dim);
if numdims == 2 || numdims == 3 % if 2D or 3D data
ft_hastoolbox('spm8',1);
% use spm_bwlabel for 2D/3D data to avoid usage of image processing toolbox
[posclusobs, posnum] = spm_bwlabel(tmp, 2*numdims);
else
% use bwlabeln from the image processing toolbox
posclusobs = bwlabeln(tmp, conndef(length(cfg.dim), 'min'));
end
posclusobs = posclusobs(cfg.inside);
else
if 0
posclusobs = findcluster(reshape(postailobs, [cfg.dim,1]),cfg.chancmbneighbstructmat,cfg.chancmbneighbselmat,cfg.minnbchan);
else
posclusobs = findcluster(reshape(postailobs, [cfg.dim,1]),channeighbstructmat,cfg.minnbchan);
end
posclusobs = posclusobs(:);
end % if channeighbstructmat
Nobspos = max(posclusobs(:)); % number of clusters exceeding the threshold
fprintf('found %d positive clusters in observed data\n', Nobspos);
end % if needpos
if needneg,
if ~isfinite(channeighbstructmat)
% this pertains to data for which the spatial dimension can be reshaped
% into 3D, i.e. when it is described on an ordered set of positions on a 3D-grid
tmp = zeros(cfg.dim);
tmp(cfg.inside) = negtailobs;
numdims = length(cfg.dim);
if numdims == 2 || numdims == 3 % if 2D or 3D data
ft_hastoolbox('spm8',1);
% use spm_bwlabel for 2D/3D data to avoid usage of image processing toolbox
[negclusobs, negnum] = spm_bwlabel(tmp, 2*numdims);
else
% use bwlabeln from the image processing toolbox
negclusobs = bwlabeln(tmp, conndef(length(cfg.dim),'min'));
end
negclusobs = negclusobs(cfg.inside);
else
if 0
negclusobs = findcluster(reshape(negtailobs, [cfg.dim,1]),cfg.chancmbneighbstructmat,cfg.chancmbneighbselmat,cfg.minnbchan);
else
negclusobs = findcluster(reshape(negtailobs, [cfg.dim,1]),channeighbstructmat,cfg.minnbchan);
end
negclusobs = negclusobs(:);
end % if channeighbstructmat
Nobsneg = max(negclusobs(:));
fprintf('found %d negative clusters in observed data\n', Nobsneg);
end % if needneg
stat = [];
stat.stat = statobs;
% catch situation where no clustering of the random data is needed
if (Nobspos+Nobsneg)==0
warning('no clusters were found in the observed data');
stat.prob = ones(Nsample, 1);
return
end
% allocate space to hold the randomization distributions of the cluster statistic
if strcmp(cfg.multivariate, 'yes') || strcmp(cfg.orderedstats, 'yes')
fprintf('allocating space for a %d-multivariate distribution of the positive clusters\n', Nobspos);
fprintf('allocating space for a %d-multivariate distribution of the negative clusters\n', Nobsneg);
posdistribution = zeros(Nobspos,Nrand); % this holds the multivariate randomization distribution of the positive cluster statistics
negdistribution = zeros(Nobsneg,Nrand); % this holds the multivariate randomization distribution of the negative cluster statistics
else
posdistribution = zeros(1,Nrand); % this holds the statistic of the largest positive cluster in each randomization
negdistribution = zeros(1,Nrand); % this holds the statistic of the largest negative cluster in each randomization
end
% do the clustering on the randomized data
ft_progress('init', cfg.feedback, 'computing clusters in randomization');
for i=1:Nrand
ft_progress(i/Nrand, 'computing clusters in randomization %d from %d\n', i, Nrand);
if needpos,
if ~isfinite(channeighbstructmat)
tmp = zeros(cfg.dim);
tmp(cfg.inside) = postailrnd(:,i);
numdims = length(cfg.dim);
if numdims == 2 || numdims == 3 % if 2D or 3D data
[posclusrnd, posrndnum] = spm_bwlabel(tmp, 2*numdims); % use spm_bwlabel for 2D/3D data to avoid usage of image toolbox
else
posclusrnd = bwlabeln(tmp, conndef(length(cfg.dim),'min')); % spm_bwlabel yet (feb 2011) supports only 2D/3D data
end
posclusrnd = posclusrnd(cfg.inside);
else
if 0
posclusrnd = findcluster(reshape(postailrnd(:,i), [cfg.dim,1]),cfg.chancmbneighbstructmat,cfg.chancmbneighbselmat,cfg.minnbchan);
else
posclusrnd = findcluster(reshape(postailrnd(:,i), [cfg.dim,1]),channeighbstructmat,cfg.minnbchan);
end
posclusrnd = posclusrnd(:);
end
Nrndpos = max(posclusrnd(:)); % number of clusters exceeding the threshold
stat = zeros(1,Nrndpos); % this will hold the statistic for each cluster
% fprintf('found %d positive clusters in this randomization\n', Nrndpos);
for j = 1:Nrndpos
if strcmp(cfg.clusterstatistic, 'max'),
stat(j) = max(statrnd(posclusrnd==j,i));
elseif strcmp(cfg.clusterstatistic, 'maxsize'),
stat(j) = length(find(posclusrnd==j));
elseif strcmp(cfg.clusterstatistic, 'maxsum'),
stat(j) = sum(statrnd(posclusrnd==j,i));
elseif strcmp(cfg.clusterstatistic, 'wcm'),
stat(j) = sum((statrnd(posclusrnd==j,i)-postailcritval).^cfg.wcm_weight);
else
error('unknown clusterstatistic');
end
end % for 1:Nrdnpos
if strcmp(cfg.multivariate, 'yes') || strcmp(cfg.orderedstats, 'yes')
stat = sort(stat, 'descend'); % sort them from most positive to most negative
if Nrndpos>Nobspos
posdistribution(:,i) = stat(1:Nobspos); % remember the largest N clusters
else
posdistribution(1:Nrndpos,i) = stat; % remember the largest N clusters
end
else
% univariate -> remember the most extreme cluster
if ~isempty(stat), posdistribution(i) = max(stat); end
end
end % needpos
if needneg,
if ~isfinite(channeighbstructmat)
tmp = zeros(cfg.dim);
tmp(cfg.inside) = negtailrnd(:,i);
numdims = length(cfg.dim);
if numdims == 2 || numdims == 3 % if 2D or 3D data
ft_hastoolbox('spm8',1);
[negclusrnd, negrndnum] = spm_bwlabel(tmp, 2*numdims); % use spm_bwlabel for 2D/3D to avoid usage of image toolbox
else
negclusrnd = bwlabeln(tmp, conndef(length(cfg.dim),'min')); % spm_bwlabel yet (feb 2011) supports only 2D/3D data
end
negclusrnd = negclusrnd(cfg.inside);
else
if 0
negclusrnd = findcluster(reshape(negtailrnd(:,i), [cfg.dim,1]),cfg.chancmbneighbstructmat,cfg.chancmbneighbselmat,cfg.minnbchan);
else
negclusrnd = findcluster(reshape(negtailrnd(:,i), [cfg.dim,1]),channeighbstructmat,cfg.minnbchan);
end
negclusrnd = negclusrnd(:);
end
Nrndneg = max(negclusrnd(:)); % number of clusters exceeding the threshold
stat = zeros(1,Nrndneg); % this will hold the statistic for each cluster
% fprintf('found %d negative clusters in this randomization\n', Nrndneg);
for j = 1:Nrndneg
if strcmp(cfg.clusterstatistic, 'max'),
stat(j) = min(statrnd(negclusrnd==j,i));
elseif strcmp(cfg.clusterstatistic, 'maxsize'),
stat(j) = -length(find(negclusrnd==j)); % encode the size of a negative cluster as a negative value
elseif strcmp(cfg.clusterstatistic, 'maxsum'),
stat(j) = sum(statrnd(negclusrnd==j,i));
elseif strcmp(cfg.clusterstatistic, 'wcm'),
stat(j) = -sum((abs(statrnd(negclusrnd==j,i)-negtailcritval)).^cfg.wcm_weight); % encoded as a negative value
else
error('unknown clusterstatistic');
end
end % for 1:Nrndneg
if strcmp(cfg.multivariate, 'yes') || strcmp(cfg.orderedstats, 'yes')
stat = sort(stat, 'ascend'); % sort them from most negative to most positive
if Nrndneg>Nobsneg
negdistribution(:,i) = stat(1:Nobsneg); % remember the most extreme clusters, i.e. the most negative
else
negdistribution(1:Nrndneg,i) = stat; % remember the most extreme clusters, i.e. the most negative
end
else
% univariate -> remember the most extreme cluster, which is the most negative
if ~isempty(stat), negdistribution(i) = min(stat); end
end
end % needneg
end % for 1:Nrand
ft_progress('close');
% compare the values for the observed clusters with the randomization distribution
if needpos,
posclusters = [];
stat = zeros(1,Nobspos);
for j = 1:Nobspos
if strcmp(cfg.clusterstatistic, 'max'),
stat(j) = max(statobs(posclusobs==j));
elseif strcmp(cfg.clusterstatistic, 'maxsize'),
stat(j) = length(find(posclusobs==j));
elseif strcmp(cfg.clusterstatistic, 'maxsum'),
stat(j) = sum(statobs(posclusobs==j));
elseif strcmp(cfg.clusterstatistic, 'wcm'),
stat(j) = sum((statobs(posclusobs==j)-postailcritval).^cfg.wcm_weight);
else
error('unknown clusterstatistic');
end
end
% sort the clusters based on their statistical value
[stat, indx] = sort(stat,'descend');
% reorder the cluster indices in the data
tmp = zeros(size(posclusobs));
for j=1:Nobspos
tmp(posclusobs==indx(j)) = j;
end
posclusobs = tmp;
if strcmp(cfg.multivariate, 'yes')
% estimate the probability of the mutivariate tail, i.e. one p-value for all clusters
prob = 0;
for i=1:Nrand
% compare all clusters simultaneosuly
prob = prob + any(posdistribution(:,i)>stat(:));
end
if isequal(cfg.numrandomization, 'all')
prob = prob/Nrand;
else % the minimum possible p-value should not be 0, but 1/N
prob = (prob + 1)/(Nrand + 1);
end
for j = 1:Nobspos
% collect a summary of the cluster properties
posclusters(j).prob = prob;
posclusters(j).clusterstat = stat(j);
end
% collect the probabilities in one large array
prb_pos(posclusobs~=0) = prob;
elseif strcmp(cfg.orderedstats, 'yes')
% compare the Nth ovbserved cluster against the randomization distribution of the Nth cluster
prob = zeros(1,Nobspos);
for j = 1:Nobspos
if isequal(cfg.numrandomization, 'all')
prob(j) = sum(posdistribution(j,:)>stat(j))/Nrand;
else % the minimum possible p-value should not be 0, but 1/N
prob(j) = (sum(posdistribution(j,:)>stat(j)) + 1)/(Nrand + 1);
end
% collect a summary of the cluster properties
posclusters(j).prob = prob(j);
posclusters(j).clusterstat = stat(j);
% collect the probabilities in one large array
prb_pos(posclusobs==j) = prob(j);
end
else
% univariate -> each cluster has it's own probability
prob = zeros(1,Nobspos);
for j = 1:Nobspos
if isequal(cfg.numrandomization, 'all')
prob(j) = sum(posdistribution>stat(j))/Nrand;
else % the minimum possible p-value should not be 0, but 1/N
prob(j) = (sum(posdistribution>stat(j)) + 1)/(Nrand + 1);
end
% collect a summary of the cluster properties
posclusters(j).prob = prob(j);
posclusters(j).clusterstat = stat(j);
% collect the probabilities in one large array
prb_pos(posclusobs==j) = prob(j);
end
end
end
if needneg,
negclusters = [];
stat = zeros(1,Nobsneg);
for j = 1:Nobsneg
if strcmp(cfg.clusterstatistic, 'max'),
stat(j) = min(statobs(negclusobs==j));
elseif strcmp(cfg.clusterstatistic, 'maxsize'),
stat(j) = -length(find(negclusobs==j)); % encode the size of a negative cluster as a negative value
elseif strcmp(cfg.clusterstatistic, 'maxsum'),
stat(j) = sum(statobs(negclusobs==j));
elseif strcmp(cfg.clusterstatistic, 'wcm'),
stat(j) = -sum((abs(statobs(negclusobs==j)-negtailcritval)).^cfg.wcm_weight); % encoded as a negative value
else
error('unknown clusterstatistic');
end
end
% sort the clusters based on their statistical value
[stat, indx] = sort(stat,'ascend');
% reorder the cluster indices in the observed data
tmp = zeros(size(negclusobs));
for j=1:Nobsneg
tmp(negclusobs==indx(j)) = j;
end
negclusobs = tmp;
if strcmp(cfg.multivariate, 'yes')
% estimate the probability of the mutivariate tail, i.e. one p-value for all clusters
prob = 0;
for i=1:Nrand
% compare all clusters simultaneosuly
prob = prob + any(negdistribution(:,i) each cluster has it's own probability
prob = zeros(1,Nobsneg);
for j = 1:Nobsneg
if isequal(cfg.numrandomization, 'all')
prob(j) = sum(negdistribution