#---------------------------------------------------------------------------- #MULTIVARIATE PERMUTATION TESTING OF CORRELATIONS #Description: This R program performs multivariate permutation tests of # the correlation between a single criterion variable and # multiple predictor variables. # Data must be imported by user with subjects/cases as rows # and variables as columns. Missing data must be specified. # #Written by: Jennifer Urbano Blackford, Ph.D. # #Owned by: The Kennedy Center, Vanderbilt University, Nashville, TN #-------------------------------------------------------------------------- #define function & specify arguments #defaults are specified with an '=' MPT.Corr<-function(observed.data,criterion,predictor.start,predictor.end, output.file="Results.txt",test=2,tail=2,alpha=.05,output=0,permutations=10000){ #-------------------------------------------------------------------------- #CREATE NECESSARY DATASETS AND VARIABLES #set user specifications #set text to print to screen or print to specified file if (output.file != '') (sink(output.file)) #create datasets using the user specifications above predict.data<-as.matrix(observed.data[, predictor.start:predictor.end,drop=FALSE]) if (output==1) (print (predict.data)) crit.data<-as.matrix(observed.data[,criterion,drop=FALSE]) if (output==1) (print (crit.data)) #Assign variable numbers from predictor and criterion datasets #set number of subjects as number of rows in Data nsubj<-nrow(crit.data) #set number of predictors to number of columns in predict.data npredict<-ncol(predict.data) #set number of criterion variables to number of columns in crit.data ncrit<-ncol(crit.data) #save the variable names for later use colnames <- dimnames(predict.data)[[2]] #set tail information for the cor.test function if (test==2) (alt<-"t") if (test==1 && tail==1) (alt<-"g") if (test==1 && tail==-1) (alt<-"l") #------------------------------------------------------------------------------ #------------------------------------------------------------------------------ #CREATE OBSERVED AND SIMULATED CORRELATION DATASETS #calculate correlation for actual data--crit.data and predict.data #save correlations in one matrix and p values in another observed.r<-matrix(0,1,npredict,dimnames=list(c("r"),c(colnames))) observed.p<-matrix(0,1,npredict,dimnames=list(c("p"),c(colnames))) #create a matrix to save the simulated r values simulated.r<-matrix(0, permutations,npredict) #create a vector OriginalCorr to save correlation results OriginalCorr<-apply(predict.data,2,cor.test,y=crit.data,alternative=alt) for(i in 1:npredict){ observed.r[i]<-as.matrix(OriginalCorr[[i]]$estimate) #get the r value observed.p[i]<-as.matrix(OriginalCorr[[i]]$p.value)} #get the p value #print correlation results writeLines("Multivariate Permutation Test for Correlations") writeLines(" Yoder, Blackford, Waller, and Kim (2003)") writeLines("") writeLines("Observed Correlations & P-values") writeLines("") print(cbind(t(round(observed.r,digits=3)),t(round(observed.p,digits=3)))) writeLines("") #create simulated r by shuffling the original criterion data and computing correlations for(loop in 1:permutations){ #randomly sampling nsubj times without replacement=shuffling ShuffleData<-as.matrix(crit.data[sample(1:nsubj),]) #save the post-shuffle corr for each variable and each loop into a matrix called simulated.r #note: when using the corr function you must specify to use pairwise deletion SimulatedCorr<-cor(predict.data,ShuffleData,use="pairwise.complete.obs") for(i in 1:npredict){ simulated.r[loop,i]<-SimulatedCorr[i]} } #end bracket for(loop in 1:permutations) #------------------------------------------------------------------------------ #------------------------------------------------------------------------------ #PERFORM MULTIVARIATE PERMUATION TESTS #create a matrix to store the MPT results Results<-matrix(,1,4,dimnames=list(NULL,c("Variable","r","p","MPT exact p")))[-1,] #create matrices of maximum corrs and observed corrs in each permutation #loop to test significance for max r (i.e., step down procedure). while (npredict > 0) { #select largest of the absolute values--save column position #using column as placeholder provides a way to track the sign of the r max.col<-matrix(0,permutations) for (loop in 1:permutations){ max.col[loop,]<-(1:npredict)[abs(simulated.r[loop,])== max(abs(simulated.r[loop,]))]} #save the largest value--maintain original sign (using max.col) max.simulated.r<-matrix(0,permutations) for (loop in 1:permutations){ max.simulated.r[loop,1]<-simulated.r[loop,max.col[loop,]]} #print histogram if (output==1) (hist(max.simulated.r)) #select the largest correlation from the observed correlation for testing #use test specification information to determine which correlation to use #set max.col.r to the appropriate column number #two tailed test: get largest absolute value if (test==2) (max.col.r <-(order(abs(observed.r[1,]))[npredict])) #one-tailed positive test: get largest positive value if (test==1 && tail==1) (max.col.r<-(order(observed.r[1,])[npredict])) #one-tailed negative test: get largest negative value if (test==1 && tail==-1) (max.col.r<-(order(observed.r[1,])[1])) #use the max.col.r variable to get the name, r, and p-value #associated with the largest correlation to be tested largest.observed.r<-observed.r[max.col.r] smallest.observed.p<-observed.p[max.col.r] variable<-colnames[max.col.r] #apply breaks to stop the program if necessary #if testing the positive tail only then stop the program when the observed r is less than 0 if (test==1 && tail==1 && largest.observed.r <0) break #if testing the negative tail only then stop the program when the observed r is greater than 0 if (test==1 && tail==-1 && largest.observed.r >0) break #compare largest correlation to the max r distribution #determine significance by creating an array of 0s and 1s representing #whether the simulated corr is larger than the observed corr. p.dist<-rep(0,permutations) #for two tailed test #positive observed correlation--check positive tail & negative tail if (test==2 && (largest.observed.r >0)) (p.dist[max.simulated.r>= largest.observed.r]<-1) if (test==2 && (largest.observed.r >0)) (p.dist[max.simulated.r<= largest.observed.r*-1]<-1) #negative observed correlation--check positive tail & negative tail if (test==2 && (largest.observed.r <0))(p.dist[max.simulated.r<= largest.observed.r]<-1) if (test==2 && (largest.observed.r <0))(p.dist[max.simulated.r>= largest.observed.r*-1]<-1) #for one tailed positive test check only the positive tail if (test==1 && tail==1) (p.dist[max.simulated.r>= largest.observed.r]<-1) #for one tailed negative test check only the negative tail if (test==1 && tail==-1) (p.dist[max.simulated.r<= largest.observed.r]<-1) #get p-value as number of permutations above/below observed r and divided by number of permutations p.value<-0 if (sum(p.dist) > 0) (p.value<-(sum(p.dist)/permutations)) #stop the program when p.value is greater than the value specified if (p.value > alpha) break #if the p-value was significant then delete the column for max r and continue #delete the column associated with the maximum observed corr simulated.r<-simulated.r[,-max.col.r,drop=FALSE] colnames<-colnames[-max.col.r,drop=FALSE] observed.r<-observed.r[,-max.col.r,drop=FALSE] observed.p<-observed.p[,-max.col.r,drop=FALSE] #reset number of variables npredict<-npredict-1 #create a matrix to store the results ResultsTemp<-cbind(variable,round(largest.observed.r,digits=3), round(smallest.observed.p,digits=3),round(p.value, digits=3)) Results<-rbind(Results,ResultsTemp) rm(ResultsTemp) #end testing } #add row labels to Results if (nrow(Results)>=1) (dimnames(Results)[[1]]<-c(1:nrow(Results))) #print final results if there are any writeLines("Results of MPT for Correlations") writeLines("") if (nrow(Results)>=1) print(Results,quote=FALSE,rowlab=c(1:nrow(Results))) if (nrow(Results)<1) writeLines("No significant Results") writeLines("") sink() #close sink }