# Discretizes a gene expression matrix column by column: each value is compared
# with a per-column confidence interval computed after outlier removal, and the
# result is written to "DiscretizedGenExpressionMatrix.txt".
#
# Don't forget to install these packages on your machine before running this
# program: go to the main menu, then Packages, and then Install.
library("outliers")
library("tseries")
library(nortest)

# The p-value determines the sensitivity for detecting outliers in each column
# of the gene expression matrix. Here this parameter is fixed to .01, which
# corresponds to detecting outliers significantly far away from the 99% area
# of the standard Normal distribution.
pvalue <- .01
# The p-value can be modified by the user (the accepted range was lost from
# the source text; presumably strictly between 0 and 1 -- TODO confirm).

# NOTE(review): a span of the original script was lost at this point. It looks
# as if everything between a "<" and the next ">" was stripped. The missing
# part must have defined:
#   * M           - the gene expression matrix (indexed as M[row, column]),
#   * C           - a list with one numeric vector per column of M,
#   * getoutliers - function(values, pvalue), returning the values kept after
#                   outlier removal.
# Only the following tail survived (kept verbatim):
#
#   ...accepts values between 0=0) Noa<-TRUE else { D<-DBack; Noa<-FALSE}; }; D };
#
# TODO: restore these definitions from the original script. They are not
# re-invented here because the outlier test that was used cannot be determined
# from what is left.
if (!exists("M") || !exists("C", mode = "list") ||
    !exists("getoutliers", mode = "function")) {
  stop(
    "M, C and getoutliers() must be defined before this point; ",
    "their definitions were lost from this script.",
    call. = FALSE
  )
}

# Upper (Ot) and lower (Ut) thresholds, one per element of C.
Ot <- numeric(length(C))
Ut <- numeric(length(C))

for (i in seq_along(C)) {
  # Drop missing values, then remove outliers, before estimating the spread.
  C[[i]] <- na.omit(C[[i]])
  C[[i]] <- getoutliers(C[[i]], pvalue)

  m <- mean(C[[i]])
  stdev <- sqrt(var(C[[i]]))

  # Z(alpha/2) = 1.96 corresponds to the alpha/2 = 2.5% quantile of the
  # standard Normal distribution Z. These symmetric quantiles (Ot[i] and
  # Ut[i]) build a 95% confidence interval (alpha = 5%), so a gene is
  # considered expressed when it falls in the remaining 5%.
  # The user can freely replace 1.96 with the symmetric quantile of Z for the
  # chosen confidence: 90% (alpha = 10%), 99% (alpha = 1%) and so on.
  # This parameter is crucial because it determines the percentage of
  # expressed genes in each matrix column.
  Ot[i] <- m + 1.96 * stdev
  Ut[i] <- m - 1.96 * stdev
}

# Start from zeros with the same shape and dimnames as M. Missing values in M
# stay NA because NA * 0 is NA.
Mfin <- M * 0

# NOTE(review): the lower-tail branch of the original comparison was stripped
# from the source ("ifelse(M[j,i]Ot[i], ...))"). It is restored here as
# "below Ut[i] -> -1", the symmetric counterpart of the surviving
# "above Ot[i] -> 1" branch -- confirm against the original script.
for (i in seq_len(ncol(M))) {
  column_values <- M[, i]
  # which() drops NA comparisons, so cells with a missing value or a missing
  # threshold keep their initial value.
  Mfin[which(column_values < Ut[i]), i] <- -1
  Mfin[which(column_values > Ot[i]), i] <- 1
}

write.table(Mfin, file = "DiscretizedGenExpressionMatrix.txt", sep = " ",
            col.names = NA)
print("The DiscretizedGenExpressionMatrix.txt is in your R working directory")