#' 'compute N-of-1-MixEnrich' #' #' @param abs.logFC #abslogFC is calculated as |log2(X1+1)-log2(X2+1)|, where X1 is the expression value of a gene under condition 1, and X2 is the expression values of a gene under condition 2 #' @param case A vector (paired with "baseline") containing gene expression from a sample of interest #' @param gene.symbol gene symbol is a vector of gene names corresponding to abs.logFC #' @param GeneSet.annotation GeneSet.list is a list of genesets. Each element of this geneset contains the gene names of the genes belonging to the gene set. #' @param FDR.method method for multiplicity correction #' @param alternative indicates the alternative hypothesis in Fisher's exact test. #' #' @return A list contains two elements. The first element is the pathways and their corresponding p.values. The second element is all the genes and their corresponding DEG status #' If either input vectors have no variance, the MD distance is chosen to the simple difference. mixEnrich = function (abs.logFC, gene.symbol,GeneSet.annotation, FDR.method = 'BY', FDR=T,alternative = c('two.sided','greater','less')){ alternative = match.arg(alternative) abs.logFC.tmp = abs.logFC[abs.logFC != 0 ] mix1 = try(normalmixEM(abs.logFC.tmp,lambda=.5,mu=c(.5,3),sigma=c(1,1)) ) p.dereg = mix1$posterior[,which.max(mix1$mu)] ##find the genes that we believe they are deregulated ind.DE = logical(length= length(abs.logFC)) ind.DE[abs.logFC != 0 ] = p.dereg>.5 DE.df = data.frame(symbol = gene.symbol, DEstatus = as.integer(ind.DE)) GeneSet.annotation = GeneSet.annotation[GeneSet.annotation$symbol %in% gene.symbol,] GO.sym.DE = merge(GeneSet.annotation,DE.df,by = 'symbol') ## The total number of genes in each GO category t1 = table(GO.sym.DE$path_id) ## the number of DE genes in each GO ind.DE2 = GO.sym.DE$DEstatus == 1 GO.sym.DEonly = GO.sym.DE[ind.DE2,] t2 = table(GO.sym.DEonly$path_id) df.GO1 = as.data.frame(t1) df.GO2 = as.data.frame(t2) names(df.GO1) = c('GO_ID', 'total') names(df.GO2) = c('GO_ID', 'DE') ## GO.count is a data frame which contains GO ids and the total number of genes belongs to this GO term and the DEGs in this GO term GO.count = merge(df.GO1, df.GO2, by = 'GO_ID') ## background gene DE status bg.DE = c(sum(ind.DE), length(ind.DE) - sum(ind.DE)) p.val.fisher = numeric(dim(GO.count)[1]) # a vecter to store the p.values of FET for each GO for (i in 1:dim(GO.count)[1]){ # calculate the p.value of fisher exact tests for every gene set single.GO = rbind(GO.count[i,]$DE,GO.count[i,]$total-GO.count[i,]$DE) m = cbind(single.GO, bg.DE - single.GO) m = t(m) p.val.fisher[i]=fisher.test(m, alternative = alternative)$p.value } if(FDR){ p.val.fisher.adj = p.adjust(p.val.fisher,method=FDR.method) fisher.res = data.frame(GO_ID = GO.count$GO_ID, p.value.adjusted= p.val.fisher.adj, p.value = p.val.fisher) return (fisher.res) }else{ fisher.res = data.frame(GO_ID = GO.count$GO_ID, p.value = p.val.fisher) return (fisher.res) } } library(mixtools) ## define a function to calculate absolute log Fold change absLogFC = function(x1,x2){ abs(log2(x1+1) - log2(x2+1)) }