# Script:	CMH and Breslow Day tests for merging 2x2 tables.
# Version:	0.3
# Contributors:	Slav Petrovski, Quanli Wang
# Last Update:	2018-10-06
# This document is intended for internal teaching use only and shall not be distributed outside of 2018 EMBO Phenotyping neurological syndromes for systems genetics Course.

breslowday.test <- function(x) {
  #Find the common OR based on Mantel-Haenszel
  or.hat.mh <- mantelhaen.test(x)$estimate
  #Number of strata
  K <- dim(x)[3]
  #Value of the Statistic
  X2.HBD <- 0
  #Value of aj, tildeaj and Var.aj
  a <- tildea <- Var.a <- numeric(K)
  
  for (j in 1:K) {
    #Find marginals of table j
    mj <- apply(x[,,j], MARGIN=1, sum)
    nj <- apply(x[,,j], MARGIN=2, sum)

    #Solve for tilde(a)_j
    coef <- c(-mj[1]*nj[1] * or.hat.mh, nj[2]-mj[1]+or.hat.mh*(nj[1]+mj[1]),
                 1-or.hat.mh)
    sols <- Re(polyroot(coef))
    #Take the root, which fulfills 0 < tilde(a)_j <= min(n1_j, m1_j)
    tildeaj <- sols[(0 < sols) &  (sols <= min(nj[1],mj[1]))]
    #Observed value
    aj <- x[1,1,j]
    
    #Determine other expected cell entries
    tildebj <- mj[1] - tildeaj
    tildecj <- nj[1] - tildeaj
    tildedj <- mj[2] - tildecj

    #Compute \hat{\Var}(a_j | \widehat{\OR}_MH)
    Var.aj <- (1/tildeaj + 1/tildebj + 1/tildecj + 1/tildedj)^(-1)

    #Compute contribution
    X2.HBD <- X2.HBD + as.numeric((aj - tildeaj)^2 / Var.aj)

    #Assign found value for later computations
    a[j] <- aj ;  tildea[j] <- tildeaj ; Var.a[j] <- Var.aj
  }

  #Compute Tarone corrected test
  X2.HBDT <-as.numeric( X2.HBD -  (sum(a) - sum(tildea))^2/sum(Var.aj) )

  #Compute p-value based on the Tarone corrected test
  p <- 1-pchisq(X2.HBDT, df=K-1)

  res <- list(X2.HBD=X2.HBD,X2.HBDT=X2.HBDT,p=p)
  class(res) <- "bdtest"
  return(res)
}




CMHP <- function(Folder,ngroups) {
	root.dir<-dirname(Folder)
	inputfile <- paste(Folder,basename(Folder),'.csv',sep="")
	df <- read.table(inputfile,header = 1,sep = ",")
	last.column <- ngroups * 4 + 1
	genes <- df[,1]
	counts <- as.matrix(df[,2:dim(df)[2]])
	n <- dim(counts)[1]
	pval<-matrix(0,nrow=n,ncol=4)
	rownames(pval) <- genes
	colnames(pval) <- c("corrected","uncorrected","exact","breslowday")
	rownames(counts) <- genes

	for (i in 1:n) {
		data <- counts[i,1:(last.column -1)]
		data <- data[which(data >=0)]
		groups <- length(data) / 4
		if (groups > 1) {
			dim(data) <- c(2,2,groups)
			pval[i,1] <- mantelhaen.test(data)$p.value
			pval[i,2] <- mantelhaen.test(data,correct = FALSE)$p.value
			pval[i,3] <- mantelhaen.test(data,exact = TRUE)$p.value
			pval[i,4] <- NA
			options(show.error.messages = FALSE)
			try(pval[i,4] <- breslowday.test(data)$p)
			options(show.error.messages = TRUE)
		} else {
			dim(data) <- c(2,2)
			pval[i,1] <- NA
			pval[i,2] <- NA
			pval[i,3] <- fisher.test(data)$p.value
			pval[i,4] <- NA
		}
	}
	outputfile <- paste(Folder,basename(Folder),'.test.csv',sep="")

	df = cbind(counts,pval)
	df <- cbind(rownames(df),df)
	colnames(df)[1] <- "Gene Name"
	df[df==-1] <- NA

	write.table(df, file = outputfile, row.names = FALSE, sep = ",")
}
