
signature_info<-read.csv("data/Signatures_Refs.csv")



load("data/esets.r")
load("data/exprsets.r")
load("data/gsc.methodology.paper.r")

#load symbols found in the datasets
load("data/exprsets.gene.symbols.r")
 
#read user uploaded signatures
read_signatures <- function(gene_list){
  

  ## to replace any number of spaces with a single space
  gene_list<-gsub("(?<=[\\s])\\s*|^\\s+|\\s+$", "", gene_list, perl=TRUE)
  signature<-strsplit(gene_list," ")
  names(signature)[1]<-"user_signature"
  uniqueList <- lapply(signature, unique)
  makeSet <- function(geneIds, n) {GeneSet(geneIds, geneIdType=SymbolIdentifier(), setName=n)}
  gsclist <- mapply(makeSet, uniqueList[], names(signature))
  gsc <- GeneSetCollection(gsclist)
  return(gsc)
  }  


applyGSVA <- function(edata,geneset,var_cutoff){

  edata<-nsFilter(edata, require.entrez=FALSE, remove.dupEntrez=FALSE,
           var.func=IQR, var.filter=FALSE,
           var.cutoff=var_cutoff, filterByQuantile=TRUE, feature.exclude="^AFFX")$eset
 
  
  a<-tryCatch(gsva(edata, geneset, min.sz=1, max.sz=4000,abs.ranking=FALSE,mx.diff=TRUE)$es.obs,
              error = function(e) {print("gene set collection bad format");
                                   NULL})
}




formatesdf <- function(esdf,setname){
  

  sig_num <- nrow(exprs(esdf))+1 
  esdf<-data.frame(esdf)
  esdf<-cbind(rownames(esdf),esdf)
  rownames(esdf)<-1:nrow(esdf)
    colind<-numeric()

    for (i in (sig_num+1):ncol(esdf)){
      
      if(length(unique(esdf[,i]))!=1)
      colind[length(colind)+1]=i   
  }
  
  #some experiments don't hold varying conditions, check
  if (length(colind)>0) #remove columns only if there are any left
  esdf <- esdf[,c(1:sig_num,colind)]
  
}



showboxplot<-function(indata, inx, iny,colcol) {
 
  if(!is.numeric(indata[,iny])) return()

  condition <- indata[,inx]
  color_cond <- indata[,colcol]
  colour <- factor(color_cond)
  
                                        

  
  p <- ggplot(indata, aes(condition, y=indata[,iny]), environment = environment()) +
       geom_boxplot() + 
       geom_jitter(aes(colour=colour),shape=16,size=3,  
        
      
    
        position = position_jitter(width = 0.2),show.legend = TRUE ) +
        ylab(colnames(indata)[iny]) + xlab(colnames(indata)[inx]) + 
        theme(axis.text  = element_text(size=12,color="black"), 
            legend.position="top", axis.title.y = element_text(size=14),title=element_text(size=14,face="bold"))+
        labs(x = NULL) + labs(y = "ES") + 
        labs(title = paste(colnames(indata)[iny],"\n"))

  print(p)
}


showplot<-function(indata,x,y,z) {
  
    cohort <- indata[,z]
    nsamples<- paste("N=",nrow(indata),",\n",sep = "")
    r<-paste("r =",round(cor(indata[,x],indata[,y],use="pairwise.complete.obs",method= ("spearman")),digits=2),",\n",sep="")
 
    my.pval<-cor.test(indata[,x],indata[,y],method= ("spearman"))$p.value
    if(my.pval<0.0001){
      my.pval<-'<10-4'
    }else{
      my.pval<-round(cor.test(indata[,x],indata[,y],method= ("spearman"))$p.value,digits=4)
    }
    pval<-paste("p-value=",my.pval,"\n",sep="")
    fig_text<-paste(nsamples,r,pval)
    

  
  
  p <-ggplot(indata,aes(indata[,x],indata[,y]),environment = environment())+
  stat_smooth(method="lm",se=FALSE) + #aes(shape = factor(cohort))+
  geom_point(aes(colour = factor(cohort)),size = 4,show.legend = TRUE)+
  ylab(colnames(indata)[y]) + xlab(colnames(indata)[x]) + labs(title = fig_text)+
    theme(axis.text  = element_text(size=14,color="black"), 
          legend.position="right", axis.title = element_text(size=14),legend.text=element_text(size=14))
 print(p)
 
}


normalise2control<-function(dataset,control_group,cohort_group){
  cotrol_pos<-match(cohort_group,colnames(dataset))
  #estimate the mean in the data column, for the chosen group
  controlmeans<-mean(dataset[dataset[,cotrol_pos]==control_group,1])
  #subtract mean from entire data column 
  dataset[,1]<-dataset[,1]-controlmeans

  return(dataset) 
}

run_limma<-function(dat,genesetnames,comparison_col){
  
  if(length(unique(dat[[comparison_col]]))==1) return()
  
  exprs <-as.matrix(t(dat[names(dat) %in% genesetnames]))
  pData <- dat[comparison_col]
  pData[,1]<-factor(pData[,1])
  phenoData <- new("AnnotatedDataFrame",data=pData)
  myexprset <- ExpressionSet(assayData=exprs,phenoData=phenoData)
  
  design <- model.matrix(~ 0+pData(myexprset)[,1]) #+pData(exprset)$Demographic.Data.Age+pData(exprset)$DemographicData.Sex)
  colnames(design) <- c(levels(pData(myexprset)[,1]))

  comparisons<-combn(colnames(design),2)
  comparisons<-paste(comparisons[1,]," - ",comparisons[2,],sep="")
  contrast.matrix <- makeContrasts(contrasts=comparisons,levels=design)
  fit= lmFit(myexprset, design)
  fit2 <- contrasts.fit(fit, contrast.matrix)
  fit2<- eBayes(fit2)
  results <- decideTests(fit2,adjust.method="BH")
  restable<-as.data.frame(colSums(abs(results)))
  
  
  res<-topTable(fit2, coef=1, adjust="BH", n=Inf)[,c('logFC','P.Value','adj.P.Val')]
  colnames(res)[1]<-"Diff"
  colnames(res)<-paste(rownames(restable)[1],colnames(res))
 
  #limma does not return row name for a single signature, so need's to be checked and added
  if (length(genesetnames)==1) rownames(res)<-genesetnames
  res<-res[match(genesetnames,rownames(res)),]
  
  if(length(comparisons)>1){
  for (i in 2:length(comparisons)){
    res2<-topTable(fit2, coef=i, adjust="BH", n=Inf)[,c('logFC','P.Value','adj.P.Val')]
    colnames(res2)[1]<-"Diff"
    colnames(res2)<-paste(rownames(restable)[i],colnames(res2))
    if (length(genesetnames)==1) rownames(res2)<-genesetnames
    res2<-res2[match(rownames(res),rownames(res2)),]
    res<-cbind(res,res2)
    }
  }
  
  # data.frame(t(res))
  res
  
}

