######################################
####DE scRNAseq data COVID-19 vs. CTRL
######################################
# R version noted by the author: 4.0.5
# Relative input paths used below (e.g. in readRDS) are resolved against this
# directory.
setwd("/Input")

library(hdf5r)
library(Seurat)
library(dplyr)
library(patchwork)
library(tidyverse)
library(Matrix)
library(RCurl)
library(scales)
library(cowplot)
library(AnnotationHub)
library(ensembldb)
library(readr)

# Load the combined Seurat object.
niarakis <- readRDS(file = "niarakis_combined_github.rds")

# Binary phenotype column (CTRL vs. COVID-19), seeded from the `stim` metadata.
niarakis$pheno <- niarakis$stim

# count() is called on plain vectors throughout this script; plyr is attached
# here for that purpose.
# NOTE(review): attaching plyr after dplyr masks several dplyr verbs.
library(plyr)
count(niarakis$pheno)

# Ordered label replacements applied to `pheno` (pattern -> replacement).
# NOTE(review): gsub() matches substrings, so a label such as "Healthy" would
# become "CTRLy" - confirm the exact values stored in `stim`.
pheno_recode <- c(
  "Severe COVID-19" = "COVID-19",
  "Mild COVID-19" = "COVID-19",
  "Health" = "CTRL"
)
for (old_label in names(pheno_recode)) {
  niarakis$pheno <- gsub(old_label, pheno_recode[[old_label]], niarakis$pheno)
}

# Use the cell type annotation as the active identity for the subsetting that
# follows, and tabulate the number of cells per cell type.
Idents(niarakis) <- "celltype"
count(niarakis$celltype)

# Only cell types directly involved in the SARS-CoV-2 infection are selected.
# Of the cell groups reporting a significant comparison, only cell types
# balanced for sample size were used for DE.

cil.cells  <- subset(x = niarakis, idents = "Ciliated Cells")
secr.cells <- subset(x = niarakis, idents = "Secretory cells")
sup.cells  <- subset(x = niarakis, idents = "Suprabasal cells")
bas.cells  <- subset(x = niarakis, idents = "Basal cells")
at1.cells  <- subset(x = niarakis, idents = "Alveolar cells type 1")


# Directory that receives the per-cell-type metadata, size tables and objects.
epithelial_dir <- "/Epitelial_cell_type"
dir.create(epithelial_dir)
setwd(epithelial_dir)




# For each cell type the same steps are performed:
#   1. dump the metadata table to a text file,
#   2. read it back and attach it to the object with AddMetaData(),
#   3. tabulate the number of cells per phenotype and write that table,
#   4. save the object as an RDS file.

# Ciliated cells
cil.cells_meta <- slot(cil.cells, "meta.data")
write.table(x = cil.cells_meta, file = "cil_meta.txt")
meta.cil <- read.table(file = "cil_meta.txt")
cil.cells <- AddMetaData(object = cil.cells, metadata = meta.cil)
cil_size <- count(cil.cells$pheno)
write.table(x = cil_size, file = "cil_size.txt")
saveRDS(object = cil.cells, file = "cil.cells_scC19DM_res.rds")

# Secretory cells
secr.cells_meta <- slot(secr.cells, "meta.data")
write.table(x = secr.cells_meta, file = "secr_meta.txt")
meta.secr <- read.table(file = "secr_meta.txt")
secr.cells <- AddMetaData(object = secr.cells, metadata = meta.secr)
secr_size <- count(secr.cells$pheno)
write.table(x = secr_size, file = "secr_size.txt")
saveRDS(object = secr.cells, file = "secr.cells_scC19DM_res.rds")

# Suprabasal cells
sup.cells_meta <- slot(sup.cells, "meta.data")
write.table(x = sup.cells_meta, file = "sup_meta.txt")
meta.sup <- read.table(file = "sup_meta.txt")
sup.cells <- AddMetaData(object = sup.cells, metadata = meta.sup)
sup_size <- count(sup.cells$pheno)
write.table(x = sup_size, file = "sup_size.txt")
saveRDS(object = sup.cells, file = "sup.cells_scC19DM_res.rds")

# Basal cells
bas.cells_meta <- slot(bas.cells, "meta.data")
write.table(x = bas.cells_meta, file = "bas_meta.txt")
meta.bas <- read.table(file = "bas_meta.txt")
bas.cells <- AddMetaData(object = bas.cells, metadata = meta.bas)
bas_size <- count(bas.cells$pheno)
write.table(x = bas_size, file = "bas_size.txt")
saveRDS(object = bas.cells, file = "bas.cells_scC19DM_res.rds")

# Alveolar type 1 cells
# NOTE(review): unlike the other cell types, the RDS file name below carries no
# "_res" suffix - confirm whether that is intentional.
at1.cells_meta <- slot(at1.cells, "meta.data")
write.table(x = at1.cells_meta, file = "at1_meta.txt")
meta.at1 <- read.table(file = "at1_meta.txt")
at1.cells <- AddMetaData(object = at1.cells, metadata = meta.at1)
at1_size <- count(at1.cells$pheno)
write.table(x = at1_size, file = "at1_size.txt")
saveRDS(object = at1.cells, file = "at1.cells_scC19DM.rds")



# Step back up from the export directory and make "Results" the working
# directory; the DE output written afterwards lands below it.
setwd("../../.")

results_dir <- "Results"
dir.create(results_dir)
setwd(results_dir)



#' Differential expression COVID-19 vs. CTRL for one cell type.
#'
#' Creates a sub-directory named after `j` below the current working directory
#' and writes three files into it:
#'   * `<j>_covid-19_vs.ctrl_1_5%.txt`   - the FindMarkers() table plus a
#'     `diffexpressed` label ("COVID-19", "CTRL" or "NO"),
#'   * `vulcanoplot_covid_vs_ctrl.pdf`   - volcano plot of that table,
#'   * `<j>_covid-19_vs.ctrl_overlay.txt` - labelled genes only, reduced to a
#'     gene name and the log2 fold change scaled by its largest absolute value.
#'
#' @param i Seurat object; its `pheno` metadata column is used as identity and
#'   is compared as "COVID-19" (ident.1) against "CTRL" (ident.2).
#' @param j Character scalar used as output directory name and file prefix.
#' @return Invisibly, the path of the output directory. The working directory
#'   is restored on exit, also when an error occurs.
cellDE <- function(i, j) {
  cells.dir <- paste0("./", j)
  dir.create(cells.dir)

  # Work inside the output directory, but always return to where we started,
  # even if FindMarkers() or the plotting fails half-way.
  old_wd <- setwd(cells.dir)
  on.exit(setwd(old_wd), add = TRUE)

  library(ggplot2)
  DefaultAssay(i) <- "RNA"
  Idents(i) <- "pheno"

  patients.response <- FindMarkers(
    i,
    ident.1 = "COVID-19",
    ident.2 = "CTRL",
    verbose = TRUE,
    min.diff.pct = 0.25
  )

  # Label genes passing adjusted p < 0.05 and |log2FC| > 1 by the group in
  # which they are higher; everything else is "NO".
  de <- patients.response
  de$diffexpressed <- "NO"
  de$diffexpressed[de$p_val_adj < 0.05 & de$avg_log2FC > 1] <- "COVID-19"
  de$diffexpressed[de$p_val_adj < 0.05 & de$avg_log2FC < -1] <- "CTRL"

  # File with DE data.
  de_1.5 <- paste0("./", j, "_covid-19_vs.ctrl_1_5%.txt")
  write.table(de, file = de_1.5)

  # Volcano plot. theme_grey() is a complete theme, so it is the only theme
  # call that has an effect here.
  p <- ggplot(
    data = de,
    aes(x = avg_log2FC, y = -log10(p_val_adj), col = diffexpressed)
  ) +
    geom_point(size = 10) +
    theme_grey(base_size = 22)
  p2 <- p +
    geom_vline(xintercept = c(-1, 1), col = "red") +
    geom_hline(yintercept = -log10(0.05), col = "red")
  # Colours are bound to the labels by name. An unnamed vector is matched to
  # the alphabetically sorted labels ("COVID-19", "CTRL", "NO"), which paints
  # the non-significant genes red and shifts whenever a label is absent.
  p3 <- p2 + scale_color_manual(
    values = c("CTRL" = "blue", "NO" = "black", "COVID-19" = "red")
  )
  pdf(file = "vulcanoplot_covid_vs_ctrl.pdf", width = 25, height = 25)
  print(p3)
  dev.off()

  # Overlay table: labelled genes only, statistics columns removed.
  stat_cols <- c("p_val_adj", "p_val", "pct.1", "pct.2", "diffexpressed")
  de_C19DM <- de[de$diffexpressed != "NO", , drop = FALSE]
  de_C19DM <- de_C19DM[, setdiff(colnames(de_C19DM), stat_cols), drop = FALSE]
  de_C19DM$name <- rownames(de_C19DM)
  # Scale to [-1, 1]; skip max() on an empty set to avoid its warning.
  max_abs_fc <- if (nrow(de_C19DM) > 0) {
    max(abs(de_C19DM$avg_log2FC))
  } else {
    NA_real_
  }
  de_C19DM$value <- de_C19DM$avg_log2FC / max_abs_fc
  de_C19DM$avg_log2FC <- NULL
  rownames(de_C19DM) <- NULL

  # Results as input file for C19DM overlays.
  de_covid <- paste0("./", j, "_covid-19_vs.ctrl_overlay.txt")
  write.table(de_C19DM, file = de_covid)

  # Still inside the output directory at this point; on.exit() restores the
  # previous working directory afterwards.
  invisible(getwd())
}
# Run the DE workflow once per cell type; the second argument names the output
# sub-directory and prefixes the result files.
cellDE(i = bas.cells, j = "bas.cells_res")
cellDE(i = cil.cells, j = "cil.cells_res")
cellDE(i = secr.cells, j = "secr.cells_res")
cellDE(i = sup.cells, j = "sup.cells_res")
cellDE(i = at1.cells, j = "at1.cells")
