######################################
####DE scRNAseq data in Macrophages in Severe, Mild COVID-19 and Healthy CTRL 
######################################
# Written for R 4.0.5.
# Relative input paths further down (e.g. the readRDS() call) are resolved
# against this directory.
setwd("/Input")

library(hdf5r)
library(Seurat)
library(dplyr)
library(patchwork)
library(tidyverse)
library(Matrix)
library(RCurl)
library(scales)
library(cowplot)
library(AnnotationHub)
library(ensembldb)
library(readr)

# Load the combined object; the path is relative to the working directory.
niarakis <- readRDS("niarakis_combined_github.rds")

# plyr is attached here (after dplyr) for its count(), which is used below to
# tabulate how many cells carry each phenotype label.
library(plyr)

# `pheno`: binary phenotype (CTRL vs. COVID-19), derived from `stim`.
niarakis$pheno <- niarakis$stim
count(niarakis$pheno)

# Both severities collapse to "COVID-19"; "Health" is relabelled "CTRL".
# The replacements run innermost first: Severe, then Mild, then Health.
niarakis$pheno <- gsub(
  "Health", "CTRL",
  gsub(
    "Mild COVID-19", "COVID-19",
    gsub("Severe COVID-19", "COVID-19", niarakis$pheno)
  )
)

# `pheno2`: the three-level phenotype from `stim`, with any "CTRL" label
# spelled "Health".
niarakis$pheno2 <- gsub("CTRL", "Health", niarakis$stim)
count(niarakis$pheno2)


# Only cell types directly involved in the SARS-CoV-2 infection were selected.
# Among the cell groups showing a significant comparison, only cell types
# balanced for sample size were used for DE.

# Output directory for the macrophage subset. Note the absolute path: it is
# created at the filesystem root, not under the input directory.
dir.create("/Macro_cell_type")
setwd("/Macro_cell_type")

# Use the `celltype` metadata column as the active identity so that cells can
# be selected by cell type.
Idents(niarakis) <- "celltype"

# Keep only the cells whose identity is "Macrophages".
macro.cells <- subset(niarakis, idents = "Macrophages")






# Macrophages: export the metadata, tabulate group sizes and save the subset.
macro.cells_meta <- macro.cells@meta.data
dir<-paste0("macro_meta.txt")
write.table(macro.cells_meta, file=dir)
# The table just written is read back and re-attached to the object.
# NOTE(review): the purpose of this round trip through a text file is not
# visible here - confirm it is intended before removing it.
meta.macro<-read.table(dir)
macro.cells<-AddMetaData(macro.cells, meta.macro)
# Number of macrophages per binary phenotype (`pheno`), written to disk.
macro_size <-count(macro.cells$pheno)
dir2<-paste0("macro_size.txt")
write.table(macro_size, file=dir2)
# Save the macrophage-only object.
dir3<-paste0("macro_scC19DM.rds")
saveRDS(macro.cells, file=dir3)


Idents(macro.cells) <- "celltype"
# Number of macrophages per three-level phenotype (`pheno2`); printed only.
count(macro.cells$pheno2)



# Step back to the parent of the macrophage directory (the filesystem root,
# given the absolute path used above) ...
setwd("../.")

# ... and create/enter the directory that receives the DE results.
dir.create("Results")
setwd("Results")



#' Differential expression between two phenotype groups of one cell subset.
#'
#' Runs Seurat's FindMarkers() on the RNA assay, with cell identities taken
#' from the `pheno2` metadata column, flags significant genes
#' (adjusted p < 0.05 and |avg_log2FC| > 1) and writes three files into a
#' sub-directory named after `j`:
#'   * the full DE table with a `diffexpressed` label column,
#'   * a volcano plot (PDF),
#'   * a reduced name/value table used as input for C19DM overlays.
#'
#' @param i Seurat object carrying a `pheno2` metadata column.
#' @param j Name of the comparison; used as the output sub-directory (created
#'   inside the current working directory) and as file-name prefix.
#' @param k Value of `pheno2` passed as `ident.1`; genes with
#'   avg_log2FC > 1 are labelled with it.
#' @param h Value of `pheno2` passed as `ident.2`; genes with
#'   avg_log2FC < -1 are labelled with it.
#' @return Invisibly, the path of the comparison directory (the same value the
#'   previous implementation returned through its final setwd() call).
cellDE <- function(i, j, k, h) {
  library(ggplot2)

  # Work inside a per-comparison directory. The previous working directory is
  # restored on exit, including when a later step fails, so one failing
  # comparison cannot leave the script in the wrong directory.
  cells.dir <- paste0("./", j)
  dir.create(cells.dir, showWarnings = FALSE)
  old_wd <- setwd(cells.dir)
  on.exit(setwd(old_wd), add = TRUE)

  DefaultAssay(i) <- "RNA"
  # Three-level phenotype; switch to "pheno" (with k = "COVID-19",
  # h = "CTRL") for the binary COVID-19 vs. CTRL comparison.
  Idents(i) <- "pheno2"
  de <- FindMarkers(
    i,
    ident.1 = k,
    ident.2 = h,
    verbose = TRUE,
    min.diff.pct = 0.25
  )

  # Label each gene: up in k, up in h, or not differentially expressed.
  de$diffexpressed <- "NO"
  de$diffexpressed[de$p_val_adj < 0.05 & de$avg_log2FC > 1] <- k
  de$diffexpressed[de$p_val_adj < 0.05 & de$avg_log2FC < -1] <- h

  # File with DE data
  de_file <- paste0("./", j, "_covid-19_vs.ctrl_1_5%.txt")
  write.table(de, file = de_file)

  # Colours are bound to the labels by name. An unnamed vector is assigned in
  # alphabetical order of the labels present, which swapped the colours for
  # some comparisons (e.g. "Mild COVID-19" sorts before "NO") and whenever one
  # of the three categories was empty.
  status_colours <- c("blue", "black", "red")
  names(status_colours) <- c(h, "NO", k)

  # theme_grey() is a complete theme, so it alone determines the final look.
  volcano <- ggplot(
    data = de,
    aes(x = avg_log2FC, y = -log10(p_val_adj), col = diffexpressed)
  ) +
    geom_point(size = 10) +
    theme_grey(base_size = 22) +
    geom_vline(xintercept = c(-1, 1), col = "red") +
    geom_hline(yintercept = -log10(0.05), col = "red") +
    scale_color_manual(values = status_colours)

  pdf(file = 'vulcanoplot_covid_vs_ctrl.pdf', width = 25, height = 25)
  # Close the device even if rendering fails, so no PDF device stays open.
  tryCatch(print(volcano), finally = dev.off())

  # Overlay table: significant genes only, reduced to gene name and the
  # log2FC scaled by the largest absolute log2FC (values lie in [-1, 1]).
  significant <- de[de$diffexpressed != "NO", , drop = FALSE]
  max_abs_fc <- if (nrow(significant) > 0) {
    max(abs(significant$avg_log2FC))
  } else {
    NA_real_ # nothing significant: avoid the max() warning on empty input
  }
  dropped <- c(
    "p_val_adj", "p_val", "pct.1", "pct.2", "diffexpressed", "avg_log2FC"
  )
  de_C19DM <- significant[
    , setdiff(colnames(significant), dropped),
    drop = FALSE
  ]
  de_C19DM$name <- rownames(significant)
  de_C19DM$value <- significant$avg_log2FC / max_abs_fc
  rownames(de_C19DM) <- NULL

  # Results as input file for C19DM overlays.
  overlay_file <- paste0("./", j, "_covid-19_vs.ctrl_overlay.txt")
  write.table(de_C19DM, file = overlay_file)

  invisible(getwd())
}

# Pairwise DE comparisons on the macrophage subset. Each call writes its
# results into a directory named after `j`; `k` is tested against `h`.
cellDE(
  i = macro.cells,
  j = "Macro_severe_vs._health",
  k = "Severe COVID-19",
  h = "Health"
)
cellDE(
  i = macro.cells,
  j = "Macro_severe_vs._mild",
  k = "Severe COVID-19",
  h = "Mild COVID-19"
)
cellDE(
  i = macro.cells,
  j = "Macro_mild_vs._health",
  k = "Mild COVID-19",
  h = "Health"
)