---
title: "Data loading, merging and annotation
"
output: html_document
---

```{r}
library(Seurat)
library(cowplot)
library(patchwork)
library(tidyverse)
library(ggpubr)
library(ggplot2)
library(pheatmap)
library(RColorBrewer)
library(EnhancedVolcano)
library(viridis)
library(dplyr)
library(scales)
```
#First part of the script (QC and cell type specific marker expression) used also in the manuscript Chemla et al., 2023. There the same datasets are used for cell type identification but only day 30 samples filtered out for further analysis
#In the manuscript Zagare et al., 2024 day 30 and day 60 data are used

########################### TO LOAD NORMALIZED, SCALED DATASET, GO TO LINE ~500 ##########################################

##Data load and filtering
#D30 Samples
```{r}

#setwd('Z:/09-Sharing of Data/for_Claudia/From_ScRNAseq/Analysis_Kiki_Alise')
#######################################################################################################################

dataWTD30 <- 'Z:/09-Sharing of Data/for_Claudia/From_ScRNAseq/C230131001/C230131001_scR_filtered_feature_bc_matrix'
list.files(dataWTD30) # Should show barcodes.tsv, genes.tsv, and matrix.mtx
expression_matrix <- Read10X(data.dir =dataWTD30)
objWTD30 = CreateSeuratObject(counts = expression_matrix)

objWTD30[["percent.mt"]] <- PercentageFeatureSet(objWTD30, pattern = "^MT-")

a1=VlnPlot(objWTD30, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3)
b1 = FeatureScatter(objWTD30, feature1 = "nCount_RNA", feature2 = "nFeature_RNA")
print(a1)

objWTD30

pdf("UnfilteredWTD30.pdf")

print(a1)
print(b1)

dev.off()

##data quality good


objWTD30F <- subset(objWTD30, subset = nFeature_RNA > 300 & nFeature_RNA < 6000 & percent.mt < 10)

c1=VlnPlot(objWTD30F, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3)
d1 = FeatureScatter(objWTD30F, feature1 = "nCount_RNA", feature2 = "nFeature_RNA")
print(c1)

objWTD30F

pdf("FilteredWTD30.pdf")

print(c1)
print(d1)

dev.off()

######################################################################################################################


dataPDD30 <- 'Z:/09-Sharing of Data/for_Claudia/From_ScRNAseq/C230131003/C230131003_scR_filtered_feature_bc_matrix'
list.files(dataPDD30) # Should show barcodes.tsv, genes.tsv, and matrix.mtx
expression_matrix <- Read10X(data.dir =dataPDD30)
objPDD30 = CreateSeuratObject(counts = expression_matrix)

objPDD30[["percent.mt"]] <- PercentageFeatureSet(objPDD30, pattern = "^MT-")

a2=VlnPlot(objPDD30, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3)
b2 = FeatureScatter(objPDD30, feature1 = "nCount_RNA", feature2 = "nFeature_RNA")
print(a2)

objPDD30

pdf("UnfilteredPDD30.pdf")

print(a2)
print(b2)

dev.off()

##data quality good


objPDD30F <- subset(objPDD30, subset = nFeature_RNA > 300 & nFeature_RNA < 6000 & percent.mt < 15)

c2=VlnPlot(objPDD30F, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3)
d2 = FeatureScatter(objPDD30F, feature1 = "nCount_RNA", feature2 = "nFeature_RNA")
print(c2)

objPDD30F

pdf("FilteredPDD30.pdf")

print(c2)
print(d2)

dev.off()

#########################################################################################################################

dataGCD30 <- 'Z:/09-Sharing of Data/for_Claudia/From_ScRNAseq/C230131004/C230131004_scR_filtered_feature_bc_matrix'
list.files(dataGCD30) # Should show barcodes.tsv, genes.tsv, and matrix.mtx
expression_matrix <- Read10X(data.dir =dataGCD30)
objGCD30 = CreateSeuratObject(counts = expression_matrix)

objGCD30[["percent.mt"]] <- PercentageFeatureSet(objGCD30, pattern = "^MT-")

a3=VlnPlot(objGCD30, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3)
b3 = FeatureScatter(objGCD30, feature1 = "nCount_RNA", feature2 = "nFeature_RNA")
print(a3)

objGCD30

pdf("UnfilteredGCD30.pdf")

print(a3)
print(b3)

dev.off()

##data quality good


objGCD30F <- subset(objGCD30, subset = nFeature_RNA > 300 & nFeature_RNA < 6000 & percent.mt < 10)

c3=VlnPlot(objGCD30F, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3)
d3 = FeatureScatter(objGCD30F, feature1 = "nCount_RNA", feature2 = "nFeature_RNA")
print(c3)

objGCD30F

pdf("FilteredGCD30.pdf")

print(c3)
print(d3)

dev.off()

```
#D60 Samples
```{r}

#setwd('Z:/09-Sharing of Data/for_Claudia/From_ScRNAseq/Analysis_Kiki_Alise/Plots_QC')
#######################################################################################################################

dataWTD60 <- 'Z:/09-Sharing of Data/for_Claudia/From_ScRNAseq/C230131002/C230131002_scR_filtered_feature_bc_matrix'
list.files(dataWTD60) # Should show barcodes.tsv, genes.tsv, and matrix.mtx
expression_matrix <- Read10X(data.dir =dataWTD60)
objWTD60 = CreateSeuratObject(counts = expression_matrix)

objWTD60[["percent.mt"]] <- PercentageFeatureSet(objWTD60, pattern = "^MT-")

a4=VlnPlot(objWTD60, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3)
b4 = FeatureScatter(objWTD60, feature1 = "nCount_RNA", feature2 = "nFeature_RNA")
print(a4)

objWTD60

pdf("UnfilteredWTD60.pdf")

print(a4)
print(b4)

dev.off()

##data quality good


objWTD60F <- subset(objWTD60, subset = nFeature_RNA > 300 & nFeature_RNA < 6000 & percent.mt < 10)

c4=VlnPlot(objWTD60F, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3)
d4 = FeatureScatter(objWTD60F, feature1 = "nCount_RNA", feature2 = "nFeature_RNA")
print(c4)

objWTD60F

pdf("FilteredWTD60.pdf")

print(c4)
print(d4)

dev.off()

######################################################################################################################


dataPDD60 <- 'Z:/09-Sharing of Data/for_Claudia/From_ScRNAseq/C230131005/C230131005_scR_filtered_feature_bc_matrix'
list.files(dataPDD60) # Should show barcodes.tsv, genes.tsv, and matrix.mtx
expression_matrix <- Read10X(data.dir =dataPDD60)
objPDD60 = CreateSeuratObject(counts = expression_matrix)

objPDD60[["percent.mt"]] <- PercentageFeatureSet(objPDD60, pattern = "^MT-")

a5=VlnPlot(objPDD60, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3)
b5 = FeatureScatter(objPDD60, feature1 = "nCount_RNA", feature2 = "nFeature_RNA")
print(a5)

objPDD60

pdf("UnfilteredPDD60.pdf")

print(a5)
print(b5)

dev.off()

##data quality good


objPDD60F <- subset(objPDD60, subset = nFeature_RNA > 300 & nFeature_RNA < 6000 & percent.mt < 20)

c5=VlnPlot(objPDD60F, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3)
d5 = FeatureScatter(objPDD60F, feature1 = "nCount_RNA", feature2 = "nFeature_RNA")
print(c5)

objPDD60F

pdf("FilteredPDD60.pdf")

print(c5)
print(d5)

dev.off()

#########################################################################################################################

dataGCD60 <- 'Z:/09-Sharing of Data/for_Claudia/From_ScRNAseq/C230131006/C230131006_scR_filtered_feature_bc_matrix'
list.files(dataGCD60) # Should show barcodes.tsv, genes.tsv, and matrix.mtx
expression_matrix <- Read10X(data.dir =dataGCD60)
objGCD60 = CreateSeuratObject(counts = expression_matrix)

objGCD60[["percent.mt"]] <- PercentageFeatureSet(objGCD60, pattern = "^MT-")

a6=VlnPlot(objGCD60, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3)
b6 = FeatureScatter(objGCD60, feature1 = "nCount_RNA", feature2 = "nFeature_RNA")
print(a6)

objGCD60

pdf("UnfilteredGCD60.pdf")

print(a6)
print(b6)

dev.off()

##data quality good


objGCD60F <- subset(objGCD60, subset = nFeature_RNA > 300 & nFeature_RNA < 7000 & percent.mt < 15)

c6=VlnPlot(objGCD60F, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3)
d6 = FeatureScatter(objGCD60F, feature1 = "nCount_RNA", feature2 = "nFeature_RNA")
print(c6)

objGCD60F

pdf("FilteredGCD60.pdf")

print(c6)
print(d6)

dev.off()

```
#Merge datasets
```{r}
combined.all <- merge(x = objWTD30F, y = list(objGCD30F, objPDD30F, objWTD60, objGCD60F, objPDD60F), add.cell.ids = c("WT_D30","GC_D30","PD_D30","WT_D60","GC_D60","PD_D60"), merge.data = TRUE, project = "combined.all")

combined.all
```

```{r}

groups <- sample(c(""), size = 26134, replace = TRUE)
names(groups) <- colnames(combined.all)
combined.all <- AddMetaData(object = combined.all, metadata = groups, col.name = "Genotype")

groups <- sample(c(""), size = 26134, replace = TRUE)
names(groups) <- colnames(combined.all)
combined.all <- AddMetaData(object = combined.all, metadata = groups, col.name = "Day")


WT_D30 <- grep("WT_D30", colnames(combined.all)) #1:6605
GC_D30 <- grep("GC_D30", colnames(combined.all)) #6606:12066
PD_D30 <- grep("PD_D30", colnames(combined.all)) #12067:17039
WT_D60 <- grep("WT_D60", colnames(combined.all)) #17040:21422
GC_D60 <- grep("GC_D60", colnames(combined.all)) #21423:24210
PD_D60 <- grep("PD_D60", colnames(combined.all)) #24211:26134

combined.all$Genotype[1:6605] <- "WT_D30"
combined.all$Genotype[6606:12066] <- "GC_D30"
combined.all$Genotype[12067:17039] <- "PD_D30"
combined.all$Genotype[17040:21422] <- "WT_D60"
combined.all$Genotype[21423:24210] <- "GC_D60"
combined.all$Genotype[24211:26134] <- "PD_D60"

combined.all$Day[1:6605] <- "D30"
combined.all$Day[6606:12066] <- "D30"
combined.all$Day[12067:17039] <- "D30"
combined.all$Day[17040:21422] <- "D60"
combined.all$Day[21423:24210] <- "D60"
combined.all$Day[24211:26134] <- "D60"
```

```{r}
ifnb.list <- SplitObject(combined.all, split.by = "Genotype")

ifnb.list <- lapply(X = ifnb.list, FUN = function(x) {
    x <- NormalizeData(x)
    x <- FindVariableFeatures(x, selection.method = "vst", nfeatures = 2000)})
```

```{r}
anchors <- FindIntegrationAnchors(object.list = ifnb.list, dims = 1:20)
combined.all <- IntegrateData(anchorset = anchors, dims = 1:20)
```


```{r}
#setwd('Z:/09-Sharing of Data/for_Claudia/From_ScRNAseq/Analysis_Kiki_Alise')
#saveRDS(combined.all, file="combined.all.rds")

```

```{r}
setwd('Z:/09-Sharing of Data/for_Claudia/From_ScRNAseq/Analysis_Kiki_Alise/Integrated')
combined.all<-readRDS("combined.all_integrated.rds")

```
```{r}
DefaultAssay(combined.all) <- "integrated"

# Run the standard workflow for visualization and clustering
combined.all <- ScaleData(combined.all, verbose = FALSE)
combined.all <- RunPCA(combined.all, npcs = 20, verbose = FALSE)
p=ElbowPlot(combined.all)
# Clustering
combined.all <- RunUMAP(combined.all, reduction = "pca", dims = 1:20)
combined.all <- FindNeighbors(combined.all, reduction = "pca", dims = 1:20)
combined.all <- FindClusters(combined.all, resolution = 0.15)

```


# Visualization
```{r}
p1 <- DimPlot(combined.all, reduction = "umap", label = TRUE, pt.size = 1.5)
p2 <- DimPlot(combined.all, reduction = "umap", group.by = "Genotype")
p3 <- DimPlot(combined.all, reduction = "umap", group.by = "Day")
p4 <-DimPlot(combined.all, reduction = "umap", split.by = "Genotype")

print(p1)
print(p2)
print(p3)
print(p4)

pdf("umap4.pdf")
print(p1)
print(p2)
print(p3)
print(p4)
dev.off()

```

```{r}
setwd('Z:/09-Sharing of Data/for_Claudia/From_ScRNAseq/Analysis_Kiki_Alise')
#saveRDS(combined.all, file="combined.all_norm_PCA.rds")

```

#cluster markers
```{r}

all.markers <- FindAllMarkers(combined.all, only.pos = TRUE, min.pct = 0.25, logfc.threshold = 0.25)

#write.csv(all.markers, "Z:/09-Sharing of Data/for_Claudia/From_ScRNAseq/Analysis_Kiki_Alise/Integrated/cell_cluster_markers.csv")
```


```{r}

Idents(combined.all) = combined.all$seurat_clusters
combined.all2 <-  RenameIdents(combined.all,  `0` = "GABAergic neurons", `1` = "Neurons", `2` = "Neural progenitors", `3` = "Dopaminergic neurons", `4` = "Astrocyte-like glia progenitors", `5` =  "TH-high Dopaminergic neurons", `6`="Oligodendrocyte-like glia progenitors")

combined.all2$CELL_ID <-Idents(combined.all2)
Idents(combined.all2) = combined.all2$CELL_ID

# Visualization
p2 <- DimPlot(combined.all2, reduction = "umap", label = TRUE)
p2

# Visualization
p <- DimPlot(combined.all2, reduction = "umap", split.by="Genotype",label =TRUE, pt.size = 1.5)


p0 <- DimPlot(combined.all2, reduction = "umap", label = TRUE, pt.size = 1.5 )
p4 <- DimPlot(combined.all2, reduction = "umap", group.by = "Day",label = TRUE)
p1 <- DimPlot(combined.all2, reduction = "umap", group.by = "Genotype",pt.size = 1.5)
p5 <- DimPlot(combined.all2, reduction = "umap", split.by = "Genotype",pt.size = 1.5)
p3 <- DimPlot(combined.all2, reduction = "umap", split.by = "Day",pt.size = 1.5)


```

###Markers Expression for each cluster
```{r}
DefaultAssay(combined.all2) <- "RNA"

s1<-DotPlot(combined.all2, features = c("PLK2","ATF3","NLGN1","XBP1","NEAT1","CALCB","HMGB2","RIMS2","ATF5"), cols = c("lightgrey", "cadetblue"),dot.scale = 14) + RotatedAxis()+theme(axis.text.x = element_text(size=14, color="black"),axis.text.y = element_text(size=12, color="black"), axis.title.x = element_blank(),axis.title.y = element_blank())

print(s1)

pdf("Neural_progenitor markers.pdf") #Dividing and metabolic shift undergoing cells
print(s1)
dev.off()
```


```{r}
DefaultAssay(combined.all2) <- "RNA"

s2<-DotPlot(combined.all2, features = c("TH","SLC6A3", "LMX1A", "LMX1B", "KCNJ6", "PITX3", "CALB2","NR4A2","SLC18A2", "SLC18A3", "AGTR1","NEUROD6","NDUFA4L2"), cols = c("lightgrey", "cadetblue"),dot.scale = 14) + RotatedAxis()+theme(axis.text.x = element_text(size=12, color="black"),axis.text.y = element_text(size=14, color="black"), axis.title.x = element_blank(),axis.title.y = element_blank())

print(s2)

pdf("DA_markers.pdf")
print(s2)
dev.off()
```

```{r}
DefaultAssay(combined.all2) <- "RNA"
s3<-DotPlot(combined.all2, features = c("MAP2", "MAPT","GAP43","EGFR","NTNG1","DLX2","SYT1","NCAM1","NRXN1", "NRXN2", "NRXN3","NLGN1", "NLGN2", "NLGN3"), cols = c("lightgrey", "cadetblue"),dot.scale = 14) + RotatedAxis()+theme(axis.text.x = element_text(size=12, color="black"),axis.text.y = element_text(size=14, color="black"), axis.title.x = element_blank(),axis.title.y = element_blank())

print(s3)

pdf("Mature_Neuron_markers.pdf")
print(s3)
dev.off()
```


```{r}
DefaultAssay(combined.all2) <- "RNA"
s4<-DotPlot(combined.all2, features = c("GAD1", "GAD2", "PAX2", "SLC6A1", "GABBR2","CHAT","ACHE","SLC17A7","SLC17A6","SLC17A8","SLC1A1","FEV","TPH1"), cols = c("lightgrey", "cadetblue"),dot.scale = 14) + RotatedAxis()+theme(axis.text.x = element_text(size=14, color="black"),axis.text.y = element_text(size=12, color="black"), axis.title.x = element_blank(),axis.title.y = element_blank())

print(s4)

pdf("Neuron_subtype_markers.pdf") #other than dopaminergic
print(s4)
dev.off()

```

```{r}
DefaultAssay(combined.all2) <- "RNA"
s5<-DotPlot(combined.all2, features = c("GFAP", "S100B", "ALDH1L1", "SLC1A2","AQP4","SOX9","SOX6",	"VIM", "NES", "PAX6", "HES1", "HES5", "HES6", "OCLN"), cols = c("lightgrey", "cadetblue"),dot.scale = 12) + RotatedAxis()+theme(axis.text.x = element_text(size=14, color="black"),axis.text.y = element_text(size=14, color="black"), axis.title.x = element_blank(),axis.title.y = element_blank())

print(s5)

pdf("Glia_progenitor_markers.pdf")
print(s5)
dev.off()

```
```{r}
DefaultAssay(combined.all2) <- "RNA"
s6<-DotPlot(combined.all2, features = c("OLIG1", "OLIG2", "MOG", "SOX10","PDGFRA","CSPG4","NKX6-2"), cols = c("lightgrey", "cadetblue"),dot.scale = 12) + RotatedAxis()+theme(axis.text.x = element_text(size=14, color="black"),axis.text.y = element_text(size=14, color="black"), axis.title.x = element_blank(),axis.title.y = element_blank())

print(s6)

pdf("Oligodendrocyte_progenitor_markers.pdf")
print(s6)
dev.off()
```

#Manuscript Zagare et al., 2024
```{r}
setwd('Z:/16-Our_Papers/In Preparation/MetabolicModeling-Miro1_Claudia&Alise/Figures/Figure 2/Originals')
#saveRDS(combined.all2, file="combined.all_integrated_annotated.rds")
combined.all2<-readRDS("combined.all_integrated_annotated.rds")
```

#Prepare plots for manuscript (Figure 2)
```{r}
Idents(combined.all2) = combined.all2$CELL_ID
combined.all2$Genotype<-factor(combined.all2$Genotype, levels = c("WT_D30","WT_D60","PD_D30","PD_D60","GC_D30","GC_D60"))

sf2a <- DimPlot(combined.all2, reduction = "umap", split.by = "Genotype",pt.size = 1,  cols = c("#CC79A7", "#66CC99", "lightskyblue","#9999CC", "#E69F00","coral3","black"))  


Idents(combined.all2) = combined.all2$Genotype
WTPDobj <- subset(combined.all2, idents = c("WT_D30","WT_D60","PD_D30","PD_D60"))

Idents(WTPDobj) = WTPDobj$CELL_ID
s2a <- DimPlot(WTPDobj, reduction = "umap", split.by = "Genotype",pt.size = 1,  cols = c("#CC79A7", "#66CC99", "lightskyblue","#9999CC", "#E69F00","coral3","black")) 


f2b <- DimPlot(WTPDobj, reduction = "umap", label = TRUE ,pt.size = 1,  cols = c("#CC79A7", "#66CC99", "lightskyblue","#9999CC", "#E69F00","coral3","black")) + theme(legend.position = "none")

print(sf2a)
print(s2a)
print(f2b)
```

#Subset object to analyse only dopaminergic neuron clusters
```{r}
Idents(WTPDobj) = WTPDobj$CELL_ID
levels(WTPDobj)
DNobj <- subset(WTPDobj, idents = c("Dopaminergic neurons","TH-high Dopaminergic neurons"))
levels(DNobj)
```
#Feature plot of TH in dopaminergic neurons
```{r}
DNobj$Genotype<-factor(DNobj$Genotype, levels = c("WT_D30","WT_D60","PD_D30","PD_D60"))
TH<-FeaturePlot(DNobj, features = "TH", cols = c("lightgrey", "red"), split.by = "Genotype" , pt.size = 1.5)

print(TH)
```

#Heatmap of main cell type markers
```{r}
Idents(WTPDobj) = WTPDobj$CELL_ID

data2<-subset(WTPDobj, features= c("XBP1","ATF5","HMGB2","SOX9","VIM","HES5","HES6","GFAP","S100B","AQP4","SOX10","PDGFRA","CSSPG4","MAP2","MAPT","SYT1","GAD1","GAD2","PAX2","LMX1B","TH","SLC6A3","KCNJ6","NR4A2","EN2","SOX6","AGTR1","CALB1"))


genes_expr <- AverageExpression(object=data2)$RNA

a=as.matrix(genes_expr)

Z_Normalize <- function(x){return((x-mean(x))/(sd(x)))}
  
NormalisedData_Zscore <- t(apply(as.matrix(genes_expr),1,Z_Normalize))

library(viridis)

ph<-pheatmap(t(NormalisedData_Zscore), color =  inferno (10), show_rownames = T,cluster_cols = FALSE, angle_col = 90,  fontsize = 10, cellwidth = 12, cellheight = 10)


print(ph)
```