--- title: "Data loading, merging and annotation " output: html_document --- ```{r} library(Seurat) library(cowplot) library(patchwork) library(tidyverse) library(ggpubr) library(ggplot2) library(pheatmap) library(RColorBrewer) library(EnhancedVolcano) library(viridis) library(dplyr) library(scales) ``` #First part of the script (QC and cell type specific marker expression) used also in the manuscript Chemla et al., 2023. There the same datasets are used for cell type identification but only day 30 samples filtered out for further analysis #In the manuscript Zagare et al., 2024 day 30 and day 60 data are used ########################### TO LOAD NORMALIZED, SCALED DATASET, GO TO LINE ~500 ########################################## ##Data load and filtering #D30 Samples ```{r} #setwd('Z:/09-Sharing of Data/for_Claudia/From_ScRNAseq/Analysis_Kiki_Alise') ####################################################################################################################### dataWTD30 <- 'Z:/09-Sharing of Data/for_Claudia/From_ScRNAseq/C230131001/C230131001_scR_filtered_feature_bc_matrix' list.files(dataWTD30) # Should show barcodes.tsv, genes.tsv, and matrix.mtx expression_matrix <- Read10X(data.dir =dataWTD30) objWTD30 = CreateSeuratObject(counts = expression_matrix) objWTD30[["percent.mt"]] <- PercentageFeatureSet(objWTD30, pattern = "^MT-") a1=VlnPlot(objWTD30, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3) b1 = FeatureScatter(objWTD30, feature1 = "nCount_RNA", feature2 = "nFeature_RNA") print(a1) objWTD30 pdf("UnfilteredWTD30.pdf") print(a1) print(b1) dev.off() ##data quality good objWTD30F <- subset(objWTD30, subset = nFeature_RNA > 300 & nFeature_RNA < 6000 & percent.mt < 10) c1=VlnPlot(objWTD30F, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3) d1 = FeatureScatter(objWTD30F, feature1 = "nCount_RNA", feature2 = "nFeature_RNA") print(c1) objWTD30F pdf("FilteredWTD30.pdf") print(c1) print(d1) dev.off() ###################################################################################################################### dataPDD30 <- 'Z:/09-Sharing of Data/for_Claudia/From_ScRNAseq/C230131003/C230131003_scR_filtered_feature_bc_matrix' list.files(dataPDD30) # Should show barcodes.tsv, genes.tsv, and matrix.mtx expression_matrix <- Read10X(data.dir =dataPDD30) objPDD30 = CreateSeuratObject(counts = expression_matrix) objPDD30[["percent.mt"]] <- PercentageFeatureSet(objPDD30, pattern = "^MT-") a2=VlnPlot(objPDD30, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3) b2 = FeatureScatter(objPDD30, feature1 = "nCount_RNA", feature2 = "nFeature_RNA") print(a2) objPDD30 pdf("UnfilteredPDD30.pdf") print(a2) print(b2) dev.off() ##data quality good objPDD30F <- subset(objPDD30, subset = nFeature_RNA > 300 & nFeature_RNA < 6000 & percent.mt < 15) c2=VlnPlot(objPDD30F, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3) d2 = FeatureScatter(objPDD30F, feature1 = "nCount_RNA", feature2 = "nFeature_RNA") print(c2) objPDD30F pdf("FilteredPDD30.pdf") print(c2) print(d2) dev.off() ######################################################################################################################### dataGCD30 <- 'Z:/09-Sharing of Data/for_Claudia/From_ScRNAseq/C230131004/C230131004_scR_filtered_feature_bc_matrix' list.files(dataGCD30) # Should show barcodes.tsv, genes.tsv, and matrix.mtx expression_matrix <- Read10X(data.dir =dataGCD30) objGCD30 = CreateSeuratObject(counts = expression_matrix) objGCD30[["percent.mt"]] <- PercentageFeatureSet(objGCD30, pattern = "^MT-") a3=VlnPlot(objGCD30, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3) b3 = FeatureScatter(objGCD30, feature1 = "nCount_RNA", feature2 = "nFeature_RNA") print(a3) objGCD30 pdf("UnfilteredGCD30.pdf") print(a3) print(b3) dev.off() ##data quality good objGCD30F <- subset(objGCD30, subset = nFeature_RNA > 300 & nFeature_RNA < 6000 & percent.mt < 10) c3=VlnPlot(objGCD30F, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3) d3 = FeatureScatter(objGCD30F, feature1 = "nCount_RNA", feature2 = "nFeature_RNA") print(c3) objGCD30F pdf("FilteredGCD30.pdf") print(c3) print(d3) dev.off() ``` #D60 Samples ```{r} #setwd('Z:/09-Sharing of Data/for_Claudia/From_ScRNAseq/Analysis_Kiki_Alise/Plots_QC') ####################################################################################################################### dataWTD60 <- 'Z:/09-Sharing of Data/for_Claudia/From_ScRNAseq/C230131002/C230131002_scR_filtered_feature_bc_matrix' list.files(dataWTD60) # Should show barcodes.tsv, genes.tsv, and matrix.mtx expression_matrix <- Read10X(data.dir =dataWTD60) objWTD60 = CreateSeuratObject(counts = expression_matrix) objWTD60[["percent.mt"]] <- PercentageFeatureSet(objWTD60, pattern = "^MT-") a4=VlnPlot(objWTD60, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3) b4 = FeatureScatter(objWTD60, feature1 = "nCount_RNA", feature2 = "nFeature_RNA") print(a4) objWTD60 pdf("UnfilteredWTD60.pdf") print(a4) print(b4) dev.off() ##data quality good objWTD60F <- subset(objWTD60, subset = nFeature_RNA > 300 & nFeature_RNA < 6000 & percent.mt < 10) c4=VlnPlot(objWTD60F, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3) d4 = FeatureScatter(objWTD60F, feature1 = "nCount_RNA", feature2 = "nFeature_RNA") print(c4) objWTD60F pdf("FilteredWTD60.pdf") print(c4) print(d4) dev.off() ###################################################################################################################### dataPDD60 <- 'Z:/09-Sharing of Data/for_Claudia/From_ScRNAseq/C230131005/C230131005_scR_filtered_feature_bc_matrix' list.files(dataPDD60) # Should show barcodes.tsv, genes.tsv, and matrix.mtx expression_matrix <- Read10X(data.dir =dataPDD60) objPDD60 = CreateSeuratObject(counts = expression_matrix) objPDD60[["percent.mt"]] <- PercentageFeatureSet(objPDD60, pattern = "^MT-") a5=VlnPlot(objPDD60, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3) b5 = FeatureScatter(objPDD60, feature1 = "nCount_RNA", feature2 = "nFeature_RNA") print(a5) objPDD60 pdf("UnfilteredPDD60.pdf") print(a5) print(b5) dev.off() ##data quality good objPDD60F <- subset(objPDD60, subset = nFeature_RNA > 300 & nFeature_RNA < 6000 & percent.mt < 20) c5=VlnPlot(objPDD60F, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3) d5 = FeatureScatter(objPDD60F, feature1 = "nCount_RNA", feature2 = "nFeature_RNA") print(c5) objPDD60F pdf("FilteredPDD60.pdf") print(c5) print(d5) dev.off() ######################################################################################################################### dataGCD60 <- 'Z:/09-Sharing of Data/for_Claudia/From_ScRNAseq/C230131006/C230131006_scR_filtered_feature_bc_matrix' list.files(dataGCD60) # Should show barcodes.tsv, genes.tsv, and matrix.mtx expression_matrix <- Read10X(data.dir =dataGCD60) objGCD60 = CreateSeuratObject(counts = expression_matrix) objGCD60[["percent.mt"]] <- PercentageFeatureSet(objGCD60, pattern = "^MT-") a6=VlnPlot(objGCD60, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3) b6 = FeatureScatter(objGCD60, feature1 = "nCount_RNA", feature2 = "nFeature_RNA") print(a6) objGCD60 pdf("UnfilteredGCD60.pdf") print(a6) print(b6) dev.off() ##data quality good objGCD60F <- subset(objGCD60, subset = nFeature_RNA > 300 & nFeature_RNA < 7000 & percent.mt < 15) c6=VlnPlot(objGCD60F, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3) d6 = FeatureScatter(objGCD60F, feature1 = "nCount_RNA", feature2 = "nFeature_RNA") print(c6) objGCD60F pdf("FilteredGCD60.pdf") print(c6) print(d6) dev.off() ``` #Merge datasets ```{r} combined.all <- merge(x = objWTD30F, y = list(objGCD30F, objPDD30F, objWTD60, objGCD60F, objPDD60F), add.cell.ids = c("WT_D30","GC_D30","PD_D30","WT_D60","GC_D60","PD_D60"), merge.data = TRUE, project = "combined.all") combined.all ``` ```{r} groups <- sample(c(""), size = 26134, replace = TRUE) names(groups) <- colnames(combined.all) combined.all <- AddMetaData(object = combined.all, metadata = groups, col.name = "Genotype") groups <- sample(c(""), size = 26134, replace = TRUE) names(groups) <- colnames(combined.all) combined.all <- AddMetaData(object = combined.all, metadata = groups, col.name = "Day") WT_D30 <- grep("WT_D30", colnames(combined.all)) #1:6605 GC_D30 <- grep("GC_D30", colnames(combined.all)) #6606:12066 PD_D30 <- grep("PD_D30", colnames(combined.all)) #12067:17039 WT_D60 <- grep("WT_D60", colnames(combined.all)) #17040:21422 GC_D60 <- grep("GC_D60", colnames(combined.all)) #21423:24210 PD_D60 <- grep("PD_D60", colnames(combined.all)) #24211:26134 combined.all$Genotype[1:6605] <- "WT_D30" combined.all$Genotype[6606:12066] <- "GC_D30" combined.all$Genotype[12067:17039] <- "PD_D30" combined.all$Genotype[17040:21422] <- "WT_D60" combined.all$Genotype[21423:24210] <- "GC_D60" combined.all$Genotype[24211:26134] <- "PD_D60" combined.all$Day[1:6605] <- "D30" combined.all$Day[6606:12066] <- "D30" combined.all$Day[12067:17039] <- "D30" combined.all$Day[17040:21422] <- "D60" combined.all$Day[21423:24210] <- "D60" combined.all$Day[24211:26134] <- "D60" ``` ```{r} ifnb.list <- SplitObject(combined.all, split.by = "Genotype") ifnb.list <- lapply(X = ifnb.list, FUN = function(x) { x <- NormalizeData(x) x <- FindVariableFeatures(x, selection.method = "vst", nfeatures = 2000)}) ``` ```{r} anchors <- FindIntegrationAnchors(object.list = ifnb.list, dims = 1:20) combined.all <- IntegrateData(anchorset = anchors, dims = 1:20) ``` ```{r} #setwd('Z:/09-Sharing of Data/for_Claudia/From_ScRNAseq/Analysis_Kiki_Alise') #saveRDS(combined.all, file="combined.all.rds") ``` ```{r} setwd('Z:/09-Sharing of Data/for_Claudia/From_ScRNAseq/Analysis_Kiki_Alise/Integrated') combined.all<-readRDS("combined.all_integrated.rds") ``` ```{r} DefaultAssay(combined.all) <- "integrated" # Run the standard workflow for visualization and clustering combined.all <- ScaleData(combined.all, verbose = FALSE) combined.all <- RunPCA(combined.all, npcs = 20, verbose = FALSE) p=ElbowPlot(combined.all) # Clustering combined.all <- RunUMAP(combined.all, reduction = "pca", dims = 1:20) combined.all <- FindNeighbors(combined.all, reduction = "pca", dims = 1:20) combined.all <- FindClusters(combined.all, resolution = 0.15) ``` # Visualization ```{r} p1 <- DimPlot(combined.all, reduction = "umap", label = TRUE, pt.size = 1.5) p2 <- DimPlot(combined.all, reduction = "umap", group.by = "Genotype") p3 <- DimPlot(combined.all, reduction = "umap", group.by = "Day") p4 <-DimPlot(combined.all, reduction = "umap", split.by = "Genotype") print(p1) print(p2) print(p3) print(p4) pdf("umap4.pdf") print(p1) print(p2) print(p3) print(p4) dev.off() ``` ```{r} setwd('Z:/09-Sharing of Data/for_Claudia/From_ScRNAseq/Analysis_Kiki_Alise') #saveRDS(combined.all, file="combined.all_norm_PCA.rds") ``` #cluster markers ```{r} all.markers <- FindAllMarkers(combined.all, only.pos = TRUE, min.pct = 0.25, logfc.threshold = 0.25) #write.csv(all.markers, "Z:/09-Sharing of Data/for_Claudia/From_ScRNAseq/Analysis_Kiki_Alise/Integrated/cell_cluster_markers.csv") ``` ```{r} Idents(combined.all) = combined.all$seurat_clusters combined.all2 <- RenameIdents(combined.all, `0` = "GABAergic neurons", `1` = "Neurons", `2` = "Neural progenitors", `3` = "Dopaminergic neurons", `4` = "Astrocyte-like glia progenitors", `5` = "TH-high Dopaminergic neurons", `6`="Oligodendrocyte-like glia progenitors") combined.all2$CELL_ID <-Idents(combined.all2) Idents(combined.all2) = combined.all2$CELL_ID # Visualization p2 <- DimPlot(combined.all2, reduction = "umap", label = TRUE) p2 # Visualization p <- DimPlot(combined.all2, reduction = "umap", split.by="Genotype",label =TRUE, pt.size = 1.5) p0 <- DimPlot(combined.all2, reduction = "umap", label = TRUE, pt.size = 1.5 ) p4 <- DimPlot(combined.all2, reduction = "umap", group.by = "Day",label = TRUE) p1 <- DimPlot(combined.all2, reduction = "umap", group.by = "Genotype",pt.size = 1.5) p5 <- DimPlot(combined.all2, reduction = "umap", split.by = "Genotype",pt.size = 1.5) p3 <- DimPlot(combined.all2, reduction = "umap", split.by = "Day",pt.size = 1.5) ``` ###Markers Expression for each cluster ```{r} DefaultAssay(combined.all2) <- "RNA" s1<-DotPlot(combined.all2, features = c("PLK2","ATF3","NLGN1","XBP1","NEAT1","CALCB","HMGB2","RIMS2","ATF5"), cols = c("lightgrey", "cadetblue"),dot.scale = 14) + RotatedAxis()+theme(axis.text.x = element_text(size=14, color="black"),axis.text.y = element_text(size=12, color="black"), axis.title.x = element_blank(),axis.title.y = element_blank()) print(s1) pdf("Neural_progenitor markers.pdf") #Dividing and metabolic shift undergoing cells print(s1) dev.off() ``` ```{r} DefaultAssay(combined.all2) <- "RNA" s2<-DotPlot(combined.all2, features = c("TH","SLC6A3", "LMX1A", "LMX1B", "KCNJ6", "PITX3", "CALB2","NR4A2","SLC18A2", "SLC18A3", "AGTR1","NEUROD6","NDUFA4L2"), cols = c("lightgrey", "cadetblue"),dot.scale = 14) + RotatedAxis()+theme(axis.text.x = element_text(size=12, color="black"),axis.text.y = element_text(size=14, color="black"), axis.title.x = element_blank(),axis.title.y = element_blank()) print(s2) pdf("DA_markers.pdf") print(s2) dev.off() ``` ```{r} DefaultAssay(combined.all2) <- "RNA" s3<-DotPlot(combined.all2, features = c("MAP2", "MAPT","GAP43","EGFR","NTNG1","DLX2","SYT1","NCAM1","NRXN1", "NRXN2", "NRXN3","NLGN1", "NLGN2", "NLGN3"), cols = c("lightgrey", "cadetblue"),dot.scale = 14) + RotatedAxis()+theme(axis.text.x = element_text(size=12, color="black"),axis.text.y = element_text(size=14, color="black"), axis.title.x = element_blank(),axis.title.y = element_blank()) print(s3) pdf("Mature_Neuron_markers.pdf") print(s3) dev.off() ``` ```{r} DefaultAssay(combined.all2) <- "RNA" s4<-DotPlot(combined.all2, features = c("GAD1", "GAD2", "PAX2", "SLC6A1", "GABBR2","CHAT","ACHE","SLC17A7","SLC17A6","SLC17A8","SLC1A1","FEV","TPH1"), cols = c("lightgrey", "cadetblue"),dot.scale = 14) + RotatedAxis()+theme(axis.text.x = element_text(size=14, color="black"),axis.text.y = element_text(size=12, color="black"), axis.title.x = element_blank(),axis.title.y = element_blank()) print(s4) pdf("Neuron_subtype_markers.pdf") #other than dopaminergic print(s4) dev.off() ``` ```{r} DefaultAssay(combined.all2) <- "RNA" s5<-DotPlot(combined.all2, features = c("GFAP", "S100B", "ALDH1L1", "SLC1A2","AQP4","SOX9","SOX6", "VIM", "NES", "PAX6", "HES1", "HES5", "HES6", "OCLN"), cols = c("lightgrey", "cadetblue"),dot.scale = 12) + RotatedAxis()+theme(axis.text.x = element_text(size=14, color="black"),axis.text.y = element_text(size=14, color="black"), axis.title.x = element_blank(),axis.title.y = element_blank()) print(s5) pdf("Glia_progenitor_markers.pdf") print(s5) dev.off() ``` ```{r} DefaultAssay(combined.all2) <- "RNA" s6<-DotPlot(combined.all2, features = c("OLIG1", "OLIG2", "MOG", "SOX10","PDGFRA","CSPG4","NKX6-2"), cols = c("lightgrey", "cadetblue"),dot.scale = 12) + RotatedAxis()+theme(axis.text.x = element_text(size=14, color="black"),axis.text.y = element_text(size=14, color="black"), axis.title.x = element_blank(),axis.title.y = element_blank()) print(s6) pdf("Oligodendrocyte_progenitor_markers.pdf") print(s6) dev.off() ``` #Manuscript Zagare et al., 2024 ```{r} setwd('Z:/16-Our_Papers/In Preparation/MetabolicModeling-Miro1_Claudia&Alise/Figures/Figure 2/Originals') #saveRDS(combined.all2, file="combined.all_integrated_annotated.rds") combined.all2<-readRDS("combined.all_integrated_annotated.rds") ``` #Prepare plots for manuscript (Figure 2) ```{r} Idents(combined.all2) = combined.all2$CELL_ID combined.all2$Genotype<-factor(combined.all2$Genotype, levels = c("WT_D30","WT_D60","PD_D30","PD_D60","GC_D30","GC_D60")) sf2a <- DimPlot(combined.all2, reduction = "umap", split.by = "Genotype",pt.size = 1, cols = c("#CC79A7", "#66CC99", "lightskyblue","#9999CC", "#E69F00","coral3","black")) Idents(combined.all2) = combined.all2$Genotype WTPDobj <- subset(combined.all2, idents = c("WT_D30","WT_D60","PD_D30","PD_D60")) Idents(WTPDobj) = WTPDobj$CELL_ID s2a <- DimPlot(WTPDobj, reduction = "umap", split.by = "Genotype",pt.size = 1, cols = c("#CC79A7", "#66CC99", "lightskyblue","#9999CC", "#E69F00","coral3","black")) f2b <- DimPlot(WTPDobj, reduction = "umap", label = TRUE ,pt.size = 1, cols = c("#CC79A7", "#66CC99", "lightskyblue","#9999CC", "#E69F00","coral3","black")) + theme(legend.position = "none") print(sf2a) print(s2a) print(f2b) ``` #Subset object to analyse only dopaminergic neuron clusters ```{r} Idents(WTPDobj) = WTPDobj$CELL_ID levels(WTPDobj) DNobj <- subset(WTPDobj, idents = c("Dopaminergic neurons","TH-high Dopaminergic neurons")) levels(DNobj) ``` #Feature plot of TH in dopaminergic neurons ```{r} DNobj$Genotype<-factor(DNobj$Genotype, levels = c("WT_D30","WT_D60","PD_D30","PD_D60")) TH<-FeaturePlot(DNobj, features = "TH", cols = c("lightgrey", "red"), split.by = "Genotype" , pt.size = 1.5) print(TH) ``` #Heatmap of main cell type markers ```{r} Idents(WTPDobj) = WTPDobj$CELL_ID data2<-subset(WTPDobj, features= c("XBP1","ATF5","HMGB2","SOX9","VIM","HES5","HES6","GFAP","S100B","AQP4","SOX10","PDGFRA","CSSPG4","MAP2","MAPT","SYT1","GAD1","GAD2","PAX2","LMX1B","TH","SLC6A3","KCNJ6","NR4A2","EN2","SOX6","AGTR1","CALB1")) genes_expr <- AverageExpression(object=data2)$RNA a=as.matrix(genes_expr) Z_Normalize <- function(x){return((x-mean(x))/(sd(x)))} NormalisedData_Zscore <- t(apply(as.matrix(genes_expr),1,Z_Normalize)) library(viridis) ph<-pheatmap(t(NormalisedData_Zscore), color = inferno (10), show_rownames = T,cluster_cols = FALSE, angle_col = 90, fontsize = 10, cellwidth = 12, cellheight = 10) print(ph) ```