|
| 1 | +#!/usr/bin/env Rscript |
| 2 | +args = commandArgs(trailingOnly=TRUE) |
| 3 | + |
| 4 | +library(Seurat) |
| 5 | +library(dplyr) |
| 6 | +library(ggplot2) |
| 7 | +library(httpgd) |
| 8 | + |
| 9 | +method <- args[1] #"Arcsinh_b5" |
| 10 | +run_name = args[2] #"publicData_CITEseq" |
| 11 | +# run_name = "public13Dataset_CITEseq" |
| 12 | + |
| 13 | +master_path = "./" |
| 14 | +in_path = "/publicData_CITEseq/data/" |
| 15 | +out_path = paste0(master_path, "manuscript/results/", run_name) |
| 16 | +fig_path = paste0(out_path, "/Figures/") |
| 17 | + |
| 18 | +## load data directly |
| 19 | +marker_list = c("CD3", "CD4", "CD8", "CD14", "CD19", "CD25", "CD45RA", "CD56", "CD127") |
| 20 | +adt_data = readRDS(file = paste0(out_path, "/RDS/adt_data_RawCount_", run_name, ".rds")) |
| 21 | +adt_feature = readRDS(file = paste0(out_path, "/RDS/adt_feature_", run_name, ".rds")) |
| 22 | + |
| 23 | +## RPCA integration methods |
| 24 | +# creates a Seurat object based on the CITE-seq data |
| 25 | +rna_data = readRDS(file = './results/publicData_CITEseq/RDS/rna_data_common_RawCount_public13Dataset_CITEseq.rds') |
| 26 | +citeseq_obj = CreateSeuratObject(counts = t(rna_data)) |
| 27 | +citeseq_obj = AddMetaData(citeseq_obj, metadata = adt_feature) |
| 28 | + |
| 29 | +cite_list = SplitObject(citeseq_obj, split.by = "study_name") |
| 30 | +cite_list <- lapply(X = cite_list, FUN = function(x) { |
| 31 | + x <- NormalizeData(x) |
| 32 | + x <- FindVariableFeatures(x, selection.method = "vst", nfeatures = 5000) ## |
| 33 | +}) |
| 34 | + |
| 35 | +# select features that are repeatedly variable across datasets for integration run PCA on each |
| 36 | +# dataset using these features |
| 37 | +features <- SelectIntegrationFeatures(object.list = cite_list) |
| 38 | +cite_list <- lapply(X = cite_list, FUN = function(x) { |
| 39 | + x <- ScaleData(x, features = features, verbose = FALSE) |
| 40 | + x <- RunPCA(x, features = features, verbose = FALSE) |
| 41 | +}) |
| 42 | +immune.anchors <- FindIntegrationAnchors(object.list = cite_list, anchor.features = features, reduction = "rpca") ## |
| 43 | +# this command creates an 'integrated' data assay |
| 44 | +immune.combined <- IntegrateData(anchorset = immune.anchors) |
| 45 | +# specify that we will perform downstream analysis on the corrected data note that the |
| 46 | +# original unmodified data still resides in the 'RNA' assay |
| 47 | +DefaultAssay(immune.combined) <- "integrated" |
| 48 | + |
| 49 | +# Run the standard workflow for visualization and clustering |
| 50 | +immune.combined <- ScaleData(immune.combined, verbose = FALSE) |
| 51 | +immune.combined <- RunPCA(immune.combined, npcs = 100, verbose = FALSE) |
| 52 | +immune.combined <- RunUMAP(immune.combined, reduction = "pca", dims = 1:50) |
| 53 | +saveRDS(immune.combined, file = paste0(out_path, "/RDS/citeseq_obj_RPCA_", run_name, ".rds")) |
| 54 | + |
| 55 | +citeNorm = readRDS(file = paste0(out_path, "/RDS/citeseq_obj_RPCA_", run_name, ".rds")) |
| 56 | + |
| 57 | +adt_data_adtnorm = readRDS(file = paste0(out_path, "/RDS/adt_data_norm_", method, "_", run_name, ".rds")) |
| 58 | +adt_data_adtnorm[is.na(adt_data_adtnorm)] = 0 |
| 59 | + |
| 60 | +rownames(adt_data_adtnorm) = colnames(citeNorm) |
| 61 | +adt_assay <- CreateAssay5Object(counts = t(adt_data_adtnorm), data = t(adt_data_adtnorm)) |
| 62 | +citeNorm[["ADT"]] <- adt_assay |
| 63 | +DefaultAssay(citeNorm) <- 'ADT' |
| 64 | +VariableFeatures(citeNorm) <- rownames(citeNorm[["ADT"]]) |
| 65 | +## set scale.data |
| 66 | +if(method %in% c("fastMNN_study", "fastMNN_sample", "logCPM", "decontPro")){ |
| 67 | + citeNorm = ScaleData(citeNorm) |
| 68 | +}else{ |
| 69 | + citeNorm <- SetAssayData(citeNorm, assay = "ADT", slot = "scale.data", new.data = t(adt_data_adtnorm)) |
| 70 | +} |
| 71 | +citeNorm <- RunPCA(citeNorm, reduction.name = 'apca') |
| 72 | + |
| 73 | +DefaultAssay(citeNorm) <- 'integrated' |
| 74 | +citeNorm <- FindMultiModalNeighbors( |
| 75 | + citeNorm, reduction.list = list("pca", "apca"), |
| 76 | + dims.list = list(1:15, 1:8), modality.weight.name = "RNA.weight", k.nn = 30 |
| 77 | +) |
| 78 | + |
| 79 | +citeNorm <- RunUMAP(citeNorm, nn.name = "weighted.nn", reduction.name = "wnn.umap", reduction.key = "wnnUMAP_", min.dist = 0.01) |
| 80 | +citeNorm <- FindClusters(citeNorm, graph.name = "wsnn", algorithm = 1, resolution = 0.15, verbose = FALSE) |
| 81 | +saveRDS(citeNorm, file = paste0(out_path, "/RDS/citeseq_obj_RPCA_citeNorm_WNN_15_8_withCluster_noScalePCA_nneighbor20_mindist0.01_", run_name, "_", method, ".rds")) |
| 82 | + |
| 83 | +ans_umap = Embeddings(citeNorm, reduction = "wnn.umap") %>% data.frame |
| 84 | +colnames(ans_umap) = c("X1", "X2") |
| 85 | +adt_feature$seurat_clusters = citeNorm@meta.data$seurat_clusters |
| 86 | + |
| 87 | + |
| 88 | +pdf(paste0("./manuscript/results/public13Dataset_CITEseq/Figures/WNN/citeseq_obj_RPCA_citeNorm_WNN_15_8_withCluster_noScalePCA_nneighbor20_mindist0.01_", run_name, "_", method, ".pdf"), width = 15, height = 11) |
| 89 | + |
| 90 | +reindex1 = which(adt_feature$cell_type_l1 == "undefined") |
| 91 | +reindex2 = which(adt_feature$cell_type_l1 != "undefined") |
| 92 | +reindex = c(reindex1, reindex2) |
| 93 | + |
| 94 | +# print(plot_umap_raster(ans_umap[reindex, ], adt_feature[reindex, ], point_size = 0.3, parameter_list = list( |
| 95 | +# target_feature = "seurat_clusters", |
| 96 | +# color_design = colorRampPalette(brewer.pal(8, "Spectral"))(length(adt_feature$seurat_clusters %>% unique)), |
| 97 | +# method_label = method |
| 98 | +# )) #+ scale_color_brewer(palette = "Dark2") |
| 99 | +# ) |
| 100 | + |
| 101 | + |
| 102 | +print(plot_umap_raster(ans_umap[reindex, ], adt_feature[reindex, ], point_size = 0.3, parameter_list = list( |
| 103 | +target_feature = "sample", |
| 104 | +color_design = colorRampPalette(brewer.pal(8, "Spectral"))(length(adt_feature$sample %>% unique)), |
| 105 | +method_label = method |
| 106 | +)) #+ scale_color_brewer(palette = "Dark2") |
| 107 | +) |
| 108 | + |
| 109 | +print(plot_umap_raster(ans_umap[reindex, ], adt_feature[reindex, ], point_size = 0.3, parameter_list = list( |
| 110 | +target_feature = "study_name", |
| 111 | +color_design = colorRampPalette(brewer.pal(8, "Dark2"))(length(adt_feature$study_name %>% unique)), |
| 112 | +method_label = method |
| 113 | +))) |
| 114 | + |
| 115 | +print(plot_umap_raster(ans_umap[reindex, ], adt_feature[reindex, ], point_size = 0.3, parameter_list = list( |
| 116 | +target_feature = "sample_status", |
| 117 | +color_design = colorRampPalette(brewer.pal(8, "Set1"))(length(adt_feature$sample_status %>% unique)), |
| 118 | +method_label = method |
| 119 | +))) |
| 120 | + |
| 121 | +adt_feature$cell_type_l1 = factor(adt_feature$cell_type_l1, levels = c("B", "CD4 T", "CD8 T", "monocytes", "NK", "DCs", "undefined")) |
| 122 | +print(plot_umap_raster(ans_umap[reindex, ], adt_feature[reindex, ], point_size = 0.3, parameter_list = list( |
| 123 | +target_feature = "cell_type_l1", |
| 124 | +color_design = c("#B00000", "#3F918B", "#896191", "#FF980A", "#226fa7", "#F781BF", "grey"), |
| 125 | +#color_design = c(colorRampPalette(brewer.pal(8, "Set1"))(6), "grey"), |
| 126 | +color_break = c("B", "CD4 T", "CD8 T", "monocytes", "NK", "DCs", "undefined"), |
| 127 | +method_label = method |
| 128 | +))) |
| 129 | + |
| 130 | +adt_feature$cell_type_l2 = factor(adt_feature$cell_type_l2, levels = c("naive B", "memory B", |
| 131 | + "naive CD4", "memory CD4", "Treg", |
| 132 | + "naive CD8", "memory CD8", |
| 133 | + "classical monocyte", "intermediate monocyte", "non-classical CD16+ monocyte", |
| 134 | + "CD16- NK", "CD16+ NK", |
| 135 | + "myeloid DC", "plasmacytoid DC", "undefined")) |
| 136 | +print(plot_umap_raster(ans_umap[reindex, ], adt_feature[reindex, ], point_size = 0.3, parameter_list = list( |
| 137 | +target_feature = "cell_type_l2", |
| 138 | +color_design = c( |
| 139 | + "#B00000", "#FF3380", |
| 140 | + "#9ef954", "#3F918B", "#03500e", |
| 141 | + "#896191", "#350154", |
| 142 | + "#FF980A", "#A78300", "#DB6400", |
| 143 | + "#1000a0", "#226fa7", |
| 144 | + "#F781BF", "#7B0054", "grey"), |
| 145 | +#color_design = c(colorRampPalette(brewer.pal(8, "Dark2"))(14), "grey"), |
| 146 | +color_break = c( |
| 147 | + "naive B", "memory B", |
| 148 | + "naive CD4", "memory CD4", "Treg", |
| 149 | + "naive CD8", "memory CD8", |
| 150 | + "classical monocyte", "intermediate monocyte", "non-classical CD16+ monocyte", |
| 151 | + "CD16- NK", "CD16+ NK", |
| 152 | + "myeloid DC", "plasmacytoid DC", "undefined"), |
| 153 | +method_label = method |
| 154 | +))) |
| 155 | + |
| 156 | +dev.off() |
| 157 | + |
| 158 | +pdf(paste0("./manuscript/results/public13Dataset_CITEseq/Figures/WNN/citeseq_obj_RPCA_citeNorm_WNN_15_8_withCluster_noScalePCA_nneighbor20_mindist0.01_", run_name, "_", method, "_weights.pdf"), width = 18, height = 9) |
| 159 | +VlnPlot(citeNorm, features = "ADT.weight", group.by = 'cell_type_l1', sort = TRUE, pt.size = 0) + |
| 160 | +scale_fill_manual(values = c("#B00000", "#3F918B", "#896191", "#FF980A", "#226fa7", "#F781BF", "grey"), |
| 161 | +breaks = c("B", "CD4 T", "CD8 T", "monocytes", "NK", "DCs", "undefined")) |
| 162 | +VlnPlot(citeNorm, features = "ADT.weight", group.by = 'cell_type_l2', sort = TRUE, pt.size = 0) + |
| 163 | +scale_fill_manual(values = c( |
| 164 | + "#B00000", "#FF3380", |
| 165 | + "#9ef954", "#3F918B", "#03500e", |
| 166 | + "#896191", "#350154", |
| 167 | + "#FF980A", "#A78300", "#DB6400", |
| 168 | + "#1000a0", "#226fa7", |
| 169 | + "#F781BF", "#7B0054", "grey"), |
| 170 | +breaks = c( |
| 171 | + "naive B", "memory B", |
| 172 | + "naive CD4", "memory CD4", "Treg", |
| 173 | + "naive CD8", "memory CD8", |
| 174 | + "classical monocyte", "intermediate monocyte", "non-classical CD16+ monocyte", |
| 175 | + "CD16- NK", "CD16+ NK", |
| 176 | + "myeloid DC", "plasmacytoid DC", "undefined")) |
| 177 | +VlnPlot(citeNorm, features = "ADT.weight", group.by = 'seurat_clusters', sort = TRUE, pt.size = 0) + |
| 178 | +scale_fill_manual(values = colorRampPalette(brewer.pal(8, "Spectral"))(length(adt_feature$seurat_clusters %>% unique))) |
| 179 | +dev.off() |
| 180 | + |
0 commit comments