Skip to content

Commit 02321b1

Browse files
yezhengSTATyezhengSTAT
authored andcommitted
add code for the analysis in manuscript
1 parent f4c8233 commit 02321b1

31 files changed

Lines changed: 9338 additions & 0 deletions
Lines changed: 180 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,180 @@
1+
#!/usr/bin/env Rscript
2+
args = commandArgs(trailingOnly=TRUE)
3+
4+
library(Seurat)
5+
library(dplyr)
6+
library(ggplot2)
7+
library(httpgd)
8+
9+
method <- args[1] #"Arcsinh_b5"
10+
run_name = args[2] #"publicData_CITEseq"
11+
# run_name = "public13Dataset_CITEseq"
12+
13+
master_path = "./"
14+
in_path = "/publicData_CITEseq/data/"
15+
out_path = paste0(master_path, "manuscript/results/", run_name)
16+
fig_path = paste0(out_path, "/Figures/")
17+
18+
## load data directly
19+
marker_list = c("CD3", "CD4", "CD8", "CD14", "CD19", "CD25", "CD45RA", "CD56", "CD127")
20+
adt_data = readRDS(file = paste0(out_path, "/RDS/adt_data_RawCount_", run_name, ".rds"))
21+
adt_feature = readRDS(file = paste0(out_path, "/RDS/adt_feature_", run_name, ".rds"))
22+
23+
## RPCA integration methods
24+
# creates a Seurat object based on the CITE-seq data
25+
rna_data = readRDS(file = './results/publicData_CITEseq/RDS/rna_data_common_RawCount_public13Dataset_CITEseq.rds')
26+
citeseq_obj = CreateSeuratObject(counts = t(rna_data))
27+
citeseq_obj = AddMetaData(citeseq_obj, metadata = adt_feature)
28+
29+
cite_list = SplitObject(citeseq_obj, split.by = "study_name")
30+
cite_list <- lapply(X = cite_list, FUN = function(x) {
31+
x <- NormalizeData(x)
32+
x <- FindVariableFeatures(x, selection.method = "vst", nfeatures = 5000) ##
33+
})
34+
35+
# select features that are repeatedly variable across datasets for integration run PCA on each
36+
# dataset using these features
37+
features <- SelectIntegrationFeatures(object.list = cite_list)
38+
cite_list <- lapply(X = cite_list, FUN = function(x) {
39+
x <- ScaleData(x, features = features, verbose = FALSE)
40+
x <- RunPCA(x, features = features, verbose = FALSE)
41+
})
42+
immune.anchors <- FindIntegrationAnchors(object.list = cite_list, anchor.features = features, reduction = "rpca") ##
43+
# this command creates an 'integrated' data assay
44+
immune.combined <- IntegrateData(anchorset = immune.anchors)
45+
# specify that we will perform downstream analysis on the corrected data note that the
46+
# original unmodified data still resides in the 'RNA' assay
47+
DefaultAssay(immune.combined) <- "integrated"
48+
49+
# Run the standard workflow for visualization and clustering
50+
immune.combined <- ScaleData(immune.combined, verbose = FALSE)
51+
immune.combined <- RunPCA(immune.combined, npcs = 100, verbose = FALSE)
52+
immune.combined <- RunUMAP(immune.combined, reduction = "pca", dims = 1:50)
53+
saveRDS(immune.combined, file = paste0(out_path, "/RDS/citeseq_obj_RPCA_", run_name, ".rds"))
54+
55+
citeNorm = readRDS(file = paste0(out_path, "/RDS/citeseq_obj_RPCA_", run_name, ".rds"))
56+
57+
adt_data_adtnorm = readRDS(file = paste0(out_path, "/RDS/adt_data_norm_", method, "_", run_name, ".rds"))
58+
adt_data_adtnorm[is.na(adt_data_adtnorm)] = 0
59+
60+
rownames(adt_data_adtnorm) = colnames(citeNorm)
61+
adt_assay <- CreateAssay5Object(counts = t(adt_data_adtnorm), data = t(adt_data_adtnorm))
62+
citeNorm[["ADT"]] <- adt_assay
63+
DefaultAssay(citeNorm) <- 'ADT'
64+
VariableFeatures(citeNorm) <- rownames(citeNorm[["ADT"]])
65+
## set scale.data
66+
if(method %in% c("fastMNN_study", "fastMNN_sample", "logCPM", "decontPro")){
67+
citeNorm = ScaleData(citeNorm)
68+
}else{
69+
citeNorm <- SetAssayData(citeNorm, assay = "ADT", slot = "scale.data", new.data = t(adt_data_adtnorm))
70+
}
71+
citeNorm <- RunPCA(citeNorm, reduction.name = 'apca')
72+
73+
DefaultAssay(citeNorm) <- 'integrated'
74+
citeNorm <- FindMultiModalNeighbors(
75+
citeNorm, reduction.list = list("pca", "apca"),
76+
dims.list = list(1:15, 1:8), modality.weight.name = "RNA.weight", k.nn = 30
77+
)
78+
79+
citeNorm <- RunUMAP(citeNorm, nn.name = "weighted.nn", reduction.name = "wnn.umap", reduction.key = "wnnUMAP_", min.dist = 0.01)
80+
citeNorm <- FindClusters(citeNorm, graph.name = "wsnn", algorithm = 1, resolution = 0.15, verbose = FALSE)
81+
saveRDS(citeNorm, file = paste0(out_path, "/RDS/citeseq_obj_RPCA_citeNorm_WNN_15_8_withCluster_noScalePCA_nneighbor20_mindist0.01_", run_name, "_", method, ".rds"))
82+
83+
ans_umap = Embeddings(citeNorm, reduction = "wnn.umap") %>% data.frame
84+
colnames(ans_umap) = c("X1", "X2")
85+
adt_feature$seurat_clusters = citeNorm@meta.data$seurat_clusters
86+
87+
88+
pdf(paste0("./manuscript/results/public13Dataset_CITEseq/Figures/WNN/citeseq_obj_RPCA_citeNorm_WNN_15_8_withCluster_noScalePCA_nneighbor20_mindist0.01_", run_name, "_", method, ".pdf"), width = 15, height = 11)
89+
90+
reindex1 = which(adt_feature$cell_type_l1 == "undefined")
91+
reindex2 = which(adt_feature$cell_type_l1 != "undefined")
92+
reindex = c(reindex1, reindex2)
93+
94+
# print(plot_umap_raster(ans_umap[reindex, ], adt_feature[reindex, ], point_size = 0.3, parameter_list = list(
95+
# target_feature = "seurat_clusters",
96+
# color_design = colorRampPalette(brewer.pal(8, "Spectral"))(length(adt_feature$seurat_clusters %>% unique)),
97+
# method_label = method
98+
# )) #+ scale_color_brewer(palette = "Dark2")
99+
# )
100+
101+
102+
print(plot_umap_raster(ans_umap[reindex, ], adt_feature[reindex, ], point_size = 0.3, parameter_list = list(
103+
target_feature = "sample",
104+
color_design = colorRampPalette(brewer.pal(8, "Spectral"))(length(adt_feature$sample %>% unique)),
105+
method_label = method
106+
)) #+ scale_color_brewer(palette = "Dark2")
107+
)
108+
109+
print(plot_umap_raster(ans_umap[reindex, ], adt_feature[reindex, ], point_size = 0.3, parameter_list = list(
110+
target_feature = "study_name",
111+
color_design = colorRampPalette(brewer.pal(8, "Dark2"))(length(adt_feature$study_name %>% unique)),
112+
method_label = method
113+
)))
114+
115+
print(plot_umap_raster(ans_umap[reindex, ], adt_feature[reindex, ], point_size = 0.3, parameter_list = list(
116+
target_feature = "sample_status",
117+
color_design = colorRampPalette(brewer.pal(8, "Set1"))(length(adt_feature$sample_status %>% unique)),
118+
method_label = method
119+
)))
120+
121+
adt_feature$cell_type_l1 = factor(adt_feature$cell_type_l1, levels = c("B", "CD4 T", "CD8 T", "monocytes", "NK", "DCs", "undefined"))
122+
print(plot_umap_raster(ans_umap[reindex, ], adt_feature[reindex, ], point_size = 0.3, parameter_list = list(
123+
target_feature = "cell_type_l1",
124+
color_design = c("#B00000", "#3F918B", "#896191", "#FF980A", "#226fa7", "#F781BF", "grey"),
125+
#color_design = c(colorRampPalette(brewer.pal(8, "Set1"))(6), "grey"),
126+
color_break = c("B", "CD4 T", "CD8 T", "monocytes", "NK", "DCs", "undefined"),
127+
method_label = method
128+
)))
129+
130+
adt_feature$cell_type_l2 = factor(adt_feature$cell_type_l2, levels = c("naive B", "memory B",
131+
"naive CD4", "memory CD4", "Treg",
132+
"naive CD8", "memory CD8",
133+
"classical monocyte", "intermediate monocyte", "non-classical CD16+ monocyte",
134+
"CD16- NK", "CD16+ NK",
135+
"myeloid DC", "plasmacytoid DC", "undefined"))
136+
print(plot_umap_raster(ans_umap[reindex, ], adt_feature[reindex, ], point_size = 0.3, parameter_list = list(
137+
target_feature = "cell_type_l2",
138+
color_design = c(
139+
"#B00000", "#FF3380",
140+
"#9ef954", "#3F918B", "#03500e",
141+
"#896191", "#350154",
142+
"#FF980A", "#A78300", "#DB6400",
143+
"#1000a0", "#226fa7",
144+
"#F781BF", "#7B0054", "grey"),
145+
#color_design = c(colorRampPalette(brewer.pal(8, "Dark2"))(14), "grey"),
146+
color_break = c(
147+
"naive B", "memory B",
148+
"naive CD4", "memory CD4", "Treg",
149+
"naive CD8", "memory CD8",
150+
"classical monocyte", "intermediate monocyte", "non-classical CD16+ monocyte",
151+
"CD16- NK", "CD16+ NK",
152+
"myeloid DC", "plasmacytoid DC", "undefined"),
153+
method_label = method
154+
)))
155+
156+
dev.off()
157+
158+
pdf(paste0("./manuscript/results/public13Dataset_CITEseq/Figures/WNN/citeseq_obj_RPCA_citeNorm_WNN_15_8_withCluster_noScalePCA_nneighbor20_mindist0.01_", run_name, "_", method, "_weights.pdf"), width = 18, height = 9)
159+
VlnPlot(citeNorm, features = "ADT.weight", group.by = 'cell_type_l1', sort = TRUE, pt.size = 0) +
160+
scale_fill_manual(values = c("#B00000", "#3F918B", "#896191", "#FF980A", "#226fa7", "#F781BF", "grey"),
161+
breaks = c("B", "CD4 T", "CD8 T", "monocytes", "NK", "DCs", "undefined"))
162+
VlnPlot(citeNorm, features = "ADT.weight", group.by = 'cell_type_l2', sort = TRUE, pt.size = 0) +
163+
scale_fill_manual(values = c(
164+
"#B00000", "#FF3380",
165+
"#9ef954", "#3F918B", "#03500e",
166+
"#896191", "#350154",
167+
"#FF980A", "#A78300", "#DB6400",
168+
"#1000a0", "#226fa7",
169+
"#F781BF", "#7B0054", "grey"),
170+
breaks = c(
171+
"naive B", "memory B",
172+
"naive CD4", "memory CD4", "Treg",
173+
"naive CD8", "memory CD8",
174+
"classical monocyte", "intermediate monocyte", "non-classical CD16+ monocyte",
175+
"CD16- NK", "CD16+ NK",
176+
"myeloid DC", "plasmacytoid DC", "undefined"))
177+
VlnPlot(citeNorm, features = "ADT.weight", group.by = 'seurat_clusters', sort = TRUE, pt.size = 0) +
178+
scale_fill_manual(values = colorRampPalette(brewer.pal(8, "Spectral"))(length(adt_feature$seurat_clusters %>% unique)))
179+
dev.off()
180+

0 commit comments

Comments
 (0)