@@ -55,10 +55,10 @@ Various metrics can be used to filter low-quality cells from high-quality ones,
5555# This set up the working directory to this file so all files can be found
5656# library(rstudioapi)
5757# setwd(fs::path_dir(getSourceEditorContext()$path))
58- stopifnot(R.version$major>= 4) # requires R4
59- if (compareVersion(R.version$minor,"3.1")< 0) warning("We recommend >= R4.3.1")
60- stopifnot(compareVersion(as.character(BiocManager::version()), "3.16")>= 0)
61- stopifnot(compareVersion(as.character(packageVersion("Seurat")), "5.1")>= 0)
58+ stopifnot(R.version$major >= 4) # requires R4
59+ if (compareVersion(R.version$minor, "3.1") < 0) warning("We recommend >= R4.3.1")
60+ stopifnot(compareVersion(as.character(BiocManager::version()), "3.16") >= 0)
61+ stopifnot(compareVersion(as.character(packageVersion("Seurat")), "5.1") >= 0)
6262```
6363
6464``` {r load_libraries, cache = FALSE, message = FALSE, warning=FALSE, echo=FALSE,}
@@ -78,31 +78,35 @@ library(Seurat)
7878
7979import::from(magrittr, set_colnames, set_rownames, "%<>%")
8080
81- invisible(list2env(params,environment()))
81+ invisible(list2env(params, environment()))
8282source(project_file)
8383
8484ggplot2::theme_set(theme_prism(base_size = 12))
8585# https://grafify-vignettes.netlify.app/colour_palettes.html
8686# NOTE change colors here if you wish
87- scale_colour_discrete <- function(...)
88- scale_colour_manual(...,
89- values = as.vector(grafify:::graf_palettes[["kelly"]]))
90- scale_fill_discrete <- function(...)
91- scale_fill_manual(...,
92- values = as.vector(grafify:::graf_palettes[["kelly"]]))
87+ scale_colour_discrete <- function(...) {
88+ scale_colour_manual(...,
89+ values = as.vector(grafify:::graf_palettes[["kelly"]])
90+ )
91+ }
92+ scale_fill_discrete <- function(...) {
93+ scale_fill_manual(...,
94+ values = as.vector(grafify:::graf_palettes[["kelly"]])
95+ )
96+ }
9397
9498opts_chunk[["set"]](
95- cache = F,
96- cache.lazy = FALSE,
97- dev = c("png", "pdf"),
98- error = TRUE,
99- highlight = TRUE,
100- message = FALSE,
101- prompt = FALSE,
102- tidy = FALSE,
103- warning = FALSE,
104- echo = T,
105- fig.height = 4)
99+ cache = F,
100+ cache.lazy = FALSE,
101+ dev = c("png", "pdf"),
102+ error = TRUE,
103+ highlight = TRUE,
104+ message = FALSE,
105+ prompt = FALSE,
106+ tidy = FALSE,
107+ warning = FALSE,
108+ echo = T,
109+ fig.height = 4)
106110
107111# set seed for reproducibility
108112set.seed(1234567890L)
@@ -122,8 +126,8 @@ visium <- qs_read(visiumHD_obj)
122126visium <- PercentageFeatureSet(visium, "^mt-", col.name = "percent_mito")
123127visium <- PercentageFeatureSet(visium, "^Hb.*-", col.name = "percent_hb")
124128125- metaD$log10GenesPerUMI <- log10(metaD$nFeature)/ log10(metaD$nCount)
126- colnames(metaD)%<>%gsub(pattern= glue("_{DefaultAssay(visium)}"),replacement= "")
129+ metaD$log10GenesPerUMI <- log10(metaD$nFeature) / log10(metaD$nCount)
130+ colnames(metaD) %<>% gsub(pattern = glue("_{DefaultAssay(visium)}"), replacement = "")
127131```
128132
129133Let's take a quick look at the data and make a decision on whether we need to apply any filtering.
@@ -135,21 +139,23 @@ Let's take a quick look at the data and make a decision on whether we need to ap
135139Those two metrics is really dependent on tissue type, RNA quality, and sequencing depth. Since the test data is generated from Visium HD technology, we use bin and corresponding reference thresholds in the plot. Reference line at 100 is plotted as the suggested cut-offs for both metrics.
136140
137141``` {r}
138- summary_metaD <- apply(metaD[,-1],2,mean)
139- metacol_label <- list("nFeature"="Genes","nCount"="UMI")
140- refs <- list("nFeature"=100,"nCount"=100)
141- dists_before <- imap(metacol_label,\(label,col)
142- ggdensity(metaD,
143- x = col,xscale="log10",add = "mean", rug = TRUE,
144- alpha = 0.2,fill = "lightgray",
145- xlab=glue("Number of {label} per bin(in log10 scale)"),
146- ylab="Cell density",
147- title=glue('Pre-QC {label}/Bin'))+
148- geom_vline(xintercept = refs[[col]],color="darkred",cex=rel(1.3),linetype="dashed")+
149- annotate("text",x=summary_metaD[col],y = Inf,
150- label = glue("Mean \n = {round(summary_metaD[col],0)}"),
151- vjust = 1,hjust=2)
152- )
142+ summary_metaD <- apply(metaD[, -1], 2, mean)
143+ metacol_label <- list("nFeature" = "Genes", "nCount" = "UMI")
144+ refs <- list("nFeature" = 100, "nCount" = 100)
145+ dists_before <- imap(metacol_label, \(label, col)
146+ ggdensity(metaD,
147+ x = col, xscale = "log10", add = "mean", rug = TRUE,
148+ alpha = 0.2, fill = "lightgray",
149+ xlab = glue("Number of {label} per bin(in log10 scale)"),
150+ ylab = "Cell density",
151+ title = glue("Pre-QC {label}/Bin")
152+ ) +
153+ geom_vline(xintercept = refs[[col]], color = "darkred", cex = rel(1.3), linetype = "dashed") +
154+ annotate("text",
155+ x = summary_metaD[col], y = Inf,
156+ label = glue("Mean \n = {round(summary_metaD[col],0)}"),
157+ vjust = 1, hjust = 2
158+ ))
153159dists_before[[1]] | dists_before[[2]]
154160```
155161
@@ -162,56 +168,67 @@ With scRNA-seq this is more easily interpreted for a single cell, but for spatia
162168
163169``` {r complexity}
164170col <- "log10GenesPerUMI"
165- ggdensity(metaD,x = col,add = "mean", rug = TRUE,
166- alpha = 0.2,fill = "lightgray",
167- xlab="complexity",ylab="Cell density",title=glue('Novelty score'))+
168- geom_vline(xintercept = 0.8,color="darkred",cex=rel(1.3),linetype="dashed")+
169- annotate("text",x=summary_metaD[col],y = Inf,
170- label = glue("Mean = {round(summary_metaD[col],0)}"),
171- vjust = 1,hjust=2)+
172- theme(plot.title = element_text(hjust=0.5, face="bold"))
171+ ggdensity(metaD,
172+ x = col, add = "mean", rug = TRUE,
173+ alpha = 0.2, fill = "lightgray",
174+ xlab = "complexity", ylab = "Cell density", title = glue("Novelty score")
175+ ) +
176+ geom_vline(xintercept = 0.8, color = "darkred", cex = rel(1.3), linetype = "dashed") +
177+ annotate("text",
178+ x = summary_metaD[col], y = Inf,
179+ label = glue("Mean = {round(summary_metaD[col],0)}"),
180+ vjust = 1, hjust = 2
181+ ) +
182+ theme(plot.title = element_text(hjust = 0.5, face = "bold"))
173183```
174184
175185### mitochondria & hemoglospot/bingene ratios
176186
177187``` {r}
178- ggplot(metaD %>%
179- select(orig.ident,starts_with("percent_")) %>%
180- tidyr::gather(class,percent_unexpected,-orig.ident),
181- aes_string(x = "orig.ident", y = "percent_unexpected")) +
182- geom_violin(position=position_dodge(1),alpha=1, na.rm=TRUE,trim=FALSE)+
183- ggbeeswarm::geom_quasirandom(na.rm=TRUE,dodge.width=0.5,
184- method='quasirandom',alpha=0.01)+
185- geom_boxplot(width=0.1,outliers = F)+
186- geom_hline(yintercept=20)+
187- facet_grid(~class,scales = "free")+
188- theme(
189- axis.text.x = element_text(size=rel(1),face="bold"),
190- plot.title = element_text(hjust = 0.5),
191- strip.text.x = element_text(size = rel(1.5), colour = "black"),
192- legend.position = "none"
193- )+
194- scale_y_log10(breaks=c(1,5,10,20,100))+
188+ ggplot(
189+ metaD %>%
190+ select(orig.ident, starts_with("percent_")) %>%
191+ tidyr::gather(class, percent_unexpected, -orig.ident),
192+ aes_string(x = "orig.ident", y = "percent_unexpected")
193+ ) +
194+ geom_violin(position = position_dodge(1), alpha = 1, na.rm = TRUE, trim = FALSE) +
195+ ggbeeswarm::geom_quasirandom(
196+ na.rm = TRUE, dodge.width = 0.5,
197+ method = "quasirandom", alpha = 0.01
198+ ) +
199+ geom_boxplot(width = 0.1, outliers = F) +
200+ geom_hline(yintercept = 20) +
201+ facet_grid(~class, scales = "free") +
202+ theme(
203+ axis.text.x = element_text(size = rel(1), face = "bold"),
204+ plot.title = element_text(hjust = 0.5),
205+ strip.text.x = element_text(size = rel(1.5), colour = "black"),
206+ legend.position = "none"
207+ ) +
208+ scale_y_log10(breaks = c(1, 5, 10, 20, 100)) +
195209 # ylim(c(0,100))+
196- labs(x= "",y= "% of contamination genes")
210+ labs(x = "", y = "% of contamination genes")
197211```
198212
199213## QC metrics visualized on slides{.tabset}
200214
201215Here, we can look at all the QC metrics we discussed above on the individual tissue slide.
202216
203217``` {r}
204- features2check <- c(glue('nCount_{DefaultAssay(visium)}'),
205- glue('nFeature_{DefaultAssay(visium)}'),
206- "percent_mito","percent_hb")
218+ features2check <- c(
219+ glue("nCount_{DefaultAssay(visium)}"),
220+ glue("nFeature_{DefaultAssay(visium)}"),
221+ "percent_mito", "percent_hb"
222+ )
207223```
208224
209225``` {r spatial-plot,fig.height=5,fig.width=5,eval=T,results='asis'}
210- for(f in features2check){
226+ for (f in features2check) {
211227 cat("### ", f, "\n\n")
212- p1 <- SpatialFeaturePlot(visium,
213- feature = f,
214- pt.size.factor = 4)
228+ p1 <- SpatialFeaturePlot(visium,
229+ feature = f,
230+ pt.size.factor = 4
231+ )
215232 print(p1)
216233 cat("\n\n")
217234}
@@ -222,14 +239,14 @@ for(f in features2check){
222239Now, it is time to choose some cut-offs for QC metrics mentioned above and removing low-quality cells, as well as mitochondria, hemoglobin genes from the feature space and we can take a quick look at what are our top 20 expressed genes.
223240
224241``` {r filtering,fig.height=7,fig.width=7}
225- GeneVar <- glue(' nFeature_{DefaultAssay(visium)}' )
226- UMIVar <- glue(' nCount_{DefaultAssay(visium)}' )
227- cutoffs <- list("nFeature"= 100,"nCount"= 100,"hb"= 20,"mito"= 20)
228- Qced <- [email protected] [,GeneVar] > cutoffs$nFeature & 229- [email protected] [,UMIVar] > cutoffs$nCount & 230- visium$percent_hb < cutoffs$hb &
231- visium$percent_mito < cutoffs$mito
232- visium <- visium[,Qced]
242+ GeneVar <- glue(" nFeature_{DefaultAssay(visium)}" )
243+ UMIVar <- glue(" nCount_{DefaultAssay(visium)}" )
244+ cutoffs <- list("nFeature" = 100, "nCount" = 100, "hb" = 20, "mito" = 20)
245+ Qced <- [email protected] [, GeneVar] > cutoffs$nFeature & 246+ [email protected] [, UMIVar] > cutoffs$nCount & 247+ visium$percent_hb < cutoffs$hb &
248+ visium$percent_mito < cutoffs$mito
249+ visium <- visium[, Qced]
233250# Filter Mitocondrial
234251visium <- visium[!grepl("^mt-", rownames(visium)), ]
235252# Filter Hemoglobin gene (optional if that is a problem on your data)
@@ -238,33 +255,35 @@ visium <- visium[!grepl("^Hb.*-", rownames(visium)), ]
238255C <- GetAssayData(visium, slot = "counts")
239256C@x <- C@x / rep.int(colSums(C), diff(C@p))
240257most_expressed <- order(Matrix::rowSums(C), decreasing = T)[20:1]
241- exprD <- as.data.frame(t(C[most_expressed, ])) %>%
242- tibble::rownames_to_column("bin") %>%
243- tidyr::gather(gene,expr,-bin)
244-
245-
246- ggplot(exprD,aes(x=gene,y=expr,color=gene,fill=gene))+
247- geom_violin(position=position_dodge(1),alpha=0.5,
248- na.rm=TRUE,trim=FALSE)+
249- geom_boxplot(width=0.1,outliers = F,color="black")+
250- theme_minimal()+
258+ exprD <- as.data.frame(t(C[most_expressed, ])) %>%
259+ tibble::rownames_to_column("bin") %>%
260+ tidyr::gather(gene, expr, -bin)
261+
262+
263+ ggplot(exprD, aes(x = gene, y = expr, color = gene, fill = gene)) +
264+ geom_violin(
265+ position = position_dodge(1), alpha = 0.5,
266+ na.rm = TRUE, trim = FALSE
267+ ) +
268+ geom_boxplot(width = 0.1, outliers = F, color = "black") +
269+ theme_minimal() +
251270 theme(
252- axis.text.x = element_text(size= rel(1),face= "bold"),
253- plot.title = element_text(hjust = 0.5),
254- legend.position = "none"
255- )+
256- scale_y_log10(breaks= c(0.001,.01,.1,1),labels= c(0.1,1, 10,100))+
257- labs(x= "",y= "% of total UMIs/bin \n (log10 scaled)")+
271+ axis.text.x = element_text(size = rel(1), face = "bold"),
272+ plot.title = element_text(hjust = 0.5),
273+ legend.position = "none"
274+ ) +
275+ scale_y_log10(breaks = c(0.001, .01, .1, 1), labels = c(0.1, 1, 10, 100)) +
276+ labs(x = "", y = "% of total UMIs/bin \n (log10 scaled)") +
258277 coord_flip()
259278```
260279
261280
262281``` {r save_seurat}
263- if(!dir.exists(results_dir)){
282+ if (!dir.exists(results_dir)) {
264283 system(glue("mkdir -p {results_dir}"))
265284}
266285qs_save(visium, file.path(results_dir, "01_qc.qs"))
267- outputPath = file.path(results_dir, "01_qc.qs")
286+ outputPath <- file.path(results_dir, "01_qc.qs")
268287```
269288
270289We saved your qc-filled Seurat object in ** ` r outputPath ` ** .
0 commit comments