Skip to content

Commit 6066dca

Browse files
committed
lint files
1 parent 125e667 commit 6066dca

File tree

3 files changed

+241
-193
lines changed

3 files changed

+241
-193
lines changed

visium/01_quality_assessment/qc.Rmd

Whitespace-only changes.

visium/01_quality_assessment/quality_assessment.qmd

Lines changed: 117 additions & 98 deletions
Original file line numberDiff line numberDiff line change
@@ -55,10 +55,10 @@ Various metrics can be used to filter low-quality cells from high-quality ones,
5555
# This set up the working directory to this file so all files can be found
5656
# library(rstudioapi)
5757
# setwd(fs::path_dir(getSourceEditorContext()$path))
58-
stopifnot(R.version$major>= 4) # requires R4
59-
if (compareVersion(R.version$minor,"3.1")<0) warning("We recommend >= R4.3.1")
60-
stopifnot(compareVersion(as.character(BiocManager::version()), "3.16")>=0)
61-
stopifnot(compareVersion(as.character(packageVersion("Seurat")), "5.1")>=0)
58+
stopifnot(R.version$major >= 4) # requires R4
59+
if (compareVersion(R.version$minor, "3.1") < 0) warning("We recommend >= R4.3.1")
60+
stopifnot(compareVersion(as.character(BiocManager::version()), "3.16") >= 0)
61+
stopifnot(compareVersion(as.character(packageVersion("Seurat")), "5.1") >= 0)
6262
```
6363

6464
```{r load_libraries, cache = FALSE, message = FALSE, warning=FALSE, echo=FALSE,}
@@ -78,31 +78,35 @@ library(Seurat)
7878
7979
import::from(magrittr, set_colnames, set_rownames, "%<>%")
8080
81-
invisible(list2env(params,environment()))
81+
invisible(list2env(params, environment()))
8282
source(project_file)
8383
8484
ggplot2::theme_set(theme_prism(base_size = 12))
8585
# https://grafify-vignettes.netlify.app/colour_palettes.html
8686
# NOTE change colors here if you wish
87-
scale_colour_discrete <- function(...)
88-
scale_colour_manual(...,
89-
values = as.vector(grafify:::graf_palettes[["kelly"]]))
90-
scale_fill_discrete <- function(...)
91-
scale_fill_manual(...,
92-
values = as.vector(grafify:::graf_palettes[["kelly"]]))
87+
scale_colour_discrete <- function(...) {
88+
scale_colour_manual(...,
89+
values = as.vector(grafify:::graf_palettes[["kelly"]])
90+
)
91+
}
92+
scale_fill_discrete <- function(...) {
93+
scale_fill_manual(...,
94+
values = as.vector(grafify:::graf_palettes[["kelly"]])
95+
)
96+
}
9397
9498
opts_chunk[["set"]](
95-
cache = F,
96-
cache.lazy = FALSE,
97-
dev = c("png", "pdf"),
98-
error = TRUE,
99-
highlight = TRUE,
100-
message = FALSE,
101-
prompt = FALSE,
102-
tidy = FALSE,
103-
warning = FALSE,
104-
echo = T,
105-
fig.height = 4)
99+
cache = F,
100+
cache.lazy = FALSE,
101+
dev = c("png", "pdf"),
102+
error = TRUE,
103+
highlight = TRUE,
104+
message = FALSE,
105+
prompt = FALSE,
106+
tidy = FALSE,
107+
warning = FALSE,
108+
echo = T,
109+
fig.height = 4)
106110
107111
# set seed for reproducibility
108112
set.seed(1234567890L)
@@ -122,8 +126,8 @@ visium <- qs_read(visiumHD_obj)
122126
visium <- PercentageFeatureSet(visium, "^mt-", col.name = "percent_mito")
123127
visium <- PercentageFeatureSet(visium, "^Hb.*-", col.name = "percent_hb")
124128
125-
metaD$log10GenesPerUMI <- log10(metaD$nFeature)/log10(metaD$nCount)
126-
colnames(metaD)%<>%gsub(pattern=glue("_{DefaultAssay(visium)}"),replacement="")
129+
metaD$log10GenesPerUMI <- log10(metaD$nFeature) / log10(metaD$nCount)
130+
colnames(metaD) %<>% gsub(pattern = glue("_{DefaultAssay(visium)}"), replacement = "")
127131
```
128132

129133
Let's take a quick look at the data and make a decision on whether we need to apply any filtering.
@@ -135,21 +139,23 @@ Let's take a quick look at the data and make a decision on whether we need to ap
135139
Those two metrics is really dependent on tissue type, RNA quality, and sequencing depth. Since the test data is generated from Visium HD technology, we use bin and corresponding reference thresholds in the plot. Reference line at 100 is plotted as the suggested cut-offs for both metrics.
136140

137141
```{r}
138-
summary_metaD <- apply(metaD[,-1],2,mean)
139-
metacol_label <- list("nFeature"="Genes","nCount"="UMI")
140-
refs <- list("nFeature"=100,"nCount"=100)
141-
dists_before <- imap(metacol_label,\(label,col)
142-
ggdensity(metaD,
143-
x = col,xscale="log10",add = "mean", rug = TRUE,
144-
alpha = 0.2,fill = "lightgray",
145-
xlab=glue("Number of {label} per bin(in log10 scale)"),
146-
ylab="Cell density",
147-
title=glue('Pre-QC {label}/Bin'))+
148-
geom_vline(xintercept = refs[[col]],color="darkred",cex=rel(1.3),linetype="dashed")+
149-
annotate("text",x=summary_metaD[col],y = Inf,
150-
label = glue("Mean \n = {round(summary_metaD[col],0)}"),
151-
vjust = 1,hjust=2)
152-
)
142+
summary_metaD <- apply(metaD[, -1], 2, mean)
143+
metacol_label <- list("nFeature" = "Genes", "nCount" = "UMI")
144+
refs <- list("nFeature" = 100, "nCount" = 100)
145+
dists_before <- imap(metacol_label, \(label, col)
146+
ggdensity(metaD,
147+
x = col, xscale = "log10", add = "mean", rug = TRUE,
148+
alpha = 0.2, fill = "lightgray",
149+
xlab = glue("Number of {label} per bin(in log10 scale)"),
150+
ylab = "Cell density",
151+
title = glue("Pre-QC {label}/Bin")
152+
) +
153+
geom_vline(xintercept = refs[[col]], color = "darkred", cex = rel(1.3), linetype = "dashed") +
154+
annotate("text",
155+
x = summary_metaD[col], y = Inf,
156+
label = glue("Mean \n = {round(summary_metaD[col],0)}"),
157+
vjust = 1, hjust = 2
158+
))
153159
dists_before[[1]] | dists_before[[2]]
154160
```
155161

@@ -162,56 +168,67 @@ With scRNA-seq this is more easily interpreted for a single cell, but for spatia
162168

163169
```{r complexity}
164170
col <- "log10GenesPerUMI"
165-
ggdensity(metaD,x = col,add = "mean", rug = TRUE,
166-
alpha = 0.2,fill = "lightgray",
167-
xlab="complexity",ylab="Cell density",title=glue('Novelty score'))+
168-
geom_vline(xintercept = 0.8,color="darkred",cex=rel(1.3),linetype="dashed")+
169-
annotate("text",x=summary_metaD[col],y = Inf,
170-
label = glue("Mean = {round(summary_metaD[col],0)}"),
171-
vjust = 1,hjust=2)+
172-
theme(plot.title = element_text(hjust=0.5, face="bold"))
171+
ggdensity(metaD,
172+
x = col, add = "mean", rug = TRUE,
173+
alpha = 0.2, fill = "lightgray",
174+
xlab = "complexity", ylab = "Cell density", title = glue("Novelty score")
175+
) +
176+
geom_vline(xintercept = 0.8, color = "darkred", cex = rel(1.3), linetype = "dashed") +
177+
annotate("text",
178+
x = summary_metaD[col], y = Inf,
179+
label = glue("Mean = {round(summary_metaD[col],0)}"),
180+
vjust = 1, hjust = 2
181+
) +
182+
theme(plot.title = element_text(hjust = 0.5, face = "bold"))
173183
```
174184

175185
### mitochondria & hemoglospot/bingene ratios
176186

177187
```{r}
178-
ggplot(metaD %>%
179-
select(orig.ident,starts_with("percent_")) %>%
180-
tidyr::gather(class,percent_unexpected,-orig.ident),
181-
aes_string(x = "orig.ident", y = "percent_unexpected")) +
182-
geom_violin(position=position_dodge(1),alpha=1, na.rm=TRUE,trim=FALSE)+
183-
ggbeeswarm::geom_quasirandom(na.rm=TRUE,dodge.width=0.5,
184-
method='quasirandom',alpha=0.01)+
185-
geom_boxplot(width=0.1,outliers = F)+
186-
geom_hline(yintercept=20)+
187-
facet_grid(~class,scales = "free")+
188-
theme(
189-
axis.text.x = element_text(size=rel(1),face="bold"),
190-
plot.title = element_text(hjust = 0.5),
191-
strip.text.x = element_text(size = rel(1.5), colour = "black"),
192-
legend.position = "none"
193-
)+
194-
scale_y_log10(breaks=c(1,5,10,20,100))+
188+
ggplot(
189+
metaD %>%
190+
select(orig.ident, starts_with("percent_")) %>%
191+
tidyr::gather(class, percent_unexpected, -orig.ident),
192+
aes_string(x = "orig.ident", y = "percent_unexpected")
193+
) +
194+
geom_violin(position = position_dodge(1), alpha = 1, na.rm = TRUE, trim = FALSE) +
195+
ggbeeswarm::geom_quasirandom(
196+
na.rm = TRUE, dodge.width = 0.5,
197+
method = "quasirandom", alpha = 0.01
198+
) +
199+
geom_boxplot(width = 0.1, outliers = F) +
200+
geom_hline(yintercept = 20) +
201+
facet_grid(~class, scales = "free") +
202+
theme(
203+
axis.text.x = element_text(size = rel(1), face = "bold"),
204+
plot.title = element_text(hjust = 0.5),
205+
strip.text.x = element_text(size = rel(1.5), colour = "black"),
206+
legend.position = "none"
207+
) +
208+
scale_y_log10(breaks = c(1, 5, 10, 20, 100)) +
195209
# ylim(c(0,100))+
196-
labs(x="",y="% of contamination genes")
210+
labs(x = "", y = "% of contamination genes")
197211
```
198212

199213
## QC metrics visualized on slides{.tabset}
200214

201215
Here, we can look at all the QC metrics we discussed above on the individual tissue slide.
202216

203217
```{r}
204-
features2check <- c(glue('nCount_{DefaultAssay(visium)}'),
205-
glue('nFeature_{DefaultAssay(visium)}'),
206-
"percent_mito","percent_hb")
218+
features2check <- c(
219+
glue("nCount_{DefaultAssay(visium)}"),
220+
glue("nFeature_{DefaultAssay(visium)}"),
221+
"percent_mito", "percent_hb"
222+
)
207223
```
208224

209225
```{r spatial-plot,fig.height=5,fig.width=5,eval=T,results='asis'}
210-
for(f in features2check){
226+
for (f in features2check) {
211227
cat("### ", f, "\n\n")
212-
p1 <- SpatialFeaturePlot(visium,
213-
feature = f,
214-
pt.size.factor = 4)
228+
p1 <- SpatialFeaturePlot(visium,
229+
feature = f,
230+
pt.size.factor = 4
231+
)
215232
print(p1)
216233
cat("\n\n")
217234
}
@@ -222,14 +239,14 @@ for(f in features2check){
222239
Now, it is time to choose some cut-offs for QC metrics mentioned above and removing low-quality cells, as well as mitochondria, hemoglobin genes from the feature space and we can take a quick look at what are our top 20 expressed genes.
223240

224241
```{r filtering,fig.height=7,fig.width=7}
225-
GeneVar <- glue('nFeature_{DefaultAssay(visium)}')
226-
UMIVar <- glue('nCount_{DefaultAssay(visium)}')
227-
cutoffs <- list("nFeature"=100,"nCount"=100,"hb"=20,"mito"=20)
228-
Qced <- [email protected][,GeneVar] > cutoffs$nFeature &
229-
[email protected][,UMIVar] > cutoffs$nCount &
230-
visium$percent_hb < cutoffs$hb &
231-
visium$percent_mito < cutoffs$mito
232-
visium <- visium[,Qced]
242+
GeneVar <- glue("nFeature_{DefaultAssay(visium)}")
243+
UMIVar <- glue("nCount_{DefaultAssay(visium)}")
244+
cutoffs <- list("nFeature" = 100, "nCount" = 100, "hb" = 20, "mito" = 20)
245+
Qced <- [email protected][, GeneVar] > cutoffs$nFeature &
246+
[email protected][, UMIVar] > cutoffs$nCount &
247+
visium$percent_hb < cutoffs$hb &
248+
visium$percent_mito < cutoffs$mito
249+
visium <- visium[, Qced]
233250
# Filter Mitocondrial
234251
visium <- visium[!grepl("^mt-", rownames(visium)), ]
235252
# Filter Hemoglobin gene (optional if that is a problem on your data)
@@ -238,33 +255,35 @@ visium <- visium[!grepl("^Hb.*-", rownames(visium)), ]
238255
C <- GetAssayData(visium, slot = "counts")
239256
C@x <- C@x / rep.int(colSums(C), diff(C@p))
240257
most_expressed <- order(Matrix::rowSums(C), decreasing = T)[20:1]
241-
exprD <- as.data.frame(t(C[most_expressed, ])) %>%
242-
tibble::rownames_to_column("bin") %>%
243-
tidyr::gather(gene,expr,-bin)
244-
245-
246-
ggplot(exprD,aes(x=gene,y=expr,color=gene,fill=gene))+
247-
geom_violin(position=position_dodge(1),alpha=0.5,
248-
na.rm=TRUE,trim=FALSE)+
249-
geom_boxplot(width=0.1,outliers = F,color="black")+
250-
theme_minimal()+
258+
exprD <- as.data.frame(t(C[most_expressed, ])) %>%
259+
tibble::rownames_to_column("bin") %>%
260+
tidyr::gather(gene, expr, -bin)
261+
262+
263+
ggplot(exprD, aes(x = gene, y = expr, color = gene, fill = gene)) +
264+
geom_violin(
265+
position = position_dodge(1), alpha = 0.5,
266+
na.rm = TRUE, trim = FALSE
267+
) +
268+
geom_boxplot(width = 0.1, outliers = F, color = "black") +
269+
theme_minimal() +
251270
theme(
252-
axis.text.x = element_text(size=rel(1),face="bold"),
253-
plot.title = element_text(hjust = 0.5),
254-
legend.position = "none"
255-
)+
256-
scale_y_log10(breaks=c(0.001,.01,.1,1),labels=c(0.1,1,10,100))+
257-
labs(x="",y="% of total UMIs/bin \n (log10 scaled)")+
271+
axis.text.x = element_text(size = rel(1), face = "bold"),
272+
plot.title = element_text(hjust = 0.5),
273+
legend.position = "none"
274+
) +
275+
scale_y_log10(breaks = c(0.001, .01, .1, 1), labels = c(0.1, 1, 10, 100)) +
276+
labs(x = "", y = "% of total UMIs/bin \n (log10 scaled)") +
258277
coord_flip()
259278
```
260279

261280

262281
```{r save_seurat}
263-
if(!dir.exists(results_dir)){
282+
if (!dir.exists(results_dir)) {
264283
system(glue("mkdir -p {results_dir}"))
265284
}
266285
qs_save(visium, file.path(results_dir, "01_qc.qs"))
267-
outputPath = file.path(results_dir, "01_qc.qs")
286+
outputPath <- file.path(results_dir, "01_qc.qs")
268287
```
269288

270289
We saved your qc-filled Seurat object in **`r outputPath`**.

0 commit comments

Comments
 (0)