bcbio
diff --git a/‎visium/01_quality_assessment/qc.Rmd‎ b/‎visium/01_quality_assessment/qc.Rmd‎
diff --git a/‎visium/01_quality_assessment/quality_assessment.qmd‎
Lines changed: 117 additions & 98 deletions b/‎visium/01_quality_assessment/quality_assessment.qmd‎
Lines changed: 117 additions & 98 deletions
@@ -55,10 +55,10 @@ Various metrics can be used to filter low-quality cells from high-quality ones,
 # This sets the working directory to this file's location so all files can be found
 # library(rstudioapi)
 # setwd(fs::path_dir(getSourceEditorContext()$path))
-stopifnot(R.version$major>= 4) # requires R4
-if (compareVersion(R.version$minor,"3.1")<0) warning("We recommend >= R4.3.1") 
-stopifnot(compareVersion(as.character(BiocManager::version()), "3.16")>=0)
-stopifnot(compareVersion(as.character(packageVersion("Seurat")), "5.1")>=0)
+stopifnot(getRversion() >= "4.0.0") # requires R4; compares versions numerically, not as strings
+if (getRversion() < "4.3.1") warning("We recommend >= R4.3.1") # checks the full version, not only the minor part
+stopifnot(compareVersion(as.character(BiocManager::version()), "3.16") >= 0) # Bioconductor >= 3.16
+stopifnot(compareVersion(as.character(packageVersion("Seurat")), "5.1") >= 0) # Seurat >= 5.1
 ```
 
 ```{r load_libraries, cache = FALSE, message = FALSE, warning=FALSE, echo=FALSE,}
@@ -78,31 +78,35 @@ library(Seurat)
 
 import::from(magrittr, set_colnames, set_rownames, "%<>%")
 
-invisible(list2env(params,environment()))
+invisible(list2env(params, environment()))
 source(project_file)
 
 ggplot2::theme_set(theme_prism(base_size = 12))
 # https://grafify-vignettes.netlify.app/colour_palettes.html
 # NOTE change colors here if you wish
-scale_colour_discrete <- function(...)
-  scale_colour_manual(..., 
-                      values = as.vector(grafify:::graf_palettes[["kelly"]]))
-scale_fill_discrete <- function(...)
-  scale_fill_manual(..., 
-                      values = as.vector(grafify:::graf_palettes[["kelly"]]))
+scale_colour_discrete <- function(...) {
+  # Manual colour scale using grafify's "kelly" palette as a plain vector
+  kelly_cols <- as.vector(grafify:::graf_palettes[["kelly"]])
+  scale_colour_manual(..., values = kelly_cols)
+}
+scale_fill_discrete <- function(...) {
+  # Manual fill scale using the same "kelly" palette as a plain vector
+  kelly_cols <- as.vector(grafify:::graf_palettes[["kelly"]])
+  scale_fill_manual(..., values = kelly_cols)
+}
 
 opts_chunk[["set"]](
-    cache = F,
-    cache.lazy = FALSE,
-    dev = c("png", "pdf"),
-    error = TRUE,
-    highlight = TRUE,
-    message = FALSE,
-    prompt = FALSE,
-    tidy = FALSE,
-    warning = FALSE,
-    echo = T, 
-    fig.height = 4)
+  cache = F,
+  cache.lazy = FALSE,
+  dev = c("png", "pdf"),
+  error = TRUE,
+  highlight = TRUE,
+  message = FALSE,
+  prompt = FALSE,
+  tidy = FALSE,
+  warning = FALSE,
+  echo = T,
+  fig.height = 4)
 
 # set seed for reproducibility
 set.seed(1234567890L)
@@ -122,8 +126,8 @@ visium <- qs_read(visiumHD_obj)
 visium <- PercentageFeatureSet(visium, "^mt-", col.name = "percent_mito")
 visium <- PercentageFeatureSet(visium, "^Hb.*-", col.name = "percent_hb")
 metaD <- visium@meta.data
-metaD$log10GenesPerUMI <- log10(metaD$nFeature)/log10(metaD$nCount)
-colnames(metaD)%<>%gsub(pattern=glue("_{DefaultAssay(visium)}"),replacement="")
+metaD$log10GenesPerUMI <- log10(metaD$nFeature) / log10(metaD$nCount)
+colnames(metaD) %<>% gsub(pattern = glue("_{DefaultAssay(visium)}"), replacement = "")
 ```
 
 Let's take a quick look at the data and make a decision on whether we need to apply any filtering.
@@ -135,21 +139,23 @@ Let's take a quick look at the data and make a decision on whether we need to ap
 These two metrics are highly dependent on tissue type, RNA quality, and sequencing depth. Since the test data were generated with Visium HD technology, we plot per-bin values together with the corresponding reference thresholds. A reference line at 100 marks the suggested cut-off for both metrics.
 
 ```{r}
-summary_metaD <- apply(metaD[,-1],2,mean)
-metacol_label <- list("nFeature"="Genes","nCount"="UMI")
-refs <- list("nFeature"=100,"nCount"=100)
-dists_before <- imap(metacol_label,\(label,col)
-  ggdensity(metaD,
-          x = col,xscale="log10",add = "mean", rug = TRUE,
-          alpha = 0.2,fill = "lightgray",
-          xlab=glue("Number of {label} per bin(in log10 scale)"),
-          ylab="Cell density",
-          title=glue('Pre-QC {label}/Bin'))+
-  geom_vline(xintercept = refs[[col]],color="darkred",cex=rel(1.3),linetype="dashed")+
-  annotate("text",x=summary_metaD[col],y = Inf,
-           label = glue("Mean \n = {round(summary_metaD[col],0)}"),
-            vjust = 1,hjust=2)
-)
+summary_metaD <- apply(metaD[, -1], 2, mean)
+metacol_label <- list("nFeature" = "Genes", "nCount" = "UMI")
+refs <- list("nFeature" = 100, "nCount" = 100)
+dists_before <- imap(metacol_label, \(label, col)
+ggdensity(metaD,
+  x = col, xscale = "log10", add = "mean", rug = TRUE,
+  alpha = 0.2, fill = "lightgray",
+  xlab = glue("Number of {label} per bin(in log10 scale)"),
+  ylab = "Cell density",
+  title = glue("Pre-QC {label}/Bin")
+) +
+  geom_vline(xintercept = refs[[col]], color = "darkred", cex = rel(1.3), linetype = "dashed") +
+  annotate("text",
+    x = summary_metaD[col], y = Inf,
+    label = glue("Mean \n = {round(summary_metaD[col],0)}"),
+    vjust = 1, hjust = 2
+  ))
 dists_before[[1]] | dists_before[[2]]
 ```
 
@@ -162,56 +168,67 @@ With scRNA-seq this is more easily interpreted for a single cell, but for spatia
 
 ```{r complexity}
 col <- "log10GenesPerUMI"
-ggdensity(metaD,x = col,add = "mean", rug = TRUE,
-          alpha = 0.2,fill = "lightgray",
-          xlab="complexity",ylab="Cell density",title=glue('Novelty score'))+
-  geom_vline(xintercept = 0.8,color="darkred",cex=rel(1.3),linetype="dashed")+
-  annotate("text",x=summary_metaD[col],y = Inf,
-           label = glue("Mean = {round(summary_metaD[col],0)}"),
-            vjust = 1,hjust=2)+
-    theme(plot.title = element_text(hjust=0.5, face="bold"))
+ggdensity(metaD, # density of the per-bin complexity score (log10 genes / log10 UMIs)
+  x = col, add = "mean", rug = TRUE,
+  alpha = 0.2, fill = "lightgray",
+  xlab = "complexity", ylab = "Cell density", title = glue("Novelty score")
+) +
+  geom_vline(xintercept = 0.8, color = "darkred", cex = rel(1.3), linetype = "dashed") + # reference line at 0.8
+  annotate("text",
+    x = summary_metaD[col], y = Inf,
+    label = glue("Mean = {round(summary_metaD[col], 2)}"), # 2 decimals: whole-number rounding hides the gap to the 0.8 line
+    vjust = 1, hjust = 2
+  ) +
+  theme(plot.title = element_text(hjust = 0.5, face = "bold"))
 ```
 
 ### Mitochondria & hemoglobin gene ratios
 
 ```{r}
-ggplot(metaD %>% 
-         select(orig.ident,starts_with("percent_")) %>% 
-         tidyr::gather(class,percent_unexpected,-orig.ident), 
-        aes_string(x = "orig.ident", y = "percent_unexpected")) +
-    geom_violin(position=position_dodge(1),alpha=1, na.rm=TRUE,trim=FALSE)+
-    ggbeeswarm::geom_quasirandom(na.rm=TRUE,dodge.width=0.5,
-                                 method='quasirandom',alpha=0.01)+
-    geom_boxplot(width=0.1,outliers = F)+
-   geom_hline(yintercept=20)+
-   facet_grid(~class,scales = "free")+
-    theme(
-      axis.text.x = element_text(size=rel(1),face="bold"),
-      plot.title = element_text(hjust = 0.5),
-      strip.text.x = element_text(size = rel(1.5), colour = "black"),
-      legend.position = "none"
-          )+
-  scale_y_log10(breaks=c(1,5,10,20,100))+
+ggplot(
+  metaD %>%
+    select(orig.ident, starts_with("percent_")) %>%
+    tidyr::gather(class, percent_unexpected, -orig.ident),
+  aes_string(x = "orig.ident", y = "percent_unexpected")
+) +
+  geom_violin(position = position_dodge(1), alpha = 1, na.rm = TRUE, trim = FALSE) +
+  ggbeeswarm::geom_quasirandom(
+    na.rm = TRUE, dodge.width = 0.5,
+    method = "quasirandom", alpha = 0.01
+  ) +
+  geom_boxplot(width = 0.1, outliers = F) +
+  geom_hline(yintercept = 20) +
+  facet_grid(~class, scales = "free") +
+  theme(
+    axis.text.x = element_text(size = rel(1), face = "bold"),
+    plot.title = element_text(hjust = 0.5),
+    strip.text.x = element_text(size = rel(1.5), colour = "black"),
+    legend.position = "none"
+  ) +
+  scale_y_log10(breaks = c(1, 5, 10, 20, 100)) +
   # ylim(c(0,100))+
-  labs(x="",y="% of contamination genes")
+  labs(x = "", y = "% of contamination genes")
 ```
 
 ## QC metrics visualized on slides{.tabset}
 
 Here, we can look at all the QC metrics we discussed above on the individual tissue slide.
 
 ```{r}
-features2check <- c(glue('nCount_{DefaultAssay(visium)}'),
-                    glue('nFeature_{DefaultAssay(visium)}'),
-                    "percent_mito","percent_hb")
+features2check <- c(
+  glue("nCount_{DefaultAssay(visium)}"),
+  glue("nFeature_{DefaultAssay(visium)}"),
+  "percent_mito", "percent_hb"
+)
 ```
 
 ```{r spatial-plot,fig.height=5,fig.width=5,eval=T,results='asis'}
-for(f in features2check){
+for (f in features2check) {
   cat("### ", f, "\n\n")
-  p1 <- SpatialFeaturePlot(visium, 
-                   feature = f,
-                  pt.size.factor = 4)
+  p1 <- SpatialFeaturePlot(visium,
+    feature = f,
+    pt.size.factor = 4
+  )
   print(p1)
   cat("\n\n")
 }
@@ -222,14 +239,14 @@ for(f in features2check){
 Now it is time to choose cut-offs for the QC metrics mentioned above and remove low-quality cells, as well as mitochondrial and hemoglobin genes, from the feature space. We can then take a quick look at our top 20 expressed genes.
 
 ```{r filtering,fig.height=7,fig.width=7}
-GeneVar <- glue('nFeature_{DefaultAssay(visium)}')
-UMIVar <- glue('nCount_{DefaultAssay(visium)}')
-cutoffs <- list("nFeature"=100,"nCount"=100,"hb"=20,"mito"=20)
-Qced <-  visium@meta.data[,GeneVar] > cutoffs$nFeature & 
-     visium@meta.data[,UMIVar] > cutoffs$nCount & 
-     visium$percent_hb < cutoffs$hb & 
-     visium$percent_mito < cutoffs$mito
-visium <- visium[,Qced]
+GeneVar <- glue("nFeature_{DefaultAssay(visium)}") # name of the nFeature_<assay> metadata column
+UMIVar <- glue("nCount_{DefaultAssay(visium)}") # name of the nCount_<assay> metadata column
+cutoffs <- list("nFeature" = 100, "nCount" = 100, "hb" = 20, "mito" = 20)
+Qced <- visium@meta.data[, GeneVar] > cutoffs$nFeature & # above both count thresholds ...
+  visium@meta.data[, UMIVar] > cutoffs$nCount &
+  visium$percent_hb < cutoffs$hb & # ... and below both percentage thresholds
+  visium$percent_mito < cutoffs$mito
+visium <- visium[, Qced] # keep only the columns that pass every filter
 # Filter mitochondrial genes
 visium <- visium[!grepl("^mt-", rownames(visium)), ]
 # Filter Hemoglobin gene (optional if that is a problem on your data)
@@ -238,33 +255,35 @@ visium <- visium[!grepl("^Hb.*-", rownames(visium)), ]
 C <- GetAssayData(visium, slot = "counts")
 C@x <- C@x / rep.int(colSums(C), diff(C@p))
 most_expressed <- order(Matrix::rowSums(C), decreasing = T)[20:1]
-exprD <- as.data.frame(t(C[most_expressed, ])) %>% 
-  tibble::rownames_to_column("bin") %>% 
-  tidyr::gather(gene,expr,-bin)
-
-
-ggplot(exprD,aes(x=gene,y=expr,color=gene,fill=gene))+
-  geom_violin(position=position_dodge(1),alpha=0.5,
-              na.rm=TRUE,trim=FALSE)+
-  geom_boxplot(width=0.1,outliers = F,color="black")+
-  theme_minimal()+
+exprD <- as.data.frame(t(C[most_expressed, ])) %>%
+  tibble::rownames_to_column("bin") %>%
+  tidyr::gather(gene, expr, -bin)
+
+
+ggplot(exprD, aes(x = gene, y = expr, color = gene, fill = gene)) +
+  geom_violin(
+    position = position_dodge(1), alpha = 0.5,
+    na.rm = TRUE, trim = FALSE
+  ) +
+  geom_boxplot(width = 0.1, outliers = F, color = "black") +
+  theme_minimal() +
   theme(
-      axis.text.x = element_text(size=rel(1),face="bold"),
-      plot.title = element_text(hjust = 0.5),
-      legend.position = "none"
-    )+ 
-  scale_y_log10(breaks=c(0.001,.01,.1,1),labels=c(0.1,1,10,100))+
-  labs(x="",y="% of total UMIs/bin \n (log10 scaled)")+
+    axis.text.x = element_text(size = rel(1), face = "bold"),
+    plot.title = element_text(hjust = 0.5),
+    legend.position = "none"
+  ) +
+  scale_y_log10(breaks = c(0.001, .01, .1, 1), labels = c(0.1, 1, 10, 100)) +
+  labs(x = "", y = "% of total UMIs/bin \n (log10 scaled)") +
   coord_flip()
 ```
 
 
 ```{r save_seurat}
-if(!dir.exists(results_dir)){
+if (!dir.exists(results_dir)) {
   system(glue("mkdir -p {results_dir}"))
 }
 qs_save(visium, file.path(results_dir, "01_qc.qs"))
-outputPath = file.path(results_dir, "01_qc.qs")
+outputPath <- file.path(results_dir, "01_qc.qs")
 ```
 
 We saved your QC-filtered Seurat object in **`r outputPath`**.